Repository: triton-inference-server/server
Branch: main
Commit: f642e5343589
Files: 1631
Total size: 9.3 MB

Directory structure:
gitextract_5r79t389/

├── .clang-format
├── .dockerignore
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── PULL_REQUEST_TEMPLATE/
│   │   ├── pull_request_template_external_contrib.md
│   │   └── pull_request_template_internal_contrib.md
│   ├── pull_request_template.md
│   └── workflows/
│       ├── codeql.yml
│       └── pre-commit.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CITATION.cff
├── CMakeLists.txt
├── CONTRIBUTING.md
├── Dockerfile.QA
├── Dockerfile.sdk
├── Dockerfile.win10.min
├── LICENSE
├── README.md
├── SECURITY.md
├── TRITON_VERSION
├── build.py
├── compose.py
├── deploy/
│   ├── alibaba-cloud/
│   │   └── README.md
│   ├── aws/
│   │   ├── Chart.yaml
│   │   ├── README.md
│   │   ├── dashboard.json
│   │   ├── templates/
│   │   │   ├── _helpers.tpl
│   │   │   ├── deployment.yaml
│   │   │   ├── secrets.yaml
│   │   │   └── service.yaml
│   │   └── values.yaml
│   ├── fleetcommand/
│   │   ├── Chart.yaml
│   │   ├── README.md
│   │   ├── dashboard.json
│   │   ├── templates/
│   │   │   ├── _helpers.tpl
│   │   │   ├── configmap-grafana-dashboard.yaml
│   │   │   ├── deployment.yaml
│   │   │   ├── secrets.yaml
│   │   │   └── service.yaml
│   │   └── values.yaml
│   ├── gcp/
│   │   ├── Chart.yaml
│   │   ├── README.md
│   │   ├── dashboard.json
│   │   ├── templates/
│   │   │   ├── _helpers.tpl
│   │   │   ├── deployment.yaml
│   │   │   └── service.yaml
│   │   └── values.yaml
│   ├── gke-marketplace-app/
│   │   ├── README.md
│   │   ├── benchmark/
│   │   │   ├── README.md
│   │   │   ├── model-store/
│   │   │   │   ├── bert_base_tf_cpu/
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── bert_base_tf_gpu/
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── bert_base_trt_gpu/
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── bert_base_trt_gpu_seqlen128/
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── bert_distill_tf_cpu/
│   │   │   │   │   └── config.pbtxt
│   │   │   │   └── bert_distill_tf_gpu/
│   │   │   │       └── config.pbtxt
│   │   │   └── perf-analyzer-script/
│   │   │       ├── perf_query.sh
│   │   │       └── triton_client.yaml
│   │   ├── client-sample/
│   │   │   ├── bert_request.json
│   │   │   ├── locustfile_bert.py
│   │   │   └── perf_analyzer_grpc.sh
│   │   ├── server-deployer/
│   │   │   ├── Dockerfile
│   │   │   ├── build_and_push.sh
│   │   │   ├── chart/
│   │   │   │   └── triton/
│   │   │   │       ├── Chart.yaml
│   │   │   │       ├── templates/
│   │   │   │       │   ├── _helpers.tpl
│   │   │   │       │   ├── application.yaml
│   │   │   │       │   ├── deployment.yaml
│   │   │   │       │   ├── hpa.yaml
│   │   │   │       │   ├── ingress.yaml
│   │   │   │       │   └── service.yaml
│   │   │   │       └── values.yaml
│   │   │   ├── data-test/
│   │   │   │   └── schema.yaml
│   │   │   └── schema.yaml
│   │   └── trt-engine/
│   │       └── README.md
│   ├── k8s-onprem/
│   │   ├── Chart.yaml
│   │   ├── README.md
│   │   ├── dashboard.json
│   │   ├── templates/
│   │   │   ├── _helpers.tpl
│   │   │   ├── deployment.yaml
│   │   │   ├── hpa.yaml
│   │   │   ├── ingressroute.yaml
│   │   │   ├── rbac.yaml
│   │   │   ├── service.yaml
│   │   │   └── serviceaccount.yaml
│   │   └── values.yaml
│   ├── mlflow-triton-plugin/
│   │   ├── README.md
│   │   ├── examples/
│   │   │   ├── expected_output.json
│   │   │   ├── input.json
│   │   │   └── onnx_float32_int32_int32/
│   │   │       ├── 1/
│   │   │       │   └── model.onnx
│   │   │       └── config.pbtxt
│   │   ├── mlflow_triton/
│   │   │   ├── __init__.py
│   │   │   ├── config.py
│   │   │   └── deployments.py
│   │   ├── scripts/
│   │   │   ├── publish_model_to_mlflow.py
│   │   │   └── triton_flavor.py
│   │   └── setup.py
│   └── oci/
│       ├── Chart.yaml
│       ├── README.md
│       ├── dashboard.json
│       ├── templates/
│       │   ├── _helpers.tpl
│       │   ├── deployment.yaml
│       │   ├── secrets.yaml
│       │   └── service.yaml
│       └── values.yaml
├── docker/
│   ├── README.third-party-src
│   ├── cpu_only/
│   │   ├── entrypoint.d/
│   │   │   ├── 12-banner.sh
│   │   │   └── 50-gpu-driver-check2.sh
│   │   └── nvidia_entrypoint.sh
│   ├── entrypoint.d/
│   │   ├── 10-banner.txt
│   │   ├── 15-container-copyright.txt
│   │   ├── 50-gpu-driver-check2.sh
│   │   ├── 56-network-driver-version-check.sh
│   │   ├── 70-shm-check.sh
│   │   └── 99-check-run-aip-mode.sh
│   └── sagemaker/
│       └── serve
├── docs/
│   ├── Dockerfile.docs
│   ├── Makefile
│   ├── README.md
│   ├── _reference/
│   │   └── tritonclient_api.rst
│   ├── _static/
│   │   ├── .gitattributes
│   │   ├── custom.css
│   │   └── rtd-data.js
│   ├── _templates/
│   │   └── layout.html
│   ├── backend_guide/
│   │   └── vllm.rst
│   ├── client_guide/
│   │   ├── api_reference.rst
│   │   ├── in_process.rst
│   │   ├── kserve.rst
│   │   ├── kserve_extension.rst
│   │   └── python.rst
│   ├── conf.py
│   ├── contents.rst
│   ├── customization_guide/
│   │   ├── build.md
│   │   ├── compose.md
│   │   ├── deploy.md
│   │   ├── inference_protocols.md
│   │   ├── inprocess_c_api.md
│   │   ├── inprocess_java_api.md
│   │   ├── repository_agents.md
│   │   ├── sagemaker.md
│   │   ├── test.md
│   │   └── tritonfrontend.md
│   ├── examples/
│   │   ├── README.md
│   │   ├── fetch_models.sh
│   │   ├── jetson/
│   │   │   ├── README.md
│   │   │   └── concurrency_and_dynamic_batching/
│   │   │       ├── Makefile
│   │   │       ├── README.md
│   │   │       ├── common.h
│   │   │       ├── labels.txt
│   │   │       ├── people_detection.cc
│   │   │       ├── tao/
│   │   │       │   ├── convert_peoplenet.sh
│   │   │       │   └── models/
│   │   │       │       └── peoplenet/
│   │   │       │           └── .gitkeep
│   │   │       ├── trtis_model_repo_sample_1/
│   │   │       │   └── peoplenet/
│   │   │       │       ├── 1/
│   │   │       │       │   └── .gitkeep
│   │   │       │       └── config.pbtxt
│   │   │       └── trtis_model_repo_sample_2/
│   │   │           └── peoplenet/
│   │   │               ├── 1/
│   │   │               │   └── .gitkeep
│   │   │               └── config.pbtxt
│   │   └── model_repository/
│   │       ├── densenet_onnx/
│   │       │   ├── config.pbtxt
│   │       │   └── densenet_labels.txt
│   │       ├── inception_onnx/
│   │       │   ├── config.pbtxt
│   │       │   └── inception_labels.txt
│   │       ├── simple/
│   │       │   ├── 1/
│   │       │   │   └── model.onnx
│   │       │   └── config.pbtxt
│   │       ├── simple_dyna_sequence/
│   │       │   ├── 1/
│   │       │   │   └── model.onnx
│   │       │   └── config.pbtxt
│   │       ├── simple_identity/
│   │       │   ├── 1/
│   │       │   │   └── model.py
│   │       │   └── config.pbtxt
│   │       ├── simple_int8/
│   │       │   ├── 1/
│   │       │   │   └── model.onnx
│   │       │   └── config.pbtxt
│   │       ├── simple_sequence/
│   │       │   ├── 1/
│   │       │   │   └── model.onnx
│   │       │   └── config.pbtxt
│   │       └── simple_string/
│   │           ├── 1/
│   │           │   └── model.onnx
│   │           └── config.pbtxt
│   ├── exclusions.txt
│   ├── generate_docs.py
│   ├── getting_started/
│   │   ├── llm.md
│   │   ├── quick_deployment.rst
│   │   ├── quickstart.md
│   │   └── trtllm_user_guide.md
│   ├── index.md
│   ├── introduction/
│   │   ├── compatibility.md
│   │   ├── index.md
│   │   └── release_notes.md
│   ├── llm_features/
│   │   └── speculative_decoding.rst
│   ├── perf_benchmark/
│   │   ├── genai_perf.rst
│   │   ├── model_analyzer.rst
│   │   └── perf_analyzer.rst
│   ├── protocol/
│   │   ├── README.md
│   │   ├── extension_binary_data.md
│   │   ├── extension_classification.md
│   │   ├── extension_generate.md
│   │   ├── extension_logging.md
│   │   ├── extension_model_configuration.md
│   │   ├── extension_model_repository.md
│   │   ├── extension_parameters.md
│   │   ├── extension_schedule_policy.md
│   │   ├── extension_sequence.md
│   │   ├── extension_shared_memory.md
│   │   ├── extension_statistics.md
│   │   └── extension_trace.md
│   ├── repositories.txt
│   ├── scaling_guide/
│   │   └── scaling_guide.rst
│   ├── server_guide/
│   │   ├── features.rst
│   │   ├── model_pipelines.rst
│   │   └── state_management.rst
│   └── user_guide/
│       ├── architecture.md
│       ├── batcher.md
│       ├── bls.md
│       ├── custom_operations.md
│       ├── debugging_guide.md
│       ├── decoupled_models.md
│       ├── ensemble_models.md
│       ├── faq.md
│       ├── implicit_state_management.md
│       ├── jetson.md
│       ├── metrics.md
│       ├── model_analyzer.md
│       ├── model_configuration.md
│       ├── model_execution.md
│       ├── model_management.md
│       ├── model_repository.md
│       ├── optimization.md
│       ├── perf_analyzer.md
│       ├── performance_tuning.md
│       ├── ragged_batching.md
│       ├── rate_limiter.md
│       ├── request_cancellation.md
│       ├── response_cache.md
│       ├── scheduler.md
│       ├── trace.md
│       └── v1_to_v2.md
├── enhancements/
│   ├── NNNN-template-complete.md
│   ├── NNNN-template-limited.md
│   ├── README.md
│   └── teps/
│       └── 0000-tep-process.md
├── pyproject.toml
├── python/
│   └── openai/
│       ├── README.md
│       ├── openai_frontend/
│       │   ├── __init__.py
│       │   ├── engine/
│       │   │   ├── __init__.py
│       │   │   ├── engine.py
│       │   │   ├── triton_engine.py
│       │   │   └── utils/
│       │   │       ├── __init__.py
│       │   │       ├── chat.py
│       │   │       ├── tokenizer.py
│       │   │       ├── tool_call_parsers/
│       │   │       │   ├── __init__.py
│       │   │       │   ├── llama_tool_call_parser.py
│       │   │       │   ├── mistral_tool_call_parser.py
│       │   │       │   ├── tool_call_parser.py
│       │   │       │   └── utils.py
│       │   │       └── triton.py
│       │   ├── frontend/
│       │   │   ├── __init__.py
│       │   │   ├── fastapi/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── middleware/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   └── api_restriction.py
│       │   │   │   └── routers/
│       │   │   │       ├── __init__.py
│       │   │   │       ├── chat.py
│       │   │   │       ├── completions.py
│       │   │   │       ├── embeddings.py
│       │   │   │       ├── models.py
│       │   │   │       └── observability.py
│       │   │   ├── fastapi_frontend.py
│       │   │   └── frontend.py
│       │   ├── main.py
│       │   ├── schemas/
│       │   │   ├── __init__.py
│       │   │   └── openai.py
│       │   └── utils/
│       │       └── utils.py
│       ├── requirements-test.txt
│       ├── requirements.txt
│       └── tests/
│           ├── __init__.py
│           ├── conftest.py
│           ├── test_chat_completions.py
│           ├── test_completions.py
│           ├── test_embeddings.py
│           ├── test_lora.py
│           ├── test_models/
│           │   ├── identity_py/
│           │   │   ├── 1/
│           │   │   │   └── model.py
│           │   │   └── config.pbtxt
│           │   └── mock_llm/
│           │       ├── 1/
│           │       │   └── model.py
│           │       └── config.pbtxt
│           ├── test_observability.py
│           ├── test_openai_client.py
│           ├── test_openai_restricted_apis.py
│           ├── test_tool_calling.py
│           ├── utils.py
│           ├── vllm_embedding_models/
│           │   └── all-MiniLM-L6-v2/
│           │       ├── 1/
│           │       │   └── model.json
│           │       └── config.pbtxt
│           ├── vllm_mistral_models/
│           │   └── mistral-nemo-instruct-2407/
│           │       ├── 1/
│           │       │   └── model.json
│           │       └── config.pbtxt
│           └── vllm_models/
│               └── llama-3.1-8b-instruct/
│                   ├── 1/
│                   │   └── model.json
│                   └── config.pbtxt
├── qa/
│   ├── L0_additional_dependency_dirs/
│   │   └── test.sh
│   ├── L0_async_work_queue/
│   │   └── test.sh
│   ├── L0_backend_bls/
│   │   └── test.sh
│   ├── L0_backend_config/
│   │   └── test.sh
│   ├── L0_backend_fastertransformer/
│   │   └── test.sh
│   ├── L0_backend_identity/
│   │   ├── identity_test.py
│   │   └── test.sh
│   ├── L0_backend_onnxruntime/
│   │   ├── gen_add_bf16_onnx_model.py
│   │   ├── test.py
│   │   └── test.sh
│   ├── L0_backend_output_detail/
│   │   └── test.sh
│   ├── L0_backend_python/
│   │   ├── argument_validation/
│   │   │   ├── models/
│   │   │   │   └── argument_validation/
│   │   │   │       ├── 1/
│   │   │   │       │   └── model.py
│   │   │   │       └── config.pbtxt
│   │   │   └── test.sh
│   │   ├── async_execute/
│   │   │   ├── concurrency_test.py
│   │   │   └── test.sh
│   │   ├── bls/
│   │   │   ├── bls_parameters_test.py
│   │   │   └── test.sh
│   │   ├── common.sh
│   │   ├── custom_metrics/
│   │   │   └── test.sh
│   │   ├── decoupled/
│   │   │   ├── decoupled_test.py
│   │   │   ├── models/
│   │   │   │   ├── decoupled_bls/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.py
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── decoupled_bls_async_cancel/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.py
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── decoupled_bls_cancel/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.py
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── decoupled_bls_cancel_after_complete/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.py
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── decoupled_bls_stream/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.py
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── decoupled_execute_error/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.py
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── decoupled_raise_exception/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.py
│   │   │   │   │   └── config.pbtxt
│   │   │   │   ├── decoupled_return_response_error/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.py
│   │   │   │   │   └── config.pbtxt
│   │   │   │   └── decoupled_send_after_close_error/
│   │   │   │       ├── 1/
│   │   │   │       │   └── model.py
│   │   │   │       └── config.pbtxt
│   │   │   └── test.sh
│   │   ├── ensemble/
│   │   │   ├── ensemble_test.py
│   │   │   └── test.sh
│   │   ├── env/
│   │   │   └── test.sh
│   │   ├── examples/
│   │   │   └── test.sh
│   │   ├── io/
│   │   │   ├── io_test.py
│   │   │   ├── requested_output_model/
│   │   │   │   ├── config.pbtxt
│   │   │   │   └── model.py
│   │   │   └── test.sh
│   │   ├── lifecycle/
│   │   │   ├── lifecycle_test.py
│   │   │   └── test.sh
│   │   ├── logging/
│   │   │   ├── logging_test.py
│   │   │   └── test.sh
│   │   ├── model_control/
│   │   │   ├── model_control_test.py
│   │   │   └── test.sh
│   │   ├── model_readiness/
│   │   │   ├── test.sh
│   │   │   ├── test_model_readiness.py
│   │   │   └── test_models/
│   │   │       ├── is_ready_fn_returns_true_decoupled/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── model.py
│   │   │       ├── readiness_coroutine_model.py
│   │   │       └── readiness_model.py
│   │   ├── parameters/
│   │   │   ├── response_parameters_test.py
│   │   │   └── test.sh
│   │   ├── python_based_backends/
│   │   │   ├── python_based_backends_test.py
│   │   │   └── test.sh
│   │   ├── python_test.py
│   │   ├── request_rescheduling/
│   │   │   ├── grpc_endpoint_test.py
│   │   │   └── test.sh
│   │   ├── response_sender/
│   │   │   ├── response_sender_complete_final_test.py
│   │   │   ├── response_sender_test.py
│   │   │   └── test.sh
│   │   ├── restart/
│   │   │   ├── models/
│   │   │   │   └── restart/
│   │   │   │       ├── 1/
│   │   │   │       │   └── model.py
│   │   │   │       └── config.pbtxt
│   │   │   ├── restart_test.py
│   │   │   └── test.sh
│   │   ├── setup_python_enviroment.sh
│   │   ├── test.sh
│   │   ├── test_infer_shm_leak.py
│   │   └── variants/
│   │       └── test.sh
│   ├── L0_backend_release/
│   │   └── test.sh
│   ├── L0_backend_tutorial/
│   │   └── test.sh
│   ├── L0_batch_custom/
│   │   ├── batch_custom_test.py
│   │   └── test.sh
│   ├── L0_batch_input/
│   │   ├── batch_input_test.py
│   │   └── test.sh
│   ├── L0_batcher/
│   │   ├── batcher_test.py
│   │   ├── queue_timeout_test.py
│   │   ├── test.sh
│   │   └── verify_timestamps.py
│   ├── L0_buffer_attributes/
│   │   ├── buffer_attributes_test.py
│   │   ├── models/
│   │   │   ├── bls/
│   │   │   │   ├── 1/
│   │   │   │   │   └── model.py
│   │   │   │   └── config.pbtxt
│   │   │   └── identity/
│   │   │       ├── 1/
│   │   │       │   └── model.py
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_client_build_variants/
│   │   └── test.sh
│   ├── L0_client_java/
│   │   └── test.sh
│   ├── L0_client_memory_growth/
│   │   ├── client_memory_mail.py
│   │   ├── models/
│   │   │   └── custom_identity_int32/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_client_nobatch/
│   │   ├── client_test.py
│   │   └── test.sh
│   ├── L0_client_timeout/
│   │   ├── client_infer_timeout_test.py
│   │   ├── client_non_infer_timeout_test.py
│   │   ├── models/
│   │   │   └── custom_identity_int32/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_client_valgrind/
│   │   ├── models/
│   │   │   └── custom_identity_int32/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_cmdline_trace/
│   │   ├── test.sh
│   │   └── trace_client.py
│   ├── L0_compute_capability/
│   │   └── test.sh
│   ├── L0_config_json/
│   │   ├── ensemble_config.pbtxt
│   │   ├── max_priority_level.pbtxt
│   │   └── test.sh
│   ├── L0_cuda_graph/
│   │   ├── test.sh
│   │   └── trt_cuda_graph_test.py
│   ├── L0_cuda_shared_memory/
│   │   ├── cuda_shared_memory_test.py
│   │   └── test.sh
│   ├── L0_custom_model_config/
│   │   └── test.sh
│   ├── L0_custom_ops/
│   │   ├── mod_op_test.py
│   │   ├── onnx_op_test.py
│   │   ├── test.sh
│   │   └── vision_op_test.py
│   ├── L0_data_compression/
│   │   ├── test.sh
│   │   └── validation.py
│   ├── L0_decoupled/
│   │   ├── decoupled_test.py
│   │   ├── models/
│   │   │   ├── fan_repeat/
│   │   │   │   └── config.pbtxt
│   │   │   ├── identity_int32/
│   │   │   │   └── config.pbtxt
│   │   │   ├── nested_square/
│   │   │   │   └── config.pbtxt
│   │   │   ├── repeat_square/
│   │   │   │   └── config.pbtxt
│   │   │   ├── sequence_repeat/
│   │   │   │   └── config.pbtxt
│   │   │   └── simple_repeat/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_device_memory_tracker/
│   │   ├── test.py
│   │   └── test.sh
│   ├── L0_dlpack_multi_gpu/
│   │   └── test.sh
│   ├── L0_doc_links/
│   │   ├── mkdocs.yml
│   │   └── test.sh
│   ├── L0_dyna_implicit_state/
│   │   └── test.sh
│   ├── L0_dyna_sequence_batcher/
│   │   ├── dyna_sequence_batcher_test.py
│   │   └── test.sh
│   ├── L0_grpc/
│   │   ├── client_plugin_models/
│   │   │   └── client_plugin_test/
│   │   │       ├── 1/
│   │   │       │   └── model.py
│   │   │       └── config.pbtxt
│   │   ├── grpc_basic_auth_test.py
│   │   ├── grpc_client_plugin_test.py
│   │   ├── nginx.conf
│   │   ├── python_grpc_aio_test.py
│   │   ├── python_unit_test.py
│   │   └── test.sh
│   ├── L0_grpc_state_cleanup/
│   │   ├── cleanup_test.py
│   │   └── test.sh
│   ├── L0_http/
│   │   ├── generate_endpoint_test.py
│   │   ├── http_basic_auth_test.py
│   │   ├── http_client_plugin_test.py
│   │   ├── http_input_size_limit_test.py
│   │   ├── http_request_many_chunks.py
│   │   ├── http_restricted_api_test.py
│   │   ├── http_test.py
│   │   ├── nginx.conf
│   │   ├── python_http_aio_test.py
│   │   └── test.sh
│   ├── L0_http_fuzz/
│   │   ├── fuzztest.py
│   │   └── test.sh
│   ├── L0_https/
│   │   ├── nginx.conf
│   │   └── test.sh
│   ├── L0_implicit_state/
│   │   ├── implicit_state.py
│   │   ├── models/
│   │   │   ├── growable_memory/
│   │   │   │   └── config.pbtxt
│   │   │   ├── no_implicit_state/
│   │   │   │   └── config.pbtxt
│   │   │   ├── no_state_update/
│   │   │   │   └── config.pbtxt
│   │   │   ├── single_state_buffer/
│   │   │   │   └── config.pbtxt
│   │   │   └── wrong_internal_state/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_infer/
│   │   ├── infer_test.py
│   │   ├── install_and_test.sh
│   │   └── test.sh
│   ├── L0_infer_reshape/
│   │   ├── infer_reshape_test.py
│   │   └── test.sh
│   ├── L0_infer_variable/
│   │   ├── infer_variable_test.py
│   │   └── test.sh
│   ├── L0_infer_zero/
│   │   ├── infer_zero_test.py
│   │   └── test.sh
│   ├── L0_input_validation/
│   │   ├── input_validation_test.py
│   │   ├── models/
│   │   │   ├── input_all_optional/
│   │   │   │   ├── 1/
│   │   │   │   │   └── model.py
│   │   │   │   └── config.pbtxt
│   │   │   ├── input_all_required/
│   │   │   │   ├── 1/
│   │   │   │   │   └── model.py
│   │   │   │   └── config.pbtxt
│   │   │   └── input_optional/
│   │   │       ├── 1/
│   │   │       │   └── model.py
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_io/
│   │   ├── gen_libtorch_model.py
│   │   └── test.sh
│   ├── L0_iterative_sequence/
│   │   ├── iterative_sequence_e2e.py
│   │   ├── models/
│   │   │   └── iterative_sequence/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_java_memory_growth/
│   │   ├── MemoryGrowthTest.java
│   │   └── test.sh
│   ├── L0_java_resnet/
│   │   ├── ResnetTest.java
│   │   ├── expected_output_data/
│   │   │   ├── expected_output_onnx.txt
│   │   │   ├── expected_output_pytorch.txt
│   │   │   └── expected_output_tensorflow.txt
│   │   └── test.sh
│   ├── L0_java_sequence_batcher/
│   │   ├── SequenceTest.java
│   │   └── test.sh
│   ├── L0_java_simple_example/
│   │   └── test.sh
│   ├── L0_json/
│   │   └── test.sh
│   ├── L0_large_payload/
│   │   ├── large_payload_test.py
│   │   └── test.sh
│   ├── L0_libtorch_disable_cudnn/
│   │   └── test.sh
│   ├── L0_libtorch_inference_mode/
│   │   └── test.sh
│   ├── L0_libtorch_instance_group_kind_model/
│   │   ├── client.py
│   │   ├── gen_models.py
│   │   ├── models/
│   │   │   └── libtorch_multi_device/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_libtorch_io_names/
│   │   ├── io_names_client.py
│   │   └── test.sh
│   ├── L0_libtorch_io_types/
│   │   └── test.sh
│   ├── L0_libtorch_optimized_execution/
│   │   └── test.sh
│   ├── L0_libtorch_shared_weights/
│   │   ├── libtorch_shared_weights_test.py
│   │   └── test.sh
│   ├── L0_lifecycle/
│   │   ├── ensemble_zero_1_float32/
│   │   │   └── config.pbtxt
│   │   ├── identity_zero_1_int32/
│   │   │   └── config.pbtxt
│   │   ├── lifecycle_test.py
│   │   ├── retry_model/
│   │   │   └── 1/
│   │   │       └── model.py
│   │   └── test.sh
│   ├── L0_logging/
│   │   ├── log_format_test.py
│   │   ├── logging_endpoint_test.py
│   │   └── test.sh
│   ├── L0_long_running_stress/
│   │   ├── crashing_client.py
│   │   ├── scenarios.py
│   │   ├── stress.py
│   │   ├── stress_mail.py
│   │   └── test.sh
│   ├── L0_memory/
│   │   ├── client.py
│   │   └── test.sh
│   ├── L0_memory_growth/
│   │   ├── busy_op_test.py
│   │   ├── server_memory_mail.py
│   │   └── test.sh
│   ├── L0_metrics/
│   │   ├── cpu_metrics_test.py
│   │   ├── ensemble_decoupled/
│   │   │   ├── async_execute_decouple/
│   │   │   │   ├── 1/
│   │   │   │   │   └── model.py
│   │   │   │   └── config.pbtxt
│   │   │   └── ensemble/
│   │   │       └── config.pbtxt
│   │   ├── ensemble_delay/
│   │   │   └── config.pbtxt
│   │   ├── histogram_metrics_test.py
│   │   ├── identity_delay/
│   │   │   └── config.pbtxt
│   │   ├── metrics_config_test.py
│   │   ├── metrics_queue_size_test.py
│   │   ├── model_namespacing_repos/
│   │   │   ├── addsub_repo/
│   │   │   │   ├── addsub_ensemble/
│   │   │   │   │   └── config.pbtxt
│   │   │   │   └── composing_model/
│   │   │   │       └── 1/
│   │   │   │           └── model.py
│   │   │   └── subadd_repo/
│   │   │       ├── composing_model/
│   │   │       │   └── 1/
│   │   │       │       └── model.py
│   │   │       └── subadd_ensemble/
│   │   │           └── config.pbtxt
│   │   ├── pinned_memory_metrics_test.py
│   │   ├── test.sh
│   │   └── unit_test_models/
│   │       ├── identity_cache_off/
│   │       │   └── config.pbtxt
│   │       └── identity_cache_on/
│   │           └── config.pbtxt
│   ├── L0_mlflow/
│   │   ├── plugin_test.py
│   │   └── test.sh
│   ├── L0_model_config/
│   │   ├── autofill_noplatform/
│   │   │   ├── common/
│   │   │   │   └── no_version/
│   │   │   │       ├── config.pbtxt
│   │   │   │       └── expected
│   │   │   ├── custom/
│   │   │   │   ├── no_delimiter/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   └── unknown_backend.unknown/
│   │   │   │       ├── config.pbtxt
│   │   │   │       └── expected
│   │   │   ├── ensemble/
│   │   │   │   ├── circular_dependency/
│   │   │   │   │   ├── circular_dependency/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── circular_dependency_2/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── expected_2
│   │   │   │   ├── ensemble_scheduling_no_set/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── has_backend/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── inconsistent_data_type/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── fp32_dim1_batch2/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── inconsistent_data_type/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── int32_dim1_batch4/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── inconsistent_shape/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected_2
│   │   │   │   │   ├── fp32_dim1_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim3_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── inconsistent_shape/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── instance_group_set/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── invalid_batch_size/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── fp32_dim1_batch2/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim1_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── invalid_batch_size/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── invalid_decoupled_branching/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── int32_dim1_nobatch_output2/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── invalid_decoupled_branching/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── repeat_int32/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── invalid_decoupled_branching_2/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── invalid_decoupled_branching_2/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── repeat_int32/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── invalid_input_map/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── fp32_dim1_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim1_batch4_input4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim1_batch4_output3/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── invalid_input_map/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── invalid_output_map/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── fp32_dim1_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim1_batch4_input4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim1_batch4_output3/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── invalid_output_map/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── model_warm_up_set/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── no_input_map/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── no_model_name/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── no_output_map/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── no_required_version/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── no_required_version/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── simple/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── no_required_version_2/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── no_required_version_2/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── simple/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── no_required_version_3/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── no_required_version_3/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── simple/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── no_step/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── no_step_2/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── non_existing_model/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── fp32_dim1_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim1_batch4_output3/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── non_existing_model/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── optimization_set/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── output_to_tensor_overmapped/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── redundant_tensor_as_input/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── expected_2
│   │   │   │   ├── redundant_tensor_as_output/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── self_circular_dependency/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── fp32_dim1_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim1_batch4_input4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim1_batch4_output3/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── self_circular_dependency/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── tensor_to_input_overmapped/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── unmapped_input/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── fp32_dim1_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim1_batch4_input4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim1_batch4_output3/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── unmapped_input/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── unreachable_input/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── unreachable_output/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── unreachable_output_2/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   └── unreachable_output_3/
│   │   │   │       ├── config.pbtxt
│   │   │   │       └── expected
│   │   │   ├── onnx/
│   │   │   │   ├── bad_input_dims/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.onnx
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── bad_max_batch_size/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.onnx
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── bad_output_dims/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.onnx
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── too_few_inputs/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.onnx
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── too_many_inputs/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.onnx
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── unknown_input/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.onnx
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   └── unknown_output/
│   │   │   │       ├── 1/
│   │   │   │       │   └── model.onnx
│   │   │   │       ├── config.pbtxt
│   │   │   │       └── expected
│   │   │   ├── openvino/
│   │   │   │   ├── bad_input_dims/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── bad_output_dims/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── too_few_inputs/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── too_many_inputs/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── unknown_input/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   └── unknown_output/
│   │   │   │       ├── config.pbtxt
│   │   │   │       └── expected
│   │   │   ├── python/
│   │   │   │   ├── conflicting_max_batch_size/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── conflicting_scheduler_sequence/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── input_mismatch_datatype/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── input_mismatch_dims/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── input_missing_datatype/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── input_missing_dims/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── input_missing_name/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── input_wrong_property/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── model_transaction_policy_invalid_args/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── model_transaction_policy_mismatch/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── no_return/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── output_mismatch_datatype/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── output_mismatch_dims/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── output_missing_datatype/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── output_missing_dims/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── output_missing_name/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   └── output_wrong_property/
│   │   │   │       ├── config.pbtxt
│   │   │   │       ├── expected
│   │   │   │       └── model.py
│   │   │   ├── pytorch/
│   │   │   │   ├── too_few_inputs/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   └── too_few_outputs/
│   │   │   │       ├── config.pbtxt
│   │   │   │       └── expected
│   │   │   └── tensorrt/
│   │   │       ├── bad_dynamic_shapes_max/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_dynamic_shapes_min/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_input_dims/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_input_non_linear_format_io/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_input_shape/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_input_shape_tensor/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_input_type/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_output_dims/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_output_shape/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_output_shape_tensor/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_output_type/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── bad_outut_non_linear_format_io/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── mixed_batch_hint_dims/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── mixed_batch_hint_shape_values/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── too_few_inputs/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── too_many_inputs/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── unknown_input/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       └── unknown_output/
│   │   │           ├── config.pbtxt
│   │   │           └── expected
│   │   ├── autofill_noplatform_success/
│   │   │   ├── custom/
│   │   │   │   ├── empty_config.identity/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   └── no_backend.identity/
│   │   │   │       ├── config.pbtxt
│   │   │   │       └── expected
│   │   │   ├── ensemble/
│   │   │   │   ├── embedded_ensemble/
│   │   │   │   │   ├── embedded_ensemble/
│   │   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   │   └── expected
│   │   │   │   │   ├── fp32_dim1_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── inner_ensemble/
│   │   │   │   │       └── config.pbtxt
│   │   │   │   ├── inconsistent_shape/
│   │   │   │   │   ├── fp32_dim1_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim2_nobatch/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── inconsistent_shape/
│   │   │   │   │       ├── config.pbtxt
│   │   │   │   │       └── expected
│   │   │   │   ├── inconsistent_shape_2/
│   │   │   │   │   ├── fp32_dim1_batch4/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   ├── fp32_dim2_nobatch/
│   │   │   │   │   │   └── config.pbtxt
│   │   │   │   │   └── inconsistent_shape_2/
│   │   │   │   │       ├── config.pbtxt
│   │   │   │   │       └── expected
│   │   │   │   └── unmapped_output/
│   │   │   │       ├── fp32_dim1_batch4_output3/
│   │   │   │       │   └── config.pbtxt
│   │   │   │       └── unmapped_output/
│   │   │   │           ├── config.pbtxt
│   │   │   │           └── expected
│   │   │   ├── onnx/
│   │   │   │   ├── cpu_instance/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── empty_config/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.onnx
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   └── expected.3
│   │   │   │   ├── no_config/
│   │   │   │   │   ├── 1/
│   │   │   │   │   │   └── model.onnx
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   └── expected.3
│   │   │   │   └── no_config_no_batch/
│   │   │   │       ├── 1/
│   │   │   │       │   └── model.onnx
│   │   │   │       ├── config.pbtxt
│   │   │   │       ├── expected
│   │   │   │       ├── expected.1
│   │   │   │       ├── expected.2
│   │   │   │       └── expected.3
│   │   │   ├── openvino/
│   │   │   │   ├── dynamic_batch/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   └── expected.3
│   │   │   │   ├── empty_config/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   └── expected.3
│   │   │   │   ├── no_config/
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   └── expected.3
│   │   │   │   └── partial_config/
│   │   │   │       ├── config.pbtxt
│   │   │   │       ├── expected
│   │   │   │       └── expected.1
│   │   │   ├── python/
│   │   │   │   ├── conflicting_scheduler_ensemble/
│   │   │   │   │   ├── conflicting_scheduler_ensemble/
│   │   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   │   ├── expected
│   │   │   │   │   │   └── model.py
│   │   │   │   │   ├── ensemble_first_step/
│   │   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   │   └── model.py
│   │   │   │   │   └── ensemble_second_step/
│   │   │   │   │       ├── config.pbtxt
│   │   │   │   │       └── model.py
│   │   │   │   ├── dynamic_batching/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   ├── expected.3
│   │   │   │   │   └── model.py
│   │   │   │   ├── dynamic_batching_no_op/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   ├── expected.3
│   │   │   │   │   └── model.py
│   │   │   │   ├── empty_config/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   └── expected.3
│   │   │   │   ├── incomplete_input/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── incomplete_output/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   ├── model_transaction_policy/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   ├── expected.3
│   │   │   │   │   └── model.py
│   │   │   │   ├── model_transaction_policy_decoupled_false/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   ├── expected.3
│   │   │   │   │   └── model.py
│   │   │   │   ├── model_transaction_policy_no_op/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   ├── expected.1
│   │   │   │   │   ├── expected.2
│   │   │   │   │   ├── expected.3
│   │   │   │   │   └── model.py
│   │   │   │   ├── optional_input/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   ├── expected
│   │   │   │   │   └── model.py
│   │   │   │   ├── unknown_input/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   └── unknown_output/
│   │   │   │       ├── config.pbtxt
│   │   │   │       └── expected
│   │   │   ├── pytorch/
│   │   │   │   ├── cpu_instance/
│   │   │   │   │   ├── config.pbtxt
│   │   │   │   │   └── expected
│   │   │   │   └── no_name_platform/
│   │   │   │       ├── config.pbtxt
│   │   │   │       └── expected
│   │   │   └── tensorrt/
│   │   │       ├── empty_config/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── empty_config_variable/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── hint_for_no_batch/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── incomplete_input/
│   │   │       │   ├── config.pbtxt
│   │   │       │   ├── expected
│   │   │       │   ├── expected.1
│   │   │       │   ├── expected.2
│   │   │       │   └── expected.3
│   │   │       ├── incomplete_output/
│   │   │       │   ├── config.pbtxt
│   │   │       │   ├── expected
│   │   │       │   ├── expected.1
│   │   │       │   ├── expected.2
│   │   │       │   └── expected.3
│   │   │       ├── multi_prof_max_bs/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── no_config/
│   │   │       │   └── expected
│   │   │       ├── no_config_non_linear_format_io/
│   │   │       │   └── expected
│   │   │       ├── no_config_shape_tensor/
│   │   │       │   └── expected
│   │   │       ├── no_config_variable/
│   │   │       │   └── expected
│   │   │       ├── no_name_platform/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       ├── no_name_platform_variable/
│   │   │       │   ├── config.pbtxt
│   │   │       │   └── expected
│   │   │       └── reshape_config_provided/
│   │   │           ├── config.pbtxt
│   │   │           └── expected
│   │   ├── cli_messages/
│   │   │   ├── cli_deprecation/
│   │   │   │   └── expected
│   │   │   └── cli_override/
│   │   │       └── expected
│   │   ├── compare_status.py
│   │   ├── custom_parameters/
│   │   │   └── tensorrt/
│   │   │       ├── invalid/
│   │   │       │   └── allocation_strategy_invalid_value/
│   │   │       │       ├── expected
│   │   │       │       └── partial.pbtxt
│   │   │       └── valid/
│   │   │           ├── allocation_strategy_no_key/
│   │   │           │   └── partial.pbtxt
│   │   │           ├── allocation_strategy_no_parameters/
│   │   │           │   └── partial.pbtxt
│   │   │           ├── allocation_strategy_value_1/
│   │   │           │   ├── expected
│   │   │           │   └── partial.pbtxt
│   │   │           └── allocation_strategy_value_2/
│   │   │               ├── expected
│   │   │               └── partial.pbtxt
│   │   ├── model_metrics/
│   │   │   ├── invalid_config/
│   │   │   │   ├── empty_buckets/
│   │   │   │   │   ├── expected
│   │   │   │   │   └── partial.pbtxt
│   │   │   │   ├── empty_metric_family/
│   │   │   │   │   ├── expected
│   │   │   │   │   └── partial.pbtxt
│   │   │   │   ├── no_buckets/
│   │   │   │   │   ├── expected
│   │   │   │   │   └── partial.pbtxt
│   │   │   │   ├── no_histogram_options/
│   │   │   │   │   ├── expected
│   │   │   │   │   └── partial.pbtxt
│   │   │   │   ├── no_metric_family/
│   │   │   │   │   ├── expected
│   │   │   │   │   └── partial.pbtxt
│   │   │   │   └── no_metric_identifier/
│   │   │   │       ├── expected
│   │   │   │       └── partial.pbtxt
│   │   │   ├── valid_config/
│   │   │   │   └── valid_model_metrics/
│   │   │   │       └── partial.pbtxt
│   │   │   └── valid_config_with_warn/
│   │   │       └── unknown_metric_family/
│   │   │           ├── expected
│   │   │           └── partial.pbtxt
│   │   ├── noautofill_platform/
│   │   │   ├── batch_input_less_source0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_input_less_source1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_input_less_source2/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_input_less_source3/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_input_many_source0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_input_many_source1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_input_many_source2/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_input_many_source3/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_input_unknown_source/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_output_duplicated_target/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_output_less_source/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_output_many_source/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_output_unknown_source/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── batch_output_unknown_target/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_unsupported
│   │   │   ├── control_kind_end_multiple/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── control_kind_ready_multiple/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── control_kind_start_multiple/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── control_tensor_multiple/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── control_tensor_no_value/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── default_priority_level0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── default_priority_level1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── ensemble_scheduling_set/
│   │   │   │   ├── config.pbtxt
│   │   │   │   └── expected
│   │   │   ├── invalid_cpu/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── invalid_gpu/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── missing_datatype/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── negative_gpu/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── negative_max_batch_size/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── preserve_ordering0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── preserve_ordering1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── preserve_ordering2/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── priority_level0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── priority_level1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_elementcount0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_elementcount1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_elementcount2/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_elementcount3/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_nobatch_empty0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_nobatch_empty1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_nobatch_variable0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_nobatch_variable1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_nobatch_variable2/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_nobatch_variable3/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_nobatch_variable4/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_nobatch_variable5/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_nobatch_zerodims0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_nobatch_zerodims1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_variable0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_variable1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_variable2/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_variable3/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_variable4/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_variable5/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_zerodims0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── reshape_zerodims1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── zerodims_input0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── zerodims_input1/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   ├── zerodims_output0/
│   │   │   │   ├── config.pbtxt
│   │   │   │   ├── expected
│   │   │   │   └── expected_ensemble
│   │   │   └── zerodims_output1/
│   │   │       ├── config.pbtxt
│   │   │       ├── expected
│   │   │       └── expected_ensemble
│   │   ├── special_cases/
│   │   │   ├── invalid_platform/
│   │   │   │   ├── config.pbtxt
│   │   │   │   └── expected
│   │   │   ├── invalid_runtime/
│   │   │   │   ├── config.pbtxt
│   │   │   │   └── expected
│   │   │   └── runtime_escape/
│   │   │       ├── config.pbtxt
│   │   │       └── expected
│   │   └── test.sh
│   ├── L0_model_namespacing/
│   │   ├── python_addsub/
│   │   │   └── __init__.py
│   │   ├── python_subadd/
│   │   │   └── __init__.py
│   │   ├── test.py
│   │   ├── test.sh
│   │   ├── test_duplication/
│   │   │   ├── addsub_repo/
│   │   │   │   ├── composing_model/
│   │   │   │   │   └── 1/
│   │   │   │   │       └── model.py
│   │   │   │   └── simple_addsub/
│   │   │   │       └── config.pbtxt
│   │   │   └── subadd_repo/
│   │   │       ├── composing_model/
│   │   │       │   └── 1/
│   │   │       │       └── model.py
│   │   │       └── simple_subadd/
│   │   │           └── config.pbtxt
│   │   ├── test_dynamic_resolution/
│   │   │   ├── addsub_repo/
│   │   │   │   ├── composing_model/
│   │   │   │   │   └── 1/
│   │   │   │   │       └── model.py
│   │   │   │   └── simple_addsub/
│   │   │   │       └── config.pbtxt
│   │   │   └── subadd_repo/
│   │   │       ├── composing_model/
│   │   │       │   └── 1/
│   │   │       │       └── model.py
│   │   │       └── simple_subadd/
│   │   │           └── config.pbtxt
│   │   ├── test_ensemble_duplication/
│   │   │   ├── addsub_repo/
│   │   │   │   ├── composing_addsub/
│   │   │   │   │   └── 1/
│   │   │   │   │       └── model.py
│   │   │   │   └── simple_ensemble/
│   │   │   │       └── config.pbtxt
│   │   │   └── subadd_repo/
│   │   │       ├── composing_subadd/
│   │   │       │   └── 1/
│   │   │       │       └── model.py
│   │   │       └── simple_ensemble/
│   │   │           └── config.pbtxt
│   │   └── test_no_duplication/
│   │       ├── addsub_repo/
│   │       │   ├── composing_addsub/
│   │       │   │   └── 1/
│   │       │   │       └── model.py
│   │       │   └── simple_addsub/
│   │       │       └── config.pbtxt
│   │       └── subadd_repo/
│   │           ├── composing_subadd/
│   │           │   └── 1/
│   │           │       └── model.py
│   │           └── simple_subadd/
│   │               └── config.pbtxt
│   ├── L0_model_queue/
│   │   ├── ensemble_zero_1_float32/
│   │   │   └── config.pbtxt
│   │   ├── model_queue_test.py
│   │   └── test.sh
│   ├── L0_model_update/
│   │   ├── instance_update_test.py
│   │   └── test.sh
│   ├── L0_multi_server/
│   │   └── test.sh
│   ├── L0_nan_inf/
│   │   ├── models/
│   │   │   └── nan_inf_output/
│   │   │       ├── 1/
│   │   │       │   └── model.py
│   │   │       └── config.pbtxt
│   │   ├── nan_inf_test.py
│   │   └── test.sh
│   ├── L0_nullchar_string/
│   │   ├── nullchar_string_client.py
│   │   └── test.sh
│   ├── L0_onnx_optimization/
│   │   └── test.sh
│   ├── L0_openai/
│   │   ├── generate_engine.py
│   │   └── test.sh
│   ├── L0_optional_input/
│   │   ├── models/
│   │   │   ├── ensemble_identity_2_float32/
│   │   │   │   └── config.pbtxt
│   │   │   ├── identity_2_float32/
│   │   │   │   └── config.pbtxt
│   │   │   ├── optional_connecting_tensor/
│   │   │   │   └── config.pbtxt
│   │   │   ├── optional_identity/
│   │   │   │   ├── 1/
│   │   │   │   │   └── model.py
│   │   │   │   └── config.pbtxt
│   │   │   └── pipeline_identity_2_float32/
│   │   │       └── config.pbtxt
│   │   ├── optional_input_test.py
│   │   └── test.sh
│   ├── L0_orca/
│   │   ├── orca_http_test.py
│   │   └── test.sh
│   ├── L0_output_name/
│   │   ├── output_name_test.py
│   │   └── test.sh
│   ├── L0_output_validation/
│   │   ├── lt_op_val_client.py
│   │   └── test.sh
│   ├── L0_parallel_copy/
│   │   ├── parallel_copy_test.py
│   │   └── test.sh
│   ├── L0_parameters/
│   │   ├── class_count_test.py
│   │   ├── model_repository/
│   │   │   ├── ensemble/
│   │   │   │   └── config.pbtxt
│   │   │   ├── identity/
│   │   │   │   └── config.pbtxt
│   │   │   └── parameter/
│   │   │       └── 1/
│   │   │           └── model.py
│   │   ├── parameters_test.py
│   │   └── test.sh
│   ├── L0_passive_instance/
│   │   ├── models/
│   │   │   └── distributed_int32_int32_int32/
│   │   │       └── config.pbtxt
│   │   ├── passive_instance_test.py
│   │   └── test.sh
│   ├── L0_perf_deeprecommender/
│   │   ├── run_test.sh
│   │   └── test.sh
│   ├── L0_perf_kaldi/
│   │   ├── create_data.sh
│   │   └── test.sh
│   ├── L0_perf_nomodel/
│   │   ├── custom_models/
│   │   │   └── custom_zero_1_float32/
│   │   │       └── config.pbtxt
│   │   ├── run_test.sh
│   │   └── test.sh
│   ├── L0_perf_pyclients/
│   │   ├── custom_models/
│   │   │   └── custom_zero_1_int32/
│   │   │       └── config.pbtxt
│   │   ├── simple_perf_client.py
│   │   └── test.sh
│   ├── L0_perf_resnet/
│   │   ├── run_test.sh
│   │   └── test.sh
│   ├── L0_perf_tensorrt_llm/
│   │   └── test.sh
│   ├── L0_perf_vllm/
│   │   └── test.sh
│   ├── L0_pinned_memory/
│   │   ├── libtorch_ensemble.pbtxt
│   │   └── test.sh
│   ├── L0_priority/
│   │   └── test.sh
│   ├── L0_python_api/
│   │   ├── test.sh
│   │   ├── test_kserve.py
│   │   ├── test_model_repository/
│   │   │   ├── delayed_identity/
│   │   │   │   ├── 1/
│   │   │   │   │   └── model.py
│   │   │   │   └── config.pbtxt
│   │   │   └── identity/
│   │   │       ├── 1/
│   │   │       │   └── model.py
│   │   │       └── config.pbtxt
│   │   └── testing_utils.py
│   ├── L0_python_client_unit_tests/
│   │   └── test.sh
│   ├── L0_pytorch_python_runtime/
│   │   ├── infer.py
│   │   ├── test.sh
│   │   └── unit_test.py
│   ├── L0_query/
│   │   ├── models/
│   │   │   └── query/
│   │   │       └── config.pbtxt
│   │   ├── query_e2e.py
│   │   └── test.sh
│   ├── L0_rate_limiter/
│   │   ├── rate_limiter_test.py
│   │   └── test.sh
│   ├── L0_register/
│   │   ├── config.pbtxt
│   │   └── test.sh
│   ├── L0_repoagent_checksum/
│   │   ├── identity_test.py
│   │   ├── models/
│   │   │   └── identity_int32/
│   │   │       ├── config.pbtxt
│   │   │       └── data_file
│   │   └── test.sh
│   ├── L0_request_cancellation/
│   │   ├── grpc_cancellation_test.py
│   │   ├── implicit_state_model/
│   │   │   ├── config.pbtxt
│   │   │   ├── gen_model.py
│   │   │   └── model.pt
│   │   ├── implicit_state_test.py
│   │   ├── scheduler_test.py
│   │   └── test.sh
│   ├── L0_response_cache/
│   │   ├── ensemble_cache_test.py
│   │   ├── generate_random_data.py
│   │   ├── models/
│   │   │   ├── decoupled_cache/
│   │   │   │   └── config.pbtxt
│   │   │   └── identity_cache/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_response_statistics/
│   │   ├── response_statistics_test.py
│   │   └── test.sh
│   ├── L0_sagemaker/
│   │   ├── sagemaker_generate_stream_test.py
│   │   ├── sagemaker_generate_test.py
│   │   ├── sagemaker_multi_model_test.py
│   │   ├── sagemaker_request_many_chunks.py
│   │   ├── sagemaker_test.py
│   │   └── test.sh
│   ├── L0_scalar_io/
│   │   ├── scalar_test.py
│   │   └── test.sh
│   ├── L0_sdk/
│   │   ├── grpc_test.cc
│   │   ├── http_test.cc
│   │   └── test.sh
│   ├── L0_secure_grpc/
│   │   └── test.sh
│   ├── L0_sequence_batcher/
│   │   ├── request_timeout_models/
│   │   │   └── custom_sequence_int32_timeout/
│   │   │       └── config.pbtxt
│   │   ├── sequence_batcher_test.py
│   │   └── test.sh
│   ├── L0_sequence_corrid_batcher/
│   │   ├── sequence_corrid_batcher_test.py
│   │   └── test.sh
│   ├── L0_sequence_stress/
│   │   ├── sequence_stress.py
│   │   └── test.sh
│   ├── L0_server_status/
│   │   ├── server_status_test.py
│   │   └── test.sh
│   ├── L0_shared_memory/
│   │   ├── shared_memory_test.py
│   │   └── test.sh
│   ├── L0_simple_ensemble/
│   │   ├── backpressure_test_models/
│   │   │   ├── decoupled_producer/
│   │   │   │   ├── 1/
│   │   │   │   │   └── model.py
│   │   │   │   └── config.pbtxt
│   │   │   └── ensemble_disabled_max_inflight_requests/
│   │   │       └── config.pbtxt
│   │   ├── ensemble_backpressure_test.py
│   │   ├── ensemble_test.py
│   │   ├── models/
│   │   │   ├── ensemble_add_sub_int32_int32_int32/
│   │   │   │   └── config.pbtxt
│   │   │   ├── ensemble_partial_add_sub/
│   │   │   │   └── config.pbtxt
│   │   │   ├── partial_add_sub/
│   │   │   │   ├── 1/
│   │   │   │   │   └── model.py
│   │   │   │   └── config.pbtxt
│   │   │   └── simple/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_simple_example/
│   │   └── test.sh
│   ├── L0_simple_go_client/
│   │   └── test.sh
│   ├── L0_simple_lib/
│   │   └── test.sh
│   ├── L0_simple_nodejs_client/
│   │   └── test.sh
│   ├── L0_socket/
│   │   ├── models/
│   │   │   └── simple/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_storage_S3/
│   │   └── test.sh
│   ├── L0_storage_S3_local/
│   │   ├── mock_s3_service.py
│   │   └── test.sh
│   ├── L0_storage_azure/
│   │   └── test.sh
│   ├── L0_storage_swiftstack/
│   │   ├── infer_test.py
│   │   └── test.sh
│   ├── L0_string_io/
│   │   ├── string_client_test.py
│   │   └── test.sh
│   ├── L0_trace/
│   │   ├── models/
│   │   │   └── input_all_required/
│   │   │       ├── 1/
│   │   │       │   └── model.py
│   │   │       └── config.pbtxt
│   │   ├── opentelemetry_unittest.py
│   │   ├── test.sh
│   │   ├── trace-config.yaml
│   │   ├── trace_context.py
│   │   ├── trace_endpoint_test.py
│   │   └── trace_stress_grpc_client.py
│   ├── L0_triton_repo_agent/
│   │   ├── models/
│   │   │   ├── chain_relocation/
│   │   │   │   └── config.pbtxt
│   │   │   └── relocation_sanity_check/
│   │   │       └── config.pbtxt
│   │   └── test.sh
│   ├── L0_trt_bf16_dtype/
│   │   ├── test.sh
│   │   └── trt_bf16_dtype_test.py
│   ├── L0_trt_compat/
│   │   ├── test.sh
│   │   └── trt_compatibility_test.py
│   ├── L0_trt_data_dependent_shape/
│   │   ├── test.sh
│   │   └── trt_data_dependent_shape_test.py
│   ├── L0_trt_dla/
│   │   ├── dla_test.py
│   │   └── test.sh
│   ├── L0_trt_dynamic_shape/
│   │   ├── test.sh
│   │   └── trt_dynamic_shape_test.py
│   ├── L0_trt_error_propagation/
│   │   ├── test.sh
│   │   └── trt_error_propagation_test.py
│   ├── L0_trt_plugin/
│   │   ├── test.sh
│   │   └── trt_plugin_test.py
│   ├── L0_trt_reformat_free/
│   │   ├── test.sh
│   │   └── trt_reformat_free_test.py
│   ├── L0_trt_shape_tensors/
│   │   ├── test.sh
│   │   └── trt_shape_tensor_test.py
│   ├── L0_vertex_ai/
│   │   ├── test.sh
│   │   └── vertex_ai_test.py
│   ├── L0_warmup/
│   │   ├── decoupled/
│   │   │   ├── 1/
│   │   │   │   └── model.py
│   │   │   └── config.pbtxt
│   │   ├── failing_infer/
│   │   │   ├── 1/
│   │   │   │   └── model.py
│   │   │   └── config.pbtxt
│   │   ├── raw_mug_data
│   │   └── test.sh
│   ├── common/
│   │   ├── busy_op_kernel.cu.cc
│   │   ├── check_copyright.py
│   │   ├── check_massif_log.py
│   │   ├── check_valgrind_log.py
│   │   ├── gen_common.py
│   │   ├── gen_ensemble_model_utils.py
│   │   ├── gen_jetson_trt_models
│   │   ├── gen_qa_custom_ops_models.py
│   │   ├── gen_qa_dyna_sequence_implicit_models.py
│   │   ├── gen_qa_dyna_sequence_models.py
│   │   ├── gen_qa_identity_models.py
│   │   ├── gen_qa_image_models.py
│   │   ├── gen_qa_implicit_models.py
│   │   ├── gen_qa_model_repository
│   │   ├── gen_qa_models.py
│   │   ├── gen_qa_ort_scalar_models.py
│   │   ├── gen_qa_pytorch_model.py
│   │   ├── gen_qa_ragged_models.py
│   │   ├── gen_qa_reshape_models.py
│   │   ├── gen_qa_sequence_models.py
│   │   ├── gen_qa_torchtrt_models.py
│   │   ├── gen_qa_trt_data_dependent_shape.py
│   │   ├── gen_qa_trt_format_models.py
│   │   ├── gen_qa_trt_plugin_models.py
│   │   ├── infer_test.py
│   │   ├── infer_util.py
│   │   ├── inferentia_perf_analyzer_input_data_json/
│   │   │   ├── non_aligned_validation_batched.json
│   │   │   ├── non_aligned_validation_no_batch.json
│   │   │   ├── simple_model.py
│   │   │   ├── validation_batched.json
│   │   │   ├── validation_no_batch.json
│   │   │   ├── wrong_validation_batched.json
│   │   │   └── wrong_validation_no_batch.json
│   │   ├── libtorch_infer_client.py
│   │   ├── nightly_email_helper.py
│   │   ├── orca_header_test.py
│   │   ├── perf_analyzer_input_data_json/
│   │   │   ├── float_data_with_shape.json
│   │   │   ├── image_data.json
│   │   │   ├── int_data.json
│   │   │   ├── int_data_diff_shape.json
│   │   │   ├── int_data_optional.json
│   │   │   ├── non_aligned_output.json
│   │   │   ├── output.json
│   │   │   ├── repeat_int32_data.json
│   │   │   ├── seq_data.json
│   │   │   ├── seq_output.json
│   │   │   ├── seq_wrong_output.json
│   │   │   ├── shape_tensor_data.json
│   │   │   ├── string_data.json
│   │   │   ├── string_data_with_shape.json
│   │   │   ├── wrong_output.json
│   │   │   └── wrong_output_2.json
│   │   ├── reporter.py
│   │   ├── resnet50_labels.txt
│   │   ├── run_all_tests.sh
│   │   ├── sequence_util.py
│   │   ├── shm_util.py
│   │   ├── show_testlogs
│   │   ├── test_util.py
│   │   ├── trace_summary.py
│   │   ├── trtllm_util.sh
│   │   └── util.sh
│   ├── custom_models/
│   │   ├── custom_dyna_sequence_int32/
│   │   │   └── config.pbtxt
│   │   ├── custom_sequence_int32/
│   │   │   └── config.pbtxt
│   │   └── custom_zero_1_float32/
│   │       └── config.pbtxt
│   ├── ensemble_models/
│   │   ├── batch_to_nobatch_float32_float32_float32/
│   │   │   └── config.pbtxt
│   │   ├── batch_to_nobatch_nobatch_float32_float32_float32/
│   │   │   └── config.pbtxt
│   │   ├── label_override_int32_float32_float32/
│   │   │   ├── config.pbtxt
│   │   │   └── output0_labels.txt
│   │   ├── mix_ensemble_int32_float32_float32/
│   │   │   └── config.pbtxt
│   │   ├── mix_nobatch_batch_float32_float32_float32/
│   │   │   └── config.pbtxt
│   │   ├── mix_platform_float32_float32_float32/
│   │   │   └── config.pbtxt
│   │   ├── mix_type_int32_float32_float32/
│   │   │   └── config.pbtxt
│   │   ├── nobatch_to_batch_float32_float32_float32/
│   │   │   └── config.pbtxt
│   │   ├── nobatch_to_batch_nobatch_float32_float32_float32/
│   │   │   └── config.pbtxt
│   │   └── wrong_label_int32_float32_float32/
│   │       ├── config.pbtxt
│   │       └── output0_labels.txt
│   ├── openvino_models/
│   │   ├── README.md
│   │   ├── dynamic_batch/
│   │   │   └── 1/
│   │   │       └── model.mapping
│   │   └── fixed_batch/
│   │       └── 1/
│   │           └── model.mapping
│   └── python_models/
│       ├── add_sub/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── add_sub_gpu/
│       │   └── config.pbtxt
│       ├── async_execute_decouple/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── async_execute_decouple_bls/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── auto_complete/
│       │   └── model.py
│       ├── auto_complete_error/
│       │   └── model.py
│       ├── bls/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── bls_async/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── bls_finalize_error/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── bls_init_error/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── bls_memory/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── bls_memory_async/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── bls_model_loading/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── bls_onnx_warmup/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── bls_parameters/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── bls_request_rescheduling/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── bls_simple/
│       │   └── bls_simple.py
│       ├── bls_undefined/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── busy_op/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── cuda_memory_consumer/
│       │   ├── 1/
│       │   │   └── model.py
│       │   └── config.pbtxt
│       ├── custom_metrics/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── delayed_model/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── dlpack_add_sub/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── dlpack_empty_output/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── dlpack_identity/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── dlpack_io_identity/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── dlpack_io_identity_decoupled/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── dlpack_square/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── dlpack_sub_add/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── dlpack_test/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── ensemble/
│       │   └── config.pbtxt
│       ├── ensemble_gpu/
│       │   └── config.pbtxt
│       ├── ensemble_io/
│       │   └── config.pbtxt
│       ├── error_code/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── execute_cancel/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── execute_delayed_model/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── execute_error/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── execute_grpc_error/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── execute_return_error/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── fan_add_sub/
│       │   └── config.pbtxt
│       ├── fini_error/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── generate_models/
│       │   └── mock_llm/
│       │       ├── 1/
│       │       │   └── model.py
│       │       └── config.pbtxt
│       ├── ground_truth/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── identity_bf16/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── identity_fp32/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── identity_fp32_logging/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── identity_fp32_timeout/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── init_args/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── init_error/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── init_exit/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── iterative_sequence/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── model_env/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── model_init_del/
│       │   ├── config.pbtxt
│       │   ├── model.py
│       │   └── util.py
│       ├── multi_file/
│       │   ├── file1.py
│       │   ├── file2.py
│       │   └── model.py
│       ├── non_contiguous/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── optional/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── python_based_backends/
│       │   └── add_sub_backend/
│       │       └── model.py
│       ├── python_version/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── pytorch_fp32_fp32/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── request_rescheduling_addsub/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── response_parameters/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── response_parameters_bls/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── response_parameters_decoupled/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── response_sender/
│       │   ├── config.pbtxt
│       │   ├── model.py
│       │   ├── model_async.py
│       │   └── model_common.py
│       ├── response_sender_complete_final/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── response_sender_error/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── response_sender_until_cancelled/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── sequence_int32/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── sequence_py/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── simple_identity_fp32/
│       │   └── config.pbtxt
│       ├── string/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── string_fixed/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── string_identity/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── sub_add/
│       │   └── model.py
│       ├── torchvision/
│       │   └── resnet50/
│       │       ├── config.pbtxt
│       │       └── model.py
│       ├── variable_gpu_output/
│       │   ├── config.pbtxt
│       │   └── model.py
│       ├── wrong_model/
│       │   ├── config.pbtxt
│       │   └── model.py
│       └── wrong_return_type/
│           ├── config.pbtxt
│           └── model.py
├── src/
│   ├── CMakeLists.txt
│   ├── classification.cc
│   ├── classification.h
│   ├── command_line_parser.cc
│   ├── command_line_parser.h
│   ├── common.cc
│   ├── common.h
│   ├── data_compressor.h
│   ├── grpc/
│   │   ├── CMakeLists.txt
│   │   ├── grpc_handler.h
│   │   ├── grpc_server.cc
│   │   ├── grpc_server.h
│   │   ├── grpc_utils.cc
│   │   ├── grpc_utils.h
│   │   ├── infer_handler.cc
│   │   ├── infer_handler.h
│   │   ├── stream_infer_handler.cc
│   │   └── stream_infer_handler.h
│   ├── http_server.cc
│   ├── http_server.h
│   ├── main.cc
│   ├── memory_alloc.cc
│   ├── multi_server.cc
│   ├── orca_http.cc
│   ├── orca_http.h
│   ├── python/
│   │   ├── CMakeLists.txt
│   │   ├── build_wheel.py
│   │   ├── examples/
│   │   │   ├── example.py
│   │   │   └── example_model_repository/
│   │   │       └── identity/
│   │   │           ├── 1/
│   │   │           │   └── model.onnx
│   │   │           └── config.pbtxt
│   │   ├── setup.py
│   │   └── tritonfrontend/
│   │       ├── CMakeLists.txt
│   │       ├── __init__.py
│   │       ├── __init__.pyi
│   │       ├── _api/
│   │       │   ├── __init__.py
│   │       │   ├── _error_mapping.py
│   │       │   ├── _kservegrpc.py
│   │       │   ├── _kservegrpc.pyi
│   │       │   ├── _kservehttp.py
│   │       │   ├── _kservehttp.pyi
│   │       │   ├── _metrics.py
│   │       │   └── _metrics.pyi
│   │       ├── _c/
│   │       │   ├── __init__.py
│   │       │   ├── __init__.pyi
│   │       │   ├── tritonfrontend.h
│   │       │   ├── tritonfrontend_bindings.pyi
│   │       │   └── tritonfrontend_pybind.cc
│   │       └── py.typed
│   ├── restricted_features.h
│   ├── sagemaker_server.cc
│   ├── sagemaker_server.h
│   ├── shared_memory_manager.cc
│   ├── shared_memory_manager.h
│   ├── simple.cc
│   ├── test/
│   │   ├── CMakeLists.txt
│   │   ├── data_compressor_test.cc
│   │   ├── distributed_addsub/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cmake/
│   │   │   │   └── TritonDistributedAddsubBackendConfig.cmake.in
│   │   │   └── src/
│   │   │       ├── distributed_addsub.cc
│   │   │       └── libtriton_distributed_addsub.ldscript
│   │   ├── dyna_sequence/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cmake/
│   │   │   │   └── TritonDynaSequenceBackendConfig.cmake.in
│   │   │   └── src/
│   │   │       ├── dyna_sequence.cc
│   │   │       └── libtriton_dyna_sequence.ldscript
│   │   ├── implicit_state/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cmake/
│   │   │   │   └── TritonImplicitStateBackendConfig.cmake.in
│   │   │   └── src/
│   │   │       ├── implicit_state.cc
│   │   │       └── libtriton_implicit_state.ldscript
│   │   ├── iterative_sequence/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cmake/
│   │   │   │   └── TritonIterativeSequenceBackendConfig.cmake.in
│   │   │   └── src/
│   │   │       ├── iterative_sequence.cc
│   │   │       └── libtriton_iterative_sequence.ldscript
│   │   ├── models/
│   │   │   ├── identity_fp32/
│   │   │   │   └── config.pbtxt
│   │   │   ├── repeat_int32/
│   │   │   │   └── config.pbtxt
│   │   │   └── square_int32/
│   │   │       └── config.pbtxt
│   │   ├── query_backend/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cmake/
│   │   │   │   └── TritonQueryBackendConfig.cmake.in
│   │   │   └── src/
│   │   │       ├── libtriton_query.ldscript
│   │   │       └── query.cc
│   │   ├── repoagent/
│   │   │   └── relocation_repoagent/
│   │   │       ├── CMakeLists.txt
│   │   │       ├── cmake/
│   │   │       │   └── TritonRelocationRepoAgentConfig.cmake.in
│   │   │       └── src/
│   │   │           ├── libtritonrepoagent_relocation.ldscript
│   │   │           └── relocation.cc
│   │   ├── sequence/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cmake/
│   │   │   │   └── TritonSequenceBackendConfig.cmake.in
│   │   │   └── src/
│   │   │       ├── libtriton_sequence.ldscript
│   │   │       └── sequence.cc
│   │   └── tensor_size_test.cc
│   ├── tracer.cc
│   ├── tracer.h
│   ├── triton_signal.cc
│   ├── triton_signal.h
│   ├── vertex_ai_server.cc
│   └── vertex_ai_server.h
└── tools/
    └── add_copyright.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .clang-format
================================================
---
BasedOnStyle: Google

IndentWidth: 2
ColumnLimit: 80
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2

SortIncludes: true
CompactNamespaces: true
ReflowComments: true

DerivePointerAlignment: false
PointerAlignment: Left

AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline

AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: true

BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false

IndentCaseLabels: true


================================================
FILE: .dockerignore
================================================
.git*


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

**Description**
A clear and concise description of what the bug is.

**Triton Information**
What version of Triton are you using?

Are you using the Triton container or did you build it yourself?

**To Reproduce**
Steps to reproduce the behavior.

Describe the models (framework, inputs, outputs), ideally include the model configuration file (if using an ensemble include the model configuration file for that as well).

**Expected behavior**
A clear and concise description of what you expected to happen.


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.


================================================
FILE: .github/PULL_REQUEST_TEMPLATE/pull_request_template_external_contrib.md
================================================
#### What does the PR do?
<!-- Describe your pull request here. Please read the text below the line, and make sure you follow the checklist.-->

#### Checklist
- [ ] I have read the [Contribution guidelines](https://github.com/triton-inference-server/server/blob/main/CONTRIBUTING.md) and signed the [Contributor License
Agreement](https://github.com/NVIDIA/triton-inference-server/blob/master/Triton-CCLA-v1.pdf)
- [ ] PR title reflects the change and is of format `<commit_type>: <Title>`
- [ ] Changes are described in the pull request.
- [ ] Related issues are referenced.
- [ ] Populated [github labels](https://docs.github.com/en/issues/using-labels-and-milestones-to-track-work/managing-labels) field
- [ ] Added [test plan](#test-plan) and verified test passes.
- [ ] Verified that the PR passes existing CI.
- [ ] I ran pre-commit locally (`pre-commit install, pre-commit run --all`)
- [ ] Verified copyright is correct on all changed files.
- [ ] Added _succinct_ git squash message before merging [ref](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
- [ ] All template sections are filled out.
- [ ] Optional: Additional screenshots for behavior/output changes with before/after.

#### Commit Type:
Check the [conventional commit type](https://github.com/angular/angular/blob/22b96b9/CONTRIBUTING.md#type)
box here and add the label to the github PR.
- [ ] build
- [ ] ci
- [ ] docs
- [ ] feat
- [ ] fix
- [ ] perf
- [ ] refactor
- [ ] revert
- [ ] style
- [ ] test

#### Related PRs:
<!-- Related PRs from other Repositories -->

#### Where should the reviewer start?
<!-- call out specific files that should be looked at closely -->

#### Test plan:
<!-- list steps to verify feature works -->
<!-- were e2e tests added?-->

#### Caveats:
<!-- any limitations or possible things missing from this PR -->

#### Background
<!-- e.g. what led to this change being made. this is optional extra information to help the reviewer -->

#### Related Issues: (use one of the action keywords Closes / Fixes / Resolves / Relates to)
- closes GitHub issue: #xxx


================================================
FILE: .github/PULL_REQUEST_TEMPLATE/pull_request_template_internal_contrib.md
================================================
#### What does the PR do?
<!-- Describe your pull request here. Please read the text below the line, and make sure you follow the checklist.-->

#### Checklist
- [ ] PR title reflects the change and is of format `<commit_type>: <Title>`
- [ ] Changes are described in the pull request.
- [ ] Related issues are referenced.
- [ ] Populated [github labels](https://docs.github.com/en/issues/using-labels-and-milestones-to-track-work/managing-labels) field
- [ ] Added [test plan](#test-plan) and verified test passes.
- [ ] Verified that the PR passes existing CI.
- [ ] Verified copyright is correct on all changed files.
- [ ] Added _succinct_ git squash message before merging [ref](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
- [ ] All template sections are filled out.
- [ ] Optional: Additional screenshots for behavior/output changes with before/after.

#### Commit Type:
Check the [conventional commit type](https://github.com/angular/angular/blob/22b96b9/CONTRIBUTING.md#type)
box here and add the label to the github PR.
- [ ] build
- [ ] ci
- [ ] docs
- [ ] feat
- [ ] fix
- [ ] perf
- [ ] refactor
- [ ] revert
- [ ] style
- [ ] test

#### Related PRs:
<!-- Related PRs from other Repositories -->

#### Where should the reviewer start?
<!-- call out specific files that should be looked at closely -->

#### Test plan:
<!-- list steps to verify -->
<!-- were e2e tests added?-->

- CI Pipeline ID:
<!-- Only Pipeline ID and no direct link here -->

#### Caveats:
<!-- any limitations or possible things missing from this PR -->

#### Background
<!-- e.g. what led to this change being made. this is optional extra information to help the reviewer -->

#### Related Issues: (use one of the action keywords Closes / Fixes / Resolves / Relates to)
- closes GitHub issue: #xxx


================================================
FILE: .github/pull_request_template.md
================================================
Thanks for submitting a PR to Triton!
Please go to the `Preview` tab above this description box and select the appropriate sub-template:

* [PR description template for Triton Engineers](?expand=1&template=pull_request_template_internal_contrib.md)
* [PR description template for External Contributors](?expand=1&template=pull_request_template_external_contrib.md)

If you already created the PR, please replace this message with one of
* [External contribution template](https://raw.githubusercontent.com/triton-inference-server/server/main/.github/PULL_REQUEST_TEMPLATE/pull_request_template_external_contrib.md)
* [Internal contribution template](https://raw.githubusercontent.com/triton-inference-server/server/main/.github/PULL_REQUEST_TEMPLATE/pull_request_template_internal_contrib.md)

and fill it out.


================================================
FILE: .github/workflows/codeql.yml
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "CodeQL"

on:
  pull_request:

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: [ 'python' ]
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

    steps:
    - name: Checkout repository
      uses: actions/checkout@v3

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v2
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.

        # For details on CodeQL's query packs, refer to:
        # https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
        queries: +security-and-quality


    # Autobuild attempts to build any compiled languages  (C/C++, C#, Go, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
    - name: Autobuild
      uses: github/codeql-action/autobuild@v2

    # Command-line programs to run using the OS shell.
    # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

    #   If the Autobuild fails above, remove it and uncomment the following three lines,
    #   then modify them (or add more) to build your project. Please refer to the EXAMPLE below for guidance.

    # - run: |
    #   echo "Run, Build Application using script"
    #   ./location_of_script_within_repo/buildscript.sh

    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v2
      with:
        category: "/language:${{matrix.language}}"


================================================
FILE: .github/workflows/pre-commit.yml
================================================
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: pre-commit

on:
  pull_request:

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v5.0.0
      with:
        fetch-depth: 2
    - name: Get modified files
      id: modified-files
      run: echo "modified_files=$(git diff --name-only -r HEAD^1 HEAD | xargs)" >> $GITHUB_OUTPUT
    - uses: actions/setup-python@v6.0.0
    - uses: pre-commit/action@v3.0.1
      with:
        extra_args: --files ${{ steps.modified-files.outputs.modified_files }}


================================================
FILE: .gitignore
================================================
/build
/builddir
/.vscode
*.so
__pycache__
tmp
*.log
*.xml
test_results.txt
artifacts
cprofile
*.prof
.venv
**/.venv

# Test exclusions
qa/L0_openai/openai
tensorrtllm_models
tensorrtllm_mistral_models/
custom_tokenizer


================================================
FILE: .pre-commit-config.yaml
================================================
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Each entry under `repos` pins a hook repository to a revision (`rev`) and
# lists the hooks to run from it.
repos:
# Python import ordering.
- repo: https://github.com/PyCQA/isort
  rev: 5.12.0
  hooks:
  - id: isort
    additional_dependencies: [toml]
# Python/Cython code formatting.
- repo: https://github.com/psf/black
  rev: 23.1.0
  hooks:
  - id: black
    types_or: [python, cython]
# Python/Cython linting.
- repo: https://github.com/PyCQA/flake8
  rev: 7.3.0
  hooks:
  - id: flake8
    # Every argument is quoted on purpose: inside a YAML flow sequence an
    # unquoted comma terminates the item, which would split the comma-separated
    # --select / --extend-ignore values into separate arguments. The option and
    # its value must also be joined by '=' without surrounding spaces so that
    # flake8 receives them as a single option.
    args: ["--max-line-length=88", "--select=C,E,F,W,B,B950", "--extend-ignore=E203,E501"]
    types_or: [python, cython]
# Formatting for C-family, CUDA, protobuf and Java sources. "-style=file"
# takes the style from the repository's .clang-format, "-fallback-style=none"
# leaves files untouched when no style file is found, and "-i" edits in place.
- repo: https://github.com/pre-commit/mirrors-clang-format
  rev: v16.0.5
  hooks:
  - id: clang-format
    types_or: [c, c++, cuda, proto, textproto, java]
    args: ["-fallback-style=none", "-style=file", "-i"]
# Spell checking; settings are read from pyproject.toml (tomli is needed to
# parse it). Paths matching the `exclude` pattern are not checked.
- repo: https://github.com/codespell-project/codespell
  rev: v2.2.4
  hooks:
  - id: codespell
    additional_dependencies: [tomli]
    args: ["--toml", "pyproject.toml"]
    exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$)
# More details about these pre-commit hooks here:
# https://pre-commit.com/hooks.html
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v6.0.0
  hooks:
  - id: check-case-conflict
  - id: check-executables-have-shebangs
  - id: check-merge-conflict
  - id: check-json
  - id: check-toml
  - id: check-yaml
    # Files under any deploy/**/templates/ directory are skipped by this check.
    exclude: ^deploy(\/[^\/]+)*\/templates\/.*$
  - id: check-shebang-scripts-are-executable
  - id: end-of-file-fixer
    types_or: [c, c++, cuda, proto, textproto, java, python]
  - id: mixed-line-ending
  - id: requirements-txt-fixer
  - id: trailing-whitespace

# Repository-local hook: runs tools/add_copyright.py at the pre-commit stage.
# `require_serial` keeps it to a single process; `verbose` always shows its output.
- repo: local
  hooks:
  - id: add-license
    name: Add License
    entry: python tools/add_copyright.py
    language: python
    stages: [pre-commit]
    verbose: true
    require_serial: true


================================================
FILE: CITATION.cff
================================================
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
title: "Triton Inference Server: An Optimized Cloud and Edge Inferencing Solution."
url: https://github.com/triton-inference-server
repository-code: https://github.com/triton-inference-server/server
authors:
  - name: "NVIDIA Corporation"


================================================
FILE: CMakeLists.txt
================================================
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required(VERSION 3.31.8)

project(tritonserver LANGUAGES C CXX)

include(CMakeDependentOption)

# Use C++17 standard as Triton's minimum required.
set(TRITON_MIN_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard which features are requested to build this target.")

set(TRITON_VERSION "0.0.0" CACHE STRING "The version of the Triton shared library" )

#
# Build options. Everything below is a cache entry, so each value can be
# overridden with -D<NAME>=<value> when configuring. The values are forwarded
# to the triton-server external project further down in this file.
#
option(TRITON_ENABLE_LOGGING "Include logging support in server" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in server" ON)
option(TRITON_ENABLE_TRACING "Include tracing support in server" OFF)
option(TRITON_ENABLE_NVTX "Include NVTX support in server" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in server" ON)
option(TRITON_ENABLE_MALI_GPU "Enable Arm Mali GPU support in server" OFF)
option(TRITON_IGPU_BUILD "Enable options for iGPU compilation in server" OFF)
set(TRITON_MIN_COMPUTE_CAPABILITY "7.5" CACHE STRING
    "The minimum CUDA compute capability supported by Triton" )
set(TRITON_EXTRA_LIB_PATHS "" CACHE PATH "Extra library paths for Triton Server build")

# Ensemble
option(TRITON_ENABLE_ENSEMBLE "Include ensemble support in server" OFF)

# Endpoints
option(TRITON_ENABLE_HTTP "Include HTTP API in server" ON)
option(TRITON_ENABLE_GRPC "Include GRPC API in server" ON)
option(TRITON_ENABLE_SAGEMAKER "Include AWS SageMaker API in server" OFF)
option(TRITON_ENABLE_VERTEX_AI "Include Vertex AI API in server" OFF)

# Metrics
option(TRITON_ENABLE_METRICS "Include metrics support in server" ON)
option(TRITON_ENABLE_METRICS_GPU "Include GPU metrics support in server" ON)
option(TRITON_ENABLE_METRICS_CPU "Include CPU metrics support in server" ON)

# Cloud storage
option(TRITON_ENABLE_GCS "Include GCS Filesystem support in server" OFF)
option(TRITON_ENABLE_S3 "Include S3 Filesystem support in server" OFF)
option(TRITON_ENABLE_AZURE_STORAGE "Include Azure Storage Filesystem support in server" OFF)

# Need to know if TensorRT is available when building unit tests
option(TRITON_ENABLE_TENSORRT "Include TensorRT backend in server" OFF)

# ASAN
option(TRITON_ENABLE_ASAN "Build with address sanitizer" OFF)

# Repo tags: organization URL and the git tag/branch used for each of the
# companion triton-inference-server repositories.
set(TRITON_REPO_ORGANIZATION "https://github.com/triton-inference-server" CACHE STRING "Git repository to pull from")
set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING
    "Tag for triton-inference-server/third_party repo")
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")

# Third-party location (both default to directories inside the build tree)
set(TRITON_THIRD_PARTY_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party" CACHE STRING "Location of third-party build")
set(TRITON_THIRD_PARTY_SRC_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party-src" CACHE STRING "Location of third-party source")

# Abort configuration when the option named by `option` is enabled while the
# option it depends on (`dependency`) is disabled.
function(_triton_require_option option dependency)
  if(${option} AND NOT ${dependency})
    message(FATAL_ERROR "${option}=ON requires ${dependency}=ON")
  endif()
endfunction()

# Feature prerequisites, checked in order: <feature> <prerequisite>.
_triton_require_option(TRITON_ENABLE_METRICS TRITON_ENABLE_STATS)
_triton_require_option(TRITON_ENABLE_TRACING TRITON_ENABLE_STATS)
_triton_require_option(TRITON_ENABLE_METRICS_CPU TRITON_ENABLE_METRICS)
_triton_require_option(TRITON_ENABLE_METRICS_GPU TRITON_ENABLE_METRICS)
_triton_require_option(TRITON_ENABLE_METRICS_GPU TRITON_ENABLE_GPU)

# ASAN is the inverse case: it is rejected when GPU support is enabled.
if(TRITON_ENABLE_ASAN AND TRITON_ENABLE_GPU)
  message(FATAL_ERROR "TRITON_ENABLE_ASAN=ON requires TRITON_ENABLE_GPU=OFF")
endif()

#
# Dependencies
#
include(FetchContent)

# The core and third_party repositories are fetched from the configured
# organization at the configured tags.
FetchContent_Declare(
  repo-core
  GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git
  GIT_TAG ${TRITON_CORE_REPO_TAG}
)
FetchContent_Declare(
  repo-third-party
  GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/third_party.git
  GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG}
)

# Some libs are installed to ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib64 instead
# of ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib on Centos
set(LIB_DIR "lib")
# Only read /etc/os-release when it exists; file(STRINGS) is a hard error on a
# missing file.
if(LINUX AND EXISTS "/etc/os-release")
  file(STRINGS "/etc/os-release" DISTRO_ID_LIKE REGEX "ID_LIKE")
  # The expansion is quoted so the condition stays well-formed when the file
  # has no ID_LIKE line (empty result); unquoted, if() would be left with
  # only `MATCHES "rhel|centos"` and fail to parse.
  if("${DISTRO_ID_LIKE}" MATCHES "rhel|centos")
    set(LIB_DIR "lib64")
  endif()
endif()
set(TRITON_CORE_HEADERS_ONLY OFF)

FetchContent_MakeAvailable(repo-third-party repo-core)

#
# Triton server executable and examples
#

# The server itself is built through ExternalProject so that it can be ordered
# after the ExternalProject dependencies coming from the third_party repo.
include(ExternalProject)

# Optional hints forwarded to the external project. Each variable holds a
# complete "-D..." cache argument when the hint is set and is empty otherwise.

# CMAKE_TOOLCHAIN_FILE
if(CMAKE_TOOLCHAIN_FILE)
  set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}")
else()
  set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "")
endif()

# VCPKG_TARGET_TRIPLET
if(VCPKG_TARGET_TRIPLET)
  set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}")
else()
  set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "")
endif()

# OPENSSL_ROOT_DIR (for external projects with an OpenSSL dependency)
if(OPENSSL_ROOT_DIR)
  set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
else()
  set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
endif()

# protobuf-config.cmake is installed in a platform-dependent location.
if(NOT WIN32)
  set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/${LIB_DIR}/cmake/protobuf")
else()
  set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake")
endif()

# Triton with Opentelemetry is not supported on Windows, so no config
# directory is provided there.
# FIXME: add location for Windows, when support is added
# JIRA DLIS-4786
if(NOT WIN32)
  set(_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/opentelemetry-cpp/${LIB_DIR}/cmake/opentelemetry-cpp")
else()
  set(_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR "")
endif()

# Install into <build dir>/install unless the user chose an install prefix.
if(NOT CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
else()
  set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install)
endif()

# Targets the triton-server external project depends on. The first four are
# always needed; the remainder are appended according to the enabled features.
set(TRITON_DEPENDS triton-core protobuf googletest re2)

# Cloud storage backends
if(${TRITON_ENABLE_GCS})
  list(APPEND TRITON_DEPENDS google-cloud-cpp)
endif()
if(${TRITON_ENABLE_S3})
  list(APPEND TRITON_DEPENDS aws-sdk-cpp)
endif()

# Any HTTP-based endpoint (HTTP, metrics, SageMaker, Vertex AI)
if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR ${TRITON_ENABLE_SAGEMAKER} OR ${TRITON_ENABLE_VERTEX_AI})
  list(APPEND TRITON_DEPENDS libevent libevhtp)
endif()

# GRPC endpoint
if(${TRITON_ENABLE_GRPC})
  list(APPEND TRITON_DEPENDS grpc)
endif()

# Tracing (not on Windows)
if(NOT WIN32 AND ${TRITON_ENABLE_TRACING})
  list(APPEND TRITON_DEPENDS opentelemetry-cpp)
endif()

# Configure and build the server sources in src/ as an external project.
# Everything under CMAKE_CACHE_ARGS is written into that project's initial
# cache: package config locations inside the third-party install prefix, the
# optional hints prepared above, and this project's feature options.
ExternalProject_Add(triton-server
  PREFIX triton-server
  SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src"
  BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/triton-server"
  CMAKE_CACHE_ARGS
    # Package config directories and optional hints (the hint variables expand
    # to nothing when the corresponding hint was not provided).
    # NOTE(review): gRPC, Libevent and libevhtp use a fixed "lib" directory
    # while most other packages use ${LIB_DIR} -- confirm this is intended for
    # distributions where LIB_DIR is "lib64".
    -DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
    ${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
    ${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
    ${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
    -DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest
    -DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
    -Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares
    -Dre2_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/re2/${LIB_DIR}/cmake/re2
    -Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl
    -DCURL_DIR:STRING=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/curl/${LIB_DIR}/cmake/CURL
    -Dnlohmann_json_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/nlohmann_json/share/cmake/nlohmann_json
    -DLibevent_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/libevent/lib/cmake/libevent
    -Dlibevhtp_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/libevhtp/lib/cmake/libevhtp
    -Dstorage_client_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/${LIB_DIR}/cmake/storage_client
    -Dgoogle_cloud_cpp_common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/${LIB_DIR}/cmake/google_cloud_cpp_common
    -DCrc32c_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/crc32c/${LIB_DIR}/cmake/Crc32c
    -DAWSSDK_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/AWSSDK
    -Daws-cpp-sdk-core_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/aws-cpp-sdk-core
    -Daws-cpp-sdk-s3_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/aws-cpp-sdk-s3
    -Daws-c-event-stream_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-c-event-stream/cmake
    -Daws-c-common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-c-common/cmake
    -Daws-checksums_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-checksums/cmake
    -Dopentelemetry-cpp_DIR:PATH=${_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR}
    # Repository settings and feature options, forwarded unchanged.
    -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION}
    -DTRITON_IGPU_BUILD:BOOL=${TRITON_IGPU_BUILD}
    -DTRITON_THIRD_PARTY_REPO_TAG:STRING=${TRITON_THIRD_PARTY_REPO_TAG}
    -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
    -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG}
    -DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG}
    -DTRITON_EXTRA_LIB_PATHS:PATH=${TRITON_EXTRA_LIB_PATHS}
    -DTRITON_ENABLE_ASAN:BOOL=${TRITON_ENABLE_ASAN}
    -DTRITON_ENABLE_NVTX:BOOL=${TRITON_ENABLE_NVTX}
    -DTRITON_ENABLE_TRACING:BOOL=${TRITON_ENABLE_TRACING}
    -DTRITON_ENABLE_LOGGING:BOOL=${TRITON_ENABLE_LOGGING}
    -DTRITON_ENABLE_STATS:BOOL=${TRITON_ENABLE_STATS}
    -DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
    -DTRITON_ENABLE_MALI_GPU:BOOL=${TRITON_ENABLE_MALI_GPU}
    -DTRITON_ENABLE_HTTP:BOOL=${TRITON_ENABLE_HTTP}
    -DTRITON_ENABLE_SAGEMAKER:BOOL=${TRITON_ENABLE_SAGEMAKER}
    -DTRITON_ENABLE_VERTEX_AI:BOOL=${TRITON_ENABLE_VERTEX_AI}
    -DTRITON_ENABLE_GRPC:BOOL=${TRITON_ENABLE_GRPC}
    -DTRITON_MIN_COMPUTE_CAPABILITY:STRING=${TRITON_MIN_COMPUTE_CAPABILITY}
    -DTRITON_ENABLE_METRICS:BOOL=${TRITON_ENABLE_METRICS}
    -DTRITON_ENABLE_METRICS_GPU:BOOL=${TRITON_ENABLE_METRICS_GPU}
    -DTRITON_ENABLE_METRICS_CPU:BOOL=${TRITON_ENABLE_METRICS_CPU}
    -DTRITON_ENABLE_GCS:BOOL=${TRITON_ENABLE_GCS}
    -DTRITON_ENABLE_AZURE_STORAGE:BOOL=${TRITON_ENABLE_AZURE_STORAGE}
    -DTRITON_ENABLE_S3:BOOL=${TRITON_ENABLE_S3}
    -DTRITON_ENABLE_TENSORRT:BOOL=${TRITON_ENABLE_TENSORRT}
    -DTRITON_ENABLE_ENSEMBLE:BOOL=${TRITON_ENABLE_ENSEMBLE}
    -DTRITON_MIN_CXX_STANDARD:STRING=${TRITON_MIN_CXX_STANDARD}
    -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
    -DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
    -DTRITON_VERSION:STRING=${TRITON_VERSION}
  # Build only after every selected dependency target has been built.
  DEPENDS ${TRITON_DEPENDS}
)


================================================
FILE: CONTRIBUTING.md
================================================
<!--
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Contribution Guidelines

Contributions that fix documentation errors or that make small changes
to existing code can be contributed directly by following the rules
below and submitting an appropriate PR.

Contributions intended to add significant new functionality must
follow a more collaborative path described in the following
points. Before submitting a large PR that adds a major enhancement or
extension, be sure to submit a GitHub issue that describes the
proposed change so that the Triton team can provide feedback.

- As part of the GitHub issue discussion, a design for your change
  will be agreed upon. An up-front design discussion is required to
  ensure that your enhancement is done in a manner that is consistent
  with Triton's overall architecture.

- The Triton project is spread across multiple repos. The Triton team
  will provide guidance about how and where your enhancement should be
  implemented.

- [Testing](docs/customization_guide/test.md) is a critical part of any Triton
  enhancement. You should plan on spending significant time on
  creating tests for your change. The Triton team will help you to
  design your testing so that it is compatible with existing testing
  infrastructure.

- If your enhancement provides a user visible feature then you need to
  provide documentation.

# Contribution Rules

- The code style convention is enforced by clang-format. See below on
  how to ensure your contributions conform. In general please follow
  the existing conventions in the relevant file, submodule, module,
  and project when you add new code or when you extend/fix existing
  functionality.

- Avoid introducing unnecessary complexity into existing code so that
  maintainability and readability are preserved.

- Try to keep pull requests (PRs) as concise as possible:

  - Avoid committing commented-out code.

  - Wherever possible, each PR should address a single concern. If
    there are several otherwise-unrelated things that should be fixed
    to reach a desired endpoint, it is perfectly fine to open several
    PRs and state in the description which PR depends on another
    PR. The more complex the changes are in a single PR, the more time
    it will take to review those changes.

  - Make sure that the build log is clean, meaning no warnings or
    errors should be present.

- Make sure all `L0_*` tests pass:

  - In the `qa/` directory, there are basic sanity tests scripted in
    directories named `L0_...`.  See the [Test](docs/customization_guide/test.md)
    documentation for instructions on running these tests.

- Triton Inference Server's default build assumes recent versions of
  dependencies (CUDA, PyTorch, TensorRT,
  etc.). Contributions that add compatibility with older versions of
  those dependencies will be considered, but NVIDIA cannot guarantee
  that all possible build configurations work, are not broken by
  future contributions, and retain highest performance.

- Make sure that you can contribute your work to open source (no
  license and/or patent conflict is introduced by your code). You need
  to complete the CLA described below before your PR can be merged.

- Thanks in advance for your patience as we review your contributions;
  we do appreciate them!

# Coding Convention

All pull requests are checked against the
[pre-commit hooks](https://github.com/pre-commit/pre-commit-hooks)
located [in the repository's top-level .pre-commit-config.yaml](.pre-commit-config.yaml).
The hooks do some sanity checking like linting and formatting.
These checks must pass to merge a change.

To run these locally, you can
[install pre-commit](https://pre-commit.com/#install),
then run `pre-commit install` inside the cloned repo. When you
commit a change, the pre-commit hooks will run automatically.
If a pre-commit hook modifies a file to fix an issue, stage the file
again with `git add` and run `git commit` a second time; the hooks
should then pass and the commit will go through.

# Contributor License Agreement (CLA)

Triton requires that all contributors (or their corporate entity) send
a signed copy of the [Contributor License
Agreement](https://github.com/NVIDIA/triton-inference-server/blob/master/Triton-CCLA-v1.pdf)
to triton-cla@nvidia.com.
*NOTE*: Contributors with no company affiliation can fill `N/A` in the
`Corporation Name` and `Corporation Address` fields.


================================================
FILE: Dockerfile.QA
================================================
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Global build arguments (declared before the first FROM). Each can be
# overridden with --build-arg; a stage that needs one must redeclare it.
ARG BASE_IMAGE=tritonserver
ARG CIBASE_IMAGE=tritonserver_cibase
ARG SDK_IMAGE=tritonserver_sdk
# HTTPS rather than plain HTTP, so repository fetches are made over a secure
# transport; this also matches the default used by the top-level CMakeLists.txt.
ARG TRITON_REPO_ORGANIZATION=https://github.com/triton-inference-server
ARG TRITON_COMMON_REPO_TAG=main
ARG TRITON_CORE_REPO_TAG=main
ARG TRITON_THIRD_PARTY_REPO_TAG=main
ARG TRITON_BACKEND_REPO_TAG=main
ARG TRITONTMP_DIR=/tmp
ARG IGPU_BUILD=0

############################################################################
## Test artifacts built as part of the tritonserver build are
## available in CIBASE_IMAGE. Copy these artifacts into the QA area.
############################################################################
FROM ${CIBASE_IMAGE} AS cibase

# ARGs declared before FROM are not visible inside a build stage; redeclaring
# them without a value brings the global defaults into this stage.
ARG TRITONTMP_DIR
ARG TRITON_REPO_ORGANIZATION
ARG TRITON_COMMON_REPO_TAG
ARG TRITON_CORE_REPO_TAG
ARG TRITON_THIRD_PARTY_REPO_TAG
ARG TRITON_BACKEND_REPO_TAG
ARG IGPU_BUILD

# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive

# Install build tooling and development packages; the apt package lists are
# removed in the same layer so they do not remain in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
            build-essential \
            libarchive-dev \
            libboost-dev \
            python3-dev \
            python3-pip \
            python3-wheel \
            python3-setuptools \
            python3-venv \
            rapidjson-dev \
            software-properties-common && \
    rm -rf /var/lib/apt/lists/*

# Pin CMake to a specific release installed via pip.
RUN pip3 install cmake==4.0.3
# NOTE(review): presumably set so that projects declaring an older
# cmake_minimum_required still configure under CMake 4.x -- confirm.
ENV CMAKE_POLICY_VERSION_MINIMUM=3.5

# Add densenet_onnx model to example repo
# FIXME: This should be changed to using the fetch_models.sh script
# in order to ensure the public facing docs are up-to-date.
# NOTE(review): the download URL follows the `main` branch of onnx/models, so
# the fetched file is not pinned to a fixed revision.
WORKDIR /workspace/docs/examples/model_repository
RUN mkdir -p densenet_onnx/1 && \
        wget -O densenet_onnx/1/model.onnx \
            https://github.com/onnx/models/raw/main/validated/vision/classification/densenet-121/model/densenet-7.onnx

# Update the qa/ directory with test executables, models, etc.
#
# Everything happens in one layer as a single `&&` chain, so the first failing
# command aborts the build. The chain copies example/test models from
# docs/examples/model_repository and src/test/models into per-test `models`
# directories, and copies test binaries (bin/), a repo-agent library (lib/)
# and backend shared libraries (tritonbuild/tritonserver/backends/) next to
# the tests that use them.
#
# Most `mkdir` calls omit -p: they require the parent directory to exist and
# fail if the target directory already exists.
WORKDIR /workspace
RUN mkdir -p qa/common && \
    cp -r /workspace/src/test/models/repeat_int32 qa/L0_decoupled/models/ && \
    cp -r /workspace/src/test/models/square_int32 qa/L0_decoupled/models/ && \
    mkdir qa/L0_simple_example/models && \
    cp -r docs/examples/model_repository/simple qa/L0_simple_example/models/. && \
    mkdir qa/L0_simple_go_client/models && \
    cp -r docs/examples/model_repository/simple qa/L0_simple_go_client/models/. && \
    mkdir qa/L0_backend_release/simple_models && \
    cp -r docs/examples/model_repository/simple qa/L0_backend_release/simple_models/. && \
    mkdir qa/L0_simple_nodejs_client/models && \
    cp -r docs/examples/model_repository/simple qa/L0_simple_nodejs_client/models/. && \
    mkdir qa/L0_backend_release/simple_seq_models && \
    cp -r /workspace/docs/examples/model_repository/simple_sequence qa/L0_backend_release/simple_seq_models/. && \
    mkdir qa/L0_shared_memory/models && \
    cp -r docs/examples/model_repository/simple qa/L0_shared_memory/models/. && \
    mkdir qa/L0_cuda_shared_memory/models && \
    cp -r docs/examples/model_repository/simple qa/L0_cuda_shared_memory/models/. && \
    mkdir qa/L0_client_java/models && \
    cp -r docs/examples/model_repository/simple qa/L0_client_java/models && \
    mkdir qa/L0_grpc/models && \
    cp -r docs/examples/model_repository/simple qa/L0_grpc/models && \
    cp -r docs/examples/model_repository/simple_dyna_sequence qa/L0_grpc/models && \
    cp -r docs/examples/model_repository/simple_int8 qa/L0_grpc/models && \
    cp -r docs/examples/model_repository/simple_identity qa/L0_grpc/models && \
    cp -r docs/examples/model_repository/simple_sequence qa/L0_grpc/models && \
    cp -r docs/examples/model_repository/simple_string qa/L0_grpc/models && \
    cp -r docs/examples/model_repository/densenet_onnx qa/L0_grpc/models && \
    mkdir qa/L0_grpc_state_cleanup/models && \
    cp -r /workspace/src/test/models/repeat_int32 qa/L0_grpc_state_cleanup/models/ && \
    mkdir qa/L0_http/models && \
    cp -r docs/examples/model_repository/simple qa/L0_http/models && \
    cp -r docs/examples/model_repository/simple_dyna_sequence qa/L0_http/models && \
    cp -r docs/examples/model_repository/simple_identity qa/L0_http/models && \
    cp -r docs/examples/model_repository/simple_sequence qa/L0_http/models && \
    cp -r docs/examples/model_repository/simple_string qa/L0_http/models && \
    cp -r docs/examples/model_repository/densenet_onnx qa/L0_http/models && \
    mkdir qa/L0_https/models && \
    cp -r docs/examples/model_repository/simple qa/L0_https/models/. && \
    mkdir qa/L0_secure_grpc/models && \
    cp -r docs/examples/model_repository/simple qa/L0_secure_grpc/models/. && \
    cp bin/simple qa/L0_simple_lib/. && \
    cp bin/memory_alloc qa/L0_io/. && \
    cp bin/multi_server qa/L0_multi_server/. && \
    cp bin/memory_test qa/L0_memory/. && \
    cp bin/pinned_memory_manager_test qa/L0_memory/. && \
    mkdir -p qa/L0_memory/python_models/repeat_int32/1 && \
    cp bin/repo_agent_test qa/L0_triton_repo_agent/. && \
    cp lib/libtritonrepoagent_relocation.so qa/L0_triton_repo_agent/. && \
    mkdir qa/L0_query/models/query/1 && \
    cp tritonbuild/tritonserver/backends/query/libtriton_query.so qa/L0_query/models/query/1/. && \
    cp bin/query_test qa/L0_query/. && \
    mkdir qa/L0_iterative_sequence/models/iterative_sequence/1 && \
    cp tritonbuild/tritonserver/backends/iterative_sequence/libtriton_iterative_sequence.so qa/L0_iterative_sequence/models/iterative_sequence/1/. && \
    cp bin/register_api_test qa/L0_register/. && \
    cp bin/async_work_queue_test qa/L0_async_work_queue/. && \
    cp tritonbuild/tritonserver/backends/implicit_state/libtriton_implicit_state.so \
       qa/L0_implicit_state/. && \
    mkdir qa/L0_data_compression/models && \
    cp -r docs/examples/model_repository/simple qa/L0_data_compression/models && \
    cp bin/data_compressor_test qa/L0_data_compression/. && \
    cp bin/tensor_size_test qa/L0_input_validation/. && \
    cp bin/metrics_api_test qa/L0_metrics/. && \
    cp bin/response_cache_test qa/L0_response_cache/. && \
    cp bin/request_cancellation_test qa/L0_request_cancellation/. && \
    cp bin/triton_json_test qa/L0_json/. && \
    cp bin/backend_output_detail_test qa/L0_backend_output_detail/. && \
    cp -r deploy/mlflow-triton-plugin qa/L0_mlflow/. && \
    cp bin/input_byte_size_test qa/L0_input_validation/.

# Collect the built Python wheels (python/triton*.whl) in qa/pkgs and copy the
# contents of python/test into the L0_python_api test directory.
RUN mkdir -p qa/pkgs && \
    cp python/triton*.whl qa/pkgs/. && \
    cp -rf python/test/. qa/L0_python_api/.

# Reuse the example `simple` model file: as versions 1 and 2 of the model for
# L0_simple_ensemble and as version 1 for L0_socket.
RUN mkdir -p qa/L0_simple_ensemble/models/simple/1 && \
    cp docs/examples/model_repository/simple/1/model.onnx \
        qa/L0_simple_ensemble/models/simple/1/. && \
    mkdir -p qa/L0_simple_ensemble/models/simple/2 && \
    cp docs/examples/model_repository/simple/1/model.onnx \
        qa/L0_simple_ensemble/models/simple/2/. && \
    mkdir -p qa/L0_socket/models/simple/1 && \
    cp docs/examples/model_repository/simple/1/model.onnx \
        qa/L0_socket/models/simple/1/.

# Copy the identity_fp32 test model and make sure its version directory `1`
# exists.
RUN mkdir -p qa/L0_backend_identity/models && \
    cp -r src/test/models/identity_fp32 qa/L0_backend_identity/models/. && \
    mkdir -p qa/L0_backend_identity/models/identity_fp32/1

# Place the sequence and dyna_sequence backend libraries into version `1` of
# the corresponding models under qa/custom_models.
RUN mkdir -p qa/custom_models/custom_sequence_int32/1 && \
    cp tritonbuild/tritonserver/backends/sequence/libtriton_sequence.so \
        qa/custom_models/custom_sequence_int32/1/. && \
    mkdir -p qa/custom_models/custom_dyna_sequence_int32/1 && \
    cp tritonbuild/tritonserver/backends/dyna_sequence/libtriton_dyna_sequence.so \
        qa/custom_models/custom_dyna_sequence_int32/1/.

# L0_lifecycle needs No-GPU build of identity backend.
# The previous install/build directories are removed first so the backend is
# reconfigured from scratch with TRITON_ENABLE_GPU=OFF, using the same
# repository organization and tags as the rest of the build.
RUN cd tritonbuild/identity && \
    rm -rf install build && mkdir build && cd build && \
    cmake -DTRITON_ENABLE_GPU=OFF \
        -DCMAKE_INSTALL_PREFIX:PATH=/workspace/tritonbuild/identity/install \
        -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
        -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} \
        -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} \
        -DTRITON_THIRD_PARTY_REPO_TAG:STRING=${TRITON_THIRD_PARTY_REPO_TAG} \
        -DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
    make -j16 install

# L0_backend_python test require triton_shm_monitor
# The python backend is reconfigured from a clean build directory and the
# `triton-shm-monitor` and `install` targets are built. TRITON_BOOST_URL is
# forwarded to the backend's CMake configuration and can be overridden with
# --build-arg.
ARG TRITON_BOOST_URL="https://archives.boost.io/release/1.80.0/source/boost_1_80_0.tar.gz"
RUN cd tritonbuild/python && \
    rm -rf install build && mkdir build && cd build && \
    cmake -DCMAKE_INSTALL_PREFIX:PATH=/workspace/tritonbuild/python/install \
        -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
        -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} \
        -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} \
        -DTRITON_BOOST_URL:STRING=${TRITON_BOOST_URL} \
        -DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
    make -j16 triton-shm-monitor install

# Stage the freshly built identity backend and shm monitor libraries, create
# the custom_zero model directories, then derive the *_shm, *_cudashm and
# *_valgrind test directories by cloning their base test directories.
RUN cp tritonbuild/identity/install/backends/identity/libtriton_identity.so \
        qa/L0_lifecycle/. && \
    cp tritonbuild/python/install/backends/python/triton_shm_monitor*.so \
        qa/common/. && \
    mkdir -p qa/L0_perf_nomodel/custom_models/custom_zero_1_float32/1 \
             qa/L0_perf_pyclients/custom_models/custom_zero_1_int32/1 && \
    for qa_test in L0_infer L0_batcher L0_sequence_batcher; do \
        for variant in shm cudashm valgrind; do \
            mkdir -p qa/${qa_test}_${variant} && \
            cp -r qa/${qa_test}/. qa/${qa_test}_${variant} || exit 1; \
        done; \
    done && \
    for qa_test in L0_trt_shape_tensors L0_perf_nomodel; do \
        for variant in shm cudashm; do \
            mkdir -p qa/${qa_test}_${variant} && \
            cp -r qa/${qa_test}/. qa/${qa_test}_${variant} || exit 1; \
        done; \
    done

# L0_model_control_stress is only present when the gitlab tests are
# available; when it is, clone it into its valgrind and valgrind_massif
# variants.
RUN if [ -d qa/L0_model_control_stress ]; then \
        for variant in valgrind valgrind_massif; do \
            mkdir -p qa/L0_model_control_stress_${variant} && \
            cp -r qa/L0_model_control_stress/. \
                qa/L0_model_control_stress_${variant} || exit 1; \
        done; \
    fi

# Create the version-1 directories for the L0_decoupled models and for the
# repeat_int32 model of L0_grpc_state_cleanup.
RUN mkdir -p \
        qa/L0_decoupled/models/repeat_int32/1 \
        qa/L0_decoupled/models/square_int32/1 \
        qa/L0_decoupled/models/identity_int32/1 \
        qa/L0_decoupled/models/simple_repeat/1 \
        qa/L0_decoupled/models/fan_repeat/1 \
        qa/L0_decoupled/models/sequence_repeat/1 \
        qa/L0_decoupled/models/repeat_square/1 \
        qa/L0_decoupled/models/nested_square/1 \
        qa/L0_grpc_state_cleanup/models/repeat_int32/1

# The repeat and square backend libraries are copied into the QA tree only
# when IGPU_BUILD is "0".
# The comparison uses the POSIX "=" operator: "==" inside [ ] is a bash
# extension that strict /bin/sh implementations (e.g. dash) reject, which
# would make the condition evaluate as false and silently skip the copies.
RUN if [ "$IGPU_BUILD" = "0" ]; then \
        cp backends/repeat/libtriton_repeat.so qa/L0_model_config && \
        cp backends/repeat/libtriton_repeat.so qa/L0_decoupled/models/repeat_int32/1 && \
        cp backends/repeat/libtriton_repeat.so qa/L0_grpc_state_cleanup/models/repeat_int32/1/. && \
        cp backends/square/libtriton_square.so qa/L0_decoupled/models/square_int32/1; \
    fi

# Clone the L0_decoupled model repository into python_models and drop in the
# decoupled Python examples (model + config) for repeat_int32 and
# square_int32. The repeat example is also installed as the repeat_int32
# Python model of L0_memory.
RUN examples=/workspace/tritonbuild/python/examples/decoupled && \
    py_models=qa/L0_decoupled/python_models && \
    cp -r qa/L0_decoupled/models ${py_models}/ && \
    for model in repeat square; do \
        cp ${examples}/${model}_model.py \
            ${py_models}/${model}_int32/1/. && \
        cp ${examples}/${model}_config.pbtxt \
            ${py_models}/${model}_int32/. || exit 1; \
    done && \
    cp ${examples}/repeat_model.py \
        qa/L0_memory/python_models/repeat_int32/1/model.py && \
    cp ${examples}/repeat_config.pbtxt \
        qa/L0_memory/python_models/repeat_int32/config.pbtxt

# L0_decoupled_grpc_error starts as a full copy of L0_decoupled.
RUN clone_dir=qa/L0_decoupled_grpc_error && \
    mkdir -p ${clone_dir} && \
    cp -r qa/L0_decoupled/. ${clone_dir}

# L0_grpc_error_state_cleanup starts as a full copy of L0_grpc_state_cleanup.
RUN clone_dir=qa/L0_grpc_error_state_cleanup && \
    mkdir -p ${clone_dir} && \
    cp -r qa/L0_grpc_state_cleanup/. ${clone_dir}

# Install the no-GPU identity backend library as version 1 of the
# identity_int32 model used by L0_repoagent_checksum.
RUN model_dir=qa/L0_repoagent_checksum/models/identity_int32/1 && \
    mkdir -p ${model_dir} && \
    cp tritonbuild/identity/install/backends/identity/libtriton_identity.so \
        ${model_dir}/.
# Install the distributed_addsub backend library as version 1 of the
# distributed_int32_int32_int32 model used by L0_passive_instance.
RUN model_dir=qa/L0_passive_instance/models/distributed_int32_int32_int32/1 && \
    mkdir -p ${model_dir} && \
    cp tritonbuild/tritonserver/backends/distributed_addsub/libtriton_distributed_addsub.so \
        ${model_dir}/.

############################################################################
## Copy artifacts from sdk container
############################################################################
FROM ${SDK_IMAGE} AS sdk

ARG TARGETPLATFORM
WORKDIR /workspace
COPY --from=cibase /workspace/qa/ qa/
# Collect the client binaries, shared libraries, Python scripts, wheels and
# Java example jars from the SDK install tree into the QA tree.
RUN mkdir -p qa/clients qa/pkgs && \
    cp -a install/bin/* qa/clients/. && \
    cp install/lib/libgrpcclient.so install/lib/libhttpclient.so qa/clients/. && \
    cp install/python/*.py qa/clients/. && \
    cp install/python/triton*.whl qa/pkgs/. && \
    cp install/java/examples/*.jar qa/clients/.
# Hand the generated gRPC sources to the Go, Node.js and Java client tests.
RUN generated=client/src/grpc_generated && \
    cp ${generated}/go/*.go qa/L0_simple_go_client/. && \
    cp ${generated}/javascript/*.js qa/L0_simple_nodejs_client/. && \
    cp ${generated}/javascript/*.json qa/L0_simple_nodejs_client/. && \
    cp -r ${generated}/java qa/L0_client_java/.

############################################################################
## Create CI enabled image
############################################################################
FROM $BASE_IMAGE

ARG TARGETPLATFORM

# Keep apt-get from prompting during the package installations below.
ENV DEBIAN_FRONTEND=noninteractive

# Install platform specific packages. The build is rejected unless
# /etc/os-release reports Ubuntu 18.04, 20.04, 22.04 or 24.04.
RUN if ! grep -qE '^VERSION_ID="(18\.04|20\.04|22\.04|24\.04)' /etc/os-release; then \
        echo "Ubuntu version must be either 18.04, 20.04, 22.04 or 24.04"; \
        exit 1; \
    fi; \
    apt-get update && \
    apt-get install -y --no-install-recommends \
            libpng-dev

# CI/QA for memcheck requires valgrind
# libarchive-dev is required by Python backend
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl \
        gdb \
        libarchive-dev \
        libopencv-core-dev \
        libopencv-dev \
        libzmq3-dev \
        nginx \
        npm \
        openjdk-11-jdk \
        protobuf-compiler \
        python3-dev \
        python3-pip \
        python3-protobuf \
        python3-setuptools \
        python3-wheel \
        swig \
        valgrind && \
    rm -rf /var/lib/apt/lists/*

# CI/QA expects "python" executable (not python3).
RUN rm -f /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python

# Python packages used by the CI/QA scripts.
RUN pip3 install --upgrade \
        "numpy<2" pillow attrdict future "grpcio<1.68" requests gsutil \
        "awscli<=1.36.40" six "grpcio-channelz<1.68" prettytable virtualenv \
        check-jsonschema

# go needed for example go client test.
# The Go 1.22.3 tarball matching TARGETPLATFORM (arm64, otherwise amd64) is
# unpacked into /usr/local/go, replacing any existing installation.
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
      wget https://golang.org/dl/go1.22.3.linux-arm64.tar.gz && \
      rm -rf /usr/local/go && tar -C /usr/local -xzf go1.22.3.linux-arm64.tar.gz && \
      rm -f go1.22.3.linux-arm64.tar.gz; \
    else \
      wget https://golang.org/dl/go1.22.3.linux-amd64.tar.gz && \
      rm -rf /usr/local/go && tar -C /usr/local -xzf go1.22.3.linux-amd64.tar.gz && \
      rm -f go1.22.3.linux-amd64.tar.gz; \
    fi
# ENV uses the key=value form; the whitespace-separated legacy form is
# deprecated by Docker. GOPATH must be set in its own instruction so that the
# PATH instruction below can reference it.
ENV GOPATH=/root/go
ENV PATH=$PATH:/usr/local/go/bin:$GOPATH/bin
# protoc plugins for generating the Go protobuf and gRPC client code.
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
# CI expects tests in /opt/tritonserver/qa. The triton-server (1000)
# user should own all artifacts in case CI is run using triton-server
# user.
WORKDIR /opt/tritonserver
COPY --chown=1000:1000 --from=sdk /workspace/qa/ qa/

# Remove CI tests that are meant to run only on build image and
# install the tritonserver/triton python client APIs.
# The wheel path found under qa/pkgs gets "[all]" appended so pip installs
# the wheel together with its "all" extras.
RUN rm -fr qa/L0_copyrights qa/L0_build_variants && \
    find qa/pkgs/ -maxdepth 1 -type f -name \
    "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
    xargs pip3 install --upgrade

ENV LD_LIBRARY_PATH=/opt/tritonserver/qa/clients:${LD_LIBRARY_PATH}

# DLIS-3631: Needed to run Perf Analyzer CI tests correctly
ENV LD_LIBRARY_PATH=/opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}

# Required for PyTorch to pickup the correct HPCX libraries
ENV LD_LIBRARY_PATH=/opt/hpcx/ucc/lib/:/opt/hpcx/ucx/lib/:${LD_LIBRARY_PATH}


================================================
FILE: Dockerfile.sdk
================================================
# Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#
# Multistage build.
#

# Base image on the minimum Triton container
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:26.02-py3-min

# Global build arguments. A stage that needs one of them re-declares it with
# a bare ARG to bring the default into scope.
ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
# Repositories are fetched over HTTPS rather than plain HTTP so the sources
# cannot be tampered with in transit.
ARG TRITON_REPO_ORGANIZATION=https://github.com/triton-inference-server
ARG TRITON_COMMON_REPO_TAG=main
ARG TRITON_CORE_REPO_TAG=main
ARG TRITON_CLIENT_REPO_TAG=main
ARG TRITON_THIRD_PARTY_REPO_TAG=main
ARG TRITON_ENABLE_GPU=ON
ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
# DCGM version to install for Model Analyzer
ARG DCGM_VERSION=4.5.2-1

# Version metadata exported as environment variables by the final stage.
ARG NVIDIA_TRITON_SERVER_SDK_VERSION=unknown
ARG NVIDIA_BUILD_ID=unknown

############################################################################
##  Build image
############################################################################

# Stage that compiles the client libraries; the final stage copies
# /workspace/client and /workspace/install out of it.
FROM ${BASE_IMAGE} AS sdk_build

# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
# PIP_BREAK_SYSTEM_PACKAGES=1 lets pip install into the system Python.
# CMAKE_POLICY_VERSION_MINIMUM=3.5 is set alongside the cmake 4.0.3 install
# below.
ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5

# Build toolchain and libraries, followed by grpcio-tools (< 1.68) and a
# pinned cmake installed through pip.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
            autoconf \
            automake \
            build-essential \
            ca-certificates \
            curl \
            git \
            gperf \
            libb64-dev \
            libgoogle-perftools-dev \
            libopencv-core-dev \
            libopencv-dev \
            libssl-dev \
            libtool \
            maven \
            openjdk-11-jdk \
            pkg-config \
            python3 \
            python3-dev \
            python3-pdfkit \
            python3-pip \
            python3-setuptools \
            python3-wheel \
            rapidjson-dev \
            software-properties-common \
            vim \
            wget && \
    pip3 install --upgrade "grpcio-tools<1.68" cmake==4.0.3

# NOTE(review): CMake documents CMAKE_POLICY_VERSION_MINIMUM (already set
# above); CMAKE_POLICY_MINIMUM_REQUIRED does not appear to be a variable that
# CMake reads. Confirm whether another tool depends on it before removing.
ENV CMAKE_POLICY_MINIMUM_REQUIRED=3.5

# Build expects "python" executable (not python3).
RUN rm -f /usr/bin/python && \
    ln -s /usr/bin/python3 /usr/bin/python

# Build the client library and examples.
# Re-declare the global build arguments consumed by this stage.
ARG TRITON_REPO_ORGANIZATION
ARG TRITON_CLIENT_REPO_SUBDIR
ARG TRITON_COMMON_REPO_TAG
ARG TRITON_CORE_REPO_TAG
ARG TRITON_CLIENT_REPO_TAG
ARG TRITON_THIRD_PARTY_REPO_TAG
ARG TRITON_ENABLE_GPU
ARG JAVA_BINDINGS_MAVEN_VERSION
ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG
ARG TARGETPLATFORM

WORKDIR /workspace
COPY TRITON_VERSION .
COPY ${TRITON_CLIENT_REPO_SUBDIR} client

# Configure out-of-source in /workspace/client_build, installing into
# /workspace/install. Perf Analyzer is disabled; the C++, Python and Java
# clients, the examples and the tests are enabled.
WORKDIR /workspace/client_build
RUN cmake \
        -DCMAKE_INSTALL_PREFIX=/workspace/install \
        -DTRITON_VERSION=$(cat /workspace/TRITON_VERSION) \
        -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
        -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
        -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
        -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
        -DTRITON_ENABLE_PERF_ANALYZER=OFF \
        -DTRITON_ENABLE_CC_HTTP=ON \
        -DTRITON_ENABLE_CC_GRPC=ON \
        -DTRITON_ENABLE_PYTHON_HTTP=ON \
        -DTRITON_ENABLE_PYTHON_GRPC=ON \
        -DTRITON_ENABLE_JAVA_HTTP=ON \
        -DTRITON_ENABLE_EXAMPLES=ON \
        -DTRITON_ENABLE_TESTS=ON \
        -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
        /workspace/client
RUN cmake --build . -v --parallel \
        --target cc-clients java-clients python-clients

# Install Java API Bindings (linux/amd64 only); the jars are written to
# /workspace/install/java-api-bindings.
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
        source /workspace/client/src/java-api-bindings/scripts/install_dependencies_and_build.sh \
            --maven-version ${JAVA_BINDINGS_MAVEN_VERSION} \
            --core-tag ${TRITON_CORE_REPO_TAG} \
            --javacpp-tag ${JAVA_BINDINGS_JAVACPP_PRESETS_TAG} \
            --jar-install-path /workspace/install/java-api-bindings; \
    fi

############################################################################
## Create sdk container
############################################################################
FROM ${BASE_IMAGE}

# Keep apt-get non-interactive and let pip install into the system Python.
ENV DEBIAN_FRONTEND=noninteractive
ENV PIP_BREAK_SYSTEM_PACKAGES=1

# Re-declare the global build arguments consumed by this stage.
ARG DCGM_VERSION
ARG TRITON_REPO_ORGANIZATION
ARG TRITON_CORE_REPO_TAG
ARG TARGETPLATFORM
ARG TRITON_ENABLE_GPU

# Runtime and development packages for the SDK image, followed by the
# grpcio / grpcio-tools Python packages (both held below 1.68).
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        curl \
        default-jdk \
        git \
        gperf \
        libb64-dev \
        libgoogle-perftools-dev \
        libopencv-core-dev \
        libopencv-dev \
        libssl-dev \
        libtool \
        maven \
        perl \
        python3 \
        python3-dev \
        python3-pdfkit \
        python3-pip \
        python3-setuptools \
        python3-wheel \
        vim \
        wget; \
    pip3 install "grpcio<1.68" "grpcio-tools<1.68"

WORKDIR /workspace
COPY TRITON_VERSION .
COPY NVIDIA_Deep_Learning_Container_License.pdf .
COPY --from=sdk_build /workspace/client/ client/
COPY --from=sdk_build /workspace/install/ install/
# Bundle the whole install tree as /workspace/v<TRITON_VERSION>.clients.tar.gz.
RUN cd install && \
    tar zcf /workspace/v$(cat /workspace/TRITON_VERSION).clients.tar.gz *

# For CI testing need to copy over L0_sdk test and L0_client_build_variants test.
RUN mkdir qa
COPY qa/L0_sdk qa/L0_sdk
COPY qa/L0_client_build_variants qa/L0_client_build_variants

# Gather the python client library tests in one directory to enable unit
# testing.
RUN mkdir -p qa/python_client_unit_tests/
COPY --from=sdk_build /workspace/client/src/python/library/tests/* qa/python_client_unit_tests/

# Install an image needed by the quickstart and other documentation.
COPY qa/images/mug.jpg images/mug.jpg

# Install the dependencies needed to run the client examples. These
# are not needed for building but including them allows this image to
# be used to run the client examples. The tritonclient wheel is installed
# with its "all" extras.
RUN pip3 install --upgrade "numpy<2" pillow attrdict && \
    find install/python/ -maxdepth 1 -type f -name "tritonclient-*linux*.whl" | \
        xargs printf -- '%s[all]' | \
        xargs pip3 install --upgrade

# Install GenAI-Perf
RUN pip3 install genai-perf

# Install DCGM (GPU-enabled builds only).
# The CUDA keyring package for the current architecture ("x86_64", or "sbsa"
# for anything else) registers NVIDIA's apt repository, from which the
# pinned DCGM core and dev packages are installed.
#  * curl runs with --fail so an HTTP error aborts the build instead of
#    saving the error page as the .deb.
#  * apt-get is used instead of apt, whose command line interface is not
#    intended for scripts, and every install passes --yes.
RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \
        if [ "$(uname -m)" = "x86_64" ]; then arch="x86_64"; else arch="sbsa"; fi && \
        curl -fsSL -o /tmp/cuda-keyring.deb \
            https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${arch}/cuda-keyring_1.1-1_all.deb && \
        apt-get install --yes /tmp/cuda-keyring.deb && \
        rm /tmp/cuda-keyring.deb && \
        apt-get update && \
        apt-get install --yes --no-install-recommends \
               datacenter-gpu-manager-4-core=1:${DCGM_VERSION} \
               datacenter-gpu-manager-4-dev=1:${DCGM_VERSION}; \
    fi

# Build expects "python" executable (not python3).
RUN rm -f /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python

# Install Model Analyzer straight from its git repository at the requested
# tag.
ARG TRITON_MODEL_ANALYZER_REPO_TAG
ARG TRITON_MODEL_ANALYZER_REPO="${TRITON_REPO_ORGANIZATION}/model_analyzer@${TRITON_MODEL_ANALYZER_REPO_TAG}"
RUN pip3 install "git+${TRITON_MODEL_ANALYZER_REPO}"

# Entrypoint Banner: rewrite "Server" to "Server SDK" and widen the leading
# "===" rule of the banner text.
ENV NVIDIA_PRODUCT_NAME="Triton Server SDK"
COPY docker/entrypoint.d/ /opt/nvidia/entrypoint.d/
RUN banner=/opt/nvidia/entrypoint.d/10-banner && \
    sed 's/Server/Server SDK/' ${banner}.txt | \
        sed 's/^===/=======/' > ${banner}.new && \
    mv ${banner}.new ${banner}.txt

# Expose the SDK version and build id inside the container.
ARG NVIDIA_TRITON_SERVER_SDK_VERSION
ARG NVIDIA_BUILD_ID
ENV NVIDIA_TRITON_SERVER_SDK_VERSION=${NVIDIA_TRITON_SERVER_SDK_VERSION}
ENV NVIDIA_BUILD_ID=${NVIDIA_BUILD_ID}

# Make the installed client binaries and libraries discoverable.
ENV PATH=/workspace/install/bin:${PATH}
ENV LD_LIBRARY_PATH=/workspace/install/lib:${LD_LIBRARY_PATH}

# DLIS-3631: Needed to run Perf Analyzer CI tests correctly
ENV LD_LIBRARY_PATH=/opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}

# Set TCMALLOC_RELEASE_RATE for users setting LD_PRELOAD with tcmalloc
ENV TCMALLOC_RELEASE_RATE=200


================================================
FILE: Dockerfile.win10.min
================================================
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Windows min container for Triton build

ARG BASE_IMAGE=mcr.microsoft.com/windows:10.0.19042.1889

# Stage used only to download and unpack TensorRT and cuDNN; the build_base
# stage copies the unpacked /TensorRT and /cudnn directories out of it.
FROM ${BASE_IMAGE} AS dependency_base

# Install Chocolatey, then unzip through it.
# NOTE(review): the bootstrap line enables SSL3 / TLS 1.0 / TLS 1.1 next to
# TLS 1.2 - confirm the legacy protocols are still required.
RUN powershell.exe Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope LocalMachine
RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.ServicePointManager]::SecurityProtocol=[Net.SecurityProtocolType]::Tls,[Net.SecurityProtocolType]::Tls11,[Net.SecurityProtocolType]::Tls12,[Net.SecurityProtocolType]::Ssl3;Invoke-Expression( New-Object System.Net.WebClient ).DownloadString('https://chocolatey.org/install.ps1')
RUN choco install unzip -y

#
# Installing TensorRT
#
# NOTE: the "10.8.0" path component of TENSORRT_SOURCE is not derived from
# TENSORRT_VERSION; override TENSORRT_SOURCE when changing the version.
ARG TENSORRT_VERSION=10.8.0.43
ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows.win10.cuda-12.8.zip"
ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/zip/TensorRT-${TENSORRT_VERSION}.Windows.win10.cuda-12.8.zip
# COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}
RUN unzip /tmp/%TENSORRT_ZIP%
# Normalize the versioned directory name so later stages can copy /TensorRT.
RUN move TensorRT-* TensorRT

LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"


#
# Installing cuDNN
#
# The download URL is derived from CUDNN_VERSION so that overriding the
# version changes the archive that is actually fetched, not only its local
# file name.
ARG CUDNN_VERSION=9.7.1.26
ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}
RUN unzip /tmp/%CUDNN_ZIP%
# Normalize the versioned directory name so later stages can copy /cudnn.
RUN move cudnn-* cudnn

LABEL CUDNN_VERSION="${CUDNN_VERSION}"

# Main build stage: installs the toolchain (git, docker, Python, CMake, and
# further tools below) on top of the Windows base image.
FROM ${BASE_IMAGE} AS build_base

SHELL ["cmd", "/S", "/C"]

RUN mkdir c:\tmp
WORKDIR /tmp

# Install Chocolatey, then git, docker and unzip through it.
RUN powershell.exe Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope LocalMachine
RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.ServicePointManager]::SecurityProtocol=[Net.SecurityProtocolType]::Tls,[Net.SecurityProtocolType]::Tls11,[Net.SecurityProtocolType]::Tls12,[Net.SecurityProtocolType]::Ssl3;Invoke-Expression( New-Object System.Net.WebClient ).DownloadString('https://chocolatey.org/install.ps1')
RUN choco install git docker unzip -y

#
# Installing python
#
# Python is installed for all users into C:\python<version> and added to
# PATH; a python3.exe link is created next to python.exe.
ARG PYTHON_VERSION=3.12.3
ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe
ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe
RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%"
RUN mklink "C:\python%PYTHON_VERSION%\python3.exe" "C:\python%PYTHON_VERSION%\python.exe"
RUN pip install --upgrade wheel setuptools docker

LABEL PYTHON_VERSION=${PYTHON_VERSION}

#
# Installing CMake
#
ARG CMAKE_VERSION=4.0.3
RUN pip install cmake==%CMAKE_VERSION%

ENV CMAKE_POLICY_VERSION_MINIMUM=3.5

# Point CMake at the vcpkg toolchain and triplet. ENV uses the key=value
# form; the whitespace-separated legacy form is deprecated by Docker.
ENV CMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake
ENV VCPKG_TARGET_TRIPLET=x64-windows

LABEL CMAKE_VERSION=${CMAKE_VERSION}

# Be aware that pip can interact badly with VS cmd shell so need to pip install before
# vsdevcmd.bat (see https://bugs.python.org/issue38989)
#
# Installing Visual Studio BuildTools: VS17 2022
#
# Download collect.exe in case of an install failure.
ADD https://aka.ms/vscollect.exe "C:\tmp\collect.exe"

# Use the latest release channel. For more control, specify the location of an internal layout.
# Download the Build Tools bootstrapper.
# ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe

# The bootstrapper is fetched from a fixed download URL rather than the
# release channel above. BUILDTOOLS_VERSION is only recorded in the image
# label below; it does not select the download.
ARG BUILDTOOLS_VERSION=17.12.35506.116
ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/5536698c-711c-4834-876f-2817d31a2ef2/58894fc272e86d3c3a6d85bf3a1df1e5a0685be8b9ab65d9f3cc5c2a8c6921cc/vs_BuildTools.exe

ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe
# Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended.
# VS_INSTALL_PATH_WP is also used later as the destination of the CUDA
# MSBuild integration files.
ARG VS_INSTALL_PATH_WP="C:\BuildTools"
RUN vs_buildtools.exe --quiet --wait --norestart --nocache install \
      --installPath %VS_INSTALL_PATH_WP% \
      --add Microsoft.VisualStudio.Workload.VCTools \
      --includeRecommended \
      --locale "En-us"

LABEL BUILDTOOLS_VERSION=${BUILDTOOLS_VERSION}

WORKDIR /

#
# Installing Vcpkg
#
# NOTE(review): "VCPGK_VERSION" looks like a misspelling of VCPKG, but it is
# the name of the build argument and of the image label, so renaming it would
# change the interface seen by callers.
# The argument selects the vcpkg git branch/tag that is shallow-cloned into
# /vcpkg.
ARG VCPGK_VERSION=2024.03.19
RUN git clone --single-branch --depth=1 -b %VCPGK_VERSION% https://github.com/microsoft/vcpkg.git
WORKDIR /vcpkg
RUN bootstrap-vcpkg.bat
RUN vcpkg.exe update
# x64-windows libraries installed through vcpkg.
RUN vcpkg.exe install \
      boost-interprocess:x64-windows \
      boost-stacktrace:x64-windows \
      b64:x64-windows \
      openssl-windows:x64-windows \
      openssl:x64-windows \
      pthread:x64-windows \
      rapidjson:x64-windows \
      zlib:x64-windows
RUN vcpkg.exe integrate install

LABEL VCPGK_VERSION=${VCPGK_VERSION}

WORKDIR /

#
# Installing CUDA
#
# CUDA_VERSION, the package list and the install root are all derived from
# CUDA_MAJOR / CUDA_MINOR / CUDA_PATCH.
ARG CUDA_MAJOR=12
ARG CUDA_MINOR=8
ARG CUDA_PATCH=0
ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
# CUDA sub-packages passed to the network installer.
ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
                   cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
                   nvml_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
                   nvrtc_${CUDA_MAJOR}.${CUDA_MINOR} nvrtc_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
                   cublas_${CUDA_MAJOR}.${CUDA_MINOR} cublas_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
                   cufft_${CUDA_MAJOR}.${CUDA_MINOR} cufft_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
                   curand_${CUDA_MAJOR}.${CUDA_MINOR} curand_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
                   cusolver_${CUDA_MAJOR}.${CUDA_MINOR} cusolver_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
                   cusparse_${CUDA_MAJOR}.${CUDA_MINOR} cusparse_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
                   cupti_${CUDA_MAJOR}.${CUDA_MINOR} \
                   thrust_${CUDA_MAJOR}.${CUDA_MINOR} \
                   visual_studio_integration_${CUDA_MAJOR}.${CUDA_MINOR}"
# Directory the toolkit is expected to be installed into; the cuDNN files
# are copied into it later in this stage.
ARG CUDA_INSTALL_ROOT_WP="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v${CUDA_MAJOR}.${CUDA_MINOR}"

ARG CUDA_SOURCE=https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/network_installers/cuda_${CUDA_VERSION}_windows_network.exe
ADD ${CUDA_SOURCE} cuda_${CUDA_VERSION}_windows_network.exe

# Run the network installer with the package list above.
RUN cuda_%CUDA_VERSION%_windows_network.exe -s %CUDA_PACKAGES%
# Copy the CUDA visualstudio integration from where it was installed
# into the appropriate place in BuildTools
RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensions\*" "%VS_INSTALL_PATH_WP%\MSBuild\Microsoft\VC\v170\BuildCustomizations"

# Prepend the CUDA bin directory to PATH via setx.
RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"

ENV CUDA_VERSION=${CUDA_VERSION}
LABEL CUDA_VERSION="${CUDA_VERSION}"

# cuDNN: copy the directory unpacked in the dependency_base stage and merge
# its DLLs, import libraries and headers into the CUDA install root.
# CUDNN_VERSION is re-declared here because ARG values are scoped per stage.
# ENV uses the key=value form; the whitespace-separated legacy form is
# deprecated by Docker.
ARG CUDNN_VERSION=9.7.1.26
ENV CUDNN_VERSION=${CUDNN_VERSION}
COPY --from=dependency_base /cudnn /cudnn
RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
LABEL CUDNN_VERSION="${CUDNN_VERSION}"

# TensorRT: copy the directory unpacked in the dependency_base stage and put
# its lib directory on PATH. The version is exported as TRT_VERSION.
ARG TENSORRT_VERSION=10.8.0.43
ENV TRT_VERSION=${TENSORRT_VERSION}
COPY --from=dependency_base /TensorRT /TensorRT
RUN setx PATH "c:\TensorRT\lib;%PATH%"
LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"

# It is important that the entrypoint initialize VisualStudio
# environment otherwise the build will fail. Also set
# CMAKE_TOOLCHAIN_FILE and VCPKG_TARGET_TRIPLET so
# that cmake can find the packages installed by vcpkg.
# NOTE(review): the trailing "&&" is presumably intentional so that a command
# supplied at run time is chained after vcvars64.bat - confirm.
ENTRYPOINT C:\BuildTools\VC\Auxiliary\Build\vcvars64.bat &&


================================================
FILE: LICENSE
================================================
Copyright (c) 2018-2026, NVIDIA CORPORATION. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
 * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
 * Neither the name of NVIDIA CORPORATION nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: README.md
================================================
<!--
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)

>[!WARNING]
>You are currently on the `main` branch which tracks under-development progress
>towards the next release. The current release is version [2.66.0](https://github.com/triton-inference-server/server/releases/latest)
>and corresponds to the 26.02 container release on NVIDIA GPU Cloud (NGC).

# Triton Inference Server

Triton Inference Server is an open source inference serving software that
streamlines AI inferencing. Triton enables teams to deploy any AI model from
multiple deep learning and machine learning frameworks, including TensorRT,
PyTorch, ONNX, OpenVINO, Python, RAPIDS FIL, and more. Triton
Inference Server supports inference across cloud, data center, edge and embedded
devices on NVIDIA GPUs, x86 and ARM CPU, or AWS Inferentia. Triton Inference
Server delivers optimized performance for many query types, including real time,
batched, ensembles and audio/video streaming. Triton Inference Server is part of
[NVIDIA AI Enterprise](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/),
a software platform that accelerates the data science pipeline and streamlines
the development and deployment of production AI.

Major features include:

- [Supports multiple deep learning
  frameworks](https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton)
- [Supports multiple machine learning
  frameworks](https://github.com/triton-inference-server/fil_backend)
- [Concurrent model
  execution](docs/user_guide/architecture.md#concurrent-model-execution)
- [Dynamic batching](docs/user_guide/batcher.md#dynamic-batcher)
- [Sequence batching](docs/user_guide/batcher.md#sequence-batcher) and
  [implicit state management](docs/user_guide/architecture.md#implicit-state-management)
  for stateful models
- Provides [Backend API](https://github.com/triton-inference-server/backend) that
  allows adding custom backends and pre/post processing operations
- Supports writing custom backends in Python, a.k.a.
  [Python-based backends.](https://github.com/triton-inference-server/backend/blob/main/docs/python_based_backends.md#python-based-backends)
- Model pipelines using
  [Ensembling](docs/user_guide/architecture.md#ensemble-models) or [Business
  Logic Scripting
  (BLS)](https://github.com/triton-inference-server/python_backend#business-logic-scripting)
- [HTTP/REST and GRPC inference
  protocols](docs/customization_guide/inference_protocols.md) based on the community
  developed [KServe
  protocol](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2)
- A [C API](docs/customization_guide/inprocess_c_api.md) and
  [Java API](docs/customization_guide/inprocess_java_api.md)
  allow Triton to link directly into your application for edge and other in-process use cases
- [Metrics](docs/user_guide/metrics.md) indicating GPU utilization, server
  throughput, server latency, and more

**New to Triton Inference Server?** Make use of
[these tutorials](https://github.com/triton-inference-server/tutorials)
to begin your Triton journey!

Join the [Triton and TensorRT community](https://www.nvidia.com/en-us/deep-learning-ai/triton-tensorrt-newsletter/) and
stay current on the latest product updates, bug fixes, content, best practices,
and more.  Need enterprise support?  NVIDIA global support is available for Triton
Inference Server with the
[NVIDIA AI Enterprise software suite](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/).

## Serve a Model in 3 Easy Steps

```bash
# Step 1: Create the example model repository
git clone -b r26.02 https://github.com/triton-inference-server/server.git
cd server/docs/examples
./fetch_models.sh

# Step 2: Launch triton from the NGC Triton container
docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:26.02-py3 tritonserver --model-repository=/models --model-control-mode explicit --load-model densenet_onnx

# Step 3: Sending an Inference Request
# In a separate console, launch the image_client example from the NGC Triton SDK container
docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:26.02-py3-sdk /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg

# Inference should return the following
Image '/workspace/images/mug.jpg':
    15.346230 (504) = COFFEE MUG
    13.224326 (968) = CUP
    10.422965 (505) = COFFEEPOT
```
Please read the [QuickStart](docs/getting_started/quickstart.md) guide for additional information
regarding this example. The quickstart guide also contains an example of how to launch Triton on [CPU-only systems](docs/getting_started/quickstart.md#run-on-cpu-only-system). New to Triton and wondering where to get started? Watch the [Getting Started video](https://youtu.be/NQDtfSi5QF4).

## Examples and Tutorials

Check out [NVIDIA LaunchPad](https://www.nvidia.com/en-us/data-center/products/ai-enterprise-suite/trial/)
for free access to a set of hands-on labs with Triton Inference Server hosted on
NVIDIA infrastructure.

Specific end-to-end examples for popular models, such as ResNet, BERT, and DLRM
are located in the
[NVIDIA Deep Learning Examples](https://github.com/NVIDIA/DeepLearningExamples)
page on GitHub. The
[NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-triton-inference-server)
contains additional documentation, presentations, and examples.

## Documentation

### Build and Deploy

The recommended way to build and use Triton Inference Server is with Docker
images.

- [Install Triton Inference Server with Docker containers](docs/customization_guide/build.md#building-with-docker) (*Recommended*)
- [Install Triton Inference Server without Docker containers](docs/customization_guide/build.md#building-without-docker)
- [Build a custom Triton Inference Server Docker container](docs/customization_guide/compose.md)
- [Build Triton Inference Server from source](docs/customization_guide/build.md#building-on-unsupported-platforms)
- [Build Triton Inference Server for Windows 10](docs/customization_guide/build.md#building-for-windows-10)
- Examples for deploying Triton Inference Server with Kubernetes and Helm on [GCP](deploy/gcp/README.md),
  [AWS](deploy/aws/README.md), and [NVIDIA FleetCommand](deploy/fleetcommand/README.md)
- [Secure Deployment Considerations](docs/customization_guide/deploy.md)

### Using Triton

#### Preparing Models for Triton Inference Server

The first step in using Triton to serve your models is to place one or
more models into a [model repository](docs/user_guide/model_repository.md). Depending on
the type of the model and on what Triton capabilities you want to enable for
the model, you may need to create a [model
configuration](docs/user_guide/model_configuration.md) for the model.

- [Add custom operations to Triton if needed by your model](docs/user_guide/custom_operations.md)
- Enable model pipelining with [Model Ensemble](docs/user_guide/architecture.md#ensemble-models)
  and [Business Logic Scripting (BLS)](https://github.com/triton-inference-server/python_backend#business-logic-scripting)
- Optimize your models by setting [scheduling and batching](docs/user_guide/architecture.md#models-and-schedulers)
  parameters and [model instances](docs/user_guide/model_configuration.md#instance-groups).
- Use the [Model Analyzer tool](https://github.com/triton-inference-server/model_analyzer)
  to help optimize your model configuration with profiling
- Learn how to [explicitly manage what models are available by loading and
  unloading models](docs/user_guide/model_management.md)

#### Configure and Use Triton Inference Server

- Read the [Quick Start Guide](docs/getting_started/quickstart.md) to run Triton Inference
  Server on both GPU and CPU
- Triton supports multiple execution engines, called
  [backends](https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton), including
  [TensorRT](https://github.com/triton-inference-server/tensorrt_backend),
  [PyTorch](https://github.com/triton-inference-server/pytorch_backend),
  [ONNX](https://github.com/triton-inference-server/onnxruntime_backend),
  [OpenVINO](https://github.com/triton-inference-server/openvino_backend),
  [Python](https://github.com/triton-inference-server/python_backend), and more
- Not all the above backends are supported on every platform supported by Triton.
  Look at the
  [Backend-Platform Support Matrix](https://github.com/triton-inference-server/backend/blob/main/docs/backend_platform_support_matrix.md)
  to learn which backends are supported on your target platform.
- Learn how to [optimize performance](docs/user_guide/optimization.md) using the
  [Performance Analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
  and
  [Model Analyzer](https://github.com/triton-inference-server/model_analyzer)
- Learn how to [manage loading and unloading models](docs/user_guide/model_management.md) in
  Triton
- Send requests directly to Triton with the [HTTP/REST JSON-based
  or gRPC protocols](docs/customization_guide/inference_protocols.md#httprest-and-grpc-protocols)

#### Client Support and Examples

A Triton *client* application sends inference and other requests to Triton. The
[Python and C++ client libraries](https://github.com/triton-inference-server/client)
provide APIs to simplify this communication.

- Review client examples for [C++](https://github.com/triton-inference-server/client/blob/main/src/c%2B%2B/examples),
  [Python](https://github.com/triton-inference-server/client/blob/main/src/python/examples),
  and [Java](https://github.com/triton-inference-server/client/blob/main/src/java/src/main/java/triton/client/examples)
- Configure [HTTP](https://github.com/triton-inference-server/client#http-options)
  and [gRPC](https://github.com/triton-inference-server/client#grpc-options)
  client options
- Send input data (e.g. a jpeg image) directly to Triton in the [body of an HTTP
  request without any additional metadata](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_binary_data.md#raw-binary-request)

### Extend Triton

[Triton Inference Server's architecture](docs/user_guide/architecture.md) is specifically
designed for modularity and flexibility.

- [Customize Triton Inference Server container](docs/customization_guide/compose.md) for your use case
- [Create custom backends](https://github.com/triton-inference-server/backend)
  in either [C/C++](https://github.com/triton-inference-server/backend/blob/main/README.md#triton-backend-api)
  or [Python](https://github.com/triton-inference-server/python_backend)
- Create [decoupled backends and models](docs/user_guide/decoupled_models.md) that can send
  multiple responses for a request or not send any responses for a request
- Use a [Triton repository agent](docs/customization_guide/repository_agents.md) to add functionality
  that operates when a model is loaded and unloaded, such as authentication,
  decryption, or conversion
- Deploy Triton on [Jetson and JetPack](docs/user_guide/jetson.md)
- [Use Triton on AWS
   Inferentia](https://github.com/triton-inference-server/python_backend/tree/main/inferentia)

### Additional Documentation

- [FAQ](docs/user_guide/faq.md)
- [User Guide](docs/README.md#user-guide)
- [Customization Guide](docs/README.md#customization-guide)
- [Release Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/index.html)
- [GPU, Driver, and CUDA Support
Matrix](https://docs.nvidia.com/deeplearning/dgx/support-matrix/index.html)

## Contributing

Contributions to Triton Inference Server are more than welcome. To
contribute please review the [contribution
guidelines](CONTRIBUTING.md). If you have a backend, client,
example or similar contribution that is not modifying the core of
Triton, then you should file a PR in the [contrib
repo](https://github.com/triton-inference-server/contrib).

## Reporting problems, asking questions

We appreciate any feedback, questions or bug reporting regarding this project.
When posting [issues in GitHub](https://github.com/triton-inference-server/server/issues),
follow the process outlined in the [Stack Overflow document](https://stackoverflow.com/help/mcve).
Ensure posted examples are:
- minimal – use as little code as possible that still produces the
  same problem
- complete – provide all parts needed to reproduce the problem. Check
  if you can strip external dependencies and still show the problem. The
  less time we spend on reproducing problems the more time we have to
  fix them
- verifiable – test the code you're about to provide to make sure it
  reproduces the problem. Remove all other problems that are not
  related to your request/question.

For issues, please use the provided bug report and feature request templates.

For questions, we recommend posting in our community
[GitHub Discussions.](https://github.com/triton-inference-server/server/discussions)

## For more information

Please refer to the [NVIDIA Developer Triton page](https://developer.nvidia.com/nvidia-triton-inference-server)
for more information.


================================================
FILE: SECURITY.md
================================================
<!--
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Report a Security Vulnerability

To report a potential security vulnerability in any NVIDIA product, please use either:
* This web form: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html), or
* Send email to: [NVIDIA PSIRT](mailto:psirt@nvidia.com)

**OEM Partners should contact their NVIDIA Customer Program Manager**

If reporting a potential vulnerability via email, please encrypt it using NVIDIA’s public PGP key ([see PGP Key page](https://www.nvidia.com/en-us/security/pgp-key/)) and include the following information:
1. Product/Driver name and version/branch that contains the vulnerability
2. Type of vulnerability (code execution, denial of service, buffer overflow, etc.)
3. Instructions to reproduce the vulnerability
4. Proof-of-concept or exploit code
5. Potential impact of the vulnerability, including how an attacker could exploit the vulnerability

See https://www.nvidia.com/en-us/security/ for past NVIDIA Security Bulletins and Notices.


================================================
FILE: TRITON_VERSION
================================================
2.67.0dev


================================================
FILE: build.py
================================================
#!/usr/bin/env python3
# Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import importlib.util
import multiprocessing
import os
import os.path
import pathlib
import platform
import stat
import subprocess
import sys
from inspect import getsourcefile

import distro
import requests

#
# Build Triton Inference Server.
#

# By default build.py builds the Triton Docker image, but can also be
# used to build without Docker.  See docs/customization_guide/build.md
# and --help for more information.
#
# The TRITON_VERSION file indicates the Triton version and
# DEFAULT_TRITON_VERSION_MAP is used to determine the corresponding container
# version and upstream container version (upstream containers are
# dependencies required by Triton). These versions may be overridden.

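# Default versions for this Triton release and its components, keyed by name.
#
#   release_version             -> Triton version
#   triton_container_version    -> Triton container version
#   upstream_container_version  -> upstream container version
#   ort_version                 -> ORT version
#   ort_openvino_version        -> ORT OpenVINO version (use None to disable
#                                  OpenVINO in ORT)
#   standalone_openvino_version -> Standalone OpenVINO version
#   dcgm_version                -> DCGM version
#   vllm_version                -> vLLM version
#   rhel_py_version             -> Python version for RHEL builds
#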
# Currently the OpenVINO versions used in ORT and standalone must
# match because of the way dlopen works with loading the backends. If
# different versions are used then one backend or the other will
# incorrectly load the other version of the openvino libraries.
#

# Default version of Triton and of each versioned dependency, keyed by name.
DEFAULT_TRITON_VERSION_MAP = {
    "release_version": "2.67.0dev",
    "triton_container_version": "26.03dev",
    "upstream_container_version": "26.02",
    "ort_version": "1.24.2",
    "ort_openvino_version": "2026.0.0",
    "standalone_openvino_version": "2026.0.0",
    "dcgm_version": "4.5.2-1",
    "vllm_version": "0.16.0",
    "rhel_py_version": "3.12.3",
}

# Names of the core backends.
CORE_BACKENDS = ["ensemble"]

# Parsed command-line flags. Stays None until set elsewhere (for example it
# is None when this module's helpers are called by compose.py).
FLAGS = None
# Additional "-D" settings appended to the core cmake invocation.
EXTRA_CORE_CMAKE_FLAGS = {}
# Values that replace the defaults computed for core cmake settings.
OVERRIDE_CORE_CMAKE_FLAGS = {}
# Per-backend additional "-D" settings: backend name -> {setting: value}.
EXTRA_BACKEND_CMAKE_FLAGS = {}
# Per-backend replacement values: backend name -> {setting: value}.
OVERRIDE_BACKEND_CMAKE_FLAGS = {}

# Absolute path of the directory containing this script's source file.
THIS_SCRIPT_DIR = os.path.dirname(os.path.abspath(getsourcefile(lambda: 0)))


def log(msg, force=False):
    """Print 'msg' to stderr unless quiet mode is on.

    When 'force' is true the message is printed regardless of
    FLAGS.quiet (and FLAGS is not consulted at all).
    """
    if not force and FLAGS.quiet:
        return
    try:
        print(msg, file=sys.stderr)
    except Exception:
        # Logging is best-effort; never let a bad message abort the build.
        print("<failed to log>", file=sys.stderr)


def log_verbose(msg):
    """Print 'msg' to stderr only when FLAGS.verbose is set."""
    if not FLAGS.verbose:
        return
    # Verbose output bypasses the quiet check in log().
    log(msg, force=True)


def fail(msg):
    """Report 'msg' as an error and exit the process with status 1."""
    fail_if(p=True, msg=msg)


def fail_if(p, msg):
    """Exit with status 1 after printing 'msg' to stderr when 'p' is truthy.

    Returns None without side effects when 'p' is falsy.
    """
    if not p:
        return
    print(f"error: {msg}", file=sys.stderr)
    sys.exit(1)


def target_platform():
    """Return the platform being built for.

    An explicit FLAGS.target_platform wins. Otherwise the host is
    inspected: non-Linux hosts yield the lowercased platform.system()
    name; Linux hosts yield "linux" when the distribution is
    Debian-like and "rhel" otherwise.
    """
    # When called by compose.py, FLAGS will be None
    if FLAGS and FLAGS.target_platform is not None:
        return FLAGS.target_platform

    system_name = platform.system().lower()
    if system_name != "linux":
        return system_name

    # Need to inspect the /etc/os-release file to get
    # the distribution of linux
    like_ids = distro.like().split()
    return "linux" if "debian" in like_ids else "rhel"


def target_machine():
    """Return the machine architecture being built for.

    Uses FLAGS.target_machine when it is set, otherwise the lowercased
    platform.machine() of the host.
    """
    # When called by compose.py, FLAGS will be None
    explicit = FLAGS.target_machine if FLAGS else None
    if explicit is not None:
        return explicit
    return platform.machine().lower()


def container_versions(version, container_version, upstream_container_version):
    """Resolve the Triton and upstream container versions.

    Each of 'container_version' and 'upstream_container_version' is
    returned unchanged when given; a None value is replaced by the
    corresponding FLAGS attribute. 'version' is accepted but not used.

    Returns a (container_version, upstream_container_version) tuple.
    """
    resolved_container = (
        FLAGS.triton_container_version
        if container_version is None
        else container_version
    )
    resolved_upstream = (
        FLAGS.upstream_container_version
        if upstream_container_version is None
        else upstream_container_version
    )
    return resolved_container, resolved_upstream


class BuildScript:
    """Utility class for writing build scripts.

    The script is written to 'filepath' as it is built up through the
    methods below. When target_platform() is "windows" PowerShell syntax
    is emitted (with the whole body wrapped in try/catch); otherwise bash
    syntax is emitted. Closing the script marks the file executable.

    Can be used as a context manager; the script is closed on exit.
    """

    def __init__(self, filepath, desc=None, verbose=False):
        """Open 'filepath' for writing and emit the script header.

        filepath: path of the script file to create/truncate.
        desc: optional description written as a comment at the top.
        verbose: when true, enable command tracing in the generated
            script and emit comment_verbose() comments.
        """
        self._filepath = filepath
        self._file = open(self._filepath, "w")
        self._verbose = verbose
        self.header(desc)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __del__(self):
        self.close()

    def close(self):
        """Close the file and make it executable. Safe to call repeatedly."""
        # getattr() rather than self._file: __del__ also runs when open()
        # failed in __init__, in which case the attribute was never set.
        if getattr(self, "_file", None) is None:
            return

        if target_platform() == "windows":
            # Terminate the 'try {' opened by header().
            self.blankln()
            self._file.write("}\n")
            self._file.write("catch {\n")
            self._file.write("    $_;\n")
            self._file.write("    ExitWithCode 1;\n")
            self._file.write("}\n")
        self._file.close()
        self._file = None
        st = os.stat(self._filepath)
        os.chmod(self._filepath, st.st_mode | stat.S_IEXEC)

    def blankln(self):
        """Write an empty line."""
        self._file.write("\n")

    def commentln(self, cnt):
        """Write a line consisting of 'cnt' '#' characters."""
        self._file.write("#" * cnt + "\n")

    def comment(self, msg=""):
        """Write 'msg' as a comment.

        A string (or any non-iterable value) is written as a single
        comment line. Any other iterable is written as one comment line
        per element.
        """
        if not isinstance(msg, str):
            try:
                for m in msg:
                    # Write the element, not the whole iterable.
                    self._file.write(f"# {m}\n")
                return
            except TypeError:
                # Not iterable: fall through and write it as one line.
                pass
        self._file.write(f"# {msg}\n")

    def comment_verbose(self, msg=""):
        """Write 'msg' as a comment only when the script is verbose."""
        if self._verbose:
            self.comment(msg)

    def header(self, desc=None):
        """Write the script preamble.

        Emits the shebang (non-Windows only), the optional description
        comment, and the platform-specific "exit on first failure" setup.
        On Windows this opens a 'try {' block that close() terminates.
        """
        if target_platform() != "windows":
            self._file.write("#!/usr/bin/env bash\n\n")

        if desc is not None:
            self.comment()
            self.comment(desc)
            self.comment()
            self.blankln()

        self.comment("Exit script immediately if any command fails")
        if target_platform() == "windows":
            self._file.write("$UseStructuredOutput = $false\n")
            self.blankln()
            self._file.write("function ExitWithCode($exitcode) {\n")
            self._file.write("    $host.SetShouldExit($exitcode)\n")
            self._file.write("    exit $exitcode\n")
            self._file.write("}\n")
            self.blankln()
            if self._verbose:
                self._file.write("Set-PSDebug -Trace 1\n")
            self.blankln()
            self._file.write("try {\n")
        else:
            self._file.write("set -e\n")
            if self._verbose:
                self._file.write("set -x\n")
        self.blankln()

    def envvar_ref(self, v):
        """Return the script expression that reads environment variable 'v'."""
        if target_platform() == "windows":
            return f"${{env:{v}}}"
        return f"${{{v}}}"

    def cmd(self, clist, check_exitcode=False):
        """Write a command line to the script.

        clist: the command, either as a single string or as an iterable
            of tokens, each of which is written followed by a space.
        check_exitcode: on Windows, follow the command with a check of
            $LASTEXITCODE that exits the script on failure. Nothing
            extra is written on other platforms.
        """
        if isinstance(clist, str):
            self._file.write(f"{clist}\n")
        else:
            for c in clist:
                self._file.write(f"{c} ")
            self.blankln()

        if check_exitcode:
            if target_platform() == "windows":
                self._file.write("if ($LASTEXITCODE -ne 0) {\n")
                self._file.write(
                    '  Write-Output "exited with status code $LASTEXITCODE";\n'
                )
                self._file.write("  ExitWithCode 1;\n")
                self._file.write("}\n")

    def cwd(self, path):
        """Write a command that changes the working directory to 'path'."""
        if target_platform() == "windows":
            self.cmd(f"Set-Location -EV Err -EA Stop {path}")
        else:
            self.cmd(f"cd {path}")

    def cp(self, src, dest):
        """Write a command that copies file 'src' to 'dest'."""
        if target_platform() == "windows":
            self.cmd(f"Copy-Item -EV Err -EA Stop {src} -Destination {dest}")
        else:
            self.cmd(f"cp {src} {dest}")

    def mkdir(self, path):
        """Write a command that creates directory 'path' and its parents."""
        if target_platform() == "windows":
            self.cmd(
                f"New-Item -EV Err -EA Stop -ItemType Directory -Force -Path {path}"
            )
        else:
            self.cmd(f"mkdir -p {pathlib.Path(path)}")

    def rmdir(self, path):
        """Write a command that recursively removes 'path' if it exists."""
        if target_platform() == "windows":
            self.cmd(f"if (Test-Path -Path {path}) {{")
            self.cmd(f"  Remove-Item -EV Err -EA Stop -Recurse -Force {path}")
            self.cmd("}")
        else:
            self.cmd(f"rm -fr {pathlib.Path(path)}")

    def cpdir(self, src, dest):
        """Write a command that recursively copies 'src' to 'dest'."""
        if target_platform() == "windows":
            self.cmd(f"Copy-Item -EV Err -EA Stop -Recurse {src} -Destination {dest}")
        else:
            self.cmd(f"cp -r {src} {dest}")

    def tar(self, subdir, tar_filename):
        """Write a command that archives 'subdir' into gzipped 'tar_filename'.

        Not supported on Windows, where this fails the build.
        """
        if target_platform() == "windows":
            fail("unsupported operation: tar")
        else:
            self.cmd(f"tar zcf {tar_filename} {subdir}")

    def cmake(self, args):
        """Write a cmake configure command with 'args' (a list of strings)."""
        # Pass some additional envvars into cmake...
        env_args = []
        for k in ("TRT_VERSION", "CMAKE_TOOLCHAIN_FILE", "VCPKG_TARGET_TRIPLET"):
            env_args += [f'"-D{k}={self.envvar_ref(k)}"']
        self.cmd(f'cmake {" ".join(env_args)} {" ".join(args)}', check_exitcode=True)

    def makeinstall(self, target="install"):
        """Write a 'cmake --build' command for 'target'.

        Build type and parallelism come from FLAGS.build_type and
        FLAGS.build_parallel.
        """
        verbose_flag = "-v" if self._verbose else ""
        self.cmd(
            f"cmake --build . --config {FLAGS.build_type} -j{FLAGS.build_parallel} {verbose_flag} -t {target}"
        )

    def gitclone(self, repo, tag, subdir, org):
        """Write commands that clone '{org}/{repo}.git' at 'tag' into 'subdir'.

        Unless FLAGS.no_force_clone is set, any existing 'subdir' is
        removed first; the clone itself only runs when 'subdir' does not
        exist.
        """
        clone_dir = subdir
        if not FLAGS.no_force_clone:
            self.rmdir(clone_dir)

        if target_platform() == "windows":
            self.cmd(f"if (-Not (Test-Path -Path {clone_dir})) {{")
        else:
            self.cmd(f"if [[ ! -e {clone_dir} ]]; then")

        # FIXME [DLIS-4045 - Currently the tag starting with "pull/" is not
        # working with "--repo-tag" as the option is not forwarded to the
        # individual repo build correctly.]
        # If 'tag' starts with "pull/" then it must be of form
        # "pull/<pr>/head". We just clone at "main" and then fetch the
        # reference onto a new branch we name "tritonbuildref".
        if tag.startswith("pull/"):
            self.cmd(
                f"  git clone --recursive --depth=1 {org}/{repo}.git {subdir}; git --git-dir {subdir}/.git log --oneline -1",
                check_exitcode=True,
            )
            self.cmd("}" if target_platform() == "windows" else "fi")
            self.cwd(subdir)
            self.cmd(f"git fetch origin {tag}:tritonbuildref", check_exitcode=True)
            self.cmd("git checkout tritonbuildref", check_exitcode=True)
        else:
            self.cmd(
                f"  git clone --recursive --single-branch --depth=1 -b {tag} {org}/{repo}.git {subdir}; git --git-dir {subdir}/.git log --oneline -1",
                check_exitcode=True,
            )
            self.cmd("}" if target_platform() == "windows" else "fi")


def cmake_core_arg(name, type, value):
    """Return the quoted cmake -D setting 'name[:type]=value' for the core build.

    A value registered for 'name' in OVERRIDE_CORE_CMAKE_FLAGS (i.e.
    specified on the command line) replaces 'value'. The ':type' part is
    omitted when 'type' is None.
    """
    value = OVERRIDE_CORE_CMAKE_FLAGS.get(name, value)
    type_suffix = "" if type is None else f":{type}"
    return f'"-D{name}{type_suffix}={value}"'


def cmake_core_enable(name, flag):
    """Return the quoted cmake -D setting 'name:BOOL=ON|OFF' for the core build.

    The value is ON when 'flag' is truthy and OFF otherwise, unless
    OVERRIDE_CORE_CMAKE_FLAGS holds a command-line specified value for
    'name', in which case that value is used verbatim.
    """
    default = "ON" if flag else "OFF"
    value = OVERRIDE_CORE_CMAKE_FLAGS.get(name, default)
    return f'"-D{name}:BOOL={value}"'


def cmake_core_extra_args():
    """Return a quoted '-Dkey=value' string per EXTRA_CORE_CMAKE_FLAGS entry."""
    return [f'"-D{key}={val}"' for key, val in EXTRA_CORE_CMAKE_FLAGS.items()]


def cmake_backend_arg(backend, name, type, value):
    """Return the quoted cmake -D setting 'name[:type]=value' for a backend build.

    A value registered for 'name' under 'backend' in
    OVERRIDE_BACKEND_CMAKE_FLAGS (i.e. specified on the command line)
    replaces 'value'. The ':type' part is omitted when 'type' is None.
    """
    backend_overrides = OVERRIDE_BACKEND_CMAKE_FLAGS.get(backend, {})
    if name in backend_overrides:
        value = backend_overrides[name]
    type_suffix = "" if type is None else f":{type}"
    return f'"-D{name}{type_suffix}={value}"'


def cmake_backend_enable(backend, name, flag):
    """Return the quoted cmake -D setting 'name:BOOL=ON|OFF' for a backend build.

    A non-None value registered for 'name' under 'backend' in
    OVERRIDE_BACKEND_CMAKE_FLAGS is used verbatim; otherwise the value
    is ON when 'flag' is truthy and OFF when it is not.
    """
    value = OVERRIDE_BACKEND_CMAKE_FLAGS.get(backend, {}).get(name)
    if value is None:
        value = "ON" if flag else "OFF"
    return f'"-D{name}:BOOL={value}"'


def cmake_backend_extra_args(backend):
    """Return quoted '-Dkey=value' strings for the extra flags of 'backend'.

    The list is empty when EXTRA_BACKEND_CMAKE_FLAGS has no entry for
    'backend'.
    """
    extras = EXTRA_BACKEND_CMAKE_FLAGS.get(backend, {})
    return [f'"-D{key}={val}"' for key, val in extras.items()]


def cmake_repoagent_arg(name, type, value):
    """Return the quoted cmake -D setting 'name[:type]=value' for a repo agent.

    The ':type' part is omitted when 'type' is None.
    """
    # For now there is no override for repo-agents
    type_suffix = "" if type is None else f":{type}"
    return f'"-D{name}{type_suffix}={value}"'


def cmake_repoagent_enable(name, flag):
    """Return the quoted cmake -D setting 'name:BOOL=ON|OFF' for a repo agent."""
    # For now there is no override for repo-agents
    if flag:
        state = "ON"
    else:
        state = "OFF"
    return f'"-D{name}:BOOL={state}"'


def cmake_repoagent_extra_args():
    """Return the extra cmake arguments for repo agents (a new empty list)."""
    # For now there is no extra args for repo-agents
    return []


def cmake_cache_arg(name, type, value):
    """Return the quoted cmake -D setting 'name[:type]=value' for a cache build.

    The ':type' part is omitted when 'type' is None.
    """
    # For now there is no override for caches
    type_suffix = "" if type is None else f":{type}"
    return f'"-D{name}{type_suffix}={value}"'


def cmake_cache_enable(name, flag):
    """Return the quoted cmake -D setting 'name:BOOL=ON|OFF' for a cache build."""
    # For now there is no override for caches
    if flag:
        state = "ON"
    else:
        state = "OFF"
    return f'"-D{name}:BOOL={state}"'


def cmake_cache_extra_args():
    """Return the extra cmake arguments for caches (a new empty list)."""
    # For now there is no extra args for caches
    return []


def core_cmake_args(components, backends, cmake_dir, install_dir):
    """Return the cmake argument list for configuring the core build.

    components: mapping read for the "common", "core", "backend" and
        "thirdparty" repo tags.
    backends: collection checked for "ensemble" and "tensorrt".
    cmake_dir: source directory, appended as the final argument.
    install_dir: value for CMAKE_INSTALL_PREFIX.

    Remaining settings come from FLAGS; extra command-line specified
    core flags are appended just before 'cmake_dir'.
    """
    valued_settings = [
        ("CMAKE_BUILD_TYPE", None, FLAGS.build_type),
        ("CMAKE_INSTALL_PREFIX", "PATH", install_dir),
        ("TRITON_VERSION", "STRING", FLAGS.version),
        ("TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization),
        ("TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
        ("TRITON_CORE_REPO_TAG", "STRING", components["core"]),
        ("TRITON_BACKEND_REPO_TAG", "STRING", components["backend"]),
        ("TRITON_THIRD_PARTY_REPO_TAG", "STRING", components["thirdparty"]),
    ]
    cargs = [
        cmake_core_arg(setting, kind, val) for setting, kind, val in valued_settings
    ]

    # Feature switches emitted before the compute-capability setting.
    feature_switches = [
        ("TRITON_ENABLE_LOGGING", FLAGS.enable_logging),
        ("TRITON_ENABLE_STATS", FLAGS.enable_stats),
        ("TRITON_ENABLE_METRICS", FLAGS.enable_metrics),
        ("TRITON_ENABLE_METRICS_GPU", FLAGS.enable_gpu_metrics),
        ("TRITON_ENABLE_METRICS_CPU", FLAGS.enable_cpu_metrics),
        ("TRITON_ENABLE_TRACING", FLAGS.enable_tracing),
        ("TRITON_ENABLE_NVTX", FLAGS.enable_nvtx),
        ("TRITON_ENABLE_GPU", FLAGS.enable_gpu),
    ]
    cargs.extend(cmake_core_enable(setting, on) for setting, on in feature_switches)

    cargs.append(
        cmake_core_arg(
            "TRITON_MIN_COMPUTE_CAPABILITY", None, FLAGS.min_compute_capability
        )
    )

    # Mali GPU, then endpoints, filesystems and built-in backends.
    membership_switches = [
        ("TRITON_ENABLE_MALI_GPU", FLAGS.enable_mali_gpu),
        ("TRITON_ENABLE_GRPC", "grpc" in FLAGS.endpoint),
        ("TRITON_ENABLE_HTTP", "http" in FLAGS.endpoint),
        ("TRITON_ENABLE_SAGEMAKER", "sagemaker" in FLAGS.endpoint),
        ("TRITON_ENABLE_VERTEX_AI", "vertex-ai" in FLAGS.endpoint),
        ("TRITON_ENABLE_GCS", "gcs" in FLAGS.filesystem),
        ("TRITON_ENABLE_S3", "s3" in FLAGS.filesystem),
        ("TRITON_ENABLE_AZURE_STORAGE", "azure_storage" in FLAGS.filesystem),
        ("TRITON_ENABLE_ENSEMBLE", "ensemble" in backends),
        ("TRITON_ENABLE_TENSORRT", "tensorrt" in backends),
    ]
    cargs.extend(
        cmake_core_enable(setting, on) for setting, on in membership_switches
    )

    cargs.extend(cmake_core_extra_args())
    cargs.append(cmake_dir)
    return cargs


def repoagent_repo(ra):
    """Return the repository name for the repository agent *ra*.

    The name is the agent name followed by the ``_repository_agent`` suffix.
    """
    return f"{ra}_repository_agent"


def repoagent_cmake_args(images, components, ra, install_dir):
    """Build the cmake argument list for a repository agent.

    Args:
        images: Not used by this function.
        components: Mapping that provides the "common" and "core" repo tags.
        ra: Not used by this function.
        install_dir: Value passed as CMAKE_INSTALL_PREFIX.

    Returns:
        A list of cmake arguments that ends with the source directory "..".
    """
    # (name, cmake type, value) triples, emitted in this order.
    definitions = (
        ("CMAKE_BUILD_TYPE", None, FLAGS.build_type),
        ("CMAKE_INSTALL_PREFIX", "PATH", install_dir),
        ("TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization),
        ("TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
        ("TRITON_CORE_REPO_TAG", "STRING", components["core"]),
    )
    cargs = [
        cmake_repoagent_arg(name, kind, value) for name, kind, value in definitions
    ]
    cargs.append(cmake_repoagent_enable("TRITON_ENABLE_GPU", FLAGS.enable_gpu))
    cargs.extend(cmake_repoagent_extra_args())
    # The source directory goes last.
    cargs.append("..")
    return cargs


def cache_repo(cache):
    """Return the repository name for the cache implementation *cache*.

    For example, *cache* may be "local" or "redis"; the result is the cache
    name followed by the ``_cache`` suffix.
    """
    return f"{cache}_cache"


def cache_cmake_args(images, components, cache, install_dir):
    """Build the cmake argument list for a cache implementation.

    Args:
        images: Not used by this function.
        components: Mapping that provides the "common" and "core" repo tags.
        cache: Not used by this function.
        install_dir: Value passed as CMAKE_INSTALL_PREFIX.

    Returns:
        A list of cmake arguments that ends with the source directory "..".
    """
    cargs = []
    # (name, cmake type, value) triples, emitted in this order.
    for name, kind, value in (
        ("CMAKE_BUILD_TYPE", None, FLAGS.build_type),
        ("CMAKE_INSTALL_PREFIX", "PATH", install_dir),
        ("TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization),
        ("TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
        ("TRITON_CORE_REPO_TAG", "STRING", components["core"]),
    ):
        cargs.append(cmake_cache_arg(name, kind, value))

    cargs.append(cmake_cache_enable("TRITON_ENABLE_GPU", FLAGS.enable_gpu))
    cargs.extend(cmake_cache_extra_args())
    # The source directory goes last.
    cargs.append("..")
    return cargs


def backend_repo(be):
    """Return the repository name for the backend *be*.

    The name is the backend name followed by the ``_backend`` suffix.
    """
    return f"{be}_backend"


def backend_cmake_args(images, components, be, install_dir, library_paths):
    """Build the cmake argument list used to configure backend *be*.

    The list is made of, in order: the backend-specific arguments, the common
    build/install/repo-tag definitions, the common feature toggles, the
    optional memory-tracker toggle, any extra backend arguments and finally
    the source-directory argument.

    Args:
        images: Mapping of image names, forwarded to the backend-specific
            builders that take it.
        components: Mapping that provides the "common", "core" and "backend"
            repo tags.
        be: Backend name.
        install_dir: Value passed as CMAKE_INSTALL_PREFIX.
        library_paths: Forwarded to the onnxruntime builder.

    Returns:
        A list of cmake arguments.
    """
    # Per-backend builders; only the selected one is invoked.
    backend_specific = {
        "onnxruntime": lambda: onnxruntime_cmake_args(images, library_paths),
        "openvino": openvino_cmake_args,
        "python": python_cmake_args,
        "dali": dali_cmake_args,
        "pytorch": lambda: pytorch_cmake_args(images),
        "armnn_tflite": armnn_tflite_cmake_args,
        "fil": lambda: fil_cmake_args(images),
        "fastertransformer": fastertransformer_cmake_args,
        "tensorrt": tensorrt_cmake_args,
        "tensorrtllm": lambda: tensorrtllm_cmake_args(images),
    }
    builder = backend_specific.get(be)
    args = [] if builder is None else builder()

    # DLIS-4618: FIL backend fails debug build, so override it for now.
    cmake_build_type = "Release" if be == "fil" else FLAGS.build_type

    # (name, cmake type, value) triples shared by every backend.
    common_definitions = (
        ("CMAKE_BUILD_TYPE", None, cmake_build_type),
        ("CMAKE_INSTALL_PREFIX", "PATH", install_dir),
        ("TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization),
        ("TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
        ("TRITON_CORE_REPO_TAG", "STRING", components["core"]),
        ("TRITON_BACKEND_REPO_TAG", "STRING", components["backend"]),
    )
    cargs = args + [
        cmake_backend_arg(be, name, kind, value)
        for name, kind, value in common_definitions
    ]

    # (name, enabled) toggles shared by every backend.
    common_toggles = (
        ("TRITON_ENABLE_GPU", FLAGS.enable_gpu),
        ("TRITON_ENABLE_MALI_GPU", FLAGS.enable_mali_gpu),
        ("TRITON_ENABLE_STATS", FLAGS.enable_stats),
        ("TRITON_ENABLE_METRICS", FLAGS.enable_metrics),
    )
    cargs.extend(
        cmake_backend_enable(be, name, enabled) for name, enabled in common_toggles
    )

    # [DLIS-4950] always enable below once Windows image is updated with CUPTI
    # cargs.append(cmake_backend_enable(be, 'TRITON_ENABLE_MEMORY_TRACKER', True))
    # None means "do not emit the memory tracker option at all".
    memory_tracker = None
    if target_platform() == "windows" and not FLAGS.no_container_build:
        print(
            "Warning: Detected docker build is used for Windows, backend utility 'device memory tracker' will be disabled due to missing library in CUDA Windows docker image."
        )
        memory_tracker = False
    elif target_platform() == "igpu":
        print(
            "Warning: Detected iGPU build, backend utility 'device memory tracker' will be disabled as iGPU doesn't contain required version of the library."
        )
        memory_tracker = False
    elif FLAGS.enable_gpu:
        memory_tracker = True

    if memory_tracker is not None:
        cargs.append(
            cmake_backend_enable(be, "TRITON_ENABLE_MEMORY_TRACKER", memory_tracker)
        )

    cargs.extend(cmake_backend_extra_args(be))

    # tensorrtllm is configured from a nested source directory.
    source_arg = (
        "-S ../triton_backend/inflight_batcher_llm -B ."
        if be == "tensorrtllm"
        else ".."
    )
    cargs.append(source_arg)
    return cargs


def python_cmake_args():
    """Return the cmake arguments specific to the python backend.

    Only RHEL targets get an argument: the pybind11 Python version taken from
    FLAGS.rhel_py_version. Every other platform gets an empty list.
    """
    if target_platform() != "rhel":
        return []

    return [
        cmake_backend_arg(
            "python", "PYBIND11_PYTHON_VERSION", "STRING", FLAGS.rhel_py_version
        )
    ]


def pytorch_cmake_args(images):
    """Return the cmake arguments specific to the pytorch backend.

    Args:
        images: Mapping of image names; an entry under "pytorch" overrides the
            default NGC PyTorch image derived from
            FLAGS.upstream_container_version.

    Returns:
        A list of cmake arguments.
    """
    image = (
        images["pytorch"]
        if "pytorch" in images
        else "nvcr.io/nvidia/pytorch:{}-py3".format(FLAGS.upstream_container_version)
    )
    cargs = [cmake_backend_arg("pytorch", "TRITON_PYTORCH_DOCKER_IMAGE", None, image)]

    # TODO: TPRD-372 TorchTRT extension is not currently supported by our manylinux build
    # TODO: TPRD-373 NVTX extension is not currently supported by our manylinux build
    if target_platform() == "rhel":
        return cargs

    if FLAGS.enable_gpu:
        cargs.append(
            cmake_backend_enable("pytorch", "TRITON_PYTORCH_ENABLE_TORCHTRT", True)
        )
    cargs.append(
        cmake_backend_enable("pytorch", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx)
    )
    if target_platform() == "igpu":
        cargs.append(cmake_backend_enable("pytorch", "TRITON_PYTORCH_NVSHMEM", False))
    return cargs


def onnxruntime_cmake_args(images, library_paths):
    """Return the cmake arguments specific to the onnxruntime backend.

    Args:
        images: Mapping of image names; an entry under "base" selects the
            build container.
        library_paths: Not used by this function.

    Returns:
        A list of cmake arguments.
    """
    be = "onnxruntime"

    # The environment variable, when set and non-empty, wins over the flag.
    ort_version = os.getenv("TRITON_BUILD_ONNXRUNTIME_VERSION") or FLAGS.ort_version
    cargs = [
        cmake_backend_arg(be, "TRITON_BUILD_ONNXRUNTIME_VERSION", None, ort_version)
    ]

    # TRITON_ENABLE_GPU is already set for all backends in backend_cmake_args()
    # TODO: TPRD-712 TensorRT is not currently supported by our RHEL build for SBSA.
    if FLAGS.enable_gpu and (
        target_platform() != "rhel" or target_machine() == "x86_64"
    ):
        cargs.append(
            cmake_backend_enable(be, "TRITON_ENABLE_ONNXRUNTIME_TENSORRT", True)
        )

    on_windows = target_platform() == "windows"
    if "base" in images:
        cargs.append(
            cmake_backend_arg(be, "TRITON_BUILD_CONTAINER", None, images["base"])
        )
    elif not on_windows:
        # Without an explicit base image, non-Windows builds fall back to
        # the upstream container version.
        cargs.append(
            cmake_backend_arg(
                be,
                "TRITON_BUILD_CONTAINER_VERSION",
                None,
                FLAGS.upstream_container_version,
            )
        )

    # Windows builds take none of the remaining options.
    if on_windows:
        return cargs

    # TODO: TPRD-333 OpenVino extension is not currently supported by our manylinux build
    openvino_enabled = (
        target_machine() != "aarch64"
        and target_platform() != "rhel"
        and FLAGS.ort_openvino_version is not None
    )
    if openvino_enabled:
        cargs.append(
            cmake_backend_enable(be, "TRITON_ENABLE_ONNXRUNTIME_OPENVINO", True)
        )
        cargs.append(
            cmake_backend_arg(
                be,
                "TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION",
                None,
                FLAGS.ort_openvino_version,
            )
        )

    if target_platform() in ("igpu", "rhel"):
        cargs.append(
            cmake_backend_arg(
                be, "TRITON_BUILD_TARGET_PLATFORM", None, target_platform()
            )
        )

    return cargs


def openvino_cmake_args(images=None):
    """Return the cmake arguments specific to the openvino backend.

    Args:
        images: Optional mapping of image names; an entry under "base"
            selects the build container. When omitted, the module-level
            ``images`` mapping is used if one exists, otherwise an empty
            mapping. This keeps existing zero-argument calls working while
            removing the hard dependency on a global name.

    Returns:
        A list of cmake arguments.
    """
    if images is None:
        # Backward compatibility: this function used to read a module-level
        # ``images`` name directly, which raised NameError when that global
        # was not defined.
        images = globals().get("images") or {}

    cargs = [
        cmake_backend_arg(
            "openvino",
            "TRITON_BUILD_OPENVINO_VERSION",
            None,
            FLAGS.standalone_openvino_version,
        )
    ]

    if "base" in images:
        # An explicit base image applies on every platform.
        cargs.append(
            cmake_backend_arg(
                "openvino", "TRITON_BUILD_CONTAINER", None, images["base"]
            )
        )
    elif target_platform() != "windows":
        # Non-Windows builds fall back to the upstream container version;
        # Windows builds add nothing in this case.
        cargs.append(
            cmake_backend_arg(
                "openvino",
                "TRITON_BUILD_CONTAINER_VERSION",
                None,
                FLAGS.upstream_container_version,
            )
        )
    return cargs


def tensorrt_cmake_args():
    """Return the cmake arguments specific to the tensorrt backend.

    The NVTX toggle follows FLAGS.enable_nvtx. Windows targets also get the
    TensorRT include path.
    """
    nvtx = cmake_backend_enable("tensorrt", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx)
    if target_platform() != "windows":
        return [nvtx]

    include_paths = cmake_backend_arg(
        "tensorrt", "TRITON_TENSORRT_INCLUDE_PATHS", None, "c:/TensorRT/include"
    )
    return [nvtx, include_paths]


def dali_cmake_args():
    """Return the cmake arguments specific to the dali backend.

    The single argument sets TRITON_DALI_SKIP_DOWNLOAD to disabled.
    """
    skip_download = cmake_backend_enable("dali", "TRITON_DALI_SKIP_DOWNLOAD", False)
    return [skip_download]


def fil_cmake_args(images):
    """Return the cmake arguments specific to the fil backend.

    Args:
        images: Mapping of image names; an entry under "base" selects the
            build container, otherwise FLAGS.upstream_container_version is
            passed as the container version.

    Returns:
        A two-element list: the docker-build toggle and the container option.
    """
    docker_build = cmake_backend_enable("fil", "TRITON_FIL_DOCKER_BUILD", True)

    if "base" in images:
        option, value = "TRITON_BUILD_CONTAINER", images["base"]
    else:
        option, value = (
            "TRITON_BUILD_CONTAINER_VERSION",
            FLAGS.upstream_container_version,
        )

    return [docker_build, cmake_backend_arg("fil", option, None, value)]


def armnn_tflite_cmake_args():
    """Return the cmake arguments specific to the armnn_tflite backend.

    The JOBS value is the CPU count reported by multiprocessing.
    """
    job_count = multiprocessing.cpu_count()
    return [cmake_backend_arg("armnn_tflite", "JOBS", None, job_count)]


def fastertransformer_cmake_args():
    """Return the cmake arguments specific to the fastertransformer backend.

    Prints a warning that the backend is not officially supported, then
    returns arguments that export compile commands and turn FP8 off.
    """
    print("Warning: FasterTransformer backend is not officially supported.")
    be = "fastertransformer"
    export_commands = cmake_backend_arg(be, "CMAKE_EXPORT_COMPILE_COMMANDS", None, 1)
    fp8 = cmake_backend_arg(be, "ENABLE_FP8", None, "OFF")
    return [export_commands, fp8]


def tensorrtllm_cmake_args(images):
    """Return the cmake arguments specific to the tensorrtllm backend.

    Args:
        images: Not used by this function.

    Returns:
        A single-element list enabling USE_CXX11_ABI.
    """
    return [cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True)]


def install_dcgm_libraries(dcgm_version, target_machine):
    """Return the Dockerfile fragment that installs the DCGM packages.

    Args:
        dcgm_version: DCGM version string. An empty string is reported
            through fail().
        target_machine: Machine architecture name; "aarch64" selects the
            sbsa package repository, anything else selects x86_64. Note that
            this parameter shadows the module-level function of the same
            name inside this body.

    Returns:
        The Dockerfile text, or "" if fail() returns for an empty version.
    """
    if dcgm_version == "":
        fail(
            "unable to determine default repo-tag, DCGM version not known for {}".format(
                FLAGS.version
            )
        )
        return ""

    is_rhel = target_platform() == "rhel"
    is_arm = target_machine == "aarch64"

    # Each template takes the version three times: the ENV line and the two
    # package pins. The templates are kept verbatim, whitespace included.
    # RHEL has the same install instructions for both aarch64 and x86
    if is_rhel and is_arm:
        template = """
ENV DCGM_VERSION {}
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo \\
    && dnf clean expire-cache \\
    && dnf makecache --refresh \\
    && dnf install --assumeyes \\
                 datacenter-gpu-manager-4-core-1:{} \\
                 datacenter-gpu-manager-4-devel-1:{}
"""
    elif is_rhel:
        template = """
ENV DCGM_VERSION {}
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \\
    && dnf clean expire-cache \\
    && dnf makecache --refresh \\
    && dnf install --assumeyes \\
                 datacenter-gpu-manager-4-core-1:{} \\
                 datacenter-gpu-manager-4-devel-1:{}
"""
    elif is_arm:
        template = """
ENV DCGM_VERSION {}
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
RUN curl -o /tmp/cuda-keyring.deb \\
        https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb \\
      && apt install /tmp/cuda-keyring.deb \\
      && rm /tmp/cuda-keyring.deb \\
      && apt update -qq \\
      && apt install --yes --no-install-recommends \\
                  datacenter-gpu-manager-4-core=1:{} \\
                  datacenter-gpu-manager-4-dev=1:{}
"""
    else:
        template = """
ENV DCGM_VERSION {}
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
RUN curl -o /tmp/cuda-keyring.deb \\
          https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb \\
      && apt install /tmp/cuda-keyring.deb \\
      && rm /tmp/cuda-keyring.deb \\
      && apt update -qq \\
      && apt install --yes --no-install-recommends \\
                   datacenter-gpu-manager-4-core=1:{} \\
                   datacenter-gpu-manager-4-dev=1:{}
"""

    return template.format(dcgm_version, dcgm_version, dcgm_version)


def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
    """Write the RHEL build-base Dockerfile to ``ddir/dockerfile_name``.

    The Dockerfile text is accumulated in ``df`` section by section and
    written out in one go at the end.

    Args:
        ddir: Directory the Dockerfile is written into.
        dockerfile_name: File name of the Dockerfile inside ``ddir``.
        argmap: Mapping read for "TRITON_VERSION",
            "TRITON_CONTAINER_VERSION" and "BASE_IMAGE", plus "DCGM_VERSION"
            when FLAGS.enable_gpu is set.
    """
    # Global ARGs declared before FROM so they can select the base image.
    df = """
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}
ARG BASE_IMAGE={}
""".format(
        argmap["TRITON_VERSION"],
        argmap["TRITON_CONTAINER_VERSION"],
        argmap["BASE_IMAGE"],
    )

    df += """
FROM ${BASE_IMAGE}

ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
"""
    # Docker tooling and the yum build dependencies.
    df += """
# Install docker docker buildx
RUN yum install -y ca-certificates curl gnupg yum-utils \\
      && yum-config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo \\
      && yum install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
#   && yum install -y docker.io docker-buildx-plugin

# libcurl4-openSSL-dev is needed for GCS
# python3-dev is needed by Torchvision
# python3-pip and libarchive-dev is needed by python backend
# libxml2-dev is needed for Azure Storage
# scons is needed for armnn_tflite backend build dep
RUN yum install -y \\
            autoconf \\
            automake \\
            bzip2-devel \\
            ca-certificates \\
            git \\
            gperf \\
            gperftools-devel \\
            libarchive-devel \\
            libb64-devel \\
            libcurl-devel \\
            libtool \\
            libxml2-devel \\
            ncurses-devel \\
            numactl-devel \\
            openssl-devel \\
            pkg-config \\
            python3-pip \\
            python3-scons \\
            python3-setuptools \\
            rapidjson-devel \\
            re2-devel \\
            readline-devel \\
            unzip \\
            wget \\
            xz-devel \\
            zlib-devel
"""
    # ccache is built from source and configured only when both environment
    # variables are set in the environment running this script.
    if os.getenv("CCACHE_REMOTE_ONLY") and os.getenv("CCACHE_REMOTE_STORAGE"):
        df += """
RUN curl -k -s -L https://github.com/ccache/ccache/archive/refs/tags/v4.10.2.tar.gz -o /tmp/ccache.tar.gz \\
    && tar -xzf /tmp/ccache.tar.gz -C /tmp \\
    && cmake -D CMAKE_BUILD_TYPE=Release -S /tmp/ccache-4.10.2 -B /tmp/build \\
    && cmake --build /tmp/build -j$(nproc) -t install \\
    && rm -rf /tmp/ccache.tar.gz /tmp/ccache-4.10.2 /tmp/build

ENV CCACHE_REMOTE_ONLY="true" \\
    CCACHE_REMOTE_STORAGE="{}" \\
    CMAKE_CXX_COMPILER_LAUNCHER="ccache" \\
    CMAKE_C_COMPILER_LAUNCHER="ccache" \\
    CMAKE_CUDA_COMPILER_LAUNCHER="ccache"

RUN ccache -p
""".format(
            os.getenv("CCACHE_REMOTE_STORAGE")
        )
    # Requires openssl-devel to be installed first for pyenv build to be successful
    df += change_default_python_version_rhel(FLAGS.rhel_py_version)
    df += """

RUN pip3 install --upgrade pip \\
      && pip3 install --upgrade \\
          build \\
          wheel \\
          setuptools \\
          docker \\
          virtualenv \\
          patchelf==0.17.2 \\
          cmake==4.0.3
"""
    # NOTE(review): the extracted directory name boost_1_80_0 is hard-coded
    # while the archive URL comes from FLAGS.boost_url; the two must agree.
    df += f"""
# Install boost version >= 1.78 for boost::span
# Current libboost-dev apt packages are < 1.78, so install from tar.gz
RUN wget -O /tmp/boost.tar.gz {FLAGS.boost_url} \\
      && sha256sum /tmp/boost.tar.gz | grep {FLAGS.boost_sha256} \\
      && (cd /tmp && tar xzf boost.tar.gz) \\
      && mv /tmp/boost_1_80_0/boost /usr/include/boost
"""

    # DCGM is installed only for GPU-enabled builds.
    if FLAGS.enable_gpu:
        df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine())
    df += """
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
"""

    # Clear the workspace, copy the build context in and reset the entrypoint.
    df += """
WORKDIR /workspace
RUN rm -fr *
COPY . .
ENTRYPOINT []
"""

    with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
        dfile.write(df)


def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
    """Write the build-base Dockerfile to ``ddir/dockerfile_name``.

    The Dockerfile text is accumulated in ``df`` section by section and
    written out in one go at the end. Windows targets get a short
    Windows-specific section; every other target gets the apt-based
    dependency setup.

    Args:
        ddir: Directory the Dockerfile is written into.
        dockerfile_name: File name of the Dockerfile inside ``ddir``.
        argmap: Mapping read for "TRITON_VERSION",
            "TRITON_CONTAINER_VERSION" and "BASE_IMAGE", plus "DCGM_VERSION"
            for non-Windows targets when FLAGS.enable_gpu is set.
    """
    # Global ARGs declared before FROM so they can select the base image.
    df = """
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}
ARG BASE_IMAGE={}
""".format(
        argmap["TRITON_VERSION"],
        argmap["TRITON_CONTAINER_VERSION"],
        argmap["BASE_IMAGE"],
    )

    df += """
FROM ${BASE_IMAGE}

ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
"""
    # Install the windows- or linux-specific buildbase dependencies
    if target_platform() == "windows":
        df += """
RUN python3 -m pip install build

SHELL ["cmd", "/S", "/C"]
"""
    else:
        df += """
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive

# Install docker docker buildx
RUN apt-get update \\
      && apt-get install -y ca-certificates curl gnupg \\
      && install -m 0755 -d /etc/apt/keyrings \\
      && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg \\
      && chmod a+r /etc/apt/keyrings/docker.gpg \\
      && echo \\
          "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \\
          "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \\
          tee /etc/apt/sources.list.d/docker.list > /dev/null \\
      && apt-get update \\
      && apt-get install -y docker.io docker-buildx-plugin

# libcurl4-openSSL-dev is needed for GCS
# python3-dev is needed by Torchvision
# python3-pip and libarchive-dev is needed by python backend
# libxml2-dev is needed for Azure Storage
# scons is needed for armnn_tflite backend build dep
RUN apt-get update \\
      && apt-get install -y --no-install-recommends \\
            ca-certificates \\
            autoconf \\
            automake \\
            build-essential \\
            git \\
            gperf \\
            libre2-dev \\
            libssl-dev \\
            libtool \\
            libcurl4-openssl-dev \\
            libb64-dev \\
            libgoogle-perftools-dev \\
            python3-dev \\
            python3-pip \\
            python3-wheel \\
            python3-setuptools \\
            rapidjson-dev \\
            scons \\
            software-properties-common \\
            pkg-config \\
            unzip \\
            wget \\
            zlib1g-dev \\
            libarchive-dev \\
            libxml2-dev \\
            libnuma-dev \\
            wget \\
      && rm -rf /var/lib/apt/lists/*

RUN pip3 install --upgrade \\
          build \\
          docker \\
          virtualenv \\
          patchelf==0.17.2 \\
          cmake==4.0.3 \\
          pybind11[global]
"""

        # NOTE(review): the extracted directory name boost_1_80_0 is
        # hard-coded while the archive URL comes from FLAGS.boost_url; the
        # two must agree.
        df += f"""
# Install boost version >= 1.78 for boost::span
# Current libboost-dev apt packages are < 1.78, so install from tar.gz
RUN wget -O /tmp/boost.tar.gz {FLAGS.boost_url} \\
      && sha256sum /tmp/boost.tar.gz | grep {FLAGS.boost_sha256} \\
      && (cd /tmp && tar xzf boost.tar.gz) \\
      && mv /tmp/boost_1_80_0/boost /usr/include/boost
"""

        # DCGM is installed only for GPU-enabled, non-Windows builds.
        if FLAGS.enable_gpu:
            df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine())

    df += """
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
"""

    # ccache is configured only when both environment variables are set in
    # the environment running this script.
    # NOTE(review): this section is not guarded by the platform check above,
    # so it is also appended for Windows targets even though it uses apt-get.
    # Confirm whether that combination can occur.
    if os.getenv("CCACHE_REMOTE_ONLY") and os.getenv("CCACHE_REMOTE_STORAGE"):
        df += """
ENV CCACHE_REMOTE_ONLY="true" \\
    CCACHE_REMOTE_STORAGE="{}" \\
    CMAKE_CXX_COMPILER_LAUNCHER="ccache" \\
    CMAKE_C_COMPILER_LAUNCHER="ccache" \\
    CMAKE_CUDA_COMPILER_LAUNCHER="ccache"

RUN apt-get update \\
      && apt-get install -y --no-install-recommends ccache && ccache -p \\
      && rm -rf /var/lib/apt/lists/*
""".format(
            os.getenv("CCACHE_REMOTE_STORAGE")
        )

    # Copy in the triton source. We remove existing contents first in
    # case the FROM container has something there already.
    if target_platform() == "windows":
        df += """
WORKDIR /workspace
RUN rmdir /S/Q * || exit 0
COPY . .
"""
    else:
        df += """
WORKDIR /workspace
RUN rm -fr *
COPY . .
ENTRYPOINT []
"""

    with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
        dfile.write(df)


def create_dockerfile_cibase(ddir, dockerfile_name, argmap):
    """Write the CI-base Dockerfile to ``ddir/dockerfile_name``.

    Args:
        ddir: Directory the Dockerfile is written into.
        dockerfile_name: File name of the Dockerfile inside ``ddir``.
        argmap: Mapping read for "TRITON_VERSION",
            "TRITON_CONTAINER_VERSION" and "BASE_IMAGE".
    """
    # Global ARGs declared before FROM so they can select the base image.
    arg_keys = ("TRITON_VERSION", "TRITON_CONTAINER_VERSION", "BASE_IMAGE")
    header = """
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}
ARG BASE_IMAGE={}
""".format(
        *[argmap[key] for key in arg_keys]
    )

    # Static remainder; the ${...} references are resolved by docker, so this
    # part must not go through str.format.
    body = """
FROM ${BASE_IMAGE}

ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION

COPY build/ci /workspace

WORKDIR /workspace

ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""

    target_path = os.path.join(ddir, dockerfile_name)
    with open(target_path, "w") as dfile:
        dfile.write(header + body)


def create_dockerfile_linux(
    ddir, dockerfile_name, argmap, backends, repoagents, caches, endpoints
):
    """Write the Linux production Dockerfile to ``ddir/dockerfile_name``.

    The Dockerfile text is accumulated in ``df`` section by section and
    written out in one go at the end.

    Args:
        ddir: Directory the Dockerfile is written into.
        dockerfile_name: File name of the Dockerfile inside ``ddir``.
        argmap: Mapping read for "TRITON_VERSION",
            "TRITON_CONTAINER_VERSION", "INFERENCE_IMAGE" and "BASE_IMAGE",
            plus "GPU_BASE_IMAGE" for CPU-only builds that include pytorch.
            "INFERENCE_IMAGE" is modified in place when "vllm" is among the
            backends and no inference image was given.
        backends: Container of backend names, tested with ``in``.
        repoagents: Not referenced in this function.
        caches: Not referenced in this function.
        endpoints: Container of endpoint names; only "sagemaker" is checked.
    """
    df = """
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}
""".format(
        argmap["TRITON_VERSION"],
        argmap["TRITON_CONTAINER_VERSION"],
    )
    # Default the inference image to the upstream vLLM container when the
    # vllm backend is requested without an explicit image.
    if "vllm" in backends and argmap["INFERENCE_IMAGE"] is None:
        argmap[
            "INFERENCE_IMAGE"
        ] = f"nvcr.io/nvidia/vllm:{FLAGS.upstream_container_version}-py3"
    # An inference image, when present, takes precedence over the base image.
    df += """ARG BASE_IMAGE={}
""".format(
        argmap["INFERENCE_IMAGE"]
        if argmap["INFERENCE_IMAGE"] is not None
        else argmap["BASE_IMAGE"],
    )

    # PyTorch backends need extra CUDA and other
    # dependencies during runtime that are missing in the CPU-only base container.
    # These dependencies must be copied from the Triton Min image.
    if not FLAGS.enable_gpu and ("pytorch" in backends):
        df += """
############################################################################
##  Triton Min image
############################################################################
FROM {} AS min_container

""".format(
            argmap["GPU_BASE_IMAGE"]
        )

    df += """
############################################################################
##  Production stage: Create container with just inference server executable
############################################################################
FROM ${BASE_IMAGE}

ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""

    # Shared environment, dependency and entrypoint setup.
    df += dockerfile_prepare_container_linux(
        argmap, backends, FLAGS.enable_gpu, target_machine()
    )

    # In this f-string the doubled braces {{}} emit the literal {} placeholder
    # consumed by xargs -I; only the dependencies group is interpolated.
    df += f"""
WORKDIR /opt
COPY --chown=1000:1000 build/install tritonserver

WORKDIR /opt/tritonserver
COPY --chown=1000:1000 NVIDIA_Deep_Learning_Container_License.pdf .
RUN find /opt/tritonserver/python -maxdepth 1 -type f -name \\
    "tritonserver-*.whl" | xargs -I {{}} pip install --upgrade {{}}[{FLAGS.triton_wheels_dependencies_group}] && \\
    find /opt/tritonserver/python -maxdepth 1 -type f -name \\
    "tritonfrontend-*.whl" | xargs -I {{}} pip install --upgrade {{}}[{FLAGS.triton_wheels_dependencies_group}]

RUN pip3 install -r python/openai/requirements.txt

"""
    if not FLAGS.no_core_build:
        # Add feature labels for SageMaker endpoint
        if "sagemaker" in endpoints:
            df += """
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
COPY --chown=1000:1000 docker/sagemaker/serve /usr/bin/.
"""
    # This is required since libcublasLt.so is not present during the build
    # stage of the PyTorch backend
    if not FLAGS.enable_gpu and ("pytorch" in backends):
        df += """
RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.13 backends/pytorch/libtorch_cuda.so
"""
    # Register the directories that contain shared libraries with the
    # dynamic linker for the tensorrtllm backend.
    if "tensorrtllm" in backends:
        df += """
RUN ldconfig && \\
    find /opt/tritonserver -name lib*so -exec dirname {} \\; > /etc/ld.so.conf.d/tritonserver.conf && \\
    ldconfig

"""
    with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
        dfile.write(df)


def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_machine):
    """Return the shared Dockerfile fragment that prepares a Linux container.

    The fragment sets environment variables, creates the triton-server user,
    installs dependencies and wires up the entrypoint files.

    Args:
        argmap: Mapping read for "NVIDIA_BUILD_ID" and "NVIDIA_BUILD_REF",
            plus "DCGM_VERSION" when ``enable_gpu`` is true.
        backends: Container of backend names, tested with ``in``. When
            "fastertransformer" is present it is also indexed to obtain the
            ref used in the download URL.
        enable_gpu: Truthy for GPU builds; selects DCGM installation versus
            the CPU library setup and controls TRITON_SERVER_GPU_ENABLED.
        target_machine: Machine architecture name, e.g. "aarch64". This
            parameter shadows the module-level function of the same name
            inside this body.

    Returns:
        The Dockerfile text as a string.
    """
    gpu_enabled = 1 if enable_gpu else 0
    # Common steps to produce docker images shared by build.py and compose.py.
    # Sets environment variables, installs dependencies and adds entrypoint
    df = """
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION

ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
LABEL com.nvidia.tritonserver.version="${TRITON_SERVER_VERSION}"

ENV PATH /opt/tritonserver/bin:${PATH}
# Remove once https://github.com/openucx/ucx/pull/9148 is available
# in the min container.
ENV UCX_MEM_EVENTS no
"""

    # Necessary for libtorch.so to find correct HPCX libraries
    if "pytorch" in backends:
        df += """
ENV LD_LIBRARY_PATH /opt/hpcx/ucc/lib/:/opt/hpcx/ucx/lib/:${LD_LIBRARY_PATH}
"""

    # Space-separated apt package names required by the selected backends.
    # NOTE(review): this value is only interpolated into the apt-get branch
    # below; the RHEL branch does not use it. Confirm that is intended.
    backend_dependencies = ""
    # libgomp1 is needed by both onnxruntime and pytorch backends
    if ("onnxruntime" in backends) or ("pytorch" in backends):
        backend_dependencies = "libgomp1"

    # libgfortran5 is needed by pytorch backend on ARM
    if ("pytorch" in backends) and (target_machine == "aarch64"):
        backend_dependencies += " libgfortran5"
    # openssh-server is needed for fastertransformer
    if "fastertransformer" in backends:
        backend_dependencies += " openssh-server"

    df += """
ENV TF_ADJUST_HUE_FUSED         1
ENV TF_ADJUST_SATURATION_FUSED  1
ENV TF_ENABLE_WINOGRAD_NONFUSED 1
ENV TF_AUTOTUNE_THRESHOLD       2
ENV TRITON_SERVER_GPU_ENABLED    {gpu_enabled}

# Create a user that can be used to run triton as
# non-root. Make sure that this user to given ID 1000. All server
# artifacts copied below are assign to this user.
ENV TRITON_SERVER_USER=triton-server
RUN userdel tensorrt-server > /dev/null 2>&1 || true \\
      && userdel ubuntu > /dev/null 2>&1 || true \\
      && if ! id -u $TRITON_SERVER_USER > /dev/null 2>&1 ; then \\
          useradd $TRITON_SERVER_USER; \\
        fi \\
      && [ `id -u $TRITON_SERVER_USER` -eq 1000 ] \\
      && [ `id -g $TRITON_SERVER_USER` -eq 1000 ]
""".format(
        gpu_enabled=gpu_enabled
    )

    # Common runtime dependencies: yum on RHEL, apt-get everywhere else.
    if target_platform() == "rhel":
        df += """
# Common dependencies.
RUN yum install -y \\
        git \\
        gperf \\
        re2-devel \\
        openssl-devel \\
        libtool \\
        libcurl-devel \\
        libb64-devel \\
        gperftools-devel \\
        wget \\
        python3.12-pip \\
        numactl-devel

RUN pip3 install patchelf==0.17.2

"""
    else:
        df += """
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive

# Common dependencies. FIXME (can any of these be conditional? For
# example libcurl only needed for GCS?)
RUN apt-get update \\
      && apt-get install -y --no-install-recommends \\
              clang \\
              curl \\
              dirmngr \\
              git \\
              gperf \\
              libb64-0d \\
              libcurl4-openssl-dev \\
              libgoogle-perftools-dev \\
              libjemalloc-dev \\
              libnuma-dev \\
              wget \\
              {backend_dependencies} \\
              python3-pip \\
      && rm -rf /var/lib/apt/lists/*
""".format(
            backend_dependencies=backend_dependencies
        )

    df += """
# Set TCMALLOC_RELEASE_RATE for users setting LD_PRELOAD with tcmalloc
ENV TCMALLOC_RELEASE_RATE 200
"""

    if "fastertransformer" in backends:
        be = "fastertransformer"
        # The backend's own build script is fetched at the ref stored in
        # backends[be] and loaded as an in-memory module.
        url = "https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/{}/docker/create_dockerfile_and_build.py".format(
            backends[be]
        )
        # NOTE(review): no timeout is passed and the HTTP status is not
        # checked before the response body is used.
        response = requests.get(url)
        spec = importlib.util.spec_from_loader(
            "fastertransformer_buildscript", loader=None, origin=url
        )
        fastertransformer_buildscript = importlib.util.module_from_spec(spec)
        # SECURITY NOTE(review): this executes code downloaded over the
        # network at build time; the content is only as trustworthy as the
        # ref in backends[be].
        exec(response.content, fastertransformer_buildscript.__dict__)
        df += fastertransformer_buildscript.create_postbuild(is_multistage_build=False)

    if enable_gpu:
        df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine)
        # This segment will break the RHEL SBSA build. Need to determine whether
        # this is necessary to incorporate.
        if target_platform() != "rhel":
            df += """
# Extra defensive wiring for CUDA Compat lib
RUN ln -sf ${_CUDA_COMPAT_PATH}/lib.real ${_CUDA_COMPAT_PATH}/lib \\
    && echo ${_CUDA_COMPAT_PATH}/lib > /etc/ld.so.conf.d/00-cuda-compat.conf \\
    && ldconfig \\
    && rm -f ${_CUDA_COMPAT_PATH}/lib
"""
    else:
        # CPU-only builds get the extra libraries from the helper instead.
        df += add_cpu_libs_to_linux_dockerfile(backends, target_machine)

    # Add dependencies needed for python backend
    if "python" in backends:
        if target_platform() == "rhel":
            df += """
# python3, python3-pip and some pip installs required for the python backend
RUN yum install -y \\
        libarchive-devel \\
        openssl-devel \\
        readline-devel
"""
            # Requires openssl-devel to be installed first for pyenv build to be successful
            df += change_default_python_version_rhel(FLAGS.rhel_py_version)
            df += """
RUN pip3 install --upgrade pip \\
    && pip3 install --upgrade \\
        wheel \\
        setuptools \\
        \"numpy<2\" \\
        virtualenv
"""
        else:
            df += """
# python3, python3-pip and some pip installs required for the python backend
RUN apt-get update \\
      && apt-get install -y --no-install-recommends \\
            python3 \\
            libarchive-dev \\
            python3-pip \\
            python3-wheel \\
            python3-setuptools \\
            libpython3-dev \\
      && pip3 install --upgrade \\
            \"numpy<2\" \\
            virtualenv \\
      && rm -rf /var/lib/apt/lists/*
"""
    # CUDA tool and header locations exported as TRITON_* variables for the
    # tensorrtllm and vllm backends.
    if "tensorrtllm" in backends or "vllm" in backends:
        df += """
ENV TRITON_CUDACRT_PATH=/usr/local/cuda/include \\
    TRITON_CUDART_PATH=/usr/local/cuda/include \\
    TRITON_CUOBJDUMP_PATH=/usr/local/cuda/bin/cuobjdump \\
    TRITON_CUPTI_PATH=/usr/local/cuda/include \\
    TRITON_NVDISASM_PATH=/usr/local/cuda/bin/nvdisasm \\
    TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
"""

    if "dali" in backends:
        df += """
# Update Python path to include DALI
ENV PYTHONPATH=/opt/tritonserver/backends/dali/wheel/dali:$PYTHONPATH
"""

    if target_platform() == "rhel":
        # The CUDA repository directory is "sbsa" for aarch64, else "x86_64".
        repo_arch = "sbsa" if target_machine == "aarch64" else "x86_64"
        df += """
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/{repo_arch}/cuda-rhel8.repo \\
    && dnf clean expire-cache \\
    && dnf install --assumeyes libnvshmem3-cuda-13

RUN dirname  $(find /usr -name "libcudart*.so" -o  -name "libnvinf*.so" -o -name "libnvshm*" -type f) | sort -u > /etc/ld.so.conf.d/triton-cuda-libs.conf && ldconfig
""".format(
            repo_arch=repo_arch
        )

    df += """
WORKDIR /opt/tritonserver
RUN rm -fr /opt/tritonserver/*
ENV NVIDIA_PRODUCT_NAME="Triton Server"
COPY docker/entrypoint.d/ /opt/nvidia/entrypoint.d/
"""

    # The CPU-only build uses ubuntu as the base image, and so the
    # entrypoint files are not available in /opt/nvidia in the base
    # image, so we must provide them ourselves.
    if not enable_gpu:
        df += """
COPY docker/cpu_only/ /opt/nvidia/
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
"""

    # Build identification: the build id is used for both the ENV and the
    # first LABEL, the build ref for the second LABEL.
    df += """
ENV NVIDIA_BUILD_ID {}
LABEL com.nvidia.build.id={}
LABEL com.nvidia.build.ref={}
""".format(
        argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_REF"]
    )
    return df


def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
    """Return the Dockerfile fragment that copies GPU libraries into a CPU-only image.

    Only the pytorch backend needs anything here; for every other set of
    backends the result is the empty string.

    Args:
        backends: Container of backend names; only membership of "pytorch"
            is checked.
        target_machine: Machine architecture string. "aarch64" selects the
            aarch64/sbsa directory names, anything else selects x86_64.

    Returns:
        Dockerfile text (possibly empty) to append to the generated Dockerfile.
    """
    if "pytorch" not in backends:
        return ""

    is_arm = target_machine == "aarch64"
    libs_arch = "aarch64" if is_arm else "x86_64"
    cuda_arch = "sbsa" if is_arm else "x86_64"

    # Extra dependencies for the pytorch backend.
    # Note: Even though the build is CPU-only, the version of pytorch
    # we are using depend upon libraries like cuda and cudnn. Since
    # these dependencies are not present in the ubuntu base image,
    # we must copy these from the Triton min container ourselves.
    cuda_fragment = """
RUN mkdir -p /usr/local/cuda/lib64/stubs
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusparse.so /usr/local/cuda/lib64/stubs/libcusparse.so.12
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.12
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcurand.so /usr/local/cuda/lib64/stubs/libcurand.so.10
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.12
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so /usr/local/cuda/lib64/stubs/libcublas.so.13
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.13

RUN mkdir -p /usr/local/cuda/targets/{cuda_arch}-linux/lib
COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libcufile.so.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libnvrtc.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libcusparseLt.so.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libnvshmem_host.so.3 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.

RUN mkdir -p /opt/hpcx/ucc/lib/ /opt/hpcx/ucx/lib/
COPY --from=min_container /opt/hpcx/ucc/lib/libucc.so.1 /opt/hpcx/ucc/lib/libucc.so.1
COPY --from=min_container /opt/hpcx/ucx/lib/libucm.so.0 /opt/hpcx/ucx/lib/libucm.so.0
COPY --from=min_container /opt/hpcx/ucx/lib/libucp.so.0 /opt/hpcx/ucx/lib/libucp.so.0
COPY --from=min_container /opt/hpcx/ucx/lib/libucs.so.0 /opt/hpcx/ucx/lib/libucs.so.0
COPY --from=min_container /opt/hpcx/ucx/lib/libuct.so.0 /opt/hpcx/ucx/lib/libuct.so.0

COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9

# patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so
RUN apt-get update \\
      && apt-get install -y --no-install-recommends openmpi-bin
RUN pip3 install patchelf==0.17.2

ENV LD_LIBRARY_PATH /usr/local/cuda/targets/{cuda_arch}-linux/lib:/usr/local/cuda/lib64/stubs:${{LD_LIBRARY_PATH}}
""".format(
        cuda_arch=cuda_arch, libs_arch=libs_arch
    )

    # NCCL dependency for the pytorch backend.
    # Note: Even though the build is CPU-only, the version of
    # pytorch we are using depends upon the NCCL library.
    # Since this dependency is not present in the ubuntu base image,
    # we must copy it from the Triton min container ourselves.
    nccl_fragment = """
COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libnccl.so.2 /usr/lib/{libs_arch}-linux-gnu/libnccl.so.2
""".format(
        libs_arch=libs_arch
    )

    return cuda_fragment + nccl_fragment


def change_default_python_version_rhel(version):
    """Return Dockerfile text that installs Python ``version`` through pyenv.

    The fragment installs pyenv under /opt/pyenv_build, builds the requested
    Python, copies its libpython3* files to /usr/lib64 and exports the
    PYVER / PYTHONPATH / PYBIN / PYTHON_BIN_PATH / PATH variables.

    Args:
        version: Python version string substituted into the template.

    Returns:
        The rendered Dockerfile fragment.
    """
    template = """
# The python library version available for install via 'yum install python3.X-devel' does not
# match the version of python inside the RHEL base container. This means that python packages
# installed within the container will not be picked up by the python backend stub process pybind
# bindings. It must instead must be installed via pyenv.
ENV PYENV_ROOT=/opt/pyenv_build
RUN curl https://pyenv.run | bash
ENV PATH="${{PYENV_ROOT}}/bin:$PATH"
RUN eval "$(pyenv init -)"
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {version} \\
    && cp ${{PYENV_ROOT}}/versions/{version}/lib/libpython3* /usr/lib64/

# RHEL image has several python versions. It's important
# to set the correct version, otherwise, packages that are
# pip installed will not be found during testing.
ENV PYVER={version} PYTHONPATH=/opt/python/v
RUN ln -sf ${{PYENV_ROOT}}/versions/${{PYVER}}* ${{PYTHONPATH}}
ENV PYBIN=${{PYTHONPATH}}/bin
ENV PYTHON_BIN_PATH=${{PYBIN}}/python${{PYVER}} PATH=${{PYBIN}}:${{PATH}}
"""
    # Doubled braces in the template render as literal shell "${...}" references.
    return template.format(version=version)


def create_dockerfile_windows(
    ddir, dockerfile_name, argmap, backends, repoagents, caches
):
    """Write the Windows production Dockerfile to ``ddir/dockerfile_name``.

    Args:
        ddir: Directory the Dockerfile is written into.
        dockerfile_name: File name of the Dockerfile.
        argmap: Mapping providing TRITON_VERSION, TRITON_CONTAINER_VERSION,
            BASE_IMAGE, NVIDIA_BUILD_ID and NVIDIA_BUILD_REF.
        backends: Not used by this function.
        repoagents: Not used by this function.
        caches: Not used by this function.
    """
    # Stage header: build args, base image, version env/labels and PATH update.
    header = """
ARG TRITON_VERSION={triton_version}
ARG TRITON_CONTAINER_VERSION={container_version}
ARG BASE_IMAGE={base_image}

############################################################################
##  Production stage: Create container with just inference server executable
############################################################################
FROM ${{BASE_IMAGE}}

ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION

ENV TRITON_SERVER_VERSION=${{TRITON_VERSION}}
ENV NVIDIA_TRITON_SERVER_VERSION=${{TRITON_CONTAINER_VERSION}}
LABEL com.nvidia.tritonserver.version="${{TRITON_SERVER_VERSION}}"

RUN setx path "%path%;C:\\opt\\tritonserver\\bin"

""".format(
        triton_version=argmap["TRITON_VERSION"],
        container_version=argmap["TRITON_CONTAINER_VERSION"],
        base_image=argmap["BASE_IMAGE"],
    )

    # Replace any existing install tree with the freshly built one.
    install = """
WORKDIR /opt
RUN rmdir /S/Q tritonserver || exit 0
COPY --chown=1000:1000 build/install tritonserver

WORKDIR /opt/tritonserver
COPY --chown=1000:1000 NVIDIA_Deep_Learning_Container_License.pdf .

"""

    # Build identification; the build id is emitted both as ENV and LABEL.
    trailer = """
ENTRYPOINT []
ENV NVIDIA_BUILD_ID {build_id}
LABEL com.nvidia.build.id={build_id}
LABEL com.nvidia.build.ref={build_ref}
""".format(
        build_id=argmap["NVIDIA_BUILD_ID"],
        build_ref=argmap["NVIDIA_BUILD_REF"],
    )

    dockerfile_path = os.path.join(ddir, dockerfile_name)
    with open(dockerfile_path, "w") as dfile:
        dfile.write("".join((header, install, trailer)))


def create_build_dockerfiles(
    container_build_dir, images, backends, repoagents, caches, endpoints
):
    """Write Dockerfile.buildbase, Dockerfile and Dockerfile.cibase into FLAGS.build_dir.

    Args:
        container_build_dir: Not used by this function.
        images: Mapping of image role ("base", "inference", "gpu-base") to
            image name; each entry is optional.
        backends: Container of enabled backend names.
        repoagents: Enabled repository agents, forwarded to the Dockerfile writer.
        caches: Enabled caches, forwarded to the Dockerfile writer.
        endpoints: Enabled endpoints, forwarded to the Linux Dockerfile writer.

    Raises:
        KeyError: When targeting RHEL without an explicit "base" image.
    """
    platform = target_platform()
    on_windows = platform == "windows"
    on_rhel = platform == "rhel"

    # An explicitly requested base image always wins; otherwise fall back to
    # a per-platform default.
    if "base" in images:
        base_image = images["base"]
        if on_rhel:
            print(
                "warning: RHEL is not an officially supported target and you will probably experience errors attempting to build this container."
            )
    elif on_windows:
        base_image = "mcr.microsoft.com/dotnet/framework/sdk:4.8"
    elif on_rhel:
        raise KeyError("A base image must be specified when targeting RHEL")
    elif FLAGS.enable_gpu:
        base_image = (
            f"nvcr.io/nvidia/tritonserver:{FLAGS.upstream_container_version}-py3-min"
        )
    else:
        base_image = "ubuntu:24.04"

    dockerfileargmap = {
        "NVIDIA_BUILD_REF": FLAGS.build_sha if FLAGS.build_sha is not None else "",
        "NVIDIA_BUILD_ID": (
            FLAGS.build_id if FLAGS.build_id is not None else "<unknown>"
        ),
        "TRITON_VERSION": FLAGS.version,
        "TRITON_CONTAINER_VERSION": FLAGS.container_version,
        "BASE_IMAGE": base_image,
        # None when no inference image was requested.
        "INFERENCE_IMAGE": images["inference"] if "inference" in images else None,
        "DCGM_VERSION": FLAGS.dcgm_version,
    }

    # For CPU-only image we need to copy some cuda libraries and dependencies
    # since we are using PyTorch containers that are not CPU-only.
    if not FLAGS.enable_gpu and "pytorch" in backends and not on_windows:
        dockerfileargmap["GPU_BASE_IMAGE"] = (
            images["gpu-base"]
            if "gpu-base" in images
            else f"nvcr.io/nvidia/tritonserver:{FLAGS.upstream_container_version}-py3-min"
        )

    # Build-base Dockerfile (RHEL has its own generator).
    if on_rhel:
        create_dockerfile_buildbase_rhel(
            FLAGS.build_dir, "Dockerfile.buildbase", dockerfileargmap
        )
    else:
        create_dockerfile_buildbase(
            FLAGS.build_dir, "Dockerfile.buildbase", dockerfileargmap
        )

    # Final image Dockerfile.
    if on_windows:
        create_dockerfile_windows(
            FLAGS.build_dir,
            "Dockerfile",
            dockerfileargmap,
            backends,
            repoagents,
            caches,
        )
    else:
        create_dockerfile_linux(
            FLAGS.build_dir,
            "Dockerfile",
            dockerfileargmap,
            backends,
            repoagents,
            caches,
            endpoints,
        )

    # Dockerfile used for the creating the CI base image.
    create_dockerfile_cibase(FLAGS.build_dir, "Dockerfile.cibase", dockerfileargmap)


def create_docker_build_script(script_name, container_install_dir, container_ci_dir):
    """Generate the script that drives the Docker-based build.

    The generated script, in order: builds the ``tritonserver_buildbase``
    image, runs the build inside a ``tritonserver_builder`` container, copies
    the install and CI artifacts out with ``docker cp`` into
    ``FLAGS.build_dir``, builds the final ``tritonserver`` image and finally
    builds the ``tritonserver_cibase`` image.

    Args:
        script_name: File name of the script, created under FLAGS.build_dir.
        container_install_dir: Not used by this function.
        container_ci_dir: Not used by this function.
    """
    with BuildScript(
        os.path.join(FLAGS.build_dir, script_name),
        verbose=FLAGS.verbose,
        desc=("Docker-based build script for Triton Inference Server"),
    ) as docker_script:
        #
        # Build base image... tritonserver_buildbase
        #
        docker_script.commentln(8)
        docker_script.comment("Create Triton base build image")
        docker_script.comment(
            "This image contains all dependencies necessary to build Triton"
        )
        docker_script.comment()

        cachefrommap = [
            "tritonserver_buildbase",
            "tritonserver_buildbase_cache0",
            "tritonserver_buildbase_cache1",
        ]

        baseargs = [
            "docker",
            "build",
            "-t",
            "tritonserver_buildbase",
            "-f",
            os.path.join(FLAGS.build_dir, "Dockerfile.buildbase"),
        ]

        if not FLAGS.no_container_pull:
            baseargs += [
                "--pull",
            ]

        # Windows docker runs in a VM and memory needs to be specified
        # explicitly (at least for some configurations of docker).
        if target_platform() == "windows":
            if FLAGS.container_memory:
                baseargs += ["--memory", FLAGS.container_memory]

        if target_platform() != "windows":
            baseargs += ["--cache-from={}".format(k) for k in cachefrommap]

        baseargs += ["."]

        docker_script.cwd(THIS_SCRIPT_DIR)
        docker_script.cmd(baseargs, check_exitcode=True)

        #
        # Build...
        #
        docker_script.blankln()
        docker_script.commentln(8)
        docker_script.comment("Run build in tritonserver_buildbase container")
        docker_script.comment("Mount a directory into the container where the install")
        docker_script.comment("artifacts will be placed.")
        docker_script.comment()

        # Don't use '-v' to communicate the built artifacts out of the
        # build, because we want this code to work even if run within
        # Docker (i.e. docker-in-docker) and not just if run directly
        # from host.
        runargs = [
            "docker",
            "run",
            "-w",
            "/workspace/build",
            "--name",
            "tritonserver_builder",
        ]

        if not FLAGS.no_container_interactive:
            runargs += ["-it"]

        if target_platform() == "windows":
            if FLAGS.container_memory:
                runargs += ["--memory", FLAGS.container_memory]
            runargs += ["-v", "\\\\.\\pipe\\docker_engine:\\\\.\\pipe\\docker_engine"]
        else:
            runargs += ["-v", "/var/run/docker.sock:/var/run/docker.sock"]
            if FLAGS.use_user_docker_config:
                # Expand '~' BEFORE testing for existence. Checking the raw
                # value meant a '~'-prefixed path never passed the check, so
                # the config was silently not mounted.
                user_docker_config = os.path.expanduser(FLAGS.use_user_docker_config)
                if os.path.exists(user_docker_config):
                    runargs += [
                        "-v",
                        user_docker_config + ":/root/.docker/config.json",
                    ]

        runargs += ["tritonserver_buildbase"]

        if target_platform() == "windows":
            runargs += ["powershell.exe", "-noexit", "-File", "./cmake_build.ps1"]
        else:
            runargs += ["./cmake_build"]

        # Remove existing tritonserver_builder container...
        if target_platform() == "windows":
            docker_script.cmd(["docker", "rm", "tritonserver_builder"])
        else:
            # Written as a raw line so the removal only runs when a container
            # with that name is listed by 'docker ps -a'.
            docker_script._file.write(
                'if [ "$(docker ps -a | grep tritonserver_builder)" ]; then  docker rm -f tritonserver_builder; fi\n'
            )

        docker_script.cmd(runargs, check_exitcode=True)

        # Copy the build results out of the (stopped) builder container.
        docker_script.cmd(
            [
                "docker",
                "cp",
                "tritonserver_builder:/tmp/tritonbuild/install",
                FLAGS.build_dir,
            ],
            check_exitcode=True,
        )
        docker_script.cmd(
            [
                "docker",
                "cp",
                "tritonserver_builder:/tmp/tritonbuild/ci",
                FLAGS.build_dir,
            ],
            check_exitcode=True,
        )

        #
        # Final image... tritonserver
        #
        docker_script.blankln()
        docker_script.commentln(8)
        docker_script.comment("Create final tritonserver image")
        docker_script.comment()

        finalargs = [
            "docker",
            "build",
        ]
        # NOTE(review): 'secrets', 'requirements' and 'build_public_vllm' are
        # not defined in this function; presumably module-level globals set
        # elsewhere in this file -- confirm.
        if secrets:
            finalargs += [
                f"--secret id=req,src={requirements}",
                "--secret id=VLLM_INDEX_URL",
                "--secret id=PYTORCH_TRITON_URL",
                "--secret id=NVPL_SLIM_URL",
                f"--build-arg BUILD_PUBLIC_VLLM={build_public_vllm}",
            ]
        finalargs += [
            "-t",
            "tritonserver",
            "-f",
            os.path.join(FLAGS.build_dir, "Dockerfile"),
            ".",
        ]
        docker_script.cwd(THIS_SCRIPT_DIR)
        docker_script.cmd(finalargs, check_exitcode=True)

        #
        # CI base image... tritonserver_cibase
        #
        docker_script.blankln()
        docker_script.commentln(8)
        docker_script.comment("Create CI base image")
        docker_script.comment()

        cibaseargs = [
            "docker",
            "build",
            "-t",
            "tritonserver_cibase",
            "-f",
            os.path.join(FLAGS.build_dir, "Dockerfile.cibase"),
            ".",
        ]

        docker_script.cwd(THIS_SCRIPT_DIR)
        docker_script.cmd(cibaseargs, check_exitcode=True)


def core_build(
    cmake_script, repo_dir, cmake_dir, build_dir, install_dir, components, backends
):
    """Emit the script steps that build the Triton core and stage its artifacts.

    Args:
        cmake_script: Build-script writer that receives the generated steps.
        repo_dir: Root of the server repository checkout.
        cmake_dir: Forwarded to core_cmake_args().
        build_dir: Root build directory; the core builds under
            <build_dir>/tritonserver.
        install_dir: Directory the final artifacts are copied into.
        components: Forwarded to core_cmake_args().
        backends: Forwarded to core_cmake_args().
    """
    repo_build_dir = os.path.join(build_dir, "tritonserver", "build")
    repo_install_dir = os.path.join(build_dir, "tritonserver", "install")

    cmake_script.commentln(8)
    cmake_script.comment("Triton core library and tritonserver executable")
    cmake_script.comment()
    cmake_script.mkdir(repo_build_dir)
    cmake_script.cwd(repo_build_dir)
    cmake_args = core_cmake_args(components, backends, cmake_dir, repo_install_dir)
    cmake_script.cmake(cmake_args)
    cmake_script.makeinstall()

    # Every platform gets a 'bin' directory; what lands in it (and where the
    # shared library goes) depends on the platform.
    platform = target_platform()
    bin_dest = os.path.join(install_dir, "bin")
    cmake_script.mkdir(bin_dest)
    if platform == "windows":
        # On Windows the exe, dll and import lib all go into 'bin'.
        windows_artifacts = (
            ("bin", "tritonserver.exe"),
            ("bin", "tritonserver.dll"),
            ("lib", "tritonserver.lib"),
        )
        for subdir, filename in windows_artifacts:
            cmake_script.cp(os.path.join(repo_install_dir, subdir, filename), bin_dest)
    else:
        cmake_script.cp(os.path.join(repo_install_dir, "bin", "tritonserver"), bin_dest)
        # RHEL uses 'lib64' for the shared library, other platforms use 'lib'.
        lib_subdir = "lib64" if platform == "rhel" else "lib"
        lib_dest = os.path.join(install_dir, lib_subdir)
        cmake_script.mkdir(lib_dest)
        cmake_script.cp(
            os.path.join(repo_install_dir, lib_subdir, "libtritonserver.so"),
            lib_dest,
        )

    # [FIXME] Placing the tritonserver and tritonfrontend wheel files in 'python' for now,
    # should be uploaded to pip registry to be able to install directly
    python_dest = os.path.join(install_dir, "python")
    cmake_script.mkdir(python_dest)
    cmake_script.cp(os.path.join(repo_install_dir, "python", "triton*.whl"), python_dest)

    cmake_script.mkdir(os.path.join(install_dir, "include", "triton"))
    cmake_script.cpdir(
        os.path.join(repo_install_dir, "include", "triton", "core"),
        os.path.join(install_dir, "include", "triton", "core"),
    )

    cmake_script.cpdir(
        os.path.join(repo_dir, "python", "openai"), os.path.join(install_dir, "python")
    )

    for top_level_file in ("LICENSE", "TRITON_VERSION"):
        cmake_script.cp(os.path.join(repo_dir, top_level_file), install_dir)

    # If requested, package the source code for all OSS used to build
    # For windows, Triton is not delivered as a container so skip for
    # windows platform.
    if target_platform() != "windows" and not (
        FLAGS.no_container_build or FLAGS.no_core_build or FLAGS.no_container_source
    ):
        third_party_dest = os.path.join(install_dir, "third-party-src")
        cmake_script.mkdir(third_party_dest)
        cmake_script.cwd(repo_build_dir)
        cmake_script.tar(
            "third-party-src",
            os.path.join(install_dir, "third-party-src", "src.tar.gz"),
        )
        cmake_script.cp(
            os.path.join(repo_dir, "docker", "README.third-party-src"),
            os.path.join(install_dir, "third-party-src", "README"),
        )

    cmake_script.comment()
    cmake_script.comment("end Triton core library and tritonserver executable")
    cmake_script.commentln(8)
    cmake_script.blankln()


def tensorrtllm_prebuild(cmake_script):
    """Emit the environment exports needed before building the TRT-LLM backend.

    Args:
        cmake_script: Build-script writer; each export is added via ``cmd``.
    """
    exports = (
        # Export the TRT_ROOT environment variable
        "export TRT_ROOT=/usr/local/tensorrt",
        "export ARCH=$(uname -m)",
        'export LD_LIBRARY_PATH="/usr/local/cuda/compat/lib.real:${LD_LIBRARY_PATH}"',
    )
    for export_line in exports:
        cmake_script.cmd(export_line)


def tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir):
    """Emit steps copying the TRT-LLM backend build outputs into the install tree.

    Args:
        cmake_script: Build-script writer that receives the mkdir/cp steps.
        repo_install_dir: Install root; artifacts go to
            <repo_install_dir>/backends/tensorrtllm.
        tensorrtllm_be_dir: Backend checkout whose 'build' directory holds
            the artifacts.
    """
    # TODO: Update the CMakeLists.txt of TRT-LLM backend to install the artifacts to the correct location
    destination = os.path.join(repo_install_dir, "backends/tensorrtllm")
    cmake_script.mkdir(destination)

    # The TRT-LLM backend libraries first, then the executor worker binary.
    for artifact in ("libtriton_tensorrtllm*.so", "trtllmExecutorWorker"):
        cmake_script.cp(
            os.path.join(tensorrtllm_be_dir, "build", artifact), destination
        )


def backend_build(
    be,
    cmake_script,
    tag,
    build_dir,
    install_dir,
    github_organization,
    images,
    components,
    library_paths,
):
    """Emit the script steps that clone, build and stage one backend.

    Args:
        be: Backend name.
        cmake_script: Build-script writer that receives the generated steps.
        tag: Repository tag passed to gitclone.
        build_dir: Root build directory; the backend lives in <build_dir>/<be>.
        install_dir: Directory whose 'backends' subdirectory receives the result.
        github_organization: Organization passed to gitclone.
        images: Forwarded to backend_cmake_args().
        components: Forwarded to backend_cmake_args().
        library_paths: Forwarded to backend_cmake_args().
    """
    is_trtllm = be == "tensorrtllm"
    repo_build_dir = os.path.join(build_dir, be, "build")
    repo_install_dir = os.path.join(build_dir, be, "install")
    staged_backend_dir = os.path.join(repo_install_dir, "backends", be)
    backends_dest = os.path.join(install_dir, "backends")

    cmake_script.commentln(8)
    cmake_script.comment(f"'{be}' backend")
    cmake_script.comment("Delete this section to remove backend from build")
    cmake_script.comment()
    cmake_script.mkdir(build_dir)
    cmake_script.cwd(build_dir)

    # TRT-LLM is cloned from its own repository and needs extra environment
    # exports before the build.
    if is_trtllm:
        cmake_script.gitclone("TensorRT-LLM", tag, be, github_organization)
        tensorrtllm_prebuild(cmake_script)
    else:
        cmake_script.gitclone(backend_repo(be), tag, be, github_organization)

    cmake_script.mkdir(repo_build_dir)
    cmake_script.cwd(repo_build_dir)
    cmake_args = backend_cmake_args(
        images, components, be, repo_install_dir, library_paths
    )
    cmake_script.cmake(cmake_args)
    cmake_script.makeinstall()

    if is_trtllm:
        tensorrtllm_postbuild(
            cmake_script, repo_install_dir, os.path.join(build_dir, be)
        )

    # Start from a clean destination for this backend.
    cmake_script.mkdir(backends_dest)
    cmake_script.rmdir(os.path.join(install_dir, "backends", be))

    # The python library version available for install via 'yum install python3.X-devel' does not
    # match the version of python inside the RHEL base container. This means that python packages
    # installed within the container will not be picked up by the python backend stub process pybind
    # bindings. It must instead must be installed via pyenv. We package it here for better usability.
    if target_platform() == "rhel" and be == "python":
        # Keep only "<major>.<minor>" of the configured version.
        py_major_minor = ".".join(FLAGS.rhel_py_version.split(".")[:2])
        cmake_script.cp(f"/usr/lib64/libpython{py_major_minor}*", staged_backend_dir)

    cmake_script.cpdir(staged_backend_dir, backends_dest)

    cmake_script.comment()
    cmake_script.comment(f"end '{be}' backend")
    cmake_script.commentln(8)
    cmake_script.blankln()


def backend_clone(
    be,
    clone_script,
    tag,
    build_dir,
    install_dir,
    github_organization,
):
    """Emit steps that clone a backend and stage its sources without building.

    The backend's ``src/model.py`` and ``src/utils`` are copied into
    <install_dir>/backends/<be>.

    Args:
        be: Backend name.
        clone_script: Build-script writer that receives the generated steps.
        tag: Repository tag passed to gitclone.
        build_dir: Directory the repository is cloned under.
        install_dir: Directory whose 'backends' subdirectory receives the files.
        github_organization: Organization passed to gitclone.
    """
    backends_root = os.path.join(install_dir, "backends")
    backend_dir = os.path.join(backends_root, be)
    backend_src = os.path.join(build_dir, be, "src")

    clone_script.commentln(8)
    clone_script.comment(f"'{be}' backend")
    clone_script.comment("Delete this section to remove backend from build")
    clone_script.comment()
    clone_script.mkdir(build_dir)
    clone_script.cwd(build_dir)
    clone_script.gitclone(backend_repo(be), tag, be, github_organization)

    # Recreate the backend's install directory from scratch.
    clone_script.mkdir(backends_root)
    clone_script.rmdir(backend_dir)
    clone_script.mkdir(backend_dir)

    clone_script.cp(os.path.join(backend_src, "model.py"), backend_dir)
    clone_script.cpdir(os.path.join(backend_src, "utils"), backend_dir)

    clone_script.comment()
    clone_script.comment(f"end '{be}' backend")
    clone_script.commentln(8)
    clone_script.blankln()


def repo_agent_build(
    ra, cmake_script, build_dir, install_dir, repoagent_repo, repoagents
):
    """Emit the script steps that clone, build and stage one repository agent.

    Args:
        ra: Repository agent name.
        cmake_script: Build-script writer that receives the generated steps.
        build_dir: Root build directory; the agent lives in <build_dir>/<ra>.
        install_dir: Directory whose 'repoagents' subdirectory receives the result.
        repoagent_repo: Callable mapping the agent name to its repository name.
        repoagents: Mapping of agent name to the tag passed to gitclone.
    """
    repo_build_dir = os.path.join(build_dir, ra, "build")
    repo_install_dir = os.path.join(build_dir, ra, "install")
    agents_dest = os.path.join(install_dir, "repoagents")

    cmake_script.commentln(8)
    cmake_script.comment(f"'{ra}' repository agent")
    cmake_script.comment("Delete this section to remove repository agent from build")
    cmake_script.comment()
    cmake_script.mkdir(build_dir)
    cmake_script.cwd(build_dir)
    repository = repoagent_repo(ra)
    cmake_script.gitclone(repository, repoagents[ra], ra, FLAGS.github_organization)

    cmake_script.mkdir(repo_build_dir)
    cmake_script.cwd(repo_build_dir)
    # NOTE(review): 'images' and 'components' are not parameters of this
    # function; presumably module-level globals -- confirm.
    cmake_args = repoagent_cmake_args(images, components, ra, repo_install_dir)
    cmake_script.cmake(cmake_args)
    cmake_script.makeinstall()

    # Replace any previously staged copy of this agent.
    cmake_script.mkdir(agents_dest)
    cmake_script.rmdir(os.path.join(install_dir, "repoagents", ra))
    cmake_script.cpdir(os.path.join(repo_install_dir, "repoagents", ra), agents_dest)

    cmake_script.comment()
    cmake_script.comment(f"end '{ra}' repository agent")
    cmake_script.commentln(8)
    cmake_script.blankln()


def cache_build(cache, cmake_script, build_dir, install_dir, cache_repo, caches):
    """Emit the script steps that clone, build and stage one cache implementation.

    Args:
        cache: Cache name.
        cmake_script: Build-script writer that receives the generated steps.
        build_dir: Root build directory; the cache lives in <build_dir>/<cache>.
        install_dir: Directory whose 'caches' subdirectory receives the result.
        cache_repo: Callable mapping the cache name to its repository name.
        caches: Mapping of cache name to the tag passed to gitclone.
    """
    repo_build_dir = os.path.join(build_dir, cache, "build")
    repo_install_dir = os.path.join(build_dir, cache, "install")
    caches_dest = os.path.join(install_dir, "caches")

    cmake_script.commentln(8)
    cmake_script.comment(f"'{cache}' cache")
    cmake_script.comment("Delete this section to remove cache from build")
    cmake_script.comment()
    cmake_script.mkdir(build_dir)
    cmake_script.cwd(build_dir)
    repository = cache_repo(cache)
    cmake_script.gitclone(repository, caches[cache], cache, FLAGS.github_organization)

    cmake_script.mkdir(repo_build_dir)
    cmake_script.cwd(repo_build_dir)
    # NOTE(review): 'images' and 'components' are not parameters of this
    # function; presumably module-level globals -- confirm.
    cmake_args = cache_cmake_args(images, components, cache, repo_install_dir)
    cmake_script.cmake(cmake_args)
    cmake_script.makeinstall()

    # Replace any previously staged copy of this cache.
    cmake_script.mkdir(caches_dest)
    cmake_script.rmdir(os.path.join(install_dir, "caches", cache))
    cmake_script.cpdir(os.path.join(repo_install_dir, "caches", cache), caches_dest)

    cmake_script.comment()
    cmake_script.comment(f"end '{cache}' cache")
    cmake_script.commentln(8)
    cmake_script.blankln()


def _cpdir_if_exists(cmake_script, src_dir, dest_dir):
    """Emit a bash 'if [[ -e ... ]]' guard so the copy only runs when src_dir exists."""
    cmake_script.cmd(f"if [[ -e {src_dir} ]]; then")
    cmake_script.cpdir(src_dir, dest_dir)
    cmake_script.cmd("fi")


def cibase_build(
    cmake_script, repo_dir, cmake_dir, build_dir, install_dir, ci_dir, backends
):
    """Emit the script steps that collect the artifacts needed for CI testing.

    On Windows only the CI directory is created and the function returns
    early, so everything after that guard emits bash syntax only.

    Args:
        cmake_script: Build-script writer that receives the generated steps.
        repo_dir: Root of the server repository checkout.
        cmake_dir: Not used by this function.
        build_dir: Root build directory holding the per-component build trees.
        install_dir: Not used by this function.
        ci_dir: Directory the CI artifacts are collected into.
        backends: Container of enabled backend names.
    """
    repo_install_dir = os.path.join(build_dir, "tritonserver", "install")

    cmake_script.commentln(8)
    cmake_script.comment("Collect Triton CI artifacts")
    cmake_script.comment()

    cmake_script.mkdir(ci_dir)

    # On windows we are not yet using a CI/QA docker image for
    # testing, so don't do anything...
    if target_platform() == "windows":
        return

    # The core build produces some artifacts that are needed for CI
    # testing, so include those in the install.
    cmake_script.cpdir(os.path.join(repo_dir, "qa"), ci_dir)
    cmake_script.cpdir(os.path.join(repo_dir, "deploy"), ci_dir)
    cmake_script.mkdir(os.path.join(ci_dir, "docs"))
    cmake_script.cpdir(
        os.path.join(repo_dir, "docs", "examples"), os.path.join(ci_dir, "docs")
    )
    cmake_script.mkdir(os.path.join(ci_dir, "src", "test"))
    cmake_script.cpdir(
        os.path.join(repo_dir, "src", "test", "models"),
        os.path.join(ci_dir, "src", "test"),
    )
    # Skip copying the artifacts in the bin, lib, and python as those directories will
    # be missing when the core build is not enabled.
    if not FLAGS.no_core_build:
        cmake_script.cpdir(os.path.join(repo_install_dir, "bin"), ci_dir)
        cmake_script.mkdir(os.path.join(ci_dir, "lib"))
        cmake_script.cp(
            os.path.join(repo_install_dir, "lib", "libtritonrepoagent_relocation.so"),
            os.path.join(ci_dir, "lib"),
        )
        cmake_script.cpdir(os.path.join(repo_install_dir, "python"), ci_dir)

    # Some of the backends are needed for CI testing. The Windows variants of
    # the existence guard were unreachable after the early return above, so
    # only the bash guard is emitted.
    ci_backends_dir = os.path.join(ci_dir, "backends")
    cmake_script.mkdir(ci_backends_dir)
    for be in ("identity", "repeat", "square"):
        _cpdir_if_exists(
            cmake_script,
            os.path.join(build_dir, be, "install", "backends", be),
            ci_backends_dir,
        )

    # Some of the unit-test built backends are needed for CI testing
    unit_test_backends_dir = os.path.join(
        ci_dir, "tritonbuild", "tritonserver", "backends"
    )
    cmake_script.mkdir(unit_test_backends_dir)
    for be in (
        "query",
        "implicit_state",
        "sequence",
        "dyna_sequence",
        "distributed_addsub",
        "iterative_sequence",
    ):
        _cpdir_if_exists(
            cmake_script,
            os.path.join(repo_install_dir, "backends", be),
            unit_test_backends_dir,
        )

    # The onnxruntime_backend build produces some artifacts that
    # are needed for CI testing.
    if "onnxruntime" in backends:
        ort_install_dir = os.path.join(build_dir, "onnxruntime", "install")
        cmake_script.mkdir(os.path.join(ci_dir, "qa", "L0_custom_ops"))
        if target_platform() != "igpu":
            cmake_script.cp(
                os.path.join(ort_install_dir, "test", "libcustom_op_library.so"),
                os.path.join(ci_dir, "qa", "L0_custom_ops"),
            )
            cmake_script.cp(
                os.path.join(ort_install_dir, "test", "custom_op_test.onnx"),
                os.path.join(ci_dir, "qa", "L0_custom_ops"),
            )
        # [WIP] other way than wildcard?
        backend_tests = os.path.join(build_dir, "onnxruntime", "test", "*")
        cmake_script.cpdir(backend_tests, os.path.join(ci_dir, "qa"))

    # Need the build area for some backends so that they can be
    # rebuilt with specific options.
    cmake_script.mkdir(os.path.join(ci_dir, "tritonbuild"))
    for be in ("identity", "python"):
        if be in backends:
            # Drop the build and install trees so only the sources are copied.
            cmake_script.rmdir(os.path.join(build_dir, be, "build"))
            cmake_script.rmdir(os.path.join(build_dir, be, "install"))
            cmake_script.cpdir(
                os.path.join(build_dir, be), os.path.join(ci_dir, "tritonbuild")
            )

    cmake_script.comment()
    cmake_script.comment("end Triton CI artifacts")
    cmake_script.commentln(8)
    cmake_script.blankln()


def finalize_build(cmake_script, install_dir, ci_dir):
    """Emit chmod steps granting everyone read/write on the install and CI trees.

    Args:
        cmake_script: Build-script writer; each chmod is added via ``cmd``.
        install_dir: Install directory to open up recursively.
        ci_dir: CI directory to open up recursively.
    """
    for directory in (install_dir, ci_dir):
        cmake_script.cmd("chmod -R a+rw {}".format(directory))


def enable_all():
    """Expand FLAGS to the full standard feature set for --enable-all.

    Turns on the feature switches appropriate for the target platform and
    appends every standard backend, repo agent, cache, filesystem and
    endpoint that is not already present in the corresponding FLAGS list.
    Entries that are already in those lists are left untouched.
    """
    if target_platform() == "windows":
        all_backends = [
            "ensemble",
            "identity",
            "square",
            "repeat",
            "onnxruntime",
            "openvino",
            "tensorrt",
        ]
        all_filesystems = []
        all_endpoints = ["http", "grpc"]
        feature_switches = (
            "enable_logging",
            "enable_stats",
            "enable_tracing",
            "enable_gpu",
        )
    else:
        all_backends = [
            "ensemble",
            "identity",
            "square",
            "repeat",
            "onnxruntime",
            "python",
            "dali",
            "pytorch",
            "openvino",
            "fil",
            "tensorrt",
        ]
        all_filesystems = ["gcs", "s3", "azure_storage"]
        all_endpoints = ["http", "grpc", "sagemaker", "vertex-ai"]
        feature_switches = (
            "enable_logging",
            "enable_stats",
            "enable_metrics",
            "enable_gpu_metrics",
            "enable_cpu_metrics",
            "enable_tracing",
            "enable_nvtx",
            "enable_gpu",
        )

    # Repo agents and caches are the same on every platform.
    all_repoagents = ["checksum"]
    all_caches = ["local", "redis"]

    for switch in feature_switches:
        setattr(FLAGS, switch, True)

    def _append_missing(requested, defaults):
        # Requested entries may be "<name>:<repo-tag>"; compare on the name
        # part only so an explicitly tagged entry is not duplicated.
        requested_names = {entry.split(":")[0] for entry in requested}
        requested.extend(name for name in defaults if name not in requested_names)

    _append_missing(FLAGS.backend, all_backends)
    _append_missing(FLAGS.repoagent, all_repoagents)
    _append_missing(FLAGS.cache, all_caches)

    # Filesystems and endpoints are compared as whole values.
    for fs in all_filesystems:
        if fs not in FLAGS.filesystem:
            FLAGS.filesystem.append(fs)

    for ep in all_endpoints:
        if ep not in FLAGS.endpoint:
            FLAGS.endpoint.append(ep)


if __name__ == "__main__":
    # Command-line interface for the build. Options are declared in groups
    # (verbosity, container behavior, target, build locations, versions,
    # feature toggles, component selection, CMake pass-through and pinned
    # dependency versions) and the parsed result is stored in FLAGS.
    parser = argparse.ArgumentParser()

    # -q and -v cannot be combined.
    group_qv = parser.add_mutually_exclusive_group()
    group_qv.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        required=False,
        help="Disable console output.",
    )
    group_qv.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        help="Enable verbose output.",
    )

    # How the build is driven (dry run, container usage, target).
    parser.add_argument(
        "--dryrun",
        action="store_true",
        required=False,
        help="Output the build scripts, but do not perform build.",
    )
    parser.add_argument(
        "--no-container-build",
        action="store_true",
        required=False,
        help="Do not use Docker container for build.",
    )
    parser.add_argument(
        "--use-user-docker-config",
        default=None,
        required=False,
        help="Path to the Docker configuration file to be used when performing container build.",
    )
    parser.add_argument(
        "--no-container-interactive",
        action="store_true",
        required=False,
        help='Do not use -it argument to "docker run" when performing container build.',
    )
    parser.add_argument(
        "--no-container-pull",
        action="store_true",
        required=False,
        help="Do not use Docker --pull argument when building container.",
    )
    parser.add_argument(
        "--container-memory",
        default=None,
        required=False,
        help="Value for Docker --memory argument. Used only for windows builds.",
    )
    parser.add_argument(
        "--target-platform",
        required=False,
        default=None,
        help='Target platform for build, can be "linux", "rhel", "windows" or "igpu". If not specified, build targets the current platform.',
    )
    parser.add_argument(
        "--target-machine",
        required=False,
        default=None,
        help="Target machine/architecture for build. If not specified, build targets the current machine/architecture.",
    )

    # Build identification, locations and parallelism.
    parser.add_argument(
        "--build-id",
        type=str,
        required=False,
        help="Build ID associated with the build.",
    )
    parser.add_argument(
        "--build-sha", type=str, required=False, help="SHA associated with the build."
    )
    parser.add_argument(
        "--build-dir",
        type=str,
        required=False,
        help="Build directory. All repo clones and builds will be performed in this directory.",
    )
    parser.add_argument(
        "--install-dir",
        type=str,
        required=False,
        default=None,
        help="Install directory, default is <builddir>/opt/tritonserver.",
    )
    parser.add_argument(
        "--cmake-dir",
        type=str,
        required=False,
        help="Directory containing the CMakeLists.txt file for Triton server.",
    )
    parser.add_argument(
        "--tmp-dir",
        type=str,
        required=False,
        default="/tmp",
        help="Temporary directory used for building inside docker. Default is /tmp.",
    )
    parser.add_argument(
        "--library-paths",
        action="append",
        required=False,
        default=None,
        help="Specify library paths for respective backends in build as <backend-name>[:<library_path>].",
    )
    parser.add_argument(
        "--build-type",
        required=False,
        default="Release",
        help='Build type, one of "Release", "Debug", "RelWithDebInfo" or "MinSizeRel". Default is "Release".',
    )
    parser.add_argument(
        "-j",
        "--build-parallel",
        type=int,
        required=False,
        default=None,
        help="Build parallelism. Defaults to 2 * number-of-cores.",
    )

    # Source organization, versions and container customization.
    parser.add_argument(
        "--github-organization",
        type=str,
        required=False,
        default="https://github.com/triton-inference-server",
        help='The GitHub organization containing the repos used for the build. Defaults to "https://github.com/triton-inference-server".',
    )
    parser.add_argument(
        "--version",
        type=str,
        required=False,
        help="The Triton version. If not specified defaults to the value in the TRITON_VERSION file.",
    )
    parser.add_argument(
        "--container-version",
        type=str,
        required=False,
        help="The Triton container version to build. If not specified the container version will be chosen automatically based on --version value.",
    )
    parser.add_argument(
        "--container-prebuild-command",
        type=str,
        required=False,
        help="When performing a container build, this command will be executed within the container just before the build is performed.",
    )
    parser.add_argument(
        "--no-container-source",
        action="store_true",
        required=False,
        help="Do not include OSS source code in Docker container.",
    )
    parser.add_argument(
        "--image",
        action="append",
        required=False,
        help='Use specified Docker image in build as <image-name>,<full-image-name>. <image-name> can be "base", "gpu-base", or "pytorch".',
    )

    # Feature toggles.
    parser.add_argument(
        "--enable-all",
        action="store_true",
        required=False,
        help="Enable all standard released Triton features, backends, repository agents, caches, endpoints and file systems.",
    )
    parser.add_argument(
        "--enable-logging", action="store_true", required=False, help="Enable logging."
    )
    parser.add_argument(
        "--enable-stats",
        action="store_true",
        required=False,
        help="Enable statistics collection.",
    )
    parser.add_argument(
        "--enable-metrics",
        action="store_true",
        required=False,
        help="Enable metrics reporting.",
    )
    parser.add_argument(
        "--enable-gpu-metrics",
        action="store_true",
        required=False,
        help="Include GPU metrics in reported metrics.",
    )
    parser.add_argument(
        "--enable-cpu-metrics",
        action="store_true",
        required=False,
        help="Include CPU metrics in reported metrics.",
    )
    parser.add_argument(
        "--enable-tracing", action="store_true", required=False, help="Enable tracing."
    )
    parser.add_argument(
        "--enable-nvtx", action="store_true", required=False, help="Enable NVTX."
    )
    parser.add_argument(
        "--enable-gpu", action="store_true", required=False, help="Enable GPU support."
    )
    parser.add_argument(
        "--enable-mali-gpu",
        action="store_true",
        required=False,
        help="Enable ARM MALI GPU support.",
    )
    parser.add_argument(
        "--min-compute-capability",
        type=str,
        required=False,
        default="6.0",
        help="Minimum CUDA compute capability supported by server.",
    )

    # Components to include in the build. These options may be repeated.
    parser.add_argument(
        "--endpoint",
        action="append",
        required=False,
        help='Include specified endpoint in build. Allowed values are "grpc", "http", "vertex-ai" and "sagemaker".',
    )
    parser.add_argument(
        "--filesystem",
        action="append",
        required=False,
        help='Include specified filesystem in build. Allowed values are "gcs", "azure_storage" and "s3".',
    )
    parser.add_argument(
        "--no-core-build",
        action="store_true",
        required=False,
        help="Do not build Triton core shared library or executable.",
    )
    parser.add_argument(
        "--backend",
        action="append",
        required=False,
        help='Include specified backend in build as <backend-name>[:<repo-tag>]. If <repo-tag> starts with "pull/" then it refers to a pull-request reference, otherwise <repo-tag> indicates the git tag/branch to use for the build. If the version is non-development then the default <repo-tag> is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default <repo-tag> is "main" (e.g. version YY.MMdev -> branch main).',
    )
    parser.add_argument(
        "--repo-tag",
        action="append",
        required=False,
        help='The version of a component to use in the build as <component-name>:<repo-tag>. <component-name> can be "common", "core", "backend" or "thirdparty". <repo-tag> indicates the git tag/branch to use for the build. Currently <repo-tag> does not support pull-request reference. If the version is non-development then the default <repo-tag> is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default <repo-tag> is "main" (e.g. version YY.MMdev -> branch main).',
    )
    parser.add_argument(
        "--repoagent",
        action="append",
        required=False,
        help='Include specified repo agent in build as <repoagent-name>[:<repo-tag>]. If <repo-tag> starts with "pull/" then it refers to a pull-request reference, otherwise <repo-tag> indicates the git tag/branch to use for the build. If the version is non-development then the default <repo-tag> is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default <repo-tag> is "main" (e.g. version YY.MMdev -> branch main).',
    )
    parser.add_argument(
        "--cache",
        action="append",
        required=False,
        help='Include specified cache in build as <cache-name>[:<repo-tag>]. If <repo-tag> starts with "pull/" then it refers to a pull-request reference, otherwise <repo-tag> indicates the git tag/branch to use for the build. If the version is non-development then the default <repo-tag> is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default <repo-tag> is "main" (e.g. version YY.MMdev -> branch main).',
    )
    parser.add_argument(
        "--no-force-clone",
        action="store_true",
        default=False,
        help="Do not create fresh clones of repos that have already been cloned.",
    )

    # CMake argument pass-through for the core and for individual backends.
    parser.add_argument(
        "--extra-core-cmake-arg",
        action="append",
        required=False,
        help="Extra CMake argument as <name>=<value>. The argument is passed to CMake as -D<name>=<value> and is included after all CMake arguments added by build.py for the core builds.",
    )
    parser.add_argument(
        "--override-core-cmake-arg",
        action="append",
        required=False,
        help="Override specified CMake argument in the build as <name>=<value>. The argument is passed to CMake as -D<name>=<value>. This flag only impacts CMake arguments that are used by build.py. To unconditionally add a CMake argument to the core build use --extra-core-cmake-arg.",
    )
    parser.add_argument(
        "--extra-backend-cmake-arg",
        action="append",
        required=False,
        help="Extra CMake argument for a backend build as <backend>:<name>=<value>. The argument is passed to CMake as -D<name>=<value> and is included after all CMake arguments added by build.py for the backend.",
    )
    parser.add_argument(
        "--override-backend-cmake-arg",
        action="append",
        required=False,
        help="Override specified backend CMake argument in the build as <backend>:<name>=<value>. The argument is passed to CMake as -D<name>=<value>. This flag only impacts CMake arguments that are used by build.py. To unconditionally add a CMake argument to the backend build use --extra-backend-cmake-arg.",
    )

    # Pinned versions; each default comes from DEFAULT_TRITON_VERSION_MAP.
    parser.add_argument(
        "--release-version",
        required=False,
        default=DEFAULT_TRITON_VERSION_MAP["release_version"],
        help="This flag sets the release version for Triton Inference Server to be built. Default: the latest released version.",
    )
    parser.add_argument(
        "--triton-container-version",
        required=False,
        default=DEFAULT_TRITON_VERSION_MAP["triton_container_version"],
        help="This flag sets the container version for Triton Inference Server to be built. Default: the latest released version.",
    )
    parser.add_argument(
        "--upstream-container-version",
        required=False,
        default=DEFAULT_TRITON_VERSION_MAP["upstream_container_version"],
        help="This flag sets the upstream container version for Triton Inference Server to be built. Default: the latest released version.",
    )
    parser.add_argument(
        "--ort-version",
        required=False,
        default=DEFAULT_TRITON_VERSION_MAP["ort_version"],
        help="This flag sets the ORT version for Triton Inference Server to be built. Default: the latest supported version.",
    )
    parser.add_argument(
        "--ort-openvino-version",
        required=False,
        default=DEFAULT_TRITON_VERSION_MAP["ort_openvino_version"],
        help="This flag sets the OpenVino version for Triton Inference Server to be built. Default: the latest supported version.",
    )
    parser.add_argument(
        "--standalone-openvino-version",
        required=False,
        default=DEFAULT_TRITON_VERSION_MAP["standalone_openvino_version"],
        help="This flag sets the standalone OpenVino version for Triton Inference Server to be built. Default: the latest supported version.",
    )
    parser.add_argument(
        "--dcgm-version",
        required=False,
        default=DEFAULT_TRITON_VERSION_MAP["dcgm_version"],
        help="This flag sets the DCGM version for Triton Inference Server to be built. Default: the latest supported version.",
    )
    parser.add_argument(
        "--vllm-version",
        required=False,
        default=DEFAULT_TRITON_VERSION_MAP["vllm_version"],
        help="This flag sets the vLLM version for Triton Inference Server to be built. Default: the latest supported version.",
    )
    parser.add_argument(
        "--rhel-py-version",
        required=False,
        default=DEFAULT_TRITON_VERSION_MAP["rhel_py_version"],
        help="This flag sets the Python version for RHEL platform of Triton Inference Server to be built. Default: the latest supported version.",
    )

    # Each --build-secret occurrence contributes one [key, value] pair.
    parser.add_argument(
        "--build-secret",
        action="append",
        required=False,
        nargs=2,
        metavar=("key", "value"),
        help="Add build secrets in the form of <key> <value>. These secrets are used during the build process for vllm. The secrets are passed to the Docker build step as `--secret id=<key>`. The following keys are expected and their purposes are described below:\n\n"
        "  - 'req': A file containing a list of dependencies for pip (e.g., requirements.txt).\n"
        "  - 'build_public_vllm': A flag (default is 'true') indicating whether to build the public VLLM version.\n\n"
        "Ensure that the required environment variables for these secrets are set before running the build.",
    )
    parser.add_argument(
        "--triton-wheels-dependencies-group",
        required=False,
        type=str,
        default="all",
        help="The group of dependencies for Triton wheels to be installed. Default value is 'all'.",
    )
    FLAGS = parser.parse_args()

    # Options declared with action="append" stay None when never given on
    # the command line. Give each such option its own empty list so the code
    # below can iterate over and extend them unconditionally.
    for list_flag_name in (
        "image",
        "repo_tag",
        "backend",
        "endpoint",
        "filesystem",
        "repoagent",
        "cache",
        "library_paths",
        "extra_core_cmake_arg",
        "override_core_cmake_arg",
        "override_backend_cmake_arg",
        "extra_backend_cmake_arg",
        "build_secret",
    ):
        if getattr(FLAGS, list_flag_name) is None:
            setattr(FLAGS, list_flag_name, [])

    # Boost source archive; the URL can be overridden from the environment.
    # NOTE(review): the checksum below is fixed even when TRITON_BOOST_URL
    # points somewhere else -- confirm an overridden archive is expected to
    # match this digest.
    FLAGS.boost_url = os.environ.get(
        "TRITON_BOOST_URL",
        "https://archives.boost.io/release/1.80.0/source/boost_1_80_0.tar.gz",
    )
    FLAGS.boost_sha256 = (
        "4b2136f98bdd1f5857f1c3dea9ac2018effe65286cf251534b6ae20cc45e1847"
    )

    # --enable-all expands FLAGS to every standard setting, backend,
    # repo-agent, cache, file system and endpoint.
    if FLAGS.enable_all:
        enable_all()

    # Directory handling differs by build mode. A container build always
    # uses the build/ subdir next to this script and therefore rejects
    # --build-dir, --install-dir and --cmake-dir. A host build requires
    # --build-dir and derives defaults for the other two from it.
    if not FLAGS.no_container_build:
        for option_value, option_name in (
            (FLAGS.build_dir, "--build-dir"),
            (FLAGS.install_dir, "--install-dir"),
            (FLAGS.cmake_dir, "--cmake-dir"),
        ):
            if option_value is not None:
                fail(f"{option_name} must not be set for container-based build")
        FLAGS.build_dir = os.path.join(THIS_SCRIPT_DIR, "build")
    else:
        if FLAGS.build_dir is None:
            fail("--no-container-build requires --build-dir")
        if FLAGS.install_dir is None:
            FLAGS.install_dir = os.path.join(FLAGS.build_dir, "opt", "tritonserver")
        if FLAGS.cmake_dir is None:
            FLAGS.cmake_dir = THIS_SCRIPT_DIR

    # Determine the versions. Start with the Triton version: when --version
    # is not given, fall back to the release version recorded in
    # DEFAULT_TRITON_VERSION_MAP.
    if FLAGS.version is None:
        FLAGS.version = DEFAULT_TRITON_VERSION_MAP["release_version"]

    # Default parallelism is twice the reported CPU count.
    if FLAGS.build_parallel is None:
        FLAGS.build_parallel = 2 * multiprocessing.cpu_count()

    # Summarize the effective build configuration.
    log("Building Triton Inference Server")
    log(f"platform {target_platform()}")
    log(f"machine {target_machine()}")
    log(f"version {FLAGS.version}")
    log(f"build dir {FLAGS.build_dir}")
    log(f"install dir {FLAGS.install_dir}")
    log(f"cmake dir {FLAGS.cmake_dir}")

    # Default repo-tag for images, backends, repo-agents and caches that do
    # not name one explicitly: a container version ending in "dev" maps to
    # 'main', anything else maps to the matching release branch "r<version>".
    if FLAGS.triton_container_version.endswith("dev"):
        default_repo_tag = "main"
    else:
        default_repo_tag = "r" + FLAGS.triton_container_version
    log(f"default repo-tag: {default_repo_tag}")

    # Resolve the container versions from the Triton version and whatever
    # was given explicitly on the command line.
    FLAGS.container_version, FLAGS.upstream_container_version = container_versions(
        FLAGS.version, FLAGS.container_version, FLAGS.upstream_container_version
    )

    log(f"container version {FLAGS.container_version}")
    log(f"upstream container version {FLAGS.upstream_container_version}")

    for ep in FLAGS.endpoint:
        log('endpoint "{}"'.format(ep))
    for fs in FLAGS.filesystem:
        log('filesystem "{}"'.format(fs))

    # Map each requested backend to the repo-tag it is built from. An entry
    # is "<name>" or "<name>:<repo-tag>"; padding the split result with the
    # default tag yields [name, tag] in both cases.
    backends = {}
    for be in FLAGS.backend:
        be_name, be_tag = (be.split(":") + [default_repo_tag])[:2]
        log(f'backend "{be_name}" at tag/branch "{be_tag}"')
        backends[be_name] = be_tag

    # vLLM needs the Python backend; add it at the same tag when missing.
    if "vllm" in backends and "python" not in backends:
        vllm_tag = backends["vllm"]
        log(
            f"vLLM backend requires Python backend, adding Python backend with tag {vllm_tag}"
        )
        backends["python"] = vllm_tag

    # Build secrets arrive as [key, value] pairs.
    # NOTE(review): secrets, requirements and build_public_vllm are
    # module-level names and may be read by helpers elsewhere in this file;
    # confirm before renaming or removing any of them.
    secrets = dict(getattr(FLAGS, "build_secret", []))
    if secrets:
        requirements = secrets.get("req", "")
        build_public_vllm = secrets.get("build_public_vllm", "true")
        log(f'Build Arg for BUILD_PUBLIC_VLLM: "{build_public_vllm}"')

    # Same "<name>[:<repo-tag>]" handling for repo agents...
    repoagents = {}
    for be in FLAGS.repoagent:
        agent_name, agent_tag = (be.split(":") + [default_repo_tag])[:2]
        log(f'repoagent "{agent_name}" at tag/branch "{agent_tag}"')
        repoagents[agent_name] = agent_tag

    # ...and for caches.
    caches = {}
    for be in FLAGS.cache:
        cache_name, cache_tag = (be.split(":") + [default_repo_tag])[:2]
        log(f'cache "{cache_name}" at tag/branch "{cache_tag}"')
        caches[cache_name] = cache_tag

    # Map each overridden docker image role to its full image name. Entries
    # must have exactly the form "<image-name>,<full-image-registry>".
    images = {}
    for img in FLAGS.image:
        parts = img.split(",")
        fail_if(
            len(parts) != 2, "--image must specify <image-name>,<full-image-registry>"
        )
        image_role, image_ref = parts
        fail_if(
            image_role not in ("base", "gpu-base", "pytorch", "inference"),
            "unsupported value for --image",
        )
        log(f'image "{image_role}": "{image_ref}"')
        images[image_role] = image_ref

    # Map backends to an explicit library path. Only entries that split into
    # exactly "<backend>:<path>" are recorded; anything else is skipped
    # without an error.
    library_paths = {}
    for lpath in FLAGS.library_paths:
        parts = lpath.split(":")
        if len(parts) != 2:
            continue
        log(f'backend "{parts[0]}" library path "{parts[1]}"')
        library_paths[parts[0]] = parts[1]

    # Parse any explicitly specified cmake arguments for the core build.
    # Each entry is "<name>=<value>". Split on the first "=" only, so that a
    # value may itself contain "=" (for example
    # CMAKE_CXX_FLAGS=-DFOO=1); this matches how the backend variants of
    # these options are parsed.
    for cf in FLAGS.extra_core_cmake_arg:
        parts = cf.split("=", 1)
        fail_if(len(parts) != 2, "--extra-core-cmake-arg must specify <name>=<value>")
        log('CMake core extra "-D{}={}"'.format(parts[0], parts[1]))
        EXTRA_CORE_CMAKE_FLAGS[parts[0]] = parts[1]

    for cf in FLAGS.override_core_cmake_arg:
        parts = cf.split("=", 1)
        fail_if(
            len(parts) != 2, "--override-core-cmake-arg must specify <name>=<value>"
        )
        log('CMake core override "-D{}={}"'.format(parts[0], parts[1]))
        OVERRIDE_CORE_CMAKE_FLAGS[parts[0]] = parts[1]

    # Per-backend CMake arguments have the form "<backend>:<name>=<value>".
    # The backend is split off at the first ":" and the name at the first
    # "=", and the backend must be one of those selected for this build.
    for cf in FLAGS.extra_backend_cmake_arg:
        usage = "--extra-backend-cmake-arg must specify <backend>:<name>=<value>"
        parts = cf.split(":", 1)
        fail_if(len(parts) != 2, usage)
        be, assignment = parts
        parts = assignment.split("=", 1)
        fail_if(len(parts) != 2, usage)
        fail_if(
            be not in backends,
            f'--extra-backend-cmake-arg specifies backend "{be}" which is not included in build',
        )
        arg_name, arg_value = parts
        log(f'backend "{be}" CMake extra "-D{arg_name}={arg_value}"')
        EXTRA_BACKEND_CMAKE_FLAGS.setdefault(be, {})[arg_name] = arg_value

    for cf in FLAGS.override_backend_cmake_arg:
        usage = "--override-backend-cmake-arg must specify <backend>:<name>=<value>"
        parts = cf.split(":", 1)
        fail_if(len(parts) != 2, usage)
        be, assignment = parts
        parts = assignment.split("=", 1)
        fail_if(len(parts) != 2, usage)
        fail_if(
            be not in backends,
            f'--override-backend-cmake-arg specifies backend "{be}" which is not included in build',
        )
        arg_name, arg_value = parts
        log(f'backend "{be}" CMake override "-D{arg_name}={arg_value}"')
        OVERRIDE_BACKEND_CMAKE_FLAGS.setdefault(be, {})[arg_name] = arg_value

    # Every common component starts at the default repo-tag; --repo-tag
    # entries of the form "<component-name>:<repo-tag>" then override
    # individual components.
    components = dict.fromkeys(
        ("common", "core", "backend", "thirdparty"), default_repo_tag
    )
    for be in FLAGS.repo_tag:
        parts = be.split(":")
        fail_if(len(parts) != 2, "--repo-tag must specify <component-name>:<repo-tag>")
        component_name, component_tag = parts
        fail_if(
            component_name not in components,
            '--repo-tag <component-name> must be "common", "core", "backend", or "thirdparty"',
        )
        components[component_name] = component_tag
    for c, c_tag in components.items():
        log(f'component "{c}" at tag/branch "{c_tag}"')

    # Choose the build, install, ci, repo and cmake directories that are
    # written into the generated build scripts and Dockerfiles.
    if FLAGS.no_container_build:
        # Building on the host: use the directories from the command line.
        script_repo_dir = THIS_SCRIPT_DIR
        script_build_dir = FLAGS.build_dir
        script_install_dir = script_ci_dir = FLAGS.install_dir
        script_cmake_dir = FLAGS.cmake_dir
    else:
        # Building with Docker: use directories under FLAGS.tmp_dir inside
        # the container. FLAGS.tmp_dir may be specified with "\" on Windows,
        # so adjust to "/" for docker usage.
        script_build_dir = os.path.normpath(
            os.path.join(FLAGS.tmp_dir, "tritonbuild").replace("\\", "/")
        )
        script_install_dir = os.path.normpath(os.path.join(script_build_dir, "install"))
        script_ci_dir = os.path.normpath(os.path.join(script_build_dir, "ci"))
        workspace_dir = (
            os.path.normpath("c:/workspace")
            if target_platform() == "windows"
            else "/workspace"
        )
        script_repo_dir = script_cmake_dir = workspace_dir

    # The Windows build script is a PowerShell file.
    script_name = (
        "cmake_build.ps1" if target_platform() == "windows" else "cmake_build"
    )

    # Write the build script that invokes cmake for the core, backends, repo-agents, and caches.
    # The script file is created in FLAGS.build_dir, while the commands
    # written into it refer to the script_* directories chosen above.
    pathlib.Path(FLAGS.build_dir).mkdir(parents=True, exist_ok=True)
    with BuildScript(
        os.path.join(FLAGS.build_dir, script_name),
        verbose=FLAGS.verbose,
        desc=("Build script for Triton Inference Server"),
    ) as cmake_script:
        # Run the container pre-build command if the cmake build is
        # being done within the build container.
        if not FLAGS.no_container_build and FLAGS.container_prebuild_command:
            cmake_script.cmd(FLAGS.container_prebuild_command, check_exitcode=True)
            cmake_script.blankln()

        # Commands to build the core shared library and the server executable.
        if not FLAGS.no_core_build:
            core_build(
                cmake_script,
                script_repo_dir,
                script_cmake_dir,
                script_build_dir,
                script_install_dir,
                components,
                backends,
            )

        # Commands to build each backend...
        for be in backends:
            # Core backends are not built separately from core so skip...
            if be in CORE_BACKENDS:
                continue

            # If armnn_tflite backend, source from external repo for git clone
            if be == "armnn_tflite":
                github_organization = "https://gitlab.com/arm-research/smarter/"
            else:
                github_organization = FLAGS.github_organization

            # The vllm backend goes through backend_clone instead of
            # backend_build; every other backend goes through backend_build.
            if be == "vllm":
                backend_clone(
                    be,
                    cmake_script,
                    backends[be],
                    script_build_dir,
                    script_install_dir,
                    github_organization,
                )
            else:
                backend_build(
                    be,
                    cmake_script,
                    backends[be],
                    script_build_dir,
                    script_install_dir,
                    github_organization,
                    images,
                    components,
                    library_paths,
                )

        # Commands to build each repo agent...
        for ra in repoagents:
            repo_agent_build(
                ra,
                cmake_script,
                script_build_dir,
                script_install_dir,
                repoagent_repo,
                repoagents,
            )

        # Commands to build each cache...
        for cache in caches:
            cache_build(
                cache,
                cmake_script,
                script_build_dir,
                script_install_dir,
                cache_repo,
                caches,
            )

        # Commands needed only when building with Docker...
        if not FLAGS.no_container_build:
            # Commands to collect all the build artifacts needed for CI
            # testing.
            cibase_build(
                cmake_script,
                script_repo_dir,
                script_cmake_dir,
                script_build_dir,
                script_install_dir,
                script_ci_dir,
                backends,
            )

            # When building with Docker the install and ci artifacts
            # written to the build-dir while running the docker container
            # may have root ownership, so give them permissions to be
            # managed by all users on the host system.
            if target_platform() != "windows":
                finalize_build(cmake_script, script_install_dir, script_ci_dir)

    # Without --no-container-build the actual build runs inside a docker
    # container, and the final tritonserver docker image is created from
    # that. This needs a few generated Dockerfiles plus a top-level script
    # that drives the whole process; that script replaces the cmake script
    # as the one to execute below.
    if not FLAGS.no_container_build:
        script_name = (
            "docker_build.ps1" if target_platform() == "windows" else "docker_build"
        )

        create_build_dockerfiles(
            script_build_dir, images, backends, repoagents, caches, FLAGS.endpoint
        )
        create_docker_build_script(script_name, script_install_dir, script_ci_dir)

    # If not a dry run, execute the script to perform the build. For a
    # container-based build this is the 'docker_build' script, otherwise the
    # cmake script is run directly on this system.
    if not FLAGS.dryrun:
        if target_platform() == "windows":
            build_command = ["powershell.exe", "-noexit", "-File", f"./{script_name}"]
        else:
            build_command = [f"./{script_name}"]
        p = subprocess.Popen(build_command, cwd=FLAGS.build_dir)
        p.wait()
        fail_if(p.returncode != 0, "build failed")


================================================
FILE: compose.py
================================================
#!/usr/bin/env python3
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import os
import platform
import subprocess
import sys

FLAGS = None


#### helper functions
def log(msg, force=False):
    """Print ``msg`` to stderr unless quiet mode suppresses it.

    Args:
        msg: The message to print.
        force: When true the message is printed without consulting
            ``FLAGS.quiet``.
    """
    if not force and FLAGS.quiet:
        return
    try:
        print(msg, file=sys.stderr)
    except Exception:
        # Best effort: if the message itself cannot be printed, emit a
        # fixed placeholder instead of propagating the error.
        print("<failed to log>", file=sys.stderr)


def log_verbose(msg):
    """Print ``msg`` through ``log`` only when ``FLAGS.verbose`` is set.

    The message is logged with ``force=True``, so ``log`` does not consult
    ``FLAGS.quiet`` for it.
    """
    if not FLAGS.verbose:
        return
    log(msg, force=True)


def fail(msg):
    """Write ``msg`` to stderr with an ``error: `` prefix and exit with status 1."""
    report = "error: {}".format(msg)
    print(report, file=sys.stderr)
    raise SystemExit(1)


def fail_if(p, msg):
    """Call ``fail(msg)`` when ``p`` is truthy; otherwise do nothing."""
    if not p:
        return
    fail(msg)


def start_dockerfile(ddir, images, argmap, dockerfile_name, backends):
    """Create the compose Dockerfile and write its opening section.

    The file ``dockerfile_name`` inside ``ddir`` is opened in write mode, so
    any existing content is replaced.

    Args:
        ddir: Directory the Dockerfile is written to.
        images: Map of image names. "full" and "min" are always read;
            "gpu-min" is read only for a CPU-only build that includes the
            "pytorch" backend.
        argmap: Map providing "TRITON_VERSION" and
            "TRITON_CONTAINER_VERSION"; it is also forwarded to
            ``build.dockerfile_prepare_container_linux``.
        dockerfile_name: File name of the Dockerfile.
        backends: Names of the requested backends.
    """
    # Stage "full" is the image that later COPY --from=full lines read from.
    preamble = """
#
# Multistage build.
#
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}

FROM {} AS full
"""
    sections = [
        preamble.format(
            argmap["TRITON_VERSION"],
            argmap["TRITON_CONTAINER_VERSION"],
            images["full"],
        )
    ]

    # PyTorch backends need extra CUDA and other dependencies at runtime that
    # are missing in the CPU-only base container, so an additional stage is
    # declared for the Triton Min image they are copied from.
    cpu_only_with_pytorch = (not FLAGS.enable_gpu) and ("pytorch" in backends)
    if cpu_only_with_pytorch:
        sections.append("\nFROM {} AS min_container\n\n".format(images["gpu-min"]))

    # Final stage: start from the "min" image.
    sections.append(
        "\nFROM {}\n\nENV PIP_BREAK_SYSTEM_PACKAGES=1\n".format(images["min"])
    )

    import build

    sections.append(
        build.dockerfile_prepare_container_linux(
            argmap, backends, FLAGS.enable_gpu, platform.machine().lower()
        )
    )

    # Files and directories copied from the "full" stage.
    sections.append(
        """
WORKDIR /opt/tritonserver
COPY --chown=1000:1000 --from=full /opt/tritonserver/LICENSE .
COPY --chown=1000:1000 --from=full /opt/tritonserver/TRITON_VERSION .
COPY --chown=1000:1000 --from=full /opt/tritonserver/NVIDIA_Deep_Learning_Container_License.pdf .
COPY --chown=1000:1000 --from=full /opt/tritonserver/bin bin/
COPY --chown=1000:1000 --from=full /opt/tritonserver/lib lib/
COPY --chown=1000:1000 --from=full /opt/tritonserver/include include/
"""
    )

    with open(os.path.join(ddir, dockerfile_name), "w") as out:
        out.write("".join(sections))


def add_requested_backends(ddir, dockerfile_name, backends):
    """Append the backend COPY instructions to the Dockerfile.

    A header comment is always appended. One COPY line is added per entry in
    ``backends``, and when at least one backend was requested a ``RUN chown``
    for the top-level backends directory follows.

    Args:
        ddir: Directory containing the Dockerfile.
        dockerfile_name: File name of the Dockerfile (opened in append mode).
        backends: Names of the backends to copy from the "full" stage.
    """
    copy_line = "COPY --chown=1000:1000 --from=full /opt/tritonserver/backends/{} /opt/tritonserver/backends/{}\n"
    chunks = ["# Copying over backends \n"]
    chunks.extend(copy_line.format(name, name) for name in backends)
    if len(backends) > 0:
        chunks.append(
            "\n"
            "# Top-level /opt/tritonserver/backends not copied so need to explicitly set permissions here\n"
            "RUN chown triton-server:triton-server /opt/tritonserver/backends\n"
        )
    with open(os.path.join(ddir, dockerfile_name), "a") as out:
        out.write("".join(chunks))


def add_requested_repoagents(ddir, dockerfile_name, repoagents):
    """Append the repoagent COPY instructions to the Dockerfile.

    A header comment is always appended. One COPY line is added per entry in
    ``repoagents``, and when at least one repoagent was requested a
    ``RUN chown`` for the top-level repoagents directory follows.

    Args:
        ddir: Directory containing the Dockerfile.
        dockerfile_name: File name of the Dockerfile (opened in append mode).
        repoagents: Names of the repoagents to copy from the "full" stage.
    """
    copy_line = "COPY --chown=1000:1000 --from=full /opt/tritonserver/repoagents/{} /opt/tritonserver/repoagents/{}\n"
    chunks = ["#  Copying over repoagents \n"]
    chunks.extend(copy_line.format(name, name) for name in repoagents)
    if len(repoagents) > 0:
        chunks.append(
            "\n"
            "# Top-level /opt/tritonserver/repoagents not copied so need to explicitly set permissions here\n"
            "RUN chown triton-server:triton-server /opt/tritonserver/repoagents\n"
        )
    with open(os.path.join(ddir, dockerfile_name), "a") as out:
        out.write("".join(chunks))


def add_requested_caches(ddir, dockerfile_name, caches):
    """Append the cache COPY instructions to the Dockerfile.

    A header comment is always appended. One COPY line is added per entry in
    ``caches``, and when at least one cache was requested a ``RUN chown`` for
    the top-level caches directory follows.

    Args:
        ddir: Directory containing the Dockerfile.
        dockerfile_name: File name of the Dockerfile (opened in append mode).
        caches: Names of the caches to copy from the "full" stage.
    """
    copy_line = "COPY --chown=1000:1000 --from=full /opt/tritonserver/caches/{} /opt/tritonserver/caches/{}\n"
    chunks = ["#  Copying over caches \n"]
    chunks.extend(copy_line.format(name, name) for name in caches)
    if len(caches) > 0:
        chunks.append(
            "\n"
            "# Top-level /opt/tritonserver/caches not copied so need to explicitly set permissions here\n"
            "RUN chown triton-server:triton-server /opt/tritonserver/caches\n"
        )
    with open(os.path.join(ddir, dockerfile_name), "a") as out:
        out.write("".join(chunks))


def end_dockerfile(ddir, dockerfile_name, argmap):
    """Append the closing section of the Dockerfile.

    When ``argmap["SAGEMAKER_ENDPOINT"]`` is truthy, a SageMaker LABEL and a
    COPY of ``/usr/bin/serve`` from the "full" stage are appended; otherwise
    an empty string is written.

    Args:
        ddir: Directory containing the Dockerfile.
        dockerfile_name: File name of the Dockerfile (opened in append mode).
        argmap: Map that must contain the "SAGEMAKER_ENDPOINT" key.
    """
    sagemaker_section = (
        "\n"
        "LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true\n"
        "COPY --chown=1000:1000 --from=full /usr/bin/serve /usr/bin/.\n"
    )
    tail = sagemaker_section if argmap["SAGEMAKER_ENDPOINT"] else ""
    with open(os.path.join(ddir, dockerfile_name), "a") as out:
        out.write(tail)


def build_docker_image(ddir, dockerfile_name, container_name):
    """Run ``docker build`` on the generated Dockerfile.

    The image is tagged ``container_name`` and the build context is the
    current working directory. A non-zero exit status from docker aborts the
    script through ``fail_if``.

    Args:
        ddir: Directory containing the Dockerfile.
        dockerfile_name: File name of the Dockerfile.
        container_name: Tag given to the built image.
    """
    dockerfile_path = os.path.join(ddir, dockerfile_name)
    command = ["docker", "build", "-t", container_name, "-f", dockerfile_path, "."]
    proc = subprocess.Popen(command)
    exit_status = proc.wait()
    fail_if(exit_status != 0, "docker build {} failed".format(container_name))


def get_container_version_if_not_specified():
    """Fill in ``FLAGS.container_version`` when the user did not supply one.

    When the flag is ``None``, the first line of the ``TRITON_VERSION`` file
    in the current working directory is read and handed to
    ``build.container_versions``; the second element of its result becomes
    ``FLAGS.container_version``. The container version in use is logged in
    either case.
    """
    needs_lookup = FLAGS.container_version is None
    if needs_lookup:
        # Read from TRITON_VERSION file in server repo to determine version
        with open("TRITON_VERSION", "r") as version_file:
            triton_version = version_file.readline().strip()

        import build

        _unused, resolved = build.container_versions(
            triton_version, None, FLAGS.container_version
        )
        FLAGS.container_version = resolved
        log("version {}".format(triton_version))
    log("using container version {}".format(FLAGS.container_version))


def _docker_pull(image):
    """Run ``docker pull`` for ``image`` and abort the script if it fails.

    The command's output is not captured, so docker's own diagnostics go
    straight to the console.
    """
    pulled = subprocess.run(["docker", "pull", image])
    fail_if(
        pulled.returncode != 0,
        "docker pull container {} failed".format(image),
    )


def _docker_inspect(template, image, failure_msg):
    """Return the stdout of ``docker inspect -f <template> <image>``.

    Aborts through ``fail_if`` with ``failure_msg`` followed by the command's
    stderr when docker exits with a non-zero status.
    """
    inspected = subprocess.run(
        ["docker", "inspect", "-f", template, image],
        capture_output=True,
        text=True,
    )
    fail_if(
        inspected.returncode != 0,
        "{}, {}".format(failure_msg, inspected.stderr),
    )
    return inspected.stdout


def create_argmap(images, skip_pull):
    """Inspect the upstream images and collect values used to compose.

    Unless ``skip_pull`` is set, the "full" image (and, for GPU builds, the
    "min" image) is pulled first. The environment of the "full" image is then
    read with ``docker inspect`` and validated against ``FLAGS.enable_gpu``.

    Args:
        images: Map of image names; the "full" and "min" entries are read.
        skip_pull: When true, no ``docker pull`` is performed.

    Returns:
        A dict with the keys "NVIDIA_BUILD_REF", "NVIDIA_BUILD_ID",
        "TRITON_VERSION", "TRITON_CONTAINER_VERSION", "DCGM_VERSION" and
        "SAGEMAKER_ENDPOINT" (a bool: whether ``/usr/bin/serve`` exists in the
        "full" image).

    Any failed docker command or failed validation terminates the script
    through ``fail_if``.
    """
    import re  # parse all PATH environment variables

    # Prints every environment entry of the image followed by a space.
    env_template = "{{range $index, $value := .Config.Env}}{{$value}} {{end}}"

    full_docker_image = images["full"]
    min_docker_image = images["min"]
    enable_gpu = FLAGS.enable_gpu

    # first pull docker images
    if not skip_pull:
        log("pulling container:{}".format(full_docker_image))
        _docker_pull(full_docker_image)

    if enable_gpu:
        if not skip_pull:
            _docker_pull(min_docker_image)
        min_env = _docker_inspect(
            env_template,
            min_docker_image,
            "docker inspect to find triton environment variables for min container failed",
        )
        # min container needs to be GPU-support-enabled if the build is GPU build
        fail_if(
            "CUDA_VERSION" not in min_env,
            "Composing container with gpu support enabled but min container provided does not have CUDA installed",
        )

    # Check full container environment variables
    full_env = _docker_inspect(
        env_template,
        full_docker_image,
        "docker inspect to find environment variables for full container failed",
    )
    log_verbose("inspect args: {}".format(full_env))

    # An explicit TRITON_SERVER_GPU_ENABLED entry wins; without one, the
    # presence of CUDA_VERSION is taken to mean a GPU-enabled image.
    gpu_flag = re.search(r"TRITON_SERVER_GPU_ENABLED=([\S]{1,}) ", full_env)
    if gpu_flag is not None:
        gpu_enabled = gpu_flag.group(1) == "1"
    else:
        gpu_enabled = "CUDA_VERSION" in full_env
    fail_if(
        gpu_enabled != enable_gpu,
        "Error: full container provided was build with "
        "'TRITON_SERVER_GPU_ENABLED' as {} and you are composing container "
        "with 'TRITON_SERVER_GPU_ENABLED' as {}".format(gpu_enabled, enable_gpu),
    )

    # NOTE(review): "TRITON_SERVER_VERSION=" is also the tail of
    # "NVIDIA_TRITON_SERVER_VERSION="; the minimum lengths (6 vs 5) are
    # presumably what tells the two entries apart - confirm before relaxing.
    version_match = re.search(r"TRITON_SERVER_VERSION=([\S]{6,}) ", full_env)
    fail_if(
        version_match is None,
        "docker inspect to find triton server version failed, {}".format(full_env),
    )
    version = version_match.group(1)

    container_match = re.search(
        r"NVIDIA_TRITON_SERVER_VERSION=([\S]{5,}) ", full_env
    )
    fail_if(
        container_match is None,
        "docker inspect to find triton container version failed, {}".format(
            full_env
        ),
    )
    container_version = container_match.group(1)

    dcgm_match = re.search(r"DCGM_VERSION=([\S]{4,}) ", full_env)
    if dcgm_match is None:
        # Fall back to a fixed version when the image does not advertise one.
        dcgm_version = "4.4.0-1"
        log(
            "WARNING: DCGM version not found from image, installing the earlierst version {}".format(
                dcgm_version
            )
        )
    else:
        dcgm_version = dcgm_match.group(1)

    build_ref = _docker_inspect(
        '{{ index .Config.Labels "com.nvidia.build.ref"}}',
        full_docker_image,
        "docker inspect of upstream docker image build sha failed",
    )
    build_id = _docker_inspect(
        '{{ index .Config.Labels "com.nvidia.build.id"}}',
        full_docker_image,
        "docker inspect of upstream docker image build id failed",
    )

    # --rm removes the throwaway container once the listing has been read, so
    # repeated compose runs do not accumulate stopped containers.
    p_find = subprocess.run(
        ["docker", "run", "--rm", full_docker_image, "bash", "-c", "ls /usr/bin/"],
        capture_output=True,
        text=True,
    )
    fail_if(
        p_find.returncode != 0,
        "Cannot search for 'serve' in /usr/bin, {}".format(p_find.stderr),
    )
    # Require an entry named exactly "serve"; a substring match would also
    # accept unrelated names that merely contain it, and the Dockerfile later
    # copies /usr/bin/serve specifically.
    has_serve = "serve" in p_find.stdout.split()

    argmap = {
        "NVIDIA_BUILD_REF": build_ref.rstrip(),
        "NVIDIA_BUILD_ID": build_id.rstrip(),
        "TRITON_VERSION": version,
        "TRITON_CONTAINER_VERSION": container_version,
        "DCGM_VERSION": dcgm_version,
        "SAGEMAKER_ENDPOINT": has_serve,
    }
    return argmap


# Script entry point: parse the command line, resolve the upstream images,
# generate Dockerfile.compose in the work directory and (unless --dry-run is
# given) build the composed image.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    group_qv = parser.add_mutually_exclusive_group()
    group_qv.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        required=False,
        help="Disable console output.",
    )
    group_qv.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        help="Enable verbose output.",
    )
    parser.add_argument(
        "--output-name",
        type=str,
        required=False,
        help='Name for the generated Docker image. Default is "tritonserver".',
    )
    parser.add_argument(
        "--work-dir",
        type=str,
        required=False,
        help="Generated dockerfiles are placed here. Default to current directory.",
    )
    parser.add_argument(
        "--container-version",
        type=str,
        required=False,
        help="The version to use for the generated Docker image. If not specified "
        "the container version will be chosen automatically based on the "
        "repository branch.",
    )
    parser.add_argument(
        "--image",
        action="append",
        required=False,
        help="Use specified Docker image to generate Docker image. Specified as "
        '<image-name>,<full-image-name>. <image-name> can be "min", "gpu-min" '
        'or "full". Both "min" and "full" need to be specified at the same time. '
        'This will override "--container-version". "gpu-min" is needed for '
        "CPU-only container to copy PyTorch deps.",
    )
    parser.add_argument(
        "--enable-gpu",
        nargs="?",
        type=lambda x: (str(x).lower() == "true"),
        const=True,
        default=True,
        required=False,
        help=argparse.SUPPRESS,
    )
    parser.add_argument(
        "--backend",
        action="append",
        required=False,
        help="Include <backend-name> in the generated Docker image. The flag may be "
        "specified multiple times.",
    )
    parser.add_argument(
        "--repoagent",
        action="append",
        required=False,
        help="Include <repoagent-name> in the generated Docker image. The flag may "
        "be specified multiple times.",
    )
    parser.add_argument(
        "--cache",
        action="append",
        required=False,
        help="Include <cache-name> in the generated Docker image. The flag may "
        "be specified multiple times.",
    )
    parser.add_argument(
        "--skip-pull",
        action="store_true",
        required=False,
        help="Do not pull the required docker images. The user is responsible "
        "for pulling the upstream images needed to compose the image.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        required=False,
        help="Only creates Dockerfile.compose, does not build the Docker image.",
    )

    FLAGS = parser.parse_args()

    if FLAGS.work_dir is None:
        FLAGS.work_dir = "."
    if FLAGS.output_name is None:
        FLAGS.output_name = "tritonserver"

    dockerfile_name = "Dockerfile.compose"

    # "append" actions leave the attribute as None when the flag is absent.
    if FLAGS.backend is None:
        FLAGS.backend = []
    if FLAGS.repoagent is None:
        FLAGS.repoagent = []
    if FLAGS.cache is None:
        FLAGS.cache = []

    # Initialize map of docker images.
    images = {}
    if FLAGS.image:
        for img in FLAGS.image:
            parts = img.split(",")
            fail_if(
                len(parts) != 2,
                "--image must specific <image-name>,<full-image-registry>",
            )
            fail_if(
                parts[0] not in ["min", "full", "gpu-min"],
                "unsupported image-name '{}' for --image".format(parts[0]),
            )
            log('image "{}": "{}"'.format(parts[0], parts[1]))
            images[parts[0]] = parts[1]
    else:
        get_container_version_if_not_specified()
        if FLAGS.enable_gpu:
            images = {
                "full": "nvcr.io/nvidia/tritonserver:{}-py3".format(
                    FLAGS.container_version
                ),
                "min": "nvcr.io/nvidia/tritonserver:{}-py3-min".format(
                    FLAGS.container_version
                ),
            }
        else:
            images = {
                "full": "nvcr.io/nvidia/tritonserver:{}-cpu-only-py3".format(
                    FLAGS.container_version
                ),
                "min": "ubuntu:22.04",
            }
    # Check the two required names explicitly: counting entries would accept
    # e.g. "gpu-min" + "full" and only fail later on the missing "min" key.
    fail_if(
        "full" not in images or "min" not in images,
        "Need to specify both 'full' and 'min' images if at all",
    )

    # For CPU-only image we need to copy some cuda libraries and dependencies
    # since we are using PyTorch containers that
    # are not CPU-only.
    if (
        not FLAGS.enable_gpu
        and ("pytorch" in FLAGS.backend)
        and ("gpu-min" not in images)
    ):
        # When the images came from --image the container version may not have
        # been resolved yet; resolve it so the default tag is never "None".
        if FLAGS.container_version is None:
            get_container_version_if_not_specified()
        images["gpu-min"] = "nvcr.io/nvidia/tritonserver:{}-py3-min".format(
            FLAGS.container_version
        )

    argmap = create_argmap(images, FLAGS.skip_pull)

    start_dockerfile(FLAGS.work_dir, images, argmap, dockerfile_name, FLAGS.backend)
    add_requested_backends(FLAGS.work_dir, dockerfile_name, FLAGS.backend)
    add_requested_repoagents(FLAGS.work_dir, dockerfile_name, FLAGS.repoagent)
    add_requested_caches(FLAGS.work_dir, dockerfile_name, FLAGS.cache)
    end_dockerfile(FLAGS.work_dir, dockerfile_name, argmap)

    if not FLAGS.dry_run:
        build_docker_image(FLAGS.work_dir, dockerfile_name, FLAGS.output_name)


================================================
FILE: deploy/alibaba-cloud/README.md
================================================
<!--
# Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Deploy Triton Inference Server on PAI-EAS
* Table Of Contents
   - [Description](https://yuque.alibaba-inc.com/pai/blade/mtptqc#Description)
   - [Prerequisites](https://yuque.alibaba-inc.com/pai/blade/mtptqc#Prerequisites)
   - [Demo Instruction](https://yuque.alibaba-inc.com/pai/blade/mtptqc#31bb94ef)
   - [Additional Resources](https://yuque.alibaba-inc.com/pai/blade/mtptqc#89d5e680)
   - [Known Issues](https://yuque.alibaba-inc.com/pai/blade/mtptqc#558ab0be)

# Description
This repository contains information about how to deploy NVIDIA Triton Inference Server in EAS(Elastic Algorithm Service) of Alibaba-Cloud.
- EAS provides a simple way for deep learning developers to deploy their models in Alibaba Cloud.
- Using **Triton Processor** is the recommended way on EAS to deploy Triton Inference Server. Users can simply deploy a Triton Server by preparing models and creating an EAS service with the processor type set to `triton`.
- Models should be uploaded to Alibaba Cloud's OSS(Object Storage Service). User's model repository in OSS will be mounted onto local path visible to Triton Server.
- This documentation uses Triton's own example models for demo. The ONNX inception v3 model can be obtained by the `fetch_models.sh` script.

# Prerequisites
- You should register an Alibaba Cloud account and be able to use EAS through [eascmd](https://help.aliyun.com/document_detail/111031.html?spm=a2c4g.11186623.6.752.42356f46FN5fU1), a command line tool for creating, stopping, or scaling services on EAS.
- Before creating an EAS service, you should buy dedicated resource groups(CPU or GPU) on EAS following this [document](https://www.alibabacloud.com/help/doc-detail/120122.htm).
- Make sure you can use OSS(Object Storage Service), the models should be uploaded into your own OSS bucket.

# Demo Instruction
## Prepare a model repo directory in OSS
Download the ONNX inception v3 model via [fetch_models.sh](https://github.com/triton-inference-server/server/blob/main/docs/examples/fetch_models.sh). Then use [ossutil](https://help.aliyun.com/document_detail/50452.html?spm=a2c4g.11186623.6.833.26d66d51dPEytI), a command line tool for working with OSS, to upload the model to an OSS directory of your choice.

```
./ossutil cp inception_v3_onnx/ oss://triton-model-repo/models
```
## Create Triton Service with json config by eascmd
The following is the json we use when creating a Triton Server on EAS.
```
{
  "name": "<your triton service name>",
  "processor": "triton",
  "processor_params": [
    "--model-repository=oss://triton-model-repo/models",
    "--allow-grpc=true",
    "--allow-http=true"
  ],
  "metadata": {
    "instance": 1,
    "cpu": 4,
    "gpu": 1,
    "memory": 10000,
    "resource": "<your resource id>",
    "rpc.keepalive": 3000
  }
}
```
Only processor and processor_params should be different from a normal EAS service.
|params|details|
|--------|-------|
|processor|Name should be **triton** to use Triton on EAS|
|processor_params|List of strings, every element is a param for tritonserver |

```
./eascmd create triton.config
[RequestId]: AECDB6A4-CB69-4688-AA35-BA1E020C39E6
+-------------------+------------------------------------------------------------------------------------------------+
| Internet Endpoint | http://1271520832287160.cn-shanghai.pai-eas.aliyuncs.com/api/predict/test_triton_processor     |
| Intranet Endpoint | http://1271520832287160.vpc.cn-shanghai.pai-eas.aliyuncs.com/api/predict/test_triton_processor |
|             Token | MmY3M2ExZGYwYjZiMTQ5YTRmZWE3MDAzNWM1ZTBiOWQ3MGYxZGNkZQ==                                       |
+-------------------+------------------------------------------------------------------------------------------------+
[OK] Service is now deploying
[OK] Successfully synchronized resources
[OK] Waiting [Total: 1, Pending: 1, Running: 0]
[OK] Waiting [Total: 1, Pending: 1, Running: 0]
[OK] Running [Total: 1, Pending: 0, Running: 1]
[OK] Service is running
```
## Query Triton service by python client
### Install triton's python client
```
pip install tritonclient[all]
```
### A demo to query inception model
```
import numpy as np
import time
from PIL import Image

import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

URL = "<service url>"
HEADERS = {"Authorization": "<service token>"}
input_img = httpclient.InferInput("input", [1, 299, 299, 3], "FP32")
# Using one of the cat images from imagenet or a random cat images you like
img = Image.open('./cat.png').resize((299, 299))
img = np.asarray(img).astype('float32') / 255.0
input_img.set_data_from_numpy(img.reshape([1, 299, 299, 3]), binary_data=True)

output = httpclient.InferRequestedOutput(
    "InceptionV3/Predictions/Softmax", binary_data=True
)
triton_client = httpclient.InferenceServerClient(url=URL, verbose=False)

start = time.time()
for i in range(10):
    results = triton_client.infer(
        "inception_v3_onnx", inputs=[input_img], outputs=[output], headers=HEADERS
    )
    res_body = results.get_response()
    elapsed_ms = (time.time() - start) * 1000
    if i == 0:
        print("model name: ", res_body["model_name"])
        print("model version: ", res_body["model_version"])
        print("output name: ", res_body["outputs"][0]["name"])
        print("output shape: ", res_body["outputs"][0]["shape"])
    print("[{}] Avg rt(ms): {:.2f}".format(i, elapsed_ms))
    start = time.time()
```
You will get the following result by running the python script:
```
[0] Avg rt(ms): 86.05
[1] Avg rt(ms): 52.35
[2] Avg rt(ms): 50.56
[3] Avg rt(ms): 43.45
[4] Avg rt(ms): 41.19
[5] Avg rt(ms): 40.55
[6] Avg rt(ms): 37.24
[7] Avg rt(ms): 37.16
[8] Avg rt(ms): 36.68
[9] Avg rt(ms): 34.24
[10] Avg rt(ms): 34.27
```
# Additional Resources
See the following resources to learn more about how to use Alibaba Cloud's OSS or EAS.
- [Alibaba Cloud OSS's Document](https://help.aliyun.com/product/31815.html?spm=a2c4g.11186623.6.540.3c0f62e7q3jw8b)


# Known Issues
- [Binary Tensor Data Extension](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_binary_data.md) is not fully supported yet. Users who want to use a service with the binary extension supported can currently do so only in the cn-shanghai region of PAI-EAS.
- Currently only HTTP/1 is supported, hence gRPC cannot be used when querying Triton servers on EAS. HTTP/2 will be officially supported in a short time.
- Users should not mount a whole OSS bucket when launching Triton processor, but an arbitrarily deep sub-directory in the bucket. Otherwise the mounted path will not be as expected.
- Not all Triton Server parameters are supported on EAS; the following params are supported on EAS:
```
model-repository
log-verbose
log-info
log-warning
log-error
exit-on-error
strict-model-config
strict-readiness
allow-http
http-thread-count
pinned-memory-pool-byte-size
cuda-memory-pool-byte-size
min-supported-compute-capability
buffer-manager-thread-count
backend-config
```


================================================
FILE: deploy/aws/Chart.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

apiVersion: v1
appVersion: "1.0"
description: Triton Inference Server
name: triton-inference-server
version: 1.0.0


================================================
FILE: deploy/aws/README.md
================================================
<!--
# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)

# Kubernetes Deploy: Triton Inference Server Cluster

A helm chart for installing a single cluster of Triton Inference
Server is provided. By default the cluster contains a single instance
of the inference server but the *replicaCount* configuration parameter
can be set to create a cluster of any size, as described below.

This guide assumes you already have a functional Kubernetes cluster
and helm installed (see below for instructions on installing
helm). Note the following requirements:

* The helm chart deploys Prometheus and Grafana to collect and display Triton metrics. To use this helm chart you must install Prometheus and Grafana in your cluster as described below and your cluster must contain sufficient CPU resources to support these services.

* If you want Triton Server to use GPUs for inferencing, your cluster
must be configured to contain the desired number of GPU nodes (EC2 G4 instances recommended)
with support for the NVIDIA driver and CUDA version required by the version
of the inference server you are using.

The steps below describe how to set-up a model repository, use helm to
launch the inference server, and then send inference requests to the
running server. You can access a Grafana endpoint to see real-time
metrics reported by the inference server.

## Installing Helm

### Helm v3

If you do not already have Helm installed in your Kubernetes cluster,
executing the following steps from the [official helm install
guide](https://helm.sh/docs/intro/install/) will
give you a quick setup.

If you're currently using Helm v2 and would like to migrate to Helm v3,
please see the [official migration guide](https://helm.sh/docs/topics/v2_v3_migration/).

### Helm v2

> **NOTE**: Moving forward this chart will only be tested and maintained for Helm v3.

Below are example instructions for installing Helm v2.

```
$ curl https://raw.githubusercontent.com/helm/helm/master/scripts/get | bash
$ kubectl create serviceaccount -n kube-system tiller
serviceaccount/tiller created
$ kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
$ helm init --service-account tiller --wait
```

If you run into any issues, you can refer to the official installation guide [here](https://v2.helm.sh/docs/install/).

## Model Repository

If you already have a model repository you may use that with this helm
chart. If you do not have a model repository, you can checkout a local
copy of the inference server source repository to create an example
model repository:

```
$ git clone https://github.com/triton-inference-server/server.git
```

Triton Server needs a repository of models that it will make available
for inferencing. For this example you will place the model repository
in an AWS S3 Storage bucket.

```
$ aws s3 mb s3://triton-inference-server-repository
```

Following the [QuickStart](../../docs/getting_started/quickstart.md) download the
example model repository to your system and copy it into the AWS S3
bucket.

```
$ aws s3 cp --recursive docs/examples/model_repository s3://triton-inference-server-repository/model_repository
```

### AWS Model Repository
To load the model from AWS S3, you need to convert the following AWS credentials into base64 format and add them to the values.yaml file.

```
echo -n 'REGION' | base64
```
```
echo -n 'SECRET_KEY_ID' | base64
```
```
echo -n 'SECRET_ACCESS_KEY' | base64
```

## Deploy Prometheus and Grafana

The inference server metrics are collected by Prometheus and viewable
by Grafana. The inference server helm chart assumes that Prometheus
and Grafana are available so this step must be followed even if you
don't want to use Grafana.

Use the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) to install these components. The
*serviceMonitorSelectorNilUsesHelmValues* flag is needed so that
Prometheus can find the inference server metrics in the *example*
release deployed below.

```
$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false prometheus-community/kube-prometheus-stack
```

Then port-forward to the Grafana service so you can access it from
your local browser.

```
$ kubectl port-forward service/example-metrics-grafana 8080:80
```

Now you should be able to navigate in your browser to localhost:8080
and see the Grafana login page. Use username=admin and
password=prom-operator to login.

An example Grafana dashboard is available in dashboard.json. Use the
import function in Grafana to import and view this dashboard.

## Deploy the Inference Server

Deploy the inference server using the default configuration with the
following commands.

```
$ cd <directory containing Chart.yaml>
$ helm install example .
```

Use kubectl to see status and wait until the inference server pods are
running.

```
$ kubectl get pods
NAME                                               READY   STATUS    RESTARTS   AGE
example-triton-inference-server-5f74b55885-n6lt7   1/1     Running   0          2m21s
```

There are several ways of overriding the default configuration as
described in this [helm
documentation](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing).

You can edit the values.yaml file directly or you can use the *--set*
option to override a single parameter with the CLI. For example, to
deploy a cluster of four inference servers use *--set* to set the
replicaCount parameter.

```
$ helm install example --set replicaCount=4 .
```

You can also write your own "config.yaml" file with the values you
want to override and pass it to helm.

```
$ cat << EOF > config.yaml
namespace: MyCustomNamespace
image:
  imageName: nvcr.io/nvidia/tritonserver:custom-tag
  modelRepositoryPath: s3://my_model_repository
EOF
$ helm install example -f config.yaml .
```

## Using Triton Inference Server

Now that the inference server is running you can send HTTP or GRPC
requests to it to perform inferencing. By default, the inferencing
service is exposed with a LoadBalancer service type. Use the following
to find the external IP for the inference server. In this case it is
34.83.9.133.

```
$ kubectl get services
NAME                             TYPE           CLUSTER-IP     EXTERNAL-IP   PORT(S)                                        AGE
...
example-triton-inference-server  LoadBalancer   10.18.13.28    34.83.9.133   8000:30249/TCP,8001:30068/TCP,8002:32723/TCP   47m
```

The inference server exposes an HTTP endpoint on port 8000, a GRPC
endpoint on port 8001, and a Prometheus metrics endpoint on
port 8002. You can use curl to get the metadata of the inference server
from the HTTP endpoint.

```
$ curl 34.83.9.133:8000/v2
```

Follow the [QuickStart](../../docs/getting_started/quickstart.md) to get the example
image classification client that can be used to perform inferencing
using image classification models being served by the inference
server. For example,

```
$ image_client -u 34.83.9.133:8000 -m inception_v3_onnx -s INCEPTION -c3 mug.jpg
Request 0, batch size 1
Image 'images/mug.jpg':
    504 (COFFEE MUG) = 0.723992
    968 (CUP) = 0.270953
    967 (ESPRESSO) = 0.00115997
```

## Cleanup

Once you've finished using the inference server you should use helm to
delete the deployment.

```
$ helm list
NAME            REVISION  UPDATED                   STATUS    CHART                          APP VERSION   NAMESPACE
example         1         Wed Feb 27 22:16:55 2019  DEPLOYED  triton-inference-server-1.0.0  1.0           default
example-metrics	1       	Tue Jan 21 12:24:07 2020	DEPLOYED	prometheus-operator-6.18.0   	 0.32.0     	 default

$ helm uninstall example
$ helm uninstall example-metrics
```

For the Prometheus and Grafana services, you should [explicitly delete
CRDs](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack#uninstall-helm-chart):

```
$ kubectl delete crd alertmanagerconfigs.monitoring.coreos.com alertmanagers.monitoring.coreos.com podmonitors.monitoring.coreos.com probes.monitoring.coreos.com prometheuses.monitoring.coreos.com prometheusrules.monitoring.coreos.com servicemonitors.monitoring.coreos.com thanosrulers.monitoring.coreos.com
```

You may also want to delete the AWS bucket you created to hold the
model repository.

```
$ aws s3 rb s3://triton-inference-server-repository --force
```


================================================
FILE: deploy/aws/dashboard.json
================================================
{
  "__inputs": [
    {
      "name": "DS_PROMETHEUS",
      "label": "Prometheus",
      "description": "",
      "type": "datasource",
      "pluginId": "prometheus",
      "pluginName": "Prometheus"
    }
  ],
  "__requires": [
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "6.3.5"
    },
    {
      "type": "panel",
      "id": "graph",
      "name": "Graph",
      "version": ""
    },
    {
      "type": "panel",
      "id": "heatmap",
      "name": "Heatmap",
      "version": ""
    },
    {
      "type": "datasource",
      "id": "prometheus",
      "name": "Prometheus",
      "version": "1.0.0"
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "${DS_PROMETHEUS}",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 2,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "nv_inference_request_success",
          "legendFormat": "Success {{instance}}",
          "refId": "A"
        },
        {
          "expr": "nv_inference_request_failure",
          "legendFormat": "Failure {{instance}}",
          "refId": "B"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Cumulative Inference Requests",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "cards": {
        "cardPadding": null,
        "cardRound": null
      },
      "color": {
        "cardColor": "#b4ff00",
        "colorScale": "sqrt",
        "colorScheme": "interpolateReds",
        "exponent": 0.5,
        "mode": "spectrum"
      },
      "dataFormat": "timeseries",
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "heatmap": {},
      "hideZeroBuckets": false,
      "highlightCards": true,
      "id": 7,
      "legend": {
        "show": false
      },
      "options": {},
      "reverseYBuckets": false,
      "targets": [
        {
          "expr": "sum(increase(nv_inference_load_ratio_bucket[1m])) by (le)",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "timeFrom": null,
      "timeShift": null,
      "title": "Load Ratio  (Total Time / Compute Time)",
      "tooltip": {
        "show": true,
        "showHistogram": false
      },
      "type": "heatmap",
      "xAxis": {
        "show": true
      },
      "xBucketNumber": null,
      "xBucketSize": null,
      "yAxis": {
        "decimals": null,
        "format": "short",
        "logBase": 1,
        "max": null,
        "min": null,
        "show": true,
        "splitFactor": null
      },
      "yBucketBound": "auto",
      "yBucketNumber": null,
      "yBucketSize": null
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "${DS_PROMETHEUS}",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 9
      },
      "id": 4,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(nv_inference_queue_duration_us[30s]) / 1000",
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Queue Time (milliseconds)",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": "Queue Time (ms)",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "${DS_PROMETHEUS}",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 9
      },
      "id": 5,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(nv_inference_compute_duration_us[30s]) / 1000",
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Compute Time (milliseconds)",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": "Compute Time (ms)",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    }
  ],
  "refresh": "5s",
  "schemaVersion": 19,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-15m",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "5s",
      "10s",
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ]
  },
  "timezone": "",
  "title": "Triton Inference Server",
  "uid": "slEY4dsZk",
  "version": 8
}


================================================
FILE: deploy/aws/templates/_helpers.tpl
================================================
{{/*
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/}}

{{/* vim: set filetype=mustache: */}}
{{/*
Create inference server name.
Uses .Values.nameOverride when it is set and falls back to .Chart.Name
otherwise. The result is truncated to 63 characters and a trailing "-" is
removed.
*/}}
{{- define "triton-inference-server.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set, is used as-is.
  2. Otherwise, if the release name already contains the chart name (or
     .Values.nameOverride, when set), the release name alone is used.
  3. Otherwise the result is "<release name>-<name>".
In every case the result is truncated to 63 characters and a trailing "-" is
removed.
*/}}
{{- define "triton-inference-server.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- end -}}

{{/*
  Create inference server metrics service name and fullname derived from above and
  truncated appropriately.
  The base name is cut to 55 characters (trailing "-" removed) before the
  8-character "-metrics" suffix is appended, so the result never exceeds 63
  characters.
*/}}
{{- define "triton-inference-server-metrics.name" -}}
{{- $basename := include "triton-inference-server.name" . -}}
{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
{{- printf "%s-%s" $basename_trimmed "metrics" -}}
{{- end -}}

{{- define "triton-inference-server-metrics.fullname" -}}
{{- $basename := include "triton-inference-server.fullname" . -}}
{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
{{- printf "%s-%s" $basename_trimmed "metrics" -}}
{{- end -}}

{{/*
  Create inference server metrics monitor name and fullname derived from
  above and truncated appropriately.
  The base name is cut to 47 characters (trailing "-" removed) before the
  16-character "-metrics-monitor" suffix is appended, so the result never
  exceeds 63 characters.
*/}}
{{- define "triton-inference-server-metrics-monitor.name" -}}
{{- $basename := include "triton-inference-server.name" . -}}
{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
{{- end -}}

{{- define "triton-inference-server-metrics-monitor.fullname" -}}
{{- $basename := include "triton-inference-server.fullname" . -}}
{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
{{- end -}}

{{/*
Create chart name and version as used by the chart label.
The value is "<chart name>-<chart version>" with every "+" replaced by "_",
truncated to 63 characters and with a trailing "-" removed.
*/}}
{{- define "triton-inference-server.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}


================================================
FILE: deploy/aws/templates/deployment.yaml
================================================
# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Deployment that runs the Triton Inference Server pods for this release.
# Names and labels come from the helpers in _helpers.tpl; tunables come from
# values.yaml (replicaCount and the image.* settings).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ template "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  replicas: {{ .Values.replicaCount }}
  # The selector must match the pod template labels below (app + release).
  selector:
    matchLabels:
      app: {{ template "triton-inference-server.name" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "triton-inference-server.name" . }}
        release: {{ .Release.Name }}

    spec:
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.imageName }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}

          # GPU limit per pod, taken from image.numGpus in values.yaml.
          resources:
            limits:
              nvidia.com/gpu: {{ .Values.image.numGpus }}

          # Start tritonserver against the configured model repository in
          # poll mode with a 5 second repository poll interval.
          args: ["tritonserver", "--model-store={{ .Values.image.modelRepositoryPath }}",
                 "--model-control-mode=poll",
                 "--repository-poll-secs=5"]

          # AWS region and credentials are read from the "aws-credentials"
          # Secret created by templates/secrets.yaml.
          env:
          - name: AWS_DEFAULT_REGION
            valueFrom:
              secretKeyRef:
                name: aws-credentials
                key: AWS_DEFAULT_REGION
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                name: aws-credentials
                key: AWS_ACCESS_KEY_ID
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                name: aws-credentials
                key: AWS_SECRET_ACCESS_KEY

          # Named container ports; the Services in service.yaml and the
          # probes below refer to them by name.
          ports:
            - containerPort: 8000
              name: http
            - containerPort: 8001
              name: grpc
            - containerPort: 8002
              name: metrics
          # Liveness and readiness are both checked over the HTTP port.
          livenessProbe:
            httpGet:
              path: /v2/health/live
              port: http
          readinessProbe:
            initialDelaySeconds: 5
            periodSeconds: 5
            httpGet:
              path: /v2/health/ready
              port: http

      # Pod-level security context: run as UID 1000 with fsGroup 1000.
      securityContext:
        runAsUser: 1000
        fsGroup: 1000


================================================
FILE: deploy/aws/templates/secrets.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Secret holding the AWS region and credentials consumed as environment
# variables by the container in templates/deployment.yaml.
# The values from values.yaml (secret.region, secret.id, secret.key) are
# inserted unmodified under "data", so they must already be base64-encoded.
apiVersion: v1
kind: Secret
metadata:
  name: aws-credentials
type: Opaque
data:
  AWS_DEFAULT_REGION: {{ .Values.secret.region }}
  AWS_ACCESS_KEY_ID: {{ .Values.secret.id }}
  AWS_SECRET_ACCESS_KEY: {{ .Values.secret.key }}


================================================
FILE: deploy/aws/templates/service.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Main inference Service. Exposes the HTTP (8000), GRPC (8001) and metrics
# (8002) ports of the Triton pods; the Service type comes from service.type
# in values.yaml.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  type: {{ .Values.service.type }}
  # targetPort values are the named container ports declared in
  # templates/deployment.yaml.
  ports:
    - port: 8000
      targetPort: http
      name: http-inference-server
    - port: 8001
      targetPort: grpc
      name: grpc-inference-server
    - port: 8002
      targetPort: metrics
      name: metrics-inference-server
  # Selects the pods created by the Deployment (same app + release labels).
  selector:
    app: {{ template "triton-inference-server.name" . }}
    release: {{ .Release.Name }}
---
# Dedicated metrics Service. Its "app" label is the metrics name, which is
# what the ServiceMonitor in this file selects on. It forwards port 8080 to
# the pods' named "metrics" container port.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "triton-inference-server-metrics.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server-metrics.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
  annotations:
    alpha.monitoring.coreos.com/non-namespaced: "true"
spec:
  ports:
  - name: metrics
    port: 8080
    targetPort: metrics
    protocol: TCP
  # Selects the same Triton pods as the main inference Service.
  selector:
    app: {{ template "triton-inference-server.name" . }}
    release: {{ .Release.Name }}
---
# ServiceMonitor (monitoring.coreos.com/v1 custom resource) that selects the
# metrics Service by its "app" label and scrapes its port named "metrics"
# every 15 seconds.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ template "triton-inference-server-metrics-monitor.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server-metrics-monitor.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  selector:
    matchLabels:
      app: {{ template "triton-inference-server-metrics.name" . }}
  endpoints:
  - port: metrics
    interval: 15s


================================================
FILE: deploy/aws/values.yaml
================================================
# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Number of Triton pods; used as the Deployment's replica count.
replicaCount: 1

image:
  # Triton container image to deploy.
  imageName: nvcr.io/nvidia/tritonserver:26.02-py3
  pullPolicy: IfNotPresent
  # Model repository location, passed to tritonserver via --model-store.
  modelRepositoryPath: s3://triton-inference-server-repository/model_repository
  # Value of the container's nvidia.com/gpu resource limit.
  numGpus: 1

service:
  # Kubernetes Service type of the main inference Service.
  type: LoadBalancer

# AWS region and credentials. The values below are placeholders: replace them
# with base64-encoded values, because templates/secrets.yaml copies them
# unmodified into the Secret's "data" field.
secret:
  region: AWS_REGION
  id: AWS_SECRET_KEY_ID
  key: AWS_SECRET_ACCESS_KEY


================================================
FILE: deploy/fleetcommand/Chart.yaml
================================================
# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

apiVersion: v1
# appVersion is the Triton version; update when changing release
appVersion: 2.66.0
description: Triton Inference Server (Fleet Command)
name: triton-inference-server
# version is the Chart version; update when changing anything in the chart
# This follows semantic versioning, i.e.:
#   Given version X.Y.Z
#   When making fixes to the chart, increment Z
#   When making functional changes to the chart (including updating the Triton version, above), increment Y and reset Z to 0
#   When making breaking changes to the chart (e.g. user must take action before deploying), increment X and reset Y and Z to 0
version: 1.4.0


================================================
FILE: deploy/fleetcommand/README.md
================================================
<!--
# Copyright (c) 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)

# Fleet Command Deploy: NVIDIA Triton Inference Server

A helm chart for installing a single cluster of NVIDIA Triton Inference Server
on Fleet Command is provided. By default the cluster contains a single instance
of Triton, but the *replicaCount* configuration parameter can be set to
create a cluster of any size, as described below.

This guide assumes you already have a functional Fleet Command location
deployed.  Please refer to the [Fleet Command
Documentation](https://docs.nvidia.com/fleet-command/prod_fleet-command/prod_fleet-command/overview.html)
for details.

The steps below describe how to set up a model repository, use helm to launch
Triton, and then send inference requests to the running Triton Inference
Server. You can optionally scrape metrics with Prometheus and access a Grafana
endpoint to see real-time metrics reported by Triton.

## Model Repository

If you already have a model repository you may use that with this helm chart. If
you do not have a model repository, you can check out a local copy of the Triton
Inference Server source repository to create an example model repository:

```
$ git clone https://github.com/triton-inference-server/server.git
```

Triton needs a repository of models that it will make available for inferencing.
For this example you will place the model repository in an S3 Storage bucket
(either in AWS or other S3 API compatible on-premises object storage).

```
$ aws s3 mb s3://triton-inference-server-repository
```

Following the [QuickStart](../../docs/getting_started/quickstart.md) download the example model
repository to your system and copy it into the AWS S3 bucket.

```
$ aws s3 cp --recursive docs/examples/model_repository s3://triton-inference-server-repository/model_repository
```

### AWS Model Repository

To load the model from AWS S3, you need to encode the following AWS
credentials in base64 format and add them to the Application Configuration
section when creating the Fleet Command Deployment.

```
echo -n 'REGION' | base64
echo -n 'ACCESS_KEY_ID' | base64
echo -n 'SECRET_ACCESS_KEY' | base64
# Optional for using session token
echo -n 'AWS_SESSION_TOKEN' | base64
```

## Deploy the Triton Inference Server

Deploy the Triton Inference Server to your Location in Fleet Command by creating
a Deployment.  You can specify configuration parameters to override the default
[values.yaml](values.yaml) in the Application Configuration section.

*Note:* You _must_ provide a `--model-repository` parameter with a path to your
prepared model repository in your S3 bucket.  Otherwise, Triton will not
start.

An example Application Configuration for Triton on Fleet Command:
```yaml
image:
  serverArgs:
    - --model-repository=s3://triton-inference-server-repository/model_repository

secret:
  region: <region in base64>
  id: <access key id in base64>
  key: <secret access key in base64>
  token: <session token in base64 (optional)>
```

See [Fleet Command documentation](https://docs.nvidia.com/fleet-command/prod_fleet-command/prod_fleet-command/ug-deploying-to-the-edge.html)
for more info.

### Prometheus ServiceMonitor Support

If you have `prometheus-operator` deployed, you can enable the ServiceMonitor
for the Triton Inference Server by setting `serviceMonitor.enabled: true` in
Application Configuration.  This will also deploy a Grafana dashboard for Triton
as a ConfigMap.

Otherwise, metrics can be scraped by pointing an external Prometheus
instance at the `metricsNodePort` in the values.

## Using Triton Inference Server

Now that the Triton Inference Server is running you can send HTTP or GRPC
requests to it to perform inferencing. By default, the service is exposed with a
NodePort service type, where the same port is opened on all systems in a
Location.

Triton exposes an HTTP endpoint on port 30343, a GRPC endpoint on port 30344,
and a Prometheus metrics endpoint on port 30345. These ports can be overridden
in the application configuration when deploying.  You can use curl to get the
metadata of Triton from the HTTP endpoint.  For example, if a system in your
location has the IP `34.83.9.133`:

```
$ curl 34.83.9.133:30343/v2
```

Follow the [QuickStart](../../docs/getting_started/quickstart.md) to get the example image
classification client that can be used to perform inferencing using image
classification models being served by Triton. For example,

```
$ image_client -u 34.83.9.133:30343 -m densenet_onnx -s INCEPTION -c 3 mug.jpg
Request 0, batch size 1
Image '/workspace/images/mug.jpg':
    15.349568 (504) = COFFEE MUG
    13.227468 (968) = CUP
    10.424893 (505) = COFFEEPOT
```


================================================
FILE: deploy/fleetcommand/dashboard.json
================================================
{
  "__requires": [
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "6.3.5"
    },
    {
      "type": "panel",
      "id": "graph",
      "name": "Graph",
      "version": ""
    },
    {
      "type": "panel",
      "id": "heatmap",
      "name": "Heatmap",
      "version": ""
    },
    {
      "type": "datasource",
      "id": "prometheus",
      "name": "Prometheus",
      "version": "1.0.0"
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 2,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "nv_inference_request_success",
          "legendFormat": "Success {{instance}}",
          "refId": "A"
        },
        {
          "expr": "nv_inference_request_failure",
          "legendFormat": "Failure {{instance}}",
          "refId": "B"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Cumulative Inference Requests",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "cards": {
        "cardPadding": null,
        "cardRound": null
      },
      "color": {
        "cardColor": "#b4ff00",
        "colorScale": "sqrt",
        "colorScheme": "interpolateReds",
        "exponent": 0.5,
        "mode": "spectrum"
      },
      "dataFormat": "timeseries",
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "heatmap": {},
      "hideZeroBuckets": false,
      "highlightCards": true,
      "id": 7,
      "legend": {
        "show": false
      },
      "options": {},
      "reverseYBuckets": false,
      "targets": [
        {
          "expr": "sum(increase(nv_inference_load_ratio_bucket[1m])) by (le)",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "timeFrom": null,
      "timeShift": null,
      "title": "Load Ratio  (Total Time / Compute Time)",
      "tooltip": {
        "show": true,
        "showHistogram": false
      },
      "type": "heatmap",
      "xAxis": {
        "show": true
      },
      "xBucketNumber": null,
      "xBucketSize": null,
      "yAxis": {
        "decimals": null,
        "format": "short",
        "logBase": 1,
        "max": null,
        "min": null,
        "show": true,
        "splitFactor": null
      },
      "yBucketBound": "auto",
      "yBucketNumber": null,
      "yBucketSize": null
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 9
      },
      "id": 4,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(nv_inference_queue_duration_us[30s]) / 1000",
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Queue Time (milliseconds)",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": "Queue Time (ms)",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 9
      },
      "id": 5,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(nv_inference_compute_duration_us[30s]) / 1000",
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Compute Time (milliseconds)",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": "Compute Time (ms)",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    }
  ],
  "refresh": "5s",
  "schemaVersion": 19,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": [
      {
       "current": {
         "text": "Prometheus",
         "value": "Prometheus"
       },
       "hide": 0,
       "includeAll": false,
       "label": null,
       "multi": false,
       "name": "datasource",
       "options": [],
       "query": "prometheus",
       "refresh": 1,
       "regex": "",
       "skipUrlSync": false,
       "type": "datasource"
      }
    ]
  },
  "time": {
    "from": "now-15m",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "5s",
      "10s",
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ]
  },
  "timezone": "",
  "title": "Triton Inference Server",
  "uid": "slEY4dsZk",
  "version": 8
}


================================================
FILE: deploy/fleetcommand/templates/_helpers.tpl
================================================
{{/*
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/}}

{{/* vim: set filetype=mustache: */}}
{{/*
Short name of the inference server: .Values.nameOverride when it is set,
otherwise the chart name. The result is cut to 63 characters and any trailing
"-" is removed.
*/}}
{{- define "triton-inference-server.name" -}}
{{- $name := .Values.nameOverride | default .Chart.Name -}}
{{- trimSuffix "-" (trunc 63 $name) -}}
{{- end -}}

{{/*
Fully qualified app name.
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name (or
     .Values.nameOverride).
  3. "<release name>-<chart name or nameOverride>" otherwise.
The result is truncated to 63 chars because some Kubernetes name fields are
limited to this (by the DNS naming spec), and any trailing "-" is removed.
*/}}
{{- define "triton-inference-server.fullname" -}}
{{- $full := .Values.fullnameOverride -}}
{{- if not $full -}}
  {{- $base := default .Chart.Name .Values.nameOverride -}}
  {{- $full = .Release.Name -}}
  {{- if not (contains $base .Release.Name) -}}
    {{- $full = printf "%s-%s" .Release.Name $base -}}
  {{- end -}}
{{- end -}}
{{- $full | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
  Metrics service name: the inference server name cut to 55 characters (with
  any trailing "-" removed) followed by "-metrics", so the result stays within
  63 characters.
*/}}
{{- define "triton-inference-server-metrics.name" -}}
{{- $stem := include "triton-inference-server.name" . | trunc 55 | trimSuffix "-" -}}
{{- printf "%s-metrics" $stem -}}
{{- end -}}

{{/*
  Metrics service fullname: the inference server fullname cut to 55 characters
  (with any trailing "-" removed) followed by "-metrics".
*/}}
{{- define "triton-inference-server-metrics.fullname" -}}
{{- $stem := include "triton-inference-server.fullname" . | trunc 55 | trimSuffix "-" -}}
{{- printf "%s-metrics" $stem -}}
{{- end -}}

{{/*
  Metrics monitor name: the inference server name cut to 47 characters (with
  any trailing "-" removed) followed by "-metrics-monitor", so the result stays
  within 63 characters.
*/}}
{{- define "triton-inference-server-metrics-monitor.name" -}}
{{- $stem := include "triton-inference-server.name" . | trunc 47 | trimSuffix "-" -}}
{{- printf "%s-metrics-monitor" $stem -}}
{{- end -}}

{{/*
  Metrics monitor fullname: the inference server fullname cut to 47 characters
  (with any trailing "-" removed) followed by "-metrics-monitor".
*/}}
{{- define "triton-inference-server-metrics-monitor.fullname" -}}
{{- $stem := include "triton-inference-server.fullname" . | trunc 47 | trimSuffix "-" -}}
{{- printf "%s-metrics-monitor" $stem -}}
{{- end -}}

{{/*
Value for the "chart" label: "<chart name>-<chart version>" with every "+"
replaced by "_", cut to 63 characters and with any trailing "-" removed.
*/}}
{{- define "triton-inference-server.chart" -}}
{{- $label := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $label | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}


================================================
FILE: deploy/fleetcommand/templates/configmap-grafana-dashboard.yaml
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

{{- if .Values.serviceMonitor.enabled }}
# ConfigMap carrying the chart's dashboard.json file.  It is rendered only when
# serviceMonitor.enabled is true.
# NOTE(review): the grafana_dashboard label is presumably what a Grafana
# dashboard sidecar uses for discovery - confirm against the target cluster.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Release.Name }}-dashboard-configmap
  labels:
    grafana_dashboard: "1"
data:
  dashboard.json: |-
{{ .Files.Get "dashboard.json" | indent 4}}
{{- end }}


================================================
FILE: deploy/fleetcommand/templates/deployment.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Deployment that runs the Triton Inference Server container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ template "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  replicas: {{ .Values.replicaCount }}
  # The selector uses the same app/release labels as the pod template below.
  selector:
    matchLabels:
      app: {{ template "triton-inference-server.name" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "triton-inference-server.name" . }}
        release: {{ .Release.Name }}

    spec:
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.imageName }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}

          # GPU limit per pod, taken from image.numGpus.
          resources:
            limits:
              nvidia.com/gpu: {{ .Values.image.numGpus }}

          # image.serverCommand is the first argument, followed by every entry
          # of image.serverArgs.  Rendering fails with the message below when
          # image.serverArgs is not provided.
          args:
            - {{ .Values.image.serverCommand }}
            {{- $args := required "image.serverArgs, at least --model-repository, is required!" .Values.image.serverArgs }}
            {{- range $args }}
            - {{ . -}}
            {{ end }}

{{ if .Values.secret }}
          # AWS credentials are read from the aws-credentials Secret, which
          # secrets.yaml renders under the same .Values.secret condition.
          # The session token is wired in only when secret.token is set.
          env:
          - name: AWS_DEFAULT_REGION
            valueFrom:
              secretKeyRef:
                name: aws-credentials
                key: AWS_DEFAULT_REGION
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                name: aws-credentials
                key: AWS_ACCESS_KEY_ID
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                name: aws-credentials
                key: AWS_SECRET_ACCESS_KEY
{{- if .Values.secret.token }}
          - name: AWS_SESSION_TOKEN
            valueFrom:
              secretKeyRef:
                name: aws-credentials
                key: AWS_SESSION_TOKEN
{{- end }}
{{- end }}

          # Ports are named so that the Services (targetPort) and the probes
          # below can refer to them by name.
          ports:
            - containerPort: 8000
              name: http
            - containerPort: 8001
              name: grpc
            - containerPort: 8002
              name: metrics
          # Both probes query the health endpoints on the http port.
          livenessProbe:
            httpGet:
              path: /v2/health/live
              port: http
          readinessProbe:
            initialDelaySeconds: 5
            periodSeconds: 5
            httpGet:
              path: /v2/health/ready
              port: http

      # Pod-level security context: run as UID 1000 with fsGroup 1000.
      securityContext:
        runAsUser: 1000
        fsGroup: 1000


================================================
FILE: deploy/fleetcommand/templates/secrets.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

{{- if .Values.secret }}
# Secret holding the AWS credentials consumed by the Deployment's env section.
# It is rendered only when .Values.secret is set.  Values are inserted verbatim
# under data:, so they must already be base64 encoded.
apiVersion: v1
kind: Secret
metadata:
  name: aws-credentials
type: Opaque
data:
  AWS_DEFAULT_REGION: {{ .Values.secret.region }}
  AWS_ACCESS_KEY_ID: {{ .Values.secret.id }}
  AWS_SECRET_ACCESS_KEY: {{ .Values.secret.key }}
{{- if .Values.secret.token }}
  AWS_SESSION_TOKEN: {{ .Values.secret.token }}
{{- end }}
{{- end }}


================================================
FILE: deploy/fleetcommand/templates/service.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Primary Service exposing the http (8000), grpc (8001) and metrics (8002)
# ports of the Triton pods.  A fixed nodePort is emitted for a port only when
# the corresponding service.httpNodePort / grpcNodePort / metricsNodePort value
# is set.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: 8000
      targetPort: http
      name: http-inference-server
      {{- if .Values.service.httpNodePort }}
      nodePort: {{ .Values.service.httpNodePort }}
      {{- end }}
    - port: 8001
      targetPort: grpc
      name: grpc-inference-server
      {{- if .Values.service.grpcNodePort }}
      nodePort: {{ .Values.service.grpcNodePort }}
      {{- end }}
    - port: 8002
      targetPort: metrics
      name: metrics-inference-server
      {{- if .Values.service.metricsNodePort }}
      nodePort: {{ .Values.service.metricsNodePort }}
      {{- end }}
  # Selects the pods created by the Deployment (same app/release labels).
  selector:
    app: {{ template "triton-inference-server.name" . }}
    release: {{ .Release.Name }}
---
# Separate metrics Service: port 8080 forwards to the container port named
# "metrics".  Its app label uses the metrics name, which is the label the
# ServiceMonitor in this file selects on.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "triton-inference-server-metrics.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server-metrics.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
  annotations:
    alpha.monitoring.coreos.com/non-namespaced: "true"
spec:
  ports:
  - name: metrics
    port: 8080
    targetPort: metrics
    protocol: TCP
  # Targets the same Triton pods as the primary Service.
  selector:
    app: {{ template "triton-inference-server.name" . }}
    release: {{ .Release.Name }}
---
{{- if .Values.serviceMonitor.enabled }}
# ServiceMonitor rendered only when serviceMonitor.enabled is true.  It selects
# Services labelled with the metrics name (the metrics Service in this file)
# and scrapes their "metrics" port every 15 seconds.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ template "triton-inference-server-metrics-monitor.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server-metrics-monitor.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  selector:
    matchLabels:
      app: {{ template "triton-inference-server-metrics.name" . }}
  endpoints:
  - port: metrics
    interval: 15s
{{- end }}


================================================
FILE: deploy/fleetcommand/values.yaml
================================================
# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Number of Triton pod replicas requested by the Deployment.
replicaCount: 1

# Container image and server launch settings used by the Deployment.
image:
  # Full image reference of the Triton container.
  imageName: nvcr.io/nvidia/tritonserver:26.02-py3
  pullPolicy: IfNotPresent
  # Value of the nvidia.com/gpu resource limit set on the container.
  numGpus: 1
  # First container argument; the serverArgs entries are appended after it.
  serverCommand: tritonserver
  # Chart rendering fails unless at least one serverArgs entry is provided.
  serverArgs:
    # Model Repository Configuration (REQUIRED)
    #
    # Configure sources for model repository below.  Multiple repositories
    # can be specified
    #
    # To download models from an S3 bucket, uncomment and configure below
    # To specify a non-AWS S3 endpoint, use the form
    #  s3://https://your-s3-endpoint:443/bucket/model_repository
    #
    #- --model-repository=s3://triton-inference-server-repository/model_repository
    #
    # Model Control Mode (Optional, default: none)
    #
    # To set model control mode, uncomment and configure below
    # TODO: Fix the following url, it is invalid
    # See https://github.com/triton-inference-server/server/blob/r26.02/docs/user_guide/model_management.md
    #  for more details
    #- --model-control-mode=explicit|poll|none
    #
    # Additional server args
    #
    # see https://github.com/triton-inference-server/server/blob/r26.02/README.md
    #  for more details

# Settings for the primary Service that fronts the Triton pods.
service:
  # for Fleet Command, type should be NodePort
  type: NodePort
  # the following ports will be the external port opened for each service;
  # each one is rendered as the nodePort of the matching Service port
  # (http 8000, grpc 8001, metrics 8002) and is omitted when left unset
  httpNodePort: 30343
  grpcNodePort: 30344
  metricsNodePort: 30345

# AWS
#secret:
#  # update the following with base64 encoded parameters
#  region: AWS_REGION
#  id: AWS_SECRET_KEY_ID
#  key: AWS_SECRET_ACCESS_KEY
#  token: AWS_SESSION_TOKEN

# Prometheus-Operator ServiceMonitor support
# change enabled to 'true' to enable a ServiceMonitor if your cluster has
#  Prometheus-Operator installed
serviceMonitor:
  enabled: false


================================================
FILE: deploy/gcp/Chart.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Chart metadata for the Triton Inference Server helm chart.
apiVersion: v1
# Version of the application packaged by this chart.
appVersion: "1.0"
description: Triton Inference Server
# Chart name; the templates use it (via .Chart.Name) as the default base for
# resource names.
name: triton-inference-server
# Version of the chart itself; combined with the chart name to build the
# "chart" label in the templates.
version: 1.0.0


================================================
FILE: deploy/gcp/README.md
================================================
<!--
# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)

# Kubernetes Deploy: Triton Inference Server Cluster

A helm chart for installing a single cluster of Triton Inference
Server is provided. By default the cluster contains a single instance
of the inference server but the *replicaCount* configuration parameter
can be set to create a cluster of any size, as described below.

This guide assumes you already have a functional Kubernetes cluster
and helm installed (see below for instructions on installing
helm). Note the following requirements:

* The helm chart deploys Prometheus and Grafana to collect and display Triton metrics. Your cluster must contain sufficient CPU resources to support these services. At a minimum you will likely require 2 CPU nodes with machine type of n1-standard-2 or greater.

* If you want Triton Server to use GPUs for inferencing, your cluster
must be configured to contain the desired number of GPU nodes with
support for the NVIDIA driver and CUDA version required by the version
of the inference server you are using.

This helm chart is available from [Triton Inference Server
GitHub](https://github.com/triton-inference-server/server) or from the
[NVIDIA GPU Cloud (NGC)](https://ngc.nvidia.com).

The steps below describe how to set up a model repository, use helm to
launch the inference server, and then send inference requests to the
running server. You can access a Grafana endpoint to see real-time
metrics reported by the inference server.


## Installing Helm

### Helm v3

If you do not already have Helm installed in your Kubernetes cluster,
follow the steps in the [official helm install
guide](https://helm.sh/docs/intro/install/) for a quick setup.

If you're currently using Helm v2 and would like to migrate to Helm v3,
please see the [official migration guide](https://helm.sh/docs/topics/v2_v3_migration/).

### Helm v2

> **NOTE**: Moving forward this chart will only be tested and maintained for Helm v3.

Below are example instructions for installing Helm v2.

```
$ curl https://raw.githubusercontent.com/helm/helm/master/scripts/get | bash
$ kubectl create serviceaccount -n kube-system tiller
serviceaccount/tiller created
$ kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
$ helm init --service-account tiller --wait
```

If you run into any issues, you can refer to the official installation guide [here](https://v2.helm.sh/docs/install/).

## Model Repository

If you already have a model repository you may use that with this helm
chart. If you do not have a model repository, you can check out a local
copy of the inference server source repository to create an example
model repository:

```
$ git clone https://github.com/triton-inference-server/server.git
```

Triton Server needs a repository of models that it will make available
for inferencing. For this example you will place the model repository
in a Google Cloud Storage bucket.

```
$ gsutil mb gs://triton-inference-server-repository
```

Following the [QuickStart](../../docs/getting_started/quickstart.md) download the
example model repository to your system and copy it into the GCS
bucket.

```
$ gsutil cp -r docs/examples/model_repository gs://triton-inference-server-repository/model_repository
```

### GCS Permissions

Make sure the bucket permissions are set so that the inference server
can access the model repository. If the bucket is public then no
additional changes are needed and you can proceed to the "Deploy
Prometheus and Grafana" section.

If bucket permissions need to be set with the
GOOGLE_APPLICATION_CREDENTIALS environment variable then perform the
following steps:

* Generate Google service account JSON with proper permissions called
  *gcp-creds.json*.

* Create a Kubernetes secret from *gcp-creds.json*:

```
  $ kubectl create configmap gcpcreds --from-literal "project-id=myproject"
  $ kubectl create secret generic gcpcreds --from-file gcp-creds.json
```

* Modify templates/deployment.yaml to include the
  GOOGLE_APPLICATION_CREDENTIALS environment variable:

```
    env:
      - name: GOOGLE_APPLICATION_CREDENTIALS
        value: /secret/gcp-creds.json
```

* Modify templates/deployment.yaml to mount the secret in a volume at
  /secret:

```
    volumeMounts:
      - name: vsecret
        mountPath: "/secret"
        readOnly: true
    ...
    volumes:
    - name: vsecret
      secret:
        secretName: gcpcreds
```


## Deploy Prometheus and Grafana

The inference server metrics are collected by Prometheus and viewable
by Grafana. The inference server helm chart assumes that Prometheus
and Grafana are available so this step must be followed even if you
don't want to use Grafana.

Use the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) to install these components. The
*serviceMonitorSelectorNilUsesHelmValues* flag is needed so that
Prometheus can find the inference server metrics in the *example*
release deployed below. If the *prometheus-community* chart repository
is not yet configured in your helm client, add it first.

```
$ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
$ helm repo update
$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false prometheus-community/kube-prometheus-stack
```

Then port-forward to the Grafana service so you can access it from
your local browser.

```
$ kubectl port-forward service/example-metrics-grafana 8080:80
```

Now you should be able to navigate in your browser to localhost:8080
and see the Grafana login page. Use username=admin and
password=prom-operator to login.

An example Grafana dashboard is available in dashboard.json. Use the
import function in Grafana to import and view this dashboard.

## Deploy the Inference Server

Deploy the inference server using the default configuration with the
following commands.

```
$ cd <directory containing Chart.yaml>
$ helm install example .
```

Use kubectl to see status and wait until the inference server pods are
running.

```
$ kubectl get pods
NAME                                               READY   STATUS    RESTARTS   AGE
example-triton-inference-server-5f74b55885-n6lt7   1/1     Running   0          2m21s
```

There are several ways of overriding the default configuration as
described in this [helm
documentation](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing).

You can edit the values.yaml file directly or you can use the *--set*
option to override a single parameter with the CLI. For example, to
deploy a cluster of four inference servers use *--set* to set the
replicaCount parameter.

```
$ helm install example --set replicaCount=4 .
```

You can also write your own "config.yaml" file with the values you
want to override and pass it to helm.

```
$ cat << EOF > config.yaml
namespace: MyCustomNamespace
image:
  imageName: nvcr.io/nvidia/tritonserver:custom-tag
  modelRepositoryPath: gs://my_model_repository
EOF
$ helm install example -f config.yaml .
```

## Using Triton Inference Server

Now that the inference server is running you can send HTTP or GRPC
requests to it to perform inferencing. By default, the inferencing
service is exposed with a LoadBalancer service type. Use the following
to find the external IP for the inference server. In this case it is
34.83.9.133.

```
$ kubectl get services
NAME                             TYPE           CLUSTER-IP     EXTERNAL-IP   PORT(S)                                        AGE
...
example-triton-inference-server  LoadBalancer   10.18.13.28    34.83.9.133   8000:30249/TCP,8001:30068/TCP,8002:32723/TCP   47m
```

The inference server exposes an HTTP endpoint on port 8000, a GRPC
endpoint on port 8001, and a Prometheus metrics endpoint on
port 8002. You can use curl to get the metadata of the inference server
from the HTTP endpoint.

```
$ curl 34.83.9.133:8000/v2
```

Follow the [QuickStart](../../docs/getting_started/quickstart.md) to get the example
image classification client that can be used to perform inferencing
using image classification models being served by the inference
server. For example,

```
$ image_client -u 34.83.9.133:8000 -m inception_v3_onnx -s INCEPTION -c3 mug.jpg
Request 0, batch size 1
Image 'images/mug.jpg':
    504 (COFFEE MUG) = 0.723992
    968 (CUP) = 0.270953
    967 (ESPRESSO) = 0.00115997
```

## Cleanup

Once you've finished using the inference server you should use helm to
delete the deployment.

```
$ helm list
NAME            REVISION  UPDATED                   STATUS    CHART                          APP VERSION   NAMESPACE
example         1         Wed Feb 27 22:16:55 2019  DEPLOYED  triton-inference-server-1.0.0  1.0           default
example-metrics	1       	Tue Jan 21 12:24:07 2020	DEPLOYED	prometheus-operator-6.18.0   	 0.32.0     	 default

$ helm uninstall example
$ helm uninstall example-metrics
```

For the Prometheus and Grafana services, you should [explicitly delete
CRDs](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack#uninstall-helm-chart):

```
$ kubectl delete crd alertmanagerconfigs.monitoring.coreos.com alertmanagers.monitoring.coreos.com podmonitors.monitoring.coreos.com probes.monitoring.coreos.com prometheuses.monitoring.coreos.com prometheusrules.monitoring.coreos.com servicemonitors.monitoring.coreos.com thanosrulers.monitoring.coreos.com
```

You may also want to delete the GCS bucket you created to hold the
model repository.

```
$ gsutil rm -r gs://triton-inference-server-repository
```


================================================
FILE: deploy/gcp/dashboard.json
================================================
{
  "__inputs": [
    {
      "name": "DS_PROMETHEUS",
      "label": "Prometheus",
      "description": "",
      "type": "datasource",
      "pluginId": "prometheus",
      "pluginName": "Prometheus"
    }
  ],
  "__requires": [
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "6.3.5"
    },
    {
      "type": "panel",
      "id": "graph",
      "name": "Graph",
      "version": ""
    },
    {
      "type": "panel",
      "id": "heatmap",
      "name": "Heatmap",
      "version": ""
    },
    {
      "type": "datasource",
      "id": "prometheus",
      "name": "Prometheus",
      "version": "1.0.0"
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "${DS_PROMETHEUS}",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 2,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "nv_inference_request_success",
          "legendFormat": "Success {{instance}}",
          "refId": "A"
        },
        {
          "expr": "nv_inference_request_failure",
          "legendFormat": "Failure {{instance}}",
          "refId": "B"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Cumulative Inference Requests",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "cards": {
        "cardPadding": null,
        "cardRound": null
      },
      "color": {
        "cardColor": "#b4ff00",
        "colorScale": "sqrt",
        "colorScheme": "interpolateReds",
        "exponent": 0.5,
        "mode": "spectrum"
      },
      "dataFormat": "timeseries",
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "heatmap": {},
      "hideZeroBuckets": false,
      "highlightCards": true,
      "id": 7,
      "legend": {
        "show": false
      },
      "options": {},
      "reverseYBuckets": false,
      "targets": [
        {
          "expr": "sum(increase(nv_inference_load_ratio_bucket[1m])) by (le)",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "timeFrom": null,
      "timeShift": null,
      "title": "Load Ratio  (Total Time / Compute Time)",
      "tooltip": {
        "show": true,
        "showHistogram": false
      },
      "type": "heatmap",
      "xAxis": {
        "show": true
      },
      "xBucketNumber": null,
      "xBucketSize": null,
      "yAxis": {
        "decimals": null,
        "format": "short",
        "logBase": 1,
        "max": null,
        "min": null,
        "show": true,
        "splitFactor": null
      },
      "yBucketBound": "auto",
      "yBucketNumber": null,
      "yBucketSize": null
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "${DS_PROMETHEUS}",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 9
      },
      "id": 4,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(nv_inference_queue_duration_us[30s]) / 1000",
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Queue Time (milliseconds)",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": "Queue Time (ms)",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "${DS_PROMETHEUS}",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 9
      },
      "id": 5,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(nv_inference_compute_duration_us[30s]) / 1000",
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Compute Time (milliseconds)",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": "Compute Time (ms)",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    }
  ],
  "refresh": "5s",
  "schemaVersion": 19,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-15m",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "5s",
      "10s",
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ]
  },
  "timezone": "",
  "title": "Triton Inference Server",
  "uid": "slEY4dsZk",
  "version": 8
}


================================================
FILE: deploy/gcp/templates/_helpers.tpl
================================================
{{/*
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/}}

{{/* vim: set filetype=mustache: */}}
{{/*
Short name of the inference server: .Values.nameOverride when it is set,
otherwise the chart name, cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "triton-inference-server.name" -}}
{{- $shortName := default .Chart.Name .Values.nameOverride -}}
{{- $shortName | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Fully qualified app name, chosen in this order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the short name
     (.Values.nameOverride or, by default, the chart name);
  3. "<release name>-<short name>" otherwise.
The result is always cut to 63 characters, with a trailing "-" removed,
because some Kubernetes name fields are limited to this (by the DNS naming
spec).
*/}}
{{- define "triton-inference-server.fullname" -}}
{{- $shortName := default .Chart.Name .Values.nameOverride -}}
{{- if .Values.fullnameOverride -}}
{{- trimSuffix "-" (trunc 63 .Values.fullnameOverride) -}}
{{- else if contains $shortName .Release.Name -}}
{{- trimSuffix "-" (trunc 63 .Release.Name) -}}
{{- else -}}
{{- trimSuffix "-" (trunc 63 (printf "%s-%s" .Release.Name $shortName)) -}}
{{- end -}}
{{- end -}}

{{/*
  Name of the inference server metrics service: the inference server name cut
  to 55 characters (trailing "-" removed) followed by "-metrics". The cut
  leaves room for the 8-character suffix within 63 characters.
*/}}
{{- define "triton-inference-server-metrics.name" -}}
{{- $prefix := include "triton-inference-server.name" . | trunc 55 | trimSuffix "-" -}}
{{- printf "%s-metrics" $prefix -}}
{{- end -}}

{{/*
  Full name of the inference server metrics service: the inference server
  fullname cut to 55 characters (trailing "-" removed) followed by "-metrics".
*/}}
{{- define "triton-inference-server-metrics.fullname" -}}
{{- $prefix := include "triton-inference-server.fullname" . | trunc 55 | trimSuffix "-" -}}
{{- printf "%s-metrics" $prefix -}}
{{- end -}}

{{/*
  Name of the inference server metrics monitor: the inference server name cut
  to 47 characters (trailing "-" removed) followed by "-metrics-monitor". The
  cut leaves room for the 16-character suffix within 63 characters.
*/}}
{{- define "triton-inference-server-metrics-monitor.name" -}}
{{- $prefix := include "triton-inference-server.name" . | trunc 47 | trimSuffix "-" -}}
{{- printf "%s-metrics-monitor" $prefix -}}
{{- end -}}

{{/*
  Full name of the inference server metrics monitor: the inference server
  fullname cut to 47 characters (trailing "-" removed) followed by
  "-metrics-monitor".
*/}}
{{- define "triton-inference-server-metrics-monitor.fullname" -}}
{{- $prefix := include "triton-inference-server.fullname" . | trunc 47 | trimSuffix "-" -}}
{{- printf "%s-metrics-monitor" $prefix -}}
{{- end -}}

{{/*
Value of the "chart" label: "<chart name>-<chart version>" with every "+"
replaced by "_", cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "triton-inference-server.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}


================================================
FILE: deploy/gcp/templates/deployment.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Deployment that runs the Triton Inference Server containers.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ template "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  replicas: {{ .Values.replicaCount }}
  # The selector and the pod template labels below must stay identical.
  selector:
    matchLabels:
      app: {{ template "triton-inference-server.name" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "triton-inference-server.name" . }}
        release: {{ .Release.Name }}

    spec:
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.imageName }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}

          # GPUs requested per pod, taken from image.numGpus in values.yaml.
          resources:
            limits:
              nvidia.com/gpu: {{ .Values.image.numGpus }}

          # Use the --model-repository option name, matching the other deploy
          # charts in this repository.
          args: ["tritonserver", "--model-repository={{ .Values.image.modelRepositoryPath }}"]

          # Port names are referenced by the probes below and by the
          # targetPort fields in service.yaml.
          ports:
            - containerPort: 8000
              name: http
            - containerPort: 8001
              name: grpc
            - containerPort: 8002
              name: metrics
          livenessProbe:
            httpGet:
              path: /v2/health/live
              port: http
          readinessProbe:
            initialDelaySeconds: 5
            periodSeconds: 5
            httpGet:
              path: /v2/health/ready
              port: http

      # Pod-level security context: run as UID 1000 with fsGroup 1000.
      securityContext:
        runAsUser: 1000
        fsGroup: 1000


================================================
FILE: deploy/gcp/templates/service.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Service exposing the inference server's HTTP, GRPC and metrics ports.
# The service type is taken from service.type in values.yaml.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ include "triton-inference-server.name" . }}
    chart: {{ include "triton-inference-server.chart" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
spec:
  type: {{ .Values.service.type }}
  # Route traffic to the pods labelled with this release's app/release pair.
  selector:
    app: {{ include "triton-inference-server.name" . }}
    release: {{ .Release.Name }}
  # Each targetPort is the name of a container port in deployment.yaml.
  ports:
    - name: http-inference-server
      port: 8000
      targetPort: http
    - name: grpc-inference-server
      port: 8001
      targetPort: grpc
    - name: metrics-inference-server
      port: 8002
      targetPort: metrics
---
# Dedicated metrics Service: forwards port 8080 to the inference server pods'
# "metrics" container port. Its "app" label is what the ServiceMonitor in this
# file selects on.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "triton-inference-server-metrics.fullname" . }}
  namespace: {{ .Release.Namespace }}
  annotations:
    alpha.monitoring.coreos.com/non-namespaced: "true"
  labels:
    app: {{ include "triton-inference-server-metrics.name" . }}
    chart: {{ include "triton-inference-server.chart" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
spec:
  # Same pods as the main inference Service.
  selector:
    app: {{ include "triton-inference-server.name" . }}
    release: {{ .Release.Name }}
  ports:
  - name: metrics
    protocol: TCP
    port: 8080
    targetPort: metrics
---
# ServiceMonitor (monitoring.coreos.com/v1 custom resource) that selects
# Services labelled with the "-metrics" app name and scrapes their port named
# "metrics" every 15 seconds.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ template "triton-inference-server-metrics-monitor.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server-metrics-monitor.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  selector:
    matchLabels:
      # Matches the "app" label set on the metrics Service in this file.
      app: {{ template "triton-inference-server-metrics.name" . }}
  endpoints:
  # "metrics" refers to a Service port name, not a port number.
  - port: metrics
    interval: 15s


================================================
FILE: deploy/gcp/values.yaml
================================================
# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Default values for the GCP chart.
# NOTE(review): of these keys only service.type is visibly referenced (by the
# Service template); the others are presumably read by the deployment
# template -- confirm there.
replicaCount: 1

image:
  # Triton server container image and tag.
  imageName: nvcr.io/nvidia/tritonserver:26.02-py3
  pullPolicy: IfNotPresent
  # gs:// URL of the model repository.
  modelRepositoryPath: gs://triton-inference-server-repository/model_repository
  numGpus: 1

service:
  # Rendered as spec.type of the inference Service.
  type: LoadBalancer


================================================
FILE: deploy/gke-marketplace-app/README.md
================================================
<!--
# Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# NVIDIA Triton Inference Server GKE Marketplace Application

**Table Of Contents**
- [NVIDIA Triton Inference Server GKE Marketplace Application](#nvidia-triton-inference-server-gke-marketplace-application)
  - [Description](#description)
  - [Prerequisites](#prerequisites)
  - [Demo Instruction](#demo-instruction)
  - [Additional Resources](#additional-resources)
  - [Known Issues](#known-issues)

## Description

This repository contains Google Kubernetes Engine(GKE) Marketplace Application for NVIDIA Triton Inference Server deployer.

 - Triton GKE deployer is a helm chart deployer recommended by GKE Marketplace
 - Triton GKE deployer deploys a GKE ingress which accepts public inference requests
 - Triton GKE deployer includes a horizontal pod autoscaler(HPA) which relies on [stack driver custom metrics adaptor](https://github.com/GoogleCloudPlatform/k8s-stackdriver/tree/master/custom-metrics-stackdriver-adapter) to monitor GPU duty cycle, and auto scale GPU nodes.
 - This repo also contains a sample to generate BERT model with TensorRT and use Locust to experiment with GPU node autoscaling and monitor client latency/throughput.

![Cloud Architecture Diagram](diagram.png)

## Prerequisites

 - [Install Google Cloud SDK on your laptop/client workstation](https://cloud.google.com/sdk/docs/install), so that `gcloud` SDK cli interface could be run on the client and sign in with your GCP credentials.
 - In addition, user could leverage [Google Cloud shell](https://cloud.google.com/shell/docs/launching-cloud-shell).

## Demo Instruction

First, install this Triton GKE app to an existing GKE cluster with a GPU node pool. Google Cloud Marketplace currently doesn't support automatic creation of GPU clusters, so users have to run the commands below to create a compatible cluster (GKE version >=1.18.7) with GPU node pools. We recommend selecting T4 or A100 (MIG) instance types and choosing the CPU ratio based on profiling of the actual inference workflow.

Users need to follow these [instructions](https://cloud.google.com/kubernetes-engine/docs/how-to/kubernetes-service-accounts#creating_a_kubernetes_service_account) to create a kubernetes service account. In this example, we use `gke-test@k80-exploration.iam.gserviceaccount.com`. Make sure it has access to artifact registry and monitoring viewer. For example, to grant access to custom metrics which is required for HPA to work:
```
gcloud iam service-accounts add-iam-policy-binding --role \
  roles/iam.workloadIdentityUser --member \
  "serviceAccount:<project-id>.svc.id.goog[custom-metrics/custom-metrics-stackdriver-adapter]" \
  <google-service-account>@<project-id>.iam.gserviceaccount.com

kubectl annotate serviceaccount --namespace custom-metrics \
  custom-metrics-stackdriver-adapter \
  iam.gke.io/gcp-service-account=<google-service-account>@<project-id>.iam.gserviceaccount.com
```

Currently, GKE >= 1.18.7 is only supported in the GKE rapid channel. To find the latest version, please visit the [GKE release notes](https://cloud.google.com/kubernetes-engine/docs/release-notes).
```
export PROJECT_ID=<your GCP project ID>
export ZONE=<GCP zone of your choice>
export REGION=<GCP region of your choice>
export DEPLOYMENT_NAME=<GKE cluster name, triton-gke for example>
# example: export SERVICE_ACCOUNT="gke-test@k80-exploration.iam.gserviceaccount.com"
export SERVICE_ACCOUNT=<Your GKE service account>

gcloud beta container clusters create ${DEPLOYMENT_NAME} \
--addons=HorizontalPodAutoscaling,HttpLoadBalancing \
--service-account=${SERVICE_ACCOUNT} \
--machine-type=n1-standard-8 \
--node-locations=${ZONE} \
--monitoring=SYSTEM \
--zone=${ZONE} \
--subnetwork=default \
--scopes cloud-platform \
--num-nodes 1 \
--project ${PROJECT_ID}

# add GPU node pools, user can modify number of node based on workloads
gcloud container node-pools create accel \
  --project ${PROJECT_ID} \
  --zone ${ZONE} \
  --cluster ${DEPLOYMENT_NAME} \
  --service-account=${SERVICE_ACCOUNT} \
  --num-nodes 2 \
  --accelerator type=nvidia-tesla-t4,count=1 \
  --enable-autoscaling --min-nodes 2 --max-nodes 3 \
  --machine-type n1-standard-4 \
  --disk-size=100 \
  --scopes cloud-platform \
  --verbosity error

# so that you can run kubectl locally to the cluster
gcloud container clusters get-credentials ${DEPLOYMENT_NAME} --project ${PROJECT_ID} --zone ${ZONE}

# deploy NVIDIA device plugin for GKE to prepare GPU nodes for driver install
kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/cos/daemonset-preloaded-latest.yaml

# make sure you can run kubectl locally to access the cluster
kubectl create clusterrolebinding cluster-admin-binding --clusterrole cluster-admin --user "$(gcloud config get-value account)"

# enable stackdriver custom metrics adaptor
kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/k8s-stackdriver/master/custom-metrics-stackdriver-adapter/deploy/production/adapter_new_resource_model.yaml

# create an ip for ingress traffic
gcloud compute addresses create ingress-triton --global
```

Creating a cluster and adding GPU nodes could take up to 10 minutes. Please be patient after executing these commands. GPU resources in GCP could be fully utilized, so please try a different zone in case compute resources cannot be allocated. After the GKE cluster is running, run `kubectl get pods --all-namespaces` to make sure the client can access the cluster correctly.

If user would like to experiment with A100 MIG partitioned GPU in GKE, please create node pool with following command:
```
gcloud beta container node-pools create accel \
  --project ${PROJECT_ID} \
  --zone ${ZONE} \
  --cluster ${DEPLOYMENT_NAME} \
  --service-account=${SERVICE_ACCOUNT} \
  --num-nodes 1 \
  --accelerator type=nvidia-tesla-a100,count=1,gpu-partition-size=1g.5gb  \
  --enable-autoscaling --min-nodes 1 --max-nodes 2 \
  --machine-type=a2-highgpu-1g  \
  --disk-size=100 \
  --scopes cloud-platform \
  --verbosity error
```

Please note that A100 MIG in GKE does not support GPU metrics yet, and Triton GPU metrics are not compatible with A100 MIG either. Hence, please disable GPU metrics by unselecting allowGPUMetrics while deploying the Triton GKE app. For the same reason, this deployer doesn't support inference workload auto-scaling on A100 MIG.

Second, go to this [GKE Marketplace link](https://console.cloud.google.com/marketplace/details/nvidia-ngc-public/triton-inference-server) to deploy Triton application.

Users can leave everything as default if their models have already been tested/validated with Triton. They can provide a GCS path pointing to the model repository containing their models. By default, we provide a BERT large model optimized by TensorRT in a public demo GCS bucket that is compatible with the `xx.yy` release of Triton Server in `gs://triton_sample_models/xx_yy`. However, please take note of the following about this demo bucket:
- The TensorRT engine provided in the demo bucket is only compatible with Tesla T4 GPUs.
- This bucket is located in `us-central1`, so loading from this bucket into Triton in other regions may be affected.
- The first deployment of this Triton GKE application will be slower than consecutive runs because the image needs to be pulled into the GKE cluster.
- You can find an example of how this model is generated and uploaded [here](trt-engine/README.md).

Here, `xx.yy` is the version of the NGC Triton container needed.

![GKE Marketplace Application UI](ui.png)

We want to discuss the HPA autoscaling metrics users can leverage. GPU Power (percentage of power) tends to be a reliable metric, especially for larger GPUs like V100 and A100. GKE currently natively supports GPU duty cycle, which is the GPU utilization reported by `nvidia-smi`. We ask users to always profile their model to determine the autoscaling target and metrics. When selecting the right metrics for autoscaling, the goal should be to pick metrics that: 1) meet the SLA requirement, 2) give consideration to transient request load, and 3) keep the GPU as fully utilized as possible. Profiling comes in 2 aspects: if the user decides to use Duty Cycle or another GPU metric, it is recommended to establish a baseline that links SLA requirements such as latency with GPU metrics; for example, for model A, latency will be below 10ms 99% of the time when Duty Cycle is below 80%. Additionally, profiling also provides insight into model optimization for inference, with tools like [Nsight](https://developer.nvidia.com/nsight-systems).

Once the application is deployed successfully, get the public ip from ingress:
```
> kubectl get ingress
NAME              CLASS    HOSTS   ADDRESS          PORTS   AGE
triton-external   <none>   *       35.186.215.182   80      107s
```

Third, we will try sending requests to the server with the provided client example.

If the user selected to deploy Triton to accept HTTP requests, please launch [Locust](https://docs.locust.io/en/stable/installation.html) with the Ingress host and port to query Triton Inference Server. In this [example script](https://github.com/triton-inference-server/server/tree/master/deploy/gke-marketplace-app/client-sample/locustfile_bert.py), we send requests to a Triton server which has loaded a BERT large TensorRT engine with a sequence length of 128 from a GCP bucket. We simulate 1000 concurrent users as the target and spawn users at a rate of 50 users per second.
```
locust -f locustfile_bert.py -H http://${INGRESS_HOST}:${INGRESS_PORT}
```

The client example pushes ~650 QPS (queries per second) to Triton Server and will trigger an auto scale of T4 GPU nodes (we recommend using T4 and A100 [MIG] for inference). From the Locust UI, we will observe a drop in the mean and variance of the request latency. At the end, after autoscaling, we see the latency stabilize at ~200 ms, end to end from a US client to a Europe server, which is excellent for a model that has 345 million parameters. Since each node uses 1 T4 + an n1-standard-4 instance and can handle ~450 QPS, at the on-demand price of ($0.35+$0.19)=$0.54/hr this translates to 3 million inferences per dollar for the BERT large model at batch size 1. Furthermore, with the 3-year commitment price, the hourly rate is ($0.16+$0.08)=$0.24/hr, which translates to 6.75 million inferences per dollar.

![Locust Client Chart](client.png)

Alternatively, user can opt to use
[Perf Analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
to profile and study the performance of Triton Inference Server. Here we also
provide a
[client script](https://github.com/triton-inference-server/server/tree/master/deploy/gke-marketplace-app/client-sample/perf_analyzer_grpc.sh)
to use Perf Analyzer to send gRPC to Triton Server GKE deployment. Perf Analyzer
client requires user to use NGC Triton Client Container.

```
bash perf_analyzer_grpc.sh ${INGRESS_HOST}:${INGRESS_PORT}
```

## Additional Resources

See the following resources to learn more about NVIDIA Triton Inference Server and GKE GPU capabilities.

**Documentation**

- [GPU in Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/docs/how-to/gpus)
- [Optimize GPU Performance in Google Cloud Platform](https://cloud.google.com/compute/docs/gpus/optimize-gpus)
- [Triton Inference Server](https://github.com/triton-inference-server/server)
- [AI Platform Prediction: Custom container concepts with Triton Server](https://cloud.google.com/solutions/ai-platform-prediction-custom-container-concepts) by [Kevin Tsai](https://github.com/merlin1649)
- [AI Platform Prediction: Direct model server setup for NVIDIA Triton Inference Server](https://cloud.google.com/solutions/ai-platform-prediction-direct-model-server-nvidia) by [Kevin Tsai](https://github.com/merlin1649)

## Known Issues

- GKE one click cluster creation doesn't support GPU node pools at the moment, so users have to manually create a compatible (>=1.18.7) cluster and attach a GPU node pool (T4 and A100 MIG recommended).
- When the Horizontal Pod Autoscaler (HPA) scales out and all GPU node pools are already utilized, GKE will request a new GPU node, which can take between 4-7 minutes; it could be a long wait once GPU driver installation and image pulling are added. We recommend leveraging multi-tier model serving and Triton's priority feature to create a cushion for latency-critical models, and allocating an active standby GPU node for spikes of requests.


================================================
FILE: deploy/gke-marketplace-app/benchmark/README.md
================================================
<!--
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Benchmarking with NVIDIA Triton Inference Server GKE Marketplace Application

**Table Of Contents**
- [Models](#models)
- [Performance](#performance)

## Models

First, we collect a set of TensorFlow and TensorRT models to compare:

- Get [Distill Bert fine-tuned with Squad Q&A task](https://huggingface.co/distilbert-base-cased-distilled-squad/tree/main) from Huggingface: `wget https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/saved_model.tar.gz`
- Get [Bert base fine-tuned with Squad Q&A task](https://huggingface.co/deepset/bert-base-cased-squad2/tree/main) from Huggingface: `wget https://huggingface.co/deepset/bert-base-cased-squad2/resolve/main/saved_model.tar.gz`
- Follow [TensorRT Demo Bert](https://github.com/NVIDIA/TensorRT/tree/master/demo/BERT) to convert the BERT base model to a TensorRT engine, choosing a sequence length of 384 to match the previous 2 TensorFlow models. As the last step, we create the TensorRT engine with 2 optimization profiles (profile 0 for batch size 1 and profile 1 for batch size 4) by running: `python3 builder.py -m models/fine-tuned/bert_tf_ckpt_base_qa_squad2_amp_384_v19.03.1/model.ckpt -o engines/model.plan -b 8 -s 384 --fp16 --int8 --strict -c models/fine-tuned/bert_tf_ckpt_base_qa_squad2_amp_384_v19.03.1 --squad-json ./squad/train-v2.0.json -v models/fine-tuned/bert_tf_ckpt_base_qa_squad2_amp_384_v19.03.1/vocab.txt --calib-num 100 -iln -imh`. This needs to be run on the same GPU type that will be used for inference (an engine optimized on A100 cannot be used for inference on T4).

We then place the models into a GCS bucket with the following structure; the `config.pbtxt` files are provided.
```
    ├── bert_base_trt_gpu
    │   ├── 1
    │   │   └── model.plan
    │   └── config.pbtxt
    ├── bert_base_trt_gpu_seqlen128
    │   ├── 1
    │   │   └── model.plan
    │   └── config.pbtxt
    ├── bert_base_tf_gpu
    │   ├── 1
    │   │   └── model.savedmodel
    │   └── config.pbtxt
    ├── bert_base_tf_cpu
    │   ├── 1
    │   │   └── model.savedmodel
    │   └── config.pbtxt
    ├── bert_distill_tf_gpu
    │   ├── 1
    │   │   └── model.savedmodel
    │   └── config.pbtxt
    └── bert_distill_tf_cpu
        ├── 1
        │   └── model.savedmodel
        └── config.pbtxt
```

When deploying the Triton GKE application, point the model repository to the directory that contains the structure above with the actual models.

## Performance

We use Triton's perf analyzer to benchmark the performance of each model; the perf analyzer resides in another pod of the GKE cluster.
```bash
export INGRESS_HOST=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
export INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].port}')
bash perf_query.sh ${INGRESS_HOST}:${INGRESS_PORT} bert_base_trt_gpu 384
```

We deploy the models on n1-standard-96 for CPU BERT BASE and Distill BERT, and on (n1-standard-4 + T4) for the GPU BERT models. The sequence length of the BERT models is 384 tokens, and we measure the latency/throughput with a concurrency sweep using Triton's performance analyzer. The latency includes Istio ingress/load balancing and reflects the true round-trip cost in the same GCP zone.

For all the models with a sequence length of 384:

- CPU BERT BASE: latency: 700ms, throughput: 12 qps
- CPU Distill BERT: latency: 369ms, throughput: 24 qps

- GPU BERT BASE: latency: 230ms, throughput: 34.7 qps
- GPU Distill BERT: latency: 118ms, throughput: 73.3 qps
- GPU TensorRT BERT BASE: latency: 50ms, throughput: 465 qps

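The n1-standard-96 is priced at $4.56/hr, while the n1-standard-4 at $0.19/hr plus the T4 at $0.35/hr total $0.54/hr. While also achieving a much lower latency, BERT inference with TensorRT on T4 delivers over 163 times as many inferences per dollar as Distill BERT inference on n1-standard-96 (465 qps at $0.54/hr versus 24 qps at $4.56/hr); in other words, its TCO per inference is over 163 times lower.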


================================================
FILE: deploy/gke-marketplace-app/benchmark/model-store/bert_base_tf_cpu/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Triton model configuration for the CPU BERT base TensorFlow model.
# NOTE(review): no platform/backend is set here; presumably Triton derives it
# from the model file in the repository -- confirm.

# Requests may be batched up to a batch size of 4.
max_batch_size: 4
dynamic_batching {
   # Preferred batch size is 1; a request may wait in the queue for at most
   # 2,000,000 us (2 seconds) while a batch is being formed.
   preferred_batch_size: 1
   max_queue_delay_microseconds: 2000000
}
# Two instances of the model, executed on CPU.
instance_group {
   count: 2
   kind: KIND_CPU
}


================================================
FILE: deploy/gke-marketplace-app/benchmark/model-store/bert_base_tf_gpu/config.pbtxt
================================================
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Triton model configuration for the GPU BERT base TensorFlow model.
# NOTE(review): no platform/backend is set here; presumably Triton derives it
# from the model file in the repository -- confirm.

# Requests may be batched up to a batch size of 4.
max_batch_size: 4
dynamic_batching {
   # Prefer full batches of 4; a request may wait in the queue for at most
   # 200,000 us (0.2 seconds) while a batch is being formed.
   preferred_batch_size: 4
   max_queue_delay_microseconds: 200000
}
# Two instances of the model on GPU. No explicit "gpus" list is given, so per
# Triton's instance_group semantics this applies to each available GPU.
instance_group {
   count: 2
   kind: KIND_GPU
}


================================================
FILE: deploy/gke-marketplace-app/benchmark/model-store/bert_base_trt_gpu/config.pbtxt
================================================
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Triton model configuration for the BERT base TensorRT engine (model.plan).
platform: "tensorrt_plan"
# Requests may be batched up to a batch size of 4.
max_batch_size: 4
dynamic_batching {
   # Prefer full batches of 4; a request may wait in the queue for at most
   # 200,000 us (0.2 seconds) while a batch is being formed.
   preferred_batch_size: 4
   max_queue_delay_microseconds: 200000
}
# Two instances of the model on GPU, both using TensorRT optimization
# profile "1" (the benchmark README describes profile 1 as the batch-size-4
# profile of the engine).
instance_group {
   count: 2
   profile: "1"
   kind: KIND_GPU
}


================================================
FILE: deploy/gke-marketplace-app/benchmark/model-store/bert_base_trt_gpu_seqlen128/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Triton model configuration for the BERT base TensorRT engine (model.plan)
# in the bert_base_trt_gpu_seqlen128 model directory.
platform: "tensorrt_plan"
# Requests may be batched up to a batch size of 8.
max_batch_size: 8
dynamic_batching {
   # Prefer full batches of 8; a request may wait in the queue for at most
   # 200,000 us (0.2 seconds) while a batch is being formed.
   preferred_batch_size: 8
   max_queue_delay_microseconds: 200000
}
# Two instances of the model on GPU. No optimization profile is selected
# here, so the engine's default profile is presumably used -- confirm.
instance_group {
   count: 2
   kind: KIND_GPU
}


================================================
FILE: deploy/gke-marketplace-app/benchmark/model-store/bert_distill_tf_cpu/config.pbtxt
================================================
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Triton model configuration for the CPU Distill BERT TensorFlow model.
# NOTE(review): no platform/backend is set here; presumably Triton derives it
# from the model file in the repository -- confirm.

# Requests may be batched up to a batch size of 4.
max_batch_size: 4
dynamic_batching {
   # Preferred batch size is 1; a request may wait in the queue for at most
   # 2,000,000 us (2 seconds) while a batch is being formed.
   preferred_batch_size: 1
   max_queue_delay_microseconds: 2000000
}
# Two instances of the model, executed on CPU.
instance_group {
   count: 2
   kind: KIND_CPU
}


================================================
FILE: deploy/gke-marketplace-app/benchmark/model-store/bert_distill_tf_gpu/config.pbtxt
================================================
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the GPU variant of the distilled BERT benchmark model.
# Requests may be batched up to 4 at a time.
max_batch_size: 4
# Dynamic batching settings: preferred batch size of 4 (equal to
# max_batch_size) and a maximum queue delay of 200,000 microseconds (0.2 s).
dynamic_batching {
   preferred_batch_size: 4
   max_queue_delay_microseconds: 200000
}
# Two model instances of kind KIND_GPU.
instance_group {
   count: 2
   kind: KIND_GPU
}


================================================
FILE: deploy/gke-marketplace-app/benchmark/perf-analyzer-script/perf_query.sh
================================================
#!/usr/bin/env bash
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Positional arguments. Each one is optional and falls back to the value after
# ":-" when it is omitted or empty.
# $1: host:port of the server under test; the default is built from the
#     INGRESS_HOST and INGRESS_PORT environment variables.
SERVER_HOST=${1:-"${INGRESS_HOST}:${INGRESS_PORT}"} # must point at the public ingress IP/port
# $2: model name; defaults to the MODEL_NAME environment variable.
MODEL_NAME=${2:-"${MODEL_NAME}"}
# $3: sequence length; defaults to the SEQ_LEN environment variable.
# NOTE: SEQ_LENGTH is assigned here but not referenced again in this script.
SEQ_LENGTH=${3:-"${SEQ_LEN}"}
BATCH_SIZE=${4:-2}
MAX_LATENCY=${5:-5000}
MAX_CLIENT_THREADS=${6:-20}
MAX_CONCURRENCY=${7:-24}
MODEL_VERSION=${8:-1}
# NOTE: precision is assigned here but not referenced again in this script.
precision=${9:-"fp32"}
PERFCLIENT_PERCENTILE=${10:-90}
# NOTE(review): MAX_TRIALS is read from $12; $11 is not consumed by this
# script. Confirm whether the gap is intentional before renumbering.
MAX_TRIALS=${12:-40}

# Flags handed to perf_client. The trailing backslashes join the lines into a
# single string whose flags are separated by three spaces; the string is
# expanded unquoted below so that it splits into individual words.
ARGS="\
   --max-threads ${MAX_CLIENT_THREADS} \
   -m ${MODEL_NAME} \
   -x ${MODEL_VERSION} \
   -p 3000 \
   --async \
   --concurrency-range 4:${MAX_CONCURRENCY}:2 \
   -r ${MAX_TRIALS} \
   -v \
   -i HTTP \
   -u ${SERVER_HOST} \
   -b ${BATCH_SIZE} \
   -l ${MAX_LATENCY} \
   -z \
   --percentile=${PERFCLIENT_PERCENTILE}"

# Print the flags for the log: sed rewrites each "three spaces + dash"
# separator so that every flag starts on its own line.
echo "Using args:  $(echo "$ARGS" | sed -e 's/   -/\n-/g')"

# Run the benchmark with the flags above plus "-f perf.csv"
# (presumably the CSV results file; confirm against the perf_client help).
/workspace/install/bin/perf_client $ARGS -f perf.csv

================================================
FILE: deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
================================================
# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Pod that runs the Triton SDK image in the default namespace. The container
# does no work of its own: it loops on "sleep 30" forever so the pod stays up,
# presumably so a user can exec into it and run the benchmark scripts.
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: nv-triton-client
  name: nv-triton-client
  namespace: default
spec:
  containers:
  - image: nvcr.io/nvidia/tritonserver:26.02-py3-sdk
    imagePullPolicy: Always
    name: nv-triton-client
    # NOTE(review): the container runs privileged; confirm this is actually
    # required for the client tooling.
    securityContext:
      privileged: true
    # Keep-alive loop: bash sleeps in 30 second intervals indefinitely.
    command: [ "/bin/bash", "-c", "--" ]
    args: [ "while true; do sleep 30; done;" ]


================================================
FILE: deploy/gke-marketplace-app/client-sample/bert_request.json
================================================
{
  "inputs": [{
    "name": "input_ids",
    "shape": [1, 128],
    "datatype": "INT32",
    "parameters": {},
    "data": [101, 2054, 2003, 23435, 5339, 1029, 102, 23435, 5339, 2003, 1037, 2152, 2836, 2784, 4083, 28937, 4132, 2008, 18058, 2659, 2397, 9407, 1998, 2152, 2083, 18780, 2005, 18726, 2107, 2004, 16755, 2545, 1010, 4613, 1998, 3746, 1013, 2678, 2006, 1050, 17258, 2401, 14246, 2271, 1012, 2009, 2950, 11968, 8043, 2015, 2000, 12324, 4275, 1010, 1998, 13354, 7076, 2000, 2490, 3117, 23092, 1998, 9014, 2077, 11243, 20600, 2015, 2005, 28937, 1012, 2651, 1050, 17258, 2401, 2003, 2330, 1011, 14768, 6129, 11968, 8043, 2015, 1998, 13354, 7076, 1999, 23435, 5339, 2061, 2008, 1996, 2784, 4083, 2451, 2064, 7661, 4697, 1998, 7949, 2122, 6177, 2000, 2202, 5056, 1997, 3928, 23435, 5339, 20600, 2015, 2005, 2115, 18726, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
  }, {
    "name": "input_mask",
    "shape": [1, 128],
    "datatype": "INT32",
    "parameters": {},
    "data": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
  }, {
    "name": "segment_ids",
    "shape": [1, 128],
    "datatype": "INT32",
    "parameters": {},
    "data": [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
  }],
  "outputs": [{
    "name": "cls_squad_logits",
    "parameters": {
      "binary_data": false
    }
  }]
}


================================================
FILE: deploy/gke-marketplace-app/client-sample/locustfile_bert.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

from locust import HttpUser, LoadTestShape, between, task


class ProfileLoad(LoadTestShape):
    """
    Load profile that ramps up to target_users and then holds.

    Every tick requests target_users total users together with a spawn
    rate of step_users, so the number of running users is expected to
    grow by step_users per second (Locust spawn-rate semantics) until
    target_users is reached.  The load then stays at target_users
    until time_limit seconds have elapsed, after which the test stops.
    """

    target_users = 1000  # total number of users to reach and hold
    step_users = 50  # spawn rate handed to Locust while ramping
    time_limit = 3600  # seconds

    def tick(self):
        """
        Return the (user_count, spawn_rate) tuple for the current tick.

        Returns None once the rounded run time reaches time_limit, which
        signals the end of the test.
        """
        run_time = round(self.get_run_time())
        if run_time >= self.time_limit:
            return None

        # Always request the integer target.  Deriving it as
        # (target_users / step_users) * step_users produces a float, which
        # is not even integral when target_users is not a multiple of
        # step_users, although both branches were meant to equal target_users.
        return (self.target_users, self.step_users)


class TritonUser(HttpUser):
    """
    Simulated client that repeatedly posts one fixed BERT inference request.
    """

    # Both bounds are 0.2, so every user pauses a fixed 0.2 between tasks.
    wait_time = between(0.2, 0.2)

    @task()
    def bert(self):
        """
        POST the pre-serialized request body to the model's infer endpoint.
        """
        self.client.post(self.url1, data=self.payload)

    def on_start(self):
        """
        Load the request from bert_request.json and build the infer URL.

        The file is looked up relative to the current working directory.
        """
        with open("bert_request.json", encoding="utf-8") as f:
            self.data = json.load(f)

        # Serialize once: the body is identical for every request, so
        # re-encoding it per task would only burn load-generator CPU.
        self.payload = json.dumps(self.data)

        self.url1 = "{}/v2/models/{}/infer".format(self.environment.host, "bert")


================================================
FILE: deploy/gke-marketplace-app/client-sample/perf_analyzer_grpc.sh
================================================
#!/usr/bin/env bash
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Positional arguments. Each one is optional and falls back to the value after
# ":-" when it is omitted or empty.
# $1: host:port of the server under test; the default is built from the
#     INGRESS_HOST and INGRESS_PORT environment variables.
SERVER_HOST=${1:-"${INGRESS_HOST}:${INGRESS_PORT}"} # must point at the public ingress IP/port
MODEL_VERSION=${2:-1}
# NOTE: precision is assigned here but not referenced again in this script.
precision=${3:-"int8"}
BATCH_SIZE=${4:-1}
MAX_LATENCY=${5:-500}
MAX_CLIENT_THREADS=${6:-6}
MAX_CONCURRENCY=${7:-20}
MODEL_NAME=${8:-"bert"}
# Used below as the shape of all three model inputs.
SEQ_LENGTH=${9:-"128"}
PERFCLIENT_PERCENTILE=${10:-90}
STABILITY_PERCENTAGE=${11:-0.01}
MAX_TRIALS=${12:-1000000}

# Flags handed to perf_client. The trailing backslashes join the lines into a
# single string whose flags are separated by three spaces; the string is
# expanded unquoted below so that it splits into individual words.
ARGS="\
   --max-threads ${MAX_CLIENT_THREADS} \
   -m ${MODEL_NAME} \
   -x ${MODEL_VERSION} \
   -p 1000 \
   -t ${MAX_CONCURRENCY} \
   -s ${STABILITY_PERCENTAGE} \
   -r ${MAX_TRIALS} \
   -v \
   -i gRPC \
   -u ${SERVER_HOST} \
   -b ${BATCH_SIZE} \
   -l ${MAX_LATENCY} \
   -z \
   --shape=input_ids:${SEQ_LENGTH} \
   --shape=segment_ids:${SEQ_LENGTH} \
   --shape=input_mask:${SEQ_LENGTH} \
   --percentile=${PERFCLIENT_PERCENTILE}"

# Print the flags for the log: sed rewrites each "three spaces + dash"
# separator so that every flag starts on its own line.
echo "Using args:  $(echo "$ARGS" | sed -e 's/   -/\n-/g')"

# Run the benchmark over gRPC with the flags assembled above.
/workspace/install/bin/perf_client $ARGS


================================================
FILE: deploy/gke-marketplace-app/server-deployer/Dockerfile
================================================
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Deployer image: this file contains nothing beyond the base image reference.
# NOTE(review): the "onbuild" variant presumably picks up the chart and schema
# from the build context via ONBUILD triggers; confirm against the Google
# Cloud Marketplace tools documentation.
FROM gcr.io/cloud-marketplace-tools/k8s/deployer_helm/onbuild


================================================
FILE: deploy/gke-marketplace-app/server-deployer/build_and_push.sh
================================================
#!/bin/bash
# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Target registry: gcr.io/<current gcloud project>, with every ":" in the
# project id rewritten to "/" by tr.
export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
export APP_NAME=tritonserver
# Tags applied to the published images. NOTE: despite the names,
# MAJOR_VERSION holds major.minor and MINOR_VERSION holds major.minor.patch.
export MAJOR_VERSION=2.66
export MINOR_VERSION=2.66.0
# Tag of the source image on nvcr.io that is pulled and re-published.
export NGC_VERSION=26.02-py3

# Fetch the source server image.
docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION

# Re-tag the same image three times for the target registry.
docker tag nvcr.io/nvidia/$APP_NAME:$NGC_VERSION $REGISTRY/$APP_NAME:$MAJOR_VERSION
docker tag nvcr.io/nvidia/$APP_NAME:$NGC_VERSION $REGISTRY/$APP_NAME:$MINOR_VERSION
docker tag nvcr.io/nvidia/$APP_NAME:$NGC_VERSION $REGISTRY/$APP_NAME:$NGC_VERSION

# Publish all three server image tags.
docker push $REGISTRY/$APP_NAME:$MINOR_VERSION
docker push $REGISTRY/$APP_NAME:$MAJOR_VERSION
docker push $REGISTRY/$APP_NAME:$NGC_VERSION

# Build the deployer image from the Dockerfile in the current directory.
docker build --tag $REGISTRY/$APP_NAME/deployer .

# Tag and publish the deployer image under both version tags.
docker tag $REGISTRY/$APP_NAME/deployer $REGISTRY/$APP_NAME/deployer:$MAJOR_VERSION
docker tag $REGISTRY/$APP_NAME/deployer $REGISTRY/$APP_NAME/deployer:$MINOR_VERSION
docker push $REGISTRY/$APP_NAME/deployer:$MAJOR_VERSION
docker push $REGISTRY/$APP_NAME/deployer:$MINOR_VERSION


================================================
FILE: deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
================================================
# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

apiVersion: v1
# Keep in sync with MAJOR_VERSION in server-deployer/build_and_push.sh and
# with the major.minor part of the chart version below.
appVersion: "2.66"
description: Triton Inference Server
name: triton-inference-server
version: 2.66.0


================================================
FILE: deploy/gke-marketplace-app/server-deployer/chart/triton/templates/_helpers.tpl
================================================
{{/*
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/}}

{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
Uses .Values.nameOverride when it is set and falls back to .Chart.Name
otherwise. The result is truncated to 63 characters and a trailing "-" is
removed.
*/}}
{{- define "triton-inference-server.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set, is used as is.
  2. Otherwise, if the release name already contains the chart name (or
     .Values.nameOverride), the release name alone is used.
  3. Otherwise the result is "<release name>-<name>".
In every case the value is truncated to 63 characters and a trailing "-" is
removed.
*/}}
{{- define "triton-inference-server.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- end -}}

{{/*
Create chart name and version as used by the chart label.
The value is "<chart name>-<chart version>" with every "+" replaced by "_",
truncated to 63 characters and with a trailing "-" removed.
*/}}
{{- define "triton-inference-server.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}


================================================
FILE: deploy/gke-marketplace-app/server-deployer/chart/triton/templates/application.yaml
================================================
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Application resource describing this deployment. The whole document is
# rendered only when the gcpMarketplace value is set to true.
{{ if and .Values.gcpMarketplace (eq .Values.gcpMarketplace true) }}
---
apiVersion: app.k8s.io/v1beta1
kind: Application
metadata:
  name: "{{ .Release.Name }}"
  annotations:
    # Icon is the chart's logo.png file, embedded as a base64 data URI.
    kubernetes-engine.cloud.google.com/icon: >-
      data:image/png;base64,{{ .Files.Get "logo.png" | b64enc }}
    marketplace.cloud.google.com/deploy-info: '{"partner_id": "nvidia", "product_id": "triton", "partner_name": "NVIDIA"}'
  labels:
    app.kubernetes.io/name: "{{ .Release.Name }}"
spec:
  descriptor:
    type: Triton
    # Version string comes from the publishedVersion value.
    version: "{{ .Values.publishedVersion }}"
    description: |-
      Triton Inference Server provides a cloud and edge inferencing solution
      optimized for both CPUs and GPUs. Triton supports an HTTP/REST and GRPC
      protocol that allows remote clients to request inferencing for any model
      being managed by the server.

    notes: |-

      Send request to Triton server by using IP address "ingress-triton",
      send to IP:80/v2/models/{}/infer

      Links:
      - [NVIDIA Triton page](https://developer.nvidia.com/nvidia-triton-inference-server)
      - [Documentation](https://github.com/triton-inference-server/server)

  # NOTE(review): the Deployment and HorizontalPodAutoscaler templates of this
  # chart label their objects with "app"/"release" rather than
  # "app.kubernetes.io/name"; confirm this selector matches the intended
  # components.
  selector:
    matchLabels:
      app.kubernetes.io/name: "{{ .Release.Name }}"
  # Resource kinds listed as components of the application.
  componentKinds:
  - group: apps/v1
    kind: Deployment
  - group: v1
    kind: Service
  - group: autoscaling/v2
    kind: HorizontalPodAutoscaler
{{  end }}


================================================
FILE: deploy/gke-marketplace-app/server-deployer/chart/triton/templates/deployment.yaml
================================================
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Deployment that runs the Triton server container. Its name and "app" label
# come from the triton-inference-server.name helper; replica count, image,
# GPU limit, server flags and probe timings are all read from chart values.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ template "triton-inference-server.name" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  # Initial replica count; the HorizontalPodAutoscaler template in this chart
  # targets a Deployment with the same name.
  replicas: {{ .Values.initReplicaCount }}
  # Pods are selected by the same app/release pair that the pod template sets.
  selector:
    matchLabels:
      app: {{ template "triton-inference-server.name" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "triton-inference-server.name" . }}
        release: {{ .Release.Name }}

    spec:
      containers:
        - name: {{ .Chart.Name }}
          # Image reference is assembled as registry/repository:tag.
          image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}

          # Number of GPUs requested through the nvidia.com/gpu resource limit.
          resources:
            limits:
              nvidia.com/gpu: {{ .Values.image.numGpus }}
          env:
            - name: LD_PRELOAD
              value: {{ .Values.image.ldPreloadPath }}
          # Server command line: model store path, strict-model-config,
          # verbose log level and GPU metrics switch, each taken from values.
          args: ["tritonserver", "--model-store={{ .Values.modelRepositoryPath }}",
                 "--strict-model-config={{ .Values.image.strictModelConfig }}",
                 "--log-verbose={{ .Values.image.logVerboseLevel }}",
                 "--allow-gpu-metrics={{ .Values.image.allowGPUMetrics }}"]

          # Named ports; the probes below refer to the "http" port by name.
          ports:
            - containerPort: 8000
              name: http
            - containerPort: 8001
              name: grpc
            - containerPort: 8002
              name: metrics
          # Liveness is checked with an HTTP GET on /v2/health/live.
          livenessProbe:
            httpGet:
              path: /v2/health/live
              port: http
            initialDelaySeconds: {{ .Values.deployment.livenessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.deployment.livenessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.deployment.livenessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.deployment.livenessProbe.successThreshold }}
            failureThreshold: {{ .Values.deployment.livenessProbe.failureThreshold }}
          # Readiness is checked with an HTTP GET on /v2/health/ready.
          readinessProbe:
            httpGet:
              path: /v2/health/ready
              port: http
            initialDelaySeconds: {{ .Values.deployment.readinessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.deployment.readinessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.deployment.readinessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.deployment.readinessProbe.successThreshold }}
            failureThreshold: {{ .Values.deployment.readinessProbe.failureThreshold }}

          # The container process runs as UID 1000.
          securityContext:
            runAsUser: 1000


================================================
FILE: deploy/gke-marketplace-app/server-deployer/chart/triton/templates/hpa.yaml
================================================
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# HorizontalPodAutoscaler for the Triton Deployment.
# Replica bounds and the metric target come from the chart values
# minReplicaCount, maxReplicaCount and HPATargetAverageValue.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: triton-hpa
  namespace: {{ .Release.Namespace }}
  labels:
    app: triton-hpa
spec:
  minReplicas: {{ .Values.minReplicaCount }}
  maxReplicas: {{ .Values.maxReplicaCount }}
  metrics:
  # Scale on an External metric rather than a pod resource metric.
  - type: External
    external:
      metric:
         # Accelerator duty cycle; values.yaml describes the target as GPU utilization.
         name: kubernetes.io|container|accelerator|duty_cycle
      target:
         type: AverageValue
         averageValue: {{ .Values.HPATargetAverageValue }}

  # Workload being scaled. The name is rendered from the same helper template
  # that service.yaml uses for the Service name and selector.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ template "triton-inference-server.name" . }}


================================================
FILE: deploy/gke-marketplace-app/server-deployer/chart/triton/templates/ingress.yaml
================================================
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ingress routing all paths ("/" prefix) to the Triton Service through the
# "gce" ingress class. The backend port is chosen from the tritonProtocol
# chart value: 8001 when it equals "gRPC", otherwise 8000.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: triton-external
  annotations:
    kubernetes.io/ingress.class: "gce"
    # Static IP referenced by name — presumably it must be reserved before
    # installing; verify against the deployer README.
    kubernetes.io/ingress.global-static-ip-name: "ingress-triton"
spec:
  rules:
  - http:
      paths:
      - path: "/"
        pathType: Prefix
        backend:
          service:
            # NOTE(review): hard-coded, whereas service.yaml renders its name from
            # the "triton-inference-server.name" helper; both must resolve to the
            # same value for this backend to be found.
            name: triton-inference-server
            port:
              # Exact string comparison: any value other than "gRPC"
              # (including different casing) selects the HTTP port.
              {{ if eq .Values.tritonProtocol "gRPC" }}
              number: 8001
              {{ else }}
              number: 8000
              {{ end }}


================================================
FILE: deploy/gke-marketplace-app/server-deployer/chart/triton/templates/service.yaml
================================================
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Service in front of the Triton pods. Exposes three ports (8000, 8001, 8002),
# each forwarded to the container port with the matching name
# (http, grpc, metrics).
apiVersion: v1
kind: Service
metadata:
  name: {{ template "triton-inference-server.name" . }}
  namespace: {{ .Release.Namespace }}
  annotations:
    # GKE annotation requesting network endpoint groups for Ingress traffic.
    cloud.google.com/neg: '{"ingress": true}'
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  # Service type is taken from the chart values (service.type).
  type: {{ .Values.service.type }}
  ports:
    # targetPort values are port names, not numbers.
    - port: 8000
      targetPort: http
      name: http-inference-server
    - port: 8001
      targetPort: grpc
      name: grpc-inference-server
    - port: 8002
      targetPort: metrics
      name: metrics-inference-server
  # Selects pods by app name and release, so separate releases do not share
  # endpoints.
  selector:
    app: {{ template "triton-inference-server.name" . }}
    release: {{ .Release.Name }}


================================================
FILE: deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
================================================
# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Initial replica count, plus the lower/upper bounds read by the HPA template.
initReplicaCount: 1
minReplicaCount: 1
maxReplicaCount: 3
# Request protocol, gRPC or HTTP. The ingress template compares against the
# exact string "gRPC"; any other value selects the HTTP port.
tritonProtocol: HTTP
# HPA GPU utilization autoscaling target
HPATargetAverageValue: 85
# Model repository location; modify it to match your GCP storage bucket.
modelRepositoryPath: gs://triton_sample_models/26.02
# Same version string as publishedVersion in the deployer schema.yaml.
publishedVersion: '2.66.0'
gcpMarketplace: true

image:
  registry: gcr.io
  repository: nvidia-ngc-public/tritonserver
  tag: 26.02-py3
  pullPolicy: IfNotPresent
  numGpus: 1
  # False lets Triton try to infer the model config; True requires a
  # config.pbtxt in the model repository (per the schema.yaml title).
  strictModelConfig: False
  # add in custom library which could include custom ops in the model
  ldPreloadPath: ''
  # 0 disables verbose logging; values >= 1 enable it.
  logVerboseLevel: 0
  allowGPUMetrics: True

service:
  type: NodePort

# Probe timings substituted into the liveness and readiness probes of the
# deployment template.
deployment:
  livenessProbe:
    failureThreshold: 60
    initialDelaySeconds: 10
    periodSeconds: 5
    successThreshold: 1
    timeoutSeconds: 1
  readinessProbe:
    failureThreshold: 60
    initialDelaySeconds: 10
    periodSeconds: 5
    successThreshold: 1
    timeoutSeconds: 1


================================================
FILE: deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
================================================
# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Marketplace deployer metadata (test-data variant of the schema): published
# version, cluster requirements and the image-to-property mapping.
x-google-marketplace:
  schemaVersion: v2
  applicationApiVersion: v1beta1
  # Same version string as publishedVersion in the chart's values.yaml.
  publishedVersion: '2.66.0'
  publishedVersionMetadata:
    releaseNote: >-
      Initial release.
    releaseTypes:
    - Feature
    recommended: true

  # Cluster requirements: minimum Kubernetes version, a GPU resource request,
  # and Istio. Assisted cluster creation is disabled, so the guidance text
  # points users at the manual setup instructions.
  clusterConstraints:
    k8sVersion: ">=1.18.7"
    assistedClusterCreation:
      type: DISABLED
      creationGuidance: GKE currently doesn't support auto-create GPU clusters, please refer to <a href="https://github.com/triton-inference-server/server/tree/master/deploy/gke-marketplace-app">Triton GKE Marketplace Deployer</a> to manually create the GKE cluster >= 1.18.7 and add GPU node pools
    resources:
    - requests:
        gpu:
          nvidia.com/gpu: {}
    istio:
      type: REQUIRED

  # Splits the image reference into registry, repository and tag properties.
  # The empty key is presumably the default (unnamed) image — TODO confirm.
  images:
    '':
      properties:
        triton.image.registry:
          type: REGISTRY
        triton.image.repository:
            type: REPO_WITHOUT_REGISTRY
        triton.image.tag:
            type: TAG

# User-configurable deployment parameters. Each entry declares a type and,
# except for name and namespace, a default value.
properties:
  # name and namespace are filled from Marketplace-typed fields (NAME, NAMESPACE).
  name:
    type: string
    x-google-marketplace:
      type: NAME
  namespace:
    type: string
    x-google-marketplace:
      type: NAMESPACE
  # Replica settings: initial count and the autoscaling bounds.
  initReplicaCount:
    title: Initial number of Triton pod instances to deploy.
    type: integer
    default: 1
  minReplicaCount:
    title: Minimum number of Triton pod instances in the deployment for autoscaling.
    type: integer
    default: 1
  maxReplicaCount:
    title: Maximum number of Triton pod instances in the deployment for autoscaling.
    type: integer
    default: 3
  tritonProtocol:
    title: Request protocol to send data to Triton, choose from gRPC and HTTP.
    type: string
    default: HTTP
  HPATargetAverageValue:
    title: HPA autoscaling target, GKE currently support Duty Cycle which is GPU utilization, when target is reached, Triton Server service will create another pod instance. We ask user to analyze model inference to associate appropriate GPU metric target based on latency requirement. We also recommend to leave some room to mitigate transient load effect. For user interested in customizing autoscaling metrics, we recommends GPU Power (Percentage of Power), Queue time or SLA measurements such as latency.
    type: integer
    default: 85
  # NOTE(review): this default (.../models) differs from the versioned default
  # (gs://triton_sample_models/26.02) used in the main schema.yaml and in
  # values.yaml — confirm this is intentional for the test data.
  modelRepositoryPath:
    type: string
    title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, Pytorch, Onnx ... etc.
    default: gs://triton_sample_models/models
  # Properties under the "image." prefix correspond to keys of the image
  # section in the chart's values.yaml.
  image.ldPreloadPath:
    type: string
    title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable.
    default: ''
  image.logVerboseLevel:
    type: integer
    title: Set verbose logging level. Zero (0) disables verbose logging and values >= 1 enable verbose logging, this is helpful when user unsure if the model is compatible with Triton or for general debug.
    default: 0
  image.strictModelConfig:
    type: boolean
    title: Leave this unchecked by default. When strictModelConfig is not checked(False), Triton will try to infer the config file from model file, when checked(True), user need to provide config.pbtxt in model repository.
    default: False
  image.allowGPUMetrics:
    type: boolean
    title: Select by default. When use A100 MIG, unselect to disable GPU Memory metrics reported by Triton, as current GPU metrics not support on A100 MIG.
    default: True
  # Marketplace-typed flag (ISTIO_ENABLED); defaults to True.
  istioEnabled:
    type: boolean
    x-google-marketplace:
      type: ISTIO_ENABLED
    default: True


# Properties that must be present in the submitted values.
required:
- name
- namespace
- modelRepositoryPath

# Extra form content: a single help widget whose HTML description links to the
# manual cluster-setup instructions and the Triton GitHub page.
form:
- widget: help
  description: GKE currently doesn't support autocreate GPU clusters, please refer to <a href="https://github.com/triton-inference-server/server/tree/master/deploy/gke-marketplace-app">Triton GKE Marketplace Deployer</a> to manually create the GKE cluster >= 1.18.7 and add GPU node pools. Also, please refer to the <a href="https://github.com/triton-inference-server/server">Triton GITHUB page</a> for product information.


================================================
FILE: deploy/gke-marketplace-app/server-deployer/schema.yaml
================================================
# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Marketplace deployer metadata: published version, cluster requirements and
# the image-to-property mapping.
x-google-marketplace:
  schemaVersion: v2
  applicationApiVersion: v1beta1
  # Same version string as publishedVersion in the chart's values.yaml.
  publishedVersion: '2.66.0'
  publishedVersionMetadata:
    releaseNote: >-
      Initial release.
    releaseTypes:
    - Feature
    recommended: true

  # Cluster requirements: minimum Kubernetes version, a GPU resource request,
  # and Istio. Assisted cluster creation is disabled, so the guidance text
  # points users at the manual setup instructions.
  clusterConstraints:
    k8sVersion: ">=1.18.7"
    assistedClusterCreation:
      type: DISABLED
      creationGuidance: GKE currently doesn't support auto-create GPU clusters, please refer to <a href="https://github.com/triton-inference-server/server/tree/master/deploy/gke-marketplace-app">Triton GKE Marketplace Deployer</a> to manually create the GKE cluster >= 1.18.7 and add GPU node pools
    resources:
    - requests:
        gpu:
          nvidia.com/gpu: {}
    istio:
      type: REQUIRED

  # Splits the image reference into registry, repository and tag properties.
  # The empty key is presumably the default (unnamed) image — TODO confirm.
  images:
    '':
      properties:
        triton.image.registry:
          type: REGISTRY
        triton.image.repository:
            type: REPO_WITHOUT_REGISTRY
        triton.image.tag:
            type: TAG

# User-configurable deployment parameters. Each entry declares a type and,
# except for name and namespace, a default value.
properties:
  # name and namespace are filled from Marketplace-typed fields (NAME, NAMESPACE).
  name:
    type: string
    x-google-marketplace:
      type: NAME
  namespace:
    type: string
    x-google-marketplace:
      type: NAMESPACE
  # Replica settings: initial count and the autoscaling bounds.
  initReplicaCount:
    title: Initial number of Triton pod instances to deploy.
    type: integer
    default: 1
  minReplicaCount:
    title: Minimum number of Triton pod instances in the deployment for autoscaling.
    type: integer
    default: 1
  maxReplicaCount:
    title: Maximum number of Triton pod instances in the deployment for autoscaling.
    type: integer
    default: 3
  tritonProtocol:
    title: Request protocol to send data to Triton, choose from gRPC and HTTP.
    type: string
    default: HTTP
  HPATargetAverageValue:
    title: HPA autoscaling target, GKE currently support Duty Cycle which is GPU utilization, when target is reached, Triton Server service will create another pod instance. We ask user to analyze model inference to associate appropriate GPU metric target based on latency requirement. We also recommend to leave some room to mitigate transient load effect. For user interested in customizing autoscaling metrics, we recommends GPU Power (Percentage of Power), Queue time or SLA measurements such as latency.
    type: integer
    default: 85
  # The default bucket path carries the release tag (26.02), the same path
  # used by modelRepositoryPath in the chart's values.yaml.
  modelRepositoryPath:
    type: string
    title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc.
    default: gs://triton_sample_models/26.02
  # Properties under the "image." prefix correspond to keys of the image
  # section in the chart's values.yaml.
  image.ldPreloadPath:
    type: string
    title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin, the compiled shared library must be provided via LD_PRELOAD environment variable.
    default: ''
  image.logVerboseLevel:
    type: integer
    title: Set verbose logging level. Zero (0) disables verbose logging and values >= 1 enable verbose logging, this is helpful when user unsure if the model is compatible with Triton or for general debug.
    default: 0
  image.strictModelConfig:
    type: boolean
    title: Leave this unchecked by default. When strictModelConfig is not checked(False), Triton will try to infer the config file from model file, when checked(True), user need to provide config.pbtxt in model repository.
    default: False
  image.allowGPUMetrics:
    type: boolean
    title: Select by default. When use A100 MIG, unselect to disable GPU Memory metrics reported by Triton, as current GPU metrics not support on A100 MIG.
    default: True
  # Marketplace-typed flag (ISTIO_ENABLED); defaults to True.
  istioEnabled:
    type: boolean
    x-google-marketplace:
      type: ISTIO_ENABLED
    default: True


# Properties that must be present in the submitted values.
required:
- name
- namespace
- modelRepositoryPath

# Extra form content: a single help widget whose HTML description links to the
# manual cluster-setup instructions and the Triton GitHub page.
form:
- widget: help
  description: GKE currently doesn't support autocreate GPU clusters, please refer to <a href="https://github.com/triton-inference-server/server/tree/master/deploy/gke-marketplace-app">Triton GKE Marketplace Deployer</a> to manually create the GKE cluster >= 1.18.7 and add GPU node pools. Also, please refer to the <a href="https://github.com/triton-inference-server/server">Triton GITHUB page</a> for product information.


================================================
FILE: deploy/gke-marketplace-app/trt-engine/README.md
================================================
<!--
# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Instructions to create the BERT engine for each Triton update

## Description

```
docker run --gpus all -it --network host \
    --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
    -v ~:/scripts nvcr.io/nvidia/tensorrt:26.02-py3

pip install onnx six torch tf2onnx tensorflow

git clone -b main https://github.com/NVIDIA/TensorRT.git
cd TensorRT
git submodule update --init --recursive

export TRT_OSSPATH=/workspace/TensorRT
export TRT_LIBPATH=/lib/x86_64-linux-gnu

pushd /usr/local/bin && wget https://ngc.nvidia.com/downloads/ngccli_cat_linux.zip && unzip ngccli_cat_linux.zip && chmod u+x ngc-cli/ngc && rm ngccli_cat_linux.zip ngc-cli.md5 && ln -s ngc-cli/ngc ngc && echo "no-apikey\nascii\n" | ngc config set

popd

cd /workspace/TensorRT/demo/BERT
bash ./scripts/download_squad.sh
bash ./scripts/download_model.sh large 128
# bash ./scripts/download_model.sh large 384

mkdir -p engines

python3 builder.py -m models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/model.ckpt -o engines/bert_large_int8_bs1_s128.engine -b 1 -s 128 -c models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/ -v models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/vocab.txt --int8 --fp16 --strict --calib-num 1 -iln -imh

gsutil cp engines/bert_large_int8_bs1_s128.engine gs://triton_sample_models/26.02/bert/1/model.plan
```

For each Triton upgrade, update both the container version used to generate the model and the model path in GCS (`gs://triton_sample_models/26.02/`) to match the new version.


================================================
FILE: deploy/k8s-onprem/Chart.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Helm chart metadata for the k8s-onprem Triton Inference Server deployment.
apiVersion: v2
appVersion: "1.0"
description: Triton Inference Server
name: triton-inference-server
version: 1.0.0
# Optional subcharts, each gated by a tag. The chart README describes setting
# the loadBalancing and autoscaling tags in values.yaml.
dependencies:
  - name: traefik
    # "~" allows patch-level updates within the given minor version.
    version: "~10.6.2"
    repository: "https://helm.traefik.io/traefik"
    tags:
      - loadBalancing
  - name: prometheus-adapter
    version: "~3.0.0"
    repository: "https://prometheus-community.github.io/helm-charts"
    tags:
      - autoscaling


================================================
FILE: deploy/k8s-onprem/README.md
================================================
<!--
# Copyright (c) 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)

# Kubernetes Deploy: NVIDIA Triton Inference Server Cluster

This repository includes a Helm chart and instructions for installing NVIDIA Triton
Inference Server in an on-premises or AWS EC2 Kubernetes cluster. You can also use this
repository to enable load balancing and autoscaling for your Triton cluster.

This guide assumes you already have a functional Kubernetes cluster with support for GPUs.
See the [NVIDIA GPU Operator documentation](https://docs.nvidia.com/datacenter/cloud-native/kubernetes/install-k8s.html)
for instructions on how to install Kubernetes and enable GPU access in your Kubernetes cluster.
You must also have Helm installed (see [Installing Helm](#installing-helm) for instructions). Note the following requirements:

* To deploy Prometheus and Grafana to collect and display Triton metrics, your cluster must contain sufficient CPU resources to support these services.

* To use GPUs for inferencing, your cluster must be configured to contain the desired number of GPU nodes, with
support for the NVIDIA driver and CUDA version required by the version
of the inference server you are using.

* To enable autoscaling, your cluster's kube-apiserver must have the [aggregation layer
enabled](https://kubernetes.io/docs/tasks/extend-kubernetes/configure-aggregation-layer/).
This will allow the horizontal pod autoscaler to read custom metrics from the prometheus adapter.

This Helm chart is available from the [Triton Inference Server
GitHub repository](https://github.com/triton-inference-server/server).

For more information on Helm and Helm charts, visit the [Helm documentation](https://helm.sh/docs/).

## Quickstart

First, clone this repository to a local machine. Then, execute the following commands:

Install helm

```
$ curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
$ chmod 700 get_helm.sh
$ ./get_helm.sh
```

Deploy Prometheus and Grafana

```
$ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
$ helm repo update
$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false prometheus-community/kube-prometheus-stack
```

Deploy Triton with default settings

```
helm install example ./deploy/k8s-onprem
```


<!-- The steps below describe how to set-up a model repository, use Helm to
launch the inference server, and then send inference requests to the
running server. You can access a Grafana endpoint to see real-time
metrics reported by the inference server. -->


## Installing Helm

### Helm v3

If you do not already have Helm installed in your Kubernetes cluster,
following the steps in the [official Helm install
guide](https://helm.sh/docs/intro/install/) will
give you a quick setup.

If you are currently using Helm v2 and would like to migrate to Helm v3,
see the [official migration guide](https://helm.sh/docs/topics/v2_v3_migration/).

## Model Repository
If you already have a model repository, you may use that with this Helm
chart. If you do not have a model repository, you can check out a local
copy of the server source repository to create an example
model repository:

```
$ git clone https://github.com/triton-inference-server/server.git
```

Triton Server needs a repository of models that it will make available
for inferencing. For this example, we are using an existing NFS server and
placing our model files there. See the
[Model Repository documentation](../../docs/user_guide/model_repository.md) for other
supported locations.

Following the [QuickStart](../../docs/getting_started/quickstart.md), download the
example model repository to your system and copy it onto your NFS server.
Then, add the URL or IP address of your NFS server and the server path of your
model repository to `values.yaml`.


## Deploy Prometheus and Grafana

The inference server metrics are collected by Prometheus and viewable
through Grafana. The inference server Helm chart assumes that Prometheus
and Grafana are available so this step must be followed even if you
do not want to use Grafana.

Use the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) Helm chart to install these components. The
*serviceMonitorSelectorNilUsesHelmValues* flag is needed so that
Prometheus can find the inference server metrics in the *example*
release deployed in a later section.

```
$ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
$ helm repo update
$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false prometheus-community/kube-prometheus-stack
```

Then port-forward to the Grafana service so you can access it from
your local browser.

```
$ kubectl port-forward service/example-metrics-grafana 8080:80
```

Now you should be able to navigate in your browser to localhost:8080
and see the Grafana login page. Use username=admin and
password=prom-operator to log in.

An example Grafana dashboard is available in dashboard.json. Use the
import function in Grafana to import and view this dashboard.

## Enable Autoscaling
To enable autoscaling, ensure that the autoscaling tag in `values.yaml` is set to `true`.
This will do two things:

1. Deploy a Horizontal Pod Autoscaler that will scale replicas of the triton-inference-server
based on the information included in `values.yaml`.

2. Install the [prometheus-adapter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-adapter) helm chart, allowing the Horizontal Pod Autoscaler to scale
based on custom metrics from prometheus.

The included configuration will scale Triton pods based on the average queue time,
as described in [this blog post](https://developer.nvidia.com/blog/deploying-nvidia-triton-at-scale-with-mig-and-kubernetes/#:~:text=Query%20NVIDIA%20Triton%20metrics%20using%20Prometheus). To customize this,
you may replace or add to the list of custom rules in `values.yaml`. If you change
the custom metric, be sure to change the values in autoscaling.metrics.

If autoscaling is disabled, the number of Triton server pods is set to the minReplicas
variable in `values.yaml`.

## Enable Load Balancing
To enable load balancing, ensure that the loadBalancing tag in `values.yaml`
is set to `true`. This will do two things:

1. Deploy a Traefik reverse proxy through the [Traefik Helm Chart](https://github.com/traefik/traefik-helm-chart).

2. Configure two Traefik [IngressRoutes](https://doc.traefik.io/traefik/providers/kubernetes-crd/),
one for http and one for grpc. This will allow the Traefik service to expose two
ports that will be forwarded to and balanced across the Triton pods.

To choose the port numbers exposed, or to disable either http or grpc, edit the
configured variables in `values.yaml`.

## Deploy the Inference Server

Deploy the inference server, autoscaler, and load balancer using the default
configuration with the following commands.

Here, and in the following commands we use the name `example` for our chart.
This name will be added to the beginning of all resources created during the helm
installation.

```
$ cd <directory containing Chart.yaml>
$ helm install example .
```

Use kubectl to see status and wait until the inference server pods are
running.

```
$ kubectl get pods
NAME                                               READY   STATUS    RESTARTS   AGE
example-triton-inference-server-5f74b55885-n6lt7   1/1     Running   0          2m21s
```

There are several ways of overriding the default configuration as
described in this [Helm
documentation](https://helm.sh/docs/using_helm/#customizing-the-chart-before-installing).

You can edit the values.yaml file directly or you can use the *--set*
option to override a single parameter with the CLI. For example, to
deploy a cluster with a minimum of two inference servers use *--set* to
set the autoscaler.minReplicas parameter.

```
$ helm install example --set autoscaler.minReplicas=2 .
```

You can also write your own "config.yaml" file with the values you
want to override and pass it to Helm. If you specify a "config.yaml" file, the
values set will override those in values.yaml.

```
$ cat << EOF > config.yaml
namespace: MyCustomNamespace
image:
  imageName: nvcr.io/nvidia/tritonserver:custom-tag
  modelRepositoryPath: gs://my_model_repository
EOF
$ helm install example -f config.yaml .
```

## Deploying the Inference Server on OpenShift or OKD

Because of the default security posture of OpenShift and OKD, the configuration
of which uses OpenShift-specific APIs, the chart needs special consideration
when targeting those environments. All of the customizations and
prerequisites discussed above hold for an OpenShift environment, except that you do not need to
install Prometheus and Grafana and can instead enable monitoring for
user-defined projects by following
[the OpenShift documentation on the topic](https://docs.redhat.com/en/documentation/openshift_container_platform/4.17/html/monitoring/enabling-monitoring-for-user-defined-projects).

To deploy the configurations to enable NFS mounts and the non-root UIDs used in
the Triton deployment, a tag can be enabled alongside any other configurations
discussed above. In the simplest case, to use `--set` on the command line, you
can simply update the tags.openshift parameter.

```
$ cd <directory containing Chart.yaml>
$ helm install example --set tags.openshift=true .
```

## Probe Configuration

`templates/deployment.yaml` contains the configuration of the `livenessProbe`, `readinessProbe` and `startupProbe` for the Triton server container.
By default, Triton loads all the models before starting the HTTP server to respond to the probes. The process can take several minutes, depending on the size of the models.
If it is not completed in `startupProbe.failureThreshold * startupProbe.periodSeconds` seconds, then Kubernetes considers this a pod failure and restarts it,
ending up with an infinite loop of restarting pods, so make sure to set these values high enough for your use case.
The liveness and readiness probes are sent only after the first success of the startup probe.

For more details, see the [Kubernetes probe documentation](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) and the [feature page of the startup probe](https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/950-liveness-probe-holdoff/README.md).

## Using Triton Inference Server

Now that the inference server is running you can send HTTP or GRPC
requests to it to perform inferencing. By default, this chart deploys [Traefik](https://traefik.io/)
and uses [IngressRoutes](https://doc.traefik.io/traefik/providers/kubernetes-crd/)
to balance requests across all available nodes.

To send requests through the Traefik proxy, use the Cluster IP of the
traefik service deployed by the Helm chart. In this case, it is 10.111.128.124.

```
$ kubectl get services
NAME                              TYPE           CLUSTER-IP       EXTERNAL-IP   PORT(S)                                                    AGE
...
example-traefik                   LoadBalancer   10.111.128.124   <pending>     8001:31752/TCP,8000:31941/TCP,80:30692/TCP,443:30303/TCP   74m
example-triton-inference-server   ClusterIP      None             <none>        8000/TCP,8001/TCP,8002/TCP                                 74m
```

Use the following command to refer to the Cluster IP:
```
cluster_ip=`kubectl get svc -l app.kubernetes.io/name=traefik -o=jsonpath='{.items[0].spec.clusterIP}'`
```


The Traefik reverse-proxy exposes an HTTP endpoint on port 8000, a GRPC
endpoint on port 8001, and a Prometheus metrics endpoint on
port 8002. You can use curl to get the meta-data of the inference server
from the HTTP endpoint.

```
$ curl $cluster_ip:8000/v2
```

Follow the [QuickStart](../../docs/getting_started/quickstart.md) to get the example
image classification client that can be used to perform inferencing
using image classification models on the inference
server. For example,

```
$ image_client -u $cluster_ip:8000 -m inception_v3_onnx -s INCEPTION -c3 mug.jpg
Request 0, batch size 1
Image 'images/mug.jpg':
    504 (COFFEE MUG) = 0.723992
    968 (CUP) = 0.270953
    967 (ESPRESSO) = 0.00115997
```

## Testing Load Balancing and Autoscaling
After you have confirmed that your Triton cluster is operational and can perform inference,
you can test the load balancing and autoscaling features by sending a heavy load of requests.
One option for doing this is using the
[perf_analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
application.

You can apply a progressively increasing load with a command like:
```
perf_analyzer -m simple -u $cluster_ip:8000 --concurrency-range 1:10
```

From your Grafana dashboard, you should be able to see the number of pods increase
as the load increases, with requests being routed evenly to the new pods.

## Cleanup

After you have finished using the inference server, you should use Helm to
delete the deployment.

```
$ helm list
NAME            REVISION  UPDATED                   STATUS    CHART                          APP VERSION   NAMESPACE
example         1         Wed Feb 27 22:16:55 2019  DEPLOYED  triton-inference-server-1.0.0  1.0           default
example-metrics	1       	Tue Jan 21 12:24:07 2020	DEPLOYED	prometheus-operator-6.18.0   	 0.32.0     	 default

$ helm uninstall example
$ helm uninstall example-metrics
```

For the Prometheus and Grafana services, you should [explicitly delete
CRDs](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack#uninstall-helm-chart):

```
$ kubectl delete crd alertmanagerconfigs.monitoring.coreos.com alertmanagers.monitoring.coreos.com podmonitors.monitoring.coreos.com probes.monitoring.coreos.com prometheuses.monitoring.coreos.com prometheusrules.monitoring.coreos.com servicemonitors.monitoring.coreos.com thanosrulers.monitoring.coreos.com
```


================================================
FILE: deploy/k8s-onprem/dashboard.json
================================================
{
  "__inputs": [
    {
      "name": "DS_PROMETHEUS",
      "label": "Prometheus",
      "description": "",
      "type": "datasource",
      "pluginId": "prometheus",
      "pluginName": "Prometheus"
    }
  ],
  "__elements": {},
  "__requires": [
    {
      "type": "panel",
      "id": "gauge",
      "name": "Gauge",
      "version": ""
    },
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "10.0.1"
    },
    {
      "type": "datasource",
      "id": "prometheus",
      "name": "Prometheus",
      "version": "1.0.0"
    },
    {
      "type": "panel",
      "id": "stat",
      "name": "Stat",
      "version": ""
    },
    {
      "type": "panel",
      "id": "timeseries",
      "name": "Time series",
      "version": ""
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": {
          "type": "datasource",
          "uid": "grafana"
        },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "target": {
          "limit": 100,
          "matchAny": false,
          "tags": [],
          "type": "dashboard"
        },
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 8,
        "x": 0,
        "y": 0
      },
      "id": 9,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "text": {},
        "textMode": "auto"
      },
      "pluginVersion": "10.0.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "exemplar": true,
          "expr": "count(count(nv_inference_count) by (instance))",
          "interval": "",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "title": "Active Triton Instances",
      "type": "stat"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 50,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineStyle": {
              "fill": "solid"
            },
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "percent"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "max": 1,
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          }
        },
        "overrides": [
          {
            "__systemRef": "hideSeriesFrom",
            "matcher": {
              "id": "byNames",
              "options": {
                "mode": "exclude",
                "names": [
                  "example-triton-inference-server-6784d84f5d-v9scn"
                ],
                "prefix": "All except:",
                "readOnly": true
              }
            },
            "properties": [
              {
                "id": "custom.hideFrom",
                "value": {
                  "legend": false,
                  "tooltip": false,
                  "viz": true
                }
              }
            ]
          }
        ]
      },
      "gridPos": {
        "h": 8,
        "w": 16,
        "x": 8,
        "y": 0
      },
      "id": 11,
      "interval": "15s",
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "exemplar": true,
          "expr": "sum by (pod) (rate(nv_inference_count[1m])) / ignoring(pod) group_left sum (rate(nv_inference_count[1m]))",
          "instant": false,
          "interval": "",
          "legendFormat": "{{pod}}",
          "refId": "A"
        }
      ],
      "title": "Proportion of Requests by Pod",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 0,
        "y": 8
      },
      "id": 2,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "8.2.3",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "exemplar": true,
          "expr": "sum(nv_inference_request_success) by (pod)",
          "interval": "",
          "legendFormat": "Success {{pod}}",
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "exemplar": true,
          "expr": "sum(nv_inference_request_failure) by (pod)",
          "interval": "",
          "legendFormat": "Failure {{pod}}",
          "refId": "B"
        }
      ],
      "title": "Cumulative Inference Requests by Pod",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "Compute Time (ms)",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "ms"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 17,
        "w": 12,
        "x": 12,
        "y": 8
      },
      "id": 5,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "8.2.3",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "exemplar": true,
          "expr": "sum(rate(nv_inference_compute_infer_duration_us[30s])) by (model) / 1000",
          "interval": "",
          "legendFormat": "{{model}}",
          "refId": "A"
        }
      ],
      "title": "Compute Time by Model (milliseconds)",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "Queue Time (ms)",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "µs"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 17
      },
      "id": 4,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "8.2.3",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "exemplar": true,
          "expr": "avg(rate(nv_inference_queue_duration_us[30s])/(1+rate(nv_inference_request_success[30s]))) by (pod)",
          "interval": "",
          "legendFormat": "{{pod}}",
          "refId": "A"
        }
      ],
      "title": "Average Queue Time by Pod (microseconds)",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "watt"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 18,
        "x": 0,
        "y": 25
      },
      "id": 10,
      "options": {
        "legend": {
          "calcs": [
            "mean",
            "lastNotNull",
            "max"
          ],
          "displayMode": "table",
          "placement": "right",
          "showLegend": true
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
        }
      },
      "pluginVersion": "10.0.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "nv_gpu_power_usage",
          "interval": "",
          "legendFormat": "GPU {{ gpu_uuid }}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "GPU Power Usage",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "max": 2400,
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "#EAB839",
                "value": 1800
              },
              {
                "color": "red",
                "value": 2200
              }
            ]
          },
          "unit": "watt"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 6,
        "x": 18,
        "y": 25
      },
      "id": 16,
      "links": [],
      "options": {
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "sum"
          ],
          "fields": "",
          "values": false
        },
        "showThresholdLabels": false,
        "showThresholdMarkers": true
      },
      "pluginVersion": "10.0.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "sum(nv_gpu_power_usage)",
          "interval": "",
          "legendFormat": "",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "GPU Power Total",
      "type": "gauge"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "bytes"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 33
      },
      "id": 18,
      "options": {
        "legend": {
          "calcs": [
            "mean",
            "max"
          ],
          "displayMode": "list",
          "placement": "right",
          "showLegend": true
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
        }
      },
      "pluginVersion": "10.0.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "exemplar": false,
          "expr": "nv_gpu_memory_used_bytes",
          "interval": "",
          "legendFormat": "GPU {{gpu_uuid}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "GPU Framebuffer Mem Used",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "max": 100,
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "percent"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 33
      },
      "id": 6,
      "options": {
        "legend": {
          "calcs": [
            "mean",
            "lastNotNull",
            "max"
          ],
          "displayMode": "table",
          "placement": "right",
          "showLegend": true,
          "sortBy": "Max",
          "sortDesc": true
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
        }
      },
      "pluginVersion": "10.0.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "nv_gpu_utilization * 100",
          "interval": "",
          "legendFormat": "GPU {{gpu_uuid}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "GPU Utilization",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "bytes"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 41
      },
      "id": 19,
      "options": {
        "legend": {
          "calcs": [
            "mean",
            "max"
          ],
          "displayMode": "list",
          "placement": "right",
          "showLegend": true
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
        }
      },
      "pluginVersion": "10.0.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "nv_cpu_memory_used_bytes",
          "hide": false,
          "instant": false,
          "legendFormat": "Memory",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Memory Used",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 2,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "max": 100,
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "percent"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 41
      },
      "id": 20,
      "options": {
        "legend": {
          "calcs": [
            "mean",
            "lastNotNull",
            "max"
          ],
          "displayMode": "table",
          "placement": "right",
          "showLegend": true,
          "sortBy": "Max",
          "sortDesc": true
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
        }
      },
      "pluginVersion": "10.0.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "nv_cpu_utilization * 100",
          "interval": "",
          "legendFormat": "CPU",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "CPU Utilization",
      "type": "timeseries"
    }
  ],
  "refresh": "5s",
  "schemaVersion": 38,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-15m",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "5s",
      "10s",
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ]
  },
  "timezone": "",
  "title": "Triton Inference Server",
  "uid": "slEY4dsZk",
  "version": 5,
  "weekStart": ""
}

================================================
FILE: deploy/k8s-onprem/templates/_helpers.tpl
================================================
{{/*
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/}}

# Defines a set of helper functions that produce templated values for other files.
# Mostly for things like names and labels. This file does not produce any
# kubernetes resources by itself

{{/* vim: set filetype=mustache: */}}
{{/*
Short name of the inference server: `nameOverride` when set, otherwise the
chart name, limited to 63 characters with any trailing "-" removed.
*/}}
{{- define "triton-inference-server.name" -}}
{{- $chosen := default .Chart.Name .Values.nameOverride -}}
{{- $chosen | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Fully qualified app name, limited to 63 characters (some Kubernetes name
fields are limited to this by the DNS naming spec) with any trailing "-"
removed.
Resolution order:
  1. `fullnameOverride`, when set;
  2. the release name alone, when it already contains the chart name
     (or `nameOverride`);
  3. "<release name>-<chart name or nameOverride>" otherwise.
*/}}
{{- define "triton-inference-server.fullname" -}}
{{- $base := default .Chart.Name .Values.nameOverride -}}
{{- $combined := printf "%s-%s" .Release.Name $base -}}
{{- $generated := ternary .Release.Name $combined (contains $base .Release.Name) -}}
{{- default $generated .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
  Metrics service name: the inference server name cut to 55 characters so
  that appending the 8-character "-metrics" suffix stays within 63.
*/}}
{{- define "triton-inference-server-metrics.name" -}}
{{- $stem := include "triton-inference-server.name" . | trunc 55 | trimSuffix "-" -}}
{{- printf "%s-metrics" $stem -}}
{{- end -}}

{{/*
  Metrics service fullname: the inference server fullname cut to 55
  characters so that appending "-metrics" stays within 63.
*/}}
{{- define "triton-inference-server-metrics.fullname" -}}
{{- $stem := include "triton-inference-server.fullname" . | trunc 55 | trimSuffix "-" -}}
{{- printf "%s-metrics" $stem -}}
{{- end -}}

{{/*
  Metrics monitor name: the inference server name cut to 47 characters so
  that appending the 16-character "-metrics-monitor" suffix stays within 63.
*/}}
{{- define "triton-inference-server-metrics-monitor.name" -}}
{{- $stem := include "triton-inference-server.name" . | trunc 47 | trimSuffix "-" -}}
{{- printf "%s-metrics-monitor" $stem -}}
{{- end -}}

{{/*
  Metrics monitor fullname: the inference server fullname cut to 47
  characters so that appending "-metrics-monitor" stays within 63.
*/}}
{{- define "triton-inference-server-metrics-monitor.fullname" -}}
{{- $stem := include "triton-inference-server.fullname" . | trunc 47 | trimSuffix "-" -}}
{{- printf "%s-metrics-monitor" $stem -}}
{{- end -}}

{{/*
  HTTP ingressroute name: the inference server name cut to 50 characters so
  that appending the 13-character "-ingress-http" suffix stays within 63.
*/}}
{{- define "triton-inference-server-ingressroute-http.name" -}}
{{- $stem := include "triton-inference-server.name" . | trunc 50 | trimSuffix "-" -}}
{{- printf "%s-ingress-http" $stem -}}
{{- end -}}

{{/*
  gRPC ingressroute name: the inference server name cut to 50 characters so
  that appending the 13-character "-ingress-grpc" suffix stays within 63.
*/}}
{{- define "triton-inference-server-ingressroute-grpc.name" -}}
{{- $stem := include "triton-inference-server.name" . | trunc 50 | trimSuffix "-" -}}
{{- printf "%s-ingress-grpc" $stem -}}
{{- end -}}

{{/*
Value for the `chart` label: "<chart name>-<chart version>" with every "+"
replaced by "_", limited to 63 characters with any trailing "-" removed.
*/}}
{{- define "triton-inference-server.chart" -}}
{{- $label := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $label | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}


================================================
FILE: deploy/k8s-onprem/templates/deployment.yaml
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Creates a deployment for the Triton Inference Server pods
# Each pod contains a Triton container and an nfs mount as specified in
# values.yaml for the model repository

apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ template "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  # The replica count is taken from the autoscaling lower bound in values.yaml.
  replicas: {{ .Values.autoscaling.minReplicas }}
  selector:
    matchLabels:
      app: {{ template "triton-inference-server.name" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "triton-inference-server.name" . }}
        release: {{ .Release.Name }}

    spec:
      # ServiceAccount shares the chart fullname (see serviceaccount.yaml).
      serviceAccountName: {{ template "triton-inference-server.fullname" . }}
      volumes:
        # Model repository served over NFS; server and path come from values.yaml.
        - name: models
          nfs:
            server: {{ .Values.image.modelRepositoryServer }}
            path: {{ .Values.image.modelRepositoryPath }}
            readOnly: false
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.imageName }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          volumeMounts:
            - mountPath: /models
              name: models

          resources:
            limits:
              nvidia.com/gpu: {{ .Values.image.numGpus }}

          args:
            - tritonserver
            # Each user-supplied argument is quoted so that values containing
            # YAML-significant characters (": ", " #", a leading "[" or "{", ...)
            # or non-string values are always rendered as a single string.
            {{- range .Values.serverArgs }}
            - {{ . | quote }}
            {{- end }}

          ports:
            - containerPort: 8000
              name: http
            - containerPort: 8001
              name: grpc
            - containerPort: 8002
              name: metrics
          livenessProbe:
            initialDelaySeconds: 15
            failureThreshold: 3
            periodSeconds: 10
            httpGet:
              path: /v2/health/live
              port: http
          readinessProbe:
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 3
            httpGet:
              path: /v2/health/ready
              port: http
          startupProbe:
            # allows Triton to load the models during 30*10 = 300 sec = 5 min
            # starts checking the other probes only after the success of this one
            # for details, see https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes
            periodSeconds: 10
            failureThreshold: 30
            httpGet:
              path: /v2/health/ready
              port: http

      # Pod-level security context: run as UID 1000 with fsGroup 1000.
      securityContext:
        runAsUser: 1000
        fsGroup: 1000


================================================
FILE: deploy/k8s-onprem/templates/hpa.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Creates the horizontal pod autoscaler for the Triton pod deployment.
# In order to use custom metrics (ie metrics other than CPU usage) with this
# autoscaler, you must have enabled installation of the prometheus adapter.
# This autoscaler (and the prometheus adapter) will only be installed if the
# autoscaling tag is set to true.

{{- if .Values.tags.autoscaling }}
# autoscaling/v2beta2 was removed in Kubernetes 1.26. Use the stable
# autoscaling/v2 API whenever the cluster serves it, and fall back to
# v2beta2 only on older clusters. The spec below is valid for both versions.
{{- if .Capabilities.APIVersions.Has "autoscaling/v2" }}
apiVersion: autoscaling/v2
{{- else }}
apiVersion: autoscaling/v2beta2
{{- end }}
kind: HorizontalPodAutoscaler
metadata:
  name: triton-hpa
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  # Scales the Triton Deployment created by deployment.yaml.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ template "triton-inference-server.fullname" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  # Metric definitions are passed through verbatim from values.yaml.
  metrics: {{ toYaml .Values.autoscaling.metrics | nindent 2}}
{{- end -}}


================================================
FILE: deploy/k8s-onprem/templates/ingressroute.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Creates the traefik IngressRoutes that allow for external access to the
# triton service. Two routes are created, one for gRPC and one for HTTP.
# Requires deployment of the traefik IngressRoute CRD, along with various roles
# and permissions, most easily accomplished through the referenced traefik
# helm chart. Will only be installed if the loadBalancing tag is set to true.

{{- if .Values.tags.loadBalancing }}
# HTTP route: forwards everything arriving on the "triton-http" entry point
# to port 8000 of the Triton service.
apiVersion: traefik.containo.us/v1alpha1
kind: IngressRoute
metadata:
  name: {{ template "triton-inference-server-ingressroute-http.name" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  entryPoints:
    - triton-http
  routes:
    - match: PathPrefix(`/`)
      kind: Rule
      services:
        - name: {{ template "triton-inference-server.fullname" . }}
          port: 8000
---
# gRPC route: forwards everything arriving on the "triton-grpc" entry point
# to port 8001 of the Triton service using the h2c scheme.
apiVersion: traefik.containo.us/v1alpha1
kind: IngressRoute
metadata:
  name: {{ template "triton-inference-server-ingressroute-grpc.name" . }}
  namespace: {{ .Release.Namespace }}
  # Same label set as the HTTP route and the other chart resources, so that
  # label selectors on app/release find both routes.
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  entryPoints:
    - triton-grpc
  routes:
    - match: PathPrefix(`/`)
      kind: Rule
      services:
        - name: {{ template "triton-inference-server.fullname" . }}
          port: 8001
          scheme: h2c
{{- end -}}


================================================
FILE: deploy/k8s-onprem/templates/rbac.yaml
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configures RBAC if required for the platform to support running with
# NFS volumes and pinned non-root UIDs as required

{{- if .Values.tags.openshift }}
# Everything in this file is rendered only when the `openshift` tag is true.
#
# SecurityContextConstraints named after the chart fullname. Per the
# description annotation below, it mirrors restricted-v2 while additionally
# permitting non-root UIDs and NFS volumes.
apiVersion: security.openshift.io/v1
kind: SecurityContextConstraints
metadata:
  name: {{ template "triton-inference-server.fullname" . }}
  annotations:
    kubernetes.io/description: triton has the same settings as restricted-v2,
      except it also allows non-root UIDs and NFS mounts.
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
# Host access and privilege escalation are all disabled.
allowHostDirVolumePlugin: false
allowHostIPC: false
allowHostNetwork: false
allowHostPID: false
allowHostPorts: false
allowPrivilegeEscalation: false
allowPrivilegedContainer: false
# Only NET_BIND_SERVICE may be added; ALL capabilities are in the required-drop list.
allowedCapabilities:
- NET_BIND_SERVICE
defaultAddCapabilities: null
fsGroup:
  type: RunAsAny
groups: []
priority: null
readOnlyRootFilesystem: false
requiredDropCapabilities:
- ALL
# Any UID is accepted as long as it is not root.
runAsUser:
  type: MustRunAsNonRoot
seLinuxContext:
  type: MustRunAs
seccompProfiles:
- runtime/default
supplementalGroups:
  type: RunAsAny
users: []
# Volume types pods may mount under this SCC; `nfs` is included for the
# model repository volume used by the Deployment.
volumes:
- configMap
- csi
- downwardAPI
- emptyDir
- ephemeral
- nfs
- persistentVolumeClaim
- projected
- secret
---
# Role granting the `use` verb on the SCC defined above (matched by name).
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: {{ include "triton-inference-server.fullname" . }}-scc
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
rules:
- apiGroups: ["security.openshift.io"]
  resources: ["securitycontextconstraints"]
  resourceNames: [{{ include "triton-inference-server.fullname" . | quote }}]
  verbs: ["use"]
---
# Binds the Role above to the chart's ServiceAccount (same fullname, same
# namespace), which the Deployment's pods run under.
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: {{ include "triton-inference-server.fullname" . }}-scc
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
subjects:
- kind: ServiceAccount
  name: {{ template "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
roleRef:
  kind: Role
  name: {{ include "triton-inference-server.fullname" . }}-scc
  apiGroup: rbac.authorization.k8s.io
{{- end -}}


================================================
FILE: deploy/k8s-onprem/templates/service.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Defines the services for triton and the triton metrics service.
# Also creates a ServiceMonitor for the triton metrics service.

# Headless Service (clusterIP: None) exposing Triton's HTTP, gRPC and metrics
# ports. Each port targets the container port of the same name on the pods
# selected by the app/release labels.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ include "triton-inference-server.name" . }}
    chart: {{ include "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  clusterIP: None
  selector:
    app: {{ include "triton-inference-server.name" . }}
    release: {{ .Release.Name }}
  ports:
    - name: http-inference-server
      port: 8000
      targetPort: http
    - name: grpc-inference-server
      port: 8001
      targetPort: grpc
    - name: metrics-inference-server
      port: 8002
      targetPort: metrics
---
# Dedicated metrics Service: publishes the pods' "metrics" container port on
# service port 8080. Its `app` label is the metrics name, which is what the
# ServiceMonitor in this file selects on.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "triton-inference-server-metrics.fullname" . }}
  namespace: {{ .Release.Namespace }}
  annotations:
    alpha.monitoring.coreos.com/non-namespaced: "true"
  labels:
    app: {{ include "triton-inference-server-metrics.name" . }}
    chart: {{ include "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  selector:
    app: {{ include "triton-inference-server.name" . }}
    release: {{ .Release.Name }}
  ports:
  - name: metrics
    protocol: TCP
    port: 8080
    targetPort: metrics
---
# ServiceMonitor that scrapes the "metrics" port of the metrics Service
# (selected by its `app` label) every 15 seconds.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "triton-inference-server-metrics-monitor.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ include "triton-inference-server-metrics-monitor.name" . }}
    chart: {{ include "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  endpoints:
  - interval: 15s
    port: metrics
  selector:
    matchLabels:
      app: {{ include "triton-inference-server-metrics.name" . }}


================================================
FILE: deploy/k8s-onprem/templates/serviceaccount.yaml
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configures a ServiceAccount for the Triton deployment to enable RBAC

# ServiceAccount named after the chart fullname; the Deployment's pods run
# under it via `serviceAccountName`.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ include "triton-inference-server.fullname" . }}
  labels:
    app: {{ include "triton-inference-server.name" . }}
    chart: {{ include "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}


================================================
FILE: deploy/k8s-onprem/values.yaml
================================================
# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Feature switches for the optional templates of this chart.
tags:
  # Gates templates/hpa.yaml (the HorizontalPodAutoscaler).
  autoscaling: true
  # Gates templates/ingressroute.yaml (the traefik IngressRoutes).
  loadBalancing: true
  # Gates templates/rbac.yaml (SecurityContextConstraints, Role, RoleBinding).
  openshift: false

image:
  # Container image run by the Triton Deployment.
  imageName: nvcr.io/nvidia/tritonserver:26.02-py3
  pullPolicy: IfNotPresent
  # NFS server and exported path mounted at /models in the Triton container.
  modelRepositoryServer: < Replace with the IP Address of your file server >
  modelRepositoryPath: /srv/models
  # Used as the container's nvidia.com/gpu resource limit.
  numGpus: 1

# add server args here e.g. --grpc-use-ssl, --grpc-server-cert, --repository-poll-secs, etc
# Each entry is appended, in order, after `tritonserver` in the container args.
serverArgs:
  - '--model-repository=/models'

# Port definitions under the `traefik` key. The entry point names
# (triton-http / triton-grpc) are the ones referenced by the IngressRoutes in
# templates/ingressroute.yaml.
# NOTE(review): presumably consumed by a traefik subchart - confirm in Chart.yaml.
traefik:
  ports:
    triton-http:
      port: 18000
      exposedPort: 8000
      expose: true
      protocol: TCP
    triton-grpc:
      port: 18001
      exposedPort: 8001
      expose: true
      protocol: TCP

autoscaling:
  # minReplicas is also used as the Deployment's `replicas` value.
  minReplicas: 1
  maxReplicas: 3
  # Passed verbatim into the HorizontalPodAutoscaler `metrics` field.
  # `avg_time_queue_us` is the name produced by the prometheus-adapter rule below.
  metrics:
    - type: Pods
      pods:
        metric:
          name: avg_time_queue_us
        target:
          type: AverageValue
          averageValue: 50

# NOTE(review): presumably values for a prometheus-adapter subchart - confirm in Chart.yaml.
prometheus-adapter:
  prometheus:
    # NOTE(review): this URL points at a Prometheus service in the `default`
    # namespace; adjust it to match your own Prometheus installation.
    url: http://example-metrics-kube-prome-prometheus.default.svc.cluster.local
    port: 9090
  rules:
    custom:
      # The series query is restricted to namespace "default"; change the
      # namespace matcher when Triton is deployed elsewhere.
      - seriesQuery: 'nv_inference_queue_duration_us{namespace="default",pod!=""}'
        resources:
          overrides:
            namespace:
              resource: "namespace"
            pod:
              resource: "pod"
        # Exposes nv_inference_queue_duration_us under the name avg_time_queue_us.
        name:
          matches: "nv_inference_queue_duration_us"
          as: "avg_time_queue_us"
        # Queue-duration delta over 30s divided by (1 + successful-request delta
        # over 30s), averaged per group; the "1 +" presumably avoids division
        # by zero when no requests completed in the window.
        metricsQuery: 'avg(delta(nv_inference_queue_duration_us{<<.LabelMatchers>>}[30s])/(1+delta(nv_inference_request_success{<<.LabelMatchers>>}[30s]))) by (<<.GroupBy>>)'


================================================
FILE: deploy/mlflow-triton-plugin/README.md
================================================
<!--
# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# MLflow Triton

MLflow plugin for deploying your models from MLflow to Triton Inference Server.
Scripts are included for publishing models, which are in Triton recognized
structure, to your MLflow Model Registry.

### Supported flavors

The MLflow Triton plugin currently supports the following flavors. You may
substitute the flavor specification in the examples below according to the
model to be deployed.

* onnx
* triton

## Requirements

* MLflow
* Triton Python HTTP client
* Triton Inference Server

## Installation

The plugin can be installed from source using the following commands

```bash
python setup.py install
```

## Quick Start

In this documentation, we will use the files in `examples` to showcase how
the plugin interacts with Triton Inference Server. The `onnx_float32_int32_int32`
model in `examples` is a simple model that takes two float32 inputs, INPUT0 and
INPUT1, with shape [-1, 16], and produces two int32 outputs, OUTPUT0 and
OUTPUT1, where OUTPUT0 is the element-wise summation of INPUT0 and INPUT1 and
OUTPUT1 is the element-wise subtraction of INPUT0 and INPUT1.

### Start Triton Inference Server in EXPLICIT mode

The MLflow Triton plugin must work with a running Triton server, see
[documentation](https://github.com/triton-inference-server/server/blob/main/docs/getting_started/quickstart.md)
of Triton Inference Server for how to start the server. Note that
the server should be run in EXPLICIT mode (`--model-control-mode=explicit`)
to exploit the deployment feature of the plugin.

Once the server has started, the following environment variables must be set so
that the plugin can interact with the server properly:
* `TRITON_URL`: The address of the Triton HTTP endpoint
* `TRITON_MODEL_REPO`: The path to the Triton model repository. It can be an S3 URI, but keep in
  mind that the environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` are then needed.

### Publish models to MLflow

#### ONNX flavor

The MLflow ONNX built-in functionalities can be used to publish `onnx` flavor
models to MLflow directly, and the MLflow Triton plugin will prepare the model
in the format expected by Triton. You may also log
[`config.pbtxt`](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_model_configuration.md)
as an additional artifact, which Triton will use to serve the model. Otherwise,
the server should be run with the auto-complete feature enabled
(`--strict-model-config=false`) to generate the model configuration.

```python
import mlflow.onnx
import onnx
model = onnx.load("examples/onnx_float32_int32_int32/1/model.onnx")
mlflow.onnx.log_model(model, "triton", registered_model_name="onnx_float32_int32_int32")
```

#### Triton flavor

For other model frameworks that Triton supports but that are not yet recognized
by the MLflow Triton plugin, the `publish_model_to_mlflow.py` script can be used
to publish `triton` flavor models to MLflow. A `triton` flavor model is a
directory containing the model files following the
[model layout](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_repository.md#repository-layout).
Below is an example usage:

```bash
cd /scripts

python publish_model_to_mlflow.py --model_name onnx_float32_int32_int32 --model_directory <path-to-the-examples-directory>/onnx_float32_int32_int32 --flavor triton
```

### Deploy models tracked in MLflow to Triton

Once a model is published and tracked in MLflow, it can be deployed to Triton
via MLflow's deployments command. The following command will download the model
to Triton's model repository and request Triton to load the model.

```bash
mlflow deployments create -t triton --flavor triton --name onnx_float32_int32_int32 -m models:/onnx_float32_int32_int32/1
```

### Perform inference

After the model is deployed, the following CLI command can be used to send an
inference request to the deployment.

```bash
mlflow deployments predict -t triton --name onnx_float32_int32_int32 --input-path <path-to-the-examples-directory>/input.json --output-path output.json
```

The inference result will be written to `output.json`, and you may compare it
with the results in `expected_output.json`.

## MLflow Deployments

"MLflow Deployments" is a set of MLflow APIs for deploying MLflow models to
custom serving tools. The MLflow Triton plugin implements the following
deployment functions to support the interaction with Triton server in MLflow.

### Create Deployment

MLflow deployments create API deploys a model to the Triton target, which will
download the model to Triton's model repository and request Triton to load the
model.

To create a MLflow deployment using CLI

```bash
mlflow deployments create -t triton --flavor triton --name model_name -m models:/model_name/1
```

To create a MLflow deployment using Python API

```python
from mlflow.deployments import get_deploy_client
client = get_deploy_client('triton')
client.create_deployment("model_name", "models:/model_name/1", flavor="triton")
```

### Delete Deployment

MLflow deployments delete API removes an existing deployment from the Triton
target, which will remove the model in Triton's model repository and request
Triton to unload the model.

To delete a MLflow deployment using CLI

```bash
mlflow deployments delete -t triton --name model_name
```

To delete a MLflow deployment using Python API

```python
from mlflow.deployments import get_deploy_client
client = get_deploy_client('triton')
client.delete_deployment("model_name")
```

### Update Deployment

MLflow deployments update API updates an existing deployment with another model
(version) tracked in MLflow, which will overwrite the model in Triton's model
repository and request Triton to reload the model.

To update a MLflow deployment using CLI

```bash
mlflow deployments update -t triton --flavor triton --name model_name -m models:/model_name/2
```

To update a MLflow deployment using Python API

```python
from mlflow.deployments import get_deploy_client
client = get_deploy_client('triton')
client.update_deployment("model_name", "models:/model_name/2", flavor="triton")
```

### List Deployments

MLflow deployments list API lists all existing deployments in Triton target.

To list all MLflow deployments using CLI

```bash
mlflow deployments list -t triton
```

To list all MLflow deployments using Python API

```python
from mlflow.deployments import get_deploy_client
client = get_deploy_client('triton')
client.list_deployments()
```

### Get Deployment

MLflow deployments get API returns information regarding a specific deployment
in the Triton target.

To get a specific MLflow deployment using CLI
```bash
mlflow deployments get -t triton --name model_name
```

To get a specific MLflow deployment using Python API
```python
from mlflow.deployments import get_deploy_client
client = get_deploy_client('triton')
client.get_deployment("model_name")
```

### Run Inference on Deployments

MLflow deployments predict API runs inference by preparing and sending the
request to Triton and returns the Triton response.

To run inference using CLI

```bash
mlflow deployments predict -t triton --name model_name --input-path input_file --output-path output_file

```

To run inference using Python API

```python
from mlflow.deployments import get_deploy_client
client = get_deploy_client('triton')
client.predict("model_name", inputs)
```


================================================
FILE: deploy/mlflow-triton-plugin/examples/expected_output.json
================================================
{"outputs":
  {
    "OUTPUT0": [[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]],
    "OUTPUT1": [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
  }
}

================================================
FILE: deploy/mlflow-triton-plugin/examples/input.json
================================================
{"inputs":
  {
    "INPUT0": [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]],
    "INPUT1": [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]]
  }
}

================================================
FILE: deploy/mlflow-triton-plugin/examples/onnx_float32_int32_int32/config.pbtxt
================================================

# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
platform: "onnxruntime_onnx"
max_batch_size: 8
version_policy: { latest { num_versions: 1 }}
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]

================================================
FILE: deploy/mlflow-triton-plugin/mlflow_triton/__init__.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: deploy/mlflow-triton-plugin/mlflow_triton/config.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import re
from collections import namedtuple

from mlflow.exceptions import MlflowException


class Config(dict):
    """Plugin settings read from the process environment.

    The mapping always holds ``triton_url`` (from ``TRITON_URL``) and
    ``triton_model_repo`` (from ``TRITON_MODEL_REPO``); either value is
    ``None`` when the variable is unset. When the model repository is an
    ``s3://`` URI, the mapping additionally holds ``s3`` (a boto3 S3 client),
    ``s3_bucket`` and ``s3_prefix``, and ``triton_model_repo`` is rewritten to
    ``s3://<bucket>/<prefix>`` with any endpoint information removed.
    """

    # Endpoint-style S3 URI: s3://[http(s)://]<host>:<port>/<bucket>[/<prefix>]
    s3_regex = re.compile(
        "s3://(http://|https://|)([0-9a-zA-Z\\-.]+):([0-9]+)/"
        "([0-9a-z.\\-]+)(((/[0-9a-zA-Z.\\-_]+)*)?)"
    )

    # Components of a parsed S3 model repository URI
    _S3URI = namedtuple(
        "S3URI", ["protocol", "host_name", "host_port", "bucket", "prefix"]
    )

    def __init__(self):
        super().__init__()
        self["triton_url"] = os.environ.get("TRITON_URL")
        self["triton_model_repo"] = os.environ.get("TRITON_MODEL_REPO")

        repo = self["triton_model_repo"]
        # TRITON_MODEL_REPO may be unset (None). Do not dereference it here so
        # that the consumer of this config can report the missing variable
        # itself instead of hitting an AttributeError.
        if not repo or not repo.startswith("s3://"):
            return

        uri = self.parse_path(repo)
        # Anything other than an explicit https:// endpoint is treated as http
        protocol = "https://" if uri.protocol == "https://" else "http://"
        endpoint_url = None
        if uri.host_name != "" and uri.host_port != "":
            endpoint_url = "{}{}:{}".format(protocol, uri.host_name, uri.host_port)

        # Imported lazily so boto3 is only required for S3 repositories
        import boto3

        # boto3 handles AWS credentials
        self["s3"] = boto3.client("s3", endpoint_url=endpoint_url)
        self["s3_bucket"] = uri.bucket
        self["s3_prefix"] = uri.prefix
        self["triton_model_repo"] = "s3://{}".format(
            os.path.join(uri.bucket, uri.prefix)
        )

    def parse_path(self, path):
        """
        Split an S3 model repository path into its components.

        :param path: S3 path, either ``s3://<bucket>[/<prefix>]`` or
                     ``s3://[http(s)://]<host>:<port>/<bucket>[/<prefix>]``

        :return: Named tuple with fields ``protocol``, ``host_name``,
                 ``host_port``, ``bucket`` and ``prefix``; the endpoint fields
                 are empty strings for the plain bucket form
        :raises MlflowException: If no bucket name can be extracted
        """
        # Cleanup extra slashes
        clean_path = self.clean_path(path)

        # Try the endpoint form first; otherwise fall back to "bucket/prefix"
        match = self.s3_regex.fullmatch(clean_path)
        if match:
            uri = self._S3URI(*match.group(1, 2, 3, 4, 5))
            # The regex captures the prefix with its leading slash
            if uri.prefix and uri.prefix[0] == "/":
                uri = uri._replace(prefix=uri.prefix[1:])
        else:
            bucket_start = clean_path.find("s3://") + len("s3://")
            bucket_end = clean_path.find("/", bucket_start)

            # If there isn't a slash, the address has only the bucket
            if bucket_end > bucket_start:
                bucket = clean_path[bucket_start:bucket_end]
                prefix = clean_path[bucket_end + 1 :]
            else:
                bucket = clean_path[bucket_start:]
                prefix = ""
            uri = self._S3URI("", "", "", bucket, prefix)

        if uri.bucket == "":
            raise MlflowException("No bucket name found in path: " + path)

        return uri

    def clean_path(self, s3_path):
        """
        Normalize the slashes of a path while keeping its scheme prefixes.

        Leading and trailing slashes of the path portion are removed and runs
        of internal slashes are collapsed into one. A leading ``s3://`` and an
        ``http://`` / ``https://`` endpoint scheme are preserved as-is.

        :param s3_path: Path to normalize

        :return: The normalized path
        :raises MlflowException: If nothing but slashes remains after the
                                 scheme prefixes
        """
        # Must handle paths with s3 prefix
        start = s3_path.find("s3://")
        if start != -1:
            path = s3_path[start + len("s3://") :]
            clean_path = "s3://"
        else:
            path = s3_path
            clean_path = ""

        # Must handle paths with https:// or http:// prefix
        https_start = path.find("https://")
        if https_start != -1:
            path = path[https_start + len("https://") :]
            clean_path += "https://"
        else:
            http_start = path.find("http://")
            if http_start != -1:
                path = path[http_start + len("http://") :]
                clean_path += "http://"

        # Length of the path once trailing slashes are dropped; zero means the
        # path is empty or consists of slashes only
        rtrim_length = len(path.rstrip("/"))
        if rtrim_length == 0:
            raise MlflowException("Invalid bucket name: '" + path + "'")

        # Number of leading slashes to drop
        ltrim_length = len(path) - len(path.lstrip("/"))

        # rtrim_length is a length (exclusive end), not the index of the last
        # kept character, so slicing up to it removes every trailing slash.
        true_path = path[ltrim_length:rtrim_length]

        # Remove extra internal slashes
        clean_path += re.sub("/+", "/", true_path)

        return clean_path


================================================
FILE: deploy/mlflow-triton-plugin/mlflow_triton/deployments.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import ast
import glob
import json
import logging
import os
import shutil
from pathlib import Path

import numpy as np
import pandas as pd
import tritonclient.http as tritonhttpclient
from mlflow.deployments import BaseDeploymentClient
from mlflow.exceptions import MlflowException
from mlflow.models import Model
from mlflow.tracking.artifact_utils import _download_artifact_from_uri
from mlflow_triton.config import Config
from tritonclient.utils import (
    InferenceServerException,
    np_to_triton_dtype,
    triton_to_np_dtype,
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# File name of the JSON metadata written into each deployed model's directory
# in the Triton model repository; it records the model name, its path in the
# repository, the originating MLflow model URI and the flavor.
_MLFLOW_META_FILENAME = "mlflow-meta.json"


class TritonPlugin(BaseDeploymentClient):
    def __init__(self, uri):
        """
        Set up the plugin: load the server configuration, remember the Triton
        model repository and create the Triton HTTP client.

        :param uri: Target URI, forwarded to the base deployment client
        """
        super(TritonPlugin, self).__init__(target_uri=uri)
        self.server_config = Config()
        endpoint, self.triton_model_repo = self._get_triton_server_config()
        # need to add other flavors
        self.supported_flavors = ["triton", "onnx"]

        # The Triton client is given the address without a scheme; an
        # "https://" scheme is carried over as the ssl flag instead.
        use_ssl = endpoint.startswith("https://")
        for scheme in ("http://", "https://"):
            if endpoint.startswith(scheme):
                endpoint = endpoint[len(scheme) :]
                break

        self.triton_client = tritonhttpclient.InferenceServerClient(
            url=endpoint, ssl=use_ssl
        )

    def _get_triton_server_config(self):
        """
        Resolve the Triton endpoint and model repository from the server config.

        :return: Tuple of (triton url, triton model repo). The url falls back
                 to "localhost:8000" when none is configured.
        :raises Exception: If no model repository is configured
        """
        # Fall back to the default endpoint when no URL is configured
        triton_url = self.server_config["triton_url"] or "localhost:8000"
        logger.info("Triton url = {}".format(triton_url))

        triton_model_repo = self.server_config["triton_model_repo"]
        if not triton_model_repo:
            raise Exception("Check that environment variable TRITON_MODEL_REPO is set")
        logger.info("Triton model repo = {}".format(triton_model_repo))

        return triton_url, triton_model_repo

    def create_deployment(self, name, model_uri, flavor=None, config=None):
        """
        Copy the model at model_uri into the Triton model repo, record the
        MLflow metadata next to it and ask Triton to load it.

        :param name: Name of the model
        :param model_uri: Model uri in format models:/<model-name>/<version-or-stage>
        :param flavor: Flavor of the deployed model
        :param config: Configuration parameters (not used by this method)

        :return: Dict with the model "name" and "flavor"
        """
        # Reject unsupported flavors and invalid names before touching anything
        self._validate_flavor(flavor)
        self._validate_model_name(name)

        # A deployment may only be created once per name
        already_deployed = self._model_exists(name)
        if already_deployed:
            raise Exception(
                "Unable to create deployment for name %s because it already exists."
                % name
            )

        # Fetch the artifact locally, then stage it in the model repository
        artifact_dir = Path(_download_artifact_from_uri(model_uri))
        self._copy_files_to_triton_repo(artifact_dir, name, flavor)
        self._generate_mlflow_meta_file(name, flavor, model_uri)

        try:
            self.triton_client.load_model(name)
        except InferenceServerException as ex:
            raise MlflowException(str(ex))
        else:
            return {"name": name, "flavor": flavor}

    def delete_deployment(self, name):
        """
        Unload the named model from Triton and remove its deployment files.

        :param name: Name of the deployed model

        :return: None
        """
        # Only models that are currently deployed can be deleted
        deployed = self._model_exists(name)
        if not deployed:
            raise Exception(
                "Unable to delete deployment for name %s because it does not exist."
                % name
            )

        try:
            self.triton_client.unload_model(name)
        except InferenceServerException as ex:
            raise MlflowException(str(ex))
        else:
            # Files are removed only after Triton accepted the unload request
            self._delete_deployment_files(name)

        return None

    def update_deployment(self, name, model_uri=None, flavor=None, config=None):
        """
        Replace the files of an existing deployment with the model at
        model_uri and ask Triton to load the model again.

        :param name: Name of the deployed model
        :param model_uri: Model uri models:/model_name/version
        :param flavor: The flavor of the model
        :param config: Configuration parameters (not used by this method)

        :return: Dict with the "flavor" of the model
        """
        # TODO: Update this function with a warning. If config and label files associated with this
        # updated model are different than the ones already deployed to triton, issue a warning to the user.
        self._validate_flavor(flavor)

        # Only models that are currently deployed can be updated
        deployed = self._model_exists(name)
        if not deployed:
            raise Exception(
                "Unable to update deployment for name %s because it does not exist."
                % name
            )

        # The result is not needed; the lookup raises when the model is not
        # among the deployments tracked by this plugin.
        self.get_deployment(name)

        # Fetch the artifact locally, then overwrite the deployed files
        artifact_dir = Path(_download_artifact_from_uri(model_uri))
        self._copy_files_to_triton_repo(artifact_dir, name, flavor)
        self._generate_mlflow_meta_file(name, flavor, model_uri)

        try:
            self.triton_client.load_model(name)
        except InferenceServerException as ex:
            raise MlflowException(str(ex))
        else:
            return {"flavor": flavor}

    def list_deployments(self):
        """
        List the models deployed to Triton through this plugin.

        Only models that Triton reports as READY and that have an MLflow
        metadata file in the model repository are returned; READY models
        without that file are skipped.

        :return: List of dicts, one per deployment. Each dict is the entry of
                 Triton's model repository index extended with the keys
                 "triton_model_path", "mlflow_model_uri" and "flavor".
        """
        resp = self.triton_client.get_model_repository_index()
        actives = []
        for d in resp:
            if "state" not in d or d["state"] != "READY":
                continue

            if "s3" in self.server_config:
                s3_client = self.server_config["s3"]
                try:
                    raw_meta = s3_client.get_object(
                        Bucket=self.server_config["s3_bucket"],
                        Key=os.path.join(
                            self.server_config["s3_prefix"],
                            d["name"],
                            _MLFLOW_META_FILENAME,
                        ),
                    )["Body"].read()
                except s3_client.exceptions.NoSuchKey:
                    # No metadata object: skip the model, exactly like the
                    # local-repository branch does for a missing file.
                    continue
                # The metadata is written with json.dumps, so it must be read
                # back as JSON (a Python-literal parser rejects null/true/false).
                meta_dict = json.loads(raw_meta.decode("utf-8"))
            else:
                mlflow_meta_path = os.path.join(
                    self.triton_model_repo, d["name"], _MLFLOW_META_FILENAME
                )
                if not os.path.isfile(mlflow_meta_path):
                    continue
                meta_dict = self._get_mlflow_meta_dict(d["name"])

            d["triton_model_path"] = meta_dict["triton_model_path"]
            d["mlflow_model_uri"] = meta_dict["mlflow_model_uri"]
            d["flavor"] = meta_dict["flavor"]
            actives.append(d)

        return actives

    def get_deployment(self, name):
        """
        Look up a single deployment by model name.

        :param name: Name of the model. \n
                     Ex: "mini_bert_onnx" \n

        :return: The dict describing the deployment, as produced by
                 list_deployments
        :raises ValueError: If no listed deployment has the given name
        """
        # First listed deployment carrying the requested name, if any
        found = next(
            (entry for entry in self.list_deployments() if entry["name"] == name),
            None,
        )
        if found is None:
            raise ValueError(f"Unable to get deployment with name {name}")
        return found

    def predict(self, deployment_name, df):
        """
        Run inference on a deployment and return the outputs.

        :param deployment_name: Name of the deployed model
        :param df: Named inputs. Either a single-column Pandas DataFrame whose
                   index holds the input names, or a mapping of input name to
                   numpy array. A bare numpy array is rejected.

        :return: Pandas DataFrame with one "outputs" column, indexed by output
                 name and holding the numpy output arrays
        :raises MlflowException: For unnamed input, for a DataFrame with more
                                 than one column, or when inference fails
        """
        # Inputs must be named, which a bare array cannot provide
        if isinstance(df, np.ndarray):
            raise MlflowException("Unnamed input is not currently supported")

        inputs = []

        def add_input(tensor_name, tensor):
            # Wrap one named numpy array as a Triton input and queue it
            inputs.append(
                tritonhttpclient.InferInput(
                    tensor_name, tensor.shape, np_to_triton_dtype(tensor.dtype)
                )
            )
            inputs[-1].set_data_from_numpy(tensor)

        if isinstance(df, pd.DataFrame):
            # Expected numpy dtype of every model input, keyed by input name
            metadata = self.triton_client.get_model_metadata(deployment_name)
            expected_dtypes = {}
            for spec in metadata["inputs"]:
                expected_dtypes[spec["name"]] = triton_to_np_dtype(spec["datatype"])
            # Sanity check
            if len(df.columns) != 1:
                raise MlflowException("Expect Pandas DataFrame has only 1 column")
            column = df.columns[0]
            for input_name in df.index:
                tensor = df[column][input_name]
                # Need to form numpy array of the data type expected
                if type(tensor) is not np.ndarray:
                    tensor = np.array(tensor, dtype=expected_dtypes[input_name])
                add_input(input_name, tensor)
        else:
            for input_name, tensor in df.items():
                add_input(input_name, tensor)

        try:
            resp = self.triton_client.infer(model_name=deployment_name, inputs=inputs)
            outputs = {
                out["name"]: resp.as_numpy(out["name"])
                for out in resp.get_response()["outputs"]
            }
            return pd.DataFrame.from_dict({"outputs": outputs})
        except InferenceServerException as ex:
            raise MlflowException(str(ex))

    def _generate_mlflow_meta_file(self, name, flavor, model_uri):
        """
        Write the MLflow metadata file for a deployment into the model repo.

        The file is stored as JSON, either as an S3 object (when the server
        config holds an S3 client) or as a local file in the model directory.

        :param name: Name of the deployed model
        :param flavor: Flavor of the deployed model
        :param model_uri: MLflow model uri the deployment was created from
        """
        triton_deployment_dir = os.path.join(self.triton_model_repo, name)
        meta_dict = dict(
            name=name,
            triton_model_path=triton_deployment_dir,
            mlflow_model_uri=model_uri,
            flavor=flavor,
        )

        if "s3" not in self.server_config:
            local_meta_path = os.path.join(
                triton_deployment_dir, _MLFLOW_META_FILENAME
            )
            with open(local_meta_path, "w") as outfile:
                json.dump(meta_dict, outfile, indent=4)
        else:
            object_key = os.path.join(
                self.server_config["s3_prefix"], name, _MLFLOW_META_FILENAME
            )
            self.server_config["s3"].put_object(
                Body=json.dumps(meta_dict, indent=4).encode("utf-8"),
                Bucket=self.server_config["s3_bucket"],
                Key=object_key,
            )

        print("Saved", _MLFLOW_META_FILENAME, "to", triton_deployment_dir)

    def _get_mlflow_meta_dict(self, name):
        """Load the MLflow metadata record stored with a deployment.

        Reads the meta file from S3 when ``"s3"`` is present in
        ``self.server_config``, otherwise from the local model repository.

        :param name: deployment name

        :return: the decoded metadata (a dict as written by
            ``_generate_mlflow_meta_file``)
        """
        if "s3" in self.server_config:
            raw_meta = (
                self.server_config["s3"]
                .get_object(
                    Bucket=self.server_config["s3_bucket"],
                    Key=os.path.join(
                        self.server_config["s3_prefix"], name, _MLFLOW_META_FILENAME
                    ),
                )["Body"]
                .read()
                .decode("utf-8")
            )
            # The meta file is produced with json.dumps(), so it must be parsed
            # as JSON: a Python-literal parser rejects null/true/false and
            # decodes JSON surrogate-pair escapes incorrectly.
            return json.loads(raw_meta)

        mlflow_meta_path = os.path.join(
            self.triton_model_repo, name, _MLFLOW_META_FILENAME
        )
        with open(mlflow_meta_path, "r") as metafile:
            return json.load(metafile)

    def _get_copy_paths(self, artifact_path, name, flavor):
        copy_paths = {}
        copy_paths["model_path"] = {}
        triton_deployment_dir = os.path.join(self.triton_model_repo, name)
        if flavor == "triton":
            # When flavor is 'triton', the model is assumed to be preconfigured
            # with proper model versions and version strategy, which may differ from
            # the versioning in MLFlow
            for file in artifact_path.iterdir():
                if file.is_dir():
                    copy_paths["model_path"]["from"] = file
                    break
            copy_paths["model_path"]["to"] = triton_deployment_dir
        elif flavor == "onnx":
            # Look for model file via MLModel metadata or iterating dir
            model_file = None
            config_file = None
            for file in artifact_path.iterdir():
                if file.name == "MLmodel":
                    mlmodel = Model.load(file)
                    onnx_meta_data = mlmodel.flavors.get("onnx", None)
                    if onnx_meta_data is not None:
                        model_file = onnx_meta_data.get("data", None)
                elif file.name == "config.pbtxt":
                    config_file = file.name
                    copy_paths["config_path"] = {}
                elif file.suffix == ".txt" and file.stem != "requirements":
                    copy_paths[file.stem] = {"from": file, "to": triton_deployment_dir}
            if model_file is None:
                for file in artifact_path.iterdir():
                    if file.suffix == ".onnx":
                        model_file = file.name
                        break
            copy_paths["model_path"]["from"] = os.path.join(artifact_path, model_file)
            copy_paths["model_path"]["to"] = os.path.join(triton_deployment_dir, "1")

            if config_file is not None:
                copy_paths["config_path"]["from"] = os.path.join(
                    artifact_path, config_file
                )
                copy_paths["config_path"]["to"] = triton_deployment_dir
            else:
                # Make sure the directory has been created for config.pbtxt
                os.makedirs(triton_deployment_dir, exist_ok=True)
                # Provide a minimum config file so Triton knows what backend
                # should be performing the auto-completion
                config = """
backend: "onnxruntime"
default_model_filename: "{}"
""".format(
                    model_file
                )
                with open(
                    os.path.join(triton_deployment_dir, "config.pbtxt"), "w"
                ) as cfile:
                    cfile.write(config)
        return copy_paths

    def _walk(self, path):
        """Walk a path like os.walk() if path is dir,
        return file in the expected format otherwise.
        :param path: dir or file path

        :return: root, dirs, files
        """
        if os.path.isfile(path):
            return [(os.path.dirname(path), [], [os.path.basename(path)])]
        elif os.path.isdir(path):
            return list(os.walk(path))
        else:
            raise Exception(f"path: {path} is not a valid path to a file or dir.")

    def _copy_files_to_triton_repo(self, artifact_path, name, flavor):
        copy_paths = self._get_copy_paths(artifact_path, name, flavor)
        for key in copy_paths:
            if "s3" in self.server_config:
                # copy model dir to s3 recursively
                for root, dirs, files in self._walk(copy_paths[key]["from"]):
                    for filename in files:
                        local_path = os.path.join(root, filename)

                        if flavor == "onnx":
                            s3_path = os.path.join(
                                self.server_config["s3_prefix"],
                                copy_paths[key]["to"]
                                .replace(self.server_config["triton_model_repo"], "")
                                .strip("/"),
                                filename,
                            )

                        elif flavor == "triton":
                            rel_path = os.path.relpath(
                                local_path,
                                copy_paths[key]["from"],
                            )
                            s3_path = os.path.join(
                                self.server_config["s3_prefix"], name, rel_path
                            )

                        self.server_config["s3"].upload_file(
                            local_path,
                            self.server_config["s3_bucket"],
                            s3_path,
                        )
            else:
                if os.path.isdir(copy_paths[key]["from"]):
                    if os.path.isdir(copy_paths[key]["to"]):
                        shutil.rmtree(copy_paths[key]["to"])
                    shutil.copytree(copy_paths[key]["from"], copy_paths[key]["to"])
                else:
                    if not os.path.isdir(copy_paths[key]["to"]):
                        os.makedirs(copy_paths[key]["to"])
                    shutil.copy(copy_paths[key]["from"], copy_paths[key]["to"])

        if "s3" not in self.server_config:
            triton_deployment_dir = os.path.join(self.triton_model_repo, name)
            version_folder = os.path.join(triton_deployment_dir, "1")
            os.makedirs(version_folder, exist_ok=True)

        return copy_paths

    def _delete_mlflow_meta(self, filepath):
        if "s3" in self.server_config:
            self.server_config["s3"].delete_object(
                Bucket=self.server_config["s3_bucket"],
                Key=filepath,
            )
        elif os.path.isfile(filepath):
            os.remove(filepath)

    def _delete_deployment_files(self, name):
        """Remove the files of a deployment from the Triton model repository.

        With ``"s3"`` in ``self.server_config`` every object listed under the
        deployment's key prefix is deleted; otherwise the ``model*`` entries of
        the local deployment directory are removed. The MLflow meta file is
        deleted afterwards in both cases.

        :param name: deployment name

        :raises Exception: if an S3 object cannot be deleted, or if the local
            deployment directory does not exist
        """
        triton_deployment_dir = os.path.join(self.triton_model_repo, name)

        if "s3" in self.server_config:
            # The empty last component adds a trailing separator, so that the
            # prefix of model "foo" does not also match the keys of "foobar".
            model_prefix = os.path.join(self.server_config["s3_prefix"], name, "")
            objs = self.server_config["s3"].list_objects(
                Bucket=self.server_config["s3_bucket"],
                Prefix=model_prefix,
            )

            # "Contents" is missing from the response when no key matches.
            # NOTE(review): only the first page of the listing is processed -
            # confirm whether deployments can exceed one page of keys.
            for obj in objs.get("Contents", []):
                key = obj["Key"]
                try:
                    self.server_config["s3"].delete_object(
                        Bucket=self.server_config["s3_bucket"],
                        Key=key,
                    )
                except Exception as e:
                    # Chain the cause so the original traceback is kept.
                    raise Exception(f"Could not delete {key}: {e}") from e

        else:
            # Check if the deployment directory exists
            if not os.path.isdir(triton_deployment_dir):
                raise Exception(
                    "A deployment does not exist for this model in directory {} for model name {}".format(
                        triton_deployment_dir, name
                    )
                )

            # os.remove() handles regular files only; the rest of the
            # deployment directory is left in place.
            model_file = glob.glob("{}/model*".format(triton_deployment_dir))
            for file in model_file:
                print("Model directory found: {}".format(file))
                os.remove(file)
                print("Model directory removed: {}".format(file))

        # Delete mlflow meta file
        mlflow_meta_path = os.path.join(
            self.triton_model_repo, name, _MLFLOW_META_FILENAME
        )
        self._delete_mlflow_meta(mlflow_meta_path)

    def _validate_config_args(self, config):
        if not config["version"]:
            raise Exception("Please provide the version as a config argument")
        if not config["version"].isdigit():
            raise ValueError(
                "Please make sure version is a number. version = {}".format(
                    config["version"]
                )
            )

    def _validate_flavor(self, flavor):
        if flavor not in self.supported_flavors:
            raise Exception("{} model flavor not supported by Triton".format(flavor))

    def _validate_model_name(self, name):
        # Check if the model name is empty or only contains whitespace, tabs, or newlines
        if name.strip() == "":
            raise Exception(
                "Model name cannot be empty. Please enter a valid name to deploy."
            )
        # Path traversal protection
        if "/" in name or name == "..":
            raise Exception(
                "Path traversal is not allowed in model's name: {}".format(name)
            )

    def _model_exists(self, name):
        deploys = self.list_deployments()
        exists = False
        for d in deploys:
            if d["name"] == name:
                exists = True
        return exists


def run_local(name, model_uri, flavor=None, config=None):
    """Placeholder for running a deployment locally.

    :raises NotImplementedError: always; local runs are not implemented
    """
    message = "run_local has not been implemented yet"
    raise NotImplementedError(message)


def target_help():
    """Return the help text describing the ``triton`` deployment target.

    :return: multi-line help message with an example command, the required
        environment variable and the supported config options
    """
    help_msg = (
        "\nmlflow-triton plugin integrates the Triton Inference Server to the mlflow deployment pipeline. \n\n "
        "Example command: \n\n"
        '  mlflow deployments create -t triton --name mymodel --flavor onnx -m models:/mymodel/Production -C "version=1" \n\n'
        # The trailing space keeps "Triton" and "Inference" apart once the
        # adjacent literals are concatenated.
        "The environment variable TRITON_MODEL_REPO must be set to the location that the Triton "
        "Inference Server is storing its models\n\n"
        # No spaces around "=": the shell rejects them in an assignment.
        "export TRITON_MODEL_REPO=/path/to/triton/model/repo\n\n"
        "Use the following config options:\n\n"
        "- version: The version of the model to be released. This config will be used by Triton to create a new model sub-directory.\n"
    )
    return help_msg


================================================
FILE: deploy/mlflow-triton-plugin/scripts/publish_model_to_mlflow.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os

import click
import mlflow
import triton_flavor


@click.command()
@click.option("--model_name", help="Model name")
@click.option(
    "--model_directory",
    required=True,
    type=click.Path(exists=True, readable=True),
    help="Model filepath",
)
@click.option(
    "--flavor",
    required=True,
    type=click.Choice(["triton"], case_sensitive=True),
    help="Model flavor",
)
def publish_to_mlflow(model_name, model_directory, flavor):
    # Command-line entry point: logs the model found in `model_directory` to
    # the MLflow server named by the MLFLOW_TRACKING_URI environment variable
    # (which must be set) and prints the artifact URI of the run.
    # Kept as comments rather than a docstring so the text click generates for
    # --help stays unchanged.
    mlflow.set_tracking_uri(uri=os.environ["MLFLOW_TRACKING_URI"])

    with mlflow.start_run():
        if flavor != "triton":
            # Enhancement, for model in other flavor (framework) that Triton
            # supports, try to format it in Triton style and provide
            # config.pbtxt file. Should this be done in the plugin?
            raise Exception("Other flavor is not supported")

        triton_flavor.log_model(
            model_directory,
            artifact_path="triton",
            registered_model_name=model_name,
        )

        print(mlflow.get_artifact_uri())


# Run the click command only when this file is executed as a script; click
# reads the options from the command line.
if __name__ == "__main__":
    publish_to_mlflow()


================================================
FILE: deploy/mlflow-triton-plugin/scripts/triton_flavor.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
The ``triton`` module provides APIs for logging and loading Triton-recognized
models in the MLflow Model format. This module exports MLflow Models with the following
flavors:

Triton format
    model files in the structure that Triton can load the model from.

"""
import os
import shutil
import sys

from mlflow.exceptions import MlflowException
from mlflow.models import Model
from mlflow.models.model import MLMODEL_FILE_NAME
from mlflow.protos.databricks_pb2 import RESOURCE_ALREADY_EXISTS
from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS
from mlflow.utils.annotations import experimental

# Flavor name that save_model() passes to Model.add_flavor().
FLAVOR_NAME = "triton"


@experimental
def save_model(
    triton_model_path,
    path,
    mlflow_model=None,
):
    """
    Save a Triton model to a path on the local file system.

    :param triton_model_path: File path to Triton model to be saved.
    :param path: Local path where the model is to be saved. Must not exist yet.
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
                         A new, empty one is created when omitted.

    :raises MlflowException: if ``path`` already exists.

    """
    # The parameter is optional, so provide a model to attach the flavor to
    # instead of failing on None further down.
    if mlflow_model is None:
        mlflow_model = Model()

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise MlflowException(
            message="Path '{}' already exists".format(path),
            error_code=RESOURCE_ALREADY_EXISTS,
        )
    os.makedirs(path)
    # normpath drops a trailing separator, which would otherwise make
    # basename() return an empty string.
    triton_model_path = os.path.normpath(triton_model_path)
    model_data_subpath = os.path.basename(triton_model_path)
    model_data_path = os.path.join(path, model_data_subpath)

    # Save Triton model
    shutil.copytree(triton_model_path, model_data_path)

    mlflow_model.add_flavor(FLAVOR_NAME, data=model_data_subpath)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))


@experimental
def log_model(
    triton_model_path,
    artifact_path,
    registered_model_name=None,
    await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
):
    """
    Log a Triton model as an MLflow artifact for the current run.

    :param triton_model_path: File path to Triton model.
    :param artifact_path: Run-relative artifact path.
    :param registered_model_name: (Experimental) If given, create a model version under
                                  ``registered_model_name``, also creating a registered model if one
                                  with the given name does not exist.

    :param await_registration_for: Number of seconds to wait for the model version to finish
                            being created and is in ``READY`` status. By default, the function
                            waits for five minutes. Specify 0 or None to skip waiting.

    :return: Whatever ``Model.log`` returns, so that callers can inspect the
             logged model.

    """
    # This module itself is handed over as the flavor implementation.
    return Model.log(
        artifact_path=artifact_path,
        flavor=sys.modules[__name__],
        triton_model_path=triton_model_path,
        registered_model_name=registered_model_name,
        await_registration_for=await_registration_for,
    )


================================================
FILE: deploy/mlflow-triton-plugin/setup.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from setuptools import find_packages, setup

# Read the README through a context manager so the file handle is closed, and
# decode it as UTF-8 regardless of the locale of the build machine.
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name="mlflow-triton",
    version="0.2.0",
    description="Triton Mlflow Deployment",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    install_requires=["mlflow>=2.2.1,<3.0", "tritonclient[all]", "boto3"],
    # Registers the plugin as the "triton" MLflow deployment target.
    entry_points={"mlflow.deployments": "triton=mlflow_triton.deployments"},
)


================================================
FILE: deploy/oci/Chart.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

apiVersion: v1
appVersion: "1.0"
description: Triton Inference Server
name: triton-inference-server
version: 1.0.0


================================================
FILE: deploy/oci/README.md
================================================
<!--
# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)

# Kubernetes Deploy: Triton Inference Server Cluster

A helm chart for installing a single cluster of Triton Inference
Server is provided. By default the cluster contains a single instance
of the inference server but the *replicaCount* configuration parameter
can be set to create a cluster of any size, as described below.

This guide assumes you already have a functional Kubernetes cluster
and helm installed (see below for instructions on installing
helm). Note the following requirements:

* The helm chart deploys Prometheus and Grafana to collect and display Triton metrics. To use this helm chart you must install Prometheus and Grafana in your cluster as described below and your cluster must contain sufficient CPU resources to support these services.

* If you want Triton Server to use GPUs for inferencing, your cluster
must be configured to contain the desired number of GPU nodes (A10 GPU instances recommended)
with support for the NVIDIA driver and CUDA version required by the version
of the inference server you are using.

The steps below describe how to set-up a model repository, use helm to
launch the inference server, and then send inference requests to the
running server. You can access a Grafana endpoint to see real-time
metrics reported by the inference server.

## Notes for OKE cluster

When creating your node pool, the default value for the boot volume is 46.6GB.
Due to the size of the server container, it is recommended to increase this value
to 150GB and set a [cloud-init script to increase the partition](https://blogs.oracle.com/ateam/post/oke-node-sizing-for-very-large-container-images):

```
#!/bin/bash
curl --fail -H "Authorization: Bearer Oracle" -L0 http://169.254.169.254/opc/v2/instance/metadata/oke_init_script | base64 --decode >/var/run/oke-init.sh
bash /var/run/oke-init.sh
sudo /usr/libexec/oci-growfs -y
```


## Installing Helm

### Using Cloud Shell from OCI Web Console

It is possible to access your OKE Cluster [directly from the OCI Web Console](https://docs.oracle.com/en-us/iaas/Content/ContEng/Tasks/contengaccessingclusterkubectl.htm).
Helm v3 is already available from the Cloud Shell.

### Helm v3

If you do not already have Helm installed in your Kubernetes cluster,
follow the steps in the [official Helm install
guide](https://helm.sh/docs/intro/install/) for a
quick setup.

If you're currently using Helm v2 and would like to migrate to Helm v3,
please see the [official migration guide](https://helm.sh/docs/topics/v2_v3_migration/).

### Helm v2

> **NOTE**: Moving forward this chart will only be tested and maintained for Helm v3.

Below are example instructions for installing Helm v2.

```
$ curl https://raw.githubusercontent.com/helm/helm/master/scripts/get | bash
$ kubectl create serviceaccount -n kube-system tiller
serviceaccount/tiller created
$ kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
$ helm init --service-account tiller --wait
```

If you run into any issues, you can refer to the official installation guide [here](https://v2.helm.sh/docs/install/).

## Model Repository

If you already have a model repository you may use that with this helm
chart. If you do not have a model repository, you can checkout a local
copy of the inference server source repository to create an example
model repository:

```
$ git clone https://github.com/triton-inference-server/server.git
```

Triton Server needs a repository of models that it will make available
for inferencing. For this example you will place the model repository
in an S3 compatible OCI Object Storage Bucket.

```
$ oci os bucket create --compartment-id <COMPARTMENT_OCID> --name triton-inference-server-repository
```

Following the [QuickStart](../../docs/getting_started/quickstart.md) download the
example model repository to your system and copy it into the OCI
Bucket.

```
$ oci os object bulk-upload -bn triton-inference-server-repository --src-dir docs/examples/model_repository/
```

### OCI Model Repository
To load the model from the OCI Object Storage Bucket, you need to convert the following OCI credentials to base64 format and add them to values.yaml.

```
echo -n 'REGION' | base64
```
```
echo -n 'SECRET_KEY_ID' | base64
```
```
echo -n 'SECRET_ACCESS_KEY' | base64
```

You also need to adapt _modelRepositoryPath_ in values.yaml to your [namespace](https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/understandingnamespaces.htm) and [OCI region](https://docs.oracle.com/en-us/iaas/Content/General/Concepts/regions.htm).

```
s3://https://<OCI_NAMESPACE>.compat.objectstorage.<OCI_REGION>.oraclecloud.com:443/triton-inference-server-repository
```

## Deploy Prometheus and Grafana

The inference server metrics are collected by Prometheus and viewable
by Grafana. The inference server helm chart assumes that Prometheus
and Grafana are available so this step must be followed even if you
don't want to use Grafana.

Use the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) to install these components. The
*serviceMonitorSelectorNilUsesHelmValues* flag is needed so that
Prometheus can find the inference server metrics in the *example*
release deployed below.

```
$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false prometheus-community/kube-prometheus-stack
```

Then port-forward to the Grafana service so you can access it from
your local browser.

```
$ kubectl port-forward service/example-metrics-grafana 8080:80
```

Now you should be able to navigate in your browser to localhost:8080
and see the Grafana login page. Use username=admin and
password=prom-operator to login.

Note that it is also possible to set a load balancer service for the grafana dashboard
by running:

```
$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false --set grafana.service.type=LoadBalancer prometheus-community/kube-prometheus-stack
```

You can then see the public IP of your Grafana dashboard by running:

```
$ kubectl get svc
NAME                                       TYPE           CLUSTER-IP     EXTERNAL-IP       PORT(S)                      AGE
alertmanager-operated                      ClusterIP      None           <none>            9093/TCP,9094/TCP,9094/UDP   2m33s
example-metrics-grafana                    LoadBalancer   10.96.82.33    141.145.220.114   80:31005/TCP                 2m38s
```

The default load balancer created comes with a fixed shape and a bandwidth of 100Mbps. You can switch to a [flexible](https://docs.oracle.com/en-us/iaas/Content/ContEng/Tasks/contengcreatingloadbalancers-subtopic.htm#contengcreatingloadbalancers_subtopic) shape and adapt the bandwidth according to your OCI limits in case the bandwidth is a bottleneck.


An example Grafana dashboard is available in dashboard.json. Use the
import function in Grafana to import and view this dashboard.

## Deploy the Inference Server

Deploy the inference server using the default configuration with the
following commands.

```
$ cd <directory containing Chart.yaml>
$ helm install example .
```

Use kubectl to see status and wait until the inference server pods are
running.

```
$ kubectl get pods
NAME                                               READY   STATUS    RESTARTS   AGE
example-triton-inference-server-5f74b55885-n6lt7   1/1     Running   0          2m21s
```

There are several ways of overriding the default configuration as
described in this [helm
documentation](https://helm.sh/docs/using_helm/#customizing-the-chart-before-installing).

You can edit the values.yaml file directly or you can use the *--set*
option to override a single parameter with the CLI. For example, to
deploy a cluster of four inference servers use *--set* to set the
replicaCount parameter.

```
$ helm install example --set replicaCount=4 .
```

You can also write your own "config.yaml" file with the values you
want to override and pass it to helm.

```
$ cat << EOF > config.yaml
namespace: MyCustomNamespace
image:
  imageName: nvcr.io/nvidia/tritonserver:custom-tag
  modelRepositoryPath: s3://https://<OCI_NAMESPACE>.compat.objectstorage.<OCI_REGION>.oraclecloud.com:443/triton-inference-server-repository
EOF
$ helm install example -f config.yaml .
```

## Using Triton Inference Server

Now that the inference server is running you can send HTTP or GRPC
requests to it to perform inferencing. By default, the inferencing
service is exposed with a LoadBalancer service type. Use the following
to find the external IP for the inference server. In this case it is
34.83.9.133.

```
$ kubectl get services
NAME                             TYPE           CLUSTER-IP     EXTERNAL-IP   PORT(S)                                        AGE
...
example-triton-inference-server  LoadBalancer   10.18.13.28    34.83.9.133   8000:30249/TCP,8001:30068/TCP,8002:32723/TCP   47m
```

The inference server exposes an HTTP endpoint on port 8000, a GRPC
endpoint on port 8001, and a Prometheus metrics endpoint on
port 8002. You can use curl to get the metadata of the inference server
from the HTTP endpoint.

```
$ curl 34.83.9.133:8000/v2
```

Follow the [QuickStart](../../docs/getting_started/quickstart.md) to get the example
image classification client that can be used to perform inferencing
using image classification models being served by the inference
server. For example,

```
$ image_client -u 34.83.9.133:8000 -m inception_v3_onnx -s INCEPTION -c3 mug.jpg
Request 0, batch size 1
Image 'images/mug.jpg':
    504 (COFFEE MUG) = 0.723992
    968 (CUP) = 0.270953
    967 (ESPRESSO) = 0.00115997
```

## Cleanup

Once you've finished using the inference server you should use helm to
delete the deployment.

```
$ helm list
NAME            REVISION  UPDATED                   STATUS    CHART                          APP VERSION   NAMESPACE
example         1         Wed Feb 27 22:16:55 2019  DEPLOYED  triton-inference-server-1.0.0  1.0           default
example-metrics	1       	Tue Jan 21 12:24:07 2020	DEPLOYED	prometheus-operator-6.18.0   	 0.32.0     	 default

$ helm uninstall example
$ helm uninstall example-metrics
```

For the Prometheus and Grafana services, you should [explicitly delete
CRDs](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack#uninstall-helm-chart):

```
$ kubectl delete crd alertmanagerconfigs.monitoring.coreos.com alertmanagers.monitoring.coreos.com podmonitors.monitoring.coreos.com probes.monitoring.coreos.com prometheuses.monitoring.coreos.com prometheusrules.monitoring.coreos.com servicemonitors.monitoring.coreos.com thanosrulers.monitoring.coreos.com
```

You may also want to delete the OCI bucket you created to hold the
model repository.

```
$ oci os bucket delete --bucket-name triton-inference-server-repository --empty
```


================================================
FILE: deploy/oci/dashboard.json
================================================
{
  "__inputs": [
    {
      "name": "DS_PROMETHEUS",
      "label": "Prometheus",
      "description": "",
      "type": "datasource",
      "pluginId": "prometheus",
      "pluginName": "Prometheus"
    }
  ],
  "__requires": [
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "6.3.5"
    },
    {
      "type": "panel",
      "id": "graph",
      "name": "Graph",
      "version": ""
    },
    {
      "type": "panel",
      "id": "heatmap",
      "name": "Heatmap",
      "version": ""
    },
    {
      "type": "datasource",
      "id": "prometheus",
      "name": "Prometheus",
      "version": "1.0.0"
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "${DS_PROMETHEUS}",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 2,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "nv_inference_request_success",
          "legendFormat": "Success {{instance}}",
          "refId": "A"
        },
        {
          "expr": "nv_inference_request_failure",
          "legendFormat": "Failure {{instance}}",
          "refId": "B"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Cumulative Inference Requests",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "cards": {
        "cardPadding": null,
        "cardRound": null
      },
      "color": {
        "cardColor": "#b4ff00",
        "colorScale": "sqrt",
        "colorScheme": "interpolateReds",
        "exponent": 0.5,
        "mode": "spectrum"
      },
      "dataFormat": "timeseries",
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "heatmap": {},
      "hideZeroBuckets": false,
      "highlightCards": true,
      "id": 7,
      "legend": {
        "show": false
      },
      "options": {},
      "reverseYBuckets": false,
      "targets": [
        {
          "expr": "sum(increase(nv_inference_load_ratio_bucket[1m])) by (le)",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "timeFrom": null,
      "timeShift": null,
      "title": "Load Ratio  (Total Time / Compute Time)",
      "tooltip": {
        "show": true,
        "showHistogram": false
      },
      "type": "heatmap",
      "xAxis": {
        "show": true
      },
      "xBucketNumber": null,
      "xBucketSize": null,
      "yAxis": {
        "decimals": null,
        "format": "short",
        "logBase": 1,
        "max": null,
        "min": null,
        "show": true,
        "splitFactor": null
      },
      "yBucketBound": "auto",
      "yBucketNumber": null,
      "yBucketSize": null
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "${DS_PROMETHEUS}",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 9
      },
      "id": 4,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(nv_inference_queue_duration_us[30s]) / 1000",
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Queue Time (milliseconds)",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": "Queue Time (ms)",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "${DS_PROMETHEUS}",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 9
      },
      "id": 5,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(nv_inference_compute_duration_us[30s]) / 1000",
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Compute Time (milliseconds)",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": "Compute Time (ms)",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    }
  ],
  "refresh": "5s",
  "schemaVersion": 19,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-15m",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "5s",
      "10s",
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ]
  },
  "timezone": "",
  "title": "Triton Inference Server",
  "uid": "slEY4dsZk",
  "version": 8
}


================================================
FILE: deploy/oci/templates/_helpers.tpl
================================================
{{/*
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/}}

{{/* vim: set filetype=mustache: */}}
{{/*
Short name of the inference server: .Values.nameOverride when it is set,
otherwise the chart name. The result is cut to 63 characters and a trailing
"-" is stripped.
*/}}
{{- define "triton-inference-server.name" -}}
{{- $name := .Values.nameOverride | default .Chart.Name -}}
{{- $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when it is set.
  2. The release name on its own, when it already contains the chart name
     (or .Values.nameOverride, when that is set).
  3. Otherwise the release name and the chart name (or nameOverride) joined
     with "-".
Every variant is truncated to 63 characters and has a trailing "-" removed.
*/}}
{{- define "triton-inference-server.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- end -}}

{{/*
  Create inference server metrics service name and fullname derived from above and
  truncated appropriately: the base name is cut to 55 characters (trailing "-"
  stripped) before "-metrics" is appended, keeping the result within 63
  characters.
*/}}
{{- define "triton-inference-server-metrics.name" -}}
{{- printf "%s-%s" (include "triton-inference-server.name" . | trunc 55 | trimSuffix "-") "metrics" -}}
{{- end -}}

{{/*
  Fully qualified metrics service name: the inference server fullname cut to
  55 characters (trailing "-" stripped) followed by "-metrics".
*/}}
{{- define "triton-inference-server-metrics.fullname" -}}
{{- printf "%s-%s" (include "triton-inference-server.fullname" . | trunc 55 | trimSuffix "-") "metrics" -}}
{{- end -}}

{{/*
  Create inference server metrics monitor name and fullname derived from
  above and truncated appropriately: the base name is cut to 47 characters
  (trailing "-" stripped) before "-metrics-monitor" is appended, keeping the
  result within 63 characters.
*/}}
{{- define "triton-inference-server-metrics-monitor.name" -}}
{{- printf "%s-%s" (include "triton-inference-server.name" . | trunc 47 | trimSuffix "-") "metrics-monitor" -}}
{{- end -}}

{{/*
  Fully qualified metrics monitor name: the inference server fullname cut to
  47 characters (trailing "-" stripped) followed by "-metrics-monitor".
*/}}
{{- define "triton-inference-server-metrics-monitor.fullname" -}}
{{- printf "%s-%s" (include "triton-inference-server.fullname" . | trunc 47 | trimSuffix "-") "metrics-monitor" -}}
{{- end -}}

{{/*
Chart label value: the chart name and chart version joined with "-", with
every "+" replaced by "_", cut to 63 characters and a trailing "-" stripped.
*/}}
{{- define "triton-inference-server.chart" -}}
{{- $label := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $label | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}


================================================
FILE: deploy/oci/templates/deployment.yaml
================================================
# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Deployment that runs the Triton Inference Server container. The replica
# count, image, pull policy, GPU limit and model repository path are all read
# from the chart values.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ template "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  replicas: {{ .Values.replicaCount }}
  # The selector and the pod template labels below must stay identical; the
  # Services in service.yaml select pods with the same app/release pair.
  selector:
    matchLabels:
      app: {{ template "triton-inference-server.name" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "triton-inference-server.name" . }}
        release: {{ .Release.Name }}

    spec:
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.imageName }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}

          resources:
            limits:
              nvidia.com/gpu: {{ .Values.image.numGpus }}

          # Start tritonserver against the configured model repository in
          # poll mode with a 5 second repository poll interval.
          args: ["tritonserver", "--model-store={{ .Values.image.modelRepositoryPath }}",
                 "--model-control-mode=poll",
                 "--repository-poll-secs=5"]

          # The variables carry AWS_* names but are filled from the
          # oci-credentials Secret (see secrets.yaml). The default
          # modelRepositoryPath is an s3:// URL pointing at an OCI
          # "compat.objectstorage" endpoint, so presumably the repository is
          # read through the S3-compatible API - confirm with the README.
          env:
          - name: AWS_DEFAULT_REGION
            valueFrom:
              secretKeyRef:
                name: oci-credentials
                key: OCI_DEFAULT_REGION
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                name: oci-credentials
                key: OCI_ACCESS_KEY_ID
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                name: oci-credentials
                key: OCI_SECRET_ACCESS_KEY

          # Named ports; the probes below and the Services in service.yaml
          # refer to them by name rather than by number.
          ports:
            - containerPort: 8000
              name: http
            - containerPort: 8001
              name: grpc
            - containerPort: 8002
              name: metrics
          # Liveness and readiness are both checked over the HTTP port.
          livenessProbe:
            httpGet:
              path: /v2/health/live
              port: http
          readinessProbe:
            initialDelaySeconds: 5
            periodSeconds: 5
            httpGet:
              path: /v2/health/ready
              port: http

      # Pod-level security context: containers run as UID 1000 and mounted
      # volumes are group-owned by GID 1000.
      securityContext:
        runAsUser: 1000
        fsGroup: 1000


================================================
FILE: deploy/oci/templates/secrets.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Secret holding the object storage credentials that the Deployment injects
# into the container as AWS_DEFAULT_REGION, AWS_ACCESS_KEY_ID and
# AWS_SECRET_ACCESS_KEY.
# The values are written into "data" unchanged, so they must already be
# base64-encoded in the chart values.
apiVersion: v1
kind: Secret
metadata:
  name: oci-credentials
  # Set explicitly, like the Deployment and Services of this chart, so that
  # manifests rendered with "helm template" also place the Secret in the
  # release namespace where the Deployment looks it up.
  namespace: {{ .Release.Namespace }}
type: Opaque
data:
  OCI_DEFAULT_REGION: {{ .Values.secret.region }}
  OCI_ACCESS_KEY_ID: {{ .Values.secret.id }}
  OCI_SECRET_ACCESS_KEY: {{ .Values.secret.key }}


================================================
FILE: deploy/oci/templates/service.yaml
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Main Service: exposes the HTTP (8000), GRPC (8001) and metrics (8002) ports
# of the inference server pods. The Service type comes from the chart values.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "triton-inference-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  type: {{ .Values.service.type }}
  # targetPort values are the container port names declared in deployment.yaml.
  ports:
    - port: 8000
      targetPort: http
      name: http-inference-server
    - port: 8001
      targetPort: grpc
      name: grpc-inference-server
    - port: 8002
      targetPort: metrics
      name: metrics-inference-server
  selector:
    app: {{ template "triton-inference-server.name" . }}
    release: {{ .Release.Name }}
---
# Metrics Service: selects the same pods as the main Service but publishes
# only the metrics container port, on port 8080. Its "app" label is the
# metrics name, which is what the ServiceMonitor below matches on.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "triton-inference-server-metrics.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server-metrics.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
  annotations:
    alpha.monitoring.coreos.com/non-namespaced: "true"
spec:
  ports:
  - name: metrics
    port: 8080
    targetPort: metrics
    protocol: TCP
  selector:
    app: {{ template "triton-inference-server.name" . }}
    release: {{ .Release.Name }}
---
# ServiceMonitor: scrapes the port named "metrics" every 15 seconds on
# Services whose "app" label equals the metrics name, i.e. the metrics
# Service defined above.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ template "triton-inference-server-metrics-monitor.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "triton-inference-server-metrics-monitor.name" . }}
    chart: {{ template "triton-inference-server.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  selector:
    matchLabels:
      app: {{ template "triton-inference-server-metrics.name" . }}
  endpoints:
  - port: metrics
    interval: 15s


================================================
FILE: deploy/oci/values.yaml
================================================
# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Number of inference server pods created by the Deployment.
replicaCount: 1

image:
  # Container image to run and its pull policy.
  imageName: nvcr.io/nvidia/tritonserver:26.02-py3
  pullPolicy: IfNotPresent
  # Passed to tritonserver as --model-store. Replace <OCI_NAMESPACE> and
  # <OCI_REGION> with the values for your tenancy.
  modelRepositoryPath: s3://https://<OCI_NAMESPACE>.compat.objectstorage.<OCI_REGION>.oraclecloud.com:443/triton-inference-server-repository
  # Value of the nvidia.com/gpu resource limit set on the container.
  numGpus: 1

service:
  # Type of the Service that exposes the HTTP, GRPC and metrics ports.
  type: LoadBalancer

# Placeholders for the oci-credentials Secret. templates/secrets.yaml copies
# these values unchanged into the Secret's "data" field, so each one must be
# replaced with the base64-encoded form of the real value.
secret:
  region: OCI_REGION
  id: OCI_SECRET_KEY_ID
  key: OCI_SECRET_ACCESS_KEY


================================================
FILE: docker/README.third-party-src
================================================
This directory contains the licenses and source code for software
included in the Triton Inference Server build. To extract the files
use:

  $ tar zxf src.tar.gz


================================================
FILE: docker/cpu_only/entrypoint.d/12-banner.sh
================================================
#!/bin/bash
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

# Print the start-up banner: the NVIDIA release/build line and, when the
# upstream component version variable is set, the product version line.

# Upper-case NVIDIA_PRODUCT_NAME, turn spaces into underscores and drop a
# leading "NVIDIA_". The tr character classes are quoted so the shell cannot
# glob-expand them against single-character file names in the current
# directory.
prodname_uc=$(echo "${NVIDIA_PRODUCT_NAME}" | tr '[:lower:]' '[:upper:]' | sed 's/ /_/g' | sed 's/^NVIDIA_//')  # Product name
_prodver="NVIDIA_${prodname_uc}_VERSION" # Container product version variable name
_compver="${prodname_uc}_VERSION"        # Upstream component version variable name

echo
# ${!name} is indirect expansion: it reads the variable whose name is stored
# in _prodver / _compver.
echo "NVIDIA Release ${!_prodver} (build ${NVIDIA_BUILD_ID})"
[ -n "${!_compver}" ] && echo "${NVIDIA_PRODUCT_NAME} Version ${!_compver}"


================================================
FILE: docker/cpu_only/entrypoint.d/50-gpu-driver-check2.sh
================================================
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

# Unconditionally set TRITON_SERVER_CPU_ONLY=1 and export it so that it is
# part of the environment of every process started after this part runs.
TRITON_SERVER_CPU_ONLY=1
export TRITON_SERVER_CPU_ONLY


================================================
FILE: docker/cpu_only/nvidia_entrypoint.sh
================================================
#!/bin/bash
# Copyright 2016-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Collect every *.txt and *.sh file from the entrypoint.d directory that sits
# next to this script. Glob results are sorted, so parts run in alpha order.
# nullglob leaves the array empty when nothing matches; extglob is required
# for the @(...) alternation. Both options are switched off again afterwards.
shopt -s nullglob extglob
SCRIPT_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
declare -a PARTS=( "${SCRIPT_DIR}/entrypoint.d"/*@(.txt|.sh) )
shopt -u nullglob extglob

# Process the parts in order: text files are printed, shell files are sourced
# into this shell (not run as child processes), so anything they export or any
# change they make to the positional parameters is visible below.
for file in "${PARTS[@]}"; do
  if [[ "${file}" == *.txt ]]; then
    cat "${file}"
  elif [[ "${file}" == *.sh ]]; then
    . "${file}"
  fi
done

echo

# Act as a wrapper: replace this process with the given command line, or
# with bash when no arguments are left.
if (( $# > 0 )); then
  exec "$@"
else
  exec "/bin/bash"
fi


================================================
FILE: docker/entrypoint.d/10-banner.txt
================================================

=============================
== Triton Inference Server ==
=============================


================================================
FILE: docker/entrypoint.d/15-container-copyright.txt
================================================

Copyright (c) 2018-2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.


================================================
FILE: docker/entrypoint.d/50-gpu-driver-check2.sh
================================================
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

# Export TRITON_SERVER_CPU_ONLY=1 only when NVIDIA_CPU_ONLY is exactly "1";
# an unset or empty NVIDIA_CPU_ONLY is treated as "0" and leaves the
# environment untouched.
case "${NVIDIA_CPU_ONLY:-0}" in
  1) export TRITON_SERVER_CPU_ONLY=1 ;;
esac


================================================
FILE: docker/entrypoint.d/56-network-driver-version-check.sh
================================================


================================================
FILE: docker/entrypoint.d/70-shm-check.sh
================================================


================================================
FILE: docker/entrypoint.d/99-check-run-aip-mode.sh
================================================
#!/bin/bash
# Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

# If a Vertex AI environment is detected (AIP_MODE is set and non-empty),
# launch tritonserver with the supplied arguments.

# Prepend the tritonserver binary to "$@". This file is sourced, so the
# modified argument list is what the entrypoint script later exec's.
if [[ -n "${AIP_MODE:-}" ]]; then
  set -- "/opt/tritonserver/bin/tritonserver" "$@"
fi


================================================
FILE: docker/sagemaker/serve
================================================
#!/bin/bash
# Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# SageMaker launcher for tritonserver.
#
# Translates SAGEMAKER_* / SAGEMAKER_TRITON_* environment variables into
# tritonserver command line options, validates the model repository layout in
# single-model mode, and finally replaces this shell with tritonserver.

SAGEMAKER_SINGLE_MODEL_REPO=/opt/ml/model/

# Use 'ready' for ping check in single-model endpoint mode, and use 'live' for ping check in multi-model endpoint mode
# https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/rest_predict_v2.yaml#L10-L26
# SAGEMAKER_TRITON_OVERRIDE_PING_MODE, when non-empty, wins in either mode.
# NOTE(review): SAGEMAKER_TRITON_PING_MODE is assigned but never exported in
# this script; confirm that tritonserver actually receives the value.
if [ -n "$SAGEMAKER_TRITON_OVERRIDE_PING_MODE" ]; then
    SAGEMAKER_TRITON_PING_MODE=${SAGEMAKER_TRITON_OVERRIDE_PING_MODE}
else
    SAGEMAKER_TRITON_PING_MODE="ready"
fi

# Note: in Triton on SageMaker, each model url is registered as a separate repository
# e.g., /opt/ml/models/<hash>/model. Specifying MME model repo path as /opt/ml/models causes Triton
# to treat it as an additional empty repository and changes
# the state of all models to be UNAVAILABLE in the model repository
# https://github.com/triton-inference-server/core/blob/main/src/model_repository_manager.cc#L914,L922
# On Triton, this path will be a dummy path as it's mandatory to specify a model repo when starting triton
SAGEMAKER_MULTI_MODEL_REPO=/tmp/sagemaker

# Default to single-model mode; switched below when SAGEMAKER_MULTI_MODEL=true.
SAGEMAKER_MODEL_REPO=${SAGEMAKER_SINGLE_MODEL_REPO}
is_mme_mode=false

if [ -n "$SAGEMAKER_MULTI_MODEL" ]; then
    if [ "$SAGEMAKER_MULTI_MODEL" == "true" ]; then
        mkdir -p "${SAGEMAKER_MULTI_MODEL_REPO}"
        SAGEMAKER_MODEL_REPO=${SAGEMAKER_MULTI_MODEL_REPO}
        if [ -n "$SAGEMAKER_TRITON_OVERRIDE_PING_MODE" ]; then
            SAGEMAKER_TRITON_PING_MODE=${SAGEMAKER_TRITON_OVERRIDE_PING_MODE}
        else
            SAGEMAKER_TRITON_PING_MODE="live"
        fi
        is_mme_mode=true
        echo -e "Triton is running in SageMaker MME mode. Using Triton ping mode: \"${SAGEMAKER_TRITON_PING_MODE}\""
    fi
fi

# SAGEMAKER_ARGS accumulates the tritonserver options as one space-separated
# string; it is expanded unquoted at the end so that it splits into arguments.
SAGEMAKER_ARGS="--model-repository=${SAGEMAKER_MODEL_REPO}"
#Set model namespacing to true, but allow disabling if required
# NOTE(review): the variable's value is passed through verbatim, so
# SAGEMAKER_TRITON_DISABLE_MODEL_NAMESPACING=false is what disables
# namespacing (and "true" keeps it enabled) — confirm this is intended.
if [ -n "$SAGEMAKER_TRITON_DISABLE_MODEL_NAMESPACING" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --model-namespacing=${SAGEMAKER_TRITON_DISABLE_MODEL_NAMESPACING}"
else
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --model-namespacing=true"
fi
if [ -n "$SAGEMAKER_BIND_TO_PORT" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --sagemaker-port=${SAGEMAKER_BIND_TO_PORT}"
fi
if [ -n "$SAGEMAKER_SAFE_PORT_RANGE" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --sagemaker-safe-port-range=${SAGEMAKER_SAFE_PORT_RANGE}"
fi
# gRPC and metrics endpoints are disabled unless explicitly enabled.
if [ -n "$SAGEMAKER_TRITON_ALLOW_GRPC" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --allow-grpc=${SAGEMAKER_TRITON_ALLOW_GRPC}"
else
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --allow-grpc=false"
fi
if [ -n "$SAGEMAKER_TRITON_ALLOW_METRICS" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --allow-metrics=${SAGEMAKER_TRITON_ALLOW_METRICS}"
else
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --allow-metrics=false"
fi
if [ -n "$SAGEMAKER_TRITON_METRICS_PORT" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --metrics-port=${SAGEMAKER_TRITON_METRICS_PORT}"
fi
if [ -n "$SAGEMAKER_TRITON_GRPC_PORT" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --grpc-port=${SAGEMAKER_TRITON_GRPC_PORT}"
fi
if [ -n "$SAGEMAKER_TRITON_BUFFER_MANAGER_THREAD_COUNT" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --buffer-manager-thread-count=${SAGEMAKER_TRITON_BUFFER_MANAGER_THREAD_COUNT}"
fi
if [ -n "$SAGEMAKER_TRITON_THREAD_COUNT" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --sagemaker-thread-count=${SAGEMAKER_TRITON_THREAD_COUNT}"
fi
# Enable verbose logging by default. If env variable is specified, use value from env variable
if [ -n "$SAGEMAKER_TRITON_LOG_VERBOSE" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-verbose=${SAGEMAKER_TRITON_LOG_VERBOSE}"
else
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-verbose=true"
fi
if [ -n "$SAGEMAKER_TRITON_LOG_INFO" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-info=${SAGEMAKER_TRITON_LOG_INFO}"
fi
if [ -n "$SAGEMAKER_TRITON_LOG_WARNING" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-warning=${SAGEMAKER_TRITON_LOG_WARNING}"
fi
if [ -n "$SAGEMAKER_TRITON_LOG_ERROR" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-error=${SAGEMAKER_TRITON_LOG_ERROR}"
fi
# Python backend shared memory sizes always get an explicit value.
if [ -n "$SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-default-byte-size=${SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE}"
else
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-default-byte-size=16777216" #16MB
fi
if [ -n "$SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-growth-byte-size=${SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE}"
else
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-growth-byte-size=1048576" #1MB
fi
if [ -n "$SAGEMAKER_TRITON_TENSORFLOW_VERSION" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=tensorflow,version=${SAGEMAKER_TRITON_TENSORFLOW_VERSION}"
fi
# Apply the same model-load GPU limit to every GPU index; the GPU count is
# the number of lines printed by `nvidia-smi -L`.
if [ -n "$SAGEMAKER_TRITON_MODEL_LOAD_GPU_LIMIT" ]; then
    num_gpus=$(nvidia-smi -L | wc -l)
    for ((i=0; i<${num_gpus}; i++)); do
        SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --model-load-gpu-limit ${i}:${SAGEMAKER_TRITON_MODEL_LOAD_GPU_LIMIT}"
    done
fi
# Free-form extra options are appended last.
if [ -n "$SAGEMAKER_TRITON_ADDITIONAL_ARGS" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} ${SAGEMAKER_TRITON_ADDITIONAL_ARGS}"
fi


# In single-model mode a config.pbtxt directly in the repository root means
# the model's own top level directory is missing.
if [ "${is_mme_mode}" = false ] && [ -f "${SAGEMAKER_MODEL_REPO}/config.pbtxt" ]; then
    echo "ERROR: Incorrect directory structure."
    echo "       Model directory needs to contain the top level folder"
    exit 1
fi

# Validate SAGEMAKER_TRITON_INFERENCE_TYPE if set
# (the value is only checked here; it is not turned into a command line option)
if [ -n "$SAGEMAKER_TRITON_INFERENCE_TYPE" ]; then
    case "$SAGEMAKER_TRITON_INFERENCE_TYPE" in
        "infer"|"generate"|"generate_stream")
            # Valid value, continue
            ;;
        *)
            echo "ERROR: Invalid SAGEMAKER_TRITON_INFERENCE_TYPE '${SAGEMAKER_TRITON_INFERENCE_TYPE}'"
            echo "       Must be one of: infer, generate, generate_stream"
            exit 1
            ;;
    esac
fi

# Single-model mode: pick the model to load. Use the explicitly named model
# when given, otherwise require exactly one model directory in the repository.
if [ "${is_mme_mode}" = false ] && [ -n "$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
    if [ -d "${SAGEMAKER_MODEL_REPO}/$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
        SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --load-model=${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME}"
    else
        echo "ERROR: Directory with provided SAGEMAKER_TRITON_DEFAULT_MODEL_NAME ${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME} does not exist"
        exit 1
    fi
elif [ "${is_mme_mode}" = false ]; then
    # Read one directory name per line. Unlike MODEL_DIRS=(`find ...`), this
    # neither splits names on whitespace nor glob-expands them, so the count
    # below is the real number of top level directories.
    mapfile -t MODEL_DIRS < <(find "${SAGEMAKER_MODEL_REPO}" -mindepth 1 -maxdepth 1 -type d -printf "%f\n")
    case ${#MODEL_DIRS[@]} in
        0) echo "ERROR: No model found in model repository";
           exit 1
           ;;
        1) echo "WARNING: No SAGEMAKER_TRITON_DEFAULT_MODEL_NAME provided."
           echo "         Starting with the only existing model directory ${MODEL_DIRS[0]}";
           export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=${MODEL_DIRS[0]}
           ;;
        *) echo "ERROR: More than 1 model directory found in model repository."
           echo "       Either provide a single directory or set SAGEMAKER_TRITON_DEFAULT_MODEL_NAME to run the ensemble backend."
           echo "       Directories found in model repository: ${MODEL_DIRS[*]}";
           exit 1
           ;;
    esac
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --load-model=${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME}"
fi

# exec replaces this shell with tritonserver so that signals sent to the
# process (e.g. SIGTERM when the container is stopped) reach the server
# directly instead of an intermediate bash process.
# $SAGEMAKER_ARGS is intentionally unquoted: it must split into separate options.
exec tritonserver --allow-sagemaker=true --allow-http=false --model-control-mode=explicit $SAGEMAKER_ARGS


================================================
FILE: docs/Dockerfile.docs
================================================
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Image used to build the documentation.
FROM ubuntu:24.04

# various documentation dependencies
# (the apt package lists are removed in the same layer to keep it small)
RUN apt-get update -q=2 \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        doxygen \
        git \
        git-lfs \
        pandoc \
        python3-dev \
        python3-pip \
        ssh \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# install protobuf
RUN wget https://github.com/google/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip -O /tmp/proto.zip \
    && unzip /tmp/proto.zip -d /usr/local \
    && rm /tmp/proto.zip

# install pseudomuto/protoc-gen-doc
RUN wget https://github.com/pseudomuto/protoc-gen-doc/releases/download/v1.3.2/protoc-gen-doc-1.3.2.linux-amd64.go1.12.6.tar.gz -O /tmp/protoc-gen-doc.tar.gz \
    && tar -xvf /tmp/protoc-gen-doc.tar.gz --strip-components=1 -C /usr/local/bin/ \
    && rm /tmp/protoc-gen-doc.tar.gz

# install sphinx et al
# PIP_BREAK_SYSTEM_PACKAGES lets pip install into the system Python.
# --no-cache-dir keeps pip's download cache out of the image layers.
ENV PIP_BREAK_SYSTEM_PACKAGES=1
RUN pip3 install --no-cache-dir \
      ablog \
      attrs  \
      breathe \
      docutils \
      exhale \
      httplib2 \
      ipython \
      myst-nb \
      nbclient \
      nbsphinx \
      rst-to-myst \
      sphinx==5.0.0 \
      sphinx-book-theme \
      sphinx-copybutton \
      sphinx-design \
      sphinx-prompt \
      sphinx-sitemap \
      sphinx-tabs \
      sphinxcontrib-bibtex

# Install the NVIDIA theme from the NVIDIA package index and re-pin sphinx.
# NOTE(review): this replaces the sphinx==5.0.0 installed by the previous step
# with 7.4.7; confirm whether the earlier pin is still needed.
RUN pip3 install --no-cache-dir \
      --extra-index-url https://pypi.nvidia.com \
      nvidia-sphinx-theme \
      sphinx==7.4.7

# NOTE(review): this pipes a remotely fetched script straight into sh without
# pinning a version or verifying a checksum.
RUN curl -fL https://install-cli.jfrog.io | sh

# Mark every directory as safe for git, whoever owns it.
RUN git config --global --add safe.directory "*"

# Set visitor script to be included on every HTML page
ENV VISITS_COUNTING_SCRIPT="//assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js"


================================================
FILE: docs/Makefile
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Minimal makefile for Sphinx documentation
#
# Every target except "clean" is forwarded to sphinx-build's make mode (-M).

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS        ?=
SPHINXBUILD       ?= sphinx-build
# Directory containing the documentation sources (this directory).
SOURCEDIR          = .
# Directory that receives the build output.
BUILDDIR           = build
# Directory removed by "clean" together with the build output.
TRITONCLIENTRSTDIR = _reference/tritonclient

#PROTOBUFFILES = $(wildcard ../triton/proto/*.proto)

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# Delete the build output directory and the TRITONCLIENTRSTDIR directory.
clean:
	@rm -fr ${BUILDDIR}
	@rm -fr ${TRITONCLIENTRSTDIR}

# These names are declared phony, i.e. they are not treated as files to build.
.PHONY: help Makefile clean

# protobuf: source/reference/protos/gen_proto_doc.sh
# 	cd source/reference/protos && \
#     rm -f *.proto.rst && \
#     bash -x ./gen_proto_doc.sh $(PROTOBUFFILES:%=../%)

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
# $@ is the requested target name, passed to Sphinx as the builder to run.
%:
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/README.md
================================================
<!--
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# **Triton Inference Server Documentation**

| [Installation](README.md#installation) | [Getting Started](README.md#getting-started) | [User Guide](README.md#user-guide) | [API Guide](protocol/README.md) | [Additional Resources](README.md#resources) | [Customization Guide](README.md#customization-guide) |
| ------------ | --------------- | --------------- | ------------ | --------------- | --------------- |

**New to Triton Inference Server?** Make use of
[these tutorials](https://github.com/triton-inference-server/tutorials)
 to begin your Triton journey!

## **Installation**
Before you can use the Triton Docker image you must install
[Docker](https://docs.docker.com/engine/install). If you plan on using
a GPU for inference you must also install the [NVIDIA Container
Toolkit](https://github.com/NVIDIA/nvidia-docker). DGX users should
follow [Preparing to use NVIDIA
Containers](http://docs.nvidia.com/deeplearning/dgx/preparing-containers/index.html).

Pull the image using the following command.

```
$ docker pull nvcr.io/nvidia/tritonserver:<yy.mm>-py3
```

Where \<yy.mm\> is the version of Triton that you want to pull. For a complete list of all the variants and versions of the Triton Inference Server Container,  visit the [NGC Page](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver). More information about customizing the Triton Container can be found in [this section](customization_guide/compose.md) of the User Guide.

## **Getting Started**

This guide covers the simplest possible workflow for deploying a model using a Triton Inference Server.
- [Create a Model Repository](getting_started/quickstart.md#create-a-model-repository)
- [Launch Triton](getting_started/quickstart.md#launch-triton)
- [Send an Inference Request](getting_started/quickstart.md#send-an-inference-request)

Triton Inference Server has a considerable list of versatile and powerful features. All new users are encouraged to explore the [User Guide](README.md#user-guide) and the [additional resources](README.md#resources) sections for features most relevant to their use case.

## **User Guide**
The User Guide describes how to configure Triton, organize and configure your models, use the C++ and Python clients, etc. This guide includes the following:
* Creating a Model Repository [[Overview](README.md#model-repository) || [Details](user_guide/model_repository.md)]
* Writing a Model Configuration [[Overview](README.md#model-configuration) || [Details](user_guide/model_configuration.md)]
* Building a Model Pipeline [[Overview](README.md#model-pipeline)]
* Managing Model Availability [[Overview](README.md#model-management) || [Details](user_guide/model_management.md)]
* Collecting Server Metrics [[Overview](README.md#metrics) || [Details](user_guide/metrics.md)]
* Supporting Custom Ops/layers [[Overview](README.md#framework-custom-operations) || [Details](user_guide/custom_operations.md)]
* Using the Client API [[Overview](README.md#client-libraries-and-examples) || [Details](https://github.com/triton-inference-server/client)]
* Cancelling Inference Requests [[Overview](README.md#cancelling-inference-requests) || [Details](user_guide/request_cancellation.md)]
* Analyzing Performance [[Overview](README.md#performance-analysis)]
* Deploying on edge (Jetson) [[Overview](README.md#jetson-and-jetpack)]
* Debugging Guide [Details](./user_guide/debugging_guide.md)

### Model Repository
[Model Repositories](user_guide/model_repository.md) are the organizational hub for using Triton. All models, configuration files, and additional resources needed to serve the models are housed inside a model repository.
- [Cloud Storage](user_guide/model_repository.md#model-repository-locations)
- [File Organization](user_guide/model_repository.md#model-files)
- [Model Versioning](user_guide/model_repository.md#model-versions)
### Model Configuration

A [Model Configuration](user_guide/model_configuration.md) file is where you set the model-level options, such as output tensor reshaping and dynamic batch sizing.

#### Required Model Configuration

Triton Inference Server requires some [Minimum Required parameters](user_guide/model_configuration.md#minimal-model-configuration) to be filled in the Model Configuration. These required parameters essentially pertain to the structure of the model. For ONNX and TensorRT models, users can rely on Triton to [Auto Generate](user_guide/model_configuration.md#auto-generated-model-configuration) the Minimum Required model configuration.
- [Maximum Batch Size - Batching and Non-Batching Models](user_guide/model_configuration.md#maximum-batch-size)
- [Input and Output Tensors](user_guide/model_configuration.md#inputs-and-outputs)
    - [Tensor Datatypes](user_guide/model_configuration.md#datatypes)
    - [Tensor Reshape](user_guide/model_configuration.md#reshape)
    - [Shape Tensor](user_guide/model_configuration.md#shape-tensors)

#### Versioning Models
Users need the ability to save and serve different versions of models based on business requirements. Triton allows users to set policies to make available different versions of the model as needed. [Learn More](user_guide/model_configuration.md#version-policy).

#### Instance Groups
Triton allows users to use multiple instances of the same model. Users can specify how many instances (copies) of a model to load and whether to use GPU or CPU. If the model is being loaded on GPU, users can also select which GPUs to use. [Learn more](user_guide/model_configuration.md#instance-groups).
- [Specifying Multiple Model Instances](user_guide/model_configuration.md#multiple-model-instances)
- [CPU and GPU Instances](user_guide/model_configuration.md#cpu-model-instance)
- [Configuring Rate Limiter](user_guide/model_configuration.md#rate-limiter-configuration)

#### Optimization Settings

The Model Configuration ModelOptimizationPolicy property is used to specify optimization and prioritization settings for a model. These settings control if/how a model is optimized by the backend and how it is scheduled and executed by Triton. See the [ModelConfig Protobuf](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto) and [Optimization Documentation](user_guide/optimization.md#optimization) for the currently available settings.
- [Framework-Specific Optimization](user_guide/optimization.md#framework-specific-optimization)
  - [ONNX-TensorRT](user_guide/optimization.md#onnx-with-tensorrt-optimization-ort-trt)
  - [ONNX-OpenVINO](user_guide/optimization.md#onnx-with-openvino-optimization)
- [NUMA Optimization](user_guide/optimization.md#numa-optimization)

#### Scheduling and Batching

Triton supports batching individual inference requests to improve compute resource utilization. This is extremely important because individual requests typically do not saturate GPU resources and therefore do not leverage the parallelism provided by GPUs to its full extent. Learn more about Triton's [Batcher and Scheduler](#scheduling-and-batching).
- [Default Scheduler - Non-Batching](user_guide/scheduler.md#default-scheduler)
- [Dynamic Batcher](user_guide/batcher.md#dynamic-batcher)
  - [How to Configure Dynamic Batcher](user_guide/model_configuration.md#recommended-configuration-process)
    - [Delayed Batching](user_guide/batcher.md#delayed-batching)
    - [Preferred Batch Size](user_guide/model_configuration.md#preferred-batch-sizes)
  - [Preserving Request Ordering](user_guide/model_configuration.md#preserve-ordering)
  - [Priority Levels](user_guide/model_configuration.md#priority-levels)
  - [Queuing Policies](user_guide/model_configuration.md#queue-policy)
  - [Ragged Batching](user_guide/ragged_batching.md)
- [Sequence Batcher](user_guide/batcher.md#sequence-batcher)
  - [Stateful Models](user_guide/model_execution.md#stateful-models)
  - [Control Inputs](user_guide/model_execution.md#control-inputs)
  - [Implicit State - Stateful Inference Using a Stateless Model](user_guide/implicit_state_management.md#implicit-state-management)
  - [Sequence Scheduling Strategies](user_guide/architecture.md#scheduling-strategies)
    - [Direct](user_guide/architecture.md#direct)
    - [Oldest](user_guide/architecture.md#oldest)

#### Rate Limiter
Rate limiter manages the rate at which requests are scheduled on model instances by Triton. The rate limiter operates across all models loaded in Triton to allow cross-model prioritization. [Learn more](user_guide/rate_limiter.md).

#### Model Warmup
For a few of the Backends (check [Additional Resources](README.md#resources)), some or all of the initialization is deferred until the first inference request is received. This conserves resources, but it comes with the downside that the initial requests are processed more slowly than expected. Users can pre-"warm up" the model by instructing Triton to initialize the model. [Learn more](user_guide/model_configuration.md#model-warmup).

#### Inference Request/Response Cache
Triton has a feature which allows inference responses to get cached. [Learn More](user_guide/response_cache.md).

### Model Pipeline
Building ensembles is as easy as adding an additional configuration file which outlines the specific flow of tensors from one model to another. Any additional changes required by the model ensemble can be made in existing (individual) model configurations.
- [Model Ensemble](user_guide/architecture.md#ensemble-models)
- [Business Logic Scripting (BLS)](https://github.com/triton-inference-server/python_backend#business-logic-scripting)
### Model Management
Users can specify policies in the model configuration for loading and unloading of models. This [section](user_guide/model_management.md) covers user selectable policy details.
- [Explicit Model Loading and Unloading](user_guide/model_management.md#model-control-mode-explicit)
- [Modifying the Model Repository](user_guide/model_management.md#modifying-the-model-repository)
### Metrics
Triton provides Prometheus metrics like GPU Utilization, Memory Usage, Latency and more. Learn about [available metrics](user_guide/metrics.md).
### Framework Custom Operations
Some frameworks provide the option of building custom layers/operations. These can be added to specific Triton Backends for those frameworks. [Learn more](user_guide/custom_operations.md)
- [TensorRT](user_guide/custom_operations.md#tensorrt)
- [PyTorch](user_guide/custom_operations.md#pytorch)
- [ONNX](user_guide/custom_operations.md#onnx)
### Client Libraries and Examples
Use the [Triton Client](https://github.com/triton-inference-server/client) API to integrate client applications over the network HTTP/gRPC API or integrate applications directly with Triton using CUDA shared memory to remove network overhead.
- [C++ HTTP/GRPC Libraries](https://github.com/triton-inference-server/client#client-library-apis)
- [Python HTTP/GRPC Libraries](https://github.com/triton-inference-server/client#client-library-apis)
- [Java HTTP Library](https://github.com/triton-inference-server/client/tree/main/src/java)
- GRPC Generated Libraries
  - [go](https://github.com/triton-inference-server/client/tree/main/src/grpc_generated/go)
  - [Java/Scala](https://github.com/triton-inference-server/client/tree/main/src/grpc_generated/java)
  - [Javascript](https://github.com/triton-inference-server/client/tree/main/src/grpc_generated/javascript)
- [Shared Memory Extension](protocol/extension_shared_memory.md)
### Cancelling Inference Requests
Triton can detect and handle requests that have been cancelled from the client-side. This [document](user_guide/request_cancellation.md) discusses scope and limitations of the feature.
### Performance Analysis
Understanding inference performance is key to better resource utilization. Use Triton's tools to customize your deployment.
- [Performance Tuning Guide](user_guide/performance_tuning.md)
- [Optimization](user_guide/optimization.md)
- [Model Analyzer](user_guide/model_analyzer.md)
- [Performance Analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
- [Inference Request Tracing](user_guide/trace.md)
### Jetson and JetPack
Triton can be deployed on edge devices. Explore [resources](user_guide/jetson.md) and [examples](examples/jetson/README.md).

## **Resources**

The following resources are recommended to explore the full suite of Triton Inference Server's functionalities.
- **Clients**: Triton Inference Server comes with C++, Python and Java APIs with which users can send HTTP/REST or gRPC (with possible extensions for other languages) requests. Explore the [client repository](https://github.com/triton-inference-server/server/tree/main/docs/protocol) for examples and documentation.

- **Configuring Deployment**: Triton comes with three tools which can be used to configure deployment settings, measure performance and recommend optimizations.
  - [Model Analyzer](https://github.com/triton-inference-server/model_analyzer): Model Analyzer is a CLI tool built to recommend deployment configurations for Triton Inference Server based on the user's Quality of Service requirements. It also generates detailed reports about model performance to summarize the benefits and trade-offs of different configurations.
  - [Perf Analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md):
  Perf Analyzer is a CLI application built to generate inference requests and
  measure the latency of those requests and the throughput of the model being
  served.
  - [Model Navigator](https://github.com/triton-inference-server/model_navigator):
  The Triton Model Navigator is a tool that automates the process of moving a model from its source to the optimal format and configuration for deployment on Triton Inference Server. The tool supports exporting a model from its source to all possible formats and applies the Triton Inference Server backend optimizations.

- **Backends**: Triton supports a wide variety of frameworks used to run models. Users can extend this functionality by creating custom backends.
  - [PyTorch](https://github.com/triton-inference-server/pytorch_backend): Widely used Open Source DL Framework
  - [TensorRT](https://github.com/triton-inference-server/tensorrt_backend): NVIDIA [TensorRT](https://developer.nvidia.com/tensorrt) is an inference acceleration SDK that provides a wide range of graph optimizations, kernel optimizations, use of lower precision, and more.
  - [ONNX](https://github.com/triton-inference-server/onnxruntime_backend): ONNX Runtime is a cross-platform inference and training machine-learning accelerator.
  - [OpenVINO](https://github.com/triton-inference-server/openvino_backend): OpenVINO™ is an open-source toolkit for optimizing and deploying AI inference.
  - [Paddle Paddle](https://github.com/triton-inference-server/paddlepaddle_backend): Widely used Open Source DL Framework
  - [Python](https://github.com/triton-inference-server/python_backend): Users can add custom business logic, or any python code/model for serving requests.
  - [Forest Inference Library](https://github.com/triton-inference-server/fil_backend): Backend built for forest models trained by several popular machine learning frameworks (including XGBoost, LightGBM, Scikit-Learn, and cuML)
  - [DALI](https://github.com/triton-inference-server/dali_backend): NVIDIA [DALI](https://developer.nvidia.com/dali) is a Data Loading Library purpose-built to accelerate the pre-processing and data loading steps in a Deep Learning pipeline.
  - [HugeCTR](https://github.com/triton-inference-server/hugectr_backend): HugeCTR is a GPU-accelerated recommender framework designed to distribute training across multiple GPUs and nodes and estimate Click-Through Rates
  - [Managed Stateful Models](https://github.com/triton-inference-server/stateful_backend): This backend automatically manages the input and output states of a model. The states are associated with a sequence id and need to be tracked for inference requests associated with the sequence id.
  - [Faster Transformer](https://github.com/triton-inference-server/fastertransformer_backend): NVIDIA [FasterTransformer](https://github.com/NVIDIA/FasterTransformer/) (FT) is a library implementing an accelerated engine for the inference of transformer-based neural networks, with a special emphasis on large models, spanning many GPUs and nodes in a distributed manner.
  - [Building Custom Backends](https://github.com/triton-inference-server/backend/tree/main/examples#tutorial)
  - [Sample Custom Backend: Repeat_backend](https://github.com/triton-inference-server/repeat_backend): Backend built to demonstrate sending of zero, one, or multiple responses per request.

## **Customization Guide**
This guide describes how to build and test Triton and also how Triton can be extended with new functionality.

- [Build](customization_guide/build.md)
- [Protocols and APIs](customization_guide/inference_protocols.md)
- [Backends](https://github.com/triton-inference-server/backend)
- [Repository Agents](customization_guide/repository_agents.md)
- [Test](customization_guide/test.md)


================================================
FILE: docs/_reference/tritonclient_api.rst
================================================
..
  # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  #
  # Redistribution and use in source and binary forms, with or without
  # modification, are permitted provided that the following conditions
  # are met:
  #  * Redistributions of source code must retain the above copyright
  #    notice, this list of conditions and the following disclaimer.
  #  * Redistributions in binary form must reproduce the above copyright
  #    notice, this list of conditions and the following disclaimer in the
  #    documentation and/or other materials provided with the distribution.
  #  * Neither the name of NVIDIA CORPORATION nor the names of its
  #    contributors may be used to endorse or promote products derived
  #    from this software without specific prior written permission.
  #
  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
  # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Python tritonclient Package API
===============================

The tritonclient Python package is hosted on `PyPI <https://pypi.org/project/tritonclient/>`_. This package documentation for tritonclient is generated by the Sphinx autosummary extension.

.. autosummary::
   :toctree: tritonclient
   :recursive:

   tritonclient


================================================
FILE: docs/_static/.gitattributes
================================================
nvidia-logo-horiz-rgb-blk-for-screen.png filter=lfs diff=lfs merge=lfs -text
nvidia-logo-vert-rgb-blk-for-screen.png filter=lfs diff=lfs merge=lfs -text


================================================
FILE: docs/_static/custom.css
================================================
/*
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* "NVIDIA Sans" web font.
   One @font-face rule per weight/style pair: weights 300, 400, 500 and 700,
   each in normal and italic. Every rule lists a WOFF2 source first and a
   WOFF source second. */

/* Weight 300, normal */
@font-face {
  font-family: "NVIDIA Sans";
  src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/5/2/52891dda673228d54e5d57bf1e4a3880d4b22405.woff2) format("woff2"),
      url(https://aws1.discourse-cdn.com/nvidia/original/3X/e/0/e090b7dda7a582522c7f9045c6ce949cce60134f.woff) format("woff");
  font-weight: 300;
  font-style: normal;
}
/* Weight 300, italic */
@font-face {
  font-family: "NVIDIA Sans";
  src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/a/1/a107baabcbf6b241099122336bce7429bcfd377a.woff2) format("woff2"),
      url(https://aws1.discourse-cdn.com/nvidia/original/3X/3/a/3a6060a4e3bce70e5552ba0de8af4b22c6cf9144.woff) format("woff");
  font-weight: 300;
  font-style: italic;
}
/* Weight 400, normal */
@font-face {
  font-family: "NVIDIA Sans";
  src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/9/9/9920d2b172b01d92fc9c1c0e521dcf45b59c47c3.woff2) format("woff2"),
      url(https://aws1.discourse-cdn.com/nvidia/original/3X/6/c/6c7d947928a7e4ef3e80ed409bef6c243f2148cb.woff) format("woff");
  font-weight: 400;
  font-style: normal;
}
/* Weight 400, italic */
@font-face {
  font-family: "NVIDIA Sans";
  src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/e/8/e8e63fe1244372cd942d957f44a5616a1eba0644.woff2) format("woff2"),
      url(https://aws1.discourse-cdn.com/nvidia/original/3X/0/f/0f1fb2af0283ab09d36e7097bb07d895c3228f12.woff) format("woff");
  font-weight: 400;
  font-style: italic;
}
/* Weight 500, normal */
@font-face {
  font-family: "NVIDIA Sans";
  src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/7/9/79d3c513a9cd72c59f65354f39f89ca52dc17dd2.woff2) format("woff2"),
      url(https://aws1.discourse-cdn.com/nvidia/original/3X/2/5/2581ac533f5d01f4985d8a7245b0766b4630ced8.woff) format("woff");
  font-weight: 500;
  font-style: normal;
}
/* Weight 500, italic */
@font-face {
  font-family: "NVIDIA Sans";
  src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/3/9/39d9ef1ee9770dd503f19bb2ace2fdb4eff3bb50.woff2) format("woff2"),
      url(https://aws1.discourse-cdn.com/nvidia/original/3X/7/b/7bb5d5e2e71b2e13c8098b2e67c0a0ed9258e6c7.woff) format("woff");
  font-weight: 500;
  font-style: italic;
}
/* Weight 700, normal */
@font-face {
  font-family: "NVIDIA Sans";
  src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/0/5/05276a55a43eb3f74981ec1e93252727afcd9d16.woff2) format("woff2"),
      url(https://aws1.discourse-cdn.com/nvidia/original/3X/9/c/9cfec7ed941b06564aa4d5ca14610e81542d070f.woff) format("woff");
  font-weight: 700;
  font-style: normal;
}
/* Weight 700, italic */
@font-face {
  font-family: "NVIDIA Sans";
  src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/a/e/aebd14d09ba56f541e1b8735fb051e33710f9ae7.woff2) format("woff2"),
      url(https://aws1.discourse-cdn.com/nvidia/original/3X/e/d/edbdabef43acc5c12e84a94baaa5542c9404cfeb.woff) format("woff");
  font-weight: 700;
  font-style: italic;
}

/* Custom Styles */
/* Theme-level custom properties. Colour values here are bare "R, G, B"
   triplets rather than complete colour values.
   NOTE(review): the --pst-* names presumably belong to the Sphinx theme in
   use (PyData Sphinx Theme / Sphinx Book Theme) - confirm against the theme
   version the docs are built with. */
:root {
/* NOTE(review): "none" is an unusual value for a font-size variable -
   confirm it is intentional. */
--pst-font-size-base: none;
/* 0, 133, 197 is listed as "cpu blue" in the colour reference comment in this file. */
--pst-color-primary: 0, 133, 197;
/* Note and default admonitions reuse the primary colour. */
--pst-color-admonition-note: var(--pst-color-primary);
--pst-color-admonition-default: var(--pst-color-primary);
--pst-color-info: 255, 193, 7;
/* Tip, hint and important admonitions reuse the info colour. */
--pst-color-admonition-tip: var(--pst-color-info);
--pst-color-admonition-hint: var(--pst-color-info);
--pst-color-admonition-important: var(--pst-color-info);
--pst-color-warning: 245, 162, 82;
--pst-color-danger: 230, 101, 129;
/* Warning admonitions use the danger colour, not --pst-color-warning. */
--pst-color-admonition-warning: var(--pst-color-danger);
/* 118, 185, 0 is listed as "nv green" in the colour reference comment in this file. */
--pst-color-link: 118, 185, 0;
/* 92, 22, 130 is listed as "amethyst" in the colour reference comment in this file. */
--pst-color-inline-code: 92, 22, 130;
/* Font stack: NVIDIA Sans (declared via @font-face in this file), then system fallbacks. */
--font-family-sans-serif: NVIDIA Sans, Helvetica, Arial, Sans-serif;
--pst-font-family-base-system: NVIDIA Sans, Helvetica, Arial, Sans-serif;
font-family: NVIDIA Sans, Helvetica, Arial, Sans-serif;
}

/* Smaller text for elements with the prev-next-area class
   (presumably the previous/next page navigation - confirm against the theme markup). */
.prev-next-area {
    font-size: small;
}

/* Place captions inside .docutils elements above their content. */
.docutils caption {
  caption-side: top;
}

/* Shrink the site-logo heading inside the #site-navigation element. */
#site-navigation h1.site-logo {
  font-size: 0.85em;
}

/* colors
nv green 118,185,0
black 0, 0, 0
light gray 205, 205, 205
medium gray 140, 140, 140
dark gray 94, 94, 94

emerald 0, 133, 100
emerald #008564
amethyst 92, 22, 130
amethyst #5C1682
cpu blue 0, 133, 197
cpu blue #0085C5
garnet 137, 12, 88
garnet 890C58
fluorite 250, 194, 0
fluorite FAC200
*/

/* Colour palette as custom properties.
   Each base colour has a "-darken" companion; the button rules in this file
   use the darkened variants for hover, focus and active states.
   NOTE(review): the colour reference comment in this file lists cpu blue as
   #0085C5 and amethyst as #5C1682, whereas the variables below use #0071c5
   and #5d1682 - confirm which values are intended. */
:root {
  --nv-green: #76b900;
  --nv-green-darken: #6ead00;
  --emerald: #008564;
  --emerald-darken: #017c5d;
  --amethyst: #5d1682;
  --amethyst-darken: #4c116b;
  --cpu-blue: #0071c5;
  --cpu-blue-darken: #0062ad;
  --garnet: #890c58;
  --garnet-darken: #7a0c4e;
  --fluorite: #fac200;
  --fluorite-darken: #e4b301;
  --dark-gray: #5e5e5e;
  --light-gray: #cdcdcd;
  --medium-gray: #8c8c8c;
  /* Same RGB as --medium-gray with an alpha channel (8-digit hex, alpha "de"). */
  --medium-gray-darken: #8c8c8cde;
  /* Semantic aliases; the hex values duplicate the palette entries above:
     primary = nv-green, secondary = emerald, success = amethyst,
     info = cpu-blue, warning = fluorite, danger = garnet. */
  --primary: #76b900;
  --secondary: #008564;
  --success: #5d1682;
  --info: #0071c5;
  --warning: #fac200;
  --danger: #890c58;
}

/* Riva TBYB (ASR and TTS) Styling */
/* Light gray panel background. */
.demo-box {
  background-color: rgb(245,245,245);
}
/* NOTE: this selector is not scoped to the demo; it removes the underline
   from every unvisited link on pages that load this stylesheet. */
a:link { text-decoration: none; }
/* Fixed-height area that scrolls vertically when its content overflows.
   The height is raised to 250px by the max-width: 1024px media query at the
   end of this file. */
.scrollable {
  height: 125px;
  overflow-y: auto;
  font-size: 1.3rem;
}
/* Small 8px red circle (border-radius 50% on a square inline-block). */
.dot {
  height: 8px;
  width: 8px;
  background-color: rgb(228, 77, 77);
  border-radius: 50%;
  display: inline-block;
}
/* Compact, upper-case, single-line text. */
.timer {
  font-size: 80%;
  text-transform: uppercase;
  white-space: nowrap;
}
/* The rules below square off corners (border-radius: 0%) on form controls,
   cards and buttons. NOTE(review): the class names look like Bootstrap
   component classes - confirm which framework supplies the base styles. */
.form-select {
  border-radius: 0%;
  font-size: 80%;
}
.form-control {
  border-radius: 0%;
}
.input-group-text {
  border-radius: 0%;
  font-size: 80%;
  text-transform: uppercase;
  background-color: rgb(245,245,245);
}
.card {
  border-radius: 0%;
}
/* Removes the top border of elements with the speech-control class. */
.speech-control {
  border-top-width: 0px;
}
/* Base button look: square corners, small upper-case label kept on one line,
   and a minimum width of 125px. */
.btn {
  border-radius: 0%;
  font-size: 80%;
  text-transform: uppercase;
  white-space: nowrap;
  min-width: 125px;
}
/* Primary button: --nv-green at rest and when disabled, --nv-green-darken
   for hover, focus and active states. Both variables are defined in the
   :root palette in this file. */
.btn-primary {
  background-color: var(--nv-green);
  border-color: var(--nv-green);
}
.btn-primary:hover {
  background-color: var(--nv-green-darken);
  border-color: var(--nv-green-darken);
}
/* Focus: darkened colours plus a 0.2rem translucent green ring; the
   -webkit- prefixed declaration precedes the standard one. */
.btn-primary:focus, .btn-primary.focus {
  background-color: var(--nv-green-darken);
  border-color: var(--nv-green-darken);
  -webkit-box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
          box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
}
/* Disabled buttons use the same colours as the resting state. */
.btn-primary.disabled, .btn-primary:disabled {
  background-color: var(--nv-green);
  border-color: var(--nv-green);
}
/* Pressed/active state, and a primary dropdown toggle inside a .show parent. */
.btn-primary:not(:disabled):not(.disabled):active, .btn-primary:not(:disabled):not(.disabled).active,
.show > .btn-primary.dropdown-toggle {
  background-color: var(--nv-green-darken);
  border-color: var(--nv-green-darken);
}
/* Same focus ring as above when an active button or open toggle is focused. */
.btn-primary:not(:disabled):not(.disabled):active:focus, .btn-primary:not(:disabled):not(.disabled).active:focus,
.show > .btn-primary.dropdown-toggle:focus {
  -webkit-box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
          box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
}
/* Secondary button: mirrors the .btn-primary rules in this file, using
   --medium-gray at rest and when disabled, and --medium-gray-darken for
   hover, focus and active states. */
.btn-secondary {
  background-color: var(--medium-gray);
  border-color: var(--medium-gray);
}
.btn-secondary:hover {
  background-color: var(--medium-gray-darken);
  border-color: var(--medium-gray-darken);
}
/* Focus: darkened colours plus a 0.2rem translucent gray ring
   (rgba 140, 140, 140 is the RGB form of --medium-gray, #8c8c8c). */
.btn-secondary:focus, .btn-secondary.focus {
  background-color: var(--medium-gray-darken);
  border-color: var(--medium-gray-darken);
  -webkit-box-shadow: 0 0 0 0.2rem rgba(140, 140, 140, 0.5);
          box-shadow: 0 0 0 0.2rem rgba(140, 140, 140, 0.5);
}
/* Disabled buttons use the same colours as the resting state. */
.btn-secondary.disabled, .btn-secondary:disabled {
  background-color: var(--medium-gray);
  border-color: var(--medium-gray);
}
/* Pressed/active state, and a secondary dropdown toggle inside a .show parent. */
.btn-secondary:not(:disabled):not(.disabled):active, .btn-secondary:not(:disabled):not(.disabled).active,
.show > .btn-secondary.dropdown-toggle {
  background-color: var(--medium-gray-darken);
  border-color: var(--medium-gray-darken);
}
/* Same focus ring as above when an active button or open toggle is focused. */
.btn-secondary:not(:disabled):not(.disabled):active:focus, .btn-secondary:not(:disabled):not(.disabled).active:focus,
.show > .btn-secondary.dropdown-toggle:focus {
  -webkit-box-shadow: 0 0 0 0.2rem rgba(140, 140, 140, 0.5);
          box-shadow: 0 0 0 0.2rem rgba(140, 140, 140, 0.5);
}
/* Link-styled button: green text without an underline, darkened on hover and focus. */
.btn-link {
  color: var(--nv-green);
  text-decoration-line: none;
}
.btn-link:hover {
  color: var(--nv-green-darken);
}
/* Focus adds the same translucent green ring used by .btn-primary. */
.btn-link:focus, .btn-link.focus {
  color: var(--nv-green-darken);
  -webkit-box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
          box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
}
/* Plain primary-coloured link, darkened on hover. */
.link-primary {
  color: var(--nv-green);
}
.link-primary:hover {
  color: var(--nv-green-darken);
}

/* Riva ASR Styles */
/* Adds space above the element with id riva-upload-label. */
#riva-upload-label {
  margin-top: 0.5rem;
}

/* Riva TTS Styles */
/* NOTE: justify-content and align-items only take effect when the element is
   a flex or grid container; no display value is set in this file, so it is
   presumably supplied elsewhere - confirm. */
.tts-control {
  justify-content: space-between;
  align-items: center;
}

/* Reset the margin of paragraphs that are direct children of .tts-control. */
.tts-control > p {
  margin: unset;
}

/* The element with id riva-tts-field: not resizable, no border or padding,
   and as tall as its container. */
#riva-tts-field {
  resize: none;
  border: unset;
  padding: 0;
  height: 100%;
  font-size: 1.0rem;
}

/* Cap the line length of paragraphs inside #riva-terms-of-use. */
#riva-terms-of-use p {
  max-width: 620px;
}

/* Media Queries */
/* Viewports up to 1024px wide. */
@media (max-width: 1024px) {

  /* Riva TTS and ASR */
  /* Doubles the .scrollable height from its default of 125px. */
  .scrollable {
      height: 250px;
  }
}


================================================
FILE: docs/_static/rtd-data.js
================================================
/*
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Dummy data for testing ReadTheDocs footer insertion
// This mimics RTD data for a project that uses both versions + languages
//
// Declared with `var` at the top level of the script, so it becomes a global
// when the file is loaded as a classic script.
// NOTE(review): the project slug "frc-docs" is a placeholder that does not
// match this repository - confirm it is only ever used for local testing.
var READTHEDOCS_DATA = {
  project: "frc-docs",
  version: "latest",
  language: "en",
  proxied_api_host: "https://readthedocs.org",
};


================================================
FILE: docs/_templates/layout.html
================================================
<!--
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
{% extends "!layout.html" %}
{%- block footer %}
<script type="text/javascript">_satellite.pageBottom();</script>
{%- endblock %}


================================================
FILE: docs/backend_guide/vllm.rst
================================================
..
.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

########
vLLM
########

.. toctree::
    :hidden:
    :caption: vLLM
    :maxdepth: 2

    ../vllm_backend/README
    Multi-LoRA <../vllm_backend/docs/llama_multi_lora_tutorial>

================================================
FILE: docs/client_guide/api_reference.rst
================================================
..
.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#############
API Reference
#############

.. toctree::
   :maxdepth: 1
   :hidden:

   OpenAI API <openai_readme.md>
   kserve

================================================
FILE: docs/client_guide/in_process.rst
================================================
..
.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

############################
In-Process Triton Server API
############################


The Triton Inference Server provides a backwards-compatible C API, along with Python and Java bindings, that
allows Triton to be linked directly into a C/C++/Java/Python application. This API
is called the "Triton Server API" or just "Server API" for short. The
API is implemented in the Triton shared library which is built from
source contained in the `core
repository <https://github.com/triton-inference-server/core>`__. On Linux
this library is libtritonserver.so and on Windows it is
tritonserver.dll. In the Triton Docker image the shared library is
found in /opt/tritonserver/lib. The header file that defines and
documents the Server API is
`tritonserver.h <https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h>`__.
`Java bindings for In-Process Triton Server API <../customization_guide/inprocess_java_api.html#java-bindings-for-in-process-triton-server-api>`__
are built on top of `tritonserver.h` and can be used for Java applications that
need to use Tritonserver in-process.

All capabilities of Triton server are encapsulated in the shared
library and are exposed via the Server API. The `tritonserver`
executable implements HTTP/REST and GRPC endpoints and uses the Server
API to communicate with core Triton logic. The primary source files
for the endpoints are `grpc_server.cc <https://github.com/triton-inference-server/server/blob/main/src/grpc/grpc_server.cc>`__ and
`http_server.cc <https://github.com/triton-inference-server/server/blob/main/src/http_server.cc>`__. In these source files you can
see the Server API being used.

You can use the Server API in your own application as well. A simple
example using the Server API can be found in
`simple.cc <https://github.com/triton-inference-server/server/blob/main/src/simple.cc>`__.

.. toctree::
   :maxdepth: 1
   :hidden:

   C/C++ <../customization_guide/inprocess_c_api.md>
   python
   Java <../customization_guide/inprocess_java_api.md>

================================================
FILE: docs/client_guide/kserve.rst
================================================
..
.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

##########
KServe API
##########


Triton uses the
`KServe community standard inference protocols <https://github.com/kserve/kserve/tree/master/docs/predict-api/v2>`__
to define HTTP/REST and GRPC APIs plus several extensions.

.. toctree::
   :maxdepth: 1
   :hidden:

   HTTP/REST and GRPC Protocol <../customization_guide/inference_protocols.md>
   kserve_extension

================================================
FILE: docs/client_guide/kserve_extension.rst
================================================
..
.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

####
Extensions
####

To fully enable all capabilities,
Triton also implements `HTTP/REST and GRPC
extensions <https://github.com/triton-inference-server/server/tree/main/docs/protocol>`__
to the KServe inference protocol.

.. toctree::
   :maxdepth: 1
   :hidden:

   Binary tensor data extension <../protocol/extension_binary_data.md>
   Classification extension <../protocol/extension_classification.md>
   Schedule policy extension <../protocol/extension_schedule_policy.md>
   Sequence extension <../protocol/extension_sequence.md>
   Shared-memory extension <../protocol/extension_shared_memory.md>
   Model configuration extension <../protocol/extension_model_configuration.md>
   Model repository extension <../protocol/extension_model_repository.md>
   Statistics extension <../protocol/extension_statistics.md>
   Trace extension <../protocol/extension_trace.md>
   Logging extension <../protocol/extension_logging.md>
   Parameters extension <../protocol/extension_parameters.md>

================================================
FILE: docs/client_guide/python.rst
================================================
..
.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

####
Python
####

.. toctree::
   :maxdepth: 1
   :hidden:

   Overview <../tutorials/Triton_Inference_Server_Python_API/README.md>
   Kafka I/O <../tutorials/Triton_Inference_Server_Python_API/examples/kafka-io/README.md>
   Rayserve <../tutorials/Triton_Inference_Server_Python_API/examples/rayserve/README.md>

================================================
FILE: docs/conf.py
================================================
#!/usr/bin/env python3

# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

import json
import logging
import os
import re
import subprocess
from datetime import date
from logging.handlers import RotatingFileHandler

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import httplib2
import nvidia_sphinx_theme
from docutils import nodes
from packaging.version import Version
from sphinx import search

# import sys
# sys.path.insert(0, os.path.abspath('.'))

# -- conf.py setup -----------------------------------------------------------

# conf.py needs to be run in the top level 'docs'
# directory but the calling build script needs to
# be called from the current working directory. We
# change to the 'docs' dir here and then revert back
# at the end of the file.
# current_dir = os.getcwd()
# os.chdir("docs")
# -- Setup logger ------------------------------------------------------------


def setup_logger(name, log_file, level=logging.INFO, max_bytes=1048576, backup_count=5):
    """Return the logger called ``name``, attaching handlers on first use.

    On the first call for a given name, a rotating file handler and a console
    (stream) handler are attached; both use ``level`` and share one formatter.
    On later calls the existing handlers are left untouched and only the
    logger's own level is updated.

    Args:
        name: Name passed to ``logging.getLogger``.
        log_file: Path handed to ``RotatingFileHandler``.
        level: Level set on the logger and on any handlers created here.
        max_bytes: ``maxBytes`` for the rotating file handler.
        backup_count: ``backupCount`` for the rotating file handler.

    Returns:
        The configured ``logging.Logger``.
    """
    log = logging.getLogger(name)
    log.setLevel(level)

    # A logger that already has handlers was configured by an earlier call;
    # adding more would duplicate every record.
    if log.handlers:
        return log

    # The record prefix is wrapped in ANSI "bright blue" / "reset" codes.
    prefix = "\033[94m%(asctime)s - %(name)s - %(levelname)s - \033[0m"
    shared_format = logging.Formatter(prefix + "%(message)s")

    sinks = (
        RotatingFileHandler(log_file, maxBytes=max_bytes, backupCount=backup_count),
        logging.StreamHandler(),
    )
    for sink in sinks:
        sink.setLevel(level)
        sink.setFormatter(shared_format)
        log.addHandler(sink)
    return log


# The module-level logger is named after this file. Its log file path comes
# from TRITON_SERVER_DOCS_LOG_FILE, falling back to /tmp/docs.log.
_conf_basename = os.path.basename(__file__)
_docs_log_file = os.environ.get("TRITON_SERVER_DOCS_LOG_FILE", "/tmp/docs.log")
logger = setup_logger(_conf_basename, _docs_log_file)
logger.info(f"Defined logger for {_conf_basename}")

# -- Project information -----------------------------------------------------

project = "NVIDIA Triton Inference Server"
author = "NVIDIA"
# The copyright range ends at the year in which the docs are built.
copyright = f"2018-{date.today().year}, NVIDIA Corporation"

# Get the version of Triton this is building. The full version string is the
# first line of ../TRITON_VERSION (relative to the working directory).
logger.info("Getting version from ../TRITON_VERSION")
with open("../TRITON_VERSION") as f:
    version_long = f.readline().strip()
logger.info(f"Version: {version_long}")

# version_short keeps only the leading "<digits>.<digits>.<digits>" part and
# drops any suffix. Fail with an explicit message if the file content does not
# start that way, instead of an AttributeError raised on a None match.
_version_match = re.match(r"^\d+\.\d+\.\d+", version_long)
if _version_match is None:
    raise ValueError(
        f"Unexpected version string in ../TRITON_VERSION: {version_long!r}"
    )
version_short = _version_match.group(0)
logger.info(f"Version short: {version_short}")
version_short_split = version_short.split(".")
logger.info(f"Version short split: {version_short_split}")

# one_before is version_short with its second component decremented by one.
# NOTE(review): this yields "-1" when the second component is 0 -- confirm
# how the first build of a new major version should be handled.
one_before = f"{version_short_split[0]}.{int(version_short_split[1]) - 1}.{version_short_split[2]}"
logger.info(f"One before: {one_before}")

# maintain left-side bar toctrees in `contents` file
# so it doesn't show up needlessly in the index page
master_doc = "contents"

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "ablog",
    "myst_parser",
    "sphinx_copybutton",
    "sphinx_design",
    "sphinx-prompt",
    # "sphinxcontrib.bibtex",
    "sphinx_tabs.tabs",
    "sphinx_sitemap",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.mathjax",
    "sphinx.ext.napoleon",
    "sphinx.ext.ifconfig",
    "sphinx.ext.extlinks",
]

# Warning types that the build should not report.
suppress_warnings = ["myst.domains", "ref.ref", "myst.header"]

# File suffixes treated as documentation sources (reStructuredText and
# Markdown).
source_suffix = [".rst", ".md"]

# Options applied by default to autodoc directives: include members, also
# those without docstrings and private ones.
autodoc_default_options = {
    "members": True,
    "undoc-members": True,
    "private-members": True,
}

# autosummary settings. The listed tritonclient.grpc modules are declared as
# mock imports for autosummary.
autosummary_generate = True
autosummary_mock_imports = [
    "tritonclient.grpc.model_config_pb2",
    "tritonclient.grpc.service_pb2",
    "tritonclient.grpc.service_pb2_grpc",
]

# napoleon setting for special members that carry a docstring.
napoleon_include_special_with_doc = True

# Enable Sphinx's numfig option.
numfig = True

# final location of docs for seo/sitemap
html_baseurl = (
    "https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/"
)

# MyST syntax extensions to enable. Commented-out entries are currently
# disabled.
myst_enable_extensions = [
    "dollarmath",
    "amsmath",
    "deflist",
    # "html_admonition",
    "html_image",
    "colon_fence",
    # "smartquotes",
    "replacements",
    # "linkify",
    "substitution",
]
# Heading depth setting for MyST's automatic heading anchors.
myst_heading_anchors = 5

# Add any paths that contain templates here, relative to this directory.
# templates_path = ["_templates"] # disable it for nvidia-sphinx-theme to show footer

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
#
# The patterns are read from exclusions.txt, one per line. Surrounding
# whitespace is stripped and blank lines are skipped, so an empty file or a
# stray empty line never turns into an empty pattern.
with open("exclusions.txt", "r") as f:
    exclusions = f.read()
exclude_patterns = [
    pattern.strip() for pattern in exclusions.splitlines() if pattern.strip()
]
print(f"exclude_patterns: {exclude_patterns}")

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = "nvidia_sphinx_theme"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
# html_css_files = ["custom.css"] # Not needed with new theme

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# "collapse_navigation" is set once here; a separate update() call that set
# the same key to the same value was redundant and has been folded in.
html_theme_options = {
    "collapse_navigation": False,
    "github_url": "https://github.com/triton-inference-server/server",
    "switcher": {
        # use for local testing
        # "json_url": "http://localhost:8000/_static/switcher.json",
        "json_url": "https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/_static/switcher.json",
        # When version_long contains "dev" the switcher matches one_before,
        # otherwise it matches version_short.
        "version_match": one_before if "dev" in version_long else version_short,
    },
    # "navbar_start": ["navbar-logo", "version-switcher"],
    "primary_sidebar_end": [],
}

logger.info(f"html_theme_options: {html_theme_options}")

deploy_ngc_org = "nvidia"
deploy_ngc_team = "triton"

# "<org>/<team>" when a team is set, otherwise just "<org>".
if deploy_ngc_team:
    _ngc_org_team = f"{deploy_ngc_org}/{deploy_ngc_team}"
else:
    _ngc_org_team = deploy_ngc_org

# Values made available to MyST substitutions.
myst_substitutions = dict(
    VersionNum=version_short,
    deploy_ngc_org_team=_ngc_org_team,
)

logger.info(f"myst_substitutions: {myst_substitutions}")


def ultimateReplace(app, docname, source):
    """Substitute every configured placeholder in a document's source text.

    The handler is written for a callback that receives the source as a
    one-element list: ``source[0]`` is read, each key of
    ``app.config.ultimate_replacements`` is replaced by its value (in the
    mapping's iteration order), and the result is stored back in ``source[0]``.

    Args:
        app: Object exposing ``config.ultimate_replacements``, a mapping of
            placeholder text to replacement text.
        docname: Name of the document being read (unused).
        source: One-element list holding the document text; modified in place.
    """
    text = source[0]
    for placeholder, replacement in app.config.ultimate_replacements.items():
        text = text.replace(placeholder, replacement)
    source[0] = text


# Necessary hack: these placeholders are filled in by plain text replacement
# so that variables which appear inside code blocks get a value as well.
if deploy_ngc_team:
    _ngc_path = f"{deploy_ngc_org}/{deploy_ngc_team}"
else:
    _ngc_path = deploy_ngc_org

# Both version placeholders map to the short version string.
ultimate_replacements = dict.fromkeys(
    ("{VersionNum}", "{SamplesVersionNum}"), version_short
)
ultimate_replacements["{NgcOrgTeam}"] = _ngc_path
logger.info(f"ultimate_replacements: {ultimate_replacements}")

# bibtex_bibfiles = ["references.bib"]
# To test that style looks good with common bibtex config
# bibtex_reference_style = "author_year"
# bibtex_default_style = "plain"

### We currently use Myst: https://myst-nb.readthedocs.io/en/latest/use/execute.html
# Notebook execution is switched off globally.
nb_execution_mode = "off"
# execution_excludepatterns = ['tutorials/tts-python-basics.ipynb']  # Individual notebook disable

###############################
# SETUP SWITCHER
###############################
# The switcher JSON is written into the first static path.
switcher_path = os.path.join(html_static_path[0], "switcher.json")
logger.info(f"switcher_path: {switcher_path}")

# Obtain Triton Server Release Tags, sorted newest first by parsed version.
tags = subprocess.run(["git", "tag", "--list", "v*"], capture_output=True, text=True)
tags_list = sorted(tags.stdout.strip().splitlines(), key=Version, reverse=True)
logger.info(f"Found source tags: {tags_list}")

# Each entry is (version number, archive directory name). Only the leading "v"
# of a tag is removed: str.replace("v", "") would also delete any later "v",
# for example the one inside a ".dev" suffix.
versions = []
for v in tags_list:
    number = v[1:] if v.startswith("v") else v
    versions.append(
        (
            number,
            f"triton-inference-server-{number.replace('.', '')}",
        )
    )

logger.info(f"Defined dictionary of versions: {versions}")


# Build switcher data: one entry per tagged release, pointing at its archive.
json_data = [
    {
        "name": number,
        "version": number,
        "url": f"https://docs.nvidia.com/deeplearning/triton-inference-server/archives/{archive_dir}/user-guide/docs",
    }
    for number, archive_dir in versions
]

# The first entry points at the live docs. When version_long contains "dev"
# it is labelled with one_before, otherwise with version_short. Both cases now
# share the same "(current release)" label; the dev case used to read
# "(current_release)".
current_release = one_before if "dev" in version_long else version_short
json_data.insert(
    0,
    {
        "name": f"{current_release} (current release)",
        "version": current_release,
        "url": "https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/index.html",
    },
)

# Trim to last N releases.
json_data = json_data[0:12]
logger.info("Trimmed to last 12 release...")

# Everything older is reachable through a single archive link.
json_data.append(
    {
        "name": "older releases",
        "version": "archives",
        "url": "https://docs.nvidia.com/deeplearning/triton-inference-server/archives/",
    }
)

# Send a HEAD request to every switcher URL. The check is advisory only: a
# status of 400 or above, or a request that fails outright (for example when
# building without network access), is reported and the build carries on.
h = httplib2.Http()
for d in json_data:
    logger.info(f"Validating link: {d['url']}")
    try:
        resp = h.request(d["url"], "HEAD")
    except (httplib2.HttpLib2Error, OSError) as err:
        print(d["url"], "NOK", err)
        continue
    if int(resp[0]["status"]) >= 400:
        print(d["url"], "NOK", resp[0]["status"])
        # exit(1)

logger.info(f"Writing switcher data to file: {switcher_path}")
with open(switcher_path, "w") as f:
    json.dump(json_data, f, ensure_ascii=False, indent=4)


logger.info("Configuration completed...")


def setup(app):
    """Register the placeholder hook and extra JavaScript files on ``app``.

    Declares the ``ultimate_replacements`` config value, connects
    ``ultimateReplace`` to the ``source-read`` event, and adds the hCaptcha
    and visitor scripts as JavaScript files.

    Args:
        app: The application object providing ``add_config_value``,
            ``connect`` and ``add_js_file``.
    """
    hcaptcha_script = "https://js.hcaptcha.com/1/api.js"
    visitor_script = (
        "//assets.adobedtm.com/5d4962a43b79/c1061d2c5e7b/launch-191c2462b890.min.js"
    )

    app.add_config_value("ultimate_replacements", {}, True)
    app.connect("source-read", ultimateReplace)
    app.add_js_file(hcaptcha_script)

    # The visitor script is only added when a URL is configured.
    if visitor_script:
        app.add_js_file(visitor_script)

    # if not os.environ.get("READTHEDOCS") and not os.environ.get("GITHUB_ACTIONS"):
    #     app.add_css_file(
    #         "https://assets.readthedocs.org/static/css/readthedocs-doc-embed.css"
    #     )
    #     app.add_css_file("https://assets.readthedocs.org/static/css/badge_only.css")

    #     # Create the dummy data file so we can link it
    #     # ref: https://github.com/readthedocs/readthedocs.org/blob/bc3e147770e5740314a8e8c33fec5d111c850498/readthedocs/core/static-src/core/js/doc-embed/footer.js  # noqa: E501
    #     app.add_js_file("rtd-data.js")
    #     app.add_js_file(
    #         "https://assets.readthedocs.org/static/javascript/readthedocs-doc-embed.js",
    #         priority=501,
    #     )


# cleanup
# os.chdir(current_dir)


================================================
FILE: docs/contents.rst
================================================
..
.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.. toctree::
   :hidden:

   Home <introduction/index.md>
   Release notes <introduction/release_notes.md>
   Compatibility matrix <introduction/compatibility.md>

.. toctree::
   :hidden:
   :caption: Getting Started

   getting_started/quick_deployment
   LLM With TensorRT-LLM <getting_started/trtllm_user_guide.md>
   Multimodal model <../tutorials/Popular_Models_Guide/Llava1.5/llava_trtllm_guide.md>
   Stable diffusion <../tutorials/Popular_Models_Guide/StableDiffusion/README.md>

.. toctree::
   :hidden:
   :caption: Scaling guide

   Multi-Node (AWS) <../tutorials/Deployment/Kubernetes/EKS_Multinode_Triton_TRTLLM/README.md>
   Multi-Instance <../tutorials/Deployment/Kubernetes/TensorRT-LLM_Autoscaling_and_Load_Balancing/README.md>

.. toctree::
   :hidden:
   :caption: LLM Features

   Constrained Decoding <../tutorials/Feature_Guide/Constrained_Decoding/README.md>
   Function Calling <../tutorials/Feature_Guide/Function_Calling/README.md>
   llm_features/speculative_decoding

.. toctree::
   :hidden:
   :caption: Client

   client_guide/api_reference
   client_guide/in_process
   Client Libraries <client/README>
   _reference/tritonclient_api.rst

.. toctree::
   :hidden:
   :caption: Server

   Model Execution <user_guide/model_execution.md>
   Scheduler <user_guide/scheduler.md>
   Batcher <user_guide/batcher.md>
   server_guide/model_pipelines
   server_guide/state_management
   Request Cancellation <user_guide/request_cancellation.md>
   Rate Limiter <user_guide/rate_limiter.md>
   Caching <user_guide/response_cache.md>
   Metrics <user_guide/metrics.md>
   Tracing <user_guide/trace.md>

.. toctree::
   :hidden:
   :caption: Model Management


   Repository <user_guide/model_repository>
   Configuration <user_guide/model_configuration>
   Optimization <user_guide/optimization>
   Controls <user_guide/model_management>
   Decoupled models <user_guide/decoupled_models>
   Custom operators <user_guide/custom_operations>

.. toctree::
   :hidden:
   :caption: Backends

   TensorRT-LLM <tensorrtllm_backend/README>
   vLLM <backend_guide/vllm>
   Python <python_backend/README>
   PyTorch <pytorch_backend/README>
   ONNX Runtime <onnxruntime_backend/README>
   TensorRT <tensorrt_backend/README>
   FIL <fil_backend/README>
   DALI <dali_backend/README>
   Custom <backend/README>

.. toctree::
   :hidden:
   :caption: Performance benchmarking and tuning

   GenAI Perf Analyzer <perf_benchmark/genai_perf>
   Performance Analyzer <perf_benchmark/perf_analyzer>
   Model Analyzer <perf_benchmark/model_analyzer>
   Model Navigator <model_navigator/README>

.. toctree::
   :hidden:
   :caption: Debugging

   Guide <user_guide/debugging_guide>


================================================
FILE: docs/customization_guide/build.md
================================================
<!--
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Building Triton

This section describes how to build the Triton server from source. For
information on building the Triton client libraries and examples see
[Client Libraries and
Examples](https://github.com/triton-inference-server/client). For
information on building the Triton SDK container see [Build SDK
Image](test.md#build-sdk-image). For information on testing your
Triton build see [Testing Triton](test.md).

You can create a customized Triton Docker image that contains a subset
of the released backends without building from source. For example,
you may want a Triton image that contains only the TensorRT and Python
backends. For this type of customization you don't need to build
Triton from source and instead can use [the *compose*
utility](compose.md).

The Triton source is distributed across multiple GitHub repositories
that together can be built and installed to create a complete Triton
installation. Triton server is built using CMake and (optionally)
Docker. To simplify the build process, Triton provides a
[build.py](https://github.com/triton-inference-server/server/blob/main/build.py) script.
The build.py script will generate the CMake and Docker build steps required to
build Triton, and will optionally invoke those steps or leave the invocation to
you, as described below.

The build.py script currently supports building Triton for the
following platforms. See [Building on Unsupported
Platforms](#building-on-unsupported-platforms) if you are attempting
to build Triton on a platform that is not listed here.

* [Ubuntu 22.04, x86-64](#building-for-ubuntu-2204)

* [Windows 10, x86-64](#building-for-windows-10)

If you are developing or debugging Triton, see [Development and
Incremental Builds](#development-and-incremental-builds) for information
on how to perform an incremental build.

## Building for Ubuntu 22.04

For Ubuntu-22.04, build.py supports both a Docker build and a
non-Docker build.

* [Build using Docker](#building-with-docker) and the PyTorch
  Docker image from [NVIDIA GPU Cloud (NGC)](https://ngc.nvidia.com).

* [Build without Docker](#building-without-docker).

### Building With Docker

The easiest way to build Triton is to use Docker. The result of the
build will be a Docker image called *tritonserver* that will contain
the tritonserver executable in /opt/tritonserver/bin and the required
shared libraries in /opt/tritonserver/lib. The backends and
repository-agents built for Triton will be in
/opt/tritonserver/backends and /opt/tritonserver/repoagents,
respectively.

The first step for the build is to clone the
[triton-inference-server/server](https://github.com/triton-inference-server/server)
repo branch for the release you are interested in building (or the
*main* branch to build from the development branch). Then run build.py
as described below. The build.py script performs these steps when
building with Docker.

* In the *build* subdirectory of the server repo, generate the
  docker_build script, the cmake_build script and the Dockerfiles
  needed to build Triton. If you use the --dryrun flag, build.py will
  stop here so that you can examine these files.

* Run the docker_build script to perform the Docker-based build. The
  docker_build script performs the following steps.

  * Build the *tritonserver_buildbase* Docker image that collects all
    the build dependencies needed to build Triton. The
    *tritonserver_buildbase* image is based on a minimal/base
    image. When building with GPU support (--enable-gpu), the *min*
    image is the
    [\<xx.yy\>-py3-min](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)
    image pulled from [NGC](https://ngc.nvidia.com) that contains the
    CUDA, cuDNN, TensorRT and other dependencies that are required to
    build Triton. When building without GPU support, the *min* image
    is the standard ubuntu:22.04 image.

  * Run the cmake_build script within the *tritonserver_buildbase*
    image to actually build Triton. The cmake_build script performs
    the following steps.

    * Invoke CMake in the server repo to build Triton's core shared
      library and *tritonserver* executable.

    * Clone each requested backend and build it using CMake. For
      example, the ONNX Runtime backend is built using
      [triton-inference-server/onnxruntime_backend/CMakeLists.txt](https://github.com/triton-inference-server/onnxruntime_backend/blob/main/CMakeLists.txt). Some
      of the backends may use Docker as part of their build (for
      example [ONNX
      Runtime](https://github.com/triton-inference-server/onnxruntime_backend)
      and
      [OpenVINO](https://github.com/triton-inference-server/openvino_backend)). If
      you don't want to use Docker in those cases you must consult the
      build process for those backends.

    * Clone each repository agent and build it using the CMake file
      from the corresponding repo. For example, the
      [Checksum](https://github.com/triton-inference-server/checksum_repository_agent)
      repository agent is built using
      [triton-inference-server/checksum_repository_agent/CMakeLists.txt](https://github.com/triton-inference-server/checksum_repository_agent/blob/main/CMakeLists.txt).

  * Copy the built artifacts out of the container and into the build
    subdirectory on the host system.

  * Create the final *tritonserver* Docker image that contains the
    libraries, executables and other artifacts from the build.

  * Create a *tritonserver_cibase* Docker image that contains the QA
    artifacts needed for testing, as described in [Testing
    Triton](test.md).

By default, build.py does not enable any of Triton's optional features
but you can enable all features, backends, and repository agents with
the --enable-all flag. The -v flag turns on verbose output.

```bash
$ ./build.py -v --enable-all
```

If you want to enable only certain Triton features, backends and
repository agents, do not specify --enable-all. Instead you must
specify the individual flags as documented by --help.

#### Building With Specific GitHub Branches

As described above, the build is performed in the server repo, but
source from several other repos is fetched during the build
process. Typically you do not need to specify anything about these
other repos, but if you want to control which branch is used in these
other repos you can as shown in the following example.

```bash
$ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container tag> --repo-tag=backend:<container tag> --repo-tag=thirdparty:<container tag> ... --backend=tensorrt:<container tag> ... --repoagent=checksum:<container tag> ...
```

If you are building on a release branch then `<container tag>` will
default to the branch name. For example, if you are building on the
r24.12 branch, `<container tag>` will default to r24.12. If you are
building on any other branch (including the *main* branch) then
`<container tag>` will default to "main". Therefore, you typically do
not need to provide `<container tag>` at all (nor the preceding
colon). You can use a different `<container tag>` for a component to
instead use the corresponding branch/tag in the build. For example, if
you have a branch called "mybranch" in the
[onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend)
repo that you want to use in the build, you would specify
--backend=onnxruntime:mybranch.

#### CPU-Only Build

If you want to build without GPU support you must specify individual
feature flags and not include the `--enable-gpu` and
`--enable-gpu-metrics` flags. Only the following backends are
available for a non-GPU / CPU-only build: `identity`, `repeat`, `ensemble`,
`square`, `pytorch`, `onnxruntime`, `openvino`,
`python` and `fil`.

CPU-only builds of the PyTorch backend require some CUDA stubs
and runtime dependencies that are not present in the CPU-only base container.
These are retrieved from a GPU base container, which can be changed with the
`--image=gpu-base,nvcr.io/nvidia/tritonserver:<xx.yy>-py3-min` flag.

### Building Without Docker

To build Triton without using Docker you must install the build
dependencies that are handled automatically when building with Docker.

The first step for the build is to clone the
[triton-inference-server/server](https://github.com/triton-inference-server/server)
repo branch for the release you are interested in building (or the
*main* branch to build from the development branch).

To determine what dependencies are required by the build, run build.py
with the --dryrun flag, and then look in the build subdirectory at
Dockerfile.buildbase.

```bash
$ ./build.py -v --enable-all --dryrun
```

From Dockerfile.buildbase you can see what dependencies you need to
install on your host system. Note that when building with --enable-gpu
(or --enable-all), Dockerfile.buildbase depends on the
[\<xx.yy\>-py3-min](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)
image pulled from [NGC](https://ngc.nvidia.com). Unfortunately, a
Dockerfile is not currently available for the
[\<xx.yy\>-py3-min](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)
image. Instead, you must manually install [CUDA and
cuDNN](#cuda-cublas-cudnn) and [TensorRT](#tensorrt) dependencies as
described below.

Once you have installed these dependencies on your build system you
can then use build.py with the --no-container-build flag to build
Triton.

```bash
$ ./build.py -v --no-container-build --build-dir=`pwd`/build --enable-all
```

See [Building with Docker](#building-with-docker) for more details on how the
cmake_build script is used to perform the build.

#### CUDA, cuBLAS, cuDNN

For Triton to support NVIDIA GPUs you must install CUDA, cuBLAS and
cuDNN. These libraries must be installed on the system include and
library paths so that they are available for the build. The version of
the libraries used for a given release can be found in the [Framework
Containers Support
Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html).

For a given version of Triton you can attempt to build with
non-supported versions of the libraries but you may have build or
execution issues since non-supported versions are not tested.

#### TensorRT

The TensorRT headers and libraries must be installed on system include
and library paths so that they are available for the build. The
version of TensorRT used in a given release can be found in the
[Framework Containers Support
Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html).

For a given version of Triton you can attempt to build with
non-supported versions of TensorRT but you may have build or execution
issues since non-supported versions are not tested.

## Building for Windows 10

For Windows 10, build.py supports both a Docker build and a non-Docker
build in a similar way as described for [Ubuntu](#building-for-ubuntu-2204). The primary
difference is that the minimal/base image used as the base of
Dockerfile.buildbase image can be built from the provided
[Dockerfile.win10.min](https://github.com/triton-inference-server/server/blob/main/Dockerfile.win10.min)
file as described in [Windows 10 "Min" Image](#windows-10-min-image). When running build.py
use the --image flag to specify the tag that you assigned to this
image. For example, --image=base,win10-py3-min.

### Windows and Docker

Depending on your version of Windows 10 and your version of Docker you
may need to perform these additional steps before any of the following
steps.

* Set your Docker to work with "Windows containers". Right click on
  the whale icon in the lower-right status area and select "Switch to
  Windows containers".

### Windows 10 "Min" Image

The "min" container describes the base dependencies needed to perform
the Windows build. The Windows min container is
[Dockerfile.win10.min](https://github.com/triton-inference-server/server/blob/main/Dockerfile.win10.min).

Before building the min container you must download the appropriate
cuDNN and TensorRT versions and place them in the same directory as
Dockerfile.win10.min.

* For cuDNN the CUDNN_VERSION and CUDNN_ZIP arguments defined in
  Dockerfile.win10.min indicate the version of cuDNN that you should
  download from https://developer.nvidia.com/rdp/cudnn-download.

* For TensorRT the TENSORRT_VERSION and TENSORRT_ZIP arguments defined
  in Dockerfile.win10.min indicate the version of TensorRT that you
  should download from
  https://developer.nvidia.com/nvidia-tensorrt-download.

After downloading the zip files for cuDNN and TensorRT, you build the
min container using the following command.

```bash
$ docker build -t win10-py3-min -f Dockerfile.win10.min .
```

### Build Triton Server

Triton is built using the build.py script. The build system must have
Docker, Python3 (plus pip installed *docker* module) and git installed
so that it can execute build.py and perform a docker build. By
default, build.py does not enable any of Triton's optional features
and so you must enable them explicitly. The following build.py
invocation builds all features and backends available on Windows.

```bash
python build.py --cmake-dir=<path/to/repo>/build --build-dir=/tmp/citritonbuild --no-container-pull --image=base,win10-py3-min --enable-logging --enable-stats --enable-tracing --enable-gpu --endpoint=grpc --endpoint=http --repo-tag=common:<container tag> --repo-tag=core:<container tag> --repo-tag=backend:<container tag> --repo-tag=thirdparty:<container tag> --backend=ensemble --backend=tensorrt:<container tag> --backend=onnxruntime:<container tag> --backend=openvino:<container tag> --backend=python:<container tag>
```

If you are building on *main* branch then `<container tag>` will
default to "main". If you are building on a release branch then
`<container tag>` will default to the branch name. For example, if you
are building on the r24.12 branch, `<container tag>` will default to
r24.12. Therefore, you typically do not need to provide `<container
tag>` at all (nor the preceding colon). You can use a different
`<container tag>` for a component to instead use the corresponding
branch/tag in the build. For example, if you have a branch called
"mybranch" in the
[onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend)
repo that you want to use in the build, you would specify
--backend=onnxruntime:mybranch.

### Extract Build Artifacts

When build.py completes, a Docker image called *tritonserver* will
contain the built Triton Server executable, libraries and other
artifacts. Windows containers do not support GPU access so you likely
want to extract the necessary files from the tritonserver image and
run them directly on your host system. All the Triton artifacts can be
found in /opt/tritonserver directory of the tritonserver image.  Your
host system will need to install the CUDA, cuDNN, TensorRT and other
dependencies that were used for the build.

## Building on Unsupported Platforms

Building for an unsupported OS and/or hardware platform is
possible. All of the build scripting, Dockerfiles and CMake
invocations are included in the public repos or are generated by
build.py as described in [Building with Docker](#building-with-docker). From
these files you can find the required dependencies and CMake
invocations. However, due to differences in compilers, libraries,
package management, etc. you may have to make changes in the build
scripts, Dockerfiles, CMake files and the source code.

To see the generated build scripts and Dockerfiles referred to below,
use:

```bash
$ ./build.py -v --enable-all --dryrun
```

You should familiarize yourself with the build process for supported
platforms by reading the above documentation and then follow the
process for the supported platform that most closely matches the
platform you are interested in (for example, if you are trying to
build for RHEL/x86-64 then follow the [Building for Ubuntu
22.04](#building-for-ubuntu-2204) process). You will likely need to
make changes in the following areas and then manually run docker_build
and cmake_build or the equivalent commands to perform a build.

* The generated Dockerfiles install dependencies for the build using
  platform-specific packaging tools, for example, apt-get for
  Ubuntu. You will need to change build.py to use the packaging tool
  appropriate for your platform.

* The package and library names for your platform may differ from
  those used by the generated Dockerfiles. You will need to find the
  corresponding packages or libraries on your platform.

* Your platform may use a different compiler or compiler version than
  the supported platforms. As a result you may encounter build errors
  that need to be fixed by editing the source code or changing the
  compilation flags.

* Triton depends on a large number of open-source packages that it
  builds from source. If one of these packages does not support your
  platform then you may need to disable the Triton feature that
  depends on that package. For example, Triton supports the S3
  filesystem by building the aws-sdk-cpp package. If aws-sdk-cpp
  doesn't build for your platform then you can remove the need for
  that package by not specifying --filesystem=s3 when you run
  build.py. In general, you should start by running build.py with the
  minimal required feature set.

* By default, the
  [PyTorch](https://github.com/triton-inference-server/pytorch_backend)
  backend build extracts pre-built shared libraries from the PyTorch
  NGC container. But the build can also use PyTorch shared libraries
  that you build separately for your platform. See the pytorch_backend
  build process for details.

## Development and Incremental Builds

### Development Builds Without Docker

If you are [building without Docker](#building-without-docker) use the
CMake invocation steps in cmake_build to invoke CMake to set-up a
build environment where you can invoke make/msbuild.exe to incrementally
build the Triton core, a backend, or a repository agent.

### Development Builds With Docker

If you are [building with Docker](#building-with-docker), the generated
*tritonserver_buildbase* image contains all the dependencies needed to
perform a full or incremental build. Within *tritonserver_buildbase*,
/workspace/build/cmake_build contains the CMake invocations that are
used to build the Triton core, the backends, and the repository
agents.

To perform an incremental build within the *tritonserver_buildbase*
container, map your source into the container and then run the
appropriate CMake and `make` (or `msbuild.exe`) steps from cmake_build
within the container.

#### Development Build of Triton Core

Assuming you have a clone of the [server
repo](https://github.com/triton-inference-server/server) on your host
system where you are making changes and you want to perform
incremental builds to test those changes. Your source code is in
/home/me/server. Run the *tritonserver_buildbase* container and map
your server source directory into the container at /server.

```
$ docker run -it --rm -v/home/me/server:/server tritonserver_buildbase bash
```

Look at /workspace/build/cmake_build within the container for the
section of commands that build "Triton core library". You can follow
those commands exactly, or you can modify them to change the build
directory or the CMake options. You **must** change the CMake command
to use /server instead of /workspace as the location for the
CMakeLists.txt file and source:

```
$ cmake <options> /server
```

Then you can change directory into the build directory and run `make`
(or `msbuild.exe`) as shown in cmake_build. As you make changes to the
source on your host system, you can perform incremental builds by
re-running `make` (or `msbuild.exe`).

#### Development Build of Backend or Repository Agent

Performing a full or incremental build of a backend or repository
agent is similar to building the Triton core. As an example we will
use the TensorRT backend. Assuming you have a clone of the [TensorRT
backend
repo](https://github.com/triton-inference-server/tensorrt_backend) on
your host system where you are making changes and you want to perform
incremental builds to test those changes. Your source code is in
/home/me/tensorrt_backend. Run the *tritonserver_buildbase*
container and map your TensorRT backend source directory into the
container at /tensorrt_backend. Note that some backends will use
Docker as part of their build, and so the host's Docker registry must
be made available within the *tritonserver_buildbase* by mounting
docker.sock (on Windows use
-v\\.\pipe\docker_engine:\\.\pipe\docker_engine).

```
$ docker run -it --rm -v/var/run/docker.sock:/var/run/docker.sock -v/home/me/tensorrt_backend:/tensorrt_backend tritonserver_buildbase bash
```

Look at /workspace/build/cmake_build within the container for the
section of commands that build "TensorRT backend". You can follow
those commands exactly, or you can modify them to change the build
directory or the CMake options. You **must** change the CMake command
to use /tensorrt_backend instead of /workspace as the location for the
CMakeLists.txt file and source:

```
$ cmake <options> /tensorrt_backend
```

Then you can change directory into the build directory and run `make`
(or `msbuild.exe`) as shown in cmake_build. As you make changes to the
source on your host system, you can perform incremental builds by
re-running `make` (or `msbuild.exe`).

### Building with Debug Symbols

To build with Debug symbols, use the --build-type=Debug argument while
launching build.py. If building directly with CMake use
-DCMAKE_BUILD_TYPE=Debug. You can then launch the built server with
gdb and see the debug symbols/information in the gdb trace.


================================================
FILE: docs/customization_guide/compose.md
================================================
<!--
# Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Customize Triton Container

Two Docker images are available from [NVIDIA GPU Cloud
(NGC)](https://ngc.nvidia.com) that make it possible to easily
construct customized versions of Triton. By customizing Triton you can
significantly reduce the size of the Triton image by removing
functionality that you don't require.

Currently the customization is limited as described below but future
releases will increase the amount of customization that is available.
It is also possible to [build Triton](build.md#building-triton)
from source to get more exact customization.

## Use the compose.py script

The `compose.py` script can be found in the
[server repository](https://github.com/triton-inference-server/server).
Simply clone the repository and run `compose.py` to create a custom container.
Note: Created container version will depend on the branch that was cloned.
For example branch
 [r24.12](https://github.com/triton-inference-server/server/tree/r24.12)
should be used to create an image based on the NGC 24.12 Triton release.

`compose.py` provides `--backend`, `--repoagent` options that allow you to
specify which backends and repository agents to include in the custom image.
For example, the following creates a new docker image that
contains only the PyTorch backend and the checksum repository agent.

Example:
```
python3 compose.py --backend pytorch --repoagent checksum
```
will provide a container `tritonserver` locally. You can access the container
with
```
$ docker run -it tritonserver:latest
```

Note: If `compose.py` is run on release versions `r21.08` and earlier,
the resulting container will have DCGM version 2.2.3 installed.
This may result in different GPU statistic reporting behavior.

### Compose a specific version of Triton

`compose.py` requires two containers: a `min` container which is the
base the compose container is built from and a `full` container from which the
script will extract components. The version of the `min` and `full` container
is determined by the branch of Triton `compose.py` is on.
For example, running
```
python3 compose.py --backend pytorch --repoagent checksum
```
on branch [r24.12](https://github.com/triton-inference-server/server/tree/r24.12) pulls:
- `min` container `nvcr.io/nvidia/tritonserver:24.12-py3-min`
- `full` container `nvcr.io/nvidia/tritonserver:24.12-py3`

Alternatively, users can specify the version of Triton container to pull from
any branch by either:
1. Adding flag `--container-version <container version>` to branch
```
python3 compose.py --backend pytorch --repoagent checksum --container-version 24.12
```
2. Specifying `--image min,<min container image name> --image full,<full container image name>`.
   The user is responsible for specifying compatible `min` and `full` containers.
```
python3 compose.py --backend pytorch --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.12-py3-min --image full,nvcr.io/nvidia/tritonserver:24.12-py3
```
Methods 1 and 2 will result in the same composed container. Furthermore,
`--image` flag overrides the `--container-version` flag when both are specified.

Note:
1. All contents in `/opt/tritonserver` repository of the `min` image will be
 removed to ensure dependencies of the composed image are added properly.
2. vLLM and TensorRT-LLM backends are currently not supported backends for
`compose.py`. If you want to build additional backends on top of these backends,
it would be better to [build it yourself](#build-it-yourself) by using
`nvcr.io/nvidia/tritonserver:24.12-vllm-python-py3` or
`nvcr.io/nvidia/tritonserver:24.12-trtllm-python-py3` as a `min` container.


### CPU-only container composition

CPU-only containers are not yet available for customization. Please see
 [build documentation](build.md) for instructions to build a full CPU-only
 container. When including PyTorch backend in the composed
 container, an additional `gpu-min` container is needed
since this container provides the CUDA stubs and runtime dependencies which are
not provided in the CPU only min container.

## Build it yourself

If you would like to do what `compose.py` is doing under the hood yourself, you
 can run `compose.py` with the `--dry-run` option and then modify the
 `Dockerfile.compose` file to satisfy your needs.


### Triton with Unsupported and Custom Backends

You can [create and build your own Triton
backend](https://github.com/triton-inference-server/backend).  The
result of that build should be a directory containing your backend
shared library and any additional files required by the
backend. Assuming your backend is called "mybackend" and that the
directory is "./mybackend", adding the following to the Dockerfile `compose.py`
created will create a Triton image that contains all the supported Triton
backends plus your custom backend.

```
COPY ./mybackend /opt/tritonserver/backends/mybackend
```

You also need to install any additional dependencies required by your
backend as part of the Dockerfile. Then use Docker to create the
image.

```
$ docker build -t tritonserver_custom -f Dockerfile.compose .
```


================================================
FILE: docs/customization_guide/deploy.md
================================================
<!--
# Copyright (c) 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Secure Deployment Considerations

The Triton Inference Server project is designed for flexibility and
allows developers to create and deploy inferencing solutions in a
variety of ways. Developers can deploy Triton as an http server, a
grpc server, a server supporting both, or embed a Triton server into
their own application. Developers can deploy Triton locally or in the
cloud, within a Kubernetes cluster behind an API gateway or as a
standalone process.  This guide is intended to provide some key points
and best practices that users deploying Triton based solutions should
consider.

| [Deploying Behind a Secure Gateway or Proxy](#deploying-behind-a-secure-proxy-or-gateway) | [Running with Least Privilege](#running-with-least-privilege) |

> [!IMPORTANT]
> Ultimately the security of a solution based on Triton
> is the responsibility of the developer building and deploying that
> solution. When deploying in production settings please have security
> experts review any potential risks and threats.

> [!WARNING]
> Dynamic updates to model repositories are disabled by
> default. Enabling dynamic updates to model repositories either
> through model loading APIs or through directory polling can lead to
> arbitrary code execution. Model repository access control is
> critical in production deployments. If dynamic updates are required,
> ensure only trusted entities have access to model loading APIs and
> model repository directories.

## Deploying Behind a Secure Proxy or Gateway

The Triton Inference Server is designed primarily as a microservice to
be deployed as part of a larger solution within an application
framework or service mesh.

In such deployments it is typical to utilize dedicated gateway or
proxy servers to handle authorization, access control, resource
management, encryption, load balancing, redundancy and many other
security and availability features.

The full design of such systems is outside the scope of this
deployment guide but in such scenarios dedicated ingress controllers
handle access from outside the trusted network while Triton Inference
Server handles only trusted, validated requests.

In such scenarios Triton Inference Server is not exposed directly to
an untrusted network.

### References on Secure Deployments

In the following references, Triton Inference Server would be deployed
as an "Application" or "Service" within the trusted internal network.

* <https://www.nginx.com/blog/architecting-zero-trust-security-for-kubernetes-apps-with-nginx/>
* <https://istio.io/latest/docs/concepts/security/>
* <https://konghq.com/blog/enterprise/envoy-service-mesh>
* <https://www.solo.io/topics/envoy-proxy/>

## Running with Least Privilege

  The security principle of least privilege advocates that a process be
  granted the minimum permissions required to do its job.

  For an inference solution based on Triton Inference Server there are a
  number of ways to reduce security risks by limiting the permissions
  and capabilities of the server to the minimum required for correct
  operation.

### 1. Follow Best Practices for Securing Kubernetes Deployments

 When deploying Triton within a Kubernetes pod ensure that it is
 running with a service account with the fewest possible
 permissions. Ensure that you have configured [role based access
 control](https://kubernetes.io/docs/reference/access-authn-authz/rbac/)
 to limit access to resources and capabilities as required by your
 application.

### 2. Follow Best Practices for Launching Standalone Docker Containers

  When Triton is deployed as a containerized service, standard docker
  security practices apply. This includes limiting the resources that a
  container has access to as well as limiting network access to the
  container. https://docs.docker.com/engine/security/

### 3. Run as a Non-Root User

   Triton's pre-built containers contain a non-root user that can be used
   to launch the tritonserver application with limited permissions. This
   user, `triton-server` is created with `user id 1000`. When launching
   the container using docker the user can be set with the `--user`
   command line option.

##### Example Launch Command

   ```
   docker run --rm --user triton-server -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:YY.MM-py3 tritonserver --model-repository=/models
   ```

### 4. Restrict or Disable Access to Protocols and APIs

The pre-built Triton Inference Server application enables a full set
of features including health checks, server metadata, inference apis,
shared memory apis, model and model repository configuration,
statistics, tracing and logging. Care should be taken to only expose
those capabilities that are required for your solution.

#### Disabling Features at Compile Time

When building a custom inference server application features can be
selectively enabled or disabled using the `build.py` script. As an
example a developer can use the flags `--endpoint http` and
`--endpoint grpc` to compile support for `http`, `grpc` or
both. Support for individual backends can be enabled as well. For more
details please see [documentation](build.md) on building a custom
inference server application.

#### Disabling / Restricting Features at Run Time

The `tritonserver` application provides a number of command line
options to enable and disable features when launched. For a full list
of options please see `tritonserver --help`. The following subset are
described here with basic recommendations.

##### `--exit-on-error <boolean>, default True`

Exits the inference server if any error occurs during
initialization. Recommended to set to `True` to catch any
unanticipated errors.

##### `--disable-auto-complete-config, default enabled`

Disables backends from autocompleting model configuration. If not
required for your solution recommended to disable to ensure model
configurations are defined statically.

##### `--strict-readiness <boolean>, default True`

If set to true `/v2/health/ready` will only report ready when all
selected models are loaded. Recommended to set to `True` to provide a
signal to other services and orchestration frameworks when full
initialization is complete and server is healthy.

##### `--model-control-mode <string>, default "none"`

Specifies the mode for model management.

> [!WARNING]
> Allowing dynamic updates to the model repository can lead
> to arbitrary code execution. Model repository access control is
> critical in production deployments. Unless required for operation, it's recommended
> to disable dynamic updates. If required, please ensure only trusted entities
> can add or remove models from a model repository.

Options:

 * `none`- Models are loaded at start up and can not be modified.
 * `poll`- Server process will poll the model repository for changes.
 * `explicit` - Models can be loaded and unloaded via the model control APIs.

Recommended to set to `none` unless dynamic updates are required. If
dynamic updates are required care must be taken to control access to
the model repository files and load and unload APIs.

##### `--allow-http <boolean>, default True`

Enable HTTP request handling. Recommended to set to `False` if not required.

##### `--allow-grpc <boolean>, default True`

Enable gRPC request handling. Recommended to set to `False` if not required.

##### `--grpc-use-ssl <boolean> default False`

Use SSL authentication for gRPC requests. Recommended to set to `True` if service is not protected by a gateway or proxy.

##### `--grpc-use-ssl-mutual <boolean> default False`

Use mutual SSL authentication for gRPC requests. Recommended to set to `True` if service is not protected by a gateway or proxy.

##### `--grpc-restricted-protocol <<string>:<string>=<string>>`

Restrict access to specific gRPC protocol categories to users with
specific key, value pair shared secret. See
[limit-endpoint-access](inference_protocols.md#limit-endpoint-access-beta)
for more information.

> [!Note]
> Restricting access can be used to limit exposure to model
> control APIs to trusted users.

##### `--http-restricted-api <<string>:<string>=<string>>`

Restrict access to specific HTTP API categories to users with
specific key, value pair shared secret. See
[limit-endpoint-access](inference_protocols.md#limit-endpoint-access-beta)
for more information.

> [!Note]
> Restricting access can be used to limit exposure to model
> control APIs to trusted users.
> When Vertex AI endpoint support is enabled, this setting also
> applies to redirected Vertex AI requests.
> If Triton is built without `TRITON_ENABLE_HTTP`, Vertex AI uses
> default unrestricted API settings.

##### `--allow-sagemaker <boolean> default False`

Enable SageMaker request handling. Recommended to set to `False` unless required.

##### `--allow-vertex-ai <boolean> default depends on environment variable`

Enable Vertex AI request handling. Default is `True` if
`AIP_MODE=PREDICTION`, `False` otherwise. Recommended to set to
`False` unless required.

##### `--allow-metrics <boolean> default True`

Allow server to publish prometheus style metrics. Recommended to set
to `False` if not required to avoid capturing or exposing any sensitive information.

##### `--trace-config level=<string> default "off"`

Tracing mode. Trace mode supports `triton` and `opentelemetry`. Unless required
`--trace-config level=off` should be set to avoid capturing or exposing any
sensitive information.


##### `--backend-directory <string> default /opt/tritonserver/backends`

Directory where backend shared libraries are found.

> [!Warning]
> Access to add or remove files from the backend directory
> must be access controlled. Adding untrusted files
> can lead to arbitrary code execution.

##### `--repoagent-directory <string> default /opt/tritonserver/repoagents`
Directory where repository agent shared libraries are found.

> [!Warning]
> Access to add or remove files from the repoagent directory
> must be access controlled. Adding untrusted files
> can lead to arbitrary code execution.

##### `--cache-directory <string> default /opt/tritonserver/caches`

Directory where cache shared libraries are found.

> [!Warning]
> Access to add or remove files from the cache directory
> must be access controlled. Adding untrusted files
> can lead to arbitrary code execution.

##### `--backend-config=<backend>,additional-dependency-dirs=<string>`

This is an optional Windows feature that enables Triton to search custom
dependency directories when loading a specific backend. The user can input
these directories as a string of semicolon-separated paths (including a
trailing semicolon). These directories are programmatically prepended to
the process's PATH and are removed when the backend is loaded successfully.
Windows will search PATH last in its search sequence, so be cautious that
no untrusted files of same name exist in a location of higher search priority
(e.g., System32). It is still recommended to add backend-specific dependencies
to their corresponding backend folder when possible.

# GRPC server options
Triton Inference Server's gRPC inference handlers internally use states to manage inference requests and response queues. Each state consists of one inference request and one response queue. The response queue within a state can hold multiple response objects. These states remain allocated for reuse to optimize performance by minimizing dynamic allocations.

You can configure the following parameters to balance memory usage and server performance:
- The maximum number of states that remain allocated.
- The maximum number of response objects that can stay allocated in the response queue.

##### `--grpc-infer-allocation-pool-size=<integer>`
Specifies the maximum number of states (inference request/response queues) that remain allocated for reuse. If the number of in-flight requests does not exceed this value, no allocation or deallocation of request/response queues will occur. By default, this value is set to `8`.

##### `--grpc-max-response-pool-size=<integer>`
Specifies the maximum number of inference response objects that can remain allocated in each response queue at any given time. This option is particularly useful in decoupled mode, where multiple responses are generated for a single request. By default, this value is set to `INT_MAX`.

> [!Warning]
> Setting this value too low may negatively impact performance.


================================================
FILE: docs/customization_guide/inference_protocols.md
================================================
<!--
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Inference Protocols and APIs

Clients can communicate with Triton using either an [HTTP/REST
protocol](#httprest-and-grpc-protocols), a [GRPC
protocol](#httprest-and-grpc-protocols), or by an [in-process C
API](inprocess_c_api.md) or its
[C++ wrapper](https://github.com/triton-inference-server/developer_tools/tree/main/server).

## HTTP/REST and GRPC Protocols

Triton exposes both HTTP/REST and GRPC endpoints based on [standard
inference
protocols](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2)
that have been proposed by the [KServe
project](https://github.com/kserve). To fully enable all capabilities,
Triton also implements [HTTP/REST and GRPC
extensions](https://github.com/triton-inference-server/server/tree/main/docs/protocol)
to the KServe inference protocol. The GRPC protocol also provides a
bi-directional streaming version of the inference RPC to allow a
sequence of inference requests/responses to be sent over a
GRPC stream. We typically recommend using the unary version for
inference requests. The streaming version should be used only if the
situation demands it. Some such use cases are:

* Assume a system with multiple Triton server instances running
  behind a Load Balancer. If a sequence of inference requests is
  needed to hit the same Triton server instance, a GRPC stream
  will hold a single connection throughout the lifetime and hence
  ensure the requests are delivered to the same Triton instance.
* If the order of requests/responses needs to be preserved over
  the network, a GRPC stream will ensure that the server receives
  the requests in the same order as they were sent from the
  client.

The HTTP/REST and GRPC protocols also provide endpoints to check
server and model health, metadata and statistics. Additional
endpoints allow model loading and unloading, and inferencing. See
the KServe and extension documentation for details.

### HTTP Options
Triton provides the following configuration options for server-client network transactions over HTTP protocol.

#### Compression

Triton allows the on-wire compression of request/response on HTTP through its clients. See [HTTP Compression](../client/README.md#compression) for more details.

#### Mapping Triton Server Error Codes to HTTP Status Codes

This table maps various Triton Server error codes to their corresponding HTTP status
codes. It can be used as a reference guide for understanding how Triton Server errors
are handled in HTTP responses.


| Triton Server Error Code                      | HTTP Status Code   | Description          |
| ----------------------------------------------| -------------------| ---------------------|
| `TRITONSERVER_ERROR_INTERNAL`                 | 500                | Internal Server Error|
| `TRITONSERVER_ERROR_NOT_FOUND`                | 404                | Not Found            |
| `TRITONSERVER_ERROR_UNAVAILABLE`              | 503                | Service Unavailable  |
| `TRITONSERVER_ERROR_UNSUPPORTED`              | 501                | Not Implemented      |
| `TRITONSERVER_ERROR_UNKNOWN`,<br>`TRITONSERVER_ERROR_INVALID_ARG`,<br>`TRITONSERVER_ERROR_ALREADY_EXISTS`,<br>`TRITONSERVER_ERROR_CANCELLED` | `400` | Bad Request (default for other errors)      |

### GRPC Options
Triton exposes various GRPC parameters for configuring the server-client network transactions. For usage of these options, refer to the output from `tritonserver --help`.

#### SSL/TLS

These options can be used to configure a secured channel for communication. The server-side options include:

* `--grpc-use-ssl`
* `--grpc-use-ssl-mutual`
* `--grpc-server-cert`
* `--grpc-server-key`
* `--grpc-root-cert`

For client-side documentation, see [Client-Side GRPC SSL/TLS](https://github.com/triton-inference-server/client/tree/main#ssltls)

For an overview of authentication in gRPC, refer [here](https://grpc.io/docs/guides/auth/).

#### Compression

Triton allows the on-wire compression of request/response messages by exposing the following option on the server side:

* `--grpc-infer-response-compression-level`

For client-side documentation, see [Client-Side GRPC Compression](https://github.com/triton-inference-server/client/tree/main#compression-1)

Compression can be used to reduce the amount of bandwidth used in server-client communication. For more details, see [gRPC Compression](https://grpc.github.io/grpc/core/md_doc_compression.html).

#### GRPC KeepAlive

Triton exposes GRPC KeepAlive parameters with the default values for both
client and server described [here](https://github.com/grpc/grpc/blob/master/doc/keepalive.md).

These options can be used to configure the KeepAlive settings:

* `--grpc-keepalive-time`
* `--grpc-keepalive-timeout`
* `--grpc-keepalive-permit-without-calls`
* `--grpc-http2-max-pings-without-data`
* `--grpc-http2-min-recv-ping-interval-without-data`
* `--grpc-http2-max-ping-strikes`

For client-side documentation, see [Client-Side GRPC KeepAlive](https://github.com/triton-inference-server/client/blob/main/README.md#grpc-keepalive).

#### GRPC Status Codes

Triton implements GRPC error handling for streaming requests when a specific flag is enabled through headers. Upon encountering an error, Triton returns the appropriate GRPC error code and subsequently closes the stream.

* `triton_grpc_error` : The header value needs to be set to true while starting the stream.

GRPC status codes can be used for better visibility and monitoring. For more details, see [gRPC Status Codes](https://grpc.io/docs/guides/status-codes/)

For client-side documentation, see [Client-Side GRPC Status Codes](https://github.com/triton-inference-server/client/tree/main#GRPC-Status-Codes)

#### GRPC Inference Handler Threads

In general, using 2 threads per completion queue seems to give the best performance, see [gRPC Performance Best Practices](https://grpc.io/docs/guides/performance/#c). However, in cases where the performance bottleneck is at the request handling step (e.g. ensemble models), increasing the number of gRPC inference handler threads may lead to a higher throughput.

* `--grpc-infer-thread-count`: 2 by default.

Note: More threads don't always mean better performance.

### Limit Endpoint Access (BETA)

Triton users may want to restrict access to protocols or APIs that are
provided by the GRPC or HTTP endpoints of a server. For example, users
can provide one set of access credentials for inference APIs and
another for model control APIs such as model loading and unloading.

The following options can be specified to declare a restricted
protocol group (GRPC) or restricted API group (HTTP):

```
--grpc-restricted-protocol=<protocol_1>,<protocol_2>,...:<restricted-key>=<restricted-value>
--http-restricted-api=<API_1>,<API_2>,...:<restricted-key>=<restricted-value>
```

When Vertex AI endpoint support is enabled, `--http-restricted-api`
also applies to redirected Vertex AI requests.
If Triton is built without `TRITON_ENABLE_HTTP`, Vertex AI falls back
to default unrestricted API settings.

The option can be specified multiple times to specify multiple groups of
protocols or APIs with different restriction settings.

* `protocols / APIs` : A comma-separated list of protocols / APIs to be included in this
group. Note that currently a given protocol / API is not allowed to be included in
multiple groups. The following protocols / APIs are recognized:

  * `health` : Health endpoint defined for [HTTP/REST](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#health) and [GRPC](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#health-1). For GRPC endpoint, this value also exposes [GRPC health check protocol](https://github.com/triton-inference-server/common/blob/main/protobuf/health.proto).
  * `metadata` : Server / model metadata endpoints defined for [HTTP/REST](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#server-metadata) and [GRPC](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#server-metadata-1).
  * `inference` : Inference endpoints defined for [HTTP/REST](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#inference) and [GRPC](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#inference-1).
  * `shared-memory` : [Shared-memory endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_shared_memory.md).
  * `model-config` : [Model configuration endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_model_configuration.md).
  * `model-repository` : [Model repository endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_model_repository.md).
  * `statistics` : [statistics endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_statistics.md).
  * `trace` : [trace endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_trace.md).
  * `logging` : [logging endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_logging.md).

* `restricted-key` : The GRPC / HTTP request header
to be checked when a request is received. The
completed header for GRPC will be in the form of
`triton-grpc-protocol-<restricted-key>`. The completed header for HTTP
will be in the form of `<restricted-key>`.

* `restricted-value` : The header value required to access the specified protocols.

#### Example

To start the server with a set of protocols and APIs restricted for
`admin` usage and the rest of the protocols and APIs left unrestricted
use the following command line arguments:


```
tritonserver --grpc-restricted-protocol=shared-memory,model-config,model-repository,statistics,trace:<admin-key>=<admin-value> \
             --http-restricted-api=shared-memory,model-config,model-repository,statistics,trace:<admin-key>=<admin-value> ...
```

GRPC requests to `admin` protocols require that an additional header
`triton-grpc-protocol-<admin-key>` is provided with value
`<admin-value>`. HTTP requests to `admin` APIs require that an
additional header `<admin-key>` is provided with value `<admin-value>`.


================================================
FILE: docs/customization_guide/inprocess_c_api.md
================================================
<!--
# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# C API Description

Triton server functionality is encapsulated in a shared library which
is built from source contained in the [core
repository](https://github.com/triton-inference-server/core). You can
include the full capabilities of Triton by linking the shared library
into your application and by using the C API defined in
[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).

When you link the Triton shared library into your application you are
*not* spawning a separate Triton process, instead, you are including
the Triton core logic directly in your application. The Triton
HTTP/REST or GRPC protocols are not used to communicate with this
Triton core logic, instead all communication between your application
and the Triton core logic must take place via the [Server
API](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).

The top-level abstraction used by Server API is `TRITONSERVER_Server`,
which represents the Triton core logic that is capable of implementing
all of the features and capabilities of Triton. A
`TRITONSERVER_Server` object is created by calling
`TRITONSERVER_ServerNew` with a set of options that indicate how the
object should be initialized.  Use of `TRITONSERVER_ServerNew` is
demonstrated in [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc). Once you have created a
`TRITONSERVER_Server` object, you can begin using the rest of the
Server API as described below.

## Error Handling

Most Server API functions return an error object indicating success or
failure. Success is indicated by returning `nullptr` (`NULL`). Failure is
indicated by returning a `TRITONSERVER_Error` object. The error code
and message can be retrieved from a `TRITONSERVER_Error` object with
`TRITONSERVER_ErrorCode` and `TRITONSERVER_ErrorMessage`.

The lifecycle and ownership of all Server API objects is documented in
[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h). For
`TRITONSERVER_Error`, ownership of the object passes to the caller of
the Server API function. As a result, your application is responsible
for managing the lifecycle of the returned `TRITONSERVER_Error`
object. You must delete the error object using
`TRITONSERVER_ErrorDelete` when you are done using it. Macros such as
`FAIL_IF_ERR` shown in [common.h](https://github.com/triton-inference-server/server/blob/main/src/common.h) are useful for
managing error object lifetimes.

## Versioning and Backwards Compatibility

A typical pattern, demonstrated in [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc) and
shown below, shows how you can compare the Server API version provided
by the shared library against the Server API version that you compiled
your application against. The Server API is backwards compatible, so
as long as the major version provided by the shared library matches
the major version that you compiled against, and the minor version
provided by the shared library is greater-than-or-equal to the minor
version that you compiled against, then your application can use the
Server API.

```
#include "tritonserver.h"
// Error checking removed for clarity...
uint32_t api_version_major, api_version_minor;
TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor);
if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major) ||
    (TRITONSERVER_API_VERSION_MINOR > api_version_minor)) {
  // Error, the shared library implementing the Server API is older than
  // the version of the Server API that you compiled against.
}
```

### Non-Inference APIs

The Server API contains functions for checking health and readiness,
getting model information, getting model statistics and metrics,
loading and unloading models, etc. The use of these functions is
straightforward and some of these functions are demonstrated in
[simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc) and all are documented in
[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).

### Inference APIs

Performing an inference request requires the use of many Server API
functions and objects, as demonstrated in
[simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc). The general usage requires the
following steps.

* Create a `TRITONSERVER_ResponseAllocator` using
  `TRITONSERVER_ResponseAllocatorNew`.  You can use the same response
  allocator for all of your inference requests, or you can create
  multiple response allocators.  When Triton produces an output
  tensor, it needs a memory buffer into which it can store the
  contents of that tensor. Triton defers the allocation of these
  output buffers by invoking callback functions in your
  application. You communicate these callback functions to Triton with
  the `TRITONSERVER_ResponseAllocator` object. You must implement two
  callback functions, one for buffer allocation and one for buffer
  free. The signatures for these functions are
  `TRITONSERVER_ResponseAllocatorAllocFn_t` and
  `TRITONSERVER_ResponseAllocatorReleaseFn_t` as defined in
  [tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h). In
  [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc), these callback functions are
  implemented as `ResponseAlloc` and `ResponseRelease`.

* Create an inference request as a `TRITONSERVER_InferenceRequest`
  object. The inference request is where you specify what model you
  want to use, the input tensors and their values, the output tensors
  that you want returned, and other request parameters. You create an
  inference request using `TRITONSERVER_InferenceRequestNew`. You
  create each input tensor in the request using
  `TRITONSERVER_InferenceRequestAddInput` and set the data for the
  input tensor using `TRITONSERVER_InferenceRequestAppendInputData`
  (or one of the `TRITONSERVER_InferenceRequestAppendInputData*`
  variants defined in
  [tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h)). By
  default, Triton will return all output tensors, but you can limit
  Triton to only return some outputs by using
  `TRITONSERVER_InferenceRequestAddRequestedOutput`.

  To correctly manage the lifecycle of the inference request, you must
  use `TRITONSERVER_InferenceRequestSetReleaseCallback` to set a
  callback into a function in your application. This callback will be
  invoked by Triton to return ownership of the
  `TRITONSERVER_InferenceRequest` object. Typically, in this callback
  you will just delete the `TRITONSERVER_InferenceRequest` object by
  using `TRITONSERVER_InferenceRequestDelete`. But you may also
  implement a different lifecycle management; for example, if you are
  reusing inference request objects you would want to make the object
  available for reuse.

  You can optionally use `TRITONSERVER_InferenceRequestSetId` to set a
  user-defined ID on the request. This ID is not used by Triton but
  will be returned in the response.

  You can reuse an existing `TRITONSERVER_InferenceRequest` object for
  a new inference request. A couple of examples of how this is done
  and why it is useful are shown in [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc).

* Ask Triton to execute the inference request using
  `TRITONSERVER_ServerInferAsync`. `TRITONSERVER_ServerInferAsync` is
  an asynchronous call that returns immediately. The inference response
  is returned via a callback into your application. You register this
  callback using `TRITONSERVER_InferenceRequestSetResponseCallback`
  before you invoke `TRITONSERVER_ServerInferAsync`. In
  [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc) this callback is
  `InferResponseComplete`.

  When you invoke `TRITONSERVER_ServerInferAsync` and it returns
  without error, you are passing ownership of the
  `TRITONSERVER_InferenceRequest` object to Triton, and so you must
  not access that object in any way until Triton returns ownership to
  you via the callback you registered with
  `TRITONSERVER_InferenceRequestSetReleaseCallback`.

* Process the inference response. The inference response is returned
  to the callback function you registered with
  `TRITONSERVER_InferenceRequestSetResponseCallback`. Your callback
  receives the response as a `TRITONSERVER_InferenceResponse`
  object. Your callback takes ownership of the
  `TRITONSERVER_InferenceResponse` object and so must free it with
  `TRITONSERVER_InferenceResponseDelete` when it is no longer needed.

  The first step in processing a response is to use
  `TRITONSERVER_InferenceResponseError` to check if the response is
  returning an error or if it is returning valid results. If the
  response is valid you can use
  `TRITONSERVER_InferenceResponseOutputCount` to iterate over the
  output tensors, and `TRITONSERVER_InferenceResponseOutput` to get
  information about each output tensor.

  Note that the [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc) example uses a
  std::promise to simply wait for the response, but synchronizing
  response handling in this way is not required. You can have multiple
  inference requests in flight at the same time and can issue
  inference requests from the same thread or from multiple different
  threads.

The C API allows Triton to be linked directly to a C/C++ application. The API
is documented in
[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).

A simple example using the C API can be found in
[simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc).  A more complicated example can be
found in the source that implements the HTTP/REST and GRPC endpoints
for Triton. These endpoints use the C API to communicate with the core
of Triton. The primary source files for the endpoints are
[grpc_server.cc](https://github.com/triton-inference-server/server/blob/main/src/grpc/grpc_server.cc) and
[http_server.cc](https://github.com/triton-inference-server/server/blob/main/src/http_server.cc).

================================================
FILE: docs/customization_guide/inprocess_java_api.md
================================================
<!--
# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Java bindings for In-Process Triton Server API

The Triton Inference Server uses [Java CPP](https://github.com/bytedeco/javacpp)
to create bindings around Tritonserver to create a Java API.

The API is documented in
[tritonserver.java](https://github.com/bytedeco/javacpp-presets/blob/master/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java).
Alternatively, the user can refer to the web version [API docs](http://bytedeco.org/javacpp-presets/tritonserver/apidocs/)
generated from `tritonserver.java`.
**Note:** Currently, `tritonserver.java` contains bindings for both the `In-process C-API`
and the bindings for `C-API Wrapper`. More information about the [developer_tools/server C-API wrapper](https://github.com/triton-inference-server/developer_tools/blob/main/server/README.md) can be found in the [developer_tools repository](https://github.com/triton-inference-server/developer_tools/).

A simple example using the Java API can be found in
[Samples folder](https://github.com/bytedeco/javacpp-presets/tree/master/tritonserver/samples)
which includes `Simple.java` which is similar to
[`simple.cc`](https://github.com/triton-inference-server/server/blob/main/src/simple.cc).
Please refer to
[sample usage documentation](https://github.com/bytedeco/javacpp-presets/tree/master/tritonserver#sample-usage)
to learn about how to build and run `Simple.java`.

In the [QA folder](https://github.com/triton-inference-server/server/blob/main/qa), folders starting with L0_java include Java API tests.
These can be useful references for getting started, such as the
[ResNet50 test](https://github.com/triton-inference-server/server/blob/main/qa/L0_java_resnet).

## Java API setup instructions

To use the Tritonserver Java API, you will need to have the Tritonserver library
and dependencies installed in your environment. There are two ways to do this:

1. Use a Tritonserver docker container with
   1. `.jar` Java bindings to C API (recommended)
   2. maven and build bindings yourself
2. Build Triton from your environment without Docker (not recommended)

### Run Tritonserver container and install dependencies

To set up your environment with Triton Java API, please follow the following steps:
1. First run Docker container:
```
 $ docker run -it --gpus=all -v ${PWD}:/workspace nvcr.io/nvidia/tritonserver:<your container version>-py3 bash
```
2. Install `jdk`:
```bash
 $ apt update && apt install -y openjdk-11-jdk
```
3. Install `maven` (only if you want to build the bindings yourself):
```bash
$ cd /opt/tritonserver
 $ wget https://archive.apache.org/dist/maven/maven-3/3.8.4/binaries/apache-maven-3.8.4-bin.tar.gz
 $ tar zxvf apache-maven-3.8.4-bin.tar.gz
 $ export PATH=/opt/tritonserver/apache-maven-3.8.4/bin:$PATH
```

### Run Java program with Java bindings Jar

After ensuring that Tritonserver and dependencies are installed, you can run your
Java program with the Java bindings with the following steps:

1. Place Java bindings into your environment. You can do this by either:

   a. Building Java API bindings with provided build script:
      ```bash
      # Clone Triton client repo. Recommended client repo tag is: main
      $ git clone --single-branch --depth=1 -b <client repo tag> \
                     https://github.com/triton-inference-server/client.git clientrepo
      # Run build script
      ## For In-Process C-API Java Bindings
      $ source clientrepo/src/java-api-bindings/scripts/install_dependencies_and_build.sh
      ## For C-API Wrapper (Triton with C++ bindings) Java Bindings
      $ source clientrepo/src/java-api-bindings/scripts/install_dependencies_and_build.sh --enable-developer-tools-server
      ```
      This will install the Java bindings to `/workspace/install/java-api-bindings/tritonserver-java-bindings.jar`

   *or*

   b. Copying "Uber Jar" from Triton SDK container to your environment
      ```bash
      $ id=$(docker run -dit nvcr.io/nvidia/tritonserver:<triton container version>-py3-sdk bash)
      $ docker cp ${id}:/workspace/install/java-api-bindings/tritonserver-java-bindings.jar <Uber Jar directory>/tritonserver-java-bindings.jar
      $ docker stop ${id}
      ```
      **Note:** `tritonserver-java-bindings.jar` only includes the `In-Process Java Bindings`. To use the `C-API Wrapper Java Bindings`, please use the build script.
2. Use the built "Uber Jar" that contains the Java bindings
   ```bash
   $ java -cp <Uber Jar directory>/tritonserver-java-bindings.jar <your Java program>
   ```

#### Build Java bindings and run Java program with Maven

If you want to make changes to the Java bindings, then you can use Maven to
build yourself. You can refer to part 1.a of [Run Java program with Java
bindings Jar](#run-java-program-with-java-bindings-jar) to also build the jar
yourself without any modifications to the Tritonserver bindings in
JavaCPP-presets.
You can do this using the following steps:

1. Create the JNI binaries in your local repository (`/root/.m2/repository`)
   with [`javacpp-presets/tritonserver`](https://github.com/bytedeco/javacpp-presets/tree/master/tritonserver).
   For C-API Wrapper Java bindings (Triton with C++ bindings), you need to
   install some build specific dependencies including cmake and rapidjson.
   Refer to [java installation script](https://github.com/triton-inference-server/client/blob/main/src/java-api-bindings/scripts/install_dependencies_and_build.sh)
   for dependencies you need to install and modifications you need to make for your container.
After installing dependencies, you can build the tritonserver project on javacpp-presets:
```bash
 $ git clone https://github.com/bytedeco/javacpp-presets.git
 $ cd javacpp-presets
 $ mvn clean install --projects .,tritonserver
 $ mvn clean install -f platform --projects ../tritonserver/platform -Djavacpp.platform=linux-x86_64
```
2. Create your custom `*.pom` file for Maven. Please refer to
   [samples/simple/pom.xml](https://github.com/bytedeco/javacpp-presets/blob/master/tritonserver/samples/simple/pom.xml) as
   reference for how to create your pom file.
3. After creating your `pom.xml` file you can build your application with:
```bash
 $ mvn compile exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="<your input args>"
```

================================================
FILE: docs/customization_guide/repository_agents.md
================================================
<!--
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Repository Agent

A *repository agent* extends Triton with new functionality that
operates when a model is loaded or unloaded. You can introduce your
own code to perform authentication, decryption, conversion, or similar
operations when a model is loaded.

**BETA: The repository agent API is beta quality and is subject to
non-backward-compatible changes for one or more releases.**

A repository agent communicates with Triton using the [repository agent
API](https://github.com/triton-inference-server/core/tree/main/include/triton/core/tritonrepoagent.h). The
[checksum_repository_agent GitHub
repo](https://github.com/triton-inference-server/checksum_repository_agent)
provides an example repository agent that verifies file checksums
before loading a model.

## Using a Repository Agent

A model can use one or more repository agents by specifying them in
the *ModelRepositoryAgents* section of the [model
configuration](../user_guide/model_configuration.md). Each repository agent can have
parameters specific to that agent that are specified in the model
configuration to control the behavior of the agent. To understand the
parameters available for a given agent consult the documentation for
that agent.

Multiple agents may be specified for the same model and they will be
invoked in order when a model is loaded or unloaded. The following
example model configuration shows how two agents, "agent0"
and "agent1", are specified so that they are invoked in that order
with the given parameters.

```
model_repository_agents
{
  agents [
    {
      name: "agent0",
      parameters [
        {
          key: "key0",
          value: "value0"
        },
        {
          key: "key1",
          value: "value1"
        }
      ]
    },
    {
      name: "agent1",
      parameters [
        {
          key: "keyx",
          value: "valuex"
        }
      ]
    }
  ]
}
```

## Implementing a Repository Agent

A repository agent must be implemented as a shared library and the
name of the shared library must be
*libtritonrepoagent_\<repo-agent-name\>.so*. The shared library should
hide all symbols except those needed by the repository agent API. See
the [checksum example's
CMakeLists.txt](https://github.com/triton-inference-server/checksum_repository_agent/blob/main/CMakeLists.txt)
for an example of how to use an ldscript to expose only the necessary
symbols.

The shared library will be dynamically loaded by Triton when it is
needed. For a repository agent called *A*, the shared library must be
installed as \<repository_agent_directory\>/A/libtritonrepoagent_A.so,
where \<repository_agent_directory\> is by default
/opt/tritonserver/repoagents.  The --repoagent-directory flag can be
used to override the default.

Your repository agent must implement the repository agent API as
documented in
[tritonrepoagent.h](https://github.com/triton-inference-server/core/tree/main/include/triton/core/tritonrepoagent.h).

Triton follows these steps when loading a model:

* Load the model's configuration file (config.pbtxt) and extract the
  *ModelRepositoryAgents* settings. Even if a repository agent
  modifies the config.pbtxt file, the repository agent settings from
  the initial config.pbtxt file are used for the entire loading
  process.

* For each repository agent specified:

  * Initialize the corresponding repository agent, loading the shared
    library if necessary. Model loading fails if the shared library is
    not available or if initialization fails.

  * Invoke the repository agent's *TRITONREPOAGENT_ModelAction*
    function with action TRITONREPOAGENT_ACTION_LOAD. As input the
    agent can access the model's repository as either a cloud storage
    location or a local filesystem location.

  * The repository agent can return *success* to indicate that no
    changes were made to the repository, can return *failure* to
    indicate that the model load should fail, or can create a new
    repository for the model (for example, by decrypting the input
    repository) and return *success* to indicate that the new
    repository should be used.

  * If the agent returns *success* Triton continues to the next
    agent. If the agent returns *failure*, Triton skips invocation of
    any additional agents.

* If all agents returned *success*, Triton attempts to load the model
  using the final model repository.

* For each repository agent that was invoked with
  TRITONREPOAGENT_ACTION_LOAD, in reverse order:

  * Triton invokes the repository agent's
    *TRITONREPOAGENT_ModelAction* function with action
    TRITONREPOAGENT_ACTION_LOAD_COMPLETE if the model loaded
    successfully or TRITONREPOAGENT_ACTION_LOAD_FAIL if the model
    failed to load.

Triton follows these steps when unloading a model:

* Triton uses the repository agent settings from the initial
  config.pbtxt file, even if during loading one or more agents
  modified its contents.

* For each repository agent that was invoked with
  TRITONREPOAGENT_ACTION_LOAD, in the same order:

  * Triton invokes the repository agent's
    *TRITONREPOAGENT_ModelAction* function with action
    TRITONREPOAGENT_ACTION_UNLOAD.

* Triton unloads the model.

* For each repository agent that was invoked with
  TRITONREPOAGENT_ACTION_UNLOAD, in reverse order:

  * Triton invokes the repository agent's
    *TRITONREPOAGENT_ModelAction* function with action
    TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE.


================================================
FILE: docs/customization_guide/sagemaker.md
================================================
<!--
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Use Triton on SageMaker

Below are important pointers on how to deploy Triton Inference Server on AWS SageMaker to serve trained models in production:

- See [docker/sagemaker/serve](https://github.com/triton-inference-server/server/blob/main/docker/sagemaker/serve) for details on how Triton Inference Server is deployed.
- See [qa/L0_sagemaker/test.sh](https://github.com/triton-inference-server/server/blob/main/qa/L0_sagemaker/test.sh) for example usage and testing.
- See [AWS SageMaker Documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/deploy-models-frameworks-triton.html) for more details.


================================================
FILE: docs/customization_guide/test.md
================================================
<!--
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Testing Triton

Currently there is no CI testing enabled for Triton repositories. We
will enable CI testing in a future update.

However, there is a set of tests in the qa/ directory that can be run
manually to provide extensive testing. Before running these tests you
must first generate a few model repositories containing the models
needed by the tests.

## Generate QA Model Repositories

The QA model repositories contain some simple models that are used to
verify the correctness of Triton. To generate the QA model
repositories:

```
$ cd qa/common
$ ./gen_qa_model_repository
```

This will create multiple model repositories in /tmp/\<version\>/qa_*
(for example /tmp/24.12/qa_model_repository).  The TensorRT models
will be created for the GPU on the system that CUDA considers device 0
(zero). If you have multiple GPUs on your system see the documentation
in the scripts for how to target a specific GPU.

## Build SDK Image

Build the *tritonserver_sdk* image that contains the client
libraries, model analyzer, perf analyzer and examples using the following
commands. You must first checkout the `<client branch>` branch of the
*client* repo into the clientrepo/ subdirectory and the `<perf analyzer branch>`
branch of the *perf_analyzer* repo into the perfanalyzerrepo/ subdirectory
respectively. Typically you want to set both `<client branch>` and `<perf analyzer branch>`
to be the same as your current server branch.

```
$ cd <server repo root>
$ git clone --single-branch --depth=1 -b <client branch> https://github.com/triton-inference-server/client.git clientrepo
$ git clone --single-branch --depth=1 -b <perf analyzer branch> https://github.com/triton-inference-server/perf_analyzer.git perfanalyzerrepo
$ docker build -t tritonserver_sdk -f Dockerfile.sdk .
```

## Build QA Image

Next you need to build a QA version of the Triton Docker image. This
image will contain Triton, the QA tests, and all the dependencies
needed to run the QA tests. First do a [Docker image
build](build.md#building-with-docker) to produce the
*tritonserver_cibase* and *tritonserver* images.

Then, build the actual QA image.

```
$ docker build -t tritonserver_qa -f Dockerfile.QA .
```

## Run QA Tests

Now run the QA image and mount the QA model repositories into the
container so the tests will be able to access them.

```
$ docker run --gpus=all -it --rm -v/tmp:/data/inferenceserver tritonserver_qa
```

Within the container the QA tests are in /opt/tritonserver/qa. To run
a test, change directory to the test and run the test.sh script.

```
$ cd <test directory>
$ bash -x ./test.sh
```

### Sanity Tests

Many tests require that you use a complete Triton build, with all
backends and other features enabled. There are three sanity tests that
are parameterized so that you can run them even if you have built a
Triton that contains only a subset of all supported Triton
backends. These tests are L0_infer, L0_batcher and
L0_sequence_batcher. For these tests the following envvars are
available to control how the tests behave:

* BACKENDS: Control which backends are tested. Look in the test.sh
  file of the test to see the default and allowed values.

* ENSEMBLES: Enable testing of ensembles. Set to "0" to disable, set
  to "1" to enable. If enabled you must have the *identity* backend
  included in your Triton build.

* EXPECTED_NUM_TESTS: The tests perform a check of the total number of
  test sub-cases. The exact number of sub-cases that run will depend
  on the values you use for BACKENDS and ENSEMBLES. So you will need
  to adjust this as appropriate for your testing.

For example, if you build a Triton that has only the TensorRT backend
you can run L0_infer as follows:

```
$ BACKENDS="plan" ENSEMBLES=0 EXPECTED_NUM_TESTS=<expected> bash -x ./test.sh
```

Where '\<expected\>' is the number of sub-tests expected to be run for
just TensorRT testing and no ensembles. Depending on which backend(s)
you are testing you will need to experiment and determine the correct
value for '\<expected\>'.


================================================
FILE: docs/customization_guide/tritonfrontend.md
================================================
<!--
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
### Triton Server (tritonfrontend) Bindings (Beta)

The `tritonfrontend` python package is a set of bindings to Triton's existing
frontends implemented in C++. Currently, `tritonfrontend` supports starting up
`KServeHttp` and `KServeGrpc` frontends. These bindings, used in combination
with Triton's Python In-Process API
([`tritonserver`](https://github.com/triton-inference-server/core/tree/main/python/tritonserver))
and [`tritonclient`](https://github.com/triton-inference-server/client/tree/main/src/python/library),
extend the ability to use Triton's full feature set with a few lines of Python.

Let us walk through a simple example:
1. First we need to load the desired models and start the server with `tritonserver`.
```python
import tritonserver

# Constructing path to Model Repository
model_path = f"server/src/python/examples/example_model_repository"

server_options = tritonserver.Options(
    server_id="ExampleServer",
    model_repository=model_path,
    log_error=True,
    log_warn=True,
    log_info=True,
)
server = tritonserver.Server(server_options).start(wait_until_ready=True)
```
Note: `model_path` may need to be edited depending on your setup.


2. Now, to start up the respective services with `tritonfrontend`
```python
from tritonfrontend import KServeHttp, KServeGrpc, Metrics
http_options = KServeHttp.Options(thread_count=5)
http_service = KServeHttp(server, http_options)
http_service.start()

# Default options (if none provided)
grpc_service = KServeGrpc(server)
grpc_service.start()

# Can start metrics service as well
metrics_service = Metrics(server)
metrics_service.start()
```

3. Finally, with running services, we can use `tritonclient` or simple `curl` commands to send requests and receive responses from the frontends.

```python
import tritonclient.http as httpclient
import numpy as np # Use version numpy < 2
model_name = "identity" # output == input
url = "localhost:8000"

# Create a Triton client
client = httpclient.InferenceServerClient(url=url)

# Prepare input data
input_data = np.array([["Roger Roger"]], dtype=object)

# Create input and output objects
inputs = [httpclient.InferInput("INPUT0", input_data.shape, "BYTES")]

# Set the data for the input tensor
inputs[0].set_data_from_numpy(input_data)

results = client.infer(model_name, inputs=inputs)

# Get the output data
output_data = results.as_numpy("OUTPUT0")

# Print results
print("[INFERENCE RESULTS]")
print("Output data:", output_data)

# Stop respective services and server.
metrics_service.stop()
http_service.stop()
grpc_service.stop()
server.stop()
```

---

Additionally, `tritonfrontend` provides context manager support, so steps 2-3 could also be achieved through:
```python
from tritonfrontend import KServeHttp
import tritonclient.http as httpclient
import numpy as np  # Use version numpy < 2

with KServeHttp(server) as http_service:
    # The identity model returns an exact duplicate of the input data as output
    model_name = "identity"
    url = "localhost:8000"
    # Create a Triton client
    with httpclient.InferenceServerClient(url=url) as client:
        # Prepare input data
        input_data = np.array(["Roger Roger"], dtype=object)
        # Create input and output objects
        inputs = [httpclient.InferInput("INPUT0", input_data.shape, "BYTES")]
        # Set the data for the input tensor
        inputs[0].set_data_from_numpy(input_data)
        # Perform inference
        results = client.infer(model_name, inputs=inputs)
        # Get the output data
        output_data = results.as_numpy("OUTPUT0")
        # Print results
        print("[INFERENCE RESULTS]")
        print("Output data:", output_data)

server.stop()
```
With this workflow, you can avoid having to stop each service after client requests have terminated.


## Known Issues
- The following features are not currently supported when launching the Triton frontend services through the Python bindings:
    - [Tracing](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/trace.md)
    - [Shared Memory](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_shared_memory.md)
    - [Restricted Protocols](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#limit-endpoint-access-beta)
    - VertexAI
    - Sagemaker
- After a running server has been stopped, if the client sends an inference request, a Segmentation Fault will occur.

================================================
FILE: docs/examples/README.md
================================================
<!--
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Triton Examples

**New to Triton Inference Server?** Make use of [these tutorials](https://github.com/triton-inference-server/tutorials) to begin your Triton journey!

This folder contains the following:
* jetson: This covers deploying Triton Inference Server on Jetson devices.
* model_repository: This folder is a basic model repository for deploying models using the Triton Inference Server.

================================================
FILE: docs/examples/fetch_models.sh
================================================
#!/bin/bash
# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Abort on the first failing command (-e) and echo every command as it is
# executed (-x), so a failed download or conversion is easy to locate.
set -ex

# Convert the TensorFlow Inception V3 frozen graph to ONNX
# Pre-requisite: Python3, venv, and Pip3 are installed on the system
# Create version directory "1" of the inception_onnx model inside the
# model_repository directory (relative to the current working directory).
mkdir -p model_repository/inception_onnx/1
# Download the frozen-graph archive into /tmp.
wget -O /tmp/inception_v3_2016_08_28_frozen.pb.tar.gz \
     https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz
# Extract inside a subshell so the script's own working directory is unchanged.
(cd /tmp && tar xzf inception_v3_2016_08_28_frozen.pb.tar.gz)
# NOTE(review): this step assumes an apt-based system and sufficient privileges
# to install packages -- confirm for the environment the script is run in.
apt update -qq && apt install -y python3-venv
# Remove any existing tf2onnx directory so the virtual environment is
# created from scratch.
rm -rf tf2onnx
python3 -m venv tf2onnx
source ./tf2onnx/bin/activate
# Install the conversion toolchain at pinned versions inside the venv.
pip3 install "numpy<2" tensorflow==2.18.1 tf2onnx==1.16.1 onnx==1.16.1
# Convert the extracted frozen graph to ONNX using the graph's "input:0"
# tensor as input and the softmax predictions tensor as output.
python3 -m tf2onnx.convert --graphdef /tmp/inception_v3_2016_08_28_frozen.pb --output inception_v3_onnx.model.onnx --inputs input:0 --outputs InceptionV3/Predictions/Softmax:0
deactivate
# Move the converted model into the repository under the file name model.onnx.
mv inception_v3_onnx.model.onnx model_repository/inception_onnx/1/model.onnx


# ONNX densenet
# The DenseNet model is already published as ONNX, so it is downloaded
# straight into version directory "1" of the densenet_onnx model.
mkdir -p model_repository/densenet_onnx/1
wget -O model_repository/densenet_onnx/1/model.onnx \
     https://github.com/onnx/models/raw/main/validated/vision/classification/densenet-121/model/densenet-7.onnx


================================================
FILE: docs/examples/jetson/README.md
================================================
<!--
# Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Using Triton Inference Server as a shared library for execution on Jetson

## Overview
This project demonstrates how to run C API applications using Triton Inference Server as a shared library. We also show how to build and execute such applications on Jetson.

### Prerequisites

* JetPack >= 4.6
* OpenCV >= 4.1.1
* TensorRT >= 8.0.1.6

### Installation

Follow the installation instructions from the GitHub release page ([https://github.com/triton-inference-server/server/releases/](https://github.com/triton-inference-server/server/releases/)).

In our example, we placed the contents of the downloaded release directory under `/opt/tritonserver`.

## Part 1. Concurrent inference and dynamic batching

The purpose of the sample located under [concurrency_and_dynamic_batching](concurrency_and_dynamic_batching/README.md)
is to demonstrate the important features of Triton Inference Server such as concurrent model execution and
dynamic batching. In order to do that, we implemented a people detection application using C API and Triton
Inference Server as a shared library.

## Part 2. Analyzing model performance with perf_analyzer

To analyze model performance on Jetson, the
[perf_analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
tool is used. The `perf_analyzer` is included in the release tar file or can be
compiled from source.

From this directory of the repository, execute the following to evaluate model performance:

```shell
./perf_analyzer -m peoplenet -b 2 --service-kind=triton_c_api --model-repo=$(pwd)/concurrency_and_dynamic_batching/trtis_model_repo_sample_1 --triton-server-directory=/opt/tritonserver --concurrency-range 1:6 -f perf_c_api.csv
```

In the example above we saved the results as a `.csv` file. To visualize these
results, follow the steps described
[here](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md).


================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/Makefile
================================================
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Builds the people_detection sample: compiles $(TARGET).cc and links it
# against the Triton server library, OpenCV and the CUDA runtime.

# Name of the executable; its source is expected in $(TARGET).cc.
TARGET=people_detection
# C++ compiler driver used for both compiling and linking.
GCC=g++
# Include paths for the sample, OpenCV 4, and the aarch64 CUDA toolkit headers.
GCC_PARMS+=-I../../server -I/usr/include/opencv4 -I../../core/include/ -I/usr/local/cuda/targets/aarch64-linux/include
# Triton headers are taken from ${HOME}/tritonserver.
# NOTE(review): confirm this matches where the Triton release was actually
# unpacked on the target device.
# The -D options define TRITON_ENABLE_GPU and TRITON_MIN_COMPUTE_CAPABILITY
# for the preprocessor.
GCC_PARMS+=-I${HOME}/tritonserver/include/tritonserver -D TRITON_ENABLE_GPU=ON -D TRITON_MIN_COMPUTE_CAPABILITY=5.3

# Library search paths: Triton, system libraries, and the aarch64 CUDA toolkit.
GCC_LIBS=-L${HOME}/tritonserver/lib -L/usr/lib -L/usr/local/cuda/targets/aarch64-linux/lib
# Libraries to link: pthread, Triton server, OpenCV modules, and the CUDA runtime.
GCC_LIBS+=-lpthread -ltritonserver -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_imgcodecs -lopencv_dnn -lcudart

# Default goal: build the executable.
all: $(TARGET)


# Pattern rule: compile any .cc file into an object file with debug info (-g).
# $@ is the target object file, $^ the list of prerequisites.
%.o: %.cc
	$(GCC) $(GCC_PARMS) -c -g -o $@ $^

# Link the object file into the final executable.
$(TARGET): $(TARGET).o
	$(GCC) $^ $(GCC_LIBS) -o $@

# Remove build outputs; -f keeps this from failing when they do not exist.
clean:
	rm -f $(TARGET).o $(TARGET)

# "all" and "clean" are commands, not files, so always run them when requested.
.PHONY: all clean


================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/README.md
================================================
<!--
# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Concurrent inference and dynamic batching

The purpose of this sample is to demonstrate the important features of Triton Inference Server such as concurrent model execution and dynamic batching.

We will be using a purpose built deployable people detection model, which we download from [Nvidia GPU Cloud (NGC)](https://ngc.nvidia.com/).

## Acquiring the model

Download the pruned [PeopleNet](https://ngc.nvidia.com/catalog/models/nvidia:tlt_peoplenet) model from the NGC. This model is available as a ready-to-use model, and you can download it from NGC using either the `wget` method:

```shell
wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/tao/peoplenet/versions/pruned_v2.1/zip -O pruned_v2.1.zip
```

or via CLI command:

```shell
ngc registry model download-version "nvidia/tao/peoplenet:pruned_v2.1"
```

For the latter, you need to set up the [NGC CLI](https://ngc.nvidia.com/setup).

Having downloaded the model from the NGC, unzip the archive `pruned_v2.1.zip` into `concurrency_and_dynamic_batching/tao/models/peoplenet`.

If you have the zip archive in the `concurrency_and_dynamic_batching` directory, the following will automatically place the model to the correct location:

```shell
unzip pruned_v2.1.zip -d $(pwd)/tao/models/peoplenet
```

Verify that you can see the model file `resnet34_peoplenet_pruned.etlt` under

```
concurrency_and_dynamic_batching
└── tao
       └── models
           └── peoplenet
               ├── labels.txt
               └── resnet34_peoplenet_pruned.etlt
```

## Converting the model to TensorRT

After you have acquired the model file in `.etlt` format, you will need to convert the model to [TensorRT](https://developer.nvidia.com/tensorrt) format. NVIDIA TensorRT is an SDK for high-performance deep learning inference. It includes a deep learning inference optimizer and runtime that delivers low latency and high throughput for deep learning inference applications. The latest versions of JetPack include TensorRT.

In order to convert an `.etlt` model to TensorRT format, you need to use the `tao-converter` tool.

The `tao-converter` tool is available as a compiled release file for different platforms. The download links corresponding to your deployment system are provided among the [TLT Getting Started resources](https://developer.nvidia.com/tlt-get-started).

After you have downloaded `tao-converter`, you might need to execute

```shell
chmod 777 tao-converter
```

in the directory with the tool.

We provide a conversion script `tao/convert_peoplenet.sh`, which expects the model to be present at the following location:

```shell
tao
└──  models
   └── peoplenet
```

To execute it, place the `tao-converter` executable in the `tao` directory of the project and, from that same directory, run

```shell
bash convert_peoplenet.sh
```

After you execute it, verify that a `model.plan` file was placed in the directories `/trtis_model_repo_sample_1/peoplenet/1` and `/trtis_model_repo_sample_2/peoplenet/1`. Note that we have two slightly different repositories for the same model to demonstrate different features of Triton.

Also note that this step has to be performed on the target hardware: if you are planning to execute this application on Jetson, the conversion has to be performed on Jetson.

To learn more about `tao-converter` parameters, run:

```shell
./tao-converter -h
```

## Building the app

To compile the sample, pull the following repositories:
* [https://github.com/triton-inference-server/server](https://github.com/triton-inference-server/server)
* [https://github.com/triton-inference-server/core](https://github.com/triton-inference-server/core)

Make sure you copied the contents of the release you downloaded to `$HOME`

```shell
sudo cp -rf tritonserver2.x.y-jetpack4.6 $HOME/tritonserver
```

Open the terminal in `concurrency_and_dynamic_batching` and build the app executing

```shell
make
```

An example Makefile is provided for Jetson.

## Demonstration  case 1: Concurrent model execution

With Triton Inference Server, multiple models (or multiple instances of the same model) can run simultaneously on the same GPU or on multiple GPUs. In this example, we are demonstrating how to run multiple instances of the same model on a single Jetson GPU.

### Running the sample

To execute from the terminal, run from the `concurrency_and_dynamic_batching` directory:

```shell
LD_LIBRARY_PATH=$HOME/tritonserver/lib ./people_detection -m system -v -r $(pwd)/trtis_model_repo_sample_1 -t 6 -s false -p $HOME/tritonserver
```

The parameter `-t` controls the number of concurrent inference calls we want to execute. We will be executing the same model on the same sample image with the purpose of demonstrating how setting different concurrency options affects the performance.

You can enable saving detected bounding boxes in the project directory in form of overlays over the original image for each execution thread. You can turn the visualization on by setting the parameter `-s` to `true` upon execution (`-s` is set to `false` by default).

### Expected output

Upon execution, in the terminal log you will see _Model 'peoplenet' Stats_ in json format reflecting the inference performance. We also output _TOTAL INFERENCE TIME_ which simply reflects the elapsed time required to run the application including data loading, pre-processing and post-processing.

A typical output in the log for _Model 'peoplenet' Stats_ looks as follows:

```json
{
   "model_stats":[
      {
         "name":"peoplenet",
         "version":"1",
         "last_inference":1626448309997,
         "inference_count":6,
         "execution_count":6,
         "inference_stats":{
            "success":{
               "count":6,
               "ns":574589968
            },
            "fail":{
               "count":0,
               "ns":0
            },
            "queue":{
               "count":6,
               "ns":234669630
            },
            "compute_input":{
               "count":6,
               "ns":194884512
            },
            "compute_infer":{
               "count":6,
               "ns":97322636
            },
            "compute_output":{
               "count":6,
               "ns":47700806
            }
         },
         "batch_stats":[
            {
               "batch_size":1,
               "compute_input":{
                  "count":6,
                  "ns":194884512
               },
               "compute_infer":{
                  "count":6,
                  "ns":97322636
               },
               "compute_output":{
                  "count":6,
                  "ns":47700806
               }
            }
         ]
      }
   ]
}

"TOTAL INFERENCE TIME: 174ms"
```

To learn about different statistics check out the [documentation](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_statistics.md#statistics-extension).

To see how setting different values for concurrency affects total execution time and its components reflected in the model stats, you need to modify a single parameter in the model config file.

To enable concurrent model execution support for a model, corresponding model config file `trtis_model_repo_sample_1/peoplenet/config.pbtxt` includes the following:

```
instance_group [
  {
    count: 3
    kind: KIND_GPU
  }
]
```

You can change the count of allowed inferences for the same model instance and observe how it affects performance in _Model 'peoplenet' Stats_ and _TOTAL INFERENCE TIME_. Note that on Jetson we don't recommend setting values too high: for instance, on a device like a Jetson Xavier AGX we don't recommend setting the number larger than 6. The values in the range 1-3 are optimal.

While trying out different values, note how it affects total inference time as well as some inference statistics (like queue and compute times)

## Demonstration case 2: Dynamic batching

For models that support batching, Triton implements multiple scheduling and batching algorithms that combine individual inference requests together to improve inference throughput. In this example, we want to demonstrate how enabling automatic dynamic batching affects inference performance.

### Running the sample

To observe the effect of dynamic batching, from the `concurrency_and_dynamic_batching` directory execute:

```shell
LD_LIBRARY_PATH=$HOME/tritonserver/lib ./people_detection -m system -v -r $(pwd)/trtis_model_repo_sample_2 -t 6 -s false -p $HOME/tritonserver
```

### Expected output

Take a look at _Model 'peoplenet' Stats_ and _TOTAL INFERENCE TIME_ to see the effect of dynamic batching. A possible outcome looks like this:

```json
{
   "model_stats":[
      {
         "name":"peoplenet",
         "version":"1",
         "last_inference":1626447787832,
         "inference_count":6,
         "execution_count":2,
         "inference_stats":{
            "success":{
               "count":6,
               "ns":558981051
            },
            "fail":{
               "count":0,
               "ns":0
            },
            "queue":{
               "count":6,
               "ns":49271380
            },
            "compute_input":{
               "count":6,
               "ns":170634044
            },
            "compute_infer":{
               "count":6,
               "ns":338079193
            },
            "compute_output":{
               "count":6,
               "ns":950544
            }
         },
         "batch_stats":[
            {
               "batch_size":1,
               "compute_input":{
                  "count":1,
                  "ns":15955684
               },
               "compute_infer":{
                  "count":1,
                  "ns":29917093
               },
               "compute_output":{
                  "count":1,
                  "ns":152264
               }
            },
            {
               "batch_size":5,
               "compute_input":{
                  "count":1,
                  "ns":30935672
               },
               "compute_infer":{
                  "count":1,
                  "ns":61632420
               },
               "compute_output":{
                  "count":1,
                  "ns":159656
               }
            }
         ]
      }
   ]
}

"TOTAL INFERENCE TIME: 162ms"
```

Notice that this time the model was executed only twice (as indicated by `execution_count`). Also, unlike in the previous example, the `batch_stats` part of the statistics looks different: we see that our model was executed one time with `batch = 1` and the second time with `batch = 5`. This helped to decrease the total inference time.

In order to enable dynamic batching, the following is present in the model config `trtis_model_repo_sample_2/peoplenet/config.pbtxt`:

```
dynamic_batching {
}
```

To try further options of dynamic batcher see the [documentation](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/batcher.md#dynamic-batcher).

You can also try enabling both concurrent model execution and dynamic batching.

================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/common.h
================================================
// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <iostream>
#include <string>

#include "triton/core/tritonserver.h"

// Evaluate X, an expression yielding a TRITONSERVER_Error*. If the result is
// non-null, return it unchanged from the enclosing function; the error object
// is not deleted here, so the receiver of the return value gets it as-is.
// X is evaluated exactly once. The do/while(false) wrapper makes the macro
// behave like a single statement.
#define RETURN_IF_ERR(X)             \
  do {                               \
    TRITONSERVER_Error* err__ = (X); \
    if (err__ != nullptr) {          \
      return err__;                  \
    }                                \
  } while (false)

// Evaluate X, an expression yielding a TRITONSERVER_Error*. If the result is
// non-null, return a new error from the enclosing function that carries the
// same error code and the message "MSG: <original message>".
// The original error object is deleted once it has been wrapped, so only the
// returned error remains for the caller to handle. X is evaluated exactly
// once.
#define RETURN_MSG_IF_ERR(X, MSG)                                      \
  do {                                                                 \
    TRITONSERVER_Error* err__ = (X);                                   \
    if (err__ != nullptr) {                                            \
      TRITONSERVER_Error* wrapped_err__ = TRITONSERVER_ErrorNew(       \
          TRITONSERVER_ErrorCode(err__),                               \
          (std::string(MSG) + ": " + TRITONSERVER_ErrorMessage(err__)) \
              .c_str());                                               \
      TRITONSERVER_ErrorDelete(err__);                                 \
      return wrapped_err__;                                            \
    }                                                                  \
  } while (false)

// Evaluate X, an expression yielding a TRITONSERVER_Error*. If the result is
// non-null, jump to label T in the enclosing function.
// The error pointer is local to the macro and cannot be reached from the
// label, so the error object is deleted before the jump to avoid leaking it.
// X is evaluated exactly once.
#define GOTO_IF_ERR(X, T)              \
  do {                                 \
    TRITONSERVER_Error* err__ = (X);   \
    if (err__ != nullptr) {            \
      TRITONSERVER_ErrorDelete(err__); \
      goto T;                          \
    }                                  \
  } while (false)

// Print "error: <MSG>" to stderr and terminate the process with exit
// status 1. MSG may be any value that can be streamed to std::cerr.
#define FAIL(MSG)                                 \
  do {                                            \
    std::cerr << "error: " << (MSG) << std::endl; \
    exit(1);                                      \
  } while (false)

// Evaluate X, an expression yielding a TRITONSERVER_Error*. If the result is
// non-null, print "error: <MSG>: <code string> - <error message>" to stderr,
// delete the error object and terminate the process with exit status 1.
// When X yields nullptr the macro has no effect. X is evaluated exactly once.
#define FAIL_IF_ERR(X, MSG)                                       \
  do {                                                            \
    TRITONSERVER_Error* err__ = (X);                              \
    if (err__ != nullptr) {                                       \
      std::cerr << "error: " << (MSG) << ": "                     \
                << TRITONSERVER_ErrorCodeString(err__) << " - "   \
                << TRITONSERVER_ErrorMessage(err__) << std::endl; \
      TRITONSERVER_ErrorDelete(err__);                            \
      exit(1);                                                    \
    }                                                             \
  } while (false)

// Evaluate X, an expression yielding a TRITONSERVER_Error*, and discard any
// error it produces: a non-null result is deleted and execution continues.
// X is evaluated exactly once.
#define IGNORE_ERR(X)                  \
  do {                                 \
    TRITONSERVER_Error* err__ = (X);   \
    if (err__ != nullptr) {            \
      TRITONSERVER_ErrorDelete(err__); \
    }                                  \
  } while (false)

#ifdef TRITON_ENABLE_GPU
// Evaluate X, an expression yielding a cudaError_t. If the result is not
// cudaSuccess, print "error: <MSG>: <CUDA error string>" to stderr and
// terminate the process with exit status 1. X is evaluated exactly once.
// Only defined when TRITON_ENABLE_GPU is defined. This header does not itself
// include a CUDA header directly, so cudaError_t and cudaGetErrorString are
// expected to be declared where the macro is expanded.
#define FAIL_IF_CUDA_ERR(X, MSG)                                           \
  do {                                                                     \
    cudaError_t err__ = (X);                                               \
    if (err__ != cudaSuccess) {                                            \
      std::cerr << "error: " << (MSG) << ": " << cudaGetErrorString(err__) \
                << std::endl;                                              \
      exit(1);                                                             \
    }                                                                      \
  } while (false)
#endif  // TRITON_ENABLE_GPU

/// Get the integral version from a string, or fail if the string does not
/// represent a valid version.
///
/// Declaration only; the definition is not part of this header.
///
/// \param version_string The string version.
/// \param version Returns the integral version.
/// \return The error status. Failure if 'version_string' doesn't
/// convert to a valid version.
TRITONSERVER_Error* GetModelVersionFromString(
    const std::string& version_string, int64_t* version);


================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/labels.txt
================================================
person
bag
face


================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc
================================================
// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <rapidjson/document.h>
#include <rapidjson/error/en.h>
#include <unistd.h>

#include <chrono>
#include <cstring>
#include <future>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#include "common.h"
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/opencv.hpp"
#include "triton/core/tritonserver.h"

#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif  // TRITON_ENABLE_GPU

namespace {

// When true, ResponseAlloc ignores the memory type preferred by the server
// and allocates output buffers in 'requested_memory_type' instead.
bool enforce_memory_type = false;
// Memory type forced onto output buffers; ResponseAlloc only reads it while
// 'enforce_memory_type' is true.
TRITONSERVER_MemoryType requested_memory_type;

#ifdef TRITON_ENABLE_GPU
// Deleter for buffers obtained from the CUDA runtime. It queries the pointer
// attributes to decide how to release 'data': device memory is released with
// cudaFree and CUDA host memory with cudaFreeHost. Other pointer types are
// left untouched. A null pointer is ignored. Failures are reported on stderr
// and never thrown.
static auto cuda_data_deleter = [](void* data) {
  if (data == nullptr) {
    return;
  }

  cudaPointerAttributes attr;
  auto cuerr = cudaPointerGetAttributes(&attr, data);
  if (cuerr != cudaSuccess) {
    std::cerr << "error: failed to get CUDA pointer attribute of " << data
              << ": " << cudaGetErrorString(cuerr) << std::endl;
    // 'attr' holds no valid data when the query fails, so the kind of memory
    // is unknown; stop here rather than guess which release call to use.
    return;
  }

  if (attr.type == cudaMemoryTypeDevice) {
    cuerr = cudaFree(data);
  } else if (attr.type == cudaMemoryTypeHost) {
    cuerr = cudaFreeHost(data);
  }
  if (cuerr != cudaSuccess) {
    std::cerr << "error: failed to release CUDA pointer " << data << ": "
              << cudaGetErrorString(cuerr) << std::endl;
  }
};
#endif  // TRITON_ENABLE_GPU

// Print an optional diagnostic followed by the command-line help to stderr,
// then terminate the process with exit status 1. This function does not
// return.
//
// \param argv The program's argument vector; argv[0] is shown as the
// program name.
// \param msg Optional message printed on its own line before the help text.
void
Usage(char** argv, const std::string& msg = std::string())
{
  std::ostream& out = std::cerr;

  // A caller-supplied diagnostic, if any, goes first.
  if (!msg.empty()) {
    out << msg << std::endl;
  }

  out << "Usage: " << argv[0] << " [options]" << std::endl;

  // One line of help per option; adjacent literals are joined by the
  // compiler, so each option is written with a single line terminator.
  out << "\t-m <\"system\"|\"pinned\"|gpu>"
         " Enforce the memory type for input and output tensors."
         " If not specified, inputs will be in system memory and outputs"
         " will be based on the model's preferred type."
      << std::endl;
  out << "\t-v Enable verbose logging." << std::endl;
  out << "\t-t Thread count to simulate the number of concurrent requests."
      << std::endl;
  out << "\t-r [model repository absolute path]." << std::endl;
  out << "\t-p [tritonserver path]." << std::endl;
  out << "\t-s <true|false>."
         " Specify whether output visualizations will be saved to the "
         "project folder."
         " If not specified, no outputs will be saved."
      << std::endl;

  exit(1);
}

// Response allocator callback: provide the buffer that will hold one output
// tensor.
//
// The buffer is placed in the preferred memory type unless
// 'enforce_memory_type' is set, in which case 'requested_memory_type' is
// used. Memory types that are not handled (including the CUDA types when
// built without TRITON_ENABLE_GPU) fall back to CPU memory.
//
// \param allocator Unused.
// \param tensor_name Name of the output tensor; a heap copy is stored in
// 'buffer_userp' so the release callback can log it.
// \param byte_size Requested buffer size. Zero yields a null buffer.
// \param preferred_memory_type Memory type preferred by the server.
// \param preferred_memory_type_id Device id that goes with the preferred type.
// \param userp Unused.
// \param buffer Returns the allocated buffer.
// \param buffer_userp Returns the per-buffer user data (the tensor name).
// \param actual_memory_type Returns the memory type actually used.
// \param actual_memory_type_id Returns the device id actually used.
// \return nullptr on success, otherwise an error describing the failed
// CUDA call or allocation.
TRITONSERVER_Error*
ResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  // Initially attempt to make the actual memory type and id that we
  // allocate be the same as preferred memory type
  *actual_memory_type = preferred_memory_type;
  *actual_memory_type_id = preferred_memory_type_id;

  // If 'byte_size' is zero just return 'buffer' == nullptr, we don't
  // need to do any other book-keeping.
  if (byte_size == 0) {
    *buffer = nullptr;
    *buffer_userp = nullptr;
    std::cout << "allocated " << byte_size << " bytes for result tensor "
              << tensor_name << std::endl;
    return nullptr;  // Success
  }

  void* allocated_ptr = nullptr;
  if (enforce_memory_type) {
    *actual_memory_type = requested_memory_type;
  }

  switch (*actual_memory_type) {
#ifdef TRITON_ENABLE_GPU
    case TRITONSERVER_MEMORY_CPU_PINNED: {
      auto err = cudaSetDevice(*actual_memory_type_id);
      if ((err != cudaSuccess) && (err != cudaErrorNoDevice) &&
          (err != cudaErrorInsufficientDriver)) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            std::string(
                "unable to recover current CUDA device: " +
                std::string(cudaGetErrorString(err)))
                .c_str());
      }

      err = cudaHostAlloc(&allocated_ptr, byte_size, cudaHostAllocPortable);
      if (err != cudaSuccess) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            std::string(
                "cudaHostAlloc failed: " +
                std::string(cudaGetErrorString(err)))
                .c_str());
      }
      break;
    }

    case TRITONSERVER_MEMORY_GPU: {
      auto err = cudaSetDevice(*actual_memory_type_id);
      if ((err != cudaSuccess) && (err != cudaErrorNoDevice) &&
          (err != cudaErrorInsufficientDriver)) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            std::string(
                "unable to recover current CUDA device: " +
                std::string(cudaGetErrorString(err)))
                .c_str());
      }

      err = cudaMalloc(&allocated_ptr, byte_size);
      if (err != cudaSuccess) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            std::string(
                "cudaMalloc failed: " + std::string(cudaGetErrorString(err)))
                .c_str());
      }
      break;
    }
#endif  // TRITON_ENABLE_GPU

    // Use CPU memory if the requested memory type is unknown
    // (default case).
    case TRITONSERVER_MEMORY_CPU:
    default: {
      *actual_memory_type = TRITONSERVER_MEMORY_CPU;
      allocated_ptr = malloc(byte_size);
      break;
    }
  }

  // A non-zero request that produced no buffer is a failure; report it
  // instead of returning success with '*buffer' left unset.
  if (allocated_ptr == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        std::string(
            "failed to allocate " + std::to_string(byte_size) +
            " bytes for result tensor " + tensor_name)
            .c_str());
  }

  // Pass the tensor name with buffer_userp so we can show it when
  // releasing the buffer.
  *buffer = allocated_ptr;
  *buffer_userp = new std::string(tensor_name);
  std::cout << "allocated " << byte_size << " bytes in "
            << TRITONSERVER_MemoryTypeString(*actual_memory_type)
            << " for result tensor " << tensor_name << std::endl;

  return nullptr;  // Success
}

// Response allocator callback: release a buffer previously handed out for an
// output tensor and the tensor-name string attached to it.
//
// \param allocator Unused.
// \param buffer The buffer to release.
// \param buffer_userp Heap-allocated std::string with the tensor name, or
// nullptr when no name was attached.
// \param byte_size Size of the buffer; used for logging only.
// \param memory_type Memory type the buffer lives in.
// \param memory_type_id Device id that goes with 'memory_type'.
// \return Always nullptr; release problems are reported on stderr.
TRITONSERVER_Error*
ResponseRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  std::string* attached_name = static_cast<std::string*>(buffer_userp);
  const std::string shown_name =
      (attached_name != nullptr) ? *attached_name : std::string("<unknown>");

  std::cout << "Releasing buffer " << buffer << " of size " << byte_size
            << " in " << TRITONSERVER_MemoryTypeString(memory_type)
            << " for result '" << shown_name << "'" << std::endl;

  switch (memory_type) {
    case TRITONSERVER_MEMORY_CPU: {
      free(buffer);
      break;
    }
#ifdef TRITON_ENABLE_GPU
    // Both CUDA memory kinds select the device first and differ only in the
    // release call; the diagnostic text is shared.
    case TRITONSERVER_MEMORY_CPU_PINNED:
    case TRITONSERVER_MEMORY_GPU: {
      auto status = cudaSetDevice(memory_type_id);
      if (status == cudaSuccess) {
        status = (memory_type == TRITONSERVER_MEMORY_GPU)
                     ? cudaFree(buffer)
                     : cudaFreeHost(buffer);
      }
      if (status != cudaSuccess) {
        std::cerr << "error: failed to cudaFree " << buffer << ": "
                  << cudaGetErrorString(status) << std::endl;
      }
      break;
    }
#endif  // TRITON_ENABLE_GPU
    default: {
      std::cerr << "error: unexpected buffer allocated in CUDA managed memory"
                << std::endl;
      break;
    }
  }

  // Deleting a null pointer is a no-op, so this also covers the case where
  // no name was attached.
  delete attached_name;

  return nullptr;  // Success
}

// Request-release callback. Intentionally does nothing: the request object
// is reused by the caller, so it must not be deleted here.
//
// \param request The inference request being released (left untouched).
// \param flags Unused.
// \param userp Unused.
void
InferRequestComplete(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  // We reuse the request so we don't delete it here.
}

// Response-complete callback: hand the finished response to the waiting
// thread through the promise passed in 'userp', then destroy that promise.
//
// \param response The completed response; when nullptr nothing is done and
// the promise behind 'userp' is neither fulfilled nor deleted.
// \param flags Unused.
// \param userp Heap-allocated
// std::promise<TRITONSERVER_InferenceResponse*> owned by this callback once
// a response arrives.
void
InferResponseComplete(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
  if (response == nullptr) {
    return;
  }

  using ResponsePromise = std::promise<TRITONSERVER_InferenceResponse*>;

  // Send 'response' to the future and release the promise.
  ResponsePromise* pending = reinterpret_cast<ResponsePromise*>(userp);
  pending->set_value(response);
  delete pending;
}


// Validate the tensor data types listed in the model metadata.
//
// Every entry of the "inputs" and "outputs" arrays must have "datatype"
// equal to "FP32", and all entries must agree on a single data type.
//
// \param model_metadata Parsed model metadata containing "inputs" and
// "outputs" arrays whose elements carry a "datatype" string.
// \return nullptr when the metadata is acceptable; otherwise a
// TRITONSERVER_ERROR_UNSUPPORTED error for a non-FP32 tensor, or a
// TRITONSERVER_ERROR_INVALID_ARG error when the data types differ.
TRITONSERVER_Error*
ParseModelMetadata(const rapidjson::Document& model_metadata)
{
  const std::string supported_data_type = "FP32";

  // Data type of the first input; all other tensors are compared to it.
  std::string seen_data_type;

  for (const auto& input : model_metadata["inputs"].GetArray()) {
    const std::string data_type = input["datatype"].GetString();
    if (data_type != supported_data_type) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "this example only supports model with data type FP32");
    }
    if (seen_data_type.empty()) {
      seen_data_type = data_type;
    } else if (data_type != seen_data_type) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "the inputs and outputs of this model must have the same data type");
    }
  }

  for (const auto& output : model_metadata["outputs"].GetArray()) {
    const std::string data_type = output["datatype"].GetString();
    if (data_type != supported_data_type) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "this example only supports model with data type FP32");
    }
    if (data_type != seen_data_type) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "the inputs and outputs of this model must have the same data type");
    }
  }

  return nullptr;
}


// Scale 'input' to fit inside 'dstSize' without changing its aspect ratio,
// then pad it with 'bgcolor' so the result is exactly 'dstSize'.
//
// \param input Image to resize.
// \param dstSize Size of the returned image.
// \param bgcolor Colour of the padding border.
// \param fixHeight Returns true when the image was scaled to the full target
// height (padding left/right), false when it was scaled to the full target
// width (padding top/bottom).
// \param ratio Returns the scale factor that was applied.
// \param sideCache Returns the scaled length, in pixels, of the side that
// did not fill the target (height when 'fixHeight' is false, width when it
// is true).
// \return The resized and padded image.
cv::Mat
ResizeKeepAspectRatio(
    const cv::Mat& input, const cv::Size& dstSize, const cv::Scalar& bgcolor,
    bool& fixHeight, float& ratio, int& sideCache)
{
  // Height obtained when scaling to the full target width, and width
  // obtained when scaling to the full target height.
  const double height_at_full_width =
      dstSize.width * (input.rows / static_cast<double>(input.cols));
  const double width_at_full_height =
      dstSize.height * (input.cols / static_cast<double>(input.rows));

  cv::Mat resized;
  if (height_at_full_width <= dstSize.height) {
    cv::resize(
        input, resized, cv::Size(dstSize.width, height_at_full_width));
    ratio = static_cast<float>(dstSize.width) / input.cols;
    fixHeight = false;
    sideCache = static_cast<int>(ratio * input.rows);
    std::cout << "Resizing to fixed width. Ratio " << ratio << std::endl;
    std::cout << "Height cache " << sideCache << std::endl;
  } else {
    cv::resize(
        input, resized, cv::Size(width_at_full_height, dstSize.height));
    ratio = static_cast<float>(dstSize.height) / input.rows;
    fixHeight = true;
    sideCache = static_cast<int>(ratio * input.cols);
    std::cout << "Resizing to fixed height. Ratio " << ratio << std::endl;
    std::cout << "Width cache " << sideCache << std::endl;
  }

  // Split the leftover space evenly; an odd leftover pixel goes to the
  // bottom/right border.
  const int spare_rows = dstSize.height - resized.rows;
  const int spare_cols = dstSize.width - resized.cols;
  cv::copyMakeBorder(
      resized, resized, spare_rows / 2, (spare_rows + 1) / 2, spare_cols / 2,
      (spare_cols + 1) / 2, cv::BORDER_CONSTANT, bgcolor);

  return resized;
}


// Draw the selected bounding boxes on the original image and save the result
// as "capture_overlay_<thread_id>.jpg" in the current working directory.
//
// Box coordinates are given in the padded model-input frame; they are mapped
// back to the original image by removing the padding offset and dividing by
// the resize ratio.
//
// \param bboxes_list All candidate boxes, in model-input coordinates.
// \param indexes Indices into 'bboxes_list' of the boxes to draw.
// \param input0_shape Shape of the model input; elements [2] and [3] are
// used as height and width.
// \param fixHeight True when the padding was added left/right, false when it
// was added top/bottom.
// \param ratio Scale factor between the original and the resized image.
// \param sideCache Scaled length of the padded side before padding.
// \param imageName Path of the original image.
// \param thread_id Id used to build the output file name.
//
// Problems (short shape, unreadable image, failed write) are reported on
// stderr and no overlay is produced.
void
SaveOverlay(
    std::vector<cv::Rect>& bboxes_list, std::vector<int>& indexes,
    std::vector<int64_t>& input0_shape, bool& fixHeight, float& ratio,
    int& sideCache, std::string imageName, size_t& thread_id)
{
  if (input0_shape.size() < 4) {
    std::cerr << "error: expected a 4-dimensional input shape, got "
              << input0_shape.size() << " dimensions; overlay not saved"
              << std::endl;
    return;
  }
  const int inputH = input0_shape[2];
  const int inputW = input0_shape[3];

  cv::Mat image = cv::imread(imageName);
  if (image.empty()) {
    // cv::imread signals failure with an empty matrix.
    std::cerr << "error: unable to read image '" << imageName
              << "'; overlay not saved" << std::endl;
    return;
  }

  const cv::Scalar color(0, 255, 0);

  // Only the padded axis carries an offset; the other axis starts at 0.
  const int pad_x = fixHeight ? (inputW - sideCache) / 2 : 0;
  const int pad_y = fixHeight ? 0 : (inputH - sideCache) / 2;

  for (int i : indexes) {
    const cv::Rect& box = bboxes_list[i];
    const int xmin = int((box.x - pad_x) / ratio);
    const int ymin = int((box.y - pad_y) / ratio);
    const int xmax = int((box.x + box.width - pad_x) / ratio);
    const int ymax = int((box.y + box.height - pad_y) / ratio);

    cv::rectangle(
        image, cv::Point(xmin, ymin), cv::Point(xmax, ymax), color, 4);
  }

  const std::string outName =
      "capture_overlay_" + std::to_string(thread_id) + ".jpg";
  if (!cv::imwrite(outName, image)) {
    std::cerr << "error: unable to write overlay image '" << outName << "'"
              << std::endl;
  }
}


// Convert an interleaved 8-bit 3-channel image into planar float data scaled
// by 1/255.
//
// The value of channel c at (row, col) is written to
// (*data)[c * rows * cols + row * cols + col].
//
// \param img Source image, read as rows of cv::Vec3b pixels.
// \param data Destination vector; it is written by index and never resized,
// so it must already be large enough.
// \param inputC Number of channels to extract from each pixel.
void
Normalize(cv::Mat img, std::vector<float>*& data, int inputC)
{
  const int plane_size = img.cols * img.rows;

  for (int row = 0; row < img.rows; ++row) {
    const cv::Vec3b* pixels = img.ptr<cv::Vec3b>(row);
    for (int col = 0; col < img.cols; ++col) {
      const int in_plane = row * img.cols + col;
      for (int ch = 0; ch < inputC; ++ch) {
        (*data)[ch * plane_size + in_plane] = pixels[col][ch] / 255.f;
      }
    }
  }
}


// Decode the raw network outputs into candidate boxes and run non-maximum
// suppression on them.
//
// Both outputs are indexed as NCHW tensors:
//   value(n, c, h, w) = n * C * H * W + c * H * W + h * W + w
// Only channel 0 of "output_cov/Sigmoid" (the score) and channels 0..3 of
// "output_bbox/BiasAdd" (x1, y1, x2, y2 offsets) are decoded - presumably the
// first object class of the model; confirm against the model card.
//
//   output_data   host copies of the two outputs, keyed by output name
//   shapes        shape arrays of the two outputs, keyed by output name
//   input0_shape  network input shape, read as [batch, _, height, width]
//   bboxes_list   receives one box per grid cell whose score exceeds 0.5
//   scores_list   receives the score of every box pushed to bboxes_list
//   indexes       receives the indices kept by cv::dnn::NMSBoxes
//
// The per-batch stride of each output is derived from its own channel count
// (shape[1]) so that entries of batches other than the first are addressed
// correctly.
void
RecoverBoundingBoxes(
    std::unordered_map<std::string, std::vector<float>>& output_data,
    std::unordered_map<std::string, const int64_t*>& shapes,
    std::vector<int64_t>& input0_shape, std::vector<cv::Rect>& bboxes_list,
    std::vector<float>& scores_list, std::vector<int>& indexes)
{
  const float box_scale = 35.f;
  const float box_offset = 0.5f;
  const float score_threshold = 0.5f;
  const float nms_threshold = 0.5f;

  const int64_t* cov_shape = shapes["output_cov/Sigmoid"];
  const int64_t* bbox_shape = shapes["output_bbox/BiasAdd"];
  if ((cov_shape == nullptr) || (bbox_shape == nullptr)) {
    std::cerr << "error: missing shape for a detection output" << std::endl;
    return;
  }

  int gridH = cov_shape[2];
  int gridW = cov_shape[3];

  std::cout << "gridH: " << gridH << std::endl;
  std::cout << "gridW: " << gridW << std::endl;

  int modelH = input0_shape[2];
  int modelW = input0_shape[3];
  int batch = input0_shape[0];

  std::cout << "batch: " << batch << std::endl;
  std::cout << "modelH: " << modelH << std::endl;
  std::cout << "modelW: " << modelW << std::endl;

  if ((gridH <= 0) || (gridW <= 0)) {
    // Avoid dividing by zero below; there is nothing to decode anyway.
    std::cerr << "error: invalid output grid size" << std::endl;
    return;
  }

  // Size of one grid cell in network input pixels.
  int cellH = modelH / gridH;
  int cellW = modelW / gridW;

  // Number of elements in one channel plane and in one batch entry.
  const int plane = gridH * gridW;
  const int covBatchStride = static_cast<int>(cov_shape[1]) * plane;
  const int bboxBatchStride = static_cast<int>(bbox_shape[1]) * plane;

  const std::vector<float>& cov = output_data["output_cov/Sigmoid"];
  const std::vector<float>& bbox = output_data["output_bbox/BiasAdd"];

  for (int b = 0; b < batch; b++) {
    const int covBase = b * covBatchStride;
    const int bboxBase = b * bboxBatchStride;

    for (int h = 0; h < gridH; h++) {
      for (int w = 0; w < gridW; w++) {
        const int cell = h * gridW + w;

        float val = cov[covBase + cell];
        if (val > score_threshold) {
          scores_list.push_back(val);

          // location of the w, h coordinate in the network input
          int mx = w * cellW;
          int my = h * cellH;

          // channels 0..3 hold the x1, y1, x2, y2 offsets of the cell
          int idxX1 = bboxBase + 0 * plane + cell;
          int idxY1 = bboxBase + 1 * plane + cell;
          int idxX2 = bboxBase + 2 * plane + cell;
          int idxY2 = bboxBase + 3 * plane + cell;

          // scale the offsets and anchor them at the cell position
          int rectX1 = -(bbox[idxX1] + box_offset) * box_scale + mx;
          int rectY1 = -(bbox[idxY1] + box_offset) * box_scale + my;
          int rectX2 = (bbox[idxX2] + box_offset) * box_scale + mx;
          int rectY2 = (bbox[idxY2] + box_offset) * box_scale + my;

          // Rect ROI (x, y, width, height);
          cv::Rect bbox_rect(
              rectX1, rectY1, rectX2 - rectX1, rectY2 - rectY1);
          bboxes_list.push_back(bbox_rect);
        }
      }
    }
  }

  // Execute non-maximum suppression
  cv::dnn::NMSBoxes(
      bboxes_list, scores_list, score_threshold, nms_threshold, indexes);
}

// Copy the two expected FP32 outputs of an inference response into host
// vectors.
//
//   response     response to read; it must contain exactly two outputs
//   output0/1    the only output names that are accepted
//   output_data  receives the tensor values of each output, keyed by name
//   shapes       receives the shape pointer of each output, keyed by name
//
// Any violation (wrong output count, unknown name, non-FP32 datatype,
// unexpected memory type, API or CUDA error) is reported through FAIL /
// FAIL_IF_ERR / FAIL_IF_CUDA_ERR.
void
ParseDetections(
    TRITONSERVER_InferenceResponse* response, const std::string& output0,
    const std::string& output1,
    std::unordered_map<std::string, std::vector<float>>& output_data,
    std::unordered_map<std::string, const int64_t*>& shapes)
{
  uint32_t output_count;
  FAIL_IF_ERR(
      TRITONSERVER_InferenceResponseOutputCount(response, &output_count),
      "getting number of response outputs");
  if (output_count != 2) {
    FAIL("expecting 2 response outputs, got " + std::to_string(output_count));
  }

  for (uint32_t idx = 0; idx < output_count; ++idx) {
    const char* cname;
    TRITONSERVER_DataType datatype;
    const int64_t* shape;
    uint64_t dim_count;
    const void* base;
    size_t byte_size;
    TRITONSERVER_MemoryType memory_type;
    int64_t memory_type_id;
    void* userp;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseOutput(
            response, idx, &cname, &datatype, &shape, &dim_count, &base,
            &byte_size, &memory_type, &memory_type_id, &userp),
        "getting output info");

    if (cname == nullptr) {
      FAIL("unable to get output name");
    }

    std::string name(cname);
    if ((name != output0) && (name != output1)) {
      FAIL("unexpected output '" + name + "'");
    }

    // The buffer is reinterpreted as float below, so reject anything else.
    if (datatype != TRITONSERVER_TYPE_FP32) {
      FAIL(
          "unexpected datatype '" +
          std::string(TRITONSERVER_DataTypeString(datatype)) + "' for '" +
          name + "'");
    }

    // Only the pointer is stored; it refers to memory that belongs to the
    // response, so it must not be used after the response is deleted.
    shapes[name] = shape;

    std::vector<float>& odata = output_data[name];
    const size_t element_count = byte_size / sizeof(float);

    switch (memory_type) {
      case TRITONSERVER_MEMORY_CPU: {
        std::cout << std::endl
                  << name << " is stored in system memory" << std::endl;
        const float* cbase = reinterpret_cast<const float*>(base);
        odata.assign(cbase, cbase + element_count);
        break;
      }

      case TRITONSERVER_MEMORY_CPU_PINNED: {
        std::cout << std::endl
                  << name << " is stored in pinned memory" << std::endl;
        const float* cbase = reinterpret_cast<const float*>(base);
        odata.assign(cbase, cbase + element_count);
        break;
      }

#ifdef TRITON_ENABLE_GPU
      case TRITONSERVER_MEMORY_GPU: {
        std::cout << std::endl
                  << name << " is stored in GPU memory" << std::endl;
        // resize (not reserve) so that the vector really owns the elements
        // that cudaMemcpy writes and reports the right size afterwards.
        odata.resize(element_count);
        FAIL_IF_CUDA_ERR(
            cudaMemcpy(
                odata.data(), base, element_count * sizeof(float),
                cudaMemcpyDeviceToHost),
            "getting " + name + " data from GPU memory");
        break;
      }
#endif

      default:
        FAIL("unexpected memory type");
    }
  }
}

// Post-process one completed inference response.
//
// The two outputs are copied out of the response, decoded into boxes and
// filtered; the surviving indices and their boxes are printed to stdout.
// When visualize is true the surviving boxes are additionally drawn on
// imageName through SaveOverlay.
//
//   result_indexes  receives the indices of the boxes that were kept
//   bboxes_list     receives every candidate box
//   output0/1       names of the two outputs expected in the response
//   remaining arguments are forwarded unchanged to the helpers
void
DetectionInferenceOutput(
    std::vector<int>& result_indexes, std::vector<cv::Rect>& bboxes_list,
    TRITONSERVER_InferenceResponse* completed_response,
    const std::string& output0, const std::string& output1,
    std::vector<int64_t>& input0_shape, bool& fixHeight, float& ratio,
    int& sideCache, size_t& thread_id, bool visualize = false,
    std::string imageName = "capture.jpg")
{
  // Host copies of the raw tensors and their shapes, keyed by output name.
  std::unordered_map<std::string, std::vector<float>> tensors;
  std::unordered_map<std::string, const int64_t*> tensor_shapes;
  ParseDetections(
      completed_response, output0, output1, tensors, tensor_shapes);

  // Turn the raw tensors into boxes and keep the non-suppressed ones.
  std::vector<float> scores;
  RecoverBoundingBoxes(
      tensors, tensor_shapes, input0_shape, bboxes_list, scores,
      result_indexes);

  std::cout << "Detection finished. Indexes of detected objects: " << std::endl;
  for (size_t k = 0; k < result_indexes.size(); ++k) {
    const int kept = result_indexes[k];
    std::cout << kept << std::endl << bboxes_list[kept] << std::endl;
  }

  if (!visualize) {
    return;
  }

  SaveOverlay(
      bboxes_list, result_indexes, input0_shape, fixHeight, ratio, sideCache,
      imageName, thread_id);
}


}  // namespace


// Create a TRITONSERVER_ServerOptions object and configure it for this
// example.
//
//   server_options         receives the newly created options object
//   verbose_level          forwarded to TRITONSERVER_ServerOptionsSetLogVerbose
//   model_repository_path  model repository served by the instance
//   tritonserver_path      installation prefix; "/backends" and "/repoagents"
//                          are appended to locate backends and repo agents
//
// Metrics, strict readiness and strict model configuration are enabled and
// the model control mode is set to explicit. Every failing call is reported
// through FAIL_IF_ERR.
void
SetServerOptions(
    TRITONSERVER_ServerOptions** server_options, bool verbose_level,
    std::string model_repository_path, std::string tritonserver_path)
{
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsNew(server_options), "creating server options");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetModelRepositoryPath(
          *server_options, model_repository_path.c_str()),
      "setting model repository path");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetLogVerbose(*server_options, verbose_level),
      "setting verbose logging level");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetMetrics(*server_options, true),
      "failed to enable metrics");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetStrictReadiness(*server_options, true),
      "failed to set strict readiness");
  // Strict model configuration only needs to be requested once.
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetStrictModelConfig(*server_options, true),
      "failed to set strict model config");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetModelControlMode(
          *server_options, TRITONSERVER_MODEL_CONTROL_EXPLICIT),
      "failed to set model control mode to explicit");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetBackendDirectory(
          *server_options, (tritonserver_path + "/backends").c_str()),
      "setting backend directory");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetRepoAgentDirectory(
          *server_options, (tritonserver_path + "/repoagents").c_str()),
      "setting repository agent directory");
#ifdef TRITON_ENABLE_GPU
  double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY;
#else
  double min_compute_capability = 0;
#endif  // TRITON_ENABLE_GPU
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(
          *server_options, min_compute_capability),
      "setting minimum supported CUDA compute capability");
}


// Block until the server reports itself both live and ready.
//
// Liveness and readiness are queried and printed once per attempt, with a
// one second pause between attempts. After the tenth unsuccessful attempt
// FAIL is invoked.
void
CheckServerLiveAndReady(std::shared_ptr<TRITONSERVER_Server> server)
{
  const size_t max_checks = 10;
  const auto retry_delay = std::chrono::milliseconds(1000);

  for (size_t checks_done = 1;; ++checks_done) {
    bool live, ready;
    FAIL_IF_ERR(
        TRITONSERVER_ServerIsLive(server.get(), &live),
        "unable to get server liveness");
    FAIL_IF_ERR(
        TRITONSERVER_ServerIsReady(server.get(), &ready),
        "unable to get server readiness");
    std::cout << "Server Health: live " << live << ", ready " << ready
              << std::endl;

    if (live && ready) {
      return;
    }

    if (checks_done >= max_checks) {
      FAIL("failed to find healthy inference server");
    }

    std::this_thread::sleep_for(retry_delay);
  }
}


void
PrintServerStatus(std::shared_ptr<TRITONSERVER_Server> server)
{
  TRITONSERVER_Message* server_metadata_message;
  FAIL_IF_ERR(
      TRITONSERVER_ServerMetadata(server.get(), &server_metadata_message),
      "unable to get server metadata message");
  const char* buffer;
  size_t byte_size;
  FAIL_IF_ERR(
      TRITONSERVER_MessageSerializeToJson(
          server_metadata_message, &buffer, &byte_size),
      "unable to serialize server metadata message");

  std::cout << "Server Status:" << std::endl;
  std::cout << std::string(buffer, byte_size) << std::endl;

  FAIL_IF_ERR(
      TRITONSERVER_MessageDelete(server_metadata_message),
      "deleting status metadata");
}


void
AwaitModelReady(
    std::shared_ptr<TRITONSERVER_Server> server, const std::string model_name)
{
  bool is_ready = false;
  size_t wait_seconds = 0;
  while (!is_ready) {
    FAIL_IF_ERR(
        TRITONSERVER_ServerModelIsReady(
            server.get(), model_name.c_str(), 1, &is_ready),
        "unable to get model readiness");
    if (!is_ready) {
      if (++wait_seconds >= 5) {
        FAIL("model failed to be ready in 5 seconds");
      }
      std::this_thread::sleep_for(std::chrono::milliseconds(1000));
      continue;
    }

    TRITONSERVER_Message* model_metadata_message;
    FAIL_IF_ERR(
        TRITONSERVER_ServerModelMetadata(
            server.get(), model_name.c_str(), 1, &model_metadata_message),
        "unable to get model metadata message");
    const char* buffer;
    size_t byte_size;
    FAIL_IF_ERR(
        TRITONSERVER_MessageSerializeToJson(
            model_metadata_message, &buffer, &byte_size),
        "unable to serialize model status protobuf");

    rapidjson::Document model_metadata;
    model_metadata.Parse(buffer, byte_size);
    if (model_metadata.HasParseError()) {
      FAIL(
          "error: failed to parse model metadata from JSON: " +
          std::string(GetParseError_En(model_metadata.GetParseError())) +
          " at " + std::to_string(model_metadata.GetErrorOffset()));
    }

    FAIL_IF_ERR(
        TRITONSERVER_MessageDelete(model_metadata_message),
        "deleting status protobuf");

    if (strcmp(model_metadata["name"].GetString(), model_name.c_str())) {
      FAIL("unable to find metadata for model");
    }

    bool found_version = false;
    if (model_metadata.HasMember("versions")) {
      for (const auto& version : model_metadata["versions"].GetArray()) {
        if (strcmp(version.GetString(), "1") == 0) {
          found_version = true;
          break;
        }
      }
    }
    if (!found_version) {
      FAIL("unable to find version 1 status for model");
    }

    FAIL_IF_ERR(ParseModelMetadata(model_metadata), "parsing model metadata");
  }
}


// Read imageName from disk and prepare it as network input.
//
// The image is resized to the network input size (input0_shape[3] x
// input0_shape[2]) by ResizeKeepAspectRatio using a black background, then
// converted from BGR to RGB.
//
//   dst           receives the prepared image
//   input0_shape  network input shape, read as [_, _, height, width]
//   fixHeight, ratio, sideCache
//                 passed through to ResizeKeepAspectRatio
//   imageName     path of the image to load
//
// If the image cannot be read, the error is printed to stderr and the process
// exits with a non-zero status.
void
LoadInputImageFromFile(
    cv::Mat& dst, std::vector<int64_t>& input0_shape, bool& fixHeight,
    float& ratio, int& sideCache, std::string imageName = "capture.jpg")
{
  const int inputH = input0_shape[2];
  const int inputW = input0_shape[3];

  cv::Mat image = cv::imread(imageName);

  if (image.empty()) {
    // A missing input is an error, so it must not be reported as success.
    std::cerr << "Cannot open image " << imageName << std::endl;
    exit(1);
  }

  // resize keeping aspect ratio and pad
  dst = ResizeKeepAspectRatio(
      image, cv::Size(inputW, inputH), cv::Scalar(0, 0, 0), fixHeight, ratio,
      sideCache);

  cv::cvtColor(dst, dst, cv::COLOR_BGR2RGB);
}


// Fill input0_data with the normalized planar float representation of dst.
//
//   dst           image already resized to the network input size
//   input0_data   resized to channels * height * width elements and filled
//   input0_shape  network input shape, read as [_, channels, height, width]
//
// The vector is sized in elements; callers that need the size in bytes must
// multiply size() by sizeof(float).
void
LoadInputData(
    cv::Mat& dst, std::vector<float>* input0_data,
    std::vector<int64_t>& input0_shape)
{
  const int inputC = input0_shape[1];
  const int inputH = input0_shape[2];
  const int inputW = input0_shape[3];

  // One float per channel and pixel.
  input0_data->resize(static_cast<size_t>(inputC) * inputH * inputW);

  // normalize
  Normalize(dst, input0_data, inputC);
}

// Held while a thread post-processes its response, so that only one thread at
// a time runs DetectionInferenceOutput (which prints its results).
static std::mutex mutex;

// Run one inference on model_name with scaled_input_image and post-process
// the detections.
//
//   server              server instance that executes the request
//   allocator           allocator used for the response tensors
//   scaled_input_image  image already prepared by LoadInputImageFromFile
//   fixHeight, ratio, sideCache
//                       resize parameters forwarded to the post-processing
//   model_name          model to run (latest version, -1)
//   thread_id           identifies the caller in the overlay file name
//   visualize           when true an overlay image is written
//
// The input is sent from system, pinned or GPU memory depending on the global
// enforce_memory_type / requested_memory_type settings. Every failure is
// reported through FAIL_IF_ERR / FAIL_IF_CUDA_ERR.
void
RunInferenceAndValidate(
    std::shared_ptr<TRITONSERVER_Server> server,
    TRITONSERVER_ResponseAllocator* allocator, cv::Mat scaled_input_image,
    bool fixHeight, float ratio, int sideCache, std::string model_name,
    size_t thread_id, bool visualize)
{
  TRITONSERVER_InferenceRequest* irequest = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestNew(
          &irequest, server.get(), model_name.c_str(), -1),
      "creating inference request");

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"),
      "setting ID for the request");

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestSetReleaseCallback(
          irequest, InferRequestComplete, nullptr),
      "setting request release callback");

  // Inputs
  auto input0 = "input_1";
  std::vector<int64_t> input0_shape({1, 3, 544, 960});

  const TRITONSERVER_DataType datatype = TRITONSERVER_TYPE_FP32;

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddInput(
          irequest, input0, datatype, &input0_shape[0], input0_shape.size()),
      "setting input 0 meta-data for the request");

  // Outputs
  auto output0 = "output_bbox/BiasAdd";
  auto output1 = "output_cov/Sigmoid";

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0),
      "requesting output 0 for the request");
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1),
      "requesting output 1 for the request");

  // Load the input data
  std::vector<float> input0_data;
  std::vector<int> result_indexes;
  std::vector<cv::Rect> bboxes_list;

  LoadInputData(scaled_input_image, &input0_data, input0_shape);

  // Size of the input tensor in bytes, as expected by the CUDA and Triton
  // calls below.
  const size_t input0_size = input0_data.size() * sizeof(float);

  const void* input0_base = &input0_data[0];

#ifdef TRITON_ENABLE_GPU
  std::unique_ptr<void, decltype(cuda_data_deleter)> input0_gpu(
      nullptr, cuda_data_deleter);
  bool use_cuda_memory =
      (enforce_memory_type &&
       (requested_memory_type != TRITONSERVER_MEMORY_CPU));
  if (use_cuda_memory) {
    FAIL_IF_CUDA_ERR(cudaSetDevice(0), "setting CUDA device to device 0");
    if (requested_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) {
      void* dst;
      FAIL_IF_CUDA_ERR(
          cudaMalloc(&dst, input0_size),
          "allocating GPU memory for INPUT0 data");
      input0_gpu.reset(dst);
      FAIL_IF_CUDA_ERR(
          cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToDevice),
          "setting INPUT0 data in GPU memory");
    } else {
      void* dst;
      FAIL_IF_CUDA_ERR(
          cudaHostAlloc(&dst, input0_size, cudaHostAllocPortable),
          "allocating pinned memory for INPUT0 data");
      input0_gpu.reset(dst);
      FAIL_IF_CUDA_ERR(
          cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToHost),
          "setting INPUT0 data in pinned memory");
    }
  }

  input0_base = use_cuda_memory ? input0_gpu.get() : &input0_data[0];
#endif  // TRITON_ENABLE_GPU

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAppendInputData(
          irequest, input0, input0_base, input0_size, requested_memory_type, 0),
      "assigning INPUT0 data");

  // Perform inference...
  {
    // The promise is handed to the response callback as user pointer;
    // presumably InferResponseComplete fulfils and releases it - confirm.
    auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
    std::future<TRITONSERVER_InferenceResponse*> completed = p->get_future();

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetResponseCallback(
            irequest, allocator, nullptr, InferResponseComplete,
            reinterpret_cast<void*>(p)),
        "setting response callback");

    FAIL_IF_ERR(
        TRITONSERVER_ServerInferAsync(server.get(), irequest, nullptr),
        "running inference");

    // Wait for the inference to complete.
    TRITONSERVER_InferenceResponse* completed_response = completed.get();

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseError(completed_response),
        "response status");

    // Post-processing is serialized across threads.
    std::unique_lock<std::mutex> lock(mutex);

    // Process output
    DetectionInferenceOutput(
        result_indexes, bboxes_list, completed_response, output0, output1,
        input0_shape, fixHeight, ratio, sideCache, thread_id, visualize);

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseDelete(completed_response),
        "deleting inference response");
  }

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestDelete(irequest),
      "deleting inference request");
}


// Fetch the statistics of all versions (-1) of model_name, serialize them to
// JSON and print them to stdout below a "Model '<name>' Stats:" heading. The
// statistics message is deleted before returning; failures are reported
// through FAIL_IF_ERR.
void
PrintModelStats(
    std::shared_ptr<TRITONSERVER_Server> server, const std::string model_name)
{
  TRITONSERVER_Message* model_stats_message = nullptr;

  FAIL_IF_ERR(
      TRITONSERVER_ServerModelStatistics(
          server.get(), model_name.c_str(), -1 /* model_version */,
          &model_stats_message),
      "unable to get model stats message");
  const char* buffer;
  size_t byte_size;
  // The message serialized here is the model statistics, so the error text
  // names it accordingly.
  FAIL_IF_ERR(
      TRITONSERVER_MessageSerializeToJson(
          model_stats_message, &buffer, &byte_size),
      "unable to serialize model stats message");

  std::cout << "Model '" << model_name << "' Stats:" << std::endl;
  std::cout << std::string(buffer, byte_size) << std::endl;

  FAIL_IF_ERR(
      TRITONSERVER_MessageDelete(model_stats_message),
      "deleting model stats message");
}


// Start an in-process Triton server, run the "peoplenet" model from
// thread_count concurrent threads on the same input image and report the
// elapsed time.
//
//   model_repository_path  model repository served by the instance
//   tritonserver_path      installation prefix used to locate backends and
//                          repository agents
//   verbose_level          enables verbose server logging when true
//   thread_count           number of concurrent inference threads
//   visualize              when true each thread writes an overlay image
//
// Steps: create the server, wait until it is live and ready, load the model,
// load and pre-process the input image once, run all inference threads, print
// the model statistics, unload the model and print the total inference time.
// Failures are reported through FAIL_IF_ERR.
void
CreateAndRunTritonserverInstance(
    std::string model_repository_path, std::string tritonserver_path,
    bool verbose_level, int thread_count, bool visualize)
{
  TRITONSERVER_ServerOptions* server_options = nullptr;

  SetServerOptions(
      &server_options, verbose_level, model_repository_path, tritonserver_path);

  TRITONSERVER_Server* server_ptr = nullptr;

  FAIL_IF_ERR(
      TRITONSERVER_ServerNew(&server_ptr, server_options),
      "creating server instance. ");

  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsDelete(server_options),
      "deleting server options");

  std::shared_ptr<TRITONSERVER_Server> server(
      server_ptr, TRITONSERVER_ServerDelete);

  // Wait until the server is both live and ready.
  CheckServerLiveAndReady(server);

  // Print status of the server.
  PrintServerStatus(server);
  std::string model = "peoplenet";

  // Load the model in the server.
  FAIL_IF_ERR(
      TRITONSERVER_ServerLoadModel(server.get(), model.c_str()),
      "failed to load model peoplenet");

  // Wait for the model to become available.
  AwaitModelReady(server, model);

  // Create the allocator that will be used to allocate buffers for
  // the result tensors.
  TRITONSERVER_ResponseAllocator* allocator = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorNew(
          &allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */),
      "creating response allocator");


  // Measure total execution time
  using std::chrono::duration_cast;
  using std::chrono::high_resolution_clock;
  using std::chrono::milliseconds;

  cv::Mat scaled_input_image;
  // Defined start values; LoadInputImageFromFile receives these by reference.
  bool fixHeight = false;
  float ratio = 1.f;
  int sideCache = 0;
  std::vector<int64_t> input0_shape({1, 3, 544, 960});

  // the input image is loaded only once and used for all inferences
  LoadInputImageFromFile(
      scaled_input_image, input0_shape, fixHeight, ratio, sideCache);

  auto t1 = high_resolution_clock::now();

  // Multi-thread inference. A vector is used instead of a variable-length
  // array, which is not standard C++.
  std::vector<std::thread> inferences;
  for (int i = 0; i < thread_count; ++i) {
    inferences.emplace_back(
        &RunInferenceAndValidate, server, allocator, scaled_input_image,
        fixHeight, ratio, sideCache, model, static_cast<size_t>(i), visualize);
  }

  for (auto& inference : inferences) {
    inference.join();
  }

  // Second time point to measure elapsed time
  auto t2 = high_resolution_clock::now();

  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorDelete(allocator),
      "deleting response allocator");

  // Print the statistics of the model
  PrintModelStats(server, model);

  // Unload the model in the server.
  FAIL_IF_ERR(
      TRITONSERVER_ServerUnloadModel(server.get(), model.c_str()),
      "failed to unload model");

  /* Getting number of milliseconds as an integer. */
  auto ms_int = duration_cast<milliseconds>(t2 - t1);

  std::cout << "\n TOTAL INFERENCE TIME: " << ms_int.count() << "ms\n";
}


// Entry point: parse the command line, verify the Triton API version and run
// the example.
//
// Options handled here:
//   -v                    enable verbose server logging
//   -m system|pinned|gpu  memory type used for the input tensor
//   -r <path>             model repository path (required)
//   -p <path>             Triton server installation path
//   -t <count>            number of inference threads (default 2, >= 1)
//   -s true|false         write an overlay image with the detections
//
// Invalid arguments are reported through Usage; API errors through
// FAIL / FAIL_IF_ERR.
int
main(int argc, char** argv)
{
  std::string model_repository_path;
  std::string tritonserver_path;
  int verbose_level = 0;
  int thread_count = 2;
  bool visualize = false;

  // Parse commandline...
  int opt;
  while ((opt = getopt(argc, argv, "vm:r:p:t:s:")) != -1) {
    switch (opt) {
      case 'm': {
        enforce_memory_type = true;
        if (!strcmp(optarg, "system")) {
          requested_memory_type = TRITONSERVER_MEMORY_CPU;
        } else if (!strcmp(optarg, "pinned")) {
          requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED;
        } else if (!strcmp(optarg, "gpu")) {
          requested_memory_type = TRITONSERVER_MEMORY_GPU;
        } else {
          Usage(
              argv,
              "-m must be used to specify one of the following types:"
              " <\"system\"|\"pinned\"|gpu>");
        }
        break;
      }
      case 'r':
        model_repository_path = optarg;
        break;
      case 'p':
        tritonserver_path = optarg;
        break;
      case 'v':
        verbose_level = 1;
        break;
      case 't':
        // std::stoi throws on non-numeric or out-of-range input; turn that
        // into a usage error instead of an uncaught exception.
        try {
          thread_count = std::stoi(optarg);
        }
        catch (const std::exception&) {
          Usage(argv, "-t must be followed by an integer thread count");
        }
        break;
      case 's':
        if (!strcmp(optarg, "true")) {
          visualize = true;
        } else if (!strcmp(optarg, "false")) {
          visualize = false;
        } else {
          Usage(
              argv,
              "-s must be:"
              " <true|false>");
        }
        break;
      case '?':
        Usage(argv);
        break;
    }
  }

  if (thread_count < 1) {
    Usage(argv, "thread_count must be >= 1");
  }

  if (model_repository_path.empty()) {
    Usage(argv, "-r must be used to specify model repository path");
  }
#ifndef TRITON_ENABLE_GPU
  if (enforce_memory_type && requested_memory_type != TRITONSERVER_MEMORY_CPU) {
    Usage(argv, "-m can only be set to \"system\" without enabling GPU");
  }
#endif  // TRITON_ENABLE_GPU

  // Check API version: the major version must match exactly and the library
  // must provide at least the minor version this file was compiled against.
  uint32_t api_version_major, api_version_minor;
  FAIL_IF_ERR(
      TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor),
      "getting Triton API version");
  if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major) ||
      (TRITONSERVER_API_VERSION_MINOR > api_version_minor)) {
    FAIL("triton server API version mismatch");
  }

  CreateAndRunTritonserverInstance(
      model_repository_path, tritonserver_path, verbose_level, thread_count,
      visualize);

  return 0;
}


================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/tao/convert_peoplenet.sh
================================================
#!/bin/bash
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Stop at the first failing command, so that a failed conversion is never
# followed by copying a missing or stale model.plan into the second
# repository.
set -e

# Convert the PeopleNet .etlt model into model.plan for the first sample
# repository. All paths are relative, so the script has to be started from the
# directory that contains the tao-converter binary.
# -d 3,544,960 and -m 64 correspond to the input dims and max_batch_size of
# the peoplenet config.pbtxt files; -o lists the two output tensor names.
./tao-converter \
    -k tlt_encode \
    -d 3,544,960 \
    -i nchw \
    -t fp16 \
    -b 16 \
    -m 64 \
    -o output_cov/Sigmoid,output_bbox/BiasAdd \
    -e ../trtis_model_repo_sample_1/peoplenet/1/model.plan \
    models/peoplenet/resnet34_peoplenet_pruned.etlt

# The second sample repository serves the same plan file.
cp ../trtis_model_repo_sample_1/peoplenet/1/model.plan ../trtis_model_repo_sample_2/peoplenet/1/model.plan


================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/tao/models/peoplenet/.gitkeep
================================================


================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/trtis_model_repo_sample_1/peoplenet/1/.gitkeep
================================================


================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/trtis_model_repo_sample_1/peoplenet/config.pbtxt
================================================
# Model configuration of the "peoplenet" TensorRT plan in the first sample
# repository: requests of up to 64 batched inputs, served by three model
# instances on the GPU.
name: "peoplenet"
platform: "tensorrt_plan"
max_batch_size: 64
# One FP32 input of 3 x 544 x 960 per batch entry.
input [
  {
    name: "input_1"
    data_type: TYPE_FP32
    dims: [ 3, 544, 960  ]
  }
]
# Two FP32 outputs on a 34 x 60 grid: 12 channels for the boxes and 3 channels
# for the coverage (presumably 4 box values for each of 3 classes - confirm
# against the model documentation).
output [
  {
    name: "output_bbox/BiasAdd"
    data_type: TYPE_FP32
    dims: [ 12, 34, 60 ]
  },
  {
    name: "output_cov/Sigmoid"
    data_type: TYPE_FP32
    dims: [ 3, 34, 60 ]
  }
]
# Three instances of the model on the GPU.
instance_group [
  {
    count: 3
    kind: KIND_GPU
  }
]


================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/trtis_model_repo_sample_2/peoplenet/1/.gitkeep
================================================


================================================
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/trtis_model_repo_sample_2/peoplenet/config.pbtxt
================================================
# Model configuration of the "peoplenet" TensorRT plan in the second sample
# repository: same tensors as the first sample, but with dynamic batching
# enabled instead of an explicit instance group.
name: "peoplenet"
platform: "tensorrt_plan"
max_batch_size: 64
# One FP32 input of 3 x 544 x 960 per batch entry.
input [
  {
    name: "input_1"
    data_type: TYPE_FP32
    dims: [ 3, 544, 960  ]
  }
]
# Two FP32 outputs on a 34 x 60 grid: 12 channels for the boxes and 3 channels
# for the coverage.
output [
  {
    name: "output_bbox/BiasAdd"
    data_type: TYPE_FP32
    dims: [ 12, 34, 60 ]
  },
  {
    name: "output_cov/Sigmoid"
    data_type: TYPE_FP32
    dims: [ 3, 34, 60 ]
  }
]
# An empty block enables dynamic batching with its default settings.
dynamic_batching {
}


================================================
FILE: docs/examples/model_repository/densenet_onnx/config.pbtxt
================================================
# Model configuration of the "densenet_onnx" ONNX Runtime model.
name: "densenet_onnx"
platform: "onnxruntime_onnx"
# max_batch_size 0: no batch dimension is added to the dims listed below.
max_batch_size : 0
# Clients send a 3 x 224 x 224 NCHW image; the reshape presents it to the
# model as [ 1, 3, 224, 224 ].
input [
  {
    name: "data_0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 3, 224, 224 ]
    reshape { shape: [ 1, 3, 224, 224 ] }
  }
]
# The model output of shape [ 1, 1000, 1, 1 ] is exposed to clients as 1000
# values; densenet_labels.txt provides the label of each value.
output [
  {
    name: "fc6_1"
    data_type: TYPE_FP32
    dims: [ 1000 ]
    reshape { shape: [ 1, 1000, 1, 1 ] }
    label_filename: "densenet_labels.txt"
  }
]

================================================
FILE: docs/examples/model_repository/densenet_onnx/densenet_labels.txt
================================================
TENCH
GOLDFISH
WHITE SHARK
TIGER SHARK
HAMMERHEAD SHARK
ELECTRIC RAY
STINGRAY
ROOSTER
HEN
OSTRICH
BRAMBLING
GOLDFINCH
HOUSE FINCH
SNOWBIRD
INDIGO FINCH
ROBIN
BULBUL
JAY
MAGPIE
CHICKADEE
WATER OUZEL
KITE
BALD EAGLE
VULTURE
GREAT GREY OWL
FIRE SALAMANDER
NEWT
EFT
SPOTTED SALAMANDER
AXOLOTL
BULL FROG
TREE FROG
TAILED FROG
LOGGERHEAD
LEATHERBACK TURTLE
MUD TURTLE
TERRAPIN
BOX TURTLE
BANDED GECKO
COMMON IGUANA
AMERICAN CHAMELEON
WHIPTAIL
AGAMA
FRILLED LIZARD
ALLIGATOR LIZARD
GILA MONSTER
GREEN LIZARD
AFRICAN CHAMELEON
KOMODO DRAGON
AFRICAN CROCODILE
AMERICAN ALLIGATOR
TRICERATOPS
THUNDER SNAKE
RINGNECK SNAKE
HOGNOSE SNAKE
GREEN SNAKE
KING SNAKE
GARTER SNAKE
WATER SNAKE
VINE SNAKE
NIGHT SNAKE
BOA
ROCK PYTHON
COBRA
GREEN MAMBA
SEA SNAKE
HORNED VIPER
DIAMONDBACK
SIDEWINDER
TRILOBITE
HARVESTMAN
SCORPION
GARDEN SPIDER
BARN SPIDER
GARDEN SPIDER
BLACK WIDOW
TARANTULA
WOLF SPIDER
TICK
CENTIPEDE
GROUSE
PTARMIGAN
RUFFED GROUSE
PRAIRIE CHICKEN
PEACOCK
QUAIL
PARTRIDGE
AFRICAN GREY
MACAW
COCKATOO
LORIKEET
COUCAL
BEE EATER
HORNBILL
HUMMINGBIRD
JACAMAR
TOUCAN
DRAKE
MERGANSER
GOOSE
BLACK SWAN
TUSKER
ECHIDNA
PLATYPUS
WALLABY
KOALA
WOMBAT
JELLYFISH
SEA ANEMONE
BRAIN CORAL
FLATWORM
NEMATODE
CONCH
SNAIL
SLUG
SEA SLUG
CHITON
CHAMBERED NAUTILUS
DUNGENESS CRAB
ROCK CRAB
FIDDLER CRAB
KING CRAB
AMERICAN LOBSTER
SPINY LOBSTER
CRAYFISH
HERMIT CRAB
ISOPOD
WHITE STORK
BLACK STORK
SPOONBILL
FLAMINGO
LITTLE BLUE HERON
AMERICAN EGRET
BITTERN
CRANE
LIMPKIN
EUROPEAN GALLINULE
AMERICAN COOT
BUSTARD
RUDDY TURNSTONE
RED-BACKED SANDPIPER
REDSHANK
DOWITCHER
OYSTERCATCHER
PELICAN
KING PENGUIN
ALBATROSS
GREY WHALE
KILLER WHALE
DUGONG
SEA LION
CHIHUAHUA
JAPANESE SPANIEL
MALTESE DOG
PEKINESE
SHIH-TZU
BLENHEIM SPANIEL
PAPILLON
TOY TERRIER
RHODESIAN RIDGEBACK
AFGHAN HOUND
BASSET
BEAGLE
BLOODHOUND
BLUETICK
COONHOUND
WALKER HOUND
ENGLISH FOXHOUND
REDBONE
BORZOI
IRISH WOLFHOUND
ITALIAN GREYHOUND
WHIPPET
IBIZAN HOUND
NORWEGIAN ELKHOUND
OTTERHOUND
SALUKI
SCOTTISH DEERHOUND
WEIMARANER
STAFFORDSHIRE BULLTERRIER
STAFFORDSHIRE TERRIER
BEDLINGTON TERRIER
BORDER TERRIER
KERRY BLUE TERRIER
IRISH TERRIER
NORFOLK TERRIER
NORWICH TERRIER
YORKSHIRE TERRIER
WIRE-HAIRED FOX TERRIER
LAKELAND TERRIER
SEALYHAM TERRIER
AIREDALE
CAIRN
AUSTRALIAN TERRIER
DANDIE DINMONT
BOSTON BULL
MINIATURE SCHNAUZER
GIANT SCHNAUZER
STANDARD SCHNAUZER
SCOTCH TERRIER
TIBETAN TERRIER
SILKY TERRIER
WHEATEN TERRIER
WHITE TERRIER
LHASA
RETRIEVER
CURLY-COATED RETRIEVER
GOLDEN RETRIEVER
LABRADOR RETRIEVER
CHESAPEAKE BAY RETRIEVER
SHORT-HAIRED POINTER
VISLA
ENGLISH SETTER
IRISH SETTER
GORDON SETTER
BRITTANY SPANIEL
CLUMBER
ENGLISH SPRINGER
WELSH SPRINGER SPANIEL
COCKER SPANIEL
SUSSEX SPANIEL
IRISH WATERSPANIEL
KUVASZ
SCHIPPERKE
GROENENDAEL
MALINOIS
BRIARD
KELPIE
KOMONDOR
OLD ENGLISH SHEEPDOG
SHETLAND SHEEPDOG
COLLIE
BORDER COLLIE
BOUVIER DES FLANDRES
ROTTWEILER
GERMAN SHEPHERD
DOBERMAN
MINIATURE PINSCHER
GREATER SWISS MOUNTAIN DOG
BERNESE MOUNTAIN DOG
APPENZELLER
ENTLEBUCHER
BOXER
BULL MASTIFF
TIBETAN MASTIFF
FRENCH BULLDOG
GREAT DANE
SAINT BERNARD
ESKIMO DOG
MALAMUTE
SIBERIAN HUSKY
DALMATIAN
AFFENPINSCHER
BASENJI
PUG
LEONBERG
NEWFOUNDLAND
GREAT PYRENEES
SAMOYED
POMERANIAN
CHOW
KEESHOND
BRABANCON GRIFFON
PEMBROKE
CARDIGAN
TOY POODLE
MINIATURE POODLE
STANDARD POODLE
MEXICAN HAIRLESS
TIMBER WOLF
WHITE WOLF
RED WOLF
COYOTE
DINGO
DHOLE
AFRICAN HUNTING DOG
HYENA
RED FOX
KIT FOX
ARCTIC FOX
GREY FOX
TABBY
TIGER CAT
PERSIAN CAT
SIAMESE CAT
EGYPTIAN CAT
COUGAR
LYNX
LEOPARD
SNOW LEOPARD
JAGUAR
LION
TIGER
CHEETAH
BROWN BEAR
AMERICAN BLACK BEAR
ICE BEAR
SLOTH BEAR
MONGOOSE
MEERKAT
TIGER BEETLE
LADYBUG
GROUND BEETLE
LONG-HORNED BEETLE
LEAF BEETLE
DUNG BEETLE
RHINOCEROS BEETLE
WEEVIL
FLY
BEE
ANT
GRASSHOPPER
CRICKET
WALKING STICK
COCKROACH
MANTIS
CICADA
LEAFHOPPER
LACEWING
DRAGONFLY
DAMSELFLY
ADMIRAL
RINGLET
MONARCH
CABBAGE BUTTERFLY
SULPHUR BUTTERFLY
LYCAENID
STARFISH
SEA URCHIN
SEA CUCUMBER
WOOD RABBIT
HARE
ANGORA
HAMSTER
PORCUPINE
FOX SQUIRREL
MARMOT
BEAVER
GUINEA PIG
SORREL
ZEBRA
HOG
WILD BOAR
WARTHOG
HIPPOPOTAMUS
OX
WATER BUFFALO
BISON
RAM
BIGHORN
IBEX
HARTEBEEST
IMPALA
GAZELLE
ARABIAN CAMEL
LLAMA
WEASEL
MINK
POLECAT
BLACK-FOOTED FERRET
OTTER
SKUNK
BADGER
ARMADILLO
THREE-TOED SLOTH
ORANGUTAN
GORILLA
CHIMPANZEE
GIBBON
SIAMANG
GUENON
PATAS
BABOON
MACAQUE
LANGUR
COLOBUS
PROBOSCIS MONKEY
MARMOSET
CAPUCHIN
HOWLER MONKEY
TITI
SPIDER MONKEY
SQUIRREL MONKEY
MADAGASCAR CAT
INDRI
INDIAN ELEPHANT
AFRICAN ELEPHANT
LESSER PANDA
GIANT PANDA
BARRACOUTA
EEL
COHO
ROCK BEAUTY
ANEMONE FISH
STURGEON
GAR
LIONFISH
PUFFER
ABACUS
ABAYA
ACADEMIC GOWN
ACCORDION
ACOUSTIC GUITAR
AIRCRAFT CARRIER
AIRLINER
AIRSHIP
ALTAR
AMBULANCE
AMPHIBIAN
ANALOG CLOCK
APIARY
APRON
ASHCAN
ASSAULT RIFLE
BACKPACK
BAKERY
BALANCE BEAM
BALLOON
BALLPOINT
BAND AID
BANJO
BANNISTER
BARBELL
BARBER CHAIR
BARBERSHOP
BARN
BAROMETER
BARREL
BARROW
BASEBALL
BASKETBALL
BASSINET
BASSOON
BATHING CAP
BATH TOWEL
BATHTUB
BEACH WAGON
BEACON
BEAKER
BEARSKIN
BEER BOTTLE
BEER GLASS
BELL COTE
BIB
BICYCLE-BUILT-FOR-TWO
BIKINI
BINDER
BINOCULARS
BIRDHOUSE
BOATHOUSE
BOBSLED
BOLO TIE
BONNET
BOOKCASE
BOOKSHOP
BOTTLECAP
BOW
BOW TIE
BRASS
BRASSIERE
BREAKWATER
BREASTPLATE
BROOM
BUCKET
BUCKLE
BULLETPROOF VEST
BULLET TRAIN
BUTCHER SHOP
CAB
CALDRON
CANDLE
CANNON
CANOE
CAN OPENER
CARDIGAN
CAR MIRROR
CAROUSEL
CARPENTERS KIT
CARTON
CAR WHEEL
CASH MACHINE
CASSETTE
CASSETTE PLAYER
CASTLE
CATAMARAN
CD PLAYER
CELLO
CELLULAR TELEPHONE
CHAIN
CHAINLINK FENCE
CHAIN MAIL
CHAIN SAW
CHEST
CHIFFONIER
CHIME
CHINA CABINET
CHRISTMAS STOCKING
CHURCH
CINEMA
CLEAVER
CLIFF DWELLING
CLOAK
CLOG
COCKTAIL SHAKER
COFFEE MUG
COFFEEPOT
COIL
COMBINATION LOCK
COMPUTER KEYBOARD
CONFECTIONERY
CONTAINER SHIP
CONVERTIBLE
CORKSCREW
CORNET
COWBOY BOOT
COWBOY HAT
CRADLE
CRANE
CRASH HELMET
CRATE
CRIB
CROCK POT
CROQUET BALL
CRUTCH
CUIRASS
DAM
DESK
DESKTOP COMPUTER
DIAL TELEPHONE
DIAPER
DIGITAL CLOCK
DIGITAL WATCH
DINING TABLE
DISHRAG
DISHWASHER
DISK BRAKE
DOCK
DOGSLED
DOME
DOORMAT
DRILLING PLATFORM
DRUM
DRUMSTICK
DUMBBELL
DUTCH OVEN
ELECTRIC FAN
ELECTRIC GUITAR
ELECTRIC LOCOMOTIVE
ENTERTAINMENT CENTER
ENVELOPE
ESPRESSO MAKER
FACE POWDER
FEATHER BOA
FILE
FIREBOAT
FIRE ENGINE
FIRE SCREEN
FLAGPOLE
FLUTE
FOLDING CHAIR
FOOTBALL HELMET
FORKLIFT
FOUNTAIN
FOUNTAIN PEN
FOUR-POSTER
FREIGHT CAR
FRENCH HORN
FRYING PAN
FUR COAT
GARBAGE TRUCK
GASMASK
GAS PUMP
GOBLET
GO-KART
GOLF BALL
GOLFCART
GONDOLA
GONG
GOWN
GRAND PIANO
GREENHOUSE
GRILLE
GROCERY STORE
GUILLOTINE
HAIR SLIDE
HAIR SPRAY
HALF TRACK
HAMMER
HAMPER
HAND BLOWER
HAND-HELD COMPUTER
HANDKERCHIEF
HARD DISC
HARMONICA
HARP
HARVESTER
HATCHET
HOLSTER
HOME THEATER
HONEYCOMB
HOOK
HOOPSKIRT
HORIZONTAL BAR
HORSE CART
HOURGLASS
IPOD
IRON
JACK-O-LANTERN
JEAN
JEEP
JERSEY
JIGSAW PUZZLE
JINRIKISHA
JOYSTICK
KIMONO
KNEE PAD
KNOT
LAB COAT
LADLE
LAMPSHADE
LAPTOP
LAWN MOWER
LENS CAP
LETTER OPENER
LIBRARY
LIFEBOAT
LIGHTER
LIMOUSINE
LINER
LIPSTICK
LOAFER
LOTION
LOUDSPEAKER
LOUPE
LUMBERMILL
MAGNETIC COMPASS
MAILBAG
MAILBOX
MAILLOT
MAILLOT
MANHOLE COVER
MARACA
MARIMBA
MASK
MATCHSTICK
MAYPOLE
MAZE
MEASURING CUP
MEDICINE CHEST
MEGALITH
MICROPHONE
MICROWAVE
MILITARY UNIFORM
MILK CAN
MINIBUS
MINISKIRT
MINIVAN
MISSILE
MITTEN
MIXING BOWL
MOBILE HOME
MODEL T
MODEM
MONASTERY
MONITOR
MOPED
MORTAR
MORTARBOARD
MOSQUE
MOSQUITO NET
MOTOR SCOOTER
MOUNTAIN BIKE
MOUNTAIN TENT
MOUSE
MOUSETRAP
MOVING VAN
MUZZLE
NAIL
NECK BRACE
NECKLACE
NIPPLE
NOTEBOOK
OBELISK
OBOE
OCARINA
ODOMETER
OIL FILTER
ORGAN
OSCILLOSCOPE
OVERSKIRT
OXCART
OXYGEN MASK
PACKET
PADDLE
PADDLEWHEEL
PADLOCK
PAINTBRUSH
PAJAMA
PALACE
PANPIPE
PAPER TOWEL
PARACHUTE
PARALLEL BARS
PARK BENCH
PARKING METER
PASSENGER CAR
PATIO
PAY-PHONE
PEDESTAL
PENCIL BOX
PENCIL SHARPENER
PERFUME
PETRI DISH
PHOTOCOPIER
PICK
PICKELHAUBE
PICKET FENCE
PICKUP
PIER
PIGGY BANK
PILL BOTTLE
PILLOW
PING-PONG BALL
PINWHEEL
PIRATE
PITCHER
PLANE
PLANETARIUM
PLASTIC BAG
PLATE RACK
PLOW
PLUNGER
POLAROID CAMERA
POLE
POLICE VAN
PONCHO
POOL TABLE
POP BOTTLE
POT
POTTERS WHEEL
POWER DRILL
PRAYER RUG
PRINTER
PRISON
PROJECTILE
PROJECTOR
PUCK
PUNCHING BAG
PURSE
QUILL
QUILT
RACER
RACKET
RADIATOR
RADIO
RADIO TELESCOPE
RAIN BARREL
RECREATIONAL VEHICLE
REEL
REFLEX CAMERA
REFRIGERATOR
REMOTE CONTROL
RESTAURANT
REVOLVER
RIFLE
ROCKING CHAIR
ROTISSERIE
RUBBER ERASER
RUGBY BALL
RULE
RUNNING SHOE
SAFE
SAFETY PIN
SALTSHAKER
SANDAL
SARONG
SAX
SCABBARD
SCALE
SCHOOL BUS
SCHOONER
SCOREBOARD
SCREEN
SCREW
SCREWDRIVER
SEAT BELT
SEWING MACHINE
SHIELD
SHOE SHOP
SHOJI
SHOPPING BASKET
SHOPPING CART
SHOVEL
SHOWER CAP
SHOWER CURTAIN
SKI
SKI MASK
SLEEPING BAG
SLIDE RULE
SLIDING DOOR
SLOT
SNORKEL
SNOWMOBILE
SNOWPLOW
SOAP DISPENSER
SOCCER BALL
SOCK
SOLAR DISH
SOMBRERO
SOUP BOWL
SPACE BAR
SPACE HEATER
SPACE SHUTTLE
SPATULA
SPEEDBOAT
SPIDER WEB
SPINDLE
SPORTS CAR
SPOTLIGHT
STAGE
STEAM LOCOMOTIVE
STEEL ARCH BRIDGE
STEEL DRUM
STETHOSCOPE
STOLE
STONE WALL
STOPWATCH
STOVE
STRAINER
STREETCAR
STRETCHER
STUDIO COUCH
STUPA
SUBMARINE
SUIT
SUNDIAL
SUNGLASS
SUNGLASSES
SUNSCREEN
SUSPENSION BRIDGE
SWAB
SWEATSHIRT
SWIMMING TRUNKS
SWING
SWITCH
SYRINGE
TABLE LAMP
TANK
TAPE PLAYER
TEAPOT
TEDDY
TELEVISION
TENNIS BALL
THATCH
THEATER CURTAIN
THIMBLE
THRESHER
THRONE
TILE ROOF
TOASTER
TOBACCO SHOP
TOILET SEAT
TORCH
TOTEM POLE
TOW TRUCK
TOYSHOP
TRACTOR
TRAILER TRUCK
TRAY
TRENCH COAT
TRICYCLE
TRIMARAN
TRIPOD
TRIUMPHAL ARCH
TROLLEYBUS
TROMBONE
TUB
TURNSTILE
TYPEWRITER KEYBOARD
UMBRELLA
UNICYCLE
UPRIGHT
VACUUM
VASE
VAULT
VELVET
VENDING MACHINE
VESTMENT
VIADUCT
VIOLIN
VOLLEYBALL
WAFFLE IRON
WALL CLOCK
WALLET
WARDROBE
WARPLANE
WASHBASIN
WASHER
WATER BOTTLE
WATER JUG
WATER TOWER
WHISKEY JUG
WHISTLE
WIG
WINDOW SCREEN
WINDOW SHADE
WINDSOR TIE
WINE BOTTLE
WING
WOK
WOODEN SPOON
WOOL
WORM FENCE
WRECK
YAWL
YURT
WEB SITE
COMIC BOOK
CROSSWORD PUZZLE
STREET SIGN
TRAFFIC LIGHT
BOOK JACKET
MENU
PLATE
GUACAMOLE
CONSOMME
HOT POT
TRIFLE
ICE CREAM
ICE LOLLY
FRENCH LOAF
BAGEL
PRETZEL
CHEESEBURGER
HOTDOG
MASHED POTATO
HEAD CABBAGE
BROCCOLI
CAULIFLOWER
ZUCCHINI
SPAGHETTI SQUASH
ACORN SQUASH
BUTTERNUT SQUASH
CUCUMBER
ARTICHOKE
BELL PEPPER
CARDOON
MUSHROOM
GRANNY SMITH
STRAWBERRY
ORANGE
LEMON
FIG
PINEAPPLE
BANANA
JACKFRUIT
CUSTARD APPLE
POMEGRANATE
HAY
CARBONARA
CHOCOLATE SAUCE
DOUGH
MEAT LOAF
PIZZA
POTPIE
BURRITO
RED WINE
ESPRESSO
CUP
EGGNOG
ALP
BUBBLE
CLIFF
CORAL REEF
GEYSER
LAKESIDE
PROMONTORY
SANDBAR
SEASHORE
VALLEY
VOLCANO
BALLPLAYER
GROOM
SCUBA DIVER
RAPESEED
DAISY
LADY SLIPPER
CORN
ACORN
HIP
BUCKEYE
CORAL FUNGUS
AGARIC
GYROMITRA
STINKHORN
EARTHSTAR
HEN-OF-THE-WOODS
BOLETE
EAR
TOILET TISSUE


================================================
FILE: docs/examples/model_repository/inception_onnx/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "inception_onnx"
platform: "onnxruntime_onnx"
# Example configuration for the Inception V3 image classifier served through
# the ONNX Runtime platform.
# NOTE(review): max_batch_size 0 is understood to turn off server-managed
# batching, which would be why each tensor below uses `reshape` to supply the
# leading dimension of 1 itself -- confirm against the Triton model
# configuration docs.
max_batch_size: 0
input [
  {
    name: "input:0"
    data_type: TYPE_FP32
    # Image tensor declared in NHWC layout: 299 x 299 with 3 channels.
    format: FORMAT_NHWC
    dims: [ 299, 299, 3 ]
    reshape { shape: [ 1, 299, 299, 3 ] }
  }
]
output [
  {
    name: "InceptionV3/Predictions/Softmax:0"
    data_type: TYPE_FP32
    # 1001 values, matching the 1001 lines of inception_labels.txt (whose
    # first line is the "UNUSED BACKGROUND" placeholder).
    dims: [ 1001 ]
    reshape { shape: [ 1, 1001 ] }
    label_filename: "inception_labels.txt"
  }
]


================================================
FILE: docs/examples/model_repository/inception_onnx/inception_labels.txt
================================================
UNUSED BACKGROUND
TENCH
GOLDFISH
WHITE SHARK
TIGER SHARK
HAMMERHEAD SHARK
ELECTRIC RAY
STINGRAY
ROOSTER
HEN
OSTRICH
BRAMBLING
GOLDFINCH
HOUSE FINCH
SNOWBIRD
INDIGO FINCH
ROBIN
BULBUL
JAY
MAGPIE
CHICKADEE
WATER OUZEL
KITE
BALD EAGLE
VULTURE
GREAT GREY OWL
FIRE SALAMANDER
NEWT
EFT
SPOTTED SALAMANDER
AXOLOTL
BULL FROG
TREE FROG
TAILED FROG
LOGGERHEAD
LEATHERBACK TURTLE
MUD TURTLE
TERRAPIN
BOX TURTLE
BANDED GECKO
COMMON IGUANA
AMERICAN CHAMELEON
WHIPTAIL
AGAMA
FRILLED LIZARD
ALLIGATOR LIZARD
GILA MONSTER
GREEN LIZARD
AFRICAN CHAMELEON
KOMODO DRAGON
AFRICAN CROCODILE
AMERICAN ALLIGATOR
TRICERATOPS
THUNDER SNAKE
RINGNECK SNAKE
HOGNOSE SNAKE
GREEN SNAKE
KING SNAKE
GARTER SNAKE
WATER SNAKE
VINE SNAKE
NIGHT SNAKE
BOA
ROCK PYTHON
COBRA
GREEN MAMBA
SEA SNAKE
HORNED VIPER
DIAMONDBACK
SIDEWINDER
TRILOBITE
HARVESTMAN
SCORPION
GARDEN SPIDER
BARN SPIDER
GARDEN SPIDER
BLACK WIDOW
TARANTULA
WOLF SPIDER
TICK
CENTIPEDE
GROUSE
PTARMIGAN
RUFFED GROUSE
PRAIRIE CHICKEN
PEACOCK
QUAIL
PARTRIDGE
AFRICAN GREY
MACAW
COCKATOO
LORIKEET
COUCAL
BEE EATER
HORNBILL
HUMMINGBIRD
JACAMAR
TOUCAN
DRAKE
MERGANSER
GOOSE
BLACK SWAN
TUSKER
ECHIDNA
PLATYPUS
WALLABY
KOALA
WOMBAT
JELLYFISH
SEA ANEMONE
BRAIN CORAL
FLATWORM
NEMATODE
CONCH
SNAIL
SLUG
SEA SLUG
CHITON
CHAMBERED NAUTILUS
DUNGENESS CRAB
ROCK CRAB
FIDDLER CRAB
KING CRAB
AMERICAN LOBSTER
SPINY LOBSTER
CRAYFISH
HERMIT CRAB
ISOPOD
WHITE STORK
BLACK STORK
SPOONBILL
FLAMINGO
LITTLE BLUE HERON
AMERICAN EGRET
BITTERN
CRANE
LIMPKIN
EUROPEAN GALLINULE
AMERICAN COOT
BUSTARD
RUDDY TURNSTONE
RED-BACKED SANDPIPER
REDSHANK
DOWITCHER
OYSTERCATCHER
PELICAN
KING PENGUIN
ALBATROSS
GREY WHALE
KILLER WHALE
DUGONG
SEA LION
CHIHUAHUA
JAPANESE SPANIEL
MALTESE DOG
PEKINESE
SHIH-TZU
BLENHEIM SPANIEL
PAPILLON
TOY TERRIER
RHODESIAN RIDGEBACK
AFGHAN HOUND
BASSET
BEAGLE
BLOODHOUND
BLUETICK
COONHOUND
WALKER HOUND
ENGLISH FOXHOUND
REDBONE
BORZOI
IRISH WOLFHOUND
ITALIAN GREYHOUND
WHIPPET
IBIZAN HOUND
NORWEGIAN ELKHOUND
OTTERHOUND
SALUKI
SCOTTISH DEERHOUND
WEIMARANER
STAFFORDSHIRE BULLTERRIER
STAFFORDSHIRE TERRIER
BEDLINGTON TERRIER
BORDER TERRIER
KERRY BLUE TERRIER
IRISH TERRIER
NORFOLK TERRIER
NORWICH TERRIER
YORKSHIRE TERRIER
WIRE-HAIRED FOX TERRIER
LAKELAND TERRIER
SEALYHAM TERRIER
AIREDALE
CAIRN
AUSTRALIAN TERRIER
DANDIE DINMONT
BOSTON BULL
MINIATURE SCHNAUZER
GIANT SCHNAUZER
STANDARD SCHNAUZER
SCOTCH TERRIER
TIBETAN TERRIER
SILKY TERRIER
WHEATEN TERRIER
WHITE TERRIER
LHASA
RETRIEVER
CURLY-COATED RETRIEVER
GOLDEN RETRIEVER
LABRADOR RETRIEVER
CHESAPEAKE BAY RETRIEVER
SHORT-HAIRED POINTER
VISLA
ENGLISH SETTER
IRISH SETTER
GORDON SETTER
BRITTANY SPANIEL
CLUMBER
ENGLISH SPRINGER
WELSH SPRINGER SPANIEL
COCKER SPANIEL
SUSSEX SPANIEL
IRISH WATERSPANIEL
KUVASZ
SCHIPPERKE
GROENENDAEL
MALINOIS
BRIARD
KELPIE
KOMONDOR
OLD ENGLISH SHEEPDOG
SHETLAND SHEEPDOG
COLLIE
BORDER COLLIE
BOUVIER DES FLANDRES
ROTTWEILER
GERMAN SHEPHERD
DOBERMAN
MINIATURE PINSCHER
GREATER SWISS MOUNTAIN DOG
BERNESE MOUNTAIN DOG
APPENZELLER
ENTLEBUCHER
BOXER
BULL MASTIFF
TIBETAN MASTIFF
FRENCH BULLDOG
GREAT DANE
SAINT BERNARD
ESKIMO DOG
MALAMUTE
SIBERIAN HUSKY
DALMATIAN
AFFENPINSCHER
BASENJI
PUG
LEONBERG
NEWFOUNDLAND
GREAT PYRENEES
SAMOYED
POMERANIAN
CHOW
KEESHOND
BRABANCON GRIFFON
PEMBROKE
CARDIGAN
TOY POODLE
MINIATURE POODLE
STANDARD POODLE
MEXICAN HAIRLESS
TIMBER WOLF
WHITE WOLF
RED WOLF
COYOTE
DINGO
DHOLE
AFRICAN HUNTING DOG
HYENA
RED FOX
KIT FOX
ARCTIC FOX
GREY FOX
TABBY
TIGER CAT
PERSIAN CAT
SIAMESE CAT
EGYPTIAN CAT
COUGAR
LYNX
LEOPARD
SNOW LEOPARD
JAGUAR
LION
TIGER
CHEETAH
BROWN BEAR
AMERICAN BLACK BEAR
ICE BEAR
SLOTH BEAR
MONGOOSE
MEERKAT
TIGER BEETLE
LADYBUG
GROUND BEETLE
LONG-HORNED BEETLE
LEAF BEETLE
DUNG BEETLE
RHINOCEROS BEETLE
WEEVIL
FLY
BEE
ANT
GRASSHOPPER
CRICKET
WALKING STICK
COCKROACH
MANTIS
CICADA
LEAFHOPPER
LACEWING
DRAGONFLY
DAMSELFLY
ADMIRAL
RINGLET
MONARCH
CABBAGE BUTTERFLY
SULPHUR BUTTERFLY
LYCAENID
STARFISH
SEA URCHIN
SEA CUCUMBER
WOOD RABBIT
HARE
ANGORA
HAMSTER
PORCUPINE
FOX SQUIRREL
MARMOT
BEAVER
GUINEA PIG
SORREL
ZEBRA
HOG
WILD BOAR
WARTHOG
HIPPOPOTAMUS
OX
WATER BUFFALO
BISON
RAM
BIGHORN
IBEX
HARTEBEEST
IMPALA
GAZELLE
ARABIAN CAMEL
LLAMA
WEASEL
MINK
POLECAT
BLACK-FOOTED FERRET
OTTER
SKUNK
BADGER
ARMADILLO
THREE-TOED SLOTH
ORANGUTAN
GORILLA
CHIMPANZEE
GIBBON
SIAMANG
GUENON
PATAS
BABOON
MACAQUE
LANGUR
COLOBUS
PROBOSCIS MONKEY
MARMOSET
CAPUCHIN
HOWLER MONKEY
TITI
SPIDER MONKEY
SQUIRREL MONKEY
MADAGASCAR CAT
INDRI
INDIAN ELEPHANT
AFRICAN ELEPHANT
LESSER PANDA
GIANT PANDA
BARRACOUTA
EEL
COHO
ROCK BEAUTY
ANEMONE FISH
STURGEON
GAR
LIONFISH
PUFFER
ABACUS
ABAYA
ACADEMIC GOWN
ACCORDION
ACOUSTIC GUITAR
AIRCRAFT CARRIER
AIRLINER
AIRSHIP
ALTAR
AMBULANCE
AMPHIBIAN
ANALOG CLOCK
APIARY
APRON
ASHCAN
ASSAULT RIFLE
BACKPACK
BAKERY
BALANCE BEAM
BALLOON
BALLPOINT
BAND AID
BANJO
BANNISTER
BARBELL
BARBER CHAIR
BARBERSHOP
BARN
BAROMETER
BARREL
BARROW
BASEBALL
BASKETBALL
BASSINET
BASSOON
BATHING CAP
BATH TOWEL
BATHTUB
BEACH WAGON
BEACON
BEAKER
BEARSKIN
BEER BOTTLE
BEER GLASS
BELL COTE
BIB
BICYCLE-BUILT-FOR-TWO
BIKINI
BINDER
BINOCULARS
BIRDHOUSE
BOATHOUSE
BOBSLED
BOLO TIE
BONNET
BOOKCASE
BOOKSHOP
BOTTLECAP
BOW
BOW TIE
BRASS
BRASSIERE
BREAKWATER
BREASTPLATE
BROOM
BUCKET
BUCKLE
BULLETPROOF VEST
BULLET TRAIN
BUTCHER SHOP
CAB
CALDRON
CANDLE
CANNON
CANOE
CAN OPENER
CARDIGAN
CAR MIRROR
CAROUSEL
CARPENTERS KIT
CARTON
CAR WHEEL
CASH MACHINE
CASSETTE
CASSETTE PLAYER
CASTLE
CATAMARAN
CD PLAYER
CELLO
CELLULAR TELEPHONE
CHAIN
CHAINLINK FENCE
CHAIN MAIL
CHAIN SAW
CHEST
CHIFFONIER
CHIME
CHINA CABINET
CHRISTMAS STOCKING
CHURCH
CINEMA
CLEAVER
CLIFF DWELLING
CLOAK
CLOG
COCKTAIL SHAKER
COFFEE MUG
COFFEEPOT
COIL
COMBINATION LOCK
COMPUTER KEYBOARD
CONFECTIONERY
CONTAINER SHIP
CONVERTIBLE
CORKSCREW
CORNET
COWBOY BOOT
COWBOY HAT
CRADLE
CRANE
CRASH HELMET
CRATE
CRIB
CROCK POT
CROQUET BALL
CRUTCH
CUIRASS
DAM
DESK
DESKTOP COMPUTER
DIAL TELEPHONE
DIAPER
DIGITAL CLOCK
DIGITAL WATCH
DINING TABLE
DISHRAG
DISHWASHER
DISK BRAKE
DOCK
DOGSLED
DOME
DOORMAT
DRILLING PLATFORM
DRUM
DRUMSTICK
DUMBBELL
DUTCH OVEN
ELECTRIC FAN
ELECTRIC GUITAR
ELECTRIC LOCOMOTIVE
ENTERTAINMENT CENTER
ENVELOPE
ESPRESSO MAKER
FACE POWDER
FEATHER BOA
FILE
FIREBOAT
FIRE ENGINE
FIRE SCREEN
FLAGPOLE
FLUTE
FOLDING CHAIR
FOOTBALL HELMET
FORKLIFT
FOUNTAIN
FOUNTAIN PEN
FOUR-POSTER
FREIGHT CAR
FRENCH HORN
FRYING PAN
FUR COAT
GARBAGE TRUCK
GASMASK
GAS PUMP
GOBLET
GO-KART
GOLF BALL
GOLFCART
GONDOLA
GONG
GOWN
GRAND PIANO
GREENHOUSE
GRILLE
GROCERY STORE
GUILLOTINE
HAIR SLIDE
HAIR SPRAY
HALF TRACK
HAMMER
HAMPER
HAND BLOWER
HAND-HELD COMPUTER
HANDKERCHIEF
HARD DISC
HARMONICA
HARP
HARVESTER
HATCHET
HOLSTER
HOME THEATER
HONEYCOMB
HOOK
HOOPSKIRT
HORIZONTAL BAR
HORSE CART
HOURGLASS
IPOD
IRON
JACK-O-LANTERN
JEAN
JEEP
JERSEY
JIGSAW PUZZLE
JINRIKISHA
JOYSTICK
KIMONO
KNEE PAD
KNOT
LAB COAT
LADLE
LAMPSHADE
LAPTOP
LAWN MOWER
LENS CAP
LETTER OPENER
LIBRARY
LIFEBOAT
LIGHTER
LIMOUSINE
LINER
LIPSTICK
LOAFER
LOTION
LOUDSPEAKER
LOUPE
LUMBERMILL
MAGNETIC COMPASS
MAILBAG
MAILBOX
MAILLOT
MAILLOT
MANHOLE COVER
MARACA
MARIMBA
MASK
MATCHSTICK
MAYPOLE
MAZE
MEASURING CUP
MEDICINE CHEST
MEGALITH
MICROPHONE
MICROWAVE
MILITARY UNIFORM
MILK CAN
MINIBUS
MINISKIRT
MINIVAN
MISSILE
MITTEN
MIXING BOWL
MOBILE HOME
MODEL T
MODEM
MONASTERY
MONITOR
MOPED
MORTAR
MORTARBOARD
MOSQUE
MOSQUITO NET
MOTOR SCOOTER
MOUNTAIN BIKE
MOUNTAIN TENT
MOUSE
MOUSETRAP
MOVING VAN
MUZZLE
NAIL
NECK BRACE
NECKLACE
NIPPLE
NOTEBOOK
OBELISK
OBOE
OCARINA
ODOMETER
OIL FILTER
ORGAN
OSCILLOSCOPE
OVERSKIRT
OXCART
OXYGEN MASK
PACKET
PADDLE
PADDLEWHEEL
PADLOCK
PAINTBRUSH
PAJAMA
PALACE
PANPIPE
PAPER TOWEL
PARACHUTE
PARALLEL BARS
PARK BENCH
PARKING METER
PASSENGER CAR
PATIO
PAY-PHONE
PEDESTAL
PENCIL BOX
PENCIL SHARPENER
PERFUME
PETRI DISH
PHOTOCOPIER
PICK
PICKELHAUBE
PICKET FENCE
PICKUP
PIER
PIGGY BANK
PILL BOTTLE
PILLOW
PING-PONG BALL
PINWHEEL
PIRATE
PITCHER
PLANE
PLANETARIUM
PLASTIC BAG
PLATE RACK
PLOW
PLUNGER
POLAROID CAMERA
POLE
POLICE VAN
PONCHO
POOL TABLE
POP BOTTLE
POT
POTTERS WHEEL
POWER DRILL
PRAYER RUG
PRINTER
PRISON
PROJECTILE
PROJECTOR
PUCK
PUNCHING BAG
PURSE
QUILL
QUILT
RACER
RACKET
RADIATOR
RADIO
RADIO TELESCOPE
RAIN BARREL
RECREATIONAL VEHICLE
REEL
REFLEX CAMERA
REFRIGERATOR
REMOTE CONTROL
RESTAURANT
REVOLVER
RIFLE
ROCKING CHAIR
ROTISSERIE
RUBBER ERASER
RUGBY BALL
RULE
RUNNING SHOE
SAFE
SAFETY PIN
SALTSHAKER
SANDAL
SARONG
SAX
SCABBARD
SCALE
SCHOOL BUS
SCHOONER
SCOREBOARD
SCREEN
SCREW
SCREWDRIVER
SEAT BELT
SEWING MACHINE
SHIELD
SHOE SHOP
SHOJI
SHOPPING BASKET
SHOPPING CART
SHOVEL
SHOWER CAP
SHOWER CURTAIN
SKI
SKI MASK
SLEEPING BAG
SLIDE RULE
SLIDING DOOR
SLOT
SNORKEL
SNOWMOBILE
SNOWPLOW
SOAP DISPENSER
SOCCER BALL
SOCK
SOLAR DISH
SOMBRERO
SOUP BOWL
SPACE BAR
SPACE HEATER
SPACE SHUTTLE
SPATULA
SPEEDBOAT
SPIDER WEB
SPINDLE
SPORTS CAR
SPOTLIGHT
STAGE
STEAM LOCOMOTIVE
STEEL ARCH BRIDGE
STEEL DRUM
STETHOSCOPE
STOLE
STONE WALL
STOPWATCH
STOVE
STRAINER
STREETCAR
STRETCHER
STUDIO COUCH
STUPA
SUBMARINE
SUIT
SUNDIAL
SUNGLASS
SUNGLASSES
SUNSCREEN
SUSPENSION BRIDGE
SWAB
SWEATSHIRT
SWIMMING TRUNKS
SWING
SWITCH
SYRINGE
TABLE LAMP
TANK
TAPE PLAYER
TEAPOT
TEDDY
TELEVISION
TENNIS BALL
THATCH
THEATER CURTAIN
THIMBLE
THRESHER
THRONE
TILE ROOF
TOASTER
TOBACCO SHOP
TOILET SEAT
TORCH
TOTEM POLE
TOW TRUCK
TOYSHOP
TRACTOR
TRAILER TRUCK
TRAY
TRENCH COAT
TRICYCLE
TRIMARAN
TRIPOD
TRIUMPHAL ARCH
TROLLEYBUS
TROMBONE
TUB
TURNSTILE
TYPEWRITER KEYBOARD
UMBRELLA
UNICYCLE
UPRIGHT
VACUUM
VASE
VAULT
VELVET
VENDING MACHINE
VESTMENT
VIADUCT
VIOLIN
VOLLEYBALL
WAFFLE IRON
WALL CLOCK
WALLET
WARDROBE
WARPLANE
WASHBASIN
WASHER
WATER BOTTLE
WATER JUG
WATER TOWER
WHISKEY JUG
WHISTLE
WIG
WINDOW SCREEN
WINDOW SHADE
WINDSOR TIE
WINE BOTTLE
WING
WOK
WOODEN SPOON
WOOL
WORM FENCE
WRECK
YAWL
YURT
WEB SITE
COMIC BOOK
CROSSWORD PUZZLE
STREET SIGN
TRAFFIC LIGHT
BOOK JACKET
MENU
PLATE
GUACAMOLE
CONSOMME
HOT POT
TRIFLE
ICE CREAM
ICE LOLLY
FRENCH LOAF
BAGEL
PRETZEL
CHEESEBURGER
HOTDOG
MASHED POTATO
HEAD CABBAGE
BROCCOLI
CAULIFLOWER
ZUCCHINI
SPAGHETTI SQUASH
ACORN SQUASH
BUTTERNUT SQUASH
CUCUMBER
ARTICHOKE
BELL PEPPER
CARDOON
MUSHROOM
GRANNY SMITH
STRAWBERRY
ORANGE
LEMON
FIG
PINEAPPLE
BANANA
JACKFRUIT
CUSTARD APPLE
POMEGRANATE
HAY
CARBONARA
CHOCOLATE SAUCE
DOUGH
MEAT LOAF
PIZZA
POTPIE
BURRITO
RED WINE
ESPRESSO
CUP
EGGNOG
ALP
BUBBLE
CLIFF
CORAL REEF
GEYSER
LAKESIDE
PROMONTORY
SANDBAR
SEASHORE
VALLEY
VOLCANO
BALLPLAYER
GROOM
SCUBA DIVER
RAPESEED
DAISY
LADY SLIPPER
CORN
ACORN
HIP
BUCKEYE
CORAL FUNGUS
AGARIC
GYROMITRA
STINKHORN
EARTHSTAR
HEN-OF-THE-WOODS
BOLETE
EAR
TOILET TISSUE


================================================
FILE: docs/examples/model_repository/simple/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "simple"
platform: "onnxruntime_onnx"
# Example model declaring two INT32 inputs and two INT32 outputs, each with
# 16 elements.
# NOTE(review): with max_batch_size > 0 the batch dimension is presumably
# implicit and therefore not listed in `dims` -- confirm against the Triton
# model configuration docs.
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]


================================================
FILE: docs/examples/model_repository/simple_dyna_sequence/config.pbtxt
================================================
# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Example sequence model: one single-element INT32 input and output, with the
# sequence batcher's "oldest" strategy and four control tensors.
name: "simple_dyna_sequence"
platform: "onnxruntime_onnx"
max_batch_size: 8
sequence_batching {
    # 10,000,000 microseconds = 10 seconds.
    max_sequence_idle_microseconds: 10000000
    # Settings for the "oldest" sequence-batching strategy.
    oldest {
      max_candidate_sequences: 1024
      # 10,000 microseconds = 10 milliseconds.
      max_queue_delay_microseconds: 10000
    }

  # Control tensors declared for the model. START, END and READY are INT32
  # flags using 0 for false and 1 for true (see int32_false_true); CORRID is
  # declared as a UINT64 value.
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "END"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "CORRID"
      control [
        {
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_UINT64
        }
      ]
    }
  ]
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Free-form key/value parameter handed to the backend as a string.
# NOTE(review): judging by the key name this is a 3 ms delay applied per
# execution -- confirm with the backend that consumes it.
parameters [
  {
    key: "execute_delay_ms"
    value: { string_value: "3" }
  }
]
# Two model instances, both placed on CPU.
instance_group [
  {
    count: 2
    kind: KIND_CPU
  }
]


================================================
FILE: docs/examples/model_repository/simple_identity/1/model.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Identity model: every response carries back the tensor it was sent."""

    def initialize(self, args):
        """Parse the JSON model configuration found in ``args`` and keep it.

        The decoded configuration is stored on ``self.model_config``.
        """
        raw_config = args["model_config"]
        self.model_config = json.loads(raw_config)

    def execute(self, requests):
        """Return one response per request, copying "INPUT0" into "OUTPUT0".

        Responses are returned as a list in the same order as ``requests``.
        """

        def _echo(single_request):
            # Read the incoming tensor and wrap its data under the output name.
            received = pb_utils.get_input_tensor_by_name(single_request, "INPUT0")
            payload = received.as_numpy()
            echoed = pb_utils.Tensor("OUTPUT0", payload)
            return pb_utils.InferenceResponse([echoed])

        return [_echo(single_request) for single_request in requests]


================================================
FILE: docs/examples/model_repository/simple_identity/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Example Python-backend model. Its implementation (1/model.py) copies the
# "INPUT0" tensor of each request into "OUTPUT0" unchanged.
name: "simple_identity"
backend: "python"
max_batch_size: 8

# Both tensors are strings.
# NOTE(review): the -1 dimension presumably marks a variable-size dimension
# -- confirm against the Triton model configuration docs.
input [
  {
    name: "INPUT0"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]


================================================
FILE: docs/examples/model_repository/simple_int8/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Example model declaring two INT8 inputs and two INT8 outputs, each with
# 16 elements.
name: "simple_int8"
platform: "onnxruntime_onnx"
# NOTE(review): with max_batch_size > 0 the batch dimension is presumably
# implicit and therefore not listed in `dims` -- confirm against the Triton
# model configuration docs.
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT8
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT8
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT8
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT8
    dims: [ 16 ]
  }
]


================================================
FILE: docs/examples/model_repository/simple_sequence/config.pbtxt
================================================
# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Example sequence model: one single-element INT32 input and output, with
# START and READY control tensors.
name: "simple_sequence"
platform: "onnxruntime_onnx"
max_batch_size: 8
sequence_batching {
  # Both control tensors are INT32 flags using 0 for false and 1 for true
  # (see int32_false_true).
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          int32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Model instances are placed on CPU; no explicit instance count is given.
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: docs/examples/model_repository/simple_string/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "simple_string"
platform: "onnxruntime_onnx"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_STRING
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_STRING
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_STRING
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_STRING
    dims: [ 16 ]
  }
]


================================================
FILE: docs/exclusions.txt
================================================
README.md
examples/README.md
user_guide/perf_analyzer.md


================================================
FILE: docs/generate_docs.py
================================================
#!/usr/bin/env python3

# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import logging
import os
import re
import subprocess
from functools import partial
from logging.handlers import RotatingFileHandler

# Global constants
# Root of the server checkout: $SERVER_ABSPATH when set, otherwise the
# directory the script is started from.
server_abspath = os.getenv("SERVER_ABSPATH", default=os.getcwd())
# The documentation tree sits directly below the checkout root.
server_docs_abspath = os.path.join(server_abspath, "docs")

"""
TODO: Needs to handle cross-branch linkage.

For example, server/docs/user_guide/architecture.md on branch 24.12 links to
server/docs/user_guide/model_analyzer.md on main branch. In this case, the
hyperlink of model_analyzer.md should be a URL instead of relative path.

Another example can be server/docs/user_guide/model_analyzer.md on branch 24.12
links to a file in server repo with relative path. Currently all URLs are
hardcoded to main branch. We need to make sure that the URL actually points to the
correct branch. We also need to handle cases like deprecated or removed files from
older branch to avoid 404 error code.
"""
# Regex patterns
# Prefix of any absolute http(s) URL.
http_patn = r"^https?://"
http_reg = re.compile(http_patn)
# Optional "/blob/main" or "/tree/main" segment of a GitHub URL.
tag_patn = "/(?:blob|tree)/main"
# The dot is escaped so that only the literal host "github.com" matches
# (an unescaped "." would also accept e.g. "githubxcom").
triton_repo_patn = rf"{http_patn}github\.com/triton-inference-server"
# group(1): repository name, group(2): path inside the repository (may be
# empty). The match stops right before an optional "#<section>" suffix.
triton_github_url_reg = re.compile(
    rf"{triton_repo_patn}/([^/#]+)(?:{tag_patn})?/*([^#]*)\s*(?=#|$)"
)
# Hyperlink in a .md file, excluding embedded images.
# group(1): "[text](" prefix, group(2): link target, group(3): closing ")".
hyperlink_reg = re.compile(r"((?<!\!)\[[^\]]+\]\s*\(\s*)([^)]+?)(\s*\))")

# Load exclusion patterns
# One path per line. Blank lines are dropped: an empty pattern would be turned
# into the current working directory by os.path.abspath() and would therefore
# exclude every document below it.
with open(f"{server_docs_abspath}/exclusions.txt") as f:
    exclude_patterns = [
        line.strip() for line in f.read().splitlines() if line.strip()
    ]


# Setup logger once
def setup_logger(name, log_file, level=logging.INFO, max_bytes=1048576, backup_count=5):
    """
    Return the logger called `name`, attaching its handlers on first use.

    The logger level is set to `level` on every call. If the logger has no
    handlers yet, a size-rotated file handler writing to `log_file`
    (`max_bytes` per file, `backup_count` backups) and a console stream
    handler are attached, both at `level` and sharing one message format.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Configured by an earlier call already: do not stack duplicate handlers.
    if logger.handlers:
        return logger

    # ANSI colour codes wrapped around the metadata part of every record.
    blue, reset = "\033[94m", "\033[0m"
    formatter = logging.Formatter(
        f"{blue}%(asctime)s - %(name)s - %(levelname)s - {reset}%(message)s"
    )

    # File handler first, console handler second.
    new_handlers = (
        RotatingFileHandler(log_file, maxBytes=max_bytes, backupCount=backup_count),
        logging.StreamHandler(),
    )
    for handler in new_handlers:
        handler.setLevel(level)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger


# Command-line interface. Arguments are parsed at module level, so `args` and
# `logger` below are available to every function defined in this file.
parser = argparse.ArgumentParser(description="Setup Triton Server Docs")
# Branch/tag handed to clone_from_github(); defaults to
# $TRITON_SERVER_REPO_TAG, falling back to "main".
parser.add_argument(
    "--repo-tag",
    type=str,
    default=os.environ.get("TRITON_SERVER_REPO_TAG", "main"),
    help="Repository tags in format value",
)
# Destination of the rotating log file created by setup_logger(); defaults to
# $TRITON_SERVER_DOCS_LOG_FILE, falling back to "/tmp/docs.log".
parser.add_argument(
    "--log-file",
    type=str,
    default=os.environ.get("TRITON_SERVER_DOCS_LOG_FILE", "/tmp/docs.log"),
    help="The path to the log file",
)
# File listing the repositories to clone. main() opens it after changing into
# the docs directory, so a relative value is resolved against that directory.
parser.add_argument(
    "--repo-file",
    default="repositories.txt",
    help="File which lists the repositories to add. File should be"
    " one repository name per line, newline separated.",
)
# URL prefix that clone_from_github() combines with each repository name;
# defaults to $TRITON_SERVER_REPO_ORG, falling back to the public organization.
parser.add_argument(
    "--github-organization",
    type=str,
    default=os.environ.get(
        "TRITON_SERVER_REPO_ORG", "https://github.com/triton-inference-server"
    ),
    help="GitHub organization name",
)
args = parser.parse_args()


# Module-wide logger named after this script; writes to args.log_file and to
# the console.
logger = setup_logger(os.path.basename(__file__), args.log_file)
logger.info(f"Defined arguments: {args}")


def run_command(command):
    """
    Run a shell command, logging it first and reporting its output on failure.

    Args:
        command: Command line, passed to the shell as a single string.

    Returns:
        The subprocess.CompletedProcess of the finished command, with stdout
        and stderr captured as text.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    logger.info(f"Running command: {command}")
    try:
        return subprocess.run(
            command,
            shell=True,
            check=True,
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except subprocess.CalledProcessError as err:
        # Output is captured rather than echoed, so log it here; otherwise the
        # reason for the failure would be lost.
        logger.error(
            f"Command failed with exit code {err.returncode}: {command}\n"
            f"stdout: {err.stdout}\nstderr: {err.stderr}"
        )
        raise


def clone_from_github(repo, tag, org):
    """
    Clone repository from GitHub (in-sync with build.py).

    `{org}/{repo}.git` is cloned into the current working directory. With a
    non-empty `tag` only that single branch/tag is fetched; repositories whose
    name starts with "model_navigator" are always taken from "main" in that
    case. Without a tag a plain clone is performed.
    """
    logger.info(f"Cloning... {org}/{repo}.git@{tag}")
    repo_url = f"{org}/{repo}.git"

    # model_navigator is pinned to "main" whenever a tag was requested.
    if tag and re.match("model_navigator", repo):
        tag = "main"

    clone_command = ["git", "clone"]
    if tag:
        clone_command += ["--branch", tag, "--single-branch"]
    clone_command.append(repo_url)

    subprocess.run(clone_command, check=True)
    logger.info(f"Successfully cloned... {org}/{repo}.git@{tag}")


def is_excluded(file_path):
    """
    Return True when `file_path` is, or lies beneath, an excluded path.

    Both the file path and every entry of `exclude_patterns` are made absolute
    with os.path.abspath(), i.e. relative to the current working directory.
    """
    candidate = os.path.abspath(file_path)
    excluded_roots = (os.path.abspath(pattern) for pattern in exclude_patterns)
    # The common path equals the excluded root exactly when the candidate is
    # that root or is located inside it.
    return any(
        os.path.commonpath([candidate, root]) == root for root in excluded_roots
    )


def get_git_repo_name(file_path):
    """
    Return the Git repo name of given file path.

    The name is the last path component of the "origin" remote URL of the
    repository containing the file's directory, without a trailing ".git".
    """
    containing_dir = os.path.dirname(file_path)
    raw_output = subprocess.check_output(
        ["git", "-C", containing_dir, "remote", "get-url", "origin"]
    )
    origin_url = raw_output.decode().strip()

    # Drop the conventional ".git" suffix before taking the last component.
    git_suffix = ".git"
    if origin_url.endswith(git_suffix):
        origin_url = origin_url[: -len(git_suffix)]
    return os.path.basename(origin_url)


def replace_url_with_relpath(url, src_doc_path):
    """
    Replace Triton Inference Server GitHub URLs with relative paths for:
    1. URL is a doc file (e.g., ".md" file).
    2. URL is a directory with README.md and ends with "#<section>".

    Examples:
        https://github.com/triton-inference-server/server/blob/main/docs/protocol#restricted-protocols
        https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_shared_memory.md
        https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#dynamic-batcher

    Args:
        url: Hyperlink target that starts with "http://" or "https://".
        src_doc_path: Path of the document that contains the hyperlink; the
            returned relative path is computed from its directory.

    Returns:
        The path of the target document relative to the source document,
        followed by the original "#<section>" suffix (if any), or `url`
        unchanged when the URL is not a Triton GitHub URL, points outside
        server/docs, or does not resolve to a non-excluded .md file.
    """
    m = triton_github_url_reg.match(url)
    if not m:
        return url

    target_repo_name = m.group(1)
    logger.info(f"Found target repository: {target_repo_name}")
    target_relpath_from_target_repo = os.path.normpath(m.groups("")[1])
    logger.info(
        f"Found target relative path from target repository: {target_relpath_from_target_repo}"
    )
    # Everything after the matched URL part, i.e. the "#<section>" suffix.
    section = url[len(m.group(0)) :]
    logger.info(f"Found section: {section}")
    valid_hashtag = section not in ["", "#"] and section.startswith("#")

    # The server repo is the checkout itself; every other repo is expected in
    # a directory of the same name below server/docs.
    target_path = (
        os.path.join(server_abspath, target_relpath_from_target_repo)
        if target_repo_name == "server"
        else os.path.join(
            server_docs_abspath, target_repo_name, target_relpath_from_target_repo
        )
    )
    logger.info(f"Found target path: {target_path}")
    # Return URL if it points to a path outside server/docs
    if os.path.commonpath([server_docs_abspath, target_path]) != server_docs_abspath:
        return url
    logger.info(
        f"Target path is under server/docs directory: {os.path.commonpath([server_docs_abspath, target_path]) == server_docs_abspath}"
    )
    # Check if target is valid for conversion
    is_md_file = (
        os.path.isfile(target_path)
        and os.path.splitext(target_path)[1] == ".md"
        and not is_excluded(target_path)
    )
    logger.info(f"Target path is a valid .md file: {is_md_file}")
    is_dir_with_readme = (
        os.path.isdir(target_path)
        and os.path.isfile(os.path.join(target_path, "README.md"))
        and valid_hashtag
        and not is_excluded(os.path.join(target_path, "README.md"))
    )
    logger.info(f"Target path is a directory with README.md: {is_dir_with_readme}")
    if is_md_file:
        pass
    elif is_dir_with_readme:
        target_path = os.path.join(target_path, "README.md")
    else:
        return url
    logger.info(
        f"Target path is a valid .md file or a directory with README.md: {is_md_file or is_dir_with_readme}"
    )

    relpath = os.path.relpath(target_path, start=os.path.dirname(src_doc_path))
    logger.info(f"Found relative path: {relpath}")
    # The pattern is anchored at the start of the URL, so the result is simply
    # the relative path plus the untouched section suffix. Plain concatenation
    # avoids re.sub() interpreting backslashes in the path as replacement
    # escapes and avoids passing `count` positionally (deprecated in 3.13).
    return relpath + section


def replace_relpath_with_url(relpath, src_doc_path):
    """
    Turn a relative hyperlink into a Triton Inference Server GitHub URL unless
    it can stay relative.

    The link is converted to a URL when:
    1. It is a file that is not of ".md" type inside the current repo.
    2. It is a directory, except for one that has a "README.md" and is
       referenced with a "#<section>" suffix.
    3. It does not exist (shows 404 page).

    A link that resolves to a non-excluded ".md" file under server/docs is
    returned as a relative path; for a directory reference with a section the
    returned path has "README.md" inserted before the section.

    Examples:
        ../examples/model_repository
        ../examples/model_repository/inception_graphdef/config.pbtxt
    """
    # Split "<path>#<section>" at the last "#"; section keeps its "#".
    link_path = relpath.rsplit("#", 1)[0]
    section = relpath[len(link_path) :]
    valid_hashtag = section not in ("", "#")

    # A pure in-page anchor refers to the source document itself.
    resolved_from = os.path.basename(src_doc_path) if relpath.startswith("#") else link_path
    src_doc_dir = os.path.dirname(src_doc_path)
    target_path = os.path.normpath(os.path.join(src_doc_dir, resolved_from))

    src_git_repo_name = get_git_repo_name(src_doc_path)
    if src_git_repo_name == "server":
        src_repo_abspath = server_abspath
    else:
        src_repo_abspath = os.path.join(server_docs_abspath, src_git_repo_name)

    # Assert target path is under the current repo directory
    assert os.path.commonpath([src_repo_abspath, target_path]) == src_repo_abspath

    # The URL is built from the path as written in the link, i.e. before any
    # "README.md" is appended below.
    target_path_from_src_repo = os.path.relpath(target_path, start=src_repo_abspath)
    github_url = f"https://github.com/triton-inference-server/{src_git_repo_name}/blob/main/{target_path_from_src_repo}{section}"

    # For example, target_path of "../protocol#restricted-protocols" should be "<path-to-server>/server/docs/protocol/README.md"
    readme_path = os.path.join(target_path, "README.md")
    if os.path.isdir(target_path) and valid_hashtag and os.path.isfile(readme_path):
        relpath = os.path.join(link_path, "README.md") + section
        target_path = readme_path

    # Anything that is not a valid, non-excluded .md file in docs becomes a URL.
    if not os.path.isfile(target_path):
        return github_url
    if os.path.splitext(target_path)[1] != ".md":
        return github_url
    if os.path.commonpath([server_docs_abspath, target_path]) != server_docs_abspath:
        return github_url
    if is_excluded(target_path):
        return github_url
    return relpath


def replace_hyperlink(m, src_doc_path):
    """
    Rewrite the target of one markdown hyperlink match.

    `m` is a match of `hyperlink_reg`: group 1 is the "[text](" prefix, group 2
    the link target and group 3 the closing parenthesis. Absolute http(s)
    targets go through replace_url_with_relpath(), everything else through
    replace_relpath_with_url(); prefix and suffix are kept unchanged.
    TODO: Support HTML tags for future docs (e.g., <a href=...>).
    """
    prefix, link_target, suffix = m.group(1, 2, 3)
    if http_reg.match(link_target):
        rewritten = replace_url_with_relpath(link_target, src_doc_path)
    else:
        rewritten = replace_relpath_with_url(link_target, src_doc_path)
    return prefix + rewritten + suffix


def preprocess_docs(exclude_paths=None):
    """
    Find all .md files below the docs directory and rewrite their hyperlinks.

    Every non-excluded document is read, each markdown hyperlink is passed
    through replace_hyperlink(), and the result is written back in place.

    Args:
        exclude_paths: Unused; kept so existing callers keep working.
            Exclusions are decided by is_excluded().

    Raises:
        OSError: If the docs directory cannot be traversed.
    """

    def _raise_walk_error(err):
        # os.walk() ignores listing errors by default; fail loudly instead.
        raise err

    # Walk the tree directly instead of shelling out to `find`: an unquoted
    # path in a shell string breaks on spaces or shell metacharacters.
    docs_list = sorted(
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(
            server_docs_abspath, onerror=_raise_walk_error
        )
        for filename in filenames
        if filename.endswith(".md")
    )

    # Read, preprocess and write back to each document file
    for doc_abspath in docs_list:
        if is_excluded(doc_abspath):
            continue

        with open(doc_abspath) as f:
            content = f.read()

        content = hyperlink_reg.sub(
            partial(replace_hyperlink, src_doc_path=doc_abspath),
            content,
        )

        with open(doc_abspath, "w") as f:
            f.write(content)


def main():
    """
    Clone the listed repositories into the docs directory and preprocess docs.

    Changes the working directory to the docs directory, reads the repository
    names from `args.repo_file` (one per line), removes any existing copy of
    each repository and clones it at `args.repo_tag`, then rewrites the
    hyperlinks of all documents.
    """
    # Local import: only needed here to quote names for the shell.
    import shlex

    logger.info("Starting setup Triton Server documentation for Sphinx build...")
    logger.info(f"Collecting repositories from {args.repo_file}...")
    os.chdir(server_docs_abspath)

    with open(args.repo_file) as f:
        # Skip blank lines; an empty name would lead to cloning "<org>/.git".
        repository_list = [
            line.strip() for line in f.read().splitlines() if line.strip()
        ]

    # Clone repositories
    for repository in repository_list:
        # Quote the name and end option parsing so that unusual names cannot
        # be interpreted by the shell or as options of rm.
        run_command(f"rm -rf -- {shlex.quote(repository)}")
        clone_from_github(repository, args.repo_tag, args.github_organization)

    # Preprocess documents after all repos are cloned
    preprocess_docs()


if __name__ == "__main__":
    main()


================================================
FILE: docs/getting_started/llm.md
================================================
<!--
# Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Deploying Phi-3 Model with Triton and TRT-LLM

This guide captures the steps to build Phi-3 with TRT-LLM and deploy it with Triton Inference Server. It also shows how to use GenAI-Perf to run benchmarks that measure model performance in terms of throughput and latency.

This guide is tested on A100 80GB SXM4 and H100 80GB PCIe. It is confirmed to work with Phi-3-mini-128k-instruct and Phi-3-mini-4k-instruct (see [Support Matrix](https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/models/core/phi) for full list) using TRT-LLM v0.11 and Triton Inference Server 24.07.

- [Build and test TRT-LLM engine](#build-and-test-trt-llm-engine)
- [Deploy with Triton Inference Server](#deploy-with-triton-inference-server)
- [Benchmark with GenAI-Perf](#benchmark-with-genai-perf)
- [Reference Configurations](#reference-configurations)


## Build and test TRT-LLM engine

Reference: <https://nvidia.github.io/TensorRT-LLM/installation/linux.html>

1. ## Retrieve and launch the Docker container (optional)

<!---->

    # Pre-install the environment using the NVIDIA Container Toolkit to avoid manual environment configuration
    docker run --rm --ipc=host --runtime=nvidia --gpus '"device=0"' --entrypoint /bin/bash -it nvidia/cuda:12.4.1-devel-ubuntu22.04

2. ## Install TensorRT-LLM

<!---->

    # Install dependencies, TensorRT-LLM requires Python 3.10
    apt-get update && apt-get -y install python3.10 python3-pip openmpi-bin libopenmpi-dev git git-lfs

    # Install TensorRT-LLM (v0.11.0)
    pip3 install tensorrt_llm==0.11.0 --extra-index-url https://pypi.nvidia.com

    # Check installation
    python3 -c "import tensorrt_llm"

3. ## Clone the TRT-LLM repo with the Phi-3 conversion script

<!---->

    git clone -b v0.11.0 https://github.com/NVIDIA/TensorRT-LLM.git
    cd TensorRT-LLM/examples/phi/

    # only need to install requirements.txt if you want to test the summarize.py example
    # if so, modify requirements.txt such that tensorrt_llm==0.11.0
    # pip install -r requirements.txt


## Build the TRT-LLM Engine

Reference: <https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/models/core/phi>

4. ## Download Phi-3-mini-4k-instruct

<!---->

    git lfs install
    git clone https://huggingface.co/microsoft/Phi-3-mini-4k-instruct

5. ## Convert weights from HF Transformers to TensorRT-LLM format

<!---->

    python3 ./convert_checkpoint.py \
                        --model_dir ./Phi-3-mini-4k-instruct \
                        --output_dir ./phi-checkpoint \
                        --dtype float16

6. ## Build TensorRT engine(s)

<!---->

    # Build a float16 engine using a single GPU and HF weights.
    # Enable several TensorRT-LLM plugins to increase runtime performance. It also helps with build time.
    # --tp_size and --pp_size are the model shard size
    trtllm-build \
        --checkpoint_dir ./phi-checkpoint \
        --output_dir ./phi-engine \
        --gemm_plugin float16 \
        --max_batch_size 8 \
        --max_input_len 1024 \
        --max_seq_len 2048 \
        --tp_size 1 \
        --pp_size 1

7. ## Run the model

<!---->

    python3 ../run.py --engine_dir ./phi-engine \
         --max_output_len 500 \
         --tokenizer_dir ./Phi-3-mini-4k-instruct \
         --input_text "How do I count to nine in French?"

8. ## Summarization test using the Phi model

The TensorRT-LLM Phi model can be tested to summarize the articles from the [cnn\_dailymail](https://huggingface.co/datasets/cnn_dailymail) dataset. For each summary, the script can compute the [ROUGE](https://en.wikipedia.org/wiki/ROUGE_\(metric\)) scores and use the ROUGE-1 score to validate the implementation. The script can also perform the same summarization using the HF Phi model.

    # Run the summarization task using a TensorRT-LLM model and a single GPU.
    python3 ../summarize.py --engine_dir ./phi-engine \
                            --hf_model_dir ./Phi-3-mini-4k-instruct \
                            --batch_size 1 \
                            --test_trt_llm \
                            --test_hf \
                            --data_type fp16 \
                            --check_accuracy \
                            --tensorrt_llm_rouge1_threshold=20


## Deploy with Triton Inference Server

9. ## Copy engine files from the Docker container to the host

<!---->

    # In another terminal instance, before exiting the current container
    docker cp <container_id>:<path_in_container> <path_on_host>

    # For example
    docker cp 452ee1c1d8a1:/TensorRT-LLM/examples/phi/phi-engine /home/user/phi-engine

10. ## Copy the compiled model to the skeleton repository with TRT-LLM backend

<!---->

    # After exiting the TensorRT-LLM Docker container
    git clone https://github.com/triton-inference-server/tensorrtllm_backend.git
    cd tensorrtllm_backend
    cp ../phi-engine/*   all_models/inflight_batcher_llm/tensorrt_llm/1/

11. ## Modify the configuration files from the model repository

The following configuration files need to be updated:

- ensemble/config.pbtxt

- postprocessing/config.pbtxt

- preprocessing/config.pbtxt

- tensorrt\_llm/config.pbtxt

- tensorrt\_llm/1/config.json


### Update ensemble/config.pbtxt

    python3 tools/fill_template.py --in_place \
        all_models/inflight_batcher_llm/ensemble/config.pbtxt \
    triton_max_batch_size:128


### Update postprocessing/config.pbtxt

    python3 tools/fill_template.py --in_place \
        all_models/inflight_batcher_llm/postprocessing/config.pbtxt \
    tokenizer_type:auto,\
    tokenizer_dir:../Phi-3-mini-4k-instruct,\
    triton_max_batch_size:128,\
    postprocessing_instance_count:2


### Update preprocessing/config.pbtxt

    python3 tools/fill_template.py --in_place \
        all_models/inflight_batcher_llm/preprocessing/config.pbtxt \
    tokenizer_type:auto,\
    tokenizer_dir:../Phi-3-mini-4k-instruct,\
    triton_max_batch_size:128,\
    preprocessing_instance_count:2


### Update tensorrt\_llm/config.pbtxt

    python3 tools/fill_template.py --in_place \
        all_models/inflight_batcher_llm/tensorrt_llm/config.pbtxt \
    decoupled_mode:true,\
    engine_dir:/all_models/inflight_batcher_llm/tensorrt_llm/1,\
    max_tokens_in_paged_kv_cache:,\
    batch_scheduler_policy:guaranteed_completion,\
    kv_cache_free_gpu_mem_fraction:0.2,\
    max_num_sequences:4,\
    triton_backend:tensorrtllm,\
    triton_max_batch_size:128,\
    max_queue_delay_microseconds:10,\
    max_beam_width:1,\
    batching_strategy:inflight_fused_batching,\
    engine_dir:/opt/all_models/inflight_batcher_llm/tensorrt_llm/1,\
    max_tokens_in_paged_kv_cache:1,\
    batch_scheduler_policy:guaranteed_completion,\
    kv_cache_free_gpu_mem_fraction:0.2


    # manually access tensorrt_llm/config.pbtxt and change the CPU instances to > 1
    # unfortunately this was hard-coded and cannot be updated with the above script

    # instance_group [
    #   {
    #     count: 2
    #     kind : KIND_CPU
    #   }
    # ]


#### Max Tokens in Paged KV Cache

This is only required for Phi-3-mini-128k-instruct, and it is not necessary to modify this parameter for Phi-3-mini-4k-instruct.

To accommodate the 128k context, remove the following from tensorrt\_llm/config.pbtxt, which allows the max tokens to be determined by the KV cache manager. If you don’t want to remove it, you can instead set maxTokensInPagedKvCache to a value large enough (e.g. 4096) to process at least 1 sequence to completion (i.e. it must be larger than beam\_width \* tokensPerBlock \* maxBlocksPerSeq).

    parameters: {
      key: "max_tokens_in_paged_kv_cache"
      value: {
        string_value: "4096"
      }
    }


### Update tensorrt\_llm/1/config.json

In the engine config (tensorrtllm\_backend/all\_models/inflight\_batcher\_llm/tensorrt\_llm/1/config.json), add the following under plugin\_config

    "Use_context_fmha_for_generation": false

    # for example:
            "plugin_config": {
                "dtype": "float16",
                "bert_attention_plugin": "auto",
                "streamingllm": false,
                "Use_context_fmha_for_generation": false

The above needs to be done manually with your favorite editor. Once finished, please be sure your working directory is \~/tensorrtllm\_backend

12. ## Delete tensorrt\_llm\_bls

<!---->

    # Recommended to remove the BLS directory if not needed
    rm -rf all_models/inflight_batcher_llm/tensorrt_llm_bls/

13. ## Download model repository

<!---->

    # for tokenizer
    git lfs install
    git clone https://huggingface.co/microsoft/Phi-3-mini-4k-instruct

14. ## Launch Triton Inference Server (trtllm-python-py3)

<!---->

    docker run -it --rm --gpus all --network host --shm-size=1g \
    -v $(pwd)/all_models:/opt/all_models \
    -v $(pwd)/scripts:/opt/scripts \
    -v $(pwd)/Phi-3-mini-4k-instruct:/opt/Phi-3-mini-4k-instruct \
    nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3

    # Launch Server
    python3 ../scripts/launch_triton_server.py --model_repo ../all_models/inflight_batcher_llm --world_size 1

15. ## Send Requests

<!---->

    curl -X POST localhost:8000/v2/models/ensemble/generate -d \
    '{
    "text_input": "A farmer with a wolf, a goat, and a cabbage must cross a river by boat. The boat can carry only the farmer and a single item. If left unattended together, the wolf would eat the goat, or the goat would eat the cabbage. How can they cross the river without anything being eaten?",
    "parameters": {
    "max_tokens": 256,
    "bad_words":[""],
    "stop_words":[""]
    }
    }' | jq


## Benchmark with GenAI-Perf

16. ## Launch Triton Inference Server (py3-sdk)

<!---->

    export RELEASE="24.07"
    docker run -it --net=host --gpus '"device=0"'  nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk

17. ## Download the Phi-3 tokenizer

Login to Hugging Face (with User Access Tokens) to get the Phi-3 tokenizer. This step is not necessary but helps with interpreting token metrics from prompts and responses. If you skip this step, be sure to remove the --tokenizer flag from the GenAI-Perf script in Step 18.

    git lfs install
    git clone https://huggingface.co/microsoft/Phi-3-mini-4k-instruct

    pip install huggingface_hub
    huggingface-cli login --token hf_***

18. ## Run GenAI-Perf

<!---->

    export INPUT_SEQUENCE_LENGTH=128
    export OUTPUT_SEQUENCE_LENGTH=128
    export CONCURRENCY=25

    genai-perf \
      -m ensemble \
      --service-kind triton \
      --backend tensorrtllm \
      --random-seed 123 \
      --synthetic-input-tokens-mean $INPUT_SEQUENCE_LENGTH \
      --synthetic-input-tokens-stddev 0 \
      --streaming \
      --output-tokens-mean $OUTPUT_SEQUENCE_LENGTH \
      --output-tokens-stddev 0 \
      --output-tokens-mean-deterministic \
      --concurrency $CONCURRENCY \
      --tokenizer microsoft/Phi-3-mini-4k-instruct \
      --measurement-interval 4000 \
      --url localhost:8001

More details on performance benchmarking with GenAI-Perf can be found [here](https://github.com/triton-inference-server/perf_analyzer/blob/main/genai-perf/README.md).

## Reference Configurations

All config files inside /tensorrtllm\_backend/all\_models/inflight\_batcher\_llm are shown below.

<details>
<summary><b> ensemble/config.pbtxt</b></summary>

    # Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
    #
    # Redistribution and use in source and binary forms, with or without
    # modification, are permitted provided that the following conditions
    # are met:
    #  * Redistributions of source code must retain the above copyright
    #    notice, this list of conditions and the following disclaimer.
    #  * Redistributions in binary form must reproduce the above copyright
    #    notice, this list of conditions and the following disclaimer in the
    #    documentation and/or other materials provided with the distribution.
    #  * Neither the name of NVIDIA CORPORATION nor the names of its
    #    contributors may be used to endorse or promote products derived
    #    from this software without specific prior written permission.
    #
    # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
    # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
    # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    name: "ensemble"
    platform: "ensemble"
    max_batch_size: 128
    input [
      {
        name: "text_input"
        data_type: TYPE_STRING
        dims: [ 1 ]
      },
      {
        name: "decoder_text_input"
        data_type: TYPE_STRING
        dims: [ 1 ]
        optional: true
      },
      {
        name: "image_input"
        data_type: TYPE_FP16
        dims: [ 3, 224, 224 ]
        optional: true
      },
      {
        name: "max_tokens"
        data_type: TYPE_INT32
        dims: [ 1 ]
      },
      {
       name: "bad_words"
       data_type: TYPE_STRING
       dims: [ -1 ]
       optional: true
      },
      {
       name: "stop_words"
       data_type: TYPE_STRING
       dims: [ -1 ]
       optional: true
      },
      {
        name: "end_id"
        data_type: TYPE_INT32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "pad_id"
        data_type: TYPE_INT32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "top_k"
        data_type: TYPE_INT32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "top_p"
        data_type: TYPE_FP32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "temperature"
        data_type: TYPE_FP32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "length_penalty"
        data_type: TYPE_FP32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "repetition_penalty"
        data_type: TYPE_FP32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "min_length"
        data_type: TYPE_INT32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "presence_penalty"
        data_type: TYPE_FP32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "frequency_penalty"
        data_type: TYPE_FP32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "random_seed"
        data_type: TYPE_UINT64
        dims: [ 1 ]
        optional: true
      },
      {
        name: "return_log_probs"
        data_type: TYPE_BOOL
        dims: [ 1 ]
        optional: true
      },
      {
        name: "return_context_logits"
        data_type: TYPE_BOOL
        dims: [ 1 ]
        optional: true
      },
      {
        name: "return_generation_logits"
        data_type: TYPE_BOOL
        dims: [ 1 ]
        optional: true
      },
      {
        name: "beam_width"
        data_type: TYPE_INT32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "stream"
        data_type: TYPE_BOOL
        dims: [ 1 ]
        optional: true
      },
      {
        name: "prompt_embedding_table"
        data_type: TYPE_FP16
        dims: [ -1, -1 ]
        optional: true
      },
      {
        name: "prompt_vocab_size"
        data_type: TYPE_INT32
        dims: [ 1 ]
        optional: true
      },
      {
        name: "embedding_bias_words"
        data_type: TYPE_STRING
        dims: [ -1 ]
        optional: true
      },
      {
        name: "embedding_bias_weights"
        data_type: TYPE_FP32
        dims: [ -1 ]
        optional: true
      }
    ]
    output [
      {
        name: "text_output"
        data_type: TYPE_STRING
        dims: [ -1 ]
      },
      {
        name: "cum_log_probs"
        data_type: TYPE_FP32
        dims: [ -1 ]
      },
      {
        name: "output_log_probs"
        data_type: TYPE_FP32
        dims: [ -1, -1 ]
      },
      {
        name: "context_logits"
        data_type: TYPE_FP32
        dims: [ -1, -1 ]
      },
      {
        name: "generation_logits"
        data_type: TYPE_FP32
        dims: [ -1, -1, -1 ]
      },
      {
        name: "batch_index"
        data_type: TYPE_INT32
        dims: [ 1 ]
      }
    ]
    ensemble_scheduling {
      step [
        {
          model_name: "preprocessing"
          model_version: -1
          input_map {
            key: "QUERY"
            value: "text_input"
          }
          input_map {
            key: "DECODER_QUERY"
            value: "decoder_text_input"
          }
          input_map {
            key: "IMAGE"
            value: "image_input"
          }
          input_map {
            key: "REQUEST_OUTPUT_LEN"
            value: "max_tokens"
          }
          input_map {
            key: "BAD_WORDS_DICT"
            value: "bad_words"
          }
          input_map {
            key: "STOP_WORDS_DICT"
            value: "stop_words"
          }
          input_map {
            key: "EMBEDDING_BIAS_WORDS"
            value: "embedding_bias_words"
          }
          input_map {
            key: "EMBEDDING_BIAS_WEIGHTS"
            value: "embedding_bias_weights"
          }
          input_map {
            key: "END_ID"
            value: "end_id"
          }
          input_map {
            key: "PAD_ID"
            value: "pad_id"
          }
          input_map {
            key: "PROMPT_EMBEDDING_TABLE"
            value: "prompt_embedding_table"
          }
          output_map {
            key: "REQUEST_INPUT_LEN"
            value: "_REQUEST_INPUT_LEN"
          }
          output_map {
            key: "INPUT_ID"
            value: "_INPUT_ID"
          }
          output_map {
            key: "REQUEST_DECODER_INPUT_LEN"
            value: "_REQUEST_DECODER_INPUT_LEN"
          }
          output_map {
            key: "DECODER_INPUT_ID"
            value: "_DECODER_INPUT_ID"
          }
          output_map {
            key: "REQUEST_OUTPUT_LEN"
            value: "_REQUEST_OUTPUT_LEN"
          }
          output_map {
            key: "STOP_WORDS_IDS"
            value: "_STOP_WORDS_IDS"
          }
          output_map {
            key: "BAD_WORDS_IDS"
            value: "_BAD_WORDS_IDS"
          }
          output_map {
            key: "EMBEDDING_BIAS"
            value: "_EMBEDDING_BIAS"
          }
          output_map {
            key: "OUT_END_ID"
            value: "_PREPROCESSOR_END_ID"
          }
          output_map {
            key: "OUT_PAD_ID"
            value: "_PREPROCESSOR_PAD_ID"
          }
          output_map {
            key: "OUT_PROMPT_EMBEDDING_TABLE"
            value: "out_prompt_embedding_table"
          }
        },
        {
          model_name: "tensorrt_llm"
          model_version: -1
          input_map {
            key: "input_ids"
            value: "_INPUT_ID"
          }
          input_map {
            key: "decoder_input_ids"
            value: "_DECODER_INPUT_ID"
          }
          input_map {
            key: "input_lengths"
            value: "_REQUEST_INPUT_LEN"
          }
          input_map {
            key: "decoder_input_lengths"
            value: "_REQUEST_DECODER_INPUT_LEN"
          }
          input_map {
            key: "request_output_len"
            value: "_REQUEST_OUTPUT_LEN"
          }
          input_map {
              key: "end_id"
              value: "_PREPROCESSOR_END_ID"
          }
          input_map {
              key: "pad_id"
              value: "_PREPROCESSOR_PAD_ID"
          }
          input_map {
              key: "embedding_bias"
              value: "_EMBEDDING_BIAS"
          }
          input_map {
              key: "runtime_top_k"
              value: "top_k"
          }
          input_map {
              key: "runtime_top_p"
              value: "top_p"
          }
          input_map {
              key: "temperature"
              value: "temperature"
          }
          input_map {
              key: "len_penalty"
              value: "length_penalty"
          }
          input_map {
              key: "repetition_penalty"
              value: "repetition_penalty"
          }
          input_map {
              key: "min_length"
              value: "min_length"
          }
          input_map {
              key: "presence_penalty"
              value: "presence_penalty"
          }
          input_map {
              key: "frequency_penalty"
              value: "frequency_penalty"
          }
          input_map {
              key: "random_seed"
              value: "random_seed"
          }
          input_map {
              key: "return_log_probs"
              value: "return_log_probs"
          }
          input_map {
              key: "return_context_logits"
              value: "return_context_logits"
          }
          input_map {
              key: "return_generation_logits"
              value: "return_generation_logits"
          }
          input_map {
              key: "beam_width"
              value: "beam_width"
          }
          input_map {
              key: "streaming"
              value: "stream"
          }
          input_map {
            key: "prompt_embedding_table"
            value: "out_prompt_embedding_table"
          }
          input_map {
            key: "prompt_vocab_size"
            value: "prompt_vocab_size"
          }
          input_map {
            key: "stop_words_list"
            value: "_STOP_WORDS_IDS"
          }
          input_map {
            key: "bad_words_list"
            value: "_BAD_WORDS_IDS"
          }
          output_map {
            key: "output_ids"
            value: "_TOKENS_BATCH"
          }
          output_map {
            key: "sequence_length"
            value: "_SEQUENCE_LENGTH"
          }
          output_map {
            key: "cum_log_probs"
            value: "_CUM_LOG_PROBS"
          }
          output_map {
            key: "output_log_probs"
            value: "_OUTPUT_LOG_PROBS"
          }
          output_map {
            key: "context_logits"
            value: "_CONTEXT_LOGITS"
          }
          output_map {
            key: "generation_logits"
            value: "_GENERATION_LOGITS"
          }
          output_map {
            key: "batch_index"
            value: "_BATCH_INDEX"
          }
        },
        {
          model_name: "postprocessing"
          model_version: -1
          input_map {
            key: "TOKENS_BATCH"
            value: "_TOKENS_BATCH"
          }
          input_map {
            key: "CUM_LOG_PROBS"
            value: "_CUM_LOG_PROBS"
          }
          input_map {
            key: "OUTPUT_LOG_PROBS"
            value: "_OUTPUT_LOG_PROBS"
          }
          input_map {
            key: "CONTEXT_LOGITS"
            value: "_CONTEXT_LOGITS"
          }
          input_map {
            key: "GENERATION_LOGITS"
            value: "_GENERATION_LOGITS"
          }
          input_map {
            key: "SEQUENCE_LENGTH"
            value: "_SEQUENCE_LENGTH"
          }
          input_map {
            key: "BATCH_INDEX"
            value: "_BATCH_INDEX"
          }
          output_map {
            key: "OUTPUT"
            value: "text_output"
          }
          output_map {
            key: "OUT_OUTPUT_LOG_PROBS"
            value: "output_log_probs"
          }
          output_map {
            key: "OUT_CUM_LOG_PROBS"
            value: "cum_log_probs"
          }
          output_map {
            key: "OUT_CONTEXT_LOGITS"
            value: "context_logits"
          }
          output_map {
            key: "OUT_GENERATION_LOGITS"
            value: "generation_logits"
          }
          output_map {
            key: "OUT_BATCH_INDEX"
            value: "batch_index"
          }
        }
      ]
    }
</details>

<details>
<summary><b>postprocessing/config.pbtxt</b></summary>

    # Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
    #
    # Redistribution and use in source and binary forms, with or without
    # modification, are permitted provided that the following conditions
    # are met:
    #  * Redistributions of source code must retain the above copyright
    #    notice, this list of conditions and the following disclaimer.
    #  * Redistributions in binary form must reproduce the above copyright
    #    notice, this list of conditions and the following disclaimer in the
    #    documentation and/or other materials provided with the distribution.
    #  * Neither the name of NVIDIA CORPORATION nor the names of its
    #    contributors may be used to endorse or promote products derived
    #    from this software without specific prior written permission.
    #
    # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
    # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
    # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    name: "postprocessing"
    backend: "python"
    max_batch_size: 128
    input [
      {
        name: "TOKENS_BATCH"
        data_type: TYPE_INT32
        dims: [ -1, -1 ]
      },
      {
        name: "SEQUENCE_LENGTH"
        data_type: TYPE_INT32
        dims: [ -1 ]
      },
      {
        name: "CUM_LOG_PROBS"
        data_type: TYPE_FP32
        dims: [ -1 ]
        optional: true
      },
      {
        name: "OUTPUT_LOG_PROBS"
        data_type: TYPE_FP32
        dims: [ -1, -1 ]
        optional: true
      },
      {
        name: "CONTEXT_LOGITS"
        data_type: TYPE_FP32
        dims: [ -1, -1 ]
        optional: true
      },
      {
        name: "GENERATION_LOGITS"
        data_type: TYPE_FP32
        dims: [ -1, -1, -1 ]
        optional: true
      },
      {
        name: "BATCH_INDEX"
        data_type: TYPE_INT32
        dims: [ 1 ]
        optional: true
      }
    ]
    output [
      {
        name: "OUTPUT"
        data_type: TYPE_STRING
        dims: [ -1 ]
      },
      {
        name: "OUT_CUM_LOG_PROBS"
        data_type: TYPE_FP32
        dims: [ -1 ]
      },
      {
        name: "OUT_OUTPUT_LOG_PROBS"
        data_type: TYPE_FP32
        dims: [ -1, -1 ]
      },
      {
        name: "OUT_CONTEXT_LOGITS"
        data_type: TYPE_FP32
        dims: [ -1, -1 ]
      },
      {
        name: "OUT_GENERATION_LOGITS"
        data_type: TYPE_FP32
        dims: [ -1, -1, -1 ]
      },
      {
        name: "OUT_BATCH_INDEX"
        data_type: TYPE_INT32
        dims: [ 1 ]
      }
    ]

    parameters {
      key: "tokenizer_dir"
      value: {
        string_value: "../Phi-3-mini-4k-instruct"
      }
    }

    parameters {
      key: "skip_special_tokens"
      value: {
        string_value: "${skip_special_tokens}"
      }
    }

    instance_group [
        {
            count: 4
            kind: KIND_CPU
        }
    ]
</details>

<details>
<summary><b> preprocessing/config.pbtxt</b> </summary>

    # Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
    #
    # Redistribution and use in source and binary forms, with or without
    # modification, are permitted provided that the following conditions
    # are met:
    #  * Redistributions of source code must retain the above copyright
    #    notice, this list of conditions and the following disclaimer.
    #  * Redistributions in binary form must reproduce the above copyright
    #    notice, this list of conditions and the following disclaimer in the
    #    documentation and/or other materials provided with the distribution.
    #  * Neither the name of NVIDIA CORPORATION nor the names of its
    #    contributors may be used to endorse or promote products derived
    #    from this software without specific prior written permission.
    #
    # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
    # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
    # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    name: "preprocessing"
    backend: "python"
    max_batch_size: 128
    input [
        {
            name: "QUERY"
            data_type: TYPE_STRING
            dims: [ 1 ]
        },
        {
            name: "DECODER_QUERY"
            data_type: TYPE_STRING
            dims: [ 1 ]
            optional: true
        },
        {
            name: "IMAGE"
            data_type: TYPE_FP16
            dims: [ 3, 224, 224 ]
            optional: true
        },
        {
            name: "REQUEST_OUTPUT_LEN"
            data_type: TYPE_INT32
            dims: [ 1 ]
        },
        {
            name: "BAD_WORDS_DICT"
            data_type: TYPE_STRING
            dims: [ -1 ]
            optional: true
        },
        {
            name: "STOP_WORDS_DICT"
            data_type: TYPE_STRING
            dims: [ -1 ]
            optional: true
        },
        {
            name: "EMBEDDING_BIAS_WORDS"
            data_type: TYPE_STRING
            dims: [ -1 ]
            optional: true
        },
        {
            name: "EMBEDDING_BIAS_WEIGHTS"
            data_type: TYPE_FP32
            dims: [ -1 ]
            optional: true
        },
        {
            name: "END_ID"
            data_type: TYPE_INT32
            dims: [ 1 ]
            optional: true
        },
        {
            name: "PAD_ID"
            data_type: TYPE_INT32
            dims: [ 1 ]
            optional: true
        },
        {
            name: "PROMPT_EMBEDDING_TABLE"
            data_type: TYPE_FP16
            dims: [ -1, -1 ]
            optional: true
            allow_ragged_batch: true
        }
    ]
    output [
        {
            name: "INPUT_ID"
            data_type: TYPE_INT32
            dims: [ -1 ]
        },
        {
            name: "REQUEST_INPUT_LEN"
            data_type: TYPE_INT32
            dims: [ 1 ]
        },
        {
            name: "DECODER_INPUT_ID"
            data_type: TYPE_INT32
            dims: [ -1 ]
        },
        {
            name: "REQUEST_DECODER_INPUT_LEN"
            data_type: TYPE_INT32
            dims: [ 1 ]
        },
        {
            name: "BAD_WORDS_IDS"
            data_type: TYPE_INT32
            dims: [ 2, -1 ]
        },
        {
            name: "STOP_WORDS_IDS"
            data_type: TYPE_INT32
            dims: [ 2, -1 ]
        },
        {
            name: "EMBEDDING_BIAS"
            data_type: TYPE_FP32
            dims: [ -1 ]
        },
        {
            name: "REQUEST_OUTPUT_LEN"
            data_type: TYPE_INT32
            dims: [ -1 ]
        },
        {
            name: "OUT_END_ID"
            data_type: TYPE_INT32
            dims: [ 1 ]
        },
        {
            name: "OUT_PAD_ID"
            data_type: TYPE_INT32
            dims: [ 1 ]
        },
        {
            name: "OUT_PROMPT_EMBEDDING_TABLE"
            data_type: TYPE_FP16
            dims: [ -1, -1 ]
        }
    ]

    parameters {
      key: "tokenizer_dir"
      value: {
        string_value: "../Phi-3-mini-4k-instruct"
      }
    }

    parameters {
      key: "add_special_tokens"
      value: {
        string_value: "${add_special_tokens}"
      }
    }

    parameters {
      key: "visual_model_path"
      value: {
        string_value: "${visual_model_path}"
      }
    }

    parameters: {
      key: "gpt_model_path"
      value: {
        string_value: "${engine_dir}"
      }
    }

    instance_group [
        {
            count: 4
            kind: KIND_CPU
        }
    ]

</details>

<details>
<summary> <b> tensorrt_llm/config.pbtxt </b></summary>


    # Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
    #
    # Redistribution and use in source and binary forms, with or without
    # modification, are permitted provided that the following conditions
    # are met:
    #  * Redistributions of source code must retain the above copyright
    #    notice, this list of conditions and the following disclaimer.
    #  * Redistributions in binary form must reproduce the above copyright
    #    notice, this list of conditions and the following disclaimer in the
    #    documentation and/or other materials provided with the distribution.
    #  * Neither the name of NVIDIA CORPORATION nor the names of its
    #    contributors may be used to endorse or promote products derived
    #    from this software without specific prior written permission.
    #
    # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
    # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
    # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    name: "tensorrt_llm"
    backend: "tensorrtllm"
    max_batch_size: 128

    model_transaction_policy {
      decoupled: true
    }

    dynamic_batching {
        preferred_batch_size: [ 128 ]
        max_queue_delay_microseconds: 10
    }

    input [
      {
        name: "input_ids"
        data_type: TYPE_INT32
        dims: [ -1 ]
        allow_ragged_batch: true
      },
      {
        name: "input_lengths"
        data_type: TYPE_INT32
        dims: [ 1 ]
        reshape: { shape: [ ] }
      },
      {
        name: "request_output_len"
        data_type: TYPE_INT32
        dims: [ 1 ]
        reshape: { shape: [ ] }
      },
      {
        name: "draft_input_ids"
        data_type: TYPE_INT32
        dims: [ -1 ]
        optional: true
        allow_ragged_batch: true
      },
      {
        name: "decoder_input_ids"
        data_type: TYPE_INT32
        dims: [ -1 ]
        optional: true
        allow_ragged_batch: true
      },
      {
        name: "decoder_input_lengths"
        data_type: TYPE_INT32
        dims: [ 1 ]
        optional: true
        reshape: { shape: [ ] }
      },
      {
        name: "draft_logits"
        data_type: TYPE_FP32
        dims: [ -1, -1 ]
        optional: true
        allow_ragged_batch: true
      },
      {
        name: "draft_acceptance_threshold"
        data_type: TYPE_FP32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "end_id"
        data_type: TYPE_INT32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "pad_id"
        data_type: TYPE_INT32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "stop_words_list"
        data_type: TYPE_INT32
        dims: [ 2, -1 ]
        optional: true
        allow_ragged_batch: true
      },
      {
        name: "bad_words_list"
        data_type: TYPE_INT32
        dims: [ 2, -1 ]
        optional: true
        allow_ragged_batch: true
      },
      {
        name: "embedding_bias"
        data_type: TYPE_FP32
        dims: [ -1 ]
        optional: true
        allow_ragged_batch: true
      },
      {
        name: "beam_width"
        data_type: TYPE_INT32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "temperature"
        data_type: TYPE_FP32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "runtime_top_k"
        data_type: TYPE_INT32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "runtime_top_p"
        data_type: TYPE_FP32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "runtime_top_p_min"
        data_type: TYPE_FP32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "runtime_top_p_decay"
        data_type: TYPE_FP32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "runtime_top_p_reset_ids"
        data_type: TYPE_INT32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "len_penalty"
        data_type: TYPE_FP32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "early_stopping"
        data_type: TYPE_BOOL
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "repetition_penalty"
        data_type: TYPE_FP32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "min_length"
        data_type: TYPE_INT32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "beam_search_diversity_rate"
        data_type: TYPE_FP32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "presence_penalty"
        data_type: TYPE_FP32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "frequency_penalty"
        data_type: TYPE_FP32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "random_seed"
        data_type: TYPE_UINT64
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "return_log_probs"
        data_type: TYPE_BOOL
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "return_context_logits"
        data_type: TYPE_BOOL
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "return_generation_logits"
        data_type: TYPE_BOOL
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "stop"
        data_type: TYPE_BOOL
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "streaming"
        data_type: TYPE_BOOL
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      {
        name: "prompt_embedding_table"
        data_type: TYPE_FP16
        dims: [ -1, -1 ]
        optional: true
        allow_ragged_batch: true
      },
      {
        name: "prompt_vocab_size"
        data_type: TYPE_INT32
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      # the unique task ID for the given LoRA.
      # To perform inference with a specific LoRA for the first time, `lora_task_id`, `lora_weights`, and `lora_config` must all be given.
      # The LoRA will be cached, so that subsequent requests for the same task only require `lora_task_id`.
      # If the cache is full the oldest LoRA will be evicted to make space for new ones.  An error is returned if `lora_task_id` is not cached.
      {
        name: "lora_task_id"
        data_type: TYPE_UINT64
        dims: [ 1 ]
        reshape: { shape: [ ] }
        optional: true
      },
      # weights for a lora adapter shape [ num_lora_modules_layers, D x Hi + Ho x D ]
      # where the last dimension holds the in / out adapter weights for the associated module (e.g. attn_qkv) and model layer
      # each of the in / out tensors are first flattened and then concatenated together in the format above.
      # D=adapter_size (R value), Hi=hidden_size_in, Ho=hidden_size_out.
      {
        name: "lora_weights"
        data_type: TYPE_FP16
        dims: [ -1, -1 ]
        optional: true
        allow_ragged_batch: true
      },
      # module identifier (same size as the first dimension of lora_weights)
      # See LoraModule::ModuleType for module id mapping
      #
      # "attn_qkv": 0     # combined qkv adapter
      # "attn_q": 1       # q adapter
      # "attn_k": 2       # k adapter
      # "attn_v": 3       # v adapter
      # "attn_dense": 4   # adapter for the dense layer in attention
      # "mlp_h_to_4h": 5  # for llama2 adapter for gated mlp layer after attention / RMSNorm: up projection
      # "mlp_4h_to_h": 6  # for llama2 adapter for gated mlp layer after attention / RMSNorm: down projection
      # "mlp_gate": 7     # for llama2 adapter for gated mlp layer after attention / RMSNorm: gate
      #
      # last dim holds [ module_id, layer_idx, adapter_size (D aka R value) ]
      {
        name: "lora_config"
        data_type: TYPE_INT32
        dims: [ -1, 3 ]
        optional: true
        allow_ragged_batch: true
      }
    ]
    output [
      {
        name: "output_ids"
        data_type: TYPE_INT32
        dims: [ -1, -1 ]
      },
      {
        name: "sequence_length"
        data_type: TYPE_INT32
        dims: [ -1 ]
      },
      {
        name: "cum_log_probs"
        data_type: TYPE_FP32
        dims: [ -1 ]
      },
      {
        name: "output_log_probs"
        data_type: TYPE_FP32
        dims: [ -1, -1 ]
      },
      {
        name: "context_logits"
        data_type: TYPE_FP32
        dims: [ -1, -1 ]
      },
      {
        name: "generation_logits"
        data_type: TYPE_FP32
        dims: [ -1, -1, -1 ]
      },
      {
        name: "batch_index"
        data_type: TYPE_INT32
        dims: [ 1 ]
      }
    ]
    instance_group [
      {
        count: 4
        kind : KIND_CPU
      }
    ]
    parameters: {
      key: "max_beam_width"
      value: {
        string_value: "1"
      }
    }
    parameters: {
      key: "FORCE_CPU_ONLY_INPUT_TENSORS"
      value: {
        string_value: "no"
      }
    }
    parameters: {
      key: "gpt_model_type"
      value: {
        string_value: "inflight_fused_batching"
      }
    }
    parameters: {
      key: "gpt_model_path"
      value: {
        string_value: "/opt/all_models/inflight_batcher_llm/tensorrt_llm/1"
      }
    }
    parameters: {
      key: "encoder_model_path"
      value: {
        string_value: "${encoder_engine_dir}"
      }
    }
    parameters: {
      key: "max_tokens_in_paged_kv_cache"
      value: {
        string_value: ""
      }
    }
    parameters: {
      key: "max_attention_window_size"
      value: {
        string_value: "${max_attention_window_size}"
      }
    }
    parameters: {
      key: "sink_token_length"
      value: {
        string_value: "${sink_token_length}"
      }
    }
    parameters: {
      key: "batch_scheduler_policy"
      value: {
        string_value: "guaranteed_completion"
      }
    }
    parameters: {
      key: "kv_cache_free_gpu_mem_fraction"
      value: {
        string_value: "0.2"
      }
    }
    parameters: {
      key: "kv_cache_host_memory_bytes"
      value: {
        string_value: "${kv_cache_host_memory_bytes}"
      }
    }
    parameters: {
      key: "kv_cache_onboard_blocks"
      value: {
        string_value: "${kv_cache_onboard_blocks}"
      }
    }
    # enable_trt_overlap is deprecated and doesn't have any effect on the runtime
    # parameters: {
    #   key: "enable_trt_overlap"
    #   value: {
    #     string_value: "${enable_trt_overlap}"
    #   }
    # }
    parameters: {
      key: "exclude_input_in_output"
      value: {
        string_value: "${exclude_input_in_output}"
      }
    }
    parameters: {
      key: "cancellation_check_period_ms"
      value: {
        string_value: "${cancellation_check_period_ms}"
      }
    }
    parameters: {
      key: "stats_check_period_ms"
      value: {
        string_value: "${stats_check_period_ms}"
      }
    }
    parameters: {
      key: "iter_stats_max_iterations"
      value: {
        string_value: "${iter_stats_max_iterations}"
      }
    }
    parameters: {
      key: "request_stats_max_iterations"
      value: {
        string_value: "${request_stats_max_iterations}"
      }
    }
    parameters: {
      key: "enable_kv_cache_reuse"
      value: {
        string_value: "${enable_kv_cache_reuse}"
      }
    }
    parameters: {
      key: "normalize_log_probs"
      value: {
        string_value: "${normalize_log_probs}"
      }
    }
    parameters: {
      key: "enable_chunked_context"
      value: {
        string_value: "${enable_chunked_context}"
      }
    }
    parameters: {
      key: "gpu_device_ids"
      value: {
        string_value: "${gpu_device_ids}"
      }
    }
    parameters: {
      key: "lora_cache_optimal_adapter_size"
      value: {
        string_value: "${lora_cache_optimal_adapter_size}"
      }
    }
    parameters: {
      key: "lora_cache_max_adapter_size"
      value: {
        string_value: "${lora_cache_max_adapter_size}"
      }
    }
    parameters: {
      key: "lora_cache_gpu_memory_fraction"
      value: {
        string_value: "${lora_cache_gpu_memory_fraction}"
      }
    }
    parameters: {
      key: "lora_cache_host_memory_bytes"
      value: {
        string_value: "${lora_cache_host_memory_bytes}"
      }
    }
    parameters: {
      key: "decoding_mode"
      value: {
        string_value: "${decoding_mode}"
      }
    }
    parameters: {
      key: "executor_worker_path"
      value: {
        string_value: "/opt/tritonserver/backends/tensorrtllm/trtllmExecutorWorker"
      }
    }
    parameters: {
      key: "medusa_choices"
        value: {
          string_value: "${medusa_choices}"
      }
    }
    parameters: {
      key: "gpu_weights_percent"
        value: {
          string_value: "${gpu_weights_percent}"
      }
    }
</details>

================================================
FILE: docs/getting_started/quick_deployment.rst
================================================
..
.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#################################
Quick Deployment Guide by backend
#################################

.. toctree::
   :maxdepth: 1
   :hidden:

   Quickstart <quickstart.md>
   TRT-LLM <llm.md>
   vLLM <../tutorials/Popular_Models_Guide/Llama2/vllm_guide.md>
   Python with HuggingFace <../tutorials/Quick_Deploy/HuggingFaceTransformers/README.md>
   PyTorch <../tutorials/Quick_Deploy/PyTorch/README.md>
   ONNX <../tutorials/Quick_Deploy/ONNX/README.md>
   OpenVINO <../tutorials/Quick_Deploy/OpenVINO/README.md>

================================================
FILE: docs/getting_started/quickstart.md
================================================
<!--
# Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Quickstart

**New to Triton Inference Server and want to just deploy your model quickly?**
Make use of
[these tutorials](https://github.com/triton-inference-server/tutorials#quick-deploy)
 to begin your Triton journey!

The Triton Inference Server is available as [buildable source
  code](../customization_guide/build.md), but the easiest way to install and run Triton is to
  use the pre-built Docker image available from the [NVIDIA GPU
  Cloud (NGC)](https://ngc.nvidia.com).

Launching and maintaining Triton Inference Server revolves around building and using model repositories. This tutorial will cover:

* Creating a Model Repository
* Launching Triton
* Sending an Inference Request

## Create A Model Repository

The [model repository](../user_guide/model_repository.md) is the directory where you
place the models that you want Triton to serve. An example model
repository is included in the
[docs/examples/model_repository](https://github.com/triton-inference-server/server/blob/main/docs/examples/model_repository).
Before using the repository, you must fetch any missing model definition
files from their public model zoos via the provided script.

```
$ cd docs/examples
$ ./fetch_models.sh
```

## Launch Triton

Triton is optimized to provide the best inferencing performance by
using GPUs, but it can also work on CPU-only systems. In both cases
you can use the same Triton Docker image.

### Run on System with GPUs

Use the following command to run Triton with the example model
repository you just created. The [NVIDIA Container
Toolkit](https://github.com/NVIDIA/nvidia-docker) must be installed
for Docker to recognize the GPU(s). The --gpus=1 flag indicates that 1
system GPU should be made available to Triton for inferencing.

```
$ docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v/full/path/to/docs/examples/model_repository:/models nvcr.io/nvidia/tritonserver:<xx.yy>-py3 tritonserver --model-repository=/models
```

Where \<xx.yy\> is the version of Triton that you want to use (and
pulled above). After you start Triton you will see output on the
console showing the server starting up and loading the model. When you
see output like the following, Triton is ready to accept inference
requests.

```
+----------------------+---------+--------+
| Model                | Version | Status |
+----------------------+---------+--------+
| <model_name>         | <v>     | READY  |
| ..                   | .       | ..     |
| ..                   | .       | ..     |
+----------------------+---------+--------+
...
...
...
I1002 21:58:57.891440 62 grpc_server.cc:3914] Started GRPCInferenceService at 0.0.0.0:8001
I1002 21:58:57.893177 62 http_server.cc:2717] Started HTTPService at 0.0.0.0:8000
I1002 21:58:57.935518 62 http_server.cc:2736] Started Metrics Service at 0.0.0.0:8002
```
All the models should show "READY" status to indicate that they loaded correctly. If a model fails to load, the status will report the failure and a reason for the failure. If your model is not displayed in the table, check the path to the model repository and your CUDA drivers.

### Run on CPU-Only System

On a system without GPUs, Triton should be run without using the
--gpus flag to Docker, but is otherwise identical to what is described
above.

```
$ docker run --rm -p8000:8000 -p8001:8001 -p8002:8002 -v/full/path/to/docs/examples/model_repository:/models nvcr.io/nvidia/tritonserver:<xx.yy>-py3 tritonserver --model-repository=/models
```

Because the --gpus flag is not used, a GPU is not available and Triton
will therefore be unable to load any model configuration that requires
a GPU.

### Verify Triton Is Running Correctly

Use Triton’s *ready* endpoint to verify that the server and the models
are ready for inference. From the host system use curl to access the
HTTP endpoint that indicates server status.

```
$ curl -v localhost:8000/v2/health/ready
...
< HTTP/1.1 200 OK
< Content-Length: 0
< Content-Type: text/plain
```

The HTTP request returns status 200 if Triton is ready and non-200 if
it is not ready.

## Send an Inference Request

Use docker pull to get the client libraries and examples image
from NGC.

```
$ docker pull nvcr.io/nvidia/tritonserver:<xx.yy>-py3-sdk
```

Where \<xx.yy\> is the version that you want to pull. Run the client
image.

```
$ docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:<xx.yy>-py3-sdk
```

From within the nvcr.io/nvidia/tritonserver:\<xx.yy\>-py3-sdk
image, run the example image-client application to perform image
classification using the example densenet_onnx model.

To send a request for the densenet_onnx model use an image from the
/workspace/images directory. In this case we ask for the top 3
classifications.

```
$ /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
Request 0, batch size 1
Image '/workspace/images/mug.jpg':
    15.346230 (504) = COFFEE MUG
    13.224326 (968) = CUP
    10.422965 (505) = COFFEEPOT
```


================================================
FILE: docs/getting_started/trtllm_user_guide.md
================================================
<!--
# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# TensorRT-LLM User Guide

## What is TensorRT-LLM

[TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM)
(TRT-LLM) is an open-source library designed to accelerate and optimize the
inference performance of large language models (LLMs) on NVIDIA GPUs. TRT-LLM
offers users an easy-to-use Python API to build TensorRT engines for LLMs,
incorporating state-of-the-art optimizations to ensure efficient inference on
NVIDIA GPUs.

## How to run TRT-LLM models with Triton Server via TensorRT-LLM backend

The
[TensorRT-LLM Backend](https://github.com/triton-inference-server/tensorrtllm_backend)
lets you serve TensorRT-LLM models with Triton Inference Server. Check out the
[Getting Started](https://github.com/triton-inference-server/tensorrtllm_backend?tab=readme-ov-file#getting-started)
section in the TensorRT-LLM Backend repo to learn how to utilize the
[NGC Triton TRT-LLM container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)
to prepare engines for your LLM models and serve them with Triton.

## How to use your custom TRT-LLM model

All the supported models can be found in the
[examples](https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/models/core) folder in
the TRT-LLM repo. Follow the examples to convert your models to TensorRT
engines.

After the engine is built, [prepare the model repository](https://github.com/triton-inference-server/tensorrtllm_backend?tab=readme-ov-file#prepare-the-model-repository)
for Triton, and
[modify the model configuration](https://github.com/triton-inference-server/tensorrtllm_backend?tab=readme-ov-file#modify-the-model-configuration).

Only the *mandatory parameters* need to be set in the model config file. Feel free
to modify the optional parameters as needed. To learn more about the
parameters, model inputs, and outputs, see the
[model config documentation](https://github.com/triton-inference-server/tensorrtllm_backend/blob/main/docs/model_config.md).

## Advanced Configuration Options and Deployment Strategies

Explore advanced configuration options and deployment strategies to optimize
and run Triton with your TRT-LLM models effectively:

- [Model Deployment](https://github.com/triton-inference-server/tensorrtllm_backend/tree/main?tab=readme-ov-file#model-deployment): Techniques for efficiently deploying and managing your models in various environments.
- [Multi-Instance GPU (MIG) Support](https://github.com/triton-inference-server/tensorrtllm_backend/tree/main?tab=readme-ov-file#mig-support): Run Triton and TRT-LLM models with MIG to optimize GPU resource management.
- [Scheduling](https://github.com/triton-inference-server/tensorrtllm_backend/tree/main?tab=readme-ov-file#scheduling): Configure scheduling policies to control how requests are managed and executed.
- [Key-Value Cache](https://github.com/triton-inference-server/tensorrtllm_backend/tree/main?tab=readme-ov-file#key-value-cache): Utilize KV cache and KV cache reuse to optimize memory usage and improve performance.
- [Decoding](https://github.com/triton-inference-server/tensorrtllm_backend/tree/main?tab=readme-ov-file#decoding): Advanced methods for generating text, including top-k, top-p, top-k top-p, beam search, Medusa, and speculative decoding.
- [Chunked Context](https://github.com/triton-inference-server/tensorrtllm_backend/tree/main?tab=readme-ov-file#chunked-context): Splitting the context into several chunks and batching them during generation phase to increase overall throughput.
- [Quantization](https://github.com/triton-inference-server/tensorrtllm_backend/tree/main?tab=readme-ov-file#quantization): Apply quantization techniques to reduce model size and enhance inference speed.
- [LoRA (Low-Rank Adaptation)](https://github.com/triton-inference-server/tensorrtllm_backend/tree/main?tab=readme-ov-file#lora): Use LoRA for efficient model fine-tuning and adaptation.

## Tutorials

Make sure to check out the
[tutorials](https://github.com/triton-inference-server/tutorials) repo to see
more guides on serving popular LLM models with Triton Server and TensorRT-LLM,
as well as deploying them on Kubernetes.

## Benchmark

[GenAI-Perf](https://github.com/triton-inference-server/perf_analyzer/tree/main/genai-perf)
is a command line tool for measuring the throughput and latency of LLMs served
by Triton Inference Server. Check out the
[Quick Start](https://github.com/triton-inference-server/perf_analyzer/tree/main/genai-perf#quick-start)
to learn how to use GenAI-Perf to benchmark your LLM models.

## Performance Best Practices

Check out the
[Performance tuning guide](https://nvidia.github.io/TensorRT-LLM/performance/performance-tuning-guide/)
to learn how to optimize your TensorRT-LLM models for better performance.

## Metrics

Triton Server provides
[metrics](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md)
indicating GPU and request statistics.
See the
[Triton Metrics](https://github.com/triton-inference-server/tensorrtllm_backend?tab=readme-ov-file#triton-metrics)
section in the TensorRT-LLM Backend repo to learn how to query the Triton
metrics endpoint to obtain TRT-LLM statistics.

## Ask questions or report issues

Can't find what you're looking for, or have a question or issue? Feel free to
ask questions or report issues in the GitHub issues page:

- [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/issues)
- [TensorRT-LLM Backend](https://github.com/triton-inference-server/tensorrtllm_backend/issues)
- [Triton Inference Server](https://github.com/triton-inference-server/server/issues)


================================================
FILE: docs/index.md
================================================
<!--
# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# NVIDIA Triton Inference Server

Triton Inference Server is an open source inference serving software that streamlines
AI inferencing. Triton Inference Server enables teams to deploy any AI model from multiple deep
learning and machine learning frameworks, including TensorRT, PyTorch,
ONNX, OpenVINO, Python, RAPIDS FIL, and more. Triton supports inference
across cloud, data center, edge and embedded devices on NVIDIA GPUs, x86 and ARM
CPU, or AWS Inferentia. Triton Inference Server delivers optimized performance
for many query types, including real time, batched, ensembles and audio/video
streaming. Triton Inference Server is part of
[NVIDIA AI Enterprise](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/),
a software platform that accelerates the data science pipeline and streamlines
the development and deployment of production AI.

  <!-- :::
  :align: center
  [![Getting Started Video](https://img.youtube.com/vi/NQDtfSi5QF4/1.jpg)](https://www.youtube.com/watch?v=NQDtfSi5QF4)
  ::: -->

<div>
<iframe width="560" height="315" src="https://www.youtube.com/embed/NQDtfSi5QF4" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
</div>


## Triton Architecture

The following figure shows the Triton Inference Server high-level
architecture. The [model repository](user_guide/model_repository.md) is a
file-system based repository of the models that Triton will make
available for inferencing. Inference requests arrive at the server via
either [HTTP/REST or GRPC](customization_guide/inference_protocols.md) or by the [C
API](customization_guide/inprocess_c_api.md) and are then routed to the appropriate per-model
scheduler. Triton implements [multiple scheduling and batching
algorithms](./user_guide/architecture.md#models-and-schedulers) that can be configured on a
model-by-model basis. Each model's scheduler optionally performs
batching of inference requests and then passes the requests to the
[backend](https://github.com/triton-inference-server/backend/blob/main/README.md)
corresponding to the model type. The backend performs inferencing
using the inputs provided in the batched requests to produce the
requested outputs. The outputs are then returned.

Triton supports a [backend C
API](https://github.com/triton-inference-server/backend/blob/main/README.md#triton-backend-api)
that allows Triton to be extended with new functionality such as
custom pre- and post-processing operations or even a new deep-learning
framework.

The models being served by Triton can be queried and controlled by a
dedicated [model management API](user_guide/model_management.md) that is
available by HTTP/REST or GRPC protocol, or by the C API.

Readiness and liveness health endpoints and utilization, throughput
and latency metrics ease the integration of Triton into deployment
frameworks such as Kubernetes.

![Triton Architecture Diagram](user_guide/images/arch.jpg)

## Triton major features

Major features include:

- [Supports multiple deep learning
  frameworks](backend/README.md#where-can-i-find-all-the-backends-that-are-available-for-triton)
- [Supports multiple machine learning
  frameworks](https://github.com/triton-inference-server/fil_backend)
- [Concurrent model
  execution](user_guide/model_execution.md#concurrent-model-execution)
- [Dynamic batching](user_guide/batcher.md#dynamic-batcher)
- [Sequence batching](user_guide/batcher.md#sequence-batcher) and
  [implicit state management](user_guide/implicit_state_management.md#implicit-state-management)
  for stateful models
- Provides [Backend API](https://github.com/triton-inference-server/backend) that
  allows adding custom backends and pre/post processing operations
- Model pipelines using
  [Ensembling](user_guide/ensemble_models.md#ensemble-models) or [Business
  Logic Scripting
  (BLS)](user_guide/bls.md#business-logic-scripting)
- [HTTP/REST and GRPC inference
  protocols](customization_guide/inference_protocols.md) based on the community
  developed [KServe
  protocol](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2)
- A [C API](customization_guide/inprocess_c_api.md) and
  [Java API](customization_guide/inprocess_java_api.md)
  allow Triton to link directly into your application for edge and other in-process use cases
- [Metrics](user_guide/metrics.md) indicating GPU utilization, server
  throughput, server latency, and more

Join the [Triton and TensorRT community](https://www.nvidia.com/en-us/deep-learning-ai/triton-tensorrt-newsletter/) and stay current on the latest product updates, bug fixes, content, best
practices, and more. Need enterprise support? NVIDIA global support is available
for Triton Inference Server with the [NVIDIA AI Enterprise software suite](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/).

See the [Latest Release Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/) for updates on the newest features and bug fixes.

================================================
FILE: docs/introduction/compatibility.md
================================================
<!--
# Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

[Please visit Deep Learning Framework (DLFW) website for the complete compatibility matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html).

# Release Compatibility Matrix
- [Release Compatibility Matrix](#release-compatibility-matrix)
  - [Container Name: trtllm-python-py3](#container-name-trtllm-python-py3)
  - [Container Name: vllm-python-py3](#container-name-vllm-python-py3)
  - [ONNX Runtime Versions](#onnx-runtime-versions)

## Container Name: trtllm-python-py3

| Triton release version	 | NGC Tag	 | Python version	 | Torch version | TensorRT version | TensorRT-LLM version | CUDA version | CUDA Driver version | Size |
| --- | ---  | --- | --- | --- | --- | --- | --- | --- |
| 26.02 | nvcr.io/nvidia/tritonserver:26.02-trtllm-python-py3 | Python 3.12.3  | 2.9.0a0+145a3a7bda.nv25.10 | 10.13.3.9 | 1.1.0 | 13.0.2.006 | 580.95.05 | 16.17 GB |
| 26.01 | nvcr.io/nvidia/tritonserver:26.01-trtllm-python-py3 | Python 3.12.3  | 2.9.0a0+145a3a7bda.nv25.10 | 10.13.3.9 | 1.1.0 | 13.0.2.006 | 580.95.05 | 16.17 GB |
| 25.12 | nvcr.io/nvidia/tritonserver:25.12-trtllm-python-py3 | Python 3.12.3  | 2.9.0a0+145a3a7bda.nv25.10 | 10.13.3.9 | 1.1.0 | 13.0.2.006 | 580.95.05 | 16.04 GB |
| 25.11 | nvcr.io/nvidia/tritonserver:25.11-trtllm-python-py3 | Python 3.12.3  | 2.9.0a0+145a3a7bda.nv25.10 | 10.13.3.9 | 1.0.3.2510 | 13.0.2.006 | 580.95.05 | 12.25 GB |
| 25.10 | nvcr.io/nvidia/tritonserver:25.10-trtllm-python-py3 | Python 3.12.3  | 2.8.0a0+5228986c39.nv25.6 | 10.11.0.33 | 1.0.0 | 12.9.1.010 | 575.57.08 | 16.21 GB |
| 25.09 | nvcr.io/nvidia/tritonserver:25.09-trtllm-python-py3 | Python 3.12.3  | 2.8.0a0+5228986c39.nv25.6 | 10.11.0.33 | 1.0.0 | 12.9.1.010 | 575.57.08 | 16.25 GB |
| 25.08 | nvcr.io/nvidia/tritonserver:25.08-trtllm-python-py3 | Python 3.12.3  | 2.8.0a0+5228986c39.nv25.5 | 10.11.0.33 | 0.21.0 | 12.9.0.043 | 575.51.03 | 20.49 GB |
| 25.07 | nvcr.io/nvidia/tritonserver:25.07-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+79aa17489c.nv25.4 | 10.10.0.31 | 0.20.0 | 12.9.0.036 | 575.51.03 | 18.3G |
| 25.06 | nvcr.io/nvidia/tritonserver:25.06-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+79aa17489c.nv25.4 | 10.10.0.31 | 0.20.0 | 12.9.0.036 | 575.51.03 | 18.3G |
| 25.05 | nvcr.io/nvidia/tritonserver:25.05-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+7c8ec84dab.nv25.3 | 10.9.0.34 | 0.19.0 | 12.8.1.012 | 570.124.06 | 17G |
| 25.04 | nvcr.io/nvidia/tritonserver:25.04-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+7c8ec84dab.nv25.3 | 10.9.0.34 | 0.18.2 | 12.8.1.012 | 570.124.06 | 17G |
| 25.03 | nvcr.io/nvidia/tritonserver:25.03-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+7c8ec84dab.nv25.3 | 10.9.0.34 | 0.18.0 | 12.8.1.012 | 570.124.06 | 28G |
| 25.02 | nvcr.io/nvidia/tritonserver:25.02-trtllm-python-py3 | Python 3.12.3 | 2.6.0a0+ecf3bae40a.nv25.1 | 10.8.0.43 | 0.17.0.post1 | 12.8.0.038 | 570.86.10 | 28G |
| 25.01 | nvcr.io/nvidia/tritonserver:25.01-trtllm-python-py3 | Python 3.12.3  | 2.6.0a0+ecf3bae40a.nv25.1 | 10.8.0.43 | 0.17.0 | 12.8.0.038 | 570.86.10 | 30G |
| 24.12 | nvcr.io/nvidia/tritonserver:24.12-trtllm-python-py3 | Python 3.12.3  | 2.6.0a0+df5bbc09d1.nv24.11 | 10.7.0 | 0.16.0 | 12.6.3 | 560.35.05 | 22G |
| 24.11 | nvcr.io/nvidia/tritonserver:24.11-trtllm-python-py3 | Python 3.10.12  | 2.5.0a0+e000cf0ad9.nv24.10 | 10.6.0 | 0.15.0 | 12.6.3 | 555.42.06 | 24.8G |
| 24.10 | nvcr.io/nvidia/tritonserver:24.10-trtllm-python-py3 | Python 3.10.12  | 2.4.0a0+3bcc3cddb5.nv24.7 | 10.4.0 | 0.14.0 | 12.5.1.007 | 555.42.06 | 23.3G |
| 24.09 | nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3 | Python 3.10.12  | 2.4.0a0+3bcc3cddb5.nv24.7 | 10.4.0 | 0.13.0 | 12.5.1.007 | 555.42.06 | 21G |
| 24.08 | nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3 | Python 3.10.12 | 2.4.0a0+3bcc3cddb5.nv24.7 | 10.3.0 | 0.12.0 | 12.5.1.007 | 555.42.06 | 21G |
| 24.07 | nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3 | Python 3.10.12 | 2.4.0a0+07cecf4168.nv24.5 | 10.1.0 | 0.11.0 | 12.4.1.003 | 550.54.15 | 23G |
| 24.06 | nvcr.io/nvidia/tritonserver:24.06-trtllm-python-py3 | Python 3.10.12  | 2.3.0a0+40ec155e58.nv24.3 | 10.0.1 | 0.10.0 | 12.4.0.041 | 550.54.14 | 31G |
| 24.05 | nvcr.io/nvidia/tritonserver:24.05-trtllm-python-py3 | Python 3.10.12  | 2.3.0a0+ebedce2 | 10.0.1.6  | 0.9.0 |  12.3.2.001 | 545.23.08 | 34G |
| 24.04 | nvcr.io/nvidia/tritonserver:24.04-trtllm-python-py3 | Python 3.10.12  | 2.3.0a0+ebedce2 | 9.3.0.post12.dev1 | 0.9.0  | 12.3.2.001 | 545.23.08 | 34G |

## Container Name: vllm-python-py3

| Triton release version	 | NGC Tag	 | Python version	 | vLLM version | CUDA version | CUDA Driver version | Size |
| --- | --- | --- | --- | --- | --- | --- |
| 26.02 | nvcr.io/nvidia/tritonserver:26.02-vllm-python-py3 | Python 3.12.3  | 0.15.1+nv26.2 | 13.1.1.006 | 590.48.01 | 8.9G |
| 26.01 | nvcr.io/nvidia/tritonserver:26.01-vllm-python-py3 | Python 3.12.3  | 0.13.0+faa43dbf.nv26.1.cu131 | 13.1.1.006 | 590.48.01 | 8.79G |
| 25.12 | nvcr.io/nvidia/tritonserver:25.12-vllm-python-py3 | Python 3.12.3  | 0.11.1+9114fd76.nv25.12.cu131 | 13.1.0.036 | 590.44.01 | 8.54G |
| 25.11 | nvcr.io/nvidia/tritonserver:25.11-vllm-python-py3 | Python 3.12.3  | 0.11.0+582e4e37.nv25.11.cu130 | 13.0.2.006 | 580.95.05 | 8.72G |
| 25.10 | nvcr.io/nvidia/tritonserver:25.10-vllm-python-py3 | Python 3.12.3  | 0.10.2+9dd9ca32.nv25.10.cu130 | 13.0.2.006 | 580.95.05 | 8.34G |
| 25.09 | nvcr.io/nvidia/tritonserver:25.09-vllm-python-py3 | Python 3.12.3  | 0.10.1.1+381074ae.nv25.9.cu130 | 13.0.1.012 | 580.82.07 | 7.78G |
| 25.08 | nvcr.io/nvidia/tritonserver:25.08-vllm-python-py3 | Python 3.12.3  | 0.9.2+4ef1e343.nv25.8.post1.cu130 | 13.0.1.012 | 580.82.07 | 8.1G |
| 25.07 | nvcr.io/nvidia/tritonserver:25.07-vllm-python-py3 | Python 3.12.3  | 0.9.0rc1+1958ee56.nv25.6.cu129 | 12.9.0.043 | 575.51.03 | 10G |
| 25.06 | nvcr.io/nvidia/tritonserver:25.06-vllm-python-py3 | Python 3.12.3  | 0.9.0rc1+1958ee56.nv25.6.cu129 | 12.9.0.043 | 575.51.03 | 10G |
| 25.05 | nvcr.io/nvidia/tritonserver:25.05-vllm-python-py3 | Python 3.12.3  | 0.8.4+dc1a3e10.nv25.5.cu129 | 12.9.0.043 | 575.51.03 | 10G |
| 25.04 | nvcr.io/nvidia/tritonserver:25.04-vllm-python-py3 | Python 3.12.3  | 0.8.1+5f4af9e0.nv25.4.cu129 | 12.9.0.036 | 575.51.02 | 10G |
| 25.03 | nvcr.io/nvidia/tritonserver:25.03-vllm-python-py3 | Python 3.12.3  | 0.7.3+04de634a.nv25.3.cu128 | 12.8.1.012 | 570.124.06 | 22G |
| 25.02 | nvcr.io/nvidia/tritonserver:25.02-vllm-python-py3 | Python 3.12.3  | 0.7.0+5e800e3d.nv25.2.cu128 | 12.8.0.038 | 570.86.10 | 22G |
| 25.01 | nvcr.io/nvidia/tritonserver:25.01-vllm-python-py3 | Python 3.12.3  | 0.6.3.post1 | 12.8.0.038 | 570.86.10 | 23G |
| 24.12 | nvcr.io/nvidia/tritonserver:24.12-vllm-python-py3 | Python 3.12.3 |  0.5.5 | 12.6.3.004 | 560.35.05 | 20G |
| 24.11 | nvcr.io/nvidia/tritonserver:24.11-vllm-python-py3 | Python 3.12.3 |  0.5.5 | 12.6.3.001 | 560.35.05 | 22.1G |
| 24.10 | nvcr.io/nvidia/tritonserver:24.10-vllm-python-py3 | Python 3.10.12 | 0.5.5 | 12.6.2.004 | 560.35.03 | 21G |
| 24.09 | nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3 | Python 3.10.12 | 0.5.3.post1 | 12.6.1.006 | 560.35.03 | 19G |
| 24.08 | nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3 | Python 3.10.12  | 0.5.0 post1 | 12.6.0.022 | 560.35.03 | 19G |
| 24.07 | nvcr.io/nvidia/tritonserver:24.07-vllm-python-py3 | Python 3.10.12  | 0.5.0 post1 | 12.5.1 | 555.42.06 | 19G |
| 24.06 | nvcr.io/nvidia/tritonserver:24.06-vllm-python-py3 | Python 3.10.12  | 0.4.3 | 12.5.0.23 | 555.42.02 | 18G |
| 24.05 | nvcr.io/nvidia/tritonserver:24.05-vllm-python-py3 | Python 3.10.12  | 0.4.0 post1 | 12.4.1 | 550.54.15 | 18G |
| 24.04 | nvcr.io/nvidia/tritonserver:24.04-vllm-python-py3 | Python 3.10.12  | 0.4.0 post1 | 12.4.1 | 550.54.15 | 17G |

## ONNX Runtime Versions

| Triton release version	 | ONNX Runtime	 |
| --- | --- |
| 26.02 | 1.24.1 |
| 26.01 | 1.23.2 |
| 25.12 | 1.23.2 |
| 25.11 | 1.23.2 |
| 25.10 | 1.23.1 |
| 25.09 | 1.23.0 |
| 25.08 | 1.23.0+1d1712fdaf |
| 25.07 | 1.22.0 |
| 25.06 | 1.22.0 |
| 25.05 | 1.22.0 |
| 25.04 | 1.21.0 |
| 25.03 | 1.21.0 |
| 25.02 | 1.20.1 |
| 25.01 | 1.20.1 |
| 24.12 | 1.20.1 |
| 24.11 | 1.19.2 |
| 24.10 | 1.19.2 |
| 24.09 | 1.19.2 |
| 24.08 | 1.18.1 |
| 24.07 | 1.18.1 |
| 24.06 | 1.18.0 |
| 24.05 | 1.18.0 |
| 24.04 | 1.17.3 |


================================================
FILE: docs/introduction/index.md
================================================
<!--
# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# NVIDIA Triton Inference Server

Triton Inference Server is open source inference serving software that streamlines
AI inferencing. Triton Inference Server enables teams to deploy any AI model from multiple deep
learning and machine learning frameworks, including TensorRT,
PyTorch, ONNX, OpenVINO, Python, RAPIDS FIL, and more. Triton supports inference
across cloud, data center, edge and embedded devices on NVIDIA GPUs, x86 and ARM
CPU, or AWS Inferentia. Triton Inference Server delivers optimized performance
for many query types, including real time, batched, ensembles and audio/video
streaming. Triton Inference Server is part of
[NVIDIA AI Enterprise](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/),
a software platform that accelerates the data science pipeline and streamlines
the development and deployment of production AI.

  <!-- :::
  :align: center
  [![Getting Started Video](https://img.youtube.com/vi/NQDtfSi5QF4/1.jpg)](https://www.youtube.com/watch?v=NQDtfSi5QF4)
  ::: -->

<div>
<iframe width="560" height="315" src="https://www.youtube.com/embed/NQDtfSi5QF4" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
</div>


## Triton Architecture

The following figure shows the Triton Inference Server high-level
architecture. The [model repository](../user_guide/model_repository.md) is a
file-system based repository of the models that Triton will make
available for inferencing. Inference requests arrive at the server via
either [HTTP/REST or GRPC](../customization_guide/inference_protocols.md) or by the [C
API](../customization_guide/inprocess_c_api.md) and are then routed to the appropriate per-model
scheduler. Triton implements [multiple scheduling and batching
algorithms](../user_guide/architecture.md#models-and-schedulers) that can be configured on a
model-by-model basis. Each model's scheduler optionally performs
batching of inference requests and then passes the requests to the
[backend](https://github.com/triton-inference-server/backend/blob/main/README.md)
corresponding to the model type. The backend performs inferencing
using the inputs provided in the batched requests to produce the
requested outputs. The outputs are then returned.

Triton supports a [backend C
API](https://github.com/triton-inference-server/backend/blob/main/README.md#triton-backend-api)
that allows Triton to be extended with new functionality such as
custom pre- and post-processing operations or even a new deep-learning
framework.

The models being served by Triton can be queried and controlled by a
dedicated [model management API](../user_guide/model_management.md) that is
available by HTTP/REST or GRPC protocol, or by the C API.

Readiness and liveness health endpoints and utilization, throughput
and latency metrics ease the integration of Triton into deployment
frameworks such as Kubernetes.

![Triton Architecture Diagram](../user_guide/images/arch.jpg)

## Triton major features

Major features include:

- [Supports multiple deep learning
  frameworks](https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton)
- [Supports multiple machine learning
  frameworks](https://github.com/triton-inference-server/fil_backend)
- [Concurrent model
  execution](../user_guide/model_execution.md#concurrent-model-execution)
- [Dynamic batching](../user_guide/batcher.md#dynamic-batcher)
- [Sequence batching](../user_guide/batcher.md#sequence-batcher) and
  [implicit state management](../user_guide/implicit_state_management.md#implicit-state-management)
  for stateful models
- Provides [Backend API](https://github.com/triton-inference-server/backend) that
  allows adding custom backends and pre/post processing operations
- Model pipelines using
  [Ensembling](../user_guide/ensemble_models.md#ensemble-models) or [Business
  Logic Scripting
  (BLS)](../user_guide/bls.md#business-logic-scripting)
- [HTTP/REST and GRPC inference
  protocols](../customization_guide/inference_protocols.md) based on the community
  developed [KServe
  protocol](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2)
- A [C API](../customization_guide/inprocess_c_api.md) and
  [Java API](../customization_guide/inprocess_java_api.md)
  allow Triton to link directly into your application for edge and other in-process use cases
- [Metrics](../user_guide/metrics.md) indicating GPU utilization, server
  throughput, server latency, and more

Join the [Triton and TensorRT community](https://www.nvidia.com/en-us/deep-learning-ai/triton-tensorrt-newsletter/) and stay current on the latest product updates, bug fixes, content, best
practices, and more. Need enterprise support? NVIDIA global support is available
for Triton Inference Server with the [NVIDIA AI Enterprise software suite](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/).

See the [Latest Release Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/) for updates on the newest features and bug fixes.

================================================
FILE: docs/introduction/release_notes.md
================================================
<!--
# Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# [Triton Inference Server Release 26.02](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/rel-26-02.html#rel-26-02)

The Triton Inference Server container image, release 26.02, is available
on [NGC](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver) and
is open source
on [GitHub](https://github.com/triton-inference-server/server). Release notes can
be found on the [GitHub Release Page](https://github.com/triton-inference-server/server/releases).


================================================
FILE: docs/llm_features/speculative_decoding.rst
================================================
..
.. Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

####################
Speculative Decoding
####################

.. toctree::
   :maxdepth: 1
   :hidden:

   Overview <../tutorials/Feature_Guide/Speculative_Decoding/README.md>
   TRT-LLM <../tutorials/Feature_Guide/Speculative_Decoding/TRT-LLM/README.md>
   vLLM <../tutorials/Feature_Guide/Speculative_Decoding/vLLM/README.md>

================================================
FILE: docs/perf_benchmark/genai_perf.rst
================================================
..
.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

##########################
GenAI Performance Analyzer
##########################

.. toctree::
   :maxdepth: 1
   :hidden:

   Overview <../perf_analyzer/genai-perf/README.md>
   Large language models <../perf_analyzer/genai-perf/docs/tutorial.md>
   Visual language models <../perf_analyzer/genai-perf/docs/multi_modal.md>
   Embedding models <../perf_analyzer/genai-perf/docs/embeddings.md>
   Ranking models <../perf_analyzer/genai-perf/docs/rankings.md>
   Multiple LoRA adapters <../perf_analyzer/genai-perf/docs/lora.md>

================================================
FILE: docs/perf_benchmark/model_analyzer.rst
================================================
..
.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

##############
Model Analyzer
##############

.. toctree::
   :maxdepth: 1
   :hidden:

   Overview <../model_analyzer/README.md>
   Documentation <../model_analyzer/docs/README.md>
   Quick Start <../model_analyzer/docs/quick_start.md>
   Installation <../model_analyzer/docs/install.md>
   CLI Reference <../model_analyzer/docs/cli.md>
   Launch Modes <../model_analyzer/docs/launch_modes.md>
   Configuration <../model_analyzer/docs/config.md>
   Configuration Search <../model_analyzer/docs/config_search.md>
   Metrics <../model_analyzer/docs/metrics.md>
   Checkpointing <../model_analyzer/docs/checkpoints.md>
   Reports <../model_analyzer/docs/report.md>
   Kubernetes <../model_analyzer/docs/kubernetes_deploy.md>
   Model Types <../model_analyzer/docs/model_types.md>
   Ensemble Model <../model_analyzer/docs/ensemble_quick_start.md>
   BLS Model <../model_analyzer/docs/bls_quick_start.md>
   Multi-Model <../model_analyzer/docs/mm_quick_start.md>

================================================
FILE: docs/perf_benchmark/perf_analyzer.rst
================================================
..
.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

####################
Performance Analyzer
####################

.. toctree::
   :maxdepth: 1
   :hidden:

   Overview <../perf_analyzer/README.md>
   Documentation <../perf_analyzer/docs/README.md>
   Quick Start <../perf_analyzer/docs/quick_start.md>
   Installation <../perf_analyzer/docs/install.md>
   CLI Reference <../perf_analyzer/docs/cli.md>
   Inference Load Modes <../perf_analyzer/docs/inference_load_modes.md>
   Input Data <../perf_analyzer/docs/input_data.md>
   Measurement Modes <../perf_analyzer/docs/measurements_metrics.md>
   Benchmarking <../perf_analyzer/docs/benchmarking.md>

================================================
FILE: docs/protocol/README.md
================================================
<!--
# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# HTTP/REST and GRPC Protocol

This directory contains documents related to the HTTP/REST and GRPC
protocols used by Triton. Triton uses the [KServe community standard
inference
protocols](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2)
plus several extensions that are defined in the following documents:

- [Binary tensor data extension](./extension_binary_data.md)
- [Classification extension](./extension_classification.md)
- [Schedule policy extension](./extension_schedule_policy.md)
- [Sequence extension](./extension_sequence.md)
- [Shared-memory extension](./extension_shared_memory.md)
- [Model configuration extension](./extension_model_configuration.md)
- [Model repository extension](./extension_model_repository.md)
- [Statistics extension](./extension_statistics.md)
- [Trace extension](./extension_trace.md)
- [Logging extension](./extension_logging.md)
- [Parameters extension](./extension_parameters.md)

Note that some extensions introduce new fields onto the inference protocols,
while other extensions define new protocols that Triton follows. Please refer
to the extension documents for details.

For the GRPC protocol, the [protobuf
specification](https://github.com/triton-inference-server/common/blob/main/protobuf/grpc_service.proto)
is also available. In addition, you can find the GRPC health checking protocol protobuf
specification [here](https://github.com/triton-inference-server/common/blob/main/protobuf/health.proto).

## Restricted Protocols

You can configure the Triton endpoints, which implement the protocols, to
restrict access to some protocols and to control network settings. Please refer
to the [protocol customization guide](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#httprest-and-grpc-protocols) for details.

## IPv6

Assuming your host or [docker config](https://docs.docker.com/config/daemon/ipv6/)
supports IPv6 connections, `tritonserver` can be configured to use IPv6
HTTP endpoints as follows:
```
$ tritonserver ... --http-address ipv6:[::1]&
...
I0215 21:04:11.572305 571 grpc_server.cc:4868] Started GRPCInferenceService at 0.0.0.0:8001
I0215 21:04:11.572528 571 http_server.cc:3477] Started HTTPService at ipv6:[::1]:8000
I0215 21:04:11.614167 571 http_server.cc:184] Started Metrics Service at ipv6:[::1]:8002
```

This can be confirmed via `netstat`, for example:
```
$ netstat -tulpn | grep tritonserver
tcp6      0      0 :::8000      :::*      LISTEN      571/tritonserver
tcp6      0      0 :::8001      :::*      LISTEN      571/tritonserver
tcp6      0      0 :::8002      :::*      LISTEN      571/tritonserver
```

And can be tested via `curl`, for example:
```
$ curl -6 --verbose "http://[::1]:8000/v2/health/ready"
*   Trying ::1:8000...
* TCP_NODELAY set
* Connected to ::1 (::1) port 8000 (#0)
> GET /v2/health/ready HTTP/1.1
> Host: [::1]:8000
> User-Agent: curl/7.68.0
> Accept: */*
>
* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< Content-Length: 0
< Content-Type: text/plain
<
* Connection #0 to host ::1 left intact
```


## Mapping Triton Server Error Codes to HTTP Status Codes

This table maps various Triton Server error codes to their corresponding HTTP status
codes. It can be used as a reference guide for understanding how Triton Server errors
are handled in HTTP responses.


| Triton Server Error Code                      | HTTP Status Code   | Description          |
| ----------------------------------------------| -------------------| ---------------------|
| `TRITONSERVER_ERROR_INTERNAL`                 | 500                | Internal Server Error|
| `TRITONSERVER_ERROR_NOT_FOUND`                | 404                | Not Found            |
| `TRITONSERVER_ERROR_UNAVAILABLE`              | 503                | Service Unavailable  |
| `TRITONSERVER_ERROR_UNSUPPORTED`              | 501                | Not Implemented      |
| `TRITONSERVER_ERROR_UNKNOWN`,<br>`TRITONSERVER_ERROR_INVALID_ARG`,<br>`TRITONSERVER_ERROR_ALREADY_EXISTS`,<br>`TRITONSERVER_ERROR_CANCELLED` | `400` | Bad Request (default for other errors)      |


================================================
FILE: docs/protocol/extension_binary_data.md
================================================
<!--
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Binary Tensor Data Extension

This document describes Triton's binary tensor data extension. The
binary tensor data extension allows Triton to support tensor data
represented in a binary format in the body of an HTTP/REST
request. Because this extension is supported, Triton reports
"binary_tensor_data" in the extensions field of its Server Metadata.

## Binary Tensor Request

Tensor data represented as binary data is organized in little-endian
byte order, row major, without stride or padding between elements. All
tensor data types are representable as binary data in the native size
of the data type. For BOOL type element true is a single byte with
value 1 and false is a single byte with value 0. For BYTES type an
element is represented by a 4-byte unsigned integer giving the length
followed by the actual bytes. The binary data for a tensor is
delivered in the HTTP body after the JSON object (see Examples).

The binary tensor data extension uses parameters to indicate that an
input or output tensor is communicated as binary data. The first
parameter is used in `$request_input` and `$response_output` to indicate
that the input or output tensor is communicated as binary data:

- "binary_data_size" : int64 parameter indicating the size of the
  tensor binary data, in bytes.

The second parameter is used in `$request_output` to indicate that the
output should be returned from Triton as binary data.

- "binary_data" : bool parameter that is true if the output should be
  returned as binary data and false (or not given) if the tensor
  should be returned as JSON.

The third parameter is used in `$inference_request` to indicate that all
outputs should be returned from Triton as binary data, unless
overridden by "binary_data" on a specific output.

- "binary_data_output" : bool parameter that is true if all outputs
  should be returned as binary data and false (or not given) if the
  outputs should be returned as JSON. If "binary_data" is specified on
  an output it overrides this setting.

When one or more tensors are communicated as binary data, the HTTP
body of the request or response will contain the JSON inference
request or response object followed by the binary tensor data in the
same order as the order of the input or output tensors are specified
in the JSON. If any binary data is present in the request or response
the Inference-Header-Content-Length header must be provided to give
the length of the JSON object, and Content-Length continues to give
the full body length (as HTTP requires).

### Examples

For the following request the input tensors are sent as binary data
and the output tensor must be returned as binary data as that is what
is requested. Also note that the total size of the binary data is 19
bytes and that size must be reflected in the content length headers.

```
POST /v2/models/mymodel/infer HTTP/1.1
Host: localhost:8000
Content-Type: application/octet-stream
Inference-Header-Content-Length: <xx>
Content-Length: <xx+19>
{
  "model_name" : "mymodel",
  "inputs" : [
    {
      "name" : "input0",
      "shape" : [ 2, 2 ],
      "datatype" : "UINT32",
      "parameters" : {
        "binary_data_size" : 16
      }
    },
    {
      "name" : "input1",
      "shape" : [ 3 ],
      "datatype" : "BOOL",
      "parameters" : {
        "binary_data_size" : 3
      }
    }
  ],
  "outputs" : [
    {
      "name" : "output0",
      "parameters" : {
        "binary_data" : true
      }
    }
  ]
}
<16 bytes of data for input0 tensor>
<3 bytes of data for input1 tensor>
```

Assuming the model returns a [ 3, 2 ] tensor of data type FP32 the
following response would be returned.

```
HTTP/1.1 200 OK
Content-Type: application/octet-stream
Inference-Header-Content-Length: <yy>
Content-Length: <yy+24>
{
  "outputs" : [
    {
      "name" : "output0",
      "shape" : [ 3, 2 ],
      "datatype"  : "FP32",
      "parameters" : {
        "binary_data_size" : 24
      }
    }
  ]
}
<24 bytes of data for output0 tensor>
```

## Raw Binary Request

For models whose tensor metadata can be deduced from the byte size of the binary
data, the user may send the binary tensor request without specifying the
inference header. In other words, the request body contains only the binary data
of the tensor. Below are the constraints for qualifying models:

1. The model has only 1 input.
2. If the input data type is not BYTES, the number of variable size dimensions is
at most 1. If the data type is BYTES, the shape must be [1]. The supported data
types can be found [here](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#tensor-data-types)

To send a raw binary request, the Inference-Header-Content-Length header must be
provided with value 0 to indicate that the request body doesn't include the
inference header.

Note: if the model supports batching, the request will be treated as a batch-1
request because the inference header is omitted. Additionally, all the model
outputs will be requested to be returned in binary tensor form as described in
the previous section.

### Examples

The following is an example of sending a raw binary request. Note that the total
size of the binary data is 16 bytes and that size must be reflected in
the content length headers.

```
POST /v2/models/mymodel/infer HTTP/1.1
Host: localhost:8000
Content-Type: application/octet-stream
Inference-Header-Content-Length: 0
Content-Length: 16
<16 bytes of data for input tensor>
```

Assuming the model returns two outputs which both have shape [ 3, 1 ] and data
type FP32, then the following response would be returned.

```
HTTP/1.1 200 OK
Content-Type: application/octet-stream
Inference-Header-Content-Length: <yy>
Content-Length: <yy+24>
{
  "outputs" : [
    {
      "name" : "output0",
      "shape" : [ 3, 1 ],
      "datatype"  : "FP32",
      "parameters" : {
        "binary_data_size" : 12
      }
    },
    {
      "name" : "output1",
      "shape" : [ 3, 1 ],
      "datatype"  : "FP32",
      "parameters" : {
        "binary_data_size" : 12
      }
    }
  ]
}
<12 bytes of data for output0 tensor>
<12 bytes of data for output1 tensor>
```

================================================
FILE: docs/protocol/extension_classification.md
================================================
<!--
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Classification Extension

This document describes Triton's classification extension.  The
classification extension allows Triton to return an output as a
classification index and (optional) label instead of returning the
output as raw tensor data.  Because this extension is supported,
Triton reports “classification” in the extensions field of its Server
Metadata.

An inference request can use the “classification” parameter to request
that one or more classifications be returned for an output. For such
an output the returned tensor will not be the shape and type produced
by the model, but will instead be type BYTES with shape [ batch-size,
\<count\> ] where each element returns the classification index and
label as a single string. The \<count\> dimension of the returned tensor
will equal the “count” value specified in the classification
parameter.

When the classification parameter is used, Triton will determine the
top-n classifications as the n highest-valued elements in the output
tensor compared using the output tensor’s data type. For example, if
an output tensor is [ 1, 5, 10, 4 ], the highest-valued element is 10
(index 2), followed by 5 (index 1), followed by 4 (index 3), followed
by 1 (index 0). So, for example, the top-2 classifications by index
are [ 2, 1 ].

The format of the returned string will be “\<value\>:\<index\>[:\<label\>]”,
where \<index\> is the index of the class in the model output tensor,
\<value\> is the value associated with that index in the model output,
and the \<label\> associated with that index is optional. For example,
continuing the example from above, the returned tensor will be [
“10:2”, “5:1” ]. If the model has labels associated with those
indices, the returned tensor will be [ “10:2:apple”, “5:1:pickle” ].

## HTTP/REST

In all JSON schemas shown in this document `$number`, `$string`, `$boolean`,
`$object` and `$array` refer to the fundamental JSON types. #optional
indicates an optional JSON field.

The classification extension requires that the “classification”
parameter, when applied to a requested inference output, be recognized
by Triton as follows:

- “classification” : `$number` indicating the number of classes that
  should be returned for the output.

The following example shows how the classification parameter is used
in an inference request.

```
POST /v2/models/mymodel/infer HTTP/1.1
Host: localhost:8000
Content-Type: application/json
Content-Length: <xx>
{
  "id" : "42",
  "inputs" : [
    {
      "name" : "input0",
      "shape" : [ 2, 2 ],
      "datatype" : "UINT32",
      "data" : [ 1, 2, 3, 4 ]
    }
  ],
  "outputs" : [
    {
      "name" : "output0",
      "parameters" : { "classification" : 2 }
    }
  ]
}
```

For the above request Triton will return the “output0” output tensor
as a STRING tensor with shape [ 2 ]. Assuming the model produces
output0 tensor [ 1.1, 3.3, 0.5, 2.4 ] from the above inputs, the
response will be the following.

```
HTTP/1.1 200 OK
Content-Type: application/json
Content-Length: <yy>
{
  "id" : "42",
  "outputs" : [
    {
      "name" : "output0",
      "shape" : [ 2 ],
      "datatype"  : "STRING",
      "data" : [ "3.3:1", "2.4:3" ]
    }
  ]
}
```

If the model has labels associated with each classification index
Triton will return those as well, as shown below.

```
HTTP/1.1 200 OK
Content-Type: application/json
Content-Length: <yy>
{
  "id" : "42",
  "outputs" : [
    {
      "name" : "output0",
      "shape" : [ 2 ],
      "datatype"  : "STRING",
      "data" : [ "3.3:1:index_1_label", "2.4:3:index_3_label" ]
    }
  ]
}
```

## GRPC

The classification extension requires that the “classification”
parameter, when applied to a requested inference output, be recognized
by Triton as follows:

- “classification” : int64_param indicating the number of classes that
  should be returned for the output.

The following example shows how the classification parameter is used
in an inference request.

```
ModelInferRequest {
  model_name : "mymodel"
  model_version : -1
  inputs [
    {
      name : "input0"
      shape : [ 2, 2 ]
      datatype : "UINT32"
      contents { int_contents : [ 1, 2, 3, 4 ] }
    }
  ]
  outputs [
    {
      name : "output0"
      parameters [
        {
          key : "classification"
          value : { int64_param : 2 }
        }
      ]
    }
  ]
}
```

For the above request Triton will return the “output0” output tensor
as a STRING tensor with shape [ 2 ]. Assuming the model produces
output0 tensor [ 1.1, 3.3, 0.5, 2.4 ] from the above inputs, the
response will be the following.

```
ModelInferResponse {
  model_name : "mymodel"
  outputs [
    {
      name : "output0"
      shape : [ 2 ]
      datatype  : "STRING"
      contents { bytes_contents : [ "3.3:1", "2.4:3" ] }
    }
  ]
}
```


================================================
FILE: docs/protocol/extension_generate.md
================================================
<!--
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Generate Extension

> [!NOTE]
> The Generate Extension is *provisional* and likely to change in future versions.

This document describes Triton's generate extension. The generate
extension provides a simple text-oriented endpoint schema for interacting with
large language models (LLMs). The generate endpoint is specific to HTTP/REST
frontend.

## HTTP/REST

In all JSON schemas shown in this document, `$number`, `$string`, `$boolean`,
`$object` and `$array` refer to the fundamental JSON types. #optional
indicates an optional JSON field.

Triton exposes the generate endpoint at the following URLs. The client may send
an HTTP POST request to different URLs for different response behavior. The
endpoint will return the generate results on success or an error in the case of
failure.

```
POST v2/models/${MODEL_NAME}[/versions/${MODEL_VERSION}]/generate

POST v2/models/${MODEL_NAME}[/versions/${MODEL_VERSION}]/generate_stream
```

### generate vs. generate_stream

Both URLs expect the same request JSON object, and generate the same JSON
response object. However, there are some differences in the format used to
return each:
* `/generate` returns exactly 1 response JSON object with a
`Content-Type` of `application/json`
* `/generate_stream` may return multiple responses based on the inference
results, with a `Content-Type` of `text/event-stream; charset=utf-8`.
These responses will be sent as
[Server-Sent Events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events)
(SSE), where each response will be a "data" chunk in the HTTP
response body. In the case of inference errors, responses will have
an [error JSON object](#generate-response-json-error-object).
    * Note that the HTTP response code is set in the first response of the SSE,
    so if the first response succeeds but an error occurs in a subsequent
    response for the request, it can result in receiving an error object
    while the status code shows success (200). Therefore, the user must
    always check whether an error object is received when generating
    responses through `/generate_stream`.
    * If the request fails before inference begins, then a JSON error will
    be returned with `Content-Type` of `application/json`, similar to errors
    from other endpoints with the status code set to an error.

### Generate Request JSON Object

The generate request object, identified as *$generate_request*, is
required in the HTTP body of the POST request. The model name and
(optionally) version must be available in the URL. If a version is not
provided, the server may choose a version based on its own policies or
return an error.

    $generate_request =
    {
      "id" : $string, #optional
      "text_input" : $string,
      "parameters" : $parameters #optional
    }

* "id": An identifier for this request. Optional, but if specified this identifier must be returned in the response.
* "text_input" : The text input that the model should generate output from.
* "parameters" : An optional object containing zero or more parameters for this
  generate request expressed as key/value pairs. See
  [Parameters](#parameters) for more information.

> [!NOTE]
> Any additional properties in the request object are passed either as
> parameters or tensors based on model specification.

#### Parameters

The `$parameters` JSON describes zero or more “name”/“value” pairs,
where the “name” is the name of the parameter and the “value” is a
`$string`, `$number`, or `$boolean`.

    $parameters =
    {
      $parameter, ...
    }

    $parameter = $string : $string | $number | $boolean

Parameters are model-specific. The user should check with the model
specification to set the parameters.

#### Example Request

Below is an example of sending a generate request with additional model parameters `stream` and `temperature`.

```
$ curl -X POST localhost:8000/v2/models/mymodel/generate -d '{"id": "42", "text_input": "client input", "parameters": {"stream": false, "temperature": 0}}'

POST /v2/models/mymodel/generate HTTP/1.1
Host: localhost:8000
Content-Type: application/json
Content-Length: <xx>
{
  "id" : "42",
  "text_input" :  "client input",
  "parameters" :
    {
      "stream": false,
      "temperature": 0
    }
}
```

### Generate Response JSON Object

A successful generate request is indicated by a 200 HTTP status code.
The generate response object, identified as `$generate_response`, is returned in
the HTTP body.

    $generate_response =
    {
      "id" : $string,
      "model_name" : $string,
      "model_version" : $string,
      "text_output" : $string
    }

* "id" : The "id" identifier given in the request, if any.
* "model_name" : The name of the model used for inference.
* "model_version" : The specific model version used for inference.
* "text_output" : The output of the inference.

#### Example Response

```
200
{
  "id" : "42",
  "model_name" : "mymodel",
  "model_version" : "1",
  "text_output" : "model output"
}
```

### Generate Response JSON Error Object

A failed generate request must be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$generate_error_response` object.

    $generate_error_response =
    {
      "error": <error message string>
    }

* “error” : The descriptive message for the error.

#### Example Error

```
400
{
  "error" : "error message"
}
```


================================================
FILE: docs/protocol/extension_logging.md
================================================
<!--
# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Logging Extension

This document describes Triton's logging extension. The logging extension enables
the client to configure log settings during a Triton run. Triton reports "logging"
in the extensions field of its Server Metadata.

## HTTP/REST

In all JSON schemas shown in this document `$number`, `$string`, `$boolean`,
`$object` and `$array` refer to the fundamental JSON types. #optional
indicates an optional JSON field.

Triton exposes the logging endpoint at the following URL. The client may use
an HTTP GET request to retrieve the current log settings. An HTTP POST request
will modify the log settings, and the endpoint will return the updated log
settings on success or an error in the case of failure.

```
GET v2/logging

POST v2/logging
```

### Log Setting Response JSON Object

A successful log setting request is indicated by a 200 HTTP status
code. The response object, identified as `$log_setting_response`, is
returned in the HTTP body for every successful log setting request.

```
$log_setting_response =
{
  $log_setting, ...
}

$log_setting = $string : $string | $boolean | $number
```

Each `$log_setting` JSON describes a “name”/“value” pair, where the “name” is
the `$string` representation of the log setting and the “value” is a `$string`,
`$boolean`, or `$number` representation of the setting value. Currently, the
following log settings are defined:

- "log_file" : a `$string` log file location where the log outputs will be saved. If empty, log outputs are streamed to the console.

- "log_info" : a `$boolean` parameter that controls whether the Triton server logs INFO level messages.

- "log_warning" : a `$boolean` parameter that controls whether the Triton server logs WARNING level messages.

- "log_error" : a `$boolean` parameter that controls whether the Triton server logs ERROR level messages.

- "log_verbose_level" : a `$number` parameter that controls whether the Triton server outputs verbose messages
of varying degrees. This value can be any integer >= 0. If "log_verbose_level" is 0, verbose logging will be disabled, and
no verbose messages will be output by the Triton server. If "log_verbose_level" is 1, level 1 verbose messages will be output
by the Triton server. If "log_verbose_level" is 2, the Triton server will output all verbose messages of
level <= 2, etc. Attempting to set "log_verbose_level" to a number < 0 will result in an error.

- "log_format" : a `$string` parameter that controls the format of Triton server log messages. There are currently
2 formats: "default" and "ISO8601".


### Log Setting Response JSON Error Object

A failed log setting request will be indicated by an HTTP error status
(typically 400). The HTTP body will contain a `$log_setting_error_response` object.

```
$log_setting_error_response =
{
  "error": $string
}
```

- “error” : The descriptive message for the error.

### Log Setting Request JSON Object

A log setting request is made with an HTTP POST to
the logging endpoint. In the corresponding response, the HTTP body contains the
response JSON. A successful request is indicated by a 200 HTTP status code.

The request object, identified as `$log_setting_request` must be provided in the HTTP
body.

```
$log_setting_request =
{
  $log_setting, ...
}
```

When a `$log_setting` JSON is received (defined above), only the
specified settings will be updated. Currently, the following log
settings (described above) can be updated:
- "log_info"
- "log_warning"
- "log_error"
- "log_verbose_level"
- "log_format"

### Example Usage
The logging protocol extension can be invoked using the curl library in the following manner (assuming
a Triton server is running at `localhost:8000`):
```
curl -s -w '\n%{http_code}\n' -d '{"log_verbose_level":1}' -X POST localhost:8000/v2/logging
```
This command should return a `$log_setting_response` JSON object with the following format:
```
{"log_file":"","log_info":true,"log_warning":true,"log_error":true,"log_verbose_level":1,"log_format":"default"}
200
```
Note that the current values for all parameter fields are returned even though `log_verbose_level`
was the only parameter that was modified.

## GRPC

For the logging extension, Triton implements the following API:

```
service GRPCInferenceService
{
  …

  // Update and get the log setting of the Triton server.
  rpc LogSettings(LogSettingsRequest)
          returns (LogSettingsResponse) {}
}
```

The Log Setting API returns the latest log settings. Errors are indicated
by the `google.rpc.Status` returned for the request. The OK code
indicates success and other codes indicate failure. The request and
response messages for Log Settings are:

```
message LogSettingsRequest
{
  message SettingValue
  {
    oneof parameter_choice
    {
      // bool param option
      bool bool_param = 1;

      // uint32 param option
      uint32 uint32_param = 2;

      // string param option
      string string_param = 3;
    }
  }
  // The new setting values to be updated.
  // Unspecified settings will remain unchanged.
  map<string, SettingValue> settings = 1;
}

message LogSettingsResponse
{
  message SettingValue
  {
    oneof parameter_choice
    {
      // bool param option
      bool bool_param = 1;

      // uint32 param option
      uint32 uint32_param = 2;

      // string param option
      string string_param = 3;
    }
  }
  // The latest log settings values.
  map<string, SettingValue> settings = 1;
}
```

## Logging Formats

The logging extension offers two logging formats. The formats have a
common set of fields but differ in how the timestamp for a log entry
is represented. Messages are serialized according to JSON encoding
rules by default. This behavior can be disabled by setting the
environment variable TRITON_SERVER_ESCAPE_LOG_MESSAGES to "0" when
launching the server but cannot be changed through the logging
extension.

Log entries can be single-line or multi-line. Multi-line entries have
a single optional heading followed by the structured representation of
an object such as a table or protobuf message. Multi-line entries end
when the next log entry begins.

1. TRITONSERVER_LOG_DEFAULT

### Single-line Entry
```
<level><month><day><hour>:<min>:<sec>.<usec> <pid> <file>:<line>] <message>
```
Example:
```
I0520 20:03:25.829575 3355 model_lifecycle.cc:441] "AsyncLoad() 'simple'"
```
### Multi-line Entry
```
<level><month><day><hour>:<min>:<sec>.<usec> <pid> <file>:<line>] <heading>
<object>
```
Example:

```
I0520 20:03:25.912303 3355 server.cc:676]
+--------+---------+--------+
| Model  | Version | Status |
+--------+---------+--------+
| simple | 1       | READY  |
+--------+---------+--------+
```


2. TRITONSERVER_LOG_ISO8601

### Single-line Entry
```
<year>-<month>-<day>T<hour>:<min>:<sec>Z <level> <pid> <file>:<line>] <message>
```

Example:
```
2024-05-20T20:03:26Z I 3415 model_lifecycle.cc:441] "AsyncLoad() 'simple'"
```

### Multi-line Entry
```
<year>-<month>-<day>T<hour>:<min>:<sec>Z <level> <pid> <file>:<line>] <heading>
<object>
```

Example:

```
2024-05-20T20:03:26Z I 3415 server.cc:676]
+--------+---------+--------+
| Model  | Version | Status |
+--------+---------+--------+
| simple | 1       | READY  |
+--------+---------+--------+
```


================================================
FILE: docs/protocol/extension_model_configuration.md
================================================
<!--
# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Model Configuration Extension

This document describes Triton's model configuration extension.  The
model configuration extension allows Triton to return server-specific
information.  Because this extension is supported, Triton reports
“model_configuration” in the extensions field of its Server Metadata.

## HTTP/REST

In all JSON schemas shown in this document `$number`, `$string`, `$boolean`,
`$object` and `$array` refer to the fundamental JSON types. #optional
indicates an optional JSON field.

Triton exposes the model configuration endpoint at the following
URL. The versions portion of the URL is optional; if not provided
Triton will return model configuration for the highest-numbered
version of the model.

```
GET v2/models/${MODEL_NAME}[/versions/${MODEL_VERSION}]/config
```

A model configuration request is made with an HTTP GET to the model
configuration endpoint. A successful model configuration request is
indicated by a 200 HTTP status code. The model configuration response
object, identified as `$model_configuration_response`, is returned in
the HTTP body for every successful request.

```
$model_configuration_response =
{
  # configuration JSON
}
```

The contents of the response will be the JSON representation of the
model's configuration described by the [ModelConfig message from
model_config.proto](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto).

A failed model configuration request must be indicated by an HTTP
error status (typically 400). The HTTP body must contain the
`$model_configuration_error_response` object.

```
$model_configuration_error_response =
{
  "error": <error message string>
}
```

- “error” : The descriptive message for the error.

## GRPC

The GRPC definition of the service is:

```
service GRPCInferenceService
{
  …

  // Get model configuration.
  rpc ModelConfig(ModelConfigRequest) returns (ModelConfigResponse) {}
}
```

Errors are indicated by the google.rpc.Status returned for the
request. The OK code indicates success and other codes indicate
failure. The request and response messages for ModelConfig are:

```
message ModelConfigRequest
{
  // The name of the model.
  string name = 1;

  // The version of the model. If not given the version of the model
  // is selected automatically based on the version policy.
  string version = 2;
}

message ModelConfigResponse
{
  // The model configuration.
  ModelConfig config = 1;
}
```

Where the ModelConfig message is defined in
[model_config.proto](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto).


================================================
FILE: docs/protocol/extension_model_repository.md
================================================
<!--
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Model Repository Extension

This document describes Triton's model repository extension.  The
model-repository extension allows a client to query and control the
one or more model repositories being served by Triton.  Because this
extension is supported, Triton reports “model_repository” in the
extensions field of the Server Metadata. This extension has an
optional component, described below, that allows the unload API to
specify the "unload_dependents" parameter. Versions of Triton that
support this optional component will also report
"model_repository(unload_dependents)" in the extensions field of the
Server Metadata.

## HTTP/REST

In all JSON schemas shown in this document `$number`, `$string`, `$boolean`,
`$object` and `$array` refer to the fundamental JSON types. `#optional`
indicates an optional JSON field.

The model-repository extension requires Index, Load and Unload
APIs. Triton exposes the endpoints at the following URLs.

```
POST v2/repository/index

POST v2/repository/models/${MODEL_NAME}/load

POST v2/repository/models/${MODEL_NAME}/unload
```

### Index

The index API returns information about every model available in a
model repository, even if it is not currently loaded into Triton. The
index API provides a way to determine which models can potentially be
loaded by the Load API. A model-repository index request is made with
an HTTP POST to the index endpoint. In the corresponding response the
HTTP body contains the JSON response.

The index request object, identified as `$repository_index_request`, is
required in the HTTP body of the POST request.

```
$repository_index_request =
{
  "ready" : $boolean #optional,
}
```

- "ready" : Optional, default is false. If true return only models ready for inferencing.

A successful index request is indicated by a 200 HTTP status code. The
response object, identified as `$repository_index_response`, is returned
in the HTTP body for every successful request.

```
$repository_index_response =
[
  {
    "name" : $string,
    "version" : $string #optional,
    "state" : $string,
    "reason" : $string
  },
  …
]
```

- “name” : The name of the model.
- “version” : The version of the model.
- “state” : The state of the model.
- “reason” : The reason, if any, that the model is in the current state.

A failed index request must be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$repository_index_error_response` object.

```
$repository_index_error_response =
{
  "error": $string
}
```

- “error” : The descriptive message for the error.

### Load

The load API requests that a model be loaded into Triton, or reloaded
if the model is already loaded. A load request is made with an HTTP
POST to a load endpoint. The HTTP body may be empty or may contain
the load request object, identified as `$repository_load_request`.
A successful load request is indicated by a 200 HTTP status.


```
$repository_load_request =
{
  "parameters" : $parameters #optional
}
```

- "parameters" : An object containing zero or more parameters for this
  request expressed as key/value pairs. See
  [Parameters](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#parameters)
  for more information.

The load API accepts the following parameters:

- "config" : string parameter that contains a JSON representation of the model
configuration, which must be able to be parsed into [ModelConfig message from
model_config.proto](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto).
This config will be used for loading the model instead of the one in
the model directory. If config is provided, the (re-)load will be triggered as
the model metadata has been updated, and the same (re-)load behavior will be
applied.

- "file:\<version\>/\<file-name\>" : The serialized model file, base64 encoded.
This convention will be used to specify the override model directory to load
the model from. For instance, if the user wants to specify a model directory
that contains an ONNX model as version 2, then the user will specify the
parameter to "file:2/model.onnx" : "\<base64-encoded-file-content\>". Note that
"config" parameter must be provided to serve as the model configuration of the
override model directory.

A failed load request must be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$repository_load_error_response` object.

```
$repository_load_error_response =
{
  "error": $string
}
```
- “error” : The descriptive message for the error.

#### Examples

For the following request, Triton will load the model "mymodel" with provided
model configuration and model file.

```
POST /v2/repository/models/mymodel/load HTTP/1.1
Host: localhost:8000
{
  "parameters": {
    "config": "{
      "name": "mymodel",
      "backend": "onnxruntime",
      "inputs": [{
          "name": "INPUT0",
          "datatype": "FP32",
          "shape": [ 1 ]
        }
      ],
      "outputs": [{
          "name": "OUTPUT0",
          "datatype": "FP32",
          "shape": [ 1 ]
        }
      ]
    }",

    "file:1/model.onnx" : "<base64-encoded-file-content>"
  }
}
```

### Unload

The unload API requests that a model be unloaded from Triton. An
unload request is made with an HTTP POST to an unload endpoint. The
HTTP body may be empty or may contain the unload request object,
identified as `$repository_unload_request`. A successful unload request
is indicated by a 200 HTTP status.

```
$repository_unload_request =
{
  "parameters" : $parameters #optional
}
```

- "parameters" : An object containing zero or more parameters for this
  request expressed as key/value pairs. See
  [Parameters](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#parameters)
  for more information.

The unload API accepts the following parameters:

- "unload_dependents" : boolean parameter indicating that in addition
  to unloading the requested model, also unload any dependent model
  that was loaded along with the requested model. For example, a request to
  unload the models composing an ensemble will unload the ensemble as well.

A failed unload request must be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$repository_unload_error_response` object.

```
$repository_unload_error_response =
{
  "error": $string
}
```

- “error” : The descriptive message for the error.

## GRPC

The model-repository extension requires the following API:

```
service GRPCInferenceService
{
  …

  // Get the index of model repository contents.
  rpc RepositoryIndex(RepositoryIndexRequest)
          returns (RepositoryIndexResponse) {}

  // Load or reload a model from a repository.
  rpc RepositoryModelLoad(RepositoryModelLoadRequest)
          returns (RepositoryModelLoadResponse) {}

  // Unload a model.
  rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
          returns (RepositoryModelUnloadResponse) {}
}

message ModelRepositoryParameter
{
  // The parameter value can be a string, an int64, a boolean
  // or a message specific to a predefined parameter.
  oneof parameter_choice
  {
    // A boolean parameter value.
    bool bool_param = 1;

    // An int64 parameter value.
    int64 int64_param = 2;

    // A string parameter value.
    string string_param = 3;

    // A bytes parameter value.
    bytes bytes_param = 4;
  }
}
```

### Index

The RepositoryIndex API returns information about every model
available in a model repository, even if it is not currently loaded
into Triton. Errors are indicated by the google.rpc.Status returned
for the request. The OK code indicates success and other codes
indicate failure. The request and response messages for
RepositoryIndex are:

```
message RepositoryIndexRequest
{
  // The name of the repository. If empty the index is returned
  // for all repositories.
  string repository_name = 1;

  // If true return only models currently ready for inferencing.
  bool ready = 2;
}

message RepositoryIndexResponse
{
  // Index entry for a model.
  message ModelIndex {
    // The name of the model.
    string name = 1;

    // The version of the model.
    string version = 2;

    // The state of the model.
    string state = 3;

    // The reason, if any, that the model is in the given state.
    string reason = 4;
  }

  // An index entry for each model.
  repeated ModelIndex models = 1;
}
```

### Load

The RepositoryModelLoad API requests that a model be loaded into
Triton, or reloaded if the model is already loaded. Errors are
indicated by the google.rpc.Status returned for the request. The OK
code indicates success and other codes indicate failure. The request
and response messages for RepositoryModelLoad are:

```
message RepositoryModelLoadRequest
{
  // The name of the repository to load from. If empty the model
  // is loaded from any repository.
  string repository_name = 1;

  // The name of the model to load, or reload.
  string model_name = 2;

  // Optional parameters.
  map<string, ModelRepositoryParameter> parameters = 3;
}

message RepositoryModelLoadResponse
{
}
```

The RepositoryModelLoad API accepts the following parameters:

- "config" : string parameter that contains a JSON representation of the model
configuration, which must be able to be parsed into [ModelConfig message from
model_config.proto](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto).
This config will be used for loading the model instead of the one in
the model directory. If config is provided, the (re-)load will be triggered as
the model metadata has been updated, and the same (re-)load behavior will be
applied.

- "file:\<version\>/\<file-name\>" : bytes parameter that contains the model
file content. This convention will be used to specify the override model
directory to load the model from. For instance, if the user wants to specify a
model directory that contains an ONNX model as version 2, then the user will
specify the parameter to "file:2/model.onnx" : "\<file-content\>". Note that
"config" parameter must be provided to serve as the model configuration of the
override model directory.

### Unload

The RepositoryModelUnload API requests that a model be unloaded from
Triton. Errors are indicated by the google.rpc.Status returned for the
request. The OK code indicates success and other codes indicate
failure. The request and response messages for RepositoryModelUnload
are:

```
message RepositoryModelUnloadRequest
{
  // The name of the repository from which the model was originally
  // loaded. If empty the repository is not considered.
  string repository_name = 1;

  // The name of the model to unload.
  string model_name = 2;

  // Optional parameters.
  map<string, ModelRepositoryParameter> parameters = 3;
}

message RepositoryModelUnloadResponse
{
}
```

The RepositoryModelUnload API accepts the following parameters:

- "unload_dependents" : boolean parameter indicating that in addition
  to unloading the requested model, also unload any dependent model
  that was loaded along with the requested model. For example, a request to
  unload the models composing an ensemble will unload the ensemble as well.

================================================
FILE: docs/protocol/extension_parameters.md
================================================
<!--
# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Parameters Extension

This document describes Triton's parameters extension. The
parameters extension allows an inference request to provide
custom parameters that cannot be provided as inputs. Because this extension is
supported, Triton reports “parameters” in the extensions field of its
Server Metadata. This extension uses the optional "parameters"
field in the KServe Protocol in
[HTTP](https://kserve.github.io/website/docs/concepts/architecture/data-plane/v2-protocol#inference-request-json-object)
and
[GRPC](https://kserve.github.io/website/docs/concepts/architecture/data-plane/v2-protocol#parameters).

The following parameters are reserved for Triton's usage and should not be
used as custom parameters:

- sequence_id
- priority
- timeout
- sequence_start
- sequence_end
- headers
- All the keys that start with `"triton_"` prefix. Some examples used today:
  - `"triton_enable_empty_final_response"` request parameter
  - `"triton_final_response"` response parameter

When using the GRPC and HTTP endpoints, make sure not to use any of the
reserved parameter names for custom parameters, to avoid unexpected behavior.
The reserved parameters are not accessible in the Triton C-API.

## HTTP/REST

The following example shows how a request can include custom parameters.

```
POST /v2/models/mymodel/infer HTTP/1.1
Host: localhost:8000
Content-Type: application/json
Content-Length: <xx>
{
  "parameters" : { "my_custom_parameter" : 42 },
  "inputs" : [
    {
      "name" : "input0",
      "shape" : [ 2, 2 ],
      "datatype" : "UINT32",
      "data" : [ 1, 2, 3, 4 ]
    }
  ],
  "outputs" : [
    {
      "name" : "output0"
    }
  ]
}
```

## GRPC

The `parameters` field in the
ModelInferRequest message can be used to send custom parameters.

## Forwarding HTTP/GRPC Headers as Parameters

Triton can forward HTTP/GRPC headers as inference request parameters. By
specifying a regular expression in `--http-header-forward-pattern` and
`--grpc-header-forward-pattern`,
Triton will add the headers that match with the regular expression as request
parameters. All the forwarded headers will be added as a parameter with string
value. For example to forward all the headers that start with 'PREFIX_' from
both HTTP and GRPC, you should add `--http-header-forward-pattern PREFIX_.*
--grpc-header-forward-pattern PREFIX_.*` to your `tritonserver` command.

By default, the regular expression pattern matches headers with case-insensitive
mode according to the HTTP protocol. If you want to enforce case-sensitive mode,
simply add the `(?-i)` prefix, which turns off case-insensitive mode, e.g.
`--http-header-forward-pattern (?-i)PREFIX_.*`. Note, headers sent through the
Python HTTP client may be automatically lower-cased by internal client libraries.

The forwarded headers can be accessed using the
[Python](https://github.com/triton-inference-server/python_backend#inference-request-parameters)
or C Backend APIs as inference request parameters.


================================================
FILE: docs/protocol/extension_schedule_policy.md
================================================
<!--
# Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Schedule Policy Extension

This document describes Triton's schedule policy extension. The
schedule-policy extension allows an inference request to provide
parameters that influence how Triton handles and schedules the
request. Because this extension is supported, Triton reports
“schedule_policy” in the extensions field of its Server Metadata.
Note that the policies apply to the [dynamic
batcher](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/batcher.md#dynamic-batcher),
with only experimental support for the [sequence
batcher](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/batcher.md#sequence-batcher)
with the [direct](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/architecture.md#direct)
scheduling strategy.

## Dynamic Batcher

The schedule-policy extension uses request parameters to indicate the
policy. The parameters and their type are:

- "priority" : int64 value indicating the priority of the
  request. Priority value zero indicates that the default priority
  level should be used (i.e. same behavior as not specifying the
  priority parameter). Lower value priorities indicate higher priority
  levels. Thus the highest priority level is indicated by setting the
  parameter to 1, the next highest is 2, etc.

- "timeout" : int64 value indicating the timeout value for the
  request, in microseconds. If the request cannot be completed within
  the time Triton will take a model-specific action such as
  terminating the request.

Both parameters are optional and, if not specified, Triton will handle
the request using the default priority and timeout values appropriate
for the model.

## Sequence Batcher with Direct Scheduling Strategy

**Note that the schedule policy for sequence batcher is at experimental stage
and it is subject to change.**

The schedule-policy extension uses request parameters to indicate the
policy. The parameters and their type are:

- "timeout" : int64 value indicating the timeout value for the
  request, in microseconds. If the request cannot be completed within
  the time Triton will terminate the request, as well as the corresponding
  sequence and received requests of the sequence. The timeout will only be
  applied to requests of the sequences that haven't been allocated a batch slot
  for execution, the requests of the sequences that have been allocated batch
  slots will not be affected by the timeout setting.

The parameter is optional and, if not specified, Triton will handle
the request and corresponding sequence based on the model configuration.

================================================
FILE: docs/protocol/extension_sequence.md
================================================
<!--
# Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Sequence Extension

This document describes Triton's sequence extension. The sequence
extension allows Triton to support stateful models that expect a
sequence of related inference requests.

An inference request can specify that it is part of a sequence using
the “sequence_id” parameter in the request and by using the
“sequence_start” and “sequence_end” parameters to indicate the start
and end of sequences.

Because this extension is supported, Triton reports "sequence"
in the extensions field of its Server Metadata. Triton may additionally
report "sequence(string_id)" in the extensions field of the Server Metadata
if the "sequence_id" parameter supports string types.

- "sequence_id" : a string or uint64 value that identifies the sequence to which
  a request belongs. All inference requests that belong to the same sequence
  must use the same sequence ID. A sequence ID of 0 or "" indicates the
  inference request is not part of a sequence.

- "sequence_start" : boolean value if set to true in a request
  indicates that the request is the first in a sequence. If not set,
  or set to false the request is not the first in a sequence. If set
  the "sequence_id" parameter must be set to a non-zero or non-empty string
  value.

- "sequence_end" : boolean value if set to true in a request indicates
  that the request is the last in a sequence. If not set, or set to
  false the request is not the last in a sequence. If set the
  "sequence_id" parameter must be set to a non-zero or non-empty string
  value.

## HTTP/REST

The following example shows how a request is marked as part of a
sequence. In this case the sequence_start and sequence_end parameters
are not used which means that this request is neither the start nor
end of the sequence.

```
POST /v2/models/mymodel/infer HTTP/1.1
Host: localhost:8000
Content-Type: application/json
Content-Length: <xx>
{
  "parameters" : { "sequence_id" : 42 },
  "inputs" : [
    {
      "name" : "input0",
      "shape" : [ 2, 2 ],
      "datatype" : "UINT32",
      "data" : [ 1, 2, 3, 4 ]
    }
  ],
  "outputs" : [
    {
      "name" : "output0",
    }
  ]
}
```

The example below uses a v4 UUID string as the value for the "sequence_id"
parameter.

```
POST /v2/models/mymodel/infer HTTP/1.1
Host: localhost:8000
Content-Type: application/json
Content-Length: <xx>
{
  "parameters" : { "sequence_id" : "e333c95a-07fc-42d2-ab16-033b1a566ed5" },
  "inputs" : [
    {
      "name" : "input0",
      "shape" : [ 2, 2 ],
      "datatype" : "UINT32",
      "data" : [ 1, 2, 3, 4 ]
    }
  ],
  "outputs" : [
    {
      "name" : "output0",
    }
  ]
}
```

## GRPC

In addition to supporting the sequence parameters described above, the
GRPC API adds a streaming version of the inference API to allow a
sequence of inference requests to be sent over the same GRPC
stream. This streaming API is not required to be used for requests
that specify a sequence_id and may be used by requests that do not
specify a sequence_id. The ModelInferRequest is the same as for the
ModelInfer API.  The ModelStreamInferResponse message is shown below.

```
service GRPCInferenceService
{
  …

  // Perform inference using a specific model with GRPC streaming.
  rpc ModelStreamInfer(stream ModelInferRequest) returns (stream ModelStreamInferResponse) {}
}

// Response message for ModelStreamInfer.
message ModelStreamInferResponse
{
  // The message describing the error. The empty message
  // indicates the inference was successful without errors.
  string error_message = 1;

  // Holds the results of the request.
  ModelInferResponse infer_response = 2;
}
```


================================================
FILE: docs/protocol/extension_shared_memory.md
================================================
<!--
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Shared-Memory Extension

This document describes Triton's shared-memory extensions.  The
shared-memory extensions allow a client to communicate input and
output tensors by system or CUDA shared memory. Using shared memory
instead of sending the tensor data over the GRPC or REST interface can
provide significant performance improvement for some use cases.
Because both of these extensions are supported, Triton reports
“system_shared_memory” and "cuda_shared_memory" in the extensions
field of its Server Metadata.

The shared-memory extensions use a common set of parameters to
indicate that an input or output tensor is communicated via shared
memory. These parameters and their type are:

- "shared_memory_region" : string value is the name of a previously
  registered shared memory region. Region names share a namespace for
  system-shared-memory regions and CUDA-shared-memory regions.

- "shared_memory_offset" : int64 value is the offset, in bytes, into
  the region where the data for the tensor starts.

- "shared_memory_byte_size" : int64 value is the size, in bytes, of
  the data.

The “shared_memory_offset” parameter is optional and defaults to
zero. The other two parameters are required. If only one of the two is
given Triton will return an error.

Note that there is no Windows support for shared memory yet. Jetson only
supports system shared memory.

## HTTP/REST

In all JSON schemas shown in this document `$number`, `$string`, `$boolean`,
`$object` and `$array` refer to the fundamental JSON types. #optional
indicates an optional JSON field.

The shared-memory parameters may be used in the `$request_input`
parameters to indicate that the corresponding input is being
communicated via shared memory. The parameters may be used in the
`$request_output` parameters to indicate that the requested output
should be communicated via shared memory.

When these parameters are set for an input tensor the “data” field of
`$request_input` must not be set. If the “data” field is set Triton will
return an error. When these parameters are set for a requested output
tensor the returned `$response_output` must not define the “data” field.

Shared memory regions must be created by the client and then
registered with Triton before they can be referenced with a
“shared_memory_region” parameter. The system and CUDA shared-memory
extensions each require a different set of APIs for registering a
shared memory region.

### System Shared Memory

The system shared memory extension requires Status, Register and
Unregister APIs.

Triton exposes the following URL to register and unregister system
shared memory regions.

```
GET v2/systemsharedmemory[/region/${REGION_NAME}]/status

POST v2/systemsharedmemory/region/${REGION_NAME}/register

POST v2/systemsharedmemory[/region/${REGION_NAME}]/unregister
```

#### Status

A system-shared-memory status request is made with an HTTP GET to the
status endpoint. In the corresponding response the HTTP body contains
the response JSON. If REGION_NAME is provided in the URL the response
includes the status for the corresponding region. If REGION_NAME is
not provided in the URL the response includes the status for all
registered regions.

A successful status request is indicated by a 200 HTTP status
code. The response object, identified as
`$system_shared_memory_status_response`, is returned in the HTTP body
for every successful request.

```
$system_shared_memory_status_response =
[
  {
    "name" : $string,
    "key" : $string,
    "offset" : $number,
    "byte_size" : $number
  },
  …
]
```

- “name” : The name of the shared-memory region.

- “key” : The key of the underlying memory object that contains the
  shared memory region.

- “offset” : The offset, in bytes, within the underlying memory object
  to the start of the shared memory region.

- “byte_size” : The size of the shared memory region, in bytes.

A failed status request must be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$system_shared_memory_status_error_response` object.

```
$system_shared_memory_status_error_response =
{
  "error": $string
}
```

- “error” : The descriptive message for the error.

#### Register

A system-shared-memory register request is made with an HTTP POST to
the register endpoint. In the corresponding response the HTTP body
contains the response JSON. A successful register request is indicated
by a 200 HTTP status code.

The request object, identified as
`$system_shared_memory_register_request` must be provided in the HTTP
body.

```
$system_shared_memory_register_request =
{
  "key" : $string,
  "offset" : $number,
  "byte_size" : $number
}
```

- “key” : The key of the underlying memory object that contains the
  shared memory region.

- “offset” : The offset, in bytes, within the underlying memory object
  to the start of the shared memory region.

- “byte_size” : The size of the shared memory region, in bytes.

A failed register request must be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$system_shared_memory_register_error_response` object.

```
$system_shared_memory_register_error_response =
{
  "error": $string
}
```
- “error” : The descriptive message for the error.

#### Unregister

A system-shared-memory unregister request is made with an HTTP POST to
an unregister endpoint. In the request the HTTP body must be empty.

A successful unregister request is indicated by a 200 HTTP status.  If
REGION_NAME is provided in the URL the single region is
unregistered. If REGION_NAME is not provided in the URL all regions
are unregistered.

A failed unregister request must be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$system_shared_memory_unregister_error_response` object.

```
$system_shared_memory_unregister_error_response =
{
  "error": $string
}
```

- “error” : The descriptive message for the error.

### CUDA Shared Memory

The CUDA shared memory extension requires Status, Register and
Unregister APIs.

Triton exposes the following URL to register and unregister CUDA
shared memory regions.

```
GET v2/cudasharedmemory[/region/${REGION_NAME}]/status

POST v2/cudasharedmemory/region/${REGION_NAME}/register

POST v2/cudasharedmemory[/region/${REGION_NAME}]/unregister
```

#### Status

A CUDA-shared-memory status request is made with an HTTP GET to the
status endpoint. In the corresponding response the HTTP body contains
the response JSON. If REGION_NAME is provided in the URL the response
includes the status for the corresponding region. If REGION_NAME is
not provided in the URL the response includes the status for all
registered regions.

A successful status request is indicated by a 200 HTTP status
code. The response object, identified as
`$cuda_shared_memory_status_response`, is returned in the HTTP body
for every successful request.

```
$cuda_shared_memory_status_response =
[
  {
    "name" : $string,
    "device_id" : $number,
    "byte_size" : $number
  },
  …
]
```

- “name” : The name of the shared memory region.

- “device_id” : The GPU device ID where the cudaIPC handle was
  created.

- “byte_size” : The size of the shared memory region, in bytes.

A failed status request must be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$cuda_shared_memory_status_error_response` object.

```
$cuda_shared_memory_status_error_response =
{
  "error": $string
}
```

- “error” : The descriptive message for the error.

#### Register

A CUDA-shared-memory register request is made with an HTTP POST to
the register endpoint. In the corresponding response the HTTP body
contains the response JSON. A successful register request is indicated
by a 200 HTTP status code.

The request object, identified as
`$cuda_shared_memory_register_request` must be provided in the HTTP
body.

```
$cuda_shared_memory_register_request =
{
  "raw_handle" : { "b64" : $string },
  "device_id" : $number,
  "byte_size" : $number
}
```

- “raw_handle” : The serialized cudaIPC handle, base64 encoded.

- “device_id” : The GPU device ID where the cudaIPC handle was
  created.

- “byte_size” : The size of the shared memory region, in bytes.

A failed register request must be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$cuda_shared_memory_register_error_response` object.

```
$cuda_shared_memory_register_error_response =
{
  "error": $string
}
```

- “error” : The descriptive message for the error.

#### Unregister

A CUDA-shared-memory unregister request is made with an HTTP POST to
an unregister endpoint. In the request the HTTP body must be empty.

A successful unregister request is indicated by a 200 HTTP status.  If
REGION_NAME is provided in the URL the single region is
unregistered. If REGION_NAME is not provided in the URL all regions
are unregistered.

A failed unregister request must be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$cuda_shared_memory_unregister_error_response` object.

```
$cuda_shared_memory_unregister_error_response =
{
  "error": $string
}
```

- “error” : The descriptive message for the error.

## GRPC

The shared-memory parameters may be used in the
ModelInferRequest::InferInputTensor message to indicate that the
corresponding input is being communicated via shared memory. The
parameters may be used in the
ModelInferRequest::InferRequestedOutputTensor message to indicate that
the requested output should be communicated via shared memory.

When these parameters are set for an input tensor the “contents” field
of ModelInferRequest::InferInputTensor must not be set. If the
“contents” field is set Triton will return an error. When these
parameters are set for a requested output tensor the “contents” field
of the ModelInferResponse::InferOutputTensor will not be set in the
inference response.

Shared memory regions must be created by the client and then
registered with Triton before they can be referenced with a
“shared_memory_region” parameter. The system and CUDA shared-memory
extensions each require a different set of APIs. For all APIs, errors
are indicated by the google.rpc.Status returned for the request. The
OK code indicates success and other codes indicate failure.

### System Shared Memory

The system shared memory extension requires the following API:

```
service GRPCInferenceService
{
  …

  // Get the status of all registered system-shared-memory regions.
  rpc SystemSharedMemoryStatus(SystemSharedMemoryStatusRequest)
          returns (SystemSharedMemoryStatusResponse) {}

  // Register system-shared-memory region.
  rpc SystemSharedMemoryRegister(SystemSharedMemoryRegisterRequest)
          returns (SystemSharedMemoryRegisterResponse) {}

  // Unregister system-shared-memory region.
  rpc SystemSharedMemoryUnregister(SystemSharedMemoryUnregisterRequest)
          returns (SystemSharedMemoryUnregisterResponse) {}
}
```

#### Status

The system-shared-memory status API provides information about
registered system shared-memory regions. Errors are indicated by the
google.rpc.Status returned for the request. The OK code indicates
success and other codes indicate failure. The request and response
messages for SystemSharedMemoryStatus are:

```
message SystemSharedMemoryStatusRequest
{
  // The name of the region to get status for. If empty the
  // status is returned for all registered regions.
  string name = 1;
}

message SystemSharedMemoryStatusResponse
{
  // Status for a shared memory region.
  message RegionStatus {
    // The name for the shared memory region.
    string name = 1;

    // The key of the underlying memory object that contains the
    // shared memory region.
    string key = 2;

    // Offset, in bytes, within the underlying memory object to
    // the start of the shared memory region.
    uint64 offset = 3;

    // Size of the shared memory region, in bytes.
    uint64 byte_size = 4;
  }

  // Status for each of the registered regions, indexed by region name.
  map<string, RegionStatus> regions = 1;
}
```

#### Register

The system-shared-memory register API is used to register a new
shared-memory region with Triton. After a region is registered it can
be used in the “shared_memory_region” parameter for an input or output
tensor. Errors are indicated by the google.rpc.Status returned for the
request. The OK code indicates success and other codes indicate
failure. The request and response messages for
SystemSharedMemoryRegister are:

```
message SystemSharedMemoryRegisterRequest
{
  // The name of the region to register.
  string name = 1;

  // The key of the underlying memory object that contains the
  // shared memory region.
  string key = 2;

  // Offset, in bytes, within the underlying memory object to
  // the start of the shared memory region.
  uint64 offset = 3;

  // Size of the shared memory region, in bytes.
  uint64 byte_size = 4;
}

message SystemSharedMemoryRegisterResponse
{
}
```

#### Unregister

The system-shared-memory unregister API unregisters a
shared-memory region from Triton. After a region is
unregistered it can no longer be used to communicate input and output
tensor contents. Errors are indicated by the google.rpc.Status
returned for the request. The OK code indicates success and other
codes indicate failure. The request and response messages for
SystemSharedMemoryUnregister are:

```
message SystemSharedMemoryUnregisterRequest
{
  // The name of the region to unregister. If empty all system shared-memory
  // regions are unregistered.
  string name = 1;
}

message SystemSharedMemoryUnregisterResponse
{
}
```

### CUDA Shared Memory

The CUDA shared memory extension requires the following API:

```
service GRPCInferenceService
{
  …

  // Get the status of all registered CUDA-shared-memory regions.
  rpc CudaSharedMemoryStatus(CudaSharedMemoryStatusRequest)
          returns (CudaSharedMemoryStatusResponse) {}

  // Register CUDA-shared-memory region.
  rpc CudaSharedMemoryRegister(CudaSharedMemoryRegisterRequest)
          returns (CudaSharedMemoryRegisterResponse) {}

  // Unregister CUDA-shared-memory region.
  rpc CudaSharedMemoryUnregister(CudaSharedMemoryUnregisterRequest)
          returns (CudaSharedMemoryUnregisterResponse) {}
}
```

#### Status

The CUDA-shared-memory status API provides information about
registered CUDA shared-memory regions. Errors are indicated by the
google.rpc.Status returned for the request. The OK code indicates
success and other codes indicate failure. The request and response
messages for CudaSharedMemoryStatus are:

```
message CudaSharedMemoryStatusRequest
{
  // The name of the region to get status for. If empty the
  // status is returned for all registered regions.
  string name = 1;
}

message CudaSharedMemoryStatusResponse
{
  // Status for a shared memory region.
  message RegionStatus {
    // The name for the shared memory region.
    string name = 1;

    // The GPU device ID where the cudaIPC handle was created.
    uint64 device_id = 2;

    // Size of the shared memory region, in bytes.
    uint64 byte_size = 3;
  }

  // Status for each of the registered regions, indexed by region name.
  map<string, RegionStatus> regions = 1;
}
```

#### Register

The CUDA-shared-memory register API is used to register a new
shared-memory region with Triton. After a region is
registered it can be used in the “shared_memory_region” parameter for
an input or output tensor. Errors are indicated by the
google.rpc.Status returned for the request. The OK code indicates
success and other codes indicate failure. The request and response
messages for CudaSharedMemoryRegister are:

```
message CudaSharedMemoryRegisterRequest
{
  // The name of the region to register.
  string name = 1;

  // The raw serialized cudaIPC handle.
  bytes raw_handle = 2;

  // The GPU device ID on which the cudaIPC handle was created.
  int64 device_id = 3;

  // Size of the shared memory region, in bytes.
  uint64 byte_size = 4;
}

message CudaSharedMemoryRegisterResponse
{
}
```

#### Unregister

The CUDA-shared-memory unregister API unregisters a
shared-memory region from Triton. After a region is unregistered it
can no longer be used to communicate input and output tensor
contents. Errors are indicated by the google.rpc.Status returned for
the request. The OK code indicates success and other codes indicate
failure. The request and response messages for
CudaSharedMemoryUnregister are:

```
message CudaSharedMemoryUnregisterRequest
{
  // The name of the region to unregister. If empty all CUDA shared-memory
  // regions are unregistered.
  string name = 1;
}

message CudaSharedMemoryUnregisterResponse
{
}
```


================================================
FILE: docs/protocol/extension_statistics.md
================================================
<!--
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Statistics Extension

This document describes Triton's statistics extension. The statistics
extension enables the reporting of per-model (per-version) statistics
which provide aggregate information about all activity occurring for a
specific model (version) since Triton started. Because this extension
is supported, Triton reports “statistics” in the extensions field of
its Server Metadata.

## HTTP/REST

In all JSON schemas shown in this document `$number`, `$string`, `$boolean`,
`$object` and `$array` refer to the fundamental JSON types. #optional
indicates an optional JSON field.

Triton exposes the statistics endpoint at the following URL. The
specific model name portion of the URL is optional; if not provided
Triton will return the statistics for all versions of all models. If a
specific model is given in the URL the versions portion of the URL is
optional; if not provided Triton will return statistics for all
versions of the specified model.

```
GET v2/models[/${MODEL_NAME}[/versions/${MODEL_VERSION}]]/stats
```

### Statistics Response JSON Object

A successful statistics request is indicated by a 200 HTTP status
code. The response object, identified as `$stats_model_response`, is
returned in the HTTP body for every successful statistics request.

```
$stats_model_response =
{
  "model_stats" : [ $model_stat, ... ]
}
```

Each `$model_stat` object gives the statistics for a specific model and
version. The "version" field is optional for servers that do not
support versions.

```
$model_stat =
{
  "name" : $string,
  "version" : $string #optional,
  "last_inference" : $number,
  "inference_count" : $number,
  "execution_count" : $number,
  "inference_stats" : $inference_stats,
  "response_stats" : { $string : $response_stats, ... },
  "batch_stats" : [ $batch_stats, ... ],
  "memory_usage" : [ $memory_usage, ...]
}
```

- "name" : The name of the model.

- "version" : The version of the model.

- "last_inference" : The timestamp of the last inference request made
  for this model, as milliseconds since the epoch.

- "inference_count" : The cumulative count of successful inference
  requests made for this model. Each inference in a batched request is
  counted as an individual inference. For example, if a client sends a
  single inference request with batch size 64, "inference_count" will
  be incremented by 64. Similarly, if a client sends 64 individual
  requests each with batch size 1, "inference_count" will be
  incremented by 64. The "inference_count" value DOES NOT include cache hits.

- "execution_count" : The cumulative count of the number of successful
  inference executions performed for the model. When dynamic batching
  is enabled, a single model execution can perform inferencing for
  more than one inference request. For example, if a client sends 64
  individual requests each with batch size 1 and the dynamic batcher
  batches them into a single large batch for model execution then
  "execution_count" will be incremented by 1. If, on the other hand,
  the dynamic batcher is not enabled so that each of the 64
  individual requests is executed independently, then
  "execution_count" will be incremented by 64. The "execution_count" value
  DOES NOT include cache hits.

- "inference_stats" : The aggregate statistics for the
  model. So, for example, "inference_stats":"success" indicates the number of
  successful inference requests for the model.

- "response_stats" : The aggregate response statistics for the model. For
  example, { "key" : { "response_stats" : "success" } } indicates the aggregate
  statistics of successful responses at "key" for the model, where "key"
  identifies each response generated by the model across different requests. For
  example, given a model that generates three responses, the keys can be "0",
  "1" and "2" identifying the three responses in order.

- "batch_stats" : The aggregate statistics for each different batch
  size that is executed in the model. The batch statistics indicate
  how many actual model executions were performed and show differences
  due to different batch size (for example, larger batches typically
  take longer to compute).

- "memory_usage" : The memory usage detected during model loading, which may be
  used to estimate the memory to be released once the model is unloaded. Note
  that the estimation is inferred from the profiling tools and framework's
  memory schema, therefore it is advised to perform experiments to understand
  the scenario that the reported memory usage can be relied on. As a starting
  point, the GPU memory usage for models in ONNX Runtime backend and TensorRT
  backend is usually aligned.

```
$inference_stats =
{
  "success" : $duration_stat,
  "fail" : $duration_stat,
  "queue" : $duration_stat,
  "compute_input" : $duration_stat,
  "compute_infer" : $duration_stat,
  "compute_output" : $duration_stat,
  "cache_hit": $duration_stat,
  "cache_miss": $duration_stat
}
```

- “success” : The count and cumulative duration for all successful
  inference requests. The "success" count and cumulative duration includes
  cache hits.

- “fail” : The count and cumulative duration for all failed inference
  requests.

- “queue” : The count and cumulative duration that inference requests
  wait in scheduling or other queues. The "queue" count and cumulative
  duration includes cache hits.

- “compute_input” : The count and cumulative duration to prepare input
  tensor data as required by the model framework / backend. For
  example, this duration should include the time to copy input tensor
  data to the GPU. The "compute_input" count and cumulative duration DO NOT
  include cache hits.

- “compute_infer” : The count and cumulative duration to execute the
  model. The "compute_infer" count and cumulative duration DO NOT include
  cache hits.

- “compute_output” : The count and cumulative duration to extract
  output tensor data produced by the model framework / backend. For
  example, this duration should include the time to copy output tensor
  data from the GPU. The "compute_output" count and cumulative duration DO NOT
  include cache hits.

- "cache_hit" : The count of response cache hits and cumulative duration to
  lookup and extract output tensor data from the Response Cache on a cache hit.
  For example, this duration should include the time to copy output tensor data
  from the Response Cache to the response object.

- "cache_miss" : The count of response cache misses and cumulative duration to
  lookup and insert output tensor data to the Response Cache on a cache miss.
  For example, this duration should include the time to copy output tensor data
  from the response object to the Response Cache.


```
$response_stats =
{
  "compute_infer" : $duration_stat,
  "compute_output" : $duration_stat,
  "success" : $duration_stat,
  "fail" : $duration_stat,
  "empty_response" : $duration_stat,
  "cancel" : $duration_stat
}
```

- "compute_infer" : The count and cumulative duration to compute a response.
- "compute_output" : The count and cumulative duration to extract the output
  tensor of a computed response.
- "success" : The count and cumulative duration of a successful inference. The
  duration is the sum of infer and output durations.
- "fail" : The count and cumulative duration of a failed inference. The duration
  is the sum of infer and output durations.
- "empty_response" : The count and cumulative duration of an inference with an
  empty / no response. The duration is the infer duration.
- "cancel" : The count and cumulative duration of an inference cancellation. The
  duration is for cleaning up resources held by cancelled inference requests.


```
$batch_stats =
{
  "batch_size" : $number,
  "compute_input" : $duration_stat,
  "compute_infer" : $duration_stat,
  "compute_output" : $duration_stat
}
```

- "batch_size" : The size of the batch.

- "count" : The number of times the batch size was executed on the
  model. A single model execution performs inferencing for the entire
  request batch and can perform inferencing for multiple requests if
  dynamic batching is enabled.

- “compute_input” : The count and cumulative duration to prepare input
  tensor data as required by the model framework / backend with the
  given batch size. For example, this duration should include the time
  to copy input tensor data to the GPU.

- “compute_infer” : The count and cumulative duration to execute the
  model with the given batch size.

- “compute_output” : The count and cumulative duration to extract
  output tensor data produced by the model framework / backend with
  the given batch size. For example, this duration should include the
  time to copy output tensor data from the GPU.

The `$duration_stat` object reports a count and a total time. This
format can be sampled to determine not only long-running averages but
also incremental averages between sample points.

```
$duration_stat =
{
  "count" : $number,
  "ns" : $number
}
```

- "count" : The number of times the statistic was collected.

- “ns” : The total duration for the statistic in nanoseconds.

```
$memory_usage =
{
  "type" : $string,
  "id" : $number,
  "byte_size" : $number
}
```

- "type" : The type of memory, the value can be "CPU", "CPU_PINNED", "GPU".

- "id" : The id of the memory, typically used with "type" to identify
  a device that hosts the memory.

- "byte_size" : The byte size of the memory.

### Statistics Response JSON Error Object

A failed statistics request will be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$repository_statistics_error_response` object.

```
$repository_statistics_error_response =
{
  "error": $string
}
```

- “error” : The descriptive message for the error.

## GRPC

For the statistics extension Triton implements the following API:

```
service GRPCInferenceService
{
  …

  // Get the cumulative statistics for a model and version.
  rpc ModelStatistics(ModelStatisticsRequest)
          returns (ModelStatisticsResponse) {}
}
```

The ModelStatistics API returns model statistics. Errors are indicated
by the google.rpc.Status returned for the request. The OK code
indicates success and other codes indicate failure. The request and
response messages for ModelStatistics are:

```
message ModelStatisticsRequest
{
  // The name of the model. If not given returns statistics for all
  // models.
  string name = 1;

  // The version of the model. If not given returns statistics for
  // all model versions.
  string version = 2;
}

message ModelStatisticsResponse
{
  // Statistics for each requested model.
  repeated ModelStatistics model_stats = 1;
}
```

The statistics messages are:

```
// Statistic recording a cumulative duration metric.
message StatisticDuration
{
  // Cumulative number of times this metric occurred.
  uint64 count = 1;

  // Total collected duration of this metric in nanoseconds.
  uint64 ns = 2;
}

// Statistics for a specific model and version.
message ModelStatistics
{
  // The name of the model.
  string name = 1;

  // The version of the model.
  string version = 2;

  // The timestamp of the last inference request made for this model,
  // as milliseconds since the epoch.
  uint64 last_inference = 3;

  // The cumulative count of successful inference requests made for this
  // model. Each inference in a batched request is counted as an
  // individual inference. For example, if a client sends a single
  // inference request with batch size 64, "inference_count" will be
  // incremented by 64. Similarly, if a client sends 64 individual
  // requests each with batch size 1, "inference_count" will be
  // incremented by 64. The "inference_count" value DOES NOT include cache hits.
  uint64 inference_count = 4;

  // The cumulative count of the number of successful inference executions
  // performed for the model. When dynamic batching is enabled, a single
  // model execution can perform inferencing for more than one inference
  // request. For example, if a client sends 64 individual requests each
  // with batch size 1 and the dynamic batcher batches them into a single
  // large batch for model execution then "execution_count" will be
  // incremented by 1. If, on the other hand, the dynamic batcher is not
  // enabled so that each of the 64 individual requests is executed
  // independently, then "execution_count" will be incremented by 64.
  // The "execution_count" value DOES NOT include cache hits.
  uint64 execution_count = 5;

  // The aggregate statistics for the model.
  InferStatistics inference_stats = 6;

  // The aggregate statistics for each different batch size that is
  // executed in the model. The batch statistics indicate how many actual
  // model executions were performed and show differences due to different
  // batch size (for example, larger batches typically take longer to compute).
  repeated InferBatchStatistics batch_stats = 7;

  // The memory usage detected during model loading, which may be
  // used to estimate the memory to be released once the model is unloaded. Note
  // that the estimation is inferenced by the profiling tools and framework's
  // memory schema, therefore it is advised to perform experiments to understand
  // the scenario that the reported memory usage can be relied on. As a starting
  // point, the GPU memory usage for models in ONNX Runtime backend and TensorRT
  // backend is usually aligned.
  repeated MemoryUsage memory_usage = 8;

  // The key and value pairs for all decoupled responses statistics. The key is
  // a string identifying a set of response statistics aggregated together (i.e.
  // index of the response sent). The value is the aggregated response
  // statistics.
  map<string, InferResponseStatistics> response_stats = 9;
}

// Inference statistics.
message InferStatistics
{
  // Cumulative count and duration for successful inference
  // request. The "success" count and cumulative duration includes
  // cache hits.
  StatisticDuration success = 1;

  // Cumulative count and duration for failed inference
  // request.
  StatisticDuration fail = 2;

  // The count and cumulative duration that inference requests wait in
  // scheduling or other queues. The "queue" count and cumulative
  // duration includes cache hits.
  StatisticDuration queue = 3;

  // The count and cumulative duration to prepare input tensor data as
  // required by the model framework / backend. For example, this duration
  // should include the time to copy input tensor data to the GPU.
  // The "compute_input" count and cumulative duration do not account for
  // requests that were a cache hit. See the "cache_hit" field for more
  // info.
  StatisticDuration compute_input = 4;

  // The count and cumulative duration to execute the model.
  // The "compute_infer" count and cumulative duration do not account for
  // requests that were a cache hit. See the "cache_hit" field for more
  // info.
  StatisticDuration compute_infer = 5;

  // The count and cumulative duration to extract output tensor data
  // produced by the model framework / backend. For example, this duration
  // should include the time to copy output tensor data from the GPU.
  // The "compute_output" count and cumulative duration do not account for
  // requests that were a cache hit. See the "cache_hit" field for more
  // info.
  StatisticDuration compute_output = 6;

  // The count of response cache hits and cumulative duration to lookup
  // and extract output tensor data from the Response Cache on a cache
  // hit. For example, this duration should include the time to copy
  // output tensor data from the Response Cache to the response object.
  // On cache hits, triton does not need to go to the model/backend
  // for the output tensor data, so the "compute_input", "compute_infer",
  // and "compute_output" fields are not updated. Assuming the response
  // cache is enabled for a given model, a cache hit occurs for a
  // request to that model when the request metadata (model name,
  // model version, model inputs) hashes to an existing entry in the
  // cache. On a cache miss, the request hash and response output tensor
  // data is added to the cache. See response cache docs for more info:
  // https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
  StatisticDuration cache_hit = 7;

  // The count of response cache misses and cumulative duration to lookup
  // and insert output tensor data from the computed response to the cache.
  // For example, this duration should include the time to copy
  // output tensor data from the response object to the Response Cache.
  // Assuming the response cache is enabled for a given model, a cache
  // miss occurs for a request to that model when the request metadata
  // does NOT hash to an existing entry in the cache. See the response
  // cache docs for more info:
  // https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
  StatisticDuration cache_miss = 8;
}

// Statistics per decoupled response.
message InferResponseStatistics
{
  // The count and cumulative duration to compute a response.
  StatisticDuration compute_infer = 1;

  // The count and cumulative duration to extract the output tensors of a
  // response.
  StatisticDuration compute_output = 2;

  // The count and cumulative duration for successful responses.
  StatisticDuration success = 3;

  // The count and cumulative duration for failed responses.
  StatisticDuration fail = 4;

  // The count and cumulative duration for empty responses.
  StatisticDuration empty_response = 5;
}

// Inference batch statistics.
message InferBatchStatistics
{
  // The size of the batch.
  uint64 batch_size = 1;

  // The count and cumulative duration to prepare input tensor data as
  // required by the model framework / backend with the given batch size.
  // For example, this duration should include the time to copy input
  // tensor data to the GPU.
  StatisticDuration compute_input = 2;

  // The count and cumulative duration to execute the model with the given
  // batch size.
  StatisticDuration compute_infer = 3;

  // The count and cumulative duration to extract output tensor data
  // produced by the model framework / backend with the given batch size.
  // For example, this duration should include the time to copy output
  // tensor data from the GPU.
  StatisticDuration compute_output = 4;
}

// Memory usage.
message MemoryUsage
{
  // The type of memory, the value can be "CPU", "CPU_PINNED", "GPU".
  string type = 1;

  // The id of the memory, typically used with "type" to identify
  // a device that hosts the memory.
  int64 id = 2;

  // The byte size of the memory.
  uint64 byte_size = 3;
}
```


================================================
FILE: docs/protocol/extension_trace.md
================================================
<!--
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Trace Extension

This document describes Triton's trace extension. The trace extension enables
the client to configure the trace settings during a Triton run. Because this
extension is supported, Triton reports “trace” in the extensions field of
its Server Metadata.

## HTTP/REST

In all JSON schemas shown in this document `$number`, `$string`, `$boolean`,
`$object` and `$array` refer to the fundamental JSON types. `#optional`
indicates an optional JSON field.

Triton exposes the trace endpoint at the following URL. The client may use
an HTTP GET request to retrieve the current trace setting. An HTTP POST request
will modify the trace setting, and the endpoint will return the updated trace
setting on success or an error in the case of failure. An optional model name
can be provided to get or set the trace settings for a specific model.

```
GET v2[/models/${MODEL_NAME}]/trace/setting

POST v2[/models/${MODEL_NAME}]/trace/setting
```

### Trace Setting Response JSON Object

A successful trace setting request is indicated by a 200 HTTP status
code. The response object, identified as `$trace_setting_response`, is
returned in the HTTP body for every successful trace setting request.

```
$trace_setting_response =
{
  $trace_setting, ...
}

$trace_setting = $string : $string | [ $string, ...]
```

Each `$trace_setting` JSON describes a “name”/“value” pair, where the “name” is
the name of the trace setting and the “value” is a `$string` representation of the
setting value, or an array of `$string` for some settings. Currently the following
trace settings are defined:

- "trace_file" : the file where the trace output will be saved. If
"log_frequency" is set, this will be the prefix of the files to save the
trace output, resulting in files named `"${trace_file}.0", "${trace_file}.1", ...`;
see the trace setting "log_frequency" below for details.
- "trace_level" : the trace level. "OFF" to disable tracing,
"TIMESTAMPS" to trace timestamps, "TENSORS" to trace tensors.
This value is an array of strings, so the user may specify multiple levels to
trace multiple kinds of information.
- "trace_rate" : the trace sampling rate. The value represents the number of
requests from which one trace will be sampled. For example, if the trace rate is "1000",
1 trace will be sampled for every 1000 requests.
- "trace_count" : the number of remaining traces to be sampled. Once the value
becomes "0", no more traces will be sampled for the trace setting, and the
collected traces will be written to an indexed trace file in the format described
in "log_frequency", regardless of the "log_frequency" status.
If the value is "-1", the number of traces to be sampled will not be limited.
- "log_frequency" : the frequency that Triton will log the
trace output to the files. If the value is "0", Triton will only log
the trace output to `${trace_file}` when shutting down. Otherwise, Triton will log
the trace output to `${trace_file}.${idx}` when it collects
the specified number of traces. For example, if the log frequency is "100",
when Triton collects the 100th trace, it logs the traces to file
`"${trace_file}.0"`, and when it collects the 200th trace, it logs the 101st to
the 200th traces to file `"${trace_file}.1"`. Note that the file index will be
reset to 0 when the "trace_file" setting is updated.


### Trace Setting Response JSON Error Object

A failed trace setting request will be indicated by an HTTP error status
(typically 400). The HTTP body must contain the
`$trace_setting_error_response` object.

```
$trace_setting_error_response =
{
  "error": $string
}
```

- “error” : The descriptive message for the error.

### Trace Setting Request JSON Object

A trace setting request is made with an HTTP POST to
the trace endpoint. In the corresponding response the HTTP body contains the
response JSON. A successful request is indicated by a 200 HTTP status code.

The request object, identified as `$trace_setting_request` must be provided in the HTTP
body.

```
$trace_setting_request =
{
  $trace_setting, ...
}
```

The `$trace_setting` JSON is defined in
[Trace Setting Response JSON Object](#trace-setting-response-json-object). Only the specified
settings will be updated. In addition to the values mentioned in the response JSON
object, a JSON null value may be used to remove the specification of
the trace setting. In such a case, the current global setting will be used.
Similarly, if this is the first request to initialize a model's trace settings,
the current global setting will be used for any trace settings that are not
specified in the request.

## GRPC

For the trace extension Triton implements the following API:

```
service GRPCInferenceService
{
  …

  // Update and get the trace setting of the Triton server.
  rpc TraceSetting(TraceSettingRequest)
          returns (TraceSettingResponse) {}
}
```

The Trace Setting API returns the latest trace settings. Errors are indicated
by the google.rpc.Status returned for the request. The OK code
indicates success and other codes indicate failure. The request and
response messages for Trace Setting are:

```
message TraceSettingRequest
{
  // The values to be associated with a trace setting.
  // If no value is provided, the setting will be cleared and
  // the global setting value will be used.
  message SettingValue
  {
    repeated string value = 1;
  }

  // The new setting values to be updated,
  // settings that are not specified will remain unchanged.
  map<string, SettingValue> settings = 1;

  // The name of the model to apply the new trace settings.
  // If not given, the new settings will be applied globally.
  string model_name = 2;
}

message TraceSettingResponse
{
  message SettingValue
  {
    repeated string value = 1;
  }

  // The latest trace settings.
  map<string, SettingValue> settings = 1;
}
```

The trace settings are described in
[Trace Setting Response JSON Object](#trace-setting-response-json-object).
Note that if this is the first request to initialize
a model's trace settings, for the trace settings that are not specified
in the request, the value will be copied from the current global settings.


================================================
FILE: docs/repositories.txt
================================================
backend
client
dali_backend
fil_backend
model_analyzer
model_navigator
onnxruntime_backend
perf_analyzer
python_backend
pytorch_backend
tensorrt_backend
tensorrtllm_backend
tutorials
vllm_backend


================================================
FILE: docs/scaling_guide/scaling_guide.rst
================================================
..
.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#############
Scaling guide
#############

.. toctree::
    :hidden:
    :caption: Scaling guide
    :maxdepth: 2

    Multi-Node (AWS) <../tutorials/Deployment/Kubernetes/EKS_Multinode_Triton_TRTLLM/README.md>
    Multi-Instance <../tutorials/Deployment/Kubernetes/TensorRT-LLM_Autoscaling_and_Load_Balancing/README.md>


================================================
FILE: docs/server_guide/features.rst
================================================
..
.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

########
Features
########

.. toctree::
    :hidden:
    :caption: Features
    :maxdepth: 2

    Model_execution <../user_guide/model_execution.md>
    Scheduler <../user_guide/scheduler.md>
    Batcher <../user_guide/batcher.md>
    model_pipelines
    state_management
    Request Cancellation <../user_guide/request_cancellation.md>
    Rate Limiter <../user_guide/rate_limiter.md>
    Caching <../user_guide/response_cache.md>
    Metrics <../user_guide/metrics.md>
    Tracing <../user_guide/trace.md>

================================================
FILE: docs/server_guide/model_pipelines.rst
================================================
..
.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

###############
Model Pipelines
###############

.. toctree::
    :hidden:
    :caption: Model Pipelines
    :maxdepth: 2

    Ensemble  <../user_guide/ensemble_models>
    Business Logic Scripting <../user_guide/bls>

================================================
FILE: docs/server_guide/state_management.rst
================================================
..
.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
..
.. Redistribution and use in source and binary forms, with or without
.. modification, are permitted provided that the following conditions
.. are met:
..  * Redistributions of source code must retain the above copyright
..    notice, this list of conditions and the following disclaimer.
..  * Redistributions in binary form must reproduce the above copyright
..    notice, this list of conditions and the following disclaimer in the
..    documentation and/or other materials provided with the distribution.
..  * Neither the name of NVIDIA CORPORATION nor the names of its
..    contributors may be used to endorse or promote products derived
..    from this software without specific prior written permission.
..
.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.. PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

################
State Management
################

.. toctree::
    :hidden:
    :caption: State Management
    :maxdepth: 2

    Implicit State Management <../user_guide/implicit_state_management.md>

================================================
FILE: docs/user_guide/architecture.md
================================================
<!--
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Triton Architecture

The following figure shows the Triton Inference Server high-level
architecture. The [model repository](model_repository.md) is a
file-system based repository of the models that Triton will make
available for inferencing. Inference requests arrive at the server via
either [HTTP/REST or GRPC](../customization_guide/inference_protocols.md) or by the [C
API](../customization_guide/inference_protocols.md) and are then routed to the appropriate per-model
scheduler. Triton implements [multiple scheduling and batching
algorithms](#models-and-schedulers) that can be configured on a
model-by-model basis. Each model's scheduler optionally performs
batching of inference requests and then passes the requests to the
[backend](https://github.com/triton-inference-server/backend/blob/main/README.md)
corresponding to the model type. The backend performs inferencing
using the inputs provided in the batched requests to produce the
requested outputs. The outputs are then returned.

Triton supports a [backend C
API](https://github.com/triton-inference-server/backend/blob/main/README.md#triton-backend-api)
that allows Triton to be extended with new functionality such as
custom pre- and post-processing operations or even a new deep-learning
framework.

The models being served by Triton can be queried and controlled by a
dedicated [model management API](model_management.md) that is
available by HTTP/REST or GRPC protocol, or by the C API.

Readiness and liveness health endpoints and utilization, throughput
and latency metrics ease the integration of Triton into deployment
frameworks such as Kubernetes.

![Triton Architecture Diagram](images/arch.jpg)

================================================
FILE: docs/user_guide/batcher.md
================================================
<!--
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->


# Batchers

## Dynamic Batcher

Dynamic batching is a feature of Triton that allows inference requests
to be combined by the server, so that a batch is created
dynamically. Creating a batch of requests typically results in
increased throughput. The dynamic batcher should be used for
[stateless models](architecture.md#stateless-models). The dynamically created
batches are distributed to all [model instances](model_configuration.md#instance-groups)
configured for the model.

Dynamic batching is enabled and configured independently for each
model using the *ModelDynamicBatching* property in the model
configuration. These settings control the preferred size(s) of the
dynamically created batches, the maximum time that requests can be
delayed in the scheduler to allow other requests to join the dynamic
batch, and queue properties such as queue size, priorities, and
time-outs. Refer to
[this guide](https://github.com/triton-inference-server/tutorials/tree/main/Conceptual_Guide/Part_2-improving_resource_utilization#what-is-dynamic-batching)
for a more detailed example of dynamic batching.

### Recommended Configuration Process

The individual settings are described in detail below. The following
steps are the recommended process for tuning the dynamic batcher for
each model. It is also possible to use the [Model
Analyzer](model_analyzer.md) to automatically search across different
dynamic batcher configurations.

* Decide on a [maximum batch size](model_configuration.md#maximum-batch-size) for the model.

* Add the following to the model configuration to enable the dynamic
  batcher with all default settings. By default the dynamic batcher
  will create batches as large as possible up to the maximum batch
  size and will not [delay](#delayed-batching) when forming batches.

```
  dynamic_batching { }
```

* Use the
  [Performance Analyzer](https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/README.md)
  to determine the latency and throughput provided by the default dynamic
  batcher configuration.

* If the default configuration results in latency values that are
  within your latency budget, try one or both of the following to
  trade off increased latency for increased throughput:

  * Increase maximum batch size.

  * Set [batch delay](#delayed-batching) to a non-zero value. Try
    increasing delay values until the latency budget is exceeded to
    see the impact on throughput.

* [Preferred batch sizes](#preferred-batch-sizes) should not be used
  for most models. A preferred batch size(s) should only be configured
  if that batch size results in significantly higher performance than
  other batch sizes.

### Preferred Batch Sizes

The *preferred_batch_size* property indicates the batch sizes that the
dynamic batcher should attempt to create. For most models,
*preferred_batch_size* should not be specified, as described in
[Recommended Configuration
Process](#recommended-configuration-process). An exception is TensorRT
models that specify multiple optimization profiles for different batch
sizes. In this case, because some optimization profiles may give
significant performance improvement compared to others, it may make
sense to use *preferred_batch_size* for the batch sizes supported by
those higher-performance optimization profiles.

The following example shows the configuration that enables dynamic
batching with preferred batch sizes of 4 and 8.

```
  dynamic_batching {
    preferred_batch_size: [ 4, 8 ]
  }
```

When a model instance becomes available for inferencing, the dynamic
batcher will attempt to create batches from the requests that are
available in the scheduler. Requests are added to the batch in the
order the requests were received. If the dynamic batcher can form a
batch of a preferred size(s) it will create a batch of the largest
possible preferred size and send it for inferencing. If the dynamic
batcher cannot form a batch of a preferred size (or if the dynamic
batcher is not configured with any preferred batch sizes), it will
send a batch of the largest size possible that does not exceed the
maximum batch size allowed by the model (but see the following section
for the delay option that changes this behavior).

The size of generated batches can be examined in aggregate using
[count metrics](metrics.md#inference-request-metrics).

### Delayed Batching

The dynamic batcher can be configured to allow requests to be delayed
for a limited time in the scheduler to allow other requests to join
the dynamic batch. For example, the following configuration sets the
maximum delay time of 100 microseconds for a request.

```
  dynamic_batching {
    max_queue_delay_microseconds: 100
  }
```

The *max_queue_delay_microseconds* property setting changes the
dynamic batcher behavior when a maximum size (or preferred size) batch
cannot be created. When a batch of a maximum or preferred size cannot
be created from the available requests, the dynamic batcher will delay
sending the batch as long as no request is delayed longer than the
configured *max_queue_delay_microseconds* value. If a new request
arrives during this delay and allows the dynamic batcher to form a
batch of a maximum or preferred batch size, then that batch is sent
immediately for inferencing. If the delay expires the dynamic batcher
sends the batch as is, even though it is not a maximum or preferred
size.

### Preserve Ordering

The *preserve_ordering* property is used to force all responses to be
returned in the same order as requests were received. See the
[protobuf
documentation](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto)
for details.

### Priority Levels

By default the dynamic batcher maintains a single queue that holds all
inference requests for a model. The requests are processed and batched
in order.  The *priority_levels* property can be used to create
multiple priority levels within the dynamic batcher so that requests
with higher priority are allowed to bypass requests with lower
priority. Requests at the same priority level are processed in
order. Inference requests that do not set a priority are scheduled
using the *default_priority_level* property.

### Queue Policy

The dynamic batcher provides several settings that control how
requests are queued for batching.

When *priority_levels* is not defined, the *ModelQueuePolicy* for the
single queue can be set with *default_queue_policy*.  When
*priority_levels* is defined, each priority level can have a different
*ModelQueuePolicy* as specified by *default_queue_policy* and *priority_queue_policy*.

The *ModelQueuePolicy* property allows a maximum queue size to be set
using the *max_queue_size*. The *timeout_action*,
*default_timeout_microseconds* and *allow_timeout_override* settings
allow the queue to be configured so that individual requests are
rejected or deferred if their time in the queue exceeds a specified
timeout.

## Custom Batching

You can set custom batching rules that work _in addition to_ the specified behavior of the dynamic batcher.
To do so, you would implement five functions in [tritonbackend.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonbackend.h)
and create a shared library. These functions are described below.

| Function | Description|
| :--          |   :--           |
| TRITONBACKEND_ModelBatchIncludeRequest | Determines whether a request should be included in the current batch |
| TRITONBACKEND_ModelBatchInitialize | Initializes a record-keeping data structure for a new batch |
| TRITONBACKEND_ModelBatchFinalize | Deallocates the record-keeping data structure after a batch is formed |
| TRITONBACKEND_ModelBatcherInitialize | Initializes a read-only data structure for use with all batches |
| TRITONBACKEND_ModelBatcherFinalize | Deallocates the read-only data structure after the model is unloaded |

The path to the shared library can be passed into the model configuration via the parameter
`TRITON_BATCH_STRATEGY_PATH`. If not provided, the dynamic batcher will look for a custom
batching strategy named batchstrategy.so in the model version, model, and backend directories,
in that order. If found, it will load it. This lets you easily share a custom batching strategy
among all models using the same backend.

For a tutorial of how to create and use a custom batching library, please see the
[backend examples directory](https://github.com/triton-inference-server/backend/tree/main/examples#custom-batching).

## Sequence Batcher

Like the dynamic batcher, the sequence batcher combines non-batched
inference requests, so that a batch is created dynamically. Unlike the
dynamic batcher, the sequence batcher should be used for
[stateful models](architecture.md#stateful-models) where a sequence of
inference requests must be routed to the same model instance. The
dynamically created batches are distributed to all [model
instances](model_configuration.md#instance-groups) configured for the model.

Sequence batching is enabled and configured independently for each
model using the *ModelSequenceBatching* property in the model
configuration. These settings control the sequence timeout as well as
configuring how Triton will send control signals to the model
indicating sequence start, end, ready and correlation ID. See
[Stateful Models](architecture.md#stateful-models) for more
information and examples.

## Iterative Sequences

> [!NOTE]
> Iterative sequences are *provisional* and likely to change in future versions.

The sequence batcher supports stateful execution of "iterative
sequences" where a single request is processed over a number of
scheduling iterations. "Iterative sequences" enable the scheduler to
batch multiple inflight requests at each step and allow the model or
backend to complete a request at any iteration.

For models and backends that support "iterative sequences", users can
enable support in the sequence batcher by specifying:

```
  sequence_batching {
    iterative_sequence: true
  }
```

An "iterative sequence" refers to stateful models that iteratively
process a single request until a complete response is generated.  When
iterative sequence is enabled, the sequence scheduler will expect a
single incoming request to initiate the sequence. Backends that
support iterative sequences can then yield back to the sequence
batcher to reschedule the request for further execution in a future
batch.

Because only one request is used to represent the "iterative
sequence", the user doesn't need to set [control
inputs](architecture.md#control-inputs) mentioned in the previous
section. They will be filled internally by the scheduler.

"Iterative sequences" can be [decoupled](decoupled_models.md#decoupled-backends-and-models) where more than
one response can be generated during execution or non-decoupled where
a single response is generated when the full response is complete.

The main advantage of "iterative sequences" is the ability to use
Triton's native batching capabilities to form batches of requests at
different iteration stages without having to maintain additional state
in the backend. Typically batches executed by backends are completed
in the same execution which can waste resources if the execution of
one of the requests in the batch takes much longer than the rest. With
"iterative sequences", processing for each request in a batch can be
broken down into multiple iterations and a backend can start
processing new requests as soon as any request is complete.

### Continuous/Inflight Batching with Iterative Sequences

Continuous batching, iteration level batching, and inflight batching
are terms used in large language model (LLM) inferencing to describe
batching strategies that form batches of requests at each iteration
step. By forming batches "continuously" inference servers can increase
throughput by reusing batch slots as soon as they are free without
waiting for all requests in a batch to complete.

As the number of steps required to process a request can vary
significantly, batching existing requests and new requests continuously
can have a significant improvement on throughput and latency.

To achieve inflight batching with iterative sequences, the backend
should break request processing into a number of steps, where each
step corresponds to one Triton model instance execution. At the end of
each step, the model instance will release requests that have been
completed and reschedule requests that are still inflight. Triton will
then form and schedule the next batch of requests that mixes new and
rescheduled requests.

================================================
FILE: docs/user_guide/bls.md
================================================
<!--
# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Business Logic Scripting

Triton's
[ensemble](ensemble_models.md#ensemble-models)
feature supports many use cases where multiple models are composed into a
pipeline (or more generally a DAG, directed acyclic graph). However, there are
many other use cases that are not supported because as part of the model
pipeline they require loops, conditionals (if-then-else), data-dependent
control-flow and other custom logic to be intermixed with model execution. We
call this combination of custom logic and model executions *Business Logic
Scripting (BLS)*.

Starting from 21.08, you can implement BLS in your Python model. A new set of
utility functions allows you to execute inference requests on other models
being served by Triton as a part of executing your Python model. Note that BLS
should only be used inside the `execute` function and is not supported
in the `initialize` or `finalize` methods. Example below shows how to use this
feature:

```python
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
  ...
    def execute(self, requests):
      ...
      # Create an InferenceRequest object. `model_name`,
      # `requested_output_names`, and `inputs` are the required arguments and
      # must be provided when constructing an InferenceRequest object. Make
      # sure to replace `inputs` argument with a list of `pb_utils.Tensor`
      # objects.
      inference_request = pb_utils.InferenceRequest(
          model_name='model_name',
          requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
          inputs=[<pb_utils.Tensor object>])

      # `pb_utils.InferenceRequest` supports request_id, correlation_id,
      # model version, timeout and preferred_memory in addition to the
      # arguments described above.
      # Note: Starting from the 24.03 release, the `correlation_id` parameter
      # supports both string and unsigned integer values.
      # These arguments are optional. An example containing all the arguments:
      # inference_request = pb_utils.InferenceRequest(model_name='model_name',
      #   requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
      #   inputs=[<list of pb_utils.Tensor objects>],
      #   request_id="1", correlation_id=4, model_version=1, flags=0, timeout=5,
      #   preferred_memory=pb_utils.PreferredMemory(
      #     pb_utils.TRITONSERVER_MEMORY_GPU, # or pb_utils.TRITONSERVER_MEMORY_CPU
      #     0))

      # Execute the inference_request and wait for the response
      inference_response = inference_request.exec()

      # Check if the inference response has an error
      if inference_response.has_error():
          raise pb_utils.TritonModelException(
            inference_response.error().message())
      else:
          # Extract the output tensors from the inference response.
          output1 = pb_utils.get_output_tensor_by_name(
            inference_response, 'REQUESTED_OUTPUT_1')
          output2 = pb_utils.get_output_tensor_by_name(
            inference_response, 'REQUESTED_OUTPUT_2')

          # Decide the next steps for model execution based on the received
          # output tensors. It is possible to use the same output tensors
          # for the final inference response too.
```


In addition to the `inference_request.exec` function that allows you to
execute blocking inference requests, `inference_request.async_exec` allows
you to perform async inference requests. This can be useful when you do not
need the result of the inference immediately. Using `async_exec` function, it
is possible to have multiple inflight inference requests and wait for the
responses only when needed. Example below shows how to use `async_exec`:

```python
import triton_python_backend_utils as pb_utils
import asyncio


class TritonPythonModel:
  ...

    # You must add the Python 'async' keyword to the beginning of `execute`
    # function if you want to use `async_exec` function.
    async def execute(self, requests):
      ...
      # Create an InferenceRequest object. `model_name`,
      # `requested_output_names`, and `inputs` are the required arguments and
      # must be provided when constructing an InferenceRequest object. Make
      # sure to replace `inputs` argument with a list of `pb_utils.Tensor`
      # objects.
      inference_request = pb_utils.InferenceRequest(
          model_name='model_name',
          requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
          inputs=[<pb_utils.Tensor object>])

      infer_response_awaits = []
      for i in range(4):
        # async_exec function returns an
        # [Awaitable](https://docs.python.org/3/library/asyncio-task.html#awaitables)
        # object.
        infer_response_awaits.append(inference_request.async_exec())

      # Wait for all of the inference requests to complete.
      infer_responses = await asyncio.gather(*infer_response_awaits)

      for inference_response in infer_responses:
        # Check if the inference response has an error
        if inference_response.has_error():
            raise pb_utils.TritonModelException(
              inference_response.error().message())
        else:
            # Extract the output tensors from the inference response.
            output1 = pb_utils.get_output_tensor_by_name(
              inference_response, 'REQUESTED_OUTPUT_1')
            output2 = pb_utils.get_output_tensor_by_name(
              inference_response, 'REQUESTED_OUTPUT_2')

            # Decide the next steps for model execution based on the received
            # output tensors.
```

A complete example for sync and async BLS in Python backend is included in the
[Examples](../python_backend/README.md#examples) section.

## Using BLS with Decoupled Models

Starting from 23.03 release, you can execute inference requests on decoupled
models in both [default mode](../python_backend/README.md#default-mode) and
[decoupled mode](../python_backend/README.md#decoupled-mode). By setting the `decoupled` parameter to
`True`, the `exec` and `async_exec` functions will return an
[iterator](https://docs.python.org/3/glossary.html#term-iterator) of
inference responses returned by a decoupled model. If the `decoupled` parameter
is set to `False`, the `exec` and `async_exec` functions will return a single
response as shown in the example above. Besides, you can set the timeout via
the parameter 'timeout' in microseconds within the constructor of
`InferenceRequest`. If the request times out, the request will respond with an
error. The default of 'timeout' is 0 which indicates that the request has no
timeout.

Additionally, starting from the 23.04 release, you have the flexibility to
select a specific device to receive output tensors from BLS calls. This
can be achieved by setting the optional `preferred_memory` parameter within the
`InferenceRequest` constructor. To do this, you can create a `PreferredMemory`
object and specify the `preferred_memory_type` as either
`TRITONSERVER_MEMORY_GPU` or `TRITONSERVER_MEMORY_CPU`, as well as the
`preferred_device_id` as an integer to indicate the memory type and device ID
on which you wish to receive output tensors. If you do not specify the
`preferred_memory` parameter, the output tensors will be allocated on the
same device where the output tensors were received from the model to which the
BLS call is made.

Example below shows how to use this feature:

```python
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
  ...
    def execute(self, requests):
      ...
      # Create an InferenceRequest object. `model_name`,
      # `requested_output_names`, and `inputs` are the required arguments and
      # must be provided when constructing an InferenceRequest object. Make
      # sure to replace `inputs` argument with a list of `pb_utils.Tensor`
      # objects.
      inference_request = pb_utils.InferenceRequest(
          model_name='model_name',
          requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
          inputs=[<pb_utils.Tensor object>])

      # `pb_utils.InferenceRequest` supports request_id, correlation_id,
      # model version, timeout and preferred_memory in addition to the
      # arguments described above.
      # Note: Starting from the 24.03 release, the `correlation_id` parameter
      # supports both string and unsigned integer values.
      # These arguments are optional. An example containing all the arguments:
      # inference_request = pb_utils.InferenceRequest(model_name='model_name',
      #   requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
      #   inputs=[<list of pb_utils.Tensor objects>],
      #   request_id="1", correlation_id="ex-4", model_version=1, flags=0, timeout=5,
      #   preferred_memory=pb_utils.PreferredMemory(
      #     pb_utils.TRITONSERVER_MEMORY_GPU, # or pb_utils.TRITONSERVER_MEMORY_CPU
      #     0))

      # Execute the inference_request and wait for the response. Here we are
      # running a BLS request on a decoupled model, hence setting the parameter
      # 'decoupled' to 'True'.
      inference_responses = inference_request.exec(decoupled=True)

      for inference_response in inference_responses:
        # Check if the inference response has an error
        if inference_response.has_error():
            raise pb_utils.TritonModelException(
              inference_response.error().message())

        # For some models, it is possible that the last response is empty
        if len(inference_response.output_tensors()) > 0:
          # Extract the output tensors from the inference response.
          output1 = pb_utils.get_output_tensor_by_name(
            inference_response, 'REQUESTED_OUTPUT_1')
          output2 = pb_utils.get_output_tensor_by_name(
            inference_response, 'REQUESTED_OUTPUT_2')

          # Decide the next steps for model execution based on the received
          # output tensors. It is possible to use the same output tensors
          # for the final inference response too.
```


In addition to the `inference_request.exec(decoupled=True)` function that
allows you to execute blocking inference requests on decoupled models,
`inference_request.async_exec(decoupled=True)` allows you to perform async
inference requests. This can be useful when you do not need the result of the
inference immediately. Using `async_exec` function, it is possible to have
multiple inflight inference requests and wait for the responses only when
needed. Example below shows how to use `async_exec`:

```python
import triton_python_backend_utils as pb_utils
import asyncio


class TritonPythonModel:
  ...

    # You must add the Python 'async' keyword to the beginning of `execute`
    # function if you want to use `async_exec` function.
    async def execute(self, requests):
      ...
      # Create an InferenceRequest object. `model_name`,
      # `requested_output_names`, and `inputs` are the required arguments and
      # must be provided when constructing an InferenceRequest object. Make
      # sure to replace `inputs` argument with a list of `pb_utils.Tensor`
      # objects.
      inference_request = pb_utils.InferenceRequest(
          model_name='model_name',
          requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
          inputs=[<pb_utils.Tensor object>])

      infer_response_awaits = []
      for i in range(4):
        # async_exec function returns an
        # [Awaitable](https://docs.python.org/3/library/asyncio-task.html#awaitables)
        # object.
        infer_response_awaits.append(
          inference_request.async_exec(decoupled=True))

      # Wait for all of the inference requests to complete.
      async_responses = await asyncio.gather(*infer_response_awaits)

      for infer_responses in async_responses:
        for inference_response in infer_responses:
          # Check if the inference response has an error
          if inference_response.has_error():
              raise pb_utils.TritonModelException(
                inference_response.error().message())

          # For some models, it is possible that the last response is empty
          if len(inference_response.output_tensors()) > 0:
              # Extract the output tensors from the inference response.
              output1 = pb_utils.get_output_tensor_by_name(
                inference_response, 'REQUESTED_OUTPUT_1')
              output2 = pb_utils.get_output_tensor_by_name(
                inference_response, 'REQUESTED_OUTPUT_2')

              # Decide the next steps for model execution based on the received
              # output tensors.
```

A complete example for sync and async BLS for decoupled models is included in
the [Examples](../python_backend/README.md#examples) section.

Starting from the 22.04 release, the lifetime of the BLS output tensors has
been improved such that if a tensor is no longer needed in your Python model it
will be automatically deallocated. This can increase the number of BLS requests
that you can execute in your model without running into the out of GPU or
shared memory error.

Note: Async BLS is not supported on Python 3.6 or lower due to the `async`
keyword and `asyncio.run` being introduced in Python 3.7.

## Model Loading API

Starting from 23.07 release, you can use the model loading API to load models
required by your BLS model. The model loading API is equivalent to the Triton C
API for loading models which are documented in
[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).
Below is an example of how to use the model loading API:

```python
import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    def initialize(self, args):
        self.model_name="onnx_model"
        # Check if the model is ready, and load the model if it is not ready.
        # You can specify the model version in string format. The version is
        # optional, and if not provided, the server will choose a version based
        # on the model and internal policy.
        if not pb_utils.is_model_ready(model_name=self.model_name,
                                       model_version="1"):
            # Load the model from the model repository
            pb_utils.load_model(model_name=self.model_name)

            # Load the model with an optional override model config in JSON
            # representation. If provided, this config will be used for
            # loading the model.
            config = "{\"backend\":\"onnxruntime\", \"version_policy\":{\"specific\":{\"versions\":[1]}}}"
            pb_utils.load_model(model_name=self.model_name, config=config)

            # Load the model with optional override files. The override files are
            # specified as a dictionary where the key is the file path (with
            # "file:" prefix) and the value is the file content as bytes. The
            # files will form the model directory that the model will be loaded
            # from. If specified, 'config' must be provided to be the model
            # configuration of the override model directory.
            with open('models/onnx_int32_int32_int32/1/model.onnx', 'rb') as file:
                data = file.read()
            files = {"file:1/model.onnx": data}
            pb_utils.load_model(model_name=self.model_name,
                                config=config, files=files)

    def execute(self, requests):
        # Execute the model
        ...
        # If the model is no longer needed, you can unload it. You can also
        # specify whether the dependents of the model should also be unloaded by
        # setting the 'unload_dependents' parameter to True. The default value
        # is False. Need to be careful when unloading the model as it can affect
        # other model instances or other models that depend on it.
        pb_utils.unload_model(model_name=self.model_name,
                              unload_dependents=True)

```

Note that the model loading API is only supported if the server is running in
[explicit model control mode](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_management.md#model-control-mode-explicit).
Additionally, the model loading API should only be used after the server has
been running, which means that the BLS model should not be loaded during server
startup. You can use different
[client endpoints](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_model_repository.md)
to load the model after the server has been started. The model loading API is
currently not supported during the `auto_complete_config` and `finalize`
functions.

## Using BLS with Stateful Models

[Stateful models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/architecture.md#stateful-models)
require setting additional flags in the inference request to indicate the
start and end of a sequence. The `flags` argument in the `pb_utils.InferenceRequest`
object can be used to indicate whether the request is the first or last request
in the sequence. An example indicating that the request is starting the
sequence:

```python
inference_request = pb_utils.InferenceRequest(model_name='model_name',
  requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
  inputs=[<list of pb_utils.Tensor objects>],
  request_id="1", correlation_id=4,
  flags=pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_START)
```

For indicating the ending of the sequence you can use the
`pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_END` flag. If the request is both
starting and ending a sequence at the same time (i.e. the sequence has only a
single request), you can use the bitwise OR operator to enable both of the
flags:

```
flags = pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_START | pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_END
```

## Limitation

- You need to make sure that the inference requests performed as a part of your
model do not create a circular dependency. For example, if model A performs an
inference request on itself and there are no more model instances ready to
execute the inference request, the model will block on the inference execution
forever.

- Async BLS is not supported when running a Python model in decoupled mode.

================================================
FILE: docs/user_guide/custom_operations.md
================================================
<!--
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Custom Operations

Modeling frameworks that allow custom operations are partially
supported by the Triton Inference Server. Custom operations can be
added to Triton at build time or at startup and are made available to
all loaded models.

## TensorRT

TensorRT allows a user to create [custom
layers](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#extending)
which can then be used in TensorRT models. For those models to run in
Triton the custom layers must be made available.

To make the custom layers available to Triton, the TensorRT custom
layer implementations must be compiled into one or more shared
libraries which must then be loaded into Triton using LD_PRELOAD. For
example, assuming your TensorRT custom layers are compiled into
libtrtcustom.so, starting Triton with the following command makes
those custom layers available to all TensorRT models.

```bash
$ LD_PRELOAD=libtrtcustom.so:${LD_PRELOAD} tritonserver --model-repository=/tmp/models ...
```

A limitation of this approach is that the custom layers must be
managed separately from the model repository itself. And more
seriously, if there are custom layer name conflicts across multiple
shared libraries there is currently no way to handle it.

When building the custom layer shared library it is important to use
the same version of TensorRT as is being used in Triton. You can find
the TensorRT version in the [Triton Release
Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/index.html). A
simple way to ensure you are using the correct version of TensorRT is
to use the [NGC TensorRT
container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt)
corresponding to the Triton container. For example, if you are using
the 24.12 version of Triton, use the 24.12 version of the TensorRT
container.

## PyTorch

Torchscript allows users to [add custom
operations](https://pytorch.org/tutorials/advanced/torch_script_custom_ops.html)
which can then be used in Torchscript models. By using LD_PRELOAD you
can load your custom C++ operations into Triton. For example, if you
follow the instructions in the
[pytorch/extension-script](https://github.com/pytorch/extension-script)
repository and your Torchscript custom operations are compiled into
libpytcustom.so, starting Triton with the following command makes
those operations available to all PyTorch models. All PyTorch
custom operations depend on one or more PyTorch shared libraries
that must be available to the custom shared library when it is
loading. In practice this means that you must make sure that
/opt/tritonserver/backends/pytorch is on the library path while
launching the server. There are several ways to control the library path
and a common one is to use the LD_LIBRARY_PATH.

```bash
$ LD_LIBRARY_PATH=/opt/tritonserver/backends/pytorch:$LD_LIBRARY_PATH LD_PRELOAD=libpytcustom.so:${LD_PRELOAD} tritonserver --model-repository=/tmp/models ...
```

A limitation of this approach is that the custom operations must be
managed separately from the model repository itself. And more
seriously, if there are custom layer name conflicts across multiple
shared libraries or the handles used to register them in PyTorch there
is currently no way to handle it.

Starting with the 20.07 release of Triton the [TorchVision
operations](https://github.com/pytorch/vision) will be included with
the PyTorch backend and hence they do not have to be explicitly added
as custom operations.

When building the custom operations shared library it is important to
use the same version of PyTorch as is being used in Triton. You can
find the PyTorch version in the [Triton Release
Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/index.html). A
simple way to ensure you are using the correct version of PyTorch is
to use the [NGC PyTorch
container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch)
corresponding to the Triton container. For example, if you are using
the 24.12 version of Triton, use the 24.12 version of the PyTorch
container.

## ONNX

ONNX Runtime allows users to [add custom
operations](https://onnxruntime.ai/docs/reference/operators/add-custom-op.html)
which can then be used in ONNX models. To register your custom
operations library you need to include it in the model configuration
as an additional field. For example, if you follow [this
example](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/test/shared_lib/test_inference.cc)
from the
[microsoft/onnxruntime](https://github.com/microsoft/onnxruntime)
repository and your ONNXRuntime custom operations are compiled into
libonnxcustom.so, adding the following to the model configuration of
your model makes those operations available to that specific ONNX
model.

```
model_operations { op_library_filename: "/path/to/libonnxcustom.so" }
```

When building the custom operations shared library it is important to
use the same version of ONNXRuntime as is being used in Triton. You
can find the ONNXRuntime version in the [Triton Release
Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/index.html).


================================================
FILE: docs/user_guide/debugging_guide.md
================================================
<!--
# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Debugging Guide
This guide goes over first-step troubleshooting for common scenarios in which Triton is behaving unexpectedly or failing. Below, we break down the issues into these categories:

- **[Configuration](#configuration-issues)**: Triton reports an error with your configuration file.
- **[Model](#model-issues)**: Your model fails to load or perform inference.
- **[Server](#server-issues)**: The server is crashing or unavailable.
- **[Client](#client-issues)**: The client is failing in sending and receiving data to the server.
- **[Performance](#performance-issues)**: Triton is not achieving optimal performance.

Regardless of the category of your issue, it is worthwhile to try running in the latest Triton container, whenever possible. While we provide support for older containers, fixes get merged into the next release. By checking the latest release, you can spot whether this issue has already been resolved.

You can also search [Triton’s GitHub issues](https://github.com/triton-inference-server/server/issues) to see if someone previously asked about your issue. If you received an error, you can use a few keywords from the error as a search term.

Triton provides different types of errors and statuses, relevant across a wide swath of issues. Here is an overview of them:

| Error | Definition | Example |
| ----- | ---------- | ------- |
|Already Exists | Returned when an action cannot be done because there is already an existing item. | A registered model fails to be registered again.|
| Internal | Returned when there is an unexpected failure within the Triton code. | A memory allocation fails. |
| Invalid Arg | Returned when an invalid argument is provided to a function | A model config has an invalid parameter |
| Not Found | Returned when a requested resource is unable to be found | A shared library is unable to be found |
| Unavailable | Returned when a requested resource is found but unavailable | A requested model is not ready for inference |
| Unknown | Returned for cases where the reason for the error is unknown | This error code should not be used |
| Unsupported | Returned when an option is unsupported | A model config includes a parameter that is not yet supported for that backend |

## Configuration Issues

Before proceeding, please see if the model configuration documentation [here](./model_configuration.md) resolves your question. Beyond that, the best places to find a sample model configuration for your use cases are:

- The server [qa folder](https://github.com/triton-inference-server/server/tree/main/qa). You can find test scripts covering most features, including some which update the model config files to do so.
    - [Custom_models](https://github.com/triton-inference-server/server/tree/main/qa/custom_models), [ensemble_models](https://github.com/triton-inference-server/server/tree/main/qa/ensemble_models), and [python_models](https://github.com/triton-inference-server/server/tree/main/qa/python_models) include examples of configs for their respective use cases.
    - [L0_model_config](https://github.com/triton-inference-server/server/tree/main/qa/L0_model_config) tests many types of incomplete model configs.

Note that if you are running into an issue with [perf_analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md) or [Model Analyzer](https://github.com/triton-inference-server/model_analyzer), try loading the model onto Triton directly. This checks if the configuration is incorrect or the perf_analyzer or Model Analyzer options need to be updated.

## Model Issues
**Step 1. Run Models Outside of Triton**

If you are running into an issue with loading or running a model, the first step is to ensure your model runs in its framework outside of Triton. For example, you can run ONNX models in ONNX Runtime and TensorRT models in trtexec. If this check fails, the issue is happening within the framework and not within Triton.

**Step 2. Find the Error Message**

If you receive an error message, you may be able to find where it was generated by searching the code. GitHub provides instructions for searching code [here](https://docs.github.com/en/search-github/searching-on-github/searching-code). A generic search through the Triton organization is available at [this link](https://github.com/search?q=org%3Atriton-inference-server&type=Code).

If your error message only occurs in one or a few places in the Triton code, you may be able to see what’s going wrong pretty quickly. Even if not, it’s good to save this link to provide to us when asking for help with your issue. This is often the first thing we look for.

**Step 3. Build with Debug Flags**

The next step is building with debug flags. We unfortunately don’t provide a debug container, so you’d need to follow the [build guide](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md) to build the container, which includes a [section on adding debug symbols](../customization_guide/build.md#building-with-debug-symbols). Once you do so, you can install GDB (`apt-get install gdb`) in the container and run Triton in GDB (`gdb --args tritonserver…`). If needed, you can open a second terminal to run a script in another container. If the server segfaults, you can enter `backtrace`, which will provide you a call stack that lets you know where the error got generated. You should then be able to trace the source of the error. If the bug still exists after debugging, we’ll need this to expedite our work.

Advanced GDB users can also examine variable values, add breakpoints, and more to find the cause of their issue.

### Specific Issues
**Undefined Symbols**

There are a few options here:
- This often means a version mismatch between the version of a framework used by Triton and the one used to create the model. Check the version of the framework used in the Triton container and compare against the version used to generate the model.
- If you are loading a shared library used by a backend, don’t forget to include LD_PRELOAD before the command to run Tritonserver. 
    - `LD_PRELOAD=<name_of_so_file.so> tritonserver --model-repository…`
- If you built the backend yourself, this could be a linking error. If you are confident the backends and server were built correctly, double check that the server is loading the correct backend.

## Server Issues

You generally should not run into errors with the server itself. If the server goes down, it’s usually because something went wrong during model loading or inference and you can use the above section to debug. It’s particularly useful to work through the [Building with Debug Flags](../customization_guide/build.md#building-with-debug-symbols) section above to resolve those sorts of issues. However, this section will go through some specific cases that may occur.

### No Connection to Server

If you are having trouble connecting to the server or getting its health via the health endpoint (`curl -v localhost:8000/v2/health/ready`), make sure you are able to reach the network your server is running on from where you are running your command. Most commonly, we see that when separate Docker containers are started for the client and server, they are not started with [--net=host](https://docs.docker.com/network/host/) to share the network.

### Intermittent Failure

This is going to be one of the hardest things to debug. If possible, you want to build your server with debug flags to get a backtrace of what is happening specifically. You would also want to keep notes to see how often this happens and whether there is a common cause. The server itself should not fail while idling, so see if a certain action (loading/unloading a model, running a model inference, etc.) is triggering it.

### Server Failure Due to Individual Models

If you want the server to start up even when models fail, use the `--exit-on-error=false` option. If you want the server health endpoint to show ready even when specific models fail, use the `--strict-readiness=false` flag.

### Deadlock

Some useful steps for debugging a deadlock with `gdb`:
1. Use `$info threads` to see which threads are waiting.
2. Go to a thread: `$thread 4`.
3. Print the backtrace: `$bt`.
4. Go to the frame with the lock: `$f 1`.
5. Print the memory of the mutex being held: `$p *mutex`.
6. You can now see the owner of the mutex under `owner`.

## Client Issues

For working with different client cases, the best resources are the [client repo’s](https://github.com/triton-inference-server/client) examples. You can see clients written in Python, Java, and C++ with running examples across many common use cases. You can review the main functions of these clients to get a sense of the flow of the code.

We often get performance optimization questions around the clients. Triton clients send input tensors as raw binary. However, GRPC uses protobuf which has some serialization and deserialization overhead. For those looking for the lowest-latency solution, the C API eliminates the latency associated with GRPC/HTTP. Shared memory is also a good option to reduce data movement when the client and server are on the same system.

## Performance Issues

This section goes over debugging unexpected performance. If you are looking to optimize performance, please see the [Optimization](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/optimization.md) and [Performance Tuning](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/performance_tuning.md) guides.

The easiest step to start with is running perf_analyzer to get a breakdown of the request lifecycle, throughput, and latency for each individual model. For a more detailed view, you can [enable tracing](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/trace.md) when running the server. This will provide exact timestamps to drill down into what is happening. You can also enable tracing with perf_analyzer for the GRPC and HTTP clients by using the tracing flags. Note that enabling tracing can impact Triton’s performance, but it can be helpful to examine the timestamps throughout a request’s lifecycle.

### Performance Profiling

The next step would be to use a performance profiler. One profiler we recommend is [Nsight Systems](https://developer.nvidia.com/nsight-systems) (nsys), optionally including NVIDIA Tools Extension (NVTX) markers to profile Triton.

The Triton server container already has nsys installed. However, Triton does not build with the NVTX markers by default. If you want to use NVTX markers, you should build Triton with build.py, using the `--enable-nvtx` flag. This will provide details around some phases of processing a request, such as queueing, running inference, and handling outputs.

You can profile Triton by running `nsys profile tritonserver --model-repository …`. The [nsys documentation](https://docs.nvidia.com/nsight-systems/UserGuide/index.html) provides more options and details for getting a thorough overview of what is going on.

## Submitting an Issue

If you’ve done the initial debugging steps with no results, the next step is to submit the issue to us. Before you do so, please answer these questions:
- Is this reproducible with multiple models and/or our example models? Or is the issue unique to your model?
- Is the bug reproducible with any protocol (ex: HTTP vs GRPC)? Or only one protocol?

The answers to the above should inform what you submit. If you find that this issue only happens under specific circumstances, please include this in your report. If the issue still exists, please submit **all** of the below:

- The commands or script used to build/pull Triton and run your models.
    - If building Triton, please provide the version or branch you are building from.
- Your model configuration file.
- The error received, plus any logs.
    - If your issue involves the server crashing, a backtrace of the dump would be helpful.
    - Please enable verbose logging (`--log-verbose=1`) to get the most detailed logs.
- If this issue is unique to your model, your model or a toy model that reproduces the issue.
- Anything else that would expedite our investigation.


================================================
FILE: docs/user_guide/decoupled_models.md
================================================
<!--
# Copyright 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Decoupled Backends and Models

Triton can support [backends](https://github.com/triton-inference-server/backend)
and models that send multiple responses for a request or zero responses
for a request. A decoupled model/backend may also send responses out-of-order
relative to the order that the request batches are executed. This allows
a backend to deliver responses whenever it deems fit. This is especially
useful in Automated Speech Recognition (ASR). Requests with a large number
of responses will not block the responses of other requests from being
delivered.

## Developing Decoupled Backend/Model

### C++ Backend

Read carefully about the [Triton Backend API](https://github.com/triton-inference-server/backend/blob/main/README.md#triton-backend-api),
[Inference Requests and Responses](https://github.com/triton-inference-server/backend/blob/main/README.md#inference-requests-and-responses)
and [Decoupled Responses](https://github.com/triton-inference-server/backend/blob/main/README.md#decoupled-responses).
The [repeat backend](https://github.com/triton-inference-server/repeat_backend)
and [square backend](https://github.com/triton-inference-server/square_backend)
demonstrate how the Triton Backend API can be used to implement a decoupled
backend. The example is designed to show the flexibility of the Triton API
and in no way should be used in production. This example may process multiple
batches of requests at the same time without having to increase the
[instance count](model_configuration.md#instance-groups). In real deployment,
the backend should not allow the caller thread to return from
TRITONBACKEND_ModelInstanceExecute until that instance is ready to
handle another set of requests. If not designed properly the backend
can be easily over-subscribed. This can also cause under-utilization
of features like [Dynamic Batching](batcher.md#dynamic-batcher)
as it leads to eager batching.

### Python model using Python Backend

Read carefully about the [Python Backend](https://github.com/triton-inference-server/python_backend),
and specifically [`execute`](https://github.com/triton-inference-server/python_backend#execute).

The [decoupled examples](https://github.com/triton-inference-server/python_backend/tree/main/examples/decoupled)
demonstrate how the decoupled API can be used to implement a decoupled
Python model. As noted in the examples, these are designed to show
the flexibility of the decoupled API and in no way should be used
in production.


## Deploying Decoupled Models

The [decoupled model transaction policy](model_configuration.md#decoupled)
must be set in the provided [model configuration](model_configuration.md)
file for the model. Triton requires this information to enable special
handling required for decoupled models. Deploying decoupled models without
this configuration setting will throw errors at runtime.

## Running Inference on Decoupled Models

[Inference Protocols and APIs](../customization_guide/inference_protocols.md) describes various ways
a client can communicate and run inference on the server. For decoupled models,
Triton's HTTP endpoint cannot be used for running inference as it supports
exactly one response per request. Even the standard ModelInfer RPC in the GRPC endpoint
does not support decoupled responses. In order to run inference on a decoupled
model, the client must use the bi-directional streaming RPC. See
[here](https://github.com/triton-inference-server/common/blob/main/protobuf/grpc_service.proto)
for more details. The [decoupled_test.py](../../qa/L0_decoupled/decoupled_test.py) demonstrates
how the gRPC streaming can be used to infer decoupled models.

If using [Triton's in-process C API](../customization_guide/inprocess_c_api.md),
your application should be cognizant that the callback function you registered with
`TRITONSERVER_InferenceRequestSetResponseCallback` can be invoked any number of times,
each time with a new response. You can take a look at [grpc_server.cc](https://github.com/triton-inference-server/server/blob/main/src/grpc/grpc_server.cc)
for an example.

### Using Decoupled Models in Ensembles

When using decoupled models within an [ensemble pipeline](ensemble_models.md), you may encounter unbounded memory growth if the decoupled model produces responses faster than downstream models can consume them.

To prevent unbounded memory growth in this scenario, consider using the `max_inflight_requests` configuration field. This field limits the maximum number of concurrent inflight requests permitted at each ensemble step for each inference request.

For more details and examples, see [Managing Memory Usage in Ensemble Models](ensemble_models.md#managing-memory-usage-in-ensemble-models).

## Knowing When a Decoupled Inference Request is Complete

An inference request is considered complete when a response containing the
`TRITONSERVER_RESPONSE_COMPLETE_FINAL` flag is received from a model/backend.

1. Client applications using streaming GRPC can access this information by
   checking the response parameters for the `"triton_final_response"` parameter.
   Decoupled models may not send a response for each request depending on how
   the model/backend is designed. In these cases where no response is sent by
   the backend, the streaming GRPC client can opt-in to receive an empty final
   response for each request. By default, empty final responses are not sent to
   save on network traffic.

   ```python
   # Example of streaming GRPC client opting-in
   client.async_stream_infer(
     ...,
     enable_empty_final_response=True
   )
   ```

2. Client applications using the C API can check the
   `TRITONSERVER_RESPONSE_COMPLETE_FINAL` flag directly in their response
   handling / callback logic.

The [decoupled_test.py](../../qa/L0_decoupled/decoupled_test.py)
demonstrates an example of opting-in through the streaming GRPC
Python client API and programmatically identifying when a final response
is received through the `"triton_final_response"` response parameter.


================================================
FILE: docs/user_guide/ensemble_models.md
================================================
<!--
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Ensemble Models

An ensemble model represents a *pipeline* of one or more models and
the connection of input and output tensors between those
models. Ensemble models are intended to be used to encapsulate a
procedure that involves multiple models, such as "data preprocessing
-> inference -> data postprocessing".  Using ensemble models for this
purpose can avoid the overhead of transferring intermediate tensors
and minimize the number of requests that must be sent to Triton.

The ensemble scheduler must be used for ensemble models, regardless of
the scheduler used by the models within the ensemble. With respect to
the ensemble scheduler, an *ensemble* model is not an actual
model. Instead, it specifies the dataflow between models within the
ensemble as *ModelEnsembling::Step* entries in the model
configuration. The scheduler collects the output tensors in each step
and provides them as input tensors for other steps according to the
specification. In spite of that, the ensemble model is still viewed as
a single model from an external view.

Note that the ensemble models will inherit the characteristics of the
models involved, so the meta-data in the request header must comply
with the models within the ensemble. For instance, if one of the
models is a stateful model, then the inference request for the ensemble
model should contain the information mentioned in [Stateful
Models](architecture.md#stateful-models), which will be provided to the stateful
model by the scheduler.

As an example consider an ensemble model for image classification and
segmentation that has the following model configuration:

```
name: "ensemble_model"
platform: "ensemble"
max_batch_size: 1
input [
  {
    name: "IMAGE"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]
output [
  {
    name: "CLASSIFICATION"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  },
  {
    name: "SEGMENTATION"
    data_type: TYPE_FP32
    dims: [ 3, 224, 224 ]
  }
]
ensemble_scheduling {
  step [
    {
      model_name: "image_preprocess_model"
      model_version: -1
      input_map {
        key: "RAW_IMAGE"
        value: "IMAGE"
      }
      output_map {
        key: "PREPROCESSED_OUTPUT"
        value: "preprocessed_image"
      }
    },
    {
      model_name: "classification_model"
      model_version: -1
      input_map {
        key: "FORMATTED_IMAGE"
        value: "preprocessed_image"
      }
      output_map {
        key: "CLASSIFICATION_OUTPUT"
        value: "CLASSIFICATION"
      }
    },
    {
      model_name: "segmentation_model"
      model_version: -1
      input_map {
        key: "FORMATTED_IMAGE"
        value: "preprocessed_image"
      }
      output_map {
        key: "SEGMENTATION_OUTPUT"
        value: "SEGMENTATION"
      }
    }
  ]
}
```

The ensemble\_scheduling section indicates that the ensemble scheduler will be
used and that the ensemble model consists of three different models. Each
element in step section specifies the model to be used and how the inputs and
outputs of the model are mapped to tensor names recognized by the scheduler. For
example, the first element in step specifies that the latest version of
image\_preprocess\_model should be used, the content of its input "RAW\_IMAGE"
is provided by "IMAGE" tensor, and the content of its output
"PREPROCESSED\_OUTPUT" will be mapped to "preprocessed\_image" tensor for later
use. The tensor names recognized by the scheduler are the ensemble inputs, the
ensemble outputs and all values in the input\_map and the output\_map.

The models composing the ensemble may also have dynamic batching
enabled.  Since ensemble models are just routing the data between
composing models, Triton can take requests into an ensemble model
without modifying the ensemble's configuration to exploit the dynamic
batching of the composing models.

Assuming that only the ensemble model, the preprocess model, the classification
model and the segmentation model are being served, the client applications will
see them as four different models which can process requests independently.
However, the ensemble scheduler will view the ensemble model as the following.

![Ensemble Example](images/ensemble_example0.png)

When an inference request for the ensemble model is received, the ensemble
scheduler will:

1. Recognize that the "IMAGE" tensor in the request is mapped to input
   "RAW\_IMAGE" in the preprocess model.

2. Check models within the ensemble and send an internal request to the
   preprocess model because all the input tensors required are ready.

3. Recognize the completion of the internal request, collect the output
   tensor and map the content to "preprocessed\_image" which is a unique name
   known within the ensemble.

4. Map the newly collected tensor to inputs of the models within the ensemble.
   In this case, the inputs of "classification\_model" and "segmentation\_model"
   will be mapped and marked as ready.

5. Check models that require the newly collected tensor and send internal
   requests to models whose inputs are ready, the classification
   model and the segmentation model in this case. Note that the responses will
   be in arbitrary order depending on the load and computation time of
   individual models.

6. Repeat steps 3-5 until no more internal requests should be sent, and then
   respond to the inference request with the tensors mapped to the ensemble
   output names.

Unlike other models, ensemble models do not support the `instance_group` field in
the model configuration. The reason is that the ensemble scheduler itself
is mainly an event-driven scheduler with very minimal overhead so it is
almost never the bottleneck of the pipeline. The composing models
within the ensemble can be individually scaled up or down with their
respective `instance_group` settings. To optimize your model pipeline
performance, you can use
[Model Analyzer](https://github.com/triton-inference-server/model_analyzer)
to find the optimal model configurations.

When crafting the ensemble steps, it is useful to note the distinction between
*key* and *value* on the `input_map`/`output_map`:
* *key*: An `input`/`output` tensor name on the composing model.
* *value*: A tensor name on the ensemble model, which acts as an identifier
connecting ensemble `input`/`output` to those on the composing model and between
composing models.

## Managing Memory Usage in Ensemble Models

An *inflight request* refers to an intermediate request generated by an upstream model that is queued and held in memory until it is processed by a downstream model within an ensemble pipeline. When upstream models process requests significantly faster than downstream models, these inflight requests can accumulate and potentially lead to unbounded memory growth. This problem occurs when there is a speed mismatch between different steps in the pipeline and is particularly common in *decoupled models* that produce multiple responses per request more quickly than downstream models can consume them.

Consider an example ensemble model with two steps where the upstream model is 10× faster:
1. **Preprocessing model**: Produces 100 preprocessed requests/sec
2. **Inference model**: Consumes 10 requests/sec

Without backpressure, requests accumulate in the pipeline faster than they can be processed, eventually leading to out-of-memory errors.

The `max_inflight_requests` field in the ensemble configuration sets a limit on the number of concurrent inflight requests permitted at each ensemble step for a single inference request.
When this limit is reached, faster upstream models are paused (blocked) until downstream models finish processing, effectively preventing unbounded memory growth.

```
ensemble_scheduling {
  max_inflight_requests: 16

  step [
    {
      model_name: "dali_preprocess"
      model_version: -1
      input_map { key: "RAW_IMAGE", value: "IMAGE" }
      output_map { key: "PREPROCESSED_IMAGE", value: "preprocessed" }
    },
    {
      model_name: "onnx_inference"
      model_version: -1
      input_map { key: "INPUT", value: "preprocessed" }
      output_map { key: "OUTPUT", value: "RESULT" }
    }
  ]
}
```

**Configuration:**
* **`max_inflight_requests: 16`**: For each ensemble request (not globally), at most 16 requests from `dali_preprocess`
  can wait for `onnx_inference` to process. Once this per-step limit is reached, `dali_preprocess` is blocked until the downstream step completes a response.
* **Default (`0`)**: No limit - allows unlimited inflight requests (original behavior).

### When to Use This Feature

Use `max_inflight_requests` when your ensemble pipeline includes:
* **Streaming or decoupled models**: When models produce multiple responses per request more quickly than downstream steps can process them.
* **Memory constraints**: Risk of unbounded memory growth from accumulating requests.

### Choosing the Right Value

The optimal value depends on your specific deployment, including batch size, request rate, available memory, and throughput.

* **Too low**: The producer step is blocked too often, which underutilizes faster models.
* **Too high**: Memory usage increases, diminishing the effectiveness of backpressure.
* **Recommendation**: Start with a small value and adjust it based on memory usage and throughput monitoring.

### Performance Considerations

* **Zero overhead when disabled**: If `max_inflight_requests: 0` (default),
  no synchronization overhead is incurred.
* **Minimal overhead when enabled**: Uses a blocking/wakeup mechanism per ensemble step, where upstream models are paused ("blocked") when the inflight requests limit is reached and resumed ("woken up") as downstream models complete processing them. This synchronization ensures memory usage stays within bounds, though it may increase latency.

  **Note**: This blocking does not cancel or internally time out intermediate requests, but clients may experience increased end-to-end latency.

## Additional Resources

You can find additional end-to-end ensemble examples in the links below:
* [This guide](https://github.com/triton-inference-server/tutorials/tree/main/Conceptual_Guide/Part_5-Model_Ensembles)
explores the concept of ensembles with a running example.
* [Preprocessing in Python Backend Using
  Ensemble](https://github.com/triton-inference-server/python_backend#preprocessing)
* [Accelerating Inference with NVIDIA Triton Inference Server and NVIDIA
  DALI](https://developer.nvidia.com/blog/accelerating-inference-with-triton-inference-server-and-dali/)
* [Using RAPIDS AI with NVIDIA Triton Inference
  Server](https://github.com/rapidsai/rapids-examples/tree/main/rapids_triton_example)

================================================
FILE: docs/user_guide/faq.md
================================================
<!--
# Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# FAQ

## What are the advantages of running a model with Triton Inference Server compared to running directly using the model's framework API?

When using Triton Inference Server the inference result will be the
same as when using the model's framework directly. However, with
Triton you get benefits like [concurrent model
execution](architecture.md#concurrent-model-execution) (the ability to
run multiple models at the same time on the same GPU) and [dynamic
batching](batcher.md#dynamic-batcher) to get better
throughput. You can also [replace or upgrade models while Triton and
client application are running](model_management.md). Another benefit
is that Triton can be deployed as a Docker container, anywhere – on
premises and on public clouds. Triton Inference Server also [supports
multiple
frameworks](https://github.com/triton-inference-server/backend) such
as TensorRT, PyTorch, and ONNX on both GPUs and CPUs
leading to a streamlined deployment.

## Can Triton Inference Server run on systems that don't have GPUs?

Yes, the QuickStart guide describes how to [run Triton on a CPU-Only
System](../getting_started/quickstart.md#run-on-cpu-only-system).

## Can Triton Inference Server be used in non-Docker environments?

Yes. Triton Inference Server can also be [built from
source](../customization_guide/build.md#building-without-docker) on your "bare metal"
system.

## Do you provide client libraries for languages other than C++ and Python?

We provide C++ and Python client libraries to make it easy for users
to write client applications that communicate with Triton. We chose
those languages because they were likely to be popular and performant
in the ML inference space, but in the future we can possibly add other
languages if there is a need.

We provide the GRPC API as a way to generate your own client library
for a large number of languages. By following the official GRPC
documentation and using
[grpc_service.proto](https://github.com/triton-inference-server/common/blob/main/protobuf/grpc_service.proto)
you can generate language bindings for all the languages supported by
GRPC. We provide three examples of this for
[Go](https://github.com/triton-inference-server/client/blob/main/src/grpc_generated/go),
[Python](https://github.com/triton-inference-server/client/blob/main/src/python/examples/grpc_client.py) and
[Java](https://github.com/triton-inference-server/client/blob/main/src/grpc_generated/java).

In general the client libraries (and client examples) are meant to be
just that, examples. We feel the client libraries are well written and
well tested, but they are not meant to serve every possible use
case. In some cases you may want to develop your own customized
library to suit your specific needs.

## How would you use Triton Inference Server within the AWS environment?

In an AWS environment, the Triton Inference Server docker container
can run on [CPU-only instances or GPU compute
instances](../getting_started/quickstart.md#launch-triton). Triton can run directly on the
compute instance or inside Elastic Kubernetes Service (EKS). In
addition, other AWS services such as Elastic Load Balancer (ELB) can
be used for load balancing traffic among multiple Triton
instances. Elastic Block Store (EBS) or S3 can be used for storing
deep-learning models loaded by the inference server.

## How do I measure the performance of my model running in the Triton Inference Server?

The Triton Inference Server exposes performance information in two
ways: by [Prometheus metrics](metrics.md) and by the statistics
available through the [HTTP/REST, GRPC, and C
APIs](../customization_guide/inference_protocols.md).

A client application,
[perf_analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md),
allows you to measure the performance of an individual model using a synthetic
load. The perf_analyzer application is designed to show you the tradeoff of
latency vs. throughput.

## How can I fully utilize the GPU with Triton Inference Server?

Triton Inference Server has several features designed to increase
GPU utilization:

* Triton can [simultaneously perform inference for multiple
  models](architecture.md#concurrent-model-execution) (using either
  the same or different frameworks) using the same GPU.

* Triton can increase inference throughput by using [multiple
instances of the same
model](architecture.md#concurrent-model-execution) to handle multiple
simultaneous inference requests to that model. Triton chooses
reasonable defaults but [you can also control the exact level of
concurrency](model_configuration.md#instance-groups) on a
model-by-model basis.

* Triton can [batch together multiple inference requests into a single
  inference execution](batcher.md#dynamic-batcher). Typically,
  batching inference requests leads to much higher throughput with only
  a relatively small increase in latency.

As a general rule, batching is the most beneficial way to increase GPU
utilization. So you should always try enabling the [dynamic
batcher](batcher.md#dynamic-batcher) with your models. Using
multiple instances of a model can also provide some benefit but is
typically most useful for models that have small compute
requirements. Most models will benefit from using two instances but
more than that is often not useful.

## If I have a server with multiple GPUs should I use one Triton Inference Server to manage all GPUs or should I use multiple inference servers, one for each GPU?

Triton Inference Server will take advantage of all GPUs that it has
access to on the server. You can limit the GPUs available to Triton by
using the CUDA_VISIBLE_DEVICES environment variable (or with Docker
you can also use NVIDIA_VISIBLE_DEVICES or --gpus flag when launching
the container). When using multiple GPUs, Triton will distribute
inference requests across the GPUs to keep them all equally
utilized. You can also [control more explicitly which models are
running on which GPUs](model_configuration.md#instance-groups).

In some deployment and orchestration environments (for example,
Kubernetes) it may be more desirable to partition a single multi-GPU
server into multiple *nodes*, each with one GPU. In this case the
orchestration environment will run a different Triton for each GPU and
a load balancer will be used to divide inference requests across the
available Triton instances.

## If the server segfaults, how can I debug it?

The NGC build is a Release build and does not contain Debug symbols.
The build.py script also defaults to a Release build. Refer to the instructions
in [build.md](../customization_guide/build.md#building-with-debug-symbols) to create a Debug build
of Triton. This will help find the cause of the segmentation fault when
looking at the gdb trace for the segfault.

When opening a GitHub issue for the segfault with Triton, please include
the backtrace to better help us resolve the problem.

## What are the benefits of using [Triton Inference Server](https://developer.nvidia.com/triton-inference-server) as part of the [NVIDIA AI Enterprise Software Suite](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/)?

NVIDIA AI Enterprise enables enterprises to implement full AI workflows by
delivering an entire end-to-end AI platform. Four key benefits:

### Enterprise-Grade Support, Security & API Stability:

Business-critical AI projects stay on track with NVIDIA Enterprise Support,
available globally to assist both IT teams with deploying and managing the
lifecycle of AI applications and the developer teams with building AI
applications.  Support includes maintenance updates, dependable SLAs and
response times.  Regular security reviews and priority notifications mitigate
potential risk of unmanaged open source and ensure compliance with corporate
standards.  Finally, long-term support and regression testing ensure API
stability between releases.

### Speed time to production with AI Workflows & Pretrained Models:
To reduce the complexity of developing common AI applications, NVIDIA AI
Enterprise includes
[AI workflows](https://www.nvidia.com/en-us/launchpad/ai/workflows/) which are
reference applications for specific business outcomes such as Intelligent
Virtual Assistants and Digital Fingerprinting for real-time cybersecurity threat
detection.  AI workflow reference applications may include
[AI frameworks](https://docs.nvidia.com/deeplearning/frameworks/index.html) and
[pretrained models](https://developer.nvidia.com/ai-models),
[Helm Charts](https://catalog.ngc.nvidia.com/helm-charts),
[Jupyter Notebooks](https://developer.nvidia.com/run-jupyter-notebooks) and
[documentation](https://docs.nvidia.com/ai-enterprise/index.html#overview).

### Performance for Efficiency and Cost Savings:
Using accelerated compute for AI workloads such as data processing with
[NVIDIA RAPIDS Accelerator](https://developer.nvidia.com/rapids) for Apache
Spark and inference with Triton Inference Server delivers better performance
which also improves efficiency and reduces operation and infrastructure costs,
including savings from reduced time and energy consumption.

### Optimized and Certified to Deploy Everywhere:
Cloud, Data Center, Edge: Optimized and certified to ensure reliable performance
whether you are running your AI in the public cloud, virtualized data centers, or
on DGX systems.


================================================
FILE: docs/user_guide/implicit_state_management.md
================================================
<!--
# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Implicit State Management

Implicit state management allows a stateful model to store its state inside
Triton. When using implicit state, the stateful model does not need to store
the state required for inference inside the model.

Below is a portion of the model configuration that indicates the model
is using implicit state.

```
sequence_batching {
  state [
    {
      input_name: "INPUT_STATE"
      output_name: "OUTPUT_STATE"
      data_type: TYPE_INT32
      dims: [ -1 ]
    }
  ]
}
```

The *state* section in the sequence_batching setting is used to indicate that
the model is using implicit state. The *input_name* field specifies the name of
the input tensor that will contain the input state. The *output_name* field
describes the name of the output tensor produced by the model that contains
output state. The output state provided by the model in the *i<sup>th</sup>*
request in the sequence will be used as the input state in the
*i+1<sup>th</sup>* request. The *dims* field specifies the dimensions of the
state tensors. When the *dims* field contains variable-sized dimensions, the
shape of the input state and output state does not have to match.

For debugging purposes, the client can request the output state. In order to
allow the client to request the output state, the
[*output* section of the model configuration](./model_configuration.md#inputs-and-outputs)
must list the output state as one of the model outputs. Note that requesting the
output state from the client can increase the request latency because of the
additional tensors that have to be transferred.

Implicit state management requires backend support. Currently, only
[onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend),
[tensorrt_backend](https://github.com/triton-inference-server/tensorrt_backend),
and [pytorch_backend](https://github.com/triton-inference-server/pytorch_backend)
support implicit state.

## State Initialization

By default, the starting request in the sequence contains uninitialized data for
the input state. The model can use the start flag in the request to detect the
beginning of a new sequence and initialize the model state by providing the
initial state in the model output. If the *dims* section in the *state*
description of the model contains variable-sized dimensions, Triton will use *1*
for every variable-sized dimension for the starting request. For other
non-starting requests in the sequence, the input state is the output state of
the previous request in the sequence. For an example ONNX model that uses
implicit state, you can refer to the ONNX model generated by the
`create_onnx_modelfile_wo_initial_state()` function
[in this generation script](https://github.com/triton-inference-server/server/blob/main/qa/common/gen_qa_implicit_models.py).
This is a simple accumulator model that stores the partial sum of the requests
in a sequence in Triton using implicit state. For state initialization, if the
request is starting, the model sets the "OUTPUT\_STATE" to be equal to the
"INPUT" tensor. For non-starting requests, it sets the "OUTPUT\_STATE" tensor
to the sum of "INPUT" and "INPUT\_STATE" tensors.

In addition to the default state initialization discussed above, Triton provides
two other mechanisms for initializing state.

### Initializing State from Zero

Below is an example of initializing state from zero.

```
sequence_batching {
  state [
    {
      input_name: "INPUT_STATE"
      output_name: "OUTPUT_STATE"
      data_type: TYPE_INT32
      dims: [ -1 ]
      initial_state: {
       data_type: TYPE_INT32
       dims: [ 1 ]
       zero_data: true
       name: "initial state"
      }
    }
  ]
}
```

Note that in the example above variable dimensions in the state description are
converted to fixed size dimensions.

### Initializing State from File

For initializing state from file, you need to create a directory named
"initial\_state" under the model directory. The file that contains the initial
state under this directory needs to be provided in the *data_file* field.
The data stored in this file will be used in row-major order as the initial
state. Below is an example state description initializing state from file.

```
sequence_batching {
  state [
    {
      input_name: "INPUT_STATE"
      output_name: "OUTPUT_STATE"
      data_type: TYPE_INT32
      dims: [ -1 ]
      initial_state: {
       data_type: TYPE_INT32
       dims: [ 1 ]
       data_file: "initial_state_data"
       name: "initial state"
      }
    }
  ]
}
```

## Scheduling Strategies

The sequence batcher can employ one of two scheduling strategies when
deciding how to batch the sequences that are routed to the same model
instance. These strategies are [direct](#direct) and [oldest](#oldest).

### Direct

With the Direct scheduling strategy the sequence batcher ensures not
only that all inference requests in a sequence are routed to the same
model instance, but also that each sequence is routed to a dedicated
batch slot within the model instance. This strategy is required when
the model maintains state for each batch slot, and is expecting all
inference requests for a given sequence to be routed to the same slot
so that the state is correctly updated.

As an example of the sequence batcher using the Direct scheduling
strategy, assume a TensorRT stateful model that has the following
model configuration.

```
name: "direct_stateful_model"
platform: "tensorrt_plan"
max_batch_size: 2
sequence_batching {
  max_sequence_idle_microseconds: 5000000
  direct { }
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          fp32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
}
input [
  {
    name: "INPUT"
    data_type: TYPE_FP32
    dims: [ 100, 100 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_FP32
    dims: [ 10 ]
  }
]
instance_group [
  {
    count: 2
  }
]
```

The sequence_batching section indicates that the model should use the
sequence batcher and the Direct scheduling strategy. In this example
the model only requires a *start* and *ready* control input from the
sequence batcher so only those controls are listed. The instance_group
indicates two instances of the model should be instantiated and
max_batch_size indicates that each of those instances should perform
batch-size 2 inferences. The following figure shows a representation
of the sequence batcher and the inference resources specified by this
configuration.

![Sequence Batching Example](images/sequence_example0.png)

Each model instance is maintaining state for each batch slot, and is
expecting all inference requests for a given sequence to be routed to
the same slot so that the state is correctly updated. For this example
that means that Triton can simultaneously perform inference for up to
four sequences.

Using the Direct scheduling strategy, the sequence batcher:

* Recognizes when an inference request starts a new sequence and
  allocates a batch slot for that sequence. If no batch slot is
  available for the new sequence, Triton places the inference request
  in a backlog.

* Recognizes when an inference request is part of a sequence that has
  an allocated batch slot and routes the request to that slot.

* Recognizes when an inference request is part of a sequence that is
  in the backlog and places the request in the backlog.

* Recognizes when the last inference request in a sequence has been
  completed. The batch slot occupied by that sequence is immediately
  reallocated to a sequence in the backlog, or freed for a future
  sequence if there is no backlog.

The following figure shows how multiple sequences are scheduled onto
the model instances using the Direct scheduling strategy. On the left
the figure shows several sequences of requests arriving at
Triton. Each sequence could be made up of any number of inference
requests and those individual inference requests could arrive in any
order relative to inference requests in other sequences, except that
the execution order shown on the right assumes that the first
inference request of sequence 0 arrives before any inference request
in sequences 1-5, the first inference request of sequence 1 arrives
before any inference request in sequences 2-5, etc.

The right of the figure shows how the inference request sequences are
scheduled onto the model instances over time.

![Sequence Batcher Example](images/sequence_example1.png)

The following figure shows how the sequence batcher uses the control input
tensors to communicate with the model. The figure shows two sequences
assigned to the two batch slots in a model instance. Inference
requests for each sequence arrive over time. The START and READY rows
show the input tensor values used for each execution of the
model. Over time the following happens:

* The first request arrives for the sequence in slot0. Assuming the
  model instance is not already executing an inference, the sequence
  scheduler immediately schedules the model instance to execute
  because an inference request is available.

* This is the first request in the sequence so the corresponding
  element in the START tensor is set to 1. There is no request
  available in slot1 so the READY tensor shows only slot0 as ready.

* After the inference completes the sequence scheduler sees that there
  are no requests available in any batch slot and so the model
  instance sits idle.

* Next, two inference requests arrive close together in time so that
  the sequence scheduler sees them both available in their respective
  batch slots. The scheduler immediately schedules the model instance
  to perform a batch-size 2 inference and uses START and READY to show
  that both slots have an inference request available but that only
  slot1 is the start of a new sequence.

* The processing continues in a similar manner for the other inference
  requests.

![Sequence Batcher Example](images/sequence_example2.png)

### Oldest

With the Oldest scheduling strategy the sequence batcher ensures that
all inference requests in a sequence are routed to the same model
instance and then uses the [dynamic
batcher](batcher.md#dynamic-batcher) to batch together
multiple inferences from different sequences into a batch that
is executed together.  With this strategy the model must typically use
the CONTROL_SEQUENCE_CORRID control so that it knows which sequence
each inference request in the batch belongs to. The
CONTROL_SEQUENCE_READY control is typically not needed because all
inferences in the batch will always be ready for inference.

As an example of the sequence batcher using the Oldest scheduling
strategy, assume a stateful model that has the following model
configuration:

```
name: "oldest_stateful_model"
platform: "tensorflow_savedmodel"
max_batch_size: 2
sequence_batching {
  max_sequence_idle_microseconds: 5000000
  oldest
    {
      max_candidate_sequences: 4
    }
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "END"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "CORRID"
      control [
        {
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_UINT64
        }
      ]
    }
  ]
}
input [
  {
    name: "INPUT"
    data_type: TYPE_FP32
    dims: [ 100, 100 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_FP32
    dims: [ 10 ]
  }
]
```

The sequence_batching section indicates that the model should use the
sequence batcher and the Oldest scheduling strategy. The Oldest
strategy is configured so that the sequence batcher maintains up to 4
active candidate sequences from which it prefers to form dynamic
batches of size 2. In this example the model requires a *start*,
*end*, and *correlation ID* control input from the sequence
batcher. The following figure shows a representation of the sequence
batcher and the inference resources specified by this configuration.

![Sequence Batching Example](images/dyna_sequence_example0.png)

Using the Oldest scheduling strategy, the sequence batcher:

* Recognizes when an inference request starts a new sequence and
  attempts to find a model instance that has room for a candidate
  sequence. If no model instance has room for a new candidate
  sequence, Triton places the inference request in a backlog.

* Recognizes when an inference request is part of a sequence that is
  already a candidate sequence in some model instance and routes the
  request to that model instance.

* Recognizes when an inference request is part of a sequence that is
  in the backlog and places the request in the backlog.

* Recognizes when the last inference request in a sequence has been
  completed. The model instance immediately removes a sequence from
  the backlog and makes it a candidate sequence in the model instance,
  or records that the model instance can handle a future sequence if
  there is no backlog.

The following figure shows how multiple sequences are scheduled onto
the model instance specified by the above example configuration. On
the left the figure shows four sequences of requests arriving at
Triton. Each sequence is composed of multiple inference requests as
shown in the figure. The center of the figure shows how the inference
request sequences are batched onto the model instance over time,
assuming that the inference requests for each sequence arrive at the
same rate with sequence A arriving just before B, which arrives just
before C, etc. The Oldest strategy forms a dynamic batch from the
oldest requests but never includes more than one request from a given
sequence in a batch (for example, the last two inferences in sequence
D are not batched together).

![Sequence Batcher Example](images/dyna_sequence_example1.png)

================================================
FILE: docs/user_guide/jetson.md
================================================
<!--
# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Triton Inference Server Support for Jetson and JetPack

A release of Triton for [JetPack 5.0](https://developer.nvidia.com/embedded/jetpack)
is provided in the attached tar file in the [release notes](https://github.com/triton-inference-server/server/releases).

![Triton on Jetson Diagram](images/triton_on_jetson.png)

Triton Inference Server support on JetPack includes:

* Running models on GPU and NVDLA
* [Concurrent model execution](architecture.md#concurrent-model-execution)
* [Dynamic batching](architecture.md#models-and-schedulers)
* [Model pipelines](architecture.md#ensemble-models)
* [Extensible backends](https://github.com/triton-inference-server/backend)
* [HTTP/REST and GRPC inference protocols](../customization_guide/inference_protocols.md)
* [C API](../customization_guide/inprocess_c_api.md)

Limitations on JetPack 5.0:

* ONNX Runtime backend does not support the OpenVINO and TensorRT execution providers.
The CUDA execution provider is in Beta.
* The Python backend does not support GPU Tensors and Async BLS.
* CUDA IPC (shared memory) is not supported. System shared memory however is supported.
* GPU metrics, GCS storage, S3 storage and Azure storage are not supported.

On JetPack, although HTTP/REST and GRPC inference protocols are supported, for edge
use cases, direct [C API integration](../customization_guide/inprocess_c_api.md)
is recommended.

You can download the `.tgz` file for Jetson from the Triton Inference Server
[release page](https://github.com/triton-inference-server/server/releases) in the
_"Jetson JetPack Support"_ section.

The `.tgz` file contains the Triton server executable and shared libraries,
as well as the C++ and Python client libraries and examples.

## Installation and Usage

### Build Dependencies for Triton

The following dependencies must be installed before building Triton server:

```
apt-get update && \
        apt-get install -y --no-install-recommends \
            software-properties-common \
            autoconf \
            automake \
            build-essential \
            git \
            libb64-dev \
            libre2-dev \
            libssl-dev \
            libtool \
            libboost-dev \
            rapidjson-dev \
            pkg-config \
            libopenblas-dev \
            libarchive-dev \
            zlib1g-dev \
            python3 \
            python3-dev \
            python3-pip
```

Additional Onnx Runtime dependencies must be installed to build the Onnx Runtime backend:

```
pip3 install --upgrade flake8 flatbuffers patchelf==0.17.2
```

Additional PyTorch dependencies must be installed to build (and run) the PyTorch backend:

```
apt-get -y install autoconf \
            bc \
            g++-8 \
            gcc-8 \
            clang-8 \
            lld-8

pip3 install --upgrade expecttest xmlrunner hypothesis aiohttp pyyaml scipy ninja typing_extensions protobuf
```

Apart from these PyTorch dependencies, the PyTorch wheel corresponding to the release must also be installed (for build and runtime):

```
pip3 install --upgrade https://developer.download.nvidia.com/compute/redist/jp/v50/pytorch/torch-1.12.0a0+2c916ef.nv22.3-cp38-cp38-linux_aarch64.whl
```

The following dependencies must be installed before building Triton client libraries/examples:

```
apt-get install -y --no-install-recommends \
            curl \
            jq

pip3 install --upgrade wheel setuptools cython && \
    pip3 install --upgrade grpcio-tools "numpy<2" attrdict pillow
```

**Note**: OpenCV 4.2.0 is installed as a part of JetPack. It is one of the dependencies for the client build.

**Note**: When building Triton on Jetson, you will require a recent version of cmake.
We recommend using cmake 3.25.2. Below is a script to upgrade your cmake version to 3.25.2.

```
apt remove cmake
# Using CMAKE installation instruction from:: https://apt.kitware.com/
apt update && apt install -y gpg wget && \
      wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \
            gpg --dearmor - |  \
            tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \
      . /etc/os-release && \
      echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | \
      tee /etc/apt/sources.list.d/kitware.list >/dev/null && \
      apt-get update && \
      apt-get install -y --no-install-recommends cmake cmake-data
```

### Runtime Dependencies for Triton

The following runtime dependencies must be installed before running Triton server:

```
apt-get update && \
        apt-get install -y --no-install-recommends \
        libb64-0d \
        libre2-9 \
        libssl1.1 \
        rapidjson-dev \
        libopenblas-dev \
        libarchive-dev \
        zlib1g \
        python3 \
        python3-dev \
        python3-pip
```

The following runtime dependencies must be installed before running Triton client:

```
apt-get update && \
        apt-get install -y --no-install-recommends \
        curl \
        jq

pip3 install --upgrade wheel setuptools && \
    pip3 install --upgrade grpcio-tools "numpy<2" attrdict pillow
```

The PyTorch runtime dependencies are the same as the build dependencies listed above.

### Usage

**Note**: The PyTorch backend depends on libomp.so, which is not loaded automatically.
If using the PyTorch backend in Triton, you need to set the LD_LIBRARY_PATH to allow
libomp.so to be loaded as needed before launching Triton.

```
LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib/llvm-8/lib"
```

**Note**: On Jetson, the backend directory must be explicitly specified using the
`--backend-directory` flag.

```
tritonserver --model-repository=/path/to/model_repo --backend-directory=/path/to/tritonserver/backends \
             --backend-config=onnx,version=2
```

**Note**:
[perf_analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
is supported on Jetson, while the [model_analyzer](model_analyzer.md) is
currently not available for Jetson. To execute `perf_analyzer` for C API, use
the CLI flag `--service-kind=triton_c_api`:

```shell
perf_analyzer -m graphdef_int32_int32_int32 --service-kind=triton_c_api \
    --triton-server-directory=/opt/tritonserver \
    --model-repository=/workspace/qa/L0_perf_analyzer_capi/models
```

Refer to these [examples](../examples/jetson/README.md) that demonstrate how to use Triton Inference Server on Jetson.


================================================
FILE: docs/user_guide/metrics.md
================================================
<!--
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Metrics

Triton provides [Prometheus](https://prometheus.io/) metrics
indicating GPU and request statistics. By default, these metrics are
available at http://localhost:8002/metrics. The metrics are only
available by accessing the endpoint, and are not pushed or published
to any remote server. The metric format is plain text so you can view
them directly, for example:

```
$ curl localhost:8002/metrics
```

The `tritonserver --allow-metrics=false` option can be used to disable
all metric reporting, while the `--allow-gpu-metrics=false` and
`--allow-cpu-metrics=false` options can be used to disable just the GPU and CPU
metrics respectively.

The `--metrics-port` option can be used to select a different port. By default,
Triton reuses the `--http-address` option for the metrics endpoint and binds the
http and metrics endpoints to the same specific address when http service is
enabled. If http service is not enabled, the metric address will bind to `0.0.0.0`
by default. To uniquely specify the metric endpoint, the `--metrics-address` option
can be used. See the `tritonserver --help` output for more info on these CLI options.

To change the interval at which metrics are polled/updated, see the `--metrics-interval-ms` flag. Metrics that are updated "Per Request" are unaffected by this interval setting. This interval only applies to metrics that are designated as "Per Interval" in the tables of each section below:

- [Inference Request Metrics](#inference-request-metrics)
- [GPU Metrics](#gpu-metrics)
- [CPU Metrics](#cpu-metrics)
- [Pinned Memory Metrics](#pinned-memory-metrics)
- [Response Cache Metrics](#response-cache-metrics)
- [Custom Metrics](#custom-metrics)

## Inference Request Metrics

### Counts

For models that do not support batching, *Request Count*, *Inference
Count* and *Execution Count* will be equal, indicating that each
inference request is executed separately.

For models that support batching, the count metrics can be interpreted
to determine average batch size as *Inference Count* / *Execution
Count*. The count metrics are illustrated by the following examples:

* Client sends a single batch-1 inference request. *Request Count* =
  1, *Inference Count* = 1, *Execution Count* = 1.

* Client sends a single batch-8 inference request. *Request Count* =
  1, *Inference Count* = 8, *Execution Count* = 1.

* Client sends 2 requests: batch-1 and batch-8. Dynamic batcher is not
  enabled for the model. *Request Count* = 2, *Inference Count* = 9,
  *Execution Count* = 2.

* Client sends 2 requests: batch-1 and batch-1. Dynamic batcher is
  enabled for the model and the 2 requests are dynamically batched by
  the server. *Request Count* = 2, *Inference Count* = 2, *Execution
  Count* = 1.

* Client sends 2 requests: batch-1 and batch-8. Dynamic batcher is
  enabled for the model and the 2 requests are dynamically batched by
  the server. *Request Count* = 2, *Inference Count* = 9, *Execution
  Count* = 1.

|Category      |Metric          |Metric Name |Description                            |Granularity|Frequency    |
|--------------|----------------|------------|---------------------------|-----------|-------------|
|Count         |Success Count   |`nv_inference_request_success` |Number of successful inference requests received by Triton (each request is counted as 1, even if the request contains a batch) |Per model  |Per request  |
|              |Failure Count   |`nv_inference_request_failure` |Number of failed inference requests received by Triton (each request is counted as 1, even if the request contains a batch) |Per model  |Per request  |
|              |Inference Count |`nv_inference_count` |Number of inferences performed (a batch of "n" is counted as "n" inferences, does not include cached requests)|Per model|Per request|
|              |Execution Count |`nv_inference_exec_count` |Number of inference batch executions (see [Inference Request Metrics](#inference-request-metrics), does not include cached requests)|Per model|Per request|
|              |Pending Request Count |`nv_inference_pending_request_count` |Number of inference requests awaiting execution by a backend. This number is incremented when a request is enqueued to the server (`TRITONSERVER_ServerInferAsync`) and is decremented when a backend is about to start executing the request. More details can be found below. |Per model|Per request|

#### Failure Count Categories

| Failed Request Reason |Description |
|------------|------------|
| REJECTED  | Number of inference failures due to request timeout in the scheduler. |
| CANCELED  |  Number of inference failures due to request cancellation in the core. |
| BACKEND |  Number of inference failures during execution of requests in the backend/model. |
| OTHER  | Number of inference failures due to other uncategorized reasons in the core. |

> **Note**
>
> Ensemble failure metrics will reflect the failure counts of their composing models as well as the parent model, but currently do not capture the same granularity for the "reason" label and will default to the "OTHER" reason.
>
> For example, if EnsembleA contains ModelA, and ModelA experiences a failed request due to a queue/backlog timeout in the scheduler, ModelA will have a failed request metric reflecting `reason=REJECTED` and `count=1`.
> Additionally, EnsembleA will have a failed request metric reflecting `reason=OTHER` and `count=2`.
> The `count=2` reflects 1 from the internally failed request captured by ModelA, as well as 1 from the failed top-level request sent to EnsembleA by the user/client.
> The `reason=OTHER` reflects the fact that the ensemble doesn't currently capture the specific reason why
> ModelA's request failed at this time.

#### Pending Request Count (Queue Size) Per-Model

The *Pending Request Count* reflects the number of requests that have been
received by Triton core via `TRITONSERVER_ServerInferAsync`, but have not yet
started execution by a backend model instance
(`TRITONBACKEND_ModelInstanceExecute`).

For all intents and purposes, the
"pending request count" and "queue size" per-model can be used
interchangeably, and the number reflected in the metric should
intuitively represent the number of requests that are not currently
being executed by any model instances. In simple terms, if you send 100
requests to a model that can only handle 5 requests concurrently, then you
should see a pending count of 95 for that model in most cases.

For those interested in more technical details, the term "pending request count"
is a bit more accurate than "queue size" because Triton is highly configurable,
and there are many places in Triton where a request can be considered pending rather
than a single queue. Some of the most common will be called out below:
- Default Scheduler backlogs any requests not currently executing.
  - Assuming 1 available model instance with the default scheduler settings,
    and 10 requests are sent in rapid succession.
  - The 1st request should be picked up for
    execution immediately, and the remaining 9 requests should be considered
    pending for this model, until the 1st request is finished. Afterwards, the
    next request should be picked up and the pending count should be decremented
    to 8, and so on until all requests are finished and the pending count is 0.
- Dynamic Batcher queue for dynamically creating batches from requests.
  - Assuming 1 available model instance with the dynamic batch scheduler
    configured with `max_batch_size: 4` and a sufficiently large
    `max_queue_delay_microseconds` (or queue of requests),
    and 10 requests are sent in rapid succession.
  - The first 4 requests, or as large a batch as the scheduler could form,
    should be picked up for execution immediately, and the remaining 6 requests
    should be considered pending. After the batch finishes, the next batch
    should be picked up, decrementing the pending count again to 2 pending.
    Then finally since only 2 requests remain, the final 2 requests will be
    batched and picked up by the backend, decrementing the pending count to 0.
- Sequence Batcher queues and backlogs for ongoing sequence requests, some may
  be assigned sequence slots, some may not.
  - Sequence Batchers of both strategies (direct and oldest) will have pending
    counts that generally follow the same trend as the dynamic batching
    description above. The sequence batchers will immediately execute as many
    requests in a batch as they can based on the model/scheduler config settings,
    and any further requests will be considered pending until the previous batch
    finishes and the next batch can start.
- Rate Limiter queues for prepared batches of requests.
  - When rate limiting is enabled, requests can be held back from execution
    to satisfy the rate limit constraints that were configured.

There are some places where a request would not be considered pending:
- Ensemble Scheduler
  - The Ensemble Scheduler almost immediately enqueues any requests it receives
    into the composing model schedulers at the first step in the ensemble.
    Therefore, the requests could be considered pending by the composing model
    schedulers; however, from the ensemble's perspective, these requests have been
    scheduled.
- Frontends (HTTP/GRPC Servers)
  - Any requests sent from a client to a frontend server in-front of Triton
    may spend some time in the corresponding server's code mapping
    protocol-specific metadata to Triton metadata. Though this time is
    generally brief, it will not be considered pending from Triton's
    perspective until Triton core has received the request from the frontend.

### Latencies

Starting in 23.04, Triton exposes the ability to choose the types of metrics
that are published through the `--metrics-config` CLI option.

#### Counters

By default, the following
[Counter](https://prometheus.io/docs/concepts/metric_types/#counter)
metrics are used for latencies:

|Category      |Metric          |Metric Name |Description                            |Granularity|Frequency    |
|--------------|----------------|------------|---------------------------|-----------|-------------|
|Latency       |Request Time    |`nv_inference_request_duration_us` |Cumulative end-to-end inference request handling time (includes cached requests) |Per model  |Per request  |
|              |Queue Time      |`nv_inference_queue_duration_us` |Cumulative time requests spend waiting in the scheduling queue (includes cached requests) |Per model  |Per request  |
|              |Compute Input Time|`nv_inference_compute_input_duration_us` |Cumulative time requests spend processing inference inputs (in the framework backend, does not include cached requests)     |Per model  |Per request  |
|              |Compute Time    |`nv_inference_compute_infer_duration_us` |Cumulative time requests spend executing the inference model (in the framework backend, does not include cached requests)     |Per model  |Per request  |
|              |Compute Output Time|`nv_inference_compute_output_duration_us` |Cumulative time requests spend processing inference outputs (in the framework backend, does not include cached requests)     |Per model  |Per request  |

To disable these metrics specifically, you can set `--metrics-config counter_latencies=false`

#### Histograms

> **Note**
>
> The following Histogram feature is experimental for the time being and may be
> subject to change based on user feedback.

The following
[Histogram](https://prometheus.io/docs/concepts/metric_types/#histogram)
metrics are available for latencies. They are disabled by default:

|Category      |Metric          |Metric Name |Description                |Granularity|Frequency    |
|--------------|----------------|------------|---------------------------|-----------|-------------|
|Latency       |Request to First Response Time    |`nv_inference_first_response_histogram_ms` |Histogram of end-to-end inference request to the first response time |Per model  |Per request  |

To enable these metrics specifically, you can set `--metrics-config histogram_latencies=true`

Each histogram above is composed of several sub-metrics. For each histogram
metric, there is a set of `le` (less than or equal to) thresholds tracking
the counter for each bucket. Additionally, there are `_count` and `_sum`
metrics that aggregate the count and observed values for each. For example,
see the following information exposed by the "Time to First Response" histogram
metrics:
```
# HELP nv_inference_first_response_histogram_ms Duration from request to first response in milliseconds
# TYPE nv_inference_first_response_histogram_ms histogram
nv_inference_first_response_histogram_ms_count{model="my_model",version="1"} 37
nv_inference_first_response_histogram_ms_sum{model="my_model",version="1"} 10771
nv_inference_first_response_histogram_ms{model="my_model",version="1", le="100"} 8
nv_inference_first_response_histogram_ms{model="my_model",version="1", le="500"} 30
nv_inference_first_response_histogram_ms{model="my_model",version="1", le="2000"} 36
nv_inference_first_response_histogram_ms{model="my_model",version="1", le="5000"} 37
nv_inference_first_response_histogram_ms{model="my_model",version="1", le="+Inf"} 37
```

Triton initializes histograms with default buckets for each, as shown above.
Buckets can be overridden per family by specifying `model_metrics` in the
model configuration. For example:
```
// config.pbtxt
model_metrics {
  metric_control: [
    {
      metric_identifier: {
        family: "nv_inference_first_response_histogram_ms"
      }
      histogram_options: {
        buckets: [ 1, 2, 4, 8 ]
      }
    }
  ]
}
```

> **Note**
>
> To apply changes to metric options dynamically, the model must be completely
> unloaded and then reloaded for the updates to take effect.

Currently, the following histogram families support custom buckets.
```
nv_inference_first_response_histogram_ms  // Time to First Response
```

#### Summaries

> **Note**
>
> The following Summary feature is experimental for the time being and may be
> subject to change based on user feedback.

To get configurable quantiles over a sliding time window, Triton supports
a set of [Summary](https://prometheus.io/docs/concepts/metric_types/#summary)
metrics for latencies as well. These metrics are disabled by default, but can
be enabled by setting `--metrics-config summary_latencies=true`.

For more information on how the quantiles are calculated, see
[this explanation](https://grafana.com/blog/2022/03/01/how-summary-metrics-work-in-prometheus/).

The following summary metrics are available:

|Category      |Metric          |Metric Name |Description                            |Granularity|Frequency    |
|--------------|----------------|------------|---------------------------|-----------|-------------|
|Latency       |Request Time    |`nv_inference_request_summary_us` |Summary of end-to-end inference request handling times (includes cached requests) |Per model  |Per request  |
|              |Queue Time      |`nv_inference_queue_summary_us` |Summary of time requests spend waiting in the scheduling queue (includes cached requests) |Per model  |Per request  |
|              |Compute Input Time|`nv_inference_compute_input_summary_us` |Summary of time requests spend processing inference inputs (in the framework backend, does not include cached requests)     |Per model  |Per request  |
|              |Compute Time    |`nv_inference_compute_infer_summary_us` |Summary of time requests spend executing the inference model (in the framework backend, does not include cached requests)     |Per model  |Per request  |
|              |Compute Output Time|`nv_inference_compute_output_summary_us` |Summary of time requests spend processing inference outputs (in the framework backend, does not include cached requests)     |Per model  |Per request  |

Each summary above is actually composed of several sub-metrics. For each
metric, there is a set of `quantile` metrics tracking the latency for each
quantile. Additionally, there are `_count` and `_sum` metrics that aggregate
the count and observed values for each. For example, see the following
information exposed by the Inference Queue Summary metrics:
```
# HELP nv_inference_queue_summary_us Summary of inference queuing duration in microseconds (includes cached requests)
# TYPE nv_inference_queue_summary_us summary
nv_inference_queue_summary_us_count{model="my_model",version="1"} 161
nv_inference_queue_summary_us_sum{model="my_model",version="1"} 11110
nv_inference_queue_summary_us{model="my_model",version="1",quantile="0.5"} 55
nv_inference_queue_summary_us{model="my_model",version="1",quantile="0.9"} 97
nv_inference_queue_summary_us{model="my_model",version="1",quantile="0.95"} 98
nv_inference_queue_summary_us{model="my_model",version="1",quantile="0.99"} 101
nv_inference_queue_summary_us{model="my_model",version="1",quantile="0.999"} 101
```

The count and sum for the summary above show that stats have been recorded for
161 requests, and took a combined total of 11110 microseconds. The `_count` and
`_sum` of a summary should generally match the counter metric equivalents when
applicable, such as:
```
nv_inference_request_success{model="my_model",version="1"} 161
nv_inference_queue_duration_us{model="my_model",version="1"} 11110
```

Triton has a set of default quantiles to track, as shown above. To set
custom quantiles, you can use the `--metrics-config` CLI option. The format is:
```
tritonserver --metrics-config summary_quantiles="<quantile1>:<error1>,...,<quantileN>:<errorN>"
```

For example:
```
tritonserver --metrics-config summary_quantiles="0.5:0.05,0.9:0.01,0.95:0.001,0.99:0.001"
```

To better understand the setting of error values for computing each quantile, see the
[best practices for histograms and summaries](https://prometheus.io/docs/practices/histograms/#histograms-and-summaries).


## GPU Metrics

GPU metrics are collected through the use of [DCGM](https://developer.nvidia.com/dcgm).
Collection of GPU metrics can be toggled with the `--allow-gpu-metrics` CLI flag.
If building Triton locally, the `TRITON_ENABLE_METRICS_GPU` CMake build flag can be used to toggle building the relevant code entirely.

|Category        |Metric            |Metric Name                 |Description                                            |Granularity|Frequency    |
|----------------|------------------|----------------------------|-------------------------------------------------------|-----------|-------------|
|GPU Utilization |Power Usage       |`nv_gpu_power_usage`        |GPU instantaneous power, in watts                      |Per GPU    |Per interval |
|                |Power Limit       |`nv_gpu_power_limit`        |Maximum GPU power limit, in watts                      |Per GPU    |Per interval |
|                |Energy Consumption|`nv_energy_consumption`     |GPU energy consumption since Triton started, in joules |Per GPU    |Per interval |
|                |GPU Utilization   |`nv_gpu_utilization`        |GPU utilization rate (0.0 - 1.0)                       |Per GPU    |Per interval |
|GPU Memory      |GPU Total Memory  |`nv_gpu_memory_total_bytes` |Total GPU memory, in bytes                             |Per GPU    |Per interval |
|                |GPU Used Memory   |`nv_gpu_memory_used_bytes`  |Used GPU memory, in bytes                              |Per GPU    |Per interval |


## CPU Metrics

Collection of CPU metrics can be toggled with the `--allow-cpu-metrics` CLI flag.
If building Triton locally, the `TRITON_ENABLE_METRICS_CPU` CMake build flag can be used to toggle building the relevant code entirely.

> **Note**
>
> CPU Metrics are currently only supported on Linux.
> They collect information from the [/proc filesystem](https://www.kernel.org/doc/html/latest/filesystems/proc.html) such as `/proc/stat` and `/proc/meminfo`.

|Category      |Metric          |Metric Name |Description                            |Granularity|Frequency    |
|--------------|----------------|------------|---------------------------|-----------|-------------|
|CPU Utilization | CPU Utilization | `nv_cpu_utilization` | Total CPU utilization rate [0.0 - 1.0] | Aggregated across all cores since last interval | Per interval |
|CPU Memory      | CPU Total Memory | `nv_cpu_memory_total_bytes` | Total CPU memory (RAM), in bytes | System-wide | Per interval |
|                | CPU Used Memory | `nv_cpu_memory_used_bytes` | Used CPU memory (RAM), in bytes | System-wide | Per interval |

## Pinned Memory Metrics

Starting in 24.01, Triton offers Pinned Memory metrics to monitor the utilization of the Pinned Memory pool.

|Category        |Metric            |Metric Name                 |Description                                            |Granularity|Frequency    |
|----------------|------------------|----------------------------|-------------------------------------------------------|-----------|-------------|
|Pinned Memory   |Total Pinned memory |`nv_pinned_memory_pool_total_bytes`        |Total Pinned memory, in bytes                      |All models    |Per interval |
|                |Used Pinned memory |`nv_pinned_memory_pool_used_bytes`        |Used Pinned memory, in bytes                      |All models    |Per interval |

## Response Cache Metrics

Cache metrics can be reported in two ways:

1. A base set of cache metrics will be reported
by Triton directly, such as the cache hit/miss counts and durations described
below.

2. As of 23.03, additional cache metrics may be reported depending on the
[cache implementation](response_cache.md#cache-implementations)
being used through Triton's [Metrics API](#custom-metrics).

### Triton-reported Response Cache Metrics

Compute latency metrics in the
[Inference Request Metrics table](#inference-request-metrics) above are
calculated for the time spent in model inference backends. If the response
cache is enabled for a given model (see [Response Cache](response_cache.md)
docs for more info), total inference times may be affected by response cache
lookup times.

On cache hits, "Cache Hit Time" indicates the time spent looking up the
response, and "Compute Input Time" /  "Compute Time" / "Compute Output Time"
are not recorded.

On cache misses, "Cache Miss Time" indicates the time spent looking up
the request hash and inserting the computed output tensor data into the cache.
Otherwise, "Compute Input Time" /  "Compute Time" / "Compute Output Time" will
be recorded as usual.

|Category      |Metric          |Metric Name |Description                            |Granularity|Frequency    |
|--------------|----------------|------------|---------------------------|-----------|-------------|
|Count         |Cache Hit Count |`nv_cache_num_hits_per_model` |Number of response cache hits per model |Per model |Per request |
|              |Cache Miss Count |`nv_cache_num_misses_per_model` |Number of response cache misses per model |Per model |Per request |
|Latency       |Cache Hit Time |`nv_cache_hit_duration_per_model` |Cumulative time requests spend retrieving a cached response per model on cache hits (microseconds) |Per model |Per request |
|              |Cache Miss Time |`nv_cache_miss_duration_per_model` |Cumulative time requests spend looking up and inserting responses into the cache on a cache miss (microseconds) |Per model |Per request |

Similar to the Summaries section above for Inference Request Metrics, the
per-model cache hit/miss latency metrics also support Summaries.

> **Note**
>
> For models with response caching enabled, the inference request **summary** metric
> is currently disabled. This is due to extra time spent internally on cache
> management that wouldn't be reflected correctly in the end to end request time.
> Other summary metrics are unaffected.

## Custom Metrics

Triton exposes a C API to allow users and backends to register and collect
custom metrics with the existing Triton metrics endpoint. The user takes the
ownership of the custom metrics created through the APIs and must manage their
lifetime following the API documentation.

The
[identity_backend](https://github.com/triton-inference-server/identity_backend/blob/main/README.md#custom-metric-example)
demonstrates a practical example of adding a custom metric to a backend.

Further documentation can be found in the `TRITONSERVER_MetricFamily*` and
`TRITONSERVER_Metric*` API annotations in
[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).

### TensorRT-LLM Backend Metrics

The TRT-LLM backend uses the custom metrics API to track and expose specific metrics about
LLMs, KV Cache, and Inflight Batching to Triton:
https://github.com/triton-inference-server/tensorrtllm_backend?tab=readme-ov-file#triton-metrics

### vLLM Backend Metrics

The vLLM backend uses the custom metrics API to track and expose specific metrics about
LLMs to Triton:
https://github.com/triton-inference-server/vllm_backend?tab=readme-ov-file#triton-metrics


================================================
FILE: docs/user_guide/model_analyzer.md
================================================
<!--
# Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Model Analyzer

The Triton [Model Analyzer](https://github.com/triton-inference-server/model_analyzer)
 is a tool that uses
[Performance Analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
to send requests to your model while measuring GPU memory and compute
utilization. The Model Analyzer is specifically useful for characterizing the
GPU memory requirements for your model under different batching and model
instance configurations. Once you have this GPU memory usage information you can
more intelligently decide on how to combine multiple models on the same GPU
while remaining within the memory capacity of the GPU.

For more detailed examples and explanations of using Model Analyzer, see:
- [Model Analyzer Conceptual Guide](https://github.com/triton-inference-server/tutorials/tree/main/Conceptual_Guide/Part_3-optimizing_triton_configuration)
- [Maximizing Deep Learning
Inference Performance with NVIDIA Model
Analyzer](https://developer.nvidia.com/blog/maximizing-deep-learning-inference-performance-with-nvidia-model-analyzer)

================================================
FILE: docs/user_guide/model_configuration.md
================================================
<!--
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Model Configuration

**Is this your first time writing a config file?** Check out [this guide](https://github.com/triton-inference-server/tutorials/tree/main/Conceptual_Guide/Part_1-model_deployment#model-configuration) or this [example](https://github.com/triton-inference-server/tutorials/tree/main/HuggingFace#examples)!

Each model in a [model repository](model_repository.md) must include a model configuration that provides required and optional information about the model.
Typically, this configuration is provided in a config.pbtxt file specified as [ModelConfig protobuf](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto).
In some cases, discussed in [Auto-Generated Model Configuration](#auto-generated-model-configuration), the model configuration can be generated automatically by Triton and so does not need to be provided explicitly.

This section describes the most important model configuration properties but the documentation in the [ModelConfig protobuf](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto) should also be consulted.

## Minimal Model Configuration

A minimal model configuration must specify the [*platform* and/or *backend* properties](https://github.com/triton-inference-server/backend/blob/main/README.md#backends), the *max_batch_size* property, and the input and output tensors of the model.

As an example consider a TensorRT model that has two inputs, *input0* and *input1*, and one output, *output0*, all of which are 16 entry float32 tensors.
The minimal configuration is:

```
  platform: "tensorrt_plan"
  max_batch_size: 8
  input [
    {
      name: "input0"
      data_type: TYPE_FP32
      dims: [ 16 ]
    },
    {
      name: "input1"
      data_type: TYPE_FP32
      dims: [ 16 ]
    }
  ]
  output [
    {
      name: "output0"
      data_type: TYPE_FP32
      dims: [ 16 ]
    }
  ]
```

### Name, Platform and Backend

The model configuration *name* property is optional.
If the name of the model is not specified in the configuration it is assumed to be the same as the model repository directory containing the model.
If *name* is specified it must match the name of the model repository directory containing the model.
The required values for *platform* and *backend* are described in the [backend documentation](https://github.com/triton-inference-server/backend/blob/main/README.md#backends).

### Model Transaction Policy

The *model_transaction_policy* property describes the nature of transactions expected from the model.

#### Decoupled

This boolean setting indicates whether responses generated by the model are [decoupled](./decoupled_models.md) with the requests issued to it.
Using decoupled means the number of responses generated by the model may differ from the number of requests issued, and the responses may be out of order relative to the order of requests.
The default is false, which means the model will generate exactly one response for each request.

### Maximum Batch Size

The *max_batch_size* property indicates the maximum batch size that the model supports for the [types of batching](architecture.md#models-and-schedulers) that can be exploited by Triton.
If the model's batch dimension is the first dimension, and all inputs and outputs to the model have this batch dimension, then Triton can use its [dynamic batcher](batcher.md#dynamic-batcher) or [sequence batcher](batcher.md#sequence-batcher) to automatically use batching with the model.
In this case *max_batch_size* should be set to a value greater-or-equal-to 1 that indicates the maximum batch size that Triton should use with the model.

For models that do not support batching, or do not support batching in the specific ways described above, *max_batch_size* must be set to zero.


### Inputs and Outputs

Each model input and output must specify a name, datatype, and shape.
The name specified for an input or output tensor must match the name expected by the model.

#### Special Conventions for PyTorch Backend

**Naming Convention:**

Due to the absence of sufficient metadata for inputs/outputs in TorchScript model files, the "name" attribute of inputs/outputs in the configuration must follow specific naming conventions.
These are detailed below.

1. [Only for Inputs] When the input is not a Dictionary of Tensors, the input names in the configuration file should mirror the names of the input arguments to the forward function in the model's definition.

   For example, if the forward function for the TorchScript model was defined as `forward(self, input0, input1)`, the first and second inputs should be named "input0" and "input1" respectively.

2. `<name>__<index>`: Where \<name\> can be any string and \<index\> is an integer index that refers to the position of the corresponding input/output.

   This means that if there are two inputs and two outputs, the first and second inputs can be named "INPUT__0" and "INPUT__1" and the first and second outputs can be named "OUTPUT__0" and "OUTPUT__1" respectively.

3. If all inputs (or outputs) do not follow the same naming convention, then we enforce strict ordering from the model configuration i.e. we assume the order of inputs (or outputs) in the configuration is the true ordering of these inputs.

***Dictionary of Tensors as Input:***

The PyTorch backend supports passing of inputs to the model in the form of a Dictionary of Tensors.
This is only supported when there is a *single* input to the model of type Dictionary that contains a mapping of string to tensor.
As an example, if there is a model that expects the input of the form:

```
{'A': tensor1, 'B': tensor2}
```

The input names in the configuration in this case must not follow the above naming conventions `<name>__<index>`.
Instead, the names of the inputs in this case must map to the string value 'key' for that specific tensor.
For this case, the inputs would be "A" and "B", where input "A" refers to value corresponding to tensor1 and "B" refers to the value corresponding to tensor2.

<br/>

The datatypes allowed for input and output tensors vary based on the type of the model.
Section [Datatypes](#datatypes) describes the allowed datatypes and how they map to the datatypes of each model type.

An input shape indicates the shape of an input tensor expected by the model and by Triton in inference requests.
An output shape indicates the shape of an output tensor produced by the model and returned by Triton in response to an inference request.
Both input and output shape must have rank greater-or-equal-to 1, that is, the empty shape **[ ]** is not allowed.

Input and output shapes are specified by a combination of *max_batch_size* and the dimensions specified by the input or output *dims* property.
For models with *max_batch_size* greater-than 0, the full shape is formed as [ -1 ] + *dims*.
For models with *max_batch_size* equal to 0, the full shape is formed as *dims*.
For example, for the following configuration the shape of "input0" is [-1, 16 ] and the shape of "output0" is [ -1, 4 ].

```
  platform: "tensorrt_plan"
  max_batch_size: 8
  input [
    {
      name: "input0"
      data_type: TYPE_FP32
      dims: [ 16 ]
    }
  ]
  output [
    {
      name: "output0"
      data_type: TYPE_FP32
      dims: [ 4 ]
    }
  ]
```

For a configuration that is identical except that *max_batch_size* is equal to 0, the shape of "input0" is [ 16 ] and the shape of "output0" is [ 4 ].

```
  platform: "tensorrt_plan"
  max_batch_size: 0
  input [
    {
      name: "input0"
      data_type: TYPE_FP32
      dims: [ 16 ]
    }
  ]
  output [
    {
      name: "output0"
      data_type: TYPE_FP32
      dims: [ 4 ]
    }
  ]
```

For models that support input and output tensors with variable-size dimensions, those dimensions can be listed as -1 in the input and output configuration.
For example, if a model requires a 2-dimensional input tensor where the first dimension must be size 4 but the second dimension can be any size, the model configuration for that input would include *dims: [ 4, -1 ]*.
Triton would then accept inference requests where that input tensor's second dimension was any value greater-or-equal-to 0.
The model configuration can be more restrictive than what is allowed by the underlying model.
For example, even though the framework model itself allows the second dimension to be any size, the model configuration could be specified as *dims: [ 4, 4 ]*.
In this case, Triton would only accept inference requests where the input tensor's shape was exactly *[ 4, 4 ]*.

The [*reshape* property](#reshape) must be used if there is a mismatch between the input shape that Triton receives in an inference request and the input shape expected by the model.
Similarly, the *reshape* property must be used if there is a mismatch between the output shape produced by the model and the shape that Triton returns in a response to an inference request.

Model inputs can specify `allow_ragged_batch` to indicate that the input is a [ragged input](ragged_batching.md#ragged-batching).
The field is used with [dynamic batcher](model_configuration.md#default-max-batch-size-and-dynamic-batcher) to allow batching without enforcing the input to have the same shape in all requests.

## Auto-Generated Model Configuration

The model configuration file containing the required settings must be available with each model to be deployed on Triton.
In some cases the required portions of the model configuration can be generated automatically by Triton.
The required portion of the model configuration are the settings shown in the [Minimal Model Configuration](#minimal-model-configuration).
By default, Triton will try to complete these sections.
However, by starting Triton with `--disable-auto-complete-config` option, Triton can be configured to not auto-complete model configuration on the backend side.
However, even with this option Triton will fill in missing [`instance_group`](#instance-groups) settings with default values.

Triton can derive all the required settings automatically for most TensorRT, ONNX, and OpenVINO models.
For Python models, [`auto_complete_config`](https://github.com/triton-inference-server/python_backend/#auto_complete_config) function can be implemented in Python backend to provide [`max_batch_size`](#maximum-batch-size), [`input`](#inputs-and-outputs) and [`output`](#inputs-and-outputs) properties using `set_max_batch_size`, `add_input`, and `add_output` functions.
These properties will allow Triton to load the Python model with [Minimal Model Configuration](#minimal-model-configuration) in absence of a configuration file.
All other model types *must* provide a model configuration file.

When developing a custom backend, you can populate required settings in the configuration and call `TRITONBACKEND_ModelSetConfig` API to update completed configuration with Triton core.
You can take a look at the [Onnxruntime](https://github.com/triton-inference-server/onnxruntime_backend) backend as an example of how to achieve this.
Currently, only [inputs, outputs](#inputs-and-outputs), [max_batch_size](#maximum-batch-size) and [dynamic batching](model_configuration.md#default-max-batch-size-and-dynamic-batcher) settings can be populated by backend.
For custom backends, your config.pbtxt file must include a `backend` field or your model name must be in the form `<model_name>.<backend_name>`.

You can also see the model configuration generated for a model by Triton using the [model configuration endpoint](../protocol/extension_model_configuration.md).
The easiest way to do this is to use a utility like *curl*:

```bash
$ curl localhost:8000/v2/models/<model name>/config
```

This will return a JSON representation of the generated model configuration.
From this you can take the max_batch_size, inputs, and outputs sections of the JSON and convert it to a config.pbtxt file.
Triton only generates the [minimal portion of the model configuration](#minimal-model-configuration).
You must still provide the optional portions of the model configuration by editing the config.pbtxt file.

## Custom Model Configuration

Sometimes, when multiple devices running Triton instances share one model repository, it is necessary to have models configured differently on each platform in order to achieve the best performance.
Triton allows users to pick the custom model configuration name by setting `--model-config-name` option.

For example, when running `./tritonserver --model-repository=</path/to/model/repository> --model-config-name=h100`, the server will search the custom configuration file `h100.pbtxt` under `/path/to/model/repository/<model-name>/configs` directory for each model
that is loaded.
If `h100.pbtxt` exists, it will be used as the configuration for this model.
Otherwise, the default configuration `/path/to/model/repository/<model-name>/config.pbtxt` or [auto-generated model configuration](#auto-generated-model-configuration) will be selected based on the settings.

Custom model configuration also works with `Explicit` and `Poll` model control modes.
Users may delete or add new custom configurations and the server will pick the configuration file for each loaded model dynamically.

Note: custom model configuration name should not contain any space character.

Example 1: --model-config-name=h100
```
.
└── model_repository/
    ├── model_a/
    │   ├── configs/
    │   │   ├── v100.pbtxt
    │   │   └── **h100.pbtxt**
    │   └── config.pbtxt
    ├── model_b/
    │   ├── configs/
    │   │   └── v100.pbtxt
    │   └── **config.pbtxt**
    └── model_c/
        ├── configs/
        │   └── config.pbtxt
        └── **config.pbtxt**
```

Example 2: --model-config-name=config
```
.
└── model_repository/
    ├── model_a/
    │   ├── configs/
    │   │   ├── v100.pbtxt
    │   │   └── h100.pbtxt
    │   └── **config.pbtxt**
    ├── model_b/
    │   ├── configs/
    │   │   └── v100.pbtxt
    │   └── **config.pbtxt**
    └── model_c/
        ├── configs/
        │   └── **config.pbtxt**
        └── config.pbtxt
```

Example 3: --model-config-name not set
```
.
└── model_repository/
    ├── model_a/
    │   ├── configs/
    │   │   ├── v100.pbtxt
    │   │   └── h100.pbtxt
    │   └── **config.pbtxt**
    ├── model_b/
    │   ├── configs/
    │   │   └── v100.pbtxt
    │   └── **config.pbtxt**
    └── model_c/
        ├── configs/
        │   └── config.pbtxt
        └── **config.pbtxt**
```

### Default Max Batch Size and Dynamic Batcher

When a model is using the auto-complete feature, a default maximum batch size may be set by using the `--backend-config=default-max-batch-size=<int>` command line argument.
This allows all models which are capable of batching and which make use of [Auto Generated Model Configuration](#auto-generated-model-configuration) to have a default maximum batch size.
This value is set to 4 by default.
Backend developers may make use of this default-max-batch-size by obtaining it from the TRITONBACKEND_BackendConfig api.
Currently, the following backends utilize these default batch values and turn on dynamic batching in their generated model configurations:

1. [Onnxruntime backend](https://github.com/triton-inference-server/onnxruntime_backend)

2. [TensorRT backend](https://github.com/triton-inference-server/tensorrt_backend)

   1. TensorRT models store the maximum batch size explicitly and do not make use of the default-max-batch-size parameter.
      However, if max_batch_size > 1 and no scheduler is provided, the dynamic batch scheduler will be enabled.

If a value greater than 1 for the maximum batch size is set for the model, the [dynamic_batching](batcher.md#dynamic-batcher) config will be set if no scheduler is provided in the configuration file.


## Datatypes

The following table shows the tensor datatypes supported by Triton.
The first column shows the name of the datatype as it appears in the model configuration file.
The next three columns show the corresponding datatype for supported model frameworks.
If a model framework does not have an entry for a given datatype, then Triton does not support that datatype for that model.
The fifth column, labeled "API", shows the corresponding datatype for the TRITONSERVER C API, TRITONBACKEND C API, HTTP/REST protocol and GRPC protocol.
The last column shows the corresponding datatype for the Python numpy library.

|Model Config  |TensorRT      |ONNX Runtime  |PyTorch  |API      |NumPy         |
|--------------|--------------|--------------|---------|---------|--------------|
|TYPE_BOOL     | kBOOL        |BOOL          |kBool    |BOOL     |bool          |
|TYPE_UINT8    | kUINT8       |UINT8         |kByte    |UINT8    |uint8         |
|TYPE_UINT16   |              |UINT16        |         |UINT16   |uint16        |
|TYPE_UINT32   |              |UINT32        |         |UINT32   |uint32        |
|TYPE_UINT64   |              |UINT64        |         |UINT64   |uint64        |
|TYPE_INT8     | kINT8        |INT8          |kChar    |INT8     |int8          |
|TYPE_INT16    |              |INT16         |kShort   |INT16    |int16         |
|TYPE_INT32    | kINT32       |INT32         |kInt     |INT32    |int32         |
|TYPE_INT64    | kINT64       |INT64         |kLong    |INT64    |int64         |
|TYPE_FP16     | kHALF        |FLOAT16       |         |FP16     |float16       |
|TYPE_FP32     | kFLOAT       |FLOAT         |kFloat   |FP32     |float32       |
|TYPE_FP64     |              |DOUBLE        |kDouble  |FP64     |float64       |
|TYPE_STRING   |              |STRING        |         |BYTES    |dtype(object) |
|TYPE_BF16     | kBF16        |              |         |BF16     |              |

For TensorRT each value is in the nvinfer1::DataType namespace.
For example, nvinfer1::DataType::kFLOAT is the 32-bit floating-point datatype.

For ONNX Runtime each value is prepended with ONNX_TENSOR_ELEMENT_DATA_TYPE_.
For example, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT is the 32-bit floating-point datatype.

For PyTorch each value is in the torch namespace. For example, torch::kFloat is the 32-bit floating-point datatype.

For Numpy each value is in the numpy module. For example, numpy.float32 is the 32-bit floating-point datatype.

## Reshape

The *ModelTensorReshape* property on a model configuration input or output is used to indicate that the input or output shape accepted by the inference API differs from the input or output shape expected or produced by the underlying framework model or custom backend.

For an input, *reshape* can be used to reshape the input tensor to a different shape expected by the framework or backend.
A common use-case is where a model that supports batching expects a batched input to have shape *[ batch-size ]*, which means that the batch dimension fully describes the shape.
For the inference API the equivalent shape *[ batch-size, 1 ]* must be specified since each input must specify a non-empty *dims*.
For this case the input should be specified as:

```
  input [
    {
      name: "in"
      dims: [ 1 ]
      reshape: { shape: [ ] }
    }
  ]
```

For an output, *reshape* can be used to reshape the output tensor produced by the framework or backend to a different shape that is returned by the inference API.
A common use-case is where a model that supports batching produces a batched output with shape *[ batch-size ]*, which means that the batch dimension fully describes the shape.
For the inference API the equivalent shape *[ batch-size, 1 ]* must be specified since each output must specify a non-empty *dims*.
For this case the output should be specified as:

```
  output [
    {
      name: "in"
      dims: [ 1 ]
      reshape: { shape: [ ] }
    }
  ]
```

## Shape Tensors

For models that support shape tensors, the *is_shape_tensor* property must be set appropriately for inputs and outputs that are acting as shape tensors.
The following shows an example configuration that specifies shape tensors.

```
  name: "myshapetensormodel"
  platform: "tensorrt_plan"
  max_batch_size: 8
  input [
    {
      name: "input0"
      data_type: TYPE_FP32
      dims: [ 1 , 3]
    },
    {
      name: "input1"
      data_type: TYPE_INT32
      dims: [ 2 ]
      is_shape_tensor: true
    }
  ]
  output [
    {
      name: "output0"
      data_type: TYPE_FP32
      dims: [ 1 , 3]
    }
  ]
```

As discussed above, Triton assumes that batching occurs along the first dimension which is not listed in the input or output tensor *dims*.
However, for shape tensors, batching occurs at the first shape value.
For the above example, an inference request must provide inputs with the following shapes.

```
  "input0": [ x, 1, 3]
  "input1": [ 3 ]
  "output0": [ x, 1, 3]
```

Where *x* is the batch size of the request.
Triton requires the shape tensors to be marked as shape tensors in the model when using batching. Note that "input1" has shape *[ 3 ]* and not *[ 2 ]*, which is how it is described in model configuration.
As `myshapetensormodel` model is a batching model, the batch size should be provided as an additional value.
Triton will accumulate all the shape values together for "input1" in batch dimension before issuing the request to model.

For example, assume the client sends the following three requests to Triton with the following inputs:

```
Request1:
input0: [[[1,2,3]]] <== shape of this tensor [1,1,3]
input1: [1,4,6] <== shape of this tensor [3]

Request2:
input0: [[[4,5,6]], [[7,8,9]]] <== shape of this tensor [2,1,3]
input1: [2,4,6] <== shape of this tensor [3]

Request3:
input0: [[[10,11,12]]] <== shape of this tensor [1,1,3]
input1: [1,4,6] <== shape of this tensor [3]
```

Assuming these requests get batched together, they would be delivered to the model as:


```
Batched Requests to model:
input0: [[[1,2,3]], [[4,5,6]], [[7,8,9]], [[10,11,12]]] <== shape of this tensor [4,1,3]
input1: [4, 4, 6] <== shape of this tensor [3]

```

Currently, only TensorRT supports shape tensors.
Read [Shape Tensor I/O](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#shape_tensor_io) to learn more about shape tensors.

## Non-Linear I/O Formats

For models that process input or output data in non-linear formats, the _is_non_linear_format_io_ property must be set.
The following example model configuration shows how to specify that INPUT0 and INPUT1 use non-linear I/O data formats.

```
  name: "mytensorrtmodel"
  platform: "tensorrt_plan"
  max_batch_size: 8
  input [
    {
      name: "INPUT0"
      data_type: TYPE_FP16
      dims: [ 3,224,224 ]
      is_non_linear_format_io: true
    },
    {
      name: "INPUT1"
      data_type: TYPE_FP16
      dims: [ 3,224,224 ]
      is_non_linear_format_io: true
    }
  ]
  output [
    {
      name: "OUTPUT0"
      data_type: TYPE_FP16
      dims: [ 1,3 ]
     }
  ]
```

Currently, only TensorRT supports this property.
To learn more about I/O formats, refer to the [I/O Formats documentation](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#reformat-free-network-tensors).

## Version Policy

Each model can have one or more [versions](model_repository.md#model-versions).
The *ModelVersionPolicy* property of the model configuration is used to set one of the following policies.

* *All*: All versions of the model that are available in the model repository are available for inferencing.
  ```version_policy: { all: {}}```

* *Latest*: Only the latest ‘n’ versions of the model in the repository are available for inferencing.
  The latest versions of the model are the numerically greatest version numbers.
  ```version_policy: { latest: { num_versions: 2}}```

* *Specific*: Only the specifically listed versions of the model are available for inferencing.
  ```version_policy: { specific: { versions: [1,3]}}```

If no version policy is specified, then *Latest* (with n=1) is used as the default, indicating that only the most recent version of the model is made available by Triton.
In all cases, the [addition or removal of version subdirectories](model_management.md) from the model repository can change which model version is used on subsequent inference requests.

The following configuration specifies that all versions of the model will be available from the server.

```
  platform: "tensorrt_plan"
  max_batch_size: 8
  input [
    {
      name: "input0"
      data_type: TYPE_FP32
      dims: [ 16 ]
    },
    {
      name: "input1"
      data_type: TYPE_FP32
      dims: [ 16 ]
    }
  ]
  output [
    {
      name: "output0"
      data_type: TYPE_FP32
      dims: [ 16 ]
    }
  ]
  version_policy: { all { }}
```

## Instance Groups

Triton can provide multiple [instances of a
model](architecture.md#concurrent-model-execution) so that multiple
inference requests for that model can be handled simultaneously.
The
model configuration *ModelInstanceGroup* property is used to specify
the number of execution instances that should be made available and
what compute resource should be used for those instances.

### Multiple Model Instances

By default, a single execution instance of the model is created for each GPU available in the system.
The instance-group setting can be used to place multiple execution instances of a model on every GPU or on only certain GPUs.
For example, the following configuration will make two execution instances of the model available on each
system GPU.

```
  instance_group [
    {
      count: 2
      kind: KIND_GPU
    }
  ]
```

And the following configuration will place one execution instance on GPU 0 and two execution instances on each of GPUs 1 and 2.

```
  instance_group [
    {
      count: 1
      kind: KIND_GPU
      gpus: [ 0 ]
    },
    {
      count: 2
      kind: KIND_GPU
      gpus: [ 1, 2 ]
    }
  ]
```

For a more detailed example of using instance groups, see [this guide](https://github.com/triton-inference-server/tutorials/tree/main/Conceptual_Guide/Part_2-improving_resource_utilization#concurrent-model-execution).

### CPU Model Instance

The instance group setting is also used to enable execution of a model on the CPU.
A model can be executed on the CPU even if there is a GPU available in the system.
The following places two execution instances on the CPU.

```
  instance_group [
    {
      count: 2
      kind: KIND_CPU
    }
  ]
```

If no `count` is specified for a KIND_CPU instance group, then the default instance count will be 2 for selected backends (Onnxruntime).
All other backends will default to 1.

### Host Policy

The instance group setting is associated with a host policy.
The following configuration will associate all instances created by the instance group setting with host policy "policy_0".
By default the host policy will be set according to the device kind of the instance, for instance, KIND_CPU is "cpu", KIND_MODEL is "model", and KIND_GPU is "gpu_\<gpu_id\>".

```
  instance_group [
    {
      count: 2
      kind: KIND_CPU
      host_policy: "policy_0"
    }
  ]
```

### Rate Limiter Configuration

Instance group optionally specifies [rate limiter](rate_limiter.md) configuration which controls how the rate limiter operates on the instances in the group.
The rate limiter configuration is ignored if rate limiting is off.
If rate limiting is on and if an instance_group does not provide this configuration, then the execution on the model instances belonging to this group will not be limited in any way by the rate limiter.
The configuration includes the following specifications:

#### Resources

The set of [resources](rate_limiter.md#resources) required to execute a model instance.
The "name" field identifies the resource and "count" field refers to the number of copies of the resource that the model instance in the group requires to run.
The "global" field specifies whether the resource is per-device or shared globally across the system.
Loaded models can not specify a resource with the same name both as global and non-global.
If no resources are provided then Triton assumes the execution of a model instance does not require any resources and will start executing as soon as the model instance is available.

#### Priority

Priority serves as a weighting value to be used for prioritizing across all the instances of all the models.
An instance with priority 2 will be given 1/2 the number of scheduling chances as an instance with priority 1.

The following example specifies that the instances in the group require four "R1" and two "R2" resources for execution.
Resource "R2" is a global resource.
Additionally, the rate-limiter priority of the instance_group is 2.

```
  instance_group [
    {
      count: 1
      kind: KIND_GPU
      gpus: [ 0, 1, 2 ]
      rate_limiter {
        resources [
          {
            name: "R1"
            count: 4
          },
          {
            name: "R2"
            global: True
            count: 2
          }
        ]
        priority: 2
      }
    }
  ]
```

The above configuration creates 3 model instances, one on each device (0, 1 and 2).
The three instances will not contend for "R1" among themselves as "R1" is local for their own device, however, they will contend for "R2" because it is specified as a global resource which means "R2" is shared across the system.
Though these instances don't contend for "R1" among themselves, they will contend for "R1" with other model instances that include "R1" in their resource requirements and run on the same device as them.

### Ensemble Model Instance Groups

[Ensemble models](architecture.md#ensemble-models) are an abstraction Triton uses to execute a user-defined pipeline of models.
Since there is no physical instance associated with an ensemble model, the `instance_group` field can not be specified for it.

However, each composing model that makes up an ensemble can specify `instance_group` in its config file and individually support parallel execution as described above when the ensemble receives multiple requests.

## CUDA Compute Capability

Similar to the `default_model_filename` field, you can optionally specify the `cc_model_filenames` field to map the GPU's [CUDA Compute Capability](https://developer.nvidia.com/cuda-gpus) to a corresponding model filename at model load time.
This is particularly useful for TensorRT models, since they are generally tied to a specific compute capability.

```
cc_model_filenames [
  {
    key: "7.5"
    value: "resnet50_T4.plan"
  },
  {
    key: "8.0"
    value: "resnet50_A100.plan"
  }
]
```

## Optimization Policy

The model configuration *ModelOptimizationPolicy* property is used to specify optimization and prioritization settings for a model.
These settings control if/how a model is optimized by the backend and how it is scheduled and executed by Triton.
See the [ModelConfig protobuf](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto) and [optimization](optimization.md#framework-specific-optimization) documentation for the currently available settings.

## Model Warmup

When a model is loaded by Triton the corresponding [backend](https://github.com/triton-inference-server/backend/blob/main/README.md) initializes for that model.
For some backends, some or all of this initialization is deferred until the model receives its first inference request (or first few inference requests).
As a result, the first (few) inference requests can be significantly slower due to deferred initialization.

To avoid these initial, slow inference requests, Triton provides a configuration option that enables a model to be "warmed up" so that it is completely initialized before the first inference request is received.
When the *ModelWarmup* property is defined in a model configuration, Triton will not show the model as being ready for inference until model warmup has completed.

The model configuration *ModelWarmup* is used to specify warmup settings for a model.
The settings define a series of inference requests that Triton will create to warm-up each model instance.
A model instance will be served only if it completes the requests successfully.
Note that the effect of warming up models varies depending on the framework backend, and it will cause Triton to be less responsive to model updates, so users should experiment and choose the configuration that suits their needs.
See the [ModelWarmup protobuf](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto) documentation for the currently available settings, and [L0_warmup](https://github.com/triton-inference-server/server/blob/main/qa/L0_warmup/test.sh) for examples on specifying different variants of warmup samples.

## Response Cache

The model configuration `response_cache` section has an `enable` boolean used to enable the Response Cache for this model.

```
response_cache {
  enable: true
}
```

In addition to enabling the cache in the model config, a `--cache-config` must be specified when starting the server to enable caching on the server-side.
See the [Response Cache](response_cache.md) doc for more details on enabling server-side caching.


================================================
FILE: docs/user_guide/model_execution.md
================================================
<!--
# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Concurrent Model Execution

The Triton architecture allows multiple models and/or multiple
instances of the same model to execute in parallel on the same
system. The system may have zero, one, or many GPUs. The following
figure shows an example with two models; model0 and model1. Assuming
Triton is not currently processing any request, when two requests
arrive simultaneously, one for each model, Triton immediately
schedules both of them onto the GPU and the GPU’s hardware scheduler
begins working on both computations in parallel. Models executing on
the system's CPU are handled similarly by Triton except that the
scheduling of the CPU threads executing each model is handled by the
system's OS.

![Triton Multi-Model Execution Diagram](images/multi_model_exec.png)

By default, if multiple requests for the same model arrive at the same
time, Triton will serialize their execution by scheduling only one at
a time on the GPU, as shown in the following figure.

![Triton Multi-Model Serial Execution
Diagram](images/multi_model_serial_exec.png)

Triton provides a [model configuration option called
instance-group](model_configuration.md#instance-groups) that allows
each model to specify how many parallel executions of that model
should be allowed. Each such enabled parallel execution is referred to
as an *instance*. By default, Triton gives each model a single
instance for each available GPU in the system. By
using the instance_group field in the model configuration, the number
of execution instances for a model can
be changed. The following figure shows model execution when model1
is configured to allow three instances. As shown in the figure, the
first three model1 inference requests are immediately executed in
parallel. The fourth model1 inference request must wait until one of
the first three executions completes before beginning.

![Triton Multi-Model Parallel Execution
Diagram](images/multi_model_parallel_exec.png)

# Models And Schedulers

Triton supports multiple scheduling and batching algorithms that can
be selected independently for each model.  This section describes
*stateless* and *stateful* models and how Triton provides
schedulers to support those model types. For a given model, the
selection and configuration of the scheduler is done with the [model's
configuration file](model_configuration.md).

## Stateless Models

With respect to Triton's schedulers, a *stateless* model does not
maintain state between inference requests. Each inference performed on
a stateless model is independent of all other inferences using that
model.

Examples of stateless models are CNNs such as image classification and
object detection. The [default
scheduler](scheduler.md#default-scheduler) or [dynamic
batcher](batcher.md#dynamic-batcher) can be used as the
scheduler for these stateless models.

RNNs and similar models which do have internal memory can be stateless
as long as the state they maintain does not span inference
requests. For example, an RNN that iterates over all elements in a
batch is considered stateless by Triton if the internal state is not
carried between batches of inference requests. The [default
scheduler](scheduler.md#default-scheduler) can be used for
these stateless models. The [dynamic
batcher](batcher.md#dynamic-batcher) cannot be used since
the model is typically not expecting the batch to represent multiple
inference requests.

## Stateful Models

With respect to Triton's schedulers, a *stateful* model does maintain
state between inference requests. The model is expecting multiple
inference requests that together form a sequence of inferences that
must be routed to the same model instance so that the state being
maintained by the model is correctly updated. Moreover, the model may
require that Triton provide *control* signals indicating, for example,
the start and end of the sequence.

The [sequence batcher](batcher.md#sequence-batcher) must
be used for these stateful models. As explained below, the sequence
batcher ensures that all inference requests in a sequence get routed
to the same model instance so that the model can maintain state
correctly. The sequence batcher also communicates with the model to
indicate when a sequence is starting, when a sequence is ending, when
a sequence has an inference request ready for execution, and the
*correlation ID* of the sequence.

When making inference requests for a stateful model, the client
application must provide the same correlation ID to all requests in a
sequence, and must also mark the start and end of the sequence. The
correlation ID allows Triton to identify that the requests belong to
the same sequence.

### Control Inputs

For a stateful model to operate correctly with the sequence batcher,
the model must typically accept one or more *control* input tensors
that Triton uses to communicate with the model. The
*ModelSequenceBatching::Control* section of the [model
configuration](model_configuration.md) indicates how the model exposes
the tensors that the sequence batcher should use for these
controls. All controls are optional. Below is a portion of a model
configuration that shows an example configuration for all the
available control signals.

```
sequence_batching {
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "END"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "CORRID"
      control [
        {
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_UINT64
        }
      ]
    }
  ]
}
```

* **Start**: The start input tensor is specified using
  CONTROL_SEQUENCE_START in the configuration. The example
  configuration indicates that the model has an input tensor called
  START with a 32-bit floating point data-type. The sequence batcher
  will define this tensor when executing an inference on the
  model. The START tensor must be 1-dimensional with size equal to the
  batch-size. Each element in the tensor indicates if the sequence in
  the corresponding batch slot is starting or not. In the example
  configuration, fp32_false_true indicates that a sequence start is
  indicated by tensor element equal to 1, and non-start is indicated
  by tensor element equal to 0.

* **End**: The end input tensor is specified using
  CONTROL_SEQUENCE_END in the configuration. The example configuration
  indicates that the model has an input tensor called END with a
  32-bit floating point data-type. The sequence batcher will define
  this tensor when executing an inference on the model. The END tensor
  must be 1-dimensional with size equal to the batch-size. Each
  element in the tensor indicates if the sequence in the corresponding
  batch slot is ending or not. In the example configuration,
  fp32_false_true indicates that a sequence end is indicated by tensor
  element equal to 1, and non-end is indicated by tensor element equal
  to 0.

* **Ready**: The ready input tensor is specified using
  CONTROL_SEQUENCE_READY in the configuration. The example
  configuration indicates that the model has an input tensor called
  READY with a 32-bit floating point data-type. The sequence batcher
  will define this tensor when executing an inference on the
  model. The READY tensor must be 1-dimensional with size equal to the
  batch-size. Each element in the tensor indicates if the sequence in
  the corresponding batch slot has an inference request ready for
  inference. In the example configuration, fp32_false_true indicates
  that a sequence ready is indicated by tensor element equal to 1, and
  non-ready is indicated by tensor element equal to 0.

* **Correlation ID**: The correlation ID input tensor is specified
  using CONTROL_SEQUENCE_CORRID in the configuration. The example
  configuration indicates that the model has an input tensor called
  CORRID with an unsigned 64-bit integer data-type. The sequence
  batcher will define this tensor when executing an inference on the
  model. The CORRID tensor must be 1-dimensional with size equal to
  the batch-size. Each element in the tensor indicates the correlation
  ID of the sequence in the corresponding batch slot.

### State Management for Stateful Models
[Implicit State Management](implicit_state_management.md#implicit-state-management)

================================================
FILE: docs/user_guide/model_management.md
================================================
<!--
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Model Management

Triton provides model management APIs as part of the [HTTP/REST and GRPC protocols, and as part of the C API](../customization_guide/inference_protocols.md).
Triton operates in one of three model control modes: `NONE`, `EXPLICIT`, or `POLL`.
The model control mode determines how changes to the model repository are handled by Triton and which of these protocols and APIs are available.

## Model Control Mode `NONE`

- Triton attempts to load all models in the model repository at startup.
  Models that Triton is not able to load will be marked as "UNAVAILABLE" and will not be available for inferencing.

- Changes to the model repository while the server is running will be ignored.
  Model load and unload requests using the [model control protocol](../protocol/extension_model_repository.md) will have no effect and will return an error response.

- This model control mode is selected by specifying `--model-control-mode=none` when starting Triton.

- This is the default model control mode.

> [!IMPORTANT]
> Changing the model repository while Triton is running must be done carefully, as explained in [Modifying the Model Repository](#modifying-the-model-repository).

## Model Control Mode `EXPLICIT`

- At startup, Triton loads only those models specified explicitly with the `--load-model` command-line option.

- To load ALL models at startup, specify `--load-model=*` as the ONLY `--load-model` argument.
  Specifying `--load-model=*` in conjunction with another `--load-model` argument will result in error.

- If `--load-model` is not specified then no models are loaded at startup.
  Models that Triton is not able to load will be marked as "UNAVAILABLE" and will not be available for inferencing.

- After startup, all model load and unload actions must be initiated explicitly by using the [model control protocol](../protocol/extension_model_repository.md).
  The response status of the model control request indicates success or failure of the load or unload action.
  When attempting to reload an already loaded model, the existing model should be explicitly unloaded prior to the updated version being loaded.

- This model control mode is enabled by specifying `--model-control-mode=explicit`.

### Using Alternate Memory Allocation Libraries

If you are seeing some memory growth when using the [model control protocol](../protocol/extension_model_repository.md) for loading and unloading models, it is possible that it's not an actual memory leak but rather the system's `malloc` heuristics that prevent memory from being released back to the OS right away.

To improve memory performance, you can consider switching from `malloc` to [`tcmalloc`](https://github.com/google/tcmalloc) or to [`jemalloc`](https://github.com/jemalloc/jemalloc) by setting the `LD_PRELOAD` environment variable when running Triton, as shown below:

- Using `tcmalloc`:

  ```bash
  LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libtcmalloc.so.4:${LD_PRELOAD} tritonserver --model-repository=/models ...
  ```

- Using `jemalloc`:

  ```bash
  LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so:${LD_PRELOAD} tritonserver --model-repository=/models ...
  ```

We recommend experimenting with both `tcmalloc` and `jemalloc` to determine which one works better for your use case,
as they have different strategies for memory allocation and deallocation and may perform differently depending on the workload.

Both `tcmalloc` and `jemalloc` libraries are already installed within the Triton container.
However, if you need to install them, you can do so using the following commands:

- Install `tcmalloc`:

  ```bash
  apt-get install gperf libgoogle-perftools-dev
  ```

- Install `jemalloc`:

  ```bash
  apt-get install libjemalloc-dev
  ```

## Model Control Mode `POLL`

- Triton attempts to load all models in the model repository at startup.
  Models that Triton is not able to load will be marked as "UNAVAILABLE" and will not be available for inferencing.

- Changes to the model repository will be detected and Triton will attempt to load and unload models as necessary based on those changes.
  - When reloading a model fails, the already loaded model will be unchanged and will remain loaded.
  - When reloading a model succeeds, the existing loaded model will be replaced by a newly loaded instance without loss of availability.

Changes to the model repository may not be detected immediately because Triton polls the repository periodically.
You can control the polling interval with the `--repository-poll-secs` option.
The console log or the [model ready protocol](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md)
or the index operation of the [model control protocol](../protocol/extension_model_repository.md) can be used to determine when model repository changes have taken effect.

> [!WARNING]
> There is no synchronization between when Triton polls the model repository and when you make any changes to the repository.
> As a result Triton could observe partial and incomplete changes that lead to unexpected behavior.
> For this reason `POLL` mode is not recommended for use in production environments.

Model load and unload requests using the [model control protocol](../protocol/extension_model_repository.md) will have no effect and will return an error response.

This model control mode is enabled by specifying `--model-control-mode=poll` and by setting `--repository-poll-secs` to a non-zero value when starting Triton.
Changing the model repository while Triton is running must be done carefully, as explained in
[Modifying the Model Repository](#modifying-the-model-repository).

In `POLL` mode Triton responds to the following model repository changes:

- Versions may be added and removed from models by adding and removing the corresponding version subdirectory.
  Triton will allow in-flight requests to complete even if they are using a removed version of the model.
  New requests for a removed model version will fail.
  Depending on the model's [version policy](model_configuration.md#version-policy), changes to the available versions may change which model version is served by default.

- Existing models can be removed from the repository by removing the corresponding model directory.
  Triton will allow in-flight requests to any version of the removed model to complete.
  New requests for a removed model will fail.

- New models can be added to the repository by adding a new model directory.

- The [model configuration file](model_configuration.md) (config.pbtxt) can be changed and Triton will unload and reload the model to pick up the new model configuration.

- Label(s) files providing labels for outputs that represent classifications can be added, removed, or modified and Triton will unload and reload the model to pick up the new labels.
  If a label file is added or removed, the corresponding edit to the `label_filename` property of the output it corresponds to in the [model configuration](model_configuration.md) must be performed at the same time.

## Modifying the Model Repository

Each model in a model repository [resides in its own sub-directory](model_repository.md#repository-layout).
The activity allowed on the contents of a model's sub-directory varies depending on how Triton is using that model.
The state of a model can be determined by using the [model metadata](../customization_guide/inference_protocols.md#inference-protocols-and-apis) or [repository index](../protocol/extension_model_repository.md#index) APIs.

- When a model is actively loading or unloading, no files or directories within that sub-directory may be added, removed, or modified.

- When a model has never been loaded or has been completely unloaded, then its entire model sub-directory can be removed or its contents modified.

- When the model has been completely loaded, then any files or directories within its sub-directory can be added, removed, or modified; except for shared libraries implementing the model's backend.

  - Triton uses the backend shared libraries while the model is loading, so removing or modifying them is not recommended as it can destabilize the Triton process.

  - To update a model's backend, the model cannot be loaded by Triton:

    1. Unload any loaded models relying on the backend-to-be-updated.
    2. Modify the backend's shared libraries.
    3. Load the previously unloaded models.

  > [!TIP]
  > With some operating systems, it may also be possible to simply move the existing shared-libraries to another location outside of the model repository, copy in the new shared libraries, and then reload the model.

- When only the model instance configuration in the [model configuration file](model_configuration.md) (config.pbtxt) is modified (i.e. increasing/decreasing the instance count), Triton will update the model rather than reloading it.

- This update takes place when either a load request is received under [Model Control Mode EXPLICIT](#model-control-mode-explicit) or a change to the [model configuration file](model_configuration.md) (config.pbtxt) is detected under [Model Control Mode POLL](#model-control-mode-poll).

  - The new model configuration may also be passed to Triton via the [load API](../protocol/extension_model_repository.md#load).

  - Some text editors create a swap file in the model directory when the [model configuration file](model_configuration.md) (config.pbtxt) is modified in place.
    The swap file is not part of the model configuration, so its presence in the model directory may be detected as a new file and cause the model to fully reload when only an update is expected.

- When a sequence model is *updated* (i.e. decreasing the instance count), Triton will wait until in-flight sequences are completed, or otherwise cleared, before the instance behind the sequence is removed.

  - When the instance count is decreased, arbitrary instance(s) are selected among idle instances and instances with in-flight sequence(s) for removal.

- When a sequence model is *reloaded* with in-flight sequence(s) (i.e. changes to the model file), Triton does not guarantee any remaining request(s) from the in-flight sequence(s) will be routed to the same model instance for processing.

  > [!IMPORTANT]
  > It is currently the responsibility of the user to ensure any in-flight sequence(s) are completed before reloading a sequence model.

## Concurrently Loading Models

To reduce service downtime, Triton loads new models in the background while continuing to serve inferences on existing models.
Based on use case and performance requirements, the optimal amount of resources dedicated to loading models may differ.

Triton exposes a `--model-load-thread-count` option to configure the number of threads dedicated to loading models, which defaults to `4`.

To set this parameter with the C API, refer to `TRITONSERVER_ServerOptionsSetModelLoadThreadCount` in [tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).


================================================
FILE: docs/user_guide/model_repository.md
================================================
<!--
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Model Repository

**Is this your first time setting up a model repository?** Check out [these tutorials](https://github.com/triton-inference-server/tutorials/tree/main/Conceptual_Guide/Part_1-model_deployment#setting-up-the-model-repository) to begin your Triton journey!

The Triton Inference Server serves models from one or more model repositories that are specified when the server is started.
While Triton is running, the models being served can be modified as described in [Model Management](model_management.md).

## Repository Layout

These repository paths are specified when Triton is started using the `--model-repository` option.
The `--model-repository` option can be specified multiple times to include models from multiple repositories.
The directories and files that compose a model repository must follow a required layout.
Assuming a repository path is specified as follows.

```bash
$ tritonserver --model-repository=<model-repository-path>
```

The corresponding repository layout must be:

```
  <model-repository-path>/
    <model-name>/
      [config.pbtxt]
      [<output-labels-file> ...]
      [configs]/
        [<custom-config-file> ...]
      <version>/
        <model-definition-file>
      <version>/
        <model-definition-file>
      ...
    <model-name>/
      [config.pbtxt]
      [<output-labels-file> ...]
      [configs]/
        [<custom-config-file> ...]
      <version>/
        <model-definition-file>
      <version>/
        <model-definition-file>
      ...
    ...
```

Within the top-level model repository directory there must be zero or more `<model-name>` sub-directories.
Each of the `<model-name>` sub-directories contains the repository information for the corresponding model.
The config.pbtxt file describes the [model configuration](model_configuration.md) for the model.
For some models, config.pbtxt is required while for others it is optional. See
[Auto-Generated Model Configuration](model_configuration.md#auto-generated-model-configuration) for more information.

Each `<model-name>` directory may include an optional sub-directory `configs`.
Within the `configs` directory there must be zero or more `<custom-config-file>` files with the `.pbtxt` file extension. For more information about how the custom model configuration is handled by Triton see [Custom Model Configuration](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#custom-model-configuration).

Each `<model-name>` directory must have at least one numeric sub-directory representing a version of the model.
For more information about how the model versions are handled by Triton see [Model Versions](#model-versions). Each model is executed by a specific
[backend](https://github.com/triton-inference-server/backend/blob/main/README.md).
Within each version sub-directory there must be the files required by that backend. For example, models that use framework backends such as TensorRT, PyTorch, ONNX and OpenVINO must provide the [framework-specific model files](#model-files).

## Model Repository Locations

Triton can access models from one or more locally accessible file paths, from Google Cloud Storage, from Amazon S3, and from Azure Storage.

### Local File System

For a locally accessible file-system the absolute path must be specified.

```bash
$ tritonserver --model-repository=/path/to/model/repository ...
```

### Cloud Storage with Environment variables

#### Google Cloud Storage

For a model repository residing in Google Cloud Storage, the repository path must be prefixed with gs://.

```bash
$ tritonserver --model-repository=gs://bucket/path/to/model/repository ...
```

When using Google Cloud Storage, credentials are fetched and attempted in the following order:

1. [GOOGLE_APPLICATION_CREDENTIALS environment variable](https://cloud.google.com/docs/authentication/application-default-credentials#GAC)
   - The environment variable should be set and contains the location of a credential JSON file.
   - Authorized user credential will be attempted first, and then service account credential.

2. [The attached service account](https://cloud.google.com/docs/authentication/application-default-credentials#attached-sa)
   - A value for the [Authorization HTTP header](https://googleapis.dev/cpp/google-cloud-storage/1.42.0/classgoogle_1_1cloud_1_1storage_1_1oauth2_1_1ComputeEngineCredentials.html#a8c3a5d405366523e2f4df06554f0a676) should be obtainable.

3. Anonymous credential (also known as public bucket)
   - The bucket (and objects) should have granted `get` and `list` permission to all users.
   - One way to grant such permission is by adding both [storage.objectViewer](https://cloud.google.com/storage/docs/access-control/iam-roles#standard-roles) and [storage.legacyBucketReader](https://cloud.google.com/storage/docs/access-control/iam-roles#legacy-roles) predefined roles for "allUsers" to the bucket, for example:

     ```
     $ gsutil iam ch allUsers:objectViewer "${BUCKET_URL}"
     $ gsutil iam ch allUsers:legacyBucketReader "${BUCKET_URL}"
     ```

By default, Triton makes a local copy of a remote model repository in a temporary folder, which is deleted after Triton server is shut down.
If you would like to control where remote model repository is copied to, you may set the `TRITON_GCS_MOUNT_DIRECTORY` environment variable to a path pointing to the existing folder on your local machine.

```bash
export TRITON_GCS_MOUNT_DIRECTORY=/path/to/your/local/directory
```

**Make sure that `TRITON_GCS_MOUNT_DIRECTORY` exists on your local machine and that it is empty.**

#### S3

For a model repository residing in Amazon S3, the path must be prefixed with s3://.

```bash
$ tritonserver --model-repository=s3://bucket/path/to/model/repository ...
```

For a local or private instance of S3, the prefix s3:// must be followed by the host and port (separated by a colon) and subsequently the bucket path.

```bash
$ tritonserver --model-repository=s3://host:port/bucket/path/to/model/repository ...
```

By default, Triton uses HTTP to communicate with your instance of S3.
If your instance of S3 supports HTTPS and you wish for Triton to use the HTTPS protocol to communicate with it, you can specify the same in the model repository path by prefixing the host name with https://.

```bash
$ tritonserver --model-repository=s3://https://host:port/bucket/path/to/model/repository ...
```

When using S3, the credentials and default region can be passed by using either the [aws config](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) command or via the respective [environment variables](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html).
If the environment variables are set they will take a higher priority and will be used by Triton instead of the credentials set using the aws config command.

By default, Triton makes a local copy of a remote model repository in a temporary folder, which is deleted after Triton server is shut down.
If you would like to control where the remote model repository is copied to, you may set the `TRITON_AWS_MOUNT_DIRECTORY` environment variable to a path pointing to an existing folder on your local machine.

```bash
export TRITON_AWS_MOUNT_DIRECTORY=/path/to/your/local/directory
```

**Make sure that `TRITON_AWS_MOUNT_DIRECTORY` exists on your local machine and that it is empty.**

#### Azure Storage

For a model repository residing in Azure Storage, the repository path must be prefixed with as://.

```bash
$ tritonserver --model-repository=as://account_name/container_name/path/to/model/repository ...
```

When using Azure Storage, you must set the `AZURE_STORAGE_ACCOUNT` and `AZURE_STORAGE_KEY` environment variables to an account that has access to the Azure Storage repository.

If you don't know your `AZURE_STORAGE_KEY` and have your Azure CLI correctly configured, here's an example of how to find a key corresponding to your `AZURE_STORAGE_ACCOUNT`:

```bash
$ export AZURE_STORAGE_ACCOUNT="account_name"
$ export AZURE_STORAGE_KEY=$(az storage account keys list -n $AZURE_STORAGE_ACCOUNT --query "[0].value")
```

By default, Triton makes a local copy of a remote model repository in a temporary folder, which is deleted after Triton server is shut down.
If you would like to control where the remote model repository is copied to, you may set the `TRITON_AZURE_MOUNT_DIRECTORY` environment variable to a path pointing to an existing folder on your local machine.

```bash
export TRITON_AZURE_MOUNT_DIRECTORY=/path/to/your/local/directory
```

**Make sure that `TRITON_AZURE_MOUNT_DIRECTORY` exists on your local machine and that it is empty.**


### Cloud Storage with Credential file (Beta)

*This feature is currently in beta and may be subject to change.*

To group the credentials into a single file for Triton, you may set the `TRITON_CLOUD_CREDENTIAL_PATH` environment variable to a path pointing to a JSON file of the following format, residing in the local file system.

```
export TRITON_CLOUD_CREDENTIAL_PATH="cloud_credential.json"
```

"cloud_credential.json":
```
{
  "gs": {
    "": "PATH_TO_GOOGLE_APPLICATION_CREDENTIALS",
    "gs://gcs-bucket-002": "PATH_TO_GOOGLE_APPLICATION_CREDENTIALS_2"
  },
  "s3": {
    "": {
      "secret_key": "AWS_SECRET_ACCESS_KEY",
      "key_id": "AWS_ACCESS_KEY_ID",
      "region": "AWS_DEFAULT_REGION",
      "session_token": "",
      "profile": ""
    },
    "s3://s3-bucket-002": {
      "secret_key": "AWS_SECRET_ACCESS_KEY_2",
      "key_id": "AWS_ACCESS_KEY_ID_2",
      "region": "AWS_DEFAULT_REGION_2",
      "session_token": "AWS_SESSION_TOKEN_2",
      "profile": "AWS_PROFILE_2"
    }
  },
  "as": {
    "": {
      "account_str": "AZURE_STORAGE_ACCOUNT",
      "account_key": "AZURE_STORAGE_KEY"
    },
    "as://Account-002/Container": {
      "account_str": "",
      "account_key": ""
    }
  }
}
```

To match a credential, the longest matching credential name against the start of a given path is used. For example: `gs://gcs-bucket-002/model_repository` will match the "gs://gcs-bucket-002" GCS credential, and `gs://any-other-gcs-bucket` will match the "" GCS credential.

This feature is intended for use-cases in which multiple credentials are needed for each cloud storage provider. Be sure to replace any credential paths/keys with the actual paths/keys from the example above.

If the `TRITON_CLOUD_CREDENTIAL_PATH` environment variable is not set, the [Cloud Storage with Environment variables](#cloud-storage-with-environment-variables) will be used.

### Caching of Cloud Storage

Triton currently doesn't perform file caching for cloud storage.
However, this functionality can be implemented through [repository agent API](https://github.com/triton-inference-server/server/blob/bbbcad7d87adc9596f99e3685da5d6b73380514f/docs/customization_guide/repository_agents.md) by injecting a proxy, which checks a specific local directory for caching given the cloud storage (original path) of the model, and then decides if cached files may be used.

## Model Versions

Each model can have one or more versions available in the model repository.
Each version is stored in its own, numerically named, subdirectory where the name of the subdirectory corresponds to the version number of the model.
The subdirectories that are not numerically named, or have names that start with the character "0" will be ignored.
Each model configuration specifies a [version policy](model_configuration.md#version-policy) that controls which of the versions in the model repository are made available by Triton at any given time.

## Model Files

The contents of each model version sub-directory are determined by the type of the model and the requirements of the [backend](https://github.com/triton-inference-server/backend/blob/main/README.md) that supports the model.

### TensorRT Models

A TensorRT model definition is called a *Plan*. A TensorRT Plan is a single file that by default must be named model.plan.
This default name can be overridden using the *default_model_filename* property in the [model configuration](model_configuration.md).

A TensorRT Plan is specific to a GPU's [CUDA Compute Capability](https://developer.nvidia.com/cuda-gpus).
As a result, TensorRT models will need to set the *cc_model_filenames* property in the [model configuration](model_configuration.md) to associate each Plan file with the corresponding Compute Capability.

A minimal model repository for a TensorRT model is:

```
  <model-repository-path>/
    <model-name>/
      config.pbtxt
      1/
        model.plan
```

### ONNX Models

An ONNX model is a single file or a directory containing multiple files. By default the file or directory must be named model.onnx.
This default name can be overridden using the *default_model_filename* property in the [model configuration](model_configuration.md).

Triton supports all ONNX models that are supported by the version of [ONNX Runtime](https://github.com/Microsoft/onnxruntime) being used by Triton.
Models will not be supported if they use a [stale ONNX opset version](https://github.com/Microsoft/onnxruntime/blob/master/docs/Versioning.md#version-matrix) or [contain operators with unsupported types](https://github.com/microsoft/onnxruntime/issues/1122).

A minimal model repository for an ONNX model contained in a single file is:

```
  <model-repository-path>/
    <model-name>/
      config.pbtxt
      1/
        model.onnx
```

An ONNX model composed from multiple files must be contained in a directory.
By default this directory must be named model.onnx but can be overridden using the *default_model_filename* property in the [model configuration](model_configuration.md).
The main model file within this directory must be named model.onnx.
A minimal model repository for an ONNX model contained in a directory is:

```
  <model-repository-path>/
    <model-name>/
      config.pbtxt
      1/
        model.onnx/
           model.onnx
           <other model files>
```

### TorchScript Models

A TorchScript model is a single file that by default must be named model.pt.
This default name can be overridden using the *default_model_filename* property in the [model configuration](model_configuration.md).
It is possible that some models traced with different versions of PyTorch may not be supported by Triton due to changes in the underlying opset.

A minimal model repository for a TorchScript model is:

```
  <model-repository-path>/
    <model-name>/
      config.pbtxt
      1/
        model.pt
```

### OpenVINO Models

An OpenVINO model is represented by two files, a `*.xml` and a `*.bin` file. By default the `*.xml` file must be named model.xml.
This default name can be overridden using the *default_model_filename* property in the [model configuration](model_configuration.md).

A minimal model repository for an OpenVINO model is:

```
  <model-repository-path>/
    <model-name>/
      config.pbtxt
      1/
        model.xml
        model.bin
```

### Python Models

The [Python backend](https://github.com/triton-inference-server/python_backend) allows you to run Python code as a model within Triton.
By default the Python script must be named model.py but this default name can be overridden using the *default_model_filename* property in the [model configuration](model_configuration.md).

A minimal model repository for a Python model is:

```
  <model-repository-path>/
    <model-name>/
      config.pbtxt
      1/
        model.py
```

### DALI Models

The [DALI backend](https://github.com/triton-inference-server/dali_backend) allows you to run a [DALI pipeline](https://github.com/NVIDIA/DALI) as a model within Triton.
In order to use this backend, you need to generate a file, by default named `model.dali`, and include it in your model repository.
Please refer to the [DALI backend documentation](https://github.com/triton-inference-server/dali_backend#how-to-use) for a description of how to generate `model.dali`.
The default model file name can be overridden using the *default_model_filename* property in the [model configuration](model_configuration.md).

A minimal model repository for a DALI model is:

```
  <model-repository-path>/
    <model-name>/
      config.pbtxt
      1/
        model.dali
```


================================================
FILE: docs/user_guide/optimization.md
================================================
<!--
# Copyright (c) 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Optimization

The Triton Inference Server has many features that you can use to
decrease latency and increase throughput for your model. This section
discusses these features and demonstrates how you can use them to
improve the performance of your model. As a prerequisite you should
follow the [QuickStart](../getting_started/quickstart.md) to get Triton and client
examples running with the example model repository.

This section focuses on understanding latency and throughput tradeoffs
for a single model. The [Model Analyzer](model_analyzer.md) section
describes a tool that helps you understand the GPU memory utilization
of your models so you can decide how to best run multiple models on a
single GPU.

Unless you already have a client application suitable for measuring
the performance of your model on Triton, you should familiarize
yourself with
[Performance Analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md).
The Performance Analyzer is an essential tool for optimizing your model's
performance.

As a running example demonstrating the optimization features and
options, we will use an ONNX Inception model that you can obtain
by following the [QuickStart](../getting_started/quickstart.md). As a baseline we use
perf_analyzer to determine the performance of the model using a [basic
model configuration that does not enable any performance
features](../examples/model_repository/inception_onnx/config.pbtxt).

```
$ perf_analyzer -m inception_onnx --percentile=95 --concurrency-range 1:4
...
Inferences/Second vs. Client p95 Batch Latency
Concurrency: 1, throughput: 62.6 infer/sec, latency 21371 usec
Concurrency: 2, throughput: 73.2 infer/sec, latency 34381 usec
Concurrency: 3, throughput: 73.2 infer/sec, latency 50298 usec
Concurrency: 4, throughput: 73.4 infer/sec, latency 65569 usec
```

The results show that our non-optimized model configuration gives a
throughput of about 73 inferences per second. Note how there is a
significant throughput increase going from one concurrent request to
two concurrent requests and then throughput levels off. With one
concurrent request Triton is idle during the time when the response is
returned to the client and the next request is received at the
server. Throughput increases with a concurrency of two because Triton
overlaps the processing of one request with the communication of the
other. Because we are running perf_analyzer on the same system as
Triton, two requests are enough to completely hide the communication
latency.

## Optimization Settings

For most models, the Triton feature that provides the largest
performance improvement is [dynamic
batching](batcher.md#dynamic-batcher).
[This example](https://github.com/triton-inference-server/tutorials/tree/main/Conceptual_Guide/Part_2-improving_resource_utilization#dynamic-batching--concurrent-model-execution)
 sheds more light on conceptual details. If your model does not
support batching then you can skip ahead to [Model
Instances](#model-instances).


### Dynamic Batcher

The dynamic batcher combines individual inference requests into a
larger batch that will often execute much more efficiently than
executing the individual requests independently. To enable the dynamic
batcher stop Triton, add the following line to the end of the [model
configuration file for
inception_onnx](../examples/model_repository/inception_onnx/config.pbtxt),
and then restart Triton.

```
dynamic_batching { }
```

The dynamic batcher allows Triton to handle a higher number of
concurrent requests because those requests are combined for
inference. To see this run perf_analyzer with request concurrency from
1 to 8.

```
$ perf_analyzer -m inception_onnx --percentile=95 --concurrency-range 1:8
...
Inferences/Second vs. Client p95 Batch Latency
Concurrency: 1, throughput: 66.8 infer/sec, latency 19785 usec
Concurrency: 2, throughput: 80.8 infer/sec, latency 30732 usec
Concurrency: 3, throughput: 118 infer/sec, latency 32968 usec
Concurrency: 4, throughput: 165.2 infer/sec, latency 32974 usec
Concurrency: 5, throughput: 194.4 infer/sec, latency 33035 usec
Concurrency: 6, throughput: 217.6 infer/sec, latency 34258 usec
Concurrency: 7, throughput: 249.8 infer/sec, latency 34522 usec
Concurrency: 8, throughput: 272 infer/sec, latency 35988 usec
```

With eight concurrent requests the dynamic batcher allows Triton to
provide 272 inferences per second without increasing latency
compared to not using the dynamic batcher.

Instead of having perf_analyzer collect data for a range of request
concurrency values we can use a couple of simple rules that
typically apply when perf_analyzer is running on the same system as
Triton. The first rule is that for minimum latency set the request
concurrency to 1 and disable the dynamic batcher and use only 1 [model
instance](#model-instances). The second rule is that for maximum
throughput set the request concurrency to be
`2 * <maximum batch size> * <model instance count>`. We will discuss model
instances [below](#model-instances), for now we are working with one model
instance. So for maximum-batch-size 4 we want to run perf_analyzer
with request concurrency of `2 * 4 * 1 = 8`.

```
$ perf_analyzer -m inception_onnx --percentile=95 --concurrency-range 8
...
Inferences/Second vs. Client p95 Batch Latency
Concurrency: 8, throughput: 267.8 infer/sec, latency 35590 usec
```

### Model Instances

Triton allows you to specify how many copies of each model you want to
make available for inferencing. By default you get one copy of each
model, but you can specify any number of instances in the model
configuration by using [instance
groups](model_configuration.md#instance-groups). Typically, having two
instances of a model will improve performance because it allows
overlap of memory transfer operations (for example, CPU to/from GPU)
with inference compute. Multiple instances also improve GPU
utilization by allowing more inference work to be executed
simultaneously on the GPU. Smaller models may benefit from more than
two instances; you can use perf_analyzer to experiment.

To specify two instances of the inception_onnx model: stop Triton,
remove any dynamic batching settings you may have previously added to
the model configuration (we discuss combining dynamic batcher and
multiple model instances below), add the following lines to the end of
the [model configuration
file](../examples/model_repository/inception_onnx/config.pbtxt), and
then restart Triton.

```
instance_group [ { count: 2 }]
```

Now run perf_analyzer using the same options as for the baseline.

```
$ perf_analyzer -m inception_onnx --percentile=95 --concurrency-range 1:4
...
Inferences/Second vs. Client p95 Batch Latency
Concurrency: 1, throughput: 70.6 infer/sec, latency 19547 usec
Concurrency: 2, throughput: 106.6 infer/sec, latency 23532 usec
Concurrency: 3, throughput: 110.2 infer/sec, latency 36649 usec
Concurrency: 4, throughput: 108.6 infer/sec, latency 43588 usec
```

In this case having two instances of the model increases throughput
from about 73 inferences per second to about 110 inferences per second
compared with one instance.

It is possible to enable both the dynamic batcher and multiple model
instances, for example, change the model configuration file to include
the following.

```
dynamic_batching { }
instance_group [ { count: 2 }]
```

Now run perf_analyzer following the same maximum-throughput rule used for
just the dynamic batcher above, which for two model instances gives a
request concurrency of `2 * 4 * 2 = 16`.

```
$ perf_analyzer -m inception_onnx --percentile=95 --concurrency-range 16
...
Inferences/Second vs. Client p95 Batch Latency
Concurrency: 16, throughput: 289.6 infer/sec, latency 59817 usec
```

We see that two instances do not improve throughput much while
increasing latency, compared with just using the dynamic batcher and
one instance. This occurs because for this model the dynamic batcher
alone is capable of fully utilizing the GPU and so adding additional
model instances does not provide any performance advantage. In general
the benefit of the dynamic batcher and multiple instances is model
specific, so you should experiment with perf_analyzer to determine the
settings that best satisfy your throughput and latency requirements.

## Framework-Specific Optimization

Triton has several optimization settings that apply to only a subset
of the supported model frameworks. These optimization settings are
controlled by the model configuration [optimization
policy](model_configuration.md#optimization-policy). Visit
[this guide](https://github.com/triton-inference-server/tutorials/tree/main/Conceptual_Guide/Part_4-inference_acceleration)
 for an end to end discussion.

### ONNX with TensorRT Optimization (ORT-TRT)

One especially powerful optimization is to use TensorRT in
conjunction with an ONNX model. As an example of TensorRT optimization
applied to an ONNX model, we will use an ONNX DenseNet model that you
can obtain by following [QuickStart](../getting_started/quickstart.md). As a baseline we
use perf_analyzer to determine the performance of the model using a
[basic model configuration that does not enable any performance
features](../examples/model_repository/densenet_onnx/config.pbtxt).

```
$ perf_analyzer -m densenet_onnx --percentile=95 --concurrency-range 1:4
...
Inferences/Second vs. Client p95 Batch Latency
Concurrency: 1, 113.2 infer/sec, latency 8939 usec
Concurrency: 2, 138.2 infer/sec, latency 14548 usec
Concurrency: 3, 137.2 infer/sec, latency 21947 usec
Concurrency: 4, 136.8 infer/sec, latency 29661 usec
```

To enable TensorRT optimization for the model: stop Triton, add the
following lines to the end of the model configuration file, and then
restart Triton.

```
optimization { execution_accelerators {
  gpu_execution_accelerator : [ {
    name : "tensorrt"
    parameters { key: "precision_mode" value: "FP16" }
    parameters { key: "max_workspace_size_bytes" value: "1073741824" }
    }]
}}
```

As Triton starts you should check the console output and wait until
Triton prints the "Starting endpoints" message. ONNX model loading can
be significantly slower when TensorRT optimization is enabled. In
production you can use [model warmup](model_configuration.md#model-warmup)
to avoid this model startup/optimization slowdown. Now
run perf_analyzer using the same options as for the baseline.

```
$ perf_analyzer -m densenet_onnx --percentile=95 --concurrency-range 1:4
...
Inferences/Second vs. Client p95 Batch Latency
Concurrency: 1, 190.6 infer/sec, latency 5384 usec
Concurrency: 2, 273.8 infer/sec, latency 7347 usec
Concurrency: 3, 272.2 infer/sec, latency 11046 usec
Concurrency: 4, 266.8 infer/sec, latency 15089 usec
```

The TensorRT optimization provided a 2x throughput improvement while
cutting latency in half. The benefit provided by TensorRT will vary
based on the model, but in general it can provide significant
performance improvement.

### ONNX with OpenVINO Optimization

ONNX models running on the CPU can also be accelerated by using
[OpenVINO](https://docs.openvinotoolkit.org/latest/index.html). To
enable OpenVINO optimization for an ONNX model, add the following
lines to the end of the model's configuration file.

```
optimization { execution_accelerators {
  cpu_execution_accelerator : [ {
    name : "openvino"
  }]
}}
```

## NUMA Optimization

Many modern CPUs are composed of multiple cores, memories and interconnects that
expose different performance characteristics depending on how threads and
data are allocated.
Triton allows you to set host policies that describe this
[NUMA](https://www.kernel.org/doc/html/latest/mm/numa.html) configuration for
your system and then assign model instances to different host policies
to exploit these NUMA properties.

### Host Policy

Triton allows you to specify, on startup, a host policy that is associated
with a policy name. A host policy will be applied to a model instance if the
instance is specified with the same policy name by using the host policy field
in [instance groups](model_configuration.md#instance-groups). Note that if not
specified, the host policy field will be set to a default name based on the
instance property.

To specify a host policy, you can pass the following option on the command line:
```
--host-policy=<policy_name>,<setting>=<value>
```

Currently, the supported settings are the following:

* *numa-node*: The NUMA node id that the host policy will be bound to. The
  host policy restricts memory allocation to the node specified.

* *cpu-cores*: The CPU cores to be run on. An instance with this host policy
  set will be running on one of those CPU cores.

Assuming that the system is configured to bind GPU 0 with NUMA node 0 which has
CPU cores from 0 to 15, the following shows setting the numa-node and cpu-cores
policies for "gpu_0":

```
$ tritonserver --host-policy=gpu_0,numa-node=0 --host-policy=gpu_0,cpu-cores=0-15 ...
```


================================================
FILE: docs/user_guide/perf_analyzer.md
================================================
<!--
# Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

Perf Analyzer documentation has been relocated to
[here](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md).


================================================
FILE: docs/user_guide/performance_tuning.md
================================================
<!--
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Deploying your trained model using Triton

Given a trained model, how do I deploy it at-scale with an optimal configuration
using Triton Inference Server?  This document is here to help answer that.

For those who like a [high level overview](#overview), below is the common flow
for most use cases.

For those who wish to jump right in, skip to the
[end-to-end example](#end-to-end-example).

For additional material, see the
[Triton Conceptual Guide tutorial](https://github.com/triton-inference-server/tutorials/tree/main/Conceptual_Guide/Part_4-inference_acceleration).

## Overview

1. Is my model compatible with Triton?
    - If your model falls under one of Triton's
    [supported backends](https://github.com/triton-inference-server/backend),
    then we can simply try to deploy the model as described in the
    [Quickstart](../getting_started/quickstart.md) guide.
    For the ONNXRuntime and TensorRT backends, the
    minimal model configuration can be inferred from the model using Triton's
    [AutoComplete](model_configuration.md#auto-generated-model-configuration)
    feature.
    This means that a `config.pbtxt` may still be provided, but is not required
    unless you want to explicitly set certain parameters.
    Additionally, by enabling verbose logging via `--log-verbose=1`, you can see
    the complete config that Triton sees internally in the server log output.
    For other backends, refer to the
    [Minimal Model Configuration](model_configuration.md#minimal-model-configuration)
    required to get started.
    - If your model does not come from a supported backend, you can look into
    the [Python Backend](https://github.com/triton-inference-server/python_backend)
    or writing a
    [Custom C++ Backend](https://github.com/triton-inference-server/backend/blob/main/examples/README.md)
    to support your model. The Python Backend provides a simple interface to
    execute requests through a generic python script, but may not be as
    performant as a Custom C++ Backend.  Depending on your use case, the Python
    Backend performance may be a sufficient tradeoff for the simplicity of
    implementation.

2. Can I run inference on my served model?
    - Assuming you were able to load your model on Triton, the next step is to
    verify that we can run inference requests and get a baseline performance
    benchmark of your model.
    Triton's
    [Perf Analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
    tool specifically fits this purpose. Here is a simplified output for
    demonstration purposes:

    ```
    # NOTE: "my_model" represents a model currently being served by Triton
    $ perf_analyzer -m my_model
    ...

    Inferences/Second vs. Client Average Batch Latency
    Concurrency: 1, throughput: 482.8 infer/sec, latency 12613 usec
    ```

    - This gives us a sanity test that we are able to successfully form input
    requests and receive output responses to communicate with the model backend
    via Triton APIs.
    - If Perf Analyzer fails to send requests and it is unclear from the error
    how to proceed, then you may want to sanity check that your model
    `config.pbtxt` inputs/outputs match what the model expects. If the config
    is correct, check that the model runs successfully using its original
    framework directly.  If you don't have your own script or tool to do so,
    [Polygraphy](https://github.com/NVIDIA/TensorRT/tree/main/tools/Polygraphy)
    is a useful tool to run sample inferences on your model via various
    frameworks.  Currently, Polygraphy supports ONNXRuntime, TensorRT, and
    TensorFlow 1.x.
    - The definition of "performing well" is subject to change for each use
    case. Some common metrics are throughput, latency, and GPU utilization.
    There are many variables that can be tweaked just within your model
    configuration (`config.pbtxt`) to obtain different results.
    - As your model, config, or use case evolves,
    [Perf Analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
    is a great tool to quickly verify model functionality and performance.

3. How can I improve my model performance?
    - To further understand the best model configuration you can provide to
    Triton for your use case, Triton's
    [Model Analyzer](https://github.com/triton-inference-server/model_analyzer)
    tool can help.
    Model Analyzer can automatically or
    [manually](https://github.com/triton-inference-server/model_analyzer/blob/main/docs/config_search.md)
    search through config combinations to find the optimal Triton configuration
    to meet your constraints.  After running Model Analyzer to find the optimal
    configurations for your model/use case, you can transfer the generated
    config files to your [Model Repository](model_repository.md).
    Model Analyzer provides a
    [Quickstart](https://github.com/triton-inference-server/model_analyzer/blob/main/docs/quick_start.md)
    guide with some examples to walk through.
    - Upon serving the model with the newly optimized configuration file found
    by Model Analyzer and running Perf Analyzer again, you should expect to find
    better performance numbers in most cases compared to a default config.
    - Some parameters that can be tuned for a model may not be exposed to Model
    Analyzer's automatic search since they don't apply to all models.
    For instance, [backends](https://github.com/triton-inference-server/backend)
    can expose backend-specific configuration options that can be tuned as well.
    The [ONNXRuntime
    Backend](https://github.com/triton-inference-server/onnxruntime_backend),
    for example, has several
    [parameters](https://github.com/triton-inference-server/onnxruntime_backend#model-config-options)
    that affect the level of parallelization when executing inference on a
    model.
    These backend-specific options may be worth investigating if the defaults
    are not providing sufficient performance.  To tune custom sets of
    parameters, Model Analyzer supports
    [Manual Configuration Search](https://github.com/triton-inference-server/model_analyzer/blob/main/docs/config_search.md).
    - To learn more about further optimizations for your model configuration,
    see the [Optimization](optimization.md) docs.

### Other Areas of Interest

1. My model performs slowly when it is first loaded by Triton
(cold-start penalty), what do I do?
    - Triton exposes the ability to run
    [ModelWarmup](model_configuration.md#model-warmup) requests when first
    loading the model to ensure that the model is sufficiently warmed up before
    being marked "READY" for inference.

2. Why doesn't my model perform significantly faster on GPU?
    - Most official backends supported by Triton are optimized for GPU inference
    and should perform well on GPU out of the box.
    - Triton exposes options for you to optimize your model further on the GPU.
    Triton's
    [Framework Specific Optimizations](optimization.md#framework-specific-optimization)
    goes into further detail on this topic.
    - Complete conversion of your model to a backend fully optimized for GPU
    inference such as [TensorRT](https://developer.nvidia.com/tensorrt) may
    provide even better results.
    You may find more Triton-specific details about TensorRT in the
    [TensorRT Backend](https://github.com/triton-inference-server/tensorrt_backend).
    - If none of the above can help get sufficient GPU-accelerated performance
    for your model, the model may simply be better designed for CPU execution
    and the [OpenVINO Backend](https://github.com/triton-inference-server/openvino_backend) may
    help further optimize your CPU execution.

## End-to-end Example

> **Note**
> If you have never worked with Triton before, you may be interested in first
checking out the [Quickstart](../getting_started/quickstart.md) example.
> Some basic understanding of Triton may be useful for the following section,
but this example is meant to be straightforward enough without prior experience.

Let's take an ONNX model as our example since ONNX is designed to be a format
that can be [easily
exported](https://github.com/onnx/tutorials#converting-to-onnx-format) from most
other frameworks.

1. Create a [Model Repository](model_repository.md) and download our example
`densenet_onnx` model into it.

```bash
# Create model repository with placeholder for model and version 1
mkdir -p ./models/densenet_onnx/1

# Download model and place it in model repository
wget -O models/densenet_onnx/1/model.onnx \
    https://github.com/onnx/models/raw/main/validated/vision/classification/densenet-121/model/densenet-7.onnx
```

2. Create a minimal [Model Configuration](model_configuration.md) for the
`densenet_onnx` model in our [Model Repository](model_repository.md) at
`./models/densenet_onnx/config.pbtxt`.

> **Note**
> This is a slightly simplified version of another [example
config](../examples/model_repository/densenet_onnx/config.pbtxt) that utilizes
other [Model Configuration](model_configuration.md) features not necessary for
this example.

```protobuf
name: "densenet_onnx"
backend: "onnxruntime"
max_batch_size: 0
input: [
  {
    name: "data_0",
    data_type: TYPE_FP32,
    dims: [ 1, 3, 224, 224]
  }
]
output: [
  {
    name: "prob_1",
    data_type: TYPE_FP32,
    dims: [ 1, 1000, 1, 1 ]
  }
]
```

> **Note**
> As of the 22.07 release, both Triton and Model Analyzer support fully
auto-completing the config file for
[backends that support it](model_configuration.md#auto-generated-model-configuration).
> So for an ONNX model, for example, this step can be skipped unless you want to
explicitly set certain parameters.

3. Start the server container

To serve our model, we will use the server container which comes pre-installed
with a `tritonserver` binary.

```bash
# Start server container
docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:24.12-py3

# Start serving your models
tritonserver --model-repository=/mnt/models
```

> **Note**
> The `-v $PWD:/mnt` is mounting your current directory on the host into the
`/mnt` directory inside the container.
> So if you created your model repository in `$PWD/models`, you will find it
inside the container at `/mnt/models`.
> You can change these paths as needed. See
[docker volume](https://docs.docker.com/storage/volumes/) docs for more information on
how this works.


To check if the model loaded successfully, we expect to see our model in a
`READY` state in the output of the previous command:

```
...
I0802 18:11:47.100537 135 model_repository_manager.cc:1345] successfully loaded 'densenet_onnx' version 1
...
+---------------+---------+--------+
| Model         | Version | Status |
+---------------+---------+--------+
| densenet_onnx | 1       | READY  |
+---------------+---------+--------+
...
```

4. Verify the model can run inference

To verify our model can perform inference, we will use the `triton-client`
(SDK) container, started in the commands below, which comes with `perf_analyzer`
pre-installed.

In a separate shell, we use Perf Analyzer to sanity check that we can run
inference and get a baseline for the kind of performance we expect from this
model.

In the example below, Perf Analyzer is sending requests to models served on the
same machine (`localhost` from the server container via `--network=host`).
However, you may also test models being served remotely at some `<IP>:<PORT>`
by setting the `-u` flag, such as `perf_analyzer -m densenet_onnx -u
127.0.0.1:8000`.

```bash
# Start the SDK container interactively
docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:24.12-py3-sdk

# Benchmark model being served from step 3
perf_analyzer -m densenet_onnx --concurrency-range 1:4
```

```
...
Inferences/Second vs. Client Average Batch Latency
Concurrency: 1, throughput: 265.147 infer/sec, latency 3769 usec
Concurrency: 2, throughput: 890.793 infer/sec, latency 2243 usec
Concurrency: 3, throughput: 937.036 infer/sec, latency 3199 usec
Concurrency: 4, throughput: 965.21 infer/sec, latency 4142 usec
```

5. Run Model Analyzer to find the best configurations for our model

While Model Analyzer comes pre-installed in the SDK (client) container and
supports various modes of connecting to a Triton server, for simplicity we will
install Model Analyzer in our `server` container to use the `local`
(default) mode.
To learn more about other methods of connecting Model Analyzer to a running
Triton Server, see the `--triton-launch-mode` Model Analyzer flag.

```bash
# Enter server container interactively
docker exec -ti triton-server bash

# Stop existing tritonserver process if still running
# because model-analyzer will start its own server
SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`
kill ${SERVER_PID}

# Install model analyzer
pip install --upgrade pip
pip install triton-model-analyzer wkhtmltopdf

# Profile the model using local (default) mode
# NOTE: This may take some time, in this example it took ~10 minutes
model-analyzer profile \
  --model-repository=/mnt/models \
  --profile-models=densenet_onnx \
  --output-model-repository-path=results

# Summarize the profiling results
model-analyzer analyze --analysis-models=densenet_onnx
```

Example Model Analyzer output summary:

> In 51 measurements across 6 configurations, `densenet_onnx_config_3` provides
the best throughput: **323 infer/sec**.
>
> **This is a 92% gain over the default configuration (168 infer/sec), under the
given constraints.**

| Model Config Name | Max Batch Size | Dynamic Batching | Instance Count | p99 Latency (ms) | Throughput (infer/sec) | Max GPU Memory Usage (MB) | Average GPU Utilization (%) |
|---|---|---|---|---|---|---|---|
| densenet_onnx_config_3 | 0 | Enabled | 4/GPU | 35.8 | 323.13 | 3695 | 58.6 |
| densenet_onnx_config_2 | 0 | Enabled | 3/GPU | 59.575 | 295.82 | 3615 | 58.9 |
| densenet_onnx_config_4 | 0 | Enabled | 5/GPU | 69.939 | 291.468 | 3966 | 58.2 |
| densenet_onnx_config_default | 0 | Disabled | 1/GPU | 12.658 | 167.549 | 3116 | 51.3 |

In the table above, we see that setting our GPU [Instance
Count](model_configuration.md#instance-groups) to 4 allows us to achieve the
highest throughput and almost lowest latency on this system.

Also, note that this `densenet_onnx` model has a fixed batch-size that is
explicitly specified in the first dimension of the Input/Output `dims`,
therefore the `max_batch_size` parameter is set to 0 as described
[here](model_configuration.md#maximum-batch-size).
For models that support dynamic batch size, Model Analyzer would also tune the
`max_batch_size` parameter.

> **Warning**
> These results are specific to the system running the Triton server, so for
example, on a smaller GPU we may not see improvement from increasing the GPU
instance count.
> In general, running the same configuration on systems with different hardware
(CPU, GPU, RAM, etc.) may provide different results, so it is important to
profile your model on a system that accurately reflects where you will deploy
your models for your use case.

6. Extract optimal config from Model Analyzer results

In our example above, `densenet_onnx_config_3` was the optimal configuration.
So let's extract that `config.pbtxt` and put it back in our model repository for future use.

```bash
# (optional) Backup our original config.pbtxt (if any) to another directory
cp /mnt/models/densenet_onnx/config.pbtxt /tmp/original_config.pbtxt

# Copy over the optimal config.pbtxt from Model Analyzer results to our model repository
cp ./results/densenet_onnx_config_3/config.pbtxt /mnt/models/densenet_onnx/
```

Now that we have an optimized Model Configuration, we are ready to take our
model to deployment.  For further manual tuning, read the [Model
Configuration](model_configuration.md) and [Optimization](optimization.md) docs
to learn more about Triton's complete set of capabilities.

In this example, we happened to get both the highest throughput and almost
lowest latency from the same configuration, but in some cases this is a tradeoff
that must be made. Certain models or configurations may achieve a higher
throughput but also incur a higher latency in return.  It is worthwhile to fully
inspect the reports generated by Model Analyzer to ensure your model performance
meets your requirements.


================================================
FILE: docs/user_guide/ragged_batching.md
================================================
<!--
# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Ragged Batching

Triton provides a [dynamic batching feature](batcher.md#dynamic-batcher),
which combines multiple requests for the same model execution to provide larger
throughput. By default, the requests can be dynamically batched only if
each input has the same shape across the requests. In order to exploit dynamic
batching for cases where input shapes often vary, the client would need to pad
the input tensors in the requests to the same shape.

Ragged batching is a feature that avoids explicit padding by allowing the user
to specify which inputs don't require the shape check. The user can specify
such an input (a ragged input) by setting the `allow_ragged_batch` field in the
model config:

```
...
input [
  {
    name: "input0"
    data_type: TYPE_FP32
    dims: [ 16 ]
    allow_ragged_batch: true
  }
]
...
```

How ragged inputs are processed in a batch of requests depends on the backend
implementation. The backends, such as
[ONNX Runtime backend](https://github.com/triton-inference-server/onnxruntime_backend),
[TensorFlow backend](https://github.com/triton-inference-server/tensorflow_backend),
[PyTorch backend](https://github.com/triton-inference-server/pytorch_backend),
and [TensorRT backend](https://github.com/triton-inference-server/tensorrt_backend),
require models to accept ragged inputs as 1-dimensional tensors.
These backends concatenate the request inputs into the 1-dimensional tensor.

Because the concatenated input doesn't track the start and end index for each
request, the backends often require the model to have additional input(s),
[batch input](#batch-input), that describe various information about the batch
formed.

## Batch Input

Batch input is often used in combination with ragged input to provide
information about each batch element, such as the element count
of an input for each request in the batch. A batch input is generated by
Triton instead of being provided in the request, because the information can
only be finalized after the dynamic batch is formed.

Besides element count,
there are other batch input kinds that the user can specify, see the
[protobuf documentation](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto)
for details.

## Example on Ragged Input and Batch Input

Suppose you have a model that accepts 1 variable-length input tensor, INPUT,
with shape [ -1, -1 ]. The first dimension is the batch dimension, and the
second dimension is the variable-length content. Suppose also that the client
sends 3 requests of shapes [ 1, 3 ], [ 1, 4 ], [ 1, 5 ]. To exploit dynamic
batching, the straightforward way to implement this model would expect INPUT
shape [ -1, -1 ] and assume that all inputs were padded to the same length so
that all requests become shape [ 1, 5 ] and thus Triton can batch and send them
to the model as a single [ 3, 5 ] tensor. In this case, there will be overhead
from padding the tensor and from extra model computation on the padded content.
Below is the input config:

```
max_batch_size: 16
input [
  {
    name: "INPUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
```

With Triton ragged batching, the model will be implemented to expect INPUT shape
[ -1 ] and an additional batch input, INDEX, shape [ -1 ] which the model should
use to interpret the batch elements in INPUT. For such a model,
the client requests don't need to be padded and they can be sent as they are
(with shapes [ 1, 3 ], [ 1, 4 ], [ 1, 5 ]). The backends discussed above will
batch the input into a tensor of shape [ 12 ] which contains the 3 + 4 + 5
concatenation of the requests. Triton also creates the batch input tensor of
shape [ 3 ] with value [ 3, 7, 12 ] which gives the offset into the input tensor
where each batch element ends. Below is the input config:

```
max_batch_size: 16
input [
  {
    name: "INPUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
    allow_ragged_batch: true
  }
]
batch_input [
  {
    kind: BATCH_ACCUMULATED_ELEMENT_COUNT
    target_name: "INDEX"
    data_type: TYPE_FP32
    source_input: "INPUT"
  }
]
```

The above example uses the
[`BATCH_ACCUMULATED_ELEMENT_COUNT`](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto)
kind of batch input. Other kinds described in the [protobuf documentation](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto) operate similarly.

================================================
FILE: docs/user_guide/rate_limiter.md
================================================
<!--
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Rate Limiter

Rate limiter manages the rate at which requests are scheduled on
model instances by Triton. The rate limiter operates across all
models loaded in Triton to allow *cross-model prioritization*.

In absence of rate limiting (--rate-limit=off), Triton schedules
execution of a request (or set of requests when using dynamic
batching) as soon as a model instance is available. This behavior
is typically best suited for performance. However, there can be
cases where running all the models simultaneously places excessive
load on the server. For instance, model execution on some
frameworks dynamically allocates memory. Running all such models
simultaneously may lead to the system going out-of-memory.

The rate limiter allows postponing the inference execution on some
model instances such that not all of them run simultaneously.
The model priorities are used to decide which model instance
to schedule next.

## Using Rate Limiter

To enable rate limiting, users must set the `--rate-limit` option when
launching tritonserver. For more information, consult usage of
the option emitted by `tritonserver --help`.

The rate limiter is controlled by the rate limiter configuration given
for each model instance, as described in [rate limiter
configuration](model_configuration.md#rate-limiter-configuration).
The rate limiter configuration includes
[resources](model_configuration.md#resources) and
[priority](model_configuration.md#priority) for the model instances
defined by the instance group.

### Resources

Resources are identified by a unique name and a count indicating
the number of copies of the resource. By default, model instance
uses no rate-limiter resources. By listing a resource/count the
model instance indicates that it requires that many resources to
be available on the model instance device before it can be allowed
to execute. While the model instance is executing, the specified number
of resources is allocated to it, and they are released only when the
execution is over. The available number of resource copies
is, by default, the max across all model instances that list that
resource. For example, assume three loaded model instances A, B
and C each specifying the following resource requirements for
a single device:

```
A: [R1: 4, R2: 4]
B: [R2: 5, R3: 10, R4: 5]
C: [R1: 1, R3: 7, R4: 2]
```

By default, based on those model instance requirements, the server
will create the following resources with the indicated copies:

```
R1: 4
R2: 5
R3: 10
R4: 5
```

These values ensure that all model instances can be successfully
scheduled. The default for a resource can be overridden by giving
it explicitly on the command line using the `--rate-limit-resource` option.
`tritonserver --help` will provide more detailed usage
instructions.

By default, the available resource copies are per-device and resource
requirements for a model instance are enforced against corresponding
resources associated with the device where the model instance runs.
The `--rate-limit-resource` allows users to provide different resource
copies to different devices. Rate limiter can also handle global
resources. Instead of creating resource copies per-device, a global
resource will have a single copy all across the system.

Rate limiter depends upon the model configuration to determine
whether the resource is global or not. See
[resources](model_configuration.md#resources) for more details on
how to specify them in model configuration.

For tritonserver, running on a two device machine, invoked with
`--rate-limit-resource=R1:10 --rate-limit-resource=R2:5:0 --rate-limit-resource=R2:8:1 --rate-limit-resource=R3:2`
, available resource copies are:

```
GLOBAL   => [R3: 2]
DEVICE 0 => [R1: 10, R2: 5]
DEVICE 1 => [R1: 10, R2: 8]
```

where R3 appears as a global resource in one of the loaded models.

### Priority

In a resource-constrained system, there will be contention for
the resources among model instances to execute their inference
requests. The priority setting helps determine which model instance
to select for next execution. See [priority](model_configuration.md#priority)
for more information.


================================================
FILE: docs/user_guide/request_cancellation.md
================================================
<!--
# Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Request Cancellation

Starting from r23.10, Triton supports handling request cancellation received
from the gRPC client or a C API user. Long running inference requests, such
as those for auto-generative large language models, may run for an indeterminate
amount of time or an indeterminate number of steps. Additionally, clients may
enqueue a large number of requests as part of a sequence or request stream
and later determine the results are no longer needed. Continuing to process
requests whose results are no longer required can significantly impact server
resources.

## Issuing Request Cancellation

### In-Process C API

[In-Process Triton Server C API](../customization_guide/inprocess_c_api.md) has been enhanced with `TRITONSERVER_InferenceRequestCancel`
and `TRITONSERVER_InferenceRequestIsCancelled` to issue cancellation and query
whether cancellation has been issued on an inflight request respectively. Read more
about the APIs in [tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).


### gRPC Endpoint

In addition, the [gRPC endpoint](../customization_guide/inference_protocols.md#httprest-and-grpc-protocols) can
now detect cancellation from the client and attempt to terminate the request.
At present, only the gRPC Python client supports issuing request cancellation
to the server endpoint. See [request-cancellation](https://github.com/triton-inference-server/client#request-cancellation)
for more details on how to issue cancellation requests from the client side.
See gRPC guide on RPC [cancellation](https://grpc.io/docs/guides/cancellation/) for
finer details.

## Handling in Triton Core

Triton core checks for requests that have been cancelled at some critical points
when using [dynamic](batcher.md#dynamic-batcher) or
[sequence](batcher.md#sequence-batcher) batching. The check is
also performed between
[ensemble](./scheduler.md#ensemble-scheduler) steps, and further
processing is terminated if the request is cancelled.

On detecting a cancelled request, Triton core responds with CANCELLED status. If a request
is cancelled when using [sequence_batching](batcher.md#sequence-batcher),
then all the pending requests in the same sequence will also be cancelled. A sequence
consists of the requests that have an identical sequence id.

**Note**: Currently, Triton core does not detect the cancellation status of a request once
it is forwarded to the [rate limiter](./rate_limiter.md). Improving the request cancellation
detection and handling within Triton core is work in progress.

## Handling in Backend

Upon receiving a request cancellation, Triton does its best to terminate the request
at various points. However, once a request has been given to the backend
for execution, it is up to the individual backends to detect and handle
request termination.
Currently, the following backends support early termination:
- [TensorRT-LLM backend](https://github.com/triton-inference-server/tensorrtllm_backend)
- [vLLM backend](https://github.com/triton-inference-server/vllm_backend)
- [python backend](https://github.com/triton-inference-server/python_backend)

Python backend is a special case where we expose the APIs to detect cancellation
status of the request but it is up to the `model.py` developer to detect whether
the request is cancelled and terminate further execution.

**For the backend developer**: The backend APIs have also been enhanced to let the
backend detect whether the request received from Triton core has been cancelled.
See `TRITONBACKEND_RequestIsCancelled` and `TRITONBACKEND_ResponseFactoryIsCancelled`
in [tritonbackend.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonbackend.h)
for more details. The backend upon detecting request cancellation can stop processing
it any further.
The Python models running behind Python backend can also query the cancellation status
of request and response_sender. See [this](https://github.com/triton-inference-server/python_backend#request-cancellation-handling)
section in python backend documentation for more details.


================================================
FILE: docs/user_guide/response_cache.md
================================================
<!--
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Triton Response Cache

## Overview

In this document an *inference request* is the model name, model version, and
input tensors (name, shape, datatype and tensor data) that make up a request
submitted to Triton. An inference result is the output tensors (name, shape,
datatype and tensor data) produced by an inference execution. The response cache
is used by Triton to hold inference results generated for previously executed
inference requests. Triton will maintain the response cache so that inference
requests that hit in the cache will not need to execute a model to produce
results and will instead extract their results from the cache. For some use
cases this can significantly reduce the inference request latency.

Triton accesses the response cache with a hash of the inference request that
includes the model name, model version and model inputs. If the hash is found in
the cache, the corresponding inference result is extracted from the cache and
used for the request. When this happens there is no need for Triton to execute
the model to produce the inference result. If the hash is not found in the
cache, Triton executes the model to produce the inference result, and then
records that result in the cache so that subsequent inference requests can
(re)use those results.

## Usage

In order for caching to be used on a given model, it must be enabled
on both the server-side, and in the model's
[model config](model_configuration.md#response-cache). See the following
sections below for more details.

### Enable Caching on Server-side

The response cache is enabled on the server-side by specifying a cache
implementation name `<cache>` and corresponding configuration when starting
the Triton server.

Through the CLI, this translates to setting
`tritonserver --cache-config <cache>,<key>=<value> ...`. For example:
```
tritonserver --cache-config local,size=1048576
```

> [!NOTE]
> If using a non-interactive shell, you may need to specify the argument without
> the space like so: `--cache-config=<cache>,<key>=<value>`.

For in-process C API applications, this translates to calling
`TRITONSERVER_SetCacheConfig(const char* cache_implementation, const char* config_json)`.

This allows users to enable/disable caching globally on server startup.

### Enable Caching for a Model

**By default, no model uses response caching even if the response cache
is enabled globally with the `--cache-config` flag.**

For a given model to use response caching, the model must also have
response caching enabled in its model configuration:
```
# config.pbtxt

response_cache {
  enable: true
}
```

This allows users to enable/disable caching for specific models.

For more information on enabling the response cache for each model, see the
[model configuration docs](model_configuration.md#response-cache).

### Cache Implementations

Starting in the 23.03 release, Triton has a set of
[TRITONCACHE APIs](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritoncache.h)
that are used to communicate with a cache implementation of the user's choice.

A cache implementation is a shared library that implements the required
TRITONCACHE APIs and is dynamically loaded on server startup, if enabled.

Triton's most recent
[tritonserver release containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)
come with the following cache implementations out of the box:
- [local](https://github.com/triton-inference-server/local_cache): `/opt/tritonserver/caches/local/libtritoncache_local.so`
- [redis](https://github.com/triton-inference-server/redis_cache): `/opt/tritonserver/caches/redis/libtritoncache_redis.so`

With these TRITONCACHE APIs, `tritonserver` exposes a new `--cache-config`
CLI flag that gives the user flexible customization of which cache implementation
to use, and how to configure it. Similar to the `--backend-config` flag,
the expected format is `--cache-config <cache_name>,<key>=<value>` and may
be specified multiple times to specify multiple keys if the cache implementation
requires it.

#### Local Cache

The `local` cache implementation is equivalent to the response cache used
internally before the 23.03 release. For more implementation specific details,
see the
[local cache implementation](https://github.com/triton-inference-server/local_cache).

When `--cache-config local,size=SIZE` is specified with a non-zero `SIZE`,
Triton allocates the requested size in CPU memory and **shares the
cache across all inference requests and across all models**.

#### Redis Cache

The `redis` cache implementation exposes the ability for Triton to communicate
with a Redis server for caching. The `redis_cache` implementation is essentially
a Redis client that acts as an intermediary between Triton and Redis.

To list a few benefits of the `redis` cache compared to the `local` cache in
the context of Triton:
- The Redis server can be hosted remotely as long as it is accessible by Triton,
  so it is not tied directly to the Triton process lifetime.
  - This means Triton can be restarted and still have access to previously cached entries.
  - This also means that Triton doesn't have to compete with the cache for memory/resource usage.
- Multiple Triton instances can share a cache by configuring each Triton instance
  to communicate with the same Redis server.
- The Redis server can be updated/restarted independently of Triton, and
  Triton will fall back to operating as it would with no cache access during
  any Redis server downtime, and log appropriate errors.

In general, the Redis server can be configured/deployed as needed for your use
case, and Triton's `redis` cache will simply act as a client of your Redis
deployment. The [Redis docs](https://redis.io/docs/) should be consulted for
questions and details about configuring the Redis server.

For Triton-specific `redis` cache implementation details/configuration, see the
[redis cache implementation](https://github.com/triton-inference-server/redis_cache).

#### Custom Cache

With the TRITONCACHE API interface, it is now possible for
users to implement their own cache to suit any use-case specific needs.
To see the required interface that must be implemented by a cache
developer, see the
[TRITONCACHE API header](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritoncache.h).
The `local` or `redis` cache implementations may be used as reference.

Upon successfully developing and building a custom cache, the resulting shared
library (ex: `libtritoncache_<name>.so`) must be placed in the cache directory
similar to where the `local` and `redis` cache implementations live. By default,
this directory is `/opt/tritonserver/caches`, but a custom directory may be
specified with `--cache-dir` as needed.

To put this example together, if the custom cache were named "custom"
(this name is arbitrary), by default Triton would expect to find the
cache implementation at `/opt/tritonserver/caches/custom/libtritoncache_custom.so`.

## Deprecation Notes

> **Note**
> Prior to 23.03, enabling the `local` cache used to be done through setting a non-zero size
> (in bytes) when Triton was launched using the `--response-cache-byte-size` flag.
>
> Starting in 23.03, the `--response-cache-byte-size` flag is now deprecated and
> `--cache-config` should be used instead. For backwards compatibility,
> `--response-cache-byte-size` will continue to function under the hood by being
> converted to the corresponding `--cache-config` argument, but it will default
> to using the `local` cache implementation. It is not possible to choose other
> cache implementations using the `--response-cache-byte-size` flag.
>
> For example, `--response-cache-byte-size 1048576`
> would be equivalent to `--cache-config local,size=1048576`. However, the
> `--cache-config` flag is much more flexible and should be used instead.

> **Warning**
>
> The `local` cache implementation may fail to initialize for very small values
> of `--cache-config local,size=<small_value>` or `--response-cache-byte-size`
> (ex: less than 1024 bytes) due to internal memory management requirements.
> If you encounter an initialization error for a relatively small cache size,
> try increasing it.
>
> Similarly, the size is upper bounded by the available RAM on the system.
> If you encounter an initial allocation error for a very large cache size
> setting, try decreasing it.

## Performance

The response cache is intended to be used for use cases where a significant
number of duplicate requests (cache hits) are expected and therefore would
benefit from caching. The term "significant" here is subjective to the use
case, but a simple interpretation would be to consider the proportion of
expected cache hits/misses, as well as the average time spent computing
a response.

For cases where cache hits are common and computation is expensive,
the cache can significantly improve overall performance.

For cases where most requests are unique (cache misses) or the compute is
fast/cheap (the model is not compute-bound), the cache can negatively impact
the overall performance due to the overhead of managing and communicating with
the cache.

## Ensemble Model Caching

Top-level requests to ensemble models support caching if all composing models
within the ensemble support caching as well.

Similarly, if a composing model in the ensemble doesn't support caching,
then the ensemble model would inherit this limitation and not support
caching either. See the known limitations below for what types of models
support caching.

A cache hit on an ensemble will skip sending requests to the composing models
entirely, and return the cached response from the ensemble model.

A cache miss on an ensemble will fall back to standard inference and the request
will proceed to the composing models as usual.

The ensemble and its composing models can independently enable caching, and
each maintain their own caches when enabled. It is possible for a request
to be a cache miss at the ensemble level, but then for an intermediate model
within the ensemble to have a cache hit, depending on the inputs and outputs
of models being composed. Composing models do not need to enable caching to
enable it at the ensemble level.


## Known Limitations

- Only input tensors located in CPU memory will be hashable for accessing the
  cache. If an inference request contains input tensors not in CPU memory, the
  request will not be hashed and therefore the response will not be cached.
- Only responses with all output tensors located in CPU memory will be eligible
  for caching. If any output tensor in a response is not located in CPU memory,
  the response will not be cached.
- The cache is accessed using only the inference request hash. As a result, if
  two different inference requests generate the same hash (a hash collision),
  then Triton may incorrectly use the cached result for an inference request.
  The hash is a 64-bit value so the likelihood of collision is small.
- Only successful inference requests will have their responses cached. If a
  request fails or returns an error during inference, its response will not be
  cached.
- Only requests going through the Default Scheduler or Dynamic Batch Scheduler
  are eligible for caching. The Sequence Batcher does not currently support
  response caching.
- The response cache does not currently support
  [decoupled models](decoupled_models.md).


================================================
FILE: docs/user_guide/scheduler.md
================================================
<!--
# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Schedulers

Triton supports batch inferencing by allowing individual inference
requests to specify a batch of inputs. The inferencing for a batch of
inputs is performed at the same time which is especially important for
GPUs since it can greatly increase inferencing throughput. In many use
cases the individual inference requests are not batched, therefore,
they do not benefit from the throughput benefits of batching.

The inference server contains multiple scheduling and batching
algorithms that support many different model types and use-cases. More
information about model types and schedulers can be found in [Models
And Schedulers](architecture.md#models-and-schedulers).

## Default Scheduler

The default scheduler is used for a model if none of the
*scheduling_choice* properties are specified in the model
configuration. The default scheduler simply distributes inference
requests to all [model instances](model_configuration.md#instance-groups) configured for the
model.

## Ensemble Scheduler

The ensemble scheduler must be used for [ensemble
 models](architecture.md#ensemble-models) and cannot be used for any
 other type of model.

The ensemble scheduler is enabled and configured independently for
each model using the *ModelEnsembleScheduling* property in the model
configuration. The settings describe the models that are included in
the ensemble and the flow of tensor values between the models. See
[Ensemble Models](architecture.md#ensemble-models) for more
information and examples.

================================================
FILE: docs/user_guide/trace.md
================================================
<!--
# Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Triton Server Trace

Triton includes the capability to generate a detailed trace for
individual inference requests. Tracing is enabled by command-line
arguments when running the tritonserver executable.

`--trace-config` command line option in Triton can be used to specify
global and trace mode specific config setting. The format of this flag
is `--trace-config <mode>,<setting>=<value>`, where `<mode>`
is either `triton` or `opentelemetry`. By default, the trace mode is set to `triton`,
and the server will use Triton's trace APIs. For `opentelemetry` mode,
the server will use the [OpenTelemetry's APIs](#opentelemetry-trace-support) to generate,
collect and export traces for individual inference requests.

To specify global trace settings (level, rate, count, or mode),
the format is `--trace-config <setting>=<value>`.

An example usage, which invokes Triton's trace APIs:

```
$ tritonserver \
    --trace-config triton,file=/tmp/trace.json \
    --trace-config triton,log-frequency=50 \
    --trace-config rate=100 \
    --trace-config level=TIMESTAMPS \
    --trace-config count=100 ...
```

## Trace Settings
### Global Settings
The following table shows available global trace settings to pass to `--trace-config`
<table>
  <thead>
  <tr>
    <th>Setting</th>
    <th>Default Value</th>
    <th>Description</th>
  </tr>
  </thead>
  <tbody>
    <tr>
    <td><code>rate</code></td>
    <td>1000</td>
    <td>
      Specifies the sampling rate. The same as deprecated
      <code>--trace-rate</code>. <br/>
      For example, a value of 1000 specifies that every 1000-th inference <br/>
      request will be traced.
    </td>
    </tr>
    <tr>
    <td><code>level</code></td>
    <td>OFF</td>
    <td>
      Indicates the level of trace detail that should be collected and <br/>
      may be specified multiple times to trace multiple types of information. <br/>
      The same as deprecated <code>--trace-level</code>. <br/>
      Choices are <code>TIMESTAMPS</code> and <code>TENSORS</code>.<br/>
      <b>Note</b> that <code>opentelemetry</code> mode does not currently <br/>
      support <code>TENSORS</code> level.
    </td>
    </tr>
    <tr>
    <td><code>count</code></td>
    <td>-1</td>
    <td>
      Specifies the remaining number of traces to be collected. <br/>
      The default value of -1 specifies to never stop collecting traces. <br/>
      With a value  of 100, Triton will stop tracing requests<br/>
      after 100 traces are collected.<br/>
      The same as  deprecated <code>--trace-count</code>.
    </td>
    </tr>
    <tr>
    <td><code>mode</code></td>
    <td>triton</td>
    <td>
      Specifies which trace APIs to use for collecting traces. <br/>
      The choices are <code>triton</code> or <code>opentelemetry</code>. <br/>
    </td>
    </tr>
  </tbody>
</table>

### Triton Trace APIs Settings

The following table shows available Triton trace APIs settings for
`--trace-config triton,<setting>=<value>`.
<table>
  <thead>
  <tr>
    <th>Setting</th>
    <th>Default Value</th>
    <th>Description</th>
  </tr>
  </thead>
  <tbody>
    <tr>
    <td><code>file</code></td>
    <td>empty string</td>
    <td>
      Indicates where the trace output should be written. <br/>
      The same as deprecated <code>--trace-file</code>. <br/>
    </td>
    </tr>
    <tr>
    <td><code>log-frequency</code></td>
    <td>0</td>
    <td>
      Specifies the rate that the traces are written to file. <br/>
      For example, a value of 50 specifies that Triton will log <br/>
      to file for every 50 traces collected. <br/>
      The same as deprecated <code>--trace-log-frequency</code>.<br/>
    </td>
    </tr>
  </tbody>
</table>

In addition to the trace configuration settings in the command line, you can
modify the trace configuration using the [trace
protocol](../protocol/extension_trace.md). This option is currently not supported
when the trace mode is set to `opentelemetry`.

**Note**: the following flags are **deprecated**:

The `--trace-file` option indicates where the trace output should be
written. The `--trace-rate` option specifies the sampling rate. In
this example every 100-th inference request will be traced. The
`--trace-level` option indicates the level of trace detail that should
be collected. The `--trace-level` option may be specified multiple times to
trace multiple types of information. The `--trace-log-frequency` option specifies the
rate that the traces are written to file. In this example Triton will log to
file for every 50 traces collected. The `--trace-count` option specifies the
remaining number of traces to be collected. In this example Triton will stop
tracing more requests after 100 traces are collected.  Use the `--help` option
to get more information.

## Supported Trace Level Option

- `TIMESTAMPS`: Tracing execution timestamps of each request.
- `TENSORS`: Tracing input and output tensors during the execution.

## JSON Trace Output

The trace output is a JSON file with the following schema.

```
[
  {
    "model_name": $string,
    "model_version": $number,
    "id": $number,
    "request_id": $string,
    "parent_id": $number
  },
  {
    "id": $number,
    "timestamps": [
      { "name" : $string, "ns" : $number }
    ]
  },
  {
    "id": $number,
    "activity": $string,
    "tensor":{
      "name": $string,
      "data": $string,
      "shape": $string,
      "dtype": $string
    }
  },
  ...
]
```

Each trace is assigned an "id", which indicates the model name and
version of the inference request. If the trace is from a
model run as part of an ensemble, the "parent_id" will indicate the
"id" of the containing ensemble.
For example:
```
[
  {
    "id": 1,
    "model_name": "simple",
    "model_version": 1
  },
  ...
]
```

Each `TIMESTAMPS` trace will have one or more "timestamps" with
each timestamp having a name and the timestamp in nanoseconds ("ns").
For example:

```
[
  {"id": 1, "timestamps": [{ "name": "HTTP_RECV_START", "ns": 2356425054587444 }] },
  {"id": 1, "timestamps": [{ "name": "HTTP_RECV_END", "ns": 2356425054632308 }] },
  {"id": 1, "timestamps": [{ "name": "REQUEST_START", "ns": 2356425054785863 }] },
  {"id": 1, "timestamps": [{ "name": "QUEUE_START", "ns": 2356425054791517 }] },
  {"id": 1, "timestamps": [{ "name": "INFER_RESPONSE_COMPLETE", "ns": 2356425057587919 }] },
  {"id": 1, "timestamps": [{ "name": "COMPUTE_START", "ns": 2356425054887198 }] },
  {"id": 1, "timestamps": [{ "name": "COMPUTE_INPUT_END", "ns": 2356425057152908 }] },
  {"id": 1, "timestamps": [{ "name": "COMPUTE_OUTPUT_START", "ns": 2356425057497763 }] },
  {"id": 1, "timestamps": [{ "name": "COMPUTE_END", "ns": 2356425057540989 }] },
  {"id": 1, "timestamps": [{ "name": "REQUEST_END", "ns": 2356425057643164 }] },
  {"id": 1, "timestamps": [{ "name": "HTTP_SEND_START", "ns": 2356425057681578 }] },
  {"id": 1, "timestamps": [{ "name": "HTTP_SEND_END", "ns": 2356425057712991 }] }
]
```

Each `TENSORS` trace will contain an "activity" and a "tensor".
"activity" indicates the type of tensor, currently including
"TENSOR_QUEUE_INPUT" and "TENSOR_BACKEND_OUTPUT". "tensor" holds the details
of the tensor, including its "name", "data", "shape" and "dtype". For example:

```
[
  {
    "id": 1,
    "activity": "TENSOR_QUEUE_INPUT",
    "tensor":{
      "name": "input",
      "data": "0.1,0.1,0.1,...",
      "shape": "1,16",
      "dtype": "FP32"
    }
  }
]
```

## Trace Summary Tool

An example [trace summary tool](https://github.com/triton-inference-server/server/blob/main/qa/common/trace_summary.py) can be
used to summarize a set of traces collected from Triton. Basic usage
is:

```
$ trace_summary.py <trace file>
```

This produces a summary report for all traces in the file. HTTP and
GRPC inference requests are reported separately.

```
File: trace.json
Summary for simple (-1): trace count = 1
HTTP infer request (avg): 403.578us
	Receive (avg): 20.555us
	Send (avg): 4.52us
	Overhead (avg): 24.592us
	Handler (avg): 353.911us
  		Overhead (avg): 23.675us
  		Queue (avg): 18.019us
  		Compute (avg): 312.217us
  			Input (avg): 24.151us
  			Infer (avg): 244.186us
  			Output (avg): 43.88us
Summary for simple (-1): trace count = 1
GRPC infer request (avg): 383.601us
	Send (avg): 62.816us
	Handler (avg): 392.924us
  		Overhead (avg): 51.968us
  		Queue (avg): 21.45us
  		Compute (avg): 319.506us
  			Input (avg): 27.76us
  			Infer (avg): 227.844us
  			Output (avg): 63.902us
```

Note: The "Receive (avg)" metric is not included in the gRPC summary as the gRPC library does not provide any non-intrusive hooks to detect time spent in reading a message from the wire. Tracing an HTTP request will provide an accurate measurement of time spent reading a request from the network.

Use the -t option to get a summary for each trace in the file. This
summary shows the time, in microseconds, between different points in
the processing of an inference request. For example, the below output
shows that it took 15us from the start of handling the request until
the request was enqueued in the scheduling queue.

```
$ trace_summary.py -t <trace file>
...
simple (-1):
  	request handler start
  		15us
  	queue start
  		20us
  	compute start
  		266us
  	compute end
  		4us
  	request handler end
  		19us
  	grpc send start
  		77us
  	grpc send end
...
```

The script can also show the data flow of the first request if there are
`TENSORS` traces in the file. If the `TENSORS` traces are from an ensemble,
the data flow will be shown with the dependency of each model.

```
...
Data Flow:
	==========================================================
	Name:   ensemble
	Version:1
	QUEUE_INPUT:
		input: [[0.705676  0.830855  0.833153]]
	BACKEND_OUTPUT:
		output: [[1. 2. 7. 0. 4. 7. 9. 3. 4. 9.]]
	==========================================================
		==================================================
		Name:   test_trt1
		Version:1
		QUEUE_INPUT:
			input: [[0.705676  0.830855  0.833153]]
		BACKEND_OUTPUT:
			output1: [[1. 1. ...]]
		==================================================
		==================================================
		Name:   test_trt2
		Version:1
		QUEUE_INPUT:
			input: [[0.705676  0.830855  0.833153]]
		BACKEND_OUTPUT:
			output2: [[2. 2. ...]]
		==================================================
		==================================================
		Name:   test_py
		Version:1
		QUEUE_INPUT:
			output1: [[1. 1. ...]]
		QUEUE_INPUT:
			output2: [[2. 2. ...]]
		BACKEND_OUTPUT:
			output: [[1. 2. 7. 0. 4. 7. 9. 3. 4. 9.]]
		==================================================
...
```

The meaning of the trace timestamps is:

* HTTP Request Receive: Collected only for inference requests that use the
  HTTP protocol. The time required to read the inference request from
  the network.

* Send: The time required to send the inference response.

* Overhead: Additional time required in the HTTP endpoint to
  process the inference request and response.

* Handler: The total time spent handling the inference request, not
  including the HTTP and GRPC request/response handling.

  * Queue: The time the inference request spent in the scheduling queue.

  * Compute: The time the inference request spent executing the actual
    inference. This time includes the time spent copying input and
    output tensors. If --trace-level=TIMESTAMPS then a breakdown of the
    compute time will be provided as follows:

    * Input: The time to copy input tensor data as required by the
      inference framework / backend. This includes the time to copy
      input tensor data to the GPU.

    * Infer: The time spent executing the model to perform the
      inference.

    * Output: The time to copy output tensor data as required by the
      inference framework / backend. This includes the time to copy
      output tensor data from the GPU.

  * Overhead: Additional time required for request handling not
    covered by Queue or Compute times.

* Data Flow: The data flow of the first request. It contains the input and
  output tensors of each part of execution.

  * Name: The name of model.

  * Version: The version of model.

  * QUEUE_INPUT: The tensor entering the queue of a backend to wait for
    scheduling.

  * BACKEND_OUTPUT: The tensor in the response of a backend.

## Tracing for BLS models

Triton does not collect traces for child models invoked from
[BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting)
models by default.

To include child models in the collected traces, the user needs to provide the `trace`
argument (as shown in the example below) when constructing an InferenceRequest object.
This helps Triton associate the child model with the parent model's trace (`request.trace()`).

```python

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
  ...
    def execute(self, requests):
      ...
      for request in requests:
        ...
        inference_request = pb_utils.InferenceRequest(
            model_name='model_name',
            requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
            inputs=[<pb_utils.Tensor object>], trace = request.trace())

```

## OpenTelemetry trace support

Triton provides an option to generate and export traces using
[OpenTelemetry APIs and SDKs](https://opentelemetry.io/).

To specify OpenTelemetry mode for tracing, specify the `--trace-config`
flag as follows:

```
$ tritonserver --trace-config mode=opentelemetry \
    --trace-config opentelemetry,url=<endpoint> ...
```

Triton's OpenTelemetry trace mode uses
[Batch Span Processor](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#batch-span-processor),
which batches ended spans and sends them in bulk. Batching helps
with data compression and reduces the number of outgoing connections
required to transmit the data. This processor supports both size and
time based batching. Size-based batching is controlled by 2 parameters:
`bsp_max_export_batch_size` and `bsp_max_queue_size`, while time-based batching
is controlled by `bsp_schedule_delay`. Collected spans will be exported when
the batch size reaches `bsp_max_export_batch_size`, or the delay since the last
export reaches `bsp_schedule_delay`, whichever comes first. Additionally, the user
should make sure that `bsp_max_export_batch_size` is always less than or equal to
`bsp_max_queue_size`, otherwise the excess spans will be dropped
and trace data will be lost.

Default parameters for the Batch Span Processor are provided in
[`OpenTelemetry trace APIs settings`](#opentelemetry-trace-apis-settings).
As a general recommendation, make sure that `bsp_max_queue_size` is large enough
to hold all collected spans, and `bsp_schedule_delay` does not cause frequent
exports, which will affect Triton Server's latency. A minimal Triton trace
consists of 3 spans: top level span, model span, and compute span.

* __Top level span__: The top-level span collects timestamps for when the
request was received by Triton, and when the response was sent. Any Triton
trace contains only 1 top level span.
* __Model span__: Model spans collect information about when the request for
this model was started, when it was placed in a queue, and when it ended.
A minimal Triton trace contains 1 model span.
* __Compute span__: Compute spans record compute timestamps. A minimal
Triton trace contains 1 compute span.

The total number of spans depends on the complexity of your model.
A general rule is any base model - a single model that performs computations -
produces 1 model span and 1 compute span. For ensembles, every composing
model produces model and compute spans in addition to one model span for the
ensemble. [BLS](#tracing-for-bls-models) models produce the same number of
model and compute spans as the total number of models involved in the BLS request,
including the main BLS model.


### Differences in trace contents from Triton's trace [output](#json-trace-output)

OpenTelemetry APIs produce [spans](https://opentelemetry.io/docs/concepts/observability-primer/#spans)
that collect the same timestamps as Triton's Trace
APIs. Each span also includes `model_name`, `model_version`, `request_id`,
and `parent_id` as an [attribute](https://opentelemetry.io/docs/concepts/observability-primer/#span-attributes).

The span collects `TIMESTAMPS` that consist of a name and a timestamp
in nanoseconds, which is similar to Triton Trace APIs. However,
OpenTelemetry relies on the system's clock for event timestamps, which is based
on the system's real-time clock. On the other hand, Triton Trace APIs
report timestamps using steady clock, which is a monotonic clock that ensures
time always moves forward. This clock is not related to wall clock time
and, for example, can measure time since last reboot.


### OpenTelemetry trace APIs settings

The following table shows available OpenTelemetry trace APIs settings for
`--trace-config opentelemetry,<setting>=<value>`.
<table>
  <thead>
  <tr>
    <th>Setting</th>
    <th>Default Value</th>
    <th>Description</th>
  </tr>
  </thead>
  <tbody>
    <tr>
    <td><code>url</code></td>
    <td><code>http://localhost:4318/v1/traces</code></td>
    <td>
      <code>host:port</code> on which the receiver is going to receive
      trace data.
    </td>
    </tr>
    <tr>
    <td><code>resource</code></td>
    <td><code>service.name=triton-inference-server</code></td>
    <td>
      Key-value pairs to be used as resource attributes. <br/>
      Should be specified following the provided template:<br/>
      <code>--trace-config opentelemetry,resource=<<text>key</text>>=<<text>value</text>></code><br/>
      For example:<br/>
      <code>--trace-config opentelemetry,resource=service.name=triton</code><br/>
      <code>--trace-config opentelemetry,resource=service.version=1</code><br/>
      Alternatively, key-value attributes can be specified through <br/>
      <a href="https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_resource_attributes">
      OTEL_RESOURCE_ATTRIBUTES</a>
      environment variable.
    </td>
    </tr>
    <tr>
    <td><a href="https://opentelemetry.io/docs/specs/otel/trace/sdk/#batching-processor">
      Batch Span Processor</a>
    </td>
    <td></td><td></td>
    </tr>
    <tr>
    <td><code>bsp_max_queue_size</code></td>
    <td align="center">2048</td>
    <td>
      Maximum queue size. <br/>
      This setting can also be specified through <br/>
      <a href="https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#batch-span-processor">
      OTEL_BSP_MAX_QUEUE_SIZE</a>
      environment variable.
    </td>
    </tr>
    <tr>
    <td><code>bsp_schedule_delay</code></td>
    <td align="center">5000</td>
    <td>
      Delay interval (in milliseconds) between two consecutive exports. <br/>
      This setting can also be specified through <br/>
      <a href="https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#batch-span-processor">
      OTEL_BSP_SCHEDULE_DELAY</a>
      environment variable.
    </td>
    </tr>
    <tr>
    <td><code>bsp_max_export_batch_size</code></td>
    <td align="center">512</td>
    <td>
      Maximum batch size. Must be less than or equal to
      <code>bsp_max_queue_size</code>.<br/>
      This setting can also be specified through <br/>
      <a href="https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#batch-span-processor">
      OTEL_BSP_MAX_EXPORT_BATCH_SIZE</a>
      environment variable.
    </td>
    </tr>
  </tbody>
</table>

### OpenTelemetry Context Propagation

Triton supports [context propagation](https://opentelemetry.io/docs/concepts/context-propagation/)
in OpenTelemetry mode starting in version 24.01. Note that every request
with propagated OpenTelemetry context will be traced, regardless of `rate` and
`count` trace settings. If a user wishes to trace only those requests for which
OpenTelemetry context was injected on the client side, please start Triton with
`--trace-config rate=0`:
```
$ tritonserver \
    --trace-config rate=0 \
    --trace-config level=TIMESTAMPS \
    --trace-config count=-1 \
    --trace-config mode=opentelemetry
```
Please be aware that this option is subject to change in future releases.

#### How to inject OpenTelemetry context on the client side

For C++ clients, please refer to [gRPC](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/examples/grpc/README.md)
and [HTTP](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/examples/http/README.md)
examples.

For Python clients, please make sure to install
[OpenTelemetry Python](https://github.com/open-telemetry/opentelemetry-python/tree/main?tab=readme-ov-file#install).
You can then use the `opentelemetry.propagate.inject` method to prepare headers to
pass with the request, as shown [here](https://github.com/open-telemetry/opentelemetry-python/blob/main/docs/examples/auto-instrumentation/client.py#L37-L41).
Then, you can specify headers in the `infer` method. For reference, please
look at our [tests](https://github.com/triton-inference-server/server/blob/main/qa/L0_trace/opentelemetry_unittest.py),
e.g. [http context propagation test](https://github.com/triton-inference-server/server/blob/main/qa/L0_trace/opentelemetry_unittest.py#L494-L508).

### Custom Backend Tracing

In the case when a custom activity needs to be traced in the backend, please
use `TRITONSERVER_InferenceTraceReportActivity` API. For examples, please
refer to the [identity backend](https://github.com/triton-inference-server/identity_backend/blob/main/src/identity.cc).

In `opentelemetry` trace mode, if one wishes to start a new span, make sure
that the name of your custom activity ends with `_START`. To end the new span,
make sure that the corresponding activity ends with `_END`. For example, in the
identity backend, we start a `CUSTOM_ACTIVITY` span, by [reporting](https://github.com/triton-inference-server/identity_backend/blob/30ff4255d09a4ec7547e7949a75d0cefb7e3bb28/src/identity.cc#L887-L893)
`CUSTOM_ACTIVITY_START` event; and we close this span by [reporting](https://github.com/triton-inference-server/identity_backend/blob/30ff4255d09a4ec7547e7949a75d0cefb7e3bb28/src/identity.cc#L897-L902)
`CUSTOM_ACTIVITY_END` event.

Please note that it is the user's responsibility to make sure that all custom
started spans are properly ended.

### Limitations

- OpenTelemetry trace mode is not supported on Windows systems.

- Triton supports only
[OTLP/HTTP Exporter](https://opentelemetry.io/docs/specs/otlp/#otlphttp)
and allows specification of only url for this exporter through
`--trace-config`. Other options and corresponding default values can be
found [here](https://github.com/open-telemetry/opentelemetry-cpp/tree/v1.8.3/exporters/otlp#configuration-options--otlp-http-exporter-).

- Triton does not support configuration of the opentelemetry trace settings
during a Triton run and opentelemetry specific settings are not available
for the retrieval through [Triton's trace extension](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_trace.md).


================================================
FILE: docs/user_guide/v1_to_v2.md
================================================
<!--
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Version 1 to Version 2 Migration

Version 2 of Triton does not generally maintain backwards
compatibility with version 1.  Specifically, you should take the
following items into account when transitioning from version 1 to
version 2.

* The Triton executables and libraries are in /opt/tritonserver. The
  Triton executable is /opt/tritonserver/bin/tritonserver.

* Some *tritonserver* command-line arguments are removed, changed or
  have different default behavior in version 2.

  * --api-version, --http-health-port, --grpc-infer-thread-count,
    --grpc-stream-infer-thread-count, --allow-poll-model-repository, --allow-model-control
    and --tf-add-vgpu are removed.

  * The default for --model-control-mode is changed to *none*.

  * --tf-allow-soft-placement and --tf-gpu-memory-fraction are renamed
     to --backend-config="tensorflow,allow-soft-placement=\<true,false\>"
     and --backend-config="tensorflow,gpu-memory-fraction=\<float\>".

* The HTTP/REST and GRPC protocols, while conceptually similar to
  version 1, are completely changed in version 2. See [inference
  protocols](../customization_guide/inference_protocols.md) for more information.

* Python and C++ client libraries are re-implemented to match the new
  HTTP/REST and GRPC protocols. The Python client no longer depends on
  a C++ shared library and so should be usable on any platform that
  supports Python. See [client
  libraries](https://github.com/triton-inference-server/client) for
  more information.

* Building Triton has changed significantly in version 2. See
  [build](../customization_guide/build.md) for more information.

* In the Docker containers the environment variables indicating the
  Triton version have changed to have a TRITON prefix, for example,
  TRITON_SERVER_VERSION.


================================================
FILE: enhancements/NNNN-template-complete.md
================================================
<!--
# Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# \<Title\>

**Status**: \[Draft | Under Review | Approved | Replaced | Deferred | Rejected\]

**Authors**: \[Name/Team\]

**Category**: \[Architecture | Process | Guidelines\]

**Replaces**: \[Link of previous proposal if applicable\]

**Replaced By**: \[Link of replacing proposal if applicable\]

**Sponsor**: \[Name of code owner or maintainer to shepherd process\]

**Required Reviewers**: \[Names of technical leads that are required for acceptance\]

**Review Date**: \[Date for review\]

**Pull Request**: \[Link to Pull Request of the Proposal itself\]

**Implementation PR / Tracking Issue**: \[Link to Pull Request or Tracking Issue for Implementation\]

## Summary

**\[Required\]**

## Motivation

**\[Required\]**

Describe the problem that needs to be addressed with enough detail for someone familiar with the project to understand.
Generally one to two short paragraphs.
Additional details can be placed in the background section as needed.
Cover **what** the issue is and **why** it needs to be addressed.
Link to github issues if relevant.

### Goals

**\[Optional \- if not applicable omit\]**

List out any additional goals in bullet points.
Goals may be aspirational / difficult to measure but guide the proposal.

* Goal

* Goal

* Goal

### Non Goals

**\[Optional \- if not applicable omit\]**

List out any items which are out of scope / specifically not required in bullet points.
Indicates the scope of the proposal and issue being resolved.

### Requirements

**\[Optional \- if not applicable omit\]**

List out any additional requirements in numbered subheadings.

**\<numbered subheadings\>**

#### REQ \<\#\> \<Title\>

Describe the requirement in as much detail as necessary for others to understand it and how it applies to the TEP.
Keep in mind that requirements should be measurable and will be used to determine if a TEP has been successfully implemented or not.

Requirement names should be prefixed using a monotonically increasing number such as “REQ 1 \<Title\>” followed by “REQ 2 \<Title\>” and so on.
Use title casing when naming requirements. Requirement names should be as descriptive as possible while remaining as terse as possible.

Use all-caps, bolded terms like **MUST** and **SHOULD** when describing each requirement.
See \[RFC-2119\](https://datatracker.ietf.org/doc/html/rfc2119) for additional information.

## Proposal

**\[Required\]**

Describe the high level design / proposal.
Use sub sections as needed, but start with an overview and then dig into the details.
Try to provide images and diagrams to facilitate understanding.

## Implementation Details

**\[Optional \- if not applicable omit\]**

Add additional detailed items here including interface signatures, etc.
Add anything that is relevant but seems more of a detail than central to the proposal.
Use sub sections / bullet points as needed.
Try to provide images and diagrams to facilitate understanding.
If applicable link to PR.

### Deferred to Implementation

**\[Optional \- if not applicable omit\]**

List out items that are under discussion but that will be resolved only during implementation / code review.

## Implementation Phases

**\[Optional \- if not applicable omit\]**

List out phases of implementation (can be single phase).
Give each phase a monotonically increasing number; example “Phase 0” followed by “Phase 1” and so on.
Give phases titles if it makes sense.

### Phase \<\#\> \<Optional Title\>

**Release Target**: Date

**Effort Estimate**: \<estimate of time and number of engineers to complete the phase\>

**Work Item(s):** \<one or more links to github issues\>

**Supported API / Behavior:**

* \<name and concise description of the API / behavior\>

**Not Supported:**

* \<name and concise description of the API / behavior\>

## Related Proposals

**\[Optional \- if not applicable omit\]**

* File

* File

* File

* File

* File

## Alternate Solutions

**\[Required, if not applicable write N/A\]**

List out solutions that were considered but ultimately rejected.
Consider free form `-`, but a possible format shown below.

### Alt \<\#\> \<Title\>

**Pros:**

\<bulleted list of pros describing the positive aspects of this solution\>

**Cons:**

\<bulleted list of cons describing the negative aspects of this solution\>

**Reason Rejected:**

\<bulleted list of reasons describing why this option was not used\>

**Notes:**

\<optional: additional comments about this solution\>

## Background

**\[Optional \- if not applicable omit\]**

Add additional context and references as needed to help reviewers and authors understand the context of the problem and solution being proposed.

## References

**\[Optional \- if not applicable omit\]**

Add additional references as needed to help reviewers and authors understand the context of the problem and solution being proposed.

* \<hyper-linked title of an external reference resource\>

## Terminology & Definitions

**\[Optional \- if not applicable omit\]**

List out additional terms / definitions (lexicon).
Try to keep definitions as concise as possible and use links to external resources when additional information would be useful to the reader.

Keep the list of terms sorted alphabetically to ease looking up definitions by readers.

| \<Term\> | \<Definition\> |
| :---- | :---- |
| **\<Term\>** | \<Definition\> |

## Acronyms & Abbreviations

**\[Optional \- if not applicable omit\]**

Provide a list of frequently used acronyms and abbreviations which are uncommon or unlikely to be known by the reader.
Do not include acronyms or abbreviations which the reader is likely to be familiar with.

Keep the list of acronyms and abbreviations sorted alphabetically to ease looking up definitions by readers.

Do not include the full definition in the expanded meaning of an abbreviation or acronym.
If the reader needs the definition, please include it in the \[Terminology & Definitions\](#terminology--definitions) section.

**\<Acronym/Abbreviation\>:** \<Expanded Meaning\>


================================================
FILE: enhancements/NNNN-template-limited.md
================================================
<!--
# Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# \<Title\>

**Status**: \[Draft | Under Review | Approved | Replaced | Deferred | Rejected\]

**Authors**: \[Name/Team\]

**Category**: \[Architecture | Process | Guidelines\]

**Replaces**: \[Link of previous proposal if applicable\]

**Replaced By**: \[Link of replacing proposal if applicable\]

**Sponsor**: \[Name of code owner or maintainer to shepherd process\]

**Required Reviewers**: \[Names of technical leads that are required for acceptance\]

**Review Date**: \[Date for review\]

**Pull Request**: \[Link to Pull Request of the Proposal itself\]

**Implementation PR / Tracking Issue**: \[Link to Pull Request or Tracking Issue for Implementation\]

## Summary

**\[Required\]**

## Motivation

**\[Required\]**

Describe the problem that needs to be addressed with enough detail for someone familiar with the project to understand.
Generally one to two short paragraphs.
Additional details can be placed in the background section as needed. Cover **what** the issue is and **why** it needs to be addressed.
Link to github issues if relevant.

### Goals

**\[Optional \- if not applicable omit\]**

List out any additional goals in bullet points.
Goals may be aspirational / difficult to measure but guide the proposal.

* Goal

* Goal

* Goal

### Non Goals

**\[Optional \- if not applicable omit\]**

List out any items which are out of scope / specifically not required in bullet points.
Indicates the scope of the proposal and issue being resolved.

### Requirements

**\[Optional \- if not applicable omit\]**

List out any additional requirements in numbered subheadings.

**\<numbered subheadings\>**

#### REQ \<\#\> \<Title\>

Describe the requirement in as much detail as necessary for others to understand it and how it applies to the TEP.
Keep in mind that requirements should be measurable and will be used to determine if a TEP has been successfully implemented or not.

Requirement names should be prefixed using a monotonically increasing number such as “REQ 1 \<Title\>” followed by “REQ 2 \<Title\>” and so on.
Use title casing when naming requirements.
Requirement names should be as descriptive as possible while remaining as terse as possible.

Use all-caps, bolded terms like **MUST** and **SHOULD** when describing each requirement.
See \[RFC-2119\](https://datatracker.ietf.org/doc/html/rfc2119) for additional information.

## Proposal

**\[Required\]**

Describe the high level design / proposal.
Use sub sections as needed, but start with an overview and then dig into the details.
Try to provide images and diagrams to facilitate understanding.

## Alternate Solutions

**\[Required, if not applicable write N/A\]**

List out solutions that were considered but ultimately rejected.
Consider free form `-`, but a possible format shown below.

### Alt \<\#\> \<Title\>

**Pros:**

\<bulleted list of pros describing the positive aspects of this solution\>

**Cons:**

\<bulleted list of cons describing the negative aspects of this solution\>

**Reason Rejected:**

\<bulleted list of reasons describing why this option was not used\>

**Notes:**

\<optional: additional comments about this solution\>


================================================
FILE: enhancements/README.md
================================================
<!--
# Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# Triton Enhancement Proposals (TEP)

Enhancement Proposals and Architecture Decisions

Please see [0000-tep-process](teps/0000-tep-process.md) for full explanation and details.

## Authoring Guidelines

1. Start with either the:
   - [NNNN-template-complete.md](NNNN-template-complete.md) and remove unneeded sections.
   - [NNNN-template-limited.md](NNNN-template-limited.md) and then add selectively from the complete template based on need.

2. Identify a **Code-Owner** or **Maintainer** of the TEP repository to shepherd the process.

3. Create a draft PR and iterate with co-authors and the **Sponsor**.

4. When ready for review, mark as ready and work with **Sponsor** to set a **Review Date**.


================================================
FILE: enhancements/teps/0000-tep-process.md
================================================
<!--
# Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# Triton Enhancement Proposals

**Status**: Draft

**Authors**: [whoisj](https://github.com/whoisj)

**Category**: Process

**Replaces**: N/A

**Replaced By**: N/A

**Sponsor**: [whoisj](https://github.com/whoisj)

**Required Reviewers**: [dzier](https://github.com/dzier), [nnshah1](https://github.com/nnshah1)

**Review Date**: 17 Oct 2025

**Pull Request**: [N/A](https://github.com/triton-inference-server/server/pull/8517)

## Summary

A standard process and format for proposing and capturing architecture, design, and process decisions for the Triton project along with the motivations behind those decisions.
We adopt a process similar to those used by Dynamo, Kubernetes, Rust, Python, and Ray, broadly categorized as "enhancement proposals".

## Motivation

With any software project but especially agile, open source projects in the AI space, architecture, design, and process decisions are made rapidly and for specific reasons which can sometimes be difficult to understand after the fact.
For Triton in particular many teams and community members are collaborating for the first time and have varied backgrounds and design philosophies.
The Triton project's code base itself reflects multiple previously independent code bases integrated quickly to meet overall project goals.
As the project evolves we need a way to propose, ratify and capture architecture, design and process decisions quickly and thoughtfully in a transparent, consistent, lightweight, maintainable way.

Borrowing from the motivation for KEPs:

> The purpose of the KEP process is to reduce the amount of "tribal knowledge" in our community.
> By moving decisions from a smattering of mailing lists, video calls and hallway conversations into a well tracked artifact, this process aims to enhance communication and discoverability.

### Goals

* **Useful**

  Enhancement proposals and the process of writing and approving them should encourage the thoughtful evaluation of design, process, and architecture choices and lead to timely decisions with a clear record of what was decided, why, and what other options were considered.

* **Lightweight and Scalable**

  The format and process should be applicable both to small or medium sized changes as well as large ones.
  The process should not impede the rate of progress but serve to provide timely feedback, discussion, and ratification on key proposals.
  The process should also support retroactive documents to capture and explain decisions already made.

* **Single Document for Requirements and Design**

  Combine aspects of requirements documents, design documents and software architecture documents into a single document.
  Give one place to understand the motivation, requirements, and design of a feature or process.

* **Support Process, Architecture and Guideline Decisions**

  Have a single format to articulate decisions that affect process (such as GitHub merge rules or templates), code and design guidelines, and features.

* **Clear**

  Should be relatively clear when a document is required, when the review needs to be completed and by whom, and what the overall process is.

* **Encourage Collaboration**

  Should allow for easy collaboration and communication between **Authors** and **Reviewers**.

* **Flexible**

  Format and process should be flexible enough to be used for different types of decisions requiring different levels of detail and formatting of sections.

### Non Goals

* Triton Enhancement Proposals (TEPs) do not take the place of other forms of documentation, such as user- or developer-facing documentation (including architecture documents and API documentation).
* Prototyping and early development are not gated by design / architectural approval.
* TEPs should not be a perfunctory process but lead to discussion and thought process around good designs.
* Not all changes (bug fixes, documentation improvements) need a TEP - many can be reviewed via the normal GitHub pull request process.

## Proposal

Following successful open source projects such as Kubernetes (KEP) and Dynamo (DEP) we adopt a markdown based enhancement proposal format designed to support any decisions we need to capture as a project.
We will adopt an open, community-wide, discussion and comment process using pull requests but enable **Code-Owners** and **Maintainers** to be the final arbiters of **Approval**.

Subject area experts will be listed as required **Reviewers** to ensure proposals are complete and reviewed properly.

<!-- Enhancement proposals will be stored in github in a separate repository. -->

We provide two templates "limited" and "complete" where the limited template is a strict subset of the complete template, and both indicate which sections are required and which are optional.

## Implementation Details

### Proposal Process

<!-- * Fork or create a branch in the `enhancements` repository -->

* Copy the [limited template](../NNNN-template-limited.md) or [complete template](../NNNN-template-complete.md) to `teps/NNNN-my-feature.md` (where `my-feature` is descriptive; don't assign a `TEP` identifier yet)

  > [!Note]
  > Choose the template that fits your purpose.
  > You can start with the limited form and pull additional sections from the complete form as needed.
  > Keep the order of the sections consistent.

* Identify a **Sponsor** from the list of **Maintainers** or **Code-Owners** to help with the process.

* Fill in the proposal template.
  Be sure to include all required sections.
  Keep sections in the order prescribed in the template.

* Work with the **Sponsor** to identify the required reviewers and a timeline for review.

<!-- * Submit a pull request to the `enhancements` repository -->

* If discussion is needed the **Sponsor** can ask for a slot in the weekly Engineering Sync or schedule an ad-hoc meeting with the required reviewers.

* Iterate and incorporate feedback via the pull request.

* When review is complete, the **Sponsor** will merge the request and update the status.

* **Sponsor** should assign an identifier.

* **Author** and **Sponsor** should add issues and/or PRs as needed to track implementation.

### When is a proposal required?

It is difficult to enumerate all the circumstances where a proposal would be required or not required.
Generally we will follow this process when making "substantial changes".
The definition of "substantial" is evolving and mainly determined by the core team and community.

When in doubt reach out to a **Maintainer** or **Code-Owner**.

**Generally speaking a proposal would not be required for**:

* Bug fixes that don't change advertised behavior

* Documentation fixes / updates

* Minor refactors within a single module

**Generally speaking proposals would be required for**:

* New features which add significant functionality

* Changes to existing features or code which require discussion

* Changes to public interfaces

* Responses to security related vulnerabilities found directly in the project code

* Changes to packaging and installation

* When a **Maintainer** or **Code-Owner** recommends that a change go through the proposal process

* Retroactively to capture current architecture, guideline, or process

### Minor Changes After Review

For minor changes or changes that are in the spirit of the review, updates can be made to the document without a new proposal.

*Example:* links to implementation

### Significant Changes After Review

For significant changes, a new proposal should be made and the original marked as replaced.

### Maintenance

TEPs should be reviewed for updates, replacements, or archiving on a regular basis.

### Sensitive Changes and Discussions

Certain types of changes need to be discussed and ratified before being made public due to timing of non-disclosed information.
In such (rare) cases, drafts and reviews will be conducted offline by **Authors**, **Code-Owners**, and **Maintainers** with the public proposals being updated when possible.

*Example:* when responding to undisclosed security vulnerabilities, we want to avoid inadvertently encouraging zero day attacks for deployed systems.

In such (rare) cases, we may make use of a private repo on a temporary basis to collect feedback before publishing to the public repo.

### Deferred to Implementation

* Definition of **Code-Owners** and **Maintainers**

* Whether or not to organize **TEP**s into sub directories for projects / areas

* Tooling around the creation / indexing of **TEP**s

* Making requirements required in addition to motivation

* Format recommendations for API surfaces / other formatted components.

* Decisions / guidelines on when a TEP is needed.

## Alternate Solutions

### Alt 1 Google Docs

**Pros:**

* Fits existing documents and templates used by many teams

**Cons:**

* Difficult to integrate with AI tools.

* Difficult to search and index

**Reason Rejected:**

* Want to standardize around a simple text format and use AI tools also for diagramming, etc.

## Background

With the rise of Agile software development practices and large open source projects, software development teams needed to devise new and lightweight (relative to previous software architecture documents) ways of recording architecture proposals and decisions.
As Agile was born in part as a reaction to waterfall styles of planning and development and famously prioritized “Working software over comprehensive documentation”, so too there was a need to replace monolithic large software design specifications with something lighter weight but that still encouraged good architecture.

From this need for a new way of practicing software architecture, a body of work and theory has evolved around the concepts of “Architecture Decision Records” (which in turn are also termed “Any Decision Records”), and RFCs or Enhancement Proposals (PEP, KEP, REP).

In each case the core requirements of the process are that the team document the problem, the proposal / design, the status of the proposal, implications / follow on work, and any alternatives that were considered using a standard template and review process.

Just as in Agile planning, each team modifies the template and process to fit their needs.

### References

1. [Documenting Architecture Decisions (cognitect.com)](https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions)

2. [The most plagiarized Architecture Decision Record blog on the internet. | by Conall Daly | Medium](https://conalldalydev.medium.com/the-most-plagiarised-architecture-decision-record-blog-on-the-internet-c9dd2018c1d6)

3. [adr.github.io](https://adr.github.io/)

4. [When Should I Write an Architecture Decision Record \- Spotify Engineering : Spotify Engineering (atspotify.com)](https://engineering.atspotify.com/2020/04/when-should-i-write-an-architecture-decision-record/)

5. [Scaling Engineering Teams via RFCs: Writing Things Down \- The Pragmatic Engineer](https://blog.pragmaticengineer.com/scaling-engineering-teams-via-writing-things-down-rfcs/)

6. [Love Unrequited: The Story of Architecture, Agile, and How Architecture Decision Records Brought Them Together | IEEE Journals & Magazine | IEEE Xplore](https://ieeexplore.ieee.org/document/9801811)

7. [ray-project/enhancements: Tracking Ray Enhancement Proposals (github.com)](https://github.com/ray-project/enhancements)

8. [Kubernetes Enhancement Proposals](https://github.com/kubernetes/enhancements/blob/master/keps/sig-architecture/0000-kep-process/README.md)

9. [Dynamo Enhancement Proposals](https://github.com/ai-dynamo/enhancements/blob/main/README.md)


================================================
FILE: pyproject.toml
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

[tool.codespell]
# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override -
# this is only to allow you to run codespell interactively
skip = "./.git,./.github"
# ignore short words, and typename parameters like OffsetT
ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
# ignore allowed words
ignore-words-list = "passin,couldn"
# use the 'clear' dictionary for unambiguous spelling mistakes
builtin = "clear"
# disable warnings about binary files and wrong encoding
quiet-level = 3

[tool.isort]
profile = "black"
use_parentheses = true
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
ensure_newline_before_comments = true
line_length = 88
balanced_wrapping = true
indent = "    "
skip = ["build"]


================================================
FILE: python/openai/README.md
================================================
<!--
# Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# OpenAI-Compatible Frontend for Triton Inference Server

## Pre-requisites

1. Docker + NVIDIA Container Runtime
2. A correctly configured `HF_TOKEN` for access to HuggingFace models.
    - The current examples and testing primarily use the
      [`meta-llama/Meta-Llama-3.1-8B-Instruct`](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)
      model, but you can manually bring your own models and adjust accordingly.

## VLLM

1. Launch the container and install dependencies:
  - Mounts `~/.cache/huggingface` for re-use of downloaded models across runs, containers, etc.
  - Sets the [`HF_TOKEN`](https://huggingface.co/docs/huggingface_hub/en/package_reference/environment_variables#hftoken) environment variable to
    access gated models, make sure this is set in your local environment if needed.

```bash
docker run -it --net=host --gpus all --rm \
  -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
  -e HF_TOKEN \
  nvcr.io/nvidia/tritonserver:26.02-vllm-python-py3
```

2. Launch the OpenAI-compatible Triton Inference Server:
```bash
cd /opt/tritonserver/python/openai

# NOTE: Adjust the --tokenizer based on the model being used
python3 openai_frontend/main.py --model-repository tests/vllm_models --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
```

<details>
<summary>Example output</summary>

```
...
+-----------------------+---------+--------+
| Model                 | Version | Status |
+-----------------------+---------+--------+
| llama-3.1-8b-instruct | 1       | READY  | <- Correct Model Loaded in Triton
+-----------------------+---------+--------+
...
Found model: name='llama-3.1-8b-instruct', backend='vllm'
[WARNING] Adding CORS for the following origins: ['http://localhost']
INFO:     Started server process [126]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:9000 (Press CTRL+C to quit) <- OpenAI Frontend Started Successfully
```

</details>

3. Send a `/v1/chat/completions` request:
  - Note the use of `jq` is optional, but provides a nicely formatted output for JSON responses.
```bash
MODEL="llama-3.1-8b-instruct"
curl -s http://localhost:9000/v1/chat/completions -H 'Content-Type: application/json' -d '{
  "model": "'${MODEL}'",
  "messages": [{"role": "user", "content": "Say this is a test!"}]
}' | jq
```

<details>
<summary>Example output</summary>

```json
{
  "id": "cmpl-0242093d-51ae-11f0-b339-e7480668bfbe",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message":
      {
        "content": "This is only a test.",
        "tool_calls": null,
        "role": "assistant",
        "function_call": null
      },
      "logprobs": null
    }
  ],
  "created": 1750846825,
  "model": "llama-3.1-8b-instruct",
  "system_fingerprint": null,
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 7,
    "prompt_tokens": 42,
    "total_tokens": 49
  }
}
```

</details>

4. Send a `/v1/completions` request:
  - Note the use of `jq` is optional, but provides a nicely formatted output for JSON responses.
```bash
MODEL="llama-3.1-8b-instruct"
curl -s http://localhost:9000/v1/completions -H 'Content-Type: application/json' -d '{
  "model": "'${MODEL}'",
  "prompt": "Machine learning is"
}' | jq
```

<details>
<summary>Example output</summary>

```json
{
  "id": "cmpl-58fba3a0-51ae-11f0-859d-e7480668bfbe",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": " an amazing field that can truly understand the hidden patterns that exist in the data,"
    }
  ],
  "created": 1750846970,
  "model": "llama-3.1-8b-instruct",
  "system_fingerprint": null,
  "object": "text_completion",
  "usage": {
    "completion_tokens": 16,
    "prompt_tokens": 4,
    "total_tokens": 20
  }
}
```

</details>

5. Benchmark with `genai-perf`:
- To install genai-perf in this container, see the instructions [here](https://github.com/triton-inference-server/perf_analyzer/tree/main/genai-perf#install-genai-perf-ubuntu-2404-python-310)
- Or try using genai-perf from the [SDK container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)

```bash
MODEL="llama-3.1-8b-instruct"
TOKENIZER="meta-llama/Meta-Llama-3.1-8B-Instruct"
genai-perf profile \
  --model ${MODEL} \
  --tokenizer ${TOKENIZER} \
  --service-kind openai \
  --endpoint-type chat \
  --url localhost:9000 \
  --streaming
```

<details>
<summary>Example output</summary>

```
2024-10-14 22:43 [INFO] genai_perf.parser:82 - Profiling these models: llama-3.1-8b-instruct
2024-10-14 22:43 [INFO] genai_perf.wrapper:163 - Running Perf Analyzer : 'perf_analyzer -m llama-3.1-8b-instruct --async --input-data artifacts/llama-3.1-8b-instruct-openai-chat-concurrency1/inputs.json -i http --concurrency-range 1 --endpoint v1/chat/completions --service-kind openai -u localhost:9000 --measurement-interval 10000 --stability-percentage 999 --profile-export-file artifacts/llama-3.1-8b-instruct-openai-chat-concurrency1/profile_export.json'
                              NVIDIA GenAI-Perf | LLM Metrics
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
┃                         Statistic ┃    avg ┃    min ┃    max ┃    p99 ┃    p90 ┃    p75 ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
│          Time to first token (ms) │  71.66 │  64.32 │  86.52 │  76.13 │  74.92 │  73.26 │
│          Inter token latency (ms) │  18.47 │  18.25 │  18.72 │  18.67 │  18.61 │  18.53 │
│              Request latency (ms) │ 348.00 │ 274.60 │ 362.27 │ 355.41 │ 352.29 │ 350.66 │
│            Output sequence length │  15.96 │  12.00 │  16.00 │  16.00 │  16.00 │  16.00 │
│             Input sequence length │ 549.66 │ 548.00 │ 551.00 │ 550.00 │ 550.00 │ 550.00 │
│ Output token throughput (per sec) │  45.84 │    N/A │    N/A │    N/A │    N/A │    N/A │
│      Request throughput (per sec) │   2.87 │    N/A │    N/A │    N/A │    N/A │    N/A │
└───────────────────────────────────┴────────┴────────┴────────┴────────┴────────┴────────┘
2024-10-14 22:44 [INFO] genai_perf.export_data.json_exporter:62 - Generating artifacts/llama-3.1-8b-instruct-openai-chat-concurrency1/profile_export_genai_perf.json
2024-10-14 22:44 [INFO] genai_perf.export_data.csv_exporter:71 - Generating artifacts/llama-3.1-8b-instruct-openai-chat-concurrency1/profile_export_genai_perf.csv
```

</details>

6. Use the OpenAI python client directly:
```python
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:9000/v1",
    api_key="EMPTY",
)

model = "llama-3.1-8b-instruct"
completion = client.chat.completions.create(
    model=model,
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant.",
        },
        {"role": "user", "content": "What are LLMs?"},
    ],
    max_completion_tokens=256,
)

print(completion.choices[0].message.content)
```

7. Run tests (NOTE: The server should not be running, the tests will handle starting/stopping the server as necessary):
```bash
cd /opt/tritonserver/python/openai/
pip install -r requirements-test.txt

pytest -v tests/
```

### LoRA Adapters

If the command line argument `--lora-separator=<separator_string>` is provided
when starting the OpenAI Frontend, a LoRA adapter listed in `multi_lora.json`
may be selected by appending the LoRA name to the model name,
separated by the LoRA separator, on the inference request in
`<model_name><separator_string><lora_name>` format.

<details>
<summary>For example</summary>

```bash
# start server with model named gemma-2b
python3 openai_frontend/main.py --lora-separator=_lora_ ...

# inference without LoRA
curl -s http://localhost:9000/v1/completions -H 'Content-Type: application/json' -d '{
  "model": "gemma-2b",
  "temperature": 0,
  "prompt": "When was the wheel invented?"
}'
{
  ...
  "choices":[{..."text":"\n\nThe wheel was invented by the Sumerians in Mesopotamia around 350"}],
  ...
}

# inference with LoRA named doll
curl -s http://localhost:9000/v1/completions -H 'Content-Type: application/json' -d '{
  "model": "gemma-2b_lora_doll",
  "temperature": 0,
  "prompt": "When was the wheel invented?"
}'
{
  ...
  "choices":[{..."text":"\n\nThe wheel was invented in Mesopotamia around 3500 BC.\n\n"}],
  ...
}

# inference with LoRA named sheep
curl -s http://localhost:9000/v1/completions -H 'Content-Type: application/json' -d '{
  "model": "gemma-2b_lora_sheep",
  "temperature": 0,
  "prompt": "When was the wheel invented?"
}'
{
  ...
  "choices":[{..."text":"\n\nThe wheel was invented around 3000 BC in Mesopotamia.\n\n"}],
  ...
}
```

</details>

When listing or retrieving model(s), the model id will include the LoRA name in
the same `<model_name><separator_string><lora_name>` format for each LoRA
adapter listed in `multi_lora.json`. Note: The LoRA name inclusion is
limited to locally stored models; inference requests are not limited, though.

#### vLLM
See the
[vLLM documentation](https://github.com/triton-inference-server/vllm_backend/blob/main/docs/llama_multi_lora_tutorial.md)
on how to serve a vLLM model with LoRA adapters.

#### TensorRT-LLM
Similarly, see the [TensorRT-LLM documentation](https://github.com/triton-inference-server/tensorrtllm_backend/blob/main/docs/lora.md)
on how to prepare LoRA-enabled TensorRT-LLM engines and generate LoRA tensors.
The path of LoRA adapter in `multi_lora.json` is the directory of
`model.lora_config.npy` and `model.lora_weights.npy` tensors.

<details>
<summary>For example</summary>

model repository
```
inflight_batcher_llm
├── postprocessing
|   ├── 1
|   |   └── model.py
|   └── config.pbtxt
├── preprocessing
|   ├── 1
|   |   └── model.py
|   └── config.pbtxt
├── tensorrt_llm
|   ├── 1
|   |   └── model.py
|   └── config.pbtxt
└── tensorrt_llm_bls
    ├── 1
    |   ├── Japanese-Alpaca-LoRA-7b-v0-weights
    |   |   ├── model.lora_config.npy
    |   |   └── model.lora_weights.npy
    |   ├── luotuo-lora-7b-0.1-weights
    |   |   ├── model.lora_config.npy
    |   |   └── model.lora_weights.npy
    |   ├── model.py
    |   └── multi_lora.json
    └── config.pbtxt
```

multi_lora.json
```
{
  "doll": "inflight_batcher_llm/tensorrt_llm_bls/1/luotuo-lora-7b-0.1-weights",
  "sheep": "inflight_batcher_llm/tensorrt_llm_bls/1/Japanese-Alpaca-LoRA-7b-v0-weights"
}
```
</details>

### Embedding Models
Currently, OpenAI-Compatible Frontend supports loading embedding models and embeddings endpoints via vLLM backend. Check [vLLM supported models](https://docs.vllm.ai/en/latest/models/supported_models.html#embedding) for all supported embedding models from vLLM.

1. Launch the container and install dependencies:
  - Mounts `~/.cache/huggingface` for re-use of downloaded models across runs, containers, etc.
  - Sets the [`HF_TOKEN`](https://huggingface.co/docs/huggingface_hub/en/package_reference/environment_variables#hftoken) environment variable to
    access gated models, make sure this is set in your local environment if needed.

```bash
docker run -it --net=host --gpus all --rm \
  -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
  -e HF_TOKEN \
  nvcr.io/nvidia/tritonserver:26.02-vllm-python-py3
```

2. Launch the OpenAI-compatible Triton Inference Server:
```bash
cd /opt/tritonserver/python/openai

# NOTE: Embeddings endpoint does not require "--tokenizer"
python3 openai_frontend/main.py --model-repository tests/vllm_embedding_models
```

<details>
<summary>Example output</summary>

```
...
+------------------+---------+--------+
| Model            | Version | Status |
+------------------+---------+--------+
| all-MiniLM-L6-v2 | 1       | READY  | <- Correct Model Loaded in Triton
+------------------+---------+--------+
...
Found model: name='all-MiniLM-L6-v2', backend='vllm'
[WARNING] Adding CORS for the following origins: ['http://localhost']
INFO:     Started server process [133]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:9000 (Press CTRL+C to quit) <- OpenAI Frontend Started Successfully
```

</details>

3. Send a `/v1/embeddings` request:
  - Note the use of `jq` is optional, but provides a nicely formatted output for JSON responses.
```bash
MODEL="all-MiniLM-L6-v2"
curl -s http://localhost:9000/v1/embeddings \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "'${MODEL}'",
    "input": "The food was delicious and the waiter...",
    "dimensions": 10,
    "encoding_format": "float"
  }' | jq
```

<details>
<summary>Example output</summary>

```json
{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "embedding": [
        -0.1914404183626175,
        0.4000193178653717,
        0.058502197265625,
        0.18909454345703125,
        -0.4690297544002533,
        0.004936536308377981,
        0.45893096923828125,
        -0.31141534447669983,
        0.18299102783203125,
        -0.4907582700252533
      ],
      "index": 0
    }
  ],
  "model": "all-MiniLM-L6-v2",
  "usage": {
    "prompt_tokens": 12,
    "total_tokens": 12
  }
}
```

</details>

## TensorRT-LLM

0. Prepare your model repository for a TensorRT-LLM model, build the engine, etc. You can try any of the following options:
  - [Triton CLI](https://github.com/triton-inference-server/triton_cli/)
  - [TRT-LLM Backend Quickstart](https://github.com/triton-inference-server/tensorrtllm_backend?tab=readme-ov-file#quick-start)

1. Launch the container:
  - Mounts `~/.cache/huggingface` for re-use of downloaded models across runs, containers, etc.
  - Sets the [`HF_TOKEN`](https://huggingface.co/docs/huggingface_hub/en/package_reference/environment_variables#hftoken) environment variable to
    access gated models, make sure this is set in your local environment if needed.

```bash
docker run -it --net=host --gpus all --rm \
  -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
  -e HF_TOKEN \
  -e TRTLLM_ORCHESTRATOR=1 \
  nvcr.io/nvidia/tritonserver:26.02-trtllm-python-py3
```

2. Install dependencies inside the container:
```bash
# Install python bindings for tritonserver and tritonfrontend
pip install /opt/tritonserver/python/triton*.whl

# Install application requirements
git clone https://github.com/triton-inference-server/server.git
cd server/python/openai/
pip install -r requirements.txt
```

3. Launch the OpenAI server:
```bash
# NOTE: Adjust the --tokenizer based on the model being used
python3 openai_frontend/main.py --model-repository path/to/models --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
```

4. Send a `/v1/chat/completions` request:
  - Note the use of `jq` is optional, but provides a nicely formatted output for JSON responses.
```bash
# MODEL should be the client-facing model name in your model repository for a pipeline like TRT-LLM.
# For example, this could also be "ensemble", or something like "gpt2" if generated from Triton CLI
MODEL="tensorrt_llm_bls"
curl -s http://localhost:9000/v1/chat/completions -H 'Content-Type: application/json' -d '{
  "model": "'${MODEL}'",
  "messages": [{"role": "user", "content": "Say this is a test!"}]
}' | jq
```

<details>
<summary>Example output</summary>

```json
{
  "id": "cmpl-5ad4f860-bf13-11f0-b137-b75b7f0a8586",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "It looks like you're ready to see if I'm functioning properly. What would",
        "tool_calls": null,
        "role": "assistant",
        "function_call": null
      },
      "logprobs": null
    }
  ],
  "created": 1762875029,
  "model": "tensorrt_llm_bls",
  "system_fingerprint": null,
  "object": "chat.completion",
  "usage": {
    "prompt_tokens": 42,
    "total_tokens": 58,
    "completion_tokens": 16
  }
}
```

</details>

The other examples should be the same as vLLM, except that you should set `MODEL="tensorrt_llm_bls"` or `MODEL="ensemble"`
everywhere applicable, as seen in the example request above.

## KServe Frontends

To support serving requests through both the OpenAI-Compatible and
KServe Predict v2 frontends to the same running Triton Inference Server,
the `tritonfrontend` python bindings are included for optional use in this
application as well.

You can opt-in to including these additional frontends, assuming `tritonfrontend`
is installed, with `--enable-kserve-frontends` like below:

```
python3 openai_frontend/main.py \
  --model-repository tests/vllm_models \
  --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct \
  --enable-kserve-frontends
```

See `python3 openai_frontend/main.py --help` for more information on the
available arguments and default values.

For more information on the `tritonfrontend` python bindings, see the docs
[here](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/tritonfrontend.md).

## Model Parallelism Support

- [x] vLLM ([EngineArgs](https://github.com/triton-inference-server/vllm_backend/blob/main/README.md#using-the-vllm-backend))
    - ex: Configure `tensor_parallel_size: 2` in the
      [model.json](https://github.com/triton-inference-server/vllm_backend/blob/main/samples/model_repository/vllm_model/1/model.json)
- [x] TensorRT-LLM ([Orchestrator Mode](https://github.com/triton-inference-server/tensorrtllm_backend/blob/main/README.md#orchestrator-mode))
    - Set the following environment variable: `export TRTLLM_ORCHESTRATOR=1`
- [ ] TensorRT-LLM ([Leader Mode](https://github.com/triton-inference-server/tensorrtllm_backend/blob/main/README.md#leader-mode))
    - Not currently supported

## Tool Calling

The OpenAI frontend supports `tools` and `tool_choice` in the `v1/chat/completions` API. Please refer to the OpenAI API reference for more details about these parameters:
  [tools](https://platform.openai.com/docs/api-reference/chat/create#chat-create-tools),
  [tool_choice](https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice)

To enable the tool-calling feature, add the `--tool-call-parser {parser_name}` flag when starting the server. The two available parsers are `llama3` and `mistral`.
The `llama3` parser supports tool-calling features for LLaMA 3.1, 3.2, and 3.3 models, while the `mistral` parser supports tool-calling features for the Mistral Instruct model.

Example for launching the OpenAI frontend with a tool call parser:
```
python3 openai_frontend/main.py \
  --model-repository tests/vllm_models \
  --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct \
  --tool-call-parser llama3
```

Example for making a tool calling request:

```python
import json
from openai import OpenAI


def get_current_weather(city: str, state: str, unit: "str"):
    return (
        "The weather in Dallas, Texas is 85 degrees fahrenheit. It is "
        "partly cloudly, with highs in the 90's."
    )

available_tools = {"get_current_weather": get_current_weather}

openai_api_key = "EMPTY"
openai_api_base = "http://localhost:9000/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

model = "llama-3.1-8b-instruct" # change this to the model in the repository

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city to find the weather for, e.g. 'San Francisco'",
                    },
                    "state": {
                        "type": "string",
                        "description": "the two-letter abbreviation for the state that the city is"
                        " in, e.g. 'CA' which would mean 'California'",
                    },
                    "unit": {
                        "type": "string",
                        "description": "The unit to fetch the temperature in",
                        "enum": ["celsius", "fahrenheit"],
                    },
                },
                "required": ["city", "state", "unit"],
            },
        },
    }
]

messages = [
    {
        "role": "system",
        "content": "You're a helpful assistant! Answer the users question best you can.",
    },
    {"role": "user", "content": "What is the weather in Dallas, Texas in Fahrenheit?"},
]

tool_calls = client.chat.completions.create(
    messages=messages, model=model, tools=tools, max_completion_tokens=128
)
function_name = tool_calls.choices[0].message.tool_calls[0].function.name
function_arguments = tool_calls.choices[0].message.tool_calls[0].function.arguments

print(f"function name: {function_name}")
print(f"function arguments: {function_arguments}")
print(f"tool calling result: {available_tools[function_name](**json.loads(function_arguments))}")
```

Example output:
```
function name: get_current_weather
function arguments: {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}
tool calling result: The weather in Dallas, Texas is 85 degrees fahrenheit. It is partly cloudly, with highs in the 90's.
```

#### Named Tool Calling

The OpenAI frontend supports named function calling, utilizing structured outputs in the vLLM backend and guided decoding in the TensorRT-LLM backend. Users can specify one of the tools in `tool_choice` to force the model to select a specific tool for function calling.

> [!NOTE]
> For instructions on enabling guided decoding in the TensorRT-LLM backend, please refer to [this guide](https://github.com/triton-inference-server/tensorrtllm_backend/blob/main/docs/guided_decoding.md)

Example for making a named tool calling request:

```python
import json
from openai import OpenAI


def get_current_weather(city: str, state: str, unit: "str"):
    return (
        "The weather in Dallas, Texas is 85 degrees fahrenheit. It is "
        "partly cloudly, with highs in the 90's."
    )

def get_n_day_weather_forecast(city: str, state: str, unit: str, num_days: int):
    return (
        f"The weather in Dallas, Texas is 85 degrees fahrenheit in next {num_days} days."
    )

available_tools = {"get_current_weather": get_current_weather,
                  "get_n_day_weather_forecast": get_n_day_weather_forecast}

openai_api_key = "EMPTY"
openai_api_base = "http://localhost:9000/v1"
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
model = "llama-3.1-8b-instruct" # change this to the model in the repository
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city to find the weather for, e.g. 'San Francisco'",
                    },
                    "state": {
                        "type": "string",
                        "description": "the two-letter abbreviation for the state that the city is"
                        " in, e.g. 'CA' which would mean 'California'",
                    },
                    "unit": {
                        "type": "string",
                        "description": "The unit to fetch the temperature in",
                        "enum": ["celsius", "fahrenheit"],
                    },
                },
                "required": ["city", "state", "unit"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_n_day_weather_forecast",
            "description": "Get an N-day weather forecast",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city to find the weather for, "
                        "e.g. 'San Francisco'",
                    },
                    "state": {
                        "type": "string",
                        "description": "must the two-letter abbreviation for the state "
                        "that the city is in, e.g. 'CA' which would "
                        "mean 'California'",
                    },
                    "unit": {
                        "type": "string",
                        "description": "The unit to fetch the temperature in",
                        "enum": ["celsius", "fahrenheit"],
                    },
                    "num_days": {
                        "type": "integer",
                        "description": "The number of days to forecast",
                    },
                },
                "required": ["city", "state", "unit", "num_days"],
            },
        },
     }
]

tool_choice = {"function": {"name": "get_n_day_weather_forecast"}, "type": "function"}

messages = [
    {
        "role": "system",
        "content": "You're a helpful assistant! Answer the users question best you can.",
    },
    {"role": "user", "content": "What is the weather in Dallas, Texas in Fahrenheit?"},
]

tool_calls = client.chat.completions.create(
    messages=messages, model=model, tools=tools, tool_choice=tool_choice, max_completion_tokens=128
)
function_name = tool_calls.choices[0].message.tool_calls[0].function.name
function_arguments = tool_calls.choices[0].message.tool_calls[0].function.arguments

print(f"function name: {function_name}")
print(f"function arguments: {function_arguments}")
print(f"tool calling result: {available_tools[function_name](**json.loads(function_arguments))}")
```

Example output:
```
function name: get_n_day_weather_forecast
function arguments: {"city": "Dallas", "state": "TX", "unit": "fahrenheit", "num_days": 1}
tool calling result: The weather in Dallas, Texas is 85 degrees fahrenheit in next 1 days.
```

## Limit Endpoint Access

The OpenAI-compatible server supports restricting access to specific API endpoints through authentication headers. This feature allows you to protect sensitive endpoints while keeping others publicly accessible.

### Configuration

Use the `--openai-restricted-api` command-line argument to configure endpoint restrictions:

```
--openai-restricted-api <API_1>,<API_2>,... <restricted-key> <restricted-value>
```

- **`API`**: A comma-separated list of APIs to be included in this group. Note that currently a given API is not allowed to be included in multiple groups. The following protocols / APIs are recognized:
  - **inference**: Chat completions and text completions endpoints
    - `POST /v1/chat/completions`
    - `POST /v1/completions`
  - **embedding**: Embedding endpoint
    - `POST /v1/embeddings`
  - **model-repository**: Model listing and information endpoints
    - `GET /v1/models`
    - `GET /v1/models/{model_name}`
  - **metrics**: Server metrics endpoint
    - `GET /metrics`
  - **health**: Health check endpoint
    - `GET /health/ready`

- **`restricted-key`**: The HTTP request header to be checked when a request is received.
- **`restricted-value`**: The header value required to access the specified protocols.

### Examples

#### Restrict Inference API Endpoints Only
```bash
--openai-restricted-api "inference api-key my-secret-key"
```

Clients must include the header:
```bash
curl -H "api-key: my-secret-key" \
     -X POST http://localhost:9000/v1/chat/completions \
     -d '{"model": "my-model", "messages": [{"role": "user", "content": "Hello"}]}'
```

#### Restrict Multiple API Endpoints
```bash
# Different authentication for different APIs
--openai-restricted-api "inference user-key user-secret" \
--openai-restricted-api "model-repository admin-key admin-secret"

# Multiple APIs in single argument with shared authentication
--openai-restricted-api "inference,model-repository shared-key shared-secret"
```


================================================
FILE: python/openai/openai_frontend/__init__.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: python/openai/openai_frontend/engine/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: python/openai/openai_frontend/engine/engine.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


from __future__ import annotations

from typing import Iterator, List, Protocol

from schemas.openai import (
    CreateChatCompletionRequest,
    CreateChatCompletionResponse,
    CreateCompletionRequest,
    CreateCompletionResponse,
    CreateEmbeddingRequest,
    CreateEmbeddingResponse,
    Model,
)


class LLMEngine(Protocol):
    """
    Contract that an OpenAI-aware inference engine must satisfy in order to
    be attached to an OpenAI-compatible frontend.

    NOTE: This interface is subject to change; over time it may become more
          generic instead of mirroring the OpenAI endpoints 1:1.
    """

    def ready(self) -> bool:
        """
        Returns True when the engine can accept inference requests, and
        False otherwise.
        """
        ...

    def metrics(self) -> str:
        """
        Returns the engine's metrics as a Prometheus-compatible string.
        """
        ...

    def models(self) -> List[Model]:
        """
        Returns the available models as a List of OpenAI Model objects.
        """
        ...

    def chat(
        self, request: CreateChatCompletionRequest
    ) -> CreateChatCompletionResponse | Iterator[str]:
        """
        Handles a chat completion request.

        When request.stream is True, the result is an Iterator (or Generator)
        yielding server-sent-event (SSE) strings shaped as:
            'data: {CreateChatCompletionStreamResponse}\\n\\n'
            ...
            'data: [DONE]\\n\\n'

        When request.stream is False, the result is a
        CreateChatCompletionResponse.
        """
        ...

    def completion(
        self, request: CreateCompletionRequest
    ) -> CreateCompletionResponse | Iterator[str]:
        """
        Handles a text completion request.

        When request.stream is True, the result is an Iterator (or Generator)
        yielding server-sent-event (SSE) strings shaped as:
            'data: {CreateCompletionResponse}\\n\\n'
            ...
            'data: [DONE]\\n\\n'

        When request.stream is False, the result is a
        CreateCompletionResponse.
        """
        ...

    def embedding(self, request: CreateEmbeddingRequest) -> CreateEmbeddingResponse:
        """
        Handles an embedding request and returns a CreateEmbeddingResponse.
        """
        ...


================================================
FILE: python/openai/openai_frontend/engine/triton_engine.py
================================================
# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


from __future__ import annotations

import base64
import json
import time
import uuid
from dataclasses import dataclass
from typing import (
    Any,
    AsyncIterable,
    AsyncIterator,
    Callable,
    Dict,
    List,
    Literal,
    Optional,
    Tuple,
    Union,
)

import numpy as np
import tritonserver
from engine.engine import LLMEngine
from engine.utils.chat import load_chat_template, parse_chat_messages
from engine.utils.tokenizer import get_tokenizer
from engine.utils.tool_call_parsers import ToolCallParser, ToolParserManager
from engine.utils.triton import (
    RequestKind,
    TritonLoraConfig,
    _create_trtllm_embedding_request,
    _create_trtllm_generate_request,
    _create_vllm_embedding_request,
    _create_vllm_generate_request,
    _get_openai_chat_format_logprobs_from_vllm_response,
    _get_openai_completion_format_logprobs_from_vllm_response,
    _get_output,
    _get_usage_from_response,
    _parse_lora_configs,
    _StreamingUsageAccumulator,
    _validate_triton_responses_non_streaming,
)
from schemas.openai import (
    ChatCompletionChoice,
    ChatCompletionFinishReason,
    ChatCompletionLogprobs,
    ChatCompletionMessageToolCall,
    ChatCompletionMessageToolCallChunk,
    ChatCompletionNamedToolChoice,
    ChatCompletionResponseMessage,
    ChatCompletionStreamingResponseChoice,
    ChatCompletionStreamResponseDelta,
    ChatCompletionToolChoiceOption1,
    Choice,
    CompletionUsage,
    CreateChatCompletionRequest,
    CreateChatCompletionResponse,
    CreateChatCompletionStreamResponse,
    CreateCompletionRequest,
    CreateCompletionResponse,
    CreateEmbeddingRequest,
    CreateEmbeddingResponse,
    EmbeddingObject,
    FinishReason,
    Function1,
    Function2,
    Model,
    ObjectType,
)
from utils.utils import ClientError, ServerError


# TODO: Improve type hints
@dataclass
class TritonModelMetadata:
    """
    Record bundling a Triton model handle with the per-model information the
    OpenAI frontend uses to build requests for it.
    """

    # Name used in Triton model repository
    name: str
    # Name of backend used by Triton
    backend: str
    # Triton model object handle
    model: tritonserver.Model
    # Tokenizer used for chat templates (optional)
    tokenizer: Optional[Any]
    # LoRA configs (each carrying a LoRA name) supported by the backend
    lora_configs: Optional[List[TritonLoraConfig]]
    # Name of the input tensor enabling "echo" parameter in /v1/completions endpoint
    echo_tensor_name: Optional[str]
    # Time that model was loaded by Triton
    create_time: int
    # Callables converting OpenAI requests into Triton requests, one for
    # generation (chat/completions) and one for embeddings
    inference_request_converter: Callable
    embedding_request_converter: Callable


class TritonLLMEngine(LLMEngine):
    def __init__(
        self,
        server: tritonserver.Server,
        tokenizer: str,
        default_max_tokens: int,
        backend: Optional[str] = None,
        lora_separator: Optional[str] = None,
        tool_call_parser: Optional[str] = None,
        chat_template: Optional[str] = None,
    ):
        """
        Wrap an existing Triton server so it can serve OpenAI-style requests.

        Args:
            server: Triton server instance; it is assumed to be configured
                and started already.
            tokenizer: Tokenizer identifier passed to self._get_tokenizer.
            default_max_tokens: Value forwarded to the request converters.
            backend: Optional override of the backend request format.
            lora_separator: Optional separator between the model name and
                the LoRA name in a requested model string.
            tool_call_parser: Optional name of a registered tool call parser.
            chat_template: Optional chat template handed to
                load_chat_template.
        """
        # The server is expected to be up and running before it gets here.
        self.server = server
        self.tokenizer = self._get_tokenizer(tokenizer)
        # TODO: Reconsider name of "backend" vs. something like "request_format"
        self.backend = backend
        self.lora_separator = lora_separator
        self.default_max_tokens = default_max_tokens

        # NOTE: Both the creation time and the model metadata are captured
        # once at startup for now, so models that are loaded or unloaded
        # dynamically later on are not reflected.
        self.create_time = int(time.time())
        self.model_metadata = self._get_model_metadata()

        # Only the parser class is resolved here; it is instantiated later,
        # per request.
        if tool_call_parser:
            self.tool_call_parser = ToolParserManager.get_tool_parser_cls(
                tool_call_parser
            )
        else:
            self.tool_call_parser = None

        self.chat_template = load_chat_template(chat_template)

    def ready(self) -> bool:
        return self.server.ready()

    def metrics(self) -> str:
        return self.server.metrics()

    def models(self) -> List[Model]:
        """
        List every servable model as an OpenAI Model object.

        Each entry of self.model_metadata contributes one Model for its base
        name. When a non-empty LoRA separator is configured and the entry has
        LoRA configs, one additional Model is listed per LoRA, with the id
        "<model name><separator><lora name>".
        """
        lora_listing_enabled = bool(self.lora_separator)

        listed: List[Model] = []
        for metadata in self.model_metadata.values():
            # Base model first, followed by its LoRA variants (if any).
            model_ids = [metadata.name]
            if lora_listing_enabled and metadata.lora_configs is not None:
                model_ids.extend(
                    f"{metadata.name}{self.lora_separator}{lora_config.name}"
                    for lora_config in metadata.lora_configs
                )

            listed.extend(
                Model(
                    id=model_id,
                    created=metadata.create_time,
                    object=ObjectType.model,
                    owned_by="Triton Inference Server",
                )
                for model_id in model_ids
            )

        return listed

    async def chat(
        self, request: CreateChatCompletionRequest
    ) -> CreateChatCompletionResponse | AsyncIterator[str]:
        """
        Serve a chat completion request.

        The requested model string is split into model and LoRA names, the
        request is validated, the messages are rendered into a prompt with
        the model's tokenizer chat template, and the prompt is sent to Triton.

        Returns the streaming chat iterator when request.stream is set;
        otherwise a CreateChatCompletionResponse with a single choice.
        """
        model_name, lora_name = self._get_model_and_lora_name(request.model)
        metadata = self.model_metadata.get(model_name)
        self._validate_chat_request(request, metadata, lora_name)

        conversation = parse_chat_messages(request.messages)
        add_generation_prompt = True

        # The chat template receives the tools as plain dicts.
        if request.tools is None:
            tool_dicts = None
        else:
            tool_dicts = [tool.model_dump() for tool in request.tools]

        prompt = metadata.tokenizer.apply_chat_template(
            conversation=conversation,
            tokenize=False,
            add_generation_prompt=add_generation_prompt,
            tools=tool_dicts,
            chat_template=self.chat_template,
        )

        # Build the backend-specific Triton request and start inference
        lora_config = self._get_lora_config(model_name, lora_name)
        triton_request = metadata.inference_request_converter(
            metadata.model,
            prompt,
            request,
            lora_config,
            metadata.echo_tensor_name,
            self.default_max_tokens,
        )
        responses = metadata.model.async_infer(triton_request)

        # Values shared by the streaming and non-streaming OpenAI responses
        request_id = f"cmpl-{uuid.uuid1()}"
        created = int(time.time())
        role = self._get_first_response_role(
            conversation, add_generation_prompt, "assistant"
        )

        tool_call_parser = None
        if self.tool_call_parser:
            tool_call_parser = self.tool_call_parser(metadata.tokenizer)

        if request.stream:
            return self._streaming_chat_iterator(
                request_id,
                metadata.backend,
                created,
                request,
                role,
                tool_call_parser,
                responses,
            )

        # Drain the response stream before validating, with decoupled models
        # in mind
        collected = [response async for response in responses]
        _validate_triton_responses_non_streaming(collected)
        response = collected[0]
        text = _get_output(response)

        response_message, finish_reason = self._get_chat_completion_response_message(
            request=request,
            request_id=request_id,
            tool_call_parser=tool_call_parser,
            text=text,
            role=role,
            backend=metadata.backend,
        )

        usage = _get_usage_from_response(
            response, metadata.backend, RequestKind.GENERATION
        )

        # Logprobs are attached only when requested and actually available
        logprobs_data = None
        if request.logprobs:
            openai_logprobs = _get_openai_chat_format_logprobs_from_vllm_response(
                response
            )
            if openai_logprobs:
                logprobs_data = ChatCompletionLogprobs(content=openai_logprobs)

        choice = ChatCompletionChoice(
            index=0,
            message=response_message,
            logprobs=logprobs_data,
            finish_reason=finish_reason,
        )
        return CreateChatCompletionResponse(
            id=request_id,
            choices=[choice],
            created=created,
            model=request.model,
            system_fingerprint=None,
            object=ObjectType.chat_completion,
            usage=usage,
        )

    def _get_chat_completion_response_message(
        self,
        request: CreateChatCompletionRequest,
        request_id: str,
        tool_call_parser: ToolCallParser,
        text: str,
        role: str,
        backend: str,
    ) -> Tuple[ChatCompletionResponseMessage, ChatCompletionFinishReason]:
        """
        Turn the generated text into a chat response message and finish reason.

        Three cases are handled, in this order:
          1. A named tool choice: the whole text becomes the arguments of a
             single tool call to that function; finish reason is "stop".
          2. A tool call parser is available, the request has tools and the
             tool choice is unset or "auto": the parser builds the message;
             finish reason is "tool_calls" only if it produced tool calls.
          3. Otherwise: the text is returned as plain content; finish reason
             is "stop".
        """
        named_function = self._get_named_function_name(request=request)
        if named_function:
            forced_call = ChatCompletionMessageToolCall(
                id=request_id,
                type="function",
                function=Function1(name=named_function, arguments=text),
            )
            named_message = ChatCompletionResponseMessage(
                content="",
                role=role,
                tool_calls=[forced_call],
            )
            return named_message, ChatCompletionFinishReason.stop

        if tool_call_parser and request.tools:
            tool_choice = request.tool_choice
            if (
                tool_choice is None
                or tool_choice.root == ChatCompletionToolChoiceOption1.auto
            ):
                parsed_message = tool_call_parser.parse_tool_calls(text, role, backend)
                parsed_calls = parsed_message.tool_calls
                if parsed_calls is not None and len(parsed_calls.root) > 0:
                    return parsed_message, ChatCompletionFinishReason.tool_calls
                return parsed_message, ChatCompletionFinishReason.stop

        plain_message = ChatCompletionResponseMessage(
            content=text, role=role, tool_calls=None
        )
        return plain_message, ChatCompletionFinishReason.stop

    async def completion(
        self, request: CreateCompletionRequest
    ) -> CreateCompletionResponse | AsyncIterator[str]:
        """
        Serve a text completion request.

        The requested model string is split into model and LoRA names, the
        request is validated, and request.prompt is sent to Triton.

        Returns the streaming completion iterator when request.stream is set;
        otherwise a CreateCompletionResponse with a single choice.
        """
        # Resolve the target model and validate the request
        model_name, lora_name = self._get_model_and_lora_name(request.model)
        metadata = self.model_metadata.get(model_name)
        self._validate_completion_request(request, metadata, lora_name)

        # Build the backend-specific Triton request and start inference
        triton_request = metadata.inference_request_converter(
            metadata.model,
            request.prompt,
            request,
            self._get_lora_config(model_name, lora_name),
            metadata.echo_tensor_name,
            self.default_max_tokens,
        )
        responses = metadata.model.async_infer(triton_request)

        # Values shared by the streaming and non-streaming OpenAI responses
        request_id = f"cmpl-{uuid.uuid1()}"
        created = int(time.time())
        if request.stream:
            return self._streaming_completion_iterator(
                request_id, created, request, responses, metadata.backend
            )

        # Drain the response stream before validating, with decoupled models
        # in mind
        collected = [response async for response in responses]
        _validate_triton_responses_non_streaming(collected)
        response = collected[0]
        text = _get_output(response)

        usage = _get_usage_from_response(
            response, metadata.backend, RequestKind.GENERATION
        )

        # Logprobs are parsed only for a positive requested count
        wants_logprobs = request.logprobs is not None and request.logprobs > 0
        if wants_logprobs:
            logprobs_data = _get_openai_completion_format_logprobs_from_vllm_response(
                response
            )
        else:
            logprobs_data = None

        return CreateCompletionResponse(
            id=request_id,
            choices=[
                Choice(
                    finish_reason=FinishReason.stop,
                    index=0,
                    logprobs=logprobs_data,
                    text=text,
                )
            ],
            system_fingerprint=None,
            object=ObjectType.text_completion,
            created=created,
            model=request.model,
            usage=usage,
        )

    async def embedding(
        self, request: CreateEmbeddingRequest
    ) -> CreateEmbeddingResponse:
        """
        Serve an embedding request.

        The request is validated, converted with the model's embedding
        request converter and sent to Triton. The model output is parsed as
        JSON, encoded according to request.encoding_format, and returned as
        the single entry of a CreateEmbeddingResponse.
        """
        # Resolve the target model (any LoRA name is ignored) and validate
        model_name, _ = self._get_model_and_lora_name(request.model)
        metadata = self.model_metadata.get(model_name)
        self._validate_embedding_request(request, metadata)

        # Build the backend-specific Triton request and start inference
        triton_request = metadata.embedding_request_converter(
            metadata.model,
            request,
        )
        responses = metadata.model.async_infer(triton_request)

        # Drain the response stream before validating, with decoupled models
        # in mind
        collected = [response async for response in responses]
        _validate_triton_responses_non_streaming(collected)
        response = collected[0]

        # The embedding currently arrives as a JSON string in the output text
        embedding_list = json.loads(_get_output(response))

        usage = _get_usage_from_response(
            response, metadata.backend, RequestKind.EMBEDDING
        )

        encoded = self._get_embedding(embedding_list, request.encoding_format)
        return CreateEmbeddingResponse(
            object="list",
            data=[EmbeddingObject(embedding=encoded, index=0, object="embedding")],
            model=request.model,
            usage=usage,
        )

    @staticmethod
    def _get_embedding(
        embedding: List[float], encoding_format: Literal["float", "base64"]
    ) -> Union[list[float], str]:
        if encoding_format == "float":
            return embedding
        else:
            embedding_bytes = np.array(embedding, dtype="float32").tobytes()
            return base64.b64encode(embedding_bytes).decode("utf-8")

    # TODO: This behavior should be tested further
    def _get_first_response_role(
        self, conversation: List[Dict], add_generation_prompt: bool, default_role: str
    ) -> str:
        if add_generation_prompt:
            return default_role

        return conversation[-1]["role"]

    # TODO: Expose explicit flag to catch edge cases
    def _determine_request_converter(self, backend: str, request_type: RequestKind):
        """
        Select the function that converts an OpenAI request to a Triton request.

        A backend configured on the engine (`self.backend`) takes precedence
        over the model's own backend. "vllm" gets the vLLM converters; every
        other backend falls back to the TRT-LLM request format.
        """
        # Allow manual override of backend request format if provided by user
        effective_backend = self.backend or backend
        is_generation = request_type == RequestKind.GENERATION

        # Request conversion from OpenAI format to backend-specific format
        if effective_backend == "vllm":
            if is_generation:
                return _create_vllm_generate_request
            return _create_vllm_embedding_request

        # Use TRT-LLM format as default for everything else. This could be
        # an ensemble, a python or BLS model, a TRT-LLM backend model, etc.
        if is_generation:
            return _create_trtllm_generate_request
        return _create_trtllm_embedding_request

    def _get_model_and_lora_name(self, request_model_name: str):
        if self.lora_separator is None or len(self.lora_separator) == 0:
            return request_model_name, None

        names = request_model_name.split(self.lora_separator)
        if len(names) != 2:
            return request_model_name, None

        return names[0], names[1]

    def _get_tokenizer(self, tokenizer_name: str):
        tokenizer = None
        if tokenizer_name:
            tokenizer = get_tokenizer(tokenizer_name)

        return tokenizer

    def _get_model_metadata(self) -> Dict[str, TritonModelMetadata]:
        """
        Build the metadata record for every model reported by the server.

        For each model this captures its backend (ensembles are labelled
        "ensemble"), its LoRA configurations, the name of its echo-control
        input tensor if one is declared, and the request converters for
        generation and embedding requests.

        Returns:
            A mapping from model name to its `TritonModelMetadata`.
        """
        # One tokenizer and creation time shared for all loaded models for now.
        model_metadata = {}

        # Input tensor names that are recorded as the model's echo tensor
        echo_tensor_candidates = (
            "exclude_input_in_output",
            "sampling_param_exclude_input_from_output",
        )

        # Read all triton models and store the necessary metadata for each
        for name, _ in self.server.models().keys():
            model = self.server.model(name)
            # Fetch the config once; it is consulted several times below
            config = model.config()
            backend = config["backend"]
            # Explicitly handle ensembles to avoid any runtime validation errors
            if not backend and config["platform"] == "ensemble":
                backend = "ensemble"
            print(f"Found model: {name=}, {backend=}")

            lora_configs = _parse_lora_configs(
                self.server.options.model_repository,
                name,
                model.version,
                backend if self.backend is None else self.backend,
            )

            # First declared input whose name matches a known echo tensor
            echo_tensor_name = next(
                (
                    tensor["name"]
                    for tensor in config["input"]
                    if tensor["name"] in echo_tensor_candidates
                ),
                None,
            )

            model_metadata[name] = TritonModelMetadata(
                name=name,
                backend=backend,
                model=model,
                tokenizer=self.tokenizer,
                lora_configs=lora_configs,
                echo_tensor_name=echo_tensor_name,
                create_time=self.create_time,
                inference_request_converter=self._determine_request_converter(
                    backend, RequestKind.GENERATION
                ),
                embedding_request_converter=self._determine_request_converter(
                    backend, RequestKind.EMBEDDING
                ),
            )

        return model_metadata

    def _get_streaming_chat_response_chunk(
        self,
        choice: ChatCompletionStreamingResponseChoice,
        request_id: str,
        created: int,
        model: str,
        usage: Optional[CompletionUsage] = None,
    ) -> CreateChatCompletionStreamResponse:
        """
        Wrap a single streaming choice in a chat completion chunk response.

        The chunk carries exactly one choice, no system fingerprint, and the
        optional `usage` passed by the caller.
        """
        chunk = CreateChatCompletionStreamResponse(
            id=request_id,
            object=ObjectType.chat_completion_chunk,
            created=created,
            model=model,
            system_fingerprint=None,
            choices=[choice],
            usage=usage,
        )
        return chunk

    def _get_first_streaming_chat_response(
        self, request_id: str, created: int, model: str, role: str
    ) -> CreateChatCompletionStreamResponse:
        """
        Build the opening chunk of a chat stream.

        The opening chunk only announces the responder's role: its content
        is empty and it has no logprobs, finish reason or usage.
        """
        role_delta = ChatCompletionStreamResponseDelta(
            role=role, content="", function_call=None
        )
        opening_choice = ChatCompletionStreamingResponseChoice(
            index=0,
            delta=role_delta,
            logprobs=None,
            finish_reason=None,
        )
        return self._get_streaming_chat_response_chunk(
            opening_choice, request_id, created, model, usage=None
        )

    async def _streaming_chat_iterator(
        self,
        request_id: str,
        backend: str,
        created: int,
        request: CreateChatCompletionRequest,
        role: str,
        tool_call_parser: ToolCallParser,
        responses: AsyncIterable,
    ) -> AsyncIterator[str]:
        """
        Turn a stream of Triton responses into server-sent-event strings for
        a streaming chat completion.

        Every yielded string has the form "data: <payload>" followed by a
        blank line. The sequence is:
          1. a first chunk that only sets the role,
          2. one chunk per response that produced a delta or a finish reason,
          3. optionally a final chunk with empty `choices` carrying usage,
             when `stream_options.include_usage` is set and usage is available,
          4. the literal "[DONE]" terminator.

        Args:
            request_id: Identifier placed on every chunk.
            backend: Backend name, passed to the usage accumulator and the
                tool call parser.
            created: Creation timestamp placed on every chunk.
            request: The originating chat completion request.
            role: Role announced in the first chunk.
            tool_call_parser: Parser used for "auto" tool choice; may be falsy.
            responses: Async iterable of inference responses.
        """
        model = request.model

        # Non-None when the request selects a specific tool by name, or uses
        # "required" tool choice
        tool_function_name = self._get_named_function_name(request=request)

        # Determine whether tools are in use with "auto" tool choice
        tool_choice_auto = (
            tool_call_parser
            and not tool_function_name
            and self._should_stream_with_auto_tool_parsing(request)
        )

        # Text accumulated so far; only advanced in the "auto" tool choice path
        previous_text = ""
        include_usage = request.stream_options and request.stream_options.include_usage
        usage_accumulator = _StreamingUsageAccumulator(backend)

        # The first chunk carries the role and empty content
        chunk = self._get_first_streaming_chat_response(
            request_id, created, model, role
        )
        yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"

        async for response in responses:
            delta_text = _get_output(response)
            if include_usage:
                usage_accumulator.update(response)

            (
                response_delta,
                finish_reason,
                current_text,
            ) = self._get_streaming_response_delta(
                previous_text=previous_text,
                delta_text=delta_text,
                tool_function_name=tool_function_name,
                tool_choice_auto=tool_choice_auto,
                tool_call_parser=tool_call_parser,
                backend=backend,
                is_final_response=response.final,
            )
            previous_text = current_text

            # Parse logprobs for this chunk if requested
            chunk_logprobs = None
            if request.logprobs:
                openai_logprobs = _get_openai_chat_format_logprobs_from_vllm_response(
                    response
                )
                if openai_logprobs:
                    chunk_logprobs = ChatCompletionLogprobs(content=openai_logprobs)

            # If the response delta is None (e.g. because it was a
            # "control token" for tool calls, or the parser otherwise
            # wasn't ready to send a token), then get the next token
            # without streaming a chunk.
            if response_delta is None and finish_reason is None:
                continue

            # A finish reason must still be delivered, so send it with an
            # empty content delta
            if finish_reason and response_delta is None:
                response_delta = ChatCompletionStreamResponseDelta(content="")

            choice = ChatCompletionStreamingResponseChoice(
                index=0,
                delta=response_delta,
                logprobs=chunk_logprobs,
                finish_reason=finish_reason,
            )

            # Usage is never attached to content chunks; it is sent separately below
            chunk = self._get_streaming_chat_response_chunk(
                choice, request_id, created, model, usage=None
            )
            yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"

        # Send the final usage chunk if requested via stream_options.
        if include_usage:
            usage_payload = usage_accumulator.get_final_usage()
            if usage_payload:
                final_usage_chunk = CreateChatCompletionStreamResponse(
                    id=request_id,
                    choices=[],
                    created=created,
                    model=model,
                    system_fingerprint=None,
                    object=ObjectType.chat_completion_chunk,
                    usage=usage_payload,
                )
                yield f"data: {final_usage_chunk.model_dump_json(exclude_unset=True)}\n\n"

        # Stream terminator
        yield "data: [DONE]\n\n"

    def _get_streaming_response_delta(
        self,
        previous_text: str,
        delta_text: str,
        tool_function_name: Optional[str],
        tool_choice_auto: bool,
        tool_call_parser: ToolCallParser,
        backend: str,
        is_final_response: bool,
    ) -> Tuple[
        Optional[ChatCompletionStreamResponseDelta],
        Optional[ChatCompletionFinishReason],
        str,
    ]:
        """
        Build the delta, finish reason and accumulated text for one streamed
        response.

        The delta is produced in one of three ways:
          * `tool_function_name` set: `delta_text` is emitted as the
            arguments of a tool call to that function at index 0.
          * `tool_choice_auto`: `delta_text` is appended to `previous_text`
            and the tool call parser decides what, if anything, to emit.
          * otherwise: `delta_text` is emitted as plain content.

        On the final response, the finish reason is `tool_calls` when the
        parser has recorded at least one tool call and `stop` otherwise. The
        delta may also be replaced by one carrying tool-call arguments that
        were parsed but not yet streamed.

        Returns:
            A tuple `(response_delta, finish_reason, current_text)`.
            `response_delta` may be None when the parser returns nothing.
            `finish_reason` is None unless `is_final_response` is true.
            `current_text` is the accumulated text in the "auto" path and
            an empty string in the other two paths.
        """
        response_delta: Optional[ChatCompletionStreamResponseDelta]
        current_text = ""
        if tool_function_name:
            # Named tool: the generated text is the call's arguments
            response_delta = ChatCompletionStreamResponseDelta(
                tool_calls=[
                    ChatCompletionMessageToolCallChunk(
                        index=0,
                        function=Function2(
                            name=tool_function_name, arguments=delta_text
                        ),
                    )
                ]
            )
        elif tool_choice_auto:
            # "auto" tool choice: let the parser inspect the accumulated text
            current_text = previous_text + delta_text
            response_delta = tool_call_parser.parse_tool_calls_streaming(
                current_text=current_text, delta_text=delta_text, backend=backend
            )
        else:
            # Plain text generation
            response_delta = ChatCompletionStreamResponseDelta(
                role=None, content=delta_text, function_call=None
            )

        if is_final_response:
            auto_tools_called = False
            if tool_call_parser:
                auto_tools_called = len(tool_call_parser.prev_tool_call_arr) > 0
                # Index of the most recent tool call recorded by the parser,
                # or 0 when none was recorded
                index = (
                    len(tool_call_parser.prev_tool_call_arr) - 1
                    if auto_tools_called
                    else 0
                )
            else:
                index = 0

            # check to make sure we haven't "forgotten" to stream
            # any tokens that were generated but previously
            # matched by partial json parsing, such as '}'.
            # only happens if we are NOT using structured outputs
            # or guided decoding
            if (
                self._should_check_for_unstreamed_tool_arg_tokens(
                    response_delta=response_delta,
                    auto_tools_called=auto_tools_called,
                )
                and tool_call_parser
            ):
                # Length of the arguments text carried by the current delta
                latest_delta_len = 0
                if (
                    isinstance(response_delta.tool_calls[0].function, Function2)
                ) and isinstance(response_delta.tool_calls[0].function.arguments, str):
                    latest_delta_len = len(
                        response_delta.tool_calls[0].function.arguments
                    )
                # get the expected call based on partial JSON
                # parsing which "autocompletes" the JSON
                expected_call = json.dumps(
                    tool_call_parser.prev_tool_call_arr[index].get("arguments", {}),
                    ensure_ascii=False,
                )
                # get what we've streamed so far for arguments
                # for the current tool
                actual_call = tool_call_parser.streamed_args_for_tool[index]
                # Drop the trailing characters that belong to the current
                # delta, so they are part of what is sent below
                if latest_delta_len > 0:
                    actual_call = actual_call[:-latest_delta_len]

                # check to see if there's anything left to stream
                remaining_call = expected_call.replace(actual_call, "", 1)

                # Replace the delta with one that carries the remaining arguments
                response_delta = ChatCompletionStreamResponseDelta(
                    tool_calls=[
                        ChatCompletionMessageToolCallChunk(
                            index=index,
                            function=Function2(arguments=remaining_call).model_dump(
                                exclude_none=True
                            ),
                        )
                    ]
                )

            finish_reason = (
                ChatCompletionFinishReason.tool_calls
                if auto_tools_called
                else ChatCompletionFinishReason.stop
            )
        else:
            finish_reason = None

        return response_delta, finish_reason, current_text

    def _validate_chat_request(
        self,
        request: CreateChatCompletionRequest,
        metadata: TritonModelMetadata,
        lora_name: str | None,
    ):
        """
        Check a chat completion request against the currently supported features.

        Raises ClientError for an unknown model or LoRA and for unsupported or
        inconsistent request options. Raises ServerError when the model's
        metadata lacks a tokenizer, backend or request converter.
        """

        # Internal information that must be present before inference can run
        if not metadata:
            raise ClientError(f"Unknown model: {request.model}")
        if not metadata.tokenizer:
            raise ServerError("Unknown tokenizer")
        if not metadata.backend:
            raise ServerError("Unknown backend")
        if not metadata.inference_request_converter:
            raise ServerError(
                f"Unknown inference request format for model: {request.model}"
            )
        if not metadata.embedding_request_converter:
            raise ServerError(
                f"Unknown embedding request format for model: {request.model}"
            )

        # A requested LoRA must be one of the model's configured LoRAs
        if metadata.lora_configs is not None and lora_name is not None:
            known_loras = [lora_config.name for lora_config in metadata.lora_configs]
            if lora_name not in known_loras:
                raise ClientError(
                    f"Unknown LoRA: {lora_name}; for model: {request.model}"
                )

        # Features that are not supported yet
        if request.n and request.n > 1:
            raise ClientError(
                f"Received n={request.n}, but only single choice (n=1) is currently supported"
            )
        if request.logit_bias is not None:
            raise ClientError("logit bias is not currently supported")

        # Logprobs are only supported for vLLM backend currently
        logprobs_requested = request.logprobs or request.top_logprobs is not None
        if metadata.backend != "vllm" and logprobs_requested:
            raise ClientError(
                "logprobs are currently available only for the vLLM backend"
            )
        if request.top_logprobs is not None and not request.logprobs:
            raise ClientError("`top_logprobs` can only be used when `logprobs` is True")

        self._verify_chat_tool_call_settings(request=request)

        if request.stream_options and not request.stream:
            raise ClientError("`stream_options` can only be used when `stream` is True")

    def _verify_chat_tool_call_settings(self, request: CreateChatCompletionRequest):
        """
        Reject tool-calling settings that cannot be honoured.

        "required" and named tool choices need `request.tools`. "auto" tool
        choice, and tools given without any tool choice, need a configured
        tool call parser. Raises ClientError on the first violation found.
        """
        tool_choice = request.tool_choice
        # The concrete selection wrapped by tool_choice, if one was given
        selected = tool_choice.root if tool_choice else None
        parser_missing = self.tool_call_parser is None

        if selected == ChatCompletionToolChoiceOption1.required and not request.tools:
            raise ClientError(
                '"required" tool choice requires CreateChatCompletionRequest.tools to be provided'
            )

        if isinstance(selected, ChatCompletionNamedToolChoice) and not request.tools:
            raise ClientError(
                "Named tool choice requires CreateChatCompletionRequest.tools to be provided"
            )

        if selected == ChatCompletionToolChoiceOption1.auto and parser_missing:
            raise ClientError(
                '"auto" tool choice requires --tool-call-parser to be set'
            )

        if tool_choice is None and request.tools and parser_missing:
            raise ClientError(
                "having tools in the request requires --tool-call-parser to be set"
            )

    async def _streaming_completion_iterator(
        self,
        request_id: str,
        created: int,
        request: CreateCompletionRequest,
        responses: AsyncIterable,
        backend: str,
    ) -> AsyncIterator[str]:
        """
        Turn a stream of Triton responses into server-sent-event strings for
        a streaming text completion.

        One chunk is yielded per response, then optionally a usage-only chunk
        with empty `choices` (when `stream_options.include_usage` is set and
        usage is available), then the "[DONE]" terminator.
        """
        model = request.model
        stream_options = request.stream_options
        include_usage = stream_options and stream_options.include_usage
        wants_logprobs = request.logprobs is not None and request.logprobs > 0
        usage_accumulator = _StreamingUsageAccumulator(backend)
        # Number of characters emitted by earlier chunks
        chars_streamed = 0

        async for response in responses:
            if include_usage:
                usage_accumulator.update(response)

            text = _get_output(response)

            # Parse logprobs for this chunk if requested
            chunk_logprobs = None
            if wants_logprobs:
                chunk_logprobs = (
                    _get_openai_completion_format_logprobs_from_vllm_response(response)
                )
                # Shift the offsets by the text already streamed in earlier chunks
                if chunk_logprobs and chunk_logprobs.text_offset:
                    chunk_logprobs.text_offset = [
                        offset + chars_streamed for offset in chunk_logprobs.text_offset
                    ]

            chars_streamed += len(text)

            finish_reason = FinishReason.stop if response.final else None
            streamed_choice = Choice(
                finish_reason=finish_reason,
                index=0,
                logprobs=chunk_logprobs,
                text=text,
            )
            content_chunk = CreateCompletionResponse(
                id=request_id,
                choices=[streamed_choice],
                system_fingerprint=None,
                object=ObjectType.text_completion,
                created=created,
                model=model,
                usage=None,
            )

            yield f"data: {content_chunk.model_dump_json(exclude_unset=True)}\n\n"

        # Send the final usage chunk if requested via stream_options.
        usage_payload = usage_accumulator.get_final_usage() if include_usage else None
        if usage_payload:
            usage_chunk = CreateCompletionResponse(
                id=request_id,
                choices=[],
                system_fingerprint=None,
                object=ObjectType.text_completion,
                created=created,
                model=model,
                usage=usage_payload,
            )
            yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"

        yield "data: [DONE]\n\n"

    def _validate_completion_request(
        self,
        request: CreateCompletionRequest,
        metadata: TritonModelMetadata,
        lora_name: str | None,
    ):
        """
        Check a completions request against the currently supported features.

        Raises ClientError for an unknown model or LoRA and for unsupported or
        inconsistent request options. Raises ServerError when the model's
        metadata lacks a backend or request converter.
        """
        # Internal information that must be present before inference can run
        if not metadata:
            raise ClientError(f"Unknown model: {request.model}")
        if not metadata.backend:
            raise ServerError("Unknown backend")
        if not metadata.inference_request_converter:
            raise ServerError(
                f"Unknown inference request format for model: {request.model}"
            )
        if not metadata.embedding_request_converter:
            raise ServerError(
                f"Unknown embedding request format for model: {request.model}"
            )

        # A requested LoRA must be one of the model's configured LoRAs
        if metadata.lora_configs is not None and lora_name is not None:
            known_loras = [lora_config.name for lora_config in metadata.lora_configs]
            if lora_name not in known_loras:
                raise ClientError(
                    f"Unknown LoRA: {lora_name}; for model: {request.model}"
                )

        # Features that are not supported yet
        if request.suffix is not None:
            raise ClientError("suffix is not currently supported")

        prompt = request.prompt
        if not prompt:
            raise ClientError("prompt must be non-empty")
        # Currently only support single string as input
        if not isinstance(prompt, str):
            raise ClientError("only single string input is supported")

        # best_of is rejected for vLLM whenever the client set it explicitly
        if "best_of" in request.model_fields_set and metadata.backend == "vllm":
            raise ClientError(
                "best_of is no longer supported in vLLM backend, removed from vLLM V1 engine"
            )

        if request.n and request.n > 1:
            raise ClientError(
                f"Received n={request.n}, but only single choice (n=1) is currently supported"
            )
        if request.best_of and request.best_of > 1:
            raise ClientError(
                f"Received best_of={request.best_of}, but only single choice (best_of=1) is currently supported"
            )
        if request.logit_bias is not None:
            raise ClientError("logit bias is not supported")

        # Logprobs are only supported for vLLM backend currently
        logprobs_requested = request.logprobs is not None and request.logprobs > 0
        if logprobs_requested and metadata.backend != "vllm":
            raise ClientError(
                "logprobs are currently available only for the vLLM backend"
            )

        if request.stream_options and not request.stream:
            raise ClientError("`stream_options` can only be used when `stream` is True")

    def _validate_embedding_request(
        self,
        request: CreateEmbeddingRequest,
        metadata: TritonModelMetadata,
    ):
        """
        Check that an embedding request targets a model that can serve it.

        Raises ClientError when the model is unknown, and ServerError when
        the model's metadata lacks a backend or request converter.
        """

        # Internal information that must be present before inference can run
        if not metadata:
            raise ClientError(f"Unknown model: {request.model}")
        if not metadata.backend:
            raise ServerError("Unknown backend")

        has_inference_converter = bool(metadata.inference_request_converter)
        if not has_inference_converter:
            raise ServerError(
                f"Unknown inference request format for model: {request.model}"
            )

        has_embedding_converter = bool(metadata.embedding_request_converter)
        if not has_embedding_converter:
            raise ServerError(
                f"Unknown embedding request format for model: {request.model}"
            )

    def _should_stream_with_auto_tool_parsing(
        self, request: CreateChatCompletionRequest
    ):
        has_tools = request.tools and self.tool_call_parser
        auto_tool = (
            request.tool_choice is None
            or request.tool_choice.root == ChatCompletionToolChoiceOption1.auto
        )
        return has_tools and auto_tool

    def _should_check_for_unstreamed_tool_arg_tokens(
        self, response_delta: ChatCompletionStreamResponseDelta, auto_tools_called
    ):
        return bool(
            auto_tools_called
            and self.tool_call_parser
            and response_delta
            and response_delta.tool_calls
            and response_delta.tool_calls[0]
            and response_delta.tool_calls[0].function
            and response_delta.tool_calls[0].function.arguments is not None
        )

    def _get_named_function_name(
        self, request: CreateChatCompletionRequest
    ) -> Optional[str]:
        if request.tool_choice and isinstance(
            request.tool_choice.root, ChatCompletionNamedToolChoice
        ):
            tool_choice_function_name = request.tool_choice.root.function.name
        else:
            tool_choice_function_name = None

        if (
            request.tool_choice
            and request.tool_choice.root == ChatCompletionToolChoiceOption1.required
        ):
            tool_choice_required_function_name = request.tools[0].function.name
        else:
            tool_choice_required_function_name = None

        return tool_choice_function_name or tool_choice_required_function_name

    def _get_lora_config(
        self, model_name: str, lora_name: Optional[str]
    ) -> Optional[TritonLoraConfig]:
        """
        Look up the configuration of a LoRA for a model.

        Args:
            model_name: Name of the base model.
            lora_name: Name of the requested LoRA, or None if none was requested.

        Returns:
            The matching LoRA configuration, or None when no LoRA was
            requested or the model has no LoRA configurations.

        Raises:
            ClientError: If the model is unknown, or the model has LoRA
                configurations but none is named `lora_name`.
        """
        if lora_name is None:
            return None

        model_metadata = self.model_metadata.get(model_name)
        # An unknown model is reported as a client error instead of failing
        # with an AttributeError on the missing metadata
        if model_metadata is None:
            raise ClientError(f"Unknown model: {model_name}")

        if model_metadata.lora_configs is None:
            return None

        for lora_config in model_metadata.lora_configs:
            if lora_config.name == lora_name:
                return lora_config
        raise ClientError(f"Unknown LoRA: {lora_name}; for model: {model_name}")


================================================
FILE: python/openai/openai_frontend/engine/utils/__init__.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: python/openai/openai_frontend/engine/utils/chat.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import json
from typing import Dict, Iterable, List, Optional, Required, TypedDict, Union, cast

# FIXME: Converge on single set of types in either schemas.openai or openai.types
from openai.types.chat import ChatCompletionMessageToolCallParam
from openai.types.chat.chat_completion_message_tool_call_param import Function
from schemas.openai import (
    ChatCompletionMessageToolCall,
    ChatCompletionRequestAssistantMessage,
    ChatCompletionRequestMessage,
    ChatCompletionRequestMessageContentPart,
    ChatCompletionRequestToolMessage,
    ChatCompletionRequestUserMessage,
    Type1,
)
from utils.utils import ClientError


class ConversationMessage(TypedDict, total=False):
    """
    Dictionary form of one chat message.

    Only `role` is required; every other key may be omitted (`total=False`).
    """

    role: Required[str]
    """The role of the message's author."""

    content: Union[Optional[str], List[Dict[str, str]]]
    """The contents of the message"""

    tool_call_id: Optional[str]
    """Tool call that this message is responding to."""

    name: Optional[str]
    """The name of the function to call"""

    tool_calls: Optional[Iterable[ChatCompletionMessageToolCallParam]]
    """The tool calls generated by the model, such as function calls."""


def _frontend_schema_to_openai_schema_completion_tool_call(
    tool_call_param: ChatCompletionMessageToolCall,
) -> ChatCompletionMessageToolCallParam:
    """
    Convert a tool call from the frontend schema type to the `openai`
    package's tool call parameter type, copying id, type and the function's
    name and arguments.
    """
    source_function = tool_call_param.function
    converted_function = Function(
        name=source_function.name,
        arguments=source_function.arguments,
    )
    return ChatCompletionMessageToolCallParam(
        id=tool_call_param.id,
        type=tool_call_param.type,
        function=converted_function,
    )


def _parse_chat_message_content_parts(
    role: str, parts: List[ChatCompletionRequestMessageContentPart]
) -> ConversationMessage:
    """
    Build a conversation message from a list of content parts.

    Only text parts are accepted; each becomes a
    `{"type": "text", "text": ...}` entry in the message content.

    Raises:
        ClientError: If any part is not a text part.
    """
    text_entries: List[Dict] = []

    for part in parts:
        is_text = part.root.type == Type1.text or part.root.type == "text"
        if not is_text:
            raise ClientError(
                f"only text message is supported, but got {part.root.type}"
            )
        text_entries.append({"type": "text", "text": part.root.text})

    return ConversationMessage(role=role, content=text_entries)


def _parse_chat_message_content(
    message: ChatCompletionRequestMessage,
) -> ConversationMessage:
    """
    Convert one request message into a conversation message dictionary.

    String or missing content is copied as-is; a list of content parts is
    parsed part by part. Role-specific fields are then added: `tool_calls`
    for assistant messages, `tool_call_id` for tool messages, and `name` for
    user messages that carry a string name.
    """
    root = message.root
    role = root.role
    content = root.content

    if content is not None and not isinstance(content, str):
        # content is a list of message parts
        result_msg = _parse_chat_message_content_parts(role, content)
    else:
        result_msg = ConversationMessage(role=role, content=content)

    if role == "assistant":
        assistant_msg = cast(ChatCompletionRequestAssistantMessage, root)
        if assistant_msg.tool_calls:
            result_msg["tool_calls"] = [
                _frontend_schema_to_openai_schema_completion_tool_call(tool_call)
                for tool_call in assistant_msg.tool_calls.root
            ]
    elif role == "tool":
        tool_msg = cast(ChatCompletionRequestToolMessage, root)
        if tool_msg.tool_call_id:
            result_msg["tool_call_id"] = tool_msg.tool_call_id

    is_user_message = isinstance(root, ChatCompletionRequestUserMessage)
    if is_user_message and isinstance(root.name, str):
        result_msg["name"] = root.name

    return result_msg


def _postprocess_messages(messages: List[ConversationMessage]) -> None:
    # per the Transformers docs & maintainers, tool call arguments in
    # assistant-role messages with tool_calls need to be dicts not JSON str -
    # this is how tool-use chat templates will expect them moving forwards
    # so, for messages that have tool_calls, parse the string (which we get
    # from openAI format) to dict
    for message in messages:
        if (
            message["role"] == "assistant"
            and "tool_calls" in message
            and isinstance(message["tool_calls"], list)
        ):
            for item in message["tool_calls"]:
                item["function"]["arguments"] = json.loads(
                    item["function"]["arguments"]
                )


def parse_chat_messages(
    messages: List[ChatCompletionRequestMessage],
) -> List[ConversationMessage]:
    """Convert request messages into conversation messages.

    Each message is converted with ``_parse_chat_message_content`` (input
    order is preserved) and the resulting list is then handed to
    ``_postprocess_messages``, which adjusts it in place.

    Args:
        messages: Messages taken from the chat completion request.

    Returns:
        The post-processed list of conversation messages.
    """
    conversation: List[ConversationMessage] = [
        _parse_chat_message_content(entry) for entry in messages
    ]
    _postprocess_messages(conversation)
    return conversation


# This function loads the chat template file content
# if the user chooses to use a chat template different from
# the original one provided with the model's tokenizer.
def load_chat_template(chat_template) -> Optional[str]:
    """Read a custom chat template from disk.

    Args:
        chat_template: Path of the template file, or ``None`` when no custom
            template was requested.

    Returns:
        The full text of the file, or ``None`` if ``chat_template`` is
        ``None``.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    if chat_template is None:
        return None

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(chat_template, encoding="utf-8") as f:
        return f.read()


================================================
FILE: python/openai/openai_frontend/engine/utils/tokenizer.py
================================================
# Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Adapted from
# https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer.py
# Copyright 2024 The vLLM team.

from typing import Optional, Union

from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast

AnyTokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]


def get_cached_tokenizer(
    tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
    """Freeze frequently-read tokenizer properties and return the tokenizer.

    Transformers recomputes several tokenizer properties on every access,
    which is slow. This function reads ``all_special_ids``,
    ``all_special_tokens_extended``, ``all_special_tokens`` and ``len()``
    once, then swaps the object's class for a dynamically created subclass
    whose properties simply return those stored values.

    The tokenizer is patched in place; the returned object is the same
    instance that was passed in. ``all_special_ids`` and
    ``all_special_tokens`` are exposed as sets afterwards.
    """
    # Take the snapshots up front; the subclass below closes over them.
    frozen_special_ids = set(tokenizer.all_special_ids)
    frozen_special_tokens_extended = tokenizer.all_special_tokens_extended
    frozen_special_tokens = set(tokenizer.all_special_tokens)
    frozen_length = len(tokenizer)

    original_cls = tokenizer.__class__

    class CachedTokenizer(original_cls):  # type: ignore
        def __len__(self):
            return frozen_length

        @property
        def all_special_tokens_extended(self):
            return frozen_special_tokens_extended

        @property
        def all_special_tokens(self):
            return frozen_special_tokens

        @property
        def all_special_ids(self):
            return frozen_special_ids

    CachedTokenizer.__name__ = "Cached" + original_cls.__name__

    # Re-class the existing instance rather than building a new object.
    tokenizer.__class__ = CachedTokenizer
    return tokenizer


def get_tokenizer(
    tokenizer_name: str,
    *args,
    tokenizer_mode: str = "auto",
    trust_remote_code: bool = False,
    tokenizer_revision: Optional[str] = None,
    download_dir: Optional[str] = None,
    **kwargs,
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
    """Load a tokenizer with ``AutoTokenizer`` and cache its hot properties.

    Args:
        tokenizer_name: Model name or path handed to
            ``AutoTokenizer.from_pretrained``.
        *args: Extra positional arguments forwarded to ``from_pretrained``.
        tokenizer_mode: ``"slow"`` forces ``use_fast=False``; any other value
            leaves the choice to ``from_pretrained``.
        trust_remote_code: Forwarded to ``from_pretrained``.
        tokenizer_revision: Revision (branch, tag or commit) of the tokenizer
            to load. Forwarded as ``revision`` unless the caller already
            supplied ``revision`` in ``kwargs``.
        download_dir: Accepted for interface compatibility; this function
            does not currently use it.
        **kwargs: Extra keyword arguments forwarded to ``from_pretrained``.

    Returns:
        The loaded tokenizer, patched by ``get_cached_tokenizer``.

    Raises:
        ValueError: If ``tokenizer_mode == "slow"`` is combined with
            ``use_fast=True``. Errors raised by ``from_pretrained``
            propagate unchanged.
    """
    if tokenizer_mode == "slow":
        if kwargs.get("use_fast", False):
            raise ValueError("Cannot use the fast tokenizer in slow tokenizer mode.")
        kwargs["use_fast"] = False

    # `from_pretrained` selects the checkpoint version through `revision`.
    # An unknown `tokenizer_revision` keyword would not be used for that, so
    # the requested revision would silently be ignored.
    if tokenizer_revision is not None:
        kwargs.setdefault("revision", tokenizer_revision)

    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_name,
        *args,
        trust_remote_code=trust_remote_code,
        **kwargs,
    )

    if not isinstance(tokenizer, PreTrainedTokenizerFast):
        print(
            "Using a slow tokenizer. This might cause a significant "
            "slowdown. Consider using a fast tokenizer instead."
        )
    return get_cached_tokenizer(tokenizer)


================================================
FILE: python/openai/openai_frontend/engine/utils/tool_call_parsers/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Adapted from
# https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/tool_parsers/__init__.py
# Copyright 2024 The vLLM team.

# Importing the concrete parser modules runs their
# `@ToolParserManager.register_module(...)` class decorators ("llama3" and
# "mistral"), which presumably makes them discoverable by name through
# ToolParserManager - confirm against tool_call_parser.py.
from .llama_tool_call_parser import Llama3JsonToolParser
from .mistral_tool_call_parser import MistralToolParser
from .tool_call_parser import ToolCallParser, ToolParserManager

# Names re-exported as this package's public API.
__all__ = [
    "ToolCallParser",
    "ToolParserManager",
    "Llama3JsonToolParser",
    "MistralToolParser",
]


================================================
FILE: python/openai/openai_frontend/engine/utils/tool_call_parsers/llama_tool_call_parser.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Adapted from
# https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
# Copyright 2024 The vLLM team.
import json
import uuid
from typing import Union

import partial_json_parser
from engine.utils.tokenizer import AnyTokenizer
from engine.utils.tool_call_parsers.tool_call_parser import (
    ToolCallParser,
    ToolParserManager,
)
from partial_json_parser.core.options import Allow
from schemas.openai import (
    ChatCompletionMessageToolCall,
    ChatCompletionMessageToolCallChunk,
    ChatCompletionMessageToolCalls,
    ChatCompletionResponseMessage,
    ChatCompletionStreamResponseDelta,
    Function1,
    Function2,
)

from .utils import find_common_prefix, is_complete_json, partial_json_loads


@ToolParserManager.register_module("llama3")
class Llama3JsonToolParser(ToolCallParser):
    """Tool-call parser for Llama 3 style JSON tool calls.

    A response is treated as a tool call when it starts with the
    ``<|python_tag|>`` marker or directly with ``{``. Each tool call is a JSON
    object carrying ``name`` and either ``arguments`` or ``parameters``;
    several objects may follow each other separated by ``"; "``.

    The instance keeps per-response state for streaming mode, so one instance
    tracks the progress of a single streamed response.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        # initialize properties used for state when parsing tool calls in
        # streaming mode
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id: int = -1
        self.current_tool_name_sent: bool = False
        self.streamed_args_for_tool: list[
            str
        ] = []  # map what has been streamed for each tool so far to a list

        self.bot_token = "<|python_tag|>"

    @staticmethod
    def _skip_whitespace(text: str, index: int) -> int:
        """Return the first position at or after ``index`` that is not whitespace."""
        while index < len(text) and text[index].isspace():
            index += 1
        return index

    def parse_tool_calls(
        self, full_text: str, role: str, backend: str
    ) -> ChatCompletionResponseMessage:
        """
        Extract the tool calls from a complete model response.

        Args:
            full_text: The complete generated text.
            role: Role to set on the returned message.
            backend: Backend name; for anything other than ``"vllm"`` the
                text is cut at the first ``"assistant\\n\\n"`` found after
                position 0 before parsing.

        Returns:
            A message with ``tool_calls`` set and empty content when at least
            one tool call could be decoded; otherwise a plain text message
            carrying the original ``full_text``.
        """
        # case -- if a tool call token is not present, return a text response
        if not (full_text.startswith(self.bot_token) or full_text.startswith("{")):
            return ChatCompletionResponseMessage(
                tool_calls=None, content=full_text, role=role
            )

        original_full_text = full_text
        try:
            # FIXME: tensorrt_llm backend might generate some unnecessary text messages
            # after the tool call json text starting with "assistant\n\n".
            if backend != "vllm":
                last_index = full_text.find("assistant\n\n")
                if last_index > 0:
                    full_text = full_text[:last_index]

            # load the JSON, and then use it to build the Function and
            # Tool Call
            dec = json.JSONDecoder()
            function_call_arr = []

            # depending on the prompt format the Llama model may or may not
            # prefix the output with the <|python_tag|> token
            start_idx = (
                len(self.bot_token) if full_text.startswith(self.bot_token) else 0
            )
            # raw_decode() does not accept leading whitespace, so skip it
            # explicitly; otherwise a stray space/newline around an otherwise
            # valid tool call would demote the whole response to plain text.
            start_idx = self._skip_whitespace(full_text, start_idx)
            while start_idx < len(full_text):
                (obj, end_idx) = dec.raw_decode(full_text[start_idx:])
                function_call_arr.append(obj)
                # step over the "; " separator, then over any extra whitespace
                start_idx = self._skip_whitespace(
                    full_text, start_idx + end_idx + len("; ")
                )

            # Nothing decodable after the marker (e.g. a bare <|python_tag|>):
            # report the text as-is instead of an empty tool-call list.
            if not function_call_arr:
                return ChatCompletionResponseMessage(
                    tool_calls=None, content=original_full_text, role=role
                )

            tool_calls = ChatCompletionMessageToolCalls(
                root=[
                    ChatCompletionMessageToolCall(
                        id=f"cmpl-{uuid.uuid1()}",
                        type="function",
                        function=Function1(
                            name=raw_function_call["name"],
                            # function call args are JSON but as a string
                            arguments=json.dumps(
                                raw_function_call["arguments"]
                                if "arguments" in raw_function_call
                                else raw_function_call["parameters"]
                            ),
                        ),
                    )
                    for raw_function_call in function_call_arr
                ]
            )

            # a tool-call response carries no text content
            ret = ChatCompletionResponseMessage(
                tool_calls=tool_calls, content="", role=role
            )
            return ret

        except Exception:
            # return information to just treat the tool call as regular JSON
            return ChatCompletionResponseMessage(
                tool_calls=None, content=original_full_text, role=role
            )

    def parse_tool_calls_streaming(
        self, current_text: str, delta_text: str, backend: str
    ) -> Union[ChatCompletionStreamResponseDelta, None]:
        """
        Turn the latest chunk of a streamed response into a delta message.

        Args:
            current_text: Everything generated so far, including the newest
                chunk.
            delta_text: The newest chunk only.
            backend: Backend name; not consulted by this method.

        Returns:
            A content delta when the response is not a tool call, a tool-call
            delta (name first, then argument fragments) when there is
            something new to send, or ``None`` when nothing can be emitted
            yet or any error occurred while parsing.
        """
        if not (
            current_text.startswith(self.bot_token) or current_text.startswith("{")
        ):
            return ChatCompletionStreamResponseDelta(content=delta_text)

        # bit mask flags for partial JSON parsing. If the name hasn't been
        # sent yet, don't allow sending
        # an incomplete string since OpenAI only ever (as far as I have
        # seen) allows sending the entire tool/ function name at once.
        flags = Allow.ALL if self.current_tool_name_sent else Allow.ALL & ~Allow.STR
        try:
            tool_call_arr = []
            is_complete = []
            try:
                # depending on the prompt format the Llama model may or may not
                # prefix the output with the <|python_tag|> token
                start_idx = (
                    len(self.bot_token)
                    if current_text.startswith(self.bot_token)
                    else 0
                )
                while start_idx < len(current_text):
                    (obj, end_idx) = partial_json_loads(current_text[start_idx:], flags)
                    is_complete.append(
                        is_complete_json(current_text[start_idx : start_idx + end_idx])
                    )
                    start_idx += end_idx + len("; ")
                    # depending on the prompt Llama can use
                    # either arguments or parameters
                    if "parameters" in obj:
                        assert (
                            "arguments" not in obj
                        ), "model generated both parameters and arguments"
                        obj["arguments"] = obj["parameters"]
                    tool_call_arr.append(obj)
            except partial_json_parser.core.exceptions.MalformedJSON:
                return None

            # select as the current tool call the one we're on the state at
            current_tool_call: dict = (
                tool_call_arr[self.current_tool_id] if len(tool_call_arr) > 0 else {}
            )

            # case -- if no tokens have been streamed for the tool, e.g.
            #   only the array brackets, stream nothing
            if len(tool_call_arr) == 0:
                return None

            # case: we are starting a new tool in the array
            #   -> array has > 0 length AND length has moved past cursor
            elif (
                len(tool_call_arr) > 0 and len(tool_call_arr) > self.current_tool_id + 1
            ):
                # if we're moving on to a new call, first make sure we
                # haven't missed anything in the previous one that was
                # auto-generated due to JSON completions, but wasn't
                # streamed to the client yet.
                if self.current_tool_id >= 0:
                    cur_arguments = current_tool_call.get("arguments")
                    if cur_arguments:
                        cur_args_json = json.dumps(cur_arguments)
                        sent = len(self.streamed_args_for_tool[self.current_tool_id])
                        argument_diff = cur_args_json[sent:]

                        delta = ChatCompletionStreamResponseDelta(
                            tool_calls=[
                                ChatCompletionMessageToolCallChunk(
                                    index=self.current_tool_id,
                                    function=Function2(
                                        arguments=argument_diff
                                    ).model_dump(exclude_none=True),
                                )
                            ]
                        )
                        self.streamed_args_for_tool[
                            self.current_tool_id
                        ] += argument_diff
                    else:
                        delta = None
                else:
                    delta = None
                # re-set stuff pertaining to progress in the current tool
                self.current_tool_id = len(tool_call_arr) - 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                # NOTE: returns before prev_tool_call_arr is refreshed below
                return delta

            # if the current tool name hasn't been sent, send if available
            # - otherwise send nothing
            elif not self.current_tool_name_sent:
                function_name = current_tool_call.get("name")
                if function_name:
                    delta = ChatCompletionStreamResponseDelta(
                        tool_calls=[
                            ChatCompletionMessageToolCallChunk(
                                index=self.current_tool_id,
                                type="function",
                                id=f"cmpl-{uuid.uuid1()}",
                                function=Function2(name=function_name).model_dump(
                                    exclude_none=True
                                ),
                            )
                        ]
                    )
                    self.current_tool_name_sent = True
                else:
                    delta = None

            # now we know we're on the same tool call and we're streaming
            # arguments
            else:
                cur_arguments = current_tool_call.get("arguments")
                delta = None

                if cur_arguments:
                    sent = len(self.streamed_args_for_tool[self.current_tool_id])
                    cur_args_json = json.dumps(cur_arguments)
                    prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
                        "arguments"
                    )

                    argument_diff = None
                    if is_complete[self.current_tool_id]:
                        # the object is closed: flush everything not yet sent
                        argument_diff = cur_args_json[sent:]
                    elif prev_arguments:
                        # still open: only send the part shared with the
                        # previous snapshot, beyond what was already sent
                        prev_args_json = json.dumps(prev_arguments)
                        if cur_args_json != prev_args_json:
                            prefix = find_common_prefix(prev_args_json, cur_args_json)
                            argument_diff = prefix[sent:]

                    if argument_diff is not None:
                        delta = ChatCompletionStreamResponseDelta(
                            tool_calls=[
                                ChatCompletionMessageToolCallChunk(
                                    index=self.current_tool_id,
                                    function=Function2(
                                        arguments=argument_diff
                                    ).model_dump(exclude_none=True),
                                )
                            ]
                        )
                        self.streamed_args_for_tool[
                            self.current_tool_id
                        ] += argument_diff

            # remember this snapshot for the next chunk's comparison
            self.prev_tool_call_arr = tool_call_arr
            return delta

        except Exception:
            # best effort: any unexpected parsing problem yields no delta
            return None


================================================
FILE: python/openai/openai_frontend/engine/utils/tool_call_parsers/mistral_tool_call_parser.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Adapted from
# https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
# Copyright 2024 The vLLM team.
import json
import re
from random import choices
from string import ascii_letters, digits
from typing import Dict, List, Union

import partial_json_parser
from engine.utils.tokenizer import AnyTokenizer
from engine.utils.tool_call_parsers.tool_call_parser import (
    ToolCallParser,
    ToolParserManager,
)
from partial_json_parser.core.options import Allow
from schemas.openai import (
    ChatCompletionMessageToolCall,
    ChatCompletionMessageToolCallChunk,
    ChatCompletionMessageToolCalls,
    ChatCompletionResponseMessage,
    ChatCompletionStreamResponseDelta,
    Function1,
    Function2,
)

from .utils import extract_intermediate_diff

# Alphabet that tool-call ids are drawn from: ASCII letters followed by digits.
ALPHANUMERIC = ascii_letters + digits


def generate_mistral_random_id():
    """Return a random 9-character alphanumeric tool-call id."""
    # Mistral Tool Call Ids must be alphanumeric with a maximum length of 9.
    # https://github.com/mistralai/mistral-common/blob/21ee9f6cee3441e9bb1e6ed2d10173f90bd9b94b/src/mistral_common/protocol/instruct/validator.py#L299
    id_length = 9
    picked = choices(ALPHANUMERIC, k=id_length)
    return "".join(picked)


@ToolParserManager.register_module("mistral")
class MistralToolParser(ToolCallParser):
    def __init__(self, tokenizer: AnyTokenizer):
        """Set up the tool-call marker, fallback pattern and streaming state."""
        super().__init__(tokenizer)

        # Marker checked for at the start of a tool-call response, and the
        # pattern used to pull a `[{...}]` array out of text that does not
        # parse as JSON on its own.
        self.bot_token = "[TOOL_CALLS]"
        self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)

        # State carried between chunks while parsing in streaming mode.
        self.current_tool_id: int = -1
        self.current_tool_name_sent: bool = False
        self.prev_tool_call_arr: List[Dict] = []
        # what has been streamed so far, one string per tool call
        self.streamed_args_for_tool: List[str] = []

    def parse_tool_calls(
        self, full_text: str, role: str, backend: str
    ) -> ChatCompletionResponseMessage:
        """
        Extract the tool calls from a complete model response.

        The response is considered a tool call when it starts with the
        ``[TOOL_CALLS]`` marker or with ``[``. After removing the marker the
        remainder is parsed as a JSON array of objects that each carry
        ``name`` and ``arguments``. If the text is not valid JSON as a whole,
        a ``[{...}]`` span is searched for with a regex and parsed instead.

        Args:
            full_text: The complete generated text.
            role: Role to set on the returned message.
            backend: Backend name; not consulted by this method.

        Returns:
            A message with ``tool_calls`` set and empty content when a
            non-empty array of tool calls was decoded; otherwise a plain
            text message carrying the original ``full_text``.
        """

        # case -- if a tool call token is not present, return a text response
        if not (full_text.startswith(self.bot_token) or full_text.startswith("[")):
            return ChatCompletionResponseMessage(
                tool_calls=None, content=full_text, role=role
            )

        # first remove the BOT token
        tool_content = full_text.replace(self.bot_token, "").strip()
        try:
            # we first try to directly load the json as parsing very nested
            # jsons is difficult
            try:
                function_call_arr = json.loads(tool_content)
            except json.JSONDecodeError:
                # use a regex to find the part corresponding to the tool call.
                # NOTE: This use case should not happen if the model is trained
                # correctly. It's a easy possible fix so it's included, but
                # can be brittle for very complex / highly nested tool calls
                raw_tool_call = self.tool_call_regex.findall(tool_content)[0]
                function_call_arr = json.loads(raw_tool_call)

            # An empty array (or a JSON value that is not an array) carries
            # no tool calls; hand the text back untouched rather than
            # returning an empty tool-call list and dropping the content.
            if not isinstance(function_call_arr, list) or not function_call_arr:
                return ChatCompletionResponseMessage(
                    tool_calls=None, content=full_text, role=role
                )

            # Tool Call
            tool_calls = ChatCompletionMessageToolCalls(
                root=[
                    ChatCompletionMessageToolCall(
                        id=generate_mistral_random_id(),
                        type="function",
                        function=Function1(
                            name=raw_function_call["name"],
                            # function call args are JSON but as a string
                            arguments=json.dumps(
                                raw_function_call["arguments"], ensure_ascii=False
                            ),
                        ),
                    )
                    for raw_function_call in function_call_arr
                ]
            )

            # The guard above only lets through text that *starts* with the
            # tool call, so there is never any content in front of it.
            return ChatCompletionResponseMessage(
                tool_calls=tool_calls, content="", role=role
            )

        except Exception:
            # return information to just treat the tool call as regular JSON
            return ChatCompletionResponseMessage(
                tool_calls=None, content=full_text, role=role
            )

    def parse_tool_calls_streaming(
        self, current_text: str, delta_text: str, backend: str
    ) -> Union[ChatCompletionStreamResponseDelta, None]:
        # if the tool call token is not in the tokens generated so far, append
        # output to contents since it's not a tool
        # tensorrt_llm backend likely doesn't generate the bos token
        if not (self.bot_token in current_text or "[" in current_text):
            return ChatCompletionStreamResponseDelta(content=delta_text)

        # handle if we detected the BOT token which means the start of tool
        # calling
        if self.bot_token == delta_text.strip():
            # if it's the only token, return None, so we don't send a chat
            # completion any don't send a control token
            return None

        flags = Allow.ALL if self.current_tool_name_sent else Allow.ALL & ~Allow.STR

        try:
            # replace BOT token with empty string, and convert single quotes
            # to double to allow parsing as JSON since mistral uses single
            # quotes instead of double for tool calls
            parsable_arr = current_text.split(self.bot_token)[-1]

            # tool calls are generated in an array, so do partial JSON
            # parsing on the entire array
            try:
                tool_call_arr: List[Dict] = partial_json_parser.loads(
                    parsable_arr, flags
                )
            except partial_json_parser.core.exceptions.MalformedJSON:
                return None

            # select as the current tool call the one we're on the state at

            current_tool_call: Dict = (
                tool_call_arr[self.current_tool_id] if len(tool_call_arr) > 0 else {}
            )

            # case -- if no tokens have been streamed for the tool, e.g.
            #   only the array brackets, stream nothing
            if len(tool_call_arr) == 0:
                return None

            # case: we are starting a new tool in the array
            #   -> array has > 0 length AND length has moved past cursor
            elif (
                len(tool_call_arr) > 0 and len(tool_call_arr) > self.current_tool_id + 1
            ):
                # if we're moving on to a new call, first make sure we
                # haven't missed anything in the previous one that was
                # auto-generated due to JSON completions, but wasn't
                # streamed to the client yet.
                if self.current_tool_id >= 0:
                    diff: Union[str, None] = current_tool_call.get("arguments")

                    if diff:
                        diff = json.dumps(diff, ensure_ascii=False).replace(
                            self.streamed_args_for_tool[self.current_tool_id], ""
                        )

                        delta = ChatCompletionStreamResponseDelta(
                            tool_calls=[
                                ChatCompletionMessageToolCallChunk(
                                    index=self.current_tool_id,
                                    function=Function2(arguments=diff).model_dump(
                                        exclude_none=True
                                    ),
                                )
                            ]
                        )
                        self.streamed_args_for_tool[self.current_tool_id] += diff
                    else:
                        delta = None
                else:
                    delta = None
                # re-set stuff pertaining to progress in the current tool
                self.current_tool_id = len(tool_call_arr) - 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                return delta

            # case: update an existing tool - this is handled below

            # if the current tool name hasn't been sent, send if available
            # - otherwise send nothing
            if not self.current_tool_name_sent:
                function_name = current_tool_call.get("name")
                if function_name:
                    delta = ChatCompletionStreamResponseDelta(
                        tool_calls=[
                            ChatCompletionMessageToolCallChunk(
                                index=self.current_tool_id,
                                type="function",
                                id=generate_mistral_random_id(),
                                function=Function2(name=function_name).model_dump(
                                    exclude_none=True
                                ),
                            )
                        ]
                    )
                    self.current_tool_name_sent = True
                else:
                    delta = None

            # now we know we're on the same tool call and we're streaming
            # arguments
            else:
                prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
                    "arguments"
                )
                cur_arguments = current_tool_call.get("arguments")

                new_text = delta_text.replace("'", '"')
                if '"}' in new_text:
                    new_text = new_text[: new_text.rindex('"}')]

                if not cur_arguments and not prev_arguments:
                    delta = None
                elif not cur_arguments and prev_arguments:
                    delta = None
                elif cur_arguments and not prev_arguments:
                    cur_arguments_json = json.dumps(cur_arguments, ensure_ascii=False)[
                        :-2
                    ]

                    if new_text not in cur_arguments_json:
                        return None
                    arguments_delta = cur_arguments_json[
                        : cur_arguments_json.rindex(new_text) + len(new_text)
                    ]
                    delta = ChatCompletionStreamResponseDelta(
                        tool_calls=[
                            ChatCompletionMessageToolCallChunk(
                                index=self.current_tool_id,
                                function=Function2(
                                    arguments=arguments_delta
                                ).model_dump(exclude_none=True),
                            )
                        ]
                    )
                    self.streamed_args_for_tool[self.current_tool_id] += arguments_delta

                elif cur_arguments and prev_arguments:
                    cur_args_json = json.dumps(cur_arguments, ensure_ascii=False)
                    prev_args_json = json.dumps(prev_arguments, ensure_ascii=False)

                    argument_diff = extract_intermediate_diff(
                        cur_args_json, prev_args_json
                    )
                    delta = ChatCompletionStreamResponseDelta(
                        tool_calls=[
                            ChatCompletionMessageToolCallChunk(
                                index=self.current_tool_id,
                                function=Function2(arguments=argument_diff).model_dump(
                                    exclude_none=True
                                ),
                            )
                        ]
                    )
                    self.streamed_args_for_tool[self.current_tool_id] += argument_diff
                else:
                    # try parsing it with regular JSON - if it works we're
                    # at the end, and we need to send the difference between
                    # tokens streamed so far and the valid JSON
                    delta = None

            # check to see if the name is defined and has been sent. if so,
            # stream the name - otherwise keep waiting
            # finish by setting old and returning None as base case
            self.prev_tool_call_arr = tool_call_arr
            return delta

        except Exception:
            return None


================================================
FILE: python/openai/openai_frontend/engine/utils/tool_call_parsers/tool_call_parser.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Adapted from
# https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
# Copyright 2024 The vLLM team.
from typing import Callable, Dict, List, Optional, Union

from engine.utils.tokenizer import AnyTokenizer
from schemas.openai import (
    ChatCompletionMessageToolCalls,
    ChatCompletionStreamResponseDelta,
)


class ToolCallParser:
    """Base class for parsers that extract tool calls from model responses.

    Two interfaces are supported: a one-shot parser for complete
    (non-streaming) responses and an incremental parser for streaming
    responses. Both raise ``NotImplementedError`` here and are expected to be
    overridden by concrete parsers.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        """Initialise the per-response streaming state.

        Args:
            tokenizer: Tokenizer of the served model; stored as
                ``self.model_tokenizer``.
        """
        # Streaming bookkeeping, reset for every parser instance.
        self.prev_tool_call_arr: List[Dict] = []
        # the index of the tool call that is currently being parsed
        self.current_tool_id: int = -1
        self.current_tool_name_sent: bool = False
        self.streamed_args_for_tool: List[str] = []

        self.model_tokenizer = tokenizer

    def parse_tool_calls(
        self, full_text: str, role: str, backend: str
    ) -> ChatCompletionMessageToolCalls:
        """Parse tool calls out of a complete response text.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        # Report the concrete class so the message points at the parser that
        # is actually missing the implementation.
        raise NotImplementedError(
            f"{type(self).__name__}.parse_tool_calls has not been implemented!"
        )

    def parse_tool_calls_streaming(
        self, current_text: str, delta_text: str, backend: str
    ) -> ChatCompletionStreamResponseDelta:
        """Parse tool calls incrementally from a streaming response.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"{type(self).__name__}.parse_tool_calls_streaming "
            "has not been implemented!"
        )


class ToolParserManager:
    """Registry that maps tool-parser names to ``ToolCallParser`` subclasses."""

    # Class-level registry shared by every user of the manager:
    # parser name -> parser class.
    tool_parsers: dict[str, type] = {}

    @classmethod
    def get_tool_parser_cls(cls, name) -> type:
        """Return the parser class registered under ``name``.

        Raises:
            KeyError: If nothing has been registered under ``name``.
        """
        if name in cls.tool_parsers:
            return cls.tool_parsers[name]

        raise KeyError(f"tool parser: '{name}' not found in tool_call_parsers")

    @classmethod
    def _register_module(
        cls,
        module: type,
        module_name: Optional[Union[str, list[str]]] = None,
        force: bool = True,
    ) -> None:
        """Store ``module`` in the registry under one or more names.

        Args:
            module: The parser class; must be a subclass of ``ToolCallParser``.
            module_name: A name, a list of names, or ``None`` to use the
                class's own ``__name__``.
            force: When ``False``, refuse to overwrite an existing entry.

        Raises:
            TypeError: If ``module`` is not a ``ToolCallParser`` subclass.
            KeyError: If ``force`` is ``False`` and a name is already taken.
        """
        # Check for "is a class" first so that a non-class argument produces
        # this descriptive error instead of issubclass()'s generic one.
        if not (isinstance(module, type) and issubclass(module, ToolCallParser)):
            raise TypeError(
                f"module must be subclass of ToolCallParser, but got {module!r}"
            )
        if module_name is None:
            module_name = module.__name__
        if isinstance(module_name, str):
            module_name = [module_name]
        for name in module_name:
            if not force and name in cls.tool_parsers:
                existed_module = cls.tool_parsers[name]
                raise KeyError(
                    f"{name} is already registered " f"at {existed_module.__module__}"
                )
            cls.tool_parsers[name] = module

    @classmethod
    def register_module(
        cls,
        name: Optional[Union[str, list[str]]] = None,
        force: bool = True,
        module: Union[type, None] = None,
    ) -> Union[type, Callable]:
        """
        Register module with the given name or name list. It can be used as a
        decorator (with module as None) or as a normal function (with module
        as not None).

        Args:
            name: A name, a list of names, or ``None`` to register the class
                under its own ``__name__``.
            force: When ``False``, registering an already-used name raises.
            module: The class to register, or ``None`` to get a decorator.

        Returns:
            ``module`` itself when it is given, otherwise a decorator that
            registers the decorated class and returns it unchanged.

        Raises:
            TypeError: If ``force`` is not a bool, or ``name`` is neither
                ``None``, a ``str``, nor a list of ``str``.
        """
        if not isinstance(force, bool):
            raise TypeError(f"force must be a boolean, but got {type(force)}")

        # raise the error ahead of time
        name_is_valid = (
            name is None
            or isinstance(name, str)
            # A list of names is part of the documented interface and is
            # handled by _register_module, so it must pass validation too.
            or (isinstance(name, list) and all(isinstance(n, str) for n in name))
        )
        if not name_is_valid:
            raise TypeError(
                "name must be None, an instance of str, or a list of str, "
                f"but got {type(name)}"
            )

        # use it as a normal method: x.register_module(module=SomeClass)
        if module is not None:
            cls._register_module(module=module, module_name=name, force=force)
            return module

        # use it as a decorator: @x.register_module()
        def _register(module):
            cls._register_module(module=module, module_name=name, force=force)
            return module

        return _register


================================================
FILE: python/openai/openai_frontend/engine/utils/tool_call_parsers/utils.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Adapted from
# https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/tool_parsers/utils.py
# Copyright 2024 The vLLM team.

import json
from typing import Any

import partial_json_parser
from partial_json_parser.core.options import Allow


# partial_json_parser doesn't support extra data and
# JSONDecoder.raw_decode doesn't support partial JSON
def partial_json_loads(input_str: str, flags: Allow) -> tuple[Any, int]:
    """Parse possibly incomplete JSON, tolerating trailing extra data.

    Args:
        input_str: The text to parse.
        flags: ``partial_json_parser`` options passed through to its
            ``loads``.

    Returns:
        A ``(value, consumed)`` pair. ``consumed`` is ``len(input_str)`` when
        the partial parser succeeds, or the end index reported by
        ``JSONDecoder.raw_decode`` when the input carries extra data.

    Raises:
        json.JSONDecodeError: For decode errors other than "Extra data".
    """
    try:
        parsed = partial_json_parser.loads(input_str, flags)
    except json.JSONDecodeError as err:
        if "Extra data" not in err.msg:
            raise
        # Fall back to the strict decoder, which stops at the end of the
        # first complete JSON value and reports where it stopped.
        return json.JSONDecoder().raw_decode(input_str)
    return (parsed, len(input_str))


def is_complete_json(input_str: str) -> bool:
    """Report whether ``input_str`` parses as a complete JSON document."""
    try:
        json.loads(input_str)
    except json.JSONDecodeError:
        return False
    else:
        return True


def find_common_prefix(s1: str, s2: str) -> str:
    """
    Return the longest prefix shared by both strings (possibly empty).
    The order of the arguments does not matter.

    Provided as a utility for working with JSON produced by
    partial_json_parser, so that streaming returns the right tokens and
    close-quotes, close-brackets and close-braces are not emitted
    prematurely.

    e.g. find_common_prefix('{"fruit": "ap"}', '{"fruit": "apple"}') ->
    '{"fruit": "ap'
    """
    matched = 0
    # zip stops at the shorter string, which bounds the comparison.
    for left, right in zip(s1, s2):
        if left != right:
            break
        matched += 1
    return s1[:matched]


def find_common_suffix(s1: str, s2: str) -> str:
    """
    Return the trailing run of characters shared by both strings. The order
    of the arguments does not matter.
    The scan walks backwards and stops at the first mismatch OR at the first
    alphanumeric character, so the result only ever holds non-alphanumeric
    characters.

    e.g. find_common_suffix('{"fruit": "ap"}', '{"fruit": "apple"}') -> '"}'
    """
    kept = 0
    for left, right in zip(reversed(s1), reversed(s2)):
        if left != right or left.isalnum():
            break
        kept += 1
    # Slice from an explicit start index so that kept == 0 yields "".
    return s1[len(s1) - kept :]


def extract_intermediate_diff(curr: str, old: str) -> str:
    """
    Extract the text in the middle of ``curr`` that is new relative to
    ``old``, given that the two strings share a common prefix and/or suffix.

    Provided as a utility for working with JSON produced by
    partial_json_parser, so that streaming returns the right tokens and
    close-quotes, close-brackets and close-braces are not emitted
    prematurely. The order of arguments IS important: the newer
    partially-parsed JSON comes first and the one from the previous
    generation comes second.

    The return value is the text that should be streamed to the client.

    e.g. extract_intermediate_diff('{"fruit": "apple"}', '{"fruit": "ap"}')
        -> 'ple'

    """
    shared_tail = find_common_suffix(curr, old)

    # The shared tail is a genuine suffix of both strings, so dropping it
    # from the end is all that is needed (a no-op when it is empty).
    old_head = old.removesuffix(shared_tail)
    shared_head = find_common_prefix(curr, old_head)

    middle = curr.removesuffix(shared_tail)
    if shared_head:
        # replace the prefix only once in case it's mirrored
        middle = middle.replace(shared_head, "", 1)

    return middle


================================================
FILE: python/openai/openai_frontend/engine/utils/triton.py
================================================
# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import ctypes
import json
import os
import re
import sys
import traceback
from dataclasses import asdict, dataclass, field
from enum import Enum
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Union

import numpy as np
import tritonserver
from pydantic import BaseModel
from schemas.openai import (
    ChatCompletionNamedToolChoice,
    ChatCompletionTokenLogprob,
    ChatCompletionToolChoiceOption1,
    CompletionUsage,
    CreateChatCompletionRequest,
    CreateCompletionRequest,
    CreateEmbeddingRequest,
    EmbeddingUsage,
    Logprobs,
    TopLogprob,
)
from utils.utils import ClientError, ServerError


class RequestKind(Enum):
    """Kind of request being served; selects which usage object is built."""

    GENERATION = 1  # usage is reported as CompletionUsage
    EMBEDDING = 2  # usage is reported as EmbeddingUsage


@dataclass
class TritonLoraConfig:
    """Describes a LoRA adapter that a generation request may reference."""

    # Adapter name; sent to the vLLM backend as the "lora_name" sampling
    # parameter.
    name: str

    # Unique fields for TensorRT-LLM backend
    # Sent as the "lora_task_id" input.
    task_id: Optional[int] = None
    # Directory from which model.lora_weights.npy and model.lora_config.npy
    # are loaded.
    path: Optional[str] = None
    # Flipped to True once the weights and config have been attached to a
    # request, so that later requests send only the task id.
    is_registered: Optional[bool] = False


def _create_vllm_generate_request(
    model,
    prompt,
    request: CreateChatCompletionRequest | CreateCompletionRequest,
    lora_config: TritonLoraConfig | None,
    echo_tensor_name: str | None,
    default_max_tokens: int,
):
    """Build a Triton inference request for the vLLM backend.

    Sampling parameters are taken from ``request.model_dump`` (minus the
    fields that are not sampling parameters) and sent as a single JSON string
    input named ``sampling_parameters``.

    Args:
        model: Object whose ``create_request(inputs=...)`` builds the request.
        prompt: Text sent as the ``text_input`` input.
        request: The chat-completion or completion request.
        lora_config: When given, its ``name`` is sent as ``lora_name``.
        echo_tensor_name: Name of the input that receives the inverse of
            ``request.echo`` (``True`` when the request has no ``echo``).
        default_max_tokens: Token budget used when the request sets none.

    Returns:
        Whatever ``model.create_request`` returns.
    """
    # Request fields that are not sampling parameters and therefore must not
    # be passed to vLLM as such.
    non_sampling_fields = {
        "model",
        "stream",
        "messages",
        "prompt",
        "echo",
        "store",
        "metadata",
        "response_format",
        "service_tier",
        "stream_options",
        "tools",
        "tool_choice",
        "parallel_tool_calls",
        "user",
        "function_call",
        "functions",
        "suffix",
        "max_completion_tokens",
        # will be handled explicitly
        "max_tokens",
        "logprobs",
        "top_logprobs",
        # not supported for vLLM backend (removed from vLLM V1) but supported for TRT-LLM/Python backend
        "best_of",
    }

    # NOTE: exclude_none is important, as internals may not support values of
    # NoneType at this time.
    params = request.model_dump(exclude=non_sampling_fields, exclude_none=True)

    # Only CreateChatCompletionRequest carries max_completion_tokens.
    is_chat_request = hasattr(request, "max_completion_tokens")

    # Token budget: max_completion_tokens (chat only), then max_tokens
    # (deprecated for chat), then the supplied default.
    max_tokens = request.max_completion_tokens if is_chat_request else None
    if max_tokens is None:
        max_tokens = request.max_tokens
    if max_tokens is None:
        max_tokens = default_max_tokens
    params["max_tokens"] = max_tokens

    if is_chat_request:
        # OpenAI chat API: logprobs (bool), top_logprobs (int 0-20)
        # vLLM API: logprobs (int) - number of top token logprobs to return
        want_logprobs = bool(request.logprobs)
        if want_logprobs:
            top_logprobs = request.top_logprobs
            # logprobs=True without top_logprobs defaults to 1
            params["logprobs"] = 1 if top_logprobs is None else top_logprobs
    else:
        # OpenAI completions API: logprobs (int 0-5) - number of top token
        # log probs; vLLM takes the same value directly.
        want_logprobs = request.logprobs is not None and request.logprobs > 0
        if want_logprobs:
            params["logprobs"] = request.logprobs

    inputs = {"return_logprobs": np.bool_([want_logprobs])}

    if lora_config is not None:
        params["lora_name"] = lora_config.name

    guided_json = _get_guided_json_from_tool(request)
    if guided_json is not None:
        from vllm.sampling_params import StructuredOutputsParams

        structured = StructuredOutputsParams(json=guided_json)
        params["structured_outputs"] = json.dumps(asdict(structured))
    serialized_params = json.dumps(params)

    # The input holds the inverse of echo; a request without echo excludes
    # the prompt from the output.
    echo = getattr(request, "echo", None)
    exclude_input_in_output = True if echo is None else not echo

    inputs["text_input"] = [prompt]
    inputs["stream"] = np.bool_([request.stream])
    inputs[echo_tensor_name] = np.bool_([exclude_input_in_output])
    # Pass sampling_parameters as serialized JSON string input to support List
    # fields like 'stop' that aren't supported by TRITONSERVER_Parameters yet.
    inputs["sampling_parameters"] = [serialized_params]
    inputs["return_num_input_tokens"] = np.bool_([True])
    inputs["return_num_output_tokens"] = np.bool_([True])
    return model.create_request(inputs=inputs)


def _create_trtllm_generate_request(
    model,
    prompt,
    request: CreateChatCompletionRequest | CreateCompletionRequest,
    lora_config: TritonLoraConfig | None,
    echo_tensor_name: str | None,
    default_max_tokens: int,
):
    """Build a Triton inference request for the TensorRT-LLM backend.

    Each supported request field is sent as its own input, wrapped in a
    nested ``[[value]]`` list.

    Args:
        model: Object whose ``create_request(inputs=...)`` builds the request.
        prompt: Text sent as the ``text_input`` input.
        request: The chat-completion or completion request. NOTE: a string
            ``request.stop`` is replaced in place by a one-element list.
        lora_config: When given, its task id is sent; on first use the
            weights and config are loaded from ``lora_config.path`` and
            ``lora_config.is_registered`` is set to ``True``.
        echo_tensor_name: Name of the input receiving the inverse of
            ``request.echo``; ``None`` when the model has no such input.
        default_max_tokens: Token budget used when the request sets none.

    Returns:
        Whatever ``model.create_request`` returns.
    """
    # Token budget: max_completion_tokens (present on chat requests only),
    # then max_tokens (deprecated for chat), then the supplied default.
    max_tokens = getattr(request, "max_completion_tokens", None)
    if max_tokens is None:
        max_tokens = request.max_tokens
    if max_tokens is None:
        max_tokens = default_max_tokens

    inputs = {
        "text_input": [[prompt]],
        "stream": np.bool_([[request.stream]]),
        "max_tokens": np.int32([[max_tokens]]),
    }

    if request.stop:
        if isinstance(request.stop, str):
            request.stop = [request.stop]
        inputs["stop_words"] = [request.stop]

    # Check "is not None" specifically, because values of zero are valid.
    optional_scalars = (
        ("top_p", np.float32),
        ("frequency_penalty", np.float32),
        ("presence_penalty", np.float32),
        ("seed", np.uint64),
        ("temperature", np.float32),
    )
    for field_name, np_type in optional_scalars:
        value = getattr(request, field_name)
        if value is not None:
            inputs[field_name] = np_type([[value]])

    # Only limited TRT-LLM models support "echo" (inflight_batcher_llm, disaggregated_serving, llmapi)
    echo = getattr(request, "echo", None)
    if not (echo is None or echo_tensor_name is None):
        inputs[echo_tensor_name] = np.bool_([[not echo]])

    guided_json = _get_guided_json_from_tool(request)
    if guided_json is not None:
        inputs["guided_decoding_guide_type"] = [["json_schema"]]
        inputs["guided_decoding_guide"] = [[guided_json]]

    if lora_config is not None:
        # To perform inference with a specific LoRA for the first time `lora_task_id` `lora_weights` and `lora_config` must all be given.
        # The LoRA will be cached, so that subsequent requests for the same task only require `lora_task_id`.
        inputs["lora_task_id"] = np.uint64([[lora_config.task_id]])
        if not lora_config.is_registered:
            adapter_dir = lora_config.path
            weights = np.load(os.path.join(adapter_dir, "model.lora_weights.npy"))
            config = np.load(os.path.join(adapter_dir, "model.lora_config.npy"))
            inputs["lora_weights"] = weights
            inputs["lora_config"] = config
            lora_config.is_registered = True

    inputs["return_num_input_tokens"] = np.bool_([[True]])
    inputs["return_num_output_tokens"] = np.bool_([[True]])
    return model.create_request(inputs=inputs)


def _create_vllm_embedding_request(
    model,
    request: CreateEmbeddingRequest,
):
    """Build a Triton inference request for a vLLM embedding call.

    The request's ``input`` and optional ``dimensions`` are packed into one
    JSON string sent as the ``embedding_request`` input.

    Args:
        model: Object whose ``create_request(inputs=...)`` builds the request.
        request: The embedding request.

    Returns:
        Whatever ``model.create_request`` returns.
    """
    source = request.input
    dims = request.dimensions
    # pooling_params is always present, and empty when no size was requested.
    pooling_params = {} if dims is None else {"dimensions": [dims]}
    payload = {"input": source, "pooling_params": pooling_params}

    return model.create_request(
        inputs={
            "embedding_request": [json.dumps(payload)],
            "return_num_input_tokens": np.bool_([True]),
            "return_num_output_tokens": np.bool_([True]),
        }
    )


def _create_trtllm_embedding_request(
    model,
    request: CreateEmbeddingRequest,
):
    """Reject the request: embeddings are unsupported on this backend.

    Raises:
        ClientError: Always.
    """
    unsupported = "TRT-LLM backend and Python backend do not support embedding requests"
    raise ClientError(unsupported)


def _construct_string_from_pointer(pointer: int, size: int) -> str:
    """Decode ``size`` bytes starting at address ``pointer`` as UTF-8 text.

    Args:
        pointer: Address of the first byte to read.
        size: Exact number of bytes to read.

    Returns:
        The decoded string, including any embedded NUL characters.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    # string_at copies exactly `size` bytes in a single step. Reading the
    # value of a NUL-terminated buffer instead would stop at the first NUL
    # byte and silently drop everything after it.
    return ctypes.string_at(pointer, size).decode("utf-8")  # Adjust encoding if needed


def _get_volume(shape: Iterable[int]) -> int:
    """Return the product of all dimensions in ``shape`` (1 when empty)."""
    total = 1
    for extent in shape:
        total = total * extent
    return total


def _to_string(tensor: tritonserver.Tensor) -> str:
    """Read the single serialized string held by ``tensor``.

    The buffer is read in place: the first 4 bytes hold the serialized byte
    length and are skipped, and the remainder is decoded as the string.

    Raises:
        ServerError: If the tensor does not hold exactly one element, or its
            buffer is too small to contain the 4-byte length prefix.
    """
    # FIXME: This could be a bit more robust by reading byte size from first
    # 4 bytes and then just reading the first string, rather than assuming
    # single string, assuming it's of similar performance to do so.

    # Size in bytes of the serialized length stored ahead of the string data.
    length_prefix_bytes = 4

    # Reading directly from the buffer only works for a single string, so
    # enforce that up front to avoid obscure errors.
    element_count = _get_volume(tensor.shape)
    if element_count != 1:
        raise ServerError(
            f"Expected to find 1 string in the output, found {element_count} instead."
        )

    byte_size = tensor.size
    if byte_size < length_prefix_bytes:
        raise ServerError(
            f"Expected string buffer to contain its serialized byte size, but found size of {byte_size}."
        )

    # Skip the length prefix on both the address and the byte count.
    return _construct_string_from_pointer(
        tensor.data_ptr + length_prefix_bytes, byte_size - length_prefix_bytes
    )


@dataclass
class _StreamingUsageAccumulator:
    """Accumulates token usage across the chunks of a streaming response."""

    backend: str
    prompt_tokens: int = 0
    completion_tokens: int = 0
    # True once at least one usage payload has been processed.
    _prompt_tokens_set: bool = field(init=False, default=False)

    def update(self, response: tritonserver.InferenceResponse):
        """Fold the usage carried by ``response`` into the running totals."""
        chunk_usage = _get_usage_from_response(
            response, self.backend, RequestKind.GENERATION
        )
        if not chunk_usage:
            return

        # prompt_tokens arrives with every chunk but must be recorded once.
        if not self._prompt_tokens_set:
            self.prompt_tokens = chunk_usage.prompt_tokens
            self._prompt_tokens_set = True
        self.completion_tokens += chunk_usage.completion_tokens

    def get_final_usage(self) -> Optional[CompletionUsage]:
        """
        Return the accumulated CompletionUsage, or None when no usage payload
        was ever received.
        """
        if not self._prompt_tokens_set:
            return None

        prompt, completion = self.prompt_tokens, self.completion_tokens
        return CompletionUsage(
            prompt_tokens=prompt,
            completion_tokens=completion,
            total_tokens=prompt + completion,
        )


def _get_usage_from_response(
    response: tritonserver._api._response.InferenceResponse,
    backend: str,
    request_type: RequestKind,
) -> Optional[CompletionUsage | EmbeddingUsage]:
    """
    Extract token usage statistics from a Triton inference response.

    Args:
        response: Response whose ``num_input_tokens`` / ``num_output_tokens``
            outputs are read.
        backend: Not used by this function.
        request_type: Selects between CompletionUsage and EmbeddingUsage.

    Returns:
        A usage object, or None when either output is missing, the input
        count has an unsupported data type, or (for generation) the output
        count does.
    """

    def _read_count(tensor):
        # Read the first element of the tensor's buffer as a 32-bit integer;
        # any data type other than UINT32 / INT32 yields None.
        if tensor.data_type == tritonserver.DataType.UINT32:
            c_type = ctypes.c_uint32
        elif tensor.data_type == tritonserver.DataType.INT32:
            c_type = ctypes.c_int32
        else:
            return None
        return ctypes.cast(tensor.data_ptr, ctypes.POINTER(c_type))[0]

    outputs = response.outputs
    if not ("num_input_tokens" in outputs and "num_output_tokens" in outputs):
        return None

    input_token_tensor = outputs["num_input_tokens"]
    output_token_tensor = outputs["num_output_tokens"]
    prompt_tokens = _read_count(input_token_tensor)
    completion_tokens = _read_count(output_token_tensor)

    if prompt_tokens is None:
        return None

    if request_type == RequestKind.GENERATION and completion_tokens is not None:
        return CompletionUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
    if request_type == RequestKind.EMBEDDING:
        return EmbeddingUsage(
            prompt_tokens=prompt_tokens,
            total_tokens=prompt_tokens,
        )
    return None


# TODO: Use tritonserver.InferenceResponse when support is published
def _get_output(response: tritonserver._api._response.InferenceResponse) -> str:
    """Return the response's ``text_output`` string, or "" when absent."""
    outputs = response.outputs
    if "text_output" not in outputs:
        return ""

    # Read the string straight from the buffer. The alternative,
    # tensor.to_bytes_array()[0].decode("utf-8"), creates the same string but
    # goes through deserialization, numpy, and dlpack overhead.
    return _to_string(outputs["text_output"])


def _get_logprobs_from_response(
    response: tritonserver._api._response.InferenceResponse,
) -> Optional[List[Dict]]:
    """
    Extract logprobs from a Triton inference response (vLLM backend).

    Returns:
        The decoded ``logprobs`` output, or None when the output is missing,
        is None, holds the string "null", or is not valid JSON.
        Format: [
            {
                token_id: {
                    "logprob": float,
                    "rank": int,
                    "decoded_token": str
                }
            },
            ...
        ]
    """
    outputs = response.outputs
    if "logprobs" not in outputs:
        return None

    tensor = outputs["logprobs"]
    if tensor is None:
        return None

    # The vLLM backend stores the logprobs as a JSON string.
    serialized = _to_string(tensor)
    if serialized == "null":
        return None

    try:
        return json.loads(serialized)
    except json.JSONDecodeError:
        return None


def _get_openai_chat_format_logprobs_from_vllm_response(
    response: tritonserver._api._response.InferenceResponse,
) -> Optional[List[ChatCompletionTokenLogprob]]:
    """
    Build OpenAI chat-completion logprobs from a Triton response (vLLM backend).

    Args:
        response: Triton inference response containing logprobs output.

    Returns:
        One ChatCompletionTokenLogprob per non-empty position, or None when
        the response carries no logprobs. Note that the returned list may be
        empty if every position was empty.
    """
    per_position = _get_logprobs_from_response(response)
    if not per_position:
        return None

    converted = []
    for candidates in per_position:
        # Positions without any candidate are skipped entirely.
        if not candidates:
            continue

        # Order candidates by rank; entries without a rank sort last. The
        # lowest-ranked entry is treated as the selected token.
        ranked = sorted(
            candidates.values(), key=lambda entry: entry.get("rank", sys.maxsize)
        )
        chosen = ranked[0]
        chosen_text = chosen["decoded_token"]
        chosen_logprob = chosen["logprob"]

        # Every candidate (including the selected one) is reported as a top
        # logprob, together with the UTF-8 bytes of its decoded text.
        alternatives = [
            TopLogprob(
                token=entry["decoded_token"],
                logprob=entry["logprob"],
                bytes=list(entry["decoded_token"].encode("utf-8")),
            )
            for entry in ranked
        ]

        converted.append(
            ChatCompletionTokenLogprob(
                token=chosen_text,
                logprob=chosen_logprob,
                bytes=list(chosen_text.encode("utf-8")),
                top_logprobs=alternatives,
            )
        )

    return converted


def _get_openai_completion_format_logprobs_from_vllm_response(
    response: tritonserver._api._response.InferenceResponse,
) -> Optional[Logprobs]:
    """
    Build OpenAI completions-API logprobs from a Triton response (vLLM backend).

    Args:
        response: Triton inference response containing logprobs output.

    Returns:
        A Logprobs object with four parallel lists (one entry per non-empty
        position), or None when the response carries no logprobs.
    """
    per_position = _get_logprobs_from_response(response)
    if not per_position:
        return None

    offsets = []
    chosen_logprobs = []
    chosen_tokens = []
    alternatives = []

    # Offsets are counted in characters of the decoded selected tokens,
    # starting from zero.
    running_offset = 0
    for candidates in per_position:
        if not candidates:
            continue

        # Order candidates by rank; entries without a rank sort last. The
        # lowest-ranked entry is treated as the selected token.
        ranked = sorted(
            candidates.values(), key=lambda entry: entry.get("rank", sys.maxsize)
        )
        chosen = ranked[0]
        chosen_text = chosen["decoded_token"]
        chosen_logprob = chosen["logprob"]

        offsets.append(running_offset)
        chosen_logprobs.append(chosen_logprob)
        chosen_tokens.append(chosen_text)

        # Decoded text -> logprob for every candidate at this position. If two
        # candidates decode to the same text, the higher-ranked one is
        # overwritten by the later one.
        alternatives.append(
            {entry["decoded_token"]: entry["logprob"] for entry in ranked}
        )

        running_offset += len(chosen_text)

    return Logprobs(
        text_offset=offsets,
        token_logprobs=chosen_logprobs,
        tokens=chosen_tokens,
        top_logprobs=alternatives,
    )


def _validate_triton_responses_non_streaming(
    responses: List[tritonserver._api._response.InferenceResponse],
):
    """
    Sanity-check the responses collected for a non-streaming request.

    Args:
        responses: Responses received for a single request.

    Raises:
        ServerError: If there are not exactly one or two responses, or if the
            last response is not flagged as final.
    """
    num_responses = len(responses)
    if num_responses not in (1, 2):
        raise ServerError(
            f"Unexpected number of responses: {num_responses}, expected 1 or 2."
        )

    # Compared with `!= True` on purpose so that only a value equal to True
    # is accepted as the final flag.
    if responses[-1].final != True:
        raise ServerError("Unexpected internal error with incorrect response flags")


def _get_guided_json_from_tool(
    request: CreateChatCompletionRequest | CreateCompletionRequest,
) -> Optional[Union[str, dict, BaseModel]]:
    """
    Derive a guided-decoding JSON schema from the tool a chat request must call.

    Args:
        request: Incoming chat-completion or completion request.

    Returns:
        The JSON dump of the selected tool's parameters, or None when the
        request is not a chat request, has no tools / tool choice, or the
        tool choice neither names a tool nor is "required".

    Raises:
        ClientError: If the selected tool name is not present in `tools`.
    """
    # Only chat-completion requests can carry tools.
    if not isinstance(request, CreateChatCompletionRequest):
        return None
    if request.tool_choice is None or not request.tools:
        return None

    choice = request.tool_choice.root
    if type(choice) is ChatCompletionNamedToolChoice:
        tool_name = choice.function.name
    elif choice == ChatCompletionToolChoiceOption1.required:
        # "required" does not name a tool; the first declared tool is used.
        tool_name = request.tools[0].function.name
    else:
        return None

    functions_by_name = {
        declared.function.name: declared.function for declared in request.tools
    }
    if tool_name not in functions_by_name:
        raise ClientError(f"Tool '{tool_name}' has not been passed in `tools`.")
    return functions_by_name[tool_name].parameters.model_dump_json()


def _validate_lora_path_trtllm(repo_path: str, lora_path: str, lora_name: str):
    """
    Validate a TRT-LLM LoRA directory declared for a model.

    The LoRA path must be relative, must resolve (after following symlinks)
    to a location inside the model repository, and must contain the files
    "model.lora_weights.npy" and "model.lora_config.npy".

    Args:
        repo_path: Path of the model repository the LoRA belongs to.
        lora_path: LoRA directory as written in the configuration, relative
            to the model repository.
        lora_name: Name of the LoRA, used only in error messages.

    Raises:
        ValueError: If `lora_path` is absolute or resolves outside the
            model repository.
        ServerError: If the LoRA directory or one of its required files
            does not exist.
    """
    if os.path.isabs(lora_path):
        raise ValueError(
            f"LoRA path '{lora_path}' for '{lora_name}' must be a relative path inside its model repository"
        )

    # NOTE: Error messages should never contain the real/absolute paths.
    realpath_repo = os.path.realpath(repo_path)
    realpath_lora = os.path.realpath(os.path.join(realpath_repo, lora_path))
    # Always check if the LoRA path is inside the model repository before checking its existence.
    if os.path.commonpath([realpath_repo, realpath_lora]) != realpath_repo:
        raise ValueError(
            f"LoRA path '{lora_path}' for '{lora_name}' must be inside its model repository"
        )
    if not os.path.exists(realpath_lora):
        raise ServerError(
            f"LoRA directory '{lora_path}' not found for '{lora_name}' in its model repository"
        )

    # Check if the files exist
    for lora_file in ("model.lora_weights.npy", "model.lora_config.npy"):
        if not os.path.exists(os.path.join(realpath_lora, lora_file)):
            # Report the location as configured (relative to the repository)
            # so the resolved absolute path is not disclosed to the caller.
            reported_path = os.path.join(lora_path, lora_file)
            raise ServerError(
                f"LoRA file '{lora_file}' not found for '{lora_name}' at path: {reported_path}"
            )


def _parse_lora_configs(
    model_repository: str | list[str], model_name: str, model_version: int, backend: str
) -> None | List[tuple[str, str]]:
    if (
        len(model_name) == 0
        or model_name.isspace()
        or "/" in model_name
        or "\\" in model_name
    ):
        raise ValueError(
            f"Invalid model name: '{model_name}'. Model names must be valid file-system-path segment names."
        )

    lora_configs = []
    lora_task_id = 1
    repo_paths = model_repository
    if isinstance(repo_paths, str):
        repo_paths = [repo_paths]
    for repo_path in repo_paths:
        model_path = os.path.join(repo_path, model_name)
        if (not Path(model_path).is_relative_to(repo_path)) or (
            os.path.normpath(model_path) != model_path
        ):
            raise ValueError(
                f"Invalid model name: '{model_name}'. Model names must be valid file-system-path segment names."
            )

        model_path = os.path.normpath(model_path)
        if not os.path.isdir(model_path):
            # Cloud path?
            return None
        if model_version <= 0:
            for version_path in os.listdir(model_path):
                version = os.path.basename(version_path)
                if re.fullmatch(r"^[0-9]+$", version) is None:
                    continue
                model_version = max(model_version, int(version))
            if model_version <= 0:
                # Model directory is malformed?
                return None
        version_path = os.path.join(model_path, str(model_version))
        lora_config_path = os.path.join(version_path, "multi_lora.json")

        if backend == "vllm":
            is_lora_enabled = False
            model_file_path = os.path.join(version_path, "model.json")
            try:
                with open(model_file_path, "r") as f:
                    config = json.load(f)
                    if "enable_lora" in config:
                        # The value could be a string or a bool.
                        is_lora_enabled = str(config["enable_lora"]).lower() == "true"
            except Exception:
                # Model directory or model.json is malformed?
                return None
            if is_lora_enabled != True:
                continue
        else:
            # TRT-LLM backend does not use model.json
            if not os.path.exists(lora_config_path):
                continue

        try:
            with open(lora_config_path, "r") as f:
                lora_config = json.load(f)
                for lora_name, lora_path in lora_config.items():
                    if backend == "vllm":
                        lora_configs.append(TritonLoraConfig(name=lora_name))
                    else:
                        _validate_lora_path_trtllm(repo_path, lora_path, lora_name)
                        lora_configs.append(
                            TritonLoraConfig(
                                name=lora_name, path=lora_path, task_id=lora_task_id
                            )
                        )
                        lora_task_id += 1
        except ServerError as e:
            raise e
        except Exception as e:
            # LoRA is enabled but its list is not provided or malformed?
            print(traceback.format_exc())
            return None
    return lora_configs


================================================
FILE: python/openai/openai_frontend/frontend/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: python/openai/openai_frontend/frontend/fastapi/__init__.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: python/openai/openai_frontend/frontend/fastapi/middleware/__init__.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: python/openai/openai_frontend/frontend/fastapi/middleware/api_restriction.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import hmac

from fastapi import Request
from fastapi.responses import JSONResponse
from starlette.middleware.base import BaseHTTPMiddleware
from utils.utils import StatusCode

# Mapping of each restrictable API group name to the HTTP endpoints it covers.
# The keys are the only API names accepted by RestrictedFeatures. Each endpoint
# is written as "<METHOD> <path>": APIRestrictionMiddleware splits the entry on
# the space, compares the method exactly and matches the path as a prefix of
# the request path.
ENDPOINT_MAPPING = {
    "inference": [
        "POST /v1/chat/completions",
        "POST /v1/completions",
        "POST /v1/embeddings",
    ],
    "model-repository": ["GET /v1/models"],
    "metrics": ["GET /metrics"],
    "health": ["GET /health/ready"],
}


class RestrictedFeatures:
    """
    Manages API endpoint restrictions and their authentication requirements.

    This class parses command-line arguments for restricted API configurations
    and provides methods to check if specific APIs are restricted
    and what authentication is required.
    """

    def __init__(self, args: list[str]):
        """
        Initialize the RestrictedFeatures with command-line arguments.

        Args:
            args: List of --openai-restricted-api argument strings
                 (e.g., [["inference", "infer-key", "infer-value"],
                         ["model-repository", "model-key", "model-value"]])
        """
        self._restrictions = {}
        self.ParseRestrictedFeatureOption(args)

    def ParseRestrictedFeatureOption(self, args):
        """
        Parse command-line arguments to extract API restrictions.

        Args:
            args: List of restriction configuration strings

        Raises:
            ValueError: If unknown API is specified or duplicate API configs are found
        """
        for apis, key, value in args:
            api_list = apis.split(",")
            for api in api_list:
                # Validate that the API is valid
                if api not in ENDPOINT_MAPPING:
                    raise ValueError(
                        f"Unknown API '{api}'. Available APIs: {list(ENDPOINT_MAPPING.keys())}"
                    )

                # Check for duplicate APIs across different arguments
                if self.IsRestricted(api):
                    raise ValueError(
                        f"restricted api '{api}' can not be specified in multiple config groups"
                    )

                self.Insert(api, (key, value))

    def RestrictionDict(self) -> dict[str, tuple[str, str]]:
        """
        Get a copy of the restrictions dictionary.

        Returns:
            dict: Copy of the restrictions mapping API names to (header_key, header_value) tuples
        """
        return self._restrictions.copy()

    def Insert(self, api: str, restriction: tuple[str, str]):
        """
        Add a restriction for a specific API.

        Args:
            api: The API name (e.g., "inference", "model-repository")
            restriction: Tuple of (header_key, header_value) for authentication
        """
        self._restrictions[api] = restriction

    def IsRestricted(self, api: str) -> bool:
        """
        Check if a specific API is restricted.

        Args:
            api: The API name to check

        Returns:
            bool: True if the API is restricted, False otherwise
        """
        return api in self._restrictions


class APIRestrictionMiddleware(BaseHTTPMiddleware):
    """
    Middleware to restrict API endpoint access based on allowed APIs configuration.

    This middleware intercepts HTTP requests and checks if they match any restricted
    API endpoints. If a request matches a restricted endpoint, it validates the
    authentication headers before allowing the request to proceed.

    Similar to Triton Server's endpoint access control feature.
    """

    def __init__(self, app, restricted_apis: RestrictedFeatures):
        """
        Initialize the API restriction middleware.

        Args:
            app: The FastAPI application instance
            restricted_apis: RestrictedFeatures instance containing the restriction configuration
        """
        super().__init__(app)
        self.restricted_apis = restricted_apis

    def _get_auth_header(self, request: Request) -> tuple[str, str] | None:
        """
        Find the authentication requirement that applies to a request.

        Args:
            request: The incoming HTTP request

        Returns:
            The (header_key, header_value) pair of the first restricted API
            with an endpoint matching the request, or None if the request is
            not restricted.
        """
        request_method = request.method
        request_path = request.url.path

        # Check each restricted API to see if the request matches
        for (
            restricted_api,
            auth_spec,
        ) in self.restricted_apis.RestrictionDict().items():
            # Check each endpoint in the API
            for restricted_endpoint in ENDPOINT_MAPPING[restricted_api]:
                restricted_method, restricted_path = restricted_endpoint.split(" ")

                # Match both HTTP method and path prefix
                if request_method == restricted_method and request_path.startswith(
                    restricted_path
                ):
                    return auth_spec
        return None

    async def dispatch(self, request: Request, call_next):
        """
        Main middleware dispatch method that processes each incoming request.

        Args:
            request: The incoming HTTP request
            call_next: The next middleware/handler in the chain

        Returns:
            Response: Either the next handler's response or a JSON error
                response with status StatusCode.AUTHORIZATION_ERROR
        """
        # Check if the request matches any restricted patterns
        auth_header = self._get_auth_header(request)

        # If request not restricted, proceed with the request
        if auth_header is None:
            return await call_next(request)

        # Check authentication for the matching restricted endpoint
        auth_result = self._check_authentication(request, auth_header)
        if auth_result["valid"]:
            # Authentication passed, allow request to proceed
            return await call_next(request)

        # Authentication failed, reject without calling the handler
        return JSONResponse(
            status_code=StatusCode.AUTHORIZATION_ERROR,
            content={
                "error": {
                    "message": auth_result["message"],
                    "type": "authentication_error",
                    "code": "invalid_auth",
                }
            },
        )

    def _check_authentication(self, request: Request, auth_header: tuple[str, str]):
        """
        Check if the request contains valid authentication headers.

        Args:
            request: The incoming HTTP request
            auth_header: Tuple of (expected_header_key, expected_header_value)

        Returns:
            dict: {"valid": True} on success, otherwise
                {"valid": False, "message": str} describing the failure
        """
        expected_key, expected_value = auth_header

        # Get the actual header value from the request
        actual_value = request.headers.get(expected_key)

        # A missing or empty header is never accepted. The value itself is a
        # shared secret, so it is compared in constant time to avoid leaking
        # how many leading characters matched. Both sides are encoded to bytes
        # because hmac.compare_digest rejects non-ASCII str arguments.
        if actual_value and hmac.compare_digest(
            actual_value.encode("utf-8", "surrogatepass"),
            expected_value.encode("utf-8", "surrogatepass"),
        ):
            return {"valid": True}

        return {
            "valid": False,
            "message": f"This API is restricted, expecting header '{expected_key}' with valid value",
        }


================================================
FILE: python/openai/openai_frontend/frontend/fastapi/routers/__init__.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: python/openai/openai_frontend/frontend/fastapi/routers/chat.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import traceback

from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse
from schemas.openai import CreateChatCompletionRequest, CreateChatCompletionResponse
from utils.utils import ClientError, ServerError, StatusCode

router = APIRouter()


@router.post(
    "/v1/chat/completions", response_model=CreateChatCompletionResponse, tags=["Chat"]
)
async def create_chat_completion(
    request: CreateChatCompletionRequest,
    raw_request: Request,
) -> CreateChatCompletionResponse | StreamingResponse:
    """
    Creates a chat completion for the provided messages and parameters.

    Returns the engine's response directly, or wrapped in a
    "text/event-stream" StreamingResponse when `request.stream` is set.

    Raises:
        HTTPException: With StatusCode.SERVER_ERROR when no engine is attached
            or the engine fails, and StatusCode.CLIENT_ERROR when the engine
            reports a ClientError.
    """
    engine = raw_request.app.engine
    if not engine:
        raise HTTPException(
            status_code=StatusCode.SERVER_ERROR, detail="No attached inference engine"
        )

    try:
        response = await engine.chat(request)
        if not request.stream:
            return response
        return StreamingResponse(response, media_type="text/event-stream")
    except ClientError as e:
        raise HTTPException(status_code=StatusCode.CLIENT_ERROR, detail=str(e))
    except (ServerError, Exception) as e:
        # Server-side and unexpected failures are logged with their traceback
        # and reported to the client as a server error.
        print(traceback.format_exc())
        raise HTTPException(status_code=StatusCode.SERVER_ERROR, detail=str(e))


================================================
FILE: python/openai/openai_frontend/frontend/fastapi/routers/completions.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import traceback

from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse
from schemas.openai import CreateCompletionRequest, CreateCompletionResponse
from utils.utils import ClientError, ServerError, StatusCode

router = APIRouter()


@router.post(
    "/v1/completions", response_model=CreateCompletionResponse, tags=["Completions"]
)
async def create_completion(
    request: CreateCompletionRequest, raw_request: Request
) -> CreateCompletionResponse | StreamingResponse:
    """
    Creates a completion for the provided prompt and parameters.

    Returns the engine's response directly, or wrapped in a
    "text/event-stream" StreamingResponse when `request.stream` is set.

    Raises:
        HTTPException: With StatusCode.SERVER_ERROR when no engine is attached
            or the engine fails, and StatusCode.CLIENT_ERROR when the engine
            reports a ClientError.
    """
    engine = raw_request.app.engine
    if not engine:
        raise HTTPException(
            status_code=StatusCode.SERVER_ERROR, detail="No attached inference engine"
        )

    try:
        response = await engine.completion(request)
        if not request.stream:
            return response
        return StreamingResponse(response, media_type="text/event-stream")
    except ClientError as e:
        raise HTTPException(status_code=StatusCode.CLIENT_ERROR, detail=str(e))
    except (ServerError, Exception) as e:
        # Server-side and unexpected failures are logged with their traceback
        # and reported to the client as a server error.
        print(traceback.format_exc())
        raise HTTPException(status_code=StatusCode.SERVER_ERROR, detail=str(e))


================================================
FILE: python/openai/openai_frontend/frontend/fastapi/routers/embeddings.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import traceback

from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse
from schemas.openai import CreateEmbeddingRequest, CreateEmbeddingResponse
from utils.utils import ClientError, ServerError, StatusCode

# Router holding the embeddings endpoint(s) declared in this module.
router = APIRouter()


@router.post(
    "/v1/embeddings", response_model=CreateEmbeddingResponse, tags=["Embeddings"]
)
async def create_embedding(
    request: CreateEmbeddingRequest, raw_request: Request
) -> CreateEmbeddingResponse | StreamingResponse:
    """
    Creates embedding for the provided input text.
    """
    # The request body is forwarded as-is to the engine attached to the app;
    # engine failures are translated into HTTP errors below.
    engine = raw_request.app.engine
    if not engine:
        raise HTTPException(
            detail="No attached inference engine", status_code=StatusCode.SERVER_ERROR
        )

    try:
        return await engine.embedding(request)
    except ClientError as client_err:
        # Caller mistakes are reported without logging a traceback.
        raise HTTPException(
            status_code=StatusCode.CLIENT_ERROR, detail=f"{client_err}"
        )
    except (ServerError, Exception) as server_err:
        # Engine-reported and unexpected failures are handled identically:
        # print the traceback and report a server-side error.
        print(traceback.format_exc())
        raise HTTPException(
            status_code=StatusCode.SERVER_ERROR, detail=f"{server_err}"
        )


================================================
FILE: python/openai/openai_frontend/frontend/fastapi/routers/models.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from typing import List

from fastapi import APIRouter, HTTPException, Request
from schemas.openai import ListModelsResponse, Model, ObjectType
from utils.utils import StatusCode

# Router holding the model listing/retrieval endpoints declared in this module.
router = APIRouter()

# Owner label for served models. It is not referenced by the handlers in this
# module; presumably consumed elsewhere - TODO confirm.
OWNED_BY = "Triton Inference Server"


@router.get("/v1/models", response_model=ListModelsResponse, tags=["Models"])
def list_models(request: Request) -> ListModelsResponse:
    """
    Lists the currently available models, and provides basic information about each one such as the owner and availability.
    """
    # Refuse to answer when no engine has been attached to the application.
    engine = request.app.engine
    if not engine:
        raise HTTPException(
            detail="No attached inference engine", status_code=StatusCode.SERVER_ERROR
        )

    # Wrap whatever the engine reports in the OpenAI list envelope.
    available: List[Model] = engine.models()
    return ListModelsResponse(data=available, object=ObjectType.list)


@router.get("/v1/models/{model_name}", response_model=Model, tags=["Models"])
def retrieve_model(request: Request, model_name: str) -> Model:
    """
    Retrieves a model instance, providing basic information about the model such as the owner and permissioning.
    """
    # Refuse to answer when no engine has been attached to the application.
    engine = request.app.engine
    if not engine:
        raise HTTPException(
            detail="No attached inference engine", status_code=StatusCode.SERVER_ERROR
        )

    # TODO: Return model directly from engine instead of searching models
    # First model whose id equals the requested name, or None when absent.
    found = next(
        (candidate for candidate in engine.models() if candidate.id == model_name),
        None,
    )
    if found is None:
        raise HTTPException(
            status_code=StatusCode.NOT_FOUND, detail=f"Unknown model: {model_name}"
        )
    return found


================================================
FILE: python/openai/openai_frontend/frontend/fastapi/routers/observability.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import PlainTextResponse, Response
from utils.utils import StatusCode

# Router holding the metrics and readiness endpoints declared in this module.
router = APIRouter()


@router.get("/metrics", response_class=PlainTextResponse, tags=["Utilities"])
def metrics(request: Request) -> PlainTextResponse:
    # Returns whatever the attached engine's `metrics()` produces, sent as a
    # plain-text response.
    #
    # Raises HTTPException(SERVER_ERROR) when no engine is attached, matching
    # the guard used by the other routes instead of failing with an
    # AttributeError on a missing engine.
    # (Kept as `#` comments: a docstring here would become the endpoint
    # description in the generated OpenAPI schema.)
    if not request.app.engine:
        raise HTTPException(
            status_code=StatusCode.SERVER_ERROR, detail="No attached inference engine"
        )

    return request.app.engine.metrics()


@router.get("/health/ready", tags=["Utilities"])
def ready(request: Request) -> Response:
    # Readiness probe: an empty SUCCESS response when the attached engine
    # reports ready, otherwise an HTTPException describing why not.
    engine = request.app.engine
    if not engine:
        raise HTTPException(
            detail="No attached inference engine", status_code=StatusCode.SERVER_ERROR
        )

    if engine.ready():
        return Response(status_code=StatusCode.SUCCESS)

    raise HTTPException(
        detail="Attached inference engine is not ready for inference requests.",
        status_code=StatusCode.CLIENT_ERROR,
    )


================================================
FILE: python/openai/openai_frontend/frontend/fastapi_frontend.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import annotations

import uvicorn
from engine.triton_engine import TritonLLMEngine
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from frontend.fastapi.middleware.api_restriction import (
    APIRestrictionMiddleware,
    RestrictedFeatures,
)
from frontend.fastapi.routers import (
    chat,
    completions,
    embeddings,
    models,
    observability,
)
from frontend.frontend import OpenAIFrontend


class FastApiFrontend(OpenAIFrontend):
    """
    OpenAI-compatible HTTP frontend built on FastAPI and served with uvicorn.

    The frontend does not own the inference engine: the engine is attached to
    the FastAPI application as `app.engine` and the route handlers read it
    from there.
    """

    def __init__(
        self,
        engine: TritonLLMEngine,
        host: str = "localhost",
        port: int = 8000,
        log_level: str = "info",
        restricted_apis: list | None = None,
    ):
        """
        Build the FastAPI application and attach the engine to it.

        Args:
            engine: Inference engine the route handlers delegate to.
            host: Address uvicorn binds to.
            port: Port uvicorn listens on.
            log_level: Log level passed to uvicorn.
            restricted_apis: Optional restriction entries handed to
                `RestrictedFeatures`. `None` or an empty list disables the
                API restriction middleware.
        """
        self.host: str = host
        self.port: int = port
        self.log_level: str = log_level
        # A falsy value (None or empty list) means "no restrictions".
        self.restricted_apis: RestrictedFeatures | None = (
            RestrictedFeatures(restricted_apis) if restricted_apis else None
        )
        self.stopped: bool = False

        self.app = self._create_app()
        # Attach the inference engine to the FastAPI app
        self.app.engine = engine

    def __del__(self):
        self.stop()

    def start(self):
        """Run the uvicorn server for this app; blocks until the server exits."""
        config = uvicorn.Config(
            app=self.app,
            host=self.host,
            port=self.port,
            log_level=self.log_level,
            timeout_keep_alive=5,
        )
        server = uvicorn.Server(config)
        server.run()

    def stop(self):
        """Stop the frontend. Currently a no-op."""
        # NOTE: If the frontend owned the engine, it could do cleanup here.
        pass

    def _create_app(self):
        """Create the FastAPI app, register all routers and install middleware."""
        app = FastAPI(
            title="OpenAI API",
            description="The OpenAI REST API. Please see https://platform.openai.com/docs/api-reference for more details.",
            version="2.0.0",
            termsOfService="https://openai.com/policies/terms-of-use",
            contact={"name": "OpenAI Support", "url": "https://help.openai.com/"},
            license={
                "name": "MIT",
                "url": "https://github.com/openai/openai-openapi/blob/master/LICENSE",
            },
        )

        app.include_router(observability.router)
        app.include_router(models.router)
        app.include_router(completions.router)
        app.include_router(chat.router)
        app.include_router(embeddings.router)

        # NOTE: For debugging purposes, should generally be restricted or removed
        self._add_cors_middleware(app)
        # Identity comparison: only the explicit "no restrictions" marker (None)
        # skips the middleware, independent of how RestrictedFeatures compares.
        if self.restricted_apis is not None:
            self._add_api_restriction_middleware(app)

        return app

    def _add_cors_middleware(self, app: FastAPI):
        """Install CORS middleware allowing requests from `http://localhost`."""
        # Allow API calls through browser /docs route for debug purposes
        origins = [
            "http://localhost",
        ]

        # TODO: Move towards logger instead of printing
        print(f"[WARNING] Adding CORS for the following origins: {origins}")
        app.add_middleware(
            CORSMiddleware,
            allow_origins=origins,
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

    def _add_api_restriction_middleware(self, app: FastAPI):
        """Install the API restriction middleware using `self.restricted_apis`."""
        app.add_middleware(
            APIRestrictionMiddleware, restricted_apis=self.restricted_apis
        )
        print(
            f"[INFO] API restrictions enabled. Restricted API endpoints: {self.restricted_apis.RestrictionDict()}"
        )


================================================
FILE: python/openai/openai_frontend/frontend/frontend.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import annotations

from typing import Protocol


class OpenAIFrontend(Protocol):
    """Structural interface for a service exposing an OpenAI-compatible API."""

    def start(self) -> None:
        """Start the OpenAI-compatible service."""
        ...

    def stop(self) -> None:
        """Stop the OpenAI-compatible service."""
        ...


================================================
FILE: python/openai/openai_frontend/main.py
================================================
#!/usr/bin/env python3

# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import signal
import sys
from functools import partial

import tritonserver
from engine.triton_engine import TritonLLMEngine
from frontend.fastapi_frontend import FastApiFrontend


def signal_handler(
    server, openai_frontend, kserve_http_frontend, kserve_grpc_frontend, signal, frame
):
    """
    Signal callback that reports the received signal and shuts everything down.

    The first four parameters are meant to be pre-bound (e.g. with
    `functools.partial`); `signal` and `frame` are the two arguments the
    `signal` module passes to a handler.
    """
    notice = f"Received {signal=}, {frame=}"
    print(notice)
    # Graceful Shutdown
    shutdown(
        server=server,
        openai_frontend=openai_frontend,
        kserve_http=kserve_http_frontend,
        kserve_grpc=kserve_grpc_frontend,
    )


def shutdown(server, openai_frontend, kserve_http, kserve_grpc):
    """
    Stop the frontends and then the Triton server, announcing each step.

    The OpenAI frontend is stopped first, followed by whichever KServe
    frontends are present (falsy values are skipped), and finally the server.
    """
    print("Shutting down Triton OpenAI-Compatible Frontend...")
    openai_frontend.stop()

    # (announcement, frontend) pairs for the optional KServe frontends.
    optional_frontends = (
        ("Shutting down Triton KServe HTTP Frontend...", kserve_http),
        ("Shutting down Triton KServe GRPC Frontend...", kserve_grpc),
    )
    for announcement, frontend in optional_frontends:
        if not frontend:
            continue
        print(announcement)
        frontend.stop()

    print("Shutting down Triton Inference Server...")
    server.stop()


def start_kserve_frontends(server, args):
    """
    Start the KServe HTTP and GRPC frontends for `server`, if available.

    Both frontends bind to `args.host`, on `args.kserve_http_port` and
    `args.kserve_grpc_port` respectively.

    Returns:
        A `(http_service, grpc_service)` tuple. An entry is `None` when that
        service was not created, e.g. because the `tritonfrontend` package
        could not be imported (a warning is printed in that case).
    """
    http_frontend = None
    grpc_frontend = None
    try:
        # Imported lazily so the application still runs without the package.
        from tritonfrontend import KServeGrpc, KServeHttp

        http_frontend = KServeHttp(
            server,
            KServeHttp.Options(address=args.host, port=args.kserve_http_port),
        )
        http_frontend.start()

        grpc_frontend = KServeGrpc(
            server,
            KServeGrpc.Options(address=args.host, port=args.kserve_grpc_port),
        )
        grpc_frontend.start()
    except ModuleNotFoundError:
        # FIXME: Raise error instead of warning if kserve frontends are opt-in
        missing_package_warning = (
            "[WARNING] The 'tritonfrontend' package was not found. "
            "KServe frontends won't be available through this application without it. "
            "Check /opt/tritonserver/python for tritonfrontend*.whl and pip install it if present."
        )
        print(missing_package_warning)

    return http_frontend, grpc_frontend


def parse_args():
    """
    Define the command-line interface and parse the process arguments.

    Options are organised in three groups: Triton Inference Server, the
    OpenAI-compatible frontend, and the KServe frontends.

    Returns:
        The `argparse.Namespace` produced by `ArgumentParser.parse_args()`.
    """
    cli = argparse.ArgumentParser(
        description="Triton Inference Server with OpenAI-Compatible RESTful API server."
    )

    # (group title, [(flag, add_argument keyword arguments), ...]).
    # Groups and flags are registered in exactly the order listed here.
    option_groups = [
        # Triton Inference Server
        (
            "Triton Inference Server",
            [
                (
                    "--model-repository",
                    {
                        "type": str,
                        "required": True,
                        "help": "Path to the Triton model repository holding the models to be served",
                    },
                ),
                (
                    "--tokenizer",
                    {
                        "type": str,
                        "default": None,
                        "help": "HuggingFace ID or local folder path of the Tokenizer to use for chat templates",
                    },
                ),
                (
                    "--backend",
                    {
                        "type": str,
                        "default": None,
                        "choices": ["vllm", "tensorrtllm"],
                        "help": "Manual override of Triton backend request format (inputs/output names) to use for inference",
                    },
                ),
                (
                    "--lora-separator",
                    {
                        "type": str,
                        "default": None,
                        "help": "LoRA name selection may be appended to the model name following this separator if the separator is provided",
                    },
                ),
                (
                    "--tritonserver-log-verbose-level",
                    {
                        "type": int,
                        "default": 0,
                        "help": "The tritonserver log verbosity level",
                    },
                ),
                (
                    "--host",
                    {
                        "type": str,
                        "default": "0.0.0.0",
                        "help": "Address/host of frontends (default: '0.0.0.0')",
                    },
                ),
                (
                    "--tool-call-parser",
                    {
                        "type": str,
                        "default": None,
                        "help": "Specify the parser for handling tool calling related response text. Options include: 'llama3' and 'mistral'.",
                    },
                ),
                # Allows the user to try a different chat template to craft better prompts and receive more targeted tool-calling responses from the model.
                # Some Mistral models have a separate chat template file, in addition to the tokenizer_config.json,
                # such as mistralai/Mistral-Small-3.1-24B-Instruct-2503.
                # This can serve as a workaround for those models.
                (
                    "--chat-template",
                    {
                        "type": str,
                        "default": None,
                        "help": "The path to the custom Jinja chat template file. This is useful if you'd like to use a different chat template than the one provided by the model.",
                    },
                ),
                (
                    "--default-max-tokens",
                    {
                        "type": int,
                        "default": 16,
                        "help": "The default maximum number of tokens to generate if not specified in the request. The default is 16.",
                    },
                ),
            ],
        ),
        # OpenAI-Compatible Frontend (FastAPI)
        (
            "Triton OpenAI-Compatible Frontend",
            [
                (
                    "--openai-port",
                    {
                        "type": int,
                        "default": 9000,
                        "help": "OpenAI HTTP port (default: 9000)",
                    },
                ),
                (
                    "--uvicorn-log-level",
                    {
                        "type": str,
                        "default": "info",
                        "choices": [
                            "debug",
                            "info",
                            "warning",
                            "error",
                            "critical",
                            "trace",
                        ],
                        "help": "log level for uvicorn",
                    },
                ),
                (
                    "--openai-restricted-api",
                    {
                        "type": str,
                        "default": None,
                        "nargs": 3,
                        "metavar": ("APIs", "Restricted Key", "Restricted Value"),
                        "action": "append",
                        "help": "Restrict access to specific OpenAI API endpoints. Format: '<API_1>,<API_2>,... <restricted-key> <restricted-value>' (e.g., 'inference,model-repository admin-key admin-value'). If not specified, all endpoints are allowed.",
                    },
                ),
            ],
        ),
        # KServe Predict v2 Frontend
        (
            "Triton KServe Frontend",
            [
                (
                    "--enable-kserve-frontends",
                    {
                        "action": "store_true",
                        "help": "Enable KServe Predict v2 HTTP/GRPC frontends (disabled by default)",
                    },
                ),
                (
                    "--kserve-http-port",
                    {
                        "type": int,
                        "default": 8000,
                        "help": "KServe Predict v2 HTTP port (default: 8000)",
                    },
                ),
                (
                    "--kserve-grpc-port",
                    {
                        "type": int,
                        "default": 8001,
                        "help": "KServe Predict v2 GRPC port (default: 8001)",
                    },
                ),
            ],
        ),
    ]

    for group_title, options in option_groups:
        group = cli.add_argument_group(group_title)
        for flag, settings in options:
            group.add_argument(flag, **settings)

    return cli.parse_args()


def main():
    """
    Entry point: start Triton, wrap it in an engine and serve the OpenAI API.

    Steps: parse the CLI arguments, start the Triton server and wait until it
    is ready, build the engine and the FastAPI frontend (exiting with status 1
    if the frontend rejects its configuration with a `ValueError`), optionally
    start the KServe frontends, register SIGINT/SIGTERM handlers, then block
    in the frontend's `start()`.
    """
    args = parse_args()

    # Initialize a Triton Inference Server pointing at LLM models
    triton_options = {
        "model_repository": args.model_repository,
        "log_verbose": args.tritonserver_log_verbose_level,
        "log_info": True,
        "log_warn": True,
        "log_error": True,
    }
    server: tritonserver.Server = tritonserver.Server(**triton_options).start(
        wait_until_ready=True
    )

    # Wrap Triton Inference Server in an interface-conforming "LLMEngine"
    engine: TritonLLMEngine = TritonLLMEngine(
        server=server,
        tokenizer=args.tokenizer,
        backend=args.backend,
        lora_separator=args.lora_separator,
        tool_call_parser=args.tool_call_parser,
        chat_template=args.chat_template,
        default_max_tokens=args.default_max_tokens,
    )

    # Attach TritonLLMEngine as the backbone for inference and model management
    try:
        openai_frontend: FastApiFrontend = FastApiFrontend(
            engine=engine,
            host=args.host,
            port=args.openai_port,
            log_level=args.uvicorn_log_level,
            restricted_apis=args.openai_restricted_api,
        )
    except ValueError as init_error:
        print(
            f"[ERROR] Failed to initialize FastAPI frontend: {init_error}",
            file=sys.stderr,
        )
        sys.exit(1)

    # Optionally expose Triton KServe HTTP/GRPC Frontends
    if args.enable_kserve_frontends:
        kserve_http, kserve_grpc = start_kserve_frontends(server, args)
    else:
        kserve_http, kserve_grpc = None, None

    # Gracefully shutdown when receiving signals for testing and interactive use
    on_signal = partial(
        signal_handler, server, openai_frontend, kserve_http, kserve_grpc
    )
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, on_signal)

    # Blocking call until killed or interrupted with SIGINT
    openai_frontend.start()


# Run the application only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: python/openai/openai_frontend/schemas/__init__.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: python/openai/openai_frontend/schemas/openai.py
================================================
# Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# generated by fastapi-codegen:
#   filename:  api-spec/openai_trimmed.yml
#   timestamp: 2024-05-05T21:52:36+00:00

from __future__ import annotations

from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union

from pydantic import AnyUrl, BaseModel, ConfigDict, Field, RootModel, confloat, conint


# Error payload with four required string fields.
# (Documented with `#` comments rather than a docstring, since pydantic would
# publish a class docstring as the schema description.)
class Error(BaseModel):
    code: str
    message: str
    param: str
    type: str


# Envelope carrying a single `Error` under the `error` key.
class ErrorResponse(BaseModel):
    error: Error


# Single-member enum whose only value is the string "list".
class Object(Enum):
    list = "list"


# Response model with a model `id`, a `deleted` flag and an `object` type
# string; all three fields are required.
class DeleteModelResponse(BaseModel):
    id: str
    deleted: bool
    object: str


# Enumerated model identifiers accepted, alongside arbitrary strings, by
# `CreateCompletionRequest.model`.
class Model1(Enum):
    gpt_3_5_turbo_instruct = "gpt-3.5-turbo-instruct"
    davinci_002 = "davinci-002"
    babbage_002 = "babbage-002"


# Root model wrapping a bare list of arbitrary items; used as the element type
# of the `List[PromptItem]` alternative of `CreateCompletionRequest.prompt`.
class PromptItem(RootModel):
    root: List[Any]


class CreateCompletionRequest(BaseModel):
    # Request body schema for text completions. Only `model` and `prompt` are
    # required; every other field is optional and carries the default shown in
    # its `Field(...)` call.
    # Explicitly return errors for unknown fields.
    model_config: ConfigDict = ConfigDict(extra="forbid")

    model: Union[str, Model1] = Field(
        ...,
        description="ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n",
    )
    # Four accepted shapes: a string, a list of strings, a list of token ids,
    # or a list of token-id lists (`PromptItem`).
    prompt: Union[str, List[str], List[int], List[PromptItem]] = Field(
        ...,
        description="The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.\n\nNote that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document.\n",
    )
    # Range-checked to 0..20 by `conint`; the "must be greater than `n`" rule in
    # the description is not enforced by this schema.
    best_of: Optional[conint(ge=0, le=20)] = Field(
        1,
        description='Generates `best_of` completions server-side and returns the "best" (the one with the highest log probability per token). Results cannot be streamed.\n\nWhen used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`.\n\n**Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.\n',
    )
    echo: Optional[bool] = Field(
        False, description="Echo back the prompt in addition to the completion\n"
    )
    frequency_penalty: Optional[confloat(ge=-2.0, le=2.0)] = Field(
        0,
        description="Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details)\n",
    )
    # TODO: Extension, flesh out description and defaults
    ignore_eos: Optional[bool] = Field(
        False, description="Ignore end-of-sequence tokens during generation\n"
    )
    # The schema only checks for a str -> int mapping; the -100..100 range in
    # the description is not validated here.
    logit_bias: Optional[Dict[str, int]] = Field(
        None,
        description='Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.\n\nAs an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated.\n',
    )
    logprobs: Optional[conint(ge=0, le=5)] = Field(
        None,
        description="Include the log probabilities on the `logprobs` most likely output tokens, as well the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response.\n\nThe maximum value for `logprobs` is 5.\n",
    )
    max_tokens: Optional[conint(ge=0)] = Field(
        None,
        description="The maximum number of [tokens](/tokenizer) that can be generated in the completion.\n\nThe token count of your prompt plus `max_tokens` cannot exceed the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.\n",
        examples=[16],
    )
    # TODO: Extension, flesh out description and defaults
    min_tokens: Optional[conint(ge=0)] = Field(
        None,
        description="The minimum number of [tokens](/tokenizer) that should be generated in the completion.\n",
    )
    n: Optional[conint(ge=1, le=128)] = Field(
        1,
        description="How many completions to generate for each prompt.\n\n**Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.\n",
        examples=[1],
    )
    presence_penalty: Optional[confloat(ge=-2.0, le=2.0)] = Field(
        0,
        description="Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details)\n",
    )
    # Bounds are the signed 64-bit integer range.
    seed: Optional[conint(ge=-9223372036854775808, le=9223372036854775807)] = Field(
        None,
        description="If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.\n\nDeterminism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.\n",
    )
    # A single stop string or a list of them; the "up to 4" limit in the
    # description is not enforced by the annotation.
    stop: Optional[Union[str, List[str]]] = Field(
        None,
        description="Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.\n",
    )
    stream: Optional[bool] = Field(
        False,
        description="Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).\n",
    )
    # `StreamOptions` is declared further down in this module, so this annotation
    # is a forward reference (annotations are strings under
    # `from __future__ import annotations`).
    stream_options: Optional[StreamOptions] = Field(
        None,
        description="Options for streaming responses. Only use when `stream` is set to `true`.",
    )
    suffix: Optional[str] = Field(
        None,
        description="The suffix that comes after a completion of inserted text.\n\nThis parameter is only supported for `gpt-3.5-turbo-instruct`.\n",
        examples=["test."],
    )
    temperature: Optional[confloat(ge=0.0, le=2.0)] = Field(
        1,
        description="What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.\n",
        examples=[1],
    )
    top_p: Optional[confloat(ge=0.0, le=1.0)] = Field(
        1,
        description="An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\nWe generally recommend altering this or `temperature` but not both.\n",
        examples=[1],
    )
    user: Optional[str] = Field(
        None,
        description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n",
        examples=["user-1234"],
    )


class FinishReason(Enum):
    # Allowed values for `Choice.finish_reason` (text completions).
    stop = "stop"
    length = "length"
    content_filter = "content_filter"


class Logprobs(BaseModel):
    # Log-probability details attached to a text-completion `Choice`.
    # Every field is optional and defaults to None.
    text_offset: Optional[List[int]] = None
    token_logprobs: Optional[List[float]] = None
    tokens: Optional[List[str]] = None
    # One mapping per entry, from token string to log probability.
    top_logprobs: Optional[List[Dict[str, float]]] = None


class Choice(BaseModel):
    # One generated text completion within `CreateCompletionResponse.choices`.
    #
    # The nullable fields are spelled `Optional[...]`, like the rest of this
    # module, instead of `X | None`: pydantic evaluates these string annotations
    # at runtime, and the PEP 604 form cannot be evaluated on Python < 3.10.

    # Required key, but its value may be None.
    finish_reason: Optional[FinishReason] = Field(
        ...,
        description="The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,\n`length` if the maximum number of tokens specified in the request was reached,\nor `content_filter` if content was omitted due to a flag from our content filters.\n",
    )
    index: int
    # Required key (no default), but its value may be None.
    logprobs: Optional[Logprobs]
    text: str


class Object1(Enum):
    # Single-member enum; the type of `CreateCompletionResponse.object`.
    text_completion = "text_completion"


class Type(Enum):
    # Single-member enum; the `type` tag of
    # `ChatCompletionRequestMessageContentPartImage`.
    image_url = "image_url"


class Detail(Enum):
    # Allowed values for `ImageUrl.detail`.
    auto = "auto"
    low = "low"
    high = "high"


class ImageUrl(BaseModel):
    # Image reference used by `ChatCompletionRequestMessageContentPartImage`.
    # `use_enum_values=True` asks pydantic to store validated enum fields as the
    # member's value (e.g. "low") rather than the `Detail` member itself.
    model_config = ConfigDict(use_enum_values=True)

    url: AnyUrl = Field(
        ..., description="Either a URL of the image or the base64 encoded image data."
    )
    # Optional; the default is the plain string "auto".
    detail: Optional[Detail] = Field(
        "auto",
        description="Specifies the detail level of the image. Learn more in the [Vision guide](/docs/guides/vision/low-or-high-fidelity-image-understanding).",
    )


class ChatCompletionRequestMessageContentPartImage(BaseModel):
    # Image content part of a user message: a `type` tag (only "image_url" is
    # defined by `Type`) plus the image reference itself.
    model_config = ConfigDict(use_enum_values=True)

    type: Type = Field(..., description="The type of the content part.")
    image_url: ImageUrl


class Type1(Enum):
    # Single-member enum; the `type` tag of
    # `ChatCompletionRequestMessageContentPartText`.
    text = "text"


class ChatCompletionRequestMessageContentPartText(BaseModel):
    # Text content part of a user message: a `type` tag (only "text" is defined
    # by `Type1`) plus the text itself.
    model_config = ConfigDict(use_enum_values=True)

    type: Type1 = Field(..., description="The type of the content part.")
    text: str = Field(..., description="The text content.")


class Role(Enum):
    # Single-member enum; the `role` of `ChatCompletionRequestSystemMessage`.
    system = "system"

    def __str__(self) -> str:
        """Return the bare member name ("system") instead of "Role.system"."""
        return self.name


class ChatCompletionRequestSystemMessage(BaseModel):
    # System-role chat message. `content` and `role` are required; `name` is
    # optional and defaults to None.
    model_config = ConfigDict(use_enum_values=True)

    content: str = Field(..., description="The contents of the system message.")
    role: Role = Field(
        ..., description="The role of the messages author, in this case `system`."
    )
    name: Optional[str] = Field(
        None,
        description="An optional name for the participant. Provides the model information to differentiate between participants of the same role.",
    )


class Role1(Enum):
    # Single-member enum; the `role` of `ChatCompletionRequestUserMessage`.
    user = "user"

    def __str__(self) -> str:
        """Return the bare member name ("user") instead of "Role1.user"."""
        return self.name


class Role2(Enum):
    # Single-member enum; the `role` of `ChatCompletionRequestAssistantMessage`.
    assistant = "assistant"

    def __str__(self) -> str:
        """Return the bare member name ("assistant") instead of "Role2.assistant"."""
        return self.name


class FunctionCall(BaseModel):
    # Function-call payload with both fields required. Used for the
    # `function_call` field of the assistant request message and of the
    # response message.
    # `arguments` is an unparsed string; this schema does not check that it is
    # valid JSON.
    arguments: str = Field(
        ...,
        description="The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.",
    )
    name: str = Field(..., description="The name of the function to call.")


class Role3(Enum):
    # Single-member enum; the `role` of `ChatCompletionRequestToolMessage`.
    tool = "tool"

    def __str__(self) -> str:
        """Return the bare member name ("tool") instead of "Role3.tool"."""
        return self.name


class ChatCompletionRequestToolMessage(BaseModel):
    # Tool-role chat message. All three fields are required; `tool_call_id`
    # names the tool call this message answers.
    model_config = ConfigDict(use_enum_values=True)

    role: Role3 = Field(
        ..., description="The role of the messages author, in this case `tool`."
    )
    content: str = Field(..., description="The contents of the tool message.")
    tool_call_id: str = Field(
        ..., description="Tool call that this message is responding to."
    )


class Role4(Enum):
    # Single-member enum; the `role` of `ChatCompletionRequestFunctionMessage`.
    function = "function"

    def __str__(self) -> str:
        """Return the bare member name ("function") instead of "Role4.function"."""
        return self.name


class ChatCompletionRequestFunctionMessage(BaseModel):
    # Function-role chat message. All three fields are required.
    model_config = ConfigDict(use_enum_values=True)

    role: Role4 = Field(
        ..., description="The role of the messages author, in this case `function`."
    )
    content: str = Field(..., description="The contents of the function message.")
    name: str = Field(..., description="The name of the function to call.")


class FunctionParameters(BaseModel):
    # Open-ended object: it declares no fields of its own, and `extra="allow"`
    # lets arbitrary keys through. Used for the `parameters` of
    # `ChatCompletionFunctions` and `FunctionObject`.
    model_config = ConfigDict(extra="allow")


class ChatCompletionFunctions(BaseModel):
    # Function definition: required `name`, optional `description` and
    # `parameters`. The naming rule in the `name` description (allowed
    # characters, max length 64) is documentation only; nothing here validates it.
    description: Optional[str] = Field(
        None,
        description="A description of what the function does, used by the model to choose when and how to call the function.",
    )
    name: str = Field(
        ...,
        description="The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.",
    )
    parameters: Optional[FunctionParameters] = None


class ChatCompletionFunctionCallOption(BaseModel):
    # Selects one function by its (required) name.
    name: str = Field(..., description="The name of the function to call.")


class FunctionObject(BaseModel):
    # Function definition embedded in `ChatCompletionTool.function`. It has the
    # same fields as `ChatCompletionFunctions`: required `name`, optional
    # `description` and `parameters`.
    description: Optional[str] = Field(
        None,
        description="A description of what the function does, used by the model to choose when and how to call the function.",
    )
    name: str = Field(
        ...,
        description="The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.",
    )
    parameters: Optional[FunctionParameters] = None


class ChatCompletionToolChoiceOption1(Enum):
    # String form of a tool choice; one arm of the `ChatCompletionToolChoiceOption`
    # union (the other arm is `ChatCompletionNamedToolChoice`).
    none = "none"
    auto = "auto"
    required = "required"


class Function(BaseModel):
    # Name-only function reference used by `ChatCompletionNamedToolChoice`.
    name: str = Field(..., description="The name of the function to call.")


class ChatCompletionNamedToolChoice(BaseModel):
    # Object form of a tool choice, naming one specific function.
    # `type` is a free-form string here; the "only `function`" restriction in
    # the description is not enforced by the annotation.
    type: str = Field(
        ...,
        description="The type of the tool. Currently, only `function` is supported.",
    )
    function: Function


class Function1(BaseModel):
    # Function name plus arguments for a complete tool call
    # (`ChatCompletionMessageToolCall.function`). Both fields are required;
    # `arguments` is an unparsed string.
    name: str = Field(..., description="The name of the function to call.")
    arguments: str = Field(
        ...,
        description="The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.",
    )


class ChatCompletionMessageToolCall(BaseModel):
    # One complete tool call; all fields are required. `type` is a free-form
    # string (the "only `function`" restriction is not enforced here).
    id: str = Field(..., description="The ID of the tool call.")
    type: str = Field(
        ...,
        description="The type of the tool. Currently, only `function` is supported.",
    )
    function: Function1 = Field(..., description="The function that the model called.")


class Function2(BaseModel):
    # Variant of `Function1` with both fields optional, used by
    # `ChatCompletionMessageToolCallChunk.function`.
    name: Optional[str] = Field(None, description="The name of the function to call.")
    arguments: Optional[str] = Field(
        None,
        description="The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.",
    )


class ChatCompletionMessageToolCallChunk(BaseModel):
    # Tool-call entry carried in `ChatCompletionStreamResponseDelta.tool_calls`.
    # Only `index` is required; `id`, `type` and `function` default to None.
    index: int
    id: Optional[str] = Field(None, description="The ID of the tool call.")
    type: Optional[str] = Field(
        None,
        description="The type of the tool. Currently, only `function` is supported.",
    )
    function: Optional[Function2] = None


class ChatCompletionRole(Enum):
    # The five chat message author roles. Unlike the `Role*` enums in this
    # module, this one does not override `__str__`.
    system = "system"
    user = "user"
    assistant = "assistant"
    tool = "tool"
    function = "function"


class Role5(Enum):
    # Single-member enum holding the "assistant" role.
    assistant = "assistant"

    def __str__(self) -> str:
        """Return the bare member name ("assistant") instead of "Role5.assistant"."""
        return self.name


class FunctionCall2(BaseModel):
    # Variant of `FunctionCall` with both fields optional, used by
    # `ChatCompletionStreamResponseDelta.function_call`.
    arguments: Optional[str] = Field(
        None,
        description="The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.",
    )
    name: Optional[str] = Field(None, description="The name of the function to call.")


class Role6(Enum):
    # Four-role enum (system / user / assistant / tool); it has no "function"
    # member, unlike `ChatCompletionRole`.
    system = "system"
    user = "user"
    assistant = "assistant"
    tool = "tool"

    def __str__(self) -> str:
        """Return the bare member name (e.g. "user") instead of "Role6.user"."""
        return self.name


class ChatCompletionStreamResponseDelta(BaseModel):
    # Delta payload of one streamed chat-completion choice. Every field is
    # optional and defaults to None.
    content: Optional[str] = Field(
        None, description="The contents of the chunk message."
    )
    function_call: Optional[FunctionCall2] = Field(
        None,
        description="Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.",
    )
    tool_calls: Optional[List[ChatCompletionMessageToolCallChunk]] = None
    # Plain string rather than one of the role enums declared in this module.
    role: Optional[str] = Field(
        None, description="The role of the author of this message."
    )


class Model2(Enum):
    # Named chat model identifiers. Member names are the identifiers with "-" and
    # "." replaced by "_"; the values are the identifier strings themselves.
    gpt_4_turbo = "gpt-4-turbo"
    gpt_4_turbo_2024_04_09 = "gpt-4-turbo-2024-04-09"
    gpt_4_0125_preview = "gpt-4-0125-preview"
    gpt_4_turbo_preview = "gpt-4-turbo-preview"
    gpt_4_1106_preview = "gpt-4-1106-preview"
    gpt_4_vision_preview = "gpt-4-vision-preview"
    gpt_4 = "gpt-4"
    gpt_4_0314 = "gpt-4-0314"
    gpt_4_0613 = "gpt-4-0613"
    gpt_4_32k = "gpt-4-32k"
    gpt_4_32k_0314 = "gpt-4-32k-0314"
    gpt_4_32k_0613 = "gpt-4-32k-0613"
    gpt_3_5_turbo = "gpt-3.5-turbo"
    gpt_3_5_turbo_16k = "gpt-3.5-turbo-16k"
    gpt_3_5_turbo_0301 = "gpt-3.5-turbo-0301"
    gpt_3_5_turbo_0613 = "gpt-3.5-turbo-0613"
    gpt_3_5_turbo_1106 = "gpt-3.5-turbo-1106"
    gpt_3_5_turbo_0125 = "gpt-3.5-turbo-0125"
    gpt_3_5_turbo_16k_0613 = "gpt-3.5-turbo-16k-0613"


class Type6(Enum):
    # Allowed values for `ResponseFormat.type`.
    text = "text"
    json_object = "json_object"


class ResponseFormat(BaseModel):
    # Requested output format; `type` is optional.
    # NOTE(review): the default is the plain string "text", not `Type6.text`, and
    # this model does not set `use_enum_values`. Presumably an omitted `type`
    # stays a str while a supplied one becomes a `Type6` member — confirm that
    # callers handle both.
    type: Optional[Type6] = Field(
        "text",
        description="Must be one of `text` or `json_object`.",
        examples=["json_object"],
    )


class StreamOptions(BaseModel):
    # Streaming options referenced by `CreateCompletionRequest.stream_options`.
    # The single flag defaults to False.
    include_usage: Optional[bool] = Field(
        False,
        description="If enabled, an additional chunk is sent before the `data: [DONE]` message. That chunk’s `usage` field reports the total token usage for the request and its `choices` array is always empty. All other chunks include a `usage` field with a null value.",
    )


class FunctionCall3(Enum):
    # Two-member string enum: "none" or "auto".
    none = "none"
    auto = "auto"


class ChatCompletionFinishReason(Enum):
    # Allowed `finish_reason` values for `ChatCompletionChoice` and
    # `ChatCompletionStreamingResponseChoice`.
    stop = "stop"
    length = "length"
    tool_calls = "tool_calls"
    content_filter = "content_filter"
    function_call = "function_call"


class Object2(Enum):
    # Single-member enum; the `object` tag of `CreateChatCompletionResponse`
    # and `CreateChatCompletionFunctionResponse`.
    chat_completion = "chat.completion"


class FinishReason2(Enum):
    # Allowed values for `Choice2.finish_reason`. It has no "tool_calls" member,
    # unlike `ChatCompletionFinishReason`.
    stop = "stop"
    length = "length"
    function_call = "function_call"
    content_filter = "content_filter"


class TopLogprob(BaseModel):
    # One candidate token and its log probability; the element type of
    # `ChatCompletionTokenLogprob.top_logprobs`.
    token: str = Field(..., description="The token.")
    logprob: float = Field(
        ...,
        description="The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely.",
    )
    # Still a required key, but now nullable: the description states the value
    # can be `null` when the token has no bytes representation, and a plain
    # `List[int]` annotation rejected that documented case.
    bytes: Optional[List[int]] = Field(
        ...,
        description="A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.",
    )


class ChatCompletionTokenLogprob(BaseModel):
    # Log-probability record for one token, together with the alternatives
    # considered at the same position (`top_logprobs`).
    token: str = Field(..., description="The token.")
    logprob: float = Field(
        ...,
        description="The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely.",
    )
    # Still a required key, but now nullable: the description states the value
    # can be `null` when the token has no bytes representation, and a plain
    # `List[int]` annotation rejected that documented case.
    bytes: Optional[List[int]] = Field(
        ...,
        description="A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.",
    )
    top_logprobs: List[TopLogprob] = Field(
        ...,
        description="List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested `top_logprobs` returned.",
    )


class ChatCompletionLogprobs(BaseModel):
    # Container for a chat choice's per-token log-probability records.
    # `content` is required.
    content: List[ChatCompletionTokenLogprob] = Field(
        ...,
        description="A list of message content tokens with log probability information.",
    )


class ChatCompletionStreamingResponseChoice(BaseModel):
    # One choice inside a streamed chat-completion chunk.
    delta: ChatCompletionStreamResponseDelta
    logprobs: Optional[ChatCompletionLogprobs] = Field(
        None, description="Log probability information for the choice."
    )
    # Required key whose value may be None. Spelled `Optional[...]`, like the
    # rest of this module, instead of `X | None`: pydantic evaluates these
    # string annotations at runtime, and the PEP 604 form cannot be evaluated
    # on Python < 3.10.
    finish_reason: Optional[ChatCompletionFinishReason] = Field(
        ...,
        description="The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,\n`length` if the maximum number of tokens specified in the request was reached,\n`content_filter` if content was omitted due to a flag from our content filters,\n`tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.\n",
    )
    index: int = Field(
        ..., description="The index of the choice in the list of choices."
    )


class Object4(Enum):
    # Single-member enum; the `object` tag of `CreateChatCompletionStreamResponse`.
    chat_completion_chunk = "chat.completion.chunk"


class CreateChatCompletionStreamResponse(BaseModel):
    # One chunk of a streamed chat completion. `system_fingerprint` and `usage`
    # are optional (default None); every other field is required.
    id: str = Field(
        ...,
        description="A unique identifier for the chat completion. Each chunk has the same ID.",
    )
    choices: List[ChatCompletionStreamingResponseChoice] = Field(
        ...,
        description="A list of chat completion choices. Can be more than one if `n` is greater than 1.",
    )
    created: int = Field(
        ...,
        description="The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp.",
    )
    model: str = Field(..., description="The model to generate the completion.")
    system_fingerprint: Optional[str] = Field(
        None,
        description="This fingerprint represents the backend configuration that the model runs with.\nCan be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.\n",
    )
    object: Object4 = Field(
        ..., description="The object type, which is always `chat.completion.chunk`."
    )
    # `CompletionUsage` is declared further down in this module, so this
    # annotation is a forward reference.
    usage: Optional[CompletionUsage] = None


class CreateChatCompletionImageResponse(BaseModel):
    # Placeholder model: declares no fields.
    pass


class Object5(Enum):
    # Single-member enum; the `object` tag of `Model`.
    model = "model"


class Model(BaseModel):
    # Description of one model, as listed in `ListModelsResponse.data`.
    # All four fields are required.
    id: str = Field(
        ...,
        description="The model identifier, which can be referenced in the API endpoints.",
    )
    created: int = Field(
        ..., description="The Unix timestamp (in seconds) when the model was created."
    )
    object: Object5 = Field(
        ..., description='The object type, which is always "model".'
    )
    owned_by: str = Field(..., description="The organization that owns the model.")


class BaseUsage(BaseModel):
    # Token accounting fields shared by `EmbeddingUsage` and `CompletionUsage`.
    # Both are required.
    prompt_tokens: int = Field(..., description="Number of tokens in the prompt.")
    total_tokens: int = Field(
        ...,
        description="Total number of tokens used in the request (prompt + completion).",
    )


class EmbeddingUsage(BaseUsage):
    # Adds nothing to `BaseUsage`; it inherits `prompt_tokens` and `total_tokens`.
    pass


class CompletionUsage(BaseUsage):
    # `BaseUsage` plus a required count of generated tokens. Used as the `usage`
    # field of the completion and chat-completion response models.
    completion_tokens: int = Field(
        ..., description="Number of tokens in the generated completion."
    )


class Event(Enum):
    # Single-member enum; the `event` tag of `ErrorEvent`.
    error = "error"


class ErrorEvent(BaseModel):
    # Event whose `event` tag is "error" and whose `data` is an `Error` payload.
    event: Event
    data: Error


class Event1(Enum):
    # Single-member enum; the `event` tag of `DoneEvent`.
    done = "done"


class Data(Enum):
    # Single-member enum whose value is the literal "[DONE]" sentinel; the
    # `data` payload of `DoneEvent`.
    field_DONE_ = "[DONE]"


class DoneEvent(BaseModel):
    # Event whose `event` tag is "done" and whose `data` is the "[DONE]" sentinel.
    event: Event1
    data: Data


class ListModelsResponse(BaseModel):
    # List envelope: `object` must be "list" (see `Object`) and `data` holds the
    # `Model` entries.
    object: Object
    data: List[Model]


class CreateCompletionResponse(BaseModel):
    # Text-completion response body. `system_fingerprint` and `usage` are
    # optional (default None); every other field is required.
    id: str = Field(..., description="A unique identifier for the completion.")
    choices: List[Choice] = Field(
        ...,
        description="The list of completion choices the model generated for the input prompt.",
    )
    created: int = Field(
        ...,
        description="The Unix timestamp (in seconds) of when the completion was created.",
    )
    model: str = Field(..., description="The model used for completion.")
    system_fingerprint: Optional[str] = Field(
        None,
        description="This fingerprint represents the backend configuration that the model runs with.\n\nCan be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.\n",
    )
    object: Object1 = Field(
        ..., description='The object type, which is always "text_completion"'
    )
    usage: Optional[CompletionUsage] = None


class ChatCompletionRequestMessageContentPart(RootModel):
    # Root model over the two content-part shapes: text or image. It is the
    # element type of the list form of `ChatCompletionRequestUserMessage.content`.
    root: Union[
        ChatCompletionRequestMessageContentPartText,
        ChatCompletionRequestMessageContentPartImage,
    ]


class ChatCompletionRequestUserMessage(BaseModel):
    # User-role chat message. `content` and `role` are required; `name` is
    # optional and defaults to None.
    model_config = ConfigDict(use_enum_values=True)

    # Either a plain string or a list of text/image content parts.
    content: Union[str, List[ChatCompletionRequestMessageContentPart]] = Field(
        ..., description="The contents of the user message.\n"
    )
    role: Role1 = Field(
        ..., description="The role of the messages author, in this case `user`."
    )
    name: Optional[str] = Field(
        None,
        description="An optional name for the participant. Provides the model information to differentiate between participants of the same role.",
    )


class ChatCompletionTool(BaseModel):
    # Tool definition: a free-form `type` string (the "only `function`"
    # restriction is not enforced here) plus the function definition.
    type: str = Field(
        ...,
        description="The type of the tool. Currently, only `function` is supported.",
    )
    function: FunctionObject


class ChatCompletionToolChoiceOption(RootModel):
    # Root model accepting either the string form ("none" / "auto" / "required")
    # or the object form that names a specific function.
    root: Union[ChatCompletionToolChoiceOption1, ChatCompletionNamedToolChoice] = Field(
        ...,
        description='Controls which (if any) tool is called by the model.\n`none` means the model will not call any tool and instead generates a message.\n`auto` means the model can pick between generating a message or calling one or more tools.\n`required` means the model must call one or more tools.\nSpecifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.\n\n`none` is the default when no tools are present. `auto` is the default if tools are present.\n',
    )


class ChatCompletionMessageToolCalls(RootModel):
    # Root model wrapping a bare list of `ChatCompletionMessageToolCall` entries.
    # Used as the `tool_calls` field of the assistant request message and of the
    # response message.
    root: List[ChatCompletionMessageToolCall] = Field(
        ...,
        description="The tool calls generated by the model, such as function calls.",
    )


class ChatCompletionResponseMessage(BaseModel):
    # Message returned inside a non-streaming chat choice. `content` and `role`
    # are required strings; `tool_calls` and `function_call` default to None.
    content: str = Field(..., description="The contents of the message.")
    tool_calls: Optional[ChatCompletionMessageToolCalls] = None
    # Plain string rather than one of the role enums declared in this module.
    role: str = Field(..., description="The role of the author of this message.")
    function_call: Optional[FunctionCall] = Field(
        None,
        description="Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.",
    )


class ChatCompletionChoice(BaseModel):
    # One choice of a non-streaming chat completion. All four keys are required.
    finish_reason: ChatCompletionFinishReason = Field(
        ...,
        description="The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,\n`length` if the maximum number of tokens specified in the request was reached,\n`content_filter` if content was omitted due to a flag from our content filters,\n`tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.\n",
    )
    index: int = Field(
        ..., description="The index of the choice in the list of choices."
    )
    message: ChatCompletionResponseMessage
    # Required key whose value may be None. Spelled `Optional[...]`, like the
    # rest of this module, instead of `X | None`: pydantic evaluates these
    # string annotations at runtime, and the PEP 604 form cannot be evaluated
    # on Python < 3.10.
    logprobs: Optional[ChatCompletionLogprobs] = Field(
        ..., description="Log probability information for the choice."
    )


class CreateChatCompletionResponse(BaseModel):
    # Non-streaming chat-completion response body. `system_fingerprint` and
    # `usage` are optional (default None); every other field is required.
    id: str = Field(..., description="A unique identifier for the chat completion.")
    choices: List[ChatCompletionChoice] = Field(
        ...,
        description="A list of chat completion choices. Can be more than one if `n` is greater than 1.",
    )
    created: int = Field(
        ...,
        description="The Unix timestamp (in seconds) of when the chat completion was created.",
    )
    model: str = Field(..., description="The model used for the chat completion.")
    system_fingerprint: Optional[str] = Field(
        None,
        description="This fingerprint represents the backend configuration that the model runs with.\n\nCan be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.\n",
    )
    object: Object2 = Field(
        ..., description="The object type, which is always `chat.completion`."
    )
    usage: Optional[CompletionUsage] = None


class Choice2(BaseModel):
    # Choice entry used by `CreateChatCompletionFunctionResponse`. Compared with
    # `ChatCompletionChoice` it has no `logprobs` field and uses `FinishReason2`.
    finish_reason: FinishReason2 = Field(
        ...,
        description="The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence, `length` if the maximum number of tokens specified in the request was reached, `content_filter` if content was omitted due to a flag from our content filters, or `function_call` if the model called a function.\n",
    )
    index: int = Field(
        ..., description="The index of the choice in the list of choices."
    )
    message: ChatCompletionResponseMessage


class CreateChatCompletionFunctionResponse(BaseModel):
    # Same shape as `CreateChatCompletionResponse`, except that `choices` holds
    # `Choice2` entries. `system_fingerprint` and `usage` are optional.
    id: str = Field(..., description="A unique identifier for the chat completion.")
    choices: List[Choice2] = Field(
        ...,
        description="A list of chat completion choices. Can be more than one if `n` is greater than 1.",
    )
    created: int = Field(
        ...,
        description="The Unix timestamp (in seconds) of when the chat completion was created.",
    )
    model: str = Field(..., description="The model used for the chat completion.")
    system_fingerprint: Optional[str] = Field(
        None,
        description="This fingerprint represents the backend configuration that the model runs with.\n\nCan be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.\n",
    )
    object: Object2 = Field(
        ..., description="The object type, which is always `chat.completion`."
    )
    usage: Optional[CompletionUsage] = None


class ChatCompletionRequestAssistantMessage(BaseModel):
    # Assistant-role chat message. Only `role` is required; the other fields
    # default to None.
    model_config = ConfigDict(use_enum_values=True)

    # The "required unless `tool_calls` or `function_call` is specified" rule in
    # the description is not enforced by this schema.
    content: Optional[str] = Field(
        None,
        description="The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified.\n",
    )
    role: Role2 = Field(
        ..., description="The role of the messages author, in this case `assistant`."
    )
    name: Optional[str] = Field(
        None,
        description="An optional name for the participant. Provides the model information to differentiate between participants of the same role.",
    )
    tool_calls: Optional[ChatCompletionMessageToolCalls] = None
    function_call: Optional[FunctionCall] = Field(
        None,
        description="Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.",
    )


class ChatCompletionRequestMessage(RootModel):
    # Root model over the five role-specific request message models.
    root: Union[
        ChatCompletionRequestSystemMessage,
        ChatCompletionRequestUserMessage,
        ChatCompletionRequestAssistantMessage,
        ChatCompletionRequestToolMessage,
        ChatCompletionRequestFunctionMessage,
    ]

    # Pass-through accessors so callers can read `message.role` and
    # `message.content` without going through `.root`; every union member
    # declares both fields.
    @property
    def role(self):
        """Return the `role` of the wrapped message."""
        return self.root.role

    @property
    def content(self):
        """Return the `content` of the wrapped message.

        The concrete type depends on the wrapped model: the assistant message
        allows None and the user message allows a list of content parts.
        """
        return self.root.content


class CreateChatCompletionRequest(BaseModel):
    # Request body schema for creating a chat completion. Only `messages` and
    # `model` are required; every other field has a default.
    #
    # Explicitly return errors for unknown fields.
    model_config: ConfigDict = ConfigDict(extra="forbid")

    # At least one message is required (min_length=1).
    messages: List[ChatCompletionRequestMessage] = Field(
        ...,
        description="A list of messages comprising the conversation so far. [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).",
        min_length=1,
    )
    model: Union[str, Model2] = Field(
        ...,
        description="ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API.",
        examples=["gpt-4-turbo"],
    )
    frequency_penalty: Optional[confloat(ge=-2.0, le=2.0)] = Field(
        0,
        description="Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details)\n",
    )
    # TODO: Extension, flesh out description and defaults
    ignore_eos: Optional[bool] = Field(
        False, description="Ignore end-of-sequence tokens during generation\n"
    )
    logit_bias: Optional[Dict[str, int]] = Field(
        None,
        description="Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.\n",
    )
    logprobs: Optional[bool] = Field(
        False,
        description="Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of `message`.",
    )
    # The description ties this to `logprobs`, but that dependency is not
    # validated in this class.
    top_logprobs: Optional[conint(ge=0, le=20)] = Field(
        None,
        description="An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to `true` if this parameter is used.",
    )
    max_completion_tokens: Optional[conint(ge=0)] = Field(
        None,
        description="The maximum number of [tokens](/tokenizer) that can be generated in the chat completion.\n\nThe total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.\n",
    )
    # TODO: Remove support for max_tokens field in the future: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_completion_tokens
    max_tokens: Optional[conint(ge=0)] = Field(
        None,
        description="DEPRECATED: Use `max_completion_tokens` instead. The maximum number of [tokens](/tokenizer) that can be generated in the chat completion.\n\nThe total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.\n",
    )
    # TODO: Extension, flesh out description and defaults
    min_tokens: Optional[conint(ge=0)] = Field(
        None,
        description="The minimum number of [tokens](/tokenizer) that should be generated in the chat completion.\n",
    )
    n: Optional[conint(ge=1, le=128)] = Field(
        1,
        description="How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.",
        examples=[1],
    )
    presence_penalty: Optional[confloat(ge=-2.0, le=2.0)] = Field(
        0,
        description="Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details)\n",
    )
    response_format: Optional[ResponseFormat] = Field(
        None,
        description='An object specifying the format that the model must output. Compatible with [GPT-4 Turbo](/docs/models/gpt-4-and-gpt-4-turbo) and all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.\n\nSetting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_completion_tokens` or the conversation exceeded the max context length.\n',
    )
    # The bounds are the signed 64-bit integer range.
    seed: Optional[conint(ge=-9223372036854775808, le=9223372036854775807)] = Field(
        None,
        description="This feature is in Beta.\nIf specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.\nDeterminism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.\n",
    )
    # NOTE(review): the description says "up to 4 sequences", but no length
    # constraint is declared on this field - confirm whether the limit is
    # enforced elsewhere.
    stop: Optional[Union[str, List[str]]] = Field(
        None,
        description="Up to 4 sequences where the API will stop generating further tokens.\n",
    )
    stream: Optional[bool] = Field(
        False,
        description="If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).\n",
    )
    stream_options: Optional[StreamOptions] = Field(
        None,
        description="Options for streaming responses. Only use when `stream` is set to `true`.",
    )
    # The default is 0.7 here; the `examples` value is documentation only.
    temperature: Optional[confloat(ge=0.0, le=2.0)] = Field(
        0.7,
        description="What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.\n",
        examples=[1],
    )
    top_p: Optional[confloat(ge=0.0, le=1.0)] = Field(
        1,
        description="An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\nWe generally recommend altering this or `temperature` but not both.\n",
        examples=[1],
    )
    # NOTE(review): the description mentions a maximum of 128 functions, but
    # unlike `functions` below no `max_length` is declared on this field.
    tools: Optional[List[ChatCompletionTool]] = Field(
        None,
        description="A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.\n",
    )
    tool_choice: Optional[ChatCompletionToolChoiceOption] = None
    user: Optional[str] = Field(
        None,
        description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n",
        examples=["user-1234"],
    )
    # Deprecated counterpart of `tool_choice` (see description).
    function_call: Optional[
        Union[FunctionCall3, ChatCompletionFunctionCallOption]
    ] = Field(
        None,
        description='Deprecated in favor of `tool_choice`.\n\nControls which (if any) function is called by the model.\n`none` means the model will not call a function and instead generates a message.\n`auto` means the model can pick between generating a message or calling a function.\nSpecifying a particular function via `{"name": "my_function"}` forces the model to call that function.\n\n`none` is the default when no functions are present. `auto` is the default if functions are present.\n',
    )
    # Deprecated counterpart of `tools`; when provided, the list must hold
    # between 1 and 128 entries.
    functions: Optional[List[ChatCompletionFunctions]] = Field(
        None,
        description="Deprecated in favor of `tools`.\n\nA list of functions the model may generate JSON inputs for.\n",
        max_length=128,
        min_length=1,
    )


# Additional Aliases for Convenience


class ObjectType:
    """Namespace of short aliases for members of the ``Object*`` types.

    Each attribute re-exports one member so callers can write
    ``ObjectType.chat_completion`` instead of referring to the numbered
    ``Object*`` name directly. (The ``Object*`` types are defined elsewhere
    in this module - presumably generated enums; verify.)
    """

    model = Object5.model
    # Shadows the builtin `list` only inside this class namespace.
    list = Object.list
    text_completion = Object1.text_completion
    chat_completion_chunk = Object4.chat_completion_chunk
    chat_completion = Object2.chat_completion


class EmbeddingObject(BaseModel):
    # One embedding entry of an embeddings response. Unknown fields are
    # rejected (extra="forbid").
    model_config: ConfigDict = ConfigDict(extra="forbid")

    # Required (no default is given) and restricted to the literal "embedding".
    object: Literal["embedding"] = Field(
        description="The object type, which is always 'embedding'.",
    )
    # Either the raw float vector or a string; the description says the
    # string form is base64-encoded.
    embedding: Union[List[float], str] = Field(
        ...,
        description="The embedding vector, which is a list of floats or a base64-encoded string.",
    )
    index: int = Field(
        ...,
        description="The index of the embedding in the list of embeddings.",
    )


class CreateEmbeddingRequest(BaseModel):
    # Request body schema for creating embeddings. `input` and `model` are
    # required; the remaining fields have defaults.
    #
    # Explicitly return errors for unknown fields.
    model_config: ConfigDict = ConfigDict(extra="forbid")

    # NOTE(review): the description advertises arrays of strings and arrays of
    # token arrays, but the annotation only admits a single string or a single
    # list of ints - confirm whether batch inputs are intentionally unsupported.
    input: Union[str, List[int]] = Field(
        ...,
        description="Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays.",
        min_length=1,
        examples=["The food was delicious and the waiter..."],
    )
    # NOTE(review): the description refers to the Chat API although this is the
    # embeddings request - looks copied from the chat request model.
    model: Union[str, Model2] = Field(
        ...,
        description="ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API.",
        examples=["text-embedding-ada-002"],
    )
    dimensions: Optional[int] = Field(
        None,
        description="The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.",
    )
    # Defaults to "float"; "base64" is the only other accepted value.
    encoding_format: Optional[Literal["float", "base64"]] = Field(
        "float",
        description="The format to return the embeddings in.",
    )
    user: Optional[str] = Field(
        None,
        description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n",
        examples=["user-1234"],
    )


class CreateEmbeddingResponse(BaseModel):
    # Response body schema for an embeddings request. Unknown fields are
    # rejected (extra="forbid").
    model_config: ConfigDict = ConfigDict(extra="forbid")

    # Required (no default is given) and restricted to the literal "list".
    object: Literal["list"] = Field(
        description="The object type, which is always 'list'.",
    )
    data: List[EmbeddingObject] = Field(
        ...,
        description="The list of embeddings.",
    )
    model: Union[str, Model2] = Field(
        ...,
        description="The model used to generate the embeddings.",
    )
    # Declared with `...`, so the field must always be supplied, but the
    # Optional annotation allows the supplied value to be None.
    usage: Optional[EmbeddingUsage] = Field(
        ...,
        description="The usage for the request.",
    )


================================================
FILE: python/openai/openai_frontend/utils/utils.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from enum import IntEnum


class ServerError(Exception):
    """Error type for failures attributed to the server side."""


class ClientError(Exception):
    """Error type for failures attributed to the client side."""


class StatusCode(IntEnum):
    """Integer status codes used by the frontend.

    The values match the standard HTTP status codes of the same meaning, and
    because this is an ``IntEnum`` each member compares equal to its integer.
    """

    SUCCESS = 200
    CLIENT_ERROR = 400
    AUTHORIZATION_ERROR = 401
    NOT_FOUND = 404
    SERVER_ERROR = 500


================================================
FILE: python/openai/requirements-test.txt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Testing
pytest==8.1.1
pytest-asyncio==0.23.8


================================================
FILE: python/openai/requirements.txt
================================================
# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# FastAPI Application
fastapi==0.121.2
# Pin httpx version to avoid bug in openai library:
# https://community.openai.com/t/error-with-openai-1-56-0-client-init-got-an-unexpected-keyword-argument-proxies/1040332/3
httpx==0.27.2
openai==1.107.3
partial-json-parser # used for parsing partial JSON outputs

# FIXME [TRI-641]: The latest stable version of scipy (1.17.0) caused a segfault during tests, so pin the previous release. See TRI-620.
scipy==1.16.3
# Minimum starlette version needed to address CVE(s):
# https://github.com/advisories/GHSA-f96h-pmfr-66vw
# https://github.com/advisories/GHSA-7f5h-v6xp-fcq8
starlette>=0.49.1


================================================
FILE: python/openai/tests/__init__.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: python/openai/tests/conftest.py
================================================
# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
from pathlib import Path

import pytest
from fastapi.testclient import TestClient
from tests.utils import OpenAIServer, setup_fastapi_app, setup_server


def pytest_configure(config):
    """Declare the custom markers used by this test suite.

    Each entry is added to the ``markers`` ini option of ``config`` via
    ``addinivalue_line``, in the order listed.
    """
    marker_lines = (
        "openai: mark test to run with OpenAI server (subprocess)",
        "asyncio: mark test as an asyncio test",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)


### TEST ENVIRONMENT SETUP ###
def infer_test_environment(tool_call_parser):
    """Guess the backend and model to test from the installed packages.

    ``vllm`` is probed first, then ``tensorrt_llm``. A message is printed for
    each package that cannot be imported.

    Args:
        tool_call_parser: Name of the tool call parser under test. Only the
            value ``"mistral"`` changes the result, and only for vllm.

    Returns:
        A ``(backend, model)`` tuple of strings.

    Raises:
        Exception: If neither package can be imported.
    """
    try:
        import vllm as _
    except ImportError:
        print("No vllm installation found.")
    else:
        uses_mistral = tool_call_parser == "mistral"
        vllm_model = (
            "mistral-nemo-instruct-2407" if uses_mistral else "llama-3.1-8b-instruct"
        )
        return "vllm", vllm_model

    try:
        import tensorrt_llm as _
    except ImportError:
        print("No tensorrt_llm installation found.")
    else:
        return "tensorrtllm", "tensorrt_llm_bls"

    raise Exception("Unknown test environment")


def infer_test_model_repository(backend, tool_call_parser):
    """Return the default model repository path for a backend.

    The repository is a directory next to this file named
    ``<backend>_mistral_models`` when ``tool_call_parser`` is ``"mistral"``
    and ``<backend>_models`` otherwise. The path is returned as a string.
    """
    suffix = "mistral_models" if tool_call_parser == "mistral" else "models"
    tests_dir = Path(__file__).parent
    return str(tests_dir / f"{backend}_{suffix}")


### FIXTURES - Refactored from global variables ###


@pytest.fixture(scope="session")
def tool_call_parser():
    """Tool call parser name from ``TEST_TOOL_CALL_PARSER``, default ``llama3``."""
    return os.getenv("TEST_TOOL_CALL_PARSER", "llama3")


@pytest.fixture(scope="session")
def backend(tool_call_parser):
    """Backend name to run the tests against.

    ``TEST_BACKEND`` is used only when ``TEST_MODEL`` is set as well;
    otherwise the backend is inferred from the installed packages.
    """
    configured_backend = os.environ.get("TEST_BACKEND")
    configured_model = os.environ.get("TEST_MODEL")

    if configured_backend and configured_model:
        return configured_backend
    return infer_test_environment(tool_call_parser)[0]


@pytest.fixture(scope="session")
def model(tool_call_parser):
    """Model name from ``TEST_MODEL``, or inferred when it is unset or empty."""
    configured_model = os.environ.get("TEST_MODEL")

    if configured_model:
        return configured_model
    return infer_test_environment(tool_call_parser)[1]


@pytest.fixture(scope="session")
def model_repository(backend, tool_call_parser):
    """Model repository path for the tests.

    A non-empty ``TEST_MODEL_REPOSITORY`` wins; otherwise the path is derived
    from the backend and tool call parser.
    """
    return os.environ.get("TEST_MODEL_REPOSITORY") or infer_test_model_repository(
        backend, tool_call_parser
    )


@pytest.fixture(scope="session")
def tokenizer_model():
    """Tokenizer identifier from ``TEST_TOKENIZER``, with a Llama 3.1 default."""
    return os.getenv("TEST_TOKENIZER", "meta-llama/Meta-Llama-3.1-8B-Instruct")


@pytest.fixture(scope="session")
def prompt():
    """Fixed prompt text shared by the tests in this session."""
    return "What is machine learning?"


@pytest.fixture(scope="session")
def messages(prompt):
    """Single-turn chat history holding the shared prompt as a user message."""
    user_message = {"role": "user", "content": prompt}
    return [user_message]


@pytest.fixture(scope="session")
def input(prompt):
    """Return the shared prompt unchanged under the fixture name ``input``.

    The name shadows the builtin ``input`` within this module; it is kept
    because tests request the fixture by this name.
    """
    return prompt


# NOTE: OpenAI client requires actual server running, and won't work
# with the FastAPI TestClient. Run the server at module scope to run
# only once for all the tests below.
@pytest.fixture(scope="module")
def server(
    model_repository: str, tokenizer_model: str, backend: str, tool_call_parser: str
):
    """Yield an ``OpenAIServer`` started with the test configuration.

    The server is entered as a context manager, so it is exited when the
    module-scoped fixture is torn down.
    """
    base_args = [
        "--model-repository",
        model_repository,
        "--tokenizer",
        tokenizer_model,
        "--backend",
        backend,
        "--tool-call-parser",
        tool_call_parser,
    ]
    # TODO: Incorporate kserve frontend binding smoke tests to catch any
    # breakage with default values or slight cli arg variations
    kserve_args = ["--enable-kserve-frontends"]

    with OpenAIServer(base_args + kserve_args) as openai_server:
        yield openai_server


# NOTE: The FastAPI TestClient acts like a server and triggers the FastAPI app
# lifespan startup/shutdown, but does not actually expose the network port to interact
# with arbitrary clients - you must use the TestClient returned to interact with
# the "server" when "starting the server" via TestClient.
@pytest.fixture(scope="class")
def fastapi_client_class_scope(
    model_repository: str, tokenizer_model: str, backend: str
):
    """Yield a FastAPI ``TestClient`` backed by an in-process server.

    A server is created with ``setup_server``, wrapped in a FastAPI app with
    ``setup_fastapi_app``, and served through ``TestClient`` for the lifetime
    of the test class.

    Args:
        model_repository: Path of the model repository to load.
        tokenizer_model: Tokenizer identifier passed to the app.
        backend: Backend name passed to the app.

    Yields:
        The ``TestClient`` bound to the app.
    """
    server = setup_server(model_repository=model_repository)
    # try/finally guarantees the server is stopped even when app creation,
    # the app lifespan, or client teardown raises; otherwise it would stay
    # alive for the rest of the pytest process.
    try:
        app = setup_fastapi_app(
            tokenizer=tokenizer_model, server=server, backend=backend
        )
        with TestClient(app) as test_client:
            yield test_client
    finally:
        server.stop()


# FIXME: In TRTLLM tests, the in-process Triton server for the FastAPI app
# does not automatically release GPU memory, even after calling stop().
# The memory is only released when the entire pytest process exits.
#
# As a result, when the OpenAI server starts another Triton server as a subprocess,
# there may not be enough GPU memory available to launch a new model instance.
#
# This is a workaround to ensure that tests using the OpenAI server run first.
# Once the OpenAI server subprocess is terminated, tests using the FastAPI app can safely run.
def pytest_collection_modifyitems(session, config, items):
    """Reorder collected tests by the marker on their test class.

    Items are sorted in place (stably): classes whose first ``openai`` or
    ``fastapi`` mark is ``openai`` come first, then ``fastapi``, then
    everything else, including tests that do not belong to a class.
    """

    def get_priority(item):
        owner = item.cls
        if not owner:
            return 2  # unmarked tests last
        if not getattr(owner, "pytestmark", None):
            return 2  # unmarked tests last

        for mark in owner.pytestmark:
            mark_name = mark.name
            if mark_name == "openai":
                return 0
            if mark_name == "fastapi":
                return 1
        return 2  # unmarked tests last

    items.sort(key=get_priority)


================================================
FILE: python/openai/tests/test_chat_completions.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import copy
import subprocess
from pathlib import Path
from typing import List

import pytest
import tritonserver
from fastapi.testclient import TestClient
from tests.utils import setup_fastapi_app, setup_server


class TestChatCompletions:
    @pytest.fixture(scope="class")
    def client(self, fastapi_client_class_scope):
        """Re-export the `fastapi_client_class_scope` fixture as `client`.

        The fixture is class-scoped, so every test in this class receives the
        same object that `fastapi_client_class_scope` provides.
        """
        yield fastapi_client_class_scope

    def test_chat_completions_defaults(self, client, model: str, messages: List[dict]):
        """Send only the required fields and expect a non-empty assistant reply.

        Also checks that the response carries a `usage` entry.
        """
        payload = {"model": model, "messages": messages}
        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 200
        body = response.json()
        reply = body["choices"][0]["message"]
        assert reply["content"].strip()
        assert reply["role"] == "assistant"

        # Usage must be present even though it was not explicitly requested.
        assert body.get("usage") is not None

    def test_chat_completions_system_prompt(self, client, model: str):
        """Sanity-check that a conversation containing a system role is accepted.

        NOTE: This only verifies that the request succeeds with a non-empty
        assistant reply; the quality of the response is not measured.
        """
        conversation = [
            {"role": "system", "content": "You are a Triton Inference Server expert."},
            {"role": "user", "content": "What is machine learning?"},
        ]
        payload = {"model": model, "messages": conversation}

        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 200
        reply = response.json()["choices"][0]["message"]
        assert reply["content"].strip()
        assert reply["role"] == "assistant"

    def test_chat_completions_system_prompt_only(self, client, model: str):
        """Expect a valid assistant reply when only a system message is sent."""
        # Deliberately no user message in the conversation.
        system_only = [
            {"role": "system", "content": "You are a Triton Inference Server expert."}
        ]
        payload = {"model": model, "messages": system_only}

        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 200
        reply = response.json()["choices"][0]["message"]
        assert reply["content"].strip()
        assert reply["role"] == "assistant"

    def test_chat_completions_user_prompt_str(self, client, model: str):
        """Expect a valid reply when the user content is a plain string."""
        # Deliberately no system message in the conversation.
        user_only = [{"role": "user", "content": "What is machine learning?"}]
        payload = {"model": model, "messages": user_only}

        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 200
        reply = response.json()["choices"][0]["message"]
        assert reply["content"].strip()
        assert reply["role"] == "assistant"

    def test_chat_completions_user_prompt_dict(self, client, model: str):
        """Expect a valid reply when the user content is a list of typed parts."""
        # Deliberately no system message; content uses the structured form.
        text_part = {"type": "text", "text": "What is machine learning?"}
        user_only = [{"role": "user", "content": [text_part]}]
        payload = {"model": model, "messages": user_only}

        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 200
        reply = response.json()["choices"][0]["message"]
        assert reply["content"].strip()
        assert reply["role"] == "assistant"

    @pytest.mark.parametrize(
        "param_key, param_value",
        [
            ("temperature", 0.7),
            ("max_tokens", 10),
            ("max_completion_tokens", 10),
            ("top_p", 0.9),
            ("frequency_penalty", 0.5),
            ("presence_penalty", 0.2),
            ("n", 1),
            # Single stop word as a string
            ("stop", "."),
            # List of stop words
            ("stop", []),
            ("stop", [".", ","]),
            # logprobs is a boolean for chat completions
            ("logprobs", True),
            ("logit_bias", {"0": 0}),
            # NOTE: Extensions to the spec
            ("min_tokens", 16),
            ("ignore_eos", True),
        ],
    )
    def test_chat_completions_sampling_parameters(
        self, client, param_key, param_value, model: str, messages: List[dict]
    ):
        """Send one valid sampling parameter at a time and check the outcome.

        Most parameters must yield a 200 with an assistant reply. Two cases
        are expected to be rejected with a 400: `logit_bias` (always) and
        `logprobs=True` when the model is `tensorrt_llm_bls`.
        """
        request_body = {"model": model, "messages": messages, param_key: param_value}
        response = client.post("/v1/chat/completions", json=request_body)

        # FIXME: Add support and remove this check
        not_yet_supported = ("logit_bias",)
        if param_key in not_yet_supported:
            assert response.status_code == 400
            assert response.json()["detail"] == "logit bias is not currently supported"
            return

        # TRT-LLM backend doesn't support logprobs
        trtllm_logprobs_request = (
            param_key == "logprobs"
            and param_value is True
            and model == "tensorrt_llm_bls"
        )
        if trtllm_logprobs_request:
            assert response.status_code == 400
            error_detail = response.json()["detail"]
            assert (
                "logprobs are currently available only for the vLLM backend"
                in error_detail
            )
            return

        assert response.status_code == 200
        reply = response.json()["choices"][0]["message"]
        assert reply["content"]
        assert reply["role"] == "assistant"

    @pytest.mark.parametrize(
        "param_key, param_value",
        [
            ("temperature", 2.1),
            ("temperature", -0.1),
            ("max_tokens", -1),
            ("max_completion_tokens", -1),
            ("top_p", 1.1),
            ("frequency_penalty", 3),
            ("frequency_penalty", -3),
            ("presence_penalty", 2.1),
            ("presence_penalty", -2.1),
            # NOTE: Extensions to the spec
            ("min_tokens", -1),
            ("ignore_eos", 123),
        ],
    )
    def test_chat_completions_invalid_sampling_parameters(
        self, client, param_key, param_value, model: str, messages: List[dict]
    ):
        """Send one out-of-range sampling parameter and expect a 422."""
        request_body = {"model": model, "messages": messages, param_key: param_value}
        response = client.post("/v1/chat/completions", json=request_body)
        print("Response:", response.json())

        # The request must be rejected by schema validation.
        assert response.status_code == 422

    # Rough behavioral check of the token-limit parameters
    @pytest.mark.parametrize(
        "max_tokens_key",
        [
            "max_tokens",
            "max_completion_tokens",
        ],
    )
    def test_chat_completions_max_tokens(
        self, client, max_tokens_key, model: str, messages: List[dict]
    ):
        """Check that the token limit bounds the length of the reply.

        Three requests are sent in order: two with a limit of 1 (expected to
        produce a single word each) and one with a limit of 100 (expected to
        produce more words than the first).
        """
        base_payload = {"model": model, "messages": messages}

        # Limits of 1, 1 and 100: the first two should be similar, the last
        # one should be visibly longer.
        responses = [
            client.post(
                "/v1/chat/completions",
                json={**base_payload, max_tokens_key: limit},
            )
            for limit in (1, 1, 100)
        ]

        for response in responses:
            print("Response:", response.json())
            assert response.status_code == 200

        short_words, repeat_words, long_words = [
            r.json()["choices"][0]["message"]["content"].strip().split()
            for r in responses
        ]
        # Simplification: One token shouldn't be more than one space-delimited word
        assert len(short_words) == len(repeat_words) == 1
        assert len(long_words) > len(short_words)

    def test_chat_completions_max_completion_tokens_precedence(
        self, client, model: str, messages: List[dict]
    ):
        """Expect `max_completion_tokens` to win when both limits are given.

        `max_tokens` is set high and `max_completion_tokens` to 1; the reply
        must then consist of a single space-delimited word.
        """
        payload = {
            "model": model,
            "messages": messages,
            # Larger limit that should be ignored
            "max_tokens": 50,
            # Smaller limit that is expected to take precedence
            "max_completion_tokens": 1,
        }

        response = client.post("/v1/chat/completions", json=payload)

        body = response.json()
        print("Response:", body)
        assert response.status_code == 200

        # The word count should match max_completion_tokens.
        reply_words = body["choices"][0]["message"]["content"].strip().split()
        assert len(reply_words) == 1

    @pytest.mark.parametrize(
        "temperature",
        [0.0, 1.0],
    )
    # Rough behavioral check of the temperature parameter
    def test_chat_completions_temperature_vllm(
        self, client, temperature, backend: str, model: str, messages: List[dict]
    ):
        """Check both temperature extremes against the vLLM backend.

        Two identical requests are sent. At temperature 0.0 the replies must
        match; at 1.0 they must differ. Skipped for any other backend.
        """
        if backend != "vllm":
            pytest.skip(reason="Only used to test vLLM-specific temperature behavior")

        payload = {
            "model": model,
            "messages": messages,
            "max_completion_tokens": 256,
            "temperature": temperature,
        }
        responses = [
            client.post("/v1/chat/completions", json=payload) for _ in range(2)
        ]

        for response in responses:
            print("Response:", response.json())
            assert response.status_code == 200

        first_words, second_words = [
            r.json()["choices"][0]["message"]["content"].strip().split()
            for r in responses
        ]

        # Only the two extremes are meaningful for this test.
        if temperature not in (0.0, 1.0):
            raise ValueError(f"Unexpected {temperature=} for this test.")

        if temperature == 0.0:
            # Greedy sampling: equivalent requests should give the same reply.
            # NOTE: An exact match may be ambitious depending on how other
            # parameter defaults are set; remove if it proves flaky.
            assert first_words == second_words
        else:
            # Maximum randomness: equivalent requests should give different
            # replies.
            assert first_words != second_words

    # Remove xfail when fix is released and this test returns xpass status
    @pytest.mark.xfail(
        reason="TRT-LLM BLS model will ignore temperature until a later release"
    )
    # Rough behavioral check of the temperature parameter
    def test_chat_completions_temperature_tensorrtllm(
        self, client, backend: str, model: str, messages: List[dict]
    ):
        """Check that temperature changes the output on the TRT-LLM backend.

        Two requests at temperature 0.0 must give the same reply, and a third
        at temperature 1.0 must give a different one. Skipped for any other
        backend.
        """
        if backend != "tensorrtllm":
            pytest.skip(
                reason="Only used to test TRT-LLM-specific temperature behavior"
            )

        greedy_payload = {
            "model": model,
            "messages": messages,
            # Increase token length to allow more room for variability
            "max_completion_tokens": 200,
            "temperature": 0.0,
            # TRT-LLM requires certain settings of `top_k` / `top_p` to
            # respect changes in `temperature`
            "top_p": 0.5,
        }
        random_payload = copy.deepcopy(greedy_payload)
        random_payload["temperature"] = 1.0

        # Two identical requests first, then one with a different temperature.
        responses = [
            client.post("/v1/chat/completions", json=body)
            for body in (greedy_payload, greedy_payload, random_payload)
        ]

        for response in responses:
            print("Response:", response.json())
            assert response.status_code == 200

        greedy_words, repeat_words, random_words = [
            r.json()["choices"][0]["message"]["content"].strip().split()
            for r in responses
        ]

        assert greedy_words == repeat_words
        assert greedy_words != random_words

    # Rough behavioral check of the seed parameter
    def test_chat_completions_seed(self, client, model: str, messages: List[dict]):
        """Check that the seed makes replies reproducible.

        Two requests with seed 1 must give the same reply, and a third with
        seed 2 must give a different one.
        """
        seeded_payload = {
            "model": model,
            "messages": messages,
            # Increase token length to allow more room for variability
            "max_completion_tokens": 200,
            "seed": 1,
        }
        reseeded_payload = copy.deepcopy(seeded_payload)
        reseeded_payload["seed"] = 2

        # Two requests with the same seed first, then one with a new seed.
        responses = [
            client.post("/v1/chat/completions", json=body)
            for body in (seeded_payload, seeded_payload, reseeded_payload)
        ]

        for response in responses:
            print("Response:", response.json())
            assert response.status_code == 200

        seeded_words, repeat_words, reseeded_words = [
            r.json()["choices"][0]["message"]["content"].strip().split()
            for r in responses
        ]

        assert seeded_words == repeat_words
        assert seeded_words != reseeded_words

    def test_chat_completions_no_message(
        self, client, model: str, messages: List[dict]
    ):
        """Expect a 422 when the `messages` list is empty.

        The `messages` fixture value is not used; an empty list is sent
        instead.
        """
        # Message validation requires at least one item in the list.
        payload = {"model": model, "messages": []}
        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 422
        first_error = response.json()["detail"][0]
        assert (
            first_error["msg"]
            == "List should have at least 1 item after validation, not 0"
        )

    def test_chat_completions_empty_message(
        self, client, model: str, messages: List[dict]
    ):
        """Expect a 422 when the only message is an empty object.

        The `messages` fixture value is not used; a list holding one empty
        dict is sent instead.
        """
        # The list is non-empty, but its single message has no fields at all.
        payload = {"model": model, "messages": [{}]}
        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 422
        first_error = response.json()["detail"][0]
        assert first_error["msg"] == "Field required"

    def test_chat_completions_multiple_choices(
        self, client, model: str, messages: List[dict]
    ):
        """Expect a 400 when more than one choice (`n=2`) is requested."""
        payload = {"model": model, "messages": messages, "n": 2}
        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 400
        error_detail = response.json()["detail"]
        assert "only single choice" in error_detail

    @pytest.mark.skip(reason="Not Implemented Yet")
    def test_chat_completions_streaming(self, client):
        """Placeholder for a streaming test in this class; always skipped for now."""
        pass

    def test_chat_completions_no_streaming(
        self, client, model: str, messages: List[dict]
    ):
        """Expect a normal assistant reply when `stream` is explicitly False."""
        payload = {"model": model, "messages": messages, "stream": False}
        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 200
        reply = response.json()["choices"][0]["message"]
        assert reply["content"].strip()
        assert reply["role"] == "assistant"

    @pytest.mark.skip(reason="Not Implemented Yet")
    def test_function_calling(self):
        """Placeholder for a function-calling test; always skipped for now."""
        pass

    @pytest.mark.skip(reason="Not Implemented Yet")
    def test_lora(self):
        """Placeholder for a LoRA test; always skipped for now."""
        pass

    @pytest.mark.skip(reason="Not Implemented Yet")
    def test_multi_lora(self):
        """Placeholder for a multi-LoRA test; always skipped for now."""
        pass

    @pytest.mark.skip(reason="Not Implemented Yet")
    def test_request_n_choices(self):
        """Placeholder for a test of requesting `n` choices; always skipped for now."""
        pass

    @pytest.mark.skip(reason="Not Implemented Yet")
    def test_request_logit_bias(self):
        """Placeholder for a `logit_bias` request test; always skipped for now."""
        pass

    def test_usage_response(self, client, model: str, messages: List[dict]):
        """Validate the `usage` section of a chat completion response.

        All three counters must be integers, the prompt and completion counts
        must be positive, and the total must equal their sum.
        """
        payload = {"model": model, "messages": messages}
        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 200
        usage = response.json().get("usage")
        assert usage is not None

        for counter in ("prompt_tokens", "completion_tokens", "total_tokens"):
            assert isinstance(usage[counter], int)

        prompt_count = usage["prompt_tokens"]
        completion_count = usage["completion_tokens"]
        assert prompt_count > 0
        assert completion_count > 0
        assert usage["total_tokens"] == prompt_count + completion_count

    def test_chat_completions_logprobs(
        self, client, backend: str, model: str, messages: List[dict]
    ):
        """Request logprobs with `top_logprobs=2` and validate their structure.

        Backends other than vLLM are expected to reject the request with a
        400. For vLLM, every token entry must carry `token`, `logprob`,
        `bytes` and `top_logprobs` with the expected types.
        """
        payload = {
            "model": model,
            "messages": messages,
            "logprobs": True,
            "top_logprobs": 2,
            "max_tokens": 10,
        }
        response = client.post("/v1/chat/completions", json=payload)

        # Non-vLLM backends should raise an error
        if backend != "vllm":
            assert response.status_code == 400
            error_detail = response.json()["detail"]
            assert (
                "logprobs are currently available only for the vLLM backend"
                in error_detail
            )
            return

        assert response.status_code == 200

        # The first choice must expose a populated logprobs section.
        first_choice = response.json()["choices"][0]
        assert "logprobs" in first_choice
        logprobs = first_choice["logprobs"]

        assert logprobs is not None
        assert "content" in logprobs
        token_entries = logprobs["content"]
        assert isinstance(token_entries, list)
        assert len(token_entries) > 0

        expected_types = (
            ("token", str),
            ("logprob", (int, float)),
            ("bytes", list),
            ("top_logprobs", list),
        )
        for entry in token_entries:
            # Presence of every field is checked before any type is checked.
            for field, _ in expected_types:
                assert field in entry
            for field, field_type in expected_types:
                assert isinstance(entry[field], field_type)

            # Each alternative only needs the three basic fields.
            for alternative in entry["top_logprobs"]:
                for field in ("token", "logprob", "bytes"):
                    assert field in alternative

    def test_chat_completions_logprobs_false(
        self, client, model: str, messages: List[dict]
    ):
        """Expect no logprobs in the reply when `logprobs` is False."""
        payload = {
            "model": model,
            "messages": messages,
            "logprobs": False,
            "max_tokens": 10,
        }
        response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 200

        # The field may be absent or null, but must not carry data.
        first_choice = response.json()["choices"][0]
        assert first_choice.get("logprobs") is None

    @pytest.mark.parametrize("top_logprobs_value", [0, 5])
    def test_chat_completions_top_logprobs_without_logprobs(
        self,
        client,
        model: str,
        messages: List[dict],
        top_logprobs_value: int,
        backend: str,
    ):
        """Expect a 400 when `top_logprobs` is sent without `logprobs`.

        Skipped for backends other than vLLM.
        """
        if backend != "vllm":
            pytest.skip(
                reason="logprobs are currently available only for the vLLM backend"
            )

        payload = {
            "model": model,
            "messages": messages,
            "top_logprobs": top_logprobs_value,
            "max_tokens": 10,
        }
        response = client.post("/v1/chat/completions", json=payload)

        # Any top_logprobs value is invalid unless logprobs is True.
        assert response.status_code == 400
        error_detail = response.json()["detail"]
        assert (
            "`top_logprobs` can only be used when `logprobs` is True" in error_detail
        )

    def test_chat_completions_top_logprobs_validation(
        self, client, model: str, messages: List[dict]
    ):
        """Expect a 422 when `top_logprobs` exceeds the maximum of 20."""
        payload = {
            "model": model,
            "messages": messages,
            "logprobs": True,
            # One of the values above the allowed maximum of 20
            "top_logprobs": 25,
            "max_tokens": 5,
        }
        response = client.post("/v1/chat/completions", json=payload)

        # The request must be rejected by schema validation.
        assert response.status_code == 422
        error_text = str(response.json()["detail"])
        assert "Input should be less than or equal to 20" in error_text


# For tests that won't use the same pytest fixture for server startup across
# the whole class test suite.
class TestChatCompletionsTokenizers:
    # Re-use a single Triton server for different frontend configurations
    @pytest.fixture(scope="class")
    def server(self, model_repository: str):
        """Provide one server from `setup_server` for the whole class.

        The server is stopped once the class-scoped fixture is torn down.
        """
        triton_server = setup_server(model_repository)
        yield triton_server
        triton_server.stop()

    # A tokenizer must be known for /chat/completions endpoint in order to
    # apply chat templates, and for simplicity in determination, users should
    # define the tokenizer. So, explicitly raise an error if none is provided.
    def test_chat_completions_no_tokenizer(
        self,
        server: tritonserver.Server,
        backend: str,
        model: str,
        messages: List[dict],
    ):
        """Expect a 500 "Unknown tokenizer" error when no tokenizer is set."""
        payload = {"model": model, "messages": messages}
        tokenizerless_app = setup_fastapi_app(
            tokenizer="", server=server, backend=backend
        )
        with TestClient(tokenizerless_app) as client:
            response = client.post("/v1/chat/completions", json=payload)

        assert response.status_code == 500
        error_detail = response.json()["detail"]
        assert error_detail == "Unknown tokenizer"

    def test_chat_completions_custom_tokenizer(
        self,
        server: tritonserver.Server,
        backend: str,
        tokenizer_model: str,
        model: str,
        messages: List[dict],
    ):
        """Compare a locally downloaded tokenizer against its remote equivalent.

        The tokenizer JSON files for `tokenizer_model` are downloaded with the
        `hf` CLI into a `custom_tokenizer` directory next to this file. Two
        apps are then built, one using the local path and one using the model
        name, and the same request is sent to both. Both replies must be
        valid and equal apart from the `id` and `created` fields.

        Raises:
            subprocess.CalledProcessError: If the download command fails.
        """
        # Tokenizers can be provided by a local file path to a directory containing
        # the relevant files such as tokenizer.json and tokenizer_config.json.
        custom_tokenizer_path = str(Path(__file__).parent / "custom_tokenizer")

        # Build argv as a list rather than splitting a formatted string, so a
        # path or model name containing whitespace stays a single argument.
        # No shell is involved, so the "*.json" pattern reaches `hf` unexpanded.
        download_cmd = [
            "hf",
            "download",
            "--local-dir",
            custom_tokenizer_path,
            tokenizer_model,
            "--include",
            "*.json",
        ]
        print(f"Running download command: {' '.join(download_cmd)}")
        subprocess.run(download_cmd, check=True)

        # Compare the downloaded tokenizer response against remote HF equivalent
        # to assert equivalent functionality in responses and chat template.
        app_local = setup_fastapi_app(
            tokenizer=custom_tokenizer_path, server=server, backend=backend
        )
        app_hf = setup_fastapi_app(
            tokenizer=tokenizer_model, server=server, backend=backend
        )

        # Fixed temperature and seed so both requests are directly comparable.
        payload = {
            "model": model,
            "messages": messages,
            "temperature": 0,
            "seed": 0,
        }
        with TestClient(app_local) as client_local, TestClient(app_hf) as client_hf:
            responses = [
                client_local.post("/v1/chat/completions", json=payload),
                client_hf.post("/v1/chat/completions", json=payload),
            ]

        for response in responses:
            assert response.status_code == 200
            message = response.json()["choices"][0]["message"]
            assert message["content"].strip()
            assert message["role"] == "assistant"

        def equal_dicts(d1, d2, ignore_keys):
            """Return True if both dicts match once `ignore_keys` are dropped."""
            d1_filtered = {k: v for k, v in d1.items() if k not in ignore_keys}
            d2_filtered = {k: v for k, v in d2.items() if k not in ignore_keys}
            return d1_filtered == d2_filtered

        # These fields are excluded from the comparison of the two replies.
        ignore_keys = ["id", "created"]
        assert equal_dicts(
            responses[0].json(), responses[1].json(), ignore_keys=ignore_keys
        )

    def test_chat_completions_invalid_chat_tokenizer(
        self,
        server: tritonserver.Server,
        backend: str,
        model: str,
        messages: List[dict],
    ):
        """Expect a 500 when the tokenizer has no chat template defined.

        The test is marked xfail for transformers versions older than 4.44,
        and skipped when the tokenizer cannot be loaded because of a
        HuggingFace connectivity error.
        """
        # NOTE: Use of apply_chat_template on a tokenizer that doesn't support it
        # is a warning prior to transformers 4.44, and an error afterwards.
        # NOTE: Can remove after both TRT-LLM and VLLM containers have this version.
        import re

        import transformers

        print(f"{transformers.__version__=}")
        # Compare numerically: as plain strings "4.100.0" sorts before
        # "4.44.0", and "4.5.0" sorts after it.
        version_match = re.match(r"(\d+)\.(\d+)", transformers.__version__)
        assert (
            version_match
        ), f"Unrecognized transformers version: {transformers.__version__}"
        major_minor = tuple(int(part) for part in version_match.groups())
        if major_minor < (4, 44):
            pytest.xfail()

        # Pick a tokenizer with no chat template defined
        invalid_chat_tokenizer = "gpt2"
        try:
            app = setup_fastapi_app(
                tokenizer=invalid_chat_tokenizer, server=server, backend=backend
            )
        except OSError as e:
            expected_msg = f"We couldn't connect to 'https://huggingface.co' to load this file, couldn't find it in the cached files and it looks like {invalid_chat_tokenizer} is not the path to a directory containing a file named config.json."
            if expected_msg in str(e):
                pytest.skip("HuggingFace network issues")
            # Any other OSError is a real failure; re-raise it unchanged.
            raise
        with TestClient(app) as client:
            response = client.post(
                "/v1/chat/completions",
                json={"model": model, "messages": messages},
            )

        assert response.status_code == 500
        # Error may vary based on transformers version
        expected_errors = [
            "cannot use apply_chat_template()",
            "cannot use chat template",
        ]
        assert any(
            error in response.json()["detail"].lower() for error in expected_errors
        )


================================================
FILE: python/openai/tests/test_completions.py
================================================
# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import copy

import pytest


class TestCompletions:
    @pytest.fixture(scope="class")
    def client(self, fastapi_client_class_scope):
        """Expose the `fastapi_client_class_scope` fixture to this class as `client`."""
        yield fastapi_client_class_scope

    def test_completions_defaults(self, client, model: str, prompt: str):
        """Post a request with only model and prompt; expect text and usage back."""
        request_body = {"model": model, "prompt": prompt}
        response = client.post("/v1/completions", json=request_body)

        body = response.json()
        print("Response:", body)
        assert response.status_code == 200

        # NOTE: Could be improved to look for certain quality of response,
        #       or tested with dummy identity model.
        completion_text = body["choices"][0]["text"]
        assert completion_text.strip()

        # Usage statistics must be reported alongside the completion.
        assert body.get("usage") is not None

    @pytest.mark.parametrize(
        "sampling_parameter, value",
        [
            ("temperature", 0.7),
            ("max_tokens", 10),
            ("top_p", 0.9),
            ("frequency_penalty", 0.5),
            ("presence_penalty", 0.2),
            ("n", 1),
            # logprobs is an integer for completions
            ("logprobs", 5),
            ("logit_bias", {"0": 0}),
            # NOTE: Extensions to the spec
            ("min_tokens", 16),
            ("ignore_eos", True),
        ],
    )
    def test_completions_sampling_parameters(
        self, client, sampling_parameter, value, model: str, prompt: str
    ):
        """Send a single sampling parameter and check the expected outcome.

        `logit_bias` is expected to be rejected with a 400, as is `logprobs`
        on the `tensorrt_llm_bls` model; every other parameter is expected to
        yield a 200 with non-empty text.
        """
        request_body = {"model": model, "prompt": prompt}
        request_body[sampling_parameter] = value
        response = client.post("/v1/completions", json=request_body)
        body = response.json()
        print("Response:", body)

        # FIXME: Add support and remove this check
        unsupported_parameters = ("logit_bias",)
        if sampling_parameter in unsupported_parameters:
            assert response.status_code == 400
            assert body["detail"] == "logit bias is not supported"
            return

        # TRT-LLM backend doesn't support logprobs
        logprobs_requested = sampling_parameter == "logprobs" and value is not None
        if logprobs_requested and model == "tensorrt_llm_bls":
            assert response.status_code == 400
            expected_detail = (
                "logprobs are currently available only for the vLLM backend"
            )
            assert expected_detail in body["detail"]
            return

        assert response.status_code == 200
        assert body["choices"][0]["text"].strip()

    # Simple tests to verify max_tokens roughly behaves as expected
    def test_completions_max_tokens(self, client, model: str, prompt: str):
        """Compare word counts of completions capped at 1 and at 100 tokens."""
        # Two requests with max_tokens = 1 to check their similarity, followed
        # by one request with a larger max_tokens to check its dissimilarity.
        token_limits = (1, 1, 100)
        responses = [
            client.post(
                "/v1/completions",
                json={"model": model, "prompt": prompt, "max_tokens": limit},
            )
            for limit in token_limits
        ]

        for response in responses:
            print("Response:", response.json())
            assert response.status_code == 200

        first_words, second_words, third_words = (
            reply.json()["choices"][0]["text"].strip().split() for reply in responses
        )
        # Simplification: One token shouldn't be more than one space-delimited word
        assert len(first_words) == len(second_words) == 1
        assert len(third_words) > len(first_words)

    @pytest.mark.parametrize(
        "temperature",
        [0.0, 1.0],
    )
    # Simple tests to verify temperature roughly behaves as expected
    def test_completions_temperature_vllm(
        self, client, temperature, backend: str, model: str, prompt: str
    ):
        """Send the same request twice and compare outputs for a temperature.

        Skipped unless `backend` is "vllm". Raises ValueError for any
        temperature other than 0.0 or 1.0.
        """
        if backend != "vllm":
            pytest.skip(reason="Only used to test vLLM-specific temperature behavior")

        payload = {"model": model, "prompt": prompt, "temperature": temperature}
        # Issue the identical request twice so the two outputs can be compared.
        responses = [client.post("/v1/completions", json=payload) for _ in range(2)]

        for response in responses:
            print("Response:", response.json())
            assert response.status_code == 200

        response1_text, response2_text = (
            reply.json()["choices"][0]["text"].strip().split() for reply in responses
        )

        # Temperature of 0.0 indicates greedy sampling, so check
        # that two equivalent requests produce the same response.
        if temperature == 0.0:
            # NOTE: This check may be ambitious to get an exact match in all
            # frameworks depending on how other parameter defaults are set, so
            # it can probably be removed if it introduces flakiness.
            print(f"Comparing '{response1_text}' == '{response2_text}'")
            assert response1_text == response2_text
            return

        # Temperature of 1.0 indicates maximum randomness, so check
        # that two equivalent requests produce different responses.
        if temperature == 1.0:
            print(f"Comparing '{response1_text}' != '{response2_text}'")
            assert response1_text != response2_text
            return

        # Don't bother checking values other than the extremes
        raise ValueError(f"Unexpected {temperature=} for this test.")

    # Remove xfail when fix is released and this test returns xpass status
    @pytest.mark.xfail(
        reason="TRT-LLM BLS model will ignore temperature until a later release"
    )
    # Simple tests to verify temperature roughly behaves as expected
    def test_completions_temperature_tensorrtllm(
        self, client, backend: str, model: str, prompt: str
    ):
        """Check that changing temperature changes the TRT-LLM output.

        Sends two identical requests with temperature 0.0 and one request with
        temperature 1.0, then expects the first two outputs to match and the
        third to differ. Skipped unless `backend` is "tensorrtllm".
        """
        if backend != "tensorrtllm":
            # The reason previously named vLLM, copied from the vLLM variant
            # of this test; this test only runs for the TRT-LLM backend.
            pytest.skip(
                reason="Only used to test TRT-LLM-specific temperature behavior"
            )

        payload1 = {
            "model": model,
            "prompt": prompt,
            "temperature": 0.0,
            # TRT-LLM requires certain settings of `top_k` / `top_p` to
            # respect changes in `temperature`
            "top_p": 0.5,
        }
        payload2 = copy.deepcopy(payload1)
        payload2["temperature"] = 1.0

        # First 2 responses should be the same in TRT-LLM with identical payload;
        # third response should differ with different temperature in payload.
        responses = [
            client.post("/v1/completions", json=payload)
            for payload in (payload1, payload1, payload2)
        ]

        for response in responses:
            print("Response:", response.json())
            assert response.status_code == 200

        response1_text, response2_text, response3_text = (
            reply.json()["choices"][0]["text"].strip().split() for reply in responses
        )

        assert response1_text == response2_text
        assert response1_text != response3_text

    # Simple tests to verify seed roughly behaves as expected
    def test_completions_seed(self, client, model: str, prompt: str):
        """Expect equal output for equal seeds and different output otherwise."""
        payload1 = {"model": model, "prompt": prompt, "seed": 1}
        payload2 = copy.deepcopy(payload1)
        payload2["seed"] = 2

        # The first two requests share a seed and should produce the same
        # output; the third uses a different seed and should differ.
        responses = [
            client.post("/v1/completions", json=payload)
            for payload in (payload1, payload1, payload2)
        ]

        for response in responses:
            print("Response:", response.json())
            assert response.status_code == 200

        same_seed_a, same_seed_b, other_seed = (
            reply.json()["choices"][0]["text"].strip().split() for reply in responses
        )

        assert same_seed_a == same_seed_b
        assert same_seed_a != other_seed

    @pytest.mark.parametrize(
        "sampling_parameter, value",
        [
            ("temperature", 2.1),
            ("temperature", -0.1),
            ("max_tokens", -1),
            ("top_p", 1.1),
            ("frequency_penalty", 3),
            ("frequency_penalty", -3),
            ("presence_penalty", 2.1),
            ("presence_penalty", -2.1),
            # NOTE: Extensions to the spec
            ("min_tokens", -1),
            ("ignore_eos", 123),
        ],
    )
    def test_completions_invalid_sampling_parameters(
        self, client, sampling_parameter, value, model: str, prompt: str
    ):
        """Expect a 422 when a sampling parameter carries an invalid value."""
        request_body = {"model": model, "prompt": prompt}
        request_body[sampling_parameter] = value
        response = client.post("/v1/completions", json=request_body)

        print("Response:", response.json())
        assert response.status_code == 422

    def test_completions_empty_request(self, client):
        """Expect a 422 when the request body is an empty JSON object."""
        empty_body = {}
        status = client.post("/v1/completions", json=empty_body).status_code
        assert status == 422

    def test_completions_no_model(self, client, prompt: str):
        """Expect a 422 when the request omits the `model` field."""
        request_body = {"prompt": prompt}
        status = client.post("/v1/completions", json=request_body).status_code
        assert status == 422

    def test_completions_no_prompt(self, client, model: str):
        """Expect a 422 when the request omits the `prompt` field."""
        request_body = {"model": model}
        status = client.post("/v1/completions", json=request_body).status_code
        assert status == 422

    def test_completions_empty_prompt(self, client, model: str):
        """Expect a 400 when the prompt is an empty string."""
        request_body = {"model": model, "prompt": ""}
        response = client.post("/v1/completions", json=request_body)

        # NOTE: Should this be validated in schema instead?
        # 400 Error returned in route handler
        status = response.status_code
        assert status == 400

    def test_no_prompt(self, client, model: str):
        """Expect a 422 when the request omits the `prompt` field.

        NOTE: Sends the same request as `test_completions_no_prompt`.
        """
        request_body = {"model": model}
        status = client.post("/v1/completions", json=request_body).status_code

        # 422 Error returned by schema validation
        assert status == 422

    @pytest.mark.parametrize(
        "sampling_parameter_dict",
        [
            # Each individual parameter should fail for > 1 for now
            {"n": 2},
            {"best_of": 2},
            {"n": 2, "best_of": 2},
            # When individual params > 1 are supported, best_of < n should fail
            {"n": 2, "best_of": 1},
        ],
    )
    def test_completions_multiple_choices(
        self,
        client,
        sampling_parameter_dict: dict,
        backend: str,
        model: str,
        prompt: str,
    ):
        """Expect a 400 for `n` / `best_of` combinations asking for several choices."""
        request_body = {"model": model, "prompt": prompt, **sampling_parameter_dict}
        response = client.post("/v1/completions", json=request_body)
        body = response.json()
        print("Response:", body)

        # FIXME: Add support and test for success
        # Expected to fail when n or best_of > 1, only single choice supported for now
        assert response.status_code == 400

        # The vLLM backend reports a dedicated message whenever best_of is sent.
        vllm_best_of = backend == "vllm" and "best_of" in sampling_parameter_dict
        error_message = (
            "best_of is no longer supported in vLLM backend"
            if vllm_best_of
            else "only single choice"
        )
        assert error_message in body["detail"]

    @pytest.mark.skip(reason="Not Implemented Yet")
    def test_lora(self):
        """Placeholder for a LoRA test; currently has no body."""

    @pytest.mark.skip(reason="Not Implemented Yet")
    def test_multi_lora(self):
        """Placeholder for a multi-LoRA test; currently has no body."""

    @pytest.mark.parametrize("echo", [False, True])
    def test_echo(self, client, model: str, prompt: str, echo: bool):
        """Check that the `echo` flag controls whether the prompt is returned.

        With `echo=True` the stripped completion text is expected to start
        with the prompt; with `echo=False` the prompt is expected to be absent
        from it.
        """
        response = client.post(
            "/v1/completions", json={"model": model, "prompt": prompt, "echo": echo}
        )

        # Report the server's error payload when the request is rejected,
        # instead of failing later with a KeyError on "choices".
        assert response.status_code == 200, response.json()

        response_text = response.json()["choices"][0]["text"].strip()
        if echo:
            assert response_text.startswith(prompt)
        else:
            # TODO: Consider using a different prompt. In TRT-LLM model, the
            # second response may contain the prompt in the middle of the
            # response even if echo is False, e.g.
            # " Briefly explained.\nWhat is machine learning? She learns from data\nmachine learning".
            assert prompt not in response_text

    def test_usage_response(self, client, model: str, prompt: str):
        """Check that the usage block holds consistent integer token counts."""
        request_body = {"model": model, "prompt": prompt}
        response = client.post("/v1/completions", json=request_body)

        assert response.status_code == 200
        usage = response.json().get("usage")
        assert usage is not None

        # Every counter must be an integer.
        for counter in ("prompt_tokens", "completion_tokens", "total_tokens"):
            assert isinstance(usage[counter], int)

        prompt_count = usage["prompt_tokens"]
        completion_count = usage["completion_tokens"]
        assert prompt_count > 0
        assert completion_count > 0
        # The total must be the sum of the two parts.
        assert usage["total_tokens"] == prompt_count + completion_count

    def test_completions_logprobs(self, client, backend: str, model: str, prompt: str):
        """Request logprobs=3 and validate the structure of the returned logprobs.

        Backends other than "vllm" are expected to reject the request with a 400.
        """
        request_body = {
            "model": model,
            "prompt": prompt,
            "logprobs": 3,
            "max_tokens": 10,
        }
        response = client.post("/v1/completions", json=request_body)

        # Non-vLLM backends should raise an error
        if backend != "vllm":
            assert response.status_code == 400
            expected_detail = (
                "logprobs are currently available only for the vLLM backend"
            )
            assert expected_detail in response.json()["detail"]
            return

        assert response.status_code == 200

        # Check that logprobs are present in the response
        choice = response.json()["choices"][0]
        assert "logprobs" in choice
        logprobs = choice["logprobs"]
        assert logprobs is not None

        # All four parallel fields must exist and be lists.
        fields = ("text_offset", "token_logprobs", "tokens", "top_logprobs")
        for field in fields:
            assert field in logprobs
        for field in fields:
            assert isinstance(logprobs[field], list)

        # All lists should have the same length
        num_tokens = len(logprobs["tokens"])
        for field in ("text_offset", "token_logprobs", "top_logprobs"):
            assert len(logprobs[field]) == num_tokens

        # Validate each token
        per_token = zip(
            logprobs["tokens"],
            logprobs["token_logprobs"],
            logprobs["text_offset"],
            logprobs["top_logprobs"],
        )
        for token_text, token_logprob, offset, alternatives in per_token:
            assert isinstance(token_text, str)
            assert isinstance(token_logprob, (int, float))
            assert isinstance(offset, int)
            assert isinstance(alternatives, dict)

            # Validate top_logprobs dict contains token -> logprob mappings
            for candidate, candidate_logprob in alternatives.items():
                assert isinstance(candidate, str)
                assert isinstance(candidate_logprob, (int, float))

    def test_completions_logprobs_zero(self, client, model: str, prompt: str):
        """Test that logprobs=0 returns no logprobs."""
        request_body = {
            "model": model,
            "prompt": prompt,
            "logprobs": 0,
            "max_tokens": 10,
        }
        response = client.post("/v1/completions", json=request_body)

        assert response.status_code == 200

        # logprobs should be None when logprobs=0
        first_choice = response.json()["choices"][0]
        assert first_choice.get("logprobs") is None

    def test_completions_logprobs_validation(self, client, model: str, prompt: str):
        """Test that logprobs > 5 is rejected by schema validation."""
        request_body = {
            "model": model,
            "prompt": prompt,
            "logprobs": 7,  # Exceeds maximum of 5
            "max_tokens": 5,
        }
        response = client.post("/v1/completions", json=request_body)

        # Should raise schema validation error
        assert response.status_code == 422
        detail_text = str(response.json()["detail"])
        assert "Input should be less than or equal to 5" in detail_text


================================================
FILE: python/openai/tests/test_embeddings.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import base64
import os
from pathlib import Path

import numpy as np
import pytest

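# Reference embedding values that `_check_embedding_response` compares server
# responses against. Captured on an A6000 GPU; results vary slightly across GPU
# models, so the comparison uses an absolute tolerance rather than exact equality.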
EMBEDDING_OUTPUT_FLOAT = [
    -0.1914404183626175,
    0.4000193178653717,
    0.058502197265625,
    0.18909454345703125,
    -0.4690297544002533,
    0.004936536308377981,
    0.45893096923828125,
    -0.31141534447669983,
    0.18299102783203125,
    -0.4907582700252533,
    0.6920369267463684,
    -0.001537322998046875,
    0.1219015121459961,
    -0.11682561784982681,
    0.02811431884765625,
    -0.5207672119140625,
    0.4574941098690033,
    -0.31097412109375,
    0.13371849060058594,
    -0.4693959653377533,
    -0.2766602337360382,
    0.029005050659179688,
    -0.13730454444885254,
    -0.18662643432617188,
    0.0063533782958984375,
    0.16905848681926727,
    0.1612701416015625,
    0.08376502990722656,
    -0.09822845458984375,
    -0.012738545425236225,
    -0.16643650829792023,
    -0.01901753805577755,
    -0.0503285713493824,
    0.03994830325245857,
    -0.3373819887638092,
    -0.0188166294246912,
    0.374481201171875,
    -0.4371846616268158,
    0.22470474243164062,
    -0.011973063461482525,
    0.13784568011760712,
    -0.1484222412109375,
    0.19347667694091797,
    0.11848513036966324,
    0.09258091449737549,
    -0.2887814939022064,
    -0.2301533967256546,
    -0.12088584899902344,
    0.1941477507352829,
    -0.05228869244456291,
    -0.2508443295955658,
    0.15698719024658203,
    -0.19024403393268585,
    -0.4728952944278717,
    0.336700439453125,
    0.11721674352884293,
    0.24498240649700165,
    -0.4826914370059967,
    -0.119984470307827,
    0.1008249893784523,
    -0.0983428955078125,
    -0.0059178671799600124,
    0.2341969758272171,
    0.2725118100643158,
    0.2384185791015625,
    -0.30748113989830017,
    -0.17387008666992188,
    -0.44342041015625,
    -0.15537135303020477,
    0.27348455786705017,
    0.1131540909409523,
    -0.23855717480182648,
    0.2574056088924408,
    -0.12090619653463364,
    -0.14412720501422882,
    -0.2408244013786316,
    0.0021938085556030273,
    -0.39945730566978455,
    -0.555908203125,
    0.0760548934340477,
    -0.1530914306640625,
    -0.40975189208984375,
    -0.2091045379638672,
    0.20317332446575165,
    -0.20295588672161102,
    -0.3643442690372467,
    0.05287488177418709,
    -0.24874623119831085,
    0.11500009149312973,
    0.1661122590303421,
    0.26618704199790955,
    -0.22980372607707977,
    -0.202911376953125,
    -0.2738393247127533,
    0.20629756152629852,
    -0.24571101367473602,
    -0.1486002653837204,
    -0.12444128841161728,
    0.27539315819740295,
    0.41679826378822327,
    -0.01199467945843935,
    0.1778361052274704,
    -0.15123574435710907,
    -0.0391184501349926,
    -0.035979270935058594,
    0.11838880926370621,
    -0.07832065969705582,
    0.15302227437496185,
    -0.11540285497903824,
    -0.008619308471679688,
    0.011735956184566021,
    0.41825103759765625,
    0.1798756867647171,
    0.0468953438103199,
    -0.31410470604896545,
    -0.28439536690711975,
    0.028476715087890625,
    -0.18972015380859375,
    -0.1492512971162796,
    0.23354721069335938,
    0.2631734311580658,
    0.3009694516658783,
    -0.31204381585121155,
    0.17155838012695312,
    -0.6126009821891785,
    -0.16471035778522491,
    0.7154337763786316,
    0.0,
    -0.3936564028263092,
    -0.15255196392536163,
    0.24118296802043915,
    -0.13930638134479523,
    0.6811599731445312,
    0.135009765625,
    -0.18750762939453125,
    0.26521047949790955,
    -0.1257190704345703,
    0.0532684326171875,
    0.25982680916786194,
    -0.3410797119140625,
    -0.189666748046875,
    0.016697248443961143,
    0.1474812775850296,
    0.085713230073452,
    -0.0862935408949852,
    0.521209716796875,
    0.3840688169002533,
    0.04953320696949959,
    -0.0478159599006176,
    -0.3888498842716217,
    0.3243462145328522,
    0.03093973733484745,
    -0.3594563901424408,
    0.16615693271160126,
    -0.07209650427103043,
    0.049218177795410156,
    0.14628247916698456,
    -0.10561561584472656,
    0.1696879118680954,
    0.1195220947265625,
    0.0140139264985919,
    0.08987680822610855,
    0.02198282815515995,
    -0.06835142523050308,
    -0.09100532531738281,
    -0.3970082700252533,
    -0.20552189648151398,
    -0.0871327742934227,
    -0.008806228637695312,
    0.10437265783548355,
    0.2754974365234375,
    0.2630208432674408,
    -0.67779541015625,
    0.32654380798339844,
    -0.4008077085018158,
    0.2785542905330658,
    0.16632080078125,
    -0.0709940567612648,
    -0.1678619384765625,
    -0.11333879083395004,
    0.5577189326286316,
    0.3165779113769531,
    -0.2243397980928421,
    0.08053144067525864,
    0.1904652863740921,
    0.22478973865509033,
    0.11852264404296875,
    -0.2071024626493454,
    0.2380015105009079,
    0.4622955322265625,
    0.1029459610581398,
    -0.30094656348228455,
    0.0351104736328125,
    -0.09827486425638199,
    0.0018183389911428094,
    0.07406362146139145,
    0.18090057373046875,
    0.2231648713350296,
    -0.1001536026597023,
    0.06609535217285156,
    0.0055376687087118626,
    -0.02939859963953495,
    -0.17679977416992188,
    0.2300567626953125,
    -0.232757568359375,
    -0.1863892823457718,
    0.14040501415729523,
    -0.21081669628620148,
    0.4772237241268158,
    0.00708770751953125,
    0.25393548607826233,
    -0.12926609814167023,
    -0.21408335864543915,
    0.43414306640625,
    -0.16021983325481415,
    -0.6590754389762878,
    0.383026123046875,
    0.4894002377986908,
    -0.5350291132926941,
    0.1563262939453125,
    0.4013887941837311,
    -0.1429697722196579,
    -0.1266673356294632,
    0.0,
    -0.12781651318073273,
    0.5082905888557434,
    -0.4895477294921875,
    0.05857785418629646,
    -0.01038360595703125,
    -0.4025942385196686,
    -0.6376139521598816,
    -0.27256616950035095,
    -0.2183430939912796,
    0.13019943237304688,
    -0.2378387451171875,
    -0.12579791247844696,
    0.23233287036418915,
    -0.1948690414428711,
    -0.10780048370361328,
    0.4768002927303314,
    0.2761942446231842,
    0.09968694299459457,
    -0.07807016372680664,
    0.18632762134075165,
    -0.014780680648982525,
    0.18301646411418915,
    0.10943603515625,
    0.45223236083984375,
    -0.24634425342082977,
    0.5127970576286316,
    0.15272267162799835,
    0.26901498436927795,
    -0.8670451045036316,
    -0.20471616089344025,
    0.3934173583984375,
    -0.22558848559856415,
    0.14676158130168915,
    -0.16282017529010773,
    0.0047810873948037624,
    0.49467912316322327,
    -0.1040293350815773,
    -0.13565094769001007,
    -0.05704273656010628,
    0.2030487060546875,
    0.27226924896240234,
    -0.16900062561035156,
    0.06879997253417969,
    0.44347524642944336,
    0.08619359880685806,
    -0.1269734650850296,
    -0.05267079547047615,
    -0.3465728759765625,
    0.1846415251493454,
    -0.0655873641371727,
    0.027518590912222862,
    -0.06689834594726562,
    -0.13316090404987335,
    -0.3649355471134186,
    -0.0573628731071949,
    0.030780792236328125,
    0.2462870329618454,
    -0.0250523891299963,
    0.08964482694864273,
    -0.34076571464538574,
    -0.3342704772949219,
    -0.000331878662109375,
    0.25020280480384827,
    0.34731578826904297,
    0.4081510007381439,
    0.0661773681640625,
    0.14612038433551788,
    -0.37111154198646545,
    -0.17901070415973663,
    0.0565798282623291,
    -0.1689503937959671,
    0.311676025390625,
    0.06296539306640625,
    0.11648496240377426,
    -0.16365115344524384,
    -0.011795361526310444,
    -0.4601001739501953,
    0.13840866088867188,
    0.1115519180893898,
    -0.3645426332950592,
    -0.182403564453125,
    -0.20782725512981415,
    -0.004481792449951172,
    0.0870104655623436,
    -0.11704126745462418,
    0.34148290753364563,
    0.17841561138629913,
    -0.2754109799861908,
    -0.0867462158203125,
    0.09910837560892105,
    -0.14540545642375946,
    -0.10996246337890625,
    -0.10946687310934067,
    0.023001352325081825,
    0.11987527459859848,
    -5.960464477539063e-8,
    0.3316993713378906,
    -0.025622526183724403,
    -0.28015899658203125,
    0.34741735458374023,
    0.04091135784983635,
    -0.34874120354652405,
    0.22758229076862335,
    -0.042999267578125,
    0.0382130928337574,
    0.5654922127723694,
    -0.9378255009651184,
    0.17114512622356415,
    0.13035202026367188,
    0.4369252622127533,
    0.0897369384765625,
    0.19928233325481415,
    0.33091607689857483,
    -0.10624822229146957,
    -0.2845611572265625,
    0.2822163999080658,
    0.1722426414489746,
    0.2111460417509079,
    -0.1069692000746727,
    -0.3496347963809967,
    0.15000660717487335,
    0.014147520065307617,
    -0.36633554100990295,
    0.23989041149616241,
    -0.06397350877523422,
    0.2501627504825592,
    0.04016287997364998,
    -0.3789469301700592,
    -0.4247843325138092,
    0.1515035629272461,
    0.36554718017578125,
    0.057392120361328125,
    -0.3492482602596283,
    -0.45532989501953125,
    0.4090474545955658,
    -0.3914286196231842,
    -0.4888407289981842,
    0.4746551513671875,
    -0.6188761591911316,
    -0.018857955932617188,
    0.02373504638671875,
    0.22691090404987335,
    -0.07608286291360855,
    0.5331514477729797,
    -0.27182260155677795,
    0.2309315949678421,
    -0.1824493408203125,
    0.12648265063762665,
    0.2586142122745514,
    -0.07648912817239761,
    0.2318166047334671,
    -0.5225245356559753,
    0.133880615234375,
    -0.010974247939884663,
    0.09001413732767105,
    0.2562611997127533,
    0.19260406494140625,
    0.4470011293888092,
    -0.1636505126953125,
    -0.3675130307674408,
]


@pytest.mark.skipif(
    os.environ.get("IMAGE_KIND") == "TRTLLM",
    reason="TRT-LLM backend does not support embedding requests",
)
class TestEmbeddings:
    """Tests for the ``/v1/embeddings`` route.

    The class-scoped fixtures below override the model, tokenizer and model
    repository with embeddings-specific values. Expected vectors are compared
    against the module-level ``EMBEDDING_OUTPUT_FLOAT`` reference.
    """

    @pytest.fixture(scope="class")
    def client(self, fastapi_client_class_scope):
        yield fastapi_client_class_scope

    @pytest.fixture(scope="class")
    def model(self):
        # Override with embeddings-specific model
        return "all-MiniLM-L6-v2"

    @pytest.fixture(scope="class")
    def tokenizer_model(self):
        return None

    @pytest.fixture(scope="class")
    def model_repository(self):
        # Override with embeddings-specific repository
        return str(Path(__file__).parent / "vllm_embedding_models")

    @pytest.fixture(scope="class")
    def input(self):
        return "The food was delicious and the waiter..."

    def _check_embedding_response(
        self, response, model, dims=len(EMBEDDING_OUTPUT_FLOAT), encoding_format="float"
    ):
        """Assert that ``response`` is a successful embedding of the reference input.

        Args:
            response: Response returned by ``client.post("/v1/embeddings", ...)``.
            model: Model name the response body is expected to report.
            dims: Slice bound used to build the expected vector, i.e.
                ``EMBEDDING_OUTPUT_FLOAT[:dims]``.
            encoding_format: ``"base64"`` when the embedding must be decoded
                from base64-encoded float32 data; any other value leaves the
                embedding as returned.
        """
        assert response.status_code == 200, response.json()

        # Parse the body once and reuse it for every assertion below.
        body = response.json()
        entry = body["data"][0]

        embedding = entry["embedding"]
        assert embedding is not None
        if encoding_format == "base64":
            embedding = np.frombuffer(base64.b64decode(embedding), dtype=np.float32)

        expected = EMBEDDING_OUTPUT_FLOAT[:dims]
        # The results vary slightly across GPU models
        assert np.allclose(
            expected, embedding, rtol=0, atol=1e-3
        ), f"Embeddings do not match expected output\nExpect {expected},\ngot {embedding}"

        assert entry["object"] == "embedding"
        assert entry["index"] == 0
        assert body["model"] == model

        usage = body.get("usage")
        assert usage is not None
        assert usage["prompt_tokens"] == 12
        assert usage["total_tokens"] == 12

    @pytest.mark.parametrize(
        "input",
        [
            "The food was delicious and the waiter...",
            [101, 1996, 2833, 2001, 12090, 1998, 1996, 15610, 1012, 1012, 1012, 102],
        ],
    )
    def test_embeddings_defaults(self, client, model: str, input):
        """Text input and token-id input both produce the reference embedding."""
        response = client.post(
            "/v1/embeddings",
            json={"model": model, "input": input},
        )

        self._check_embedding_response(response, model)

    # FIXME: Python model cannot unload gracefully if raise error.
    # def test_chat_completions_defaults(
    #     self, client, model: str, messages: List[dict], backend: str
    # ):
    #     response = client.post(
    #         "/v1/chat/completions",
    #         json={"model": model, "messages": messages},
    #     )

    #     assert response.status_code == 400
    #     assert "does not support" in response.json()["detail"]

    @pytest.mark.parametrize(
        "param_key, param_value",
        [
            ("dimensions", [10]),
            ("encoding_format", "invalid"),
            ("encoding_format", 0),
        ],
    )
    def test_embeddings_invalid_parameters(
        self, client, param_key, param_value, model: str, input: str
    ):
        """Ill-typed or unsupported optional parameters are rejected with 422."""
        response = client.post(
            "/v1/embeddings",
            json={
                "model": model,
                "input": input,
                param_key: param_value,
            },
        )

        # Assert schema validation error
        assert response.status_code == 422, response.json()

    @pytest.mark.parametrize("dimensions", [0, 10, 100, -1])
    @pytest.mark.parametrize("encoding_format", ["float", "base64"])
    def test_embeddings_parameters(
        self, client, dimensions, encoding_format, model: str, input: str
    ):
        """Each ``dimensions`` / ``encoding_format`` combination matches the reference."""
        response = client.post(
            "/v1/embeddings",
            json={
                "model": model,
                "input": input,
                "dimensions": dimensions,
                "encoding_format": encoding_format,
            },
        )

        self._check_embedding_response(
            response, model, dims=dimensions, encoding_format=encoding_format
        )

    def test_embeddings_empty_request(self, client):
        """An empty JSON body fails validation with a missing-field error."""
        response = client.post("/v1/embeddings", json={})
        assert response.status_code == 422
        assert response.json()["detail"][0]["msg"] == "Field required"

    def test_embeddings_no_model(self, client, input: str):
        """Omitting ``model`` fails validation with a missing-field error."""
        response = client.post("/v1/embeddings", json={"input": input})
        assert response.status_code == 422
        assert response.json()["detail"][0]["msg"] == "Field required"

    @pytest.mark.parametrize(
        "model, error_code",
        [
            ("", 400),
            (123, 422),
            ("Invalid", 400),
            (None, 422),
        ],
    )
    def test_embeddings_invalid_model(self, client, model, input, error_code: int):
        """Unknown string names yield 400; non-string model values yield 422."""
        response = client.post("/v1/embeddings", json={"model": model, "input": input})
        assert response.status_code == error_code

        detail = response.json()["detail"]
        if error_code == 400:
            assert detail == f"Unknown model: {model}"
        else:
            assert detail[0]["msg"] == "Input should be a valid string"

    def test_embeddings_no_input(self, client, model: str):
        """Omitting ``input`` fails validation."""
        response = client.post("/v1/embeddings", json={"model": model})
        assert response.status_code == 422

    @pytest.mark.parametrize(
        "input",
        [
            "",
            [],
        ],
    )
    def test_embeddings_empty_input(self, client, model: str, input):
        """An empty string or empty list is rejected: at least one item is required."""
        response = client.post("/v1/embeddings", json={"model": model, "input": input})
        assert response.status_code == 422
        assert (
            response.json()["detail"][0]["msg"]
            == "Value should have at least 1 item after validation, not 0"
        )

    @pytest.mark.parametrize(
        "input",
        [
            123,
            1.5,
            0,
            None,
        ],
    )
    def test_embeddings_invalid_input(self, client, model: str, input):
        """Scalar numbers and ``None`` are rejected as invalid ``input`` values."""
        response = client.post("/v1/embeddings", json={"model": model, "input": input})
        assert response.status_code == 422
        assert response.json()["detail"][0]["msg"] == "Input should be a valid string"

================================================
FILE: python/openai/tests/test_lora.py
================================================
# Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import os
import shutil
import unittest

import pytest
from huggingface_hub import snapshot_download
from openai import BadRequestError, NotFoundError
from openai_frontend.engine.utils.triton import (
    _parse_lora_configs as parse_lora_configs,
)
from openai_frontend.engine.utils.triton import (
    _validate_lora_path_trtllm as validate_lora_path_trtllm,
)

from .utils import OpenAIServer


def is_vllm_installed():
    """Return True when the ``vllm`` package can be imported, False otherwise."""
    try:
        import vllm  # noqa: F401  (imported only to probe availability)
    except ImportError:
        installed = False
    else:
        installed = True
    return installed


@pytest.mark.parametrize(
    "model_repository,model_name,expect_error",
    [
        ("openai_model_repository", "", True),  # Empty string as model name.
        ("openai_model_repository", "     ", True),  # Whitespace-only model name.
        ("openai_model_repository", "invalid/path", True),
        ("openai_model_repository", "invalid\\path", True),
        ("openai_model_repository", "../outside/repo", True),
        ("openai_model_repository", "../test_models/identity_py", True),
        ("test_models", "../test_models/identity_py", True),
        ("test_models", "identity_py", False),
        ("test_models", "mock_llm", False),
    ],
)
def test_parse_lora_configs(model_repository: str, model_name: str, expect_error: bool):
    """Check model-name validation of ``parse_lora_configs`` for each backend.

    Args:
        model_repository: Repository path passed through to the parser.
        model_name: Candidate model name.
        expect_error: Whether the name must be rejected with ``ValueError``.

    Each backend is exercised in its own ``try`` block. Sharing a single
    ``try`` would let the first backend's ``ValueError`` hide whether the
    second backend validates the name at all.
    """
    expected_message = f"Invalid model name: '{model_name}'. Model names must be valid file-system-path segment names."

    for backend in ("vllm", "tensorrtllm"):
        context = f"(model_repository='{model_repository}', model_name='{model_name}', backend='{backend}')"
        try:
            parse_lora_configs(model_repository, model_name, 1, backend)
        except ValueError as e:
            if not expect_error:
                # pytest.fail() raises on its own; no `raise` needed.
                pytest.fail(f"{context} raised ValueError unexpectedly: {e}")
            assert expected_message == str(e)
        else:
            if expect_error:
                pytest.fail(f"{context} did not raise ValueError as expected.")


@pytest.mark.skipif(
    is_vllm_installed(),
    reason="VLLM backend does not validate LoRA paths",
)
@pytest.mark.parametrize(
    "lora_path,expect_error,error_message",
    [
        # Valid relative path inside repo (requires .npy files to exist at runtime).
        ("tensorrt_llm_bls/1/luotuo-lora-7b-0.1-weights", False, None),
        ("tensorrt_llm_bls/1/Japanese-Alpaca-LoRA-7b-v0-weights", False, None),
        # Absolute path not allowed.
        (
            os.path.join(
                os.path.abspath(os.curdir),
                "tests/tensorrtllm_models",
                "tensorrt_llm_bls/1/luotuo-lora-7b-0.1-weights",
            ),
            True,
            "must be a relative path inside its model repository",
        ),
        ("/etc/passwd", True, "must be a relative path inside its model repository"),
        # Path containing ".." that still points inside the repo is accepted.
        ("tensorrt_llm_bls/1//../1/luotuo-lora-7b-0.1-weights", False, None),
        # Path outside repo (traversal).
        ("../outside/lora", True, "must be inside its model repository"),
        ("subdir/../../etc/passwd", True, "must be inside its model repository"),
        # LoRA directory not found.
        ("tensorrt_llm_bls/10", True, "LoRA directory 'tensorrt_llm_bls/10' not found"),
        (
            "tensorrt_llm_bls/1/non_exist",
            True,
            "LoRA directory 'tensorrt_llm_bls/1/non_exist' not found",
        ),
        # LoRA file not found.
        ("tensorrt_llm_bls/1", True, "LoRA file 'model.lora_weights.npy' not found"),
    ],
)
def test_validate_lora_path_trtllm(
    lora_path: str,
    expect_error: bool,
    error_message: str,
):
    """Check ``validate_lora_path_trtllm`` against accepted and rejected paths.

    Args:
        lora_path: LoRA path to validate against ``tests/tensorrtllm_models``.
        expect_error: Whether validation must raise.
        error_message: Substring the raised error must contain (``None`` when
            no error is expected).
    """
    lora_name = ""
    repo_path = "tests/tensorrtllm_models"
    # Failure messages report the parametrized lora_path, which is the value
    # that distinguishes one case from another.
    context = f"lora_path='{lora_path}' (repo_path='{repo_path}')"
    try:
        validate_lora_path_trtllm(repo_path, lora_path, lora_name)
    except Exception as e:
        if not expect_error:
            # pytest.fail() raises on its own; no `raise` needed.
            pytest.fail(f"{context} raised exception unexpectedly: {e}")
        assert error_message in str(e)
    else:
        if expect_error:
            pytest.fail(f"{context} did not raise exception as expected.")


class LoRATest(unittest.TestCase):
    """End-to-end LoRA tests for the OpenAI frontend.

    The backend under test is picked when the class is created: vLLM when it
    can be imported, TensorRT-LLM otherwise. Each test rebuilds the local
    ``models`` directory, starts an ``OpenAIServer`` on it, and drives the
    server through the client returned by ``server.get_client()``.
    """

    _backend = "vllm" if is_vllm_installed() else "tensorrtllm"
    _model_name = "gemma-2b" if _backend == "vllm" else "tensorrt_llm_bls"
    # TODO: Find a LoRA model that has its own tokenizer.
    _tokenizer = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    _lora_separator = "_lora_"
    _prompt = "When was the wheel invented?"
    # more prompts that may yield different outputs:
    # - "Why can camels survive for long without water?"
    # - "What is LAPR?"
    # - "What is the difference between pets and cattle?"
    _temperature = 0
    _top_p = 1

    def setUp(self):
        # Outputs seen so far, keyed by LoRA name; used to check that each
        # model/LoRA selection produces a distinct output.
        self._completions_outputs = {}
        self._chat_completion_outputs = {}

    # ------------------------------------------------------------------
    # Model repository setup
    # ------------------------------------------------------------------

    def _init_vllm_model_dir(self, model_json):
        """Recreate ``models/`` with a vLLM ``config.pbtxt`` and ``1/model.json``."""
        shutil.rmtree("models", ignore_errors=True)
        os.makedirs(f"models/{self._model_name}/1", exist_ok=True)
        with open(f"models/{self._model_name}/config.pbtxt", "w") as f:
            f.write('backend: "vllm"')
        with open(f"models/{self._model_name}/1/model.json", "w") as f:
            f.write(json.dumps(model_json))

    def _write_multi_lora_json(self, doll_dir, sheep_dir):
        """Write ``1/multi_lora.json`` mapping "doll"/"sheep" to weight directories."""
        version_dir = f"models/{self._model_name}/1"
        with open(f"{version_dir}/multi_lora.json", "w") as f:
            f.write(
                json.dumps(
                    {
                        "doll": f"{version_dir}/{doll_dir}",
                        "sheep": f"{version_dir}/{sheep_dir}",
                    }
                )
            )

    def _create_vllm_model_repository_with_lora(self):
        self._init_vllm_model_dir(
            {
                "model": "unsloth/gemma-2b",
                "enable_lora": True,
                "max_lora_rank": 32,
            }
        )
        self._write_multi_lora_json("GemmaDoll", "GemmaSheep")
        snapshot_download(
            repo_id="swathijn/GemmaDoll-2b-dolly-LORA-Tune",
            local_dir=f"models/{self._model_name}/1/GemmaDoll",
        )
        snapshot_download(
            repo_id="eduardo-alvarez/GemmaSheep-2B-LORA-TUNED",
            local_dir=f"models/{self._model_name}/1/GemmaSheep",
        )

    def _create_trtllm_model_repository_with_lora(self):
        shutil.rmtree("models", ignore_errors=True)
        shutil.copytree("tests/tensorrtllm_models", "models")
        self._write_multi_lora_json(
            "luotuo-lora-7b-0.1-weights", "Japanese-Alpaca-LoRA-7b-v0-weights"
        )

    def _create_vllm_model_repository_without_lora(self):
        self._init_vllm_model_dir({"model": "unsloth/gemma-2b"})

    def _create_trtllm_model_repository_without_lora(self):
        shutil.rmtree("models", ignore_errors=True)
        shutil.copytree("tests/tensorrtllm_models", "models")

    def _create_model_repository(self, with_lora):
        """Build the ``models`` repository for the active backend.

        Raises:
            Exception: If ``self._backend`` is neither "vllm" nor "tensorrtllm".
        """
        creators = {
            ("vllm", True): self._create_vllm_model_repository_with_lora,
            ("vllm", False): self._create_vllm_model_repository_without_lora,
            ("tensorrtllm", True): self._create_trtllm_model_repository_with_lora,
            ("tensorrtllm", False): self._create_trtllm_model_repository_without_lora,
        }
        creator = creators.get((self._backend, bool(with_lora)))
        if creator is None:
            raise Exception(f"Unexpected backend {self._backend=}")
        creator()

    def _create_model_repository_mock_llm(self):
        shutil.rmtree("models", ignore_errors=True)
        os.makedirs(f"models/{self._model_name}/1", exist_ok=True)
        with open(f"models/{self._model_name}/config.pbtxt", "w") as f:
            f.write(
                """
                backend: "python"
                max_batch_size: 0
                model_transaction_policy { decoupled: True }
                input [
                    {
                        name: "text_input"
                        data_type: TYPE_STRING
                        dims: [ 1 ]
                    },
                    {
                        name: "stream"
                        data_type: TYPE_BOOL
                        dims: [ 1 ]
                    },
                    {
                        name: "sampling_parameters"
                        data_type: TYPE_STRING
                        dims: [ 1 ]
                    },
                    {
                        name: "exclude_input_in_output"
                        data_type: TYPE_BOOL
                        dims: [ 1 ]
                    },
                    {
                        name: "return_num_input_tokens"
                        data_type: TYPE_BOOL
                        dims: [1]
                        optional: true
                    },
                    {
                        name: "return_num_output_tokens"
                        data_type: TYPE_BOOL
                        dims: [1]
                        optional: true
                    },
                    {
                        name: "return_logprobs"
                        data_type: TYPE_BOOL
                        dims: [1]
                        optional: true
                    }
                ]
                output [
                    {
                        name: "text_output"
                        data_type: TYPE_STRING
                        dims: [ -1 ]
                    }
                ]
            """
            )
        shutil.copy(
            "tests/test_models/mock_llm/1/model.py", f"models/{self._model_name}/1"
        )

    # ------------------------------------------------------------------
    # Server / assertion helpers
    # ------------------------------------------------------------------

    def _open_server(self, *extra_cli_args):
        """Return an ``OpenAIServer`` on ``models`` with the shared tokenizer.

        ``extra_cli_args`` are appended, in order, after the common arguments.
        """
        return OpenAIServer(
            cli_args=[
                "--model-repository",
                "models",
                "--tokenizer",
                self._tokenizer,
                *extra_cli_args,
            ],
            env_dict={"CUDA_VISIBLE_DEVICES": "0"},
        )

    def _get_model_name(self, lora_name):
        """Return the request model name: base name plus separator and LoRA, if any."""
        model_name = self._model_name
        if lora_name != "":
            model_name += f"{self._lora_separator}{lora_name}"
        return model_name

    def _unknown_model_error(self, status_code, lora_name):
        """Expected client error text for an unknown ``<model><separator><lora>`` name."""
        return f"Error code: {status_code} - {{'detail': 'Unknown model: {self._get_model_name(lora_name)}'}}"

    def _unknown_lora_error(self, lora_name):
        """Expected client error text for a LoRA the backend does not know."""
        return f"Error code: 400 - {{'detail': 'Unknown LoRA: {lora_name}; for model: {self._get_model_name(lora_name)}'}}"

    def _assert_rejected(
        self, request_fn, client, lora_name, error_type, expected_error
    ):
        """Assert ``request_fn(client, lora_name)`` raises ``error_type`` with the exact text."""
        with self.assertRaises(error_type) as e:
            request_fn(client, lora_name)
        self.assertEqual(str(e.exception), expected_error)

    def _test_list_models(self, client, expected_lora_names):
        expected_model_names = [
            self._get_model_name(lora_name) for lora_name in expected_lora_names
        ]
        for model in client.models.list():
            # The TensorRT-LLM repository also holds models other than the one
            # under test; only "tensorrt_llm_bls*" entries are checked.
            if self._backend == "tensorrtllm" and not model.id.startswith(
                "tensorrt_llm_bls"
            ):
                continue
            self.assertIn(model.id, expected_model_names)
            expected_model_names.remove(model.id)
        self.assertEqual(
            len(expected_model_names),
            0,
            f"expected_model_names: {expected_model_names}",
        )

    def _test_retrieve_model(self, client, lora_name):
        model_name = self._get_model_name(lora_name)
        model = client.models.retrieve(model_name)
        self.assertEqual(model.id, model_name)

    def _test_completions(self, client, lora_name):
        model_name = self._get_model_name(lora_name)
        completion = client.completions.create(
            model=model_name,
            prompt=self._prompt,
            temperature=self._temperature,
            top_p=self._top_p,
        )
        self.assertEqual(completion.model, model_name)
        output = completion.choices[0].text
        # Every selection made so far in this test must yield a different output.
        for other_output in self._completions_outputs.values():
            self.assertNotEqual(
                output,
                other_output,
                msg=f"other completions outputs: {self._completions_outputs}",
            )
        self._completions_outputs[lora_name] = output

    def _test_chat_completion(self, client, lora_name):
        model_name = self._get_model_name(lora_name)
        messages = [{"role": "user", "content": self._prompt}]
        chat_completion = client.chat.completions.create(
            model=model_name,
            messages=messages,
            temperature=self._temperature,
            top_p=self._top_p,
        )
        self.assertEqual(chat_completion.model, model_name)
        output = chat_completion.choices[0].message.content
        # Every selection made so far in this test must yield a different output.
        for other_output in self._chat_completion_outputs.values():
            self.assertNotEqual(
                output,
                other_output,
                msg=f"other chat outputs: {self._chat_completion_outputs}",
            )
        self._chat_completion_outputs[lora_name] = output

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_lora_separator_not_set(self):
        self._create_model_repository(with_lora=True)

        with self._open_server() as server:
            client = server.get_client()
            # Test listing/retrieving models
            self._test_list_models(client, [""])
            self._test_retrieve_model(client, "")
            for lora_name in ("doll", "sheep"):
                self._assert_rejected(
                    self._test_retrieve_model,
                    client,
                    lora_name,
                    NotFoundError,
                    self._unknown_model_error(404, lora_name),
                )
            # Test selecting LoRAs
            self._test_completions(client, "")
            self._test_chat_completion(client, "")
            self._assert_rejected(
                self._test_completions,
                client,
                "doll",
                BadRequestError,
                self._unknown_model_error(400, "doll"),
            )
            self._assert_rejected(
                self._test_chat_completion,
                client,
                "sheep",
                BadRequestError,
                self._unknown_model_error(400, "sheep"),
            )

    def test_lora_separator_set(self):
        self._create_model_repository(with_lora=True)

        with self._open_server("--lora-separator", self._lora_separator) as server:
            client = server.get_client()
            # Test listing/retrieving models
            self._test_list_models(client, ["", "doll", "sheep"])
            for lora_name in ("", "doll", "sheep"):
                self._test_retrieve_model(client, lora_name)

            # Test retrieving LoRAs unknown to the backend
            self._assert_rejected(
                self._test_retrieve_model,
                client,
                "unknown",
                NotFoundError,
                self._unknown_model_error(404, "unknown"),
            )

            # Test selecting LoRAs
            for lora_name in ("", "doll", "sheep"):
                self._test_completions(client, lora_name)
            for lora_name in ("", "doll", "sheep"):
                self._test_chat_completion(client, lora_name)

            # Test selecting LoRAs unknown to the backend
            expected_error = self._unknown_lora_error("unknown")
            for request_fn in (self._test_completions, self._test_chat_completion):
                self._assert_rejected(
                    request_fn, client, "unknown", BadRequestError, expected_error
                )

    def test_lora_separator_set_for_lora_off_model(self):
        self._create_model_repository(with_lora=False)

        with self._open_server("--lora-separator", self._lora_separator) as server:
            client = server.get_client()
            # Test listing/retrieving models
            self._test_list_models(client, [""])
            self._test_retrieve_model(client, "")
            # Test retrieving models with LoRAs
            self._assert_rejected(
                self._test_retrieve_model,
                client,
                "doll",
                NotFoundError,
                self._unknown_model_error(404, "doll"),
            )
            # Test inference
            self._test_completions(client, "")
            self._test_chat_completion(client, "")
            # Test selecting LoRAs
            expected_error = self._unknown_lora_error("sheep")
            for request_fn in (self._test_completions, self._test_chat_completion):
                self._assert_rejected(
                    request_fn, client, "sheep", BadRequestError, expected_error
                )

    @unittest.skipUnless(is_vllm_installed(), "vLLM not installed")
    def test_lora_separator_set_for_non_vllm_formatted_models(self):
        self._create_model_repository_mock_llm()
        with self._open_server(
            "--backend", "vllm", "--lora-separator", self._lora_separator
        ) as server:
            client = server.get_client()
            # Test listing/retrieving models
            self._test_list_models(client, [""])
            self._test_retrieve_model(client, "")
            # Test retrieving models with LoRAs
            self._assert_rejected(
                self._test_retrieve_model,
                client,
                "sheep",
                NotFoundError,
                self._unknown_model_error(404, "sheep"),
            )
            # Test selecting LoRAs
            # Expectation:
            #   If the frontend cannot determine which LoRA(s) are available, then any
            #   request with a well-formed LoRA model name will be inferenced.
            self._test_completions(client, "doll")
            self._test_chat_completion(client, "doll")


# Run the unittest runner when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: python/openai/tests/test_models/identity_py/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Identity model for the Python backend: OUTPUT0 carries INPUT0's data."""

    def execute(self, requests):
        """
        Identity model in Python backend.

        Builds one response per request, in order, whose "OUTPUT0" tensor is
        created from the numpy data of the request's "INPUT0" tensor.
        """

        def _echo(request):
            source = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            echoed = pb_utils.Tensor("OUTPUT0", source.as_numpy())
            return pb_utils.InferenceResponse([echoed])

        return [_echo(request) for request in requests]


================================================
FILE: python/openai/tests/test_models/identity_py/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the identity_py test model, served by the Python backend.
backend: "python"
max_batch_size: 64

# Single FP32 input tensor named INPUT0.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# Single FP32 output tensor named OUTPUT0, declared with the same dims as INPUT0.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# One model instance of kind KIND_CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: python/openai/tests/test_models/mock_llm/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import time

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Mock LLM used by the OpenAI frontend tests.

    The model echoes its ``text_input`` tensor back as ``text_output``.
    Request parameters tune the reply: ``REPETITION`` (number of copies) and,
    in decoupled mode only, ``DELAY`` (seconds slept before each streamed
    response) and ``FAIL_LAST`` (finish the stream with an error response).
    """

    def initialize(self, args):
        """Load the model configuration and record the transaction policy.

        Args:
            args: Mapping whose ``"model_config"`` entry holds the model
                configuration as a JSON string.
        """
        self.model_config = json.loads(args["model_config"])
        # Stays None when the config has no decoupled setting; execute()
        # treats that the same as False.
        self.decoupled = self.model_config.get("model_transaction_policy", {}).get(
            "decoupled"
        )

    def execute(self, requests):
        """Dispatch a batch of requests according to the transaction policy.

        Returns:
            The list of responses from ``exec`` in non-decoupled mode, or
            ``None`` in decoupled mode, where responses go through each
            request's response sender.
        """
        if self.decoupled:
            return self.exec_decoupled(requests)
        return self.exec(requests)

    def exec(self, requests):
        """Serve requests in non-decoupled mode, one response per request.

        A request whose ``stream`` input is true gets an error response,
        because streaming is only handled by ``exec_decoupled``. Otherwise the
        response carries the input repeated ``REPETITION`` times along axis 1.

        Returns:
            list: One ``pb_utils.InferenceResponse`` per request, in order.
        """
        responses = []
        for request in requests:
            params = json.loads(request.parameters())
            rep_count = params.get("REPETITION", 1)

            # Same input name that exec_decoupled() reads.
            input_np = pb_utils.get_input_tensor_by_name(
                request, "text_input"
            ).as_numpy()
            stream_np = pb_utils.get_input_tensor_by_name(request, "stream").as_numpy()
            stream = stream_np.flatten()[0]
            if stream:
                responses.append(
                    pb_utils.InferenceResponse(
                        error=pb_utils.TritonError(
                            "STREAM only supported in decoupled mode"
                        )
                    )
                )
            else:
                out_tensor = pb_utils.Tensor(
                    "text_output", np.repeat(input_np, rep_count, axis=1)
                )
                responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses

    def exec_decoupled(self, requests):
        """Serve requests in decoupled mode through each response sender.

        With ``stream`` true, the echoed response is sent ``REPETITION``
        times (sleeping ``DELAY`` seconds before each send when given),
        followed by a final message that is either empty or, when
        ``FAIL_LAST`` is set, an error response. With ``stream`` false, a
        single final response is sent.

        Returns:
            None: responses are delivered via the senders.
        """
        for request in requests:
            params = json.loads(request.parameters())
            rep_count = params.get("REPETITION", 1)
            fail_last = params.get("FAIL_LAST", False)
            delay = params.get("DELAY")

            sender = request.get_response_sender()
            input_np = pb_utils.get_input_tensor_by_name(
                request, "text_input"
            ).as_numpy()
            stream_np = pb_utils.get_input_tensor_by_name(request, "stream").as_numpy()
            out_tensor = pb_utils.Tensor("text_output", input_np)
            response = pb_utils.InferenceResponse([out_tensor])
            # If stream enabled, just send multiple copies of response
            # FIXME: Could split up response string into tokens, but this is simpler for now.
            stream = stream_np.flatten()[0]
            if stream:
                for _ in range(rep_count):
                    if delay is not None:
                        time.sleep(delay)
                    sender.send(response)
                # The closing message carries the FINAL flag; it is an error
                # response only when the request asked for a failing stream.
                sender.send(
                    None
                    if not fail_last
                    else pb_utils.InferenceResponse(
                        error=pb_utils.TritonError("An Error Occurred")
                    ),
                    flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL,
                )
            # If stream disabled, just send one response
            else:
                sender.send(
                    response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
                )
        return None


================================================
FILE: python/openai/tests/test_models/mock_llm/config.pbtxt
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Configuration of the mock_llm test model; the implementation is 1/model.py.
backend: "python"

# NOTE(review): 0 presumably disables Triton-side batching, so the dims below
# are complete tensor shapes - confirm against the Triton model config docs.
max_batch_size: 0

# 1/model.py reads this flag in initialize() and routes execute() to
# exec_decoupled() when it is set.
model_transaction_policy {
  decoupled: True
}

input [
  {
    # Text that 1/model.py returns in "text_output".
    name: "text_input"
    data_type: TYPE_STRING
    dims: [ 1, 1 ]
  },
  {
    # Read by 1/model.py to choose between repeated streamed responses and a
    # single final response.
    name: "stream"
    data_type: TYPE_BOOL
    dims: [ 1, 1 ]
  },
  {
    # Optional input; 1/model.py does not read it.
    name: "return_logprobs"
    data_type: TYPE_BOOL
    dims: [ 1, 1 ]
    optional: true
  }
]

output [
  {
    name: "text_output"
    data_type: TYPE_STRING
    dims: [ 1, -1 ]
  }
]

instance_group [
  {
    count: 1
    kind: KIND_MODEL
  }
]


================================================
FILE: python/openai/tests/test_observability.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from pathlib import Path

import pytest
from fastapi.testclient import TestClient
from tests.utils import setup_fastapi_app, setup_server


# Override conftest.py default model
@pytest.fixture
def model() -> str:
    """Return the name of the model the tests in this module run against.

    ``mock_llm`` is one of the model directories under ``test_models``.
    """
    return "mock_llm"


class TestObservability:
    """Checks of the frontend's non-inference endpoints.

    Covers unknown routes, readiness, metrics and the model listing, using a
    server loaded from the ``test_models`` directory next to this file.
    """

    @pytest.fixture(scope="class")
    def client(self):
        """Yield a ``TestClient`` for an app backed by the ``test_models`` server.

        The server returned by ``setup_server`` is stopped at teardown, and
        also when building the app or closing the client raises, so a failure
        cannot leave it running.
        """
        # TODO: Cleanup, mock server/engine, etc.
        model_repository = Path(__file__).parent / "test_models"
        server = setup_server(str(model_repository))
        try:
            app = setup_fastapi_app(tokenizer="", server=server, backend=None)
            with TestClient(app) as test_client:
                yield test_client
        finally:
            server.stop()

    ### General Error Handling ###
    def test_not_found(self, client):
        """An unknown route answers with 404."""
        response = client.get("/does-not-exist")
        assert response.status_code == 404

    ### Startup / Health ###
    def test_startup_success(self, client):
        """The readiness endpoint answers with 200 once the app is up."""
        response = client.get("/health/ready")
        assert response.status_code == 200

    ### Metrics ###
    def test_startup_metrics(self, client):
        """The metrics endpoint answers with 200 and reports CPU utilization."""
        response = client.get("/metrics")
        assert response.status_code == 200
        # TODO: Flesh out metrics tests further
        assert "nv_cpu_utilization" in response.text

    ### Models ###
    def test_models_list(self, client):
        """The model listing returns every model with the expected fields."""
        response = client.get("/v1/models")
        assert response.status_code == 200
        models = response.json()["data"]
        # Two models are in test_models specifically to verify that all models
        # are listed by this endpoint. This can be removed if the behavior changes.
        assert len(models) == 2
        for model in models:
            assert model["id"]
            assert model["object"] == "model"
            assert model["created"] > 0
            assert model["owned_by"] == "Triton Inference Server"

    def test_models_get(self, client, model):
        """Fetching a single model by name returns its description."""
        response = client.get(f"/v1/models/{model}")
        assert response.status_code == 200
        model_resp = response.json()
        assert model_resp["id"] == model
        assert model_resp["object"] == "model"
        assert model_resp["created"] > 0
        assert model_resp["owned_by"] == "Triton Inference Server"


================================================
FILE: python/openai/tests/test_openai_client.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from typing import List

import numpy as np
import openai
import pytest


@pytest.mark.openai
class TestOpenAIClient:
    """Drives the frontend through the synchronous ``openai.OpenAI`` client."""

    @pytest.fixture(scope="class")
    def client(self, server):
        """Provide the synchronous client of the ``server`` fixture, once per class."""
        sync_client = server.get_client()
        return sync_client

    def test_openai_client_models(self, client: openai.OpenAI, backend: str):
        """The model listing has the size expected for the active backend."""
        models = [*client.models.list()]
        print(f"Models: {models}")
        if backend not in ("tensorrtllm", "vllm"):
            raise Exception(f"Unexpected backend {backend=}")
        if backend == "vllm":
            assert len(models) == 1
        else:
            # tensorrt_llm_bls +
            # preprocess -> tensorrt_llm -> postprocess
            assert len(models) == 4

    def test_openai_client_completion(
        self, client: openai.OpenAI, model: str, prompt: str
    ):
        """A default completion returns text, stops normally and reports usage."""
        completion = client.completions.create(prompt=prompt, model=model)
        print(f"Completion results: {completion}")

        first_choice = completion.choices[0]
        assert first_choice.text
        assert first_choice.finish_reason == "stop"

        usage = completion.usage
        assert usage is not None
        for field in ("prompt_tokens", "completion_tokens", "total_tokens"):
            assert isinstance(getattr(usage, field), int)
        assert usage.prompt_tokens > 0
        assert usage.completion_tokens > 0
        assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens

    def test_openai_client_chat_completion(
        self, client: openai.OpenAI, model: str, messages: List[dict]
    ):
        """A default chat completion returns content, stops normally and reports usage."""
        chat_completion = client.chat.completions.create(messages=messages, model=model)
        print(f"Chat completion results: {chat_completion}")

        first_choice = chat_completion.choices[0]
        assert first_choice.message.content
        assert first_choice.finish_reason == "stop"

        usage = chat_completion.usage
        assert usage is not None
        for field in ("prompt_tokens", "completion_tokens", "total_tokens"):
            assert isinstance(getattr(usage, field), int)
        assert usage.prompt_tokens > 0
        assert usage.completion_tokens > 0
        assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens

    @pytest.mark.parametrize("echo", [False, True])
    def test_openai_client_completion_echo(
        self, client: openai.OpenAI, echo: bool, model: str, prompt: str
    ):
        """With ``echo`` the text starts with the prompt; without it the prompt is absent."""
        completion = client.completions.create(prompt=prompt, model=model, echo=echo)
        text = completion.choices[0].text

        if not echo:
            # TODO: Consider using a different prompt. In TRT-LLM model, the second response may contain the prompt in the middle of the response even if echo is False, e.g. " Briefly explained.\nWhat is machine learning? She learns from data\nmachine learning".
            assert prompt not in text
            return
        assert text.startswith(prompt)

    @pytest.mark.skip(reason="Not Implemented Yet")
    def test_openai_client_function_calling(self):
        """Placeholder for function-calling coverage; currently skipped."""
        pass


@pytest.mark.openai
class TestAsyncOpenAIClient:
    @pytest.fixture(scope="class")
    def client(self, server):
        """Return the async client obtained from the ``server`` fixture, once per class."""
        return server.get_async_client()

    @pytest.mark.asyncio
    async def test_openai_client_models(self, client: openai.AsyncOpenAI, backend: str):
        """The async model listing has the size expected for the active backend."""
        expected_counts = {
            # tensorrt_llm_bls +
            # preprocess -> tensorrt_llm -> postprocess
            "tensorrtllm": 4,
            "vllm": 1,
        }
        models = []
        async for entry in await client.models.list():
            models.append(entry)
        print(f"Models: {models}")
        if backend not in expected_counts:
            raise Exception(f"Unexpected backend {backend=}")
        assert len(models) == expected_counts[backend]

    @pytest.mark.asyncio
    async def test_openai_client_completion(
        self, client: openai.AsyncOpenAI, model: str, prompt: str
    ):
        """A default completion returns text, stops normally and reports usage."""
        completion = await client.completions.create(prompt=prompt, model=model)
        print(f"Completion results: {completion}")

        first_choice = completion.choices[0]
        assert first_choice.text
        assert first_choice.finish_reason == "stop"

        usage = completion.usage
        assert usage is not None
        for field in ("prompt_tokens", "completion_tokens", "total_tokens"):
            assert isinstance(getattr(usage, field), int)
        assert usage.prompt_tokens > 0
        assert usage.completion_tokens > 0
        assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens

    @pytest.mark.asyncio
    async def test_openai_client_chat_completion(
        self, client: openai.AsyncOpenAI, model: str, messages: List[dict]
    ):
        """A default chat completion returns content, stops normally and reports usage."""
        chat_completion = await client.chat.completions.create(
            messages=messages, model=model
        )

        first_choice = chat_completion.choices[0]
        assert first_choice.message.content
        assert first_choice.finish_reason == "stop"

        usage = chat_completion.usage
        assert usage is not None
        for field in ("prompt_tokens", "completion_tokens", "total_tokens"):
            assert isinstance(getattr(usage, field), int)
        assert usage.prompt_tokens > 0
        assert usage.completion_tokens > 0
        assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens

        # Printed only once every check above has passed.
        print(f"Chat completion results: {chat_completion}")

    @pytest.mark.asyncio
    async def test_completion_streaming(
        self, client: openai.AsyncOpenAI, model: str, prompt: str
    ):
        """A streamed completion reassembles to the non-streamed text.

        The finish reason must appear on exactly one chunk, the last one, and
        match the finish reason of the non-streamed call.
        """
        request_kwargs = dict(
            model=model, prompt=prompt, max_tokens=10, temperature=0.0, seed=0
        )

        # Non-streamed call used as the reference.
        reference = await client.completions.create(stream=False, **request_kwargs)
        expected_text = reference.choices[0].text
        expected_finish = reference.choices[0].finish_reason

        # Same request, streamed.
        stream = await client.completions.create(stream=True, **request_kwargs)
        pieces = []
        finish_reasons_seen = 0
        async for chunk in stream:
            choice = chunk.choices[0]
            if choice.text:
                pieces.append(choice.text)
            if choice.finish_reason is not None:
                finish_reasons_seen += 1

        # finish reason should only return in last block
        assert finish_reasons_seen == 1
        assert chunk.choices[0].finish_reason == expected_finish
        assert "".join(pieces) == expected_text

    @pytest.mark.parametrize(
        "sampling_parameter_dict",
        [
            {},
            # Verify that stop words work with streaming outputs
            {"stop": "is"},
            {"stop": ["is"]},
            {"stop": ["is", ".", ","]},
        ],
    )
    @pytest.mark.asyncio
    async def test_chat_streaming(
        self,
        client: openai.AsyncOpenAI,
        model: str,
        messages: List[dict],
        sampling_parameter_dict: dict,
    ):
        """A streamed chat completion reassembles to the non-streamed content.

        Runs once per sampling-parameter set. No chunk may carry usage, and
        the finish reason must appear only on the last chunk.
        """
        # Fixed seed and temperature keep the two responses comparable; 64
        # tokens is enough to see stop words take effect.
        base_kwargs = dict(
            model=model,
            messages=messages,
            max_completion_tokens=64,
            temperature=0.0,
            seed=0,
        )

        # Non-streamed call used as the reference.
        reference = await client.chat.completions.create(
            stream=False, **base_kwargs, **sampling_parameter_dict
        )
        expected_text = reference.choices[0].message.content
        expected_finish = reference.choices[0].finish_reason

        # Same request, streamed.
        stream = await client.chat.completions.create(
            stream=True, **base_kwargs, **sampling_parameter_dict
        )
        pieces = []
        finish_reasons_seen = 0
        async for chunk in stream:
            choice = chunk.choices[0]
            delta = choice.delta
            if delta.role:
                assert delta.role == "assistant"
            if delta.content:
                pieces.append(delta.content)
            if choice.finish_reason is not None:
                finish_reasons_seen += 1
            assert chunk.usage is None

        # finish reason should only return in last block
        assert finish_reasons_seen == 1
        assert chunk.choices[0].finish_reason == expected_finish

        # Streaming must have produced several pieces that together equal the
        # non-streamed output.
        assert len(pieces) > 1
        assert "".join(pieces) == expected_text

    @pytest.mark.asyncio
    async def test_chat_streaming_usage_option(
        self, client: openai.AsyncOpenAI, model: str, messages: List[dict]
    ):
        """Check ``stream_options.include_usage`` for streamed chat completions.

        With the option off, no chunk reports usage. With it on, one extra
        final chunk has empty choices and usage equal to the non-streamed
        call's usage, while the streamed content stays the same.
        """
        shared_kwargs = dict(
            model=model, messages=messages, max_tokens=16, temperature=0.0, seed=0
        )

        def joined_content(chunk_list):
            # Concatenate the delta content of every chunk that carries some.
            return "".join(
                item.choices[0].delta.content
                for item in chunk_list
                if item.choices and item.choices[0].delta.content
            )

        # Reference usage and content from a non-streaming call.
        reference = await client.chat.completions.create(stream=False, **shared_kwargs)
        reference_usage = reference.usage
        reference_text = reference.choices[0].message.content
        assert reference_usage is not None
        assert reference_text is not None

        # Stream with include_usage=False: no chunk may report usage.
        plain_stream = await client.chat.completions.create(
            stream=True, stream_options={"include_usage": False}, **shared_kwargs
        )
        plain_chunks = [item async for item in plain_stream]
        assert all(
            item.usage is None for item in plain_chunks
        ), "Usage should be null when include_usage=False"
        plain_text = joined_content(plain_chunks)

        # Stream with include_usage=True.
        usage_stream = await client.chat.completions.create(
            stream=True, stream_options={"include_usage": True}, **shared_kwargs
        )
        usage_chunks = [item async for item in usage_stream]
        # Chunks without usage are the ones that carry content.
        content_chunks = [item for item in usage_chunks if item.usage is None]

        # Exactly one extra chunk is expected.
        assert len(usage_chunks) == len(plain_chunks) + 1

        # Content must agree across all three calls.
        usage_text = joined_content(content_chunks)
        assert usage_text == reference_text
        assert usage_text == plain_text

        # The last chunk holds the usage data and no choices.
        final_chunk = usage_chunks[-1]
        assert final_chunk.usage is not None
        assert len(final_chunk.choices) == 0
        streamed_usage = final_chunk.usage
        for field in ("prompt_tokens", "completion_tokens"):
            count = getattr(streamed_usage, field)
            assert isinstance(count, int) and count > 0
        assert streamed_usage.total_tokens == (
            streamed_usage.prompt_tokens + streamed_usage.completion_tokens
        )

        # No earlier chunk carries usage data.
        assert all(item.usage is None for item in usage_chunks[:-1])

        # Usage must match between the streaming and non-streaming calls.
        assert reference_usage.model_dump() == streamed_usage.model_dump()

    @pytest.mark.asyncio
    async def test_completion_streaming_usage_option(
        self, client: openai.AsyncOpenAI, model: str, prompt: str
    ):
        """Check ``stream_options.include_usage`` for streamed completions.

        With the option off, no chunk reports usage. With it on, one extra
        final chunk has empty choices and usage equal to the non-streamed
        call's usage, while the streamed text stays the same.
        """
        shared_kwargs = dict(
            model=model, prompt=prompt, max_tokens=16, temperature=0.0, seed=0
        )

        def joined_text(chunk_list):
            # Concatenate the text of every chunk that carries some.
            return "".join(
                item.choices[0].text
                for item in chunk_list
                if item.choices and item.choices[0].text
            )

        # Reference usage and text from a non-streaming call.
        reference = await client.completions.create(stream=False, **shared_kwargs)
        reference_usage = reference.usage
        reference_text = reference.choices[0].text
        assert reference_usage is not None
        assert reference_text is not None

        # Stream with include_usage=False: no chunk may report usage.
        plain_stream = await client.completions.create(
            stream=True, stream_options={"include_usage": False}, **shared_kwargs
        )
        plain_chunks = [item async for item in plain_stream]
        assert all(item.usage is None for item in plain_chunks)
        plain_text = joined_text(plain_chunks)

        # Stream with include_usage=True.
        usage_stream = await client.completions.create(
            stream=True, stream_options={"include_usage": True}, **shared_kwargs
        )
        usage_chunks = [item async for item in usage_stream]
        # Chunks without usage are the ones that carry text.
        content_chunks = [item for item in usage_chunks if item.usage is None]

        # Exactly one extra chunk is expected.
        assert len(usage_chunks) == len(plain_chunks) + 1

        # Text must agree across all three calls.
        usage_text = joined_text(content_chunks)
        assert usage_text == reference_text
        assert usage_text == plain_text

        # The last chunk holds the usage data and no choices.
        final_chunk = usage_chunks[-1]
        assert final_chunk.usage is not None
        assert len(final_chunk.choices) == 0
        streamed_usage = final_chunk.usage
        for field in ("prompt_tokens", "completion_tokens"):
            count = getattr(streamed_usage, field)
            assert isinstance(count, int) and count > 0
        assert streamed_usage.total_tokens == (
            streamed_usage.prompt_tokens + streamed_usage.completion_tokens
        )

        # No earlier chunk carries usage data.
        assert all(item.usage is None for item in usage_chunks[:-1])

        # Usage must match between the streaming and non-streaming calls.
        assert reference_usage.model_dump() == streamed_usage.model_dump()

    @pytest.mark.asyncio
    async def test_stream_options_without_streaming(
        self, client: openai.AsyncOpenAI, model: str, prompt: str
    ):
        """``stream_options`` with ``stream=False`` is rejected as a bad request.

        Checked for both the completions and the chat completions endpoints.
        """
        expected_message = "`stream_options` can only be used when `stream` is True"

        # Completions endpoint.
        with pytest.raises(openai.BadRequestError) as completion_error:
            await client.completions.create(
                model=model,
                prompt=prompt,
                stream=False,
                stream_options={"include_usage": True},
            )
        assert expected_message in str(completion_error.value)

        # Chat completions endpoint, with the prompt wrapped as a user message.
        user_messages = [{"role": "user", "content": prompt}]
        with pytest.raises(openai.BadRequestError) as chat_error:
            await client.chat.completions.create(
                model=model,
                messages=user_messages,
                stream=False,
                stream_options={"include_usage": True},
            )
        assert expected_message in str(chat_error.value)

    @pytest.mark.asyncio
    async def test_chat_completion_logprobs(
        self, client: openai.AsyncOpenAI, backend: str, model: str, messages: List[dict]
    ):
        """Check chat-completion logprobs, streamed against non-streamed.

        Backends other than vLLM must reject the request. For vLLM, every
        token entry is validated and the streamed tokens and logprob values
        must match the non-streamed ones.
        """
        logprob_kwargs = dict(
            model=model, messages=messages, logprobs=True, top_logprobs=2, max_tokens=10
        )

        # Non-vLLM backends should raise an error
        if backend != "vllm":
            with pytest.raises(openai.BadRequestError) as exc_info:
                await client.chat.completions.create(**logprob_kwargs)
            assert "logprobs are currently available only for the vLLM backend" in str(
                exc_info.value
            )
            return

        # Fixed sampling settings shared by both calls.
        sampling_kwargs = dict(temperature=0.0, seed=0)

        # Non-streamed reference call.
        chat_completion = await client.chat.completions.create(
            stream=False, **logprob_kwargs, **sampling_kwargs
        )
        reference_choice = chat_completion.choices[0]
        assert reference_choice.message.content
        assert reference_choice.logprobs is not None

        reference_logprobs = chat_completion.choices[0].logprobs
        assert reference_logprobs.content is not None
        assert len(reference_logprobs.content) > 0

        # Every token entry must be fully populated.
        for entry in reference_logprobs.content:
            assert entry.token
            assert isinstance(entry.logprob, float)
            assert isinstance(entry.bytes, list)
            assert entry.top_logprobs is not None
            assert len(entry.top_logprobs) > 0

        # Same request, streamed.
        stream = await client.chat.completions.create(
            stream=True, **logprob_kwargs, **sampling_kwargs
        )
        text_pieces = []
        streamed_entries = []
        async for chunk in stream:
            choice = chunk.choices[0]
            if choice.delta.content:
                text_pieces.append(choice.delta.content)
            if choice.logprobs and choice.logprobs.content:
                streamed_entries.extend(choice.logprobs.content)

        # Streamed text must equal the non-streamed content.
        assert "".join(text_pieces) == chat_completion.choices[0].message.content

        # Both modes must yield the same number of logprob entries.
        assert len(streamed_entries) > 0, "Streaming should produce logprobs"
        assert len(streamed_entries) == len(
            reference_logprobs.content
        ), "Same number of tokens"

        # Tokens are compared exactly, logprob values with np.allclose.
        streamed_tokens = [entry.token for entry in streamed_entries]
        reference_tokens = [entry.token for entry in reference_logprobs.content]
        streamed_values = [entry.logprob for entry in streamed_entries]
        reference_values = [entry.logprob for entry in reference_logprobs.content]

        assert streamed_tokens == reference_tokens, "Tokens should match"
        assert np.allclose(
            streamed_values, reference_values, rtol=0, atol=1e-1
        ), "Logprob values should be close"

    @pytest.mark.asyncio
    async def test_completion_logprobs(
        self, client: openai.AsyncOpenAI, backend: str, model: str, prompt: str
    ):
        """Check completions logprobs in non-streaming and streaming mode.

        For backends other than vLLM the request is expected to be rejected
        with a bad-request error. For vLLM, the non-streaming response must
        expose four parallel per-token lists, and a streamed request issued
        with the same sampling settings must reproduce them.
        """
        if backend != "vllm":
            # Non-vLLM backends should raise an error
            with pytest.raises(openai.BadRequestError) as exc_info:
                await client.completions.create(
                    model=model,
                    prompt=prompt,
                    logprobs=3,
                    max_tokens=10,
                )
            error_text = str(exc_info.value)
            assert (
                "logprobs are currently available only for the vLLM backend"
                in error_text
            )
            return

        # Both requests share these settings so their outputs are comparable.
        request_kwargs = dict(
            model=model,
            prompt=prompt,
            logprobs=3,
            max_tokens=10,
            temperature=0.0,
            seed=0,
        )
        # Per-token lists carried by the logprobs object.
        fields = ("tokens", "token_logprobs", "text_offset", "top_logprobs")

        # --- Non-streaming ---
        completion = await client.completions.create(stream=False, **request_kwargs)

        assert completion.choices[0].text
        assert completion.choices[0].logprobs is not None

        logprobs = completion.choices[0].logprobs
        for field in fields:
            assert getattr(logprobs, field) is not None

        # Every list must have one entry per generated token.
        num_tokens = len(logprobs.tokens)
        for field in fields[1:]:
            assert len(getattr(logprobs, field)) == num_tokens

        # --- Streaming, compared against the non-streaming result ---
        stream = await client.completions.create(stream=True, **request_kwargs)

        text_parts = []
        streamed = {field: [] for field in fields}

        async for chunk in stream:
            choice = chunk.choices[0]
            if choice.text:
                text_parts.append(choice.text)
            chunk_logprobs = choice.logprobs
            if not chunk_logprobs:
                continue
            for field in fields:
                values = getattr(chunk_logprobs, field)
                if values:
                    streamed[field].extend(values)

        # Assert streaming output matches non-streaming
        assert "".join(text_parts) == completion.choices[0].text

        # Exact comparison for the lists below; token logprobs are floats and
        # are compared with np.allclose instead.
        assert streamed["tokens"] == logprobs.tokens, "Tokens should match"
        assert (
            streamed["text_offset"] == logprobs.text_offset
        ), "Text offsets should match"
        assert (
            streamed["top_logprobs"] == logprobs.top_logprobs
        ), "Top logprobs should match"
        assert np.allclose(
            streamed["token_logprobs"], logprobs.token_logprobs, rtol=0, atol=1e-1
        ), "Token logprob values should be close"

    @pytest.mark.parametrize("top_logprobs_value", [0, 5])
    @pytest.mark.asyncio
    async def test_top_logprobs_requires_logprobs(
        self,
        client: openai.AsyncOpenAI,
        model: str,
        messages: List[dict],
        top_logprobs_value: int,
        backend: str,
    ):
        """
        Verify a chat request with `top_logprobs` but no `logprobs` is rejected.

        Skipped when the backend is not vLLM.
        """
        if backend != "vllm":
            pytest.skip(
                reason="logprobs are currently available only for the vLLM backend"
            )

        # `logprobs=True` is deliberately left out of the request.
        request_kwargs = {
            "model": model,
            "messages": messages,
            "top_logprobs": top_logprobs_value,
            "max_tokens": 5,
        }
        with pytest.raises(openai.BadRequestError) as exc_info:
            await client.chat.completions.create(**request_kwargs)

        error_text = str(exc_info.value)
        assert "`top_logprobs` can only be used when `logprobs` is True" in error_text

    @pytest.mark.asyncio
    async def test_chat_top_logprobs_exceeds_max(
        self, client: openai.AsyncOpenAI, model: str, messages: List[dict]
    ):
        """Verify that a chat request with top_logprobs above 20 fails validation."""
        over_limit = 25  # Exceeds maximum of 20
        with pytest.raises(openai.UnprocessableEntityError) as exc_info:
            await client.chat.completions.create(
                model=model,
                messages=messages,
                logprobs=True,
                top_logprobs=over_limit,
                max_tokens=5,
            )
        # Pydantic validation error; matched case-insensitively.
        error_text = str(exc_info.value).lower()
        assert "less than or equal to 20" in error_text

    @pytest.mark.asyncio
    async def test_completion_logprobs_exceeds_max(
        self, client: openai.AsyncOpenAI, model: str, prompt: str
    ):
        """Verify that a completions request with logprobs above 5 fails validation."""
        over_limit = 7  # Exceeds maximum of 5
        with pytest.raises(openai.UnprocessableEntityError) as exc_info:
            await client.completions.create(
                model=model,
                prompt=prompt,
                logprobs=over_limit,
                max_tokens=5,
            )
        # Pydantic validation error; matched case-insensitively.
        error_text = str(exc_info.value).lower()
        assert "less than or equal to 5" in error_text


================================================
FILE: python/openai/tests/test_openai_restricted_apis.py
================================================
#!/usr/bin/env python3

# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from pathlib import Path
from typing import Dict, List, Optional

import pytest
import requests
from tests.utils import OpenAIServer


def assert_response_success(
    response: requests.Response, expected_status: int = 200, description: str = ""
):
    """Fail unless ``response`` has the expected success status code.

    Args:
        response: HTTP response to check.
        expected_status: Status code that counts as success (default 200).
        description: Label for the request, used in the failure message.
    """
    actual_status = response.status_code
    failure_msg = f"{description} should return {expected_status}, got {actual_status}"
    assert actual_status == expected_status, failure_msg


def assert_response_unauthorized(
    response: requests.Response, expected_status: int = 401, description: str = ""
):
    """Fail unless ``response`` has the expected unauthorized status code.

    Args:
        response: HTTP response to check.
        expected_status: Status code that counts as unauthorized (default 401).
        description: Label for the request, used in the failure message.
    """
    actual_status = response.status_code
    failure_msg = (
        f"{description} should be unauthorized with {expected_status}, "
        f"got {actual_status}"
    )
    assert actual_status == expected_status, failure_msg


def make_get_request(
    base_url: str,
    endpoint: str,
    headers: Optional[Dict[str, str]] = None,
    timeout: int = 10,
):
    """Send a GET to ``base_url`` followed by ``endpoint`` and return the response.

    Args:
        base_url: Root URL of the server.
        endpoint: Path appended directly to ``base_url``.
        headers: Optional HTTP headers to send.
        timeout: Timeout passed to ``requests.get``.
    """
    return requests.get(f"{base_url}{endpoint}", headers=headers, timeout=timeout)


def verify_inference_endpoints(
    base_url, model, headers, expected_success, description_prefix
):
    """Check the chat-completions and completions endpoints.

    A small request ("Hello", ``max_tokens`` of 10) is POSTed to each
    endpoint with ``headers``. Each response must be a success when
    ``expected_success`` is true and unauthorized otherwise.

    Args:
        base_url: Root URL of the server under test.
        model: Model name placed in the request bodies.
        headers: Optional HTTP headers sent with both requests.
        expected_success: Whether the requests are expected to be accepted.
        description_prefix: Label prepended to the failure messages.
    """
    # Both endpoints share the same expectation, so pick the checker once.
    check_response = (
        assert_response_success if expected_success else assert_response_unauthorized
    )

    def post_json(path, payload, timeout=10):
        """POST ``payload`` as JSON to ``path`` under ``base_url``."""
        return requests.post(
            f"{base_url}{path}", json=payload, headers=headers, timeout=timeout
        )

    chat_payload = {
        "model": model,
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 10,
    }
    chat_response = post_json("/v1/chat/completions", chat_payload)
    check_response(
        chat_response, description=f"{description_prefix} Chat completions endpoint"
    )

    completion_payload = {
        "model": model,
        "prompt": "Hello",
        "max_tokens": 10,
    }
    completion_response = post_json("/v1/completions", completion_payload)
    check_response(
        completion_response, description=f"{description_prefix} Completions endpoint"
    )


def verify_model_repository_endpoints(
    base_url, model, headers, expected_success, description_prefix
):
    """Check the model listing endpoint and the single-model endpoint.

    Each endpoint is requested with ``headers``; the response must be a
    success when ``expected_success`` is true and unauthorized otherwise.

    Args:
        base_url: Root URL of the server under test.
        model: Model name used for the single-model endpoint.
        headers: Optional HTTP headers sent with both requests.
        expected_success: Whether the requests are expected to be accepted.
        description_prefix: Label prepended to the failure messages.
    """
    check_response = (
        assert_response_success if expected_success else assert_response_unauthorized
    )
    targets = (
        ("/v1/models", "Models endpoint"),
        (f"/v1/models/{model}", "Specific model endpoint"),
    )
    for endpoint, label in targets:
        response = make_get_request(base_url, endpoint, headers=headers)
        check_response(response, description=f"{description_prefix} {label}")


def verify_metrics_endpoint(base_url, headers, expected_success, description_prefix):
    """Request ``/metrics`` and check the response against the expectation.

    Args:
        base_url: Root URL of the server under test.
        headers: Optional HTTP headers sent with the request.
        expected_success: If true the response must be a success; otherwise
            it must be unauthorized.
        description_prefix: Label prepended to the failure message.
    """
    # Test metrics endpoint
    response = make_get_request(base_url, "/metrics", headers=headers)

    # Assert only the outcome the caller asked for. Asserting success
    # unconditionally as well would make ``expected_success=False``
    # impossible to satisfy.
    if expected_success:
        assert_response_success(
            response, description=f"{description_prefix} Metrics endpoint"
        )
    else:
        assert_response_unauthorized(
            response, description=f"{description_prefix} Metrics endpoint"
        )


def verify_health_endpoint(base_url, headers, expected_success, description_prefix):
    """Request ``/health/ready`` and check the response against the expectation.

    Args:
        base_url: Root URL of the server under test.
        headers: Optional HTTP headers sent with the request.
        expected_success: If true the response must be a success; otherwise
            it must be unauthorized.
        description_prefix: Label prepended to the failure message.
    """
    check_response = (
        assert_response_success if expected_success else assert_response_unauthorized
    )
    response = make_get_request(base_url, "/health/ready", headers=headers)
    check_response(response, description=f"{description_prefix} Health endpoint")


@pytest.mark.openai
class TestRestrictedAPIInvalidArguments:
    """Test cases for malformed --openai-restricted-api arguments."""

    def _test_server_startup_failure(
        self,
        malformed_api_arg,
        expected_error_pattern=None,
    ):
        """Assert that the server fails to start with the given arguments.

        Args:
            malformed_api_arg: Either the list of values for a single
                ``--openai-restricted-api`` option, or a list of such lists
                when the option should be passed several times.
            expected_error_pattern: Optional substring that must appear in
                the string form of the raised exception.
        """
        args = [
            "--model-repository",
            # Hardcode to simple models to speed up tests
            str(Path(__file__).parent / "test_models"),
        ]

        # Normalize to a list of option groups so both input shapes share
        # one code path.
        if isinstance(malformed_api_arg[0], list):
            api_arg_groups = malformed_api_arg
        else:
            api_arg_groups = [malformed_api_arg]
        for api_arg in api_arg_groups:
            args.append("--openai-restricted-api")
            args.extend(api_arg)

        # Server should fail to start with malformed arguments. Any exception
        # type is accepted; the message is checked below when requested.
        with pytest.raises(Exception) as exc_info:
            with OpenAIServer(args):
                pass  # Should not reach here

        if expected_error_pattern:
            assert expected_error_pattern in str(
                exc_info.value
            ), f"Expected error pattern '{expected_error_pattern}' not found in: {exc_info.value}"

    @pytest.mark.parametrize(
        "malformed_arg",
        [
            ["unknown-endpoint", "auth-key", "auth-value"],
            ["invalid,inference", "auth-key", "auth-value"],  # Mix of invalid and valid
            ["inference,unknown", "auth-key", "auth-value"],  # Mix of valid and invalid
        ],
    )
    def test_unknown_endpoint_names(self, malformed_arg):
        """Test that server startup fails when an unknown API name is given."""
        self._test_server_startup_failure(
            malformed_arg,
            expected_error_pattern="Unknown API",
        )

    @pytest.mark.parametrize(
        "malformed_arg",
        [
            ["inference,inference", "auth-key", "auth-value"],
        ],
    )
    def test_duplicate_apis(self, malformed_arg):
        """Test that server startup fails when one group lists an API twice."""
        self._test_server_startup_failure(
            malformed_arg,
            expected_error_pattern="restricted api 'inference' can not be specified in multiple config groups",
        )

    @pytest.mark.parametrize(
        "malformed_arg",
        [
            # API with different auth specs
            [
                ["inference", "auth-key1", "value1"],
                ["inference", "auth-key2", "value2"],
            ],
            # API with same auth specs
            [["inference", "auth-key", "value"], ["inference", "auth-key", "value"]],
            # Multiple APIs with one duplicate
            [
                ["inference", "auth-key1", "value1"],
                ["model-repository", "auth-key2", "value2"],
                ["inference", "auth-key3", "value3"],
            ],
            # All APIs duplicated
            [
                ["inference", "auth-key1", "value1"],
                ["model-repository", "auth-key2", "value2"],
                ["inference", "auth-key3", "value3"],
                ["model-repository", "auth-key4", "value4"],
            ],
        ],
    )
    def test_conflict_configs(self, malformed_arg):
        """Test that server fails when duplicate APIs are specified in multiple arguments."""
        # Test cases where the same API name appears in multiple --openai-restricted-api arguments
        self._test_server_startup_failure(
            malformed_arg,
            expected_error_pattern="restricted api 'inference' can not be specified in multiple config groups",
        )


@pytest.mark.openai
class TestOpenAIServerRestrictedAPIs:
    """Exercise an OpenAI server started with a single restricted API group."""

    @pytest.fixture(scope="class")
    def server_with_restrictions(self, model_repository, tokenizer_model, backend):
        """Yield a server whose inference and model-repository APIs are restricted."""
        # One group covering both APIs, guarded by a single header/value pair.
        restriction = [
            "--openai-restricted-api",
            "inference,model-repository",
            "admin-key",
            "admin-value",
        ]
        launch_args = [
            "--model-repository",
            model_repository,
            "--tokenizer",
            tokenizer_model,
            "--backend",
            backend,
        ] + restriction

        with OpenAIServer(launch_args) as openai_server:
            yield openai_server

    @pytest.mark.parametrize(
        "headers, expected_success, description",
        [
            (None, False, "No auth"),
            ({"admin-key": "admin-value"}, True, "Valid auth"),
            ({"admin-key": "wrong-value"}, False, "Invalid auth value"),
            ({"wrong-key": "admin-value"}, False, "Invalid auth key"),
        ],
    )
    def test_restricted_endpoints_with_auth(
        self, server_with_restrictions, model, headers, expected_success, description
    ):
        """Check both restricted endpoint groups under each authentication scenario."""
        root_url = server_with_restrictions.url_root

        for verify in (verify_model_repository_endpoints, verify_inference_endpoints):
            verify(root_url, model, headers, expected_success, description)

    def test_unrestricted_endpoints(self, server_with_restrictions):
        """Check that metrics and health endpoints respond without authentication."""
        root_url = server_with_restrictions.url_root

        for verify in (verify_metrics_endpoint, verify_health_endpoint):
            verify(
                root_url, None, expected_success=True, description_prefix="Unrestricted"
            )


@pytest.mark.openai
class TestOpenAIServerMultipleRestrictions:
    """Test cases for OpenAI server with multiple restriction groups.

    The server is started with two separate groups: the model-repository
    APIs are guarded by the ``model-key`` header and the inference APIs by
    the ``infer-key`` header.
    """

    @pytest.fixture(scope="class")
    def server_multiple_restrictions(self, model_repository, tokenizer_model, backend):
        """Start server with multiple restriction groups."""
        args = [
            "--model-repository",
            model_repository,
            "--tokenizer",
            tokenizer_model,
            "--backend",
            backend,
            "--openai-restricted-api",
            "model-repository",
            "model-key",
            "model-value",
            "--openai-restricted-api",
            "inference",
            "infer-key",
            "infer-value",
        ]

        with OpenAIServer(args) as openai_server:
            yield openai_server

    def test_endpoint_groups_with_correct_auth(
        self, server_multiple_restrictions, model
    ):
        """Test that endpoint groups work with their specific authentication keys."""
        base_url = server_multiple_restrictions.url_root

        # Test model repository endpoints with model key
        model_headers = {"model-key": "model-value"}
        verify_model_repository_endpoints(
            base_url,
            model,
            model_headers,
            expected_success=True,
            description_prefix="Correct model key",
        )

        # Test inference endpoints with inference key
        infer_headers = {"infer-key": "infer-value"}
        verify_inference_endpoints(
            base_url,
            model,
            infer_headers,
            expected_success=True,
            description_prefix="Correct inference key",
        )

    # ``model_headers``/``model_description`` are used against the
    # model-repository endpoints and ``infer_headers``/``infer_description``
    # against the inference endpoints, so each description names the key that
    # is actually sent to that endpoint group.
    @pytest.mark.parametrize(
        "model_headers, model_description, infer_headers, infer_description",
        [
            (None, "No auth", None, "No auth"),
            (
                {"infer-key": "infer-value"},
                "Inference key for model endpoints",
                {"model-key": "model-value"},
                "Model key for inference endpoints",
            ),
            (
                {"wrong-key": "wrong-value"},
                "Completely wrong key",
                {"wrong-key": "wrong-value"},
                "Completely wrong key",
            ),
        ],
    )
    def test_endpoint_groups_with_wrong_auth(
        self,
        server_multiple_restrictions,
        model,
        model_headers,
        model_description,
        infer_headers,
        infer_description,
    ):
        """Test that endpoint groups are blocked with wrong authentication keys."""
        base_url = server_multiple_restrictions.url_root

        # Test scenarios where wrong auth keys are used
        verify_model_repository_endpoints(
            base_url,
            model,
            model_headers,
            expected_success=False,
            description_prefix=model_description,
        )
        verify_inference_endpoints(
            base_url,
            model,
            infer_headers,
            expected_success=False,
            description_prefix=infer_description,
        )

    def test_unrestricted_endpoints(self, server_multiple_restrictions):
        """Test that unrestricted endpoints work without authentication."""
        base_url = server_multiple_restrictions.url_root

        verify_metrics_endpoint(
            base_url, None, expected_success=True, description_prefix="Unrestricted"
        )
        verify_health_endpoint(
            base_url, None, expected_success=True, description_prefix="Unrestricted"
        )


================================================
FILE: python/openai/tests/test_tool_calling.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import json
import os
from typing import Dict, List, Optional

import openai
import pytest
from openai.types.chat import (
    ChatCompletionMessageParam,
    ChatCompletionMessageToolCall,
    ChatCompletionNamedToolChoiceParam,
    ChatCompletionToolParam,
)

# Resources for testing tool calling.

# Function tool definition named "get_current_weather". It takes three string
# parameters ("city", "state", "unit"), all of them required; "unit" is
# limited to "celsius" or "fahrenheit".
WEATHER_TOOL: ChatCompletionToolParam = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to find the weather for, "
                    "e.g. 'San Francisco'",
                },
                "state": {
                    "type": "string",
                    "description": "must the two-letter abbreviation for the state "
                    "that the city is in, e.g. 'CA' which would "
                    "mean 'California'",
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"],
                },
            },
            "required": ["city", "state", "unit"],
        },
    },
}

# Function tool definition named "get_n_day_weather_forecast". It takes the
# string parameters "city", "state" and "unit" plus an integer "num_days";
# all four are required.
WEATHER_FORECAST_TOOL: ChatCompletionToolParam = {
    "type": "function",
    "function": {
        "name": "get_n_day_weather_forecast",
        "description": "Get an N-day weather forecast",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to find the weather for, "
                    "e.g. 'San Francisco'",
                },
                "state": {
                    "type": "string",
                    "description": "must the two-letter abbreviation for the state "
                    "that the city is in, e.g. 'CA' which would "
                    "mean 'California'",
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"],
                },
                "num_days": {
                    "type": "integer",
                    "description": "The number of days to forecast",
                },
            },
            "required": ["city", "state", "unit", "num_days"],
        },
    },
}

# Conversation consisting of a system prompt and a single user question about
# the weather in Dallas, Texas in Fahrenheit.
MESSAGES_ASKING_FOR_TOOLS: List[ChatCompletionMessageParam] = [
    {
        "role": "system",
        "content": "You're a helpful assistant! Answer the users question best you can.",
    },
    {"role": "user", "content": "What is the weather in Dallas, Texas in Fahrenheit?"},
]

# Same system prompt and user question, followed by an assistant message
# carrying a "get_current_weather" tool call and the matching tool message
# (same tool call id) whose content is "98".
MESSAGES_WITH_TOOL_RESPONSE: List[ChatCompletionMessageParam] = [
    {
        "role": "system",
        "content": "You're a helpful assistant! Answer the users question best you can.",
    },
    {"role": "user", "content": "What is the weather in Dallas, Texas in Fahrenheit?"},
    {
        "role": "assistant",
        "tool_calls": [
            {
                "id": "123456789",
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "arguments": '{"city": "Dallas", "state": "TX", '
                    '"unit": "fahrenheit"}',
                },
            }
        ],
    },
    {"role": "tool", "tool_call_id": "123456789", "content": "98"},
]

# Named tool choice that selects the "get_n_day_weather_forecast" function.
WEATHER_FORECAST_TOOL_CHOICE: ChatCompletionNamedToolChoiceParam = {
    "function": {"name": "get_n_day_weather_forecast"},
    "type": "function",
}


@pytest.mark.openai
class TestAsyncClientToolCalling:
    """Tool-calling tests for chat completions through the async OpenAI client.

    Most tests send the same request twice, once non-streaming and once with
    ``stream=True``, and check that the two responses agree.
    """

    @pytest.fixture(scope="class")
    def client(self, server):
        """Return the async client provided by the ``server`` fixture."""
        return server.get_async_client()

    def validate_tool_calls_present(
        self, tool_calls: Optional[List[ChatCompletionMessageToolCall]], skip_id=False
    ):
        """Assert that ``tool_calls`` holds exactly one function call with a string ID.

        Unless ``skip_id`` is true, the ID must also be at least 9 characters long.
        """
        assert tool_calls is not None
        assert len(tool_calls) == 1
        assert tool_calls[0].type == "function"
        assert tool_calls[0].function is not None
        assert isinstance(tool_calls[0].id, str)
        if not skip_id:
            assert len(tool_calls[0].id) >= 9

    def validate_weather_tool_arguments(self, parsed_arguments: Dict):
        """Assert the decoded arguments describe Dallas, TX/Texas in fahrenheit."""
        assert isinstance(parsed_arguments, Dict)
        assert isinstance(parsed_arguments.get("city"), str)
        assert isinstance(parsed_arguments.get("state"), str)
        assert isinstance(parsed_arguments.get("unit"), str)
        assert parsed_arguments.get("city") == "Dallas"
        assert parsed_arguments.get("state") in ("TX", "Texas")
        assert parsed_arguments.get("unit") == "fahrenheit"

    def validate_weather_forcast_tool_arguments(self, parsed_arguments: Dict):
        """Assert the weather arguments are valid and ``num_days`` is an integer."""
        # The forecast tool takes the same city/state/unit arguments as the
        # current-weather tool, so share those checks.
        self.validate_weather_tool_arguments(parsed_arguments)
        assert isinstance(parsed_arguments.get("num_days"), int)

    def _parse_tool_call_arguments(self, tool_calls, expected_name: str) -> Dict:
        """Check the first tool call targets ``expected_name`` and decode its arguments.

        Returns the arguments decoded from their JSON string form.
        """
        function = tool_calls[0].function
        assert function.name == expected_name
        assert function.arguments is not None
        assert isinstance(function.arguments, str)
        return json.loads(function.arguments)

    async def _collect_streamed_tool_call(
        self,
        stream,
        *,
        expect_tool_calls_finish: bool,
        name_streamed_once: bool,
    ) -> Dict:
        """Consume a streamed chat completion that should carry one tool call.

        Args:
            stream: Async iterator of chat completion chunks.
            expect_tool_calls_finish: When true, the finish reason must be
                ``"tool_calls"``; when false it must be anything else.
            name_streamed_once: When true, the function name must be streamed
                in its entirety exactly one time.

        Returns:
            A dict with the keys ``function_name``, ``arguments`` (the
            concatenated argument fragments), ``tool_call_id`` and ``role``.
        """
        function_name: Optional[str] = None
        argument_fragments: List[str] = []
        tool_call_id: Optional[str] = None
        role_name: Optional[str] = None
        finish_reason_count: int = 0

        async for chunk in stream:
            streamed_choice = chunk.choices[0]
            assert streamed_choice.index == 0

            if streamed_choice.finish_reason:
                finish_reason_count += 1
                if expect_tool_calls_finish:
                    assert streamed_choice.finish_reason == "tool_calls"
                else:
                    assert streamed_choice.finish_reason != "tool_calls"

            delta = streamed_choice.delta

            # Record the role that was actually streamed (rather than a
            # hard-coded value) so that an unexpected role is detected.
            if delta.role:
                assert delta.role == "assistant"
                role_name = delta.role

            # if a tool call is streamed make sure there's exactly one
            # (based on the request parameters)
            streamed_tool_calls = delta.tool_calls
            if not streamed_tool_calls:
                continue
            assert len(streamed_tool_calls) == 1
            tool_call = streamed_tool_calls[0]

            # if a tool call ID is streamed, make sure one hasn't been already
            if tool_call.id:
                assert not tool_call_id
                tool_call_id = tool_call.id

            # if parts of the function start being streamed
            if tool_call.function:
                if tool_call.function.name:
                    if name_streamed_once:
                        assert function_name is None
                    assert isinstance(tool_call.function.name, str)
                    function_name = tool_call.function.name
                if tool_call.function.arguments:
                    assert isinstance(tool_call.function.arguments, str)
                    argument_fragments.append(tool_call.function.arguments)

        assert finish_reason_count == 1
        assert role_name == "assistant"

        return {
            "function_name": function_name,
            "arguments": "".join(argument_fragments),
            "tool_call_id": tool_call_id,
            "role": role_name,
        }

    @pytest.mark.asyncio
    async def test_tool_call_and_choice(self, client: openai.AsyncOpenAI, model: str):
        """With tools offered and no tool choice, the weather tool is called."""
        request = dict(
            messages=MESSAGES_ASKING_FOR_TOOLS,
            temperature=0,
            max_completion_tokens=128,
            model=model,
            tools=[WEATHER_TOOL, WEATHER_FORECAST_TOOL],
            logprobs=False,
        )
        expected_name = WEATHER_TOOL["function"]["name"]

        chat_completion = await client.chat.completions.create(**request)
        choice = chat_completion.choices[0]
        tool_calls = choice.message.tool_calls

        # make sure a tool call is present
        self.validate_tool_calls_present(tool_calls)
        assert choice.finish_reason == "tool_calls"

        # make sure the weather tool was called (classic example) with
        # arguments that parse properly
        parsed_arguments = self._parse_tool_call_arguments(tool_calls, expected_name)
        self.validate_weather_tool_arguments(parsed_arguments)

        # make the same request, streaming
        stream = await client.chat.completions.create(stream=True, **request)
        streamed = await self._collect_streamed_tool_call(
            stream, expect_tool_calls_finish=True, name_streamed_once=True
        )

        tool_call_id = streamed["tool_call_id"]
        assert isinstance(tool_call_id, str) and (len(tool_call_id) >= 9)

        # validate the name and arguments
        assert streamed["function_name"] == expected_name
        streamed_args = json.loads(streamed["arguments"])
        self.validate_weather_tool_arguments(streamed_args)

        # make sure everything matches non-streaming except for ID
        assert streamed["function_name"] == tool_calls[0].function.name
        assert choice.message.role == streamed["role"]

        # compare streamed with non-streamed args Dict-wise, not string-wise
        # because character-to-character comparison might not work e.g. the tool
        # call parser adding extra spaces or something like that. we care about the
        # dicts matching not byte-wise match
        assert parsed_arguments == streamed_args

    @pytest.mark.asyncio
    async def test_tool_call_with_reply_response(
        self, client: openai.AsyncOpenAI, model: str, backend: str
    ):
        """Once a tool result is in the history, the reply is text, not a tool call."""
        request = dict(
            messages=MESSAGES_WITH_TOOL_RESPONSE,
            temperature=0,
            max_completion_tokens=128,
            model=model,
            tools=[WEATHER_TOOL, WEATHER_FORECAST_TOOL],
            logprobs=False,
            seed=0,
        )

        chat_completion = await client.chat.completions.create(**request)
        choice = chat_completion.choices[0]

        assert choice.finish_reason != "tool_calls"  # "stop"
        assert choice.message.role == "assistant"
        assert choice.message.tool_calls is None or len(choice.message.tool_calls) == 0
        assert choice.message.content is not None

        stream = await client.chat.completions.create(stream=True, **request)

        chunks: List[str] = []
        finish_reason_count = 0
        role_sent: bool = False

        async for chunk in stream:
            delta = chunk.choices[0].delta

            if delta.role:
                assert not role_sent
                assert delta.role == "assistant"
                role_sent = True

            if delta.content:
                chunks.append(delta.content)

            if chunk.choices[0].finish_reason is not None:
                finish_reason_count += 1
                assert chunk.choices[0].finish_reason == choice.finish_reason

            assert not delta.tool_calls or len(delta.tool_calls) == 0

        assert role_sent
        assert finish_reason_count == 1
        assert len(chunks)

        # validate that streaming and non-streaming generate the same content
        assert "".join(chunks) == choice.message.content

    @pytest.mark.asyncio
    async def test_tool_call_with_named_tool_choice(
        self, client: openai.AsyncOpenAI, model: str
    ):
        """A named tool choice forces a call to the forecast tool."""
        request = dict(
            messages=MESSAGES_ASKING_FOR_TOOLS,
            temperature=0,
            max_completion_tokens=128,
            model=model,
            tool_choice=WEATHER_FORECAST_TOOL_CHOICE,
            tools=[WEATHER_TOOL, WEATHER_FORECAST_TOOL],
            logprobs=False,
        )
        expected_name = WEATHER_FORECAST_TOOL["function"]["name"]

        chat_completion = await client.chat.completions.create(**request)
        choice = chat_completion.choices[0]
        tool_calls = choice.message.tool_calls

        # make sure a tool call is present
        self.validate_tool_calls_present(tool_calls, skip_id=True)
        assert choice.finish_reason != "tool_calls"

        # make sure the forecast tool was called with arguments that parse
        parsed_arguments = self._parse_tool_call_arguments(tool_calls, expected_name)
        self.validate_weather_forcast_tool_arguments(parsed_arguments)

        # make the same request, streaming
        stream = await client.chat.completions.create(stream=True, **request)
        streamed = await self._collect_streamed_tool_call(
            stream, expect_tool_calls_finish=False, name_streamed_once=False
        )

        # validate the name and arguments
        assert streamed["function_name"] == expected_name
        streamed_args = json.loads(streamed["arguments"])
        self.validate_weather_forcast_tool_arguments(streamed_args)

        # make sure everything matches non-streaming except for ID
        assert streamed["function_name"] == tool_calls[0].function.name
        assert choice.message.role == streamed["role"]

    @pytest.mark.asyncio
    async def test_tool_call_with_required_tool_choice(
        self, client: openai.AsyncOpenAI, model: str
    ):
        """``tool_choice="required"`` yields a call to the weather tool."""
        request = dict(
            messages=MESSAGES_ASKING_FOR_TOOLS,
            temperature=0,
            max_completion_tokens=128,
            model=model,
            tool_choice="required",
            tools=[WEATHER_TOOL, WEATHER_FORECAST_TOOL],
            logprobs=False,
        )
        expected_name = WEATHER_TOOL["function"]["name"]

        chat_completion = await client.chat.completions.create(**request)
        choice = chat_completion.choices[0]
        tool_calls = choice.message.tool_calls

        # make sure a tool call is present
        self.validate_tool_calls_present(tool_calls, skip_id=True)
        assert choice.finish_reason != "tool_calls"

        # make sure the weather tool was called (classic example) with
        # arguments that parse properly
        parsed_arguments = self._parse_tool_call_arguments(tool_calls, expected_name)
        self.validate_weather_tool_arguments(parsed_arguments)

        # make the same request, streaming
        stream = await client.chat.completions.create(stream=True, **request)
        streamed = await self._collect_streamed_tool_call(
            stream, expect_tool_calls_finish=False, name_streamed_once=False
        )

        # validate the name and arguments
        assert streamed["function_name"] == expected_name
        streamed_args = json.loads(streamed["arguments"])
        self.validate_weather_tool_arguments(streamed_args)

        # make sure everything matches non-streaming except for ID
        assert streamed["function_name"] == tool_calls[0].function.name
        assert choice.message.role == streamed["role"]

    @pytest.mark.asyncio
    async def test_inconsistent_tool_choice_and_tools(
        self, client: openai.AsyncOpenAI, model: str
    ):
        """Requests whose ``tool_choice`` cannot be satisfied by ``tools`` are rejected."""
        base_request = dict(
            messages=MESSAGES_ASKING_FOR_TOOLS,
            temperature=0,
            max_completion_tokens=128,
            model=model,
            logprobs=False,
        )

        # tool choice function but no tools are passed at all
        with pytest.raises(openai.BadRequestError):
            await client.chat.completions.create(
                tool_choice=WEATHER_FORECAST_TOOL_CHOICE,
                **base_request,
            )

        # tool choice function that is not provided in the tools
        with pytest.raises(openai.BadRequestError):
            await client.chat.completions.create(
                tool_choice=WEATHER_FORECAST_TOOL_CHOICE,
                tools=[WEATHER_TOOL],
                **base_request,
            )

        # tool choice required but tools is empty
        with pytest.raises(openai.BadRequestError):
            await client.chat.completions.create(
                tool_choice="required",
                tools=[],
                **base_request,
            )


================================================
FILE: python/openai/tests/utils.py
================================================
# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import subprocess
import sys
import threading
import time
from pathlib import Path
from typing import Dict, List, Optional

import openai
import requests
import tritonserver

sys.path.append(os.path.join(Path(__file__).resolve().parent, "..", "openai_frontend"))
from engine.triton_engine import TritonLLMEngine
from frontend.fastapi_frontend import FastApiFrontend


# TODO: Cleanup, refactor, mock, etc.
def setup_server(model_repository: str):
    """Create a ``tritonserver.Server`` for ``model_repository`` and start it.

    The server is configured with verbose logging off and info, warning and
    error logging on. ``start`` is called with ``wait_until_ready=True`` and
    its result is returned.
    """
    server_options = {
        "model_repository": model_repository,
        "log_verbose": 0,
        "log_info": True,
        "log_warn": True,
        "log_error": True,
    }
    return tritonserver.Server(**server_options).start(wait_until_ready=True)


def setup_fastapi_app(
    tokenizer: str,
    server: tritonserver.Server,
    backend: str,
    default_max_tokens: int = 16,
):
    """Wrap ``server`` in a ``TritonLLMEngine`` and return the frontend's app.

    All four arguments are forwarded unchanged to ``TritonLLMEngine``; the
    engine is handed to ``FastApiFrontend`` and that frontend's ``app``
    attribute is returned.
    """
    llm_engine = TritonLLMEngine(
        server=server,
        tokenizer=tokenizer,
        backend=backend,
        default_max_tokens=default_max_tokens,
    )
    return FastApiFrontend(engine=llm_engine).app


# Heavily inspired by vLLM's test infrastructure
class OpenAIServer:
    """Run the OpenAI-compatible frontend (``main.py``) as a subprocess for tests.

    The constructor launches ``../openai_frontend/main.py`` with ``python3``,
    mirrors the child's stderr to this process while recording it in
    ``stderr_lines``, and blocks until ``/health/ready`` answers with HTTP 200.
    Use the instance as a context manager so the subprocess is stopped on exit.
    """

    API_KEY = "EMPTY"  # Triton's OpenAI server does not need API key
    # Wait for the server to start for up to 240 seconds; the mistral model
    # takes longer to start.
    START_TIMEOUT = 240
    # Upper bound (seconds) for a single readiness probe, so that one hung HTTP
    # request cannot block startup past START_TIMEOUT.
    REQUEST_TIMEOUT = 5
    # Grace period (seconds) between terminate() and a forced kill().
    STOP_TIMEOUT = 30

    def __init__(
        self,
        cli_args: List[str],
        *,
        env_dict: Optional[Dict[str, str]] = None,
    ) -> None:
        """Start the server subprocess and wait for it to become ready.

        Args:
            cli_args: Extra command-line arguments appended after the script path.
            env_dict: Optional variables layered on top of a copy of ``os.environ``.

        Raises:
            RuntimeError: If the process exits with a non-zero code or does not
                become ready within ``START_TIMEOUT`` seconds. The exception
                carries the captured output in its ``stderr_lines`` attribute.
        """
        # TODO: Incorporate caller's cli_args passed to this instance instead
        self.host = "localhost"
        self.port = 9000

        env = os.environ.copy()
        if env_dict is not None:
            env.update(env_dict)

        this_dir = Path(__file__).resolve().parent
        script_path = this_dir / ".." / "openai_frontend" / "main.py"
        self.stderr_lines = []
        self.proc = subprocess.Popen(
            ["python3", script_path] + cli_args,
            env=env,
            stdout=sys.stdout,
            stderr=subprocess.PIPE,  # Capture stderr
            text=True,
        )
        threading.Thread(target=self._read_stderr, daemon=True).start()
        # Wait until health endpoint is responsive
        try:
            self._wait_for_server(
                url=self.url_for("health", "ready"), timeout=self.START_TIMEOUT
            )
        except BaseException:
            # __exit__ never runs when construction fails, so stop the child
            # here instead of leaking it.
            self._stop_process()
            raise

    def _read_stderr(self):
        """Read stderr and print to console in real-time. Continues throughout server lifecycle."""
        try:
            if self.proc.stderr:
                for line in iter(self.proc.stderr.readline, ""):
                    self.stderr_lines.append(line.rstrip("\n\r"))
                    sys.stderr.write(line)
                    sys.stderr.flush()
        except (OSError, ValueError, BrokenPipeError) as exc:
            # Ignore expected errors during process shutdown, but log for debugging.
            sys.stderr.write(f"[OpenAIServer] Error while reading stderr: {exc}\n")
            sys.stderr.flush()

    def __enter__(self):
        """Return this instance; the server is already running after ``__init__``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Stop the server subprocess. Exceptions from the ``with`` body propagate."""
        self._stop_process()

    def _stop_process(self):
        """Terminate the subprocess, force-killing it after ``STOP_TIMEOUT`` seconds."""
        self.proc.terminate()
        try:
            self.proc.wait(self.STOP_TIMEOUT)
        except subprocess.TimeoutExpired:
            # force kill if needed, then reap the child so it does not linger
            self.proc.kill()
            self.proc.wait()

    def _startup_error(self, summary: str) -> RuntimeError:
        """Build a ``RuntimeError`` for ``summary`` that includes the captured stderr."""
        stderr_text = (
            "\n".join(self.stderr_lines) if self.stderr_lines else "No stderr output"
        )
        error = RuntimeError(f"{summary}\nStderr output:\n{stderr_text}")
        error.stderr_lines = list(self.stderr_lines)
        return error

    def _wait_for_server(self, *, url: str, timeout: float):
        """Poll ``url`` until it returns HTTP 200.

        Args:
            url: Endpoint to probe with GET requests.
            timeout: Maximum number of seconds to keep polling.

        Raises:
            RuntimeError: If the subprocess exits with a non-zero return code,
                or ``timeout`` seconds pass without a 200 response.
        """
        start = time.monotonic()
        while True:
            last_err = None
            try:
                response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
                if response.status_code == 200:
                    return
            except requests.RequestException as err:
                last_err = err

            # Reached for failed requests AND non-200 responses, so a server
            # that answers "not ready" is still subject to the pacing and the
            # timeout below instead of being polled in a tight, endless loop.
            result = self.proc.poll()
            if result is not None and result != 0:
                raise self._startup_error(
                    f"Server exited unexpectedly with return code {result}."
                ) from last_err

            if time.monotonic() - start > timeout:
                raise self._startup_error(
                    "Server failed to start in time."
                ) from last_err

            time.sleep(0.5)

    @property
    def url_root(self) -> str:
        """Base URL of the server, e.g. ``http://localhost:9000``."""
        return f"http://{self.host}:{self.port}"

    def url_for(self, *parts: str) -> str:
        """Return ``url_root`` followed by ``parts`` joined with ``/``."""
        return self.url_root + "/" + "/".join(parts)

    def get_client(self):
        """Return a synchronous OpenAI client pointed at this server's ``/v1``."""
        return openai.OpenAI(
            base_url=self.url_for("v1"),
            api_key=self.API_KEY,
        )

    def get_async_client(self):
        """Return an asynchronous OpenAI client pointed at this server's ``/v1``."""
        return openai.AsyncOpenAI(
            base_url=self.url_for("v1"),
            api_key=self.API_KEY,
        )


================================================
FILE: python/openai/tests/vllm_embedding_models/all-MiniLM-L6-v2/1/model.json
================================================
{"model": "sentence-transformers/all-MiniLM-L6-v2", "gpu_memory_utilization": 0.5}


================================================
FILE: python/openai/tests/vllm_embedding_models/all-MiniLM-L6-v2/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

backend: "vllm"
instance_group [{kind: KIND_MODEL}]


================================================
FILE: python/openai/tests/vllm_mistral_models/mistral-nemo-instruct-2407/1/model.json
================================================
{"model": "mistralai/Mistral-Nemo-Instruct-2407", "gpu_memory_utilization": 0.9}

================================================
FILE: python/openai/tests/vllm_mistral_models/mistral-nemo-instruct-2407/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

backend: "vllm"
instance_group [{kind: KIND_MODEL}]


================================================
FILE: python/openai/tests/vllm_models/llama-3.1-8b-instruct/1/model.json
================================================
{"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "gpu_memory_utilization": 0.9}


================================================
FILE: python/openai/tests/vllm_models/llama-3.1-8b-instruct/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

backend: "vllm"
instance_group [{kind: KIND_MODEL}]


================================================
FILE: qa/L0_additional_dependency_dirs/test.sh
================================================
#!/bin/bash
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument, when given,
# overrides NVIDIA_TRITON_SERVER_VERSION from the environment. Abort if neither
# provides a value.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# The test only runs when WSL_DISTRO_NAME or MSYSTEM is set. In that case each
# path below keeps a value already present in the environment and otherwise
# falls back to the Windows-style default shown. In any other environment the
# test exits with a failure.
if [[ -v WSL_DISTRO_NAME ]] || [[ -v MSYSTEM ]]; then
    TRITON_DIR=${TRITON_DIR:=c:/tritonserver}
    SERVER=${SERVER:=c:/tritonserver/bin/tritonserver.exe}
    BACKEND_DIR=${BACKEND_DIR:=c:/tritonserver/backends}
    MODELDIR=${MODELDIR:=c:/}
    TRITONSERVER_IPADDR=${TRITONSERVER_IPADDR:="localhost"}
else
    echo -e "Test is not currently supported for Linux"
    exit 1
fi

# FIXME: TPRD-244 - Do not use hard-coded dependency paths when TRT models
# are being regularly generated on Windows.
DEPENDENCY_PATH="C:/ci_test_deps/24.07"
STALE_DEPENDENCY_PATH="C:/ci_test_deps/24.05"
CUSTOM_DEPENDENCY_DIR=${MODELDIR}/custom_dependency_location
STALE_DEPENDENCY_DIR=${MODELDIR}/stale_dependency_location
TRT_MODEL_DIR="C:/tmp/24.07_trt_models"
# Unlike the other commands, the mv command requires the path to be fully in
# the UNIX style in order for regex to work properly. We cannot apply this
# uniformly because the command line requires Windows path style.
# NOTE(review): MODELDIR_POSIX is not assigned anywhere in this script before
# this point -- presumably it is provided by the CI environment; confirm. If it
# were empty, the paths below would resolve relative to the filesystem root.
LOCAL_CI_TEST_DEPS_DIR=${MODELDIR_POSIX}/ci_test_deps

source ../common/util.sh
# Start from a clean slate: drop directories and logs left by a previous run.
rm -rf ${CUSTOM_DEPENDENCY_DIR} ${LOCAL_CI_TEST_DEPS_DIR} ${STALE_DEPENDENCY_DIR} ${MODELDIR_POSIX}/models
rm -f ./*.log ./*.out
# Overall test status; set to 1 by any failing check below.
RET=0;

mkdir ${LOCAL_CI_TEST_DEPS_DIR} ${CUSTOM_DEPENDENCY_DIR} ${STALE_DEPENDENCY_DIR}

# Make a copy of the ci_test_deps directory and move out the TRT dependencies
# A pre-test step will add ${LOCAL_CI_TEST_DEPS_DIR} to the PATH
cp -r ${DEPENDENCY_PATH}/* ${LOCAL_CI_TEST_DEPS_DIR} && mv ${LOCAL_CI_TEST_DEPS_DIR}/nvinfer* ${CUSTOM_DEPENDENCY_DIR}/

# Stage the nvinfer files from the older (24.05) dependency set separately.
cp -r ${STALE_DEPENDENCY_PATH}/nvinfer* ${STALE_DEPENDENCY_DIR}/

# Build a model repository containing only the plan_int32_int32_int32 model.
mkdir ${MODELDIR_POSIX}/models && \
    cp -r ${TRT_MODEL_DIR}/qa_model_repository/plan_int32_int32_int32 ${MODELDIR_POSIX}/models/plan_int32_int32_int32

# Issue a single HTTP inference request against plan_int32_int32_int32.
#
# Sets the global INFER_SUCCESS to 1 when the server answers with HTTP 200 and
# to 0 otherwise (the response body in ./curl.out is printed in that case).
# errexit is switched off around curl and switched on again afterwards.
function simple_inference_check()
{
    local payload='{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},{"name":"INPUT1","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}]}'

    INFER_SUCCESS=1
    set +e
    code=$(curl -s -w '%{http_code}' -o ./curl.out -d "${payload}" ${TRITONSERVER_IPADDR}:8000/v2/models/plan_int32_int32_int32/infer)
    set -e
    if [[ "${code}" == "200" ]]; then
        return
    fi
    cat ./curl.out
    INFER_SUCCESS=0
}

# Test Case 1: Run server when TRT implicit dependencies are not on path (expect FAIL)
# No additional-dependency-dirs option is given here, so a successful start
# (SERVER_PID != 0) is treated as the failure condition.
SERVER_LOG="./not_on_path_server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: unexpected success starting $SERVER\n***"
    kill_server
    RET=1
fi

# Test Case 2: Launch server with additional options to point to custom dependency location (expect SUCCESS)
# The option value is a semicolon-terminated list of directories.
SERVER_LOG="./custom_dependency_dir_server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2 --backend-config=tensorrt,additional-dependency-dirs=${CUSTOM_DEPENDENCY_DIR};"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

# The inference check runs even if the start failed; it then records a second
# failure rather than aborting the script.
simple_inference_check
if [ "${INFER_SUCCESS}" == "0" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: simple inference check failed\n***"
    RET=1
fi

kill_server

# Test Case 3: Launch server with additional options to point to custom dependency location and load model dynamically (expect SUCCESS)
SERVER_LOG="./dynamic_loading_server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2 --backend-config=tensorrt,additional-dependency-dirs=${CUSTOM_DEPENDENCY_DIR}; --model-control-mode=explicit"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

# The server runs with --model-control-mode=explicit, so ask it to load the
# model through the repository API before sending inference requests.
# simple_inference_check leaves errexit enabled; suspend it around curl so a
# transport error (e.g. the server never started) is recorded in RET below
# instead of aborting the script before the summary is printed.
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/plan_int32_int32_int32/load`
set -e
if [ "$code" != "200" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: Unable to load model: plan_int32_int32_int32\n***"
    # curl does not create the output file when it cannot connect.
    if [ -f ./curl.out ]; then
        cat ./curl.out
    fi
    RET=1
fi

simple_inference_check
if [ "${INFER_SUCCESS}" == "0" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: simple inference check failed\n***"
    RET=1
fi

kill_server

# Test Case 4: Run server when pointing to stale TRT dependencies (expect FAIL)
# A successful start (SERVER_PID != 0) is the failure condition here.
SERVER_LOG="./stale_dependencies_server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2 --backend-config=tensorrt,additional-dependency-dirs=${STALE_DEPENDENCY_DIR};"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: unexpected success starting $SERVER\n***"
    kill_server
    RET=1
fi

# Test Case 5: [Test ordering] Run server when pointing to stale and correct TRT dependencies (stale first - expect FAIL).
# Both directories are listed; only their order differs from Test Case 6.
SERVER_LOG="./stale_first_server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2 --backend-config=tensorrt,additional-dependency-dirs=${STALE_DEPENDENCY_DIR};${CUSTOM_DEPENDENCY_DIR};"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: unexpected success starting $SERVER\n***"
    kill_server
    RET=1
fi

# Test Case 6:  [Test ordering] Run server when pointing to stale and correct TRT dependencies (correct first - expect SUCCESS).
SERVER_LOG="./correct_first_server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2 --backend-config=tensorrt,additional-dependency-dirs=${CUSTOM_DEPENDENCY_DIR};${STALE_DEPENDENCY_DIR};"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

# A successful start must also be able to serve an inference request.
simple_inference_check
if [ "${INFER_SUCCESS}" == "0" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: simple inference check failed\n***"
    RET=1
fi

kill_server

# Test cases 7-10 modify PATH; keep the original value so it can be restored
# after each case.
ORIGINAL_PATH=$PATH
# Test Case 7: [Test ordering] Run server when correct TRT dependencies exist in environment PATH (expect SUCCESS)
# No additional-dependency-dirs option is passed; the directory is appended to
# PATH instead.
SERVER_LOG="./correct_in_environment.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2"
PATH="${ORIGINAL_PATH};${CUSTOM_DEPENDENCY_DIR};"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

simple_inference_check
if [ "${INFER_SUCCESS}" == "0" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: simple inference check failed\n***"
    RET=1
fi

# Restore PATH before stopping the server so later cases start clean.
PATH=$ORIGINAL_PATH
kill_server

# Test Case 8: [Test ordering] Run server when correct TRT dependencies exist in environment PATH, but user specifies stale additional dependency directory (user input takes priority - expect FAIL)
SERVER_LOG="./correct_in_environment_but_user_adds_stale.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2 --backend-config=tensorrt,additional-dependency-dirs=${STALE_DEPENDENCY_DIR};"
PATH="${ORIGINAL_PATH};${CUSTOM_DEPENDENCY_DIR};"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: unexpected success starting $SERVER\n***"
    kill_server
    RET=1
fi

PATH=$ORIGINAL_PATH

# Test Case 9: [Test ordering] Run server when stale TRT dependencies exist in environment PATH (expect FAIL)
SERVER_LOG="./stale_in_environment.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2"
PATH="${ORIGINAL_PATH};${STALE_DEPENDENCY_DIR};"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: unexpected success starting $SERVER\n***"
    kill_server
    RET=1
fi

PATH=$ORIGINAL_PATH

# Test Case 10: [Test ordering] Run server when stale TRT dependencies exist in environment PATH, but user specifies correct additional dependency directory (user input takes priority - expect SUCCESS)
SERVER_LOG="./stale_in_environment_but_user_adds_correct.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2 --backend-config=tensorrt,additional-dependency-dirs=${CUSTOM_DEPENDENCY_DIR};"
PATH="${ORIGINAL_PATH};${STALE_DEPENDENCY_DIR};"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

simple_inference_check
if [ "${INFER_SUCCESS}" == "0" ]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: simple inference check failed\n***"
    RET=1
fi

PATH=$ORIGINAL_PATH
kill_server

# Test Case 11: Incorrect extension usage. User provided path(s) that are not semi-colon separated (expect FAIL).
SERVER_LOG="./incorrect_usage_server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=2 --backend-config=tensorrt,additional-dependency-dirs=C:/not_semicolon_terminated"
run_server
if [[ "${SERVER_PID}" != "0" ]]; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: unexpected success starting $SERVER\n***"
    kill_server
    RET=1
fi

# The server log must contain the "malformed" error statement.
if ! grep -q "malformed" ${SERVER_LOG}; then
    echo -e "\n***\n*** FAILED on line ${LINENO}: expected error statement not found $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

# Report the accumulated result and use it as the script's exit status.
if [[ ${RET} -ne 0 ]]; then
  echo -e "\n***\n*** Test FAILED\n***"
else
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_async_work_queue/test.sh
================================================
#!/bin/bash
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Runs the async_work_queue unit-test binary and reports its result.
TEST_LOG="./async_work_queue.log"
ASYNC_WORK_QUEUE_TEST=./async_work_queue_test

RET=0

export CUDA_VISIBLE_DEVICES=0

# Remove the log of any previous run. The test output below is appended (>>),
# so a stale log would otherwise be mixed into this run's output.
rm -f $TEST_LOG

# errexit is disabled while the binary runs so a non-zero exit is recorded in
# RET instead of terminating the script.
set +e
$ASYNC_WORK_QUEUE_TEST >>$TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

# Print the summary; on failure dump the captured test output first.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $TEST_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_bls/test.sh
================================================
#!/bin/bash
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

export CUDA_VISIBLE_DEVICES=0

# Source locations and tags for the backend build; each can be overridden from
# the environment and defaults to the "main" branch of the upstream org.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"}
TRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG:="main"}
TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG:="main"}
TRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG:="main"}

SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh

# RET accumulates the overall result: 0 = pass, 1 = at least one failure.
RET=0

# Backend build requires recent version of CMake (FetchContent required)
# Using CMake installation instructions from: https://apt.kitware.com/
# The steps are chained with && so a failing step skips the remaining ones.
apt update -q=2 \
    && apt install -y gpg wget \
    && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - |  tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
    && . /etc/os-release \
    && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
    && apt-get update -q=2 \
    && apt-get install -y --no-install-recommends cmake=4.0.3* cmake-data=4.0.3* \
            rapidjson-dev
cmake --version

# Remove logs and the checkout left by a previous run before cloning again.
rm -fr *.log ./backend

git clone --single-branch --depth=1 -b $TRITON_BACKEND_REPO_TAG \
    ${TRITON_REPO_ORGANIZATION}/backend.git

# Build and install the example BLS backend inside a subshell so the working
# directory and the exported CMAKE_POLICY_VERSION_MINIMUM do not leak into the
# rest of the script.
(cd backend/examples/backends/bls &&
 mkdir build &&
 cd build &&
 export CMAKE_POLICY_VERSION_MINIMUM=3.5 &&
 cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
       -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
       -DTRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG} \
       -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
       -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
       -DWARNINGS_AS_ERRORS:BOOL=OFF \
       .. &&
 make -j4 install)

# Replace any installed bls backend with the freshly built one.
rm -fr /opt/tritonserver/backends/bls
cp -r backend/examples/backends/bls/build/install/backends/bls /opt/tritonserver/backends/.

SERVER_LOG="./inference_server.log"
CLIENT_LOG="./client.log"
SERVER_ARGS="--model-repository=$(pwd)/backend/examples/model_repos/bls_models --log-verbose=1"

# Create the version directory for the bls_fp32 model in the example
# repository.
mkdir $(pwd)/backend/examples/model_repos/bls_models/bls_fp32/1/

# Run the server with all the required models.
run_server
if [[ "${SERVER_PID}" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Failures below are collected in RET rather than aborting the script.
set +e
if ! backend/examples/clients/bls_client >> $CLIENT_LOG 2>&1; then
    echo "Failed: Client test had a non-zero return code."
    RET=1
fi

# The client output must contain "PASS"; matching lines are echoed to stdout.
if ! grep "PASS" $CLIENT_LOG; then
    echo -e "\n***\n*** bls_test.py FAILED. \n***"
    cat $CLIENT_LOG
    cat $SERVER_LOG
    RET=1
fi
set -e

# Stop the server and wait for it to exit.
kill $SERVER_PID
wait $SERVER_PID

if [[ ${RET} -ne 0 ]]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_config/test.sh
================================================
#!/bin/bash
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Parses default-max-batch-size log record
#
# Prints the second comma-separated field of the record passed as $1, with any
# leading/trailing double quotes removed.
#
# Example log record:
# I0521 02:12:37.402353 161 backend_model.cc:503] "Adding default backend config setting: default-max-batch-size,4
parse_default_max_batch_size() {
    # The record is handed to Python as an argument instead of being spliced
    # into the Python source, so quotes or backslashes in the log text cannot
    # break (or alter) the parsing code. The surrounding echo keeps the
    # function's exit status at 0 even when the record cannot be parsed; the
    # caller then sees an empty result.
    echo $(python3 -c 'import sys; print(sys.argv[1].split(",")[1].strip("\""))' "$1")
}

# Returns backend configuration json
# message from server log file path
#
# $1: path of the server log to search.
# Prints the text that follows `] "backend configuration:\n` on the matching
# log line, with trailing double quotes removed.
#
# Example: config_map=$(get_config_map server.log)
get_config_map() {
    # Grab the log line that carries the backend configuration.
    BACKEND_CONFIG_MAP=$(grep "backend configuration:" $1)
    # The line is embedded in a Python single-quoted literal, so Python escape
    # sequences in the logged text (e.g. \n, \") are interpreted before the
    # split. NOTE(review): this looks like it is relied on to unescape the
    # logged JSON - confirm before changing how the text is passed to Python.
    echo $(python3 -c "backend_config='$BACKEND_CONFIG_MAP'.split('] \"backend configuration:\n')[1].rstrip('\"');print(backend_config)")
}

# Resolve the repository version: an explicit first argument takes precedence
# over NVIDIA_TRITON_SERVER_VERSION, and TEST_REPO_ARCH (when set) is appended.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [[ -n "${TEST_REPO_ARCH}" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Model repository with a single ONNX model that has no config file.
rm -rf ./models/
mkdir -p ./models/no_config
cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_float32_float32_float32/1 ./models/no_config/


SERVER=/opt/tritonserver/bin/tritonserver
SERVER_TIMEOUT=20
source ../common/util.sh

# Clear logs and outputs from earlier runs.
SERVER_LOG_BASE="./inference_server"
rm -f $SERVER_LOG_BASE* *.out

COMMON_ARGS="--model-repository=$(pwd)/models --strict-model-config=false --log-verbose=1 "

# Malformed --backend-config values that the server is expected to reject.
NEGATIVE_PARSE_ARGS=(
    "--backend-config=,default-max-batch-size=3 $COMMON_ARGS"
    "--backend-config=default-max-batch-size= $COMMON_ARGS"
    "--backend-config=default-max-batch-size $COMMON_ARGS"
)

POSITIVE_DEFAULT_ARGS=$COMMON_ARGS
POSITIVE_TEST_ARGS=(
    "--backend-config=default-max-batch-size=5 $COMMON_ARGS"
    "--backend-config=default-max-batch-size=6 $COMMON_ARGS"
    "--backend-config=default-max-batch-size=7 --backend-config=default-max-batch-size=8 $COMMON_ARGS"
)

# These integers correspond to the expected default-max-batch-size which gets set
# in the POSITIVE_TEST_ARGS
POSITIVE_TEST_ANSWERS=(5 6 8)
# RET accumulates the overall result: 0 = pass, 1 = at least one failure.
RET=0
# Positive tests
# Default case: without any --backend-config override the server log is
# expected to report a default-max-batch-size of 4.
SERVER_ARGS=$POSITIVE_DEFAULT_ARGS
SERVER_LOG=$SERVER_LOG_BASE.backend_config_positive_default.log
run_server

if [ "$SERVER_PID" == "0" ]; then
    echo -e "*** FAILED: Server failed to start $SERVER\n"
    RET=1

else
    # Only the startup log is inspected, so the server can be stopped first.
    kill $SERVER_PID
    wait $SERVER_PID

    RESULT_LOG_LINE=$(grep -a "Adding default backend config setting:" $SERVER_LOG)
    if [ "$RESULT_LOG_LINE" != "" ]; then

        # Pick out the logged value of the default-max-batch-size which gets passed into model creation
        RESOLVED_DEFAULT_MAX_BATCH_SIZE=$(parse_default_max_batch_size "${RESULT_LOG_LINE}")

        if [ "$RESOLVED_DEFAULT_MAX_BATCH_SIZE" != "4" ]; then
            # echo -e so the trailing \n is rendered as a newline, like the
            # other failure messages in this script.
            echo -e "*** FAILED: Found default-max-batch-size not equal to the expected default-max-batch-size. Expected: default-max-batch-size,4, Found: $RESOLVED_DEFAULT_MAX_BATCH_SIZE \n"
            RET=1
        fi
    else
        echo -e "*** FAILED: No log statement stating default max batch size\n"
        RET=1
    fi
fi

# Each entry of POSITIVE_TEST_ARGS overrides default-max-batch-size; the value
# the server logs must match the entry at the same index in
# POSITIVE_TEST_ANSWERS.
for ((i=0; i < ${#POSITIVE_TEST_ARGS[@]}; i++)); do
    SERVER_ARGS=${POSITIVE_TEST_ARGS[$i]}
    SERVER_LOG=$SERVER_LOG_BASE.backend_config_positive_$i.log
    run_server

    if [ "$SERVER_PID" == "0" ]; then
        echo -e "*** FAILED: Server failed to start $SERVER\n"
        RET=1

    else
        # Only the startup log is inspected, so the server can be stopped first.
        kill $SERVER_PID
        wait $SERVER_PID

        RESULT_LOG_LINE=$(grep -a "Found overwritten default setting:" $SERVER_LOG)
        if [ "$RESULT_LOG_LINE" != "" ]; then

            # Pick out the logged value of the default-max-batch-size which gets passed into model creation
            RESOLVED_DEFAULT_MAX_BATCH_SIZE=$(parse_default_max_batch_size "${RESULT_LOG_LINE}")

            if [ "$RESOLVED_DEFAULT_MAX_BATCH_SIZE" != "${POSITIVE_TEST_ANSWERS[$i]}" ]; then
                # echo -e so the trailing \n is rendered as a newline, like
                # the other failure messages in this script.
                echo -e "*** FAILED: Found default-max-batch-size not equal to the expected default-max-batch-size. Expected: ${POSITIVE_TEST_ANSWERS[$i]}, Found: $RESOLVED_DEFAULT_MAX_BATCH_SIZE \n"
                RET=1
            fi
        else
            echo -e "*** FAILED: No log statement stating default max batch size\n"
            RET=1
        fi
    fi
done

# Negative tests
# Every entry of NEGATIVE_PARSE_ARGS is syntactically invalid: the server must
# refuse to start and its log must mention the expected option format.
for i in "${!NEGATIVE_PARSE_ARGS[@]}"; do
    SERVER_ARGS=${NEGATIVE_PARSE_ARGS[$i]}
    SERVER_LOG=$SERVER_LOG_BASE.backend_config_negative_parse$i.log
    run_server

    if [[ "${SERVER_PID}" != "0" ]]; then
        # The server came up even though the option was malformed.
        echo -e "*** FAILED: Expected server to exit with error, but found running.\n"
        RET=1
        kill $SERVER_PID
        wait $SERVER_PID
    elif ! grep -e "--backend-config option format is" $SERVER_LOG; then
        echo -e "*** FAILED: Expected invalid backend config parse message but found other error.\n"
        RET=1
    fi
done


#
# Specific backend tests
#

# While inference server is running, save the
# config of the 'no_config' model to the TRIAL
# file.
#
# Reads the global TRIAL (output file base name), writes ./$TRIAL.out and sets
# RET=1 when the server does not answer with HTTP 200. errexit is enabled when
# the function returns.
function save_model_config() {
    # Suspend errexit around curl: after the first call errexit is active, and
    # a transport failure must be reported through RET rather than abort the
    # script.
    set +e
    CODE=`curl -s -w %{http_code} -o ./$TRIAL.out localhost:8000/v2/models/no_config/config`
    set -e
    if [ "$CODE" != "200" ]; then
        # curl does not create the output file when it cannot connect.
        if [ -f ./$TRIAL.out ]; then
            cat $TRIAL.out
        fi
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
}

# Onnxruntime: Batching ON
rm -rf ./models/
mkdir -p ./models/no_config
cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_float32_float32_float32/1 ./models/no_config/

SERVER_ARGS="--backend-config=onnxruntime,default-max-batch-size=5 $COMMON_ARGS"
SERVER_LOG=$SERVER_LOG_BASE.backend_config_onnxruntime_batch_5.log
run_server

TRIAL=onnxruntime_batching_on
if [ "$SERVER_PID" == "0" ]; then
    echo -e "*** FAILED: Server failed to start $SERVER\n"
    RET=1

else
    save_model_config

    # save_model_config leaves errexit enabled, and an assignment from a
    # failing command substitution would abort the script. "|| true" keeps a
    # missing match from aborting so it is reported through RET instead.

    # Assert the max-batch-size is the command line value
    MAX_BATCH_LOG_LINE=$(grep -a "\"max_batch_size\":5" $TRIAL.out || true)
    if [ "$MAX_BATCH_LOG_LINE" == "" ]; then
        echo -e "*** FAILED: Expected max batch size to be 5 but found: $MAX_BATCH_LOG_LINE\n"
        RET=1
    fi

    # Assert we are also turning on the dynamic_batcher
    DYNAMIC_BATCHING_LOG_LINE=$(grep -a "Starting dynamic-batcher thread" $SERVER_LOG || true)
    if [ "$DYNAMIC_BATCHING_LOG_LINE" == "" ]; then
        echo -e "*** FAILED: Expected dynamic batching to be set in model config but was not found\n"
        RET=1
    fi

    kill $SERVER_PID
    wait $SERVER_PID
fi

# Onnxruntime: Batching OFF
rm -rf ./models/
mkdir -p ./models/no_config
cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_float32_float32_float32/1 ./models/no_config/

SERVER_ARGS="--backend-config=onnxruntime,default-max-batch-size=0 $COMMON_ARGS"
SERVER_LOG=$SERVER_LOG_BASE.backend_config_onnxruntime_batch_0.log
run_server

TRIAL=onnxruntime_batching_off
if [ "$SERVER_PID" == "0" ]; then
    echo -e "*** FAILED: Server failed to start $SERVER\n"
    RET=1

else
    save_model_config

    # Assert the max-batch-size is 0 in the case batching is supported
    # in the model but not in the config.
    # save_model_config leaves errexit enabled; "|| true" keeps a missing
    # match from aborting the script so it is reported through RET instead.
    MAX_BATCH_LOG_LINE=$(grep -a "\"max_batch_size\":0" $TRIAL.out || true)
    if [ "$MAX_BATCH_LOG_LINE" == "" ]; then
        echo -e "*** FAILED: Expected max batch size to be 0 but found: $MAX_BATCH_LOG_LINE\n"
        RET=1
    fi

    # Assert batching disabled
    if [ "$(grep -a -E '\"dynamic_batching\": \{}' $SERVER_LOG)" != "" ]; then
        echo -e "*** FAILED: Found dynamic batching in configuration when none expected.\n"
        RET=1
    fi

    kill $SERVER_PID
    wait $SERVER_PID

fi

#
# General backend tests
#

# We want to make sure that backend configurations
# are not lost. For this purpose we are using only onnx backend

rm -rf ./models/
mkdir -p ./models/no_config/
cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_float32_float32_float32/1 ./models/no_config/

# First getting a baseline for the number of default configs
# added during a server set up
SERVER_ARGS="$COMMON_ARGS"
SERVER_LOG=$SERVER_LOG_BASE.default_configs.log
run_server

if [ "$SERVER_PID" == "0" ]; then
    echo -e "*** FAILED: Server failed to start $SERVER\n"
    RET=1

else
    # Count number of default configs
    BACKEND_CONFIG_MAP=$(get_config_map $SERVER_LOG)
    # "|| true": a jq failure must not abort the script (errexit is active at
    # this point); the empty count is caught by the check below.
    DEFAULT_CONFIG_COUNT=$(echo $BACKEND_CONFIG_MAP | jq -r | jq '.["cmdline"]' | jq length || true)
    # The test is written as "not >= 4" so that an empty or non-numeric count
    # (which makes [ ] itself fail) is also reported as a failure.
    if ! [ "$DEFAULT_CONFIG_COUNT" -ge 4 ]; then
        echo -e "*** FAILED: Expected number of default configs to be at least 4 but found: $DEFAULT_CONFIG_COUNT\n"
        RET=1
    fi

    kill $SERVER_PID
    wait $SERVER_PID

fi

# Now make sure that when setting specific backend configs
# default ones are not lost.
# Current logic for backend config resolution reads default configs first,
# then specific configs and overrides defaults if needed.
# We would like to make sure that none of the configs are lost and
# defaults are properly overridden.
# One of the default configs is `min-compute-capability`. This test
# checks if it is properly overridden.
MIN_COMPUTE_CAPABILITY=XX
SERVER_ARGS="--backend-config=onnxruntime,min-compute-capability=$MIN_COMPUTE_CAPABILITY $COMMON_ARGS"
SERVER_LOG=$SERVER_LOG_BASE.global_configs.log
run_server

if [ "$SERVER_PID" == "0" ]; then
    echo -e "*** FAILED: Server failed to start $SERVER\n"
    RET=1

else
    # Read back the min-compute-capability value from the logged config
    BACKEND_CONFIG_MAP=$(get_config_map $SERVER_LOG)
    # "|| true": a jq failure must not abort the script (errexit is active at
    # this point); the empty value is caught by the comparison below.
    CONFIG_VALUE=$(echo $BACKEND_CONFIG_MAP | jq -r | jq '.["cmdline"]' | jq -r '.["min-compute-capability"]' || true)

    # Operands are quoted so an empty value compares as a mismatch instead of
    # turning the test into a syntax error that is silently treated as a pass.
    if [ "$CONFIG_VALUE" != "$MIN_COMPUTE_CAPABILITY" ]; then
        echo -e "*** FAILED: Expected min-compute-capability config to be $MIN_COMPUTE_CAPABILITY but found: $CONFIG_VALUE\n"
        RET=1
    fi

    kill $SERVER_PID
    wait $SERVER_PID

fi
# Now make sure that specific backend configs are not lost.
# Three extra onnxruntime settings are passed, so the logged config count must
# be the baseline DEFAULT_CONFIG_COUNT plus 3.
SERVER_ARGS="--backend-config=onnxruntime,a=0 --backend-config=onnxruntime,y=0 --backend-config=onnxruntime,z=0 $COMMON_ARGS"
SERVER_LOG=$SERVER_LOG_BASE.specific_configs.log
EXPECTED_CONFIG_COUNT=$(($DEFAULT_CONFIG_COUNT+3))
run_server

if [ "$SERVER_PID" == "0" ]; then
    echo -e "*** FAILED: Server failed to start $SERVER\n"
    RET=1

else
    # Count number of configs (defaults plus the three added above)
    BACKEND_CONFIG_MAP=$(get_config_map $SERVER_LOG)
    # "|| true": a jq failure must not abort the script (errexit is active at
    # this point); the empty count is caught by the check below.
    TOTAL_CONFIG_COUNT=$(echo $BACKEND_CONFIG_MAP | jq -r | jq '.["cmdline"]' | jq 'length' || true)

    # Written as "not equal" via negation so that an empty or non-numeric
    # count (which makes [ ] itself fail) is also reported as a failure.
    if ! [ "$TOTAL_CONFIG_COUNT" -eq "$EXPECTED_CONFIG_COUNT" ]; then
        echo -e "*** FAILED: Expected number of backend configs to be $EXPECTED_CONFIG_COUNT but found: $TOTAL_CONFIG_COUNT\n"
        RET=1
    fi

    kill $SERVER_PID
    wait $SERVER_PID

fi


# Print test outcome
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_fastertransformer/test.sh
================================================
#!/bin/bash
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Repository and tag of the FasterTransformer backend; both can be overridden
# from the environment.
: "${FASTERTRANSFORMER_BRANCH_TAG:=main}"
: "${FASTERTRANSFORMER_BRANCH:=https://github.com/triton-inference-server/fastertransformer_backend.git}"
SERVER_TIMEOUT=600
SERVER_LOG="$PWD/inference_server"
CLIENT_LOG="$PWD/client"

# Server layout; MODEL_DIR and TRITON_DIR can be overridden from the
# environment. The default MODEL_DIR points into the checkout cloned below.
: "${MODEL_DIR:=$PWD/fastertransformer_backend/all_models/t5/}"
: "${TRITON_DIR:=/opt/tritonserver}"
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS_EXTRA="--exit-timeout-secs=${SERVER_TIMEOUT} --backend-directory=${BACKEND_DIR}"
SERVER_ARGS="--model-repository=${MODEL_DIR} ${SERVER_ARGS_EXTRA}"
source ../common/util.sh

# Clear logs left by a previous run.
rm -f $SERVER_LOG* $CLIENT_LOG*

RET=0
# Install dependencies
apt-get update \
    && apt-get install -y --no-install-recommends python3 python3-pip python3-protobuf
pip3 install --upgrade "numpy<2"

# Install client libraries
pip3 install tritonclient[all]

# Clone the backend repository and run the rest of the test from inside it
git clone --single-branch --depth=1 -b ${FASTERTRANSFORMER_BRANCH_TAG} ${FASTERTRANSFORMER_BRANCH}
cd fastertransformer_backend

run_server

if [[ "${SERVER_PID}" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# From here on failures are collected in RET instead of aborting the script.
set +e

# Send the sample T5 request; a non-zero exit of the client fails the test.
if ! python3 tools/issue_request.py tools/requests/sample_request_single_t5.json >$CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    RET=1
fi

kill_server

if [[ ${RET} -ne 0 ]]; then
    cat $SERVER_LOG
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_identity/identity_test.py
================================================
#!/usr/bin/python

# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import sys
from builtins import range

import numpy as np
import requests as httpreq
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

FLAGS = None

if __name__ == "__main__":
    # Identity backend client test.
    #
    # 1. (HTTP only) Fire several async requests at "identity_uint32" and
    #    verify outputs, model statistics and Prometheus metrics.
    # 2. For both protocols, run synchronous requests against each identity
    #    model variant and verify outputs and response parameters.
    #
    # Any failed check prints an "error: ..." message and exits with status 1.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-u", "--url", type=str, required=False, help="Inference server URL."
    )
    parser.add_argument(
        "-i",
        "--protocol",
        type=str,
        required=False,
        default="http",
        help='Protocol ("http"/"grpc") used to '
        + 'communicate with inference service. Default is "http".',
    )

    FLAGS = parser.parse_args()
    if (FLAGS.protocol != "http") and (FLAGS.protocol != "grpc"):
        print(
            'unexpected protocol "{}", expects "http" or "grpc"'.format(FLAGS.protocol)
        )
        # Use sys.exit like every other exit path in this script; the bare
        # exit() helper is injected by the site module and is not guaranteed
        # to exist.
        sys.exit(1)

    client_util = httpclient if FLAGS.protocol == "http" else grpcclient

    # Default URL depends on the protocol's default port.
    if FLAGS.url is None:
        FLAGS.url = "localhost:8000" if FLAGS.protocol == "http" else "localhost:8001"

    # Run async requests to make sure backend handles request batches
    # correctly. We use just HTTP for this since we are not testing the
    # protocol anyway.
    if FLAGS.protocol == "http":
        model_name = "identity_uint32"
        request_parallelism = 4
        shape = [2, 2]
        with client_util.InferenceServerClient(
            FLAGS.url, concurrency=request_parallelism, verbose=FLAGS.verbose
        ) as client:
            input_datas = []
            requests = []
            for i in range(request_parallelism):
                input_data = (16384 * np.random.randn(*shape)).astype(np.uint32)
                input_datas.append(input_data)
                inputs = [
                    client_util.InferInput(
                        "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
                    )
                ]
                inputs[0].set_data_from_numpy(input_data)
                requests.append(client.async_infer(model_name, inputs))

            for i in range(request_parallelism):
                # Get the result from the initiated asynchronous inference request.
                # Note the call will block till the server responds.
                results = requests[i].get_result()
                print(results)

                output_data = results.as_numpy("OUTPUT0")
                if output_data is None:
                    print("error: expected 'OUTPUT0'")
                    sys.exit(1)

                if not np.array_equal(output_data, input_datas[i]):
                    print(
                        "error: expected output {} to match input {}".format(
                            output_data, input_datas[i]
                        )
                    )
                    sys.exit(1)

            # Make sure the requests ran in parallel.
            stats = client.get_inference_statistics(model_name)
            if (len(stats["model_stats"]) != 1) or (
                stats["model_stats"][0]["name"] != model_name
            ):
                print("error: expected statistics for {}".format(model_name))
                sys.exit(1)

            # 4 requests x batch dimension 2 = 8 inferences; a single
            # execution means the server batched all requests together.
            stat = stats["model_stats"][0]
            if (stat["inference_count"] != 8) or (stat["execution_count"] != 1):
                print(
                    "error: expected execution_count == 1 and inference_count == 8, got {} and {}".format(
                        stat["execution_count"], stat["inference_count"]
                    )
                )
                sys.exit(1)

            # Check metrics to make sure they are reported correctly
            metrics = httpreq.get("http://localhost:8002/metrics")
            print(metrics.text)

            success_str = (
                'nv_inference_request_success{model="identity_uint32",version="1"}'
            )
            infer_count_str = 'nv_inference_count{model="identity_uint32",version="1"}'
            infer_exec_str = (
                'nv_inference_exec_count{model="identity_uint32",version="1"}'
            )
            custom_metric_str = (
                'input_byte_size_counter{model="identity_uint32",version="1"}'
            )

            # Each metric line is "<name>{labels} <value>"; the value is
            # whatever follows the expected prefix.
            success_val = None
            infer_count_val = None
            infer_exec_val = None
            custom_metric_val = None
            for line in metrics.text.splitlines():
                if line.startswith(success_str):
                    success_val = float(line[len(success_str) :])
                if line.startswith(infer_count_str):
                    infer_count_val = float(line[len(infer_count_str) :])
                if line.startswith(infer_exec_str):
                    infer_exec_val = float(line[len(infer_exec_str) :])
                if line.startswith(custom_metric_str):
                    custom_metric_val = float(line[len(custom_metric_str) :])

            if success_val != 4:
                print(
                    "error: expected metric {} == 4, got {}".format(
                        success_str, success_val
                    )
                )
                sys.exit(1)
            if infer_count_val != 8:
                print(
                    "error: expected metric {} == 8, got {}".format(
                        infer_count_str, infer_count_val
                    )
                )
                sys.exit(1)
            if infer_exec_val != 1:
                print(
                    "error: expected metric {} == 1, got {}".format(
                        infer_exec_str, infer_exec_val
                    )
                )
                sys.exit(1)
            # 4 requests x (2 x 2 uint32 elements x 4 bytes) = 64 input bytes.
            if custom_metric_val != 64:
                print(
                    "error: expected metric {} == 64, got {}".format(
                        custom_metric_str, custom_metric_val
                    )
                )
                sys.exit(1)

    # Reuse a single client for all sync tests
    with client_util.InferenceServerClient(FLAGS.url, verbose=FLAGS.verbose) as client:
        for model_name, np_dtype, shape in (
            # yapf: disable
            ("identity_fp32", np.float32, [1, 0]),
            ("identity_fp32", np.float32, [1, 5]),
            ("identity_uint32", np.uint32, [4, 0]),
            ("identity_uint32", np.uint32, [8, 5]),
            ("identity_nobatch_int8", np.int8, [0]),
            ("identity_nobatch_int8", np.int8, [7]),
            ("identity_bytes", object, [1, 1]),
            ("identity_bf16", np.float32, [1, 0]),
            ("identity_bf16", np.float32, [1, 5])
        ):
            # yapf: enable
            if np_dtype != object:
                input_data = (16384 * np.random.randn(*shape)).astype(np_dtype)
            else:
                # String tensors are sent as an object array of str elements.
                in0 = 16384 * np.ones(shape, dtype="int")
                in0n = np.array([str(x) for x in in0.reshape(in0.size)], dtype=object)
                input_data = in0n.reshape(in0.shape)
            # BF16 data is supplied as float32 on the client side, so the
            # Triton dtype cannot be derived from the numpy dtype.
            if model_name != "identity_bf16":
                triton_type = np_to_triton_dtype(input_data.dtype)
            else:
                triton_type = "BF16"
            inputs = [client_util.InferInput("INPUT0", input_data.shape, triton_type)]
            inputs[0].set_data_from_numpy(input_data)

            results = client.infer(model_name, inputs)
            print(results)

            # Make sure outputs are expected value
            output_data = results.as_numpy("OUTPUT0")

            # Check for a missing output *before* touching it; previously the
            # bytes path dereferenced output_data ahead of this check.
            if output_data is None:
                print("error: expected 'OUTPUT0'")
                sys.exit(1)

            if np_dtype == object:
                # Returned elements are bytes; decode so they compare equal
                # to the str inputs.
                output_data = np.array(
                    [str(x, encoding="utf-8") for x in output_data.flatten()],
                    dtype=object,
                ).reshape(output_data.shape)

            if model_name == "identity_bf16":
                if input_data.shape != output_data.shape:
                    print(
                        "error: expected output shape {} to match input shape {}".format(
                            output_data.shape, input_data.shape
                        )
                    )
                    sys.exit(1)
                # Compare element by element at the byte level: bytes [2:4]
                # must survive the round trip and bytes [0:2] must be zeroed.
                # NOTE(review): on a little-endian host bytes [2:4] are the
                # most-significant half of the float32, so the "low-order" /
                # "high-order" wording in the messages below looks swapped -
                # confirm before relying on the message text.
                for input_elem, output_elem in zip(
                    np.nditer(input_data, flags=["refs_ok", "zerosize_ok"], order="C"),
                    np.nditer(output_data, flags=["refs_ok", "zerosize_ok"], order="C"),
                ):
                    if input_elem.tobytes()[2:4] != output_elem.tobytes()[2:4]:
                        print(
                            "error: expected low-order bits of output {} to match low-order bits of input {}".format(
                                output_elem, input_elem
                            )
                        )
                        sys.exit(1)
                    if output_elem.tobytes()[0:2] != b"\x00\x00":
                        print(
                            "error: expected output {} to have all-zero high-order bits, got {}".format(
                                output_elem, output_elem.tobytes()[0:2]
                            )
                        )
                        sys.exit(1)
            else:
                if not np.array_equal(output_data, input_data):
                    print(
                        "error: expected output {} to match input {}".format(
                            output_data, input_data
                        )
                    )
                    sys.exit(1)

            # Make sure response parameters are correct
            response = results.get_response()
            if FLAGS.protocol == "http":
                # HTTP responses expose parameters as plain JSON values.
                params = response["parameters"]
                param0 = params["param0"]
                param1 = params["param1"]
                param2 = params["param2"]
                param3 = params["param3"]
            else:
                # gRPC responses wrap each parameter in a typed message.
                params = response.parameters
                param0 = params["param0"].string_param
                param1 = params["param1"].int64_param
                param2 = params["param2"].bool_param
                param3 = params["param3"].double_param

            if param0 != "an example string parameter":
                print("error: expected 'param0' == 'an example string parameter'")
                sys.exit(1)
            if param1 != 42:
                print("error: expected 'param1' == 42")
                sys.exit(1)
            if param2 != False:
                print("error: expected 'param2' == False")
                sys.exit(1)
            if param3 != 123.123:
                print("error: expected 'param3' == 123.123")
                sys.exit(1)


================================================
FILE: qa/L0_backend_identity/test.sh
================================================
#!/bin/bash
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Identity backend test driver: builds a model repository of identity model
# variants, starts the server, runs identity_test.py over HTTP and gRPC, and
# finally cross-checks the input byte sizes found in the server log.
export CUDA_VISIBLE_DEVICES=0

CLIENT_PY=./identity_test.py
CLIENT_LOG="./client.log"

SERVER=/opt/tritonserver/bin/tritonserver
# NOTE(review): verbose logging is presumably what makes the "byte_size = N"
# lines grepped at the end of this script appear in the server log - confirm.
SERVER_ARGS="--model-repository=`pwd`/all_models --log-verbose=1"
SERVER_LOG="./inference_server.log"
# Shared QA helpers; run_server used below is presumably defined here.
source ../common/util.sh

# Start from a clean slate: remove old logs and any previous repository.
rm -fr *.log ./all_models

# Every variant below is a copy of identity_fp32 whose config.pbtxt is
# rewritten in place (model name, data type and, where needed, batching).
cp -r ./models ./all_models
cp -r ./models/identity_fp32 ./all_models/identity_bytes
(cd all_models/identity_bytes && \
          sed -i "s/^name:.*/name: \"identity_bytes\"/" config.pbtxt && \
          sed -i "s/TYPE_FP32/TYPE_STRING/g" config.pbtxt)
# Non-batching INT8 variant (max_batch_size forced to 0).
cp -r ./models/identity_fp32 ./all_models/identity_nobatch_int8
(cd all_models/identity_nobatch_int8 && \
          sed -i "s/^name:.*/name: \"identity_nobatch_int8\"/" config.pbtxt && \
          sed -i "s/^max_batch_size:.*/max_batch_size: 0/" config.pbtxt && \
          sed -i "s/TYPE_FP32/TYPE_INT8/g" config.pbtxt)
# UINT32 variant with dynamic batching: preferred batch of 8 and a 3 second
# queue delay, appended to the end of the config.
cp -r ./models/identity_fp32 ./all_models/identity_uint32
(cd all_models/identity_uint32 && \
          sed -i "s/^name:.*/name: \"identity_uint32\"/" config.pbtxt && \
          sed -i "s/^max_batch_size:.*/max_batch_size: 8/" config.pbtxt && \
          sed -i "s/TYPE_FP32/TYPE_UINT32/g" config.pbtxt && \
          echo "dynamic_batching { preferred_batch_size: [8], max_queue_delay_microseconds: 3000000 }" >> config.pbtxt)
cp -r ./models/identity_fp32 ./all_models/identity_bf16
(cd all_models/identity_bf16 && \
          sed -i "s/^name:.*/name: \"identity_bf16\"/" config.pbtxt && \
          sed -i "s/TYPE_FP32/TYPE_BF16/g" config.pbtxt)

# A SERVER_PID of "0" after run_server is treated as a failed start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

# Run the client once per protocol. Errexit is disabled around the client so
# a failure is recorded in RET instead of aborting the script.
for PROTOCOL in http grpc; do
    set +e
    python $CLIENT_PY -i $PROTOCOL -v >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo "Failed: Client test had a non-zero return code."
        RET=1
    fi
    set -e
done

# Stop the server and wait for it to exit before inspecting its log.
kill $SERVER_PID
wait $SERVER_PID

# Validate the byte_sizes reported by backend
# Each item is "<grep pattern>, <expected count>". With IFS set to ',' the
# unquoted $i is split by `set --` into $1 (pattern) and $2 (count).
# NOTE(review): grep matches substrings, so the "byte_size = 16" count
# presumably also includes the "byte_size = 160" lines - verify.
OLDIFS=$IFS; IFS=','
for i in "byte_size = 0, 8", \
         "byte_size = 7, 2", \
         "byte_size = 16, 6", \
         "byte_size = 20, 2", \
         "byte_size = 160, 2" \
         ; do set -- $i; \
    # $SERVER_LOG is recorded as a binary file. Using -a option
    # to correctly grep the pattern in the server log.
    if [[ $(cat $SERVER_LOG | grep -a $1 | wc -l) -ne $2 ]]; then
        echo -e "\n***\n*** Test Failed $1 $2\n***"
        RET=1
    fi
done
IFS=$OLDIFS

# On failure dump both logs to aid debugging; exit status mirrors RET.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
    cat $SERVER_LOG
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_onnxruntime/gen_add_bf16_onnx_model.py
================================================
#!/usr/bin/env python3
# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# Generates the add_bf16 ONNX model and Triton config.
# Model: element-wise Add in BFLOAT16 (INPUT0 + INPUT1 = OUTPUT), ONNX Runtime backend.
import os

import onnx


def generate_bf16_add_model(models_dir):
    """Create the ``add_bf16`` model repository entry under *models_dir*.

    Writes ``<models_dir>/add_bf16/1/model.onnx`` (a single ``Add`` node with
    BFLOAT16 inputs ``INPUT0``/``INPUT1`` and output ``OUTPUT``, all of shape
    ``[1]``) followed by ``<models_dir>/add_bf16/config.pbtxt``, then prints a
    confirmation line.
    """
    model_name = "add_bf16"
    shape = [1]
    bf16 = onnx.TensorProto.BFLOAT16
    helper = onnx.helper

    def _bf16_tensor(tensor_name):
        # Every graph input/output shares the same dtype and shape.
        return helper.make_tensor_value_info(tensor_name, bf16, shape)

    add_node = helper.make_node("Add", ["INPUT0", "INPUT1"], ["OUTPUT"])
    graph_inputs = [_bf16_tensor(name) for name in ("INPUT0", "INPUT1")]
    graph_outputs = [_bf16_tensor("OUTPUT")]

    graph = helper.make_graph([add_node], model_name, graph_inputs, graph_outputs)
    model = helper.make_model(graph, producer_name="triton")
    # Older ONNX Runtime builds reject newer IR versions (e.g. max supported
    # 11), so never advertise more than that.
    if model.ir_version > 11:
        model.ir_version = 11
    # Opset 13 is the first one with BFLOAT16 support.
    model.opset_import[0].version = 13

    model_root = os.path.join(models_dir, model_name)
    version_dir = os.path.join(model_root, "1")
    os.makedirs(version_dir, exist_ok=True)
    onnx.save(model, os.path.join(version_dir, "model.onnx"))

    # Triton model configuration matching the graph above.
    config_template = """platform: "onnxruntime_onnx"
max_batch_size: 0
input [
  {{
    name: "INPUT0"
    data_type: TYPE_BF16
    dims: {shape}
  }},
  {{
    name: "INPUT1"
    data_type: TYPE_BF16
    dims: {shape}
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: TYPE_BF16
    dims: {shape}
  }}
]
"""
    config_text = config_template.format(shape=shape)

    with open(os.path.join(model_root, "config.pbtxt"), "w") as config_file:
        config_file.write(config_text)

    print(f"Generated model '{model_name}' in {models_dir}")


if __name__ == "__main__":
    # Models are emitted into ./models relative to the current working
    # directory; make sure that directory exists before generating.
    repo_root = os.path.join(os.getcwd(), "models")
    os.makedirs(repo_root, exist_ok=True)
    generate_bf16_add_model(repo_root)


================================================
FILE: qa/L0_backend_onnxruntime/test.py
================================================
#!/usr/bin/env python3
# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import unittest

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient


class BFloat16Test(unittest.TestCase):
    """End-to-end check of the ``add_bf16`` model over HTTP or gRPC.

    The protocol is chosen through the ``CLIENT_TYPE`` environment variable:
    ``"http"`` (the default) uses the HTTP client on ``localhost:8000``; any
    other value uses the gRPC client on ``localhost:8001``.
    """

    def setUp(self):
        """Create a client for the selected protocol and register its cleanup."""
        self.protocol = os.environ.get("CLIENT_TYPE", "http")
        if self.protocol == "http":
            self.client_ = httpclient.InferenceServerClient("localhost:8000")
        else:
            self.client_ = grpcclient.InferenceServerClient("localhost:8001")
        # Close the client after each test so its connection resources are
        # not leaked between tests.
        self.addCleanup(self.client_.close)
        self.model_name_ = "add_bf16"
        self.shape_ = [1]

    def _infer_bf16(self, input0_data, input1_data):
        """Run BF16 inference and return the ``OUTPUT`` tensor as a numpy array.

        Both arguments are numpy arrays matching ``self.shape_``; they are
        sent as the ``INPUT0`` and ``INPUT1`` tensors with Triton dtype BF16.
        """
        client_module = httpclient if self.protocol == "http" else grpcclient
        inputs = []
        for name, data in (("INPUT0", input0_data), ("INPUT1", input1_data)):
            tensor = client_module.InferInput(name, self.shape_, "BF16")
            tensor.set_data_from_numpy(data)
            inputs.append(tensor)

        results = self.client_.infer(self.model_name_, inputs)
        return results.as_numpy("OUTPUT")

    def test_bf16_add_variants(self):
        """Run BF16 add across multiple cases: zeros, negatives, large, small, cancellation, and identical."""
        cases = [
            (0.0, 0.0, 0.0),  # zeros
            (-1.5, 3.5, 2.0),  # negatives / mixed
            (100.0, 200.0, 300.0),  # large
            (1e-2, 1e-2, 2e-2),  # small magnitude, not exactly representable
            (1.0, -1.0, 0.0),  # cancellation
            (2.0, 2.0, 4.0),  # identical inputs
        ]
        for input0_val, input1_val, expected_val in cases:
            # subTest reports which operand pair failed and lets the
            # remaining cases still run.
            with self.subTest(input0=input0_val, input1=input1_val):
                output = self._infer_bf16(
                    np.full(self.shape_, input0_val, dtype=np.float32),
                    np.full(self.shape_, input1_val, dtype=np.float32),
                )
                # Fail with a clear message instead of an AttributeError when
                # the response carries no "OUTPUT" tensor.
                self.assertIsNotNone(output)
                self.assertEqual(output.dtype, np.float32)
                # TODO: BF16 to FP32 conversion loses precision. Remove rtol and atol in TRI-801.
                # BF16 has ~3 decimal digits; use relaxed tol for computed values
                np.testing.assert_allclose(output, expected_val, rtol=1e-2, atol=1e-3)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_onnxruntime/test.sh
================================================
#!/bin/bash
# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ONNX Runtime backend test driver: generates the add_bf16 ONNX model, starts
# the server on it, and runs test.py once per client protocol.
export CUDA_VISIBLE_DEVICES=0

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_LOG="./inference_server.log"
# Default client log name; replaced by a per-protocol name in the loop below.
CLIENT_LOG="./test.log"
# Shared QA helpers; run_server used below is presumably defined here.
source ../common/util.sh

# Start from a clean slate.
rm -f *.log
rm -rf models

RET=0

# BFLOAT16 test
# Generate the model
mkdir -p models/add_bf16/1
# Errexit is disabled so the failures below can print a specific message
# before exiting.
set +e

# The generator script imports the onnx package.
pip install onnx==1.20.1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed to install onnx dependency\n***"
    exit 1
fi

python gen_add_bf16_onnx_model.py
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed to generate BFLOAT16 ONNX model\n***"
    exit 1
fi

set -e

# A SERVER_PID of "0" after run_server is treated as a failed start.
SERVER_ARGS="--model-repository=`pwd`/models"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# test.py reads CLIENT_TYPE to choose between the HTTP and gRPC clients; each
# protocol gets its own log file, which is printed only on failure.
for client_type in http grpc; do
    export CLIENT_TYPE=$client_type
    CLIENT_LOG="./test_${client_type}.log"
    python test.py >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed ($client_type)\n***"
        RET=1
    fi
done
unset CLIENT_TYPE

set -e

# Stop the server and wait for it to exit.
kill $SERVER_PID
wait $SERVER_PID

# Exit status mirrors RET (0 only if both protocols passed).
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_output_detail/test.sh
================================================
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Backend output detail test driver: prepares a model repository containing
# the Python "add_sub" model and runs the backend_output_detail_test binary.

# Repository version: first positional argument if given, otherwise
# NVIDIA_TRITON_SERVER_VERSION; optionally suffixed with TEST_REPO_ARCH.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "No Repo version detected"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi
export CUDA_VISIBLE_DEVICES=0

# Start from a clean slate and rebuild the model repository.
rm -f *.log
MODELSDIR=`pwd`/models
# The chain ends at the last cp. A trailing "&& \" here used to pull the
# following `source` line into this list, so the helpers were only sourced
# when every setup step succeeded.
rm -fr $MODELSDIR && mkdir -p $MODELSDIR/add_sub/1 && \
    cp  ../python_models/add_sub/config.pbtxt $MODELSDIR/add_sub && \
    cp  ../python_models/add_sub/model.py $MODELSDIR/add_sub/1

# Shared QA helpers.
source ../common/util.sh

RET=0

TEST_LOG="./backend_output_detail_test.log"
TEST_EXEC=./backend_output_detail_test

# Errexit is disabled around the test binary so a failure is recorded in RET
# instead of aborting the script.
set +e
LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $TEST_EXEC >>$TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Backend Output Detail Unit Test Failed\n***"
    RET=1
fi
set -e

# On failure dump the test log; exit status mirrors RET.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $TEST_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/argument_validation/models/argument_validation/1/model.py
================================================
# Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import unittest

import numpy as np
import triton_python_backend_utils as pb_utils


class ArgumentValidationTest(unittest.TestCase):
    """Checks that the Python backend API rejects invalid argument values."""

    def test_infer_request_args(self):
        """Pass invalid and valid arguments to pb_utils.InferenceRequest."""
        # Known-good arguments; every case below overrides only the field
        # that is under test.
        good_inputs = [pb_utils.Tensor("INPUT0", np.asarray([1, 2], dtype=np.int32))]
        good_model_name = "my_model"
        good_output_names = ["my_output"]

        def build_request(**overrides):
            # Construct an InferenceRequest from the known-good arguments,
            # replaced or extended by the keyword overrides.
            arguments = {
                "inputs": good_inputs,
                "model_name": good_model_name,
                "requested_output_names": good_output_names,
            }
            arguments.update(overrides)
            return pb_utils.InferenceRequest(**arguments)

        #
        # inputs field validation
        #

        # A list containing None is expected to raise TritonModelException.
        self.assertRaises(pb_utils.TritonModelException, build_request, inputs=[None])

        # None in place of the list itself is expected to raise TypeError.
        self.assertRaises(TypeError, build_request, inputs=None)

        #
        # model_name validation
        #
        self.assertRaises(TypeError, build_request, model_name=None)

        #
        # requested_output_names validation
        #

        # A list containing None.
        self.assertRaises(TypeError, build_request, requested_output_names=[None])

        # None in place of the list itself.
        self.assertRaises(TypeError, build_request, requested_output_names=None)

        #
        # Remaining arguments
        #

        # correlation_id may not be None ...
        self.assertRaises(TypeError, build_request, correlation_id=None)

        # ... an integer is returned as an integer ...
        request = build_request(correlation_id=5)
        self.assertIsInstance(request.correlation_id(), int)
        self.assertEqual(request.correlation_id(), 5)

        # ... a string is returned as a string ...
        request = build_request(correlation_id="test_str_id-5")
        self.assertIsInstance(request.correlation_id(), str)
        self.assertEqual(request.correlation_id(), "test_str_id-5")

        # ... and when omitted it reads back as the integer 0.
        request = build_request()
        self.assertIsInstance(request.correlation_id(), int)
        self.assertEqual(request.correlation_id(), 0)

        # request_id, model_version and flags may not be None.
        self.assertRaises(TypeError, build_request, request_id=None)
        self.assertRaises(TypeError, build_request, model_version=None)
        self.assertRaises(TypeError, build_request, flags=None)

        # Empty lists should not raise an exception.
        build_request(requested_output_names=[], inputs=[])

    def test_infer_response_args(self):
        """Pass invalid and valid output tensors to pb_utils.InferenceResponse."""
        valid_outputs = [
            pb_utils.Tensor("OUTPUT0", np.asarray([1, 2], dtype=np.int32))
        ]

        # A list containing None as the output tensors.
        self.assertRaises(
            pb_utils.TritonModelException,
            pb_utils.InferenceResponse,
            output_tensors=[None],
        )

        # None in place of the list itself.
        self.assertRaises(TypeError, pb_utils.InferenceResponse, output_tensors=None)

        # Neither an empty list nor a list of valid tensors should raise.
        pb_utils.InferenceResponse(output_tensors=[])
        pb_utils.InferenceResponse(valid_outputs)

    def test_tensor_args(self):
        """Pass invalid names and payloads to the pb_utils.Tensor constructors."""
        int_array = np.asarray([1, 2], dtype=np.int32)

        # None as the tensor name.
        self.assertRaises(TypeError, pb_utils.Tensor, None, int_array)

        # None as the Numpy array.
        self.assertRaises(TypeError, pb_utils.Tensor, "OUTPUT0", None)

        # None as the dlpack capsule.
        self.assertRaises(
            pb_utils.TritonModelException,
            pb_utils.Tensor.from_dlpack,
            "OUTPUT0",
            None,
        )

        # Empty tensor name through from_dlpack (the capsule is None as well).
        self.assertRaises(
            pb_utils.TritonModelException, pb_utils.Tensor.from_dlpack, "", None
        )

        # Empty tensor name through the constructor (the array is None as well).
        self.assertRaises(TypeError, pb_utils.Tensor, "", None)

    def test_log_args(self):
        """Pass invalid levels and messages to the pb_utils.Logger methods."""
        logger = pb_utils.Logger

        # Neither None nor a plain integer is accepted as the log level.
        self.assertRaises(TypeError, logger.log, "Invalid Level", None)
        self.assertRaises(TypeError, logger.log, "Invalid Level", 1)

        # None is rejected as the message by every level-specific method
        # (info, warning, error, verbose - in that order).
        level_methods = (
            logger.log_info,
            logger.log_warn,
            logger.log_error,
            logger.log_verbose,
        )
        for log_method in level_methods:
            self.assertRaises(TypeError, log_method, None)

        # Logging without an explicit level should not raise an exception.
        logger.log("Level unspecified")


class TritonPythonModel:
    """Model that exercises the Python API argument checks.

    Every request triggers a run of the unit tests found in the "model"
    module; the response reports whether that run succeeded.
    """

    def execute(self, requests):
        """Run the unit tests once per request.

        Returns one InferenceResponse per request, each carrying a
        single-element float16 "OUTPUT0" tensor holding the truth value of
        ``wasSuccessful()`` for that run.
        """
        responses = []
        for _ in requests:
            # exit=False keeps unittest from calling sys.exit() so the
            # outcome can be inspected and returned to the caller.
            program = unittest.main("model", exit=False)
            passed = np.array([program.result.wasSuccessful()], dtype=np.float16)
            outcome_tensor = pb_utils.Tensor("OUTPUT0", passed)
            responses.append(pb_utils.InferenceResponse([outcome_tensor]))
        return responses


================================================
FILE: qa/L0_backend_python/argument_validation/models/argument_validation/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "argument_validation"
backend: "python"
max_batch_size: 0

# Single-element flag reporting whether the in-model unittest run succeeded.
# The declared type matches the np.float16 tensor that the model's execute()
# places in "OUTPUT0".
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP16
    dims: [ 1 ]
  }
]

instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/L0_backend_python/argument_validation/test.sh
================================================
#!/bin/bash
# Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Starts a server on the argument_validation model repository, runs the
# client in $CLIENT_PY against it with pytest, and exits with 0 on success
# or 1 on failure.
CLIENT_PY=../test_infer_shm_leak.py
CLIENT_LOG="./arg_validation_client.log"
TEST_RESULT_FILE='test_results.txt'
SERVER_ARGS="--model-repository=${MODELDIR}/argument_validation/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./arg_validation_server.log"

RET=0
source ../../common/util.sh

rm -fr *.log

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    # Nothing can be tested without a server. Stop here instead of running
    # the client and kill_server while SERVER_PID is still "0".
    exit 1
fi

set +e
# The client reads the model under test from MODEL_NAME.
export MODEL_NAME="argument_validation"
python3 -m pytest --junitxml="${MODEL_NAME}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1

if [ $? -ne 0 ]; then
    # Report the client script that was actually executed.
    echo -e "\n***\n*** ${CLIENT_PY} FAILED. \n***"
    RET=1
fi
set -e

kill_server

if [ $RET -eq 1 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Argument validation test FAILED. \n***"
else
    echo -e "\n***\n*** Argument validation test PASSED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/async_execute/concurrency_test.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import time
import unittest

import numpy as np
import tritonclient.grpc as grpcclient


class ConcurrencyTest(unittest.TestCase):
    def setUp(self):
        # Initialize client
        self._triton = grpcclient.InferenceServerClient("localhost:8001")

    def _generate_streaming_callback_and_response_pair(self):
        response = []  # [{"result": result, "error": error}, ...]

        def callback(result, error):
            response.append({"result": result, "error": error})

        return callback, response

    # Helper for testing concurrent execution
    def _concurrent_execute_requests(self, model_name, batch_size, number_of_requests):
        delay_secs = 4
        shape = [batch_size, 1]
        inputs = [grpcclient.InferInput("WAIT_SECONDS", shape, "FP32")]
        inputs[0].set_data_from_numpy(np.full(shape, delay_secs, dtype=np.float32))

        callback, response = self._generate_streaming_callback_and_response_pair()
        self._triton.start_stream(callback)
        for i in range(number_of_requests):
            self._triton.async_stream_infer(model_name, inputs)

        # 2s for sending requests for processing and 2s for returning results.
        wait_secs = 2 + delay_secs + 2
        time.sleep(wait_secs)
        # Ensure the sleep is shorter than sequential processing delay.
        sequential_min_delay = wait_secs * batch_size * number_of_requests
        self.assertLessEqual(wait_secs, sequential_min_delay)

        # If executed sequentially, the results are not available yet, so concurrent
        # execution is observed from seeing the correct responses.
        self.assertEqual(len(response), number_of_requests)
        for res in response:
            self.assertEqual(res["result"].as_numpy("DUMMY_OUT").shape[0], batch_size)
            self.assertIsNone(res["error"])

        self._triton.stop_stream()

    # Test batched requests are executed concurrently
    def test_concurrent_execute_single_request(self):
        self._concurrent_execute_requests(
            model_name="async_execute_decouple", batch_size=4, number_of_requests=1
        )

    # Test multiple requests are executed concurrently
    def test_concurrent_execute_multi_request(self):
        self._concurrent_execute_requests(
            model_name="async_execute_decouple", batch_size=1, number_of_requests=4
        )

    # Test batched requests are executed concurrently via bls
    def test_concurrent_execute_single_request_bls(self):
        self._concurrent_execute_requests(
            model_name="async_execute_decouple_bls", batch_size=4, number_of_requests=1
        )

    # Test multiple requests are executed concurrently via bls
    def test_concurrent_execute_multi_request_bls(self):
        self._concurrent_execute_requests(
            model_name="async_execute_decouple_bls", batch_size=1, number_of_requests=4
        )

    # Test requests with a shorter duration should return first
    def test_concurrent_execute_different_duration(self):
        model_name = "async_execute_decouple"
        callback, response = self._generate_streaming_callback_and_response_pair()
        self._triton.start_stream(callback)

        # Send 2 requests / delays
        shape = [1, 1]
        for delay_secs in [10, 2]:
            inputs = [grpcclient.InferInput("WAIT_SECONDS", shape, "FP32")]
            inputs[0].set_data_from_numpy(np.full(shape, delay_secs, dtype=np.float32))
            self._triton.async_stream_infer(model_name, inputs)
            time.sleep(2)  # leave a gap after each inference
            shape[0] += 1  # batch size to track request id

        # The last request executes for 2 secs, leave an additional 2 secs for sending
        # the request and 2 secs for receiving its response. Since 2 secs has elapsed
        # after sending the request, wait for another 4 secs.
        time.sleep(4)
        # The response of the last request should be available by now, while the first
        # request executes for 10 secs and only 8 secs has elapsed, so its response
        # should not be available by now.
        self.assertEqual(len(response), 1)
        self.assertEqual(response[0]["result"].as_numpy("DUMMY_OUT").shape[0], 2)
        self.assertIsNone(response[0]["error"])

        # The first request executes for 10 secs, leave an additional 2 secs for sending
        # the request and 2 secs for receiving its response. Since 8 secs has elapsed
        # after sending the request, wait for another 6 secs.
        time.sleep(6)
        # The response of the first request should be available by now.
        self.assertEqual(len(response), 2)
        self.assertEqual(response[1]["result"].as_numpy("DUMMY_OUT").shape[0], 1)
        self.assertIsNone(response[1]["error"])

        self._triton.stop_stream()

    # Test model exception handling
    def test_model_raise_exception(self):
        model_name = "async_execute_decouple"
        delay_secs = -1  # model will raise exception
        shape = [1, 1]
        inputs = [grpcclient.InferInput("WAIT_SECONDS", shape, "FP32")]
        inputs[0].set_data_from_numpy(np.full(shape, delay_secs, dtype=np.float32))

        with open(os.environ["SERVER_LOG"]) as f:
            server_log = f.read()
        self.assertNotIn("ValueError: wait_secs cannot be negative", server_log)

        callback, response = self._generate_streaming_callback_and_response_pair()
        self._triton.start_stream(callback)
        self._triton.async_stream_infer(model_name, inputs)
        time.sleep(2)
        self._triton.stop_stream()

        with open(os.environ["SERVER_LOG"]) as f:
            server_log = f.read()
        self.assertIn("ValueError: wait_secs cannot be negative", server_log)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/async_execute/test.sh
================================================
#!/bin/bash
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

source ../../common/util.sh

RET=0

#
# Test execution overlapping on the same instance
#

# Stage a fresh model repository holding both models used by the test.
rm -rf models && mkdir models
for _model in async_execute_decouple async_execute_decouple_bls; do
    mkdir -p models/${_model}/1 && \
        cp ../../python_models/${_model}/model.py models/${_model}/1 && \
        cp ../../python_models/${_model}/config.pbtxt models/${_model}
done

TEST_LOG="concurrency_test.log"
SERVER_LOG="concurrency_test.server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/async_execute/models --backend-directory=${BACKEND_DIR} --log-verbose=1"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# SERVER_LOG is handed to the test through its environment; the test reads
# the server log file to look for the expected model error.
if ! SERVER_LOG=$SERVER_LOG python3 -m pytest --junitxml=concurrency_test.report.xml concurrency_test.py > $TEST_LOG 2>&1; then
    echo -e "\n***\n*** async execute concurrency test FAILED\n***"
    cat $TEST_LOG
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Async execute test Passed\n***"
else
    echo -e "\n***\n*** Async execute test FAILED\n***"
fi
exit $RET


================================================
FILE: qa/L0_backend_python/bls/bls_parameters_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import os
import unittest

import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import np_to_triton_dtype

# By default, find tritonserver on "localhost", but for windows tests
# we overwrite the IP address with the TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class TestBlsParameters(unittest.TestCase):
    """Checks the parameters JSON returned by the "bls_parameters" model."""

    def test_bls_parameters(self):
        """Request num_params levels and compare the aggregated parameters."""
        model_name = "bls_parameters"
        shape = [1]
        num_params = 3

        # Based on the num_params specified, the model will generate a JSON response
        # containing all the supported parameter types for num_params times recursively.
        # Make sure the model has at least num_params + 1 instances.
        expected_params = {}
        for level in range(1, num_params + 1):
            expected_params.update(
                {
                    f"bool_{level}": bool(level),
                    f"int_{level}": level,
                    f"str_{level}": str(level),
                }
            )

        server_url = f"{_tritonserver_ipaddr}:8001"
        with grpcclient.InferenceServerClient(server_url) as client:
            # The only input is the number of parameter levels to generate.
            level_count = np.array([num_params], dtype=np.ubyte)
            number_input = grpcclient.InferInput(
                "NUMBER_PARAMETERS", shape, np_to_triton_dtype(level_count.dtype)
            )
            number_input.set_data_from_numpy(level_count)
            requested = [grpcclient.InferRequestedOutput("PARAMETERS_AGGREGATED")]

            result = client.infer(model_name, [number_input], outputs=requested)
            # The first output element holds the UTF-8 encoded JSON document.
            raw_json = result.as_numpy("PARAMETERS_AGGREGATED")[0]
            params_json = str(raw_json, encoding="utf-8")

        self.assertEqual(json.loads(params_json), expected_params)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/bls/test.sh
================================================
#!/bin/bash
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_PY=../test_infer_shm_leak.py
CLIENT_LOG="./bls_client.log"
TEST_RESULT_FILE='test_results.txt'
source ../../common/util.sh

TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:=http://github.com/triton-inference-server}

RET=0
rm -fr *.log ./models *.txt *.xml

# FIXME: [DLIS-5970] Until Windows supports GPU tensors, only test CPU
if [[ ${TEST_WINDOWS} == 0 ]]; then
    pip3 uninstall -y torch
    pip3 install torch==2.3.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html

    mkdir -p models/bls/1/
    cp ../../python_models/bls/model.py models/bls/1/
    cp ../../python_models/bls/config.pbtxt models/bls

    mkdir -p models/dlpack_add_sub/1/
    cp ../../python_models/dlpack_add_sub/model.py models/dlpack_add_sub/1/
    cp ../../python_models/dlpack_add_sub/config.pbtxt models/dlpack_add_sub

    mkdir -p models/bls_async/1/
    cp ../../python_models/bls_async/model.py models/bls_async/1/
    cp ../../python_models/bls_async/config.pbtxt models/bls_async

    mkdir -p models/bls_memory/1/
    cp ../../python_models/bls_memory/model.py models/bls_memory/1/
    cp ../../python_models/bls_memory/config.pbtxt models/bls_memory

    mkdir -p models/bls_memory_async/1/
    cp ../../python_models/bls_memory_async/model.py models/bls_memory_async/1/
    cp ../../python_models/bls_memory_async/config.pbtxt models/bls_memory_async

    mkdir -p models/add_sub/1/
    cp ../../python_models/add_sub/model.py models/add_sub/1/
    cp ../../python_models/add_sub/config.pbtxt models/add_sub

    mkdir -p models/execute_error/1/
    cp ../../python_models/execute_error/model.py models/execute_error/1/
    cp ../../python_models/execute_error/config.pbtxt models/execute_error

    mkdir -p models/identity_fp32/1/
    cp ../../python_models/identity_fp32/model.py models/identity_fp32/1/
    cp ../../python_models/identity_fp32/config.pbtxt models/identity_fp32

    mkdir -p models/dlpack_identity/1/
    cp ../../python_models/dlpack_identity/model.py models/dlpack_identity/1/
    cp ../../python_models/dlpack_identity/config.pbtxt models/dlpack_identity

    cp -r ${DATADIR}/qa_sequence_implicit_model_repository/onnx_nobatch_sequence_int32/ ./models

    git clone ${TRITON_REPO_ORGANIZATION}/python_backend -b $PYTHON_BACKEND_REPO_TAG
    mkdir -p models/square_int32/1/
    cp python_backend/examples/decoupled/square_model.py models/square_int32/1/model.py
    cp python_backend/examples/decoupled/square_config.pbtxt models/square_int32/config.pbtxt

    mkdir -p models/dlpack_square/1/
    cp ../../python_models/dlpack_square/model.py models/dlpack_square/1/
    cp ../../python_models/dlpack_square/config.pbtxt models/dlpack_square

    mkdir -p models/identity_fp32_timeout/1/
    cp ../../python_models/identity_fp32_timeout/model.py models/identity_fp32_timeout/1/
    cp ../../python_models/identity_fp32_timeout/config.pbtxt models/identity_fp32_timeout

    cp -r ${DATADIR}/qa_model_repository/libtorch_nobatch_float32_float32_float32/ ./models/libtorch_gpu && \
        sed -i 's/libtorch_nobatch_float32_float32_float32/libtorch_gpu/' models/libtorch_gpu/config.pbtxt && \
        echo "instance_group [ { kind: KIND_GPU} ]" >> models/libtorch_gpu/config.pbtxt

    cp -r ${DATADIR}/qa_model_repository/libtorch_nobatch_float32_float32_float32/ ./models/libtorch_cpu && \
        sed -i 's/libtorch_nobatch_float32_float32_float32/libtorch_cpu/' models/libtorch_cpu/config.pbtxt && \
        echo "instance_group [ { kind: KIND_CPU} ]" >> models/libtorch_cpu/config.pbtxt

    # Test with different sizes of CUDA memory pool
    # TODO: Why 256 worked in place of 128, on decoupled data pipeline?
    for CUDA_MEMORY_POOL_SIZE_MB in 64 256 ; do
        CUDA_MEMORY_POOL_SIZE_BYTES=$((CUDA_MEMORY_POOL_SIZE_MB * 1024 * 1024))
        SERVER_ARGS="--model-repository=${MODELDIR}/bls/models --backend-directory=${BACKEND_DIR} --log-verbose=1 --cuda-memory-pool-byte-size=0:${CUDA_MEMORY_POOL_SIZE_BYTES}"
        for TRIAL in non_decoupled decoupled ; do
            export BLS_KIND=${TRIAL}
            SERVER_LOG="./bls_${TRIAL}.${CUDA_MEMORY_POOL_SIZE_MB}.inference_server.log"

            run_server
            if [ "$SERVER_PID" == "0" ]; then
                echo -e "\n***\n*** Failed to start $SERVER\n***"
                cat $SERVER_LOG
                exit 1
            fi

            set +e

            for MODEL_NAME in bls bls_memory bls_memory_async bls_async; do
                export MODEL_NAME=${MODEL_NAME}
                # Run with pytest to capture the return code correctly
                pytest --junitxml="${MODEL_NAME}.${TRIAL}.${CUDA_MEMORY_POOL_SIZE_MB}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1
                EXIT_CODE=$?
                if [ $EXIT_CODE -ne 0 ]; then
                    echo -e "\n***\n*** ${MODEL_NAME} ${BLS_KIND} test FAILED. \n***"
                    RET=$EXIT_CODE
                    cat $SERVER_LOG
                    cat $CLIENT_LOG
                fi
            done

            kill_server

            set -e

            # Only check the timeout value if there is no error since the test
            # may fail before the test_timeout case gets run.
            if [ $RET -eq 0 ]; then
                # Check for bls 'test_timeout' to ensure timeout value is being correctly passed
                if [ `grep -c "Request timeout: 11000000000" $SERVER_LOG` == "0" ]; then
                    echo -e "\n***\n*** BLS timeout value not correctly passed to model: line ${LINENO}\n***"
                    cat $SERVER_LOG
                    RET=1
                fi
            fi

            if [[ $CUDA_MEMORY_POOL_SIZE_MB -eq 256 ]]; then
                if [ `grep -c "Failed to allocate memory from CUDA memory pool" $SERVER_LOG` != "0" ]; then
                    echo -e "\n***\n*** Expected to use CUDA memory pool for all tests when CUDA_MEMORY_POOL_SIZE_MB is 256 MB for 'bls' $BLS_KIND test\n***"
                    cat $SERVER_LOG
                    RET=1
                fi
            fi
        done
    done
fi

# Sub-test: BLS used outside of 'execute' (model 'bls_init_error').
# The server is started with only the bls_init_error model in the repository
# and is expected to FAIL to start; the server log must contain ERROR_MESSAGE.
SERVER_ARGS="--model-repository=${MODELDIR}/bls/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
# Test error handling when BLS is used in "initialize" or "finalize" function
ERROR_MESSAGE="BLS is only supported during the 'execute' function."

# Start from an empty model repository containing only the model under test.
rm -fr ./models
mkdir -p models/bls_init_error/1/
cp ../../python_models/bls_init_error/model.py models/bls_init_error/1/
cp ../../python_models/bls_init_error/config.pbtxt models/bls_init_error
SERVER_LOG="./bls_init_error_server.log"
# This variable is used to print out the correct server log for each sub-test.
SUB_TEST_RET=0

# run_server is provided by the test harness (not defined in this file); this
# script treats SERVER_PID == "0" as "the server did not start".
run_server
if [ "$SERVER_PID" != "0" ]; then
    # A running server means the expected load failure did not happen.
    echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
    RET=1
    SUB_TEST_RET=1
    kill_server
else
    # The server failed to start as expected; verify it failed for the right
    # reason by looking for the error message in the server log.
    if grep "$ERROR_MESSAGE" $SERVER_LOG; then
        echo -e "Found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG
    else
        echo -e "Not found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG
        RET=1
        SUB_TEST_RET=1
    fi
fi

# Dump both logs only when this particular sub-test failed.
if [ $SUB_TEST_RET -eq 1 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
fi

# FIXME: [DLIS-6122] Requires support for model load/unload.
# Until we can simulate Ctrl+C, the bls_finalize_error test will not pass.
if [[ ${TEST_WINDOWS} == 0 ]]; then
    # Sub-test: BLS used outside of 'execute' (model 'bls_finalize_error').
    # Unlike the init-error case, the server is expected to start; the log is
    # checked for ERROR_MESSAGE only after the server has been stopped.
    rm -fr ./models
    mkdir -p models/bls_finalize_error/1/
    cp ../../python_models/bls_finalize_error/model.py models/bls_finalize_error/1/
    cp ../../python_models/bls_finalize_error/config.pbtxt models/bls_finalize_error/
    SERVER_LOG="./bls_finalize_error_server.log"
    SUB_TEST_RET=0

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        RET=1
    else
        # Stop the server before inspecting the log: the error is looked up
        # in the log written up to and including shutdown.
        # NOTE(review): presumably the model's finalize runs during shutdown
        # and is what emits the message -- confirm against the model source.
        kill_server

        if grep "$ERROR_MESSAGE" $SERVER_LOG; then
            echo -e "Found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG
        else
            echo -e "Not found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG
            RET=1
            SUB_TEST_RET=1
        fi

        # Dump both logs only when this particular sub-test failed.
        if [ $SUB_TEST_RET -eq 1 ]; then
            cat $CLIENT_LOG
            cat $SERVER_LOG
        fi
    fi

    # Test model loading API with BLS
    # Build a fresh repository holding the bls_model_loading model plus a copy
    # of the onnx_int32_int32_int32 QA model.
    SUB_TEST_RET=0
    rm -fr ./models
    mkdir -p models/bls_model_loading/1/
    cp ../../python_models/bls_model_loading/model.py models/bls_model_loading/1/
    cp ../../python_models/bls_model_loading/config.pbtxt models/bls_model_loading/
    cp -fr ${DATADIR}/qa_model_repository/onnx_int32_int32_int32 models/.
    # Remove version 1 so that only versions 2 and 3 remain as valid version
    # directories.
    rm -rf models/onnx_int32_int32_int32/1

    SERVER_LOG="./bls_model_loading_server.log"
    # Explicit model control mode: models are loaded on request rather than
    # at server startup (the load is issued over HTTP further below).
    SERVER_ARGS="--model-repository=${MODELDIR}/bls/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --log-verbose=1"

    # Start the server, explicitly load 'bls_model_loading' through the HTTP
    # repository API, then run the Python client test against it.
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        RET=1
    else
        export MODEL_NAME='bls_model_loading'

        # Discard the response body (-o /dev/null) so that $code holds ONLY
        # the HTTP status written by -w. Without it, an error response body is
        # prepended to the status code and the comparison can never match.
        # Anything other than 200 (including curl's "000" on a connection
        # failure) is treated as a failed load.
        set +e
        code=`curl -s -o /dev/null -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/${MODEL_NAME}/load`
        set -e
        if [ "$code" != "200" ]; then
            echo -e "\n***\n*** Failed to load model '${MODEL_NAME}'\n***"
            RET=1
            SUB_TEST_RET=1
        fi

        # pytest's exit status is inspected manually, so errexit must be off.
        set +e

        python3 -m pytest --junitxml="${MODEL_NAME}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** 'bls_model_loading' test FAILED. \n***"
            cat $CLIENT_LOG
            RET=1
            SUB_TEST_RET=1
        fi

        set -e

        kill_server

        # Dump both logs only when this particular sub-test failed.
        if [ $SUB_TEST_RET -eq 1 ]; then
            cat $CLIENT_LOG
            cat $SERVER_LOG
        fi
    fi

    # Test model loading API with BLS warmup
    # Append a model_warmup section (one sample, two FP32 inputs of 4 elements
    # each, non-zero data) to the bls_model_loading configuration. Each printf
    # argument becomes one line of the appended text.
    (cd models/bls_model_loading && \
        printf '%s\n' \
            'model_warmup [{' \
            '    name : "regular sample"' \
            '    batch_size: 1' \
            '    inputs {' \
            '        key: "INPUT0"' \
            '        value: {' \
            '            data_type: TYPE_FP32' \
            '            dims: 4' \
            '            zero_data: false' \
            '        }' \
            '    }' \
            '    inputs {' \
            '        key: "INPUT1"' \
            '        value: {' \
            '            data_type: TYPE_FP32' \
            '            dims: 4' \
            '            zero_data: false' \
            '        }' \
            '    }' \
            '}]' >> config.pbtxt)

    # Start the server again with the warmup-enabled configuration and verify
    # that the model can still be loaded through the HTTP repository API.
    SUB_TEST_RET=0
    SERVER_LOG="./bls_model_loading_server_warmup.log"
    # Set the model name explicitly instead of relying on the export made by
    # the previous sub-test, which is skipped when its server fails to start.
    export MODEL_NAME='bls_model_loading'
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        RET=1
    else
        # Discard the response body (-o /dev/null) so that $code holds ONLY
        # the HTTP status written by -w. Without it, an error response body is
        # prepended to the status code and the comparison can never match.
        # Anything other than 200 (including curl's "000" on a connection
        # failure) is treated as a failed load.
        set +e
        code=`curl -s -o /dev/null -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/${MODEL_NAME}/load`
        set -e
        if [ "$code" != "200" ]; then
            echo -e "\n***\n*** Failed to load model '${MODEL_NAME}'\n***"
            RET=1
            SUB_TEST_RET=1
        fi

        kill_server

        # Dump both logs only when this particular sub-test failed.
        if [ $SUB_TEST_RET -eq 1 ]; then
            cat $CLIENT_LOG
            cat $SERVER_LOG
        fi
    fi
fi

# Test BLS parameters
# Uses a separate repository (params_models) holding only bls_parameters.
rm -rf params_models && mkdir -p params_models/bls_parameters/1
cp ../../python_models/bls_parameters/model.py ./params_models/bls_parameters/1
cp ../../python_models/bls_parameters/config.pbtxt ./params_models/bls_parameters

TEST_LOG="./bls_parameters.log"
SERVER_LOG="./bls_parameters.server.log"

SERVER_ARGS="--model-repository=${MODELDIR}/bls/params_models --backend-directory=${BACKEND_DIR} --log-verbose=1"
run_server
if [ "$SERVER_PID" == "0" ]; then
    # Unlike the sub-tests above, a failed server start aborts the whole
    # script here instead of only recording RET=1.
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# The client's exit status is inspected manually, so errexit must be off.
set +e
python3 bls_parameters_test.py > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** bls_parameters_test.py FAILED. \n***"
    cat $TEST_LOG
    RET=1
fi
set -e

kill_server

# Report the accumulated result of every sub-test and use it as exit status.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** BLS test PASSED. \n***"
else
    echo -e "\n***\n*** BLS test FAILED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/common.sh
================================================
#!/bin/bash
# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Print the number of (non-hidden) entries currently present in /dev/shm.
#
# The entries are collected with a glob instead of parsing `ls` output, so
# names containing whitespace are counted once, and the working array is kept
# local so it does not leak into the caller's environment.
get_shm_pages() {
  local -a shm_pages=()
  local entry
  for entry in /dev/shm/*; do
    # When nothing matches, the glob stays as the literal pattern; the
    # existence check filters it out (-L keeps dangling symlinks counted).
    if [[ -e "$entry" || -L "$entry" ]]; then
      shm_pages+=("$entry")
    fi
  done
  echo ${#shm_pages[@]}
}

# Download and install Miniconda into ./miniconda, then enable the `conda`
# shell function in the current shell.
#
# Any existing ./miniconda directory is removed first. The installer is
# downloaded into the current working directory. Note that `file_name` is
# assigned without `local`, so it remains set in the caller's shell.
install_conda() {
  rm -rf ./miniconda
  file_name="Miniconda3-py312_24.9.2-0-Linux-x86_64.sh"
  wget https://repo.anaconda.com/miniconda/$file_name

  # install miniconda in silent mode
  # (-b: batch mode, no prompts; -p: installation prefix)
  bash $file_name -p ./miniconda -b

  # activate conda
  # (evaluates the shell hook printed by conda so that `conda activate`
  # becomes available in this shell)
  eval "$(./miniconda/bin/conda shell.bash hook)"
}

# Install the build dependencies on apt-based systems: rapidjson headers,
# software-properties-common, and a pinned CMake (4.0.3) from the Kitware
# apt repository.
#
# Writes the Kitware keyring and apt source list under /usr/share/keyrings
# and /etc/apt/sources.list.d, so it needs sufficient privileges to do so.
install_build_deps_apt() {
  apt update && apt install software-properties-common rapidjson-dev -y
  # Using the CMake installation instructions from https://apt.kitware.com/
  # Steps of the chain below: install gpg/wget, fetch and de-armor the Kitware
  # signing key, source /etc/os-release to obtain $UBUNTU_CODENAME, register
  # the Kitware repository for that codename, then install the pinned cmake
  # and cmake-data packages. Each step runs only if the previous one succeeded.
  apt update -q=2 \
    && apt install -y gpg wget \
    && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - |  tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
    && . /etc/os-release \
    && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
    && apt-get update -q=2 \
    && apt-get install -y --no-install-recommends cmake=4.0.3* cmake-data=4.0.3*
}

# Install the build dependencies on yum-based systems (rapidjson headers only).
install_build_deps_yum() {
  yum install rapidjson-devel -y
}

# Install the build dependencies with the package manager that matches the
# platform.
#
# The yum path is taken only when BOTH conditions hold: TRITON_RHEL compares
# numerically equal to 1 and /etc/os-release mentions rhel, centos or fedora.
# In every other case the apt path is used.
install_build_deps() {
  # -eq inside [[ ]] is an arithmetic comparison, so an unset or empty
  # TRITON_RHEL evaluates to 0 and selects the apt branch.
  if [[ ${TRITON_RHEL} -eq "1" ]] && grep -qE 'rhel|centos|fedora' /etc/os-release; then
    install_build_deps_yum
  else
    install_build_deps_apt
  fi
}

# Create a named conda environment, activate it, and install conda-pack in it.
#
# Arguments:
#   $1 - Python version to install in the environment
#   $2 - name of the environment to create
create_conda_env() {
  local py_ver=$1 target_env=$2

  conda create -n $target_env python=$py_ver -y
  conda activate $target_env
  # conda-pack comes from the conda-forge channel.
  conda install -c conda-forge conda-pack -y
}

# Create a conda environment at an explicit filesystem prefix, activate it,
# and install conda-pack in it.
#
# Arguments:
#   $1 - Python version to install in the environment
#   $2 - path (prefix) at which the environment is created
create_conda_env_with_specified_path() {
  local py_ver=$1 target_prefix=$2

  conda create -p $target_prefix python=$py_ver -y
  conda activate $target_prefix
  # conda-pack comes from the conda-forge channel.
  conda install -c conda-forge conda-pack -y
}

# Clone the python_backend repository and build the
# `triton-python-backend-stub` target in python_backend/builddir.
#
# Reads from the environment: TRITON_REPO_ORGANIZATION, PYTHON_BACKEND_REPO_TAG,
# PY_VERSION, TRITON_BACKEND_REPO_TAG, TRITON_COMMON_REPO_TAG and
# TRITON_CORE_REPO_TAG.
# Side effects on the calling shell: any existing ./python_backend directory is
# removed, CUDA_PATH is set (not local), and CMAKE_POLICY_VERSION_MINIMUM is
# exported.
create_python_backend_stub() {
  rm -rf python_backend
  git clone ${TRITON_REPO_ORGANIZATION}/python_backend -b $PYTHON_BACKEND_REPO_TAG
  # Resolve the /usr/local/cuda symlink to the actual toolkit directory.
  CUDA_PATH=$(readlink -f /usr/local/cuda)
  export CMAKE_POLICY_VERSION_MINIMUM=3.5
  # The build runs in a subshell so the directory changes do not affect the
  # caller. The export inside the subshell repeats the one above.
  (cd python_backend/ \
      && mkdir builddir \
      && cd builddir \
      && export CMAKE_POLICY_VERSION_MINIMUM=3.5 \
      && cmake \
        -DCMAKE_CUDA_COMPILER=$CUDA_PATH/bin/nvcc \
        -DCMAKE_INCLUDE_PATH:STRING=/usr/include \
        -DCUDAToolkit_ROOT=$CUDA_PATH \
        -DPYBIND11_PYTHON_VERSION=$PY_VERSION \
        -DTRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG \
        -DTRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG \
        -DTRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG \
        -DTRITON_ENABLE_GPU=ON \
        -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
        -S ../ \
      && cmake --build . --target triton-python-backend-stub -j18)
}


================================================
FILE: qa/L0_backend_python/custom_metrics/test.sh
================================================
#!/bin/bash
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Custom metrics test: starts a server with the 'custom_metrics' Python model
# and runs the shared client test (test_infer_shm_leak.py) against it.
CLIENT_PY=../test_infer_shm_leak.py
CLIENT_LOG="./custom_metrics_client.log"
TEST_RESULT_FILE='test_results.txt'
# Shared helpers; run_server and kill_server used below are not defined in
# this script (presumably provided by util.sh -- confirm).
source ../../common/util.sh

SERVER_ARGS="--model-repository=${MODELDIR}/custom_metrics/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./custom_metrics_server.log"

RET=0
# Remove artifacts left over from a previous run.
rm -fr *.log ./models *.txt

# Build a model repository containing only the custom_metrics model.
mkdir -p models/custom_metrics/1/
cp ../../python_models/custom_metrics/model.py models/custom_metrics/1/
cp ../../python_models/custom_metrics/config.pbtxt models/custom_metrics

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# pytest's exit status is inspected manually, so errexit must be off.
set +e

# MODEL_NAME is exported for the client test process.
export MODEL_NAME='custom_metrics'
python3 -m pytest --junitxml="${MODEL_NAME}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** 'Custom Metrics' test FAILED. \n***"
    cat $CLIENT_LOG
    RET=1
fi

set -e

kill_server


# Final report; on failure both logs are printed (the client log is printed a
# second time here in addition to the dump right after the pytest run).
if [ $RET -eq 1 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Custom Metrics test FAILED. \n***"
else
    echo -e "\n***\n*** Custom Metrics test PASSED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/decoupled/decoupled_test.py
================================================
#!/usr/bin/env python3

# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append("../../common")

import queue
import time
import unittest
from functools import partial

import numpy as np
import shm_util
import tritonclient.grpc as grpcclient
from tritonclient.utils import *

# By default, find tritonserver on "localhost", but for windows tests
# we overwrite the IP address with the TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


def prepare_decoupled_bls_cancel_inputs(input_value, max_sum_value, ignore_cancel):
    """Build the three gRPC inputs used by the decoupled BLS cancel tests.

    Each argument is wrapped in a one-element numpy array and attached to an
    ``InferInput`` of the matching name.

    Args:
        input_value: Value for the "INPUT" tensor (stored as int32).
        max_sum_value: Value for the "MAX_SUM" tensor (stored as int32).
        ignore_cancel: Value for the "IGNORE_CANCEL" tensor (stored as bool).

    Returns:
        A list ``[INPUT, MAX_SUM, IGNORE_CANCEL]`` of ``grpcclient.InferInput``
        objects with their data already set.
    """
    named_tensors = (
        ("INPUT", np.array([input_value], dtype=np.int32)),
        ("MAX_SUM", np.array([max_sum_value], dtype=np.int32)),
        ("IGNORE_CANCEL", np.array([ignore_cancel], dtype=np.bool_)),
    )

    inputs = []
    for tensor_name, tensor in named_tensors:
        infer_input = grpcclient.InferInput(
            tensor_name,
            tensor.shape,
            np_to_triton_dtype(tensor.dtype),
        )
        infer_input.set_data_from_numpy(tensor)
        inputs.append(infer_input)

    return inputs


class UserData:
    """Container for whatever the streaming callback receives.

    The module-level ``callback`` puts each result or error it is given onto
    ``_completed_requests``; the tests read them back from the same queue.
    """

    def __init__(self):
        # Queue of results/errors in the order the callback stored them.
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    """Store the outcome of a stream response on ``user_data``.

    Args:
        user_data: Object exposing a ``_completed_requests`` queue.
        result: The inference result; stored when ``error`` is falsy.
        error: The error for this response; stored instead of ``result``
            when truthy.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


class DecoupledTest(unittest.TestCase):
    def setUp(self):
        """Create the shared-memory leak detector whose probes wrap each test body."""
        self._shm_leak_detector = shm_util.ShmLeakDetector()

    def test_decoupled_execute_error(self):
        """An error on one request must not fail the rest of the batch.

        The decoupled_execute_error model returns an error for the first
        request and successfully processes the second one; the second output
        must equal the second input.
        """
        model_name = "decoupled_execute_error"
        shape = [2, 2]
        number_of_requests = 2
        user_data = UserData()
        with self._shm_leak_detector.Probe(), grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        ) as triton_client:
            triton_client.start_stream(callback=partial(callback, user_data))

            # Send every request first, remembering what was sent.
            sent_tensors = []
            for _ in range(number_of_requests):
                tensor = np.random.randn(*shape).astype(np.float32)
                sent_tensors.append(tensor)
                infer_input = grpcclient.InferInput(
                    "IN", tensor.shape, np_to_triton_dtype(tensor.dtype)
                )
                infer_input.set_data_from_numpy(tensor)
                triton_client.async_stream_infer(
                    model_name=model_name, inputs=[infer_input]
                )

            # Responses are consumed in arrival order: the first one must be
            # the error, every later one must echo the corresponding input.
            for position, expected in enumerate(sent_tensors):
                result = user_data._completed_requests.get()
                if position == 0:
                    self.assertIs(type(result), InferenceServerException)
                    continue

                print(result)
                output_data = result.as_numpy("OUT")
                self.assertIsNotNone(output_data, "error: expected 'OUT'")
                self.assertTrue(
                    np.array_equal(output_data, expected),
                    "error: expected output {} to match input {}".format(
                        output_data, expected
                    ),
                )

    def test_decoupled_bls(self):
        """Combine BLS with the decoupled API: the output must echo the input."""
        model_name = "decoupled_bls"
        shape = [1, 2]
        user_data = UserData()
        with self._shm_leak_detector.Probe(), grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        ) as triton_client:
            triton_client.start_stream(callback=partial(callback, user_data))

            sent = np.random.randn(*shape).astype(np.float32)
            infer_input = grpcclient.InferInput(
                "IN", sent.shape, np_to_triton_dtype(sent.dtype)
            )
            infer_input.set_data_from_numpy(sent)
            triton_client.async_stream_infer(
                model_name=model_name, inputs=[infer_input]
            )

            # Check the single response of the decoupled model using BLS.
            result = user_data._completed_requests.get()

            # Make sure the result is not an exception
            self.assertIsNot(type(result), InferenceServerException)

            output_data = result.as_numpy("OUT")
            self.assertIsNotNone(output_data, "error: expected 'OUT'")
            self.assertTrue(
                np.array_equal(output_data, sent),
                "error: expected output {} to match input {}".format(
                    output_data, sent
                ),
            )

    def test_decoupled_bls_stream(self):
        """Combine BLS streaming with the decoupled API.

        Every request carries a single int32 value ``v`` and uses its index
        as request id. The test expects ``v`` responses per request, each
        carrying ``[v]`` in "OUT", and no response at all when ``v`` is 0.
        """
        model_name = "decoupled_bls_stream"
        in_values = [4, 2, 0, 1]
        user_data = UserData()
        with self._shm_leak_detector.Probe(), grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        ) as triton_client:
            triton_client.start_stream(callback=partial(callback, user_data))
            for request_index, value in enumerate(in_values):
                tensor = np.array([value], dtype=np.int32)
                infer_input = grpcclient.InferInput(
                    "IN", tensor.shape, np_to_triton_dtype(tensor.dtype)
                )
                infer_input.set_data_from_numpy(tensor)
                triton_client.async_stream_infer(
                    model_name=model_name,
                    inputs=[infer_input],
                    request_id=str(request_index),
                )

            # Retrieve results, grouping them by the id of their request.
            responses_by_id = {}
            for recv_count in range(sum(in_values)):
                data_item = user_data._completed_requests.get()
                self.assertIsNot(type(data_item), InferenceServerException)

                response_id = data_item.get_response().id
                responses_by_id.setdefault(response_id, []).append(
                    (recv_count, data_item)
                )

            # Validate results...
            for request_index, value in enumerate(in_values):
                this_id = str(request_index)
                is_received = this_id in responses_by_id

                if value == 0:
                    self.assertFalse(
                        is_received,
                        "received unexpected response for request id {}".format(
                            this_id
                        ),
                    )
                    continue

                self.assertTrue(
                    is_received,
                    "response for request id {} not received".format(this_id),
                )
                result_list = responses_by_id[this_id]
                self.assertEqual(len(result_list), value)

                expected_data = np.array([value], dtype=np.int32)
                for _, response in result_list:
                    this_data = response.as_numpy("OUT")
                    self.assertTrue(
                        np.array_equal(expected_data, this_data),
                        "error: incorrect data: expected {}, got {}".format(
                            expected_data, this_data
                        ),
                    )

    def test_decoupled_return_response_error(self):
        """A decoupled model whose execute returns a value must yield an error.

        The response delivered to the stream callback has to be an
        ``InferenceServerException`` carrying the "must return None" message.
        The type is asserted unconditionally so the test can no longer pass
        vacuously when a non-error response is delivered.
        """
        model_name = "decoupled_return_response_error"
        shape = [16]
        user_data = UserData()
        with self._shm_leak_detector.Probe() as shm_probe:
            with grpcclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8001"
            ) as client:
                client.start_stream(callback=partial(callback, user_data))
                input_data_0 = np.random.random(shape).astype(np.float32)
                input_data_1 = np.random.random(shape).astype(np.float32)
                inputs = [
                    grpcclient.InferInput(
                        "INPUT0",
                        input_data_0.shape,
                        np_to_triton_dtype(input_data_0.dtype),
                    ),
                    grpcclient.InferInput(
                        "INPUT1",
                        input_data_1.shape,
                        np_to_triton_dtype(input_data_1.dtype),
                    ),
                ]
                inputs[0].set_data_from_numpy(input_data_0)
                inputs[1].set_data_from_numpy(input_data_1)
                client.async_stream_infer(model_name=model_name, inputs=inputs)
                data_item = user_data._completed_requests.get()
                # Fail explicitly if the server answered with a regular result
                # instead of the expected error.
                self.assertIsInstance(
                    data_item,
                    InferenceServerException,
                    "Expected an error response from the model.",
                )
                self.assertIn(
                    "Python model 'decoupled_return_response_error_0_0' is using "
                    "the decoupled mode and the execute function must return "
                    "None.",
                    data_item.message(),
                    "Exception message didn't show up.",
                )

    def test_decoupled_send_after_close_error(self):
        """No response may reach the client once the sender has been closed."""
        model_name = "decoupled_send_after_close_error"
        shape = [16]
        user_data = UserData()
        with self._shm_leak_detector.Probe(), grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        ) as client:
            client.start_stream(callback=partial(callback, user_data))

            # Two random FP32 tensors, generated in INPUT0, INPUT1 order.
            tensors = [
                (tensor_name, np.random.random(shape).astype(np.float32))
                for tensor_name in ("INPUT0", "INPUT1")
            ]
            inputs = [
                grpcclient.InferInput(
                    tensor_name,
                    tensor.shape,
                    np_to_triton_dtype(tensor.dtype),
                )
                for tensor_name, tensor in tensors
            ]
            for infer_input, (_, tensor) in zip(inputs, tensors):
                infer_input.set_data_from_numpy(tensor)
            client.async_stream_infer(model_name=model_name, inputs=inputs)

            # Because the model has closed the response sender there is no
            # way to deliver the error message to the client. The error
            # will be logged on the server side.
            time.sleep(4)
            pending = user_data._completed_requests.qsize()
            self.assertEqual(
                pending,
                0,
                "The completed request size must be zero.",
            )

    def test_decoupled_execute_cancel(self):
        """Cancel an in-flight request and check both client and server see it.

        A request with a 4 second execute delay is sent, the stream is stopped
        with ``cancel_requests=True`` while the request is still running, and
        then:
          * every item delivered to the callback must be an
            ``InferenceServerException`` with status "StatusCode.CANCELLED";
          * the server log file must contain the model's "not cancelled" and
            "cancelled" messages.
        """
        model_name = "execute_cancel"
        # Server log file, read relative to the current working directory.
        log_path = "decoupled_server.log"
        execute_delay = 4.0  # seconds
        shape = [1, 1]
        user_data = UserData()

        with self._shm_leak_detector.Probe() as shm_probe:
            with grpcclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8001"
            ) as client:
                client.start_stream(callback=partial(callback, user_data))
                input_data = np.array([[execute_delay]], dtype=np.float32)
                inputs = [
                    grpcclient.InferInput(
                        "EXECUTE_DELAY", shape, np_to_triton_dtype(input_data.dtype)
                    )
                ]
                inputs[0].set_data_from_numpy(input_data)
                client.async_stream_infer(model_name, inputs)
                time.sleep(2)  # model delay for decoupling request and response sender
                time.sleep(2)  # ensure the request is executing
                client.stop_stream(cancel_requests=True)
                time.sleep(2)  # ensure the cancellation is delivered

            # The client is closed at this point; at least one item must have
            # been delivered, and every delivered item must be a cancellation.
            self.assertFalse(user_data._completed_requests.empty())
            while not user_data._completed_requests.empty():
                data_item = user_data._completed_requests.get()
                self.assertIsInstance(data_item, InferenceServerException)
                self.assertEqual(data_item.status(), "StatusCode.CANCELLED")

            # Server-side confirmation, taken from the server log.
            with open(log_path, mode="r", encoding="utf-8", errors="strict") as f:
                log_text = f.read()
            self.assertIn("[execute_cancel] Request not cancelled at 1.0 s", log_text)
            self.assertIn("[execute_cancel] Request cancelled at ", log_text)

    def test_decoupled_bls_cancel(self):
        """Run both BLS cancel models with IGNORE_CANCEL set to False.

        For each model the single response must report "IS_CANCELLED" as true
        and a "SUM" equal to the requested maximum sum.
        """
        input_value = 1
        max_sum_value = 10
        ignore_cancel = False
        user_data = UserData()
        for model_name in ("decoupled_bls_cancel", "decoupled_bls_async_cancel"):
            with self._shm_leak_detector.Probe(), grpcclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8001"
            ) as client:
                client.start_stream(callback=partial(callback, user_data))
                request_inputs = prepare_decoupled_bls_cancel_inputs(
                    input_value=input_value,
                    max_sum_value=max_sum_value,
                    ignore_cancel=ignore_cancel,
                )
                client.async_stream_infer(model_name, request_inputs)

                # Check the result of the decoupled model using BLS.
                result = user_data._completed_requests.get()

                # Make sure the result is not an exception
                self.assertIsNot(type(result), InferenceServerException)
                cancelled_flag = result.as_numpy("IS_CANCELLED")
                self.assertTrue(
                    cancelled_flag[0],
                    "error: expected the request to be cancelled",
                )

                expected_sum = np.array([max_sum_value], dtype=np.int32)
                sum_data = result.as_numpy("SUM")
                self.assertIsNotNone(sum_data, "error: expected 'SUM'")
                self.assertTrue(
                    np.array_equal(sum_data, expected_sum),
                    "error: expected output {} to match input {}".format(
                        sum_data, expected_sum
                    ),
                )

    def test_decoupled_bls_ignore_cancel(self):
        """Check the BLS cancel models when the cancellation is ignored.

        The same scenario is run against "decoupled_bls_cancel" and
        "decoupled_bls_async_cancel" with ignore_cancel enabled: the single
        response must carry a false IS_CANCELLED flag and a SUM that exceeds
        the requested maximum.
        """
        sum_limit = 10
        limit_array = np.array([sum_limit], dtype=np.int32)
        user_data = UserData()

        def verify_response(response):
            # The callback queues exceptions as results, so rule that out first.
            self.assertIsNot(type(response), InferenceServerException)
            cancelled_flag = response.as_numpy("IS_CANCELLED")
            self.assertFalse(
                cancelled_flag[0],
                "error: expected the request not being cancelled",
            )

            actual_sum = response.as_numpy("SUM")
            self.assertIsNotNone(actual_sum, "error: expected 'SUM'")
            self.assertTrue(
                actual_sum > limit_array,
                "error: expected sum_data {} to be greater than max_sum_data {}".format(
                    actual_sum, limit_array
                ),
            )

        for model_name in ("decoupled_bls_cancel", "decoupled_bls_async_cancel"):
            with self._shm_leak_detector.Probe():
                with grpcclient.InferenceServerClient(
                    f"{_tritonserver_ipaddr}:8001"
                ) as client:
                    client.start_stream(callback=partial(callback, user_data))
                    inputs = prepare_decoupled_bls_cancel_inputs(
                        input_value=1,
                        max_sum_value=sum_limit,
                        ignore_cancel=True,
                    )
                    client.async_stream_infer(model_name, inputs)

                    # Blocks until the stream callback queues the response.
                    verify_response(user_data._completed_requests.get())

    def test_decoupled_bls_cancel_after_cancellation(self):
        """Check "decoupled_bls_cancel_after_complete" with a max sum of 10.

        One streaming request is sent with ignore_cancel disabled. The single
        response must carry a true IS_CANCELLED flag and a SUM equal to the
        requested maximum.
        """
        model_name = "decoupled_bls_cancel_after_complete"
        sum_limit = 10
        expected_sum = np.array([sum_limit], dtype=np.int32)
        user_data = UserData()

        def verify_response(response):
            # The callback queues exceptions as results, so rule that out first.
            self.assertIsNot(type(response), InferenceServerException)
            cancelled_flag = response.as_numpy("IS_CANCELLED")
            self.assertTrue(
                cancelled_flag[0], "error: expected the request to be cancelled"
            )

            actual_sum = response.as_numpy("SUM")
            self.assertIsNotNone(actual_sum, "error: expected 'SUM'")
            self.assertTrue(
                np.array_equal(actual_sum, expected_sum),
                "error: expected output {} to match input {}".format(
                    actual_sum, expected_sum
                ),
            )

        with self._shm_leak_detector.Probe():
            with grpcclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8001"
            ) as client:
                client.start_stream(callback=partial(callback, user_data))
                inputs = prepare_decoupled_bls_cancel_inputs(
                    input_value=1,
                    max_sum_value=sum_limit,
                    ignore_cancel=False,
                )
                client.async_stream_infer(model_name, inputs)

                # Blocks until the stream callback queues the response.
                verify_response(user_data._completed_requests.get())

    def test_decoupled_bls_cancel_after_completion(self):
        """Check "decoupled_bls_cancel_after_complete" with a max sum of 25.

        One streaming request is sent with ignore_cancel disabled. The single
        response must carry a false IS_CANCELLED flag and a SUM that stays
        below the requested maximum.
        """
        model_name = "decoupled_bls_cancel_after_complete"
        sum_limit = 25
        limit_array = np.array([sum_limit], dtype=np.int32)
        user_data = UserData()

        def verify_response(response):
            # The callback queues exceptions as results, so rule that out first.
            self.assertIsNot(type(response), InferenceServerException)
            cancelled_flag = response.as_numpy("IS_CANCELLED")
            self.assertFalse(
                cancelled_flag[0],
                "error: expected the request not being cancelled",
            )

            actual_sum = response.as_numpy("SUM")
            self.assertIsNotNone(actual_sum, "error: expected 'SUM'")
            self.assertTrue(
                actual_sum < limit_array,
                "error: expected sum_data {} to be lesser than max_sum_data {}".format(
                    actual_sum, limit_array
                ),
            )

        with self._shm_leak_detector.Probe():
            with grpcclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8001"
            ) as client:
                client.start_stream(callback=partial(callback, user_data))
                inputs = prepare_decoupled_bls_cancel_inputs(
                    input_value=1,
                    max_sum_value=sum_limit,
                    ignore_cancel=False,
                )
                client.async_stream_infer(model_name, inputs)

                # Blocks until the stream callback queues the response.
                verify_response(user_data._completed_requests.get())

    def test_decoupled_raise_exception(self):
        """Make sure repeated model exceptions are handled properly.

        The decoupled_raise_exception model raises an exception for the
        request. Ten requests are streamed, every queued result must be an
        InferenceServerException mentioning "Intentional Error", and the
        model must still report ready afterwards.
        """
        model_name = "decoupled_raise_exception"
        tensor_shape = [2, 2]
        request_count = 10
        user_data = UserData()
        with grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        ) as triton_client:
            triton_client.start_stream(callback=partial(callback, user_data))

            # Queue every request before reading any result.
            for _ in range(request_count):
                payload = np.random.randn(*tensor_shape).astype(np.float32)
                infer_input = grpcclient.InferInput(
                    "IN", payload.shape, np_to_triton_dtype(payload.dtype)
                )
                infer_input.set_data_from_numpy(payload)
                triton_client.async_stream_infer(
                    model_name=model_name, inputs=[infer_input]
                )

            # One queued result is expected per request sent.
            for _ in range(request_count):
                outcome = user_data._completed_requests.get()
                self.assertIs(type(outcome), InferenceServerException)
                self.assertIn("Intentional Error", outcome.message())

            self.assertTrue(triton_client.is_model_ready(model_name))


# Run the unittest test runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls/1/model.py
================================================
# Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import sys
import threading
import time

import numpy as np
import torch
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack, to_dlpack


class TritonPythonModel:
    """Decoupled model that issues BLS requests from execute() and from threads.

    For every request, `execute` runs a synchronous BLS request against the
    `identity_fp32` model and then hands the request's response sender to a
    background thread. That thread optionally runs the GPU BLS checks against
    `dlpack_add_sub`, repeats the identity BLS request, and sends exactly one
    final response (the echoed input as "OUT", or an error).
    """

    def initialize(self, args):
        """Parse the model configuration and set up the per-model state.

        `args` must provide "model_config" (a JSON string) and "model_name".

        Raises pb_utils.TritonModelException when the decoupled transaction
        policy is not enabled in the model configuration.
        """
        logger = pb_utils.Logger
        logger.log("Initialize-Specific Msg!", logger.INFO)
        logger.log_info("Initialize-Info Msg!")
        logger.log_warn("Initialize-Warning Msg!")
        logger.log_error("Initialize-Error Msg!")
        # You must parse model_config. JSON string is not parsed here
        self.model_config = model_config = json.loads(args["model_config"])

        using_decoupled = pb_utils.using_decoupled_model_transaction_policy(
            model_config
        )
        if not using_decoupled:
            raise pb_utils.TritonModelException(
                """the model `{}` can generate any number of responses per request,
                enable decoupled transaction policy in model configuration to
                serve this model""".format(
                    args["model_name"]
                )
            )

        # Get OUT configuration
        out_config = pb_utils.get_output_config_by_name(model_config, "OUT")

        # Convert Triton types to numpy types
        self.out_dtype = pb_utils.triton_string_to_numpy(out_config["data_type"])

        # Number of response threads that have been started but have not
        # finished yet; `finalize` waits for it to drop back to zero.
        self.inflight_thread_count = 0
        self.inflight_thread_count_lck = threading.Lock()
        logger.log("Initialize-Specific Msg!", logger.INFO)
        logger.log_info("Initialize-Info Msg!")
        logger.log_warn("Initialize-Warning Msg!")
        logger.log_error("Initialize-Error Msg!")

    def execute(self, requests):
        """Run a sync BLS check per request and start its response thread.

        Returns None; the responses are sent later from `response_thread`
        through each request's response sender.

        Raises pb_utils.TritonModelException when the BLS request fails or
        when its output does not match the request input.
        """
        logger = pb_utils.Logger
        logger.log("Execute-Specific Msg!", logger.INFO)
        logger.log_info("Execute-Info Msg!")
        logger.log_warn("Execute-Warning Msg!")
        logger.log_error("Execute-Error Msg!")
        for request in requests:
            request_input = pb_utils.get_input_tensor_by_name(request, "IN")

            # Sync BLS request
            infer_request = pb_utils.InferenceRequest(
                model_name="identity_fp32",
                requested_output_names=["OUTPUT0"],
                inputs=[pb_utils.Tensor("INPUT0", request_input.as_numpy())],
            )
            infer_response = infer_request.exec()
            if infer_response.has_error():
                raise pb_utils.TritonModelException(
                    f"BLS Response has an error: {infer_response.error().message()}"
                )

            output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
            if np.any(output0.as_numpy() != request_input.as_numpy()):
                raise pb_utils.TritonModelException(
                    f"BLS Request input and BLS response output do not match. {request_input.as_numpy()} != {output0.as_numpy()}"
                )

            worker = threading.Thread(
                target=self.response_thread,
                args=(
                    request.get_response_sender(),
                    request_input.as_numpy(),
                ),
            )
            worker.daemon = True
            # Count the thread before it starts so `finalize` never observes
            # a running thread that has not been accounted for.
            with self.inflight_thread_count_lck:
                self.inflight_thread_count += 1
            worker.start()

        logger.log("Execute-Specific Msg!", logger.INFO)
        logger.log_info("Execute-Info Msg!")
        logger.log_warn("Execute-Warning Msg!")
        logger.log_error("Execute-Error Msg!")

        return None

    def _get_gpu_bls_outputs(self, input0_pb, input1_pb):
        """
        This function is created to test that the DLPack container works
        properly when the inference response and outputs go out of scope.

        Returns a tuple with the DLPack representations of "OUTPUT0" and
        "OUTPUT1" on success and False on failure.
        """
        logger = pb_utils.Logger
        logger.log("_get_gpu_bls_outputs-Specific Msg!", logger.INFO)
        logger.log_info("_get_gpu_bls_outputs-Info Msg!")
        logger.log_warn("_get_gpu_bls_outputs-Warning Msg!")
        logger.log_error("_get_gpu_bls_outputs-Error Msg!")

        infer_request = pb_utils.InferenceRequest(
            model_name="dlpack_add_sub",
            inputs=[input0_pb, input1_pb],
            requested_output_names=["OUTPUT0", "OUTPUT1"],
        )
        infer_response = infer_request.exec()
        if infer_response.has_error():
            return False

        output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
        output1 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT1")
        if output0 is None or output1 is None:
            return False

        # When one of the inputs is in GPU the output returned by the model must
        # be in GPU, otherwise the outputs will be in CPU.
        if not input0_pb.is_cpu() or not input1_pb.is_cpu():
            if output0.is_cpu() or output1.is_cpu():
                return False
        else:
            if (not output0.is_cpu()) or (not output1.is_cpu()):
                return False

        # Make sure that the reference count is increased by one when DLPack
        # representation is created.
        rc_before_dlpack_output0 = sys.getrefcount(output0)
        rc_before_dlpack_output1 = sys.getrefcount(output1)

        output0_dlpack = output0.to_dlpack()
        output1_dlpack = output1.to_dlpack()

        rc_after_dlpack_output0 = sys.getrefcount(output0)
        rc_after_dlpack_output1 = sys.getrefcount(output1)

        if rc_after_dlpack_output0 - rc_before_dlpack_output0 != 1:
            return False

        if rc_after_dlpack_output1 - rc_before_dlpack_output1 != 1:
            return False

        # Make sure that reference count decreases after destroying the DLPack
        output0_dlpack = None
        output1_dlpack = None
        rc_after_del_dlpack_output0 = sys.getrefcount(output0)
        rc_after_del_dlpack_output1 = sys.getrefcount(output1)
        if rc_after_del_dlpack_output0 - rc_after_dlpack_output0 != -1:
            return False

        if rc_after_del_dlpack_output1 - rc_after_dlpack_output1 != -1:
            return False

        return output0.to_dlpack(), output1.to_dlpack()

    def _test_gpu_bls_add_sub(self, is_input0_gpu, is_input1_gpu):
        """Run one `dlpack_add_sub` BLS request and validate its outputs.

        Two random 16-element tensors are created and moved to "cuda" when
        the matching flag is set. Returns True when OUTPUT0 equals the sum
        and OUTPUT1 the difference of the inputs, and False otherwise.
        """
        logger = pb_utils.Logger
        logger.log("_test_gpu_bls_add_sub-Specific Msg!", logger.INFO)
        logger.log_info("_test_gpu_bls_add_sub-Info Msg!")
        logger.log_warn("_test_gpu_bls_add_sub-Warning Msg!")
        logger.log_error("_test_gpu_bls_add_sub-Error Msg!")

        input0 = torch.rand(16)
        input1 = torch.rand(16)

        if is_input0_gpu:
            input0 = input0.to("cuda")

        if is_input1_gpu:
            input1 = input1.to("cuda")

        input0_pb = pb_utils.Tensor.from_dlpack("INPUT0", to_dlpack(input0))
        input1_pb = pb_utils.Tensor.from_dlpack("INPUT1", to_dlpack(input1))
        gpu_bls_return = self._get_gpu_bls_outputs(input0_pb, input1_pb)
        # The helper returns False on failure and a 2-tuple on success.
        if gpu_bls_return:
            output0_dlpack, output1_dlpack = gpu_bls_return
        else:
            return False

        expected_output_0 = from_dlpack(input0_pb.to_dlpack()).to("cpu") + from_dlpack(
            input1_pb.to_dlpack()
        ).to("cpu")
        expected_output_1 = from_dlpack(input0_pb.to_dlpack()).to("cpu") - from_dlpack(
            input1_pb.to_dlpack()
        ).to("cpu")

        output0_matches = torch.all(
            expected_output_0 == from_dlpack(output0_dlpack).to("cpu")
        )
        output1_matches = torch.all(
            expected_output_1 == from_dlpack(output1_dlpack).to("cpu")
        )
        if not output0_matches or not output1_matches:
            return False

        return True

    def execute_gpu_bls(self):
        """Run the add/sub BLS check for every CPU/GPU input combination.

        Returns True when all four combinations pass and False as soon as
        one of them fails.
        """
        logger = pb_utils.Logger
        logger.log("execute_gpu_bls-Specific Msg!", logger.INFO)
        logger.log_info("execute_gpu_bls-Info Msg!")
        logger.log_warn("execute_gpu_bls-Warning Msg!")
        logger.log_error("execute_gpu_bls-Error Msg!")
        for input0_device in [True, False]:
            for input1_device in [True, False]:
                test_status = self._test_gpu_bls_add_sub(input0_device, input1_device)
                if not test_status:
                    return False

        return True

    def response_thread(self, response_sender, in_input):
        """Send the single, final response for one request from a worker thread.

        The response_sender is used to send response(s) associated with the
        corresponding request; `in_input` is the numpy value of the request's
        "IN" tensor. Every outcome (GPU BLS failure, BLS error, output
        mismatch, success) is sent with the COMPLETE_FINAL flag so the request
        is always closed. The in-flight thread counter is decremented even
        when this method raises, so `finalize` cannot wait forever.
        """
        logger = pb_utils.Logger
        try:
            logger.log("response_thread-Specific Msg!", logger.INFO)
            logger.log_info("response_thread-Info Msg!")
            logger.log_warn("response_thread-Warning Msg!")
            logger.log_error("response_thread-Error Msg!")
            # Sleep 5 seconds to make sure the main thread has exited.
            time.sleep(5)

            # FIXME: [DLIS-5970] Until Windows supports GPU tensors, only test CPU
            if sys.platform != "win32":
                status = self.execute_gpu_bls()
            else:
                status = True

            if not status:
                response = pb_utils.InferenceResponse(error="GPU BLS test failed.")
            else:
                infer_request = pb_utils.InferenceRequest(
                    model_name="identity_fp32",
                    requested_output_names=["OUTPUT0"],
                    inputs=[pb_utils.Tensor("INPUT0", in_input)],
                )
                infer_response = infer_request.exec()
                if infer_response.has_error():
                    response = pb_utils.InferenceResponse(
                        error=infer_response.error().message()
                    )
                else:
                    # Only read the output once the response is known to be
                    # free of errors.
                    output0 = pb_utils.get_output_tensor_by_name(
                        infer_response, "OUTPUT0"
                    )
                    if np.any(in_input != output0.as_numpy()):
                        error_message = (
                            "BLS Request input and BLS response output do not match."
                            f" {in_input} != {output0.as_numpy()}"
                        )
                        response = pb_utils.InferenceResponse(error=error_message)
                    else:
                        response = pb_utils.InferenceResponse(
                            output_tensors=[pb_utils.Tensor("OUT", in_input)]
                        )

            response_sender.send(
                response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
            )
        finally:
            with self.inflight_thread_count_lck:
                self.inflight_thread_count -= 1

        logger.log("response_thread-Specific Msg!", logger.INFO)
        logger.log_info("response_thread-Info Msg!")
        logger.log_warn("response_thread-Warning Msg!")
        logger.log_error("response_thread-Error Msg!")

    def finalize(self):
        """`finalize` is called only once when the model is being unloaded.
        Implementing `finalize` function is OPTIONAL. This function allows
        the model to perform any necessary clean ups before exit.

        Here it polls every 0.1 seconds until all response threads started
        by `execute` have finished.
        """
        logger = pb_utils.Logger
        logger.log_info("Finalize invoked")

        inflight_threads = True
        while inflight_threads:
            with self.inflight_thread_count_lck:
                inflight_threads = self.inflight_thread_count != 0
            if inflight_threads:
                time.sleep(0.1)

        logger.log_info("Finalize complete...")


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "decoupled_bls"
backend: "python"
max_batch_size: 64

model_transaction_policy {
  decoupled: True
}
input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_async_cancel/1/model.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import asyncio

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    This model sends an async decoupled BLS inference request to the
    'response_sender_until_cancelled' model and sums up its responses.
    Once the MAX_SUM is reached, the model calls the response iterator's
    cancel() method to cancel the response stream.
    If IGNORE_CANCEL is set to True, the 'response_sender_until_cancelled'
    model will not honor the request cancellation and keeps sending outputs
    to this model.
    The number of total responses should not reach MAX_RESPONSE_COUNT.
    """

    async def execute(self, requests):
        """Sum the streamed BLS outputs of the first request.

        Only `requests[0]` is read. Returns a one-element list whose response
        carries "SUM", "IS_CANCELLED", and any non-cancellation error reported
        by the response stream.
        """
        first_request = requests[0]
        max_sum = (
            pb_utils.get_input_tensor_by_name(first_request, "MAX_SUM")
            .as_numpy()
            .flat[0]
        )
        input_tensor = pb_utils.get_input_tensor_by_name(first_request, "INPUT")
        ignore_cancel_tensor = pb_utils.get_input_tensor_by_name(
            first_request, "IGNORE_CANCEL"
        )
        delay_tensor = pb_utils.Tensor("DELAY", np.array([50], dtype=np.int32))
        max_response_count_tensor = pb_utils.Tensor(
            "MAX_RESPONSE_COUNT", np.array([20], dtype=np.int32)
        )

        bls_request = pb_utils.InferenceRequest(
            model_name="response_sender_until_cancelled",
            inputs=[
                input_tensor,
                max_response_count_tensor,
                delay_tensor,
                ignore_cancel_tensor,
            ],
            requested_output_names=["OUTPUT"],
        )
        response_stream = await bls_request.async_exec(decoupled=True)

        is_cancelled = False
        error = None
        response_sum = 0
        for infer_response in response_stream:
            if infer_response.has_error():
                # A CANCELLED error is the expected outcome of cancel(); any
                # other error is forwarded in this model's own response.
                stream_error = infer_response.error()
                if stream_error.code() == pb_utils.TritonError.CANCELLED:
                    is_cancelled = True
                else:
                    error = stream_error
                break

            output_tensor = pb_utils.get_output_tensor_by_name(
                infer_response, "OUTPUT"
            )
            response_sum += output_tensor.as_numpy()[0]
            # Keep iterating after cancel(): responses may still arrive until
            # the stream reports an error or is exhausted.
            if response_sum >= max_sum:
                response_stream.cancel()

        sum_tensor = pb_utils.Tensor("SUM", np.array([response_sum], dtype=np.int32))
        cancelled_tensor = pb_utils.Tensor(
            "IS_CANCELLED", np.array([is_cancelled], dtype=np.bool_)
        )
        return [
            pb_utils.InferenceResponse(
                output_tensors=[sum_tensor, cancelled_tensor],
                error=error,
            )
        ]


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_async_cancel/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "decoupled_bls_async_cancel"
backend: "python"

input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "MAX_SUM"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "IGNORE_CANCEL"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]
output [
  {
    name: "SUM"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "IS_CANCELLED"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_cancel/1/model.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    BLS test model that cancels a decoupled response stream part-way through.

    A decoupled BLS request is issued to the 'response_sender_until_cancelled'
    model and the "OUTPUT" value of every response is added to a running sum.
    As soon as that sum reaches MAX_SUM, cancel() is called on the response
    iterator to cancel the response stream.
    When IGNORE_CANCEL is True, 'response_sender_until_cancelled' is expected
    not to honor the cancellation and to keep sending outputs to this model.
    The total number of responses should never reach MAX_RESPONSE_COUNT.
    """

    def execute(self, requests):
        """Run the cancel scenario for the first request of the batch.

        Only ``requests[0]`` is read. The returned list holds exactly one
        response carrying "SUM" (the accumulated OUTPUT values) and
        "IS_CANCELLED" (True when the stream ended with a CANCELLED error).
        Any other stream error is attached to that response.
        """
        first_request = requests[0]
        sum_limit = (
            pb_utils.get_input_tensor_by_name(first_request, "MAX_SUM")
            .as_numpy()
            .flat[0]
        )
        # INPUT and IGNORE_CANCEL are forwarded untouched to the callee.
        input_tensor = pb_utils.get_input_tensor_by_name(first_request, "INPUT")
        ignore_cancel_tensor = pb_utils.get_input_tensor_by_name(
            first_request, "IGNORE_CANCEL"
        )
        # Fixed settings handed to the callee alongside the forwarded inputs.
        delay_tensor = pb_utils.Tensor("DELAY", np.array([50], dtype=np.int32))
        max_count_tensor = pb_utils.Tensor(
            "MAX_RESPONSE_COUNT", np.array([20], dtype=np.int32)
        )

        bls_request = pb_utils.InferenceRequest(
            model_name="response_sender_until_cancelled",
            inputs=[
                input_tensor,
                max_count_tensor,
                delay_tensor,
                ignore_cancel_tensor,
            ],
            requested_output_names=["OUTPUT"],
        )
        stream = bls_request.exec(decoupled=True)

        cancelled = False
        failure = None
        running_total = 0
        for response in stream:
            if response.has_error():
                stream_error = response.error()
                if stream_error.code() == pb_utils.TritonError.CANCELLED:
                    cancelled = True
                else:
                    failure = stream_error
                # Any error response ends the consumption of the stream.
                break

            output_array = pb_utils.get_output_tensor_by_name(
                response, "OUTPUT"
            ).as_numpy()
            running_total += output_array[0]

            # Keep iterating after requesting cancellation: the loop only
            # stops on an error response or when the stream is exhausted.
            if running_total >= sum_limit:
                stream.cancel()

        sum_tensor = pb_utils.Tensor(
            "SUM", np.array([running_total], dtype=np.int32)
        )
        cancelled_tensor = pb_utils.Tensor(
            "IS_CANCELLED", np.array([cancelled], dtype=np.bool_)
        )
        return [
            pb_utils.InferenceResponse(
                output_tensors=[sum_tensor, cancelled_tensor],
                error=failure,
            )
        ]


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_cancel/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model whose model.py issues a decoupled BLS request to
# 'response_sender_until_cancelled' and cancels the response stream once the
# summed outputs reach MAX_SUM.
name: "decoupled_bls_cancel"
backend: "python"

input [
  {
    # Forwarded unchanged to 'response_sender_until_cancelled'.
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    # Threshold: once the running sum of responses is >= this value the
    # model calls cancel() on the response iterator.
    name: "MAX_SUM"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    # Forwarded unchanged to 'response_sender_until_cancelled'.
    name: "IGNORE_CANCEL"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]
output [
  {
    # Sum of the "OUTPUT" values received from the BLS response stream.
    name: "SUM"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    # True when the stream terminated with a CANCELLED error.
    name: "IS_CANCELLED"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_cancel_after_complete/1/model.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import asyncio

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    Async BLS test model that cancels a decoupled stream, then cancels again.

    A decoupled BLS request is issued to the 'response_sender_until_cancelled'
    model and the "OUTPUT" value of every response is added to a running sum.
    As soon as that sum reaches MAX_SUM, cancel() is called on the response
    iterator to cancel the response stream.
    When IGNORE_CANCEL is True, 'response_sender_until_cancelled' is expected
    not to honor the cancellation and to keep sending outputs to this model.
    The total number of responses should never reach MAX_RESPONSE_COUNT.

    After the stream has been consumed, cancel() is invoked one more time to
    exercise cancellation of an already completed request; an exception raised
    by that call is reported as an INTERNAL error on the response.
    """

    async def execute(self, requests):
        """Run the cancel-after-complete scenario for the first request.

        Only ``requests[0]`` is read. The returned list holds exactly one
        response carrying "SUM" (the accumulated OUTPUT values) and
        "IS_CANCELLED" (True when the stream ended with a CANCELLED error).
        Any other stream error, or a failure of the late cancel() call, is
        attached to that response.
        """
        first_request = requests[0]
        sum_limit = (
            pb_utils.get_input_tensor_by_name(first_request, "MAX_SUM")
            .as_numpy()
            .flat[0]
        )
        # INPUT and IGNORE_CANCEL are forwarded untouched to the callee.
        input_tensor = pb_utils.get_input_tensor_by_name(first_request, "INPUT")
        ignore_cancel_tensor = pb_utils.get_input_tensor_by_name(
            first_request, "IGNORE_CANCEL"
        )
        # Fixed settings handed to the callee alongside the forwarded inputs.
        delay_tensor = pb_utils.Tensor("DELAY", np.array([50], dtype=np.int32))
        max_count_tensor = pb_utils.Tensor(
            "MAX_RESPONSE_COUNT", np.array([20], dtype=np.int32)
        )

        bls_request = pb_utils.InferenceRequest(
            model_name="response_sender_until_cancelled",
            inputs=[
                input_tensor,
                max_count_tensor,
                delay_tensor,
                ignore_cancel_tensor,
            ],
            requested_output_names=["OUTPUT"],
        )
        stream = await bls_request.async_exec(decoupled=True)

        cancelled = False
        failure = None
        running_total = 0
        for response in stream:
            if response.has_error():
                stream_error = response.error()
                if stream_error.code() == pb_utils.TritonError.CANCELLED:
                    cancelled = True
                else:
                    failure = stream_error
                # Any error response ends the consumption of the stream.
                break

            output_array = pb_utils.get_output_tensor_by_name(
                response, "OUTPUT"
            ).as_numpy()
            running_total += output_array[0]

            # Keep iterating after requesting cancellation: the loop only
            # stops on an error response or when the stream is exhausted.
            if running_total >= sum_limit:
                stream.cancel()

        # Cancel once more now that the stream is finished; this late call is
        # the behavior under test and must not surface an exception.
        if not failure:
            try:
                stream.cancel()
            except Exception as exc:
                failure = pb_utils.TritonError(
                    message=str(exc),
                    code=pb_utils.TritonError.INTERNAL,
                )

        sum_tensor = pb_utils.Tensor(
            "SUM", np.array([running_total], dtype=np.int32)
        )
        cancelled_tensor = pb_utils.Tensor(
            "IS_CANCELLED", np.array([cancelled], dtype=np.bool_)
        )
        return [
            pb_utils.InferenceResponse(
                output_tensors=[sum_tensor, cancelled_tensor],
                error=failure,
            )
        ]


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_cancel_after_complete/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model whose model.py issues an async decoupled BLS request to
# 'response_sender_until_cancelled', cancels the stream once the summed
# outputs reach MAX_SUM, and calls cancel() again after the stream is consumed.
name: "decoupled_bls_cancel_after_complete"
backend: "python"

input [
  {
    # Forwarded unchanged to 'response_sender_until_cancelled'.
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    # Threshold: once the running sum of responses is >= this value the
    # model calls cancel() on the response iterator.
    name: "MAX_SUM"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    # Forwarded unchanged to 'response_sender_until_cancelled'.
    name: "IGNORE_CANCEL"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]
output [
  {
    # Sum of the "OUTPUT" values received from the BLS response stream.
    name: "SUM"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    # True when the stream terminated with a CANCELLED error.
    name: "IS_CANCELLED"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_stream/1/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import threading
import time

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """This model sends a BLS request to a decoupled model 'square_int32' and
    returns the output from 'square_int32' as responses.
    """

    def initialize(self, args):
        """Parse the model configuration and verify decoupled mode is enabled.

        Raises:
            pb_utils.TritonModelException: if the model configuration does not
                enable the decoupled transaction policy.
        """
        # You must parse model_config. JSON string is not parsed here
        self.model_config = model_config = json.loads(args["model_config"])

        using_decoupled = pb_utils.using_decoupled_model_transaction_policy(
            model_config
        )
        if not using_decoupled:
            raise pb_utils.TritonModelException(
                """the model `{}` can generate any number of responses per request,
                enable decoupled transaction policy in model configuration to
                serve this model""".format(
                    args["model_name"]
                )
            )

        # Number of response threads still running; finalize() waits on it.
        self.inflight_thread_count = 0
        self.inflight_thread_count_lck = threading.Lock()

    def execute(self, requests):
        """Start one daemon response thread per request and return None.

        Responses are delivered from the threads through each request's
        response sender.
        """
        for request in requests:
            thread = threading.Thread(
                target=self.response_thread,
                args=(
                    request.get_response_sender(),
                    pb_utils.get_input_tensor_by_name(request, "IN").as_numpy(),
                ),
            )
            thread.daemon = True
            with self.inflight_thread_count_lck:
                self.inflight_thread_count += 1
            thread.start()

        return None

    def response_thread(self, response_sender, in_value):
        """Relay the 'square_int32' BLS responses for a single request.

        The in-flight counter is decremented in a ``finally`` clause so that
        finalize() cannot spin forever when relaying raises.
        """
        try:
            self._relay_bls_responses(response_sender, in_value)
        finally:
            with self.inflight_thread_count_lck:
                self.inflight_thread_count -= 1

    @staticmethod
    def _send_final_error(response_sender, message):
        """Send `message` as a TritonError response and close the sender."""
        response = pb_utils.InferenceResponse(error=pb_utils.TritonError(message))
        response_sender.send(
            response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
        )

    def _relay_bls_responses(self, response_sender, in_value):
        """Forward every BLS output to the client and validate the stream.

        Each response's "OUT" tensor must equal `in_value`, and the number of
        responses (not counting the last one) must equal `in_value`. The first
        violation, or the first BLS error, is reported as the final response.
        """
        infer_request = pb_utils.InferenceRequest(
            model_name="square_int32",
            requested_output_names=["OUT"],
            inputs=[pb_utils.Tensor("IN", in_value)],
        )
        infer_responses = infer_request.exec(decoupled=True)

        response_count = 0
        for infer_response in infer_responses:
            # Check for errors before looking at the outputs so that an error
            # response without output tensors is not silently dropped.
            if infer_response.has_error():
                self._send_final_error(
                    response_sender, infer_response.error().message()
                )
                # The sender is closed now; nothing more may be sent.
                return

            if len(infer_response.output_tensors()) > 0:
                output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
                if np.any(in_value != output0.as_numpy()):
                    self._send_final_error(
                        response_sender,
                        "BLS Request input and BLS response output do not match."
                        f" {in_value} != {output0.as_numpy()}",
                    )
                    return

                output_tensors = [pb_utils.Tensor("OUT", output0.as_numpy())]
                response_sender.send(
                    pb_utils.InferenceResponse(output_tensors=output_tensors)
                )

            response_count += 1

        # One response is subtracted before comparing with the input value.
        received = response_count - 1
        if in_value != received:
            # The response iterator cannot be passed to len(); report the
            # count tracked while iterating instead.
            self._send_final_error(
                response_sender,
                "Expected {} responses, got {}".format(in_value, received),
            )
        else:
            response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

    def finalize(self):
        """Block until every response thread has finished."""
        inflight_threads = True
        while inflight_threads:
            with self.inflight_thread_count_lck:
                inflight_threads = self.inflight_thread_count != 0
            if inflight_threads:
                time.sleep(0.1)


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_stream/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model whose model.py relays the responses of a decoupled BLS
# request to 'square_int32'.
name: "decoupled_bls_stream"
backend: "python"

# model.py raises during initialize() unless the decoupled policy is enabled.
model_transaction_policy {
  decoupled: True
}
input [
  {
    # Sent to 'square_int32' as its "IN" tensor; model.py also compares it
    # with every "OUT" value and with the number of responses received.
    name: "IN"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

output [
  {
    # Each "OUT" tensor received from 'square_int32' is relayed as-is.
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_execute_error/1/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import threading
import time

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Decoupled model that answers the first request of a batch with an error."""

    def initialize(self, args):
        """Load the model config, insist on decoupled mode and set up state.

        Raises:
            pb_utils.TritonModelException: when the decoupled transaction
                policy is not enabled in the model configuration.
        """
        # The configuration arrives as a JSON string and has to be decoded.
        parsed_config = json.loads(args["model_config"])
        self.model_config = parsed_config

        if not pb_utils.using_decoupled_model_transaction_policy(parsed_config):
            raise pb_utils.TritonModelException(
                """the model `{}` can generate any number of responses per request,
                enable decoupled transaction policy in model configuration to
                serve this model""".format(
                    args["model_name"]
                )
            )

        # Numpy dtype matching the Triton data type configured for "OUT".
        out_settings = pb_utils.get_output_config_by_name(parsed_config, "OUT")
        self.out_dtype = pb_utils.triton_string_to_numpy(out_settings["data_type"])

        # Counter of running response threads, guarded by the lock below.
        self.inflight_thread_count = 0
        self.inflight_thread_count_lck = threading.Lock()

    def execute(self, requests):
        """Spawn one daemon response thread per request; returns None."""
        for position, request in enumerate(requests):
            # The position inside the batch decides whether the thread sends
            # an error (position 0) or a regular response.
            worker = threading.Thread(
                target=self.response_thread,
                args=(
                    request.get_response_sender(),
                    position,
                    pb_utils.get_input_tensor_by_name(request, "IN").as_numpy(),
                ),
            )
            worker.daemon = True

            # Register the thread as in flight before it starts running.
            with self.inflight_thread_count_lck:
                self.inflight_thread_count += 1

            worker.start()

        return None

    def response_thread(self, response_sender, index, in_input):
        """Send one response for a request and then close its sender.

        The input is echoed back as "OUT". For ``index == 0`` the response
        additionally carries an error; other indices get a plain response.
        """
        echoed = pb_utils.Tensor("OUT", in_input)

        response_kwargs = {"output_tensors": [echoed]}
        if index == 0:
            response_kwargs["error"] = pb_utils.TritonError(
                "An error occurred during execution"
            )
        response_sender.send(pb_utils.InferenceResponse(**response_kwargs))

        # Closing the sender tells Triton that no further responses follow
        # for this request; the sender must not be used afterwards.
        response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        with self.inflight_thread_count_lck:
            self.inflight_thread_count -= 1

    def finalize(self):
        """Wait for all response threads to finish before the model unloads.

        `finalize` is called only once when the model is being unloaded and
        implementing it is optional.
        """
        print("Finalize invoked")

        while True:
            with self.inflight_thread_count_lck:
                drained = self.inflight_thread_count == 0
            if drained:
                break
            # Poll outside the lock so worker threads can update the counter.
            time.sleep(0.1)

        print("Finalize complete...")


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_execute_error/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model whose model.py attaches an error to the response of the
# request at index 0 of every execute() batch and echoes "IN" as "OUT".
name: "decoupled_execute_error"
backend: "python"
max_batch_size: 64

# model.py raises during initialize() unless the decoupled policy is enabled.
model_transaction_policy {
  decoupled: True
}
input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]

# Queue delay is 12,000,000 microseconds (12 seconds).
# NOTE(review): presumably long enough for a test to get several requests
# into one execute() batch -- confirm against the test that uses this model.
dynamic_batching { preferred_batch_size: [8], max_queue_delay_microseconds: 12000000 }


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_raise_exception/1/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Model whose execute() fails on purpose for any non-empty batch."""

    def initialize(self, args):
        """No setup is required for this model."""
        return None

    def execute(self, requests):
        """Raise "Intentional Error" as soon as a request is seen.

        Returns None only when `requests` yields nothing.
        """
        for _unused_request in requests:
            # The first request already triggers the failure.
            raise Exception("Intentional Error")
        return None


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_raise_exception/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model whose model.py raises an exception from execute() for
# any non-empty batch of requests.
name: "decoupled_raise_exception"
backend: "python"
max_batch_size: 64

model_transaction_policy {
  decoupled: True
}
input [
  {
    # Never read by model.py; execute() raises before touching the inputs.
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    # Never produced by model.py.
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_return_response_error/1/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Negative-test model for the decoupled transaction policy.

    The model is configured as decoupled, yet ``execute`` hands its
    responses back as a return value instead of going through a response
    sender.
    """

    def initialize(self, args):
        """Parse the model configuration and cache the output dtypes.

        Raises ``pb_utils.TritonModelException`` when the configuration does
        not enable the decoupled transaction policy.
        """
        config = json.loads(args["model_config"])
        self.model_config = config

        if not pb_utils.using_decoupled_model_transaction_policy(config):
            raise pb_utils.TritonModelException(
                """the model `{}` can generate any number of responses per request,
                enable decoupled transaction policy in model configuration to
                serve this model""".format(
                    args["model_name"]
                )
            )

        # Resolve the numpy dtype of each declared output once, up front.
        for output_name, attribute in (
            ("OUTPUT0", "output0_dtype"),
            ("OUTPUT1", "output1_dtype"),
        ):
            output_config = pb_utils.get_output_config_by_name(config, output_name)
            setattr(
                self,
                attribute,
                pb_utils.triton_string_to_numpy(output_config["data_type"]),
            )

    def execute(self, requests):
        """Compute INPUT0 + INPUT1 and INPUT0 - INPUT1 for every request and
        return the responses as a list (rather than sending them through a
        response sender).
        """
        sum_dtype = self.output0_dtype
        diff_dtype = self.output1_dtype

        responses = []
        for request in requests:
            lhs = pb_utils.get_input_tensor_by_name(request, "INPUT0").as_numpy()
            rhs = pb_utils.get_input_tensor_by_name(request, "INPUT1").as_numpy()

            tensors = [
                pb_utils.Tensor("OUTPUT0", (lhs + rhs).astype(sum_dtype)),
                pb_utils.Tensor("OUTPUT1", (lhs - rhs).astype(diff_dtype)),
            ]
            responses.append(pb_utils.InferenceResponse(tensors))
        return responses


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_return_response_error/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration for the Python model that returns its responses directly
# from execute() while being configured as decoupled.
name: "decoupled_return_response_error"
backend: "python"
# The model's initialize() raises unless the decoupled policy is enabled.
model_transaction_policy {
  decoupled: True
}
# Two FP32 inputs of 16 elements each; the model adds and subtracts them.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
# OUTPUT0 carries INPUT0 + INPUT1, OUTPUT1 carries INPUT0 - INPUT1.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

# Run the model instance on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_send_after_close_error/1/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Negative-test model that sends a response after closing its
    response sender.

    ``execute`` first sends the ``TRITONSERVER_RESPONSE_COMPLETE_FINAL``
    flag and only afterwards tries to send the actual response. The
    ordering is deliberate: it is the scenario this model exists to
    exercise.
    """

    def initialize(self, args):
        """Parse the model configuration and cache the output dtypes.

        Parameters
        ----------
        args : dict
            Must provide ``"model_config"`` (JSON string) and
            ``"model_name"``.

        Raises
        ------
        pb_utils.TritonModelException
            If the configuration does not enable the decoupled transaction
            policy.
        """
        self.model_config = model_config = json.loads(args["model_config"])

        using_decoupled = pb_utils.using_decoupled_model_transaction_policy(
            model_config
        )
        if not using_decoupled:
            raise pb_utils.TritonModelException(
                """the model `{}` can generate any number of responses per request,
                enable decoupled transaction policy in model configuration to
                serve this model""".format(
                    args["model_name"]
                )
            )

        output0_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT0")
        output1_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config["data_type"]
        )
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config["data_type"]
        )

    def execute(self, requests):
        """Close the response sender, then attempt to send a response.

        Parameters
        ----------
        requests : list
            Must contain exactly one request.

        Raises
        ------
        pb_utils.TritonModelException
            If more than one request (or none) is passed in.
        """

        # This model does not support batching, so 'request_count' should always be 1.
        if len(requests) != 1:
            # The count must be converted explicitly: concatenating str and
            # int raises TypeError and would hide the intended error.
            raise pb_utils.TritonModelException(
                "unsupported batch size " + str(len(requests))
            )

        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        response_sender = requests[0].get_response_sender()
        in_0 = pb_utils.get_input_tensor_by_name(requests[0], "INPUT0")
        in_1 = pb_utils.get_input_tensor_by_name(requests[0], "INPUT1")
        out_0, out_1 = (
            in_0.as_numpy() + in_1.as_numpy(),
            in_0.as_numpy() - in_1.as_numpy(),
        )

        out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype))
        out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype))
        response = pb_utils.InferenceResponse([out_tensor_0, out_tensor_1])

        # Intentional misuse: the final flag is sent first, and the response
        # is sent only after that.
        response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        response_sender.send(response)


================================================
FILE: qa/L0_backend_python/decoupled/models/decoupled_send_after_close_error/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration for the Python model that tries to send a response after
# closing its response sender.
name: "decoupled_send_after_close_error"
backend: "python"
# The model's initialize() raises unless the decoupled policy is enabled.
model_transaction_policy {
  decoupled: True
}

# Two FP32 inputs of 16 elements each; the model adds and subtracts them.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
# OUTPUT0 carries INPUT0 + INPUT1, OUTPUT1 carries INPUT0 - INPUT1.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

# Run the model instance on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/L0_backend_python/decoupled/test.sh
================================================
#!/bin/bash
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Test-wide settings. TRITON_REPO_ORGANIZATION may be overridden from the
# environment; MODELDIR and BACKEND_DIR are expected to be set by the caller.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"}
CLIENT_PY=./decoupled_test.py
CLIENT_LOG="./decoupled_client.log"
TEST_RESULT_FILE='test_results.txt'
SERVER_ARGS="--model-repository=${MODELDIR}/decoupled/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./decoupled_server.log"

# Replace whatever torch is installed with a pinned version.
pip3 uninstall -y torch
# FIXME: Until Windows supports GPU tensors, only test CPU scenarios
if [[ ${TEST_WINDOWS} == 1 ]]; then
  pip3 install torch==2.3.1 -f https://download.pytorch.org/whl/torch_stable.html
else
  pip3 install torch==2.3.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html
fi

# RET accumulates the overall result: 0 = pass, 1 = at least one failure.
RET=0
# Presumably provides run_server / kill_server used further down — confirm in util.sh.
source ../../common/util.sh

# Start from clean logs, then assemble the model repository.
rm -fr *.log
mkdir -p models/identity_fp32/1/
cp ../../python_models/identity_fp32/model.py models/identity_fp32/1/
cp ../../python_models/identity_fp32/config.pbtxt models/identity_fp32/

# execute_cancel is copied from the shared models and switched to decoupled
# mode by appending the transaction policy to its config.
mkdir -p models/execute_cancel/1/
cp ../../python_models/execute_cancel/model.py ./models/execute_cancel/1/
cp ../../python_models/execute_cancel/config.pbtxt ./models/execute_cancel/
echo "model_transaction_policy { decoupled: True }" >> ./models/execute_cancel/config.pbtxt

mkdir -p models/response_sender_until_cancelled/1/
cp ../../python_models/response_sender_until_cancelled/model.py ./models/response_sender_until_cancelled/1/
cp ../../python_models/response_sender_until_cancelled/config.pbtxt ./models/response_sender_until_cancelled/

# The square_int32 model comes from the python_backend examples, so fetch a
# fresh checkout at the requested tag.
rm -fr python_backend
git clone ${TRITON_REPO_ORGANIZATION}/python_backend -b $PYTHON_BACKEND_REPO_TAG
mkdir -p models/square_int32/1/
cp python_backend/examples/decoupled/square_model.py models/square_int32/1/model.py
cp python_backend/examples/decoupled/square_config.pbtxt models/square_int32/config.pbtxt

mkdir -p models/dlpack_add_sub/1/
cp ../../python_models/dlpack_add_sub/model.py models/dlpack_add_sub/1/
cp ../../python_models/dlpack_add_sub/config.pbtxt models/dlpack_add_sub/

# Scan $SERVER_LOG for the messages the test models are expected to log and
# set RET=1 for every expectation that is not met.
function verify_log_counts () {
  local label
  local marker

  # Each of these log messages has to show up at least once.
  for label in "Specific" "Info" "Warning" "Error"; do
    if [ $(grep -c "${label} Msg!" $SERVER_LOG) -lt 1 ]; then
      echo -e "\n***\n*** Test Failed: ${label} Msg Count Incorrect\n***"
      RET=1
    fi
  done

  # NOTE: Windows does not seem to have a way to send a true SIGINT signal
  # to tritonserver. Instead, it seems required to use taskkill.exe with /F (force)
  # to kill the running program. This means the server terminates immediately,
  # instead of shutting down how it would if Ctrl^C was invoked from the terminal.
  # To properly test functionality, we need a WAR.
  if [[ ${TEST_WINDOWS} == 0 ]]; then
    # Both finalize messages must be logged exactly three times.
    for marker in "Finalize invoked" "Finalize complete..."; do
      if [ $(grep -c "${marker}" $SERVER_LOG) -ne 3 ]; then
        echo -e "\n***\n*** Test Failed: '${marker}' message missing\n***"
        RET=1
      fi
    done
  fi
}

# Start the server; a SERVER_PID of "0" is treated as a failed start.
# Note that the script records the failure and keeps going rather than exiting.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

# Errors are tolerated while the client runs so that a failing test is
# recorded in RET instead of aborting the script.
set +e
python3 -m pytest --junitxml=decoupled.report.xml $CLIENT_PY > $CLIENT_LOG 2>&1

if [ $? -ne 0 ]; then
    echo -e "\n***\n*** decoupled test FAILED. \n***"
    RET=1
fi
set -e

kill_server

# Inspect the server log only after the server has been stopped.
verify_log_counts

# On failure dump both logs to aid debugging.
if [ $RET -eq 1 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Decoupled test FAILED. \n***"
else
    echo -e "\n***\n*** Decoupled test PASSED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/ensemble/ensemble_test.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append("../../common")

import unittest

import numpy as np
import shm_util
import tritonclient.http as httpclient
from tritonclient.utils import *

# By default, find tritonserver on "localhost", but for windows tests
# we overwrite the IP address with the TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class EnsembleTest(unittest.TestCase):
    """Inference checks against the ``ensemble`` and ``ensemble_gpu`` models."""

    def setUp(self):
        # Each test case gets its own shared-memory leak detector.
        self._shm_leak_detector = shm_util.ShmLeakDetector()

    def infer(self, model_name):
        """Send two random FP32 vectors to ``model_name`` over HTTP and check
        that every output equals twice the matching input.
        """
        shape = [16]
        server_url = f"{_tritonserver_ipaddr}:8000"
        with self._shm_leak_detector.Probe() as shm_probe:
            with httpclient.InferenceServerClient(server_url) as client:
                input_data_0 = np.random.random(shape).astype(np.float32)
                input_data_1 = np.random.random(shape).astype(np.float32)
                named_inputs = (
                    ("INPUT0", input_data_0),
                    ("INPUT1", input_data_1),
                )

                inputs = [
                    httpclient.InferInput(
                        name, data.shape, np_to_triton_dtype(data.dtype)
                    )
                    for name, data in named_inputs
                ]
                for infer_input, (_, data) in zip(inputs, named_inputs):
                    infer_input.set_data_from_numpy(data)

                result = client.infer(model_name, inputs)
                output0 = result.as_numpy("OUTPUT0")
                output1 = result.as_numpy("OUTPUT1")
                for output in (output0, output1):
                    self.assertIsNotNone(output)

                # Set a big enough tolerance to reduce intermittence. May be
                # better to test integer outputs in the future for consistency.
                expectations = (
                    (output0, input_data_0),
                    (output1, input_data_1),
                )
                for output, source in expectations:
                    self.assertTrue(np.allclose(output, 2 * source, atol=1e-06))

    def test_ensemble(self):
        self.infer("ensemble")

    def test_ensemble_gpu(self):
        self.infer("ensemble_gpu")


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/ensemble/test.sh
================================================
#!/bin/bash
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble test setup: build the model repository, then start the server.
CLIENT_LOG="./ensemble_client.log"
source ../common.sh
source ../../common/util.sh

# FIXME: [DLIS-5970] Until Windows supports GPU tensors, only test CPU scenarios
if [[ ${TEST_WINDOWS} == 1 ]]; then
    EXPECTED_NUM_TESTS="1"
else
    EXPECTED_NUM_TESTS="2"
fi

SERVER_ARGS="--model-repository=${MODELDIR}/ensemble/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./ensemble_server.log"

# RET accumulates the overall result: 0 = pass, 1 = at least one failure.
RET=0
rm -rf models/ $CLIENT_LOG

# Ensemble Model
mkdir -p models/ensemble/1/
cp ../../python_models/ensemble/config.pbtxt ./models/ensemble

# add_sub_1 and add_sub_2 are two copies of the same add_sub model placed
# under different directory names.
mkdir -p models/add_sub_1/1/
cp ../../python_models/add_sub/config.pbtxt ./models/add_sub_1
cp ../../python_models/add_sub/model.py ./models/add_sub_1/1/

mkdir -p models/add_sub_2/1/
cp ../../python_models/add_sub/config.pbtxt ./models/add_sub_2/
cp ../../python_models/add_sub/model.py ./models/add_sub_2/1/

# FIXME: [DLIS-5970] Until Windows supports GPU tensors, only test CPU scenarios
if [[ ${TEST_WINDOWS} == 0 ]]; then
    # Ensemble GPU Model
    mkdir -p models/ensemble_gpu/1/
    cp ../../python_models/ensemble_gpu/config.pbtxt ./models/ensemble_gpu
    cp -r ${DATADIR}/qa_model_repository/libtorch_float32_float32_float32/ ./models
    # Adapt the copied libtorch model: run it on GPU, disable batching, drop
    # the version policy and keep only version 1.
    (cd models/libtorch_float32_float32_float32 && \
            echo "instance_group [ { kind: KIND_GPU }]" >> config.pbtxt)
    (cd models/libtorch_float32_float32_float32 && \
            sed -i "s/^max_batch_size:.*/max_batch_size: 0/" config.pbtxt)
    (cd models/libtorch_float32_float32_float32 && \
            sed -i "s/^version_policy:.*//" config.pbtxt)
    rm -rf models/libtorch_float32_float32_float32/2
    rm -rf models/libtorch_float32_float32_float32/3
fi

# Baseline for the shared-memory leak check performed after the server stops.
prev_num_pages=`get_shm_pages`

# Start the server; a SERVER_PID of "0" is treated as a failed start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

# Errors are tolerated while the client runs so that a failing test is
# recorded in RET instead of aborting the script.
set +e

# FIXME: [DLIS-5970] Until Windows supports GPU tensors, only test CPU scenarios
# NOTE: redirections are applied left to right, so stdout has to be pointed at
# the log file before stderr is duplicated onto it ("> file 2>&1"). The
# reverse order leaves stderr on the terminal, and unittest writes its report
# to stderr, so the client log would miss it.
if [[ ${TEST_WINDOWS} == 0 ]]; then
    python3 -m pytest --junitxml=ensemble.report.xml ensemble_test.py > $CLIENT_LOG 2>&1
else
    python3 ensemble_test.py EnsembleTest.test_ensemble > $CLIENT_LOG 2>&1
fi

if [ $? -ne 0 ]; then
    echo -e "\n***\n*** ensemble_test.py FAILED. \n***"
    RET=1
fi
set -e

kill_server

# Shared-memory leak check: the page count after shutdown must match the
# count recorded before the server was started.
current_num_pages=`get_shm_pages`
if [ $current_num_pages -ne $prev_num_pages ]; then
    ls /dev/shm
    echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
    RET=1
fi

# On failure dump the client log to aid debugging.
if [ $RET -eq 1 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Ensemble test FAILED. \n***"
else
    echo -e "\n***\n*** Ensemble test PASSED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/env/test.sh
================================================
#!/bin/bash
# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Execution-environment test setup.
CLIENT_LOG="./env_client.log"
# Presumably these provide install_build_deps / install_conda / create_conda_env
# and run_server / kill_server used below — confirm in the sourced files.
source ../common.sh
source ../../common/util.sh

# Auto-complete of the model config is disabled for these runs.
BASE_SERVER_ARGS="--model-repository=${MODELDIR}/env/models --log-verbose=1 --disable-auto-complete-config"
PYTHON_BACKEND_BRANCH=$PYTHON_BACKEND_REPO_TAG
SERVER_ARGS=$BASE_SERVER_ARGS
SERVER_LOG="./env_server.log"

# RET accumulates the overall result: 0 = pass, 1 = at least one failure.
RET=0

# Remove models and packed environments left over from a previous run.
rm -fr ./models
rm -rf *.tar.gz
install_build_deps
install_conda

# Test conda env without custom Python backend stub. This environment should
# always use the default Python version shipped in the container. For Ubuntu
# 24.04 it is Python 3.12, for Ubuntu 22.04 it is Python 3.10 and for Ubuntu
# 20.04 it is 3.8.
# The single quotes are intentional: $$TRITON_MODEL_DIRECTORY must reach the
# model config verbatim instead of being expanded by this shell.
path_to_conda_pack='$$TRITON_MODEL_DIRECTORY/python_3_12_environment.tar.gz'
create_conda_env "3.12" "python-3-12"
conda install -c conda-forge libstdcxx-ng=14 -y
TORCH_VERSION="2.8.0"
conda install numpy=1.26.4 -y
if [[ ${TRITON_RHEL} -eq "1" ]] && grep -qE 'rhel|centos|fedora' /etc/os-release; then
    # Override the default for RHEL-like systems. This has to assign the same
    # variable that is read below; a misspelled name leaves the override dead.
    # NOTE(review): confirm "2.17.0" is the intended PyTorch version for RHEL.
    TORCH_VERSION="2.17.0"
fi
conda install pytorch=${TORCH_VERSION} -y
# Expected log line, checked against the server log once the model has loaded.
PY312_VERSION_STRING="Python version is 3.12, NumPy version is 1.26.4, and PyTorch version is ${TORCH_VERSION}"
conda pack -o python3.12.tar.gz
mkdir -p models/python_3_12/1/
cp ../../python_models/python_version/config.pbtxt ./models/python_3_12
cp python3.12.tar.gz models/python_3_12/python_3_12_environment.tar.gz
# Rename the model and point EXECUTION_ENV_PATH at the packed environment.
(cd models/python_3_12 && \
          sed -i "s/^name:.*/name: \"python_3_12\"/" config.pbtxt && \
          echo "parameters: {key: \"EXECUTION_ENV_PATH\", value: {string_value: \"$path_to_conda_pack\"}}" >> config.pbtxt)
cp ../../python_models/python_version/model.py ./models/python_3_12/1/
conda deactivate
rm -rf ./miniconda

# Start and stop the server once; the checks below only inspect its log.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

kill_server

# From here on grep failures are handled explicitly, so errexit is disabled.
set +e
for EXPECTED_VERSION_STRING in "$PY312_VERSION_STRING"; do
    grep "$EXPECTED_VERSION_STRING" $SERVER_LOG
    if [ $? -ne 0 ]; then
        cat $SERVER_LOG
        echo -e "\n***\n*** $EXPECTED_VERSION_STRING was not found in Triton logs. \n***"
        RET=1
    fi
done

# Test default (non set) locale in python stub processes
# NOTE: In certain pybind versions, the locale settings may not be propagated from parent to
#       stub processes correctly. See https://github.com/triton-inference-server/python_backend/pull/260.
export LC_ALL=INVALID
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

kill_server

# errexit is still disabled here (see "set +e" above), so a failed grep is
# reported through RET instead of aborting the script.
grep "Locale is (None, None)" $SERVER_LOG
    if [ $? -ne 0 ]; then
        cat $SERVER_LOG
        echo -e "\n***\n*** Default unset Locale was not found in Triton logs. \n***"
        RET=1
    fi
set -e

# Remove the log so the next check only sees output from its own server run.
rm $SERVER_LOG

# Test locale set via environment variable in python stub processes
# NOTE: In certain pybind versions, the locale settings may not be propagated from parent to
#       stub processes correctly. See https://github.com/triton-inference-server/python_backend/pull/260.
export LC_ALL=C.UTF-8
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

kill_server

set +e
grep "Locale is ('C', 'UTF-8')" $SERVER_LOG
    if [ $? -ne 0 ]; then
        cat $SERVER_LOG
        echo -e "\n***\n*** Locale UTF-8 was not found in Triton logs. \n***"
        RET=1
    fi
set -e

# Remove the log so the next check only sees output from its own server run.
rm $SERVER_LOG

## Test re-extraction of environment.
# The server runs in explicit model-control mode so each load below is
# triggered by this script through the repository API.
SERVER_ARGS="--model-repository=$(pwd)/models --log-verbose=1 --model-control-mode=explicit"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# The environment should be extracted
curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load
# Only the model file's modification time changes ...
touch -m models/python_3_12/1/model.py
# The environment should not be re-extracted
curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load
# ... now the environment archive's modification time changes.
touch -m models/python_3_12/python_3_12_environment.tar.gz
# The environment should be re-extracted
curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load

kill_server

set +e

PY312_ENV_EXTRACTION="Extracting Python execution env"
# Quote the command substitution: if grep prints nothing (for example, the log
# is missing) an unquoted expansion makes `[` fail with a syntax error and the
# check would be skipped instead of being reported as a failure.
if [ "$(grep -c "${PY312_ENV_EXTRACTION}" "${SERVER_LOG}")" != "2" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Python execution environment should be extracted exactly twice. \n***"
    RET=1
fi
set -e

# Test execution environments with S3.
# S3 credentials are required for this test and are passed via ENV variables.
aws configure set default.region $AWS_DEFAULT_REGION && \
    aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \
    aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY

# S3 bucket path (point to the bucket when testing cloud storage)
BUCKET_URL="s3://triton-bucket-${CI_JOB_ID}"

# Clean up and delete the S3 test bucket if it already exists (e.g. left over
# from an earlier failed run); errors from this cleanup are ignored.
aws s3 rm "$BUCKET_URL" --recursive --include "*" && \
    aws s3 rb "$BUCKET_URL" || true

# Make S3 test bucket
aws s3 mb "${BUCKET_URL}"

# Strip any trailing slash from BUCKET_URL, then keep a variant that ends
# with exactly one slash.
BUCKET_URL=${BUCKET_URL%/}
BUCKET_URL_SLASH="${BUCKET_URL}/"

# Test with the bucket url as model repository
aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*"

rm $SERVER_LOG
# Occasionally needs more time to load
SERVER_TIMEOUT=420

SERVER_ARGS="--model-repository=$BUCKET_URL_SLASH --log-verbose=1"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    # Remove the bucket before bailing out so it is not left behind.
    aws s3 rb "${BUCKET_URL}" --force || true
    exit 1
fi

kill_server

set +e
if ! grep "$PY312_VERSION_STRING" $SERVER_LOG; then
    cat $SERVER_LOG
    echo -e "\n***\n*** $PY312_VERSION_STRING was not found in Triton logs. \n***"
    RET=1
fi
set -e

# Clean up bucket contents
aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"

# Test with EXECUTION_ENV_PATH outside the model directory: point the config at
# the bucket root and move the archive out of the model directory to match.
sed -i "s/\$\$TRITON_MODEL_DIRECTORY\/python_3_12_environment/s3:\/\/triton-bucket-${CI_JOB_ID}\/python_3_12_environment/" models/python_3_12/config.pbtxt
mv models/python_3_12/python_3_12_environment.tar.gz models

aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*"

rm $SERVER_LOG

SERVER_ARGS="--model-repository=$BUCKET_URL_SLASH --log-verbose=1"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    # Remove the bucket before bailing out so it is not left behind.
    aws s3 rb "${BUCKET_URL}" --force || true
    exit 1
fi

kill_server

# The expected version banner must appear in the server log again.
set +e
for EXPECTED_VERSION_STRING in "$PY312_VERSION_STRING"; do
    if ! grep "$EXPECTED_VERSION_STRING" $SERVER_LOG; then
        cat $SERVER_LOG
        echo -e "\n***\n*** $EXPECTED_VERSION_STRING was not found in Triton logs. \n***"
        RET=1
    fi
done
set -e

# Clean up bucket contents and delete bucket
aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"
aws s3 rb "${BUCKET_URL}"

# Report the overall verdict; on failure dump the most recent server log too.
if [ "$RET" -eq 0 ]; then
  echo -e "\n***\n*** Env Manager Test PASSED.\n***"
else
  cat $SERVER_LOG
  echo -e "\n***\n*** Env Manager Test FAILED.\n***"
fi

# The accumulated status becomes the script's exit code.
exit $RET


================================================
FILE: qa/L0_backend_python/examples/test.sh
================================================
#!/bin/bash
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Shared helpers for this test directory and for the QA suite as a whole.
source ../common.sh
source ../../common/util.sh

# Default the GitHub organization only when the caller has not provided one.
: "${TRITON_REPO_ORGANIZATION:=http://github.com/triton-inference-server}"

SERVER_LOG="./examples_server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/examples/python_backend/models --backend-directory=${BACKEND_DIR} --log-verbose=1"

# RET accumulates failures across all examples; start from a clean directory.
RET=0
rm -rf *.log python_backend/

# Install torch
# Remove whatever torch/numpy versions are present before pinning new ones.
pip3 uninstall -y torch
pip3 uninstall -y numpy
# NOTE: Using this subtest as a test case that involves using a python model with
# numpy 2.X without changing the environments used in all the other test cases.
#
# PYTHON_ENV_VERSION is quoted in the tests below so that an empty or unset
# value compares as "not 8" cleanly instead of making `[` print
# "unary operator expected"; the branch taken is the same either way.
if [ "$TEST_JETSON" == "0" ] && [[ ${TEST_WINDOWS} == 0 ]]; then
    # Neither Jetson nor Windows: install the CUDA-tagged wheels.
    if [ "${PYTHON_ENV_VERSION}" == "8" ]; then
        # Python 3.8 does not support numpy 2.x, so install numpy 1.x.
        pip3 install "numpy<2"
        pip3 install torch==2.0.0+cu117 -f https://download.pytorch.org/whl/torch_stable.html torchvision==0.15.0+cu117
    else
        # Python 3.9 and newer support numpy 2.x.
        pip3 install "numpy>=2"
        pip3 install torch==2.5.0 torchvision==0.20.0 --index-url https://download.pytorch.org/whl/cu124
    fi
else
    # Jetson or Windows: same versions without the CUDA-specific wheel tags.
    if [ "${PYTHON_ENV_VERSION}" == "8" ]; then
        # Python 3.8 does not support numpy 2.x, so install numpy 1.x.
        pip3 install "numpy<2"
        pip3 install torch==2.0.0 -f https://download.pytorch.org/whl/torch_stable.html torchvision==0.15.0
    else
        # Python 3.9 and newer support numpy 2.x.
        pip3 install "numpy>=2"
        pip3 install torch==2.5.0 -f https://download.pytorch.org/whl/torch_stable.html torchvision==0.20.0
    fi
fi

# Install `validators` for Model Instance Kind example
pip3 install validators

# Install JAX
# Jax has dropped the support for Python 3.8. See https://jax.readthedocs.io/en/latest/changelog.html
# The same condition guards the JAX example further down, so the install and
# the example are skipped together.
# NOTE(review): ${PYTHON_ENV_VERSION} is unquoted; if it is empty the `[` test
# errors out and JAX is skipped — confirm that is intended before quoting it.
if [ "$TEST_JETSON" == "0" ] && [ ${PYTHON_ENV_VERSION} != "8" ]; then
    pip install -U "jax[cuda12]"
fi

# Fetch the python_backend examples at the requested tag; every path used by
# the examples below is relative to this checkout.
git clone ${TRITON_REPO_ORGANIZATION}/python_backend -b $PYTHON_BACKEND_REPO_TAG
cd python_backend

# Example 1: add_sub
CLIENT_LOG="../examples_add_sub_client.log"
mkdir -p models/add_sub/1/
cp examples/add_sub/model.py models/add_sub/1/model.py
cp examples/add_sub/config.pbtxt models/add_sub/config.pbtxt
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    RET=1
fi

# The client must both exit successfully and print "PASS".
set +e
if ! python3 examples/add_sub/client.py > "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify add_sub example. \n***"
    RET=1
fi

if ! grep "PASS" "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify add_sub example. \n***"
    cat "$CLIENT_LOG"
    RET=1
fi
set -e

# NOTE(review): kill_server runs even when SERVER_PID is "0" after a failed
# start — confirm the helper tolerates that.
kill_server

# Example 2: pytorch
CLIENT_LOG="../examples_pytorch_client.log"
mkdir -p models/pytorch/1/
cp examples/pytorch/model.py models/pytorch/1/model.py
cp examples/pytorch/config.pbtxt models/pytorch/config.pbtxt
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    RET=1
fi

# The client must both exit successfully and print "PASS".
set +e
if ! python3 examples/pytorch/client.py > "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify pytorch example. \n***"
    RET=1
fi

if ! grep "PASS" "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify pytorch example. \n***"
    cat "$CLIENT_LOG"
    RET=1
fi
set -e

kill_server

# Example 3

# JAX AddSub
# JAX is not supported on Jetson.
# Jax has dropped the support for Python 3.8. See https://jax.readthedocs.io/en/latest/changelog.html
# This is the same condition that guards the JAX installation earlier in the script.
if [ "$TEST_JETSON" == "0" ] && [ ${PYTHON_ENV_VERSION} != "8" ]; then
    CLIENT_LOG="../examples_jax_client.log"
    mkdir -p models/jax/1/
    cp examples/jax/model.py models/jax/1/model.py
    cp examples/jax/config.pbtxt models/jax/config.pbtxt
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat "$SERVER_LOG"
        RET=1
    fi

    # The client must both exit successfully and print "PASS".
    set +e
    if ! python3 examples/jax/client.py > "$CLIENT_LOG"; then
        echo -e "\n***\n*** Failed to verify jax example. \n***"
        RET=1
    fi

    if ! grep "PASS" "$CLIENT_LOG"; then
        echo -e "\n***\n*** Failed to verify jax example. \n***"
        cat "$CLIENT_LOG"
        RET=1
    fi
    set -e

    kill_server
fi

# Example 4

# BLS Sync
CLIENT_LOG="../examples_sync_client.log"
mkdir -p models/bls_sync/1
cp examples/bls/sync_model.py models/bls_sync/1/model.py
cp examples/bls/sync_config.pbtxt models/bls_sync/config.pbtxt
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    RET=1
fi

# The client must both exit successfully and print "PASS".
set +e
if ! python3 examples/bls/sync_client.py > "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify BLS sync example. \n***"
    RET=1
fi

if ! grep "PASS" "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify BLS sync example. \n***"
    cat "$CLIENT_LOG"
    RET=1
fi
set -e

kill_server

# Example 5

# Decoupled Repeat
CLIENT_LOG="../examples_repeat_client.log"
mkdir -p models/repeat_int32/1/
cp examples/decoupled/repeat_model.py models/repeat_int32/1/model.py
cp examples/decoupled/repeat_config.pbtxt models/repeat_int32/config.pbtxt
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    RET=1
fi

# The client must both exit successfully and print "PASS".
set +e
if ! python3 examples/decoupled/repeat_client.py > "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify repeat_int32 example. \n***"
    RET=1
fi

if ! grep "PASS" "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify repeat_int32 example. \n***"
    cat "$CLIENT_LOG"
    RET=1
fi
set -e

kill_server

# Example 6

# Decoupled Square
CLIENT_LOG="../examples_square_client.log"
mkdir -p models/square_int32/1/
cp examples/decoupled/square_model.py models/square_int32/1/model.py
cp examples/decoupled/square_config.pbtxt models/square_int32/config.pbtxt
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    RET=1
fi

# The client must both exit successfully and print "PASS".
set +e
if ! python3 examples/decoupled/square_client.py > "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify square_int32 example. \n***"
    RET=1
fi

if ! grep "PASS" "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify square_int32 example. \n***"
    cat "$CLIENT_LOG"
    RET=1
fi
set -e

kill_server

#
# BLS Async
#
# Skip async BLS on Jetson since it is not supported with python3.6.
# Having multiple python versions leads to build issues.
# Anaconda is not officially supported on Jetson.
if [ "$TEST_JETSON" == "0" ]; then
    CLIENT_LOG="../examples_async_client.log"
    mkdir -p models/bls_async/1
    cp examples/bls/async_model.py models/bls_async/1/model.py
    cp examples/bls/async_config.pbtxt models/bls_async/config.pbtxt
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat "$SERVER_LOG"
        RET=1
    fi

    # The client must both exit successfully and print "PASS".
    set +e
    if ! python3 examples/bls/async_client.py > "$CLIENT_LOG"; then
        echo -e "\n***\n*** Failed to verify BLS async example. \n***"
        RET=1
    fi

    if ! grep "PASS" "$CLIENT_LOG"; then
        echo -e "\n***\n*** Failed to verify BLS async example. \n***"
        cat "$CLIENT_LOG"
        RET=1
    fi

    set -e

    kill_server
fi

# Auto Complete Model Configuration Example
# Only model.py files are copied for these two models (no config.pbtxt).
CLIENT_LOG="../examples_auto_complete_client.log"
mkdir -p models/nobatch_auto_complete/1/
mkdir -p models/batch_auto_complete/1/
cp examples/auto_complete/nobatch_model.py models/nobatch_auto_complete/1/model.py
cp examples/auto_complete/batch_model.py models/batch_auto_complete/1/model.py

# NOTE: this flag is appended to SERVER_ARGS and therefore also applies to
# every server started after this point.
SERVER_ARGS="$SERVER_ARGS --strict-model-config=false"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    RET=1
fi

# The client must both exit successfully and print "PASS".
set +e
if ! python3 examples/auto_complete/client.py > "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify auto_complete example. \n***"
    RET=1
fi

if ! grep "PASS" "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify auto_complete example. \n***"
    cat "$CLIENT_LOG"
    RET=1
fi
set -e

kill_server

# BLS Decoupled Sync
CLIENT_LOG="../examples_bls_decoupled_sync_client.log"
mkdir -p models/bls_decoupled_sync/1
cp examples/bls_decoupled/sync_model.py models/bls_decoupled_sync/1/model.py
cp examples/bls_decoupled/sync_config.pbtxt models/bls_decoupled_sync/config.pbtxt
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    RET=1
fi

# The client must both exit successfully and print "PASS".
set +e
if ! python3 examples/bls_decoupled/sync_client.py > "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify BLS Decoupled Sync example. \n***"
    RET=1
fi

if ! grep "PASS" "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify BLS Decoupled Sync example. \n***"
    cat "$CLIENT_LOG"
    RET=1
fi
set -e

kill_server

# BLS Decoupled Async
# Skipped on Jetson, like the BLS Async example above.
if [ "$TEST_JETSON" == "0" ]; then
    CLIENT_LOG="../examples_bls_decoupled_async_client.log"
    mkdir -p models/bls_decoupled_async/1
    cp examples/bls_decoupled/async_model.py models/bls_decoupled_async/1/model.py
    cp examples/bls_decoupled/async_config.pbtxt models/bls_decoupled_async/config.pbtxt
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat "$SERVER_LOG"
        RET=1
    fi

    # The client must both exit successfully and print "PASS".
    set +e
    if ! python3 examples/bls_decoupled/async_client.py > "$CLIENT_LOG"; then
        echo -e "\n***\n*** Failed to verify BLS Decoupled Async example. \n***"
        RET=1
    fi

    if ! grep "PASS" "$CLIENT_LOG"; then
        echo -e "\n***\n*** Failed to verify BLS Decoupled Async example. \n***"
        cat "$CLIENT_LOG"
        RET=1
    fi

    set -e

    kill_server
fi

# Example 7

# Model Instance Kind
CLIENT_LOG="../examples_model_instance_kind.log"
mkdir -p models/resnet50/1
cp examples/instance_kind/model.py models/resnet50/1/
cp examples/instance_kind/config.pbtxt models/resnet50/
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    RET=1
fi

# The client must both exit successfully and print "PASS".
set +e
if ! python3 examples/instance_kind/client.py --label_file examples/instance_kind/resnet50_labels.txt > "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify Model instance Kind example. \n***"
    RET=1
fi

if ! grep "PASS" "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify Model Instance Kind example. Example failed to pass. \n***"
    cat "$CLIENT_LOG"
    RET=1
fi
set -e

kill_server

# Custom Metrics
CLIENT_LOG="../examples_custom_metrics_client.log"
mkdir -p models/custom_metrics/1
cp examples/custom_metrics/model.py models/custom_metrics/1/model.py
cp examples/custom_metrics/config.pbtxt models/custom_metrics/config.pbtxt
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    RET=1
fi

# The client must both exit successfully and print "PASS".
set +e
if ! python3 examples/custom_metrics/client.py > "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify Custom Metrics example. \n***"
    RET=1
fi

if ! grep "PASS" "$CLIENT_LOG"; then
    echo -e "\n***\n*** Failed to verify Custom Metrics example. \n***"
    cat "$CLIENT_LOG"
    RET=1
fi
set -e

kill_server


# Report the overall verdict accumulated in RET by the examples above.
if [ "$RET" -eq 0 ]; then
    echo -e "\n***\n*** Example verification test PASSED.\n***"
else
    echo -e "\n***\n*** Example verification test FAILED.\n***"
fi

# The accumulated status becomes the script's exit code.
exit $RET


================================================
FILE: qa/L0_backend_python/io/io_test.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append("../../common")

import itertools
import queue
import unittest
from functools import partial

import numpy as np
import shm_util
import tritonclient.grpc as grpcclient
from tritonclient.utils import *

TRIAL = os.getenv("TRIAL")

# By default, find tritonserver on "localhost", but for windows tests
# we overwrite the IP address with the TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class UserData:
    """Container for the queue that `callback` fills with request outcomes."""

    def __init__(self):
        # FIFO of completed requests; each entry is either a result or an error.
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    """Record the outcome of one request on ``user_data``.

    The error is queued when it is truthy; otherwise the result is queued.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


class IOTest(unittest.TestCase):
    def setUp(self):
        """Give every test its own shm leak detector and gRPC client."""
        server_url = f"{_tritonserver_ipaddr}:8001"
        self._shm_leak_detector = shm_util.ShmLeakDetector()
        self._client = grpcclient.InferenceServerClient(server_url)

    def _run_ensemble_test(self, model_name):
        """Stream one request per GPU-flag combination and check the echoes.

        For each of the eight (model_1, model_2, model_3) boolean combinations
        a request carrying ``INPUT0`` and ``GPU_OUTPUT`` is sent over a gRPC
        stream. Every response must contain an ``OUTPUT0`` equal to ``INPUT0``.
        One response per request is read when ``TRIAL`` is "default", two
        otherwise.
        """
        completed = UserData()
        input0 = np.random.random([1000]).astype(np.float32)
        responses_per_request = 1 if TRIAL == "default" else 2
        # Use context manager to close client stream if any early exit occurs
        with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client:
            client.start_stream(callback=partial(callback, completed))
            # Each flag says whether the corresponding model is in GPU or not:
            # (True, True, True), (True, True, False), (True, False, True), ...
            for placement in itertools.product((True, False), repeat=3):
                gpu_output = np.asarray(list(placement), dtype=bool)
                input0_tensor = grpcclient.InferInput(
                    "INPUT0", input0.shape, np_to_triton_dtype(input0.dtype)
                )
                gpu_output_tensor = grpcclient.InferInput(
                    "GPU_OUTPUT",
                    gpu_output.shape,
                    np_to_triton_dtype(gpu_output.dtype),
                )
                input0_tensor.set_data_from_numpy(input0)
                gpu_output_tensor.set_data_from_numpy(gpu_output)
                client.async_stream_infer(
                    model_name=model_name,
                    inputs=[input0_tensor, gpu_output_tensor],
                )
                # Block until each expected response for this request arrives.
                for _ in range(responses_per_request):
                    result = completed._completed_requests.get()
                    output0 = result.as_numpy("OUTPUT0")
                    self.assertIsNotNone(output0)
                    self.assertTrue(np.all(output0 == input0))

    def test_ensemble_io(self):
        """Run the ensemble I/O scenario on `ensemble_io` under a shm probe."""
        # FIXME: This test detects a decrease of 80 bytes, which fails inequality check:
        # [ensemble_io] Shared memory leak detected: 1006976 (current) != 1007056 (prev).
        # so Probe was modified to check for growth instead of inequality.
        with self._shm_leak_detector.Probe():
            self._run_ensemble_test("ensemble_io")

    def test_empty_gpu_output(self):
        """Check that `dlpack_empty_output` returns a zero-sized OUTPUT."""
        with self._shm_leak_detector.Probe():
            payload = np.array([[1.0]], dtype=np.float32)
            input_tensor = grpcclient.InferInput(
                "INPUT", payload.shape, np_to_triton_dtype(payload.dtype)
            )
            input_tensor.set_data_from_numpy(payload)
            response = self._client.infer("dlpack_empty_output", [input_tensor])
            output = response.as_numpy("OUTPUT")
            # The tensor must be present in the response but hold no elements.
            self.assertIsNotNone(output)
            self.assertEqual(output.size, 0)

    def test_variable_gpu_output(self):
        """Check `variable_gpu_output` responses whose OUTPUT size varies.

        Five async requests are issued. The i-th response taken from the
        completion queue must hold ``i + 1`` elements, each equal to ``i + 1``.
        A queued ``InferenceServerException`` fails the test with the server's
        error message.
        """
        model_name = "variable_gpu_output"
        with self._shm_leak_detector.Probe():
            # Input is not important in this test
            input_data = np.array([[1.0]], dtype=np.float32)
            inputs = [
                grpcclient.InferInput(
                    "INPUT", input_data.shape, np_to_triton_dtype(input_data.dtype)
                )
            ]
            inputs[0].set_data_from_numpy(input_data)
            user_data = UserData()

            # The test sends five requests to the model and the model returns five
            # responses with different GPU output shapes
            num_requests = 5
            for _ in range(num_requests):
                self._client.async_infer(
                    model_name=model_name,
                    inputs=inputs,
                    callback=partial(callback, user_data),
                )

            for i in range(num_requests):
                result = user_data._completed_requests.get()
                # `callback` queues the error object itself on failure, so an
                # instance check is required; an identity comparison against
                # the exception class never matches.
                if isinstance(result, InferenceServerException):
                    self.fail(f"Inference request failed: {result}")
                output = result.as_numpy("OUTPUT")
                self.assertIsNotNone(output)
                # Responses are checked in the order they were dequeued.
                self.assertEqual(output.size, i + 1)
                np.testing.assert_almost_equal(output, np.ones(i + 1) * (i + 1))

    # Non-decoupled models should filter outputs based on requested outputs.
    def test_requested_output_default(self):
        """Check output filtering on the non-decoupled `add_sub` model.

        Requesting only ``OUTPUT1`` must yield exactly that output (the
        difference of the inputs). Requesting nothing must yield both the sum
        (``OUTPUT0``) and the difference (``OUTPUT1``).
        """
        model_name = "add_sub"
        shape = [16]

        input0_data = np.random.rand(*shape).astype(np.float32)
        input1_data = np.random.rand(*shape).astype(np.float32)
        inputs = []
        for tensor_name, data in (("INPUT0", input0_data), ("INPUT1", input1_data)):
            tensor = grpcclient.InferInput(
                tensor_name, data.shape, np_to_triton_dtype(data.dtype)
            )
            tensor.set_data_from_numpy(data)
            inputs.append(tensor)

        expected_sum = input0_data + input1_data
        expected_diff = input0_data - input1_data

        # Request output 1 only, out of outputs 0 and 1.
        requested_outputs = [grpcclient.InferRequestedOutput("OUTPUT1")]
        with self._shm_leak_detector.Probe():
            response = self._client.infer(
                model_name=model_name,
                inputs=inputs,
                outputs=requested_outputs,
            )
        returned = response.get_response().outputs
        self.assertEqual(len(returned), len(requested_outputs))
        self.assertTrue(np.allclose(expected_diff, response.as_numpy("OUTPUT1")))

        # Without requested outputs, all outputs should be returned.
        with self._shm_leak_detector.Probe():
            response = self._client.infer(model_name=model_name, inputs=inputs)
        returned = response.get_response().outputs
        self.assertEqual(len(returned), len(inputs))
        output0_data = response.as_numpy("OUTPUT0")
        output1_data = response.as_numpy("OUTPUT1")
        self.assertTrue(np.allclose(expected_sum, output0_data))
        self.assertTrue(np.allclose(expected_diff, output1_data))

    # Decoupled models should filter outputs based on requested outputs.
    def test_requested_output_decoupled(self):
        """Check output filtering on `dlpack_io_identity_decoupled`.

        Each streamed request is expected to produce exactly two responses.
        When only ``OUTPUT0`` is requested, each response carries just that
        output. When nothing is requested, each response carries ``OUTPUT0``
        and ``NEXT_GPU_OUTPUT``.
        """
        model_name = "dlpack_io_identity_decoupled"
        shape = [4]
        expected_response_repeat = 2

        input0_data = np.random.rand(*shape).astype(np.float32)
        gpu_output_data = np.random.rand(*shape).astype(np.bool_)
        input0_tensor = grpcclient.InferInput(
            "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
        )
        gpu_output_tensor = grpcclient.InferInput(
            "GPU_OUTPUT",
            gpu_output_data.shape,
            np_to_triton_dtype(gpu_output_data.dtype),
        )
        input0_tensor.set_data_from_numpy(input0_data)
        gpu_output_tensor.set_data_from_numpy(gpu_output_data)
        inputs = [input0_tensor, gpu_output_tensor]

        def stream_once(**extra_infer_args):
            # Send a single request on a fresh stream and return the queue of
            # everything the callback collected before the stream was stopped.
            collected = UserData()
            with grpcclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8001"
            ) as client:
                client.start_stream(callback=partial(callback, collected))
                client.async_stream_infer(
                    model_name=model_name, inputs=inputs, **extra_infer_args
                )
                client.stop_stream()
            return collected._completed_requests

        # Request output 0 only, out of output 0 and the next gpu output.
        requested_outputs = [grpcclient.InferRequestedOutput("OUTPUT0")]
        completed = stream_once(outputs=requested_outputs)
        for _ in range(expected_response_repeat):
            self.assertFalse(completed.empty())
            response = completed.get()
            returned = response.get_response().outputs
            self.assertEqual(len(returned), len(requested_outputs))
            output0_data = response.as_numpy("OUTPUT0")
            self.assertTrue(np.allclose(input0_data, output0_data))
        self.assertTrue(completed.empty())

        # Without requested outputs, all outputs should be returned.
        completed = stream_once()
        for _ in range(expected_response_repeat):
            self.assertFalse(completed.empty())
            response = completed.get()
            returned = response.get_response().outputs
            self.assertEqual(len(returned), len(inputs))
            output0_data = response.as_numpy("OUTPUT0")
            next_gpu_output_data = response.as_numpy("NEXT_GPU_OUTPUT")
            self.assertTrue(np.allclose(input0_data, output0_data))
            self.assertTrue(np.allclose(gpu_output_data[1:], next_gpu_output_data))
        self.assertTrue(completed.empty())

    # Assert a prior crash is fixed regarding requested output on a decoupled model.
    def test_requested_output_decoupled_prior_crash(self):
        """Stream one request to `llm` asking only for ``text_output``.

        Any queued ``InferenceServerException`` is re-raised. The decoded
        ``text_output`` pieces, concatenated, must be non-empty.
        """
        model_name = "llm"
        prompt = "test"

        text_input_data = np.array([[prompt]]).astype(object)
        text_input = grpcclient.InferInput(
            "text_input", text_input_data.shape, "BYTES"
        )
        text_input.set_data_from_numpy(text_input_data)
        inputs = [text_input]

        requested_outputs = [grpcclient.InferRequestedOutput("text_output")]

        user_data = UserData()
        with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client:
            client.start_stream(callback=partial(callback, user_data))
            client.async_stream_infer(
                model_name=model_name, inputs=inputs, outputs=requested_outputs
            )
            client.stop_stream()

        # Drain whatever the callback collected before the stream was stopped.
        completed = user_data._completed_requests
        pieces = []
        while not completed.empty():
            result = completed.get(block=False)
            if isinstance(result, InferenceServerException):
                raise result
            pieces.append(str(result.as_numpy("text_output")[0], encoding="utf-8"))
        outputs = "".join(pieces)
        self.assertGreater(len(outputs), 0, "text_output is empty")


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/io/requested_output_model/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#
# This test case was added based on a prior crash. DO NOT MODIFY!
#

name: "llm"
backend: "python"
max_batch_size: 128

model_transaction_policy {
  decoupled: True
}

input [
  {
    name: "text_input"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]
output [
  {
    name: "text_output"
    data_type: TYPE_STRING
    dims: [ -1 ]
  },
  {
    name: "sequence_index"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_backend_python/io/requested_output_model/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#
# This test case was added based on a prior crash. DO NOT MODIFY!
#

import json
import traceback

import numpy as np
import triton_python_backend_utils as pb_utils


def get_valid_param_value(param, default_value=""):
    """Return the ``string_value`` of ``param``, or ``default_value``.

    ``default_value`` is returned when ``string_value`` is missing, empty,
    or starts with ``${``.

    Args:
        param: Mapping that may contain a ``"string_value"`` entry.
        default_value: Value returned when no usable string is present.

    Returns:
        The stored string, or ``default_value``.
    """
    raw = param.get("string_value", "")
    if raw.startswith("${") or raw == "":
        return default_value
    return raw


class TritonPythonModel:
    """Python-backend model that emits one fixed response per request.

    Every request is answered with a "text_output" tensor and a
    "sequence_index" tensor. The response is either sent through the
    request's response sender (decoupled transaction policy) or returned
    from ``execute`` (default policy).
    """

    def initialize(self, args):
        """Read the model configuration and cache what ``execute`` needs.

        Args:
            args: Mapping whose "model_config" entry is the JSON-encoded
                model configuration.
        """
        model_config = json.loads(args["model_config"])
        # Look up the "text_output" entry so its declared data type can be
        # converted to the matching numpy dtype.
        self.output_config = pb_utils.get_output_config_by_name(
            model_config, "text_output"
        )
        self.output_dtype = pb_utils.triton_string_to_numpy(
            self.output_config["data_type"]
        )
        # Decides between response-sender delivery and returned responses.
        self.decoupled = pb_utils.using_decoupled_model_transaction_policy(model_config)
        self.logger = pb_utils.Logger

    def create_triton_tensors(self, index):
        """Build the two output tensors for response number ``index``.

        Returns:
            A list ``[text_output, sequence_index]`` of ``pb_utils.Tensor``.
        """
        # Text payload is "bla<index>", UTF-8 encoded, as a one-element array.
        x = "bla" + str(index)
        output = [x.encode("utf8")]
        np_output = np.array(output).astype(self.output_dtype)
        # The sequence index is always 0, with shape (1, 1).
        seq_idx = np.array([[0]]).astype(np.int32)

        t1 = pb_utils.Tensor("text_output", np_output)
        t2 = pb_utils.Tensor("sequence_index", seq_idx)
        tensors = [t1, t2]
        return tensors

    def create_triton_response(self, index):
        """Wrap the tensors for ``index`` in an ``InferenceResponse``."""
        tensors = self.create_triton_tensors(index)
        return pb_utils.InferenceResponse(output_tensors=tensors)

    def execute(self, requests):
        """Answer every request with a single response.

        Args:
            requests: Iterable of inference requests.

        Returns:
            ``None`` in decoupled mode (responses go through the response
            sender); otherwise a list with one response per request.
        """
        responses = []
        for request in requests:
            # The response sender is only fetched (and only used) in
            # decoupled mode.
            if self.decoupled:
                response_sender = request.get_response_sender()
            try:
                # range(0, 1) yields a single iteration: one response.
                for index in range(0, 1):
                    triton_response = self.create_triton_response(index)
                    if self.decoupled:
                        response_sender.send(triton_response)
                    else:
                        responses.append(triton_response)

                # Decoupled mode: signal that no more responses follow.
                if self.decoupled:
                    response_sender.send(
                        flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
                    )

            except Exception:
                # Log the traceback and report it as the response error
                # rather than letting the exception escape execute().
                self.logger.log_error(traceback.format_exc())
                error_response = pb_utils.InferenceResponse(
                    output_tensors=[],
                    error=pb_utils.TritonError(traceback.format_exc()),
                )

                if self.decoupled:
                    response_sender.send(error_response)
                    response_sender.send(
                        flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
                    )
                else:
                    responses.append(error_response)

        if self.decoupled:
            return None
        else:
            # Default mode: exactly one response per request.
            assert len(responses) == len(requests)
            return responses

    def finalize(self):
        """Log a message when the model is being unloaded."""
        self.logger.log_info("Cleaning up...")


================================================
FILE: qa/L0_backend_python/io/test.sh
================================================
#!/bin/bash
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

UNITTEST_PY=./io_test.py
CLIENT_LOG="./io_client.log"
TEST_RESULT_FILE='test_results.txt'
source ../common.sh
source ../../common/util.sh

SERVER_ARGS="--model-repository=${MODELDIR}/io/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./io_server.log"

RET=0
rm -fr *.log ./models

pip3 uninstall -y torch
pip3 install torch==2.3.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html

# IOTest.test_ensemble_io
TRIALS="default decoupled"

for trial in $TRIALS; do
    export TRIAL=$trial
    rm -rf ./models

    if [ $trial = "default" ]; then
        for i in {1..3}; do
            model_name=dlpack_io_identity_$i
            mkdir -p models/$model_name/1/
            cp ../../python_models/dlpack_io_identity/model.py ./models/$model_name/1/
            cp ../../python_models/dlpack_io_identity/config.pbtxt ./models/$model_name/
            (cd models/$model_name && \
                      sed -i "s/^name:.*/name: \"$model_name\"/" config.pbtxt)
        done
    else
        for i in {1..3}; do
            model_name=dlpack_io_identity_$i
            mkdir -p models/$model_name/1/
            cp ../../python_models/dlpack_io_identity_decoupled/model.py ./models/$model_name/1/
            cp ../../python_models/dlpack_io_identity_decoupled/config.pbtxt ./models/$model_name/
            (cd models/$model_name && \
                      sed -i "s/^name:.*/name: \"$model_name\"/" config.pbtxt)
        done
    fi

    mkdir -p models/ensemble_io/1/
    cp ../../python_models/ensemble_io/config.pbtxt ./models/ensemble_io

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        RET=1
    fi

    set +e
    SUBTEST="test_ensemble_io"
    python3 -m pytest --junitxml=${SUBTEST}.${TRIAL}.report.xml ${UNITTEST_PY}::IOTest::${SUBTEST} >> ${CLIENT_LOG}.${SUBTEST}
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** IOTest.${SUBTEST} FAILED. \n***"
        cat $CLIENT_LOG.${SUBTEST}
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# IOTest.test_empty_gpu_output
rm -rf models && mkdir models
mkdir -p models/dlpack_empty_output/1/
cp ../../python_models/dlpack_empty_output/model.py ./models/dlpack_empty_output/1/
cp ../../python_models/dlpack_empty_output/config.pbtxt ./models/dlpack_empty_output/

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

set +e
SUBTEST="test_empty_gpu_output"
python3 -m pytest --junitxml=${SUBTEST}.report.xml ${UNITTEST_PY}::IOTest::${SUBTEST} > ${CLIENT_LOG}.${SUBTEST}

if [ $? -ne 0 ]; then
    echo -e "\n***\n*** IOTest.${SUBTEST} FAILED. \n***"
    cat $CLIENT_LOG.${SUBTEST}
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# IOTest.test_variable_gpu_output
rm -rf models && mkdir models
mkdir -p models/variable_gpu_output/1/
cp ../../python_models/variable_gpu_output/model.py ./models/variable_gpu_output/1/
cp ../../python_models/variable_gpu_output/config.pbtxt ./models/variable_gpu_output/

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

set +e
SUBTEST="test_variable_gpu_output"
python3 -m pytest --junitxml=${SUBTEST}.report.xml ${UNITTEST_PY}::IOTest::${SUBTEST} > ${CLIENT_LOG}.${SUBTEST}

if [ $? -ne 0 ]; then
    echo -e "\n***\n*** IOTest.${SUBTEST} FAILED. \n***"
    cat $CLIENT_LOG.${SUBTEST}
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# IOTest.test_requested_output_default & IOTest.test_requested_output_decoupled
rm -rf models && mkdir models
mkdir -p models/add_sub/1/
cp ../../python_models/add_sub/model.py ./models/add_sub/1/
cp ../../python_models/add_sub/config.pbtxt ./models/add_sub/
mkdir -p models/dlpack_io_identity_decoupled/1/
cp ../../python_models/dlpack_io_identity_decoupled/model.py ./models/dlpack_io_identity_decoupled/1/
cp ../../python_models/dlpack_io_identity_decoupled/config.pbtxt ./models/dlpack_io_identity_decoupled/

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

SUBTESTS="test_requested_output_default test_requested_output_decoupled"
for SUBTEST in $SUBTESTS; do
    set +e
    python3 -m pytest --junitxml=${SUBTEST}.report.xml ${UNITTEST_PY}::IOTest::${SUBTEST} > ${CLIENT_LOG}.${SUBTEST}
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** IOTest.${SUBTEST} FAILED. \n***"
        cat $CLIENT_LOG.${SUBTEST}
        RET=1
    fi
    set -e
done

kill $SERVER_PID
wait $SERVER_PID

# IOTest.test_requested_output_decoupled_prior_crash
rm -rf models && mkdir models
mkdir -p models/llm/1/
cp requested_output_model/config.pbtxt models/llm/
cp requested_output_model/model.py models/llm/1/

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

SUBTEST="test_requested_output_decoupled_prior_crash"
set +e
python3 -m pytest --junitxml=${SUBTEST}.report.xml ${UNITTEST_PY}::IOTest::${SUBTEST} > ${CLIENT_LOG}.${SUBTEST}
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** IOTest.${SUBTEST} FAILED. \n***"
    cat $CLIENT_LOG.${SUBTEST}
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** IO test PASSED.\n***"
else
    echo -e "\n***\n*** IO test FAILED.\n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/lifecycle/lifecycle_test.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import re
import sys

import requests

sys.path.append("../../common")

import queue
import threading
import time
import unittest
from functools import partial

import numpy as np
import shm_util
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import *

# By default, find tritonserver on "localhost", but for windows tests
# we overwrite the IP address with the TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class UserData:
    """Container for the results delivered to the stream callback."""

    def __init__(self):
        # Results and errors are queued here in arrival order.
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    """Queue the outcome of a completed request on ``user_data``.

    Args:
        user_data: Object exposing a ``_completed_requests`` queue.
        result: Result of the request; queued when ``error`` is falsy.
        error: Error of the request; queued instead of ``result`` when truthy.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


class LifecycleTest(unittest.TestCase):
    """Lifecycle tests run against an already started Triton server.

    The tests talk to the server over HTTP (port 8000), gRPC (port 8001) and
    the metrics endpoint (port 8002) at ``_tritonserver_ipaddr``.
    """

    def setUp(self):
        self._shm_leak_detector = shm_util.ShmLeakDetector()

    @staticmethod
    def _random_fp32_input(shape):
        """Build a random FP32 gRPC input named "IN".

        Args:
            shape: Shape of the random tensor.

        Returns:
            Tuple ``(input_data, inputs)``: the numpy array and a one-element
            list holding the ``InferInput`` that carries it.
        """
        input_data = np.random.randn(*shape).astype(np.float32)
        infer_input = grpcclient.InferInput(
            "IN", input_data.shape, np_to_triton_dtype(input_data.dtype)
        )
        infer_input.set_data_from_numpy(input_data)
        return input_data, [infer_input]

    def _get_metrics(self):
        """Return the text served by the metrics endpoint.

        Raises:
            requests.RequestException: If the request fails, times out, or
                returns an HTTP error status.
        """
        metrics_url = f"http://{_tritonserver_ipaddr}:8002/metrics"
        # A timeout keeps an unresponsive endpoint from hanging the test run.
        r = requests.get(metrics_url, timeout=30)
        r.raise_for_status()
        return r.text

    def _metrics_before_test(self, model, reason):
        """Return the current ``nv_inference_request_failure`` count.

        Args:
            model: Model name label of the metric.
            reason: Failure reason label of the metric.

        Raises:
            Exception: If the metric line is not present.
        """
        # Escape the labels so they are always matched literally.
        pattern = (
            r"nv_inference_request_failure\{"
            rf'model="{re.escape(model)}",reason="{re.escape(reason)}",version="1"'
            r"\} (\d+)"
        )
        match = re.search(pattern, self._get_metrics())
        if match is None:
            raise Exception(f"Failure metrics for model='{model}' not found")
        return int(match.group(1))

    def _assert_metrics(
        self, model_name, reason, expected_count_increase, initial_count
    ):
        """Assert the failure metric grew by ``expected_count_increase``."""
        metrics = self._get_metrics()
        # Add initial count + expected count for the test
        expected_metric = f'nv_inference_request_failure{{model="{model_name}",reason="{reason}",version="1"}} {expected_count_increase + initial_count}'
        self.assertIn(expected_metric, metrics)

    def test_error_code(self):
        """Each Triton error code requested from the model maps to the
        expected gRPC status in the client exception."""
        model_name = "error_code"
        shape = [1, 1]
        # [(Triton error, expected gRPC error message starting), ...]
        errors = [
            ("UNKNOWN", "[StatusCode.UNKNOWN]"),
            ("INTERNAL", "[StatusCode.INTERNAL]"),
            ("NOT_FOUND", "[StatusCode.NOT_FOUND]"),
            ("INVALID_ARG", "[StatusCode.INVALID_ARGUMENT]"),
            ("UNAVAILABLE", "[StatusCode.UNAVAILABLE]"),
            ("UNSUPPORTED", "[StatusCode.UNIMPLEMENTED]"),
            ("ALREADY_EXISTS", "[StatusCode.ALREADY_EXISTS]"),
            ("CANCELLED", "[StatusCode.CANCELLED]"),
            ("(default)", "[StatusCode.INTERNAL] unrecognized"),
        ]
        with self._shm_leak_detector.Probe():
            with grpcclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8001"
            ) as client:
                for error, expected_grpc_error_start in errors:
                    input_data = np.array([[error]], dtype=np.object_)
                    inputs = [
                        grpcclient.InferInput(
                            "ERROR_CODE", shape, np_to_triton_dtype(input_data.dtype)
                        )
                    ]
                    inputs[0].set_data_from_numpy(input_data)
                    with self.assertRaises(InferenceServerException) as e:
                        client.infer(model_name, inputs)
                    # e.g. [StatusCode.UNKNOWN] error code: TRITONSERVER_ERROR_UNKNOWN
                    # e.g. [StatusCode.INTERNAL] unrecognized error code: (default)
                    self.assertEqual(
                        str(e.exception),
                        expected_grpc_error_start + " error code: " + error,
                    )

    def test_execute_cancel(self):
        """Cancelling an in-flight request yields a CANCELLED error and the
        model logs that it observed the cancellation."""
        model_name = "execute_cancel"
        log_path = "lifecycle_server.log"
        execute_delay = 4.0  # seconds
        shape = [1, 1]
        response = {"responded": False, "result": None, "error": None}

        def on_complete(result, error):
            response["responded"] = True
            response["result"] = result
            response["error"] = error

        with self._shm_leak_detector.Probe():
            with grpcclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8001"
            ) as client:
                input_data = np.array([[execute_delay]], dtype=np.float32)
                inputs = [
                    grpcclient.InferInput(
                        "EXECUTE_DELAY", shape, np_to_triton_dtype(input_data.dtype)
                    )
                ]
                inputs[0].set_data_from_numpy(input_data)
                exec_future = client.async_infer(model_name, inputs, on_complete)
                time.sleep(2)  # ensure the request is executing
                self.assertFalse(response["responded"])
                exec_future.cancel()
                time.sleep(2)  # ensure the cancellation is delivered
                self.assertTrue(response["responded"])

        self.assertIsNone(response["result"])
        self.assertIsInstance(response["error"], InferenceServerException)
        self.assertEqual(response["error"].status(), "StatusCode.CANCELLED")
        with open(log_path, mode="r", encoding="utf-8", errors="strict") as f:
            log_text = f.read()
        self.assertIn("[execute_cancel] Request not cancelled at 1.0 s", log_text)
        self.assertIn("[execute_cancel] Request cancelled at ", log_text)

    def test_batch_error(self):
        """An error in single requests does not fail the whole batch."""
        # The execute_error model returns an error for the first and third
        # request and successfully processes the second request. This is making
        # sure that an error in a single request does not completely fail the
        # batch.
        model_name = "execute_error"
        shape = [2, 2]
        number_of_requests = 3
        user_data = UserData()
        # The context manager closes the client (and its stream) even when an
        # assertion below fails.
        with grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        ) as triton_client:
            triton_client.start_stream(callback=partial(callback, user_data))

            with self._shm_leak_detector.Probe():
                input_datas = []
                for _ in range(number_of_requests):
                    input_data, inputs = self._random_fp32_input(shape)
                    input_datas.append(input_data)
                    triton_client.async_stream_infer(
                        model_name=model_name, inputs=inputs
                    )

                for i in range(number_of_requests):
                    result = user_data._completed_requests.get()
                    if i == 0 or i == 2:
                        self.assertIsInstance(result, InferenceServerException)
                        continue

                    print(result)
                    output_data = result.as_numpy("OUT")
                    self.assertIsNotNone(output_data, "error: expected 'OUT'")
                    self.assertTrue(
                        np.array_equal(output_data, input_datas[i]),
                        "error: expected output {} to match input {}".format(
                            output_data, input_datas[i]
                        ),
                    )

    def test_infer_pymodel_error(self):
        """A failing model reports the expected error message and bumps the
        BACKEND failure metric by one."""
        model_name = "wrong_model"
        shape = [2, 2]
        initial_metrics_value = self._metrics_before_test(model_name, "BACKEND")
        with self._shm_leak_detector.Probe():
            with httpclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8000"
            ) as client:
                input_data = (16384 * np.random.randn(*shape)).astype(np.uint32)
                inputs = [
                    httpclient.InferInput(
                        "IN", input_data.shape, np_to_triton_dtype(input_data.dtype)
                    )
                ]
                inputs[0].set_data_from_numpy(input_data)
                with self.assertRaises(
                    InferenceServerException,
                    msg="Wrong exception raised or did not raise an exception",
                ) as raised:
                    client.infer(model_name, inputs)
                message = raised.exception.message()
                print(message)
                self.assertTrue(
                    message.startswith("Failed to process the request(s) for model "),
                    "Exception message is not correct",
                )
        expected_count_increase = 1
        self._assert_metrics(
            model_name,
            "BACKEND",
            expected_count_increase,
            initial_metrics_value,
        )

    # Test grpc stream behavior when triton_grpc_error is set to true.
    # Expected to close stream and return GRPC error when model returns error.
    def test_triton_grpc_error_error_on(self):
        """With triton_grpc_error enabled, a model error is delivered with
        StatusCode.INTERNAL and later sends on that stream may fail."""
        model_name = "execute_grpc_error"
        shape = [2, 2]
        number_of_requests = 2
        user_data = UserData()
        metadata = {"triton_grpc_error": "true"}
        stream_end = False
        with grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        ) as triton_client:
            triton_client.start_stream(
                callback=partial(callback, user_data), headers=metadata
            )
            for _ in range(number_of_requests):
                _, inputs = self._random_fp32_input(shape)
                # Only the stream operations are guarded, so that a failing
                # assertion below is reported as itself and never swallowed.
                try:
                    triton_client.async_stream_infer(
                        model_name=model_name, inputs=inputs
                    )
                    result = user_data._completed_requests.get()
                except Exception:
                    if not stream_end:
                        self.fail("Unexpected Stream killed without Error from CORE")
                    # An error was already reported on this stream, so it is
                    # expected to be closed.
                    continue

                if isinstance(result, InferenceServerException):
                    # execute_grpc_error intentionally returns error with StatusCode.INTERNAL status on 2nd request
                    self.assertEqual(str(result.status()), "StatusCode.INTERNAL")
                    stream_end = True
                else:
                    # Stream is not killed
                    output_data = result.as_numpy("OUT")
                    self.assertIsNotNone(output_data, "error: expected 'OUT'")

    # Test grpc stream behavior when triton_grpc_error is set to true in multiple open streams.
    # Expected to close stream and return GRPC error when model returns error.
    def test_triton_grpc_error_multithreaded(self):
        """Run the triton_grpc_error scenario on two streams concurrently."""
        failures = []

        def run_scenario():
            # An exception raised in a worker thread does not fail the test
            # on its own; collect it so the main thread can re-raise it.
            try:
                self.test_triton_grpc_error_error_on()
            except Exception as exc:
                failures.append(exc)

        threads = [threading.Thread(target=run_scenario) for _ in range(2)]
        # Start the threads
        for thread in threads:
            thread.start()
        # Wait for both threads to finish
        for thread in threads:
            thread.join()
        if failures:
            raise failures[0]

    # Test grpc stream behavior when triton_grpc_error is set to true and subsequent stream is cancelled.
    # Expected cancellation is successful.
    def test_triton_grpc_error_cancel(self):
        """Stopping the stream with request cancellation raises no error."""
        model_name = "execute_grpc_error"
        shape = [2, 2]
        number_of_requests = 1
        user_data = UserData()
        triton_server_url = f"{_tritonserver_ipaddr}:8001"
        stream_end = False
        metadata = {"triton_grpc_error": "true"}

        with grpcclient.InferenceServerClient(triton_server_url) as triton_client:
            triton_client.start_stream(
                callback=partial(callback, user_data), headers=metadata
            )

            for i in range(number_of_requests):
                _, inputs = self._random_fp32_input(shape)
                try:
                    triton_client.async_stream_infer(
                        model_name=model_name, inputs=inputs
                    )
                    result = user_data._completed_requests.get()
                    if isinstance(result, InferenceServerException):
                        stream_end = True
                    if i == 0:
                        triton_client.stop_stream(cancel_requests=True)
                except Exception:
                    # An exception is only acceptable once an error has
                    # already been reported on the stream.
                    if not stream_end:
                        self.fail("Unexpected Stream killed without Error from CORE")

    # Test grpc stream behavior when triton_grpc_error is set to false
    # and subsequent stream is NOT closed when error is reported from CORE
    def test_triton_grpc_error_error_off(self):
        """Without triton_grpc_error the stream answers every request."""
        model_name = "execute_grpc_error"
        shape = [2, 2]
        number_of_requests = 4
        response_counter = 0
        user_data = UserData()
        with grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        ) as triton_client:
            triton_client.start_stream(callback=partial(callback, user_data))
            for _ in range(number_of_requests):
                _, inputs = self._random_fp32_input(shape)
                triton_client.async_stream_infer(model_name=model_name, inputs=inputs)
                _ = user_data._completed_requests.get()
                response_counter += 1
        # we expect response_counter == number_of_requests,
        # which indicates that after the first reported grpc error stream did NOT close and mode != triton_grpc_error
        self.assertEqual(response_counter, number_of_requests)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/lifecycle/test.sh
================================================
#!/bin/bash
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_LOG="./lifecycle_client.log"
TEST_RESULT_FILE='test_results.txt'
source ../common.sh
source ../../common/util.sh

SERVER_ARGS="--model-repository=${MODELDIR}/lifecycle/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./lifecycle_server.log"

RET=0
rm -fr *.log ./models

# Populate the model repository used by lifecycle_test.py.
mkdir -p models/error_code/1/
cp ../../python_models/error_code/model.py ./models/error_code/1/
cp ../../python_models/error_code/config.pbtxt ./models/error_code/

mkdir -p models/execute_cancel/1/
cp ../../python_models/execute_cancel/model.py ./models/execute_cancel/1/
cp ../../python_models/execute_cancel/config.pbtxt ./models/execute_cancel/

# execute_error: enable batching with a 12 s queue delay.
mkdir -p models/execute_error/1/
cp ../../python_models/execute_error/model.py ./models/execute_error/1/
cp ../../python_models/execute_error/config.pbtxt ./models/execute_error/
(cd models/execute_error && \
          sed -i "s/^name:.*/name: \"execute_error\"/" config.pbtxt && \
          sed -i "s/^max_batch_size:.*/max_batch_size: 8/" config.pbtxt && \
          echo "dynamic_batching { preferred_batch_size: [8], max_queue_delay_microseconds: 12000000 }" >> config.pbtxt)

# execute_grpc_error: enable batching with a 1.2 s queue delay.
mkdir -p models/execute_grpc_error/1/
cp ../../python_models/execute_grpc_error/model.py ./models/execute_grpc_error/1/
cp ../../python_models/execute_grpc_error/config.pbtxt ./models/execute_grpc_error/
(cd models/execute_grpc_error && \
          sed -i "s/^name:.*/name: \"execute_grpc_error\"/" config.pbtxt && \
          sed -i "s/^max_batch_size:.*/max_batch_size: 8/" config.pbtxt && \
          echo "dynamic_batching { preferred_batch_size: [8], max_queue_delay_microseconds: 1200000 }" >> config.pbtxt)

mkdir -p models/execute_return_error/1/
cp ../../python_models/execute_return_error/model.py ./models/execute_return_error/1/
cp ../../python_models/execute_return_error/config.pbtxt ./models/execute_return_error/

# wrong_model: rename and switch the tensor types to UINT32.
mkdir -p models/wrong_model/1/
cp ../../python_models/wrong_model/model.py ./models/wrong_model/1/
cp ../../python_models/wrong_model/config.pbtxt ./models/wrong_model/
(cd models/wrong_model && \
          sed -i "s/^name:.*/name: \"wrong_model\"/" config.pbtxt && \
          sed -i "s/TYPE_FP32/TYPE_UINT32/g" config.pbtxt)

# Shared memory page count before the server starts; compared again after
# shutdown to detect pages that were not cleaned up.
prev_num_pages=`get_shm_pages`

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

set +e

# Run this multiple times to catch any intermittent segfault.
for i in {0..4}; do
    python3 -m pytest --junitxml=lifecycle.iter${i}.report.xml lifecycle_test.py >> $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** lifecycle_test.py FAILED. \n***"
        RET=1
    fi
done

set -e

kill_server

current_num_pages=`get_shm_pages`
if [ $current_num_pages -ne $prev_num_pages ]; then
    ls /dev/shm
    echo -e "\n***\n*** Test Failed. Shared memory pages were not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
    RET=1
fi

# These models have errors in the initialization and finalization
# steps and we want to ensure that correct error is being returned

rm -rf models/
mkdir -p models/init_error/1/
cp ../../python_models/init_error/model.py ./models/init_error/1/
cp ../../python_models/init_error/config.pbtxt ./models/init_error/

set +e
prev_num_pages=`get_shm_pages`
run_server_nowait

# Wait for the server process to exit by itself before inspecting its log.
wait $SERVER_PID
current_num_pages=`get_shm_pages`
if [ $current_num_pages -ne $prev_num_pages ]; then
    ls /dev/shm
    echo -e "\n***\n*** Test Failed. Shared memory pages were not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
    RET=1
fi

grep "name 'lorem_ipsum' is not defined" $SERVER_LOG

if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** init_error model test failed \n***"
    RET=1
fi
set -e

# FIXME: Until we find a way to simulate Ctrl^C on windows, this
# test will not pass.
if [[ ${TEST_WINDOWS} == 0 ]]; then
    rm -rf models/
    mkdir -p models/fini_error/1/
    cp ../../python_models/fini_error/model.py ./models/fini_error/1/
    cp ../../python_models/fini_error/config.pbtxt ./models/fini_error/

    prev_num_pages=`get_shm_pages`
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        RET=1
    fi

    kill_server

    current_num_pages=`get_shm_pages`
    if [ $current_num_pages -ne $prev_num_pages ]; then
        cat $CLIENT_LOG
        ls /dev/shm
        echo -e "\n***\n*** Test Failed. Shared memory pages were not cleaned properly.
    Shared memory pages before starting triton equals to $prev_num_pages
    and shared memory pages after starting triton equals to $current_num_pages \n***"
        RET=1
    fi

    set +e
    grep "name 'undefined_variable' is not defined" $SERVER_LOG

    if [ $? -ne 0 ]; then
        # The expected message is searched for in the server log, so that is
        # the log to show when it is missing.
        cat $SERVER_LOG
        echo -e "\n***\n*** fini_error model test failed \n***"
        RET=1
    fi
    set -e
fi

# auto_complete_error ships no config.pbtxt, so the server is started with
# --strict-model-config=false.
rm -rf models/
mkdir -p models/auto_complete_error/1/
cp ../../python_models/auto_complete_error/model.py ./models/auto_complete_error/1/

SERVER_ARGS="${SERVER_ARGS} --strict-model-config=false"

set +e
prev_num_pages=`get_shm_pages`
run_server_nowait

wait $SERVER_PID
current_num_pages=`get_shm_pages`
if [ $current_num_pages -ne $prev_num_pages ]; then
    ls /dev/shm
    echo -e "\n***\n*** Test Failed. Shared memory pages were not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
    RET=1
fi

grep "name 'undefined_variable' is not defined" $SERVER_LOG

if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** auto_complete_error model test failed \n***"
    RET=1
fi
set -e

if [ $RET -eq 1 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Lifecycle test FAILED. \n***"
else
    echo -e "\n***\n*** Lifecycle test PASSED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/logging/logging_test.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append("../../common")
import unittest

import numpy as np
import shm_util
import tritonclient.http as httpclient
from tritonclient.utils import *

# By default, find tritonserver on "localhost", but for windows tests
# we overwrite the IP address with the TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class LogTest(unittest.TestCase):
    """Round-trip one request through the ``identity_fp32_logging`` model.

    Only the inference result is asserted here; nothing in this class reads
    the server log.
    """

    def setUp(self):
        # One detector per test; its probe brackets the inference below.
        self._shm_leak_detector = shm_util.ShmLeakDetector()

    def test_log_output(self):
        """Send ``[[1.0]]`` as INPUT0 and expect OUTPUT0 to echo it back."""
        server_url = f"{_tritonserver_ipaddr}:8000"
        with self._shm_leak_detector.Probe(), httpclient.InferenceServerClient(
            server_url
        ) as client:
            payload = np.array([[1.0]], dtype=np.float32)
            request_input = httpclient.InferInput(
                "INPUT0", payload.shape, np_to_triton_dtype(payload.dtype)
            )
            request_input.set_data_from_numpy(payload)

            response = client.infer("identity_fp32_logging", [request_input])
            echoed = response.as_numpy("OUTPUT0")

            self.assertIsNotNone(echoed)
            self.assertTrue(np.all(echoed == payload))


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/logging/test.sh
================================================
#!/bin/bash
# Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# File names used by the rest of this script.
CLIENT_LOG="logging_client.log"
TEST_RESULT_FILE="test_results.txt"
LOG_TEST="logging_test.py"
SERVER_LOG="./logging_server.log"

# Resolve the repository version: the first positional argument, when given,
# overrides the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Model repository handed to the server via --model-repository below.
MODELSDIR=${MODELDIR}/logging/models
# Shared QA helpers; presumably the source of run_server / kill_server used
# later in this script -- confirm against util.sh.
source ../../common/util.sh

# Check how many lines of $SERVER_LOG contain each model log message.
#
#   $1 - expected count for each of the "Specific", "Info", "Warning" and
#        "Error" messages
#   $2 - expected count for the "Verbose" message
#
# Sets the global RET=1 and prints a failure banner for every message type
# whose count differs. A missing or unreadable log yields an empty count,
# which never equals the expected value, so it is reported as a failure
# rather than turning the comparison into a malformed `[` expression.
function verify_log_counts () {
  local non_verbose_expected=$1
  local verbose_expected=$2
  local label expected actual

  for label in "Specific" "Info" "Warning" "Error" "Verbose"; do
    if [ "$label" == "Verbose" ]; then
      expected=$verbose_expected
    else
      expected=$non_verbose_expected
    fi

    # grep -c exits non-zero when it counts zero lines; `|| true` keeps that
    # from aborting the script when this runs under `set -e`.
    actual=$(grep -c "${label} Msg!" "$SERVER_LOG" || true)

    if [ "$actual" != "$expected" ]; then
      echo -e "\n***\n*** Test Failed: ${label} Msg Count Incorrect\n***"
      RET=1
    fi
  done
}

rm -f *.log

# Set up a simple model repository containing only the logging identity model.
# NOTE(review): the directory removed/created here is ${MODELSDIR}, while the
# files below are copied under ./models relative to the current directory;
# these presumably refer to the same location -- confirm.
rm -fr ${MODELSDIR} && mkdir -p ${MODELSDIR} && \
    python_model="identity_fp32_logging"
    mkdir -p models/$python_model/1/
    cp ../../python_models/${python_model}/config.pbtxt models/${python_model}/config.pbtxt
    cp ../../python_models/${python_model}/model.py models/${python_model}/1/
RET=0

# Phase 1: run the server with default log settings.
SERVER_ARGS="--model-repository=${MODELSDIR} --backend-directory=${BACKEND_DIR}"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Errors are tolerated while the client test runs so failures can be recorded
# in RET instead of aborting the script.
set +e
SUBTEST="default"
python3 -m pytest --junitxml=log_test.${SUBTEST}.report.xml ${LOG_TEST} >> ${CLIENT_LOG} 2>&1
if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
fi
set -e

kill_server

# Check if correct # log messages are present [ non-verbose-msg-cnt | verbose-msg-cnt ]
# NOTE: Windows does not seem to have a way to send a true SIGINT signal
# to tritonserver. Instead, it seems required to use taskkill.exe with /F (force)
# to kill the running program. This means the server terminates immediately,
# instead of shutting down how it would if Ctrl^C was invoked from the terminal.
# To properly test functionality, we need a WAR. In the meantime, we will subtract
# 1 from the expected values to account for the fact that no logs will be emitted
# from the finalize function.
if [[ ${TEST_WINDOWS} == 1 ]]; then
    verify_log_counts 3 0
else
    verify_log_counts 4 0
fi


# Start from clean logs so the counts below only reflect this phase.
rm -f *.log
# Phase 2: run the server, then enable verbose messages at runtime.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Enable verbose logging through the /v2/logging endpoint
code=`curl -s -w %{http_code} -o ./curl.out -d'{"log_verbose_level":1}' ${TRITONSERVER_IPADDR}:8000/v2/logging`

if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed: Could not Change Log Settings\n***"
    RET=1
fi

SUBTEST="verbose"
python3 -m pytest --junitxml=log_test.${SUBTEST}.report.xml ${LOG_TEST} >> ${CLIENT_LOG} 2>&1
if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
fi
set -e

kill_server

# The verbose count is one lower than the non-verbose count (3 vs 4, or
# 2 vs 3 on Windows) because the model must initialize before log settings
# can be modified.
if [[ ${TEST_WINDOWS} == 1 ]]; then
    verify_log_counts 3 2
else
    verify_log_counts 4 3
fi

rm -f *.log
# Phase 3: run the server, then disable info/warning/error logging at runtime.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Disable all logging
BOOL_PARAMS=${BOOL_PARAMS:="log_info log_warning log_error"}
for BOOL_PARAM in $BOOL_PARAMS; do
    # Turn this log type off by sending a boolean false for the setting
    code=`curl -s -w %{http_code} -o ./curl.out -d'{"'"$BOOL_PARAM"'":false}' ${TRITONSERVER_IPADDR}:8000/v2/logging`
    if [ "$code" != "200" ]; then
        cat ./curl.out
        echo -e "\n***\n*** Test Failed: Could not Change Log Settings\n***"
        RET=1
    fi
done

SUBTEST="disabled"
python3 -m pytest --junitxml=log_test.${SUBTEST}.report.xml ${LOG_TEST} >> ${CLIENT_LOG} 2>&1
if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
fi
set -e

kill_server

# Will have 1 occurrence of each non-verbose log type
# because the server must initialize before log settings
# can be modified
# Same count for both Unix and Windows because this does
# not test log output in the finalize step.
verify_log_counts 1 0


# Report the overall result; RET is also the script's exit status.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Logging test PASSED. \n***"
else
    echo -e "\n***\n*** Logging test FAILED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/model_control/model_control_test.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import base64
import json
import os
import subprocess
import sys

sys.path.append("../../common")

import unittest

import numpy as np
import shm_util
import tritonclient.http as httpclient
from tritonclient.utils import *

# By default, find tritonserver on "localhost", but for windows tests
# we overwrite the IP address with the TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class ExplicitModelTest(unittest.TestCase):
    """Load, exercise and unload a Python model and its ensemble, repeatedly."""

    def setUp(self):
        self._shm_leak_detector = shm_util.ShmLeakDetector()

    def send_identity_request(self, client, model_name):
        """Infer on ``model_name`` with a [1, 16] FP32 ramp and expect an echo.

        Only the ``client.infer`` call runs inside the shared-memory leak
        probe; the comparison happens after the probe has exited.
        """
        ramp = np.expand_dims(np.arange(start=0, stop=16, dtype=np.float32), axis=0)
        infer_input = httpclient.InferInput("INPUT0", [1, 16], "FP32")
        infer_input.set_data_from_numpy(ramp)

        with self._shm_leak_detector.Probe():
            result = client.infer(
                model_name=model_name,
                inputs=[infer_input],
                outputs=[httpclient.InferRequestedOutput("OUTPUT0")],
            )

        self.assertTrue(np.all(ramp == result.as_numpy("OUTPUT0")))

    def test_model_reload(self):
        """Run five load / infer / unload cycles over both models."""
        base_model = "identity_fp32"
        ensemble_model = f"simple_{base_model}"
        # The base model is loaded before the ensemble model to make sure
        # reloading the model works properly in Python backend; unloading
        # happens in the opposite order.
        load_order = (base_model, ensemble_model)

        with httpclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8000") as client:
            for _ in range(5):
                self.assertFalse(client.is_model_ready(base_model))

                for name in load_order:
                    client.load_model(name)
                for name in load_order:
                    self.assertTrue(client.is_model_ready(name))
                for name in load_order:
                    self.send_identity_request(client, name)

                for name in reversed(load_order):
                    client.unload_model(name)
                for name in load_order:
                    self.assertFalse(client.is_model_ready(name))


class ModelIDValidationTest(unittest.TestCase):
    """
    Test model ID validation for user-provided model names.

    Verifies that model names containing dangerous characters are properly rejected.
    Uses raw HTTP requests via curl instead of the Triton client to test server-side
    validation without the Triton client encoding special characters.
    """

    def setUp(self):
        self._shm_leak_detector = shm_util.ShmLeakDetector()
        self._client = httpclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8000")
        self._triton_host = _tritonserver_ipaddr
        self._triton_port = 8000

        # Check if curl is available
        try:
            subprocess.run(["curl", "--version"], capture_output=True, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            self.skipTest("curl command not available - required for raw HTTP testing")

    def _send_load_model_request(self, model_name):
        """Send HTTP request to load model for testing input validation using curl"""

        # Create simple Triton Python model code
        python_model_code = f"""import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    def execute(self, requests):
        print('Hello world from model {model_name}')
        responses = []
        for request in requests:
            # Simple identity function
            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            out_tensor = pb_utils.Tensor("OUTPUT0", input_tensor.as_numpy())
            responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses"""

        # Base64 encode the Python code (as required by Triton server)
        python_code_b64 = base64.b64encode(python_model_code.encode("utf-8")).decode(
            "ascii"
        )

        # Create simple config
        config = {
            "name": model_name,
            "backend": "python",
            "max_batch_size": 4,
            "input": [{"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [-1]}],
            "output": [{"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [-1]}],
        }

        payload = {
            "parameters": {
                "config": json.dumps(config),
                "file:/1/model.py": python_code_b64,
            }
        }

        url = f"http://{self._triton_host}:{self._triton_port}/v2/repository/models/{model_name}/load"

        # Convert payload to JSON string
        payload_json = json.dumps(payload)

        try:
            # Use curl to send the request
            curl_cmd = [
                "curl",
                "-s",
                "-w",
                "\n%{http_code}",  # Write HTTP status code on separate line
                "-X",
                "POST",
                "-H",
                "Content-Type: application/json",
                "-d",
                payload_json,
                url,
            ]

            result = subprocess.run(
                curl_cmd, capture_output=True, text=True, timeout=10
            )

            # Parse curl output - last line is status code, rest is response body
            output_lines = (
                result.stdout.strip().split("\n") if result.stdout.strip() else []
            )
            if len(output_lines) >= 2:
                try:
                    status_code = int(output_lines[-1])
                    response_text = "\n".join(output_lines[:-1])
                except ValueError:
                    status_code = 0
                    response_text = result.stdout or result.stderr or "Invalid response"
            elif len(output_lines) == 1 and output_lines[0].isdigit():
                status_code = int(output_lines[0])
                response_text = result.stderr or "No response body"
            else:
                status_code = 0
                response_text = result.stdout or result.stderr or "No response"

            # Return an object similar to requests.Response
            class CurlResponse:
                def __init__(self, status_code, text):
                    self.status_code = status_code
                    self.text = text
                    self.content = text.encode()

            return CurlResponse(status_code, response_text)

        except (
            subprocess.TimeoutExpired,
            subprocess.CalledProcessError,
            ValueError,
        ) as e:
            # Return a mock response for errors
            class ErrorResponse:
                def __init__(self, error_msg):
                    self.status_code = 0
                    self.text = f"Error: {error_msg}"
                    self.content = self.text.encode()

            return ErrorResponse(str(e))

    def test_invalid_character_model_names(self):
        """Test that model names with invalid characters are properly rejected"""

        # Based on INVALID_CHARS = ";|&$`<>()[]{}\\\"'*?~#!"
        invalid_model_names = [
            r"model;test",
            r"model|test",
            r"model&test",
            r"model$test",
            r"model`test`",
            r"model<test>",
            r"model(test)",
            # r"model[test]", # request fails to send unencoded
            r"model{test}",
            r"model\test",
            r'model"test"',
            r"model'test'",
            r"model*test",
            # r"model?test", # request fails to send unencoded
            r"model~test",
            # r"model#test", # request fails to send unencoded
            r"model!test",
        ]

        for invalid_name in invalid_model_names:
            with self.subTest(model_name=invalid_name):
                print(f"Testing invalid model name: {invalid_name}")

                response = self._send_load_model_request(invalid_name)
                print(
                    f"Response for '{invalid_name}': Status {response.status_code}, Text: {response.text[:200]}..."
                )

                # Should not get a successful 200 response
                self.assertNotEqual(
                    200,
                    response.status_code,
                    f"Invalid model name '{invalid_name}' should not get 200 OK response",
                )

                # Special case for curly braces - they get stripped and cause load failures prior to the validation check
                if "{" in invalid_name or "}" in invalid_name:
                    self.assertIn(
                        "failed to load",
                        response.text,
                        f"Model with curly braces '{invalid_name}' should fail to load",
                    )
                else:
                    # Normal case - should get character validation error
                    self.assertIn(
                        "Invalid stub name: contains invalid characters",
                        response.text,
                        f"invalid response for '{invalid_name}' should contain 'Invalid stub name: contains invalid characters'",
                    )

                # Verify the model is not loaded/ready since it was rejected
                try:
                    self.assertFalse(
                        self._client.is_model_ready(invalid_name),
                        f"Model '{invalid_name}' should not be ready after failed load attempt",
                    )
                except Exception as e:
                    # If checking model readiness fails, that's also acceptable since the model name is invalid
                    print(
                        f"Note: Could not check model readiness for '{invalid_name}': {e}"
                    )

    def test_valid_model_names(self):
        """Test that valid model names work"""

        valid_model_names = [
            "TestModel123",
            "model-with-hyphens",
            "model_with_underscores",
        ]

        for valid_name in valid_model_names:
            with self.subTest(model_name=valid_name):
                print(f"Testing valid model name: {valid_name}")

                response = self._send_load_model_request(valid_name)
                print(
                    f"Response for valid '{valid_name}': Status {response.status_code}, Text: {response.text[:100]}..."
                )

                # Valid model names should be accepted and load successfully
                self.assertEqual(
                    200,
                    response.status_code,
                    f"Valid model name '{valid_name}' should get 200 OK response, got {response.status_code}. Response: {response.text}",
                )

                # Should not contain validation error message
                self.assertNotIn(
                    "Invalid stub name: contains invalid characters",
                    response.text,
                    f"Valid model name '{valid_name}' should not contain validation error message",
                )

                # Verify the model is actually loaded by checking if it's ready
                try:
                    self.assertTrue(
                        self._client.is_model_ready(valid_name),
                        f"Model '{valid_name}' should be ready after successful load",
                    )
                    # Clean up - unload the model after testing
                    self._client.unload_model(valid_name)
                except Exception as e:
                    self.fail(f"Failed to check if model '{valid_name}' is ready: {e}")


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/model_control/test.sh
================================================
#!/bin/bash
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Drives model_control_test.py against a server started in explicit
# model-control mode. RET accumulates failures and is the exit status.
CLIENT_LOG="./model_control_client.log"
TEST_RESULT_FILE='test_results.txt'
SERVER_ARGS="--model-repository=${MODELDIR}/model_control/models --model-control-mode=explicit --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./model_control_server.log"

RET=0
rm -fr *.log ./models

source ../../common/util.sh

# Populate the repository: the Python identity model plus the ensemble
# config that references it.
mkdir -p models/identity_fp32/1/
mkdir -p models/simple_identity_fp32/1/
cp ../../python_models/identity_fp32/model.py ./models/identity_fp32/1/model.py
cp ../../python_models/identity_fp32/config.pbtxt ./models/identity_fp32/config.pbtxt
cp ../../python_models/simple_identity_fp32/config.pbtxt ./models/simple_identity_fp32/config.pbtxt

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Tolerate failures while the tests run so they are recorded in RET.
set +e
# Redirect stdout to the log first and then point stderr at it; with the
# order reversed (`2>&1 > file`) stderr stays on the terminal and is
# missing from the client log.
python3 -m pytest --junitxml=model_control.report.xml model_control_test.py > $CLIENT_LOG 2>&1

if [ $? -ne 0 ]; then
    echo -e "\n***\n*** model_control_test.py FAILED. \n***"
    RET=1
fi

echo -e "\n***\n*** Running model ID validation test\n***"
SUBTEST="model_id_validation"
python3 -m pytest --junitxml=model_control.${SUBTEST}.report.xml model_control_test.py::ModelIDValidationTest >> ${CLIENT_LOG} 2>&1

if [ $? -ne 0 ]; then
    echo -e "\n***\n*** model_id_validation_test.py FAILED. \n***"
    RET=1
fi

set -e

kill_server

# On failure dump both logs so the output is self-contained.
if [ $RET -eq 1 ]; then
    echo -e "\n***\n*** Server logs:\n***"
    cat $SERVER_LOG
    echo -e "\n***\n*** Client logs:\n***"
    cat $CLIENT_LOG
    echo -e "\n***\n*** model_control_test FAILED. \n***"
else
    echo -e "\n***\n*** model_control_test PASSED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/model_readiness/test.sh
================================================
#!/bin/bash
# Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

TEST_RESULT_FILE='test_results.txt'
source ../common.sh
source ../../common/util.sh

SERVER_ARGS="--model-repository=${MODELDIR}/model_readiness/models --backend-directory=${BACKEND_DIR} --log-verbose=1"

# RET accumulates failures across all phases and is the script's exit status.
RET=0
rm -fr *.log ./models

MODEL_NAME="identity_fp32"
mkdir -p models/$MODEL_NAME/1/
cp ../../python_models/$MODEL_NAME/model.py ./models/$MODEL_NAME/1/model.py
cp ../../python_models/$MODEL_NAME/config.pbtxt ./models/$MODEL_NAME/config.pbtxt

#
# Test Model Readiness (TRITONBACKEND_ModelInstanceReady)
# Test with different signals to simulate various crash/exit scenarios
# 11 (SIGSEGV) - Segmentation fault / crash
# 9  (SIGKILL) - Force kill
for SIGNAL in 11 9; do
    echo -e "\n***\n*** Testing model_readiness with Signal $SIGNAL\n***"
    # Separate logs per signal so the error count below only sees this run.
    SERVER_LOG="./model_readiness_signal_${SIGNAL}_server.log"
    CLIENT_LOG="./model_readiness_signal_${SIGNAL}_client.log"

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        cat $SERVER_LOG
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        exit 1
    fi

    set +e

    # Verify model is initially ready
    echo "Checking Initial Readiness..."
    python3 -m unittest test_model_readiness.TestModelReadiness.test_model_ready >> ${CLIENT_LOG} 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Initial readiness check failed \n***"
        RET=1
        kill_server
        exit 1
    fi

    # Find the stub process PID
    stub_pid=$(pgrep -f "triton_python_backend_stub")

    if [ -z "$stub_pid" ]; then
        echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Could not find stub process \n***"
        # The server is stopped once, by the kill_server at the end of this
        # iteration. Stopping it here as well made that second call run
        # against an already-stopped server under `set -e`, which presumably
        # aborts the script before the final summary -- confirm against
        # kill_server in util.sh.
        RET=1
    else
        echo "Found stub process: $stub_pid"

        # Kill the stub process
        echo "Killing stub with signal $SIGNAL..."
        kill -$SIGNAL $stub_pid
        sleep 1

        # Verify model is now NOT ready
        echo "Checking Not Ready Status..."
        python3 -m unittest test_model_readiness.TestModelReadiness.test_model_not_ready >> ${CLIENT_LOG} 2>&1
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Model reported ready after kill \n***"
            RET=1
        else
            # Verify correct error message in logs
            # Expect 2 occurrences: HTTP and gRPC checks
            error_count=$(grep -c "Model '${MODEL_NAME}' version 1 is not ready: Stub process '${MODEL_NAME}_0_0' is not healthy." $SERVER_LOG)
            if [ "$error_count" -eq 2 ]; then
                 echo -e "\n***\n Test model_readiness Passed for Signal $SIGNAL \n***"
            else
                 echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Expected 2 error messages, found $error_count \n***"
                 cat $SERVER_LOG
                 RET=1
            fi
        fi
    fi

    set -e
    kill_server
done

#
# Test User-Defined Model Readiness Function
# (models whose behavior is driven by a user-defined is_ready() function)
#
echo -e "\n***\n*** Testing User-Defined is_ready() Function\n***"

# Create ./models/<name> for one readiness test case.
#
#   $1 - model name (also selects which model.py template is copied)
#   $2 - value written to the READINESS_FN_RETURN_VALUE config parameter
#   $3 - value written to the READINESS_FN_DELAY_SECS config parameter
#
# The config starts as a copy of the identity_fp32 config with its name line
# rewritten, and the two parameters are appended to it.
setup_readiness_test_model() {
    local model_name=$1
    local return_value=$2
    local delay_secs=$3
    local model_dir=./models/$model_name
    local config_file=$model_dir/config.pbtxt
    local template

    # The coroutine case has its own model.py; every other case shares one.
    case "$model_name" in
        is_ready_fn_coroutine_returns_true)
            template=./test_models/readiness_coroutine_model.py
            ;;
        *)
            template=./test_models/readiness_model.py
            ;;
    esac

    mkdir -p $model_dir/1/
    cp $template $model_dir/1/model.py
    cp ./models/identity_fp32/config.pbtxt $config_file
    sed -i "s/^name:.*/name: \"$model_name\"/" $config_file
    cat >> $config_file << EOF
parameters: {
  key: "READINESS_FN_RETURN_VALUE"
  value: { string_value: "$return_value" }
}
parameters: {
  key: "READINESS_FN_DELAY_SECS"
  value: { string_value: "$delay_secs" }
}
EOF
}

# Create readiness test models using shared model.py + config parameters
# Arguments: <model name> <READINESS_FN_RETURN_VALUE> <READINESS_FN_DELAY_SECS>
setup_readiness_test_model "is_ready_fn_returns_true" "true" "0.1"
setup_readiness_test_model "is_ready_fn_returns_false" "false" "0.1"
setup_readiness_test_model "is_ready_fn_raises_error" "exception" "0.1"
setup_readiness_test_model "is_ready_fn_returns_non_boolean" "non_boolean" "0.1"
# The timeout case is the only one with a long delay (8 instead of 0.1).
setup_readiness_test_model "is_ready_fn_timeout" "true" "8"
setup_readiness_test_model "is_ready_fn_coroutine_returns_true" "coroutine" "0.1"

# Decoupled model has a unique execute() and its own config
mkdir -p ./models/is_ready_fn_returns_true_decoupled/1/
cp ./test_models/is_ready_fn_returns_true_decoupled/model.py \
    ./models/is_ready_fn_returns_true_decoupled/1/model.py
cp ./test_models/is_ready_fn_returns_true_decoupled/config.pbtxt \
    ./models/is_ready_fn_returns_true_decoupled/config.pbtxt

# Start server with all models
# (this phase points the server at ./models under the current directory and
# uses its own server/client log files)
SERVER_ARGS="--model-repository=$(pwd)/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./test_user_defined_model_readiness_function_server.log"
CLIENT_LOG="./test_user_defined_model_readiness_function_client.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    exit 1
fi

# Tolerate a failing test run so it can be recorded in RET.
set +e

echo "Running TestUserDefinedModelReadinessFunction..."
python3 -m unittest test_model_readiness.TestUserDefinedModelReadinessFunction -v >> ${CLIENT_LOG} 2>&1
# Capture the status immediately, before any other command overwrites $?.
TEST_EXIT_CODE=$?

if [ $TEST_EXIT_CODE -ne 0 ]; then
    echo -e "\n***\n*** TestUserDefinedModelReadinessFunction FAILED\n***"
    cat ${CLIENT_LOG}
    RET=1
else
    echo -e "\n***\n*** TestUserDefinedModelReadinessFunction PASSED\n***"
fi

set -e
kill_server


# Final result
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** All Model Readiness Tests Passed\n***"
else
  echo -e "\n***\n*** Model Readiness Tests FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/model_readiness/test_model_readiness.py
================================================
# Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import queue
import threading
import time
import unittest
from functools import partial

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

# Addresses passed to the HTTP and gRPC clients created in this module.
URL_HTTP = "localhost:8000"
URL_GRPC = "localhost:8001"
# Seconds collect_responses() waits on the response queue for each item
# before raising.
DEFAULT_RESPONSE_TIMEOUT = 60


class UserData:
    """Holds the queue that callback() fills and collect_responses() drains."""

    def __init__(self):
        # Receives either a result object or an error, one item per callback.
        self._response_queue = queue.Queue()


def callback(user_data, result, error):
    """Enqueue the outcome of one stream callback.

    A truthy ``error`` is queued in place of ``result``; otherwise ``result``
    is queued. Exactly one item is put on ``user_data._response_queue``.
    """
    outcome = error if error else result
    user_data._response_queue.put(outcome)


def prepare_infer_args(input_value):
    """Create InferInput and InferRequestedOutput lists for decoupled inference.

    Returns:
        A ``(inputs, outputs)`` pair. ``inputs`` is a one-element list holding
        the INT32 input "IN" filled with ``[[input_value]]``; ``outputs`` is a
        one-element list requesting the output "OUT".
    """
    payload = np.array([[input_value]], dtype=np.int32)

    in_tensor = grpcclient.InferInput("IN", payload.shape, "INT32")
    in_tensor.set_data_from_numpy(payload)

    requested_out = grpcclient.InferRequestedOutput("OUT")
    return [in_tensor], [requested_out]


def collect_responses(user_data, expected_responses_count):
    """
    Collect up to `expected_responses_count` responses from user_data.

    Items are taken from ``user_data._response_queue`` one at a time. A
    successful item contributes ``result.as_numpy("OUT")[0]`` to the response
    list. The first ``InferenceServerException`` (or subclass) is recorded and
    ends collection early.

    Returns:
        An ``(errors, responses)`` tuple of lists.

    Raises:
        Exception: If no item arrives within DEFAULT_RESPONSE_TIMEOUT seconds.
    """
    errors = []
    responses = []
    while len(responses) < expected_responses_count:
        try:
            result = user_data._response_queue.get(timeout=DEFAULT_RESPONSE_TIMEOUT)
        except queue.Empty:
            raise Exception(
                f"No response received within {DEFAULT_RESPONSE_TIMEOUT} seconds."
            )
        # isinstance (rather than an exact type comparison) so that subclasses
        # of InferenceServerException are treated as errors too, instead of
        # falling through to .as_numpy() below.
        if isinstance(result, InferenceServerException):
            errors.append(result)
            break
        responses.append(result.as_numpy("OUT")[0])

    return errors, responses


def call_inference_identity_model(model_name, protocol, client):
    """Send an inference request and verify the output matches the input.

    Args:
        model_name: Model to run.
        protocol: "http" builds the input with the HTTP client module; any
            other value uses the gRPC client module.
        client: Client object whose ``infer`` method is called.

    Raises:
        AssertionError: If "OUTPUT0" does not (almost) equal the all-ones
            (1, 8) FP32 tensor sent as "INPUT0".
    """
    expected = np.ones((1, 8), dtype=np.float32)

    client_module = httpclient if protocol == "http" else grpcclient
    infer_input = client_module.InferInput("INPUT0", expected.shape, "FP32")
    infer_input.set_data_from_numpy(expected)

    response = client.infer(model_name, [infer_input])
    actual = response.as_numpy("OUTPUT0")

    np.testing.assert_array_almost_equal(
        expected,
        actual,
        err_msg=f"Inference output mismatch for {model_name}",
    )


class TestModelReadiness(unittest.TestCase):
    def setUp(self):
        self.model_name = "identity_fp32"
        self.client_http = httpclient.InferenceServerClient(url=URL_HTTP)
        self.client_grpc = grpcclient.InferenceServerClient(url=URL_GRPC)

    def test_model_ready(self):
        # Check HTTP
        try:
            is_ready = self.client_http.is_model_ready(self.model_name)
            self.assertTrue(
                is_ready, f"[HTTP] Model {self.model_name} should be READY but is NOT"
            )
            call_inference_identity_model(self.model_name, "http", self.client_http)
        except Exception as e:
            self.fail(f"[HTTP] Unexpected error: {str(e)}")

        # Check gRPC
        try:
            is_ready = self.client_grpc.is_model_ready(self.model_name)
            self.assertTrue(
                is_ready, f"[gRPC] Model {self.model_name} should be READY but is NOT"
            )
            call_inference_identity_model(self.model_name, "grpc", self.client_grpc)
        except Exception as e:
            self.fail(f"[gRPC] Unexpected error: {str(e)}")

    def test_model_not_ready(self):
        # Check HTTP
        try:
            is_ready = self.client_http.is_model_ready(self.model_name)
            self.assertFalse(
                is_ready,
                f"[HTTP] Model {self.model_name} should be NOT READY but is READY",
            )
        except Exception as e:
            self.fail(f"[HTTP] Unexpected error: {str(e)}")

        # Check gRPC
        try:
            is_ready = self.client_grpc.is_model_ready(self.model_name)
            self.assertFalse(
                is_ready,
                f"[gRPC] Model {self.model_name} should be NOT READY but is READY.",
            )
        except Exception as e:
            self.fail(f"[gRPC] Unexpected error: {str(e)}")


class TestUserDefinedModelReadinessFunction(unittest.TestCase):
    """
    Test user-defined is_ready() function
    """

    def setUp(self):
        """Create one HTTP and one gRPC client for the tests that use self.client_*."""
        self.client_http = httpclient.InferenceServerClient(url=URL_HTTP)
        self.client_grpc = grpcclient.InferenceServerClient(url=URL_GRPC)

    def _run_inference_decoupled(self, index, model_name, expected_responses_count):
        """Send a decoupled streaming inference request and verify responses.

        Args:
            index: Caller-supplied identifier, used only in failure messages.
            model_name: Name of the model to send the streaming request to.
            expected_responses_count: Sent as the request input, and used both
                as the number of responses to wait for and as the value every
                response must carry.

        Raises:
            AssertionError: If the stream reports an error, the number of
                responses differs from the expectation, or a response carries
                an unexpected value.
            Exception: Propagated from collect_responses() when no response
                arrives in time.
        """
        user_data = UserData()
        with grpcclient.InferenceServerClient(URL_GRPC) as triton_client:
            try:
                inputs, outputs = prepare_infer_args(expected_responses_count)
                triton_client.start_stream(callback=partial(callback, user_data))
                triton_client.async_stream_infer(
                    model_name=model_name, inputs=inputs, outputs=outputs
                )

                # Collect and verify responses
                errors, responses = collect_responses(
                    user_data, expected_responses_count
                )
                # Check errors first and include their text: collection stops
                # at the first error, so a response-count mismatch alone would
                # hide the actual failure reported on the stream.
                self.assertEqual(
                    len(errors),
                    0,
                    f"Index: {index} - Expected 0 errors, got {len(errors)}: "
                    f"{[str(err) for err in errors]}",
                )
                self.assertEqual(
                    len(responses),
                    expected_responses_count,
                    f"Index: {index} - Expected {expected_responses_count} responses, got {len(responses)}",
                )

                # Verify correctness of successful responses
                for idx, output in enumerate(responses):
                    self.assertEqual(
                        output,
                        expected_responses_count,
                        msg=f"Response {idx} has incorrect value - {output}",
                    )
            finally:
                # Always close the stream, including when an assertion failed.
                triton_client.stop_stream()

    def test_multiple_concurrent_ready_and_infer_requests_decoupled(self):
        """Run streaming inferences and gRPC readiness checks concurrently.

        For each of ``num_requests`` iterations one inference thread and one
        readiness thread are started. Failures inside the threads are recorded
        in lists and asserted on after all threads have been joined.
        """
        model_name = "is_ready_fn_returns_true_decoupled"
        num_requests = 16
        response_count = 8
        # Filled from worker threads with (index, message) tuples.
        readiness_errors = []
        infer_errors = []

        def readiness_wrapper(index, model_name):
            # Uses its own client per call; any exception, including the
            # AssertionError raised below, is recorded rather than propagated.
            try:
                with grpcclient.InferenceServerClient(url=URL_GRPC) as triton_client:
                    is_ready = triton_client.is_model_ready(model_name)
                    if not is_ready:
                        raise AssertionError(
                            f"Index: {index} - GRPC client - Model {model_name} should be READY"
                        )
            except Exception as e:
                readiness_errors.append((index, str(e)))

        def inference_wrapper(index, model_name):
            # Assertion failures raised inside _run_inference_decoupled are
            # caught here as well and reported via infer_errors.
            try:
                self._run_inference_decoupled(index, model_name, response_count)
            except Exception as e:
                infer_errors.append((index, str(e)))

        # Launch concurrent threads
        threads = []
        for i in range(num_requests):
            # Start threads with slight delay
            time.sleep(0.1)
            t1 = threading.Thread(
                target=inference_wrapper, args=(i, model_name), name=f"infer-{i}"
            )
            t2 = threading.Thread(
                target=readiness_wrapper, args=(i, model_name), name=f"ready-{i}"
            )
            threads.extend([t1, t2])
            t1.start()
            t2.start()

        # Wait for all requests to complete
        for t in threads:
            t.join(timeout=120)

        # join() with a timeout returns silently, so check liveness explicitly.
        for t in threads:
            self.assertFalse(t.is_alive(), f"Threads are not completed: {t.name}")

        self.assertEqual(
            len(readiness_errors), 0, f"Readiness errors: {readiness_errors}"
        )
        self.assertEqual(len(infer_errors), 0, f"Inference errors: {infer_errors}")

    def test_is_ready_coroutine_returns_true(self):
        model_name = "is_ready_fn_coroutine_returns_true"
        for _ in range(5):
            self.assertTrue(
                self.client_http.is_model_ready(model_name),
                f"HTTP - Model {model_name} (coroutine) should be READY",
            )
            self.assertTrue(
                self.client_grpc.is_model_ready(model_name),
                f"gRPC - Model {model_name} (coroutine) should be READY",
            )
        call_inference_identity_model(model_name, "http", self.client_http)
        call_inference_identity_model(model_name, "grpc", self.client_grpc)

    def test_is_ready_returns_true(self):
        model_name = "is_ready_fn_returns_true"
        num_requests = 10

        # Send multiple requests in sequence to ensure consistent behavior
        for i in range(num_requests):
            self.assertTrue(
                self.client_http.is_model_ready(model_name),
                f"iteration {i} - HTTP client - Model {model_name} should be READY",
            )
            self.assertTrue(
                self.client_grpc.is_model_ready(model_name),
                f"iteration {i} - GRPC client - Model {model_name} should be READY",
            )

            # Verify inference is unaffected by readiness checks.
            call_inference_identity_model(model_name, "http", self.client_http)
            call_inference_identity_model(model_name, "grpc", self.client_grpc)

    def test_is_ready_returns_false(self):
        model_name = "is_ready_fn_returns_false"
        num_requests = 10

        # Send multiple requests in sequence to ensure consistent behavior
        for i in range(num_requests):
            self.assertFalse(
                self.client_http.is_model_ready(model_name),
                f"iteration {i} - HTTP client - Model {model_name} should be NOT READY",
            )
            self.assertFalse(
                self.client_grpc.is_model_ready(model_name),
                f"iteration {i} - GRPC client - Model {model_name} should be NOT READY",
            )

            # Verify inference is unaffected by readiness checks.
            call_inference_identity_model(model_name, "http", self.client_http)
            call_inference_identity_model(model_name, "grpc", self.client_grpc)

    def test_is_ready_raises_exception(self):
        model_name = "is_ready_fn_raises_error"
        num_requests = 10

        # Send multiple requests in sequence to ensure consistent behavior
        for i in range(num_requests):
            self.assertFalse(
                self.client_http.is_model_ready(model_name),
                f"iteration {i} - HTTP client - Model {model_name} should be NOT READY (exception)",
            )
            self.assertFalse(
                self.client_grpc.is_model_ready(model_name),
                f"iteration {i} - GRPC client - Model {model_name} should be NOT READY (exception)",
            )

            # Verify inference is unaffected by readiness checks.
            call_inference_identity_model(model_name, "http", self.client_http)
            call_inference_identity_model(model_name, "grpc", self.client_grpc)

        # Verify a healthy model is still ready to confirm server stability.
        model_name = "is_ready_fn_returns_true"
        for i in range(num_requests):
            self.assertTrue(
                self.client_http.is_model_ready(model_name),
                f"iteration {i} - HTTP client - Model {model_name} should be READY",
            )
            self.assertTrue(
                self.client_grpc.is_model_ready(model_name),
                f"iteration {i} - GRPC client - Model {model_name} should be READY",
            )

            # Verify inference is unaffected by readiness checks.
            call_inference_identity_model(model_name, "http", self.client_http)
            call_inference_identity_model(model_name, "grpc", self.client_grpc)

    def test_is_ready_returns_non_boolean(self):
        model_name = "is_ready_fn_returns_non_boolean"
        num_requests = 10

        # Send multiple requests in sequence to ensure consistent behavior
        for i in range(num_requests):
            self.assertFalse(
                self.client_http.is_model_ready(model_name),
                f"iteration {i} - HTTP client - Model {model_name} should be NOT READY (wrong return type)",
            )
            self.assertFalse(
                self.client_grpc.is_model_ready(model_name),
                f"iteration {i} - GRPC client - Model {model_name} should be NOT READY (wrong return type)",
            )

            # Verify inference is unaffected by readiness checks.
            call_inference_identity_model(model_name, "http", self.client_http)
            call_inference_identity_model(model_name, "grpc", self.client_grpc)

        # Verify a healthy model is still ready to confirm server stability.
        model_name = "is_ready_fn_returns_true"
        for i in range(num_requests):
            self.assertTrue(
                self.client_http.is_model_ready(model_name),
                f"iteration {i} - HTTP client - Model {model_name} should be READY",
            )
            self.assertTrue(
                self.client_grpc.is_model_ready(model_name),
                f"iteration {i} - GRPC client - Model {model_name} should be READY",
            )

            # Verify inference is unaffected by readiness checks.
            call_inference_identity_model(model_name, "http", self.client_http)
            call_inference_identity_model(model_name, "grpc", self.client_grpc)

    def test_is_ready_takes_long_time(self):
        """Check readiness results for model ``is_ready_fn_timeout``.

        In every iteration the HTTP readiness call is expected to return
        NOT READY and the following gRPC readiness call is expected to return
        READY; an identity inference is run after each of them. The outcome
        depends on the order and timing of these calls, so they must stay in
        this sequence.
        """
        model_name = "is_ready_fn_timeout"
        num_requests = 10

        # Send multiple requests in sequence to ensure consistent behavior
        for i in range(num_requests):
            # This call should time out and return NOT_READY.
            # Note: the stub will continue running is_ready()
            # in the background (similar to the inference flow)
            # even after the backend readiness timeout expires.
            is_ready = self.client_http.is_model_ready(model_name)
            self.assertFalse(
                is_ready,
                f"iteration {i} - HTTP client - Model {model_name} should timeout and return NOT READY",
            )

            call_inference_identity_model(model_name, "http", self.client_http)

            # This call should not create another internal IPC message.
            # It must wait for the in-flight readiness check
            # and return READY once that check completes.
            is_ready = self.client_grpc.is_model_ready(model_name)
            self.assertTrue(
                is_ready,
                f"iteration {i} - GRPC client - Model {model_name} should be READY",
            )

            call_inference_identity_model(model_name, "grpc", self.client_grpc)

    def test_multiple_concurrent_ready_and_infer_requests(self):
        """Run readiness checks and inferences concurrently, HTTP first, then gRPC.

        For each protocol, ``num_requests`` readiness threads and the same
        number of inference threads are started and joined. Worker outcomes
        are gathered in per-protocol lists and asserted on at the end.
        """
        model_name = "is_ready_fn_returns_true"
        num_requests = 16
        protocols = ("http", "grpc")
        # Filled from worker threads; keyed by protocol.
        ready_results = {name: [] for name in protocols}
        ready_errors = {name: [] for name in protocols}
        infer_results = {name: [] for name in protocols}
        infer_errors = {name: [] for name in protocols}

        def open_client(protocol):
            # Every worker uses its own client instance.
            if protocol == "http":
                return httpclient.InferenceServerClient(url=URL_HTTP)
            return grpcclient.InferenceServerClient(url=URL_GRPC)

        def check_model_readiness(protocol, index):
            try:
                with open_client(protocol) as client:
                    is_ready = client.is_model_ready(model_name)
                    ready_results[protocol].append((index, is_ready))
            except Exception as e:
                ready_errors[protocol].append((index, str(e)))

        def do_inference(protocol, index):
            try:
                with open_client(protocol) as client:
                    start = time.time()
                    call_inference_identity_model(model_name, protocol, client)
                    elapsed = time.time() - start
                    infer_results[protocol].append((index, True, elapsed))
            except Exception as e:
                infer_errors[protocol].append((index, str(e)))

        def run_phase(protocol, not_completed_msg):
            # Launch concurrent readiness and inference requests.
            workers = []
            for i in range(num_requests):
                t1 = threading.Thread(target=check_model_readiness, args=(protocol, i))
                t2 = threading.Thread(target=do_inference, args=(protocol, i))
                workers.extend([t1, t2])
                t1.start()
                t2.start()

            # Wait for all requests to complete
            for t in workers:
                t.join(timeout=60)

            # join() with a timeout returns silently, so check liveness.
            for t in workers:
                self.assertFalse(t.is_alive(), not_completed_msg)

        run_phase("http", f"HTTP threads are not completed")
        run_phase("grpc", f"gRPC threads are not completed")

        # Verify no errors in readiness checks
        self.assertEqual(
            len(ready_errors["http"]), 0, f"HTTP errors: {ready_errors['http']}"
        )
        self.assertEqual(
            len(ready_errors["grpc"]), 0, f"gRPC errors: {ready_errors['grpc']}"
        )
        self.assertEqual(
            len(ready_results["http"]),
            num_requests,
            f"Expected {num_requests} HTTP results",
        )
        self.assertEqual(
            len(ready_results["grpc"]),
            num_requests,
            f"Expected {num_requests} gRPC results",
        )

        # All should be True
        for idx, ready in ready_results["http"]:
            self.assertTrue(ready, f"HTTP check {idx} should be ready")
        for idx, ready in ready_results["grpc"]:
            self.assertTrue(ready, f"gRPC check {idx} should be ready")

        # Verify no errors in inference
        self.assertEqual(
            len(infer_errors["http"]), 0, f"Errors occurred: {infer_errors['http']}"
        )
        self.assertEqual(
            len(infer_errors["grpc"]), 0, f"Errors occurred: {infer_errors['grpc']}"
        )
        self.assertEqual(
            len(infer_results["http"]),
            num_requests,
            f"Expected {num_requests} HTTP inference results",
        )
        self.assertEqual(
            len(infer_results["grpc"]),
            num_requests,
            f"Expected {num_requests} gRPC inference results",
        )


# Run the test cases in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/model_readiness/test_models/is_ready_fn_returns_true_decoupled/config.pbtxt
================================================
# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


backend: "python"
max_batch_size: 1

input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]

model_transaction_policy {
  decoupled: true
}


================================================
FILE: qa/L0_backend_python/model_readiness/test_models/is_ready_fn_returns_true_decoupled/model.py
================================================
# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import time

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    Decoupled model that emits N responses per request, N being the "IN" value.

    Every response carries N in its "OUT" tensor. The user-defined is_ready()
    always reports True after a short delay.
    """

    def execute(self, requests):
        for request in requests:
            # "IN" holds the number of responses this request should receive.
            num_responses = (
                pb_utils.get_input_tensor_by_name(request, "IN").as_numpy().item()
            )

            sender = request.get_response_sender()
            payload = pb_utils.Tensor(
                "OUT", np.array([num_responses], dtype=np.int32)
            )

            # Emit the same payload num_responses times.
            for _ in range(num_responses):
                # Simulate some processing delay
                time.sleep(0.1)
                sender.send(pb_utils.InferenceResponse(output_tensors=[payload]))

            # Mark the response stream for this request as finished.
            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        # Responses were delivered through the sender, so nothing is returned.
        return None

    def is_ready(self) -> bool:
        # Simulate some processing delay
        time.sleep(0.2)
        return True


================================================
FILE: qa/L0_backend_python/model_readiness/test_models/readiness_coroutine_model.py
================================================
# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import asyncio
import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    Parameterized identity model whose is_ready() is a coroutine.

    Behavior is controlled via config.pbtxt parameters:
      READINESS_FN_DELAY_SECS - seconds to await before returning (e.g. "0.1");
                                defaults to "0.1" when absent
    """

    def initialize(self, args):
        parameters = json.loads(args["model_config"]).get("parameters", {})
        delay_entry = parameters.get("READINESS_FN_DELAY_SECS", {})
        self.readiness_delay_secs = float(delay_entry.get("string_value", "0.1"))

    def execute(self, requests):
        def echo(request):
            # Copy INPUT0 straight into OUTPUT0.
            source = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            return pb_utils.InferenceResponse(
                [pb_utils.Tensor("OUTPUT0", source.as_numpy())]
            )

        return [echo(request) for request in requests]

    async def is_ready(self):
        # Awaits the configured delay, then always reports ready.
        await asyncio.sleep(self.readiness_delay_secs)
        return True


================================================
FILE: qa/L0_backend_python/model_readiness/test_models/readiness_model.py
================================================
# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import time

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    Parameterized identity model for user-defined is_ready() testing.

    Behavior is controlled via config.pbtxt parameters:
      READINESS_FN_RETURN_VALUE - "true", "false", "exception", or "non_boolean"
                                  (defaults to "true"; unknown values act like "true")
      READINESS_FN_DELAY_SECS  - seconds to sleep before returning (e.g. "0.1");
                                  defaults to "0"
    """

    def initialize(self, args):
        parameters = json.loads(args["model_config"]).get("parameters", {})

        return_entry = parameters.get("READINESS_FN_RETURN_VALUE", {})
        self.readiness_return_value = return_entry.get("string_value", "true")

        delay_entry = parameters.get("READINESS_FN_DELAY_SECS", {})
        self.readiness_delay_secs = float(delay_entry.get("string_value", "0"))

    def execute(self, requests):
        def echo(request):
            # Copy INPUT0 straight into OUTPUT0.
            source = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            return pb_utils.InferenceResponse(
                [pb_utils.Tensor("OUTPUT0", source.as_numpy())]
            )

        return [echo(request) for request in requests]

    def is_ready(self):
        # The optional delay applies to every mode, including "exception".
        if self.readiness_delay_secs > 0:
            time.sleep(self.readiness_delay_secs)

        mode = self.readiness_return_value
        if mode == "exception":
            raise RuntimeError("Internal check failed – model is not ready")
        if mode == "false":
            return False
        if mode == "non_boolean":
            # Deliberately not a bool.
            return "ready"
        # "true" and any unrecognized value.
        return True


================================================
FILE: qa/L0_backend_python/parameters/response_parameters_test.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../../common")

import json
import unittest

import numpy as np
import shm_util
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class ResponseParametersTest(unittest.TestCase):
    """Client-side checks for response parameters set by Python backend models.

    Every test serializes a parameter payload to JSON, sends it as a BYTES
    tensor over gRPC and then inspects either the parameters attached to the
    response or the error raised by the client.
    """

    _server_address_grpc = "localhost:8001"
    _model_name = "response_parameters"
    _shape = [1, 1]

    def setUp(self):
        # A fresh detector per test; every inference below runs inside a probe.
        self._shm_leak_detector = shm_util.ShmLeakDetector()

    def _make_json_input(self, name, payload):
        """Build a BYTES input tensor carrying ``payload`` serialized as JSON.

        Args:
            name: Name of the model input tensor.
            payload: JSON-serializable object to send.

        Returns:
            A ``(infer_input, payload_str)`` tuple where ``payload_str`` is the
            exact JSON text placed into the tensor.
        """
        payload_str = json.dumps(payload)
        infer_input = grpcclient.InferInput(name, self._shape, "BYTES")
        infer_input.set_data_from_numpy(np.array([[payload_str]], dtype=np.object_))
        return infer_input, payload_str

    def _assert_response_parameters_match(self, infer_result, expected_params):
        """Assert that the parameters on ``infer_result`` equal ``expected_params``.

        Only bool, int64 and string parameter values are understood.

        Raises:
            ValueError: If a response parameter holds none of those fields.
        """
        res_params = {}
        for param_key, param_value in infer_result.get_response().parameters.items():
            if param_value.HasField("bool_param"):
                value = param_value.bool_param
            elif param_value.HasField("int64_param"):
                value = param_value.int64_param
            elif param_value.HasField("string_param"):
                value = param_value.string_param
            else:
                raise ValueError(f"Unsupported parameter choice: {param_value}")
            res_params[param_key] = value
        self.assertEqual(expected_params, res_params)

    def _assert_response_parameters_infer_success(self, params):
        """Infer with ``params`` and expect them back as response parameters."""
        infer_input, params_str = self._make_json_input("RESPONSE_PARAMETERS", params)

        with self._shm_leak_detector.Probe():
            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
                result = client.infer(self._model_name, [infer_input])

        # verify the response parameters
        self._assert_response_parameters_match(result, params)

        # model returns the input as output
        output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
        self.assertEqual(params_str, output)

    def _assert_response_parameters_infer_fail(self, params, expected_err_msg):
        """Infer with ``params`` and expect an INVALID_ARGUMENT error.

        The error text must also contain ``expected_err_msg``.
        """
        infer_input, _ = self._make_json_input("RESPONSE_PARAMETERS", params)

        with self._shm_leak_detector.Probe():
            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
                with self.assertRaises(InferenceServerException) as e:
                    client.infer(self._model_name, [infer_input])

        error_text = str(e.exception)
        self.assertIn("[StatusCode.INVALID_ARGUMENT] ", error_text)
        self.assertIn(expected_err_msg, error_text)

    def test_setting_empty_response_parameters(self):
        params = {}
        self._assert_response_parameters_infer_success(params)

    def test_setting_one_element_response_parameters(self):
        params = {"many_elements": False}
        self._assert_response_parameters_infer_success(params)

    def test_setting_three_element_response_parameters(self):
        params = {"bool": True, "str": "Hello World!", "int": 1024}
        self._assert_response_parameters_infer_success(params)

    def test_setting_multi_element_response_parameters(self):
        params = {"a": "1", "b": "2", "c": 3, "d": False, "e": 5, "f": ""}
        self._assert_response_parameters_infer_success(params)

    def test_setting_wrong_type_response_parameters(self):
        # The top-level payload is a list instead of a dict.
        params = []
        expected_err_msg = ", got <class 'list'>"
        self._assert_response_parameters_infer_fail(params, expected_err_msg)

    def test_setting_int_key_type_response_parameters(self):
        # JSON object keys are always strings; presumably the model converts
        # the "1" key to an int before setting it - verify against model.py.
        params = {"1": "int key"}
        expected_err_msg = (
            "Expect parameters keys to have type str, found type <class 'int'>"
        )
        self._assert_response_parameters_infer_fail(params, expected_err_msg)

    def test_setting_float_response_parameters(self):
        params = {"int": 2, "float": 0.5}
        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'float'>"
        self._assert_response_parameters_infer_fail(params, expected_err_msg)

    def test_setting_null_response_parameters(self):
        params = {"bool": True, "null": None}
        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'NoneType'>"
        self._assert_response_parameters_infer_fail(params, expected_err_msg)

    def test_setting_nested_response_parameters(self):
        params = {"str": "", "list": ["variable"]}
        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'list'>"
        self._assert_response_parameters_infer_fail(params, expected_err_msg)

    def test_setting_response_parameters_decoupled(self):
        """Each streamed response must carry its own parameter set."""
        model_name = "response_parameters_decoupled"
        params = [{"bool": False, "int": 2048}, {"str": "Hello World!"}]
        infer_input, _ = self._make_json_input("RESPONSE_PARAMETERS", params)

        responses = []
        with self._shm_leak_detector.Probe():
            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
                client.start_stream(
                    callback=(lambda result, error: responses.append((result, error)))
                )
                client.async_stream_infer(model_name=model_name, inputs=[infer_input])
                client.stop_stream()

        # The length check comes first so that zip() below cannot silently
        # drop a missing or surplus response.
        self.assertEqual(len(params), len(responses))
        for response_params, (result, error) in zip(params, responses):
            self.assertIsNone(error)

            # Since this is a decoupled model, the 'triton_final_response' parameter
            # will be a part of the response parameters, so include it into the expected
            # parameters. The model sends the complete final flag separately from the
            # response, so the parameter is always False.
            expected_params = response_params.copy()
            expected_params["triton_final_response"] = False
            self._assert_response_parameters_match(result, expected_params)

            output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
            self.assertEqual(json.dumps(response_params), output)

    def test_setting_response_parameters_bls(self):
        """Drive the BLS model, which reports its verdict in OUTPUT."""
        model_name = "response_parameters_bls"
        params = {"bool": False, "int": 2048, "str": "Hello World!"}
        params_decoupled = [{}, {"bool": True, "int": 10000}, {"str": "?"}]

        inputs = [
            self._make_json_input("RESPONSE_PARAMETERS", params)[0],
            self._make_json_input("RESPONSE_PARAMETERS_DECOUPLED", params_decoupled)[
                0
            ],
        ]

        with self._shm_leak_detector.Probe():
            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
                result = client.infer(model_name, inputs)

        # NOTE(review): presumably the BLS model validates the parameters
        # itself and returns the outcome as text - confirm against its model.py.
        output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
        self.assertEqual(output, "True")


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/parameters/test.sh
================================================
#!/bin/bash
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

source ../../common/util.sh

# Overall exit status of this script; set to 1 when the test run fails.
RET=0

#
# Test response parameters
#
# Stage a fresh model repository. Each model contributes its model.py as
# version 1 plus its config.pbtxt.
rm -rf models && mkdir models
for MODEL_NAME in response_parameters response_parameters_decoupled response_parameters_bls; do
    mkdir -p models/${MODEL_NAME}/1 && \
        cp ../../python_models/${MODEL_NAME}/model.py models/${MODEL_NAME}/1 && \
        cp ../../python_models/${MODEL_NAME}/config.pbtxt models/${MODEL_NAME}
done

TEST_LOG="response_parameters_test.log"
SERVER_LOG="response_parameters_test.server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/parameters/models --backend-directory=${BACKEND_DIR} --log-verbose=1"

# run_server comes from util.sh; a SERVER_PID of "0" signals a failed start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Do not abort on a failing test run: the server still has to be stopped and
# the result reported.
set +e
python3 -m pytest --junitxml=response_parameters_test.report.xml response_parameters_test.py > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Response parameters test FAILED\n***"
    cat $TEST_LOG
    RET=1
fi
set -e

# Stop the server through the shared util.sh helper, as the other
# L0_backend_python sub-tests do, instead of a hand-rolled kill/wait.
kill_server

if [ $RET -eq 1 ]; then
    echo -e "\n***\n*** Parameters test FAILED\n***"
else
    echo -e "\n***\n*** Parameters test Passed\n***"
fi
exit $RET


================================================
FILE: qa/L0_backend_python/python_based_backends/python_based_backends_test.py
================================================
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys
import unittest
from random import randint

import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import *

sys.path.append("../../common")

# Host name / IP address of the Triton server under test. It defaults to
# "localhost"; Windows tests override it through the TRITONSERVER_IPADDR
# environment variable.
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class PythonBasedBackendsTest(unittest.TestCase):
    """Load add/sub style models through gRPC and verify their arithmetic.

    The models are loaded explicitly in ``setUp`` and then queried with random
    float32 inputs; outputs are compared against numpy sums and differences.
    """

    def setUp(self):
        self.triton_client = grpcclient.InferenceServerClient(
            url=f"{_tritonserver_ipaddr}:8001"
        )
        # unittest does not call tearDown() when setUp() raises, but it does run
        # registered cleanups. Registering close() here keeps the client from
        # leaking if one of the load_model() calls below fails.
        self.addCleanup(self.triton_client.close)

        self.add_sub_model_1 = "add"
        self.add_sub_model_2 = "sub"
        self.python_model = "add_sub"
        self.pytorch_model = "add_sub_pytorch"

        # The "add" model is loaded with a config override that keeps the two
        # latest versions available, since both are queried by the tests.
        self.triton_client.load_model(
            self.add_sub_model_1,
            config='{"backend":"add_sub","version_policy":{"latest":{"num_versions":2}}}',
        )
        self.triton_client.load_model(self.add_sub_model_2)
        self.triton_client.load_model(self.python_model)
        self.triton_client.load_model(self.pytorch_model)

    def test_add_sub_models(self):
        """Both versions of "add" and the single "sub" model must be usable."""
        for version in ("2", "1"):
            self.assertTrue(
                self.triton_client.is_model_ready(
                    self.add_sub_model_1, model_version=version
                )
            )
            self._test_add_sub_model(
                model_name=self.add_sub_model_1,
                model_version=version,
                single_output=True,
            )

        self.assertTrue(self.triton_client.is_model_ready(self.add_sub_model_2))
        self._test_add_sub_model(model_name=self.add_sub_model_2, single_output=True)

    def test_python_model(self):
        self.assertTrue(
            self.triton_client.is_model_ready(self.python_model, model_version="2")
        )
        self._test_add_sub_model(
            model_name=self.python_model, shape=[16], model_version="2"
        )

    def test_pytorch_model(self):
        self.assertTrue(
            self.triton_client.is_model_ready(self.pytorch_model, model_version="1")
        )
        self._test_add_sub_model(model_name=self.pytorch_model)

    def _test_add_sub_model(
        self, model_name, model_version="1", shape=(4,), single_output=False
    ):
        """Run one inference with random inputs and check the arithmetic.

        Args:
            model_name: Model to query.
            model_version: Version string passed to the inference request.
            shape: Sequence of dimensions for both random input tensors.
            single_output: When true the model is expected to expose a single
                "OUTPUT" tensor (the sum for the "add" model, the difference
                otherwise); when false it must expose "OUTPUT0" (sum) and
                "OUTPUT1" (difference).
        """
        input0_data = np.random.rand(*shape).astype(np.float32)
        input1_data = np.random.rand(*shape).astype(np.float32)

        inputs = []
        for tensor_name, data in (("INPUT0", input0_data), ("INPUT1", input1_data)):
            tensor = grpcclient.InferInput(
                tensor_name, data.shape, np_to_triton_dtype(data.dtype)
            )
            tensor.set_data_from_numpy(data)
            inputs.append(tensor)

        total = input0_data + input1_data
        difference = input0_data - input1_data
        # Map every requested output name to the values it must contain.
        if single_output:
            is_add_model = model_name == self.add_sub_model_1
            expected_outputs = {"OUTPUT": total if is_add_model else difference}
        else:
            expected_outputs = {"OUTPUT0": total, "OUTPUT1": difference}

        outputs = [grpcclient.InferRequestedOutput(name) for name in expected_outputs]

        response = self.triton_client.infer(
            model_name=model_name,
            inputs=inputs,
            model_version=model_version,
            request_id=str(randint(10, 99)),
            outputs=outputs,
        )

        for output_name, expected in expected_outputs.items():
            self.assertTrue(
                np.allclose(expected, response.as_numpy(output_name)),
                f"{output_name} of model {model_name} does not match the expected values",
            )


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/python_based_backends/test.sh
================================================
#!/bin/bash
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

source ../../common/util.sh

# Locations and server settings used throughout this script.
QA_MODELS_PATH="../../python_models"
MODEL_REPOSITORY="${MODELDIR}/python_based_backends/models"
SERVER_ARGS="--model-repository=${MODEL_REPOSITORY} --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --log-verbose=1"
SERVER_LOG="./python_based_backends_server.log"
CLIENT_LOG="./python_based_backends_client.log"
TEST_RESULT_FILE="./test_results.txt"
CLIENT_PY="./python_based_backends_test.py"
GEN_PYTORCH_MODEL_PY="../../common/gen_qa_pytorch_model.py"
RET=0

# Always start from an empty model repository.
rm -rf ${MODEL_REPOSITORY}
pip3 install torch

# Install the custom "add_sub" python-based backend.
mkdir -p ${BACKEND_DIR}/add_sub
cp ${QA_MODELS_PATH}/python_based_backends/add_sub_backend/model.py ${BACKEND_DIR}/add_sub/model.py

# One model per arithmetic operation, both served by the add_sub backend.
for OPERATION in add sub; do
    mkdir -p ${MODEL_REPOSITORY}/${OPERATION}/1/
    echo "{ \"operation\": \"${OPERATION}\" }" > ${MODEL_REPOSITORY}/${OPERATION}/1/model.json
    echo "backend: \"add_sub\"" > ${MODEL_REPOSITORY}/${OPERATION}/config.pbtxt
done
# The "add" model additionally gets a second version, identical to the first.
cp -r ${MODEL_REPOSITORY}/add/1/ ${MODEL_REPOSITORY}/add/2/

# Regular python backend model, also with two identical versions.
mkdir -p ${MODEL_REPOSITORY}/add_sub/1
cp ${QA_MODELS_PATH}/add_sub/model.py ${MODEL_REPOSITORY}/add_sub/1/
cp ${QA_MODELS_PATH}/add_sub/config.pbtxt ${MODEL_REPOSITORY}/add_sub/
cp -r ${MODEL_REPOSITORY}/add_sub/1/ ${MODEL_REPOSITORY}/add_sub/2/

# PyTorch backend model: run a local copy of the shared generator script.
cp ${GEN_PYTORCH_MODEL_PY} ./gen_qa_pytorch_model.py
GEN_PYTORCH_MODEL_PY=./gen_qa_pytorch_model.py

set +e
if ! python3 ${GEN_PYTORCH_MODEL_PY} -m ${MODEL_REPOSITORY}; then
    echo -e "\n***\n*** Running ${GEN_PYTORCH_MODEL_PY} FAILED. \n***"
    exit 1
fi
set -e

# run_server comes from util.sh; a SERVER_PID of "0" signals a failed start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    exit 1
fi

# A failing client run must not abort the script before cleanup happens.
set +e
if ! python3 -m pytest --junitxml=python_based_backends.report.xml ${CLIENT_PY} -v > ${CLIENT_LOG} 2>&1; then
    echo -e "\n***\n*** Running ${CLIENT_PY} FAILED. \n***"
    RET=1
fi
set -e

# Stop the server and remove everything generated above.
kill_server
rm -rf ${MODEL_REPOSITORY} ${GEN_PYTORCH_MODEL_PY}

if [ $RET -eq 1 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Python-based Backends test FAILED. \n***"
else
    echo -e "\n***\n*** Python-based Backends test PASSED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/python_test.py
================================================
#!/usr/bin/python

# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest

import numpy as np
import requests as httpreq
import shm_util
import tritonclient.http as httpclient
from tritonclient.utils import *

# By default, find tritonserver on "localhost", but for windows tests
# we overwrite the IP address with the TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

# Platform switches, read from the environment as integers (unset means 0,
# i.e. False). The test class below uses them to leave out tests that do not
# apply on Jetson or Windows.
_test_jetson = bool(int(os.environ.get("TEST_JETSON", 0)))
_test_windows = bool(int(os.environ.get("TEST_WINDOWS", 0)))


class PythonTest(unittest.TestCase):
    def setUp(self):
        """Give every test its own shared-memory leak detector."""
        leak_detector = shm_util.ShmLeakDetector()
        self._shm_leak_detector = leak_detector

    def _infer_help(self, model_name, shape, data_type):
        """Send random data to ``model_name`` and assert OUTPUT0 equals INPUT0.

        Args:
            model_name: Model to query over HTTP.
            shape: Dimensions of the INPUT0 tensor.
            data_type: numpy dtype the random samples are converted to.
        """
        server_url = f"{_tritonserver_ipaddr}:8000"
        with httpclient.InferenceServerClient(server_url) as client:
            # Samples are drawn with randn() first and only then converted to
            # the requested dtype.
            payload = np.array(np.random.randn(*shape), dtype=data_type)

            tensor = httpclient.InferInput(
                "INPUT0", shape, np_to_triton_dtype(payload.dtype)
            )
            tensor.set_data_from_numpy(payload)

            response = client.infer(model_name, [tensor])
            echoed = response.as_numpy("OUTPUT0")
            self.assertTrue(np.all(payload == echoed))

    def _create_cuda_region(self, client, size, name):
        """Create a CUDA shared memory region on device 0 and register it.

        Args:
            client: Triton client used to register the region with the server.
            size: Size of the region in bytes.
            name: Name under which the region is created and registered.

        Returns:
            The handle of the newly created region.
        """
        # Imported lazily, as in the original code, so the module is only
        # required by tests that actually use CUDA shared memory.
        import tritonclient.utils.cuda_shared_memory as cudashm

        region_handle = cudashm.create_shared_memory_region(
            name, byte_size=size, device_id=0
        )
        raw_handle = cudashm.get_raw_handle(region_handle)
        client.register_cuda_shared_memory(name, raw_handle, 0, size)
        return region_handle

    def _optional_input_infer(self, model_name, has_input0, has_input1):
        """Infer with an optional subset of inputs and verify both outputs.

        OUTPUT0 must equal INPUT0 + INPUT1 and OUTPUT1 must equal
        INPUT0 - INPUT1, where an input that is not sent counts as 5.

        Args:
            model_name: Model to query over HTTP.
            has_input0: Whether INPUT0 is sent with the request.
            has_input1: Whether INPUT1 is sent with the request.
        """
        shape = (1,)
        input_plan = (("INPUT0", has_input0), ("INPUT1", has_input1))

        with httpclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8000") as client:
            # First decide the value of every operand. An input that is not
            # sent takes the default value the model uses when it is missing.
            operands = {}
            for tensor_name, is_sent in input_plan:
                if is_sent:
                    operands[tensor_name] = np.random.randint(
                        0, 100, size=shape, dtype=np.int32
                    )
                else:
                    operands[tensor_name] = np.array([5], dtype=np.int32)

            # Then build request tensors only for the inputs that are sent.
            inputs = []
            for tensor_name, is_sent in input_plan:
                if not is_sent:
                    continue
                data = operands[tensor_name]
                tensor = httpclient.InferInput(
                    tensor_name, shape, np_to_triton_dtype(data.dtype)
                )
                tensor.set_data_from_numpy(data)
                inputs.append(tensor)

            result = client.infer(model_name, inputs)

            expected = {
                "OUTPUT0": operands["INPUT0"] + operands["INPUT1"],
                "OUTPUT1": operands["INPUT0"] - operands["INPUT1"],
            }

            # Both outputs must be present before their contents are compared.
            actual = {}
            for output_name in expected:
                actual[output_name] = result.as_numpy(output_name)
                self.assertIsNotNone(
                    actual[output_name], f"{output_name} was not found."
                )

            for output_name, wanted in expected.items():
                np.testing.assert_equal(
                    actual[output_name],
                    wanted,
                    f"{output_name} doesn't match expected {output_name}",
                )

    def test_growth_error(self):
        """Oversized payloads must fail cleanly and leave the model usable."""
        # NOTE: Windows tests are not running in a docker container. Consequently, we
        # do not specify a --shm-size to use a basis to grow. Therefore, this test does
        # not apply for Windows.
        if _test_windows:
            return

        model_name = "identity_uint8_nobatch"
        dtype = np.uint8
        mib = 1024 * 1024
        small_payload = [2 * mib]

        # A 2 MiB request works before anything goes wrong.
        with self._shm_leak_detector.Probe():
            self._infer_help(model_name, small_payload, dtype)

        # Total shared memory available is 1 GiB. A 1 GiB payload leads to an
        # error in the main Python backend process, a 512 MiB payload leads to
        # an error in the Python stub process.
        for oversized_bytes in (1024 * mib, 512 * mib):
            with self.assertRaises(InferenceServerException) as ex:
                self._infer_help(model_name, [oversized_bytes], dtype)
            self.assertIn(
                "Failed to increase the shared memory pool size", str(ex.exception)
            )

        # Send a small payload again to make sure the model is still working
        # properly after the failures.
        with self._shm_leak_detector.Probe():
            self._infer_help(model_name, small_payload, dtype)

    # GPU tensors are not supported on jetson
    # CUDA Shared memory is not supported on jetson
    if not _test_jetson and not _test_windows:

        def test_gpu_tensor_error(self):
            """An undersized CUDA output region must be rejected by the server.

            The output region holds 1 byte while the model returns 1000 bool
            values, so the inference is expected to fail with a size error.
            """
            import tritonclient.utils.cuda_shared_memory as cuda_shared_memory

            model_name = "identity_bool"
            with self._shm_leak_detector.Probe() as shm_probe:
                with httpclient.InferenceServerClient(
                    f"{_tritonserver_ipaddr}:8000"
                ) as client:
                    input_data = np.array([[True] * 1000], dtype=bool)
                    inputs = [
                        httpclient.InferInput(
                            "INPUT0",
                            input_data.shape,
                            np_to_triton_dtype(input_data.dtype),
                        )
                    ]
                    inputs[0].set_data_from_numpy(input_data)

                    requested_outputs = [httpclient.InferRequestedOutput("OUTPUT0")]

                    # intentionally create a shared memory region with not enough size.
                    client.unregister_cuda_shared_memory()
                    shm0_handle = self._create_cuda_region(client, 1, "output0_data")
                    try:
                        requested_outputs[0].set_shared_memory("output0_data", 1)
                        with self.assertRaises(InferenceServerException) as ex:
                            client.infer(model_name, inputs, outputs=requested_outputs)
                        self.assertIn(
                            "should be at least 1000 bytes to hold the results",
                            str(ex.exception),
                        )
                    finally:
                        # Release the region even when an assertion above fails,
                        # so a failing test does not leave it registered and
                        # allocated.
                        try:
                            client.unregister_cuda_shared_memory()
                        finally:
                            cuda_shared_memory.destroy_shared_memory_region(
                                shm0_handle
                            )

        def test_dlpack_tensor_error(self):
            """An undersized CUDA output region must be rejected for DLPack models.

            The input is passed through a CUDA shared memory region of the right
            size; the output region holds 1 byte while the model returns 1000
            float32 values (4000 bytes), so the inference is expected to fail.
            """
            import tritonclient.utils.cuda_shared_memory as cuda_shared_memory

            model_name = "dlpack_identity"
            with self._shm_leak_detector.Probe() as shm_probe:
                with httpclient.InferenceServerClient(
                    f"{_tritonserver_ipaddr}:8000"
                ) as client:
                    input_data = np.array([[1] * 1000], dtype=np.float32)
                    inputs = [
                        httpclient.InferInput(
                            "INPUT0",
                            input_data.shape,
                            np_to_triton_dtype(input_data.dtype),
                        )
                    ]

                    requested_outputs = [httpclient.InferRequestedOutput("OUTPUT0")]
                    input_data_size = input_data.itemsize * input_data.size
                    client.unregister_cuda_shared_memory()

                    # Every region created below is tracked so that all of them
                    # are destroyed at the end, including the input region and
                    # including the case where an assertion fails.
                    created_regions = []
                    try:
                        input_region = self._create_cuda_region(
                            client, input_data_size, "input0_data"
                        )
                        created_regions.append(input_region)
                        inputs[0].set_shared_memory("input0_data", input_data_size)
                        cuda_shared_memory.set_shared_memory_region(
                            input_region, [input_data]
                        )

                        # Intentionally create a small region to trigger an error
                        shm0_handle = self._create_cuda_region(
                            client, 1, "output0_data"
                        )
                        created_regions.append(shm0_handle)
                        requested_outputs[0].set_shared_memory("output0_data", 1)

                        with self.assertRaises(InferenceServerException) as ex:
                            client.infer(model_name, inputs, outputs=requested_outputs)
                        self.assertIn(
                            "should be at least 4000 bytes to hold the results",
                            str(ex.exception),
                        )
                    finally:
                        try:
                            client.unregister_cuda_shared_memory()
                        finally:
                            for region in created_regions:
                                cuda_shared_memory.destroy_shared_memory_region(
                                    region
                                )

    def test_async_infer(self):
        """Send concurrent async HTTP requests and verify results and statistics.

        Issues ``request_parallelism`` asynchronous requests to the
        ``identity_uint8`` model, checks that every response echoes its own
        input, and then verifies both the statistics endpoint and the
        metrics endpoint report a single execution covering all requests.
        """
        model_name = "identity_uint8"
        request_parallelism = 4
        shape = [2, 2]

        with self._shm_leak_detector.Probe():
            with httpclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8000", concurrency=request_parallelism
            ) as client:
                input_datas = []
                requests = []
                for _ in range(request_parallelism):
                    input_data = (16384 * np.random.randn(*shape)).astype(np.uint8)
                    input_datas.append(input_data)
                    inputs = [
                        httpclient.InferInput(
                            "INPUT0",
                            input_data.shape,
                            np_to_triton_dtype(input_data.dtype),
                        )
                    ]
                    inputs[0].set_data_from_numpy(input_data)
                    requests.append(client.async_infer(model_name, inputs))

                # Pair every in-flight request with the input it was sent with.
                for request, input_data in zip(requests, input_datas):
                    # Get the result from the initiated asynchronous inference request.
                    # Note the call will block till the server responds.
                    results = request.get_result()

                    output_data = results.as_numpy("OUTPUT0")
                    self.assertIsNotNone(output_data, "error: expected 'OUTPUT0'")
                    self.assertTrue(
                        np.array_equal(output_data, input_data),
                        "error: expected output {} to match input {}".format(
                            output_data, input_data
                        ),
                    )

                # Make sure the requests ran in parallel.
                stats = client.get_inference_statistics(model_name)
                stats_msg = "error: expected statistics for {}".format(model_name)
                # Check the length first so an empty list fails cleanly
                # instead of raising IndexError below.
                self.assertEqual(len(stats["model_stats"]), 1, stats_msg)
                stat = stats["model_stats"][0]
                self.assertEqual(stat["name"], model_name, stats_msg)

                # NOTE(review): 8 presumably comes from 4 requests each with a
                # leading dimension of 2, all served by one execution -- confirm
                # against the model's batching configuration.
                self.assertEqual(
                    (stat["execution_count"], stat["inference_count"]),
                    (1, 8),
                    "error: expected execution_count == 1 and inference_count == 8, got {} and {}".format(
                        stat["execution_count"], stat["inference_count"]
                    ),
                )
                batch_stat = stat["batch_stats"][0]
                self.assertEqual(
                    batch_stat["batch_size"],
                    8,
                    f"error: expected batch_size == 8, got {batch_stat['batch_size']}",
                )
                # Check metrics to make sure they are reported correctly
                metrics = httpreq.get(f"http://{_tritonserver_ipaddr}:8002/metrics")
                print(metrics.text)

                success_str = (
                    'nv_inference_request_success{model="identity_uint8",version="1"}'
                )
                infer_count_str = (
                    'nv_inference_count{model="identity_uint8",version="1"}'
                )
                infer_exec_str = (
                    'nv_inference_exec_count{model="identity_uint8",version="1"}'
                )

                # Metric line prefix -> expected value. Insertion order fixes
                # the order in which the metrics are asserted.
                expected_metrics = {
                    success_str: 4,
                    infer_count_str: 8,
                    infer_exec_str: 1,
                }
                # A metric that never appears in the output stays None.
                observed_metrics = dict.fromkeys(expected_metrics)
                for line in metrics.text.splitlines():
                    for prefix in expected_metrics:
                        if line.startswith(prefix):
                            # The value is whatever follows the metric name.
                            observed_metrics[prefix] = float(line[len(prefix) :])

                for prefix, expected in expected_metrics.items():
                    self.assertEqual(
                        observed_metrics[prefix],
                        expected,
                        "error: expected metric {} == {}, got {}".format(
                            prefix, expected, observed_metrics[prefix]
                        ),
                    )

    def test_bool(self):
        """Send a BOOL tensor to ``identity_bool`` and expect it echoed back."""
        with self._shm_leak_detector.Probe():
            with httpclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8000"
            ) as client:
                bool_tensor = np.array([[True, False, True]], dtype=bool)
                infer_input = httpclient.InferInput(
                    "INPUT0",
                    bool_tensor.shape,
                    np_to_triton_dtype(bool_tensor.dtype),
                )
                infer_input.set_data_from_numpy(bool_tensor)

                response = client.infer("identity_bool", [infer_input])
                echoed = response.as_numpy("OUTPUT0")

                self.assertIsNotNone(echoed)
                # Element-wise comparison; every position must match.
                self.assertTrue(np.all(echoed == bool_tensor))

    def test_bf16(self):
        """Send FP32 ones tagged as BF16 and check the BF16 response.

        The response must be labelled ``BF16`` by the server while the numpy
        view of the output is FP32 and close to the input.
        """
        with self._shm_leak_detector.Probe():
            with httpclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8000"
            ) as client:
                # NOTE: Client will truncate FP32 to BF16 internally
                # since numpy has no built-in BF16 representation.
                ones = np.ones([2, 2], dtype=np.float32)
                bf16_input = httpclient.InferInput("INPUT0", ones.shape, "BF16")
                # The value returned by set_data_from_numpy is what gets sent,
                # exactly as in the chained form.
                populated_input = bf16_input.set_data_from_numpy(ones)
                result = client.infer("identity_bf16", [populated_input])

                # Assert that Triton correctly returned a BF16 tensor.
                first_output = result.get_response()["outputs"][0]
                self.assertEqual(first_output["datatype"], "BF16")

                decoded = result.as_numpy("OUTPUT0")
                self.assertIsNotNone(decoded)
                # BF16 tensors are held in FP32 when converted to numpy due to
                # lack of native BF16 support in numpy, so verify that.
                self.assertEqual(decoded.dtype, np.float32)
                self.assertTrue(np.allclose(decoded, ones))

    def test_infer_pytorch(self):
        """Run ``pytorch_fp32_fp32`` on a zero tensor and compare to known output.

        Does nothing when ``_test_windows`` is set.
        """
        # FIXME: This model requires torch. Because windows tests are not run in a docker
        # environment with torch installed, we need to think about how we want to install
        # the package. Do we install it on the runners? Within the model?
        if _test_windows:
            return

        # expected inference response from a zero tensor
        expected_result = [
            -2.2377274,
            -2.3976364,
            -2.2464046,
            -2.2790744,
            -2.3828976,
            -2.2940576,
            -2.2928185,
            -2.340665,
            -2.275219,
            -2.292135,
        ]

        with self._shm_leak_detector.Probe():
            with httpclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8000"
            ) as client:
                zeros = np.zeros([1, 1, 28, 28], dtype=np.float32)
                infer_input = httpclient.InferInput(
                    "IN", zeros.shape, np_to_triton_dtype(zeros.dtype)
                )
                infer_input.set_data_from_numpy(zeros)

                result = client.infer("pytorch_fp32_fp32", [infer_input])
                output_data = result.as_numpy("OUT")
                self.assertIsNotNone(output_data, "error: expected 'OUT'")

                # Only the first row of the output is compared.
                self.assertTrue(
                    np.allclose(output_data[0], expected_result),
                    "Inference result is not correct",
                )

    def test_init_args(self):
        """Check that the ``init_args`` model reports 7 keys in its init args."""
        with self._shm_leak_detector.Probe():
            with httpclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8000"
            ) as client:
                zeros = np.zeros([2, 2], dtype=np.float32)
                infer_input = httpclient.InferInput(
                    "IN", zeros.shape, np_to_triton_dtype(zeros.dtype)
                )
                infer_input.set_data_from_numpy(zeros)

                result = client.infer("init_args", [infer_input])
                # output response in this model is the number of keys in the args
                reported_key_count = result.as_numpy("OUT")
                self.assertTrue(
                    reported_key_count == 7,
                    "Number of keys in the init args is not correct",
                )

    def test_unicode(self):
        """Send a UTF-8 encoded emoji to the ``string`` model twice."""
        tensor_shape = [1]

        # The first run will use np.bytes_ and the second run will use
        # np.object_
        # NOTE(review): the input built below is always np.bytes_, so the
        # note above presumably refers to the dtype the model returns --
        # confirm against the model implementation.
        for _ in range(2):
            with self._shm_leak_detector.Probe():
                with httpclient.InferenceServerClient(
                    f"{_tritonserver_ipaddr}:8000"
                ) as client:
                    encoded = bytes("😀", encoding="utf-8")
                    payload = np.array([encoded], dtype=np.bytes_)
                    infer_input = httpclient.InferInput(
                        "INPUT0", tensor_shape, np_to_triton_dtype(payload.dtype)
                    )
                    infer_input.set_data_from_numpy(payload)

                    result = client.infer("string", [infer_input])
                    returned = result.as_numpy("OUTPUT0")
                    self.assertIsNotNone(returned)
                    self.assertEqual(returned[0], payload)

    def test_optional_input(self):
        """Run the ``optional`` model with every present/absent input combination."""
        # Same order as two nested loops over [True, False]:
        # (T, T), (T, F), (F, T), (F, F).
        presence_combinations = [
            (with_input0, with_input1)
            for with_input0 in (True, False)
            for with_input1 in (True, False)
        ]

        with self._shm_leak_detector.Probe():
            for with_input0, with_input1 in presence_combinations:
                self._optional_input_infer("optional", with_input0, with_input1)

    def test_string(self):
        """Send four string requests to ``string_fixed`` and check each output.

        Even-numbered requests must return the input as bytes; odd-numbered
        requests must return an empty output.
        """
        tensor_shape = [1]

        # Test different string outputs. This test will send 4 requests to the
        # backend. The model will return 4 responses (np.object_ and np.bytes) *
        # (empty output and fixed output)
        for request_index in range(4):
            with self._shm_leak_detector.Probe():
                with httpclient.InferenceServerClient(
                    f"{_tritonserver_ipaddr}:8000"
                ) as client:
                    payload = np.array(["123456"], dtype=np.object_)
                    infer_input = httpclient.InferInput(
                        "INPUT0", tensor_shape, np_to_triton_dtype(payload.dtype)
                    )
                    infer_input.set_data_from_numpy(payload)

                    result = client.infer("string_fixed", [infer_input])
                    returned = result.as_numpy("OUTPUT0")
                    self.assertIsNotNone(returned)

                    if request_index % 2:
                        # Odd requests: empty output expected.
                        self.assertEqual(returned.size, 0)
                    else:
                        # Even requests: the input comes back as bytes.
                        self.assertEqual(returned[0], payload.astype(np.bytes_))

    def test_non_contiguous(self):
        """Compare ``non_contiguous`` outputs to reshaped/transposed inputs.

        OUTPUT0 must equal the input reshaped, OUTPUT1 the full transpose and
        OUTPUT2 the input with its axes permuted by ``shape_reorder``.
        """
        shape = [2, 10, 11, 6, 5]
        new_shape = [10, 2, 6, 5, 11]
        shape_reorder = [1, 0, 4, 2, 3]

        with self._shm_leak_detector.Probe():
            with httpclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8000"
            ) as client:
                source = np.random.rand(*shape).astype(np.float32)
                infer_input = httpclient.InferInput(
                    "INPUT0", shape, np_to_triton_dtype(source.dtype)
                )
                infer_input.set_data_from_numpy(source)
                result = client.infer("non_contiguous", [infer_input])

                # Transposing creates non-contiguous views of the input.
                expected_by_output = {
                    "OUTPUT0": source.reshape(new_shape),
                    "OUTPUT1": source.T,
                    "OUTPUT2": np.transpose(source, shape_reorder),
                }
                for output_name, expected in expected_by_output.items():
                    self.assertTrue(
                        np.all(expected == result.as_numpy(output_name))
                    )


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/request_rescheduling/grpc_endpoint_test.py
================================================
#!/usr/bin/env python
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append("../../common")

# GRPC streaming helpers.
import queue
import unittest
from functools import partial

import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

# Address of the tritonserver under test. Defaults to "localhost"; Windows
# tests override it through the TRITONSERVER_IPADDR environment variable.
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class UserData:
    """Holder for the outcomes delivered to the streaming callback."""

    def __init__(self) -> None:
        # Results and errors are queued here in arrival order.
        self._completed_requests: queue.Queue = queue.Queue()


def callback(user_data, result, error):
    """Queue the outcome of a streamed request on ``user_data``.

    A truthy ``error`` is queued in place of ``result``; otherwise
    ``result`` is queued.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


class GrpcEndpointTest(unittest.TestCase):
    """Request rescheduling tests driven through the GRPC endpoint."""

    def test_grpc_decoupled(self, sequence_id=0, sequence_start=False):
        """Stream one request to ``iterative_sequence`` and check its responses.

        The request carries the value 3 and three responses are expected,
        whose ``OUT`` values count down 2, 1, 0. Any error delivered on the
        stream is re-raised so the test fails with the server's error.
        """
        user_data = UserData()
        with grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        ) as triton_client:
            # Reload the model to reset the flag
            triton_client.unload_model("iterative_sequence")
            triton_client.load_model("iterative_sequence")

            triton_client.start_stream(callback=partial(callback, user_data))
            inputs = [grpcclient.InferInput("IN", [1], "INT32")]
            inputs[0].set_data_from_numpy(np.array([3], dtype=np.int32))

            triton_client.async_stream_infer(
                model_name="iterative_sequence",
                inputs=inputs,
                sequence_id=sequence_id,
                sequence_start=sequence_start,
            )
            res_count = 3
            while res_count > 0:
                # Blocks until the callback has queued the next outcome.
                data_item = user_data._completed_requests.get()
                res_count -= 1
                # isinstance (rather than an exact type comparison) also
                # catches subclasses of InferenceServerException, which would
                # otherwise fall through and fail on ``as_numpy``.
                if isinstance(data_item, InferenceServerException):
                    raise data_item
                self.assertEqual(res_count, data_item.as_numpy("OUT")[0])
            self.assertEqual(0, res_count)

    def test_grpc_non_decoupled(self, sequence_id=0, sequence_start=False):
        """Run ``request_rescheduling_addsub`` once and check sum and difference.

        ``sequence_id`` and ``sequence_start`` are accepted for signature
        parity with ``test_grpc_decoupled`` but are not used here.
        """
        with grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        ) as triton_client:
            # Reload the model to reset the flag
            triton_client.unload_model("request_rescheduling_addsub")
            triton_client.load_model("request_rescheduling_addsub")

            inputs = [
                grpcclient.InferInput("INPUT0", [16], "FP32"),
                grpcclient.InferInput("INPUT1", [16], "FP32"),
            ]
            input0_val = np.random.randn(*[16]).astype(np.float32)
            input1_val = np.random.randn(*[16]).astype(np.float32)
            inputs[0].set_data_from_numpy(input0_val)
            inputs[1].set_data_from_numpy(input1_val)

            results = triton_client.infer(
                model_name="request_rescheduling_addsub",
                inputs=inputs,
            )

            output0_data = results.as_numpy("OUTPUT0")
            output1_data = results.as_numpy("OUTPUT1")

            # OUTPUT0 is the element-wise sum, OUTPUT1 the difference.
            self.assertTrue(np.array_equal(output0_data, input0_val + input1_val))
            self.assertTrue(np.array_equal(output1_data, input0_val - input1_val))


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/request_rescheduling/test.sh
================================================
#!/bin/bash
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Client test entry point, log locations and shared QA helpers.
CLIENT_PY="../test_infer_shm_leak.py"
CLIENT_LOG="./request_rescheduling_client.log"
TEST_RESULT_FILE='test_results.txt'
source ../../common/util.sh

RET=0

# Start from a clean working directory.
rm -fr *.log ./models *.txt

# Stage each model (version 1 plus its config) into the local repository.
for staged_model in \
    bls_request_rescheduling \
    request_rescheduling_addsub \
    iterative_sequence \
    wrong_return_type; do
    mkdir -p "models/${staged_model}/1/"
    cp "../../python_models/${staged_model}/model.py" "models/${staged_model}/1/"
    cp "../../python_models/${staged_model}/config.pbtxt" "models/${staged_model}"
done

SERVER_LOG="./request_rescheduling_server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/request_rescheduling/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --load-model=* --log-verbose=1"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

export MODEL_NAME='bls_request_rescheduling'

# A failing client test is recorded in RET rather than aborting the script,
# so errexit is disabled around each pytest run.
set +e
if ! python3 -m pytest --junitxml="${MODEL_NAME}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** bls_request_rescheduling test FAILED. \n***"
    cat $CLIENT_LOG
    RET=1
fi
set -e

GRPC_TEST_PY=./grpc_endpoint_test.py

set +e
if ! python3 -m pytest --junitxml="grpc_request_reschedule.report.xml" ${GRPC_TEST_PY} >> ${CLIENT_LOG} 2>&1; then
    echo -e "\n***\n*** GRPC Endpoint test FAILED. \n***"
    cat $CLIENT_LOG
    RET=1
fi
set -e

kill_server


# RET is only ever 0 or 1 at this point.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Request Rescheduling test PASSED. \n***"
else
    cat $SERVER_LOG
    echo -e "\n***\n*** Request Rescheduling test FAILED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/response_sender/response_sender_complete_final_test.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import time
import unittest

import numpy as np
import tritonclient.grpc as grpcclient

# Address of the tritonserver under test. Defaults to "localhost"; Windows
# tests override it through the TRITONSERVER_IPADDR environment variable.
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class ResponseSenderTest(unittest.TestCase):
    """Checks the ``response_sender_complete_final`` model via GRPC streaming."""

    def _generate_streaming_callback_and_responses_pair(self):
        """Return a stream callback and the list it appends each outcome to."""
        responses = []  # [{"result": result, "error": error}, ...]

        def callback(result, error):
            responses.append({"result": result, "error": error})

        return callback, responses

    def _read_server_log(self):
        """Return the contents of the file named by the SERVER_LOG env var."""
        with open(os.environ["SERVER_LOG"]) as f:
            return f.read()

    def test_respond_after_complete_final(self):
        """Stream one request and verify the response and the server log.

        Exactly one error-free response echoing the input is expected, and
        the server log must contain "Test Passed" only after the request.
        """
        # The pass marker must not be in the log before the request is sent.
        self.assertNotIn("Test Passed", self._read_server_log())

        model_name = "response_sender_complete_final"
        shape = [1, 1]
        inputs = [grpcclient.InferInput("INPUT0", shape, "FP32")]
        input0_np = np.array([[123.45]], np.float32)
        inputs[0].set_data_from_numpy(input0_np)

        callback, responses = self._generate_streaming_callback_and_responses_pair()
        with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client:
            client.start_stream(callback)
            client.async_stream_infer(model_name, inputs)
            client.stop_stream()

        self.assertEqual(len(responses), 1)
        for response in responses:
            # Check the error before touching the result, so a server-side
            # failure is reported as that error rather than as a failure to
            # read the result (presumably None in that case -- confirm
            # against the client's callback contract).
            self.assertIsNone(response["error"])
            output0_np = response["result"].as_numpy(name="OUTPUT0")
            self.assertTrue(np.allclose(input0_np, output0_np))

        time.sleep(1)  # make sure the logs are written before checking
        server_log = self._read_server_log()
        self.assertNotIn("Unexpected request length", server_log)
        self.assertNotIn("Expected exception not raised", server_log)
        self.assertNotIn("Test FAILED", server_log)
        self.assertIn("Test Passed", server_log)


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/response_sender/response_sender_test.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import unittest

import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

# Address of the tritonserver under test. Defaults to "localhost"; Windows
# tests override it through the TRITONSERVER_IPADDR environment variable.
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class ResponseSenderTest(unittest.TestCase):
    """Exercise the Python backend response sender through a gRPC stream.

    Every test drives one or more of the ``response_sender*`` models with five
    control inputs (see ``_get_inputs``) and then inspects what the streaming
    callback received.

    The ``_inputs_parameters_*`` dictionaries are keyword-argument sets that
    can be unpacked into ``_get_inputs``, ``_infer`` and the
    ``_assert_*_infer_success`` helpers.
    """

    _inputs_parameters_zero_response_pre_return = {
        "number_of_response_before_return": 0,
        "send_complete_final_flag_before_return": True,
        "return_a_response": False,
        "number_of_response_after_return": 0,
        "send_complete_final_flag_after_return": False,
    }
    _inputs_parameters_zero_response_post_return = {
        "number_of_response_before_return": 0,
        "send_complete_final_flag_before_return": False,
        "return_a_response": False,
        "number_of_response_after_return": 0,
        "send_complete_final_flag_after_return": True,
    }
    _inputs_parameters_one_response_pre_return = {
        "number_of_response_before_return": 1,
        "send_complete_final_flag_before_return": True,
        "return_a_response": False,
        "number_of_response_after_return": 0,
        "send_complete_final_flag_after_return": False,
    }
    _inputs_parameters_one_response_post_return = {
        "number_of_response_before_return": 0,
        "send_complete_final_flag_before_return": False,
        "return_a_response": False,
        "number_of_response_after_return": 1,
        "send_complete_final_flag_after_return": True,
    }
    _inputs_parameters_two_response_pre_return = {
        "number_of_response_before_return": 2,
        "send_complete_final_flag_before_return": True,
        "return_a_response": False,
        "number_of_response_after_return": 0,
        "send_complete_final_flag_after_return": False,
    }
    _inputs_parameters_two_response_post_return = {
        "number_of_response_before_return": 0,
        "send_complete_final_flag_before_return": False,
        "return_a_response": False,
        "number_of_response_after_return": 2,
        "send_complete_final_flag_after_return": True,
    }
    _inputs_parameters_response_pre_and_post_return = {
        "number_of_response_before_return": 1,
        "send_complete_final_flag_before_return": False,
        "return_a_response": False,
        "number_of_response_after_return": 3,
        "send_complete_final_flag_after_return": True,
    }
    _inputs_parameters_one_response_on_return = {
        "number_of_response_before_return": 0,
        "send_complete_final_flag_before_return": False,
        "return_a_response": True,
        "number_of_response_after_return": 0,
        "send_complete_final_flag_after_return": False,
    }
    _inputs_parameters_one_response_pre_and_on_return = {
        "number_of_response_before_return": 1,
        "send_complete_final_flag_before_return": True,
        "return_a_response": True,
        "number_of_response_after_return": 0,
        "send_complete_final_flag_after_return": False,
    }
    _inputs_parameters_one_response_on_and_post_return = {
        "number_of_response_before_return": 0,
        "send_complete_final_flag_before_return": False,
        "return_a_response": True,
        "number_of_response_after_return": 1,
        "send_complete_final_flag_after_return": True,
    }

    def _get_inputs(
        self,
        number_of_response_before_return,
        send_complete_final_flag_before_return,
        return_a_response,
        number_of_response_after_return,
        send_complete_final_flag_after_return,
    ):
        """Build the five control tensors for one request.

        Each tensor has shape ``[1, 1]``; the two counts are sent as UINT8 and
        the three flags as BOOL.

        Returns:
            A list of ``grpcclient.InferInput`` in the order: before-return
            count, before-return final flag, return-a-response flag,
            after-return count, after-return final flag.
        """
        shape = [1, 1]
        inputs = [
            grpcclient.InferInput("NUMBER_OF_RESPONSE_BEFORE_RETURN", shape, "UINT8"),
            grpcclient.InferInput(
                "SEND_COMPLETE_FINAL_FLAG_BEFORE_RETURN", shape, "BOOL"
            ),
            grpcclient.InferInput("RETURN_A_RESPONSE", shape, "BOOL"),
            grpcclient.InferInput("NUMBER_OF_RESPONSE_AFTER_RETURN", shape, "UINT8"),
            grpcclient.InferInput(
                "SEND_COMPLETE_FINAL_FLAG_AFTER_RETURN", shape, "BOOL"
            ),
        ]
        inputs[0].set_data_from_numpy(
            np.array([[number_of_response_before_return]], np.uint8)
        )
        inputs[1].set_data_from_numpy(
            np.array([[send_complete_final_flag_before_return]], bool)
        )
        inputs[2].set_data_from_numpy(np.array([[return_a_response]], bool))
        inputs[3].set_data_from_numpy(
            np.array([[number_of_response_after_return]], np.uint8)
        )
        inputs[4].set_data_from_numpy(
            np.array([[send_complete_final_flag_after_return]], bool)
        )
        return inputs

    def _generate_streaming_callback_and_responses_pair(self):
        """Return a stream callback and the list it appends to.

        Every callback invocation appends one
        ``{"result": result, "error": error}`` record to the returned list.
        """
        responses = []  # [{"result": result, "error": error}, ...]

        def callback(result, error):
            responses.append({"result": result, "error": error})

        return callback, responses

    def _infer_parallel(self, model_name, parallel_inputs):
        """Send every input set to ``model_name`` over a single gRPC stream.

        Args:
            model_name: Name of the model to infer on.
            parallel_inputs: Iterable of input lists as built by
                ``_get_inputs``; one streaming request is issued per item.

        Returns:
            The list of callback records gathered on the stream, returned
            after the stream has been stopped.
        """
        callback, responses = self._generate_streaming_callback_and_responses_pair()
        with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client:
            client.start_stream(callback)
            for inputs in parallel_inputs:
                client.async_stream_infer(model_name, inputs)
            client.stop_stream()
        return responses

    def _infer(
        self,
        model_name,
        number_of_response_before_return,
        send_complete_final_flag_before_return,
        return_a_response,
        number_of_response_after_return,
        send_complete_final_flag_after_return,
    ):
        """Issue a single streaming request and return its callback records."""
        inputs = self._get_inputs(
            number_of_response_before_return,
            send_complete_final_flag_before_return,
            return_a_response,
            number_of_response_after_return,
            send_complete_final_flag_after_return,
        )
        return self._infer_parallel(model_name, [inputs])

    def _assert_responses_valid(
        self,
        responses,
        number_of_response_before_return,
        return_a_response,
        number_of_response_after_return,
    ):
        """Check that ``responses`` are error-free and match the expected counts.

        Each response is classified by the value of its ``INDEX`` output:
        below 1000 it counts as the response returned from the model, below
        2000 as a response sent before return, and below 3000 as a response
        sent after return. Any other value fails the test.

        Args:
            responses: Callback records as produced by ``_infer`` or
                ``_infer_parallel``.
            number_of_response_before_return: Expected number of responses in
                the before-return range.
            return_a_response: Whether a returned response is expected.
            number_of_response_after_return: Expected number of responses in
                the after-return range.
        """
        before_return_response_count = 0
        response_returned = False
        after_return_response_count = 0
        for response in responses:
            result, error = response["result"], response["error"]
            self.assertIsNone(error)
            result_np = result.as_numpy(name="INDEX")
            # as_numpy() yields None when the output is absent; fail with a
            # readable message instead of an AttributeError on None.
            self.assertIsNotNone(result_np, "Response is missing the 'INDEX' output.")
            response_id = result_np.sum() / result_np.shape[0]
            if response_id < 1000:
                self.assertFalse(
                    response_returned,
                    "Expect at most one response returned per request.",
                )
                response_returned = True
            elif response_id < 2000:
                before_return_response_count += 1
            elif response_id < 3000:
                after_return_response_count += 1
            else:
                # Report as a test failure rather than an unrelated error.
                self.fail(f"Unexpected response_id: {response_id}")
        self.assertEqual(number_of_response_before_return, before_return_response_count)
        self.assertEqual(return_a_response, response_returned)
        self.assertEqual(number_of_response_after_return, after_return_response_count)

    def _assert_responses_exception(self, responses, expected_message):
        """Check that exactly one error response containing ``expected_message`` arrived."""
        for response in responses:
            self.assertIsNone(response["result"])
            self.assertIsInstance(response["error"], InferenceServerException)
            self.assertIn(expected_message, response["error"].message())
        # There may be more responses, but currently only one is seen for all tests.
        self.assertEqual(len(responses), 1)

    def _assert_decoupled_infer_success(
        self,
        number_of_response_before_return,
        send_complete_final_flag_before_return,
        return_a_response,
        number_of_response_after_return,
        send_complete_final_flag_after_return,
    ):
        """Run one request on both decoupled models and expect exactly what was asked for."""
        model_name = "response_sender_decoupled"
        responses = self._infer(
            model_name,
            number_of_response_before_return,
            send_complete_final_flag_before_return,
            return_a_response,
            number_of_response_after_return,
            send_complete_final_flag_after_return,
        )
        self._assert_responses_valid(
            responses,
            number_of_response_before_return,
            return_a_response,
            number_of_response_after_return,
        )
        # Do NOT group into a for-loop as it hides which model failed.
        model_name = "response_sender_decoupled_async"
        responses = self._infer(
            model_name,
            number_of_response_before_return,
            send_complete_final_flag_before_return,
            return_a_response,
            number_of_response_after_return,
            send_complete_final_flag_after_return,
        )
        self._assert_responses_valid(
            responses,
            number_of_response_before_return,
            return_a_response,
            number_of_response_after_return,
        )

    def _assert_non_decoupled_infer_with_expected_response_success(
        self,
        number_of_response_before_return,
        send_complete_final_flag_before_return,
        return_a_response,
        number_of_response_after_return,
        send_complete_final_flag_after_return,
        expected_number_of_response_before_return,
        expected_return_a_response,
        expected_number_of_response_after_return,
    ):
        """Run one request on both non-decoupled models and check the expected counts.

        The first five arguments are what the model is asked to do; the
        ``expected_*`` arguments are what the client should actually observe,
        which may differ from the request.
        """
        model_name = "response_sender"
        responses = self._infer(
            model_name,
            number_of_response_before_return,
            send_complete_final_flag_before_return,
            return_a_response,
            number_of_response_after_return,
            send_complete_final_flag_after_return,
        )
        self._assert_responses_valid(
            responses,
            expected_number_of_response_before_return,
            expected_return_a_response,
            expected_number_of_response_after_return,
        )
        # Do NOT group into a for-loop as it hides which model failed.
        model_name = "response_sender_async"
        responses = self._infer(
            model_name,
            number_of_response_before_return,
            send_complete_final_flag_before_return,
            return_a_response,
            number_of_response_after_return,
            send_complete_final_flag_after_return,
        )
        self._assert_responses_valid(
            responses,
            expected_number_of_response_before_return,
            expected_return_a_response,
            expected_number_of_response_after_return,
        )

    def _assert_non_decoupled_infer_success(
        self,
        number_of_response_before_return,
        send_complete_final_flag_before_return,
        return_a_response,
        number_of_response_after_return,
        send_complete_final_flag_after_return,
    ):
        """Run one request on both non-decoupled models and expect exactly what was asked for."""
        self._assert_non_decoupled_infer_with_expected_response_success(
            number_of_response_before_return,
            send_complete_final_flag_before_return,
            return_a_response,
            number_of_response_after_return,
            send_complete_final_flag_after_return,
            expected_number_of_response_before_return=number_of_response_before_return,
            expected_return_a_response=return_a_response,
            expected_number_of_response_after_return=number_of_response_after_return,
        )

    # Decoupled model sends the response final flag before request return.
    def test_decoupled_zero_response_pre_return(self):
        self._assert_decoupled_infer_success(
            **self._inputs_parameters_zero_response_pre_return
        )

    # Decoupled model sends the response final flag after request return.
    def test_decoupled_zero_response_post_return(self):
        self._assert_decoupled_infer_success(
            **self._inputs_parameters_zero_response_post_return
        )

    # Decoupled model sends 1 response before request return.
    def test_decoupled_one_response_pre_return(self):
        self._assert_decoupled_infer_success(
            **self._inputs_parameters_one_response_pre_return
        )

    # Decoupled model sends 1 response after request return.
    def test_decoupled_one_response_post_return(self):
        self._assert_decoupled_infer_success(
            **self._inputs_parameters_one_response_post_return
        )

    # Decoupled model sends 2 responses before request return.
    def test_decoupled_two_response_pre_return(self):
        self._assert_decoupled_infer_success(
            **self._inputs_parameters_two_response_pre_return
        )

    # Decoupled model sends 2 responses after request return.
    def test_decoupled_two_response_post_return(self):
        self._assert_decoupled_infer_success(
            **self._inputs_parameters_two_response_post_return
        )

    # Decoupled model sends 1 and 3 responses before and after return.
    def test_decoupled_response_pre_and_post_return(self):
        self._assert_decoupled_infer_success(
            **self._inputs_parameters_response_pre_and_post_return
        )

    # Non-decoupled model sends 1 response on return.
    def test_non_decoupled_one_response_on_return(self):
        self._assert_non_decoupled_infer_success(
            **self._inputs_parameters_one_response_on_return
        )

    # Non-decoupled model sends 1 response before return.
    def test_non_decoupled_one_response_pre_return(self):
        self._assert_non_decoupled_infer_success(
            **self._inputs_parameters_one_response_pre_return
        )

    # Non-decoupled model sends 1 response after return.
    def test_non_decoupled_one_response_post_return(self):
        self._assert_non_decoupled_infer_success(
            **self._inputs_parameters_one_response_post_return
        )

    # Decoupled model requests each responding differently.
    def test_decoupled_multiple_requests(self):
        parallel_inputs = [
            self._get_inputs(**self._inputs_parameters_zero_response_pre_return),
            self._get_inputs(**self._inputs_parameters_zero_response_post_return),
            self._get_inputs(**self._inputs_parameters_one_response_pre_return),
            self._get_inputs(**self._inputs_parameters_one_response_post_return),
            self._get_inputs(**self._inputs_parameters_two_response_pre_return),
            self._get_inputs(**self._inputs_parameters_two_response_post_return),
            self._get_inputs(**self._inputs_parameters_response_pre_and_post_return),
        ]
        # Totals across all requests above: 1 + 2 + 1 before, 1 + 2 + 3 after.
        expected_number_of_response_before_return = 4
        expected_return_a_response = False
        expected_number_of_response_after_return = 6

        model_name = "response_sender_decoupled_batching"
        responses = self._infer_parallel(model_name, parallel_inputs)
        self._assert_responses_valid(
            responses,
            expected_number_of_response_before_return,
            expected_return_a_response,
            expected_number_of_response_after_return,
        )
        # Do NOT group into a for-loop as it hides which model failed.
        model_name = "response_sender_decoupled_async_batching"
        responses = self._infer_parallel(model_name, parallel_inputs)
        self._assert_responses_valid(
            responses,
            expected_number_of_response_before_return,
            expected_return_a_response,
            expected_number_of_response_after_return,
        )

    # Non-decoupled model requests each responding differently.
    def test_non_decoupled_multiple_requests(self):
        parallel_inputs = [
            self._get_inputs(**self._inputs_parameters_one_response_on_return),
            self._get_inputs(**self._inputs_parameters_one_response_pre_return),
            self._get_inputs(**self._inputs_parameters_one_response_post_return),
        ]
        expected_number_of_response_before_return = 1
        expected_return_a_response = True
        expected_number_of_response_after_return = 1

        model_name = "response_sender_batching"
        responses = self._infer_parallel(model_name, parallel_inputs)
        self._assert_responses_valid(
            responses,
            expected_number_of_response_before_return,
            expected_return_a_response,
            expected_number_of_response_after_return,
        )
        # Do NOT group into a for-loop as it hides which model failed.
        model_name = "response_sender_async_batching"
        responses = self._infer_parallel(model_name, parallel_inputs)
        self._assert_responses_valid(
            responses,
            expected_number_of_response_before_return,
            expected_return_a_response,
            expected_number_of_response_after_return,
        )

    # Decoupled model sends 1 response on return.
    def test_decoupled_one_response_on_return(self):
        responses = self._infer(
            model_name="response_sender_decoupled",
            **self._inputs_parameters_one_response_on_return,
        )
        self._assert_responses_exception(
            responses,
            expected_message="using the decoupled mode and the execute function must return None",
        )
        # TODO: Test for async decoupled after fixing 'AsyncEventFutureDoneCallback'
        #       using `py_future.result()` with error hangs on exit.

    # Decoupled model sends 1 response and returns 1 response.
    def test_decoupled_one_response_pre_and_on_return(self):
        # Note: The before return response will send a valid response and close the
        #       response sender. Then, returning a response will generate an error, but
        #       since the response sender is closed, nothing is passed to the client.
        responses = self._infer(
            model_name="response_sender_decoupled",
            **self._inputs_parameters_one_response_pre_and_on_return,
        )
        self._assert_responses_valid(
            responses,
            number_of_response_before_return=1,
            return_a_response=False,
            number_of_response_after_return=0,
        )
        # TODO: Test for async decoupled after fixing 'AsyncEventFutureDoneCallback'
        #       using `py_future.result()` with error hangs on exit.

    # Decoupled model returns 1 response and sends 1 response.
    def test_decoupled_one_response_on_and_post_return(self):
        # Note: The returned response will send an error response and complete final
        #       flag, and close the response sender and factory. Then, sending a
        #       response will raise an exception. Since the exception happens after the
        #       model returns, it cannot be caught by the stub (i.e. in a daemon
        #       thread), so nothing will happen.
        responses = self._infer(
            model_name="response_sender_decoupled",
            **self._inputs_parameters_one_response_on_and_post_return,
        )
        self._assert_responses_exception(
            responses,
            expected_message="using the decoupled mode and the execute function must return None",
        )
        # TODO: Test for async decoupled after fixing 'AsyncEventFutureDoneCallback'
        #       using `py_future.result()` with error hangs on exit.

    # Non-decoupled model sends the response final flag before request return.
    def test_non_decoupled_zero_response_pre_return(self):
        # Note: The final flag will raise an exception which stops the model. Since the
        #       exception happens before the model returns, it will be caught by the
        #       stub process, which passes it to the backend, which sends an error
        #       response with final flag.
        expected_message = (
            "Non-decoupled model cannot send complete final before sending a response"
        )
        model_name = "response_sender"
        responses = self._infer(
            model_name,
            **self._inputs_parameters_zero_response_pre_return,
        )
        self._assert_responses_exception(responses, expected_message)
        # Do NOT group into a for-loop as it hides which model failed.
        model_name = "response_sender_async"
        responses = self._infer(
            model_name,
            **self._inputs_parameters_zero_response_pre_return,
        )
        self._assert_responses_exception(responses, expected_message)

    # Non-decoupled model sends the response final flag after request return.
    @unittest.skip("Model unload will hang, see the TODO comment.")
    def test_non_decoupled_zero_response_post_return(self):
        # Note: The final flag will raise an exception which stops the model. Since the
        #       exception happens after the model returns, it cannot be caught by the
        #       stub (i.e. in a daemon thread), so nothing will happen.
        # TODO: Since the stub does not know if the model failed after returning, the
        #       complete final flag is not sent and will hang when unloading the model.
        #       How to detect such event and close the response factory?
        raise NotImplementedError("No testing is performed")

    # Non-decoupled model sends 2 responses before return.
    def test_non_decoupled_two_response_pre_return(self):
        # Note: The 1st response will make its way to the client, but sending the 2nd
        #       response will raise an exception which stops the model. Since the
        #       exception happens before the model returns, it will be caught by the
        #       stub process, which passes it to the backend, which sends an error
        #       response with final flag. Since this is a non-decoupled model using a
        #       gRPC stream, any response after the 1st will be discarded by the
        #       frontend.
        self._assert_non_decoupled_infer_with_expected_response_success(
            **self._inputs_parameters_two_response_pre_return,
            expected_number_of_response_before_return=1,
            expected_return_a_response=False,
            expected_number_of_response_after_return=0,
        )

    # Non-decoupled model sends 2 responses after return.
    @unittest.skip("Model unload will hang, see the TODO comment.")
    def test_non_decoupled_two_response_post_return(self):
        # Note: The 1st response will make its way to the client, but sending the 2nd
        #       response will raise an exception which stops the model. Since the
        #       exception happens after the model returns, it cannot be caught by the
        #       stub (i.e. in a daemon thread), so nothing will happen.
        # TODO: Since the stub does not know if the model failed after returning, the
        #       complete final flag is not sent and will hang when unloading the model.
        #       How to detect such event and close the response factory?
        self._assert_non_decoupled_infer_with_expected_response_success(
            **self._inputs_parameters_two_response_post_return,
            expected_number_of_response_before_return=0,
            expected_return_a_response=False,
            expected_number_of_response_after_return=1,
        )

    # Non-decoupled model sends 1 response and returns 1 response.
    def test_non_decoupled_one_response_pre_and_on_return(self):
        # Note: The sent response will make its way to the client and complete final.
        #       The returned response will see the response sender is closed and raise
        #       an exception. The backend should see the request is closed and do
        #       nothing upon receiving the error from stub.
        self._assert_non_decoupled_infer_with_expected_response_success(
            **self._inputs_parameters_one_response_pre_and_on_return,
            expected_number_of_response_before_return=1,
            expected_return_a_response=False,
            expected_number_of_response_after_return=0,
        )

    # Non-decoupled model returns 1 response and sends 1 response.
    def test_non_decoupled_one_response_on_and_post_return(self):
        # Note: The returned response will send the response to the client and complete
        #       final. The sent response will see the response sender is closed and
        #       raise an exception. Since the exception happens after the model returns,
        #       it cannot be caught by the stub (i.e. in a daemon thread), so nothing
        #       will happen.
        self._assert_non_decoupled_infer_with_expected_response_success(
            **self._inputs_parameters_one_response_on_and_post_return,
            expected_number_of_response_before_return=0,
            expected_return_a_response=True,
            expected_number_of_response_after_return=0,
        )


# Run the whole suite through unittest when this file is executed directly.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/response_sender/test.sh
================================================
#!/bin/bash
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

source ../../common/util.sh

RET=0

#
# Test response sender under decoupled / non-decoupled
#

# Every model variant below is assembled from the same sources. The variants
# differ only in which Python file is installed as model.py (model.py or
# model_async.py) and in which of the two lines below get appended to the
# copied config.pbtxt.
RESPONSE_SENDER_SRC_DIR="../../python_models/response_sender"
DECOUPLED_CONFIG_LINE="model_transaction_policy { decoupled: True }"
BATCHING_CONFIG_LINE="dynamic_batching { max_queue_delay_microseconds: 500000 }"

rm -rf models && mkdir models

# Non-decoupled, synchronous model.
mkdir -p models/response_sender/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_common.py models/response_sender/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model.py models/response_sender/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/config.pbtxt models/response_sender

# Decoupled, synchronous model.
mkdir -p models/response_sender_decoupled/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_common.py models/response_sender_decoupled/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model.py models/response_sender_decoupled/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/config.pbtxt models/response_sender_decoupled && \
    echo "${DECOUPLED_CONFIG_LINE}" >> models/response_sender_decoupled/config.pbtxt

# Non-decoupled, async model.
mkdir -p models/response_sender_async/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_common.py models/response_sender_async/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_async.py models/response_sender_async/1/model.py && \
    cp ${RESPONSE_SENDER_SRC_DIR}/config.pbtxt models/response_sender_async

# Decoupled, async model.
mkdir -p models/response_sender_decoupled_async/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_common.py models/response_sender_decoupled_async/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_async.py models/response_sender_decoupled_async/1/model.py && \
    cp ${RESPONSE_SENDER_SRC_DIR}/config.pbtxt models/response_sender_decoupled_async && \
    echo "${DECOUPLED_CONFIG_LINE}" >> models/response_sender_decoupled_async/config.pbtxt

# Non-decoupled, synchronous model with dynamic batching.
mkdir -p models/response_sender_batching/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_common.py models/response_sender_batching/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model.py models/response_sender_batching/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/config.pbtxt models/response_sender_batching && \
    echo "${BATCHING_CONFIG_LINE}" >> models/response_sender_batching/config.pbtxt

# Decoupled, synchronous model with dynamic batching.
mkdir -p models/response_sender_decoupled_batching/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_common.py models/response_sender_decoupled_batching/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model.py models/response_sender_decoupled_batching/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/config.pbtxt models/response_sender_decoupled_batching && \
    echo "${DECOUPLED_CONFIG_LINE}" >> models/response_sender_decoupled_batching/config.pbtxt && \
    echo "${BATCHING_CONFIG_LINE}" >> models/response_sender_decoupled_batching/config.pbtxt

# Non-decoupled, async model with dynamic batching.
mkdir -p models/response_sender_async_batching/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_common.py models/response_sender_async_batching/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_async.py models/response_sender_async_batching/1/model.py && \
    cp ${RESPONSE_SENDER_SRC_DIR}/config.pbtxt models/response_sender_async_batching && \
    echo "${BATCHING_CONFIG_LINE}" >> models/response_sender_async_batching/config.pbtxt

# Decoupled, async model with dynamic batching.
mkdir -p models/response_sender_decoupled_async_batching/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_common.py models/response_sender_decoupled_async_batching/1 && \
    cp ${RESPONSE_SENDER_SRC_DIR}/model_async.py models/response_sender_decoupled_async_batching/1/model.py && \
    cp ${RESPONSE_SENDER_SRC_DIR}/config.pbtxt models/response_sender_decoupled_async_batching && \
    echo "${DECOUPLED_CONFIG_LINE}" >> models/response_sender_decoupled_async_batching/config.pbtxt && \
    echo "${BATCHING_CONFIG_LINE}" >> models/response_sender_decoupled_async_batching/config.pbtxt

TEST_LOG="response_sender_test.log"
SERVER_LOG="response_sender_test.server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/response_sender/models --backend-directory=${BACKEND_DIR} --log-verbose=1"

# Start the server on the model repository; a start-up failure aborts the
# script immediately since no test can run without it.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# A failing test must not abort the script: record it in RET and carry on so
# the server is always shut down.
set +e
# The JUnit report is named after this test so that it cannot collide with a
# report written by any other pytest run in this directory.
SERVER_LOG=$SERVER_LOG python3 -m pytest --junitxml=response_sender_test.report.xml response_sender_test.py > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** response sender test FAILED\n***"
    cat $TEST_LOG
    RET=1
fi
set -e

kill_server

#
# Test response sender to raise exception on response after complete final flag
#
rm -rf models && mkdir models
mkdir -p models/response_sender_complete_final/1 && \
    cp ../../python_models/response_sender_complete_final/model.py models/response_sender_complete_final/1 && \
    cp ../../python_models/response_sender_complete_final/config.pbtxt models/response_sender_complete_final

TEST_LOG="response_sender_complete_final_test.log"
SERVER_LOG="response_sender_complete_final_test.server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/response_sender/models --backend-directory=${BACKEND_DIR} --log-verbose=1"

# Start the server on the rebuilt model repository; a start-up failure aborts
# the script immediately since no test can run without it.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# A failing test must not abort the script: record it in RET and carry on so
# the server is always shut down.
set +e
# The JUnit report is named after this test so that it cannot collide with a
# report written by any other pytest run in this directory.
SERVER_LOG=$SERVER_LOG python3 -m pytest --junitxml=response_sender_complete_final_test.report.xml response_sender_complete_final_test.py > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** response sender complete final test FAILED\n***"
    cat $TEST_LOG
    RET=1
fi
set -e

kill_server

#
# Test async response sender under decoupled / non-decoupled
#

# TODO

# Print the overall verdict and propagate it as the script's exit status.
if [ $RET -eq 1 ]; then
    FINAL_VERDICT="FAILED"
else
    FINAL_VERDICT="Passed"
fi
echo -e "\n***\n*** Response sender test ${FINAL_VERDICT}\n***"
exit $RET


================================================
FILE: qa/L0_backend_python/restart/models/restart/1/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from os import path

import c_python_backend_utils as c_utils
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Identity model used by the Python backend restart test.

    Besides echoing ``INPUT0`` back as ``OUTPUT0``, every ``execute`` call
    compares the currently free shared memory against the value recorded on
    the first call, so that a leak across a stub restart surfaces as an error.
    """

    @staticmethod
    def _check_free_memory(file_name, current_free_memory):
        """Record the free memory on first use and verify it on later calls.

        Args:
            file_name: Path of the text file holding the recorded value.
            current_free_memory: Free shared memory right now, as a string.

        Raises:
            AssertionError: If ``file_name`` exists and its content differs
                from ``current_free_memory``.
        """
        if path.exists(file_name):
            with open(file_name, "r") as f:
                expected_free_memory = f.read()
            # Both literals must be interpolated; previously only the first
            # (placeholder-free) literal carried the ``f`` prefix, so the
            # message printed the raw ``{...}`` names instead of the values.
            assert expected_free_memory == current_free_memory, (
                "Free shared memory before and after restart are not equal. "
                f"{expected_free_memory} (before) != {current_free_memory} (after)."
            )
        else:
            with open(file_name, "w") as f:
                f.write(current_free_memory)

    def execute(self, requests):
        """Check shared-memory consistency, then echo each request's input.

        Args:
            requests: Iterable of inference requests, each carrying an
                ``INPUT0`` tensor.

        Returns:
            A list with one ``pb_utils.InferenceResponse`` per request, whose
            ``OUTPUT0`` tensor holds the same data as ``INPUT0``.
        """
        # This function will be called once to record the free memory. Then,
        # the stub process will be killed to trigger Python backend restart.
        # After that this value will be read again to make sure that it matches
        # before restart.
        file_name = "free_memory.txt"
        current_free_memory = str(c_utils.shared_memory.free_memory())
        self._check_free_memory(file_name, current_free_memory)

        responses = []
        for request in requests:
            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            out_tensor = pb_utils.Tensor("OUTPUT0", input_tensor.as_numpy())
            responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses


================================================
FILE: qa/L0_backend_python/restart/models/restart/config.pbtxt
================================================
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "restart" test model, served by the Python
# backend with batching enabled up to 64 requests.
name: "restart"
backend: "python"
max_batch_size: 64

# One FP32 input; dims of -1 leave the (non-batch) dimension size unspecified.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# One FP32 output declared with the same type and dims as the input.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# A single model instance of kind CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_backend_python/restart/restart_test.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append("../../common")

import unittest

import numpy as np
import shm_util
import tritonclient.http as httpclient
from tritonclient.utils import *

# Host name / IP address of the Triton server under test. Defaults to
# "localhost"; for Windows tests the address is overridden through the
# TRITONSERVER_IPADDR environment variable.
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class RestartTest(unittest.TestCase):
    """Client-side checks for the "restart" model before and after a stub kill."""

    # Request parameters shared by every test in this class.
    _MODEL_NAME = "restart"
    _SHAPE = [1, 16]
    _DTYPE = np.float32

    def setUp(self):
        """Create a fresh shared-memory leak detector for each test."""
        self._shm_leak_detector = shm_util.ShmLeakDetector()

    def _infer_helper(self, model_name, shape, data_type):
        """Send random data to ``model_name`` and assert it is echoed back.

        Args:
            model_name: Name of the model to query.
            shape: Shape of the random ``INPUT0`` tensor.
            data_type: NumPy dtype of the generated data.
        """
        endpoint = f"{_tritonserver_ipaddr}:8000"
        with httpclient.InferenceServerClient(endpoint) as client:
            payload = np.array(np.random.randn(*shape), dtype=data_type)
            infer_input = httpclient.InferInput(
                "INPUT0", shape, np_to_triton_dtype(payload.dtype)
            )
            infer_input.set_data_from_numpy(payload)
            response = client.infer(model_name, [infer_input])
            echoed = response.as_numpy("OUTPUT0")
            self.assertTrue(np.all(payload == echoed))

    def test_restart(self):
        """First request after the stub kill must fail; the next must succeed."""
        # Since the stub process has been killed, the first request
        # will return an exception.
        # FIXME: No leak check here as the unhealthy stub error likely causes issues.
        # tritonclient.utils.InferenceServerException: [400] Failed to
        # process the request(s) for model instance 'restart_0_0',
        # message: Stub process 'restart_0_0' is not healthy.
        # [restart] Shared memory leak detected: 1007216 (current) > 1007056 (prev).
        self.assertRaises(
            InferenceServerException,
            self._infer_helper,
            self._MODEL_NAME,
            self._SHAPE,
            self._DTYPE,
        )

        # The second request should work properly since the stub process should
        # have come alive.
        with self._shm_leak_detector.Probe():
            self._infer_helper(self._MODEL_NAME, self._SHAPE, self._DTYPE)

    def test_infer(self):
        """A plain inference against a healthy stub succeeds under the leak probe."""
        with self._shm_leak_detector.Probe():
            self._infer_helper(self._MODEL_NAME, self._SHAPE, self._DTYPE)


# Run the test cases through unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_backend_python/restart/test.sh
================================================
#!/bin/bash
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Log files and server command-line arguments for this test.
CLIENT_LOG="./restart_client.log"
SERVER_ARGS="--model-repository=${MODELDIR}/restart/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./restart_server.log"
source ../../common/util.sh
source ../common.sh

# Start clean: remove old logs and the free-memory record that the restart
# model writes on its first execution.
rm -fr *.log free_memory.txt

RET=0

# Snapshot the shared-memory page count so it can be compared once the server
# has been stopped.
prev_num_pages=`get_shm_pages`
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Run one inference while the stub is still alive. Errors are tolerated
# (set +e) so that a failure is recorded in RET instead of aborting here.
set +e
SUBTEST="test_infer"
python3 -m pytest --junitxml=restart.${SUBTEST}.report.xml restart_test.py::RestartTest::${SUBTEST} >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** ${SUBTEST} test FAILED. \n***"
    RET=1
fi
set -e

# Kill the Python backend stub process(es) so that the next request hits an
# unhealthy stub and triggers a restart.
# NOTE: with the current setup, tritonserver is launched within wsl, but the stub is started
# in Windows. Therefore, finding the PID of the stub requires a bit more work.
if [[ ${TEST_WINDOWS} == 1 ]]; then
    tasklist=$(/mnt/c/windows/system32/tasklist.exe /FI 'IMAGENAME eq triton_python_backend_stub.exe' /FO CSV)
    # `grep -c` exits with status 1 when the count is 0; `|| true` keeps that
    # from aborting the script while `set -e` is active.
    taskcount=$(echo "$tasklist" | grep -c triton_python_backend_stub || true)
    # Use an arithmetic comparison: inside [[ ]], `>` compares strings
    # lexicographically rather than numerically.
    if [[ $taskcount -gt 0 ]]; then
        echo "$tasklist" | while IFS=, read -r taskname taskpid taskrest; do
            if [[ "$taskname" == "\"triton_python_backend_stub.exe\"" ]]; then
                # Strip the CSV quoting around the PID field.
                taskpid="${taskpid%\"}"
                taskpid="${taskpid#\"}"
                /mnt/c/windows/system32/taskkill.exe /PID "$taskpid" /F /T
            fi
        done
    fi
else
    # Direct children of the server process.
    triton_procs=$(pgrep --parent $SERVER_PID)
    echo $triton_procs
    for proc in $triton_procs; do
        kill -9 $proc
    done
fi

# With the stub killed, run the restart subtest; failures are recorded in RET
# rather than aborting the script.
set +e

SUBTEST="test_restart"
python3 -m pytest --junitxml=restart.${SUBTEST}.report.xml restart_test.py::RestartTest::${SUBTEST} >> $CLIENT_LOG 2>&1

if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** ${SUBTEST} test FAILED. \n***"
    RET=1
fi
set -e

kill_server

# After shutdown the shared-memory page count must match the value captured
# before the server was started; a mismatch fails the test immediately.
current_num_pages=`get_shm_pages`
if [ $current_num_pages -ne $prev_num_pages ]; then
    cat $CLIENT_LOG
    ls /dev/shm
    echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
    exit 1
fi

# Test if the Triton server exits gracefully when the stub has been killed.
# A fresh server is started, its child processes are killed without sending
# any request, and the server is then shut down.
rm $SERVER_LOG
prev_num_pages=`get_shm_pages`
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Direct children of the server process.
triton_procs=`pgrep --parent $SERVER_PID`
echo $triton_procs

# Tolerate kill failures (e.g. a process that has already exited).
set +e
for proc in $triton_procs; do
    kill -9 $proc
done
set -e

kill_server

# The shared-memory page count must again match the pre-start snapshot.
current_num_pages=`get_shm_pages`
if [ $current_num_pages -ne $prev_num_pages ]; then
    cat $CLIENT_LOG
    ls /dev/shm
    echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
    exit 1
fi

# Report the overall result; RET is 1 if any pytest subtest above failed.
if [ $RET -eq 1 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Restart test FAILED. \n***"
else
    echo -e "\n***\n*** Restart test PASSED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_backend_python/setup_python_enviroment.sh
================================================
#!/bin/bash
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
RET=0
set -e
# Apply the default version before it is inspected, and quote the expansion so
# the check also works when the caller did not set PYTHON_ENV_VERSION.
export PYTHON_ENV_VERSION=${PYTHON_ENV_VERSION:="12"}
if [ "${PYTHON_ENV_VERSION}" = "12" ]; then
    echo No need to set up anything for default python3.${PYTHON_ENV_VERSION}
    exit $RET
fi

source common.sh
source ../common/util.sh

TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"}
BASE_SERVER_ARGS="--model-repository=${MODELDIR}/models --log-verbose=1 --disable-auto-complete-config"
PYTHON_BACKEND_BRANCH=$PYTHON_BACKEND_REPO_TAG
SERVER_ARGS=$BASE_SERVER_ARGS
SERVER_LOG="./inference_server.log"
RET=0
# String expected in the server log; filled in by the version-specific setup
# below. The name matches the variable that the rest of the script reads.
EXPECTED_VERSION_STRING=""

# Remove leftovers from previous runs, then install the build prerequisites
# and conda through the sourced helpers.
rm -fr ./models
rm -rf *.tar.gz
install_build_deps
install_conda

# Test other python versions
conda update -n base -c defaults conda -y

# Create a model with python 3.11 version
# Successful execution of the Python model indicates that the environment has
# been setup correctly.
if [ ${PYTHON_ENV_VERSION} = "11" ]; then
    create_conda_env "3.11" "python-3-11"
    conda install pytorch=2.8.0 -y
    conda install -c conda-forge libstdcxx-ng=14 -y
    conda install numpy=1.23.5 -y
    # The server log is later searched for this exact string.
    EXPECTED_VERSION_STRING="Python version is 3.11, NumPy version is 1.23.5, and PyTorch version is 2.8.0"
    create_python_backend_stub
    # Pack the conda environment and unpack it into a directory that the model
    # config references through EXECUTION_ENV_PATH.
    conda-pack -o python3.11.tar.gz
    path_to_conda_pack="$PWD/python-3-11"
    mkdir -p $path_to_conda_pack
    tar -xzf python3.11.tar.gz -C $path_to_conda_pack
    # Assemble the python_3_11 model from the python_version template model.
    mkdir -p models/python_3_11/1/
    cp ../python_models/python_version/config.pbtxt ./models/python_3_11
    (cd models/python_3_11 && \
            sed -i "s/^name:.*/name: \"python_3_11\"/" config.pbtxt && \
            echo "parameters: {key: \"EXECUTION_ENV_PATH\", value: {string_value: \"$path_to_conda_pack\"}}">> config.pbtxt)
    cp ../python_models/python_version/model.py ./models/python_3_11/1/
    # Place the freshly built stub next to the model's config.
    cp python_backend/builddir/triton_python_backend_stub ./models/python_3_11
fi
conda deactivate
rm -rf ./miniconda

# Start the server once with the models prepared above, stop it again, and
# verify that the expected version string shows up in the server log.
set +e
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

kill_server

grep "$EXPECTED_VERSION_STRING" $SERVER_LOG
if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** $EXPECTED_VERSION_STRING was not found in Triton logs. \n***"
    RET=1
fi
set -e

echo "python environment 3.${PYTHON_ENV_VERSION}"
# copy the stub out to /opt/tritonserver/backends/python/triton_python_backend_stub
cp python_backend/builddir/triton_python_backend_stub /opt/tritonserver/backends/python/triton_python_backend_stub
# Set up environment and stub for each test
# Install the requested Python version from the deadsnakes PPA and repoint
# /usr/bin/python3 at it.
apt-get update -qq && apt-get install -y software-properties-common
add-apt-repository ppa:deadsnakes/ppa -y
apt-get update && apt-get -y install \
                            "python3.${PYTHON_ENV_VERSION}-dev" \
                            "python3.${PYTHON_ENV_VERSION}-distutils" \
                            libboost-dev
rm -f /usr/bin/python3 && \
ln -s "/usr/bin/python3.${PYTHON_ENV_VERSION}" /usr/bin/python3
pip3 install --upgrade requests numpy virtualenv protobuf
# Install the tritonclient wheel found under qa/pkgs with its "[all]" extras.
find /opt/tritonserver/qa/pkgs/ -maxdepth 1 -type f -name \
    "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
    xargs pip3 install --upgrade

# Build triton-shm-monitor for the test
# The cmake/make chain runs inside python_backend/build, so $PWD below refers
# to that build directory.
cd python_backend && rm -rf install build && mkdir build && cd build && \
    export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
    cmake -DCMAKE_INSTALL_PREFIX:PATH=$PWD/install \
        -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
        -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} \
        -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} \
        -DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
    make -j16 triton-shm-monitor install
# Make the built shm monitor extension available to the QA common helpers.
cp $PWD/install/backends/python/triton_shm_monitor.cpython-* /opt/tritonserver/qa/common/.
set +e
exit $RET


================================================
FILE: qa/L0_backend_python/test.sh
================================================
#!/bin/bash
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The repository version comes from NVIDIA_TRITON_SERVER_VERSION unless it is
# passed as the first command-line argument.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# On windows the paths invoked by the script (running in WSL) must use
# /mnt/c when needed but the paths on the tritonserver command-line
# must be C:/ style.
export TEST_WINDOWS=0
if [[ -v WSL_DISTRO_NAME ]] || [[ -v MSYSTEM ]]; then
    export DATADIR=${DATADIR:="/c/data/inferenceserver/${REPO_VERSION}"}
    export TRITON_DIR=${TRITON_DIR:=c:/tritonserver}
    # This will run in WSL, but Triton will run in windows, so environment
    # variables meant for loaded models must be exported using WSLENV.
    # The /w flag indicates the value should only be included when invoking
    # Win32 from WSL.
    # NOTE(review): the export below lists TRITON_DIR without the /w flag
    # described above - confirm whether that is intentional.
    export WSLENV=TRITON_DIR
    export SERVER=${SERVER:=c:/tritonserver/bin/tritonserver.exe}
    export BACKEND_DIR=${BACKEND_DIR:=c:/tritonserver/backends}
    export MODELDIR=${MODELDIR:=c:/}
    export TEST_WINDOWS=1
else
    export DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
    export TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
    export SERVER=${TRITON_DIR}/bin/tritonserver
    export BACKEND_DIR=${TRITON_DIR}/backends
    export MODELDIR=${MODELDIR:=`pwd`}
fi
# Exported so that scripts launched from here see the same settings.
export REPO_VERSION=$REPO_VERSION
export TEST_JETSON=${TEST_JETSON:=0}
export CUDA_VISIBLE_DEVICES=0
export PYTHON_ENV_VERSION=${PYTHON_ENV_VERSION:="12"}
export PYTHON_BACKEND_REPO_TAG=$PYTHON_BACKEND_REPO_TAG

BASE_SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
# Set the default byte size to 5MBs to avoid going out of shared memory. The
# environment that this job runs on has only 1GB of shared-memory available.
SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=5242880"

CLIENT_PY=./python_test.py
CLIENT_LOG="./client.log"
TEST_RESULT_FILE='test_results.txt'
SERVER_LOG="./inference_server.log"
source ../common/util.sh
source ./common.sh

# Remove logs and models left over from a previous run.
rm -fr *.log ./models

# Before the environment setup runs, the default interpreter is expected to be
# Python 3.12.
python3 --version | grep "3.12" > /dev/null
if [ $? -ne 0 ]; then
    echo -e "Expecting Python default version to be: Python 3.12 but actual version is $(python3 --version)"
    exit 1
fi

# Run the environment setup in a subshell, with tracing and errexit enabled
# for that script only.
(bash -ex setup_python_enviroment.sh)

# After the setup, python3 must report the requested 3.${PYTHON_ENV_VERSION}.
python3 --version | grep "3.${PYTHON_ENV_VERSION}" > /dev/null
if [ $? -ne 0 ]; then
    echo -e "Expecting Python version to be: Python 3.${PYTHON_ENV_VERSION} but actual version is $(python3 --version)"
    exit 1
fi

# Base identity models copied verbatim from ../python_models.
mkdir -p models/identity_fp32/1/
cp ../python_models/identity_fp32/model.py ./models/identity_fp32/1/model.py
cp ../python_models/identity_fp32/config.pbtxt ./models/identity_fp32/config.pbtxt
mkdir -p models/identity_bf16/1/
cp ../python_models/identity_bf16/model.py ./models/identity_bf16/1/model.py
cp ../python_models/identity_bf16/config.pbtxt ./models/identity_bf16/config.pbtxt
RET=0

# identity_uint8: derived from identity_fp32 by rewriting the name and data
# type, setting max_batch_size to 8 and appending a dynamic batching section.
cp -r ./models/identity_fp32 ./models/identity_uint8
(cd models/identity_uint8 && \
          sed -i "s/^name:.*/name: \"identity_uint8\"/" config.pbtxt && \
          sed -i "s/TYPE_FP32/TYPE_UINT8/g" config.pbtxt && \
          sed -i "s/^max_batch_size:.*/max_batch_size: 8/" config.pbtxt && \
          echo "dynamic_batching { preferred_batch_size: [8], max_queue_delay_microseconds: 12000000 }" >> config.pbtxt)

# identity_uint8_nobatch: derived from identity_fp32 by rewriting the name and
# data type and blanking the max_batch_size line. `sed -i` edits the file in
# place and writes nothing to stdout, so no output redirection is needed.
cp -r ./models/identity_fp32 ./models/identity_uint8_nobatch
(cd models/identity_uint8_nobatch && \
          sed -i "s/^name:.*/name: \"identity_uint8_nobatch\"/" config.pbtxt && \
          sed -i "s/TYPE_FP32/TYPE_UINT8/g" config.pbtxt && \
          sed -i "s/^max_batch_size:.*//" config.pbtxt)

# identity_uint32 / identity_bool: identity_fp32 with the name and data type
# rewritten.
cp -r ./models/identity_fp32 ./models/identity_uint32
(cd models/identity_uint32 && \
          sed -i "s/^name:.*/name: \"identity_uint32\"/" config.pbtxt && \
          sed -i "s/TYPE_FP32/TYPE_UINT32/g" config.pbtxt)

cp -r ./models/identity_fp32 ./models/identity_bool
(cd models/identity_bool && \
          sed -i "s/^name:.*/name: \"identity_bool\"/" config.pbtxt && \
          sed -i "s/TYPE_FP32/TYPE_BOOL/g" config.pbtxt)

# Test models with `default_model_filename` variable set.
cp -r ./models/identity_fp32 ./models/default_model_name
mv ./models/default_model_name/1/model.py ./models/default_model_name/1/mymodel.py
(cd models/default_model_name && \
    sed -i "s/^name:.*/name: \"default_model_name\"/" config.pbtxt && \
    echo "default_model_filename: \"mymodel.py\"" >> config.pbtxt )

mkdir -p models/pytorch_fp32_fp32/1/
    cp -r ../python_models/pytorch_fp32_fp32/model.py ./models/pytorch_fp32_fp32/1/
    cp ../python_models/pytorch_fp32_fp32/config.pbtxt ./models/pytorch_fp32_fp32/
    (cd models/pytorch_fp32_fp32 && \
            sed -i "s/^name:.*/name: \"pytorch_fp32_fp32\"/" config.pbtxt)

mkdir -p models/delayed_model/1/
cp -r ../python_models/delayed_model/model.py ./models/delayed_model/1/
cp ../python_models/delayed_model/config.pbtxt ./models/delayed_model/
mkdir -p models/init_args/1/
cp ../python_models/init_args/model.py ./models/init_args/1/
cp ../python_models/init_args/config.pbtxt ./models/init_args/
# Substitute the TRITON_DIR_PATH placeholder in the config with the actual
# Triton install directory.
sed -i "s|TRITON_DIR_PATH|${TRITON_DIR}|" ./models/init_args/config.pbtxt


mkdir -p models/optional/1/
cp ../python_models/optional/model.py ./models/optional/1/
cp ../python_models/optional/config.pbtxt ./models/optional/

mkdir -p models/non_contiguous/1/
cp ../python_models/non_contiguous/model.py ./models/non_contiguous/1/
cp ../python_models/non_contiguous/config.pbtxt ./models/non_contiguous/config.pbtxt

# Unicode Characters
mkdir -p models/string/1/
cp ../python_models/string/model.py ./models/string/1/
cp ../python_models/string/config.pbtxt ./models/string

# More string tests
mkdir -p models/string_fixed/1/
cp ../python_models/string_fixed/model.py ./models/string_fixed/1/
cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed

mkdir -p models/dlpack_identity/1/
cp ../python_models/dlpack_identity/model.py ./models/dlpack_identity/1/
cp ../python_models/dlpack_identity/config.pbtxt ./models/dlpack_identity


# Install PyTorch: the "+cpu" build when running on neither Jetson nor Windows,
# the plain 2.3.1 build otherwise.
if [ "$TEST_JETSON" == "0" ] && [[ ${TEST_WINDOWS} == 0 ]]; then
  pip3 install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
else
  # GPU tensor tests are disabled on jetson
  pip3 install torch==2.3.1 -f https://download.pytorch.org/whl/torch_stable.html
fi

# Python packages needed by the client-side tests.
pip3 install pytest requests virtualenv

# Main client test run: snapshot the shared-memory page count, start the
# server and run the pytest suite against it.
prev_num_pages=`get_shm_pages`
run_server
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    exit 1
fi

# A pytest failure is recorded in RET instead of aborting the script.
set +e
python3 -m pytest --junitxml=L0_backend_python.report.xml $CLIENT_PY >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    RET=1
fi
set -e

kill_server

# Unlike the later checks in this script, a page-count mismatch here only
# marks the run as failed (RET=1) and lets the remaining tests continue.
current_num_pages=`get_shm_pages`
if [ $current_num_pages -ne $prev_num_pages ]; then
    ls /dev/shm
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
    RET=1
fi

prev_num_pages=`get_shm_pages`
# Triton non-graceful exit
# Start the server, SIGKILL it, and verify that none of its former child
# processes is still alive afterwards.
run_server
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    exit 1
fi

sleep 5

# Collect the PIDs of the server's direct children, one array element each.
readarray -t triton_procs < <(pgrep --parent ${SERVER_PID})

set +e

# Trigger non-graceful termination of Triton
kill -9 $SERVER_PID

# Wait 10 seconds so that Python stub can detect non-graceful exit
sleep 10

# Iterate over every array element; a bare $triton_procs expands to the first
# element only, which left all other child processes unchecked.
for triton_proc in "${triton_procs[@]}"; do
    # `kill -0` sends no signal; it succeeds only if the process still exists.
    kill -0 $triton_proc > /dev/null 2>&1
    if [ $? -eq 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Python backend non-graceful exit test failed \n***"
        RET=1
        break
    fi
done
set -e

#
# Test KIND_GPU
# Disable env test for Jetson & Windows since GPU Tensors are not supported
if [ "$TEST_JETSON" == "0" ] && [[ ${TEST_WINDOWS} == 0 ]]; then
  # Fresh model repository containing only add_sub_gpu: the add_sub model code
  # combined with the add_sub_gpu config.
  rm -rf models/
  mkdir -p models/add_sub_gpu/1/
  cp ../python_models/add_sub/model.py ./models/add_sub_gpu/1/
  cp ../python_models/add_sub_gpu/config.pbtxt ./models/add_sub_gpu/

  prev_num_pages=`get_shm_pages`
  run_server
  if [ "$SERVER_PID" == "0" ]; then
      cat $SERVER_LOG
      echo -e "\n***\n*** Failed to start $SERVER\n***"
      exit 1
  fi

  # NOTE(review): $? here is the exit status of the preceding `if` statement,
  # which is 0 whenever execution reaches this line, so this branch appears
  # unreachable - confirm whether a client check was intended here.
  if [ $? -ne 0 ]; then
      cat $SERVER_LOG
      echo -e "\n***\n*** KIND_GPU model test failed \n***"
      RET=1
  fi

  kill_server

  # A shared-memory page-count mismatch after shutdown aborts the script.
  current_num_pages=`get_shm_pages`
  if [ $current_num_pages -ne $prev_num_pages ]; then
      cat $CLIENT_LOG
      ls /dev/shm
      echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly.
  Shared memory pages before starting triton equals to $prev_num_pages
  and shared memory pages after starting triton equals to $current_num_pages \n***"
      exit 1
  fi
fi

# Test Multi file models
# Fresh repository containing only multi_file: all .py files of the multi_file
# model combined with the identity_fp32 config, renamed to "multi_file".
rm -rf models/
mkdir -p models/multi_file/1/
cp ../python_models/multi_file/*.py ./models/multi_file/1/
cp ../python_models/identity_fp32/config.pbtxt ./models/multi_file/
(cd models/multi_file && \
          sed -i "s/^name:.*/name: \"multi_file\"/" config.pbtxt)

prev_num_pages=`get_shm_pages`
run_server
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    exit 1
fi

# NOTE(review): $? here is the exit status of the preceding `if` statement,
# which is 0 whenever execution reaches this line, so this branch appears
# unreachable - confirm whether a client check was intended here.
if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** multi-file model test failed \n***"
    RET=1
fi

kill_server

# A shared-memory page-count mismatch after shutdown aborts the script.
current_num_pages=`get_shm_pages`
if [ $current_num_pages -ne $prev_num_pages ]; then
    cat $SERVER_LOG
    ls /dev/shm
    echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
    exit 1
fi

# Test environment variable propagation
# Fresh repository containing only model_env; the server is started with
# MY_ENV exported, and a failed start is reported as a failure of this test.
rm -rf models/
mkdir -p models/model_env/1/
cp ../python_models/model_env/model.py ./models/model_env/1/
cp ../python_models/model_env/config.pbtxt ./models/model_env/

export MY_ENV="MY_ENV"
if [[ ${TEST_WINDOWS} == 1 ]]; then
    # This will run in WSL, but Triton will run in windows, so environment
    # variables meant for loaded models must be exported using WSLENV.
    # The /w flag indicates the value should only be included when invoking
    # Win32 from WSL.
    # NOTE(review): this assignment replaces the WSLENV value (TRITON_DIR)
    # exported near the top of this script - confirm that is intended.
    export WSLENV=MY_ENV/w
fi

prev_num_pages=`get_shm_pages`
run_server
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    echo -e "\n***\n*** Environment variable test failed \n***"
    exit 1
fi

kill_server

# A shared-memory page-count mismatch after shutdown aborts the script.
current_num_pages=`get_shm_pages`
if [ $current_num_pages -ne $prev_num_pages ]; then
    cat $CLIENT_LOG
    ls /dev/shm
    echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
    exit 1
fi

# Test the 'shm-default-byte-size' backend option: every
# triton_python_backend_shm* entry in /dev/shm must have exactly the
# configured size while the server is running.
rm -fr ./models
mkdir -p models/identity_fp32/1/
cp ../python_models/identity_fp32/model.py ./models/identity_fp32/1/model.py
cp ../python_models/identity_fp32/config.pbtxt ./models/identity_fp32/config.pbtxt

# 4 MiB
shm_default_byte_size=$((1024*1024*4))
SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=$shm_default_byte_size"

run_server
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    exit 1
fi

for shm_page in $(ls /dev/shm/); do
    # Only regions created by the Python backend are of interest.
    if [[ $shm_page == triton_python_backend_shm* ]]; then
        # Fifth column of `ls -l` is the file size in bytes.
        page_size=$(ls -l /dev/shm/$shm_page 2>&1 | awk '{print $5}')
        if [ $page_size -ne $shm_default_byte_size ]; then
            echo -e "Shared memory region size is not equal to
$shm_default_byte_size for page $shm_page. Region size is
$page_size."
            RET=1
        fi
    fi
done

kill_server

# Test model getting killed during initialization.
# The server is expected NOT to start, and its log must report the unhealthy
# stub process.
rm -fr ./models
mkdir -p models/init_exit/1/
cp ../python_models/init_exit/model.py ./models/init_exit/1/model.py
cp ../python_models/init_exit/config.pbtxt ./models/init_exit/config.pbtxt

ERROR_MESSAGE="Stub process 'init_exit_0_0' is not healthy."

prev_num_pages=$(get_shm_pages)
run_server
if [ "$SERVER_PID" == "0" ]; then
    # Expected path: startup failed, now verify the reason.
    if grep "$ERROR_MESSAGE" $SERVER_LOG; then
        echo -e "Found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG
    else
        # NOTE(review): this prints the client log *path*, not its content;
        # possibly a `cat` of a log was intended - confirm.
        echo $CLIENT_LOG
        echo -e "Not found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG
        RET=1
    fi
else
    # The server came up although the model should have aborted it.
    echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
    RET=1
    kill_server
fi

# Even a failed startup must not leave shared memory pages behind.
current_num_pages=$(get_shm_pages)
if [ $current_num_pages -ne $prev_num_pages ]; then
    cat $SERVER_LOG
    ls /dev/shm
    echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
    exit 1
fi


# Test model with non-existent model file
# Only config.pbtxt is placed in the repository; model.py is deliberately
# left out so that loading the model fails.
rm -fr ./models
mkdir -p models/non_existent_model/1
cp ../python_models/identity_fp32/config.pbtxt ./models/non_existent_model/config.pbtxt
(cd models/non_existent_model && \
          sed -i "s/^name:.*/name: \"non_existent_model\"/" config.pbtxt)

ERROR_MESSAGE_1="Failed to preinitialize Python stub: Python model file not found in '$(pwd)/models/non_existent_model/1/model.py'"
ERROR_MESSAGE_2="failed to load 'non_existent_model'"

# Run the scenario twice: with default arguments and with config
# auto-completion disabled. Each run gets its own pair of log files.
for test_mode in "default" "auto_config_disabled"; do
    case "$test_mode" in
        default)
            SERVER_LOG_SUFFIX=""
            SERVER_ARGS=$BASE_SERVER_ARGS
            ;;
        *)
            SERVER_LOG_SUFFIX="_auto_config_disabled"
            SERVER_ARGS="$BASE_SERVER_ARGS --disable-auto-complete-config"
            ;;
    esac

    SERVER_LOG="./non_existent_model_server${SERVER_LOG_SUFFIX}.log"
    CLIENT_LOG="./non_existent_model_client${SERVER_LOG_SUFFIX}.log"

    prev_num_pages=$(get_shm_pages)
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        # Expected path: startup failed. Both messages must be in the log.
        for expected_message in "$ERROR_MESSAGE_1" "$ERROR_MESSAGE_2"; do
            if grep -q "$expected_message" $SERVER_LOG; then
                echo -e "Found \"$expected_message\"" >> $CLIENT_LOG
            else
                echo -e "Not found \"$expected_message\" in $SERVER_LOG" >> $CLIENT_LOG
                cat $SERVER_LOG >> $CLIENT_LOG
                RET=1
            fi
        done
    else
        echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
        RET=1
        kill_server
    fi

    # The failed load must not leak shared memory pages.
    current_num_pages=$(get_shm_pages)
    if [ $current_num_pages -ne $prev_num_pages ]; then
        cat $SERVER_LOG
        ls /dev/shm
        echo -e "\n***\n*** Test Failed. Shared memory pages were not cleaned properly.
Shared memory pages before starting triton equals to $prev_num_pages
and shared memory pages after starting triton equals to $current_num_pages \n***"
        exit 1
    fi
done

# Disable env test for Jetson since cloud storage repos are not supported
# Disable ensemble, io and bls tests for Jetson since GPU Tensors are not supported
# Disable variants test for Jetson since already built without GPU Tensor support
# Disable decoupled test because it uses GPU tensors
if [ "$TEST_JETSON" == "0" ]; then
    SUBTESTS="ensemble bls decoupled response_sender"
    # [DLIS-6093] Disable variants test for Windows since tests are not executed in docker container (cannot apt update/install)
    # [DLIS-5970] Disable io tests for Windows since GPU Tensors are not supported
    # [DLIS-6122] Disable model_control & request_rescheduling tests for Windows since they require load/unload
    if [[ ${TEST_WINDOWS} == 0 ]]; then
        SUBTESTS+=" variants io python_based_backends async_execute"
    fi

    for TEST in ${SUBTESTS}; do
        # Run each subtest in a separate virtual environment to avoid conflicts
        # between dependencies.
        setup_virtualenv

        set +e
        (cd ${TEST} && bash -ex test.sh)
        EXIT_CODE=$?
        if [ $EXIT_CODE -ne 0 ]; then
            echo "Subtest ${TEST} FAILED"

            # In bls test, it is allowed to fail with a strict memory leak of 480 bytes with exit code '123'.
            # Keep that code in BLS_RET so the final exit logic can report it.
            if [[ ${TEST} == "bls" ]]  && [[ $EXIT_CODE -ne 1 ]] ; then
                BLS_RET=$EXIT_CODE
                # Surface the bls code through RET only when nothing has
                # failed so far; an earlier hard failure (RET=1) must not be
                # downgraded to the tolerated bls code.
                if [ $RET -eq 0 ]; then
                    RET=$EXIT_CODE
                fi
            else
                # Any other failure is a hard failure. `bash -e` may exit
                # with an arbitrary non-zero status, so normalize it to 1:
                # the final exit logic only treats RET == 1 as taking
                # precedence over BLS_RET.
                RET=1
            fi
        fi
        set -e

        deactivate_virtualenv
    done

    # [DLIS-5969]: Incorporate env test for windows
    if [[ ${PYTHON_ENV_VERSION} = "12" ]] && [[ ${TEST_WINDOWS} == 0 ]]; then
        # In 'env' test we use miniconda for dependency management. No need to run
        # the test in a virtual environment.
        set +e
        (cd env && bash -ex test.sh)
        if [ $? -ne 0 ]; then
            echo "Subtest env FAILED"
            RET=1
        fi
        set -e
    fi
fi

# Subtests that run on every platform.
SUBTESTS="lifecycle argument_validation logging custom_metrics parameters"
# [DLIS-6124] Disable restart test for Windows since it requires more investigation
# [DLIS-6122] Disable model_control & request_rescheduling tests for Windows since they require load/unload
# [DLIS-6123] Disable examples test for Windows since it requires updates to the example clients
if [[ ${TEST_WINDOWS} == 0 ]]; then
    # TODO: Reimplement restart on decoupled data pipeline and enable restart.
    SUBTESTS="${SUBTESTS} model_control examples request_rescheduling model_readiness"
fi
for TEST in ${SUBTESTS}; do
    # A fresh virtual environment per subtest keeps their dependencies from
    # interfering with each other.
    setup_virtualenv

    # errexit is switched off so that a failing subtest is recorded in RET
    # instead of aborting the whole run.
    set +e
    if ! (cd ${TEST} && bash -ex test.sh); then
        echo "Subtest ${TEST} FAILED"
        RET=1
    fi
    set -e

    deactivate_virtualenv
done

# Print the overall verdict.
if [ $RET -eq 0 ]; then
  RESULT_BANNER="\n***\n*** Test Passed\n***"
else
  RESULT_BANNER="\n***\n*** Test FAILED\n***"
fi
echo -e "$RESULT_BANNER"

# RET == 1 means a regular test failure and always wins.
# Otherwise BLS_RET, when set, carries the exit code of the known bls memory
# leak and is reported instead of RET.
if [ $RET -eq 1 ] || [ -z "$BLS_RET" ]; then
    exit $RET
fi
exit $BLS_RET


================================================
FILE: qa/L0_backend_python/test_infer_shm_leak.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../../common")

import os
import unittest

import pytest
import shm_util
import tritonclient.grpc as grpcclient
from tritonclient.utils import *

# By default, find tritonserver on "localhost", but for windows tests
# we overwrite the IP address with the TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

# The exit code 123 is used to indicate that the shm leak probe detected a 480
# bytes leak in the bls sub-test. Any leak other than 480 bytes will cause the
# test to fail with the default exit code 1.
# TestInferShmLeak.test_shm_leak passes this value to pytest.exit() as the
# process return code.
ALLOWED_FAILURE_EXIT_CODE = 123


class TestInferShmLeak:
    """Run a model-side unittest through Triton inside a shm leak probe.

    The model to exercise is taken from the MODEL_NAME environment variable.
    """

    def _run_unittest(self, model_name):
        """Send one input-less gRPC inference to *model_name* and check it.

        The model is expected to answer with OUTPUT0 == 1 when its internal
        tests passed and 0 otherwise; anything but 1 fails the assertion.
        """
        server_url = f"{_tritonserver_ipaddr}:8001"
        with grpcclient.InferenceServerClient(server_url) as client:
            # The model takes no inputs, hence the empty input list.
            response = client.infer(model_name, [], client_timeout=240)
            verdict = response.as_numpy("OUTPUT0")

            assert verdict == [1], f"python_unittest failed for model {model_name}"

    def test_shm_leak(self):
        """Run the model unittest under a shared memory leak probe.

        An AssertionError whose message reports the known 480 byte leak ends
        the pytest session with ALLOWED_FAILURE_EXIT_CODE; every other
        AssertionError is propagated unchanged.
        """
        self._shm_leak_detector = shm_util.ShmLeakDetector()
        model_name = os.environ.get("MODEL_NAME", "default_model")

        try:
            with self._shm_leak_detector.Probe():
                self._run_unittest(model_name)
        except AssertionError as e:
            if "Known shared memory leak of 480 bytes detected" not in str(e):
                raise
            pytest.exit(str(e), returncode=ALLOWED_FAILURE_EXIT_CODE)


================================================
FILE: qa/L0_backend_python/variants/test.sh
================================================
#!/bin/bash
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Building a CPU build of Python backend
# The backend is cloned and built with -DTRITON_ENABLE_GPU=OFF; the script's
# exit status reflects whether that build succeeded.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:=http://github.com/triton-inference-server}

source ../common.sh
install_build_deps
rm -rf python_backend

git clone ${TRITON_REPO_ORGANIZATION}/python_backend -b $PYTHON_BACKEND_REPO_TAG

# Capture the build status with `|| RET=$?` so that, when the script is run
# with `bash -e`, a failed build does not abort before the verdict is printed.
RET=0
(cd python_backend/ && mkdir builddir && cd builddir && \
  export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
  cmake -DTRITON_ENABLE_GPU=OFF -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} -DTRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG -DTRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG -DTRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG ../ && \
  make -j18 install) || RET=$?

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** No CPU build test PASSED.\n***"
else
  echo -e "\n***\n*** No CPU build test FAILED.\n***"
fi

# Propagate the build result; previously the script ended with the status of
# the last `echo`, i.e. 0, even when the build had failed.
exit $RET


================================================
FILE: qa/L0_backend_release/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: the first command line argument, when given, takes
# precedence over NVIDIA_TRITON_SERVER_VERSION. `${1-...}` (no colon) falls
# back only when no argument was passed at all.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Optional architecture suffix.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

SIMPLE_CLIENT=../clients/simple_http_infer_client
SIMPLE_SEQ_CLIENT=../clients/simple_grpc_sequence_stream_infer_client

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$(pwd)/models"
source ../common/util.sh

export CUDA_VISIBLE_DEVICES=0

RET=0

rm -fr *.log

# The schedulers are tested here for correctly releasing their own backend,
# which is independent of the framework, so a single model type suffices.
# The delay below (milliseconds) makes the scheduler the last holder of the
# backend handle.
export TRITONSERVER_DELAY_SCHEDULER_BACKEND_RELEASE=5000

# dynamic batcher - 1 instance
# Start from a fresh copy of the simple model and pin its instance count.
rm -fr models && cp -r simple_models models
(cd models/simple && echo "instance_group [{ count: 1 }]" >> config.pbtxt)

SERVER_LOG="./inference_server_1.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# A client failure is recorded in RET rather than aborting the script.
set +e
$SIMPLE_CLIENT -v >> client_simple.log 2>&1 || RET=1
set -e

# Stop the server and wait until it has fully exited.
kill $SERVER_PID
wait $SERVER_PID

# dynamic batcher - 4 instance
# Same scenario as above with four model instances.
rm -fr models && cp -r simple_models models
(cd models/simple && echo "instance_group [{ count: 4 }]" >> config.pbtxt)

SERVER_LOG="./inference_server_4.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# A client failure is recorded in RET rather than aborting the script.
set +e
$SIMPLE_CLIENT -v >> client_simple.log 2>&1 || RET=1
set -e

# Stop the server and wait until it has fully exited.
kill $SERVER_PID
wait $SERVER_PID

# sequence batcher - 1 instance
# Fresh copy of the sequence model; the sequence_batching block is rewritten
# to set max_sequence_idle_microseconds to 10000000 (10 seconds).
rm -fr models && cp -r simple_seq_models models
(cd models/simple_sequence && \
        sed -i "s/sequence_batching.*{.*/sequence_batching { max_sequence_idle_microseconds: 10000000/" \
            config.pbtxt)

SERVER_LOG="./inference_server_seq_1.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# A client failure is recorded in RET rather than aborting the script.
set +e
$SIMPLE_SEQ_CLIENT -v >> client_simple_seq.log 2>&1 || RET=1
set -e

# Stop the server and wait until it has fully exited.
kill $SERVER_PID
wait $SERVER_PID

# sequence batcher - 4 instance
# Fresh copy of the sequence model with four instances (matching this
# section's name and its inference_server_seq_4.log, and mirroring the
# 4-instance dynamic batcher case). The sequence_batching block is rewritten
# to set max_sequence_idle_microseconds to 10000000.
rm -fr models && cp -r simple_seq_models models
(cd models/simple_sequence && \
        echo "instance_group [{ count: 4 }]" >> config.pbtxt && \
        sed -i "s/sequence_batching.*{.*/sequence_batching { max_sequence_idle_microseconds: 10000000/" \
            config.pbtxt)

SERVER_LOG="./inference_server_seq_4.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# errexit is disabled so a client failure is recorded in RET instead of
# aborting the script.
set +e

$SIMPLE_SEQ_CLIENT -v >> client_simple_seq.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

set -e

# Stop the server and wait until it has fully exited.
kill $SERVER_PID
wait $SERVER_PID

# Print the overall verdict and hand RET back to the caller.
if [ $RET -eq 0 ]; then
    RESULT_BANNER="\n***\n*** Test Passed\n***"
else
    RESULT_BANNER="\n***\n*** Test FAILED\n***"
fi
echo -e "$RESULT_BANNER"

exit $RET


================================================
FILE: qa/L0_backend_tutorial/test.sh
================================================
#!/bin/bash
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Restrict the test to the first GPU.
export CUDA_VISIBLE_DEVICES=0

# Source organization and tags for the repositories used by the example
# builds; each can be overridden from the environment.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"}
TRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG:="main"}
TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG:="main"}
TRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG:="main"}

# Output of the two example clients.
MINIMAL_LOG="./minimal.log"
RECOMMENDED_LOG="./recommended.log"

SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh

RET=0

# Client build requires recent version of CMake (FetchContent required)
# Using CMake installation instructions from: https://apt.kitware.com/
# The chain registers the Kitware signing key and apt source for the current
# Ubuntu codename (read from /etc/os-release), then installs a pinned CMake
# together with rapidjson-dev.
apt update -q=2 \
    && apt install -y gpg wget \
    && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - |  tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
    && . /etc/os-release \
    && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
    && apt-get update -q=2 \
    && apt-get install -y --no-install-recommends cmake=4.0.3* cmake-data=4.0.3* \
            rapidjson-dev
cmake --version

# Start from clean logs and a fresh shallow clone of the backend repository,
# which contains the example backends, model repositories and clients.
rm -fr *.log ./backend
git clone --single-branch --depth=1 -b $TRITON_BACKEND_REPO_TAG \
    ${TRITON_REPO_ORGANIZATION}/backend.git

#
# Minimal backend: build it, install it into the server's backend directory,
# run the example client and verify client output and server log.
#
(cd backend/examples/backends/minimal &&
 mkdir build &&
 cd build &&
 export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
 cmake -DCMAKE_INSTALL_PREFIX:PATH=$(pwd)/install \
       -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
       -DTRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG} \
       -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
       -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
       .. &&
 make -j4 install)

rm -fr /opt/tritonserver/backends/minimal
cp -r backend/examples/backends/minimal/build/install/backends/minimal /opt/tritonserver/backends/.

SERVER_LOG="./minimal_server.log"
SERVER_ARGS="--model-repository=$(pwd)/backend/examples/model_repos/minimal_models"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# From here on failures are collected in RET instead of aborting.
set +e

if ! backend/examples/clients/minimal_client >> ${MINIMAL_LOG} 2>&1; then
    cat $MINIMAL_LOG
    RET=1
fi

# Client output of the non-batching model.
if ! grep "OUT0 = \[1 2 3 4\]" $MINIMAL_LOG; then
    echo -e "\n***\n*** Failed to verify minimal nonbatching example. \n***"
    cat $MINIMAL_LOG
    RET=1
fi

# Client output of the two requests sent to the batching model.
for expected_output in "OUT0 = \[\[10 11 12 13\]\]" "OUT0 = \[\[20 21 22 23\]\]"; do
    if ! grep "$expected_output" $MINIMAL_LOG; then
        echo -e "\n***\n*** Failed to verify minimal batching example. \n***"
        cat $MINIMAL_LOG
        RET=1
    fi
done

# The server log must show that both requests were executed as one batch.
for expected_log_line in \
        "model batching: requests in batch 2" \
        "batched IN0 value: \[ 10, 11, 12, 13, 20, 21, 22, 23 \]"; do
    if ! grep "$expected_log_line" $SERVER_LOG; then
        echo -e "\n***\n*** Failed to verify minimal server log. \n***"
        cat $SERVER_LOG
        cat $MINIMAL_LOG
        RET=1
    fi
done

set -e

kill $SERVER_PID
wait $SERVER_PID

# Remove the example backend from the server installation again.
rm -fr /opt/tritonserver/backends/minimal

#
# Recommended backend: build it, install it into the server's backend
# directory, run the example client and verify client output and server log.
#
(cd backend/examples/backends/recommended &&
 mkdir build &&
 cd build &&
 export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
 cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
       -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
       -DTRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG} \
       -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
       -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
       .. &&
 make -j4 install)

rm -fr /opt/tritonserver/backends/recommended
cp -r backend/examples/backends/recommended/build/install/backends/recommended /opt/tritonserver/backends/.

SERVER_LOG="./recommended_server.log"
SERVER_ARGS="--model-repository=`pwd`/backend/examples/model_repos/recommended_models"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# From here on failures are collected in RET instead of aborting.
set +e

backend/examples/clients/recommended_client >> ${RECOMMENDED_LOG} 2>&1
if [ $? -ne 0 ]; then
    cat $RECOMMENDED_LOG
    RET=1
fi

# -z makes grep treat the whole file as one record so that the pattern can
# span the line breaks of the printed tensor.
grep -z "OUTPUT = \[\[\[1.  1.1 1.2 1.3\].*\[2.  2.1 2.2 2.3\].*\[3.  3.1 3.2 3.3\].*\[4.  4.1 4.2 4.3\]\]\]" $RECOMMENDED_LOG
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed to verify recommended example. \n***"
    cat $RECOMMENDED_LOG
    RET=1
fi

grep -z "OUTPUT = \[\[\[10.  10.1 10.2 10.3\].*\[20.  20.1 20.2 20.3\].*\[30.  30.1 30.2 30.3\].*\[40.  40.1 40.2 40.3\]\]\]" $RECOMMENDED_LOG
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed to verify recommended example. \n***"
    cat $RECOMMENDED_LOG
    RET=1
fi

grep "model batching: requests in batch 2" $SERVER_LOG
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed to verify recommended server log. \n***"
    cat $SERVER_LOG
    cat $RECOMMENDED_LOG
    RET=1
fi

# The two requests may be batched in either order, so the batched input is
# accepted in both arrangements. FOUND_MATCH becomes 1 as soon as one of the
# two patterns is found (grep exit status 0); if neither is present the
# verification fails.
FOUND_MATCH=0
grep "batched INPUT value: \[ 1.000000, 1.100000, 1.200000, 1.300000, 2.000000, 2.100000, 2.200000, 2.300000, 3.000000, 3.100000, 3.200000, 3.300000, 4.000000, 4.100000, 4.200000, 4.300000, 10.000000, 10.100000, 10.200000, 10.300000, 20.000000, 20.100000, 20.200001, 20.299999, 30.000000, 30.100000, 30.200001, 30.299999, 40.000000, 40.099998, 40.200001, 40.299999 \]" $SERVER_LOG
if [ $? -eq 0 ]; then
    FOUND_MATCH=1
fi
grep "batched INPUT value: \[ 10.000000, 10.100000, 10.200000, 10.300000, 20.000000, 20.100000, 20.200001, 20.299999, 30.000000, 30.100000, 30.200001, 30.299999, 40.000000, 40.099998, 40.200001, 40.299999, 1.000000, 1.100000, 1.200000, 1.300000, 2.000000, 2.100000, 2.200000, 2.300000, 3.000000, 3.100000, 3.200000, 3.300000, 4.000000, 4.100000, 4.200000, 4.300000 \]" $SERVER_LOG
if [ $? -eq 0 ]; then
    FOUND_MATCH=1
fi
if [ $FOUND_MATCH -eq 0 ]; then
    echo -e "\n***\n*** Failed to verify recommended server log. \n***"
    cat $SERVER_LOG
    cat $RECOMMENDED_LOG
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Remove the example backend from the server installation again.
rm -fr /opt/tritonserver/backends/recommended

# Print the overall verdict and hand RET back to the caller.
if [ $RET -eq 0 ]; then
    RESULT_BANNER="\n***\n*** Test Passed\n***"
else
    RESULT_BANNER="\n***\n*** Test FAILED\n***"
fi
echo -e "$RESULT_BANNER"

exit $RET


================================================
FILE: qa/L0_batch_custom/batch_custom_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import threading
import time
import unittest
from builtins import range
from collections.abc import Iterable

import infer_util as iu
import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient

# By default, find tritonserver on "localhost", but can be overridden
# with TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

# Exceptions collected through BatcherTest.add_deferred_exception and
# re-raised by BatcherTest.check_deferred_exception. Appending and inspecting
# happen while holding the lock; BatcherTest.setUp rebinds the list to a
# fresh empty one for every test.
_deferred_exceptions_lock = threading.Lock()
_deferred_exceptions = []


class BatcherTest(tu.TestResultCollector):
    def setUp(self):
        """Reset the shared bookkeeping and create the helper client."""
        # Every test starts with an empty list of deferred exceptions.
        global _deferred_exceptions
        _deferred_exceptions = []
        self.precreated_shm_regions_ = []

        # The helper client for setup will be GRPC for simplicity.
        server_url = f"{_tritonserver_ipaddr}:8001"
        self.triton_client_ = grpcclient.InferenceServerClient(server_url)

    def tearDown(self):
        """Release the helper gRPC client, then run the base-class teardown."""
        try:
            # setUp() opens a new client for every test; close it so the
            # underlying channel is not leaked.
            self.triton_client_.close()
        finally:
            # Always give the base class a chance to do its own teardown,
            # even if closing the client raised.
            super().tearDown()

    def add_deferred_exception(self, ex):
        """Record `ex` so that check_deferred_exception() can raise it later."""
        # Only the list's contents change here (the name is not rebound), so
        # holding the lock around the append is all that is needed.
        with _deferred_exceptions_lock:
            _deferred_exceptions.append(ex)

    def check_deferred_exception(self):
        """Raise the first recorded deferred exception, if there is one."""
        with _deferred_exceptions_lock:
            # Only one exception can be raised; the earliest one is reported.
            if _deferred_exceptions:
                raise _deferred_exceptions[0]

    def check_response(
        self,
        trial,
        bs,
        thresholds,
        requested_outputs=("OUTPUT0", "OUTPUT1"),
        input_size=16,
        shm_region_names=None,
        precreated_shm_regions=None,
    ):
        """Run one exact-inference check and validate how long it took.

        Args:
            trial: Model flavour; "libtorch", "onnx", "plan" or "python".
                Any other value is reported as a failure.
            bs: Batch size of the request.
            thresholds: Sequence whose first item is the exclusive upper
                bound and whose second item is the exclusive lower bound
                (both in milliseconds) for the elapsed time. An item of
                None disables that bound.
            requested_outputs: Output names passed on to iu.infer_exact.
            input_size: Second dimension of the (bs, input_size) input.
            shm_region_names: Accepted but not used by this helper.
            precreated_shm_regions: Accepted but not used by this helper.

        Nothing is raised to the caller: every exception, assertion failures
        included, is stored through add_deferred_exception().
        """
        supported_trials = ("libtorch", "onnx", "plan", "python")
        try:
            began_ms = int(round(time.time() * 1000))

            if trial not in supported_trials:
                self.assertFalse(True, "unknown trial type: " + trial)
            else:
                iu.infer_exact(
                    self,
                    trial,
                    (bs, input_size),
                    bs,
                    np.float32,
                    np.float32,
                    np.float32,
                    swap=False,
                    model_version=1,
                    outputs=requested_outputs,
                    use_http=False,
                    use_grpc=False,
                    use_http_json_tensors=False,
                    skip_request_id_check=True,
                    use_streaming=False,
                )

            finished_ms = int(round(time.time() * 1000))
            elapsed_ms = finished_ms - began_ms

            upper_bound_ms = thresholds[0]
            lower_bound_ms = thresholds[1]
            if upper_bound_ms is not None:
                self.assertTrue(
                    elapsed_ms < upper_bound_ms,
                    "expected less than {}ms response time, got {} ms".format(
                        upper_bound_ms, elapsed_ms
                    ),
                )
            if lower_bound_ms is not None:
                self.assertTrue(
                    elapsed_ms > lower_bound_ms,
                    "expected greater than {}ms response time, got {} ms".format(
                        lower_bound_ms, elapsed_ms
                    ),
                )
        except Exception as ex:
            # Defer the failure so the code driving the test can report it.
            self.add_deferred_exception(ex)

    def check_status(self, model_name, batch_exec, request_cnt, infer_cnt, exec_count):
        """Verify the inference statistics reported for version "1" of a model.

        Args:
            model_name: Name of the model whose statistics are queried.
            batch_exec: Optional mapping from batch size to the expected
                compute_infer count for that batch size. When falsy, the
                per-batch statistics are not checked.
            request_cnt: Expected number of successful requests.
            infer_cnt: Expected inference count.
            exec_count: Expected execution count. Either a single value or
                an iterable holding every acceptable value.

        Raises:
            AssertionError: If a reported statistic differs from the
                expectation once the polling window is over.
        """
        # Normalize so the polling loop can use one membership test. Comparing
        # the reported count to an iterable with == never succeeds, which
        # would force the loop to always run for the full polling window.
        if isinstance(exec_count, Iterable):
            accepted_exec_counts = tuple(exec_count)
        else:
            accepted_exec_counts = (exec_count,)

        # There is a time window between when responses are returned and statistics are updated.
        # To prevent intermittent test failure during that window, wait up to 10 seconds for the
        # inference statistics to be ready.
        num_tries = 10
        for i in range(num_tries):
            stats = self.triton_client_.get_inference_statistics(model_name, "1")
            self.assertEqual(len(stats.model_stats), 1, "expect 1 model stats")
            model_stats = stats.model_stats[0]
            actual_exec_cnt = model_stats.execution_count
            actual_request_cnt = model_stats.inference_stats.success.count
            actual_infer_cnt = model_stats.inference_count
            # With several acceptable execution counts, a smaller accepted
            # value can show up while later executions are still being
            # accounted for, so also wait for the other counters to settle.
            if (
                actual_exec_cnt in accepted_exec_counts
                and actual_request_cnt == request_cnt
                and actual_infer_cnt == infer_cnt
            ):
                break
            print(
                "WARNING: statistics not ready: executions {} (expect {}), "
                "requests {} (expect {}), inferences {} (expect {}) "
                "(attempt {})".format(
                    actual_exec_cnt,
                    exec_count,
                    actual_request_cnt,
                    request_cnt,
                    actual_infer_cnt,
                    infer_cnt,
                    i,
                )
            )
            time.sleep(1)

        # All checks below use the most recently fetched statistics.
        self.assertEqual(
            stats.model_stats[0].name,
            model_name,
            "expect model stats for model {}".format(model_name),
        )
        self.assertEqual(
            stats.model_stats[0].version,
            "1",
            "expect model stats for model {} version 1".format(model_name),
        )

        if batch_exec:
            batch_stats = stats.model_stats[0].batch_stats
            self.assertEqual(
                len(batch_stats),
                len(batch_exec),
                "expected {} different batch-sizes, got {}".format(
                    len(batch_exec), len(batch_stats)
                ),
            )

            for batch_stat in batch_stats:
                bs = batch_stat.batch_size
                bc = batch_stat.compute_infer.count
                self.assertTrue(bs in batch_exec, "unexpected batch-size {}".format(bs))
                # Get count from one of the stats
                self.assertEqual(
                    bc,
                    batch_exec[bs],
                    "expected model-execution-count {} for batch size {}, got {}".format(
                        batch_exec[bs], bs, bc
                    ),
                )

        actual_request_cnt = stats.model_stats[0].inference_stats.success.count
        self.assertEqual(
            actual_request_cnt,
            request_cnt,
            "expected model-request-count {}, got {}".format(
                request_cnt, actual_request_cnt
            ),
        )

        actual_exec_cnt = stats.model_stats[0].execution_count
        if isinstance(exec_count, Iterable):
            self.assertIn(
                actual_exec_cnt,
                accepted_exec_counts,
                "expected model-exec-count {}, got {}".format(
                    exec_count, actual_exec_cnt
                ),
            )
        else:
            self.assertEqual(
                actual_exec_cnt,
                exec_count,
                "expected model-exec-count {}, got {}".format(
                    exec_count, actual_exec_cnt
                ),
            )
        actual_infer_cnt = stats.model_stats[0].inference_count
        self.assertEqual(
            actual_infer_cnt,
            infer_cnt,
            "expected model-inference-count {}, got {}".format(
                infer_cnt, actual_infer_cnt
            ),
        )

    def test_volume_batching(self):
        """Send 12 concurrent batch-size-1 requests and check the statistics.

        The max_queue_delay is set to non-zero, so depending upon the timing
        of the requests arrival there can be anywhere from 4 to 6 model
        executions.
        """
        model_base = "onnx"
        dtype = np.float16
        shapes = ([1, 4, 4],)
        request_options = {
            "use_http": True,
            "use_grpc": False,
            "use_http_json_tensors": False,
            "use_streaming": False,
        }

        try:
            # use threads to send 12 requests without waiting for response
            senders = [
                threading.Thread(
                    target=iu.infer_zero,
                    args=(self, model_base, 1, dtype, shapes, shapes),
                    kwargs=request_options,
                )
                for _ in range(12)
            ]
            for sender in senders:
                sender.start()
            for sender in senders:
                sender.join()

            self.check_deferred_exception()
            model_name = tu.get_zero_model_name(model_base, len(shapes), dtype)
            # NOTE: test.sh rewrites the expected execution counts on the
            # next line with sed, so keep that literal exactly as written.
            self.check_status(model_name, None, 12, 12, (4, 5, 6))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))


# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_batch_custom/test.sh
================================================
#!/bin/bash
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

## This test tests the ability to use custom batching strategies with models.

# Resolve the repository version: the first positional argument, when given,
# takes precedence over NVIDIA_TRITON_SERVER_VERSION. The test aborts when
# neither is available.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Test configuration. The *_LOG_BASE values are prefixes; a per-scenario
# suffix is appended further down.
BATCH_CUSTOM_TEST=batch_custom_test.py
CLIENT_LOG_BASE="./client.log"
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository
EXPECTED_NUM_TESTS="1"
MODEL_NAME="onnx_zero_1_float16"
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models --log-verbose 1"
SERVER_LOG_BASE="./inference_server.log"
TEST_RESULT_FILE='test_results.txt'
# Source location and tags used to build the example batching strategies;
# each can be overridden from the environment.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"}
TRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG:="main"}
TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG:="main"}
TRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG:="main"}


# Shared QA helpers (run_server, kill_server and check_test_results used
# below are presumably defined here -- confirm in ../common/util.sh).
source ../common/util.sh
# Overall result of the script; set to 1 by any failing check.
RET=0

# Batch strategy build requires recent version of CMake (FetchContent required)
# Using CMake installation instructions from: https://apt.kitware.com/
# The chain registers the Kitware apt repository (signing key + sources list
# entry for the current Ubuntu codename) and then installs a pinned CMake
# 4.0.3 together with rapidjson-dev.
apt update -q=2 \
    && apt install -y gpg wget \
    && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - |  tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
    && . /etc/os-release \
    && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
    && apt-get update -q=2 \
    && apt-get install -y --no-install-recommends cmake=4.0.3* cmake-data=4.0.3* rapidjson-dev
# Print the CMake version that ended up on PATH for the test log.
cmake --version

# Set up repository
# Start from a clean slate: remove old logs, a previous backend checkout and
# the previous model repository, then copy in the model under test.
rm -fr *.log* ./backend
rm -fr models && mkdir models
cp -r $DATADIR/$MODEL_NAME models

# Extend the model configuration: enable dynamic batching with a queue delay,
# run two GPU instances and pass MAX_BATCH_VOLUME_BYTES as a model parameter.
CONFIG_PATH="models/${MODEL_NAME}/config.pbtxt"
echo "dynamic_batching { max_queue_delay_microseconds: 10000}" >> ${CONFIG_PATH}
echo "instance_group [ { kind: KIND_GPU count: 2 }]" >> ${CONFIG_PATH}
echo "parameters { key: \"MAX_BATCH_VOLUME_BYTES\" value: {string_value: \"96\"}}" >> ${CONFIG_PATH}

# Create custom batching libraries
# Shallow-clone the backend repository, which contains the example batching
# strategies built below.
git clone --single-branch --depth=1 -b $TRITON_BACKEND_REPO_TAG \
    ${TRITON_REPO_ORGANIZATION}/backend.git

# Build the volume batching example. The build runs in a subshell, so the
# directory changes and the exported CMAKE_POLICY_VERSION_MINIMUM do not leak
# into the rest of the script.
(cd backend/examples/batching_strategies/volume_batching &&
 mkdir build &&
 cd build &&
 export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
 cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
       -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
       -DTRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG} \
       -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
       -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} .. &&
 make -j4 install)

# Build the single batching example the same way.
 (cd backend/examples/batching_strategies/single_batching &&
 mkdir build &&
 cd build &&
 export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
 cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
       -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
       -DTRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG} \
       -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
       -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} .. &&
 make -j4 install)

# Stage both freshly built libraries next to the models; the test scenarios
# copy them from here into the location being tested.
cp -r backend/examples/batching_strategies/volume_batching/build/libtriton_volumebatching.so models
cp -r backend/examples/batching_strategies/single_batching/build/libtriton_singlebatching.so models

# Run a test to validate the single batching strategy example.
# Then, run tests to validate the volume batching example being passed in via the backend dir, model dir, version dir, and model config.
BACKEND_DIR="/opt/tritonserver/backends/onnxruntime"
MODEL_DIR="models/$MODEL_NAME"
VERSION_DIR="$MODEL_DIR/1/"

# test_types[i] names the scenario whose setup command is test_setups[i]; the
# two arrays must stay index-aligned. The commands run in order and build on
# each other:
#   0: install the single batching library in the backend directory and
#      patch the expected execution count in the Python test to (12)
#   1: install the volume batching library in the backend directory and
#      restore the expected execution counts in the Python test
#   2: move that library into the model directory and put the single
#      batching library into the backend directory
#   3: move the library from the model directory into the version directory
#   4: move the library back next to the model under its real name and point
#      TRITON_BATCH_STRATEGY_PATH at it in the model configuration
test_types=('single_batching_backend' 'backend_directory' 'model_directory' 'version_directory' 'model_config')
test_setups=("cp models/libtriton_singlebatching.so ${BACKEND_DIR}/batchstrategy.so && sed -i \"s/(4, 5, 6))/(12))/\" ${BATCH_CUSTOM_TEST}"
    "cp models/libtriton_volumebatching.so ${BACKEND_DIR}/batchstrategy.so && sed -i \"s/(12))/(4, 5, 6))/\" ${BATCH_CUSTOM_TEST}"
    "mv ${BACKEND_DIR}/batchstrategy.so ${MODEL_DIR} && cp models/libtriton_singlebatching.so ${BACKEND_DIR}"
    "mv ${MODEL_DIR}/batchstrategy.so ${VERSION_DIR}/batchstrategy.so"
    "mv ${VERSION_DIR}/batchstrategy.so models/${MODEL_NAME}/libtriton_volumebatching.so && echo \"parameters: {key: \\\"TRITON_BATCH_STRATEGY_PATH\\\", value: {string_value: \\\"${MODEL_DIR}/libtriton_volumebatching.so\\\"}}\" >> ${CONFIG_PATH}")

# Run every scenario in turn: apply its setup command, start the server,
# verify that exactly one custom batching strategy was loaded and then run
# the Python client test against it.
for i in "${!test_setups[@]}"; do
    echo "Running ${test_types[$i]} test"
    # Quote the expansion so the setup command reaches eval as one unmodified
    # string; unquoted, it would first go through word splitting and
    # pathname expansion.
    eval "${test_setups[$i]}"

    # Keep separate server and client logs for each scenario.
    SERVER_LOG=${SERVER_LOG_BASE}_${test_types[$i]}
    CLIENT_LOG=${CLIENT_LOG_BASE}_${test_types[$i]}

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi
    if [ `grep -c "Loading custom batching strategy" $SERVER_LOG` != "1" ]; then
        cat $SERVER_LOG
        echo -e "\n***\n*** Failed to load custom batching strategy.***"
        RET=1
    else
        # Failures are recorded in RET rather than aborting the script.
        set +e
        python $BATCH_CUSTOM_TEST >$CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** ${test_types[$i]} Test Failed\n***"
            RET=1
        else
            check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG
                echo -e "\n***\n*** ${test_types[$i]} Test Result Verification Failed\n***"
                RET=1
            fi
        fi
        set -e
    fi

    # Stop the server before the next scenario rearranges the libraries.
    kill $SERVER_PID
    wait $SERVER_PID
done

# Test ModelBatchInitialize failure
# Replace a comment line in the volume batching source with a statement that
# returns a NOT_FOUND error carrying the message "Failure test case"
# (presumably inside ModelBatchInitialize -- confirm in volume_batching.cc).
FILE_PATH="backend/examples/batching_strategies/volume_batching/src/volume_batching.cc"
OLD_STRING="\/\/ Batcher will point to an unsigned integer representing the maximum"
NEW_STRING="return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_NOT_FOUND,\"Failure test case\");"

sed -i "s/${OLD_STRING}/${NEW_STRING}/g" ${FILE_PATH}

# Rebuild the modified library in the existing build directory.
(cd backend/examples/batching_strategies/volume_batching &&
 cd build &&
 cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
       -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
       -DTRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG} \
       -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
       -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} .. &&
 make -j4 install)

# Overwrite the library referenced by TRITON_BATCH_STRATEGY_PATH in the model
# configuration with the failing build.
cp -r backend/examples/batching_strategies/volume_batching/build/libtriton_volumebatching.so models/${MODEL_NAME}/libtriton_volumebatching.so

SERVER_LOG=${SERVER_LOG_BASE}_batching_init_failure

# This time the server is expected NOT to start, and its log must contain
# both the injected message and the "Not found" error type.
run_server
if [ "$SERVER_PID" != "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** ModelBatchInit Error Test: unexpected successful server start $SERVER\n***"
    kill_server
    RET=1
else
    if [ `grep -c "Failure test case" $SERVER_LOG` -lt 1 ] || [ `grep -c "Not found" $SERVER_LOG` -lt 1 ]; then
        cat $SERVER_LOG
        echo -e "\n***\n*** ModelBatchInit Error Test: failed to find \"Failure test case\" message and/or \"Not found\" error type"
        RET=1
    fi
fi


# Report the overall result and use it as the exit status.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_batch_input/batch_input_test.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import queue
import unittest
from functools import partial

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class BatchInputTest(tu.TestResultCollector):
    def setUp(self):
        """Create the gRPC client and the stream callback used by the tests."""
        self.client = grpcclient.InferenceServerClient(url="localhost:8001")
        # Always release the client once the test is over. This also stops a
        # stream that is still open because the test failed before it
        # reached its own stop_stream() call.
        self.addCleanup(self.client.close)

        def callback(user_data, result, error):
            # Hand whichever of error/result was delivered to the waiting
            # test through the queue.
            if error:
                user_data.put(error)
            else:
                user_data.put(result)

        self.client_callback = callback

    def set_inputs(self, shapes, input_name):
        """Build one single-tensor FP32 request per entry of `shapes`.

        For every entry, the first item `n` of the entry yields an input
        named `input_name` with shape [1, n] whose elements all equal `n`.
        The requests are stored in self.inputs, one list per request.
        """
        self.dtype_ = np.float32
        self.inputs = []
        for shape in shapes:
            element_count = shape[0]
            tensor_shape = [1, element_count]
            tensor = grpcclient.InferInput(input_name, tensor_shape, "FP32")
            tensor.set_data_from_numpy(
                np.full(tensor_shape, element_count, np.float32)
            )
            self.inputs.append([tensor])

    def set_inputs_for_batch_item(self, shapes, input_name):
        """Build one single-tensor FP32 request per entry of `shapes`.

        Each entry is used as the full tensor shape, and the tensor is filled
        with the first item of that shape. The requests are stored in
        self.inputs, one list per request.
        """
        self.dtype_ = np.float32
        self.inputs = []
        for shape in shapes:
            tensor = grpcclient.InferInput(input_name, shape, "FP32")
            tensor.set_data_from_numpy(np.full(shape, shape[0], np.float32))
            self.inputs.append([tensor])

    def test_ragged_output(self):
        """Stream four ragged requests and expect each input echoed back.

        Response `idx` is compared with the expectation for request `idx`.
        """
        model_name = "ragged_io"
        # The model is an identity model
        self.set_inputs([[2], [4], [1], [3]], "INPUT0")
        user_data = queue.Queue()
        self.client.start_stream(callback=partial(self.client_callback, user_data))

        output_name = "OUTPUT0"
        outputs = [grpcclient.InferRequestedOutput(output_name)]

        try:
            for request_inputs in self.inputs:
                # Asynchronous inference call.
                self.client.async_stream_infer(
                    model_name=model_name, inputs=request_inputs, outputs=outputs
                )

            expected_value_list = [
                np.asarray([[v] * v], dtype=np.float32) for v in [2, 4, 1, 3]
            ]
            for idx, expected_value in enumerate(expected_value_list):
                # Get the result from the initiated asynchronous inference request.
                # Note the call will block till the server responds.
                result = user_data.get()
                # The callback queues the error object when a request fails;
                # surface it instead of treating it as a result.
                if isinstance(result, InferenceServerException):
                    raise result

                # Validate the results by comparing with precomputed values.
                output_data = result.as_numpy(output_name)
                self.assertTrue(
                    np.array_equal(output_data, expected_value),
                    "Expect response {} to have value {}, got {}".format(
                        idx, expected_value, output_data
                    ),
                )
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            # Close the stream even when an assertion above failed.
            self.client.stop_stream()

    def test_ragged_input(self):
        """Expect every response to hold all four ragged inputs concatenated."""
        model_name = "ragged_acc_shape"
        self.set_inputs([[2], [4], [1], [3]], "RAGGED_INPUT")
        user_data = queue.Queue()
        self.client.start_stream(callback=partial(self.client_callback, user_data))

        output_name = "RAGGED_OUTPUT"
        outputs = [grpcclient.InferRequestedOutput(output_name)]
        try:
            for request_inputs in self.inputs:
                # Asynchronous inference call.
                self.client.async_stream_infer(
                    model_name=model_name, inputs=request_inputs, outputs=outputs
                )

            # Each request contributes `v` elements of value `v`.
            flattened = []
            for v in [2, 4, 1, 3]:
                flattened += [v] * v
            expected_value = np.asarray([flattened], dtype=np.float32)
            for idx in range(len(self.inputs)):
                # Get the result from the initiated asynchronous inference request.
                # Note the call will block till the server responds.
                result = user_data.get()
                # The callback queues the error object when a request fails;
                # surface it instead of treating it as a result.
                if isinstance(result, InferenceServerException):
                    raise result
                # Validate the results by comparing with precomputed values.
                output_data = result.as_numpy(output_name)
                self.assertTrue(
                    np.array_equal(output_data, expected_value),
                    "Expect response {} to have value {}, got {}".format(
                        idx, expected_value, output_data
                    ),
                )
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            # Close the stream even when an assertion above failed.
            self.client.stop_stream()

    def test_element_count(self):
        """Expect every response to list the element count of each request."""
        model_name = "ragged_element_count_acc_zero"
        self.set_inputs([[2], [4], [1], [3]], "RAGGED_INPUT")
        user_data = queue.Queue()
        self.client.start_stream(callback=partial(self.client_callback, user_data))

        output_name = "BATCH_AND_SIZE_OUTPUT"
        outputs = [grpcclient.InferRequestedOutput(output_name)]

        try:
            for request_inputs in self.inputs:
                # Asynchronous inference call.
                self.client.async_stream_infer(
                    model_name=model_name, inputs=request_inputs, outputs=outputs
                )

            expected_value = np.asarray([[2, 4, 1, 3]], np.float32)
            for idx in range(len(self.inputs)):
                # Get the result from the initiated asynchronous inference request.
                # Note the call will block till the server responds.
                result = user_data.get()
                # The callback queues the error object when a request fails;
                # surface it instead of treating it as a result.
                if isinstance(result, InferenceServerException):
                    raise result

                # Validate the results by comparing with precomputed values.
                output_data = result.as_numpy(output_name)
                self.assertTrue(
                    np.array_equal(output_data, expected_value),
                    "Expect response {} to have value {}, got {}".format(
                        idx, expected_value, output_data
                    ),
                )
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            # Close the stream even when an assertion above failed.
            self.client.stop_stream()

    def test_accumulated_element_count(self):
        """Expect every response to hold the running total of element counts."""
        model_name = "ragged_acc_shape"
        self.set_inputs([[2], [4], [1], [3]], "RAGGED_INPUT")
        user_data = queue.Queue()
        self.client.start_stream(callback=partial(self.client_callback, user_data))

        output_name = "BATCH_AND_SIZE_OUTPUT"
        outputs = [grpcclient.InferRequestedOutput(output_name)]

        try:
            for request_inputs in self.inputs:
                # Asynchronous inference call.
                self.client.async_stream_infer(
                    model_name=model_name, inputs=request_inputs, outputs=outputs
                )

            # Running sums of the element counts 2, 4, 1 and 3.
            expected_value = np.asarray([[2, 6, 7, 10]], np.float32)
            for idx in range(len(self.inputs)):
                # Get the result from the initiated asynchronous inference request.
                # Note the call will block till the server responds.
                result = user_data.get()
                # The callback queues the error object when a request fails;
                # surface it instead of treating it as a result.
                if isinstance(result, InferenceServerException):
                    raise result

                # Validate the results by comparing with precomputed values.
                output_data = result.as_numpy(output_name)
                self.assertTrue(
                    np.array_equal(output_data, expected_value),
                    "Expect response {} to have value {}, got {}".format(
                        idx, expected_value, output_data
                    ),
                )
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            # Close the stream even when an assertion above failed.
            self.client.stop_stream()

    def test_accumulated_element_count_with_zero(self):
        """Expect the running total of element counts with a leading zero."""
        model_name = "ragged_element_count_acc_zero"
        self.set_inputs([[2], [4], [1], [3]], "RAGGED_INPUT")
        user_data = queue.Queue()
        self.client.start_stream(callback=partial(self.client_callback, user_data))

        output_name = "BATCH_OUTPUT"
        outputs = [grpcclient.InferRequestedOutput(output_name)]

        try:
            for request_inputs in self.inputs:
                # Asynchronous inference call.
                self.client.async_stream_infer(
                    model_name=model_name, inputs=request_inputs, outputs=outputs
                )

            # Running sums of the element counts 2, 4, 1 and 3, starting at 0.
            expected_value = np.asarray([[0, 2, 6, 7, 10]], np.float32)
            for idx in range(len(self.inputs)):
                # Get the result from the initiated asynchronous inference request.
                # Note the call will block till the server responds.
                result = user_data.get()
                # The callback queues the error object when a request fails;
                # surface it instead of treating it as a result.
                if isinstance(result, InferenceServerException):
                    raise result

                # Validate the results by comparing with precomputed values.
                output_data = result.as_numpy(output_name)
                self.assertTrue(
                    np.array_equal(output_data, expected_value),
                    "Expect response {} to have value {}, got {}".format(
                        idx, expected_value, output_data
                    ),
                )
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            # Close the stream even when an assertion above failed.
            self.client.stop_stream()

    def test_max_element_count_as_shape(self):
        """Expect the output shape to encode the largest element count (4)."""
        model_name = "ragged_acc_shape"
        self.set_inputs([[2], [4], [1], [3]], "RAGGED_INPUT")
        user_data = queue.Queue()
        self.client.start_stream(callback=partial(self.client_callback, user_data))

        output_name = "BATCH_OUTPUT"
        outputs = [grpcclient.InferRequestedOutput(output_name)]

        try:
            for request_inputs in self.inputs:
                # Asynchronous inference call.
                self.client.async_stream_infer(
                    model_name=model_name, inputs=request_inputs, outputs=outputs
                )

            for idx in range(len(self.inputs)):
                # Get the result from the initiated asynchronous inference request.
                # Note the call will block till the server responds.
                result = user_data.get()
                # The callback queues the error object when a request fails;
                # surface it instead of treating it as a result.
                if isinstance(result, InferenceServerException):
                    raise result

                # Only the shape is checked here, not the values.
                output_data = result.as_numpy(output_name)
                self.assertEqual(
                    output_data.shape,
                    (1, 4),
                    "Expect response {} to have shape to represent max element count {} among the batch , got {}".format(
                        idx, 4, output_data.shape
                    ),
                )
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            # Close the stream even when an assertion above failed.
            self.client.stop_stream()

    def test_batch_item_shape_flatten(self):
        """Expect the flattened per-item shapes of all requests in each response."""
        # Use 4 set of inputs with shape
        # [1, 4, 1], [1, 1, 2], [1, 1, 2], [1, 2, 2]
        # Note that the test only checks the formation of "BATCH_INPUT" where
        # the value of "RAGGED_INPUT" is irrelevant, only the shape matters
        self.set_inputs_for_batch_item(
            [[1, 4, 1], [1, 1, 2], [1, 1, 2], [1, 2, 2]], "RAGGED_INPUT"
        )

        model_name = "batch_item_flatten"
        user_data = queue.Queue()
        self.client.start_stream(callback=partial(self.client_callback, user_data))

        output_name = "BATCH_OUTPUT"
        outputs = [grpcclient.InferRequestedOutput(output_name)]

        try:
            for request_inputs in self.inputs:
                # Asynchronous inference call.
                self.client.async_stream_infer(
                    model_name=model_name, inputs=request_inputs, outputs=outputs
                )

            # The non-batch dimensions of every request, back to back.
            expected_value = np.asarray([[4, 1, 1, 2, 1, 2, 2, 2]], np.float32)
            for idx in range(len(self.inputs)):
                # Get the result from the initiated asynchronous inference request.
                # Note the call will block till the server responds.
                result = user_data.get()
                # The callback queues the error object when a request fails;
                # surface it instead of treating it as a result.
                if isinstance(result, InferenceServerException):
                    raise result

                # Validate the results by comparing with precomputed values.
                output_data = result.as_numpy(output_name)
                self.assertTrue(
                    np.array_equal(output_data, expected_value),
                    "Expect response {} to have value {}, got {}".format(
                        idx, expected_value, output_data
                    ),
                )
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            # Close the stream even when an assertion above failed.
            self.client.stop_stream()

    def test_batch_item_shape(self):
        """Expect each response to hold the item shape(s) of its own request.

        Response `idx` is compared with the expectation for request `idx`.
        """
        # Use 3 set of inputs with shape [2, 1, 2], [1, 1, 2], [1, 2, 2]
        # Note that the test only checks the formation of "BATCH_INPUT" where
        # the value of "RAGGED_INPUT" is irrelevant, only the shape matters
        self.set_inputs_for_batch_item(
            [[2, 1, 2], [1, 1, 2], [1, 2, 2]], "RAGGED_INPUT"
        )

        expected_outputs = [
            np.array([[1.0, 2.0], [1.0, 2.0]]),
            np.array([[1.0, 2.0]]),
            np.array([[2.0, 2.0]]),
        ]

        model_name = "batch_item"
        user_data = queue.Queue()
        self.client.start_stream(callback=partial(self.client_callback, user_data))

        output_name = "BATCH_OUTPUT"
        outputs = [grpcclient.InferRequestedOutput(output_name)]

        try:
            for request_inputs in self.inputs:
                # Asynchronous inference call.
                self.client.async_stream_infer(
                    model_name=model_name, inputs=request_inputs, outputs=outputs
                )

            for idx in range(len(self.inputs)):
                # Get the result from the initiated asynchronous inference request.
                # Note the call will block till the server responds.
                result = user_data.get()
                # The callback queues the error object when a request fails;
                # surface it instead of treating it as a result.
                if isinstance(result, InferenceServerException):
                    raise result

                # Validate the results by comparing with precomputed values.
                output_data = result.as_numpy(output_name)
                self.assertTrue(
                    np.allclose(output_data, expected_outputs[idx]),
                    "Expect response to have value:\n{}, got:\n{}\nEqual matrix:\n{}".format(
                        expected_outputs[idx],
                        output_data,
                        np.isclose(expected_outputs[idx], output_data),
                    ),
                )
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            # Close the stream even when an assertion above failed.
            self.client.stop_stream()


# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_batch_input/test.sh
================================================
#!/bin/bash
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument, when given,
# takes precedence over NVIDIA_TRITON_SERVER_VERSION. The test aborts when
# neither is available.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Client-side settings: log file, Python test module and the number of test
# cases it is expected to report.
CLIENT_LOG="./client.log"
BATCH_INPUT_TEST=batch_input_test.py
EXPECTED_NUM_TESTS="8"

# Source model repositories the test models are copied from.
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_ragged_model_repository
IDENTITY_DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository

# Server-side settings, followed by the shared QA helpers.
TEST_RESULT_FILE='test_results.txt'
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models --exit-timeout-secs=120"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

# If BACKENDS not specified, set to all
BACKENDS=${BACKENDS:="onnx plan libtorch"}

# Drop logs left over from a previous run.
rm -f $SERVER_LOG $CLIENT_LOG

# Overall result of the script.
RET=0
for BACKEND in $BACKENDS; do
    rm -rf models && mkdir models
    cp -r $DATADIR/${BACKEND}_batch_input models/ragged_element_count_acc_zero
    (cd models/ragged_element_count_acc_zero && \
          sed -i "s/${BACKEND}_batch_input/ragged_element_count_acc_zero/" config.pbtxt)
    cp -r $DATADIR/${BACKEND}_batch_input models/ragged_acc_shape
    (cd models/ragged_acc_shape && \
          sed -i "s/${BACKEND}_batch_input/ragged_acc_shape/" config.pbtxt && \
          sed -i "s/BATCH_ELEMENT_COUNT/BATCH_ACCUMULATED_ELEMENT_COUNT/" config.pbtxt && \
          sed -i "s/BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO/BATCH_MAX_ELEMENT_COUNT_AS_SHAPE/" config.pbtxt)
    cp -r $DATADIR/${BACKEND}_batch_input models/batch_item_flatten
    (cd models/batch_item_flatten && \
          sed -i "s/${BACKEND}_batch_input/batch_item_flatten/" config.pbtxt && \
          sed -i "0,/-1/{s/-1/-1, -1/}" config.pbtxt && \
          sed -i "s/BATCH_ELEMENT_COUNT/BATCH_ACCUMULATED_ELEMENT_COUNT/" config.pbtxt && \
          sed -i "s/BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO/BATCH_ITEM_SHAPE_FLATTEN/" config.pbtxt)
    cp -r $DATADIR/${BACKEND}_batch_item models/batch_item
    (cd models/batch_item && \
          sed -i "s/${BACKEND}_batch_item/batch_item/" config.pbtxt)
    # Use nobatch model to showcase ragged input, identity model to verify
    # batch input is generated properly
    cp -r $IDENTITY_DATADIR/${BACKEND}_nobatch_zero_1_float32 models/ragged_io
    (cd models/ragged_io && \
          # In case of libtorch, update I/O names
          sed -i "s/__0/0/" config.pbtxt && \
          sed -i "s/${BACKEND}_nobatch_zero_1_float32/ragged_io/" config.pbtxt && \
          sed -i "s/^max_batch_size:.*/max_batch_size: 4/" config.pbtxt && \
          sed -i "s/name: \"INPUT0\"/name: \"INPUT0\"\\nallow_ragged_batch: true/" config.pbtxt && \
          echo "batch_output [{target_name: \"OUTPUT0\" \
                                 kind: BATCH_SCATTER_WITH_INPUT_SHAPE \
                                 source_input: \"INPUT0\" }] \
                dynamic_batching { max_queue_delay_microseconds: 1000000 }" >> config.pbtxt)


    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    set +e
    python3 $BATCH_INPUT_TEST >$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_batcher/batcher_test.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import threading
import time
import unittest
from builtins import range

import infer_util as iu
import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient

# By default, find tritonserver on "localhost", but can be overridden
# with TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

# Shared-memory modes are opt-in: each flag is read from the environment
# as an integer and converted to a bool (unset means disabled).
TEST_SYSTEM_SHARED_MEMORY = bool(int(os.environ.get("TEST_SYSTEM_SHARED_MEMORY", 0)))
TEST_CUDA_SHARED_MEMORY = bool(int(os.environ.get("TEST_CUDA_SHARED_MEMORY", 0)))

# The shared-memory helpers are imported only when the matching mode is
# enabled, so `shm` / `cudashm` exist only in that case.
if TEST_SYSTEM_SHARED_MEMORY:
    import tritonclient.utils.shared_memory as shm
if TEST_CUDA_SHARED_MEMORY:
    import tritonclient.utils.cuda_shared_memory as cudashm

# Test with either GRPC or HTTP, but not both since when we check
# results we expect only one to run. A protocol is disabled only by
# setting its variable to exactly "0"; when both are enabled HTTP wins.
USE_GRPC = os.environ.get("USE_GRPC", "1") != "0"
USE_HTTP = os.environ.get("USE_HTTP", "1") != "0"
if USE_GRPC and USE_HTTP:
    USE_GRPC = False
assert USE_GRPC or USE_HTTP, "USE_GRPC or USE_HTTP must be non-zero"

BACKENDS = os.environ.get("BACKENDS", "onnx libtorch plan python")

# Split on any run of whitespace so that repeated, leading or trailing
# blanks in BACKENDS do not produce empty-string trials.
_trials = BACKENDS.split()

# "custom" is always included; the other backends are included only when
# they were selected through BACKENDS.
_ragged_batch_supported_trials = ["custom"]
if "plan" in _trials:
    _ragged_batch_supported_trials.append("plan")
if "onnx" in _trials:
    _ragged_batch_supported_trials.append("onnx")
if "libtorch" in _trials:
    _ragged_batch_supported_trials.append("libtorch")

# Queue delay the tests expect the models to be configured with
# (compared in microseconds by the tests via check_setup).
_max_queue_delay_ms = 10000

# Exceptions raised in worker threads are collected here (under the lock)
# and re-raised on the main thread.
_deferred_exceptions_lock = threading.Lock()
_deferred_exceptions = []


class BatcherTest(tu.TestResultCollector):
    def setUp(self):
        """Create the helper client and reset the per-test bookkeeping."""
        # Chain to the base class so that setUp and tearDown (which calls
        # super().tearDown()) stay symmetric.
        super().setUp()
        # The helper client for setup will be GRPC for simplicity.
        self.triton_client_ = grpcclient.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001"
        )
        # Handles of shared-memory regions created by create_advance();
        # tearDown() destroys them.
        self.precreated_shm_regions_ = []
        # Start every test with no deferred exceptions from earlier tests.
        global _deferred_exceptions
        _deferred_exceptions = []

    def tearDown(self):
        """Unregister shared memory from the server and free local regions.

        The locally created regions are destroyed and the base-class
        tearDown() runs even when unregistering raises, so a failing test
        does not leave regions behind or skip base-class cleanup.
        """
        try:
            if TEST_SYSTEM_SHARED_MEMORY:
                self.triton_client_.unregister_system_shared_memory()
            if TEST_CUDA_SHARED_MEMORY:
                self.triton_client_.unregister_cuda_shared_memory()
        finally:
            try:
                # System shared memory takes precedence when both modes are
                # enabled, matching how create_advance() creates the regions.
                for precreated_shm_region in self.precreated_shm_regions_:
                    if TEST_SYSTEM_SHARED_MEMORY:
                        shm.destroy_shared_memory_region(precreated_shm_region)
                    elif TEST_CUDA_SHARED_MEMORY:
                        cudashm.destroy_shared_memory_region(precreated_shm_region)
            finally:
                super().tearDown()

    # FIXME why only used for outputs
    def create_advance(self, shm_regions=None):
        """Pre-create and register one shared-memory region per given name.

        Args:
            shm_regions: Base names of the regions to create. Defaults to
                ["output0", "output1"] when None.

        Returns:
            The handles created by this call, in the order of the names, or
            an empty list when neither shared-memory mode is enabled. Every
            handle is also remembered on the instance for later cleanup.
        """
        if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):
            return []

        region_names = ["output0", "output1"] if shm_regions is None else shm_regions
        region_size = 512
        created_handles = []
        for base_name in region_names:
            registered_name = base_name + "_data"
            if TEST_SYSTEM_SHARED_MEMORY:
                region_key = "/" + base_name
                handle = shm.create_shared_memory_region(
                    registered_name, region_key, region_size
                )
                self.triton_client_.register_system_shared_memory(
                    registered_name, region_key, region_size
                )
            else:
                handle = cudashm.create_shared_memory_region(
                    registered_name, region_size, 0
                )
                self.triton_client_.register_cuda_shared_memory(
                    registered_name, cudashm.get_raw_handle(handle), 0, region_size
                )
            # Collect precreated handles for cleanup
            self.precreated_shm_regions_.append(handle)
            created_handles.append(handle)
        return created_handles

    def add_deferred_exception(self, ex):
        """Record ``ex`` so that check_deferred_exception() can raise it later."""
        # The module-level list is only mutated here, never rebound, so no
        # ``global`` declaration is required.
        with _deferred_exceptions_lock:
            _deferred_exceptions.append(ex)

    def check_deferred_exception(self):
        """Raise the first recorded deferred exception, if there is any."""
        with _deferred_exceptions_lock:
            # Only the oldest recorded exception is raised; any others stay
            # in the list.
            if _deferred_exceptions:
                raise _deferred_exceptions[0]

    def check_response(
        self,
        trial,
        bs,
        thresholds,
        requested_outputs=("OUTPUT0", "OUTPUT1"),
        input_size=16,
        shm_region_names=None,
        precreated_shm_regions=None,
    ):
        """Run one inference and check how long the response took.

        Any exception, including assertion failures, is recorded through
        add_deferred_exception() instead of propagating, so this method can
        be used as a thread target.

        Args:
            trial: Backend name; one of "libtorch", "onnx", "plan", "python".
            bs: Batch size of the request.
            thresholds: Indexable pair ``(lt_ms, gt_ms)``. The elapsed time
                must be below ``lt_ms`` and above ``gt_ms``; a bound that is
                None is not checked.
            requested_outputs: Output names forwarded to iu.infer_exact.
            input_size: Second dimension of the request tensor shape.
            shm_region_names: Forwarded to iu.infer_exact.
            precreated_shm_regions: Forwarded to iu.infer_exact.
        """
        try:
            # Use the monotonic clock so that wall-clock adjustments during
            # the request cannot distort the measured latency.
            start_ms = int(round(time.monotonic() * 1000))

            if trial not in ("libtorch", "onnx", "plan", "python"):
                self.fail("unknown trial type: " + trial)

            tensor_shape = (bs, input_size)
            iu.infer_exact(
                self,
                trial,
                tensor_shape,
                bs,
                np.float32,
                np.float32,
                np.float32,
                swap=False,
                model_version=1,
                outputs=requested_outputs,
                use_http_json_tensors=False,
                use_grpc=USE_GRPC,
                use_http=USE_HTTP,
                skip_request_id_check=True,
                use_streaming=False,
                shm_region_names=shm_region_names,
                precreated_shm_regions=precreated_shm_regions,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
            )

            end_ms = int(round(time.monotonic() * 1000))
            elapsed_ms = end_ms - start_ms

            lt_ms = thresholds[0]
            gt_ms = thresholds[1]
            if lt_ms is not None:
                self.assertTrue(
                    elapsed_ms < lt_ms,
                    "expected less than "
                    + str(lt_ms)
                    + "ms response time, got "
                    + str(elapsed_ms)
                    + " ms",
                )
            if gt_ms is not None:
                self.assertTrue(
                    elapsed_ms > gt_ms,
                    "expected greater than "
                    + str(gt_ms)
                    + "ms response time, got "
                    + str(elapsed_ms)
                    + " ms",
                )
        except Exception as ex:
            # Deferred so that the main thread can re-raise it after joining
            # any worker threads.
            self.add_deferred_exception(ex)

    def check_setup(self, model_name, preferred_batch_sizes, max_queue_delay_us):
        """Assert that the model's dynamic-batching config matches expectations.

        Args:
            model_name: Name of the model whose configuration is fetched.
            preferred_batch_sizes: Expected preferred batch sizes; the count
                must match and every entry must be present in the config.
            max_queue_delay_us: Expected max queue delay in microseconds.
        """
        # Make sure test.sh set up the correct batcher settings
        model_config = self.triton_client_.get_model_config(model_name).config
        batcher = model_config.dynamic_batching
        self.assertEqual(
            len(batcher.preferred_batch_size), len(preferred_batch_sizes)
        )
        for expected_size in preferred_batch_sizes:
            self.assertTrue(expected_size in batcher.preferred_batch_size)
        self.assertEqual(batcher.max_queue_delay_microseconds, max_queue_delay_us)

    def check_status(self, model_name, batch_exec, request_cnt, infer_cnt, exec_count):
        """Assert that version "1" statistics of ``model_name`` match expectations.

        Args:
            model_name: Model whose inference statistics are queried.
            batch_exec: Mapping of batch size to expected compute_infer count;
                when falsy, the per-batch-size statistics are not checked.
            request_cnt: Expected count of successful requests.
            infer_cnt: Expected inference count.
            exec_count: Container of acceptable execution counts.
        """
        # There is a time window between when responses are returned and statistics are updated.
        # To prevent intermittent test failure during that window, wait up to 10 seconds for the
        # inference statistics to be ready.
        max_attempts = 10
        for attempt in range(max_attempts):
            stats = self.triton_client_.get_inference_statistics(model_name, "1")
            self.assertEqual(len(stats.model_stats), 1, "expect 1 model stats")
            observed_exec_cnt = stats.model_stats[0].execution_count
            if observed_exec_cnt in exec_count:
                break
            print(
                "WARNING: expect {} executions, got {} (attempt {})".format(
                    exec_count, observed_exec_cnt, attempt
                )
            )
            time.sleep(1)

        # All remaining checks use the statistics from the last query above.
        model_stat = stats.model_stats[0]
        self.assertEqual(
            model_stat.name,
            model_name,
            "expect model stats for model {}".format(model_name),
        )
        self.assertEqual(
            model_stat.version,
            "1",
            "expect model stats for model {} version 1".format(model_name),
        )

        if batch_exec:
            per_batch_stats = model_stat.batch_stats
            self.assertEqual(
                len(per_batch_stats),
                len(batch_exec),
                "expected {} different batch-sizes, got {}".format(
                    len(batch_exec), len(per_batch_stats)
                ),
            )

            for entry in per_batch_stats:
                size = entry.batch_size
                # Get count from one of the stats
                count = entry.compute_infer.count
                self.assertTrue(
                    size in batch_exec, "unexpected batch-size {}".format(size)
                )
                expected_count = batch_exec[size]
                self.assertEqual(
                    count,
                    expected_count,
                    "expected model-execution-count {} for batch size {}, got {}".format(
                        expected_count, size, count
                    ),
                )

        observed_request_cnt = model_stat.inference_stats.success.count
        self.assertEqual(
            observed_request_cnt,
            request_cnt,
            "expected model-request-count {}, got {}".format(
                request_cnt, observed_request_cnt
            ),
        )

        observed_exec_cnt = model_stat.execution_count
        self.assertIn(
            observed_exec_cnt,
            exec_count,
            "expected model-exec-count {}, got {}".format(
                exec_count, observed_exec_cnt
            ),
        )

        observed_infer_cnt = model_stat.inference_count
        self.assertEqual(
            observed_infer_cnt,
            infer_cnt,
            "expected model-inference-count {}, got {}".format(
                infer_cnt, observed_infer_cnt
            ),
        )

    def test_static_batch_preferred(self):
        # Issue two requests, one after the other, whose static batch
        # sizes (2 and 6) each equal a preferred size. Both responses
        # are expected right away, i.e. in under 3 seconds.
        output_regions = self.create_advance()
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                for batch_size in (2, 6):
                    self.check_response(
                        trial,
                        batch_size,
                        (3000, None),
                        precreated_shm_regions=output_regions,
                    )
                self.check_deferred_exception()
                # Two requests, two executions, 2 + 6 = 8 inferences.
                self.check_status(model, {2: 1, 6: 1}, 2, 8, (2,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_static_batch_lt_any_preferred(self):
        # Issue a single request whose static batch size (1) is below
        # every preferred size. The response is expected only after the
        # max batch queue delay has elapsed, and before 1.5x that delay.
        output_regions = self.create_advance()
        delayed_window = (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                self.check_response(
                    trial, 1, delayed_window, precreated_shm_regions=output_regions
                )
                self.check_deferred_exception()
                self.check_status(model, {1: 1}, 1, 1, (1,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_static_batch_not_preferred(self):
        # Issue a single request whose static batch size (3) falls
        # between the preferred sizes. The response is expected only
        # after the max batch queue delay, and before 1.5x that delay.
        output_regions = self.create_advance()
        delayed_window = (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                self.check_response(
                    trial, 3, delayed_window, precreated_shm_regions=output_regions
                )
                self.check_deferred_exception()
                self.check_status(model, {3: 1}, 1, 3, (1,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_static_batch_gt_max_preferred(self):
        # Issue a single request whose static batch size (7) exceeds the
        # largest preferred size. It is expected to be answered in under
        # 3 seconds even though the max batching queue delay is much
        # higher.
        output_regions = self.create_advance()
        immediate_window = (3000, None)
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                self.check_response(
                    trial, 7, immediate_window, precreated_shm_regions=output_regions
                )
                self.check_deferred_exception()
                self.check_status(model, {7: 1}, 1, 7, (1,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_different_shape_allow_ragged(self):
        # Send two batch-1 requests whose combined batch size equals a
        # preferred size, but with different shapes (using model with
        # variable-size tensors). Input tensors are marked as allowing
        # ragged batch so requests should be batched into one execution.
        def infer_ragged(trial, dtype, shape):
            # Runs in a worker thread. Exceptions raised there would
            # otherwise never reach the main thread, so record them for
            # check_deferred_exception().
            try:
                iu.infer_zero(
                    self,
                    trial,
                    1,
                    dtype,
                    (list(shape),),
                    (list(shape),),
                    use_grpc=USE_GRPC,
                    use_http=USE_HTTP,
                    use_http_json_tensors=False,
                    use_streaming=False,
                )
            except Exception as ex:
                self.add_deferred_exception(ex)

        for trial in _ragged_batch_supported_trials:
            try:
                dtype = np.float32
                model_name = tu.get_zero_model_name(trial, 1, dtype)

                self.check_setup(model_name, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                threads = [
                    threading.Thread(target=infer_ragged, args=(trial, dtype, [1, 16])),
                    threading.Thread(target=infer_ragged, args=(trial, dtype, [1, 8])),
                ]
                for t in threads:
                    t.start()
                for t in threads:
                    t.join()
                self.check_deferred_exception()
                # Both requests are expected in a single batch-2 execution.
                self.check_status(model_name, {2: 1}, 2, 2, (1,))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

    def test_multi_batch_different_shape(self):
        # Two batch-1 requests whose combined batch size equals a
        # preferred size but whose shapes differ (input sizes 16 and 8,
        # using model with variable-size tensors), so they must not be
        # batched together. The first is expected back within 6 seconds;
        # the second only after the max batch queue delay.
        shm_enabled = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY
        first_names = ["ip00", "ip01", "op00", "op01"] if shm_enabled else None
        second_names = ["ip10", "ip11", "op10", "op11"] if shm_enabled else None
        first_outputs = self.create_advance(["op00", "op01"])
        second_outputs = self.create_advance(["op10", "op11"])
        delayed_window = (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                first_request = threading.Thread(
                    target=self.check_response,
                    args=(trial, 1, (6000, None)),
                    kwargs={
                        "input_size": 16,
                        "shm_region_names": first_names,
                        "precreated_shm_regions": first_outputs,
                    },
                )
                second_request = threading.Thread(
                    target=self.check_response,
                    args=(trial, 1, delayed_window),
                    kwargs={
                        "input_size": 8,
                        "shm_region_names": second_names,
                        "precreated_shm_regions": second_outputs,
                    },
                )
                # Stagger the requests by one second.
                first_request.start()
                time.sleep(1)
                second_request.start()
                first_request.join()
                second_request.join()
                self.check_deferred_exception()
                self.check_status(model, {1: 2}, 2, 2, (2,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_not_preferred(self):
        # Two requests (batch 1, then batch 3 one second later) whose
        # combined batch size of 4 lies between the preferred sizes.
        # The first response is expected after the max batch queue
        # delay; the second after that delay minus an allowance for the
        # later time at which it entered the queue.
        shm_enabled = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY
        first_names = ["ip00", "ip01", "op00", "op01"] if shm_enabled else None
        second_names = ["ip10", "ip11", "op10", "op11"] if shm_enabled else None
        first_outputs = self.create_advance(["op00", "op01"])
        second_outputs = self.create_advance(["op10", "op11"])
        upper_bound_ms = _max_queue_delay_ms * 1.5
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                first_request = threading.Thread(
                    target=self.check_response,
                    args=(trial, 1, (upper_bound_ms, _max_queue_delay_ms)),
                    kwargs={
                        "shm_region_names": first_names,
                        "precreated_shm_regions": first_outputs,
                    },
                )
                second_request = threading.Thread(
                    target=self.check_response,
                    args=(trial, 3, (upper_bound_ms, _max_queue_delay_ms - 2000)),
                    kwargs={
                        "shm_region_names": second_names,
                        "precreated_shm_regions": second_outputs,
                    },
                )
                # Stagger the requests by one second.
                first_request.start()
                time.sleep(1)
                second_request.start()
                first_request.join()
                second_request.join()
                self.check_deferred_exception()
                # Both requests are expected in one batch-4 execution.
                self.check_status(model, {4: 1}, 2, 4, (1,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_not_preferred_different_shape(self):
        # Two requests (batch 1 and batch 3) whose combined batch size
        # lies between the preferred sizes, followed one second later by
        # a batch-1 request with a different shape (input size 8). The
        # first two are expected back within 6 seconds; the third only
        # after the max batch queue delay.
        shm_enabled = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY
        names_0 = ["ip00", "ip01", "op00", "op01"] if shm_enabled else None
        names_1 = ["ip10", "ip11", "op10", "op11"] if shm_enabled else None
        names_2 = ["ip20", "ip21", "op20", "op21"] if shm_enabled else None
        outputs_0 = self.create_advance(["op00", "op01"])
        outputs_1 = self.create_advance(["op10", "op11"])
        outputs_2 = self.create_advance(["op20", "op21"])
        delayed_window = (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                same_shape_requests = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, 1, (6000, None)),
                        kwargs={
                            "shm_region_names": names_0,
                            "precreated_shm_regions": outputs_0,
                        },
                    ),
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, 3, (6000, None)),
                        kwargs={
                            "shm_region_names": names_1,
                            "precreated_shm_regions": outputs_1,
                        },
                    ),
                ]
                other_shape_request = threading.Thread(
                    target=self.check_response,
                    args=(trial, 1, delayed_window),
                    kwargs={
                        "input_size": 8,
                        "shm_region_names": names_2,
                        "precreated_shm_regions": outputs_2,
                    },
                )
                for worker in same_shape_requests:
                    worker.start()
                time.sleep(1)
                other_shape_request.start()
                for worker in same_shape_requests:
                    worker.join()
                other_shape_request.join()
                self.check_deferred_exception()
                self.check_status(model, {1: 1, 4: 1}, 3, 5, (2,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_preferred_different_shape(self):
        # Start two requests (batch sizes 1 and 3) whose combined batch
        # size lies between the preferred sizes. One second later start
        # a request with a different shape (input_size=8) and a
        # non-preferred batch size, which should cause the first two
        # requests to be responded to immediately. A fourth request
        # with the same shape as the third brings that pair up to a
        # preferred size, so the third and fourth responses are sent
        # immediately as well.
        if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):
            region_names = [None, None, None, None]
        else:
            region_names = [
                ["ip00", "ip01", "op00", "op01"],
                ["ip10", "ip11", "op10", "op11"],
                ["ip20", "ip21", "op20", "op21"],
                ["ip30", "ip31", "op30", "op31"],
            ]
        # One create_advance() call per request, made before any trial runs.
        output_regions = [
            self.create_advance(pair)
            for pair in (
                ["op00", "op01"],
                ["op10", "op11"],
                ["op20", "op21"],
                ["op30", "op31"],
            )
        ]
        # (batch size, shape-related keyword arguments), in start order.
        request_specs = (
            (1, {}),
            (3, {}),
            (1, {"input_size": 8}),
            (5, {"input_size": 8}),
        )
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                workers = []
                for idx, (batch_size, shape_kwargs) in enumerate(request_specs):
                    call_kwargs = dict(shape_kwargs)
                    call_kwargs["shm_region_names"] = region_names[idx]
                    call_kwargs["precreated_shm_regions"] = output_regions[idx]
                    workers.append(
                        threading.Thread(
                            target=self.check_response,
                            args=(trial, batch_size, (6000, None)),
                            kwargs=call_kwargs,
                        )
                    )

                # First pair goes out together; the differently shaped
                # pair follows one second later.
                for worker in workers[:2]:
                    worker.start()
                time.sleep(1)
                for worker in workers[2:]:
                    worker.start()
                for worker in workers:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model, {4: 1, 6: 1}, 4, 10, (2,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_gt_max_preferred(self):
        # Two requests: the first (batch size 3) is not a preferred
        # size and the second (batch size 7) is larger than the max
        # preferred size. The second is started one second after the
        # first so that it arrives once the first is already being
        # processed by the dynamic batcher. Both responses should be
        # returned immediately.
        use_shm = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY
        first_regions = ["ip00", "ip01", "op00", "op01"] if use_shm else None
        second_regions = ["ip10", "ip11", "op10", "op11"] if use_shm else None
        first_outputs = self.create_advance(["op00", "op01"])
        second_outputs = self.create_advance(["op10", "op11"])
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                early = threading.Thread(
                    target=self.check_response,
                    args=(trial, 3, (3000, None)),
                    kwargs={
                        "shm_region_names": first_regions,
                        "precreated_shm_regions": first_outputs,
                    },
                )
                late = threading.Thread(
                    target=self.check_response,
                    args=(trial, 7, (3000, None)),
                    kwargs={
                        "shm_region_names": second_regions,
                        "precreated_shm_regions": second_outputs,
                    },
                )

                early.start()
                time.sleep(1)
                late.start()
                early.join()
                late.join()
                self.check_deferred_exception()
                self.check_status(model, {3: 1, 7: 1}, 2, 10, (2,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_sum_gt_max_preferred(self):
        # Two requests: the first (batch size 3) is not a preferred
        # size; the second (batch size 4) is smaller than the max
        # preferred size, but the sum of the two is larger than it.
        # The second is started one second after the first so that it
        # arrives once the first is already being processed by the
        # dynamic batcher. The first response should be returned
        # immediately, while the second, which on its own does not
        # exceed the max preferred size, is expected to be delayed.
        if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):
            region_names = (None, None)
        else:
            region_names = (
                ["ip00", "ip01", "op00", "op01"],
                ["ip10", "ip11", "op10", "op11"],
            )
        output_regions = (
            self.create_advance(["op00", "op01"]),
            self.create_advance(["op10", "op11"]),
        )
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                # (batch size, thresholds tuple handed to check_response)
                plan = (
                    (3, (3000, None)),
                    (4, (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)),
                )
                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, batch_size, thresholds),
                        kwargs={
                            "shm_region_names": region_names[idx],
                            "precreated_shm_regions": output_regions[idx],
                        },
                    )
                    for idx, (batch_size, thresholds) in enumerate(plan)
                ]

                first, second = workers
                first.start()
                time.sleep(1)
                second.start()
                for worker in workers:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model, {3: 1, 4: 1}, 2, 7, (2,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_same_output0(self):
        # Both requests ask only for OUTPUT0. They should still be
        # batched together and receive the correct response even
        # though neither requests both outputs.
        use_shm = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY
        region_names = (
            ["ip00", "ip01", "op00"] if use_shm else None,
            ["ip10", "ip11", "op10"] if use_shm else None,
        )
        output_regions = (
            self.create_advance(["op00"]),
            self.create_advance(["op10"]),
        )
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)

                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, 1, (3000, None)),
                        kwargs={
                            "requested_outputs": ("OUTPUT0",),
                            "shm_region_names": names,
                            "precreated_shm_regions": regions,
                        },
                    )
                    for names, regions in zip(region_names, output_regions)
                ]
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model, {2: 1}, 2, 2, (1,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_same_output1(self):
        # Both requests ask only for OUTPUT1. They should still be
        # batched together and receive the correct response even
        # though neither requests both outputs.
        if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):
            names_a = None
            names_b = None
        else:
            names_a = ["ip00", "ip01", "op01"]
            names_b = ["ip10", "ip11", "op11"]
        regions_a = self.create_advance(["op01"])
        regions_b = self.create_advance(["op11"])
        # Keyword arguments for each request; identical across trials.
        kwargs_a = {
            "requested_outputs": ("OUTPUT1",),
            "shm_region_names": names_a,
            "precreated_shm_regions": regions_a,
        }
        kwargs_b = {
            "requested_outputs": ("OUTPUT1",),
            "shm_region_names": names_b,
            "precreated_shm_regions": regions_b,
        }
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)

                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                workers = []
                for call_kwargs in (kwargs_a, kwargs_b):
                    workers.append(
                        threading.Thread(
                            target=self.check_response,
                            args=(trial, 1, (3000, None)),
                            kwargs=call_kwargs,
                        )
                    )
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model, {2: 1}, 2, 2, (1,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_different_outputs(self):
        # One request asks only for OUTPUT0 and the other only for
        # OUTPUT1. They should still be batched together and receive
        # the correct response even though neither requests both
        # outputs.
        use_shm = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY
        out0_names = ["ip00", "ip01", "op00"] if use_shm else None
        out1_names = ["ip10", "ip11", "op11"] if use_shm else None
        out0_regions = self.create_advance(["op00"])
        out1_regions = self.create_advance(["op11"])
        # (requested outputs, region names, precreated regions) per request.
        requests = (
            (("OUTPUT0",), out0_names, out0_regions),
            (("OUTPUT1",), out1_names, out1_regions),
        )
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)

                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, 1, (6000, None)),
                        kwargs={
                            "requested_outputs": outputs,
                            "shm_region_names": names,
                            "precreated_shm_regions": regions,
                        },
                    )
                    for outputs, names, regions in requests
                ]
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model, {2: 1}, 2, 2, (1,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_different_output_order(self):
        # Both requests ask for both outputs, but list them in a
        # different order. They should be batched together and receive
        # the correct response regardless of the order used.
        if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):
            forward_names = None
            reversed_names = None
        else:
            forward_names = ["ip00", "ip01", "op00", "op01"]
            # Output region names follow the reversed output order.
            reversed_names = ["ip10", "ip11", "op11", "op10"]
        # (requested outputs, region names) per request; no precreated
        # regions are passed in this test.
        requests = (
            (("OUTPUT0", "OUTPUT1"), forward_names),
            (("OUTPUT1", "OUTPUT0"), reversed_names),
        )
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)

                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)

                workers = []
                for outputs, names in requests:
                    workers.append(
                        threading.Thread(
                            target=self.check_response,
                            args=(trial, 1, (6000, None)),
                            kwargs={
                                "requested_outputs": outputs,
                                "shm_region_names": names,
                            },
                        )
                    )
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model, {2: 1}, 2, 2, (1,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_delayed_sum_gt_max_preferred(self):
        # Two requests: the first (batch size 3) is not a preferred
        # size; the second (batch size 4) is smaller than the max
        # preferred size, but the sum of the two is larger than it.
        # TRITONSERVER_DELAY_SCHEDULER must be set to 2 in the
        # environment so that both requests can be queued up before
        # the scheduler starts servicing. The first response should be
        # returned immediately, while the second, which on its own
        # does not exceed the max preferred size, is expected to be
        # delayed.
        use_shm = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY
        first_names = ["ip00", "ip01", "op00", "op01"] if use_shm else None
        second_names = ["ip10", "ip11", "op10", "op11"] if use_shm else None
        first_regions = self.create_advance(["op00", "op01"])
        second_regions = self.create_advance(["op10", "op11"])
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)

                # The scheduler has to wait until 2 requests are queued.
                self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 2)

                prompt = threading.Thread(
                    target=self.check_response,
                    args=(trial, 3, (6000, None)),
                    kwargs={
                        "shm_region_names": first_names,
                        "precreated_shm_regions": first_regions,
                    },
                )
                delayed = threading.Thread(
                    target=self.check_response,
                    args=(
                        trial,
                        4,
                        (_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
                    ),
                    kwargs={
                        "shm_region_names": second_names,
                        "precreated_shm_regions": second_regions,
                    },
                )

                prompt.start()
                time.sleep(1)
                delayed.start()
                prompt.join()
                delayed.join()
                self.check_deferred_exception()
                self.check_status(model, {3: 1, 4: 1}, 2, 7, (2,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_delayed_use_max_batch(self):
        # Three requests: the first (batch size 3) is not a preferred
        # size; the second (batch size 4) is smaller than the max
        # preferred size, but the sum of the two is larger than it.
        # The third (batch size 1) is started 11 seconds later, after
        # the first two have exceeded the queue delay, and brings the
        # total up to a full batch. TRITONSERVER_DELAY_SCHEDULER must
        # be set to 3 in the environment so that the requests can be
        # queued up before the scheduler starts servicing. All
        # responses should be returned together; from the clients'
        # point of view the first two appear delayed and the third
        # appears to be returned immediately.
        if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):
            region_names = [None, None, None]
        else:
            region_names = [
                ["ip00", "ip01", "op00", "op01"],
                ["ip10", "ip11", "op10", "op11"],
                ["ip20", "ip21", "op20", "op21"],
            ]
        output_regions = [
            self.create_advance(pair)
            for pair in (["op00", "op01"], ["op10", "op11"], ["op20", "op21"])
        ]
        batch_sizes = (3, 4, 1)
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)

                # The scheduler has to wait until 3 requests are queued.
                self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 3)

                # Thresholds tuple shared by the two early requests.
                delayed_window = (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)
                thresholds = (delayed_window, delayed_window, (6000, None))
                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, batch_size, window),
                        kwargs={
                            "shm_region_names": names,
                            "precreated_shm_regions": regions,
                        },
                    )
                    for batch_size, window, names, regions in zip(
                        batch_sizes, thresholds, region_names, output_regions
                    )
                ]

                for worker in workers[:2]:
                    worker.start()
                time.sleep(11)
                workers[2].start()
                for worker in workers:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model, {8: 1}, 3, 8, (1,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_delayed_preferred_different_shape(self):
        # Start two requests (batch sizes 1 and 3) whose combined batch
        # size lies between the preferred sizes, then a request with a
        # different shape (input_size=8) and a non-preferred batch
        # size. TRITONSERVER_DELAY_SCHEDULER must be set to 4 in the
        # environment so that the requests can be queued up before the
        # scheduler starts servicing. The first two requests should be
        # responded to immediately. A fourth request with the same
        # shape as the third brings that pair up to a preferred size,
        # so the third and fourth responses are sent immediately too.
        use_shm = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY
        # (batch size, input_size override or None, region names,
        # precreated output regions), in start order.
        slots = [
            (
                1,
                None,
                ["ip00", "ip01", "op00", "op01"] if use_shm else None,
                self.create_advance(["op00", "op01"]),
            ),
            (
                3,
                None,
                ["ip10", "ip11", "op10", "op11"] if use_shm else None,
                self.create_advance(["op10", "op11"]),
            ),
            (
                1,
                8,
                ["ip20", "ip21", "op20", "op21"] if use_shm else None,
                self.create_advance(["op20", "op21"]),
            ),
            (
                5,
                8,
                ["ip30", "ip31", "op30", "op31"] if use_shm else None,
                self.create_advance(["op30", "op31"]),
            ),
        ]
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)

                # The scheduler has to wait until 4 requests are queued.
                self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 4)

                workers = []
                for batch_size, input_size, names, regions in slots:
                    call_kwargs = {}
                    # Only the differently shaped requests pass input_size.
                    if input_size is not None:
                        call_kwargs["input_size"] = input_size
                    call_kwargs["shm_region_names"] = names
                    call_kwargs["precreated_shm_regions"] = regions
                    workers.append(
                        threading.Thread(
                            target=self.check_response,
                            args=(trial, batch_size, (3000, None)),
                            kwargs=call_kwargs,
                        )
                    )

                for worker in workers[:2]:
                    worker.start()
                time.sleep(1)
                for worker in workers[2:]:
                    worker.start()
                for worker in workers:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model, {4: 1, 6: 1}, 4, 10, (2,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_use_biggest_preferred(self):
        # Send six batch-size-1 requests, whose running total passes
        # through more than one preferred size, and make sure the
        # largest preferred size is used for the batch.
        # TRITONSERVER_DELAY_SCHEDULER must be set to 6 in the
        # environment so that the requests can be queued up before the
        # scheduler starts servicing.
        if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):
            region_names = [None] * 6
        else:
            region_names = [
                ["ip00", "ip01", "op00", "op01"],
                ["ip10", "ip11", "op10", "op11"],
                ["ip20", "ip21", "op20", "op21"],
                ["ip30", "ip31", "op30", "op31"],
                ["ip40", "ip41", "op40", "op41"],
                ["ip50", "ip51", "op50", "op51"],
            ]
        # One create_advance() call per request, made before any trial runs.
        output_regions = [
            self.create_advance(pair)
            for pair in (
                ["op00", "op01"],
                ["op10", "op11"],
                ["op20", "op21"],
                ["op30", "op31"],
                ["op40", "op41"],
                ["op50", "op51"],
            )
        ]
        for trial in _trials:
            try:
                model = tu.get_model_name(trial, np.float32, np.float32, np.float32)

                self.check_setup(model, [2, 6], _max_queue_delay_ms * 1000)

                # The scheduler has to wait until 6 requests are queued.
                self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 6)

                # Every request is identical apart from its regions.
                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, 1, (6000, None)),
                        kwargs={
                            "shm_region_names": names,
                            "precreated_shm_regions": regions,
                        },
                    )
                    for names, regions in zip(region_names, output_regions)
                ]
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model, {6: 1}, 6, 6, (1,))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_multi_batch_use_best_preferred(self):
        """Check that the best preferred batch runs first and the rest waits.

        Three batch-size-1 requests are sent: the first two together sum
        to a preferred batch size, and the third, started one second
        later, goes beyond it. The first two should be handled
        immediately at the preferred batch size and the third only after
        the queue-delay timeout. TRITONSERVER_DELAY_SCHEDULER must be set
        to 3 in the environment so that requests can queue up before the
        scheduler starts servicing.
        """
        num_requests = 3
        shm_enabled = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY

        # Request <i> uses regions ip<i>0/ip<i>1 and op<i>0/op<i>1. The
        # region names are only passed along when a shared-memory mode is
        # enabled, but create_advance is always called for the outputs.
        region_names = []
        precreated_regions = []
        for idx in range(num_requests):
            output_names = ["op{}0".format(idx), "op{}1".format(idx)]
            if shm_enabled:
                input_names = ["ip{}0".format(idx), "ip{}1".format(idx)]
                region_names.append(input_names + output_names)
            else:
                region_names.append(None)
            precreated_regions.append(self.create_advance(output_names))

        for trial in _trials:
            try:
                model_name = tu.get_model_name(
                    trial, np.float32, np.float32, np.float32
                )

                self.check_setup(model_name, [2, 6], _max_queue_delay_ms * 1000)

                # The scheduler has to hold off until 3 requests are queued.
                self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 3)

                # Timing tuple handed to check_response for each request;
                # the last request is the one tied to the queue delay.
                prompt = (6000, None)
                after_delay = (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)
                timings = [prompt, prompt, after_delay]

                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, 1, timing),
                        kwargs={
                            "shm_region_names": names,
                            "precreated_shm_regions": regions,
                        },
                    )
                    for timing, names, regions in zip(
                        timings, region_names, precreated_regions
                    )
                ]

                # Launch the first two requests together and the extra
                # one a second later.
                for worker in workers[:2]:
                    worker.start()
                time.sleep(1)
                workers[2].start()
                for worker in workers:
                    worker.join()

                self.check_deferred_exception()
                self.check_status(model_name, {2: 1, 1: 1}, 3, 3, (2,))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

    def test_multi_batch_preserve_ordering(self):
        """Send 12 concurrent batch-size-1 requests to the "custom" model.

        All requests are issued from separate threads via iu.infer_zero
        and the model status is then checked against 12 requests and,
        presumably, three executions of batch size 4 (confirm against
        check_status).
        """
        model_base = "custom"
        dtype = np.float32
        shapes = ([1, 1],)

        def region_prefix(request_index):
            # Region name prefixes are only supplied when one of the
            # shared-memory modes is enabled.
            if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):
                return None
            return ["input" + str(request_index), "output" + str(request_index)]

        try:
            # One thread per request so that no request waits for the
            # response of another.
            senders = [
                threading.Thread(
                    target=iu.infer_zero,
                    args=(self, model_base, 1, dtype, shapes, shapes),
                    kwargs={
                        "use_grpc": USE_GRPC,
                        "use_http": USE_HTTP,
                        "use_http_json_tensors": False,
                        "use_streaming": False,
                        "shm_region_name_prefix": region_prefix(request_index),
                        "use_system_shared_memory": TEST_SYSTEM_SHARED_MEMORY,
                        "use_cuda_shared_memory": TEST_CUDA_SHARED_MEMORY,
                    },
                )
                for request_index in range(12)
            ]
            for sender in senders:
                sender.start()
            for sender in senders:
                sender.join()

            self.check_deferred_exception()
            model_name = tu.get_zero_model_name(model_base, len(shapes), dtype)
            self.check_status(model_name, {4: 3}, 12, 12, (3,))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_preferred_batch_only_aligned(self):
        """Check that 4 queued requests form one batch of preferred size 4.

        Four batch-size-1 requests are sent concurrently.
        TRITONSERVER_DELAY_SCHEDULER must be set to 4 in the environment
        so that the requests can queue up before the scheduler starts
        servicing.
        """
        num_requests = 4
        shm_enabled = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY

        # Request <i> uses regions ip<i>0/ip<i>1 and op<i>0/op<i>1. The
        # region names are only passed along when a shared-memory mode is
        # enabled, but create_advance is always called for the outputs.
        region_names = []
        precreated_regions = []
        for idx in range(num_requests):
            output_names = ["op{}0".format(idx), "op{}1".format(idx)]
            if shm_enabled:
                input_names = ["ip{}0".format(idx), "ip{}1".format(idx)]
                region_names.append(input_names + output_names)
            else:
                region_names.append(None)
            precreated_regions.append(self.create_advance(output_names))

        for trial in _trials:
            try:
                model_name = tu.get_model_name(
                    trial, np.float32, np.float32, np.float32
                )

                self.check_setup(model_name, [4, 6], 0)

                # The scheduler has to hold off until 4 requests are queued.
                self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 4)

                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, 1, (6000, None)),
                        kwargs={
                            "shm_region_names": names,
                            "precreated_shm_regions": regions,
                        },
                    )
                    for names, regions in zip(region_names, precreated_regions)
                ]
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()

                self.check_deferred_exception()
                self.check_status(model_name, {4: 1}, 4, 4, (1,))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

    def test_preferred_batch_only_unaligned(self):
        """Check that 5 queued requests form a batch of 4 and a batch of 1.

        Five batch-size-1 requests are sent concurrently. The batcher
        should form a batch of preferred size 4 followed by a batch of
        size 1. TRITONSERVER_DELAY_SCHEDULER must be set to 5 in the
        environment so that the requests can queue up before the
        scheduler starts servicing.
        """
        num_requests = 5
        shm_enabled = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY

        # Request <i> uses regions ip<i>0/ip<i>1 and op<i>0/op<i>1. The
        # region names are only passed along when a shared-memory mode is
        # enabled, but create_advance is always called for the outputs.
        region_names = []
        precreated_regions = []
        for idx in range(num_requests):
            output_names = ["op{}0".format(idx), "op{}1".format(idx)]
            if shm_enabled:
                input_names = ["ip{}0".format(idx), "ip{}1".format(idx)]
                region_names.append(input_names + output_names)
            else:
                region_names.append(None)
            precreated_regions.append(self.create_advance(output_names))

        for trial in _trials:
            try:
                model_name = tu.get_model_name(
                    trial, np.float32, np.float32, np.float32
                )

                self.check_setup(model_name, [4, 6], 0)

                # The scheduler has to hold off until 5 requests are queued.
                self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 5)

                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, 1, (6000, None)),
                        kwargs={
                            "shm_region_names": names,
                            "precreated_shm_regions": regions,
                        },
                    )
                    for names, regions in zip(region_names, precreated_regions)
                ]
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()

                self.check_deferred_exception()
                self.check_status(model_name, {4: 1, 1: 1}, 5, 5, (2,))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

    def test_preferred_batch_only_use_biggest_preferred(self):
        """Check that 7 queued requests form a batch of 6 and a batch of 1.

        Seven batch-size-1 requests are sent concurrently. The batcher
        should form a batch of the largest preferred size 6 followed by
        a batch of size 1. TRITONSERVER_DELAY_SCHEDULER must be set to 7
        in the environment so that the requests can queue up before the
        scheduler starts servicing.
        """
        num_requests = 7
        shm_enabled = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY

        # Request <i> uses regions ip<i>0/ip<i>1 and op<i>0/op<i>1. The
        # region names are only passed along when a shared-memory mode is
        # enabled, but create_advance is always called for the outputs.
        region_names = []
        precreated_regions = []
        for idx in range(num_requests):
            output_names = ["op{}0".format(idx), "op{}1".format(idx)]
            if shm_enabled:
                input_names = ["ip{}0".format(idx), "ip{}1".format(idx)]
                region_names.append(input_names + output_names)
            else:
                region_names.append(None)
            precreated_regions.append(self.create_advance(output_names))

        for trial in _trials:
            try:
                model_name = tu.get_model_name(
                    trial, np.float32, np.float32, np.float32
                )

                self.check_setup(model_name, [4, 6], 0)

                # The scheduler has to hold off until 7 requests are queued.
                self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 7)

                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, 1, (6000, None)),
                        kwargs={
                            "shm_region_names": names,
                            "precreated_shm_regions": regions,
                        },
                    )
                    for names, regions in zip(region_names, precreated_regions)
                ]
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()

                self.check_deferred_exception()
                self.check_status(model_name, {6: 1, 1: 1}, 7, 7, (2,))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

    def test_preferred_batch_only_use_no_preferred_size(self):
        """Check that 3 queued requests form a single batch of 3.

        Three batch-size-1 requests are sent concurrently against a
        model whose preferred sizes are 4 and 6, so no preferred size
        can be reached. TRITONSERVER_DELAY_SCHEDULER must be set to 3 in
        the environment so that the requests can queue up before the
        scheduler starts servicing.
        """
        num_requests = 3
        shm_enabled = TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY

        # Request <i> uses regions ip<i>0/ip<i>1 and op<i>0/op<i>1. The
        # region names are only passed along when a shared-memory mode is
        # enabled, but create_advance is always called for the outputs.
        region_names = []
        precreated_regions = []
        for idx in range(num_requests):
            output_names = ["op{}0".format(idx), "op{}1".format(idx)]
            if shm_enabled:
                input_names = ["ip{}0".format(idx), "ip{}1".format(idx)]
                region_names.append(input_names + output_names)
            else:
                region_names.append(None)
            precreated_regions.append(self.create_advance(output_names))

        for trial in _trials:
            try:
                model_name = tu.get_model_name(
                    trial, np.float32, np.float32, np.float32
                )

                self.check_setup(model_name, [4, 6], 0)

                # The scheduler has to hold off until 3 requests are queued.
                self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 3)

                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(trial, 1, (6000, None)),
                        kwargs={
                            "shm_region_names": names,
                            "precreated_shm_regions": regions,
                        },
                    )
                    for names, regions in zip(region_names, precreated_regions)
                ]
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()

                self.check_deferred_exception()
                self.check_status(model_name, {3: 1}, 3, 3, (1,))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

    def test_max_queue_delay_only_non_default(self):
        """Send 12 batch-size-1 requests with a non-zero max_queue_delay.

        Depending on the timing of the request arrivals there can be
        either 1 or 2 model executions, so both counts are accepted by
        the status check.
        """
        model_base = "custom"
        dtype = np.float32
        shapes = ([1, 1],)

        def region_prefix(request_index):
            # Region name prefixes are only supplied when one of the
            # shared-memory modes is enabled.
            if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):
                return None
            return ["input" + str(request_index), "output" + str(request_index)]

        try:
            # One thread per request so that no request waits for the
            # response of another.
            senders = [
                threading.Thread(
                    target=iu.infer_zero,
                    args=(self, model_base, 1, dtype, shapes, shapes),
                    kwargs={
                        "use_grpc": USE_GRPC,
                        "use_http": USE_HTTP,
                        "use_http_json_tensors": False,
                        "use_streaming": False,
                        "shm_region_name_prefix": region_prefix(request_index),
                        "use_system_shared_memory": TEST_SYSTEM_SHARED_MEMORY,
                        "use_cuda_shared_memory": TEST_CUDA_SHARED_MEMORY,
                    },
                )
                for request_index in range(12)
            ]
            for sender in senders:
                sender.start()
            for sender in senders:
                sender.join()

            self.check_deferred_exception()
            model_name = tu.get_zero_model_name(model_base, len(shapes), dtype)
            self.check_status(model_name, None, 12, 12, (1, 2))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_max_queue_delay_only_default(self):
        """Send 12 batch-size-1 requests with the default max_queue_delay of 0.

        There should be two distinct model executions: the first few
        requests form a first batch and the remaining requests form the
        second batch.
        """
        model_base = "custom"
        dtype = np.float32
        shapes = ([1, 1],)

        def region_prefix(request_index):
            # Region name prefixes are only supplied when one of the
            # shared-memory modes is enabled.
            if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):
                return None
            return ["input" + str(request_index), "output" + str(request_index)]

        try:
            # One thread per request so that no request waits for the
            # response of another.
            senders = [
                threading.Thread(
                    target=iu.infer_zero,
                    args=(self, model_base, 1, dtype, shapes, shapes),
                    kwargs={
                        "use_grpc": USE_GRPC,
                        "use_http": USE_HTTP,
                        "use_http_json_tensors": False,
                        "use_streaming": False,
                        "shm_region_name_prefix": region_prefix(request_index),
                        "use_system_shared_memory": TEST_SYSTEM_SHARED_MEMORY,
                        "use_cuda_shared_memory": TEST_CUDA_SHARED_MEMORY,
                    },
                )
                for request_index in range(12)
            ]
            for sender in senders:
                sender.start()
            for sender in senders:
                sender.join()

            self.check_deferred_exception()
            model_name = tu.get_zero_model_name(model_base, len(shapes), dtype)
            self.check_status(model_name, None, 12, 12, (2,))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))


# Run every test case defined in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_batcher/queue_timeout_test.py
================================================
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import concurrent.futures
import os
import time
import unittest

import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

# Host on which tritonserver is reached. Defaults to "localhost"; for
# Windows tests the address is overridden through the
# TRITONSERVER_IPADDR environment variable.
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class TestMaxQueueDelayTimeout(unittest.TestCase):
    """Checks that a request left in the queue is answered with a timeout."""

    def setUp(self):
        # Initialize client
        self._triton = grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001")
        # Close the gRPC client after each test, whether it passed or
        # failed, so that no channel is left open.
        self.addCleanup(self._triton.close)

    def _get_inputs(self, batch_size):
        """Return a one-element input list holding an all-ones FP32 tensor.

        The tensor is named "INPUT0" and has shape [batch_size, 8];
        batch_size must be a positive int.
        """
        self.assertIsInstance(batch_size, int)
        self.assertGreater(batch_size, 0)
        shape = [batch_size, 8]
        inputs = [grpcclient.InferInput("INPUT0", shape, "FP32")]
        inputs[0].set_data_from_numpy(np.ones(shape, dtype=np.float32))
        return inputs

    def _generate_callback_and_response_pair(self):
        """Return an async-infer callback and the dict it records into.

        The dict starts as {"responded": False, "result": None,
        "error": None}; calling the callback marks it as responded and
        stores the result and error it was called with.
        """
        response = {"responded": False, "result": None, "error": None}

        def callback(result, error):
            response["responded"] = True
            response["result"] = result
            response["error"] = error

        return callback, response

    # Test that a request queued on the dynamic batch scheduler gets a
    # prompt timeout response under the default queue policy
    def test_default_queue_policy_timeout_prompt_response(self):
        model_name = "dynamic_batch"
        with concurrent.futures.ThreadPoolExecutor() as pool:
            # Saturate the slots on the model
            saturate_thread = pool.submit(
                self._triton.infer, model_name, self._get_inputs(batch_size=1)
            )
            time.sleep(2)  # ensure the slots are filled
            # The next request should be queued
            callback, response = self._generate_callback_and_response_pair()
            self._triton.async_infer(
                model_name, self._get_inputs(batch_size=1), callback
            )
            time.sleep(2)  # ensure the request is queued
            # Check if the request has timed-out
            time.sleep(2)  # ensure the timeout period has expired
            self.assertTrue(response["responded"])
            self.assertIsNone(response["result"])
            self.assertIsInstance(response["error"], InferenceServerException)
            self.assertEqual(response["error"].status(), "StatusCode.UNAVAILABLE")
            self.assertEqual(response["error"].message(), "Request timeout expired")
            # Join saturating thread
            saturate_thread.result()


# Run every test case defined in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_batcher/test.sh
================================================
#!/bin/bash
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Must run on a single device or else the TRITONSERVER_DELAY_SCHEDULER
# can fail when the requests are distributed to multiple devices.
export CUDA_VISIBLE_DEVICES=0

# Files and scripts used throughout the test.
TEST_RESULT_FILE='test_results.txt'
VERIFY_TIMESTAMPS=verify_timestamps.py
BATCHER_TEST=batcher_test.py
CLIENT_LOG="./client.log"

# Optional switches default to "0" when unset or empty.
TEST_VALGRIND="${TEST_VALGRIND:-0}"
TEST_CUDA_SHARED_MEMORY="${TEST_CUDA_SHARED_MEMORY:-0}"

# Valgrind mode (TEST_VALGRIND=1): set up the leak checker and replace the
# test lists with shorter ones. Assigning the lists here means the
# "${VAR:=default}" assignments later in the script leave them untouched.
if [ "$TEST_VALGRIND" -eq 1 ]; then
    LEAKCHECK=/usr/bin/valgrind
    LEAKCHECK_ARGS_BASE="--leak-check=full --show-leak-kinds=definite --max-threads=3000"
    # NOTE(review): SERVER_TIMEOUT is not read in this script; presumably it
    # is consumed by the server start helpers in ../common/util.sh - confirm.
    SERVER_TIMEOUT=3600
    # Drop valgrind logs left over from a previous run.
    rm -f *.valgrind.log

    NO_DELAY_TESTS="test_static_batch_preferred \
                        test_multi_batch_sum_gt_max_preferred \
                        test_multi_same_output0 \
                        test_multi_different_output_order"

    DELAY_TESTS="test_multi_batch_use_biggest_preferred \
                    test_multi_batch_use_best_preferred"

    DIFFERENT_SHAPE_TESTS="test_multi_batch_not_preferred_different_shape \
                                test_multi_batch_different_shape_allow_ragged"
fi

# On windows the paths invoked by the script (running in WSL) must use
# /mnt/c when needed but the paths on the tritonserver command-line
# must be C:/ style.
if [[ -v WSL_DISTRO_NAME || -v MSYSTEM ]]; then
    # Each location keeps a caller-provided value and only falls back to the
    # default when unset or empty.
    : "${MODELDIR:=C:/models}"
    : "${DATADIR:=/mnt/c/data/inferenceserver/${REPO_VERSION}}"
    : "${BACKEND_DIR:=C:/tritonserver/backends}"
    : "${SERVER:=/mnt/c/tritonserver/bin/tritonserver.exe}"
    export WSLENV="${WSLENV}:TRITONSERVER_DELAY_SCHEDULER"
    TEST_WINDOWS=1
    # DLIS-7683 This test fails performance-related response time parameters
    # when using HTTP protocol. Use gRPC protocol for now as a WAR.
    export USE_GRPC=1
else
    : "${MODELDIR:=$(pwd)}"
    : "${DATADIR:=/data/inferenceserver/${REPO_VERSION}}"
    : "${TRITON_DIR:=/opt/tritonserver}"
    # Server binary and backends are always taken from TRITON_DIR here.
    SERVER="${TRITON_DIR}/bin/tritonserver"
    BACKEND_DIR="${TRITON_DIR}/backends"

    # PyTorch on SBSA requires libgomp to be loaded first. See the following
    # GitHub issue for more information:
    # https://github.com/pytorch/pytorch/issues/2575
    arch=$(uname -m)
    if [[ "$arch" == "aarch64" ]]; then
        SERVER_LD_PRELOAD="/usr/lib/${arch}-linux-gnu/libgomp.so.1"
    fi
fi

# Arguments shared by every server launch in this script.
SERVER_ARGS_EXTRA="--backend-directory=${BACKEND_DIR}"

# Pull in the shared QA shell helpers.
. ../common/util.sh

# Overall result; switched to 1 by any failing step.
RET=0

# Default to all backends when BACKENDS is unset or empty, and export the
# list to child processes.
export BACKENDS="${BACKENDS:-onnx libtorch plan python}"

# Default test lists. Every list uses "${VAR:=default}", so a value that is
# already set and non-empty (from the environment or from the valgrind
# section of this script) is kept and the default is ignored.

# Basic batcher tests
NO_DELAY_TESTS=${NO_DELAY_TESTS:="test_static_batch_preferred \
                            test_static_batch_lt_any_preferred \
                            test_static_batch_not_preferred \
                            test_static_batch_gt_max_preferred \
                            test_multi_batch_not_preferred \
                            test_multi_batch_gt_max_preferred \
                            test_multi_batch_sum_gt_max_preferred \
                            test_multi_same_output0 \
                            test_multi_same_output1 \
                            test_multi_different_outputs \
                            test_multi_different_output_order"}

# Tests that use scheduler delay
DELAY_TESTS=${DELAY_TESTS:="test_multi_batch_delayed_sum_gt_max_preferred \
                        test_multi_batch_use_biggest_preferred \
                        test_multi_batch_use_best_preferred \
                        test_multi_batch_delayed_use_max_batch"}

# Tests with different shapes
DIFFERENT_SHAPE_TESTS=${DIFFERENT_SHAPE_TESTS:="test_multi_batch_not_preferred_different_shape \
                                        test_multi_batch_preferred_different_shape \
                                        test_multi_batch_different_shape_allow_ragged \
                                        test_multi_batch_different_shape"}

# Tests with preferred batch sizes but default max_queue_delay
PREFERRED_BATCH_ONLY_TESTS=${PREFERRED_BATCH_ONLY_TESTS:="test_preferred_batch_only_aligned \
                                                    test_preferred_batch_only_unaligned \
                                                    test_preferred_batch_only_use_biggest_preferred \
                                                    test_preferred_batch_only_use_no_preferred_size"}

# Tests with varying delay for max queue but no preferred batch size
MAX_QUEUE_DELAY_ONLY_TESTS=${MAX_QUEUE_DELAY_ONLY_TESTS:="test_max_queue_delay_only_default \
                                                    test_max_queue_delay_only_non_default"}

# Build one model repository for the batcher tests.
#
#   $1 - destination directory; removed and recreated
#   $2 - name of the source repository under $DATADIR
#   $3 - dynamic_batching stanza appended to each model's config.pbtxt
#
# For every backend in $BACKENDS the model <backend>_float32_float32_float32
# is installed, its max_batch_size is forced to 8, its version_policy line is
# pinned to version 1, and the dynamic_batching stanza is appended.
# BACKEND is deliberately left as a global loop variable, as it was before
# this helper existed.
function create_batcher_model_repo () {
    local repo_dir=$1
    local src_repo=$2
    local dynamic_batching=$3
    local model_name onnx_model

    rm -fr "$repo_dir" && mkdir "$repo_dir"
    for BACKEND in $BACKENDS; do
        model_name="${BACKEND}_float32_float32_float32"
        if [ "$BACKEND" == "python" ]; then
            # We will be using ONNX models config.pbtxt and tweak them to make
            # them appropriate for Python backend. The model name is built
            # directly instead of rewriting "onnx" inside the full source
            # path, so a DATADIR that contains "onnx" cannot corrupt it.
            onnx_model="$DATADIR/$src_repo/onnx_float32_float32_float32"
            mkdir -p "$repo_dir/$model_name/1/"
            sed -e 's/platform:.*/backend:\ "python"/g' -e 's/onnx/python/g' \
                "$onnx_model/config.pbtxt" > "$repo_dir/$model_name/config.pbtxt"
            cp "$onnx_model/output0_labels.txt" "$repo_dir/$model_name"
            cp ../python_models/add_sub/model.py "$repo_dir/$model_name/1/"
        else
            cp -r "$DATADIR/$src_repo/$model_name" "$repo_dir/."
        fi
        (cd "$repo_dir/$model_name" && \
              sed -i "s/^max_batch_size:.*/max_batch_size: 8/" config.pbtxt && \
              sed -i "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" config.pbtxt && \
              echo "$dynamic_batching" >> config.pbtxt)
    done
}

# Remove logs from a previous run.
rm -fr *.log

# Setup non-variable-size model repository
create_batcher_model_repo models qa_model_repository \
    "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }"

# Same models with preferred batch sizes only (default max_queue_delay)
create_batcher_model_repo preferred_batch_only_models qa_model_repository \
    "dynamic_batching { preferred_batch_size: [ 4, 6 ] }"

# Setup variable-size model repository
create_batcher_model_repo var_models qa_variable_model_repository \
    "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }"

# Replace every "16" in the variable-size configs with "-1".
# NOTE(review): presumably this turns the fixed dimension of 16 into a
# variable dimension - confirm against the source configs.
# A glob is used instead of parsing `ls` output; the -e guard skips the
# literal pattern when nothing matches.
for MC in var_models/*/config.pbtxt; do
    [ -e "$MC" ] || continue
    sed -i "s/16/-1/g" "$MC"
done

# Create allow-ragged model to variable-size model repository.
# The custom_zero_1_float32 model is copied into var_models, given an empty
# version directory "1", and its config.pbtxt is edited in this order:
#   - append an instance_group with a single KIND_GPU instance
#   - force max_batch_size to 8
#   - rewrite every "dims: [...]" to "dims: [ -1 ]"
#   - add "allow_ragged_batch: true" on the line after the INPUT0 name
#   - pin any version_policy line to version 1
#   - append the dynamic_batching stanza (preferred sizes 2 and 6, with a
#     max queue delay of 10000000 microseconds)
cp -r ../custom_models/custom_zero_1_float32 var_models/. && \
    (cd var_models/custom_zero_1_float32 && mkdir 1 && \
        echo "instance_group [ { kind: KIND_GPU count: 1 }]" >> config.pbtxt && \
        sed -i "s/^max_batch_size:.*/max_batch_size: 8/" config.pbtxt && \
        sed -i "s/dims:.*\[.*\]/dims: \[ -1 \]/g" config.pbtxt && \
        sed -i "s/name:.*\"INPUT0\"/name: \"INPUT0\"\\nallow_ragged_batch: true/" config.pbtxt && \
        sed -i "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" config.pbtxt && \
        echo "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> config.pbtxt)

# Ragged-batch identity models, one per backend that appears in $BACKENDS.
# Each is a copy of the backend's "nobatch" identity model installed under a
# name without "nobatch_". Its config.pbtxt is then edited to:
#   - drop the first "nobatch_" on each line
#   - force max_batch_size to 8
#   - add "allow_ragged_batch: true" on the line after the input name
#   - append a BATCH_SCATTER_WITH_INPUT_SHAPE batch_output for the output,
#     followed by the dynamic_batching stanza
# The libtorch variant differs only in its tensor names (INPUT__0/OUTPUT__0).
if [[ $BACKENDS == *"plan"* ]]; then
    # Use nobatch model to match the ragged test requirement
    cp -r $DATADIR/qa_identity_model_repository/plan_nobatch_zero_1_float32 var_models/plan_zero_1_float32 && \
        (cd var_models/plan_zero_1_float32 && \
            sed -i "s/nobatch_//" config.pbtxt && \
            sed -i "s/^max_batch_size:.*/max_batch_size: 8/" config.pbtxt && \
            sed -i "s/name: \"INPUT0\"/name: \"INPUT0\"\\nallow_ragged_batch: true/" config.pbtxt && \
            echo "batch_output [{target_name: \"OUTPUT0\" \
                                    kind: BATCH_SCATTER_WITH_INPUT_SHAPE \
                                    source_input: \"INPUT0\" }] \
                    dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> config.pbtxt)
fi

if [[ $BACKENDS == *"onnx"* ]]; then
    # Use nobatch model to match the ragged test requirement
    cp -r $DATADIR/qa_identity_model_repository/onnx_nobatch_zero_1_float32 var_models/onnx_zero_1_float32 && \
        (cd var_models/onnx_zero_1_float32 && \
            sed -i "s/nobatch_//" config.pbtxt && \
            sed -i "s/^max_batch_size:.*/max_batch_size: 8/" config.pbtxt && \
            sed -i "s/name: \"INPUT0\"/name: \"INPUT0\"\\nallow_ragged_batch: true/" config.pbtxt && \
            echo "batch_output [{target_name: \"OUTPUT0\" \
                                    kind: BATCH_SCATTER_WITH_INPUT_SHAPE \
                                    source_input: \"INPUT0\" }] \
                    dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> config.pbtxt)
fi

if [[ $BACKENDS == *"libtorch"* ]]; then
    # Use nobatch model to match the ragged test requirement
    cp -r $DATADIR/qa_identity_model_repository/libtorch_nobatch_zero_1_float32 var_models/libtorch_zero_1_float32 && \
        (cd var_models/libtorch_zero_1_float32 && \
            sed -i "s/nobatch_//" config.pbtxt && \
            sed -i "s/^max_batch_size:.*/max_batch_size: 8/" config.pbtxt && \
            sed -i "s/name: \"INPUT__0\"/name: \"INPUT__0\"\\nallow_ragged_batch: true/" config.pbtxt && \
            echo "batch_output [{target_name: \"OUTPUT__0\" \
                                    kind: BATCH_SCATTER_WITH_INPUT_SHAPE \
                                    source_input: \"INPUT__0\" }] \
                    dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> config.pbtxt)
fi

# A fresh server is launched for every test so that the model status is
# reset (the status is used to make sure the correct batch size was used
# for execution). Everything is run against both the fixed-tensor-size
# models and the variable-tensor-size models.

for model_type in FIXED VARIABLE; do
    export BATCHER_TYPE=$model_type
    if [[ "$model_type" == "VARIABLE" ]]; then
        MODEL_PATH=var_models
    else
        MODEL_PATH=models
    fi

    for i in $NO_DELAY_TESTS ; do
        SERVER_LOG="./$i.$model_type.server.log"
        SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"

        if [ "$TEST_VALGRIND" -eq 1 ]; then
            LEAKCHECK_LOG="./$i.$model_type.valgrind.log"
            LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
            run_server_leakcheck
        else
            run_server
        fi

        # A server that could not be started aborts the whole script.
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        echo "Test: $i, $model_type" >>$CLIENT_LOG

        # Failures are recorded in RET rather than aborting, so errexit is
        # switched off around the client run.
        set +e
        if ! python3 $BATCHER_TEST BatcherTest.$i >>$CLIENT_LOG 2>&1; then
            echo -e "\n***\n*** Test Failed\n***"
            RET=1
        elif ! check_test_results $TEST_RESULT_FILE 1; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
        set -e

        kill_server

        set +e
        if [ "$TEST_VALGRIND" -eq 1 ] && \
                ! python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG; then
            RET=1
        fi
        set -e
    done

    # Tests that require TRITONSERVER_DELAY_SCHEDULER so that the
    # scheduler is delayed and requests can collect in the queue.
    for i in $DELAY_TESTS ; do
        # Delay value depends on the test being run.
        case "$i" in
            test_multi_batch_use_biggest_preferred)
                export TRITONSERVER_DELAY_SCHEDULER=6
                ;;
            test_multi_batch_use_best_preferred|test_multi_batch_delayed_use_max_batch)
                export TRITONSERVER_DELAY_SCHEDULER=3
                ;;
            *)
                export TRITONSERVER_DELAY_SCHEDULER=2
                ;;
        esac
        SERVER_LOG="./$i.$model_type.server.log"
        SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"

        if [ "$TEST_VALGRIND" -eq 1 ]; then
            LEAKCHECK_LOG="./$i.$model_type.valgrind.log"
            LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
            run_server_leakcheck
        else
            run_server
        fi

        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        echo "Test: $i" >>$CLIENT_LOG

        set +e
        if ! python3 $BATCHER_TEST BatcherTest.$i >>$CLIENT_LOG 2>&1; then
            echo -e "\n***\n*** Test Failed\n***"
            RET=1
        elif ! check_test_results $TEST_RESULT_FILE 1; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
        set -e

        # Clear the delay before the next launch.
        unset TRITONSERVER_DELAY_SCHEDULER
        kill_server

        set +e
        if [ "$TEST_VALGRIND" -eq 1 ] && \
                ! python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG; then
            RET=1
        fi
        set -e
    done
done

# Different-shape tests: run only against the variable-size repository
# (var_models), with one fresh server per test.
export BATCHER_TYPE=VARIABLE
for i in $DIFFERENT_SHAPE_TESTS ; do
    SERVER_ARGS="--model-repository=$MODELDIR/var_models ${SERVER_ARGS_EXTRA}"
    SERVER_LOG="./$i.VARIABLE.server.log"

    # Start the server, under valgrind when leak checking is enabled.
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        LEAKCHECK_LOG="./$i.VARIABLE.valgrind.log"
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
        run_server_leakcheck
    else
        run_server
    fi

    # A server that could not be started aborts the whole script.
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $i" >>$CLIENT_LOG

    # errexit is disabled so a failing client is recorded in RET instead of
    # terminating the script.
    set +e
    python3 $BATCHER_TEST BatcherTest.$i >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    kill_server

    # The valgrind log is checked only after the server has been stopped.
    set +e
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
        if [ $? -ne 0 ]; then
            RET=1
        fi
    fi
    set -e
done

# Tests that run only on the variable-size tensor models and that
# require TRITONSERVER_DELAY_SCHEDULER so that the scheduler is delayed
# and requests can collect in the queue.
export BATCHER_TYPE=VARIABLE
for i in \
        test_multi_batch_delayed_preferred_different_shape ; do
    # The delay is exported before the server starts and unset again once
    # the client has finished.
    export TRITONSERVER_DELAY_SCHEDULER=4
    SERVER_ARGS="--model-repository=$MODELDIR/var_models ${SERVER_ARGS_EXTRA}"
    SERVER_LOG="./$i.VARIABLE.server.log"

    # Start the server, under valgrind when leak checking is enabled.
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        LEAKCHECK_LOG="./$i.VARIABLE.valgrind.log"
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
        run_server_leakcheck
    else
        run_server
    fi

    # A server that could not be started aborts the whole script.
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $i" >>$CLIENT_LOG

    # errexit is disabled so a failing client is recorded in RET instead of
    # terminating the script.
    set +e
    python3 $BATCHER_TEST BatcherTest.$i >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    unset TRITONSERVER_DELAY_SCHEDULER
    kill_server

    # The valgrind log is checked only after the server has been stopped.
    set +e
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
        if [ $? -ne 0 ]; then
            RET=1
        fi
    fi
    set -e
done

# Preferred-batch-only tests: fixed-size models whose dynamic batcher has
# preferred batch sizes but no explicit max queue delay.
export BATCHER_TYPE=FIXED
for i in $PREFERRED_BATCH_ONLY_TESTS ; do
    # Scheduler delay depends on the test being run.
    case "$i" in
        test_preferred_batch_only_aligned)
            export TRITONSERVER_DELAY_SCHEDULER=4
            ;;
        test_preferred_batch_only_unaligned)
            export TRITONSERVER_DELAY_SCHEDULER=5
            ;;
        test_preferred_batch_only_use_biggest_preferred)
            export TRITONSERVER_DELAY_SCHEDULER=7
            ;;
        *)
            export TRITONSERVER_DELAY_SCHEDULER=3
            ;;
    esac
    SERVER_LOG="./$i.PREFERRED_BATCH_ONLY.server.log"
    SERVER_ARGS="--model-repository=$MODELDIR/preferred_batch_only_models ${SERVER_ARGS_EXTRA}"

    if [ "$TEST_VALGRIND" -eq 1 ]; then
        LEAKCHECK_LOG="./$i.PREFERRED_BATCH_ONLY.valgrind.log"
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
        run_server_leakcheck
    else
        run_server
    fi

    # A server that could not be started aborts the whole script.
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $i" >>$CLIENT_LOG

    # Failures are recorded in RET rather than aborting the script.
    set +e
    if ! python3 $BATCHER_TEST BatcherTest.$i >>$CLIENT_LOG 2>&1; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    elif ! check_test_results $TEST_RESULT_FILE 1; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
    set -e

    # Clear the delay before the next launch.
    unset TRITONSERVER_DELAY_SCHEDULER
    kill_server

    set +e
    if [ "$TEST_VALGRIND" -eq 1 ] && \
            ! python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG; then
        RET=1
    fi
    set -e
done

# Test cases that check the runtime batches created with max_queue_delay
# specification only.
rm -fr ./custom_models && mkdir ./custom_models && \
cp -r ../custom_models/custom_zero_1_float32 ./custom_models/. && \
mkdir -p ./custom_models/custom_zero_1_float32/1

# Provide sufficient delay to allow forming of next batch.
# The config starts with max_queue_delay_microseconds: 0; the per-test value
# is written into this same line inside the loop below.
(cd custom_models/custom_zero_1_float32 && \
        sed -i "s/dims:.*\[.*\]/dims: \[ -1 \]/g" config.pbtxt && \
        sed -i "s/max_batch_size:.*/max_batch_size: 100/g" config.pbtxt && \
        echo "dynamic_batching { max_queue_delay_microseconds: 0}" >> config.pbtxt && \
        echo "instance_group [ { kind: KIND_GPU } ]" >> config.pbtxt && \
        echo "parameters [" >> config.pbtxt && \
        echo "{ key: \"execute_delay_ms\"; value: { string_value: \"100\" }}" >> config.pbtxt && \
        echo "]" >> config.pbtxt)

for i in $MAX_QUEUE_DELAY_ONLY_TESTS ; do
    # 20000 for test_max_queue_delay_only_non_default, 0 for any other test.
    export MAX_QUEUE_DELAY_MICROSECONDS=20000 &&
        [[ "$i" != "test_max_queue_delay_only_non_default" ]] && export MAX_QUEUE_DELAY_MICROSECONDS=0
    # Write the selected delay into the model config. The value in the config
    # is a bare number, so the pattern matches digits; the previous pattern
    # required a bracketed list and therefore never matched, leaving the
    # delay at 0 for every test.
    (cd custom_models/custom_zero_1_float32 && \
        sed -i "s/max_queue_delay_microseconds:[ ]*[0-9]*/max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_MICROSECONDS}/g" config.pbtxt )

    SERVER_ARGS="--model-repository=$MODELDIR/custom_models ${SERVER_ARGS_EXTRA}"
    SERVER_LOG="./$i.MAX_QUEUE_DELAY_ONLY.server.log"

    # Start the server, under valgrind when leak checking is enabled.
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        LEAKCHECK_LOG="./$i.MAX_QUEUE_DELAY_ONLY.valgrind.log"
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
        run_server_leakcheck
    else
        run_server
    fi

    # A server that could not be started aborts the whole script.
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $i" >>$CLIENT_LOG

    # errexit is disabled so a failing client is recorded in RET instead of
    # terminating the script.
    set +e
    python3 $BATCHER_TEST BatcherTest.$i >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e
    kill_server
    unset MAX_QUEUE_DELAY_MICROSECONDS
    set +e
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
        if [ $? -ne 0 ]; then
            RET=1
        fi
    fi
    set -e
done

# Tests that verify the 'preserve_ordering' option in dynamic batcher.
# Run the test scheme with and without preserve ordering, verify behavior
# by comparing the "response send" timestamps.
TEST_CASE=test_multi_batch_preserve_ordering

# Run $TEST_CASE once against ./custom_models with request tracing enabled,
# then check the resulting trace file with $VERIFY_TIMESTAMPS.
#
#   $1     - label: names the trace file (<label>.log), the server log, the
#            valgrind log and the banner written to $CLIENT_LOG
#   $2...  - extra arguments placed before the trace file on the
#            $VERIFY_TIMESTAMPS command line
#
# Failures set RET=1; a server that cannot be started exits the script.
function run_preserve_ordering_case () {
    local label=$1
    shift

    SERVER_ARGS="--trace-file=${label}.log --trace-level=MIN --trace-rate=1 --model-repository=$MODELDIR/custom_models ${SERVER_ARGS_EXTRA}"
    SERVER_LOG="./${label}.server.log"

    if [ "$TEST_VALGRIND" -eq 1 ]; then
        LEAKCHECK_LOG="./${label}.valgrind.log"
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
        run_server_leakcheck
    else
        run_server
    fi

    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: ${label}" >>$CLIENT_LOG

    set +e
    python3 $BATCHER_TEST BatcherTest.$TEST_CASE >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    # The server is stopped before the trace file is inspected.
    kill_server

    set +e
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
        if [ $? -ne 0 ]; then
            RET=1
        fi
    fi

    python3 $VERIFY_TIMESTAMPS "$@" ${label}.log
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
    set -e
}

# Skip test for Windows. Trace file concats at 8192 chars on Windows.
# TEST_WINDOWS is treated as 0 when it has not been set, so the test still
# runs instead of being skipped by a failing numeric comparison.
if [ "${TEST_WINDOWS:-0}" -eq 0 ]; then
    rm -fr ./custom_models && mkdir ./custom_models && \
        cp -r ../custom_models/custom_zero_1_float32 ./custom_models/. && \
        mkdir -p ./custom_models/custom_zero_1_float32/1

    # Two instances will be created for the custom model, one delays 100 ms while
    # the other delays 400 ms
    (cd custom_models/custom_zero_1_float32 && \
            sed -i "s/dims:.*\[.*\]/dims: \[ -1 \]/g" config.pbtxt && \
            sed -i "s/max_batch_size:.*/max_batch_size: 4/g" config.pbtxt && \
            echo "dynamic_batching { preferred_batch_size: [ 4 ] }" >> config.pbtxt && \
            echo "instance_group [ { kind: KIND_GPU count: 2 }]" >> config.pbtxt && \
            echo "parameters [" >> config.pbtxt && \
            echo "{ key: \"execute_delay_ms\"; value: { string_value: \"100\" }}," >> config.pbtxt && \
            echo "{ key: \"instance_wise_delay_multiplier\"; value: { string_value: \"4\" }}" >> config.pbtxt && \
            echo "]" >> config.pbtxt)

    # enqueue 3 batches to guarantee that a large delay batch will be followed by
    # a small delay one regardless of the order issued to model instances.
    # i.e. the 3 batches will be queued: [1, 2, 3] and there are two delay instances
    # [small, large], then the distributions can be the following:
    # [1:small 2:large 3:small] or [1:large 2:small 3:*] (* depends on whether order
    # is preserved), and we are only interested in the timestamps where the large
    # delay batch is followed by small delay batch
    export TRITONSERVER_DELAY_SCHEDULER=12

    # not preserve
    run_preserve_ordering_case not_preserve

    # preserve: switch the batcher to preserve_ordering and verify with -p
    (cd custom_models/custom_zero_1_float32 && \
            sed -i "s/dynamic_batching.*/dynamic_batching { preferred_batch_size: [ 4 ] preserve_ordering: true }/g" config.pbtxt)

    run_preserve_ordering_case preserve -p

    unset TRITONSERVER_DELAY_SCHEDULER
fi

# Test requests should be returned immediately upon timeout, without waiting for
# the next slot to be available and then returned.
# The models directory is rebuilt with a single identity-backend model
# "dynamic_batch": max_batch_size 1, one CPU instance, an 8000 ms execute
# delay, and a queue policy that rejects requests after 1000000 microseconds
# with at most 8 queued requests.
rm -rf models && mkdir models
mkdir -p models/dynamic_batch/1 && (cd models/dynamic_batch && \
    echo 'backend: "identity"' >> config.pbtxt && \
    echo 'max_batch_size: 1' >> config.pbtxt && \
    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'instance_group [{ count: 1 \n kind: KIND_CPU }]' >> config.pbtxt && \
    echo -e 'dynamic_batching {' >> config.pbtxt && \
    echo -e '  preferred_batch_size: [ 1 ]' >> config.pbtxt && \
    echo -e '  default_queue_policy { timeout_action: REJECT \n default_timeout_microseconds: 1000000 \n max_queue_size: 8 }' >> config.pbtxt && \
    echo -e '}' >> config.pbtxt && \
    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "8000" } }]' >> config.pbtxt)

TEST_LOG="queue_timeout_test.log"
SERVER_LOG="./queue_timeout_test.server.log"

SERVER_ARGS="--model-repository=$MODELDIR/models --log-verbose=2 --backend-directory=${BACKEND_DIR}"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Run with python3, like every other Python invocation in this script; a
# bare "python" is not guaranteed to exist.
set +e
python3 queue_timeout_test.py > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Queue Timeout Tests Failed\n***"
    cat $TEST_LOG
    RET=1
fi
set -e

kill_server

# Final summary: the client log is dumped only when something failed.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_batcher/verify_timestamps.py
================================================
#!/usr/bin/python
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import json

# Module-level holder for the script's flags; None until assigned.
# NOTE(review): presumably filled from argparse in the script entry point,
# which is outside this view - confirm.
FLAGS = None


def verify_timestamps(traces, preserve):
    """Check response ordering between large-delay and small-delay batches.

    Trace entries are grouped by 'id'. Entries without an 'id' are ignored
    and entries carrying a GRPC timestamp are dropped. A grouped trace is
    treated as "large delay" when its id is at most 8 (shifted by the number
    of dropped GRPC entries) and its compute span is at least 400ms; every
    other trace is treated as "small delay".

    Args:
        traces: Iterable of trace dicts, each optionally holding an 'id' and
            a 'timestamps' list of {'name': ..., 'ns': ...} dicts.
        preserve: True when the traces were collected with preserve ordering.

    Returns:
        0 when the number of small-delay responses completed after the latest
        large-delay response matches the expectation (at least 4 with
        preserve ordering, exactly 0 without), otherwise 1.
    """
    # Hardcoded delay value (knowing the large delay is 400ms)
    large_delay_ns = 400 * 1000 * 1000

    # Walk the entries in id order, merging every entry that shares an 'id'
    # into the first entry seen for that id.
    grouped = {}
    skipped_grpc = 0
    for entry in sorted(traces, key=lambda item: item.get("id", -1)):
        if "id" not in entry:
            continue
        # GRPC traces are ill-formed because the actual traces are not
        # generated via GRPC; drop them and remember how many were dropped.
        if any("GRPC" in stamp["name"] for stamp in entry.get("timestamps", ())):
            skipped_grpc += 1
            continue

        representative = grouped.get(entry["id"])
        if representative is None:
            # First entry for this id becomes its representative
            entry.setdefault("timestamps", [])
            grouped[entry["id"]] = entry
        elif "timestamps" in entry:
            representative["timestamps"] += entry["timestamps"]

    # Find the latest response-complete time among the large-delay batches.
    # If the 3rd batch is also processed by the large delay instance, its
    # responses must not be used as baseline, hence the id bound.
    last_large_delay_done = 0
    small_delay = []
    for entry in grouped.values():
        by_name = {stamp["name"]: stamp["ns"] for stamp in entry["timestamps"]}
        span = by_name["COMPUTE_END"] - by_name["COMPUTE_START"]
        is_large = entry["id"] <= (8 + skipped_grpc) and span >= large_delay_ns
        if not is_large:
            small_delay.append(entry)
            continue
        last_large_delay_done = max(
            last_large_delay_done, by_name["INFER_RESPONSE_COMPLETE"]
        )

    # Count the small-delay responses that completed after that baseline.
    late_count = 0
    for entry in small_delay:
        by_name = {stamp["name"]: stamp["ns"] for stamp in entry["timestamps"]}
        if by_name["INFER_RESPONSE_COMPLETE"] > last_large_delay_done:
            late_count += 1

    print("responses after large delay count: {}".format(late_count))

    # Hardcoded expected counts:
    #  - preserve ordering: a large delay batch must be followed by a small
    #    delay batch, so at least 4 responses are sent afterwards.
    #  - otherwise: all small delay batches should be done before the large
    #    delay batch regardless of the ordering in the scheduler.
    passed = late_count >= 4 if preserve else late_count == 0
    return 0 if passed else 1


if __name__ == "__main__":
    # Usage: verify_timestamps.py [-p] FILE [FILE ...]
    # Each FILE is a JSON trace file; the process exit code is 0 only when
    # every file passes verify_timestamps().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-p",
        "--preserve",
        action="store_true",
        required=False,
        default=False,
        help="Timestamps is collected with preserve ordering",
    )
    parser.add_argument("file", type=argparse.FileType("r"), nargs="+")
    FLAGS = parser.parse_args()

    # Verify every file given on the command line. Exiting inside the loop
    # would silently ignore all files after the first one.
    exit_code = 0
    for f in FLAGS.file:
        # argparse opened the file for us; make sure it gets closed
        with f:
            trace_data = json.load(f)
        if verify_timestamps(trace_data, FLAGS.preserve) != 0:
            exit_code = 1
    raise SystemExit(exit_code)


================================================
FILE: qa/L0_buffer_attributes/buffer_attributes_test.py
================================================
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
import tritonclient.utils.cuda_shared_memory as cudashm
from tritonclient.utils import triton_to_np_dtype


class BufferAttributesTest(tu.TestResultCollector):
    """Round-trips data through the 'bls' model using CUDA shared memory."""

    def test_buffer_attributes(self):
        """Send INPUT0 via CUDA shared memory and expect it back in OUTPUT0.

        The same check is run once with the HTTP client (localhost:8000) and
        once with the GRPC client (localhost:8001). The CUDA shared memory
        regions created for each run are unregistered from the server and
        destroyed afterwards, even when the inference or an assertion fails.
        """
        model_name = "bls"

        # Infer
        clients = [
            httpclient.InferenceServerClient(url="localhost:8000"),
            grpcclient.InferenceServerClient(url="localhost:8001"),
        ]
        triton_clients = [httpclient, grpcclient]
        for triton_client, client in zip(triton_clients, clients):
            # To make sure no shared memory regions are registered with the
            # server.
            client.unregister_system_shared_memory()
            client.unregister_cuda_shared_memory()

            inputs = [triton_client.InferInput("INPUT0", [1, 1000], "INT32")]

            input0_data = np.arange(start=0, stop=1000, dtype=np.int32)
            input0_data = np.expand_dims(input0_data, axis=0)

            input_byte_size = input0_data.size * input0_data.itemsize
            # The model is expected to echo its input, so the output needs
            # exactly as many bytes as the input.
            output_byte_size = input_byte_size

            # Handles are collected as they are created so that the cleanup
            # below releases exactly the regions that exist.
            shm_handles = []
            try:
                shm_ip0_handle = cudashm.create_shared_memory_region(
                    "input0_data", input_byte_size, 0
                )
                shm_handles.append(shm_ip0_handle)
                shm_op0_handle = cudashm.create_shared_memory_region(
                    "output0_data", output_byte_size, 0
                )
                shm_handles.append(shm_op0_handle)

                client.register_cuda_shared_memory(
                    "input0_data",
                    cudashm.get_raw_handle(shm_ip0_handle),
                    0,
                    input_byte_size,
                )
                client.register_cuda_shared_memory(
                    "output0_data",
                    cudashm.get_raw_handle(shm_op0_handle),
                    0,
                    output_byte_size,
                )

                cudashm.set_shared_memory_region(shm_ip0_handle, [input0_data])
                inputs[0].set_shared_memory("input0_data", input_byte_size)

                # Only the HTTP client's InferRequestedOutput takes the
                # binary_data argument.
                if triton_client is grpcclient:
                    output0_request = triton_client.InferRequestedOutput("OUTPUT0")
                else:
                    output0_request = triton_client.InferRequestedOutput(
                        "OUTPUT0", binary_data=True
                    )
                output0_request.set_shared_memory("output0_data", output_byte_size)
                outputs = [output0_request]

                results = client.infer(
                    model_name=model_name, inputs=inputs, outputs=outputs
                )

                output0 = results.get_output("OUTPUT0")
                self.assertIsNotNone(output0)
                # The GRPC result exposes the output metadata as attributes,
                # the HTTP result as dictionary entries.
                if triton_client is grpcclient:
                    output0_data = cudashm.get_contents_as_numpy(
                        shm_op0_handle,
                        triton_to_np_dtype(output0.datatype),
                        output0.shape,
                    )
                else:
                    output0_data = cudashm.get_contents_as_numpy(
                        shm_op0_handle,
                        triton_to_np_dtype(output0["datatype"]),
                        output0["shape"],
                    )
                self.assertTrue(np.all(output0_data == input0_data))
            finally:
                # Release the regions on both the server and the client side
                # so nothing leaks into the next iteration or test.
                try:
                    client.unregister_cuda_shared_memory()
                finally:
                    for shm_handle in shm_handles:
                        cudashm.destroy_shared_memory_region(shm_handle)


# Run the unittest test cases defined in this file when it is executed as a
# script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_buffer_attributes/models/bls/1/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


# Simple Python model that executes a BLS request on an identity model.
class TritonPythonModel:
    """Python model that forwards each request to the 'identity' model via BLS."""

    def execute(self, requests):
        """Run one BLS inference on 'identity' per request.

        For every request, INPUT0 is passed to the 'identity' model and the
        OUTPUT0 tensor of that inference becomes the response.

        Raises:
            pb_utils.TritonModelException: if a BLS inference reports an error.
        """
        results = []
        for req in requests:
            bls_request = pb_utils.InferenceRequest(
                model_name="identity",
                requested_output_names=["OUTPUT0"],
                inputs=[pb_utils.get_input_tensor_by_name(req, "INPUT0")],
            )
            bls_response = bls_request.exec()

            # Surface a failed nested inference as a model error
            if bls_response.has_error():
                raise pb_utils.TritonModelException(bls_response.error().message())

            output0 = pb_utils.get_output_tensor_by_name(bls_response, "OUTPUT0")
            results.append(pb_utils.InferenceResponse(output_tensors=[output0]))

        return results


================================================
FILE: qa/L0_buffer_attributes/models/bls/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model "bls": one INT32 input (INPUT0) and one INT32 output
# (OUTPUT0), each with dims [ 1000 ], and a maximum batch size of 64.
name: "bls"
backend: "python"
max_batch_size: 64
input [
 {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 1000 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 1000 ]
  }
]
# Model instances are of kind GPU.
instance_group [{ kind: KIND_GPU }]

# NOTE(review): per the Python backend documentation, "no" is understood to
# let input tensors stay in GPU memory instead of being copied to CPU first -
# confirm against the backend README.
parameters: {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {
    string_value: "no"
  }
}


================================================
FILE: qa/L0_buffer_attributes/models/identity/1/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Identity model using DLPack in Python backend."""

    def execute(self, requests):
        """Return one response per request whose OUTPUT0 mirrors INPUT0.

        INPUT0 is exported with to_dlpack() and re-imported as the OUTPUT0
        tensor with Tensor.from_dlpack().
        """
        return [
            pb_utils.InferenceResponse(
                [
                    pb_utils.Tensor.from_dlpack(
                        "OUTPUT0",
                        pb_utils.get_input_tensor_by_name(req, "INPUT0").to_dlpack(),
                    )
                ]
            )
            for req in requests
        ]


================================================
FILE: qa/L0_buffer_attributes/models/identity/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model "identity": one INT32 input (INPUT0) and one INT32
# output (OUTPUT0), each with dims [ 1000 ], and a maximum batch size of 64.
name: "identity"
backend: "python"
max_batch_size: 64
input [
 {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 1000 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 1000 ]
  }
]
# Model instances are of kind GPU.
instance_group [{ kind: KIND_GPU }]

# NOTE(review): per the Python backend documentation, "no" is understood to
# let input tensors stay in GPU memory instead of being copied to CPU first -
# confirm against the backend README.
parameters: {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {
    string_value: "no"
  }
}


================================================
FILE: qa/L0_buffer_attributes/test.sh
================================================
#!/bin/bash
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The repository version comes from the environment and can be overridden by
# the first command-line argument; it is mandatory.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Shared QA helpers (run_server and check_test_results are presumably defined
# here - they are not defined in this script).
source ../common/util.sh

RET=0

CLIENT_LOG="./buffer_attributes_client.log"
TEST_PY=./buffer_attributes_test.py
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'

export CUDA_VISIBLE_DEVICES=0

rm -fr *.log

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Failures of the client are recorded in RET instead of aborting the script.
set +e
python $TEST_PY >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

# Stop the server started by run_server; without this the tritonserver
# process outlives the test.
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_client_build_variants/test.sh
================================================
#!/bin/bash
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Install required dependencies for client build
apt-get update && \
apt-get install -y --no-install-recommends \
        rapidjson-dev

# Client build requires recent version of CMake (FetchContent required)
# Using CMAKE installation instruction from:: https://apt.kitware.com/
apt update -q=2 \
    && apt install -y gpg wget \
    && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - |  tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
    && . /etc/os-release \
    && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
    && apt-get update -q=2 \
    && apt-get install -y --no-install-recommends cmake=4.0.3* cmake-data=4.0.3*
cmake --version


# Build failures are detected explicitly through $? after each variant.
set +e

mkdir -p /workspace/build

#
# Build without GPU support
#
# Every repository setting passed to cmake below gets a default so that an
# unset environment variable never turns into an empty -D value.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"}
TRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG:="main"}
TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG:="main"}
TRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG:="main"}
TRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG:="main"}

export CMAKE_POLICY_VERSION_MINIMUM=3.5

(cd /workspace/build && \
        rm -fr cc-clients java-clients python-clients && \
        export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
        cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
              -DTRITON_ENABLE_CC_HTTP=ON \
              -DTRITON_ENABLE_CC_GRPC=ON \
              -DTRITON_ENABLE_PYTHON_HTTP=ON \
              -DTRITON_ENABLE_PYTHON_GRPC=ON \
              -DTRITON_ENABLE_JAVA_HTTP=ON \
              -DTRITON_ENABLE_EXAMPLES=ON \
              -DTRITON_ENABLE_TESTS=ON \
              -DTRITON_ENABLE_GPU=OFF \
              -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
              -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
              -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
              -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
              /workspace/client && \
        make -j16 cc-clients java-clients python-clients)
if [ $? -eq 0 ]; then
    echo -e "\n***\n*** No-GPU Passed\n***"
else
    echo -e "\n***\n*** No-GPU FAILED\n***"
    exit 1
fi

#
# Build without HTTP
# Skip this test for java-clients because we can only build
# java-clients with http protocol
#
(cd /workspace/build && \
        rm -fr cc-clients python-clients && \
        export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
        cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
              -DTRITON_ENABLE_CC_HTTP=OFF \
              -DTRITON_ENABLE_CC_GRPC=ON \
              -DTRITON_ENABLE_PYTHON_HTTP=OFF \
              -DTRITON_ENABLE_PYTHON_GRPC=ON \
              -DTRITON_ENABLE_EXAMPLES=ON \
              -DTRITON_ENABLE_TESTS=ON \
              -DTRITON_ENABLE_GPU=ON \
              -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
              -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
              -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
              -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
              /workspace/client && \
        make -j16 cc-clients python-clients)
if [ $? -eq 0 ]; then
    echo -e "\n***\n*** No-HTTP Passed\n***"
else
    echo -e "\n***\n*** No-HTTP FAILED\n***"
    exit 1
fi

#
# Build without GRPC
# Skip this test for java-clients because grpc protocol is not supported
#
(cd /workspace/build && \
        rm -fr cc-clients python-clients && \
        export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
        cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
              -DTRITON_ENABLE_CC_HTTP=ON \
              -DTRITON_ENABLE_CC_GRPC=OFF \
              -DTRITON_ENABLE_PYTHON_HTTP=ON \
              -DTRITON_ENABLE_PYTHON_GRPC=OFF \
              -DTRITON_ENABLE_EXAMPLES=ON \
              -DTRITON_ENABLE_TESTS=ON \
              -DTRITON_ENABLE_GPU=ON \
              -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
              -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
              -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
              -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
              /workspace/client && \
        make -j16 cc-clients python-clients)
if [ $? -eq 0 ]; then
    echo -e "\n***\n*** No-GRPC Passed\n***"
else
    echo -e "\n***\n*** No-GRPC FAILED\n***"
    exit 1
fi

set -e

echo -e "\n***\n*** Test Passed\n***"


================================================
FILE: qa/L0_client_java/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: environment value, overridable by the first argument.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Defaults for where the common repo (proto files) is fetched from
: "${TRITON_REPO_ORGANIZATION:=http://github.com/triton-inference-server}"
: "${TRITON_COMMON_REPO_TAG:=main}"

RET=0

rm -f *.log.*

# Get the proto files from the common repo
rm -fr common
git clone --single-branch --depth=1 -b $TRITON_COMMON_REPO_TAG \
    ${TRITON_REPO_ORGANIZATION}/common.git
cp common/protobuf/*.proto java/library/src/main/proto/.

# Compile library and copy the generated sources into the examples project
(cd java/library && \
    mvn compile && \
    cp -R target/generated-sources/protobuf/java/inference ../examples/src/main/java/inference && \
    cp -r target/generated-sources/protobuf/grpc-java/inference/*.java ../examples/src/main/java/inference/)

# Build simple java and scala client example
(cd java/examples && mvn clean install)

CLIENT_LOG=$(pwd)/client.log
DATADIR=$(pwd)/models
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR"
source ../common/util.sh

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Client failures are recorded in RET instead of aborting the script
set +e
pushd java/examples

# Test grpc_generated simple java client example
if ! mvn exec:java -Dexec.mainClass=clients.SimpleJavaClient -Dexec.args="localhost 8001" >> ${CLIENT_LOG}.java 2>&1; then
    cat ${CLIENT_LOG}.java
    RET=1
fi

# Test grpc_generated simple scala client example
if ! mvn exec:java -Dexec.mainClass=clients.SimpleClient -Dexec.args="localhost 8001" >> ${CLIENT_LOG}.scala 2>&1; then
    cat ${CLIENT_LOG}.scala
    RET=1
fi

popd

# Test simple infer java client
SIMPLE_INFER_JAVA_CLIENT=../clients/SimpleInferClient.jar

pushd ../clients

if ! java -jar ${SIMPLE_INFER_JAVA_CLIENT} >> ${CLIENT_LOG}.simple_infer_java 2>&1; then
    cat ${CLIENT_LOG}.simple_infer_java
    RET=1
fi

popd
set -e

# Stop the server started by run_server
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_client_memory_growth/client_memory_mail.py
================================================
#!/usr/bin/env python
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import glob
from datetime import date

import nightly_email_helper

if __name__ == "__main__":
    # Assemble an HTML summary of every client memory growth graph found in
    # the current directory and mail it. The first command-line argument is
    # inserted into the subject line.
    run_date = date.today().strftime("%Y-%m-%d")
    subject = f"Triton Client Memory Growth {sys.argv[1]} Summary: {run_date}"
    graph_logs = glob.glob("client_memory_growth*.log")

    write_up = "".join(
        (
            "<p>This test is run for both HTTP and GRPC protocols using C++ and Python test scripts. The max-allowed difference between mean and maximum memory usage is set to 10MB and 1MB for C++ and Python tests individually.</p>",
            "<p><b>&#8226 What to look for</b><br>A linear memory growth in the beginning of the graph is acceptable only when it is followed by a flat memory usage. If a linear memory growth is observed during the entire test then there is possibly a memory leak.</p>",
        )
    )

    # Collect the pieces of the document and join them once at the end.
    html_parts = [
        '<html><head></head><body><pre style="font-size:11pt;font-family:Arial, sans-serif;">',
        write_up,
        '</pre><pre style="font-size:11pt;font-family:Consolas;">',
    ]
    for log_path in sorted(graph_logs):
        # Each graph is preceded by its file name on a line of its own.
        with open(log_path, "r") as log_file:
            log_text = log_file.read()
        html_parts.append("\n" + log_path + "\n" + log_text + "\n")
    html_parts.append("</pre></body></html>")

    nightly_email_helper.send(subject, "".join(html_parts), is_html=True)


================================================
FILE: qa/L0_client_memory_growth/models/custom_identity_int32/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "custom_identity_int32" model that lives in
# the L0_client_memory_growth model repository.
name: "custom_identity_int32"
# Served by the "identity" backend (presumably echoes INPUT0 to OUTPUT0 --
# confirm against the backend documentation).
backend: "identity"
# Largest batch size accepted for a single request.
max_batch_size: 1024
# Version policy: "latest" with a single version.
version_policy: { latest { num_versions: 1 }}
# The instance group is CPU-only.
instance_group [ { kind: KIND_CPU } ]

# Single INT32 input with one variable-size (-1) dimension.
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]

  }
]
# Single INT32 output with one variable-size (-1) dimension.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]

================================================
FILE: qa/L0_client_memory_growth/test.sh
================================================
#!/bin/bash
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument wins when
# one is given (even if empty), otherwise fall back to the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when a test architecture is specified.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Must run on a single device or else the TRITONSERVER_DELAY_SCHEDULER
# can fail when the requests are distributed to multiple devices.
export CUDA_VISIBLE_DEVICES=0

# Valgrind is run with the massif heap profiler; per-run log and output
# file options are appended to the base arguments later.
LEAKCHECK=/usr/bin/valgrind
LEAKCHECK_ARGS_BASE="--max-threads=3000 --tool=massif --time-unit=B"
SERVER_TIMEOUT=3600

# Start from a clean slate: drop logs and massif output of earlier runs.
rm -f *.log *.massif

# Client binaries/scripts under test and the massif log checker.
MEMORY_GROWTH_TEST_CPP=../clients/memory_leak_test
MEMORY_GROWTH_TEST_PY=../clients/memory_growth_test.py
MASSIF_TEST=../common/check_massif_log.py

# Server binary and the model repository it serves.
DATADIR=$(pwd)/models
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=${DATADIR}"
source ../common/util.sh

# Choose the number of repetitions and the email subject depending on
# whether this is a nightly, weekly, or long weekly run.
if [[ "$TRITON_PERF_WEEKLY" != 1 ]]; then
    # Nightly: one repetition count per client language, shared by both
    # protocols.
    REPETITION_CPP=100000
    REPETITION_PY=10000
    EMAIL_SUBJECT="Nightly"
elif [[ "$TRITON_PERF_LONG" == 1 ]]; then
    # ~ 12 hours
    # GRPC cycles are reduced as there is high fluctuation in time spent
    REPETITION_HTTP_CPP=2220000
    REPETITION_HTTP_PY=3600000
    REPETITION_GRPC_CPP=8000000
    REPETITION_GRPC_PY=1500000
    EMAIL_SUBJECT="Weekly Long"
else
    # Run the test for each case approximately 1.5 hours
    # All tests are run cumulatively for 7 hours
    REPETITION_HTTP_CPP=1300000
    REPETITION_HTTP_PY=2100000
    REPETITION_GRPC_CPP=6600000
    REPETITION_GRPC_PY=1000000
    EMAIL_SUBJECT="Weekly"
fi

# The model directory needs a version sub-directory to be loadable.
mkdir -p $DATADIR/custom_identity_int32/1

# Overall result of the script; set to 1 by any failing check below.
RET=0

# Run test for both HTTP and GRPC, not re-using client object.
# NOTE(review): the inner loop variable is named LANG, which is also the
# locale environment variable; if LANG is exported in the calling
# environment the child processes will see "c++"/"python" as their locale.
# Left unchanged here -- confirm whether a rename is wanted.
for PROTOCOL in http grpc; do
    for LANG in c++ python; do
        # Per-combination log and massif output files.
        LEAKCHECK_LOG="./valgrind.${PROTOCOL}.${LANG}.log"
        CLIENT_LOG="./client.${PROTOCOL}.${LANG}.log"
        GRAPH_LOG="./client_memory_growth.${PROTOCOL}.${LANG}.log"
        MASSIF_LOG="./${PROTOCOL}.${LANG}.massif"
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG --massif-out-file=$MASSIF_LOG"

        # Weekly runs use protocol-specific repetition counts.
        if [ "$TRITON_PERF_WEEKLY" == 1 ]; then
            if [ $PROTOCOL ==  http ]; then
                REPETITION_CPP=$REPETITION_HTTP_CPP
                REPETITION_PY=$REPETITION_HTTP_PY
            else
                REPETITION_CPP=$REPETITION_GRPC_CPP
                REPETITION_PY=$REPETITION_GRPC_PY
            fi
        fi

        run_server
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        # MAX_ALLOWED_ALLOC is the threshold memory growth in MB
        if [ "$LANG" == "c++" ]; then
            MEMORY_GROWTH_TEST=$MEMORY_GROWTH_TEST_CPP
            MAX_ALLOWED_ALLOC="10"
            # NOTE: This test has risk of exhausting all available sockets in
            # the ephemeral port range. Re-using the same client connection
            # ("-R") can easily solve this problem. However, to cleanly separate
            # the resources used by different client objects, we create new
            # connections for each request and retry/sleep on failure to give
            # the system time to reclaim sockets after TIME_WAIT.
            # TIP: You can use the "ss -s" command to observe the socket usage.
            EXTRA_ARGS="-r ${REPETITION_CPP} -i ${PROTOCOL}"
        else
            MEMORY_GROWTH_TEST="python $MEMORY_GROWTH_TEST_PY"
            MAX_ALLOWED_ALLOC="1"
            EXTRA_ARGS="-r ${REPETITION_PY} -i ${PROTOCOL}"
        fi

        # Run the client under valgrind; errexit is disabled so that the
        # exit code can be inspected. SECONDS is bash's built-in timer.
        set +e
        SECONDS=0
        $LEAKCHECK $LEAKCHECK_ARGS $MEMORY_GROWTH_TEST $EXTRA_ARGS >> ${CLIENT_LOG} 2>&1
        TEST_RETCODE=$?
        TEST_DURATION=$SECONDS
        set -e
        if [ ${TEST_RETCODE} -ne 0 ]; then
            cat ${CLIENT_LOG}
            RET=1
            echo -e "\n***\n*** Test FAILED\n***"
        else
            # Disable errexit for the whole analysis phase. The valgrind log
            # check must run with errexit off as well: otherwise a non-zero
            # exit from the checker would terminate the script immediately,
            # skipping the RET bookkeeping below and leaving the server
            # running.
            set +e
            python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
            if [ $? -ne 0 ]; then
                echo -e "\n***\n*** Memory leak detected\n***"
                RET=1
            fi

            # Check for memory growth
            python $MASSIF_TEST $MASSIF_LOG $MAX_ALLOWED_ALLOC >> ${CLIENT_LOG}.massif 2>&1
            if [ $? -ne 0 ]; then
                echo -e "\n***\n*** Massif Test for ${PROTOCOL} ${LANG} Failed\n***"
                RET=1
            fi

            # Log test duration, the graph for memory growth and the change between Average and Max memory usage
            hrs=$(printf "%02d" $((TEST_DURATION / 3600)))
            mins=$(printf "%02d" $(((TEST_DURATION / 60) % 60)))
            secs=$(printf "%02d" $((TEST_DURATION % 60)))
            echo -e "Test Duration: $hrs:$mins:$secs (HH:MM:SS)" >> ${GRAPH_LOG}
            cat ${CLIENT_LOG}.massif
            ms_print ${MASSIF_LOG} | head -n35 >> ${GRAPH_LOG}
            cat ${GRAPH_LOG}
            set -e
        fi

        # Stop Server
        kill $SERVER_PID
        wait $SERVER_PID
    done
done

# Print the overall verdict.
if [ $RET -ne 0 ]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

# Run only if both TRITON_FROM and TRITON_TO_DL are set
if [[ -n "$TRITON_FROM" && -n "$TRITON_TO_DL" ]]; then
    python client_memory_mail.py "$EMAIL_SUBJECT"
fi

exit $RET


================================================
FILE: qa/L0_client_nobatch/client_test.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import numpy as np
import test_util as tu
import tritonclient.grpc as tritongrpcclient
import tritonclient.http as tritonhttpclient
from tritonclient.utils import InferenceServerException


class ClientNoBatchTest(tu.TestResultCollector):
    """Check how requests with and without a batch dimension are handled.

    Every test sends the same request once over HTTP (localhost:8000) and
    once over GRPC (localhost:8001). Requests whose shape does not match the
    model's batching configuration are expected to raise
    ``InferenceServerException``; matching requests are expected to succeed.
    """

    # Number of elements in each input tensor (excluding any batch dimension).
    _INPUT_SIZE = 16
    # Protocols exercised by every test, in order.
    _PROTOCOLS = ("http", "grpc")

    def _build_request(self, protocol, tensor_shape):
        """Create a client plus populated inputs/outputs for one request.

        Parameters
        ----------
        protocol : str
            "http" selects the HTTP client; any other value selects GRPC.
        tensor_shape : tuple
            Shape used for both INPUT0 and INPUT1.

        Returns
        -------
        tuple
            ``(triton_client, inputs, outputs)`` ready to be passed to
            ``triton_client.infer``.
        """
        in0 = np.random.randint(low=0, high=100, size=tensor_shape, dtype=np.int32)
        in1 = np.random.randint(low=0, high=100, size=tensor_shape, dtype=np.int32)

        if protocol == "http":
            client_module, url = tritonhttpclient, "localhost:8000"
        else:
            client_module, url = tritongrpcclient, "localhost:8001"

        triton_client = client_module.InferenceServerClient(url=url, verbose=True)
        inputs = [
            client_module.InferInput("INPUT0", tensor_shape, "INT32"),
            client_module.InferInput("INPUT1", tensor_shape, "INT32"),
        ]
        outputs = [
            client_module.InferRequestedOutput("OUTPUT0"),
            client_module.InferRequestedOutput("OUTPUT1"),
        ]

        # Initialize the data
        inputs[0].set_data_from_numpy(in0)
        inputs[1].set_data_from_numpy(in1)
        return triton_client, inputs, outputs

    def _assert_infer_rejected(self, model_name, tensor_shape, msg):
        """Assert that inference fails with InferenceServerException.

        Only the ``infer`` call itself is guarded, so an exception raised
        while building the request still surfaces as a test error.
        """
        for protocol in self._PROTOCOLS:
            triton_client, inputs, outputs = self._build_request(
                protocol, tensor_shape
            )
            with self.assertRaises(InferenceServerException, msg=msg):
                triton_client.infer(model_name, inputs, outputs=outputs)

    def _assert_infer_accepted(self, model_name, tensor_shape):
        """Run inference on every protocol; any exception fails the test."""
        for protocol in self._PROTOCOLS:
            triton_client, inputs, outputs = self._build_request(
                protocol, tensor_shape
            )
            triton_client.infer(model_name, inputs, outputs=outputs)

    def test_nobatch_request_for_batching_model(self):
        # onnx_int32_int8_int8 has a batching version with max batch size of 8.
        # The server should return an error if the batch size is not included in the
        # input shapes.
        self._assert_infer_rejected(
            tu.get_model_name("onnx", np.int32, np.int8, np.int8),
            (self._INPUT_SIZE,),
            "expected failure with no batch request for batching model",
        )

    def test_batch_request_for_nobatching_model(self):
        # onnx_nobatch_int32_int8_int8 is non batching version.
        # The server should return an error if the batch size dimension
        # is included in the shape
        self._assert_infer_rejected(
            tu.get_model_name("onnx_nobatch", np.int32, np.int8, np.int8),
            (1, self._INPUT_SIZE),
            "expected failure with batched request for non-batching model",
        )

    def test_nobatch_request_for_nonbatching_model(self):
        # onnx_nobatch_int32_int8_int8 is non batching version.
        # A request without a batch dimension matches the model, so the
        # inference is expected to succeed.
        self._assert_infer_accepted(
            tu.get_model_name("onnx_nobatch", np.int32, np.int8, np.int8),
            (self._INPUT_SIZE,),
        )

    def test_batch_request_for_batching_model(self):
        # onnx_int32_int8_int8 has a batching version with max batch size of 8.
        # A request that includes the batch dimension matches the model, so
        # the inference is expected to succeed.
        self._assert_infer_accepted(
            tu.get_model_name("onnx", np.int32, np.int8, np.int8),
            (1, self._INPUT_SIZE),
        )

if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_client_nobatch/test.sh
================================================
#!/bin/bash
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument wins when
# one is given (even if empty), otherwise fall back to the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when a test architecture is specified.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Client-side test script, its log, and the expected number of test cases.
TEST_RESULT_FILE='test_results.txt'
CLIENT_LOG="./client.log"
CLIENT_TEST=client_test.py
EXPECTED_NUM_TESTS="4"

# Build a fresh local model repository containing only the onnx_* models.
# Note there is a coupling in ./client_test.py
DATADIR=/data/inferenceserver/${REPO_VERSION}
MODELDIR="${PWD}/qa_model_repository"
rm -rf ${MODELDIR} \
    && mkdir -p ${MODELDIR} \
    && cp -r ${DATADIR}/qa_model_repository/onnx_* ${MODELDIR}/.

# Server binary, arguments and log file.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=${MODELDIR}"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

# Start the server and bail out early when it did not come up.
run_server
if (( $SERVER_PID == 0 )); then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

set +e

# python unittest seems to swallow ImportError and still return 0 exit
# code. So need to explicitly check CLIENT_LOG to make sure we see
# some running tests
rm -f $CLIENT_LOG
if ! python $CLIENT_TEST >$CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
elif ! check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS; then
    # The script exited cleanly but the recorded results do not match the
    # expected number of tests.
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi

set -e

# Stop the server before reporting.
kill $SERVER_PID
wait $SERVER_PID

if (( RET == 0 )); then
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_client_timeout/client_infer_timeout_test.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import queue
import socket
import unittest
from functools import partial

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException


class UserData:
    """Container handed to async callbacks to collect finished requests."""

    def __init__(self):
        # Results or errors are pushed onto this queue by `callback`.
        completed = queue.Queue()
        self._completed_requests = completed

def callback(user_data, result, error):
    """Queue the outcome of a completed request on ``user_data``.

    A truthy ``error`` is queued in place of ``result``; otherwise the
    ``result`` itself is queued.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


class ClientInferTimeoutTest(tu.TestResultCollector):
    def setUp(self):
        """Set the model name, input tensor and timeout used by the tests."""
        self.INFER_SMALL_INTERVAL = 2.0  # seconds for a timeout
        self.input0_data_byte_size_ = 32
        # A single INT32 value with shape (1, 1).
        self.input0_data_ = np.array([[10]], dtype=np.int32)
        self.model_name_ = "custom_identity_int32"

    def _prepare_request(self, protocol):
        """Populate ``self.inputs_`` and ``self.outputs_`` for ``protocol``.

        "grpc" selects the GRPC client classes; any other value selects the
        HTTP ones. The single input is filled from ``self.input0_data_``.
        """
        client_module = grpcclient if protocol == "grpc" else httpclient

        self.inputs_ = [client_module.InferInput("INPUT0", [1, 1], "INT32")]
        self.outputs_ = [client_module.InferRequestedOutput("OUTPUT0")]

        request_input = self.inputs_[0]
        request_input.set_data_from_numpy(self.input0_data_)

    def test_grpc_infer(self):
        """Synchronous GRPC infer: short timeout fails, long one succeeds."""
        client = grpcclient.InferenceServerClient(url="localhost:8001", verbose=True)
        self._prepare_request("grpc")
        request = {
            "model_name": self.model_name_,
            "inputs": self.inputs_,
            "outputs": self.outputs_,
        }

        # The model is configured to take three seconds to send the
        # response. Expect an exception for small timeout values.
        with self.assertRaises(InferenceServerException) as raised:
            _ = client.infer(client_timeout=0.2, **request)
        self.assertIn("Deadline Exceeded", str(raised.exception))

        # Expect inference to pass successfully for a large timeout
        # value
        response = client.infer(client_timeout=10, **request)

        echoed = response.as_numpy("OUTPUT0")
        self.assertTrue(np.array_equal(self.input0_data_, echoed))

    def test_grpc_async_infer(self):
        """Asynchronous GRPC infer: short timeout fails, long one succeeds.

        The outcome of each request is delivered through ``callback`` into
        ``user_data._completed_requests``; errors taken from the queue are
        re-raised so that ``assertRaises`` can observe them.
        """
        triton_client = grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        )
        self._prepare_request("grpc")

        user_data = UserData()

        # The model is configured to take three seconds to send the
        # response. Expect an exception for small timeout values.
        with self.assertRaises(InferenceServerException) as cm:
            triton_client.async_infer(
                model_name=self.model_name_,
                inputs=self.inputs_,
                callback=partial(callback, user_data),
                outputs=self.outputs_,
                client_timeout=self.INFER_SMALL_INTERVAL,
            )
            data_item = user_data._completed_requests.get()
            # isinstance (rather than an exact type comparison) also
            # recognises subclasses of InferenceServerException.
            if isinstance(data_item, InferenceServerException):
                raise data_item
        self.assertIn("Deadline Exceeded", str(cm.exception))

        # Expect inference to pass successfully for a large timeout
        # value
        triton_client.async_infer(
            model_name=self.model_name_,
            inputs=self.inputs_,
            callback=partial(callback, user_data),
            outputs=self.outputs_,
            client_timeout=10,
        )

        # Wait until the results are available in user_data
        data_item = user_data._completed_requests.get()
        self.assertNotIsInstance(data_item, InferenceServerException)

        output0_data = data_item.as_numpy("OUTPUT0")
        self.assertTrue(np.array_equal(self.input0_data_, output0_data))

    def test_grpc_stream_infer(self):
        """Streaming GRPC infer: short stream timeout fails, long one succeeds.

        The outcome of each request is delivered through ``callback`` into
        ``user_data._completed_requests``; errors taken from the queue are
        re-raised so that they fail (or satisfy) the surrounding check.
        """
        triton_client = grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        )

        self._prepare_request("grpc")
        user_data = UserData()

        # The model is configured to take three seconds to send the
        # response. Expect an exception for small timeout values.
        with self.assertRaises(InferenceServerException) as cm:
            triton_client.stop_stream()
            triton_client.start_stream(
                callback=partial(callback, user_data), stream_timeout=1
            )
            triton_client.async_stream_infer(
                model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
            )
            data_item = user_data._completed_requests.get()
            # isinstance (rather than an exact type comparison) also
            # recognises subclasses of InferenceServerException.
            if isinstance(data_item, InferenceServerException):
                raise data_item
        self.assertIn("Deadline Exceeded", str(cm.exception))

        # Expect inference to pass successfully for a large timeout
        # value
        triton_client.stop_stream()
        triton_client.start_stream(
            callback=partial(callback, user_data), stream_timeout=100
        )

        triton_client.async_stream_infer(
            model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
        )
        data_item = user_data._completed_requests.get()
        triton_client.stop_stream()

        if isinstance(data_item, InferenceServerException):
            raise data_item
        output0_data = data_item.as_numpy("OUTPUT0")
        self.assertTrue(np.array_equal(self.input0_data_, output0_data))

    def test_http_infer(self):
        """Check network-timeout handling of synchronous HTTP inference.

        A client created with ``network_timeout=self.INFER_SMALL_INTERVAL``
        must raise ``socket.timeout``; a second client must return an OUTPUT0
        tensor equal to the request input.
        """
        self._prepare_request("http")

        # The model answers only after a delay (three seconds according to
        # the original test comment), so the short network timeout must trip.
        with self.assertRaises(socket.timeout) as timeout_ctx:
            impatient_client = httpclient.InferenceServerClient(
                url="localhost:8000",
                verbose=True,
                network_timeout=self.INFER_SMALL_INTERVAL,
            )
            impatient_client.infer(
                model_name=self.model_name_,
                inputs=self.inputs_,
                outputs=self.outputs_,
            )
        self.assertIn("timed out", str(timeout_ctx.exception))

        # NOTE(review): only connection_timeout is raised for the success
        # path; network_timeout is left at the client's default, which is
        # presumably longer than the model delay -- confirm.
        patient_client = httpclient.InferenceServerClient(
            url="localhost:8000", verbose=True, connection_timeout=10.0
        )
        response = patient_client.infer(
            model_name=self.model_name_,
            inputs=self.inputs_,
            outputs=self.outputs_,
        )

        returned = response.as_numpy("OUTPUT0")
        self.assertTrue(np.array_equal(self.input0_data_, returned))

    def test_http_async_infer(self):
        """Check network-timeout handling of asynchronous HTTP inference.

        With ``network_timeout=self.INFER_SMALL_INTERVAL`` the submit/collect
        sequence must raise ``socket.timeout``; with a second client the
        collected result must carry an OUTPUT0 tensor equal to the input.
        """
        self._prepare_request("http")

        # The model answers only after a delay (three seconds according to
        # the original test comment), so the short network timeout must trip
        # while the request is submitted or its result is collected.
        with self.assertRaises(socket.timeout) as timeout_ctx:
            impatient_client = httpclient.InferenceServerClient(
                url="localhost:8000",
                verbose=True,
                network_timeout=self.INFER_SMALL_INTERVAL,
            )
            pending = impatient_client.async_infer(
                model_name=self.model_name_,
                inputs=self.inputs_,
                outputs=self.outputs_,
            )
            pending.get_result()
        self.assertIn("timed out", str(timeout_ctx.exception))

        # Success path: a fresh client that only sets connection_timeout.
        patient_client = httpclient.InferenceServerClient(
            url="localhost:8000", verbose=True, connection_timeout=10.0
        )
        pending = patient_client.async_infer(
            model_name=self.model_name_,
            inputs=self.inputs_,
            outputs=self.outputs_,
        )
        response = pending.get_result()

        returned = response.as_numpy("OUTPUT0")
        self.assertTrue(np.array_equal(self.input0_data_, returned))


# Hand control to unittest's command-line runner when executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_client_timeout/client_non_infer_timeout_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class ClientNonInferTimeoutTest(tu.TestResultCollector):
    """Check that ``client_timeout`` is honored by non-inference gRPC APIs.

    Each test talks to the server at ``localhost:8001`` and calls one client
    API twice: first with ``SMALL_INTERVAL``, expecting an
    ``InferenceServerException`` whose text contains "Deadline Exceeded",
    then with ``NORMAL_INTERVAL``, expecting the call to succeed.

    The accompanying test.sh exports
    ``TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=2`` before starting the
    server for this file; presumably that is what makes the short timeout
    expire.
    """

    def setUp(self):
        """Define the model name, sample input and the two timeout values."""
        self.model_name_ = "custom_identity_int32"
        self.input0_data_ = np.array([[10]], dtype=np.int32)
        self.input0_data_byte_size_ = 32
        self.SMALL_INTERVAL = 0.1  # seconds for a timeout
        self.NORMAL_INTERVAL = 5.0  # seconds for server to load then receive request

    def _new_client(self):
        """Return a verbose gRPC client for the local test server."""
        return grpcclient.InferenceServerClient(url="localhost:8001", verbose=True)

    def _assert_deadline_exceeded(self, api, *args, **kwargs):
        """Call ``api`` with the small timeout and require a deadline error.

        ``api`` is a bound client method; ``args``/``kwargs`` are forwarded
        unchanged and ``client_timeout=self.SMALL_INTERVAL`` is added.
        """
        with self.assertRaises(InferenceServerException) as cm:
            api(*args, client_timeout=self.SMALL_INTERVAL, **kwargs)
        self.assertIn("Deadline Exceeded", str(cm.exception))

    def test_grpc_server_live(self):
        """``is_server_live`` times out, then reports True."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(triton_client.is_server_live)
        self.assertTrue(
            triton_client.is_server_live(client_timeout=self.NORMAL_INTERVAL)
        )

    def test_grpc_is_server_ready(self):
        """``is_server_ready`` times out, then reports True."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(triton_client.is_server_ready)
        self.assertTrue(
            triton_client.is_server_ready(client_timeout=self.NORMAL_INTERVAL)
        )

    def test_grpc_is_model_ready(self):
        """``is_model_ready`` times out, then reports True."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(
            triton_client.is_model_ready, model_name=self.model_name_
        )
        self.assertTrue(
            triton_client.is_model_ready(
                model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
            )
        )

    def test_grpc_get_server_metadata(self):
        """``get_server_metadata`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(triton_client.get_server_metadata)
        triton_client.get_server_metadata(client_timeout=self.NORMAL_INTERVAL)

    def test_grpc_get_model_metadata(self):
        """``get_model_metadata`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(
            triton_client.get_model_metadata, model_name=self.model_name_
        )
        triton_client.get_model_metadata(
            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
        )

    def test_grpc_get_model_config(self):
        """``get_model_config`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(
            triton_client.get_model_config, model_name=self.model_name_
        )
        triton_client.get_model_config(
            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
        )

    def test_grpc_model_repository_index(self):
        """``get_model_repository_index`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(triton_client.get_model_repository_index)
        triton_client.get_model_repository_index(client_timeout=self.NORMAL_INTERVAL)

    def test_grpc_load_model(self):
        """``load_model`` times out, then succeeds after a fresh unload."""
        triton_client = self._new_client()
        triton_client.unload_model(model_name=self.model_name_)
        self._assert_deadline_exceeded(
            triton_client.load_model, model_name=self.model_name_
        )
        triton_client.unload_model(
            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
        )
        triton_client.load_model(
            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
        )

    def test_grpc_unload_model(self):
        """``unload_model`` times out, then succeeds; the model ends loaded."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(
            triton_client.unload_model, model_name=self.model_name_
        )
        triton_client.load_model(model_name=self.model_name_)
        triton_client.unload_model(
            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
        )
        # Leave the model loaded for the tests that run afterwards.
        triton_client.load_model(model_name=self.model_name_)

    def test_grpc_get_inference_statistics(self):
        """``get_inference_statistics`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(
            triton_client.get_inference_statistics, model_name=self.model_name_
        )
        triton_client.get_inference_statistics(
            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
        )

    def test_grpc_update_trace_settings(self):
        """``update_trace_settings`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(
            triton_client.update_trace_settings, model_name=self.model_name_
        )
        triton_client.update_trace_settings(
            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
        )

    def test_grpc_get_trace_settings(self):
        """``get_trace_settings`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(
            triton_client.get_trace_settings, model_name=self.model_name_
        )
        triton_client.get_trace_settings(
            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
        )

    def test_grpc_update_log_settings(self):
        """``update_log_settings`` (empty settings) times out, then succeeds."""
        triton_client = self._new_client()
        settings = {}
        self._assert_deadline_exceeded(
            triton_client.update_log_settings, settings=settings
        )
        triton_client.update_log_settings(
            settings=settings, client_timeout=self.NORMAL_INTERVAL
        )

    def test_grpc_get_log_settings(self):
        """``get_log_settings`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(triton_client.get_log_settings, as_json=True)
        triton_client.get_log_settings(
            as_json=True, client_timeout=self.NORMAL_INTERVAL
        )

    def test_grpc_get_system_shared_memory_status(self):
        """``get_system_shared_memory_status`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(triton_client.get_system_shared_memory_status)
        triton_client.get_system_shared_memory_status(
            client_timeout=self.NORMAL_INTERVAL
        )

    def test_grpc_register_system_shared_memory(self):
        """``register_system_shared_memory`` times out, then succeeds.

        The shared-memory region created for the test is destroyed on every
        exit path so it does not outlive the test.
        """
        import tritonclient.utils.shared_memory as shm

        triton_client = self._new_client()
        triton_client.unregister_system_shared_memory()

        shm_ip0_handle = shm.create_shared_memory_region(
            "input0_data", "/input_simple", self.input0_data_byte_size_
        )
        try:
            shm.set_shared_memory_region(shm_ip0_handle, [self.input0_data_])
            self._assert_deadline_exceeded(
                triton_client.register_system_shared_memory,
                "input0_data",
                "/input_simple",
                self.input0_data_byte_size_,
            )
            # Clear whatever the timed-out attempt may have registered before
            # trying again with the normal timeout.
            triton_client.unregister_system_shared_memory()
            triton_client.register_system_shared_memory(
                "input0_data",
                "/input_simple",
                self.input0_data_byte_size_,
                client_timeout=self.NORMAL_INTERVAL,
            )
            triton_client.unregister_system_shared_memory()
        finally:
            shm.destroy_shared_memory_region(shm_ip0_handle)

    def test_grpc_unregister_system_shared_memory(self):
        """``unregister_system_shared_memory`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(triton_client.unregister_system_shared_memory)
        triton_client.unregister_system_shared_memory(
            client_timeout=self.NORMAL_INTERVAL
        )

    def test_grpc_get_cuda_shared_memory_status(self):
        """``get_cuda_shared_memory_status`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(triton_client.get_cuda_shared_memory_status)
        triton_client.get_cuda_shared_memory_status(client_timeout=self.NORMAL_INTERVAL)

    def test_grpc_register_cuda_shared_memory(self):
        """``register_cuda_shared_memory`` times out, then succeeds.

        The CUDA region is unregistered from the server before it is
        destroyed, and it is destroyed on every exit path.
        """
        import tritonclient.utils.cuda_shared_memory as cshm

        triton_client = self._new_client()

        input_data = np.array([[10]], dtype=np.int32)
        byteSize = input_data.itemsize * input_data.size
        shm_op0_handle = cshm.create_shared_memory_region(
            "dummy_data", byte_size=byteSize, device_id=0
        )
        try:
            cshm.set_shared_memory_region(shm_op0_handle, [input_data])
            self._assert_deadline_exceeded(
                triton_client.register_cuda_shared_memory,
                "dummy_data",
                cshm.get_raw_handle(shm_op0_handle),
                device_id=0,
                byte_size=byteSize,
            )
            # Clear whatever the timed-out attempt may have registered before
            # trying again with the normal timeout.
            triton_client.unregister_cuda_shared_memory()
            triton_client.register_cuda_shared_memory(
                "dummy_data",
                cshm.get_raw_handle(shm_op0_handle),
                device_id=0,
                byte_size=byteSize,
                client_timeout=self.NORMAL_INTERVAL,
            )
            # Do not leave the server holding a registration for memory that
            # is about to be freed.
            triton_client.unregister_cuda_shared_memory()
        finally:
            cshm.destroy_shared_memory_region(shm_op0_handle)

    def test_grpc_unregister_cuda_shared_memory(self):
        """``unregister_cuda_shared_memory`` times out, then succeeds."""
        triton_client = self._new_client()
        self._assert_deadline_exceeded(triton_client.unregister_cuda_shared_memory)
        triton_client.unregister_cuda_shared_memory(client_timeout=self.NORMAL_INTERVAL)


# Hand control to unittest's command-line runner when executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_client_timeout/models/custom_identity_int32/config.pbtxt
================================================
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model used by the L0_client_timeout tests. It is served by the "identity"
# backend; the Python tests in this directory assert that OUTPUT0 equals the
# INPUT0 they sent.
name: "custom_identity_int32"
backend: "identity"
max_batch_size: 1024
version_policy: { latest { num_versions: 1 }}
instance_group [ { kind: KIND_CPU } ]

input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]

# Artificial execution delay of 3000 ms. The timeout tests rely on it: their
# comments describe the model as taking three seconds to send its response.
parameters [
  {
    key: "execute_delay_ms"
    value: { string_value: "3000" }
  }
]


================================================
FILE: qa/L0_client_timeout/test.sh
================================================
#!/bin/bash
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: the first positional argument when one is passed
# (even an empty one), otherwise NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Values handed to the C++ client through its -t option
# (presumably microseconds -- confirm against client_timeout_test).
TIMEOUT_VALUE=100000000
SHORT_TIMEOUT_VALUE=1000

# Overall result of the script; set to 1 by any failing check.
RET=0

# Test programs and the result file checked after each Python test.
CLIENT_INFER_TIMEOUT_TEST=client_infer_timeout_test.py
CLIENT_NON_INFER_TIMEOUT_TEST=client_non_infer_timeout_test.py
CLIENT_TIMEOUT_TEST_CPP=../clients/client_timeout_test
TEST_RESULT_FILE='test_results.txt'

# Start from a clean set of logs.
rm -f *.log *.log.*

CLIENT_LOG=$(pwd)/client.log
CLIENT_GRPC_TIMEOUTS_LOG=$(pwd)/client.log.grpc
DATADIR=$(pwd)/models
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR --model-control-mode=explicit --load-model=custom_identity_int32 --log-verbose 2"
source ../common/util.sh

# Make sure the model's version directory exists.
mkdir -p $DATADIR/custom_identity_int32/1

# Test all APIs apart from Infer.
# The server is started with TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=2
# exported (presumably delaying every gRPC response by two seconds) so that
# the short client timeout below can expire.
export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=2
run_server
# NOTE(review): this inspects the exit status of run_server (defined in
# ../common/util.sh, not visible here) but reports a client-API failure;
# startup failure is detected separately through SERVER_PID below -- confirm
# whether this check is intended.
if [ $? -eq 1 ]; then
    echo -e "\n***\n*** Test Failed: GRPC non-infer APIs\n***"
    RET=1
fi
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Client failures are expected from here on; do not abort on them.
set +e
# Expect timeout for everything
# The run with the short timeout must log "Deadline Exceeded" exactly 18 times.
$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc -p >> ${CLIENT_LOG}.c++.grpc_non_infer_apis 2>&1
if [ `grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.grpc_non_infer_apis` != "18" ]; then
    cat ${CLIENT_LOG}.c++.grpc_non_infer_apis
    echo -e "\n***\n*** Test Failed. Expected 18 failed\n***"
    RET=1
fi
# Test all APIs with long timeout
# NOTE(review): an exit status of 0 is treated as failure here, the opposite
# of the long-timeout infer checks later in this script. Presumably the
# client exits non-zero when run with -p -- confirm against
# client_timeout_test before changing.
$CLIENT_TIMEOUT_TEST_CPP -t $TIMEOUT_VALUE -v -i grpc -p >> ${CLIENT_LOG} 2>&1
if [ $? -eq 0 ]; then
    echo -e "\n***\n*** Test Failed: GRPC non-infer APIs\n***"
    RET=1
fi

# Stop the delayed-response server before the infer tests start their own.
set -e
kill $SERVER_PID
wait $SERVER_PID

# Infer API tests: restart the server without the artificial gRPC response
# delay and with the default model-control mode.
unset TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC
SERVER_ARGS="--model-repository=$DATADIR --log-verbose 2"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
set +e

# CASE 1: the timeout is too small, so every client run must fail.
# custom_identity_int32 is configured with a delay of 3 sec.
# Each run is checked twice: a zero exit status is a failure, and the
# log must contain the expected error text.

# grpc synchronous inference: request timeout
if $CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc >> ${CLIENT_LOG}.c++.grpc_infer 2>&1; then
    RET=1
fi
if [ $(grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.grpc_infer) != "1" ]; then
    cat ${CLIENT_LOG}.c++.grpc_infer
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# grpc asynchronous inference: request timeout
if $CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc -a >> ${CLIENT_LOG}.c++.grpc_async_infer 2>&1; then
    RET=1
fi
if [ $(grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.grpc_async_infer) != "1" ]; then
    cat ${CLIENT_LOG}.c++.grpc_async_infer
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# grpc asynchronous streaming inference: stream timeout
if $CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc -s >> ${CLIENT_LOG}.c++.grpc_async_stream_infer 2>&1; then
    RET=1
fi
if [ $(grep -c "Stream has been closed" ${CLIENT_LOG}.c++.grpc_async_stream_infer) != "1" ]; then
    cat ${CLIENT_LOG}.c++.grpc_async_stream_infer
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# http synchronous inference: request timeout (any non-zero match count passes)
if $CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v >> ${CLIENT_LOG}.c++.http_infer 2>&1; then
    RET=1
fi
if [ $(grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.http_infer) == "0" ]; then
    cat ${CLIENT_LOG}.c++.http_infer
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi


# http asynchronous inference: request timeout (any non-zero match count passes)
if $CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -a >> ${CLIENT_LOG}.c++.http_async_infer 2>&1; then
    RET=1
fi
if [ $(grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.http_async_infer) == "0" ]; then
    cat ${CLIENT_LOG}.c++.http_async_infer
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

# Stop here when CASE 1 failed: shut the server down and report the failure.
if [ $RET -eq 1 ]; then
    kill $SERVER_PID
    wait $SERVER_PID
    exit $RET
fi


# CASE 2: with a sufficiently large timeout every client run must succeed.
set +e

echo "TEST:  GRPC Synchronous" >> ${CLIENT_LOG}
if ! $CLIENT_TIMEOUT_TEST_CPP -t $TIMEOUT_VALUE -v -i grpc >> ${CLIENT_LOG} 2>&1; then
    echo -e "\n***\n*** Test Failed: GRPC Synchronous\n***"
    RET=1
fi

echo "TEST:  GRPC Asynchronous" >> ${CLIENT_LOG}
if ! $CLIENT_TIMEOUT_TEST_CPP -t $TIMEOUT_VALUE -v -i grpc -a >> ${CLIENT_LOG}.c++.grpc_async_infer 2>&1; then
    echo -e "\n***\n*** Test Failed: GRPC Asynchronous\n***"
    RET=1
fi

echo "TEST:  GRPC Streaming" >> ${CLIENT_LOG}
if ! $CLIENT_TIMEOUT_TEST_CPP -t $TIMEOUT_VALUE -v -i grpc -s >> ${CLIENT_LOG}.c++.grpc_async_stream_infer 2>&1; then
    echo -e "\n***\n*** Test Failed: GRPC Streaming\n***"
    RET=1
fi

echo "TEST:  HTTP Synchronous" >> ${CLIENT_LOG}
if ! $CLIENT_TIMEOUT_TEST_CPP -t $TIMEOUT_VALUE -v >> ${CLIENT_LOG}.c++.http_infer 2>&1; then
    echo -e "\n***\n*** Test Failed: HTTP Synchronous\n***"
    RET=1
fi

echo "TEST:  HTTP Asynchronous" >> ${CLIENT_LOG}
if ! $CLIENT_TIMEOUT_TEST_CPP -t $TIMEOUT_VALUE -v -a >> ${CLIENT_LOG}.c++.http_async_infer 2>&1; then
    echo -e "\n***\n*** Test Failed: HTTP Asynchronous\n***"
    RET=1
fi

echo "TEST:  Python Library" >> ${CLIENT_LOG}

# CASE 3: Python Library
# Each ClientInferTimeoutTest case runs in its own python process; when the
# process succeeds, the result file must additionally report exactly one test.
for i in test_grpc_infer test_grpc_async_infer test_grpc_stream_infer \
         test_http_infer test_http_async_infer; do
    if ! python $CLIENT_INFER_TIMEOUT_TEST ClientInferTimeoutTest.$i >>$CLIENT_LOG 2>&1; then
        echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
        echo -e "\n***\n*** Test $i Failed\n***"
        RET=1
    elif ! check_test_results $TEST_RESULT_FILE 1; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
done

# Shut down the server used for the infer tests.
set -e
kill $SERVER_PID
wait $SERVER_PID

# Test all APIs other than infer
# The server is restarted with a two-second gRPC response delay and explicit
# model control so that the Python non-infer tests can load/unload the model.
export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=2
# SERVER_ARGS still holds --model-repository and --log-verbose 2 from the
# infer run, so only the model-control options are appended.
SERVER_ARGS="${SERVER_ARGS} --model-control-mode=explicit --load-model=custom_identity_int32"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
set +e

# Run the whole non-infer test file in one process. Name the test file in the
# failure message; the loop variable of the previous section is stale here.
python $CLIENT_NON_INFER_TIMEOUT_TEST >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test $CLIENT_NON_INFER_TIMEOUT_TEST Failed\n***" >>$CLIENT_LOG
    echo -e "\n***\n*** Test $CLIENT_NON_INFER_TIMEOUT_TEST Failed\n***"
    RET=1
fi

set -e
kill $SERVER_PID
wait $SERVER_PID

# Final verdict: dump the client log on failure, then exit with RET.
if [ $RET -ne 0 ]; then
    cat ${CLIENT_LOG}
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

set +e
exit $RET


================================================
FILE: qa/L0_client_valgrind/models/custom_identity_int32/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model repository entry for the L0_client_valgrind test, served by the
# "identity" backend. Unlike the L0_client_timeout copy of this model, no
# execute_delay_ms parameter is set here.
name: "custom_identity_int32"
backend: "identity"
max_batch_size: 1024
version_policy: { latest { num_versions: 1 }}
instance_group [ { kind: KIND_CPU } ]

# One INT32 input and one INT32 output, each declared with dims [ -1 ].
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]

================================================
FILE: qa/L0_client_valgrind/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Must run on a single device or else the TRITONSERVER_DELAY_SCHEDULER
# can fail when the requests are distributed to multiple devices.
export CUDA_VISIBLE_DEVICES=0

# Valgrind binary and the options shared by every run; a per-run --log-file
# option is appended in the test loop below.
LEAKCHECK=/usr/bin/valgrind
LEAKCHECK_ARGS_BASE="--leak-check=full --show-leak-kinds=definite --max-threads=3000"
# NOTE(review): presumably consumed by run_server from util.sh as the server
# startup wait limit -- confirm against ../common/util.sh.
SERVER_TIMEOUT=3600
# Clear logs left behind by a previous run.
rm -f *.log

# Client binary that is exercised under valgrind.
MEMORY_GROWTH_TEST=../clients/memory_leak_test

DATADIR=`pwd`/models
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR"
# run_server (called below) is not defined in this script; it is expected to
# come from this shared helper file, along with SERVER_PID / SERVER_LOG.
source ../common/util.sh

# Create the version directory for the custom_identity_int32 model.
mkdir -p $DATADIR/custom_identity_int32/1

# Aggregate test result; set to 1 by any failing step.
RET=0

run_server
# A SERVER_PID of "0" after run_server is treated as a failed launch.
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Exercise the client over HTTP and GRPC, once re-using the client object
# (-R) and once without re-use. Each combination issues 1000 inferences
# under valgrind.
EXTRA_ARGS="-r 1000"
for PROTOCOL in http grpc; do
    for REUSE in reuse no_reuse; do
        LEAKCHECK_LOG="./valgrind.${PROTOCOL}.${REUSE}.c++.log"
        CLIENT_LOG="./client.${PROTOCOL}.${REUSE}.c++.log"
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"

        # Common client arguments; -R is added only for the re-use variant.
        EXTRA_CLIENT_ARGS="${EXTRA_ARGS} -i ${PROTOCOL}"
        if [[ "$REUSE" == "reuse" ]]; then
            EXTRA_CLIENT_ARGS="${EXTRA_CLIENT_ARGS} -R"
        fi

        # A failing client run is reported directly; the valgrind log is only
        # inspected when the client itself succeeded.
        if ! $LEAKCHECK $LEAKCHECK_ARGS $MEMORY_GROWTH_TEST $EXTRA_CLIENT_ARGS >> ${CLIENT_LOG} 2>&1; then
            cat ${CLIENT_LOG}
            RET=1
            echo -e "\n***\n*** Test FAILED\n***"
        elif ! python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG; then
            echo -e "\n***\n*** Memory leak detected\n***"
            RET=1
        fi
    done
done

# Shut the server down and wait for it to exit before reporting.
kill $SERVER_PID
wait $SERVER_PID

# Print the aggregate verdict and propagate it as the exit status.
if [[ $RET -ne 0 ]]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_cmdline_trace/test.sh
================================================
#!/bin/bash
# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ============================= Helpers =======================================
# Abort the test if the most recent run_server call unexpectedly succeeded.
# Reads SERVER_PID, SERVER and SERVER_LOG; a SERVER_PID of "0" means the
# server did not start, which is the outcome the caller expects.
function assert_server_startup_failed() {
  if [[ "$SERVER_PID" == "0" ]]; then
      return 0
  fi

  # The server is running although it should have refused to start: dump its
  # log, stop it, and fail the whole script.
  echo -e "\n***\n***Fail: Server start should have failed $SERVER\n***"
  cat $SERVER_LOG
  set -e
  kill $SERVER_PID
  wait $SERVER_PID
  set +e
  exit 1
}

# Helper scripts: the trace summarizer and the single-request client.
TRACE_SUMMARY=../common/trace_summary.py
CLIENT_SCRIPT=trace_client.py

# Resolve the model repository version: the first positional argument wins,
# otherwise fall back to NVIDIA_TRITON_SERVER_VERSION from the environment.
if [[ $# -ge 1 ]]; then
    REPO_VERSION="$1"
else
    REPO_VERSION="${NVIDIA_TRITON_SERVER_VERSION}"
fi

if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Append the architecture suffix when TEST_REPO_ARCH is set.
if [[ -n "${TEST_REPO_ARCH}" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Restrict the test to GPU 0.
export CUDA_VISIBLE_DEVICES=0

DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/
MODELBASE=onnx_int32_int32_int32

# Scratch model repository rebuilt by this script for each scenario.
MODELSDIR=$(pwd)/trace_models

SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh

# Clear logs left behind by a previous run.
rm -f *.log

# Build a fresh model repository containing one model named "simple": a copy
# of MODELBASE with version directories 2 and 3 removed and its config renamed.
# NOTE: MODELBASE may need to be updated as backends are ported to the backend
# API, since (per the original note) the backend API did not yet support
# tracing.
rm -fr $MODELSDIR && mkdir -p $MODELSDIR && \
    cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
    rm -r $MODELSDIR/simple/2 && rm -r $MODELSDIR/simple/3 && \
    (cd $MODELSDIR/simple && \
            sed -i "s/^name:.*/name: \"simple\"/" config.pbtxt)

# Aggregate test result; set to 1 by any failing check.
RET=0

# With --trace-level=OFF no trace file may be produced, even though a trace
# file and a trace rate are configured.
SERVER_ARGS="--trace-file=trace_off.log --trace-level=OFF --trace-rate=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_off.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# 10 rounds x (grpc + http) = 20 inference requests. A client failure marks
# the test as failed but does not stop the run.
for round in {1..10}; do
    python3 $CLIENT_SCRIPT -i grpc -u localhost:8001 >> client_off.log 2>&1 || RET=1
    python3 $CLIENT_SCRIPT -i http -u localhost:8000 >> client_off.log 2>&1 || RET=1
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

# The mere existence of the trace file is a failure here.
if [[ -f ./trace_off.log ]]; then
    echo -e "\n***\n*** Test Failed, unexpected generation of trace_off.log\n***"
    RET=1
fi

set -e

# trace-rate == 1, trace-level=MIN: every request must be traced.
SERVER_ARGS="--trace-file=trace_min.log --trace-level=MIN --trace-rate=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_min.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# 10 rounds x (grpc + http) = 20 inference requests.
for round in {1..10}; do
    python3 $CLIENT_SCRIPT -i grpc -u localhost:8001 >> client_min.log 2>&1 || RET=1
    python3 $CLIENT_SCRIPT -i http -u localhost:8000 >> client_min.log 2>&1 || RET=1
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

$TRACE_SUMMARY -t trace_min.log > summary_min.log

# Both the compute marker and the per-model summary line must appear once per
# request, i.e. 20 times.
for expected in "COMPUTE_INPUT_END" "^simple"; do
    if [[ $(grep -c "$expected" summary_min.log) != "20" ]]; then
        cat summary_min.log
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
done

set -e

# trace-rate == 9, trace-level=MAX: the 20 requests below are expected to
# yield exactly 2 traces.
SERVER_ARGS="--http-thread-count=1 --trace-file=trace_max.log \
             --trace-level=MAX --trace-rate=9 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_max.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# 10 rounds x (grpc + http) = 20 inference requests.
for round in {1..10}; do
    python3 $CLIENT_SCRIPT -i grpc -u localhost:8001 >> client_max.log 2>&1 || RET=1
    python3 $CLIENT_SCRIPT -i http -u localhost:8000 >> client_max.log 2>&1 || RET=1
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

$TRACE_SUMMARY -t trace_max.log > summary_max.log

# Both the compute marker and the per-model summary line must appear twice.
for expected in "COMPUTE_INPUT_END" "^simple"; do
    if [[ $(grep -c "$expected" summary_max.log) != "2" ]]; then
        cat summary_max.log
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
done

set -e

# trace-rate == 1, trace-level=TIMESTAMPS: every request must be traced.
SERVER_ARGS="--trace-file=trace_1.log --trace-level=TIMESTAMPS --trace-rate=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_1.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# 10 rounds x (grpc + http) = 20 inference requests.
for round in {1..10}; do
    python3 $CLIENT_SCRIPT -i grpc -u localhost:8001 >> client_1.log 2>&1 || RET=1
    python3 $CLIENT_SCRIPT -i http -u localhost:8000 >> client_1.log 2>&1 || RET=1
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

$TRACE_SUMMARY -t trace_1.log > summary_1.log

# Both the compute marker and the per-model summary line must appear once per
# request, i.e. 20 times.
for expected in "COMPUTE_INPUT_END" "^simple"; do
    if [[ $(grep -c "$expected" summary_1.log) != "20" ]]; then
        cat summary_1.log
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
done

set -e

# trace-rate == 6, trace-level=TIMESTAMPS: the 20 requests below are expected
# to yield exactly 3 traces.
SERVER_ARGS="--http-thread-count=1 --trace-file=trace_6.log \
             --trace-level=TIMESTAMPS --trace-rate=6 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_6.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# 10 rounds x (grpc + http) = 20 inference requests.
for round in {1..10}; do
    python3 $CLIENT_SCRIPT -i grpc -u localhost:8001 >> client_6.log 2>&1 || RET=1
    python3 $CLIENT_SCRIPT -i http -u localhost:8000 >> client_6.log 2>&1 || RET=1
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

$TRACE_SUMMARY -t trace_6.log > summary_6.log

# Both the compute marker and the per-model summary line must appear 3 times.
for expected in "COMPUTE_INPUT_END" "^simple"; do
    if [[ $(grep -c "$expected" summary_6.log) != "3" ]]; then
        cat summary_6.log
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
done

set -e

# trace-rate == 6, trace-level=TIMESTAMPS, trace-log-frequency == 2: the
# traces are expected to be split across two indexed files, the first holding
# 2 traces and the second holding 1.
SERVER_ARGS="--http-thread-count=1 --trace-file=trace_frequency.log \
             --trace-level=TIMESTAMPS --trace-rate=6 \
             --trace-log-frequency=2 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_frequency.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# 10 rounds x (grpc + http) = 20 inference requests.
for round in {1..10}; do
    python3 $CLIENT_SCRIPT -i grpc -u localhost:8001 >> client_frequency.log 2>&1 || RET=1
    python3 $CLIENT_SCRIPT -i http -u localhost:8000 >> client_frequency.log 2>&1 || RET=1
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

# First trace file: 2 traces expected.
$TRACE_SUMMARY -t trace_frequency.log.0 > summary_frequency.log.0
for expected in "COMPUTE_INPUT_END" "^simple"; do
    if [[ $(grep -c "$expected" summary_frequency.log.0) != "2" ]]; then
        cat summary_frequency.log.0
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
done

# Second trace file: 1 trace expected.
$TRACE_SUMMARY -t trace_frequency.log.1 > summary_frequency.log.1
for expected in "COMPUTE_INPUT_END" "^simple"; do
    if [[ $(grep -c "$expected" summary_frequency.log.1) != "1" ]]; then
        cat summary_frequency.log.1
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
done

set -e

# trace-rate == 9, trace-level=TIMESTAMPS: the 20 requests below are expected
# to yield exactly 2 traces.
SERVER_ARGS="--http-thread-count=1 --trace-file=trace_9.log \
             --trace-level=TIMESTAMPS --trace-rate=9 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_9.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# 10 rounds x (grpc + http) = 20 inference requests.
for round in {1..10}; do
    python3 $CLIENT_SCRIPT -i grpc -u localhost:8001 >> client_9.log 2>&1 || RET=1
    python3 $CLIENT_SCRIPT -i http -u localhost:8000 >> client_9.log 2>&1 || RET=1
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

$TRACE_SUMMARY -t trace_9.log > summary_9.log

# Both the compute marker and the per-model summary line must appear twice.
for expected in "COMPUTE_INPUT_END" "^simple"; do
    if [[ $(grep -c "$expected" summary_9.log) != "2" ]]; then
        cat summary_9.log
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
done

set -e

# Demonstrate trace for ensemble
# set up "addsub" nested ensemble
# Rebuild the model repository from scratch: MODELBASE is copied under its own
# name this time, with version directories 2 and 3 removed.
rm -fr $MODELSDIR && mkdir -p $MODELSDIR && \
    cp -r $DATADIR/$MODELBASE $MODELSDIR/$MODELBASE && \
    rm -r $MODELSDIR/$MODELBASE/2 && rm -r $MODELSDIR/$MODELBASE/3

# nested ensemble
# Inner ensemble "fan_$MODELBASE": take its config from ENSEMBLEDIR, create an
# (empty) version directory 1, and strip any label_filename setting.
mkdir -p $MODELSDIR/fan_$MODELBASE/1 && \
    cp $ENSEMBLEDIR/fan_$MODELBASE/config.pbtxt $MODELSDIR/fan_$MODELBASE/. && \
        (cd $MODELSDIR/fan_$MODELBASE && \
                sed -i "s/label_filename:.*//" config.pbtxt)

# Outer ensemble "simple": start from the same fan config, rename it to
# "simple", and rewrite the first MODELBASE occurrence on each line to
# fan_MODELBASE so that it refers to the inner ensemble.
mkdir -p $MODELSDIR/simple/1 && \
    cp $ENSEMBLEDIR/fan_$MODELBASE/config.pbtxt $MODELSDIR/simple/. && \
        (cd $MODELSDIR/simple && \
                sed -i "s/^name:.*/name: \"simple\"/" config.pbtxt && \
                sed -i "s/$MODELBASE/fan_$MODELBASE/" config.pbtxt && \
                sed -i "s/label_filename:.*//" config.pbtxt)

# Copy the nop_TYPE_INT32_-1 model and make sure it has a version directory 1.
cp -r $ENSEMBLEDIR/nop_TYPE_INT32_-1 $MODELSDIR/. && \
    mkdir -p $MODELSDIR/nop_TYPE_INT32_-1/1

# trace-rate == 1, trace-level=TIMESTAMPS: a single request to the nested
# ensemble must produce one trace per composing model, linked by parent_id.
SERVER_ARGS="--http-thread-count=1 --trace-file=trace_ensemble.log \
             --trace-level=TIMESTAMPS --trace-rate=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_ensemble.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

python3 $CLIENT_SCRIPT -i http -u localhost:8000 >> client_ensemble.log 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

$TRACE_SUMMARY -t trace_ensemble.log > summary_ensemble.log

# Check if the traces are captured with proper hierarchy
if [ "$(grep -c "COMPUTE_INPUT_END" summary_ensemble.log)" != "7" ]; then
    echo -e "Ensemble trace log expects 7 compute"
    RET=1
fi

for trace_str in \
        "{\"id\":1,\"model_name\":\"simple\",\"model_version\":1,\"request_id\":\"1\"}" \
        "{\"id\":2,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":1}" \
        "{\"id\":3,\"model_name\":\"fan_${MODELBASE}\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":1}" \
        "{\"id\":4,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":3}" \
        "{\"id\":5,\"model_name\":\"${MODELBASE}\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":3}" \
        "{\"id\":6,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":3}" \
        "{\"id\":7,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":3}" \
        "{\"id\":8,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":1}" \
        "{\"id\":9,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":1}" ; do
    # The expected record is a literal string, so match it with -F and pass it
    # quoted. Quoting the command substitution matters too: if the trace file
    # is missing, grep prints nothing, and an unquoted empty result would turn
    # the test into a malformed expression that silently skips this failure.
    if [ "$(grep -c -F -- "${trace_str}" trace_ensemble.log)" != "1" ]; then
        echo -e "Ensemble trace log expects trace: ${trace_str}"
        RET=1
    fi
done

if [ "$(grep -c ^simple summary_ensemble.log)" != "1" ]; then
    cat summary_ensemble.log
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e


# trace-rate == 1, trace-level=TIMESTAMPS, trace-level=TENSORS: same nested
# ensemble request as above, additionally checking the traced tensor records.
SERVER_ARGS="--http-thread-count=1 --trace-file=trace_ensemble_tensor.log \
             --trace-level=TIMESTAMPS --trace-level=TENSORS --trace-rate=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_ensemble_tensor.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

python3 $CLIENT_SCRIPT -i http -u localhost:8000 >> client_ensemble_tensor.log 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

$TRACE_SUMMARY -t trace_ensemble_tensor.log > summary_ensemble_tensor.log

# Check if the traces are captured with proper hierarchy
if [ "$(grep -c "COMPUTE_INPUT_END" summary_ensemble_tensor.log)" != "7" ]; then
    echo -e "Ensemble trace tensors log expects 7 compute"
    RET=1
fi
for trace_str in \
        "{\"id\":1,\"model_name\":\"simple\",\"model_version\":1,\"request_id\":\"1\"}" \
        "{\"id\":2,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":1}" \
        "{\"id\":3,\"model_name\":\"fan_${MODELBASE}\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":1}" \
        "{\"id\":4,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":3}" \
        "{\"id\":5,\"model_name\":\"${MODELBASE}\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":3}" \
        "{\"id\":6,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":3}" \
        "{\"id\":7,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":3}" \
        "{\"id\":8,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":1}" \
        "{\"id\":9,\"model_name\":\"nop_TYPE_INT32_-1\",\"model_version\":1,\"request_id\":\"1\",\"parent_id\":1}" ; do
    # The expected record is a literal string, so match it with -F and pass it
    # quoted. Quoting the command substitution matters too: if the trace file
    # is missing, grep prints nothing, and an unquoted empty result would turn
    # the test into a malformed expression that silently skips this failure.
    if [ "$(grep -c -F -- "${trace_str}" trace_ensemble_tensor.log)" != "1" ]; then
        echo -e "Ensemble trace tensors log expects trace: ${trace_str}"
        RET=1
    fi
done

if [ "$(grep -c ^simple summary_ensemble_tensor.log)" != "1" ]; then
    cat summary_ensemble_tensor.log
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Count every occurrence (grep -o | wc -l), not just matching lines.
if [ $(grep -o TENSOR_QUEUE_INPUT trace_ensemble_tensor.log | wc -l) != "18" ]; then
    echo -e "Ensemble trace tensors log expects 18 TENSOR_QUEUE_INPUTs"
    RET=1
fi

if [ $(grep -o TENSOR_BACKEND_OUTPUT trace_ensemble_tensor.log | wc -l) != "14" ]; then
    echo -e "Ensemble trace tensors log expects 14 TENSOR_BACKEND_OUTPUTs"
    RET=1
fi

# Tensor records of the top-level request (id 1): the two inputs sent by the
# client and the two outputs returned to it.
for trace_str in \
        "{\"id\":1,\"activity\":\"TENSOR_QUEUE_INPUT\",\"tensor\":{\"name\":\"INPUT0\",\"data\":\"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\",\"shape\":\"1,16\",\"dtype\":\"INT32\"}}" \
        "{\"id\":1,\"activity\":\"TENSOR_QUEUE_INPUT\",\"tensor\":{\"name\":\"INPUT1\",\"data\":\"1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1\",\"shape\":\"1,16\",\"dtype\":\"INT32\"}}" \
        "{\"id\":1,\"activity\":\"TENSOR_BACKEND_OUTPUT\",\"tensor\":{\"name\":\"OUTPUT0\",\"data\":\"1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16\",\"shape\":\"1,16\",\"dtype\":\"INT32\"}}" \
        "{\"id\":1,\"activity\":\"TENSOR_BACKEND_OUTPUT\",\"tensor\":{\"name\":\"OUTPUT1\",\"data\":\"-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14\",\"shape\":\"1,16\",\"dtype\":\"INT32\"}}" ; do
    if [ "$(grep -c -F -- "${trace_str}" trace_ensemble_tensor.log)" != "1" ]; then
        echo -e "Ensemble trace tensors log expects trace: ${trace_str}"
        RET=1
    fi
done

set -e


# NOTE: no pass/fail verdict is printed at this point. More checks follow, and
# an early "Test Passed" would be misleading if one of them fails; the result
# is reported once at the end of the script.

# check deprecation warnings
SERVER_ARGS=" --trace-file=/tmp/trace.json --trace-rate=100 --trace-level=TIMESTAMPS \
              --trace-log-frequency=50 --trace-count=100 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_trace_config_flag.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# Each legacy trace flag passed above must trigger exactly one deprecation
# warning in the server log.
for deprecated_flag in trace-file trace-rate trace-level trace-log-frequency trace-count; do
    if [ "$(grep -c "Warning: '--${deprecated_flag}' has been deprecated" $SERVER_LOG)" != "1" ]; then
        cat $SERVER_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

################################################################################
# The following set of tests checks that tritonserver gracefully handles       #
# bad OpenTelemetry BatchSpanProcessor parameters, provided through            #
# environment variables, or tritonserver's options.                            #
################################################################################

# Mark the test as failed unless SERVER_LOG has exactly one line matching $1.
function expect_single_log_match() {
    if [ $(grep -c "$1" $SERVER_LOG) != "1" ]; then
        cat $SERVER_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
}

# Bad values supplied through environment variables: one variable at a time is
# exported, the server must refuse to start and name the variable in its log.
SERVER_ARGS="--trace-config mode=opentelemetry --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_trace_config_flag.log"
for otel_var in OTEL_BSP_MAX_QUEUE_SIZE OTEL_BSP_SCHEDULE_DELAY OTEL_BSP_MAX_EXPORT_BATCH_SIZE; do
    export "${otel_var}=bad_value"
    run_server
    assert_server_startup_failed
    expect_single_log_match "Bad option: \"${otel_var}\""
    unset "${otel_var}"
done

# Bad values supplied through --trace-config options: same expectation, with
# the offending option named in the log.
for bsp_option in bsp_max_queue_size bsp_schedule_delay bsp_max_export_batch_size; do
    SERVER_ARGS="--model-repository=$MODELSDIR --trace-config mode=opentelemetry \
             --trace-config opentelemetry,${bsp_option}=bad_value"
    SERVER_LOG="./inference_server_trace_config_flag.log"
    run_server
    assert_server_startup_failed
    expect_single_log_match "Bad option: \"--trace-config opentelemetry,${bsp_option}\""
done

# Print the aggregate verdict and propagate it as the exit status.
if [[ $RET -ne 0 ]]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_cmdline_trace/trace_client.py
================================================
#!/usr/bin/env python
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import sys

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient

if __name__ == "__main__":
    # Command-line driver: send one inference request to the "simple" model
    # over the selected protocol. The process exit status is the only result
    # reported to the caller.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-u",
        "--url",
        type=str,
        required=False,
        default="localhost:8001",
        help="Inference server URL. Default is localhost:8001.",
    )
    parser.add_argument("-i", "--protocol", type=str, required=True)
    FLAGS = parser.parse_args()

    # Any protocol value other than "grpc" selects the HTTP client.
    if FLAGS.protocol == "grpc":
        client_type = grpcclient
    else:
        client_type = httpclient

    try:
        triton_client = client_type.InferenceServerClient(url=FLAGS.url)
    except Exception as e:
        print("channel creation failed: " + str(e))
        # Exit non-zero so that callers checking the exit status see the
        # failure; a bare sys.exit() would report success.
        sys.exit(1)

    model_name = "simple"

    # Infer
    inputs = []
    outputs = []
    inputs.append(client_type.InferInput("INPUT0", [1, 16], "INT32"))
    inputs.append(client_type.InferInput("INPUT1", [1, 16], "INT32"))

    # INPUT0 is [[0, 1, ..., 15]] and INPUT1 is all ones, both of shape
    # (1, 16) and dtype int32.
    input0_data = np.arange(start=0, stop=16, dtype=np.int32)
    input0_data = np.expand_dims(input0_data, axis=0)
    input1_data = np.ones(shape=(1, 16), dtype=np.int32)

    inputs[0].set_data_from_numpy(input0_data)
    inputs[1].set_data_from_numpy(input1_data)

    outputs.append(client_type.InferRequestedOutput("OUTPUT0"))
    outputs.append(client_type.InferRequestedOutput("OUTPUT1"))

    # The response is not inspected here; an exception raised by infer()
    # terminates the script with a non-zero status.
    triton_client.infer(
        model_name=model_name, inputs=inputs, outputs=outputs, request_id="1"
    )


================================================
FILE: qa/L0_compute_capability/test.sh
================================================
#!/bin/bash
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first positional argument wins,
# otherwise fall back to NVIDIA_TRITON_SERVER_VERSION from the environment.
if [[ $# -ge 1 ]]; then
    REPO_VERSION="$1"
else
    REPO_VERSION="${NVIDIA_TRITON_SERVER_VERSION}"
fi

if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Append the architecture suffix when TEST_REPO_ARCH is set.
if [[ -n "${TEST_REPO_ARCH}" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

export CUDA_VISIBLE_DEVICES=0

TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
source ../common/util.sh

rm -f *.log

RET=0

# If BACKENDS not specified, set to all
BACKENDS=${BACKENDS:="onnx libtorch plan"}

# For every backend, start the server twice against a single-model repository:
# once with an unreachable minimum compute capability (startup must fail) and
# once with a very low one (startup must succeed).
for BACKEND in $BACKENDS; do
    # Need just one model for the backend...
    rm -fr models && mkdir models
    cp -r ${DATADIR}/qa_model_repository/${BACKEND}_float32_float32_float32 \
        models/.

    # Explicitly request a GPU instance for every backend except "plan"
    # (presumably plan models are GPU-only already -- TODO confirm).
    if [ "$BACKEND" != "plan" ]; then
        # Iterate over the glob directly instead of parsing `ls` output.
        for MC in models/*/config.pbtxt; do
            echo "instance_group [ { kind: KIND_GPU }]" >> "$MC"
        done
    fi

    # Run with a high minimum capability so that no GPUs are
    # recognized. This should cause the server to fail to start since
    # we explicitly asked for a GPU in the instance_group.
    SERVER_ARGS="--min-supported-compute-capability=900.0 --model-repository=`pwd`/models"
    SERVER_LOG="./inference_server_${BACKEND}_cc900.log"
    run_server
    if [ "$SERVER_PID" != "0" ]; then
        # The message reports the capability actually passed above (900.0).
        echo -e "\n***\n*** Unexpected success with min compute 900.0 for ${BACKEND}\n***"
        RET=1

        kill $SERVER_PID
        wait $SERVER_PID
    fi

    # Run with a low minimum capability and make sure GPUs are
    # recognized.
    SERVER_ARGS="--min-supported-compute-capability=1.0 --model-repository=`pwd`/models"
    SERVER_LOG="./inference_server_${BACKEND}_cc1.log"
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Unexpected failure with min compute 1.0 for ${BACKEND}\n***"
        RET=1
    else
        kill $SERVER_PID
        wait $SERVER_PID
    fi
done

# Print the final banner for both outcomes; the exit status mirrors RET.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test Failed\n***"
fi

exit $RET


================================================
FILE: qa/L0_config_json/ensemble_config.pbtxt
================================================
# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Two-step ensemble used by qa/L0_config_json/test.sh, which installs this
# file as models/simple_ensemble/config.pbtxt and inspects the JSON returned
# by the model config endpoint.
name: "simple_ensemble"
platform: "ensemble"
max_batch_size: 0
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
# Both steps run the same composing model. They differ in model_version
# (1 vs. -1) so the test can check that each value appears exactly once in
# the JSON config.
ensemble_scheduling {
  step [
    {
      # Step 1: ensemble inputs -> intermediate tensors out0/out1,
      # with the model version pinned to 1.
      model_name: "onnx_nobatch_float32_float32_float32"
      model_version: 1
      input_map [
        {
          key : "INPUT0"
          value : "INPUT0"
        },
        {
          key : "INPUT1"
          value : "INPUT1"
        }
      ]
      output_map [
        {
          key : "OUTPUT0"
          value : "out0"
        },
        {
          key : "OUTPUT1"
          value : "out1"
        }
      ]
    },
    {
      # Step 2: intermediate tensors out0/out1 -> ensemble outputs.
      # model_version -1 selects the latest version according to Triton's
      # model configuration documentation.
      model_name: "onnx_nobatch_float32_float32_float32"
      model_version: -1
      input_map [
        {
          key : "INPUT0"
          value : "out0"
        },
        {
          key : "INPUT1"
          value : "out1"
        }
      ]
      output_map [
        {
          key : "OUTPUT0"
          value : "OUTPUT0"
        },
        {
          key : "OUTPUT1"
          value : "OUTPUT1"
        }
      ]
    }
  ]
}


================================================
FILE: qa/L0_config_json/max_priority_level.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Identity-backend model whose dynamic batching settings sit at, or just
# past, the unsigned integer limits noted next to each field.
name: "max_priority_level"
backend: "identity"
max_batch_size: 1
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

dynamic_batching: {
  # Max uint64
  priority_levels: 18446744073709551615
  # Max uint32
  default_priority_level: 4294967295
  priority_queue_policy: [
    {
      # Max uint32 + 1
      key: 4294967296
      value: {
        timeout_action: REJECT
        # Max uint64
        default_timeout_microseconds: 18446744073709551615
        allow_timeout_override: true
        max_queue_size: 10
      }
    }
  ]
}

================================================
FILE: qa/L0_config_json/test.sh
================================================
#!/bin/bash
# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# A version passed as the first argument overrides the environment default.
# ${1-...} falls back only when no argument was given at all, so an
# explicitly empty argument still reaches the error branch below.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z $REPO_VERSION ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# An optional architecture tag is appended to the version as a suffix.
if [[ -n $TEST_REPO_ARCH ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

export CUDA_VISIBLE_DEVICES=0

# Locations of the model data and of the logs written by this script.
DATADIR="/data/inferenceserver/${REPO_VERSION}"
CLIENT_LOG="./client.log"
SERVER_LOG="./inference_server.log"

# Server binary and arguments consumed by run_server from util.sh.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$(pwd)/models"
source ../common/util.sh

# Overall verdict; flipped to 1 by any failing check.
RET=0
rm -fr *.log

# Test input and output dims are shown as numbers
TRIAL=ios

# Fresh repository holding only the unmodified model.
rm -fr models && mkdir models
cp -r "$DATADIR/qa_model_repository/onnx_nobatch_float32_float32_float32" models/.

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# A failing curl must not abort the script; the HTTP status is checked below.
set +e
code=$(curl -s -w '%{http_code}' -o "./$TRIAL.out" localhost:8000/v2/models/onnx_nobatch_float32_float32_float32/config)
set -e
if [[ $code != 200 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Count the numeric "dims":[16] entries in the JSON config
# (presumably one per input and output tensor -- 2 + 2).
matches=$(grep -o '"dims":\[16\]' "$TRIAL.out" | wc -l)
if [[ $matches -ne 4 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 4 dims, got $matches\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

# Test input and output reshape are shown as numbers
TRIAL=reshape

rm -fr models && mkdir models
cp -r "$DATADIR/qa_model_repository/onnx_nobatch_float32_float32_float32" models/.
# Add a reshape entry right after every FP32 data_type line of the config.
sed -i 's/data_type:.*TYPE_FP32/data_type: TYPE_FP32\nreshape: { shape: [ 16 ]}/g' \
    models/onnx_nobatch_float32_float32_float32/config.pbtxt

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# A failing curl must not abort the script; the HTTP status is checked below.
set +e
code=$(curl -s -w '%{http_code}' -o "./$TRIAL.out" localhost:8000/v2/models/onnx_nobatch_float32_float32_float32/config)
set -e
if [[ $code != 200 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Every injected reshape must come back with a numeric shape.
matches=$(grep -o '"reshape":{"shape":\[16\]}' "$TRIAL.out" | wc -l)
if [[ $matches -ne 4 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 4 reshape:shape, got $matches\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

# Test version_policy::specific
TRIAL=specific

rm -fr models && mkdir models
cp -r "$DATADIR/qa_model_repository/onnx_nobatch_float32_float32_float32" models/.
# Replace the model's version_policy line with a "specific" policy on version 1.
sed -i 's/^version_policy:.*/version_policy: { specific: { versions: [1] }}/' \
    models/onnx_nobatch_float32_float32_float32/config.pbtxt

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# A failing curl must not abort the script; the HTTP status is checked below.
set +e
code=$(curl -s -w '%{http_code}' -o "./$TRIAL.out" localhost:8000/v2/models/onnx_nobatch_float32_float32_float32/config)
set -e
if [[ $code != 200 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# The version list must be rendered as a JSON array of numbers.
matches=$(grep -o '"version_policy":{"specific":{"versions":\[1\]}}' "$TRIAL.out" | wc -l)
if [[ $matches -ne 1 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 1 version_policy:specific:versions, got $matches\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

# Test dynamic_batching::max_queue_delay_microseconds,
# dynamic_batching::default_queue_policy::default_timeout_microseconds,
# dynamic_batching::priority_queue_policy::value::default_timeout_microseconds
TRIAL=dbatch

rm -fr models && mkdir models
cp -r "$DATADIR/qa_model_repository/onnx_nobatch_float32_float32_float32" models/.
# Append a dynamic_batching section with one default and two priority queue
# policies, all using the same timeout.
echo "dynamic_batching: { max_queue_delay_microseconds: 42 \
          default_queue_policy: { default_timeout_microseconds: 123 } \
          priority_queue_policy: { key: 1  value: { default_timeout_microseconds: 123 }} \
          priority_queue_policy: { key: 2  value: { default_timeout_microseconds: 123 }}}" \
    >> models/onnx_nobatch_float32_float32_float32/config.pbtxt

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# A failing curl must not abort the script; the HTTP status is checked below.
set +e
code=$(curl -s -w '%{http_code}' -o "./$TRIAL.out" localhost:8000/v2/models/onnx_nobatch_float32_float32_float32/config)
set -e
if [[ $code != 200 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Exactly one dynamic_batching object is expected.
matches=$(grep -o '"dynamic_batching":{' "$TRIAL.out" | wc -l)
if [[ $matches -ne 1 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 1 dynamic_batching, got $matches\n***"
    RET=1
fi

# The queue delay must be rendered as a number.
matches=$(grep -o '"max_queue_delay_microseconds":42' "$TRIAL.out" | wc -l)
if [[ $matches -ne 1 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 1 dynamic_batching:max_queue_delay_microseconds, got $matches\n***"
    RET=1
fi

# One timeout for the default policy plus one for each priority policy.
matches=$(grep -o '"default_timeout_microseconds":123' "$TRIAL.out" | wc -l)
if [[ $matches -ne 3 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 3 dynamic_batching:*_queue_policy:default_timeout_microseconds, got $matches\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

# Test sequence_batching::oldest::max_queue_delay_microseconds,
# sequence_batching::max_sequence_idle_microseconds
TRIAL=sbatch

rm -fr models && mkdir models
cp -r "$DATADIR/qa_model_repository/onnx_nobatch_float32_float32_float32" models/.
# Append a sequence_batching section that uses the "oldest" strategy.
echo "sequence_batching: { max_sequence_idle_microseconds: 42 \
          oldest: { max_queue_delay_microseconds: 987 }}" \
    >> models/onnx_nobatch_float32_float32_float32/config.pbtxt

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# A failing curl must not abort the script; the HTTP status is checked below.
set +e
code=$(curl -s -w '%{http_code}' -o "./$TRIAL.out" localhost:8000/v2/models/onnx_nobatch_float32_float32_float32/config)
set -e
if [[ $code != 200 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Exactly one sequence_batching object is expected.
matches=$(grep -o '"sequence_batching":{' "$TRIAL.out" | wc -l)
if [[ $matches -ne 1 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 1 sequence_batching, got $matches\n***"
    RET=1
fi

# The idle timeout must be rendered as a number.
matches=$(grep -o '"max_sequence_idle_microseconds":42' "$TRIAL.out" | wc -l)
if [[ $matches -ne 1 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 1 sequence_batching:max_sequence_idle_microseconds, got $matches\n***"
    RET=1
fi

# The "oldest" strategy object must be present once.
matches=$(grep -o '"oldest":{' "$TRIAL.out" | wc -l)
if [[ $matches -ne 1 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 1 sequence_batching:oldest, got $matches\n***"
    RET=1
fi

# The strategy's queue delay must be rendered as a number.
matches=$(grep -o '"max_queue_delay_microseconds":987' "$TRIAL.out" | wc -l)
if [[ $matches -ne 1 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 1 sequence_batching:oldest:max_queue_delay_microseconds, got $matches\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

# Test ensemble_scheduling::step::model_version
TRIAL=ensemble

# The repository holds the composing model plus the ensemble built from
# ensemble_config.pbtxt (with an empty version directory "1").
rm -fr models && mkdir models
cp -r "$DATADIR/qa_model_repository/onnx_nobatch_float32_float32_float32" models/.
mkdir -p models/simple_ensemble/1 && cp ensemble_config.pbtxt models/simple_ensemble/config.pbtxt

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# A failing curl must not abort the script; the HTTP status is checked below.
set +e
code=$(curl -s -w '%{http_code}' -o "./$TRIAL.out" localhost:8000/v2/models/simple_ensemble/config)
set -e
if [[ $code != 200 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# One step carries model_version 1 ...
matches=$(grep -o '"model_version":1' "$TRIAL.out" | wc -l)
if [[ $matches -ne 1 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 1 ensemble_scheduling:step:model_version == 1, got $matches\n***"
    RET=1
fi

# ... and the other carries model_version -1.
matches=$(grep -o '"model_version":-1' "$TRIAL.out" | wc -l)
if [[ $matches -ne 1 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 1 ensemble_scheduling:step:model_version == -1, got $matches\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

rm -fr models/simple_ensemble

# Test model_warmup::inputs::value::dims
TRIAL=warmup

rm -fr models && mkdir models
cp -r "$DATADIR/qa_model_repository/onnx_nobatch_float32_float32_float32" models/.
# Append two warmup samples, each describing INPUT0 (zero data) and INPUT1
# (random data) with dims 16. The quoted delimiter keeps the text literal.
cat >> models/onnx_nobatch_float32_float32_float32/config.pbtxt <<'EOF'
model_warmup [{
    name : "warmup 1"
    batch_size: 1
    inputs [{
        key: "INPUT0"
        value: {
            data_type: TYPE_FP32
            dims: 16
            zero_data: true
        }
    }, {
        key: "INPUT1"
        value: {
            data_type: TYPE_FP32
            dims: 16
            random_data: true
        }
    }]
  }, {
    name : "warmup 2"
    batch_size: 1
    inputs [{
        key: "INPUT0"
        value: {
            data_type: TYPE_FP32
            dims: 16
            zero_data: true
        }
    }, {
        key: "INPUT1"
        value: {
            data_type: TYPE_FP32
            dims: 16
            random_data: true
        }
    }]
  }]
EOF

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# A failing curl must not abort the script; the HTTP status is checked below.
set +e
code=$(curl -s -w '%{http_code}' -o "./$TRIAL.out" localhost:8000/v2/models/onnx_nobatch_float32_float32_float32/config)
set -e
if [[ $code != 200 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# 8 = the 4 warmup input dims added above plus the model's own tensor dims
# (presumably 4, matching the count expected for the unmodified model).
matches=$(grep -o '"dims":\[16\]' "$TRIAL.out" | wc -l)
if [[ $matches -ne 8 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Expected 8 model_warmup:inputs:dims, got $matches\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

# Test max_priority_level
TRIAL=max_priority_level

# The repository holds only the model defined by max_priority_level.pbtxt.
rm -fr models && mkdir models
mkdir -p models/max_priority_level/1 && cp max_priority_level.pbtxt models/max_priority_level/config.pbtxt

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# A failing curl must not abort the script; the HTTP status is checked below.
set +e
code=$(curl -s -w '%{http_code}' -o "./$TRIAL.out" localhost:8000/v2/models/max_priority_level/config)
set -e
if [[ $code != 200 ]]; then
    cat "$TRIAL.out"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

MAX_UINT64=18446744073709551615
MAX_UINT32=4294967295
MAX_UINT32_PLUS_1=4294967296

# JSON key -> exact JSON value expected in the returned config. The large
# integers must come back as plain numbers.
declare -A expected_values=(
    ["priority_levels"]=$MAX_UINT64
    ["default_priority_level"]=$MAX_UINT32
    [$MAX_UINT32_PLUS_1]='{"timeout_action":"REJECT","default_timeout_microseconds":18446744073709551615,"allow_timeout_override":true,"max_queue_size":10}'
    ["default_timeout_microseconds"]=$MAX_UINT64
)

# Each "key":value pair has to appear exactly once in the output.
for field in "${!expected_values[@]}"; do
    want=${expected_values[$field]}
    matches=$(grep -o "\"${field}\":${want}" "$TRIAL.out" | wc -l)
    if [[ $matches -ne 1 ]]; then
        cat "$TRIAL.out"
        echo -e "\n***\n*** Expected 1 $field == $want, got $matches\n***"
        RET=1
    fi
done

kill $SERVER_PID
wait $SERVER_PID

# Print the final banner; the exit status mirrors RET.
if [[ $RET -ne 0 ]]; then
    echo -e "\n***\n*** Test Failed\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_cuda_graph/test.sh
================================================
#!/bin/bash
# Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The environment provides the default repository version; a first
# positional argument, when present, overrides it.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
[ $# -ge 1 ] && REPO_VERSION=$1
if [[ -z $REPO_VERSION ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# An optional architecture tag is appended to the version as a suffix.
if [[ -n $TEST_REPO_ARCH ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

export CUDA_VISIBLE_DEVICES=0

# Python test driver, its result file, and the local model repository.
TRT_CUDA_GRAPH_TEST=trt_cuda_graph_test.py
TEST_RESULT_FILE='test_results.txt'
CLIENT_LOG="./client.log"
DATADIR="./models"

# Start from an empty model repository.
rm -rf "${DATADIR}"
mkdir -p "${DATADIR}"

# Server binary, arguments and log consumed by run_server from util.sh.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--log-verbose=1 --model-repository=$DATADIR --strict-model-config=true"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

rm -f *.log*

# Overall verdict; flipped to 1 by any failing check.
RET=0

# TrtCudaGraphTest.test_fixed_shape
CLIENT_LOG="./fixed_shape.client.log"
SERVER_LOG="./fixed_shape.inference_server.log"

rm -rf "${DATADIR}" && mkdir -p "${DATADIR}"
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/plan_float32_float32_float32" "${DATADIR}/"
# Make sure only one version is present
rm -rf "${DATADIR}/plan_float32_float32_float32/3"
# Turn on CUDA graphs for the model.
echo "optimization { cuda { graphs: true } }" >> "${DATADIR}/plan_float32_float32_float32/config.pbtxt"

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Failures from here on are recorded in RET instead of aborting the script.
set +e
if ! python $TRT_CUDA_GRAPH_TEST TrtCudaGraphTest.test_fixed_shape >>"$CLIENT_LOG" 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat "$CLIENT_LOG"
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat "$CLIENT_LOG"
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi

# The verbose server log must show exactly one execution outside a CUDA graph
# and six captured graphs.
if [ $(grep -c 'Context with profile default \[0\] is being executed for ' "$SERVER_LOG") != "1" ]; then
    echo -e "\n***\n*** Failed. Expected only one execution without CUDA graph\n***"
    RET=1
fi

if [ $(grep -c 'captured CUDA graph for' "$SERVER_LOG") != "6" ]; then
    echo -e "\n***\n*** Failed. Expected 6 CUDA graphs are captured\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# TrtCudaGraphTest.test_dynamic_shape
# The dynamic-shape plan_float32_float32_float32 model has the 7
# optimization profiles listed below (idx 0-6); this test selects profile 0.
# min, opt, max, idx
# [1, 1], [1, 16], [8, 33], 0 (*)
# [1, 1], [2, 16], [7, 32], 1
# [1, 1], [3, 16], [6, 32], 2
# [1, 1], [4, 16], [5, 32], 3
# [5, 1], [6, 16], [8, 32], 4 (*)
# [6, 1], [6, 16], [8, 32], 5 (*)
# [1, 1], [1, 16], [8, 32], 6
SERVER_ARGS="--log-verbose=1 --model-repository=$DATADIR --strict-model-config=true"
CLIENT_LOG="./dynamic_shape.client.log"
SERVER_LOG="./dynamic_shape.inference_server.log"

rm -rf "${DATADIR}" && mkdir -p "${DATADIR}"
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/plan_float32_float32_float32" "${DATADIR}/"

# Restrict the model to profile "0" and turn on CUDA graphs.
sed -i 's/profile:.*/profile: ["0"]/' "${DATADIR}/plan_float32_float32_float32/config.pbtxt"
echo "optimization { cuda { graphs: true } }" >> "${DATADIR}/plan_float32_float32_float32/config.pbtxt"

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Failures from here on are recorded in RET instead of aborting the script.
set +e
if ! python $TRT_CUDA_GRAPH_TEST TrtCudaGraphTest.test_dynamic_shape >>"$CLIENT_LOG" 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat "$CLIENT_LOG"
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat "$CLIENT_LOG"
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi

# The verbose server log must show exactly two executions outside a CUDA
# graph and six captured graphs.
if [ $(grep -c 'Context with profile 0 \[0\] is being executed for ' "$SERVER_LOG") != "2" ]; then
    echo -e "\n***\n*** Failed. Expected 2 execution without CUDA graph\n***"
    RET=1
fi

if [ $(grep -c 'captured CUDA graph for' "$SERVER_LOG") != "6" ]; then
    echo -e "\n***\n*** Failed. Expected 6 CUDA graphs are captured\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# TrtCudaGraphTest.test_range_fixed_shape
SERVER_ARGS="--log-verbose=1 --model-repository=$DATADIR"
CLIENT_LOG="./range_fixed_shape.client.log"
SERVER_LOG="./range_fixed_shape.inference_server.log"

rm -rf "${DATADIR}" && mkdir -p "${DATADIR}"
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/plan_float32_float32_float32" "${DATADIR}/"
# Make sure only one version is present
rm -rf "${DATADIR}/plan_float32_float32_float32/3"

# One graph spec for batch size 4 with a lower bound of batch size 2.
echo "optimization { \
    cuda { \
        graphs: true \
        graph_spec [ { \
            batch_size: 4 \
            graph_lower_bound { \
                batch_size: 2 \
            } \
} ] } }" >> "${DATADIR}/plan_float32_float32_float32/config.pbtxt"

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Failures from here on are recorded in RET instead of aborting the script.
set +e
if ! python $TRT_CUDA_GRAPH_TEST TrtCudaGraphTest.test_range_fixed_shape >>"$CLIENT_LOG" 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat "$CLIENT_LOG"
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat "$CLIENT_LOG"
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi

# The verbose server log must show exactly three executions outside a CUDA
# graph and a single captured graph.
if [ $(grep -c 'Context with profile default \[0\] is being executed for ' "$SERVER_LOG") != "3" ]; then
    echo -e "\n***\n*** Failed. Expected only 3 execution without CUDA graph\n***"
    RET=1
fi

if [ $(grep -c 'captured CUDA graph for' "$SERVER_LOG") != "1" ]; then
    echo -e "\n***\n*** Failed. Expected 1 CUDA graphs are captured\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# TrtCudaGraphTest.test_range_dynamic_shape
# The dynamic-shape plan_float32_float32_float32 model has the 7
# optimization profiles listed below (idx 0-6); this test selects profile 0.
# min, opt, max, idx
# [1, 1], [1, 16], [8, 33], 0 (*)
# [1, 1], [2, 16], [7, 32], 1
# [1, 1], [3, 16], [6, 32], 2
# [1, 1], [4, 16], [5, 32], 3
# [5, 1], [6, 16], [8, 32], 4 (*)
# [6, 1], [6, 16], [8, 32], 5 (*)
# [1, 1], [1, 16], [8, 32], 6
#
# NOTE: SERVER_ARGS is not reassigned in this test case, so whatever value
# was set last earlier in the script is reused.
CLIENT_LOG="./range_dynamic_shape.client.log"
SERVER_LOG="./range_dynamic_shape.inference_server.log"

rm -rf "${DATADIR}" && mkdir -p "${DATADIR}"
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/plan_float32_float32_float32" "${DATADIR}/"

# Restrict the model to profile "0", then add one graph spec (batch size 4,
# inputs of dim 16) with a lower bound (batch size 2, inputs of dim 8).
sed -i 's/profile:.*/profile: ["0"]/' "${DATADIR}/plan_float32_float32_float32/config.pbtxt"
echo "optimization { \
    cuda { \
        graphs: true \
        graph_spec [ { \
            batch_size: 4 \
            input { key: \"INPUT0\" value: {dim : [16]} } \
            input { key: \"INPUT1\" value: {dim : [16]} } \
            graph_lower_bound { \
                batch_size: 2 \
                input { key: \"INPUT0\" value: {dim : [8]} } \
                input { key: \"INPUT1\" value: {dim : [8]} } \
            } \
} ] } }" >> "${DATADIR}/plan_float32_float32_float32/config.pbtxt"

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Failures from here on are recorded in RET instead of aborting the script.
set +e
if ! python $TRT_CUDA_GRAPH_TEST TrtCudaGraphTest.test_range_dynamic_shape >>"$CLIENT_LOG" 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat "$CLIENT_LOG"
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat "$CLIENT_LOG"
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi

# The verbose server log must show exactly four executions outside a CUDA
# graph and a single captured graph.
if [ $(grep -c 'Context with profile 0 \[0\] is being executed for ' "$SERVER_LOG") != "4" ]; then
    echo -e "\n***\n*** Failed. Expected 4 execution without CUDA graph\n***"
    RET=1
fi

if [ $(grep -c 'captured CUDA graph for' "$SERVER_LOG") != "1" ]; then
    echo -e "\n***\n*** Failed. Expected 1 CUDA graphs are captured\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# TrtCudaGraphTest.test_nobatch_fixed_shape
CLIENT_LOG="./nobatch_fixed_shape.client.log"
SERVER_LOG="./nobatch_fixed_shape.inference_server.log"

rm -rf "${DATADIR}" && mkdir -p "${DATADIR}"
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/plan_nobatch_float32_float32_float32" "${DATADIR}/"
# Make sure only one version is present
rm -rf "${DATADIR}/plan_nobatch_float32_float32_float32/2" "${DATADIR}/plan_nobatch_float32_float32_float32/3"
# Turn on CUDA graphs for the model.
echo "optimization { cuda { graphs: true } }" >> "${DATADIR}/plan_nobatch_float32_float32_float32/config.pbtxt"

run_server
if [ "$SERVER_PID" = "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Failures from here on are recorded in RET instead of aborting the script.
# The extra "plan_nobatch" argument is passed through to the Python driver.
set +e
if ! python $TRT_CUDA_GRAPH_TEST TrtCudaGraphTest.test_nobatch_fixed_shape plan_nobatch >>"$CLIENT_LOG" 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat "$CLIENT_LOG"
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat "$CLIENT_LOG"
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi

# The verbose server log must show no CUDA graph launch and a single
# captured graph.
if [ $(grep -c 'Context with profile default \[0\] is launching CUDA graph ' "$SERVER_LOG") != "0" ]; then
    echo -e "\n***\n*** Failed. Expected 0 execution with CUDA graph\n***"
    RET=1
fi

if [ $(grep -c 'captured CUDA graph for' "$SERVER_LOG") != "1" ]; then
    echo -e "\n***\n*** Failed. Expected 1 CUDA graph to be captured\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Print the final banner; the exit status mirrors RET.
if [[ $RET -ne 0 ]]; then
  echo -e "\n***\n*** Test Failed\n***"
else
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_cuda_graph/trt_cuda_graph_test.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import infer_util as iu
import numpy as np
import test_util as tu
from tritonclient.utils import *


class TrtCudaGraphTest(tu.TestResultCollector):
    """Inference checks for a TensorRT ("plan") model served with CUDA graphs.

    Each test sends FP32 requests of various batch sizes / tensor shapes to
    the model named by ``MODELNAME``. The companion shell script is
    responsible for starting the server and for inspecting the server log
    for captured CUDA graphs; this class only validates inference results.
    """

    # Base model name; may be overridden from the command line (see __main__).
    MODELNAME = "plan"

    def setUp(self):
        self.dtype_ = np.float32
        self.dtype_str_ = "FP32"
        self.model_name_ = self.MODELNAME

    def _check_infer(self, tensor_shape, batch_size=1):
        """Run an exact-match inference and fail the test on a server error.

        Parameters
        ----------
        tensor_shape : tuple of int
            Shape of a single (un-batched) input tensor.
        batch_size : int
            Batch dimension to prepend to ``tensor_shape``. A falsy value
            (0) sends ``tensor_shape`` as-is, i.e. without a batch dimension.
        """
        if batch_size:
            full_shape = (batch_size,) + tensor_shape
        else:
            full_shape = tensor_shape

        try:
            iu.infer_exact(
                self,
                self.model_name_,
                full_shape,
                batch_size,
                self.dtype_,
                self.dtype_,
                self.dtype_,
                model_version=1,
                use_http_json_tensors=False,
                use_grpc=False,
                use_streaming=False,
            )
        except InferenceServerException as ex:
            # self.fail() states the intent directly instead of asserting
            # on a constant False.
            self.fail("unexpected error {}".format(ex))

    def _erroneous_infer(self, tensor_shape, batch_size):
        """Send a request that is expected to produce *incorrect* outputs.

        The inputs are identical for INPUT0 and INPUT1, so a correct model
        would return ``input + input`` and ``input - input``. The test passes
        only when both returned outputs differ from those values.

        Parameters
        ----------
        tensor_shape : tuple of int
            Shape of a single (un-batched) input tensor.
        batch_size : int
            Batch dimension prepended to ``tensor_shape``.
        """
        import tritonhttpclient

        full_shape = (batch_size,) + tensor_shape
        element_count = int(np.prod(full_shape))
        input_np = np.arange(element_count, dtype=self.dtype_).reshape(full_shape)
        expected_output0_np = input_np + input_np
        expected_output1_np = input_np - input_np

        inputs = []
        for input_name in ("INPUT0", "INPUT1"):
            infer_input = tritonhttpclient.InferInput(
                input_name, full_shape, self.dtype_str_
            )
            infer_input.set_data_from_numpy(input_np)
            inputs.append(infer_input)

        outputs = [
            tritonhttpclient.InferRequestedOutput(output_name, binary_data=True)
            for output_name in ("OUTPUT0", "OUTPUT1")
        ]

        model_name = tu.get_model_name(
            self.model_name_, self.dtype_, self.dtype_, self.dtype_
        )
        # Use the client as a context manager so its connection is released
        # even when the request or an assertion below fails.
        with tritonhttpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        ) as client:
            results = client.infer(
                model_name=model_name, inputs=inputs, outputs=outputs
            )
            # Validate the results by comparing with precomputed values.
            output0_np = results.as_numpy("OUTPUT0")
            output1_np = results.as_numpy("OUTPUT1")

        self.assertFalse(
            np.array_equal(output0_np, expected_output0_np),
            "expects OUTPUT0 is not correct",
        )
        self.assertFalse(
            np.array_equal(output1_np, expected_output1_np),
            "expects OUTPUT1 is not correct",
        )

    def test_fixed_shape(self):
        tensor_shape = (16,)
        self._check_infer(tensor_shape)
        # Inference that should not have CUDA graph captured
        self._check_infer(tensor_shape, 5)

    def test_dynamic_shape(self):
        tensor_shape = (16,)
        self._check_infer(tensor_shape)
        # Inference that should not have CUDA graph captured
        self._check_infer((20,))
        self._check_infer(tensor_shape, 5)

    def test_range_fixed_shape(self):
        tensor_shape = (16,)
        # Inferences that are in range of captured CUDA graph,
        # model should tolerate difference in batch size
        self._check_infer(tensor_shape, 4)
        self._check_infer(tensor_shape, 2)
        # Inferences that shouldn't use CUDA graph
        self._check_infer(tensor_shape, 1)
        self._check_infer(tensor_shape, 8)

    def test_range_dynamic_shape(self):
        # Inferences that are in range of captured CUDA graph,
        # model should tolerate difference in batch size
        self._check_infer((16,), 4)
        self._check_infer((16,), 2)
        # Inference should return dummy result
        # because the input shape is different
        self._erroneous_infer((10,), 3)

        # Inferences that shouldn't use CUDA graph
        self._check_infer((7,), 3)
        self._check_infer((16,), 1)
        self._check_infer((16,), 8)
        self._check_infer((30,), 4)

    def test_nobatch_fixed_shape(self):
        # batch_size=0 sends the tensor without a batch dimension.
        self._check_infer((16,), 0)


if __name__ == "__main__":
    # When more than one argument follows the script name, the last one is
    # consumed as the model name override. It is popped from sys.argv so
    # that unittest.main() does not try to interpret it.
    if len(sys.argv) > 2:
        TrtCudaGraphTest.MODELNAME = sys.argv.pop()

    unittest.main()


================================================
FILE: qa/L0_cuda_shared_memory/cuda_shared_memory_test.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import base64
import os
import time
import unittest
from functools import partial

import infer_util as iu
import numpy as np
import requests
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
import tritonclient.utils.cuda_shared_memory as cshm
from tritonclient.utils import *


class CudaSharedMemoryTestBase(tu.TestResultCollector):
    """Shared fixture for the CUDA shared memory tests.

    Creates a Triton client (HTTP or gRPC, selected by the ``CLIENT_TYPE``
    environment variable) and keeps track of every CUDA shared memory handle
    created through ``_configure_server`` so it can be destroyed in
    ``tearDown``.
    """

    DEFAULT_SHM_BYTE_SIZE = 64

    def setUp(self):
        self._shm_handles = []
        self._setup_client()

    def tearDown(self):
        try:
            self._cleanup_shm_handles()
        finally:
            # Release the client connection even if region cleanup failed.
            self.triton_client.close()

    def _setup_client(self):
        """Create ``self.triton_client`` for the protocol under test."""
        self.protocol = os.environ.get("CLIENT_TYPE", "http")
        if self.protocol == "http":
            self.url = "localhost:8000"
            self.triton_client = httpclient.InferenceServerClient(
                self.url, verbose=True
            )
        else:
            self.url = "localhost:8001"
            self.triton_client = grpcclient.InferenceServerClient(
                self.url, verbose=True
            )

    def _configure_server(
        self,
        create_byte_size=DEFAULT_SHM_BYTE_SIZE,
        register_byte_size=DEFAULT_SHM_BYTE_SIZE,
        device_id=0,
    ):
        """Creates and registers cuda shared memory regions for testing.

        Four regions are created, in this order: "input0_data",
        "input1_data", "output0_data", "output1_data". Their handles are
        stored in ``self._shm_handles`` in the same order and their names in
        ``self.shm_names``.

        Parameters
        ----------
        create_byte_size: int
            Size of each cuda shared memory region to create.
            NOTE: This should be sufficiently large to hold the inputs/outputs
                  stored in shared memory.

        register_byte_size: int
            Size of each cuda shared memory region to register with server.
            NOTE: The register_byte_size should be less than or equal
            to the create_byte_size. Otherwise an exception will be raised for
            an invalid set of registration args.

        device_id: int
            The GPU device ID of the cuda shared memory region to be created.

        """

        self._cleanup_shm_handles()
        self.shm_names = ["input0_data", "input1_data", "output0_data", "output1_data"]

        # Track each handle as soon as it exists. Registration below may
        # raise (some tests rely on that), and the regions must still be
        # destroyed by tearDown in that case.
        for shm_name in self.shm_names:
            self._shm_handles.append(
                cshm.create_shared_memory_region(
                    shm_name, create_byte_size, device_id
                )
            )

        shm_ip0_handle, shm_ip1_handle = self._shm_handles[:2]
        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
        input1_data = np.ones(shape=16, dtype=np.int32)
        cshm.set_shared_memory_region(shm_ip0_handle, [input0_data])
        cshm.set_shared_memory_region(shm_ip1_handle, [input1_data])

        for shm_name, shm_handle in zip(self.shm_names, self._shm_handles):
            self.triton_client.register_cuda_shared_memory(
                shm_name,
                cshm.get_raw_handle(shm_handle),
                device_id,
                register_byte_size,
            )

    def _cleanup_shm_handles(self):
        """Destroy every tracked region and leave the handle list empty."""
        # Pop before destroying so a handle is never destroyed twice, even
        # if a destroy call raises and cleanup is attempted again later.
        while self._shm_handles:
            cshm.destroy_shared_memory_region(self._shm_handles.pop(0))


class CudaSharedMemoryTest(CudaSharedMemoryTestBase):
    """Register / unregister / inference tests for CUDA shared memory.

    Every region created by a test is appended to ``self._shm_handles``
    right after creation, so the base class ``tearDown`` destroys it even
    when an assertion fails part-way through the test.
    """

    def _create_tracked_region(self, name, byte_size, device_id=0):
        """Create a CUDA shared memory region and track it for cleanup."""
        shm_handle = cshm.create_shared_memory_region(name, byte_size, device_id)
        self._shm_handles.append(shm_handle)
        return shm_handle

    def _assert_registered_region_count(self, expected):
        """Assert how many CUDA shm regions the server currently reports.

        The HTTP client returns the regions as a sequence, while the gRPC
        client returns a message with a ``regions`` field.
        """
        shm_status = self.triton_client.get_cuda_shared_memory_status()
        if self.protocol == "http":
            self.assertEqual(len(shm_status), expected)
        else:
            self.assertEqual(len(shm_status.regions), expected)

    def test_invalid_create_shm(self):
        # Raises error since tried to create invalid cuda shared memory region
        with self.assertRaisesRegex(
            cshm.CudaSharedMemoryException, "unable to create cuda shared memory handle"
        ):
            self._create_tracked_region("dummy_data", -1, 0)

    def test_valid_create_set_register(self):
        # Create a valid cuda shared memory region, fill data in it and register
        shm_op0_handle = self._create_tracked_region("dummy_data", 8, 0)
        cshm.set_shared_memory_region(
            shm_op0_handle, [np.array([1, 2], dtype=np.float32)]
        )
        self.triton_client.register_cuda_shared_memory(
            "dummy_data", cshm.get_raw_handle(shm_op0_handle), 0, 8
        )
        self._assert_registered_region_count(1)
        self._cleanup_shm_handles()

    def test_unregister_before_register(self):
        # Create a valid cuda shared memory region and unregister before register
        self._create_tracked_region("dummy_data", 8, 0)
        self.triton_client.unregister_cuda_shared_memory("dummy_data")
        self._assert_registered_region_count(0)
        self._cleanup_shm_handles()

    def test_unregister_after_register(self):
        # Create a valid cuda shared memory region and unregister after register
        shm_op0_handle = self._create_tracked_region("dummy_data", 8, 0)
        self.triton_client.register_cuda_shared_memory(
            "dummy_data", cshm.get_raw_handle(shm_op0_handle), 0, 8
        )
        self.triton_client.unregister_cuda_shared_memory("dummy_data")
        self._assert_registered_region_count(0)
        self._cleanup_shm_handles()

    def test_reregister_after_register(self):
        # Create a valid cuda shared memory region and register it twice
        # under the same name; the second registration must be rejected.
        shm_op0_handle = self._create_tracked_region("dummy_data", 8, 0)
        self.triton_client.register_cuda_shared_memory(
            "dummy_data", cshm.get_raw_handle(shm_op0_handle), 0, 8
        )
        # assertRaises makes the test fail when the duplicate registration
        # is silently accepted, instead of skipping the message check.
        with self.assertRaises(InferenceServerException) as ctx:
            self.triton_client.register_cuda_shared_memory(
                "dummy_data", cshm.get_raw_handle(shm_op0_handle), 0, 8
            )
        self.assertIn(
            "shared memory region 'dummy_data' already in manager",
            str(ctx.exception),
        )
        # The original registration must still be the only one present.
        self._assert_registered_region_count(1)
        self._cleanup_shm_handles()

    def test_unregister_after_inference(self):
        # Unregister after inference
        error_msg = []
        self._configure_server()
        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            self._shm_handles[1],
            self._shm_handles[2],
            self._shm_handles[3],
            error_msg,
            protocol=self.protocol,
            use_cuda_shared_memory=True,
        )
        if len(error_msg) > 0:
            raise Exception(str(error_msg))

        self.triton_client.unregister_cuda_shared_memory("output0_data")
        self._assert_registered_region_count(3)
        self._cleanup_shm_handles()

    def test_register_after_inference(self):
        # Register after inference
        error_msg = []
        self._configure_server()
        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            self._shm_handles[1],
            self._shm_handles[2],
            self._shm_handles[3],
            error_msg,
            protocol=self.protocol,
            use_cuda_shared_memory=True,
        )
        if len(error_msg) > 0:
            raise Exception(str(error_msg))
        shm_ip2_handle = self._create_tracked_region("input2_data", 64, 0)
        self.triton_client.register_cuda_shared_memory(
            "input2_data", cshm.get_raw_handle(shm_ip2_handle), 0, 64
        )
        self._assert_registered_region_count(5)
        self._cleanup_shm_handles()

    def test_too_big_shm(self):
        # Shared memory input region larger than needed - Throws error
        error_msg = []
        self._configure_server()
        # Tracked immediately (appended after the four configured handles)
        # so it is destroyed even if registration or inference raises.
        shm_ip2_handle = self._create_tracked_region("input2_data", 128, 0)
        self.triton_client.register_cuda_shared_memory(
            "input2_data", cshm.get_raw_handle(shm_ip2_handle), 0, 128
        )
        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            shm_ip2_handle,
            self._shm_handles[2],
            self._shm_handles[3],
            error_msg,
            big_shm_name="input2_data",
            big_shm_size=128,
            protocol=self.protocol,
            use_cuda_shared_memory=True,
        )
        # NOTE(review): the message is only checked when an error was
        # reported; if the server is always expected to reject this request,
        # the presence of an error should be asserted as well - confirm.
        if len(error_msg) > 0:
            self.assertIn(
                "input byte size mismatch for input 'INPUT1' for model 'simple'. Expected 64, got 128",
                error_msg[-1],
            )
        self._cleanup_shm_handles()

    def test_mixed_raw_shm(self):
        # Mix of shared memory and RAW inputs
        error_msg = []
        self._configure_server()
        input1_data = np.ones(shape=16, dtype=np.int32)
        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            [input1_data],
            self._shm_handles[2],
            self._shm_handles[3],
            error_msg,
            protocol=self.protocol,
            use_cuda_shared_memory=True,
        )

        if len(error_msg) > 0:
            raise Exception(error_msg[-1])
        self._cleanup_shm_handles()

    def test_unregisterall(self):
        # Unregister all shared memory blocks
        self._configure_server()
        self._assert_registered_region_count(4)
        self.triton_client.unregister_cuda_shared_memory()
        self._assert_registered_region_count(0)
        self._cleanup_shm_handles()

    def test_register_out_of_bound(self):
        create_byte_size = self.DEFAULT_SHM_BYTE_SIZE
        # Verify various edge cases of registered region size don't go out of bounds of the actual created shm region's size.
        with self.assertRaisesRegex(
            InferenceServerException,
            "failed to register shared memory region.*invalid args",
        ):
            self._configure_server(
                create_byte_size=create_byte_size,
                register_byte_size=create_byte_size + 1,
            )

    def test_infer_offset_out_of_bound(self):
        # CUDA Shared memory offset outside output region - Throws error
        error_msg = []
        self._configure_server()
        if self.protocol == "http":
            # -32 when placed in an int64 signed type, to get a negative offset
            # by overflowing
            offset = 2**64 - 32
        else:
            # gRPC will throw an error if > 2**63 - 1, so instead test for
            # exceeding shm region size by 1 byte, given its size is 64 bytes
            offset = 64
        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            self._shm_handles[1],
            self._shm_handles[2],
            self._shm_handles[3],
            error_msg,
            shm_output_offset=offset,
            protocol=self.protocol,
            use_system_shared_memory=False,
            use_cuda_shared_memory=True,
        )

        self.assertEqual(len(error_msg), 1)
        self.assertIn("Invalid offset for shared memory region", error_msg[0])
        self._cleanup_shm_handles()

    def test_infer_byte_size_out_of_bound(self):
        # Shared memory byte_size outside output region - Throws error
        error_msg = []
        self._configure_server()
        # offset + byte_size (60 + 64) exceeds the 64-byte region.
        offset = 60
        byte_size = self.DEFAULT_SHM_BYTE_SIZE

        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            self._shm_handles[1],
            self._shm_handles[2],
            self._shm_handles[3],
            error_msg,
            shm_output_offset=offset,
            shm_output_byte_size=byte_size,
            protocol=self.protocol,
            use_system_shared_memory=False,
            use_cuda_shared_memory=True,
        )
        self.assertEqual(len(error_msg), 1)
        self.assertIn(
            "Invalid offset + byte size for shared memory region", error_msg[0]
        )
        self._cleanup_shm_handles()


def callback(user_data, result, error):
    """Record the outcome of an async inference in ``user_data``.

    Appends ``error`` when it is truthy, otherwise appends ``result``.
    """
    outcome = error if error else result
    user_data.append(outcome)


class TestCudaSharedMemoryUnregister(CudaSharedMemoryTestBase):
    """Tests for unregistering CUDA shm regions around in-flight inference.

    The ``*_http`` tests use the blocking ``get_result()`` of the HTTP async
    request; the ``*_grpc`` tests poll a list filled by ``callback``.
    Region status is always queried through a separate HTTP client.
    """

    def _create_request_data(self):
        """Re-register the default regions and build shm-backed inputs/outputs."""
        self.triton_client.unregister_cuda_shared_memory()
        self._configure_server()

        if self.protocol == "http":
            inputs = [
                httpclient.InferInput("INPUT0", [1, 16], "INT32"),
                httpclient.InferInput("INPUT1", [1, 16], "INT32"),
            ]
            outputs = [
                httpclient.InferRequestedOutput("OUTPUT0", binary_data=True),
                httpclient.InferRequestedOutput("OUTPUT1", binary_data=False),
            ]
        else:
            inputs = [
                grpcclient.InferInput("INPUT0", [1, 16], "INT32"),
                grpcclient.InferInput("INPUT1", [1, 16], "INT32"),
            ]
            outputs = [
                grpcclient.InferRequestedOutput("OUTPUT0"),
                grpcclient.InferRequestedOutput("OUTPUT1"),
            ]

        inputs[0].set_shared_memory("input0_data", self.DEFAULT_SHM_BYTE_SIZE)
        inputs[1].set_shared_memory("input1_data", self.DEFAULT_SHM_BYTE_SIZE)
        outputs[0].set_shared_memory("output0_data", self.DEFAULT_SHM_BYTE_SIZE)
        outputs[1].set_shared_memory("output1_data", self.DEFAULT_SHM_BYTE_SIZE)

        return inputs, outputs

    @staticmethod
    def _wait_for_callback(user_data, time_out):
        """Poll once per second until ``user_data`` is non-empty or time runs out.

        ``time_out`` is the maximum number of one-second polls. A fixed
        two-second pause always follows, whether or not a result arrived.
        """
        while (len(user_data) == 0) and time_out > 0:
            time_out = time_out - 1
            time.sleep(1)
        time.sleep(2)

    def _test_unregister_shm_request_pass(self):
        self._test_shm_found()

        # Unregister all should not result in an error.
        # If shared memory regions are in use, they will be marked and unregistered after the inference is completed.
        with httpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        ) as second_client:
            second_client.unregister_cuda_shared_memory()

        # Number of shared memory regions should be the same as the inference is not completed yet
        self._test_shm_found()

    def _test_shm_not_found(self):
        """Assert that none of the configured regions is known to the server."""
        with httpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        ) as second_client:
            for shm_name in self.shm_names:
                with self.assertRaises(InferenceServerException) as ex:
                    second_client.get_cuda_shared_memory_status(shm_name)
                # Checked after the 'with' block: placed inside it, after
                # the raising call, this assertion would never execute.
                self.assertIn(
                    f"Unable to find cuda shared memory region: '{shm_name}'",
                    str(ex.exception),
                )

    def _test_shm_found(self):
        """Assert that exactly the configured regions are registered."""
        with httpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        ) as second_client:
            status = second_client.get_cuda_shared_memory_status()

        self.assertEqual(len(status), len(self.shm_names))

        for shm_info in status:
            self.assertIn(shm_info["name"], self.shm_names)

    def test_unregister_shm_during_inference_single_req_http(self):
        inputs, outputs = self._create_request_data()

        async_request = self.triton_client.async_infer(
            model_name="simple", inputs=inputs, outputs=outputs
        )

        # Ensure inference started
        time.sleep(2)

        # Try unregister shm regions during inference
        self._test_unregister_shm_request_pass()

        # Blocking call
        async_request.get_result()

        # Test that all shm regions are successfully unregistered after inference without needing to call unregister again.
        self._test_shm_not_found()

    def test_unregister_shm_during_inference_multiple_req_http(self):
        inputs, outputs = self._create_request_data()

        # Place the first request
        async_request = self.triton_client.async_infer(
            model_name="simple", inputs=inputs, outputs=outputs
        )
        # Ensure inference started
        time.sleep(2)

        # Try unregister shm regions during inference
        self._test_unregister_shm_request_pass()
        time.sleep(2)

        # Place the second request; the context manager closes the extra
        # client once its request has completed.
        with httpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        ) as second_client:
            second_async_request = second_client.async_infer(
                model_name="simple", inputs=inputs, outputs=outputs
            )

            # Blocking call
            async_request.get_result()

            # Shm regions will remain available as the second request is still in progress
            self._test_shm_found()

            # Blocking call
            second_async_request.get_result()

        # Verify that all shm regions are successfully unregistered once all inference requests have completed,
        # without needing to manually call unregister again.
        self._test_shm_not_found()

    def test_unregister_shm_after_inference_http(self):
        inputs, outputs = self._create_request_data()

        async_request = self.triton_client.async_infer(
            model_name="simple", inputs=inputs, outputs=outputs
        )

        # Ensure inference started
        time.sleep(2)

        # Test all registered shm regions exist during inference.
        self._test_shm_found()

        # Blocking call
        async_request.get_result()

        # Test all registered shm regions exist after inference, as unregister API have not been called.
        self._test_shm_found()

        # Test all shm regions are successfully unregistered after calling the unregister API after inference completed.
        self.triton_client.unregister_cuda_shared_memory()
        self._test_shm_not_found()

    def test_unregister_shm_during_inference_single_req_grpc(self):
        inputs, outputs = self._create_request_data()
        user_data = []

        self.triton_client.async_infer(
            model_name="simple",
            inputs=inputs,
            outputs=outputs,
            callback=partial(callback, user_data),
        )

        # Ensure inference started
        time.sleep(2)

        # Try unregister shm regions during inference
        self._test_unregister_shm_request_pass()

        # Wait until the results are available in user_data
        self._wait_for_callback(user_data, 20)

        # Test that all shm regions are successfully unregistered after inference without needing to call unregister again.
        self._test_shm_not_found()

    def test_unregister_shm_during_inference_multiple_req_grpc(self):
        inputs, outputs = self._create_request_data()
        user_data = []

        self.triton_client.async_infer(
            model_name="simple",
            inputs=inputs,
            outputs=outputs,
            callback=partial(callback, user_data),
        )

        # Ensure inference started
        time.sleep(2)

        # Try unregister shm regions during inference
        self._test_unregister_shm_request_pass()

        # Place the second request; the extra client stays open until its
        # callback has been waited for, then is closed by the 'with' block.
        second_user_data = []
        with grpcclient.InferenceServerClient(
            "localhost:8001", verbose=True
        ) as second_client:
            second_client.async_infer(
                model_name="simple",
                inputs=inputs,
                outputs=outputs,
                callback=partial(callback, second_user_data),
            )

            # Wait until the 1st request results are available in user_data
            self._wait_for_callback(user_data, 10)

            # Shm regions will remain available as the second request is still in progress
            self._test_shm_found()

            # Wait until the 2nd request results are available in user_data
            self._wait_for_callback(second_user_data, 20)

        # Verify that all shm regions are successfully unregistered once all inference requests have completed,
        # without needing to manually call unregister again.
        self._test_shm_not_found()

    def test_unregister_shm_after_inference_grpc(self):
        inputs, outputs = self._create_request_data()
        user_data = []

        self.triton_client.async_infer(
            model_name="simple",
            inputs=inputs,
            outputs=outputs,
            callback=partial(callback, user_data),
        )

        # Ensure inference started
        time.sleep(2)

        # Test all registered shm regions exist during inference.
        self._test_shm_found()

        # Wait until the results are available in user_data
        self._wait_for_callback(user_data, 20)

        # Test all registered shm regions exist after inference, as unregister API have not been called.
        self._test_shm_found()

        # Test all shm regions are successfully unregistered after calling the unregister API after inference completed.
        self.triton_client.unregister_cuda_shared_memory()
        self._test_shm_not_found()


class CudaSharedMemoryTestRawHttpRequest(unittest.TestCase):
    def setUp(self):
        self.url = "localhost:8000"
        self.client = httpclient.InferenceServerClient(url=self.url, verbose=True)
        self.valid_shm_handle = None

    def tearDown(self):
        self.client.unregister_cuda_shared_memory()
        if self.valid_shm_handle:
            cshm.destroy_shared_memory_region(self.valid_shm_handle)
        self.client.close()

    def _generate_mock_base64_raw_handle(self, data_length):
        original_data_length = data_length * 3 // 4
        random_data = b"A" * original_data_length
        encoded_data = base64.b64encode(random_data)

        assert (
            len(encoded_data) == data_length
        ), "Encoded data length does not match the required length."
        return encoded_data

    def _send_register_cshm_request(self, raw_handle, device_id, byte_size, shm_name):
        cuda_shared_memory_register_request = {
            "raw_handle": {"b64": raw_handle.decode("utf-8")},
            "device_id": device_id,
            "byte_size": byte_size,
        }

        url = "http://{}/v2/cudasharedmemory/region/{}/register".format(
            self.url, shm_name
        )
        headers = {"Content-Type": "application/json"}

        # Send POST request
        response = requests.post(
            url, headers=headers, json=cuda_shared_memory_register_request
        )
        return response

    def test_exceeds_cshm_handle_size_limit(self):
        # byte_size greater than INT_MAX
        byte_size = 1 << 31
        device_id = 0
        shm_name = "invalid_shm"

        raw_handle = self._generate_mock_base64_raw_handle(byte_size)
        response = self._send_register_cshm_request(
            raw_handle, device_id, byte_size, shm_name
        )
        self.assertNotEqual(response.status_code, 200)

        try:
            error_message = response.json().get("error", "")
            self.assertIn(
                "Request JSON size",
                error_message,
            )
            self.assertIn(
                "exceeds the maximum allowed value",
                error_message,
            )
        except ValueError:
            self.fail("Response is not valid JSON")

    def test_invalid_small_cshm_handle(self):
        byte_size = 64
        device_id = 0
        shm_name = "invalid_shm"

        raw_handle = self._generate_mock_base64_raw_handle(byte_size)
        response = self._send_register_cshm_request(
            raw_handle, device_id, byte_size, shm_name
        )
        self.assertNotEqual(response.status_code, 200)

        try:
            error_message = response.json().get("error", "")
            self.assertIn(
                "'raw_handle' must be a valid base64 encoded cudaIpcMemHandle_t",
                error_message,
            )
        except ValueError:
            self.fail("Response is not valid JSON")

    def test_valid_cshm_handle(self):
        byte_size = 64
        device_id = 0
        shm_name = "test_shm"

        # Create valid shared memory
        self.valid_shm_handle = cshm.create_shared_memory_region(
            shm_name, byte_size, device_id
        )
        raw_handle = cshm.get_raw_handle(self.valid_shm_handle)

        response = self._send_register_cshm_request(
            raw_handle, device_id, byte_size, shm_name
        )
        self.assertEqual(response.status_code, 200)

        # Verify shared memory status
        status = self.client.get_cuda_shared_memory_status()
        self.assertEqual(len(status), 1)
        self.assertEqual(status[0]["name"], shm_name)


# Run the tests selected on the command line (or all tests in this module)
# when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_cuda_shared_memory/test.sh
================================================
#!/bin/bash
# Copyright 2019-2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Restrict the test to the first GPU.
export CUDA_VISIBLE_DEVICES=0

# Default client log and the Python test module driven by this script.
CLIENT_LOG="./client.log"
SHM_TEST=cuda_shared_memory_test.py

TEST_RESULT_FILE='test_results.txt'
SERVER=/opt/tritonserver/bin/tritonserver
# run_server and check_test_results are not defined in this script;
# presumably they are provided by the shared QA helpers sourced here.
source ../common/util.sh

# RET accumulates the overall result (0 = pass); start from a clean set of logs.
RET=0
rm -fr *.log

# Run every CudaSharedMemoryTest case once per client type (http, grpc).
# A fresh server is started for each (test, client type) pair, and all client
# output is appended to the shared $CLIENT_LOG.
for i in \
        test_invalid_create_shm \
        test_valid_create_set_register \
        test_unregister_before_register \
        test_unregister_after_register \
        test_reregister_after_register \
        test_unregister_after_inference \
        test_register_after_inference \
        test_too_big_shm \
        test_mixed_raw_shm \
        test_unregisterall \
        test_register_out_of_bound \
        test_infer_offset_out_of_bound \
        test_infer_byte_size_out_of_bound; do
    for client_type in http grpc; do
        SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
        SERVER_LOG="./$i.$client_type.server.log"
        run_server
        # SERVER_PID of "0" is treated as "server failed to start".
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        # The Python test reads the client type from the environment.
        export CLIENT_TYPE=$client_type
        echo "Test: $i, client type: $client_type" >>$CLIENT_LOG

        # Disable errexit so a failing test is recorded in RET instead of
        # aborting the script before the server is stopped.
        set +e
        python $SHM_TEST CudaSharedMemoryTest.$i >>$CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test Failed\n***"
            RET=1
        else
            # Presumably verifies that exactly 1 test result was reported in
            # $TEST_RESULT_FILE -- see check_test_results in util.sh.
            check_test_results $TEST_RESULT_FILE 1
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG
                echo -e "\n***\n*** Test Result Verification Failed\n***"
                RET=1
            fi
        fi
        set -e

        # Stop the server and wait for it to exit before the next iteration.
        kill $SERVER_PID
        wait $SERVER_PID
    done
done

# Run the CudaSharedMemoryTestRawHttpRequest cases (HTTP only). Each case gets
# its own server instance and its own server/client log files.
for i in \
        test_exceeds_cshm_handle_size_limit \
        test_invalid_small_cshm_handle \
        test_valid_cshm_handle; do
    SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
    SERVER_LOG="./$i.server.log"
    # NOTE: CLIENT_LOG is reassigned here, so from this point on it no longer
    # refers to ./client.log.
    CLIENT_LOG="./$i.client.log"
    run_server
    # SERVER_PID of "0" is treated as "server failed to start".
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi
    echo "Test: $i, client type: HTTP" >>$CLIENT_LOG
    # Disable errexit so a failing test is recorded in RET instead of aborting
    # the script before the server is stopped.
    set +e
    python $SHM_TEST CudaSharedMemoryTestRawHttpRequest.$i >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        # Presumably verifies that exactly 1 test result was reported in
        # $TEST_RESULT_FILE -- see check_test_results in util.sh.
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e
    # Stop the server and wait for it to exit before the next iteration.
    kill $SERVER_PID
    wait $SERVER_PID
done

# Build a model repository for the unregister tests: copy the
# execute_delayed_model Python model in as "simple" and switch its instance
# kind from KIND_CPU to KIND_GPU in the copied config.
mkdir -p python_models/simple/1/
cp ../python_models/execute_delayed_model/model.py ./python_models/simple/1/
cp ../python_models/execute_delayed_model/config.pbtxt ./python_models/simple/
sed -i 's/KIND_CPU/KIND_GPU/g' ./python_models/simple/config.pbtxt


# Run the TestCudaSharedMemoryUnregister cases against the python_models
# repository, once per client type. The Python test method name is
# "<test_case>_<client_type>". Each run uses its own server and client log.
for test_case in \
        test_unregister_shm_during_inference_single_req \
        test_unregister_shm_during_inference_multiple_req \
        test_unregister_shm_after_inference; do
    for client_type in http grpc; do
        # SERVER_ARGS_EXTRA is not set in this script; it is taken from the
        # environment if present.
        SERVER_ARGS="--model-repository=`pwd`/python_models --log-verbose=1 ${SERVER_ARGS_EXTRA}"
        SERVER_LOG="./${test_case}_${client_type}.server.log"
        run_server
        # SERVER_PID of "0" is treated as "server failed to start".
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        export CLIENT_TYPE=$client_type
        CLIENT_LOG="./${test_case}_${client_type}.client.log"
        # errexit stays disabled until after the server's exit status has been
        # inspected below.
        set +e
        python3 $SHM_TEST "TestCudaSharedMemoryUnregister.${test_case}_${client_type}" >>"$CLIENT_LOG" 2>&1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Failed - ${test_case}_${client_type}\n***"
            RET=1
        else
            # Presumably verifies that exactly 1 test result was reported in
            # $TEST_RESULT_FILE -- see check_test_results in util.sh.
            check_test_results $TEST_RESULT_FILE 1
            if [ $? -ne 0 ]; then
                cat $TEST_RESULT_FILE
                echo -e "\n***\n*** Test Result Verification Failed - ${test_case}_${client_type}\n***"
                RET=1
            fi
        fi

        # Stop the server; a non-zero status from wait is reported as a
        # non-graceful shutdown and fails the test.
        kill $SERVER_PID
        wait $SERVER_PID
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test Server shut down non-gracefully\n***"
            RET=1
        fi
        set -e
    done
done

# Report the overall result and exit with it.
# CLIENT_LOG is reassigned for every test in the later loops, so printing
# $CLIENT_LOG alone would show only the last test's output. Dump every client
# log (./client.log and the per-test *.client.log files) so that the output
# of whichever test failed is visible.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat ./*client.log
    echo -e "\n***\n*** Test Failed\n***"
fi

exit $RET


================================================
FILE: qa/L0_custom_model_config/test.sh
================================================
#!/bin/bash
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first positional argument wins,
# otherwise fall back to NVIDIA_TRITON_SERVER_VERSION from the environment.
if [ "$#" -ge 1 ]; then
    REPO_VERSION="$1"
else
    REPO_VERSION="${NVIDIA_TRITON_SERVER_VERSION}"
fi

# A version is mandatory; fail the test right away when none was given.
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Optionally suffix the version with the architecture under test.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Restrict the test to the first GPU.
export CUDA_VISIBLE_DEVICES=0

DATADIR="/data/inferenceserver/${REPO_VERSION}"
CLIENT_LOG="./client.log"
SERVER_LOG="./inference_server.log"

SERVER=/opt/tritonserver/bin/tritonserver
# run_server is not defined in this script; presumably it is provided by the
# shared QA helpers sourced here.
source ../common/util.sh

# RET accumulates the overall result (0 = pass); start from a clean set of logs.
RET=0
rm -fr *.log

# Create a fresh local model repository holding a single ONNX model, plus an
# empty configs/ directory that will receive the alternative configurations.
rm -fr models && mkdir models
cp -r $DATADIR/qa_model_repository/onnx_nobatch_float32_float32_float32 models/.
mkdir models/onnx_nobatch_float32_float32_float32/configs

# Start the server with the current SERVER_ARGS, fetch the configuration of
# onnx_nobatch_float32_float32_float32 over HTTP and check that it reports the
# expected version policy. The server is stopped again before returning.
#
# Arguments:
#   $@ - expected contents of version_policy.specific.versions, e.g. "1,3"
# Globals:
#   SERVER_ARGS, SERVER, SERVER_LOG - consumed when starting the server
#   RET - set to 1 when a check fails
# Exits the script with status 1 when the server cannot be started.
test_custom_config()
{
    VERSION=$@

    run_server
    # SERVER_PID of "0" is treated as "server failed to start".
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # curl writes the response body to ./curl.out and prints only the HTTP
    # status code, which is captured in $code.
    set +e
    code=`curl -s -w %{http_code} -o ./curl.out localhost:8000/v2/models/onnx_nobatch_float32_float32_float32/config`
    set -e
    if [ "$code" != "200" ]; then
        # Show the response body that curl saved. "|| true" keeps errexit from
        # aborting the script (and leaking the server) if no body was written.
        cat ./curl.out || true
        echo -e "\n***\n*** Test Failed to GET model configuration\n***"
        RET=1
    fi

    # Exactly one occurrence of the expected version policy must be present in
    # the returned JSON configuration.
    matches=`grep -o "\"version_policy\":{\"specific\":{\"versions\":\[$VERSION\]}}" curl.out | wc -l`
    if [ $matches -ne 1 ]; then
        cat curl.out
        echo -e "\n***\n*** Expected 1 version_policy:specific:versions, got $matches\n***"
        RET=1
    fi

    kill $SERVER_PID
    wait $SERVER_PID
}

# Prepare the file structure.
# Each configuration variant is given a different specific version policy so
# that the configuration picked up by the server can be told apart.
VERSION_DEFAULT="1,3"
VERSION_H100="1"
VERSION_V100="2"
VERSION_CUSTOM="3"

# Distinguish configs with different model versions.
# Default config.pbtxt in the model directory.
(cd models/onnx_nobatch_float32_float32_float32 && \
     sed -i "s/^version_policy:.*/version_policy: { specific: { versions: [$VERSION_DEFAULT] }}/" config.pbtxt)
# configs/h100.pbtxt, derived from the default config.
(cd models/onnx_nobatch_float32_float32_float32 && \
     cp config.pbtxt configs/h100.pbtxt && \
     sed -i "s/^version_policy:.*/version_policy: { specific: { versions: [$VERSION_H100] }}/" configs/h100.pbtxt)
# configs/v100.pbtxt, derived from the default config.
(cd models/onnx_nobatch_float32_float32_float32 && \
     cp config.pbtxt configs/v100.pbtxt && \
     sed -i "s/^version_policy:.*/version_policy: { specific: { versions: [$VERSION_V100] }}/" configs/v100.pbtxt)
# configs/config.pbtxt, i.e. a custom config whose name is literally "config".
(cd models/onnx_nobatch_float32_float32_float32 && \
     cp config.pbtxt configs/config.pbtxt && \
     sed -i "s/^version_policy:.*/version_policy: { specific: { versions: [$VERSION_CUSTOM] }}/" configs/config.pbtxt)

# Each case below sets SERVER_ARGS and then calls test_custom_config with the
# version list expected in the configuration reported by the server.

# Test default model config
SERVER_ARGS="--model-repository=`pwd`/models"
test_custom_config $VERSION_DEFAULT

# Test model-config-name=h100
SERVER_ARGS="--model-repository=`pwd`/models --model-config-name=h100"
test_custom_config $VERSION_H100

# Test model-config-name=v100
SERVER_ARGS="--model-repository=`pwd`/models --model-config-name=v100"
test_custom_config $VERSION_V100

# Test model-config-name=config
SERVER_ARGS="--model-repository=`pwd`/models --model-config-name=config"
test_custom_config $VERSION_CUSTOM

# Test model-config-name=h200. Expect fall back to default config since h200 config does not exist.
SERVER_ARGS="--model-repository=`pwd`/models --model-config-name=h200"
test_custom_config $VERSION_DEFAULT

# Test model-config-name= (empty value). The server is expected to refuse to
# start, so a successful start is the failure condition here.
SERVER_ARGS="--model-repository=`pwd`/models --model-config-name="
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***"
    cat $SERVER_LOG
    RET=1

    kill $SERVER_PID
    wait $SERVER_PID
fi

# Report the overall result and exit with it.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test Failed\n***"
fi

exit $RET


================================================
FILE: qa/L0_custom_ops/mod_op_test.py
================================================
#!/usr/bin/python

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import sys
from builtins import range

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

# Parsed command-line options; populated when the module is run as a script.
FLAGS = None

if __name__ == "__main__":
    # Command-line options: verbosity, server URL, protocol and model name.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-u",
        "--url",
        type=str,
        required=False,
        default="localhost:8000",
        help="Inference server URL. Default is localhost:8000.",
    )
    parser.add_argument(
        "-i",
        "--protocol",
        type=str,
        required=False,
        default="http",
        help='Protocol ("http"/"grpc") used to '
        + 'communicate with inference service. Default is "http".',
    )
    parser.add_argument("-m", "--model", type=str, required=True, help="Name of model.")

    FLAGS = parser.parse_args()
    if FLAGS.protocol not in ("http", "grpc"):
        print(
            'unexpected protocol "{}", expects "http" or "grpc"'.format(FLAGS.protocol)
        )
        # sys.exit rather than the interactive-only exit() helper from site.
        sys.exit(1)

    # Both client modules are used through the same set of names here.
    client_util = httpclient if FLAGS.protocol == "http" else grpcclient

    # Run the custom_modulo model, which depends on a custom mod operation
    model_name = FLAGS.model
    elements = 10

    # Create the inference context for the model.
    client = client_util.InferenceServerClient(FLAGS.url, verbose=FLAGS.verbose)

    # Create the data for the two input tensors: the values 1..elements and a
    # constant divisor of 2.
    input_data = [
        np.arange(start=1, stop=1 + elements, dtype=np.float32),
        np.array([2] * elements, dtype=np.float32),
    ]

    # Describe every input with its own shape and dtype (not those of the
    # first tensor) and attach its data.
    inputs = []
    for i, data in enumerate(input_data):
        infer_input = client_util.InferInput(
            "INPUT__{}".format(i),
            data.shape,
            np_to_triton_dtype(data.dtype),
        )
        infer_input.set_data_from_numpy(data)
        inputs.append(infer_input)

    results = client.infer(model_name, inputs)

    # We expect 1 result of size 10 with alternating 1 and 0.
    output_data = results.as_numpy("OUTPUT__0")
    if output_data is None:
        print("error: expected 'OUTPUT__0'")
        sys.exit(1)

    # Print every element and compare it with the modulo computed locally.
    for i in range(elements):
        print(
            str(i)
            + ": "
            + str(input_data[0][i])
            + " % "
            + str(input_data[1][i])
            + " = "
            + str(output_data[i])
        )
        if (input_data[0][i] % input_data[1][i]) != output_data[i]:
            print("error: incorrect value")
            sys.exit(1)


================================================
FILE: qa/L0_custom_ops/onnx_op_test.py
================================================
#!/usr/bin/python

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import sys
from builtins import range

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

# Parsed command-line options; populated when the module is run as a script.
FLAGS = None

if __name__ == "__main__":
    # Command-line options: verbosity, server URL, protocol and model name.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-u",
        "--url",
        type=str,
        required=False,
        default="localhost:8000",
        help="Inference server URL. Default is localhost:8000.",
    )
    parser.add_argument(
        "-i",
        "--protocol",
        type=str,
        required=False,
        default="http",
        help='Protocol ("http"/"grpc") used to '
        + 'communicate with inference service. Default is "http".',
    )
    parser.add_argument("-m", "--model", type=str, required=True, help="Name of model.")

    FLAGS = parser.parse_args()
    if FLAGS.protocol not in ("http", "grpc"):
        print(
            'unexpected protocol "{}", expects "http" or "grpc"'.format(FLAGS.protocol)
        )
        # sys.exit rather than the interactive-only exit() helper from site.
        sys.exit(1)

    # Both client modules are used through the same set of names here.
    client_util = httpclient if FLAGS.protocol == "http" else grpcclient

    # Run the given model; the check at the end expects it to return the
    # element-wise sum of its two inputs.
    model_name = FLAGS.model
    shape = (3, 5)
    dtype = np.float32

    # Create the inference context for the model.
    client = client_util.InferenceServerClient(FLAGS.url, verbose=FLAGS.verbose)

    # Create the data for the two input tensors (all ones).
    input_data = [np.ones(shape, dtype=dtype), np.ones(shape, dtype=dtype)]

    # Inputs are named input_1 and input_2.
    inputs = []
    for i, data in enumerate(input_data):
        infer_input = client_util.InferInput(
            "input_{}".format(i + 1), shape, np_to_triton_dtype(dtype)
        )
        infer_input.set_data_from_numpy(data)
        inputs.append(infer_input)

    results = client.infer(model_name, inputs)

    # We expect 1 result with the same shape as the inputs, holding their
    # element-wise sum.
    output_data = results.as_numpy("output")
    if output_data is None:
        print("error: expected 'output'")
        sys.exit(1)

    # Print every element and compare it with the sum computed locally.
    for i in range(shape[0]):
        for j in range(shape[1]):
            print(
                str(input_data[0][i][j])
                + " + "
                + str(input_data[1][i][j])
                + " = "
                + str(output_data[i][j])
            )
            if (input_data[0][i][j] + input_data[1][i][j]) != output_data[i][j]:
                print("error: incorrect value")
                sys.exit(1)


================================================
FILE: qa/L0_custom_ops/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first positional argument wins,
# otherwise fall back to NVIDIA_TRITON_SERVER_VERSION from the environment.
if [ "$#" -ge 1 ]; then
    REPO_VERSION="$1"
else
    REPO_VERSION="${NVIDIA_TRITON_SERVER_VERSION}"
fi

# A version is mandatory; fail the test right away when none was given.
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Optionally suffix the version with the architecture under test.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Restrict the test to the first GPU.
export CUDA_VISIBLE_DEVICES=0

# Client log shared by all client scripts, and the client scripts themselves.
CLIENT_LOG="./client.log"
MOD_OP_TEST=mod_op_test.py
VISION_OP_TEST=vision_op_test.py
ONNX_OP_TEST=onnx_op_test.py

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_LOG="./inference_server.log"
# run_server is not defined in this script; presumably it is provided by the
# shared QA helpers sourced here.
source ../common/util.sh

rm -f $SERVER_LOG $CLIENT_LOG

# RET accumulates the overall result (0 = pass).
RET=0

# Must set LD_LIBRARY_PATH just for the server launch so that the
# custom operations can find libtorch.so and other pytorch dependencies.
LD_LIBRARY_PATH=/opt/tritonserver/backends/pytorch:$LD_LIBRARY_PATH

# Pytorch
SERVER_ARGS="--model-repository=/data/inferenceserver/${REPO_VERSION}/qa_custom_ops/libtorch_custom_ops"
# FIXME: Pre-loading the python library system to satisfy the symbol definitions
# as the custom op library is built with different python version within
# pytorch container. See DLIS-4152.
SERVER_LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libpython3.12.so.1:/data/inferenceserver/${REPO_VERSION}/qa_custom_ops/libtorch_custom_ops/libtorch_modulo/custom_modulo.so"
run_server
# SERVER_PID of "0" is treated as "server failed to start".
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so a failing client is recorded in RET instead of aborting
# the script before the server is stopped.
set +e

# Custom modulo op model.
python $MOD_OP_TEST -v -m libtorch_modulo >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Custom vision op model.
python $VISION_OP_TEST -v -m libtorch_visionop >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

# NOTE(review): "Test Passed" is printed here for the PyTorch part only; the
# ONNX part has not run yet at this point.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
fi

kill $SERVER_PID
wait $SERVER_PID

# ONNX
# Build a one-model repository from custom_op_test.onnx.
rm -rf onnx_custom_ops && \
    mkdir -p onnx_custom_ops/custom_op/1 && \
    cp custom_op_test.onnx onnx_custom_ops/custom_op/1/model.onnx

# Write a minimal config.pbtxt that names the custom op library to load.
touch onnx_custom_ops/custom_op/config.pbtxt
echo "name: \"custom_op\"" >> onnx_custom_ops/custom_op/config.pbtxt && \
echo "platform: \"onnxruntime_onnx\"" >> onnx_custom_ops/custom_op/config.pbtxt && \
echo "max_batch_size: 0" >> onnx_custom_ops/custom_op/config.pbtxt && \
echo "model_operations { op_library_filename: \"./libcustom_op_library.so\" }" >> onnx_custom_ops/custom_op/config.pbtxt

SERVER_ARGS="--model-repository=onnx_custom_ops --strict-model-config=false"
# Clear the preload list that was set for the PyTorch run.
SERVER_LD_PRELOAD=""
run_server
# SERVER_PID of "0" is treated as "server failed to start".
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so a failing client is recorded in RET instead of aborting
# the script before the server is stopped.
set +e

python $ONNX_OP_TEST -v -m custom_op >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

# RET also carries any failure from the earlier PyTorch part.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
fi

kill $SERVER_PID
wait $SERVER_PID

exit $RET


================================================
FILE: qa/L0_custom_ops/vision_op_test.py
================================================
#!/usr/bin/python

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import sys

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

# Parsed command-line options; populated when the module is run as a script.
FLAGS = None

if __name__ == "__main__":
    # Command-line options: verbosity, server URL, protocol and model name.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-u",
        "--url",
        type=str,
        required=False,
        default="localhost:8000",
        help="Inference server URL. Default is localhost:8000.",
    )
    parser.add_argument(
        "-i",
        "--protocol",
        type=str,
        required=False,
        default="http",
        help='Protocol ("http"/"grpc") used to '
        + 'communicate with inference service. Default is "http".',
    )
    parser.add_argument("-m", "--model", type=str, required=True, help="Name of model.")

    FLAGS = parser.parse_args()
    if FLAGS.protocol not in ("http", "grpc"):
        print(
            'unexpected protocol "{}", expects "http" or "grpc"'.format(FLAGS.protocol)
        )
        # sys.exit rather than the interactive-only exit() helper from site.
        sys.exit(1)

    # Both client modules are used through the same set of names here.
    client_util = httpclient if FLAGS.protocol == "http" else grpcclient

    # Run the libtorch_visionop model, which depends on a torchvision custom operation
    model_name = FLAGS.model

    # Create the inference context for the model.
    client = client_util.InferenceServerClient(FLAGS.url, verbose=FLAGS.verbose)

    # Create the data for the input tensors: a random 1x3x10x10 tensor and a
    # single row of five box values.
    input_data = np.random.rand(1, 3, 10, 10).astype(np.float32)
    box_data = np.array([[1, 1, 2, 3, 4]]).astype(np.float32)

    # Describe both inputs and attach their data.
    inputs = []
    for name, data in (("INPUT__0", input_data), ("INPUT__1", box_data)):
        infer_input = client_util.InferInput(
            name, data.shape, np_to_triton_dtype(data.dtype)
        )
        infer_input.set_data_from_numpy(data)
        inputs.append(infer_input)

    results = client.infer(model_name, inputs)

    # We expect 1 result of shape [1, 3, 5, 5]. Only the shape is verified,
    # not the values.
    output_data = results.as_numpy("OUTPUT__0")
    if output_data is None:
        print("error: expected 'OUTPUT__0'")
        sys.exit(1)

    if output_data.shape != (1, 3, 5, 5):
        print("error: incorrect shape " + str(output_data.shape) + " for 'OUTPUT__0'")
        sys.exit(1)


================================================
FILE: qa/L0_data_compression/test.sh
================================================
#!/bin/bash
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first positional argument wins,
# otherwise fall back to NVIDIA_TRITON_SERVER_VERSION from the environment.
if [ "$#" -ge 1 ]; then
    REPO_VERSION="$1"
else
    REPO_VERSION="${NVIDIA_TRITON_SERVER_VERSION}"
fi

# A version is mandatory; fail the test right away when none was given.
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Optionally suffix the version with the architecture under test.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# run_server (used further below) is not defined in this script; presumably it
# is provided by the shared QA helpers sourced here.
source ../common/util.sh

# RET accumulates the overall result (0 = pass).
RET=0

TEST_LOG="./data_compressor_test.log"
DATA_COMPRESSOR_TEST=./data_compressor_test


# Restrict the test to the first GPU.
export CUDA_VISIBLE_DEVICES=0

# Remove logs and data files left over from a previous run.
rm -fr *.log *_data

# Disable errexit so failures are recorded in RET instead of aborting.
set +e

# Create the reference input and its deflate/gzip compressed counterparts.
# NOTE(review): the exit status of the generation step is not checked.
echo "All work and no play makes Jack a dull boy" >> raw_data
python3 validation.py generate_compressed_data

# Run the data compressor test binary against the server libraries.
LD_LIBRARY_PATH=/opt/tritonserver/lib:${LD_LIBRARY_PATH} $DATA_COMPRESSOR_TEST >>$TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Data Compression Test Failed\n***"
    RET=1
fi

# Check the generated_*_compressed_data files (presumably written by the
# binary above) by decompressing them and comparing against raw_data.
python3 validation.py validate_compressed_data
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Data Compression Failed\n***"
    RET=1
fi

set -e

# End-to-end testing with simple model
# Run one inference client with the requested compression settings.
#
# Arguments:
#   $1 - path to the client; a path ending in ".py" is run through python,
#        anything else is executed directly (C++ client)
#   $2 - request compression algorithm, empty for none
#   $3 - response compression algorithm, empty for none
#   $4 - log file that receives the client's stdout and stderr (appended)
# Returns the exit status of the client.
function run_data_compression_infer_client() {
    local client=$1
    local req_alg=$2
    local resp_alg=$3
    local log=$4

    # The Python and C++ clients use different option names.
    local launcher req_flag resp_flag
    case "$client" in
        *.py)
            launcher="python $client"
            req_flag="--request-compression-algorithm"
            resp_flag="--response-compression-algorithm"
            ;;
        *)
            launcher=$client
            req_flag="-i"
            resp_flag="-o"
            ;;
    esac

    # Always run verbosely; add a compression option only when an algorithm
    # was actually requested.
    local args="-v"
    if [ -n "$req_alg" ]; then
        args+=" $req_flag $req_alg"
    fi
    if [ -n "$resp_alg" ]; then
        args+=" $resp_flag $resp_alg"
    fi

    # Word splitting of $launcher and $args is intentional here.
    $launcher $args >> $log 2>&1
    return $?
}

# Clients exercised end to end: Python sync, Python aio and the C++ client.
SIMPLE_INFER_CLIENT_PY=../clients/simple_http_infer_client.py
SIMPLE_AIO_INFER_CLIENT_PY=../clients/simple_http_aio_infer_client.py
SIMPLE_INFER_CLIENT=../clients/simple_http_infer_client

CLIENT_LOG=`pwd`/client.log
DATADIR=`pwd`/models
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR"

# NOTE(review): SERVER_LOG is not assigned in this script; presumably a
# default comes from util.sh -- confirm.
run_server
# SERVER_PID of "0" is treated as "server failed to start".
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Try every client with every pair of request/response algorithms. Pairs
# where both sides are identical (including both empty) are skipped.
for INFER_CLIENT in "$SIMPLE_INFER_CLIENT_PY" "$SIMPLE_AIO_INFER_CLIENT_PY" "$SIMPLE_INFER_CLIENT"; do
    for REQUEST_ALGORITHM in "deflate" "gzip" ""; do
        for RESPONSE_ALGORITHM in "deflate" "gzip" ""; do
            if [ "$REQUEST_ALGORITHM" == "$RESPONSE_ALGORITHM" ]; then
                continue
            fi

            # Disable errexit so a failing client is recorded in RET instead
            # of aborting the script before the server is stopped.
            set +e
            run_data_compression_infer_client "$INFER_CLIENT" "$REQUEST_ALGORITHM" "$RESPONSE_ALGORITHM" "$CLIENT_LOG"
            if [ $? -ne 0 ]; then
                RET=1
            fi
            set -e
        done
    done
done

kill $SERVER_PID
wait $SERVER_PID

# Report the overall result; on failure dump all logs for diagnosis.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $TEST_LOG
    cat $SERVER_LOG
    cat ${CLIENT_LOG}
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_data_compression/validation.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys


def generate_compressed_data():
    """Write zlib- and gzip-compressed copies of ``raw_data``.

    Reads the file ``raw_data`` from the current working directory and
    writes ``deflate_compressed_data`` (zlib) and ``gzip_compressed_data``
    (gzip) next to it.
    """
    import gzip
    import zlib

    with open("raw_data", "rb") as source:
        payload = source.read()

    # (output file name, compressor) pairs, written in this order.
    targets = (
        ("deflate_compressed_data", zlib.compress),
        ("gzip_compressed_data", gzip.compress),
    )
    for filename, compress in targets:
        with open(filename, "wb") as sink:
            sink.write(compress(payload))


def validate_compressed_data():
    """Check that the generated compressed files decompress to ``raw_data``.

    Reads ``raw_data``, ``generated_deflate_compressed_data`` (zlib) and
    ``generated_gzip_compressed_data`` (gzip) from the current working
    directory.

    Raises:
        SystemExit: with exit code 1 if either decompressed payload differs
            from the contents of ``raw_data``.
    """
    import gzip
    import zlib

    with open("raw_data", "rb") as f:
        raw_data = f.read()

    # (generated file name, decompressor) pairs, checked in this order.
    checks = (
        ("generated_deflate_compressed_data", zlib.decompress),
        ("generated_gzip_compressed_data", gzip.decompress),
    )
    for filename, decompress in checks:
        with open(filename, "rb") as cf:
            if decompress(cf.read()) != raw_data:
                # Use sys.exit rather than the `exit` helper injected by the
                # `site` module, which is meant for interactive sessions and
                # is not guaranteed to exist (e.g. `python -S`).
                sys.exit(1)


if __name__ == "__main__":
    # The first command-line argument names the module-level function to
    # run, e.g. "generate_compressed_data" or "validate_compressed_data".
    selected_action = globals()[sys.argv[1]]
    selected_action()


================================================
FILE: qa/L0_decoupled/decoupled_test.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import queue
import threading
import time
import unittest
from functools import partial

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException


class UserData:
    """Container for the queue that receives streamed responses or errors."""

    def __init__(self):
        # Unbounded FIFO; maxsize=0 is the queue.Queue default.
        self._response_queue = queue.Queue(maxsize=0)


def callback(user_data, result, error):
    """Queue the outcome of a stream callback on ``user_data``.

    A truthy ``error`` is enqueued in place of ``result``; otherwise
    ``result`` is enqueued.
    """
    outcome = error if error else result
    user_data._response_queue.put(outcome)


class DecoupledTest(tu.TestResultCollector):
    def setUp(self):
        """Create the trial table and the gRPC input/output descriptors."""
        self.model_name_ = "repeat_int32"

        # Each trial is (model name, validator). A validator of None selects
        # the default OUT/IDX checks performed in _decoupled_infer.
        self.trials_ = [
            ("repeat_int32", None),
            ("simple_repeat", None),
            ("sequence_repeat", None),
            ("fan_repeat", self._fan_validate),
            ("repeat_square", self._nested_validate),
            ("nested_square", self._nested_validate),
        ]

        self.inputs_ = [
            grpcclient.InferInput("IN", [1], "INT32"),
            grpcclient.InferInput("DELAY", [1], "UINT32"),
            grpcclient.InferInput("WAIT", [1], "UINT32"),
        ]

        self.outputs_ = [
            grpcclient.InferRequestedOutput(output_name)
            for output_name in ("OUT", "IDX")
        ]
        # Some trials only expect a subset of outputs
        self.requested_outputs_ = self.outputs_

    # Client can receive a "triton_final_response" response parameter
    # from Triton server that indicates when a response is the final response for
    # its request.
    #
    # For non-decoupled models, there is a 1:1 request:response ratio, so every
    # response is the final response, and this parameter is unnecessary.
    #
    # For decoupled models, there is a 1:N request:response ratio, so there may be
    # more than one response before receiving the "final" response.
    #
    # However, decoupled models have the unique property in that they can return
    # a flags-only response to the server to indicate completion, which is not
    # returned to the client by default (See TRITONBACKEND_ResponseFactorySendFlags).
    #
    # To forward this flags-only response to the client, users must opt-in to this
    # behavior by adding the following argument:
    # client.async_stream_infer(..., enable_empty_final_response=True).
    #
    # If the decoupled backend/model always sends the final response flag along
    # with a non-null response, no opt-in is needed.
    #
    # With this behavior, the client can programmatically detect when all responses
    # for an individual request have been received without knowing the expected
    # number of responses in advance and without closing the stream.
    def _stream_infer_with_params(
        self,
        request_count,
        request_delay,
        _,
        delay_data,
        delay_factor,
        user_data,
        result_dict,
    ):
        """Stream requests and collect responses until every request is final.

        Completion is detected through the "triton_final_response" response
        parameter, so the expected response count (third positional
        argument) is accepted only for signature parity with _stream_infer
        and is ignored.

        Args:
            request_count: Number of requests to send on the stream.
            request_delay: Milliseconds to sleep before sending each request.
            _: Unused.
            delay_data: Numpy array set as the DELAY input; it is scaled by
                ``delay_factor`` after each request and cast to uint32.
            delay_factor: Multiplier applied to ``delay_data`` per request.
            user_data: Object whose ``_response_queue`` receives responses.
            result_dict: Filled in place; maps response id to a list of
                ``(receive_index, result)`` tuples for non-empty responses.

        Raises:
            InferenceServerException: If the stream callback reported one.
            ValueError: If a response carries no id.
        """
        # Only pass headers when gRPC error status reporting is requested.
        stream_kwargs = {}
        if "TRITONSERVER_GRPC_STATUS_FLAG" in os.environ:
            stream_kwargs["headers"] = {"triton_grpc_error": "true"}

        with grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        ) as triton_client:
            # Establish stream
            triton_client.start_stream(
                callback=partial(callback, user_data), **stream_kwargs
            )
            # Send specified many requests in parallel
            for i in range(request_count):
                time.sleep(request_delay / 1000)
                self.inputs_[1].set_data_from_numpy(delay_data)
                triton_client.async_stream_infer(
                    model_name=self.model_name_,
                    inputs=self.inputs_,
                    request_id=str(i),
                    outputs=self.requested_outputs_,
                    # Opt-in to receiving flags-only responses from model/backend
                    # to help detect final responses for decoupled models.
                    enable_empty_final_response=True,
                )
                # Update delay input in accordance with the scaling factor
                delay_data = delay_data * delay_factor
                delay_data = delay_data.astype(np.uint32)

            # Retrieve results...
            recv_count = 0
            completed_requests = 0
            while completed_requests < request_count:
                data_item = user_data._response_queue.get()
                if isinstance(data_item, InferenceServerException):
                    raise data_item

                response = data_item.get_response()
                # Request IDs should generally be provided with each request
                # to associate decoupled responses with their requests.
                if not response.id:
                    raise ValueError(
                        "No response id found. Was a request_id provided?"
                    )

                # Detect final response. Parameters are oneof and we expect bool_param
                if response.parameters.get("triton_final_response").bool_param:
                    completed_requests += 1

                # Only process non-empty response, ignore if empty (no outputs)
                if response.outputs:
                    result_dict.setdefault(response.id, []).append(
                        (recv_count, data_item)
                    )
                    recv_count += 1

    def _stream_infer(
        self,
        request_count,
        request_delay,
        expected_count,
        delay_data,
        delay_factor,
        user_data,
        result_dict,
    ):
        """Stream requests and collect exactly ``expected_count`` responses.

        Args:
            request_count: Number of requests to send on the stream.
            request_delay: Milliseconds to sleep before sending each request.
            expected_count: Total number of responses to wait for.
            delay_data: Numpy array set as the DELAY input; it is scaled by
                ``delay_factor`` after each request and cast to uint32.
            delay_factor: Multiplier applied to ``delay_data`` per request.
            user_data: Object whose ``_response_queue`` receives responses.
            result_dict: Filled in place; maps response id to a list of
                ``(receive_index, result)`` tuples.

        Raises:
            InferenceServerException: If the stream callback reported one.
        """
        # Only pass headers when gRPC error status reporting is requested.
        stream_kwargs = {}
        if "TRITONSERVER_GRPC_STATUS_FLAG" in os.environ:
            stream_kwargs["headers"] = {"triton_grpc_error": "true"}

        with grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        ) as triton_client:
            # Establish stream
            triton_client.start_stream(
                callback=partial(callback, user_data), **stream_kwargs
            )
            # Send specified many requests in parallel
            for i in range(request_count):
                time.sleep(request_delay / 1000)
                self.inputs_[1].set_data_from_numpy(delay_data)
                triton_client.async_stream_infer(
                    model_name=self.model_name_,
                    inputs=self.inputs_,
                    request_id=str(i),
                    outputs=self.requested_outputs_,
                )
                # Update delay input in accordance with the scaling factor
                delay_data = delay_data * delay_factor
                delay_data = delay_data.astype(np.uint32)

            # Retrieve results...
            recv_count = 0
            while recv_count < expected_count:
                data_item = user_data._response_queue.get()
                if isinstance(data_item, InferenceServerException):
                    raise data_item

                # Group responses by request id, remembering arrival order.
                this_id = data_item.get_response().id
                result_dict.setdefault(this_id, []).append((recv_count, data_item))
                recv_count += 1

    def _fan_validate(self, result_list, data_offset, repeat_count):
        """Validate responses for a request sent to the fan_repeat model.

        Expects ``repeat_count`` responses whose single-element OUT tensors
        start at ``2 * data_offset`` and grow by 2 per response.
        """
        # fan_repeat returns "2 * data_offset" as result
        self.assertEqual(len(result_list), repeat_count)
        first_expected = 2 * data_offset
        for position, entry in enumerate(result_list):
            out_tensor = entry[1].as_numpy("OUT")
            self.assertEqual(len(out_tensor), 1)
            self.assertEqual(out_tensor[0], first_expected + 2 * position)

    def _nested_validate(self, result_list, data_offset, repeat_count):
        """Validate responses for a request sent to a "square"-style model.

        Each value n in [data_offset, data_offset + repeat_count) is expected
        to appear n times in a row, in increasing order of n.
        """
        # if repeat model returns repeat result n, repeat_square-like model
        # will return the same result n times
        self.assertEqual(
            len(result_list), sum(range(data_offset, data_offset + repeat_count))
        )
        current_value = data_offset
        remaining_repeats = current_value
        for entry in result_list:
            out_tensor = entry[1].as_numpy("OUT")
            self.assertEqual(len(out_tensor), 1)
            self.assertEqual(out_tensor[0], current_value)
            remaining_repeats -= 1
            if remaining_repeats == 0:
                # Move on to the next value, which repeats once more.
                current_value += 1
                remaining_repeats = current_value

    def _decoupled_infer(
        self,
        request_count,
        request_delay=0,
        repeat_count=1,
        data_offset=100,
        delay_time=1000,
        delay_factor=1,
        wait_time=500,
        order_sequence=None,
        validate_fn=None,
    ):
        """Run streaming inference with both helpers and validate the results.

        The scenario is executed once with _stream_infer and once with
        _stream_infer_with_params against ``self.model_name_``.

        Args:
            request_count: Number of requests sent per run.
            request_delay: Milliseconds to wait before sending each request.
            repeat_count: Length of the IN and DELAY tensors; 0 means no
                response is expected for any request.
            data_offset: First value of the IN tensor.
            delay_time: Initial value of every DELAY element.
            delay_factor: Per-request multiplier applied to the DELAY data.
            wait_time: Value of the WAIT tensor.
            order_sequence: Optional per-request lists of expected global
                receive indices; only checked when ``validate_fn`` is None.
            validate_fn: Optional callable
                ``(result_list, data_offset, repeat_count)`` replacing the
                default OUT/IDX checks. When given, only OUT is requested.
        """
        in_tensor = self.inputs_[0]
        delay_tensor = self.inputs_[1]
        wait_tensor = self.inputs_[2]

        # Initialize data for IN
        in_tensor.set_shape([repeat_count])
        in_tensor.set_data_from_numpy(
            np.arange(
                start=data_offset, stop=data_offset + repeat_count, dtype=np.int32
            )
        )

        # Initialize data for DELAY (the data itself is set by the helpers)
        delay_data = (np.ones([repeat_count], dtype=np.uint32)) * delay_time
        delay_tensor.set_shape([repeat_count])

        # Initialize data for WAIT
        wait_tensor.set_data_from_numpy(np.array([wait_time], dtype=np.uint32))

        # use validate_fn to differentiate requested outputs
        if validate_fn is None:
            self.requested_outputs_ = self.outputs_
        else:
            self.requested_outputs_ = self.outputs_[0:1]

        # "square" models return each value n a total of n times.
        if "square" in self.model_name_:
            responses_per_request = sum(
                range(data_offset, data_offset + repeat_count)
            )
        else:
            responses_per_request = repeat_count
        expected_count = responses_per_request * request_count

        for infer_helper in (self._stream_infer, self._stream_infer_with_params):
            user_data = UserData()
            result_dict = {}

            try:
                infer_helper(
                    request_count,
                    request_delay,
                    expected_count,
                    delay_data,
                    delay_factor,
                    user_data,
                    result_dict,
                )
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

            # Validate the results..
            for i in range(request_count):
                this_id = str(i)
                received = this_id in result_dict

                if repeat_count == 0:
                    if received:
                        self.assertTrue(
                            False,
                            "received unexpected response for request id {}".format(
                                this_id
                            ),
                        )
                    continue

                if not received:
                    self.assertTrue(
                        False, "response for request id {} not received".format(this_id)
                    )

                result_list = result_dict[this_id]
                if validate_fn is not None:
                    validate_fn(result_list, data_offset, repeat_count)
                    continue

                self.assertEqual(len(result_list), repeat_count)
                for j, entry in enumerate(result_list):
                    if order_sequence is not None:
                        self.assertEqual(entry[0], order_sequence[i][j])
                    this_data = entry[1].as_numpy("OUT")
                    self.assertEqual(len(this_data), 1)
                    self.assertEqual(this_data[0], data_offset + j)
                    this_idx = entry[1].as_numpy("IDX")
                    self.assertEqual(len(this_idx), 1)
                    self.assertEqual(this_idx[0], j)

    def test_one_to_none(self):
        """Each request generates no response (repeat_count=0)."""
        # Note the name of the test one_to_none implies the
        # mapping between requests and responses.

        for model_name, validate_fn in self.trials_:
            self.model_name_ = model_name
            # Single request case first, then the multiple request case
            for request_count in (1, 5):
                self._decoupled_infer(
                    request_count=request_count,
                    repeat_count=0,
                    validate_fn=validate_fn,
                )

    def test_one_to_one(self):
        """Each request generates a single response."""
        # Note the name of the test one_to_one implies the
        # mapping between requests and responses.

        # wait_time=500: release request before the response is delivered
        # wait_time=2000: release request after the response is delivered
        wait_times = (500, 2000)

        for model_name, validate_fn in self.trials_:
            self.model_name_ = model_name
            # Single request case, then multiple request case
            for request_count in (1, 5):
                for wait_time in wait_times:
                    self._decoupled_infer(
                        request_count=request_count,
                        wait_time=wait_time,
                        validate_fn=validate_fn,
                    )

    def test_one_to_many(self):
        """Each request generates multiple responses."""
        # Note the name of the test one_to_many implies the
        # mapping between requests and responses.

        self.assertFalse("TRITONSERVER_DELAY_GRPC_RESPONSE" in os.environ)

        # wait_time=500: release request before the first response is delivered
        # wait_time=2000: release request when the responses are getting delivered
        # wait_time=10000: release request after all the responses are delivered
        wait_times = (500, 2000, 10000)

        for model_name, validate_fn in self.trials_:
            self.model_name_ = model_name
            # Single request case, then multiple request case
            for request_count in (1, 5):
                for wait_time in wait_times:
                    self._decoupled_infer(
                        request_count=request_count,
                        repeat_count=5,
                        wait_time=wait_time,
                        validate_fn=validate_fn,
                    )

    def test_one_to_multi_many(self):
        """Multiple delayed responses per request, stressing queued responses."""
        # Test cases where each request generates multiple response but the
        # responses are delayed so as to stress the control path handling the
        # queued responses.

        self.assertTrue("TRITONSERVER_DELAY_GRPC_RESPONSE" in os.environ)

        # (request_count, wait_time) pairs. For each request count the three
        # wait times release the request before the first response is
        # delivered, while responses are being delivered, and after all
        # responses are delivered, respectively.
        scenarios = (
            # Single request case
            (1, 500),
            (1, 8000),
            (1, 20000),
            # Multiple request case
            (5, 500),
            (5, 3000),
            (5, 10000),
        )

        for model_name, validate_fn in self.trials_:
            self.model_name_ = model_name
            for request_count, wait_time in scenarios:
                self._decoupled_infer(
                    request_count=request_count,
                    repeat_count=5,
                    wait_time=wait_time,
                    validate_fn=validate_fn,
                )

    def test_response_order(self):
        """Check the expected response order for several timing scenarios."""
        self.assertFalse("TRITONSERVER_DELAY_GRPC_RESPONSE" in os.environ)

        # Keyword arguments that vary per case; every case sends 2 requests
        # with repeat_count=4.
        ordering_cases = [
            # Case 1: Interleaved responses
            {
                "request_delay": 500,
                "order_sequence": [[0, 2, 4, 6], [1, 3, 5, 7]],
            },
            # Case 2: All responses of second request delivered before any
            # response from the first
            {
                "request_delay": 500,
                "delay_time": 2000,
                "delay_factor": 0.1,
                "order_sequence": [[4, 5, 6, 7], [0, 1, 2, 3]],
            },
            # Case 3: Similar to Case 2, but the second request is generated
            # after the first response from first request is received
            {
                "request_delay": 2500,
                "delay_time": 2000,
                "delay_factor": 0.1,
                "order_sequence": [[0, 5, 6, 7], [1, 2, 3, 4]],
            },
            # Case 4: All the responses of the second request are delivered
            # after all the responses from the first request are received
            {
                "request_delay": 100,
                "delay_time": 500,
                "delay_factor": 10,
                "order_sequence": [[0, 1, 2, 3], [4, 5, 6, 7]],
            },
            # Case 5: Similar to Case 4, but the second request is generated
            # after the first response from the first request is received
            {
                "request_delay": 750,
                "delay_time": 500,
                "delay_factor": 10,
                "order_sequence": [[0, 1, 2, 3], [4, 5, 6, 7]],
            },
        ]

        for model_name, validate_fn in self.trials_:
            self.model_name_ = model_name
            for case_kwargs in ordering_cases:
                self._decoupled_infer(
                    request_count=2,
                    repeat_count=4,
                    validate_fn=validate_fn,
                    **case_kwargs,
                )

    def _no_streaming_helper(self, protocol):
        """Send a non-streaming infer request and expect a decoupled-policy error.

        Args:
            protocol: "grpc" to use the gRPC client and the tensors created
                in setUp; any other value uses the HTTP client with freshly
                created tensors.
        """
        data_offset = 100
        repeat_count = 1
        delay_time = 1000
        wait_time = 2000

        input_data = np.arange(
            start=data_offset, stop=data_offset + repeat_count, dtype=np.int32
        )
        delay_data = (np.ones([repeat_count], dtype=np.uint32)) * delay_time
        wait_data = np.array([wait_time], dtype=np.uint32)

        if protocol == "grpc":
            # Use the inputs and outputs from the setUp
            this_inputs = self.inputs_
            this_outputs = self.outputs_
            client_cls = grpcclient.InferenceServerClient
            url = "localhost:8001"
        else:
            this_inputs = [
                httpclient.InferInput("IN", [repeat_count], "INT32"),
                httpclient.InferInput("DELAY", [1], "UINT32"),
                httpclient.InferInput("WAIT", [1], "UINT32"),
            ]
            this_outputs = [httpclient.InferRequestedOutput("OUT")]
            client_cls = httpclient.InferenceServerClient
            url = "localhost:8000"

        # Initialize data for IN
        this_inputs[0].set_shape([repeat_count])
        this_inputs[0].set_data_from_numpy(input_data)

        # Initialize data for DELAY
        this_inputs[1].set_shape([repeat_count])
        this_inputs[1].set_data_from_numpy(delay_data)

        # Initialize data for WAIT
        this_inputs[2].set_data_from_numpy(wait_data)

        # Use the client as a context manager so its connection is closed
        # even when the assertion below fails.
        with client_cls(url=url, verbose=True) as triton_client:
            with self.assertRaises(InferenceServerException) as cm:
                triton_client.infer(
                    model_name=self.model_name_,
                    inputs=this_inputs,
                    outputs=this_outputs,
                )

        self.assertIn(
            "doesn't support models with decoupled transaction policy",
            str(cm.exception),
        )

    def test_no_streaming(self):
        """Non-streaming inference must be rejected over both gRPC and HTTP."""
        # Test cases with no streaming inference. Server should give
        # appropriate error in such cases.

        for protocol in ("grpc", "http"):
            self._no_streaming_helper(protocol)

    def test_wrong_shape(self):
        """Mismatched IN and DELAY shapes must produce a server error."""
        # Sends mismatching shapes for IN and DELAY. Server should return
        # appropriate error message. The shape of IN is [repeat_count],
        # where as shape of DELAY is [repeat_count + 1].

        data_offset = 100
        repeat_count = 1
        delay_time = 1000
        wait_time = 2000

        in_tensor = self.inputs_[0]
        delay_tensor = self.inputs_[1]
        wait_tensor = self.inputs_[2]

        # Initialize data for IN
        in_tensor.set_shape([repeat_count])
        in_tensor.set_data_from_numpy(
            np.arange(
                start=data_offset, stop=data_offset + repeat_count, dtype=np.int32
            )
        )

        # Initialize data for DELAY (one element longer than IN)
        delay_data = (np.ones([repeat_count + 1], dtype=np.uint32)) * delay_time
        delay_tensor.set_shape([repeat_count + 1])
        delay_tensor.set_data_from_numpy(delay_data)

        # Initialize data for WAIT
        wait_tensor.set_data_from_numpy(np.array([wait_time], dtype=np.uint32))

        with self.assertRaises(InferenceServerException) as cm:
            self._stream_infer(1, 0, repeat_count, delay_data, 1, UserData(), {})

        self.assertIn(
            "expected IN and DELAY shape to match, got [1] and [2]", str(cm.exception)
        )


class NonDecoupledTest(tu.TestResultCollector):
    """Exercise ``repeat_int32`` through the non-streaming client APIs.

    qa/L0_decoupled/test.sh runs this class against a copy of the model whose
    config has ``decoupled: True`` rewritten to ``decoupled: False``. From the
    client's perspective every successful request must produce exactly one
    response, checked here as ``OUT[0] == 1`` and ``IDX[0] == 0``.
    """

    # (tensor name, Triton datatype) of the model inputs, in request order.
    _INPUT_SPECS = (("IN", "INT32"), ("DELAY", "UINT32"), ("WAIT", "UINT32"))

    def setUp(self):
        self.model_name_ = "repeat_int32"
        self.data_matrix = [
            # ("IN", "DELAY", "WAIT")
            ([1], [0], [0]),
            ([1], [4000], [2000]),
            ([1], [2000], [4000]),
        ]

        # State shared between the grpc async tests and _async_callback.
        self.callback_error = None
        self.callback_result = None
        self.callback_invoked_event = threading.Event()

    def _input_data(self, in_value, delay_value, wait_value):
        """Return the numpy tensors for one request, keyed by input name."""
        return {
            "IN": np.array(in_value, dtype=np.int32),
            "DELAY": np.array(delay_value, dtype=np.uint32),
            "WAIT": np.array(wait_value, dtype=np.uint32),
        }

    def _make_inputs(self, client_module, in_value, delay_value, wait_value):
        """Build the ``InferInput`` list for one request.

        ``client_module`` is ``grpcclient`` or ``httpclient``; both expose an
        ``InferInput`` class. The chaining relies on ``set_data_from_numpy``
        returning the input object.
        """
        tensors = self._input_data(in_value, delay_value, wait_value)
        return [
            client_module.InferInput(name, [1], datatype).set_data_from_numpy(
                tensors[name]
            )
            for name, datatype in self._INPUT_SPECS
        ]

    def _assert_single_response(self, result):
        """Check the values expected in the one response of a request."""
        self.assertEqual(1, result.as_numpy("OUT")[0])
        self.assertEqual(0, result.as_numpy("IDX")[0])

    def _check_sync_infer(self, client_module, url):
        """Run every row of ``data_matrix`` through a blocking ``infer`` call."""
        for in_value, delay_value, wait_value in self.data_matrix:
            with self.subTest(IN=in_value, DELAY=delay_value, WAIT=wait_value):
                inputs = self._make_inputs(
                    client_module, in_value, delay_value, wait_value
                )

                # The context manager closes the client when the subtest ends.
                with client_module.InferenceServerClient(
                    url=url, verbose=True
                ) as triton_client:
                    # Expect the inference is successful
                    res = triton_client.infer(
                        model_name=self.model_name_, inputs=inputs
                    )
                    self._assert_single_response(res)

    def _start_async_infer(self, triton_client, inputs):
        """Reset the callback state, issue ``async_infer`` and return its handle.

        Any exception raised while initiating the request fails the current
        (sub)test; ``self.fail`` raises, so nothing runs after it.
        """
        # Clear previous results
        self.callback_error = None
        self.callback_result = None
        self.callback_invoked_event.clear()

        try:
            return triton_client.async_infer(
                model_name=self.model_name_,
                inputs=inputs,
                callback=self._async_callback,
            )
        except Exception as e:
            self.fail(f"Failed to initiate async_infer: {e}")

    def _async_callback(self, result, error):
        """Callback for async_infer."""
        self.callback_error = error
        self.callback_result = result
        self.callback_invoked_event.set()

    def test_grpc(self):
        self._check_sync_infer(grpcclient, "localhost:8001")

    def test_http(self):
        self._check_sync_infer(httpclient, "localhost:8000")

    def test_grpc_async(self):
        for in_value, delay_value, wait_value in self.data_matrix:
            with self.subTest(IN=in_value, DELAY=delay_value, WAIT=wait_value):
                inputs = self._make_inputs(
                    grpcclient, in_value, delay_value, wait_value
                )

                with grpcclient.InferenceServerClient(
                    url="localhost:8001",
                    verbose=True,
                ) as triton_client:
                    self._start_async_infer(triton_client, inputs)

                    # Wait for the callback to be invoked, with a timeout
                    self.assertTrue(
                        self.callback_invoked_event.wait(timeout=10),
                        "Callback not invoked within timeout.",
                    )

                    # Expect the inference is successful
                    self.assertIsNone(
                        self.callback_error,
                        f"Inference failed: {self.callback_error}",
                    )
                    self.assertIsNotNone(
                        self.callback_result, "Inference result is None."
                    )
                    self._assert_single_response(self.callback_result)

                    # Wait and check server/model health
                    time.sleep(5)
                    self.assertTrue(triton_client.is_model_ready(self.model_name_))

    def test_grpc_async_cancel(self):
        data_matrix = [
            # ("IN", "DELAY", "WAIT")
            ([1], [4000], [2000]),
            ([1], [2000], [4000]),
        ]

        for in_value, delay_value, wait_value in data_matrix:
            with self.subTest(IN=in_value, DELAY=delay_value, WAIT=wait_value):
                inputs = self._make_inputs(
                    grpcclient, in_value, delay_value, wait_value
                )

                with grpcclient.InferenceServerClient(
                    url="localhost:8001",
                    verbose=True,
                ) as triton_client:
                    request_handle = self._start_async_infer(triton_client, inputs)

                    # Allow request to be fully initiated
                    time.sleep(0.5)

                    # Attempt to cancel the request
                    if not request_handle:
                        self.fail("Invalid request_handle, cannot cancel.")
                    try:
                        request_handle.cancel()
                    except Exception as e:
                        self.fail(f"Error calling request_handle.cancel(): {e}")

                    # Wait for the callback to be invoked
                    self.assertTrue(
                        self.callback_invoked_event.wait(timeout=10),
                        "Callback not invoked within timeout after cancellation.",
                    )

                    # Expect the inference is failed
                    self.assertIsInstance(
                        self.callback_error,
                        InferenceServerException,
                        f"Unexpected error type: {type(self.callback_error)}",
                    )
                    self.assertIn(
                        "StatusCode.CANCELLED",
                        self.callback_error.status(),
                    )

                    # Wait and check server/model health
                    time.sleep(5)
                    self.assertTrue(triton_client.is_model_ready(self.model_name_))


# Script entry point: hand control to unittest, which runs the TestCase
# classes of this module or only the tests named on the command line.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_decoupled/models/fan_repeat/config.pbtxt
================================================
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble with a fan-out / fan-in shape: the OUT tensor of repeat_int32
# ("repeat_out") is consumed both by identity_int32 and by
# libtorch_nobatch_int32_int32_int32, and the latter additionally consumes the
# identity result.
name: "fan_repeat"
# 0 = batching is not used for this model (see Triton model configuration docs).
max_batch_size: 0
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      # Step 1: forward the ensemble inputs unchanged to repeat_int32.
      # Only its OUT tensor is mapped; no IDX mapping is declared here.
      model_name: "repeat_int32"
      # -1 selects the latest available version of the model.
      model_version: -1
      input_map {
        key: "IN"
        value: "IN"
      }
      input_map {
        key: "DELAY"
        value: "DELAY"
      }
      input_map {
        key: "WAIT"
        value: "WAIT"
      }
      output_map {
        key: "OUT"
        value: "repeat_out"
      }
    },
    {
      # Step 2: first consumer of "repeat_out".
      model_name: "identity_int32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "repeat_out"
      }
      output_map {
        key: "OUTPUT0"
        value: "identity_out"
      }
    },
    {
      # Step 3: second consumer of "repeat_out", joined with "identity_out".
      # This model is not stored next to this config; test.sh copies it from
      # the QA model repository and rewrites its dims to [ 1 ].
      model_name: "libtorch_nobatch_int32_int32_int32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "repeat_out"
      }
      input_map {
        key: "INPUT1"
        value: "identity_out"
      }
      # Only OUTPUT__1 is mapped; it becomes the ensemble output OUT.
      output_map {
        key: "OUTPUT__1"
        value: "OUT"
      }

    }
  ]
}
# IN and DELAY are 1-D tensors of variable length (-1); WAIT holds one element.
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]
# Each response carries a single-element OUT tensor.
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_decoupled/models/identity_int32/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# INT32 model served by the "identity" backend; used as a step by the
# fan_repeat and sequence_repeat ensembles in this model directory.
name: "identity_int32"
backend: "identity"
# 0 = batching is not used for this model (see Triton model configuration docs).
max_batch_size: 0
# One 1-D input of variable length (-1).
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# One 1-D output of variable length, same data type as the input.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]


================================================
FILE: qa/L0_decoupled/models/nested_square/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Nested ensemble: the first step is simple_repeat, which is itself an ensemble
# (see models/simple_repeat/config.pbtxt); its OUT tensor is then fed to
# square_int32.
name: "nested_square"
# 0 = batching is not used for this model (see Triton model configuration docs).
max_batch_size: 0
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      # Step 1: forward the ensemble inputs unchanged to the inner ensemble.
      # simple_repeat also declares an IDX output, which is not mapped here.
      model_name: "simple_repeat"
      # -1 selects the latest available version of the model.
      model_version: -1
      input_map {
        key: "IN"
        value: "IN"
      }
      input_map {
        key: "DELAY"
        value: "DELAY"
      }
      input_map {
        key: "WAIT"
        value: "WAIT"
      }
      output_map {
        key: "OUT"
        value: "repeat_out"
      }
    },
    {
      # Step 2: square_int32 consumes "repeat_out"; its OUT is the ensemble OUT.
      model_name: "square_int32"
      model_version: -1
      input_map {
        key: "IN"
        value: "repeat_out"
      }
      output_map {
        key: "OUT"
        value: "OUT"
      }
    }
  ]
}
# IN and DELAY are 1-D tensors of variable length (-1); WAIT holds one element.
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]
# Each response carries a single-element OUT tensor.
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_decoupled/models/repeat_square/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Two-step ensemble: repeat_int32 followed by square_int32. Same wiring as
# nested_square, except that the first step is the repeat_int32 model itself
# rather than the simple_repeat ensemble.
name: "repeat_square"
# 0 = batching is not used for this model (see Triton model configuration docs).
max_batch_size: 0
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      # Step 1: forward the ensemble inputs unchanged to repeat_int32.
      # Only its OUT tensor is mapped; no IDX mapping is declared here.
      model_name: "repeat_int32"
      # -1 selects the latest available version of the model.
      model_version: -1
      input_map {
        key: "IN"
        value: "IN"
      }
      input_map {
        key: "DELAY"
        value: "DELAY"
      }
      input_map {
        key: "WAIT"
        value: "WAIT"
      }
      output_map {
        key: "OUT"
        value: "repeat_out"
      }
    },
    {
      # Step 2: square_int32 consumes "repeat_out"; its OUT is the ensemble OUT.
      model_name: "square_int32"
      model_version: -1
      input_map {
        key: "IN"
        value: "repeat_out"
      }
      output_map {
        key: "OUT"
        value: "OUT"
      }
    }
  ]
}
# IN and DELAY are 1-D tensors of variable length (-1); WAIT holds one element.
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]
# Each response carries a single-element OUT tensor.
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_decoupled/models/sequence_repeat/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Two-step ensemble: repeat_int32 followed by identity_int32. The OUT tensor
# of repeat_int32 passes through identity_int32 before being returned, while
# its IDX tensor is exposed directly as the ensemble output IDX.
name: "sequence_repeat"
# 0 = batching is not used for this model (see Triton model configuration docs).
max_batch_size: 0
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      # Step 1: forward the ensemble inputs unchanged to repeat_int32.
      model_name: "repeat_int32"
      # -1 selects the latest available version of the model.
      model_version: -1
      input_map {
        key: "IN"
        value: "IN"
      }
      input_map {
        key: "DELAY"
        value: "DELAY"
      }
      input_map {
        key: "WAIT"
        value: "WAIT"
      }
      output_map {
        key: "OUT"
        value: "repeat_out"
      }
      # IDX bypasses the second step and is returned as-is.
      output_map {
        key: "IDX"
        value: "IDX"
      }
    },
    {
      # Step 2: identity_int32 consumes "repeat_out" and produces the
      # ensemble output OUT.
      model_name: "identity_int32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "repeat_out"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUT"
      }
    }
  ]
}
# IN and DELAY are 1-D tensors of variable length (-1); WAIT holds one element.
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]
# Each response carries single-element OUT and IDX tensors.
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "IDX"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_decoupled/models/simple_repeat/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Single-step ensemble that wraps repeat_int32: all three inputs and both
# mapped outputs keep their names. It is used as the first step of the
# nested_square ensemble.
name: "simple_repeat"
# 0 = batching is not used for this model (see Triton model configuration docs).
max_batch_size: 0
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "repeat_int32"
      # -1 selects the latest available version of the model.
      model_version: -1
      input_map {
        key: "IN"
        value: "IN"
      }
      input_map {
        key: "DELAY"
        value: "DELAY"
      }
      input_map {
        key: "WAIT"
        value: "WAIT"
      }
      output_map {
        key: "OUT"
        value: "OUT"
      }
      output_map {
        key: "IDX"
        value: "IDX"
      }
    }
  ]
}
# IN and DELAY are 1-D tensors of variable length (-1); WAIT holds one element.
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]
# Each response carries single-element OUT and IDX tensors.
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "IDX"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_decoupled/test.sh
================================================
#!/bin/bash
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model-repository version: the first positional argument overrides
# NVIDIA_TRITON_SERVER_VERSION from the environment.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

RET=0
TEST_RESULT_FILE='test_results.txt'
DECOUPLED_TEST=decoupled_test.py

rm -f *.log

CLIENT_LOG=`pwd`/client.log
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

# DecoupledTest cases executed against every server configuration.
DECOUPLED_TESTS="test_one_to_none \
                 test_one_to_one \
                 test_one_to_many \
                 test_no_streaming \
                 test_response_order \
                 test_wrong_shape"

# Usage: run_unit_test <unittest target> <name> <failure label> <expected count>
#
# Runs one unittest target of $DECOUPLED_TEST with all output appended to the
# current $CLIENT_LOG, then verifies the number of results recorded in
# $TEST_RESULT_FILE. Any failure sets RET=1 instead of aborting the script.
# 'set -e' is (re-)enabled on return, exactly as the inlined stanzas did.
run_unit_test () {
    local target=$1
    local name=$2
    local label=$3
    local expected=$4

    echo "Test: $name" >>$CLIENT_LOG
    set +e
    python $DECOUPLED_TEST $target >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test $label Failed\n***" >>$CLIENT_LOG
        echo -e "\n***\n*** Test $label Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE $expected
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e
}

TRIALS="python custom"

for trial in $TRIALS; do
  if [ "$trial" == "python" ]; then
    MODELDIR=`pwd`/python_models
  else
    MODELDIR=`pwd`/models
  fi

  # The log targets are re-pointed further down in this loop body, so they must
  # be reset at the start of every iteration. Otherwise the first phase of the
  # second trial would reuse the previous trial's
  # grpc_max_response_pool_size_1_* files. Per-trial names keep the server log
  # of each trial, because run_server starts a fresh log.
  SERVER_LOG="./${trial}_inference_server.log"
  CLIENT_LOG=`pwd`/${trial}_client.log

  SERVER_ARGS="--model-repository=$MODELDIR"
  # The fan_repeat ensemble needs this model from the QA model repository; its
  # dims are rewritten to [ 1 ] to match the ensemble tensors.
  cp -r $DATADIR/libtorch_nobatch_int32_int32_int32 $MODELDIR/.
  (cd $MODELDIR/libtorch_nobatch_int32_int32_int32 && \
   sed -i "s/dims:.*\[.*\]/dims: \[ 1 \]/g" config.pbtxt)

  run_server
  if [ "$SERVER_PID" == "0" ]; then
      echo -e "\n***\n*** Failed to start $SERVER\n***"
      cat $SERVER_LOG
      exit 1
  fi

  for i in $DECOUPLED_TESTS; do
      run_unit_test "DecoupledTest.$i" "$i" "$i" 1
  done

  # Will delay the writing of each response by the specified many milliseconds.
  # This will ensure that there are multiple responses available to be written.
  # NOTE(review): the server was already started above, so this export is only
  # visible to the client process launched below - confirm that is intended.
  export TRITONSERVER_DELAY_GRPC_RESPONSE=2000

  run_unit_test "DecoupledTest.test_one_to_multi_many" \
                "test_one_to_multi_many" "test_one_to_multi_many" 1

  unset TRITONSERVER_DELAY_GRPC_RESPONSE

  kill $SERVER_PID
  wait $SERVER_PID

  # Second phase: same test list against a server limited to a single pooled
  # gRPC response object.
  SERVER_ARGS="--model-repository=$MODELDIR --grpc-max-response-pool-size=1"
  SERVER_LOG="grpc_max_response_pool_size_1_${trial}_server.log"
  CLIENT_LOG="grpc_max_response_pool_size_1_${trial}_client.log"
  run_server
  if [ "$SERVER_PID" == "0" ]; then
      echo -e "\n***\n*** Failed to start $SERVER\n***"
      cat $SERVER_LOG
      exit 1
  fi

  for test in $DECOUPLED_TESTS; do
      run_unit_test "DecoupledTest.$test" "$test" \
                    "grpc-max-response-pool-size=1 ${trial} - $test" 1
  done

  kill $SERVER_PID
  wait $SERVER_PID
done

# Test the server frontend can merge the responses of non-decoupled model that
# sends inference response and COMPLETE flag separately. In other words, from
# the client's perspective there will still be one response.
NON_DECOUPLED_DIR=`pwd`/non_decoupled_models
rm -rf ${NON_DECOUPLED_DIR} && mkdir -p ${NON_DECOUPLED_DIR}
cp -r `pwd`/models/repeat_int32 ${NON_DECOUPLED_DIR}/. && \
    (cd ${NON_DECOUPLED_DIR}/repeat_int32 && \
        sed -i "s/decoupled: True/decoupled: False/" config.pbtxt)

SERVER_ARGS="--model-repository=${NON_DECOUPLED_DIR}"
SERVER_LOG="./non_decoupled_inference_server.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

CLIENT_LOG=`pwd`/non_decoupled_client.log
# NonDecoupledTest contains four test methods, hence the expected count of 4.
run_unit_test "NonDecoupledTest" "NonDecoupledTest" "NonDecoupledTest" 4

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test Failed\n***"
fi

exit $RET

================================================
FILE: qa/L0_device_memory_tracker/test.py
================================================
#!/usr/bin/env python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time
import unittest
from functools import partial

import nvidia_smi
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient


class UnifiedClientProxy:
    """Forward attribute access to a wrapped Triton client.

    When the wrapped client is a GRPC ``InferenceServerClient`` two calls
    are adapted so the tests can index their results like dictionaries:

    * ``get_model_config`` is invoked with ``as_json=True`` and only the
      ``"config"`` entry of the response is returned.
    * ``get_inference_statistics`` is invoked with ``as_json=True``.

    Every other attribute, and every attribute of any other client type,
    is returned unchanged.
    """

    def __init__(self, client):
        # The client every attribute lookup is forwarded to.
        self.client_ = client

    def __getattr__(self, attr):
        """Return ``attr`` of the wrapped client, adapted for GRPC if needed."""
        forward_attr = getattr(self.client_, attr)
        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of the GRPC client.
        if isinstance(self.client_, grpcclient.InferenceServerClient):
            if attr == "get_model_config":
                return lambda *args, **kwargs: forward_attr(
                    *args, **kwargs, as_json=True
                )["config"]
            if attr == "get_inference_statistics":
                return partial(forward_attr, as_json=True)
        return forward_attr


class MemoryUsageTest(unittest.TestCase):
    """Compare the GPU memory usage recorded in model statistics with NVML.

    Each test reads the model statistics from a server on localhost, selects
    the models served by one backend, unloads them one at a time and checks
    that the drop in used GPU memory matches the recorded usage.
    """

    def setUp(self):
        """Initialize NVML for GPU 0 and create the HTTP and GRPC clients."""
        nvidia_smi.nvmlInit()
        self.gpu_handle_ = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
        self.http_client_ = httpclient.InferenceServerClient(url="localhost:8000")
        self.grpc_client_ = grpcclient.InferenceServerClient(url="localhost:8001")

    def tearDown(self):
        """Close both clients and shut NVML down."""
        try:
            # Release the connections opened in setUp instead of leaving
            # them to garbage collection.
            self.http_client_.close()
            self.grpc_client_.close()
        finally:
            # NVML must be shut down even if closing a client fails.
            nvidia_smi.nvmlShutdown()

    def report_used_gpu_memory(self):
        """Return the ``used`` field of the NVML memory info for the GPU handle."""
        info = nvidia_smi.nvmlDeviceGetMemoryInfo(self.gpu_handle_)
        return info.used

    def is_testing_backend(self, model_name, backend_name):
        """Return True if the config of ``model_name`` names ``backend_name``."""
        return self.client_.get_model_config(model_name)["backend"] == backend_name

    def verify_recorded_usage(self, model_stat):
        """Unload the model and compare freed GPU memory with its statistics.

        Args:
            model_stat: One entry of ``"model_stats"``; its ``"name"`` and
                ``"memory_usage"`` fields are read.

        Fails the test when the summed ``"GPU"`` usage is outside a 10%
        tolerance of the memory freed by the unload.
        """
        recorded_gpu_usage = sum(
            int(usage["byte_size"])
            for usage in model_stat["memory_usage"]
            if usage["type"] == "GPU"
        )
        # unload and verify recorded usage
        before_total_usage = self.report_used_gpu_memory()
        self.client_.unload_model(model_stat["name"])
        # unload can return before the model is fully unloaded,
        # wait to be finished
        time.sleep(2)
        usage_delta = before_total_usage - self.report_used_gpu_memory()
        # check with tolerance as gpu usage obtained is overall usage
        lower_bound = usage_delta * 0.9
        upper_bound = usage_delta * 1.1
        self.assertTrue(
            lower_bound <= recorded_gpu_usage <= upper_bound,
            msg="For model {}, expect recorded usage to be in range [{}, {}], got {}".format(
                model_stat["name"],
                lower_bound,
                upper_bound,
                recorded_gpu_usage,
            ),
        )

    def _verify_backend_models(self, client, backend_name):
        """Verify recorded usage of every model served by ``backend_name``.

        ``client`` is wrapped in ``UnifiedClientProxy`` and stored in
        ``self.client_``, which the other helper methods read.
        """
        self.client_ = UnifiedClientProxy(client)
        model_stats = self.client_.get_inference_statistics()["model_stats"]
        for model_stat in model_stats:
            if self.is_testing_backend(model_stat["name"], backend_name):
                self.verify_recorded_usage(model_stat)

    def test_onnx_http(self):
        """Check the "onnxruntime" models through the HTTP client."""
        self._verify_backend_models(self.http_client_, "onnxruntime")

    def test_plan_grpc(self):
        """Check the "tensorrt" models through the GRPC client."""
        self._verify_backend_models(self.grpc_client_, "tensorrt")


# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_device_memory_tracker/test.sh
================================================
#!/bin/bash
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model repository version: the first argument, if given, overrides
# NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

TEST_LOG="./test.log"
TEST_PY=test.py

DATADIR=/data/inferenceserver/${REPO_VERSION}
rm -f *.log

TRTEXEC=/usr/src/tensorrt/bin/trtexec
TEST_RESULT_FILE='test_results.txt'
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_LOG="./server.log"

source ../common/util.sh

RET=0

# Prepare the model repository. It only contains ONNX and TensorRT models,
# as those are the backends whose recorded memory usage test.py verifies.
rm -rf models && mkdir models
# ONNX
cp -r $DATADIR/onnx_model_store/* models/.
rm -r models/*cpu

set +e

# Build a TensorRT plan from each ONNX image model and reuse the ONNX model's
# labels and config, with the name and platform rewritten for the plan.
for PLAN_SOURCE in vgg19 resnet50 resnet152; do
    PLAN_MODEL_DIR=models/${PLAN_SOURCE}_plan
    SOURCE_ONNX_DIR=$DATADIR/qa_dynamic_batch_image_model_repository/${PLAN_SOURCE}_onnx

    rm -fr $PLAN_MODEL_DIR && mkdir -p $PLAN_MODEL_DIR/1 && \
    cp $SOURCE_ONNX_DIR/1/model.onnx $PLAN_MODEL_DIR/ && \
    cp $SOURCE_ONNX_DIR/labels.txt $PLAN_MODEL_DIR/

    $TRTEXEC --onnx=$PLAN_MODEL_DIR/model.onnx --saveEngine=$PLAN_MODEL_DIR/1/model.plan \
             --minShapes=input:1x3x224x224 --optShapes=input:32x3x224x224 \
             --maxShapes=input:32x3x224x224

    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed to generate ${PLAN_SOURCE} PLAN\n***"
        exit 1
    fi

    # The ONNX file was only needed as trtexec input.
    rm $PLAN_MODEL_DIR/model.onnx
    cp $SOURCE_ONNX_DIR/config.pbtxt $PLAN_MODEL_DIR/ && \
    sed -i "s/^name: .*/name: \"${PLAN_SOURCE}_plan\"/g" $PLAN_MODEL_DIR/config.pbtxt && \
    sed -i 's/^platform: .*/platform: "tensorrt_plan"/g' $PLAN_MODEL_DIR/config.pbtxt
done

set -e

# Use multiple instances for selected models to test instance-wise
# collection and accumulation.
for MULTI_INSTANCE_MODEL in resnet152_plan densenet; do
    echo "instance_group [{ count: 2; kind: KIND_GPU }]" >> models/${MULTI_INSTANCE_MODEL}/config.pbtxt
done

# test.py uses the nvidia_smi Python module to validate the reported usage.
pip install nvidia-ml-py3

# Start the server with all models loaded (in parallel). test.py then unloads
# the models one by one and expects each change in memory usage to match
# what is reported in the statistics.
SERVER_ARGS="--backend-config=triton-backend-memory-tracker=true --model-repository=models --model-control-mode=explicit --load-model=*"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $TEST_PY > $TEST_LOG 2>&1 || RET=1
set -e
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -ne 0 ]; then
    cat $SERVER_LOG
    cat $TEST_LOG
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_dlpack_multi_gpu/test.sh
================================================
#!/bin/bash
# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
CLIENT_PY=./test_infer_shm_leak.py
CLIENT_LOG="./client.log"
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'
SERVER_LOG="./inference_server.log"
export CUDA_VISIBLE_DEVICES=0,1,2,3

RET=0
rm -fr *.log ./models

source ../common/util.sh

# Uninstall the non CUDA version of PyTorch
pip3 uninstall -y torch
pip3 install torch -f https://download.pytorch.org/whl/cu130

# Install CuPy for testing non_blocking compute streams
pip3 install cupy-cuda13x

# For CUDA major versions above 12, download the CUDA 12.9 NVRTC and cuBLAS
# archives, unpack them under /tmp and put /tmp/lib on the library path.
# NOTE(review): presumably one of the packages installed above still needs
# the CUDA 12 libraries at runtime -- confirm.
if [ ${CUDA_VERSION%%.*} -gt 12 ]; then
    curl -L https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/linux-x86_64/cuda_nvrtc-linux-x86_64-12.9.86-archive.tar.xz \
         -o /tmp/cuda_nvrtc-linux-x86_64-12.9.86-archive.tar.xz ;
    curl -L https://developer.download.nvidia.com/compute/cuda/redist/libcublas/linux-x86_64/libcublas-linux-x86_64-12.9.1.4-archive.tar.xz \
         -o /tmp/libcublas-linux-x86_64-12.9.1.4-archive.tar.xz ;
    cd /tmp ;
    tar -xvf /tmp/cuda_nvrtc-linux-x86_64-12.9.86-archive.tar.xz --strip-components=1 ;
    tar -xvf /tmp/libcublas-linux-x86_64-12.9.1.4-archive.tar.xz --strip-components=1 ;
    export LD_LIBRARY_PATH=/tmp/lib:$LD_LIBRARY_PATH ;
    cd -
fi

rm -fr *.log ./models

# Build the model repository and fetch the client test from L0_backend_python,
# adjusting its sys.path entry for this directory depth.
mkdir -p models/dlpack_test/1/
cp ../python_models/dlpack_test/model.py models/dlpack_test/1/
cp ../python_models/dlpack_test/config.pbtxt models/dlpack_test
cp ../L0_backend_python/test_infer_shm_leak.py .
sed -i 's#sys.path.append("../../common")#sys.path.append("../common")#g' test_infer_shm_leak.py

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    # Stop here: there is no server to test against, and continuing would
    # reach "kill $SERVER_PID" with PID 0, which signals the whole process
    # group (including this script).
    exit 1
fi

set +e
export MODEL_NAME="dlpack_test"
python3 -m pytest --junitxml=dlpack_multi_gpu.report.xml $CLIENT_PY > $CLIENT_LOG 2>&1

if [ $? -ne 0 ]; then
    # Report the script that was actually run.
    echo -e "\n***\n*** $CLIENT_PY FAILED. \n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 1 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** dlpack_multi_gpu test FAILED. \n***"
else
    echo -e "\n***\n*** dlpack_multi_gpu test PASSED. \n***"
fi

exit $RET


================================================
FILE: qa/L0_doc_links/mkdocs.yml
================================================
# Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MkDocs configuration for the documentation link check; test.sh in this
# directory passes this file to "mkdocs serve -f".
site_name: CI Test
# Do not use directory-style page URLs (see the MkDocs configuration docs).
use_directory_urls: False
# Documentation sources are read from the "repos" directory that test.sh
# creates and clones repositories into.
docs_dir: "./repos"
plugins:
        # Installed by test.sh as mkdocs-htmlproofer-plugin; presumably the
        # source of the "invalid url" log messages test.sh greps for.
        - htmlproofer
        - search


================================================
FILE: qa/L0_doc_links/test.sh
================================================
#!/bin/bash
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

LOG="`pwd`/log.txt"
CONFIG="`pwd`/mkdocs.yml"
RET=0
# Download necessary packages
python3 -m pip install mkdocs
python3 -m pip install mkdocs-htmlproofer-plugin

# Get the necessary repos
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"}
TRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG:="main"}
echo ${TRITON_BACKEND_REPO_TAG}
# Start from an empty "repos" directory and clone inside a subshell, so a
# leftover directory from a previous run can neither make the clone land in
# the wrong place nor leave this script in the parent directory.
rm -rf repos && mkdir repos
(cd repos && \
    git clone --single-branch --depth=1 -b ${TRITON_BACKEND_REPO_TAG} ${TRITON_REPO_ORGANIZATION}/backend.git)

exec mkdocs serve -f "$CONFIG" > "$LOG" &
PID=$!
# Time for the compilation to finish. This needs to be increased if other repos
# are added to the test
sleep 20

# Keep sending SIGINT until no mkdocs process is left.
until [[ (-z `pgrep mkdocs`) ]]; do
    kill -2 $PID
    sleep 2
done

# Any "invalid url" message in the log marks the run as failed.
if grep -q "invalid url" "$LOG"; then
    cat "$LOG"
    RET=1
fi


if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test PASSED\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi
# NOTE(review): with the exit below commented out the script always returns
# the status of the final "if", i.e. 0, even when RET is 1 -- confirm whether
# the check is meant to be non-blocking.
# exit $RET


================================================
FILE: qa/L0_dyna_implicit_state/test.sh
================================================
#!/bin/bash
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model repository version: the first argument when given, otherwise
# NVIDIA_TRITON_SERVER_VERSION.
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
else
    REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Settings exported to the L0_dyna_sequence_batcher test: ensembles off,
# implicit state on, and "onnx plan" as backends unless BACKENDS is already
# set to a non-empty value.
export ENSEMBLES=0
export BACKENDS="${BACKENDS:-onnx plan}"
export IMPLICIT_STATE=1

# Run the dyna sequence batcher test from its own directory and adopt its
# exit status as the result of this test.
(cd ../L0_dyna_sequence_batcher/ && bash -ex test.sh $REPO_VERSION)
RET=$?

if [ "$RET" -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import threading
import time
import unittest
from builtins import str

import numpy as np
import sequence_util as su
import test_util as tu

# Shared-memory modes are off unless the environment variable holds a
# non-zero integer.
_test_system_shared_memory = bool(int(os.environ.get("TEST_SYSTEM_SHARED_MEMORY", 0)))
_test_cuda_shared_memory = bool(int(os.environ.get("TEST_CUDA_SHARED_MEMORY", 0)))

# Test configuration read from the environment. Every setting has a default,
# so the module can be imported when a variable is not set.
NO_BATCHING = int(os.environ.get("NO_BATCHING", 0)) == 1
BACKENDS = os.environ.get("BACKENDS", "libtorch onnx plan custom custom_string")
IMPLICIT_STATE = int(os.environ.get("IMPLICIT_STATE", 0)) == 1

# One trial per backend; with NO_BATCHING every backend other than "custom"
# and "custom_string" also gets a "<backend>_nobatch" trial.
_trials = BACKENDS.split(" ")
for backend in BACKENDS.split(" "):
    if NO_BATCHING and backend not in ("custom", "custom_string"):
        _trials.append(backend + "_nobatch")

# Only the "custom" backend is listed as supporting ragged batches.
_ragged_batch_supported_trials = []
if "custom" in BACKENDS.split(" "):
    _ragged_batch_supported_trials.append("custom")

_protocols = ("http", "grpc")
_max_sequence_idle_ms = 5000


class DynaSequenceBatcherTest(su.SequenceBatcherTestUtil):
    def get_datatype(self, trial):
        """Return the tensor datatype for ``trial``; always ``np.int32`` here."""
        return np.int32

    def get_expected_result(self, expected_result, corrid, value, trial, flag_str=None):
        """Return the result expected from the model used by ``trial``.

        Some models could not implement the full accumulator (see
        qa/common/gen_qa_dyna_sequence_models.py). For those trials the
        expected result is rebuilt from ``value``: plus 1 when ``flag_str``
        contains "start", plus the correlation ID when it contains "end".
        For every other trial ``expected_result`` is returned unchanged.
        """
        batching_non_custom = not ("nobatch" in trial or "custom" in trial)
        framework_model = any(
            backend in trial for backend in ("plan", "onnx", "libtorch")
        )
        if not (batching_non_custom or framework_model):
            return expected_result

        adjusted = value
        if flag_str is None:
            return adjusted
        if "start" in flag_str:
            adjusted += 1
        if "end" in flag_str:
            # String correlation IDs are converted before being added.
            adjusted += int(corrid) if isinstance(corrid, str) else corrid
        return adjusted

    def get_expected_result_implicit(
        self, expected_result, corrid, value, trial, flag_str=None
    ):
        """Return ``expected_result`` unchanged; the other arguments are unused."""
        return expected_result

    def test_simple_sequence(self):
        """Send one nine-request sequence per trial and protocol.

        The accumulator result must be correct and should be returned
        immediately.
        """
        # Each step is (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay)):
        # values 1..9, "start" on the first request and "end" on the last.
        steps = (
            (("start", 1, None, None),)
            + tuple((None, value, None, None) for value in range(2, 9))
            + (("end", 9, None, None),)
        )
        delay_env_vars = (
            "TRITONSERVER_DELAY_SCHEDULER",
            "TRITONSERVER_BACKLOG_DELAY_SCHEDULER",
        )

        for trial in _trials:
            # Run on different protocols.
            for idx, protocol in enumerate(_protocols):
                self.clear_deferred_exceptions()
                try:
                    dtype = self.get_datatype(trial)
                    model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                    self.check_setup(model_name)
                    for env_var in delay_env_vars:
                        self.assertNotIn(env_var, os.environ)

                    corrid = "52" if "string" in trial else 52

                    if IMPLICIT_STATE:
                        expected_result = self.get_expected_result_implicit(
                            45, corrid, 9, trial, "end"
                        )
                    else:
                        expected_result = self.get_expected_result(
                            45 + int(corrid), corrid, 9, trial, "end"
                        )

                    self.check_sequence(
                        trial,
                        model_name,
                        dtype,
                        corrid,
                        (4000, None),
                        steps,
                        expected_result,
                        protocol,
                        sequence_name="{}_{}".format(self._testMethodName, protocol),
                    )

                    self.check_deferred_exception()
                    # Nine more requests are sent for every protocol run.
                    total = 9 * (idx + 1)
                    self.check_status(model_name, {1: total}, total, total)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))

    def test_length1_sequence(self):
        """Send one single-request sequence per trial and protocol.

        The accumulator result must be correct and should be returned
        immediately.
        """
        # Each step is (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay)):
        # one request that both starts and ends the sequence.
        steps = (("start,end", 42, None, None),)
        delay_env_vars = (
            "TRITONSERVER_DELAY_SCHEDULER",
            "TRITONSERVER_BACKLOG_DELAY_SCHEDULER",
        )

        for trial in _trials:
            # Run on different protocols.
            for idx, protocol in enumerate(_protocols):
                self.clear_deferred_exceptions()
                try:
                    dtype = self.get_datatype(trial)
                    model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                    self.check_setup(model_name)
                    for env_var in delay_env_vars:
                        self.assertNotIn(env_var, os.environ)

                    corrid = "99" if "string" in trial else 99

                    if IMPLICIT_STATE:
                        expected_result = self.get_expected_result_implicit(
                            42, corrid, 42, trial, "start,end"
                        )
                    else:
                        expected_result = self.get_expected_result(
                            42 + int(corrid), corrid, 42, trial, "start,end"
                        )

                    self.check_sequence(
                        trial,
                        model_name,
                        dtype,
                        corrid,
                        (4000, None),
                        steps,
                        expected_result,
                        protocol,
                        sequence_name="{}_{}".format(self._testMethodName, protocol),
                    )

                    self.check_deferred_exception()
                    # One more request is sent for every protocol run.
                    total = idx + 1
                    self.check_status(model_name, {1: total}, total, total)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))

    def _multi_sequence_impl(
        self, trials, expected_batch_exec, expected_exec_cnt, sleep_secs, tensor_shapes
    ):
        for trial in trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            precreated_shm0_handles = self.precreate_register_regions(
                (1, 3), dtype, 0, tensor_shape=(tensor_shapes[0],)
            )
            precreated_shm1_handles = self.precreate_register_regions(
                (11, 12, 13), dtype, 1, tensor_shape=(tensor_shapes[1],)
            )
            precreated_shm2_handles = self.precreate_register_regions(
                (111, 112, 113), dtype, 2, tensor_shape=(tensor_shapes[2],)
            )
            precreated_shm3_handles = self.precreate_register_regions(
                (1111, 1112, 1113), dtype, 3, tensor_shape=(tensor_shapes[3],)
            )
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
                self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                self.assertNotIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)

                if "string" in trial:
                    corrids = ["1001", "1002", "1003", "1004"]
                else:
                    corrids = [1001, 1002, 1003, 1004]

                expected_result = (
                    self.get_expected_result(
                        4 * tensor_shapes[0] + int(corrids[0]),
                        corrids[0],
                        3,
                        trial,
                        "end",
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        4, corrids[0], 3, trial, "end"
                    )
                )

                threads = []
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[0],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1, None), ("end", 3, None)),
                            expected_result,
                            precreated_shm0_handles,
                        ),
                        kwargs={
                            "sequence_name": "{}_{}".format(
                                self._testMethodName, corrids[0]
                            ),
                            "tensor_shape": (tensor_shapes[0],),
                        },
                    )
                )

                expected_result = (
                    self.get_expected_result(
                        36 * tensor_shapes[1] + int(corrids[1]),
                        corrids[1],
                        13,
                        trial,
                        "end",
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        36, corrids[1], 13, trial, "end"
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[1],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 11, None), (None, 12, None), ("end", 13, None)),
                            expected_result,
                            precreated_shm1_handles,
                        ),
                        kwargs={
                            "sequence_name": "{}_{}".format(
                                self._testMethodName, corrids[1]
                            ),
                            "tensor_shape": (tensor_shapes[1],),
                        },
                    )
                )

                expected_result = (
                    self.get_expected_result(
                        336 * tensor_shapes[2] + int(corrids[2]),
                        corrids[2],
                        113,
                        trial,
                        "end",
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        336, corrids[2], 113, trial, "end"
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[2],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 111, None),
                                (None, 112, None),
                                ("end", 113, None),
                            ),
                            expected_result,
                            precreated_shm2_handles,
                        ),
                        kwargs={
                            "sequence_name": "{}_{}".format(
                                self._testMethodName, corrids[2]
                            ),
                            "tensor_shape": (tensor_shapes[2],),
                        },
                    )
                )
                expected_result = (
                    self.get_expected_result(
                        3336 * tensor_shapes[3] + int(corrids[3]),
                        corrids[3],
                        1113,
                        trial,
                        "end",
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        3336, corrids[3], 1113, trial, "end"
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[3],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 1111, None),
                                (None, 1112, None),
                                ("end", 1113, None),
                            ),
                            expected_result,
                            precreated_shm3_handles,
                        ),
                        kwargs={
                            "sequence_name": "{}_{}".format(
                                self._testMethodName, corrids[3]
                            ),
                            "tensor_shape": (tensor_shapes[3],),
                        },
                    )
                )

                for t in threads:
                    t.start()
                    if sleep_secs > 0:
                        time.sleep(sleep_secs)
                for t in threads:
                    t.join()
                self.check_deferred_exception()
                self.check_status(
                    model_name, expected_batch_exec, expected_exec_cnt, 11
                )
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)

    def test_multi_sequence(self):
        # Four sequences are sent in series and must still be batched
        # correctly. Every sequence uses a tensor shape of 1.
        # Argument roles below are presumed from how the visible part of
        # _multi_sequence_impl uses them -- confirm against its signature.
        batch_exec = {4: 2, 3: 1}
        exec_cnt = 3
        sleep_secs = 1
        shapes = (1, 1, 1, 1)
        self._multi_sequence_impl(_trials, batch_exec, exec_cnt, sleep_secs, shapes)

    def test_multi_parallel_sequence(self):
        # Four sequences are sent in parallel and must be batched
        # correctly. Every sequence uses a tensor shape of 1.
        # Argument roles below are presumed from how the visible part of
        # _multi_sequence_impl uses them -- confirm against its signature.
        batch_exec = {4: 2, 3: 1}
        exec_cnt = 3
        sleep_secs = 0
        shapes = (1, 1, 1, 1)
        self._multi_sequence_impl(_trials, batch_exec, exec_cnt, sleep_secs, shapes)

    def test_multi_sequence_different_shape(self):
        # Four sequences are sent in parallel, and the requests of each
        # sequence use a different shape from the other sequences. The
        # mismatch in input tensor size should prevent the sequences
        # from being batched together.
        # Argument roles below are presumed from how the visible part of
        # _multi_sequence_impl uses them -- confirm against its signature.
        trials = _ragged_batch_supported_trials
        batch_exec = {1: 11}
        exec_cnt = 11
        sleep_secs = 0
        shapes = (4, 3, 1, 2)
        self._multi_sequence_impl(trials, batch_exec, exec_cnt, sleep_secs, shapes)

    def test_multi_sequence_different_shape_allow_ragged(self):
        # Four sequences are sent, and the requests of each sequence use
        # a different shape from the other sequences. Because the input
        # is marked as allowing ragged batches, the sequences should be
        # batched together despite the input tensor size differences.
        # Argument roles below are presumed from how the visible part of
        # _multi_sequence_impl uses them -- confirm against its signature.
        # NOTE(review): the fourth argument is 1 here, whereas
        # test_multi_sequence_different_shape passes 0. If it is the
        # pause between thread starts, these sequences are launched one
        # second apart rather than strictly in parallel -- confirm.
        trials = _ragged_batch_supported_trials
        batch_exec = {4: 2, 3: 1}
        exec_cnt = 3
        sleep_secs = 1
        shapes = (4, 3, 1, 2)
        self._multi_sequence_impl(trials, batch_exec, exec_cnt, sleep_secs, shapes)

    def test_backlog(self):
        # Launch five equal-length sequences in parallel. They should be
        # completely batched into batch-size 4 inferences, with the
        # fifth sequence going to the backlog and being handled once a
        # slot becomes free.

        # Input values of each sequence. Sequence i is paired with
        # shared-memory region index i and correlation ID corrids[i].
        seq_values = (
            (1, 2, 3),
            (11, 12, 13),
            (111, 112, 113),
            (1111, 1112, 1113),
            (11111, 11112, 11113),
        )
        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            shm_handles = [
                self.precreate_register_regions(values, dtype, region_idx)
                for region_idx, values in enumerate(seq_values)
            ]
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
                # This test must run without either scheduler-delay
                # variable set in the environment.
                for env_var in (
                    "TRITONSERVER_DELAY_SCHEDULER",
                    "TRITONSERVER_BACKLOG_DELAY_SCHEDULER",
                ):
                    self.assertNotIn(env_var, os.environ)

                corrids = (
                    ["1001", "1002", "1003", "1004", "1005"]
                    if "string" in trial
                    else [1001, 1002, 1003, 1004, 1005]
                )

                threads = []
                for corrid, values, handles in zip(corrids, seq_values, shm_handles):
                    first, middle, last = values
                    total = sum(values)
                    # The non-implicit expectation additionally folds the
                    # correlation ID into the accumulated value.
                    if IMPLICIT_STATE:
                        expected_result = self.get_expected_result_implicit(
                            total, corrid, last, trial, "end"
                        )
                    else:
                        expected_result = self.get_expected_result(
                            total + int(corrid), corrid, last, trial, "end"
                        )
                    worker = threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrid,
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", first, None),
                                (None, middle, None),
                                ("end", last, None),
                            ),
                            expected_result,
                            handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                    threads.append(worker)

                # Start every sequence before waiting on any of them.
                for worker in threads:
                    worker.start()
                for worker in threads:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model_name, {4: 3, 1: 3}, 6, 15)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    for handles in shm_handles:
                        self.cleanup_shm_regions(handles)

    def test_backlog_fill(self):
        # Launch four sequences in parallel, two of them shorter than
        # the others. Then launch two more sequences, which should land
        # in the backlog but immediately fill into the short sequences.

        # Requests of each sequence as (flag_str, value, pre_delay_ms).
        # Sequence i is paired with shared-memory region index i and
        # correlation ID corrids[i].
        seq_steps = (
            (("start", 1, None), (None, 2, None), ("end", 3, None)),
            (("start", 11, None), ("end", 13, None)),
            (("start", 111, None), ("end", 113, None)),
            (("start", 1111, None), (None, 1112, 3000), ("end", 1113, None)),
            (("start,end", 11111, None),),
            (("start,end", 22222, None),),
        )
        # Just the input values of every sequence, in request order.
        seq_values = tuple(tuple(step[1] for step in steps) for steps in seq_steps)

        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            shm_handles = [
                self.precreate_register_regions(values, dtype, region_idx)
                for region_idx, values in enumerate(seq_values)
            ]
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
                # This test must run without either scheduler-delay
                # variable set in the environment.
                for env_var in (
                    "TRITONSERVER_DELAY_SCHEDULER",
                    "TRITONSERVER_BACKLOG_DELAY_SCHEDULER",
                ):
                    self.assertNotIn(env_var, os.environ)

                corrids = (
                    ["1001", "1002", "1003", "1004", "1005", "1006"]
                    if "string" in trial
                    else [1001, 1002, 1003, 1004, 1005, 1006]
                )

                threads = []
                for corrid, steps, values, handles in zip(
                    corrids, seq_steps, seq_values, shm_handles
                ):
                    # The expectation is built from the flag and value of
                    # the sequence's last request plus the sum of all of
                    # its values.
                    final_flag = steps[-1][0]
                    final_value = values[-1]
                    total = sum(values)
                    if IMPLICIT_STATE:
                        expected_result = self.get_expected_result_implicit(
                            total, corrid, final_value, trial, final_flag
                        )
                    else:
                        expected_result = self.get_expected_result(
                            total + int(corrid), corrid, final_value, trial, final_flag
                        )
                    worker = threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrid,
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            steps,
                            expected_result,
                            handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                    threads.append(worker)

                # Start the first four sequences, wait two seconds, then
                # start the two single-request sequences.
                for worker in threads[:4]:
                    worker.start()
                time.sleep(2)
                for worker in threads[4:]:
                    worker.start()
                for worker in threads:
                    worker.join()
                self.check_deferred_exception()
                self.check_status(model_name, {4: 3}, 3, 12)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    for handles in shm_handles:
                        self.cleanup_shm_regions(handles)

    def test_backlog_fill_no_end(self):
        # Send 4 sequences in parallel, two of which are shorter. Send
        # 2 additional sequences that should go into backlog but
        # should immediately fill into the short sequences. One of
        # those sequences is filled before it gets its end request.
        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            precreated_shm0_handles = self.precreate_register_regions(
                (1, 2, 3), dtype, 0
            )
            precreated_shm1_handles = self.precreate_register_regions(
                (11, 13), dtype, 1
            )
            precreated_shm2_handles = self.precreate_register_regions(
                (111, 113), dtype, 2
            )
            precreated_shm3_handles = self.precreate_register_regions(
                (1111, 1112, 1113), dtype, 3
            )
            precreated_shm4_handles = self.precreate_register_regions(
                (11111,), dtype, 4
            )
            precreated_shm5_handles = self.precreate_register_regions(
                (22222, 22223, 22224), dtype, 5
            )
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
                self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                self.assertNotIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)

                if "string" in trial:
                    corrids = ["1001", "1002", "1003", "1004", "1005", "1006"]
                else:
                    corrids = [1001, 1002, 1003, 1004, 1005, 1006]
                threads = []
                expected_result = (
                    self.get_expected_result(
                        6 + int(corrids[0]), corrids[0], 3, trial, "end"
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        6, corrids[0], 3, trial, "end"
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[0],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1, None), (None, 2, None), ("end", 3, None)),
                            expected_result,
                            precreated_shm0_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )
                expected_result = (
                    self.get_expected_result(
                        24 + int(corrids[1]), corrids[1], 13, trial, "end"
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        24, corrids[1], 13, trial, "end"
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[1],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 11, None), ("end", 13, None)),
                            expected_result,
                            precreated_shm1_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )
                expected_result = (
                    self.get_expected_result(
                        224 + int(corrids[2]), corrids[2], 113, trial, "end"
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        224, corrids[2], 113, trial, "end"
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[2],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 111, None), ("end", 113, None)),
                            expected_result,
                            precreated_shm2_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )
                expected_result = (
                    self.get_expected_result(
                        3336 + int(corrids[3]), corrids[3], 1113, trial, "end"
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        3336, corrids[3], 1113, trial, "end"
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[3],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 1111, None),
                                (None, 1112, 3000),
                                ("end", 1113, None),
                            ),
                            expected_result,
                            precreated_shm3_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )
                expected_result = (
                    self.get_expected_result(
                        11111 + int(corrids[4]), corrids[4], 11111, trial, "start,end"
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        11111, corrids[4], 11111, trial, "start,end"
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[4],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start,end", 11111, None),),
                            expected_result,
                            precreated_shm4_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )
                expected_result = (
                    self.get_expected_result(
                        66669 + int(corrids[5]), corrids[5], 22224, trial, "end"
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        66669, corrids[5], 22224, trial, "end"
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[5],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 22222, None),
                                (None, 22223, None),
                                ("end", 22224, 2000),
                            ),
                            expected_result,
                            precreated_shm5_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )

                threads[0].start()
                threads[1].start()
                threads[2].start()
                threads[3].start()
                time.sleep(2)
                threads[4].start()
                threads[5].start()
                for t in threads:
                    t.join()
                self.check_deferred_exception()
                # Expecting the requests of the same sequence to be in the same
                # slot, so the execution for the last long sequence will be
                # padded to a batch.
                self.check_status(model_name, {4: 3, 1: 2}, 5, 14)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)
                    self.cleanup_shm_regions(precreated_shm4_handles)
                    self.cleanup_shm_regions(precreated_shm5_handles)

    def test_backlog_sequence_timeout(self):
        # Send 4 sequences in parallel and make sure they get
        # completely batched into batch-size 4 inferences. One of the
        # sequences has a long delay that causes it to timeout and
        # that allows a 5th sequence to come out of the backlog and
        # finish. The timed-out sequence will then send the delayed
        # inference but it will appear as a new sequence and so fail
        # because it doesn't have the START flag.
        #
        # The test passes only when the deferred exception re-raised by
        # check_deferred_exception() carries the "must specify the START
        # flag" message for sequence 1001. Assertion failures are
        # propagated unchanged (see the except clauses at the bottom).
        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            # One set of shared-memory regions per sequence; the tuples
            # mirror the values each sequence sends below.
            precreated_shm0_handles = self.precreate_register_regions((1, 3), dtype, 0)
            precreated_shm1_handles = self.precreate_register_regions(
                (11, 12, 12, 13), dtype, 1
            )
            precreated_shm2_handles = self.precreate_register_regions(
                (111, 112, 112, 113), dtype, 2
            )
            precreated_shm3_handles = self.precreate_register_regions(
                (1111, 1112, 1112, 1113), dtype, 3
            )
            precreated_shm4_handles = self.precreate_register_regions(
                (11111, 11113), dtype, 4
            )
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
                self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                self.assertNotIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)

                # Correlation IDs are strings for "string" trials and
                # integers otherwise.
                if "string" in trial:
                    corrids = ["1001", "1002", "1003", "1004", "1005"]
                else:
                    corrids = [1001, 1002, 1003, 1004, 1005]
                threads = []

                # Sequence 0: its second request is delayed by
                # _max_sequence_idle_ms + 1000 ms and carries no "end"
                # flag. NOTE(review): _max_sequence_idle_ms is presumably
                # the model's configured idle timeout - confirm.
                expected_result = (
                    self.get_expected_result(
                        4 + int(corrids[0]), corrids[0], 3, trial, None
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        4, corrids[0], 3, trial, None
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[0],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 1, None),
                                (None, 3, _max_sequence_idle_ms + 1000),
                            ),
                            expected_result,
                            precreated_shm0_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )

                # Sequences 1-3: four requests each, spaced by half of
                # _max_sequence_idle_ms.
                expected_result = (
                    self.get_expected_result(
                        48 + int(corrids[1]), corrids[1], 13, trial, None
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        48, corrids[1], 13, trial, None
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[1],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 11, None),
                                (None, 12, _max_sequence_idle_ms / 2),
                                (None, 12, _max_sequence_idle_ms / 2),
                                ("end", 13, _max_sequence_idle_ms / 2),
                            ),
                            expected_result,
                            precreated_shm1_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )
                expected_result = (
                    self.get_expected_result(
                        448 + int(corrids[2]), corrids[2], 113, trial, None
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        448, corrids[2], 113, trial, None
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[2],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 111, None),
                                (None, 112, _max_sequence_idle_ms / 2),
                                (None, 112, _max_sequence_idle_ms / 2),
                                ("end", 113, _max_sequence_idle_ms / 2),
                            ),
                            expected_result,
                            precreated_shm2_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )
                expected_result = (
                    self.get_expected_result(
                        4448 + int(corrids[3]), corrids[3], 1113, trial, None
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        4448, corrids[3], 1113, trial, None
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[3],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 1111, None),
                                (None, 1112, _max_sequence_idle_ms / 2),
                                (None, 1112, _max_sequence_idle_ms / 2),
                                ("end", 1113, _max_sequence_idle_ms / 2),
                            ),
                            expected_result,
                            precreated_shm3_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )

                # Sequence 4: the short two-request sequence that is
                # started after the other four.
                expected_result = (
                    self.get_expected_result(
                        22224 + int(corrids[4]), corrids[4], 11113, trial, "end"
                    )
                    if not IMPLICIT_STATE
                    else self.get_expected_result_implicit(
                        22224, corrids[4], 11113, trial, "end"
                    )
                )
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[4],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 11111, None), ("end", 11113, None)),
                            expected_result,
                            precreated_shm4_handles,
                        ),
                        kwargs={"sequence_name": "{}".format(self._testMethodName)},
                    )
                )

                # Start the first four sequences together, then give them
                # a 2 second head start before launching the fifth.
                threads[0].start()
                threads[1].start()
                threads[2].start()
                threads[3].start()
                time.sleep(2)
                threads[4].start()
                for t in threads:
                    t.join()

                # Expected to raise the error recorded by the timed-out
                # sequence; reaching the assertTrue below means it did not.
                self.check_deferred_exception()
                self.assertTrue(False, "expected error")
            except AssertionError:
                # An assertion failure (the "expected error" check above,
                # a failed precondition, or an assertion re-raised by
                # check_deferred_exception) has no message() method.
                # Propagate it unchanged so the real failure is reported
                # instead of an AttributeError from the handler below.
                raise
            except Exception as ex:
                self.assertTrue(
                    ex.message().startswith(
                        str(
                            "inference request for sequence 1001 to "
                            + "model '{}' must specify the START flag on the first "
                            + "request of the sequence"
                        ).format(model_name)
                    )
                )
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)
                    self.cleanup_shm_regions(precreated_shm4_handles)


# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_dyna_sequence_batcher/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first positional argument wins,
# otherwise fall back to the NVIDIA_TRITON_SERVER_VERSION environment variable.
case "$#" in
    0) REPO_VERSION="${NVIDIA_TRITON_SERVER_VERSION}" ;;
    *) REPO_VERSION="$1" ;;
esac

# A version is mandatory; bail out early when none could be determined.
if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# When TEST_REPO_ARCH is set, make sure the version ends with "_<arch>"
# (appended only if that suffix is not already present).
if [[ -n "${TEST_REPO_ARCH}" ]]; then
    case "${REPO_VERSION}" in
        *"_${TEST_REPO_ARCH}") ;;
        *) REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}" ;;
    esac
fi

# Paths and file names used throughout the script.
CLIENT_LOG="./client.log"
BATCHER_TEST=dyna_sequence_batcher_test.py
DATADIR=/data/inferenceserver/${REPO_VERSION}
SERVER=/opt/tritonserver/bin/tritonserver

# Shared QA helpers.
source ../common/util.sh

export CUDA_VISIBLE_DEVICES=0

# IMPLICIT_STATE defaults to 0 when unset or empty.
export IMPLICIT_STATE="${IMPLICIT_STATE:-0}"

# BACKENDS defaults to the full list when unset or empty.
export BACKENDS="${BACKENDS:-libtorch onnx plan custom custom_string}"

# Select the model repository matching the implicit-state mode.
case "$IMPLICIT_STATE" in
    1) MODEL_REPOSITORY="qa_dyna_sequence_implicit_model_repository" ;;
    *) MODEL_REPOSITORY="qa_dyna_sequence_model_repository" ;;
esac

# Overall script result; set to 1 as soon as any test fails.
RET=0

# Start from a clean slate: drop logs left over from a previous run.
rm -fr *.log

# models
# Copy every model of the selected repository into ./models and add
# "count: 1" after each "kind: KIND_CPU" line of its config.pbtxt.
rm -fr models && mkdir models
for MODEL in ${DATADIR}/$MODEL_REPOSITORY/* ; do
    cp -r $MODEL models/. && \
        (cd models/$(basename $MODEL) && \
            sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 1/" config.pbtxt)
done

# Implicit state models for custom backend do not exist.
if [ $IMPLICIT_STATE == "0" ]; then
    cp -r ../custom_models/custom_dyna_sequence_int32 models/.
    sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 1/" models/custom_dyna_sequence_int32/config.pbtxt
    # Construct custom dyna_sequence_model with STRING sequence ID. Copy model and edit config.pbtxt
    cp -r models/custom_dyna_sequence_int32 models/custom_string_dyna_sequence_int32
    # Rename the model inside its config to match the new directory name.
    sed -i "s/custom_dyna_sequence_int32/custom_string_dyna_sequence_int32/g" models/custom_string_dyna_sequence_int32/config.pbtxt
    # Rewrite the data_type on the line following CONTROL_SEQUENCE_CORRID to TYPE_STRING.
    sed -i "/CONTROL_SEQUENCE_CORRID/{n;s/data_type:.*/data_type: TYPE_STRING/}" models/custom_string_dyna_sequence_int32/config.pbtxt
fi

# Implicit state models that support ragged batching do not exist.
if [ $IMPLICIT_STATE == "0" ]; then
    # ragged models
    # Separate repository holding the custom model with "count: 1" added and
    # "allow_ragged_batch: true" inserted after the "INPUT" name line.
    rm -fr ragged_models && mkdir ragged_models
    cp -r ../custom_models/custom_dyna_sequence_int32 ragged_models/.
    (cd ragged_models/custom_dyna_sequence_int32 && \
            sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 1/" config.pbtxt && \
            sed -i "s/name:.*\"INPUT\"/name: \"INPUT\"\\nallow_ragged_batch: true/" config.pbtxt)
fi

# Need to launch the server for each test so that the model status is
# reset (which is used to make sure the correct batch size was used
# for execution). Test everything with fixed-tensor-size models and
# variable-tensor-size models.
#
# First group: tests run with NO_BATCHING=1 exported to the test process.
export NO_BATCHING=1
for i in \
        test_simple_sequence \
        test_length1_sequence \
         ; do
    SERVER_LOG="./$i.server.log"
    SERVER_ARGS="--model-repository=`pwd`/models"
    # run_server is not defined in this script (presumably provided by
    # ../common/util.sh); a SERVER_PID of "0" is treated as a failed launch.
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $i" >>$CLIENT_LOG

    # Disable errexit while the test runs so a failing test is recorded in
    # RET instead of aborting the script.
    set +e
    python $BATCHER_TEST DynaSequenceBatcherTest.$i >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
        echo -e "\n***\n*** Test $i Failed\n***"
        RET=1
    fi
    set -e

    # Stop the server and wait for it to exit before the next iteration.
    kill $SERVER_PID
    wait $SERVER_PID
done

# Tests that require max_queue_delay_microseconds to be non-zero so
# that batching is delayed until a full preferred batch is available.
# Every model config is rewritten to use 4 candidate sequences and a
# 5,000,000 microsecond maximum queue delay.
for m in `ls models`; do
    (cd models/$m && \
            sed -i "s/max_candidate_sequences:.*/max_candidate_sequences:4/" config.pbtxt && \
            sed -i "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" config.pbtxt)
done

# Second group: tests run with NO_BATCHING=0, one fresh server per test.
export NO_BATCHING=0
for i in \
        test_multi_sequence_different_shape \
        test_multi_sequence \
        test_multi_parallel_sequence \
        test_backlog \
        test_backlog_fill \
        test_backlog_fill_no_end \
        test_backlog_sequence_timeout \
    ; do

    SERVER_LOG="./$i.server.log"
    SERVER_ARGS="--model-repository=`pwd`/models"
    # A SERVER_PID of "0" after run_server is treated as a failed launch.
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $i" >>$CLIENT_LOG

    # Disable errexit while the test runs so a failing test is recorded in
    # RET instead of aborting the script.
    set +e
    python $BATCHER_TEST DynaSequenceBatcherTest.$i >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
        echo -e "\n***\n*** Test $i Failed\n***"
        RET=1
    fi
    set -e

    # Stop the server and wait for it to exit before the next iteration.
    kill $SERVER_PID
    wait $SERVER_PID
done

# The ragged_models repository is only created when IMPLICIT_STATE is 0,
# so the ragged-batch tests are limited to that mode as well.
if [ $IMPLICIT_STATE == "0" ]; then
    # Ragged-batch tests that require max_queue_delay_microseconds to be
    # non-zero so that batching is delayed until a full preferred batch is
    # available.
    for m in `ls ragged_models`; do
        (cd ragged_models/$m && \
                sed -i "s/max_candidate_sequences:.*/max_candidate_sequences:4/" config.pbtxt && \
                sed -i "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" config.pbtxt)
    done

    export NO_BATCHING=0
    for i in \
        test_multi_sequence_different_shape_allow_ragged \
        ; do

        SERVER_LOG="./$i.server.log"
        # Serve the ragged-batch repository instead of ./models.
        SERVER_ARGS="--model-repository=`pwd`/ragged_models"
        # A SERVER_PID of "0" after run_server is treated as a failed launch.
        run_server
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        echo "Test: $i" >>$CLIENT_LOG

        # Disable errexit while the test runs so a failing test is recorded
        # in RET instead of aborting the script.
        set +e
        python $BATCHER_TEST DynaSequenceBatcherTest.$i >>$CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
            echo -e "\n***\n*** Test $i Failed\n***"
            RET=1
        fi
        set -e

        # Stop the server and wait for it to exit before the next iteration.
        kill $SERVER_PID
        wait $SERVER_PID
    done
fi

# python unittest seems to swallow ImportError and still return 0 exit
# code. So need to explicitly check CLIENT_LOG to make sure we see
# some running tests.
#
# errexit (set -e) is still enabled from the test loops above, so grep is
# used directly as the `if` condition: a missing match is then reported
# below instead of aborting the script before the summary and the client
# log are printed.
if ! grep -c "HTTPSocketPoolResponse status=200" $CLIENT_LOG; then
    echo -e "\n***\n*** Test Failed To Run\n***"
    RET=1
fi

# Print the overall verdict; dump the client log on failure to aid debugging.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_grpc/client_plugin_models/client_plugin_test/1/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend model that compares request parameters to expected headers."""

    def execute(self, requests):
        """Produce one TEST_SUCCESS response per request.

        For each request the "EXPECTED_HEADERS" input is decoded as a JSON
        object. When the request parameters are not the empty string, every
        expected key must be present in the parameters with an equal value;
        otherwise the check is skipped and the result stays True.

        Returns a list of responses in the same order as ``requests``.
        """
        results = []

        for request in requests:
            header_tensor = pb_utils.get_input_tensor_by_name(
                request, "EXPECTED_HEADERS"
            )
            wanted = json.loads(header_tensor.as_numpy()[0].decode("utf-8"))

            matched = True
            if request.parameters() != "":
                actual = json.loads(request.parameters())
                # Every expected header must be present with an equal value.
                matched = all(
                    name in actual and actual[name] == expected
                    for name, expected in wanted.items()
                )

            outcome = pb_utils.Tensor("TEST_SUCCESS", np.array([matched], dtype=bool))
            results.append(pb_utils.InferenceResponse(output_tensors=[outcome]))

        return results


================================================
FILE: qa/L0_grpc/client_plugin_models/client_plugin_test/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "client_plugin_test" model, served by the
# python backend.
name: "client_plugin_test"
backend: "python"

# Single string input with one element.
input [
  {
    name: "EXPECTED_HEADERS"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]
# Single boolean output with one element.
output [
  {
    name: "TEST_SUCCESS"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]

# Run the model instance(s) on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/L0_grpc/grpc_basic_auth_test.py
================================================
#!/usr/bin/python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import unittest

sys.path.append("../common")

import test_util as tu
import tritonclient.grpc as tritongrpcclient
import tritonclient.grpc.aio as asynctritongrpcclient
from tritonclient.grpc.aio.auth import BasicAuth as AsyncBasicAuth
from tritonclient.grpc.auth import BasicAuth


class GRPCBasicAuthTest(tu.TestResultCollector):
    """Liveness check with the synchronous gRPC client and a BasicAuth plugin."""

    def setUp(self):
        # Use the nginx port
        endpoint = "localhost:8004"
        self._client = tritongrpcclient.InferenceServerClient(url=endpoint)
        credentials = BasicAuth("username", "password")
        self._client.register_plugin(credentials)

    def tearDown(self):
        self._client.close()

    def test_client_call(self):
        is_live = self._client.is_server_live()
        self.assertTrue(is_live)


class GRPCBasicAuthAsyncTest(unittest.IsolatedAsyncioTestCase):
    """Liveness check with the asyncio gRPC client and a BasicAuth plugin."""

    async def asyncSetUp(self):
        # Use the nginx port
        endpoint = "localhost:8004"
        self._client = asynctritongrpcclient.InferenceServerClient(url=endpoint)
        credentials = AsyncBasicAuth("username", "password")
        self._client.register_plugin(credentials)

    async def asyncTearDown(self):
        await self._client.close()

    async def test_client_call(self):
        is_live = await self._client.is_server_live()
        self.assertTrue(is_live)


# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_grpc/grpc_client_plugin_test.py
================================================
#!/usr/bin/python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import json
import sys

sys.path.append("../common")

import unittest

import numpy as np
import test_util as tu
import tritonclient.grpc as tritongrpcclient
import tritonclient.grpc.aio as asynctritongrpcclient
from tritonclient.grpc import InferenceServerClientPlugin
from tritonclient.utils import np_to_triton_dtype


# A simple plugin that adds headers to the inference request.
class TestPlugin(InferenceServerClientPlugin):
    """Client plugin that merges a fixed set of headers into a request."""

    def __init__(self, headers):
        # Mapping of header name to value applied on each call.
        self._headers = headers

    def __call__(self, request):
        extra_headers = self._headers
        request.headers.update(extra_headers)


def prepare_infer_inputs(headers):
    """Build the input list carrying ``headers`` as a JSON string.

    The headers are serialized with ``json.dumps`` into a one-element object
    array and attached to a single "EXPECTED_HEADERS" input.

    Returns a list containing that one input.
    """
    payload = np.array([json.dumps(headers)], dtype=object)
    header_input = tritongrpcclient.InferInput(
        "EXPECTED_HEADERS",
        payload.shape,
        np_to_triton_dtype(payload.dtype),
    )
    header_input.set_data_from_numpy(payload)
    return [header_input]


class GRPCClientPluginAsyncTest(unittest.IsolatedAsyncioTestCase):
    """Plugin register/unregister round trip with the asyncio gRPC client."""

    async def asyncSetUp(self):
        self._headers = {"my-key": "my-value"}
        self._plugin = TestPlugin(self._headers)
        self._client = asynctritongrpcclient.InferenceServerClient(url="localhost:8001")

    async def asyncTearDown(self):
        await self._client.close()

    async def _fetch_test_success(self, inputs):
        # Run one inference against the plugin test model and return its
        # TEST_SUCCESS output.
        reply = await self._client.infer(model_name="client_plugin_test", inputs=inputs)
        return reply.as_numpy("TEST_SUCCESS")

    async def test_simple_infer(self):
        # With the plugin registered, the model is told to expect its headers.
        with_headers = prepare_infer_inputs(self._headers)
        self._client.register_plugin(self._plugin)
        self.assertEqual(await self._fetch_test_success(with_headers), True)

        # After unregistering, no headers are expected.
        self._client.unregister_plugin()
        without_headers = prepare_infer_inputs({})
        self.assertEqual(await self._fetch_test_success(without_headers), True)


class GRPCClientPluginTest(tu.TestResultCollector):
    """Exercise plugin registration on the synchronous gRPC client."""

    def setUp(self):
        self._headers = {"my-key": "my-value"}
        self._plugin = TestPlugin(self._headers)
        self._client = tritongrpcclient.InferenceServerClient(url="localhost:8001")

    def tearDown(self):
        self._client.close()

    def test_simple_infer(self):
        """Infer once with the plugin registered and once after removing it."""
        model = "client_plugin_test"

        # Plugin registered: the client must report it, and the request tells
        # the model to expect the plugin's headers.
        self._client.register_plugin(self._plugin)
        self.assertEqual(self._plugin, self._client.plugin())
        with_plugin = self._client.infer(
            model_name=model, inputs=prepare_infer_inputs(self._headers)
        )
        self.assertEqual(with_plugin.as_numpy("TEST_SUCCESS"), True)

        # Plugin removed: the client must report no plugin, and the request
        # tells the model to expect no headers.
        self._client.unregister_plugin()
        self.assertEqual(None, self._client.plugin())
        without_plugin = self._client.infer(
            model_name=model, inputs=prepare_infer_inputs({})
        )
        self.assertEqual(without_plugin.as_numpy("TEST_SUCCESS"), True)


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_grpc/nginx.conf
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Reverse proxy configuration: requests arriving on port 8004 are subject to
# HTTP basic authentication and are forwarded over gRPC to localhost:8001.

# Run a single worker process.
worker_processes  1;

error_log  /var/log/nginx/error.log;

events {
    # Maximum number of simultaneous connections per worker process.
    worker_connections  1024;
}

http {
    # Configure basic authentication
    # NOTE(review): the credentials file path is absolute; presumably the
    # password file must be created at exactly this location -- confirm
    # against the script that generates it.
    auth_basic "Restricted Content";
    auth_basic_user_file /opt/tritonserver/qa/L0_grpc/pswd;

    # Define upstream server
    upstream backend {
        server localhost:8001;
    }

    # Define server block for reverse proxy
    server {
        # Accept HTTP/2 connections on port 8004.
        listen 8004 http2;

        # Configure location for reverse proxy
        location / {
            # Forward every request to the upstream group over gRPC.
            grpc_pass grpc://backend;
        }
    }
}


================================================
FILE: qa/L0_grpc/python_grpc_aio_test.py
================================================
#!/usr/bin/env python
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import unittest

import tritonclient.grpc.aio as grpcclient
from tritonclient.utils import *


class TestGrpcAioClient(unittest.IsolatedAsyncioTestCase):
    """Test if aio rpc can reach the server

    Each test issues one RPC of the asyncio gRPC client against the server at
    ``localhost:8001``. RPCs that are expected to be rejected are checked via
    the error message of the raised ``InferenceServerException``.
    """

    def setUp(self):
        self._triton_client = grpcclient.InferenceServerClient(url="localhost:8001")

    async def asyncTearDown(self):
        await self._triton_client.close()

    async def test_is_server_live(self):
        ret = await self._triton_client.is_server_live()
        self.assertEqual(ret, True)

    async def test_is_server_ready(self):
        ret = await self._triton_client.is_server_ready()
        self.assertEqual(ret, True)

    async def test_is_model_ready(self):
        ret = await self._triton_client.is_model_ready("simple")
        self.assertEqual(ret, True)

    async def test_get_server_metadata(self):
        # Check both the protobuf and the JSON (dict) representation.
        ret = await self._triton_client.get_server_metadata()
        self.assertEqual(ret.name, "triton")

        ret = await self._triton_client.get_server_metadata(as_json=True)
        self.assertEqual(ret["name"], "triton")

    async def test_get_model_metadata(self):
        ret = await self._triton_client.get_model_metadata("simple")
        self.assertEqual(ret.name, "simple")

    async def test_get_model_config(self):
        ret = await self._triton_client.get_model_config("simple")
        self.assertEqual(ret.config.name, "simple")

    async def test_get_model_repository_index(self):
        # The expected count is tied to the model repository the test is run
        # against.
        ret = await self._triton_client.get_model_repository_index()
        self.assertEqual(len(ret.models), 8)

    async def test_load_model(self):
        # The RPC must reach the server and be rejected with this message.
        with self.assertRaisesRegex(
            InferenceServerException,
            r"\[StatusCode\.UNAVAILABLE\] explicit model load / unload is not allowed if polling is enabled",
        ):
            await self._triton_client.load_model("simple")

    async def test_unload_model(self):
        # Exercise the unload RPC itself; the expected rejection message
        # covers both load and unload.
        with self.assertRaisesRegex(
            InferenceServerException,
            r"\[StatusCode\.UNAVAILABLE\] explicit model load / unload is not allowed if polling is enabled",
        ):
            await self._triton_client.unload_model("simple")

    async def test_get_inference_statistics(self):
        await self._triton_client.get_inference_statistics()

    async def test_update_trace_settings(self):
        await self._triton_client.update_trace_settings()

    async def test_get_trace_settings(self):
        await self._triton_client.get_trace_settings()

    async def test_get_system_shared_memory_status(self):
        await self._triton_client.get_system_shared_memory_status()

    async def test_register_system_shared_memory(self):
        # Registration is expected to fail with an "unable to open" error for
        # the key "/test_shm".
        with self.assertRaisesRegex(
            InferenceServerException,
            r"\[StatusCode\.INTERNAL\] Unable to open shared memory region: '/test_shm'",
        ):
            await self._triton_client.register_system_shared_memory(
                "test_shm", "/test_shm", 0
            )

    async def test_unregister_system_shared_memory(self):
        await self._triton_client.unregister_system_shared_memory()

    async def test_get_cuda_shared_memory_status(self):
        await self._triton_client.get_cuda_shared_memory_status()

    async def test_register_cuda_shared_memory(self):
        # An empty name and an empty raw handle are expected to be rejected.
        with self.assertRaisesRegex(
            InferenceServerException,
            r"failed to register shared memory region.*invalid args",
        ):
            await self._triton_client.register_cuda_shared_memory("", b"", 0, 0)

    async def test_unregister_cuda_shared_memory(self):
        await self._triton_client.unregister_cuda_shared_memory()


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_grpc/python_unit_test.py
================================================
#!/usr/bin/env python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import queue
import time
import unittest

# For stream infer test
from functools import partial

import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class UserData:
    """Holder for the outcomes delivered to the stream callback."""

    def __init__(self):
        # FIFO queue that receives one entry (result or error) per callback.
        completed = queue.Queue()
        self._completed_requests = completed


def callback(user_data, result, error):
    """Record the outcome of one streamed request on ``user_data``.

    A truthy ``error`` is queued in place of ``result``; otherwise ``result``
    is queued.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


class RestrictedProtocolTest(unittest.TestCase):
    """Check header-based restriction of gRPC protocols.

    Restricted protocols must be rejected with "This protocol is restricted"
    unless the request carries the matching ``triton-grpc-protocol-*`` header.
    """

    def setUp(self):
        self.client_ = grpcclient.InferenceServerClient(url="localhost:8001")
        self.model_name_ = "simple"
        self.prefix_ = "triton-grpc-protocol-"

    def tearDown(self):
        # Release the channel (and any stream a test left running) even when
        # an assertion failed midway through a test.
        self.client_.close()

    def _make_inputs(self):
        """Return the two all-ones [1, 16] INT32 inputs used by the infer tests."""
        inputs = [
            grpcclient.InferInput("INPUT0", [1, 16], "INT32"),
            grpcclient.InferInput("INPUT1", [1, 16], "INT32"),
        ]
        for infer_input in inputs:
            infer_input.set_data_from_numpy(np.ones(shape=(1, 16), dtype=np.int32))
        return inputs

    # Other unspecified protocols should not be restricted
    def test_sanity(self):
        self.client_.get_inference_statistics("simple")
        self.client_.get_inference_statistics(
            "simple", headers={self.prefix_ + "infer-key": "infer-value"}
        )

    # health, infer, model repository protocols are restricted.
    # health and infer expect the "triton-grpc-protocol-infer-key : infer-value"
    # header, model repository expects
    # "triton-grpc-protocol-admin-key : admin-value".
    def test_model_repository(self):
        with self.assertRaisesRegex(
            InferenceServerException, "This protocol is restricted"
        ):
            self.client_.unload_model(
                self.model_name_, headers={self.prefix_ + "infer-key": "infer-value"}
            )
        # Request go through and get actual transaction error
        with self.assertRaisesRegex(
            InferenceServerException, "explicit model load / unload is not allowed"
        ):
            self.client_.unload_model(
                self.model_name_, headers={self.prefix_ + "admin-key": "admin-value"}
            )

    def test_health(self):
        with self.assertRaisesRegex(
            InferenceServerException, "This protocol is restricted"
        ):
            self.client_.is_server_live()
        self.client_.is_server_live(
            headers={self.prefix_ + "infer-key": "infer-value"}
        )

    def test_infer(self):
        inputs = self._make_inputs()

        # This test only care if the request goes through
        with self.assertRaisesRegex(
            InferenceServerException, "This protocol is restricted"
        ):
            _ = self.client_.infer(
                model_name=self.model_name_, inputs=inputs, headers={"test": "1"}
            )
        self.client_.infer(
            model_name=self.model_name_,
            inputs=inputs,
            headers={self.prefix_ + "infer-key": "infer-value"},
        )

    def test_stream_infer(self):
        inputs = self._make_inputs()
        user_data = UserData()
        # The server can't interfere with whether GRPC should create the stream,
        # server will be notified after the stream is established and only
        # until then be able to access metadata to decide whether to continue
        # the stream.
        # So on client side, it will always perceive that the stream is
        # successfully created and can only check its health at a later time.
        self.client_.start_stream(partial(callback, user_data), headers={"test": "1"})
        # wait for sufficient round-trip time
        time.sleep(1)
        with self.assertRaisesRegex(
            InferenceServerException, "The stream is no longer in valid state"
        ):
            self.client_.async_stream_infer(model_name=self.model_name_, inputs=inputs)
        # callback should record error detail
        self.assertFalse(user_data._completed_requests.empty())
        with self.assertRaisesRegex(
            InferenceServerException, "This protocol is restricted"
        ):
            raise user_data._completed_requests.get()

        self.assertTrue(user_data._completed_requests.empty())

        # Stop and start new stream with proper header
        self.client_.stop_stream()
        self.client_.start_stream(
            partial(callback, user_data),
            headers={self.prefix_ + "infer-key": "infer-value"},
        )
        self.client_.async_stream_infer(model_name=self.model_name_, inputs=inputs)
        # wait for response
        time.sleep(1)
        self.assertFalse(user_data._completed_requests.empty())
        # The queued entry must be a result, not an error.
        self.assertNotIsInstance(
            user_data._completed_requests.get(), InferenceServerException
        )


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_grpc/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument wins,
# otherwise fall back to NVIDIA_TRITON_SERVER_VERSION from the environment.
if [[ $# -ge 1 ]]; then
    REPO_VERSION="$1"
else
    REPO_VERSION="${NVIDIA_TRITON_SERVER_VERSION}"
fi

# Without a version there is nothing to test against.
if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Append the architecture suffix when TEST_REPO_ARCH is set and non-empty.
if [[ -n "${TEST_REPO_ARCH}" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

export CUDA_VISIBLE_DEVICES=0

# Overall result; set to 1 by any failing check below.
RET=0

# Test scripts and configuration that live next to this script.
CLIENT_PLUGIN_TEST="./grpc_client_plugin_test.py"
BASIC_AUTH_TEST="./grpc_basic_auth_test.py"
NGINX_CONF="./nginx.conf"
# On windows the paths invoked by the script (running in WSL) must use
# /mnt/c when needed but the paths on the tritonserver command-line
# must be C:/ style.
if [[ -v WSL_DISTRO_NAME ]] || [[ -v MSYSTEM ]]; then
    SDKDIR=${SDKDIR:=C:/sdk}
    MODELDIR=${MODELDIR:=C:/models}
    # Use the same override variable as the non-Windows branch. Defaulting
    # from MODELDIR here would always yield the main model repository because
    # MODELDIR has already been assigned on the line above.
    CLIENT_PLUGIN_MODELDIR=${CLIENTPLUGINMODELDIR:=C:/client_plugin_models}
    DATADIR=${DATADIR:="/mnt/c/data/inferenceserver/${REPO_VERSION}"}
    BACKEND_DIR=${BACKEND_DIR:=C:/tritonserver/backends}
    SERVER=${SERVER:=/mnt/c/tritonserver/bin/tritonserver.exe}

    # Python example clients.
    SIMPLE_AIO_INFER_CLIENT_PY=${SDKDIR}/python/simple_grpc_aio_infer_client.py
    SIMPLE_AIO_STREAM_INFER_CLIENT_PY=${SDKDIR}/python/simple_grpc_aio_sequence_stream_infer_client.py
    SIMPLE_HEALTH_CLIENT_PY=${SDKDIR}/python/simple_grpc_health_metadata.py
    SIMPLE_INFER_CLIENT_PY=${SDKDIR}/python/simple_grpc_infer_client.py
    SIMPLE_ASYNC_INFER_CLIENT_PY=${SDKDIR}/python/simple_grpc_async_infer_client.py
    SIMPLE_STRING_INFER_CLIENT_PY=${SDKDIR}/python/simple_grpc_string_infer_client.py
    SIMPLE_STREAM_INFER_CLIENT_PY=${SDKDIR}/python/simple_grpc_sequence_stream_infer_client.py
    SIMPLE_SEQUENCE_INFER_CLIENT_PY=${SDKDIR}/python/simple_grpc_sequence_sync_infer_client.py
    SIMPLE_IMAGE_CLIENT_PY=${SDKDIR}/python/image_client.py
    # SIMPLE_ENSEMBLE_IMAGE_CLIENT_PY=${SDKDIR}/python/ensemble_image_client.py
    SIMPLE_SHM_STRING_CLIENT_PY=${SDKDIR}/python/simple_grpc_shm_string_client.py
    SIMPLE_SHM_CLIENT_PY=${SDKDIR}/python/simple_grpc_shm_client.py
    SIMPLE_CUDASHM_CLIENT_PY=${SDKDIR}/python/simple_grpc_cudashm_client.py
    SIMPLE_MODEL_CONTROL_PY=${SDKDIR}/python/simple_grpc_model_control.py
    SIMPLE_REUSE_INFER_OBJECTS_CLIENT_PY=${SDKDIR}/python/reuse_infer_objects_client.py
    SIMPLE_KEEPALIVE_CLIENT_PY=${SDKDIR}/python/simple_grpc_keepalive_client.py
    SIMPLE_CUSTOM_ARGS_CLIENT_PY=${SDKDIR}/python/simple_grpc_custom_args_client.py
    EXPLICIT_BYTE_CONTENT_CLIENT_PY=${SDKDIR}/python/grpc_explicit_byte_content_client.py
    EXPLICIT_INT_CONTENT_CLIENT_PY=${SDKDIR}/python/grpc_explicit_int_content_client.py
    EXPLICIT_INT8_CONTENT_CLIENT_PY=${SDKDIR}/python/grpc_explicit_int8_content_client.py
    GRPC_CLIENT_PY=${SDKDIR}/python/grpc_client.py
    GRPC_IMAGE_CLIENT_PY=${SDKDIR}/python/grpc_image_client.py

    # Native example clients (same names without the .py extension).
    SIMPLE_HEALTH_CLIENT=${SDKDIR}/python/simple_grpc_health_metadata
    SIMPLE_INFER_CLIENT=${SDKDIR}/python/simple_grpc_infer_client
    SIMPLE_STRING_INFER_CLIENT=${SDKDIR}/python/simple_grpc_string_infer_client
    SIMPLE_ASYNC_INFER_CLIENT=${SDKDIR}/python/simple_grpc_async_infer_client
    SIMPLE_MODEL_CONTROL=${SDKDIR}/python/simple_grpc_model_control
    SIMPLE_STREAM_INFER_CLIENT=${SDKDIR}/python/simple_grpc_sequence_stream_infer_client
    SIMPLE_SEQUENCE_INFER_CLIENT=${SDKDIR}/python/simple_grpc_sequence_sync_infer_client
    SIMPLE_SHM_CLIENT=${SDKDIR}/python/simple_grpc_shm_client
    SIMPLE_CUDASHM_CLIENT=${SDKDIR}/python/simple_grpc_cudashm_client
    SIMPLE_IMAGE_CLIENT=${SDKDIR}/python/image_client
    # SIMPLE_ENSEMBLE_IMAGE_CLIENT=${SDKDIR}/python/ensemble_image_client
    SIMPLE_REUSE_INFER_OBJECTS_CLIENT=${SDKDIR}/python/reuse_infer_objects_client
    SIMPLE_KEEPALIVE_CLIENT=${SDKDIR}/python/simple_grpc_keepalive_client
    SIMPLE_CUSTOM_ARGS_CLIENT=${SDKDIR}/python/simple_grpc_custom_args_client
    # [FIXME] point to proper client
    CC_UNIT_TEST=${SDKDIR}/python/cc_client_test
else
    MODELDIR=${MODELDIR:=`pwd`/models}
    # The plugin model repository is overridden through CLIENTPLUGINMODELDIR.
    CLIENT_PLUGIN_MODELDIR=${CLIENTPLUGINMODELDIR:=`pwd`/client_plugin_models}
    DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
    TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
    SERVER=${TRITON_DIR}/bin/tritonserver
    BACKEND_DIR=${TRITON_DIR}/backends

    # Python example clients.
    SIMPLE_AIO_INFER_CLIENT_PY=../clients/simple_grpc_aio_infer_client.py
    SIMPLE_AIO_STREAM_INFER_CLIENT_PY=../clients/simple_grpc_aio_sequence_stream_infer_client.py
    SIMPLE_HEALTH_CLIENT_PY=../clients/simple_grpc_health_metadata.py
    SIMPLE_INFER_CLIENT_PY=../clients/simple_grpc_infer_client.py
    SIMPLE_ASYNC_INFER_CLIENT_PY=../clients/simple_grpc_async_infer_client.py
    SIMPLE_STRING_INFER_CLIENT_PY=../clients/simple_grpc_string_infer_client.py
    SIMPLE_STREAM_INFER_CLIENT_PY=../clients/simple_grpc_sequence_stream_infer_client.py
    SIMPLE_SEQUENCE_INFER_CLIENT_PY=../clients/simple_grpc_sequence_sync_infer_client.py
    SIMPLE_IMAGE_CLIENT_PY=../clients/image_client.py
    # SIMPLE_ENSEMBLE_IMAGE_CLIENT_PY=../clients/ensemble_image_client.py
    SIMPLE_SHM_STRING_CLIENT_PY=../clients/simple_grpc_shm_string_client.py
    SIMPLE_SHM_CLIENT_PY=../clients/simple_grpc_shm_client.py
    SIMPLE_CUDASHM_CLIENT_PY=../clients/simple_grpc_cudashm_client.py
    SIMPLE_MODEL_CONTROL_PY=../clients/simple_grpc_model_control.py
    SIMPLE_REUSE_INFER_OBJECTS_CLIENT_PY=../clients/reuse_infer_objects_client.py
    SIMPLE_KEEPALIVE_CLIENT_PY=../clients/simple_grpc_keepalive_client.py
    SIMPLE_CUSTOM_ARGS_CLIENT_PY=../clients/simple_grpc_custom_args_client.py
    EXPLICIT_BYTE_CONTENT_CLIENT_PY=../clients/grpc_explicit_byte_content_client.py
    EXPLICIT_INT_CONTENT_CLIENT_PY=../clients/grpc_explicit_int_content_client.py
    EXPLICIT_INT8_CONTENT_CLIENT_PY=../clients/grpc_explicit_int8_content_client.py
    GRPC_CLIENT_PY=../clients/grpc_client.py
    GRPC_IMAGE_CLIENT_PY=../clients/grpc_image_client.py

    # Native example clients (same names without the .py extension).
    SIMPLE_HEALTH_CLIENT=../clients/simple_grpc_health_metadata
    SIMPLE_INFER_CLIENT=../clients/simple_grpc_infer_client
    SIMPLE_STRING_INFER_CLIENT=../clients/simple_grpc_string_infer_client
    SIMPLE_ASYNC_INFER_CLIENT=../clients/simple_grpc_async_infer_client
    SIMPLE_MODEL_CONTROL=../clients/simple_grpc_model_control
    SIMPLE_STREAM_INFER_CLIENT=../clients/simple_grpc_sequence_stream_infer_client
    SIMPLE_SEQUENCE_INFER_CLIENT=../clients/simple_grpc_sequence_sync_infer_client
    SIMPLE_SHM_CLIENT=../clients/simple_grpc_shm_client
    SIMPLE_CUDASHM_CLIENT=../clients/simple_grpc_cudashm_client
    SIMPLE_IMAGE_CLIENT=../clients/image_client
    # SIMPLE_ENSEMBLE_IMAGE_CLIENT=../clients/ensemble_image_client
    SIMPLE_REUSE_INFER_OBJECTS_CLIENT=../clients/reuse_infer_objects_client
    SIMPLE_KEEPALIVE_CLIENT=../clients/simple_grpc_keepalive_client
    SIMPLE_CUSTOM_ARGS_CLIENT=../clients/simple_grpc_custom_args_client
    CC_UNIT_TEST=../clients/cc_client_test
fi
PYTHON_UNIT_TEST=python_unit_test.py

# Derive simple_string_dyna_sequence from simple_dyna_sequence: copy the model,
# rename it in its config, switch it to the "dyna_sequence" backend, make the
# CONTROL_SEQUENCE_CORRID control a TYPE_STRING, and replace the ONNX file
# with the custom backend library.
STRING_DYNA_MODEL=${MODELDIR}/simple_string_dyna_sequence
cp -r ${MODELDIR}/simple_dyna_sequence ${STRING_DYNA_MODEL}
sed -i "s/simple_dyna_sequence/simple_string_dyna_sequence/g" ${STRING_DYNA_MODEL}/config.pbtxt
sed -i "s/^platform: .*/backend: \"dyna_sequence\"/g" ${STRING_DYNA_MODEL}/config.pbtxt
sed -i "/CONTROL_SEQUENCE_CORRID/{n;s/data_type:.*/data_type: TYPE_STRING/}" ${STRING_DYNA_MODEL}/config.pbtxt
rm -f ${STRING_DYNA_MODEL}/1/model.onnx
cp ../custom_models/custom_dyna_sequence_int32/1/libtriton_dyna_sequence.so ${STRING_DYNA_MODEL}/1/

# Remove logs left over from a previous run.
rm -f *.log *.log.*

# Abort on any error while the server is being brought up.
set -e

CLIENT_LOG=`pwd`/client.log
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
# Presumably provides run_server, SERVER_PID and SERVER_LOG used below --
# confirm against ../common/util.sh.
source ../common/util.sh

# Start the server on the main model repository; a SERVER_PID of "0" is
# treated as a failed start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# From here on failures are recorded in RET instead of aborting the script.
set +e

# Health / metadata client: a non-zero exit marks the run as failed.
if ! python $SIMPLE_HEALTH_CLIENT_PY -v >> ${CLIENT_LOG}.health 2>&1; then
    cat ${CLIENT_LOG}.health
    RET=1
fi

# Run every Python example client once. Image clients are run in async,
# streaming and sync mode and must each report exactly one VULTURE result;
# all other clients are simply run with -v.
# NOTE(review): SIMPLE_ENSEMBLE_IMAGE_CLIENT_PY is only assigned in
# commented-out lines of this script, so it presumably expands to nothing here
# unless it is set in the environment.
IMAGE=../images/vulture.jpeg
for i in \
        $SIMPLE_AIO_INFER_CLIENT_PY \
        $SIMPLE_AIO_STREAM_INFER_CLIENT_PY \
        $SIMPLE_INFER_CLIENT_PY \
        $SIMPLE_ASYNC_INFER_CLIENT_PY \
        $SIMPLE_STRING_INFER_CLIENT_PY \
        $SIMPLE_IMAGE_CLIENT_PY \
        $SIMPLE_ENSEMBLE_IMAGE_CLIENT_PY \
        $SIMPLE_STREAM_INFER_CLIENT_PY \
        $SIMPLE_SEQUENCE_INFER_CLIENT_PY \
        $SIMPLE_SHM_STRING_CLIENT_PY \
        $SIMPLE_SHM_CLIENT_PY \
        $SIMPLE_CUDASHM_CLIENT_PY \
        $SIMPLE_KEEPALIVE_CLIENT_PY \
        $SIMPLE_CUSTOM_ARGS_CLIENT_PY \
        $EXPLICIT_BYTE_CONTENT_CLIENT_PY \
        $EXPLICIT_INT_CONTENT_CLIENT_PY \
        $EXPLICIT_INT8_CONTENT_CLIENT_PY \
        $GRPC_CLIENT_PY \
        $GRPC_IMAGE_CLIENT_PY \
        ; do
    # SUFFIX is the script's file name without its extension; it selects the
    # branch below and names the per-client log file.
    BASE=$(basename -- $i)
    SUFFIX="${BASE%.*}"
    EXTRA_ARGS=""
    # image_client is passed the protocol and URL explicitly.
    if [ $SUFFIX == "image_client" ]; then
        EXTRA_ARGS="-i grpc -u localhost:8001"
    fi
    if [[ ($SUFFIX == "image_client") || ($SUFFIX == "grpc_image_client") ]]; then
        # Async mode.
        python $i -m densenet_onnx -s INCEPTION -a -c 1 -b 1 $EXTRA_ARGS $IMAGE >> "${CLIENT_LOG}.async.${SUFFIX}" 2>&1
        if [ `grep -c VULTURE ${CLIENT_LOG}.async.${SUFFIX}` != "1" ]; then
            echo -e "\n***\n*** Failed. Expected 1 VULTURE results\n***"
            cat $CLIENT_LOG.async.${SUFFIX}
            RET=1
        fi
        # Streaming mode.
        python $i -m densenet_onnx -s INCEPTION -a --streaming -c 1 -b 1 $EXTRA_ARGS $IMAGE >> "${CLIENT_LOG}.streaming.${SUFFIX}" 2>&1
        if [ `grep -c VULTURE ${CLIENT_LOG}.streaming.${SUFFIX}` != "1" ]; then
            echo -e "\n***\n*** Failed. Expected 1 VULTURE results\n***"
            cat $CLIENT_LOG.streaming.${SUFFIX}
            RET=1
        fi
        # Sync mode; this log is also the one checked for PASS below.
        python $i -m densenet_onnx -s INCEPTION -c 1 -b 1 $EXTRA_ARGS $IMAGE >> "${CLIENT_LOG}.${SUFFIX}" 2>&1
        if [ `grep -c VULTURE ${CLIENT_LOG}.${SUFFIX}` != "1" ]; then
            echo -e "\n***\n*** Failed. Expected 1 VULTURE results\n***"
            cat $CLIENT_LOG.${SUFFIX}
            RET=1
        fi
    # elif [ $SUFFIX == "ensemble_image_client" ]; then
    #     python $i -c 1 $EXTRA_ARGS ../images >> "${CLIENT_LOG}.${SUFFIX}" 2>&1
    #     for result in "SPORTS CAR" "COFFEE MUG" "VULTURE"; do
    #         if [ `grep -c "$result" ${CLIENT_LOG}.${SUFFIX}` != "1" ]; then
    #             echo -e "\n***\n*** Failed. Expected 1 $result result\n***"
    #             RET=1
    #         fi
    #     done
    else
        python $i -v >> "${CLIENT_LOG}.${SUFFIX}" 2>&1
    fi

    # $? is the status of the if/else compound above, i.e. of the last command
    # it ran. NOTE(review): for the image-client branch that is the final
    # inner `if`, not a python invocation -- confirm this is intended.
    if [ $? -ne 0 ]; then
        cat "${CLIENT_LOG}.${SUFFIX}"
        RET=1
    fi

    # Exactly one line containing PASS is required in the client's log.
    if [ $(cat "${CLIENT_LOG}.${SUFFIX}" | grep "PASS" | wc -l) -ne 1 ]; then
        cat "${CLIENT_LOG}.${SUFFIX}"
        RET=1
    fi
done

# Test while reusing the InferInput and InferRequestedOutput objects
# NOTE(review): unlike the other Python clients this script is executed
# directly rather than via `python`; presumably it is executable and has a
# shebang -- confirm.
$SIMPLE_REUSE_INFER_OBJECTS_CLIENT_PY -v -i grpc -u localhost:8001 >> ${CLIENT_LOG}.reuse 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.reuse
    RET=1
fi

# Run every native example client once. image_client is run in async,
# streaming and sync mode; simple_grpc_infer_client additionally checks gRPC
# channel caching; all other clients are run with -v and a test header.
# NOTE(review): SIMPLE_ENSEMBLE_IMAGE_CLIENT is only assigned in commented-out
# lines of this script, so it presumably expands to nothing here unless it is
# set in the environment.
for i in \
    $SIMPLE_INFER_CLIENT \
    $SIMPLE_STRING_INFER_CLIENT \
    $SIMPLE_ASYNC_INFER_CLIENT \
    $SIMPLE_HEALTH_CLIENT \
    $SIMPLE_STREAM_INFER_CLIENT \
    $SIMPLE_SEQUENCE_INFER_CLIENT \
    $SIMPLE_SHM_CLIENT \
    $SIMPLE_CUDASHM_CLIENT \
    $SIMPLE_IMAGE_CLIENT \
    $SIMPLE_ENSEMBLE_IMAGE_CLIENT \
    $SIMPLE_KEEPALIVE_CLIENT \
    $SIMPLE_CUSTOM_ARGS_CLIENT \
    ; do
    # SUFFIX selects the branch below and names the per-client log file.
    BASE=$(basename -- $i)
    SUFFIX="${BASE%.*}"
    if [[ $SUFFIX == "image_client" ]]; then
        # Async mode. On failure show the log this run actually wrote.
        $i -m densenet_onnx -s INCEPTION -a -c 1 -b 1 -i grpc -u localhost:8001 $IMAGE >> "${CLIENT_LOG}.c++.async.${SUFFIX}" 2>&1
        if [ `grep -c VULTURE ${CLIENT_LOG}.c++.async.${SUFFIX}` != "1" ]; then
            echo -e "\n***\n*** Failed. Expected 1 VULTURE results\n***"
            cat $CLIENT_LOG.c++.async.${SUFFIX}
            RET=1
        fi
        # Streaming mode. On failure show the log this run actually wrote.
        $i -m densenet_onnx -s INCEPTION -a --streaming -c 1 -b 1 -i grpc -u localhost:8001 $IMAGE >> "${CLIENT_LOG}.c++.streaming.${SUFFIX}" 2>&1
        if [ `grep -c VULTURE ${CLIENT_LOG}.c++.streaming.${SUFFIX}` != "1" ]; then
            echo -e "\n***\n*** Failed. Expected 1 VULTURE results\n***"
            cat $CLIENT_LOG.c++.streaming.${SUFFIX}
            RET=1
        fi
        # Sync mode.
        $i -m densenet_onnx -s INCEPTION -c 1 -b 1 -i grpc -u localhost:8001 $IMAGE >> "${CLIENT_LOG}.c++.${SUFFIX}" 2>&1
        if [ `grep -c VULTURE ${CLIENT_LOG}.c++.${SUFFIX}` != "1" ]; then
            echo -e "\n***\n*** Failed. Expected 1 VULTURE results\n***"
            cat $CLIENT_LOG.c++.${SUFFIX}
            RET=1
        fi
    # elif [ $SUFFIX == "ensemble_image_client" ]; then
    #     $i -c 1 -i grpc -u localhost:8001 ../images >> "${CLIENT_LOG}.c++.${SUFFIX}" 2>&1
    #     for result in "SPORTS CAR" "COFFEE MUG" "VULTURE"; do
    #         if [ `grep -c "$result" ${CLIENT_LOG}.c++.${SUFFIX}` != "1" ]; then
    #             echo -e "\n***\n*** Failed. Expected 1 $result result\n***"
    #             RET=1
    #         fi
    #     done
    elif [[ $BASE == "simple_grpc_infer_client" ]]; then
        # Test forcing new channel creation with simple infer client
        NEW_CHANNEL_STRING="new connected subchannel"
        CACHED_CHANNEL_STRING_NONE="There are 0 cached channels"
        CACHED_CHANNEL_STRING_ONE="There are 1 cached channel"
        # First run: -c "true".
        GRPC_TRACE=subchannel GRPC_VERBOSITY=info $i -v -c "true" >> ${CLIENT_LOG}.c++.${SUFFIX} 2>&1
        if [ $? -ne 0 ]; then
            cat ${CLIENT_LOG}.c++.${SUFFIX}
            RET=1
        fi
        NUM_NEW_CHANNEL_CALLS=`grep -c "${NEW_CHANNEL_STRING}" ${CLIENT_LOG}.c++.${SUFFIX}`
        if [ $NUM_NEW_CHANNEL_CALLS != "1" ]; then
            echo -e "\n***\n*** Failed. Expected 1 ${NEW_CHANNEL_STRING} calls but got ${NUM_NEW_CHANNEL_CALLS}\n***"
            cat $CLIENT_LOG.c++.${SUFFIX}
            RET=1
        fi
        # The "1 cached channel" message must have been logged exactly twice.
        if [ `grep -c "${CACHED_CHANNEL_STRING_ONE}" ${CLIENT_LOG}.c++.${SUFFIX}` != "2" ]; then
            echo -e "\n***\n*** Failed. Expected 1 cached channel\n***"
            cat $CLIENT_LOG.c++.${SUFFIX}
            RET=1
        fi
        # Second run: -c "false". Output is appended to the same log, so the
        # counts checked below are cumulative over both runs.
        GRPC_TRACE=subchannel GRPC_VERBOSITY=info $i -v -c "false" >> ${CLIENT_LOG}.c++.${SUFFIX} 2>&1
        if [ $? -ne 0 ]; then
            cat ${CLIENT_LOG}.c++.${SUFFIX}
            RET=1
        fi
        NUM_NEW_CHANNEL_CALLS=`grep -c "${NEW_CHANNEL_STRING}" ${CLIENT_LOG}.c++.${SUFFIX}`
        # 3 in total = 1 from the first run + 2 from this run.
        if [ $NUM_NEW_CHANNEL_CALLS != "3" ]; then
            echo -e "\n***\n*** Failed. Expected 2 ${NEW_CHANNEL_STRING} calls but got ${NUM_NEW_CHANNEL_CALLS}\n***"
            cat $CLIENT_LOG.c++.${SUFFIX}
            RET=1
        fi
        if [ `grep -c "${CACHED_CHANNEL_STRING_NONE}" ${CLIENT_LOG}.c++.${SUFFIX}` != "2" ]; then
            echo -e "\n***\n*** Failed. Expected 0 cached channels\n***"
            cat $CLIENT_LOG.c++.${SUFFIX}
            RET=1
        fi
    else
        $i -v -H test:1 >> ${CLIENT_LOG}.c++.${SUFFIX} 2>&1
        if [ $? -ne 0 ]; then
            cat ${CLIENT_LOG}.c++.${SUFFIX}
            RET=1
        fi
    fi
done

# Test while reusing the InferInput and InferRequestedOutput objects
# (native client); a non-zero exit marks the run as failed.
$SIMPLE_REUSE_INFER_OBJECTS_CLIENT -v -i grpc -u localhost:8001 >> ${CLIENT_LOG}.c++.reuse 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.c++.reuse
    RET=1
fi

# Stop the current server; errors abort the script from here on.
set -e
kill $SERVER_PID
wait $SERVER_PID

# Restart on the client-plugin model repository with every HTTP and gRPC
# header forwarded (pattern ".*").
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${CLIENT_PLUGIN_MODELDIR} --http-header-forward-pattern=.* --grpc-header-forward-pattern=.*"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Client plugin test: record a failure in RET rather than aborting.
set +e
if ! python3 $CLIENT_PLUGIN_TEST >> ${CLIENT_LOG}.python.plugin 2>&1; then
    cat ${CLIENT_LOG}.python.plugin
    RET=1
fi
set -e

# Create a password file with username:password
echo -n 'username:' > pswd
echo "password" | openssl passwd -stdin -apr1 >> pswd
# Start nginx with the reverse-proxy configuration shipped next to this script.
nginx -c `pwd`/$NGINX_CONF

# Run the basic-auth test with errexit disabled so that a failure is recorded
# in RET and nginx and the server are still shut down afterwards. The output
# is captured in the log that is printed on failure.
set +e
python3 $BASIC_AUTH_TEST >> ${CLIENT_LOG}.python.plugin.auth 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.python.plugin.auth
    RET=1
fi
set -e
service nginx stop

kill $SERVER_PID
wait $SERVER_PID

# Enable gRPC tracing so that compression activity is written to the logs.
# The tracer list must not contain whitespace: with "compression, channel" the
# shell sets GRPC_TRACE to "compression," and treats "channel" as a second,
# unrelated variable name to export.
export GRPC_TRACE=compression,channel
export GRPC_VERBOSITY=DEBUG
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --grpc-infer-response-compression-level=high"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# C++ client: request deflate compression and require at least one
# "Compressed[deflate]" line in the client log.
$SIMPLE_INFER_CLIENT -v -C deflate >> ${CLIENT_LOG}.c++.compress 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.c++.compress
    RET=1
fi
if [ $(cat ${CLIENT_LOG}.c++.compress | grep "Compressed\[deflate\]" | wc -l) -eq 0 ]; then
    cat ${CLIENT_LOG}.c++.compress
    RET=1
fi

# Python client: same check as above.
python $SIMPLE_INFER_CLIENT_PY -v -C deflate >> ${CLIENT_LOG}.compress 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.compress
    RET=1
fi
if [ $(cat ${CLIENT_LOG}.compress | grep "Compressed\[deflate\]" | wc -l) -eq 0 ]; then
    cat ${CLIENT_LOG}.compress
    RET=1
fi

set -e
kill $SERVER_PID
wait $SERVER_PID

# Tracing is only needed for the compression checks above.
unset GRPC_TRACE
unset GRPC_VERBOSITY

# Restart the server in explicit model control mode for the Python model
# control example.
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --model-control-mode=explicit"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Test Model Control API
python $SIMPLE_MODEL_CONTROL_PY -v >> ${CLIENT_LOG}.model_control 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.model_control
    RET=1
fi

# The client log must contain exactly one line with "PASS".
if [ $(cat ${CLIENT_LOG}.model_control | grep "PASS" | wc -l) -ne 1 ]; then
    cat ${CLIENT_LOG}.model_control
    RET=1
fi
# The server log must contain at least one "Invalid config override" line.
if [ $(cat ${SERVER_LOG} | grep "Invalid config override" | wc -l) -eq 0 ]; then
    cat ${SERVER_LOG}
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Restart the server in explicit model control mode for the C++ model control
# example.
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --model-control-mode=explicit"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Test Model Control API
$SIMPLE_MODEL_CONTROL -v >> ${CLIENT_LOG}.c++.model_control 2>&1
if [ $? -ne 0 ]; then
    # Only the exit status is checked for the C++ client.
    cat ${CLIENT_LOG}.c++.model_control
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Test with dynamic sequence models
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server_dyna.log"
CLIENT_LOG="./client_dyna.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
set +e

# Run each streaming/sequence client with -d; all of them append to the same
# client log.
for i in \
    $SIMPLE_STREAM_INFER_CLIENT_PY \
    $SIMPLE_SEQUENCE_INFER_CLIENT_PY \
    $SIMPLE_STREAM_INFER_CLIENT \
    $SIMPLE_SEQUENCE_INFER_CLIENT; do

    $i -v -d >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        # Print the accumulated log, as the other client sections of this
        # script do, so the failure can be diagnosed from the test output.
        cat $CLIENT_LOG
        RET=1
    fi
done

set -e

kill $SERVER_PID
wait $SERVER_PID

# Run cpp client unit test
# Build a fresh model repository holding the two models copied below.
rm -rf unit_test_models && mkdir unit_test_models
cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/.
cp -r ${MODELDIR}/simple unit_test_models/.

# The value spans two lines; the embedded newline is removed by word splitting
# if SERVER_ARGS is expanded unquoted (presumably by run_server - confirm).
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=unit_test_models
            --trace-file=global_unittest.log --trace-level=TIMESTAMPS --trace-rate=1"
SERVER_LOG="./inference_server_cc_unit_test.log"
CLIENT_LOG="./cc_unit_test.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Run all unit tests except load
$CC_UNIT_TEST --gtest_filter=GRPC*:-*Load* >> ${CLIENT_LOG} 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Run cpp client load API unit test
rm -rf unit_test_models && mkdir unit_test_models
cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/.
# Leave only versions 2 and 3 as valid version directories while the config
# requests versions 1 and 3
rm -rf unit_test_models/onnx_int32_int32_int32/1

# Start with EXPLICIT mode and load onnx_int32_int32_int32
SERVER_ARGS="--model-repository=`pwd`/unit_test_models \
             --model-control-mode=explicit \
             --load-model=onnx_int32_int32_int32 \
             --strict-model-config=false"
SERVER_LOG="./inference_server_cc_unit_test.load.log"
CLIENT_LOG="./cc_unit_test.load.log"

# Each load test gets a freshly started server and its own client log.
for i in \
   "LoadWithFileOverride" \
   "LoadWithConfigOverride" \
   ; do
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    set +e
    $CC_UNIT_TEST --gtest_filter=GRPC*$i >> ${CLIENT_LOG}.$i 2>&1
    if [ $? -ne 0 ]; then
        cat ${CLIENT_LOG}.$i
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# Run python grpc aio unit test
PYTHON_GRPC_AIO_TEST=python_grpc_aio_test.py
CLIENT_LOG=`pwd`/python_grpc_aio_test.log
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
set +e
# The log is truncated (">") rather than appended to for this test.
python $PYTHON_GRPC_AIO_TEST > $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Python GRPC AsyncIO Test Failed\n***"
    RET=1
fi
set -e
kill $SERVER_PID
wait $SERVER_PID

# Test GRPC health check implemented
# The probe binary is expected under ${GOPATH}/bin after `go install`.
go install github.com/grpc-ecosystem/grpc-health-probe@latest
HEALTH_PROBE="${GOPATH}/bin/grpc-health-probe -addr=localhost:8001"

# Offline case: the server was stopped above, so the probe is expected to
# report a connection timeout. Its exit status is deliberately ignored.
CLIENT_LOG=`pwd`/grpc_health_probe_offline.log
set +e
$HEALTH_PROBE > $CLIENT_LOG 2>&1
set -e
if [ `grep -c "timeout: failed to connect service" ${CLIENT_LOG}` != "1" ]; then
    echo -e "\n***\n*** Failed. Expected health check timeout\n***"
    cat $CLIENT_LOG
    RET=1
fi

SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Online case: with the server running the probe must report SERVING exactly
# once.
CLIENT_LOG=`pwd`/grpc_health_probe_online.log
set +e
$HEALTH_PROBE > $CLIENT_LOG 2>&1
set -e
if [ `grep -c "status: SERVING" ${CLIENT_LOG}` != "1" ]; then
    echo -e "\n***\n*** Failed. Expected health check to return SERVING\n***"
    cat $CLIENT_LOG
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

# Repeated protocol, not allowed
# 'health' appears in both --grpc-restricted-protocol groups below.
SERVER_ARGS="--model-repository=${MODELDIR} \
             --grpc-restricted-protocol=model-repository,health:k1=v1 \
             --grpc-restricted-protocol=metadata,health:k2=v2"
run_server
EXPECTED_MSG="protocol 'health' can not be specified in multiple config groups"
# Here a successful start is the failure: the server must refuse to start and
# log the expected message exactly once.
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Expect fail to start $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    RET=1
elif [ `grep -c "${EXPECTED_MSG}" ${SERVER_LOG}` != "1" ]; then
    echo -e "\n***\n*** Failed. Expected ${EXPECTED_MSG} to be found in log\n***"
    cat $SERVER_LOG
    RET=1
fi

# Unknown protocol, not allowed
# 'model-reposit' is deliberately not a valid protocol name.
SERVER_ARGS="--model-repository=${MODELDIR} \
             --grpc-restricted-protocol=model-reposit,health:k1=v1 \
             --grpc-restricted-protocol=metadata,health:k2=v2"
run_server
EXPECTED_MSG="unknown restricted protocol 'model-reposit'"
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Expect fail to start $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    RET=1
elif [ `grep -c "${EXPECTED_MSG}" ${SERVER_LOG}` != "1" ]; then
    echo -e "\n***\n*** Failed. Expected ${EXPECTED_MSG} to be found in log\n***"
    cat $SERVER_LOG
    RET=1
fi

# Test restricted protocols
# Two valid groups: model-repository guarded by admin-key=admin-value, and
# inference + health guarded by infer-key=infer-value.
SERVER_ARGS="--model-repository=${MODELDIR} \
             --grpc-restricted-protocol=model-repository:admin-key=admin-value \
             --grpc-restricted-protocol=inference,health:infer-key=infer-value"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
set +e
# NOTE(review): CLIENT_LOG still holds the path assigned for the online health
# probe check, so this overwrites that log - confirm this is intended.
python $PYTHON_UNIT_TEST RestrictedProtocolTest > $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Python GRPC Restricted Protocol Test Failed\n***"
    RET=1
fi
set -e
kill $SERVER_PID
wait $SERVER_PID

# Report the aggregated result; RET is set to 1 by any failed check above.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_grpc_state_cleanup/cleanup_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import queue
import signal
import time
import unittest
from functools import partial

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class UserData:
    """Container for the queue that receives results and errors from callbacks."""

    def __init__(self):
        # Unbounded FIFO queue; `callback` puts either a result or an error on it.
        self._response_queue = queue.Queue()


def callback(user_data, result, error):
    """Record the outcome of a request on ``user_data._response_queue``.

    The error is queued when it is truthy; otherwise the result is queued.
    """
    outcome = error if error else result
    user_data._response_queue.put(outcome)


# These state cleanup tests rely on test.sh
# to check whether all the created request objects
# were properly deleted by the server.
# The purpose of these unit tests is to exercise
# different portions of the gRPC frontend
# and track the state objects.
class CleanUpTest(tu.TestResultCollector):
    SERVER_PID = None

    def setUp(self):
        # Names of the models targeted by the helpers of this test case.
        self.decoupled_model_name_ = "repeat_int32"
        self.identity_model_name_ = "custom_zero_1_float32"
        self.repeat_non_decoupled_model_name = "repeat_int32_non_decoupled"

    def _prepare_inputs_and_outputs(self, kind):
        """Create the request inputs and requested outputs for ``kind``.

        ``"decoupled_streaming"`` and ``"non_decoupled_streaming"`` get the
        IN/DELAY/WAIT inputs with the OUT/IDX outputs; ``"simple"`` and
        ``"streaming"`` get INPUT0 with OUTPUT0. The lists are stored on
        ``self.inputs_``, ``self.outputs_`` and ``self.requested_outputs_``
        (the last two refer to the same list).

        Raises:
            ValueError: If ``kind`` is not one of the values listed above.
        """
        if kind in ("decoupled_streaming", "non_decoupled_streaming"):
            inputs = [
                grpcclient.InferInput("IN", [1], "INT32"),
                grpcclient.InferInput("DELAY", [1], "UINT32"),
                grpcclient.InferInput("WAIT", [1], "UINT32"),
            ]
            outputs = [
                grpcclient.InferRequestedOutput("OUT"),
                grpcclient.InferRequestedOutput("IDX"),
            ]
        elif kind in ("simple", "streaming"):
            inputs = [grpcclient.InferInput("INPUT0", [1, 1], "FP32")]
            outputs = [grpcclient.InferRequestedOutput("OUTPUT0")]
        else:
            raise ValueError("Unsupported kind specified to prepare inputs/outputs")

        self.inputs_ = inputs
        self.outputs_ = outputs
        self.requested_outputs_ = outputs

    def _simple_infer(
        self,
        request_count,
        cancel_response_idx=None,
        client_timeout_pair=None,
        kill_server=None,
    ):
        """Issue async requests to the identity model and verify every response.

        Args:
            request_count: Number of ``async_infer`` calls to issue.
            cancel_response_idx: If not None, index of the request whose
                future is cancelled once all requests have been issued.
            client_timeout_pair: Optional ``(index, timeout)`` pair; the
                request at ``index`` is sent with ``client_timeout=timeout``.
            kill_server: If not None, SIGINT is sent to ``SERVER_PID`` right
                before the request with this index is issued.

        Raises:
            InferenceServerException: The first error delivered to the
                callback is re-raised.
        """
        with grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        ) as triton_client:
            self._prepare_inputs_and_outputs("simple")

            input_data = np.array([[1.0]], dtype=np.float32)
            self.inputs_[0].set_data_from_numpy(input_data)

            user_data = UserData()

            timeout_idx, timeout_value = (
                client_timeout_pair if client_timeout_pair else (None, None)
            )

            futures = []
            for i in range(request_count):
                if kill_server == i:
                    os.kill(int(self.SERVER_PID), signal.SIGINT)
                futures.append(
                    triton_client.async_infer(
                        model_name=self.identity_model_name_,
                        inputs=self.inputs_,
                        request_id=str(i),
                        callback=partial(callback, user_data),
                        outputs=self.requested_outputs_,
                        # Only the selected request carries a client timeout.
                        client_timeout=timeout_value if timeout_idx == i else None,
                    )
                )

            if cancel_response_idx is not None:
                futures[cancel_response_idx].cancel()

            # One queue item is expected per issued request.
            responses = []
            while len(responses) < len(futures):
                data_item = user_data._response_queue.get()
                # isinstance also covers subclasses of the exception type,
                # which would otherwise be mistaken for results.
                if isinstance(data_item, InferenceServerException):
                    raise data_item
                responses.append(data_item)

            for response in responses:
                output0_data = response.as_numpy("OUTPUT0")
                self.assertTrue(np.array_equal(input_data, output0_data))

    def _stream_infer_with_params(
        self,
        request_count,
        request_delay,
        _,
        user_data,
        result_dict,
        delay_data=None,
        delay_factor=None,
        cancel_response_idx=None,
        stream_timeout=None,
        kill_server=None,
    ):
        """Stream requests to the decoupled model, tracking completion by final flag.

        Requests are sent with ``enable_empty_final_response=True`` and the
        method returns once ``request_count`` responses carrying a true
        ``triton_final_response`` parameter have been received.

        Args:
            request_count: Number of requests to send on the stream.
            request_delay: Delay in milliseconds before each request is sent.
            _: Unused; keeps the positional layout identical to
                ``_stream_infer`` (where this slot is ``expected_count``).
            user_data: ``UserData`` whose queue receives the stream callbacks.
            result_dict: Filled with ``response id -> [(recv_index, result)]``
                for every response that has outputs.
            delay_data: Data for the DELAY input (``self.inputs_[1]``).
            delay_factor: Factor applied to ``delay_data`` after each request;
                no scaling is applied when None.
            cancel_response_idx: If not None, the stream is stopped with
                ``cancel_requests=True`` when this many non-empty responses
                have been received.
            stream_timeout: Forwarded to ``start_stream``.
            kill_server: If not None, SIGINT is sent to ``SERVER_PID`` right
                before the request with this index is sent.

        Raises:
            InferenceServerException: The first error delivered to the
                callback is re-raised.
            ValueError: If a response has no id.
        """
        with grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        ) as triton_client:
            # Establish stream; the extra header is only sent when the
            # TRITONSERVER_GRPC_STATUS_FLAG environment variable is present.
            stream_kwargs = {
                "callback": partial(callback, user_data),
                "stream_timeout": stream_timeout,
            }
            if "TRITONSERVER_GRPC_STATUS_FLAG" in os.environ:
                stream_kwargs["headers"] = {"triton_grpc_error": "true"}
            triton_client.start_stream(**stream_kwargs)

            # Send specified many requests in parallel
            for i in range(request_count):
                time.sleep((request_delay / 1000))
                self.inputs_[1].set_data_from_numpy(delay_data)
                if kill_server == i:
                    os.kill(int(self.SERVER_PID), signal.SIGINT)
                triton_client.async_stream_infer(
                    model_name=self.decoupled_model_name_,
                    inputs=self.inputs_,
                    request_id=str(i),
                    outputs=self.requested_outputs_,
                    # Opt-in to receiving flags-only responses from model/backend
                    # to help detect final responses for decoupled models.
                    enable_empty_final_response=True,
                )
                if delay_factor is not None:
                    # Update delay input in accordance with the scaling factor
                    delay_data = (delay_data * delay_factor).astype(np.uint32)

            # Retrieve results...
            recv_count = 0
            completed_requests = 0
            while completed_requests < request_count:
                if cancel_response_idx == recv_count:
                    triton_client.stop_stream(cancel_requests=True)
                data_item = user_data._response_queue.get()
                if isinstance(data_item, InferenceServerException):
                    raise data_item

                response = data_item.get_response()
                # Request IDs should generally be provided with each request
                # to associate decoupled responses with their requests.
                if not response.id:
                    raise ValueError("No response id found. Was a request_id provided?")

                # Detect final response. Parameters are oneof and we expect
                # bool_param; a missing parameter is treated as "not final"
                # instead of failing with an AttributeError on None.
                final_param = response.parameters.get("triton_final_response")
                if final_param is not None and final_param.bool_param:
                    completed_requests += 1

                # Only process non-empty response, ignore if empty (no outputs)
                if response.outputs:
                    result_dict.setdefault(response.id, []).append(
                        (recv_count, data_item)
                    )
                    recv_count += 1

    def _stream_infer(
        self,
        request_count,
        request_delay,
        expected_count,
        user_data,
        result_dict,
        delay_data=None,
        delay_factor=None,
        cancel_response_idx=None,
        stream_timeout=None,
        kill_server=None,
    ):
        """Stream requests and collect exactly ``expected_count`` responses.

        The identity model is used unless ``delay_data`` is given, in which
        case the decoupled model is targeted and ``delay_data`` is set on the
        DELAY input (``self.inputs_[1]``).

        Args:
            request_count: Number of requests to send on the stream.
            request_delay: Delay in milliseconds before each request is sent.
            expected_count: Number of queue items to read before returning.
            user_data: ``UserData`` whose queue receives the stream callbacks.
            result_dict: Filled with ``response id -> [(recv_index, result)]``.
            delay_data: Optional data for the DELAY input.
            delay_factor: Optional factor applied to ``delay_data`` after
                each request.
            cancel_response_idx: If not None, the stream is stopped with
                ``cancel_requests=True`` when this many responses have been
                received.
            stream_timeout: Forwarded to ``start_stream``.
            kill_server: If not None, SIGINT is sent to ``SERVER_PID`` right
                before the request with this index is sent.

        Raises:
            InferenceServerException: The first error delivered to the
                callback is re-raised.
        """
        with grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        ) as triton_client:
            # Establish stream; the extra header is only sent when the
            # TRITONSERVER_GRPC_STATUS_FLAG environment variable is present.
            stream_kwargs = {
                "callback": partial(callback, user_data),
                "stream_timeout": stream_timeout,
            }
            if "TRITONSERVER_GRPC_STATUS_FLAG" in os.environ:
                stream_kwargs["headers"] = {"triton_grpc_error": "true"}
            triton_client.start_stream(**stream_kwargs)

            # Send specified many requests in parallel
            for i in range(request_count):
                time.sleep((request_delay / 1000))
                model_name = self.identity_model_name_
                if delay_data is not None:
                    model_name = self.decoupled_model_name_
                    self.inputs_[1].set_data_from_numpy(delay_data)
                if kill_server == i:
                    os.kill(int(self.SERVER_PID), signal.SIGINT)
                triton_client.async_stream_infer(
                    model_name=model_name,
                    inputs=self.inputs_,
                    request_id=str(i),
                    outputs=self.requested_outputs_,
                )
                if (delay_data is not None) and (delay_factor is not None):
                    # Update delay input in accordance with the scaling factor
                    delay_data = (delay_data * delay_factor).astype(np.uint32)

            # Retrieve results...
            recv_count = 0
            while recv_count < expected_count:
                if cancel_response_idx == recv_count:
                    triton_client.stop_stream(cancel_requests=True)
                data_item = user_data._response_queue.get()
                # isinstance also covers subclasses of the exception type,
                # which would otherwise be mistaken for results.
                if isinstance(data_item, InferenceServerException):
                    raise data_item

                this_id = data_item.get_response().id
                result_dict.setdefault(this_id, []).append((recv_count, data_item))
                recv_count += 1

    def _streaming_infer(
        self,
        request_count,
        request_delay=0,
        cancel_response_idx=None,
        stream_timeout=None,
        kill_server=None,
        should_error=True,
    ):
        """Stream requests to the identity model and validate one response each.

        Args:
            request_count: Number of requests to send; the same number of
                responses is expected.
            request_delay: Delay in milliseconds before each request is sent.
            cancel_response_idx: Forwarded to ``_stream_infer``.
            stream_timeout: Forwarded to ``_stream_infer``.
            kill_server: Forwarded to ``_stream_infer``.
            should_error: Whether an exception from the stream is expected.

        An exception raised while streaming is propagated to the caller when
        cancellation was requested, a stream timeout is set or
        ``should_error`` is true; otherwise the test is failed.
        """
        self._prepare_inputs_and_outputs("streaming")

        input_data = np.array([[1.0]], dtype=np.float32)
        self.inputs_[0].set_data_from_numpy(input_data)

        user_data = UserData()
        result_dict = {}

        try:
            expected_count = request_count
            self._stream_infer(
                request_count,
                request_delay,
                expected_count,
                user_data,
                result_dict,
                cancel_response_idx=cancel_response_idx,
                stream_timeout=stream_timeout,
                kill_server=kill_server,
            )
        except Exception as ex:
            # Index 0 is a valid cancellation point, so compare against None
            # rather than relying on truthiness.
            if cancel_response_idx is not None or stream_timeout or should_error:
                raise
            self.fail("unexpected error {}".format(ex))

        # Validate the results..
        for i in range(request_count):
            this_id = str(i)
            self.assertIn(
                this_id,
                result_dict,
                "response for request id {} not received".format(this_id),
            )
            self.assertEqual(len(result_dict[this_id]), 1)
            result = result_dict[this_id][0][1]
            output0_data = result.as_numpy("OUTPUT0")
            self.assertTrue(np.array_equal(input_data, output0_data))

    def _decoupled_infer(
        self,
        request_count,
        request_delay=0,
        repeat_count=1,
        data_offset=100,
        delay_time=1000,
        delay_factor=1,
        wait_time=500,
        cancel_response_idx=None,
        stream_timeout=None,
        kill_server=None,
        should_error=True,
        infer_helper_map=(True, True),
    ):
        """Run decoupled streaming inference and validate the repeated responses.

        Args:
            request_count: Number of requests sent per helper run.
            request_delay: Delay in milliseconds before each request is sent.
            repeat_count: Number of elements in the IN and DELAY inputs, and
                the number of responses expected per request.
            data_offset: First value of the IN input; response ``j`` of a
                request is expected to carry ``data_offset + j``.
            delay_time: Initial value of every element of the DELAY input.
            delay_factor: Factor applied to the delay data after each request.
            wait_time: Value of the WAIT input.
            cancel_response_idx: Forwarded to the stream helper.
            stream_timeout: Forwarded to the stream helper.
            kill_server: Forwarded to the stream helper.
            should_error: Whether an exception from the stream is expected.
            infer_helper_map: Two flags selecting which helpers to run:
                index 0 enables ``_stream_infer``, index 1 enables
                ``_stream_infer_with_params``.

        An exception raised while streaming is propagated to the caller when
        cancellation was requested, a stream timeout is set or
        ``should_error`` is true; otherwise the test is failed.
        """
        self._prepare_inputs_and_outputs(kind="decoupled_streaming")

        # Initialize data for IN
        input_data = np.arange(
            start=data_offset, stop=data_offset + repeat_count, dtype=np.int32
        )
        self.inputs_[0].set_shape([repeat_count])
        self.inputs_[0].set_data_from_numpy(input_data)

        # Initialize data for DELAY; the data itself is set by the helper
        # before each request.
        delay_data = (np.ones([repeat_count], dtype=np.uint32)) * delay_time
        self.inputs_[1].set_shape([repeat_count])

        # Initialize data for WAIT
        wait_data = np.array([wait_time], dtype=np.uint32)
        self.inputs_[2].set_data_from_numpy(wait_data)

        infer_helpers = []
        if infer_helper_map[0]:
            infer_helpers.append(self._stream_infer)
        if infer_helper_map[1]:
            infer_helpers.append(self._stream_infer_with_params)

        for infer_helper in infer_helpers:
            user_data = UserData()
            result_dict = {}

            try:
                expected_count = repeat_count * request_count
                infer_helper(
                    request_count,
                    request_delay,
                    expected_count,
                    user_data,
                    result_dict,
                    delay_data,
                    delay_factor,
                    cancel_response_idx,
                    stream_timeout,
                    kill_server,
                )
            except Exception as ex:
                # Index 0 is a valid cancellation point, so compare against
                # None rather than relying on truthiness.
                if cancel_response_idx is not None or stream_timeout or should_error:
                    raise
                self.fail("unexpected error {}".format(ex))

            # Validate the results..
            for i in range(request_count):
                this_id = str(i)
                if repeat_count == 0:
                    # No responses with outputs are expected in this case.
                    self.assertNotIn(
                        this_id,
                        result_dict,
                        "received unexpected response for request id {}".format(
                            this_id
                        ),
                    )
                    continue

                self.assertIn(
                    this_id,
                    result_dict,
                    "response for request id {} not received".format(this_id),
                )
                result_list = result_dict[this_id]
                self.assertEqual(len(result_list), repeat_count)
                for j, (_, result) in enumerate(result_list):
                    this_data = result.as_numpy("OUT")
                    self.assertEqual(len(this_data), 1)
                    self.assertEqual(this_data[0], data_offset + j)
                    this_idx = result.as_numpy("IDX")
                    self.assertEqual(len(this_idx), 1)
                    self.assertEqual(this_idx[0], j)

    ###
    ### Non-Streaming Tests
    ###
    def test_simple_infer(self):
        # Sanity test for the non-streaming path: sends 10 asynchronous
        # requests to the identity model and validates every response.
        self._simple_infer(request_count=10)

    def test_simple_infer_cancellation(self):
        # Checks that all states are correctly released when one of the
        # requests (index 5 of 10) is cancelled from the client side.
        with self.assertRaises(InferenceServerException) as raised:
            self._simple_infer(request_count=10, cancel_response_idx=5)
        raised_text = str(raised.exception)
        self.assertIn("Locally cancelled by application!", raised_text)

    def test_simple_infer_timeout(self):
        # Checks that all states are correctly released when a request
        # (index 5, client timeout 0.1) times out on the client.
        with self.assertRaises(InferenceServerException) as raised:
            self._simple_infer(request_count=10, client_timeout_pair=[5, 0.1])
        raised_text = str(raised.exception)
        self.assertIn("Deadline Exceeded", raised_text)

    def test_simple_infer_error_status(self):
        # Checks that all state objects are released when the RPC runs
        # into an error; the restricted-protocol error is the one expected.
        expected_message = "This protocol is restricted, expecting header 'triton-grpc-protocol-infer-key'"
        with self.assertRaises(InferenceServerException) as raised:
            self._simple_infer(request_count=10)
        self.assertIn(expected_message, str(raised.exception))

    def test_simple_infer_shutdownserver(self):
        # Checks that all state objects are released when the server is
        # interrupted (SIGINT) while requests are still being issued: the
        # signal is sent just before request 5 of 20.
        with self.assertRaises(InferenceServerException):
            self._simple_infer(request_count=20, kill_server=5)

    ###
    ### Streaming Tests
    ###
    def test_streaming_infer(self):
        # Sanity test to check whether all the state objects
        # are correctly released. Sends 10 requests in a single
        # gRPC bidirectional stream and expects one response per request.
        self._streaming_infer(request_count=10)

    def test_streaming_cancellation(self):
        # Checks that all states are correctly released when the stream
        # is closed after five responses have been received.
        with self.assertRaises(InferenceServerException) as raised:
            self._streaming_infer(request_count=10, cancel_response_idx=5)
        raised_text = str(raised.exception)
        self.assertIn("Locally cancelled by application!", raised_text)

    def test_streaming_timeout(self):
        # Checks that all states are released when some of the requests
        # time out on the stream.
        with self.assertRaises(InferenceServerException) as raised:
            self._streaming_infer(request_count=10, request_delay=1, stream_timeout=2)
        raised_text = str(raised.exception)
        self.assertIn("Deadline Exceeded", raised_text)

    def test_streaming_error_status(self):
        # Checks that all state objects are released when the RPC runs into
        # an error. The test passes when the raised exception contains any
        # one of the accepted messages below.
        accepted_messages = (
            "This protocol is restricted, expecting header 'triton-grpc-protocol-infer-key'",
            "The stream is no longer in valid state, the error detail is reported through provided callback. A new stream should be started after stopping the current stream.",
        )
        with self.assertRaises(InferenceServerException) as raised:
            self._streaming_infer(request_count=10, should_error=True)

        raised_text = str(raised.exception)
        self.assertTrue(
            any(message in raised_text for message in accepted_messages),
            "Raised unexpected exception {}".format(raised_text),
        )

    def test_streaming_infer_shutdownserver(self):
        # Checks that all state objects are released when the server is
        # interrupted to shut down in the middle of an inference run
        # (SIGINT is sent just before request 5 of 10).
        streaming_args = dict(
            request_count=10,
            request_delay=1,
            kill_server=5,
            should_error=True,
        )
        with self.assertRaises(InferenceServerException):
            self._streaming_infer(**streaming_args)

    ###
    ### Decoupled Streaming Tests
    ###
    def test_decoupled_infer(self):
        # Sanity test to check whether all the state objects
        # are correctly released. Sends 10 requests in a single
        # gRPC bidirectional stream and expects each of these
        # requests to generate 10 responses. Both stream helpers
        # are exercised (default infer_helper_map).
        self._decoupled_infer(request_count=10, repeat_count=10)

    def test_decoupled_cancellation(self):
        # Checks that all states are correctly released when the stream
        # is closed after five responses have been received.
        decoupled_args = dict(request_count=10, repeat_count=10, cancel_response_idx=5)
        with self.assertRaises(InferenceServerException) as raised:
            self._decoupled_infer(**decoupled_args)
        raised_text = str(raised.exception)
        self.assertIn("Locally cancelled by application!", raised_text)

    def test_decoupled_timeout(self):
        # Checks that all states are released when some of the requests
        # time out on the stream.
        decoupled_args = dict(
            request_count=10, repeat_count=10, request_delay=1, stream_timeout=2
        )
        with self.assertRaises(InferenceServerException) as raised:
            self._decoupled_infer(**decoupled_args)
        raised_text = str(raised.exception)
        self.assertIn("Deadline Exceeded", raised_text)

    def test_decoupled_error_status(self):
        # Checks that all state objects are released when the RPC runs into
        # an error. The test passes when the raised exception contains any
        # one of the accepted messages below.
        accepted_messages = (
            "This protocol is restricted, expecting header 'triton-grpc-protocol-infer-key'",
            "The stream is no longer in valid state, the error detail is reported through provided callback. A new stream should be started after stopping the current stream.",
        )
        with self.assertRaises(InferenceServerException) as raised:
            self._decoupled_infer(request_count=10, repeat_count=10, should_error=True)

        raised_text = str(raised.exception)
        self.assertTrue(
            any(message in raised_text for message in accepted_messages),
            "Raised unexpected exception {}".format(raised_text),
        )

    def test_decoupled_infer_shutdownserver(self):
        # Checks that all state objects are released when the server is
        # interrupted to shut down in the middle of an inference run.
        # Only the _stream_infer helper is exercised here.
        decoupled_args = dict(
            request_count=10,
            repeat_count=10,
            request_delay=1,
            kill_server=5,
            should_error=True,
            infer_helper_map=[True, False],
        )
        with self.assertRaises(InferenceServerException):
            self._decoupled_infer(**decoupled_args)

    def test_decoupled_infer_with_params_shutdownserver(self):
        # Checks that all state objects are released when the server is
        # interrupted to shut down in the middle of an inference run with
        # final parameters being returned. Only the
        # _stream_infer_with_params helper is exercised here.
        decoupled_args = dict(
            request_count=10,
            repeat_count=10,
            request_delay=1,
            kill_server=5,
            should_error=True,
            infer_helper_map=[False, True],
        )
        with self.assertRaises(InferenceServerException):
            self._decoupled_infer(**decoupled_args)

    def test_decoupled_infer_complete(self):
        """Run one decoupled request and verify the server log stays clean.

        Test if the Process() thread could release the state object before
        the StreamInferResponseComplete() thread is done accessing it. The
        server log path is read from the SERVER_LOG environment variable.
        """
        self._decoupled_infer(request_count=1, repeat_count=1, stream_timeout=16)

        # Check no error is printed to the log.
        server_log_path = os.environ["SERVER_LOG"]
        with open(server_log_path) as log_file:
            log_text = log_file.read()
        self.assertNotIn("Should not print this", log_text)

    def test_non_decoupled_streaming_multi_response(self):
        """Check that streaming a non-decoupled model returns only the first response.

        Test non-decoupled streaming infer with more than one response should
        return the first response: a single request is sent over a gRPC stream
        and exactly one response, whose IDX output starts with 0, is expected.

        When TRITONSERVER_GRPC_STATUS_FLAG is present in the environment the
        stream is opened with the ``triton_grpc_error`` header set to "true".

        Raises:
            InferenceServerException: Re-raised if the stream callback queued
                an error instead of a result.
        """
        response_count = 4
        expected_response_count = 1
        expected_response_index = 0

        # Prepare input data
        self._prepare_inputs_and_outputs("non_decoupled_streaming")
        # Initialize data for IN
        data_offset = 100
        input_data = np.arange(
            start=data_offset, stop=data_offset + response_count, dtype=np.int32
        )
        self.inputs_[0].set_shape([response_count])
        self.inputs_[0].set_data_from_numpy(input_data)
        # Initialize data for DELAY
        delay_data = np.zeros([response_count], dtype=np.uint32)
        self.inputs_[1].set_shape([response_count])
        self.inputs_[1].set_data_from_numpy(delay_data)
        # Initialize data for WAIT
        wait_data = np.array([0], dtype=np.uint32)
        self.inputs_[2].set_data_from_numpy(wait_data)

        # Infer
        user_data = UserData()
        # Build the stream options once; only the optional header differs
        # between the two configurations.
        stream_options = {
            "callback": partial(callback, user_data),
            "stream_timeout": 16,
        }
        if "TRITONSERVER_GRPC_STATUS_FLAG" in os.environ:
            stream_options["headers"] = {"triton_grpc_error": "true"}

        with grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        ) as client:
            # Establish stream
            client.start_stream(**stream_options)
            # Send a request
            client.async_stream_infer(
                model_name=self.repeat_non_decoupled_model_name,
                inputs=self.inputs_,
                request_id="0",
                outputs=self.requested_outputs_,
            )
            # Wait for all results and stop stream
            client.stop_stream()

        # Check infer output
        actual_response_count = 0
        while not user_data._response_queue.empty():
            actual_response_count += 1
            data_item = user_data._response_queue.get()
            # isinstance (rather than an exact type comparison) also catches
            # subclasses of InferenceServerException.
            if isinstance(data_item, InferenceServerException):
                raise data_item
            response_idx = data_item.as_numpy("IDX")[0]
            self.assertEqual(response_idx, expected_response_index)
        self.assertEqual(actual_response_count, expected_response_count)


if __name__ == "__main__":
    # Let the launching script supply the server PID through the SERVER_PID
    # environment variable; keep the class default when it is not set.
    CleanUpTest.SERVER_PID = os.environ.get("SERVER_PID", CleanUpTest.SERVER_PID)
    unittest.main()


================================================
FILE: qa/L0_grpc_state_cleanup/test.sh
================================================
#!/bin/bash
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: the first positional argument when one is given,
# otherwise NVIDIA_TRITON_SERVER_VERSION from the environment.
# ${1-...} falls back only when $1 is unset, so an explicitly empty
# argument is still rejected by the check below.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

export CUDA_VISIBLE_DEVICES=0

# Overall result of the script; set to 1 by any failing step.
RET=0
CLEANUP_TEST=cleanup_test.py

# Remove any *.log files in the current directory.
rm -f *.log

CLIENT_LOG="$(pwd)/client.log"
SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh

# Check that every state the server log reports as created was also released.
#
# Counts the lines of the given log that contain "StateNew" and the lines
# that contain "StateRelease"; the two counts must be equal.
#
# Arguments:
#   $1 - path of the server log file to inspect
# Returns:
#   0 when the counts match; 1 otherwise, after appending a failure message
#   to the log file.
function check_state_release() {
  local log_file=$1
  # Keep the counters local so they do not leak into the calling script.
  local num_state_release num_state_new

  # The path is quoted so log names containing spaces or glob characters work.
  num_state_release=$(grep "StateRelease" "$log_file" | wc -l)
  num_state_new=$(grep "StateNew" "$log_file" | wc -l)

  if [ "$num_state_release" -ne "$num_state_new" ]; then
    echo -e "\n***\n*** Test Failed: Mismatch detected, $num_state_new state(s) created, $num_state_release state(s) released. \n***" >> "$log_file"
    return 1
  fi

  return 0
}

# Stage a fresh copy of the custom_zero_1_float32 model and make sure its
# version directory "1" exists.
rm -fr ./models/custom_zero_1_float32 && \
        cp -r ../custom_models/custom_zero_1_float32 ./models/. && \
        mkdir -p ./models/custom_zero_1_float32/1

# Append an execute_delay_ms parameter with string value "1000" to the
# model's config.pbtxt.
(cd models/custom_zero_1_float32 && \
    echo "parameters [" >> config.pbtxt && \
    echo "{ key: \"execute_delay_ms\"; value: { string_value: \"1000\" }}" >> config.pbtxt && \
    echo "]" >> config.pbtxt)

# Derive repeat_int32_non_decoupled from repeat_int32: delete the
# model_transaction_policy line together with the two lines that follow it,
# then rewrite the first "repeat_int32" on each config line to the new name.
rm -rf models/repeat_int32_non_decoupled && \
    cp -r models/repeat_int32 models/repeat_int32_non_decoupled && \
    (cd models/repeat_int32_non_decoupled && \
        sed -i "/model_transaction_policy/,+2d" config.pbtxt && \
        sed -i "s/repeat_int32/repeat_int32_non_decoupled/" config.pbtxt)

# Tests run against a server started with the default arguments. Each test
# gets its own server instance and server log; after the test the server is
# killed and its log is checked for balanced state creation/release.
for i in test_simple_infer \
         test_simple_infer_cancellation \
         test_simple_infer_timeout \
         test_streaming_infer \
         test_streaming_timeout \
         test_streaming_cancellation \
         test_decoupled_infer \
         test_decoupled_cancellation \
         test_decoupled_timeout \
         test_non_decoupled_streaming_multi_response; do
  SERVER_LOG="./inference_server.$i.log"
  SERVER_ARGS="--model-repository=$(pwd)/models --log-verbose=2"
  run_server
  if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
  fi

  echo "Test: $i" >>"$CLIENT_LOG"

  # Failures below are recorded in RET instead of aborting the script.
  set +e
  if ! python "$CLEANUP_TEST" "CleanUpTest.$i" >>"$CLIENT_LOG" 2>&1; then
    echo -e "\n***\n*** Test $i Failed\n***" >>"$CLIENT_LOG"
    echo -e "\n***\n*** Test $i Failed\n***"
    RET=1
  fi

  kill "$SERVER_PID"
  wait "$SERVER_PID"

  if ! check_state_release "$SERVER_LOG"; then
    cat "$SERVER_LOG"
    echo -e "\n***\n*** State Verification Failed for $i\n***"
    RET=1
  fi
  set -e
done


# Error-status tests: the server is started with the inference protocol
# restricted (--grpc-restricted-protocol), then killed after each test and
# its log checked for balanced state creation/release.
for i in test_simple_infer_error_status \
         test_streaming_error_status \
         test_decoupled_error_status; do
  SERVER_LOG="./inference_server.$i.log"
  SERVER_ARGS="--model-repository=$(pwd)/models --log-verbose=2 --grpc-restricted-protocol=inference:infer-key=infer-value"
  run_server
  if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
  fi

  echo "Test: $i" >>"$CLIENT_LOG"

  # Failures below are recorded in RET instead of aborting the script.
  set +e
  if ! python "$CLEANUP_TEST" "CleanUpTest.$i" >>"$CLIENT_LOG" 2>&1; then
    echo -e "\n***\n*** Test $i Failed\n***" >>"$CLIENT_LOG"
    echo -e "\n***\n*** Test $i Failed\n***"
    RET=1
  fi

  kill "$SERVER_PID"
  wait "$SERVER_PID"

  if ! check_state_release "$SERVER_LOG"; then
    cat "$SERVER_LOG"
    echo -e "\n***\n*** State Verification Failed for $i\n***"
    RET=1
  fi

  set -e
done

# Shutdown tests: the server PID is handed to the Python test through the
# SERVER_PID environment variable, and this script only waits for the server
# process to exit (it does not send a kill itself) before checking the log.
for i in test_simple_infer_shutdownserver \
         test_streaming_infer_shutdownserver \
         test_decoupled_infer_shutdownserver \
         test_decoupled_infer_with_params_shutdownserver; do
  SERVER_ARGS="--model-repository=$(pwd)/models --log-verbose=2"
  SERVER_LOG="./inference_server.$i.log"
  run_server
  if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
  fi

  echo "Test: $i" >>"$CLIENT_LOG"

  # Failures below are recorded in RET instead of aborting the script.
  set +e
  if ! SERVER_PID="$SERVER_PID" python "$CLEANUP_TEST" "CleanUpTest.$i" >>"$CLIENT_LOG" 2>&1; then
    echo -e "\n***\n*** Test $i Failed\n***" >>"$CLIENT_LOG"
    echo -e "\n***\n*** Test $i Failed\n***"
    RET=1
  fi

  wait "$SERVER_PID"

  if ! check_state_release "$SERVER_LOG"; then
    cat "$SERVER_LOG"
    echo -e "\n***\n*** State Verification Failed for $i\n***"
    RET=1
  fi

  set -e
done

# Final test: runs with TRITONSERVER_DELAY_GRPC_COMPLETE exported before the
# server starts, and passes the server log path to the Python test through
# the SERVER_LOG environment variable.
TEST_NAME=test_decoupled_infer_complete
export TRITONSERVER_DELAY_GRPC_COMPLETE=2000

SERVER_LOG="./inference_server.$TEST_NAME.log"
SERVER_ARGS="--model-repository=$(pwd)/models --log-verbose=2"
run_server
if [ "$SERVER_PID" == "0" ]; then
  echo -e "\n***\n*** Failed to start $SERVER\n***"
  cat "$SERVER_LOG"
  exit 1
fi

echo "Test: $TEST_NAME" >>"$CLIENT_LOG"

# Failures below are recorded in RET instead of aborting the script.
set +e

if ! SERVER_LOG="$SERVER_LOG" python "$CLEANUP_TEST" "CleanUpTest.$TEST_NAME" >>"$CLIENT_LOG" 2>&1; then
  cat "$CLIENT_LOG"
  echo -e "\n***\n*** Test $TEST_NAME Failed\n***"
  RET=1
fi

kill "$SERVER_PID"
wait "$SERVER_PID"

if ! check_state_release "$SERVER_LOG"; then
  cat "$SERVER_LOG"
  echo -e "\n***\n*** State Verification Failed for $TEST_NAME\n***"
  RET=1
fi

set -e

# Report the accumulated result and use it as the script's exit status.
if [ "$RET" -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test Failed\n***"
fi

exit "$RET"


================================================
FILE: qa/L0_http/generate_endpoint_test.py
================================================
#!/usr/bin/python3
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import threading
import time
import unittest

import requests
import sseclient
import test_util as tu


class GenerateEndpointTest(tu.TestResultCollector):
    def setUp(self):
        """Record the name of the model that every test in this class targets."""
        model_under_test = "mock_llm"
        self._model_name = model_under_test

    def _get_infer_url(self, model_name, route):
        return f"http://localhost:8000/v2/models/{model_name}/{route}"

    def generate_stream(self, model_name, inputs, stream=False):
        """POST ``inputs`` to the model's ``generate_stream`` route.

        ``inputs`` is sent verbatim when it is already a string and is
        JSON-encoded otherwise. The request advertises
        ``Accept: text/event-stream``.

        ``stream`` is forwarded to ``requests.post``. stream=True is used to
        indicate the response can be iterated over, which should be the common
        setting for generate_stream. For correctness test cases, stream=False
        so that the response content can be re-examined.

        Returns the ``requests`` response object.
        """
        payload = inputs if isinstance(inputs, str) else json.dumps(inputs)
        return requests.post(
            self._get_infer_url(model_name, "generate_stream"),
            data=payload,
            headers={"Accept": "text/event-stream"},
            stream=stream,
        )

    def generate(self, model_name, inputs):
        """POST ``inputs`` to the model's ``generate`` route and return the response.

        ``inputs`` is sent verbatim when it is already a string and is
        JSON-encoded otherwise.
        """
        payload = inputs if isinstance(inputs, str) else json.dumps(inputs)
        endpoint = self._get_infer_url(model_name, "generate")
        return requests.post(endpoint, data=payload)

    def generate_expect_failure(self, model_name, inputs, msg):
        """Send ``inputs`` to ``generate`` and require an HTTP error containing ``msg``.

        Args:
            model_name: Model whose ``generate`` route is called.
            inputs: Request body; a string is sent as-is, anything else is
                JSON-encoded by ``generate``.
            msg: Text that must appear in the ``error`` field of the JSON
                error body.

        The test fails if the response is not ``application/json``, if the
        request unexpectedly succeeds, or if ``msg`` is missing from the error.
        """
        # Reuse the shared request helper instead of duplicating the POST.
        r = self.generate(model_name, inputs)
        # Content-Type header should always be JSON for errors
        self.assertEqual(r.headers["Content-Type"], "application/json")

        try:
            r.raise_for_status()
        except requests.exceptions.HTTPError:
            self.assertIn(msg, r.json()["error"])
        else:
            self.fail(f"Expected failure, success for {inputs}")

    def generate_stream_expect_failure(self, model_name, inputs, msg):
        """Send ``inputs`` to ``generate_stream`` and require an HTTP error containing ``msg``.

        Args:
            model_name: Model whose ``generate_stream`` route is called.
            inputs: Request body; a string is sent as-is, anything else is
                JSON-encoded by ``generate_stream``.
            msg: Text that must appear in the ``error`` field of the JSON
                error body.

        The test fails if the response is not ``application/json``, if the
        request unexpectedly succeeds, or if ``msg`` is missing from the error.
        """
        r = self.generate_stream(model_name, inputs)
        # Content-Type header should always be JSON for errors
        self.assertEqual(r.headers["Content-Type"], "application/json")

        try:
            r.raise_for_status()
        except requests.exceptions.HTTPError:
            self.assertIn(msg, r.json()["error"])
        else:
            self.fail(f"Expected failure, success for {inputs}")

    def generate_stream_expect_success(
        self, model_name, inputs, expected_output, rep_count
    ):
        """Call ``generate_stream`` and expect ``rep_count`` identical SSE events.

        The request must succeed, and every one of the ``rep_count`` events
        must carry ``expected_output`` under the ``TEXT`` key.
        """
        response = self.generate_stream(model_name, inputs)
        response.raise_for_status()
        expected_events = [{"TEXT": expected_output}] * rep_count
        self.check_sse_responses(response, expected_events)

    def check_sse_responses(self, res, expected_res):
        """Validate an SSE response against the expected sequence of events.

        Args:
            res: Response returned by ``generate_stream``.
            expected_res: List of dicts, one per expected event, in order.
                Every key/value pair of a dict must be present in the
                JSON-decoded data of the corresponding event.

        The test fails if the Content-Type is not
        ``text/event-stream; charset=utf-8``, if the number of events differs
        from ``len(expected_res)``, or if any event lacks an expected
        key/value pair.
        """
        # Validate SSE format
        self.assertIn("Content-Type", res.headers)
        self.assertEqual(
            "text/event-stream; charset=utf-8", res.headers["Content-Type"]
        )

        # SSE format (data: []) is hard to parse, use helper library for simplicity
        client = sseclient.SSEClient(res)
        res_count = 0
        for event in client.events():
            # Report surplus events as a test failure rather than letting the
            # lookup below raise an IndexError.
            self.assertLess(
                res_count,
                len(expected_res),
                f"Received more SSE events than the {len(expected_res)} expected",
            )
            # Parse event data, join events into a single response
            data = json.loads(event.data)
            for key, value in expected_res[res_count].items():
                self.assertIn(key, data)
                self.assertEqual(value, data[key])
            res_count += 1
        self.assertEqual(len(expected_res), res_count)
        # Make sure there is no message in the wrong form
        # NOTE(review): _read() is a private sseclient helper; presumably it
        # yields any raw chunks that remain unread - confirm against sseclient.
        for remaining in client._read():
            self.assertTrue(
                remaining.startswith(b"data:"),
                f"SSE response not formed properly, got: {remaining}",
            )
            self.assertTrue(
                remaining.endswith(b"\n\n"),
                f"SSE response not formed properly, got: {remaining}",
            )

    def test_generate(self):
        """``generate`` returns the prompt under TEXT in a JSON response."""
        # Setup text-based input
        prompt = "hello world"
        response = self.generate(
            self._model_name, {"PROMPT": prompt, "STREAM": False}
        )
        response.raise_for_status()

        self.assertIn("Content-Type", response.headers)
        self.assertEqual(response.headers["Content-Type"], "application/json")

        body = response.json()
        self.assertIn("TEXT", body)
        self.assertEqual(prompt, body["TEXT"])

    def test_generate_with_all_inputs(self):
        """``generate`` still returns the prompt when ``input_ids`` is also sent."""
        # Setup text-based input
        prompt = "hello world"
        request_body = {"PROMPT": prompt, "STREAM": False, "input_ids": [100, 200]}

        response = self.generate(self._model_name, request_body)
        response.raise_for_status()

        self.assertIn("Content-Type", response.headers)
        self.assertEqual(response.headers["Content-Type"], "application/json")

        body = response.json()
        self.assertIn("TEXT", body)
        self.assertEqual(prompt, body["TEXT"])

    def test_request_id(self):
        """Check how the ``id`` field of a ``generate`` request is echoed back.

        A non-empty id is returned in the response; a missing or empty id
        yields a response without an ``id`` field.
        """
        # Setup text based input
        text = "hello world"
        request_id = "42"

        def post_json(request_body):
            # Send the request, require success and a JSON Content-Type, and
            # hand back the decoded body.
            response = self.generate(self._model_name, request_body)
            response.raise_for_status()
            self.assertIn("Content-Type", response.headers)
            self.assertEqual(response.headers["Content-Type"], "application/json")
            return response.json()

        # Test when request id in request body
        data = post_json({"PROMPT": text, "id": request_id, "STREAM": False})
        self.assertIn("id", data)
        self.assertEqual(request_id, data["id"])
        self.assertIn("TEXT", data)
        self.assertEqual(text, data["TEXT"])

        # Test when request id not in request body
        data = post_json({"PROMPT": text, "STREAM": False})
        self.assertNotIn("id", data)

        # Test when request id is empty
        data = post_json({"PROMPT": text, "id": "", "STREAM": False})
        self.assertNotIn("id", data)
        self.assertIn("TEXT", data)
        self.assertEqual(text, data["TEXT"])

    def test_generate_stream(self):
        """``generate_stream`` emits the prompt once per requested repetition."""
        # Setup text-based input
        prompt = "hello world"
        repetitions = 3
        request_body = {"PROMPT": [prompt], "STREAM": True, "REPETITION": repetitions}
        self.generate_stream_expect_success(
            self._model_name, request_body, prompt, repetitions
        )

    def test_streaming(self):
        """Verify the responses are streamed as soon as they are generated.

        Each event must arrive more than 1 and less than 3 seconds after the
        previous one; the first interval is measured from just before the
        request is sent. Event content is not inspected.
        """
        text = "hello world"
        rep_count = 3
        inputs = {"PROMPT": [text], "STREAM": True, "REPETITION": rep_count, "DELAY": 2}
        previous = time.time()
        res = self.generate_stream(self._model_name, inputs, stream=True)
        events = sseclient.SSEClient(res).events()
        # This test does not focus on event content
        for _ in events:
            now = time.time()
            elapsed = now - previous
            self.assertTrue(1 < elapsed < 3)
            previous = now

    def test_missing_inputs(self):
        """Requests lacking required inputs are rejected by both endpoints.

        Each request body is paired with the error text expected from both
        ``generate`` and ``generate_stream``.
        """
        got_zero = "expected number of inputs between 2 and 3 but got 0"
        got_one = "expected number of inputs between 2 and 3 but got 1"
        cases = [
            # Missing all inputs
            ({}, got_zero),
            ({"abc": 123}, got_zero),
            # Missing 1 input
            ({"PROMPT": "hello"}, got_one),
            ({"STREAM": False}, got_one),
            ({"STREAM": False, "other": "param"}, got_one),
        ]
        for inputs, error_msg in cases:
            self.generate_expect_failure(self._model_name, inputs, error_msg)
            self.generate_stream_expect_failure(self._model_name, inputs, error_msg)

    def test_invalid_input_types(self):
        """Inputs of the wrong JSON type are rejected by both endpoints.

        When both inputs have a bad type, the expected error is the one for
        the input that appears first in the request.
        """
        not_a_bool = "attempt to access JSON non-boolean as boolean"
        not_a_string = "attempt to access JSON non-string as string"
        bad_type_cases = (
            # Prompt bad type
            ({"PROMPT": 123, "STREAM": False}, not_a_string),
            # Stream bad type
            ({"PROMPT": "hello", "STREAM": "false"}, not_a_bool),
            # Both bad type, parsed in order
            ({"PROMPT": True, "STREAM": 123}, not_a_string),
            ({"STREAM": 123, "PROMPT": True}, not_a_bool),
        )

        for request_body, expected_error in bad_type_cases:
            self.generate_expect_failure(self._model_name, request_body, expected_error)
            self.generate_stream_expect_failure(
                self._model_name, request_body, expected_error
            )

    def test_duplicate_inputs(self):
        """Duplicate input keys in the request body are rejected by both endpoints.

        When several keys are duplicated, the expected error names the
        duplicate that appears first in the request.
        """
        prompt_duplicated = "input 'PROMPT' already exists in request"
        stream_duplicated = "input 'STREAM' already exists in request"
        # Use JSON string directly as Python Dict doesn't support duplicate keys
        duplicate_cases = (
            # One duplicate
            (
                '{"PROMPT": "hello", "STREAM": false, "PROMPT": "duplicate"}',
                prompt_duplicated,
            ),
            (
                '{"PROMPT": "hello", "STREAM": false, "STREAM": false}',
                stream_duplicated,
            ),
            # Multiple duplicates, parsed in order
            (
                '{"PROMPT": "hello", "STREAM": false, "PROMPT": "duplicate", "STREAM": true}',
                prompt_duplicated,
            ),
            (
                '{"PROMPT": "hello", "STREAM": false, "STREAM": true, "PROMPT": "duplicate"}',
                stream_duplicated,
            ),
        )
        for raw_body, expected_error in duplicate_cases:
            self.generate_expect_failure(self._model_name, raw_body, expected_error)
            self.generate_stream_expect_failure(
                self._model_name, raw_body, expected_error
            )

    def test_generate_stream_response_error(self):
        """Check how ``generate_stream`` reports an error produced by the model.

        Two requests are sent with ``FAIL_LAST`` set:

        * ``REPETITION`` 0 - the error is the first response, so the HTTP
          status must signal failure and the single SSE event carries the
          error.
        * ``REPETITION`` 1 - the first response is valid, so the HTTP status
          is success and the error arrives as the second SSE event.
        """
        # Setup text-based input
        text = "hello world"
        inputs = {"PROMPT": [text], "STREAM": True, "REPETITION": 0, "FAIL_LAST": True}
        r = self.generate_stream(self._model_name, inputs)

        # With "REPETITION": 0, error will be first response and the HTTP code
        # will be set properly. Require the HTTP error explicitly so the test
        # cannot pass silently when the request succeeds.
        with self.assertRaises(requests.exceptions.HTTPError):
            r.raise_for_status()
        self.check_sse_responses(r, [{"error": "An Error Occurred"}])

        # With "REPETITION" > 0, the first response is valid response and set
        # HTTP code to success, so user must validate each response
        inputs["REPETITION"] = 1
        r = self.generate_stream(self._model_name, inputs)
        r.raise_for_status()

        self.check_sse_responses(r, [{"TEXT": text}, {"error": "An Error Occurred"}])

    def test_race_condition(self):
        """Stress ``generate_stream`` with concurrent requests.

        In Triton HTTP frontend, the HTTP response is sent in a different
        thread than Triton response complete thread, both programs have shared
        access to the same object, so this test is sending sufficient load to
        the endpoint, in attempt to expose race condition if any.

        100 threads are started (50 per input variant). Any exception raised
        inside a worker is collected and reported on the main thread, because
        an exception that escapes a thread does not fail the test by itself.
        """
        input1 = {"PROMPT": "hello", "STREAM": False, "param": "segfault"}
        input2 = {
            "PROMPT": "hello",
            "STREAM": True,
            "REPETITION": 3,
            "param": "segfault",
        }
        threads = []
        failures = []

        def thread_func(model_name, inputs):
            try:
                self.generate_stream(model_name, inputs).raise_for_status()
            except Exception as exc:
                # Deliberately broad: every worker failure (HTTP error,
                # connection error, ...) is surfaced by the assertion below.
                failures.append(exc)

        for _ in range(50):
            threads.append(
                threading.Thread(target=thread_func, args=(self._model_name, input1))
            )
            threads.append(
                threading.Thread(target=thread_func, args=(self._model_name, input2))
            )
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        self.assertFalse(
            failures,
            f"{len(failures)} of {len(threads)} requests failed, first errors: {failures[:3]}",
        )

    def test_one_response(self):
        """Both endpoints succeed when the model yields a single infer response.

        In the current 'inputs' setting, the model will send at least 1
        response, "STREAM" controls model behavior on sending model responses:
        If True, the model sends two responses, one is the actual infer
        response and the other contains flag only to signal end of response.
        'generate_stream' endpoint is designed for this case so it should send
        infer response and complete HTTP response appropriately. And
        'generate' endpoint will be able to handle this case as at its core
        only one infer response is received, which is the same as typical HTTP
        usage.
        If False, the model sends one response containing infer response and
        end flag, which is the same as how non-decoupled model responds.
        """
        for stream_flag in (True, False):
            inputs = {"PROMPT": "hello world", "STREAM": stream_flag}
            self.generate_stream(self._model_name, inputs).raise_for_status()
            self.generate(self._model_name, inputs).raise_for_status()

    def test_zero_response(self):
        """``generate_stream`` accepts zero responses; ``generate`` must reject them.

        With ``REPETITION`` 0 the streaming endpoint is expected to succeed,
        while ``generate`` must return an HTTP error stating that exactly one
        response is required.
        """
        inputs = {"PROMPT": "hello world", "STREAM": True, "REPETITION": 0}
        r = self.generate_stream(self._model_name, inputs)
        r.raise_for_status()
        # Expect generate fails the inference. Require the HTTP error
        # explicitly so an unexpected success cannot pass silently.
        r = self.generate(self._model_name, inputs)
        with self.assertRaises(requests.exceptions.HTTPError):
            r.raise_for_status()
        self.assertIn(
            "generate expects model to produce exactly 1 response",
            r.json()["error"],
        )

    def test_many_response(self):
        """``generate_stream`` accepts several responses; ``generate`` must reject them.

        With ``REPETITION`` 2 the streaming endpoint is expected to succeed,
        while ``generate`` must return an HTTP error stating that exactly one
        response is required.
        """
        inputs = {"PROMPT": "hello world", "STREAM": True, "REPETITION": 2}
        r = self.generate_stream(self._model_name, inputs)
        r.raise_for_status()
        # Expect generate fails the inference. Require the HTTP error
        # explicitly so an unexpected success cannot pass silently.
        r = self.generate(self._model_name, inputs)
        with self.assertRaises(requests.exceptions.HTTPError):
            r.raise_for_status()
        self.assertIn(
            "generate expects model to produce exactly 1 response",
            r.json()["error"],
        )

    def test_complex_schema(self):
        """Nested JSON objects in a ``generate`` request are rejected.

        Currently only the fundamental conversion is supported, nested object
        in the request will results in parsing error. Both requests below must
        fail with an HTTP error carrying the expected message.
        """
        # complex object to parameters (specifying non model input)
        inputs = {
            "PROMPT": "hello world",
            "STREAM": True,
            "PARAMS": {"PARAM_0": 0, "PARAM_1": True, "PARAM_2": 123.123},
        }
        r = self.generate(self._model_name, inputs)
        # Require the HTTP error explicitly so an unexpected success cannot
        # pass silently.
        with self.assertRaises(requests.exceptions.HTTPError):
            r.raise_for_status()
        self.assertIn("parameter 'PARAMS' has invalid type", r.json()["error"])

        # complex object to model input
        inputs = {
            "PROMPT": {"USER": "hello world", "BOT": "world hello"},
            "STREAM": True,
        }
        r = self.generate(self._model_name, inputs)
        with self.assertRaises(requests.exceptions.HTTPError):
            r.raise_for_status()
        self.assertIn(
            "attempt to access JSON non-string as string", r.json()["error"]
        )

    def test_close_connection_during_streaming(self):
        """Close a streaming response right away and confirm the server stays live.

        A streaming request is opened and its connection is closed without
        reading any event; the liveness endpoint must then still answer
        successfully.
        """
        text = "hello world"
        rep_count = 3
        inputs = {"PROMPT": [text], "STREAM": True, "REPETITION": rep_count, "DELAY": 2}
        streaming_response = self.generate_stream(
            self._model_name, inputs, stream=True
        )
        # close connection while the responses are being generated
        streaming_response.close()
        # check server healthiness
        liveness = requests.get("http://localhost:8000/v2/health/live")
        liveness.raise_for_status()

    def test_parameters(self):
        """Exercise the reserved ``parameters`` keyword of the generate endpoints.

        * A flat ``parameters`` object is accepted by ``generate_stream``.
        * A non-object ``parameters`` value must be rejected by ``generate``.
        * A ``parameters`` object containing a nested object must be rejected
          by ``generate``.
        """
        # Test reserved nested object for parameters
        text = "hello world"
        rep_count = 3
        inputs = {
            "PROMPT": [text],
            "STREAM": True,
            "parameters": {"REPETITION": rep_count},
        }
        self.generate_stream_expect_success(self._model_name, inputs, text, rep_count)

        # parameters keyword is not an object
        inputs = {"PROMPT": [text], "STREAM": True, "parameters": 1}

        r = self.generate(self._model_name, inputs)
        # Require the HTTP error explicitly so an unexpected success cannot
        # pass silently.
        with self.assertRaises(requests.exceptions.HTTPError):
            r.raise_for_status()
        self.assertIn(
            "Expected JSON object for keyword: 'parameters'", r.json()["error"]
        )

        # parameters contains complex object
        inputs = {
            "PROMPT": [text],
            "STREAM": True,
            "parameters": {"nested": {"twice": 1}},
        }

        r = self.generate(self._model_name, inputs)
        with self.assertRaises(requests.exceptions.HTTPError):
            r.raise_for_status()
        self.assertIn(
            "Converting keyword: 'parameters': parameter 'nested' has invalid type.",
            r.json()["error"],
        )

    def test_0_dimension_output(self):
        """``generate`` returns an empty TEXT list for a zero-dimension output.

        With the trtllm backend, if the end token is predicted at the first
        step, the output tensors will have the shapes with 0 dimension.
        """
        request_body = {
            "PROMPT": "hello world",
            "STREAM": False,
            "REPETITION": 0,
            "OUTPUT_0_DIM": True,
        }

        response = self.generate(self._model_name, request_body)
        response.raise_for_status()

        self.assertIn("Content-Type", response.headers)
        self.assertEqual(response.headers["Content-Type"], "application/json")

        body = response.json()
        self.assertIn("TEXT", body)
        self.assertEqual([], body["TEXT"])


# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_http/http_basic_auth_test.py
================================================
#!/usr/bin/python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import unittest

sys.path.append("../common")

import test_util as tu
import tritonclient.http as tritonhttpclient
import tritonclient.http.aio as asynctritonhttpclient
from tritonclient.http.aio.auth import BasicAuth as AsyncBasicAuth
from tritonclient.http.auth import BasicAuth


class HTTPBasicAuthTest(tu.TestResultCollector):
    """Liveness check through the synchronous HTTP client with BasicAuth.

    Every test gets a fresh client pointed at localhost:8004 with a
    ``BasicAuth`` plugin registered; the client is closed afterwards.
    """

    def tearDown(self):
        """Close the client created in ``setUp``."""
        self._client.close()

    def setUp(self):
        """Create the client and register the basic-auth plugin on it."""
        # Use the nginx port
        self._client = tritonhttpclient.InferenceServerClient(url="localhost:8004")
        credentials_plugin = BasicAuth("username", "password")
        self._client.register_plugin(credentials_plugin)

    def test_client_call(self):
        """The server must report itself as live through the client."""
        server_is_live = self._client.is_server_live()
        self.assertTrue(server_is_live)


class HTTPBasicAuthAsyncTest(unittest.IsolatedAsyncioTestCase):
    """Liveness check through the asyncio HTTP client with BasicAuth.

    Every test gets a fresh asyncio client pointed at localhost:8004 with an
    ``AsyncBasicAuth`` plugin registered; the client is closed afterwards.
    """

    async def asyncTearDown(self):
        """Close the client created in ``asyncSetUp``."""
        await self._client.close()

    async def asyncSetUp(self):
        """Create the asyncio client and register the basic-auth plugin."""
        # Use the nginx port
        self._client = asynctritonhttpclient.InferenceServerClient(url="localhost:8004")
        credentials_plugin = AsyncBasicAuth("username", "password")
        self._client.register_plugin(credentials_plugin)

    async def test_client_call(self):
        """The server must report itself as live through the client."""
        server_is_live = await self._client.is_server_live()
        self.assertTrue(server_is_live)


# Run this module's test cases when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_http/http_client_plugin_test.py
================================================
#!/usr/bin/python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest
from unittest.mock import AsyncMock, MagicMock, patch

import numpy as np
import test_util as tu
import tritonclient.http as tritonhttpclient
import tritonclient.http.aio as asynctritonhttpclient
from tritonclient.http import InferenceServerClientPlugin
from tritonclient.utils import np_to_triton_dtype


class TestPlugin(InferenceServerClientPlugin):
    """A simple plugin that adds headers to the inference request."""

    def __init__(self, headers):
        """Keep a reference to the header mapping to add to each request."""
        self._headers = headers

    def __call__(self, request):
        """Update ``request.headers`` in place with the stored headers."""
        extra_headers = self._headers
        request.headers.update(extra_headers)


class HTTPClientPluginAsyncTest(unittest.IsolatedAsyncioTestCase):
    """Check that the asyncio HTTP client hands plugin headers to its stub.

    The client's ``_stub`` methods are replaced with mocks, and the tests
    assert on the keyword arguments those mocks were awaited with.
    """

    async def asyncSetUp(self):
        """Create the header plugin and a fresh asyncio client."""
        self._headers = {"MY-KEY": "MY-VALUE"}
        self._plugin = TestPlugin(self._headers)
        self._client = asynctritonhttpclient.InferenceServerClient(url="localhost:8001")

    async def asyncTearDown(self):
        """Close the client created in ``asyncSetUp``."""
        await self._client.close()

    async def test_server_is_live(self):
        """Plugin headers are passed to GET only while the plugin is registered."""
        # is_server_live is used as an example of an API that communicates
        # with the server through the GET method.
        client = self._client
        stub_get = AsyncMock()
        client._stub.get = stub_get

        client.register_plugin(self._plugin)
        self.assertEqual(self._plugin, client.plugin())
        await client.is_server_live()
        stub_get.assert_awaited_with(url=unittest.mock.ANY, headers=self._headers)

        # After unregistering, the plugin headers must no longer be added
        client.unregister_plugin()
        self.assertEqual(None, client.plugin())
        await client.is_server_live()
        stub_get.assert_awaited_with(url=unittest.mock.ANY, headers={})

    async def test_simple_infer(self):
        """Plugin headers are passed to POST only while the plugin is registered."""
        # Only the read function must return async
        post_result = MagicMock()
        post_result.read = AsyncMock()
        stub_post = AsyncMock(return_value=post_result)
        self._client._stub.post = stub_post

        model = "onnx_zero_1_float32"
        np_input = np.arange(8, dtype=np.float32).reshape(1, -1)

        # Setup inputs
        infer_input = tritonhttpclient.InferInput(
            "INPUT0", np_input.shape, np_to_triton_dtype(np_input.dtype)
        )
        inputs = [infer_input]

        # Set the binary data to False so that 'Inference-Header-Length' is not
        # added to the headers.
        infer_input.set_data_from_numpy(np_input, binary_data=False)

        async def run_infer(headers):
            # Error checking and result construction are patched out so only
            # the arguments of the stubbed POST are examined.
            with patch("tritonclient.http.aio._raise_if_error"), patch(
                "tritonclient.http.aio.InferResult"
            ):
                await self._client.infer(model_name=model, inputs=inputs)
                stub_post.assert_awaited_with(
                    url=unittest.mock.ANY, data=unittest.mock.ANY, headers=headers
                )

        self._client.register_plugin(self._plugin)
        await run_infer(self._headers)

        self._client.unregister_plugin()
        await run_infer({})


class HTTPClientPluginTest(tu.TestResultCollector):
    """Check that the synchronous HTTP client hands plugin headers to its stub.

    The client's ``_client_stub`` is replaced with a ``MagicMock`` and the
    tests assert on the arguments its ``get``/``post`` were called with.
    """

    def setUp(self):
        """Create the header plugin and a client whose stub is mocked."""
        self._headers = {"MY-KEY": "MY-VALUE"}
        self._plugin = TestPlugin(self._headers)
        self._client = tritonhttpclient.InferenceServerClient(url="localhost:8001")

        # Use magic mock for the client stub
        self._client._client_stub = MagicMock()

    def tearDown(self):
        """Close the client created in ``setUp``."""
        self._client.close()

    def test_server_is_live(self):
        """Plugin headers are passed to GET only while the plugin is registered."""
        # is_server_live is used as an example of an API that communicates
        # with the server through the GET method.
        client = self._client
        stub = client._client_stub

        client.register_plugin(self._plugin)
        client.is_server_live()
        stub.get.assert_called_with(unittest.mock.ANY, headers=self._headers)

        # After unregistering, the plugin headers must no longer be added
        client.unregister_plugin()
        client.is_server_live()
        stub.get.assert_called_with(unittest.mock.ANY, headers={})

    def test_simple_infer(self):
        """Plugin headers are passed to POST only while the plugin is registered."""
        model = "onnx_zero_1_float32"
        np_input = np.arange(8, dtype=np.float32).reshape(1, -1)

        # Setup inputs
        infer_input = tritonhttpclient.InferInput(
            "INPUT0", np_input.shape, np_to_triton_dtype(np_input.dtype)
        )
        inputs = [infer_input]

        # Set the binary data to False so that 'Inference-Header-Length' is not
        # added to the headers.
        infer_input.set_data_from_numpy(np_input, binary_data=False)

        def run_infer(headers):
            # Error checking and result construction are patched out so only
            # the arguments of the stubbed POST are examined.
            with patch("tritonclient.http._client._raise_if_error"), patch(
                "tritonclient.http._client.InferResult"
            ):
                self._client.infer(model_name=model, inputs=inputs)
                self._client._client_stub.post.assert_called_with(
                    request_uri=unittest.mock.ANY,
                    body=unittest.mock.ANY,
                    headers=headers,
                )

        self._client.register_plugin(self._plugin)
        run_infer(self._headers)

        self._client.unregister_plugin()
        run_infer({})


# Run this module's test cases when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_http/http_input_size_limit_test.py
================================================
#!/usr/bin/python
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import gzip
import io
import json
import unittest

import numpy as np
import requests
import test_util as tu

# Size constants used to build payloads around the HTTP input size limits.
# An FP32 value occupies 4 bytes, so a byte target is divided by 4 to obtain
# the matching element count.
BYTES_PER_FP32 = 4
MB = 1 << 20  # 1 MB = 1,048,576 bytes
GB = 1 << 30  # 1 GB = 1,073,741,824 bytes
DEFAULT_LIMIT_BYTES = MB * 64  # 64MB default limit
INCREASED_LIMIT_BYTES = MB * 128  # 128MB increased limit

# Element counts that correspond exactly to each byte limit
DEFAULT_LIMIT_ELEMENTS = DEFAULT_LIMIT_BYTES // BYTES_PER_FP32  # 16,777,216 elements
INCREASED_LIMIT_ELEMENTS = INCREASED_LIMIT_BYTES // BYTES_PER_FP32  # 33,554,432

# Number of elements added/removed to land just over/under a limit
OFFSET_ELEMENTS = 32


class InferSizeLimitTest(tu.TestResultCollector):
    def _get_infer_url(self, model_name):
        return "http://localhost:8000/v2/models/{}/infer".format(model_name)

    def test_default_limit_raw_binary(self):
        """Test raw binary inputs with default limit

        An FP32 tensor 128 bytes over the 64MB default limit must be rejected
        with HTTP 400 and a size-limit message, while a tensor 128 bytes under
        the limit must be accepted and returned unchanged in the binary
        section of the response.
        """
        model = "onnx_zero_1_float32"

        # Test case 1: Input just over the 64MB limit (should fail)
        # (2^24 + 32) elements * 4 bytes = 64MB + 128 bytes = 67,108,992 bytes
        large_input = np.ones(
            DEFAULT_LIMIT_ELEMENTS + OFFSET_ELEMENTS, dtype=np.float32
        )
        input_bytes = large_input.tobytes()
        # Verify we're actually over the 64MB limit. A unittest assertion is
        # used (not a bare assert) so the check is not stripped by `python -O`.
        self.assertGreater(len(input_bytes), DEFAULT_LIMIT_BYTES)

        # A zero header length marks the whole body as raw binary tensor data
        headers = {"Inference-Header-Content-Length": "0"}
        response = requests.post(
            self._get_infer_url(model), data=input_bytes, headers=headers
        )

        # Should fail with 400 bad request with default limit
        self.assertEqual(
            400,
            response.status_code,
            "Expected error code for oversized request, got: {}".format(
                response.status_code
            ),
        )

        # Verify error message contains size limit info
        error_msg = response.content.decode()
        self.assertIn(
            "exceeds the maximum allowed value",
            error_msg,
            "Expected error message about exceeding max input size",
        )

        # Test case 2: Input just under the 64MB limit (should succeed)
        # (2^24 - 32) elements * 4 bytes = 64MB - 128 bytes = 67,108,736 bytes
        small_input = np.ones(
            DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS, dtype=np.float32
        )
        input_bytes = small_input.tobytes()
        # Verify we're actually under the 64MB limit
        self.assertLess(len(input_bytes), DEFAULT_LIMIT_BYTES)

        response = requests.post(
            self._get_infer_url(model), data=input_bytes, headers=headers
        )

        # Should succeed with 200 OK
        self.assertEqual(
            200,
            response.status_code,
            "Expected success code for request within size limit, got: {}".format(
                response.status_code
            ),
        )

        # Verify output matches our input (identity model).
        # The binary tensor data follows the JSON header in the response body.
        header_size = int(response.headers["Inference-Header-Content-Length"])
        output_data = response.content[header_size:]

        # Convert output bytes back to numpy array for comparison
        output_array = np.frombuffer(output_data, dtype=np.float32)
        self.assertTrue(
            np.array_equal(output_array, small_input),
            "Response data does not match input data",
        )

    def test_default_limit_json(self):
        """Test JSON inputs with default limit

        A JSON request whose FP32 tensor is 128 bytes over the 64MB default
        limit must be rejected with HTTP 400 and a size-limit message, while a
        request whose serialized JSON is under the limit must succeed and
        report the expected output shape.
        """
        model = "onnx_zero_1_float32"

        # Test case 1: Input just over the 64MB limit (should fail)
        # (2^24 + 32) elements * 4 bytes = 64MB + 128 bytes = 67,108,992 bytes
        shape_size = DEFAULT_LIMIT_ELEMENTS + OFFSET_ELEMENTS

        payload = {
            "inputs": [
                {
                    "name": "INPUT0",
                    "datatype": "FP32",
                    "shape": [1, shape_size],
                    "data": [1.0] * shape_size,
                }
            ]
        }
        # Verify we're actually over the 64MB limit. A unittest assertion is
        # used (not a bare assert) so the check is not stripped by `python -O`.
        self.assertGreater(shape_size * BYTES_PER_FP32, DEFAULT_LIMIT_BYTES)

        headers = {"Content-Type": "application/json"}
        response = requests.post(
            self._get_infer_url(model), headers=headers, json=payload
        )

        # Should fail with 400 bad request with default limit
        self.assertEqual(
            400,
            response.status_code,
            "Expected error code for oversized JSON request, got: {}".format(
                response.status_code
            ),
        )

        # Verify error message contains size limit info
        error_msg = response.content.decode()
        self.assertIn(
            "exceeds the maximum allowed value",
            error_msg,
            "Expected error message about exceeding max input size",
        )

        # Test case 2: Input just under the 64MB limit (should succeed)
        # The test creates a JSON payload with data, which adds overhead compared
        # to raw binary format. We adjust the shape size to ensure the final
        # JSON payload is under the size limit. An element is roughly 5
        # bytes in JSON, compared to 4 bytes as a raw FP32.
        shape_size = (DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5

        payload = {
            "inputs": [
                {
                    "name": "INPUT0",
                    "datatype": "FP32",
                    "shape": [1, shape_size],
                    "data": [1.0] * shape_size,
                }
            ]
        }
        # Verify we're actually under the 64MB limit
        self.assertLess(len(json.dumps(payload).encode("utf-8")), DEFAULT_LIMIT_BYTES)

        response = requests.post(
            self._get_infer_url(model), headers=headers, json=payload
        )

        # Should succeed with 200 OK
        self.assertEqual(
            200,
            response.status_code,
            "Expected success code for JSON request within size limit, got: {}".format(
                response.status_code
            ),
        )

        # Verify we got a valid response
        result = response.json()
        self.assertIn("outputs", result, "Response missing outputs field")
        self.assertEqual(1, len(result["outputs"]), "Expected 1 output")
        self.assertEqual(
            shape_size,
            result["outputs"][0]["shape"][1],
            f"Expected shape {[1, shape_size]}, got {result['outputs'][0]['shape']}",
        )

    def test_large_input_raw_binary(self):
        """Test raw binary input larger with custom limit set

        An FP32 tensor 128 bytes over the 128MB limit must be rejected with
        HTTP 400 and a size-limit message, while a tensor 128 bytes under that
        limit must be accepted and returned unchanged in the binary section of
        the response.
        """
        model = "onnx_zero_1_float32"

        # Test case 1: Input just over the 128MB configured limit (should fail)
        # (2^25 + 32) elements * 4 bytes = 128MB + 128 bytes = 134,217,856 bytes
        large_input = np.ones(
            INCREASED_LIMIT_ELEMENTS + OFFSET_ELEMENTS, dtype=np.float32
        )
        input_bytes = large_input.tobytes()
        # Verify we're actually over the 128MB limit. A unittest assertion is
        # used (not a bare assert) so the check is not stripped by `python -O`.
        self.assertGreater(len(input_bytes), INCREASED_LIMIT_BYTES)

        # A zero header length marks the whole body as raw binary tensor data
        headers = {"Inference-Header-Content-Length": "0"}
        response = requests.post(
            self._get_infer_url(model), data=input_bytes, headers=headers
        )

        # Should fail with 400 bad request with our increased limit
        self.assertEqual(
            400,
            response.status_code,
            "Expected error code for oversized request, got: {}".format(
                response.status_code
            ),
        )

        # Verify error message contains size limit info
        error_msg = response.content.decode()
        self.assertIn(
            "exceeds the maximum allowed value",
            error_msg,
            "Expected error message about exceeding max input size",
        )

        # Test case 2: Input just under the 128MB configured limit (should succeed)
        # (2^25 - 32) elements * 4 bytes = 128MB - 128 bytes = 134,217,600 bytes
        small_input = np.ones(
            INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS, dtype=np.float32
        )
        input_bytes = small_input.tobytes()
        # Verify we're actually under the 128MB limit
        self.assertLess(len(input_bytes), INCREASED_LIMIT_BYTES)

        response = requests.post(
            self._get_infer_url(model), data=input_bytes, headers=headers
        )

        # Should succeed with 200 OK
        self.assertEqual(
            200,
            response.status_code,
            "Expected success code for request within increased limit, got: {}".format(
                response.status_code
            ),
        )

        # Verify output matches our input (identity model).
        # The binary tensor data follows the JSON header in the response body.
        header_size = int(response.headers["Inference-Header-Content-Length"])
        output_data = response.content[header_size:]

        # Convert output bytes back to numpy array for comparison
        output_array = np.frombuffer(output_data, dtype=np.float32)
        self.assertTrue(
            np.array_equal(output_array, small_input),
            "Response data does not match input data",
        )

    def test_large_input_json(self):
        """Test JSON input larger with custom limit set

        A JSON request whose FP32 tensor is 128 bytes over the 128MB limit
        must be rejected with HTTP 400 and a size-limit message, while a
        request whose serialized JSON is under that limit must succeed and
        report the expected output shape.
        """
        model = "onnx_zero_1_float32"

        # Test case 1: Input just over the 128MB configured limit (should fail)
        # (2^25 + 32) elements * 4 bytes = 128MB + 128 bytes = 134,217,856 bytes
        shape_size = INCREASED_LIMIT_ELEMENTS + OFFSET_ELEMENTS

        payload = {
            "inputs": [
                {
                    "name": "INPUT0",
                    "datatype": "FP32",
                    "shape": [1, shape_size],
                    "data": [1.0] * shape_size,
                }
            ]
        }
        # Verify we're actually over the 128MB limit. A unittest assertion is
        # used (not a bare assert) so the check is not stripped by `python -O`.
        self.assertGreater(shape_size * BYTES_PER_FP32, INCREASED_LIMIT_BYTES)

        headers = {"Content-Type": "application/json"}
        response = requests.post(
            self._get_infer_url(model), headers=headers, json=payload
        )

        # Should fail with 400 bad request with our increased limit
        self.assertEqual(
            400,
            response.status_code,
            "Expected error code for oversized JSON request, got: {}".format(
                response.status_code
            ),
        )

        # Verify error message contains size limit info
        error_msg = response.content.decode()
        self.assertIn(
            "exceeds the maximum allowed value",
            error_msg,
            "Expected error message about exceeding max input size",
        )

        # Test case 2: Input just under the 128MB configured limit (should succeed)
        # The test creates a JSON payload with data, which adds overhead compared
        # to raw binary format. We adjust the shape size to ensure the final
        # JSON payload is under the size limit. An element is roughly 5
        # bytes in JSON, compared to 4 bytes as a raw FP32.
        shape_size = (INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5

        payload = {
            "inputs": [
                {
                    "name": "INPUT0",
                    "datatype": "FP32",
                    "shape": [1, shape_size],
                    "data": [1.0] * shape_size,
                }
            ]
        }
        # Verify we're actually under the 128MB limit
        self.assertLess(len(json.dumps(payload).encode("utf-8")), INCREASED_LIMIT_BYTES)

        response = requests.post(
            self._get_infer_url(model), headers=headers, json=payload
        )

        # Should succeed with 200 OK
        self.assertEqual(
            200,
            response.status_code,
            "Expected success code for request within increased limit, got: {}".format(
                response.status_code
            ),
        )

        # Verify we got a valid response
        result = response.json()
        self.assertIn("outputs", result, "Response missing outputs field")
        self.assertEqual(1, len(result["outputs"]), "Expected 1 output")
        self.assertEqual(
            shape_size,
            result["outputs"][0]["shape"][1],
            f"Expected shape {[1, shape_size]}, got {result['outputs'][0]['shape']}",
        )

    def test_large_string_in_json(self):
        """Test JSON request with large string input

        Posts a single BYTES element of 2GB + 64 characters and expects an
        HTTP 400 whose body names the JSON size, the exceeded limit and the
        --http-max-input-size option.
        """
        # Build a string far above the 64MB default limit (about 2GB payload):
        # (2^31 + 64) elements * 1 bytes = 2GB + 64 bytes = 2,147,483,712 bytes
        oversized_text = "A" * (2 * GB + 64)

        string_input = {
            "name": "INPUT0",
            "datatype": "BYTES",
            "shape": [1, 1],
            "data": [oversized_text],
        }
        response = requests.post(
            self._get_infer_url("simple_identity"),
            headers={"Content-Type": "application/json"},
            json={"inputs": [string_input]},
        )

        # Should fail with 400 bad request
        status = response.status_code
        self.assertEqual(
            400,
            status,
            f"Expected error code for oversized JSON request, got: {status}",
        )

        # Verify error message
        error_msg = response.content.decode()
        expected_fragments = (
            "Request JSON size",
            "exceeds the maximum allowed value",
            "Use --http-max-input-size to increase the limit",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, error_msg)

    def _create_compressed_payload(self, target_size):
        """Helper to create a gzip-compressed JSON payload of specified decompressed size."""
        shape_size = 1000  # Small actual data
        payload = {
            "inputs": [
                {
                    "name": "INPUT0",
                    "datatype": "FP32",
                    "shape": [1, shape_size],
                    "data": [1.0] * shape_size,
                }
            ]
        }
        json_str = json.dumps(payload, indent=4)

        # Pad with whitespace to reach target size (whitespace before closing brace is valid JSON)
        padding_needed = target_size - len(json_str)
        padded_json = json_str[:-1] + (" " * padding_needed) + json_str[-1]

        # Compress the payload
        compressed_buffer = io.BytesIO()
        with gzip.GzipFile(fileobj=compressed_buffer, mode="wb") as gz:
            gz.write(padded_json.encode("utf-8"))

        return compressed_buffer.getvalue(), len(padded_json.encode("utf-8"))

    def test_default_limit_compressed(self):
        """Test compressed inputs with default 64MB limit.

        The --http-max-input-size limit is expected to be enforced on the
        size of the data after decompression, not merely on the size of the
        compressed request body.
        """
        url = self._get_infer_url("onnx_zero_1_float32")
        gzip_json_headers = {
            "Content-Type": "application/json",
            "Content-Encoding": "gzip",
        }

        # Test case 1: Payload that decompresses to 64MB + 1MB (over limit) should fail
        oversized_body, oversized_len = self._create_compressed_payload(
            DEFAULT_LIMIT_BYTES + MB
        )

        # The decompressed size must be above the 64MB limit ...
        self.assertGreater(
            oversized_len,
            DEFAULT_LIMIT_BYTES,
            f"Large payload should decompress to > 64MB, got {oversized_len}",
        )
        # ... while the bytes actually sent stay below it
        self.assertLess(
            len(oversized_body),
            DEFAULT_LIMIT_BYTES,
            f"Compressed size should be under limit, got {len(oversized_body)}",
        )

        response = requests.post(url, data=oversized_body, headers=gzip_json_headers)

        # Should fail with 400 bad request - decompressed size exceeds limit
        self.assertEqual(
            400,
            response.status_code,
            f"Expected 400 for compressed request that decompresses to >64MB, got: {response.status_code}",
        )

        # Verify error message contains size limit info
        self.assertIn(
            "exceeds the maximum allowed value",
            response.content.decode(),
            "Expected error message about exceeding max input size",
        )

        # Test case 2: Payload that decompresses to 64MB - 1MB (under limit) should succeed
        accepted_body, accepted_len = self._create_compressed_payload(
            DEFAULT_LIMIT_BYTES - MB
        )

        # The decompressed size must be below the 64MB limit
        self.assertLess(
            accepted_len,
            DEFAULT_LIMIT_BYTES,
            f"Small payload should decompress to < 64MB, got {accepted_len}",
        )

        response = requests.post(url, data=accepted_body, headers=gzip_json_headers)

        # Should succeed with 200 OK
        self.assertEqual(
            200,
            response.status_code,
            f"Expected 200 for compressed request within limit, got: {response.status_code}",
        )

        # Verify we got a valid response
        self.assertIn("outputs", response.json(), "Response missing outputs field")

    def test_large_input_compressed(self):
        """Test compressed inputs with custom 128MB limit set.

        Compressed requests are expected to be rejected above the increased
        --http-max-input-size limit and accepted between the 64MB default and
        the 128MB limit.
        """
        url = self._get_infer_url("onnx_zero_1_float32")
        gzip_json_headers = {
            "Content-Type": "application/json",
            "Content-Encoding": "gzip",
        }

        # Test case 1: Input that decompresses to 128MB + 1MB (over limit) should fail
        oversized_body, oversized_len = self._create_compressed_payload(
            INCREASED_LIMIT_BYTES + MB
        )

        # Verify sizes
        self.assertGreater(
            oversized_len,
            INCREASED_LIMIT_BYTES,
            f"Large payload should decompress to > 128MB, got {oversized_len}",
        )

        response = requests.post(url, data=oversized_body, headers=gzip_json_headers)

        # Should fail with 400 bad request
        self.assertEqual(
            400,
            response.status_code,
            f"Expected 400 for compressed request exceeding 128MB limit, got: {response.status_code}",
        )
        self.assertIn(
            "exceeds the maximum allowed value",
            response.content.decode(),
            "Expected error message about exceeding max input size",
        )

        # Test case 2: Input that decompresses to 128MB - 1MB (under limit) should succeed
        accepted_body, accepted_len = self._create_compressed_payload(
            INCREASED_LIMIT_BYTES - MB
        )

        # Verify sizes: below the increased limit but above the default one
        self.assertLess(
            accepted_len,
            INCREASED_LIMIT_BYTES,
            f"Small payload should decompress to < 128MB, got {accepted_len}",
        )
        self.assertGreater(
            accepted_len,
            DEFAULT_LIMIT_BYTES,
            f"Small payload should decompress to > 64MB (default), got {accepted_len}",
        )

        response = requests.post(url, data=accepted_body, headers=gzip_json_headers)

        # Should succeed with 200 OK
        self.assertEqual(
            200,
            response.status_code,
            f"Expected 200 for compressed request within 128MB limit, got: {response.status_code}",
        )

        # Verify we got a valid response
        self.assertIn("outputs", response.json(), "Response missing outputs field")


# Run this module's test cases when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_http/http_request_many_chunks.py
================================================
#!/usr/bin/python
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import socket
import unittest


class HTTPRequestManyChunksTest(unittest.TestCase):
    """Send chunked HTTP requests made of a very large number of 1-byte chunks.

    Each test targets one POST endpoint and asserts that the raw HTTP
    response contains the expected error text.
    """

    def setUp(self):
        self._model_name = "simple"
        self._local_host = "localhost"
        self._http_port = 8000
        self._malicious_chunk_count = (
            1000000  # large enough to cause a stack overflow if using alloca()
        )
        self._parse_error = (
            "failed to parse the request JSON buffer: Invalid value. at 0"
        )

    def send_chunked_request(
        self, header: str, chunk_count: int, expected_response: str
    ):
        """Send a chunked request over a raw socket and check the response.

        Args:
            header: Request line plus any endpoint-specific header lines, each
                terminated by CRLF. The common headers (Host, Content-Type,
                Transfer-Encoding, Connection) are appended here.
            chunk_count: Number of 1-byte chunks to send as the body.
            expected_response: Text that must appear in the decoded response.

        Raises:
            AssertionError: If `expected_response` is not in the response.
            OSError: If connecting, sending, or receiving fails.
        """
        request_head = (
            f"{header}"
            f"Host: {self._local_host}:{self._http_port}\r\n"
            "Content-Type: application/octet-stream\r\n"
            "Transfer-Encoding: chunked\r\n"
            "Connection: close\r\n"
            "\r\n"
        ).encode()

        # The context manager closes the socket on every exit path.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((self._local_host, self._http_port))
            # HTTP request with chunked encoding
            s.sendall(request_head)

            # Send chunked payload. sendall() is required here: send() may
            # transmit only part of the buffer, which would corrupt the
            # chunk framing.
            for _ in range(chunk_count):
                s.sendall(b"1\r\nA\r\n")
            # End chunked encoding
            s.sendall(b"0\r\n\r\n")

            # Receive response until the peer closes the connection.
            received = []
            while True:
                try:
                    chunk = s.recv(4096)
                except socket.timeout:
                    # Only reachable when a timeout is set on the socket;
                    # this method does not configure one itself.
                    break
                if not chunk:
                    break
                received.append(chunk)

        self.assertIn(expected_response, b"".join(received).decode())

    def test_infer(self):
        request_header = (
            f"POST /v2/models/{self._model_name}/infer HTTP/1.1\r\n"
            "Inference-Header-Content-Length: 0\r\n"
        )

        self.send_chunked_request(
            request_header,
            self._malicious_chunk_count,
            "Raw request must only have 1 input (found 1) to be deduced but got 2 inputs in 'simple' model configuration",
        )

    def test_registry_index(self):
        request_header = "POST /v2/repository/index HTTP/1.1\r\n"

        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_model_control(self):
        load_request_header = (
            f"POST /v2/repository/models/{self._model_name}/load HTTP/1.1\r\n"
        )
        unload_request_header = load_request_header.replace("/load", "/unload")

        self.send_chunked_request(
            load_request_header, self._malicious_chunk_count, self._parse_error
        )
        self.send_chunked_request(
            unload_request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_trace(self):
        request_header = (
            f"POST /v2/models/{self._model_name}/trace/setting HTTP/1.1\r\n"
        )

        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_logging(self):
        request_header = "POST /v2/logging HTTP/1.1\r\n"

        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_system_shm_register(self):
        request_header = "POST /v2/systemsharedmemory/region/test_system_shm_register/register HTTP/1.1\r\n"

        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_cuda_shm_register(self):
        request_header = "POST /v2/cudasharedmemory/region/test_cuda_shm_register/register HTTP/1.1\r\n"

        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )

    def test_generate(self):
        request_header = f"POST /v2/models/{self._model_name}/generate HTTP/1.1\r\n"
        self.send_chunked_request(
            request_header, self._malicious_chunk_count, self._parse_error
        )


# Run the unittest test runner only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_http/http_restricted_api_test.py
================================================
#!/usr/bin/python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import numpy as np
import tritonclient.http as tritonhttpclient
from tritonclient.utils import InferenceServerException


class RestrictedAPITest(unittest.TestCase):
    """Check which HTTP APIs reject requests lacking the expected key header.

    Restricted calls are expected to raise InferenceServerException with
    "This API is restricted" unless the matching header is supplied.
    """

    def setUp(self):
        self.model_name_ = "simple"
        self.client_ = tritonhttpclient.InferenceServerClient("localhost:8000")
        # Close the client after every test (also when the test fails) so
        # its connections are not leaked from one test to the next.
        self.addCleanup(self.client_.close)

    # Other unspecified APIs should not be restricted
    def test_sanity(self):
        self.client_.get_inference_statistics(self.model_name_)
        self.client_.get_inference_statistics(
            self.model_name_, headers={"infer-key": "infer-value"}
        )

    # The metadata, infer and model repository APIs are restricted.
    # metadata and infer expect the "infer-key : infer-value" header,
    # model repository expects "admin-key : admin-value".
    def test_model_repository(self):
        with self.assertRaisesRegex(InferenceServerException, "This API is restricted"):
            self.client_.unload_model(
                self.model_name_, headers={"infer-key": "infer-value"}
            )
        # The request goes through and gets the actual transaction error
        with self.assertRaisesRegex(
            InferenceServerException, "explicit model load / unload is not allowed"
        ):
            self.client_.unload_model(
                self.model_name_, headers={"admin-key": "admin-value"}
            )

    def test_metadata(self):
        # Without the key header the call must be rejected.
        with self.assertRaisesRegex(InferenceServerException, "This API is restricted"):
            self.client_.get_server_metadata()
        self.client_.get_server_metadata(headers={"infer-key": "infer-value"})

    def test_infer(self):
        # setup
        ones = np.ones(shape=(1, 16), dtype=np.int32)
        inputs = [
            tritonhttpclient.InferInput("INPUT0", [1, 16], "INT32"),
            tritonhttpclient.InferInput("INPUT1", [1, 16], "INT32"),
        ]
        for infer_input in inputs:
            infer_input.set_data_from_numpy(ones)

        # This test only cares whether the request goes through
        with self.assertRaisesRegex(InferenceServerException, "This API is restricted"):
            _ = self.client_.infer(
                model_name=self.model_name_, inputs=inputs, headers={"test": "1"}
            )
        self.client_.infer(
            model_name=self.model_name_,
            inputs=inputs,
            headers={"infer-key": "infer-value"},
        )


# Run the unittest test runner only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_http/http_test.py
================================================
#!/usr/bin/python
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import base64
import json
import threading
import time
import unittest

import numpy as np
import requests
import test_util as tu
import tritonclient.http as tritonhttpclient
from tritonclient.utils import InferenceServerException, np_to_triton_dtype


class HttpTest(tu.TestResultCollector):
    """HTTP endpoint tests sent with `requests` and the Triton HTTP client.

    Covers raw binary inference requests, transfer encodings, status codes,
    and rejection of malformed or oversized JSON payloads.
    """

    def _get_infer_url(self, model_name):
        """Return the inference URL for `model_name`."""
        return "http://localhost:8000/v2/models/{}/infer".format(model_name)

    def _get_load_model_url(self, model_name):
        """Return the repository load URL for `model_name`."""
        return "http://localhost:8000/v2/repository/models/{}/load".format(model_name)

    def _raw_binary_helper(
        self, model, input_bytes, expected_output_bytes, extra_headers=None
    ):
        """POST a raw binary request and check the binary part of the response.

        Args:
            model: Name of the model to send the request to.
            input_bytes: Request body.
            expected_output_bytes: Bytes expected after the inference header
                in the response body.
            extra_headers: Optional mapping of additional request headers.

        Raises:
            requests.exceptions.HTTPError: If the server returns an error
                status.
        """
        # Select model that satisfies constraints for raw binary request
        headers = {"Inference-Header-Content-Length": "0"}
        # Add extra headers (if any) before sending request
        if extra_headers:
            headers.update(extra_headers)
        r = requests.post(self._get_infer_url(model), data=input_bytes, headers=headers)
        r.raise_for_status()

        # Get the inference header size so we can locate the output binary data
        header_size = int(r.headers["Inference-Header-Content-Length"])
        output_bytes = r.content[header_size:]
        # Assert input == output since this tests an identity model
        self.assertEqual(
            expected_output_bytes,
            output_bytes,
            "Expected response body contains correct output binary data: {}; got: {}".format(
                expected_output_bytes, output_bytes
            ),
        )

    def test_raw_binary(self):
        model = "onnx_zero_1_float32"
        input_bytes = np.arange(8, dtype=np.float32).tobytes()
        self._raw_binary_helper(model, input_bytes, input_bytes)

    def test_raw_binary_longer(self):
        # Similar to test_raw_binary but test with different data size
        model = "onnx_zero_1_float32"
        input_bytes = np.arange(32, dtype=np.float32).tobytes()
        self._raw_binary_helper(model, input_bytes, input_bytes)

    def test_byte(self):
        # Select model that satisfies constraints for raw binary request
        # i.e. BYTE type the element count must be 1
        model = "onnx_zero_1_object_1_element"
        input_data = "427"
        headers = {"Inference-Header-Content-Length": "0"}
        r = requests.post(self._get_infer_url(model), data=input_data, headers=headers)
        r.raise_for_status()

        # Get the inference header size so we can locate the output binary data
        header_size = int(r.headers["Inference-Header-Content-Length"])
        # Triton returns BYTES tensor with byte size prepended
        output = r.content[header_size + 4 :].decode()
        self.assertEqual(
            input_data,
            output,
            "Expected response body contains correct output binary data: {}; got: {}".format(
                input_data, output
            ),
        )

    def test_byte_too_many_elements(self):
        # Select model that doesn't satisfy constraints for raw binary request
        # i.e. BYTE type the element count must be 1
        model = "onnx_zero_1_object"
        input_data = "427"
        headers = {"Inference-Header-Content-Length": "0"}
        r = requests.post(self._get_infer_url(model), data=input_data, headers=headers)
        self.assertEqual(
            400,
            r.status_code,
            "Expected error code {} returned for the request; got: {}".format(
                400, r.status_code
            ),
        )
        self.assertIn(
            "For BYTE datatype raw input 'INPUT0', the model must have input shape [1]",
            r.content.decode(),
        )

    def test_multi_variable_dimensions(self):
        # Select model that doesn't satisfy constraints for raw binary request
        # i.e. this model has multiple variable-sized dimensions
        model = "onnx_zero_1_float16"
        input_array = np.ones([2, 2], dtype=np.float16)
        headers = {"Inference-Header-Content-Length": "0"}
        r = requests.post(
            self._get_infer_url(model), data=input_array.tobytes(), headers=headers
        )
        self.assertEqual(
            400,
            r.status_code,
            "Expected error code {} returned for the request; got: {}".format(
                400, r.status_code
            ),
        )
        self.assertIn(
            "The shape of the raw input 'INPUT0' can not be deduced because there are more than one variable-sized dimension",
            r.content.decode(),
        )

    def test_multi_inputs(self):
        # Select model that doesn't satisfy constraints for raw binary request
        # i.e. input count must be 1
        model = "onnx_zero_3_float32"
        # Use one numpy array, after tobytes() it can be seen as three inputs
        # each with 8 elements (this ambiguity is why this is not allowed)
        input_array = np.arange(24, dtype=np.float32)
        headers = {"Inference-Header-Content-Length": "0"}
        r = requests.post(
            self._get_infer_url(model), data=input_array.tobytes(), headers=headers
        )
        self.assertEqual(
            400,
            r.status_code,
            "Expected error code {} returned for the request; got: {}".format(
                400, r.status_code
            ),
        )
        self.assertIn(
            "Raw request must only have 1 input (found 1) to be deduced but got 3 inputs in",
            r.content.decode(),
        )

    # This is to test that a properly chunk-encoded request by the caller works,
    # though Triton does not specifically do any special chunk handling outside
    # of underlying HTTP libraries used
    # Future Enhancement: Test other encodings as they come up
    def test_content_encoding_chunked_manually(self):
        # Similar to test_raw_binary but test with extra headers
        extra_headers = {"Transfer-Encoding": "chunked"}
        model = "onnx_zero_1_float32"
        input_bytes = np.arange(8, dtype=np.float32).tobytes()
        # Encode input into a single chunk (for simplicity) following chunked
        # encoding format: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Transfer-Encoding
        chunk_encoded_input = b"".join(
            (
                # Length of chunk in hexadecimal and line separator
                f"{len(input_bytes):X}\r\n".encode("utf-8"),
                # Chunk bytes and line separator
                input_bytes + b"\r\n",
                # Final byte (0) and end message
                b"0\r\n\r\n",
            )
        )
        self._raw_binary_helper(model, chunk_encoded_input, input_bytes, extra_headers)

    # Test that Python client rejects any "Transfer-Encoding" HTTP headers
    # as we don't specially handle encoding requests for the user through
    # these headers. There are special arguments exposed in the client to
    # handle some "Content-Encoding" headers.
    def test_content_encoding_unsupported_client(self):
        for encoding in ["chunked", "compress", "deflate", "gzip"]:
            with self.subTest(encoding=encoding):
                headers = {"Transfer-Encoding": encoding}
                np_input = np.arange(8, dtype=np.float32).reshape(1, -1)
                model = "onnx_zero_1_float32"
                # Setup inputs
                inputs = [
                    tritonhttpclient.InferInput(
                        "INPUT0", np_input.shape, np_to_triton_dtype(np_input.dtype)
                    )
                ]
                inputs[0].set_data_from_numpy(np_input)

                with tritonhttpclient.InferenceServerClient("localhost:8000") as client:
                    # Python client is expected to raise an exception to reject
                    # 'Transfer-Encoding' HTTP headers.
                    with self.assertRaisesRegex(
                        InferenceServerException, "Unsupported HTTP header"
                    ):
                        client.infer(model_name=model, inputs=inputs, headers=headers)

    def test_descriptive_status_code(self):
        model = "onnx_zero_1_float32_queue"
        input_bytes = np.arange(8, dtype=np.float32).tobytes()

        # Send two requests to model that only queues 1 request at the maximum,
        # Expect the second request will be rejected with HTTP status code that
        # aligns with error detail (server unavailable).
        t = threading.Thread(
            target=self._raw_binary_helper, args=(model, input_bytes, input_bytes)
        )
        t.start()
        try:
            time.sleep(0.5)
            with self.assertRaises(requests.exceptions.HTTPError) as context:
                self._raw_binary_helper(model, input_bytes, input_bytes)
            self.assertEqual(
                503,
                context.exception.response.status_code,
                "Expected error code {} returned for the request; got: {}".format(
                    503,
                    context.exception.response.status_code,
                ),
            )
        finally:
            # Always wait for the background request, even when an assertion
            # above fails, so the thread does not outlive this test.
            t.join()

    def test_buffer_size_overflow(self):
        model = "onnx_zero_1_float32"

        # Test for overflow within GetElementCount()
        payload1 = {
            "inputs": [
                {
                    "name": "INPUT0",
                    "shape": [
                        2**4,
                        2**60 + 2,
                    ],  # This evaluates to 2^64 + 32 during GetElementCount()
                    "datatype": "FP32",
                    "data": [1.0],
                }
            ]
        }

        # Test for overflow with type_byte_size multiplication
        payload2 = {
            "inputs": [
                {
                    "name": "INPUT0",
                    "shape": [
                        2**2,
                        2**60 + 2,
                    ],  # This evaluates to 2^64 + 32 during type_byte_size multiplication since FP32 is 4 bytes
                    "datatype": "FP32",
                    "data": [1.0],
                }
            ]
        }

        # Send request and expect a 400 error with specific overflow message
        headers = {"Content-Type": "application/json"}

        # Test the first payload (GetElementCount overflow)
        r1 = requests.post(self._get_infer_url(model), json=payload1, headers=headers)

        self.assertEqual(
            400,
            r1.status_code,
            "Expected error code 400 for GetElementCount overflow check; got: {}".format(
                r1.status_code
            ),
        )

        error_message1 = r1.content.decode()
        self.assertIn(
            "causes total element count to exceed maximum size of", error_message1
        )

        # Test the second payload (type_byte_size multiplication overflow)
        r2 = requests.post(self._get_infer_url(model), json=payload2, headers=headers)

        self.assertEqual(
            400,
            r2.status_code,
            "Expected error code 400 for type_byte_size multiplication overflow check; got: {}".format(
                r2.status_code
            ),
        )

        error_message2 = r2.content.decode()
        self.assertIn("byte size overflow for input", error_message2)

    def test_negative_dimensions(self):
        model = "onnx_zero_1_float32"

        payload = {
            "inputs": [
                {
                    "name": "INPUT0",
                    "shape": [2, -5],  # Negative dimension should be invalid
                    "datatype": "FP32",
                    "data": [1.0],
                }
            ]
        }

        # Send request and expect a 500 error
        headers = {"Content-Type": "application/json"}
        r = requests.post(self._get_infer_url(model), json=payload, headers=headers)

        self.assertEqual(
            500,
            r.status_code,
            "Expected error code 500 for negative dimension; got: {}".format(
                r.status_code
            ),
        )

        error_message = r.content.decode()
        self.assertIn(
            "Unable to parse 'shape': attempt to access JSON non-unsigned-integer as unsigned-integer",
            error_message,
        )

    def test_loading_large_invalid_model(self):
        # Generate large base64 encoded data
        data_length = 1 << 31
        int_max = (1 << 31) - 1
        random_data = b"A" * data_length
        encoded_data = base64.b64encode(random_data)

        # Use a unittest assertion rather than a bare `assert`, which is
        # removed when Python runs with -O.
        self.assertGreater(
            len(encoded_data),
            int_max,
            "Encoded data length does not match the required length.",
        )

        # Prepare payload with large base64 encoded data
        payload = {
            "parameters": {
                "config": json.dumps({"backend": "onnxruntime"}),
                "file:1/model.onnx": encoded_data.decode("utf-8"),
            }
        }
        headers = {"Content-Type": "application/json"}

        # Send POST request
        response = requests.post(
            self._get_load_model_url("invalid_onnx"), headers=headers, json=payload
        )

        # Assert the response is not successful
        self.assertNotEqual(response.status_code, 200)
        # Only the JSON decoding can raise ValueError, so keep the try narrow.
        try:
            error_message = response.json().get("error", "")
        except ValueError:
            self.fail("Response is not valid JSON")
        self.assertIn(
            "Request JSON size",
            error_message,
        )
        self.assertIn(
            "exceeds the maximum allowed value",
            error_message,
        )

    def test_json_recursion_depth_limit(self):
        """Test that server properly handles and rejects deeply nested JSON."""

        def create_nested_json(depth, value):
            # Wrap the JSON text `value` in `depth` levels of array brackets,
            # then parse it into nested Python lists.
            for _ in range(depth):
                value = f"[{value}]"
            return json.loads(value)

        headers = {"Content-Type": "application/json"}
        test_matrix = [
            # (datatype, data, model, json_depth, should_succeed)
            ("BYTES", '"hello"', "simple_identity", 120, False),
            ("BYTES", '"hello"', "simple_identity", 50, True),
            ("INT64", "123", "simple_identity_int64", 120, False),
            ("INT64", "123", "simple_identity_int64", 50, True),
        ]

        for dtype, data, model, json_depth, should_succeed in test_matrix:
            with self.subTest(
                datatype=dtype, depth=json_depth, should_succeed=should_succeed
            ):
                payload = {
                    "inputs": [
                        {
                            "name": "INPUT0",
                            "datatype": dtype,
                            "shape": [1, 1],
                            "data": create_nested_json(json_depth, data),
                        }
                    ]
                }

                response = requests.post(
                    self._get_infer_url(model), headers=headers, json=payload
                )

                if should_succeed:
                    self.assertEqual(response.status_code, 200)
                    continue

                self.assertNotEqual(response.status_code, 200)
                # Only the JSON decoding can raise ValueError.
                try:
                    error_message = response.json().get("error", "")
                except ValueError:
                    self.fail("Response is not valid JSON")
                self.assertIn(
                    "JSON nesting depth exceeds maximum allowed limit (100)",
                    error_message,
                )


# Run the unittest test runner only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_http/nginx.conf
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Run a single worker process.
worker_processes  1;

# File that nginx writes its error log to.
error_log  /var/log/nginx/error.log;

events {
    # Maximum number of simultaneous connections per worker process.
    worker_connections  1024;
}

http {
    # Configure basic authentication
    # (declared at the http level, so it applies to the server block below;
    # the user/password entries are read from the pswd file).
    auth_basic "Restricted Content";
    auth_basic_user_file /opt/tritonserver/qa/L0_http/pswd;

    # Define upstream server
    # (localhost:8000 is the address the L0_http tests use for the server's
    # HTTP endpoint).
    upstream backend {
        server localhost:8000;
    }

    # Define server block for reverse proxy
    server {
        # Port on which the authenticated proxy accepts client connections.
        listen 8004;

        # Configure location for reverse proxy
        location / {
            # Forward every request path to the upstream defined above.
            proxy_pass http://backend;
            # Pass the original host and client address on to the upstream.
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        }
    }
}


================================================
FILE: qa/L0_http/python_http_aio_test.py
================================================
#!/usr/bin/env python
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import unittest

import tritonclient.http.aio as httpclient
from tritonclient.utils import *


class TestHttpAioClient(unittest.IsolatedAsyncioTestCase):
    """Check that each asyncio HTTP client RPC can reach the server.

    Every test issues one call through a client connected to
    ``localhost:8000``. Several tests refer to a model named ``simple``, and
    the load / unload tests expect the server to reject explicit model
    control requests.
    """

    async def asyncSetUp(self):
        # A fresh client per test; asyncTearDown() closes it again.
        self._triton_client = httpclient.InferenceServerClient(url="localhost:8000")

    async def asyncTearDown(self):
        await self._triton_client.close()

    async def test_is_server_live(self):
        ret = await self._triton_client.is_server_live()
        self.assertEqual(ret, True)

    async def test_is_server_ready(self):
        ret = await self._triton_client.is_server_ready()
        self.assertEqual(ret, True)

    async def test_is_model_ready(self):
        ret = await self._triton_client.is_model_ready("simple")
        self.assertEqual(ret, True)

    async def test_get_server_metadata(self):
        ret = await self._triton_client.get_server_metadata()
        self.assertEqual(ret["name"], "triton")

    async def test_get_model_metadata(self):
        ret = await self._triton_client.get_model_metadata("simple")
        self.assertEqual(ret["name"], "simple")

    async def test_get_model_config(self):
        ret = await self._triton_client.get_model_config("simple")
        self.assertEqual(ret["name"], "simple")

    async def test_get_model_repository_index(self):
        # The expected count is tied to the model repository the server was
        # started with.
        ret = await self._triton_client.get_model_repository_index()
        self.assertEqual(len(ret), 7)

    async def test_load_model(self):
        with self.assertRaisesRegex(
            InferenceServerException,
            "explicit model load / unload is not allowed if polling is enabled",
        ):
            await self._triton_client.load_model("simple")

    async def test_unload_model(self):
        # This test must go through unload_model(); calling load_model() here
        # would only repeat test_load_model and leave the unload RPC untested.
        with self.assertRaisesRegex(
            InferenceServerException,
            "explicit model load / unload is not allowed if polling is enabled",
        ):
            await self._triton_client.unload_model("simple")

    async def test_get_inference_statistics(self):
        await self._triton_client.get_inference_statistics()

    async def test_update_trace_settings(self):
        await self._triton_client.update_trace_settings()

    async def test_get_trace_settings(self):
        await self._triton_client.get_trace_settings()

    async def test_get_system_shared_memory_status(self):
        await self._triton_client.get_system_shared_memory_status()

    async def test_register_system_shared_memory(self):
        # The empty pattern matches any message, so only the exception type
        # is checked.
        with self.assertRaisesRegex(InferenceServerException, ""):
            await self._triton_client.register_system_shared_memory("", "", 0)

    async def test_unregister_system_shared_memory(self):
        await self._triton_client.unregister_system_shared_memory()

    async def test_get_cuda_shared_memory_status(self):
        await self._triton_client.get_cuda_shared_memory_status()

    async def test_register_cuda_shared_memory(self):
        # The empty pattern matches any message, so only the exception type
        # is checked.
        with self.assertRaisesRegex(InferenceServerException, ""):
            await self._triton_client.register_cuda_shared_memory("", b"", 0, 0)

    async def test_unregister_cuda_shared_memory(self):
        await self._triton_client.unregister_cuda_shared_memory()


# Run the test case through unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_http/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: a first command-line argument, when given,
# takes precedence over the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is non-empty.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Expose only GPU 0 to the processes started by this script.
export CUDA_VISIBLE_DEVICES=0

# Shared QA helpers.
# NOTE(review): run_server and check_test_results used further down are
# presumably defined in this file -- confirm against ../common/util.sh.
source ../common/util.sh
# Overall result of the script; failing checks below set it to 1.
RET=0

# Python tests and the nginx configuration referenced relative to the
# current directory.
CLIENT_PLUGIN_TEST="./http_client_plugin_test.py"
BASIC_AUTH_TEST="./http_basic_auth_test.py"
RESTRICTED_API_TEST="./http_restricted_api_test.py"
NGINX_CONF="./nginx.conf"
# On windows the paths invoked by the script (running in WSL) must use
# /mnt/c when needed but the paths on the tritonserver command-line
# must be C:/ style.
if [[ -v WSL_DISTRO_NAME ]] || [[ -v MSYSTEM ]]; then
    # Each ${VAR:=default} keeps a value supplied by the environment and
    # otherwise assigns the default.
    SDKDIR=${SDKDIR:=C:/sdk}
    MODELDIR=${MODELDIR:=C:/models}
    DATADIR=${DATADIR:="/mnt/c/data/inferenceserver/${REPO_VERSION}"}
    BACKEND_DIR=${BACKEND_DIR:=C:/tritonserver/backends}
    SERVER=${SERVER:=/mnt/c/tritonserver/bin/tritonserver.exe}

    # Python example clients (.py scripts).
    SIMPLE_AIO_INFER_CLIENT_PY=${SDKDIR}/python/simple_http_aio_infer_client.py
    SIMPLE_HEALTH_CLIENT_PY=${SDKDIR}/python/simple_http_health_metadata.py
    SIMPLE_INFER_CLIENT_PY=${SDKDIR}/python/simple_http_infer_client.py
    SIMPLE_ASYNC_INFER_CLIENT_PY=${SDKDIR}/python/simple_http_async_infer_client.py
    SIMPLE_STRING_INFER_CLIENT_PY=${SDKDIR}/python/simple_http_string_infer_client.py
    SIMPLE_IMAGE_CLIENT_PY=${SDKDIR}/python/image_client.py
    # SIMPLE_ENSEMBLE_IMAGE_CLIENT_PY=${SDKDIR}/python/ensemble_image_client.py
    SIMPLE_SHM_STRING_CLIENT_PY=${SDKDIR}/python/simple_http_shm_string_client.py
    SIMPLE_SHM_CLIENT_PY=${SDKDIR}/python/simple_http_shm_client.py
    SIMPLE_CUDASHM_CLIENT_PY=${SDKDIR}/python/simple_http_cudashm_client.py
    SIMPLE_MODEL_CONTROL_PY=${SDKDIR}/python/simple_http_model_control.py
    SIMPLE_SEQUENCE_INFER_CLIENT_PY=${SDKDIR}/python/simple_http_sequence_sync_infer_client.py
    SIMPLE_REUSE_INFER_OBJECTS_CLIENT_PY=${SDKDIR}/python/reuse_infer_objects_client.py

    # Example client executables (no .py suffix); later sections log their
    # output under "c++" names. They point into ${SDKDIR}/python here, see
    # the FIXME below.
    SIMPLE_HEALTH_CLIENT=${SDKDIR}/python/simple_http_health_metadata
    SIMPLE_INFER_CLIENT=${SDKDIR}/python/simple_http_infer_client
    SIMPLE_STRING_INFER_CLIENT=${SDKDIR}/python/simple_http_string_infer_client
    SIMPLE_ASYNC_INFER_CLIENT=${SDKDIR}/python/simple_http_async_infer_client
    SIMPLE_MODEL_CONTROL=${SDKDIR}/python/simple_http_model_control
    SIMPLE_SEQUENCE_INFER_CLIENT=${SDKDIR}/python/simple_http_sequence_sync_infer_client
    SIMPLE_SHM_CLIENT=${SDKDIR}/python/simple_http_shm_client
    SIMPLE_CUDASHM_CLIENT=${SDKDIR}/python/simple_http_cudashm_client
    SIMPLE_REUSE_INFER_OBJECTS_CLIENT=${SDKDIR}/python/reuse_infer_objects_client
    # [FIXME] point to proper client
    CC_UNIT_TEST=${SDKDIR}/python/cc_client_test
else
    # Non-Windows layout: models default to ./models, the server lives under
    # TRITON_DIR and the clients under ../clients.
    MODELDIR=${MODELDIR:=`pwd`/models}
    DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
    TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
    SERVER=${TRITON_DIR}/bin/tritonserver
    BACKEND_DIR=${TRITON_DIR}/backends

    # Python example clients (.py scripts).
    SIMPLE_AIO_INFER_CLIENT_PY=../clients/simple_http_aio_infer_client.py
    SIMPLE_HEALTH_CLIENT_PY=../clients/simple_http_health_metadata.py
    SIMPLE_INFER_CLIENT_PY=../clients/simple_http_infer_client.py
    SIMPLE_ASYNC_INFER_CLIENT_PY=../clients/simple_http_async_infer_client.py
    SIMPLE_STRING_INFER_CLIENT_PY=../clients/simple_http_string_infer_client.py
    SIMPLE_IMAGE_CLIENT_PY=../clients/image_client.py
    # SIMPLE_ENSEMBLE_IMAGE_CLIENT_PY=../clients/ensemble_image_client.py
    SIMPLE_SHM_STRING_CLIENT_PY=../clients/simple_http_shm_string_client.py
    SIMPLE_SHM_CLIENT_PY=../clients/simple_http_shm_client.py
    SIMPLE_CUDASHM_CLIENT_PY=../clients/simple_http_cudashm_client.py
    SIMPLE_MODEL_CONTROL_PY=../clients/simple_http_model_control.py
    SIMPLE_SEQUENCE_INFER_CLIENT_PY=../clients/simple_http_sequence_sync_infer_client.py
    SIMPLE_REUSE_INFER_OBJECTS_CLIENT_PY=../clients/reuse_infer_objects_client.py

    # Example client executables (no .py suffix); later sections log their
    # output under "c++" names.
    SIMPLE_HEALTH_CLIENT=../clients/simple_http_health_metadata
    SIMPLE_INFER_CLIENT=../clients/simple_http_infer_client
    SIMPLE_STRING_INFER_CLIENT=../clients/simple_http_string_infer_client
    SIMPLE_ASYNC_INFER_CLIENT=../clients/simple_http_async_infer_client
    SIMPLE_MODEL_CONTROL=../clients/simple_http_model_control
    SIMPLE_SEQUENCE_INFER_CLIENT=../clients/simple_http_sequence_sync_infer_client
    SIMPLE_SHM_CLIENT=../clients/simple_http_shm_client
    SIMPLE_CUDASHM_CLIENT=../clients/simple_http_cudashm_client
    SIMPLE_REUSE_INFER_OBJECTS_CLIENT=../clients/reuse_infer_objects_client
    CC_UNIT_TEST=../clients/cc_client_test
fi

# Add string_dyna_sequence model to repo
# The model is derived from a copy of simple_dyna_sequence.
cp -r ${MODELDIR}/simple_dyna_sequence ${MODELDIR}/simple_string_dyna_sequence
# Rename the model inside its config.
sed -i "s/simple_dyna_sequence/simple_string_dyna_sequence/g" ${MODELDIR}/simple_string_dyna_sequence/config.pbtxt
# Replace the "platform: ..." line with the dyna_sequence backend.
sed -i "s/^platform: .*/backend: \"dyna_sequence\"/g" ${MODELDIR}/simple_string_dyna_sequence/config.pbtxt
# On the line following each CONTROL_SEQUENCE_CORRID match, switch the
# data_type to TYPE_STRING.
sed -i "/CONTROL_SEQUENCE_CORRID/{n;s/data_type:.*/data_type: TYPE_STRING/}" ${MODELDIR}/simple_string_dyna_sequence/config.pbtxt
# Swap the copied ONNX model file for the custom backend shared library.
rm -f ${MODELDIR}/simple_string_dyna_sequence/1/model.onnx
cp ../custom_models/custom_dyna_sequence_int32/1/libtriton_dyna_sequence.so ${MODELDIR}/simple_string_dyna_sequence/1/

# Remove logs left over from earlier runs.
rm -f *.log
rm -f *.log.*

set -e

CLIENT_LOG=$(pwd)/client.log
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"

# Bring up the server; a SERVER_PID of "0" is treated as a failed start.
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# From here on failures are collected in RET instead of aborting the script.
set +e

# Health / metadata example client.
if ! python $SIMPLE_HEALTH_CLIENT_PY -v >> ${CLIENT_LOG}.health 2>&1; then
    cat ${CLIENT_LOG}.health
    RET=1
fi

# Run each Python example client and record its output in a per-client log
# named after the script's basename (without extension).
IMAGE=../images/vulture.jpeg
# NOTE(review): SIMPLE_ENSEMBLE_IMAGE_CLIENT_PY is only assigned in
# commented-out lines of this script, so unless the environment provides it
# the unquoted expansion below yields no loop item.
for i in \
        $SIMPLE_AIO_INFER_CLIENT_PY \
        $SIMPLE_INFER_CLIENT_PY \
        $SIMPLE_ASYNC_INFER_CLIENT_PY \
        $SIMPLE_IMAGE_CLIENT_PY \
        $SIMPLE_ENSEMBLE_IMAGE_CLIENT_PY \
        $SIMPLE_SHM_STRING_CLIENT_PY \
        $SIMPLE_SHM_CLIENT_PY \
        $SIMPLE_CUDASHM_CLIENT_PY \
        $SIMPLE_STRING_INFER_CLIENT_PY \
        $SIMPLE_SEQUENCE_INFER_CLIENT_PY \
        ; do
    BASE=$(basename -- $i)
    SUFFIX="${BASE%.*}"
    if [ $SUFFIX == "image_client" ]; then
        # The image client runs twice, once with -a and once without; each
        # log must contain exactly one line with VULTURE.
        python $i -m densenet_onnx -s INCEPTION -a -c 1 -b 1 $IMAGE >> "${CLIENT_LOG}.async.${SUFFIX}" 2>&1
        if [ `grep -c VULTURE ${CLIENT_LOG}.async.${SUFFIX}` != "1" ]; then
            echo -e "\n***\n*** Failed. Expected 1 VULTURE results\n***"
            cat $CLIENT_LOG.async.${SUFFIX}
            RET=1
        fi
        python $i -m densenet_onnx -s INCEPTION -c 1 -b 1 $IMAGE >> "${CLIENT_LOG}.${SUFFIX}" 2>&1
        if [ `grep -c VULTURE ${CLIENT_LOG}.${SUFFIX}` != "1" ]; then
            echo -e "\n***\n*** Failed. Expected 1 VULTURE results\n***"
            cat $CLIENT_LOG.${SUFFIX}
            RET=1
        fi
    # elif [ $SUFFIX == "ensemble_image_client" ]; then
    #     python $i -c 1 ../images >> "${CLIENT_LOG}.${SUFFIX}" 2>&1
    #     for result in "SPORTS CAR" "COFFEE MUG" "VULTURE"; do
    #         if [ `grep -c "$result" ${CLIENT_LOG}.${SUFFIX}` != "1" ]; then
    #             echo -e "\n***\n*** Failed. Expected 1 $result result\n***"
    #             RET=1
    #         fi
    #     done
    else
        python $i -v >> "${CLIENT_LOG}.${SUFFIX}" 2>&1
    fi

    # NOTE(review): $? here is the status of the if/else construct above. In
    # the else branch that is the client's exit status; in the image_client
    # branch it is the status of the last inner check, which is always 0, so
    # image client failures are reported by the VULTURE checks only.
    if [ $? -ne 0 ]; then
        cat "${CLIENT_LOG}.${SUFFIX}"
        RET=1
    fi
done

# Python client that reuses its InferInput and InferRequestedOutput objects.
if ! $SIMPLE_REUSE_INFER_OBJECTS_CLIENT_PY -v >> ${CLIENT_LOG}.reuse 2>&1; then
    cat ${CLIENT_LOG}.reuse
    RET=1
fi

# Python client pointed at a URL that carries a base path. The run is expected
# to fail, so a zero exit status is the error case here.
if $SIMPLE_INFER_CLIENT_PY -u localhost:8000/base_path -v >> ${CLIENT_LOG}.base_path_url 2>&1; then
    cat ${CLIENT_LOG}.base_path_url
    RET=1
fi
# The verbose log must show that the base path was part of the request.
if ! grep -q "POST /base_path/v2/models/simple/infer" ${CLIENT_LOG}.base_path_url; then
    cat ${CLIENT_LOG}.base_path_url
    RET=1
fi

# Run the client executables with the -v and -H test:1 options; each one
# writes to its own "c++" log named after the executable.
for i in \
    $SIMPLE_INFER_CLIENT \
    $SIMPLE_STRING_INFER_CLIENT \
    $SIMPLE_ASYNC_INFER_CLIENT \
    $SIMPLE_HEALTH_CLIENT \
    $SIMPLE_SHM_CLIENT \
    $SIMPLE_CUDASHM_CLIENT \
    $SIMPLE_SEQUENCE_INFER_CLIENT \
    ; do
    BASE=$(basename -- $i)
    SUFFIX="${BASE%.*}"

    if ! $i -v -H test:1 >> ${CLIENT_LOG}.c++.${SUFFIX} 2>&1; then
        cat ${CLIENT_LOG}.c++.${SUFFIX}
        RET=1
    fi
done

# String client with JSON input and output data.
if ! $SIMPLE_STRING_INFER_CLIENT --json-input-data --json-output-data >> ${CLIENT_LOG}.c++.json 2>&1; then
    cat ${CLIENT_LOG}.c++.json
    RET=1
fi

# Client that reuses its InferInput and InferRequestedOutput objects.
if ! $SIMPLE_REUSE_INFER_OBJECTS_CLIENT -v >> ${CLIENT_LOG}.c++.reuse 2>&1; then
    cat ${CLIENT_LOG}.c++.reuse
    RET=1
fi

# Python client plugin test.
python $CLIENT_PLUGIN_TEST >> ${CLIENT_LOG}.python.plugin 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.python.plugin
    RET=1
fi

# Create a password file with username:password
echo -n 'username:' > pswd
echo "password" | openssl passwd -stdin -apr1 >> pswd
# Start nginx with the configuration that sits next to this script.
nginx -c `pwd`/$NGINX_CONF

# Capture the test output in the log that the failure path prints; without
# the redirection that file is never created and the cat below has nothing
# to show.
python $BASIC_AUTH_TEST >> ${CLIENT_LOG}.python.plugin.auth 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.python.plugin.auth
    RET=1
fi
service nginx stop

# Same base-path check with the client executable: the run is expected to
# fail, so a zero exit status is the error case.
if $SIMPLE_INFER_CLIENT -u localhost:8000/base_path -v >> ${CLIENT_LOG}.c++.base_path_url 2>&1; then
    cat ${CLIENT_LOG}.c++.base_path_url
    RET=1
fi
# The verbose log must show that the base path was part of the request.
if ! grep -q "POST /base_path/v2/models/simple/infer" ${CLIENT_LOG}.c++.base_path_url; then
    cat ${CLIENT_LOG}.c++.base_path_url
    RET=1
fi


set -e

# Stop the server used by the sections above.
kill $SERVER_PID
wait $SERVER_PID

# Restart the server in explicit model control mode for the Python model
# control example.
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --model-control-mode=explicit"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# Test Model Control API
if ! python $SIMPLE_MODEL_CONTROL_PY -v >> ${CLIENT_LOG}.model_control 2>&1; then
    cat ${CLIENT_LOG}.model_control
    RET=1
fi

# The client log must contain exactly one line with "PASS".
if [ $(grep "PASS" ${CLIENT_LOG}.model_control | wc -l) -ne 1 ]; then
    cat ${CLIENT_LOG}.model_control
    RET=1
fi
# The server log must mention "Invalid config override" at least once.
if ! grep -q "Invalid config override" ${SERVER_LOG}; then
    cat ${SERVER_LOG}
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Fresh server in explicit model control mode for the model control
# executable.
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --model-control-mode=explicit"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# Test Model Control API
if ! $SIMPLE_MODEL_CONTROL -v >> ${CLIENT_LOG}.c++.model_control 2>&1; then
    cat ${CLIENT_LOG}.c++.model_control
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Test with dynamic sequence models
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server_dyna.log"
CLIENT_LOG="./client_dyna.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
set +e

# Run both sequence clients with -v -d. They append to the same client log,
# so on failure name the failing command and print the log, as the other
# checks in this script do, instead of only setting RET.
for i in \
    $SIMPLE_SEQUENCE_INFER_CLIENT \
    $SIMPLE_SEQUENCE_INFER_CLIENT_PY; do

    $i -v -d >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed: $i -v -d\n***"
        cat $CLIENT_LOG
        RET=1
    fi
done

set -e

kill $SERVER_PID
wait $SERVER_PID

# Test combinations of binary and JSON data
# The curl requests below post JSON bodies to the "simple" model with
# identical INPUT0 and INPUT1 data and then check whether two specific
# arrays ([2,4,...,32] and sixteen zeros) appear as JSON text in the response.
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server_binaryjson.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# no parameters, no outputs == json output
rm -f ./curl.out
# errexit is switched off around curl so that a failing transfer is reported
# through the status code check instead of aborting the script.
set +e
code=`curl -s -w %{http_code} -o ./curl.out -d'{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},{"name":"INPUT1","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}]}' localhost:8000/v2/models/simple/infer`
set -e
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
# Both arrays must be present as JSON text.
if [ `grep -c "\[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32\]" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\]" ./curl.out` != "1" ]; then
    RET=1
fi

# binary_data=true on OUTPUT0, binary_data=false on OUTPUT1
# (the parameter is set on the requested outputs, not on the inputs)
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -d'{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},{"name":"INPUT1","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}],"outputs":[{"name":"OUTPUT0","parameters":{"binary_data":true}},{"name":"OUTPUT1","parameters":{"binary_data":false}}]}' localhost:8000/v2/models/simple/infer`
set -e
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
# The [2,4,...,32] array must not appear as JSON text; the zero array must.
if [ `grep -c "\[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32\]" ./curl.out` != "0" ]; then
    RET=1
fi
if [ `grep -c "\[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\]" ./curl.out` != "1" ]; then
    RET=1
fi

# binary_data=true on OUTPUT0, binary_data not given on OUTPUT1
# (the parameter is set on the requested outputs, not on the inputs)
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -d'{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},{"name":"INPUT1","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}],"outputs":[{"name":"OUTPUT0","parameters":{"binary_data":true}},{"name":"OUTPUT1"}]}' localhost:8000/v2/models/simple/infer`
set -e
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
# The [2,4,...,32] array must not appear as JSON text; the zero array must.
if [ `grep -c "\[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32\]" ./curl.out` != "0" ]; then
    RET=1
fi
if [ `grep -c "\[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\]" ./curl.out` != "1" ]; then
    RET=1
fi

# binary_data_output=true, no outputs requested
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -d'{"parameters":{"binary_data_output":true},"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},{"name":"INPUT1","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}]}' localhost:8000/v2/models/simple/infer`
set -e
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
# Neither array may appear as JSON text in the response.
if [ `grep -c "\[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32\]" ./curl.out` != "0" ]; then
    RET=1
fi
if [ `grep -c "\[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\]" ./curl.out` != "0" ]; then
    RET=1
fi

# binary_data_output=true
# binary_data=false on OUTPUT0, binary_data not given on OUTPUT1
# (the per-output parameter is set on the requested outputs, not on the inputs)
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -d'{"parameters":{"binary_data_output":true},"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},{"name":"INPUT1","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}],"outputs":[{"name":"OUTPUT0","parameters":{"binary_data":false}},{"name":"OUTPUT1"}]}' localhost:8000/v2/models/simple/infer`
set -e
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
# Both arrays are expected as JSON text in this case.
if [ `grep -c "\[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32\]" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\]" ./curl.out` != "1" ]; then
    RET=1
fi

# Send bad request where the 'data' field misaligns with the 'shape' field of the input
# First case: 15 values for a [1,16] shape. Any status other than 200 is
# accepted, and the response must carry the shape mismatch error.
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -d'{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]}]}' localhost:8000/v2/models/simple/infer`
set -e
if [ "$code" == "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
if [ `grep -c "\{\"error\":\"Unable to parse 'data': Shape does not match true shape of 'data' field\"\}" ./curl.out` != "1" ]; then
    RET=1
fi

# Second case: 18 values for a [1,16] shape, same expectations.
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -d'{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18]}]}' localhost:8000/v2/models/simple/infer`
set -e
if [ "$code" == "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
if [ `grep -c "\{\"error\":\"Unable to parse 'data': Shape does not match true shape of 'data' field\"\}" ./curl.out` != "1" ]; then
    RET=1
fi

# Check if the server is still working after the above bad requests
# This repeats the plain JSON request and expects both arrays as JSON text.
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -d'{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},{"name":"INPUT1","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}]}' localhost:8000/v2/models/simple/infer`
set -e
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
if [ `grep -c "\[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32\]" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\]" ./curl.out` != "1" ]; then
    RET=1
fi

# Stop the server used for the curl requests.
kill $SERVER_PID
wait $SERVER_PID

# C++ client unit tests: build a dedicated repository holding
# onnx_int32_int32_int32 and simple, and start the server with tracing on.
rm -rf unit_test_models && mkdir unit_test_models
cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/.
cp -r ${MODELDIR}/simple unit_test_models/.

SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=unit_test_models
            --trace-file=global_unittest.log --trace-level=TIMESTAMPS --trace-rate=1"
SERVER_LOG="./inference_server_cc_unit_test.log"
CLIENT_LOG="./cc_unit_test.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Every HTTP test except the Load tests, which get their own server below.
if ! $CC_UNIT_TEST --gtest_filter=HTTP*:-*Load* >> ${CLIENT_LOG} 2>&1; then
    cat ${CLIENT_LOG}
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# C++ client load API unit tests, run against a repository that holds only
# onnx_int32_int32_int32.
rm -rf unit_test_models && mkdir unit_test_models
cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/.
# Drop version 1. Per the original note this leaves versions 2 and 3 as the
# valid version directories while the config requests versions 1 and 3.
rm -rf unit_test_models/onnx_int32_int32_int32/1

# Start in EXPLICIT mode and load onnx_int32_int32_int32 at startup.
SERVER_ARGS="--model-repository=`pwd`/unit_test_models \
             --model-control-mode=explicit \
             --load-model=onnx_int32_int32_int32 \
             --strict-model-config=false"
SERVER_LOG="./inference_server_cc_unit_test.load.log"
CLIENT_LOG="./cc_unit_test.load.log"

# Each load test gets a freshly started server and its own client log.
for i in \
    "LoadWithFileOverride" \
    "LoadWithConfigOverride" \
    ; do
    run_server
    if [[ "$SERVER_PID" == "0" ]]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    set +e
    if ! $CC_UNIT_TEST --gtest_filter=HTTP*$i >> ${CLIENT_LOG}.$i 2>&1; then
        cat ${CLIENT_LOG}.$i
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# Python HTTP asyncio client unit test.
# NOTE(review): SERVER_LOG is not reassigned in this section, so the server
# output goes to whichever log file the previous section configured.
PYTHON_HTTP_AIO_TEST=python_http_aio_test.py
CLIENT_LOG=$(pwd)/python_http_aio_test.log
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
set +e
if ! python $PYTHON_HTTP_AIO_TEST > $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Python HTTP AsyncIO Test Failed\n***"
    RET=1
fi
set -e
kill $SERVER_PID
wait $SERVER_PID

# Run python unit test
# From here on MODELDIR points at a repository assembled just for http_test.py.
MODELDIR=python_unit_test_models
mkdir -p $MODELDIR
rm -rf ${MODELDIR}/*
cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32 ${MODELDIR}/.
cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_object ${MODELDIR}/.
cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float16 ${MODELDIR}/.
cp -r $DATADIR/qa_identity_model_repository/onnx_zero_3_float32 ${MODELDIR}/.
# Copy of onnx_zero_1_object under a new name; the second sed changes only
# the first "-1" in the config to "1".
cp -r ${MODELDIR}/onnx_zero_1_object ${MODELDIR}/onnx_zero_1_object_1_element && \
    (cd $MODELDIR/onnx_zero_1_object_1_element && \
        sed -i "s/onnx_zero_1_object/onnx_zero_1_object_1_element/" config.pbtxt && \
        sed -i "0,/-1/{s/-1/1/}" config.pbtxt)
# Model for error code test
# A renamed copy of onnx_zero_1_float32 with a dynamic_batching block
# appended: 1 s max queue delay, preferred batch size 8, max queue size 1.
cp -r ${MODELDIR}/onnx_zero_1_float32 ${MODELDIR}/onnx_zero_1_float32_queue && \
    (cd $MODELDIR/onnx_zero_1_float32_queue && \
        sed -i "s/onnx_zero_1_float32/onnx_zero_1_float32_queue/" config.pbtxt && \
        echo "dynamic_batching { " >> config.pbtxt && \
        echo "    max_queue_delay_microseconds: 1000000" >> config.pbtxt && \
        echo "    preferred_batch_size: [ 8 ]" >> config.pbtxt && \
        echo "    default_queue_policy {" >> config.pbtxt && \
        echo "        max_queue_size: 1" >> config.pbtxt && \
        echo "    }" >> config.pbtxt && \
        echo "}" >> config.pbtxt)

# simple_identity plus a renamed copy whose TYPE_STRING entries become
# TYPE_INT64.
cp -r ./models/simple_identity ${MODELDIR}
cp -r ./models/simple_identity ${MODELDIR}/simple_identity_int64 && \
    (cd $MODELDIR/simple_identity_int64 && \
        sed -i "s/TYPE_STRING/TYPE_INT64/" config.pbtxt && \
        sed -i "s/simple_identity/simple_identity_int64/" config.pbtxt)

SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
SERVER_LOG="./inference_server_http_test.log"
CLIENT_LOG="./http_test.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

TEST_RESULT_FILE='test_results.txt'
PYTHON_TEST=http_test.py
EXPECTED_NUM_TESTS=13
set +e
python $PYTHON_TEST >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    RET=1
else
    # The test script exited cleanly; additionally verify the result file
    # against the expected number of tests.
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

### LLM / Generate REST API Endpoint Tests ###

# sseclient-py is a helper library for parsing SSE events:
# https://github.com/mpetazzoni/sseclient
pip install sseclient-py

SERVER_ARGS="--model-repository=`pwd`/../python_models/generate_models"
SERVER_LOG="./inference_server_generate_endpoint_test.log"
CLIENT_LOG="./generate_endpoint_test.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

## Python Unit Tests
TEST_RESULT_FILE='test_results.txt'
PYTHON_TEST=generate_endpoint_test.py
EXPECTED_NUM_TESTS=17
set +e
# The result file is only verified when the test script itself exits cleanly.
if ! python $PYTHON_TEST >$CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    RET=1
elif ! check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

### Test Restricted APIs ###
### Repeated API not allowed

MODELDIR="`pwd`/models"
SERVER_ARGS="--model-repository=${MODELDIR}
             --http-restricted-api=model-repository,health:k1=v1 \
             --http-restricted-api=metadata,health:k2=v2"
# Keep server and client output in separate files. With one shared path the
# client's ">" redirection further down would truncate the log that the
# running server is still writing to.
SERVER_LOG="./inference_server_http_restricted_endpoint_test.log"
CLIENT_LOG="./http_restricted_endpoint_test.log"
# "health" appears in both groups, so startup is expected to fail with the
# message checked below.
run_server
EXPECTED_MSG="api 'health' can not be specified in multiple config groups"
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Expect fail to start $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    RET=1
elif [ `grep -c "${EXPECTED_MSG}" ${SERVER_LOG}` != "1" ]; then
    echo -e "\n***\n*** Failed. Expected ${EXPECTED_MSG} to be found in log\n***"
    cat $SERVER_LOG
    RET=1
fi

### Test Unknown Restricted API###
### Unknown API not allowed

MODELDIR="`pwd`/models"
SERVER_ARGS="--model-repository=${MODELDIR}
             --http-restricted-api=model-reposit,health:k1=v1 \
             --http-restricted-api=metadata,health:k2=v2"
# "model-reposit" is not a known API name, so startup is expected to fail.
run_server
EXPECTED_MSG="unknown restricted api 'model-reposit'"
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Expect fail to start $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    RET=1
elif [ `grep -c "${EXPECTED_MSG}" ${SERVER_LOG}` != "1" ]; then
    echo -e "\n***\n*** Failed. Expected ${EXPECTED_MSG} to be found in log\n***"
    cat $SERVER_LOG
    RET=1
fi

### Test Restricted APIs ###
### Restricted model-repository, metadata, and inference

SERVER_ARGS="--model-repository=${MODELDIR} \
             --http-restricted-api=model-repository:admin-key=admin-value \
             --http-restricted-api=inference,metadata:infer-key=infer-value"
# This configuration is valid, so the server must start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
set +e

python $RESTRICTED_API_TEST RestrictedAPITest > $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Python HTTP Restricted Protocol Test Failed\n***"
    RET=1
fi
set -e
kill $SERVER_PID
wait $SERVER_PID

### Test HTTP input size limits ###

# Assemble the model repository used by the size limit tests.
MODELDIR=http_input_size_limit_test_models
mkdir -p $MODELDIR
rm -rf ${MODELDIR}/*
cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32 ${MODELDIR}/.
cp -r ./models/simple_identity ${MODELDIR}/.

# First pass: server with the default size limit, where large inputs are
# expected to be rejected.
SERVER_ARGS="--model-repository=${MODELDIR}"
SERVER_LOG="./inference_server_default_limit.log"
CLIENT_LOG="./http_input_size_limit_default.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Each test case runs in its own python invocation and appends to the shared
# client log.
if ! python http_input_size_limit_test.py InferSizeLimitTest.test_default_limit_raw_binary >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Default Input Size Limit Test Failed for raw binary input\n***"
    RET=1
fi

if ! python http_input_size_limit_test.py InferSizeLimitTest.test_default_limit_json >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Default Input Size Limit Test Failed for JSON input\n***"
    RET=1
fi

if ! python http_input_size_limit_test.py InferSizeLimitTest.test_large_string_in_json >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Default Input Size Limit Test Failed for large string in JSON\n***"
    RET=1
fi

if ! python http_input_size_limit_test.py InferSizeLimitTest.test_default_limit_compressed >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Default Input Size Limit Test Failed for compressed input\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Second pass: raise the limit to 2**27 bytes (128MB); large inputs are now
# expected to succeed.
SERVER_ARGS="--model-repository=${MODELDIR} --http-max-input-size=$((2**27))"
SERVER_LOG="./inference_server_increased_limit.log"
CLIENT_LOG="./http_input_size_limit_increased.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER with increased HTTP input size limit\n***"
    cat $SERVER_LOG
    exit 1
fi

# Start from an empty client log since the tests below append to it.
rm -f $CLIENT_LOG
set +e
if ! python http_input_size_limit_test.py InferSizeLimitTest.test_large_input_raw_binary >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Input Size Limit Test Failed for raw binary input with increased limits\n***"
    RET=1
fi

if ! python http_input_size_limit_test.py InferSizeLimitTest.test_large_input_json >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Input Size Limit Test Failed for JSON input with increased limits\n***"
    RET=1
fi

if ! python http_input_size_limit_test.py InferSizeLimitTest.test_large_input_compressed >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Input Size Limit Test Failed for compressed input with increased limits\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Test with zero max input size - should fail to start
# run_server reports a failed start by leaving SERVER_PID at "0"; in that case
# the server log must contain the validation error exactly once.
SERVER_ARGS="--model-repository=${MODELDIR} --http-max-input-size=0"
SERVER_LOG="./inference_server_zero_limit.log"
CLIENT_LOG="./http_input_size_limit_zero.log"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Server should not start with zero max input size\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    RET=1
# The command substitution is quoted so that an empty result (for example when
# the log file does not exist and grep prints nothing) still compares as a
# string and is reported as a failure, instead of making `[` error out and the
# branch being skipped silently.
elif [ "$(grep -c "Error: --http-max-input-size must be greater than 0." "${SERVER_LOG}")" != "1" ]; then
    echo -e "\n***\n*** Failed. Expected '--http-max-input-size must be greater than 0' to be found in log\n***"
    cat $SERVER_LOG
    RET=1
fi

# Test with negative max input size - should fail to start
SERVER_ARGS="--model-repository=${MODELDIR} --http-max-input-size=-1024"
SERVER_LOG="./inference_server_negative_limit.log"
CLIENT_LOG="./http_input_size_limit_negative.log"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Server should not start with negative max input size\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    RET=1
# Same quoting as above: a missing or empty grep result must count as failure.
elif [ "$(grep -c "Error: --http-max-input-size must be greater than 0." "${SERVER_LOG}")" != "1" ]; then
    echo -e "\n***\n*** Failed. Expected '--http-max-input-size must be greater than 0' to be found in log\n***"
    cat $SERVER_LOG
    RET=1
fi

### Test HTTP Requests Containing Many Chunks ###
# The server is started in explicit model-control mode with only the "simple"
# model loaded and verbose logging enabled.
MODELDIR="`pwd`/models"
REQUEST_MANY_CHUNKS_PY="http_request_many_chunks.py"
CLIENT_LOG="./client.http_request_many_chunks.log"
SERVER_ARGS="--model-repository=${MODELDIR} --log-verbose=1 --model-control-mode=explicit --load-model=simple"
SERVER_LOG="./inference_server_request_many_chunks.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so a client failure is recorded in RET and both logs are
# printed, rather than the script exiting immediately.
set +e
python $REQUEST_MANY_CHUNKS_PY -v >> ${CLIENT_LOG} 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** HTTP Request Many Chunks Test Failed\n***"
    cat $SERVER_LOG
    cat $CLIENT_LOG
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Final summary: RET is non-zero if any check above recorded a failure, and it
# is also used as the script's exit status.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_http_fuzz/fuzztest.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import glob
import os
import sqlite3
import unittest

import test_util as tu
from boofuzz import *


class FuzzTest(tu.TestResultCollector):
    """Fuzz the HTTP request line of the server's endpoints with boofuzz.

    Each URL is fuzzed against 127.0.0.1:8000; afterwards the newest file
    under ``boofuzz-results/`` is opened as a SQLite database and checked for
    steps recorded with type ``fail``.
    """

    def _run_fuzz(self, url, logger):
        """Run one boofuzz session against the request line for ``url``.

        ``url`` is the default value of the fuzzed Request-URI field and is
        also used to give the boofuzz request a unique name. ``logger`` is the
        list of fuzz loggers handed to the session.
        """
        session = Session(
            target=Target(connection=TCPSocketConnection("127.0.0.1", 8000)),
            fuzz_loggers=logger,
            keep_web_open=False,
        )

        # Request definition: "<Method> <Request-URI> <HTTP-Version>\r\n\r\n".
        s_initialize(name="Request" + url)
        with s_block("Request-Line"):
            s_group(
                "Method",
                ["GET", "HEAD", "POST", "PUT", "DELETE", "CONNECT", "OPTIONS", "TRACE"],
            )
            s_delim(" ", name="space-1")
            s_string(url, name="Request-URI")
            s_delim(" ", name="space-2")
            s_string("HTTP/1.1", name="HTTP-Version")
            s_static("\r\n", name="Request-Line-CRLF")
        s_static("\r\n", "Request-CRLF")

        session.connect(s_get("Request" + url))
        session.fuzz()

    def test_failures_from_db(self):
        """Fuzz every endpoint and require zero ``fail`` steps in its results."""
        url_list = [
            "/v2",
            "/v2/models/simple",
            "/v2/models/simple/infer",
            "/v2/models/simple/versions/v1",
            "/v2/models/simple/config",
            "/v2/models/simple/stats",
            "/v2/models/simple/ready",
            "/v2/health/ready",
            "/v2/health/live",
            "/v2/repository/index",
            "/v2/repository/models/simple/unload",
            "/v2/repository/models/simple/load",
            "/v2/systemsharedmemory/status",
            "/v2/systemsharedmemory/register",
            "/v2/systemsharedmemory/unregister",
            "/v2/systemsharedmemory/region/xx/status",
            "/v2/cudasharedmemory/status",
            "/v2/cudasharedmemory/register",
            "/v2/cudasharedmemory/unregister",
            "/v2/cudasharedmemory/region/xx/status",
        ]

        # One CSV log is shared by all sessions. The context manager closes it
        # even when an assertion below fails.
        with open("fuzz_results.csv", "w") as csv_log:
            logger = [FuzzLoggerCsv(file_handle=csv_log)]

            for url in url_list:
                self._run_fuzz(url, logger)

                # Get latest db file
                files = glob.glob("boofuzz-results/*")
                self.assertTrue(files, "no boofuzz results found after fuzzing " + url)
                dbfile = max(files, key=os.path.getctime)

                conn = sqlite3.connect(dbfile)
                try:
                    # 'fail' is bound as a parameter: a double-quoted "fail"
                    # is parsed by SQLite as an identifier first and only
                    # falls back to a string literal, which is fragile.
                    (num_failures,) = conn.execute(
                        "SELECT COUNT(*) FROM steps WHERE type = ?", ("fail",)
                    ).fetchone()
                finally:
                    conn.close()

                # Get number of failures, should be 0
                self.assertEqual(
                    num_failures, 0, "fuzzing %s recorded failures" % url
                )


# Run the tests in this module through unittest when executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_http_fuzz/test.sh
================================================
#!/bin/bash
# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the data repository version: the first positional argument, when
# given, overrides NVIDIA_TRITON_SERVER_VERSION; an empty result is fatal.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

TEST_RESULT_FILE='test_results.txt'
RET=0
# Remove logs and databases left in this directory by a previous run.
rm -f *.log *.db
# fuzztest.py defines a single test method.
EXPECTED_NUM_TESTS="1"

mkdir -p models
cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/onnx_zero_1_object models/

FUZZTEST=fuzztest.py
FUZZ_LOG=`pwd`/fuzz.log
DATADIR=`pwd`/models
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR"
# NOTE(review): run_server and check_test_results used below are not defined
# in this script; presumably they come from util.sh - confirm.
source ../common/util.sh

# Temporary workaround: drop this once the boofuzz and tornado packages work
# with python 3.10. The test exercises the server's handling of malformed
# input, not python 3.10 compatibility, so running it under Python 3.8 is fine.
function_install_python38() {
    # Bring in the conda helpers, then create the Python 3.8 environment.
    source ../L0_backend_python/common.sh
    install_conda
    create_conda_env "3.8" "python-3-8"

    # Packages required by the test script, installed in this order.
    local -a test_deps=(
        wheel
        setuptools
        boofuzz==0.3.0
        "numpy<2"
        pillow
        attrdict
        future
        grpcio
        requests
        gsutil
        awscli
        six
        grpcio-channelz
        prettytable
        virtualenv
    )
    pip3 install --upgrade "${test_deps[@]}"
}
function_install_python38

# Start the server; a SERVER_PID of "0" is treated as a failed start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so failures below are recorded in RET instead of aborting.
set +e

# Run the fuzz test; on success additionally verify the collected results
# against the expected number of tests.
python3 $FUZZTEST -v >> ${FUZZ_LOG} 2>&1
if [ $? -ne 0 ]; then
    cat ${FUZZ_LOG}
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $TEST_RESULT_FILE
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

# Stop the server and wait for the process to exit.
kill $SERVER_PID
wait $SERVER_PID


# Report the overall result; RET is also the script's exit status.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_https/nginx.conf
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# TLS-terminating reverse proxy: accepts HTTPS on port 443 and forwards every
# request to the plain-HTTP endpoint at localhost:8000.
server {
   listen 443 ssl;
   server_name localhost;

   # Certificate and private key presented to clients.
   ssl_certificate /etc/nginx/cert.crt;
   ssl_certificate_key /etc/nginx/cert.key;

    # All paths are proxied; HTTP/1.1 is used towards the upstream.
    location / {
              proxy_pass http://localhost:8000;
              proxy_http_version 1.1;
              }
}


================================================
FILE: qa/L0_https/test.sh
================================================
#!/bin/bash
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the data repository version: the first positional argument, when
# given, overrides NVIDIA_TRITON_SERVER_VERSION; an empty result is fatal.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Overall result; set to 1 by any failed check below.
RET=0

# Python (sync and asyncio) and C++ example clients exercised by this test.
SIMPLE_AIO_INFER_CLIENT_PY=../clients/simple_http_aio_infer_client.py
SIMPLE_INFER_CLIENT_PY=../clients/simple_http_infer_client.py
TEST_CLIENT=../clients/simple_http_infer_client

NGINX_CONF=`pwd`/nginx.conf
# Prefix for the per-case client logs (each case appends its own suffix).
CLIENT_LOG=`pwd`/client.log
DATADIR=`pwd`/models
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR"
# NOTE(review): run_server used below is not defined in this script;
# presumably it comes from util.sh - confirm.
source ../common/util.sh

# Remove keys, certificates and logs left in this directory by a previous run.
rm -f *.key *.crt ${CLIENT_LOG}.* server.log

# Generate valid CA
# (4096-bit RSA key protected with passphrase "1234", self-signed certificate
# valid for 365 days)
openssl genrsa -passout pass:1234 -des3 -out ca.key 4096
openssl req -passin pass:1234 -new -x509 -days 365 -key ca.key -out ca.crt -subj  "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Test/CN=Root CA"

# Generate valid Server Key/Cert
# (key, signing request, then a certificate signed by the CA above)
openssl genrsa -passout pass:1234 -des3 -out server.key 4096
openssl req -passin pass:1234 -new -key server.key -out server.csr -subj  "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Server/CN=localhost"
openssl x509 -req -passin pass:1234 -days 365 -in server.csr -CA ca.crt -CAkey ca.key -set_serial 01 -out server.crt

# Remove passphrase from the Server Key
openssl rsa -passin pass:1234 -in server.key -out server.key

# Generate valid Client Key/Cert
openssl genrsa -passout pass:1234 -des3 -out client.key 4096
openssl req -passin pass:1234 -new -key client.key -out client.csr -subj  "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Client/CN=localhost"
openssl x509 -passin pass:1234 -req -days 365 -in client.csr -CA ca.crt -CAkey ca.key -set_serial 01 -out client.crt

# Remove passphrase from Client Key
openssl rsa -passin pass:1234 -in client.key -out client.key

# Create mutated (invalid) copies of the client key and certificate: the sed
# expression upper-cases the first character after every word boundary.
cp client.key client2.key && sed -i "s/\b\(.\)/\u\1/g" client2.key
cp client.crt client2.crt && sed -i "s/\b\(.\)/\u\1/g" client2.crt

# Install the server certificate and key under the paths nginx.conf refers to.
mv server.crt /etc/nginx/cert.crt
mv server.key /etc/nginx/cert.key

# Start the server; a SERVER_PID of "0" is treated as a failed start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Setup the new configuration for the proxy. The HTTPS traffic will be
# redirected to the running instance of server at localhost:8000
cp ${NGINX_CONF} /etc/nginx/sites-available/default

# Start the proxy server
service nginx restart

# Disable errexit: each case below checks $? itself and records failures in
# RET, printing the case's log when it fails.
set +e

# Test basic inference with https
# (client key, client certificate and CA certificate supplied; sync Python
# client, asyncio Python client, then the C++ client)
python $SIMPLE_INFER_CLIENT_PY -v -u localhost --ssl --key-file client.key --cert-file client.crt --ca-certs ca.crt >> ${CLIENT_LOG}.ssl_infer 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.ssl_infer
    RET=1
fi
python $SIMPLE_AIO_INFER_CLIENT_PY -v -u localhost --ssl --key-file client.key --cert-file client.crt --ca-certs ca.crt >> ${CLIENT_LOG}.ssl_infer.aio 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.ssl_infer.aio
    RET=1
fi

$TEST_CLIENT -v -u https://localhost:443 --key-file client.key --cert-file client.crt --ca-certs ca.crt >> ${CLIENT_LOG}.c++.ssl_infer 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.c++.ssl_infer
    RET=1
fi

# Test basic inference on https without peer verification
# (--insecure for the Python clients, --verify-host 0 --verify-peer 0 for C++)
python $SIMPLE_INFER_CLIENT_PY -v -u localhost --ssl --insecure >> ${CLIENT_LOG}.ssl_infer_insecure 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.ssl_infer_insecure
    RET=1
fi
python $SIMPLE_AIO_INFER_CLIENT_PY -v -u localhost --ssl --insecure >> ${CLIENT_LOG}.ssl_infer_insecure.aio 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.ssl_infer_insecure.aio
    RET=1
fi

$TEST_CLIENT -v -u https://localhost:443 --verify-host 0 --verify-peer 0 >> ${CLIENT_LOG}.c++.ssl_infer_insecure 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.c++.ssl_infer_insecure
    RET=1
fi

# Test failure cases for SSL
# Every command in this section is expected to exit non-zero; a zero exit
# status is the failure and sets RET=1. The Python clients are run through
# `python`, exactly like the positive cases, so that a non-zero status comes
# from the client itself and not from the shell being unable to execute the
# .py file.

# Try without SSL
python $SIMPLE_INFER_CLIENT_PY -v -u localhost >> ${CLIENT_LOG}.no_ssl_fail_infer 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.no_ssl_fail_infer
    echo -e "\n***\n*** Expected test failure\n***"
else
    cat ${CLIENT_LOG}.no_ssl_fail_infer
    echo -e "\n***\n*** Unexpected success: Python client without SSL\n***"
    RET=1
fi
python $SIMPLE_AIO_INFER_CLIENT_PY -v -u localhost >> ${CLIENT_LOG}.no_ssl_fail_infer.aio 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.no_ssl_fail_infer.aio
    echo -e "\n***\n*** Expected test failure\n***"
else
    cat ${CLIENT_LOG}.no_ssl_fail_infer.aio
    echo -e "\n***\n*** Unexpected success: Python aio client without SSL\n***"
    RET=1
fi

# C++ client against the HTTPS endpoint without any key/cert/CA options.
$TEST_CLIENT -v -u https://localhost:443 >> ${CLIENT_LOG}.c++.no_ssl_fail_infer 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.c++.no_ssl_fail_infer
    echo -e "\n***\n*** Expected test failure\n***"
else
    cat ${CLIENT_LOG}.c++.no_ssl_fail_infer
    echo -e "\n***\n*** Unexpected success: C++ client without SSL options\n***"
    RET=1
fi


# Try with incorrect key
# (client2.key is the deliberately mutated copy of client.key)
python $SIMPLE_INFER_CLIENT_PY -v -u localhost --ssl --key-file client2.key --cert-file client.crt --ca-certs ca.crt >> ${CLIENT_LOG}.ssl_wrong_key 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.ssl_wrong_key
    echo -e "\n***\n*** Expected test failure\n***"
else
    cat ${CLIENT_LOG}.ssl_wrong_key
    echo -e "\n***\n*** Unexpected success: Python client with incorrect key\n***"
    RET=1
fi
python $SIMPLE_AIO_INFER_CLIENT_PY -v -u localhost --ssl --key-file client2.key --cert-file client.crt --ca-certs ca.crt >> ${CLIENT_LOG}.ssl_wrong_key.aio 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.ssl_wrong_key.aio
    echo -e "\n***\n*** Expected test failure\n***"
else
    cat ${CLIENT_LOG}.ssl_wrong_key.aio
    echo -e "\n***\n*** Unexpected success: Python aio client with incorrect key\n***"
    RET=1
fi

$TEST_CLIENT -v -u https://localhost:443 --key-file client2.key --cert-file client.crt --ca-certs ca.crt >> ${CLIENT_LOG}.c++.ssl_wrong_key 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}.c++.ssl_wrong_key
    echo -e "\n***\n*** Expected test failure\n***"
else
    cat ${CLIENT_LOG}.c++.ssl_wrong_key
    echo -e "\n***\n*** Unexpected success: C++ client with incorrect key\n***"
    RET=1
fi

# Re-enable errexit for the teardown steps.
set -e

# Stop the server and wait for the process to exit.
kill $SERVER_PID
wait $SERVER_PID

# Stop the proxy server
service nginx stop

# Report the overall result; RET is also the script's exit status.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_implicit_state/implicit_state.py
================================================
#!/usr/bin/env python
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest
from builtins import range

import numpy as np
import test_util as tu
import tritonclient.http as tritonhttpclient
from tritonclient.utils import InferenceServerException

# Space-separated backend names used to build per-backend model names in the
# tests below; overridable through the BACKENDS environment variable.
BACKENDS = os.environ.get("BACKENDS", "onnx plan libtorch")


class ImplicitStateTest(tu.TestResultCollector):
    """HTTP client tests for models that use implicit state.

    Every test sends sequence requests to a server at ``localhost:8000``.
    """

    def test_no_implicit_state(self):
        """A model without a state configuration rejects a state update."""
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        inputs = []
        inputs.append(tritonhttpclient.InferInput("INPUT", [1], "INT32"))
        inputs.append(tritonhttpclient.InferInput("TEST_CASE", [1], "INT32"))
        inputs[0].set_data_from_numpy(np.random.randint(5, size=[1], dtype=np.int32))
        inputs[1].set_data_from_numpy(np.asarray([0], dtype=np.int32))

        with self.assertRaises(InferenceServerException) as e:
            triton_client.infer(
                model_name="no_implicit_state",
                inputs=inputs,
                sequence_id=1,
                sequence_start=True,
            )

        # Compare case-insensitively against the lower-cased message.
        err_str = str(e.exception).lower()
        self.assertIn("unable to add state 'undefined_state'", err_str)
        self.assertIn(
            "state configuration is missing for model 'no_implicit_state'", err_str
        )

    def test_wrong_implicit_state_name(self):
        """Using a state name that is not configured yields a clear error."""
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        inputs = []
        inputs.append(tritonhttpclient.InferInput("INPUT", [1], "INT32"))
        inputs.append(tritonhttpclient.InferInput("TEST_CASE", [1], "INT32"))
        inputs[0].set_data_from_numpy(np.random.randint(5, size=[1], dtype=np.int32))
        inputs[1].set_data_from_numpy(np.asarray([0], dtype=np.int32))

        with self.assertRaises(InferenceServerException) as e:
            triton_client.infer(
                model_name="wrong_internal_state",
                inputs=inputs,
                sequence_id=2,
                sequence_start=True,
            )

        err_str = str(e.exception).lower()
        self.assertIn("state 'undefined_state' is not a valid state name", err_str)

    def test_implicit_state_single_buffer(self):
        """A two-request sequence on ``single_state_buffer`` completes without error."""
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        inputs = []
        inputs.append(tritonhttpclient.InferInput("INPUT", [1], "INT32"))
        inputs.append(tritonhttpclient.InferInput("TEST_CASE", [1], "INT32"))
        inputs[0].set_data_from_numpy(np.random.randint(5, size=[1], dtype=np.int32))
        inputs[1].set_data_from_numpy(np.asarray([2], dtype=np.int32))

        # First request opens the sequence, second one closes it.
        triton_client.infer(
            model_name="single_state_buffer",
            inputs=inputs,
            sequence_id=2,
            sequence_start=True,
            sequence_end=False,
        )

        triton_client.infer(
            model_name="single_state_buffer",
            inputs=inputs,
            sequence_id=2,
            sequence_start=False,
            sequence_end=True,
        )

    def test_implicit_state_growable_memory(self):
        """The ``growable_memory`` state grows across three requests.

        The expected OUTPUT_STATE is all zeros after the first request, then
        gains an equally sized run of ones, then a run of twos whose length
        is half of the state so far.
        """
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        inputs = []
        inputs.append(tritonhttpclient.InferInput("INPUT", [1], "INT32"))
        inputs.append(tritonhttpclient.InferInput("TEST_CASE", [1], "INT32"))
        inputs[0].set_data_from_numpy(np.random.randint(5, size=[1], dtype=np.int32))
        inputs[1].set_data_from_numpy(np.asarray([3], dtype=np.int32))

        output = triton_client.infer(
            model_name="growable_memory",
            inputs=inputs,
            sequence_id=2,
            sequence_start=True,
            sequence_end=False,
        )
        output_state = output.as_numpy("OUTPUT_STATE")
        expected_output_state = np.zeros(output_state.shape, dtype=np.int8)
        np.testing.assert_equal(output_state, expected_output_state)

        output = triton_client.infer(
            model_name="growable_memory",
            inputs=inputs,
            sequence_id=2,
            sequence_start=False,
            sequence_end=False,
        )
        output_state = output.as_numpy("OUTPUT_STATE")
        expected_output_state = np.concatenate(
            [expected_output_state, np.ones(expected_output_state.shape, dtype=np.int8)]
        )
        np.testing.assert_equal(output_state, expected_output_state)

        output = triton_client.infer(
            model_name="growable_memory",
            inputs=inputs,
            sequence_id=2,
            sequence_start=False,
            sequence_end=False,
        )
        output_state = output.as_numpy("OUTPUT_STATE")
        expected_output_state = np.concatenate(
            [
                expected_output_state,
                np.full(
                    (expected_output_state.shape[0] // 2,), dtype=np.int8, fill_value=2
                ),
            ]
        )
        np.testing.assert_equal(output_state, expected_output_state)

    def test_no_update(self):
        """OUTPUT stays 1 for a whole sequence when the state is never updated."""
        # Test implicit state without updating any state
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        inputs = []
        inputs.append(tritonhttpclient.InferInput("INPUT", [1], "INT32"))
        inputs.append(tritonhttpclient.InferInput("TEST_CASE", [1], "INT32"))
        inputs[0].set_data_from_numpy(np.asarray([1], dtype=np.int32))
        inputs[1].set_data_from_numpy(np.asarray([1], dtype=np.int32))
        correlation_id = 3

        # Make sure the state is never updated.
        result_start = triton_client.infer(
            model_name="no_state_update",
            inputs=inputs,
            sequence_id=correlation_id,
            sequence_start=True,
        )
        self.assertEqual(result_start.as_numpy("OUTPUT")[0], 1)
        for _ in range(10):
            result = triton_client.infer(
                model_name="no_state_update", inputs=inputs, sequence_id=correlation_id
            )
            self.assertEqual(result.as_numpy("OUTPUT")[0], 1)

        # Check the response of the sequence-end request itself; previously it
        # was discarded and the last loop result was asserted a second time.
        result_end = triton_client.infer(
            model_name="no_state_update",
            inputs=inputs,
            sequence_id=correlation_id,
            sequence_end=True,
        )
        self.assertEqual(result_end.as_numpy("OUTPUT")[0], 1)

    def test_request_output_not_allowed(self):
        """Requesting the state tensor as an output is rejected per backend."""
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")

        # split() without an argument ignores repeated/leading/trailing
        # whitespace, so no empty backend names are produced.
        for backend in BACKENDS.split():
            # libtorch models use the "<name>__<index>" tensor naming.
            is_libtorch = backend == "libtorch"
            input_name = "INPUT__0" if is_libtorch else "INPUT"
            state_name = "OUTPUT_STATE__1" if is_libtorch else "OUTPUT_STATE"

            inputs = [tritonhttpclient.InferInput(input_name, [1], "INT32")]
            inputs[0].set_data_from_numpy(np.asarray([1], dtype=np.int32))
            outputs = [tritonhttpclient.InferRequestedOutput(state_name)]

            with self.assertRaises(InferenceServerException) as e:
                triton_client.infer(
                    model_name=f"{backend}_nobatch_sequence_int32",
                    inputs=inputs,
                    outputs=outputs,
                    sequence_id=1,
                    sequence_start=True,
                    sequence_end=True,
                )
            self.assertIn(
                f"unexpected inference output '{state_name}' for model",
                str(e.exception),
            )

    def test_request_output(self):
        """Models that expose the state as an output return it with the result."""
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        for backend in BACKENDS.split():
            # libtorch models use the "<name>__<index>" tensor naming.
            is_libtorch = backend == "libtorch"
            input_name = "INPUT__0" if is_libtorch else "INPUT"
            state_name = "OUTPUT_STATE__1" if is_libtorch else "OUTPUT_STATE"
            output_name = "OUTPUT__0" if is_libtorch else "OUTPUT"

            inputs = [tritonhttpclient.InferInput(input_name, [1], "INT32")]
            inputs[0].set_data_from_numpy(np.asarray([1], dtype=np.int32))

            outputs = [
                tritonhttpclient.InferRequestedOutput(state_name),
                tritonhttpclient.InferRequestedOutput(output_name),
            ]

            result = triton_client.infer(
                model_name=f"{backend}_nobatch_sequence_int32_output",
                inputs=inputs,
                outputs=outputs,
                sequence_id=1,
                sequence_start=True,
                sequence_end=True,
            )
            # assertEqual, not assertTrue: with assertTrue the literal 1 was
            # taken as the failure message and only truthiness was checked.
            self.assertEqual(result.as_numpy(state_name)[0], 1)
            self.assertEqual(result.as_numpy(output_name)[0], 1)


# Run the tests in this module through unittest when executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_implicit_state/models/growable_memory/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "growable_memory" test model, served by the
# custom "implicit_state" backend. It declares one implicit state tensor that
# shares a single buffer for input and output and has growable memory enabled,
# and it runs as a single GPU instance.
name: "growable_memory"
backend: "implicit_state"
# max_batch_size 0: the tensor dims below are the full shapes (no batch dim).
max_batch_size: 0
sequence_batching {
  # Set large idle timeout to avoid inter-request timeouts for test consistency
  # (10000000 microseconds = 10 seconds).
  max_sequence_idle_microseconds: 10000000
  # Sequence control tensors handed to the model: START, READY and END, each
  # an FP32 value where 0 means false and 1 means true.
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "END"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          fp32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
  # Single implicit state: INT8, 1024 x 1024 elements, input and output share
  # one buffer, and growable memory is requested for it.
  state [
    {
        input_name: "INPUT_STATE"
        output_name: "OUTPUT_STATE"
        data_type: TYPE_INT8
        dims: [1024, 1024]
        use_same_buffer_for_input_output: true
        use_growable_memory: true
    }
  ]
}

# Regular request inputs.
# NOTE(review): TEST_CASE presumably selects which scenario the implicit_state
# backend runs -- confirm against the backend source.
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "TEST_CASE"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# OUTPUT_STATE is listed here in addition to the state section above, so the
# state tensor is also declared as a model output.
# NOTE(review): its dims ([ 1 ]) differ from the state's dims ([1024, 1024]);
# presumably intentional for this test -- confirm.
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "OUTPUT_STATE"
    data_type: TYPE_INT8
    dims: [ 1 ]
  }
]

# One model instance, placed on GPU (the other implicit_state test models in
# this directory use KIND_CPU).
instance_group [
  {
    count: 1
    kind : KIND_GPU
  }
]


================================================
FILE: qa/L0_implicit_state/models/no_implicit_state/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "no_implicit_state" test model, served by the
# custom "implicit_state" backend. The sequence batcher is configured with
# control inputs only: there is no `state` section in this file.
# NOTE(review): presumably used to check how the backend/server behaves when
# a model touches state that was never configured -- confirm against the test.
name: "no_implicit_state"
backend: "implicit_state"
# max_batch_size 0: the tensor dims below are the full shapes (no batch dim).
max_batch_size: 0

sequence_batching {
  # Sequence control tensors handed to the model: START, READY and END, each
  # an FP32 value where 0 means false and 1 means true.
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "END"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          fp32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
}

# Regular request inputs.
# NOTE(review): TEST_CASE presumably selects which scenario the implicit_state
# backend runs -- confirm against the backend source.
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "TEST_CASE"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# Single INT32 output; no state tensor is exposed as an output.
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# One model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_implicit_state/models/no_state_update/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "no_state_update" test model, served by the
# custom "implicit_state" backend. It declares one INT32 implicit state whose
# initial value is zero-filled.
# NOTE(review): the name suggests the backend deliberately skips updating the
# state for this model -- confirm against the backend source.
name: "no_state_update"
backend: "implicit_state"
# max_batch_size 0: the tensor dims below are the full shapes (no batch dim).
max_batch_size: 0
sequence_batching {
  # Sequence control tensors handed to the model: START, READY and END, each
  # an FP32 value where 0 means false and 1 means true.
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "END"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          fp32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
  # Single implicit state: one INT32 element. The initial_state entry named
  # "state init" has the same type and shape and is filled with zeros.
  state [
    {
        input_name: "INPUT_STATE"
        output_name: "OUTPUT_STATE"
        data_type: TYPE_INT32
        dims: 1
        initial_state: {
          name: "state init"
          data_type: TYPE_INT32
          dims: 1
          zero_data: true
        }
    }
  ]
}

# Regular request inputs.
# NOTE(review): TEST_CASE presumably selects which scenario the implicit_state
# backend runs -- confirm against the backend source.
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "TEST_CASE"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# Single INT32 output; the state tensor is not exposed as an output.
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# One model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_implicit_state/models/single_state_buffer/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "single_state_buffer" test model, served by the
# custom "implicit_state" backend. It declares one INT32 implicit state with
# use_same_buffer_for_input_output enabled, so the state input and output are
# configured to share a single buffer.
name: "single_state_buffer"
backend: "implicit_state"
# max_batch_size 0: the tensor dims below are the full shapes (no batch dim).
max_batch_size: 0
sequence_batching {
  # Sequence control tensors handed to the model: START, READY and END, each
  # an FP32 value where 0 means false and 1 means true.
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "END"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          fp32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
  # Single implicit state: one INT32 element, no initial_state specified,
  # input and output sharing the same buffer.
  state [
    {
        input_name: "INPUT_STATE"
        output_name: "OUTPUT_STATE"
        data_type: TYPE_INT32
        dims: 1
        use_same_buffer_for_input_output: true
    }
  ]
}

# Regular request inputs.
# NOTE(review): TEST_CASE presumably selects which scenario the implicit_state
# backend runs -- confirm against the backend source.
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "TEST_CASE"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# Single INT32 output; the state tensor is not exposed as an output.
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# One model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_implicit_state/models/wrong_internal_state/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "wrong_internal_state" test model, served by the
# custom "implicit_state" backend. The configuration itself is a plain
# single-state declaration (one INT32 element, no initial_state).
# NOTE(review): the name suggests the backend intentionally misuses the state
# at runtime for this model -- confirm against the backend source.
name: "wrong_internal_state"
backend: "implicit_state"
# max_batch_size 0: the tensor dims below are the full shapes (no batch dim).
max_batch_size: 0

sequence_batching {
  # Sequence control tensors handed to the model: START, READY and END, each
  # an FP32 value where 0 means false and 1 means true.
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          fp32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "END"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          fp32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
  # Single implicit state: one INT32 element, read as INPUT_STATE and written
  # as OUTPUT_STATE.
  state [
    {
        input_name: "INPUT_STATE"
        output_name: "OUTPUT_STATE"
        data_type: TYPE_INT32
        dims: 1
    }
  ]
}

# Regular request inputs.
# NOTE(review): TEST_CASE presumably selects which scenario the implicit_state
# backend runs -- confirm against the backend source.
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "TEST_CASE"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# Single INT32 output; the state tensor is not exposed as an output.
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# One model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_implicit_state/test.sh
================================================
#!/bin/bash
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: the first positional argument (when given) takes
# precedence over NVIDIA_TRITON_SERVER_VERSION from the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# An optional architecture suffix selects an arch-specific data directory.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi
: "${DATADIR:=/data/inferenceserver/${REPO_VERSION}}"
TEST_RESULT_FILE='test_results.txt'

# Settings exported to the Python client and to the nested
# L0_sequence_batcher run; each one can be overridden from the environment.
export ENSEMBLES=0
export BACKENDS=${BACKENDS:-"libtorch onnx plan"}
export IMPLICIT_STATE=1
export INITIAL_STATE_ZERO=${INITIAL_STATE_ZERO:-0}
export INITIAL_STATE_FILE=${INITIAL_STATE_FILE:-0}
export SINGLE_STATE_BUFFER=${SINGLE_STATE_BUFFER:-0}

MODELDIR=${MODELDIR:-$(pwd)/models}
TRITON_DIR=${TRITON_DIR:-/opt/tritonserver}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
source ../common/util.sh

# Setup the custom models shared library: every model served by the custom
# implicit_state backend gets its own copy of the library and a version
# directory.
IMPLICIT_STATE_MODELS="no_implicit_state no_state_update wrong_internal_state single_state_buffer growable_memory"

for implicit_state_model in $IMPLICIT_STATE_MODELS; do
    cp ./libtriton_implicit_state.so models/${implicit_state_model}/
done

for implicit_state_model in $IMPLICIT_STATE_MODELS; do
    mkdir -p models/${implicit_state_model}/1/
done

# For every backend under test, stage two copies of its implicit-state
# sequence model: the stock one, and an "_output" variant whose config also
# lists the state tensor as a model output.
for BACKEND in $BACKENDS; do
    dtype="int32"
    model_name=${BACKEND}_nobatch_sequence_${dtype}
    rm -rf models/$model_name
    cp -r $DATADIR/qa_sequence_implicit_model_repository/$model_name models
    output_dtype=

    # In order to allow the state to be returned, the model must describe
    # state as one of the outputs of the model.
    model_name_allow_output=${model_name}_output
    rm -rf models/$model_name_allow_output
    cp -r $DATADIR/qa_sequence_implicit_model_repository/$model_name models/$model_name_allow_output

    # The libtorch model names its state output differently from the others.
    case "$BACKEND" in
        libtorch) state_output_name="OUTPUT_STATE__1" ;;
        *)        state_output_name="OUTPUT_STATE" ;;
    esac

    # Rename the copied model and append the extra output declaration.
    (cd models/$model_name_allow_output && \
        sed -i "s/^name:.*/name: \"$model_name_allow_output\"/" config.pbtxt && \
        echo -e "output [{ name: \"$state_output_name\" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]" >> config.pbtxt)
done

CLIENT_LOG=`pwd`/client.log
SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --cuda-virtual-address-size=0:$((1024*1024*4))"
IMPLICIT_STATE_CLIENT='implicit_state.py'
EXPECTED_TEST_NUM=7
rm -rf $CLIENT_LOG

# Overall status of this script. It starts at 0 and is only ever raised to 1,
# so a failure recorded by one stage cannot be erased by a later stage.
RET=0

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# The client stage inspects exit codes itself, so errexit is disabled here.
set +e

# unittest writes its report to stderr, so capture both streams; otherwise the
# log printed on failure would be missing the actual test output.
python3 $IMPLICIT_STATE_CLIENT > $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Implicit State FAILED\n***"
    cat ${CLIENT_LOG}
    # Do not leave the server running when bailing out early.
    kill $SERVER_PID
    wait $SERVER_PID
    exit 1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_TEST_NUM
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Re-run the sequence batcher suite with the implicit-state settings exported
# by this script. errexit is switched off around it so that a failure is
# recorded in RET and reported by the summary below instead of aborting the
# script, and RET is only raised (never reset) so an earlier verification
# failure is preserved.
set +e
(cd ../L0_sequence_batcher/ && bash -ex test.sh)
if [ $? -ne 0 ]; then
    RET=1
fi
set -e

if [ $RET == 0 ]; then
    echo -e "\n***\n*** Implicit State Passed\n***"
else
    echo -e "\n***\n*** Implicit State FAILED\n***"
    exit 1
fi

exit $RET


================================================
FILE: qa/L0_infer/infer_test.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest

import infer_util as iu
import numpy as np
import test_util as tu
from tritonclient.utils import *

def _env_flag(name, default=0):
    """Return environment variable *name*, parsed as an integer, as a bool."""
    return bool(int(os.environ.get(name, default)))


# Transport / environment toggles read once at import time.
TEST_SYSTEM_SHARED_MEMORY = _env_flag("TEST_SYSTEM_SHARED_MEMORY")
TEST_CUDA_SHARED_MEMORY = _env_flag("TEST_CUDA_SHARED_MEMORY")
# CPU-only mode is enabled by the mere presence of the variable.
CPU_ONLY = "TRITON_SERVER_CPU_ONLY" in os.environ
TEST_VALGRIND = _env_flag("TEST_VALGRIND")
VALGRIND_TESTS = _env_flag("VALGRIND_TESTS")

# A protocol is enabled unless its variable is exactly the string "0".
USE_GRPC = os.environ.get("USE_GRPC") != "0"
USE_HTTP = os.environ.get("USE_HTTP") != "0"
assert USE_GRPC or USE_HTTP, "USE_GRPC or USE_HTTP must be non-zero"

# Backend names to exercise, kept as one string and probed with `in`.
BACKENDS = os.environ.get(
    "BACKENDS", "onnx libtorch plan python python_dlpack openvino"
)
ENSEMBLES = _env_flag("ENSEMBLES", 1)
NOBATCH = _env_flag("NOBATCH", 1)
BATCH = _env_flag("BATCH", 1)

np_dtype_string = np.dtype(object)

# 60 sec is the default value; valgrind runs get a longer timeout.
NETWORK_TIMEOUT = 60.0
if TEST_VALGRIND:
    NETWORK_TIMEOUT = 300.0


class InferTest(tu.TestResultCollector):
    def _full_exact(
        self,
        input_dtype,
        output0_dtype,
        output1_dtype,
        output0_raw,
        output1_raw,
        swap,
    ):
        """Run exact-match inference for one dtype combination on each backend.

        For every backend family named in ``BACKENDS`` whose
        ``tu.validate_for_*`` check accepts the dtype combination, the
        matching models are exercised through ``iu.infer_exact``. When
        ``ENSEMBLES`` is set, the ensemble prefixes accepted by
        ``tu.validate_for_ensemble_model`` are exercised as well.

        Args:
            input_dtype: dtype passed to the helpers as the input dtype.
            output0_dtype: dtype passed as the first output dtype.
            output1_dtype: dtype passed as the second output dtype.
            output0_raw: forwarded unchanged as ``output0_raw``.
            output1_raw: forwarded unchanged as ``output1_raw``.
            swap: forwarded unchanged as ``swap``.
        """

        def _infer_exact_helper(
            tester,
            pf,
            tensor_shape,
            batch_size,
            input_dtype,
            output0_dtype,
            output1_dtype,
            output0_raw=True,
            output1_raw=True,
            model_version=None,
            swap=False,
            outputs=("OUTPUT0", "OUTPUT1"),
            use_http=USE_HTTP,
            use_grpc=USE_GRPC,
            use_http_json_tensors=True,
            skip_request_id_check=True,
            use_streaming=True,
            correlation_id=0,
            network_timeout=NETWORK_TIMEOUT,
        ):
            """Call ``iu.infer_exact`` for batch sizes 1 and *batch_size*.

            The ``<pf>_nobatch`` model is only run for batch size 1 (when
            NOBATCH is set); the batching model ``<pf>`` is run for both
            sizes (when BATCH is set) with the batch size prepended to
            *tensor_shape*.
            """
            io_dtypes = (input_dtype, output0_dtype, output1_dtype)
            # Options that are identical for every iu.infer_exact call below.
            forwarded = dict(
                output0_raw=output0_raw,
                output1_raw=output1_raw,
                model_version=model_version,
                swap=swap,
                outputs=outputs,
                use_http=use_http,
                use_grpc=use_grpc,
                use_http_json_tensors=use_http_json_tensors,
                skip_request_id_check=skip_request_id_check,
                use_streaming=use_streaming,
                correlation_id=correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                network_timeout=network_timeout,
            )
            for bs in (1, batch_size):
                # model that does not support batching
                if NOBATCH and bs == 1:
                    iu.infer_exact(
                        tester,
                        pf + "_nobatch",
                        tensor_shape,
                        bs,
                        *io_dtypes,
                        **forwarded,
                    )

                # model that supports batching.
                if BATCH:
                    iu.infer_exact(
                        tester,
                        pf,
                        (bs,) + tensor_shape,
                        bs,
                        *io_dtypes,
                        **forwarded,
                    )

        input_size = 16
        flat_shape = (input_size,)
        trt_shape = (input_size, 1, 1)
        dtypes = (input_dtype, output0_dtype, output1_dtype)
        # Caller-supplied options handed to every helper invocation.
        result_opts = dict(
            output0_raw=output0_raw, output1_raw=output1_raw, swap=swap
        )

        # The empty prefix selects the plain (non-ensemble) model names.
        ensemble_prefix = [""]
        if ENSEMBLES:
            ensemble_prefix.extend(
                candidate
                for candidate in ("simple_", "sequence_", "fan_")
                if tu.validate_for_ensemble_model(
                    candidate, *dtypes, flat_shape, flat_shape, flat_shape
                )
            )

        # "plan" models: skipped entirely in CPU-only runs.
        if not CPU_ONLY and tu.validate_for_trt_model(
            *dtypes, trt_shape, trt_shape, trt_shape
        ):
            # int8 inputs use the (input_size, 1, 1) shape, others the flat one.
            plan_shape = trt_shape if input_dtype == np.int8 else flat_shape
            for prefix in ensemble_prefix:
                if "plan" in BACKENDS:
                    _infer_exact_helper(
                        self, prefix + "plan", plan_shape, 8, *dtypes, **result_opts
                    )

        if tu.validate_for_onnx_model(*dtypes, flat_shape, flat_shape, flat_shape):
            for prefix in ensemble_prefix:
                if "onnx" in BACKENDS:
                    _infer_exact_helper(
                        self, prefix + "onnx", flat_shape, 8, *dtypes, **result_opts
                    )

        if tu.validate_for_libtorch_model(
            *dtypes, flat_shape, flat_shape, flat_shape
        ):
            # Due to PyTorch bug
            # https://github.com/pytorch/pytorch/issues/66930 we can't
            # run this test with int8 input and int32 outputs.
            hits_pytorch_bug = (
                input_dtype == np.int8
                and output0_dtype == np.int32
                and output1_dtype == np.int32
            )
            if hits_pytorch_bug:
                print(
                    "skipping pytorch test for input dtype int8 and outputs dtype int32 due to a pytorch bug"
                )
            else:
                for prefix in ensemble_prefix:
                    if "libtorch" not in BACKENDS:
                        continue
                    # Skip batching for PyTorch String I/O
                    if any(dt == np_dtype_string for dt in dtypes):
                        iu.infer_exact(
                            self,
                            prefix + "libtorch_nobatch",
                            flat_shape,
                            1,  # batch_size
                            *dtypes,
                            **result_opts,
                            use_http=USE_HTTP,
                            use_grpc=USE_GRPC,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                        )
                    else:
                        _infer_exact_helper(
                            self,
                            prefix + "libtorch",
                            flat_shape,
                            8,
                            *dtypes,
                            **result_opts,
                        )

        # Python backends: only the non-ensemble prefix is run, and any
        # combination involving uint8 is skipped.
        involves_uint8 = any(dt == np.uint8 for dt in dtypes)
        for prefix in ensemble_prefix:
            if prefix != "" or involves_uint8:
                continue

            # python_dlpack takes precedence; plain python is only used when
            # python_dlpack is not found in BACKENDS.
            if "python_dlpack" in BACKENDS:
                python_model = "python_dlpack"
            elif "python" in BACKENDS:
                python_model = "python"
            else:
                continue

            _infer_exact_helper(
                self, prefix + python_model, flat_shape, 8, *dtypes, **result_opts
            )

    def test_raw_uuu(self):
        """Run _full_exact with uint8 input and uint8 outputs (raw, swap=True)."""
        dtype = np.uint8
        self._full_exact(
            input_dtype=dtype,
            output0_dtype=dtype,
            output1_dtype=dtype,
            output0_raw=True,
            output1_raw=True,
            swap=True,
        )

    def test_raw_bbb(self):
        """Run _full_exact with int8 input and int8 outputs (raw, swap=True)."""
        dtype = np.int8
        self._full_exact(
            input_dtype=dtype,
            output0_dtype=dtype,
            output1_dtype=dtype,
            output0_raw=True,
            output1_raw=True,
            swap=True,
        )

    def test_raw_sss(self):
        """Run _full_exact with int16 input and int16 outputs (raw, swap=True)."""
        dtype = np.int16
        self._full_exact(
            input_dtype=dtype,
            output0_dtype=dtype,
            output1_dtype=dtype,
            output0_raw=True,
            output1_raw=True,
            swap=True,
        )

    def test_raw_iii(self):
        """Run _full_exact with int32 input and int32 outputs (raw, swap=True)."""
        dtype = np.int32
        self._full_exact(
            input_dtype=dtype,
            output0_dtype=dtype,
            output1_dtype=dtype,
            output0_raw=True,
            output1_raw=True,
            swap=True,
        )

    def test_raw_lll(self):
        """Run _full_exact with int64 input and int64 outputs (raw, swap=False)."""
        dtype = np.int64
        self._full_exact(
            input_dtype=dtype,
            output0_dtype=dtype,
            output1_dtype=dtype,
            output0_raw=True,
            output1_raw=True,
            swap=False,
        )

    def test_raw_hhh(self):
        """Run _full_exact with float16 input and float16 outputs (raw, swap=False)."""
        dtype = np.float16
        self._full_exact(
            input_dtype=dtype,
            output0_dtype=dtype,
            output1_dtype=dtype,
            output0_raw=True,
            output1_raw=True,
            swap=False,
        )

    def test_raw_fff(self):
        """Run _full_exact with float32 input and float32 outputs (raw, swap=True)."""
        dtype = np.float32
        self._full_exact(
            input_dtype=dtype,
            output0_dtype=dtype,
            output1_dtype=dtype,
            output0_raw=True,
            output1_raw=True,
            swap=True,
        )

    def test_raw_hff(self):
        """Run _full_exact with float16 input and float32 outputs (raw, swap=False)."""
        self._full_exact(
            input_dtype=np.float16,
            output0_dtype=np.float32,
            output1_dtype=np.float32,
            output0_raw=True,
            output1_raw=True,
            swap=False,
        )

    def test_raw_bii(self):
        self._full_exact(
            np.int8, np.int32, np.int32, output0_raw=True, output1_raw=True, swap=False
        )

    def test_raw_ibb(self):
        self._full_exact(
            np.int32, np.int8, np.int8, output0_raw=True, output1_raw=True, swap=False
        )

    def test_raw_ibs(self):
        self._full_exact(
            np.int32, np.int8, np.int16, output0_raw=True, output1_raw=True, swap=False
        )

    def test_raw_fuu(self):
        self._full_exact(
            np.float32,
            np.uint8,
            np.uint8,
            output0_raw=True,
            output1_raw=True,
            swap=False,
        )

    def test_raw_uff(self):
        self._full_exact(
            np.uint8,
            np.float32,
            np.float32,
            output0_raw=True,
            output1_raw=True,
            swap=False,
        )

    def test_raw_fuh(self):
        self._full_exact(
            np.float32,
            np.uint8,
            np.float16,
            output0_raw=True,
            output1_raw=True,
            swap=False,
        )

    def test_raw_iff(self):
        self._full_exact(
            np.int32,
            np.float32,
            np.float32,
            output0_raw=True,
            output1_raw=True,
            swap=False,
        )

    def test_raw_fii(self):
        self._full_exact(
            np.float32,
            np.int32,
            np.int32,
            output0_raw=True,
            output1_raw=True,
            swap=False,
        )

    def test_raw_ihs(self):
        self._full_exact(
            np.int32,
            np.float16,
            np.int16,
            output0_raw=True,
            output1_raw=True,
            swap=False,
        )

    def test_raw_ooo(self):
        # np_dtype_string for all three dtypes; both outputs raw, swap=False.
        dtype = np_dtype_string
        self._full_exact(
            dtype, dtype, dtype, output0_raw=True, output1_raw=True, swap=False
        )

    def test_raw_oii(self):
        # Dtype triple np_dtype_string/int32/int32; both outputs raw,
        # swap=False.
        dtypes = (np_dtype_string, np.int32, np.int32)
        self._full_exact(*dtypes, output0_raw=True, output1_raw=True, swap=False)

    def test_raw_oio(self):
        # Dtype triple np_dtype_string/int32/np_dtype_string; both outputs
        # raw, swap=False.
        options = {"output0_raw": True, "output1_raw": True, "swap": False}
        self._full_exact(np_dtype_string, np.int32, np_dtype_string, **options)

    def test_raw_ooi(self):
        # Dtype triple np_dtype_string/np_dtype_string/int32; both outputs
        # raw, swap=False.
        dtypes = (np_dtype_string, np_dtype_string, np.int32)
        self._full_exact(*dtypes, output0_raw=True, output1_raw=True, swap=False)

    def test_raw_ioo(self):
        # Dtype triple int32/np_dtype_string/np_dtype_string; both outputs
        # raw, swap=False.
        dtypes = (np.int32, np_dtype_string, np_dtype_string)
        self._full_exact(*dtypes, output0_raw=True, output1_raw=True, swap=False)

    def test_raw_iio(self):
        # Dtype triple int32/int32/np_dtype_string; both outputs raw,
        # swap=False.
        options = {"output0_raw": True, "output1_raw": True, "swap": False}
        self._full_exact(np.int32, np.int32, np_dtype_string, **options)

    def test_raw_ioi(self):
        # Dtype triple int32/np_dtype_string/int32; both outputs raw,
        # swap=False.
        dtypes = (np.int32, np_dtype_string, np.int32)
        self._full_exact(*dtypes, output0_raw=True, output1_raw=True, swap=False)

    # shared memory does not support class output
    if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):

        def test_class_bbb(self):
            self._full_exact(
                np.int8,
                np.int8,
                np.int8,
                output0_raw=False,
                output1_raw=False,
                swap=True,
            )

        def test_class_sss(self):
            self._full_exact(
                np.int16,
                np.int16,
                np.int16,
                output0_raw=False,
                output1_raw=False,
                swap=True,
            )

        def test_class_iii(self):
            self._full_exact(
                np.int32,
                np.int32,
                np.int32,
                output0_raw=False,
                output1_raw=False,
                swap=True,
            )

        def test_class_lll(self):
            self._full_exact(
                np.int64,
                np.int64,
                np.int64,
                output0_raw=False,
                output1_raw=False,
                swap=False,
            )

        def test_class_fff(self):
            self._full_exact(
                np.float32,
                np.float32,
                np.float32,
                output0_raw=False,
                output1_raw=False,
                swap=True,
            )

        def test_class_iff(self):
            self._full_exact(
                np.int32,
                np.float32,
                np.float32,
                output0_raw=False,
                output1_raw=False,
                swap=False,
            )

        def test_mix_bbb(self):
            self._full_exact(
                np.int8,
                np.int8,
                np.int8,
                output0_raw=True,
                output1_raw=False,
                swap=True,
            )

        def test_mix_sss(self):
            self._full_exact(
                np.int16,
                np.int16,
                np.int16,
                output0_raw=False,
                output1_raw=True,
                swap=True,
            )

        def test_mix_iii(self):
            self._full_exact(
                np.int32,
                np.int32,
                np.int32,
                output0_raw=True,
                output1_raw=False,
                swap=True,
            )

        def test_mix_lll(self):
            self._full_exact(
                np.int64,
                np.int64,
                np.int64,
                output0_raw=False,
                output1_raw=True,
                swap=False,
            )

        def test_mix_fff(self):
            self._full_exact(
                np.float32,
                np.float32,
                np.float32,
                output0_raw=True,
                output1_raw=False,
                swap=True,
            )

        def test_mix_iff(self):
            self._full_exact(
                np.int32,
                np.float32,
                np.float32,
                output0_raw=False,
                output1_raw=True,
                swap=False,
            )

    if not VALGRIND_TESTS:

        def test_raw_version_latest_1(self):
            # There are 3 versions of onnx_int8_int8_int8 but
            # only version 3 should be available. Requests for versions 1 and
            # 2 must be rejected: a request that unexpectedly succeeds fails
            # the test instead of being silently accepted.
            input_size = 16
            tensor_shape = (1, input_size)

            def infer(platform, model_version, swap):
                # One int8 exact-match inference against a specific version.
                iu.infer_exact(
                    self,
                    platform,
                    tensor_shape,
                    1,
                    np.int8,
                    np.int8,
                    np.int8,
                    model_version=model_version,
                    swap=swap,
                    use_http=USE_HTTP,
                    use_grpc=USE_GRPC,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                )

            def assert_unavailable(platform, model_version, swap):
                # assertRaises fails the test when no exception is raised, so
                # the "version is not served" expectation is enforced.
                with self.assertRaises(InferenceServerException) as ctx:
                    infer(platform, model_version, swap)
                message = ctx.exception.message()
                self.assertTrue(
                    message.startswith("Request for unknown model"),
                    "version {}: unexpected error: {}".format(model_version, message),
                )

            for platform in ["onnx"]:
                if platform not in BACKENDS:
                    continue
                assert_unavailable(platform, 1, False)
                assert_unavailable(platform, 2, True)
                infer(platform, 3, True)

        def test_raw_version_latest_2(self):
            # There are 3 versions of onnx_int16_int16_int16 but only
            # versions 2 and 3 should be available. The request for version 1
            # must be rejected: if it unexpectedly succeeds the test fails
            # instead of passing silently.
            input_size = 16
            tensor_shape = (1, input_size)

            def infer(platform, model_version, swap):
                # One int16 exact-match inference against a specific version.
                iu.infer_exact(
                    self,
                    platform,
                    tensor_shape,
                    1,
                    np.int16,
                    np.int16,
                    np.int16,
                    model_version=model_version,
                    swap=swap,
                    use_http=USE_HTTP,
                    use_grpc=USE_GRPC,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                )

            for platform in ["onnx"]:
                if platform not in BACKENDS:
                    continue

                # assertRaises fails the test when no exception is raised.
                with self.assertRaises(InferenceServerException) as ctx:
                    infer(platform, 1, False)
                message = ctx.exception.message()
                self.assertTrue(
                    message.startswith("Request for unknown model"),
                    "version 1: unexpected error: {}".format(message),
                )

                infer(platform, 2, True)
                infer(platform, 3, True)

        def test_raw_version_all(self):
            # There are 3 versions of onnx_int32_int32_int32 and all should
            # be available, so each one is queried in turn.
            tensor_shape = (1, 16)
            transport = {
                "use_http": USE_HTTP,
                "use_grpc": USE_GRPC,
                "use_system_shared_memory": TEST_SYSTEM_SHARED_MEMORY,
                "use_cuda_shared_memory": TEST_CUDA_SHARED_MEMORY,
            }
            # (model version, swap flag) pairs in request order.
            requests = ((1, False), (2, True), (3, True))

            for platform in ["onnx"]:
                if platform in BACKENDS:
                    for version, swap in requests:
                        iu.infer_exact(
                            self,
                            platform,
                            tensor_shape,
                            1,
                            np.int32,
                            np.int32,
                            np.int32,
                            model_version=version,
                            swap=swap,
                            **transport,
                        )

        def test_raw_version_specific_1(self):
            # There are 3 versions of onnx_float16_float16_float16 but only
            # version 1 should be available. Requests for versions 2 and 3
            # must be rejected: a request that unexpectedly succeeds fails the
            # test instead of being silently accepted.
            input_size = 16
            tensor_shape = (1, input_size)

            def infer(platform, model_version, swap):
                # One float16 exact-match inference against a specific version.
                iu.infer_exact(
                    self,
                    platform,
                    tensor_shape,
                    1,
                    np.float16,
                    np.float16,
                    np.float16,
                    model_version=model_version,
                    swap=swap,
                    use_http=USE_HTTP,
                    use_grpc=USE_GRPC,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                )

            def assert_unavailable(platform, model_version, swap):
                # assertRaises fails the test when no exception is raised, so
                # the "version is not served" expectation is enforced.
                with self.assertRaises(InferenceServerException) as ctx:
                    infer(platform, model_version, swap)
                message = ctx.exception.message()
                self.assertTrue(
                    message.startswith("Request for unknown model"),
                    "version {}: unexpected error: {}".format(model_version, message),
                )

            for platform in ["onnx"]:
                if platform not in BACKENDS:
                    continue
                infer(platform, 1, False)
                assert_unavailable(platform, 2, True)
                assert_unavailable(platform, 3, True)

        def test_raw_version_specific_1_3(self):
            # There are 3 versions of *_float32_float32_float32 but only
            # versions 1 and 3 should be available. The request for version 2
            # must be rejected: if it unexpectedly succeeds the test fails
            # instead of passing silently.
            input_size = 16
            tensor_shape = (1, input_size)

            def infer(platform, model_version, swap):
                # One float32 exact-match inference against a specific version.
                iu.infer_exact(
                    self,
                    platform,
                    tensor_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    model_version=model_version,
                    swap=swap,
                    use_http=USE_HTTP,
                    use_grpc=USE_GRPC,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                )

            for platform in ("onnx", "plan"):
                # "plan" is not exercised on CPU-only runs.
                if platform == "plan" and CPU_ONLY:
                    continue
                if platform not in BACKENDS:
                    continue

                infer(platform, 1, False)

                # assertRaises fails the test when no exception is raised.
                with self.assertRaises(InferenceServerException) as ctx:
                    infer(platform, 2, True)
                message = ctx.exception.message()
                self.assertTrue(
                    message.startswith("Request for unknown model"),
                    "version 2: unexpected error: {}".format(message),
                )

                infer(platform, 3, True)

        if ENSEMBLES:
            if all(x in BACKENDS for x in ["onnx", "libtorch"]):

                def test_ensemble_mix_platform(self):
                    # A TensorRT model is used in this ensemble, so nothing is
                    # run on a CPU-only machine.
                    if not CPU_ONLY:
                        transport = dict(
                            use_http=USE_HTTP,
                            use_grpc=USE_GRPC,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                        )
                        dtypes = (np.float32, np.float32, np.float32)
                        for batch_size in (1, 8):
                            iu.infer_exact(
                                self,
                                "mix_platform",
                                (batch_size, 16),
                                batch_size,
                                *dtypes,
                                **transport,
                            )

            if "onnx" in BACKENDS:

                def test_ensemble_mix_type(self):
                    # Query the "mix_type" model with batch sizes 1 and 8 using
                    # the int32/float32/float32 dtype triple.
                    transport = dict(
                        use_http=USE_HTTP,
                        use_grpc=USE_GRPC,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                    )
                    dtypes = (np.int32, np.float32, np.float32)
                    for batch_size in (1, 8):
                        iu.infer_exact(
                            self,
                            "mix_type",
                            (batch_size, 16),
                            batch_size,
                            *dtypes,
                            **transport,
                        )

            if all(x in BACKENDS for x in ["onnx", "libtorch"]):

                def test_ensemble_mix_ensemble(self):
                    # Query the "mix_ensemble" model with batch sizes 1 and 8
                    # using the int32/float32/float32 dtype triple.
                    transport = dict(
                        use_http=USE_HTTP,
                        use_grpc=USE_GRPC,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                    )
                    dtypes = (np.int32, np.float32, np.float32)
                    for batch_size in (1, 8):
                        iu.infer_exact(
                            self,
                            "mix_ensemble",
                            (batch_size, 16),
                            batch_size,
                            *dtypes,
                            **transport,
                        )

            if all(
                x in BACKENDS
                for x in [
                    "onnx",
                ]
            ):

                def test_ensemble_mix_batch_nobatch(self):
                    # Exercise the batch/no-batch ensembles with float32 for
                    # all three dtypes.
                    def run(model_name, shape, batch_size):
                        # Single exact-match request with the shared settings.
                        iu.infer_exact(
                            self,
                            model_name,
                            shape,
                            batch_size,
                            np.float32,
                            np.float32,
                            np.float32,
                            use_http=USE_HTTP,
                            use_grpc=USE_GRPC,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                        )

                    batch_sizes = (1, 8)
                    for base_name in ["batch_to_nobatch", "nobatch_to_batch"]:
                        for batch_size in batch_sizes:
                            run(base_name, (batch_size, 16), batch_size)
                        # The "_nobatch" variant gets an (8, 16) tensor with
                        # batch size 1.
                        run(base_name + "_nobatch", (8, 16), 1)

                    # batch -> nobatch -> batch
                    for batch_size in batch_sizes:
                        run("mix_nobatch_batch", (batch_size, 16), batch_size)

            if not (TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY):

                def test_ensemble_label_lookup(self):
                    # Request non-raw (class) outputs from several ensembles and
                    # check the labels that come back. The "wrong_label" request
                    # is required to fail; previously the test also passed when
                    # it unexpectedly succeeded.
                    def infer_labels(model_name, batch_size, input_dtype):
                        # One exact-match request with both outputs non-raw.
                        iu.infer_exact(
                            self,
                            model_name,
                            (batch_size, 16),
                            batch_size,
                            input_dtype,
                            np.float32,
                            np.float32,
                            output0_raw=False,
                            output1_raw=False,
                            use_http=USE_HTTP,
                            use_grpc=USE_GRPC,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
                        )

                    if all(x in BACKENDS for x in ["onnx", "libtorch"]):
                        # Ensemble needs to look up label from the actual model
                        for bs in (1, 8):
                            infer_labels("mix_platform", bs, np.float32)

                        # Label from the actual model will be passed along the nested ensemble
                        for bs in (1, 8):
                            infer_labels("mix_ensemble", bs, np.int32)

                    if "onnx" in BACKENDS:
                        # If label file is provided, it will use the provided label file directly.
                        # Sanity check that infer_exact fails since this ensemble is provided
                        # with unexpected labels; assertRaises fails the test when no
                        # AssertionError is raised.
                        with self.assertRaises(AssertionError):
                            infer_labels("wrong_label", 1, np.int32)

                        for bs in (1, 8):
                            infer_labels("label_override", bs, np.int32)


# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_infer/install_and_test.sh
================================================
#!/bin/bash
# Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Determine the operating system family to call the correct package manager.
# Both ID and ID_LIKE from /etc/os-release are considered: ID_LIKE is optional
# (Debian itself does not set it) and may list several space-separated
# distributions (e.g. "ubuntu debian"), so looking only at its first word
# would misdetect such systems. The file is sourced in a subshell so none of
# its variables leak into this script.
OS_FAMILY=$(. /etc/os-release && echo "${ID:-} ${ID_LIKE:-}")

# Note: This script is to be used with customized triton containers that need
# dependencies to run L0_infer tests
if [[ "$OS_FAMILY" =~ "debian" ]]; then
    apt-get update && \
        apt-get install -y --no-install-recommends \
            build-essential \
            curl \
            jq \
            libnvrtc12 \
            python3 \
            python3-pip
else
    yum install -y \
        jq \
        curl
fi

# install client libraries
# The requirement is quoted so the shell never expands "[all]" as a glob.
pip3 install "tritonclient[all]"

# Run the actual test
bash -x test.sh


================================================
FILE: qa/L0_infer/test.sh
================================================
#!/bin/bash
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: default to NVIDIA_TRITON_SERVER_VERSION,
# let the first command-line argument override it, and abort if neither
# provides a value.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Run ldconfig; a failure is tolerated and does not stop the script.
ldconfig || true

# Expose only CUDA device 0 to the processes started from this script.
export CUDA_VISIBLE_DEVICES=0

TEST_RESULT_FILE='test_results.txt'
CLIENT_LOG_BASE="./client"
INFER_TEST=infer_test.py
# Keep a caller-provided SERVER_TIMEOUT, otherwise default to 600.
SERVER_TIMEOUT=${SERVER_TIMEOUT:=600}

# The mode switches below default to "0" (disabled) when unset or empty.
if [ -z "$TEST_SYSTEM_SHARED_MEMORY" ]; then
    TEST_SYSTEM_SHARED_MEMORY="0"
fi

if [ -z "$TEST_CUDA_SHARED_MEMORY" ]; then
    TEST_CUDA_SHARED_MEMORY="0"
fi

if [ -z "$TEST_VALGRIND" ]; then
    TEST_VALGRIND="0"
fi

# Valgrind mode: configure the leak checker, raise the server timeout
# (overriding any caller-provided value), remove leak-check logs left by a
# previous run and restrict the backend list.
if [ "$TEST_VALGRIND" -eq 1 ]; then
    LEAKCHECK_LOG_BASE="./valgrind_test"
    LEAKCHECK=/usr/bin/valgrind
    LEAKCHECK_ARGS_BASE="--leak-check=full --show-leak-kinds=definite --max-threads=3000 --num-callers=20"
    SERVER_TIMEOUT=4000
    rm -f $LEAKCHECK_LOG_BASE*
    # Remove 'python', 'python_dlpack' and 'onnx' from BACKENDS and test them
    # separately below.
    BACKENDS="libtorch plan openvino"
fi

# Expected number of tests unless the caller overrides EXPECTED_NUM_TESTS:
# 33 when either shared-memory mode is enabled, 46 otherwise.
# NOTE(review): presumably lower because infer_test.py does not define its
# class-output tests in shared-memory mode - confirm.
if [ "$TEST_SYSTEM_SHARED_MEMORY" -eq 1 ] || [ "$TEST_CUDA_SHARED_MEMORY" -eq 1 ]; then
    EXPECTED_NUM_TESTS=${EXPECTED_NUM_TESTS:="33"}
else
    EXPECTED_NUM_TESTS=${EXPECTED_NUM_TESTS:="46"}
fi

# Jetson mode is off unless the caller sets TEST_JETSON.
TEST_JETSON=${TEST_JETSON:=0}

# Default size (in MB) of shared memory to be used by each python model
# instance (Default is 1MB)
DEFAULT_SHM_SIZE_MB=${DEFAULT_SHM_SIZE_MB:=1}
# The same size converted from MB to bytes.
DEFAULT_SHM_SIZE_BYTES=$((1024*1024*$DEFAULT_SHM_SIZE_MB))

# On windows the paths invoked by the script (running in WSL) must use
# /mnt/c when needed but the paths on the tritonserver command-line
# must be C:/ style.
# Windows is detected by WSL_DISTRO_NAME or MSYSTEM being set.
if [[ -v WSL_DISTRO_NAME ]] || [[ -v MSYSTEM ]]; then
    MODELDIR=${MODELDIR:=C:/models}
    DATADIR=${DATADIR:="/mnt/c/data/inferenceserver/${REPO_VERSION}"}
    BACKEND_DIR=${BACKEND_DIR:=C:/tritonserver/backends}
    SERVER=${SERVER:=/mnt/c/tritonserver/bin/tritonserver.exe}
else
    MODELDIR=${MODELDIR:=`pwd`/models}
    DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
    TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
    # Unlike the Windows branch, SERVER and BACKEND_DIR are always derived
    # from TRITON_DIR here; values from the environment are not honored.
    SERVER=${TRITON_DIR}/bin/tritonserver
    BACKEND_DIR=${TRITON_DIR}/backends

    # PyTorch on SBSA requires libgomp to be loaded first. See the following
    # GitHub issue for more information:
    # https://github.com/pytorch/pytorch/issues/2575
    arch=`uname -m`
    if [ $arch = "aarch64" ]; then
      SERVER_LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libgomp.so.1
    fi
fi

# Allow more time to exit. Ensemble brings in too many models
# SERVER_ARGS_EXTRA holds every server option except the model repository so
# that SERVER_ARGS can be rebuilt later with a different MODELDIR.
SERVER_ARGS_EXTRA="--exit-timeout-secs=${SERVER_TIMEOUT} --backend-directory=${BACKEND_DIR} --backend-config=python,stub-timeout-seconds=120 --backend-config=python,shm-default-byte-size=${DEFAULT_SHM_SIZE_BYTES}"
SERVER_ARGS="--model-repository=${MODELDIR} ${SERVER_ARGS_EXTRA}"
SERVER_LOG_BASE="./inference_server"
# Shared QA helpers; run_server, run_server_leakcheck, kill_server,
# check_test_results and create_nop_version_dir used below are not defined in
# this script and are presumably provided by this file.
source ../common/util.sh

# Start from a clean slate: remove logs left over from a previous run.
rm -f $SERVER_LOG_BASE* $CLIENT_LOG_BASE*

# Overall exit status of the script; set to 1 by any failing step.
RET=0

# Verify the flag is set only on CPU-only device
# (any "GPU" line in the `nvidia-smi -L` output aborts the test).
if [ "$TRITON_SERVER_CPU_ONLY" == "1" ]; then
    gpu_count=`nvidia-smi -L | grep GPU | wc -l`
    if [ "$gpu_count" -ne 0 ]; then
    echo -e "\n***\n*** Running on a device with GPU\n***"
    echo -e "\n***\n*** Test Failed To Run\n***"
    exit 1
    fi
fi

# If BACKENDS not specified, set to all
BACKENDS=${BACKENDS:="onnx libtorch plan python python_dlpack openvino"}
export BACKENDS

# If ENSEMBLES not specified, set to 1
ENSEMBLES=${ENSEMBLES:="1"}
export ENSEMBLES

# Test for both batch and nobatch models
NOBATCH=${NOBATCH:="1"}
export NOBATCH
BATCH=${BATCH:="1"}
export BATCH

# When python_dlpack is among the backends, install torch 2.3.1: the "+cpu"
# build everywhere except aarch64, which gets the plain 2.3.1 package.
if [[ $BACKENDS == *"python_dlpack"* ]]; then
    if [[ "aarch64" != $(uname -m) ]] ; then
        pip3 install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
    else
        pip3 install torch==2.3.1 -f https://download.pytorch.org/whl/torch_stable.html
    fi
fi

# Rebuild ./models from scratch for the backends listed in $BACKENDS.
#
# Reads (globals): BACKENDS, DATADIR, ENSEMBLES, TARGET, TEST_VALGRIND,
#                  TRITON_SERVER_CPU_ONLY.
# Steps:
#   1. Copy (or, for python/python_dlpack, synthesize from the ONNX configs)
#      the per-backend models.
#   2. When ENSEMBLES is "1", add the ensemble and nop_* models.
#   3. Append an instance_group entry to the per-backend config.pbtxt files.
#   4. Create custom_zero_1_float32 and custom_nobatch_zero_1_float32.
function generate_model_repository() {
    rm -fr models && mkdir models
    for BACKEND in $BACKENDS; do
      if [ "$BACKEND" == "python" ] || [ "$BACKEND" == "python_dlpack" ]; then
        # We will be using ONNX models config.pbtxt and tweak them to make them
        # appropriate for Python backend
        onnx_models=`find ${DATADIR}/qa_model_repository/ -maxdepth 1 -type d -regex '.*onnx_.*'`

        # Types that need to use SubAdd instead of AddSub
        swap_types="float32 int32 int16 int8"
        for onnx_model in $onnx_models; do
          # Derive the python model name: replace "onnx" with the backend name
          # and strip the leading repository path.
          if [ "$BACKEND" == "python_dlpack" ]; then
            python_model=`echo $onnx_model | sed 's/onnx/python_dlpack/g' | sed 's,'"$DATADIR/qa_model_repository/"',,g'`
          else
            python_model=`echo $onnx_model | sed 's/onnx/python/g' | sed 's,'"$DATADIR/qa_model_repository/"',,g'`
          fi

          mkdir -p models/$python_model/1/
          # Remove platform and use Python as the backend
          # (the version_policy line is the one rewritten into the backend
          # setting; both variants use backend "python").
          if [ "$BACKEND" == "python" ]; then
            cat $onnx_model/config.pbtxt | sed 's/platform:.*//g' | sed 's/version_policy.*/backend:\ "python"/g' | sed 's/onnx/python/g' > models/$python_model/config.pbtxt
          else
            cat $onnx_model/config.pbtxt | sed 's/platform:.*//g' | sed 's/version_policy.*/backend:\ "python"/g' | sed 's/onnx/python_dlpack/g' > models/$python_model/config.pbtxt
          fi
          cp $onnx_model/output0_labels.txt models/$python_model

          is_swap_type="0"

          # Check whether this model needs to be swapped
          for swap_type in $swap_types; do
            # Only models whose three types are all the same swap type match.
            model_type="$swap_type"_"$swap_type"_"$swap_type"
            if [ "$BACKEND" == "python_dlpack" ]; then
              model_name=python_dlpack_$model_type
              model_name_nobatch=python_dlpack_nobatch_$model_type
              if [ $python_model == $model_name ] || [ $python_model == $model_name_nobatch ]; then
                  cp ../python_models/dlpack_sub_add/model.py models/$python_model/1/
                  is_swap_type="1"
              fi
            else
              model_name=python_$model_type
              model_name_nobatch=python_nobatch_$model_type
              if [ $python_model == $model_name ] || [ $python_model == $model_name_nobatch ]; then
                  cp ../python_models/sub_add/model.py models/$python_model/1/
                  is_swap_type="1"
              fi
            fi
          done

          # Use the AddSub model if it doesn't need to be swapped
          if [ $is_swap_type == "0" ]; then
            if [ "$BACKEND" == "python_dlpack" ]; then
                    cp ../python_models/dlpack_add_sub/model.py models/$python_model/1/
            else
                    cp ../python_models/add_sub/model.py models/$python_model/1/
            fi
          fi
        done
      elif [ "$BACKEND" == "plan" ] && [ "$TRITON_SERVER_CPU_ONLY" == "1" ]; then
        # skip plan_tensorrt models since they don't run on CPU only containers
        continue
      else
        # Every other backend: copy its prebuilt models as-is.
        cp -r ${DATADIR}/qa_model_repository/${BACKEND}* \
          models/.
      fi
    done

    if [ "$ENSEMBLES" == "1" ]; then

      # Copy identity backend models and ensembles
      for BACKEND in $BACKENDS; do
        if [ "$BACKEND" == "plan" ] && [ "$TRITON_SERVER_CPU_ONLY" == "1" ]; then
            # skip plan_tensorrt models since they don't run on CPU only containers
            continue
        elif [ "$BACKEND" != "python" ] && [ "$BACKEND" != "python_dlpack" ] && [ "$BACKEND" != "openvino" ]; then
            cp -r ${DATADIR}/qa_ensemble_model_repository/qa_model_repository/*${BACKEND}* \
              models/.
        fi
      done

      cp -r ${DATADIR}/qa_ensemble_model_repository/qa_model_repository/nop_* \
        models/.

      create_nop_version_dir `pwd`/models

      # The hand-written ensembles under ../ensemble_models are only added
      # when onnx is among the backends.
      if [[ $BACKENDS == *"onnx"* ]]; then
        ENSEMBLE_MODELS="wrong_label_int32_float32_float32 label_override_int32_float32_float32 mix_type_int32_float32_float32"

        ENSEMBLE_MODELS="${ENSEMBLE_MODELS} batch_to_nobatch_float32_float32_float32 batch_to_nobatch_nobatch_float32_float32_float32 nobatch_to_batch_float32_float32_float32 nobatch_to_batch_nobatch_float32_float32_float32 mix_nobatch_batch_float32_float32_float32"

        # These two additionally require libtorch.
        if [[ $BACKENDS == *"libtorch"* ]] ; then
          ENSEMBLE_MODELS="${ENSEMBLE_MODELS} mix_platform_float32_float32_float32 mix_ensemble_int32_float32_float32"
        fi

        # Ensure each ensemble has a version directory "1" before copying it.
        for EM in $ENSEMBLE_MODELS; do
          mkdir -p ../ensemble_models/$EM/1 && cp -r ../ensemble_models/$EM models/.
        done
      fi
    fi

    # Instance kind follows the TARGET set by the caller's loop:
    # KIND_CPU for "cpu", KIND_GPU otherwise.
    KIND="KIND_GPU" && [[ "$TARGET" == "cpu" ]] && KIND="KIND_CPU"
    for FW in $BACKENDS; do
      if [ "$FW" == "onnx" ] && [ "$TEST_VALGRIND" -eq 1 ]; then
        # Reduce the instance count to make loading onnx models faster
        for MC in `ls models/${FW}*/config.pbtxt`; do
            echo "instance_group [ { kind: ${KIND} count: 1 }]" >> $MC
        done
      elif [ "$FW" != "plan" ] && [ "$FW" != "python" ] && [ "$FW" != "python_dlpack" ] && [ "$FW" != "openvino" ];then
        for MC in `ls models/${FW}*/config.pbtxt`; do
            echo "instance_group [ { kind: ${KIND} }]" >> $MC
        done
      elif [ "$FW" == "python" ] || [ "$FW" == "python_dlpack" ] || [ "$FW" == "openvino" ]; then
        # python, python_dlpack and openvino models are always placed on CPU.
        # NOTE(review): for FW=python the glob models/python* also matches
        # the python_dlpack_* directories, so those configs receive this line
        # twice when both backends are enabled -- confirm this is intended.
        for MC in `ls models/${FW}*/config.pbtxt`; do
            echo "instance_group [ { kind: KIND_CPU }]" >> $MC
        done
      fi
      # "plan" falls through every branch: its configs are left untouched.
    done

    # Modify custom_zero_1_float32 and custom_nobatch_zero_1_float32 for relevant ensembles
    # This is done after the instance group change above so that identity backend models
    # are run on CPU. Skip for Windows test.
    # NOTE(review): no Windows check guards the commands below in this
    # function -- the "Skip for Windows" remark may be stale; confirm.
    cp -r ../custom_models/custom_zero_1_float32 models/. &&\
        mkdir -p models/custom_zero_1_float32/1 && \
        (cd models/custom_zero_1_float32 && \
            echo "instance_group [ { kind: KIND_CPU }]" >> config.pbtxt)
    # Clone it into the nobatch variant, then patch both configs: the batch
    # model gets max_batch_size 8 with dims [ -1 ]; the nobatch model is
    # renamed and gets max_batch_size 0 with dims [ -1, -1 ].
    cp -r models/custom_zero_1_float32 models/custom_nobatch_zero_1_float32 && \
        (cd models/custom_zero_1_float32 && \
            sed -i "s/max_batch_size: 1/max_batch_size: 8/" config.pbtxt && \
            sed -i "s/dims: \[ 1 \]/dims: \[ -1 \]/" config.pbtxt) && \
        (cd models/custom_nobatch_zero_1_float32 && \
            sed -i "s/custom_zero_1_float32/custom_nobatch_zero_1_float32/" config.pbtxt && \
            sed -i "s/max_batch_size: 1/max_batch_size: 0/" config.pbtxt && \
            sed -i "s/dims: \[ 1 \]/dims: \[ -1, -1 \]/" config.pbtxt)

}

# Main pass: for each target (cpu, then gpu) build the model repository,
# start the server, run the Python client test and verify its results.
for TARGET in cpu gpu; do
    # The gpu pass is skipped entirely on CPU-only devices.
    if [ "$TRITON_SERVER_CPU_ONLY" == "1" ]; then
        if [ "$TARGET" == "gpu" ]; then
            echo -e "Skip GPU testing on CPU-only device"
            continue
        fi
    fi

    # Per-target log files.
    SERVER_LOG=$SERVER_LOG_BASE.${TARGET}.log
    CLIENT_LOG=$CLIENT_LOG_BASE.${TARGET}.log

    generate_model_repository

    # Check if running a memory leak check
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        LEAKCHECK_LOG=$LEAKCHECK_LOG_BASE.${TARGET}.log
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
        run_server_leakcheck
    else
        run_server
    fi

    # SERVER_PID of "0" is treated as a failed start; this aborts the whole
    # script rather than just marking the test as failed.
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Disable errexit so a failing client is recorded in RET instead of
    # terminating the script before the server is shut down.
    set +e

    python3 $INFER_TEST >$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        RET=1
    else
        # The client exited cleanly; additionally check the result file
        # against the expected number of tests.
        check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            cat $TEST_RESULT_FILE
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi

    set -e

    kill_server

    # In valgrind mode, also fail the test when the leak-check log is
    # rejected by check_valgrind_log.py.
    set +e
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
        if [ $? -ne 0 ]; then
            RET=1
        fi
    fi
    set -e
done

# Run 'python', 'python_dlpack' and 'onnx' models separately in valgrind test.
# Loading python and python_dlpack models has OOM issue when running with
# valgrind, so loading only batch or nobatch models for each time.
# Loading all the onnx models at once requires more than 12 hours. Loading them
# separately to reduce the loading time.
if [ "$TEST_VALGRIND" -eq 1 ]; then
  TESTING_BACKENDS="python python_dlpack onnx"
  EXPECTED_NUM_TESTS=36
  # python_dlpack needs torch; BACKENDS was narrowed in valgrind mode, so the
  # earlier conditional install did not necessarily run.
  if [[ "aarch64" != $(uname -m) ]] ; then
      pip3 install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
  else
      pip3 install torch==2.3.1 -f https://download.pytorch.org/whl/torch_stable.html
  fi

  # BACKENDS is reused as the loop variable so that generate_model_repository
  # and the Python test see exactly one backend per iteration.
  for BACKENDS in $TESTING_BACKENDS; do
    export BACKENDS
    for TARGET in cpu gpu; do
      # Skip the gpu target on CPU-only devices *before* doing any work.
      # Previously this check lived inside the BATCHING_MODE loop, so the
      # model repository was wiped and regenerated for the gpu target only
      # for both batching modes to be skipped afterwards.
      if [ "$TRITON_SERVER_CPU_ONLY" == "1" ] && [ "$TARGET" == "gpu" ]; then
          echo -e "Skip GPU testing on CPU-only device"
          continue
      fi

      # Build ./models, then split the nobatch models into ./nobatch_models
      # so that each server run loads only half of the models.
      rm -fr *models
      generate_model_repository
      mkdir nobatch_models
      mv ./models/*nobatch_* ./nobatch_models/.
      cp -fr ./models/nop_* ./nobatch_models/.
      if [[ $BACKENDS == *"onnx"* ]]; then
        # These two models are required by test_ensemble_mix_batch_nobatch test case.
        cp -fr ./models/onnx_float32_float32_float32 ./nobatch_models/.
        cp -fr ./models/custom_zero_1_float32 ./nobatch_models/.
      fi

      for BATCHING_MODE in batch nobatch; do
        SERVER_LOG=$SERVER_LOG_BASE.${TARGET}.${BACKENDS}.${BATCHING_MODE}.log
        CLIENT_LOG=$CLIENT_LOG_BASE.${TARGET}.${BACKENDS}.${BATCHING_MODE}.log

        # Select which half of the models to serve and tell the Python test
        # (through the exported BATCH / NOBATCH flags) which half to exercise.
        if [ "$BATCHING_MODE" == "batch" ]; then
          NOBATCH="0"
          export NOBATCH
          BATCH="1"
          export BATCH
          MODELDIR=`pwd`/models
        else
          NOBATCH="1"
          export NOBATCH
          BATCH="0"
          export BATCH
          MODELDIR=`pwd`/nobatch_models
        fi

        # Rebuild the server arguments for the selected repository.
        SERVER_ARGS="--model-repository=${MODELDIR} ${SERVER_ARGS_EXTRA}"
        LEAKCHECK_LOG=$LEAKCHECK_LOG_BASE.${TARGET}.${BACKENDS}.${BATCHING_MODE}.log
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
        run_server_leakcheck

        # SERVER_PID of "0" is treated as a failed start and aborts the script.
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        # Disable errexit so a failing client is recorded in RET instead of
        # terminating the script before the server is shut down.
        set +e

        VALGRIND_TESTS="1" python3 $INFER_TEST >$CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            RET=1
        else
            check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG
                cat $TEST_RESULT_FILE
                echo -e "\n***\n*** Test Result Verification Failed\n***"
                RET=1
            fi
        fi

        set -e

        kill_server

        # Fail the test when the leak-check log is rejected.
        set +e
        python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
        if [ $? -ne 0 ]; then
            RET=1
        fi
        set -e
      done
    done
  done
fi

# Print the final verdict and use RET as the script's exit status.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_infer_reshape/infer_reshape_test.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest

import infer_util as iu
import numpy as np
import test_util as tu

# NumPy object dtype; compared against the test dtype below to skip the
# libtorch no-batch case for string I/O.
np_dtype_string = np.dtype(object)

# Shared-memory switches read from the environment: unset means False, and
# any non-zero integer value means True. Both are forwarded to iu.infer_zero.
TEST_SYSTEM_SHARED_MEMORY = bool(int(os.environ.get("TEST_SYSTEM_SHARED_MEMORY", 0)))
TEST_CUDA_SHARED_MEMORY = bool(int(os.environ.get("TEST_CUDA_SHARED_MEMORY", 0)))


class InferReshapeTest(tu.TestResultCollector):
    """Drive ``iu.infer_zero`` over the reshape model families.

    ``_full_reshape`` covers the onnx, libtorch and ensemble models and
    ``_trt_reshape`` covers the plan models. Each ``test_*`` method picks a
    data type plus a tuple of per-input shapes (and, optionally, different
    per-output shapes).
    """

    def _run_batched_zero(self, model, dtype, input_shapes, output_shapes):
        """Call ``iu.infer_zero`` on batching model ``model`` for batch sizes 1 and 8.

        Every input and output shape is prefixed with the batch size.
        """
        for batch_size in (1, 8):
            iu.infer_zero(
                self,
                model,
                batch_size,
                dtype,
                [[batch_size] + shape for shape in input_shapes],
                [[batch_size] + shape for shape in output_shapes],
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
            )

    def _run_nobatch_zero(self, model, dtype, input_shapes, output_shapes):
        """Call ``iu.infer_zero`` once on non-batching model ``model``.

        The shapes are passed through unchanged with a batch size of 1.
        """
        iu.infer_zero(
            self,
            model,
            1,
            dtype,
            input_shapes,
            output_shapes,
            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
        )

    def _full_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
        """Exercise the onnx, libtorch and ensemble reshape models.

        Args:
            dtype: data type used for every tensor.
            input_shapes: sequence of shapes (lists), one per input.
            output_shapes: sequence of shapes, one per output; defaults to
                ``input_shapes`` when None.
            no_batch: when True the ``*_nobatch`` model variants are run too.
        """
        if output_shapes is None:
            output_shapes = input_shapes

        # For validation assume any shape can be used: only the first input
        # shape is handed to the tu.validate_for_* helpers.
        probe_shape = input_shapes[0]

        # onnx: batching variants first, then the non-batching model.
        if tu.validate_for_onnx_model(
            dtype, dtype, dtype, probe_shape, probe_shape, probe_shape
        ):
            self._run_batched_zero("onnx", dtype, input_shapes, output_shapes)
            if no_batch:
                self._run_nobatch_zero(
                    "onnx_nobatch", dtype, input_shapes, output_shapes
                )

        # libtorch: variable size reshape (int32) is skipped for now, see
        # "gen_qa_reshape_model.py" for detail.
        libtorch_valid = tu.validate_for_libtorch_model(
            dtype,
            dtype,
            dtype,
            probe_shape,
            probe_shape,
            probe_shape,
            reshape=True,
        )
        if libtorch_valid and dtype != np.int32:
            # The non-batching model runs first and is skipped for string I/O.
            if no_batch and (dtype != np_dtype_string):
                self._run_nobatch_zero(
                    "libtorch_nobatch", dtype, input_shapes, output_shapes
                )
            self._run_batched_zero("libtorch", dtype, input_shapes, output_shapes)

        # [TODO] Skip variable size reshape on ensemble for now.
        # Need rework on how ensemble for reshape are generated
        if dtype == np.int32:
            return

        for ensemble in ("simple_reshape", "sequence_reshape", "fan_reshape"):
            if not tu.validate_for_ensemble_model(
                ensemble,
                dtype,
                dtype,
                dtype,
                probe_shape,
                probe_shape,
                probe_shape,
            ):
                continue
            self._run_batched_zero(ensemble, dtype, input_shapes, output_shapes)
            if no_batch:
                self._run_nobatch_zero(
                    ensemble + "_nobatch", dtype, input_shapes, output_shapes
                )

    def _trt_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
        """Exercise the plan reshape models.

        Takes the same arguments as ``_full_reshape``; nothing is run when
        ``tu.validate_for_trt_model`` rejects the type / first-shape combination.
        """
        if output_shapes is None:
            output_shapes = input_shapes

        probe_shape = input_shapes[0]
        if not tu.validate_for_trt_model(
            dtype, dtype, dtype, probe_shape, probe_shape, probe_shape
        ):
            return

        # Batching variants first, then the non-batching model.
        self._run_batched_zero("plan", dtype, input_shapes, output_shapes)
        if no_batch:
            self._run_nobatch_zero("plan_nobatch", dtype, input_shapes, output_shapes)

    # float32, one input; batching models only.
    def test_ff1(self):
        self._full_reshape(np.float32, input_shapes=([1],), no_batch=False)

    # float32, two inputs; the plan models also run their nobatch variant.
    def test_ff2(self):
        shapes = ([1], [8])
        self._full_reshape(np.float32, input_shapes=shapes, no_batch=False)
        self._trt_reshape(np.float32, input_shapes=shapes)

    # float32, three inputs of differing rank.
    def test_ff3(self):
        self._full_reshape(np.float32, input_shapes=([4, 4], [2], [2, 2, 3]))

    # float32, four inputs with explicit (different) output shapes; the plan
    # models use their own set of output shapes.
    def test_ff4(self):
        shapes = ([4, 4], [2], [2, 2, 3], [1])
        self._full_reshape(
            np.float32,
            input_shapes=shapes,
            output_shapes=([16], [1, 2], [3, 2, 2], [1]),
        )
        self._trt_reshape(
            np.float32,
            input_shapes=shapes,
            output_shapes=([2, 2, 4], [1, 2, 1], [3, 2, 2], [1, 1, 1]),
        )

    # int32, one rank-4 input.
    def test_ii1(self):
        self._full_reshape(np.int32, input_shapes=([2, 4, 5, 6],))

    # int32, two inputs with explicit output shapes.
    def test_ii2(self):
        self._full_reshape(
            np.int32, input_shapes=([4, 1], [2]), output_shapes=([1, 4], [1, 2])
        )

    # int32, three inputs.
    def test_ii3(self):
        self._full_reshape(np.int32, input_shapes=([1, 4, 1], [8], [2, 2, 3]))

    # object (string) I/O, one input; batching models only.
    def test_oo1(self):
        self._full_reshape(np.object_, input_shapes=([1],), no_batch=False)


# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_infer_reshape/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model-repository version. NVIDIA_TRITON_SERVER_VERSION is the
# default; the first positional argument, when given, overrides it.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
# A version is mandatory: it selects the data directory copied from below.
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# When TEST_REPO_ARCH is set, append it to the version as a "_<arch>" suffix.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Restrict the test to the first GPU.
export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
INFER_TEST=infer_reshape_test.py
# Number of test results expected in TEST_RESULT_FILE.
EXPECTED_NUM_TESTS="8"
TEST_RESULT_FILE='test_results.txt'
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"
# Shared QA helpers; run_server, check_test_results and
# create_nop_version_dir used below are not defined in this script and are
# presumably provided by this file.
source ../common/util.sh

# Start from clean logs and an empty model repository, then populate it with
# the reshape models and the reshape ensembles.
rm -f $SERVER_LOG $CLIENT_LOG
rm -fr models && mkdir models
cp -r /data/inferenceserver/${REPO_VERSION}/qa_reshape_model_repository/* models/. && \
    cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_reshape_model_repository/* \
       models/.
# For each listed model, derive a custom_<name> model from the onnx_<name>
# one: copy it, empty its version directory "1", switch the config from a
# platform to the "identity" backend, rename it and pin it to CPU.
for i in \
        nobatch_zero_3_float32 \
        nobatch_zero_4_float32 \
        zero_1_float32 \
        zero_2_float32 \
        zero_3_float32 \
        zero_4_float32 \
        nobatch_zero_1_int32 \
        nobatch_zero_2_int32 \
        nobatch_zero_3_int32 \
        zero_1_int32 \
        zero_2_int32 \
        zero_3_int32 ; do
    cp -r models/onnx_${i} models/custom_${i}
    rm -fr models/custom_${i}/1/*
    (cd models/custom_${i} && \
                sed -i "s/^platform:.*/backend: \"identity\"/" config.pbtxt && \
                sed -i "s/^name:.*/name: \"custom_${i}\"/" config.pbtxt && \
                echo "instance_group [ { kind: KIND_CPU }]" >> config.pbtxt)
done

create_nop_version_dir `pwd`/models

# Exit status of the script; set to 1 by any failing step.
RET=0

run_server
# SERVER_PID of "0" is treated as a failed start and aborts the script.
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so a failing client is recorded in RET instead of
# terminating the script before the server is shut down.
set +e

# python unittest seems to swallow ImportError and still return 0
# exit code. So need to explicitly check CLIENT_LOG to make sure
# we see some running tests
# (the check below is performed by check_test_results on TEST_RESULT_FILE).
python $INFER_TEST >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

# Stop the server and wait for it to exit. errexit is active again here, so
# a non-zero status from either command ends the script at that point.
kill $SERVER_PID
wait $SERVER_PID

# Failure banners were already printed above; only success is reported here.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_infer_variable/infer_variable_test.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest

import infer_util as iu
import numpy as np
import test_util as tu

# NumPy object dtype.
np_dtype_string = np.dtype(object)

# Shared-memory switches read from the environment: unset means False, and
# any non-zero integer value means True. Both are forwarded to iu.infer_exact.
TEST_SYSTEM_SHARED_MEMORY = bool(int(os.environ.get("TEST_SYSTEM_SHARED_MEMORY", 0)))
TEST_CUDA_SHARED_MEMORY = bool(int(os.environ.get("TEST_CUDA_SHARED_MEMORY", 0)))


class InferVariableTest(tu.TestResultCollector):
    """Exact-match inference tests over several dtype/shape combinations.

    Each ``test_*`` method selects an input dtype, two output dtypes and a
    tensor shape and forwards them to ``_full_exact``, which runs
    ``iu.infer_exact`` against every model family accepted by the
    corresponding ``tu.validate_for_*`` helper.
    """

    def _full_exact(
        self,
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_shape,
        output0_shape,
        output1_shape,
        output0_raw=True,
        output1_raw=True,
        swap=False,
    ):
        """Run ``iu.infer_exact`` for one dtype/shape combination.

        The "plan" family is run once per accepted ensemble prefix (the empty
        prefix is always included); "onnx" and "libtorch" are run without a
        prefix. Each family is gated by its ``tu.validate_for_*_model`` check.

        Args:
            input_dtype: dtype of the model inputs.
            output0_dtype: dtype of the first output.
            output1_dtype: dtype of the second output.
            input_shape: tuple shape of the inputs (without batch dimension).
            output0_shape: tuple shape passed to the validators for OUTPUT0.
            output1_shape: tuple shape passed to the validators for OUTPUT1.
            output0_raw: forwarded to ``iu.infer_exact`` as ``output0_raw``.
            output1_raw: forwarded to ``iu.infer_exact`` as ``output1_raw``.
            swap: forwarded to ``iu.infer_exact`` as ``swap``.
        """

        def _infer_exact_helper(
            tester,
            pf,
            tensor_shape,
            batch_size,
            input_dtype,
            output0_dtype,
            output1_dtype,
            output0_raw=True,
            output1_raw=True,
            model_version=None,
            swap=False,
            outputs=("OUTPUT0", "OUTPUT1"),
            use_http=True,
            use_grpc=True,
            skip_request_id_check=False,
            use_streaming=True,
            correlation_id=0,
        ):
            """Call ``iu.infer_exact`` for model prefix ``pf``.

            For batch size 1 the "<pf>_nobatch" model is used; for
            ``batch_size`` the "<pf>" model is used with the batch dimension
            prepended to ``tensor_shape``.
            """
            # Keyword arguments shared by every iu.infer_exact call below.
            common_kwargs = dict(
                output0_raw=output0_raw,
                output1_raw=output1_raw,
                model_version=model_version,
                swap=swap,
                outputs=outputs,
                use_http=use_http,
                use_grpc=use_grpc,
                skip_request_id_check=skip_request_id_check,
                use_streaming=use_streaming,
                correlation_id=correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
            )

            for bs in (1, batch_size):
                # model that does not support batching
                if bs == 1:
                    iu.infer_exact(
                        tester,
                        pf + "_nobatch",
                        tensor_shape,
                        bs,
                        input_dtype,
                        output0_dtype,
                        output1_dtype,
                        **common_kwargs,
                    )

                # model that supports batching. Skip for libtorch string I/O
                # NOTE(review): as written, the batching model is exercised
                # only when pf is exactly "libtorch" (and bs != 1); "plan",
                # "onnx" and ensemble-prefixed names only reach the
                # "_nobatch" model. Confirm this reduced coverage is intended.
                elif pf == "libtorch" and tu.validate_for_libtorch_model(
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    tensor_shape,
                    tensor_shape,
                    tensor_shape,
                    bs,
                ):
                    iu.infer_exact(
                        tester,
                        pf,
                        (bs,) + tensor_shape,
                        bs,
                        input_dtype,
                        output0_dtype,
                        output1_dtype,
                        **common_kwargs,
                    )

        # The empty prefix (plain model) is always tested; ensemble prefixes
        # are added only when the validator accepts this combination.
        ensemble_prefix = [""]
        for prefix in ("simple_", "sequence_", "fan_"):
            if tu.validate_for_ensemble_model(
                prefix,
                input_dtype,
                output0_dtype,
                output1_dtype,
                input_shape,
                input_shape,
                input_shape,
            ):
                ensemble_prefix.append(prefix)

        if tu.validate_for_trt_model(
            input_dtype,
            output0_dtype,
            output1_dtype,
            input_shape,
            output0_shape,
            output1_shape,
        ):
            # int8 plan models are invoked with two extra trailing dimensions
            # of size 1 appended to the shape.
            if input_dtype == np.int8:
                plan_shape = input_shape + (1, 1)
            else:
                plan_shape = input_shape
            for prefix in ensemble_prefix:
                _infer_exact_helper(
                    self,
                    prefix + "plan",
                    plan_shape,
                    8,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    output0_raw=output0_raw,
                    output1_raw=output1_raw,
                    swap=swap,
                )

        # The remaining families are run without ensemble prefixes.
        # No basic ensemble models are created against custom models [TODO]
        unprefixed_families = (
            ("onnx", tu.validate_for_onnx_model),
            ("libtorch", tu.validate_for_libtorch_model),
        )
        for family, is_supported in unprefixed_families:
            if is_supported(
                input_dtype,
                output0_dtype,
                output1_dtype,
                input_shape,
                output0_shape,
                output1_shape,
            ):
                _infer_exact_helper(
                    self,
                    family,
                    input_shape,
                    8,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    output0_raw=output0_raw,
                    output1_raw=output1_raw,
                    swap=swap,
                )

    # ---- both outputs requested with output*_raw=True (the default) ----

    def test_raw_fff(self):
        self._full_exact(np.float32, np.float32, np.float32, (16,), (16,), (16,))

    def test_raw_fii(self):
        self._full_exact(np.float32, np.int32, np.int32, (2, 8), (2, 8), (2, 8))

    def test_raw_fll(self):
        self._full_exact(np.float32, np.int64, np.int64, (8, 4), (8, 4), (8, 4))

    def test_raw_fil(self):
        self._full_exact(
            np.float32, np.int32, np.int64, (2, 8, 2), (2, 8, 2), (2, 8, 2)
        )

    def test_raw_ffi(self):
        self._full_exact(np.float32, np.float32, np.int32, (16,), (16,), (16,))

    def test_raw_iii(self):
        self._full_exact(np.int32, np.int32, np.int32, (2, 8), (2, 8), (2, 8))

    def test_raw_iif(self):
        self._full_exact(
            np.int32, np.int32, np.float32, (2, 8, 2), (2, 8, 2), (2, 8, 2)
        )

    def test_raw_ooo(self):
        self._full_exact(
            np_dtype_string, np_dtype_string, np_dtype_string, (16,), (16,), (16,)
        )

    def test_raw_oii(self):
        self._full_exact(np_dtype_string, np.int32, np.int32, (2, 8), (2, 8), (2, 8))

    def test_raw_ooi(self):
        self._full_exact(
            np_dtype_string, np_dtype_string, np.int32, (8, 4), (8, 4), (8, 4)
        )

    def test_raw_oio(self):
        self._full_exact(
            np_dtype_string, np.int32, np_dtype_string, (2, 8, 2), (2, 8, 2), (2, 8, 2)
        )

    # ---- both outputs requested with output*_raw=False ----

    def test_class_fff(self):
        self._full_exact(
            np.float32,
            np.float32,
            np.float32,
            (16,),
            (16,),
            (16,),
            output0_raw=False,
            output1_raw=False,
        )

    def test_class_fii(self):
        self._full_exact(
            np.float32,
            np.int32,
            np.int32,
            (2, 8),
            (2, 8),
            (2, 8),
            output0_raw=False,
            output1_raw=False,
        )

    def test_class_fll(self):
        self._full_exact(
            np.float32,
            np.int64,
            np.int64,
            (8, 4),
            (8, 4),
            (8, 4),
            output0_raw=False,
            output1_raw=False,
        )

    def test_class_fil(self):
        self._full_exact(
            np.float32,
            np.int32,
            np.int64,
            (2, 8, 2),
            (2, 8, 2),
            (2, 8, 2),
            output0_raw=False,
            output1_raw=False,
        )

    def test_class_ffi(self):
        self._full_exact(
            np.float32,
            np.float32,
            np.int32,
            (16,),
            (16,),
            (16,),
            output0_raw=False,
            output1_raw=False,
        )

    def test_class_iii(self):
        self._full_exact(
            np.int32,
            np.int32,
            np.int32,
            (2, 8),
            (2, 8),
            (2, 8),
            output0_raw=False,
            output1_raw=False,
        )

    def test_class_iif(self):
        self._full_exact(
            np.int32,
            np.int32,
            np.float32,
            (2, 8, 2),
            (2, 8, 2),
            (2, 8, 2),
            output0_raw=False,
            output1_raw=False,
        )

    # ---- one output with output*_raw=True, the other with False ----

    def test_mix_ffi(self):
        self._full_exact(
            np.float32,
            np.float32,
            np.int32,
            (16,),
            (16,),
            (16,),
            output0_raw=True,
            output1_raw=False,
        )

    def test_mix_iii(self):
        self._full_exact(
            np.int32,
            np.int32,
            np.int32,
            (2, 8),
            (2, 8),
            (2, 8),
            output0_raw=False,
            output1_raw=True,
        )

    def test_mix_iif(self):
        self._full_exact(
            np.int32,
            np.int32,
            np.float32,
            (2, 8, 2),
            (2, 8, 2),
            (2, 8, 2),
            output0_raw=True,
            output1_raw=False,
        )


# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_infer_variable/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Driver for infer_variable_test.py: for each target (cpu, gpu) it rebuilds
# the model repository, starts the server, runs the Python tests and checks
# the reported test count.

# Repository version: first CLI argument overrides NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Optional architecture suffix appended to the repository version.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG_BASE="./client"
INFER_TEST=infer_variable_test.py
# Passed to check_test_results below; presumably must equal the number of
# test_* methods in $INFER_TEST.
EXPECTED_NUM_TESTS="21"
TEST_RESULT_FILE='test_results.txt'

DATADIR=`pwd`/models

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR --exit-timeout-secs=120"
SERVER_LOG_BASE="./inference_server"
# run_server, create_nop_version_dir and check_test_results are not defined in
# this script; they are expected to come from util.sh.
source ../common/util.sh

rm -f $SERVER_LOG_BASE* $CLIENT_LOG_BASE*

# Overall result; set to 1 as soon as any target fails.
RET=0

for TARGET in cpu gpu; do
    SERVER_LOG=$SERVER_LOG_BASE.${TARGET}.log
    CLIENT_LOG=$CLIENT_LOG_BASE.${TARGET}.log

    rm -fr models && \
        cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository models && \
        cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_variable_model_repository/* models/.

    create_nop_version_dir `pwd`/models

    KIND="KIND_GPU" && [[ "$TARGET" == "cpu" ]] && KIND="KIND_CPU"
    # Append an instance_group of the selected kind to every onnx* and
    # libtorch* model config; configs of other models are left untouched.
    for FW in onnx libtorch; do
        for MC in `ls models/${FW}*/config.pbtxt`; do
            echo "instance_group [ { kind: ${KIND} }]" >> $MC
        done
    done

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Do not abort on a failing test command; failures are recorded in RET so
    # the server is still shut down below.
    set +e

    python $INFER_TEST >$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        # Exit code was 0: additionally verify the recorded test results.
        check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi


    set -e

    # Stop the server for this target before starting the next iteration.
    kill $SERVER_PID
    wait $SERVER_PID
done

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_infer_zero/infer_zero_test.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest

import infer_util as iu
import numpy as np
import test_util as tu

# NumPy object dtype; the name indicates it stands in for string tensors.
np_dtype_string = np.dtype(object)

# Shared-memory toggles taken from the environment; unset or "0" means off.
TEST_SYSTEM_SHARED_MEMORY = int(os.getenv("TEST_SYSTEM_SHARED_MEMORY", "0")) != 0
TEST_CUDA_SHARED_MEMORY = int(os.getenv("TEST_CUDA_SHARED_MEMORY", "0")) != 0

# Space-separated backend names to test, overridable through the environment.
BACKENDS = os.getenv("BACKENDS", "onnx libtorch")

# Backend name -> validator deciding whether a dtype/shape combination applies.
VALIDATION_FNS = dict(
    onnx=tu.validate_for_onnx_model,
    libtorch=tu.validate_for_libtorch_model,
)


class InferZeroTest(tu.TestResultCollector):
    """Runs ``iu.infer_zero`` over shapes that may contain zero-sized dims.

    Each ``test_*`` method picks a dtype and a tuple of per-input shapes
    (each shape is a list) and forwards them to ``_full_zero``.
    """

    def _infer_batch_and_nobatch(self, model_name, dtype, shapes):
        """Run ``iu.infer_zero`` on ``model_name`` and ``model_name + "_nobatch"``.

        The batching model is called with batch sizes 1 and 8 (batch dimension
        prepended to every shape); the "_nobatch" model is called once with
        the shapes as given.
        """
        # model that supports batching
        for bs in (1, 8):
            batch_shapes = [[bs] + shape for shape in shapes]
            iu.infer_zero(
                self,
                model_name,
                bs,
                dtype,
                batch_shapes,
                batch_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
            )

        # model that does not support batching
        iu.infer_zero(
            self,
            model_name + "_nobatch",
            1,
            dtype,
            shapes,
            shapes,
            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY,
        )

    def _full_zero(self, dtype, shapes):
        """Exercise every configured backend and ensemble for one case.

        Args:
            dtype: element type used for all inputs and outputs.
            shapes: sequence of shapes (lists), one for each input.
        """
        # str.split() without an argument ignores repeated/leading/trailing
        # whitespace, so a sloppy BACKENDS value cannot yield empty names.
        for backend in BACKENDS.split():
            # object models do not exist right now for PyTorch
            if backend == "libtorch" and dtype == "object":
                # Skip only this backend; the remaining backends and the
                # ensemble checks below must still run.
                continue

            if not VALIDATION_FNS[backend](
                dtype, dtype, dtype, shapes[0], shapes[0], shapes[0]
            ):
                continue

            self._infer_batch_and_nobatch(backend, dtype, shapes)

        for name in ["simple_zero", "sequence_zero", "fan_zero"]:
            if tu.validate_for_ensemble_model(
                name, dtype, dtype, dtype, shapes[0], shapes[0], shapes[0]
            ):
                self._infer_batch_and_nobatch(name, dtype, shapes)

    # ---- float32, one input ----

    def test_ff1_sanity(self):
        self._full_zero(np.float32, ([1],))

    def test_ff1(self):
        self._full_zero(np.float32, ([0],))

    # ---- float32, three inputs ----

    def test_ff3_sanity(self):
        self._full_zero(np.float32, ([1], [2], [1]))

    def test_ff3_0(self):
        self._full_zero(np.float32, ([0], [0], [0]))

    def test_ff3_1(self):
        self._full_zero(np.float32, ([0], [0], [1]))

    def test_ff3_2(self):
        self._full_zero(np.float32, ([0], [1], [0]))

    def test_ff3_3(self):
        self._full_zero(np.float32, ([1], [0], [0]))

    def test_ff3_4(self):
        self._full_zero(np.float32, ([1], [0], [1]))

    # ---- float16, one input ----

    def test_hh1_sanity(self):
        self._full_zero(np.float16, ([2, 2],))

    def test_hh1_0(self):
        self._full_zero(np.float16, ([1, 0],))

    def test_hh1_1(self):
        self._full_zero(np.float16, ([0, 1],))

    def test_hh1_2(self):
        self._full_zero(np.float16, ([0, 0],))

    # ---- float16, three inputs ----

    def test_hh3_sanity(self):
        self._full_zero(np.float16, ([2, 2], [2, 2], [1, 1]))

    def test_hh3_0(self):
        self._full_zero(np.float16, ([0, 0], [0, 0], [0, 0]))

    def test_hh3_1(self):
        self._full_zero(np.float16, ([0, 1], [0, 1], [2, 3]))

    def test_hh3_2(self):
        self._full_zero(np.float16, ([1, 0], [1, 3], [0, 1]))

    def test_hh3_3(self):
        self._full_zero(np.float16, ([1, 1], [3, 0], [0, 0]))

    def test_hh3_4(self):
        self._full_zero(np.float16, ([1, 1], [0, 6], [2, 2]))

    # ---- object dtype, one input ----

    def test_oo1_sanity(self):
        self._full_zero(np_dtype_string, ([2],))

    def test_oo1(self):
        self._full_zero(np_dtype_string, ([0],))

    # ---- object dtype, three inputs ----

    def test_oo3_sanity(self):
        self._full_zero(np_dtype_string, ([2, 2], [2, 2], [1, 1]))

    def test_oo3_0(self):
        self._full_zero(np_dtype_string, ([0, 0], [0, 0], [0, 0]))

    def test_oo3_1(self):
        self._full_zero(np_dtype_string, ([0, 1], [0, 1], [2, 3]))

    def test_oo3_2(self):
        self._full_zero(np_dtype_string, ([1, 0], [1, 3], [0, 1]))

    def test_oo3_3(self):
        self._full_zero(np_dtype_string, ([1, 1], [3, 0], [0, 0]))

    def test_oo3_4(self):
        self._full_zero(np_dtype_string, ([1, 1], [0, 6], [2, 2]))

    # ---- bool, one input ----

    def test_bb1_sanity(self):
        self._full_zero(bool, ([10],))

    def test_bb1_0(self):
        self._full_zero(bool, ([0],))


# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_infer_zero/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Driver for infer_zero_test.py: builds the model repository, starts the
# server, runs the Python tests and checks the reported test count.

# Repository version: first CLI argument overrides NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Optional architecture suffix appended to the repository version.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

TEST_RESULT_FILE='test_results.txt'
CLIENT_LOG="./client.log"
INFER_TEST=infer_zero_test.py
# Passed to check_test_results below; presumably must equal the number of
# test_* methods in $INFER_TEST.
EXPECTED_NUM_TESTS="28"

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"
# run_server, create_nop_version_dir and check_test_results are not defined in
# this script; they are expected to come from util.sh.
source ../common/util.sh

rm -f $SERVER_LOG $CLIENT_LOG
rm -fr models && mkdir models
cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/* models/. && \
    cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_identity_model_repository/* models/.

# Remove version-compatible TensorRT models, as they require version-compatibility
# mode to be turned on when starting the server.
rm -rf models/plan_compatible*

create_nop_version_dir `pwd`/models

# Overall result; set to 1 when the test run or its verification fails.
RET=0

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Do not abort on a failing test command; failures are recorded in RET so the
# server is still shut down below.
set +e

# python unittest seems to swallow ImportError and still return 0
# exit code. So in addition to the exit code, verify through
# check_test_results that $TEST_RESULT_FILE reports the expected
# number of tests.
python $INFER_TEST >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

# Stop the server started by run_server.
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_input_validation/input_validation_test.py
================================================
#!/usr/bin/env python
# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import infer_util as iu
import numpy as np
import tritonclient.grpc as tritongrpcclient
import tritonclient.utils.shared_memory as shm
from tritonclient.utils import InferenceServerException, np_to_triton_dtype


class InputValTest(unittest.TestCase):
    """Checks the server's error reporting for missing model inputs.

    All requests go through the gRPC client to ``localhost:8001``. The models
    referenced by name ("input_all_required", "input_optional",
    "input_all_optional") must already be loaded by the server under test.
    """

    def _new_client(self):
        """Create a gRPC client that is closed when the test finishes."""
        client = tritongrpcclient.InferenceServerClient("localhost:8001")
        # Release the underlying channel even when the test fails.
        self.addCleanup(client.close)
        return client

    @staticmethod
    def _single_input0():
        """Return a request input list holding only a one-element FP32 INPUT0."""
        infer_input = tritongrpcclient.InferInput("INPUT0", [1], "FP32")
        infer_input.set_data_from_numpy(np.arange(1, dtype=np.float32))
        return [infer_input]

    def _assert_infer_error(self, model_name, inputs, expected_message):
        """Assert that inference fails and the error contains ``expected_message``."""
        triton_client = self._new_client()
        with self.assertRaises(InferenceServerException) as e:
            triton_client.infer(
                model_name=model_name,
                inputs=inputs,
            )
        self.assertIn(expected_message, str(e.exception))

    def test_input_validation_required_empty(self):
        self._assert_infer_error(
            "input_all_required",
            [],
            "expected 3 inputs but got 0 inputs for model 'input_all_required'. Got input(s) [], but missing required input(s) ['INPUT0','INPUT1','INPUT2']. Please provide all required input(s).",
        )

    def test_input_validation_optional_empty(self):
        self._assert_infer_error(
            "input_optional",
            [],
            "expected number of inputs between 3 and 4 but got 0 inputs for model 'input_optional'. Got input(s) [], but missing required input(s) ['INPUT0','INPUT1','INPUT2']. Please provide all required input(s).",
        )

    def test_input_validation_required_missing(self):
        self._assert_infer_error(
            "input_all_required",
            self._single_input0(),
            "expected 3 inputs but got 1 inputs for model 'input_all_required'. Got input(s) ['INPUT0'], but missing required input(s) ['INPUT1','INPUT2']. Please provide all required input(s).",
        )

    def test_input_validation_optional(self):
        # Only INPUT0 is provided. Per the expected error below it is one of
        # the required inputs, and the other two required inputs are missing.
        self._assert_infer_error(
            "input_optional",
            self._single_input0(),
            "expected number of inputs between 3 and 4 but got 1 inputs for model 'input_optional'. Got input(s) ['INPUT0'], but missing required input(s) ['INPUT1','INPUT2']. Please provide all required input(s).",
        )

    def test_input_validation_all_optional(self):
        triton_client = self._new_client()
        inputs = []
        result = triton_client.infer(
            model_name="input_all_optional",
            inputs=inputs,
        )
        response = result.get_response()
        # Exact comparison: assertIn(name, "OUTPUT0") would also accept any
        # substring of "OUTPUT0", including the empty string.
        self.assertEqual(str(response.outputs[0].name), "OUTPUT0")


class InputShapeTest(unittest.TestCase):
    """Checks that the server rejects requests whose declared input shape
    disagrees with the amount of data actually supplied."""

    def test_input_shape_validation(self):
        """A consistent FP32 request succeeds; one carrying more bytes than
        its overridden shape implies is rejected with a byte size mismatch."""
        input_size = 8
        model_name = "pt_identity"
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")

        # Pass
        input_data = np.arange(input_size)[None].astype(np.float32)
        inputs = [
            tritongrpcclient.InferInput(
                "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
            )
        ]
        inputs[0].set_data_from_numpy(input_data)
        triton_client.infer(model_name=model_name, inputs=inputs)

        # Larger input byte size than expected
        input_data = np.arange(input_size + 2)[None].astype(np.float32)
        inputs = [
            tritongrpcclient.InferInput(
                "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
            )
        ]
        inputs[0].set_data_from_numpy(input_data)
        # Compromised input shape: the data holds input_size + 2 float32
        # values (40 bytes) while the declared shape covers input_size (32).
        inputs[0].set_shape((1, input_size))
        with self.assertRaises(InferenceServerException) as e:
            triton_client.infer(
                model_name=model_name,
                inputs=inputs,
            )
        err_str = str(e.exception)
        self.assertIn(
            "input byte size mismatch for input 'INPUT0' for model 'pt_identity'. Expected 32, got 40",
            err_str,
        )

    def test_input_string_shape_validation(self):
        """String inputs with fewer or more elements than the overridden
        shape declares are rejected with an element-count error."""
        input_size = 16
        model_name = "onnx_object_int32_int32"
        np_dtype_string = np.dtype(object)
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")

        def get_input_array(input_size, np_dtype):
            """Return [INPUT0, INPUT1] InferInput objects that both carry the
            same (1, input_size) array of random integers rendered as strings."""
            rinput_dtype = iu._range_repr_dtype(np_dtype)
            input_array = np.random.randint(
                low=0, high=127, size=(1, input_size), dtype=rinput_dtype
            )

            # Convert to string type
            inn = np.array(
                [str(x) for x in input_array.reshape(input_array.size)], dtype=object
            )
            input_array = inn.reshape(input_array.shape)

            inputs = []
            inputs.append(
                tritongrpcclient.InferInput(
                    "INPUT0", input_array.shape, np_to_triton_dtype(np_dtype)
                )
            )
            inputs.append(
                tritongrpcclient.InferInput(
                    "INPUT1", input_array.shape, np_to_triton_dtype(np_dtype)
                )
            )

            inputs[0].set_data_from_numpy(input_array)
            inputs[1].set_data_from_numpy(input_array)
            return inputs

        # Input size is less than expected
        inputs = get_input_array(input_size - 2, np_dtype_string)
        # Compromised input shape
        inputs[0].set_shape((1, input_size))
        inputs[1].set_shape((1, input_size))
        with self.assertRaises(InferenceServerException) as e:
            triton_client.infer(model_name=model_name, inputs=inputs)
        err_str = str(e.exception)
        self.assertIn(
            f"expected {input_size} string elements for inference input 'INPUT1' for model '{model_name}', got {input_size-2}",
            err_str,
        )

        # Input size is greater than expected
        inputs = get_input_array(input_size + 2, np_dtype_string)
        # Compromised input shape
        inputs[0].set_shape((1, input_size))
        inputs[1].set_shape((1, input_size))
        with self.assertRaises(InferenceServerException) as e:
            triton_client.infer(model_name=model_name, inputs=inputs)
        err_str = str(e.exception)
        # NOTE(review): the message reports input_size + 1 although
        # input_size + 2 elements were sent -- presumably the server stops
        # counting at the first surplus element; confirm against the server.
        self.assertIn(
            f"unexpected number of string elements {input_size+1} for inference input 'INPUT1' for model '{model_name}', expecting {input_size}",
            err_str,
        )

    def test_wrong_input_shape_tensor_size(self):
        """A shape tensor delivered through an undersized shared-memory
        region is rejected with a byte size mismatch, for both the
        non-batching and the batching model."""

        def inference_helper(model_name, batch_size=1):
            """Run one inference against ``model_name`` with INPUT0 backed by
            a shared-memory region one int32 short, and assert the rejection.

            ``batch_size`` > 1 prepends a batch dimension to the dummy input.
            """
            triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
            if batch_size > 1:
                dummy_input_data = np.random.rand(batch_size, 32, 32).astype(np.float32)
            else:
                dummy_input_data = np.random.rand(32, 32).astype(np.float32)
            shape_tensor_data = np.asarray([4, 4], dtype=np.int32)

            # Pass an incorrect input byte size for the shape tensor
            # Use shared memory to bypass the shape check in client library
            input_byte_size = (shape_tensor_data.size - 1) * np.dtype(np.int32).itemsize

            # Create a shared memory region with the incorrect byte size (input_byte_size)
            input_shm_handle = shm.create_shared_memory_region(
                "INPUT0_SHM",
                "/INPUT0_SHM",
                input_byte_size,
            )
            # Everything after the region exists runs under try/finally so the
            # region is released even if filling or registering it fails.
            registered = False
            try:
                # Write the shape tensor data into the shared memory region
                # Slice the data to match the incorrect byte size (input_byte_size)
                shm.set_shared_memory_region(
                    input_shm_handle,
                    [
                        shape_tensor_data[
                            : input_byte_size // np.dtype(np.int32).itemsize
                        ],
                    ],
                )
                triton_client.register_system_shared_memory(
                    "INPUT0_SHM",
                    "/INPUT0_SHM",
                    input_byte_size,
                )
                registered = True

                inputs = [
                    tritongrpcclient.InferInput(
                        "DUMMY_INPUT0",
                        dummy_input_data.shape,
                        np_to_triton_dtype(np.float32),
                    ),
                    tritongrpcclient.InferInput(
                        "INPUT0",
                        shape_tensor_data.shape,
                        np_to_triton_dtype(np.int32),
                    ),
                ]
                inputs[0].set_data_from_numpy(dummy_input_data)
                inputs[1].set_shared_memory("INPUT0_SHM", input_byte_size)

                outputs = [
                    tritongrpcclient.InferRequestedOutput("DUMMY_OUTPUT0"),
                    tritongrpcclient.InferRequestedOutput("OUTPUT0"),
                ]

                # Perform inference
                with self.assertRaises(InferenceServerException) as e:
                    triton_client.infer(
                        model_name=model_name, inputs=inputs, outputs=outputs
                    )
                err_str = str(e.exception)
                correct_input_byte_size = (
                    shape_tensor_data.size * np.dtype(np.int32).itemsize
                )
                self.assertIn(
                    f"input byte size mismatch for input 'INPUT0' for model '{model_name}'. Expected {correct_input_byte_size}, got {input_byte_size}",
                    err_str,
                )
            finally:
                # Nested finally: a failing destroy must not leave the name
                # registered, since the next helper call reuses "INPUT0_SHM".
                try:
                    shm.destroy_shared_memory_region(input_shm_handle)
                finally:
                    # Only unregister what was registered, so a setup failure
                    # is not masked by a second error raised from cleanup.
                    if registered:
                        triton_client.unregister_system_shared_memory("INPUT0_SHM")

        inference_helper(model_name="plan_nobatch_zero_1_float32_int32")
        inference_helper(model_name="plan_zero_1_float32_int32", batch_size=8)


class ModelNameValidationTest(unittest.TestCase):
    """Exercises how the server validates model names on load and unload."""

    # Names the load request is expected to refuse as path traversal.
    INVALID_TRAVERSAL_NAMES = [
        "../etc",
        "a/../b",
        "../../etc/passwd",
        "../../../../etc",
        "model/..",
        "..",
        "/etc/passwd",
        "model/subdir",
        "model/",
        " ..",
        ".. ",
    ]

    def test_model_name_invalid_load(self):
        """Loading any traversal-style name must fail with the traversal error."""
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
        rejection = "model name must not contain path traversal characters"
        for model_name in self.INVALID_TRAVERSAL_NAMES:
            print(f"Testing model name: {model_name!r}")
            with self.assertRaises(InferenceServerException) as ctx:
                triton_client.load_model(model_name)
            failure_hint = f"Expected traversal rejection for model name: {model_name!r}"
            self.assertIn(rejection, str(ctx.exception), failure_hint)

    def test_model_name_empty_load(self):
        """Loading the empty string must fail with the empty-name error."""
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
        with self.assertRaises(InferenceServerException) as ctx:
            triton_client.load_model("")
        empty_name_error = (
            "Model name cannot be empty. Please enter a valid name to deploy."
        )
        self.assertIn(empty_name_error, str(ctx.exception))

    def test_model_name_whitespace_only_load(self):
        """Names made only of whitespace are treated like an empty name."""
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
        empty_name_error = (
            "Model name cannot be empty. Please enter a valid name to deploy."
        )
        for model_name in (" ", "   ", "\t", "\n", "\r", "\f", "\v", " \t \n "):
            with self.assertRaises(InferenceServerException) as ctx:
                triton_client.load_model(model_name)
            failure_hint = (
                f"Expected whitespace-only rejection for model name: {model_name!r}"
            )
            self.assertIn(empty_name_error, str(ctx.exception), failure_hint)

    def test_model_name_invalid_unload(self):
        """Unloading traversal-style names may succeed or fail, but never
        with the traversal error."""
        # Unload should not trigger traversal check
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
        rejection = "model name must not contain path traversal characters"
        for model_name in self.INVALID_TRAVERSAL_NAMES:
            try:
                triton_client.unload_model(model_name)
            except InferenceServerException as unload_error:
                failure_hint = f"Unload should not trigger traversal rejection for model name: {model_name!r}"
                self.assertNotIn(rejection, str(unload_error), failure_hint)

    def test_model_name_valid(self):
        """Verify that a syntactically valid model name is not rejected by
        the traversal check -- it should fail with a model not found error instead."""
        acceptable_names = (
            "model123",
            # "model  OAI",   TRI-769: Fix this test case
            "model.version1",
            "...",
            "..my_model",
            "model..1",
            "model....1",
        )
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
        for model_name in acceptable_names:
            with self.assertRaises(InferenceServerException) as ctx:
                triton_client.load_model(model_name)
            reported = str(ctx.exception)
            self.assertNotIn(
                "path traversal characters",
                reported,
                "Valid model name should not trigger path traversal rejection",
            )
            self.assertIn("failed to poll from model repository", reported)


# Allow running this file directly; unittest.main() discovers and runs the
# TestCase classes defined in this module.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_input_validation/models/input_all_optional/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend model that ignores its inputs and answers every
    request with a constant single-element OUTPUT0 tensor."""

    def initialize(self, args):
        """Decode the JSON string stored under ``args["model_config"]`` and
        keep the result on the instance."""
        raw_config = args["model_config"]
        self.model_config = json.loads(raw_config)

    def execute(self, requests):
        """Return one response per request, each holding OUTPUT0 = [1.0]."""
        # The request contents are never inspected; only their count matters.
        return [
            pb_utils.InferenceResponse(
                [pb_utils.Tensor("OUTPUT0", np.array([1], dtype=np.float32))]
            )
            for _ in requests
        ]


================================================
FILE: qa/L0_input_validation/models/input_all_optional/config.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend test model in which every declared input is optional.
name: "input_all_optional"
backend: "python"
# INPUT0, INPUT1 and INPUT2 are FP32 inputs, each flagged `optional: true`.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
    optional: true
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ -1 ]
    optional: true
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ -1 ]
    optional: true
  }
]

# Single FP32 output holding one element.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# Run the model instance on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/L0_input_validation/models/input_all_required/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend model that ignores its inputs and answers every
    request with a constant single-element OUTPUT0 tensor."""

    def initialize(self, args):
        """Decode the JSON string stored under ``args["model_config"]`` and
        keep the result on the instance."""
        raw_config = args["model_config"]
        self.model_config = json.loads(raw_config)

    def execute(self, requests):
        """Return one response per request, each holding OUTPUT0 = [1.0]."""
        # The request contents are never inspected; only their count matters.
        return [
            pb_utils.InferenceResponse(
                [pb_utils.Tensor("OUTPUT0", np.array([1], dtype=np.float32))]
            )
            for _ in requests
        ]


================================================
FILE: qa/L0_input_validation/models/input_all_required/config.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend test model in which no input carries the optional flag.
name: "input_all_required"
backend: "python"
# INPUT0, INPUT1 and INPUT2 are FP32 inputs; none is marked optional.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# Single FP32 output holding one element.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# Run the model instance on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/L0_input_validation/models/input_optional/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend model that ignores its inputs and answers every
    request with a constant single-element OUTPUT0 tensor."""

    def initialize(self, args):
        """Decode the JSON string stored under ``args["model_config"]`` and
        keep the result on the instance."""
        raw_config = args["model_config"]
        self.model_config = json.loads(raw_config)

    def execute(self, requests):
        """Return one response per request, each holding OUTPUT0 = [1.0]."""
        # The request contents are never inspected; only their count matters.
        return [
            pb_utils.InferenceResponse(
                [pb_utils.Tensor("OUTPUT0", np.array([1], dtype=np.float32))]
            )
            for _ in requests
        ]


================================================
FILE: qa/L0_input_validation/models/input_optional/config.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend test model mixing three required inputs with one optional input.
name: "input_optional"
backend: "python"
# INPUT0, INPUT1 and INPUT2 have no optional flag; only INPUT3 is `optional: true`.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    name: "INPUT3"
    data_type: TYPE_FP32
    dims: [ -1 ]
    optional: true
  }
]

# Single FP32 output holding one element.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# Run the model instance on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/L0_input_validation/test.sh
================================================
#!/bin/bash
# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first command-line argument overrides
# the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Shared QA helpers; presumably the source of run_server and SERVER_PID used below.
source ../common/util.sh

# Overall result: set to 1 by any failing stage and used as the exit code.
RET=0

DATADIR=/data/inferenceserver/${REPO_VERSION}
SERVER=/opt/tritonserver/bin/tritonserver
CLIENT_LOG="./input_validation_client.log"
TEST_PY=./input_validation_test.py
TEST_RESULT_FILE='./test_results.txt'
SERVER_LOG="./inference_server.log"

export CUDA_VISIBLE_DEVICES=0

# Start from a clean slate so logs from a previous run are not appended to.
rm -fr *.log

# input_validation_test
SERVER_ARGS="--model-repository=`pwd`/models"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so a failing pytest run is recorded in RET instead of
# aborting the script.
set +e
python3 -m pytest --junitxml="input_validation.report.xml" $TEST_PY::InputValTest >> $CLIENT_LOG 2>&1

if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** input_validation_test.py::InputValTest FAILED. \n***"
    RET=1
fi
set -e

# Stop the server and wait for it to exit before the next stage changes the
# model repository.
kill $SERVER_PID
wait $SERVER_PID

# input_shape_validation_test
pip install torch
pip install pytest-asyncio

# Generate a TorchScript identity model for the shape validation tests.
mkdir -p models/pt_identity/1
PYTHON_CODE=$(cat <<END
import torch
torch.jit.save(
    torch.jit.script(torch.nn.Identity()),
    "`pwd`/models/pt_identity/1/model.pt",
)
END
)

# Run the generator as an `if` condition: errexit (set -e) is active at this
# point, so a bare failing assignment would abort the script before the
# failure banner below could be printed.
if ! res="$(python3 -c "$PYTHON_CODE")"; then
    echo -e "\n***\n*** model \"pt_identity\" initialization FAILED. \n***"
    echo "$res"
    exit 1
fi

# Create the config.pbtxt file with the specified configuration
# (everything between the EOL markers is written to the file verbatim).
cat > models/pt_identity/config.pbtxt << EOL
name: "pt_identity"
backend: "pytorch"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [8]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [8]
  }
]
# ensure we batch requests together
dynamic_batching {
    max_queue_delay_microseconds: 1000000
}
EOL

# Pre-built models used by the string-shape and shape-tensor tests.
cp -r $DATADIR/qa_model_repository/onnx_object_int32_int32 models/.
cp -r $DATADIR/qa_shapetensor_model_repository/plan_nobatch_zero_1_float32_int32 models/.
cp -r $DATADIR/qa_shapetensor_model_repository/plan_zero_1_float32_int32 models/.

# Restart the server so it picks up the models added above.
SERVER_ARGS="--model-repository=`pwd`/models"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so a failing pytest run is recorded in RET instead of
# aborting the script.
set +e
python3 -m pytest --junitxml="input_shape_validation.report.xml" $TEST_PY::InputShapeTest >> $CLIENT_LOG 2>&1

if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** input_validation_test.py::InputShapeTest FAILED. \n***"
    RET=1
fi
set -e

# Stop the server before the following stages run.
kill $SERVER_PID
wait $SERVER_PID

# input_byte_size_test
# Runs a native test binary; no server is started with run_server for this
# stage (presumably the binary loads the server library in-process via
# /opt/tritonserver/lib -- confirm against the test source).
TEST_LOG="./input_byte_size_test.log"
TEST_EXEC=./input_byte_size_test
# Add the identity models to the local repository.
cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/{onnx_zero_1_float32,onnx_zero_1_object,onnx_zero_1_bool} ./models

# Disable errexit so a failing binary is recorded in RET instead of aborting.
set +e
LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $TEST_EXEC >> $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $TEST_LOG
    echo -e "\n***\n*** input_byte_size_test FAILED\n***"
    RET=1
fi
set -e

# tensor_size_test
# Second native binary, run the same way with its own log file.
TEST_LOG="./tensor_size_test.log"
TEST_EXEC=./tensor_size_test

set +e
LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $TEST_EXEC >> $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $TEST_LOG
    echo -e "\n***\n*** tensor_size_test FAILED\n***"
    RET=1
fi
set -e

# Model name validation test
# Uses a fresh, empty repository and explicit model control so the test can
# issue load/unload requests for arbitrary model names.
rm -rf test_models ; mkdir -p test_models
# Separate log files keep this stage's output apart from the earlier stages.
SERVER_LOG="./model_name_validation_server.log"
CLIENT_LOG="./model_name_validation_client.log"
SERVER_ARGS="--model-repository=`pwd`/test_models --model-control-mode=explicit --log-verbose=1"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so a failing pytest run is recorded in RET instead of
# aborting the script. `-s` turns off pytest output capture, so the test's
# print() output ends up in the client log.
set +e
python3 -m pytest -s --junitxml="model_name_validation.report.xml" $TEST_PY::ModelNameValidationTest >> $CLIENT_LOG 2>&1

if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** input_validation_test.py::ModelNameValidationTest FAILED. \n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Final summary: RET is non-zero if any stage above failed.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Input Validation Test Passed\n***"
else
    echo -e "\n***\n*** Input Validation Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_io/gen_libtorch_model.py
================================================
#!/usr/bin/python
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch
import torch.nn as nn


class SumModule(nn.Module):
    """Add two tensors after moving both of them to a fixed device."""

    def __init__(self, device):
        super().__init__()
        # Device that forward() moves both operands to before adding.
        self.device = device

    def forward(self, INPUT0, INPUT1):
        """Return ``INPUT0 + INPUT1`` computed on ``self.device``.

        The devices of the relocated operands are printed on every call.
        """
        lhs = INPUT0.to(self.device)
        rhs = INPUT1.to(self.device)
        print(
            "SumModule - INPUT0 device: {}, INPUT1 device: {}\n".format(
                lhs.device, rhs.device
            )
        )
        return lhs + rhs


class DiffModule(nn.Module):
    """Subtract two tensors after moving both of them to a fixed device."""

    def __init__(self, device):
        super().__init__()
        # Device that forward() moves both operands to before subtracting.
        self.device = device

    def forward(self, INPUT0, INPUT1):
        """Return ``INPUT0 - INPUT1`` computed on ``self.device``.

        The devices of the relocated operands are printed on every call.
        """
        minuend = INPUT0.to(self.device)
        subtrahend = INPUT1.to(self.device)
        print(
            "DiffModule - INPUT0 device: {}, INPUT1 device: {}\n".format(
                minuend.device, subtrahend.device
            )
        )
        return minuend - subtrahend


class TestModel(nn.Module):
    """Two-output model: a sum on one device and a difference on another."""

    def __init__(self, device0, device1):
        super().__init__()
        # device0 hosts the sum layer, device1 hosts the difference layer.
        self.device0 = device0
        self.device1 = device1

        self.layer1 = SumModule(device0)
        self.layer2 = DiffModule(device1)

    def forward(self, INPUT0, INPUT1):
        """Return ``(INPUT0 + INPUT1, INPUT0 - INPUT1)``.

        Each element is produced by its own layer, on that layer's device.
        """
        total = self.layer1(INPUT0, INPUT1)
        delta = self.layer2(INPUT0, INPUT1)
        return total, delta


# Both device pairs below reference "cuda:1", so two GPUs must be visible.
if torch.cuda.device_count() < 2:
    print("Need at least 2 GPUs to run this test")
    exit(1)

# Model name -> (device of the sum layer, device of the difference layer).
MODEL_DEVICES = {
    "libtorch_multi_gpu": ("cuda:1", "cuda:0"),
    "libtorch_multi_device": ("cpu", "cuda:1"),
}

# Script each variant and save it as version 1 of its model directory.
for name, (sum_device, diff_device) in MODEL_DEVICES.items():
    module = TestModel(sum_device, diff_device)
    torch.jit.script(module).save(f"models/{name}/1/model.pt")


================================================
FILE: qa/L0_io/test.sh
================================================
#!/bin/bash
# Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument, when given,
# takes precedence over NVIDIA_TRITON_SERVER_VERSION from the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the TEST_REPO_ARCH suffix when one is provided.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# This test requires at least 2 GPUs to test h2d and d2d transfer combinations
export CUDA_VISIBLE_DEVICES=0,1

# Test binary, log prefix and the scratch model repository built below.
IO_TEST_UTIL=./memory_alloc
CLIENT_LOG="./client.log"
MODELSDIR=$(pwd)/models

# Source repositories the test models are copied from.
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
ENSEMBLEDIR=/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository

# Must explicitly set LD_LIBRARY_PATH so that IO_TEST_UTIL can find
# libtritonserver.so. The variable is exported so that the child process sees
# it even when it was not already part of the environment, and the previous
# value is appended only when non-empty so that no empty entry (which the
# dynamic loader treats as the current directory) is added.
export LD_LIBRARY_PATH=/opt/tritonserver/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}

# Drop logs left over from a previous run.
rm -f $CLIENT_LOG*

# PyTorch is required for the Python backend dlpack add sub models
pip3 install torch -f https://download.pytorch.org/whl/cu130
# Accumulated test status; set to 1 by any failing check below.
RET=0

# Prepare float32 models with basic config
rm -rf $MODELSDIR

# For every backend, stage the model as ${trial}_float32_float32_float32 and
# a companion model named fan_<model> under $MODELSDIR.
for trial in onnx libtorch plan python python_dlpack; do
    full=${trial}_float32_float32_float32
    # Python backend: model.py and config.pbtxt come from
    # ../python_models/add_sub. Any label_filename setting is blanked out and
    # "max_batch_size: 64" is appended to the config.
    if [ "$trial" == "python" ]; then
        mkdir -p $MODELSDIR/${full}/1 && \
            cp ../python_models/add_sub/model.py $MODELSDIR/${full}/1/. && \
            cp ../python_models/add_sub/config.pbtxt $MODELSDIR/${full}/. && \
            (cd $MODELSDIR/${full} && \
                    sed -i "s/label_filename:.*//" config.pbtxt && \
                    echo "max_batch_size: 64" >> config.pbtxt)

        # ensemble version of the model.
        # The config comes from ../python_models/fan_add_sub: the
        # ENSEMBLE_MODEL_NAME placeholder is pointed at ${full}, and the
        # "0,/name:.*/" address (GNU sed) limits the rename to the first
        # "name:" line of the file.
        mkdir -p $MODELSDIR/fan_${full}/1 && \
            cp ../python_models/add_sub/model.py $MODELSDIR/fan_${full}/1/. && \
            cp ../python_models/fan_add_sub/config.pbtxt $MODELSDIR/fan_${full}/. && \
            (cd $MODELSDIR/fan_${full} && \
                    sed -i "s/label_filename:.*//" config.pbtxt && \
                    sed -i "s/model_name: \"ENSEMBLE_MODEL_NAME\"/model_name: \"${full}\"/" config.pbtxt && \
                    sed -i "0,/name:.*/{s/name:.*/name: \"fan_${full}\"/}" config.pbtxt && \
                    echo "max_batch_size: 64" >> config.pbtxt)
        continue
    fi

    # Python backend with DLPack: same layout as above, but sourced from
    # ../python_models/dlpack_add_sub; the first "name:" line of the model's
    # own config is also rewritten to ${full}.
    if [ "$trial" == "python_dlpack" ]; then
        mkdir -p $MODELSDIR/${full}/1 && \
            cp ../python_models/dlpack_add_sub/model.py $MODELSDIR/${full}/1/. && \
            cp ../python_models/dlpack_add_sub/config.pbtxt $MODELSDIR/${full}/. && \
            (cd $MODELSDIR/${full} && \
                    sed -i "s/label_filename:.*//" config.pbtxt && \
                    sed -i "0,/name:.*/{s/name:.*/name: \"${full}\"/}" config.pbtxt && \
                    echo "max_batch_size: 64" >> config.pbtxt)

        # ensemble version of the model.
        mkdir -p $MODELSDIR/fan_${full}/1 && \
            cp ../python_models/dlpack_add_sub/model.py $MODELSDIR/fan_${full}/1/. && \
            cp ../python_models/fan_add_sub/config.pbtxt $MODELSDIR/fan_${full}/. && \
            (cd $MODELSDIR/fan_${full} && \
                    sed -i "s/label_filename:.*//" config.pbtxt && \
                    sed -i "s/model_name: \"ENSEMBLE_MODEL_NAME\"/model_name: \"${full}\"/" config.pbtxt && \
                    sed -i "0,/name:.*/{s/name:.*/name: \"fan_${full}\"/}" config.pbtxt && \
                    echo "max_batch_size: 64" >> config.pbtxt)
        continue
    fi

    # Remaining backends (onnx, libtorch, plan): copy version 1 and the config
    # from $DATADIR, then append an instance_group line. The test loop further
    # down rewrites that line with sed for each model placement.
    mkdir -p $MODELSDIR/${full}/1 && \
        cp -r $DATADIR/${full}/1/* $MODELSDIR/${full}/1/. && \
        cp $DATADIR/${full}/config.pbtxt $MODELSDIR/${full}/. && \
        (cd $MODELSDIR/${full} && \
                sed -i "s/label_filename:.*//" config.pbtxt && \
                echo "instance_group [{ kind: KIND_CPU }]" >> config.pbtxt)

    # ensemble version of the model.
    # Only the config is copied from $ENSEMBLEDIR; the version directory is
    # created empty.
    mkdir -p $MODELSDIR/fan_${full}/1 && \
    cp $ENSEMBLEDIR/fan_${full}/config.pbtxt $MODELSDIR/fan_${full}/. && \
        (cd $MODELSDIR/fan_${full} && \
                sed -i "s/label_filename:.*//" config.pbtxt)

    # For libtorch, sed joins lines in pairs (N) and renames the key
    # "OUTPUT<n>" to "OUTPUT__<n>" when the following line holds a value
    # starting with "same_output".
    if [ "$trial" == "libtorch" ]; then
        (cd $MODELSDIR/fan_${full} && \
                sed -i -e '{
                    N
                    s/key: "OUTPUT\([0-9]\)"\n\(.*\)value: "same_output/key: "OUTPUT__\1"\n\2value: "same_output/
                }' config.pbtxt)
    fi
done

# Prepare string models with basic config
# Same staging as the float32 models: copy from $DATADIR, blank out any
# label_filename setting and append a CPU instance_group line.
for trial in onnx ; do
    full=${trial}_object_object_object
    mkdir -p $MODELSDIR/${full}/1 && \
        cp -r $DATADIR/${full}/1/* $MODELSDIR/${full}/1/. && \
        cp $DATADIR/${full}/config.pbtxt $MODELSDIR/${full}/. && \
                (cd $MODELSDIR/${full} && \
                sed -i "s/label_filename:.*//" config.pbtxt && \
                echo "instance_group [{ kind: KIND_CPU }]" >> config.pbtxt)
done

# set up "addsub" ensemble for custom float32 model
# NOTE(review): ${full} still holds the value left by the loop above
# (onnx_object_object_object), so this creates fan_onnx_object_object_object
# from the float32 ensemble config — confirm this is still intended.
cp -r $MODELSDIR/fan_onnx_float32_float32_float32 $MODELSDIR/fan_${full} && \
    (cd $MODELSDIR/fan_${full} && \
            sed -i "s/onnx_float32_float32_float32/${full}/" config.pbtxt)

# custom float32 component of ensemble
cp -r $ENSEMBLEDIR/nop_TYPE_FP32_-1 $MODELSDIR/. && \
    mkdir -p $MODELSDIR/nop_TYPE_FP32_-1/1

# prepare libtorch multi-device and multi-gpu models
# The multi-gpu model reuses the multi-device config with only the model name
# changed; the model.pt files are written into the version directories by
# gen_libtorch_model.py.
cp -r ../L0_libtorch_instance_group_kind_model/models/libtorch_multi_device $MODELSDIR/.
mkdir -p $MODELSDIR/libtorch_multi_device/1
mkdir -p $MODELSDIR/libtorch_multi_gpu/1
cp $MODELSDIR/libtorch_multi_device/config.pbtxt $MODELSDIR/libtorch_multi_gpu/.
(cd $MODELSDIR/libtorch_multi_gpu && \
    sed -i "s/name: \"libtorch_multi_device\"/name: \"libtorch_multi_gpu\"/" config.pbtxt)

# Generate the scripted libtorch models; abort the whole test if that fails.
set +e
if ! python3 gen_libtorch_model.py >> $CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Error when generating libtorch models. \n***"
    cat $CLIENT_LOG
    exit 1
fi
set -e

# Run the IO test utility for every combination of input device, output
# device, backend and model placement. A model_device of -1 places the model
# on CPU (KIND_CPU); 0 and 1 place it on that GPU. Presumably -1 / 0 / 1 carry
# the same meaning for the -i / -o options of $IO_TEST_UTIL — TODO confirm.
TRIALS="onnx libtorch plan python python_dlpack libtorch_multi_gpu libtorch_multi_device"
for input_device in -1 0 1; do
    for output_device in -1 0 1; do
        for trial in ${TRIALS}; do
            # TensorRT Plan should only be deployed on GPU device
            model_devices="-1 0 1" && [[ "$trial" == "plan" ]] && model_devices="0 1"
            # The libtorch_multi_* models are addressed by their trial name.
            full=${trial}_float32_float32_float32 && [[ "$trial" == "libtorch_multi"* ]] && full=${trial}

            for model_device in $model_devices; do
                full_log=$CLIENT_LOG.$full.$input_device.$output_device.$model_device

                # Rewrite the instance_group line for the requested placement.
                # The libtorch_multi_* configs are left untouched.
                host_policy=cpu
                if [ "$model_device" == "-1" ]; then
                    if [[ "$trial" != "libtorch_multi"* ]]; then
                        (cd $MODELSDIR/${full} && \
                            sed -i "s/instance_group.*/instance_group [{ kind: KIND_CPU }]/" config.pbtxt)
                    fi
                else
                    host_policy=gpu_${model_device}
                    if [[ "$trial" != "libtorch_multi"* ]]; then
                        (cd $MODELSDIR/${full} && \
                            sed -i "s/instance_group.*/instance_group [{ kind: KIND_GPU, gpus: [${model_device}] }]/" config.pbtxt)
                    fi
                fi

                set +e
                $IO_TEST_UTIL -i $input_device -o $output_device -r $MODELSDIR -m $full >>$full_log 2>&1
                if [ $? -ne 0 ]; then
                    cat $full_log
                    echo -e "\n***\n*** Test Failed\n***"
                    RET=1
                fi
                set -e

                # Test with host policy
                set +e
                $IO_TEST_UTIL -i $input_device -o $output_device -h $host_policy -r $MODELSDIR -m $full >>$full_log 2>&1
                # Save the exit status immediately: "$?" is overwritten by the
                # [[ ... ]] test below, which previously made both of the
                # following checks compare against the status of that test
                # instead of the status of $IO_TEST_UTIL.
                host_policy_status=$?
                # FIXME currently only apply the new changes to ORT backend, should apply to others
                # NOTE(review): these checks were never effective before this
                # fix; confirm the "expect failure" expectation still holds
                # for every non-onnx backend.
                if [[ "$trial" == "onnx" ]]; then
                  if [ $host_policy_status -ne 0 ]; then
                      cat $full_log
                      echo -e "\n***\n*** Test Failed. Expect passing \n***"
                      RET=1
                  fi
                else
                  if [ $host_policy_status -eq 0 ]; then
                      cat $full_log
                      echo -e "\n***\n*** Test Failed. Expect failure \n***"
                      RET=1
                  fi
                fi
                set -e

                # ensemble
                if [[ "$trial" != "libtorch_multi"* ]]; then
                    set +e
                    $IO_TEST_UTIL -i $input_device -o $output_device -r $MODELSDIR -m fan_$full >>$full_log.ensemble 2>&1
                    if [ $? -ne 0 ]; then
                        cat $full_log.ensemble
                        echo -e "\n***\n*** Test Failed\n***"
                        RET=1
                    fi
                    set -e
                fi
            done
        done

        # String (object) models: plain run only, no host policy or ensemble.
        for trial in onnx; do
            model_devices="-1 0 1"
            for model_device in $model_devices; do
                full=${trial}_object_object_object
                full_log=$CLIENT_LOG.$full.$input_device.$output_device.$model_device

                if [ "$model_device" == "-1" ]; then
                    (cd $MODELSDIR/${full} && \
                        sed -i "s/instance_group.*/instance_group [{ kind: KIND_CPU }]/" config.pbtxt)
                else
                    (cd $MODELSDIR/${full} && \
                        sed -i "s/instance_group.*/instance_group [{ kind: KIND_GPU, gpus: [${model_device}] }]/" config.pbtxt)
                fi

                set +e
                $IO_TEST_UTIL -i $input_device -o $output_device -r $MODELSDIR -m $full >>$full_log 2>&1
                if [ $? -ne 0 ]; then
                    cat $full_log
                    echo -e "\n***\n*** Test Failed\n***"
                    RET=1
                fi
                set -e
            done
        done
    done
done

# Report the accumulated status and use it as the script's exit code.
if ! [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_iterative_sequence/iterative_sequence_e2e.py
================================================
#!/usr/bin/env python
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json

# GRPC streaming helpers..
import queue
import unittest
from functools import partial

import numpy as np
import requests
import sseclient
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

# JSON model-config template that the tests pass to load_model() as a config
# override. Literal braces are doubled because the string goes through
# str.format(); the single "{}" placeholder receives the scheduler settings
# of the individual test.
MODEL_CONFIG_BASE = """
{{
"backend": "iterative_sequence",
"max_batch_size": 1,
"input" : [
  {{
    "name": "INPUT",
    "data_type": "TYPE_INT32",
    "dims": [ 1 ]
  }}
],
"output" : [
  {{
    "name": "OUTPUT",
    "data_type": "TYPE_INT32",
    "dims": [ 1 ]
  }}
],
"model_transaction_policy" : {{
  "decoupled": true
}},
{},
"instance_group" : [{{ "kind": "KIND_CPU" }}]
}}
"""


class UserData:
    """Container handed to the gRPC stream callback to collect its outcomes."""

    def __init__(self):
        # Queue the callback fills with one entry (result or error) per call.
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    """Queue the outcome of one stream response on ``user_data``.

    The error is queued when it is truthy; otherwise the result is queued.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


class IterativeSequenceTest(tu.TestResultCollector):
    """End-to-end tests for the "iterative_sequence" model over HTTP and gRPC.

    Every test starts from the model's on-disk configuration (see ``setUp``);
    individual tests then reload the model with a scheduler override built
    from ``MODEL_CONFIG_BASE``.
    """

    def setUp(self):
        # Always make sure the original config is used
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            triton_client.load_model("iterative_sequence")

    def test_generate_stream(self):
        """The HTTP generate_stream endpoint yields two events for INPUT=2.

        The events are expected to carry OUTPUT values 1 and then 0.
        """
        headers = {"Accept": "text/event-stream"}
        url = "http://localhost:8000/v2/models/iterative_sequence/generate_stream"
        inputs = {"INPUT": 2}
        res = requests.post(url, data=json.dumps(inputs), headers=headers)
        res.raise_for_status()
        client = sseclient.SSEClient(res)
        res_count = 2
        for event in client.events():
            res_count -= 1
            data = json.loads(event.data)
            self.assertIn("OUTPUT", data)
            self.assertEqual(res_count, data["OUTPUT"])
        self.assertEqual(0, res_count)

    def test_grpc_stream(
        self, sequence_id=0, sequence_start=False, num_requests=1, validation=True
    ):
        """Send ``num_requests`` streaming requests and consume two responses each.

        Runs as a test of its own with the default arguments and is also
        called as a helper by the other tests.

        Args:
            sequence_id: Sequence ID attached to every request.
            sequence_start: Whether every request carries the START flag.
            num_requests: Number of requests sent on the stream.
            validation: When true, check that the OUTPUT values of the
                responses alternate 1, 0 in arrival order.

        Raises:
            InferenceServerException: Re-raised when the stream callback
                delivered an error instead of a result.
        """
        user_data = UserData()
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            triton_client.start_stream(callback=partial(callback, user_data))
            inputs = []
            inputs.append(grpcclient.InferInput("INPUT", [1, 1], "INT32"))
            inputs[0].set_data_from_numpy(np.array([[2]], dtype=np.int32))

            for _ in range(num_requests):
                triton_client.async_stream_infer(
                    model_name="iterative_sequence",
                    inputs=inputs,
                    sequence_id=sequence_id,
                    sequence_start=sequence_start,
                )
            # Each request is expected to produce two responses.
            res_count = 2 * num_requests
            while res_count > 0:
                data_item = user_data._completed_requests.get()
                res_count -= 1
                if isinstance(data_item, InferenceServerException):
                    raise data_item
                if validation:
                    self.assertEqual(
                        res_count % 2, data_item.as_numpy("OUTPUT")[0][0]
                    )
            self.assertEqual(0, res_count)

    def test_backlog_fill(self):
        """Four concurrent requests complete under the direct scheduler override."""
        # The "direct" key is quoted so that the override is valid JSON.
        config = r'"sequence_batching" : { "iterative_sequence" : true, "max_sequence_idle_microseconds": 8000000, "direct": { "max_queue_delay_microseconds" : 10000000 }}'
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            triton_client.load_model(
                "iterative_sequence", config=MODEL_CONFIG_BASE.format(config)
            )
        self.test_grpc_stream(num_requests=4, validation=False)

    def test_reschedule_error(self):
        """A sequence that idles out before rescheduling reports a START-flag error."""
        # Use short idle timeout (< backend reschedule delay: 0.5s) so that
        # the backend won't be able to reschedule the request as the scheduler
        # will terminate the sequence early
        config = r'"sequence_batching" : { "iterative_sequence" : true, "max_sequence_idle_microseconds" : 200000 }'
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            triton_client.load_model(
                "iterative_sequence", config=MODEL_CONFIG_BASE.format(config)
            )
        with self.assertRaises(InferenceServerException) as context:
            # 'iterative_sequence' is enabled here; the error is expected to
            # come from the short idle timeout configured above.
            self.test_grpc_stream()
        print(str(context.exception))
        self.assertIn(
            "must specify the START flag on the first request of the sequence",
            str(context.exception),
        )

    def test_unsupported_sequence_scheduler(self):
        """Sequence schedulers without iterative_sequence reject rescheduling."""
        # Override model config with scheduler settings that do not support
        # request rescheduling.
        configs = [
            r'"sequence_batching" : { "direct" : {}, "iterative_sequence" : false }',
            r'"sequence_batching" : { "oldest" : {}, "iterative_sequence" : false }',
        ]
        # A distinct sequence ID is used for each configuration.
        sid = 1
        for sc in configs:
            with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
                triton_client.load_model(
                    "iterative_sequence", config=MODEL_CONFIG_BASE.format(sc)
                )
            with self.assertRaises(InferenceServerException) as context:
                # Without specifying 'iterative_sequence : true', the sequence
                # batcher expects sequence parameters to be provided explicitly
                self.test_grpc_stream(sequence_id=sid, sequence_start=True)
            sid += 1
            self.assertIn(
                "Request is released with TRITONSERVER_REQUEST_RELEASE_RESCHEDULE",
                str(context.exception),
            )

    def test_unsupported_dynamic_scheduler(self):
        """The dynamic batcher rejects requests released for rescheduling."""
        # Override model config with scheduler settings that do not support
        # request rescheduling.
        configs = [
            r'"dynamic_batching" : {}',
        ]
        for sc in configs:
            with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
                triton_client.load_model(
                    "iterative_sequence", config=MODEL_CONFIG_BASE.format(sc)
                )
            with self.assertRaises(InferenceServerException) as context:
                self.test_grpc_stream()
            self.assertIn(
                "Request is released with TRITONSERVER_REQUEST_RELEASE_RESCHEDULE",
                str(context.exception),
            )


# Run the test cases with unittest when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_iterative_sequence/models/iterative_sequence/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Default configuration of the "iterative_sequence" test model.
backend: "iterative_sequence"
max_batch_size: 1
# One INT32 input tensor with dims [ 1 ].
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# One INT32 output tensor with dims [ 1 ].
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# The decoupled transaction policy is enabled for this model.
model_transaction_policy {
  decoupled: True
}
# Sequence batching with iterative_sequence enabled; iterative_sequence_e2e.py
# replaces this section with other scheduler settings in several tests.
sequence_batching {
  iterative_sequence : true
}
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/L0_iterative_sequence/test.sh
================================================
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument, when given,
# takes precedence over NVIDIA_TRITON_SERVER_VERSION from the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the TEST_REPO_ARCH suffix when one is provided.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Presumably provides run_server and check_test_results, which are called
# below but not defined in this script — TODO confirm.
source ../common/util.sh

# Accumulated test status; also the exit code of this script.
RET=0

CLIENT_LOG="./iterative_sequence_client.log"
TEST_PY=./iterative_sequence_e2e.py
# Number of test cases passed to check_test_results as the expected count.
EXPECTED_NUM_TESTS="6"
TEST_RESULT_FILE='test_results.txt'


export CUDA_VISIBLE_DEVICES=0

# Drop logs left over from a previous run.
rm -fr *.log

# SSE client package; the test script imports "sseclient".
pip install sseclient-py

SERVER=/opt/tritonserver/bin/tritonserver
# EXPLICIT model control: the test script loads and reloads the model itself
# through load_model().
SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=EXPLICIT"
SERVER_LOG="./inference_server.log"
run_server
# A SERVER_PID of "0" is treated as a failed server start.
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Run the Python tests; when they succeed, also verify the recorded results
# against the expected number of test cases.
set +e
if python $TEST_PY >>$CLIENT_LOG 2>&1; then
    if ! check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
else
    RET=1
fi
set -e

# Stop the server and wait for it to exit before reporting the result.
kill $SERVER_PID
wait $SERVER_PID

# On failure, dump both logs ahead of the verdict.
if ! [ $RET -eq 0 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_java_memory_growth/MemoryGrowthTest.java
================================================
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import static org.bytedeco.tritonserver.global.tritonserver.*;

import com.google.gson.*;
import java.io.*;
import java.util.*;
import java.util.concurrent.*;
import org.bytedeco.javacpp.*;
import org.bytedeco.tritonserver.tritonserver.*;

public class MemoryGrowthTest {
  // Minimum CUDA compute capability passed to the server options in main().
  static final double TRITON_MIN_COMPUTE_CAPABILITY = 7.5;
  // Set by main() once all inference iterations have finished and polled by
  // the monitoring thread in ValidateMemoryGrowth(). It is volatile so that
  // the write made on the main thread is guaranteed to become visible to the
  // monitoring thread; without it the polling loop may never observe the
  // update and the test would never finish.
  private static volatile boolean done = false;
  // Maximum allowed memory growth as a fraction (0.10 == 10%). Overridden by
  // --max-growth, which main() divides by 100.
  static float max_growth_allowed = .10f;
  // Maximum allowed memory in MB. Overridden by --max-memory.
  static int max_mem_allowed = 30;

  // Prints 'MSG' to stderr prefixed with "failure: " and terminates the JVM
  // with exit status 1.
  static void FAIL(String MSG)
  {
    final String report = "failure: " + MSG;
    System.err.println(report);
    System.exit(1);
  }

  // If 'err__' is non-null, prints 'MSG' together with the error's code
  // string and message to stderr, deletes the error object and terminates
  // the JVM with exit status 1. A null 'err__' means success and is ignored.
  static void FAIL_IF_ERR(TRITONSERVER_Error err__, String MSG)
  {
    if (err__ == null) {
      return;
    }

    StringBuilder report = new StringBuilder("error: ");
    report.append(MSG)
        .append(":")
        .append(TRITONSERVER_ErrorCodeString(err__))
        .append(" - ")
        .append(TRITONSERVER_ErrorMessage(err__));
    System.err.println(report.toString());

    // The error object is owned by us once returned, so release it before
    // exiting.
    TRITONSERVER_ErrorDelete(err__);
    System.exit(1);
  }

  // Set to true by main() when -m is given on the command line. When set,
  // Check() verifies that each output was allocated in
  // 'requested_memory_type'.
  static boolean enforce_memory_type = false;
  // Memory type selected with -m. Keeps its default value when -m is not
  // given; it is also passed as the memory type of the input buffers in
  // RunInference().
  static int requested_memory_type;
  // Parameters for percentile range to include (exclude outliers)
  static final int max_percentile = 90;
  static final int min_percentile = 10;

  // Wrapper around a TRITONSERVER_Server pointer that registers a
  // deallocator whose deallocate() calls TRITONSERVER_ServerDelete on the
  // wrapped server.
  // NOTE(review): presumably JavaCPP invokes the deallocator when the wrapper
  // is released or garbage collected -- confirm against the JavaCPP Pointer
  // documentation.
  static class TRITONSERVER_ServerDeleter extends TRITONSERVER_Server {
    public TRITONSERVER_ServerDeleter(TRITONSERVER_Server p)
    {
      super(p);
      // Attach the deallocator that deletes the native server.
      deallocator(new DeleteDeallocator(this));
    }
    // Deallocator that deletes the native server it points at.
    protected static class DeleteDeallocator
        extends TRITONSERVER_Server implements Deallocator {
      DeleteDeallocator(Pointer p) { super(p); }
      @Override public void deallocate() { TRITONSERVER_ServerDelete(this); }
    }
  }

  // Prints 'msg' (when non-null) followed by the command-line help to stderr
  // and terminates the JVM with exit status 1. Every option accepted by
  // main() is listed here, including -c and -?.
  static void Usage(String msg)
  {
    if (msg != null) {
      System.err.println(msg);
    }

    System.err.println(
        "Usage: java " + MemoryGrowthTest.class.getSimpleName() + " [options]");
    System.err.println("\t-i Set number of iterations");
    System.err.println(
        "\t-m <\"system\"|\"pinned\"|gpu>"
        + " Enforce the memory type for input and output tensors."
        + " If not specified, inputs will be in system memory and outputs"
        + " will be based on the model's preferred type.");
    System.err.println("\t-v Enable verbose logging");
    System.err.println("\t-r [model repository absolute path]");
    System.err.println("\t-c Check the accuracy of the inference results");
    System.err.println(
        "\t--max-growth Specify maximum allowed memory growth (%)");
    System.err.println("\t--max-memory Specify maximum allowed memory (MB)");
    System.err.println("\t-? Print this usage message");

    System.exit(1);
  }

  // Response allocator callback: supplies the buffer that an output tensor
  // is written into. Non-empty buffers are always allocated from system
  // (CPU) memory with Pointer.malloc and are released again by
  // ResponseRelease.
  static class ResponseAlloc extends TRITONSERVER_ResponseAllocatorAllocFn_t {
    // Allocates 'byte_size' bytes for output 'tensor_name'.
    // On success 'buffer' receives the allocation, 'buffer_userp' receives a
    // global reference to the tensor name, and 'actual_memory_type' /
    // 'actual_memory_type_id' describe where the buffer lives.
    // Returns null on success, or an error if the allocation failed.
    @Override
    public TRITONSERVER_Error call(
        TRITONSERVER_ResponseAllocator allocator, String tensor_name,
        long byte_size, int preferred_memory_type,
        long preferred_memory_type_id, Pointer userp, PointerPointer buffer,
        PointerPointer buffer_userp, IntPointer actual_memory_type,
        LongPointer actual_memory_type_id)
    {
      // The memory type id is always reported back as the preferred one.
      actual_memory_type_id.put(0, preferred_memory_type_id);

      // If 'byte_size' is zero just return 'buffer' == nullptr, we don't
      // need to do any other book-keeping. The preferred memory type is
      // echoed back unchanged in that case.
      if (byte_size == 0) {
        actual_memory_type.put(0, preferred_memory_type);
        buffer.put(0, null);
        buffer_userp.put(0, null);
        return null;  // Success
      }

      // This test only ever allocates system memory, regardless of the
      // preferred or requested memory type.
      actual_memory_type.put(0, TRITONSERVER_MEMORY_CPU);
      Pointer allocated_ptr = Pointer.malloc(byte_size);
      if (allocated_ptr == null || allocated_ptr.isNull()) {
        // Report the failure instead of returning success with no buffer.
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_UNAVAILABLE,
            "failed to allocate " + byte_size + " bytes for output tensor "
                + tensor_name);
      }

      // Pass the tensor name with buffer_userp so we can show it when
      // releasing the buffer.
      buffer.put(0, allocated_ptr);
      buffer_userp.put(0, Loader.newGlobalRef(tensor_name));

      return null;  // Success
    }
  }

  // Response allocator release callback: frees a buffer handed out by
  // ResponseAlloc together with the global reference stored in its
  // 'buffer_userp'.
  static class ResponseRelease
      extends TRITONSERVER_ResponseAllocatorReleaseFn_t {
    // Releases 'buffer' and, when present, the global reference held in
    // 'buffer_userp'. Always returns null (success).
    @Override
    public TRITONSERVER_Error call(
        TRITONSERVER_ResponseAllocator allocator, Pointer buffer,
        Pointer buffer_userp, long byte_size, int memory_type,
        long memory_type_id)
    {
      Pointer.free(buffer);

      // Zero-sized outputs are handed out without a tensor-name reference,
      // so there is nothing to delete for them.
      if (buffer_userp != null) {
        Loader.deleteGlobalRef(buffer_userp);
      }

      return null;  // Success
    }
  }

  // Request release callback. It is intentionally a no-op: RunInference()
  // reuses the same request object for several inferences and deletes it
  // itself with TRITONSERVER_InferenceRequestDelete once it is finished.
  static class InferRequestComplete
      extends TRITONSERVER_InferenceRequestReleaseFn_t {
    @Override
    public void call(
        TRITONSERVER_InferenceRequest request, int flags, Pointer userp)
    {
      // We reuse the request so we don't delete it here.
    }
  }

  // Response complete callback: hands a delivered response to the future
  // registered in 'futures' under the callback's 'userp'.
  static class InferResponseComplete
      extends TRITONSERVER_InferenceResponseCompleteFn_t {
    @Override
    public void call(
        TRITONSERVER_InferenceResponse response, int flags, Pointer userp)
    {
      // Nothing to deliver when no response object was provided.
      if (response == null) {
        return;
      }

      // Send 'response' to the future.
      CompletableFuture<TRITONSERVER_InferenceResponse> pending =
          futures.get(userp);
      pending.complete(response);
    }
  }

  // Futures for in-flight inferences, keyed by the 'userp' pointer given to
  // the response complete callback (RunInference() uses the request object
  // itself as that pointer). InferResponseComplete completes the matching
  // future when a response arrives.
  static ConcurrentHashMap<
      Pointer, CompletableFuture<TRITONSERVER_InferenceResponse>> futures =
      new ConcurrentHashMap<>();
  // Callback instances used by RunInference() for every allocator and
  // request it creates.
  static ResponseAlloc responseAlloc = new ResponseAlloc();
  static ResponseRelease responseRelease = new ResponseRelease();
  static InferRequestComplete inferRequestComplete = new InferRequestComplete();
  static InferResponseComplete inferResponseComplete =
      new InferResponseComplete();

  // Validates the tensor data types listed in 'model_metadata'.
  // Every input and output must be INT32 or FP32 and all of them must share
  // the same data type.
  // On success is_int[0] is set to whether that type is INT32,
  // is_torch_model[0] is set to whether the "platform" entry equals
  // "pytorch_libtorch", and null is returned. Otherwise a newly created
  // error describing the problem is returned; the caller owns it.
  static TRITONSERVER_Error ParseModelMetadata(
      JsonObject model_metadata, boolean[] is_int, boolean[] is_torch_model)
  {
    String seen_data_type = null;
    for (JsonElement input_element :
         model_metadata.get("inputs").getAsJsonArray()) {
      String data_type =
          input_element.getAsJsonObject().get("datatype").getAsString();
      if (!data_type.equals("INT32") && !data_type.equals("FP32")) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_UNSUPPORTED,
            "simple lib example only supports model with data type INT32 or "
                + "FP32");
      }
      if (seen_data_type == null) {
        seen_data_type = data_type;
      } else if (!seen_data_type.equals(data_type)) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "the inputs and outputs of 'simple' model must have the same "
                + "data type");
      }
    }

    // Without any input there is no data type to compare against; report
    // this instead of failing with a NullPointerException below.
    if (seen_data_type == null) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "the metadata of 'simple' model does not list any inputs");
    }

    for (JsonElement output_element :
         model_metadata.get("outputs").getAsJsonArray()) {
      String data_type =
          output_element.getAsJsonObject().get("datatype").getAsString();
      if (!data_type.equals("INT32") && !data_type.equals("FP32")) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_UNSUPPORTED,
            "simple lib example only supports model with data type INT32 or "
                + "FP32");
      } else if (!seen_data_type.equals(data_type)) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "the inputs and outputs of 'simple' model must have the same "
                + "data type");
      }
    }

    is_int[0] = seen_data_type.equals("INT32");
    is_torch_model[0] =
        model_metadata.get("platform").getAsString().equals("pytorch_libtorch");
    return null;
  }

  // Creates two 16-element INT32 buffers and stores them in slot 0 of the
  // given arrays: the first holds 0..15, the second holds all ones.
  static void GenerateInputData(
      IntPointer[] input0_data, IntPointer[] input1_data)
  {
    IntPointer sequence = new IntPointer(16);
    IntPointer ones = new IntPointer(16);
    for (int pos = 0; pos < 16; ++pos) {
      sequence.put(pos, pos);
      ones.put(pos, 1);
    }
    input0_data[0] = sequence;
    input1_data[0] = ones;
  }

  // Creates two 16-element FP32 buffers and stores them in slot 0 of the
  // given arrays: the first holds 0..15, the second holds all ones.
  static void GenerateInputData(
      FloatPointer[] input0_data, FloatPointer[] input1_data)
  {
    FloatPointer sequence = new FloatPointer(16);
    FloatPointer ones = new FloatPointer(16);
    for (int pos = 0; pos < 16; ++pos) {
      sequence.put(pos, pos);
      ones.put(pos, 1);
    }
    input0_data[0] = sequence;
    input1_data[0] = ones;
  }

  // Verifies the 16 INT32 results element by element: 'output0' must equal
  // input0 + input1 and 'output1' must equal input0 - input1. The first
  // mismatch ends the program through FAIL().
  static void CompareResult(
      String output0_name, String output1_name, IntPointer input0,
      IntPointer input1, IntPointer output0, IntPointer output1)
  {
    for (int pos = 0; pos < 16; ++pos) {
      int expected_sum = input0.get(pos) + input1.get(pos);
      if (expected_sum != output0.get(pos)) {
        FAIL("incorrect sum in " + output0_name);
      }

      int expected_difference = input0.get(pos) - input1.get(pos);
      if (expected_difference != output1.get(pos)) {
        FAIL("incorrect difference in " + output1_name);
      }
    }
  }

  // Verifies the 16 FP32 results element by element: 'output0' must equal
  // input0 + input1 and 'output1' must equal input0 - input1. The first
  // mismatch ends the program through FAIL().
  static void CompareResult(
      String output0_name, String output1_name, FloatPointer input0,
      FloatPointer input1, FloatPointer output0, FloatPointer output1)
  {
    for (int pos = 0; pos < 16; ++pos) {
      float expected_sum = input0.get(pos) + input1.get(pos);
      if (expected_sum != output0.get(pos)) {
        FAIL("incorrect sum in " + output0_name);
      }

      float expected_difference = input0.get(pos) - input1.get(pos);
      if (expected_difference != output1.get(pos)) {
        FAIL("incorrect difference in " + output1_name);
      }
    }
  }

  // Validates an inference response against the inputs that produced it.
  // The response must contain exactly two outputs named 'output0' and
  // 'output1', each of shape [1, 16], of type 'expected_datatype' and of
  // size 'expected_byte_size'. When a memory type is enforced the outputs
  // must also live in 'requested_memory_type'. Finally the output values are
  // compared with the sum and difference of the inputs, interpreted as INT32
  // when 'is_int' is true and as FP32 otherwise. Any violation ends the
  // program through FAIL().
  static void Check(
      TRITONSERVER_InferenceResponse response, Pointer input0_data,
      Pointer input1_data, String output0, String output1,
      long expected_byte_size, int expected_datatype, boolean is_int)
  {
    int[] num_outputs = {0};
    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseOutputCount(response, num_outputs),
        "getting number of response outputs");
    if (num_outputs[0] != 2) {
      FAIL("expecting 2 response outputs, got " + num_outputs[0]);
    }

    // Copies of the output tensors, keyed by output name.
    HashMap<String, Pointer> copies = new HashMap<>();

    for (int out = 0; out < num_outputs[0]; ++out) {
      // Out-parameters filled in by TRITONSERVER_InferenceResponseOutput.
      BytePointer cname = new BytePointer((Pointer) null);
      IntPointer datatype = new IntPointer(1);
      LongPointer shape = new LongPointer((Pointer) null);
      LongPointer dim_count = new LongPointer(1);
      Pointer base = new Pointer();
      SizeTPointer byte_size = new SizeTPointer(1);
      IntPointer memory_type = new IntPointer(1);
      LongPointer memory_type_id = new LongPointer(1);
      Pointer userp = new Pointer();

      FAIL_IF_ERR(
          TRITONSERVER_InferenceResponseOutput(
              response, out, cname, datatype, shape, dim_count, base, byte_size,
              memory_type, memory_type_id, userp),
          "getting output info");

      if (cname.isNull()) {
        FAIL("unable to get output name");
      }

      String name = cname.getString();
      boolean known_output = name.equals(output0) || name.equals(output1);
      if (!known_output) {
        FAIL("unexpected output '" + name + "'");
      }

      // The shape is only read once the rank is known to be 2.
      boolean shape_ok = (dim_count.get() == 2) && (shape.get(0) == 1)
          && (shape.get(1) == 16);
      if (!shape_ok) {
        FAIL("unexpected shape for '" + name + "'");
      }

      if (datatype.get() != expected_datatype) {
        FAIL(
            "unexpected datatype '"
            + TRITONSERVER_DataTypeString(datatype.get()) + "' for '" + name
            + "'");
      }

      if (byte_size.get() != expected_byte_size) {
        FAIL(
            "unexpected byte-size, expected " + expected_byte_size + ", got "
            + byte_size.get() + " for " + name);
      }

      if (enforce_memory_type && (memory_type.get() != requested_memory_type)) {
        FAIL(
            "unexpected memory type, expected to be allocated in "
            + TRITONSERVER_MemoryTypeString(requested_memory_type) + ", got "
            + TRITONSERVER_MemoryTypeString(memory_type.get()) + ", id "
            + memory_type_id.get() + " for " + name);
      }

      // We make a copy of the data here... which we could avoid for
      // performance reasons but ok for this simple example.
      BytePointer copy = new BytePointer(byte_size.get());
      copy.put(base.limit(byte_size.get()));
      copies.put(name, copy);
    }

    if (!is_int) {
      CompareResult(
          output0, output1, new FloatPointer(input0_data),
          new FloatPointer(input1_data), new FloatPointer(copies.get(output0)),
          new FloatPointer(copies.get(output1)));
    } else {
      CompareResult(
          output0, output1, new IntPointer(input0_data),
          new IntPointer(input1_data), new IntPointer(copies.get(output0)),
          new IntPointer(copies.get(output1)));
    }
  }

  /**
  Samples the JVM heap usage every 5 seconds until the 'done' flag is set and
  then reports whether the memory growth is within the acceptable range.
  Growth is measured between the min_percentile and max_percentile snapshots
  so that outliers are excluded.
  @param  max_growth_allowed    Maximum allowed memory growth, as a fraction
                                of the upper percentile snapshot (0.10 == 10%)
  @param  max_mem_allowed       Maximum allowed memory (MB)
  @return true when both limits were respected; false when a limit was
          exceeded or fewer than 5 snapshots were collected
   */
  static boolean ValidateMemoryGrowth(
      float max_growth_allowed, int max_mem_allowed)
  {
    // Initial capacity is large enough to hold 24 hours worth of snapshots.
    List<Double> snapshots = new ArrayList<Double>(20000);
    Runtime runtime = Runtime.getRuntime();
    while (!done) {
      try {
        Thread.sleep(5000);
      }
      catch (InterruptedException e) {
        System.out.println("Memory growth validation interrupted.");
      }
      // Request a collection first so the snapshot reflects live objects.
      System.gc();
      double used_bytes = runtime.totalMemory() - runtime.freeMemory();
      snapshots.add(used_bytes);
      System.out.println("Memory allocated (MB):" + used_bytes / 1E6);
    }

    int snapshot_count = snapshots.size();
    if (snapshot_count < 5) {
      System.out.println(
          "Error: Not enough snapshots, found " + snapshot_count
          + " snapshots");
      return false;
    }

    // Measure memory growth without outliers by taking difference
    // between 90th percentile and 10th percentile memory usage.
    final double bytes_in_mb = 1E6;
    Collections.sort(snapshots);
    int index_max =
        ((int) Math.ceil(max_percentile / 100.0 * snapshot_count)) - 1;
    int index_min =
        ((int) Math.ceil(min_percentile / 100.0 * snapshot_count)) - 1;
    double upper_bytes = snapshots.get(index_max);
    double lower_bytes = snapshots.get(index_min);
    double delta_bytes = upper_bytes - lower_bytes;
    double delta_mb = delta_bytes / bytes_in_mb;
    double delta_fraction = delta_bytes / upper_bytes;

    System.out.println(
        "Change in memory allocation (MB): " + delta_mb + ", "
        + (delta_fraction * 100) + "%");

    boolean growth_exceeded = delta_fraction >= max_growth_allowed;
    if (growth_exceeded) {
      System.out.println(
          "Exceeded allowed memory growth (" + (max_growth_allowed * 100)
          + "%)");
    }

    double upper_mb = upper_bytes / bytes_in_mb;
    boolean memory_exceeded = upper_mb >= max_mem_allowed;
    if (memory_exceeded) {
      System.out.println(
          "Exceeded allowed memory (" + max_mem_allowed + "MB), got "
          + upper_mb + "MB");
    }

    return !(growth_exceeded || memory_exceeded);
  }

  // Submits 'irequest' once and waits for its response.
  // A fresh future is registered in 'futures' under the request pointer, the
  // response callback is (re-)registered with the request as its userp, and
  // the inference is started asynchronously. Once the response arrives its
  // status is checked, it is optionally validated with Check() against
  // 'expected_input0' / 'expected_input1', and it is deleted.
  // The future is removed from 'futures' even if waiting throws.
  private static void InferAndVerify(
      TRITONSERVER_ServerDeleter server, TRITONSERVER_InferenceRequest irequest,
      TRITONSERVER_ResponseAllocator allocator, boolean check_accuracy,
      Pointer expected_input0, Pointer expected_input1, String output0,
      String output1, long expected_byte_size, int datatype, boolean is_int)
      throws Exception
  {
    CompletableFuture<TRITONSERVER_InferenceResponse> completed =
        new CompletableFuture<>();
    futures.put(irequest, completed);

    TRITONSERVER_InferenceResponse completed_response;
    try {
      // Using a new future so have to re-register the callback; the request
      // pointer is the userp used to look the future up again.
      FAIL_IF_ERR(
          TRITONSERVER_InferenceRequestSetResponseCallback(
              irequest, allocator, null /* response_allocator_userp */,
              inferResponseComplete, irequest),
          "setting response callback");

      FAIL_IF_ERR(
          TRITONSERVER_ServerInferAsync(server, irequest, null /* trace */),
          "running inference");

      // Wait for the inference to complete.
      completed_response = completed.get();
    }
    finally {
      futures.remove(irequest);
    }

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseError(completed_response),
        "response status");
    if (check_accuracy) {
      Check(
          completed_response, expected_input0, expected_input1, output0,
          output1, expected_byte_size, datatype, is_int);
    }
    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseDelete(completed_response),
        "deleting inference response");
  }

  // Runs one round of inferences against 'model_name' on 'server', reusing a
  // single request object:
  //   1. with the generated input data,
  //   2. with the first element of INPUT0 modified in place (only when the
  //      inputs are in non-pinned system memory),
  //   3. with INPUT0's data replaced by INPUT1's data.
  // 'is_int[0]' selects INT32 or FP32 tensors and 'is_torch_model[0]'
  // selects the PyTorch tensor naming scheme. When 'check_accuracy' is true
  // every response is validated with Check(). The request and the response
  // allocator are deleted before returning.
  static void RunInference(
      TRITONSERVER_ServerDeleter server, String model_name, boolean[] is_int,
      boolean[] is_torch_model, boolean check_accuracy) throws Exception
  {
    // Create the allocator that will be used to allocate buffers for
    // the result tensors.
    TRITONSERVER_ResponseAllocator allocator =
        new TRITONSERVER_ResponseAllocator(null);
    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorNew(
            allocator, responseAlloc, responseRelease, null /* start_fn */),
        "creating response allocator");

    // Inference
    TRITONSERVER_InferenceRequest irequest =
        new TRITONSERVER_InferenceRequest(null);
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestNew(
            irequest, server, model_name, -1 /* model_version */),
        "creating inference request");

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"),
        "setting ID for the request");

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetReleaseCallback(
            irequest, inferRequestComplete, null /* request_release_userp */),
        "setting request release callback");

    // Inputs
    String input0 = is_torch_model[0] ? "INPUT__0" : "INPUT0";
    String input1 = is_torch_model[0] ? "INPUT__1" : "INPUT1";

    long[] input0_shape = {1, 16};
    long[] input1_shape = {1, 16};

    int datatype =
        (is_int[0]) ? TRITONSERVER_TYPE_INT32 : TRITONSERVER_TYPE_FP32;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAddInput(
            irequest, input0, datatype, input0_shape, input0_shape.length),
        "setting input 0 meta-data for the request");
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAddInput(
            irequest, input1, datatype, input1_shape, input1_shape.length),
        "setting input 1 meta-data for the request");

    String output0 = is_torch_model[0] ? "OUTPUT__0" : "OUTPUT0";
    String output1 = is_torch_model[0] ? "OUTPUT__1" : "OUTPUT1";

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0),
        "requesting output 0 for the request");
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1),
        "requesting output 1 for the request");

    // Create the data for the two input tensors. Initialize the first
    // to unique values and the second to all ones.
    BytePointer input0_data;
    BytePointer input1_data;
    if (is_int[0]) {
      IntPointer[] p0 = {null}, p1 = {null};
      GenerateInputData(p0, p1);
      input0_data = p0[0].getPointer(BytePointer.class);
      input1_data = p1[0].getPointer(BytePointer.class);
    } else {
      FloatPointer[] p0 = {null}, p1 = {null};
      GenerateInputData(p0, p1);
      input0_data = p0[0].getPointer(BytePointer.class);
      input1_data = p1[0].getPointer(BytePointer.class);
    }

    long input0_size = input0_data.limit();
    long input1_size = input1_data.limit();

    Pointer input0_base = input0_data;
    Pointer input1_base = input1_data;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input0, input0_base, input0_size, requested_memory_type,
            0 /* memory_type_id */),
        "assigning INPUT0 data");
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input1, input1_base, input1_size, requested_memory_type,
            0 /* memory_type_id */),
        "assigning INPUT1 data");

    // Perform inference...
    InferAndVerify(
        server, irequest, allocator, check_accuracy, input0_data, input1_data,
        output0, output1, input0_size, datatype, is_int[0]);

    // Modify some input data in place and then reuse the request
    // object. For simplicity we only do this when the input tensors are
    // in non-pinned system memory.
    if (!enforce_memory_type
        || (requested_memory_type == TRITONSERVER_MEMORY_CPU)) {
      if (is_int[0]) {
        new IntPointer(input0_data).put(0, 27);
      } else {
        new FloatPointer(input0_data).put(0, 27.0f);
      }

      InferAndVerify(
          server, irequest, allocator, check_accuracy, input0_data,
          input1_data, output0, output1, input0_size, datatype, is_int[0]);
    }

    // Remove input data and then add back different data.
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestRemoveAllInputData(irequest, input0),
        "removing INPUT0 data");
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input0, input1_base, input1_size, requested_memory_type,
            0 /* memory_type_id */),
        "assigning INPUT1 data to INPUT0");

    // Both inputs are using input1_data...
    InferAndVerify(
        server, irequest, allocator, check_accuracy, input1_data, input1_data,
        output0, output1, input0_size, datatype, is_int[0]);

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestDelete(irequest),
        "deleting inference request");

    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorDelete(allocator),
        "deleting response allocator");
  }

  // Returns the value that follows command-line option 'option', expected at
  // args[index]. When the option is the last argument, prints the usage
  // message and exits instead of failing with an
  // ArrayIndexOutOfBoundsException.
  private static String OptionValue(String[] args, int index, String option)
  {
    if (index >= args.length) {
      Usage(option + " requires a value");
    }
    return args[index];
  }

  // Test entry point.
  // Parses the command line, starts an in-process Triton server on the given
  // model repository, waits for the server and the 'simple' model to become
  // ready, then runs RunInference() for the requested number of iterations
  // while a second thread samples heap usage with ValidateMemoryGrowth().
  // The verdict is printed as "Memory growth test passed" or
  // "Memory growth test FAILED"; the process itself exits with status 0 once
  // the run completes.
  public static void main(String[] args) throws Exception
  {
    int num_iterations = 1000000;
    String model_repository_path = null;
    int verbose_level = 0;
    boolean check_accuracy = false;

    // Parse commandline...
    for (int i = 0; i < args.length; i++) {
      switch (args[i]) {
        case "-i":
          try {
            num_iterations = Integer.parseInt(OptionValue(args, ++i, "-i"));
          }
          catch (NumberFormatException e) {
            Usage("-i must be used to specify number of iterations");
          }
          break;
        case "-m":
          enforce_memory_type = true;
          String memory_type = OptionValue(args, ++i, "-m");
          if (memory_type.equals("system")) {
            requested_memory_type = TRITONSERVER_MEMORY_CPU;
          } else if (memory_type.equals("pinned")) {
            requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED;
          } else if (memory_type.equals("gpu")) {
            requested_memory_type = TRITONSERVER_MEMORY_GPU;
          } else {
            Usage(
                "-m must be used to specify one of the following types:"
                + " <\"system\"|\"pinned\"|gpu>");
          }
          break;
        case "-r":
          model_repository_path = OptionValue(args, ++i, "-r");
          break;
        case "-v":
          verbose_level = 1;
          break;
        case "-c":
          check_accuracy = true;
          break;
        case "-?":
          Usage(null);
          break;
        case "--max-growth":
          try {
            // Given as a percentage on the command line, stored as a
            // fraction.
            max_growth_allowed =
                Integer.parseInt(OptionValue(args, ++i, "--max-growth"))
                / 100.0f;
          }
          catch (NumberFormatException e) {
            Usage(
                "--max-growth must be an integer value specifying allowed memory growth (%)");
          }
          break;
        case "--max-memory":
          try {
            max_mem_allowed =
                Integer.parseInt(OptionValue(args, ++i, "--max-memory"));
          }
          catch (NumberFormatException e) {
            Usage(
                "--max-memory must be an integer value specifying maximum allowed memory (MB)");
          }
          break;
      }
    }

    if (model_repository_path == null) {
      Usage("-r must be used to specify model repository path");
    }
    // The response allocator only hands out system memory, so no other
    // memory type can be enforced.
    if (enforce_memory_type
        && requested_memory_type != TRITONSERVER_MEMORY_CPU) {
      Usage("-m can only be set to \"system\" without enabling GPU");
    }

    // Check API version.
    int[] api_version_major = {0}, api_version_minor = {0};
    FAIL_IF_ERR(
        TRITONSERVER_ApiVersion(api_version_major, api_version_minor),
        "getting Triton API version");
    if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major[0])
        || (TRITONSERVER_API_VERSION_MINOR > api_version_minor[0])) {
      FAIL("triton server API version mismatch");
    }

    // Create the server...
    TRITONSERVER_ServerOptions server_options =
        new TRITONSERVER_ServerOptions(null);
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsNew(server_options),
        "creating server options");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetModelRepositoryPath(
            server_options, model_repository_path),
        "setting model repository path");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level),
        "setting verbose logging level");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetBackendDirectory(
            server_options, "/opt/tritonserver/backends"),
        "setting backend directory");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetRepoAgentDirectory(
            server_options, "/opt/tritonserver/repoagents"),
        "setting repository agent directory");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true),
        "setting strict model configuration");
    double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY;
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(
            server_options, min_compute_capability),
        "setting minimum supported CUDA compute capability");

    TRITONSERVER_Server server_ptr = new TRITONSERVER_Server(null);
    FAIL_IF_ERR(
        TRITONSERVER_ServerNew(server_ptr, server_options), "creating server");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsDelete(server_options),
        "deleting server options");

    TRITONSERVER_ServerDeleter server =
        new TRITONSERVER_ServerDeleter(server_ptr);

    // Wait until the server is both live and ready.
    int health_iters = 0;
    while (true) {
      boolean[] live = {false}, ready = {false};
      FAIL_IF_ERR(
          TRITONSERVER_ServerIsLive(server, live),
          "unable to get server liveness");
      FAIL_IF_ERR(
          TRITONSERVER_ServerIsReady(server, ready),
          "unable to get server readiness");
      System.out.println(
          "Server Health: live " + live[0] + ", ready " + ready[0]);
      if (live[0] && ready[0]) {
        break;
      }

      if (++health_iters >= 10) {
        FAIL("failed to find healthy inference server");
      }

      Thread.sleep(500);
    }

    // Print status of the server.
    {
      TRITONSERVER_Message server_metadata_message =
          new TRITONSERVER_Message(null);
      FAIL_IF_ERR(
          TRITONSERVER_ServerMetadata(server, server_metadata_message),
          "unable to get server metadata message");
      BytePointer buffer = new BytePointer((Pointer) null);
      SizeTPointer byte_size = new SizeTPointer(1);
      FAIL_IF_ERR(
          TRITONSERVER_MessageSerializeToJson(
              server_metadata_message, buffer, byte_size),
          "unable to serialize server metadata message");

      System.out.println("Server Status:");
      System.out.println(buffer.limit(byte_size.get()).getString());

      FAIL_IF_ERR(
          TRITONSERVER_MessageDelete(server_metadata_message),
          "deleting status metadata");
    }

    String model_name = "simple";

    // Wait for the model to become available.
    boolean[] is_torch_model = {false};
    boolean[] is_int = {true};
    boolean[] is_ready = {false};
    health_iters = 0;
    while (!is_ready[0]) {
      FAIL_IF_ERR(
          TRITONSERVER_ServerModelIsReady(server, model_name, 1, is_ready),
          "unable to get model readiness");
      if (!is_ready[0]) {
        if (++health_iters >= 10) {
          FAIL("model failed to be ready in 10 iterations");
        }
        Thread.sleep(500);
        continue;
      }

      TRITONSERVER_Message model_metadata_message =
          new TRITONSERVER_Message(null);
      FAIL_IF_ERR(
          TRITONSERVER_ServerModelMetadata(
              server, model_name, 1, model_metadata_message),
          "unable to get model metadata message");
      BytePointer buffer = new BytePointer((Pointer) null);
      SizeTPointer byte_size = new SizeTPointer(1);
      FAIL_IF_ERR(
          TRITONSERVER_MessageSerializeToJson(
              model_metadata_message, buffer, byte_size),
          "unable to serialize model status protobuf");

      JsonParser parser = new JsonParser();
      JsonObject model_metadata = null;
      try {
        model_metadata = parser.parse(buffer.limit(byte_size.get()).getString())
                             .getAsJsonObject();
      }
      catch (Exception e) {
        FAIL("error: failed to parse model metadata from JSON: " + e);
      }

      FAIL_IF_ERR(
          TRITONSERVER_MessageDelete(model_metadata_message),
          "deleting status protobuf");

      if (!model_metadata.get("name").getAsString().equals(model_name)) {
        FAIL("unable to find metadata for model");
      }

      boolean found_version = false;
      if (model_metadata.has("versions")) {
        for (JsonElement version :
             model_metadata.get("versions").getAsJsonArray()) {
          if (version.getAsString().equals("1")) {
            found_version = true;
            break;
          }
        }
      }
      if (!found_version) {
        FAIL("unable to find version 1 status for model");
      }

      FAIL_IF_ERR(
          ParseModelMetadata(model_metadata, is_int, is_torch_model),
          "parsing model metadata");
    }

    // Monitor heap usage on a separate thread for as long as the inference
    // loop below is running.
    Runnable runnable = () ->
    {
      boolean passed =
          ValidateMemoryGrowth(max_growth_allowed, max_mem_allowed);

      // Sleep to give the garbage collector time to free the server.
      // This avoids race conditions between Triton bindings' printing and
      // Java's native printing below.
      try {
        Thread.sleep(5000);
      }
      catch (InterruptedException e) {
        System.out.println("Sleep interrupted: " + e.toString());
      }

      if (passed) {
        System.out.println("Memory growth test passed");
      } else {
        System.out.println("Memory growth test FAILED");
      }
    };
    Thread memory_thread = new Thread(runnable);
    memory_thread.start();

    for (int i = 0; i < num_iterations; i++) {
      // Release the native pointers created by each iteration as soon as it
      // finishes.
      try (PointerScope scope = new PointerScope()) {
        RunInference(
            server, model_name, is_int, is_torch_model, check_accuracy);
      }
    }
    // Tell the monitoring thread to stop sampling and report its verdict.
    done = true;
    memory_thread.join();

    System.exit(0);
  }
}


================================================
FILE: qa/L0_java_memory_growth/test.sh
================================================
#!/bin/bash
# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Set up test files based on installation instructions
# https://github.com/bytedeco/javacpp-presets/blob/master/tritonserver/README.md
# Each of the three settings below keeps a value already present in the
# environment and otherwise falls back to the default shown.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="https://github.com/triton-inference-server"}
# NOTE: despite its name, JAVACPP_BRANCH holds the URL of the javacpp-presets
# repository; the branch/tag to use is JAVACPP_BRANCH_TAG.
JAVACPP_BRANCH=${JAVACPP_BRANCH:="https://github.com/bytedeco/javacpp-presets.git"}
JAVACPP_BRANCH_TAG=${JAVACPP_BRANCH_TAG:="master"}
# Abort the script as soon as any of the following commands fails.
set -e
# NOTE: TRITON_CLIENT_REPO_TAG has no default in this script; it is expected
# to be provided by the environment.
git clone --single-branch --depth=1 -b ${TRITON_CLIENT_REPO_TAG} ${TRITON_REPO_ORGANIZATION}/client.git
# The build script is sourced, so it runs inside this shell.
source client/src/java-api-bindings/scripts/install_dependencies_and_build.sh -b $PWD --javacpp-branch ${JAVACPP_BRANCH} --javacpp-tag ${JAVACPP_BRANCH_TAG} --keep-build-dependencies
# NOTE(review): presumably the sourced script leaves the shell one directory
# below the test directory - confirm against the build script.
cd ..

export MAVEN_OPTS="-XX:MaxGCPauseMillis=40"
MODEL_REPO=`pwd`/models
SAMPLES_REPO=`pwd`/javacpp-presets/tritonserver/samples/simple
# Maven invocation shared by all test runs; each run appends its own
# -Dexec.args to it.
BASE_COMMAND="mvn clean compile -f $SAMPLES_REPO exec:java -Djavacpp.platform=linux-x86_64"
source ../common/util.sh

# Create local model repository
# Start from an empty directory and copy in the "simple" model from the
# L0_simple_ensemble test.
rm -rf ${MODEL_REPO}
mkdir ${MODEL_REPO}
cp -r `pwd`/../L0_simple_ensemble/models/simple ${MODEL_REPO}/.

# Add the test program to the samples project and replace every occurrence
# of "Simple" in its pom.xml with "MemoryGrowthTest".
cp MemoryGrowthTest.java $SAMPLES_REPO
sed -i 's/Simple/MemoryGrowthTest/g' $SAMPLES_REPO/pom.xml

# Remove logs left over from earlier runs. RET holds the overall test status
# and is set to 1 by any failing check below.
rm -f *.log
RET=0


# Sanity test: check accuracy
ITERS=200000

LOG_IDX=0
CLIENT_LOG="./client_$LOG_IDX.log"

# 'set -e' was enabled for the setup steps above. Turn it off before running
# the tests: with errexit still active, a failing test command would end the
# script immediately, so the failure handling below (message, log dump and
# RET bookkeeping) and any later test run would never execute.
set +e

echo -e "\nRunning Sanity Test (accuracy checking)\n"
$BASE_COMMAND -Dexec.args="-r $MODEL_REPO -i $ITERS" >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed to run sanity test to complete\n***"
    RET=1
fi

# The client must report success exactly once; otherwise dump its log.
if [ `grep -c "Memory growth test passed" $CLIENT_LOG` != "1" ]; then
    echo -e "\n***\n*** Failed. Expected 1 'Memory growth test passed' in $CLIENT_LOG\n***"
    cat $CLIENT_LOG
    RET=1
fi

# Write the next run to its own log file
LOG_IDX=$((LOG_IDX + 1))
CLIENT_LOG="./client_${LOG_IDX}.log"

# Longer-running memory growth test
# Iteration count and allowed growth depend on whether a long perf run
# was requested through TRITON_PERF_LONG.
if [ "$TRITON_PERF_LONG" == 1 ]; then
    # ~1 day
    ITERS=150000000
    MAX_MEM_GROWTH_MB=25
else
    ITERS=1000000
    MAX_MEM_GROWTH_MB=10
fi

echo -e "\nRunning Memory Growth Test, $ITERS Iterations\n"
$BASE_COMMAND -Dexec.args="-r $MODEL_REPO -c -i $ITERS --max-growth $MAX_MEM_GROWTH_MB" >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed to run memory growth test to complete\n***"
    RET=1
fi

# Exactly one success line is expected in the log; anything else is a
# failure and the full log is printed for diagnosis.
if [ $(grep -c "Memory growth test passed" $CLIENT_LOG) != "1" ]; then
    echo -e "\n***\n*** Failed. Expected 1 'Memory growth test passed' in $CLIENT_LOG\n***"
    cat $CLIENT_LOG
    RET=1
fi

# Print the overall verdict and use it as the script's exit status
if [ $RET -ne 0 ]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_java_resnet/ResnetTest.java
================================================
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import static org.bytedeco.tritonserver.global.tritonserver.*;

import com.google.gson.*;
import java.io.*;
import java.util.*;
import java.util.concurrent.*;
import org.bytedeco.javacpp.*;
import org.bytedeco.tritonserver.tritonserver.*;

public class ResnetTest {
  // Maximum allowed difference from expected model outputs
  private static final float ALLOWED_DELTA = .001f;
  // Names of the models that main() runs inference against, in this order.
  private static final String[] MODELS = {
      "resnet50_fp32_libtorch",
      "resnet50_fp32_onnx",
  };
  // Value passed to the server options as the minimum supported CUDA
  // compute capability.
  private static final double TRITON_MIN_COMPUTE_CAPABILITY = 7.5;
  // Framework a model belongs to. PerformInference() derives it from the
  // model name, starting from NONE.
  private enum Backend {
    NONE,
    ONNX,
    TF,
    TORCH,
  }

  // Prints "failure: <MSG>" on stderr and terminates the JVM with exit
  // status 1.
  static void FAIL(String MSG)
  {
    String report = new StringBuilder("failure: ").append(MSG).toString();
    System.err.println(report);
    System.exit(1);
  }

  // Does nothing when 'err__' is null. Otherwise prints the error's code
  // string and message (prefixed with MSG) on stderr, deletes the error
  // object and terminates the JVM with exit status 1.
  static void FAIL_IF_ERR(TRITONSERVER_Error err__, String MSG)
  {
    if (err__ == null) {
      return;
    }

    StringBuilder report = new StringBuilder("error: ");
    report.append(MSG).append(":");
    report.append(TRITONSERVER_ErrorCodeString(err__));
    report.append(" - ");
    report.append(TRITONSERVER_ErrorMessage(err__));
    System.err.println(report.toString());

    TRITONSERVER_ErrorDelete(err__);
    System.exit(1);
  }

  // Set to true by main() when the -m option is given.
  static boolean enforce_memory_type = false;
  // Memory type selected with -m. Never assigned when -m is absent, so it
  // keeps the Java default value 0 in that case.
  static int requested_memory_type;

  // Wraps an existing TRITONSERVER_Server pointer and registers a
  // deallocator for it whose deallocate() calls TRITONSERVER_ServerDelete.
  static class TRITONSERVER_ServerDeleter extends TRITONSERVER_Server {
    public TRITONSERVER_ServerDeleter(TRITONSERVER_Server p)
    {
      super(p);
      Deallocator server_deleter = new DeleteDeallocator(this);
      deallocator(server_deleter);
    }

    // Deallocator that deletes the server it points to.
    protected static class DeleteDeallocator
        extends TRITONSERVER_Server implements Deallocator {
      DeleteDeallocator(Pointer p)
      {
        super(p);
      }

      @Override
      public void deallocate()
      {
        TRITONSERVER_ServerDelete(this);
      }
    }
  }

  // Prints 'msg' (when not null) followed by the command-line help on
  // stderr, then terminates the JVM with exit status 1.
  static void Usage(String msg)
  {
    if (msg != null) {
      System.err.println(msg);
    }

    String program = ResnetTest.class.getSimpleName();
    String memory_type_help = "\t-m <\"system\"|\"pinned\"|gpu>"
        + " Enforce the memory type for input and output tensors."
        + " If not specified, inputs will be in system memory and outputs"
        + " will be based on the model's preferred type.";
    String[] help_lines = {
        "Usage: java " + program + " [options]",
        memory_type_help,
        "\t-v Enable verbose logging",
        "\t-r [model repository absolute path]",
    };
    for (String line : help_lines) {
      System.err.println(line);
    }

    System.exit(1);
  }

  // Response allocator callback: supplies the buffer for one result tensor.
  // Non-empty buffers are always allocated in system (CPU) memory with
  // Pointer.malloc(); the tensor name is attached as 'buffer_userp'.
  static class ResponseAlloc extends TRITONSERVER_ResponseAllocatorAllocFn_t {
    // Returns null on success, or a TRITONSERVER_Error when the buffer
    // cannot be allocated.
    @Override
    public TRITONSERVER_Error call(
        TRITONSERVER_ResponseAllocator allocator, String tensor_name,
        long byte_size, int preferred_memory_type,
        long preferred_memory_type_id, Pointer userp, PointerPointer buffer,
        PointerPointer buffer_userp, IntPointer actual_memory_type,
        LongPointer actual_memory_type_id)
    {
      // Initially report the preferred memory type and id. The type is
      // overridden below whenever a buffer is actually allocated.
      actual_memory_type.put(0, preferred_memory_type);
      actual_memory_type_id.put(0, preferred_memory_type_id);

      // If 'byte_size' is zero just return 'buffer' == nullptr, we don't
      // need to do any other book-keeping.
      if (byte_size == 0) {
        buffer.put(0, null);
        buffer_userp.put(0, null);
        System.out.println(
            "allocated " + byte_size + " bytes for result tensor "
            + tensor_name);
        return null; // Success
      }

      // Buffers are always placed in system memory. main() rejects every
      // -m value other than "system", so an enforced memory type is CPU as
      // well and needs no separate handling here.
      actual_memory_type.put(0, TRITONSERVER_MEMORY_CPU);
      Pointer allocated_ptr = Pointer.malloc(byte_size);

      // Report an allocation failure to the server instead of returning
      // success without a buffer. The reference itself is checked too, so
      // that a null result cannot cause a NullPointerException.
      if (allocated_ptr == null || allocated_ptr.isNull()) {
        buffer.put(0, null);
        buffer_userp.put(0, null);
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            "failed to allocate " + byte_size + " bytes in "
                + TRITONSERVER_MemoryTypeString(TRITONSERVER_MEMORY_CPU)
                + " for result tensor " + tensor_name);
      }

      // Pass the tensor name with buffer_userp so it is available when
      // releasing the buffer.
      buffer.put(0, allocated_ptr);
      buffer_userp.put(0, Loader.newGlobalRef(tensor_name));
      System.out.println(
          "allocated " + byte_size + " bytes in "
          + TRITONSERVER_MemoryTypeString(actual_memory_type.get())
          + " for result tensor " + tensor_name);

      return null; // Success
    }
  }

  // Response allocator release callback: frees a buffer handed out by
  // ResponseAlloc together with the global reference stored in
  // 'buffer_userp'.
  static class ResponseRelease
      extends TRITONSERVER_ResponseAllocatorReleaseFn_t {
    // Always returns null (success).
    @Override
    public TRITONSERVER_Error call(
        TRITONSERVER_ResponseAllocator allocator, Pointer buffer,
        Pointer buffer_userp, long byte_size, int memory_type,
        long memory_type_id)
    {
      Pointer.free(buffer);

      // ResponseAlloc stores no global reference for zero-sized tensors,
      // so only delete the reference when one was attached.
      if (buffer_userp != null) {
        Loader.deleteGlobalRef(buffer_userp);
      }

      return null; // Success
    }
  }

  // Request release callback registered on each inference request.
  // It intentionally does nothing: the request object is deleted
  // explicitly by PerformInference().
  static class InferRequestComplete
      extends TRITONSERVER_InferenceRequestReleaseFn_t {
    @Override
    public void call(
        TRITONSERVER_InferenceRequest request, int flags, Pointer userp)
    {
      // We reuse the request so we don't delete it here.
    }
  }

  // Response complete callback: hands a non-null response to the future
  // that was registered in 'futures' under the 'userp' key.
  static class InferResponseComplete
      extends TRITONSERVER_InferenceResponseCompleteFn_t {
    @Override
    public void call(
        TRITONSERVER_InferenceResponse response, int flags, Pointer userp)
    {
      // Nothing to deliver when no response object was provided.
      if (response == null) {
        return;
      }

      CompletableFuture<TRITONSERVER_InferenceResponse> pending =
          futures.get(userp);
      pending.complete(response);
    }
  }

  // Futures of in-flight inferences. PerformInference() registers a future
  // under the request pointer, which is also passed as the response
  // callback's 'userp'; InferResponseComplete completes it.
  static ConcurrentHashMap<
      Pointer, CompletableFuture<TRITONSERVER_InferenceResponse>> futures =
      new ConcurrentHashMap<>();
  // Shared callback instances that PerformInference() passes to the
  // Triton API.
  static ResponseAlloc responseAlloc = new ResponseAlloc();
  static ResponseRelease responseRelease = new ResponseRelease();
  static InferRequestComplete inferRequestComplete = new InferRequestComplete();
  static InferResponseComplete inferResponseComplete =
      new InferResponseComplete();

  // Stores a newly allocated buffer of 3 * 224 * 224 floats, every element
  // set to 1, in input_data[0].
  static void GenerateInputData(FloatPointer[] input_data)
  {
    // Input size is 3 * 224 * 224
    final int element_count = 3 * 224 * 224;
    input_data[0] = new FloatPointer(element_count);

    FloatPointer ones = input_data[0];
    for (int idx = 0; idx != element_count; idx++) {
      ones.put(idx, 1);
    }
  }

  // Compares the first 1000 values of 'output' with 'expected_output'.
  // Returns true when every value is within ALLOWED_DELTA of the expected
  // one (in either direction). Otherwise prints the first mismatch and
  // returns false.
  static boolean AreValidResults(
      String model_name, FloatPointer output, FloatPointer expected_output)
  {
    int output_length = 1000;
    for (int i = 0; i < output_length; ++i) {
      float actual = output.get(i);
      float expected = expected_output.get(i);

      // Compare the magnitude of the difference so that values that are
      // too small are rejected as well as values that are too large. The
      // negated "<=" also rejects NaN, for which every comparison is false.
      if (!(Math.abs(actual - expected) <= ALLOWED_DELTA)) {
        System.out.println(
            model_name + " inference failure: unexpected output "
            + "in " + model_name + ", index " + i);

        System.out.println("Value: " + actual + ", expected " + expected);

        return false; // Failure
      }
    }
    return true; // Success
  }

  // Validates the single output of 'response' (name, shape, datatype and,
  // when enforced, memory type), copies its data and compares it with the
  // expected values read from
  // expected_output_data/expected_output_<backend>.txt.
  // Metadata mismatches terminate the process through FAIL(); the result of
  // the value comparison is only printed as "<backend> test PASSED" or
  // "<backend> test FAILED". 'input_data' is not used.
  static void Check(
      String model_name, Backend backend,
      TRITONSERVER_InferenceResponse response, Pointer input_data,
      String output, int expected_datatype) throws Exception
  {
    HashMap<String, Pointer> output_data = new HashMap<>();

    // Number of values in the model output; used both for the shape check
    // and for reading the expected values.
    final int output_length = backend == Backend.TF ? 1001 : 1000;

    int[] output_count = {0};
    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseOutputCount(response, output_count),
        "getting number of response outputs");
    if (output_count[0] != 1) {
      FAIL("expecting 1 response output, got " + output_count[0]);
    }

    for (int idx = 0; idx < output_count[0]; ++idx) {
      BytePointer cname = new BytePointer((Pointer) null);
      IntPointer datatype = new IntPointer(1);
      LongPointer shape = new LongPointer((Pointer) null);
      LongPointer dim_count = new LongPointer(1);
      Pointer base = new Pointer();
      SizeTPointer byte_size = new SizeTPointer(1);
      IntPointer memory_type = new IntPointer(1);
      LongPointer memory_type_id = new LongPointer(1);
      Pointer userp = new Pointer();

      FAIL_IF_ERR(
          TRITONSERVER_InferenceResponseOutput(
              response, idx, cname, datatype, shape, dim_count, base, byte_size,
              memory_type, memory_type_id, userp),
          "getting output info");

      if (cname.isNull()) {
        FAIL("unable to get output name");
      }

      String name = cname.getString();
      if (!name.equals(output)) {
        FAIL("unexpected output '" + name + "'");
      }

      // The output is expected to have shape [1, output_length].
      if ((dim_count.get() != 2) || (shape.get(0) != 1)
          || shape.get(1) != output_length) {
        FAIL("unexpected shape for '" + name + "'");
      }

      if (datatype.get() != expected_datatype) {
        FAIL(
            "unexpected datatype '"
            + TRITONSERVER_DataTypeString(datatype.get()) + "' for '" + name
            + "'");
      }

      if (enforce_memory_type && (memory_type.get() != requested_memory_type)) {
        FAIL(
            "unexpected memory type, expected to be allocated in "
            + TRITONSERVER_MemoryTypeString(requested_memory_type) + ", got "
            + TRITONSERVER_MemoryTypeString(memory_type.get()) + ", id "
            + memory_type_id.get() + " for " + name);
      }

      // We make a copy of the data here... which we could avoid for
      // performance reasons but ok for this simple example.
      BytePointer odata = new BytePointer(byte_size.get());
      output_data.put(name, odata);
      odata.put(base.limit(byte_size.get()));
    }

    // Expected output for model
    String file_name = "expected_output_data/expected_output_";
    switch (backend) {
      case ONNX:
        file_name += "onnx";
        break;
      case TORCH:
        file_name += "pytorch";
        break;
      default:
        FAIL("Unsupported model type");
        break;
    }
    file_name += ".txt";

    FloatPointer expected_output = new FloatPointer(output_length);

    try (Scanner scanner = new Scanner(new File(file_name))) {
      // The data files use '.' as the decimal separator. Fix the locale so
      // that parsing does not depend on the default locale of the machine
      // running the test.
      scanner.useLocale(Locale.US);
      for (int i = 0; i < output_length; ++i) {
        expected_output.put(i, scanner.nextFloat());
      }
    }

    boolean correct_results = AreValidResults(
        model_name, new FloatPointer(output_data.get(output)), expected_output);

    if (correct_results) {
      System.out.println(backend.name() + " test PASSED");
    } else {
      System.out.println(backend.name() + " test FAILED");
    }
  }

  // Runs one inference on 'model_name' and validates the response with
  // Check(). The steps are: infer the backend from the model name, wait for
  // the model to be ready, build the request with a single all-ones FP32
  // input, run it asynchronously, wait for the response, check it, and
  // delete the response, request and allocator. Any Triton API error
  // terminates the process through FAIL_IF_ERR()/FAIL().
  static void PerformInference(
      TRITONSERVER_ServerDeleter server, String model_name) throws Exception
  {
    // Get type of model
    Backend backend = Backend.NONE;
    if (model_name.contains("onnx")) {
      backend = Backend.ONNX;
    } else if (model_name.contains("torch")) {
      backend = Backend.TORCH;
    } else {
      FAIL(
          "Supported model types (Onnx, Torch) "
          + "cannot be inferred from model name " + model_name);
    }

    // Wait for the model to become available.
    // Readiness is polled every 500 ms and the test fails at the 10th
    // unsuccessful check.
    boolean[] is_ready = {false};
    int health_iters = 0;
    while (!is_ready[0]) {
      FAIL_IF_ERR(
          TRITONSERVER_ServerModelIsReady(server, model_name, 1, is_ready),
          "unable to get model readiness");
      if (!is_ready[0]) {
        if (++health_iters >= 10) {
          FAIL(model_name + " model failed to be ready in 10 iterations");
        }
        Thread.sleep(500);
        continue;
      }
    }

    // Create the allocator that will be used to allocate buffers for
    // the result tensors.
    TRITONSERVER_ResponseAllocator allocator =
        new TRITONSERVER_ResponseAllocator(null);
    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorNew(
            allocator, responseAlloc, responseRelease, null /* start_fn */),
        "creating response allocator");

    // Inference
    TRITONSERVER_InferenceRequest irequest =
        new TRITONSERVER_InferenceRequest(null);
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestNew(
            irequest, server, model_name, -1 /* model_version */),
        "creating inference request");

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"),
        "setting ID for the request");

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetReleaseCallback(
            irequest, inferRequestComplete, null /* request_release_userp */),
        "setting request release callback");


    // Model inputs
    // Tensor names and input layout depend on the backend. The default
    // shape is {1, 224, 224, 3}; the Torch model uses {1, 3, 224, 224}.
    String input = "";
    String output = "";
    long[] input_shape = {1, 224, 224, 3};

    switch (backend) {
      case ONNX:
        input = "import/input:0";
        output = "import/resnet_v1_50/predictions/Softmax:0";
        break;
      case TF:
        // Not reachable from the name check above, which only ever
        // selects ONNX or TORCH.
        input = "input";
        output = "probabilities";
        break;
      case TORCH:
        input = "INPUT__0";
        input_shape[1] = 3;
        input_shape[3] = 224;
        output = "OUTPUT__0";
        break;
      default:
        FAIL("Unsupported model type");
        break;
    }

    int datatype = TRITONSERVER_TYPE_FP32;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAddInput(
            irequest, input, datatype, input_shape, input_shape.length),
        "setting input 0 meta-data for the request");

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output),
        "requesting output 0 for the request");

    // Create the data for the single input tensor: GenerateInputData()
    // fills it with ones. The same buffer is then viewed as bytes for the
    // Triton API.
    BytePointer input_data;
    FloatPointer[] p0 = {null};
    GenerateInputData(p0);
    input_data = p0[0].getPointer(BytePointer.class);
    long input_size = input_data.limit();
    Pointer input_base = input_data;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input, input_base, input_size, requested_memory_type,
            0 /* memory_type_id */),
        "assigning INPUT data");

    // Perform inference...
    {
      // The future is registered under the request pointer, which is also
      // passed as the response callback's user pointer so that
      // InferResponseComplete can find it.
      CompletableFuture<TRITONSERVER_InferenceResponse> completed =
          new CompletableFuture<>();
      futures.put(irequest, completed);

      FAIL_IF_ERR(
          TRITONSERVER_InferenceRequestSetResponseCallback(
              irequest, allocator, null /* response_allocator_userp */,
              inferResponseComplete, irequest),
          "setting response callback");

      FAIL_IF_ERR(
          TRITONSERVER_ServerInferAsync(server, irequest, null /* trace */),
          "running inference");

      // Wait for the inference to complete.
      TRITONSERVER_InferenceResponse completed_response = completed.get();
      futures.remove(irequest);

      FAIL_IF_ERR(
          TRITONSERVER_InferenceResponseError(completed_response),
          "response status");

      Check(
          model_name, backend, completed_response, input_data, output,
          datatype);

      FAIL_IF_ERR(
          TRITONSERVER_InferenceResponseDelete(completed_response),
          "deleting inference response");
    }

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestDelete(irequest),
        "deleting inference request");

    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorDelete(allocator),
        "deleting response allocator");
  }

  // Entry point: parses the command line, creates an in-process Triton
  // server for the given model repository, waits until it is live and
  // ready, prints the server metadata and runs one inference per entry of
  // MODELS.
  //
  // Options: -r <path> (required), -m <system|pinned|gpu>, -v, -?.
  // Invalid usage and Triton API errors terminate the JVM with exit
  // status 1; a complete run exits with status 0.
  public static void main(String[] args) throws Exception
  {
    String model_repository_path = null;
    int verbose_level = 0;

    // Parse commandline...
    for (int i = 0; i < args.length; i++) {
      switch (args[i]) {
        case "-m": {
          // Report a missing value through Usage() instead of failing with
          // an ArrayIndexOutOfBoundsException.
          if (i + 1 >= args.length) {
            Usage(
                "-m must be used to specify one of the following types:"
                + " <\"system\"|\"pinned\"|gpu>");
          }
          enforce_memory_type = true;
          i++;
          if (args[i].equals("system")) {
            requested_memory_type = TRITONSERVER_MEMORY_CPU;
          } else if (args[i].equals("pinned")) {
            requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED;
          } else if (args[i].equals("gpu")) {
            requested_memory_type = TRITONSERVER_MEMORY_GPU;
          } else {
            Usage(
                "-m must be used to specify one of the following types:"
                + " <\"system\"|\"pinned\"|gpu>");
          }
          break;
        }
        case "-r":
          // Same guard as for -m: the option needs a value.
          if (i + 1 >= args.length) {
            Usage("-r must be used to specify model repository path");
          }
          model_repository_path = args[++i];
          break;
        case "-v":
          verbose_level = 1;
          break;
        case "-?":
          Usage(null);
          break;
      }
    }

    if (model_repository_path == null) {
      Usage("-r must be used to specify model repository path");
    }
    // Only system memory can be enforced by this test.
    if (enforce_memory_type
        && requested_memory_type != TRITONSERVER_MEMORY_CPU) {
      Usage("-m can only be set to \"system\" without enabling GPU");
    }

    // Check API version.
    // The major versions must match and the library must provide at least
    // the minor version these bindings were built against.
    int[] api_version_major = {0}, api_version_minor = {0};
    FAIL_IF_ERR(
        TRITONSERVER_ApiVersion(api_version_major, api_version_minor),
        "getting Triton API version");
    if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major[0])
        || (TRITONSERVER_API_VERSION_MINOR > api_version_minor[0])) {
      FAIL("triton server API version mismatch");
    }

    // Create the server...
    TRITONSERVER_ServerOptions server_options =
        new TRITONSERVER_ServerOptions(null);
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsNew(server_options),
        "creating server options");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetModelRepositoryPath(
            server_options, model_repository_path),
        "setting model repository path");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level),
        "setting verbose logging level");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetBackendDirectory(
            server_options, "/opt/tritonserver/backends"),
        "setting backend directory");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetRepoAgentDirectory(
            server_options, "/opt/tritonserver/repoagents"),
        "setting repository agent directory");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true),
        "setting strict model configuration");
    double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY;
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(
            server_options, min_compute_capability),
        "setting minimum supported CUDA compute capability");

    TRITONSERVER_Server server_ptr = new TRITONSERVER_Server(null);
    FAIL_IF_ERR(
        TRITONSERVER_ServerNew(server_ptr, server_options), "creating server");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsDelete(server_options),
        "deleting server options");

    // Wrap the raw server pointer in TRITONSERVER_ServerDeleter, which
    // registers a deallocator that deletes the server.
    TRITONSERVER_ServerDeleter server =
        new TRITONSERVER_ServerDeleter(server_ptr);

    // Wait until the server is both live and ready.
    // Health is polled every 500 ms and the test fails at the 10th
    // unsuccessful check.
    int health_iters = 0;
    while (true) {
      boolean[] live = {false}, ready = {false};
      FAIL_IF_ERR(
          TRITONSERVER_ServerIsLive(server, live),
          "unable to get server liveness");
      FAIL_IF_ERR(
          TRITONSERVER_ServerIsReady(server, ready),
          "unable to get server readiness");
      System.out.println(
          "Server Health: live " + live[0] + ", ready " + ready[0]);
      if (live[0] && ready[0]) {
        break;
      }

      if (++health_iters >= 10) {
        FAIL("failed to find healthy inference server");
      }

      Thread.sleep(500);
    }

    // Print status of the server.
    {
      TRITONSERVER_Message server_metadata_message =
          new TRITONSERVER_Message(null);
      FAIL_IF_ERR(
          TRITONSERVER_ServerMetadata(server, server_metadata_message),
          "unable to get server metadata message");
      BytePointer buffer = new BytePointer((Pointer) null);
      SizeTPointer byte_size = new SizeTPointer(1);
      FAIL_IF_ERR(
          TRITONSERVER_MessageSerializeToJson(
              server_metadata_message, buffer, byte_size),
          "unable to serialize server metadata message");

      System.out.println("Server Status:");
      System.out.println(buffer.limit(byte_size.get()).getString());

      FAIL_IF_ERR(
          TRITONSERVER_MessageDelete(server_metadata_message),
          "deleting status metadata");
    }

    for (String model : MODELS) {
      PerformInference(server, model);
    }

    System.exit(0);
  }
}


================================================
FILE: qa/L0_java_resnet/expected_output_data/expected_output_onnx.txt
================================================
0.00016980497
0.0003274878
5.9229897e-05
0.00010386822
5.1683604e-05
0.0005200729
9.252152e-06
3.5043122e-05
1.7310056e-05
0.00014115982
0.0007192011
0.00014146192
5.864904e-05
8.102552e-05
1.6766031e-05
4.9913597e-05
0.00012557638
2.9249455e-05
5.8186713e-05
4.997704e-05
0.00019478115
0.001593597
0.0009770031
0.00022523475
8.752639e-05
0.00011251909
0.00031572866
0.00023567723
0.00017536257
0.00018674227
4.333203e-05
0.00033384693
8.9560366e-05
0.00011413613
0.00028333033
1.6440303e-05
0.000121921854
1.1142264e-05
0.0059000477
3.741594e-05
4.867915e-05
0.00020082401
0.00023553567
0.00016318199
5.550063e-05
0.00012654626
4.0553005e-05
0.00023072284
3.575522e-05
3.5885336e-05
0.000167727
0.0004284156
0.00029606326
0.0005308822
0.00025003406
2.4711164e-05
5.9230402e-05
1.4644651e-05
0.00013812816
0.0030018578
0.0004657613
3.8773556e-05
0.00029646824
0.00039393824
0.0006814109
0.00017464366
0.000501648
6.748e-05
0.00021987612
4.2551095e-05
7.442098e-05
0.00073552737
6.145523e-05
0.0019270201
1.1406245e-05
0.00033168247
2.7609263e-05
0.00055849075
0.0018151653
0.0012854141
0.0005644851
0.0002643019
0.00012686521
0.00031014124
3.576934e-05
1.5226503e-05
0.00023373427
0.00025264034
9.125392e-05
0.00010886967
5.68172e-05
0.00022797973
0.0005024418
0.00013592323
0.00016360248
4.724841e-05
0.00016500028
3.5815625e-05
0.0009926121
0.00018996779
0.00032009778
6.5463086e-05
4.915879e-05
0.0023545807
0.00019779587
9.740985e-06
5.916514e-05
8.342835e-05
3.5502824e-05
5.5155975e-05
0.0002953913
0.14522666
0.00026150284
0.0004633083
0.00010001568
7.724773e-05
0.00020212827
0.0003651837
2.3665098e-05
8.007319e-05
0.000164059
2.0919639e-05
0.00015904989
2.8181286e-05
2.1252014e-05
0.00016757358
0.0026105049
0.00016491314
0.0033536772
0.00045177306
0.00015669028
5.8178866e-05
0.0005335324
7.4700896e-05
4.13347e-05
0.00013332519
0.00024123705
0.00024420477
0.00010249778
0.00014476122
0.00043495715
0.00040990766
0.00021976302
0.00028396113
0.00018292265
0.0005685563
0.0005261158
0.0005394564
0.0006722254
0.00041848654
0.0002058497
0.00020697096
0.00038915384
0.00063642685
3.952872e-05
4.7074976e-05
0.0001484932
0.0001767462
0.00018367334
9.1362854e-05
0.00020925087
4.683706e-05
8.098025e-05
0.00038643452
2.1166008e-05
0.00023816078
0.00040344987
0.00014309994
0.00016946216
0.0001158025
0.00015477811
0.00013820603
0.00039157816
0.00012628519
6.416812e-05
5.319338e-05
8.096635e-05
9.268181e-05
0.00021009038
8.123741e-05
2.1137266e-05
0.00013860558
9.888543e-06
9.180427e-05
5.726596e-05
0.00024706984
3.4873163e-05
9.941785e-05
0.0002506603
0.00011764638
0.00086345134
0.00011305928
3.6803817e-06
8.0881466e-05
0.00017012736
0.0003054968
5.9778555e-05
1.0738367e-05
3.9709514e-05
7.807765e-05
8.485133e-05
1.4551556e-05
5.0553328e-05
0.0001432179
0.00012594614
4.33862e-05
0.00016131556
0.00012815706
4.6910594e-05
5.9233225e-05
5.5869554e-05
7.410936e-05
9.937572e-05
6.092812e-05
6.309549e-05
8.338313e-05
0.00044575817
5.111232e-05
2.1025462e-05
4.1145802e-05
0.00019077354
0.00019071896
0.00025231927
0.00019271992
0.00013492932
0.00010883319
2.025502e-05
0.0002089905
9.62682e-05
0.00012668235
1.5566122e-05
2.2314523e-05
0.00017040399
0.0001946466
2.8189646e-05
4.8383175e-05
0.00013236424
0.00016888845
5.468688e-05
0.00014190435
8.5229825e-05
5.173721e-05
3.7611204e-05
9.9274024e-05
3.191364e-05
6.1621664e-05
0.00013842362
6.9894915e-05
9.658343e-05
6.903254e-05
0.0002400999
0.00026015204
0.000105622945
0.0001664888
0.00013265685
1.5738156e-05
0.0003335177
0.00010971267
0.0002484887
0.00019186472
8.8625755e-05
6.912767e-05
0.00045799493
5.394646e-05
0.00017973136
8.907009e-05
0.000110481764
4.1266052e-05
0.00013683847
4.2938726e-05
0.00012697978
5.5856824e-05
0.00014599289
9.960172e-05
0.00012956791
0.00027035273
0.00026089343
0.00058428914
9.604311e-05
0.00030085753
0.00013629998
7.053258e-05
0.00023789746
0.00045626136
0.00024321792
0.00039255328
9.378134e-05
3.3330132e-05
6.2762956e-05
0.00010464993
6.4440836e-05
0.000114770344
9.773856e-05
0.00024476458
0.00022140365
8.682848e-05
0.00014253015
0.00041922313
9.2946466e-05
0.0007321677
8.819961e-05
0.00033927264
0.0001434792
0.0004997533
5.05367e-05
1.6199812e-05
0.00081437116
0.00029276052
0.0003227374
2.10321e-05
0.00041501687
7.6642566e-05
0.0007460653
0.00010704513
0.0010337052
0.00016585
0.00010267203
9.844521e-05
0.00036912857
0.0004210494
0.0007636784
8.831775e-06
2.4511684e-05
6.654908e-05
3.845051e-05
3.2900447e-05
0.0002467062
5.595124e-05
0.00010915978
1.5788999e-05
0.00010652153
0.0002424042
0.0001448311
1.1700289e-05
3.8083996e-05
9.013652e-05
0.00016588188
0.00014541998
4.446017e-05
5.857866e-05
5.703819e-05
6.140147e-05
2.5429461e-05
1.2527011e-05
0.00029506863
0.00017385624
4.4041873e-05
4.213424e-05
7.223138e-05
5.3147643e-05
0.00028015298
0.0005170326
9.355127e-05
0.00023953259
0.00041169117
6.737018e-05
0.00097511435
0.00044960703
0.00041690134
0.00036505918
0.00035000656
0.00020413095
0.00014936135
4.925268e-05
4.6020858e-05
0.0001434502
3.7963135e-05
0.00053391827
3.7399033e-05
0.000112552734
8.935715e-05
0.0008973427
6.539161e-05
0.00023165658
0.0003438208
6.735287e-05
0.00016886953
0.00042564265
0.0001101864
3.034124e-05
0.000176773
2.9307617e-05
8.214749e-05
7.6573786e-05
0.00032455323
0.00018222861
3.7278707e-05
0.00011895009
6.777756e-05
0.00040660411
4.0756473e-05
2.686724e-05
0.0011102126
1.7472128e-05
3.215658e-05
0.00019766577
2.4107696e-05
9.5941454e-05
0.00013294643
0.012934193
0.0014889088
0.00030110637
0.0004861949
0.00022020873
0.0004120663
0.0028884916
2.075195e-05
5.6945166e-05
0.00010725547
0.00061704434
1.2163917e-05
0.00013528275
0.000321602
0.0049974765
0.00036395655
7.939798e-06
0.0027076406
0.0009837962
0.017314037
0.00036551448
0.00027795092
0.00029623153
0.00016959595
0.00019360533
3.4470788e-05
6.317202e-05
0.00028958637
0.00052192796
9.2430375e-05
0.0010162767
0.00010013961
5.5248547e-05
0.01881616
0.000114972405
0.00012866792
0.0001735118
9.917765e-05
0.0011450195
0.0015877285
0.0017322781
0.00056879356
0.00025545148
0.0007390253
0.00012345372
0.00022441847
0.0001914855
0.0026525552
0.00044881727
0.00034022957
0.00028609563
1.7402317e-05
0.004177963
5.312598e-05
7.086197e-05
1.07296755e-05
0.0003122828
0.0017724611
0.0011016912
2.7802036e-05
0.00044330902
2.7724009e-05
0.00070999836
0.0025074244
0.00029760305
0.0017468698
0.0033079428
0.00023698558
1.8203225e-05
4.298752e-05
0.003792394
0.0043774135
0.0002578806
6.7714565e-05
0.010979906
7.88773e-05
0.00020034179
3.9189625e-05
7.9022284e-05
0.00019010075
0.00018935381
0.000151655
0.00063424406
0.00010652139
2.2907618e-05
0.00021650721
0.0006931013
0.0016945377
0.0018049135
0.0016268345
1.3866996e-05
0.00023594845
0.00077581
0.00037083545
0.0002482703
0.00199448
8.8194734e-05
4.5612232e-05
8.859373e-05
5.174079e-05
0.027618717
7.469677e-05
0.004212717
0.00029109194
0.0042880555
0.0015089285
0.0005760798
0.0002101491
0.0030193415
0.0002710225
1.43144425e-05
0.0012474942
7.6482655e-05
0.012027938
0.0013138817
0.00024912177
0.00039606096
0.00017222571
0.00077096495
9.616005e-05
0.00012808497
0.00011093941
0.0004788455
0.00027597338
0.0018378077
0.00048597282
2.693032e-05
0.00015658996
0.00045992344
4.849936e-05
0.00023919567
0.0032133528
0.0044528083
0.00015469016
8.7847635e-05
0.0121315615
0.00018360339
2.8868575e-05
7.337089e-05
0.000533506
0.0002060245
0.001834617
0.0014196439
0.00109954
0.0014719801
0.00013069775
0.000612675
0.0007288255
4.03345e-05
6.908545e-05
0.0045452276
0.00020541927
0.0022583636
0.00011107671
0.00054280076
0.00014280484
5.260433e-05
0.0013882591
0.0004975726
0.004215462
0.00118553
6.8419955e-05
7.3308154e-05
7.351188e-05
0.0012610124
2.1918344e-05
5.3881315e-05
0.000348318
0.0111174295
0.0001844288
0.00023055756
0.00067666965
2.1618225e-05
0.00065558555
0.00011886986
0.004878329
7.532305e-05
0.00029515053
0.0008771214
0.00044318815
0.00045352246
2.219967e-05
3.4630368e-05
2.1955417e-05
0.0082423575
0.02084665
0.008617819
2.37336e-05
0.0007988152
0.00033299648
0.00053600385
0.00012942769
0.00023972764
0.00047354214
0.0029637653
0.00017331565
5.8418576e-05
0.0026522074
0.00013416266
0.00024219774
0.0002707129
0.0037202735
0.0004878337
0.0016466635
3.0741547e-05
0.00824405
0.0016471919
0.00048588854
0.00041886864
0.00038283042
5.720226e-05
0.0013508176
0.00025465732
6.677686e-05
0.0031950285
0.00022743837
0.0012873787
0.0019100192
0.00016512939
0.0066867983
0.0025570705
7.590332e-05
0.0001290511
0.0013077843
0.009066646
8.278893e-06
0.00014440181
0.008204297
0.0006864818
0.0008325608
0.0047303867
0.00063803
0.00058498216
0.007141755
0.0025759342
1.5265148e-05
0.000791608
0.0002963567
6.699214e-05
0.00015540588
1.9577861e-05
0.00019148094
0.0050711925
0.0003821164
0.00031181856
0.02256623
9.6739546e-05
0.00022743792
0.0002277875
0.00024204118
9.2040355e-05
0.006166843
0.0004336779
0.0001697661
0.0033746548
0.00019502817
5.0561524e-05
2.586181e-05
0.0010798759
3.664102e-05
0.00013510302
0.00016221526
2.2405515e-05
0.0014313295
0.00017091136
0.0023739443
6.802837e-05
0.00064769934
0.00034750463
0.00011071275
1.7708879e-05
0.00013680755
2.4237579e-05
0.0003371289
0.0006825689
0.0028515519
0.00011692811
0.00022007397
0.02142835
0.0017977277
0.00035943018
0.001095244
0.00077389204
0.0002297276
0.025019487
0.0019389915
0.00033054518
0.0114699
5.516768e-05
0.000209548
0.00040630833
2.0364629e-05
0.00039122297
0.0020364495
0.0008940088
6.6173154e-05
0.00034862926
0.0042634625
2.3698478e-05
5.9804384e-05
0.0037845175
0.00018579431
0.0011340764
0.0005943249
0.00020876242
0.0001095363
1.866407e-05
1.5485472e-05
8.666633e-05
0.0040748627
6.6307715e-05
0.00070469885
0.0008672148
0.0002835482
0.0002781067
0.0025088897
0.0002623553
0.0002617934
4.9439703e-05
0.00010924356
0.00043568495
0.002368831
1.9224659e-05
0.0015811798
0.0006842592
0.0002917136
0.0003131275
0.00060534995
0.0001427105
8.8764216e-05
0.001122838
7.210702e-05
0.0041576345
0.00011061608
0.0007480099
1.3065656e-05
4.5712564e-05
0.0007861731
0.0003158539
0.00015036995
0.0003323501
0.0012030656
0.00019688989
0.00016745002
0.00024887823
0.0034065044
0.0023652983
0.00031526107
0.0066307853
0.00017283301
0.0022883036
0.00017895563
0.00018347587
0.00035834042
0.0008326437
0.0017283945
0.00035829068
0.00029964442
2.0670632e-05
0.0008355308
0.00048754443
0.0017713069
0.00191648
1.9209521e-05
0.005908878
0.002205918
0.00039330104
0.00043703758
0.0017654483
0.00013185009
0.00395082
0.0001576185
0.00038202494
0.0038736896
0.00041661857
0.00012902985
7.777089e-05
0.0017715484
0.0023155885
0.00055541855
4.9337166e-05
0.00047428903
0.00043557858
0.00069765287
0.009222093
0.010263749
6.7705434e-05
5.966209e-05
5.7554716e-05
8.994978e-06
0.0009418844
0.00019504203
0.000114773786
0.0004218587
0.00014428151
8.655709e-05
0.0008147674
0.0008794013
0.00014804432
0.0027704514
8.3283114e-05
4.3073826e-05
0.00018634342
9.9652214e-05
0.000109504916
0.0067855045
0.00015742471
0.00077502604
0.0006362351
0.00046153838
5.4576325e-05
0.00017408792
0.0012021991
0.009413977
0.022948345
0.0010692423
3.5031127e-05
5.092194e-05
6.2689374e-05
0.0068375845
0.00027439403
2.1836517e-05
0.0002581114
0.00027914194
0.00027809184
3.0986383e-05
3.457496e-05
0.0046969666
0.00046523788
0.0021990726
5.2927287e-05
0.00029199888
0.0006094933
0.00014609241
0.0005544162
0.0021697562
3.2796317e-05
0.00084513065
0.000516489
0.0005635408
6.230352e-05
0.00054642366
0.00013715419
0.00013440092
0.00011689427
0.00056491833
0.00064705784
0.010491602
3.0012101e-05
0.0005605288
0.0002985542
6.826285e-05
0.0013857664
0.0032425607
1.2750059e-05
0.00404577
0.0050039887
0.0001610246
0.0003332945
0.00028637925
0.0011893546
0.00030820677
0.0022603609
0.0010670897
0.00031939565
4.9374088e-05
9.66541e-06
8.219991e-05
0.00027665813
6.6826746e-05
0.0003693902
7.4780626e-05
0.00018097041
0.0014217026
0.00015682563
5.9905306e-06
0.0035234408
0.0001482323
0.00035662283
4.427336e-05
0.00025081105
0.00036762984
0.00013225578
0.00017834459
0.0041054576
8.1886355e-05
0.0006386442
0.00016379755
0.00014210392
8.108431e-05
0.0007447243
8.90168e-05
9.151607e-05
0.0005884257
0.0022961798
0.00013226802
0.00066101504
0.00046616056
0.00064051466
0.003273349
0.00048656296
0.00022358973
0.0043424554
0.0039812205
0.00028370952
0.0008125159
0.0004582208
0.0012607021
0.0009775694
0.00010673987
6.354423e-05
0.0003419572
0.00018932321
3.1185988e-05
0.00031975837
0.00031104262
7.3926254e-05
0.0011545917
8.575014e-05
0.00023361114
0.0006610472
0.0004883716
0.0003722783
9.297524e-05
0.00012120991
3.4105407e-05
0.00024642906
0.000107494736
6.998423e-05
1.7957382e-05
1.0631384e-05
0.00018812768
9.721867e-05
5.1466308e-05
2.9841798e-05
5.317565e-05
4.5402485e-05
7.383276e-05
5.9323876e-05
0.00011473314
2.5858333e-05
0.0002425595
4.3574375e-05
0.00016768574
0.00012793462
7.1418945e-05
0.00023895786
0.00017441496
2.3925382e-05
0.0007274894
0.00054904143
0.0006600553
0.0003689452
0.00019176958
8.68306e-05
0.00018872788
9.3901745e-06
0.0003732282
9.679007e-05
5.338826e-05
8.710209e-05
0.00010672185
4.1709736e-05
7.757896e-05
1.37239085e-05
3.7243954e-05
0.00015834477
0.0005567674
0.00032743503
0.0011654142
0.00081817544
0.00024791955
0.00015350303
4.055702e-05
1.2827285e-05
0.00036997424
6.42643e-05
0.00015970865
0.00030701264
0.0005480433
3.475775e-05
0.0002730317
0.00013267291
2.429988e-05
0.0001434095
8.784407e-05
0.00047590246
3.644311e-05
0.00023676634
0.0002182384
0.000118374526
0.00029589442
3.6611822e-05
1.2448694e-05
9.5065865e-05
0.00013185348
5.2593718e-05
0.00011015442
1.475699e-05
0.00014547075
0.0006541775

================================================
FILE: qa/L0_java_resnet/expected_output_data/expected_output_pytorch.txt
================================================
-0.30805874
0.07984302
-1.1900374
-1.4836702
-0.5135901
0.36827153
-2.1639166
-0.8705013
-1.8812447
-0.16076666
0.21684004
-0.928281
-1.2953714
-1.0791287
-1.444455
-0.89458805
-0.09590192
-1.3098954
-1.2062448
-1.2327268
-1.0658404
0.9427469
0.5738615
-0.27459937
-1.0188934
-0.35831845
-0.18257675
0.27853626
0.22089688
-0.3340493
-1.979969
-0.555245
-1.0804464
-0.8055694
-0.0004951467
-1.8401799
-0.79792225
-1.4822828
1.3656672
-0.89703584
-1.0853906
-1.1591249
-0.032266144
0.19187923
-0.4777367
0.031621072
-0.7464974
-0.10246294
-1.3072289
-1.8479855
-0.86044043
0.8683053
-0.13818197
-0.5942293
-1.0837044
-1.5115174
-1.4216323
-1.7622145
-1.3229938
0.3092505
-0.91198456
-1.2568892
-0.42140645
0.7647873
0.096434265
-0.2201274
0.20995392
-1.071132
0.14306861
0.7973344
-0.8894367
1.6341836
-0.98152703
1.1916499
-1.625073
0.2928239
-0.8159483
-0.19991271
1.6001159
1.1522979
0.5397157
-0.21569327
-0.5722878
-0.2540483
-1.3144569
-1.3187109
-0.6919892
0.06748002
-0.16136988
-0.16745704
-1.043228
-0.07053011
0.6526221
-0.6888746
-1.0834798
-0.76091695
-0.69209605
-2.3364725
0.20736966
-0.21594861
-0.5073983
-0.18135151
-0.85716504
0.7947216
-1.5203276
-2.1758971
-1.1328814
-0.13168834
-1.00645
-1.0352936
-0.7703913
2.5937598
-0.18291345
0.037092943
-0.8275598
-1.6695257
-0.007664892
1.1827207
-1.3609017
-1.6130087
-0.34498727
-2.0094082
-0.3217112
-1.40436
-1.0353576
-0.5387643
1.3731303
0.17038514
1.3134736
-0.6706346
-1.2812335
-1.2500542
-0.8758088
-1.2494946
-1.1121799
-0.43794972
-1.3552142
-0.85109013
-0.806748
-1.3894855
-0.93128216
-0.5771268
-0.8600849
-0.6528389
-0.96694344
-0.2790189
-0.13756554
0.33111212
-1.017053
-0.06247963
-0.82307434
0.2321171
0.5925774
0.11956272
-0.39129296
-0.96967256
-0.34883505
-0.32861945
-0.17424661
-0.5203654
-0.05074156
-0.5735833
-0.89118445
0.94264233
-0.48076403
0.23871332
-0.5359333
-0.17496297
-0.1825326
-0.8143634
-0.25432184
-0.8875172
-0.40212584
-0.4248538
-1.0707774
0.28054383
-0.8788248
-0.063131236
-0.13580973
-0.633922
-1.0408156
-0.2155596
-0.868021
0.02111919
-0.8062073
0.21586944
-0.84782946
0.36418468
0.23975046
0.07298894
0.8168585
-0.37726068
-0.8602677
-0.21154118
-0.06361114
0.39261663
-1.0140715
-1.0971476
-0.94316417
-0.12982899
-0.7508501
-1.874781
-0.21622303
-0.7669267
-0.42140815
-1.5047493
-0.6215693
-0.2612905
-0.35666725
-1.0537395
-0.38551807
-0.6064094
-1.2473556
-1.0768366
-0.3829122
-0.85829455
0.25932565
-0.9240785
-1.4660195
-1.1539187
0.5768459
-0.21287401
-0.4301784
-0.27853447
-0.5630739
-0.88488144
-0.6149986
-1.2260586
-0.118166335
-0.30751112
-1.3458123
-0.787824
-0.4979396
-0.07821896
-0.47691333
0.21768509
-0.28501546
-0.39360434
-0.99358493
-0.44038853
-1.1004056
-0.36356282
-1.4787167
-0.6785121
-1.0707904
-0.60454124
0.0018921697
-0.60659164
-0.7804347
-0.70279366
-0.45327887
-0.5740117
0.12954347
-1.0870117
-0.071922086
-1.5970279
-0.12967396
-0.41402286
-0.34608856
-0.45053896
-0.050228007
-0.036393084
0.64593357
-0.91866577
-0.79366595
-0.60279816
-0.55361813
-0.9942526
-0.30023605
-1.0588075
-0.1602141
-1.2761784
-0.80111355
-0.7847453
-0.4366057
-0.29868704
-0.11143246
0.24950753
-1.0829991
-0.235288
-0.56935483
0.8004865
-0.15923998
1.5074099
0.15986127
0.42949948
-1.5360352
-1.3022994
-0.621235
-1.2557826
-1.6063809
-0.39241713
-0.8660014
0.43634364
-0.7142573
-1.8392187
-0.66524017
-0.4094579
-0.55560684
0.26369932
-0.2994155
0.19446117
0.00012531597
-0.056575328
-1.0310686
-1.1073819
0.95716393
-0.039132416
-0.17284413
-1.7137713
1.0318145
-0.6407014
-0.20157519
-0.53714764
1.3076634
-0.21518743
-0.10755904
-0.6703936
0.58359814
0.1296847
-0.74383837
-2.052296
-1.943493
-1.2419901
-1.5791146
-1.7323232
-0.4647262
-0.8547239
-0.5982981
-1.3872371
-0.8413639
0.5059893
-0.028888466
-1.0159539
-0.8781407
-0.8586551
-1.5765216
-0.72110957
-0.54951406
-1.0456697
-0.46384534
-0.8682762
-1.329279
-1.5812683
-1.1616806
-0.5591132
-0.68271846
-0.6140093
-0.8487391
-0.8138591
0.5194415
0.8475472
-1.2317592
-0.06508279
0.84332556
-0.7534412
-0.061359435
-0.17108928
0.029114015
0.29252198
-0.99659246
0.18716425
-0.48432857
-0.574279
-0.149806
-0.526539
-1.6839328
0.298726
-1.12589
-1.2416302
-1.0083416
-0.1886835
-1.2171522
-0.11976431
-0.2596951
-1.1662437
-0.019736286
-0.4496138
-0.12932746
-1.9007655
-1.2868488
-1.1776145
-0.70207584
-0.99402976
-1.5353495
-0.08161428
-0.7827241
-0.9851597
-1.7212214
0.30599424
-1.3255223
-0.78677404
0.020959575
-1.938007
-0.87134534
-0.4159284
-1.7782842
-1.1730373
0.08866749
2.0492961
1.0762362
-0.007216261
0.97626513
-0.74596655
0.4418997
1.1642963
-1.1256992
-0.95673156
-0.64594465
1.9042864
-2.266134
0.23068756
-0.2024498
2.4514055
0.17042859
-2.1488516
2.029247
0.08730792
2.4830267
1.5408521
0.22265166
0.059148587
0.47132158
1.0918839
-0.8906889
-0.35221744
1.1023836
0.71585846
-0.080300555
0.7830516
-0.1106352
0.3845232
1.8773831
0.25839618
0.01753266
0.4585675
-0.556912
0.18327093
1.1123434
0.54181975
0.2650874
-0.6121097
0.6899404
0.5327037
-0.4582712
-0.3463424
2.0124485
0.92011964
-0.21810834
-0.7299846
-0.9932826
2.0011783
-0.9193375
0.07095367
-2.3654485
-0.13455993
1.2373506
0.55914795
-0.7927128
0.24315542
-0.6954934
0.818556
2.2227504
0.50524724
1.5352136
2.1754854
-1.2847167
-1.7190467
-1.2820773
0.48650953
0.9624859
-0.28632626
-1.7782934
3.3267088
-0.80292964
-0.82254416
-2.4419034
1.1831589
0.27238667
-0.08926326
0.114699185
2.2780476
1.2212758
-1.5160606
-0.7004898
0.46838894
0.61680245
1.7088135
1.709012
-1.2258106
0.67940307
1.9137111
-0.13307501
0.8966815
1.0661377
-0.077985905
0.294199
-1.1051399
-0.61139315
3.6302567
-0.82702637
0.40620643
0.898003
2.2812579
0.42015857
0.41871074
-0.5433154
2.1934881
0.44938952
-2.4096403
0.3080853
-0.75909114
2.749651
1.273376
0.88220817
0.46447915
0.84428304
0.5331683
0.41311303
0.3472368
0.42634374
0.5020205
1.133693
0.6315067
0.49277782
-1.1333336
0.5877674
1.6507065
0.6192476
0.6534441
1.9449492
0.80630463
0.57669324
-0.67982227
1.7395757
-0.028182037
-0.9472996
-0.8416842
-0.12939622
1.2086351
0.57445955
0.7767944
1.280486
1.2262709
-0.8028702
1.0569873
0.94939137
-1.4751376
-0.19903125
2.3615687
1.1166264
2.325268
0.8368003
1.1348325
-0.81748235
-0.94805723
1.3997422
0.48129374
0.87885517
1.8402383
-0.7471128
0.063835524
-1.1082904
0.8763111
-1.1521848
-1.3750111
-0.17355038
2.084852
0.0059059784
-0.9651331
1.4963127
-1.2178527
0.85985076
-0.04743771
1.2991531
-1.2023815
-0.538383
1.2776058
0.44704303
-0.09368593
-1.4124348
-1.66763
-1.382003
2.56167
2.7520278
1.7802238
0.20748135
2.201629
1.4195694
1.0006833
1.2050105
0.7915465
0.80263686
1.54673
0.29449403
-0.18094113
2.7645786
0.08308226
0.32472438
0.41058362
2.673242
1.3079755
0.78823054
-1.2491844
2.6995187
0.3947289
1.5972215
-0.2016275
0.667046
-1.0026234
1.5369157
-0.21158755
-0.5587798
1.8455683
0.18770997
1.7668104
1.3544986
0.5668934
1.6499695
0.79549676
0.23864032
-0.076060526
0.54530853
3.0026731
-1.3816507
-0.9419994
2.1659389
-0.49469137
-0.23300627
2.2649322
0.6988553
1.7207134
1.4296931
1.8957422
-1.7843419
2.108782
0.63150716
-1.2306048
0.4726084
0.16148792
-1.1888111
2.5059545
0.49573082
1.0300703
2.1389406
-0.6599807
-0.037568122
0.94101214
-0.2563992
0.37840766
2.115041
0.7366525
0.3634316
0.93945736
-0.4147591
-0.38213915
-1.2784125
-0.08756078
-0.9641913
0.19105943
-0.3143284
-1.6625874
1.6527823
-0.5382227
0.3207345
-0.595412
1.5850205
0.8305495
-0.8234362
-0.8500601
-0.7534717
-0.9616986
0.4730339
1.5510118
2.668524
-0.60776836
1.7700179
2.7614388
1.3252912
0.59501547
2.1923153
1.6112024
-0.40866897
1.8549836
2.2821114
-0.77804285
1.6713705
-1.6944448
0.17435041
-0.2616872
-1.3363857
0.6129463
0.86893713
0.6393853
-1.234884
1.1132063
2.0555096
0.022984732
-1.0277154
2.4854038
1.451681
1.6226276
0.67418146
-0.85724473
-0.7612631
-1.2767704
-1.0986053
-0.21717405
1.6196754
0.6333269
1.2900922
1.2161998
0.36294502
1.5778857
1.6918045
0.99078727
-0.45147473
-1.2807459
0.045685403
1.0520277
1.9152287
-1.3029758
0.9261474
0.7156784
-0.19225252
0.55643463
2.0766673
-0.18557347
0.13493066
1.802568
0.23648183
2.766143
0.2725357
1.0387229
-1.9429945
0.23742795
0.54052275
0.2342531
0.132205
0.82999367
1.7976496
0.49230877
0.7958189
-0.37094918
1.110652
0.6413396
1.1133307
1.7305324
0.37832874
2.2200847
-0.36919576
-0.9609986
0.19756792
1.3253196
1.8076504
0.103227235
-0.42585406
-1.348184
1.8132821
1.2306423
1.1028852
1.9165587
-2.4476745
2.054153
1.682224
0.44401717
0.19734457
1.5318341
-0.47473955
2.3914623
0.42040017
0.6056829
2.4316716
0.34631512
1.3324567
0.0011816069
1.1105287
1.4553503
1.7634965
-0.6814372
0.2123078
0.16176923
1.0453559
2.9997826
2.2626696
-0.76536435
-0.42744967
0.14685751
-2.1144905
0.90889215
1.048776
-0.1111255
1.91633
0.45815408
0.054494135
0.420825
0.21111344
1.0745884
1.3172199
-0.20259683
-0.77705085
-0.0074540502
-0.3671591
-0.33085522
1.9708865
-0.57260597
0.46406755
-0.46640325
-0.46216512
-0.59125966
0.87914044
0.7298775
1.101785
3.035671
-0.35254276
-0.86594146
-0.80589545
-0.7337217
1.8224323
-1.2016355
-0.72215164
-0.47425175
0.3528979
1.0273298
-0.036939412
0.2297522
2.528665
0.3788014
1.9056299
-1.8528597
1.3645221
1.9897952
-0.32049844
0.20599015
1.1722815
-0.74404633
1.4928225
0.8872909
1.4359131
-0.72126484
1.1888711
1.0988497
0.34612125
-1.1861738
2.339421
1.8755157
2.8820977
-0.5806484
0.39929
-0.2774235
-0.27243808
1.1287675
1.7444426
-0.59589016
1.1558293
1.2643657
-0.024029814
0.23252903
-0.2631906
0.82813776
-0.01724714
1.3382394
0.8137164
0.0848312
-0.667315
-2.0700092
-0.4838388
-0.51320595
0.0037372224
1.3113365
-0.22582024
-0.48156402
1.6307961
0.09801248
-2.1774163
0.64898616
0.19490883
0.1979113
0.5982482
0.08691002
0.46526366
0.80410117
0.7230205
1.8608608
-0.79288054
1.1912636
-0.38980532
-0.44946012
-0.18038842
0.37972292
-1.0056939
1.2174432
2.4348667
0.66281396
1.1692165
0.5451535
1.162487
1.303657
1.1611288
1.4010147
0.04817031
1.7428269
3.0368202
0.8766508
-0.26485524
0.26849088
1.869811
0.48758182
-0.6030314
0.14393385
0.57609755
0.5643001
-1.2467934
0.17159785
-0.56257993
-1.4873617
2.5040245
-0.57016486
-0.56566435
0.13093448
0.35735604
0.5589198
-0.28002298
-0.20552874
-1.1545538
-0.12005596
-0.63608867
-0.5422438
-1.5786606
-0.08732763
0.26583073
-0.48822308
-0.61887413
-2.0053678
-0.8047017
-0.78162575
-0.06668275
0.49894157
0.15497255
-0.7863977
0.6278491
-0.9034021
0.19300902
0.026619527
-0.3625757
0.51064104
-0.40118733
-1.2872294
1.4680091
1.5331635
-0.0104825385
0.7074813
0.47988775
-0.15154226
0.9793232
-0.8414473
0.6749984
0.3124825
0.027812386
-0.59152645
-0.05568023
-0.7404828
-0.5500867
-1.7206669
-0.7042971
-1.0925202
1.581233
-0.121507704
0.8914928
0.9794418
-1.1422362
-0.12346666
-0.5999273
-2.1338222
-0.077511735
-0.8373626
-0.23501818
-0.010404997
-0.041594535
-1.0295677
-0.29143637
-0.22416036
-0.8062624
-0.7818173
-0.2714035
0.00018124096
-1.2354704
0.123760514
0.018292539
-0.6903522
0.52160364
-1.8007841
-1.782615
-1.2970004
-1.6565065
-1.3305808
-0.6563534
-1.6530751
0.117775925
0.24357137

================================================
FILE: qa/L0_java_resnet/expected_output_data/expected_output_tensorflow.txt
================================================
0.00070911006
0.0010684511
0.0002289149
0.0002890797
0.001823506
0.00033588437
0.0005761559
0.00026887475
0.00016327911
0.00062107155
0.00035215134
0.00021309333
0.0002824714
0.00032690517
0.000362966
0.00029754156
0.000462734
0.0009069857
0.00024187386
0.00022825644
0.0005646942
0.0005685028
0.0015051479
0.000550871
0.00035833745
0.0007460652
0.00018980923
0.0006296634
0.0009744452
0.0004044121
0.00021716364
0.003566736
0.00033353135
0.00038591775
0.0012752721
0.00010569831
0.0002329158
7.213156e-05
0.0042858184
0.0008237876
0.0010394026
0.00012532603
0.00022559383
0.00018184909
0.00024319398
0.0005497621
0.0010193866
0.0012020781
0.0002604365
0.00036887883
0.00039009948
0.0005622609
0.0005074424
0.00065419363
0.0001678674
0.0007651498
0.00019579448
0.000100849866
0.00060587144
0.009335775
0.002238217
0.00042261003
0.0004869275
0.0017416928
0.00050716975
0.0003331386
0.0009492363
0.00026299703
0.00096314494
0.0002454126
0.00052854954
0.0022881972
9.451885e-05
0.0020056511
0.00017017504
0.00013614705
0.00031952796
0.0006581821
0.00086781656
0.0010920991
0.00016639908
0.00029970525
0.00036486977
8.347438e-05
0.00027294483
0.00027506787
0.00014957263
0.00012473388
0.00047103016
0.00068512204
0.00026231256
0.0002471854
0.00038985602
0.0005510145
0.0015379117
6.391459e-05
0.00075941073
0.00021282899
0.00016255074
0.0006057964
0.00034061813
0.000116008356
0.00013254896
0.00072937884
0.00025322058
0.00013424494
0.000116978124
0.0012361282
0.0008600386
0.00016587735
0.0008544744
0.048096422
0.0014968353
0.0025525852
0.0003895892
0.0004779505
0.0010679476
0.001583124
6.0117403e-05
0.00023506799
0.00080862094
9.170748e-05
0.0003459704
0.00025960154
0.00032231968
0.00024193742
0.0005336417
0.0003331181
0.00083348254
0.0005098401
0.00050219166
0.0001382532
0.0013238905
0.00024549733
0.00018288675
0.00032616325
0.00016282972
0.00012510039
0.00037040826
0.00023140096
0.00033143876
0.00035791306
0.00014701433
0.00016613651
0.0007882612
0.00020208673
0.00025587558
0.0005947067
0.00020411932
0.0003501464
0.00019414612
0.00036807868
0.00016704168
0.00031899076
0.00014016406
0.0001590781
0.0001042989
6.693639e-05
0.00044032355
0.00019823047
0.0001648452
0.00021023075
0.000121602214
0.0008859733
0.00027336556
0.00021329267
0.00042354263
0.00015121586
0.000366059
0.00013732535
0.00029352968
0.00021702389
0.00028322692
0.00041577345
0.00022989941
0.00022801253
0.00016557571
0.00020442168
0.00084447116
0.00024891275
0.0002122566
0.00030452234
7.565878e-05
0.00012686373
0.00019746723
0.00032517608
0.00019016817
0.00029626995
0.00016989792
0.00049037643
0.00020838893
0.00019873244
9.189098e-05
0.0006875357
0.00064732507
0.00034183732
0.00015014365
0.00011403188
0.000537032
0.0003341667
0.00029259248
0.00038738886
0.00012182328
0.00051590457
0.00033943634
9.197326e-05
0.00039432684
0.00014883016
0.00045966395
0.00023865228
7.6960285e-05
0.00014399357
0.0003608486
0.00025755627
0.00020178013
0.0003600289
0.0011284449
0.0001712409
0.00019862416
0.00025335004
0.0001756047
0.00034503645
0.00039285867
0.00017203313
0.0012871717
0.00030436684
0.00024817986
0.0010085882
0.00027581956
0.00028622823
0.0002573273
0.00038505017
0.00039457608
0.0002494052
0.00018972508
0.0003315194
0.00022963689
7.301075e-05
0.00023747115
0.00032635694
0.00021661345
0.00034653372
0.00018944537
0.0002243273
0.0003466119
7.474429e-05
0.00029931893
0.00026417332
0.000116994954
0.0002012358
0.0004963594
0.00027601913
0.00023313782
0.00021496546
0.00033204685
0.00038143748
0.00010215905
0.00022710346
0.0004710895
0.00010912214
0.00067364104
0.0002553266
0.00024328758
0.00018621673
0.00024005111
3.6619393e-05
0.00031510097
0.00025127587
0.00020713067
0.00053867674
0.0004486591
0.00012326887
0.00013776327
0.00010066613
0.0001907201
0.00019176993
0.00028617049
0.00043150192
0.00022882965
0.00017046132
0.0001404705
0.0003074807
0.00069475063
0.0005420082
0.00016548761
0.0011550415
0.0003579725
0.00013039725
0.00046354206
0.00025531164
0.00015127688
0.0003076982
9.368715e-05
0.000253574
0.0004157336
0.00025558594
0.00020862755
0.0003325044
0.00010430214
0.0005750662
0.00034912725
0.0003502339
0.00013765084
0.0011814896
0.0007353515
0.0004288803
0.0010895525
0.0021925315
0.0010849636
0.0002088477
0.000698407
0.0005413023
0.0025422976
0.00050733547
0.00056180026
0.0032103728
0.00023816712
0.0017631998
0.003166806
0.00075065246
0.00043124682
0.00020120693
0.00030978755
0.00040472345
0.0010322309
0.0002756523
0.0007263063
0.00038796544
0.0014804546
0.00025164674
0.00021415394
0.00015569745
0.00047274903
0.00026750995
9.396422e-05
0.0003232726
0.0003681733
0.00017011825
0.00037481345
0.000110637375
0.00027844915
0.00027941877
0.00028294954
0.000107615866
0.00013299155
0.00025102712
0.0003521134
0.00018762982
0.0005306597
0.00027527596
0.0001893789
0.0006203038
0.0002596028
8.3349165e-05
0.000421517
0.00033665064
0.00045308896
0.000110814566
0.00016861226
0.0006383047
0.00020831541
0.00014839825
0.00029492003
0.00019427085
0.00045692816
0.00020844795
0.00019500752
0.00040315292
8.695124e-05
0.00013987756
0.00012228725
0.00056897226
0.00020290921
0.0002687522
0.00023272065
0.00015077695
0.0004568092
0.00052215316
0.00027182538
0.00020620856
0.0010283174
8.266399e-05
0.00021341672
0.00019470627
0.0004475956
0.00043766637
0.00018623582
0.00022168642
0.00027278156
0.00027336203
0.00034579786
9.910105e-05
0.00036059332
0.0005613833
0.00021642471
0.00061176467
0.00032723378
0.0007215444
0.00042581535
0.006056687
0.00015225813
0.0038606655
0.0033682694
0.0005007813
0.00034089078
0.001088126
0.0003091816
0.00025670388
0.00028364526
0.0039907284
8.005619e-05
0.0003177985
0.00044217892
0.003775855
0.00022793307
9.9455334e-05
0.0042361487
0.0015110963
0.0014649354
0.00076693745
0.00014660056
0.0008259513
0.00014898773
0.00094022823
0.0018079237
0.00027478446
0.0008579107
0.00027007243
0.00027866405
0.0021426745
0.00030444653
0.00013589527
0.0025529363
0.00022925495
0.00020205135
0.0006399196
0.0001175159
0.0008898152
0.0007308672
0.00015426724
0.00070449885
0.00063714065
0.0011764771
0.000113688315
0.00025997663
0.0002751466
0.0012629845
0.00061876763
0.00047713597
0.00018022317
0.000112102745
0.0019180076
0.00014341537
0.00038212672
0.00023863306
0.00014654716
0.0009910533
0.00046345408
0.0006146838
0.0022888945
0.00014176867
0.0009656023
0.0007254071
0.0003110353
0.00075938756
0.0017488213
0.00026165575
0.00043671884
0.00025007708
0.0010205496
0.0072930735
0.00079188804
0.00014444374
0.0022240074
9.0894464e-05
0.0005548176
0.00036375815
0.00045969928
0.00049831875
0.0006171517
0.0005445464
0.0005370632
0.0009902638
0.0005372154
0.00047807358
0.0018499892
0.00092412543
0.006397552
0.0046642385
0.00015648386
0.0003896425
0.0050082384
0.0003178785
0.00040727912
0.0012690715
0.00029073926
0.00041457833
0.0022713607
0.0026651558
0.0043892586
0.0002917294
0.0015015705
0.0002936945
0.0011139546
0.0022272936
0.0006511537
0.0008047797
0.0006209673
0.0012966822
0.000117934265
0.0003287383
0.00011685335
0.00408869
0.0020391766
0.0005868179
0.00081892085
0.0008156648
0.0029200844
0.0005166022
0.0005672602
0.0001692095
0.0003508818
0.00013667026
0.0019258707
0.0002646609
0.000203857
0.0004557036
0.0014699163
0.00075061055
0.00027520838
0.024521487
0.0023796572
0.00031702282
0.00016261516
0.0030187324
0.0001344725
0.00052194105
0.00040833795
0.00073826866
0.0013697975
0.00053330604
0.00047440815
0.002975715
0.0034163008
0.00039923526
0.0003814433
0.0033519045
0.00018409718
0.00015521496
0.0012500272
0.000352364
0.0026179687
0.0009001603
0.0007409122
0.00020439974
0.0001491525
0.00057623035
0.00053739885
0.001619859
0.0007606476
0.00043201642
0.00048651415
0.001913164
0.001860702
7.184188e-05
0.0003567602
0.00047219777
0.0013026253
0.0005371198
0.0003526595
0.0010887473
0.00021295802
0.0015989357
0.00016607663
0.002568109
0.0002009078
0.00010516546
0.00071283046
0.0003078614
0.0021156948
0.00077290024
0.00027787217
0.00018751186
0.0016615124
0.0015545615
0.0010933804
0.0001293072
0.0012888343
0.00020816487
0.00030583332
0.00016422627
0.00015186946
0.00031760518
0.003668799
0.0008204296
0.0006058452
0.0075512384
0.0006543231
0.0003984883
0.0004991135
0.0063148434
0.0004667902
0.0019243147
0.00026864174
0.004626689
0.0016829795
0.0024464321
0.002604262
0.0005715485
0.0004827969
0.00059977506
0.00044812242
0.00018801834
0.0014922172
0.00039306682
0.00038797187
0.0017823273
7.7641904e-05
0.0013096565
0.0033977008
0.0014362672
0.0010601832
0.0016821629
0.0041754427
0.00036547767
0.00034212973
0.04761514
0.00039928395
0.0007339863
0.0048003797
0.00032377243
0.0006853962
0.0019343331
0.0021214003
8.4536754e-05
0.0014679983
9.906235e-05
0.0001737739
0.00044015897
5.232733e-05
0.0003227811
0.0011037658
0.0009596574
0.002163132
0.034116793
0.00018434737
0.00054400944
0.00027010517
0.00029613025
0.0002854188
0.008274664
0.0026966897
0.00056778896
0.00056742143
0.0001424069
0.00021398348
0.0002040955
0.0007528397
0.00047215613
0.0003180315
0.00026779302
0.00017190988
0.00057392224
0.00026870312
0.0041729347
0.00022995795
0.00473034
0.00053698535
0.0015700939
8.663364e-05
0.00037708133
0.00010627266
0.0008188108
0.0013689178
0.0028652248
0.00030012682
0.00019088034
0.0020974467
0.0005804101
0.0046054157
0.000866855
0.0028432102
0.0004053386
0.0022837527
0.00031697293
0.0020557377
0.0006195521
0.00029529422
0.0019667863
0.00028010362
0.00036917007
0.0014461187
0.0010241494
0.00035407842
0.0007762103
0.0007345563
0.0016735821
0.000100398545
0.00042761158
0.0018091354
0.0011984855
0.00054059736
0.0010517223
0.0003952099
0.0004072673
5.0896953e-05
0.00015406184
0.0011205417
0.0016784162
6.48444e-05
0.001374897
0.0049680024
0.00031736813
0.00040638892
0.0031774077
0.00014365144
0.00058315735
9.539311e-05
0.0002490495
0.00080948864
0.0026334277
5.1187024e-05
0.0019501996
0.00017581039
0.0007018262
0.00082990975
0.00033347218
0.0003785377
0.00024977518
0.0006290335
0.0005053414
0.001499565
0.0002951073
0.00053611986
0.00018856855
0.00011126017
0.0019289504
0.0006362068
0.000522457
0.00032152023
0.0018640001
0.0008822051
0.0009148322
0.0009896222
0.0029765042
0.0014977105
0.0003173049
0.0015661103
0.00010378374
0.0067265066
0.0005495047
0.00020958934
0.00019278725
0.0009433383
0.0026177543
0.00051816285
0.00017156888
7.744175e-05
0.0003151731
0.0008290297
0.0032181763
0.0024396458
0.00025281956
0.0029372664
0.0014309491
0.00055660465
0.0007385025
0.0009333291
0.0002543238
0.0060301092
0.00057014904
0.0013402926
0.0027256922
0.0009102879
0.0001869125
8.260008e-05
0.0039338632
0.0023134286
0.0012300106
0.00029246748
0.000283189
0.00026828857
0.0025049848
0.0016384326
0.0022900025
0.0002599975
0.0004017206
0.0016243177
0.0006216647
0.0036319585
0.00028053645
0.0004719083
0.00096298783
0.00025558157
0.00021045441
0.00043856484
0.00095168
0.0002192634
0.00033050985
0.00012919637
0.00022991112
0.00042593313
0.00029524197
0.0003437868
0.0051064813
0.0005583069
0.0007269702
0.00024129995
0.00030284372
0.00027721547
0.0003213354
0.0006788763
0.0012024492
0.0036741009
0.00024671428
0.0005882029
5.3842294e-05
0.00040663296
0.02228713
0.0016194598
0.00015659895
0.00037711856
0.00040618918
0.0011397398
0.00011992812
0.0001520243
0.0048938077
0.0016474533
0.0019597847
0.00048948237
0.00030241054
0.00049067725
0.00073232397
0.00032315947
0.0014954191
0.00037097387
0.0013783753
0.0016116645
0.00029578464
0.00090505433
0.00027435934
0.0005812986
0.000120840086
0.00039883642
0.0015213061
0.0027571726
0.0023957484
0.00019108997
0.0007307167
0.000956605
0.0006839414
0.0024526927
0.007934612
0.00020379393
0.015423247
0.001909548
0.000276556
0.00094950746
0.00063008594
0.0019207522
0.00024915
0.00062654825
0.0019300466
0.00035208502
0.00028049122
0.0003148443
0.00038308708
0.00027527692
0.00026734636
0.000109911365
0.00015939883
0.00020454325
0.0014520382
0.0005228617
0.00011064936
0.003540477
0.00031232936
0.00044735873
0.00017807365
0.0013564116
0.000965749
0.0010829738
0.00073439174
0.0027080632
0.00030311418
0.00044519626
0.0007992933
0.00032909622
0.00030226275
0.0029641816
0.00011622985
0.0007482988
0.001229003
0.0025723213
0.00065770274
0.00015693594
0.00054296193
0.0013329909
0.002655394
0.00034390666
0.00031026872
0.0020210485
0.0008697185
0.00032176377
0.0041055335
0.0057543945
0.00040670217
0.0005435844
0.009029863
0.00028603026
0.00064405525
9.242199e-05
6.4520485e-05
0.00018704256
0.00015222837
0.00019523445
0.005567865
6.787147e-05
0.00034305613
0.0028331447
0.0020781667
0.00010261523
0.0002362934
0.00013399884
0.00022745578
0.00025935622
0.00031119035
0.00038356654
0.00022390902
0.00047898493
0.0004629675
0.000112182315
0.00013342654
0.00018693593
0.00046389582
0.00042846476
0.00045707394
0.00045862008
0.00034546596
8.175569e-05
0.00023262479
0.00021009706
0.00047855324
0.00030753214
0.00019426928
0.0010725219
0.0003141107
0.0005669363
0.0012055356
0.001431565
0.0007926821
0.0008843769
0.0005278664
0.00042725797
0.003944173
0.00015261356
0.000299945
0.00079040887
0.00060629344
0.00020051922
0.00031456698
0.00040859287
0.00027128076
0.00021296159
8.693237e-05
0.00027029635
0.00305675
0.0023890452
0.003111028
0.0006668401
0.0004029482
0.0032200122
0.00013293372
0.0007656965
0.00023606456
0.0003478867
0.00031042635
0.00016308061
0.00038783776
0.00043370973
0.00089249195
4.2713556e-05
0.0004966322
0.0016314207
0.0004260099
0.0017055604
0.00043873576
0.0004356743
0.00071425876
0.00013353773
0.00031172932
0.00033197878
0.00043404778
0.00013681914
0.00016265325
0.000201886
0.000113467126
0.000118104785
0.0006379289
0.0009817044
0.00019666742

================================================
FILE: qa/L0_java_resnet/test.sh
================================================
#!/bin/bash
# Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Pick the repository version: the first command-line argument takes
# precedence over the NVIDIA_TRITON_SERVER_VERSION environment variable.
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
else
    REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
fi

# A version is mandatory: the model data directory is derived from it.
if [ -z "${REPO_VERSION}" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Append the architecture suffix when TEST_REPO_ARCH is provided.
if [ -n "${TEST_REPO_ARCH}" ]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Models
# DATADIR holds the perf_model_store for the selected repository version;
# MODEL_REPO is the local model repository created under the current directory.
DATADIR=/data/inferenceserver/${REPO_VERSION}
MODEL_REPO="$(pwd)/models"
# Each of the following may be overridden from the environment.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="https://github.com/triton-inference-server"}
JAVACPP_BRANCH=${JAVACPP_BRANCH:="https://github.com/bytedeco/javacpp-presets.git"}
JAVACPP_BRANCH_TAG=${JAVACPP_BRANCH_TAG:="master"}

# Create local model repository
# Paths are quoted because MODEL_REPO is derived from the working directory,
# which may contain whitespace or glob characters.
mkdir -p "${MODEL_REPO}"
for BACKEND in _fp32_libtorch _fp32_onnx; do
    cp -r "${DATADIR}/perf_model_store/resnet50${BACKEND}" "${MODEL_REPO}/"
    echo "${MODEL_REPO}/resnet50${BACKEND}/config.pbtxt"
    # Rewrite the instance kind in the copied config from KIND_GPU to KIND_CPU.
    sed -i "s/kind: KIND_GPU/kind: KIND_CPU/" "${MODEL_REPO}/resnet50${BACKEND}/config.pbtxt"
done

# Set up test files based on installation instructions
# https://github.com/bytedeco/javacpp-presets/blob/master/tritonserver/README.md
# From here on, abort the script as soon as any setup command fails.
set -e
# Shallow-clone a single branch/tag of the client repository.
# TRITON_CLIENT_REPO_TAG has no default in this script and must come from the
# environment.
git clone --single-branch --depth=1 -b ${TRITON_CLIENT_REPO_TAG} ${TRITON_REPO_ORGANIZATION}/client.git
# The build script is sourced (not executed), so it runs in this shell and any
# directory change or variable it sets persists afterwards.
source client/src/java-api-bindings/scripts/install_dependencies_and_build.sh -b $PWD --javacpp-branch ${JAVACPP_BRANCH} --javacpp-tag ${JAVACPP_BRANCH_TAG} --keep-build-dependencies
# NOTE(review): presumably the sourced script leaves the shell one level below
# the test directory and this returns to it -- confirm against the script.
cd ..

# Log file that collects the output of the Java client run.
CLIENT_LOG="client.log"
# Location of the javacpp-presets sample project that hosts the test class.
SAMPLES_REPO="$(pwd)/javacpp-presets/tritonserver/samples/simple"
# Maven invocation shared by the test run; kept as a single string that is
# word-split when expanded.
BASE_COMMAND="mvn clean compile -f ${SAMPLES_REPO} exec:java -Djavacpp.platform=linux-x86_64"
. ../common/util.sh

# Install the test class into the sample project and point the pom at it.
cp ResnetTest.java ${SAMPLES_REPO}
sed -i -e 's/Simple/ResnetTest/g' ${SAMPLES_REPO}/pom.xml

# Start from a clean slate: drop old logs and reset the result code.
rm -f *.log
RET=0

# Run with default settings
# errexit (set -e) is still active from the setup steps earlier in this script.
# Turn it off so that a failing client run is recorded in RET and its log is
# dumped below, instead of the script aborting before any diagnostics print.
set +e
# BASE_COMMAND is intentionally unquoted: it is a command line stored in a
# string and relies on word splitting.
if ! $BASE_COMMAND -Dexec.args="-r $MODEL_REPO" >>"${CLIENT_LOG}" 2>&1; then
    RET=1
fi

# TODO: fix build to support GPU only resnet so can test TF as well
# Each backend must report success exactly once in the client log. The command
# substitution is quoted so a missing log yields an empty string (a mismatch)
# rather than a malformed test expression.
for BACKEND in ONNX TORCH; do
    if [ "$(grep -c "${BACKEND} test PASSED" "${CLIENT_LOG}")" != "1" ]; then
        echo -e "\n***\n*** ${BACKEND} backend test FAILED. Expected '${BACKEND} test PASSED'\n***"
        RET=1
    fi
done

# Report the overall result; on failure, print the client log for diagnosis.
if [ "${RET}" -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat "${CLIENT_LOG}"
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit "${RET}"


================================================
FILE: qa/L0_java_sequence_batcher/SequenceTest.java
================================================
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import static org.bytedeco.tritonserver.global.tritonserver.*;

import com.google.gson.*;
import java.io.*;
import java.util.*;
import java.util.concurrent.*;
import org.bytedeco.javacpp.*;
import org.bytedeco.tritonserver.tritonserver.*;

public class SequenceTest {
  // Boilerplate code for setting up Triton
  // Reports MSG on stderr with a "Failure: " prefix and terminates the JVM
  // with exit status 1.
  static void FAIL(String MSG)
  {
    final String report = "Failure: " + MSG;
    System.err.println(report);
    System.exit(1);
  }

  // Does nothing when err__ is null. Otherwise prints MSG together with the
  // error's code string and message to stderr, deletes the error object and
  // terminates the JVM with exit status 1.
  static void FAIL_IF_ERR(TRITONSERVER_Error err__, String MSG)
  {
    if (err__ == null) {
      return;
    }

    String code_string = TRITONSERVER_ErrorCodeString(err__);
    String error_message = TRITONSERVER_ErrorMessage(err__);
    System.err.println(
        "error: " + MSG + ":" + code_string + " - " + error_message);
    TRITONSERVER_ErrorDelete(err__);
    System.exit(1);
  }

  // Memory type used when appending the request input data in main() and the
  // memory type Check() requires every response output to report.
  static int requested_memory_type = TRITONSERVER_MEMORY_CPU;

  // Wraps an existing TRITONSERVER_Server pointer and registers a deallocator
  // that calls TRITONSERVER_ServerDelete on it.
  static class TRITONSERVER_ServerDeleter extends TRITONSERVER_Server {
    public TRITONSERVER_ServerDeleter(TRITONSERVER_Server p)
    {
      super(p);
      deallocator(new DeleteDeallocator(this));
    }

    // Deallocator whose deallocate() deletes the server it points at.
    protected static class DeleteDeallocator
        extends TRITONSERVER_Server implements Deallocator {
      DeleteDeallocator(Pointer p)
      {
        super(p);
      }

      @Override
      public void deallocate()
      {
        TRITONSERVER_ServerDelete(this);
      }
    }
  }

  // Prints the optional message followed by the command-line help to stderr,
  // then terminates the JVM with exit status 1.
  static void Usage(String msg)
  {
    if (msg != null) {
      System.err.println(msg);
    }

    String[] help_lines = {
        "Usage: java " + SequenceTest.class.getSimpleName() + " [options]",
        "\t-m [model name]",
        "\t-v Enable verbose logging",
        "\t-r [model repository absolute path]",
    };
    for (String help_line : help_lines) {
      System.err.println(help_line);
    }

    System.exit(1);
  }

  // Response allocator callback: provides the buffer for one output tensor.
  //
  // For a zero-sized tensor no buffer is allocated and the preferred memory
  // type and id are reported back unchanged. Otherwise the buffer is
  // allocated with Pointer.malloc and reported as TRITONSERVER_MEMORY_CPU,
  // and the tensor name is passed along as 'buffer_userp' so the release
  // callback can print it.
  //
  // Returns null on success, or an internal error if the allocation fails.
  static class ResponseAlloc extends TRITONSERVER_ResponseAllocatorAllocFn_t {
    @Override
    public TRITONSERVER_Error call(
        TRITONSERVER_ResponseAllocator allocator, String tensor_name,
        long byte_size, int preferred_memory_type,
        long preferred_memory_type_id, Pointer userp, PointerPointer buffer,
        PointerPointer buffer_userp, IntPointer actual_memory_type,
        LongPointer actual_memory_type_id)
    {
      // Initially attempt to make the actual memory type and id that we
      // allocate be the same as preferred memory type
      actual_memory_type.put(0, preferred_memory_type);
      actual_memory_type_id.put(0, preferred_memory_type_id);

      // If 'byte_size' is zero just return 'buffer' == nullptr, we don't
      // need to do any other book-keeping.
      if (byte_size == 0) {
        buffer.put(0, null);
        buffer_userp.put(0, null);
        System.out.println(
            "allocated " + byte_size + " bytes for result tensor "
            + tensor_name);
        return null; // Success
      }

      // The buffer always comes from Pointer.malloc, so report CPU memory
      // regardless of what was preferred.
      actual_memory_type.put(0, TRITONSERVER_MEMORY_CPU);
      Pointer allocated_ptr = Pointer.malloc(byte_size);

      // Report a failed allocation instead of returning success without a
      // buffer.
      if (allocated_ptr == null || allocated_ptr.isNull()) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            "failed to allocate " + byte_size + " bytes for result tensor "
                + tensor_name);
      }

      // Pass the tensor name with buffer_userp so we can show it when
      // releasing the buffer.
      buffer.put(0, allocated_ptr);
      buffer_userp.put(0, new BytePointer(tensor_name));
      System.out.println(
          "allocated " + byte_size + " bytes in "
          + TRITONSERVER_MemoryTypeString(actual_memory_type.get())
          + " for result tensor " + tensor_name);

      return null; // Success
    }
  }

  // Response allocator release callback: logs which result buffer is being
  // released, frees the buffer and deallocates the name pointer. The name is
  // read from 'buffer_userp' when it is non-null, otherwise "<unknown>" is
  // used.
  static class ResponseRelease
      extends TRITONSERVER_ResponseAllocatorReleaseFn_t {
    @Override
    public TRITONSERVER_Error call(
        TRITONSERVER_ResponseAllocator allocator, Pointer buffer,
        Pointer buffer_userp, long byte_size, int memory_type,
        long memory_type_id)
    {
      BytePointer name = (buffer_userp != null)
                             ? new BytePointer(buffer_userp)
                             : new BytePointer("<unknown>");

      String memory_type_string = TRITONSERVER_MemoryTypeString(memory_type);
      System.out.println(
          "Releasing buffer " + buffer + " of size " + byte_size + " in "
          + memory_type_string + " for result '" + name.getString() + "'");

      Pointer.free(buffer);
      name.deallocate();

      return null; // Success
    }
  }

  // Request release callback. It intentionally does nothing: main() reuses a
  // single request object for every inference of the sequence and deletes it
  // itself once the loop has finished.
  static class InferRequestComplete
      extends TRITONSERVER_InferenceRequestReleaseFn_t {
    @Override
    public void call(
        TRITONSERVER_InferenceRequest request, int flags, Pointer userp)
    {
      // We reuse the request so we don't delete it here.
    }
  }

  // Response complete callback: hands a non-null response to the future that
  // is registered in 'futures' under 'userp'. A null response is ignored.
  static class InferResponseComplete
      extends TRITONSERVER_InferenceResponseCompleteFn_t {
    @Override
    public void call(
        TRITONSERVER_InferenceResponse response, int flags, Pointer userp)
    {
      if (response == null) {
        return;
      }

      // Send 'response' to the future.
      CompletableFuture<TRITONSERVER_InferenceResponse> pending =
          futures.get(userp);
      pending.complete(response);
    }
  }

  // Futures for in-flight inferences, keyed by the pointer that is passed as
  // the response callback's user pointer (main() uses the request itself).
  // InferResponseComplete completes the matching future with the response.
  static ConcurrentHashMap<
      Pointer, CompletableFuture<TRITONSERVER_InferenceResponse>> futures =
      new ConcurrentHashMap<>();
  // Single instances of the callbacks, held in static fields and passed to
  // the Triton API from main().
  static ResponseAlloc responseAlloc = new ResponseAlloc();
  static ResponseRelease responseRelease = new ResponseRelease();
  static InferRequestComplete inferRequestComplete = new InferRequestComplete();
  static InferResponseComplete inferResponseComplete =
      new InferResponseComplete();

  // Validates the tensor data types listed in the model metadata and detects
  // whether the model is a PyTorch model.
  //
  // Every entry of the "inputs" and "outputs" arrays must have datatype
  // INT32, and all of them must share one data type. On success
  // 'is_torch_model[0]' is set to true exactly when the "platform" field
  // equals "pytorch_libtorch".
  //
  // Returns null on success, otherwise a newly created TRITONSERVER_Error
  // describing the first violation found.
  static TRITONSERVER_Error ParseModelMetadata(
      JsonObject model_metadata, boolean[] is_torch_model)
  {
    // Data type of the first tensor seen; stays null until then, so a model
    // without inputs no longer causes a null dereference while checking the
    // outputs.
    String seen_data_type = null;
    for (String section : new String[] {"inputs", "outputs"}) {
      for (JsonElement tensor_element :
           model_metadata.get(section).getAsJsonArray()) {
        String data_type =
            tensor_element.getAsJsonObject().get("datatype").getAsString();
        if (!data_type.equals("INT32")) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "sequence qa example only supports model with data type INT32");
        }
        if (seen_data_type == null) {
          seen_data_type = data_type;
        } else if (!seen_data_type.equals(data_type)) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG,
              "the inputs and outputs of sequence model must have the same "
                  + "data type");
        }
      }
    }

    is_torch_model[0] =
        model_metadata.get("platform").getAsString().equals("pytorch_libtorch");
    return null;
  }

  // Sets the sequence batcher metadata on a request: the correlation ID,
  // followed by the request flags. The flags value is the sum of the
  // sequence-start flag (when 'sequence_start' is true) and the sequence-end
  // flag (when 'sequence_end' is true). Any API error terminates the test
  // through FAIL_IF_ERR.
  static void SetSequenceMetadata(
      TRITONSERVER_InferenceRequest irequest, long correlation_id,
      boolean sequence_start, boolean sequence_end)
  {
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetCorrelationId(irequest, correlation_id),
        "Unable to set correlation ID");

    int start_flag =
        sequence_start ? TRITONSERVER_REQUEST_FLAG_SEQUENCE_START : 0;
    int end_flag = sequence_end ? TRITONSERVER_REQUEST_FLAG_SEQUENCE_END : 0;
    int request_flags = start_flag + end_flag;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetFlags(irequest, request_flags),
        "Unable to set flags");
  }

  // Adjusts the expected sequence result for models that, per the original
  // note, do not implement the full accumulator.
  //
  // The adjustment applies when the model name contains neither "nobatch"
  // nor "custom", or when it contains "plan", "onnx" or "libtorch". In that
  // case the result is 'value', plus one when 'flag' is non-null and
  // contains "start". Otherwise 'expected_result' is returned unchanged.
  static int GetExpectedResult(
      String model_name, int expected_result, int value, String flag)
  {
    boolean batching_non_custom =
        !(model_name.contains("nobatch") || model_name.contains("custom"));
    boolean adjust = batching_non_custom || model_name.contains("plan")
                     || model_name.contains("onnx")
                     || model_name.contains("libtorch");
    if (!adjust) {
      return expected_result;
    }

    boolean is_start = (flag != null) && flag.contains("start");
    return is_start ? value + 1 : value;
  }

  // Validates one inference response and, for the last request of a
  // sequence, checks the final accumulated value.
  //
  // The response must contain exactly one output named 'output0' with shape
  // [1], datatype 'expected_datatype', byte size 'expected_byte_size' and
  // memory type 'requested_memory_type'. The output value is always printed.
  // When 'sequence_end' is true the value is additionally compared with
  // 'expected_result' after it has been adjusted by GetExpectedResult() for
  // 'model_name' and 'input_value'; on a match "<model_name> test PASSED" is
  // printed. Any violation terminates the test through FAIL / FAIL_IF_ERR.
  static void Check(
      String model_name, TRITONSERVER_InferenceResponse response,
      int input_value, String output0, long expected_byte_size,
      int expected_datatype, boolean sequence_end, int expected_result)
  {
    // Copies of the output buffers, keyed by output name.
    HashMap<String, Pointer> output_data = new HashMap<>();

    int[] output_count = {0};
    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseOutputCount(response, output_count),
        "getting number of response outputs");
    if (output_count[0] != 1) {
      FAIL("expecting 1 response outputs, got " + output_count[0]);
    }

    for (int idx = 0; idx < output_count[0]; ++idx) {
      // Out-parameters filled in by TRITONSERVER_InferenceResponseOutput.
      BytePointer cname = new BytePointer((Pointer) null);
      IntPointer datatype = new IntPointer(1);
      LongPointer shape = new LongPointer((Pointer) null);
      LongPointer dim_count = new LongPointer(1);
      Pointer base = new Pointer();
      SizeTPointer byte_size = new SizeTPointer(1);
      IntPointer memory_type = new IntPointer(1);
      LongPointer memory_type_id = new LongPointer(1);
      Pointer userp = new Pointer();

      FAIL_IF_ERR(
          TRITONSERVER_InferenceResponseOutput(
              response, idx, cname, datatype, shape, dim_count, base, byte_size,
              memory_type, memory_type_id, userp),
          "getting output info");

      if (cname.isNull()) {
        FAIL("unable to get output name");
      }

      String name = cname.getString();
      if (!name.equals(output0)) {
        FAIL("unexpected output '" + name + "'");
      }

      // The output must be a one-dimensional tensor holding one element.
      if ((dim_count.get() != 1) || (shape.get(0) != 1)) {
        FAIL("unexpected shape for '" + name + "'");
      }

      if (datatype.get() != expected_datatype) {
        FAIL(
            "unexpected datatype '"
            + TRITONSERVER_DataTypeString(datatype.get()) + "' for '" + name
            + "'");
      }

      if (byte_size.get() != expected_byte_size) {
        FAIL(
            "unexpected byte-size, expected " + expected_byte_size + ", got "
            + byte_size.get() + " for " + name);
      }

      if (memory_type.get() != requested_memory_type) {
        FAIL(
            "unexpected memory type, expected to be allocated in "
            + TRITONSERVER_MemoryTypeString(requested_memory_type) + ", got "
            + TRITONSERVER_MemoryTypeString(memory_type.get()) + ", id "
            + memory_type_id.get() + " for " + name);
      }

      // We make a copy of the data here... which we could avoid for
      // performance reasons but ok for this sequence example.
      BytePointer odata = new BytePointer(byte_size.get());
      output_data.put(name, odata);
      System.out.println(name + " is stored in system memory");
      odata.put(base.limit(byte_size.get()));
    }

    // Read the first int of the copied output as the result value.
    int out = new IntPointer(output_data.get(output0)).get(0);
    System.out.println("Value: " + out);
    // Only the response to the final request of the sequence is compared
    // against the expected result.
    if (sequence_end) {
      expected_result =
          GetExpectedResult(model_name, expected_result, input_value, "end");
      if (out != expected_result) {
        FAIL("Expected result: " + expected_result + ", got " + out);
      } else {
        System.out.println(model_name + " test PASSED");
      }
    }
  }

  // Entry point: starts an in-process Triton server for the given model
  // repository, waits for the server and the model to become ready, then
  // sends a sequence of inference requests that share one correlation ID and
  // validates every response with Check().
  //
  // Command-line options:
  //   -m [model name]                        (required)
  //   -r [model repository absolute path]    (required)
  //   -v                                     enable verbose logging
  //   -?                                     print usage and exit
  //
  // Any failure terminates the JVM with exit status 1 (through FAIL,
  // FAIL_IF_ERR or Usage); a successful run ends with exit status 0.
  public static void main(String[] args) throws Exception
  {
    String model_repository_path = null;
    String model_name = null;
    int verbose_level = 0;

    // Parse commandline...
    for (int i = 0; i < args.length; i++) {
      switch (args[i]) {
        case "-m":
          // An option given as the last argument has no value: print the
          // usage (which exits) instead of indexing past the end of 'args'.
          if (i + 1 >= args.length) {
            Usage("-m requires a model name");
          }
          model_name = args[++i];
          break;
        case "-r":
          if (i + 1 >= args.length) {
            Usage("-r requires a model repository path");
          }
          model_repository_path = args[++i];
          break;
        case "-v":
          verbose_level = 1;
          break;
        case "-?":
          Usage(null);
          break;
      }
    }

    if (model_name == null) {
      Usage("-m must be used to specify model name");
    }
    if (model_repository_path == null) {
      Usage("-r must be used to specify model repository path");
    }

    // Check API version. The major versions must match, and the minor
    // version this test was built against must not exceed the one reported
    // by the library.
    int[] api_version_major = {0}, api_version_minor = {0};
    FAIL_IF_ERR(
        TRITONSERVER_ApiVersion(api_version_major, api_version_minor),
        "getting Triton API version");
    if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major[0])
        || (TRITONSERVER_API_VERSION_MINOR > api_version_minor[0])) {
      FAIL("triton server API version mismatch");
    }

    // Create the server...
    TRITONSERVER_ServerOptions server_options =
        new TRITONSERVER_ServerOptions(null);
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsNew(server_options),
        "creating server options");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetModelRepositoryPath(
            server_options, model_repository_path),
        "setting model repository path");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level),
        "setting verbose logging level");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetBackendDirectory(
            server_options, "/opt/tritonserver/backends"),
        "setting backend directory");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetRepoAgentDirectory(
            server_options, "/opt/tritonserver/repoagents"),
        "setting repository agent directory");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true),
        "setting strict model configuration");

    TRITONSERVER_Server server_ptr = new TRITONSERVER_Server(null);
    FAIL_IF_ERR(
        TRITONSERVER_ServerNew(server_ptr, server_options), "creating server");
    FAIL_IF_ERR(
        TRITONSERVER_ServerOptionsDelete(server_options),
        "deleting server options");

    // Wrap the raw server pointer so that it carries a deallocator which
    // deletes the server.
    TRITONSERVER_ServerDeleter server =
        new TRITONSERVER_ServerDeleter(server_ptr);

    // Wait until the server is both live and ready. Gives up after 10
    // polls, sleeping 500 ms between polls.
    int health_iters = 0;
    while (true) {
      boolean[] live = {false}, ready = {false};
      FAIL_IF_ERR(
          TRITONSERVER_ServerIsLive(server, live),
          "unable to get server liveness");
      FAIL_IF_ERR(
          TRITONSERVER_ServerIsReady(server, ready),
          "unable to get server readiness");
      System.out.println(
          "Server Health: live " + live[0] + ", ready " + ready[0]);
      if (live[0] && ready[0]) {
        break;
      }

      if (++health_iters >= 10) {
        FAIL("failed to find healthy inference server");
      }

      Thread.sleep(500);
    }

    // Print status of the server.
    {
      TRITONSERVER_Message server_metadata_message =
          new TRITONSERVER_Message(null);
      FAIL_IF_ERR(
          TRITONSERVER_ServerMetadata(server, server_metadata_message),
          "unable to get server metadata message");
      BytePointer buffer = new BytePointer((Pointer) null);
      SizeTPointer byte_size = new SizeTPointer(1);
      FAIL_IF_ERR(
          TRITONSERVER_MessageSerializeToJson(
              server_metadata_message, buffer, byte_size),
          "unable to serialize server metadata message");

      System.out.println("Server Status:");
      System.out.println(buffer.limit(byte_size.get()).getString());

      FAIL_IF_ERR(
          TRITONSERVER_MessageDelete(server_metadata_message),
          "deleting status metadata");
    }

    // Wait for the model to become available. Once version 1 of the model
    // reports ready, its metadata is fetched and validated in the same
    // iteration, after which the loop condition ends the loop.
    boolean[] is_torch_model = {false};
    boolean[] is_ready = {false};
    health_iters = 0;
    while (!is_ready[0]) {
      FAIL_IF_ERR(
          TRITONSERVER_ServerModelIsReady(server, model_name, 1, is_ready),
          "unable to get model readiness");
      if (!is_ready[0]) {
        if (++health_iters >= 10) {
          FAIL("model failed to be ready in 10 iterations");
        }
        Thread.sleep(500);
        continue;
      }

      TRITONSERVER_Message model_metadata_message =
          new TRITONSERVER_Message(null);
      FAIL_IF_ERR(
          TRITONSERVER_ServerModelMetadata(
              server, model_name, 1, model_metadata_message),
          "unable to get model metadata message");
      BytePointer buffer = new BytePointer((Pointer) null);
      SizeTPointer byte_size = new SizeTPointer(1);
      FAIL_IF_ERR(
          TRITONSERVER_MessageSerializeToJson(
              model_metadata_message, buffer, byte_size),
          "unable to serialize model status protobuf");

      JsonParser parser = new JsonParser();
      JsonObject model_metadata = null;
      try {
        model_metadata = parser.parse(buffer.limit(byte_size.get()).getString())
                             .getAsJsonObject();
      }
      catch (Exception e) {
        FAIL("error: failed to parse model metadata from JSON: " + e);
      }

      // The JSON text has been parsed into 'model_metadata' above, so the
      // message can be deleted now.
      FAIL_IF_ERR(
          TRITONSERVER_MessageDelete(model_metadata_message),
          "deleting status protobuf");

      if (!model_metadata.get("name").getAsString().equals(model_name)) {
        FAIL("unable to find metadata for model");
      }

      boolean found_version = false;
      if (model_metadata.has("versions")) {
        for (JsonElement version :
             model_metadata.get("versions").getAsJsonArray()) {
          if (version.getAsString().equals("1")) {
            found_version = true;
            break;
          }
        }
      }
      if (!found_version) {
        FAIL("unable to find version 1 status for model");
      }

      FAIL_IF_ERR(
          ParseModelMetadata(model_metadata, is_torch_model),
          "parsing model metadata");
    }

    // Create the allocator that will be used to allocate buffers for
    // the result tensors.
    TRITONSERVER_ResponseAllocator allocator =
        new TRITONSERVER_ResponseAllocator(null);
    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorNew(
            allocator, responseAlloc, responseRelease, null /* start_fn */),
        "creating response allocator");

    // Inference. A single request object is created here and reused for
    // every request of the sequence.
    TRITONSERVER_InferenceRequest irequest =
        new TRITONSERVER_InferenceRequest(null);
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestNew(
            irequest, server, model_name, -1 /* model_version */),
        "creating inference request");

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"),
        "setting ID for the request");

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetReleaseCallback(
            irequest, inferRequestComplete, null /* request_release_userp */),
        "setting request release callback");

    // Inputs. PyTorch models use the "INPUT__0" / "OUTPUT__0" tensor names,
    // all other models "INPUT" / "OUTPUT".
    String input0 = is_torch_model[0] ? "INPUT__0" : "INPUT";

    long[] input0_shape = {1};

    int datatype = TRITONSERVER_TYPE_INT32;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAddInput(
            irequest, input0, datatype, input0_shape, input0_shape.length),
        "setting input 0 meta-data for the request");

    String output0 = is_torch_model[0] ? "OUTPUT__0" : "OUTPUT";

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0),
        "requesting output 0 for the request");

    // Non-zero ID for the sequence requests
    long correlation_id = 5;
    // Number of requests in the sequence
    int num_requests = 9;
    // Expected_result is  1+2+3+...+num_requests
    int expected_result = num_requests * (1 + num_requests) / 2;
    boolean sequence_start = true;
    boolean sequence_end = false;

    // Create the initial data for the input tensor. The buffer is attached
    // to the request once; each loop iteration below overwrites its value
    // in place.
    IntPointer[] p0 = {new IntPointer(1)};
    BytePointer input0_data = p0[0].getPointer(BytePointer.class);
    long input0_size = input0_data.limit();

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input0, input0_data, input0_size, requested_memory_type,
            0 /* memory_type_id */),
        "assigning INPUT0 data");

    for (int i = 0; i < num_requests; i++) {
      // Update input value
      int input = i + 1;
      p0[0].put(0, input);

      // Set sequence metadata: only the first request carries the start
      // flag and only the last one the end flag.
      if (i == 1) {
        sequence_start = false;
      }
      if (i == num_requests - 1) {
        sequence_end = true;
      }
      SetSequenceMetadata(
          irequest, correlation_id, sequence_start, sequence_end);

      // Perform inference... The future is registered under the request
      // pointer, which is also passed as the response callback's user
      // pointer so that InferResponseComplete can look the future up.
      CompletableFuture<TRITONSERVER_InferenceResponse> completed =
          new CompletableFuture<>();
      futures.put(irequest, completed);

      FAIL_IF_ERR(
          TRITONSERVER_InferenceRequestSetResponseCallback(
              irequest, allocator, null /* response_allocator_userp */,
              inferResponseComplete, irequest),
          "setting response callback");

      FAIL_IF_ERR(
          TRITONSERVER_ServerInferAsync(server, irequest, null /* trace */),
          "running inference");

      // Wait for the inference to complete.
      TRITONSERVER_InferenceResponse completed_response = completed.get();
      futures.remove(irequest);

      FAIL_IF_ERR(
          TRITONSERVER_InferenceResponseError(completed_response),
          "response status");

      Check(
          model_name, completed_response, input, output0, input0_size, datatype,
          sequence_end, expected_result);

      FAIL_IF_ERR(
          TRITONSERVER_InferenceResponseDelete(completed_response),
          "deleting inference response");
    }

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestDelete(irequest),
        "deleting inference request");

    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorDelete(allocator),
        "deleting response allocator");

    System.exit(0);
  }
}


================================================
FILE: qa/L0_java_sequence_batcher/test.sh
================================================
#!/bin/bash
# Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The repository version comes from NVIDIA_TRITON_SERVER_VERSION unless it is
# given as the first argument; TEST_REPO_ARCH, when set, is appended to it.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Models
DATADIR=/data/inferenceserver/${REPO_VERSION}
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="https://github.com/triton-inference-server"}
JAVACPP_BRANCH=${JAVACPP_BRANCH:="https://github.com/bytedeco/javacpp-presets.git"}
JAVACPP_BRANCH_TAG=${JAVACPP_BRANCH_TAG:="master"}

# Set up test files based on installation instructions
# https://github.com/bytedeco/javacpp-presets/blob/master/tritonserver/README.md
# Any failure while cloning or building aborts the script immediately.
set -e
git clone --single-branch --depth=1 -b ${TRITON_CLIENT_REPO_TAG} ${TRITON_REPO_ORGANIZATION}/client.git
source client/src/java-api-bindings/scripts/install_dependencies_and_build.sh -b $PWD --javacpp-branch ${JAVACPP_BRANCH} --javacpp-tag ${JAVACPP_BRANCH_TAG} --keep-build-dependencies
cd ..

CLIENT_LOG="client.log"
MODEL_REPO=`pwd`/models
SAMPLES_REPO=`pwd`/javacpp-presets/tritonserver/samples/simple
BASE_COMMAND="mvn clean compile -f $SAMPLES_REPO exec:java -Djavacpp.platform=linux-x86_64"
source ../common/util.sh

# Replace the sample's entry point with the sequence batcher test.
cp SequenceTest.java $SAMPLES_REPO
sed -i 's/Simple/SequenceTest/g' $SAMPLES_REPO/pom.xml

rm -f *.log
RET=0

for BACKEND in libtorch onnx; do
    # Create local model repository
    mkdir -p ${MODEL_REPO}
    MODEL=${BACKEND}_nobatch_sequence_int32
    cp -r $DATADIR/qa_sequence_model_repository/${MODEL}/ ${MODEL_REPO}/
    sed -i "s/kind: KIND_GPU/kind: KIND_CPU/" ${MODEL_REPO}/$MODEL/config.pbtxt

    # Disable errexit while the test runs: otherwise a failing run would
    # abort the script before the failure could be recorded and reported.
    set +e
    BACKEND_RET=0

    # Run with default settings
    $BASE_COMMAND -Dexec.args="-r $MODEL_REPO -m ${MODEL}" >>${CLIENT_LOG} 2>&1
    if [ $? -ne 0 ]; then
        BACKEND_RET=1
    fi

    # Check results. The substitution is quoted so that an empty result is
    # compared as a string instead of breaking the test expression.
    if [ "$(grep -c "${MODEL} test PASSED" ${CLIENT_LOG})" != "1" ]; then
        echo -e "\n***\n*** ${BACKEND} sequence batcher test FAILED. Expected '${MODEL} test PASSED'\n***"
        BACKEND_RET=1
    fi

    # Show the log of a failed run before it is removed below.
    if [ $BACKEND_RET -ne 0 ]; then
        cat ${CLIENT_LOG}
        RET=1
    fi
    set -e

    rm -r ${MODEL_REPO}
    rm ${CLIENT_LOG}
done

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_java_simple_example/test.sh
================================================
#!/bin/bash
# Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Set up test files based on installation instructions
# https://github.com/bytedeco/javacpp-presets/blob/master/tritonserver/README.md
# Repository version: the first argument when one is given, otherwise
# NVIDIA_TRITON_SERVER_VERSION. It must not be empty.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Repository locations, each overridable from the environment.
: "${TRITON_REPO_ORGANIZATION:=https://github.com/triton-inference-server}"
: "${JAVACPP_BRANCH:=https://github.com/bytedeco/javacpp-presets.git}"
: "${JAVACPP_BRANCH_TAG:=master}"

# Clone the client repository and build the Java bindings; abort on any error.
set -e
git clone --single-branch --depth=1 -b ${TRITON_CLIENT_REPO_TAG} ${TRITON_REPO_ORGANIZATION}/client.git
source client/src/java-api-bindings/scripts/install_dependencies_and_build.sh -b $PWD --javacpp-branch ${JAVACPP_BRANCH} --javacpp-tag ${JAVACPP_BRANCH_TAG} --keep-build-dependencies
cd ..

# Paths and the Maven command shared by the test functions below.
CLIENT_LOG="client_cpu_only.log"
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
MODEL_REPO=$(pwd)/models

SAMPLES_REPO=$(pwd)/javacpp-presets/tritonserver/samples/simple
BASE_COMMAND="mvn clean compile -f $SAMPLES_REPO exec:java -Djavacpp.platform=linux-x86_64"
source ../common/util.sh


# Start from a clean slate: no old logs, no recorded failure.
rm -f *.log
RET=0

# Runs the sample against the models copied from L0_simple_ensemble three
# times (default settings, verbose logging, system memory) and checks the
# accumulated client log after each run. Failures are recorded in RET.
#
# errexit stays disabled for the whole function body and is re-enabled at the
# end: with errexit on, a failing test command would terminate the script
# before its exit status could be checked and reported.
function run_cpu_tests_int32() {
    # Create local model repository
    set +e
    rm -r ${MODEL_REPO}
    cp -r `pwd`/../L0_simple_ensemble/models .
    mkdir ${MODEL_REPO}/ensemble_add_sub_int32_int32_int32/1

    # Run with default settings
    $BASE_COMMAND -Dexec.args="-r $MODEL_REPO" >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "Failed to run: ${BASE_COMMAND} -Dexec.args=\"-r ${MODEL_REPO}\""
        RET=1
    fi

    # The grep counts are quoted so that an empty result is compared as a
    # string instead of breaking the test expression.
    if [ "$(grep -c "1 - 1 = 0" ${CLIENT_LOG})" != "18" ]; then
        echo -e "\n***\n*** Failed. Expected 18 '1 - 1 = 0'\n***"
        RET=1
    fi

    # Run with verbose logging
    $BASE_COMMAND -Dexec.args="-r $MODEL_REPO -v" >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "Failed to run: ${BASE_COMMAND} -Dexec.args=\"-r ${MODEL_REPO} -v\""
        RET=1
    fi

    if [ "$(grep -c "Server side auto-completed config" ${CLIENT_LOG})" != "2" ]; then
        echo -e "\n***\n*** Failed. Expected 'Server side auto-completed config'\n***"
        RET=1
    fi

    # Run with memory set to system
    $BASE_COMMAND -Dexec.args="-r $MODEL_REPO -m system" >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "Failed to run: ${BASE_COMMAND} -Dexec.args=\"-r ${MODEL_REPO} -m system\""
        RET=1
    fi

    if [ "$(grep -c "OUTPUT0 is stored in system memory" ${CLIENT_LOG})" != "9" ]; then
        echo -e "\n***\n*** Failed. Expected 9 'OUTPUT0 is stored in system memory'\n***"
        RET=1
    fi

    set -e
}

# For each float32 QA model in the loop below, install it in ${MODEL_REPO}
# under the name "simple" and run the sample once with verbose logging and
# once per enforced memory type. Each run writes to its own log file, which
# is dumped to stdout when the run fails. Any failure sets RET=1.
function run_cpu_tests_fp32() {
    # Failures are detected through "$?" below, so errexit must be off.
    set +e
    for trial in onnx; do
        full=${trial}_float32_float32_float32
        rm -rf ${MODEL_REPO}
        # Copy the model and rewrite its config so that it is served as
        # "simple" and no longer references a label file.
        mkdir -p ${MODEL_REPO}/simple/1 && \
            cp -r $DATADIR/${full}/1/* ${MODEL_REPO}/simple/1/. && \
            cp $DATADIR/${full}/config.pbtxt ${MODEL_REPO}/simple/. && \
            (cd ${MODEL_REPO}/simple && \
                    sed -i "s/^name:.*/name: \"simple\"/" config.pbtxt && \
                    sed -i "s/label_filename:.*//" config.pbtxt)


        # No memory type enforcement
        $BASE_COMMAND -Dexec.args="-r $MODEL_REPO -v" >>$CLIENT_LOG.$full.log 2>&1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG.$full.log
            echo -e "Failed to run: ${BASE_COMMAND} -Dexec.args=\"-r ${MODEL_REPO} -v\" for ${full}"
            RET=1
        fi

        # Enforce I/O to be in specific memory type
        for MEM_TYPE in system; do
            $BASE_COMMAND -Dexec.args="-r $MODEL_REPO -m ${MEM_TYPE}" >>$CLIENT_LOG.$full.${MEM_TYPE}.log 2>&1
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG.$full.$MEM_TYPE.log
                # Report exactly the arguments that were used above (this run
                # does not pass -v).
                echo -e "Failed to run: ${BASE_COMMAND} -Dexec.args=\"-r ${MODEL_REPO} -m ${MEM_TYPE}\" for ${full}"
                RET=1
            fi
        done
    done
    set -e
}


# Run ensemble
# Temporarily rewrites Simple.java so that the sample requests the model
# "ensemble_add_sub_int32_int32_int32" instead of "simple", runs it in
# verbose mode against the models copied from L0_simple_ensemble, restores
# Simple.java and checks the log for the expected request lines.
# Any failure sets RET=1.
function run_ensemble_tests() {
    # errexit stays disabled until the end of the function: with it enabled,
    # a failing sample run would abort the script before the failure is
    # reported and before Simple.java is restored.
    set +e
    rm -r ${MODEL_REPO}
    cp -r `pwd`/../L0_simple_ensemble/models .
    mkdir -p ${MODEL_REPO}/ensemble_add_sub_int32_int32_int32/1
    sed -i 's/"simple"/"ensemble_add_sub_int32_int32_int32"/g' $SAMPLES_REPO/Simple.java
    # Record the sample's pom.xml in the log.
    cat $SAMPLES_REPO/pom.xml >>$CLIENT_LOG 2>&1

    $BASE_COMMAND -Dexec.args="-r $MODEL_REPO -v" >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "Failed to run ensemble model: ${BASE_COMMAND} -Dexec.args=\"-r ${MODEL_REPO} -v\""
        RET=1
    fi
    # Undo the model-name substitution whether or not the run succeeded.
    sed -i 's/"ensemble_add_sub_int32_int32_int32"/"simple"/g' $SAMPLES_REPO/Simple.java

    if [ `grep -c "request id: my_request_id, model: ensemble_add_sub_int32_int32_int32" ${CLIENT_LOG}` != "3" ]; then
        echo -e "\n***\n*** Failed. Expected 3 'request id: my_request_id, model: ensemble_add_sub_int32_int32_int32'\n***"
        RET=1
    fi
    set -e
}

# Run tests on simple example
echo -e "\nRunning Simple Tests\n"

# The suites report failures through RET, so they are simply invoked in order.
for suite in run_cpu_tests_fp32 run_cpu_tests_int32 run_ensemble_tests; do
    $suite
done

# Print the overall verdict and use RET as the exit status.
if [ $RET -ne 0 ]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_json/test.sh
================================================
#!/bin/bash
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Run the Triton JSON unit-test binary, appending its output to a log file.
# The log is dumped only when the binary reports a failure.
TEST_LOG="./triton_json_test.log"
UNIT_TEST="./triton_json_test --gtest_output=xml:triton_json.report.xml"
RET=0

# UNIT_TEST is expanded unquoted on purpose so that it splits into the
# executable and its argument.
if ! $UNIT_TEST >> $TEST_LOG 2>&1; then
    cat $TEST_LOG
    echo -e "\n***\n*** Triton Json Unit Test Failed\n***"
    RET=1
fi

if [ $RET -ne 0 ]; then
  echo -e "\n***\n*** Test FAILED\n***"
else
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_large_payload/large_payload_test.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import math
import unittest

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException, np_to_triton_dtype


class LargePayLoadTest(tu.TestResultCollector):
    """Send large tensors to identity models over HTTP and gRPC.

    For every client, ``_test_helper`` sends a ~1.9 GB tensor (skipped for
    models whose name starts with "plan") and checks that it is echoed back;
    over HTTP only, it then sends a 3 GB tensor and expects
    ``InferenceServerException``; finally it sends a one-element tensor to
    confirm that the server still answers.
    """

    def setUp(self):
        """Create the random input tensors and one client per protocol."""
        self._data_type = np.float32

        # Very large tensor will always fail for gRPC because the Protobuf has a
        # hard limit on 2GBs for the size of input tensors. All backends except
        # plan backend should be able to handle payloads larger than 2GBs using
        # HTTP.
        very_large_tensor_shape = (
            math.trunc(3 * (1024 * 1024 * 1024) / np.dtype(self._data_type).itemsize),
        )
        self._very_large_in0 = np.random.random(very_large_tensor_shape).astype(
            self._data_type
        )

        # 1.9 GBs allows us to test gRPC with moderate sizes too.
        large_tensor_shape = (
            math.trunc(
                1.9 * (1024 * 1024 * 1024) // np.dtype(self._data_type).itemsize
            ),
        )
        self._large_in0 = np.random.random(large_tensor_shape).astype(self._data_type)

        small_tensor_shape = (1,)
        self._small_in0 = np.random.random(small_tensor_shape).astype(self._data_type)

        # Each entry pairs the client module (used to build InferInput objects)
        # with a connected client instance of that module.
        self._clients = (
            (httpclient, httpclient.InferenceServerClient("localhost:8000")),
            (grpcclient, grpcclient.InferenceServerClient("localhost:8001")),
        )

    def tearDown(self):
        """Close the clients opened in ``setUp`` so connections are released."""
        for _, triton_client in self._clients:
            triton_client.close()
        super().tearDown()

    def _make_inputs(self, client_module, input_name, tensor):
        """Return a one-element input list that carries ``tensor``.

        ``client_module`` is the tritonclient module (HTTP or gRPC) whose
        ``InferInput`` class is used to wrap the data.
        """
        infer_input = client_module.InferInput(
            input_name, tensor.shape, np_to_triton_dtype(self._data_type)
        )
        infer_input.set_data_from_numpy(tensor)
        return [infer_input]

    def _test_helper(
        self, client, model_name, input_name="INPUT0", output_name="OUTPUT0"
    ):
        """Run the large / very large / small payload sequence on one client.

        ``client`` is a ``(module, client instance)`` pair taken from
        ``self._clients``.
        """
        client_module, triton_client = client

        # plan does not support large batch sizes.
        if not model_name.startswith("plan"):
            inputs = self._make_inputs(client_module, input_name, self._large_in0)
            results = triton_client.infer(model_name, inputs)

            # if the inference is completed, examine results to ensure that
            # the framework and protocol do support large payload
            self.assertTrue(
                np.array_equal(self._large_in0, results.as_numpy(output_name)),
                "output is different from input",
            )

        if client_module is httpclient:
            # FIXME HTTPServer cannot support large payloads. See DLIS-1776.
            inputs = self._make_inputs(
                client_module, input_name, self._very_large_in0
            )
            with self.assertRaises(InferenceServerException):
                triton_client.infer(model_name, inputs)

        # FIXME Test is terminated due to libprotobuf FATAL error when GRPC sends
        # the second request with input tensors larger than 1.3GBs. In this test
        # GRPC has been currently exempted from testing for Very Large tensor(3GBs)
        # until the problem is resolved. Should be uncommented once the GRPC issue is resolved.
        # See DLIS-2474.
        # if client_module is grpcclient:
        #     inputs = self._make_inputs(
        #         client_module, input_name, self._very_large_in0
        #     )
        #     # GRPC must fail for large payloads because of a 2GB protobuf limit
        #     with self.assertRaises(InferenceServerException):
        #         triton_client.infer(model_name, inputs)

        # Send a small payload to verify if the server is still functional
        inputs = self._make_inputs(client_module, input_name, self._small_in0)
        results = triton_client.infer(model_name, inputs)
        self.assertTrue(
            np.array_equal(self._small_in0, results.as_numpy(output_name)),
            "output is different from input",
        )

    def _run_on_all_clients(
        self, model_prefix, input_name="INPUT0", output_name="OUTPUT0"
    ):
        """Run ``_test_helper`` on every client for the given model prefix."""
        model_name = tu.get_zero_model_name(model_prefix, 1, self._data_type)
        for client in self._clients:
            self._test_helper(client, model_name, input_name, output_name)

    def test_onnx(self):
        # onnx_nobatch_zero_1_float32 is identity model with input shape [-1]
        self._run_on_all_clients("onnx_nobatch")

    def test_python(self):
        # python_nobatch_zero_1_float32 is identity model with input shape [-1]
        self._run_on_all_clients("python_nobatch")

    def test_plan(self):
        # plan_nobatch_zero_1_float32 is identity model with input shape [-1]
        self._run_on_all_clients("plan_nobatch")

    def test_libtorch(self):
        # libtorch_nobatch_zero_1_float32 is identity model with input shape [-1]
        self._run_on_all_clients("libtorch_nobatch", "INPUT__0", "OUTPUT__0")

    def test_custom(self):
        # custom_zero_1_float32 is identity model with input shape [-1]
        self._run_on_all_clients("custom")


# Run the unittest command line when executed as a script; a single test can
# be selected by name, e.g. "LargePayLoadTest.test_onnx".
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_large_payload/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: the first positional argument when one is given
# (even if empty), otherwise NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Optional architecture suffix for the data repository version.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Client side: test script, result file and per-target log prefix.
LARGE_PAYLOAD_TEST_PY=large_payload_test.py
TEST_RESULT_FILE='test_results.txt'
CLIENT_LOG_BASE="./client.log"

# Server side: the model repository is rebuilt for every target.
DATADIR=$(pwd)/models
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR --log-verbose=1"
SERVER_LOG_BASE="./inference_server.log"
source ../common/util.sh

# Drop the per-target logs of earlier runs.
rm -f $SERVER_LOG_BASE* $CLIENT_LOG_BASE*

RET=0

# Stage every model under all_models/; the test loop later copies one model
# at a time into the served repository.
MODEL_SUFFIX=nobatch_zero_1_float32
rm -fr all_models && mkdir all_models
for TARGET in onnx libtorch plan; do
    cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/${TARGET}_$MODEL_SUFFIX \
       all_models/.
done

# The python model is derived from identity_fp32: its config is renamed and
# its max_batch_size is changed from 64 to 0.
mkdir -p all_models/python_$MODEL_SUFFIX/1/
cp ../python_models/identity_fp32/config.pbtxt all_models/python_$MODEL_SUFFIX/
(cd all_models/python_$MODEL_SUFFIX && \
            sed -i \
                -e "s/max_batch_size: 64/max_batch_size: 0/" \
                -e "s/name: \"identity_fp32\"/name: \"python_$MODEL_SUFFIX\"/" \
                config.pbtxt)

cp ../python_models/identity_fp32/model.py all_models/python_$MODEL_SUFFIX/1/model.py

# Restart server before every test to make sure server state
# is invariant to previous test
for TARGET in onnx libtorch plan python; do
    # Serve only the model of the current target.
    rm -fr models && mkdir models && \
        cp -r all_models/${TARGET}_$MODEL_SUFFIX models/.

    SERVER_LOG=$SERVER_LOG_BASE.$TARGET
    CLIENT_LOG=$CLIENT_LOG_BASE.$TARGET

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # The client is allowed to fail: failures are recorded in RET.
    set +e

    if ! python $LARGE_PAYLOAD_TEST_PY LargePayLoadTest.test_$TARGET >$CLIENT_LOG 2>&1; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    elif ! check_test_results $TEST_RESULT_FILE 1; then
        # The client exited cleanly but the result file does not show
        # the expected single test.
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi

    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# Print a final verdict for both outcomes. Failures are reported per target
# while the loop runs, but output of later targets may follow them, so the
# end of the log states the overall result explicitly.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_libtorch_disable_cudnn/test.sh
================================================
#!/bin/bash
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: the first positional argument when one is given
# (even if empty), otherwise NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Optional architecture suffix for the data repository version.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Client script and the repository the libtorch model is copied from.
LIBTORCH_INFER_CLIENT_PY=../common/libtorch_infer_client.py
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository

# Server binary, its arguments and the log files of both sides.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models --log-verbose=1"
SERVER_LOG="./inference_server.log"
CLIENT_LOG="./client.log"
source ../common/util.sh

RET=0

# Remove logs of earlier runs once, before the loop. The client log is
# appended to by every iteration and dumped on failure at the end, so it must
# survive from one iteration to the next.
rm -f *.log

# Serve the libtorch model once with DISABLE_CUDNN=true and once with
# DISABLE_CUDNN=false, run the client, and check that the server log reports
# the matching cuDNN state.
for FLAG in true false; do
    # The occurrence count below must only see the current server run.
    rm -f $SERVER_LOG
    mkdir -p models && cp -r $DATADIR/libtorch_int32_int32_int32 models/.

    echo """
    parameters: {
        key: \"DISABLE_CUDNN\"
        value: {
            string_value: \"$FLAG\"
        }
    }""" >> models/libtorch_int32_int32_int32/config.pbtxt

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    set +e

    python $LIBTORCH_INFER_CLIENT_PY >> $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        RET=1
    fi

    # DISABLE_CUDNN=true is expected to be logged as "disabled".
    CUDNN_LOG="cuDNN is "
    if [ "$FLAG" == "true" ]; then
        CUDNN_LOG+=disabled
    else
        CUDNN_LOG+=enabled
    fi

    if [ `grep -c "$CUDNN_LOG" $SERVER_LOG` != "3" ]; then
        echo -e "\n***\n*** Failed. Expected 3 $CUDNN_LOG in log\n***"
        RET=1
    fi

    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    rm -rf models
done

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_libtorch_inference_mode/test.sh
================================================
#!/bin/bash
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: the first positional argument when one is given
# (even if empty), otherwise NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Optional architecture suffix for the data repository version.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Client script and the repository the libtorch model is copied from.
LIBTORCH_INFER_CLIENT_PY=../common/libtorch_infer_client.py
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository

# Server binary, its arguments and the log files of both sides.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models --log-verbose=1"
SERVER_LOG="./inference_server.log"
CLIENT_LOG="./client.log"
source ../common/util.sh

RET=0

# Remove logs of earlier runs once, before the loop. The client log is
# appended to by every iteration and dumped on failure at the end, so it must
# survive from one iteration to the next.
rm -f *.log

# Serve the libtorch model once with INFERENCE_MODE=true and once with
# INFERENCE_MODE=false, run the client, and check that the server log reports
# the matching inference-mode state.
for FLAG in true false; do
    # The occurrence count below must only see the current server run.
    rm -f $SERVER_LOG
    mkdir -p models && cp -r $DATADIR/libtorch_int32_int32_int32 models/.

    echo """
    parameters: {
        key: \"INFERENCE_MODE\"
        value: {
            string_value: \"$FLAG\"
        }
    }""" >> models/libtorch_int32_int32_int32/config.pbtxt

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    set +e

    python $LIBTORCH_INFER_CLIENT_PY >> $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        RET=1
    fi

    # INFERENCE_MODE=true is expected to be logged as "enabled".
    INFERMODE_LOG="Inference Mode is "
    if [ "$FLAG" == "true" ]; then
        INFERMODE_LOG+=enabled
    else
        INFERMODE_LOG+=disabled
    fi

    if [ `grep -c "$INFERMODE_LOG" $SERVER_LOG` != "3" ]; then
        echo -e "\n***\n*** Failed. Expected 3 $INFERMODE_LOG in log\n***"
        RET=1
    fi

    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    rm -rf models
done

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_libtorch_instance_group_kind_model/client.py
================================================
#!/usr/bin/env python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append("../common")

import unittest

import numpy as np
import test_util as tu
import tritonclient.http as httpclient

# By default, find tritonserver on "localhost", but can be overridden
# with TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class InferTest(tu.TestResultCollector):
    """Run one HTTP inference against the model named by ``MODEL_NAME``."""

    def test_infer(self):
        """Send two FP32 [1, 16] inputs and check the sum and difference outputs."""
        try:
            triton_client = httpclient.InferenceServerClient(
                url=f"{_tritonserver_ipaddr}:8000"
            )
        except Exception as e:
            print("channel creation failed: " + str(e))
            sys.exit(1)

        # The model under test is chosen by the caller through the environment.
        model_name = os.environ["MODEL_NAME"]

        # Create the data for the two input tensors: 0..15 and 32..47, each
        # with a leading batch dimension of 1.
        input0_data = np.arange(start=0, stop=16, dtype=np.float32)[np.newaxis, :]
        input1_data = np.arange(start=32, stop=48, dtype=np.float32)[np.newaxis, :]

        inputs = [
            httpclient.InferInput("INPUT0", [1, 16], "FP32"),
            httpclient.InferInput("INPUT1", [1, 16], "FP32"),
        ]
        # Initialize the data
        for infer_input, tensor in zip(inputs, (input0_data, input1_data)):
            infer_input.set_data_from_numpy(tensor, binary_data=True)

        outputs = [
            httpclient.InferRequestedOutput(output_name, binary_data=True)
            for output_name in ("OUTPUT__0", "OUTPUT__1")
        ]

        results = triton_client.infer(model_name, inputs, outputs=outputs)

        output0_data = results.as_numpy("OUTPUT__0")
        output1_data = results.as_numpy("OUTPUT__1")

        # OUTPUT__0 is expected to hold the sum and OUTPUT__1 the difference.
        expected_output_0 = input0_data + input1_data
        expected_output_1 = input0_data - input1_data

        self.assertEqual(output0_data.shape, (1, 16))
        self.assertEqual(output1_data.shape, (1, 16))

        self.assertTrue(np.all(expected_output_0 == output0_data))
        self.assertTrue(np.all(expected_output_1 == output1_data))


# Run the unittest command line when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_libtorch_instance_group_kind_model/gen_models.py
================================================
#!/usr/bin/python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch
import torch.nn as nn


class SumModule(nn.Module):
    """Add two tensors after moving both of them to a fixed device."""

    def __init__(self, device):
        super().__init__()
        # Device both operands are moved to before the addition.
        self.device = device

    def forward(self, INPUT0, INPUT1):
        # Move the operands to this layer's device and report where they are.
        lhs = INPUT0.to(self.device)
        rhs = INPUT1.to(self.device)
        print(
            "SumModule - INPUT0 device: {}, INPUT1 device: {}\n".format(
                lhs.device, rhs.device
            )
        )
        return lhs + rhs


class DiffModule(nn.Module):
    """Subtract two tensors after moving both of them to a fixed device."""

    def __init__(self, device):
        super().__init__()
        # Device both operands are moved to before the subtraction.
        self.device = device

    def forward(self, INPUT0, INPUT1):
        # Move the operands to this layer's device and report where they are.
        minuend = INPUT0.to(self.device)
        subtrahend = INPUT1.to(self.device)
        print(
            "DiffModule - INPUT0 device: {}, INPUT1 device: {}\n".format(
                minuend.device, subtrahend.device
            )
        )
        return minuend - subtrahend


class TestModel(nn.Module):
    """Model whose sum layer and difference layer may sit on different devices."""

    def __init__(self, device0, device1):
        super().__init__()
        self.device0 = device0
        self.device1 = device1

        # layer1 adds on device0, layer2 subtracts on device1.
        self.layer1 = SumModule(device0)
        self.layer2 = DiffModule(device1)

    def forward(self, INPUT0, INPUT1):
        """Return ``(layer1 result, layer2 result)`` for the same input pair."""
        return self.layer1(INPUT0, INPUT1), self.layer2(INPUT0, INPUT1)


# The device pairs below reference GPUs up to cuda:3, so the script refuses to
# run when fewer than four CUDA devices are visible.
if torch.cuda.device_count() < 4:
    print("Need at least 4 GPUs to run this test")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    raise SystemExit(1)

# devices[i] is the (sum-layer device, diff-layer device) pair used for
# model_names[i].
devices = [("cuda:2", "cuda:0"), ("cpu", "cuda:3")]
model_names = ["libtorch_multi_gpu", "libtorch_multi_device"]

for device_pair, model_name in zip(devices, model_names):
    model = TestModel(device_pair[0], device_pair[1])
    # The version directory models/<name>/1 must already exist; save() is
    # handed the full file path.
    model_path = "models/" + model_name + "/1/model.pt"
    scripted_model = torch.jit.script(model)
    scripted_model.save(model_path)


================================================
FILE: qa/L0_libtorch_instance_group_kind_model/models/libtorch_multi_device/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the KIND_MODEL instance-group test.
name: "libtorch_multi_device"
platform: "pytorch_libtorch"
max_batch_size: 8

# Two FP32 inputs of 16 elements each (batch dimension excluded).
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
# NOTE(review): both outputs declare dims [ 4 ] while the inputs are [ 16 ].
# The model generated for this config returns INPUT0 + INPUT1 and
# INPUT0 - INPUT1, which would keep the 16-element shape -- confirm whether
# [ 4 ] is intentional.
output [
  {
    name: "OUTPUT__0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT__1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]

# KIND_MODEL: no explicit device is assigned to the instance in this config.
instance_group [
  {
    kind: KIND_MODEL
  }
]


================================================
FILE: qa/L0_libtorch_instance_group_kind_model/test.sh
================================================
#!/bin/bash
# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Pick the repository version: a first positional argument (even an empty one)
# takes precedence over NVIDIA_TRITON_SERVER_VERSION from the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# An optional architecture tag is appended as a suffix.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Reinstall torch so gen_models.py can script the test models.
# NOTE(review): -f/--find-links expects a page of archive links, whereas the
# PyTorch install instructions pass this kind of URL via --index-url; confirm
# the cu130 build is really what gets installed here.
pip3 uninstall -y torch
pip3 install torch -f https://download.pytorch.org/whl/cu130

# Settings used by the server start-up and client steps below.
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models --log-verbose=1"
SERVER_LOG="./inference_server.log"

CLIENT_PY=./client.py
CLIENT_LOG="./client.log"
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'

source ../common/util.sh

RET=0

rm -f *.log *.txt

# Both models use the same config; only the "name" field differs.
mkdir -p models/libtorch_multi_device/1 models/libtorch_multi_gpu/1
cp models/libtorch_multi_device/config.pbtxt models/libtorch_multi_gpu/.
sed -i 's/name: "libtorch_multi_device"/name: "libtorch_multi_gpu"/' \
    models/libtorch_multi_gpu/config.pbtxt

# Generate the models which are partitioned across multiple devices
set +e
if ! python3 gen_models.py >> $CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Error when generating models. \n***"
    cat $CLIENT_LOG
    exit 1
fi
set -e

# run_server reports a failed start by leaving SERVER_PID at "0".
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# Run the client against model $1, validate the unittest result file, then
# require every remaining argument to appear in the server log. Failures are
# recorded in RET; found/not-found notes are appended to CLIENT_LOG.
run_placement_check () {
    export MODEL_NAME=$1
    shift

    if python3 $CLIENT_PY >> $CLIENT_LOG 2>&1; then
        if ! check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    else
        echo -e "\n***\n*** Model $MODEL_NAME FAILED. \n***"
        cat $CLIENT_LOG
        RET=1
    fi

    for MESSAGE in "$@"; do
        if grep -q "$MESSAGE" "$SERVER_LOG"; then
            echo -e "Found \"$MESSAGE\"" >> "$CLIENT_LOG"
        else
            echo -e "Not found \"$MESSAGE\"" >> "$CLIENT_LOG"
            RET=1
        fi
    done
}

# CPU + GPU placement.
run_placement_check 'libtorch_multi_device' \
    "SumModule - INPUT0 device: cpu, INPUT1 device: cpu" \
    "DiffModule - INPUT0 device: cuda:3, INPUT1 device: cuda:3"

# GPU + GPU placement.
run_placement_check 'libtorch_multi_gpu' \
    "SumModule - INPUT0 device: cuda:2, INPUT1 device: cuda:2" \
    "DiffModule - INPUT0 device: cuda:0, INPUT1 device: cuda:0"

set -e

# Stop the server and wait for it to exit before reporting.
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_libtorch_io_names/io_names_client.py
================================================
#!/usr/bin/python
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest
from builtins import range

import numpy as np
import test_util as tu
import tritonclient.http as httpclient


class IONamingConvention(tu.TestResultCollector):
    """Checks inference through models whose configs name their tensors
    using different input/output naming styles."""

    def _infer_helper(self, model_name, io_names, reversed_order=False):
        """Run one inference on ``model_name`` and verify both outputs.

        Args:
            model_name: Name of the model to query.
            io_names: Four tensor names: two inputs followed by two outputs.
            reversed_order: When True, the two data arrays are bound to the
                input names in swapped order and the two results are read
                back from the output names in swapped order.
        """
        # First input holds the unique values 0..15; the second is filled
        # with -1.
        input0_data = np.arange(start=0, stop=16, dtype=np.float32)
        input0_data = np.expand_dims(input0_data, axis=0)
        input1_data = np.full(shape=(1, 16), fill_value=-1, dtype=np.float32)

        if reversed_order:
            input_names = (io_names[1], io_names[0])
            result_names = (io_names[3], io_names[2])
        else:
            input_names = (io_names[0], io_names[1])
            result_names = (io_names[2], io_names[3])

        inputs = []
        for name, data in zip(input_names, (input0_data, input1_data)):
            tensor = httpclient.InferInput(name, [1, 16], "FP32")
            tensor.set_data_from_numpy(data)
            inputs.append(tensor)

        # Outputs are always requested in config order; only the read-back
        # below honours reversed_order.
        output_req = [
            httpclient.InferRequestedOutput(name, binary_data=True)
            for name in (io_names[2], io_names[3])
        ]

        triton_client = httpclient.InferenceServerClient(
            "localhost:8000", verbose=False
        )
        try:
            results = triton_client.infer(model_name, inputs, outputs=output_req)
            output0_data = results.as_numpy(result_names[0])
            output1_data = results.as_numpy(result_names[1])
        finally:
            # Release the connection even when the request fails.
            triton_client.close()

        for i in range(16):
            self.assertEqual(input0_data[0][i] - input1_data[0][i], output0_data[0][i])
            self.assertEqual(input0_data[0][i] + input1_data[0][i], output1_data[0][i])

    def test_io_index(self):
        io_names = ["INPUT__0", "INPUT__1", "OUTPUT__0", "OUTPUT__1"]
        self._infer_helper("libtorch_io_index", io_names)

    def test_output_index(self):
        io_names = ["INPUT0", "INPUT1", "OUTPUT__0", "OUTPUT__1"]
        self._infer_helper("libtorch_output_index", io_names)

    def test_no_output_index(self):
        io_names = ["INPUT0", "INPUT1", "OUTPUT0", "OUTPUT1"]
        self._infer_helper("libtorch_no_output_index", io_names)

    def test_no_arguments_no_output_index(self):
        io_names = ["INPUTA", "INPUTB", "OUTPUTA", "OUTPUTB"]
        self._infer_helper("libtorch_no_arguments_output_index", io_names)

    def test_mix_index(self):
        io_names = ["INPUTA", "INPUT__1", "OUTPUTA", "OUTPUT__1"]
        self._infer_helper("libtorch_mix_index", io_names)

    def test_mix_arguments(self):
        io_names = ["INPUT0", "INPUTB", "OUTPUTA", "OUTPUT__1"]
        self._infer_helper("libtorch_mix_arguments", io_names)

    def test_mix_arguments_index(self):
        io_names = ["INPUT0", "INPUT__1", "OUTPUT0", "OUTPUT__1"]
        self._infer_helper("libtorch_mix_arguments_index", io_names)

    def test_unordered_index(self):
        # Names are listed swapped; reversed_order undoes the swap when
        # binding the data and reading the results.
        io_names = ["INPUT1", "INPUT0", "OUT__1", "OUT__0"]
        self._infer_helper("libtorch_unordered_index", io_names, reversed_order=True)


# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_libtorch_io_names/test.sh
================================================
#!/bin/bash
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Pick the repository version: a first positional argument (even an empty one)
# takes precedence over NVIDIA_TRITON_SERVER_VERSION from the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# An optional architecture tag is appended as a suffix.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

IO_NAMES_CLIENT=./io_names_client.py
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository

rm -rf models && mkdir -p models

# Copy the reference float32 libtorch model to models/$1 and rename it in its
# config.pbtxt. Every further argument is a sed expression applied to the same
# config, in the order given, to rename input/output tensors.
clone_io_names_model () {
    local name=$1
    shift
    local rename_args=(-e "s/libtorch_float32_float32_float32/${name}/")
    local expr
    for expr in "$@"; do
        rename_args+=(-e "$expr")
    done
    cp -r $DATADIR/libtorch_float32_float32_float32 models/$name && \
        sed -i "${rename_args[@]}" models/$name/config.pbtxt
}

# Prepare models
clone_io_names_model libtorch_output_index

clone_io_names_model libtorch_io_index \
    's/INPUT0/INPUT__0/' \
    's/INPUT1/INPUT__1/'

clone_io_names_model libtorch_no_output_index \
    's/OUTPUT__0/OUTPUT0/' \
    's/OUTPUT__1/OUTPUT1/'

clone_io_names_model libtorch_no_arguments_output_index \
    's/INPUT0/INPUTA/' \
    's/INPUT1/INPUTB/' \
    's/OUTPUT__0/OUTPUTA/' \
    's/OUTPUT__1/OUTPUTB/'

clone_io_names_model libtorch_mix_index \
    's/INPUT0/INPUTA/' \
    's/INPUT1/INPUT__1/' \
    's/OUTPUT__0/OUTPUTA/'

clone_io_names_model libtorch_mix_arguments \
    's/INPUT1/INPUTB/' \
    's/OUTPUT__0/OUTPUTA/'

clone_io_names_model libtorch_mix_arguments_index \
    's/INPUT1/INPUT__1/' \
    's/OUTPUT__0/OUTPUT0/'

# Swap the two input names through a temporary name, and rename the outputs
# with their indices exchanged.
clone_io_names_model libtorch_unordered_index \
    's/INPUT0/INPUT_TMP1/' \
    's/INPUT1/INPUT0/' \
    's/INPUT_TMP1/INPUT1/' \
    's/OUTPUT__0/OUT__1/' \
    's/OUTPUT__1/OUT__0/'


SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

rm -f *.log

# run_server reports a failed start by leaving SERVER_PID at "0".
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0
CLIENT_LOG=client.log

# A client failure is recorded in RET so the server is still shut down.
set +e
if ! python $IO_NAMES_CLIENT >> $CLIENT_LOG 2>&1; then
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_libtorch_io_types/test.sh
================================================
#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Pick the repository version: a first positional argument (even an empty one)
# takes precedence over NVIDIA_TRITON_SERVER_VERSION from the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# An optional architecture tag is appended as a suffix.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models"
SERVER_LOG="./server.log"
DATADIR=/data/inferenceserver/${REPO_VERSION}
source ../common/util.sh

# Start the server on the current models directory and require that it does
# NOT come up and that its log contains the message $1. $2 is the label used
# in the failure banner. Any violation ends the script with status 1.
# errexit is left enabled on return.
expect_unsupported_datatype () {
    rm -f *.log

    run_server
    if [ "$SERVER_PID" != "0" ]; then
        cat $SERVER_LOG
        echo -e "\n***\n*** Unexpected server start $SERVER\n***"
        kill $SERVER_PID
        wait $SERVER_PID
        exit 1
    fi

    set +e
    if ! grep "$1" $SERVER_LOG; then
        cat $SERVER_LOG
        echo -e "\n***\n*** Unsupported $2 datatype not found in server log\n***"
        exit 1
    fi
    set -e
}

# Test unsupported INPUT data type
rm -rf models && mkdir -p models
cp -r $DATADIR/qa_model_repository/libtorch_int32_int8_int8 models/libtorch_invalid_input_type && \
    sed -i -e 's/libtorch_int32_int8_int8/libtorch_invalid_input_type/' \
           -e 's/TYPE_INT32/TYPE_UINT32/' \
        models/libtorch_invalid_input_type/config.pbtxt

expect_unsupported_datatype \
    "unsupported datatype TYPE_UINT32 for input 'INPUT0' for model 'libtorch_invalid_input_type'" \
    "INPUT"

# Test unsupported OUTPUT data type
rm -rf models && mkdir -p models
cp -r $DATADIR/qa_model_repository/libtorch_int32_int8_int8 models/libtorch_invalid_output_type && \
    sed -i -e 's/libtorch_int32_int8_int8/libtorch_invalid_output_type/' \
           -e 's/TYPE_INT8/TYPE_UINT64/' \
        models/libtorch_invalid_output_type/config.pbtxt

expect_unsupported_datatype \
    "unsupported datatype TYPE_UINT64 for output 'OUTPUT__0' for model 'libtorch_invalid_output_type'" \
    "OUTPUT"

# Test unsupported sequence_batching data type
# The last sed joins line pairs (N), so it stays a separate invocation from
# the plain per-line substitutions before it.
rm -rf models && mkdir -p models
cp -r $DATADIR/qa_variable_sequence_model_repository/libtorch_sequence_int32 models/libtorch_invalid_sequence_int32 && \
    sed -i -e 's/libtorch_sequence_int32/libtorch_invalid_sequence_int32/' \
           -e 's/READY__2/CORRID__2/' \
           -e 's/CONTROL_SEQUENCE_READY/CONTROL_SEQUENCE_CORRID/' \
        models/libtorch_invalid_sequence_int32/config.pbtxt && \
    sed -i ':begin;$!N;s/CORRID\n\(.*\)int32_false_true: \[ 0, 1 \]/CORRID\ndata_type: TYPE_UINT32/' models/libtorch_invalid_sequence_int32/config.pbtxt

expect_unsupported_datatype \
    "input 'CORRID__2' type 'TYPE_UINT32' is not supported by PyTorch." \
    "sequence_batching"

# Test passed
echo -e "\n***\n*** Test Passed\n***"
exit 0


================================================
FILE: qa/L0_libtorch_optimized_execution/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Pick the repository version: a first positional argument (even an empty one)
# takes precedence over NVIDIA_TRITON_SERVER_VERSION from the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# An optional architecture tag is appended as a suffix.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

LIBTORCH_INFER_CLIENT_PY=../common/libtorch_infer_client.py

DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models --log-verbose=1"
SERVER_LOG="./inference_server.log"
CLIENT_LOG="./client.log"
source ../common/util.sh

RET=0

# Clear stale logs once, before the loop. Doing this inside the loop would
# delete the first pass's client.log, leaving nothing to show at the end if
# that pass was the one that failed.
rm -f *.log

# Run the same inference test with DISABLE_OPTIMIZED_EXECUTION set to each
# value and check that the server logs the matching state.
for FLAG in true false; do
    # Only the server log is reset per pass, so the grep count below sees
    # this pass alone; the client log keeps accumulating across passes.
    rm -f $SERVER_LOG
    mkdir -p models && cp -r $DATADIR/libtorch_int32_int32_int32 models/.

    echo """
    parameters: {
        key: \"DISABLE_OPTIMIZED_EXECUTION\"
        value: {
            string_value: \"$FLAG\"
        }
    }""" >> models/libtorch_int32_int32_int32/config.pbtxt

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    set +e

    python $LIBTORCH_INFER_CLIENT_PY >> $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        RET=1
    fi

    # The flag DISABLES the optimization, so "true" must log "disabled".
    OPTIMIZED_LOG="Optimized execution is "
    if [ "$FLAG" == "true" ]; then
        OPTIMIZED_LOG+=disabled
    else
        OPTIMIZED_LOG+=enabled
    fi

    # NOTE(review): the expected count of 3 is presumably one log line per
    # loaded model instance -- confirm against the model's instance setup.
    if [ `grep -c "$OPTIMIZED_LOG" $SERVER_LOG` != "3" ]; then
        echo -e "\n***\n*** Failed. Expected 3 $OPTIMIZED_LOG in log\n***"
        RET=1
    fi

    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    rm -rf models
done

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_libtorch_shared_weights/libtorch_shared_weights_test.py
================================================
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest
from builtins import range

import numpy as np
import test_util as tu
import tritonhttpclient as httpclient

# Module-level placeholder; nothing else in this file reads or assigns it.
FLAGS = None


class SharedWeightsTest(tu.TestResultCollector):
    """Sends many concurrent requests to a model and expects each response
    to match its own input."""

    def _full_exact(self, model_name, request_concurrency, shape):
        """Issue ``request_concurrency`` async requests and verify each one.

        Args:
            model_name: Name of the model to query.
            request_concurrency: Number of requests sent before any result
                is collected; also the client's connection concurrency.
            shape: Shape of the random FP32 tensor sent with each request.
        """
        # Run async requests to make sure backend handles concurrent requests
        # correctly.
        client = httpclient.InferenceServerClient(
            "localhost:8000", concurrency=request_concurrency
        )
        try:
            input_datas = []
            requests = []
            for _ in range(request_concurrency):
                input_data = (16384 * np.random.randn(*shape)).astype(np.float32)
                input_datas.append(input_data)
                inputs = [httpclient.InferInput("INPUT__0", input_data.shape, "FP32")]
                inputs[0].set_data_from_numpy(input_data)
                requests.append(client.async_infer(model_name, inputs))

            for request, expected in zip(requests, input_datas):
                # Get the result from the initiated asynchronous inference
                # request. Note the call will block until the server responds.
                results = request.get_result()

                output_data = results.as_numpy("OUTPUT__0")
                self.assertIsNotNone(
                    output_data, "error: expected 'OUTPUT__0' to be found"
                )
                np.testing.assert_allclose(output_data, expected)
        finally:
            # Release the connection pool even when a request or assertion
            # fails.
            client.close()

    def test_pytorch_identity_model(self):
        model_name = "libtorch_nobatch_zero_1_float32"
        self._full_exact(model_name, 128, [8])


# Run the tests when executed as a script; unittest.main() reads sys.argv, so
# a single test can be selected by name on the command line.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_libtorch_shared_weights/test.sh
================================================
#!/bin/bash
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Pick the repository version: a first positional argument (even an empty one)
# takes precedence over NVIDIA_TRITON_SERVER_VERSION from the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# An optional architecture tag is appended as a suffix.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
DATADIR=/data/inferenceserver/${REPO_VERSION}
INSTANCE_CNT=16
REUSE_MSG="Reusing TorchScript model for instance"
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models --exit-on-error=false \
             --exit-timeout-secs=10"
TEST_RESULT_FILE='test_results.txt'
WEIGHTS_TEST=libtorch_shared_weights_test.py
source ../common/util.sh

RET=0
rm -fr *.log

LOG_IDX=0

# SharedWeightsTest.test_pytorch_identity_model
# Without shared weights, GPU

# Build a fresh model repository that contains only the identity model.
rm -rf models
mkdir models
cp -r $DATADIR/qa_identity_model_repository/libtorch_nobatch_zero_1_float32 models/.

# Ask for INSTANCE_CNT GPU instances of every copied libtorch model.
for MC in $(ls models/libtorch*/config.pbtxt); do
    echo "instance_group [ { count: ${INSTANCE_CNT} kind: KIND_GPU}]" >> $MC
done

# Launch the server; run_server reports a failed start as SERVER_PID == 0.
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Run the client test; failures are recorded in RET instead of aborting.
rm -f $CLIENT_LOG
set +e
if ! python $WEIGHTS_TEST SharedWeightsTest.test_pytorch_identity_model >>$CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi

# Weight sharing is not enabled here, so no reuse message may be logged.
if [ $(grep -c "$REUSE_MSG" $SERVER_LOG) != "0" ]; then
    echo -e "\n***\n*** Failed. Expected 0 $REUSE_MSG\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# SharedWeightsTest.test_pytorch_identity_model
# With shared weights

# The check below expects one reuse message for every instance except one.
# Derive that number from INSTANCE_CNT so the two values cannot drift apart
# (it was previously hard-coded as 15 for INSTANCE_CNT=16).
EXPECTED_REUSE_CNT=$((INSTANCE_CNT-1))

for KIND in KIND_CPU KIND_GPU; do

    # Build a fresh model repository that contains only the identity model.
    rm -fr models
    mkdir models
    cp -r $DATADIR/qa_identity_model_repository/libtorch_nobatch_zero_1_float32 models/.

    # Request INSTANCE_CNT instances of the current kind and switch on the
    # ENABLE_WEIGHT_SHARING parameter for every copied libtorch model.
    for MC in $(ls models/libtorch*/config.pbtxt); do
        echo "instance_group [ { count: ${INSTANCE_CNT} kind: ${KIND}}]" >> $MC
        echo """
        parameters: {
            key: \"ENABLE_WEIGHT_SHARING\"
            value: {
                string_value: \"true\"
            }
        }""" >> $MC
    done

    # Start server; each run writes to its own numbered log file.
    LOG_IDX=$((LOG_IDX+1))
    SERVER_LOG="./inference_server_$LOG_IDX.log"
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Run the client test; failures are recorded in RET instead of aborting.
    rm -f $CLIENT_LOG
    set +e
    if ! python $WEIGHTS_TEST SharedWeightsTest.test_pytorch_identity_model >>$CLIENT_LOG 2>&1; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    elif ! check_test_results $TEST_RESULT_FILE 1; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi

    # Quote the count so that an empty grep result (e.g. unreadable log) is
    # reported as a mismatch instead of tripping a `[` syntax error that
    # would silently skip the failure branch.
    REUSE_CNT=$(grep -c "$REUSE_MSG" "$SERVER_LOG")
    if [ "$REUSE_CNT" != "$EXPECTED_REUSE_CNT" ]; then
        echo -e "\n***\n*** Failed. Expected $EXPECTED_REUSE_CNT $REUSE_MSG\n***"
        RET=1
    fi

    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# Test Cleanup: drop the client log, report success if nothing failed, and
# propagate the accumulated status to the caller.
rm -f $CLIENT_LOG

if (( RET == 0 )); then
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_lifecycle/ensemble_zero_1_float32/config.pbtxt
================================================
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble model with a single step: the step runs "custom_zero_1_float32"
# and maps INPUT0 / OUTPUT0 to the identically named ensemble tensors.
name: "ensemble_zero_1_float32"
platform: "ensemble"
max_batch_size: 1
# One FP32 input with a single element.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
# One FP32 output with a single element.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
ensemble_scheduling {
  step [
    {
      model_name: "custom_zero_1_float32"
      # NOTE(review): -1 presumably selects the latest available version of
      # the composing model -- confirm against the Triton ensemble docs.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
    }
  ]
}


================================================
FILE: qa/L0_lifecycle/identity_zero_1_int32/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# INT32 model served by the "identity" backend with one CPU instance.
name: "identity_zero_1_int32"
backend: "identity"
# max_batch_size 0: NOTE(review) presumably disables batching -- confirm.
max_batch_size: 0
# Single INT32 input; dims [ -1 ] leaves the dimension size unspecified.
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# Single INT32 output with the same unspecified dimension size.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]
# NOTE(review): "creation_delay_sec" presumably makes the identity backend
# wait 10 seconds while the model is being created, so that lifecycle tests
# can observe a slow load -- confirm against the identity backend sources.
parameters [
  {
    key: "creation_delay_sec"
    value: { string_value: "10" }
  }
]


================================================
FILE: qa/L0_lifecycle/lifecycle_test.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import base64
import concurrent.futures
import json
import multiprocessing
import os
import shutil
import signal
import threading
import time
import unittest
from builtins import range
from functools import partial
from pathlib import Path

import infer_util as iu
import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException


class LifeCycleTest(tu.TestResultCollector):
    def _infer_success_models(
        self, model_base_names, versions, tensor_shape, swap=False
    ):
        """Check that the given models are ready and can run inference.

        For every base name the float32/float32/float32 model name is
        resolved, both the HTTP and the gRPC endpoint must report the
        server live and every requested version ready, and then
        ``iu.infer_exact`` is run once per version. ``swap`` is forwarded
        to ``iu.infer_exact`` and is forced to True for any version other
        than 1. Any exception is converted into a test failure.
        """
        fp32_types = (np.float32, np.float32, np.float32)
        for base in model_base_names:
            try:
                full_name = tu.get_model_name(base, *fp32_types)
                clients = (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                )
                for client in clients:
                    self.assertTrue(client.is_server_live())
                    # FIXME is_server_ready should be true here DLIS-1296
                    # self.assertTrue(client.is_server_ready())
                    for version in versions:
                        is_ready = client.is_model_ready(full_name, str(version))
                        self.assertTrue(is_ready)

                for version in versions:
                    expect_swap = swap or (version != 1)
                    iu.infer_exact(
                        self,
                        base,
                        tensor_shape,
                        1,
                        *fp32_types,
                        model_version=version,
                        swap=expect_swap,
                    )
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

    def _infer_success_identity(self, model_base, versions, tensor_dtype, tensor_shape):
        """Check that a "zero" model is ready and can run inference.

        Over HTTP the server must be live and ready and every requested
        version of the model must be ready. ``iu.infer_zero`` is then run
        once per entry in ``versions`` with only the gRPC, non-streaming
        path enabled. Any exception is converted into a test failure.
        """
        try:
            http_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            self.assertTrue(http_client.is_server_live())
            self.assertTrue(http_client.is_server_ready())
            for version in versions:
                zero_name = tu.get_zero_model_name(model_base, 1, tensor_dtype)
                self.assertTrue(http_client.is_model_ready(zero_name, str(version)))

            # NOTE(review): the version itself is not passed to infer_zero;
            # the loop only repeats the call once per requested version.
            for _ in versions:
                iu.infer_zero(
                    self,
                    model_base,
                    1,
                    tensor_dtype,
                    tensor_shape,
                    tensor_shape,
                    use_http=False,
                    use_grpc=True,
                    use_http_json_tensors=False,
                    use_streaming=False,
                )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def _get_client(self, use_grpc=False):
        """Return a verbose client: gRPC on port 8001 if ``use_grpc``, else HTTP on 8000."""
        if not use_grpc:
            return httpclient.InferenceServerClient("localhost:8000", verbose=True)
        return grpcclient.InferenceServerClient("localhost:8001", verbose=True)

    def _async_load(self, model_name, use_grpc):
        """Load ``model_name`` through a fresh client; any error fails the test."""
        try:
            self._get_client(use_grpc).load_model(model_name)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_parse_error_noexit(self):
        # The server is launched with invalid arguments together with
        # --exit-on-error=false, so it is expected to keep running in the
        # SERVER_FAILED_TO_INITIALIZE state: neither live nor ready,
        # whatever --strict-readiness says. Server metadata is still
        # queried and checked on both endpoints.

        # gRPC endpoint: metadata fields are read as attributes.
        try:
            grpc_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            self.assertFalse(grpc_client.is_server_live())
            self.assertFalse(grpc_client.is_server_ready())
            grpc_metadata = grpc_client.get_server_metadata()
            self.assertEqual(
                os.environ["TRITON_SERVER_VERSION"], grpc_metadata.version
            )
            self.assertEqual("triton", grpc_metadata.name)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # HTTP endpoint: metadata fields are read as dictionary keys.
        try:
            http_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            self.assertFalse(http_client.is_server_live())
            self.assertFalse(http_client.is_server_ready())
            http_metadata = http_client.get_server_metadata()
            self.assertEqual(
                os.environ["TRITON_SERVER_VERSION"], http_metadata["version"]
            )
            self.assertEqual("triton", http_metadata["name"])
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_parse_error_modelfail(self):
        # --strict-readiness=true so server is live but not ready
        tensor_shape = (1, 16)
        model_name = tu.get_model_name("libtorch", np.float32, np.float32, np.float32)

        # Server was started but with a model that fails to load
        try:
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            self.assertTrue(triton_client.is_server_live())
            self.assertFalse(triton_client.is_server_ready())
            self.assertFalse(triton_client.is_model_ready(model_name, "1"))

            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            self.assertTrue(triton_client.is_server_live())
            self.assertFalse(triton_client.is_server_ready())
            self.assertFalse(triton_client.is_model_ready(model_name, "1"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Inferencing with the missing model should fail.
        # Only the client exception is caught: the "expected error" failure
        # lives in the else branch so it cannot be intercepted by the handler
        # (AssertionError has no message() method, which used to turn the
        # intended failure into an AttributeError).
        try:
            iu.infer_exact(
                self, "libtorch", tensor_shape, 1, np.float32, np.float32, np.float32
            )
        except InferenceServerException as ex:
            self.assertIn(
                "Request for unknown model: 'libtorch_float32_float32_float32' has no available versions",
                ex.message(),
            )
        else:
            self.fail("expected error for unavailable model " + model_name)

        # And other models should be loaded successfully
        try:
            for base_name in ["openvino", "onnx"]:
                for triton_client in (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                ):
                    loaded_name = tu.get_model_name(
                        base_name, np.float32, np.float32, np.float32
                    )
                    self.assertTrue(triton_client.is_model_ready(loaded_name, "1"))

                iu.infer_exact(
                    self,
                    base_name,
                    tensor_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    model_version=1,
                )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_parse_error_modelfail_nostrict(self):
        # --strict-readiness=false so server is live and ready
        tensor_shape = (1, 16)
        model_name = tu.get_model_name("libtorch", np.float32, np.float32, np.float32)

        # Server was started but with a model that fails to load
        try:
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            self.assertTrue(triton_client.is_server_live())
            self.assertTrue(triton_client.is_server_ready())
            self.assertFalse(triton_client.is_model_ready(model_name, "1"))

            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            self.assertTrue(triton_client.is_server_live())
            self.assertTrue(triton_client.is_server_ready())
            self.assertFalse(triton_client.is_model_ready(model_name, "1"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Inferencing with the missing model should fail.
        # Only the client exception is caught: the "expected error" failure
        # lives in the else branch so it cannot be intercepted by the handler
        # (AssertionError has no message() method, which used to turn the
        # intended failure into an AttributeError).
        try:
            iu.infer_exact(
                self, "libtorch", tensor_shape, 1, np.float32, np.float32, np.float32
            )
        except InferenceServerException as ex:
            self.assertIn(
                "Request for unknown model: 'libtorch_float32_float32_float32' has no available versions",
                ex.message(),
            )
        else:
            self.fail("expected error for unavailable model " + model_name)

        # And other models should be loaded successfully
        try:
            for base_name in ["openvino", "onnx"]:
                for triton_client in (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                ):
                    loaded_name = tu.get_model_name(
                        base_name, np.float32, np.float32, np.float32
                    )
                    self.assertTrue(triton_client.is_model_ready(loaded_name, "1"))

                iu.infer_exact(
                    self,
                    base_name,
                    tensor_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    model_version=1,
                )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_parse_error_no_model_config(self):
        tensor_shape = (1, 16)
        ignored_name = tu.get_model_name("openvino", np.float32, np.float32, np.float32)

        # Server was started but with a model that fails to be polled
        for triton_client in (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        ):
            # expecting ready because not strict readiness
            # These assertions sit outside the try block so that a failure is
            # reported as-is instead of being caught by the handler below.
            self.assertTrue(triton_client.is_server_live())
            self.assertTrue(triton_client.is_server_ready())

            # The metadata request must be rejected. Only the client
            # exception is caught; the "expected ..." failure is raised from
            # the else branch so the handler cannot intercept it
            # (AssertionError has no message() method).
            try:
                triton_client.get_model_metadata(ignored_name, "1")
            except InferenceServerException as ex:
                self.assertIn(
                    "Request for unknown model: 'openvino_float32_float32_float32' is not found",
                    ex.message(),
                )
            else:
                self.fail(
                    "expected model '"
                    + ignored_name
                    + "' to be ignored due to polling failure"
                )

        # And other models should be loaded successfully
        # NOTE: triton_client is the last client of the loop above (the gRPC
        # one); readiness is checked through that client only.
        try:
            for base_name in ["libtorch", "onnx"]:
                model_name = tu.get_model_name(
                    base_name, np.float32, np.float32, np.float32
                )
                self.assertTrue(triton_client.is_model_ready(model_name, "1"))

                iu.infer_exact(
                    self,
                    base_name,
                    tensor_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    model_version=1,
                )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_init_error_modelfail(self):
        # --strict-readiness=true so server is live but not ready
        fp32_types = (np.float32, np.float32, np.float32)
        healthy_bases = ["openvino", "libtorch", "onnx"]

        # The server is up, but some of its models failed to load.
        clients = (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        )
        for client in clients:
            try:
                self.assertTrue(client.is_server_live())
                self.assertFalse(client.is_server_ready())

                # one model uses sequence batcher while the other uses dynamic batcher
                for failed_name in ["onnx_sequence_int32", "onnx_int32_int32_int32"]:
                    self.assertFalse(client.is_model_ready(failed_name))

            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

            # The remaining models must nevertheless be ready.
            try:
                for base in healthy_bases:
                    healthy_name = tu.get_model_name(base, *fp32_types)
                    self.assertTrue(client.is_model_ready(healthy_name))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # ... and they must serve version 1 inference requests.
        try:
            tensor_shape = (1, 16)
            for base in healthy_bases:
                iu.infer_exact(
                    self,
                    base,
                    tensor_shape,
                    1,
                    *fp32_types,
                    model_version=1,
                )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_parse_error_model_no_version(self):
        # --strict-readiness=true so server is live but not ready
        tensor_shape = (1, 16)
        # Name of the model that is expected to be unavailable; kept in its
        # own variable so the final failure message cannot pick up one of the
        # other model names assigned in the loops below.
        openvino_name = tu.get_model_name(
            "openvino", np.float32, np.float32, np.float32
        )

        # Server was started but with a model that fails to load
        for triton_client in (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        ):
            try:
                self.assertTrue(triton_client.is_server_live())
                self.assertFalse(triton_client.is_server_ready())
                self.assertFalse(triton_client.is_model_ready(openvino_name))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

            # Sanity check that other models are loaded properly
            try:
                for base_name in ["libtorch", "onnx"]:
                    model_name = tu.get_model_name(
                        base_name, np.float32, np.float32, np.float32
                    )
                    self.assertTrue(triton_client.is_model_ready(model_name))
                for version in ["1", "3"]:
                    model_name = tu.get_model_name(
                        "plan", np.float32, np.float32, np.float32
                    )
                    self.assertTrue(triton_client.is_model_ready(model_name, version))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        try:
            for base_name in ["libtorch", "onnx"]:
                iu.infer_exact(
                    self,
                    base_name,
                    tensor_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    swap=True,
                )
            for version in [1, 3]:
                iu.infer_exact(
                    self,
                    "plan",
                    tensor_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    swap=(version == 3),
                    model_version=version,
                )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Inference on the model without versions must be rejected. Only the
        # client exception is caught; the "expected error" failure is raised
        # from the else branch so the handler cannot intercept it
        # (AssertionError has no message() method).
        try:
            iu.infer_exact(
                self, "openvino", tensor_shape, 1, np.float32, np.float32, np.float32
            )
        except InferenceServerException as ex:
            self.assertIn(
                "Request for unknown model: 'openvino_float32_float32_float32' has no available versions",
                ex.message(),
            )
        else:
            self.fail("expected error for unavailable model " + openvino_name)

    def test_parse_ignore_zero_prefixed_version(self):
        tensor_shape = (1, 16)
        fp32_types = (np.float32, np.float32, np.float32)

        # The server is up and only version 1 of the model is expected to
        # be loaded; verify that on both endpoints.
        clients = (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        )
        for client in clients:
            try:
                self.assertTrue(client.is_server_live())
                self.assertTrue(client.is_server_ready())

                libtorch_name = tu.get_model_name("libtorch", *fp32_types)
                self.assertTrue(client.is_model_ready(libtorch_name, "1"))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        try:
            # swap=False for version 1
            iu.infer_exact(
                self,
                "libtorch",
                tensor_shape,
                1,
                *fp32_types,
                swap=False,
            )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_parse_ignore_non_intergral_version(self):
        tensor_shape = (1, 16)
        fp32_types = (np.float32, np.float32, np.float32)

        # The server is up and only version 1 of the model is expected to
        # be loaded; verify that on both endpoints.
        clients = (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        )
        for client in clients:
            try:
                self.assertTrue(client.is_server_live())
                self.assertTrue(client.is_server_ready())

                libtorch_name = tu.get_model_name("libtorch", *fp32_types)
                self.assertTrue(client.is_model_ready(libtorch_name, "1"))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        try:
            # swap=False for version 1
            iu.infer_exact(
                self,
                "libtorch",
                tensor_shape,
                1,
                *fp32_types,
                swap=False,
            )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_dynamic_model_load_unload(self):
        tensor_shape = (1, 16)
        libtorch_name = tu.get_model_name(
            "libtorch", np.float32, np.float32, np.float32
        )
        onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32)

        # Make sure libtorch model is not in the status (because
        # initially it is not in the model repository)
        for triton_client in (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        ):
            try:
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                self.assertFalse(triton_client.is_model_ready(libtorch_name, "1"))
                self.assertFalse(triton_client.is_model_ready(libtorch_name, "3"))
                self.assertTrue(triton_client.is_model_ready(onnx_name, "1"))
                self.assertTrue(triton_client.is_model_ready(onnx_name, "3"))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Add libtorch model to the model repository and give it time to
        # load. Make sure that it has a status and is ready.
        try:
            shutil.copytree(libtorch_name, "models/" + libtorch_name)
            time.sleep(5)  # wait for model to load
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                self.assertTrue(triton_client.is_model_ready(libtorch_name, "1"))
                self.assertTrue(triton_client.is_model_ready(libtorch_name, "3"))
                self.assertTrue(triton_client.is_model_ready(onnx_name, "1"))
                self.assertTrue(triton_client.is_model_ready(onnx_name, "3"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Run inference on the just loaded model
        try:
            iu.infer_exact(
                self,
                "libtorch",
                tensor_shape,
                1,
                np.float32,
                np.float32,
                np.float32,
                swap=True,
            )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Make sure libtorch has execution stats
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            stats = triton_client.get_inference_statistics(libtorch_name)
            self.assertEqual(len(stats["model_stats"]), 2)
            for idx in range(len(stats["model_stats"])):
                self.assertEqual(stats["model_stats"][idx]["name"], libtorch_name)
                if stats["model_stats"][idx]["version"] == "1":
                    self.assertEqual(
                        stats["model_stats"][idx]["inference_stats"]["success"][
                            "count"
                        ],
                        0,
                    )
                else:
                    self.assertNotEqual(
                        stats["model_stats"][idx]["inference_stats"]["success"][
                            "count"
                        ],
                        0,
                    )

            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            stats = triton_client.get_inference_statistics(libtorch_name)
            self.assertEqual(len(stats.model_stats), 2)
            for idx in range(len(stats.model_stats)):
                self.assertEqual(stats.model_stats[idx].name, libtorch_name)
                if stats.model_stats[idx].version == "1":
                    self.assertEqual(
                        stats.model_stats[idx].inference_stats.success.count, 0
                    )
                else:
                    self.assertNotEqual(
                        stats.model_stats[idx].inference_stats.success.count, 0
                    )

        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Remove libtorch model from the model repository and give it
        # time to unload. Make sure that it is no longer available.
        try:
            shutil.rmtree("models/" + libtorch_name)
            time.sleep(5)  # wait for model to unload
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                self.assertFalse(triton_client.is_model_ready(libtorch_name, "1"))
                self.assertFalse(triton_client.is_model_ready(libtorch_name, "3"))
                self.assertTrue(triton_client.is_model_ready(onnx_name, "1"))
                self.assertTrue(triton_client.is_model_ready(onnx_name, "3"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Model is removed so inference should fail
        try:
            iu.infer_exact(
                self,
                "libtorch",
                tensor_shape,
                1,
                np.float32,
                np.float32,
                np.float32,
                swap=True,
            )
            self.assertTrue(
                False, "expected error for unavailable model " + libtorch_name
            )
        except Exception as ex:
            self.assertIn(
                "Request for unknown model: '{}' has no available versions".format(
                    libtorch_name
                ),
                ex.message(),
            )

        # Add back the same model. The status/stats should be reset.
        try:
            shutil.copytree(libtorch_name, "models/" + libtorch_name)
            time.sleep(5)  # wait for model to load
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                self.assertTrue(triton_client.is_model_ready(libtorch_name, "1"))
                self.assertTrue(triton_client.is_model_ready(libtorch_name, "3"))
                self.assertTrue(triton_client.is_model_ready(onnx_name, "1"))
                self.assertTrue(triton_client.is_model_ready(onnx_name, "3"))

            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            stats = triton_client.get_inference_statistics(libtorch_name)
            self.assertEqual(len(stats["model_stats"]), 2)
            self.assertEqual(stats["model_stats"][0]["name"], libtorch_name)
            self.assertEqual(stats["model_stats"][1]["name"], libtorch_name)
            self.assertEqual(
                stats["model_stats"][0]["inference_stats"]["success"]["count"], 0
            )
            self.assertEqual(
                stats["model_stats"][1]["inference_stats"]["success"]["count"], 0
            )

            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            stats = triton_client.get_inference_statistics(libtorch_name)
            self.assertEqual(len(stats.model_stats), 2)
            self.assertEqual(stats.model_stats[0].name, libtorch_name)
            self.assertEqual(stats.model_stats[1].name, libtorch_name)
            self.assertEqual(stats.model_stats[0].inference_stats.success.count, 0)
            self.assertEqual(stats.model_stats[1].inference_stats.success.count, 0)

        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Remove onnx model from the model repository and give it
        # time to unload. Make sure that it is unavailable.
        try:
            shutil.rmtree("models/" + onnx_name)
            time.sleep(5)  # wait for model to unload
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                self.assertTrue(triton_client.is_model_ready(libtorch_name, "1"))
                self.assertTrue(triton_client.is_model_ready(libtorch_name, "3"))
                self.assertFalse(triton_client.is_model_ready(onnx_name, "1"))
                self.assertFalse(triton_client.is_model_ready(onnx_name, "3"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Model is removed so inference should fail
        try:
            iu.infer_exact(
                self,
                "onnx",
                tensor_shape,
                1,
                np.float32,
                np.float32,
                np.float32,
                swap=True,
            )
            self.assertTrue(False, "expected error for unavailable model " + onnx_name)
        except Exception as ex:
            self.assertIn(
                "Request for unknown model: 'onnx_float32_float32_float32' has no available versions",
                ex.message(),
            )

    def test_dynamic_model_load_unload_disabled(self):
        """Verify that repository changes are ignored when dynamic
        load/unload is disabled.

        The libtorch model is copied into ``models/`` and the onnx model is
        removed from it. After each change the server is queried over both
        HTTP (localhost:8000) and GRPC (localhost:8001) and must still
        report libtorch versions 1 and 3 as not ready and onnx versions 1
        and 3 as ready. Inference must fail for libtorch and keep
        succeeding for onnx.
        """
        tensor_shape = (1, 16)
        libtorch_name = tu.get_model_name(
            "libtorch", np.float32, np.float32, np.float32
        )
        onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32)

        def make_clients():
            # One client per protocol so every check covers both endpoints.
            return (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            )

        def assert_unchanged_state(triton_client):
            # The expected state is identical at every checkpoint of this
            # test: libtorch is never ready, onnx is always ready.
            self.assertTrue(triton_client.is_server_live())
            self.assertTrue(triton_client.is_server_ready())
            self.assertFalse(triton_client.is_model_ready(libtorch_name, "1"))
            self.assertFalse(triton_client.is_model_ready(libtorch_name, "3"))
            self.assertTrue(triton_client.is_model_ready(onnx_name, "1"))
            self.assertTrue(triton_client.is_model_ready(onnx_name, "3"))

        # Make sure libtorch model is not in the status (because
        # initially it is not in the model repository)
        for triton_client in make_clients():
            try:
                assert_unchanged_state(triton_client)
            except Exception as ex:
                self.fail("unexpected error {}".format(ex))

        # Add libtorch model to the model repository and give it time to
        # load. But it shouldn't load because dynamic loading is disabled.
        try:
            shutil.copytree(libtorch_name, "models/" + libtorch_name)
            time.sleep(5)  # wait for model to load
            for triton_client in make_clients():
                assert_unchanged_state(triton_client)
        except Exception as ex:
            self.fail("unexpected error {}".format(ex))

        # Run inference which should fail because the model isn't there
        try:
            iu.infer_exact(
                self,
                "libtorch",
                tensor_shape,
                1,
                np.float32,
                np.float32,
                np.float32,
                swap=True,
            )
        except Exception as ex:
            self.assertIn(
                "Request for unknown model: 'libtorch_float32_float32_float32' is not found",
                ex.message(),
            )
        else:
            # Kept outside the try body so this failure is not swallowed by
            # the handler above, which assumes the exception has message().
            self.fail("expected error for unavailable model " + libtorch_name)

        # Remove one of the original models from the model repository.
        # Unloading is disabled so it should remain available in the status.
        try:
            shutil.rmtree("models/" + onnx_name)
            time.sleep(5)  # wait for model to unload (but it shouldn't)
            for triton_client in make_clients():
                assert_unchanged_state(triton_client)
        except Exception as ex:
            self.fail("unexpected error {}".format(ex))

        # Run inference to make sure model still being served even
        # though deleted from model repository
        try:
            iu.infer_exact(
                self,
                "onnx",
                tensor_shape,
                1,
                np.float32,
                np.float32,
                np.float32,
                swap=True,
            )
        except Exception as ex:
            self.fail("unexpected error {}".format(ex))

    def test_dynamic_version_load_unload(self):
        """Verify that model versions are loaded/unloaded as their
        directories are added to or removed from the model repository.

        Starting with versions 1, 2 and 3 of the libtorch int32 model ready,
        the test runs inference on version 1, checks that only version 1
        has a non-zero success count, removes version 1 and expects it to
        become unavailable, then copies version 2 to a new version 7 and
        expects that to become ready. Every readiness check is made over
        both HTTP (localhost:8000) and GRPC (localhost:8001).
        """
        tensor_shape = (1, 16)
        libtorch_name = tu.get_model_name("libtorch", np.int32, np.int32, np.int32)

        def assert_version_states(expected):
            # `expected` is an ordered sequence of (version, should_be_ready)
            # pairs that is checked against both protocol endpoints.
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                for version, should_be_ready in expected:
                    check = self.assertTrue if should_be_ready else self.assertFalse
                    check(triton_client.is_model_ready(libtorch_name, version))

        # There are 3 versions. Make sure that all have status and are
        # ready.
        try:
            assert_version_states((("1", True), ("2", True), ("3", True)))
        except Exception as ex:
            self.fail("unexpected error {}".format(ex))

        # Run inference on version 1 to make sure it is available
        try:
            iu.infer_exact(
                self,
                "libtorch",
                tensor_shape,
                1,
                np.int32,
                np.int32,
                np.int32,
                swap=False,
                model_version=1,
            )
        except Exception as ex:
            self.fail("unexpected error {}".format(ex))

        # Make sure only version 1 has execution stats in the status.
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            stats = triton_client.get_inference_statistics(libtorch_name)
            self.assertEqual(len(stats["model_stats"]), 3)
            for model_stat in stats["model_stats"]:
                self.assertEqual(model_stat["name"], libtorch_name)
                success_count = model_stat["inference_stats"]["success"]["count"]
                if model_stat["version"] == "1":
                    self.assertNotEqual(success_count, 0)
                else:
                    self.assertEqual(success_count, 0)

            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            stats = triton_client.get_inference_statistics(libtorch_name)
            self.assertEqual(len(stats.model_stats), 3)
            for model_stat in stats.model_stats:
                self.assertEqual(model_stat.name, libtorch_name)
                success_count = model_stat.inference_stats.success.count
                if model_stat.version == "1":
                    self.assertNotEqual(success_count, 0)
                else:
                    self.assertEqual(success_count, 0)

        except Exception as ex:
            self.fail("unexpected error {}".format(ex))

        # Remove version 1 from the model repository and give it time to
        # unload. Make sure that it is unavailable.
        try:
            shutil.rmtree("models/" + libtorch_name + "/1")
            time.sleep(5)  # wait for version to unload
            assert_version_states((("1", False), ("2", True), ("3", True)))
        except Exception as ex:
            self.fail("unexpected error {}".format(ex))

        # Version is removed so inference should fail
        try:
            iu.infer_exact(
                self,
                "libtorch",
                tensor_shape,
                1,
                np.int32,
                np.int32,
                np.int32,
                swap=False,
                model_version=1,
            )
        except Exception as ex:
            self.assertIn(
                "Request for unknown model: 'libtorch_int32_int32_int32' version 1 is not at ready state",
                ex.message(),
            )
        else:
            # Kept outside the try body so this failure is not swallowed by
            # the handler above, which assumes the exception has message().
            self.fail("expected error for unavailable model " + libtorch_name)

        # Add another version to the model repository.
        try:
            shutil.copytree(
                "models/" + libtorch_name + "/2", "models/" + libtorch_name + "/7"
            )
            time.sleep(5)  # wait for version to load
            assert_version_states(
                (("1", False), ("2", True), ("3", True), ("7", True))
            )
        except Exception as ex:
            self.fail("unexpected error {}".format(ex))

    def test_dynamic_version_load_unload_disabled(self):
        """Check that adding or deleting version directories has no effect
        while dynamic loading/unloading is disabled.

        Version 2 of the libtorch int32 model is copied to a new version 7
        and version 1 is then deleted. After each change versions 1, 2 and
        3 must still be reported ready and version 7 not ready on both the
        HTTP and GRPC endpoints, and version 1 must still serve inference.
        """
        shape = (1, 16)
        model = tu.get_model_name("libtorch", np.int32, np.int32, np.int32)
        versions_root = "models/{}/".format(model)

        def verify_versions():
            # The expected readiness is the same at both checkpoints.
            endpoints = (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            )
            for client in endpoints:
                self.assertTrue(client.is_server_live())
                self.assertTrue(client.is_server_ready())
                for loaded_version in ("1", "2", "3"):
                    self.assertTrue(client.is_model_ready(model, loaded_version))
                self.assertFalse(client.is_model_ready(model, "7"))

        # A new version directory appears in the repository; give the
        # server time to (not) pick it up since loading is disabled.
        try:
            shutil.copytree(versions_root + "2", versions_root + "7")
            time.sleep(5)  # wait for model to load
            verify_versions()
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))

        # An original version directory disappears; because unloading is
        # disabled the version must stay available in the status.
        try:
            shutil.rmtree(versions_root + "1")
            time.sleep(5)  # wait for version to unload (but it shouldn't)
            verify_versions()
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))

        # The deleted version must still be served.
        infer_kwargs = dict(swap=False, model_version=1)
        try:
            iu.infer_exact(
                self, "libtorch", shape, 1, np.int32, np.int32, np.int32, **infer_kwargs
            )
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))

    def test_dynamic_model_modify(self):
        """Verify that overwriting a model's config.pbtxt causes a reload.

        For the libtorch and plan float32 models this test:

        1. checks that versions 1 and 3 are ready and serve inference;
        2. overwrites config.pbtxt with ``config.pbtxt.wrong.<backend>`` and
           expects ``iu.infer_exact`` (with ``output0_raw=False``) to raise
           an AssertionError whose text contains ``'label9`` and ``!=``;
        3. overwrites config.pbtxt with ``config.pbtxt.<backend>`` and
           expects version 1 to become unavailable while version 3 keeps
           serving inference.
        """
        models_base = ("libtorch", "plan")
        models_shape = ((1, 16), (1, 16))
        models = [
            tu.get_model_name(m, np.float32, np.float32, np.float32)
            for m in models_base
        ]

        # Make sure libtorch and plan are in the status
        for model_name in models:
            try:
                for triton_client in (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                ):
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_server_ready())
                    self.assertTrue(triton_client.is_model_ready(model_name, "1"))
                    self.assertTrue(triton_client.is_model_ready(model_name, "3"))
            except Exception as ex:
                self.fail("unexpected error {}".format(ex))

        # Run inference on the model, both versions 1 and 3
        for version in (1, 3):
            for base_name, model_shape in zip(models_base, models_shape):
                try:
                    iu.infer_exact(
                        self,
                        base_name,
                        model_shape,
                        1,
                        np.float32,
                        np.float32,
                        np.float32,
                        swap=(version == 3),
                        model_version=version,
                    )
                except Exception as ex:
                    self.fail("unexpected error {}".format(ex))

        # Change the model configuration to use wrong label file
        for base_name, model_name in zip(models_base, models):
            shutil.copyfile(
                "config.pbtxt.wrong." + base_name,
                "models/" + model_name + "/config.pbtxt",
            )

        time.sleep(5)  # wait for models to reload
        # Version 3 is named explicitly instead of relying on a loop
        # variable left over from the inference loop above.
        for base_name, model_shape in zip(models_base, models_shape):
            try:
                iu.infer_exact(
                    self,
                    base_name,
                    model_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    swap=True,
                    model_version=3,
                    output0_raw=False,
                )
            except AssertionError as ex:
                self.assertTrue("'label9" in str(ex) and "!=" in str(ex), str(ex))
            else:
                self.fail("expected error for wrong label for " + base_name)

        # Change the model configuration to use correct label file and to have
        # the default version policy (so that only version 3) is available.
        for base_name, model_name in zip(models_base, models):
            shutil.copyfile(
                "config.pbtxt." + base_name, "models/" + model_name + "/config.pbtxt"
            )

        time.sleep(5)  # wait for models to reload
        for model_name in models:
            try:
                for triton_client in (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                ):
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_server_ready())
                    self.assertFalse(triton_client.is_model_ready(model_name, "1"))
                    self.assertTrue(triton_client.is_model_ready(model_name, "3"))
            except Exception as ex:
                self.fail("unexpected error {}".format(ex))

        # Attempt inferencing using version 1, should fail since
        # change in model policy makes that no longer available.
        for base_name, model_shape in zip(models_base, models_shape):
            try:
                iu.infer_exact(
                    self,
                    base_name,
                    model_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    swap=False,
                    model_version=1,
                )
            except Exception as ex:
                self.assertIn("Request for unknown model", ex.message())
            else:
                # Kept outside the try body so this failure is not swallowed
                # by the handler above, which assumes ex has message().
                self.fail("expected error for unavailable model " + base_name)

        # Version 3 should continue to work...
        for base_name, model_shape in zip(models_base, models_shape):
            try:
                iu.infer_exact(
                    self,
                    base_name,
                    model_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    swap=True,
                    model_version=3,
                )
            except Exception as ex:
                self.fail("unexpected error {}".format(ex))

    def test_dynamic_file_delete(self):
        """Verify that deleting a model's config.pbtxt causes a reload.

        For the onnx and plan float32 models the test first checks that
        versions 1 and 3 are ready and serve inference, then removes each
        model's ``config.pbtxt``. After the reload only version 3 is
        expected to be ready: inference on version 3 must succeed and
        inference on version 1 must fail with "Request for unknown model".
        """
        models_base = ("onnx", "plan")
        models_shape = ((1, 16), (1, 16))
        models = [
            tu.get_model_name(m, np.float32, np.float32, np.float32)
            for m in models_base
        ]

        def assert_model_versions(model_name, version_1_ready):
            # Version 3 is expected ready throughout; only the state of
            # version 1 changes once the configuration is deleted.
            check_v1 = self.assertTrue if version_1_ready else self.assertFalse
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                check_v1(triton_client.is_model_ready(model_name, "1"))
                self.assertTrue(triton_client.is_model_ready(model_name, "3"))

        # Make sure onnx and plan are in the status
        for model_name in models:
            try:
                assert_model_versions(model_name, True)
            except Exception as ex:
                self.fail("unexpected error {}".format(ex))

        # Run inference on the model, both versions 1 and 3
        for version in (1, 3):
            for base_name, model_shape in zip(models_base, models_shape):
                try:
                    iu.infer_exact(
                        self,
                        base_name,
                        model_shape,
                        1,
                        np.float32,
                        np.float32,
                        np.float32,
                        swap=(version == 3),
                        model_version=version,
                    )
                except Exception as ex:
                    self.fail("unexpected error {}".format(ex))

        # Delete model configuration, which cause model to be
        # re-loaded and use autofilled config, which means that
        # version policy will be latest and so only version 3 will be
        # available
        for model_name in models:
            os.remove("models/" + model_name + "/config.pbtxt")

        time.sleep(5)  # wait for models to reload
        for model_name in models:
            try:
                assert_model_versions(model_name, False)
            except Exception as ex:
                self.fail("unexpected error {}".format(ex))

        # Only version 3 (latest) should work...
        for base_name, model_shape in zip(models_base, models_shape):
            try:
                iu.infer_exact(
                    self,
                    base_name,
                    model_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    swap=True,
                    model_version=3,
                )
            except Exception as ex:
                self.fail("unexpected error {}".format(ex))

            try:
                iu.infer_exact(
                    self,
                    base_name,
                    model_shape,
                    1,
                    np.float32,
                    np.float32,
                    np.float32,
                    swap=False,
                    model_version=1,
                )
            except Exception as ex:
                self.assertIn("Request for unknown model", ex.message())
            else:
                # Kept outside the try body so this failure is not swallowed
                # by the handler above, which assumes ex has message().
                self.fail("expected error for unavailable model " + base_name)

    def test_multiple_model_repository_polling(self):
        """Exercise a model that is duplicated across two polled repositories.

        The libtorch float32 model starts out in ``models/`` only. Copying
        it into ``models_0/`` as well is expected to make it unavailable,
        and removing the ``models/`` copy is expected to make the
        ``models_0/`` copy (versions 1 and 3) servable. The openvino and
        onnx models must keep serving inference throughout.
        """
        shape = (1, 16)
        duplicated_model = tu.get_model_name(
            "libtorch", np.float32, np.float32, np.float32
        )
        all_versions = (1, 3)

        # Everything loads and infers; at this point libtorch only has
        # version 1.
        self._infer_success_models(["libtorch"], (1,), shape)
        self._infer_success_models(["openvino", "onnx"], all_versions, shape)

        # Duplicate libtorch into the second repository, which should
        # cause it to be unloaded.
        shutil.copytree(duplicated_model, "models_0/{}".format(duplicated_model))
        time.sleep(5)  # wait for models to reload
        try:
            endpoints = (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            )
            for client in endpoints:
                self.assertTrue(client.is_server_live())
                self.assertTrue(client.is_server_ready())
                for version in ("1", "3"):
                    self.assertFalse(client.is_model_ready(duplicated_model, version))
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))

        self._infer_success_models(["openvino", "onnx"], all_versions, shape)

        # Drop the copy in the first repository; the copy in the second
        # repository, which has versions 1 and 3, should now load.
        shutil.rmtree("models/{}".format(duplicated_model))
        time.sleep(5)  # wait for model to unload
        self._infer_success_models(
            ["libtorch", "openvino", "onnx"], all_versions, shape
        )

    def test_multiple_model_repository_control(self):
        """Verify explicit model control when a model is duplicated across repositories.

        Similar to test_multiple_model_repository_polling, but the model
        load/unload is controlled by the API. The test:

        1. checks that libtorch, openvino and onnx are not loaded, then
           loads them explicitly;
        2. copies libtorch into the second repository ("models_0") and
           checks that nothing changes until a load is requested;
        3. requests a load of the now duplicated libtorch model, which must
           be rejected without disturbing the version already served;
        4. removes libtorch from the first repository ("models"), unloads
           and reloads it, and expects versions 1 and 3 to be served.
        """
        model_shape = (1, 16)
        libtorch_name = tu.get_model_name(
            "libtorch", np.float32, np.float32, np.float32
        )
        model_bases = ["libtorch", "openvino", "onnx"]

        # Initially models are not loaded
        for base in model_bases:
            try:
                model_name = tu.get_model_name(base, np.float32, np.float32, np.float32)
                for triton_client in (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                ):
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_server_ready())
                    self.assertFalse(triton_client.is_model_ready(model_name, "1"))
                    self.assertFalse(triton_client.is_model_ready(model_name, "3"))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Load all models, here we use GRPC
        for base in model_bases:
            try:
                model_name = tu.get_model_name(base, np.float32, np.float32, np.float32)
                triton_client = grpcclient.InferenceServerClient(
                    "localhost:8001", verbose=True
                )
                triton_client.load_model(model_name)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Models should be loaded successfully and infer
        # successfully. Initially libtorch only has version 1.
        self._infer_success_models(["libtorch"], (1,), model_shape)
        self._infer_success_models(["openvino", "onnx"], (1, 3), model_shape)

        # Add the libtorch to the second model repository. Because
        # not polling this doesn't change any model state, all models
        # are still loaded and available.
        shutil.copytree(libtorch_name, "models_0/" + libtorch_name)
        self._infer_success_models(["libtorch"], (1,), model_shape)
        self._infer_success_models(["openvino", "onnx"], (1, 3), model_shape)

        # Load libtorch again which should fail because it is now duplicated
        # in 2 model repositories. Use HTTP here.
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            triton_client.load_model(libtorch_name)
            # Reaching this line means the duplicated model was accepted.
            # The AssertionError is not an InferenceServerException, so it
            # propagates instead of being swallowed by the handler below.
            self.assertTrue(False, "expected load failure for duplicated model")
        except InferenceServerException as ex:
            self.assertIn("failed to load '{}'".format(libtorch_name), ex.message())

        try:
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                # Unlike polling mode, the failed load on the duplicate model
                # should NOT unload the existing versions in model control mode.
                self.assertTrue(triton_client.is_model_ready(libtorch_name, "1"))
                # Version 3 did not exist in the first model repository, so
                # it should still not be loaded.
                self.assertFalse(triton_client.is_model_ready(libtorch_name, "3"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        self._infer_success_models(["openvino", "onnx"], (1, 3), model_shape)

        # Remove the libtorch from the first model repository and
        # explicitly load libtorch. The libtorch from the second
        # model repository should be loaded properly. In the second
        # model repository libtorch should have versions 1 and 3.
        shutil.rmtree("models/" + libtorch_name)
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            # Unload existing in-memory model from first model repository
            triton_client.unload_model(libtorch_name)
            # Load model from second model repository since original was deleted
            triton_client.load_model(libtorch_name)
        except Exception as ex:
            # The model is no longer duplicated, so any error here is a
            # genuine failure rather than an expected rejection.
            self.assertTrue(False, "unexpected error {}".format(ex))

        self._infer_success_models(
            ["libtorch", "openvino", "onnx"], (1, 3), model_shape
        )

    def test_model_control(self):
        """Exercise explicit load/unload of a model and the ensemble built on it.

        Covers, in order: loading an unknown model (must fail), loading the
        ensemble (which pulls in the onnx model it depends on), reloading
        after the onnx config.pbtxt is deleted, unloading an unknown model
        (must be a no-op), unloading the onnx model (which also takes the
        ensemble down), and finally loading the onnx model alone after the
        ensemble was explicitly unloaded.
        """
        model_shape = (1, 16)
        onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32)

        ensemble_prefix = "simple_"
        ensemble_name = ensemble_prefix + onnx_name

        # Make sure no models are loaded
        for model_name in (onnx_name, ensemble_name):
            try:
                for triton_client in (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                ):
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_server_ready())
                    self.assertFalse(triton_client.is_model_ready(model_name, "1"))
                    self.assertFalse(triton_client.is_model_ready(model_name, "3"))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Load non-existent model
        for triton_client in (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        ):
            try:
                triton_client.load_model("unknown_model")
                self.assertTrue(False, "expected unknown model failure")
            # Catch only the client error: a broad handler would also catch
            # the AssertionError raised above, which has no message() method.
            except InferenceServerException as ex:
                self.assertIn(
                    "failed to load 'unknown_model', failed to poll from model repository",
                    ex.message(),
                )

        # Load ensemble model, the dependent model should be polled and loaded
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            triton_client.load_model(ensemble_name)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        self._infer_success_models(["onnx"], (1, 3), model_shape)
        self._infer_success_models(["simple_onnx"], (1, 3), model_shape, swap=True)

        # Delete model configuration for onnx, which will cause
        # the autofiller to use the latest version policy so that only
        # version 3 will be available if the models are re-loaded
        os.remove("models/" + onnx_name + "/config.pbtxt")

        # Nothing was reloaded yet, so both versions must still be served.
        self._infer_success_models(["onnx"], (1, 3), model_shape)
        self._infer_success_models(["simple_onnx"], (1, 3), model_shape, swap=True)

        # Reload models, only version 3 should be available for onnx
        for model_name in (onnx_name, ensemble_name):
            try:
                triton_client = grpcclient.InferenceServerClient(
                    "localhost:8001", verbose=True
                )
                triton_client.load_model(model_name)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        self._infer_success_models(["onnx"], (3,), model_shape)
        self._infer_success_models(["simple_onnx"], (1, 3), model_shape, swap=True)

        # Version 1 of onnx must be gone after the reload
        try:
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                self.assertFalse(triton_client.is_model_ready(onnx_name, "1"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Unload non-existing model, nothing should happen
        for triton_client in (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        ):
            try:
                triton_client.unload_model("unknown_model")
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Unload the depending model, as side effect, the ensemble model will be
        # forced to be unloaded
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            triton_client.unload_model(onnx_name)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        for model_name in (onnx_name, ensemble_name):
            try:
                for triton_client in (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                ):
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_server_ready())
                    self.assertFalse(triton_client.is_model_ready(model_name, "1"))
                    self.assertFalse(triton_client.is_model_ready(model_name, "3"))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Explicitly unload the ensemble and load the depending
        # model. The ensemble model should not be reloaded because it
        # was explicitly unloaded.
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            triton_client.unload_model(ensemble_name)
            triton_client.load_model(onnx_name)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        self._infer_success_models(["onnx"], (3,), model_shape)

        try:
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                self.assertFalse(triton_client.is_model_ready(ensemble_name, "1"))
                self.assertFalse(triton_client.is_model_ready(ensemble_name, "3"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_model_control_fail(self):
        """Check that explicitly loading the onnx model fails, repeatedly.

        The model must start out unloaded; two consecutive load requests
        over HTTP are then both expected to be rejected with a
        "load failed for model" error.
        """
        target = tu.get_model_name("onnx", np.float32, np.float32, np.float32)
        expected_error = "load failed for model '{}'".format(target)

        # Neither version may be ready before a load is requested
        try:
            endpoints = (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            )
            for client in endpoints:
                self.assertTrue(client.is_server_live())
                self.assertTrue(client.is_server_ready())
                for version in ("1", "3"):
                    self.assertFalse(client.is_model_ready(target, version))
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))

        # The first load request must fail, and so must a second attempt
        for _attempt in range(2):
            try:
                client = httpclient.InferenceServerClient(
                    "localhost:8000", verbose=True
                )
                client.load_model(target)
                self.assertTrue(False, "expecting load failure")
            except InferenceServerException as err:
                self.assertIn(expected_error, err.message())

    def test_model_control_ensemble(self):
        """Check the effect of unload_dependents when unloading an ensemble.

        Unloading the ensemble with unload_dependents=True is expected to
        take the onnx model it depends on down as well; unloading it with
        the default flag is expected to leave the onnx model available.
        """
        input_shape = (1, 16)
        composing_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32)
        ensemble_name = "simple_" + composing_name

        def expect_nothing_ready():
            # Neither the onnx model nor the ensemble may report a ready
            # version on either endpoint.
            for name in (composing_name, ensemble_name):
                try:
                    endpoints = (
                        httpclient.InferenceServerClient(
                            "localhost:8000", verbose=True
                        ),
                        grpcclient.InferenceServerClient(
                            "localhost:8001", verbose=True
                        ),
                    )
                    for client in endpoints:
                        self.assertTrue(client.is_server_live())
                        self.assertTrue(client.is_server_ready())
                        for version in ("1", "3"):
                            self.assertFalse(client.is_model_ready(name, version))
                except Exception as err:
                    self.assertTrue(False, "unexpected error {}".format(err))

        # Start from a clean state
        expect_nothing_ready()

        # Loading the ensemble should also poll and load the model it uses
        try:
            http_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            http_client.load_model(ensemble_name)
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))

        self._infer_success_models(["onnx"], (1, 3), input_shape)
        self._infer_success_models(["simple_onnx"], (1, 3), input_shape, swap=True)

        # With unload_dependents set, everything should go away
        try:
            http_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            http_client.unload_model(ensemble_name, unload_dependents=True)
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))
        expect_nothing_ready()

        # Load the ensemble again and unload it with the default flag; the
        # onnx model should remain available afterwards
        try:
            http_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            http_client.load_model(ensemble_name)
            http_client.unload_model(ensemble_name)
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))

        self._infer_success_models(["onnx"], (1, 3), input_shape)

        try:
            endpoints = (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            )
            for client in endpoints:
                self.assertTrue(client.is_server_live())
                self.assertTrue(client.is_server_ready())
                for version in ("1", "3"):
                    self.assertFalse(client.is_model_ready(ensemble_name, version))
                for version in ("1", "3"):
                    self.assertTrue(client.is_model_ready(composing_name, version))
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))

    def test_load_same_model_different_platform(self):
        """Reload a model whose repository entry was replaced by another platform.

        The "simple" model is first expected to be served as a
        "tensorrt_plan" model. Its directory under "models/" is then
        replaced by the copy found in the working directory and the model
        is reloaded, after which the reported platform is expected to be
        "pytorch_libtorch". Versions 1 and 3 must be ready in both phases.
        """
        input_shape = (1, 16)
        simple_name = tu.get_model_name("simple", np.float32, np.float32, np.float32)

        # The protocol is selected by the presence of an environment variable
        grpc_requested = "TRITONSERVER_USE_GRPC" in os.environ
        # The grpc client is asked for JSON metadata; presumably so the
        # response can be indexed by key like the http one (TODO confirm)
        metadata_kwargs = {"as_json": True} if grpc_requested else {}

        def expect_platform(platform):
            # Versions 1 and 3 must be ready, report the given platform and
            # serve inference requests.
            try:
                client = self._get_client(grpc_requested)
                self.assertTrue(client.is_server_live())
                self.assertTrue(client.is_server_ready())
                for version in ("1", "3"):
                    self.assertTrue(client.is_model_ready(simple_name, version))
                metadata = client.get_model_metadata(simple_name, **metadata_kwargs)
                self.assertEqual(metadata["platform"], platform)
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))
            self._infer_success_models(["simple"], (1, 3), input_shape)

        expect_platform("tensorrt_plan")

        # Swap the repository entry for the same model on another platform
        repository_entry = "models/" + simple_name
        shutil.rmtree(repository_entry)
        shutil.copytree(simple_name, repository_entry)

        # Ask the server to pick up the replacement
        try:
            client = self._get_client(grpc_requested)
            client.load_model(simple_name)
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))

        expect_platform("pytorch_libtorch")

    def test_model_availability_on_reload(self):
        """Check that v1 stays available while a newly added v2 is loading.

        A version directory "2" is created for the identity model and a
        reload is requested from a background thread. Version 1 must keep
        serving during the reload; once the reload returns, version 2 must
        be ready and version 1 must not.
        """
        name = "identity_zero_1_int32"
        base = "identity"
        shape = (16,)

        # The protocol is selected by the presence of an environment variable
        grpc_requested = "TRITONSERVER_USE_GRPC" in os.environ

        def expect_versions(ready, not_ready=()):
            # Server must be live and ready, with the given version states.
            try:
                client = self._get_client(grpc_requested)
                self.assertTrue(client.is_server_live())
                self.assertTrue(client.is_server_ready())
                for version in not_ready:
                    self.assertFalse(client.is_model_ready(name, version))
                for version in ready:
                    self.assertTrue(client.is_model_ready(name, version))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

        # Version 1 is expected to be loaded at startup
        expect_versions(ready=("1",))
        self._infer_success_identity(base, (1,), np.int32, shape)

        # Add a new version directory for the reload to pick up
        os.mkdir("models/" + name + "/2")

        # The load call blocks, so issue it from another thread and probe
        # v1 availability while it is in flight
        loader = threading.Thread(
            target=self._async_load, args=(name, grpc_requested)
        )
        loader.start()
        # wait for time < model creation delay to ensure load request is sent
        time.sleep(3)
        started_at = time.time()

        try:
            client = self._get_client(grpc_requested)
            self.assertTrue(client.is_server_live())
            # The liveness probe must not have been held up by the reload
            elapsed = time.time() - started_at
            self.assertTrue(
                elapsed < 5,
                "server was waiting unexpectedly, waited {}".format(elapsed),
            )
            self.assertTrue(client.is_server_ready())
            self.assertTrue(client.is_model_ready(name, "1"))
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))
        self._infer_success_identity(base, (1,), np.int32, shape)

        loader.join()
        # After the reload finished, v2 replaces v1
        expect_versions(ready=("2",), not_ready=("1",))
        self._infer_success_identity(base, (2,), np.int32, shape)

    def test_model_availability_on_reload_2(self):
        """Check that v1 stays available while a config change switches to v2.

        Same flow as test_model_availability_on_reload, except that the
        reload is triggered by overwriting config.pbtxt with
        "config.pbtxt.v2" rather than by adding a version directory.
        """
        name = "identity_zero_1_int32"
        base = "identity"
        shape = (16,)

        # The protocol is selected by the presence of an environment variable
        grpc_requested = "TRITONSERVER_USE_GRPC" in os.environ

        def expect_versions(ready, not_ready=()):
            # Server must be live and ready, with the given version states.
            try:
                client = self._get_client(grpc_requested)
                self.assertTrue(client.is_server_live())
                self.assertTrue(client.is_server_ready())
                for version in not_ready:
                    self.assertFalse(client.is_model_ready(name, version))
                for version in ready:
                    self.assertTrue(client.is_model_ready(name, version))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

        # Version 1 is expected to be loaded at startup
        expect_versions(ready=("1",))
        self._infer_success_identity(base, (1,), np.int32, shape)

        # Replace the configuration so that only v2 is requested
        config_path = "models/" + name + "/config.pbtxt"
        shutil.copyfile("config.pbtxt.v2", config_path)

        # The load call blocks, so issue it from another thread and probe
        # v1 availability while it is in flight
        loader = threading.Thread(
            target=self._async_load, args=(name, grpc_requested)
        )
        loader.start()
        # wait for time < model creation delay to ensure load request is sent
        time.sleep(3)
        started_at = time.time()

        try:
            client = self._get_client(grpc_requested)
            self.assertTrue(client.is_server_live())
            # The liveness probe must not have been held up by the reload
            elapsed = time.time() - started_at
            self.assertTrue(
                elapsed < 5,
                "server was waiting unexpectedly, waited {}".format(elapsed),
            )
            self.assertTrue(client.is_server_ready())
            self.assertTrue(client.is_model_ready(name, "1"))
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))
        self._infer_success_identity(base, (1,), np.int32, shape)

        loader.join()
        # After the reload finished, v2 replaces v1
        expect_versions(ready=("2",), not_ready=("1",))
        self._infer_success_identity(base, (2,), np.int32, shape)

    def test_model_availability_on_reload_3(self):
        """Check that v1 stays available while v1 itself is being reloaded.

        config.pbtxt is overwritten with "config.pbtxt.new" and a reload is
        requested from a background thread. Version 1 must be ready before,
        during and after the reload.
        """
        name = "identity_zero_1_int32"
        base = "identity"
        shape = (16,)

        # The protocol is selected by the presence of an environment variable
        grpc_requested = "TRITONSERVER_USE_GRPC" in os.environ

        def expect_v1_ready():
            # Server must be live and ready with version 1 available.
            try:
                client = self._get_client(grpc_requested)
                self.assertTrue(client.is_server_live())
                self.assertTrue(client.is_server_ready())
                self.assertTrue(client.is_model_ready(name, "1"))
            except Exception as err:
                self.assertTrue(False, "unexpected error {}".format(err))

        # Version 1 is expected to be loaded at startup
        expect_v1_ready()
        self._infer_success_identity(base, (1,), np.int32, shape)

        # Swap in the alternate configuration file
        config_path = "models/" + name + "/config.pbtxt"
        shutil.copyfile("config.pbtxt.new", config_path)

        # v1 will be reloaded, but it should stay available for the whole
        # duration of the reload
        loader = threading.Thread(
            target=self._async_load, args=(name, grpc_requested)
        )
        loader.start()
        # wait for time < model creation delay to ensure load request is sent
        time.sleep(3)
        started_at = time.time()

        try:
            client = self._get_client(grpc_requested)
            self.assertTrue(client.is_server_live())
            # The liveness probe must not have been held up by the reload
            elapsed = time.time() - started_at
            self.assertTrue(
                elapsed < 5,
                "server was waiting unexpectedly, waited {}".format(elapsed),
            )
            self.assertTrue(client.is_server_ready())
            self.assertTrue(client.is_model_ready(name, "1"))
        except Exception as err:
            self.assertTrue(False, "unexpected error {}".format(err))
        self._infer_success_identity(base, (1,), np.int32, shape)

        loader.join()
        # Version 1 must still be served once the reload has completed
        expect_v1_ready()
        self._infer_success_identity(base, (1,), np.int32, shape)

    def test_model_reload_fail(self):
        """Check that a failed reload leaves the previously loaded version serving.

        Version 1 of the identity model is expected to be loaded. The
        configuration is then replaced by "config.pbtxt.v2.gpu" and a reload
        is requested; the reload must be rejected, version 2 must not become
        ready, and version 1 must keep serving inference requests.
        """
        model_name = "identity_zero_1_int32"
        model_base = "identity"
        model_shape = (16,)

        # Make sure version 1 of the model is loaded
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            self.assertTrue(triton_client.is_server_live())
            self.assertTrue(triton_client.is_server_ready())
            self.assertTrue(triton_client.is_model_ready(model_name, "1"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        self._infer_success_identity(model_base, (1,), np.int32, model_shape)

        # Overwrite config.pbtxt to load v2 only on GPU, which will fail
        shutil.copyfile("config.pbtxt.v2.gpu", "models/" + model_name + "/config.pbtxt")

        # Reload models, v1 should still be available even if v2 fails to load
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            triton_client.load_model(model_name)
            self.assertTrue(False, "expecting load failure")
        # Catch only the client error: a broad handler would also catch the
        # AssertionError raised above, which has no message() method, and
        # would hide the real failure behind an AttributeError.
        except InferenceServerException as ex:
            self.assertIn(
                "version 2 is at UNAVAILABLE state: Internal: GPU instances not supported",
                ex.message(),
            )

        # Make sure version 1 of the model is available, and version 2 is not
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            self.assertTrue(triton_client.is_server_live())
            self.assertTrue(triton_client.is_server_ready())
            self.assertTrue(triton_client.is_model_ready(model_name, "1"))
            self.assertFalse(triton_client.is_model_ready(model_name, "2"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        self._infer_success_identity(model_base, (1,), np.int32, model_shape)

    def test_multiple_model_repository_control_startup_models(self):
        model_shape = (1, 16)
        onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32)
        plan_name = tu.get_model_name("plan", np.float32, np.float32, np.float32)

        ensemble_prefix = "simple_"
        onnx_ensemble_name = ensemble_prefix + onnx_name
        plan_ensemble_name = ensemble_prefix + plan_name

        # Make sure unloaded models are not in the status
        for base in ("libtorch",):
            model_name = tu.get_model_name(base, np.float32, np.float32, np.float32)
            try:
                for triton_client in (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                ):
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_server_ready())
                    self.assertFalse(triton_client.is_model_ready(model_name, "1"))
                    self.assertFalse(triton_client.is_model_ready(model_name, "3"))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # And loaded models work properly
        self._infer_success_models(
            [
                "onnx",
            ],
            (1, 3),
            model_shape,
        )
        self._infer_success_models(
            [
                "simple_onnx",
            ],
            (1, 3),
            model_shape,
            swap=True,
        )
        self._infer_success_models(
            [
                "plan",
            ],
            (1, 3),
            model_shape,
        )

        # Load non-existing model
        for triton_client in (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        ):
            try:
                triton_client.load_model("unknown_model")
                self.assertTrue(False, "expected unknown model failure")
            except Exception as ex:
                self.assertIn(
                    "failed to load 'unknown_model', failed to poll from model repository",
                    ex.message(),
                )

        # Load plan ensemble model, the dependent model is already
        # loaded via command-line
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            triton_client.load_model(plan_ensemble_name)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        self._infer_success_models(
            [
                "plan",
            ],
            (1, 3),
            model_shape,
        )
        self._infer_success_models(
            [
                "simple_plan",
            ],
            (1, 3),
            model_shape,
            swap=True,
        )

        # Delete model configuration, which will cause the autofiller
        # to use the latest version policy so that only version 3 will
        # be available if the models are re-loaded
        os.remove("models/" + onnx_name + "/config.pbtxt")

        self._infer_success_models(
            [
                "plan",
            ],
            (1, 3),
            model_shape,
        )
        self._infer_success_models(
            [
                "simple_plan",
            ],
            (1, 3),
            model_shape,
            swap=True,
        )

        # Reload onnx, only version 3 should be available
        try:
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            triton_client.load_model(onnx_name)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        self._infer_success_models(
            [
                "onnx",
            ],
            (3,),
            model_shape,
        )
        self._infer_success_models(
            [
                "simple_onnx",
            ],
            (1, 3),
            model_shape,
            swap=True,
        )

        try:
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                self.assertFalse(triton_client.is_model_ready(onnx_name, "1"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Unload non-existing model, nothing should happen
        for triton_client in (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        ):
            try:
                triton_client.unload_model("unknown_model")
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Unload the onnx, as side effect, the ensemble model
        # will be forced to be unloaded
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            triton_client.unload_model(onnx_name)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        for model_name in [onnx_name, onnx_ensemble_name]:
            try:
                for triton_client in (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                ):
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_server_ready())
                    self.assertFalse(triton_client.is_model_ready(model_name, "1"))
                    self.assertFalse(triton_client.is_model_ready(model_name, "3"))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Explicitly unload the onnx ensemble and load the
        # depending model. The ensemble model should not be reloaded
        # because it was explicitly unloaded.
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            triton_client.unload_model(onnx_ensemble_name)
            triton_client.load_model(onnx_name)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        self._infer_success_models(
            [
                "onnx",
            ],
            (3,),
            model_shape,
        )
        self._infer_success_models(
            [
                "plan",
            ],
            (1, 3),
            model_shape,
        )
        self._infer_success_models(
            [
                "simple_plan",
            ],
            (1, 3),
            model_shape,
            swap=True,
        )

        try:
            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                self.assertTrue(triton_client.is_server_live())
                self.assertTrue(triton_client.is_server_ready())
                self.assertFalse(triton_client.is_model_ready(onnx_ensemble_name, "1"))
                self.assertFalse(triton_client.is_model_ready(onnx_ensemble_name, "3"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_model_repository_index(self):
        """Verify model readiness and the repository index on HTTP and gRPC.

        The server is expected to run with model control EXPLICIT and
        --load-model so that only a subset of the repository is loaded.
        The index must report 8 entries, contain every expected model name,
        and mark onnx_float32_float32_float32 as UNAVAILABLE because it
        appears in two repositories.
        """
        model_bases = ["plan", "libtorch", "simple_libtorch"]

        # Sanity check on loaded models: every base listed above must be
        # ready on both endpoints.
        # NOTE(review): an earlier comment said only the two libtorch models
        # are loaded, yet "plan" is checked here as well -- confirm against
        # the server launch arguments.
        for model_base in model_bases:
            try:
                model_name = tu.get_model_name(
                    model_base, np.float32, np.float32, np.float32
                )
                for triton_client in (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                ):
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_server_ready())
                    self.assertTrue(triton_client.is_model_ready(model_name))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Check model repository index
        # All models should be in ready state except onnx_float32_float32_float32
        # which appears in two repositories.
        model_bases.append("simple_plan")
        try:
            expected_names = [
                tu.get_model_name(model_base, np.float32, np.float32, np.float32)
                for model_base in model_bases
            ]

            # HTTP returns the index as a list of dicts.
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
            index = triton_client.get_model_repository_index()
            self.assertEqual(len(index), 8)
            indexed = [entry["name"] for entry in index]
            for entry in index:
                if entry["name"] == "onnx_float32_float32_float32":
                    self.assertEqual(entry["state"], "UNAVAILABLE")
                    self.assertEqual(
                        entry["reason"], "model appears in two or more repositories"
                    )
            for model_name in expected_names:
                # assertIn reports the missing name and the indexed list
                self.assertIn(model_name, indexed)

            # gRPC returns a message whose `models` field holds the entries.
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            index = triton_client.get_model_repository_index()
            self.assertEqual(len(index.models), 8)
            indexed = [entry.name for entry in index.models]
            for entry in index.models:
                if entry.name == "onnx_float32_float32_float32":
                    self.assertEqual(entry.state, "UNAVAILABLE")
                    self.assertEqual(
                        entry.reason, "model appears in two or more repositories"
                    )
            for model_name in expected_names:
                self.assertIn(model_name, indexed)

        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_config_override(self):
        model_shape = (1, 16)

        for triton_client in (
            httpclient.InferenceServerClient("localhost:8000", verbose=True),
            grpcclient.InferenceServerClient("localhost:8001", verbose=True),
        ):
            for base in (("onnx", "onnxruntime"),):
                model_name = tu.get_model_name(
                    base[0], np.float32, np.float32, np.float32
                )
                try:
                    self.assertTrue(triton_client.is_server_live())
                    self.assertFalse(triton_client.is_model_ready(model_name, "1"))
                    self.assertFalse(triton_client.is_model_ready(model_name, "2"))
                    self.assertFalse(triton_client.is_model_ready(model_name, "3"))
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))

                # Request to load the model as is and expect the model fails
                # to load with default config
                try:
                    triton_client.load_model(model_name)
                    self.assertTrue(
                        False, "expected fail to load '{}'".format(model_name)
                    )
                except Exception as ex:
                    self.assertIn(
                        "load failed for model '{}'".format(model_name), ex.message()
                    )

                # Request to load the model with provided "correct" config
                try:
                    triton_client.load_model(
                        model_name,
                        config="""
{{"backend":"{backend}","version_policy":{{"specific" : {{ "versions": [2] }} }} }}
""".format(
                            backend=base[1]
                        ),
                    )
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                self.assertFalse(triton_client.is_model_ready(model_name, "1"))
                self.assertTrue(triton_client.is_model_ready(model_name, "2"))
                self.assertFalse(triton_client.is_model_ready(model_name, "3"))

                # And loaded models work properly
                self._infer_success_models(
                    [
                        base[0],
                    ],
                    (2,),
                    model_shape,
                )

                # request without additional config will load retain the provided
                # config and expect to not fail, and version 2 will not be loaded.
                try:
                    triton_client.load_model(model_name)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                self.assertFalse(triton_client.is_model_ready(model_name, "1"))
                self.assertTrue(triton_client.is_model_ready(model_name, "2"))
                self.assertFalse(triton_client.is_model_ready(model_name, "3"))

                # Unload model for the next client iteration
                try:
                    triton_client.unload_model(model_name)
                    self.assertFalse(triton_client.is_model_ready(model_name, "1"))
                    self.assertFalse(triton_client.is_model_ready(model_name, "2"))
                    self.assertFalse(triton_client.is_model_ready(model_name, "3"))
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))

    def test_file_override(self):
        """Exercise the load API's `files` override on HTTP and gRPC.

        The version-3 model file on disk is uploaded as version 1 of (a) a
        brand-new model name and (b) the original model name, checking after
        each step which versions of each model are ready and can serve
        inference. The original model is restored at the end of every client
        iteration.
        """
        model_shape = (1, 16)
        override_base = "override_model"

        def expect_only_version(client, name, ready_version):
            # Probe versions "1", "2", "3" in order; only `ready_version`
            # may be ready.
            for version in ("1", "2", "3"):
                if version == ready_version:
                    self.assertTrue(client.is_model_ready(name, version))
                else:
                    self.assertFalse(client.is_model_ready(name, version))

        for framework, backend in (("onnx", "onnxruntime"),):
            model_name = tu.get_model_name(
                framework, np.float32, np.float32, np.float32
            )
            override_model_name = tu.get_model_name(
                override_base, np.float32, np.float32, np.float32
            )
            # Minimal config sent along with every file override below.
            override_config = """{{"backend":"{backend}" }}""".format(backend=backend)

            # Read the on-disk version 3 model; it is the override payload.
            source_path = "models/{}/3/model.{}".format(model_name, framework)
            with open(source_path, "rb") as model_file:
                file_content = model_file.read()

            for triton_client in (
                httpclient.InferenceServerClient("localhost:8000", verbose=True),
                grpcclient.InferenceServerClient("localhost:8001", verbose=True),
            ):
                # Starting state: server live, only version 3 ready.
                try:
                    self.assertTrue(triton_client.is_server_live())
                    expect_only_version(triton_client, model_name, "3")
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))

                # An override file without an override config must be
                # rejected. Requiring the config is a reminder that the
                # existing model directory will not be used.
                try:
                    triton_client.load_model(
                        model_name, files={"file:1/model.onnx": file_content}
                    )
                    self.assertTrue(False, "expected error on missing override config")
                except InferenceServerException as ex:
                    # [FIXME] Improve error reporting to mention missing config
                    expected_msg = (
                        "failed to load '{}', failed to poll from model repository"
                    ).format(model_name)
                    self.assertIn(expected_msg, ex.message())

                # The failed attempt must leave the previously loaded
                # version untouched.
                expect_only_version(triton_client, model_name, "3")
                self._infer_success_models([framework], (3,), model_shape)

                # Load the override file plus config under a different name.
                try:
                    triton_client.load_model(
                        override_model_name,
                        config=override_config,
                        files={"file:1/model.onnx": file_content},
                    )
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))

                # The original model is unaffected by loading another name.
                expect_only_version(triton_client, model_name, "3")
                self._infer_success_models([framework], (3,), model_shape)

                # The new override model serves version 1 only.
                expect_only_version(triton_client, override_model_name, "1")
                self._infer_success_models(
                    [override_base], (1,), model_shape, swap=True
                )

                # Load the override file plus config under the original name.
                try:
                    triton_client.load_model(
                        model_name,
                        config=override_config,
                        files={"file:1/model.onnx": file_content},
                    )
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))

                # The model now comes from the override directory, which
                # holds a different version.
                expect_only_version(triton_client, model_name, "1")
                self._infer_success_models([framework], (1,), model_shape, swap=True)

                # The differently named override model is still available.
                expect_only_version(triton_client, override_model_name, "1")
                self._infer_success_models(
                    [override_base], (1,), model_shape, swap=True
                )

                # Reset for the next client iteration: unloading and loading
                # again makes the original model repository be used.
                try:
                    triton_client.unload_model(model_name)
                    triton_client.load_model(model_name)
                    triton_client.unload_model(override_model_name)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                expect_only_version(triton_client, model_name, "3")
                self._infer_success_models([framework], (3,), model_shape)

    # Test that model load API file override can't be used to create files
    # outside of any model directory.
    def test_file_override_security(self):
        """Verify file-override paths cannot escape the model directory.

        Builds relative ("../..") and symlink-based paths that resolve to
        /root, sends each as a `file:` override to the HTTP load API, and
        asserts that every load fails, that no new file was created and that
        the existing /root/.bashrc does not hold the injected contents.
        """
        # When using model load API, temporary model directories are created in
        # a randomly generated /tmp/folderXXXXXX directory for the life of the
        # model, and cleaned up on model unload.
        model_basepath = "/tmp/folderXXXXXX"
        if os.path.exists(model_basepath) and os.path.isdir(model_basepath):
            shutil.rmtree(model_basepath)
        os.makedirs(model_basepath)

        # Set file override paths that try to escape out of model directory,
        # and test both pre-existing and non-existent files.
        root_home_dir = "/root"

        # Relative paths
        escape_dir_rel = os.path.join("..", "..", "root")
        escape_dir_full = os.path.join(model_basepath, escape_dir_rel)
        self.assertEqual(os.path.abspath(escape_dir_full), root_home_dir)

        new_file_rel = os.path.join(escape_dir_rel, "new_dir", "test.txt")
        self.assertFalse(os.path.exists(os.path.join(model_basepath, new_file_rel)))
        existing_file_rel = os.path.join(escape_dir_rel, ".bashrc")
        self.assertTrue(os.path.exists(os.path.join(model_basepath, existing_file_rel)))

        # Symlinks
        ## No easy way to inject symlink into generated temp model dir, so for
        ## testing sake, make a fixed symlink path in /tmp.
        escape_dir_symlink_rel = os.path.join("..", "escape_symlink")
        escape_dir_symlink_full = "/tmp/escape_symlink"
        self.assertEqual(
            os.path.abspath(os.path.join(model_basepath, escape_dir_symlink_rel)),
            escape_dir_symlink_full,
        )
        # lexists() is also true for a dangling symlink left by an earlier
        # run; exists() would miss it and os.symlink() would then raise
        # FileExistsError.
        if os.path.lexists(escape_dir_symlink_full):
            os.unlink(escape_dir_symlink_full)
        os.symlink(root_home_dir, escape_dir_symlink_full)
        # Confirm the symlink really resolves to the target. realpath() is
        # required because abspath() does not follow symlinks, and
        # assertEqual because assertTrue(a, b) treats b as the failure
        # message and always passes for a non-empty path.
        self.assertEqual(
            os.path.realpath(escape_dir_symlink_full),
            os.path.realpath(root_home_dir),
        )

        symlink_new_file_rel = os.path.join(
            escape_dir_symlink_rel, "new_dir", "test.txt"
        )
        self.assertFalse(
            os.path.exists(os.path.join(model_basepath, symlink_new_file_rel))
        )
        symlink_existing_file_rel = os.path.join(escape_dir_symlink_rel, ".bashrc")
        self.assertTrue(
            os.path.exists(os.path.join(model_basepath, symlink_existing_file_rel))
        )

        # Contents to try writing to file, though it should fail to be written
        new_contents = "This shouldn't exist"
        new_contents_b64 = base64.b64encode(new_contents.encode())

        new_files = [new_file_rel, symlink_new_file_rel]
        existing_files = [existing_file_rel, symlink_existing_file_rel]
        all_files = new_files + existing_files
        for filepath in all_files:
            # minimal config to create a new model
            config = json.dumps({"backend": "identity"})
            files = {f"file:{filepath}": new_contents_b64}
            with httpclient.InferenceServerClient("localhost:8000") as client:
                with self.assertRaisesRegex(InferenceServerException, "failed to load"):
                    client.load_model("new_model", config=config, files=files)

        for rel_path in new_files:
            # Assert new file wasn't created
            self.assertFalse(os.path.exists(os.path.join(model_basepath, rel_path)))

        for rel_path in existing_files:
            # Read the existing file and make sure its contents weren't overwritten
            existing_file = os.path.join(model_basepath, rel_path)
            self.assertTrue(os.path.exists(existing_file))
            with open(existing_file) as f:
                contents = f.read()
                self.assertNotEqual(contents, new_contents)

    def test_shutdown_dynamic(self):
        """Check shutdown behavior for the model "custom_zero_1_float32".

        Queues asynchronous gRPC requests, sends SIGINT to the process named
        by the SERVER_PID environment variable, asserts that a new request is
        rejected with a connection error, and asserts that every collected
        asynchronous response echoes the input.
        """
        model_shape = (1, 1)
        input_data = np.ones(shape=(1, 1), dtype=np.float32)

        inputs = [grpcclient.InferInput("INPUT0", model_shape, "FP32")]
        inputs[0].set_data_from_numpy(input_data)

        triton_client = grpcclient.InferenceServerClient("localhost:8001", verbose=True)
        model_name = "custom_zero_1_float32"

        # Completion callback: record the error if there is one, otherwise
        # the result.
        def callback(user_data, result, error):
            if error:
                user_data.append(error)
            else:
                user_data.append(result)

        # Currently the dynamic batcher will form payloads and place to
        # instance queue in advance. The batcher doesn't track requests
        # in the next stage so need to send more requests to saturate the
        # queue.
        request_count = 6
        async_results = []
        for _ in range(request_count):
            triton_client.async_infer(
                model_name, inputs, partial(callback, async_results)
            )
        time.sleep(1)

        # Send signal to shutdown the server
        os.kill(int(os.environ["SERVER_PID"]), signal.SIGINT)
        time.sleep(0.5)

        # Send more requests and should be rejected
        try:
            triton_client.infer(model_name, inputs)
            self.assertTrue(False, "expected error for new inference during shutdown")
        except InferenceServerException as ex:
            self.assertIn(
                "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:8001: "
                + "Failed to connect to remote host: connect: Connection refused (111)",
                ex.message(),
            )

        # Wait (up to ~30 seconds) until the results are available in
        # user_data
        time_out = 30
        while (len(async_results) < request_count) and time_out > 0:
            time_out = time_out - 1
            time.sleep(1)

        # Previous requests should succeed
        for result in async_results:
            # isinstance also matches subclasses of the client exception
            if isinstance(result, InferenceServerException):
                raise result
            output_data = result.as_numpy("OUTPUT0")
            np.testing.assert_allclose(
                output_data, input_data, err_msg="Inference result is not correct"
            )

    def test_shutdown_sequence(self):
        """Check shutdown behavior for the model "custom_sequence_int32".

        Starts two sequences asynchronously over gRPC, sends SIGINT to the
        process named by the SERVER_PID environment variable, asserts that
        three kinds of follow-up requests are rejected, and asserts that
        every collected asynchronous response echoes the input.
        """
        model_shape = (1, 1)
        input_data = np.ones(shape=(1, 1), dtype=np.int32)

        inputs = [grpcclient.InferInput("INPUT", model_shape, "INT32")]
        inputs[0].set_data_from_numpy(input_data)

        triton_client = grpcclient.InferenceServerClient("localhost:8001", verbose=True)
        model_name = "custom_sequence_int32"

        # Completion callback: record the error if there is one, otherwise
        # the result.
        def callback(user_data, result, error):
            if error:
                user_data.append(error)
            else:
                user_data.append(result)

        # Start multiple sequences. Send two requests as only requests held
        # in scheduler are counted as in-flight (the first request is in
        # execution)
        request_count = 2
        async_results = []
        for i in range(request_count):
            triton_client.async_infer(
                model_name,
                inputs,
                partial(callback, async_results),
                sequence_id=(i + 1),
                sequence_start=True,
            )
        time.sleep(1)

        # Send signal to shutdown the server
        os.kill(int(os.environ["SERVER_PID"]), signal.SIGINT)
        time.sleep(0.5)

        connection_refused = (
            "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:8001: "
            + "Failed to connect to remote host: connect: Connection refused (111)"
        )

        # Send requests with different characteristic
        # 1: New sequence with new sequence ID
        # NOTE(review): request_count equals 2, which is an ID already
        # started above, so this does not actually use a new sequence ID --
        # confirm whether request_count + 1 was intended.
        try:
            triton_client.infer(
                model_name, inputs, sequence_id=request_count, sequence_start=True
            )
            self.assertTrue(False, "expected error for new inference during shutdown")
        except InferenceServerException as ex:
            # The first request received by the gRPC endpoint while shutting down returns CANCELLED
            # each subsequent request returns Connection refused
            self.assertIn("CANCELLED", ex.message())
        # 2: New sequence with existing sequence ID
        try:
            triton_client.infer(model_name, inputs, sequence_id=1, sequence_start=True)
            self.assertTrue(False, "expected error for new inference during shutdown")
        except InferenceServerException as ex:
            self.assertIn(connection_refused, ex.message())
        # 3: Continuing sequence after shutdown
        try:
            triton_client.infer(model_name, inputs, sequence_id=2, sequence_end=True)
            self.assertTrue(False, "expected error for new inference during shutdown")
        except InferenceServerException as ex:
            self.assertIn(connection_refused, ex.message())

        # Wait (up to ~30 seconds) until the results are available in
        # user_data
        time_out = 30
        while (len(async_results) < request_count) and time_out > 0:
            time_out = time_out - 1
            time.sleep(1)

        # Previous requests should succeed
        for result in async_results:
            # isinstance also matches subclasses of the client exception
            if isinstance(result, InferenceServerException):
                raise result
            output_data = result.as_numpy("OUTPUT")
            np.testing.assert_allclose(
                output_data, input_data, err_msg="Inference result is not correct"
            )

        # Sleep 5 seconds for scheduler timeout to work and should
        # reduce the in-flight count
        time.sleep(5)

    def test_shutdown_ensemble(self):
        """Check shutdown behavior for the model "ensemble_zero_1_float32".

        Queues one asynchronous gRPC request, sends SIGINT to the process
        named by the SERVER_PID environment variable, asserts that a new
        request is rejected with a connection error, and asserts that every
        collected asynchronous response echoes the input.
        """
        model_shape = (1, 1)
        input_data = np.ones(shape=(1, 1), dtype=np.float32)

        inputs = [grpcclient.InferInput("INPUT0", model_shape, "FP32")]
        inputs[0].set_data_from_numpy(input_data)

        triton_client = grpcclient.InferenceServerClient("localhost:8001", verbose=True)
        model_name = "ensemble_zero_1_float32"

        # Completion callback: record the error if there is one, otherwise
        # the result.
        def callback(user_data, result, error):
            if error:
                user_data.append(error)
            else:
                user_data.append(result)

        # Even the ensemble is actually a wrapper over the model for
        # test_shutdown_dynamic, we don't need to send many requests as
        # ensemble scheduler tracks in-flight requests w.r.t. the whole pipeline
        request_count = 1
        async_results = []
        for _ in range(request_count):
            triton_client.async_infer(
                model_name, inputs, partial(callback, async_results)
            )
        time.sleep(1)

        # Send signal to shutdown the server
        os.kill(int(os.environ["SERVER_PID"]), signal.SIGINT)
        time.sleep(0.5)

        # Send more requests and should be rejected
        try:
            triton_client.infer(model_name, inputs)
            self.assertTrue(False, "expected error for new inference during shutdown")
        except InferenceServerException as ex:
            self.assertIn(
                "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:8001: "
                + "Failed to connect to remote host: connect: Connection refused (111)",
                ex.message(),
            )

        # Wait (up to ~10 seconds) until the results are available in
        # user_data
        time_out = 10
        while (len(async_results) < request_count) and time_out > 0:
            time_out = time_out - 1
            time.sleep(1)

        # Previous requests should succeed
        for result in async_results:
            # isinstance also matches subclasses of the client exception
            if isinstance(result, InferenceServerException):
                raise result
            output_data = result.as_numpy("OUTPUT0")
            np.testing.assert_allclose(
                output_data, input_data, err_msg="Inference result is not correct"
            )

    def test_load_gpu_limit(self):
        """Verify that model loading honors the GPU memory limit.

        Loads "cuda_memory_consumer_1", expects loading
        "cuda_memory_consumer_2" to fail with a memory-limit error for GPU 0,
        then unloads the first model and expects the second load to succeed.
        """
        model_name = "cuda_memory_consumer"
        try:
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            triton_client.load_model(model_name + "_1")
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # After the first load, the memory consumption should have exceeded
        # the specified limit, load will fail
        try:
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            triton_client.load_model(model_name + "_2")
            self.assertTrue(False, "expected error for loading model")
        # Catch only the client error: a broad `except Exception` would also
        # swallow the AssertionError raised just above and then crash on
        # `.message()`, hiding the real failure.
        except InferenceServerException as ex:
            self.assertIn("memory limit set for GPU 0 has exceeded", ex.message())

        # Load should work after explicitly unload model to free memory
        try:
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
            triton_client.unload_model(model_name + "_1")
            triton_client.load_model(model_name + "_2")
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_concurrent_model_load_speedup(self):
        """Check that two delayed models load faster together than in series.

        For every model pair, both ``load_model`` calls are issued from their
        own thread and the wall-clock time of the pair must be at least 10 and
        less than 15 seconds. Afterwards both models must report ready.
        """
        try:
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        # Every model below is expected to need 10 seconds to load.
        model_pairs = [
            ["identity_zero_1_int32_1", "identity_zero_1_int32_2"],
            ["python_identity_fp32_1", "python_identity_fp32_2"],
        ]
        for model_pair in model_pairs:
            # One loader thread per model so the two loads overlap.
            loaders = [
                threading.Thread(target=triton_client.load_model, args=(name,))
                for name in model_pair
            ]
            started_at = time.time()
            for loader in loaders:
                loader.start()
            for loader in loaders:
                loader.join()
            loading_time = time.time() - started_at
            # A serial load would need >= 20 seconds; anything below that is a
            # speedup, but a tighter 15 second bound is enforced.
            self.assertLess(
                loading_time, 15.0, "Concurrent loading speedup not observed"
            )
            # A single model already needs 10 seconds, so less is impossible.
            self.assertGreaterEqual(
                loading_time, 10.0, "Invalid concurrent loading time"
            )
            # Both models of the pair must have ended up loaded.
            self.assertTrue(triton_client.is_server_live())
            self.assertTrue(triton_client.is_server_ready())
            for name in model_pair:
                self.assertTrue(triton_client.is_model_ready(name))

    def test_concurrent_model_load(self):
        """Load the same model name twice while swapping its files in between.

        The first load is started, the ``models`` directory is replaced by
        ``models_v2`` two seconds later, and a second load is submitted. Both
        loads must succeed and the model that ends up served must report the
        "python" platform.
        """
        try:
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        target = "identity_model"
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Kick off the load of the 10-second delayed identity backend model.
            first_load = executor.submit(triton_client.load_model, target)
            time.sleep(2)  # wait between loads
            # Swap the repository so the model is now a python backend model.
            shutil.move("models", "models_v1")
            shutil.move("models_v2", "models")
            # This load is expected to be held back until the first one is done.
            second_load = executor.submit(triton_client.load_model, target)
            # result() re-raises any failure from the corresponding load.
            first_load.result()
            second_load.result()
        # The server and the model must be ready ...
        self.assertTrue(triton_client.is_server_live())
        self.assertTrue(triton_client.is_server_ready())
        self.assertTrue(triton_client.is_model_ready(target))
        # ... and the model being served must be the second (python) one.
        metadata = triton_client.get_model_metadata(target)
        self.assertEqual(metadata.platform, "python")

    def test_concurrent_model_load_unload(self):
        """Exercise unload requests that arrive while a load is in progress.

        Three scenarios are covered: unloading the model that is being loaded,
        unloading a dependency of an ensemble that is being loaded, and finally
        loading and then unloading two models at the same time.
        """
        try:
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        def unload_during_load(model_to_load, model_to_unload):
            # Start a load, submit an unload two seconds later, and surface any
            # error from either request. The unload is expected to wait for the
            # load to complete.
            with concurrent.futures.ThreadPoolExecutor() as executor:
                pending_load = executor.submit(triton_client.load_model, model_to_load)
                time.sleep(2)  # wait between load and unload
                pending_unload = executor.submit(
                    triton_client.unload_model, model_to_unload
                )
                pending_load.result()
                pending_unload.result()

        # Unload identity_zero_1_int32 while it is being loaded.
        unload_during_load("identity_zero_1_int32", "identity_zero_1_int32")
        self.assertTrue(triton_client.is_server_live())
        self.assertTrue(triton_client.is_server_ready())
        self.assertFalse(triton_client.is_model_ready("identity_zero_1_int32"))

        # Unload a dependency of ensemble_zero_1_float32 while the ensemble is
        # being loaded.
        unload_during_load("ensemble_zero_1_float32", "custom_zero_1_float32")
        self.assertTrue(triton_client.is_server_live())
        self.assertTrue(triton_client.is_server_ready())
        self.assertFalse(triton_client.is_model_ready("ensemble_zero_1_float32"))
        self.assertFalse(triton_client.is_model_ready("custom_zero_1_float32"))

        # Load both models at once, then unload both at once.
        model_names = ["identity_zero_1_int32", "ensemble_zero_1_float32"]
        for is_load in [True, False]:
            if is_load:
                action_fn = triton_client.load_model
            else:
                action_fn = triton_client.unload_model
            with concurrent.futures.ThreadPoolExecutor() as executor:
                pending = [executor.submit(action_fn, name) for name in model_names]
                for finished in concurrent.futures.as_completed(pending):
                    finished.result()
            # Ready after the load round, not ready after the unload round.
            for name in model_names:
                self.assertEqual(is_load, triton_client.is_model_ready(name))

    # TODO: Consider revisiting this test
    # The goal of this test is only to ensure the server does not crash when
    # bombarded with concurrent load/unload requests for the same model.
    # Some clean-up:
    # 1. Improve core logic so all load/unload requests will always succeed, so
    #    'load_fail_reasons' and 'unload_fail_reasons' can be removed.
    # 2. Is it still necessary to track the ability to replicate a load while
    #    async unloading?
    # 3. What is the ideal number of threads and iterations, across different
    #    machines, that the server is sufficiently stressed?
    def test_concurrent_same_model_load_unload_stress(self):
        """Hammer the server with concurrent load/unload of a single model.

        ``num_threads`` workers each run ``num_iterations`` load/unload cycles
        on ``identity_zero_1_int32``. Requests are allowed to fail only with one
        of the known race messages listed below; the server must still be live
        and ready at the end. The per-message failure counts are written to
        ``./test_concurrent_same_model_load_unload_stress.statistics.log``.
        """
        model_name = "identity_zero_1_int32"
        num_threads = 32
        num_iterations = 1024
        try:
            triton_client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=True
            )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        load_fail_reasons = [
            "unexpected miss in global map",
            "no version is available",
            "failed to poll from model repository",
        ]
        unload_fail_reasons = ["versions that are still available: 1"]
        load_fail_messages = [
            ("failed to load '" + model_name + "', " + reason)
            for reason in load_fail_reasons
        ]
        unload_fail_messages = [
            ("failed to unload '" + model_name + "', " + reason)
            for reason in unload_fail_reasons
        ]
        global_exception_stats = {}  # { "exception message": number of occurrence }
        load_before_unload_finish = [False]  # use list to access by reference

        def _load_unload():
            exception_stats = {}  # { "exception message": number of occurrence }

            def _record(message):
                exception_stats[message] = exception_stats.get(message, 0) + 1

            for _ in range(num_iterations):
                try:
                    triton_client.load_model(model_name)
                except InferenceServerException as ex:
                    # Acceptable for an unload to happen after a load completes, only
                    # before the load can verify its load state.
                    error_message = ex.message()
                    self.assertIn(error_message, load_fail_messages)
                    _record(error_message)
                try:
                    triton_client.unload_model(model_name)
                except InferenceServerException as ex:
                    # Acceptable for a load to happen after an unload completes, only
                    # before the unload can verify its unload state.
                    error_message = ex.message()
                    self.assertIn(error_message, unload_fail_messages)
                    _record(error_message)
                    load_before_unload_finish[0] = True
            return exception_stats

        # Size the pool explicitly: the default worker count depends on the
        # machine's CPU count and can be smaller than num_threads, in which case
        # the tasks would queue instead of running concurrently.
        with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as pool:
            workers = [pool.submit(_load_unload) for _ in range(num_threads)]
            for worker in workers:
                # result() re-raises assertion failures from the worker thread.
                for key, count in worker.result().items():
                    global_exception_stats[key] = (
                        global_exception_stats.get(key, 0) + count
                    )

        self.assertTrue(triton_client.is_server_live())
        self.assertTrue(triton_client.is_server_ready())

        # This test can replicate a load while async unloading on machines with
        # sufficient concurrency. Regardless on whether it is replicated or not,
        # the server must not crash.
        if not load_before_unload_finish[0]:
            # Track non-replication on test printout via statistics.
            warning_msg = "Cannot replicate a load while async unloading. CPU count: {}. num_threads: {}.".format(
                multiprocessing.cpu_count(), num_threads
            )
            global_exception_stats[warning_msg] = 1

        stats_path = "./test_concurrent_same_model_load_unload_stress.statistics.log"
        with open(stats_path, mode="w", encoding="utf-8") as f:
            f.write(str(global_exception_stats) + "\n")

    def test_concurrent_model_instance_load_speedup(self):
        """Check that multiple instances of one model are loaded concurrently.

        ``identity_fp32`` is loaded over HTTP with an instance group of two
        CPU instances; the load must take at least 10 and less than 15
        seconds, and the model must be ready afterwards.
        """
        try:
            triton_client = httpclient.InferenceServerClient(
                "localhost:8000", verbose=True
            )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        # Two instances, each of which has a delay time of 10 seconds.
        num_instances = 2
        config = {"instance_group": [{"kind": "KIND_CPU", "count": num_instances}]}
        for model in ["identity_fp32"]:
            # Time a single load request that has to create all the instances.
            started_at = time.time()
            try:
                triton_client.load_model(model, config=json.dumps(config))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            loading_time = time.time() - started_at
            print(f"Time to load {num_instances} instances: {loading_time}")

            # Loading the instances one after another would need >= 20 seconds;
            # a tighter bound of 15 seconds is used to confirm the speedup.
            self.assertLess(
                loading_time, 15.0, "Concurrent loading speedup not observed"
            )
            # One instance alone already needs 10 seconds.
            self.assertGreaterEqual(
                loading_time, 10.0, "Invalid concurrent loading time"
            )
            # The model must have ended up loaded.
            self.assertTrue(triton_client.is_server_live())
            self.assertTrue(triton_client.is_server_ready())
            self.assertTrue(triton_client.is_model_ready(model))

    def _call_with_timeout(self, callable, timeout_secs):
        """Run ``callable()`` and abort it with TimeoutError after a deadline.

        A SIGALRM handler that raises ``TimeoutError`` is installed and an
        alarm is armed for ``timeout_secs`` seconds. Whatever the outcome, the
        alarm is cancelled and the previous SIGALRM handler is put back before
        returning, so a call that finishes early cannot trigger a stray
        ``TimeoutError`` later in unrelated code.

        Args:
            callable: Zero-argument callable to invoke.
            timeout_secs: Whole number of seconds passed to ``signal.alarm``.

        Returns:
            Whatever ``callable()`` returns.

        Raises:
            TimeoutError: If the alarm fires before ``callable()`` returns.
        """

        # Setup handler for timing out call
        def timeout_handler(sig, frame):
            raise TimeoutError()

        previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout_secs)
        try:
            return callable()
        finally:
            # Disarm first so the alarm cannot fire while the handler is swapped.
            signal.alarm(0)
            # signal.signal() returns None for handlers not installed from
            # Python; those cannot be passed back, so leave ours in place then.
            if previous_handler is not None:
                signal.signal(signal.SIGALRM, previous_handler)

    def _call_with_expected_timeout(self, callable, timeout_secs=3):
        """Assert that ``callable`` does not finish within ``timeout_secs``.

        The callable is run through ``self._call_with_timeout``. A
        ``TimeoutError`` is the expected outcome and makes this method return
        normally; any other exception, or a normal return of the callable,
        fails the test.
        """
        try:
            self._call_with_timeout(callable, timeout_secs)
        except TimeoutError:
            # The only outcome that counts as a pass.
            print("Inference timed out as expected.")
            return
        except Exception as ex:
            self.assertTrue(False, f"unexpected error {ex}")
        else:
            # The call came back before the deadline.
            self.assertTrue(False, "unexpected success, call should've timed out.")

    def _get_fp32_io(self, client_type):
        """Build the FP32 model I/O config and matching request inputs.

        Args:
            client_type: Object exposing an ``InferInput(name, shape, datatype)``
                constructor whose instances offer ``set_data_from_numpy``.

        Returns:
            A tuple ``(input_config, output_config, inputs)``: the config
            entries for INPUT0/INPUT1 and OUTPUT0/OUTPUT1 (TYPE_FP32, dims
            [-1, 16]) and one ``InferInput`` per input, each filled with
            float32 ones of shape [1, 16].
        """
        dtype = "TYPE_FP32"
        dims = [-1, 16]
        shape = [1, 16]

        def describe(tensor_names):
            # One config entry per tensor, all with the same type and dims.
            return [
                {"name": tensor_name, "data_type": dtype, "dims": dims}
                for tensor_name in tensor_names
            ]

        input_names = ["INPUT0", "INPUT1"]
        input_config = describe(input_names)
        output_config = describe(["OUTPUT0", "OUTPUT1"])

        # InferInput is given the datatype without the "TYPE_" prefix.
        wire_dtype = dtype.replace("TYPE_", "")
        inputs = []
        for tensor_name in input_names:
            tensor = client_type.InferInput(tensor_name, shape, wire_dtype)
            inputs.append(tensor)
            tensor.set_data_from_numpy(np.ones(shape, dtype=np.float32))
        return input_config, output_config, inputs

    def test_concurrent_model_instance_load_sanity(self):
        """Sanity-check that every concurrently loaded instance serves requests.

        For each backend named in the PARALLEL_BACKENDS environment variable
        (and each instance kind applicable to it) the model is loaded with five
        instances and direct sequence batching, one sequence is started per
        instance, one more sequence is expected to time out, and the model is
        unloaded again.
        """
        kind_cpu = "KIND_CPU"
        kind_gpu = "KIND_GPU"
        # Backends restricted to a single kind; all others use both kinds.
        kinds_by_backend = {"plan": [kind_gpu], "openvino": [kind_cpu]}
        fallback_kinds = [kind_cpu, kind_gpu]
        try:
            client_type = httpclient
            triton_client = client_type.InferenceServerClient(
                "localhost:8000", verbose=True
            )
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        backends = os.environ.get("PARALLEL_BACKENDS", "").split()
        self.assertTrue(len(backends) > 0, "PARALLEL_BACKENDS wasn't set")

        num_instances = 5
        sequence_timeout_secs = 10
        num_tries = 10
        input_config, output_config, inputs = self._get_fp32_io(client_type)
        for backend in backends:
            model = tu.get_model_name(backend, np.float32, np.float32, np.float32)
            for kind in kinds_by_backend.get(backend, fallback_kinds):
                with self.subTest(backend=backend, model=model, kind=kind):
                    # Batching is disabled (max_batch_size 0) and sequence
                    # batching is configured so that an instance busy with an
                    # ongoing sequence cannot accept new requests. This
                    # guarantees exactly 1 request lands on each instance.
                    config = {
                        "instance_group": {"kind": kind, "count": num_instances},
                        "max_batch_size": 0,
                        "sequence_batching": {
                            "direct": {},
                            "max_sequence_idle_microseconds": sequence_timeout_secs
                            * 1000000,
                        },
                        "input": input_config,
                        "output": output_config,
                    }
                    print(
                        f"~~~ Backend: [{backend}], Model: [{model}], Config: [{config}] ~~~"
                    )
                    try:
                        triton_client.load_model(model, config=json.dumps(config))
                    except Exception as ex:
                        self.assertTrue(False, "unexpected error {}".format(ex))

                    # The load must have left the model ready.
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_model_ready(model))
                    print(
                        "Model Repository Index after load:",
                        triton_client.get_model_repository_index(),
                    )

                    # Start one sequence per instance.
                    for sequence_id in range(1, num_instances + 1):
                        try:
                            triton_client.infer(
                                model,
                                inputs,
                                sequence_id=sequence_id,
                                sequence_start=True,
                            )
                        except Exception as ex:
                            self.assertTrue(
                                False, "unexpected inference error {}".format(ex)
                            )

                    # All instances should now be occupied until their sequence
                    # times out, so one more sequence must not complete in time.
                    # If it does, something is wrong and the test should fail.
                    extra_infer = partial(
                        triton_client.infer,
                        model,
                        inputs,
                        sequence_id=num_instances + 1,
                        sequence_start=True,
                    )
                    self._call_with_expected_timeout(extra_infer, timeout_secs=3)

                    try:
                        triton_client.unload_model(model)
                    except Exception as ex:
                        self.assertTrue(False, "unexpected error {}".format(ex))

                    # Give the server time to finish the unload before the
                    # next iteration: poll readiness, sleeping between tries.
                    for attempt in range(num_tries):
                        if triton_client.is_server_ready():
                            break
                        print(
                            f"[Attempt {attempt}] Server not ready yet, sleeping and retrying. Current repository index: {triton_client.get_model_repository_index()}"
                        )
                        time.sleep(6)
                    print(
                        "Model Repository Index after unload attempts:",
                        triton_client.get_model_repository_index(),
                    )
                    self.assertTrue(triton_client.is_server_ready())

    def test_model_config_overwite(self):
        model_name = "identity_fp32"

        # Make sure version 1 of the model is loaded
        try:
            triton_client = self._get_client()
            self.assertTrue(triton_client.is_server_live())
            self.assertTrue(triton_client.is_server_ready())
            self.assertTrue(triton_client.is_model_ready(model_name, "1"))
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Load the model from disk w/o any special configuration settings.
        original_config = triton_client.get_model_config(model_name)

        # The instance_group[0].count is set to 2 instead of the default 1.
        # This enough of a delta to ensure the correct model configuration
        # has been applied to the model.
        override_config = """
{
  "name": "identity_fp32",
  "backend": "identity",
  "instance_group": [
    {
      "count": 2,
      "kind" : "KIND_CPU"
    }
  ]
}
"""

        # Ensure the model has been loaded w/ the expected (different from override) config.
        self.assertTrue(original_config != None and original_config != override_config)

        # Reload the model with the overriding configuration value.
        triton_client.load_model(model_name, config=override_config)

        # Ensure the model has been loaded w/ the expected (override) config.
        updated_config = triton_client.get_model_config(model_name)

        # Reload the model
        triton_client.load_model(model_name)

        # Ensure the model has been loaded w/ the expected (override) config.
        updated_config2 = triton_client.get_model_config(model_name)
        self.assertEqual(updated_config, updated_config2)

        # Touch the local config.pbtxt and reload the file to ensure the local config
        # is preferred because it has a more recent mtime.
        time.sleep(0.1)  # make sure timestamps are different
        Path(os.path.join("models", model_name, "config.pbtxt")).touch()

        # Reload the model
        triton_client.load_model(model_name)

        # Ensure the model has been loaded w/ the expected (local) config.
        updated_config = triton_client.get_model_config(model_name)
        self.assertEqual(original_config, updated_config)

    def test_shutdown_while_background_unloading(self):
        """Replace a loaded Python model so its unload runs in the background.

        The test only swaps ``identity_fp32`` from the python backend to the
        identity backend. The server is shut down after this sub-test exits
        and must do so without any hang or runtime error.
        """
        model_name = "identity_fp32"
        triton_client = self._get_client()
        self.assertTrue(triton_client.is_server_live())
        self.assertTrue(triton_client.is_server_ready())
        # The Python flavor of the model has to be the one currently loaded.
        self.assertTrue(triton_client.is_model_ready(model_name, "1"))
        backend_before = triton_client.get_model_config(model_name)["backend"]
        self.assertEqual(backend_before, "python")
        # Loading the Identity flavor pushes the Python one into the
        # background, where its unload will take at least 10 seconds.
        override_config = "\n".join(
            [
                "{",
                '"name": "identity_fp32",',
                '"backend": "identity"',
                "}",
            ]
        )
        triton_client.load_model(model_name, config=override_config)
        backend_after = triton_client.get_model_config(model_name)["backend"]
        self.assertEqual(backend_after, "identity")

    def test_shutdown_while_loading(self):
        """Issue a slow model load ahead of the server shutdown.

        The server is shut down after this sub-test exits and must do so
        without any hang or runtime error.
        """
        triton_client = self._get_client()
        self.assertTrue(triton_client.is_server_live())
        self.assertTrue(triton_client.is_server_ready())
        # This model will load for at least 10 seconds.
        model_name = "identity_fp32"
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # The future is intentionally not kept: an exception raised by
            # load_model stays inside it and is never re-raised here.
            executor.submit(triton_client.load_model, model_name)
        # Leaving the `with` block waits for the submitted call to return, so
        # this check runs after load_model has returned or raised.
        self.assertFalse(triton_client.is_model_ready(model_name))

    def test_shutdown_with_live_connection(self):
        """Check that a live HTTP connection does not restart the exit countdown.

        A client runs one inference, SIGINT is sent to the server, the same
        client runs another inference and is closed. The server log must then
        show that core shutdown began and contain "Timeout 30: " exactly once.
        """
        # NOTE(review): this name is imported but not referenced in the method.
        from geventhttpclient.response import HTTPConnectionClosed

        model_name = "add_sub"
        model_shape = (16,)
        input_data = np.ones(shape=model_shape, dtype=np.float32)
        inputs = []
        for tensor_name in ("INPUT0", "INPUT1"):
            tensor = httpclient.InferInput(tensor_name, model_shape, "FP32")
            tensor.set_data_from_numpy(input_data)
            inputs.append(tensor)

        # Open the connection and use it once before the shutdown.
        conn = httpclient.InferenceServerClient("localhost:8000", verbose=True)
        conn.infer(model_name, inputs)

        # Ask the server to shut down.
        server_pid = int(os.environ["SERVER_PID"])
        os.kill(server_pid, signal.SIGINT)
        time.sleep(2)

        # The already established connection should still work.
        conn.infer(model_name, inputs)

        # Drop the connection and give the server a moment.
        conn.close()
        time.sleep(3)

        # The exit timeout countdown must not have restarted.
        with open(os.environ["SERVER_LOG"]) as log_file:
            server_log = log_file.read()
        self.assertIn(
            "Waiting for in-flight requests to complete.",
            server_log,
            "precondition not met - core shutdown did not begin",
        )
        countdown_starts = server_log.count("Timeout 30: ")
        self.assertEqual(
            countdown_starts,
            1,
            "exit timeout countdown restart detected",
        )

    def test_add_custom_config(self):
        """Check that adding a custom config reloads the model with it.

        Before the change versions 1 and 3 of the libtorch model must be
        ready. After ``configs/custom.pbtxt`` has been copied into the model
        directory only version 2 must be ready. Both the HTTP and the gRPC
        endpoint are queried.
        """
        models_base = ("libtorch",)
        models = [
            tu.get_model_name(base, np.float32, np.float32, np.float32)
            for base in models_base
        ]

        def check_versions(model_name, expectations):
            # expectations: (version, should_be_ready) pairs, checked in order
            # against a fresh HTTP client and then a fresh gRPC client.
            try:
                clients = (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                )
                for triton_client in clients:
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_server_ready())
                    for version, should_be_ready in expectations:
                        check = self.assertTrue if should_be_ready else self.assertFalse
                        check(triton_client.is_model_ready(model_name, version))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Initial state: versions 1 and 3 are served.
        for model_name in models:
            check_versions(model_name, (("1", True), ("2", False), ("3", True)))

        # Adding the custom model configuration causes the model to be
        # re-loaded with the config from the configs folder; its version
        # policy makes only version 2 available.
        for base_name, model_name in zip(models_base, models):
            shutil.copyfile(
                f"config.pbtxt.custom.{base_name}",
                f"models/{model_name}/configs/custom.pbtxt",
            )

        time.sleep(5)  # wait for models to reload
        for model_name in models:
            check_versions(model_name, (("1", False), ("2", True), ("3", False)))

    def test_delete_custom_config(self):
        """Check that deleting the custom config reloads the default one.

        Before the change only version 2 of the libtorch model must be ready.
        After ``configs/custom.pbtxt`` has been removed versions 1 and 3 must
        be ready instead. Both the HTTP and the gRPC endpoint are queried.
        """
        models_base = ("libtorch",)
        models = [
            tu.get_model_name(base, np.float32, np.float32, np.float32)
            for base in models_base
        ]

        def check_versions(model_name, expectations):
            # expectations: (version, should_be_ready) pairs, checked in order
            # against a fresh HTTP client and then a fresh gRPC client.
            try:
                clients = (
                    httpclient.InferenceServerClient("localhost:8000", verbose=True),
                    grpcclient.InferenceServerClient("localhost:8001", verbose=True),
                )
                for triton_client in clients:
                    self.assertTrue(triton_client.is_server_live())
                    self.assertTrue(triton_client.is_server_ready())
                    for version, should_be_ready in expectations:
                        check = self.assertTrue if should_be_ready else self.assertFalse
                        check(triton_client.is_model_ready(model_name, version))
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

        # Initial state: the custom config is active, only version 2 is served.
        for model_name in models:
            check_versions(model_name, (("1", False), ("2", True), ("3", False)))

        # Deleting the custom model configuration causes the model to be
        # re-loaded with the default config, whose version policy makes only
        # versions 1 and 3 available.
        for model_name in models:
            os.remove(f"models/{model_name}/configs/custom.pbtxt")

        time.sleep(5)  # wait for models to reload
        for model_name in models:
            check_versions(model_name, (("1", True), ("2", False), ("3", True)))

    def test_load_new_model_version(self):
        model_name = "identity_fp32"
        client = self._get_client(use_grpc=True)

        # version 1 and 2 are already loaded
        # version 3 is in the model directory but not loaded
        # version 4 does not exist anywhere
        self.assertTrue(client.is_model_ready(model_name, "1"))
        self.assertTrue(client.is_model_ready(model_name, "2"))
        self.assertFalse(client.is_model_ready(model_name, "3"))
        self.assertFalse(client.is_model_ready(model_name, "4"))
        with open(os.environ["SERVER_LOG"]) as f:
            server_log = f.read()
        self.assertEqual(server_log.count("[PB model] Loading version 1"), 1)
        self.assertEqual(server_log.count("[PB model] Loading version 2"), 1)
        self.assertEqual(server_log.count("[PB model] Loading version 3"), 0)
        self.assertEqual(server_log.count("[PB model] Loading version 4"), 0)
        self.assertEqual(server_log.count("successfully loaded 'identity_fp32'"), 1)

        # update version 2 model file
        Path(os.path.join("models", model_name, "2", "model.py")).touch()
        # add version 4 model file
        src_path = os.path.join("models", model_name, "3")
        dst_path = os.path.join("models", model_name, "4")
        shutil.copytree(src_path, dst_path)
        # update model config to load version 1 to 4
        config_path = os.path.join("models", model_name, "config.pbtxt")
        with open(config_path, mode="r+", encoding="utf-8", errors="strict") as f:
            config = f.read()
            config = config.replace(
                "version_policy: { specific: { versions: [1, 2] } }",
                "version_policy: { specific: { versions: [1, 2, 3, 4] } }",
            )
            f.truncate(0)
            f.seek(0)
            f.write(config)
        # make sure the disk operation is done before reloading
        time.sleep(0.1)
        # reload the model
        client.load_model(model_name)

        # version 1 is unmodified so it should not be reloaded
        # version 2 is modified so it should be reloaded
        # version 3 model file existed but not loaded so it should be loaded
        # version 4 is a new version so it should be loaded
        self.assertTrue(client.is_model_ready(model_name, "1"))
        self.assertTrue(client.is_model_ready(model_name, "2"))
        self.assertTrue(client.is_model_ready(model_name, "3"))
        self.assertTrue(client.is_model_ready(model_name, "4"))
        with open(os.environ["SERVER_LOG"]) as f:
            server_log = f.read()
        self.assertEqual(server_log.count("[PB model] Loading version 1"), 1)
        self.assertEqual(server_log.count("[PB model] Loading version 2"), 2)
        self.assertEqual(server_log.count("[PB model] Loading version 3"), 1)
        self.assertEqual(server_log.count("[PB model] Loading version 4"), 1)
        self.assertEqual(server_log.count("successfully loaded 'identity_fp32'"), 2)

        # simulate a dependency change to all versions
        Path(os.path.join("models", model_name, "dummy_dependency.py")).touch()
        # make sure the disk operation is done before reloading
        time.sleep(0.1)
        # reload the model
        client.load_model(model_name)

        # all 4 versions should be reloaded
        self.assertTrue(client.is_model_ready(model_name, "1"))
        self.assertTrue(client.is_model_ready(model_name, "2"))
        self.assertTrue(client.is_model_ready(model_name, "3"))
        self.assertTrue(client.is_model_ready(model_name, "4"))
        with open(os.environ["SERVER_LOG"]) as f:
            server_log = f.read()
        self.assertEqual(server_log.count("[PB model] Loading version 1"), 2)
        self.assertEqual(server_log.count("[PB model] Loading version 2"), 3)
        self.assertEqual(server_log.count("[PB model] Loading version 3"), 2)
        self.assertEqual(server_log.count("[PB model] Loading version 4"), 2)
        self.assertEqual(server_log.count("successfully loaded 'identity_fp32'"), 3)

        # update model config to only load version 4
        config_path = os.path.join("models", model_name, "config.pbtxt")
        with open(config_path, mode="r+", encoding="utf-8", errors="strict") as f:
            config = f.read()
            config = config.replace(
                "version_policy: { specific: { versions: [1, 2, 3, 4] } }",
                "version_policy: { specific: { versions: [4] } }",
            )
            f.truncate(0)
            f.seek(0)
            f.write(config)
        # make sure the disk operation is done before reloading
        time.sleep(0.1)
        # reload the model
        client.load_model(model_name)

        # only version 4 should be available and no reloads should happen
        self.assertFalse(client.is_model_ready(model_name, "1"))
        self.assertFalse(client.is_model_ready(model_name, "2"))
        self.assertFalse(client.is_model_ready(model_name, "3"))
        self.assertTrue(client.is_model_ready(model_name, "4"))
        with open(os.environ["SERVER_LOG"]) as f:
            server_log = f.read()
        self.assertEqual(server_log.count("[PB model] Loading version 1"), 2)
        self.assertEqual(server_log.count("[PB model] Loading version 2"), 3)
        self.assertEqual(server_log.count("[PB model] Loading version 3"), 2)
        self.assertEqual(server_log.count("[PB model] Loading version 4"), 2)
        self.assertEqual(server_log.count("successfully loaded 'identity_fp32'"), 4)

        # update model config to load version 1 and 4
        config_path = os.path.join("models", model_name, "config.pbtxt")
        with open(config_path, mode="r+", encoding="utf-8", errors="strict") as f:
            config = f.read()
            config = config.replace(
                "version_policy: { specific: { versions: [4] } }",
                "version_policy: { specific: { versions: [1, 4] } }",
            )
            f.truncate(0)
            f.seek(0)
            f.write(config)
        # make sure the disk operation is done before reloading
        time.sleep(0.1)
        # reload the model
        client.load_model(model_name)

        # version 1 should be loaded and version 4 should not be reloaded
        self.assertTrue(client.is_model_ready(model_name, "1"))
        self.assertFalse(client.is_model_ready(model_name, "2"))
        self.assertFalse(client.is_model_ready(model_name, "3"))
        self.assertTrue(client.is_model_ready(model_name, "4"))
        with open(os.environ["SERVER_LOG"]) as f:
            server_log = f.read()
        self.assertEqual(server_log.count("[PB model] Loading version 1"), 3)
        self.assertEqual(server_log.count("[PB model] Loading version 2"), 3)
        self.assertEqual(server_log.count("[PB model] Loading version 3"), 2)
        self.assertEqual(server_log.count("[PB model] Loading version 4"), 2)
        self.assertEqual(server_log.count("successfully loaded 'identity_fp32'"), 5)


# Hand control to unittest's command-line runner when this file is executed
# directly (test.sh invokes it as `python lifecycle_test.py LifeCycleTest.<name>`).
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_lifecycle/retry_model/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import os

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python backend model whose first load attempt deliberately fails.

    ``initialize`` drops an ``indicator`` file into the version directory and
    raises the first time it runs; any later attempt finds the file and
    completes normally. The model exists only to exercise load behavior.
    """

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Register two FP32 inputs and two FP32 outputs of dims [4].

        Batching is disabled (max batch size 0). The same config object that
        was passed in is returned.
        """
        auto_complete_model_config.set_max_batch_size(0)

        for tensor_name in ("INPUT0", "INPUT1"):
            auto_complete_model_config.add_input(
                {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}
            )
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            auto_complete_model_config.add_output(
                {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}
            )

        return auto_complete_model_config

    def initialize(self, args):
        """Fail on the first load attempt, succeed on subsequent ones.

        The presence of the ``indicator`` file in the version directory marks
        that a load has already been attempted. The model control mode should
        NOT be POLL, to avoid a re-load.

        Raises:
            Exception: on the first attempt, right after the indicator file
                has been created.
        """
        version_dir = os.path.join(args["model_repository"], args["model_version"])
        self.indicator_file = os.path.join(version_dir, "indicator")

        already_attempted = os.path.exists(self.indicator_file)
        if not already_attempted:
            # Mode "x" creates the (empty) marker and refuses to overwrite.
            with open(self.indicator_file, "x"):
                pass
            raise Exception("failing first load attempt")

        parsed_config = json.loads(args["model_config"])
        self.model_config = parsed_config

        out0_config = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT0")
        out1_config = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(out0_config["data_type"])
        self.output1_dtype = pb_utils.triton_string_to_numpy(out1_config["data_type"])

    def finalize(self):
        """Delete the indicator file that ``initialize`` recorded."""
        os.remove(self.indicator_file)

    def execute(self, requests):
        """Do nothing; this model is for load testing and is not meant to run."""
        return None


================================================
FILE: qa/L0_lifecycle/test.sh
================================================
#!/bin/bash
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The repository version is the first positional argument when one is given,
# otherwise the value of NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is non-empty.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Expose only CUDA device 0 to the processes started by this script.
export CUDA_VISIBLE_DEVICES=0

# Captures the output of each Python test run; printed when a test fails.
CLIENT_LOG="./client.log"
# Root directory the model repositories are copied from.
DATADIR=/data/inferenceserver/${REPO_VERSION}
# Python unittest driver; individual cases are selected on its command line.
LC_TEST=lifecycle_test.py
# Seconds to sleep after launching a server that is not waited on.
SLEEP_TIME=10
# Server binary under test.
SERVER=/opt/tritonserver/bin/tritonserver
# Result file handed to check_test_results by check_unit_test.
TEST_RESULT_FILE='test_results.txt'
# NOTE(review): presumably defines run_server, run_server_nowait,
# run_server_tolive, wait_for_model_stable and check_test_results - confirm.
source ../common/util.sh

# Evaluate the outcome of the test command that ran immediately before this
# call. Must be invoked directly after that command: its exit status is read
# from $? on entry. On a failed run, or when check_test_results rejects the
# result file (expecting 1 test), the client log is printed and RET is set to 1.
function check_unit_test() {
    local test_status=$?

    if [[ $test_status -ne 0 ]]; then
        cat "$CLIENT_LOG"
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
        return
    fi

    check_test_results "$TEST_RESULT_FILE" 1
    local verify_status=$?
    if [[ $verify_status -ne 0 ]]; then
        cat "$CLIENT_LOG"
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
}

# Overall script result (set to 1 by any failing check) and the running index
# used to give every server launch its own log file.
RET=0
LOG_IDX=0
rm -fr *.log

# Refuse to start when a tritonserver process already shows up in `ps`.
if [ $(ps | grep -c "tritonserver") != "0" ]; then
    echo -e "Tritonserver already running"
    echo -e $(ps | grep tritonserver)
    exit 1
fi

# LifeCycleTest.test_parse_error_noexit_strict
# Server pointed at /tmp/xyzx with strict readiness; it is launched without
# waiting for readiness, so pause before running the client.
SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=/tmp/xyzx --strict-readiness=true \
             --exit-on-error=false"
run_server_nowait
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi
sleep "$SLEEP_TIME"

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_parse_error_noexit >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_parse_error_noexit
# Same scenario as the strict variant, with --strict-readiness=false.
SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=/tmp/xyzx --strict-readiness=false \
             --exit-on-error=false"
run_server_nowait
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi
sleep "$SLEEP_TIME"

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_parse_error_noexit >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_parse_error_noexit_strict (multiple model repositories)
# /tmp/xyzx is listed first, followed by a freshly created empty ./models.
rm -rf models
mkdir models
SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=/tmp/xyzx --model-repository=$(pwd)/models \
             --strict-readiness=true --exit-on-error=false"
run_server_nowait
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi
sleep "$SLEEP_TIME"

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_parse_error_noexit >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_parse_error_noexit (multiple model repositories)
# The empty ./models repository is listed first, /tmp/xyzx second, and
# readiness is not strict.
rm -rf models
mkdir models
SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=/tmp/xyzx \
             --strict-readiness=false --exit-on-error=false"
run_server_nowait
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi
sleep "$SLEEP_TIME"

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_parse_error_noexit >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# GRPC Port Collision Test
# A stub server is started with no port options. A second server then moves
# only its HTTP and metrics ports, and its log is expected to report exactly
# one GRPC collision on 0.0.0.0:8001.
rm -rf models
mkdir models
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./stub_inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
SAVED_SERVER_PID=$SERVER_PID
SERVER_ARGS="--model-repository=`pwd`/models --http-port 8003 --metrics-port 8004"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
sleep $SLEEP_TIME
# check server log for the warning messages
if [ `grep -c "failed to start GRPC service: Unavailable - Socket '0.0.0.0:8001' already in use" $SERVER_LOG` != "1" ]; then
    echo -e "\n***\n*** Server log ${SERVER_LOG} did not report GRPC port collision\n***"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
    # Best-effort cleanup of the second server. A SERVER_PID of "0" means it
    # was never started, and `kill 0` would signal this script's whole process
    # group. The server may also have exited already, so tolerate failures
    # here: under `set -e` they would abort the script before the stub server
    # below is stopped.
    if [ "$SERVER_PID" != "0" ]; then
        kill $SERVER_PID || true
        wait $SERVER_PID || true
    fi
fi

# Stop the stub server.
SERVER_PID=$SAVED_SERVER_PID
kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# HTTP Port Collision Test
# A stub server is started with no port options. A second server then moves
# only its GRPC and metrics ports, and its log is expected to report exactly
# one HTTP collision on 0.0.0.0:8000.
rm -rf models
mkdir models
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./stub_inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
SAVED_SERVER_PID=$SERVER_PID
SERVER_ARGS="--model-repository=`pwd`/models --grpc-port 8003 --metrics-port 8004"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
sleep $SLEEP_TIME
# check server log for the warning messages
if [ `grep -c "failed to start HTTP service: Unavailable - Socket '0.0.0.0:8000' already in use" $SERVER_LOG` != "1" ]; then
    echo -e "\n***\n*** Server log ${SERVER_LOG} did not report HTTP port collision\n***"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
    # Best-effort cleanup of the second server. A SERVER_PID of "0" means it
    # was never started, and `kill 0` would signal this script's whole process
    # group. The server may also have exited already, so tolerate failures
    # here: under `set -e` they would abort the script before the stub server
    # below is stopped.
    if [ "$SERVER_PID" != "0" ]; then
        kill $SERVER_PID || true
        wait $SERVER_PID || true
    fi
fi

# Stop the stub server.
SERVER_PID=$SAVED_SERVER_PID

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# Metrics Port Collision Test
# A stub server is started with no port options. A second server then moves
# only its GRPC and HTTP ports, and its log is expected to report exactly one
# metrics collision on 0.0.0.0:8002.
rm -rf models
mkdir models
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./stub_inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
SAVED_SERVER_PID=$SERVER_PID
SERVER_ARGS="--model-repository=`pwd`/models --grpc-port 8003 --http-port 8004"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
sleep $SLEEP_TIME
# check server log for the warning messages
if [ `grep -c "failed to start Metrics service: Unavailable - Socket '0.0.0.0:8002' already in use" $SERVER_LOG` != "1" ]; then
    echo -e "\n***\n*** Server log ${SERVER_LOG} did not report metrics port collision\n***"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
    # Best-effort cleanup of the second server. A SERVER_PID of "0" means it
    # was never started, and `kill 0` would signal this script's whole process
    # group. The server may also have exited already, so tolerate failures
    # here: under `set -e` they would abort the script before the stub server
    # below is stopped.
    if [ "$SERVER_PID" != "0" ]; then
        kill $SERVER_PID || true
        wait $SERVER_PID || true
    fi
fi

# Stop the stub server.
SERVER_PID=$SAVED_SERVER_PID

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# Multiple Port Collisions Test
# The second server is launched with the same arguments (and the same log
# file) as the first, and the log must contain at least one "already in use"
# report.
rm -rf models
mkdir models
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
SAVED_SERVER_PID=$SERVER_PID
run_server
sleep $SLEEP_TIME
# check server log for the warning messages
if [ `grep -c "failed to start.*service: Unavailable - Socket '.*' already in use" $SERVER_LOG` == "0" ]; then
    echo -e "\n***\n*** Server log ${SERVER_LOG} did not report port collision\n***"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
    # Best-effort cleanup of the second server. A SERVER_PID of "0" means it
    # was never started, and `kill 0` would signal this script's whole process
    # group. The server may also have exited already, so tolerate failures
    # here: under `set -e` they would abort the script before the first server
    # below is stopped.
    if [ "$SERVER_PID" != "0" ]; then
        kill $SERVER_PID || true
        wait $SERVER_PID || true
    fi
fi

# Stop the first server.
SERVER_PID=$SAVED_SERVER_PID

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# No Port Collision Test
# Two servers run side by side: the first with no port options, the second
# with GRPC, HTTP and metrics all moved to other ports. Both must start.
rm -rf models
mkdir models
SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# The second server gets its own log file.
LOG_IDX=$((LOG_IDX + 1))
SERVER_LOG="./inference_server_${LOG_IDX}.log"

SAVED_SERVER_PID=$SERVER_PID
SERVER_ARGS="--model-repository=$(pwd)/models --grpc-port 8003 --http-port 8004 --metrics-port 8005"
run_server
sleep "$SLEEP_TIME"
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

kill "$SERVER_PID"
wait "$SERVER_PID"
kill "$SAVED_SERVER_PID"
wait "$SAVED_SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_parse_error_modelfail
# openvino + libtorch models go into ./models, onnx + plan into ./models_0.
rm -rf models models_0
mkdir models models_0
for i in openvino libtorch ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
    if [[ "$i" == "openvino" ]]; then
        echo 'parameters { key: "ENABLE_BATCH_PADDING" value { string_value: "YES" } }' >> models/openvino_float32_float32_float32/config.pbtxt
    fi
done
for i in onnx plan ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models_0/.
done
# Empty version 3 of the libtorch model: multiple versions get loaded, the
# emptied one fails to load, and that failure causes all other versions to be
# unloaded.
rm models/libtorch_float32_float32_float32/3/*

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=$(pwd)/models_0 \
             --exit-on-error=false --exit-timeout-secs=5"
run_server_tolive
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Allow plenty of time for the models to load (and fail to load).
wait_for_model_stable $SERVER_TIMEOUT

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_parse_error_modelfail >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_parse_error_modelfail_nostrict
# Reuses the repositories prepared for test_parse_error_modelfail, this time
# with --strict-readiness=false.
SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=$(pwd)/models_0 \
             --exit-on-error=false --exit-timeout-secs=5 --strict-readiness=false"
run_server_tolive
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Allow plenty of time for the models to load (and fail to load).
wait_for_model_stable $SERVER_TIMEOUT

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_parse_error_modelfail_nostrict >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_parse_error_no_model_config
# The openvino model is deployed without its config.pbtxt.
rm -rf models models_0
mkdir models models_0
for i in openvino libtorch ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
done
for i in onnx plan ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models_0/.
done
rm models/openvino_float32_float32_float32/config.pbtxt

# Autocomplete must stay off (--strict-model-config=true): the log check at
# the end of this test asserts that the missing config was reported as an
# error in strict model configuration mode.
SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=$(pwd)/models_0 \
             --exit-on-error=false --exit-timeout-secs=5 --strict-model-config=true"
run_server_tolive
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Allow plenty of time for the models to load (and fail to load).
wait_for_model_stable $SERVER_TIMEOUT

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_parse_error_no_model_config >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

# Both fragments of the load-failure message must appear in the server log.
if [ $(grep -c "failed to open text file for read" "$SERVER_LOG") == "0" ] || [ $(grep -c "openvino_float32_float32_float32/config.pbtxt: No such file or directory" "$SERVER_LOG") == "0" ]; then
    echo -e "\n***\n*** Server log ${SERVER_LOG} did not print model load failure\n***"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_init_error_modelfail
# The two int32 onnx models have "OUTPUT" rewritten to "_OUTPUT" in their
# configs; the float32 models are deployed alongside them.
rm -rf models models_0
mkdir models models_0
cp -r "$DATADIR/qa_sequence_model_repository/onnx_sequence_int32" models/.
cp -r "$DATADIR/qa_model_repository/onnx_int32_int32_int32" models_0/.
sed -i "s/OUTPUT/_OUTPUT/" models/onnx_sequence_int32/config.pbtxt
sed -i "s/OUTPUT/_OUTPUT/" models_0/onnx_int32_int32_int32/config.pbtxt
for i in openvino libtorch; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
    if [[ "$i" == "openvino" ]]; then
        echo 'parameters { key: "ENABLE_BATCH_PADDING" value { string_value: "YES" } }' >> models/openvino_float32_float32_float32/config.pbtxt
    fi
done
for i in onnx ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models_0/.
done

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=$(pwd)/models_0 \
             --exit-on-error=false --exit-timeout-secs=5"
run_server_tolive
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Allow plenty of time for the models to load (and fail to load).
wait_for_model_stable $SERVER_TIMEOUT

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_init_error_modelfail >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_parse_error_model_no_version
# The openvino model directory holds only config.pbtxt - no version
# directories are copied for it.
rm -rf models
mkdir models
for i in libtorch onnx plan ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
done
mkdir -p models/openvino_float32_float32_float32
cp "$DATADIR/qa_model_repository/openvino_float32_float32_float32/config.pbtxt" \
    models/openvino_float32_float32_float32/.

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --exit-on-error=false \
             --exit-timeout-secs=5"
run_server_tolive
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Allow plenty of time for the models to load (and fail to load).
wait_for_model_stable $SERVER_TIMEOUT

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_parse_error_model_no_version >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_parse_ignore_zero_prefixed_version
# Version directory 3 of the libtorch model is renamed to 003.
rm -rf models
mkdir models
for i in libtorch ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
    mv "models/${i}_float32_float32_float32/3" "models/${i}_float32_float32_float32/003"
done

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --exit-on-error=false \
             --exit-timeout-secs=5"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_parse_ignore_zero_prefixed_version >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

# The server log must mention that directory '003' was ignored.
if [ $(grep -c "ignore version directory '003' which contains leading zeros in its directory name" "$SERVER_LOG") == "0" ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_parse_ignore_non_intergral_version
# Version directory 3 of the libtorch model is renamed to the non-numeric abc.
rm -rf models
mkdir models
for i in libtorch ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
    mv "models/${i}_float32_float32_float32/3" "models/${i}_float32_float32_float32/abc"
done

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --exit-on-error=false \
             --exit-timeout-secs=5"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_parse_ignore_non_intergral_version >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

# The server log must mention that directory 'abc' was ignored.
if [ $(grep -c "ignore version directory 'abc' which fails to convert to integral number" "$SERVER_LOG") == "0" ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_dynamic_model_load_unload
# openvino, onnx and plan models start inside the repository; the libtorch
# model is staged in the working directory, outside ./models. The server
# polls the repository every second.
rm -rf models libtorch_float32_float32_float32
mkdir models
for i in openvino onnx plan ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
done
cp -r "$DATADIR/qa_model_repository/libtorch_float32_float32_float32" .

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --repository-poll-secs=1 \
             --model-control-mode=poll --exit-timeout-secs=5"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_dynamic_model_load_unload >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_dynamic_model_load_unload_disabled
# Same layout as test_dynamic_model_load_unload, but the server runs with
# --model-control-mode=none.
rm -rf models libtorch_float32_float32_float32
mkdir models
for i in openvino onnx plan; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
done
cp -r "$DATADIR/qa_model_repository/libtorch_float32_float32_float32" .

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=none \
             --exit-timeout-secs=5"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_dynamic_model_load_unload_disabled >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_dynamic_version_load_unload
# A single libtorch int32 model, served in poll mode with a 1-second interval.
rm -rf models
mkdir models
for i in libtorch ; do
    cp -r "$DATADIR/qa_model_repository/${i}_int32_int32_int32" models/.
done

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --repository-poll-secs=1 \
             --model-control-mode=poll --exit-timeout-secs=5"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_dynamic_version_load_unload >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_dynamic_version_load_unload_disabled
# A single libtorch int32 model, served with --model-control-mode=none.
rm -rf models
mkdir models
for i in libtorch ; do
    cp -r "$DATADIR/qa_model_repository/${i}_int32_int32_int32" models/.
done

# Show model control mode will override deprecated model control options
SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=none \
             --exit-timeout-secs=5"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_dynamic_version_load_unload_disabled >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_dynamic_model_modify
# For each of libtorch and plan, stage next to the repository:
#   config.pbtxt.<backend>        the config with its version_policy line removed
#   config.pbtxt.wrong.<backend>  the config pointing at wrong_output0_labels
# and add a wrong_output0_labels.txt (label -> label9) inside the model directory.
rm -rf models config.pbtxt.*
mkdir models
for i in libtorch plan ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
    sed '/^version_policy/d' \
        "$DATADIR/qa_model_repository/${i}_float32_float32_float32/config.pbtxt" > "config.pbtxt.${i}"
    sed 's/output0_labels/wrong_output0_labels/' \
        "$DATADIR/qa_model_repository/${i}_float32_float32_float32/config.pbtxt" > "config.pbtxt.wrong.${i}"
    sed 's/label/label9/' \
        "$DATADIR/qa_model_repository/${i}_float32_float32_float32/output0_labels.txt" > \
        "models/${i}_float32_float32_float32/wrong_output0_labels.txt"
done

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --repository-poll-secs=1 \
             --model-control-mode=poll --exit-timeout-secs=5"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_dynamic_model_modify >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_dynamic_file_delete
# onnx and plan models, served in poll mode with --strict-model-config=false.
rm -rf models config.pbtxt.*
mkdir models
for i in onnx plan; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
done

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --repository-poll-secs=1 \
             --model-control-mode=poll --exit-timeout-secs=5 --strict-model-config=false"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_dynamic_file_delete >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_multiple_model_repository_polling
# ./models holds openvino plus a libtorch copy without version 3, ./models_0
# holds onnx, and a complete libtorch copy is staged in the working directory.
rm -rf models models_0 libtorch_float32_float32_float32
mkdir models models_0
for i in openvino ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
    echo 'parameters { key: "ENABLE_BATCH_PADDING" value { string_value: "YES" } }' >> models/openvino_float32_float32_float32/config.pbtxt
done
for i in onnx ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models_0/.
done
cp -r "$DATADIR/qa_model_repository/libtorch_float32_float32_float32" .
cp -r "$DATADIR/qa_model_repository/libtorch_float32_float32_float32" models/. && \
    rm -rf models/libtorch_float32_float32_float32/3

SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=$(pwd)/models_0 \
             --model-control-mode=poll --repository-poll-secs=1 --exit-timeout-secs=5"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_multiple_model_repository_polling >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_multiple_model_repository_control
# Same repository layout as the polling variant, served with
# --model-control-mode=explicit.
rm -rf models models_0 libtorch_float32_float32_float32
mkdir models models_0
for i in openvino ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models/.
    echo 'parameters { key: "ENABLE_BATCH_PADDING" value { string_value: "YES" } }' >> models/openvino_float32_float32_float32/config.pbtxt
done
for i in onnx ; do
    cp -r "$DATADIR/qa_model_repository/${i}_float32_float32_float32" models_0/.
done
cp -r "$DATADIR/qa_model_repository/libtorch_float32_float32_float32" .
cp -r "$DATADIR/qa_model_repository/libtorch_float32_float32_float32" models/. && \
    rm -rf models/libtorch_float32_float32_float32/3

# Show model control mode will override deprecated model control options
SERVER_LOG="./inference_server_${LOG_IDX}.log"
SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=$(pwd)/models_0 \
             --model-control-mode=explicit \
             --exit-timeout-secs=5"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

rm -f "$CLIENT_LOG"
set +e
python "$LC_TEST" LifeCycleTest.test_multiple_model_repository_control >>"$CLIENT_LOG" 2>&1
check_unit_test
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_model_control
# Stage the onnx model and its 'simple_' ensemble counterpart in a single
# repository, forcing max_batch_size to 1 in both configurations.
rm -fr models config.pbtxt.*
mkdir models
for i in onnx ; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models/.
    sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt
    sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/simple_${i}_float32_float32_float32/config.pbtxt
done

# Every line of the argument string ends with a continuation so that
# SERVER_ARGS never contains an embedded newline.
SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit \
             --exit-timeout-secs=5 --strict-model-config=false \
             --strict-readiness=false"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Start from an empty client log; check_unit_test is kept directly after the
# test command.
rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_model_control >>$CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_model_control_fail
# Stage an onnx model whose version directories have been emptied, so that
# loading it is expected to fail.
rm -fr models config.pbtxt.*
mkdir models
for i in onnx ; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    # Remove all model files so the model will fail to load
    rm models/${i}_float32_float32_float32/*/*
    sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt
done

# Every line of the argument string ends with a continuation so that
# SERVER_ARGS never contains an embedded newline.
SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit \
             --exit-timeout-secs=5 --strict-model-config=false \
             --strict-readiness=false"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Start from an empty client log; check_unit_test is kept directly after the
# test command.
rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_model_control_fail >>$CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_model_control_ensemble
# Stage the onnx model and its 'simple_' ensemble counterpart in a single
# repository, forcing max_batch_size to 1 in both configurations.
rm -fr models config.pbtxt.*
mkdir models
for i in onnx ; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models/.
    sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt
    sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/simple_${i}_float32_float32_float32/config.pbtxt
done

# Every line of the argument string ends with a continuation so that
# SERVER_ARGS never contains an embedded newline.
SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit \
             --exit-timeout-secs=5 --strict-model-config=false \
             --strict-readiness=false"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Start from an empty client log; check_unit_test is kept directly after the
# test command.
rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_model_control_ensemble >>$CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_multiple_model_repository_control_startup_models
rm -fr models models_0 config.pbtxt.*
mkdir models models_0
# Base models go to the first repository, their 'simple_' ensembles to the
# second one; max_batch_size is forced to 1 everywhere.
for i in plan onnx; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models_0/.
    sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt
    sed -i "s/max_batch_size:.*/max_batch_size: 1/" models_0/simple_${i}_float32_float32_float32/config.pbtxt
done

# libtorch is placed in both repositories; the duplication is what keeps it
# from loading.
for i in libtorch; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/.
done

# Three models are requested at startup, including the duplicated libtorch.
SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=$(pwd)/models_0 \
             --model-control-mode=explicit \
             --strict-readiness=false \
             --strict-model-config=false --exit-on-error=false \
             --load-model=libtorch_float32_float32_float32 \
             --load-model=plan_float32_float32_float32 \
             --load-model=simple_onnx_float32_float32_float32"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Fresh client log; check_unit_test stays directly after the test command.
rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_multiple_model_repository_control_startup_models >> $CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX + 1))

# Load every model at startup in EXPLICIT model control mode by passing
# --load-model=*. The existing unit test
# LifeCycleTest.test_multiple_model_repository_control_startup_models is
# re-used for verification.
rm -fr models models_0 config.pbtxt.*
mkdir models models_0
# Base models go to the first repository, their 'simple_' ensembles to the
# second one; max_batch_size is forced to 1 everywhere.
for i in plan onnx; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models_0/.
    sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt
    sed -i "s/max_batch_size:.*/max_batch_size: 1/" models_0/simple_${i}_float32_float32_float32/config.pbtxt
done

# libtorch is placed in both repositories; the duplication is what keeps it
# from loading.
for i in libtorch; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/.
done

SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=$(pwd)/models_0 \
             --model-control-mode=explicit \
             --strict-readiness=false \
             --strict-model-config=false --exit-on-error=false \
             --load-model=*"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Fresh client log; check_unit_test stays directly after the test command.
rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_multiple_model_repository_control_startup_models >> $CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX + 1))

# Test loading all models on startup in EXPLICIT model control mode AND
# an additional --load-model argument, it should fail
#
# Both repositories are reset: 'models_0' is passed to the server below and
# would otherwise still hold whatever the preceding section staged there,
# which (with --exit-on-error=true) could make the server exit for a reason
# unrelated to the argument conflict and let this check pass by accident.
rm -fr models models_0
mkdir models models_0
for i in onnx ; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt
done

# --load-model=* can not be used with any other --load-model arguments
# as it's unclear what the user's intentions are.
SERVER_ARGS="--model-repository=`pwd`/models --model-repository=`pwd`/models_0 \
             --model-control-mode=explicit \
             --strict-readiness=true \
             --exit-on-error=true \
             --load-model=* \
             --load-model=onnx_float32_float32_float32"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
# A running server means the conflicting arguments were accepted: record the
# failure and shut the server down again.
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***"
    cat $SERVER_LOG
    RET=1

    kill $SERVER_PID
    wait $SERVER_PID
fi

LOG_IDX=$((LOG_IDX+1))

# Request a startup model that is not present in the (empty) repository;
# the server is expected to refuse to start.
rm -fr models && mkdir models
INVALID_MODEL="does-not-exist"
SERVER_ARGS="--model-repository=$(pwd)/models \
             --model-control-mode=explicit \
             --strict-readiness=true \
             --exit-on-error=true \
             --load-model=${INVALID_MODEL}"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
# A non-zero PID means the server came up anyway: flag the failure and stop it.
if [[ "$SERVER_PID" != "0" ]]; then
    echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***"
    echo -e "ERROR: Startup model [${INVALID_MODEL}] should have failed to load."
    cat $SERVER_LOG
    RET=1

    kill $SERVER_PID
    wait $SERVER_PID
fi
# The server log must contain the "model not found" error message.
if [ $(grep -c "model not found in any model repository" $SERVER_LOG) == "0" ]; then
    echo -e "\n***\n*** Server log ${SERVER_LOG} did not print model load failure for non-existent model\n***"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_model_repository_index
rm -fr models models_0 config.pbtxt.*
mkdir models models_0
# plan and libtorch go to the first repository, their 'simple_' ensembles
# to the second one.
for i in plan libtorch; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models_0/.
done

# onnx is placed in both repositories; the duplication is what keeps it from
# loading.
for i in onnx; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/.
done

# Three models are requested at startup, including the duplicated onnx.
SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=$(pwd)/models_0 \
             --model-control-mode=explicit \
             --strict-readiness=false \
             --strict-model-config=false --exit-on-error=false \
             --load-model=onnx_float32_float32_float32 \
             --load-model=plan_float32_float32_float32 \
             --load-model=simple_libtorch_float32_float32_float32"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Fresh client log; check_unit_test stays directly after the test command.
rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_model_repository_index >> $CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_model_availability_on_reload
# Executed twice: once with TRITONSERVER_USE_GRPC exported (grpc) and once
# without it (http).
for protocol in grpc http; do
    if [ "$protocol" == "grpc" ]; then
        export TRITONSERVER_USE_GRPC=1
    fi
    # Single identity model with an (empty) version 1 directory.
    rm -fr models config.pbtxt.*
    mkdir models
    cp -r identity_zero_1_int32 models/. && mkdir -p models/identity_zero_1_int32/1

    SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit \
                 --exit-timeout-secs=5 --strict-model-config=false \
                 --load-model=identity_zero_1_int32 \
                 --strict-readiness=false"
    SERVER_LOG="./inference_server_${LOG_IDX}.log"
    run_server
    if [[ "$SERVER_PID" == "0" ]]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Run the unit test, then verify the result file reports exactly one test.
    rm -f $CLIENT_LOG
    set +e
    if ! python $LC_TEST LifeCycleTest.test_model_availability_on_reload >> $CLIENT_LOG 2>&1; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    elif ! check_test_results $TEST_RESULT_FILE 1; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    # Leave no protocol selection behind for the next iteration.
    unset TRITONSERVER_USE_GRPC

    LOG_IDX=$((LOG_IDX + 1))
done

# LifeCycleTest.test_model_availability_on_reload_2
# Executed twice: once with TRITONSERVER_USE_GRPC exported (grpc) and once
# without it (http).
for protocol in grpc http; do
    if [[ $protocol == "grpc" ]]; then
        export TRITONSERVER_USE_GRPC=1
    fi
    # The repository copy of the model is pinned to version 1; a second
    # configuration pinned to version 2 is written to config.pbtxt.v2 in the
    # working directory.
    rm -fr models config.pbtxt.*
    mkdir models
    cp -r identity_zero_1_int32 models/. \
        && mkdir -p models/identity_zero_1_int32/1 \
        && mkdir -p models/identity_zero_1_int32/2
    echo "version_policy: { specific { versions: [1] }}" >> models/identity_zero_1_int32/config.pbtxt
    cp identity_zero_1_int32/config.pbtxt config.pbtxt.v2
    echo "version_policy: { specific { versions: [2] }}" >> config.pbtxt.v2

    SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit \
                 --exit-timeout-secs=5 --strict-model-config=false \
                 --load-model=identity_zero_1_int32 \
                 --strict-readiness=false"
    SERVER_LOG="./inference_server_$LOG_IDX.log"
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Run the unit test, then verify the result file reports exactly one
    # test. The client log is dumped once per failure.
    rm -f $CLIENT_LOG
    set +e
    python $LC_TEST LifeCycleTest.test_model_availability_on_reload_2 >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    # Leave no protocol selection behind for the next iteration.
    unset TRITONSERVER_USE_GRPC

    LOG_IDX=$((LOG_IDX+1))
done

# LifeCycleTest.test_model_availability_on_reload_3
# Executed twice: once with TRITONSERVER_USE_GRPC exported (grpc) and once
# without it (http).
for protocol in grpc http; do
    if [ "$protocol" == "grpc" ]; then
        export TRITONSERVER_USE_GRPC=1
    fi
    # The model is pinned to version 1; an identical copy of that
    # configuration is saved as config.pbtxt.new in the working directory.
    rm -fr models config.pbtxt.*
    mkdir models
    cp -r identity_zero_1_int32 models/. \
        && mkdir -p models/identity_zero_1_int32/1 \
        && mkdir -p models/identity_zero_1_int32/2
    echo "version_policy: { specific { versions: [1] }}" >> models/identity_zero_1_int32/config.pbtxt
    cp models/identity_zero_1_int32/config.pbtxt config.pbtxt.new

    SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit \
                 --exit-timeout-secs=5 --strict-model-config=false \
                 --load-model=identity_zero_1_int32 \
                 --strict-readiness=false"
    SERVER_LOG="./inference_server_${LOG_IDX}.log"
    run_server
    if [[ "$SERVER_PID" == "0" ]]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Run the unit test, then verify the result file reports exactly one test.
    rm -f $CLIENT_LOG
    set +e
    if ! python $LC_TEST LifeCycleTest.test_model_availability_on_reload_3 >> $CLIENT_LOG 2>&1; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    elif ! check_test_results $TEST_RESULT_FILE 1; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    # Leave no protocol selection behind for the next iteration.
    unset TRITONSERVER_USE_GRPC

    LOG_IDX=$((LOG_IDX + 1))
done

# LifeCycleTest.test_model_reload_fail
# Versions 1 and 2 of the identity model each get a copy of the backend
# library; the repository configuration is pinned to version 1.
rm -fr models config.pbtxt.*
mkdir models
cp -r identity_zero_1_int32 models/. && \
    mkdir -p models/identity_zero_1_int32/1 && \
    cp libtriton_identity.so models/identity_zero_1_int32/1/. && \
    mkdir -p models/identity_zero_1_int32/2 && \
    cp libtriton_identity.so models/identity_zero_1_int32/2/.
echo "version_policy: { specific { versions: [1] }}" >> models/identity_zero_1_int32/config.pbtxt
# Alternative configuration kept in the working directory: pinned to
# version 2 and switched from KIND_CPU to KIND_GPU.
cp identity_zero_1_int32/config.pbtxt config.pbtxt.v2.gpu && \
    echo "version_policy: { specific { versions: [2] }}" >> config.pbtxt.v2.gpu && \
    sed -i "s/KIND_CPU/KIND_GPU/" config.pbtxt.v2.gpu

SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit \
             --exit-timeout-secs=5 --strict-model-config=false \
             --load-model=identity_zero_1_int32 \
             --strict-readiness=false"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Fresh client log; check_unit_test stays directly after the test command.
rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_model_reload_fail >> $CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

# The server log must report the failed load of version 2.
if [ $(grep -c "failed to load 'identity_zero_1_int32' version 2: Internal: GPU instances not supported" $SERVER_LOG) == "0" ]; then
    echo -e "\n***\n*** Server log ${SERVER_LOG} did not print model load failure\n***"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_load_same_model_different_platform
# Executed for both protocols (TRITONSERVER_USE_GRPC exported for grpc only)
# and, per protocol, for both model preparation orders.
for protocol in grpc http; do
    if [ "$protocol" == "grpc" ]; then
        export TRITONSERVER_USE_GRPC=1
    fi

    # The file system's notion of "modified" is narrower than Triton's: the
    # modification timestamp changes when file content is edited in place,
    # but not when a file is copied or moved, whereas for Triton any
    # operation that changes a file counts as a modification. Preparing the
    # two models in reverse order covers the case where the replacement model
    # carries an earlier or equal modification timestamp than the current
    # one; Triton must still detect the change and reload the model.
    for prep_order in normal reverse; do
        rm -fr models simple_float32_float32_float32
        mkdir models
        # Two models of different platforms share the name
        # simple_float32_float32_float32: the TRT one inside the repository,
        # the pytorch one in the working directory.
        if [ "$prep_order" == "normal" ]; then
            # TRT model first, pytorch model second
            cp -r $DATADIR/qa_model_repository/plan_float32_float32_float32 models/simple_float32_float32_float32
            sed -i "s/plan_float32_float32_float32/simple_float32_float32_float32/" models/simple_float32_float32_float32/config.pbtxt
            cp -r $DATADIR/qa_model_repository/libtorch_float32_float32_float32 simple_float32_float32_float32
            sed -i "s/libtorch_float32_float32_float32/simple_float32_float32_float32/" simple_float32_float32_float32/config.pbtxt
        else
            # pytorch model first, TRT model second
            cp -r $DATADIR/qa_model_repository/libtorch_float32_float32_float32 simple_float32_float32_float32
            sed -i "s/libtorch_float32_float32_float32/simple_float32_float32_float32/" simple_float32_float32_float32/config.pbtxt
            cp -r $DATADIR/qa_model_repository/plan_float32_float32_float32 models/simple_float32_float32_float32
            sed -i "s/plan_float32_float32_float32/simple_float32_float32_float32/" models/simple_float32_float32_float32/config.pbtxt
        fi

        SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit \
                    --load-model=simple_float32_float32_float32 \
                    --exit-timeout-secs=5"
        SERVER_LOG="./inference_server_${LOG_IDX}.log"
        run_server
        if [[ "$SERVER_PID" == "0" ]]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        # Run the unit test, then verify the result file reports exactly one
        # test.
        rm -f $CLIENT_LOG
        set +e
        if ! python $LC_TEST LifeCycleTest.test_load_same_model_different_platform >> $CLIENT_LOG 2>&1; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Failed\n***"
            RET=1
        elif ! check_test_results $TEST_RESULT_FILE 1; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
        set -e

        kill $SERVER_PID
        wait $SERVER_PID

        LOG_IDX=$((LOG_IDX + 1))
    done

    # Leave no protocol selection behind for the next iteration.
    unset TRITONSERVER_USE_GRPC
done

# Send HTTP request to control endpoint
rm -fr models config.pbtxt.*
mkdir models
for i in openvino libtorch onnx plan ; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
done

# Server runs in poll mode, so the load/unload control API should not work.
# This test also keeps using "--model-store" to ensure backward compatibility
SERVER_ARGS="--model-store=`pwd`/models --repository-poll-secs=0 \
             --exit-timeout-secs=5 --strict-model-config=false \
             --model-control-mode=poll"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Every curl call below sends the response body to ./curl.out so that $code
# holds nothing but the HTTP status written by -w.

# unload API should return bad request
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/repository/models/openvino_float32_float32_float32/unload`
set -e
if [ "$code" == "200" ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# the model should be available/ready
set +e
code=`curl -s -w %{http_code} -o ./curl.out localhost:8000/v2/models/openvino_float32_float32_float32/ready`
set -e
if [ "$code" != "200" ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# remove model file so that if reload is triggered, model will become unavailable
rm models/openvino_float32_float32_float32/*/*

# load API should return bad request
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/repository/models/openvino_float32_float32_float32/load`
set -e
if [ "$code" == "200" ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# the model should still be available/ready
set +e
code=`curl -s -w %{http_code} -o ./curl.out localhost:8000/v2/models/openvino_float32_float32_float32/ready`
set -e
if [ "$code" != "200" ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# Probe a handful of HTTP paths that are not part of the API. This should
# eventually be replaced by more comprehensive fuzz attacks.
rm -fr models
mkdir models
for i in openvino; do
    cp -r $DATADIR/qa_model_repository/${i}_int32_int32_int32 models/.
done

SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=none \
             --exit-timeout-secs=5"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Each invalid path must be answered with 404; the response body goes to
# ./curl.out so that $code only holds the status.
for invalid_endpoint in notapi/v2 v2/notapi v2/models/notapi/foo; do
    set +e
    code=$(curl -s -w %{http_code} -o ./curl.out localhost:8000/${invalid_endpoint})
    set -e
    if [ "$code" != "404" ]; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
done

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_config_override
rm -fr models config.pbtxt.*
mkdir models
cp -r $DATADIR/qa_model_repository/onnx_float32_float32_float32 models/.
# Empty version directories 1 and 3 so that version 2 is the only valid
# version directory, while the config requests versions 1 and 3.
rm models/onnx_float32_float32_float32/1/*
rm models/onnx_float32_float32_float32/3/*

# NOTE(review): the same repository path is passed twice below; confirm
# whether that is intentional.
SERVER_ARGS="--model-repository=$(pwd)/models --model-repository=$(pwd)/models \
             --model-control-mode=explicit \
             --strict-model-config=false"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Fresh client log; check_unit_test stays directly after the test command.
rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_config_override >> $CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

# Discard the client log once the test has been evaluated.
rm -f $CLIENT_LOG

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_file_override
rm -fr models config.pbtxt.*
mkdir models
cp -r $DATADIR/qa_model_repository/onnx_float32_float32_float32 models/.
# Drop version directory 1 so that only versions 2 and 3 are valid version
# directories, while the config requests versions 1 and 3.
rm -rf models/onnx_float32_float32_float32/1

# EXPLICIT mode with onnx_float32_float32_float32 loaded at startup.
SERVER_ARGS="--model-repository=$(pwd)/models \
             --model-control-mode=explicit \
             --load-model=onnx_float32_float32_float32 \
             --strict-model-config=false"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Two unit tests share this server instance; check_unit_test stays directly
# after each test command.
rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_file_override >> $CLIENT_LOG 2>&1
check_unit_test
python $LC_TEST LifeCycleTest.test_file_override_security >> $CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

# Discard the client log once both tests have been evaluated.
rm -f $CLIENT_LOG

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_shutdown_dynamic
# Stage the custom zero model with dynamic batching enabled and an
# execute_delay_ms parameter of 5000. All configuration edits are chained
# with '&&' so that nothing is appended to a stray config.pbtxt in the
# working directory if the 'cd' fails.
rm -fr models config.pbtxt.*
mkdir models
cp -r ../custom_models/custom_zero_1_float32 models/. && \
    mkdir -p models/custom_zero_1_float32/1 && \
    (cd models/custom_zero_1_float32 && \
        echo "dynamic_batching {}" >> config.pbtxt && \
        echo "parameters [" >> config.pbtxt && \
        echo "{ key: \"execute_delay_ms\"; value: { string_value: \"5000\" }}" >> config.pbtxt && \
        echo "]" >> config.pbtxt)

SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Server will be shutdown in test script, need to make PID available in script
SERVER_PID=$SERVER_PID python $LC_TEST LifeCycleTest.test_shutdown_dynamic >>$CLIENT_LOG 2>&1
check_unit_test
set -e

# check server log
if [ `grep -c "Found 1 gRPC service connections and inference handlers" $SERVER_LOG` == "0" ]; then
    echo -e "\n***\n*** Expect logging for in-flight gRPC connection count\n***"
    RET=1
fi

# The test script may already have stopped the server, so a failing kill is
# tolerated.
kill $SERVER_PID || true
wait $SERVER_PID

rm -f $CLIENT_LOG

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_shutdown_sequence
# Stage the custom sequence model with a version 1 directory.
rm -fr models config.pbtxt.*
mkdir models
cp -r ../custom_models/custom_sequence_int32 models/. && \
    mkdir -p models/custom_sequence_int32/1

SERVER_ARGS="--model-repository=$(pwd)/models --log-verbose=1"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# The test script shuts the server down itself, so the PID is handed to it
# through the environment.
SERVER_PID=$SERVER_PID python $LC_TEST LifeCycleTest.test_shutdown_sequence >> $CLIENT_LOG 2>&1
check_unit_test
set -e

# The server log must mention the two in-flight inferences of the model.
if [ $(grep -c "Model 'custom_sequence_int32' (version 1) has 2 in-flight inferences" $SERVER_LOG) == "0" ]; then
    echo -e "\n***\n*** Expect logging for model having 2 in-flight inferences\n***"
    RET=1
fi

# The server may already be gone, so a failing kill is tolerated.
kill $SERVER_PID || true
wait $SERVER_PID

rm -f $CLIENT_LOG

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_shutdown_ensemble
# Stage the zero ensemble together with the custom zero model, the latter
# with dynamic batching enabled and an execute_delay_ms parameter of 5000.
# All configuration edits are chained with '&&' so that nothing is appended
# to a stray config.pbtxt in the working directory if the 'cd' fails.
rm -fr models config.pbtxt.*
mkdir models
cp -r ensemble_zero_1_float32 models/. && \
    mkdir -p models/ensemble_zero_1_float32/1
cp -r ../custom_models/custom_zero_1_float32 models/. && \
    mkdir -p models/custom_zero_1_float32/1 && \
    (cd models/custom_zero_1_float32 && \
        echo "dynamic_batching {}" >> config.pbtxt && \
        echo "parameters [" >> config.pbtxt && \
        echo "{ key: \"execute_delay_ms\"; value: { string_value: \"5000\" }}" >> config.pbtxt && \
        echo "]" >> config.pbtxt)

SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Server will be shutdown in test script, need to make PID available in script
SERVER_PID=$SERVER_PID python $LC_TEST LifeCycleTest.test_shutdown_ensemble >>$CLIENT_LOG 2>&1
check_unit_test
set -e

# check server log
if [ `grep -c "Found 1 gRPC service connections and inference handlers" $SERVER_LOG` == "0" ]; then
    echo -e "\n***\n*** Expect logging for in-flight gRPC connection count\n***"
    RET=1
fi

# The test script may already have stopped the server, so a failing kill is
# tolerated.
kill $SERVER_PID || true
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_load_gpu_limit
# The Python model used here depends on cuda-python.
pip install "cuda-python>=12,<13"
rm -fr models config.pbtxt.*
mkdir models
# Two copies of the CUDA memory consumer model under different names.
cp -r ../python_models/cuda_memory_consumer models/cuda_memory_consumer_1 && \
    cp -r ../python_models/cuda_memory_consumer models/cuda_memory_consumer_2

# Negative test: a device ID of -1 must be rejected at startup.
SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit --model-load-gpu-limit -1:0.6"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" != "0" ]]; then
    echo -e "\n***\n*** unexpected start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
    kill $SERVER_PID
    wait $SERVER_PID
elif [ $(grep -c "expects device ID >= 0, got -1" $SERVER_LOG) == "0" ]; then
    echo -e "\n***\n*** Expect error on invalid device\n***"
    RET=1
fi

LOG_IDX=$((LOG_IDX + 1))

# Negative test: a limit fraction of -0.4 must be rejected at startup.
SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit --model-load-gpu-limit 0:-0.4"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" != "0" ]]; then
    echo -e "\n***\n*** unexpected start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
    kill $SERVER_PID
    wait $SERVER_PID
elif [ $(grep -c "expects limit fraction to be in range \[0.0, 1.0\], got -0.4" $SERVER_LOG) == "0" ]; then
    echo -e "\n***\n*** Expect error on invalid fraction\n***"
    RET=1
fi

LOG_IDX=$((LOG_IDX + 1))

# Positive test: model loading should stop once more than 60% of GPU 0
# memory is in use.
SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit --model-load-gpu-limit 0:0.6"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# check_unit_test stays directly after the test command.
set +e
python $LC_TEST LifeCycleTest.test_load_gpu_limit >> $CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_concurrent_model_load_speedup
rm -rf models
mkdir models
# Two renamed copies (_1 and _2) of the identity model.
MODEL_NAME="identity_zero_1_int32"
cp -r ${MODEL_NAME} models && mkdir -p models/${MODEL_NAME}/1
cp -r models/${MODEL_NAME} models/${MODEL_NAME}_1 && \
    sed -i "s/${MODEL_NAME}/${MODEL_NAME}_1/" models/${MODEL_NAME}_1/config.pbtxt
mv models/${MODEL_NAME} models/${MODEL_NAME}_2 && \
    sed -i "s/${MODEL_NAME}/${MODEL_NAME}_2/" models/${MODEL_NAME}_2/config.pbtxt
# Two renamed copies (python_..._1 and python_..._2) of the Python identity
# model, with an initialize() that sleeps for 10 seconds appended to
# model.py.
MODEL_NAME="identity_fp32"
cp -r ../python_models/${MODEL_NAME} models && (cd models/${MODEL_NAME} && \
    mkdir 1 && mv model.py 1 && \
    echo "    def initialize(self, args):" >> 1/model.py && \
    echo "        import time" >> 1/model.py && \
    echo "        time.sleep(10)" >> 1/model.py)
cp -r models/${MODEL_NAME} models/python_${MODEL_NAME}_1 && \
    sed -i "s/${MODEL_NAME}/python_${MODEL_NAME}_1/" models/python_${MODEL_NAME}_1/config.pbtxt
mv models/${MODEL_NAME} models/python_${MODEL_NAME}_2 && \
    sed -i "s/${MODEL_NAME}/python_${MODEL_NAME}_2/" models/python_${MODEL_NAME}_2/config.pbtxt

SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Only the exit status of the test command is evaluated here.
set +e
if ! python $LC_TEST LifeCycleTest.test_concurrent_model_load_speedup >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_concurrent_model_load
# Two variants of a model called 'identity_model' are prepared: one derived
# from identity_zero_1_int32 inside 'models' and one derived from the Python
# identity_fp32 model inside 'models_v2'. Only 'models' is served.
rm -rf models models_v1 models_v2
mkdir models models_v2
cp -r identity_zero_1_int32 models/identity_model && \
    (cd models/identity_model && \
        mkdir 1 && \
        sed -i "s/identity_zero_1_int32/identity_model/" config.pbtxt)
cp -r ../python_models/identity_fp32 models_v2/identity_model && \
    (cd models_v2/identity_model && \
        mkdir 1 && mv model.py 1 && \
        sed -i "s/identity_fp32/identity_model/" config.pbtxt)

SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit"
SERVER_LOG="./inference_server_${LOG_IDX}.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Only the exit status of the test command is evaluated here.
set +e
if ! python $LC_TEST LifeCycleTest.test_concurrent_model_load >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX + 1))

# LifeCycleTest.test_concurrent_model_load_unload
rm -rf models
mkdir models
cp -r identity_zero_1_int32 models && mkdir -p models/identity_zero_1_int32/1
cp -r ensemble_zero_1_float32 models && mkdir -p models/ensemble_zero_1_float32/1
cp -r ../custom_models/custom_zero_1_float32 models/. && \
    mkdir -p models/custom_zero_1_float32/1 && \
    (cd models/custom_zero_1_float32 && \
        echo "parameters [" >> config.pbtxt && \
        echo "{ key: \"creation_delay_sec\"; value: { string_value: \"10\" }}" >> config.pbtxt && \
        echo "]" >> config.pbtxt)

SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_concurrent_model_load_unload >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_concurrent_same_model_load_unload_stress
rm -rf models
mkdir models
cp -r identity_zero_1_int32 models && \
    (cd models/identity_zero_1_int32 && \
        mkdir 1 && \
        sed -i "s/string_value: \"10\"/string_value: \"0\"/" config.pbtxt)

SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit --model-load-thread-count=32 --log-verbose=2"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_concurrent_same_model_load_unload_stress >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    cat ./test_concurrent_same_model_load_unload_stress.statistics.log
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_concurrent_model_instance_load_speedup
rm -rf models
mkdir models
MODEL_NAME="identity_fp32"
cp -r ../python_models/${MODEL_NAME} models/ && (cd models/${MODEL_NAME} && \
    mkdir 1 && mv model.py 1 && \
    echo "    def initialize(self, args):" >> 1/model.py && \
    echo "        import time" >> 1/model.py && \
    echo "        time.sleep(10)" >> 1/model.py)
rm models/${MODEL_NAME}/config.pbtxt

SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_concurrent_model_instance_load_speedup >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_concurrent_model_instance_load_sanity
rm -rf models
mkdir models
# Sanity check loading multiple instances in parallel for each supported backend
PARALLEL_BACKENDS="python onnx"
for backend in ${PARALLEL_BACKENDS} ; do
    model="${backend}_float32_float32_float32"
    model_dir="models/${model}"
    if [[ $backend == "python" ]]; then
      cp -r ../python_models/identity_fp32 ${model_dir}
      mkdir ${model_dir}/1 && mv ${model_dir}/model.py ${model_dir}/1
      rm ${model_dir}/config.pbtxt
    else
      mkdir models/${model}
      cp -r $DATADIR/qa_model_repository/${model}/1 models/${model}/1
    fi
done

SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit --log-verbose=2"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
PARALLEL_BACKENDS=${PARALLEL_BACKENDS} python $LC_TEST LifeCycleTest.test_concurrent_model_instance_load_sanity >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_load_retry
rm -fr models config.pbtxt.*
mkdir models
cp -r retry_model models/.

# Start without retry and the server should fail to start
SERVER_ARGS="--model-repository=`pwd`/models \
             --model-control-mode=none"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***"
    cat $SERVER_LOG
    RET=1

    kill $SERVER_PID
    wait $SERVER_PID
fi

rm -fr models config.pbtxt.*
mkdir models
cp -r retry_model models/.

SERVER_ARGS="--model-repository=`pwd`/models \
             --model-control-mode=none \
             --model-load-retry-count=1"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# the model should be available/ready
set +e
code=`curl -s -w %{http_code} localhost:8000/v2/models/retry_model/ready`
set -e
if [ "$code" != "200" ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_model_config_overwrite
rm -rf models
mkdir models
MODEL_NAME="identity_fp32"
cp -r ../python_models/${MODEL_NAME} models/ && (cd models/${MODEL_NAME} && \
    mkdir 1 && mv model.py 1 && \
    echo "    def initialize(self, args):" >> 1/model.py && \
    echo "        import time" >> 1/model.py && \
    echo "        time.sleep(5)" >> 1/model.py)
rm models/${MODEL_NAME}/config.pbtxt

SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit --load-model ${MODEL_NAME}"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_model_config_overwite >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_shutdown_while_background_unloading
rm -rf models
mkdir models
MODEL_NAME="identity_fp32"
cp -r ../python_models/${MODEL_NAME} models/ && (cd models/${MODEL_NAME} && \
    mkdir 1 && mv model.py 1 && \
    echo "    def finalize(self):" >> 1/model.py && \
    echo "        import time" >> 1/model.py && \
    echo "        time.sleep(10)" >> 1/model.py)

SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit --load-model ${MODEL_NAME} --log-verbose=2"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_shutdown_while_background_unloading >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

NUMBER_OF_MODELS_UNLOADED=`grep -o "successfully unloaded" $SERVER_LOG | wc -l`
if [ $NUMBER_OF_MODELS_UNLOADED -ne 2 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Unexpected number of successfully unloaded models\n***"
    RET=1
fi

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_shutdown_while_loading
rm -rf models
mkdir models
cp -r ../python_models/identity_fp32 models/ && (cd models/identity_fp32 && \
    mkdir 1 && mv model.py 1 && \
    echo "    def initialize(self, args):" >> 1/model.py && \
    echo "        import time" >> 1/model.py && \
    echo "        time.sleep(10)" >> 1/model.py)

SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit --log-verbose=2"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_shutdown_while_loading >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

ACTUAL_LOAD_UNLOAD_ORDER="`grep -o -e 'AsyncUnload()' -e 'OnLoadFinal()' $SERVER_LOG`"
EXPECTED_LOAD_UNLOAD_ORDER="`echo -e 'OnLoadFinal()\nAsyncUnload()'`"
if [ "$ACTUAL_LOAD_UNLOAD_ORDER" != "$EXPECTED_LOAD_UNLOAD_ORDER" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed assert load finish before unload\n***"
    RET=1
fi

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_shutdown_with_live_connection
rm -rf models
mkdir models
cp -r ../python_models/add_sub models/ && (cd models/add_sub && \
    mkdir 1 && mv model.py 1)

SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
SERVER_PID=$SERVER_PID SERVER_LOG=$SERVER_LOG python $LC_TEST LifeCycleTest.test_shutdown_with_live_connection >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID || true
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_add_custom_config
rm -fr models config.pbtxt.*
mkdir models
for i in libtorch; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    mkdir models/${i}_float32_float32_float32/configs
    sed 's/^version_policy:.*/version_policy: { specific: { versions: [2] }}/' \
        $DATADIR/qa_model_repository/${i}_float32_float32_float32/config.pbtxt > config.pbtxt.custom.${i}
done

SERVER_ARGS="--model-repository=`pwd`/models --repository-poll-secs=1 \
             --model-control-mode=poll --exit-timeout-secs=5 \
             --model-config-name=custom"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_add_custom_config >>$CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_delete_custom_config
rm -fr models config.pbtxt.*
mkdir models
for i in libtorch; do
    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
    mkdir models/${i}_float32_float32_float32/configs
    sed 's/^version_policy:.*/version_policy: { specific: { versions: [2] }}/' \
        $DATADIR/qa_model_repository/${i}_float32_float32_float32/config.pbtxt \
        > models/${i}_float32_float32_float32/configs/custom.pbtxt
done

SERVER_ARGS="--model-repository=`pwd`/models --repository-poll-secs=1 \
             --model-control-mode=poll --exit-timeout-secs=5 \
             --model-config-name=custom"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm -f $CLIENT_LOG
set +e
python $LC_TEST LifeCycleTest.test_delete_custom_config >>$CLIENT_LOG 2>&1
check_unit_test
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_load_new_model_version
rm -rf models
mkdir models
cp -r ../python_models/identity_fp32 models/ && (cd models/identity_fp32 && \
    echo "version_policy: { specific: { versions: [1, 2] } }" >> config.pbtxt && \
    echo "    def initialize(self, args):" >> model.py && \
    echo "        pb_utils.Logger.log_info(f'[PB model] Loading version {args[\"model_version\"]}')" >> model.py && \
    mkdir 1 && cp model.py 1 && \
    mkdir 2 && cp model.py 2 && \
    mkdir 3 && mv model.py 3)

export PYTHONDONTWRITEBYTECODE="True"
SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit --load-model=*"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
SERVER_LOG=$SERVER_LOG python $LC_TEST LifeCycleTest.test_load_new_model_version >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID
unset PYTHONDONTWRITEBYTECODE

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test Failed\n***"
fi

exit $RET


================================================
FILE: qa/L0_logging/log_format_test.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import datetime
import json
import os
import re
import shutil
import subprocess
import time
from pathlib import Path

import google.protobuf.text_format
import numpy
import pytest
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

# Directory that contains this test module; the model repository and the
# log output directory both live next to it.
module_directory = os.path.dirname(os.path.abspath(__file__))

# Model repository handed to the server via --model-repository.
test_model_directory = os.path.join(module_directory, "log_models")

# Directory that receives one server log per test case.
test_logs_directory = os.path.join(module_directory, "log_format_test_logs")

# Import-time side effect: discard logs from any previous run and start
# from an empty directory.
shutil.rmtree(test_logs_directory, ignore_errors=True)
os.makedirs(test_logs_directory)

# Regular expressions for tables embedded in log messages.
#
# A table is laid out as:
#
#   border
#   header_row
#   border
#   data_rows
#   border

# A border line, e.g. "+--------+---------+".
table_border_regex = re.compile(r"^\+[-+]+\+$")
# A header or data line, e.g. "| simple | 1 |"; the cells are captured
# together in the "row" group.
table_row_regex = re.compile(r"^\| (?P<row>.*?) \|$")


# Pattern for a log record in the "default" format. The pattern is assembled
# from one piece per field; the named groups are looked up by name when a
# record is validated.
DEFAULT_LOG_RECORD = (
    r"(?P<level>\w)(?P<month>\d{2})(?P<day>\d{2}) "
    r"(?P<timestamp>\d{2}:\d{2}:\d{2}\.\d{6}) "
    r"(?P<pid>\d+) "
    r"(?P<file>[\w\.]+):(?P<line>\d+)] "
    r"(?P<message>.*)"
)
# DOTALL lets the message group span the continuation lines of a record.
default_log_record_regex = re.compile(DEFAULT_LOG_RECORD, re.DOTALL)

# Pattern for a log record in the "ISO8601" format.
ISO8601_LOG_RECORD = (
    r"(?P<ISO8601_timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z) "
    r"(?P<level>\w+) "
    r"(?P<pid>\d+) "
    r"(?P<file>.+):(?P<line>\d+)] "
    r"(?P<message>.*)"
)
ISO8601_log_record_regex = re.compile(ISO8601_LOG_RECORD, re.DOTALL)

# Severity codes accepted in the "level" field of a log record.
LEVELS = {"E", "W", "I"}

# Parameters for test_format: (format id, record regex) pairs. Each log
# format appears twice, once as-is and once with an "_unescaped" suffix; the
# suffixed variants reuse the same record regex.
FORMATS = [
    (format_name + suffix, record_regex)
    for suffix in ("", "_unescaped")
    for format_name, record_regex in (
        ("default", default_log_record_regex),
        ("ISO8601", ISO8601_log_record_regex),
    )
]

# pytest ids for FORMATS, in the same order.
IDS = [format_id for format_id, _ in FORMATS]

# Largest signed 32-bit integer.
INT32_MAX = (1 << 31) - 1

# Plain-text payload for the forged log record.
INJECTED_MESSAGE = "THIS ENTRY WAS INJECTED"

# Payload that additionally carries terminal control characters: ESC-based
# escape sequences around the text and a trailing BEL (U+0007).
CONTROL_INJECTED_MESSAGE = (
    "\u001b[31m"
    "ESC-INJECTION-LFUNICODE:"
    "\u001b[32m"
    "SUCCESSFUL"
    "\u001b[0m"
    "\u0007"
)

# Templates for a forged record in each log format. QUOTE is filled with '"'
# or "" and INJECTED_MESSAGE with one of the payloads above.
DEFAULT_INJECTED_LOG_FORMAT = (
    "I0205 18:34:18.707423 1 file.cc:123] " "{QUOTE}{INJECTED_MESSAGE}{QUOTE}"
)
ISO8601_INJECTED_LOG_FORMAT = (
    "2024-05-18T01:46:51Z I 1 file.cc:123] " "{QUOTE}{INJECTED_MESSAGE}{QUOTE}"
)

# Parameters for test_injection: (format id, record regex, forged record).
# The first four entries carry the plain payload, the last four the payload
# with control characters. Within each group the order is default, ISO8601,
# default_unescaped, ISO8601_unescaped; the "_unescaped" variants build the
# forged record without surrounding quotes.
INJECTED_FORMATS = [
    (
        format_id,
        record_regex,
        record_template.format(INJECTED_MESSAGE=payload, QUOTE=quote),
    )
    for payload in (INJECTED_MESSAGE, CONTROL_INJECTED_MESSAGE)
    for format_id, record_regex, record_template, quote in (
        ("default", default_log_record_regex, DEFAULT_INJECTED_LOG_FORMAT, '"'),
        ("ISO8601", ISO8601_log_record_regex, ISO8601_INJECTED_LOG_FORMAT, '"'),
        (
            "default_unescaped",
            default_log_record_regex,
            DEFAULT_INJECTED_LOG_FORMAT,
            "",
        ),
        (
            "ISO8601_unescaped",
            ISO8601_log_record_regex,
            ISO8601_INJECTED_LOG_FORMAT,
            "",
        ),
    )
]

# pytest ids for INJECTED_FORMATS, in the same order; the control-character
# variants get a "_control" suffix.
INJECTED_IDS = [
    format_id + suffix
    for suffix in ("", "_control")
    for format_id in (
        "default",
        "ISO8601",
        "default_unescaped",
        "ISO8601_unescaped",
    )
]

# Environment variable set to "1"/"0" (or removed) in the server's
# environment to select whether log messages are escaped.
ESCAPE_ENVIRONMENT_VARIABLE = "TRITON_SERVER_ESCAPE_LOG_MESSAGES"


class LogInjectionError(Exception):
    """Raised when a forged record shows up as its own entry in the server log."""


def parse_timestamp(timestamp):
    """Convert an "HH:MM:SS[.ffffff]" string into a ``datetime.timedelta``.

    Raises ``ValueError`` when the string does not consist of exactly three
    colon-separated parts or when a part is not numeric.
    """
    hour_text, minute_text, second_text = timestamp.split(":")
    return datetime.timedelta(
        hours=int(hour_text),
        minutes=int(minute_text),
        seconds=float(second_text),
    )


# Registry of field validators, keyed by the name of the log-record field
# (a named group of the record regexes) that each one checks.
validators = {}


def validator(func):
    """Decorator that registers ``func`` in ``validators`` and returns it.

    The registry key is the function's name with every occurrence of
    "validate_" removed, e.g. ``validate_pid`` is stored under "pid".
    """
    field_name = func.__name__.replace("validate_", "")
    validators[field_name] = func
    return func


@validator
def validate_level(level, _):
    """Assert that ``level`` is one of the severity codes in ``LEVELS``.

    The second argument is accepted for signature compatibility with the
    other validators and is ignored.
    """
    assert level in LEVELS


@validator
def validate_month(month, _):
    """Assert that ``month`` is a digit string naming a month from 1 to 12.

    The second argument is ignored.
    """
    assert month.isdigit()
    assert 1 <= int(month) <= 12


@validator
def validate_day(day, _):
    """Assert that ``day`` is a digit string naming a day from 1 to 31.

    The second argument is ignored.
    """
    assert day.isdigit()
    assert 1 <= int(day) <= 31


@validator
def validate_ISO8601_timestamp(timestamp, _):
    """Check that ``timestamp`` parses as an ISO 8601 date-time.

    Trailing "Z" characters are stripped before parsing. A ``ValueError``
    from ``datetime.datetime.fromisoformat`` signals an invalid timestamp.
    The second argument is ignored.
    """
    without_zone_suffix = timestamp.rstrip("Z")
    datetime.datetime.fromisoformat(without_zone_suffix)


@validator
def validate_timestamp(timestamp, _):
    """Check that ``timestamp`` can be parsed by ``parse_timestamp``.

    Any exception raised by the parse signals an invalid timestamp. The
    second argument is ignored.
    """
    parse_timestamp(timestamp)


@validator
def validate_pid(pid, _):
    """Assert that ``pid`` consists only of digits. The second argument is ignored."""
    assert pid.isdigit()


@validator
def validate_file(file_, _):
    """Assert that ``file_`` names a file, i.e. has a non-empty final component.

    ``Path(...).name`` is always a string and never ``None``, so comparing it
    against ``None`` can never fail; the check is on emptiness instead.
    The second argument is ignored.
    """
    assert Path(file_).name


@validator
def validate_line(line, _):
    """Assert that ``line`` consists only of digits. The second argument is ignored."""
    assert line.isdigit()


def split_row(row):
    """Return the cells of a table row as a list of stripped strings.

    ``row`` is a regex match object exposing a "row" group; the group's text
    is split on "|" and surrounding whitespace is removed from each cell.
    """
    cells = row.group("row").strip().split("|")
    return [cell.strip() for cell in cells]


def validate_protobuf(protobuf):
    """Parse ``protobuf`` (the lines of a text-format message) as a ModelConfig.

    Nothing is returned; an exception raised by the parse signals that the
    lines are not a valid model configuration.
    """
    # Only model configs are checked for now, although technically any
    # protobuf message should be valid here.
    model_config = grpcclient.model_config_pb2.ModelConfig()
    google.protobuf.text_format.ParseLines(protobuf, model_config)


def validate_table(table_rows):
    """Assert that ``table_rows`` (a list of lines) forms a well-formed table.

    Expected layout: a border line, a header row, a border line, the data
    rows and a closing border line. Every data row must have as many cells
    as the header. Lines after the header that match neither the row nor the
    border pattern are skipped, except that the very last line must be a
    border.

    Raises ``AssertionError`` when the layout is violated and ``IndexError``
    when fewer than three lines are supplied.
    """
    assert table_border_regex.search(table_rows[0])

    header_match = table_row_regex.search(table_rows[1])
    assert header_match
    header_cells = split_row(header_match)

    assert table_border_regex.search(table_rows[2])

    # Everything after the header separator: data rows plus the closing border.
    body_lines = table_rows[3:]
    data_rows = [
        split_row(row_match)
        for row_match in map(table_row_regex.search, body_lines)
        if row_match
    ]

    # With no lines after the header separator there is no closing border;
    # the empty string makes the assertion below fail in that case.
    closing_line = body_lines[-1] if body_lines else ""
    assert table_border_regex.search(closing_line)

    for cells in data_rows:
        assert len(cells) == len(header_cells)


@validator
def validate_message(message, escaped):
    r"""Validate the message field of a log record.

    A message is either a single line or has the multi-line form

        <heading>\n
        <object>

    where the heading is an optional string (escaped with the normal
    escaping rules) and the object is a structured representation of a
    table or a protobuf. The only objects currently allowed are:

    * Tables (triton::common::table_printer)

    * Model config protobuf messages

    Parameters
    ----------
    message : str
        message portion of the log record (may span multiple lines)
    escaped : bool
        whether the message is expected to be escaped

    Raises
    ------
    Exception
        If the heading is expected to be escaped but is not a valid JSON
        string, or if the object does not match the expected formatting.
        When ``escaped`` is false the heading is instead required NOT to
        parse as JSON.

    Examples
    --------

    validate_message("foo", escaped=True) -> Exception
    validate_message('"foo"', escaped=True) -> pass
    validate_message('"foo"\nfoo', escaped=True) -> Exception
    validate_message('"foo"\n+--------+---------+--------+\n'
                     '| Model  | Version | Status |\n'
                     '+--------+---------+--------+\n'
                     '| simple | 1       | READY  |\n'
                     '+--------+---------+--------+',
                     escaped=True) -> pass

    """

    # The first line is the heading; any remaining lines form the object.
    heading, *obj = message.split("\n")

    if heading:
        if escaped:
            try:
                json.loads(heading)
            except Exception as e:
                raise Exception(
                    f"{e.__class__.__name__} {e}\nFirst line of message in log record is not a valid JSON string"
                )
        else:
            # An unescaped heading is expected to fail JSON decoding.
            with pytest.raises(json.JSONDecodeError):
                json.loads(heading)

    if not obj:
        return

    if table_border_regex.search(obj[0]):
        validate_table(obj)
    elif escaped:
        validate_protobuf(obj)
    # Otherwise (not escaped and not a table) there is no guarantee about
    # what type of object is present, so nothing further is checked.


class TestLogFormat:
    """Tests that launch ``tritonserver`` and inspect the log file it writes.

    Each test case starts its own server process with the options prepared
    by ``_setup`` and shuts it down again before examining the log.
    """

    @pytest.fixture(autouse=True)
    def _setup(self, request):
        """Reset the server options before every test case.

        All log levels are enabled, verbosity is set to ``INT32_MAX`` and the
        log file is named after the pytest node so that every (parametrized)
        case writes to its own file.
        """
        test_case_name = request.node.name
        self._server_options = {}
        self._server_options["log-verbose"] = INT32_MAX
        self._server_options["log-info"] = 1
        self._server_options["log-error"] = 1
        self._server_options["log-warning"] = 1
        self._server_options["log-format"] = "default"
        self._server_options["model-repository"] = test_model_directory
        self._server_process = None
        self._server_options["log-file"] = os.path.join(
            test_logs_directory, test_case_name + ".server.log"
        )

    def _shutdown_server(self):
        """Kill the server process, if one is running, and wait for it to exit."""
        if self._server_process:
            self._server_process.kill()
            self._server_process.wait()
            # Forget the handle so that a repeated shutdown is a no-op.
            self._server_process = None

    def _launch_server(self, escaped=None):
        """Start ``tritonserver`` with the current options and wait for its log.

        Parameters
        ----------
        escaped : bool or None
            ``True`` / ``False`` set ``TRITON_SERVER_ESCAPE_LOG_MESSAGES`` to
            "1" / "0" in the server's environment; ``None`` removes the
            variable so the server runs with its own default.

        Raises
        ------
        Exception
            If the log file has not appeared after about five seconds. The
            server process is shut down before the exception is raised.
        """
        cmd = ["tritonserver"]

        for key, value in self._server_options.items():
            cmd.append(f"--{key}={value}")

        env = os.environ.copy()

        if escaped is None:
            # pop() with a default tolerates the variable not being set in
            # the parent environment (``del`` would raise KeyError).
            env.pop(ESCAPE_ENVIRONMENT_VARIABLE, None)
        else:
            env[ESCAPE_ENVIRONMENT_VARIABLE] = "1" if escaped else "0"

        log_file = self._server_options["log-file"]
        # stdout/stderr of the server are captured next to the log file.
        with open(f"{log_file}.stderr.log", "w") as output_err_:
            with open(f"{log_file}.stdout.log", "w") as output_:
                self._server_process = subprocess.Popen(
                    cmd,
                    env=env,
                    stdin=subprocess.DEVNULL,
                    stdout=output_,
                    stderr=output_err_,
                )

        wait_time = 5

        while wait_time and not os.path.exists(log_file):
            time.sleep(1)
            wait_time -= 1

        if not os.path.exists(log_file):
            # Do not leave a stray server process behind on failure.
            self._shutdown_server()
            raise Exception("Log not found")

        # Give server a little time to have the endpoints up and ready
        time.sleep(10)

    def _validate_log_record(self, record, format_regex, escaped):
        """Validate one log record against ``format_regex`` and the field validators.

        Every named group of the match that has an entry in ``validators``
        is passed to that validator together with ``escaped``.

        Raises
        ------
        AssertionError
            If the record does not match ``format_regex``.
        Exception
            If a field validator fails; the message names the offending
            field and record, and the original error is chained as the cause.
        """
        match = format_regex.search(record)
        assert match, "Invalid log line"

        for field, value in match.groupdict().items():
            if field not in validators:
                continue
            try:
                validators[field](value, escaped)
            except Exception as e:
                raise Exception(
                    f"{e.__class__.__name__} {e}\nInvalid {field}: '{match.group(field)}' in log record '{record}'"
                ) from e

    def _parse_log_file(self, file_path, format_regex):
        """Split the log file into records and return them as a list of strings.

        A line matching ``format_regex`` starts a new record; every other
        line is appended to the record currently being collected. Trailing
        newlines are removed from each record.
        """
        log_records = []
        with open(file_path, "rt") as file_:
            current_log_record = []
            for line in file_:
                match = format_regex.search(line)
                if match:
                    if current_log_record:
                        log_records.append(current_log_record)
                    current_log_record = [line]
                else:
                    current_log_record.append(line)
        # Flush the record still being collected when the file ends. For an
        # empty file this yields a single empty record, which subsequently
        # fails validation.
        log_records.append(current_log_record)
        log_records = [
            "".join(log_record_lines).rstrip("\n") for log_record_lines in log_records
        ]
        return log_records

    def _validate_log_file(self, file_path, format_regex, escaped):
        """Parse the log file and validate every record in it."""
        log_records = self._parse_log_file(file_path, format_regex)
        for log_record in log_records:
            self._validate_log_record(log_record, format_regex, escaped)

    def _detect_injection(self, log_records, injected_record):
        """Raise ``LogInjectionError`` if any record equals ``injected_record``."""
        for record in log_records:
            if record == injected_record:
                raise LogInjectionError(
                    f"LOG INJECTION ATTACK! Found: {injected_record}"
                )

    @pytest.mark.parametrize(
        "log_format,format_regex",
        FORMATS,
        ids=IDS,
    )
    def test_format(self, log_format, format_regex):
        """Start and stop the server, then validate every record of its log."""
        self._server_options["log-format"] = log_format.replace("_unescaped", "")

        # The "_unescaped" variants run the server with escaping disabled.
        escaped = "_unescaped" not in log_format

        self._launch_server(escaped)
        self._shutdown_server()
        self._validate_log_file(self._server_options["log-file"], format_regex, escaped)

    @pytest.mark.parametrize(
        "log_format,format_regex,injected_record",
        INJECTED_FORMATS,
        ids=INJECTED_IDS,
    )
    def test_injection(self, log_format, format_regex, injected_record):
        """Try to forge a log record through a request and inspect the log.

        An inference request is sent whose input name contains the forged
        record on lines of its own; the request itself is expected to fail.
        With escaping enabled the forged record must not appear as a
        standalone record in the log; with escaping disabled it is expected
        to appear.
        """
        self._server_options["log-format"] = log_format.replace("_unescaped", "")

        escaped = "_unescaped" not in log_format

        self._launch_server(escaped)

        try:
            triton_client = httpclient.InferenceServerClient(
                url="localhost:8000", verbose=False
            )

            # TODO Refactor server launch, shutdown into reusable class
            wait_time = 10

            while wait_time:
                try:
                    if triton_client.is_server_ready():
                        break
                # Gracefully handle connection error if server endpoint isn't up yet
                except Exception as e:
                    print(
                        f"Client failed to connect, retries remaining: {wait_time}. Error: {e}"
                    )

                time.sleep(1)
                wait_time -= 1
                print(f"Server not ready yet, retries remaining: {wait_time}")

            # The model wait shares the retry budget left over from above.
            while wait_time and not triton_client.is_model_ready("simple"):
                time.sleep(1)
                wait_time -= 1

            if not triton_client.is_server_ready():
                raise Exception("Server not Ready")

            if not triton_client.is_model_ready("simple"):
                raise Exception("Model not Ready")

        except Exception as e:
            self._shutdown_server()
            raise Exception(
                f"{e.__class__.__name__} {e}\ncontext creation failed"
            ) from e

        # The forged record is placed on its own lines inside the input name.
        input_name = f"\n{injected_record}\n{injected_record}"

        input_data = numpy.random.randn(1, 3).astype(numpy.float32)
        input_tensor = httpclient.InferInput(input_name, input_data.shape, "FP32")
        input_tensor.set_data_from_numpy(input_data)
        try:
            with pytest.raises(InferenceServerException):
                triton_client.infer(model_name="simple", inputs=[input_tensor])
        except Exception as e:
            raise Exception(f"{e.__class__.__name__} {e}\ninference failed") from e
        finally:
            self._shutdown_server()

        log_records = self._parse_log_file(
            self._server_options["log-file"], format_regex
        )

        if not escaped:
            # Without escaping the forged record is expected to get through.
            with pytest.raises(LogInjectionError):
                self._detect_injection(log_records, injected_record)
        else:
            self._detect_injection(log_records, injected_record)


================================================
FILE: qa/L0_logging/logging_endpoint_test.py
================================================
#!/usr/bin/python

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import sys
import unittest

import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from google.protobuf import json_format
from tritonclient.utils import InferenceServerException


# Similar set up as dynamic batcher tests
class LogEndpointTest(tu.TestResultCollector):
    """Exercise the log settings endpoint through the HTTP and gRPC clients.

    The ``*_get_settings`` cases compare the reported settings against the
    defaults, and the ``*_update_settings`` cases change one setting at a
    time and compare the settings echoed back by each update call.
    """

    # Settings that may be changed over the network, at the values the
    # get-settings tests expect from a server whose settings are untouched.
    _DEFAULT_SETTINGS = {
        "log_info": True,
        "log_warning": True,
        "log_error": True,
        "log_verbose_level": 0,
        "log_format": "default",
    }

    # Error expected when an update request contains "log_file".
    _LOG_FILE_UPDATE_ERROR = (
        "log file location can not be updated through network protocol"
    )

    # Changes applied cumulatively, one per update request, by the
    # update-settings tests.
    _SETTING_CHANGES = (
        ("log_info", False),
        ("log_warning", False),
        ("log_error", False),
        ("log_verbose_level", 1),
        ("log_format", "ISO8601"),
    )

    @classmethod
    def _initial_settings(cls):
        """Return the complete settings expected before any update is made."""
        return {"log_file": "", **cls._DEFAULT_SETTINGS}

    @classmethod
    def _update_steps(cls):
        """Return the successive settings dicts sent by the update tests.

        Every step is a full copy of the previous one with exactly one more
        entry of ``_SETTING_CHANGES`` applied; none of them has "log_file".
        """
        steps = []
        current = dict(cls._DEFAULT_SETTINGS)
        for name, value in cls._SETTING_CHANGES:
            current[name] = value
            steps.append(dict(current))
        return steps

    @staticmethod
    def _expected_after_update(settings):
        """Return the settings expected back after sending ``settings``."""
        return {**settings, "log_file": ""}

    @staticmethod
    def _grpc_settings_response(settings):
        """Build the LogSettingsResponse message equivalent to ``settings``.

        Parameters
        ----------
        settings : dict
            Plain setting values keyed by setting name. ``bool`` values
            become ``boolParam``, other ``int`` values ``uint32Param`` and
            everything else ``stringParam``.
        """
        params = {}
        for name, value in settings.items():
            # bool must be tested first because it is a subclass of int.
            if isinstance(value, bool):
                params[name] = {"boolParam": value}
            elif isinstance(value, int):
                params[name] = {"uint32Param": value}
            else:
                params[name] = {"stringParam": value}
        response = grpcclient.service_pb2.LogSettingsResponse()
        json_format.Parse(json.dumps({"settings": params}), response)
        return response

    def tearDown(self):
        # Clear all log settings to initial state.
        # Note that the tearDown function uses HTTP client so the pass/fail
        # of the HTTP log setting test cases should be checked to make sure
        # tearDown() is properly executed and not affecting start state of
        # other test cases
        triton_client = httpclient.InferenceServerClient("localhost:8000")
        triton_client.update_log_settings(settings=dict(self._DEFAULT_SETTINGS))

    def check_server_initial_state(self):
        # Helper function to make sure the log setting is properly
        # initialized / reset before actually running the test case.
        # Note that this function uses HTTP client so the pass/fail of
        # the HTTP log setting test cases should be checked to make sure
        # the initial state is checked properly before running other test cases.
        triton_client = httpclient.InferenceServerClient("localhost:8000")
        self.assertEqual(self._initial_settings(), triton_client.get_log_settings())

    def test_http_get_settings(self):
        # Log settings will be the same as default settings since
        # no update has been made.
        triton_client = httpclient.InferenceServerClient("localhost:8000")
        self.assertEqual(
            self._initial_settings(),
            triton_client.get_log_settings(),
            "Unexpected initial log settings",
        )

    def test_grpc_get_settings(self):
        # Log settings will be the same as default settings since
        # no update has been made.
        triton_client = grpcclient.InferenceServerClient("localhost:8001")
        self.assertEqual(
            self._grpc_settings_response(self._initial_settings()),
            triton_client.get_log_settings(),
            "Unexpected initial log settings",
        )

    def test_http_update_settings(self):
        # Update each possible log configuration
        # field and check that they are reflected
        # by the server
        self.check_server_initial_state()
        triton_client = httpclient.InferenceServerClient("localhost:8000")

        # A request that tries to move the log file must be rejected.
        rejected_settings = {"log_file": "log_file.log", **self._DEFAULT_SETTINGS}
        with self.assertRaisesRegex(
            InferenceServerException, self._LOG_FILE_UPDATE_ERROR
        ):
            triton_client.update_log_settings(settings=rejected_settings)

        for settings in self._update_steps():
            self.assertEqual(
                self._expected_after_update(settings),
                triton_client.update_log_settings(settings=settings),
                "Unexpected updated log settings",
            )

    def test_grpc_update_settings(self):
        # Update each possible log configuration
        # field and check that they are reflected
        # by the server
        self.check_server_initial_state()
        triton_client = grpcclient.InferenceServerClient("localhost:8001")

        # A request that tries to move the log file must be rejected.
        rejected_settings = {"log_file": "log_file.log", **self._DEFAULT_SETTINGS}
        with self.assertRaisesRegex(
            InferenceServerException, self._LOG_FILE_UPDATE_ERROR
        ):
            triton_client.update_log_settings(settings=rejected_settings)

        for settings in self._update_steps():
            self.assertEqual(
                self._grpc_settings_response(self._expected_after_update(settings)),
                triton_client.update_log_settings(settings=settings),
                "Unexpected updated log settings",
            )


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_logging/test.sh
================================================
#!/bin/bash
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# Client binaries used to generate inference traffic against the server.
SIMPLE_HTTP_CLIENT=../clients/simple_http_infer_client
SIMPLE_GRPC_CLIENT=../clients/simple_grpc_infer_client

# Python unit test for the logging endpoint and its result bookkeeping.
CLIENT_TEST=logging_endpoint_test.py
CLIENT_LOG="client.log"
TEST_RESULT_FILE="test_results.txt"
EXPECTED_NUM_TESTS="4"

# The first positional argument, when present, overrides the version taken
# from the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
MODELBASE=onnx_int32_int32_int32

MODELSDIR=$(pwd)/log_models

SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh

# Start from a clean working directory.
rm -f *.log
rm -fr $MODELSDIR && mkdir -p $MODELSDIR

if [[ ! -d ${DATADIR} ]]; then
  echo -e "\n***\n*** ${DATADIR} does not exist!\n***"
  exit 1
fi

# Build a model repository holding a single model named "simple": a copy of
# MODELBASE with versions 2 and 3 removed and its config renamed.
rm -fr $MODELSDIR && mkdir -p $MODELSDIR && \
    cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
    rm -r $MODELSDIR/simple/2 && rm -r $MODELSDIR/simple/3 && \
    (cd $MODELSDIR/simple && \
            sed -i "s/^name:.*/name: \"simple\"/" config.pbtxt)

# Overall test verdict; any failed check below sets it to 1.
RET=0

# Query the HTTP logging endpoint and compare every reported setting with the
# expected value. Sets RET=1 (and prints a message) for each mismatch.
#
# Arguments:
#   $1 expected log_file          (string, may be empty)
#   $2 expected log_info          (true|false)
#   $3 expected log_warning       (true|false)
#   $4 expected log_error         (true|false)
#   $5 expected log_verbose_level (integer)
#   $6 expected log_format        (string)
#
# Side effects: overwrites ./curl.out and the global variable "code".
function verify_correct_settings () {
  local log_file_expected=$1
  local log_info_expected=$2
  local log_warn_expected=$3
  local log_error_expected=$4
  local log_verbose_expected=$5
  local log_format_expected=$6
  code=`curl -s -w %{http_code} -o ./curl.out localhost:8000/v2/logging`

  # The grep substitutions are quoted on purpose: when ./curl.out is missing
  # grep prints nothing, and an unquoted empty operand would make "[" fail
  # with a syntax error, silently skipping the check instead of failing it.
  if [ "$(grep -c "\"log_file\":\"$log_file_expected\"" ./curl.out)" != "1" ]; then
    echo -e "\n***\n*** Test Failed: Incorrect Log File Setting\n***"
    RET=1
  fi
  if [ "$(grep -c "\"log_info\":$log_info_expected" ./curl.out)" != "1" ]; then
    echo -e "\n***\n*** Test Failed: Incorrect Log Info Setting\n***"
    RET=1
  fi
  if [ "$(grep -c "\"log_warning\":$log_warn_expected" ./curl.out)" != "1" ]; then
    echo -e "\n***\n*** Test Failed: Incorrect Log Warn Setting\n***"
    RET=1
  fi
  if [ "$(grep -c "\"log_error\":$log_error_expected" ./curl.out)" != "1" ]; then
    echo -e "\n***\n*** Test Failed: Incorrect Log Error Setting\n***"
    RET=1
  fi
  if [ "$(grep -c "\"log_verbose_level\":$log_verbose_expected" ./curl.out)" != "1" ]; then
    echo -e "\n***\n*** Test Failed: Incorrect Log Verbose Setting\n***"
    RET=1
  fi
  if [ "$(grep -c "\"log_format\":\"$log_format_expected\"" ./curl.out)" != "1" ]; then
    echo -e "\n***\n*** Test Failed: Incorrect Log Format Setting\n***"
    RET=1
  fi
}

# Run the server with no logging options at all
SERVER_ARGS="--model-repository=$MODELSDIR"
SERVER_LOG="./server.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Check the default settings
rm -f ./curl.out
set +e

# Expected values are given as [ file | info | warn | error | verbosity | format ]
verify_correct_settings "" "true" "true" "true" "0" "default"

# Generate some traffic over both protocols
$SIMPLE_HTTP_CLIENT >> client_default.log 2>&1 || RET=1
$SIMPLE_GRPC_CLIENT >> client_default.log 2>&1 || RET=1

# With no log file configured the log is expected on the console, which
# run_server captures in ./server.log; require more than 30 lines of it.
console_count=$(wc -l < ./server.log)
if [ $console_count -le 30 ]; then
    echo -e "\n***\n*** Test Failed: Log File Error\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Test Log File (Argument)
# With --log-file set, log records must land in log_file.log and none of
# them may show up in the captured console output.
SERVER_ARGS="--log-file=log_file.log --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_log_file.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm -f ./curl.out
set +e

verify_correct_settings "log_file.log" "true" "true" "true" "0" "default"

$SIMPLE_HTTP_CLIENT >> client_test_log_file.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

$SIMPLE_GRPC_CLIENT >> client_test_log_file.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

# The pattern is quoted so the shell never treats it as a glob, and an empty
# count (grep prints nothing when the file is missing) is treated as 0 so a
# missing log file fails the check instead of breaking the "[" expression.
expected_log_count=19
actual_log_count=$(grep -c '^[IWEV][0-9][0-9][0-9][0-9].*' ./log_file.log)
if [ "${actual_log_count:-0}" -lt $expected_log_count ]; then
    echo $actual_log_count
    echo $expected_log_count
    echo -e "\n***\n*** Test Failed: Less Log Messages Than Expected $LINENO\n***"
    RET=1
fi
expected_server_count=0
actual_server_count=$(grep -c '^[IWEV][0-9][0-9][0-9][0-9].*' inference_server_log_file.log)
if [ "${actual_server_count:-0}" -gt $expected_server_count ]; then
    echo $actual_server_count
    echo $expected_server_count
    echo -e "\n***\n*** Test Failed: More Log Messages Than Expected $LINENO\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Test Log File (Dynamic)
# Changing the log file through the endpoint must be rejected, and logging
# must keep going to the file given on the command line.
rm -f log_file.log
SERVER_ARGS="--log-file=log_file.log --log-verbose=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_log_file.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -d'{"log_file":"other_log.log"}' localhost:8000/v2/logging`
set +e

# updating log file location no longer supported
# (substitution quoted so a missing ./curl.out fails the check)
if [ "$(grep -c "\"error\":\"log file location can not be updated through network protocol\"" ./curl.out)" != "1" ]; then
    echo -e "\n***\n*** Test Failed: Incorrect Error Response\n***"
    RET=1
fi
verify_correct_settings "log_file.log" "true" "true" "true" "1" "default"

$SIMPLE_HTTP_CLIENT >> client_test_log_file.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

$SIMPLE_GRPC_CLIENT >> client_test_log_file.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

# All records must still be written to the original log file and none to the
# console. The former second check against a lower threshold read the same
# file and was implied by this one, so it has been dropped.
# Patterns are quoted against globbing; empty counts are treated as 0 so a
# missing file fails the check rather than breaking the "[" expression.
expected_log_count=75
actual_log_count=$(grep -c '^[IWEV][0-9][0-9][0-9][0-9].*' ./log_file.log)
if [ "${actual_log_count:-0}" -lt $expected_log_count ]; then
    echo $actual_log_count
    echo $expected_log_count
    echo -e "\n***\n*** Test Failed: Less Log Messages Than Expected $LINENO\n***"
    RET=1
fi
expected_server_count=0
actual_server_count=$(grep -c '^[IWEV][0-9][0-9][0-9][0-9].*' inference_server_log_file.log)
if [ "${actual_server_count:-0}" -gt $expected_server_count ]; then
    echo $actual_server_count
    echo $expected_server_count
    echo -e "\n***\n*** Test Failed: More Log Messages Than Expected $LINENO\n***"
    RET=1
fi

set -e
kill $SERVER_PID
wait $SERVER_PID

# Test Log Info (Argument)
# Start with info logging disabled: the info message printed at start-up
# must be absent from the log file.
rm -f log_file.log
SERVER_ARGS="--log-file=log_file.log --log-info=false --log-verbose=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_log_file.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out localhost:8000/v2/logging`

verify_correct_settings "log_file.log" "false" "true" "true" "1" "default"

$SIMPLE_HTTP_CLIENT >> client_test_log_info.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

$SIMPLE_GRPC_CLIENT >> client_test_log_info.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

# Test against guaranteed info message
count=$(grep -c "Started HTTPService at" ./log_file.log)
if [ "${count:-0}" -gt 0 ]; then
    echo -e "\n***\n*** Test Failed: Info Message Not Expected $LINENO\n***"
    RET=1
fi

set -e

# Test Log Info (Dynamic)
# Re-enable info logging through the endpoint on the same server instance.
set +e
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -d'{"log_info":true}' localhost:8000/v2/logging`

verify_correct_settings "log_file.log" "true" "true" "true" "1" "default"

$SIMPLE_HTTP_CLIENT >> client_test_log_info.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

$SIMPLE_GRPC_CLIENT >> client_test_log_info.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e
# Test against guaranteed info message, which is only checked after the
# server has been stopped above.
# An empty count (log file missing) is treated as 0 so that the check fails
# instead of being skipped by a malformed "[" expression.
count=$(grep -c "Waiting for in-flight requests to complete" ./log_file.log)
if [ "${count:-0}" -ne 1 ]; then
    echo -e "\n***\n*** Test Failed: Info Message Expected $LINENO\n***"
    RET=1
fi
set -e

# Test Log Warning
# Start with warnings disabled and confirm the endpoint reports it.
SERVER_ARGS="--log-warning=false --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_log_file.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm -f ./curl.out
set +e
code=$(curl -s -w %{http_code} -o ./curl.out localhost:8000/v2/logging)

verify_correct_settings "" "true" "false" "true" "0" "default"

# Inference over both protocols must still succeed
$SIMPLE_HTTP_CLIENT >> client_test_log_warning.log 2>&1 || RET=1
$SIMPLE_GRPC_CLIENT >> client_test_log_warning.log 2>&1 || RET=1

set -e

kill $SERVER_PID
wait $SERVER_PID

# Test Log Error
# Start with error logging disabled and confirm the endpoint reports it.
SERVER_ARGS="--log-error=false --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_log_file.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm -f ./curl.out
set +e
code=$(curl -s -w %{http_code} -o ./curl.out localhost:8000/v2/logging)

# Expected values are given as [ file | info | warn | error | verbosity | format ]
verify_correct_settings "" "true" "true" "false" "0" "default"

# Inference over both protocols must still succeed
$SIMPLE_HTTP_CLIENT >> client_test_log_error.log 2>&1 || RET=1
$SIMPLE_GRPC_CLIENT >> client_test_log_error.log 2>&1 || RET=1

set -e

kill $SERVER_PID
wait $SERVER_PID

# Test Log Verbose Level (Argument)
# The checks below count log lines mentioning "/v2/logging", so the number
# and order of requests sent to that endpoint in this section matters.
rm -f log_file.log
SERVER_ARGS="--log-file=log_file.log --log-verbose=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_log_file.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out localhost:8000/v2/logging`

verify_correct_settings "log_file.log" "true" "true" "true" "1" "default"

$SIMPLE_HTTP_CLIENT >> client_test_log_verbose.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

$SIMPLE_GRPC_CLIENT >> client_test_log_verbose.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

# Two requests have hit the endpoint so far: the curl above and the one made
# inside verify_correct_settings.
# An empty count (log file missing) is treated as 0 so that the check fails
# instead of being skipped by a malformed "[" expression.
count=$(grep -c "/v2/logging" ./log_file.log)
if [ "${count:-0}" -ne 2 ]; then
    echo -e "\n***\n*** Test Failed: Verbose Message Expected $LINENO\n***"
    RET=1
fi

# Turn verbose logging off through the endpoint
code=`curl -s -w %{http_code} -o ./curl.out -d'{"log_verbose_level":0}' localhost:8000/v2/logging`
verify_correct_settings "log_file.log" "true" "true" "true" "0" "default"

# Requests made after verbosity was lowered must not add verbose records;
# at most one extra line (count of 3) is tolerated.
code=`curl -s -w %{http_code} -o ./curl.out localhost:8000/v2/logging`
count=$(grep -c "/v2/logging" ./log_file.log)
if [ "${count:-0}" -gt 3 ]; then
    echo -e "\n***\n*** Test Failed: Too Many Verbose Messages $LINENO\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Test Log Format (Argument)
# Start with ISO8601 formatting; the first log line must then NOT begin with
# the default-format prefix "I<month><day>".
rm -f log_file.log
SERVER_ARGS="--log-file=log_file.log --log-verbose=1 --log-format=ISO8601 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_log_file.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm -f ./curl.out
set +e
code=$(curl -s -w %{http_code} -o ./curl.out localhost:8000/v2/logging)
verify_correct_settings "log_file.log" "true" "true" "true" "1" "ISO8601"

$SIMPLE_HTTP_CLIENT >> client_test_log_format.log 2>&1 || RET=1
$SIMPLE_GRPC_CLIENT >> client_test_log_format.log 2>&1 || RET=1

# Compare the first field of the first log line with today's default prefix
line=$(head -n 1 log_file.log)
date=$(date '+%m%d')
final_date="I${date}"
format_date=$(echo $line | head -n1 | awk '{print $1;}')
if [[ $final_date == $format_date ]]; then
    echo -e "\n***\n*** Test Failed: Unexpected Log Format $LINENO\n***"
    RET=1
fi

set -e

# Test Log Format (Dynamic)
# Switch back to the default format through the endpoint; the most recent
# log line must now begin with the default-format prefix.
set +e
rm -f ./curl.out
code=$(curl -s -w %{http_code} -o ./curl.out -d'{"log_format":"default"}' localhost:8000/v2/logging)
verify_correct_settings "log_file.log" "true" "true" "true" "1" "default"

line=$(tail -n 1 log_file.log)
date=$(date '+%m%d')
final_date="I${date}"
format_date=$(echo $line | head -n1 | awk '{print $1;}')
if [[ $final_date != $format_date ]]; then
    echo -e "\n***\n*** Test Failed: Unexpected Log Format $LINENO\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Test Negative Test Cases
# Malformed values sent to the logging endpoint must be rejected (any HTTP
# status other than 200), while well-formed ones must be accepted.
# The option is spelled out in full, without nested quotes, instead of
# relying on the abbreviated "--log-warn" form.
SERVER_ARGS="--log-warning=false --model-repository=$MODELSDIR"
SERVER_LOG="./server.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# BOOL_PARAMS may be overridden from the environment to narrow the test.
BOOL_PARAMS=${BOOL_PARAMS:="log_info log_warning log_error"}
for BOOL_PARAM in $BOOL_PARAMS; do
    # Attempt to use integer instead of bool
    code=`curl -s -w %{http_code} -o ./curl.out -d'{"'"$BOOL_PARAM"'":1}' localhost:8000/v2/logging`
    if [ "$code" == "200" ]; then
        echo $code
        cat ./curl.out
        echo -e "\n***\n*** Test Failed: Line: $LINENO\n***"
        RET=1
    fi
    # Attempt to use upper-case bool
    code=`curl -s -w %{http_code} -o ./curl.out -d'{"'"$BOOL_PARAM"'":False}' localhost:8000/v2/logging`
    if [ "$code" == "200" ]; then
        echo $code
        cat ./curl.out
        echo -e "\n***\n*** Test Failed: Line: $LINENO\n***"
        RET=1
    fi
    # Attempt to use string bool
    code=`curl -s -w %{http_code} -o ./curl.out -d'{"'"$BOOL_PARAM"'":"false"}' localhost:8000/v2/logging`
    if [ "$code" == "200" ]; then
        echo $code
        cat ./curl.out
        echo -e "\n***\n*** Test Failed: Line: $LINENO\n***"
        RET=1
    fi
    # Positive test case
    code=`curl -s -w %{http_code} -o ./curl.out -d'{"'"$BOOL_PARAM"'":true}' localhost:8000/v2/logging`
    if [ "$code" != "200" ]; then
        echo $code
        cat ./curl.out
        echo -e "\n***\n*** Test Failed: Line: $LINENO\n***"
        RET=1
    fi
done

# Negative verbose level must be rejected
code=`curl -s -w %{http_code} -o ./curl.out -d'{"log_verbose_level":-1}' localhost:8000/v2/logging`
if [ "$code" == "200" ]; then
    echo $code
    cat ./curl.out
    echo -e "\n***\n*** Test Failed: Line: $LINENO\n***"
    RET=1
fi
# Verbose level given as a string must be rejected
code=`curl -s -w %{http_code} -o ./curl.out -d'{"log_verbose_level":"1"}' localhost:8000/v2/logging`
if [ "$code" == "200" ]; then
    echo $code
    cat ./curl.out
    echo -e "\n***\n*** Test Failed: Line: $LINENO\n***"
    RET=1
fi
# Positive test case
code=`curl -s -w %{http_code} -o ./curl.out -d'{"log_verbose_level":0}' localhost:8000/v2/logging`
if [ "$code" != "200" ]; then
    echo $code
    cat ./curl.out
    echo -e "\n***\n*** Test Failed: Line: $LINENO\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Test Python client library
# Run the Python unit test against a freshly started default server.
SERVER_ARGS="--model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_unittest.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

if python $CLIENT_TEST >>$CLIENT_LOG 2>&1; then
    # The run succeeded; also verify the recorded test results.
    if ! check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
else
    cat $CLIENT_LOG
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

# Run the pytest-based log format test; show its log only on failure.
FORMAT_TEST_LOG="./log_format_test.log"

if ! python3 -m pytest --junitxml=log_format_test.xml log_format_test.py > $FORMAT_TEST_LOG 2>&1; then
    cat $FORMAT_TEST_LOG
    echo -e "\n***\n*** Log Format Test Failed\n***"
    RET=1
fi

set -e

# Test Log Output Stream
# Set up an invalid model with a leading zero in the version number. This will print warning and error logs.
MODELSDIR_INVALID=`pwd`/log_models_invalid
rm -rf $MODELSDIR_INVALID && \
  cp -r $MODELSDIR $MODELSDIR_INVALID && \
  mv $MODELSDIR_INVALID/simple/1 $MODELSDIR_INVALID/simple/01

rm -f log_file.log
# Default-format log record without its leading severity letter (PCRE syntax,
# used with grep -P below).
LOG_REGEX="(?P<month>\d{2})(?P<day>\d{2}) (?P<timestamp>\d{2}:\d{2}:\d{2}\.\d{6}) (?P<pid>\d+) (?P<file>[\w\.]+):(?P<line>\d+)] (?P<message>.*)"
SERVER_ARGS="--log-verbose=1 --model-repository=$MODELSDIR_INVALID"
SERVER_LOG="./inference_server_log_file.log"
# NOTE(review): presumably run_server (util.sh) sends the server's stderr to
# this file when the variable is set - confirm against util.sh.
SERVER_ERROR_LOG="./inference_server_error_log_file.log"
run_server
# The server is expected to fail to start with the invalid model.
if [ "$SERVER_PID" != "0" ]; then
    echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
    cat $SERVER_LOG
    kill_server
    exit 1
fi

set +e
# The grep substitutions are quoted so that a missing log file (grep prints
# nothing) fails the comparison instead of breaking the "[" expression.
# Only INFO logs in SERVER_LOG
if [ "$(grep -c -P "I$LOG_REGEX" $SERVER_LOG)" == "0" ]; then
  echo -e "\n***\n*** Test Failed: INFO logs are not written to $SERVER_LOG\n***"
  RET=1
fi
if [ "$(grep -c -P "(W|E)$LOG_REGEX" $SERVER_LOG)" != "0" ]; then
  echo -e "\n***\n*** Test Failed: WARNING/ERROR logs are written to $SERVER_LOG\n***"
  RET=1
fi
# Only WARNING and ERROR logs in SERVER_ERROR_LOG
if [ "$(grep -c -P "I$LOG_REGEX" $SERVER_ERROR_LOG)" != "0" ]; then
  echo -e "\n***\n*** Test Failed: INFO logs are written to $SERVER_ERROR_LOG\n***"
  RET=1
fi
if [ "$(grep -c -P "W$LOG_REGEX" $SERVER_ERROR_LOG)" == "0" ]; then
  echo -e "\n***\n*** Test Failed: WARNING logs are not written to $SERVER_ERROR_LOG\n***"
  RET=1
fi
if [ "$(grep -c -P "E$LOG_REGEX" $SERVER_ERROR_LOG)" == "0" ]; then
  echo -e "\n***\n*** Test Failed: ERROR logs are not written to $SERVER_ERROR_LOG\n***"
  RET=1
fi

# Unset the variable itself; "unset $SERVER_ERROR_LOG" would try to unset a
# variable named after the file path, which is not a valid identifier.
unset SERVER_ERROR_LOG
set -e

# Print the overall verdict and use it as the script's exit status.
case $RET in
    0) echo -e "\n***\n*** Test Passed\n***" ;;
    *) echo -e "\n***\n*** Test Failed\n***" ;;
esac


exit $RET


================================================
FILE: qa/L0_long_running_stress/crashing_client.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import argparse
import time
from multiprocessing import Process, shared_memory

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import np_to_triton_dtype


def crashing_client(
    model_name, dtype, tensor_shape, shm_name, triton_client, input_name="INPUT0"
):
    """Send inference requests in an endless loop until the process is killed.

    Before every request the counter stored in the shared memory segment
    'shm_name' is incremented, so the parent process can read how many
    requests were issued.

    model_name    -- name of the model to send requests to
    dtype         -- numpy data type of the input tensor
    tensor_shape  -- shape of the input tensor
    shm_name      -- name of an existing shared memory segment that holds an
                     int32 request counter at its start
    triton_client -- client object used to issue the 'infer' calls
    input_name    -- input tensor name; replaced by "INPUT__0" when the model
                     name contains "libtorch"

    This function never returns.
    """
    payload = np.random.random(tensor_shape).astype(dtype)
    tensor_name = "INPUT__0" if "libtorch" in model_name else input_name

    request_input = grpcclient.InferInput(
        tensor_name, tensor_shape, np_to_triton_dtype(dtype)
    )
    request_input.set_data_from_numpy(payload)
    inputs = [request_input]

    # Loop forever so that a request is guaranteed to be in flight at the
    # moment the process gets terminated.
    while True:
        # Attach to the segment only for the duration of the counter update
        counter_shm = shared_memory.SharedMemory(shm_name)
        counter = np.ndarray((1,), dtype=np.int32, buffer=counter_shm.buf)
        counter[0] += 1
        counter_shm.close()
        triton_client.infer(model_name, inputs)


if __name__ == "__main__":
    # The only command line option is the trial, which selects the model
    # and whether the input tensor carries a batch dimension.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-t",
        "--trial",
        type=str,
        required=True,
        help="Set trial for the crashing client",
    )
    FLAGS = arg_parser.parse_args()
    trial = FLAGS.trial

    dtype = np.float32
    model_name = tu.get_zero_model_name(trial, 1, dtype)
    if "nobatch" in trial:
        tensor_shape = (1,)
    else:
        tensor_shape = (1, 1)

    # NOTE: the verbose client output is part of this script's stdout.
    triton_client = grpcclient.InferenceServerClient(url="localhost:8001", verbose=True)

    # Shared int32 counter that the child process bumps before each request
    counter_shm = shared_memory.SharedMemory(create=True, size=8)
    request_counter = np.ndarray((1,), dtype=np.int32, buffer=counter_shm.buf)
    request_counter[0] = 0

    client_process = Process(
        target=crashing_client,
        name="crashing_client",
        args=(model_name, dtype, tensor_shape, counter_shm.name, triton_client),
    )
    client_process.start()

    # Let the client run for 3 seconds, then kill it mid-flight
    time.sleep(3)
    client_process.terminate()

    # Wait for the terminated process to be reaped
    client_process.join()

    print("request_count:", request_counter[0])

    # Release and remove the shared memory segment
    counter_shm.close()
    counter_shm.unlink()

    # Exit code 0 when the server is still live after the client was
    # terminated, 1 otherwise.
    sys.exit(0 if triton_client.is_server_live() else 1)


================================================
FILE: qa/L0_long_running_stress/scenarios.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import math
import sys

sys.path.append("../common")

import math
import os
import subprocess
import threading
import time

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from PIL import Image
from tritonclient.utils import np_to_triton_dtype

if sys.version_info >= (3, 0):
    import queue
else:
    import Queue as queue

import abc
import csv
import json
import re
from functools import partial

# Default limit, in milliseconds, on the elapsed time of a sequence before
# SequenceScenario.check_sequence_async() raises a TimeoutException.
DEFAULT_TIMEOUT_MS = 25000
# Default mean and standard deviation of the normal distribution from which
# the sequence scenarios draw the length of the sequences they send.
SEQUENCE_LENGTH_MEAN = 16
SEQUENCE_LENGTH_STDEV = 8


class TimeoutException(Exception):
    """Raised when a sequence takes longer than its allowed timeout."""


# Callback function used for async_stream_infer()
def completion_callback(user_data, result, error):
    """Queue the outcome of a request on 'user_data._completed_requests'.

    The (result, error) pair is stored as-is; the error is not raised here,
    it is left to whoever consumes the queue.
    """
    outcome = (result, error)
    user_data._completed_requests.put(outcome)


class Scenario(metaclass=abc.ABCMeta):
    """Abstract base class of all stress test scenarios.

    name       -- name of the scenario instance, stored in 'name_'
    trials     -- collection of trial identifiers get_trial() picks from
    verbose    -- verbosity flag, stored in 'verbose_'
    out_stream -- stream kept in 'out_stream_' for the scenario's output
    """

    def __init__(self, name, trials, verbose=False, out_stream=sys.stdout):
        self.name_, self.trials_ = name, trials
        self.verbose_, self.out_stream_ = verbose, out_stream

    def scenario_name(self):
        """Return the name of the concrete scenario class."""
        return self.__class__.__name__

    def get_trial(self):
        """Return one randomly chosen entry of the configured trials."""
        return np.random.choice(self.trials_)

    def get_datatype(self, trial):
        """Return the numpy data type to use for 'trial'.

        The choice is based on what models are available (see test.sh):
        float32 for trials containing "plan", int32 for everything else.
        """
        return np.float32 if "plan" in trial else np.int32

    # FIXME do we need client meta data?
    @abc.abstractmethod
    def run(self, client_metadata):
        """Run the scenario.

        Implementations return the number of requests sent on success and
        raise an exception on failure. None is returned when the scenario
        is not run (i.e. due to unsatisfied constraints).
        """
        pass


class PerfAnalyzerScenario(Scenario):
    """Scenario that generates load by invoking the perf_analyzer tool.

    Every call to run() picks one of the configured ModelOption objects at
    random and executes perf_analyzer for it.
    """

    # Some class static variables
    command_ = "perf_analyzer"
    # Serializes the "create the data file if it is missing" step
    generation_mutex_ = threading.Lock()

    class ModelOption:
        """perf_analyzer settings for one model.

        'concurrency_range' is a 3 element tuple/list that specifies
        (min_concurrency, max_concurrency, current_concurrency) to limit the
        allowed range of concurrency.

        'queue_latency_range_us' specifies the (lower, upper) range where
        queue latency reported should be, otherwise, model concurrency will
        be adjusted within 'concurrency_range' to influence the queue
        latency.

        'input_shapes' is an optional list of (input_name, shape) pairs that
        are passed as '--shape' options; 'input_file' is an optional file
        passed as '--input-data'.
        """

        def __init__(
            self,
            model_name,
            batch_size,
            concurrency_range,
            queue_latency_range_us,
            input_shapes=None,
            input_file=None,
        ):
            self.model_name_ = model_name
            # Copy into a list because the current concurrency is updated
            self.concurrency_range_ = list(concurrency_range)
            self.batch_size_ = batch_size
            # A fresh list per instance instead of a shared mutable default
            self.input_shapes_ = [] if input_shapes is None else input_shapes
            self.queue_latency_range_us_ = queue_latency_range_us
            self.input_file_ = input_file

        def _adjust_concurrency(self, current_queue_us):
            """Move the current concurrency one step towards the latency range.

            Concurrency is raised when the queue latency is below the lower
            bound and lowered when it is above the upper bound, always
            staying within [min_concurrency, max_concurrency].
            """
            lower_us = self.queue_latency_range_us_[0]
            upper_us = self.queue_latency_range_us_[1]
            current = self.concurrency_range_[2]
            if current_queue_us < lower_us:
                self.concurrency_range_[2] = min(
                    current + 1, self.concurrency_range_[1]
                )
            elif current_queue_us > upper_us:
                self.concurrency_range_[2] = max(
                    current - 1, self.concurrency_range_[0]
                )

        def run(self, name, sequence_id_range, out_stream):
            """Run perf_analyzer once and return the reported request count.

            name              -- scenario name, used in the CSV file name
            sequence_id_range -- optional (start, end) pair passed as
                                 '--sequence-id-range'
            out_stream        -- stream the perf_analyzer output is written to

            Raises subprocess.CalledProcessError when perf_analyzer exits
            with a non-zero code, and Exception when its output does not
            report a request count.
            """
            concurrency = self.concurrency_range_[2]
            csv_file = os.path.join(
                "csv_dir",
                "{}_{}_{}.csv".format(name, self.model_name_, concurrency),
            )

            arg_list = [PerfAnalyzerScenario.command_]
            # Always use GRPC streaming feature to ensure requests are handled
            # in order
            arg_list += ["-i", "grpc", "--streaming"]
            arg_list += ["-m", "{}".format(self.model_name_)]
            arg_list += ["-b", "{}".format(self.batch_size_)]
            arg_list += [
                "--concurrency-range",
                "{}:{}:1".format(concurrency, concurrency),
            ]
            arg_list += ["-f", csv_file]
            for input_name, shape in self.input_shapes_:
                arg_list += ["--shape", "{}:{}".format(input_name, shape)]
            if self.input_file_ is not None:
                arg_list += ["--input-data", self.input_file_]
            if sequence_id_range is not None:
                arg_list += [
                    "--sequence-id-range",
                    "{}:{}".format(sequence_id_range[0], sequence_id_range[1]),
                ]

            completed_process = subprocess.run(
                arg_list, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
            )
            # Write output to file before checking return code
            print(completed_process.stdout, file=out_stream)
            completed_process.check_returncode()

            # Read queue time from the first data row and adjust concurrency
            with open(csv_file, newline="") as csvfile:
                first_row = next(csv.DictReader(csvfile), None)
            if first_row is not None:
                self._adjust_concurrency(int(first_row["Server Queue"]))

            m = re.search(r"Request count: ([0-9]+)", completed_process.stdout)
            if m is None:
                raise Exception(
                    "unable to find request count in {} output for model {}".format(
                        PerfAnalyzerScenario.command_, self.model_name_
                    )
                )
            return int(m.group(1))

    def __init__(
        self,
        name,
        rng,
        sequence_trials,
        identity_trials,
        queue_latency_range_us=(10000, 100000),
        sequence_id_range=None,
        verbose=False,
        out_stream=sys.stdout,
    ):
        """Build the list of model options the scenario chooses from.

        name                   -- scenario name
        rng                    -- random number generator, stored in 'rng_'
        sequence_trials        -- trials for which sequence models are added
        identity_trials        -- trials for which identity models are added
        queue_latency_range_us -- (lower, upper) queue latency target that is
                                  handed to every model option
        sequence_id_range      -- optional range forwarded to perf_analyzer
        verbose, out_stream    -- forwarded to Scenario
        """
        super().__init__(name, [], verbose, out_stream)
        self.rng_ = rng
        self.sequence_id_range_ = sequence_id_range
        # List of ModelOption objects, one per perf_analyzer configuration
        self.options_ = []

        # Add no validation models
        self.options_.append(
            PerfAnalyzerScenario.ModelOption(
                "resnet_v1_50", 32, (1, 4, 1), queue_latency_range_us
            )
        )
        for trial in sequence_trials:
            dtype = self.get_datatype(trial)
            # Skip string sequence model for now, it is hard for PA to generate
            # valid input
            if dtype == np.dtype(object):
                continue
            model_name = tu.get_sequence_model_name(trial, dtype)
            self.options_.append(
                PerfAnalyzerScenario.ModelOption(
                    model_name, 1, (1, 4, 1), queue_latency_range_us
                )
            )
        for trial in identity_trials:
            dtype = np.float32
            model_name = tu.get_zero_model_name(trial, 1, dtype)
            if "libtorch" in trial:
                input_shapes = [("INPUT__0", "16")]
            else:
                input_shapes = [("INPUT0", "16")]
            self.options_.append(
                PerfAnalyzerScenario.ModelOption(
                    model_name, 1, (1, 4, 1), queue_latency_range_us, input_shapes
                )
            )

        # Add output validation version of the models
        # Skip resnet as the output data has variation which makes exact
        # matching hard
        for trial in sequence_trials:
            dtype = self.get_datatype(trial)
            model_name = tu.get_sequence_model_name(trial, dtype)
            data_file = os.path.join("validation_data", "{}.json".format(model_name))
            self.generate_sequence_data(trial, dtype, data_file)
            self.options_.append(
                PerfAnalyzerScenario.ModelOption(
                    model_name,
                    1,
                    (1, 4, 1),
                    queue_latency_range_us,
                    input_file=data_file,
                )
            )
        for trial in identity_trials:
            dtype = np.float32
            model_name = tu.get_zero_model_name(trial, 1, dtype)
            data_file = os.path.join("validation_data", "{}.json".format(model_name))
            self.generate_identity_data(trial, dtype, data_file)
            self.options_.append(
                PerfAnalyzerScenario.ModelOption(
                    model_name,
                    1,
                    (1, 4, 1),
                    queue_latency_range_us,
                    input_file=data_file,
                )
            )

    @staticmethod
    def _to_json_value(value, dtype, kind):
        """Convert 'value' to the JSON representation used for 'dtype'.

        'kind' only appears in the error message raised for data types
        other than float32, int32 and object.
        """
        if dtype == np.float32:
            return float(value)
        if dtype == np.int32:
            return int(value)
        if dtype == np.dtype(object):
            return str(value)
        raise Exception("unexpected {} data type {}".format(kind, dtype))

    @staticmethod
    def _write_data_file_once(data, data_filename):
        """Dump 'data' as JSON to 'data_filename' unless the file exists.

        The check and the write happen under the class-wide mutex, which is
        released even when writing the file fails.
        """
        with PerfAnalyzerScenario.generation_mutex_:
            if not os.path.exists(data_filename):
                with open(data_filename, "w") as f:
                    json.dump(data, f)

    def generate_sequence_data(self, trial, dtype, data_filename):
        """Create the input / validation data file of a sequence model.

        The file describes one sequence of three requests with the input
        values 0, 1, 2 and the expected outputs 1, 1, 2. Raises Exception
        for an unexpected 'dtype'.
        """
        is_libtorch = "libtorch" in trial
        input0 = "INPUT__0" if is_libtorch else "INPUT"
        output0 = "OUTPUT__0" if is_libtorch else "OUTPUT"

        input_data = [
            {input0: [self._to_json_value(i, dtype, "sequence")]} for i in range(3)
        ]
        output_data = [
            {output0: [self._to_json_value(1 if i == 0 else i, dtype, "sequence")]}
            for i in range(3)
        ]
        data = {"data": [input_data], "validation_data": [output_data]}

        # Only write to a file if there isn't validation file for the model
        self._write_data_file_once(data, data_filename)

    def generate_identity_data(self, trial, dtype, data_filename):
        """Create the input / validation data file of an identity model.

        Input and expected output are the same 16 values 0..15 with shape
        [16]. Raises Exception for an unexpected 'dtype'.
        """
        is_libtorch = "libtorch" in trial
        input0 = "INPUT__0" if is_libtorch else "INPUT0"
        output0 = "OUTPUT__0" if is_libtorch else "OUTPUT0"

        io_data = [self._to_json_value(i, dtype, "identity") for i in range(16)]
        data = {
            "data": [{input0: {"content": io_data, "shape": [16]}}],
            "validation_data": [{output0: {"content": io_data, "shape": [16]}}],
        }
        # Only write to a file if there isn't validation file for the model
        self._write_data_file_once(data, data_filename)

    def run(self, client_metadata):
        """Run perf_analyzer for a randomly chosen model option.

        'client_metadata' is not used. Returns the request count reported
        by perf_analyzer.
        """
        model_option = np.random.choice(self.options_)
        return model_option.run(self.name_, self.sequence_id_range_, self.out_stream_)


class ResNetScenario(Scenario):
    """Scenario that classifies a batch of identical images with resnet_v1_50.

    The batch is built once in the constructor from '../images/vulture.jpeg'
    and every classification result is expected to be "VULTURE".
    """

    def __init__(self, name, batch_size=32, verbose=False, out_stream=sys.stdout):
        super().__init__(name, [], verbose, out_stream)
        self.model_name_ = "resnet_v1_50"
        self.batch_size_ = batch_size

        # Repeat the single preprocessed image to form the batch
        single_image = self.preprocess("../images/vulture.jpeg")
        self.image_data_ = np.stack([single_image] * batch_size, axis=0)

    def preprocess(self, filename):
        """Load 'filename' and return it as a float32 array.

        The image is converted to RGB, resized to 224x224 and the per-channel
        offsets (123, 117, 104) are subtracted.
        """
        rgb_image = Image.open(filename).convert("RGB")
        pixels = np.array(rgb_image.resize((224, 224), Image.BILINEAR)).astype(
            np.float32
        )
        if pixels.ndim == 2:
            pixels = pixels[:, :, np.newaxis]
        offsets = np.asarray((123, 117, 104), dtype=np.float32)
        return pixels - offsets

    def postprocess(self, results):
        """Verify the classification output of one inference.

        Raises Exception when the number of results differs from the batch
        size or when any classification is not "VULTURE".
        """
        output_array = results.as_numpy("resnet_v1_50/predictions/Softmax:0")
        received = len(output_array)
        if received != self.batch_size_:
            raise Exception(
                "expected {} results, got {}".format(self.batch_size_, received)
            )

        # Entries of an object array are turned into text element by element
        # before they are split; other entries are split directly.
        needs_decoding = output_array.dtype.type == np.object_
        for batch_entry in output_array:
            for classification in batch_entry:
                if needs_decoding:
                    text = "".join(chr(x) for x in classification)
                else:
                    text = classification
                cls = text.split(":")
                # The third ':'-separated field is compared with the label
                if cls[2] != "VULTURE":
                    raise Exception(
                        "expected VULTURE as classification result, got {}".format(
                            cls[2]
                        )
                    )

    def run(self, client_metadata):
        """Send one batched inference request and validate the response.

        'client_metadata[0]' is the client used to send the request.
        Returns the batch size as the number of requests sent.
        """
        triton_client = client_metadata[0]

        image_input = grpcclient.InferInput("input:0", self.image_data_.shape, "FP32")
        image_input.set_data_from_numpy(self.image_data_)

        # Request the output as a classification with a single class
        requested_output = grpcclient.InferRequestedOutput(
            "resnet_v1_50/predictions/Softmax:0", class_count=1
        )
        response = triton_client.infer(
            self.model_name_, [image_input], outputs=[requested_output]
        )
        self.postprocess(response)
        return self.batch_size_


class TimeoutScenario(Scenario):
    """Scenario that expects a large request to fail with a client timeout.

    name        -- scenario name
    trials      -- trials from which the identity model is chosen
    input_dtype -- numpy data type of the input tensor
    input_name  -- input tensor name used for non-libtorch trials
    """

    def __init__(
        self,
        name,
        trials,
        input_dtype=np.float32,
        input_name="INPUT0",
        verbose=False,
        out_stream=sys.stdout,
    ):
        super().__init__(name, trials, verbose, out_stream)
        self.input_dtype_ = input_dtype
        self.input_name_ = input_name

    def run(self, client_metadata):
        """Send a 1 GiB request with a 0.1 s timeout and expect it to time out.

        'client_metadata[0]' is the client used to send the request.
        Returns 1 when the request fails with "Deadline Exceeded". Raises
        AssertionError when the request succeeds or fails for another reason.
        """
        trial = self.get_trial()
        model_name = tu.get_zero_model_name(trial, 1, self.input_dtype_)
        triton_client = client_metadata[0]
        # libtorch models use a different input tensor name
        input_name = "INPUT__0" if "libtorch" in trial else self.input_name_

        # Number of elements that add up to 1 GiB of input data
        element_count = (1024 * 1024 * 1024) // np.dtype(self.input_dtype_).itemsize
        tensor_shape = (element_count,)
        in0 = np.random.random(tensor_shape).astype(self.input_dtype_)
        inputs = [
            grpcclient.InferInput(
                input_name, tensor_shape, np_to_triton_dtype(self.input_dtype_)
            ),
        ]
        inputs[0].set_data_from_numpy(in0)

        # Expect an exception for small timeout values.
        try:
            triton_client.infer(model_name, inputs, client_timeout=0.1)
        except Exception as ex:
            # Prefer the exception's message() accessor when it has one and
            # fall back to the string form for any other exception type.
            message_getter = getattr(ex, "message", None)
            message = message_getter() if callable(message_getter) else None
            if message is None:
                message = str(ex)
            if "Deadline Exceeded" not in message:
                raise AssertionError(
                    "timeout_client failed {}".format(self.name_)
                ) from ex
            # Expect timeout error as success case
            return 1

        # Raised outside of the 'try' so that it is not swallowed by the
        # generic exception handler above.
        raise AssertionError("expected inference failure from deadline exceeded")


class CrashingScenario(Scenario):
    """Scenario that runs crashing_client.py and checks the server survives."""

    def __init__(self, name, verbose=False, out_stream=sys.stdout):
        super().__init__(name, [], verbose, out_stream)

    def run(self, client_metadata):
        """Run the crashing client and return the number of requests it sent.

        'client_metadata' is not used. Raises subprocess.CalledProcessError
        when the client exits with a non-zero code and AssertionError when
        its output does not report the server as live.
        """
        # Only use "custom" model as it simulates execution delay which
        # simplifies "crashing simulation" (client exits while request is being
        # executed)
        trial = "custom"

        # Call the client as subprocess to avoid crashing stress test
        # and gather logging as string variable
        crashing_client = "crashing_client.py"
        log = subprocess.check_output([sys.executable, crashing_client, "-t", trial])
        request_count, is_server_live = self.parse_result(log.decode("utf-8"))
        if not is_server_live:
            raise AssertionError("crashing_client failed {}".format(self.name_))

        return int(request_count)

    @staticmethod
    def _last_field(log, key):
        """Return the text after the last 'key' in 'log' up to the line end.

        Surrounding whitespace is stripped. A match on the final line is
        handled even when the log does not end with a newline. Returns None
        when 'key' does not occur in 'log'.
        """
        key_idx = log.rfind(key)
        if key_idx < 0:
            return None
        value_start = key_idx + len(key)
        line_end = log.find("\n", value_start)
        if line_end < 0:
            line_end = len(log)
        return log[value_start:line_end].strip()

    def parse_result(self, log):
        """Extract (request_count, is_server_live) from the client output.

        The last "request_count:" and "live:" entries of 'log' are used.
        Missing entries yield 0 and False respectively. Raises ValueError
        when the request count is not an integer.
        """
        count_text = self._last_field(log, "request_count:")
        request_count = 0 if count_text is None else int(count_text)

        live_text = self._last_field(log, "live:")
        return (request_count, live_text == "true")


class SequenceScenario(Scenario):
    """Base class of the scenarios that send sequences over a GRPC stream.

    name                 -- scenario name
    trials               -- trials the scenario chooses from
    rng                  -- random number generator, stored in 'rng_'
    sequence_constraints -- shared bookkeeping object, stored in
                            'sequence_constraints_'
    """

    class UserData:
        """Holds the queue that completion_callback() fills with outcomes."""

        def __init__(self):
            self._completed_requests = queue.Queue()

    def __init__(
        self,
        name,
        trials,
        rng,
        sequence_constraints,
        verbose=False,
        out_stream=sys.stdout,
    ):
        super().__init__(name, trials, verbose, out_stream)
        self.rng_ = rng
        self.sequence_constraints_ = sequence_constraints

    # For sequence requests, the state of previous sequence that share the same
    # sequence id will affect the current sequence, so must check if the
    # constraints are satisfied for the scenario
    @abc.abstractmethod
    def check_constraints(self, model_name, sequence_id):
        pass

    def get_expected_result(self, expected_result, value, trial, flag_str=None):
        """Return the result expected for a request with input 'value'.

        For models that could not implement the full accumulator (see
        qa/common/gen_qa_sequence_models.py for more information) the
        expectation is 'value', plus one when 'flag_str' contains "start".
        For all other models 'expected_result' is returned unchanged.
        """
        limited_accumulator = (
            ("nobatch" not in trial and "custom" not in trial)
            or "plan" in trial
            or "onnx" in trial
            or "libtorch" in trial
        )
        if not limited_accumulator:
            return expected_result
        if flag_str is not None and "start" in flag_str:
            return value + 1
        return value

    def check_sequence_async(
        self,
        client_metadata,
        trial,
        model_name,
        input_dtype,
        steps,
        timeout_ms=DEFAULT_TIMEOUT_MS,
        batch_size=1,
        sequence_name="<unknown>",
        tensor_shape=(1,),
        input_name="INPUT",
        output_name="OUTPUT",
    ):
        """Perform sequence of inferences using async run. The 'steps' holds
        a list of tuples, one for each inference with format:

        (flag_str, value, expected_result, delay_ms)

        'client_metadata' provides the client at index 0 and the sequence id
        at index 1. Returns the number of requests sent. Raises the error
        reported for a request, TimeoutException when 'timeout_ms' is
        exceeded, and AssertionError on an unexpected result.
        """
        known_types = ("custom", "onnx", "libtorch", "plan")
        if not any(known in trial for known in known_types):
            assert False, "unknown trial type: " + trial

        # Batching models take an extra leading batch dimension
        if "nobatch" not in trial:
            tensor_shape = (batch_size,) + tensor_shape
        if "libtorch" in trial:
            input_name, output_name = "INPUT__0", "OUTPUT__0"

        triton_client = client_metadata[0]
        sequence_id = client_metadata[1]

        # Execute the sequence of inference...
        seq_start_ms = int(round(time.time() * 1000))
        user_data = SequenceScenario.UserData()
        # Ensure there is no running stream
        triton_client.stop_stream()
        triton_client.start_stream(partial(completion_callback, user_data))

        sent_count = 0
        for flag_str, value, _, delay_ms in steps:
            has_flags = flag_str is not None
            seq_start = has_flags and "start" in flag_str
            seq_end = has_flags and "end" in flag_str

            if input_dtype == np.object_:
                # Object inputs carry the value as its string representation
                numeric = np.full(tensor_shape, value, dtype=np.int32)
                as_text = np.array(
                    [str(x) for x in numeric.reshape(numeric.size)], dtype=object
                )
                in0 = as_text.reshape(tensor_shape)
            else:
                in0 = np.full(tensor_shape, value, dtype=input_dtype)

            request_input = grpcclient.InferInput(
                input_name, tensor_shape, np_to_triton_dtype(input_dtype)
            )
            request_input.set_data_from_numpy(in0)

            triton_client.async_stream_infer(
                model_name,
                [request_input],
                sequence_id=sequence_id,
                sequence_start=seq_start,
                sequence_end=seq_end,
            )
            sent_count += 1

            if delay_ms is not None:
                time.sleep(delay_ms / 1000.0)

        # Process the results in order that they were sent
        for step_idx in range(sent_count):
            results, error = user_data._completed_requests.get()
            if error is not None:
                raise error

            _, value, expected, _ = steps[step_idx]
            if timeout_ms is not None:
                elapsed_ms = int(round(time.time() * 1000)) - seq_start_ms
                if elapsed_ms > timeout_ms:
                    raise TimeoutException(
                        "Timeout expired for {}, got {} ms".format(
                            sequence_name, elapsed_ms
                        )
                    )

            output = results.as_numpy(output_name)
            result = output[0] if "nobatch" in trial else output[0][0]
            if self.verbose_:
                print(
                    "{} {}: + {} = {}".format(
                        sequence_name, sequence_id, value, result
                    ),
                    file=self.out_stream_,
                )

            if expected is None:
                continue
            if input_dtype == np.object_:
                assert (
                    int(result) == expected
                ), "{}: expected result {}, got {} {} {}".format(
                    sequence_name, expected, int(result), trial, model_name
                )
            else:
                assert (
                    result == expected
                ), "{}: expected result {}, got {} {} {}".format(
                    sequence_name, expected, result, trial, model_name
                )
        triton_client.stop_stream()
        return sent_count


class SequenceNoEndScenario(SequenceScenario):
    """Scenario that sends a sequence with a "start" flag but no "end" flag."""

    def __init__(
        self,
        name,
        trials,
        rng,
        sequence_constraints,
        verbose=False,
        out_stream=sys.stdout,
    ):
        super().__init__(name, trials, rng, sequence_constraints, verbose, out_stream)

    def check_constraints(self, model_name, sequence_id):
        # The scenario can always be run regardless of the previous runs
        return True

    def run(
        self,
        client_metadata,
        len_mean=SEQUENCE_LENGTH_MEAN,
        len_stddev=SEQUENCE_LENGTH_STDEV,
    ):
        """Send one never-ending sequence and return the number of requests.

        'client_metadata' provides the client at index 0 and the sequence id
        at index 1. The sequence length is drawn from a normal distribution
        with 'len_mean' / 'len_stddev' and is at least 1. Returns None when
        the constraints are not satisfied.
        """
        trial = self.get_trial()
        dtype = self.get_datatype(trial)
        model_name = tu.get_sequence_model_name(trial, dtype)
        sequence_id = client_metadata[1]
        if not self.check_constraints(model_name, sequence_id):
            return None

        # Track that the sequence id of the model is used for no-end sequence
        self.sequence_constraints_.setdefault(model_name, {})[sequence_id] = True

        # Create a variable length sequence with "start" flag but that
        # never ends. The sequence should be aborted by the server and its
        # slot reused for another sequence.
        seqlen = max(1, int(self.rng_.normal(len_mean, len_stddev)))
        print(
            "{} {}: no-end seqlen = {}".format(self.name_, sequence_id, seqlen),
            file=self.out_stream_,
        )

        values = self.rng_.randint(0, 1024 * 1024, size=seqlen).astype(dtype)

        steps = []
        expected_result = 0
        for idx, val in enumerate(values):
            # Only the first request carries a flag
            flags = "start" if idx == 0 else ""
            expected_result += val
            expected_result = self.get_expected_result(
                expected_result, val, trial, flags
            )
            # (flag_str, value, expected_result, delay_ms)
            steps.append((flags, val, expected_result, None))

        return self.check_sequence_async(
            client_metadata, trial, model_name, dtype, steps, sequence_name=self.name_
        )


class SequenceValidNoEndScenario(SequenceScenario):
    """Scenario that sends a complete sequence followed by a no-end sequence.

    Both sequences use the same correlation ID and are sent back-to-back.
    """

    def __init__(
        self,
        name,
        trials,
        rng,
        sequence_constraints,
        verbose=False,
        out_stream=sys.stdout,
    ):
        super().__init__(name, trials, rng, sequence_constraints, verbose, out_stream)

    def check_constraints(self, model_name, sequence_id):
        # The scenario can always be run regardless of the previous runs
        return True

    def run(
        self,
        client_metadata,
        len_mean=SEQUENCE_LENGTH_MEAN,
        len_stddev=SEQUENCE_LENGTH_STDEV,
    ):
        """Send both sequences and return the total number of requests sent.

        'client_metadata' provides the client at index 0 and the sequence id
        at index 1. Each sequence length is drawn from a normal distribution
        with 'len_mean' / 'len_stddev' and is at least 1. Returns None when
        the constraints are not satisfied.
        """
        trial = self.get_trial()
        dtype = self.get_datatype(trial)
        model_name = tu.get_sequence_model_name(trial, dtype)
        sequence_id = client_metadata[1]
        if not self.check_constraints(model_name, sequence_id):
            return None

        # Track that the sequence id of the model is used for no-end sequence
        if model_name not in self.sequence_constraints_:
            self.sequence_constraints_[model_name] = {}
        self.sequence_constraints_[model_name][sequence_id] = True

        # Create two variable length sequences, the first with "start" and
        # "end" flags and the second with no "end" flag, where both
        # sequences use the same correlation ID and are sent back-to-back.
        seqlen = [
            max(1, int(self.rng_.normal(len_mean, len_stddev))),
            max(1, int(self.rng_.normal(len_mean, len_stddev))),
        ]
        print(
            "{} {}: valid-no-end seqlen[0] = {}, seqlen[1] = {}".format(
                self.name_, sequence_id, seqlen[0], seqlen[1]
            ),
            file=self.out_stream_,
        )

        values = [
            self.rng_.randint(0, 1024 * 1024, size=seqlen[0]).astype(dtype),
            self.rng_.randint(0, 1024 * 1024, size=seqlen[1]).astype(dtype),
        ]

        # The steps of both sequences are collected in a single list so that
        # the first sequence is actually sent, directly followed by the
        # second one. (Resetting the list per sequence would discard the
        # first sequence.)
        steps = []
        for p in (0, 1):
            # The expectation starts over with every new sequence
            expected_result = 0
            last_idx = seqlen[p] - 1

            for idx in range(seqlen[p]):
                flags = ""
                if idx == 0:
                    flags += ",start"
                # Only the first sequence is terminated with an "end" flag
                if (p == 0) and (idx == last_idx):
                    flags += ",end"

                val = values[p][idx]
                delay_ms = None
                expected_result += val
                expected_result = self.get_expected_result(
                    expected_result, val, trial, flags
                )

                # (flag_str, value, expected_result, delay_ms)
                steps.append(
                    (flags, val, expected_result, delay_ms),
                )

        return self.check_sequence_async(
            client_metadata, trial, model_name, dtype, steps, sequence_name=self.name_
        )


class SequenceValidValidScenario(SequenceScenario):
    """Scenario generating two complete sequences on one correlation ID.

    Both generated sequences carry a "start" flag on their first step and
    an "end" flag on their last step.
    """

    def __init__(
        self,
        name,
        trials,
        rng,
        sequence_constraints,
        verbose=False,
        out_stream=sys.stdout,
    ):
        super().__init__(name, trials, rng, sequence_constraints, verbose, out_stream)

    def check_constraints(self, model_name, sequence_id):
        """Return True: this scenario places no requirement on earlier runs."""
        return True

    def run(
        self,
        client_metadata,
        len_mean=SEQUENCE_LENGTH_MEAN,
        len_stddev=SEQUENCE_LENGTH_STDEV,
    ):
        """Generate the two sequences and submit steps for checking.

        client_metadata is indexed as a pair; element 1 is used as the
        sequence (correlation) id. len_mean / len_stddev parameterize the
        normal distribution the two sequence lengths are drawn from.

        Returns None when check_constraints() rejects the run, otherwise
        whatever check_sequence_async() returns.
        """
        trial = self.get_trial()
        dtype = self.get_datatype(trial)
        model_name = tu.get_sequence_model_name(trial, dtype)
        sequence_id = client_metadata[1]
        if not self.check_constraints(model_name, sequence_id):
            return None

        # Record False for this model / sequence id: the sequences built
        # here always carry an "end" flag, so the id is not left open.
        self.sequence_constraints_.setdefault(model_name, {})[sequence_id] = False

        # Draw both lengths first and both value arrays afterwards so the
        # random generator is consumed in a fixed order. Each length is
        # clamped to at least one step.
        lengths = [
            max(1, int(self.rng_.normal(len_mean, len_stddev))) for _ in (0, 1)
        ]
        print(
            "{} {}: valid-valid seqlen[0] = {}, seqlen[1] = {}".format(
                self.name_, sequence_id, lengths[0], lengths[1]
            ),
            file=self.out_stream_,
        )

        values = [
            self.rng_.randint(0, 1024 * 1024, size=length).astype(dtype)
            for length in lengths
        ]

        # NOTE(review): 'steps' is rebuilt on every pass of this loop, so
        # only the steps of the second sequence are handed to
        # check_sequence_async() below - confirm whether the first sequence
        # was meant to be sent as well.
        for length, sequence_values in zip(lengths, values):
            steps = []
            running_total = 0
            final_idx = length - 1

            for idx in range(length):
                flags = ""
                if idx == 0:
                    flags += ",start"
                if idx == final_idx:
                    flags += ",end"

                val = sequence_values[idx]
                running_total += val
                running_total = self.get_expected_result(
                    running_total, val, trial, flags
                )

                # Step layout: (flag_str, value, expected_result, delay_ms)
                steps.append((flags, val, running_total, None))

        return self.check_sequence_async(
            client_metadata, trial, model_name, dtype, steps, sequence_name=self.name_
        )


class SequenceNoStartScenario(SequenceScenario):
    """Scenario sending a one-step sequence that lacks the "start" flag.

    The server is expected to reject the request with an error mentioning
    that the START flag must be specified; that rejection is the success
    case of this scenario.
    """

    def __init__(
        self,
        name,
        trials,
        rng,
        sequence_constraints,
        verbose=False,
        out_stream=sys.stdout,
    ):
        super().__init__(name, trials, rng, sequence_constraints, verbose, out_stream)

    def check_constraints(self, model_name, sequence_id):
        """Return False when the last run on this model / id was a no-end one.

        no-start cannot follow no-end since the server will just assume
        that the no-start is a continuation of the no-end sequence instead
        of being a sequence missing start flag.
        """
        if (model_name in self.sequence_constraints_) and (
            sequence_id in self.sequence_constraints_[model_name]
        ):
            return not self.sequence_constraints_[model_name][sequence_id]
        return True

    def run(self, client_metadata):
        """Send the no-start sequence and verify the server rejects it.

        client_metadata is indexed as a pair; element 1 is used as the
        sequence (correlation) id.

        Returns:
            None when check_constraints() rejects the run, otherwise the
            number of steps generated (always 1) once the expected error
            has been observed.

        Raises:
            AssertionError: the sequence was accepted although it has no
                "start" flag.
            Exception: any other error raised while sending the sequence is
                propagated unchanged.
        """
        trial = self.get_trial()
        dtype = self.get_datatype(trial)
        model_name = tu.get_sequence_model_name(trial, dtype)
        if not self.check_constraints(model_name, client_metadata[1]):
            return None

        # Record that the latest run on this model / sequence id did not
        # leave a sequence open (False == not a no-end run).
        if model_name not in self.sequence_constraints_:
            self.sequence_constraints_[model_name] = {}
        self.sequence_constraints_[model_name][client_metadata[1]] = False

        # Create a sequence without a "start" flag. Sequence should get an
        # error from the server.
        seqlen = 1
        print(
            "{} {}: no-start seqlen = {}".format(
                self.name_, client_metadata[1], seqlen
            ),
            file=self.out_stream_,
        )

        values = self.rng_.randint(0, 1024 * 1024, size=seqlen).astype(dtype)

        # (flag_str, value, expected_result, delay_ms)
        steps = [(None, values[idx], None, None) for idx in range(seqlen)]

        try:
            self.check_sequence_async(client_metadata, trial, model_name, dtype, steps)
        except Exception as ex:
            # Inspect the text via str() rather than ex.message(): only some
            # exception types provide message(), and calling it on any other
            # exception would raise AttributeError and hide the real error.
            if "must specify the START flag" not in str(ex):
                raise
            # Expect no START error as success case
            return seqlen

        # Reaching this point means the no-start sequence was accepted,
        # e.g. because it was sent to a sequence id that was used for a
        # no-end sequence and the constraints check is inaccurate. Raised
        # outside the try block so it is not swallowed by the handler above.
        raise AssertionError("expected inference failure from missing START flag")


class SequenceValidScenario(SequenceScenario):
    """Scenario sending one sequence that has both "start" and "end" flags."""

    def __init__(
        self,
        name,
        trials,
        rng,
        sequence_constraints,
        verbose=False,
        out_stream=sys.stdout,
    ):
        super().__init__(name, trials, rng, sequence_constraints, verbose, out_stream)

    def check_constraints(self, model_name, sequence_id):
        """Return True: this scenario places no requirement on earlier runs."""
        return True

    def run(
        self,
        client_metadata,
        len_mean=SEQUENCE_LENGTH_MEAN,
        len_stddev=SEQUENCE_LENGTH_STDEV,
    ):
        """Build a variable-length sequence and submit it for checking.

        client_metadata is indexed as a pair; element 1 is used as the
        sequence (correlation) id. len_mean / len_stddev parameterize the
        normal distribution the sequence length is drawn from.

        Returns None when check_constraints() rejects the run, otherwise
        whatever check_sequence_async() returns.
        """
        trial = self.get_trial()
        dtype = self.get_datatype(trial)
        model_name = tu.get_sequence_model_name(trial, dtype)
        sequence_id = client_metadata[1]
        if not self.check_constraints(model_name, sequence_id):
            return None

        # Record False for this model / sequence id: the sequence built
        # here carries an "end" flag, so the id is not left open.
        self.sequence_constraints_.setdefault(model_name, {})[sequence_id] = False

        # Sequence length is drawn from a normal distribution and clamped
        # to at least one step.
        seqlen = max(1, int(self.rng_.normal(len_mean, len_stddev)))
        print(
            "{} {}: valid seqlen = {}".format(self.name_, sequence_id, seqlen),
            file=self.out_stream_,
        )

        values = self.rng_.randint(0, 1024 * 1024, size=seqlen).astype(dtype)

        steps = []
        running_total = 0
        final_idx = seqlen - 1

        for idx in range(seqlen):
            # The first step opens the sequence, the last one closes it; a
            # single-step sequence gets both flags.
            flags = ""
            if idx == 0:
                flags += ",start"
            if idx == final_idx:
                flags += ",end"

            val = values[idx]
            running_total += val
            running_total = self.get_expected_result(running_total, val, trial, flags)

            # Step layout: (flag_str, value, expected_result, delay_ms)
            steps.append((flags, val, running_total, None))

        return self.check_sequence_async(
            client_metadata, trial, model_name, dtype, steps, sequence_name=self.name_
        )


================================================
FILE: qa/L0_long_running_stress/stress.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

from scenarios import *

sys.path.append("../common")

import argparse
import bisect
import os
import threading
import time
import traceback
from builtins import range, str
from functools import partial

import numpy as np
import prettytable
import tritonclient.grpc as grpcclient

# Parsed command-line arguments; assigned by the __main__ section below.
FLAGS = None
# Number of correlation IDs reserved for each worker thread.
CORRELATION_ID_BLOCK_SIZE = 1024 * 1024
# Space-separated backend names, overridable via the BACKENDS environment
# variable.
BACKENDS = os.environ.get("BACKENDS", "onnx plan")

# (thread name, scenario name, formatted traceback) entries appended by the
# worker threads; guarded by _thread_exceptions_mutex.
_thread_exceptions = []
_thread_exceptions_mutex = threading.Lock()

# List of scenarios whose failures do not contribute to test failure at the
# moment. Note that no scenario should produce errors, but some edge cases
# are hard to track down so the investigation is postponed.
ALLOW_FAILURE_SCENARIO = [
    PerfAnalyzerScenario.__name__,
]

# Polled by the worker thread loops; set to True to ask them to exit.
STOP_STRESS_THREAD = False


def get_trials(is_sequence=True):
    """Return the tuple of trial names derived from BACKENDS.

    For every backend other than "libtorch" a "<backend>_nobatch" trial is
    produced. When is_sequence is true the plain "<backend>" trial is
    produced as well (for every backend, including "libtorch").

    BACKENDS is split on any run of whitespace so that repeated, leading or
    trailing spaces (or an empty value) do not yield empty backend names
    such as "_nobatch".
    """
    _trials = ()
    for backend in BACKENDS.split():
        if backend != "libtorch":
            _trials += (backend + "_nobatch",)
        if is_sequence:
            _trials += (backend,)
    return _trials


def update_test_count(
    test_case_count,
    failed_test_case_count,
    request_count,
    test_case_name,
    success=True,
    count=1,
):
    """Update the per-test-case counters in place.

    On failure only failed_test_case_count[test_case_name] is incremented.
    On success test_case_count[test_case_name] is incremented by one and
    request_count[test_case_name] is increased by 'count'. Missing entries
    are created on first use.
    """
    if not success:
        # Count the times each test case fails
        failed_test_case_count[test_case_name] = (
            failed_test_case_count.get(test_case_name, 0) + 1
        )
        return

    # Count the times each test case runs
    test_case_count[test_case_name] = test_case_count.get(test_case_name, 0) + 1

    # Count the number of requests that were sent for each test case
    request_count[test_case_name] = request_count.get(test_case_name, 0) + count


class ScenarioSelector:
    """Pick scenarios at random according to their relative weights."""

    def __init__(self, probs, rng):
        """Store the scenarios and their normalized cumulative weights.

        probs is an iterable of (weight, scenario) pairs; rng must provide
        a rand() method, which get_scenario() uses for each draw.
        """
        self.rng_ = rng
        self.scenarios_ = []

        # Running sum of the weights, one entry per scenario.
        cumulative = []
        total_weight = 0
        for weight, scenario in probs:
            total_weight += weight
            cumulative.append(float(total_weight))
            self.scenarios_.append(scenario)

        # Normalize by the total weight.
        self.probs_range_ = [bound / total_weight for bound in cumulative]

    def get_scenario(self):
        """Return the scenario whose cumulative range contains a fresh draw."""
        draw = self.rng_.rand()
        position = bisect.bisect_left(self.probs_range_, draw)
        return self.scenarios_[position]


def stress_thread(
    name,
    seed,
    correlation_id_base,
    test_case_count,
    failed_test_case_count,
    sequence_request_count,
):
    """Worker that keeps running randomly selected scenarios.

    Output is written to "<name>.log". The loop runs until the module-level
    STOP_STRESS_THREAD flag becomes true. The three count dicts are updated
    through update_test_count(); exceptions raised by a scenario are
    recorded in _thread_exceptions together with the thread and scenario
    names instead of terminating the thread.
    """
    with open("{}.log".format(name), "w") as log:
        print("Starting thread {} with seed {}".format(name, seed), file=log)
        rng = np.random.RandomState(seed)

        # Must use streaming GRPC context to ensure each sequences'
        # requests are received in order. Create 2 common-use contexts
        # with different correlation IDs that are used for most
        # inference requests. Also create some rare-use contexts that
        # are used to make requests with rarely-used correlation IDs.
        common_cnt = 2
        rare_cnt = 8

        # Shared by the sequence scenarios to remember, per model and
        # sequence id, whether the last sequence run was a no-end case,
        # since some scenarios must not follow others.
        is_last_used_no_end = {}

        record = partial(
            update_test_count,
            test_case_count,
            failed_test_case_count,
            sequence_request_count,
        )

        # FIXME revisit to check if it is necessary
        client_metadata_list = [
            (
                grpcclient.InferenceServerClient(
                    "localhost:8001", verbose=FLAGS.verbose
                ),
                correlation_id_base + offset,
            )
            for offset in range(common_cnt + rare_cnt)
        ]

        # The remainder of this thread's correlation id block goes to the
        # perf analyzer scenario.
        pa_start_seq_id = correlation_id_base + common_cnt + rare_cnt
        pa_end_seq_id = correlation_id_base + CORRELATION_ID_BLOCK_SIZE

        # Keyword arguments shared by every scenario constructor.
        common = {"verbose": FLAGS.verbose, "out_stream": log}

        def make_sequence(scenario_cls):
            # All sequence scenarios take the same constructor arguments.
            return scenario_cls(
                name, get_trials(), rng, is_last_used_no_end, **common
            )

        # Weight roughly in thousandth percent
        weighted_scenarios = [
            (60, TimeoutScenario(name, get_trials(False), **common)),
            (80, ResNetScenario(name, **common)),
            (60, CrashingScenario(name, **common)),
            (62, make_sequence(SequenceNoEndScenario)),
            (68, make_sequence(SequenceValidNoEndScenario)),
            (68, make_sequence(SequenceValidValidScenario)),
            (7, make_sequence(SequenceNoStartScenario)),
            (295, make_sequence(SequenceValidScenario)),
            (
                300,
                PerfAnalyzerScenario(
                    name,
                    rng,
                    get_trials(),
                    get_trials(False),
                    sequence_id_range=(pa_start_seq_id, pa_end_seq_id),
                    **common
                ),
            ),
        ]
        selector = ScenarioSelector(weighted_scenarios, rng)

        next_rare = 0
        next_common = 0
        while not STOP_STRESS_THREAD:
            scenario = selector.get_scenario()
            # FIXME generating 'is_rare' for now as some scenario uses it to select
            # client context, but we may not need this if we roll forward the sequence id
            use_rare = rng.rand() < 0.1
            if use_rare:
                client_idx = common_cnt + next_rare
                next_rare = (next_rare + 1) % rare_cnt
            else:
                client_idx = next_common
                next_common = (next_common + 1) % common_cnt

            try:
                res = scenario.run(client_metadata_list[client_idx])
                if res is not None:
                    record(scenario.scenario_name(), count=res)
            except Exception:
                record(scenario.scenario_name(), False)
                with _thread_exceptions_mutex:
                    _thread_exceptions.append(
                        (name, scenario.scenario_name(), traceback.format_exc())
                    )

        # We need to explicitly close each client so that streams get
        # cleaned up and closed correctly, otherwise the application
        # can hang when exiting.
        for client, sequence_id in client_metadata_list:
            print(
                "thread {} closing client {}".format(name, sequence_id), file=log
            )
            client.close()

        print("Exiting thread {}".format(name), file=log)


def load_thread(
    name,
    seed,
    correlation_id_base,
    test_case_count,
    failed_test_case_count,
    sequence_request_count,
):
    """Worker that repeatedly runs the perf analyzer scenario to keep load.

    Output is written to "<name>.log". The loop runs until the module-level
    STOP_STRESS_THREAD flag becomes true. The three count dicts are updated
    through update_test_count(); exceptions raised by the scenario are
    recorded in _thread_exceptions instead of terminating the thread.
    """
    with open("{}.log".format(name), "w") as log:
        print("Starting thread {} with seed {}".format(name, seed), file=log)
        rng = np.random.RandomState(seed)

        record = partial(
            update_test_count,
            test_case_count,
            failed_test_case_count,
            sequence_request_count,
        )

        # The whole correlation id block of this thread is handed to the
        # perf analyzer scenario.
        sequence_id_range = (
            correlation_id_base,
            correlation_id_base + CORRELATION_ID_BLOCK_SIZE,
        )

        # Create PerfAnalyzerScenario with no additional trial,
        # the default model 'resnet', more compute intense than the simple
        # models, will be the only choice for generating load
        load_scenario = PerfAnalyzerScenario(
            name,
            rng,
            [],
            [],
            sequence_id_range=sequence_id_range,
            verbose=FLAGS.verbose,
            out_stream=log,
        )
        selector = ScenarioSelector([(1, load_scenario)], rng)

        while not STOP_STRESS_THREAD:
            scenario = selector.get_scenario()
            try:
                res = scenario.run(None)
                if res is not None:
                    record(scenario.scenario_name(), count=res)
            except Exception:
                record(scenario.scenario_name(), False)
                with _thread_exceptions_mutex:
                    _thread_exceptions.append(
                        (name, scenario.scenario_name(), traceback.format_exc())
                    )

        print("Exiting thread {}".format(name), file=log)


def format_content(content, max_line_length):
    """Insert line breaks into 'content' so lines stay within a width.

    The text is split on single spaces. Every word is emitted followed by
    one space; a newline is emitted before a word whenever appending the
    word plus its trailing space would push the current line past
    max_line_length.
    """
    pieces = []
    # Length accumulated on the current line, trailing spaces included.
    line_length = 0

    for word in content.split(" "):
        width = len(word) + 1
        if line_length + width > max_line_length:
            # Start a new line; the counter restarts at this word's width.
            pieces.append("\n")
            line_length = width
        else:
            line_length += width
        pieces.append(word + " ")

    return "".join(pieces)


def accumulate_count(dict_list, test_case_name):
    """Sum the values stored under test_case_name across all dicts.

    Dicts that have no entry for test_case_name are ignored; the result is
    0 when no dict contains the key.
    """
    return sum(
        counts[test_case_name] for counts in dict_list if test_case_name in counts
    )


def generate_report(
    elapsed_time, _test_case_count, _failed_test_case_count, _sequence_request_count
):
    """Print the result table and write it to "stress_report.txt".

    Args:
        elapsed_time: test duration in seconds.
        _test_case_count: list of per-thread dicts mapping scenario name to
            the number of successful runs.
        _failed_test_case_count: list of per-thread dicts mapping scenario
            name to the number of failed runs.
        _sequence_request_count: list of per-thread dicts mapping scenario
            name to the number of requests sent.

    The report file contains a "Test Duration" line followed by the same
    table that is printed to stdout.
    """
    hrs = elapsed_time // 3600
    mins = (elapsed_time / 60) % 60
    secs = elapsed_time % 60

    test_case_description = {
        "SequenceValidScenario": 'Send a sequence with "start" and "end" flags.',
        "SequenceValidValidScenario": "Send two sequences back to back using the same correlation ID"
        ' with "start" and "end" flags.',
        "SequenceValidNoEndScenario": "Send two sequences back to back using the same correlation ID."
        ' The first with "start" and "end" flags, and the second with no'
        ' "end" flag.',
        "SequenceNoStartScenario": 'Send a sequence without a "start" flag. Sequence should get an'
        " error from the server.",
        "SequenceNoEndScenario": 'Send a sequence with "start" flag but that never ends. The'
        " sequence should be aborted by the server and its slot reused"
        " for another sequence.",
        "TimeoutScenario": "Expect an exception for small timeout values.",
        "ResNetScenario": "Send a request using resnet model.",
        "CrashingScenario": "Client crashes in the middle of inferences.",
        "PerfAnalyzerScenario": "Client that maintains a specific load.",
    }

    t = prettytable.PrettyTable(hrules=prettytable.ALL)
    t.field_names = [
        "Test Case",
        "Number of Failures",
        "Test Count",
        "Request Count",
        "Test Case Description",
    ]
    for column in t.field_names:
        t.align[column] = "l"

    acc_test_case_count = {}
    acc_failed_test_case_count = {}
    acc_sequence_request_count = {}

    for c, description in test_case_description.items():
        # Accumulate all the individual thread counts
        acc_test_case_count[c] = accumulate_count(_test_case_count, c)
        acc_failed_test_case_count[c] = accumulate_count(_failed_test_case_count, c)
        acc_sequence_request_count[c] = accumulate_count(_sequence_request_count, c)

        # Add additional description on scenarios that allow failure
        if c in ALLOW_FAILURE_SCENARIO:
            description += (
                " Note that this scenario is marked to allow "
                "failure due to subtle edge cases that will be "
                "investigated in the future. However, only a "
                "minimal failure count is expected and we should "
                "take action if the number is concerning."
            )
        t.add_row(
            [
                c,
                acc_failed_test_case_count[c],
                acc_test_case_count[c],
                acc_sequence_request_count[c],
                format_content(description, 50),
            ]
        )

    t.add_row(
        [
            "TOTAL",
            sum(acc_failed_test_case_count.values()),
            sum(acc_test_case_count.values()),
            sum(acc_sequence_request_count.values()),
            "X",
        ]
    )

    print(t)

    # The context manager guarantees the report file is closed even when a
    # write fails.
    with open("stress_report.txt", "w") as f:
        f.write(
            "Test Duration: {:0>2}:{:0>2}:{:0>2} (HH:MM:SS)\n".format(
                int(hrs), int(mins), int(secs)
            )
        )
        f.write(str(t))


if __name__ == "__main__":
    # Entry point: parse the options, start the stress and load threads, let
    # them run for the requested duration (or until one of them terminates
    # early), then generate the report and exit non-zero on failure.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-r", "--random-seed", type=int, required=False, help="Random seed."
    )
    parser.add_argument(
        "-t",
        "--concurrency",
        type=int,
        required=False,
        default=8,
        help="Request concurrency. Default is 8.",
    )
    parser.add_argument(
        "--load-thread",
        type=int,
        required=False,
        default=0,
        help="Number of dedicated threads that keep compute "
        "device (i.e. GPU/CPUs) under load. The load generated "
        'from "--concurrency" often behaves as request spike, '
        " this argument may be used to produce consistent load "
        " to keep devices at high utilization. Default is 0, "
        "which means no dedicated load thread will be created.",
    )
    parser.add_argument(
        "-d",
        "--test-duration",
        type=int,
        required=False,
        default=25000,
        help="Duration of stress test to run. Default is 25000 seconds "
        + "(approximately 7 hours).",
    )
    FLAGS = parser.parse_args()

    # Initialize the random seed. For reproducibility each thread
    # maintains its own RNG which is initialized based on this seed.
    if FLAGS.random_seed is not None:
        randseed = FLAGS.random_seed
    else:
        randseed = int(time.time())
    np.random.seed(randseed)

    print("random seed = {}".format(randseed))
    print("concurrency = {}".format(FLAGS.concurrency))
    print("test duration = {}".format(FLAGS.test_duration))

    # Create one set of count dicts per thread (stress threads first, load
    # threads after them) for generating the report.
    _test_case_count = [dict() for _ in range(FLAGS.concurrency + FLAGS.load_thread)]
    _failed_test_case_count = [
        dict() for _ in range(FLAGS.concurrency + FLAGS.load_thread)
    ]
    _sequence_request_count = [
        dict() for _ in range(FLAGS.concurrency + FLAGS.load_thread)
    ]

    threads = []

    for idx in range(FLAGS.concurrency):
        thread_name = "thread_{}".format(idx)

        # Create the seed for the thread. Since these are created in
        # reproducible order off of the initial seed we will get
        # reproducible results when given the same seed.
        seed = np.random.randint(2**32)

        # Each thread is reserved a block of correlation IDs of size
        # CORRELATION_ID_BLOCK_SIZE
        correlation_id_base = 1 + (idx * CORRELATION_ID_BLOCK_SIZE)

        threads.append(
            threading.Thread(
                target=stress_thread,
                args=(
                    thread_name,
                    seed,
                    correlation_id_base,
                    _test_case_count[idx],
                    _failed_test_case_count[idx],
                    _sequence_request_count[idx],
                ),
            )
        )

    for idx in range(FLAGS.load_thread):
        thread_name = "load_thread_{}".format(idx)

        # Create the seed for the thread. Since these are created in
        # reproducible order off of the initial seed we will get
        # reproducible results when given the same seed.
        seed = np.random.randint(2**32)

        # Load threads come after the stress threads, both for the block of
        # correlation IDs (of size CORRELATION_ID_BLOCK_SIZE) and for the
        # count dicts. Using a dedicated slot keeps a load thread from
        # updating the same dicts as a stress thread concurrently.
        slot = FLAGS.concurrency + idx
        correlation_id_base = 1 + (slot * CORRELATION_ID_BLOCK_SIZE)

        threads.append(
            threading.Thread(
                target=load_thread,
                args=(
                    thread_name,
                    seed,
                    correlation_id_base,
                    _test_case_count[slot],
                    _failed_test_case_count[slot],
                    _sequence_request_count[slot],
                ),
            )
        )

    exit_code = 0

    start_time = time.time()
    for t in threads:
        t.start()

    while (time.time() - start_time) < FLAGS.test_duration:
        time.sleep(1)
        # Stop the test early if there is early termination of a thread.
        if not all(t.is_alive() for t in threads):
            exit_code = 1
            break

    # Ask the worker loops to exit.
    STOP_STRESS_THREAD = True
    for t in threads:
        # Given long timeout to determine if a thread hangs
        t.join(timeout=300)
        # join() returns due to timeout
        if t.is_alive() and (exit_code == 0):
            exit_code = 1

    generate_report(
        time.time() - start_time,
        _test_case_count,
        _failed_test_case_count,
        _sequence_request_count,
    )

    with _thread_exceptions_mutex:
        for thread, scenario, ex in _thread_exceptions:
            print("*********\n* {} {}\n{}*********\n".format(thread, scenario, ex))
            # Failures of scenarios listed in ALLOW_FAILURE_SCENARIO are
            # printed but do not fail the test.
            if scenario not in ALLOW_FAILURE_SCENARIO:
                exit_code = 1

    print(
        "Exiting stress test. In the case of failure, please refer to the thread log files for detail"
    )
    sys.exit(exit_code)


================================================
FILE: qa/L0_long_running_stress/stress_mail.py
================================================
#!/usr/bin/env python
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
from datetime import date

import nightly_email_helper

CI_JOB_ID = os.environ.get("CI_JOB_ID", "")

if __name__ == "__main__":
    # Optional first CLI argument is a label spliced into the subject line,
    # followed by a single space.
    label = (sys.argv[1] + " ") if len(sys.argv) >= 2 else ""
    subject = "".join(
        [
            "Triton Long-Running Stress Test ",
            label,
            "Summary: ",
            date.today().strftime("%Y-%m-%d"),
        ]
    )

    # Link to the CI job page; CI_JOB_ID is empty when the variable is unset.
    job_url = "https://gitlab-master.nvidia.com/dl/dgx/tritonserver/-/jobs/" + CI_JOB_ID

    # The report text is embedded verbatim in the second <pre> section.
    with open("stress_report.txt", "r") as report_file:
        report_text = report_file.read()

    sections = [
        '<html><head></head><body><pre style="font-size:11pt;font-family:Arial, sans-serif;">',
        "<p>The table below includes results from long-running stress test. Please refer to the description of each test case to see what different kinds of inference requests were sent. Request concurrency is set to 8.</p>",
        "<p>Please check the CI output webpage for the details of the failures: ",
        job_url,
        "</p>",
        '</pre><pre style="font-size:11pt;font-family:Consolas;">',
        report_text,
        "\n",
        "</pre></body></html>",
    ]
    nightly_email_helper.send(subject, "".join(sections), is_html=True)


================================================
FILE: qa/L0_long_running_stress/test.sh
================================================
#!/bin/bash
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model-repository version: the first positional argument wins,
# otherwise NVIDIA_TRITON_SERVER_VERSION from the environment is used.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Expose only GPU 0 to the processes started by this script.
export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
STRESS_TEST=stress.py

# DATADIR may be overridden from the environment; otherwise it is derived
# from the resolved repository version.
DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh

# If the test should be run in long and high load setting
if [ "$TRITON_PERF_LONG" == 1 ]; then
    # ~ 6.5 days
    TEST_DURATION=480000
    LOAD_THREAD_COUNT=2
    EMAIL_SUBJECT="Long"
else
    # ~ 7 hours
    TEST_DURATION=25000
    LOAD_THREAD_COUNT=0
    EMAIL_SUBJECT=""
fi

# Overall exit status of the script; set to 1 on any failure below.
RET=0

# If BACKENDS not specified, set to all
BACKENDS=${BACKENDS:="onnx libtorch"}
export BACKENDS

# Re-export so child processes (e.g. stress_mail.py) can read the job id.
export CI_JOB_ID=${CI_JOB_ID}

MODEL_DIR=models

# Start from a clean working directory: remove old logs/reports and recreate
# the directories used by the test.
rm -fr *.log *.txt  models validation_data csv_dir && mkdir models validation_data csv_dir

# Print the tensor datatype used for a backend's sequence models.
#   $1 - backend name
# "plan" maps to float32; every other value (including none) maps to int32.
function get_datatype () {
  if [[ $1 == "plan" ]]; then
      echo 'float32'
  else
      echo 'int32'
  fi
}

# Setup model repository - two instances with batch-size 2
# Batched sequence models: one per backend, datatype chosen by get_datatype.
MODELS=""
for BACKEND in $BACKENDS; do
  DTYPE=$(get_datatype $BACKEND)
  MODELS="$MODELS $DATADIR/qa_sequence_model_repository/${BACKEND}_sequence_${DTYPE}"
done

# Copy each model and patch its config in place: max_batch_size 2, a 7 second
# sequence idle timeout, and "count: 2" added after each instance-group kind.
for MODEL in $MODELS; do
    cp -r $MODEL $MODEL_DIR/. && \
      (cd $MODEL_DIR/$(basename $MODEL) && \
        sed -i "s/^max_batch_size:.*/max_batch_size: 2/" config.pbtxt && \
        sed -i "s/max_sequence_idle_microseconds:.*/max_sequence_idle_microseconds: 7000000/" config.pbtxt && \
        sed -i "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: 2/" config.pbtxt && \
        sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 2/" config.pbtxt)
done

# Non-batched sequence models: same patching except max_batch_size is left
# untouched.
MODELS=""
for BACKEND in $BACKENDS; do
    DTYPE=$(get_datatype $BACKEND)
    MODELS="$MODELS $DATADIR/qa_sequence_model_repository/${BACKEND}_nobatch_sequence_${DTYPE}"
done

for MODEL in $MODELS; do
    cp -r $MODEL $MODEL_DIR/. && \
      (cd $MODEL_DIR/$(basename $MODEL) && \
        sed -i "s/max_sequence_idle_microseconds:.*/max_sequence_idle_microseconds: 7000000/" config.pbtxt && \
        sed -i "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: 2/" config.pbtxt && \
        sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 2/" config.pbtxt)
done

# Identity models: append an execute_delay_ms parameter of 1000 to each config.
MODELS=""
for BACKEND in $BACKENDS; do
    MODELS="$MODELS $DATADIR/qa_identity_model_repository/${BACKEND}_nobatch_zero_1_float32"
done

for MODEL in $MODELS; do
    cp -r $MODEL $MODEL_DIR/. && \
      (cd $MODEL_DIR/$(basename $MODEL) && \
        echo "parameters [" >> config.pbtxt && \
        echo "{ key: \"execute_delay_ms\"; value: { string_value: \"1000\" }}" >> config.pbtxt && \
        echo "]" >> config.pbtxt)
done
# Custom identity model: copied from the local custom_models directory, given
# an empty version directory "1" and an execute_delay_ms parameter of 10000.
cp -r ../custom_models/custom_zero_1_float32 $MODEL_DIR/custom_zero_1_float32 && \
  mkdir $MODEL_DIR/custom_zero_1_float32/1 && \
  (cd $MODEL_DIR/custom_zero_1_float32 && \
    echo "parameters [" >> config.pbtxt && \
        echo "{ key: \"execute_delay_ms\"; value: { string_value: \"10000\" }}" >> config.pbtxt && \
        echo "]" >> config.pbtxt)

# ResNet model from the ONNX model store, with an empty optimization block
# appended to its config.
cp -r $DATADIR/onnx_model_store/resnet_v1_50 $MODEL_DIR/. && \
  (cd $MODEL_DIR/resnet_v1_50 && \
    echo "optimization { }" >> config.pbtxt)

# Launch Triton against the local model repository; run_server (from
# ../common/util.sh) leaves SERVER_PID set to "0" when startup fails.
SERVER_ARGS="--model-repository=`pwd`/$MODEL_DIR"
SERVER_LOG="./server.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Run the stress client. Errexit is disabled so a failing client is recorded
# in RET instead of aborting before the server is shut down.
set +e
python $STRESS_TEST -d ${TEST_DURATION} --load-thread ${LOAD_THREAD_COUNT} >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    # The client log was already dumped when the failure was detected above;
    # printing it a second time only doubles the CI output.
    echo -e "\n***\n*** Test FAILED\n***"
fi

# Run only if both TRITON_FROM and TRITON_TO_DL are set
if [[ ! -z "$TRITON_FROM" ]] && [[ ! -z "$TRITON_TO_DL" ]]; then
    python stress_mail.py "$EMAIL_SUBJECT"
fi

exit $RET


================================================
FILE: qa/L0_memory/client.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import os
import queue
import unittest
from functools import partial

import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

# Number of elements each "OUT" response tensor is expected to contain.
# Read from the OUTPUT_NUM_ELEMENTS environment variable; defaults to 1.
OUTPUT_NUM_ELEMENTS = int(os.getenv("OUTPUT_NUM_ELEMENTS", 1))


class UserData:
    """Container handed to the stream callback to collect its deliveries."""

    def __init__(self):
        # Unbounded FIFO of results/errors pushed by `callback` and drained
        # by the test body.
        self._completed_requests = queue.Queue(maxsize=0)


def callback(user_data, result, error):
    """Queue the outcome of one streamed response on ``user_data``.

    A truthy ``error`` is queued in place of ``result``; otherwise ``result``
    is queued. The put waits at most 100 seconds.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome, timeout=100)


class TestTritonInference(unittest.TestCase):
    """Streams one request to ``repeat_int32`` and validates every response."""

    def setUp(self):
        self.triton_client = grpcclient.InferenceServerClient(url="localhost:8001")

    def tearDown(self):
        self.triton_client.stop_stream()

    def test_inference(self):
        """Send one streaming request and check each of the 256 responses.

        Every response must carry ``OUT[0]`` equal to the input element
        selected by ``IDX[0]`` and an ``OUT`` tensor of exactly
        ``OUTPUT_NUM_ELEMENTS`` elements. Nothing may remain queued afterwards.
        """
        response_total = 256
        in_data = np.random.randint(0, 1000, response_total, dtype=np.int32)

        # (tensor name, shape, Triton dtype, payload) for each request input.
        input_specs = (
            ("IN", [response_total], "INT32", in_data),
            ("DELAY", [response_total], "UINT32", np.zeros(response_total, dtype=np.uint32)),
            ("WAIT", [1], "UINT32", np.zeros(1, dtype=np.uint32)),
        )
        inputs = []
        for tensor_name, shape, triton_dtype, payload in input_specs:
            infer_input = grpcclient.InferInput(tensor_name, shape, triton_dtype)
            infer_input.set_data_from_numpy(payload)
            inputs.append(infer_input)

        outputs = [grpcclient.InferRequestedOutput(name) for name in ("OUT", "IDX")]

        user_data = UserData()
        completed = user_data._completed_requests

        self.triton_client.start_stream(callback=partial(callback, user_data))
        self.triton_client.async_stream_infer(
            model_name="repeat_int32",
            inputs=inputs,
            outputs=outputs,
        )

        # Consume exactly one queue entry per expected response.
        for _ in range(response_total):
            data_item = completed.get()

            if isinstance(data_item, InferenceServerException):
                self.fail(f"InferenceServerException: {data_item}")

            # Any exception raised while validating, including a failed
            # assertion, is reported through self.fail with its message.
            try:
                response_idx = data_item.as_numpy("IDX")[0]
                response_data = data_item.as_numpy("OUT")
                expected_data = in_data[response_idx]

                self.assertEqual(
                    response_data[0],
                    expected_data,
                    f"Validation failed at index {response_idx} - response_data[0]: {response_data[0]}, expected_data: {expected_data}",
                )
                self.assertEqual(
                    response_data.size,
                    OUTPUT_NUM_ELEMENTS,
                    f"Validation failed - response_data.size: {response_data.size}, OUTPUT_NUM_ELEMENTS: {OUTPUT_NUM_ELEMENTS}",
                )
            except Exception as e:
                self.fail(f"Error processing response: {str(e)}")

        # The queue must be empty once all expected responses were consumed.
        self.assertEqual(
            completed.qsize(),
            0,
            "Did not receive the expected number of responses.",
        )


# Run the unittest test runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_memory/test.sh
================================================
#!/bin/bash
# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

source ../common/util.sh

TEST_LOG="./memory_test.log"
MEMORY_TEST=./memory_test
PINNED_MEMORY_MANAGER_TEST=./pinned_memory_manager_test

# Overall exit status of the script; set to 1 on any failure below.
RET=0

# Must run on multiple devices
export CUDA_VISIBLE_DEVICES=0,1

# Remove the log left by a previous run. Both tests below append to it, so a
# stale file would otherwise mix old output into this run's log.
rm -f "$TEST_LOG"

# Errexit is disabled around each binary so a failure is recorded in RET and
# the remaining tests still run.
set +e
$MEMORY_TEST >>$TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $TEST_LOG
    echo -e "\n***\n*** Memory Test Failed\n***"
    RET=1
fi
set -e

set +e
$PINNED_MEMORY_MANAGER_TEST >>$TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $TEST_LOG
    echo -e "\n***\n*** Pinned Memory Manager Test Failed\n***"
    RET=1
fi
set -e


###### Test --grpc-max-response-pool-size server option #######

# Start a background sampler that tracks the peak Rss of a process.
#   $1 - PID of the process to watch
# Prints "<sampler PID> <temp file path>" on stdout. The temp file holds the
# highest Rss value (second field of the "Rss:" line in
# /proc/<pid>/smaps_rollup) observed so far.
monitor_memory() {
  local SERVER_PID=$1
  local MAX_MEM_FILE=$(mktemp)
  # Seed the file so a reader always finds a number in it.
  echo "0" > "$MAX_MEM_FILE"
  (
    local MAX_MEM=0
    # Keep sampling for as long as the watched process exists.
    while ps -p "$SERVER_PID" >/dev/null 2>&1; do
      CURRENT_MEM=$(awk '/Rss:/ {print $2}' /proc/$SERVER_PID/smaps_rollup)
      # An empty reading is treated as 0.
      CURRENT_MEM=${CURRENT_MEM:-0}
      if [ "$CURRENT_MEM" -gt "$MAX_MEM" ]; then
        MAX_MEM=$CURRENT_MEM
        echo "$MAX_MEM" > "$MAX_MEM_FILE"
      fi
      sleep 0.1
    done
    # Final write once the watched process is gone.
    echo "$MAX_MEM" > "$MAX_MEM_FILE"
    exit 0
  ) &

  # PID of the background subshell started above.
  MONITOR_PID=$!
  echo "$MONITOR_PID $MAX_MEM_FILE"
}

# Best-effort shutdown of the memory sampler and the server.
#   $1 - PID of the sampler started by monitor_memory
#   $2 - PID of the server
# Each PID is signalled and then waited on; failures of kill/wait are ignored
# and their error output is discarded.
stop_server_and_monitoring_memory() {
  local monitor_pid=$1
  local server_pid=$2
  local pid
  # Sampler first, then the server.
  for pid in "$monitor_pid" "$server_pid"; do
    if kill "$pid" 2>/dev/null; then
      wait "$pid" 2>/dev/null || true
    fi
  done
}

MODELDIR="./python_models"
# Exported so child processes can read it; client.py compares each response's
# OUT size against this value.
export OUTPUT_NUM_ELEMENTS=49807360
# Append an "output_num_elements" parameter line to the end of the model
# config. NOTE(review): the line is appended unconditionally, so running this
# script repeatedly against the same config.pbtxt adds it again each time.
sed -i '$a\parameters: [{ key: "output_num_elements" value: { string_value: "'"$OUTPUT_NUM_ELEMENTS"'" }}]' $MODELDIR/repeat_int32/config.pbtxt

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_BASE_ARGS="--model-repository=${MODELDIR} --log-verbose=2 --allow-metrics=0"

# Maps pool size ("1", "25", "50", "default") to the measured memory growth.
declare -A MEMORY_USAGE=()

# Measure the server's memory growth for each --grpc-max-response-pool-size
# setting ("default" runs the server without the option). For every setting:
# start the server, record its initial Rss, sample the peak Rss while
# client.py runs, then store (peak - initial) in MEMORY_USAGE.
for POOL_SIZE in 1 25 50 default; do
  if [[ "$POOL_SIZE" = "default" ]]; then
    SERVER_ARGS="${SERVER_BASE_ARGS}"
  else
    SERVER_ARGS="${SERVER_BASE_ARGS} --grpc-max-response-pool-size=${POOL_SIZE}"
  fi

  CLIENT_LOG="./client_pool_size_${POOL_SIZE}.log"
  SERVER_LOG="./server_pool_size_${POOL_SIZE}.log"

  run_server
  if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    # Nothing to stop here: the server did not start and no sampler has been
    # launched for this iteration. SERVER_PID is "0" in this branch, and
    # `kill 0` would signal every process in this script's process group.
    exit 1
  fi
  sleep 2

  # Capture initial memory usage
  INIT_MEM=$(awk '/Rss:/ {print $2}' /proc/$SERVER_PID/smaps_rollup)
  read -r MONITOR_PID MAX_MEM_FILE < <(monitor_memory "$SERVER_PID")

  # Run client script
  set +e
  python3 client.py >> $CLIENT_LOG 2>&1
  if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Running client for grpc-max-response-pool-size=${POOL_SIZE} FAILED\n***" >> $CLIENT_LOG 2>&1
    echo -e "\n***\n*** Running client for grpc-max-response-pool-size=${POOL_SIZE} FAILED\n***"
    # Quoted so an empty value cannot shift the server PID into the
    # sampler-PID position.
    stop_server_and_monitoring_memory "$MONITOR_PID" "$SERVER_PID"
    exit 1
  fi
  set -e
  sleep 2

  stop_server_and_monitoring_memory "$MONITOR_PID" "$SERVER_PID"

  # The sampler writes its running maximum to MAX_MEM_FILE; an empty or
  # missing file means no measurement was collected.
  if [[ -s "$MAX_MEM_FILE" ]]; then
    MAX_MEM=$(tail -n 1 "$MAX_MEM_FILE" 2>/dev/null || echo 0)
    MEMORY_USAGE["$POOL_SIZE"]=$((MAX_MEM - INIT_MEM))
    echo "Pool size: $POOL_SIZE | Initial Memory: ${INIT_MEM} KB | Peak Memory: ${MEMORY_USAGE[$POOL_SIZE]} KB" >> "memory.log"
    rm -f "$MAX_MEM_FILE"
  else
    echo "FAILED to collect memory usage for grpc-max-response-pool-size=${POOL_SIZE}"
    exit 1
  fi
done

# Walk the measurements in the order default, 50, 25, 1 and require each one
# to be strictly lower than the one before it. A pair is only compared when
# the earlier measurement is non-zero.
prev_mem=0
prev_size=""
for size in default 50 25 1; do
  current_mem=${MEMORY_USAGE[$size]}
  if [[ -n "$prev_size" ]] && [[ "$prev_mem" -ne 0 ]]; then
    if [[ "$current_mem" -ge "$prev_mem" ]]; then
      echo -e "\n***\n*** FAILED - Memory $current_mem KB with pool=$size >= $prev_mem KB (with pool=$prev_size)\n***"
      RET=1
    fi
  fi
  prev_mem=$current_mem
  prev_size=$size
done


# Report the overall verdict and propagate it as the exit status.
if [ $RET -ne 0 ]; then
  echo -e "\n***\n*** Test FAILED\n***"
else
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_memory_growth/busy_op_test.py
================================================
#!/usr/bin/python

# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
from builtins import range

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

FLAGS = None

if __name__ == "__main__":
    # Command-line interface: model name and request count are mandatory.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-u",
        "--url",
        type=str,
        required=False,
        default="localhost:8000",
        help="Inference server URL. Default is localhost:8000.",
    )
    parser.add_argument("-m", "--model", type=str, required=True, help="Name of model.")
    parser.add_argument(
        "-n",
        "--num-requests",
        type=int,
        required=True,
        help="Number of asynchronous requests to launch.",
    )
    FLAGS = parser.parse_args()

    # The client concurrency is set to the number of requests so that the
    # delivery of the async requests is not blocked.
    # See the comment for more details: https://github.com/triton-inference-server/client/blob/r24.02/src/python/library/tritonclient/http/_client.py#L1501
    triton_client = httpclient.InferenceServerClient(
        FLAGS.url, verbose=FLAGS.verbose, concurrency=FLAGS.num_requests
    )

    # One shared input tensor of shape [1, 5 * 1024 * 1024] float32 values
    # (5 Mi elements at 4 bytes each, i.e. 20 MiB), reused for every request.
    input_data = np.random.randn(1, 5 * 1024 * 1024).astype(np.float32)
    input0 = httpclient.InferInput(
        "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
    )
    input0.set_data_from_numpy(input_data)
    inputs = [input0]

    # Launch all requests first and keep their handles, in send order.
    pending = []
    for i in range(FLAGS.num_requests):
        pending.append(triton_client.async_infer(FLAGS.model, inputs))
        print("Sent request %d" % i, flush=True)

    # Then block on each handle in the same order.
    for i, handle in enumerate(pending):
        handle.get_result()
        print("Received result %d" % i, flush=True)


================================================
FILE: qa/L0_memory_growth/server_memory_mail.py
================================================
#!/usr/bin/env python
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import glob
from datetime import date

import nightly_email_helper

if __name__ == "__main__":
    today = date.today().strftime("%Y-%m-%d")
    # The first CLI argument is a label for the subject line. It is treated as
    # optional so that running the script without arguments produces a subject
    # without a label instead of raising IndexError.
    label = (sys.argv[1] + " ") if len(sys.argv) >= 2 else ""
    subject = "Triton Server Memory Growth " + label + "Summary: " + today

    # Text graphs are picked up from the current working directory.
    memory_graphs_resnet = glob.glob("memory_growth_resnet*.log")
    memory_graphs_busyop = glob.glob("memory_growth_busyop.log")

    write_up = "<p>This test uses perf_analyzer as clients running on 4 different models. The max allowed difference between mean and maximum memory usage is set to 150MB.</p>"
    write_up += "<p><b>&#8226 What to look for</b><br>A linear memory growth in the beginning of the graph is acceptable only when it is followed by a flat memory usage. If a linear memory growth is observed during the entire test then there is possibly a memory leak.</p>"
    html_content = (
        '<html><head></head><body><pre style="font-size:11pt;font-family:Arial, sans-serif;">'
        + write_up
        + '</pre><pre style="font-size:11pt;font-family:Consolas;">'
    )
    # Each graph is embedded as: blank line, file name, file contents.
    for mem_graph in sorted(memory_graphs_resnet):
        html_content += "\n" + mem_graph + "\n"
        with open(mem_graph, "r") as f:
            html_content += f.read() + "\n"

    html_content += "<p>The busyop test is by design to show that actual memory growth is correctly detected and displayed.</p>"

    # When we see PTX failures in CI, the busyop memory graph is not created.
    if memory_graphs_busyop:
        write_up = "<p><b>&#8226 What to look for</b><br>The memory usage should increase continually over time, and a linear growth should be observed in the graph below.</p>"
        html_content += (
            '</pre><pre style="font-size:11pt;font-family:Arial, sans-serif;">'
            + write_up
            + '</pre><pre style="font-size:11pt;font-family:Consolas;">'
        )
        for mem_graph in sorted(memory_graphs_busyop):
            html_content += "\n" + mem_graph + "\n"
            with open(mem_graph, "r") as f:
                html_content += f.read() + "\n"
    else:
        html_content += (
            "<p>The busyop model caused PTX failures when running the CI.</p>"
        )
    html_content += "</pre></body></html>"
    nightly_email_helper.send(subject, html_content, is_html=True)


================================================
FILE: qa/L0_memory_growth/test.sh
================================================
#!/bin/bash
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model-repository version: the first positional argument wins,
# otherwise NVIDIA_TRITON_SERVER_VERSION from the environment is used.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Single GPU
export CUDA_VISIBLE_DEVICES=0

# Clients
# perf_analyzer is installed with pip at the start of every run.
pip3 install perf_analyzer
PERF_ANALYZER=perf_analyzer
IMAGE=../images/vulture.jpeg

# Models
TRTEXEC=/usr/src/tensorrt/bin/trtexec
DATADIR=/data/inferenceserver/${REPO_VERSION}

# Server
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_TIMEOUT=1200

# Valgrind massif
# --time-unit=B makes massif use bytes allocated/deallocated as its time axis.
LEAKCHECK=/usr/bin/valgrind
LEAKCHECK_ARGS_BASE="--tool=massif --time-unit=B"
MASSIF_TEST=../common/check_massif_log.py

source ../common/util.sh

# Function that checks the massif logs
#   $1 - path of the massif output file
# NOTE(review): the body only binds its argument to a local variable and
# performs no check; this looks like a leftover stub - confirm whether it is
# still called anywhere before relying on it.
function check_massif_log () {
    local massif_out=$1
}

# Start from a clean working directory.
rm -rf *.log models/ *.massif

# Test parameters
STATIC_BATCH=128
INSTANCE_CNT=2
CONCURRENCY=20
CLIENT_BS=8

# Set the number of repetitions in nightly and weekly tests
# Set the email subject for nightly and weekly tests
if [ "$TRITON_PERF_WEEKLY" == 1 ]; then
    if [ "$TRITON_PERF_LONG" == 1 ]; then
        # ~ 2.5 days for system under test
        REPETITION=1400
        EMAIL_SUBJECT="Weekly Long"
    else
        # Run the test for each model approximately 1.5 hours
        # All tests are run cumulatively for 7 hours
        REPETITION=200
        EMAIL_SUBJECT="Weekly"
    fi
else
    REPETITION=10
    EMAIL_SUBJECT="Nightly"
fi

# Threshold memory growth in MB
# NOTES:
# - Bounded memory growth tests typically show < 70 MB usage
#   - Plan/ONNX is typically between 20-40 MB
#   - Savedmodel is closer to 50-70 MB
# - Unbounded memory growth test typically shows > 100 MB usage
# Exported so child processes can read it (presumably the massif log checker
# - confirm against ../common/check_massif_log.py).
export MAX_ALLOWED_ALLOC="100"

# Create local model repository
mkdir -p models/
cp -r $DATADIR/perf_model_store/resnet50_* models/

# Create the TensorRT plan from ONNX model
# The ONNX file is staged in the model directory only as trtexec input and is
# removed again after the engine has been built.
rm -fr models/resnet50_fp32_plan && mkdir -p models/resnet50_fp32_plan/1 && \
cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/1/model.onnx models/resnet50_fp32_plan/ && \
cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/labels.txt models/resnet50_fp32_plan/

# Errexit is disabled so a trtexec failure reaches the explicit check below.
set +e
# Build TRT engine
# Batch dimension ranges from 1 to STATIC_BATCH, optimized for STATIC_BATCH.
$TRTEXEC --onnx=models/resnet50_fp32_plan/model.onnx --saveEngine=models/resnet50_fp32_plan/1/model.plan \
         --minShapes=input:1x3x224x224 --optShapes=input:${STATIC_BATCH}x3x224x224 \
         --maxShapes=input:${STATIC_BATCH}x3x224x224

if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed to generate resnet50 PLAN\n***"
    exit 1
fi

set -e

# Reuse the ONNX model's config, rewriting its name and platform for the plan.
rm models/resnet50_fp32_plan/model.onnx
cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/config.pbtxt models/resnet50_fp32_plan/ && \
sed -i "s/^name: .*/name: \"resnet50_fp32_plan\"/g" models/resnet50_fp32_plan/config.pbtxt && \
sed -i 's/^platform: .*/platform: "tensorrt_plan"/g' models/resnet50_fp32_plan/config.pbtxt

# Overall exit status of the script; set to 1 on any failure below.
RET=0

for MODEL in $(ls models); do
    # Skip the resnet50_fp32_libtorch model as it is running into `misaligned address'
    # Tracked here: https://nvbugs/3954104
    # Skip the resnet50_fp32_onnx model as the inference hangs on A100 with batch size > 1.
    # Tracked here: https://linear.app/nvidia/issue/TRI-304
    if [[ "$MODEL" == "resnet50_fp32_libtorch" || "$MODEL" == "resnet50_fp32_onnx" ]]; then
        continue
    fi

    # Create temporary model repository and copy only the model being tested
    rm -rf test_repo && mkdir test_repo
    cp -r models/$MODEL test_repo/

    # Set server, client and valgrind arguments
    SERVER_ARGS="--model-repository=`pwd`/test_repo"
    LEAKCHECK_LOG="test_${MODEL}.valgrind.log"
    MASSIF_LOG="test_${MODEL}.massif"
    GRAPH_LOG="memory_growth_${MODEL}.log"
    LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --massif-out-file=$MASSIF_LOG --max-threads=3000 --log-file=$LEAKCHECK_LOG"
    SERVER_LOG="test_$MODEL.server.log"
    CLIENT_LOG="test_$MODEL.client.log"

    # Enable dynamic batching, set max batch size and instance count
    if [ "$MODEL" == "resnet50_fp32_libtorch" ]; then
        sed -i "s/^max_batch_size:.*/max_batch_size: 32/" test_repo/$MODEL/config.pbtxt
    else
        sed -i "s/^max_batch_size:.*/max_batch_size: ${STATIC_BATCH}/" test_repo/$MODEL/config.pbtxt
    fi
    echo "dynamic_batching {}" >> test_repo/$MODEL/config.pbtxt
    echo "instance_group [{ count: ${INSTANCE_CNT} }]" >> test_repo/$MODEL/config.pbtxt

    # Run the server
    run_server_leakcheck
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    set +e

    TEMP_CLIENT_LOG=temp_client.log
    TEMP_RET=0

    SECONDS=0
    # Run the perf analyzer 'REPETITION' times
    for ((i=1; i<=$REPETITION; i++)); do
        # [TMA-621] Use --no-stability mode in perf analyzer when available
        $PERF_ANALYZER -v -m $MODEL -i grpc --concurrency-range $CONCURRENCY -b $CLIENT_BS -p 20000 > $TEMP_CLIENT_LOG 2>&1
        PA_RET=$?
        # Success
        if [ ${PA_RET} -eq 0 ]; then
          continue
        # Unstable measurement: OK for this test
        elif [ ${PA_RET} -eq 2 ]; then
          continue
        # Other failures unexpected, report error
        else
            cat $TEMP_CLIENT_LOG >> $CLIENT_LOG
            echo -e "\n***\n*** perf_analyzer for $MODEL failed on iteration $i\n***" >> $CLIENT_LOG
            RET=1
        fi
    done
    TEST_DURATION=$SECONDS

    set -e

    # Stop Server
    kill $SERVER_PID
    wait $SERVER_PID

    set +e

    # Log test duration and the graph for memory growth
    hrs=$(printf "%02d" $((TEST_DURATION / 3600)))
    mins=$(printf "%02d" $(((TEST_DURATION / 60) % 60)))
    secs=$(printf "%02d" $((TEST_DURATION % 60)))
    echo -e "Test Duration: $hrs:$mins:$secs (HH:MM:SS)" >> ${GRAPH_LOG}
    ms_print ${MASSIF_LOG} | head -n35 >> ${GRAPH_LOG}
    cat ${GRAPH_LOG}
    # Check the massif output
    python $MASSIF_TEST $MASSIF_LOG $MAX_ALLOWED_ALLOC --start-from-middle >> $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test for $MODEL Failed.\n***"
        RET=1
    fi
    # Always output memory usage for easier triage of MAX_ALLOWED_ALLOC settings in the future
    grep -i "Change in memory allocation" "${CLIENT_LOG}" || true
    set -e
done

# Next perform a test that has unbounded memory growth. Use the busy op Python
# model with a sleep function in order to force requests to sit in the queue,
# which results in memory growth.
BUSY_OP_TEST=busy_op_test.py
NUM_REQUESTS=100

rm -rf test_repo && mkdir test_repo
mkdir -p test_repo/busy_op/1/
cp ../python_models/busy_op/model.py test_repo/busy_op/1/
cp ../python_models/busy_op/config.pbtxt test_repo/busy_op

SERVER_ARGS="--model-repository=`pwd`/test_repo"

LEAKCHECK_LOG="test_busyop.valgrind.log"
MASSIF_LOG="test_busyop.massif"
GRAPH_LOG="memory_growth_busyop.log"
LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --massif-out-file=$MASSIF_LOG --max-threads=3000 --log-file=$LEAKCHECK_LOG"
SERVER_LOG="test_busyop.server.log"
CLIENT_LOG="test_busyop.client.log"
SKIP_BUSYOP=0

# Run server
run_server_leakcheck
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    if [ `grep -c "provided PTX was compiled" $SERVER_LOG` != "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER due to PTX issue\n***"
        SKIP_BUSYOP=1
    else
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        exit 1
    fi
fi

set +e

# Run the busy_op test if no PTX issue was observed when launching server
if [ $SKIP_BUSYOP -ne 1 ]; then
    SECONDS=0
    python $BUSY_OP_TEST -v -m busy_op -n $NUM_REQUESTS > $CLIENT_LOG 2>&1
    TEST_RETCODE=$?
    TEST_DURATION=$SECONDS
    if [ ${TEST_RETCODE} -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** busy_op_test.py Failed\n***"
        RET=1
    fi
    set -e

    # Stop Server
    kill $SERVER_PID
    wait $SERVER_PID

    set +e

    # Log test duration and the graph for memory growth
    hrs=$(printf "%02d" $((TEST_DURATION / 3600)))
    mins=$(printf "%02d" $(((TEST_DURATION / 60) % 60)))
    secs=$(printf "%02d" $((TEST_DURATION % 60)))
    echo -e "Test Duration: $hrs:$mins:$secs (HH:MM:SS)" >> ${GRAPH_LOG}
    ms_print ${MASSIF_LOG} | head -n35 >> ${GRAPH_LOG}
    cat ${GRAPH_LOG}
    # Check the massif output
    python $MASSIF_TEST $MASSIF_LOG $MAX_ALLOWED_ALLOC --start-from-middle >> $CLIENT_LOG 2>&1
    # This busyop test is expected to return a non-zero error since it is
    # intentionally testing unbounded growth. If it returns success for some
    # reason, raise error.
    if [ $? -ne 1 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Massif test for graphdef_busyop Failed\n***"
        echo -e "\n***\n*** Expected unbounded growth, but found acceptable growth within ${MAX_ALLOWED_ALLOC} MB\n***"
        RET=1
    fi
    # Always output memory usage for easier triage of MAX_ALLOWED_ALLOC settings in the future
    grep -i "Change in memory allocation" "${CLIENT_LOG}" || true
fi

set -e

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test Failed\n***"
fi

# Run only if both TRITON_FROM and TRITON_TO_DL are set
if [[ ! -z "$TRITON_FROM" ]] && [[ ! -z "$TRITON_TO_DL" ]]; then
    python server_memory_mail.py "$EMAIL_SUBJECT"
fi

exit $RET


================================================
FILE: qa/L0_metrics/cpu_metrics_test.py
================================================
#!/usr/bin/python
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import re
import threading
import time
import unittest
from collections import defaultdict

import numpy as np
import requests
import tritonclient.http as httpclient

_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")
CPU_UTILIZATION = "nv_cpu_utilization"
CPU_USED_MEMORY = "nv_cpu_memory_used_bytes"
CPU_TOTAL_MEMORY = "nv_cpu_memory_total_bytes"


def get_metrics():
    """Fetch the server metrics page and extract the CPU metric values.

    Returns:
        A tuple ``(utilization, used_bytes, total_bytes)`` where utilization
        is a float and the two memory values are ints.

    Raises:
        requests.HTTPError: If the metrics endpoint returns an error status.
        ValueError: If one of the CPU metrics is not present in the response.
    """
    # The exponent part is optional so that small values serialized in
    # scientific notation (e.g. "1.0e-05") are captured in full rather than
    # being truncated to their mantissa.
    utilization_pattern = re.compile(
        rf"{CPU_UTILIZATION} (\d+\.?\d*(?:[eE][+-]?\d+)?)"
    )
    used_bytes_pattern = re.compile(rf"{CPU_USED_MEMORY} (\d+)")
    total_bytes_pattern = re.compile(rf"{CPU_TOTAL_MEMORY} (\d+)")

    r = requests.get(f"http://{_tritonserver_ipaddr}:8002/metrics")
    r.raise_for_status()

    def _extract(name, pattern, convert):
        # Fail with an explicit message instead of an AttributeError on None
        # when the metric is absent from the response.
        match = pattern.search(r.text)
        if match is None:
            raise ValueError(f"Metric '{name}' not found in metrics response")
        return convert(match.group(1))

    utilization_value = _extract(CPU_UTILIZATION, utilization_pattern, float)
    used_bytes_value = _extract(CPU_USED_MEMORY, used_bytes_pattern, int)
    total_bytes_value = _extract(CPU_TOTAL_MEMORY, total_bytes_pattern, int)

    return utilization_value, used_bytes_value, total_bytes_value


class TestCpuMetrics(unittest.TestCase):
    """Checks on the CPU metrics exposed by the server metrics endpoint."""

    def setUp(self):
        """Create the completion event and the inference inputs for the tests."""
        # Set by the inference test to tell the metrics collection thread to stop.
        self.inference_completed = threading.Event()

        shape = [1, 16]
        self.model_name = "libtorch_float32_float32_float32"
        input0_data = np.random.rand(*shape).astype(np.float32)
        input1_data = np.random.rand(*shape).astype(np.float32)

        self.inputs = [
            httpclient.InferInput(
                "INPUT0", input0_data.shape, "FP32"
            ).set_data_from_numpy(input0_data),
            httpclient.InferInput(
                "INPUT1", input1_data.shape, "FP32"
            ).set_data_from_numpy(input1_data),
        ]

    def _validate_metric_variance(self, observed_metrics: dict):
        """Assert that the observed utilization and used-memory samples vary.

        Args:
            observed_metrics: Mapping from metric name to the list of values
                sampled for that metric, in observation order.
        """
        dupe_value_tolerance = 5
        for metric in [CPU_UTILIZATION, CPU_USED_MEMORY]:
            observed_values = observed_metrics[metric]
            observed_count = len(observed_values)
            print(
                f"Observed {metric} count: {observed_count}, values: {observed_values}"
            )

            # Must have at least 1 more than the duplicate tolerance
            self.assertGreater(
                observed_count,
                dupe_value_tolerance,
                f"Found too many sequential duplicate values for {metric}. Double check the server-side --metrics-interval and observation interval in this test, or consider tuning the duplicate tolerance.",
            )

            # Don't allow observed metric values to be repeated sequentially
            # more than a certain tolerance. The expectation is that these metrics
            # will vary while the server is processing requests in the background,
            # provided the server was configured with a small metrics update interval.
            sequential_dupes = 0
            max_sequential_dupes = 0
            prev_value = observed_values[0]
            for value in observed_values[1:]:
                if value == prev_value:
                    sequential_dupes += 1
                else:
                    # If unique value found, reset counter
                    sequential_dupes = 0

                # For future observability on dupe frequency to tune the tolerance
                max_sequential_dupes = max(max_sequential_dupes, sequential_dupes)

                self.assertLess(
                    sequential_dupes,
                    dupe_value_tolerance,
                    f"{metric} repeated the same value too many times in a row "
                    f"(sequential duplicates: {sequential_dupes}, "
                    f"tolerance: {dupe_value_tolerance})",
                )
                prev_value = value

            print(
                f"Max sequential duplicate values found for {metric}: {max_sequential_dupes}"
            )

    def _collect_metrics(self, observed_metrics, interval_secs=1):
        """
        Collects metrics at provided 'interval_secs' and stores them in the
        provided 'observed_metrics' dictionary for postprocessing.

        Runs until 'self.inference_completed' is set.
        """
        # Give the test and server some time to begin processing requests
        # before beginning observation loop.
        time.sleep(1)

        while not self.inference_completed.is_set():
            util_value, used_memory_value, _ = get_metrics()
            observed_metrics[CPU_UTILIZATION].append(util_value)
            observed_metrics[CPU_USED_MEMORY].append(used_memory_value)
            time.sleep(interval_secs)

    def test_cpu_metrics_during_inference(self):
        """Sample CPU metrics while inferences run and check that they vary."""
        with httpclient.InferenceServerClient(
            url=f"{_tritonserver_ipaddr}:8000", concurrency=10
        ) as client:
            # Start a thread to collect metrics asynchronously while inferences are
            # executing, store them in a dictionary for postprocessing validation.
            observed_metrics = defaultdict(list)
            metrics_thread = threading.Thread(
                target=self._collect_metrics, args=(observed_metrics,)
            )
            metrics_thread.start()

            try:
                # Fire off many asynchronous inference requests to keep server
                # busy while monitoring the CPU metrics. Ideal target is about
                # 20-30 seconds of inference to get a good number of metric samples.
                async_requests = [
                    client.async_infer(
                        model_name=self.model_name,
                        inputs=self.inputs,
                    )
                    for _ in range(2000)
                ]

                # Wait for all inference requests to complete
                for async_request in async_requests:
                    async_request.get_result()
            finally:
                # Always signal and join the metrics thread, even when an
                # inference raises; otherwise the collection loop never exits
                # and the non-daemon thread keeps the test process alive.
                self.inference_completed.set()
                metrics_thread.join()

        self._validate_metric_variance(observed_metrics)

    def test_cpu_metrics_ranges(self):
        """Sanity check the value ranges of the CPU related metrics."""
        utilization, used_memory, total_memory = get_metrics()
        self.assertTrue(
            0 <= utilization <= 1.0,
            f"{CPU_UTILIZATION} out of range [0, 1]: {utilization}",
        )
        self.assertTrue(
            0 <= used_memory <= total_memory,
            f"{CPU_USED_MEMORY} ({used_memory}) not within "
            f"[0, {CPU_TOTAL_MEMORY}] ({total_memory})",
        )
        # NOTE: Can be improved in future to compare upper bound against psutil
        # system memory if we introduce the dependency into the test/container.
        self.assertGreater(total_memory, 0)

if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_metrics/ensemble_decoupled/async_execute_decouple/1/model.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python backend model that sends a configurable number of delayed responses."""

    async def execute(self, requests):
        """Send RESPONSE_NUM responses for the first request, waiting before each.

        Only ``requests[0]`` is processed. The first element of the
        "WAIT_SECONDS" input is the delay applied before every response and
        the first element of the "RESPONSE_NUM" input is the number of
        responses to send. The last response carries the FINAL flag.

        Returns:
            None; all responses are delivered through the response sender.
        """
        # Local import keeps this block self-contained with respect to the
        # module-level imports.
        import asyncio

        request = requests[0]
        wait_secs = pb_utils.get_input_tensor_by_name(
            request, "WAIT_SECONDS"
        ).as_numpy()[0]
        response_num = pb_utils.get_input_tensor_by_name(
            request, "RESPONSE_NUM"
        ).as_numpy()[0]
        output_tensors = [
            pb_utils.Tensor("WAIT_SECONDS", np.array([wait_secs], np.float32)),
            pb_utils.Tensor("RESPONSE_NUM", np.array([1], np.uint8)),
        ]

        # Work with a plain int so the "last response" arithmetic below cannot
        # wrap around in an unsigned NumPy scalar.
        response_count = int(response_num)
        delay_secs = wait_secs.item()

        response_sender = request.get_response_sender()

        if response_count == 0:
            # No responses requested: still close the stream, otherwise the
            # request would never receive the FINAL flag.
            response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
            return None

        for i in range(response_count):
            # Yield to the event loop while waiting instead of blocking it
            # with time.sleep().
            await asyncio.sleep(delay_secs)
            response = pb_utils.InferenceResponse(output_tensors)
            if i != response_count - 1:
                response_sender.send(response)
            else:
                response_sender.send(
                    response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
                )

        return None


================================================
FILE: qa/L0_metrics/ensemble_decoupled/async_execute_decouple/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

backend: "python"
input [
  {
    name: "WAIT_SECONDS"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    name: "RESPONSE_NUM"
    data_type: TYPE_UINT8
    dims: [ 1 ]
  }
]
output [
  {
    name: "WAIT_SECONDS"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    name: "RESPONSE_NUM"
    data_type: TYPE_UINT8
    dims: [ 1 ]
  }
]

instance_group [{ kind: KIND_CPU }]
model_transaction_policy { decoupled: True }


================================================
FILE: qa/L0_metrics/ensemble_decoupled/ensemble/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "ensemble"
platform: "ensemble"
input [
 {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: [ 1 ]
 },
 {
  name: "INPUT1"
  data_type: TYPE_UINT8
  dims: [ 1 ]
 }
]
output [
 {
  name: "OUTPUT"
  data_type: TYPE_FP32
  dims: [ 1 ]
 }
]
ensemble_scheduling {
 step [
  {
   # decoupled model
   model_name: "async_execute_decouple"
   model_version: 1
   input_map {
    key: "WAIT_SECONDS"
    value: "INPUT0"
   }
   input_map {
    key: "RESPONSE_NUM"
    value: "INPUT1"
   }
   output_map {
    key: "WAIT_SECONDS"
    value: "temp_output0"
   }
   output_map {
    key: "RESPONSE_NUM"
    value: "temp_output1"
   }
  },
  {
   # non-decoupled model
   model_name: "async_execute"
   model_version: 1
   input_map {
    key: "WAIT_SECONDS"
    value: "temp_output0"
   }
   input_map {
    key: "RESPONSE_NUM"
    value: "temp_output1"
   }
   output_map {
    key: "WAIT_SECONDS"
    value: "OUTPUT"
   }
  }
 ]
}


================================================
FILE: qa/L0_metrics/ensemble_delay/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

platform: "ensemble"
max_batch_size: 4

input [
  {
    name: "ENSEMBLE_INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

output [
  {
    name: "ENSEMBLE_OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    name: "ENSEMBLE_OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

ensemble_scheduling
{
  step [
    {
      model_name: "dynamic_composing"
      model_version: -1
      input_map { key: "INPUT0", value: "ENSEMBLE_INPUT0" }
      output_map { key: "OUTPUT0", value: "ENSEMBLE_OUTPUT0" }
    },
    {
      model_name: "default_composing"
      model_version: -1
      input_map { key: "INPUT0", value: "ENSEMBLE_INPUT0" }
      output_map { key: "OUTPUT0", value: "ENSEMBLE_OUTPUT1" }
    }
  ]
}


================================================
FILE: qa/L0_metrics/histogram_metrics_test.py
================================================
#!/usr/bin/python
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import re
import sys
import unittest
from functools import partial

import numpy as np
import requests
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

sys.path.append("../common")
import test_util as tu

MILLIS_PER_SEC = 1000
FIRST_RESPONSE_HISTOGRAM = "nv_inference_first_response_histogram_ms"


def get_histogram_metric_key(
    metric_family, model_name, model_version, metric_type, le=""
):
    """Build the metrics-page key for one histogram series of a model.

    Args:
        metric_family: Name of the histogram metric family.
        model_name: Value of the ``model`` label.
        model_version: Value of the ``version`` label.
        metric_type: One of "count", "sum" or "bucket".
        le: Value of the ``le`` label; only used for "bucket".

    Returns:
        The key string, or None when ``metric_type`` is not recognized.
    """
    if metric_type == "bucket":
        return f'{metric_family}_{metric_type}{{model="{model_name}",version="{model_version}",le="{le}"}}'
    if metric_type in ("count", "sum"):
        return f'{metric_family}_{metric_type}{{model="{model_name}",version="{model_version}"}}'
    return None


class TestHistogramMetrics(tu.TestResultCollector):
    """Checks the first-response histogram published on the server's metrics endpoint.

    The tests need a running server: metrics are scraped over HTTP on port
    8002 and inferences are sent over a gRPC stream on port 8001. The host is
    read from the TRITONSERVER_IPADDR environment variable and falls back to
    "localhost".
    """

    def setUp(self):
        self.tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

    def get_metrics(self):
        """Return the raw text served by the metrics endpoint.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        r = requests.get(f"http://{self.tritonserver_ipaddr}:8002/metrics")
        r.raise_for_status()
        return r.text

    def get_histogram_metrics(self, metric_family: str):
        """Scrape the metrics endpoint and collect all samples of one family.

        Args:
            metric_family: Name prefix of the samples to collect.

        Returns:
            A dict mapping every sample key (everything before the value on a
            line that starts with ``metric_family``) to its value as an int.

        Raises:
            ValueError: if a sample value is not an integer literal.
        """
        # The family name is escaped so it is always matched literally.
        pattern = f"^{re.escape(metric_family)}.*"
        histogram_dict = {}

        metrics = self.get_metrics()

        for sample in re.findall(pattern, metrics, re.MULTILINE):
            # Split on the LAST space only: the value is the final token, and
            # the key must stay intact even if a label value contains spaces.
            key, value = sample.rsplit(" ", 1)
            histogram_dict[key] = int(value)

        return histogram_dict

    def async_stream_infer(self, model_name, inputs, outputs, responses_per_req):
        """Send one request over a gRPC stream and validate what comes back.

        Asserts that exactly ``responses_per_req`` callbacks were delivered and
        that none of them carried an InferenceServerException.
        """
        # list to hold the results of inference.
        user_data = []

        # Define the callback function. Note the last two parameters should be
        # result and error. InferenceServerClient would provide the results of
        # an inference as grpcclient.InferResult in result. For successful
        # inference, error will be None, otherwise it will be an object of
        # tritonclientutils.InferenceServerException holding the error details
        def callback(user_data, result, error):
            user_data.append(error if error else result)

        # Use the configured server address rather than a hard-coded host so
        # the gRPC traffic goes to the same server the metrics are read from.
        url = f"{self.tritonserver_ipaddr}:8001"
        with grpcclient.InferenceServerClient(url=url) as triton_client:
            # Inference call
            triton_client.start_stream(callback=partial(callback, user_data))
            triton_client.async_stream_infer(
                model_name=model_name,
                inputs=inputs,
                outputs=outputs,
            )

        # All responses are expected to have arrived once the client context
        # has been left.
        self.assertEqual(len(user_data), responses_per_req)
        # Validate the results
        for response in user_data:
            # Check for the errors
            self.assertNotIsInstance(response, InferenceServerException, response)

    def _check_first_response_histogram(
        self,
        histogram_dict,
        model_name,
        request_cnt,
        iter_cnt,
        wait_secs_per_req,
        delta,
    ):
        """Assert the count and sum of one model's first-response histogram.

        The expected count is ``request_cnt * iter_cnt``. The sum, in
        milliseconds, must lie in the half-open interval spanned by
        ``wait_secs_per_req`` and ``wait_secs_per_req + delta`` seconds per
        counted request.
        """
        histogram_count_key = get_histogram_metric_key(
            FIRST_RESPONSE_HISTOGRAM, model_name, "1", "count"
        )
        histogram_sum_key = get_histogram_metric_key(
            FIRST_RESPONSE_HISTOGRAM, model_name, "1", "sum"
        )
        # Test histogram count
        self.assertIn(histogram_count_key, histogram_dict)
        self.assertEqual(histogram_dict[histogram_count_key], request_cnt * iter_cnt)
        # Test histogram sum
        self.assertIn(histogram_sum_key, histogram_dict)
        lower_bound_ms = wait_secs_per_req * MILLIS_PER_SEC * request_cnt * iter_cnt
        upper_bound_ms = (
            (wait_secs_per_req + delta) * MILLIS_PER_SEC * request_cnt * iter_cnt
        )
        # Separate assertions report the offending value when a bound is missed.
        self.assertGreaterEqual(histogram_dict[histogram_sum_key], lower_bound_ms)
        self.assertLess(histogram_dict[histogram_sum_key], upper_bound_ms)
        # Prometheus histogram buckets are tested in metrics_api_test.cc::HistogramAPIHelper

    def test_ensemble_decoupled(self):
        """Check histogram count and sum for an ensemble and its two models.

        One streaming request is sent to the "ensemble" model per iteration;
        after each iteration the cumulative histogram values of the ensemble,
        of "async_execute_decouple" and of "async_execute" are validated.
        """
        # NOTE(review): INPUT0 presumably is the wait time in seconds and
        # INPUT1 the number of responses to produce -- confirm against the
        # model implementations.
        wait_secs = 1
        responses_per_req = 3
        total_iters = 3
        delta = 0.2

        ensemble_model_name = "ensemble"
        decoupled_model_name = "async_execute_decouple"
        non_decoupled_model_name = "async_execute"

        # Infer
        inputs = [
            grpcclient.InferInput("INPUT0", [1], "FP32"),
            grpcclient.InferInput("INPUT1", [1], "UINT8"),
        ]
        outputs = [grpcclient.InferRequestedOutput("OUTPUT")]

        # Initialize the data
        inputs[0].set_data_from_numpy(np.array([wait_secs], np.float32))
        inputs[1].set_data_from_numpy(np.array([responses_per_req], np.uint8))

        # Send requests to ensemble decoupled model
        for iter_cnt in range(1, total_iters + 1):
            self.async_stream_infer(
                ensemble_model_name, inputs, outputs, responses_per_req
            )

            # Checks metrics output. Values accumulate over the iterations,
            # hence the iteration number is part of every expectation.
            histogram_dict = self.get_histogram_metrics(FIRST_RESPONSE_HISTOGRAM)

            # Test ensemble model metrics
            self._check_first_response_histogram(
                histogram_dict, ensemble_model_name, 1, iter_cnt, wait_secs * 2, 2 * delta
            )

            # Test decoupled model metrics
            self._check_first_response_histogram(
                histogram_dict, decoupled_model_name, 1, iter_cnt, wait_secs, delta
            )

            # Test non-decoupled model metrics
            self._check_first_response_histogram(
                histogram_dict,
                non_decoupled_model_name,
                responses_per_req,
                iter_cnt,
                wait_secs,
                delta,
            )

    def test_buckets_override(self):
        """Check that the served buckets are exactly those in OVERRIDE_BUCKETS.

        OVERRIDE_BUCKETS is a comma-separated list of ``le`` label values and
        must be provided by the caller.
        """
        model_name = "async_execute_decouple"
        metrics = self.get_metrics()
        override_env = os.environ.get("OVERRIDE_BUCKETS")
        # Fail with a clear message instead of an AttributeError on None.
        self.assertIsNotNone(
            override_env, "OVERRIDE_BUCKETS environment variable must be set"
        )
        override_buckets = override_env.split(",")

        # Check metric output
        self.assertEqual(
            metrics.count(FIRST_RESPONSE_HISTOGRAM + "_bucket"), len(override_buckets)
        )
        for le in override_buckets:
            bucket_key = get_histogram_metric_key(
                FIRST_RESPONSE_HISTOGRAM, model_name, "1", "bucket", le
            )
            self.assertIn(bucket_key, metrics)


# Hand control to unittest's command-line runner when executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_metrics/identity_delay/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# NOTE(review): metrics_queue_size_test.py asserts max_batch_size == 4 and
# execute_delay_ms == 2000 on the models it validates. This config presumably
# serves as the template for those models, so keep the values in sync with the
# test (TODO confirm against test.sh).
backend: "identity"
max_batch_size: 4

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]

# String-valued parameter; the test reads it back through the model config
# and parses it with int(). Presumably the delay applied per execution by the
# identity backend, in milliseconds -- verify against the backend.
parameters [
  {
    key: "execute_delay_ms"
    value: { string_value: "2000" }
  }
]


================================================
FILE: qa/L0_metrics/metrics_config_test.py
================================================
#!/usr/bin/python
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append("../common")

import unittest

import requests
import test_util as tu

# Host of the server under test; overridable through the environment.
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

# Substrings searched for in the scraped metrics text. Every test below either
# requires all entries of one list to be present or all of them to be absent.
INF_COUNTER_PATTERNS = [
    "nv_inference_request_duration",
    "nv_inference_queue_duration",
    "nv_inference_compute_input_duration",
    "nv_inference_compute_infer_duration",
    "nv_inference_compute_output_duration",
]
# Histogram families; the "exist" test appends _count, _sum and _bucket.
INF_HISTOGRAM_PATTERNS = ["nv_inference_first_response_histogram_ms"]
INF_SUMMARY_PATTERNS = [
    "nv_inference_request_summary",
    "nv_inference_queue_summary",
    "nv_inference_compute_input_summary",
    "nv_inference_compute_infer_summary",
    "nv_inference_compute_output_summary",
]
CACHE_COUNTER_PATTERNS = [
    "nv_cache_num_hits_per_model",
    "nv_cache_num_misses_per_model",
    "nv_cache_hit_duration_per_model",
    "nv_cache_miss_duration_per_model",
]
PINNED_MEMORY_PATTERNS = [
    "nv_pinned_memory_pool_total_bytes",
    "nv_pinned_memory_pool_used_bytes",
]
CACHE_SUMMARY_PATTERNS = ["nv_cache_hit_summary", "nv_cache_miss_summary"]


class MetricsConfigTest(tu.TestResultCollector):
    """Presence and absence checks for metric families in the metrics text.

    Every test scrapes the metrics endpoint once and searches the text for
    substrings. Which tests are expected to pass depends on how the server
    was started; the caller selects the matching test.
    """

    def _get_metrics(self):
        """Return the text served by the metrics endpoint (raises on HTTP errors)."""
        response = requests.get(f"http://{_tritonserver_ipaddr}:8002/metrics")
        response.raise_for_status()
        return response.text

    def _assert_all_present(self, patterns):
        """Scrape once and require every pattern to occur in the metrics text."""
        scraped = self._get_metrics()
        for pattern in patterns:
            self.assertIn(pattern, scraped)

    def _assert_all_absent(self, patterns):
        """Scrape once and require that no pattern occurs in the metrics text."""
        scraped = self._get_metrics()
        for pattern in patterns:
            self.assertNotIn(pattern, scraped)

    def test_pinned_memory_metrics_exist(self):
        self._assert_all_present(PINNED_MEMORY_PATTERNS)

    # Counters
    def test_inf_counters_exist(self):
        self._assert_all_present(INF_COUNTER_PATTERNS)

    def test_inf_counters_missing(self):
        self._assert_all_absent(INF_COUNTER_PATTERNS)

    def test_cache_counters_exist(self):
        self._assert_all_present(CACHE_COUNTER_PATTERNS)

    def test_cache_counters_missing(self):
        self._assert_all_absent(CACHE_COUNTER_PATTERNS)

    # Histograms
    def test_inf_histograms_exist(self):
        scraped = self._get_metrics()
        for family in INF_HISTOGRAM_PATTERNS:
            # A histogram family is exposed as three kinds of series.
            for suffix in ("_count", "_sum", "_bucket"):
                self.assertIn(family + suffix, scraped)

    def test_inf_histograms_missing(self):
        self._assert_all_absent(INF_HISTOGRAM_PATTERNS)

    # Summaries
    def test_inf_summaries_exist(self):
        self._assert_all_present(INF_SUMMARY_PATTERNS)

    def test_inf_summaries_missing(self):
        self._assert_all_absent(INF_SUMMARY_PATTERNS)

    def test_cache_summaries_exist(self):
        self._assert_all_present(CACHE_SUMMARY_PATTERNS)

    def test_cache_summaries_missing(self):
        self._assert_all_absent(CACHE_SUMMARY_PATTERNS)

    def test_summaries_custom_quantiles(self):
        scraped = self._get_metrics()
        # SUMMARY_QUANTILES is provided by test.sh or whoever invokes the test.
        # It is a comma-separated list of "a:b" pairs; only the part before the
        # colon is looked up as a quantile label.
        quantile_pairs = os.environ.get("SUMMARY_QUANTILES", None)
        self.assertIsNotNone(quantile_pairs)

        print(scraped)
        for pair in quantile_pairs.split(","):
            quantile = pair.split(":")[0]
            print(quantile)
            self.assertIn(f'quantile="{quantile}"', scraped)

    # DLIS-4762: Disable request summary when caching enabled for now
    def test_inf_summaries_exist_with_cache(self):
        scraped = self._get_metrics()
        excluded = ["nv_inference_request_summary"]
        for family in set(INF_SUMMARY_PATTERNS).difference(excluded):
            self.assertIn(family, scraped)
        for family in excluded:
            self.assertNotIn(family, scraped)

    def test_model_namespacing_label_with_namespace_on(self):
        scraped = self._get_metrics()
        # One namespace label value is expected per model repository.
        for namespace in (
            "/opt/tritonserver/qa/L0_metrics/model_namespacing_repos/addsub_repo",
            "/opt/tritonserver/qa/L0_metrics/model_namespacing_repos/subadd_repo",
        ):
            self.assertIn(f'namespace="{namespace}"', scraped)

    def test_model_namespacing_label_with_namespace_off(self):
        self.assertNotIn('namespace="', self._get_metrics())


# Hand control to unittest's command-line runner when executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_metrics/metrics_queue_size_test.py
================================================
#!/usr/bin/python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append("../common")

import math
import time
import unittest
from functools import partial

import numpy as np
import requests
import test_util as tu
import tritonclient.http
from tritonclient.utils import triton_to_np_dtype

# Host of the server under test; overridable through the environment.
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

# Templates for the metric sample keys checked by the tests. They are filled in
# with str.format(model_name=...); the doubled braces become the literal braces
# of the label set, and the version label is fixed to "1".
QUEUE_METRIC_TEMPLATE = (
    'nv_inference_pending_request_count{{model="{model_name}",version="1"}}'
)
INFER_METRIC_TEMPLATE = 'nv_inference_count{{model="{model_name}",version="1"}}'
EXEC_METRIC_TEMPLATE = 'nv_inference_exec_count{{model="{model_name}",version="1"}}'


class MetricsPendingRequestCountTest(tu.TestResultCollector):
    """Validates the pending-request-count metric for several model configurations.

    Each test floods one model with asynchronous HTTP requests and then polls
    the metrics endpoint, comparing the reported pending request count (and,
    at the end, the inference and execution counts) with the values expected
    from the model's batch size and execution delay.
    """

    def setUp(self):
        self.metrics = None
        self.metrics_url = f"http://{_tritonserver_ipaddr}:8002/metrics"
        self.server_url = f"{_tritonserver_ipaddr}:8000"

        # Used to verify model config is set to expected values
        self.max_batch_size = 4
        self.delay_ms = 2000
        self.delay_sec = self.delay_ms // 1000

        # Setup dummy inputs
        dtype = "FP32"
        shape = (1, 1)
        input_np = np.ones(shape, dtype=triton_to_np_dtype(dtype))
        self.inputs = [
            tritonclient.http.InferInput("INPUT0", shape, dtype).set_data_from_numpy(
                input_np
            )
        ]
        self.ensemble_inputs = [
            tritonclient.http.InferInput(
                "ENSEMBLE_INPUT0", shape, dtype
            ).set_data_from_numpy(input_np)
        ]

        # Verify values for filling request queues
        self.num_requests = 10
        self.concurrency = 10
        # Concurrency must be at least the number of async requests, because
        # all N requests are sent to fill the request queues before blocking
        # on any of the results.
        self.assertGreaterEqual(self.concurrency, self.num_requests)
        self.client = tritonclient.http.InferenceServerClient(
            url=self.server_url, concurrency=self.concurrency
        )
        # Release the client's connections after every test, pass or fail.
        self.addCleanup(self.client.close)

        # Test specific configurations
        self.max_queue_size = 0

    def _validate_model_config(self, model_name, max_queue_size=0):
        """Fetch the model config and assert it matches the test's assumptions.

        Checks the ``execute_delay_ms`` parameter, ``max_batch_size`` and the
        default queue policy's ``max_queue_size`` (treated as 0 when absent).
        Stores the observed queue size on ``self.max_queue_size`` and returns
        the config.
        """
        config = self.client.get_model_config(model_name)
        print(config)
        params = config.get("parameters", {})
        delay_param = params.get("execute_delay_ms", {}).get("string_value")
        # Report a missing parameter as a test failure rather than a TypeError
        # raised by int(None).
        self.assertIsNotNone(
            delay_param, f"model '{model_name}' has no 'execute_delay_ms' parameter"
        )
        delay_ms = int(delay_param)
        max_batch_size = config.get("max_batch_size")
        self.assertEqual(delay_ms, self.delay_ms)
        self.assertEqual(max_batch_size, self.max_batch_size)

        dynamic_batching = config.get("dynamic_batching", {})
        default_queue_policy = dynamic_batching.get("default_queue_policy", {})
        self.max_queue_size = default_queue_policy.get("max_queue_size", 0)

        self.assertEqual(self.max_queue_size, max_queue_size)

        return config

    def _get_metrics(self):
        """Return the text served by the metrics endpoint (raises on HTTP errors)."""
        r = requests.get(self.metrics_url)
        r.raise_for_status()
        return r.text

    def _get_metric_line(self, metric, metrics):
        """Return the first line of ``metrics`` containing ``metric``, else None."""
        for line in metrics.splitlines():
            if metric in line:
                return line
        return None

    def _get_metric_value(self, metric):
        """Scrape the metrics and return the value of ``metric`` as a float.

        Asserts that the metric is present. The value is taken to be the
        second whitespace-separated token of the matching line.
        """
        metrics = self._get_metrics()
        self.assertIn(metric, metrics)
        line = self._get_metric_line(metric, metrics)
        print(line)
        if not line:
            return None
        value = line.split()[1]
        return float(value)

    def _assert_metric_equals(self, metric, expected_value):
        value = self._get_metric_value(metric)
        self.assertEqual(value, expected_value)

    def _assert_metric_greater_than(self, metric, gt_value):
        value = self._get_metric_value(metric)
        self.assertGreater(value, gt_value)

    def _send_async_requests(self, model_name, inputs, futures):
        """Issue ``self.num_requests`` async requests, appending each handle to ``futures``."""
        for _ in range(self.num_requests):
            futures.append(self.client.async_infer(model_name, inputs))

    def _send_async_requests_sequence(self, num_seq_slots, model_name, inputs, futures):
        """Issue ``self.num_requests`` async requests spread over sequences.

        Requests are assigned round-robin to ``num_seq_slots`` sequence IDs;
        the first request of each sequence carries the start flag. Handles are
        appended to ``futures``.
        """
        started_seqs = set()
        for num_sent in range(self.num_requests):
            # Add requests to each sequence slot round-robin, seq_id must be > 0
            # We don't care about finishing any sequences, just need to queue up
            # requests for each sequence until num_requests is hit.
            seq_id = (num_sent % num_seq_slots) + 1
            # Only the first request of a sequence ID is flagged as its start
            start = seq_id not in started_seqs
            started_seqs.add(seq_id)
            futures.append(
                self.client.async_infer(
                    model_name,
                    inputs,
                    request_id=str(num_sent),
                    sequence_id=seq_id,
                    sequence_start=start,
                )
            )

    def _test_helper(
        self, model_name, batch_size, send_requests_func, max_queue_size=0
    ):
        """Fill a model's queue and follow the pending count as it drains.

        Args:
            model_name: Model to test; its config is validated first.
            batch_size: Number of requests expected to leave the queue per
                execution.
            send_requests_func: Callable invoked as
                ``send_requests_func(model_name, inputs, futures)`` that sends
                the requests and appends their handles to ``futures``.
            max_queue_size: Expected queue cap from the model config; 0 means
                no cap.
        """
        self._validate_model_config(model_name, max_queue_size=max_queue_size)

        queue_size = QUEUE_METRIC_TEMPLATE.format(model_name=model_name)
        infer_count = INFER_METRIC_TEMPLATE.format(model_name=model_name)
        exec_count = EXEC_METRIC_TEMPLATE.format(model_name=model_name)
        # Metric should be zero before sending any requests
        self._assert_metric_equals(queue_size, 0)
        # Send N requests, letting scheduler delay queue fill up when applicable
        futures = []
        send_requests_func(model_name, self.inputs, futures)
        # Give Triton a second to load all requests into queues
        time.sleep(1)

        # Start from (num_requests-batch_size) because 1 batch should be executing,
        # and the rest of the requests should be queued.
        # If max_queue_size is specified then the queued requests would be capped
        # at max_queue_size.
        if max_queue_size != 0:
            self._assert_metric_equals(queue_size, max_queue_size)
            starting_queue_size = max_queue_size
        else:
            starting_queue_size = self.num_requests - batch_size

        # The queue is expected to shrink by one batch per execution delay.
        for expected_queue_size in range(starting_queue_size, 0, -1 * batch_size):
            self._assert_metric_equals(queue_size, expected_queue_size)
            time.sleep(self.delay_sec)
        # Queue should be empty now
        self._assert_metric_equals(queue_size, 0)
        # Let final batch finish
        time.sleep(self.delay_sec)

        # Every accepted request (the first batch plus everything that was
        # queued) should have been inferred, batch_size requests per execution.
        expected_infer_count = starting_queue_size + batch_size
        self._assert_metric_equals(infer_count, expected_infer_count)
        expected_exec_count = math.ceil(expected_infer_count / batch_size)
        self._assert_metric_equals(exec_count, expected_exec_count)

        # Requests that were neither executing nor queued are expected to fail.
        failed_count = 0
        for future in futures:
            try:
                future.get_result()
            except Exception:
                # Deliberately broad: any failure of the request is counted.
                failed_count += 1

        self.assertEqual(
            failed_count, self.num_requests - batch_size - starting_queue_size
        )

    def test_default_scheduler(self):
        model_name = "default"
        # Default scheduler won't do any batching
        batch_size = 1
        self._test_helper(model_name, batch_size, self._send_async_requests)

    def test_dynamic_batch_scheduler(self):
        model_name = "dynamic"
        # With sufficient queue delay set, we expect full batches to be executed
        batch_size = self.max_batch_size
        self._test_helper(model_name, batch_size, self._send_async_requests)

    def test_fail_max_queue_size(self):
        model_name = "max_queue_size"
        # This test checks whether metrics properly account for requests
        # that fail to enqueue on the server. The test sets the max_queue_size
        # and any additional requests beyond the specified queue size should fail
        # instead of waiting for execution.
        batch_size = self.max_batch_size
        self._test_helper(
            model_name, batch_size, self._send_async_requests, max_queue_size=4
        )

    def test_sequence_batch_scheduler_direct(self):
        model_name = "sequence_direct"
        # With sufficient queue delay and minimum_slot_utilization set, we
        # expect full batches to be executed.
        batch_size = self.max_batch_size
        num_seq_slots = batch_size
        send_requests_func = partial(self._send_async_requests_sequence, num_seq_slots)
        self._test_helper(model_name, batch_size, send_requests_func)

    def test_sequence_batch_scheduler_oldest(self):
        model_name = "sequence_oldest"
        # With sufficient queue delay set, we expect full batches to be executed
        batch_size = self.max_batch_size
        num_seq_slots = batch_size
        send_requests_func = partial(self._send_async_requests_sequence, num_seq_slots)
        self._test_helper(model_name, batch_size, send_requests_func)

    def test_ensemble_scheduler(self):
        """Check queue metrics of an ensemble and of its two composing models."""
        ensemble_model_name = "ensemble"
        composing_model_names = ["dynamic_composing", "default_composing"]
        ensemble_queue_size = QUEUE_METRIC_TEMPLATE.format(
            model_name=ensemble_model_name
        )
        composing_queue_sizes = [
            QUEUE_METRIC_TEMPLATE.format(model_name=name)
            for name in composing_model_names
        ]
        ensemble_infer_count = INFER_METRIC_TEMPLATE.format(
            model_name=ensemble_model_name
        )
        composing_infer_counts = [
            INFER_METRIC_TEMPLATE.format(model_name=name)
            for name in composing_model_names
        ]

        # Metric should be zero before sending any requests
        self._assert_metric_equals(ensemble_queue_size, 0)
        for queue_size in composing_queue_sizes:
            self._assert_metric_equals(queue_size, 0)
        # Send some ensemble requests
        futures = []
        self._send_async_requests(ensemble_model_name, self.ensemble_inputs, futures)
        # Give Triton time to pass some requests to composing models. This test
        # is less comprehensive on checking exact queue values, and just verifies
        # each composing queue gets filled and ensemble's queue is empty.
        time.sleep(1)

        # Top-level ensemble size should still be zero, as all pending requests should
        # be scheduled and reflected in composing models, and not considered "pending" at ensemble level.
        self._assert_metric_equals(ensemble_queue_size, 0)
        # Composing models should be non-zero
        for queue_size in composing_queue_sizes:
            self._assert_metric_greater_than(queue_size, 0)

        # Verify no inference exceptions were raised and let composing models
        # finish their requests
        for future in futures:
            future.get_result()

        # Check that all queues are empty after getting results
        self._assert_metric_equals(ensemble_queue_size, 0)
        for queue_size in composing_queue_sizes:
            self._assert_metric_equals(queue_size, 0)

        # Sanity check infer counts on ensemble and composing models
        self._assert_metric_equals(ensemble_infer_count, self.num_requests)
        for infer_count in composing_infer_counts:
            self._assert_metric_equals(infer_count, self.num_requests)


# Hand control to unittest's command-line runner when executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_metrics/model_namespacing_repos/addsub_repo/addsub_ensemble/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble consisting of a single step: both ensemble inputs are handed to
# "composing_model" and both of its outputs are exposed as ensemble outputs.
platform: "ensemble"
max_batch_size: 0
version_policy: { all { } }

input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]

ensemble_scheduling {
  step [
    {
      model_name: "composing_model"
      # NOTE(review): -1 presumably selects the latest available version of
      # the composing model -- confirm against the ensemble documentation.
      model_version: -1
      # Tensor names are identical on the ensemble side and the model side.
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_metrics/model_namespacing_repos/addsub_repo/composing_model/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend model producing the sum and difference of two inputs.

    OUTPUT0 carries INPUT0 + INPUT1 and OUTPUT1 carries INPUT0 - INPUT1.
    """

    # The auto-complete hook lets the model ship its own I/O description
    # instead of requiring a config.pbtxt next to it.
    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Register the packaged inputs/outputs unless a config already has any.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        existing = auto_complete_model_config.as_dict()
        # An explicitly provided config (any input or output) is left alone.
        if len(existing["input"]) or len(existing["output"]):
            return auto_complete_model_config

        for tensor_name in ("INPUT0", "INPUT1"):
            auto_complete_model_config.add_input(
                {"name": tensor_name, "data_type": "TYPE_INT32", "dims": [16]}
            )
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            auto_complete_model_config.add_output(
                {"name": tensor_name, "data_type": "TYPE_INT32", "dims": [16]}
            )
        return auto_complete_model_config

    def initialize(self, args):
        """Parse the model config and cache the numpy dtype of each output."""
        parsed_config = json.loads(args["model_config"])
        self.model_config = parsed_config

        out0_cfg = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT0")
        out1_cfg = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(out0_cfg["data_type"])
        self.output1_dtype = pb_utils.triton_string_to_numpy(out1_cfg["data_type"])

    def execute(self, requests):
        """Build one inference response per request, in request order."""
        return [
            pb_utils.InferenceResponse(
                self.addsub(
                    pb_utils.get_input_tensor_by_name(request, "INPUT0"),
                    pb_utils.get_input_tensor_by_name(request, "INPUT1"),
                )
            )
            for request in requests
        ]

    def addsub(self, in_0, in_1):
        """Return ``[OUTPUT0, OUTPUT1]`` tensors holding the sum and difference."""
        lhs = in_0.as_numpy()
        rhs = in_1.as_numpy()

        # Bytes/object arrays are converted to int32 before the arithmetic;
        # the decision is based on the dtype of the first input only.
        if lhs.dtype.type is np.bytes_ or lhs.dtype == np.object_:
            lhs = lhs.astype(np.int32)
            rhs = rhs.astype(np.int32)

        total = lhs + rhs
        difference = lhs - rhs

        return [
            pb_utils.Tensor("OUTPUT0", total.astype(self.output0_dtype)),
            pb_utils.Tensor("OUTPUT1", difference.astype(self.output1_dtype)),
        ]


================================================
FILE: qa/L0_metrics/model_namespacing_repos/subadd_repo/composing_model/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend model producing the difference and sum of two inputs.

    OUTPUT0 carries INPUT0 - INPUT1 and OUTPUT1 carries INPUT0 + INPUT1.
    """

    # The auto-complete hook lets the model ship its own I/O description
    # instead of requiring a config.pbtxt next to it.
    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Register the packaged inputs/outputs unless a config already has any.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        existing = auto_complete_model_config.as_dict()
        # An explicitly provided config (any input or output) is left alone.
        if len(existing["input"]) or len(existing["output"]):
            return auto_complete_model_config

        for tensor_name in ("INPUT0", "INPUT1"):
            auto_complete_model_config.add_input(
                {"name": tensor_name, "data_type": "TYPE_INT32", "dims": [16]}
            )
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            auto_complete_model_config.add_output(
                {"name": tensor_name, "data_type": "TYPE_INT32", "dims": [16]}
            )
        return auto_complete_model_config

    def initialize(self, args):
        """Parse the model config and cache the numpy dtype of each output."""
        parsed_config = json.loads(args["model_config"])
        self.model_config = parsed_config

        out0_cfg = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT0")
        out1_cfg = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(out0_cfg["data_type"])
        self.output1_dtype = pb_utils.triton_string_to_numpy(out1_cfg["data_type"])

    def execute(self, requests):
        """Build one inference response per request, in request order."""
        return [
            pb_utils.InferenceResponse(
                self.subadd(
                    pb_utils.get_input_tensor_by_name(request, "INPUT0"),
                    pb_utils.get_input_tensor_by_name(request, "INPUT1"),
                )
            )
            for request in requests
        ]

    def subadd(self, in_0, in_1):
        """Return ``[OUTPUT0, OUTPUT1]`` tensors holding the difference and sum."""
        lhs = in_0.as_numpy()
        rhs = in_1.as_numpy()

        # Bytes/object arrays are converted to int32 before the arithmetic;
        # the decision is based on the dtype of the first input only.
        if lhs.dtype.type is np.bytes_ or lhs.dtype == np.object_:
            lhs = lhs.astype(np.int32)
            rhs = rhs.astype(np.int32)

        difference = lhs - rhs
        total = lhs + rhs

        return [
            pb_utils.Tensor("OUTPUT0", difference.astype(self.output0_dtype)),
            pb_utils.Tensor("OUTPUT1", total.astype(self.output1_dtype)),
        ]


================================================
FILE: qa/L0_metrics/model_namespacing_repos/subadd_repo/subadd_ensemble/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble definition: two INT32 inputs of 16 elements are routed through a
# single composing step and its two INT32 outputs are exposed unchanged.
platform: "ensemble"
max_batch_size: 0
version_policy: { all { } }

# Ensemble-level inputs.
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
# Ensemble-level outputs.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]

ensemble_scheduling {
  # Single step: every tensor is mapped to a same-named tensor of
  # "composing_model", so the ensemble is a pass-through wrapper around it.
  step [
    {
      model_name: "composing_model"
      # NOTE(review): -1 presumably selects the latest available version --
      # confirm against the Triton ensemble documentation.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_metrics/pinned_memory_metrics_test.py
================================================
#!/usr/bin/python
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import re
import threading
import time
import unittest

import numpy as np
import requests
import tritonclient.http as httpclient
from tritonclient.utils import *

# Host of the Triton server under test; falls back to localhost.
_tritonserver_ipaddr = os.getenv("TRITONSERVER_IPADDR", "localhost")

# Triton server reserves 256 MB for pinned memory by default.
DEFAULT_TOTAL_PINNED_MEMORY_SIZE = 256 * 1024 * 1024  # bytes

# Expected pool size for this run: the CUSTOM_PINNED_MEMORY_POOL_SIZE
# environment variable overrides the default when it is set.
TOTAL_PINNED_MEMORY_SIZE = int(
    os.getenv("CUSTOM_PINNED_MEMORY_POOL_SIZE", DEFAULT_TOTAL_PINNED_MEMORY_SIZE)
)
print(f"TOTAL_PINNED_MEMORY_SIZE: {TOTAL_PINNED_MEMORY_SIZE} bytes")

# Expected pinned memory usage, in bytes, while the server is idle
# (no inference running).
DEFAULT_USED_PINNED_MEMORY_SIZE = 0


def get_metrics():
    """Read the pinned memory pool metrics from Triton's metrics endpoint.

    Returns:
        A ``(total_bytes, used_bytes)`` tuple. Both items are the matched
        digit strings; callers convert them with ``int()``.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
        requests.Timeout: if the endpoint does not answer in time.
        RuntimeError: if either metric is missing from the response body.
    """
    # A timeout keeps a wedged server from hanging the test run forever.
    response = requests.get(
        f"http://{_tritonserver_ipaddr}:8002/metrics", timeout=30
    )
    response.raise_for_status()

    values = []
    for metric_suffix in ("pool_total_bytes", "pool_used_bytes"):
        match = re.search(rf"{metric_suffix} (\d+)", response.text)
        if match is None:
            # Without this check the failure would surface as an opaque
            # "'NoneType' object has no attribute 'group'".
            raise RuntimeError(
                f"metric '*{metric_suffix}' not found in metrics response"
            )
        values.append(match.group(1))

    total_bytes_value, used_bytes_value = values
    return total_bytes_value, used_bytes_value


class TestPinnedMemoryMetrics(unittest.TestCase):
    """Checks the pinned memory pool metrics before, during and after inference.

    Each test polls the metrics endpoint from a background thread while the
    main thread sends 100 inference requests, then verifies that the reported
    usage is back at its idle value.
    """

    def setUp(self):
        self.inference_completed = threading.Event()
        # Failures raised on the polling thread. An exception in a worker
        # thread does not fail the test on its own, so it is recorded here and
        # re-raised on the main thread once the thread has been joined.
        self._metrics_errors = []

        shape = [1, 16]
        self.model_name = "libtorch_float32_float32_float32"
        input0_data = np.random.rand(*shape).astype(np.float32)
        input1_data = np.random.rand(*shape).astype(np.float32)

        self.inputs = [
            httpclient.InferInput(
                "INPUT0", input0_data.shape, "FP32"
            ).set_data_from_numpy(input0_data),
            httpclient.InferInput(
                "INPUT1", input1_data.shape, "FP32"
            ).set_data_from_numpy(input1_data),
        ]

        self.outputs = [
            httpclient.InferRequestedOutput("OUTPUT__0"),
            httpclient.InferRequestedOutput("OUTPUT__1"),
        ]

        # Before loading the model
        self._assert_pinned_memory_utilization()

    def _assert_pinned_memory_utilization(self):
        """Assert the pool reports its expected total size and idle usage."""
        total_bytes_value, used_bytes_value = get_metrics()
        self.assertEqual(int(total_bytes_value), TOTAL_PINNED_MEMORY_SIZE)
        self.assertEqual(int(used_bytes_value), DEFAULT_USED_PINNED_MEMORY_SIZE)

    def _collect_metrics(self):
        """Poll the metrics until inference completes (thread target).

        Polling stops at the first failure, which is stored in
        ``self._metrics_errors`` for the main thread to re-raise.
        """
        try:
            while not self.inference_completed.is_set():
                total_bytes_value, used_bytes_value = get_metrics()
                self.assertEqual(int(total_bytes_value), TOTAL_PINNED_MEMORY_SIZE)
                # Assert pinned memory usage is within anticipated values
                self.assertIn(int(used_bytes_value), [0, 64, 128, 192, 256])
        except Exception as exc:  # thread boundary: hand over to main thread
            self._metrics_errors.append(exc)

    def _ensure_model_loaded(self, client):
        """Load the test model through ``client`` unless it is already ready."""
        if not client.is_model_ready(self.model_name):
            client.load_model(self.model_name)

    def _start_metrics_thread(self):
        """Start and return the background thread running ``_collect_metrics``."""
        metrics_thread = threading.Thread(target=self._collect_metrics)
        metrics_thread.start()
        return metrics_thread

    def _stop_metrics_thread(self, metrics_thread):
        """Signal that inference is completed and wait for the polling thread."""
        self.inference_completed.set()
        metrics_thread.join()

    def _raise_metrics_errors(self):
        """Re-raise, on the calling thread, a failure seen by the poller."""
        if self._metrics_errors:
            raise self._metrics_errors[0]

    def test_pinned_memory_metrics_asynchronous_requests(self):
        with httpclient.InferenceServerClient(
            url=f"{_tritonserver_ipaddr}:8000", concurrency=10
        ) as client:
            self._ensure_model_loaded(client)

            # Before starting the inference
            self._assert_pinned_memory_utilization()

            # Start a thread to collect metrics asynchronously
            metrics_thread = self._start_metrics_thread()
            try:
                # Asynchronous inference requests
                async_requests = [
                    client.async_infer(
                        model_name=self.model_name,
                        inputs=self.inputs,
                        outputs=self.outputs,
                    )
                    for _ in range(100)
                ]

                # NOTE(review): presumably gives the poller time to sample
                # while requests are in flight -- confirm.
                time.sleep(1)

                # Wait for all inference requests to complete
                for async_request in async_requests:
                    async_request.get_result()
            finally:
                # Always stop the poller, even when inference raised;
                # otherwise the non-daemon thread would loop forever.
                self._stop_metrics_thread(metrics_thread)

            self._raise_metrics_errors()

        # After completing inference, used_bytes_value should come down to 0
        self._assert_pinned_memory_utilization()

    def test_pinned_memory_metrics_synchronous_requests(self):
        with httpclient.InferenceServerClient(
            url=f"{_tritonserver_ipaddr}:8000"
        ) as client:
            self._ensure_model_loaded(client)

            # Before starting the inference
            self._assert_pinned_memory_utilization()

            # Start a thread to collect metrics asynchronously
            metrics_thread = self._start_metrics_thread()
            try:
                # Synchronous inference requests
                for _ in range(100):
                    response = client.infer(
                        model_name=self.model_name,
                        inputs=self.inputs,
                        outputs=self.outputs,
                    )
                    response.get_response()

                time.sleep(0.1)
            finally:
                # Always stop the poller, even when inference raised;
                # otherwise the non-daemon thread would loop forever.
                self._stop_metrics_thread(metrics_thread)

            self._raise_metrics_errors()

        # After completing inference, used_bytes_value should come down to 0
        self._assert_pinned_memory_utilization()


# Run the tests defined in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_metrics/test.sh
================================================
#!/bin/bash
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
  REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
  echo -e "Repository version must be specified"
  echo -e "\n***\n*** Test Failed\n***"
  exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

MODELDIR=`pwd`/models
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BASE_SERVER_ARGS="--model-repository=${MODELDIR}"
SERVER_ARGS="${BASE_SERVER_ARGS}"
SERVER_LOG="./inference_server.log"
PYTHON_TEST="metrics_config_test.py"
HISTOGRAM_PYTEST="histogram_metrics_test.py"
source ../common/util.sh

CLIENT_LOG="client.log"
TEST_RESULT_FILE="test_results.txt"
# Evaluate the outcome of the test pipeline the caller ran just before this
# call (e.g. `python3 ... | tee ${CLIENT_LOG}`) and set RET=1 on failure.
#
# Arguments:
#   $1 - expected number of tests (optional, defaults to 1)
#
# IMPORTANT: call this immediately after the pipeline. The first statement
# reads PIPESTATUS[0], the exit status of the first command of the most
# recently executed pipeline, and any command run in between overwrites it.
function check_unit_test() {
    if [ "${PIPESTATUS[0]}" -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        # The test command succeeded; additionally verify the recorded
        # results against the expected test count.
        # NOTE(review): check_test_results is not defined in this file --
        # presumably provided by the sourced ../common/util.sh; confirm.
        EXPECTED_NUM_TESTS="${1:-1}"
        check_test_results ${TEST_RESULT_FILE} ${EXPECTED_NUM_TESTS}
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
}

# Start the server with the current SERVER/SERVER_ARGS settings and abort the
# whole script, after dumping the server log, if it did not come up.
function run_and_check_server() {
    run_server

    # A SERVER_PID other than "0" means the server started; nothing to do.
    if [ "$SERVER_PID" != "0" ]; then
        return 0
    fi

    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
}

rm -f $SERVER_LOG
RET=0

if [ `ps | grep -c "tritonserver"` != "0" ]; then
    echo -e "Tritonserver already running"
    echo -e `ps | grep tritonserver`
    exit 1
fi

### UNIT TESTS

TEST_LOG="./metrics_api_test.log"
UNIT_TEST="./metrics_api_test --gtest_output=xml:metrics_api.report.xml"

rm -fr *.log *.xml

set +e
export CUDA_VISIBLE_DEVICES=0
LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $UNIT_TEST >>$TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $TEST_LOG
    echo -e "\n***\n*** Metrics API Unit Test Failed\n***"
    RET=1
fi
set -e

# Prepare a libtorch float32 model with basic config
rm -rf $MODELDIR
model=libtorch_float32_float32_float32
mkdir -p $MODELDIR/${model}/1 && \
  cp -r $DATADIR/${model}/1/* $MODELDIR/${model}/1/. && \
  cp $DATADIR/${model}/config.pbtxt $MODELDIR/${model}/. && \
  (cd $MODELDIR/${model} && \
  sed -i "s/label_filename:.*//" config.pbtxt && \
  echo "instance_group [{ kind: KIND_GPU }]" >> config.pbtxt)

### CPU / RAM metrics tests
set +e
SERVER_LOG="cpu_metrics_test_server.log"
# NOTE: CPU utilization is computed based on the metrics interval, so having
# too small of an interval can skew the results.
SERVER_ARGS="$BASE_SERVER_ARGS --metrics-interval-ms=1000 --log-verbose=1"
run_and_check_server

CLIENT_PY="./cpu_metrics_test.py"
CLIENT_LOG="cpu_metrics_test_client.log"
python3 -m pytest --junitxml="cpu_metrics.report.xml" ${CLIENT_PY} >> ${CLIENT_LOG} 2>&1
if [ $? -ne 0 ]; then
    cat ${SERVER_LOG}
    cat ${CLIENT_LOG}
    echo -e "\n***\n*** ${CLIENT_PY} FAILED. \n***"
    RET=1
fi

kill_server
set -e

### Pinned memory metrics tests
set +e
CLIENT_PY="./pinned_memory_metrics_test.py"
CLIENT_LOG="pinned_memory_metrics_test_client.log"
SERVER_LOG="pinned_memory_metrics_test_server.log"
SERVER_ARGS="$BASE_SERVER_ARGS --metrics-interval-ms=1 --model-control-mode=explicit --log-verbose=1"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_pinned_memory_metrics_exist -v 2>&1 | tee ${CLIENT_LOG}
check_unit_test

python3 -m pytest --junitxml="pinned_memory_metrics.report.xml" ${CLIENT_PY} >> ${CLIENT_LOG} 2>&1
if [ $? -ne 0 ]; then
    cat ${SERVER_LOG}
    cat ${CLIENT_LOG}
    echo -e "\n***\n*** ${CLIENT_PY} FAILED. \n***"
    RET=1
fi

kill_server

# Custom Pinned memory pool size
export CUSTOM_PINNED_MEMORY_POOL_SIZE=1024 # bytes
SERVER_LOG="custom_pinned_memory_test_server.log"
CLIENT_LOG="custom_pinned_memory_test_client.log"
SERVER_ARGS="$BASE_SERVER_ARGS --metrics-interval-ms=1 --model-control-mode=explicit --log-verbose=1 --pinned-memory-pool-byte-size=$CUSTOM_PINNED_MEMORY_POOL_SIZE"
run_and_check_server
python3 -m pytest --junitxml="custom_pinned_memory_metrics.report.xml" ${CLIENT_PY} >> ${CLIENT_LOG} 2>&1
if [ $? -ne 0 ]; then
    cat ${SERVER_LOG}
    cat ${CLIENT_LOG}
    echo -e "\n***\n*** Custom ${CLIENT_PY} FAILED. \n***"
    RET=1
fi

kill_server
set -e

# Peer access GPU memory utilization Test
# Custom Pinned memory pool size
export CUSTOM_PINNED_MEMORY_POOL_SIZE=0 # bytes
export CUDA_VISIBLE_DEVICES=0
SERVER_LOG="gpu_peer_memory_test_server.log"
CLIENT_LOG="gpu_peer_memory_test_client.log"

SERVER_ARGS="$BASE_SERVER_ARGS --model-control-mode=explicit --log-verbose=1 --pinned-memory-pool-byte-size=$CUSTOM_PINNED_MEMORY_POOL_SIZE --enable-peer-access=FALSE --cuda-memory-pool-byte-size 0:0 --log-verbose=1"
run_and_check_server
#grep usage stats for triton server from nvidia-smi
memory_size_without_peering=$(nvidia-smi --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits | grep $(pgrep tritonserver) | awk '{print $3}')

# nvidia-smi only lists processes that use GPU memory, so with --enable-peer-access=FALSE it may not list tritonserver at all
if [ -z $memory_size_without_peering ]; then
  memory_size_without_peering=0
fi

kill_server

# Check if memory usage HAS reduced to 0 after using the --enable-peer-access flag
if [ $memory_size_without_peering -ne 0 ]; then
   # Print the memory usage for each GPU
  echo "Disabling PEERING does not reduce GPU memory usage to ZERO"
  echo -e "\n***\n*** GPU Peer enable failed. \n***"
  RET=1
fi

### GPU Metrics
set +e
export CUDA_VISIBLE_DEVICES=0,1
SERVER_LOG="./inference_server.log"
CLIENT_LOG="client.log"
run_and_check_server

num_gpus=`curl -s ${TRITONSERVER_IPADDR}:8002/metrics | grep "nv_gpu_utilization{" | wc -l`
if [ $num_gpus -ne 2 ]; then
  echo "Found $num_gpus GPU(s) instead of 2 GPUs being monitored."
  echo -e "\n***\n*** GPU metric test failed. \n***"
  RET=1
fi

kill_server

export CUDA_VISIBLE_DEVICES=0
run_and_check_server

num_gpus=`curl -s ${TRITONSERVER_IPADDR}:8002/metrics | grep "nv_gpu_utilization{" | wc -l`
if [ $num_gpus -ne 1 ]; then
  echo "Found $num_gpus GPU(s) instead of 1 GPU being monitored."
  echo -e "\n***\n*** GPU metric test failed. \n***"
  RET=1
fi
kill_server


# Test metrics interval by querying host and checking energy
METRICS_INTERVAL_MS=500
# Below time interval is larger than actual metrics interval in case
# the update is not ready for unexpected reason
WAIT_INTERVAL_SECS=0.6

SERVER_ARGS="$BASE_SERVER_ARGS --metrics-interval-ms=${METRICS_INTERVAL_MS}"
run_and_check_server

num_iterations=10

# Add "warm up" iteration because in some cases the GPU metrics collection
# doesn't start immediately
#
# NOTE: the energy readings are quoted in every comparison below. A reading
# is empty while the metric is not published yet; unquoted, two empty
# readings would reduce the test to `[ != ]`, which is true, and the warm-up
# would be declared complete before any metric was seen.
prev_energy=`curl -s ${TRITONSERVER_IPADDR}:8002/metrics | awk '/nv_energy_consumption{/ {print $2}'`
for (( i = 0; i < $num_iterations; ++i )); do
  sleep $WAIT_INTERVAL_SECS
  current_energy=`curl -s ${TRITONSERVER_IPADDR}:8002/metrics | awk '/nv_energy_consumption{/ {print $2}'`
  if [ "$current_energy" != "$prev_energy" ]; then
    echo -e "\n***\n*** Detected changing metrics, warmup completed.\n***"
    break
  fi
  prev_energy=$current_energy
done

# After warm-up, every reading taken WAIT_INTERVAL_SECS apart must differ from
# the previous one; an unchanged reading means the metrics were not refreshed
# within the configured interval.
prev_energy=`curl -s ${TRITONSERVER_IPADDR}:8002/metrics | awk '/nv_energy_consumption{/ {print $2}'`
for (( i = 0; i < $num_iterations; ++i )); do
  sleep $WAIT_INTERVAL_SECS
  current_energy=`curl -s ${TRITONSERVER_IPADDR}:8002/metrics | awk '/nv_energy_consumption{/ {print $2}'`
  if [ "$current_energy" == "$prev_energy" ]; then
    cat $SERVER_LOG
    echo "Metrics were not updated in interval of ${METRICS_INTERVAL_MS} milliseconds"
    echo -e "\n***\n*** Metric Interval test failed. \n***"
    RET=1
    break
  fi
  prev_energy=$current_energy
done

kill_server

### Metric Config CLI and different Metric Types ###
MODELDIR="${PWD}/unit_test_models"
mkdir -p "${MODELDIR}/identity_cache_on/1"
mkdir -p "${MODELDIR}/identity_cache_off/1"
BASE_SERVER_ARGS="--model-repository=${MODELDIR} --model-control-mode=explicit"

# Check default settings: Counters should be enabled, histograms and summaries should be disabled
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=identity_cache_off"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_histograms_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_summaries_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_cache_counters_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_cache_summaries_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

# Enable histograms
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=identity_cache_off --metrics-config histogram_latencies=true"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_histograms_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_summaries_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_cache_counters_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_cache_summaries_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

# Enable summaries, counters still enabled by default
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=identity_cache_off --metrics-config summary_latencies=true"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_summaries_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_cache_counters_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_cache_summaries_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

# Enable summaries, disable counters
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=identity_cache_off --metrics-config summary_latencies=true --metrics-config counter_latencies=false"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_summaries_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_cache_counters_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_cache_summaries_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

# Enable summaries and counters, check cache metrics
CACHE_ARGS="--cache-config local,size=1048576"
SERVER_ARGS="${BASE_SERVER_ARGS} ${CACHE_ARGS} --load-model=identity_cache_on --metrics-config summary_latencies=true --metrics-config counter_latencies=true"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
# DLIS-4762: Asserts that request summary is not published when cache is
# enabled for a model, until this is fixed.
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_summaries_exist_with_cache 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_cache_counters_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
python3 ${PYTHON_TEST} MetricsConfigTest.test_cache_summaries_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

# Check setting custom summary quantiles
export SUMMARY_QUANTILES="0.1:0.0.1,0.7:0.01,0.75:0.01"
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=identity_cache_off --metrics-config summary_latencies=true --metrics-config summary_quantiles=${SUMMARY_QUANTILES}"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_summaries_custom_quantiles 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

# Check model namespacing label with namespace on and off
REPOS_DIR="${PWD}/model_namespacing_repos"
mkdir -p "${REPOS_DIR}/addsub_repo/addsub_ensemble/1"
mkdir -p "${REPOS_DIR}/subadd_repo/subadd_ensemble/1"
# Namespace on
SERVER_ARGS="--model-repository=${REPOS_DIR}/addsub_repo --model-repository=${REPOS_DIR}/subadd_repo --model-namespacing=true --allow-metrics=true"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_model_namespacing_label_with_namespace_on 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server
# Namespace off
SERVER_ARGS="--model-repository=${REPOS_DIR}/addsub_repo --model-namespacing=false --allow-metrics=true"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_model_namespacing_label_with_namespace_off 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

### Pending Request Count (Queue Size) Metric Behavioral Tests ###
# The model repository for this section is rebuilt from scratch on every run.
MODELDIR="${PWD}/queue_size_models"
SERVER_ARGS="--model-repository=${MODELDIR} --log-verbose=1"
PYTHON_TEST="metrics_queue_size_test.py"
rm -rf "${MODELDIR}"
mkdir -p "${MODELDIR}"

# Re-use an identity model that sleeps during execution for N seconds for the
# batch of requests. Then we can confirm queue size behaviors for various
# scheduling/batching strategies.
BASE_MODEL="identity_delay"
# Don't use special debug env var for this, just set sufficient parameters for
# each scheduler to let them fill batches when possible.
unset TRITONSERVER_DELAY_SCHEDULER
export MAX_BATCH_SIZE=4
# Delay up to 100ms to form batches up to MAX_BATCH_SIZE
export MAX_QUEUE_DELAY_US=100000

# Create a model per scheduler type
# "default": the base model with only max_batch_size rewritten. Every other
# variant below is a copy of it with one scheduler stanza appended.
DEFAULT_MODEL="${MODELDIR}/default"
cp -r "${BASE_MODEL}" "${DEFAULT_MODEL}"
mkdir -p "${DEFAULT_MODEL}/1"
sed -i "s/^max_batch_size.*/max_batch_size: ${MAX_BATCH_SIZE}/" "${DEFAULT_MODEL}/config.pbtxt"

# "dynamic": dynamic batching with the shared queue delay.
DYNAMIC_MODEL="${MODELDIR}/dynamic"
cp -r "${DEFAULT_MODEL}" "${DYNAMIC_MODEL}"
echo -e "\ndynamic_batching { max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_US} }\n" >> "${DYNAMIC_MODEL}/config.pbtxt"

# "max_queue_size": dynamic batching plus a default queue policy capped at 4.
MAX_QUEUE_SIZE_MODEL="${MODELDIR}/max_queue_size"
cp -r "${DEFAULT_MODEL}" "${MAX_QUEUE_SIZE_MODEL}"
echo -e "\ndynamic_batching { max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_US} default_queue_policy { max_queue_size: 4 } }\n" >> "${MAX_QUEUE_SIZE_MODEL}/config.pbtxt"

# "sequence_direct": sequence batching, direct strategy.
SEQUENCE_DIRECT_MODEL="${MODELDIR}/sequence_direct"
cp -r "${DEFAULT_MODEL}" "${SEQUENCE_DIRECT_MODEL}"
echo -e "\nsequence_batching { direct { max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_US}, minimum_slot_utilization: 1.0 } }\n" >> "${SEQUENCE_DIRECT_MODEL}/config.pbtxt"

# "sequence_oldest": sequence batching, oldest strategy, with as many
# candidate sequences as the batch size.
SEQUENCE_OLDEST_MODEL="${MODELDIR}/sequence_oldest"
cp -r "${DEFAULT_MODEL}" "${SEQUENCE_OLDEST_MODEL}"
echo -e "\nsequence_batching { oldest { max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_US}, max_candidate_sequences: ${MAX_BATCH_SIZE} } }\n" >> "${SEQUENCE_OLDEST_MODEL}/config.pbtxt"

# "ensemble": copied from the ensemble_delay base, with its own version dir.
BASE_ENSEMBLE="ensemble_delay"
ENSEMBLE_MODEL="${MODELDIR}/ensemble"
cp -r "${BASE_ENSEMBLE}" "${ENSEMBLE_MODEL}"
mkdir -p "${ENSEMBLE_MODEL}/1"
# Use uniquely named composing models to avoid clashing
# metric values with individual and ensemble tests.
cp -r "${DEFAULT_MODEL}" "${MODELDIR}/default_composing"
cp -r "${DYNAMIC_MODEL}" "${MODELDIR}/dynamic_composing"


# Run the whole Python test file against a single server instance. The server
# is stopped before the results are checked.
run_and_check_server
python3 ${PYTHON_TEST} 2>&1 | tee ${CLIENT_LOG}
kill_server
expected_tests=6
check_unit_test "${expected_tests}"

### Test histogram data in ensemble decoupled model ###
MODELDIR="${PWD}/ensemble_decoupled"
SERVER_LOG="./histogram_ensemble_decoupled_server.log"
CLIENT_LOG="./histogram_ensemble_decoupled_client.log"
SERVER_ARGS="--model-repository=${MODELDIR} --metrics-config histogram_latencies=true --log-verbose=1"
mkdir -p "${MODELDIR}"/ensemble/1
# Derive "async_execute" from "async_execute_decouple": copy the model, then
# strip the decoupled transaction policy from the copy's config.
rm -rf "${MODELDIR}"/async_execute
cp -r "${MODELDIR}"/async_execute_decouple "${MODELDIR}"/async_execute
sed -i "s/model_transaction_policy { decoupled: True }//" "${MODELDIR}"/async_execute/config.pbtxt

# HISTOGRAM_PYTEST is set earlier in this script, outside this section.
run_and_check_server
python3 ${HISTOGRAM_PYTEST} TestHistogramMetrics.test_ensemble_decoupled 2>&1 | tee ${CLIENT_LOG}
kill_server
check_unit_test

### Test model metrics configuration
MODELDIR="${PWD}/model_metrics_model"
SERVER_LOG="./model_metric_config_server.log"
CLIENT_LOG="./model_metric_config_client.log"
decoupled_model="async_execute_decouple"
# Rebuild the model directory from the shared python_models sources.
rm -rf "${MODELDIR}/${decoupled_model}"
mkdir -p "${MODELDIR}/${decoupled_model}/1/"
cp ../python_models/${decoupled_model}/model.py ${MODELDIR}/${decoupled_model}/1/

# Test valid model_metrics config
# Start from the stock config and append a model_metrics stanza that sets
# custom buckets for the first-response histogram family.
cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/
cat >> "${MODELDIR}/${decoupled_model}/config.pbtxt" << EOL
model_metrics {
  metric_control: [
    {
      metric_identifier: {
        family: "nv_inference_first_response_histogram_ms"
      }
      histogram_options: {
        buckets: [ -1, 0.0, 1, 2.5 ]
      }
    }
  ]
}
EOL

SERVER_ARGS="--model-repository=${MODELDIR} --model-control-mode=explicit --load-model=${decoupled_model} --metrics-config histogram_latencies=true --log-verbose=1"
run_and_check_server
# Same bucket values as the stanza above, plus "+Inf"; exported so the Python
# test process inherits it.
export OVERRIDE_BUCKETS="-1,0,1,2.5,+Inf"
python3 ${HISTOGRAM_PYTEST} TestHistogramMetrics.test_buckets_override 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

# Test valid model_metrics config with histogram disabled
# Same model and config as above; only histogram_latencies flips to false.
PYTHON_TEST="metrics_config_test.py"
SERVER_ARGS="--model-repository=${MODELDIR} --model-control-mode=explicit --load-model=${decoupled_model} --metrics-config histogram_latencies=false --log-verbose=1"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_histograms_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

# Report the accumulated result and use it as the script's exit status.
if [ $RET -eq 0 ]; then
  SUMMARY="Test Passed"
else
  SUMMARY="Test FAILED"
fi
echo -e "\n***\n*** ${SUMMARY}\n***"

exit $RET


================================================
FILE: qa/L0_metrics/unit_test_models/identity_cache_off/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Non-batching identity model (max_batch_size 0) with one variable-length FP32
# input and output. Response caching is explicitly disabled for this model.
backend: "identity"
max_batch_size: 0
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

response_cache {
  enable: false
}


================================================
FILE: qa/L0_metrics/unit_test_models/identity_cache_on/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Non-batching identity model (max_batch_size 0) with one variable-length FP32
# input and output. Response caching is explicitly enabled for this model.
backend: "identity"
max_batch_size: 0
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

response_cache {
  enable: true
}


================================================
FILE: qa/L0_mlflow/plugin_test.py
================================================
#!/usr/bin/python

# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import unittest

import mlflow.onnx
import numpy as np
import onnx
import test_util as tu
from mlflow.deployments import get_deploy_client


class PluginTest(tu.TestResultCollector):
    """End-to-end tests for the MLflow Triton deployment plugin.

    Each test drives the MLflow deployments client for the "triton" target
    using the example ONNX model and the input / expected-output JSON files
    found under ./mlflow-triton-plugin/examples.
    """

    def setUp(self):
        """Create a fresh deployment client for the "triton" target."""
        self.client_ = get_deploy_client("triton")

    def _validate_deployment(self, model_name):
        """Run the create / list / get / predict / delete cycle for a model.

        ``model_name`` is used as the deployment name and to build the
        registry URI ``models:/<model_name>/1``, so version 1 of that model
        must already be registered in MLflow.

        The deployment is removed in a ``finally`` clause: a failed check
        must not leave the deployment behind, otherwise the
        "exactly one deployment" assertion of every later call would fail
        for an unrelated reason.
        """
        # create
        self.client_.create_deployment(
            model_name, f"models:/{model_name}/1", flavor="onnx"
        )

        try:
            # list
            deployment_list = self.client_.list_deployments()
            self.assertEqual(len(deployment_list), 1)
            self.assertEqual(deployment_list[0]["name"], model_name)

            # get
            deployment = self.client_.get_deployment(model_name)
            self.assertEqual(deployment["name"], model_name)

            # predict
            inputs = {}
            with open("./mlflow-triton-plugin/examples/input.json", "r") as f:
                input_json = json.load(f)
                for key, value in input_json["inputs"].items():
                    inputs[key] = np.array(value, dtype=np.float32)

            output = self.client_.predict(model_name, inputs)
            with open(
                "./mlflow-triton-plugin/examples/expected_output.json", "r"
            ) as f:
                output_json = json.load(f)
                for key, value in output_json["outputs"].items():
                    np.testing.assert_allclose(
                        output["outputs"][key],
                        np.array(value, dtype=np.int32),
                        err_msg="Inference result is not correct",
                    )
        finally:
            # delete
            self.client_.delete_deployment(model_name)

    def test_onnx_flavor(self):
        """Log the example ONNX model and validate its deployment."""
        # Log the ONNX model to MLFlow

        model = onnx.load(
            "./mlflow-triton-plugin/examples/onnx_float32_int32_int32/1/model.onnx"
        )
        # Use a different name to ensure the plugin operates on correct model
        mlflow.onnx.log_model(model, "triton", registered_model_name="onnx_model")

        self._validate_deployment("onnx_model")

    def test_onnx_flavor_with_files(self):
        """Deploy an ONNX model logged together with a Triton config file."""
        # Log the ONNX model and additional Triton config file to MLFlow

        model = onnx.load(
            "./mlflow-triton-plugin/examples/onnx_float32_int32_int32/1/model.onnx"
        )
        config_path = (
            "./mlflow-triton-plugin/examples/onnx_float32_int32_int32/config.pbtxt"
        )
        # Use a different name to ensure the plugin operates on correct model
        mlflow.onnx.log_model(
            model, "triton", registered_model_name="onnx_model_with_files"
        )
        mlflow.log_artifact(config_path, "triton")

        self._validate_deployment("onnx_model_with_files")

        # Check if the additional files are properly copied
        import filecmp

        self.assertTrue(
            filecmp.cmp(config_path, "./models/onnx_model_with_files/config.pbtxt")
        )

    def test_model_name(self):
        """Check model-name validation performed by ``create_deployment``.

        Empty / whitespace-only names and names containing path traversal
        must be rejected with the matching error message; unusual but legal
        names must go through the full deployment cycle.
        """
        EMPTY_MODEL_NAMES = [
            "",
            "     ",
            " ",
            "\n",
            "\t",
            "\r",
            "\v",
            "\f",
        ]
        INVALID_PATH_TRAVERSAL_NAMES = [
            "/opt/sys/",
            "../../etc/passwd",
            "../outside/repo",
            "test_models/../identity_py",
            "..",
        ]
        VALID_MODEL_NAMES = [
            "model123",
            # "model  OAI",   TRI-769: Fix this test case
            "model.version1",
            "...",
            "..my_model",
            "model..1",
            "model....1",
        ]

        for model_name in EMPTY_MODEL_NAMES:
            model_uri = f"models:/{model_name}/1"
            with self.assertRaises(Exception) as e:
                self.client_.create_deployment(model_name, model_uri, flavor="onnx")
            self.assertIn(
                "Model name cannot be empty. Please enter a valid name to deploy.",
                str(e.exception),
            )

        for model_name in INVALID_PATH_TRAVERSAL_NAMES:
            model_uri = f"models:/{model_name}/1"
            with self.assertRaises(Exception) as e:
                self.client_.create_deployment(model_name, model_uri, flavor="onnx")
            self.assertIn(
                f"Path traversal is not allowed in model's name: {model_name}",
                str(e.exception),
            )

        for model_name in VALID_MODEL_NAMES:
            # NOTE(review): the model is deliberately re-loaded per iteration;
            # presumably logging may alter the in-memory model -- confirm
            # before hoisting this out of the loop.
            model = onnx.load(
                "./mlflow-triton-plugin/examples/onnx_float32_int32_int32/1/model.onnx"
            )

            # Use a different name to ensure the plugin operates on correct model
            mlflow.onnx.log_model(model, "triton", registered_model_name=model_name)

            # Validate deployment functionalities - create, list, get, predict, delete
            self._validate_deployment(model_name)


# Run the test cases through unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_mlflow/test.sh
================================================
#!/bin/bash
# Copyright 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Restrict the test to GPU 0.
export CUDA_VISIBLE_DEVICES=0

# Shared QA helpers; run_server, kill_server and check_test_results used below
# are not defined in this file.
source ../common/util.sh

# Start from a clean slate: remove logs and JSON output of earlier runs.
rm -fr *.log *.json

# The default version of python 3.10.6 included in
# Ubuntu 22.04 installs blinker 1.4. This doesn't
# work with the awscli which we try to install.
# Uninstalling blinker and allowing pip to install blinker 1.6
# fixes this issue. The alternative to this is to
# install a higher version of python which uses blinker 1.6,
# but it is unknown whether this test should rely on
# the default installation of python.
# NOTE(review): nothing below uninstalls blinker; the script creates and
# activates a virtual environment instead. The comment above may predate that
# change -- confirm and update.

apt update -qq && apt install python3-venv -y
python3 -m venv .venv

source .venv/bin/activate

# Overall result of the script; set to 1 by any failing stage below.
RET=0

# Set up MLflow and dependencies used by the test
pip install mlflow onnx onnxruntime boto3

# Install AWS CLI
# Only install when no `aws` command is available yet. `command -v` expects
# command names only, so just `aws` is given: an additional `--version`
# operand would be looked up as a second command name instead of being passed
# to aws.
if ! command -v aws &> /dev/null; then
 # Fetch the installer archive matching the machine architecture, run the
 # bundled installer, then remove the downloaded and unpacked files.
 curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip"
 unzip awscliv2.zip
 ./aws/install
 rm -r ./aws/ ./awscliv2.zip
fi

# Set environment variables for MLFlow and Triton plugin
# All four are exported so that the MLflow CLI and the Python test started
# later inherit them.
export MLFLOW_MODEL_REPO=./mlflow/artifacts
export MLFLOW_TRACKING_URI=sqlite:////tmp/mlflow-db.sqlite
export TRITON_URL=localhost:8000
export TRITON_MODEL_REPO=models
mkdir -p ./mlflow/artifacts

# Install the plugin from the local source tree.
pip install ./mlflow-triton-plugin/

# Clear mlflow registered models if any
python - << EOF
from mlflow.tracking import MlflowClient
c = MlflowClient()
for m in c.search_registered_models():
    c.delete_registered_model(m.name)
EOF

# Recreate an empty local model repository.
rm -rf ./models
mkdir -p ./models
# Put some models in model repository to make sure MLFlow plugin would ignore
# model that is not registered via MLFlow
cp -r ./mlflow-triton-plugin/examples/onnx_float32_int32_int32 ./models/existing_model

# Start Triton on the local model repository in explicit model-control mode,
# loading every model present at startup (--load-model=*).
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=./models --strict-model-config=false --model-control-mode=explicit --load-model=*"
SERVER_LOG="./inference_server.log"
run_server
# A SERVER_PID of "0" after run_server is treated as a failed start; abort
# after dumping the server log.
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** fail to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Triton flavor with CLI
# The stages below (publish, create, list, get, predict, delete) run in order.
# Each stage runs only while CLI_RET is still 0, so the first failure skips
# everything after it. All command output is appended to CLI_LOG, which is
# also what the grep checks inspect. errexit is disabled for the duration
# because failures are detected through $? instead.
set +e
CLI_LOG=plugin_cli.log
CLI_RET=0
# Stage 1: register the example model in MLflow with the 'triton' flavor.
python ./mlflow-triton-plugin/scripts/publish_model_to_mlflow.py \
    --model_name onnx_float32_int32_int32 \
    --model_directory ./mlflow-triton-plugin/examples/onnx_float32_int32_int32/ \
    --flavor triton >>$CLI_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Expect 'triton' flavor model is logged to MLFlow\n***"
    CLI_RET=1
fi
# Stage 2: deploy version 1 of the registered model.
if [ $CLI_RET -eq 0 ]; then
    mlflow deployments create -t triton --flavor triton \
        --name onnx_float32_int32_int32 -m models:/onnx_float32_int32_int32/1 >>$CLI_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Expect 'triton' flavor model is deployed via MLFlow\n***"
        CLI_RET=1
    fi
fi
# Stage 3: the listing must show the deployed model as READY exactly once and
# must not show the pre-existing model that was not registered via MLflow.
if [ $CLI_RET -eq 0 ]; then
    mlflow deployments list -t triton >>$CLI_LOG 2>&1
    if [ $? -ne 0 ]; then
        CLI_RET=1
    fi
    if [ `grep -c "onnx_float32_int32_int32.*READY" $CLI_LOG` != "1" ]; then
        echo -e "\n***\n*** Expect deployed 'triton' flavor model to be listed\n***"
        CLI_RET=1
    fi
    if [ `grep -c "existing_model.*READY" $CLI_LOG` != "0" ]; then
        echo -e "\n***\n*** Unexpected non-MLflow model listed\n***"
        CLI_RET=1
    fi
fi
# Stage 4: 'get' must print exactly one "name: ..." line for the deployment.
if [ $CLI_RET -eq 0 ]; then
    mlflow deployments get -t triton --name onnx_float32_int32_int32 >>$CLI_LOG 2>&1
    if [ $? -ne 0 ]; then
        CLI_RET=1
    fi
    if [ `grep -c "^name: onnx_float32_int32_int32" $CLI_LOG` != "1" ]; then
        echo -e "\n***\n*** Expect deployed 'triton' flavor model is found\n***"
        CLI_RET=1
    fi
fi
# Stage 5: run a prediction and compare output.json with the expected output.
# The comparison script runs even if the predict command itself failed.
if [ $CLI_RET -eq 0 ]; then
    mlflow deployments predict -t triton --name onnx_float32_int32_int32 --input-path ./mlflow-triton-plugin/examples/input.json --output-path output.json >>$CLI_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Expect successful 'triton' flavor model prediction\n***"
        CLI_RET=1
    fi
    python - << EOF
import json
with open("./output.json", "r") as f:
    output = json.load(f)
with open("./mlflow-triton-plugin/examples/expected_output.json", "r") as f:
    expected_output = json.load(f)
if output == expected_output:
    exit(0)
else:
    exit(1)
EOF
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Expect 'triton' flavor model prediction matches expected output\n***"
        echo -e "Expect:\n"
        cat ./mlflow-triton-plugin/examples/expected_output.json
        echo -e "\n\nGot:\n"
        cat output.json
        CLI_RET=1
    fi
fi
# Stage 6: remove the deployment again.
if [ $CLI_RET -eq 0 ]; then
    mlflow deployments delete -t triton --name onnx_float32_int32_int32 >>$CLI_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Expect successful deletion of 'triton' flavor model\n***"
        CLI_RET=1
    fi
fi
# Any stage failure marks the whole script as failed and dumps the CLI log.
if [ $CLI_RET -ne 0 ]; then
  cat $CLI_LOG
  echo -e "\n***\n*** MLFlow Triton plugin CLI Test FAILED\n***"
  RET=1
fi
set -e

# Exercise the plugin through its Python package using the ONNX flavor.
# errexit is off while the outcome is inspected by hand.
set +e
TEST_RESULT_FILE='test_results.txt'
PY_TEST=plugin_test.py
PY_LOG=plugin_py.log
if python $PY_TEST >>$PY_LOG 2>&1; then
    # The test process exited cleanly; validate the results file, passing 3 as
    # the count (the test class defines three tests).
    if ! check_test_results $TEST_RESULT_FILE 3; then
        cat $PY_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
else
    # The test process itself failed; show both server and client logs.
    cat $SERVER_LOG
    cat $PY_LOG
    echo -e "\n***\n*** Python Test Failed\n***"
    RET=1
fi
set -e

kill_server


#
# Test S3, the setup is duplicated from L0_storage_S3, except the bucket is
# created empty
#

# Clear mlflow registered models if any
python - << EOF
from mlflow.tracking import MlflowClient
c = MlflowClient()
for m in c.search_registered_models():
    c.delete_registered_model(m.name)
EOF

# S3 credentials are necessary for this test. Pass via ENV variables
aws configure set default.region $AWS_DEFAULT_REGION && \
    aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \
    aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY

# S3 bucket path (Point to bucket when testing cloud storage)
# The bucket name is derived from CI_JOB_ID.
BUCKET_URL="s3://triton-bucket-${CI_JOB_ID}"

# Cleanup and delete S3 test bucket if it already exists (due to test failure)
# Best effort: "|| true" keeps a missing bucket from aborting the script while
# errexit is active. The URL is quoted like every other use in this section.
aws s3 rm "${BUCKET_URL}" --recursive --include "*" && \
    aws s3 rb "${BUCKET_URL}" || true

# Make S3 test bucket
aws s3 mb "${BUCKET_URL}"

# Remove Slash in BUCKET_URL
# Keep both spellings: without the trailing slash for the repository / bucket
# operations, with it for recursive content removal.
BUCKET_URL=${BUCKET_URL%/}
BUCKET_URL_SLASH="${BUCKET_URL}/"

# Point the plugin and the server at the (empty) bucket.
export TRITON_MODEL_REPO=${BUCKET_URL}
SERVER_ARGS="--model-repository=${TRITON_MODEL_REPO} --model-control-mode=explicit"
SERVER_LOG="./inference_server.s3.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    # Clean up bucket contents and delete bucket before exiting test
    aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"
    aws s3 rb "${BUCKET_URL}"
    exit 1
fi

# Exercise the plugin through its Python package using the ONNX flavor, this
# time with the S3-backed model repository. errexit is off while the outcome
# is inspected by hand.
set +e
TEST_RESULT_FILE='test_results.txt'
PY_TEST=plugin_test.py
PY_LOG=plugin_py.s3.log
if python $PY_TEST >>$PY_LOG 2>&1; then
    # The test process exited cleanly; validate the results file, passing 3 as
    # the count (the test class defines three tests).
    if ! check_test_results $TEST_RESULT_FILE 3; then
        cat $PY_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
else
    # The test process itself failed; show both server and client logs.
    cat $SERVER_LOG
    cat $PY_LOG
    echo -e "\n***\n*** Python Test Failed\n***"
    RET=1
fi
set -e

# Stop the S3-backed server before touching the bucket.
kill_server

# Empty the test bucket, then remove the bucket itself.
aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"
aws s3 rb "${BUCKET_URL}"

# Report the accumulated result and use it as the script's exit status.
if [ $RET -eq 0 ]; then
  SUMMARY="Test Passed"
else
  SUMMARY="Test FAILED"
fi
echo -e "\n***\n*** ${SUMMARY}\n***"

exit $RET


================================================
FILE: qa/L0_model_config/autofill_noplatform/common/no_version/config.pbtxt
================================================
# Negative test: this configuration sets neither 'backend' nor 'platform', so
# the backend cannot be determined (see the 'expected' file in this directory).
name: "no_version"
max_batch_size: 1
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/common/no_version/expected
================================================
Invalid model name: Could not determine backend for model 'no_version' with no backend in model configuration. Expected model name of the form 'model.<backend_name>'.


================================================
FILE: qa/L0_model_config/autofill_noplatform/custom/no_delimiter/config.pbtxt
================================================


================================================
FILE: qa/L0_model_config/autofill_noplatform/custom/no_delimiter/expected
================================================
Invalid model name: Could not determine backend for model 'no_delimiter' with no backend in model configuration. Expected model name of the form 'model.<backend_name>'.


================================================
FILE: qa/L0_model_config/autofill_noplatform/custom/unknown_backend.unknown/config.pbtxt
================================================


================================================
FILE: qa/L0_model_config/autofill_noplatform/custom/unknown_backend.unknown/expected
================================================
Invalid argument: unable to find backend library for backend 'unknown', try specifying runtime on the model configuration.


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/circular_dependency/circular_dependency/config.pbtxt
================================================
# Negative test: this ensemble's only step calls 'circular_dependency_2',
# whose only step calls this ensemble again, forming a cycle.
name: "circular_dependency"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "circular_dependency_2"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/circular_dependency/circular_dependency_2/config.pbtxt
================================================
# Negative test: second half of the cycle. This ensemble's only step calls
# 'circular_dependency', which in turn calls this ensemble.
name: "circular_dependency_2"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "circular_dependency"
      model_version: -1
      input_map {
        key: "data"
        value: "input"
      }
      output_map {
        key: "prob"
        value: "output"
      }
    }
  ]
}
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/circular_dependency/expected
================================================
circular dependency between ensembles: circular_dependency -> ... -> circular_dependency_2 -> circular_dependency

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/circular_dependency/expected_2
================================================
circular dependency between ensembles: circular_dependency_2 -> ... -> circular_dependency -> circular_dependency_2

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/ensemble_scheduling_no_set/config.pbtxt
================================================
# Negative test: the platform is "ensemble" but no 'ensemble_scheduling' block
# is provided.
name: "ensemble_scheduling_not_set"
max_batch_size: 8
platform: "ensemble"
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/ensemble_scheduling_no_set/expected
================================================
ensemble scheduling must be set for ensemble ensemble_scheduling_not_set whose platform is ensemble

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/has_backend/config.pbtxt
================================================
# Negative test: an ensemble (platform "ensemble") that also sets 'backend',
# which the expected error says must be empty for ensemble models.
name: "has_backend"
max_batch_size: 8
backend: "onnxruntime"
platform: "ensemble"
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/has_backend/expected
================================================
Ensemble model 'has_backend' must have platform type 'ensemble' and empty backend type

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/inconsistent_data_type/expected
================================================
in ensemble inconsistent_data_type, ensemble tensor data: inconsistent data type: TYPE_FP32 is inferred from model inconsistent_data_type while TYPE_INT32 is inferred from model int32_dim1_batch4

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/inconsistent_data_type/fp32_dim1_batch2/config.pbtxt
================================================
# Composing model for the 'inconsistent_data_type' ensemble: CPU identity
# model with FP32 tensors of shape [ 16 ] and a maximum batch size of 2.
name: "fp32_dim1_batch2"
max_batch_size: 2
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/inconsistent_data_type/inconsistent_data_type/config.pbtxt
================================================
# Negative test: the ensemble input 'data' is declared TYPE_FP32 but is mapped
# to INPUT0 of 'int32_dim1_batch4', which is TYPE_INT32. The expected error
# reports this data type conflict on ensemble tensor 'data'.
name: "inconsistent_data_type"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "int32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor"
      }
    },
    {
      model_name: "fp32_dim1_batch2"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_tensor"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/inconsistent_data_type/int32_dim1_batch4/config.pbtxt
================================================
# Composing model for the 'inconsistent_data_type' ensemble: CPU identity
# model with INT32 tensors of shape [ 16 ] and a maximum batch size of 4.
name: "int32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/inconsistent_shape/expected
================================================
in ensemble inconsistent_shape, ensemble tensor temp_tensor: inconsistent shape: \[-1,16\] is inferred from model fp32_dim1_batch4 while \[-1,16,16,16\] is inferred from model fp32_dim3_batch4

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/inconsistent_shape/expected_2
================================================
in ensemble inconsistent_shape, ensemble tensor temp_tensor: inconsistent shape: \[-1,16,16,16\] is inferred from model fp32_dim3_batch4 while \[-1,16\] is inferred from model fp32_dim1_batch4

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/inconsistent_shape/fp32_dim1_batch4/config.pbtxt
================================================
# Composing model referenced by the first step of the "inconsistent_shape"
# ensemble fixture. Identity backend, one TYPE_FP32 input and output of
# dims [ 16 ], max batch size 4, CPU instance.
name: "fp32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/inconsistent_shape/fp32_dim3_batch4/config.pbtxt
================================================
# Composing model referenced by the second step of the "inconsistent_shape"
# ensemble fixture. Identity backend, one TYPE_FP32 input and output of
# dims [ 16, 16, 16 ], max batch size 4, CPU instance.
name: "fp32_dim3_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16, 16, 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16, 16, 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/inconsistent_shape/inconsistent_shape/config.pbtxt
================================================
# Negative test fixture: the intermediate tensor "temp_tensor" is produced by
# fp32_dim1_batch4 (OUTPUT0 dims [ 16 ]) and consumed by fp32_dim3_batch4
# (INPUT0 dims [ 16, 16, 16 ]), so the two steps disagree on its shape.
# The sibling `expected` and `expected_2` files hold the resulting
# "inconsistent shape" message with the two models named in either order.
name: "inconsistent_shape"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      # Producer side of temp_tensor: dims [ 16 ] in the composing model.
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor"
      }
    },
    {
      model_name: "fp32_dim3_batch4"
      model_version: -1
      # Consumer side of temp_tensor: dims [ 16, 16, 16 ] in the composing model.
      input_map {
        key: "INPUT0"
        value: "temp_tensor"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/instance_group_set/config.pbtxt
================================================
# Negative test fixture: an ensemble config that also sets `instance_group`.
# The sibling `expected` file holds the message
# "instance group should not be specified for ensemble 'instance_group_set'".
name: "instance_group_set"
max_batch_size: 8
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "model_a"
      model_version: -1
      input_map {
        key: "model_a_input"
        value: "data"
      }
      output_map {
        key: "model_a_output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]
# Deliberately present: this is the setting the expected error complains about.
instance_group [
  {
    kind: KIND_GPU
    gpus: [ 42 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/instance_group_set/expected
================================================
instance group should not be specified for ensemble 'instance_group_set'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_batch_size/expected
================================================
ensemble invalid_batch_size allows maximum batch size 3, but it contains model fp32_dim1_batch2 which only allows maximum batch size to be 2

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_batch_size/fp32_dim1_batch2/config.pbtxt
================================================
# Composing model referenced by the second step of the "invalid_batch_size"
# ensemble fixture. Its max batch size of 2 is smaller than the ensemble's
# max batch size of 3, which is what the sibling `expected` message reports.
name: "fp32_dim1_batch2"
max_batch_size: 2
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_batch_size/fp32_dim1_batch4/config.pbtxt
================================================
# Composing model referenced by the first step of the "invalid_batch_size"
# ensemble fixture. Identity backend, one TYPE_FP32 input and output of
# dims [ 16 ], max batch size 4, CPU instance.
name: "fp32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_batch_size/invalid_batch_size/config.pbtxt
================================================
# Negative test fixture: the ensemble allows a max batch size of 3, but its
# second step uses fp32_dim1_batch2, whose config allows at most 2.
# The sibling `expected` file holds the corresponding error message.
name: "invalid_batch_size"
# Larger than fp32_dim1_batch2's max_batch_size (2); smaller than
# fp32_dim1_batch4's (4).
max_batch_size: 3
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor"
      }
    },
    {
      model_name: "fp32_dim1_batch2"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_tensor"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_decoupled_branching/expected
================================================
in ensemble invalid_decoupled_branching, step of model 'int32_dim1_nobatch_output2' receives inputs originated from different decoupled models

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_decoupled_branching/int32_dim1_nobatch_output2/config.pbtxt
================================================
# Composing model referenced by the "invalid_decoupled_branching" ensemble
# fixture. Identity backend with two TYPE_INT32 inputs and two TYPE_INT32
# outputs of dims [ 1 ]; max_batch_size is 0; CPU instance.
name: "int32_dim1_nobatch_output2"
max_batch_size: 0
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_decoupled_branching/invalid_decoupled_branching/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Negative test fixture: two separate steps run repeat_int32 (configured with
# `decoupled: True` in this test directory) and a third step consumes one
# output from each of them. The sibling `expected` file holds the message
# "step of model 'int32_dim1_nobatch_output2' receives inputs originated from
# different decoupled models".
name: "invalid_decoupled_branching"
max_batch_size: 0
platform: "ensemble"
ensemble_scheduling {
  step [
    # First decoupled branch: produces "repeat_1_out".
    {
      model_name: "repeat_int32"
      model_version: -1
      input_map {
        key: "IN"
        value: "IN"
      }
      input_map {
        key: "DELAY"
        value: "DELAY"
      }
      input_map {
        key: "WAIT"
        value: "WAIT"
      }
      output_map {
        key: "OUT"
        value: "repeat_1_out"
      }
    },
    # Second decoupled branch: same model and inputs, produces "repeat_2_out".
    {
      model_name: "repeat_int32"
      model_version: -1
      input_map {
        key: "IN"
        value: "IN"
      }
      input_map {
        key: "DELAY"
        value: "DELAY"
      }
      input_map {
        key: "WAIT"
        value: "WAIT"
      }
      output_map {
        key: "OUT"
        value: "repeat_2_out"
      }
    },
    # Joins the two branches: INPUT0 and INPUT1 come from different
    # repeat_int32 steps. This is the step named in the expected error.
    {
      model_name: "int32_dim1_nobatch_output2"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "repeat_1_out"
      }
      input_map {
        key: "INPUT1"
        value: "repeat_2_out"
      }
      output_map {
        key: "OUTPUT0"
        value: "identity_0"
      }
      output_map {
        key: "OUTPUT1"
        value: "identity_1"
      }
    },
    # Final step: feeds both outputs of the previous step back through the
    # same model and exposes OUTPUT0 as the ensemble output "OUT".
    {
      model_name: "int32_dim1_nobatch_output2"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "identity_0"
      }
      input_map {
        key: "INPUT1"
        value: "identity_1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUT"
      }
    }
  ]
}
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_decoupled_branching/repeat_int32/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Composing model referenced twice by the "invalid_decoupled_branching"
# ensemble fixture. Uses the "repeat" backend with a decoupled transaction
# policy; takes IN, DELAY and WAIT and declares outputs OUT and IDX.
name: "repeat_int32"
backend: "repeat"
max_batch_size: 0
# Marks this model as decoupled, which is the property the ensemble fixture's
# expected error message refers to.
model_transaction_policy {
  decoupled: True
}
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "IDX"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_decoupled_branching_2/expected
================================================
in ensemble invalid_decoupled_branching_2, step of model 'invalid_decoupled_branching_2' receives inputs originated from different decoupled models

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_decoupled_branching_2/invalid_decoupled_branching_2/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Negative test fixture: the two ensemble outputs, OUT and IDX, are each
# produced by a different step of repeat_int32 (configured with
# `decoupled: True` in this test directory). The sibling `expected` file holds
# the message "step of model 'invalid_decoupled_branching_2' receives inputs
# originated from different decoupled models" - note that the message names
# the ensemble itself rather than a composing model.
name: "invalid_decoupled_branching_2"
max_batch_size: 0
platform: "ensemble"
ensemble_scheduling {
  step [
    # First decoupled step: supplies only the ensemble output "OUT".
    {
      model_name: "repeat_int32"
      model_version: -1
      input_map {
        key: "IN"
        value: "IN"
      }
      input_map {
        key: "DELAY"
        value: "DELAY"
      }
      input_map {
        key: "WAIT"
        value: "WAIT"
      }
      output_map {
        key: "OUT"
        value: "OUT"
      }
    },
    # Second decoupled step: same model and inputs, supplies only the ensemble
    # output "IDX".
    {
      model_name: "repeat_int32"
      model_version: -1
      input_map {
        key: "IN"
        value: "IN"
      }
      input_map {
        key: "DELAY"
        value: "DELAY"
      }
      input_map {
        key: "WAIT"
        value: "WAIT"
      }
      output_map {
        key: "IDX"
        value: "IDX"
      }
    }
  ]
}
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "IDX"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_decoupled_branching_2/repeat_int32/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Composing model referenced twice by the "invalid_decoupled_branching_2"
# ensemble fixture. Uses the "repeat" backend with a decoupled transaction
# policy; takes IN, DELAY and WAIT and declares outputs OUT and IDX.
name: "repeat_int32"
backend: "repeat"
max_batch_size: 0
# Marks this model as decoupled, which is the property the ensemble fixture's
# expected error message refers to.
model_transaction_policy {
  decoupled: True
}
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "IDX"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_input_map/expected
================================================
in ensemble invalid_input_map, ensemble tensor temp_tensor_5 is mapping to non-existing input invalid_input in model fp32_dim1_batch4_input4

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_input_map/fp32_dim1_batch4/config.pbtxt
================================================
# Composing model referenced by the first two steps of the "invalid_input_map"
# ensemble fixture. Identity backend, one TYPE_FP32 input and output of
# dims [ 16 ], max batch size 4, CPU instance.
name: "fp32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_input_map/fp32_dim1_batch4_input4/config.pbtxt
================================================
# Composing model referenced by the last step of the "invalid_input_map"
# ensemble fixture. Declares exactly four inputs (INPUT0..INPUT3) and four
# outputs (OUTPUT0..OUTPUT3), all TYPE_FP32 with dims [ 16 ]; the ensemble
# additionally maps a tensor to an input named "invalid_input", which is not
# declared here.
name: "fp32_dim1_batch4_input4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT3"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT3"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_input_map/fp32_dim1_batch4_output3/config.pbtxt
================================================
# Composing model referenced by the third step of the "invalid_input_map"
# ensemble fixture. Identity backend with three TYPE_FP32 inputs
# (INPUT0..INPUT2) and three TYPE_FP32 outputs (OUTPUT0..OUTPUT2), all of
# dims [ 16 ]; max batch size 4, CPU instance.
name: "fp32_dim1_batch4_output3"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_input_map/invalid_input_map/config.pbtxt
================================================
# Negative test fixture: the last step maps ensemble tensor "temp_tensor_5" to
# an input named "invalid_input", which fp32_dim1_batch4_input4 does not
# declare (its inputs are INPUT0..INPUT3). The sibling `expected` file holds
# the corresponding "is mapping to non-existing input" message.
name: "invalid_input_map"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_tensor_4"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_5"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      input_map {
        key: "INPUT1"
        value: "data"
      }
      input_map {
        key: "INPUT2"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_1"
      }
      output_map {
        key: "OUTPUT1"
        value: "temp_tensor_2"
      }
      output_map {
        key: "OUTPUT2"
        value: "temp_tensor_3"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_tensor_1"
      }
      input_map {
        key: "INPUT1"
        value: "temp_tensor_2"
      }
      input_map {
        key: "INPUT2"
        value: "temp_tensor_3"
      }
      input_map {
        key: "INPUT3"
        value: "temp_tensor_4"
      }
      # Deliberately invalid: the composing model has no input with this name.
      input_map {
        key: "invalid_input"
        value: "temp_tensor_5"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_output_map/expected
================================================
in ensemble invalid_output_map, ensemble tensor temp_tensor_2 is mapped from non-existing output invalid_output in model fp32_dim1_batch4_output3

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_output_map/fp32_dim1_batch4/config.pbtxt
================================================
# Composing model referenced by the first step of the "invalid_output_map"
# ensemble fixture. Identity backend, one TYPE_FP32 input and output of
# dims [ 16 ], max batch size 4, CPU instance.
name: "fp32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_output_map/fp32_dim1_batch4_input4/config.pbtxt
================================================
# Composing model referenced by the last step of the "invalid_output_map"
# ensemble fixture. Identity backend with four TYPE_FP32 inputs
# (INPUT0..INPUT3) and four TYPE_FP32 outputs (OUTPUT0..OUTPUT3), all of
# dims [ 16 ]; max batch size 4, CPU instance.
name: "fp32_dim1_batch4_input4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT3"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT3"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_output_map/fp32_dim1_batch4_output3/config.pbtxt
================================================
# Composing model referenced by the second step of the "invalid_output_map"
# ensemble fixture. Declares exactly three outputs (OUTPUT0..OUTPUT2); the
# ensemble maps a tensor from an output named "invalid_output", which is not
# declared here.
name: "fp32_dim1_batch4_output3"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/invalid_output_map/invalid_output_map/config.pbtxt
================================================
# Negative test fixture: the second step maps ensemble tensor "temp_tensor_2"
# from an output named "invalid_output", which fp32_dim1_batch4_output3 does
# not declare (its outputs are OUTPUT0..OUTPUT2). The sibling `expected` file
# holds the corresponding "is mapped from non-existing output" message.
name: "invalid_output_map"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      input_map {
        key: "INPUT1"
        value: "data"
      }
      input_map {
        key: "INPUT2"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_1"
      }
      # Deliberately invalid: the composing model has no output with this name.
      output_map {
        key: "invalid_output"
        value: "temp_tensor_2"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_tensor_1"
      }
      input_map {
        key: "INPUT1"
        value: "temp_tensor_2"
      }
      input_map {
        key: "INPUT2"
        value: "temp_tensor_1"
      }
      input_map {
        key: "INPUT3"
        value: "temp_tensor_4"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/model_warm_up_set/config.pbtxt
================================================
# Negative test fixture: an ensemble config that also sets `model_warmup`.
# The sibling `expected` file holds the message
# "model_warmup can not be specified for ensemble 'model_warmup_set'".
# NOTE(review): the name below ("model_warmup_set") differs from this
# fixture's directory name ("model_warm_up_set"); the expected message uses
# the name from this file. Confirm the difference is intentional.
name: "model_warmup_set"
max_batch_size: 8
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "model_a"
      model_version: -1
      input_map {
        key: "model_a_input"
        value: "data"
      }
      output_map {
        key: "model_a_output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]
# Deliberately present (with a single empty entry): this is the setting the
# expected error complains about.
model_warmup [{}]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/model_warm_up_set/expected
================================================
model_warmup can not be specified for ensemble 'model_warmup_set'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_input_map/config.pbtxt
================================================
# Negative test fixture: the second step (index 1) has no `input_map`.
# The sibling `expected` file holds the message
# "must specify 'input_map' in step 1 of ensemble 'no_input_map'".
name: "no_input_map"
max_batch_size: 8
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "model_a"
      model_version: -1
      input_map {
        key: "model_a_input"
        value: "data"
      }
      output_map {
        key: "model_a_output"
        value: "temp_1"
      }
    },
    # Step 1: `input_map` deliberately omitted.
    {
      model_name: "model_b"
      model_version: -1
      output_map {
        key: "model_b_output"
        value: "temp_2"
      }
    },
    {
      model_name: "model_c"
      model_version: -1
      input_map {
        key: "model_c_input"
        value: "temp_2"
      }
      output_map {
        key: "model_c_output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_input_map/expected
================================================
must specify 'input_map' in step 1 of ensemble 'no_input_map'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_model_name/config.pbtxt
================================================
# Negative test fixture: the second step (index 1) has no `model_name`
# (and no `model_version`). The sibling `expected` file holds the message
# "must specify 'model_name' in step 1 of ensemble 'no_model_name'".
name: "no_model_name"
max_batch_size: 8
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "model_a"
      model_version: -1
      input_map {
        key: "model_a_input"
        value: "data"
      }
      output_map {
        key: "model_a_output"
        value: "temp_1"
      }
    },
    # Step 1: `model_name` deliberately omitted.
    {
      input_map {
        key: "model_b_input"
        value: "temp_1"
      }
      output_map {
        key: "model_b_output"
        value: "temp_2"
      }
    },
    {
      model_name: "model_c"
      model_version: -1
      input_map {
        key: "model_c_input"
        value: "temp_2"
      }
      output_map {
        key: "model_c_output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_model_name/expected
================================================
must specify 'model_name' in step 1 of ensemble 'no_model_name'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_output_map/config.pbtxt
================================================
# Negative test fixture: the second step (index 1) has no `output_map`.
# The sibling `expected` file holds the message
# "must specify 'output_map' in step 1 of ensemble 'no_output_map'".
name: "no_output_map"
max_batch_size: 8
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "model_a"
      model_version: -1
      input_map {
        key: "model_a_input"
        value: "data"
      }
      output_map {
        key: "model_a_output"
        value: "temp_1"
      }
    },
    # Step 1: `output_map` deliberately omitted, so no step in this file
    # produces "temp_2", which the next step consumes.
    {
      model_name: "model_b"
      model_version: -1
      input_map {
        key: "model_b_input"
        value: "temp_1"
      }
    },
    {
      model_name: "model_c"
      model_version: -1
      input_map {
        key: "model_c_input"
        value: "temp_2"
      }
      output_map {
        key: "model_c_output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_output_map/expected
================================================
must specify 'output_map' in step 1 of ensemble 'no_output_map'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_required_version/expected
================================================
ensemble 'no_required_version' depends on 'simple' whose required version 2 is not loaded

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_required_version/no_required_version/config.pbtxt
================================================
# Negative test fixture: the single step pins composing model "simple" to
# version 2. The sibling `expected` file holds the message
# "ensemble 'no_required_version' depends on 'simple' whose required version 2
# is not loaded".
# NOTE(review): which versions of "simple" exist in the fixture's model
# repository is not visible from this config - presumably version 2 is absent.
name: "no_required_version"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      model_name: "simple"
      # Explicit version requirement; this is the version named in the
      # expected error message.
      model_version: 2
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_required_version/simple/config.pbtxt
================================================
name: "simple"
backend: "identity"
max_batch_size: 8
version_policy : { all {} }
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_required_version_2/expected
================================================
ensemble 'no_required_version_2' depends on 'simple' whose required version 2 is not loaded

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_required_version_2/no_required_version_2/config.pbtxt
================================================
name: "no_required_version_2"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      model_name: "simple"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp0"
      }
      output_map {
        key: "OUTPUT1"
        value: "temp1"
      }
    },
    {
      model_name: "simple"
      model_version: 2
      input_map {
        key: "INPUT0"
        value: "temp0"
      }
      input_map {
        key: "INPUT1"
        value: "temp1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_required_version_2/simple/config.pbtxt
================================================
name: "simple"
backend: "identity"
max_batch_size: 8
version_policy : { all {} }
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_required_version_3/expected
================================================
ensemble 'no_required_version_3' depends on 'simple' whose required version 2 is not loaded

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_required_version_3/no_required_version_3/config.pbtxt
================================================
name: "no_required_version_3"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      model_name: "simple"
      model_version: 2
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp0"
      }
      output_map {
        key: "OUTPUT1"
        value: "temp1"
      }
    },
    {
      model_name: "simple"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp0"
      }
      input_map {
        key: "INPUT1"
        value: "temp1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_required_version_3/simple/config.pbtxt
================================================
name: "simple"
backend: "identity"
max_batch_size: 8
version_policy : { all {} }
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_step/config.pbtxt
================================================
name: "no_step"
max_batch_size: 8
platform: "ensemble"
ensemble_scheduling {
  step [
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_step/expected
================================================
must specify 'step' for ensemble 'no_step'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_step_2/config.pbtxt
================================================
name: "no_step_2"
max_batch_size: 8
platform: "ensemble"
ensemble_scheduling {
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/no_step_2/expected
================================================
must specify 'step' for ensemble 'no_step_2'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/non_existing_model/expected
================================================
ensemble non_existing_model contains models that are not available or ambiguous: fp32_dim1_batch4_input4

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/non_existing_model/fp32_dim1_batch4/config.pbtxt
================================================
name: "fp32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/non_existing_model/fp32_dim1_batch4_output3/config.pbtxt
================================================
name: "fp32_dim1_batch4_output3"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/non_existing_model/non_existing_model/config.pbtxt
================================================
name: "non_existing_model"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      input_map {
        key: "INPUT1"
        value: "data"
      }
      input_map {
        key: "INPUT2"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_1"
      }
      output_map {
        key: "OUTPUT1"
        value: "temp_tensor_2"
      }
      output_map {
        key: "OUTPUT2"
        value: "temp_tensor_3"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "input1"
        value: "temp_tensor_1"
      }
      input_map {
        key: "input2"
        value: "temp_tensor_2"
      }
      input_map {
        key: "input3"
        value: "temp_tensor_3"
      }
      input_map {
        key: "input4"
        value: "temp_tensor_4"
      }
      output_map {
        key: "output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/optimization_set/config.pbtxt
================================================
name: "optimization_set"
max_batch_size: 8
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "model_a"
      model_version: -1
      input_map {
        key: "model_a_input"
        value: "data"
      }
      output_map {
        key: "model_a_output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]
optimization {
  priority: PRIORITY_MAX
}


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/optimization_set/expected
================================================
optimization should not be specified for ensemble 'optimization_set'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/output_to_tensor_overmapped/config.pbtxt
================================================
name: "output_to_tensor_overmapped"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output"
        value: "temp_tensor_1"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output1"
        value: "temp_tensor_2"
      }
      output_map {
        key: "output2"
        value: "temp_tensor_2"
      }
      output_map {
        key: "output3"
        value: "temp_tensor_3"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "input1"
        value: "temp_tensor_1"
      }
      input_map {
        key: "input2"
        value: "temp_tensor_2"
      }
      input_map {
        key: "input3"
        value: "temp_tensor_3"
      }
      input_map {
        key: "input4"
        value: "temp_tensor_4"
      }
      output_map {
        key: "output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/output_to_tensor_overmapped/expected
================================================
ensemble tensor 'temp_tensor_2' can appear in an output map only once for ensemble 'output_to_tensor_overmapped'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/redundant_tensor_as_input/config.pbtxt
================================================
name: "redundant_tensor_as_input"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "temp_tensor_5"
      }
      output_map {
        key: "output"
        value: "temp_tensor_6"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "input"
        value: "temp_tensor_4"
      }
      output_map {
        key: "output1"
        value: "temp_tensor_1"
      }
      output_map {
        key: "output2"
        value: "temp_tensor_2"
      }
      output_map {
        key: "output3"
        value: "temp_tensor_3"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "input1"
        value: "temp_tensor_1"
      }
      input_map {
        key: "input2"
        value: "temp_tensor_2"
      }
      input_map {
        key: "input3"
        value: "temp_tensor_3"
      }
      input_map {
        key: "input4"
        value: "temp_tensor_4"
      }
      output_map {
        key: "output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/redundant_tensor_as_input/expected
================================================
ensemble tensor 'temp_tensor_6' is unused in ensemble 'redundant_tensor_as_input'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/redundant_tensor_as_input/expected_2
================================================
ensemble tensor 'temp_tensor_5' is unused in ensemble 'redundant_tensor_as_input'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/redundant_tensor_as_output/config.pbtxt
================================================
name: "redundant_tensor_as_output"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output"
        value: "temp_tensor_1"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "input"
        value: "temp_tensor_1"
      }
      output_map {
        key: "output1"
        value: "prob"
      }
      output_map {
        key: "output2"
        value: "prob_2"
      }
      output_map {
        key: "output3"
        value: "temp_tensor_2"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "prob_2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/redundant_tensor_as_output/expected
================================================
ensemble tensor 'temp_tensor_2' is unused in ensemble 'redundant_tensor_as_output'

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/self_circular_dependency/expected
================================================
circular dependency between ensembles: self_circular_dependency -> ... -> self_circular_dependency -> self_circular_dependency

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/self_circular_dependency/fp32_dim1_batch4/config.pbtxt
================================================
name: "fp32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/self_circular_dependency/fp32_dim1_batch4_input4/config.pbtxt
================================================
name: "fp32_dim1_batch4_input4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT3"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT3"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/self_circular_dependency/fp32_dim1_batch4_output3/config.pbtxt
================================================
name: "fp32_dim1_batch4_output3"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/self_circular_dependency/self_circular_dependency/config.pbtxt
================================================
name: "self_circular_dependency"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      input_map {
        key: "INPUT1"
        value: "data"
      }
      input_map {
        key: "INPUT2"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_1"
      }
      output_map {
        key: "OUTPUT1"
        value: "temp_tensor_2"
      }
      output_map {
        key: "OUTPUT2"
        value: "temp_tensor_3"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_tensor_1"
      }
      input_map {
        key: "INPUT1"
        value: "temp_tensor_2"
      }
      input_map {
        key: "INPUT2"
        value: "temp_tensor_3"
      }
      input_map {
        key: "INPUT3"
        value: "temp_tensor_4"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_5"
      }
    },
    {
      model_name: "self_circular_dependency"
      model_version: -1
      input_map {
        key: "data"
        value: "temp_tensor_5"
      }
      output_map {
        key: "prob"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/tensor_to_input_overmapped/config.pbtxt
================================================
name: "tensor_to_input_overmapped"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "temp_tensor_4"
      }
      output_map {
        key: "output"
        value: "temp_tensor_5"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output1"
        value: "temp_tensor_1"
      }
      output_map {
        key: "output2"
        value: "temp_tensor_2"
      }
      output_map {
        key: "output3"
        value: "temp_tensor_3"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "input1"
        value: "temp_tensor_1"
      }
      input_map {
        key: "input2"
        value: "temp_tensor_2"
      }
      input_map {
        key: "input3"
        value: "temp_tensor_3"
      }
      input_map {
        key: "input4"
        value: "temp_tensor_5"
      }
      input_map {
        key: "input4"
        value: "temp_tensor_4"
      }
      output_map {
        key: "output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/tensor_to_input_overmapped/expected
================================================
ensemble tensor 'temp_tensor_5' is unused in ensemble 'tensor_to_input_overmapped'


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unmapped_input/expected
================================================
in ensemble unmapped_input, input INPUT0 in model fp32_dim1_batch4_input4 is not mapped to any ensemble tensors

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unmapped_input/fp32_dim1_batch4/config.pbtxt
================================================
name: "fp32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unmapped_input/fp32_dim1_batch4_input4/config.pbtxt
================================================
name: "fp32_dim1_batch4_input4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT3"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT3"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unmapped_input/fp32_dim1_batch4_output3/config.pbtxt
================================================
name: "fp32_dim1_batch4_output3"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unmapped_input/unmapped_input/config.pbtxt
================================================
name: "unmapped_input"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      input_map {
        key: "INPUT1"
        value: "data"
      }
      input_map {
        key: "INPUT2"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor_2"
      }
      output_map {
        key: "OUTPUT1"
        value: "temp_tensor_3"
      }
      output_map {
        key: "OUTPUT2"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "INPUT1"
        value: "temp_tensor_2"
      }
      input_map {
        key: "INPUT2"
        value: "temp_tensor_3"
      }
      input_map {
        key: "INPUT3"
        value: "temp_tensor_4"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unreachable_input/config.pbtxt
================================================
name: "unreachable_input"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output1"
        value: "temp_tensor_1"
      }
      output_map {
        key: "output2"
        value: "temp_tensor_2"
      }
      output_map {
        key: "output3"
        value: "temp_tensor_3"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "input1"
        value: "temp_tensor_1"
      }
      input_map {
        key: "input2"
        value: "temp_tensor_2"
      }
      input_map {
        key: "input3"
        value: "temp_tensor_3"
      }
      input_map {
        key: "input4"
        value: "temp_tensor_4"
      }
      output_map {
        key: "output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "data_2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unreachable_input/expected
================================================
ensemble input 'data_2' for ensemble unreachable_input' is not used

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unreachable_output/config.pbtxt
================================================
name: "unreachable_output"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output1"
        value: "temp_tensor_1"
      }
      output_map {
        key: "output2"
        value: "temp_tensor_2"
      }
      output_map {
        key: "output3"
        value: "temp_tensor_3"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "input1"
        value: "temp_tensor_1"
      }
      input_map {
        key: "input2"
        value: "temp_tensor_2"
      }
      input_map {
        key: "input3"
        value: "temp_tensor_3"
      }
      input_map {
        key: "input4"
        value: "temp_tensor_4"
      }
      output_map {
        key: "output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "prob_2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unreachable_output/expected
================================================
ensemble output 'prob_2' for ensemble unreachable_output' is not used

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unreachable_output_2/config.pbtxt
================================================
name: "unreachable_output_2"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "prob_2"
      }
      output_map {
        key: "output"
        value: "prob_2"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output1"
        value: "temp_tensor_1"
      }
      output_map {
        key: "output2"
        value: "temp_tensor_2"
      }
      output_map {
        key: "output3"
        value: "temp_tensor_3"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "input1"
        value: "temp_tensor_1"
      }
      input_map {
        key: "input2"
        value: "temp_tensor_2"
      }
      input_map {
        key: "input3"
        value: "temp_tensor_3"
      }
      input_map {
        key: "input4"
        value: "temp_tensor_4"
      }
      output_map {
        key: "output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "prob_2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unreachable_output_2/expected
================================================
output 'prob_2' for ensemble 'unreachable_output_2' is not written

================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unreachable_output_3/config.pbtxt
================================================
name: "unreachable_output_3"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output"
        value: "temp_tensor_4"
      }
    },
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "input"
        value: "not_written_tensor"
      }
      output_map {
        key: "output"
        value: "prob_2"
      }
    },
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "input"
        value: "data"
      }
      output_map {
        key: "output1"
        value: "temp_tensor_1"
      }
      output_map {
        key: "output2"
        value: "temp_tensor_2"
      }
      output_map {
        key: "output3"
        value: "temp_tensor_3"
      }
    },
    {
      model_name: "fp32_dim1_batch4_input4"
      model_version: -1
      input_map {
        key: "input1"
        value: "temp_tensor_1"
      }
      input_map {
        key: "input2"
        value: "temp_tensor_2"
      }
      input_map {
        key: "input3"
        value: "temp_tensor_3"
      }
      input_map {
        key: "input4"
        value: "temp_tensor_4"
      }
      output_map {
        key: "output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "prob_2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/ensemble/unreachable_output_3/expected
================================================
output 'prob_2' for ensemble 'unreachable_output_3' is not written: at least one of its depending tensors, 'not_written_tensor', is not connected

================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/1/model.onnx
================================================
TRTIS:

INPUT0_INPUT0"Identity

INPUT1_INPUT1"Identity

_INPUT0
_INPUT1CAST0"Add

_INPUT0
_INPUT1CAST1"Sub
!
CAST0OUTPUT0"Cast*	
to
!
CAST1OUTPUT1"Cast*	
toonnx_int32_int8_int8Z
INPUT0

var_0
Z
INPUT1

var_0
b
OUTPUT0

var_1
b
OUTPUT1

var_2
B


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/config.pbtxt
================================================
max_batch_size: 1
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16, 1 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT8
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT8
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/expected
================================================
model 'bad_input_dims', tensor 'INPUT0': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 3 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,16,1\])

================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/1/model.onnx
================================================
triton:

INPUT0_INPUT0"Identity

INPUT1_INPUT1"Identity

_INPUT0
_INPUT1CAST0"Add

_INPUT0
_INPUT1CAST1"Sub
!
CAST0OUTPUT0"Cast*	
to
!
CAST1OUTPUT1"Cast*	
toonnx_nobatch_int32_int8_int8Z
INPUT0


Z
INPUT1


b
OUTPUT0


b
OUTPUT1


B

================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/config.pbtxt
================================================
max_batch_size: 1
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/expected
================================================
autofill failed for model 'bad_max_batch_size': model does not support batching while non-zero max_batch_size is specified

================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/1/model.onnx
================================================
TRTIS:

INPUT0_INPUT0"Identity

INPUT1_INPUT1"Identity

_INPUT0
_INPUT1CAST0"Add

_INPUT0
_INPUT1CAST1"Sub
!
CAST0OUTPUT0"Cast*	
to
!
CAST1OUTPUT1"Cast*	
toonnx_int32_int8_int8Z
INPUT0

var_0
Z
INPUT1

var_0
b
OUTPUT0

var_1
b
OUTPUT1

var_2
B


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/config.pbtxt
================================================
max_batch_size: 1
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT8
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT8
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/expected
================================================
model 'bad_output_dims', tensor 'OUTPUT1': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 2 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,1\])


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/1/model.onnx
================================================
TRTIS:

INPUT0_INPUT0"Identity

INPUT1_INPUT1"Identity

_INPUT0
_INPUT1CAST0"Add

_INPUT0
_INPUT1CAST1"Sub
!
CAST0OUTPUT0"Cast*	
to
!
CAST1OUTPUT1"Cast*	
toonnx_int32_int8_int8Z
INPUT0

var_0
Z
INPUT1

var_0
b
OUTPUT0

var_1
b
OUTPUT1

var_2
B


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/config.pbtxt
================================================
max_batch_size: 1
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT8
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT8
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/expected
================================================
unable to load model 'too_few_inputs', configuration expects 1 inputs, model provides 2

================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/1/model.onnx
================================================
TRTIS:

INPUT0_INPUT0"Identity

INPUT1_INPUT1"Identity

_INPUT0
_INPUT1CAST0"Add

_INPUT0
_INPUT1CAST1"Sub
!
CAST0OUTPUT0"Cast*	
to
!
CAST1OUTPUT1"Cast*	
toonnx_int32_int8_int8Z
INPUT0

var_0
Z
INPUT1

var_0
b
OUTPUT0

var_1
b
OUTPUT1

var_2
B


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/config.pbtxt
================================================
max_batch_size: 1
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT_EXTRA"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT8
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT8
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/expected
================================================
unable to load model 'too_many_inputs', configuration expects 3 inputs, model provides 2

================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/unknown_input/1/model.onnx
================================================
TRTIS:

INPUT0_INPUT0"Identity

INPUT1_INPUT1"Identity

_INPUT0
_INPUT1CAST0"Add

_INPUT0
_INPUT1CAST1"Sub
!
CAST0OUTPUT0"Cast*	
to
!
CAST1OUTPUT1"Cast*	
toonnx_int32_int8_int8Z
INPUT0

var_0
Z
INPUT1

var_0
b
OUTPUT0

var_1
b
OUTPUT1

var_2
B


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/unknown_input/config.pbtxt
================================================
max_batch_size: 1
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT_UNKNOWN"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT8
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT8
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/unknown_input/expected
================================================
unexpected inference input 'INPUT_UNKNOWN', allowed inputs are: INPUT0, INPUT1

================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/unknown_output/1/model.onnx
================================================
TRTIS:

INPUT0_INPUT0"Identity

INPUT1_INPUT1"Identity

_INPUT0
_INPUT1CAST0"Add

_INPUT0
_INPUT1CAST1"Sub
!
CAST0OUTPUT0"Cast*	
to
!
CAST1OUTPUT1"Cast*	
toonnx_int32_int8_int8Z
INPUT0

var_0
Z
INPUT1

var_0
b
OUTPUT0

var_1
b
OUTPUT1

var_2
B


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/unknown_output/config.pbtxt
================================================
max_batch_size: 1
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT_UNKNOWN"
    data_type: TYPE_INT8
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/onnx/unknown_output/expected
================================================
unexpected inference output 'OUTPUT_UNKNOWN', allowed outputs are: OUTPUT0, OUTPUT1

================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/bad_input_dims/config.pbtxt
================================================
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 256
}


================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/bad_input_dims/expected
================================================
model 'bad_input_dims', tensor 'input1': the model expects 2 dimensions (shape \[1,4\]) but the model configuration specifies 2 dimensions (shape \[1,256\])

================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/bad_output_dims/config.pbtxt
================================================
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 128
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}


================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/bad_output_dims/expected
================================================
model 'bad_output_dims', tensor 'Func/PartitionedCall/output/_2:0': the model expects 2 dimensions (shape \[1,4\]) but the model configuration specifies 2 dimensions (shape \[1,128\])

================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/too_few_inputs/config.pbtxt
================================================
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}


================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/too_few_inputs/expected
================================================
unable to load model 'too_few_inputs', configuration expects 1 inputs, model provides 2

================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/too_many_inputs/config.pbtxt
================================================
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "input_extra"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}


================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/too_many_inputs/expected
================================================
unable to load model 'too_many_inputs', configuration expects 3 inputs, model provides 2

================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/unknown_input/config.pbtxt
================================================
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "unknown_input"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}


================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/unknown_input/expected
================================================
unexpected inference input 'unknown_input', allowed inputs are: Func/PartitionedCall/input/_0:0, input1

================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/unknown_output/config.pbtxt
================================================
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "unknown_output"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}


================================================
FILE: qa/L0_model_config/autofill_noplatform/openvino/unknown_output/expected
================================================
unexpected inference output 'unknown_output', allowed outputs are: Func/PartitionedCall/output/_2:0, Func/PartitionedCall/output/_3:0

================================================
FILE: qa/L0_model_config/autofill_noplatform/python/conflicting_max_batch_size/config.pbtxt
================================================
name: "conflicting_max_batch_size"
max_batch_size: 6

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/conflicting_max_batch_size/expected
================================================
configuration specified max_batch_size 6, but in auto-complete-config function for model 'conflicting_max_batch_size' specified max_batch_size 4


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/conflicting_max_batch_size/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Test fixture model that auto-completes its config with max_batch_size 4.

    The accompanying config.pbtxt declares max_batch_size 6, so the value set
    here is expected to conflict with it.
    """

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Declare max_batch_size 4 plus two inputs and two outputs.

        Every tensor is registered as TYPE_FP32 with dims [4]. The object
        passed in is mutated through its setter methods and then returned.
        """
        auto_complete_model_config.set_max_batch_size(4)

        # Inputs are registered before outputs, each in ascending index order.
        for input_name in ("INPUT0", "INPUT1"):
            auto_complete_model_config.add_input(
                {"name": input_name, "data_type": "TYPE_FP32", "dims": [4]}
            )
        for output_name in ("OUTPUT0", "OUTPUT1"):
            auto_complete_model_config.add_output(
                {"name": output_name, "data_type": "TYPE_FP32", "dims": [4]}
            )

        return auto_complete_model_config

    def execute(self, requests):
        """Do nothing; this fixture only exercises config auto-completion."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/conflicting_scheduler_sequence/config.pbtxt
================================================
name: "conflicting_scheduler_sequence"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
sequence_batching: {
}

================================================
FILE: qa/L0_model_config/autofill_noplatform/python/conflicting_scheduler_sequence/expected
================================================
Configuration specified scheduling_choice as 'sequence_batching', but auto-complete-config function for model 'conflicting_scheduler_sequence' tries to set scheduling_choice as 'dynamic_batching'

================================================
FILE: qa/L0_model_config/autofill_noplatform/python/conflicting_scheduler_sequence/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Test fixture model that requests dynamic batching during auto-complete.

    The accompanying config.pbtxt declares ``sequence_batching``, so the
    dynamic-batching request made here is expected to conflict with it.
    """

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Set max_batch_size 4, enable dynamic batching, and add the tensors.

        Two inputs and two outputs are registered, all TYPE_FP32 with
        dims [4]. The object passed in is mutated and then returned.
        """
        config = auto_complete_model_config

        # Batching settings are applied before any tensor is registered.
        config.set_max_batch_size(4)
        config.set_dynamic_batching()

        for index in range(2):
            config.add_input(
                {"name": f"INPUT{index}", "data_type": "TYPE_FP32", "dims": [4]}
            )
        for index in range(2):
            config.add_output(
                {"name": f"OUTPUT{index}", "data_type": "TYPE_FP32", "dims": [4]}
            )

        return config

    def execute(self, requests):
        """Do nothing; this fixture only exercises config auto-completion."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_mismatch_datatype/config.pbtxt
================================================
name: "input_mismatch_datatype"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_mismatch_datatype/expected
================================================
unable to load model 'input_mismatch_datatype', configuration expects datatype TYPE_INT32 for input 'INPUT1', model provides TYPE_FP32


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_mismatch_dims/config.pbtxt
================================================
name: "input_mismatch_dims"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_mismatch_dims/expected
================================================
model 'input_mismatch_dims', tensor 'INPUT1': the model expects dims \[4\] but the model configuration specifies dims \[16\]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_datatype/config.pbtxt
================================================
name: "input_missing_datatype"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_datatype/expected
================================================
input 'INPUT0' in auto-complete-config function for model 'input_missing_datatype' is missing 'data_type' property.


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_datatype/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Test fixture model whose first auto-completed input has no data_type."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Set max_batch_size 0 and register two inputs and two outputs.

        INPUT0 is declared with only 'name' and 'dims'; its 'data_type' key is
        left out on purpose. The remaining tensors are TYPE_FP32 with dims [4].
        The object passed in is mutated and then returned.
        """
        input_specs = (
            # No "data_type" entry here: this is the condition under test.
            {"name": "INPUT0", "dims": [4]},
            {"name": "INPUT1", "data_type": "TYPE_FP32", "dims": [4]},
        )
        output_specs = (
            {"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [4]},
            {"name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [4]},
        )

        auto_complete_model_config.set_max_batch_size(0)
        for spec in input_specs:
            auto_complete_model_config.add_input(spec)
        for spec in output_specs:
            auto_complete_model_config.add_output(spec)

        return auto_complete_model_config

    def execute(self, requests):
        """Do nothing; this fixture only exercises config auto-completion."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_dims/config.pbtxt
================================================
name: "input_missing_dims"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_dims/expected
================================================
input 'INPUT1' in auto-complete-config function for model 'input_missing_dims' is missing 'dims' property.


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_dims/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Test fixture model whose second auto-completed input has no dims."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Set max_batch_size 0 and register two inputs and two outputs.

        INPUT1 is declared with only 'name' and 'data_type'; its 'dims' key is
        left out on purpose. The remaining tensors are TYPE_FP32 with dims [4].
        The object passed in is mutated and then returned.
        """
        input_specs = (
            {"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]},
            # No "dims" entry here: this is the condition under test.
            {"name": "INPUT1", "data_type": "TYPE_FP32"},
        )
        output_specs = (
            {"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [4]},
            {"name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [4]},
        )

        auto_complete_model_config.set_max_batch_size(0)
        for spec in input_specs:
            auto_complete_model_config.add_input(spec)
        for spec in output_specs:
            auto_complete_model_config.add_output(spec)

        return auto_complete_model_config

    def execute(self, requests):
        """Do nothing; this fixture only exercises config auto-completion."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_name/config.pbtxt
================================================
name: "input_missing_name"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_name/expected
================================================
input in auto-complete-config function for model 'input_missing_name' is missing 'name' property.


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_name/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Negative-test fixture: the second declared input carries no 'name' key."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Declare two inputs and two outputs on the supplied config object.

        The second input dictionary deliberately omits 'name'; the fixture's
        accompanying `expected` file anticipates an error saying the input is
        missing its 'name' property.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        tensor_inputs = [
            {"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]},
            # Intentionally nameless -- this is the condition under test.
            {"data_type": "TYPE_FP32", "dims": [4]},
        ]
        tensor_outputs = [
            {"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [4]},
            {"name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [4]},
        ]

        auto_complete_model_config.set_max_batch_size(0)
        for spec in tensor_inputs:
            auto_complete_model_config.add_input(spec)
        for spec in tensor_outputs:
            auto_complete_model_config.add_output(spec)

        return auto_complete_model_config

    def execute(self, requests):
        """Do nothing; this fixture performs no inference work."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_wrong_property/config.pbtxt
================================================
name: "input_wrong_property"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_wrong_property/expected
================================================
input 'INPUT1' in auto-complete-config function for model 'input_wrong_property' contains property other than 'name', 'data_type', 'dims' and 'optional'.


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/input_wrong_property/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Negative-test fixture: INPUT1 is declared with an unsupported extra key."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Declare two inputs and two outputs; INPUT1 carries a stray property.

        The extra key is the literal string ``"is_shape_tensor:"`` (trailing
        colon included). The fixture's `expected` file anticipates an error
        saying INPUT1 contains a property other than 'name', 'data_type',
        'dims' and 'optional'.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        cfg = auto_complete_model_config

        cfg.set_max_batch_size(0)
        cfg.add_input({"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]})
        cfg.add_input(
            {
                "name": "INPUT1",
                "data_type": "TYPE_FP32",
                "dims": [4],
                # Unsupported key on purpose -- this is the condition under test.
                "is_shape_tensor:": True,
            }
        )
        cfg.add_output({"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [4]})
        cfg.add_output({"name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [4]})

        return cfg

    def execute(self, requests):
        """Do nothing; this fixture performs no inference work."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/model_transaction_policy_invalid_args/config.pbtxt
================================================
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/model_transaction_policy_invalid_args/expected
================================================
model transaction property in auto-complete-config function for model 'model_transaction_policy_invalid_args' contains property other than 'decoupled'


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/model_transaction_policy_invalid_args/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Negative-test fixture: passes an unrecognised transaction-policy key."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Set batch size 4, an invalid transaction policy, then declare tensors.

        The policy dictionary holds only the key 'invalid'. The fixture's
        `expected` file anticipates an error saying the model transaction
        property contains a property other than 'decoupled'.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        auto_complete_model_config.set_max_batch_size(4)
        # Bogus key on purpose -- this is the condition under test.
        auto_complete_model_config.set_model_transaction_policy(
            {"invalid": "argument"}
        )

        for tensor_name in ("INPUT0", "INPUT1"):
            auto_complete_model_config.add_input(
                {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}
            )
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            auto_complete_model_config.add_output(
                {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}
            )

        return auto_complete_model_config

    def execute(self, requests):
        """Do nothing; this fixture performs no inference work."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/model_transaction_policy_mismatch/config.pbtxt
================================================
model_transaction_policy {
  decoupled: false
}

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/model_transaction_policy_mismatch/expected
================================================
trying to change decoupled property in auto-complete-config for model 'model_transaction_policy_mismatch', which is already set to 'False'


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/model_transaction_policy_mismatch/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Negative-test fixture: requests decoupled=True against a conflicting config."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Set batch size 4, request a decoupled policy, then declare tensors.

        The fixture's config.pbtxt sets ``decoupled: false``; asking for
        ``decoupled=True`` here is the condition under test. The `expected`
        file anticipates an error about trying to change the decoupled
        property, which is already set to 'False'.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        cfg = auto_complete_model_config
        requested_policy = {"decoupled": True}
        input_specs = (
            {"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]},
            {"name": "INPUT1", "data_type": "TYPE_FP32", "dims": [4]},
        )
        output_specs = (
            {"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [4]},
            {"name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [4]},
        )

        cfg.set_max_batch_size(4)
        cfg.set_model_transaction_policy(requested_policy)
        for spec in input_specs:
            cfg.add_input(spec)
        for spec in output_specs:
            cfg.add_output(spec)

        return cfg

    def execute(self, requests):
        """Do nothing; this fixture performs no inference work."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/no_return/config.pbtxt
================================================
name: "no_return"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/no_return/expected
================================================
auto_complete_config function in model 'no_return' must return a valid pb.ModelConfig object.


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/no_return/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Negative-test fixture: auto_complete_config never returns the config."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Declare two FP32 inputs and outputs, then fall through without returning.

        The absent ``return`` is the condition under test: the fixture's
        `expected` file anticipates an error saying the function must return
        a valid pb.ModelConfig object. The call therefore yields ``None``.
        """
        auto_complete_model_config.set_max_batch_size(0)

        for tensor_name in ("INPUT0", "INPUT1"):
            auto_complete_model_config.add_input(
                {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}
            )
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            auto_complete_model_config.add_output(
                {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}
            )
        # Deliberately no return statement here.

    def execute(self, requests):
        """Do nothing; this fixture performs no inference work."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_mismatch_datatype/config.pbtxt
================================================
name: "output_mismatch_datatype"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_mismatch_datatype/expected
================================================
unable to load model 'output_mismatch_datatype', configuration expects datatype TYPE_INT32 for output 'OUTPUT0', model provides TYPE_FP32


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_mismatch_dims/config.pbtxt
================================================
name: "output_mismatch_dims"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_mismatch_dims/expected
================================================
model 'output_mismatch_dims', tensor 'OUTPUT1': the model expects dims \[4\] but the model configuration specifies dims \[16\]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_datatype/config.pbtxt
================================================
name: "output_missing_datatype"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_datatype/expected
================================================
output 'OUTPUT0' in auto-complete-config function for model 'output_missing_datatype' is missing 'data_type' property.


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_datatype/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Negative-test fixture: OUTPUT0 is declared without a 'data_type' key."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Declare two inputs and two outputs; OUTPUT0 has no data type.

        The fixture's `expected` file anticipates an error saying output
        'OUTPUT0' is missing its 'data_type' property.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        cfg = auto_complete_model_config

        cfg.set_max_batch_size(0)
        cfg.add_input({"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]})
        cfg.add_input({"name": "INPUT1", "data_type": "TYPE_FP32", "dims": [4]})
        # 'data_type' left out on purpose -- this is the condition under test.
        cfg.add_output({"name": "OUTPUT0", "dims": [4]})
        cfg.add_output({"name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [4]})

        return cfg

    def execute(self, requests):
        """Do nothing; this fixture performs no inference work."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_dims/config.pbtxt
================================================
name: "output_missing_dims"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_dims/expected
================================================
output 'OUTPUT1' in auto-complete-config function for model 'output_missing_dims' is missing 'dims' property.


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_dims/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Negative-test fixture: OUTPUT1 is declared without a 'dims' key."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Declare two inputs and two outputs; OUTPUT1 has no dims.

        The fixture's `expected` file anticipates an error saying output
        'OUTPUT1' is missing its 'dims' property.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        declared_inputs = (
            {"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]},
            {"name": "INPUT1", "data_type": "TYPE_FP32", "dims": [4]},
        )
        declared_outputs = (
            {"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [4]},
            # 'dims' left out on purpose -- this is the condition under test.
            {"name": "OUTPUT1", "data_type": "TYPE_FP32"},
        )

        auto_complete_model_config.set_max_batch_size(0)
        for tensor in declared_inputs:
            auto_complete_model_config.add_input(tensor)
        for tensor in declared_outputs:
            auto_complete_model_config.add_output(tensor)

        return auto_complete_model_config

    def execute(self, requests):
        """Do nothing; this fixture performs no inference work."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_name/config.pbtxt
================================================
name: "output_missing_name"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_name/expected
================================================
output in auto-complete-config function for model 'output_missing_name' is missing 'name' property.


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_name/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Negative-test fixture: the first declared output carries no 'name' key."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Declare two inputs and two outputs; the first output is nameless.

        The fixture's `expected` file anticipates an error saying the output
        is missing its 'name' property.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        cfg = auto_complete_model_config
        cfg.set_max_batch_size(0)

        for tensor_name in ("INPUT0", "INPUT1"):
            cfg.add_input(
                {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}
            )

        # 'name' left out on purpose -- this is the condition under test.
        cfg.add_output({"data_type": "TYPE_FP32", "dims": [4]})
        cfg.add_output({"name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [4]})

        return cfg

    def execute(self, requests):
        """Do nothing; this fixture performs no inference work."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_wrong_property/config.pbtxt
================================================
name: "output_wrong_property"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_wrong_property/expected
================================================
output 'OUTPUT1' in auto-complete-config function for model 'output_wrong_property' contains property other than 'name', 'data_type' and 'dims'.


================================================
FILE: qa/L0_model_config/autofill_noplatform/python/output_wrong_property/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Negative-test fixture: declares an output carrying an unexpected key.

    The second output's specification includes the key ``"is_shape_tensor:"``
    (trailing colon included) in addition to ``name``, ``data_type`` and
    ``dims``. The key is reproduced verbatim on purpose; the fixture's
    accompanying ``expected`` file anticipates an error about ``OUTPUT1``
    containing a property other than those three.
    """

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Register two inputs and two outputs on the supplied config object.

        Sets the max batch size to 0, adds ``INPUT0``/``INPUT1`` and then
        ``OUTPUT0``/``OUTPUT1`` (in that order), and returns the same object
        that was passed in.
        """

        def fp32_vec4(tensor_name):
            # Every tensor in this fixture shares the same dtype and shape.
            return {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}

        input_specs = [fp32_vec4("INPUT0"), fp32_vec4("INPUT1")]

        # Extra key appended after the three regular ones; kept byte-for-byte
        # because it is the property this fixture is built around.
        flawed_output = fp32_vec4("OUTPUT1")
        flawed_output["is_shape_tensor:"] = True
        output_specs = [fp32_vec4("OUTPUT0"), flawed_output]

        auto_complete_model_config.set_max_batch_size(0)
        for spec in input_specs:
            auto_complete_model_config.add_input(spec)
        for spec in output_specs:
            auto_complete_model_config.add_output(spec)

        return auto_complete_model_config

    def execute(self, requests):
        """No-op; this fixture performs no work here and returns ``None``."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform/pytorch/too_few_inputs/config.pbtxt
================================================
max_batch_size: 1
output [
  {
    name: "OUTPUT__0"
    data_type: TYPE_INT8
    dims: [ 16 ]
  },
  {
    name: "OUTPUT__1"
    data_type: TYPE_INT8
    dims: [ 16 ]
  }
]
backend: "pytorch"


================================================
FILE: qa/L0_model_config/autofill_noplatform/pytorch/too_few_inputs/expected
================================================
unable to load model 'too_few_inputs', configuration expects 0 inputs, model provides 2

================================================
FILE: qa/L0_model_config/autofill_noplatform/pytorch/too_few_outputs/config.pbtxt
================================================
max_batch_size: 1
input [
  {
    name: "INPUT__0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT__1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
backend: "pytorch"


================================================
FILE: qa/L0_model_config/autofill_noplatform/pytorch/too_few_outputs/expected
================================================
model configuration must contain at least one output, none were specified

================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_dynamic_shapes_max/config.pbtxt
================================================
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 33 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 33 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 33 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 33 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_dynamic_shapes_max/expected
================================================
model configuration specified invalid shape for input 'INPUT0' for model bad_dynamic_shapes_max. Error details: model expected the shape of dimension 1 to be between 4 and 32 but received 33


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_dynamic_shapes_min/config.pbtxt
================================================
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 3 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 3 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 3 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 3 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_dynamic_shapes_min/expected
================================================
model configuration specified invalid shape for input 'INPUT0' for model bad_dynamic_shapes_min. Error details: model expected the shape of dimension 1 to be between 4 and 32 but received 3


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_dims/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 7 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_dims/expected
================================================
model 'bad_input_dims', tensor 'INPUT1': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 2 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,7\])

================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_non_linear_format_io/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
    is_non_linear_format_io: true
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_non_linear_format_io/expected
================================================
'INPUT0' uses a linear IO format, but 'is_non_linear_format_io' is incorrectly set to true in the model configuration.


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_shape/config.pbtxt
================================================

max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16, 1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_shape/expected
================================================
unable to autofill for 'bad_input_shape', model tensor configurations are contradicting each other in terms of whether batching is supported

================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_shape_tensor/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
    is_shape_tensor: true
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_shape_tensor/expected
================================================
'INPUT0' is incorrectly specified as a shape tensor.


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_type/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP16
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_type/expected
================================================
unexpected datatype TYPE_FP32 for inference input 'INPUT0', expecting TYPE_FP16 for bad_input_type


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_dims/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 7 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_dims/expected
================================================
model 'bad_output_dims', tensor 'OUTPUT1': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 2 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,7\])

================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_shape/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16, 1]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_shape/expected
================================================
unable to autofill for 'bad_output_shape', model tensor configurations are contradicting each other in terms of whether batching is supported

================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_shape_tensor/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
    is_shape_tensor: true
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_shape_tensor/expected
================================================
'OUTPUT1' is incorrectly specified as a shape tensor.


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_type/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT8
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_type/expected
================================================
unexpected datatype TYPE_FP32 for inference output 'OUTPUT1', expecting TYPE_INT8 for bad_output_type


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_outut_non_linear_format_io/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
    is_non_linear_format_io: true
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/bad_outut_non_linear_format_io/expected
================================================
'OUTPUT1' uses a linear IO format, but 'is_non_linear_format_io' is incorrectly set to true in the model configuration.


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/mixed_batch_hint_dims/config.pbtxt
================================================
input [
  {
    name: "DUMMY_INPUT0"
    data_type: TYPE_FP32
    dims: [ -1,-1 ]
  },
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 2 ]
  }
]
output [
  {
    name: "DUMMY_OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1,-1,-1 ]
  },
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 2 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/mixed_batch_hint_dims/expected
================================================
model tensor configurations are contradicting each other in terms of whether batching is supported


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/mixed_batch_hint_shape_values/config.pbtxt
================================================
input [
  {
    name: "DUMMY_INPUT0"
    data_type: TYPE_FP32
    dims: [ -1,-1 ]
  },
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 3 ]
  }
]
output [
  {
    name: "DUMMY_OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1,-1 ]
  },
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 2 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/mixed_batch_hint_shape_values/expected
================================================
model tensor configurations are contradicting each other in terms of whether batching is supported


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/too_few_inputs/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/too_few_inputs/expected
================================================
failed to specify the dimensions of all input tensors or values of all input shape tensors

================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/too_many_inputs/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT_EXTRA"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/too_many_inputs/expected
================================================
unexpected inference input 'INPUT_EXTRA', allowed inputs are: INPUT0, INPUT1

================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/unknown_input/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT_UNKNOWN"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/unknown_input/expected
================================================
unexpected inference input 'INPUT_UNKNOWN', allowed inputs are: INPUT0, INPUT1

================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/unknown_output/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT_UNKNOWN"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform/tensorrt/unknown_output/expected
================================================
unexpected inference output 'OUTPUT_UNKNOWN', allowed outputs are: OUTPUT0, OUTPUT1

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/custom/empty_config.identity/config.pbtxt
================================================


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/custom/empty_config.identity/expected
================================================
name: "empty_config.identity"
version_policy {
latest {
    num_versions: 1
}
}
instance_group {
name: "empty_config.identity"
count: 1
gpus: 0
kind: KIND_GPU
}
default_model_filename: "model.identity"
optimization {
input_pinned_memory {
    enable: true
}
output_pinned_memory {
    enable: true
}
}
backend: "identity"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/custom/no_backend.identity/config.pbtxt
================================================
max_batch_size: 64
input [
 {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 1000 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 1000 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/custom/no_backend.identity/expected
================================================
name: "no_backend.identity"
version_policy {
latest {
    num_versions: 1
}
}
max_batch_size: 64
input {
name: "INPUT0"
data_type: TYPE_INT32
dims: 1000
}
output {
name: "OUTPUT0"
data_type: TYPE_INT32
dims: 1000
}
instance_group {
name: "no_backend.identity"
count: 1
gpus: 0
kind: KIND_GPU
}
default_model_filename: "model.identity"
optimization {
input_pinned_memory {
    enable: true
}
output_pinned_memory {
    enable: true
}
}
backend: "identity"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/embedded_ensemble/embedded_ensemble/config.pbtxt
================================================
name: "embedded_ensemble"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "inner_ensemble"
      model_version: -1
      input_map {
        key: "data"
        value: "data"
      }
      output_map {
        key: "prob"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/embedded_ensemble/embedded_ensemble/expected
================================================
name: "embedded_ensemble"
max_batch_size: 2
platform: "ensemble"
version_policy {
  latest {
    num_versions: 1
  }
}
ensemble_scheduling {
  step [
    {
      model_name: "inner_ensemble"
      model_version: -1
      input_map {
        key: "data"
        value: "data"
      }
      output_map {
        key: "prob"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
model_transaction_policy {
}

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/embedded_ensemble/fp32_dim1_batch4/config.pbtxt
================================================
name: "fp32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/embedded_ensemble/inner_ensemble/config.pbtxt
================================================
name: "inner_ensemble"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/inconsistent_shape/fp32_dim1_batch4/config.pbtxt
================================================
name: "fp32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/inconsistent_shape/fp32_dim2_nobatch/config.pbtxt
================================================
name: "fp32_dim2_nobatch"
max_batch_size: 0
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1, 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1, 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/inconsistent_shape/inconsistent_shape/config.pbtxt
================================================
name: "inconsistent_shape"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor"
      }
    },
    {
      model_name: "fp32_dim2_nobatch"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_tensor"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/inconsistent_shape/inconsistent_shape/expected
================================================
name: "inconsistent_shape"
max_batch_size: 2
platform: "ensemble"
version_policy {
  latest {
    num_versions: 1
  }
}
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor"
      }
    },
    {
      model_name: "fp32_dim2_nobatch"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_tensor"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
model_transaction_policy {
}

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/inconsistent_shape_2/fp32_dim1_batch4/config.pbtxt
================================================
name: "fp32_dim1_batch4"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/inconsistent_shape_2/fp32_dim2_nobatch/config.pbtxt
================================================
name: "fp32_dim2_nobatch"
max_batch_size: 0
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1, 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1, 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/inconsistent_shape_2/inconsistent_shape_2/config.pbtxt
================================================
name: "inconsistent_shape_2"
max_batch_size: 0
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim2_nobatch"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor"
      }
    },
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_tensor"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 4, 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 4, 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/inconsistent_shape_2/inconsistent_shape_2/expected
================================================
name: "inconsistent_shape_2"
max_batch_size: 0
platform: "ensemble"
version_policy {
  latest {
    num_versions: 1
  }
}
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim2_nobatch"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_tensor"
      }
    },
    {
      model_name: "fp32_dim1_batch4"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_tensor"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 4, 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 4, 16 ]
  }
]
model_transaction_policy {
}

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/unmapped_output/fp32_dim1_batch4_output3/config.pbtxt
================================================
name: "fp32_dim1_batch4_output3"
max_batch_size: 4
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/unmapped_output/unmapped_output/config.pbtxt
================================================
name: "unmapped_output"
max_batch_size: 2
platform: "ensemble"
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      input_map {
        key: "INPUT1"
        value: "data"
      }
      input_map {
        key: "INPUT2"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/ensemble/unmapped_output/unmapped_output/expected
================================================
name: "unmapped_output"
max_batch_size: 2
platform: "ensemble"
version_policy {
  latest {
    num_versions: 1
  }
}
ensemble_scheduling {
  step [
    {
      model_name: "fp32_dim1_batch4_output3"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "data"
      }
      input_map {
        key: "INPUT1"
        value: "data"
      }
      input_map {
        key: "INPUT2"
        value: "data"
      }
      output_map {
        key: "OUTPUT0"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
model_transaction_policy {
}

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/config.pbtxt
================================================

name: "cpu_instance"
platform: "onnxruntime_onnx"
max_batch_size: 8
version_policy: { latest { num_versions: 1 }}
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP16
    dims: [ -1,-1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP16
    dims: [ -1,-1 ]
  }
]
instance_group {
  name: "cpu_instance"
  kind: KIND_CPU
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/expected
================================================
name: "cpu_instance"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP16
  dims: -1
  dims: -1
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP16
  dims: -1
  dims: -1
}
instance_group {
  name: "cpu_instance"
  count: 2
  kind: KIND_CPU
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/1/model.onnx
================================================
TRTIS:

INPUT0_INPUT0"Identity

INPUT1_INPUT1"Identity

_INPUT0
_INPUT1CAST0"Add

_INPUT0
_INPUT1CAST1"Sub
!
CAST0OUTPUT0"Cast*	
to
!
CAST1OUTPUT1"Cast*	
toonnx_int32_int8_int8Z
INPUT0

var_0
Z
INPUT1

var_0
b
OUTPUT0

var_1
b
OUTPUT1

var_2
B


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/config.pbtxt
================================================


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected
================================================
name: "empty_config"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "empty_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 4
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.1
================================================
name: "empty_config"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "empty_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 4
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.2
================================================
name: "empty_config"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "empty_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 4
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.3
================================================
name: "empty_config"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "empty_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 4
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config/1/model.onnx
================================================
TRTIS:

INPUT0_INPUT0"Identity

INPUT1_INPUT1"Identity

_INPUT0
_INPUT1CAST0"Add

_INPUT0
_INPUT1CAST1"Sub
!
CAST0OUTPUT0"Cast*	
to
!
CAST1OUTPUT1"Cast*	
toonnx_int32_int8_int8Z
INPUT0

var_0
Z
INPUT1

var_0
b
OUTPUT0

var_1
b
OUTPUT1

var_2
B


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected
================================================
name: "no_config"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "no_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 4
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.1
================================================
name: "no_config"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "no_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 4
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.2
================================================
name: "no_config"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "no_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 4
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.3
================================================
name: "no_config"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "no_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 4
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/1/model.onnx
================================================
TRTIS:

INPUT0_INPUT0"Identity

INPUT1_INPUT1"Identity

_INPUT0
_INPUT1CAST0"Add

_INPUT0
_INPUT1CAST1"Sub
!
CAST0OUTPUT0"Cast*	
to
!
CAST1OUTPUT1"Cast*	
toonnx_nobatch_int32_int8_int8Z
INPUT0


Z
INPUT1


b
OUTPUT0


b
OUTPUT1


B


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/config.pbtxt
================================================
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected
================================================
name: "no_config_no_batch"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "no_config_no_batch_0"
  count: 2
  kind: KIND_CPU
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.1
================================================
name: "no_config_no_batch"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "no_config_no_batch_0"
  count: 2
  kind: KIND_CPU
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.2
================================================
name: "no_config_no_batch"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "no_config_no_batch_0"
  count: 2
  kind: KIND_CPU
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.3
================================================
name: "no_config_no_batch"
platform: "onnxruntime_onnx"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT1"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "no_config_no_batch_0"
  count: 2
  kind: KIND_CPU
}
default_model_filename: "model.onnx"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "onnxruntime"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/dynamic_batch/config.pbtxt
================================================


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/dynamic_batch/expected
================================================
name: "dynamic_batch"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 4
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 4
}
instance_group {
  name: "dynamic_batch"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
dynamic_batching {
  preferred_batch_size: 4
}
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/dynamic_batch/expected.1
================================================
name: "dynamic_batch"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 4
}
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 4
}
instance_group {
  name: "dynamic_batch"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
dynamic_batching {
  preferred_batch_size: 4
}
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/dynamic_batch/expected.2
================================================
name: "dynamic_batch"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 4
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 4
}
instance_group {
  name: "dynamic_batch"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
dynamic_batching {
  preferred_batch_size: 4
}
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/dynamic_batch/expected.3
================================================
name: "dynamic_batch"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 4
}
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 4
}
instance_group {
  name: "dynamic_batch"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
dynamic_batching {
  preferred_batch_size: 4
}
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/empty_config/config.pbtxt
================================================


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/empty_config/expected
================================================
name: "empty_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
instance_group {
  name: "empty_config"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/empty_config/expected.1
================================================
name: "empty_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
instance_group {
  name: "empty_config"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/empty_config/expected.2
================================================
name: "empty_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
instance_group {
  name: "empty_config"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/empty_config/expected.3
================================================
name: "empty_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
instance_group {
  name: "empty_config"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/no_config/expected
================================================
name: "no_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
instance_group {
  name: "no_config"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/no_config/expected.1
================================================
name: "no_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
instance_group {
  name: "no_config"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/no_config/expected.2
================================================
name: "no_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
instance_group {
  name: "no_config"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/no_config/expected.3
================================================
name: "no_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "input1"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
input {
  name: "Func/PartitionedCall/input/_0:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_3:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
output {
  name: "Func/PartitionedCall/output/_2:0"
  data_type: TYPE_INT32
  dims: 1
  dims: 4
}
instance_group {
  name: "no_config"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/partial_config/config.pbtxt
================================================
max_batch_size: 8
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT8
    dims: [ 16 ]
    label_filename: "output0_labels.txt"
   },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT8
    dims: [ 16 ]
  }
]

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/partial_config/expected
================================================
name: "partial_config"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT1"
  data_type: TYPE_INT8
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
  label_filename: "output0_labels.txt"
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "partial_config"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/openvino/partial_config/expected.1
================================================
name: "partial_config"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_INT8
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_INT8
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT8
  dims: 16
  label_filename: "output0_labels.txt"
}
output {
  name: "OUTPUT1"
  data_type: TYPE_INT8
  dims: 16
}
instance_group {
  name: "partial_config"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.xml"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "openvino"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/conflicting_scheduler_ensemble/conflicting_scheduler_ensemble/config.pbtxt
================================================
name: "conflicting_scheduler_ensemble"
platform: "ensemble"
input [
  {
    name: "ENSEMBLE_INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "ENSEMBLE_OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
ensemble_scheduling {
  step [
    {
      # batch model
      model_name: "ensemble_first_step"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "ENSEMBLE_INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_output_0"
      }
    },
    {
      model_name: "ensemble_second_step"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "temp_output_0"
      }
      output_map {
        key: "OUTPUT0"
        value: "ENSEMBLE_OUTPUT0"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/conflicting_scheduler_ensemble/conflicting_scheduler_ensemble/expected
================================================
name: "conflicting_scheduler_ensemble"
  platform: "ensemble"
  version_policy {
    latest {
      num_versions: 1
    }
  }
  input {
    name: "ENSEMBLE_INPUT0"
    data_type: TYPE_FP32
    dims: 4
  }
  output {
    name: "ENSEMBLE_OUTPUT0"
    data_type: TYPE_FP32
    dims: 4
  }
  ensemble_scheduling {
    step {
      model_name: "ensemble_first_step"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "ENSEMBLE_INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_output_0"
      }
    }
    step {
      model_name: "ensemble_second_step"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "temp_output_0"
      }
      output_map {
        key: "OUTPUT0"
        value: "ENSEMBLE_OUTPUT0"
      }
    }
  }
  model_transaction_policy {
  }

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/conflicting_scheduler_ensemble/conflicting_scheduler_ensemble/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Model whose configuration is filled in by auto_complete_config."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Populate the given config object and return that same object.

        In order: sets a max batch size of 4, enables dynamic batching,
        then registers one TYPE_FP32 input ("INPUT0") and one TYPE_FP32
        output ("OUTPUT0"), each with dims [4].
        """
        config = auto_complete_model_config

        config.set_max_batch_size(4)
        config.set_dynamic_batching()
        config.add_input(dict(name="INPUT0", data_type="TYPE_FP32", dims=[4]))
        config.add_output(dict(name="OUTPUT0", data_type="TYPE_FP32", dims=[4]))

        return config

    def execute(self, requests):
        """Do nothing with the requests; always returns None."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/conflicting_scheduler_ensemble/ensemble_first_step/config.pbtxt
================================================
name: "ensemble_first_step"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/conflicting_scheduler_ensemble/ensemble_first_step/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Model whose configuration is filled in by auto_complete_config."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Populate the given config object and return that same object.

        In order: sets a max batch size of 4, enables dynamic batching,
        then registers one TYPE_FP32 input ("INPUT0") and one TYPE_FP32
        output ("OUTPUT0"), each with dims [4].
        """
        config = auto_complete_model_config

        config.set_max_batch_size(4)
        config.set_dynamic_batching()
        config.add_input(dict(name="INPUT0", data_type="TYPE_FP32", dims=[4]))
        config.add_output(dict(name="OUTPUT0", data_type="TYPE_FP32", dims=[4]))

        return config

    def execute(self, requests):
        """Do nothing with the requests; always returns None."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/conflicting_scheduler_ensemble/ensemble_second_step/config.pbtxt
================================================
name: "ensemble_second_step"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/conflicting_scheduler_ensemble/ensemble_second_step/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Fixture model that fills in one FP32 input, one FP32 output,
    a max batch size and dynamic batching through auto-complete."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Populate the supplied config object and hand it back.

        Calls, in order: ``set_max_batch_size(4)``,
        ``set_dynamic_batching()``, ``add_input`` for INPUT0 and
        ``add_output`` for OUTPUT0. The same object that was passed in
        is returned.
        """
        config = auto_complete_model_config

        def fp32_vec4(tensor_name):
            # Every tensor in this fixture is a 4-element FP32 vector.
            return {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}

        config.set_max_batch_size(4)
        config.set_dynamic_batching()
        config.add_input(fp32_vec4("INPUT0"))
        config.add_output(fp32_vec4("OUTPUT0"))
        return config

    def execute(self, requests):
        """Do nothing with ``requests``; the fixture only exercises
        config auto-completion."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching/config.pbtxt
================================================
name: "dynamic_batching"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching/expected
================================================
name: "dynamic_batching"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "dynamic_batching"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
dynamic_batching {
  preferred_batch_size: 4
}
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching/expected.1
================================================
name: "dynamic_batching"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "dynamic_batching"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
dynamic_batching {
  preferred_batch_size: 4
}
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching/expected.2
================================================
name: "dynamic_batching"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "dynamic_batching"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
dynamic_batching {
  preferred_batch_size: 4
}
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching/expected.3
================================================
name: "dynamic_batching"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "dynamic_batching"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
dynamic_batching {
  preferred_batch_size: 4
}
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Fixture model that auto-completes batching settings plus two FP32
    inputs and two FP32 outputs."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Populate the supplied config object and hand it back.

        Sets the max batch size to 4, requests dynamic batching, then
        registers INPUT0, INPUT1, OUTPUT0 and OUTPUT1 (in that order) as
        4-element ``TYPE_FP32`` tensors. Returns the object it was given.
        """
        config = auto_complete_model_config

        config.set_max_batch_size(4)
        config.set_dynamic_batching()

        # Inputs are registered before outputs, each in index order.
        for tensor_name in ("INPUT0", "INPUT1"):
            config.add_input(
                {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}
            )
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            config.add_output(
                {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}
            )

        return config

    def execute(self, requests):
        """Do nothing with ``requests``; the fixture only exercises
        config auto-completion."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching_no_op/config.pbtxt
================================================
name: "dynamic_batching_no_op"
max_batch_size: 4
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
dynamic_batching: {
  preferred_batch_size: [ 4 ]
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching_no_op/expected
================================================
name: "dynamic_batching_no_op"
version_policy {
latest {
    num_versions: 1
}
}
max_batch_size: 4
input {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: 4
}
input {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: 4
}
output {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: 4
}
output {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: 4
}
instance_group {
    name: "dynamic_batching_no_op"
    count: 1
    gpus: 0
    kind: KIND_GPU
}
default_model_filename: "model.py"
dynamic_batching {
    preferred_batch_size: 4
}
optimization {
    input_pinned_memory {
        enable: true
    }
    output_pinned_memory {
        enable: true
    }
}
backend: "python"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching_no_op/expected.1
================================================
name: "dynamic_batching_no_op"
version_policy {
latest {
    num_versions: 1
}
}
max_batch_size: 4
input {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: 4
}
input {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: 4
}
output {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: 4
}
output {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: 4
}
instance_group {
    name: "dynamic_batching_no_op"
    count: 1
    gpus: 0
    kind: KIND_GPU
}
default_model_filename: "model.py"
dynamic_batching {
    preferred_batch_size: 4
}
optimization {
    input_pinned_memory {
        enable: true
    }
    output_pinned_memory {
        enable: true
    }
}
backend: "python"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching_no_op/expected.2
================================================
name: "dynamic_batching_no_op"
version_policy {
latest {
    num_versions: 1
}
}
max_batch_size: 4
input {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: 4
}
input {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: 4
}
output {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: 4
}
output {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: 4
}
instance_group {
    name: "dynamic_batching_no_op"
    count: 1
    gpus: 0
    kind: KIND_GPU
}
default_model_filename: "model.py"
dynamic_batching {
    preferred_batch_size: 4
}
optimization {
    input_pinned_memory {
        enable: true
    }
    output_pinned_memory {
        enable: true
    }
}
backend: "python"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching_no_op/expected.3
================================================
name: "dynamic_batching_no_op"
version_policy {
latest {
    num_versions: 1
}
}
max_batch_size: 4
input {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: 4
}
input {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: 4
}
output {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: 4
}
output {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: 4
}
instance_group {
    name: "dynamic_batching_no_op"
    count: 1
    gpus: 0
    kind: KIND_GPU
}
default_model_filename: "model.py"
dynamic_batching {
    preferred_batch_size: 4
}
optimization {
    input_pinned_memory {
        enable: true
    }
    output_pinned_memory {
        enable: true
    }
}
backend: "python"
runtime: ""

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching_no_op/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Fixture model whose auto-complete hook repeats the batching and
    tensor settings that the accompanying config.pbtxt already lists."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Populate the supplied config object and hand it back.

        Calls ``set_max_batch_size(4)`` and ``set_dynamic_batching()``,
        then adds INPUT0/INPUT1 and OUTPUT0/OUTPUT1 as 4-element
        ``TYPE_FP32`` tensors. Returns the object it was given.
        """
        config = auto_complete_model_config

        def describe(tensor_name):
            # All four tensors share the same dtype and shape.
            return {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}

        inputs = [describe("INPUT0"), describe("INPUT1")]
        outputs = [describe("OUTPUT0"), describe("OUTPUT1")]

        config.set_max_batch_size(4)
        config.set_dynamic_batching()
        for spec in inputs:
            config.add_input(spec)
        for spec in outputs:
            config.add_output(spec)

        return config

    def execute(self, requests):
        """Do nothing with ``requests``; the fixture only exercises
        config auto-completion."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/empty_config/config.pbtxt
================================================


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/empty_config/expected
================================================
name: "empty_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "empty_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/empty_config/expected.1
================================================
name: "empty_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "empty_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/empty_config/expected.2
================================================
name: "empty_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "empty_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/empty_config/expected.3
================================================
name: "empty_config"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "empty_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/incomplete_input/config.pbtxt
================================================
name: "incomplete_input"

input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/incomplete_input/expected
================================================
name: "incomplete_input"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "incomplete_input"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/incomplete_input/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Fixture model that supplies INPUT0 and both outputs through
    auto-complete; the accompanying config.pbtxt lists only INPUT1."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Populate the supplied config object and hand it back.

        Calls ``set_max_batch_size(0)``, then adds INPUT0, OUTPUT0 and
        OUTPUT1 as 4-element ``TYPE_FP32`` tensors. Returns the object
        it was given.
        """
        config = auto_complete_model_config

        config.set_max_batch_size(0)
        config.add_input(
            {"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]}
        )
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            config.add_output(
                {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}
            )

        return config

    def execute(self, requests):
        """Do nothing with ``requests``; the fixture only exercises
        config auto-completion."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/incomplete_output/config.pbtxt
================================================
name: "incomplete_output"

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
  },
  {
    name: "OUTPUT1"
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/incomplete_output/expected
================================================
name: "incomplete_output"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "incomplete_output"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy/config.pbtxt
================================================
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy/expected
================================================
name: "model_transaction_policy"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
  decoupled: true
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy/expected.1
================================================
name: "model_transaction_policy"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
  decoupled: true
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy/expected.2
================================================
name: "model_transaction_policy"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
  decoupled: true
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy/expected.3
================================================
name: "model_transaction_policy"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
  decoupled: true
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Fixture model that sets a decoupled model transaction policy (plus
    max batch size and tensors) through auto-complete."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Populate the supplied config object and hand it back.

        Calls ``set_max_batch_size(4)``, then
        ``set_model_transaction_policy`` with ``decoupled`` set to True,
        then adds INPUT0/INPUT1 and OUTPUT0/OUTPUT1 as 4-element
        ``TYPE_FP32`` tensors. Returns the object it was given.
        """
        config = auto_complete_model_config

        def fp32_vec4(tensor_name):
            # All four tensors share the same dtype and shape.
            return {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}

        config.set_max_batch_size(4)
        config.set_model_transaction_policy({"decoupled": True})

        for tensor_name in ("INPUT0", "INPUT1"):
            config.add_input(fp32_vec4(tensor_name))
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            config.add_output(fp32_vec4(tensor_name))

        return config

    def execute(self, requests):
        """Do nothing with ``requests``; the fixture only exercises
        config auto-completion."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_decoupled_false/config.pbtxt
================================================
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_decoupled_false/expected
================================================
name: "model_transaction_policy_decoupled_false"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy_decoupled_false"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_decoupled_false/expected.1
================================================
name: "model_transaction_policy_decoupled_false"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy_decoupled_false"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
}

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_decoupled_false/expected.2
================================================
name: "model_transaction_policy_decoupled_false"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy_decoupled_false"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
}

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_decoupled_false/expected.3
================================================
name: "model_transaction_policy_decoupled_false"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy_decoupled_false"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
}

================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_decoupled_false/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Test model whose auto-complete hook sets ``decoupled=False``."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Populate the given config object and hand it back.

        Sets max batch size 4, a model transaction policy of
        ``decoupled=False``, then registers INPUT0/INPUT1 followed by
        OUTPUT0/OUTPUT1, each as a TYPE_FP32 tensor with dims ``[4]``.

        Returns the same ``auto_complete_model_config`` object it was given.
        """

        def fp32_vector(tensor_name):
            # Every tensor in this model shares the same type and shape.
            return {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}

        config = auto_complete_model_config
        config.set_max_batch_size(4)
        config.set_model_transaction_policy({"decoupled": False})

        for tensor_name in ("INPUT0", "INPUT1"):
            config.add_input(fp32_vector(tensor_name))
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            config.add_output(fp32_vector(tensor_name))

        return config

    def execute(self, requests):
        """Do nothing; this model does not implement inference."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_no_op/config.pbtxt
================================================
model_transaction_policy {
  decoupled: true
}

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_no_op/expected
================================================
name: "model_transaction_policy_no_op"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy_no_op"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
  decoupled: true
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_no_op/expected.1
================================================
name: "model_transaction_policy_no_op"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy_no_op"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
  decoupled: true
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_no_op/expected.2
================================================
name: "model_transaction_policy_no_op"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy_no_op"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
  decoupled: true
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_no_op/expected.3
================================================
name: "model_transaction_policy_no_op"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 4
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "model_transaction_policy_no_op"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""
model_transaction_policy {
  decoupled: true
}


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_no_op/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Test model whose auto-complete hook sets ``decoupled=True``."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Populate the given config object and hand it back.

        Sets max batch size 4, a model transaction policy of
        ``decoupled=True``, then registers INPUT0/INPUT1 followed by
        OUTPUT0/OUTPUT1, each as a TYPE_FP32 tensor with dims ``[4]``.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        # Build the tensor specs up front; all four share type and shape.
        input_specs = [
            {"name": f"INPUT{index}", "data_type": "TYPE_FP32", "dims": [4]}
            for index in range(2)
        ]
        output_specs = [
            {"name": f"OUTPUT{index}", "data_type": "TYPE_FP32", "dims": [4]}
            for index in range(2)
        ]

        auto_complete_model_config.set_max_batch_size(4)
        auto_complete_model_config.set_model_transaction_policy(
            {"decoupled": True}
        )
        for spec in input_specs:
            auto_complete_model_config.add_input(spec)
        for spec in output_specs:
            auto_complete_model_config.add_output(spec)

        return auto_complete_model_config

    def execute(self, requests):
        """Do nothing; this model does not implement inference."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/optional_input/config.pbtxt
================================================
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/optional_input/expected
================================================
name: "optional_input"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
  optional: true
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "optional_input"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/optional_input/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Test model whose auto-complete hook declares an optional input."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Populate the given config object and hand it back.

        Sets max batch size 0, registers INPUT0 as a TYPE_FP32 tensor with
        dims ``[4]`` flagged ``optional``, then registers OUTPUT0 and
        OUTPUT1 with the same type and dims.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        config = auto_complete_model_config

        # Fields common to every tensor this model declares.
        fp32_vector = {"data_type": "TYPE_FP32", "dims": [4]}

        config.set_max_batch_size(0)
        config.add_input(
            {
                "name": "INPUT0",
                "data_type": fp32_vector["data_type"],
                "dims": list(fp32_vector["dims"]),
                "optional": True,
            }
        )
        for output_name in ("OUTPUT0", "OUTPUT1"):
            config.add_output(
                {
                    "name": output_name,
                    "data_type": fp32_vector["data_type"],
                    "dims": list(fp32_vector["dims"]),
                }
            )

        return config

    def execute(self, requests):
        """Do nothing; this model does not implement inference."""
        return None


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/unknown_input/config.pbtxt
================================================
name: "unknown_input"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT_UNKNOWN"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/unknown_input/expected
================================================
name: "unknown_input"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT_UNKNOWN"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "unknown_input"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/unknown_output/config.pbtxt
================================================
name: "unknown_output"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT_UNKNOWN"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/python/unknown_output/expected
================================================
name: "unknown_output"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT_UNKNOWN"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 4
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 4
}
instance_group {
  name: "unknown_output"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.py"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "python"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/pytorch/cpu_instance/config.pbtxt
================================================
name: "cpu_instance"
platform: "pytorch_libtorch"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT__0"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT__1"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT__0"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT__1"
  data_type: TYPE_INT32
  dims: 16
}
instance_group {
  name: "cpu_instance"
  kind: KIND_CPU
}
default_model_filename: "model.pt"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "pytorch"


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/pytorch/cpu_instance/expected
================================================
name: "cpu_instance"
platform: "pytorch_libtorch"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT__0"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT__1"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT__0"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT__1"
  data_type: TYPE_INT32
  dims: 16
}
instance_group {
  name: "cpu_instance"
  count: 1
  kind: KIND_CPU
}
default_model_filename: "model.pt"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "pytorch"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/pytorch/no_name_platform/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT__0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT__1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT__0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT__1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/pytorch/no_name_platform/expected
================================================
name: "no_name_platform"
platform: "pytorch_libtorch"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT__0"
  data_type: TYPE_INT32
  dims: 16
}
input {
  name: "INPUT__1"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT__0"
  data_type: TYPE_INT32
  dims: 16
}
output {
  name: "OUTPUT__1"
  data_type: TYPE_INT32
  dims: 16
}
instance_group {
  name: "no_name_platform"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.pt"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "pytorch"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/empty_config/config.pbtxt
================================================


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/empty_config/expected
================================================
name: "empty_config"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "empty_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/empty_config_variable/config.pbtxt
================================================


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/empty_config_variable/expected
================================================
name: "empty_config_variable"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: -1
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: -1
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: -1
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: -1
}
instance_group {
  name: "empty_config_variable"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/hint_for_no_batch/config.pbtxt
================================================
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1, 16 ]
  },
  {
    name: "INPUT1"
    dims: [ -1, 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    dims: [ -1, 16 ]
  },
  {
    name: "OUTPUT1"
    dims: [ -1, 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/hint_for_no_batch/expected
================================================
name: "hint_for_no_batch"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: -1
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: -1
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: -1
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: -1
  dims: 16
}
instance_group {
  name: "hint_for_no_batch"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/incomplete_input/config.pbtxt
================================================
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
  },
  {
    name: "INPUT1"
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/incomplete_input/expected
================================================
name: "incomplete_input"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "incomplete_input"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/incomplete_input/expected.1
================================================
name: "incomplete_input"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "incomplete_input"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/incomplete_input/expected.2
================================================
name: "incomplete_input"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "incomplete_input"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/incomplete_input/expected.3
================================================
name: "incomplete_input"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "incomplete_input"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/incomplete_output/config.pbtxt
================================================
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
  },
  {
    name: "OUTPUT1"
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/incomplete_output/expected
================================================
name: "incomplete_output"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "incomplete_output"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/incomplete_output/expected.1
================================================
name: "incomplete_output"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "incomplete_output"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/incomplete_output/expected.2
================================================
name: "incomplete_output"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "incomplete_output"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/incomplete_output/expected.3
================================================
name: "incomplete_output"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "incomplete_output"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/multi_prof_max_bs/config.pbtxt
================================================
instance_group [
  {
      profile: "0"
  }
]

instance_group [
  {
      profile: "1"
  }
]

instance_group [
  {
      profile: "2"
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/multi_prof_max_bs/expected
================================================
name: "multi_prof_max_bs"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: -1
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: -1
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: -1
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: -1
}
instance_group {
  name: "multi_prof_max_bs_0"
  count: 1
  gpus: 0
  kind: KIND_GPU
  profile: "0"
}
instance_group {
  name: "multi_prof_max_bs_1"
  count: 1
  gpus: 0
  kind: KIND_GPU
  profile: "1"
}
instance_group {
  name: "multi_prof_max_bs_2"
  count: 1
  gpus: 0
  kind: KIND_GPU
  profile: "2"
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/no_config/expected
================================================
name: "no_config"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "no_config"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/no_config_non_linear_format_io/expected
================================================
name: "no_config_non_linear_format_io"
platform: "tensorrt_plan"
backend: "tensorrt"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: -1
  dims: 2
  dims: 1
  is_non_linear_format_io: true
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: -1
  dims: 2
  dims: 1
  is_non_linear_format_io: true
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: -1
  dims: 2
  dims: 1
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: -1
  dims: 2
  dims: 1
}
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
dynamic_batching {
  preferred_batch_size: 8
}
instance_group {
  name: "no_config_non_linear_format_io"
  kind: KIND_GPU
  count: 1
  gpus: 0
}
default_model_filename: "model.plan"


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/no_config_shape_tensor/expected
================================================
name: "no_config_shape_tensor"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_INT32
  dims: 2
  is_shape_tensor: true
}
input {
  name: "DUMMY_INPUT0"
  data_type: TYPE_FP32
  dims: -1
  dims: -1
}
output {
  name: "DUMMY_OUTPUT0"
  data_type: TYPE_FP32
  dims: -1
  dims: -1
}
output {
  name: "OUTPUT0"
  data_type: TYPE_INT64
  dims: 2
  is_shape_tensor: true
}
instance_group {
  name: "no_config_shape_tensor"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/no_config_variable/expected
================================================
name: "no_config_variable"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: -1
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: -1
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: -1
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: -1
}
instance_group {
  name: "no_config_variable"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/no_name_platform/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/no_name_platform/expected
================================================
name: "no_name_platform"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "no_name_platform"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/no_name_platform_variable/config.pbtxt
================================================
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/no_name_platform_variable/expected
================================================
name: "no_name_platform_variable"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 16
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 16
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 16
}
instance_group {
  name: "no_name_platform_variable"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/reshape_config_provided/config.pbtxt
================================================
name: "reshape_config_provided"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4,4 ]
    reshape: { shape: [ 2,2,4 ] }
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 2,2,4 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 2 ]
    reshape: { shape: [ 1,2,1 ] }
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 1,2,1 ]
  }
]
input [
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ 2,2,3 ]
    reshape: { shape: [ 3,2,2 ] }
  }
]
output [
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ 3,2,2 ]
  }
]
input [
  {
    name: "INPUT3"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ 1,1,1 ] }
  }
]
output [
  {
    name: "OUTPUT3"
    data_type: TYPE_FP32
    dims: [ 1,1,1 ]
  }
]


================================================
FILE: qa/L0_model_config/autofill_noplatform_success/tensorrt/reshape_config_provided/expected
================================================
name: "reshape_config_provided"
platform: "tensorrt_plan"
version_policy {
  latest {
    num_versions: 1
  }
}
max_batch_size: 8
input {
  name: "INPUT0"
  data_type: TYPE_FP32
  dims: 4
  dims: 4
  reshape {
    shape: 2
    shape: 2
    shape: 4
  }
}
input {
  name: "INPUT1"
  data_type: TYPE_FP32
  dims: 2
  reshape {
    shape: 1
    shape: 2
    shape: 1
  }
}
input {
  name: "INPUT2"
  data_type: TYPE_FP32
  dims: 2
  dims: 2
  dims: 3
  reshape {
    shape: 3
    shape: 2
    shape: 2
  }
}
input {
  name: "INPUT3"
  data_type: TYPE_FP32
  dims: 1
  reshape {
    shape: 1
    shape: 1
    shape: 1
  }
}
output {
  name: "OUTPUT0"
  data_type: TYPE_FP32
  dims: 2
  dims: 2
  dims: 4
}
output {
  name: "OUTPUT1"
  data_type: TYPE_FP32
  dims: 1
  dims: 2
  dims: 1
}
output {
  name: "OUTPUT2"
  data_type: TYPE_FP32
  dims: 3
  dims: 2
  dims: 2
}
output {
  name: "OUTPUT3"
  data_type: TYPE_FP32
  dims: 1
  dims: 1
  dims: 1
}
instance_group {
  name: "reshape_config_provided"
  count: 1
  gpus: 0
  kind: KIND_GPU
}
dynamic_batching {
  preferred_batch_size: 8
}
default_model_filename: "model.plan"
optimization {
  input_pinned_memory {
    enable: true
  }
  output_pinned_memory {
    enable: true
  }
}
backend: "tensorrt"
runtime: ""


================================================
FILE: qa/L0_model_config/cli_messages/cli_deprecation/expected
================================================
Warning: '--strict-model-config' has been deprecated! Please use '--disable-auto-complete-config' instead.

================================================
FILE: qa/L0_model_config/cli_messages/cli_override/expected
================================================
Warning: Overriding deprecated '--strict-model-config' from False to True in favor of '--disable-auto-complete-config'!

================================================
FILE: qa/L0_model_config/compare_status.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import json
import os
import sys

import tritonclient.grpc as grpcclient
import tritonclient.grpc.model_config_pb2 as mc
import tritonclient.http as httpclient
from google.protobuf import json_format, text_format
from tritonclient.utils import *

FLAGS = None

if __name__ == "__main__":
    # Compare the configuration a running server reports for --model against
    # every "expected*" file found in --expected_dir.
    #
    # The server is queried over HTTP (localhost:8000) and then over gRPC
    # (localhost:8001). The script exits with status 0 as soon as the
    # reported configuration equals any one of the expected configurations
    # for either protocol. If nothing matches, it prints the reported
    # configuration together with the expected configurations that were
    # tried and exits with status 1.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--expected_dir",
        type=str,
        required=True,
        help="Directory containing expected output files",
    )
    parser.add_argument("--model", type=str, required=True, help="Model name")
    # parse_known_args() tolerates extra command-line arguments; they are
    # collected in 'unparsed' and otherwise ignored.
    FLAGS, unparsed = parser.parse_known_args()

    for pair in [("localhost:8000", "http"), ("localhost:8001", "grpc")]:
        model_name = FLAGS.model
        if pair[1] == "http":
            triton_client = httpclient.InferenceServerClient(url=pair[0], verbose=False)
        else:
            triton_client = grpcclient.InferenceServerClient(url=pair[0], verbose=False)
        model_config = triton_client.get_model_config(model_name)

        # Expected configurations that did not match for this protocol. The
        # list is rebuilt per protocol so that the final report pairs the
        # last queried 'model_config' with the candidates it was compared to.
        nonmatch = []
        # Sorted so the order of the failure report is deterministic
        # (os.listdir() returns entries in arbitrary order).
        expected_files = sorted(
            f
            for f in os.listdir(FLAGS.expected_dir)
            if (
                os.path.isfile(os.path.join(FLAGS.expected_dir, f))
                and (f.startswith("expected"))
            )
        )
        for efile in expected_files:
            with open(os.path.join(FLAGS.expected_dir, efile)) as f:
                config = text_format.Parse(f.read(), mc.ModelConfig())

            if pair[1] == "http":
                # The HTTP result is compared against the JSON form of the
                # expected message, keeping the original proto field names.
                config_json = json.loads(
                    json_format.MessageToJson(config, preserving_proto_field_name=True)
                )
                if config_json == model_config:
                    sys.exit(0)
            else:
                # The gRPC response carries the ModelConfig message in its
                # 'config' field, so protobuf messages are compared directly.
                if config == model_config.config:
                    sys.exit(0)

            # Record every candidate that failed to match, not only the last
            # one, so the report below lists all of them. Appending inside
            # the loop also avoids referencing 'config' when the directory
            # contains no expected files.
            nonmatch.append(config)

    print("Model config doesn't match any expected output:")
    print("Model config:")
    print(model_config)
    for nm in nonmatch:
        print("Non-matching:")
        print(nm)

    sys.exit(1)


================================================
FILE: qa/L0_model_config/custom_parameters/tensorrt/invalid/allocation_strategy_invalid_value/expected
================================================
failed to load 'allocation_strategy_invalid_value' version 1: Invalid argument: Invalid value for 'execution_context_allocation_strategy': 'UNKNOWN' for model instance 'allocation_strategy_invalid_value'


================================================
FILE: qa/L0_model_config/custom_parameters/tensorrt/invalid/allocation_strategy_invalid_value/partial.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
parameters: {
  key: "execution_context_allocation_strategy"
  value: {
    string_value: "UNKNOWN"
  }
}


================================================
FILE: qa/L0_model_config/custom_parameters/tensorrt/valid/allocation_strategy_no_key/partial.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
parameters: {}


================================================
FILE: qa/L0_model_config/custom_parameters/tensorrt/valid/allocation_strategy_no_parameters/partial.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: qa/L0_model_config/custom_parameters/tensorrt/valid/allocation_strategy_value_1/expected
================================================
'execution_context_allocation_strategy' set to 'STATIC' for model instance 'allocation_strategy_value_1'


================================================
FILE: qa/L0_model_config/custom_parameters/tensorrt/valid/allocation_strategy_value_1/partial.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
parameters: {
  key: "execution_context_allocation_strategy"
  value: {
    string_value: "STATIC"
  }
}


================================================
FILE: qa/L0_model_config/custom_parameters/tensorrt/valid/allocation_strategy_value_2/expected
================================================
'execution_context_allocation_strategy' set to 'ON_PROFILE_CHANGE' for model instance 'allocation_strategy_value_2'


================================================
FILE: qa/L0_model_config/custom_parameters/tensorrt/valid/allocation_strategy_value_2/partial.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
parameters: {
  key: "execution_context_allocation_strategy"
  value: {
    string_value: "ON_PROFILE_CHANGE"
  }
}


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/empty_buckets/expected
================================================
histogram options must specify non-empty 'buckets'


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/empty_buckets/partial.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
  metric_control: [
    {
      metric_identifier: {
        family: "nv_inference_first_response_histogram_ms"
      }
      histogram_options: {
        buckets: []
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/expected
================================================
metric identifier must specify non-empty 'family'


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/partial.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
  metric_control: [
    {
      metric_identifier: {
        family: ""
      }
      histogram_options: {
        buckets: [ 1, 2, 4, 8 ]
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/no_buckets/expected
================================================
histogram options must specify non-empty 'buckets'


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/no_buckets/partial.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
  metric_control: [
    {
      metric_identifier: {
        family: "nv_inference_first_response_histogram_ms"
      }
      histogram_options: {}
    }
  ]
}


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/expected
================================================
metric control must specify 'histogram_options'


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/partial.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
  metric_control: [
    {
      metric_identifier: {
        family: "nv_inference_first_response_histogram_ms"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/no_metric_family/expected
================================================
metric identifier must specify non-empty 'family'


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/no_metric_family/partial.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
  metric_control: [
    {
      metric_identifier: {}
      histogram_options: {
        buckets: [ 1, 2, 4, 8 ]
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/no_metric_identifier/expected
================================================
metric control must specify 'metric_identifier'


================================================
FILE: qa/L0_model_config/model_metrics/invalid_config/no_metric_identifier/partial.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
  metric_control: [
    {
      # Negative case: this entry sets histogram_options but carries no
      # metric_identifier. The sibling `expected` file records the
      # "metric control must specify 'metric_identifier'" message.
      histogram_options: {
        buckets: [ 1, 2, 4, 8 ]
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/model_metrics/valid_config/valid_model_metrics/partial.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
  metric_control: [
    {
      # Complete entry: names a metric family and supplies histogram buckets.
      # NOTE(review): this file sits under valid_config/ and no sibling
      # `expected` file accompanies it here, so presumably it is meant to be
      # accepted without error -- confirm against the test driver.
      metric_identifier: {
        family: "nv_inference_first_response_histogram_ms"
      }
      histogram_options: {
        buckets: [ 1, 2, 4, 8]
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/model_metrics/valid_config_with_warn/unknown_metric_family/expected
================================================
Metric family 'nv_inference_request_success' in 'metric_identifier' is not a customizable metric in Triton core.


================================================
FILE: qa/L0_model_config/model_metrics/valid_config_with_warn/unknown_metric_family/partial.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
  metric_control: [
    {
      metric_identifier: {
        # The sibling `expected` file records a message stating that this
        # family "is not a customizable metric in Triton core".
        # NOTE(review): the fixture lives under valid_config_with_warn/, so
        # presumably the config is still accepted and the message is only a
        # warning -- confirm against the test driver.
        family: "nv_inference_request_success"
      }
      histogram_options: {
        buckets: [ 1, 2, 4, 8]
      }
    }
  ]
}


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source0/config.pbtxt
================================================
name: "batch_input_less_source0"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Negative case: the BATCH_ELEMENT_COUNT batch input below lists no
# source_input. The sibling `expected` file records the
# "expects 1 source input, got 0" message; `expected_unsupported` records the
# alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_input [
  {
    kind: BATCH_ELEMENT_COUNT
    target_name: "BATCH_INPUT"
    data_type: TYPE_FP32
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source0/expected
================================================
batch input kind 'BATCH_ELEMENT_COUNT' expects 1 source input, got 0


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source0/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source1/config.pbtxt
================================================
name: "batch_input_less_source1"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Negative case: the BATCH_ACCUMULATED_ELEMENT_COUNT batch input below lists
# no source_input. The sibling `expected` file records the
# "expects 1 source input, got 0" message; `expected_unsupported` records the
# alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_input [
  {
    kind: BATCH_ACCUMULATED_ELEMENT_COUNT
    target_name: "BATCH_INPUT"
    data_type: TYPE_FP32
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source1/expected
================================================
batch input kind 'BATCH_ACCUMULATED_ELEMENT_COUNT' expects 1 source input, got 0


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source1/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source2/config.pbtxt
================================================
name: "batch_input_less_source2"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Negative case: the BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO batch input
# below lists no source_input. The sibling `expected` file records the
# "expects 1 source input, got 0" message; `expected_unsupported` records the
# alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_input [
  {
    kind: BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO
    target_name: "BATCH_INPUT"
    data_type: TYPE_FP32
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source2/expected
================================================
batch input kind 'BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO' expects 1 source input, got 0


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source2/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source3/config.pbtxt
================================================
name: "batch_input_less_source3"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Negative case: the BATCH_MAX_ELEMENT_COUNT_AS_SHAPE batch input below lists
# no source_input. The sibling `expected` file records the
# "expects 1 source input, got 0" message; `expected_unsupported` records the
# alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_input [
  {
    kind: BATCH_MAX_ELEMENT_COUNT_AS_SHAPE
    target_name: "BATCH_INPUT"
    data_type: TYPE_FP32
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source3/expected
================================================
batch input kind 'BATCH_MAX_ELEMENT_COUNT_AS_SHAPE' expects 1 source input, got 0


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_less_source3/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source0/config.pbtxt
================================================
name: "batch_input_many_source0"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Negative case: the BATCH_ELEMENT_COUNT batch input below lists two
# source_input entries ("INPUT" twice). The sibling `expected` file records
# the "expects 1 source input, got 2" message; `expected_unsupported` records
# the alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_input [
  {
    kind: BATCH_ELEMENT_COUNT
    target_name: "BATCH_AND_SIZE_INPUT"
    data_type: TYPE_FP32
    source_input: ["INPUT", "INPUT"]
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source0/expected
================================================
batch input kind 'BATCH_ELEMENT_COUNT' expects 1 source input, got 2


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source0/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source1/config.pbtxt
================================================
name: "batch_input_many_source1"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Negative case: the BATCH_ACCUMULATED_ELEMENT_COUNT batch input below lists
# two source_input entries ("INPUT" twice). The sibling `expected` file
# records the "expects 1 source input, got 2" message; `expected_unsupported`
# records the alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_input [
  {
    kind: BATCH_ACCUMULATED_ELEMENT_COUNT
    target_name: "BATCH_AND_SIZE_INPUT"
    data_type: TYPE_FP32
    source_input: ["INPUT", "INPUT"]
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source1/expected
================================================
batch input kind 'BATCH_ACCUMULATED_ELEMENT_COUNT' expects 1 source input, got 2


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source1/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source2/config.pbtxt
================================================
name: "batch_input_many_source2"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Negative case: the BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO batch input
# below lists two source_input entries ("INPUT" twice). The sibling `expected`
# file records the "expects 1 source input, got 2" message;
# `expected_unsupported` records the alternate message saying batch
# inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_input [
  {
    kind: BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO
    target_name: "BATCH_AND_SIZE_INPUT"
    data_type: TYPE_FP32
    source_input: ["INPUT", "INPUT"]
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source2/expected
================================================
batch input kind 'BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO' expects 1 source input, got 2


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source2/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source3/config.pbtxt
================================================
name: "batch_input_many_source3"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Negative case: the BATCH_MAX_ELEMENT_COUNT_AS_SHAPE batch input below lists
# two source_input entries ("INPUT" twice). The sibling `expected` file
# records the "expects 1 source input, got 2" message; `expected_unsupported`
# records the alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_input [
  {
    kind: BATCH_MAX_ELEMENT_COUNT_AS_SHAPE
    target_name: "BATCH_AND_SIZE_INPUT"
    data_type: TYPE_FP32
    source_input: ["INPUT", "INPUT"]
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source3/expected
================================================
batch input kind 'BATCH_MAX_ELEMENT_COUNT_AS_SHAPE' expects 1 source input, got 2


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_many_source3/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_unknown_source/config.pbtxt
================================================
name: "batch_input_unknown_source"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# Negative case: source_input refers to "UNKNOWN_INPUT", while the only input
# declared above is "INPUT". The sibling `expected` file records the
# "unknown source input name 'UNKNOWN_INPUT'" message; `expected_unsupported`
# records the alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_input [
  {
    kind: BATCH_ELEMENT_COUNT
    target_name: "BATCH_INPUT"
    data_type: TYPE_FP32
    source_input: "UNKNOWN_INPUT"
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_unknown_source/expected
================================================
unknown source input name 'UNKNOWN_INPUT'


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_input_unknown_source/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_duplicated_target/config.pbtxt
================================================
name: "batch_output_duplicated_target"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# Negative case: target_name lists "OUTPUT" twice. The sibling `expected` file
# records the "target output name 'OUTPUT' can only be specified once"
# message; `expected_unsupported` records the alternate message saying batch
# inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_output [
  {
    target_name: ["OUTPUT", "OUTPUT"]
    kind: BATCH_SCATTER_WITH_INPUT_SHAPE
    source_input: "INPUT"
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_duplicated_target/expected
================================================
target output name 'OUTPUT' can only be specified once


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_duplicated_target/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_less_source/config.pbtxt
================================================
name: "batch_output_less_source"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# Negative case: the BATCH_SCATTER_WITH_INPUT_SHAPE batch output below lists
# no source_input. The sibling `expected` file records the
# "expects 1 source input, got 0" message; `expected_unsupported` records the
# alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_output [
  {
    target_name: "OUTPUT"
    kind: BATCH_SCATTER_WITH_INPUT_SHAPE
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_less_source/expected
================================================
batch output kind 'BATCH_SCATTER_WITH_INPUT_SHAPE' expects 1 source input, got 0


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_less_source/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_many_source/config.pbtxt
================================================
name: "batch_output_many_source"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# Negative case: the BATCH_SCATTER_WITH_INPUT_SHAPE batch output below lists
# two source_input entries ("INPUT" twice). The sibling `expected` file
# records the "expects 1 source input, got 2" message; `expected_unsupported`
# records the alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_output [
  {
    target_name: "OUTPUT"
    kind: BATCH_SCATTER_WITH_INPUT_SHAPE
    source_input: ["INPUT", "INPUT"]
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_many_source/expected
================================================
batch output kind 'BATCH_SCATTER_WITH_INPUT_SHAPE' expects 1 source input, got 2


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_many_source/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_unknown_source/config.pbtxt
================================================
name: "batch_output_unknown_source"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# Negative case: source_input refers to "UNKNOWN_INPUT", while the only input
# declared above is "INPUT". The sibling `expected` file records the
# "unknown source input name 'UNKNOWN_INPUT'" message; `expected_unsupported`
# records the alternate message saying batch inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_output [
  {
    target_name: "OUTPUT"
    kind: BATCH_SCATTER_WITH_INPUT_SHAPE
    source_input: "UNKNOWN_INPUT"
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_unknown_source/expected
================================================
unknown source input name 'UNKNOWN_INPUT'


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_unknown_source/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_unknown_target/config.pbtxt
================================================
name: "batch_output_unknown_target"
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# Negative case: target_name refers to "UNKNOWN_OUTPUT", while the only output
# declared above is "OUTPUT". The sibling `expected` file records the
# "unknown target output name 'UNKNOWN_OUTPUT'" message;
# `expected_unsupported` records the alternate message saying batch
# inputs/outputs are not supported.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
batch_output [
  {
    target_name: "UNKNOWN_OUTPUT"
    kind: BATCH_SCATTER_WITH_INPUT_SHAPE
    source_input: "INPUT"
  }
]

================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_unknown_target/expected
================================================
unknown target output name 'UNKNOWN_OUTPUT'


================================================
FILE: qa/L0_model_config/noautofill_platform/batch_output_unknown_target/expected_unsupported
================================================
batch inputs and batch outputs are only supported for custom platform and TensorRT platform


================================================
FILE: qa/L0_model_config/noautofill_platform/control_kind_end_multiple/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "control_kind_end_multiple"
platform: "onnxruntime"
max_batch_size: 8
# Negative case: two control tensors (END0 and END1) both declare
# CONTROL_SEQUENCE_END. The sibling `expected` file records the
# "specifies multiple CONTROL_SEQUENCE_END tensors" message and
# `expected_ensemble` records an "ensemble scheduling must be set" message.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
sequence_batching {
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      # First CONTROL_SEQUENCE_END tensor.
      name: "END0"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      # Second tensor with the same CONTROL_SEQUENCE_END kind (the duplicate).
      name: "END1"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          int32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/control_kind_end_multiple/expected
================================================
sequence batching specifies multiple CONTROL_SEQUENCE_END tensors for control_kind_end_multiple

================================================
FILE: qa/L0_model_config/noautofill_platform/control_kind_end_multiple/expected_ensemble
================================================
ensemble scheduling must be set for ensemble control_kind_end_multiple whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/control_kind_ready_multiple/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "control_kind_ready_multiple"
platform: "onnxruntime"
max_batch_size: 8
# Negative case: two control tensors (READY0 and READY1) both declare
# CONTROL_SEQUENCE_READY. The sibling `expected` file records the
# "specifies multiple CONTROL_SEQUENCE_READY tensors" message and
# `expected_ensemble` records an "ensemble scheduling must be set" message.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
sequence_batching {
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      # First CONTROL_SEQUENCE_READY tensor.
      name: "READY0"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      # Second tensor with the same CONTROL_SEQUENCE_READY kind (the duplicate).
      name: "READY1"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          int32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/control_kind_ready_multiple/expected
================================================
sequence batching specifies multiple CONTROL_SEQUENCE_READY tensors for control_kind_ready_multiple

================================================
FILE: qa/L0_model_config/noautofill_platform/control_kind_ready_multiple/expected_ensemble
================================================
ensemble scheduling must be set for ensemble control_kind_ready_multiple whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/control_kind_start_multiple/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "control_kind_start_multiple"
platform: "onnxruntime"
max_batch_size: 8
# Negative case: two control tensors both declare CONTROL_SEQUENCE_START.
# The sibling `expected` file records the
# "specifies multiple CONTROL_SEQUENCE_START tensors" message and
# `expected_ensemble` records an "ensemble scheduling must be set" message.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
sequence_batching {
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      # Despite the tensor being named "READY", its kind is
      # CONTROL_SEQUENCE_START; this is the second START declaration.
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/control_kind_start_multiple/expected
================================================
sequence batching specifies multiple CONTROL_SEQUENCE_START tensors for control_kind_start_multiple

================================================
FILE: qa/L0_model_config/noautofill_platform/control_kind_start_multiple/expected_ensemble
================================================
ensemble scheduling must be set for ensemble control_kind_start_multiple whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/control_tensor_multiple/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "control_tensor_multiple"
platform: "onnxruntime"
max_batch_size: 8
# Negative case: the same control tensor name ("START") appears in two
# control_input entries with different kinds. The sibling `expected` file
# records the "control tensor 'START' is specified for multiple control kinds"
# message and `expected_ensemble` records an "ensemble scheduling must be set"
# message.
# NOTE(review): which of the two applies to a given run is decided by the
# test driver, not by this file -- confirm there.
sequence_batching {
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      # Reuses the name "START" but with kind CONTROL_SEQUENCE_READY.
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          int32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/control_tensor_multiple/expected
================================================
sequence batching control tensor 'START' is specified for multiple control kinds for control_tensor_multiple

================================================
FILE: qa/L0_model_config/noautofill_platform/control_tensor_multiple/expected_ensemble
================================================
ensemble scheduling must be set for ensemble control_tensor_multiple whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/control_tensor_no_value/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Negative-test fixture (intentionally invalid config): the READY control
# declares a kind but none of the false/true value fields. The sibling
# `expected` file pairs this config with the error requiring one of
# 'int32_false_true', 'fp32_false_true' or 'bool_false_true' for
# CONTROL_SEQUENCE_READY.
name: "control_tensor_no_value"
platform: "onnxruntime"
max_batch_size: 8
sequence_batching {
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          # No *_false_true field is given here on purpose; this omission is
          # the condition under test.
          kind: CONTROL_SEQUENCE_READY
        }
      ]
    }
  ]
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/control_tensor_no_value/expected
================================================
sequence batching must specify either 'int32_false_true', 'fp32_false_true' or 'bool_false_true' for CONTROL_SEQUENCE_READY for control_tensor_no_value

================================================
FILE: qa/L0_model_config/noautofill_platform/control_tensor_no_value/expected_ensemble
================================================
ensemble scheduling must be set for ensemble control_tensor_no_value whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/default_priority_level0/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Negative-test fixture (intentionally invalid config): priority_levels is 3
# but default_priority_level is not set. The sibling `expected` file pairs
# this config with the error "default priority level must be in range
# [1, 3]".
name: "default_priority_level0"
platform: "onnxruntime"
max_batch_size: 8
dynamic_batching {
  # default_priority_level is deliberately omitted.
  # NOTE(review): presumably the unset field reads as 0, which falls outside
  # [1, 3] -- confirm against the config validator.
  priority_levels: 3
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/default_priority_level0/expected
================================================
default priority level must be in range \[1, 3\] for default_priority_level0

================================================
FILE: qa/L0_model_config/noautofill_platform/default_priority_level0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble default_priority_level0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/default_priority_level1/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Negative-test fixture (intentionally invalid config): default_priority_level
# (5) exceeds priority_levels (3). The sibling `expected` file pairs this
# config with the error "default priority level must be in range [1, 3]".
name: "default_priority_level1"
platform: "onnxruntime"
max_batch_size: 8
dynamic_batching {
  priority_levels: 3
  # Out of range on purpose: 5 is greater than the 3 levels declared above.
  default_priority_level: 5
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/default_priority_level1/expected
================================================
default priority level must be in range \[1, 3\] for default_priority_level1

================================================
FILE: qa/L0_model_config/noautofill_platform/default_priority_level1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble default_priority_level1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/ensemble_scheduling_set/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): ensemble_scheduling is
# populated, while this file itself sets no `platform` field. The sibling
# `expected` file pairs this config with the error "ensemble scheduling
# cannot be set for model 'ensemble_scheduling_set' whose platform is not
# ensemble".
name: "ensemble_scheduling_set"
max_batch_size: 8
# The presence of this ensemble_scheduling section is the condition under test.
ensemble_scheduling {
  step [
    {
      model_name: "model_a"
      model_version: -1
      input_map {
        key: "model_a_input"
        value: "data"
      }
      output_map {
        key: "model_a_output"
        value: "prob"
      }
    }
  ]
}
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]
# NOTE(review): gpus: [ 42 ] matches the value used by the invalid_gpu
# fixture, but the `expected` file for this fixture lists only the
# ensemble-scheduling error.
instance_group [
  {
    kind: KIND_GPU
    gpus: [ 42 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/ensemble_scheduling_set/expected
================================================
ensemble scheduling cannot be set for model 'ensemble_scheduling_set' whose platform is not ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/invalid_cpu/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): a KIND_CPU instance
# group also lists a GPU id. The sibling `expected` file pairs this config
# with the error "instance group invalid_cpu_0 of model invalid_cpu has kind
# KIND_CPU but specifies one or more GPU".
name: "invalid_cpu"
max_batch_size: 8
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]
instance_group [
  {
    # Contradictory on purpose: CPU kind combined with a non-empty gpus list.
    kind: KIND_CPU
    gpus: [ 0 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/invalid_cpu/expected
================================================
instance group invalid_cpu_0 of model invalid_cpu has kind KIND_CPU but specifies one or more GPU

================================================
FILE: qa/L0_model_config/noautofill_platform/invalid_cpu/expected_ensemble
================================================
ensemble scheduling must be set for ensemble invalid_cpu whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/invalid_gpu/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): the instance group
# requests GPU id 42. The sibling `expected` file pairs this config with the
# error "specifies invalid or unsupported gpu id 42".
name: "invalid_gpu"
max_batch_size: 8
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]
instance_group [
  {
    kind: KIND_GPU
    # GPU id 42 is the value reported in the expected error message.
    gpus: [ 42 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/invalid_gpu/expected
================================================
instance group invalid_gpu_0 of model invalid_gpu specifies invalid or unsupported gpu id 42. GPUs with at least the minimum required CUDA compute compatibility

================================================
FILE: qa/L0_model_config/noautofill_platform/invalid_gpu/expected_ensemble
================================================
ensemble scheduling must be set for ensemble invalid_gpu whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/missing_datatype/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): the input declares no
# data_type. The sibling `expected` file pairs this config with the error
# "model input 'input' must specify 'data_type'".
name: "missing_datatype"
max_batch_size: 4
input [
  {
    # data_type is deliberately omitted; the output below does specify one.
    name: "input"
    dims: [ 2, -1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/missing_datatype/expected
================================================
model input 'input' must specify 'data_type'

================================================
FILE: qa/L0_model_config/noautofill_platform/missing_datatype/expected_ensemble
================================================
ensemble scheduling must be set for ensemble missing_datatype whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/negative_gpu/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): the second instance
# group lists GPU id -1. The sibling `expected` file pairs this config with
# the error "instance group negative_gpu_1 of model negative_gpu specifies
# invalid or unsupported gpu id -1".
name: "negative_gpu"
max_batch_size: 1
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]
instance_group [
  {
    gpus: [ 0 ]
  }
]
# NOTE(review): the expected error names group "negative_gpu_1", which appears
# to refer to this second group (index 1) -- confirm the group naming scheme.
instance_group [
  {
    kind: KIND_GPU
    # Negative GPU id: the condition under test.
    gpus: [ -1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/negative_gpu/expected
================================================
instance group negative_gpu_1 of model negative_gpu specifies invalid or unsupported gpu id -1. GPUs with at least the minimum required CUDA compute compatibility

================================================
FILE: qa/L0_model_config/noautofill_platform/negative_gpu/expected_ensemble
================================================
ensemble scheduling must be set for ensemble negative_gpu whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/negative_max_batch_size/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): max_batch_size is
# negative. The sibling `expected` file pairs this config with the error
# "'max_batch_size' must be non-negative value for negative_max_batch_size".
name: "negative_max_batch_size"
# Negative on purpose: the condition under test.
max_batch_size: -2
input [
  {
    name: "data"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 1, 28, 28 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 10, 1, 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/negative_max_batch_size/expected
================================================
'max_batch_size' must be non-negative value for negative_max_batch_size

================================================
FILE: qa/L0_model_config/noautofill_platform/negative_max_batch_size/expected_ensemble
================================================
ensemble scheduling must be set for ensemble negative_max_batch_size whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/preserve_ordering0/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Negative-test fixture (intentionally invalid config): preserve_ordering is
# true while priority_levels is 3. The sibling `expected` file pairs this
# config with the error "Only one priority level is allowed when
# 'preserve_ordering' is true".
name: "preserve_ordering0"
platform: "onnxruntime"
max_batch_size: 8
dynamic_batching {
  # preserve_ordering combined with more than one priority level is the
  # condition under test.
  preserve_ordering: true
  priority_levels: 3
  default_priority_level: 2
  priority_queue_policy {
    key: 1
    value: {
    }
  }
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/preserve_ordering0/expected
================================================
Only one priority level is allowed when 'preserve_ordering' is true for preserve_ordering0

================================================
FILE: qa/L0_model_config/noautofill_platform/preserve_ordering0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble preserve_ordering0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/preserve_ordering1/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Negative-test fixture (intentionally invalid config): preserve_ordering is
# true while the default queue policy uses timeout_action DELAY. The sibling
# `expected` file pairs this config with the error "Queue policy can not have
# DELAY as timeout action when 'preserve_ordering' is true".
name: "preserve_ordering1"
platform: "onnxruntime"
max_batch_size: 8
dynamic_batching {
  preserve_ordering: true
  default_queue_policy {
    # DELAY in the *default* queue policy is the condition under test.
    timeout_action: DELAY
    default_timeout_microseconds: 1000
  }
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/preserve_ordering1/expected
================================================
Queue policy can not have DELAY as timeout action when 'preserve_ordering' is true for preserve_ordering1

================================================
FILE: qa/L0_model_config/noautofill_platform/preserve_ordering1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble preserve_ordering1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/preserve_ordering2/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Negative-test fixture (intentionally invalid config): preserve_ordering is
# true with a single priority level, but that level's queue policy uses
# timeout_action DELAY. The sibling `expected` file pairs this config with
# the error "Queue policy can not have DELAY as timeout action when
# 'preserve_ordering' is true".
name: "preserve_ordering2"
platform: "onnxruntime"
max_batch_size: 8
dynamic_batching {
  preserve_ordering: true
  priority_levels: 1
  default_priority_level: 1
  priority_queue_policy {
    key: 1
    value: {
      # DELAY in a *per-priority* queue policy is the condition under test.
      timeout_action: DELAY
      default_timeout_microseconds: 1000
    }
  }
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/preserve_ordering2/expected
================================================
Queue policy can not have DELAY as timeout action when 'preserve_ordering' is true for preserve_ordering2

================================================
FILE: qa/L0_model_config/noautofill_platform/preserve_ordering2/expected_ensemble
================================================
ensemble scheduling must be set for ensemble preserve_ordering2 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/priority_level0/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Negative-test fixture (intentionally invalid config): a priority queue
# policy is keyed by level 0 while priority_levels is 3. The sibling
# `expected` file pairs this config with the error "priority queue policy
# must have priority level in range [1, 3]".
name: "priority_level0"
platform: "onnxruntime"
max_batch_size: 8
dynamic_batching {
  priority_levels: 3
  default_priority_level: 2
  priority_queue_policy {
    # Key 0 is below the lower bound of the [1, 3] range named in the
    # expected error.
    key: 0
    value: {
    }
  }
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/priority_level0/expected
================================================
priority queue policy must have priority level in range \[1, 3\] for priority_level0

================================================
FILE: qa/L0_model_config/noautofill_platform/priority_level0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble priority_level0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/priority_level1/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Negative-test fixture (intentionally invalid config): a priority queue
# policy is keyed by level 4 while priority_levels is 3. The sibling
# `expected` file pairs this config with the error "priority queue policy
# must have priority level in range [1, 3]".
name: "priority_level1"
platform: "onnxruntime"
max_batch_size: 8
dynamic_batching {
  priority_levels: 3
  default_priority_level: 2
  priority_queue_policy {
    # Key 4 is above the upper bound of the [1, 3] range named in the
    # expected error.
    key: 4
    value: {
    }
  }
}
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/priority_level1/expected
================================================
priority queue policy must have priority level in range \[1, 3\] for priority_level1

================================================
FILE: qa/L0_model_config/noautofill_platform/priority_level1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble priority_level1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount0/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): the input's dims and
# reshape describe different element counts. The sibling `expected` file
# pairs this config with the error "model input 'input' has different size
# for dims and reshape".
name: "reshape_elementcount0"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    # dims [ 1, 3, 2 ] multiply to 6 elements; reshape [ 5 ] holds 5.
    dims: [ 1, 3, 2 ]
    reshape { shape: [ 5 ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount0/expected
================================================
model input 'input' has different size for dims and reshape for reshape_elementcount0

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_elementcount0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount1/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): the input's dims and
# reshape describe different element counts. The sibling `expected` file
# pairs this config with the error "model input 'input' has different size
# for dims and reshape".
name: "reshape_elementcount1"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    # dims [ 15 ] holds 15 elements; reshape [ 2, 1, 5 ] multiplies to 10.
    dims: [ 15 ]
    reshape { shape: [ 2, 1, 5 ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount1/expected
================================================
model input 'input' has different size for dims and reshape for reshape_elementcount1

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_elementcount1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount2/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): the output's dims and
# reshape describe different element counts. The sibling `expected` file
# pairs this config with the error "model output 'output' has different size
# for dims and reshape".
name: "reshape_elementcount2"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    # dims [ 1, 3, 2 ] multiply to 6 elements; reshape [ 3 ] holds 3.
    dims: [ 1, 3, 2 ]
    reshape { shape: [ 3 ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount2/expected
================================================
model output 'output' has different size for dims and reshape for reshape_elementcount2

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount2/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_elementcount2 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount3/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): the output's dims and
# reshape describe different element counts. The sibling `expected` file
# pairs this config with the error "model output 'output' has different size
# for dims and reshape".
name: "reshape_elementcount3"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    # dims [ 15 ] holds 15 elements; reshape [ 3, 2, 5 ] multiplies to 30.
    dims: [ 15 ]
    reshape { shape: [ 3, 2, 5 ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount3/expected
================================================
model output 'output' has different size for dims and reshape for reshape_elementcount3

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_elementcount3/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_elementcount3 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_empty0/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): a non-batching model
# (max_batch_size: 0) whose input has an empty reshape. The sibling
# `expected` file pairs this config with the error "model input 'input'
# cannot have empty reshape for non-batching model as scalar tensors are not
# supported".
name: "reshape_nobatch_empty0"
max_batch_size: 0
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
    # Empty reshape shape on the input: the condition under test.
    reshape { shape: [ ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_empty0/expected
================================================
model input 'input' cannot have empty reshape for non-batching model as scalar tensors are not supported for reshape_nobatch_empty0


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_empty0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_nobatch_empty0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_empty1/config.pbtxt
================================================
# Negative-test fixture (intentionally invalid config): a non-batching model
# (max_batch_size: 0) whose output has an empty reshape. The sibling
# `expected` file pairs this config with the error "model output 'output'
# cannot have empty reshape for non-batching model as scalar tensors are not
# supported".
name: "reshape_nobatch_empty1"
max_batch_size: 0
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
    # Empty reshape shape on the output: the condition under test.
    reshape { shape: [ ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_empty1/expected
================================================
model output 'output' cannot have empty reshape for non-batching model as scalar tensors are not supported for reshape_nobatch_empty1


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_empty1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_nobatch_empty1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable0/config.pbtxt
================================================
# Negative test fixture (non-batching, max_batch_size: 0): the input dims
# contain a variable-size dimension (-1) while its reshape is fully fixed.
# The sibling 'expected' file pins the load error
# "has different size for dims and reshape".
name: "reshape_nobatch_variable0"
max_batch_size: 0
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 2, 2 ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable0/expected
================================================
model input 'input' has different size for dims and reshape for reshape_nobatch_variable0

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_nobatch_variable0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable1/config.pbtxt
================================================
# Negative test fixture (non-batching, max_batch_size: 0): the output dims
# contain a variable-size dimension (-1) while its reshape is fully fixed.
# The sibling 'expected' file pins the load error
# "has different size for dims and reshape".
name: "reshape_nobatch_variable1"
max_batch_size: 0
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 2, 2 ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable1/expected
================================================
model output 'output' has different size for dims and reshape for reshape_nobatch_variable1

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_nobatch_variable1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable2/config.pbtxt
================================================
# Negative test fixture (non-batching, max_batch_size: 0): the input reshape
# carries an extra fixed dimension of 2 after the variable-size dimension that
# dims does not have. The sibling 'expected' file pins the load error
# "has different size for dims and reshape".
name: "reshape_nobatch_variable2"
max_batch_size: 0
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 2, -1, 2 ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable2/expected
================================================
model input 'input' has different size for dims and reshape for reshape_nobatch_variable2

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable2/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_nobatch_variable2 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable3/config.pbtxt
================================================
# Negative test fixture (non-batching, max_batch_size: 0): the fixed output
# dimensions around the variable-size dimension differ between dims (2 before
# the -1) and reshape (1 before, 2 after). The sibling 'expected' file pins the
# load error "has different size for dims and reshape".
name: "reshape_nobatch_variable3"
max_batch_size: 0
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 1, -1, 2 ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable3/expected
================================================
model output 'output' has different size for dims and reshape for reshape_nobatch_variable3

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable3/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_nobatch_variable3 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable4/config.pbtxt
================================================
# Negative test fixture (non-batching, max_batch_size: 0): the input dims have
# one variable-size dimension but the reshape has two. The sibling 'expected'
# file pins the load error
# "has different number of variable-size dimensions for dims and reshape".
name: "reshape_nobatch_variable4"
max_batch_size: 0
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 2, -1, -1 ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable4/expected
================================================
model input 'input' has different number of variable-size dimensions for dims and reshape for reshape_nobatch_variable4

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable4/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_nobatch_variable4 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable5/config.pbtxt
================================================
# Negative test fixture (non-batching, max_batch_size: 0): the output dims have
# one variable-size dimension but the reshape has two. The sibling 'expected'
# file pins the load error
# "has different number of variable-size dimensions for dims and reshape".
name: "reshape_nobatch_variable5"
max_batch_size: 0
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 2, -1, -1 ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable5/expected
================================================
model output 'output' has different number of variable-size dimensions for dims and reshape for reshape_nobatch_variable5

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_variable5/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_nobatch_variable5 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_zerodims0/config.pbtxt
================================================
# Negative test fixture (non-batching, max_batch_size: 0): the input reshape
# contains a 0 dimension. The sibling 'expected' file pins the load error
# "reshape dimensions must be integer >= 1, or -1 to indicate a variable-size
# dimension".
name: "reshape_nobatch_zerodims0"
max_batch_size: 0
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 2, 2 ]
    reshape { shape: [ 0 ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_zerodims0/expected
================================================
model input 'input' reshape dimensions must be integer >= 1, or -1 to indicate a variable-size dimension for reshape_nobatch_zerodims0

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_zerodims0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_nobatch_zerodims0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_zerodims1/config.pbtxt
================================================
# Negative test fixture (non-batching, max_batch_size: 0): the output reshape
# contains a 0 dimension. The sibling 'expected' file pins the load error
# "reshape dimensions must be integer >= 1, or -1 to indicate a variable-size
# dimension".
name: "reshape_nobatch_zerodims1"
max_batch_size: 0
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, 2 ]
    reshape { shape: [ 0 ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_zerodims1/expected
================================================
model output 'output' reshape dimensions must be integer >= 1, or -1 to indicate a variable-size dimension for reshape_nobatch_zerodims1

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_nobatch_zerodims1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_nobatch_zerodims1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable0/config.pbtxt
================================================
# Negative test fixture (batching, max_batch_size: 4): the input dims contain
# a variable-size dimension (-1) while its reshape is fully fixed.
# The sibling 'expected' file pins the load error
# "has different size for dims and reshape".
name: "reshape_variable0"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 2, 2 ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable0/expected
================================================
model input 'input' has different size for dims and reshape for reshape_variable0

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_variable0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable1/config.pbtxt
================================================
# Negative test fixture (batching, max_batch_size: 4): the output dims contain
# a variable-size dimension (-1) while its reshape is fully fixed.
# The sibling 'expected' file pins the load error
# "has different size for dims and reshape".
name: "reshape_variable1"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 2, 2 ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable1/expected
================================================
model output 'output' has different size for dims and reshape for reshape_variable1

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_variable1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable2/config.pbtxt
================================================
# Negative test fixture (batching, max_batch_size: 4): the input reshape
# carries an extra fixed dimension of 2 after the variable-size dimension that
# dims does not have. The sibling 'expected' file pins the load error
# "has different size for dims and reshape".
name: "reshape_variable2"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 2, -1, 2 ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable2/expected
================================================
model input 'input' has different size for dims and reshape for reshape_variable2

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable2/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_variable2 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable3/config.pbtxt
================================================
# Negative test fixture (batching, max_batch_size: 4): the fixed output
# dimensions around the variable-size dimension differ between dims (2 before
# the -1) and reshape (1 before, 2 after). The sibling 'expected' file pins the
# load error "has different size for dims and reshape".
name: "reshape_variable3"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 1, -1, 2 ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable3/expected
================================================
model output 'output' has different size for dims and reshape for reshape_variable3

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable3/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_variable3 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable4/config.pbtxt
================================================
# Negative test fixture (batching, max_batch_size: 4): the input dims have one
# variable-size dimension but the reshape has two. The sibling 'expected' file
# pins the load error
# "has different number of variable-size dimensions for dims and reshape".
name: "reshape_variable4"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 2, -1, -1 ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable4/expected
================================================
model input 'input' has different number of variable-size dimensions for dims and reshape for reshape_variable4

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable4/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_variable4 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable5/config.pbtxt
================================================
# Negative test fixture (batching, max_batch_size: 4): the output dims have one
# variable-size dimension but the reshape has two. The sibling 'expected' file
# pins the load error
# "has different number of variable-size dimensions for dims and reshape".
name: "reshape_variable5"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, -1 ]
    reshape { shape: [ 2, -1, -1 ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable5/expected
================================================
model output 'output' has different number of variable-size dimensions for dims and reshape for reshape_variable5

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_variable5/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_variable5 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_zerodims0/config.pbtxt
================================================
# Negative test fixture (batching, max_batch_size: 4): the input reshape
# contains a 0 dimension. The sibling 'expected' file pins the load error
# "reshape dimensions must be integer >= 1, or -1 to indicate a variable-size
# dimension".
name: "reshape_zerodims0"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 2, 2 ]
    reshape { shape: [ 0 ] }
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_zerodims0/expected
================================================
model input 'input' reshape dimensions must be integer >= 1, or -1 to indicate a variable-size dimension for reshape_zerodims0

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_zerodims0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_zerodims0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_zerodims1/config.pbtxt
================================================
# Negative test fixture (batching, max_batch_size: 4): the output reshape
# contains a 0 dimension. The sibling 'expected' file pins the load error
# "reshape dimensions must be integer >= 1, or -1 to indicate a variable-size
# dimension".
name: "reshape_zerodims1"
max_batch_size: 4
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, 2 ]
    reshape { shape: [ 0 ] }
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_zerodims1/expected
================================================
model output 'output' reshape dimensions must be integer >= 1, or -1 to indicate a variable-size dimension for reshape_zerodims1

================================================
FILE: qa/L0_model_config/noautofill_platform/reshape_zerodims1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble reshape_zerodims1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_input0/config.pbtxt
================================================
# Negative test fixture: the input dims contain a 0 in the middle of an
# otherwise valid shape. The sibling 'expected' file pins the load error
# "dimension must be integer >= 1, or -1 to indicate a variable-size
# dimension".
name: "zerodims_input0"
max_batch_size: 8
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1, 0, 28 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_input0/expected
================================================
model input 'input' dimension must be integer >= 1, or -1 to indicate a variable-size dimension for zerodims_input0

================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_input0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble zerodims_input0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_input1/config.pbtxt
================================================
# Negative test fixture: the input's only dimension is 0. The sibling
# 'expected' file pins the load error
# "dimension must be integer >= 1, or -1 to indicate a variable-size
# dimension".
name: "zerodims_input1"
max_batch_size: 8
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 0 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_input1/expected
================================================
model input 'input' dimension must be integer >= 1, or -1 to indicate a variable-size dimension for zerodims_input1

================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_input1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble zerodims_input1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_output0/config.pbtxt
================================================
# Negative test fixture: the output dims end with a 0 after otherwise valid
# dimensions. The sibling 'expected' file pins the load error
# "dimension must be integer >= 1, or -1 to indicate a variable-size
# dimension".
name: "zerodims_output0"
max_batch_size: 8
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 1, 28 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1, 1, 0 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_output0/expected
================================================
model output 'output' dimension must be integer >= 1, or -1 to indicate a variable-size dimension for zerodims_output0

================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_output0/expected_ensemble
================================================
ensemble scheduling must be set for ensemble zerodims_output0 whose platform is ensemble

================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_output1/config.pbtxt
================================================
# Negative test fixture: the output's only dimension is 0. The sibling
# 'expected' file pins the load error
# "dimension must be integer >= 1, or -1 to indicate a variable-size
# dimension".
name: "zerodims_output1"
max_batch_size: 8
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 0 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_output1/expected
================================================
model output 'output' dimension must be integer >= 1, or -1 to indicate a variable-size dimension for zerodims_output1

================================================
FILE: qa/L0_model_config/noautofill_platform/zerodims_output1/expected_ensemble
================================================
ensemble scheduling must be set for ensemble zerodims_output1 whose platform is ensemble

================================================
FILE: qa/L0_model_config/special_cases/invalid_platform/config.pbtxt
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Negative test fixture: 'platform' below is "onnxruntim" and must stay that
# way. The sibling 'expected' file pins the error
# "unexpected 'platform' and 'backend' pair, got:onnxruntim, onnxruntime".
# NOTE(review): the value looks like a deliberate misspelling of the ONNX
# Runtime platform name - do not "fix" it.
name: "invalid_platform"
platform: "onnxruntim"
default_model_filename: "model.onnx"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
    label_filename: "output0_labels.txt"
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_model_config/special_cases/invalid_platform/expected
================================================
unexpected 'platform' and 'backend' pair, got:onnxruntim, onnxruntime

================================================
FILE: qa/L0_model_config/special_cases/invalid_runtime/config.pbtxt
================================================
# Negative test fixture: an identity-backend model whose 'runtime' field names
# a library that is not expected to exist. The sibling 'expected' file pins the
# error "unable to find backend library '__invalid_runtime__' for model
# 'invalid_runtime'".
name: "invalid_runtime"
max_batch_size: 2
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]
runtime: "__invalid_runtime__"


================================================
FILE: qa/L0_model_config/special_cases/invalid_runtime/expected
================================================
unable to find backend library '__invalid_runtime__' for model 'invalid_runtime'


================================================
FILE: qa/L0_model_config/special_cases/runtime_escape/config.pbtxt
================================================
# Negative test fixture: an identity-backend model whose 'runtime' field uses a
# relative path containing "../". The sibling 'expected' file pins the error
# "backend library name '../dummy_runtime/libtriton_identity.so' escapes
# backend directory".
name: "runtime_escape"
max_batch_size: 2
backend: "identity"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]
runtime: "../dummy_runtime/libtriton_identity.so"


================================================
FILE: qa/L0_model_config/special_cases/runtime_escape/expected
================================================
backend library name '../dummy_runtime/libtriton_identity.so' escapes backend directory


================================================
FILE: qa/L0_model_config/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model-repository version. A first positional argument, when one
# is passed (even an empty one), takes precedence over the
# NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}

# Without a version there is nothing to test against: report and bail out.
if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Append the architecture suffix when TEST_REPO_ARCH is set and non-empty.
if [[ -n "${TEST_REPO_ARCH}" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Path of the client log file (its consumers are outside this excerpt).
CLIENT_LOG="./client.log"

# Server binary, timeout and log-file prefix.
# NOTE(review): presumably consumed by the helpers sourced from
# ../common/util.sh just below; SERVER_TIMEOUT is presumably in seconds -
# confirm against util.sh.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_TIMEOUT=20
SERVER_LOG_BASE="./inference_server"
source ../common/util.sh

# Restrict CUDA to device 0 for every process started from this script.
export CUDA_VISIBLE_DEVICES=0

# NOTE(review): looks like the list of platform names iterated by the trials
# later in the script - confirm against the loops that read TRIALS.
TRIALS="tensorrt_plan onnxruntime_onnx pytorch_libtorch"

# Copy fixed TensorRT plans into the test model repositories.
# Every listed path is a version directory ("<model>/1"); it is created if
# missing and receives the same plan_float32_float32_float32 model.plan.
# NOTE(review): "bad_outut_non_linear_format_io" looks like a misspelling of
# "bad_output_..."; confirm against the checked-in directory name before
# renaming it here.
for modelpath in \
        autofill_noplatform/tensorrt/bad_input_dims/1 \
        autofill_noplatform/tensorrt/bad_input_shape/1 \
        autofill_noplatform/tensorrt/bad_input_type/1 \
        autofill_noplatform/tensorrt/bad_input_shape_tensor/1 \
        autofill_noplatform/tensorrt/bad_input_non_linear_format_io/1 \
        autofill_noplatform/tensorrt/bad_output_dims/1 \
        autofill_noplatform/tensorrt/bad_output_shape/1 \
        autofill_noplatform/tensorrt/bad_output_type/1 \
        autofill_noplatform/tensorrt/bad_output_shape_tensor/1 \
        autofill_noplatform/tensorrt/bad_outut_non_linear_format_io/1 \
        autofill_noplatform/tensorrt/too_few_inputs/1 \
        autofill_noplatform/tensorrt/too_many_inputs/1 \
        autofill_noplatform/tensorrt/unknown_input/1 \
        autofill_noplatform/tensorrt/unknown_output/1 \
        autofill_noplatform_success/tensorrt/no_name_platform/1 \
        autofill_noplatform_success/tensorrt/empty_config/1     \
        autofill_noplatform_success/tensorrt/no_config/1 \
        autofill_noplatform_success/tensorrt/incomplete_input/1 \
        autofill_noplatform_success/tensorrt/incomplete_output/1 ; do
    mkdir -p $modelpath
    cp /data/inferenceserver/${REPO_VERSION}/qa_model_repository/plan_float32_float32_float32/1/model.plan \
       $modelpath/.

    # Create a dummy file which must be ignored. This test is only needed
    # for TensorRT autofiller as it is the last backend that attempts to
    # load the files provided in the version directory. Essentially,
    # for autofiller of other backends, a TensorRT plan would behave
    # like this dummy file.
    # Note that ">>" appends, so re-running the script in the same tree
    # grows the file instead of rewriting it.
    echo "dummy_content" >> $modelpath/dummy_file.txt
done


# Populate the shape-tensor test models: each version directory below is
# created and given the plan_zero_1_float32_int32 plan from the QA
# shape-tensor model repository.
for modelpath in \
        autofill_noplatform/tensorrt/mixed_batch_hint_dims/1 \
        autofill_noplatform/tensorrt/mixed_batch_hint_shape_values/1 \
        autofill_noplatform_success/tensorrt/no_config_shape_tensor/1 ; do
    mkdir -p "$modelpath"
    cp "/data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32/1/model.plan" \
       "$modelpath/."
done

# Populate the non-linear format IO test model with the
# plan_CHW32_LINEAR_float32_float32_float32 plan from the QA TensorRT format
# model repository.
for modelpath in \
        autofill_noplatform_success/tensorrt/no_config_non_linear_format_io/1 ; do
    mkdir -p "$modelpath"
    cp "/data/inferenceserver/${REPO_VERSION}/qa_trt_format_model_repository/plan_CHW32_LINEAR_float32_float32_float32/1/model.plan" \
       "$modelpath/."
done

# Populate the variable-size test models: each version directory below is
# created and given the plan_float32_float32_float32 plan from the QA
# variable model repository.
for modelpath in \
        autofill_noplatform_success/tensorrt/no_name_platform_variable/1 \
        autofill_noplatform_success/tensorrt/empty_config_variable/1     \
        autofill_noplatform_success/tensorrt/no_config_variable/1 \
        autofill_noplatform_success/tensorrt/hint_for_no_batch/1 \
        autofill_noplatform_success/tensorrt/multi_prof_max_bs/1 ; do
    mkdir -p "$modelpath"
    cp "/data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/plan_float32_float32_float32/1/model.plan" \
       "$modelpath/."
done

# Populate the bad_dynamic_shapes_{max,min} test models with the
# plan_float32_float32_float32-4-32 plan from the QA variable model
# repository.
for modelpath in \
        autofill_noplatform/tensorrt/bad_dynamic_shapes_max/1 \
        autofill_noplatform/tensorrt/bad_dynamic_shapes_min/1 ; do
    mkdir -p "$modelpath"
    cp "/data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/plan_float32_float32_float32-4-32/1/model.plan" \
       "$modelpath/."
done

# Make version folders for the ensemble test model repositories. Nothing is
# copied into them here; only the (possibly empty) "<model>/1" directories
# are created. Each path is listed once.
for modelpath in \
        autofill_noplatform/ensemble/invalid_input_map/invalid_input_map/1 \
        autofill_noplatform/ensemble/invalid_input_map/fp32_dim1_batch4/1 \
        autofill_noplatform/ensemble/invalid_input_map/fp32_dim1_batch4_input4/1 \
        autofill_noplatform/ensemble/invalid_input_map/fp32_dim1_batch4_output3/1 \
        autofill_noplatform/ensemble/invalid_output_map/invalid_output_map/1 \
        autofill_noplatform/ensemble/invalid_output_map/fp32_dim1_batch4/1 \
        autofill_noplatform/ensemble/invalid_output_map/fp32_dim1_batch4_input4/1 \
        autofill_noplatform/ensemble/invalid_output_map/fp32_dim1_batch4_output3/1 \
        autofill_noplatform/ensemble/invalid_batch_size/invalid_batch_size/1 \
        autofill_noplatform/ensemble/invalid_batch_size/fp32_dim1_batch2/1 \
        autofill_noplatform/ensemble/invalid_batch_size/fp32_dim1_batch4/1 \
        autofill_noplatform/ensemble/invalid_decoupled_branching/invalid_decoupled_branching/1 \
        autofill_noplatform/ensemble/invalid_decoupled_branching/int32_dim1_nobatch_output2/1 \
        autofill_noplatform/ensemble/invalid_decoupled_branching_2/invalid_decoupled_branching_2/1 \
        autofill_noplatform/ensemble/inconsistent_shape/inconsistent_shape/1 \
        autofill_noplatform/ensemble/inconsistent_shape/fp32_dim1_batch4/1 \
        autofill_noplatform/ensemble/inconsistent_shape/fp32_dim3_batch4/1 \
        autofill_noplatform/ensemble/inconsistent_data_type/inconsistent_data_type/1 \
        autofill_noplatform/ensemble/inconsistent_data_type/fp32_dim1_batch2/1 \
        autofill_noplatform/ensemble/inconsistent_data_type/int32_dim1_batch4/1 \
        autofill_noplatform/ensemble/non_existing_model/non_existing_model/1 \
        autofill_noplatform/ensemble/non_existing_model/fp32_dim1_batch4/1 \
        autofill_noplatform/ensemble/non_existing_model/fp32_dim1_batch4_output3/1 \
        autofill_noplatform/ensemble/self_circular_dependency/self_circular_dependency/1 \
        autofill_noplatform/ensemble/self_circular_dependency/fp32_dim1_batch4/1 \
        autofill_noplatform/ensemble/self_circular_dependency/fp32_dim1_batch4_input4/1 \
        autofill_noplatform/ensemble/self_circular_dependency/fp32_dim1_batch4_output3/1 \
        autofill_noplatform/ensemble/unmapped_input/unmapped_input/1 \
        autofill_noplatform/ensemble/unmapped_input/fp32_dim1_batch4/1 \
        autofill_noplatform/ensemble/unmapped_input/fp32_dim1_batch4_input4/1 \
        autofill_noplatform/ensemble/unmapped_input/fp32_dim1_batch4_output3/1 \
        autofill_noplatform/ensemble/circular_dependency/circular_dependency/1 \
        autofill_noplatform/ensemble/circular_dependency/circular_dependency_2/1 \
        autofill_noplatform/ensemble/no_required_version/no_required_version/1 \
        autofill_noplatform/ensemble/no_required_version/simple/1 \
        autofill_noplatform/ensemble/no_required_version_2/no_required_version_2/1 \
        autofill_noplatform/ensemble/no_required_version_2/simple/1 \
        autofill_noplatform/ensemble/no_required_version_3/no_required_version_3/1 \
        autofill_noplatform/ensemble/no_required_version_3/simple/1 \
        autofill_noplatform_success/ensemble/embedded_ensemble/embedded_ensemble/1 \
        autofill_noplatform_success/ensemble/embedded_ensemble/fp32_dim1_batch4/1 \
        autofill_noplatform_success/ensemble/embedded_ensemble/inner_ensemble/1 \
        autofill_noplatform_success/ensemble/inconsistent_shape/inconsistent_shape/1 \
        autofill_noplatform_success/ensemble/inconsistent_shape/fp32_dim1_batch4/1 \
        autofill_noplatform_success/ensemble/inconsistent_shape/fp32_dim2_nobatch/1 \
        autofill_noplatform_success/ensemble/inconsistent_shape_2/inconsistent_shape_2/1 \
        autofill_noplatform_success/ensemble/inconsistent_shape_2/fp32_dim1_batch4/1 \
        autofill_noplatform_success/ensemble/inconsistent_shape_2/fp32_dim2_nobatch/1 \
        autofill_noplatform_success/ensemble/unmapped_output/unmapped_output/1 \
        autofill_noplatform_success/ensemble/unmapped_output/fp32_dim1_batch4_output3/1 ; do
    mkdir -p $modelpath
done

# The decoupled-branching ensembles use a repeat_int32 model; create its
# version directory and copy in libtriton_repeat.so from the current
# directory.
for modelpath in \
        autofill_noplatform/ensemble/invalid_decoupled_branching/repeat_int32/1 \
        autofill_noplatform/ensemble/invalid_decoupled_branching_2/repeat_int32/1; do
    mkdir -p "$modelpath"
    cp ./libtriton_repeat.so "$modelpath/libtriton_repeat.so"
done

# Populate the PyTorch test models: each version directory below is created
# and given the libtorch_float32_float32_float32 model.pt from the QA model
# repository.
for modelpath in \
        autofill_noplatform/pytorch/too_few_inputs/1 \
        autofill_noplatform/pytorch/too_few_outputs/1 \
        autofill_noplatform_success/pytorch/no_name_platform/1 \
        autofill_noplatform_success/pytorch/cpu_instance/1 ; do
    mkdir -p "$modelpath"
    cp "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/libtorch_float32_float32_float32/1/model.pt" \
       "$modelpath/."
done

# Python test models that all share the stock auto_complete model.py: create
# each version directory and copy that file into it.
for modelpath in \
        autofill_noplatform/python/input_mismatch_datatype/1 \
        autofill_noplatform/python/input_mismatch_dims/1 \
        autofill_noplatform/python/output_mismatch_datatype/1 \
        autofill_noplatform/python/output_mismatch_dims/1 \
        autofill_noplatform_success/python/incomplete_output/1 \
        autofill_noplatform_success/python/unknown_input/1 \
        autofill_noplatform_success/python/unknown_output/1 \
        autofill_noplatform_success/python/empty_config/1 ; do
    mkdir -p "$modelpath"
    cp /opt/tritonserver/qa/python_models/auto_complete/model.py "$modelpath/."
done
# Python test models that carry their own model.py at the top of the model
# directory: create version directory "1" and copy that model.py into it.
for modelpath in \
        autofill_noplatform/python/conflicting_max_batch_size \
        autofill_noplatform/python/input_missing_datatype \
        autofill_noplatform/python/input_missing_dims \
        autofill_noplatform/python/input_missing_name \
        autofill_noplatform/python/output_missing_datatype \
        autofill_noplatform/python/output_missing_dims \
        autofill_noplatform/python/output_missing_name \
        autofill_noplatform/python/no_return \
        autofill_noplatform/python/conflicting_scheduler_sequence \
        autofill_noplatform_success/python/dynamic_batching_no_op \
        autofill_noplatform_success/python/dynamic_batching \
        autofill_noplatform_success/python/incomplete_input \
        autofill_noplatform_success/python/model_transaction_policy \
        autofill_noplatform_success/python/model_transaction_policy_decoupled_false \
        autofill_noplatform_success/python/model_transaction_policy_no_op \
        autofill_noplatform_success/python/optional_input \
        autofill_noplatform/python/input_wrong_property \
        autofill_noplatform/python/model_transaction_policy_invalid_args \
        autofill_noplatform/python/model_transaction_policy_mismatch \
        autofill_noplatform/python/output_wrong_property ; do
    mkdir -p "$modelpath/1"
    cp "$modelpath/model.py" "$modelpath/1/."
done
# Same treatment for the three models of the conflicting_scheduler_ensemble
# repository: copy each model's own model.py into its version directory "1".
for modelpath in \
        autofill_noplatform_success/python/conflicting_scheduler_ensemble/conflicting_scheduler_ensemble \
        autofill_noplatform_success/python/conflicting_scheduler_ensemble/ensemble_first_step \
        autofill_noplatform_success/python/conflicting_scheduler_ensemble/ensemble_second_step ; do
    mkdir -p "$modelpath/1"
    cp "$modelpath/model.py" "$modelpath/1/."
done

# Create the (empty) version folders for the custom test model
# repositories in one mkdir call.
mkdir -p \
    autofill_noplatform/custom/no_delimiter/1 \
    autofill_noplatform/custom/unknown_backend.unknown/1 \
    autofill_noplatform_success/custom/empty_config.identity/1 \
    autofill_noplatform_success/custom/no_backend.identity/1

# Instance group validation is deferred to the beginning of model creation,
# so these models need a version folder to get that far. Create all three
# in one mkdir call.
mkdir -p \
    noautofill_platform/invalid_cpu/1 \
    noautofill_platform/invalid_gpu/1 \
    noautofill_platform/negative_gpu/1

# Copy other required models
# special_cases/invalid_platform gets an ONNX model file in version 1.
mkdir -p special_cases/invalid_platform/1
cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_float32_float32_float32/1/model.onnx \
     special_cases/invalid_platform/1/

# Create runtime escape scenario
# Only an empty placeholder file named like a backend library is created
# (touch), next to an empty version directory.
mkdir -p special_cases/runtime_escape/1 special_cases/runtime_escape/dummy_runtime
touch special_cases/runtime_escape/dummy_runtime/libtriton_identity.so
# Setup invalid runtime model
# Only the empty version directory is created here.
mkdir -p special_cases/invalid_runtime/1

# Copy reshape model files into the test model repositories.
mkdir -p autofill_noplatform_success/tensorrt/reshape_config_provided/1
cp /data/inferenceserver/${REPO_VERSION}/qa_reshape_model_repository/plan_zero_4_float32/1/model.plan \
    autofill_noplatform_success/tensorrt/reshape_config_provided/1

# Copy identity model into onnx test directories
mkdir -p autofill_noplatform_success/onnx/cpu_instance/1
cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/onnx_zero_1_float16/1/model.onnx \
    autofill_noplatform_success/onnx/cpu_instance/1

# OpenVINO test models. The whole fixed_batch version directory "1" is
# copied recursively into each listed model directory.
for modelpath in \
        autofill_noplatform/openvino/bad_input_dims \
        autofill_noplatform/openvino/bad_output_dims \
        autofill_noplatform/openvino/too_few_inputs \
        autofill_noplatform/openvino/too_many_inputs \
        autofill_noplatform/openvino/unknown_input \
        autofill_noplatform/openvino/unknown_output \
        autofill_noplatform_success/openvino/empty_config \
        autofill_noplatform_success/openvino/no_config; do
    cp -r /opt/tritonserver/qa/openvino_models/fixed_batch/1 "$modelpath"
done

# The dynamic_batch test model uses the dynamic_batch version directory.
cp -r /opt/tritonserver/qa/openvino_models/dynamic_batch/1 \
    autofill_noplatform_success/openvino/dynamic_batch

# partial_config takes both the version directory and the labels file of
# openvino_int8_int8_int8 from the QA model repository.
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/openvino_int8_int8_int8/1" \
    autofill_noplatform_success/openvino/partial_config
cp "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/openvino_int8_int8_int8/output0_labels.txt" \
    autofill_noplatform_success/openvino/partial_config

# Build every model under model_metrics/*/*: copy the async_execute_decouple
# model.py into version directory "1", then generate config.pbtxt by
# concatenating the stock config with the test's partial.pbtxt.
for modelpath in $(ls -d model_metrics/*/*); do
    src_dir="/opt/tritonserver/qa/python_models/async_execute_decouple"
    mkdir -p "$modelpath/1"
    cp "$src_dir/model.py" "$modelpath/1/."
    cat "$src_dir/config.pbtxt" "$modelpath/partial.pbtxt" > "$modelpath/config.pbtxt"
done

# Build every model under custom_parameters/tensorrt/*/*: copy the stock
# plan_float32_float32_float32 plan into version directory "1" and generate
# config.pbtxt from the stock config plus the test's partial.pbtxt.
for modelpath in $(ls -d custom_parameters/tensorrt/*/*); do
    mkdir -p "$modelpath/1"
    model_name=$(basename "$modelpath")
    src_dir="/data/inferenceserver/${REPO_VERSION}/qa_model_repository/plan_float32_float32_float32"
    cp "${src_dir}/1/model.plan" "$modelpath/1/."
    cat "${src_dir}/config.pbtxt" "$modelpath/partial.pbtxt" > "$modelpath/config.pbtxt"
    # Rename the model after its directory and blank out the stock
    # version_policy and label_filename settings. The three substitutions
    # are applied in this order to every line.
    sed -i \
        -e "s/^name:.*/name: \"${model_name}\"/" \
        -e "s/^version_policy:.*//" \
        -e "s/label_filename:.*//" \
        "$modelpath/config.pbtxt"
done

# Start from a clean slate: remove logs left by an earlier run and reset the
# overall result code.
rm -f "$SERVER_LOG_BASE"* "$CLIENT_LOG"
RET=0

# Run tests for logs which do not have a timestamp on them.
# Each directory under cli_messages/ becomes the only model in a fresh
# repository. The server must fail to start, and its log must contain the
# text of at least one of the model's expected* files (plain grep, no
# log-line prefix).
for TARGET in $(ls cli_messages); do
    # Per-case command line flags; most cases need none.
    EXTRA_ARGS=""
    if [ "$TARGET" = "cli_override" ]; then
        EXTRA_ARGS="--disable-auto-complete-config --strict-model-config=false"
    elif [ "$TARGET" = "cli_deprecation" ]; then
        EXTRA_ARGS="--strict-model-config=true"
    fi

    SERVER_ARGS="--model-repository=$(pwd)/models  $EXTRA_ARGS"
    SERVER_LOG="${SERVER_LOG_BASE}.cli_messages_${TARGET}.log"

    rm -fr models && mkdir models
    cp -r "cli_messages/$TARGET" models/.

    # Left as an unexpanded glob; it is expanded by the ls call below.
    EXPECTEDS=models/$TARGET/expected*

    echo -e "Test on cli_messages/$TARGET" >> "$CLIENT_LOG"

    run_server
    if [ "$SERVER_PID" = "0" ]; then
        # The server did not come up, as required. Now look for one of the
        # expected messages in its log.
        EXFOUND=0
        for EXPECTED in $(ls $EXPECTEDS); do
            EX=$(cat "$EXPECTED")
            echo "grepping for: $EX"
            if grep "$EX" "$SERVER_LOG"; then
                echo -e "Found \"$EX\"" >> "$CLIENT_LOG"
                EXFOUND=1
                break
            fi
            echo -e "Not found \"$EX\"" >> "$CLIENT_LOG"
        done
        if [ "$EXFOUND" = "0" ]; then
            echo -e "*** FAILED: cli_messages/$TARGET" >> "$CLIENT_LOG"
            RET=1
        fi
    else
        # The server started although it should not have: record the
        # failure and shut it down again.
        echo -e "*** FAILED: unexpected success starting $SERVER" >> "$CLIENT_LOG"
        RET=1
        kill $SERVER_PID
        wait $SERVER_PID
    fi
done

# Run special test cases.
# Each directory under special_cases/ becomes the only model in a fresh
# repository. The server must fail to start and must log an error line
# ("E" followed by four digits) that matches one of the model's expected*
# files.
for TARGET in `ls special_cases`; do
    case $TARGET in
        "invalid_platform")
            EXTRA_ARGS="--disable-auto-complete-config" ;;
        *)
            EXTRA_ARGS="" ;;
    esac

    SERVER_ARGS="--model-repository=`pwd`/models $EXTRA_ARGS"
    SERVER_LOG=$SERVER_LOG_BASE.special_case_${TARGET}.log

    rm -fr models && mkdir models
    cp -r special_cases/$TARGET models/.

    CONFIG=models/$TARGET/config.pbtxt
    # Left as an unexpanded glob; it is expanded by the ls call below.
    EXPECTEDS=models/$TARGET/expected*

    echo -e "Test on special_cases/$TARGET" >> $CLIENT_LOG

    # We expect all the tests to fail with one of the expected
    # error messages
    run_server
    if [ "$SERVER_PID" != "0" ]; then
        echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
        RET=1
        kill $SERVER_PID
        wait $SERVER_PID
    else
        EXFOUND=0
        for EXPECTED in `ls $EXPECTEDS`; do
            EX=`cat $EXPECTED`
            # The entire regex is quoted so the shell cannot treat its
            # bracket expressions as a filename glob.
            if grep "^E[0-9][0-9][0-9][0-9].*$EX" $SERVER_LOG; then
                echo -e "Found \"$EX\"" >> $CLIENT_LOG
                EXFOUND=1
                break
            else
                echo -e "Not found \"$EX\"" >> $CLIENT_LOG
            fi
        done
        if [ "$EXFOUND" == "0" ]; then
            echo -e "*** FAILED: special_cases/$TARGET" >> $CLIENT_LOG
            RET=1
        fi
    fi
done

# For every platform in TRIALS, run each noautofill_platform case with
# --strict-model-config=true. The server must fail to start and log an
# error line matching one of the case's expected* files.
for TRIAL in $TRIALS; do
    # Run all tests that require no autofill but that add the platform to
    # the model config before running the test
    for TARGET in `ls noautofill_platform`; do
        SERVER_ARGS="--model-repository=`pwd`/models --strict-model-config=true"
        SERVER_LOG=$SERVER_LOG_BASE.noautofill_platform_${TRIAL}_${TARGET}.log

        rm -fr models && mkdir models
        cp -r noautofill_platform/$TARGET models/.

        CONFIG=models/$TARGET/config.pbtxt
        # Left as an unexpanded glob; it is expanded by the ls call below.
        EXPECTEDS=models/$TARGET/expected*

        # If there is a config.pbtxt change/add platform to it
        if [ -f $CONFIG ]; then
            # Drop any line mentioning "platform:" and append the trial's
            # platform, then echo the result for the test output.
            sed -i '/platform:/d' $CONFIG
            echo "platform: \"$TRIAL\"" >> $CONFIG
            cat $CONFIG
        fi

        echo -e "Test platform $TRIAL on noautofill_platform/$TARGET" >> $CLIENT_LOG

        # We expect all the tests to fail with one of the expected
        # error messages
        run_server
        if [ "$SERVER_PID" != "0" ]; then
            echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
            RET=1
            kill $SERVER_PID
            wait $SERVER_PID
        else
            EXFOUND=0
            for EXPECTED in `ls $EXPECTEDS`; do
                EX=`cat $EXPECTED`
                # The entire regex is quoted so the shell cannot treat its
                # bracket expressions as a filename glob.
                if grep "^E[0-9][0-9][0-9][0-9].*$EX" $SERVER_LOG; then
                    echo -e "Found \"$EX\"" >> $CLIENT_LOG
                    EXFOUND=1
                    break
                else
                    echo -e "Not found \"$EX\"" >> $CLIENT_LOG
                fi
            done

            if [ "$EXFOUND" == "0" ]; then
                echo -e "*** FAILED: platform $TRIAL noautofill_platform/$TARGET" >> $CLIENT_LOG
                RET=1
            fi
        fi
    done
done

# For every platform in TRIALS, run each noautofill_platform case again,
# this time with --disable-auto-complete-config instead of
# --strict-model-config=true. The server must fail to start and log an
# error line matching one of the case's expected* files.
for TRIAL in $TRIALS; do
    # Run all tests that require no autofill (requested here through the
    # --disable-auto-complete-config flag) but that add the platform to
    # the model config before running the test
    for TARGET in `ls noautofill_platform`; do
        SERVER_ARGS="--model-repository=`pwd`/models --disable-auto-complete-config"
        SERVER_LOG=$SERVER_LOG_BASE.noautofill_platform_disableflag_${TRIAL}_${TARGET}.log

        rm -fr models && mkdir models
        cp -r noautofill_platform/$TARGET models/.

        CONFIG=models/$TARGET/config.pbtxt
        # Left as an unexpanded glob; it is expanded by the ls call below.
        EXPECTEDS=models/$TARGET/expected*

        # If there is a config.pbtxt change/add platform to it
        if [ -f $CONFIG ]; then
            # Drop any line mentioning "platform:" and append the trial's
            # platform, then echo the result for the test output.
            sed -i '/platform:/d' $CONFIG
            echo "platform: \"$TRIAL\"" >> $CONFIG
            cat $CONFIG
        fi

        echo -e "Test platform $TRIAL on noautofill_platform/$TARGET with disable-auto-complete-config flag" >> $CLIENT_LOG

        # We expect all the tests to fail with one of the expected
        # error messages
        run_server
        if [ "$SERVER_PID" != "0" ]; then
            echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
            RET=1
            kill $SERVER_PID
            wait $SERVER_PID
        else
            EXFOUND=0
            for EXPECTED in `ls $EXPECTEDS`; do
                EX=`cat $EXPECTED`
                # The entire regex is quoted so the shell cannot treat its
                # bracket expressions as a filename glob.
                if grep "^E[0-9][0-9][0-9][0-9].*$EX" $SERVER_LOG; then
                    echo -e "Found \"$EX\"" >> $CLIENT_LOG
                    EXFOUND=1
                    break
                else
                    echo -e "Not found \"$EX\"" >> $CLIENT_LOG
                fi
            done

            if [ "$EXFOUND" == "0" ]; then
                echo -e "*** FAILED: platform $TRIAL noautofill_platform/$TARGET with disable-auto-complete-config flag" >> $CLIENT_LOG
                RET=1
            fi
        fi
    done
done

# Run all autofill tests that don't add a platform to the model config
# before running the test. Every case under autofill_noplatform/*/* is
# expected to make the server fail to start with an error line matching one
# of its expected* files.
for TARGET_DIR in `ls -d autofill_noplatform/*/*`; do
    # Path with "/" replaced by "." so it can be used in the log file name.
    TARGET_DIR_DOT=`echo $TARGET_DIR | tr / .`
    TARGET=`basename ${TARGET_DIR}`

    SERVER_ARGS="--model-repository=`pwd`/models --strict-model-config=false"
    SERVER_LOG=$SERVER_LOG_BASE.${TARGET_DIR_DOT}.log

    # If there is a config.pbtxt at the top-level of the test then
    # assume that the directory is a single model. Otherwise assume
    # that the directory is an entire model repository.
    rm -fr models && mkdir models
    if [ -f ${TARGET_DIR}/config.pbtxt ]; then
        cp -r ${TARGET_DIR} models/.
        EXPECTEDS=models/$TARGET/expected*
    else
        cp -r ${TARGET_DIR}/* models/.
        EXPECTEDS=models/expected*
    fi

    echo -e "Test ${TARGET_DIR}" >> $CLIENT_LOG

    # We expect all the tests to fail with one of the expected
    # error messages
    run_server
    if [ "$SERVER_PID" != "0" ]; then
        echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
        RET=1
        kill $SERVER_PID
        wait $SERVER_PID
    else
        EXFOUND=0
        for EXPECTED in `ls $EXPECTEDS`; do
            EX=`cat $EXPECTED`
            # The entire regex is quoted so the shell cannot treat its
            # bracket expressions as a filename glob.
            if grep "^E[0-9][0-9][0-9][0-9].*$EX" $SERVER_LOG; then
                echo -e "Found \"$EX\"" >> $CLIENT_LOG
                EXFOUND=1
                break
            else
                echo -e "Not found \"$EX\"" >> $CLIENT_LOG
            fi
        done

        if [ "$EXFOUND" == "0" ]; then
            echo -e "*** FAILED: ${TARGET_DIR}" >> $CLIENT_LOG
            RET=1
        fi
    fi
done

# Autofill cases that must load successfully. No platform is added to the
# model config. Once the server is up, compare_status.py is run against the
# expected files shipped with the case.
for TARGET_DIR in $(ls -d autofill_noplatform_success/*/*); do
    # Path with "/" replaced by "." so it can be used in the log file name.
    TARGET_DIR_DOT=$(echo $TARGET_DIR | tr / .)
    TARGET=$(basename ${TARGET_DIR})

    SERVER_ARGS="--model-repository=$(pwd)/models --strict-model-config=false"
    SERVER_LOG=${SERVER_LOG_BASE}.${TARGET_DIR_DOT}.log

    # A case directory is treated as an entire model repository only when
    # it has no top-level config.pbtxt AND is not one of the "no_config*"
    # cases. In every other situation it is copied as a single model.
    rm -fr models && mkdir models
    if [ ! -f ${TARGET_DIR}/config.pbtxt ] && [ "$TARGET" != "no_config" ] \
            && [ "$TARGET" != "no_config_variable" ] && [ "$TARGET" != "no_config_shape_tensor" ] \
            && [ "$TARGET" != "no_config_non_linear_format_io" ] ; then
        cp -r ${TARGET_DIR}/* models/.
    else
        cp -r ${TARGET_DIR} models/.
    fi

    echo -e "Test $TARGET_DIR" >> $CLIENT_LOG

    run_server
    if [ "$SERVER_PID" != "0" ]; then
        # errexit is switched off around the comparison so a non-zero exit
        # is recorded as a test failure. Note that "set -e" then leaves
        # errexit enabled for everything that follows.
        set +e
        if ! python ./compare_status.py --expected_dir models/$TARGET --model $TARGET >>$CLIENT_LOG 2>&1; then
            echo -e "*** FAILED: unexpected model config" >> $CLIENT_LOG
            RET=1
        fi
        set -e

        kill $SERVER_PID
        wait $SERVER_PID
    else
        echo -e "*** FAILED: unable to start $SERVER" >> $CLIENT_LOG
        RET=1
    fi
done

# model_metrics cases with a valid config: the only requirement is that the
# server starts with histogram latencies enabled.
for TARGET_DIR in $(ls -d model_metrics/valid_config/*); do
    # Path with "/" replaced by "." so it can be used in the log file name.
    TARGET_DIR_DOT=$(echo $TARGET_DIR | tr / .)

    SERVER_ARGS="--model-repository=$(pwd)/models --metrics-config histogram_latencies=true"
    SERVER_LOG=${SERVER_LOG_BASE}.${TARGET_DIR_DOT}.log

    rm -fr models && mkdir models
    cp -r ${TARGET_DIR} models/.

    echo -e "Test $TARGET_DIR" >> $CLIENT_LOG

    # Every case here must come up cleanly. A running server is simply
    # shut down again.
    run_server
    if [ "$SERVER_PID" != "0" ]; then
        kill $SERVER_PID
        wait $SERVER_PID
    else
        echo -e "*** FAILED: unable to start $SERVER" >> $CLIENT_LOG
        RET=1
    fi
done

# Run all model_metrics tests that are expected to be successful but with warnings.
# The server must start, and its log must contain a warning line ("W"
# followed by four digits) matching the case's single "expected" file.
for TARGET_DIR in `ls -d model_metrics/valid_config_with_warn/*`; do
    # Path with "/" replaced by "." so it can be used in the log file name.
    TARGET_DIR_DOT=`echo $TARGET_DIR | tr / .`
    TARGET=`basename ${TARGET_DIR}`

    SERVER_ARGS="--model-repository=`pwd`/models --metrics-config histogram_latencies=true"
    SERVER_LOG=$SERVER_LOG_BASE.${TARGET_DIR_DOT}.log

    rm -fr models && mkdir models
    cp -r ${TARGET_DIR} models/.

    EXPECTED=models/$TARGET/expected
    echo -e "Test $TARGET_DIR" >> $CLIENT_LOG

    # We expect all tests to succeed with the expected warning message
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "*** FAILED: unable to start $SERVER" >> $CLIENT_LOG
        RET=1
    else
        EXFOUND=0
        EX=`cat $EXPECTED`
        # The entire regex is quoted so the shell cannot treat its bracket
        # expressions as a filename glob.
        if grep "^W[0-9][0-9][0-9][0-9].*$EX" $SERVER_LOG; then
            echo -e "Found \"$EX\"" >> $CLIENT_LOG
            EXFOUND=1
        else
            echo -e "Not found \"$EX\"" >> $CLIENT_LOG
        fi
        if [ "$EXFOUND" == "0" ]; then
            echo -e "*** FAILED: model_metrics/$TARGET" >> $CLIENT_LOG
            RET=1
        fi
        kill $SERVER_PID
        wait $SERVER_PID
    fi
done

# Run all model_metrics tests that are missing required fields.
# The server must fail to start and log an error line ("E" followed by four
# digits) matching the case's single "expected" file.
for TARGET_DIR in `ls -d model_metrics/invalid_config/*`; do
    # Path with "/" replaced by "." so it can be used in the log file name.
    TARGET_DIR_DOT=`echo $TARGET_DIR | tr / .`
    TARGET=`basename ${TARGET_DIR}`

    SERVER_ARGS="--model-repository=`pwd`/models --metrics-config histogram_latencies=true"
    SERVER_LOG=$SERVER_LOG_BASE.${TARGET_DIR_DOT}.log

    rm -fr models && mkdir models
    cp -r ${TARGET_DIR} models/.

    EXPECTED=models/$TARGET/expected
    echo -e "Test $TARGET_DIR" >> $CLIENT_LOG

    # We expect all tests to fail with the expected error message
    run_server
    if [ "$SERVER_PID" != "0" ]; then
        echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
        RET=1
        kill $SERVER_PID
        wait $SERVER_PID
    else
        EXFOUND=0
        EX=`cat $EXPECTED`
        # The entire regex is quoted so the shell cannot treat its bracket
        # expressions as a filename glob.
        if grep "^E[0-9][0-9][0-9][0-9].*$EX" $SERVER_LOG; then
            echo -e "Found \"$EX\"" >> $CLIENT_LOG
            EXFOUND=1
        else
            echo -e "Not found \"$EX\"" >> $CLIENT_LOG
        fi
        if [ "$EXFOUND" == "0" ]; then
            echo -e "*** FAILED: model_metrics/$TARGET" >> $CLIENT_LOG
            RET=1
        fi
    fi
done

# Run all custom_parameters tests that are expected to succeed.
# The server must start. When the case ships an "expected" file, the log
# must also contain an info line ("I" followed by four digits) matching it.
for TARGET_DIR in `ls -d custom_parameters/*/valid/*`; do
    # Path with "/" replaced by "." so it can be used in the log file name.
    TARGET_DIR_DOT=`echo $TARGET_DIR | tr / .`
    TARGET=`basename ${TARGET_DIR}`

    SERVER_ARGS="--model-repository=`pwd`/models --log-info=true"
    SERVER_LOG=$SERVER_LOG_BASE.${TARGET_DIR_DOT}.log

    rm -fr models && mkdir models
    cp -r ${TARGET_DIR} models/.

    EXPECTED=models/$TARGET/expected
    echo -e "Test $TARGET_DIR" >> $CLIENT_LOG

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "*** FAILED: unable to start $SERVER" >> $CLIENT_LOG
        RET=1
    else
        kill $SERVER_PID
        wait $SERVER_PID
    fi

    # The log check is optional: it only runs for cases that provide an
    # "expected" file.
    if [ -f $EXPECTED ]; then
        EX_FOUND=0
        EX=`cat $EXPECTED`
        # The entire regex is quoted so the shell cannot treat its bracket
        # expressions as a filename glob.
        if grep "^I[0-9][0-9][0-9][0-9].*$EX" $SERVER_LOG; then
            echo -e "Found \"$EX\"" >> $CLIENT_LOG
            EX_FOUND=1
        else
            echo -e "Not found \"$EX\"" >> $CLIENT_LOG
        fi
        if [ "$EX_FOUND" == "0" ]; then
            # Report the custom_parameters case that actually failed.
            echo -e "*** FAILED: ${TARGET_DIR}" >> $CLIENT_LOG
            RET=1
        fi
    fi
done

# Run all custom_parameters tests that have invalid values.
# The server must fail to start and log an error line ("E" followed by four
# digits) matching the case's single "expected" file.
for TARGET_DIR in `ls -d custom_parameters/*/invalid/*`; do
    # Path with "/" replaced by "." so it can be used in the log file name.
    TARGET_DIR_DOT=`echo $TARGET_DIR | tr / .`
    TARGET=`basename ${TARGET_DIR}`

    SERVER_ARGS="--model-repository=`pwd`/models --log-info=true"
    SERVER_LOG=$SERVER_LOG_BASE.${TARGET_DIR_DOT}.log

    rm -fr models && mkdir models
    cp -r ${TARGET_DIR} models/.

    EXPECTED=models/$TARGET/expected
    echo -e "Test $TARGET_DIR" >> $CLIENT_LOG

    # We expect all tests to fail with the expected error message
    run_server
    if [ "$SERVER_PID" != "0" ]; then
        echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
        RET=1
        kill $SERVER_PID
        wait $SERVER_PID
    else
        EX_FOUND=0
        EX=`cat $EXPECTED`
        # The entire regex is quoted so the shell cannot treat its bracket
        # expressions as a filename glob.
        if grep "^E[0-9][0-9][0-9][0-9].*$EX" $SERVER_LOG; then
            echo -e "Found \"$EX\"" >> $CLIENT_LOG
            EX_FOUND=1
        else
            echo -e "Not found \"$EX\"" >> $CLIENT_LOG
        fi
        if [ "$EX_FOUND" == "0" ]; then
            # Report the custom_parameters case that actually failed.
            echo -e "*** FAILED: ${TARGET_DIR}" >> $CLIENT_LOG
            RET=1
        fi
    fi
done

# Print the overall verdict. On failure the accumulated client log is dumped
# as well so the failing cases are visible in the test output.
if [ $RET -ne 0 ]; then
    echo -e "\n***\n*** Test FAILED\n***"
    cat $CLIENT_LOG
else
    echo -e "\n***\n*** Test Passed\n***"
fi

# The script's exit status mirrors the aggregated test result.
exit $RET


================================================
FILE: qa/L0_model_namespacing/python_addsub/__init__.py
================================================
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend QA model returning the sum and difference of two inputs.

    OUTPUT0 = INPUT0 + INPUT1
    OUTPUT1 = INPUT0 - INPUT1
    """

    # The I/O description is packaged with the model code through the auto
    # complete feature, so the model can be deployed without a config.pbtxt.
    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Register the packaged inputs and outputs on an otherwise empty config.

        If the config already declares any input or output it is returned
        untouched, i.e. an explicitly provided config takes precedence.
        Returns the same config object that was passed in.
        """
        declared = auto_complete_model_config.as_dict()
        if any(len(declared[section]) != 0 for section in ("input", "output")):
            return auto_complete_model_config

        # Every tensor is a 16-element INT32 vector; a fresh dict is built for
        # each registration call.
        for tensor_name in ("INPUT0", "INPUT1"):
            auto_complete_model_config.add_input(
                {"name": tensor_name, "data_type": "TYPE_INT32", "dims": [16]}
            )
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            auto_complete_model_config.add_output(
                {"name": tensor_name, "data_type": "TYPE_INT32", "dims": [16]}
            )
        return auto_complete_model_config

    def initialize(self, args):
        """Parse the JSON model config and cache the numpy dtype of each output.

        `args["model_config"]` is expected to hold the config as a JSON string.
        """
        parsed_config = json.loads(args["model_config"])
        self.model_config = parsed_config

        first_output = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT0")
        second_output = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(first_output["data_type"])
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            second_output["data_type"]
        )

    def execute(self, requests):
        """Build one response per request, in the order the requests arrive."""
        return [
            pb_utils.InferenceResponse(
                self.addsub(
                    pb_utils.get_input_tensor_by_name(request, "INPUT0"),
                    pb_utils.get_input_tensor_by_name(request, "INPUT1"),
                )
            )
            for request in requests
        ]

    def addsub(self, in_0, in_1):
        """Return the [OUTPUT0, OUTPUT1] tensors: in_0 + in_1 and in_0 - in_1."""
        lhs = in_0.as_numpy()
        rhs = in_1.as_numpy()

        # Bytes / object typed input is converted to INT32 before the
        # arithmetic; the decision is made from the first input's dtype only.
        if lhs.dtype.type is np.bytes_ or lhs.dtype == np.object_:
            lhs = lhs.astype(np.int32)
            rhs = rhs.astype(np.int32)

        total = lhs + rhs
        difference = lhs - rhs

        # Results are cast to the dtypes declared for the outputs in the config.
        return [
            pb_utils.Tensor("OUTPUT0", total.astype(self.output0_dtype)),
            pb_utils.Tensor("OUTPUT1", difference.astype(self.output1_dtype)),
        ]


================================================
FILE: qa/L0_model_namespacing/python_subadd/__init__.py
================================================
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend QA model returning the difference and sum of two inputs.

    OUTPUT0 = INPUT0 - INPUT1
    OUTPUT1 = INPUT0 + INPUT1
    """

    # The I/O description is packaged with the model code through the auto
    # complete feature, so the model can be deployed without a config.pbtxt.
    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Register the packaged inputs and outputs on an otherwise empty config.

        If the config already declares any input or output it is returned
        untouched, i.e. an explicitly provided config takes precedence.
        Returns the same config object that was passed in.
        """
        declared = auto_complete_model_config.as_dict()
        if any(len(declared[section]) != 0 for section in ("input", "output")):
            return auto_complete_model_config

        # Every tensor is a 16-element INT32 vector; a fresh dict is built for
        # each registration call.
        for tensor_name in ("INPUT0", "INPUT1"):
            auto_complete_model_config.add_input(
                {"name": tensor_name, "data_type": "TYPE_INT32", "dims": [16]}
            )
        for tensor_name in ("OUTPUT0", "OUTPUT1"):
            auto_complete_model_config.add_output(
                {"name": tensor_name, "data_type": "TYPE_INT32", "dims": [16]}
            )
        return auto_complete_model_config

    def initialize(self, args):
        """Parse the JSON model config and cache the numpy dtype of each output.

        `args["model_config"]` is expected to hold the config as a JSON string.
        """
        parsed_config = json.loads(args["model_config"])
        self.model_config = parsed_config

        first_output = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT0")
        second_output = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(first_output["data_type"])
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            second_output["data_type"]
        )

    def execute(self, requests):
        """Build one response per request, in the order the requests arrive."""
        return [
            pb_utils.InferenceResponse(
                self.subadd(
                    pb_utils.get_input_tensor_by_name(request, "INPUT0"),
                    pb_utils.get_input_tensor_by_name(request, "INPUT1"),
                )
            )
            for request in requests
        ]

    def subadd(self, in_0, in_1):
        """Return the [OUTPUT0, OUTPUT1] tensors: in_0 - in_1 and in_0 + in_1."""
        lhs = in_0.as_numpy()
        rhs = in_1.as_numpy()

        # Bytes / object typed input is converted to INT32 before the
        # arithmetic; the decision is made from the first input's dtype only.
        if lhs.dtype.type is np.bytes_ or lhs.dtype == np.object_:
            lhs = lhs.astype(np.int32)
            rhs = rhs.astype(np.int32)

        difference = lhs - rhs
        total = lhs + rhs

        # Results are cast to the dtypes declared for the outputs in the config.
        return [
            pb_utils.Tensor("OUTPUT0", difference.astype(self.output0_dtype)),
            pb_utils.Tensor("OUTPUT1", total.astype(self.output1_dtype)),
        ]


================================================
FILE: qa/L0_model_namespacing/test.py
================================================
#!/usr/bin/env python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append(os.path.join(os.environ["TRITON_QA_ROOT_DIR"], "common"))

import shutil
import time
import unittest

import numpy as np
import test_util as tu
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

#
# Test utilities
#


# Checker to perform inference on given model, expecting model to have
# [INPUT0, INPUT1] and produce [OUTPUT0, OUTPUT1] where:
# OUTPUT0 = INPUT0 + INPUT1
# OUTPUT1 = INPUT0 - INPUT1
class AddSubChecker:
    """Runs inference on a model and verifies add / sub semantics.

    The model is expected to take [INPUT0, INPUT1] and to produce
    OUTPUT0 = INPUT0 + INPUT1 and OUTPUT1 = INPUT0 - INPUT1.
    """

    def __init__(self, checker_client=None):
        """Create the client, the request inputs and the expected results.

        'checker_client' optionally selects the Triton client library to use;
        it must be either the Triton HTTP or the Triton GRPC client module.
        The HTTP client library is used when it is not provided.
        """
        if checker_client is None:
            import tritonclient.http as checker_client

        # The endpoint is picked from the library name: HTTP libraries talk to
        # port 8000, anything else is treated as GRPC on port 8001.
        url = "localhost:8000" if "http" in checker_client.__name__ else "localhost:8001"
        self.client_ = checker_client.InferenceServerClient(url)

        # Both inputs carry the same data: the INT32 values 0..15
        values = np.arange(start=0, stop=16, dtype=np.int32)
        self.inputs_ = []
        for tensor_name in ("INPUT0", "INPUT1"):
            tensor = checker_client.InferInput(tensor_name, [16], "INT32")
            tensor.set_data_from_numpy(values)
            self.inputs_.append(tensor)

        self.expected_outputs_ = {
            "add": values + values,
            "sub": values - values,
        }

    def infer(self, model):
        """Infer on 'model' and assert OUTPUT0 is the sum, OUTPUT1 the difference."""
        result = self.client_.infer(model, self.inputs_)
        for output_name, operation in (("OUTPUT0", "add"), ("OUTPUT1", "sub")):
            np.testing.assert_allclose(
                result.as_numpy(output_name), self.expected_outputs_[operation]
            )


# Checker to perform inference on given model, expecting model to have
# [INPUT0, INPUT1] and produce [OUTPUT0, OUTPUT1] where:
# OUTPUT0 = INPUT0 - INPUT1
# OUTPUT1 = INPUT0 + INPUT1
class SubAddChecker(AddSubChecker):
    """Same request as AddSubChecker, but expects the outputs swapped.

    The model is expected to produce OUTPUT0 = INPUT0 - INPUT1 and
    OUTPUT1 = INPUT0 + INPUT1.
    """

    def infer(self, model):
        """Infer on 'model' and assert OUTPUT0 is the difference, OUTPUT1 the sum."""
        result = self.client_.infer(model, self.inputs_)
        for output_name, operation in (("OUTPUT0", "sub"), ("OUTPUT1", "add")):
            np.testing.assert_allclose(
                result.as_numpy(output_name), self.expected_outputs_[operation]
            )


#
# Test suites and cases
#


class ModelNamespacePoll(tu.TestResultCollector):
    """Model namespacing test cases that never issue an explicit model load.

    Every case only checks server health and runs inference; the dynamic
    case additionally sleeps after changing the repository on disk before
    inferring again.
    """

    def setUp(self):
        self.addsub_ = AddSubChecker()
        self.subadd_ = SubAddChecker()
        # Separate client for server interaction other than inference
        self.client_ = httpclient.InferenceServerClient("localhost:8000")

    def check_health(self, expect_live=True, expect_ready=True):
        """Assert the server liveness / readiness match the expectation."""
        is_live = self.client_.is_server_live()
        self.assertEqual(is_live, expect_live)
        is_ready = self.client_.is_server_ready()
        self.assertEqual(is_ready, expect_ready)

    def _expect_ambiguity_error(self, model):
        """Assert that inferring on 'model' by bare name reports an ambiguity."""
        try:
            self.addsub_.infer(model)
            self.assertTrue(False, "expected error for inferring ambiguous named model")
        except InferenceServerException as ex:
            self.assertIn("ambiguity", ex.message())

    def test_no_duplication(self):
        # Model namespacing is enabled on repositories that are already valid
        # without it. Every model should be visible and individually
        # inferable.
        self.check_health()

        for model in ("simple_addsub", "composing_addsub"):
            self.addsub_.infer(model)
        for model in ("simple_subadd", "composing_subadd"):
            self.subadd_.infer(model)

    def test_duplication(self):
        # Each repository holds one ensemble that requires a composing model
        # ('composing_model') which exists in both repositories.
        # All models are expected to be visible and each ensemble should pick
        # up the correct composing model, even though the composing model
        # itself can't be inferred individually.
        self.check_health()

        self.addsub_.infer("simple_addsub")
        self.subadd_.infer("simple_subadd")

        self._expect_ambiguity_error("composing_model")

    def test_ensemble_duplication(self):
        # Each repository holds one ensemble and both ensembles share the
        # same name. All models are expected to be visible and each ensemble
        # should pick up the correct composing model, even though the
        # ensemble itself can't be inferred without providing a namespace.
        self.check_health()

        self.addsub_.infer("composing_addsub")
        self.subadd_.infer("composing_subadd")

        self._expect_ambiguity_error("simple_ensemble")

    def test_dynamic_resolution(self):
        # Same model setup as 'test_duplication'. One of the composing models
        # is removed and added back at runtime, and the ensembles are
        # expected to be linked to whichever composing model exists:
        # 1. Remove 'composing_model' from addsub_repo; both ensembles should
        #    use 'composing_model' from subadd_repo and act as subadd.
        # 2. Add 'composing_model' back to addsub_repo; the ensembles should
        #    behave as they did before the removal.
        self.assertTrue("NAMESPACE_TESTING_DIRCTORY" in os.environ)
        testing_dir = os.environ["NAMESPACE_TESTING_DIRCTORY"]
        path_in_repo = os.path.join(testing_dir, "addsub_repo", "composing_model")
        path_outside_repo = os.path.join(testing_dir, "composing_model")

        self.check_health()

        # step 1.
        shutil.move(path_in_repo, path_outside_repo)
        # Give the server time to notice the repository change
        time.sleep(5)

        for model in ("simple_subadd", "simple_addsub", "composing_model"):
            self.subadd_.infer(model)

        # step 2.
        shutil.move(path_outside_repo, path_in_repo)
        time.sleep(5)

        self.addsub_.infer("simple_addsub")
        self.subadd_.infer("simple_subadd")

        self._expect_ambiguity_error("composing_model")


class ModelNamespaceExplicit(tu.TestResultCollector):
    """Model namespacing test cases that load the ensembles explicitly.

    Every case asks the server to load the ensemble(s) through the client
    and relies on the composing models being loaded along with them.
    """

    def setUp(self):
        self.addsub_ = AddSubChecker()
        self.subadd_ = SubAddChecker()
        # Separate client for server interaction other than inference
        self.client_ = httpclient.InferenceServerClient("localhost:8000")

    def check_health(self, expect_live=True, expect_ready=True):
        """Assert the server liveness / readiness match the expectation."""
        is_live = self.client_.is_server_live()
        self.assertEqual(is_live, expect_live)
        is_ready = self.client_.is_server_ready()
        self.assertEqual(is_ready, expect_ready)

    def _expect_ambiguity_error(self, model):
        """Assert that inferring on 'model' by bare name reports an ambiguity."""
        try:
            self.addsub_.infer(model)
            self.assertTrue(False, "expected error for inferring ambiguous named model")
        except InferenceServerException as ex:
            self.assertIn("ambiguity", ex.message())

    def test_no_duplication(self):
        # Model namespacing is enabled on repositories that are already valid
        # without it. Every model should be visible and individually
        # inferable.
        self.check_health()
        # Loading the ensembles cascades to their composing models
        for model in ("simple_addsub", "simple_subadd"):
            self.client_.load_model(model)

        for model in ("simple_addsub", "composing_addsub"):
            self.addsub_.infer(model)
        for model in ("simple_subadd", "composing_subadd"):
            self.subadd_.infer(model)

    def test_duplication(self):
        # Each repository holds one ensemble that requires a composing model
        # ('composing_model') which exists in both repositories.
        # All models are expected to be visible and each ensemble should pick
        # up the correct composing model, even though the composing model
        # itself can't be inferred individually.
        self.check_health()
        # Loading the ensembles cascades to their composing models
        for model in ("simple_addsub", "simple_subadd"):
            self.client_.load_model(model)

        self.addsub_.infer("simple_addsub")
        self.subadd_.infer("simple_subadd")

        self._expect_ambiguity_error("composing_model")

    def test_ensemble_duplication(self):
        # Each repository holds one ensemble and both ensembles share the
        # same name. All models are expected to be visible and each ensemble
        # should pick up the correct composing model, even though the
        # ensemble itself can't be inferred without providing a namespace.
        self.check_health()
        # Loading the ensemble cascades to the composing models
        self.client_.load_model("simple_ensemble")

        self.addsub_.infer("composing_addsub")
        self.subadd_.infer("composing_subadd")

        self._expect_ambiguity_error("simple_ensemble")

    def test_dynamic_resolution(self):
        # Same model setup as 'test_duplication'. One of the composing models
        # is removed and added back at runtime, and the ensembles are
        # expected to be linked to whichever composing model exists:
        # 1. Remove 'composing_model' from addsub_repo; both ensembles should
        #    use 'composing_model' from subadd_repo and act as subadd.
        # 2. Add 'composing_model' back to addsub_repo; the ensembles should
        #    behave as they did before the removal.
        self.assertTrue("NAMESPACE_TESTING_DIRCTORY" in os.environ)
        testing_dir = os.environ["NAMESPACE_TESTING_DIRCTORY"]
        path_in_repo = os.path.join(testing_dir, "addsub_repo", "composing_model")
        path_outside_repo = os.path.join(testing_dir, "composing_model")

        self.check_health()

        # step 1.
        shutil.move(path_in_repo, path_outside_repo)
        # Loading the ensembles cascades to the remaining composing model
        for model in ("simple_addsub", "simple_subadd"):
            self.client_.load_model(model)

        for model in ("simple_subadd", "simple_addsub", "composing_model"):
            self.subadd_.infer(model)

        # step 2.
        shutil.move(path_outside_repo, path_in_repo)
        # Explicitly loading only one of the ensembles should still trigger
        # the cascading (re-)load
        self.client_.load_model("simple_addsub")

        self.addsub_.infer("simple_addsub")
        self.subadd_.infer("simple_subadd")

        self._expect_ambiguity_error("composing_model")


# Script entry point: hand control to unittest, which runs the test(s)
# selected on the command line (all tests in this file when none are named).
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_model_namespacing/test.sh
================================================
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Locate the QA tree (overridable from the environment) and pull in the shared
# shell helpers. NOTE(review): 'run_server' and 'check_test_results' used below
# are not defined in this script, presumably they come from util.sh.
TRITON_QA_ROOT_DIR=${TRITON_QA_ROOT_DIR:="/opt/tritonserver/qa"}
source $TRITON_QA_ROOT_DIR/common/util.sh

# Overall status of the script, set to 1 as soon as any test case fails
RET=0

TEST_PY=./test.py
# tests are run individually
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'


export CUDA_VISIBLE_DEVICES=0
# Read by test.py to find the 'common' QA python modules
export TRITON_QA_ROOT_DIR=$TRITON_QA_ROOT_DIR
# Read by the model.py files to find the 'python_addsub' / 'python_subadd'
# model implementations
export TRITON_QA_PYTHON_MODEL_DIR=$TRITON_QA_ROOT_DIR/L0_model_namespacing

# Remove logs left over from a previous run
rm -fr *.log

# Every server instance serves the two repositories under 'test_dir' with model
# namespacing enabled; the model control mode is appended per test suite.
REPO_ARGS="--model-namespacing=true --model-repository=`pwd`/test_dir/addsub_repo --model-repository=`pwd`/test_dir/subadd_repo"
POLL_ARGS="--model-control-mode=POLL --repository-poll-secs=2"
EXPLICIT_ARGS="--model-control-mode=EXPLICIT"

SERVER=/opt/tritonserver/bin/tritonserver

# List all tests as each test will use different repo configuration
# (the list can be overridden from the environment; each name is both a test
# method name and the directory holding that test's repositories)
TEST_LIST=${TEST_LIST:="test_duplication \
                            test_dynamic_resolution \
                            test_ensemble_duplication \
                            test_no_duplication"}

# Helper to make sure all ensembles have a version directory: creates
# '<test>/<repo>/<model>/1' for every model directory of every test
CURR_DIR=`pwd`
for test_name in $TEST_LIST; do
    for model_dir in $CURR_DIR/$test_name/*/*; do
        mkdir -p $model_dir/1
    done
done

# Set this variable to avoid generation of '__pycache__' in the model directory,
# which will cause unintended model reload in polling mode as Triton sees
# changes in the model directory
export PYTHONDONTWRITEBYTECODE=1

# Polling: every test case runs against its own server instance started with
# the polling model control arguments.
for test_name in $TEST_LIST; do
    TEST_SUITE="ModelNamespacePoll"
    TEST_LOG="$(pwd)/test.${TEST_SUITE}.${test_name}.log"
    SERVER_LOG="./server.${TEST_SUITE}.${test_name}.log"

    # Start from a fresh copy of the repositories prepared for this test case
    rm -fr $(pwd)/test_dir
    cp -r $(pwd)/${test_name} $(pwd)/test_dir
    SERVER_ARGS="${REPO_ARGS} ${POLL_ARGS}"
    run_server
    if [ "${SERVER_PID}" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat ${SERVER_LOG}
        exit 1
    fi

    # A failing test case must not abort the script: record it and move on
    set +e
    # Pass in the test directory as the test may modify the structure
    if ! NAMESPACE_TESTING_DIRCTORY=$(pwd)/test_dir python ${TEST_PY} ${TEST_SUITE}.${test_name} >>${TEST_LOG} 2>&1; then
        RET=1
        cat ${TEST_LOG}
    elif ! check_test_results ${TEST_RESULT_FILE} ${EXPECTED_NUM_TESTS}; then
        cat ${TEST_LOG}
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
    set -e

    # Shut the server down before the next test case starts its own
    kill ${SERVER_PID}
    wait ${SERVER_PID}
done

# Explicit: every test case runs against its own server instance started with
# the explicit model control arguments.
for test_name in $TEST_LIST; do
    TEST_SUITE="ModelNamespaceExplicit"
    TEST_LOG="$(pwd)/test.${TEST_SUITE}.${test_name}.log"
    SERVER_LOG="./server.${TEST_SUITE}.${test_name}.log"

    # Start from a fresh copy of the repositories prepared for this test case
    rm -fr $(pwd)/test_dir
    cp -r $(pwd)/${test_name} $(pwd)/test_dir
    SERVER_ARGS="${REPO_ARGS} ${EXPLICIT_ARGS}"
    run_server
    if [ "${SERVER_PID}" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat ${SERVER_LOG}
        exit 1
    fi

    # A failing test case must not abort the script: record it and move on
    set +e
    # Pass in the test directory as the test may modify the structure
    if ! NAMESPACE_TESTING_DIRCTORY=$(pwd)/test_dir python ${TEST_PY} ${TEST_SUITE}.${test_name} >>${TEST_LOG} 2>&1; then
        RET=1
        cat ${TEST_LOG}
    elif ! check_test_results ${TEST_RESULT_FILE} ${EXPECTED_NUM_TESTS}; then
        cat ${TEST_LOG}
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
    set -e

    # Shut the server down before the next test case starts its own
    kill ${SERVER_PID}
    wait ${SERVER_PID}
done


# Print the overall verdict; anything other than a clean zero status is
# reported as a failure.
if ! [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

# Hand the accumulated status back to the caller.
exit $RET


================================================
FILE: qa/L0_model_namespacing/test_duplication/addsub_repo/composing_model/1/model.py
================================================
import os
import sys

# load pre-defined QA model
# The implementation is shared: it lives in the 'python_addsub' package found
# under the directory named by the TRITON_QA_PYTHON_MODEL_DIR environment
# variable, so that directory is added to the import path first. A KeyError is
# raised here when the variable is not set.
sys.path.append(os.environ["TRITON_QA_PYTHON_MODEL_DIR"])
# The wildcard import re-exports everything 'python_addsub' defines, in
# particular its TritonPythonModel class, from this module.
# NOTE(review): presumably the Python backend looks that class up by name in
# model.py - confirm against the backend documentation.
from python_addsub import *


================================================
FILE: qa/L0_model_namespacing/test_duplication/addsub_repo/simple_addsub/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble with two 16-element INT32 inputs and two 16-element INT32 outputs.
# It consists of a single step that forwards both inputs to 'composing_model'
# and returns that model's outputs under the same names.
platform: "ensemble"
max_batch_size: 0
version_policy: { all { }}


input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
ensemble_scheduling {
  step [
    {
      # The composing model is referenced by bare name. A model with this name
      # exists in both test repositories; the L0_model_namespacing test expects
      # this ensemble to behave as add/sub, i.e. to use the 'composing_model'
      # that sits next to it in addsub_repo.
      model_name: "composing_model"
      # -1 requests the latest version of the composing model (see the Triton
      # ensemble model documentation)
      model_version: -1
      # Ensemble tensors are mapped one-to-one onto the tensors of the same
      # name of the composing model
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_namespacing/test_duplication/subadd_repo/composing_model/1/model.py
================================================
import os
import sys

# load pre-defined QA model
# The implementation is shared: it lives in the 'python_subadd' package found
# under the directory named by the TRITON_QA_PYTHON_MODEL_DIR environment
# variable, so that directory is added to the import path first. A KeyError is
# raised here when the variable is not set.
sys.path.append(os.environ["TRITON_QA_PYTHON_MODEL_DIR"])
# The wildcard import re-exports everything 'python_subadd' defines, in
# particular its TritonPythonModel class, from this module.
# NOTE(review): presumably the Python backend looks that class up by name in
# model.py - confirm against the backend documentation.
from python_subadd import *


================================================
FILE: qa/L0_model_namespacing/test_duplication/subadd_repo/simple_subadd/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble model with two INT32[16] inputs and two INT32[16] outputs that are
# wired straight through to a single composing model.
platform: "ensemble"
# 0 = no batching; the dims below are the complete tensor shapes.
max_batch_size: 0
# Make every version of this model available.
version_policy: { all { }}

input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
# Single-step pipeline. In each map, `key` is the composing model's tensor
# name and `value` is the ensemble-level tensor it is connected to; here every
# tensor keeps its name.
ensemble_scheduling {
  step [
    {
      # Referenced by bare name; this repository's copy lives in
      # subadd_repo/composing_model.
      model_name: "composing_model"
      # -1 selects the latest available version of the composing model.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_namespacing/test_dynamic_resolution/addsub_repo/composing_model/1/model.py
================================================
import os
import sys

# Load the pre-defined QA model: the directory named by the
# TRITON_QA_PYTHON_MODEL_DIR environment variable is appended to the module
# search path so that `python_addsub` can be imported from it.
# A KeyError is raised here if the variable is not set.
sys.path.append(os.environ["TRITON_QA_PYTHON_MODEL_DIR"])
# Re-export everything public from python_addsub as this model's contents.
from python_addsub import *


================================================
FILE: qa/L0_model_namespacing/test_dynamic_resolution/addsub_repo/simple_addsub/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble model with two INT32[16] inputs and two INT32[16] outputs that are
# wired straight through to a single composing model.
platform: "ensemble"
# 0 = no batching; the dims below are the complete tensor shapes.
max_batch_size: 0
# Make every version of this model available.
version_policy: { all { }}


input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
# Single-step pipeline. In each map, `key` is the composing model's tensor
# name and `value` is the ensemble-level tensor it is connected to; here every
# tensor keeps its name.
ensemble_scheduling {
  step [
    {
      # Referenced by bare name. Both addsub_repo and subadd_repo of this
      # test contain a model called "composing_model".
      model_name: "composing_model"
      # -1 selects the latest available version of the composing model.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_namespacing/test_dynamic_resolution/subadd_repo/composing_model/1/model.py
================================================
import os
import sys

# Load the pre-defined QA model: the directory named by the
# TRITON_QA_PYTHON_MODEL_DIR environment variable is appended to the module
# search path so that `python_subadd` can be imported from it.
# A KeyError is raised here if the variable is not set.
sys.path.append(os.environ["TRITON_QA_PYTHON_MODEL_DIR"])
# Re-export everything public from python_subadd as this model's contents.
from python_subadd import *


================================================
FILE: qa/L0_model_namespacing/test_dynamic_resolution/subadd_repo/simple_subadd/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble model with two INT32[16] inputs and two INT32[16] outputs that are
# wired straight through to a single composing model.
platform: "ensemble"
# 0 = no batching; the dims below are the complete tensor shapes.
max_batch_size: 0
# Make every version of this model available.
version_policy: { all { }}


input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
# Single-step pipeline. In each map, `key` is the composing model's tensor
# name and `value` is the ensemble-level tensor it is connected to; here every
# tensor keeps its name.
ensemble_scheduling {
  step [
    {
      # Referenced by bare name. Both addsub_repo and subadd_repo of this
      # test contain a model called "composing_model".
      model_name: "composing_model"
      # -1 selects the latest available version of the composing model.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_namespacing/test_ensemble_duplication/addsub_repo/composing_addsub/1/model.py
================================================
import os
import sys

# Load the pre-defined QA model: the directory named by the
# TRITON_QA_PYTHON_MODEL_DIR environment variable is appended to the module
# search path so that `python_addsub` can be imported from it.
# A KeyError is raised here if the variable is not set.
sys.path.append(os.environ["TRITON_QA_PYTHON_MODEL_DIR"])
# Re-export everything public from python_addsub as this model's contents.
from python_addsub import *


================================================
FILE: qa/L0_model_namespacing/test_ensemble_duplication/addsub_repo/simple_ensemble/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble model with two INT32[16] inputs and two INT32[16] outputs that are
# wired straight through to a single composing model.
# In this test both addsub_repo and subadd_repo define an ensemble named
# "simple_ensemble"; only the composing model names differ.
platform: "ensemble"
# 0 = no batching; the dims below are the complete tensor shapes.
max_batch_size: 0
# Make every version of this model available.
version_policy: { all { }}


input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
# Single-step pipeline. In each map, `key` is the composing model's tensor
# name and `value` is the ensemble-level tensor it is connected to; here every
# tensor keeps its name.
ensemble_scheduling {
  step [
    {
      model_name: "composing_addsub"
      # -1 selects the latest available version of the composing model.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_namespacing/test_ensemble_duplication/subadd_repo/composing_subadd/1/model.py
================================================
import os
import sys

# Load the pre-defined QA model: the directory named by the
# TRITON_QA_PYTHON_MODEL_DIR environment variable is appended to the module
# search path so that `python_subadd` can be imported from it.
# A KeyError is raised here if the variable is not set.
sys.path.append(os.environ["TRITON_QA_PYTHON_MODEL_DIR"])
# Re-export everything public from python_subadd as this model's contents.
from python_subadd import *


================================================
FILE: qa/L0_model_namespacing/test_ensemble_duplication/subadd_repo/simple_ensemble/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble model with two INT32[16] inputs and two INT32[16] outputs that are
# wired straight through to a single composing model.
# In this test both addsub_repo and subadd_repo define an ensemble named
# "simple_ensemble"; only the composing model names differ.
platform: "ensemble"
# 0 = no batching; the dims below are the complete tensor shapes.
max_batch_size: 0
# Make every version of this model available.
version_policy: { all { }}


input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
# Single-step pipeline. In each map, `key` is the composing model's tensor
# name and `value` is the ensemble-level tensor it is connected to; here every
# tensor keeps its name.
ensemble_scheduling {
  step [
    {
      model_name: "composing_subadd"
      # -1 selects the latest available version of the composing model.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_namespacing/test_no_duplication/addsub_repo/composing_addsub/1/model.py
================================================
import os
import sys

# Load the pre-defined QA model: the directory named by the
# TRITON_QA_PYTHON_MODEL_DIR environment variable is appended to the module
# search path so that `python_addsub` can be imported from it.
# A KeyError is raised here if the variable is not set.
sys.path.append(os.environ["TRITON_QA_PYTHON_MODEL_DIR"])
# Re-export everything public from python_addsub as this model's contents.
from python_addsub import *


================================================
FILE: qa/L0_model_namespacing/test_no_duplication/addsub_repo/simple_addsub/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble model with two INT32[16] inputs and two INT32[16] outputs that are
# wired straight through to a single composing model.
# In this test the ensemble and composing model names are distinct between
# addsub_repo and subadd_repo.
platform: "ensemble"
# 0 = no batching; the dims below are the complete tensor shapes.
max_batch_size: 0
# Make every version of this model available.
version_policy: { all { }}


input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
# Single-step pipeline. In each map, `key` is the composing model's tensor
# name and `value` is the ensemble-level tensor it is connected to; here every
# tensor keeps its name.
ensemble_scheduling {
  step [
    {
      model_name: "composing_addsub"
      # -1 selects the latest available version of the composing model.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_namespacing/test_no_duplication/subadd_repo/composing_subadd/1/model.py
================================================
import os
import sys

# Load the pre-defined QA model: the directory named by the
# TRITON_QA_PYTHON_MODEL_DIR environment variable is appended to the module
# search path so that `python_subadd` can be imported from it.
# A KeyError is raised here if the variable is not set.
sys.path.append(os.environ["TRITON_QA_PYTHON_MODEL_DIR"])
# Re-export everything public from python_subadd as this model's contents.
from python_subadd import *


================================================
FILE: qa/L0_model_namespacing/test_no_duplication/subadd_repo/simple_subadd/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble model with two INT32[16] inputs and two INT32[16] outputs that are
# wired straight through to a single composing model.
# In this test the ensemble and composing model names are distinct between
# addsub_repo and subadd_repo.
platform: "ensemble"
# 0 = no batching; the dims below are the complete tensor shapes.
max_batch_size: 0
# Make every version of this model available.
version_policy: { all { }}


input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]


  }
]
# Single-step pipeline. In each map, `key` is the composing model's tensor
# name and `value` is the ensemble-level tensor it is connected to; here every
# tensor keeps its name.
ensemble_scheduling {
  step [
    {
      model_name: "composing_subadd"
      # -1 selects the latest available version of the composing model.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_model_queue/ensemble_zero_1_float32/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble wrapper around custom_zero_1_float32: one FP32 input and one FP32
# output, each forwarded under the same tensor name.
name: "ensemble_zero_1_float32"
platform: "ensemble"
# Batching is enabled (up to 32); dims below exclude the batch dimension.
max_batch_size: 32
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    # -1 marks a variable-size dimension.
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
# Single-step pipeline. In each map, `key` is the composing model's tensor
# name and `value` is the ensemble-level tensor it is connected to.
ensemble_scheduling {
  step [
    {
      model_name: "custom_zero_1_float32"
      # -1 selects the latest available version of the composing model.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
    }
  ]
}

================================================
FILE: qa/L0_model_queue/model_queue_test.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import re
import threading
import time
import unittest
from builtins import range
from ctypes import *

import infer_util as iu
import numpy as np
import requests
import test_util as tu
from tritonclientutils import InferenceServerException

# NOTE(review): not referenced in the visible part of this file; presumably
# mirrors the max queue delay configured for the test models - confirm.
_max_queue_delay_ms = 10000

# Exceptions caught inside request threads are parked in this module-level
# list so the main test thread can re-raise them after joining the threads.
# Accesses from the request threads are serialized with the lock.
_deferred_exceptions_lock = threading.Lock()
_deferred_exceptions = []


class ModelQueueTest(tu.TestResultCollector):
    def setUp(self):
        """Build the trial matrix and clear the shared deferred exceptions.

        ``self.trials_`` holds one kwargs dict per (base, protocol)
        combination; each dict is passed to ``check_response`` as keyword
        arguments by the tests.
        """
        global _deferred_exceptions
        self.trials_ = [
            {"base": model_base, "is_http_trial": over_http}
            for model_base in ("custom", "ensemble")
            for over_http in (True, False)
        ]
        # Start every test with an empty list of deferred exceptions.
        _deferred_exceptions = []

    def add_deferred_exception(self, ex):
        """Park ``ex`` in the shared list so the main thread can raise it later."""
        # The list is only mutated in place, never rebound, so the module-level
        # name can be used directly while holding the lock.
        with _deferred_exceptions_lock:
            _deferred_exceptions.append(ex)

    def check_deferred_exception(self):
        """Raise the oldest deferred exception, removing it from the list.

        Does nothing when no exception is pending. Only one exception is
        raised per call; call again to surface the next one.
        """
        with _deferred_exceptions_lock:
            if not _deferred_exceptions:
                return
            raise _deferred_exceptions.pop(0)

    def _get_metrics(self):
        """Fetch the server's metrics endpoint and return the response text.

        Raises:
            requests.HTTPError: if the endpoint answers with an error status.
            requests.Timeout: if the endpoint does not answer in time.
        """
        metrics_url = "http://localhost:8002/metrics"
        # requests applies no timeout by default, so an unresponsive server
        # would otherwise block the test run indefinitely.
        r = requests.get(metrics_url, timeout=30)
        r.raise_for_status()
        return r.text

    def _metrics_before_test(self, model, reason):
        """Return the current ``nv_inference_request_failure`` count.

        Looks up the counter for version "1" of ``model`` with the given
        ``reason`` label in the metrics text.

        Args:
            model: Model name used in the ``model`` label.
            reason: Failure reason used in the ``reason`` label.

        Returns:
            The counter value as an int.

        Raises:
            Exception: if no matching metric line is found.
        """
        # Escape the label values so that regex metacharacters in a model or
        # reason name (e.g. ".") are matched literally.
        pattern = (
            r"nv_inference_request_failure\{"
            rf'model="{re.escape(model)}",reason="{re.escape(reason)}",version="1"'
            r"\} (\d+)"
        )
        metrics = self._get_metrics()
        match = re.search(pattern, metrics)
        if match is None:
            raise Exception(f"Failure metrics for model='{model}' not found")
        return int(match.group(1))

    def _assert_metrics(
        self, model_name, reason, expected_count_increase, initial_count
    ):
        """Assert that a failure counter grew by the expected amount.

        Args:
            model_name: Model name used in the ``model`` label.
            reason: Failure reason used in the ``reason`` label.
            expected_count_increase: Failures expected to be added by the test.
            initial_count: Counter value captured before the test ran.
        """
        metrics = self._get_metrics()
        # Add initial count + expected count for the test
        expected_total = expected_count_increase + initial_count
        metric_series = f'nv_inference_request_failure{{model="{model_name}",reason="{reason}",version="1"}}'
        # Capture the complete value token and compare it numerically. A plain
        # substring check would also accept any value that merely starts with
        # the expected digits (e.g. expecting 4 but the server reports 40).
        match = re.search(re.escape(metric_series) + r" (\S+)", metrics)
        self.assertIsNotNone(match, f"metric {metric_series} not found")
        self.assertEqual(
            float(match.group(1)),
            expected_total,
            f"expected {metric_series} to be {expected_total}, got {match.group(1)}",
        )

    def check_response(
        self,
        bs,
        dtype,
        shapes,
        priority,
        timeout_us,
        thresholds,
        base="custom",
        is_http_trial=True,
    ):
        """Send one request through ``iu.infer_zero`` and check its latency.

        Intended to run as a thread target: any exception, including a failed
        latency assertion, is stored via ``add_deferred_exception`` instead of
        propagating, so the main thread can re-raise it after joining.

        Args:
            bs: Batch size; prepended to every entry of ``shapes``.
            dtype: Tensor data type forwarded to ``iu.infer_zero``.
            shapes: Iterable of per-tensor shapes (lists) without batch dim.
            priority: Request priority forwarded to ``iu.infer_zero``.
            timeout_us: Request timeout forwarded to ``iu.infer_zero``.
            thresholds: ``(lt_ms, gt_ms)``; the response time must be strictly
                less than ``lt_ms`` and strictly greater than ``gt_ms``.
                Either bound may be None to skip that check.
            base: Model base name forwarded to ``iu.infer_zero``.
            is_http_trial: Use HTTP when True, gRPC otherwise.
        """
        full_shapes = [[bs] + shape for shape in shapes]
        try:
            # Use the monotonic clock for elapsed time: unlike time.time(), it
            # is unaffected by system clock adjustments during the request.
            start_ms = int(round(time.monotonic() * 1000))
            iu.infer_zero(
                self,
                base,
                bs,
                dtype,
                full_shapes,
                full_shapes,
                model_version=1,
                use_http_json_tensors=False,
                use_http=is_http_trial,
                use_grpc=(not is_http_trial),
                use_streaming=False,
                priority=priority,
                timeout_us=timeout_us,
            )
            end_ms = int(round(time.monotonic() * 1000))
            elapsed_ms = end_ms - start_ms

            lt_ms = thresholds[0]
            gt_ms = thresholds[1]
            if lt_ms is not None:
                self.assertTrue(
                    elapsed_ms < lt_ms,
                    f"expected less than {lt_ms}ms response time, got {elapsed_ms} ms",
                )
            if gt_ms is not None:
                self.assertTrue(
                    elapsed_ms > gt_ms,
                    f"expected greater than {gt_ms}ms response time, got {elapsed_ms} ms",
                )
        except Exception as ex:
            # Exceptions cannot cross the thread boundary; hand them to the
            # main thread through the shared deferred list.
            self.add_deferred_exception(ex)

    def test_max_queue_size(self):
        # First send one batch-8 request (static batch size == preferred size)
        # to trigger model execution, then send 10 single-batch requests to
        # overload the model queue. Exactly 2 of those are expected to come
        # back immediately with an error.
        dtype = np.float32
        shapes = ([16],)

        for trial in self.trials_:
            # NOTE(review): the blocking request is created without
            # kwargs=trial, so it always runs with check_response's defaults
            # (base="custom", HTTP) - confirm this is intended.
            # Its response time must fall between 1000 ms and 5999 ms.
            blocker = threading.Thread(
                target=self.check_response,
                args=(8, dtype, shapes, 0, 0, (5999, 1000)),
            )
            overload_threads = [
                threading.Thread(
                    target=self.check_response,
                    args=(1, dtype, shapes, 0, 0, (None, None)),
                    kwargs=trial,
                )
                for _ in range(10)
            ]

            blocker.start()
            # Give the blocking request a head start before flooding the queue.
            time.sleep(0.5)
            for worker in overload_threads:
                worker.start()

            for worker in [blocker] + overload_threads:
                worker.join()

            # Expect exactly two exceptions with the exceeded-queue-size error
            expected_exceeded_count = 2
            exceeded_count = 0
            for _ in range(expected_exceeded_count):
                try:
                    self.check_deferred_exception()
                except InferenceServerException as ex:
                    self.assertTrue(
                        "Exceeds maximum queue size" in ex.message(),
                        'Expected error message "Exceeds maximum queue size", got: {}'.format(
                            ex
                        ),
                    )
                    exceeded_count += 1
            self.assertEqual(
                exceeded_count,
                expected_exceeded_count,
                "expected {} requests to fail with exceeded max queue size error, got {}".format(
                    expected_exceeded_count, exceeded_count
                ),
            )
            # Any further deferred server error is unexpected.
            try:
                self.check_deferred_exception()
            except InferenceServerException as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

    def test_policy_delay(self):
        # Send three requests with batch sizes 1, 2, 2, where the second and
        # third are started 0.2 s after the first (intended to be after
        # 'default_timeout_microseconds').
        # The first request is expected to time out and be delayed, so the
        # second and third get batched together and executed, while the first
        # has to wait for 'max_queue_delay_microseconds' before it can run.
        dtype = np.float32
        shapes = ([16],)
        # (batch size, (lt_ms, gt_ms)) for each request, in start order: the
        # first must take between 10 s and 15 s, the others under 100 ms.
        request_plan = (
            (1, (15000, 10000)),
            (2, (100, 0)),
            (2, (100, 0)),
        )
        for trial in self.trials_:
            try:
                first, *followers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(batch, dtype, shapes, 0, 0, limits),
                        kwargs=trial,
                    )
                    for batch, limits in request_plan
                ]
                first.start()
                time.sleep(0.2)
                for follower in followers:
                    follower.start()

                for worker in (first, *followers):
                    worker.join()
                self.check_deferred_exception()
            except InferenceServerException as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

    def test_policy_reject(self):
        # Send requests with batch sizes 1, 2, 2 where the second and third
        # requests are sent after 'default_timeout_microseconds'.
        # Expect the first request is timed-out and rejected, which makes the
        # second and third request be batched together and executed.
        initial_metrics_value_ensemble = self._metrics_before_test(
            "ensemble_zero_1_float32", "OTHER"
        )
        initial_metrics_value_custom = self._metrics_before_test(
            "custom_zero_1_float32", "REJECTED"
        )
        dtype = np.float32
        shapes = ([16],)
        for trial in self.trials_:
            threads = []
            threads.append(
                threading.Thread(
                    target=self.check_response,
                    args=(1, dtype, shapes, 0, 0, (None, None)),
                    kwargs=trial,
                )
            )
            threads.append(
                threading.Thread(
                    target=self.check_response,
                    args=(2, dtype, shapes, 0, 0, (100, 0)),
                    kwargs=trial,
                )
            )
            threads.append(
                threading.Thread(
                    target=self.check_response,
                    args=(2, dtype, shapes, 0, 0, (100, 0)),
                    kwargs=trial,
                )
            )
            threads[0].start()
            time.sleep(0.2)
            threads[1].start()
            threads[2].start()

            for t in threads:
                t.join()

            # Expect only one error for rejection
            try:
                self.check_deferred_exception()
            except InferenceServerException as ex:
                self.assertTrue(
                    "Request timeout expired" in ex.message(),
                    'Expected error message "Request timeout expired", got: {}'.format(
                        ex
                    ),
                )

            try:
                self.check_deferred_exception()
            except InferenceServerException as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
        expected_count_increase = 4
        # NOTE: Ensemble failure metrics will reflect the failure counts
        # of their composing models as well as the parent model, but currently do not capture the same granularity
        # for the "reason" label and will default to the "OTHER" reason.
        self._assert_metrics(
            "ensemble_zero_1_float32",
            "OTHER",
            expected_count_increase,
            initial_metrics_value_ensemble,
        )
        expected_count_increase = 4
        self._assert_metrics(
            "custom_zero_1_float32",
            "REJECTED",
            expected_count_increase,
            initial_metrics_value_custom,
        )

    def test_timeout_override(self):
        # For every trial, run three phases. Each phase sends one request of
        # batch size 1 followed, 0.2 s later, by two requests of batch size 2.
        # Only the first request's timeout and the follow-up requests'
        # expected timing window differ between phases:
        #
        #   1. The first request overrides the timeout to be less than
        #      'default_timeout_microseconds'. Expect it to be timed-out and
        #      rejected before the default timeout, so the follow-up requests
        #      are batched together and executed earlier than the default
        #      timeout.
        #   2. The first request asks for a timeout larger than
        #      'default_timeout_microseconds', which will not override. The
        #      follow-up requests are processed only after the default timeout
        #      and before the queue delay.
        #   3. Sanity check without override: the follow-up requests are
        #      processed only after 'default_timeout_microseconds'.
        dtype = np.float32
        shapes = ([16],)
        # (timeout passed with the first request,
        #  timing tuple passed with the two follow-up requests)
        phases = (
            (100000, (100, 0)),
            (10000000, (1100, 700)),
            (0, (1100, 700)),
        )
        for trial in self.trials_:
            for first_timeout, follower_window in phases:
                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(1, dtype, shapes, 0, first_timeout, (None, None)),
                        kwargs=trial,
                    )
                ]
                workers.extend(
                    threading.Thread(
                        target=self.check_response,
                        args=(2, dtype, shapes, 0, 0, follower_window),
                        kwargs=trial,
                    )
                    for _ in range(2)
                )
                first, *followers = workers
                first.start()
                time.sleep(0.2)
                for worker in followers:
                    worker.start()

                for worker in workers:
                    worker.join()

                # Expect only one error for rejection
                try:
                    self.check_deferred_exception()
                except InferenceServerException as err:
                    detail = err.message()
                    self.assertTrue(
                        "Request timeout expired" in detail,
                        'Expected error message "Request timeout expired", got: {}'.format(
                            err
                        ),
                    )

                # Any further deferred exception is a genuine failure.
                try:
                    self.check_deferred_exception()
                except InferenceServerException as err:
                    self.assertTrue(False, "unexpected error {}".format(err))

    def test_priority_levels(self):
        # Send 2 requests with batch sizes 2, 1 in default priority. Then send
        # 1 request with batch size 2 in priority 1. Expect the third request
        # to be placed at the front of the queue and form a preferred batch
        # with the first request.
        dtype = np.float32
        shapes = ([16],)
        # (seconds to wait before starting, check_response positional args).
        # The waits make sure the requests reach the server in this order.
        schedule = (
            (0, (2, dtype, shapes, 0, 0, (500, 200))),
            (0.1, (1, dtype, shapes, 0, 0, (15000, 10000))),
            (0.2, (2, dtype, shapes, 1, 0, (100, 0))),
        )
        for trial in self.trials_:
            workers = [
                threading.Thread(
                    target=self.check_response, args=call_args, kwargs=trial
                )
                for _, call_args in schedule
            ]
            for (pause, _), worker in zip(schedule, workers):
                if pause:
                    time.sleep(pause)
                worker.start()

            for worker in workers:
                worker.join()

            # No request is expected to fail in this scenario.
            try:
                self.check_deferred_exception()
            except InferenceServerException as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_max_priority_levels(self):
        # Same scenario as the two-level priority test, but exercising very
        # large priority values: send 2 requests with batch sizes 2, 1 at the
        # default priority (MAX_UINT32+1; the second request passes that value
        # explicitly), then send 1 request with batch size 2 in priority 1.
        # Expect the third request to be placed at the front of the queue and
        # form a preferred batch with the first request.
        dtype = np.float32
        shapes = ([16],)
        MAX_UINT32_PLUS_1 = 4294967296
        # (seconds to wait before starting, check_response positional args).
        # The waits make sure the requests reach the server in this order.
        schedule = (
            (0, (2, dtype, shapes, 0, 0, (500, 200))),
            (0.1, (1, dtype, shapes, MAX_UINT32_PLUS_1, 0, (15000, 10000))),
            (0.2, (2, dtype, shapes, 1, 0, (100, 0))),
        )
        for trial in self.trials_:
            workers = [
                threading.Thread(
                    target=self.check_response, args=call_args, kwargs=trial
                )
                for _, call_args in schedule
            ]
            for (pause, _), worker in zip(schedule, workers):
                if pause:
                    time.sleep(pause)
                worker.start()

            for worker in workers:
                worker.join()

            # No request is expected to fail in this scenario.
            try:
                self.check_deferred_exception()
            except InferenceServerException as err:
                self.assertTrue(False, "unexpected error {}".format(err))

    def test_priority_with_policy(self):
        # Two sets of requests are sent at different priority levels, one
        # request every 0.2 s, in this order:
        # priority 1:
        #     batch size 2, default timeout
        #     batch size 1, short timeout
        #     batch size 2, default timeout
        # priority 2:
        #     batch size 2, medium timeout
        #     batch size 3, default timeout
        #     batch size 6, default timeout
        # By the time the last request is sent, the requests with shortened
        # timeouts are expected to have been handled according to their
        # queue policy, so the queue becomes:
        # priority 1:
        #     batch size 2, default timeout (1st batch)
        #     batch size 2, default timeout (1st batch)
        #     batch size 1, short timeout (delayed, will be 2nd batch)
        # priority 2:
        #     batch size 2, medium timeout (will be rejected)
        #     batch size 3, default timeout (will be 2nd batch)
        #     batch size 6, default timeout (will be 3rd batch)

        dtype = np.float32
        shapes = ([16],)
        # The expected ranges may not be rounded to accommodate
        # the sleep between sending requests
        request_args = (
            (2, dtype, shapes, 1, 0, (2000, 1000)),
            (1, dtype, shapes, 1, 1000000, (3400, 2400)),
            (2, dtype, shapes, 1, 0, (1700, 700)),
            (2, dtype, shapes, 2, 2000000, (None, None)),
            (3, dtype, shapes, 2, 0, (2700, 1700)),
            (6, dtype, shapes, 2, 0, (15000, 10000)),
        )
        for trial in self.trials_:
            workers = [
                threading.Thread(
                    target=self.check_response, args=call_args, kwargs=trial
                )
                for call_args in request_args
            ]
            for worker in workers:
                worker.start()
                time.sleep(0.2)

            for worker in workers:
                worker.join()

            # Expect only one error for rejection
            try:
                self.check_deferred_exception()
            except InferenceServerException as err:
                detail = err.message()
                self.assertTrue(
                    "Request timeout expired" in detail,
                    'Expected error message "Request timeout expired", got: {}'.format(
                        err
                    ),
                )

            # Any further deferred exception is a genuine failure.
            try:
                self.check_deferred_exception()
            except InferenceServerException as err:
                self.assertTrue(False, "unexpected error {}".format(err))


# Run the test cases when this file is executed as a script; test.sh invokes
# it as `python model_queue_test.py ModelQueueTest.<test_case>` so that each
# case runs against a server started with the matching model configuration.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_model_queue/test.sh
================================================
#!/bin/bash
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
TEST_RESULT_FILE='test_results.txt'
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

CLIENT_LOG="./client.log"
MODEL_QUEUE_TEST=model_queue_test.py

DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver

SERVER_ARGS="--model-repository=`pwd`/models"

source ../common/util.sh

RET=0

# Must run on a single device or else the TRITONSERVER_DELAY_SCHEDULER
# can fail when the requests are distributed to multiple devices.
export CUDA_VISIBLE_DEVICES=0

# Rebuild ./models from the prepared base models and append the model
# configuration fragment read from stdin to the custom model's config.pbtxt.
# Aborts the script if the repository cannot be prepared, since no test can
# produce a meaningful result against a broken repository.
function setup_models () {
    if ! { rm -fr models && mkdir models && \
            cp -r ensemble_zero_1_float32 models/. && \
            cp -r custom_zero_1_float32 models/. && \
            cat >> models/custom_zero_1_float32/config.pbtxt; }; then
        echo -e "\n***\n*** Failed to prepare model repository\n***"
        exit 1
    fi
}

# Start the server against ./models, run the single test case named by $1
# from $MODEL_QUEUE_TEST, verify the reported results, then stop the server.
# Sets the globals TEST_CASE and SERVER_LOG (SERVER_LOG is read by
# run_server) and sets RET=1 on any test failure. Exits immediately if the
# server fails to start.
function run_test_case () {
    TEST_CASE=$1
    SERVER_LOG="./$TEST_CASE.server.log"
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $TEST_CASE" >>$CLIENT_LOG

    # A failing test must not abort the script: record it in RET and carry on
    # so the server is always shut down and the remaining cases still run.
    set +e
    python $MODEL_QUEUE_TEST ModelQueueTest.$TEST_CASE >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
}

# Prepare base model. Only test with custom backend as it is sufficient
rm -fr *.log  models custom_zero_1_float32
cp -r ../custom_models/custom_zero_1_float32 . && \
    mkdir -p ./custom_zero_1_float32/1 && \
    mkdir -p ./ensemble_zero_1_float32/1

(cd custom_zero_1_float32 && \
        sed -i "s/dims:.*\[.*\]/dims: \[ -1 \]/g" config.pbtxt && \
        sed -i "s/max_batch_size:.*/max_batch_size: 32/g" config.pbtxt && \
        echo "instance_group [ { kind: KIND_CPU count: 1 }]" >> config.pbtxt)

# test_max_queue_size
# For testing max queue size, we use delay in the custom model to
# create backlogs, "TRITONSERVER_DELAY_SCHEDULER" is not desired as queue size
# is capped by max queue size.
setup_models <<'EOF'
dynamic_batching {
    preferred_batch_size: [ 4, 8 ]
    default_queue_policy {
        max_queue_size: 8
    }
}
parameters [
{ key: "execute_delay_ms"; value: { string_value: "5000" }}
]
EOF
run_test_case test_max_queue_size

# test_policy_delay
setup_models <<'EOF'
dynamic_batching {
    preferred_batch_size: [ 4, 8 ]
    max_queue_delay_microseconds: 10000000
    default_queue_policy {
        timeout_action: DELAY
        default_timeout_microseconds: 100000
    }
}
EOF
run_test_case test_policy_delay

# test_policy_reject
setup_models <<'EOF'
dynamic_batching {
    preferred_batch_size: [ 4, 8 ]
    max_queue_delay_microseconds: 10000000
    default_queue_policy {
        default_timeout_microseconds: 100000
    }
}
EOF
run_test_case test_policy_reject

# test_timeout_override
setup_models <<'EOF'
dynamic_batching {
    preferred_batch_size: [ 4, 8 ]
    max_queue_delay_microseconds: 10000000
    default_queue_policy {
        allow_timeout_override: true
        default_timeout_microseconds: 1000000
    }
}
EOF
run_test_case test_timeout_override

# test_priority_levels
setup_models <<'EOF'
dynamic_batching {
    preferred_batch_size: [ 4, 8 ]
    max_queue_delay_microseconds: 10000000
    priority_levels: 2
    default_priority_level: 2
}
EOF
run_test_case test_priority_levels

MAX_UINT64=18446744073709551615
MAX_UINT32_PLUS_1=4294967296

# test_max_priority_levels
# The here-document delimiter is unquoted so the two limits are expanded.
setup_models <<EOF
dynamic_batching {
    preferred_batch_size: [ 4, 8 ]
    max_queue_delay_microseconds: 10000000
    priority_levels: $MAX_UINT64
    default_priority_level: $MAX_UINT32_PLUS_1
}
EOF
run_test_case test_max_priority_levels

# test_priority_with_policy
# 2 levels and 2 policies:
#     priority 1: delay
#     priority 2: reject
setup_models <<'EOF'
dynamic_batching {
    preferred_batch_size: [ 4, 8, 32 ]
    max_queue_delay_microseconds: 10000000
    priority_levels: 2
    default_priority_level: 2
    default_queue_policy {
        timeout_action: DELAY
        allow_timeout_override: true
        default_timeout_microseconds: 11000000
    }
    priority_queue_policy {
        key: 2
        value: {
            timeout_action: REJECT
            allow_timeout_override: true
            default_timeout_microseconds: 11000000
        }
    }
}
EOF
run_test_case test_priority_with_policy

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_model_update/instance_update_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import concurrent.futures
import json
import os
import random
import time
import unittest

import numpy as np
import tritonclient.grpc as grpcclient
from models.model_init_del.util import (
    disable_batching,
    enable_batching,
    get_count,
    reset_count,
    set_delay,
    update_instance_group,
    update_model_file,
    update_sequence_batching,
)
from tritonclient.utils import InferenceServerException


class TestInstanceUpdate(unittest.TestCase):
    _model_name = "model_init_del"

    def setUp(self):
        """Put the shared model fixture back to its defaults and connect to Triton."""
        # Zero the lifecycle counters
        for counter in ("initialize", "finalize"):
            reset_count(counter)
        # Batching is off unless a test turns it on
        disable_batching()
        # No artificial delay on instance creation or inference
        for stage in ("initialize", "infer"):
            set_delay(stage, 0)
        # Clear any sequence batching setting
        update_sequence_batching("")
        # gRPC client used by every test case
        self._triton = grpcclient.InferenceServerClient("localhost:8001")

    def tearDown(self):
        """Unload the model after a failed test so the next test starts clean.

        Nothing is done when the test case that is tearing down has passed.
        """
        outcome = self._outcome.result
        # A test case is listed in errors/failures only when it did not pass
        failed = any(self == case for case, _ in outcome.errors + outcome.failures)
        if failed:
            # Best effort to reset the model state for the next test case
            self._triton.unload_model(self._model_name)
            time.sleep(30)  # time for instances to finish unloading

    def _get_inputs(self, batching=False):
        """Build a single FP32 "INPUT0" tensor of ones with a random shape.

        With batching the shape is [1..2, 1..16]; otherwise it is [1..16].
        Returns a one-element list holding the input.
        """
        self.assertIsInstance(batching, bool)
        dims = []
        if batching:
            # Leading batch dimension is drawn first
            dims.append(random.randint(1, 2))
        dims.append(random.randint(1, 16))
        tensor = grpcclient.InferInput("INPUT0", dims, "FP32")
        tensor.set_data_from_numpy(np.ones(dims, dtype=np.float32))
        return [tensor]

    def _infer(self, batching=False):
        """Send one inference request with freshly generated inputs to the model."""
        request_inputs = self._get_inputs(batching)
        self._triton.infer(self._model_name, request_inputs)

    def _concurrent_infer(self, concurrency=4, batching=False):
        """Start background threads that send inference requests in a loop.

        Args:
            concurrency: Number of looping inference tasks to start.
            batching: Forwarded to _infer() to select batched input shapes.

        Returns:
            A callable that stops the loops, waits for every task to finish
            (re-raising the first inference error, if any) and shuts the
            thread pool down.
        """
        # Size the pool to the requested concurrency; the default pool size
        # depends on the CPU count and could run fewer loops than requested.
        # max() keeps concurrency=0 valid, since the executor rejects 0 workers.
        pool = concurrent.futures.ThreadPoolExecutor(max_workers=max(1, concurrency))
        stop = [False]  # one-element list so the closures share the flag

        def repeat_infer():
            while not stop[0]:
                self._infer(batching)

        infer_threads = [pool.submit(repeat_infer) for _ in range(concurrency)]

        def stop_infer():
            stop[0] = True
            try:
                for infer_thread in infer_threads:
                    infer_thread.result()
            finally:
                # Release the worker threads even when an inference failed
                pool.shutdown()

        return stop_infer

    def _check_count(self, kind, expected_count, poll=False):
        """Assert that the counter named by kind equals expected_count.

        When poll is True, first wait for the counter to reach expected_count,
        re-reading it every 0.1 seconds for up to 300 retries (nominally 30
        seconds), and then make the assertion.
        """
        self.assertIsInstance(poll, bool)
        if poll:
            timeout, poll_interval = 30, 0.1  # seconds
            retries_left = timeout / poll_interval
            while retries_left > 0 and get_count(kind) < expected_count:
                time.sleep(poll_interval)
                retries_left -= 1
        self.assertEqual(get_count(kind), expected_count)

    def _load_model(self, instance_count, instance_config="", batching=False):
        """Select the batching mode, then load the model with instance_count instances.

        Loading is expressed as an instance update that adds instance_count
        instances and removes none.
        """
        if batching:
            enable_batching()
        else:
            disable_batching()
        self._update_instance_count(
            instance_count, 0, instance_config, batching=batching
        )

    def _update_instance_count(
        self,
        add_count,
        del_count,
        instance_config="",
        wait_for_finalize=False,
        batching=False,
    ):
        """Apply an instance group change and verify the resulting counters.

        Args:
            add_count: Instances expected to be newly initialized (int >= 0).
            del_count: Instances expected to be finalized (int >= 0).
            instance_config: Instance group text to write. When empty, a
                KIND_CPU group is generated whose count is the current live
                count plus add_count minus del_count.
            wait_for_finalize: Poll for the finalize counter instead of
                checking it once.
            batching: Forwarded to the sanity inference sent at the end.
        """
        for delta in (add_count, del_count):
            self.assertIsInstance(delta, int)
            self.assertGreaterEqual(delta, 0)
        self.assertIsInstance(instance_config, str)
        # Snapshot the counters before touching the model
        initialized_before = get_count("initialize")
        finalized_before = get_count("finalize")
        if not instance_config:
            live_before = initialized_before - finalized_before
            target = live_before + add_count - del_count
            instance_config = "{\ncount: " + str(target) + "\nkind: KIND_CPU\n}"
        update_instance_group(instance_config)
        self._triton.load_model(self._model_name)
        self._check_count("initialize", initialized_before + add_count)
        self._check_count("finalize", finalized_before + del_count, wait_for_finalize)
        # The model must still serve requests after the update
        self._infer(batching)

    def _unload_model(self, batching=False):
        """Unload the model and verify every created instance gets finalized.

        Afterwards an inference request is expected to raise
        InferenceServerException.
        """
        initialized_before = get_count("initialize")
        self._triton.unload_model(self._model_name)
        # Unloading must not create instances ...
        self._check_count("initialize", initialized_before, poll=False)
        # ... and the finalize counter must catch up with the initialize counter
        self._check_count("finalize", initialized_before, poll=True)
        with self.assertRaises(InferenceServerException):
            self._infer(batching)

    # Test add -> remove -> add an instance without batching
    def test_add_rm_add_instance_no_batching(self):
        # Start from 3 instances and keep inference traffic running while one
        # instance is added, removed and added again (batching disabled).
        self._load_model(3, batching=False)
        stop_traffic = self._concurrent_infer(batching=False)
        for added, removed in ((1, 0), (0, 1), (1, 0)):
            self._update_instance_count(added, removed, batching=False)
        stop_traffic()
        self._unload_model(batching=False)

    # Test add -> remove -> add an instance with batching
    def test_add_rm_add_instance_with_batching(self):
        # Start from 4 instances and keep inference traffic running while one
        # instance is added, removed and added again (batching enabled).
        self._load_model(4, batching=True)
        stop_traffic = self._concurrent_infer(batching=True)
        for added, removed in ((1, 0), (0, 1), (1, 0)):
            self._update_instance_count(added, removed, batching=True)
        stop_traffic()
        self._unload_model(batching=True)

    # Test remove -> add -> remove an instance without batching
    def test_rm_add_rm_instance_no_batching(self):
        # Start from 2 instances and keep inference traffic running while one
        # instance is removed, added and removed again (batching disabled).
        self._load_model(2, batching=False)
        stop_traffic = self._concurrent_infer(batching=False)
        for added, removed in ((0, 1), (1, 0), (0, 1)):
            self._update_instance_count(added, removed, batching=False)
        stop_traffic()
        self._unload_model(batching=False)

    # Test remove -> add -> remove an instance with batching
    def test_rm_add_rm_instance_with_batching(self):
        # Start from 3 instances and keep inference traffic running while one
        # instance is removed, added and removed again (batching enabled).
        self._load_model(3, batching=True)
        stop_traffic = self._concurrent_infer(batching=True)
        for added, removed in ((0, 1), (1, 0), (0, 1)):
            self._update_instance_count(added, removed, batching=True)
        stop_traffic()
        self._unload_model(batching=True)

    # Test reduce instance count to zero
    def test_rm_instance_to_zero(self):
        # Requesting zero instances is overwritten to one instance, so with a
        # single instance already loaded nothing should be created or removed.
        zero_instances = "{\ncount: 0\nkind: KIND_CPU\n}"
        self._load_model(1)
        self._update_instance_count(0, 0, zero_instances)
        self._unload_model()

    # Test add/remove multiple CPU instances at a time
    def test_cpu_instance_update(self):
        # Shrink 8 CPU instances down to 1 in two steps, apply a no-op update,
        # then grow back to 8 in two steps.
        self._load_model(8)
        for removed in (4, 3, 0):  # the trailing 0 is the "no change" update
            self._update_instance_count(0, removed)
        time.sleep(0.1)  # larger the gap for config.pbtxt timestamp to update
        for added in (2, 5):
            self._update_instance_count(added, 0)
        self._unload_model()

    # Test add/remove multiple GPU instances at a time
    def test_gpu_instance_update(self):
        # Load 6 GPU instances, drop to 4, then grow to 7.
        six_gpu = "{\ncount: 6\nkind: KIND_GPU\n}"
        four_gpu = "{\ncount: 4\nkind: KIND_GPU\n}"
        seven_gpu = "{\ncount: 7\nkind: KIND_GPU\n}"
        self._load_model(6, six_gpu)
        self._update_instance_count(0, 2, four_gpu)
        self._update_instance_count(3, 0, seven_gpu)
        self._unload_model()

    # Test add/remove multiple CPU/GPU instances at a time
    def test_gpu_cpu_instance_update(self):
        # Mix CPU and GPU groups in one model and change both in the same update.
        initial = "{\ncount: 2\nkind: KIND_CPU\n},\n{\ncount: 1\nkind: KIND_GPU\n}"
        more_gpu = "{\ncount: 1\nkind: KIND_CPU\n},\n{\ncount: 3\nkind: KIND_GPU\n}"
        reordered = "{\ncount: 3\nkind: KIND_GPU\n},\n{\ncount: 1\nkind: KIND_CPU\n}"
        rebalanced = "{\ncount: 2\nkind: KIND_GPU\n},\n{\ncount: 2\nkind: KIND_CPU\n}"
        # 2 CPU + 1 GPU instance
        self._load_model(3, initial)
        # +2 GPU, -1 CPU
        self._update_instance_count(2, 1, more_gpu)
        # Same instances, groups listed in a different order
        self._update_instance_count(0, 0, reordered)
        time.sleep(0.1)  # larger the gap for config.pbtxt timestamp to update
        # -1 GPU, +1 CPU
        self._update_instance_count(1, 1, rebalanced)
        self._unload_model()

    # Test model instance name update
    def test_instance_name_update(self):
        # Renaming instance groups is expected to neither create nor remove
        # any instance.
        old_names = '{\nname: "old_1"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: "old_2"\ncount: 2\nkind: KIND_GPU\n}'
        new_names = '{\nname: "new_1"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: "new_2"\ncount: 2\nkind: KIND_GPU\n}'
        # 3 instances spread over 2 named groups
        self._load_model(3, old_names)
        # Only the names change
        self._update_instance_count(0, 0, new_names)
        self._unload_model()

    # Test instance signature grouping
    def test_instance_signature(self):
        # The same 2 GPU + 3 CPU instances are described with different
        # groupings; none of the regroupings should create or remove instances.
        grouped_gpu_first = '{\nname: "GPU_group"\ncount: 2\nkind: KIND_GPU\n},\n{\nname: "CPU_group"\ncount: 3\nkind: KIND_CPU\n}'
        partly_flattened = '{\nname: "CPU_1"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: "CPU_2_3"\ncount: 2\nkind: KIND_CPU\n},\n{\nname: "GPU_1"\ncount: 1\nkind: KIND_GPU\n},\n{\nname: "GPU_2"\ncount: 1\nkind: KIND_GPU\n}'
        grouped_cpu_first = '{\nname: "CPU_group"\ncount: 3\nkind: KIND_CPU\n},\n{\nname: "GPU_group"\ncount: 2\nkind: KIND_GPU\n}'
        fully_flattened = '{\nname: "GPU_1"\ncount: 1\nkind: KIND_GPU\n},\n{\nname: "GPU_2"\ncount: 1\nkind: KIND_GPU\n},\n{\nname: "CPU_1"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: "CPU_2"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: "CPU_3"\ncount: 1\nkind: KIND_CPU\n}'
        # Load 2 GPU instances and 3 CPU instances
        self._load_model(5, grouped_gpu_first)
        # Flatten the instances representation
        self._update_instance_count(0, 0, partly_flattened)
        time.sleep(0.1)  # larger the gap for config.pbtxt timestamp to update
        # Consolidate different representations
        self._update_instance_count(0, 0, grouped_cpu_first)
        time.sleep(0.1)  # larger the gap for config.pbtxt timestamp to update
        # Flatten the instances representation
        self._update_instance_count(0, 0, fully_flattened)
        self._unload_model()

    # Test instance update with invalid instance group config
    def test_invalid_config(self):
        # A load with an unparsable instance group must be rejected, and a
        # later valid update must still be applied.
        # Load model with 8 instances
        self._load_model(8)
        # Set invalid config
        update_instance_group("--- invalid config ---")
        with self.assertRaises(InferenceServerException):
            # Use the class-level model name like every other call in this
            # class instead of repeating the literal
            self._triton.load_model(self._model_name)
        # Correct config by reducing instances to 4
        self._update_instance_count(0, 4)
        # Unload model
        self._unload_model()

    # Test instance update with model file changed
    def test_model_file_update(self):
        # After the model file changes, the update is expected to finalize all
        # 5 existing instances and initialize 6 new ones.
        six_cpu = "{\ncount: 6\nkind: KIND_CPU\n}"
        self._load_model(5)
        update_model_file()
        self._update_instance_count(6, 5, six_cpu, wait_for_finalize=True)
        self._unload_model()

    # Test instance update with non instance config changed in config.pbtxt
    def test_non_instance_config_update(self):
        # Batching is switched on in config.pbtxt between load and update, so
        # the update is expected to finalize all 4 existing instances and
        # initialize 2 new ones.
        two_cpu = "{\ncount: 2\nkind: KIND_CPU\n}"
        self._load_model(4, batching=False)
        enable_batching()
        self._update_instance_count(
            2, 4, two_cpu, wait_for_finalize=True, batching=True
        )
        self._unload_model(batching=True)

    # Test passing new instance config via load API
    def test_load_api_with_config(self):
        # Add an instance by passing a modified config through the load API
        # rather than editing config.pbtxt.
        self._load_model(1)
        # Fetch the config Triton reports and validate the parts used below
        response = self._triton.get_model_config(self._model_name, as_json=True)
        self.assertIn("config", response)
        self.assertIsInstance(response["config"], dict)
        model_config = response["config"]
        self.assertIn("instance_group", model_config)
        groups = model_config["instance_group"]
        self.assertIsInstance(groups, list)
        self.assertEqual(len(groups), 1)
        group = groups[0]
        self.assertIn("count", group)
        self.assertIsInstance(group["count"], int)
        # Ask for one more instance than is currently loaded
        group["count"] += 1
        self.assertEqual(group["count"], 2)
        # Load the extra instance via the load API
        self._triton.load_model(self._model_name, config=json.dumps(model_config))
        self._check_count("initialize", 2)  # 2 instances in total
        self._check_count("finalize", 0)  # no instance is removed
        self._infer()
        self._unload_model()

    # Test instance update with an ongoing inference
    def test_update_while_inferencing(self):
        # An instance is added while a slow (10 second) inference is in
        # flight; the update must finish without waiting for that inference.
        # Load model with 1 instance
        self._load_model(1)
        # Add 1 instance while inferencing
        set_delay("infer", 10)
        update_instance_group("{\ncount: 2\nkind: KIND_CPU\n}")
        with concurrent.futures.ThreadPoolExecutor() as pool:
            # Durations are measured on the monotonic clock so a wall-clock
            # adjustment during the test cannot distort them.
            infer_start_time = time.monotonic()
            infer_thread = pool.submit(self._infer)
            time.sleep(2)  # make sure inference has started
            update_start_time = time.monotonic()
            update_thread = pool.submit(self._triton.load_model, self._model_name)
            update_thread.result()
            update_end_time = time.monotonic()
            infer_thread.result()
            infer_end_time = time.monotonic()
        infer_time = infer_end_time - infer_start_time
        update_time = update_end_time - update_start_time
        # Adding a new instance does not depend on existing instances, so the
        # ongoing inference should not block the update.
        self.assertGreaterEqual(infer_time, 10.0, "Invalid infer time")
        self.assertLess(update_time, 5.0, "Update blocked by infer")
        self._check_count("initialize", 2)
        self._check_count("finalize", 0)
        self._infer()
        # Unload model
        self._unload_model()

    # Test inference with an ongoing instance update
    def test_infer_while_updating(self):
        # An inference is sent while a slow (10 second) instance creation is
        # in progress; the inference must finish without waiting for it.
        # Load model with 1 instance
        self._load_model(1)
        # Infer while adding 1 instance
        set_delay("initialize", 10)
        update_instance_group("{\ncount: 2\nkind: KIND_CPU\n}")
        with concurrent.futures.ThreadPoolExecutor() as pool:
            # Durations are measured on the monotonic clock so a wall-clock
            # adjustment during the test cannot distort them.
            update_start_time = time.monotonic()
            update_thread = pool.submit(self._triton.load_model, self._model_name)
            time.sleep(2)  # make sure update has started
            infer_start_time = time.monotonic()
            infer_thread = pool.submit(self._infer)
            infer_thread.result()
            infer_end_time = time.monotonic()
            update_thread.result()
            update_end_time = time.monotonic()
        update_time = update_end_time - update_start_time
        infer_time = infer_end_time - infer_start_time
        # Waiting on new instance creation should not block inference on
        # existing instances.
        self.assertGreaterEqual(update_time, 10.0, "Invalid update time")
        self.assertLess(infer_time, 5.0, "Infer blocked by update")
        self._check_count("initialize", 2)
        self._check_count("finalize", 0)
        self._infer()
        # Unload model
        self._unload_model()

    # Test instance resource requirement increase
    # RATE_LIMIT_MODE is read with a default so that this test is skipped,
    # rather than its decorator raising KeyError, when the variable is unset.
    @unittest.skipUnless(
        "execution_count" in os.environ.get("RATE_LIMIT_MODE", ""),
        "Rate limiter precondition not met for this test",
    )
    def test_instance_resource_increase(self):
        # Raise the "R1" resource requirement of the single instance from 2
        # to 8 and verify inference does not get stuck afterwards.
        # Load model
        self._load_model(
            1,
            '{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: "R1"\ncount: 2\n}\n]\n}\n}',
        )
        # Increase resource requirement
        self._update_instance_count(
            1,
            1,
            '{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: "R1"\ncount: 8\n}\n]\n}\n}',
        )
        # Check the model is not blocked from infer due to the default resource
        # possibly not updated to the larger resource requirement.
        infer_count = 8
        infer_complete = [False] * infer_count

        def infer():
            # Mark each request as done so the main thread can spot a stall
            for i in range(infer_count):
                self._infer()
                infer_complete[i] = True

        with concurrent.futures.ThreadPoolExecutor() as pool:
            infer_thread = pool.submit(infer)
            time.sleep(infer_count / 2)  # each infer should take < 0.5 seconds
            self.assertNotIn(False, infer_complete, "Infer possibly stuck")
            infer_thread.result()
        # Unload model
        self._unload_model()

    # Test instance resource requirement increase above explicit resource
    # RATE_LIMIT_MODE is read with .get() so that this test is skipped, rather
    # than its decorator raising KeyError, when the variable is unset.
    @unittest.skipUnless(
        os.environ.get("RATE_LIMIT_MODE") == "execution_count_with_explicit_resource",
        "Rate limiter precondition not met for this test",
    )
    def test_instance_resource_increase_above_explicit(self):
        # Requesting more "R1" than the explicitly configured resource must
        # be rejected; a request within the limit must then succeed.
        # Load model
        self._load_model(
            1,
            '{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: "R1"\ncount: 2\n}\n]\n}\n}',
        )
        # Increase resource requirement
        with self.assertRaises(InferenceServerException):
            self._update_instance_count(
                0,
                0,
                '{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: "R1"\ncount: 32\n}\n]\n}\n}',
            )
        # Correct the resource requirement to match the explicit resource
        self._update_instance_count(
            1,
            1,
            '{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: "R1"\ncount: 10\n}\n]\n}\n}',
        )
        # Unload model
        self._unload_model()

    # Test instance resource requirement decrease
    # RATE_LIMIT_MODE is read with a default so that this test is skipped,
    # rather than its decorator raising KeyError, when the variable is unset.
    @unittest.skipUnless(
        "execution_count" in os.environ.get("RATE_LIMIT_MODE", ""),
        "Rate limiter precondition not met for this test",
    )
    def test_instance_resource_decrease(self):
        # Lower the "R1" resource requirement from 4 to 3 and confirm through
        # the server log whether the max resource was reduced.
        rate_limit_mode = os.environ["RATE_LIMIT_MODE"]
        # Load model
        self._load_model(
            1,
            '{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: "R1"\ncount: 4\n}\n]\n}\n}',
        )
        # Decrease resource requirement
        self._update_instance_count(
            1,
            1,
            '{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: "R1"\ncount: 3\n}\n]\n}\n}',
        )
        # Unload model
        self._unload_model()
        # The resource count of 3 is unique across this entire test, so check
        # the server output to make sure it is printed, which ensures the
        # max resource is actually decreased.
        time.sleep(1)  # make sure the log file is updated
        log_path = os.path.join(
            os.environ["MODEL_LOG_DIR"],
            "instance_update_test.rate_limit_" + rate_limit_mode + ".server.log",
        )
        with open(log_path, mode="r", encoding="utf-8", errors="strict") as f:
            server_log = f.read()
        if rate_limit_mode == "execution_count":
            # Make sure the previous max resource limit of 4 is reduced to 3
            # when no explicit limit is set.
            self.assertIn("Resource: R1\\t Count: 3", server_log)
        else:
            # Make sure the max resource limit is never set to 3 when
            # explicit limit of 10 is set.
            self.assertNotIn("Resource: R1\\t Count: 3", server_log)

    # Sequence batching settings handed to update_sequence_batching() by the
    # scheduler tests below. Both use an idle limit of 8,000,000 microseconds
    # (8 seconds); one selects "direct" and the other "oldest" with at most
    # 4 candidate sequences.
    _direct_sequence_batching_str = (
        "direct { }\nmax_sequence_idle_microseconds: 8000000"
    )
    _oldest_sequence_batching_str = (
        "oldest { max_candidate_sequences: 4 }\nmax_sequence_idle_microseconds: 8000000"
    )

    # Test instance update for direct scheduler without any ongoing sequences
    def test_direct_scheduler_update_no_ongoing_sequences(self):
        # Instance updates with the "direct" setting and no sequence in flight
        batching_config = self._direct_sequence_batching_str
        self._test_scheduler_update_no_ongoing_sequences(batching_config)

    # Test instance update for direct scheduler with any ongoing sequences
    def test_direct_scheduler_update_with_ongoing_sequences(self):
        # Instance updates with the "direct" setting while sequences are in flight
        batching_config = self._direct_sequence_batching_str
        self._test_scheduler_update_with_ongoing_sequences(batching_config)

    # Test instance update for oldest scheduler without ongoing sequences
    def test_oldest_scheduler_update_no_ongoing_sequences(self):
        # Instance updates with the "oldest" setting and no sequence in flight
        batching_config = self._oldest_sequence_batching_str
        self._test_scheduler_update_no_ongoing_sequences(batching_config)

    # Test instance update for oldest scheduler with ongoing sequences
    def test_oldest_scheduler_update_with_ongoing_sequences(self):
        # Instance updates with the "oldest" setting while sequences are in flight
        batching_config = self._oldest_sequence_batching_str
        self._test_scheduler_update_with_ongoing_sequences(batching_config)

    # Helper function for testing the success of sequence instance updates
    # without any ongoing sequences.
    def _test_scheduler_update_no_ongoing_sequences(self, sequence_batching_str):
        """Add and remove instances of a sequence model between sequences.

        The model is loaded with 2 CPU instances, grown to 4 and shrunk to 3.
        After each step a complete sequence (id 1) is run and the
        initialize/finalize counters are checked. Finally the model is
        unloaded and all 4 created instances must be finalized.
        """
        start = {"sequence_start": True}
        middle = {}
        end = {"sequence_end": True}

        def run_sequence(*steps):
            # One request per step, all belonging to sequence 1
            for flags in steps:
                self._triton.infer(
                    self._model_name, self._get_inputs(), sequence_id=1, **flags
                )

        # Load model
        update_instance_group("{\ncount: 2\nkind: KIND_CPU\n}")
        update_sequence_batching(sequence_batching_str)
        self._triton.load_model(self._model_name)
        self._check_count("initialize", 2)
        self._check_count("finalize", 0)
        run_sequence(start, middle, end)
        # Add 2 instances without in-flight sequence
        update_instance_group("{\ncount: 4\nkind: KIND_CPU\n}")
        self._triton.load_model(self._model_name)
        self._check_count("initialize", 4)
        self._check_count("finalize", 0)
        run_sequence(start, end)
        # Remove 1 instance without in-flight sequence
        update_instance_group("{\ncount: 3\nkind: KIND_CPU\n}")
        self._triton.load_model(self._model_name)
        self._check_count("initialize", 4)
        self._check_count("finalize", 1, poll=True)
        run_sequence(start, end)
        # Unload model
        self._triton.unload_model(self._model_name)
        self._check_count("initialize", 4)
        self._check_count("finalize", 4, poll=True)

    # Helper function for testing if ongoing sequences may continue to infer on
    # the same instance after the instance processing the sequence is removed
    # from an instance update, which the removed instance will live until the
    # sequences end.
    def _test_scheduler_update_with_ongoing_sequences(self, sequence_batching_str):
        """Replace all instances while sequences are still in flight.

        Two sequences are started on a model with 3 CPU instances, then the
        instance group is switched to 1 GPU instance. The test expects the
        sequences to keep inferring, the 2 CPU instances serving them to stay
        alive until the sequences end, and all 4 created instances to be
        finalized after unload.
        """

        def seq_infer(seq_id, **flags):
            # One request on the given sequence; flags carry start/end markers
            self._triton.infer(
                self._model_name, self._get_inputs(), sequence_id=seq_id, **flags
            )

        # Load model
        update_instance_group("{\ncount: 3\nkind: KIND_CPU\n}")
        update_sequence_batching(sequence_batching_str)
        self._triton.load_model(self._model_name)
        self._check_count("initialize", 3)
        self._check_count("finalize", 0)
        # Start sequence 1 and 2 on CPU instances
        for seq_id in (1, 2):
            seq_infer(seq_id, sequence_start=True)
        # Remove all 3 CPU and add 1 GPU instance with in-flight sequences. Both
        # in-flight sequences are assigned to any 2 CPU instances, so exactly 1
        # CPU instance can be removed immediately.
        update_instance_group("{\ncount: 1\nkind: KIND_GPU\n}")
        self._triton.load_model(self._model_name)
        self._check_count("initialize", 4)  # 3 CPU + 1 GPU
        self._check_count("finalize", 1, poll=True)  # 1 CPU
        # Sequence 1 and 2 may continue to infer
        for seq_id in (1, 2):
            seq_infer(seq_id)
        self._check_count("finalize", 1)  # check 2 CPU instances not removed
        # Start sequence 3 on GPU instance
        seq_infer(3, sequence_start=True)
        self._check_count("finalize", 1)  # check 2 CPU instances not removed
        # End sequence 1 and 2 will remove the 2 CPU instances
        for seq_id in (1, 2):
            seq_infer(seq_id, sequence_end=True)
        self._check_count("finalize", 3, poll=True)  # 3 CPU
        # End sequence 3
        seq_infer(3, sequence_end=True)
        # Unload model
        self._triton.unload_model(self._model_name)
        self._check_count("initialize", 4)  # 3 CPU + 1 GPU
        self._check_count("finalize", 4, poll=True)  # 3 CPU + 1 GPU


# Run the test cases above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_model_update/test.sh
================================================
#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first command-line argument overrides
# NVIDIA_TRITON_SERVER_VERSION, and the script aborts when neither is set.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# This L0_model_update test should make changes to models without restarting the
# server, unless restarting the server is the only way of accomplishing the
# change.

export CUDA_VISIBLE_DEVICES=0
export PYTHONDONTWRITEBYTECODE="True"
# Directory holding the server logs; exported for the Python test process
export MODEL_LOG_DIR="`pwd`"

SERVER=/opt/tritonserver/bin/tritonserver
# Shared QA helpers; run_server used below is presumably defined there
source ../common/util.sh

# Recreate the model repository with a fresh copy of the model_init_del
# Python model, moving model.py into its version "1" directory.
function setup_models() {
    local model_dir=models/model_init_del
    rm -rf models && mkdir models
    # Basic model that logs instance creation and destruction
    cp -r ../python_models/model_init_del "$model_dir" &&
        mkdir "$model_dir/1" &&
        mv "$model_dir/model.py" "$model_dir/1"
}

# Overall result: stays 0 unless a test run or a log check fails
RET=0

# Test model instance update with rate limiting on/off and explicit resource
for RATE_LIMIT_MODE in "off" "execution_count" "execution_count_with_explicit_resource"; do

    # The explicit-resource mode is execution_count plus a fixed R1 resource of 10
    RATE_LIMIT_ARGS="--rate-limit=$RATE_LIMIT_MODE"
    if [ "$RATE_LIMIT_MODE" == "execution_count_with_explicit_resource" ]; then
        RATE_LIMIT_ARGS="--rate-limit=execution_count --rate-limit-resource=R1:10"
    fi

    # Exported so the Python test can select/skip cases and locate the server log
    export RATE_LIMIT_MODE=$RATE_LIMIT_MODE
    TEST_LOG="instance_update_test.rate_limit_$RATE_LIMIT_MODE.log"
    SERVER_LOG="./instance_update_test.rate_limit_$RATE_LIMIT_MODE.server.log"

    # Fresh model repository and a fresh server for every mode
    setup_models
    SERVER_ARGS="--model-repository=models --model-control-mode=explicit $RATE_LIMIT_ARGS --log-verbose=2"
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Tolerate a failing test run so the result can be recorded in RET
    set +e
    python instance_update_test.py > $TEST_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed model instance update test on rate limit mode $RATE_LIMIT_MODE\n***"
        cat $TEST_LOG
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    # The server log must not contain the "Should not print this" marker
    set +e
    grep "Should not print this" $SERVER_LOG
    if [ $? -eq 0 ]; then
        echo -e "\n***\n*** Found \"Should not print this\" on \"$SERVER_LOG\"\n***"
        cat $SERVER_LOG
        RET=1
    fi
    set -e

done

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi
exit $RET


================================================
FILE: qa/L0_multi_server/test.sh
================================================
#!/bin/bash
# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first command-line argument overrides
# NVIDIA_TRITON_SERVER_VERSION, and the script aborts when neither is set.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

MODELSDIR=`pwd`/models
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository

export CUDA_VISIBLE_DEVICES=0

# Must explicitly set LD_LIBRARY_PATH so that server can find
# libtritonserver.so. Exported so the multi_server child process sees it
# even when the variable was not already part of the environment.
export LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH

# Remove logs and model repositories left over from a previous run
rm -f *.log && rm -rf ${MODELSDIR}*

RET=0

MULTI_SERVER=multi_server
CLIENT_LOG=$MULTI_SERVER
MULTI_SERVER=./$MULTI_SERVER
BACKENDS=(onnx plan)
THREAD_COUNT=32
LOOPS=32

# Build one single-model repository per iteration (THREAD_COUNT + 1 in total)
# and pass each of them to the test binary with -r.
EXTRA_ARGS=" -t ${THREAD_COUNT} -l ${LOOPS}"
for (( I=1; I<${THREAD_COUNT}+2; I++ )); do
    # Cycle through the backends by the actual array length instead of a
    # hard-coded modulus that relied on negative array indexing.
    BACKEND_INDEX=$((I % ${#BACKENDS[@]}))
    full=${BACKENDS[$BACKEND_INDEX]}_float32_float32_float32
    mkdir -p ${MODELSDIR}${I}/simple${I}/1 && \
        cp -r $DATADIR/${full}/1/* ${MODELSDIR}${I}/simple${I}/1/. && \
        cp $DATADIR/${full}/config.pbtxt ${MODELSDIR}${I}/simple${I}/. && \
        (cd ${MODELSDIR}${I}/simple${I} && \
                sed -i "s/^name:.*/name: \"simple${I}\"/" config.pbtxt && \
                sed -i "s/label_filename:.*//" config.pbtxt)
    EXTRA_ARGS="${EXTRA_ARGS} -r ${MODELSDIR}${I}"
done

set +e

# No memory type enforcement
$MULTI_SERVER ${EXTRA_ARGS} >>$CLIENT_LOG.log 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG.log
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_nan_inf/models/nan_inf_output/1/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python backend model that always returns NaN, +inf and -inf values.

    The content of the incoming requests is ignored: every request is answered
    with the same six-element float32 tensor named ``OUTPUT0``.
    """

    @staticmethod
    def _special_values():
        """Return a fresh float32 array ``[nan, inf, -inf, 1, 2, 3]``."""
        # ``-np.inf`` is used instead of ``np.NINF`` because that alias was
        # removed in NumPy 2.0; ``-np.inf`` works on every NumPy version.
        return np.array([np.nan, np.inf, -np.inf, 1, 2, 3], dtype=np.float32)

    def initialize(self, args):
        """Parse the JSON model configuration found in ``args["model_config"]``."""
        self.model_config = json.loads(args["model_config"])

    def execute(self, requests):
        """This function is called on inference request.

        Returns a list with one ``pb_utils.InferenceResponse`` per entry in
        ``requests``.
        """

        responses = []
        for _ in requests:
            # Include one of each specially parsed JSON value: nan, inf, and -inf
            out_tensor_0 = pb_utils.Tensor("OUTPUT0", self._special_values())
            responses.append(pb_utils.InferenceResponse([out_tensor_0]))

        return responses


================================================
FILE: qa/L0_nan_inf/models/nan_inf_output/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "nan_inf_output" model of the L0_nan_inf test;
# the model is served by the Python backend.
name: "nan_inf_output"
backend: "python"

# One FP32 input with a single variable-size dimension (-1).
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# One FP32 output with exactly six elements.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 6 ]
  }
]

# Run the model instance on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/L0_nan_inf/nan_inf_test.py
================================================
#!/usr/bin/env python
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import traceback
import unittest

import numpy as np
import requests
import test_util as tu
import tritonclient.grpc as tritongrpcclient
import tritonclient.http as tritonhttpclient
from tritonclient.utils import InferenceServerException


class NanInfTest(tu.TestResultCollector):
    """Check that NaN, +inf and -inf outputs survive the HTTP and GRPC paths.

    All tests talk to the ``nan_inf_output`` model on a locally running server
    (HTTP on port 8000, GRPC on port 8001).
    """

    # ``-np.inf`` is used instead of ``np.NINF`` because that alias was removed
    # in NumPy 2.0.
    expected_output = np.array([np.nan, np.inf, -np.inf, 1, 2, 3], dtype=np.float32)
    model_name = "nan_inf_output"

    def test_http_raw(self):
        """Send a hand-built JSON request and require a parseable JSON reply."""
        payload = {
            "inputs": [
                {"name": "INPUT0", "datatype": "FP32", "shape": [1], "data": [1]}
            ]
        }
        response = requests.post(
            "http://localhost:8000/v2/models/nan_inf_output/infer",
            data=json.dumps(payload),
        )
        self.assertTrue(response.ok, "Response not OK: {}".format(response.text))

        try:
            print(response.json())
        except ValueError:
            # JSON decoding errors raised by ``response.json()`` derive from
            # ValueError.
            self.fail("Response was not valid JSON:\n{}".format(response.text))

    def test_http(self):
        """Run the shared inference check through the HTTP client."""
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        inputs = [tritonhttpclient.InferInput("INPUT0", [1], "FP32")]
        self.infer_helper(triton_client, inputs)

    def test_grpc(self):
        """Run the shared inference check through the GRPC client."""
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
        inputs = [tritongrpcclient.InferInput("INPUT0", [1], "FP32")]
        self.infer_helper(triton_client, inputs)

    def infer_helper(self, triton_client, inputs):
        """Infer with ``triton_client`` and compare OUTPUT0 to ``expected_output``.

        ``inputs[0]`` is filled with a one-element float32 array before the
        request is sent. Any exception raised by the request is reported as a
        test failure.
        """
        inputs[0].set_data_from_numpy(np.arange(1, dtype=np.float32))

        try:
            results = triton_client.infer(model_name=self.model_name, inputs=inputs)
            output0_data = results.as_numpy("OUTPUT0")
        except InferenceServerException as ex:
            self.fail(ex.message())
        except Exception:
            # Unexpected errors are still turned into failures, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            self.fail(traceback.format_exc())

        # Verify output is as expected
        # Make sure nan's are equivalent when compared
        output_correct = np.array_equal(
            output0_data, self.expected_output, equal_nan=True
        )
        self.assertTrue(
            output_correct, "didn't get expected output0: {}".format(output0_data)
        )


# Run the unittest test cases of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_nan_inf/test.sh
================================================
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Version of the QA data: the first positional argument (when given) wins
# over the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when one is configured.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Provides run_server and check_test_results.
source ../common/util.sh

RET=0

CLIENT_LOG="./nan_inf_client.log"
TEST_PY=./nan_inf_test.py
EXPECTED_NUM_TESTS="3"
TEST_RESULT_FILE='test_results.txt'

export CUDA_VISIBLE_DEVICES=0

rm -fr *.log

# Start the server on the model repository that sits next to this script.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$(pwd)/models"
SERVER_LOG="./inference_server.log"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Run the Python test; on success additionally verify the recorded results.
set +e
if ! python3 $TEST_PY >>$CLIENT_LOG 2>&1; then
    RET=1
elif ! check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Report the verdict; dump both logs when something went wrong.
if (( RET != 0 )); then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_nullchar_string/nullchar_string_client.py
================================================
#!/usr/bin/env python
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

FLAGS = None

# Sends a string containing an embedded null character through an identity
# model and verifies that the string comes back unchanged. Exits with status 1
# on an unknown protocol or on a mismatch.
if __name__ == "__main__":
    # Local import: only the script entry point needs sys.exit().
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-m", "--model-name", type=str, required=True, help="Name of model"
    )
    parser.add_argument(
        "-u",
        "--url",
        type=str,
        required=False,
        default="localhost:8000",
        help="Inference server URL. Default is localhost:8000.",
    )
    parser.add_argument(
        "-i",
        "--protocol",
        type=str,
        required=False,
        default="http",
        help='Protocol ("http"/"grpc") used to '
        + 'communicate with inference service. Default is "http".',
    )

    FLAGS = parser.parse_args()

    if FLAGS.protocol not in ("http", "grpc"):
        print(
            'unexpected protocol "{}", expects "http" or "grpc"'.format(FLAGS.protocol)
        )
        sys.exit(1)

    client_util = httpclient if FLAGS.protocol == "http" else grpcclient
    # Create the inference context for the model.
    client = client_util.InferenceServerClient(FLAGS.url, verbose=FLAGS.verbose)

    # We use identity string models that takes 1 input tensor of a single string
    # and returns 1 output tensor of a single string. The output tensor is the
    # same as the input tensor.

    # Create the data for the input tensor. It contains a null character in
    # the middle of the string.
    tmp_str = "abc\0def"
    input0_data = np.array([tmp_str], dtype=object)

    # Send inference request to the inference server. Get results for
    # output tensor.
    input_name = "INPUT0"
    output_name = "OUTPUT0"

    # If using libtorch model, set input and output name to "INPUT__0" and "OUTPUT__0"
    if "libtorch" in FLAGS.model_name:
        input_name = "INPUT__0"
        output_name = "OUTPUT__0"

    inputs = [
        client_util.InferInput(
            input_name, input0_data.shape, np_to_triton_dtype(np.object_)
        )
    ]
    inputs[0].set_data_from_numpy(input0_data)

    results = client.infer(FLAGS.model_name, inputs)

    # We expect there to be 1 result (with batch-size 1). Compare the input
    # and output tensor calculated by the model. They must be the same.
    output0_data = results.as_numpy(output_name)

    print(input0_data, "?=?", output0_data)
    # An explicit check is used instead of ``assert`` so that the comparison
    # is still performed when Python runs with -O (which strips asserts).
    if not np.equal(input0_data.astype(np.bytes_), output0_data).all():
        print("error: output tensor does not match input tensor")
        sys.exit(1)


================================================
FILE: qa/L0_nullchar_string/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the QA data version: the first positional argument takes
# precedence over the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository
MODELS="python_string libtorch_nobatch_zero_1_object"
NULLCHAR_CLIENT_PY=nullchar_string_client.py

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

# Start from a clean state. "models" is a directory, so it needs a recursive
# remove; a plain "rm -f" would leave a stale repository in place.
rm -f $CLIENT_LOG $SERVER_LOG
rm -rf models

mkdir -p models

# Copy the python model
mkdir -p models/python_string/1/
cp -fr ../python_models/string/model.py models/python_string/1/
cp ../python_models/string/config.pbtxt models/python_string
sed -i 's/name: "string"/name: "python_string"/' models/python_string/config.pbtxt

# Copy the libtorch model
cp -r $DATADIR/libtorch_nobatch_zero_1_object models/.

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

set +e

# Ignore ONNX backend because even though ONNX supports string data type,
# strings that contain null character in the middle is not allowed.
# https://github.com/microsoft/onnxruntime/issues/2284
# Every model is exercised once over HTTP (client default) and once over GRPC.
for MODEL in $MODELS; do
    python $NULLCHAR_CLIENT_PY -m $MODEL -v >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        RET=1
    fi

    python $NULLCHAR_CLIENT_PY -m $MODEL -i grpc -u localhost:8001 -v >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        RET=1
    fi
done

set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_onnx_optimization/test.sh
================================================
#!/bin/bash
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the QA data version: the first positional argument takes
# precedence over the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

DATADIR=/data/inferenceserver/${REPO_VERSION}

CLIENT_LOG="./client.log"
ONNXTRT_OPTIMIZATION_TEST=onnxtrt_optimization_test.py

# --exit-on-error=false keeps the server running although several of the
# models created below are expected to fail to load; verbose logging is
# needed because the checks below grep the server log.
SERVER=/opt/tritonserver/bin/tritonserver
CACHE_PATH=`pwd`/trt_cache
SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 --exit-on-error=false"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

RET=0

for MODEL in \
        onnx_float32_float32_float32; do
    # Set instance count > 1 to test parallel instance loading across all EPs
    INSTANCE_COUNT=5

    rm -f ./*.log
    rm -fr models && mkdir -p models

    # Create the base "<MODEL>_test" model and derive one variant per
    # configuration under test. All steps form a single && chain whose status
    # is checked once the chain ends.
    cp -r $DATADIR/qa_model_repository/${MODEL} \
       models/${MODEL}_test && \
    rm -fr models/${MODEL}_test/2 && \
    rm -fr models/${MODEL}_test/3 && \
    (cd models/${MODEL}_test && \
            sed -i 's/_float32_float32_float32/&_test/' config.pbtxt && \
            echo -e "\ninstance_group { count: ${INSTANCE_COUNT} }" >> config.pbtxt) && \
    # Enable session.use_device_allocator_for_initializers
    cp -r models/${MODEL}_test models/${MODEL}_session_config && \
    (cd models/${MODEL}_session_config && \
            sed -i 's/_float32_test/_float32_session_config/' config.pbtxt && \
            echo "parameters: { key: \"session.use_device_allocator_for_initializers\" value: { string_value: \"1\" }}" >> config.pbtxt) && \
    # CUDA EP optimization params
    cp -r models/${MODEL}_test models/${MODEL}_cuda_config && \
    (cd models/${MODEL}_cuda_config && \
            sed -i 's/_float32_test/_float32_cuda_config/' \
                config.pbtxt && \
            echo "parameters: { key: \"cudnn_conv_algo_search\" value: { string_value: \"1\" }} \
            parameters: { key: \"arena_extend_strategy\" value: { string_value: \"1\" }}
            parameters: { key: \"gpu_mem_limit\" value: { string_value: \"18446744073709551614\" }} " >> config.pbtxt) && \
    # CUDA EP optimization params specified in gpu_execution_accelerator field
    cp -r models/${MODEL}_test models/${MODEL}_cuda_param_field && \
    (cd models/${MODEL}_cuda_param_field && \
            sed -i 's/_float32_test/_float32_cuda_param_field/' \
                config.pbtxt && \
            echo "optimization { execution_accelerators { gpu_execution_accelerator : [ { name : \"cuda\" \
            parameters { key: \"cudnn_conv_use_max_workspace\" value: \"0\" } \
            parameters { key: \"use_ep_level_unified_stream\" value: \"1\" } }]}}" \
            >> config.pbtxt) && \
    # CPU EP optimization params
    cp -r models/${MODEL}_test models/${MODEL}_cpu_config && \
    (cd models/${MODEL}_cpu_config && \
            sed -i 's/_float32_test/_float32_cpu_config/' \
                config.pbtxt && \
            echo "parameters: { key: \"intra_op_thread_count\" value: { string_value: \"1\" }} \
            parameters: { key: \"enable_mem_arena\" value: { string_value: \"1\" }}
            parameters: { key: \"enable_mem_pattern\" value: { string_value: \"1\" }}
            parameters: { key: \"memory.enable_memory_arena_shrinkage\" value: { string_value: \"cpu:0\" }} " >> config.pbtxt) && \
    # GPU execution accelerators with default setting
    cp -r models/${MODEL}_test models/${MODEL}_trt && \
    (cd models/${MODEL}_trt && \
            sed -i 's/_float32_test/_float32_trt/' \
                config.pbtxt && \
            echo "optimization { execution_accelerators { gpu_execution_accelerator : [ { name : \"tensorrt\"} ] } }" >> config.pbtxt) && \
    # TRT execution accelerators with correct parameters
    cp -r models/${MODEL}_test models/${MODEL}_trt_param && \
    (cd models/${MODEL}_trt_param && \
            sed -i 's/_float32_test/_float32_trt_param/' \
                config.pbtxt && \
            echo "optimization { execution_accelerators { gpu_execution_accelerator : [ { name : \"tensorrt\" \
            parameters { key: \"precision_mode\" value: \"FP16\" } \
            parameters { key: \"trt_max_partition_iterations\" value: \"1000\" } \
            parameters { key: \"trt_dump_subgraphs\" value: \"1\" } \
            parameters { key: \"trt_timing_cache_enable\" value: \"1\" } \
            parameters { key: \"trt_build_heuristics_enable\" value: \"1\" } \
            parameters { key: \"trt_cuda_graph_enable\" value: \"1\" } \
            parameters { key: \"max_workspace_size_bytes\" value: \"1073741824\" } }]}}" \
            >> config.pbtxt) && \
    # TRT execution accelerators with cache enabled
    cp -r models/${MODEL}_test models/${MODEL}_trt_cache_on && \
    (cd models/${MODEL}_trt_cache_on && \
            sed -i 's/_float32_test/_float32_trt_cache_on/' \
                config.pbtxt && \
            echo "optimization { execution_accelerators { gpu_execution_accelerator : [ { name : \"tensorrt\" \
            parameters { key: \"trt_engine_cache_enable\" value: \"1\" } \
            parameters { key: \"trt_max_partition_iterations\" value: \"1000\" } \
            parameters { key: \"trt_dump_subgraphs\" value: \"1\" } \
            parameters { key: \"trt_timing_cache_enable\" value: \"1\" } \
            parameters { key: \"trt_build_heuristics_enable\" value: \"1\" } \
            parameters { key: \"trt_cuda_graph_enable\" value: \"1\" } \
            parameters { key: \"trt_engine_cache_path\" value: \"${CACHE_PATH}\" } }]}}" \
            >> config.pbtxt) && \
    # TRT execution accelerators with unknown parameters
    cp -r models/${MODEL}_test models/${MODEL}_trt_unknown_param && \
    (cd models/${MODEL}_trt_unknown_param && \
            sed -i 's/_float32_test/_float32_trt_unknown_param/' \
                config.pbtxt && \
            echo "optimization { execution_accelerators { gpu_execution_accelerator : [ { name : \"tensorrt\" \
            parameters { key: \"precision_mode\" value: \"FP16\" } \
            parameters { key: \"segment_size\" value: \"1\" } }]}}" \
            >> config.pbtxt) && \
    # TRT execution accelerators with invalid parameters
    cp -r models/${MODEL}_test models/${MODEL}_trt_invalid_param && \
    (cd models/${MODEL}_trt_invalid_param && \
            sed -i 's/_float32_test/_float32_trt_invalid_param/' \
                config.pbtxt && \
            echo "optimization { execution_accelerators { gpu_execution_accelerator : [ { name : \"tensorrt\" \
            parameters { key: \"precision_mode\" value: \"FP16\" } \
            parameters { key: \"max_workspace_size_bytes\" value: \"abc\" } }]}}" \
            >> config.pbtxt) && \
    # Unknown GPU execution accelerator
    cp -r models/${MODEL}_test models/${MODEL}_unknown_gpu && \
    (cd models/${MODEL}_unknown_gpu && \
            sed -i 's/_float32_test/_float32_unknown_gpu/' \
                config.pbtxt && \
            echo "optimization { execution_accelerators { gpu_execution_accelerator : [ { name : \"unknown_gpu\" } ] } }" >> config.pbtxt)
    # Stop right away when the repository could not be prepared; starting the
    # server is deliberately NOT part of the && chain above.
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed to create model repository\n***"
        exit 1
    fi

    run_server_tolive
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    set +e

    # Every check below greps the server log for the message that the
    # corresponding model variant is expected to produce.
    grep "Configuring 'session.use_device_allocator_for_initializers' to '1'" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected Configuring 'session.use_device_allocator_for_initializers' to '1'\n***"
        RET=1
    fi

    grep "TensorRT Execution Accelerator is set for '${MODEL}_trt'" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected TensorRT Execution Accelerator is set for '${MODEL}_trt'\n***"
        RET=1
    fi

    grep "TensorRT Execution Accelerator is set for '${MODEL}_trt_param'" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected TensorRT Execution Accelerator is set for '${MODEL}_trt_param'\n***"
        RET=1
    fi

    grep "TensorRT Execution Accelerator is set for '${MODEL}_trt_cache_on'" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected TensorRT Execution Accelerator is set for '${MODEL}_trt_cache_on'\n***"
        RET=1
    fi

    grep "failed to load '${MODEL}_trt_unknown_param' version 1: Invalid argument: unknown parameter 'segment_size' is provided for TensorRT Execution Accelerator" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected unknown parameter 'segment_size' returns error\n***"
        RET=1
    fi

    grep "failed to load '${MODEL}_trt_invalid_param' version 1: Invalid argument: failed to convert 'abc' to unsigned long long integral number" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected invalid parameter 'abc' returns error\n***"
        RET=1
    fi

    grep "failed to load '${MODEL}_unknown_gpu' version 1: Invalid argument: unknown Execution Accelerator 'unknown_gpu' is requested" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected 'unknown_gpu' Execution Accelerator returns error\n***"
        RET=1
    fi

    grep "memory limit: 18446744073709551614 arena_extend_strategy: 1" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected configurations not set for '${MODEL}_cuda_config'\n***"
        RET=1
    fi

    grep "CUDA Execution Accelerator is set for '${MODEL}_cpu_config'" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected CUDA Execution Accelerator is set for '${MODEL}_cpu_config'\n***"
        RET=1
    fi

    # Both CUDA options must show up on the line reported for the model.
    matched_line=$(grep "CUDA Execution Accelerator is set for '${MODEL}_cuda_param_field'" $SERVER_LOG)
    if [[ "$matched_line" != *"use_ep_level_unified_stream=1"* ]] || [[ "$matched_line" != *"cudnn_conv_use_max_workspace=0"* ]]; then
        echo -e "\n***\n*** Failed. Expected CUDA Execution Accelerator options correctly set for '${MODEL}_cuda_param_field'\n***"
        RET=1
    fi

    # arena configs
    grep "Configuring enable_mem_arena to 1" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected Configuring enable_mem_arena to 1\n***"
        RET=1
    fi

    grep "Configuring enable_mem_pattern to 1" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected Configuring enable_mem_pattern to 1\n***"
        RET=1
    fi

    grep "Configuring memory.enable_memory_arena_shrinkage to cpu:0" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected Configuring memory.enable_memory_arena_shrinkage to cpu:0\n***"
        RET=1
    fi

    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_openai/generate_engine.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from argparse import ArgumentParser

from tensorrt_llm import BuildConfig
from tensorrt_llm._tensorrt_engine import LLM
from tensorrt_llm.lora_manager import LoraConfig
from tensorrt_llm.plugin import PluginConfig


def generate_model_engine(model: str, engines_path: str) -> None:
    """Build an engine for *model* and save it under *engines_path*.

    The engine is created through ``tensorrt_llm``'s ``LLM`` wrapper with
    float16 weights, a maximum batch size of 128, the xgrammar guided
    decoding backend and LoRA support for the attention q/k/v modules.

    Args:
        model: Hugging Face model id or local path of the model.
        engines_path: Directory the engine is written to.
    """
    config = BuildConfig(plugin_config=PluginConfig.from_dict({"_gemm_plugin": "auto"}))

    lora_config = LoraConfig(
        lora_target_modules=["attn_q", "attn_k", "attn_v"],
        max_lora_rank=8,
        max_loras=4,
        max_cpu_loras=8,
    )

    engine = LLM(
        model,
        dtype="float16",
        max_batch_size=128,
        build_config=config,
        guided_decoding_backend="xgrammar",
        lora_config=lora_config,
    )

    # Always release the engine, even when saving fails, so a failed save
    # does not leave the LLM instance running.
    try:
        engine.save(engines_path)
    finally:
        engine.shutdown()


if __name__ == "__main__":
    # Command-line entry point: build the engine for --model and store it in
    # --engine_path. Both options are mandatory; without them the build would
    # be started with None as model or output directory.
    parser = ArgumentParser()
    parser.add_argument(
        "--model",
        "-m",
        required=True,
        help="model huggingface id or path to the model",
    )
    parser.add_argument(
        "--engine_path",
        "-e",
        required=True,
        help="directory of the output engine",
    )
    FLAGS = parser.parse_args()

    generate_model_engine(FLAGS.model, FLAGS.engine_path)
    print(f"model {FLAGS.model}'s engine has been saved to {FLAGS.engine_path}")


================================================
FILE: qa/L0_openai/test.sh
================================================
#!/bin/bash
# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

### Helpers ###

# Fetch only the triton_backend/all_models directory of the TensorRT-LLM
# repository at a given revision.
#
# Arguments:
#   $1 - git revision to check out
#   $2 - destination directory; it is removed and recreated on every call
# Returns non-zero if the clone or the directory change fails.
function download_tensorrt_llm_models {
    TENSORRTLLM_VERSION="$1"
    TENSORRTLLM_DIR="$2"
    # Expansions are quoted so a path with whitespace stays a single argument.
    rm -rf "${TENSORRTLLM_DIR}" && mkdir -p "${TENSORRTLLM_DIR}"
    # Blob-less clone without checkout; files are materialized by the sparse
    # checkout below.
    git clone --filter=blob:none --no-checkout https://github.com/triton-inference-server/TensorRT-LLM.git "${TENSORRTLLM_DIR}" || return 1
    # Stop here if the directory cannot be entered: the git commands below
    # would otherwise act on whatever repository contains the current
    # directory.
    pushd "${TENSORRTLLM_DIR}" || return 1
    git sparse-checkout set triton_backend/all_models
    git checkout "${TENSORRTLLM_VERSION}"
    popd
}

# Install the test requirements inside openai/ and prepare the models for the
# backend selected by IMAGE_KIND ("TRTLLM" or anything else for vLLM).
function install_deps() {
    # The tritonserver / tritonfrontend python bindings are not installed here:
    # pip install /opt/tritonserver/python/triton*.whl

    # Application/testing requirements live in the openai/ copy
    pushd openai/
    # NOTE: requirements.txt should be pre-installed in the container,
    # uncomment if needed
    # pip install -r requirements.txt
    pip install -r requirements-test.txt

    case "${IMAGE_KIND}" in
        TRTLLM)
            # TODO: Remove this when the next stable version of TRT-LLM is available
            TENSORRTLLM_DIR="/workspace/TensorRT-LLM"
            TENSORRTLLM_VERSION="v1.2.0rc2"
            download_tensorrt_llm_models ${TENSORRTLLM_VERSION} ${TENSORRTLLM_DIR}

            prepare_tensorrtllm meta-llama/Meta-Llama-3.1-8B-Instruct tests/tensorrtllm_models /tmp/engines/llama/3.1-8b-instruct/ ${TENSORRTLLM_DIR}
            prepare_tensorrtllm mistralai/Mistral-Nemo-Instruct-2407 tests/tensorrtllm_mistral_models /tmp/engines/mistral/nemo-instruct-2407/ ${TENSORRTLLM_DIR}
            ;;
        *)
            prepare_vllm
            ;;
    esac
    popd
}

# vLLM needs no model preparation; only report that on stdout.
function prepare_vllm() {
    printf '%s\n' "No prep needed for vllm currently"
}

# Build a TensorRT-LLM model repository for one model.
#
# Arguments:
#   $1 - model id / path handed to generate_engine.py and the tokenizer tool
#   $2 - model repository directory to create and populate
#   $3 - directory the engine is written to (also used as tokenizer_dir)
#   $4 - TensorRT-LLM checkout providing the inflight_batcher_llm templates
#
# All relative paths (requirements.txt, ../generate_engine.py,
# tokenizer_info/...) are resolved against the current directory at call time.
function prepare_tensorrtllm() {
    # FIXME: Remove when testing TRT-LLM containers built from source
    pip install -r requirements.txt

    MODEL="$1"
    MODEL_REPO="$2"
    ENGINE_PATH="$3"
    TENSORRTLLM_DIR="$4"
    TRITON_BACKEND=tensorrtllm
    XGRAMMAR_TOKENIZER_INFO_PATH=tokenizer_info/${MODEL}/xgrammar_tokenizer_info.json
    GUIDED_DECODING_BACKEND=xgrammar

    # Start from the inflight_batcher_llm model templates
    mkdir -p ${MODEL_REPO}
    cp ${TENSORRTLLM_DIR}/triton_backend/all_models/inflight_batcher_llm/* "${MODEL_REPO}" -r
    # Ensemble model is not needed for the test
    rm -rf ${MODEL_REPO}/ensemble

    # 1. Generate the model's trt engines
    python3 ../generate_engine.py --model "${MODEL}" --engine_path "${ENGINE_PATH}"

    # 2. Generate the model's xgrammar tokenizer info. In order to run on C++ backend, we need an extra step to extract the tokenizer's information into json format.
    # The output directory is the parent of XGRAMMAR_TOKENIZER_INFO_PATH above.
    XGRAMMAR_TOKENIZER_INFO_DIR=tokenizer_info/${MODEL}
    rm -rf ${XGRAMMAR_TOKENIZER_INFO_DIR}
    python3 /app/examples/generate_xgrammar_tokenizer_info.py --model_dir ${MODEL} --output_dir ${XGRAMMAR_TOKENIZER_INFO_DIR}

    # 3. Prepare model repository
    # Each call passes comma-separated key:value pairs for one config.pbtxt
    # (presumably filled in place because of -i; confirm against fill_template.py).
    FILL_TEMPLATE="/app/tools/fill_template.py"
    python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/preprocessing/config.pbtxt tokenizer_dir:${ENGINE_PATH},triton_max_batch_size:64,preprocessing_instance_count:1,max_queue_size:0
    python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/postprocessing/config.pbtxt tokenizer_dir:${ENGINE_PATH},triton_max_batch_size:64,postprocessing_instance_count:1
    python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/tensorrt_llm_bls/config.pbtxt triton_max_batch_size:64,decoupled_mode:True,bls_instance_count:1,accumulate_tokens:False,logits_datatype:TYPE_FP32,prompt_embedding_table_data_type:TYPE_FP16
    python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/tensorrt_llm/config.pbtxt triton_backend:${TRITON_BACKEND},triton_max_batch_size:64,decoupled_mode:True,max_beam_width:1,engine_dir:${ENGINE_PATH},batching_strategy:inflight_fused_batching,max_queue_size:0,max_queue_delay_microseconds:1000,encoder_input_features_data_type:TYPE_FP16,logits_datatype:TYPE_FP32,exclude_input_in_output:True,prompt_embedding_table_data_type:TYPE_FP16,guided_decoding_backend:${GUIDED_DECODING_BACKEND},xgrammar_tokenizer_info_path:${XGRAMMAR_TOKENIZER_INFO_PATH}

    # 4. Prepare lora adapters
    # FIXME: Remove this WAR when it is fixed in the future stable version of TRT-LLM.
    # The workaround rewrites every "dims: [ -1, 3 ]" entry to "dims: [ -1, 4 ]"
    # in the two generated configs.
    sed -i 's/dims: \[ -1, 3 \]/dims: \[ -1, 4 \]/' ${MODEL_REPO}/tensorrt_llm/config.pbtxt
    sed -i 's/dims: \[ -1, 3 \]/dims: \[ -1, 4 \]/' ${MODEL_REPO}/tensorrt_llm_bls/config.pbtxt
    # Clone each adapter from Hugging Face into the BLS model's version
    # directory, convert it to "<name>-weights" and drop the clone afterwards.
    pushd ${MODEL_REPO}/tensorrt_llm_bls/1
    for lora_name in silk-road/luotuo-lora-7b-0.1 kunishou/Japanese-Alpaca-LoRA-7b-v0; do
        name=$(basename $lora_name)
        git clone https://huggingface.co/$lora_name
        python3 /app/examples/hf_lora_convert.py -i $name -o $name-weights --storage-type float16
        rm -rf $name
    done
    popd
}

# Reset the working directory and set up a fresh test environment:
# removes the previous openai/ copy and old *.xml / *.log artifacts, copies
# ../../python/openai into the current directory and installs dependencies.
function pre_test() {
    # Cleanup
    rm -rf openai/
    rm -f *.xml *.log

    # Prep test environment
    cp -r ../../python/openai .
    install_deps
}

# Run the OpenAI frontend test suite inside openai/ and, if it passes, rerun
# the tool-calling tests with the mistral parser and tokenizer.
#
# Sets the global RET to 1 on any pytest failure and copies the *.xml / *.log
# artifacts three directories up. Leaves `set -e` enabled on return.
function run_test() {
    pushd openai/
    TEST_LOG="test_openai.log"

    # Capture error code without exiting to allow log collection
    set +e
    # Redirect stdout to the log first and then point stderr at it; with the
    # reverse order (`2>&1 > file`) stderr stays on the terminal and is
    # missing from the log.
    pytest -s -v --junitxml=test_openai.xml tests/ > ${TEST_LOG} 2>&1
    if [ $? -ne 0 ]; then
        cat ${TEST_LOG}
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
    set -e

    if [ "$RET" == "0" ]; then
        # rerun the tool calling tests with mistral model to cover the mistral tool call parser
        # NOTE: this rerun reuses the same log and JUnit file names, so it
        # overwrites the artifacts of the first (passing) run.
        set +e
        TEST_TOOL_CALL_PARSER="mistral" TEST_TOKENIZER="mistralai/Mistral-Nemo-Instruct-2407" pytest -s -v --junitxml=test_openai.xml tests/test_tool_calling.py > ${TEST_LOG} 2>&1
        if [ $? -ne 0 ]; then
            cat ${TEST_LOG}
            echo -e "\n***\n*** Test Failed\n***"
            RET=1
        fi
        set -e
    fi

    # Collect logs for error analysis when needed
    cp *.xml *.log ../../../
    popd
}

### Test ###

# Overall result of the script; run_test sets it to 1 when a pytest run fails.
RET=0

# Prepare the environment, then execute the tests.
pre_test
run_test

# Propagate the test result as the script's exit status.
exit ${RET}


================================================
FILE: qa/L0_optional_input/models/ensemble_identity_2_float32/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Single-step ensemble around "identity_2_float32": both optional inputs are
# forwarded to the step and both step outputs become the ensemble outputs.
name: "ensemble_identity_2_float32"
platform: "ensemble"
max_batch_size: 4
# Both inputs are optional, so a request may provide either one or both.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
ensemble_scheduling {
  step [
    {
      # The only step: tensor names map one-to-one between the ensemble and
      # the composing model.
      model_name: "identity_2_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_optional_input/models/identity_2_float32/config.pbtxt
================================================
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Identity-backend model with two optional FP32 inputs and dynamic batching.
name: "identity_2_float32"
backend: "identity"
max_batch_size: 4
# Both inputs are optional, so a request may provide either one or both.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
# Preferred batch size equals max_batch_size (4); the maximum queue delay is
# 5,000,000 microseconds (5 seconds).
dynamic_batching { preferred_batch_size: [4], max_queue_delay_microseconds: 5000000 }


================================================
FILE: qa/L0_optional_input/models/optional_connecting_tensor/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Two-step ensemble that chains "optional_identity" twice. The tensors that
# connect the steps ("internal_output0" / "internal_output1") are produced by
# the first step and consumed by the second one.
platform: "ensemble"
max_batch_size: 4
# Both inputs are optional, so a request may provide either one or both.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
ensemble_scheduling {
  step [
    {
      # Step 1: ensemble inputs -> internal connecting tensors.
      model_name: "optional_identity"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "internal_output0"
      }
      output_map {
        key: "OUTPUT1"
        value: "internal_output1"
      }
    },
    {
      # Step 2: internal connecting tensors -> ensemble outputs.
      model_name: "optional_identity"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "internal_output0"
      }
      input_map {
        key: "INPUT1"
        value: "internal_output1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_optional_input/models/optional_identity/1/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Identity model for the Python backend with two optional inputs."""

    def execute(self, requests):
        """Copy every provided INPUT<i> tensor to the matching OUTPUT<i>.

        Exactly one response is produced per request, in request order, so
        the returned list always has the same length as ``requests``. A
        response only carries the outputs whose inputs were present in the
        request; a request without any input yields a response without
        output tensors.

        Args:
            requests: Requests handed over by the Python backend.

        Returns:
            A list with one ``pb_utils.InferenceResponse`` per request.
        """
        responses = []
        for request in requests:
            # Gather all outputs of this request first: appending one
            # response per input would return two responses for a request
            # that provides both inputs and none for a request without inputs.
            output_tensors = []
            for tidx in ("0", "1"):
                input_tensor = pb_utils.get_input_tensor_by_name(
                    request, "INPUT" + tidx
                )
                if input_tensor is not None:
                    output_tensors.append(
                        pb_utils.Tensor("OUTPUT" + tidx, input_tensor.as_numpy())
                    )
            responses.append(pb_utils.InferenceResponse(output_tensors))
        return responses


================================================
FILE: qa/L0_optional_input/models/optional_identity/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Python-backend model with two optional FP32 inputs and two FP32 outputs.
backend: "python"
max_batch_size: 4
# Both inputs are optional, so a request may provide either one or both.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_optional_input/models/pipeline_identity_2_float32/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Two-step ensemble over "identity_2_float32" in which the second step is
# given only one of its two optional inputs.
name: "pipeline_identity_2_float32"
platform: "ensemble"
max_batch_size: 4
# Both inputs are optional, so a request may provide either one or both.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
ensemble_scheduling {
  step [
    {
      # Step 1 receives both ensemble inputs, but only its OUTPUT0 is mapped
      # (to the connecting tensor "internal_output").
      model_name: "identity_2_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "internal_output"
      }
    },
    {
      # Step 2 is fed through INPUT1 only; its INPUT0 has no mapping. Both of
      # its outputs are mapped to the ensemble outputs.
      model_name: "identity_2_float32"
      model_version: -1
      input_map {
        key: "INPUT1"
        value: "internal_output"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_optional_input/optional_input_test.py
================================================
#!/usr/bin/python

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import sys
import threading
import time
import unittest

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient

# Exceptions raised inside worker threads are collected here and re-raised
# later from the test thread; the lock guards access to the list.
_deferred_exceptions_lock = threading.Lock()
_deferred_exceptions = []


# Similar set up as dynamic batcher tests
class OptionalInputTest(tu.TestResultCollector):
    def setUp(self):
        """Reset the deferred exceptions and build the per-test request data."""
        global _deferred_exceptions
        _deferred_exceptions = []

        # The helper client for setup will be GRPC for simplicity.
        self.triton_client_ = grpcclient.InferenceServerClient("localhost:8001")
        self.model_name_ = "identity_2_float32"
        # This will not be changed even when ensemble is under test,
        # as the dynamic batching is performed within the composing model
        self.check_status_model = "identity_2_float32"
        self.tensor_shape_ = (1, 1)

        input_names = ("INPUT0", "INPUT1")
        self.inputs_ = {
            name: grpcclient.InferInput(name, [1, 1], "FP32") for name in input_names
        }
        # INPUT0 carries ones and INPUT1 zeros so the two can be told apart.
        self.input_data_ = {
            "INPUT0": np.ones(shape=(1, 1), dtype=np.float32),
            "INPUT1": np.zeros(shape=(1, 1), dtype=np.float32),
        }
        for name in input_names:
            self.inputs_[name].set_data_from_numpy(self.input_data_[name])

        # Requested outputs are keyed by the name of the input they mirror.
        self.outputs_ = {
            name: grpcclient.InferRequestedOutput(name.replace("INPUT", "OUTPUT"))
            for name in input_names
        }

    def add_deferred_exception(self, ex):
        """Record *ex* in the module-level deferred exception list."""
        # The list is only mutated, never rebound, so no ``global`` is needed.
        with _deferred_exceptions_lock:
            _deferred_exceptions.append(ex)

    def check_deferred_exception(self):
        """Re-raise the first deferred exception, if any was recorded."""
        with _deferred_exceptions_lock:
            # Only the first recorded exception is reported.
            if _deferred_exceptions:
                raise _deferred_exceptions[0]

    def check_response(self, thresholds, provided_inputs=("INPUT0", "INPUT1")):
        """Send one inference request and validate its outputs and latency.

        Any exception, including assertion failures, is stored through
        ``add_deferred_exception`` instead of being raised, so the method can
        be used as a thread target.

        Args:
            thresholds: Pair ``(gt_ms, lt_ms)``; the response time in
                milliseconds must be greater than ``gt_ms`` and less than
                ``lt_ms``. A ``None`` entry disables that bound.
            provided_inputs: Names of the inputs to send; the matching
                outputs are requested and compared with the input data.
        """
        try:
            start_ms = int(round(time.time() * 1000))

            inputs = [self.inputs_[name] for name in provided_inputs]
            outputs = [self.outputs_[name] for name in provided_inputs]

            # A separate client per call, as this method runs in several threads.
            client = grpcclient.InferenceServerClient("localhost:8001")
            results = client.infer(
                model_name=self.model_name_, inputs=inputs, outputs=outputs
            )

            end_ms = int(round(time.time() * 1000))
            elapsed_ms = end_ms - start_ms

            # Every requested output must echo the data of its input.
            for name in provided_inputs:
                output_name = self.outputs_[name].name()
                expected = self.input_data_[name]
                output_data = results.as_numpy(output_name)
                matches = np.array_equal(output_data, expected)
                self.assertTrue(
                    matches,
                    "{}, {}, expected: {}, got {}".format(
                        self.model_name_, output_name, expected, output_data
                    ),
                )

            gt_ms = thresholds[0]
            lt_ms = thresholds[1]
            if lt_ms is not None:
                self.assertTrue(
                    elapsed_ms < lt_ms,
                    "expected less than {}ms response time, got {} ms".format(
                        lt_ms, elapsed_ms
                    ),
                )
            if gt_ms is not None:
                self.assertTrue(
                    elapsed_ms > gt_ms,
                    "expected greater than {}ms response time, got {} ms".format(
                        gt_ms, elapsed_ms
                    ),
                )
        except Exception as ex:
            self.add_deferred_exception(ex)

    def check_status(self, model_name, batch_exec, request_cnt, infer_cnt):
        """Verify the inference statistics of version 1 of *model_name*.

        Args:
            model_name: Model whose statistics are fetched.
            batch_exec: Mapping of batch size to the expected number of
                executions with that batch size.
            request_cnt: Expected number of successful requests.
            infer_cnt: Expected inference count.
        """
        # There is a time window between when responses are returned and statistics are updated.
        # To prevent intermittent test failure during that window, wait up to 10 seconds for the
        # inference statistics to be ready.
        num_tries = 10
        for _ in range(num_tries):
            stats = self.triton_client_.get_inference_statistics(model_name, "1")
            self.assertEqual(len(stats.model_stats), 1, "expect 1 model stats")
            if stats.model_stats[0].execution_count > 0:
                break
            time.sleep(1)

        model_stats = stats.model_stats[0]
        self.assertEqual(
            model_stats.name,
            model_name,
            "expect model stats for model {}".format(model_name),
        )
        self.assertEqual(
            model_stats.version,
            "1",
            "expect model stats for model {} version 1".format(model_name),
        )

        batch_stats = model_stats.batch_stats
        self.assertEqual(
            len(batch_stats),
            len(batch_exec),
            "expected {} different batch-sizes, got {}".format(
                len(batch_exec), len(batch_stats)
            ),
        )

        for batch_stat in batch_stats:
            bs = batch_stat.batch_size
            bc = batch_stat.compute_infer.count
            self.assertTrue(bs in batch_exec, "unexpected batch-size {}".format(bs))
            # Get count from one of the stats
            self.assertEqual(
                bc,
                batch_exec[bs],
                "expected model-execution-count {} for batch size {}, got {}".format(
                    batch_exec[bs], bs, bc
                ),
            )

        actual_request_cnt = model_stats.inference_stats.success.count
        self.assertEqual(
            actual_request_cnt,
            request_cnt,
            "expected model-request-count {}, got {}".format(
                request_cnt, actual_request_cnt
            ),
        )

        # The total execution count must match the expected executions summed
        # over all batch sizes. (This check used to compare the request count
        # a second time, so the execution count was never verified.)
        expected_exec_cnt = sum(batch_exec.values())
        actual_exec_cnt = model_stats.execution_count
        self.assertEqual(
            actual_exec_cnt,
            expected_exec_cnt,
            "expected model-exec-count {}, got {}".format(
                expected_exec_cnt, actual_exec_cnt
            ),
        )

        actual_infer_cnt = model_stats.inference_count
        self.assertEqual(
            actual_infer_cnt,
            infer_cnt,
            "expected model-inference-count {}, got {}".format(
                infer_cnt, actual_infer_cnt
            ),
        )

    def test_all_inputs(self):
        """Two concurrent requests that each provide both inputs."""
        # Provide all inputs, send requests that don't form preferred batch
        # so all requests should be returned after the queue delay
        try:
            workers = [
                threading.Thread(target=self.check_response, args=((4000, None),))
                for _ in range(2)
            ]
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()
            self.check_deferred_exception()
            # One execution of batch size 2, 2 requests, 2 inferences.
            self.check_status(self.check_status_model, {2: 1}, 2, 2)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_optional_same_input(self):
        """Send two concurrent requests that both provide only INPUT1.

        The requests supply the same subset of inputs and do not form a
        preferred batch, so both are expected to be returned only after the
        queue delay has elapsed.
        """
        try:
            workers = [
                threading.Thread(
                    target=self.check_response,
                    args=((4000, None),),
                    kwargs={"provided_inputs": ("INPUT1",)},
                )
                for _ in range(2)
            ]
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()
            self.check_deferred_exception()
            # Expect one execution of batch size 2, 2 requests, 2 inferences.
            self.check_status(self.check_status_model, {2: 1}, 2, 2)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_optional_mix_inputs(self):
        """Alternate INPUT0-only and INPUT1-only requests.

        Every request except the last should come back in less than the
        queue delay, because the batcher is expected to dispatch the pending
        batch as soon as it sees a request with a different set of inputs.
        """
        try:
            # (arguments forwarded to check_response, the single input sent).
            # NOTE(review): the tuples look like response-time bounds in ms;
            # confirm against check_response, which is defined outside this
            # view.
            plan = (
                ((0, 4000), "INPUT0"),
                ((0, 4000), "INPUT1"),
                ((0, 4000), "INPUT0"),
                ((4000, None), "INPUT1"),
            )
            workers = [
                threading.Thread(
                    target=self.check_response,
                    args=(thresholds,),
                    kwargs={"provided_inputs": (input_name,)},
                )
                for thresholds, input_name in plan
            ]
            # Stagger the requests so they reach the server in order.
            for worker in workers:
                worker.start()
                time.sleep(0.5)

            for worker in workers:
                worker.join()
            self.check_deferred_exception()
            # Expect four executions of batch size 1, 4 requests, 4 inferences.
            self.check_status(self.check_status_model, {1: 4}, 4, 4)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_optional_mix_inputs_2(self):
        """Alternate INPUT0-only requests with requests providing all inputs.

        Every request except the last should come back in less than the
        queue delay, because the batcher is expected to dispatch the pending
        batch as soon as it sees a request with a different set of inputs.
        """
        try:
            # (arguments forwarded to check_response, extra keyword arguments).
            # An empty dict leaves check_response on its default inputs.
            plan = (
                ((0, 4000), {"provided_inputs": ("INPUT0",)}),
                ((0, 4000), {}),
                ((0, 4000), {"provided_inputs": ("INPUT0",)}),
                ((4000, None), {}),
            )
            workers = [
                threading.Thread(
                    target=self.check_response, args=(thresholds,), kwargs=extra
                )
                for thresholds, extra in plan
            ]
            # Stagger the requests so they reach the server in order.
            for worker in workers:
                worker.start()
                time.sleep(0.5)

            for worker in workers:
                worker.join()
            self.check_deferred_exception()
            # Expect four executions of batch size 1, 4 requests, 4 inferences.
            self.check_status(self.check_status_model, {1: 4}, 4, 4)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_ensemble_all_inputs(self):
        """Repeat test_all_inputs through the ensemble wrapper model."""
        # 'ensemble_identity_2_float32' merely wraps 'identity_2_float32'.
        self.model_name_ = "ensemble_identity_2_float32"
        self.test_all_inputs()
        # The ensemble handles the requests as they arrive: two executions
        # of batch size 1.
        ensemble_batches = {1: 2}
        self.check_status(self.model_name_, ensemble_batches, 2, 2)

    def test_ensemble_optional_same_input(self):
        """Repeat test_optional_same_input through the ensemble wrapper model."""
        # 'ensemble_identity_2_float32' merely wraps 'identity_2_float32'.
        self.model_name_ = "ensemble_identity_2_float32"
        self.test_optional_same_input()
        # The ensemble handles the requests as they arrive: two executions
        # of batch size 1.
        ensemble_batches = {1: 2}
        self.check_status(self.model_name_, ensemble_batches, 2, 2)

    def test_ensemble_optional_mix_inputs(self):
        """Repeat test_optional_mix_inputs through the ensemble wrapper model."""
        # 'ensemble_identity_2_float32' merely wraps 'identity_2_float32'.
        self.model_name_ = "ensemble_identity_2_float32"
        self.test_optional_mix_inputs()
        # The ensemble handles the requests as they arrive: four executions
        # of batch size 1.
        ensemble_batches = {1: 4}
        self.check_status(self.model_name_, ensemble_batches, 4, 4)

    def test_ensemble_optional_mix_inputs_2(self):
        """Repeat test_optional_mix_inputs_2 through the ensemble wrapper model."""
        # 'ensemble_identity_2_float32' merely wraps 'identity_2_float32'.
        self.model_name_ = "ensemble_identity_2_float32"
        self.test_optional_mix_inputs_2()
        # The ensemble handles the requests as they arrive: four executions
        # of batch size 1.
        ensemble_batches = {1: 4}
        self.check_status(self.model_name_, ensemble_batches, 4, 4)

    def test_ensemble_optional_pipeline(self):
        """Check an ensemble that connects only a subset of a step's inputs.

        The ensemble pipelines models with optional inputs, and its second
        step is wired to only part of that model's inputs. This is valid
        because the disconnected inputs are marked optional; see the model's
        'config.pbtxt' for detail.
        """
        self.model_name_ = "pipeline_identity_2_float32"

        try:
            # A single request that provides both inputs.
            inputs = [self.inputs_[name] for name in ("INPUT0", "INPUT1")]

            client = grpcclient.InferenceServerClient("localhost:8001")
            results = client.infer(model_name=self.model_name_, inputs=inputs)

            # OUTPUT0 is always zero, OUTPUT1 = INPUT0
            actual = results.as_numpy("OUTPUT0")
            wanted = np.zeros(shape=(1, 1), dtype=np.float32)
            failure_msg = "{}, {}, expected: {}, got {}".format(
                self.model_name_, "OUTPUT0", wanted, actual
            )
            self.assertTrue(np.array_equal(actual, wanted), failure_msg)

            wanted = self.input_data_["INPUT0"]
            actual = results.as_numpy("OUTPUT1")
            failure_msg = "{}, {}, expected: {}, got {}".format(
                self.model_name_, "OUTPUT1", wanted, actual
            )
            self.assertTrue(np.array_equal(actual, wanted), failure_msg)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_ensemble_optional_connecting_tensor(self):
        """Check an ensemble whose request feeds only part of a step's inputs.

        The ensemble graph connects every input of the second model, but the
        request only produces a subset of them. This is valid because the
        inputs that are not provided are marked optional; see the model's
        'config.pbtxt' for detail.
        """
        self.model_name_ = "optional_connecting_tensor"

        try:
            # Only INPUT0 is sent. The matching requested output is looked up
            # in self.outputs_ under the same key.
            requested = ("INPUT0",)
            inputs = [self.inputs_[name] for name in requested]
            outputs = [self.outputs_[name] for name in requested]

            client = grpcclient.InferenceServerClient("localhost:8001")
            results = client.infer(
                model_name=self.model_name_, inputs=inputs, outputs=outputs
            )

            wanted = self.input_data_["INPUT0"]
            actual = results.as_numpy("OUTPUT0")
            failure_msg = "{}, {}, expected: {}, got {}".format(
                self.model_name_, "OUTPUT0", wanted, actual
            )
            self.assertTrue(np.array_equal(actual, wanted), failure_msg)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))


# Run the unittest runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_optional_input/test.sh
================================================
#!/bin/bash
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Only expose GPU 0 to the processes started by this script.
export CUDA_VISIBLE_DEVICES=0

# Python test driver, its log, and the result file it is expected to write.
TEST_PY=./optional_input_test.py
TEST_LOG="./test.log"
TEST_RESULT_FILE='test_results.txt'

# Server binary, arguments and log file.
# NOTE(review): presumably consumed by run_server from ../common/util.sh -
# confirm against that helper.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

# Drop logs left over from a previous run.
rm -fr *.log

# Make sure every model used by the tests has a version directory '1'.
mkdir -p ./models/identity_2_float32/1
mkdir -p ./models/ensemble_identity_2_float32/1
mkdir -p ./models/pipeline_identity_2_float32/1
mkdir -p ./models/optional_connecting_tensor/1

# Basic test cases
# (the list can be overridden by exporting TEST_CASES before running)
TEST_CASES=${TEST_CASES:="test_all_inputs \
                            test_optional_same_input \
                            test_optional_mix_inputs \
                            test_optional_mix_inputs_2 \
                            test_ensemble_all_inputs \
                            test_ensemble_optional_same_input \
                            test_ensemble_optional_mix_inputs \
                            test_ensemble_optional_mix_inputs_2 \
                            test_ensemble_optional_pipeline \
                            test_ensemble_optional_connecting_tensor"}
# RET stays 0 only if every test case passes.
RET=0
for i in $TEST_CASES ; do
    # Restart server for every test to clear model stats
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $i" >>$TEST_LOG

    # Let the test command fail without aborting the script; the failure is
    # recorded in RET instead.
    set +e
    python $TEST_PY OptionalInputTest.$i >>$TEST_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        # NOTE(review): check_test_results comes from util.sh; presumably it
        # verifies that exactly 1 test was reported - confirm.
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $TEST_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    # Stop the server and wait for it to exit before the next iteration.
    kill $SERVER_PID
    wait $SERVER_PID
done

# Report the overall result; dump both logs on failure.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
    cat $SERVER_LOG
    cat $TEST_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_orca/orca_http_test.py
================================================
#!/usr/bin/python3
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import json
import sys

import requests


# To run the test, have tritonserver running and run this script with the endpoint as a flag.
#
# Example:
# ```
# python3 orca_http_test.py http://localhost:8000/v2/models/ensemble/generate
# ```
def get_endpoint_header(url, data, request_header=None):
    """
    POST `data` as JSON to `url` and return the "endpoint-load-metrics" response header.

    `request_header`, when truthy, is sent as the request headers. Returns the
    header value, an empty string when the response does not carry the header,
    or None if the request fails (the error is printed).
    """
    HEADER_KEY = "endpoint-load-metrics"
    # Passing headers=None is the same as not passing headers at all, so a
    # falsy request_header simply becomes None.
    headers = request_header if request_header else None
    try:
        response = requests.post(url, json=data, headers=headers)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        return None
    return response.headers.get(HEADER_KEY, "")


def parse_header_data(header, orca_format):
    """
    Parses the header data into a dictionary based on the given format.

    Args:
        header: Raw header value, e.g. 'JSON {"named_metrics": {...}}' or
            'TEXT named_metrics.a=1, named_metrics.b=2'.
        orca_format: "json" or "text".

    Returns:
        For "json", the value stored under "named_metrics". For "text", a dict
        mapping each "named_metrics." key (prefix removed) to a float. None if
        the format is unknown, the header is malformed, or no metrics are found
        (a diagnostic is printed in each case).
    """
    METRIC_KEY = "named_metrics"

    def strip_tag(text, tag):
        # Only remove the format tag when it leads the header. A blanket
        # str.replace would also rewrite matching text inside the payload.
        return text[len(tag) :] if text.startswith(tag) else text

    try:
        if orca_format == "json":
            # Parse the header in JSON format
            data = json.loads(strip_tag(header, "JSON "))
            # A valid JSON payload may still be a scalar or a list; only an
            # object can carry the metrics key.
            if isinstance(data, dict) and METRIC_KEY in data:
                return data[METRIC_KEY]
            print(f"No key '{METRIC_KEY}' in header data: {data}")
            return None
        elif orca_format == "text":
            # Parse the header in TEXT format
            data = {}
            for key_value_pair in strip_tag(header, "TEXT ").split(", "):
                key, value = key_value_pair.split("=")
                if "." in key:
                    prefix, nested_key = key.split(".", 1)
                    if prefix == METRIC_KEY:
                        data[nested_key] = float(value)
            if not data:
                print(f"Could not parse any keys from header: {header}")
                return None
            return data
        else:
            print(f"Invalid ORCA format: {orca_format}")
            return None
    except (json.JSONDecodeError, ValueError, KeyError, TypeError, AttributeError):
        # TypeError / AttributeError cover a header that is not a string.
        print("Error: Invalid data in the header.")
        return None


def check_for_keys(data, desired_keys, orca_format):
    """
    Report whether every key in `desired_keys` is present in `data`.

    Prints the found key/value pairs and returns True when nothing is missing.
    Otherwise prints the missing keys and returns False.
    """
    if any(key not in data for key in desired_keys):
        absent = set(desired_keys) - set(data)
        print(f"Missing keys in header: {', '.join(absent)}")
        return False

    found = [f"{k}: {data[k]}" for k in desired_keys]
    print(
        f"ORCA header present in {orca_format} format with kv_cache_utilization: {found}"
    )
    return True


def request_header(orca_format):
    """
    Build the request headers that select the ORCA metrics format.

    Returns None when `orca_format` is falsy, i.e. no format is requested.
    """
    if not orca_format:
        return None
    return {"endpoint-load-metrics-format": orca_format}


def test_header_type(url, data, orca_format):
    """
    Send one request and validate the ORCA response header for `orca_format`.

    Args:
        url: Endpoint the POST request is sent to.
        data: JSON-serialisable request payload.
        orca_format: "json", "text", or None to send no format header.

    Returns:
        True when the response matches expectations: the desired metric keys
        are present for a requested format, or the header is empty when no
        format was requested. False otherwise.
    """
    req_header = request_header(orca_format)
    # Use the arguments passed in rather than the script-level globals, so the
    # function also works when called from outside the __main__ block.
    response_header = get_endpoint_header(url, data, req_header)

    desired_keys = {
        "kv_cache_utilization",
        "max_token_capacity",
    }  # Just the keys, no need to initialize with None

    if response_header is None:
        print(f"Request to endpoint: '{url}' failed.")
        return False
    elif response_header == "":
        if orca_format:
            print(
                f"response header empty, endpoint-load-metrics-format={orca_format} is not a valid ORCA metric format"
            )
            return False
        else:
            # No request header set <=> no response header. Intended behavior.
            print("response header empty, endpoint-load-metrics-format is not set")
            return True

    # Keep the parsed metrics separate from the request payload in `data`.
    metrics = parse_header_data(response_header, orca_format)
    if metrics:
        return check_for_keys(metrics, desired_keys, orca_format)
    else:
        print(f"Unexpected response header value: {response_header}")
        return False


if __name__ == "__main__":
    # Command line: a single positional argument, the generate endpoint URL.
    parser = argparse.ArgumentParser(
        description="Make a POST request to generate endpoint to test the ORCA metrics header."
    )
    parser.add_argument("url", help="The model URL to send the request to.")
    args = parser.parse_args()

    # Request payload shared by every format check.
    TEST_DATA = json.loads(
        '{"text_input": "hello world", "max_tokens": 20, "bad_words": "", "stop_words": ""}'
    )

    # Check each supported format plus the "no format header" case, and keep
    # going after a failure so every format is reported.
    failures = 0
    for orca_format in ("json", "text", None):
        print("Checking response header for ORCA format:", orca_format)
        if test_header_type(args.url, TEST_DATA, orca_format):
            continue
        print("FAIL on format:", orca_format)
        failures += 1

    sys.exit(1 if failures else 0)


================================================
FILE: qa/L0_orca/test.sh
================================================
#!/bin/bash
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Overall exit status; set to 1 when the client test fails.
RET=0
BASE_DIR=$(pwd)
# The following settings can be overridden from the environment.
NUM_GPUS=${NUM_GPUS:=1}
TENSORRTLLM_BACKEND_REPO_TAG=${TENSORRTLLM_BACKEND_REPO_TAG:="main"}
TRITON_REPO_ORG=${TRITON_REPO_ORG:="https://github.com/triton-inference-server"}
TRT_ROOT="/usr/local/tensorrt"

# Model, repository and engine locations.
# NOTE(review): most of these are not referenced in this script; presumably
# they are read by the helpers in ../common/trtllm_util.sh - confirm there.
MODEL_NAME="gpt2_tensorrt_llm"
NAME="tensorrt_llm_benchmarking_test"
MODEL_REPOSITORY="$(pwd)/triton_model_repo"
TENSORRTLLM_BACKEND_DIR="/workspace/tensorrtllm_backend"
GPT_DIR="$TENSORRTLLM_BACKEND_DIR/tensorrt_llm/examples/models/core/gpt"
TOKENIZER_DIR="$GPT_DIR/gpt2"
ENGINES_DIR="${BASE_DIR}/engines/inflight_batcher_llm/${NUM_GPUS}-gpu"
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_LOG="${NAME}_server.log"
SERVER_TIMEOUT=${SERVER_TIMEOUT:=120}
# Client script under test and the log its output is appended to.
CLIENT_PY=${BASE_DIR}/orca_http_test.py
CLIENT_LOG="${NAME}_orca_http_test.log"
source ../common/trtllm_util.sh

# Preparation steps; the functions are not defined in this script
# (presumably provided by trtllm_util.sh sourced above).
clone_tensorrt_llm_backend_repo
build_gpt2_base_model
build_gpt2_tensorrt_engine
prepare_model_repository

# From here on failures are handled explicitly instead of aborting.
set +e
run_server

if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

# Run the ORCA header client against the model's generate endpoint.
python3 $CLIENT_PY "http://localhost:8000/v2/models/${MODEL_NAME}/generate" >>$CLIENT_LOG 2>&1

if [ $? -ne 0 ]; then
    echo "Failed: Client test had a non-zero return code."
    RET=1
fi

# Report the result; dump both logs on failure.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** ORCA Test Passed\n***"
else
    cat $SERVER_LOG
    cat $CLIENT_LOG
    echo -e "\n***\n*** ORCA Test FAILED\n***"
fi

kill_server
set -e
exit $RET


================================================
FILE: qa/L0_output_name/output_name_test.py
================================================
#!/usr/bin/python3
# Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import test_util as tu
from tritongrpcclient import grpc_service_pb2, grpc_service_pb2_grpc

import grpc

# Prefixes of the "<prefix>_nobatch_zero_1_float32" models exercised by the test.
_trials = ("libtorch", "onnx", "plan")


class OutputNameValidationTest(tu.TestResultCollector):
    """Checks that the gRPC endpoint rejects requests for unknown output names."""

    def requestGenerator(self, model_name, output_name):
        """Build a ModelInferRequest for `model_name` that requests `output_name`.

        The request carries one FP32 input named "INPUT0" of shape [1] with
        four bytes of raw content.
        """
        infer_request = grpc_service_pb2.ModelInferRequest(
            model_name=model_name, id="output name validation"
        )

        tensor_in = grpc_service_pb2.ModelInferRequest.InferInputTensor(
            name="INPUT0", datatype="FP32", shape=[1]
        )
        infer_request.inputs.extend([tensor_in])

        requested = grpc_service_pb2.ModelInferRequest.InferRequestedOutputTensor(
            name=output_name
        )
        infer_request.outputs.extend([requested])

        # Four bytes of payload: one FP32 element.
        infer_request.raw_input_contents.extend([("a" * 4).encode("utf-8")])

        return infer_request

    def test_grpc(self):
        """Request the output 'DUMMY' from every trial model and expect an error."""
        stub = grpc_service_pb2_grpc.GRPCInferenceServiceStub(
            grpc.insecure_channel("localhost:8001")
        )

        # Send request with invalid output name
        for backend in _trials:
            model_name = "{}_nobatch_zero_1_float32".format(backend)
            bad_request = self.requestGenerator(model_name, "DUMMY")
            try:
                stub.ModelInfer(bad_request)
                self.assertTrue(
                    False, "unexpected success for unknown output " + model_name
                )
            except grpc.RpcError as rpc_error:
                details = rpc_error.details()
                self.assertTrue(
                    details.startswith("unexpected inference output 'DUMMY' for model")
                )


# Run the unittest runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_output_name/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: taken from the environment, overridden by the first
# command-line argument when one is given.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
TEST_RESULT_FILE='test_results.txt'
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Only expose GPU 0 to the processes started by this script.
export CUDA_VISIBLE_DEVICES=0

# Client test script, its log, and the number of tests it is expected to run.
OP_NAME_TEST_PY=output_name_test.py
CLIENT_LOG="./client.log"
EXPECTED_NUM_TESTS="1"
DATADIR=`pwd`/models

# Start from an empty model repository.
rm -rf $DATADIR
mkdir $DATADIR

# Copy every '*_nobatch_zero_1_float32' identity model into the repository.
cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/*_nobatch_zero_1_float32 $DATADIR

# Server binary, arguments and log file.
# NOTE(review): presumably consumed by run_server from ../common/util.sh -
# confirm against that helper.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

rm -f $SERVER_LOG $CLIENT_LOG

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

# test gRPC for output name validation
# (failures are recorded in RET instead of aborting the script)
set +e
python $OP_NAME_TEST_PY OutputNameValidationTest >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

# Stop the server and wait for it to exit.
kill $SERVER_PID
wait $SERVER_PID

# Report the result; dump the client log on failure.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test PASSED\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_output_validation/lt_op_val_client.py
================================================
#!/usr/bin/python

# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import requests
import test_util as tu


class OutputValidationTest(tu.TestResultCollector):
    """Checks the HTTP replies of three libtorch models with differing output setups.

    Every test posts the same single-element FP32 request to a model's
    ``/infer`` endpoint on ``localhost:8000`` and inspects the reply: two
    models are expected to answer with a specific error message, the third
    with HTTP 200.
    """

    def _infer(self, model_name, output_name):
        """POST the shared request to `model_name`, asking for `output_name`.

        Returns the `requests` response object unchanged so that each test
        can decide what to verify.
        """
        url = "http://localhost:8000/v2/models/" + model_name + "/infer"
        body = (
            '{"inputs":[{"name":"INPUT__0","shape":[1,1],"datatype":"FP32","data":[1.0]}],'
            '"outputs":[{"name":"' + output_name + '"}]}'
        )
        return requests.post(url, data=body)

    def _assert_error_startswith(self, response, expected_prefix):
        """Fail unless the reply carries an "error" string starting with the prefix.

        The actual payload is included in the failure message; a bare
        ``assertTrue`` would only report "False is not true".
        """
        payload = response.json()
        self.assertIn(
            "error",
            payload,
            "expected an error response, got: {}".format(payload),
        )
        msg = payload["error"]
        self.assertTrue(
            msg.startswith(expected_prefix),
            "unexpected error message: {}".format(msg),
        )

    # for datatype mismatch
    def test_datatype(self):
        response = self._infer("libtorch_datatype_1_float32", "OUTPUT__0")
        self._assert_error_startswith(
            response,
            "configuration expects datatype TYPE_INT32 for output 'OUTPUT__0', model provides TYPE_FP32",
        )

    # for output mismatch
    def test_index(self):
        response = self._infer("libtorch_index_1_float32", "OUTPUT__1")
        self._assert_error_startswith(
            response,
            "The output OUTPUT__1 in the model configuration refers to an output index which doesn't exist. This model has 1 outputs",
        )

    # successful run
    def test_success(self):
        response = self._infer("libtorch_zero_1_float32", "OUTPUT__0")
        self.assertEqual(
            response.status_code,
            200,
            "unexpected response: {}".format(response.text),
        )

# Run the test cases of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_output_validation/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Driver for the output validation test: starts the server on a libtorch
# model repository, runs lt_op_val_client.py against it and reports the
# overall result through the exit code (0 = passed, 1 = failed).

# The repository version defaults to NVIDIA_TRITON_SERVER_VERSION and can be
# overridden by the first command-line argument.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
TEST_RESULT_FILE='test_results.txt'
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# When TEST_REPO_ARCH is set it is appended to the version as a suffix.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Expose only GPU device 0 to the processes started below.
export CUDA_VISIBLE_DEVICES=0

LIBTORCH_OP_VAL_CLIENT=lt_op_val_client.py

DATADIR=/data/inferenceserver/${REPO_VERSION}/libtorch_model_store2
# Number of test results check_test_results is asked to find; the client
# defines three test_* methods.
EXPECTED_NUM_TESTS="3"

SERVER=/opt/tritonserver/bin/tritonserver
# NOTE(review): --exit-on-error=false presumably keeps the server alive when
# some models fail to load, which the error-path tests rely on -- confirm.
SERVER_ARGS="--model-repository=$DATADIR --exit-on-error=false"
SERVER_LOG="./inference_server.log"
# util.sh is expected to provide run_server_tolive, wait_for_model_stable,
# check_test_results and SERVER_TIMEOUT; none of them are defined in this file.
source ../common/util.sh

# A SERVER_PID of "0" after run_server_tolive is treated as a failed start.
run_server_tolive
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# give plenty of time for model to load (and fail to load)
wait_for_model_stable $SERVER_TIMEOUT

RET=0
CLIENT_LOG=client.log
rm -f ./client.log

# Errors are tolerated here so that the exit statuses can be inspected and
# the server is still shut down afterwards.
set +e
python $LIBTORCH_OP_VAL_CLIENT >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    RET=1
else
    # The client exited cleanly; additionally verify the recorded results.
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

# Stop the server and wait for the process to terminate.
# NOTE(review): with `set -e` active, a non-zero status from `wait` would end
# the script before the summary below is printed -- confirm this is intended.
kill $SERVER_PID
wait $SERVER_PID

# Print the summary; on failure the client log is dumped first.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_parallel_copy/parallel_copy_test.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import functools
import time
import unittest
from builtins import range

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class ParallelCopyTest(tu.TestResultCollector):
    """Compares the input-copy time of one large request with several small ones.

    The time is read from the server's per-batch-size inference statistics
    (``compute_input.ns``) before and after sending requests over gRPC.
    """

    def setUp(self):
        self.client_ = grpcclient.InferenceServerClient("localhost:8001")
        self.dtype_ = np.float32
        self.model_name_ = tu.get_zero_model_name("plan", 1, self.dtype_)

    def _batch_input_duration(self, batch_size):
        """Return the reported ``compute_input.ns`` for executions of `batch_size`.

        Returns 0 when the statistics of version "1" of the model contain no
        entry for that batch size.
        """
        stats = self.client_.get_inference_statistics(self.model_name_, "1")
        self.assertEqual(len(stats.model_stats), 1, "expect 1 model stats")
        self.assertEqual(
            stats.model_stats[0].name,
            self.model_name_,
            "expect model stats for model {}".format(self.model_name_),
        )
        self.assertEqual(
            stats.model_stats[0].version,
            "1",
            "expect model stats for model {} version 1".format(self.model_name_),
        )

        batch_input_duration = 0
        for batch_stat in stats.model_stats[0].batch_stats:
            if batch_stat.batch_size == batch_size:
                batch_input_duration = batch_stat.compute_input.ns
        return batch_input_duration

    def _run(self, batch_sizes):
        """Send one async request per entry of `batch_sizes` and verify the replies.

        Each request carries random data of shape ``[bs, 16 * 1024 * 1024]``
        and its OUTPUT0 is expected to equal the data sent.

        Returns the growth of ``compute_input.ns`` for the combined batch
        size (the sum of `batch_sizes`) between the start and the end of the
        call.
        """
        batch_size = sum(batch_sizes)
        input_data = [
            np.random.random([bs, 16 * 1024 * 1024]).astype(self.dtype_)
            for bs in batch_sizes
        ]
        inputs = []
        for bs, data in zip(batch_sizes, input_data):
            infer_input = grpcclient.InferInput(
                "INPUT0", [bs, 16 * 1024 * 1024], "FP32"
            )
            infer_input.set_data_from_numpy(data)
            inputs.append([infer_input])
        output = [grpcclient.InferRequestedOutput("OUTPUT0")]

        def callback(user_data, idx, result, error):
            # Store either the error or the result in the slot of request idx.
            user_data[idx] = error if error else result

        # list to hold the results of inference.
        user_data = [None] * len(batch_sizes)

        before_compute_input_duration = self._batch_input_duration(batch_size)
        for idx, request_inputs in enumerate(inputs):
            self.client_.async_infer(
                model_name=self.model_name_,
                inputs=request_inputs,
                callback=functools.partial(callback, user_data, idx),
                outputs=output,
            )

        # Poll once per second, at most 20 times, until every slot is filled.
        time_out = 20
        while time_out > 0 and any(res is None for res in user_data):
            time_out -= 1
            time.sleep(1)

        done_cnt = sum(res is not None for res in user_data)
        self.assertEqual(
            done_cnt,
            len(batch_sizes),
            "expected {} responses, got {}".format(len(batch_sizes), done_cnt),
        )
        for idx, (res, expected) in enumerate(zip(user_data, input_data)):
            # An isinstance-based check also rejects subclasses of the
            # exception type, which an exact type comparison would let through.
            self.assertNotIsInstance(
                res,
                InferenceServerException,
                "expected response for request {}, got exception {}".format(idx, res),
            )
            output_data = res.as_numpy("OUTPUT0")
            self.assertTrue(
                np.array_equal(output_data, expected),
                "Mismatched output data for request {}".format(idx),
            )

        after_compute_input_duration = self._batch_input_duration(batch_size)
        return after_compute_input_duration - before_compute_input_duration

    def test_performance(self):
        model_status = self.client_.is_model_ready(self.model_name_, "1")
        self.assertTrue(model_status, "expected model to be ready")

        # Send 1 request with batch size 8 so that the copy is not parallelized
        serialized_time = self._run([8])
        parallelized_time = self._run([2, 2, 2, 2])

        # Report the timings before asserting so that they are available in
        # the log even when the comparison below fails.
        print(
            "serialized v.s. parallelized : {} v.s. {}".format(
                serialized_time, parallelized_time
            )
        )

        # The following check is loose, local runs show that the speedup is not
        # significant (~15%), may be due to the dispatch overhead
        # which cancels part of the improvement
        self.assertTrue(
            serialized_time > parallelized_time,
            "Expected parallelized copy is faster than serialized copy",
        )


# Run the test cases of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_parallel_copy/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Driver for the parallel copy test: prepares a local model repository with
# dynamic batching enabled, starts the server, runs parallel_copy_test.py and
# reports the overall result through the exit code (0 = passed, 1 = failed).

# The repository version defaults to NVIDIA_TRITON_SERVER_VERSION and can be
# overridden by the first command-line argument.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
TEST_RESULT_FILE='test_results.txt'
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# When TEST_REPO_ARCH is set it is appended to the version as a suffix.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Expose only GPU device 0 to the processes started below.
export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
PARALLEL_COPY_TEST=parallel_copy_test.py

DATADIR="./models"

# Start from a fresh copy of the model so that the config edit below is not
# applied twice on repeated runs.
rm -rf ${DATADIR}
mkdir -p ${DATADIR}
cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_big_model_repository/plan_zero_1_float32 ${DATADIR}/
# set queue delay to ensure the execution will be in full batch
# (preferred batch size 8, maximum queue delay 10000000 us = 10 s)
(cd ${DATADIR}/plan_zero_1_float32 && \
    echo "dynamic_batching { " >> config.pbtxt && \
    echo "    preferred_batch_size: [ 8 ]" >> config.pbtxt && \
    echo "    max_queue_delay_microseconds: 10000000" >> config.pbtxt && \
    echo "}" >> config.pbtxt)

SERVER=/opt/tritonserver/bin/tritonserver
# NOTE(review): --buffer-manager-thread-count=4 presumably enables the
# multi-threaded input copy that the client measures -- confirm.
SERVER_ARGS="--model-repository=$DATADIR --buffer-manager-thread-count=4"
SERVER_LOG="./inference_server.log"
# util.sh is expected to provide run_server and check_test_results; neither
# is defined in this file.
source ../common/util.sh

# Remove logs left over from previous runs.
rm -f *.log*

RET=0

# A SERVER_PID of "0" after run_server is treated as a failed start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Errors are tolerated here so that the exit statuses can be inspected and
# the server is still shut down afterwards.
set +e
python $PARALLEL_COPY_TEST >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
else
    # The client exited cleanly; additionally verify that exactly one test
    # result was recorded.
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

# Stop the server and wait for the process to terminate.
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test Failed\n***"
fi

exit $RET


================================================
FILE: qa/L0_parameters/class_count_test.py
================================================
#!/usr/bin/env python3

# Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException


class ClassificationParameterTest(tu.TestResultCollector):
    """Exercises the ``class_count`` option of a requested output.

    The client flavour is chosen through the CLIENT_TYPE environment
    variable: "http" (the default) uses the HTTP client on port 8000, any
    other value uses the gRPC client on port 8001.
    """

    def setUp(self):
        protocol = os.environ.get("CLIENT_TYPE", "http")
        self.protocol = protocol
        if protocol != "http":
            self.client = grpcclient.InferenceServerClient("localhost:8001")
        else:
            self.client = httpclient.InferenceServerClient("localhost:8000")

    def _prepare_io(self, input_data, dtype):
        """Build the INPUT0 tensor and an OUTPUT0 request with class_count=5.

        Returns a ``(inputs, outputs)`` pair of single-element lists created
        with the client module that matches the selected protocol.
        """
        api = httpclient if self.protocol == "http" else grpcclient
        tensor_in = api.InferInput("INPUT0", input_data.shape, dtype)
        requested = api.InferRequestedOutput(name="OUTPUT0", class_count=5)
        tensor_in.set_data_from_numpy(input_data)
        return [tensor_in], [requested]

    def test_classificattion(self):
        payload = np.ones((1, 8), dtype=np.float32)
        inputs, outputs = self._prepare_io(payload, "FP32")

        result = self.client.infer(
            model_name="identity_fp32", inputs=inputs, outputs=outputs
        )

        # The HTTP client exposes the output as a dict, the gRPC client as an
        # object with attributes.
        output = result.get_output("OUTPUT0")
        output_dtype = (
            output["datatype"] if self.protocol == "http" else output.datatype
        )
        self.assertEqual(output_dtype, "BYTES")

        # The second dimension must equal the requested class_count.
        classes = result.as_numpy("OUTPUT0")
        self.assertIsNotNone(classes)
        self.assertEqual(classes.shape, (1, 5))

        # Every entry is expected to start with the value of the all-ones input.
        for entry in np.nditer(classes, flags=["refs_ok"]):
            label = entry.item().decode("utf-8")
            self.assertTrue(label.startswith("1.000000:"))

    def test_classificattion_unsupported_data_type(self):
        payload = np.array([["test"] * 8], dtype=object)
        inputs, outputs = self._prepare_io(payload, "BYTES")

        with self.assertRaises(InferenceServerException) as e:
            self.client.infer(
                model_name="identity_bytes", inputs=inputs, outputs=outputs
            )

        self.assertIn(
            "class result not available for output due to unsupported type 'BYTES'",
            str(e.exception),
        )

    def test_classification_output_tensor_too_large(self):
        # One element more than the 1_000_000 used as the limit by this test.
        max_elements = 1_000_000
        payload = np.ones((1, max_elements + 1), dtype=np.float32)
        inputs, outputs = self._prepare_io(payload, "FP32")

        with self.assertRaises(InferenceServerException) as e:
            self.client.infer(
                model_name="identity_fp32", inputs=inputs, outputs=outputs
            )

        self.assertIn("classification output tensor too large", str(e.exception))


# Run the test cases of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_parameters/model_repository/ensemble/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble that chains the "identity" model into the "parameter" model and
# exposes the latter's "key" and "value" tensors as its own outputs.
platform: "ensemble"
# NOTE(review): 0 presumably means the server adds no batch dimension to the
# shapes below -- confirm against the Triton model configuration docs.
max_batch_size: 0

# Single one-element FP32 input, fed to the first step.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# Two variable-length string outputs, produced by the second step.
output [
  {
    name: "key"
    data_type: TYPE_STRING
    dims: [ -1 ]
  },
  {
    name: "value"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]

ensemble_scheduling
{
  # In each map the key appears to be the tensor name inside the step's model
  # and the value the tensor name inside the ensemble -- confirm against the
  # Triton ensemble docs.
  # NOTE(review): model_version -1 presumably selects the latest version.
  step [
    {
      # Step 1: pass the ensemble input through "identity"; its OUTPUT0
      # becomes the intermediate ensemble tensor "OUTPUT0".
      model_name: "identity"
      model_version: -1
      input_map { key: "INPUT0", value: "INPUT0" }
      output_map { key: "OUTPUT0", value: "OUTPUT0" }
    },
    {
      # Step 2: feed the intermediate tensor to "parameter" and publish its
      # "key" and "value" tensors as the ensemble outputs.
      model_name: "parameter"
      model_version: -1
      input_map { key: "INPUT0", value: "OUTPUT0" }
      output_map { key: "key", value: "key" }
      output_map { key: "value", value: "value" }
    }
  ]
}


================================================
FILE: qa/L0_parameters/model_repository/identity/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model served by the "identity" backend with one FP32 input and one FP32
# output of the same one-element shape.
backend: "identity"
# NOTE(review): 0 presumably means the server adds no batch dimension to the
# shapes below -- confirm against the Triton model configuration docs.
max_batch_size: 0

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/L0_parameters/model_repository/parameter/1/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend model that returns the request parameters as tensors."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Add the tensors this model needs that the config does not declare yet.

        The model uses one input ("INPUT0", FP32, dims [1]) and two string
        outputs ("key" and "value", dims [-1]). Tensors whose name is already
        present in the given configuration are left untouched; the maximum
        batch size is always set to 0.

        Parameters
        ----------
        auto_complete_model_config
            Configuration object offering ``as_dict``, ``add_input``,
            ``add_output`` and ``set_max_batch_size``.

        Returns
        -------
        The same configuration object, after modification.
        """
        inputs = [{"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [1]}]
        outputs = [
            {"name": "key", "data_type": "TYPE_STRING", "dims": [-1]},
            {"name": "value", "data_type": "TYPE_STRING", "dims": [-1]},
        ]

        config = auto_complete_model_config.as_dict()
        declared_inputs = {tensor["name"] for tensor in config["input"]}
        declared_outputs = {tensor["name"] for tensor in config["output"]}

        for tensor in inputs:
            if tensor["name"] not in declared_inputs:
                auto_complete_model_config.add_input(tensor)
        for tensor in outputs:
            if tensor["name"] not in declared_outputs:
                auto_complete_model_config.add_output(tensor)

        auto_complete_model_config.set_max_batch_size(0)
        return auto_complete_model_config

    def execute(self, requests):
        """Return one response per request holding its parameters.

        The parameters of each request are parsed from JSON; their names go
        into the "key" tensor and their values, in the same order, into the
        "value" tensor.
        """
        # A simple model that puts the request parameters into the outputs.
        responses = []
        for request in requests:
            parameters = json.loads(request.parameters())
            key_output = pb_utils.Tensor(
                "key", np.asarray(list(parameters.keys()), dtype=object)
            )
            value_output = pb_utils.Tensor(
                "value", np.asarray(list(parameters.values()), dtype=object)
            )
            responses.append(
                pb_utils.InferenceResponse(output_tensors=[key_output, value_output])
            )

        return responses


================================================
FILE: qa/L0_parameters/parameters_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import queue
import unittest
from functools import partial
from unittest import IsolatedAsyncioTestCase

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.grpc.aio as asyncgrpcclient
import tritonclient.http as httpclient
import tritonclient.http.aio as asynchttpclient
from tritonclient.utils import InferenceServerException

# Name of the scenario under test, read from the TEST_HEADER environment
# variable (None when the variable is unset).
TEST_HEADER = os.environ.get("TEST_HEADER")


class InferenceParametersTest(IsolatedAsyncioTestCase):
    """Verify that custom request parameters are delivered to the model.

    Each test sends requests carrying the dictionaries in
    ``self.parameter_list`` and compares the "key" / "value" output tensors
    of the response with the parameters that were sent plus the request
    headers expected to be forwarded for the scenario named by
    ``TEST_HEADER``.
    """

    # Parameter names starting with this prefix are reserved for Triton usage;
    # requests that use them are expected to be rejected.
    RESERVED_PREFIX = "triton_"

    async def asyncSetUp(self):
        """Create the sync/async HTTP and GRPC clients and the test fixtures."""
        self.http = httpclient.InferenceServerClient(url="localhost:8000")
        self.async_http = asynchttpclient.InferenceServerClient(url="localhost:8000")
        self.grpc = grpcclient.InferenceServerClient(url="localhost:8001")
        self.async_grpc = asyncgrpcclient.InferenceServerClient(url="localhost:8001")

        self.parameter_list = [
            {"key1": "value1", "key2": "value2"},
            {"key1": 1, "key2": 2},
            {"key1": 123.123, "key2": 321.321},
            {"key1": True, "key2": "value2"},
            # Uses the reserved prefix: this request must be rejected.
            {"triton_": True, "key2": "value2"},
        ]

        # Headers the tests expect to be forwarded to the model.  The
        # backslash is spelled as an explicit "\\" escape: the value is
        # unchanged, but a bare "\ " is an invalid escape sequence that newer
        # Python versions warn about.
        forwarded_headers = {
            "my_header_1": "my_value_1",
            "my_header_2": "my_value_2",
            "my_header_3": 'This is a "quoted" string with a backslash\\ ',
        }

        # Only "test_params" tests parameters without headers.
        if TEST_HEADER != "test_params":
            self.headers = {
                "header_1": "value_1",
                "header_2": "value_2",
                **forwarded_headers,
            }

            # only these headers should be forwarded to the model.
            if TEST_HEADER == "test_grpc_header_forward_pattern_case_sensitive":
                self.expected_headers = {}
            else:
                self.expected_headers = dict(forwarded_headers)
        else:
            self.headers = {}
            self.expected_headers = {}

        def callback(user_data, result, error):
            # Hand whichever of result/error was produced back to the test
            # through the queue supplied as `user_data`.
            if error:
                user_data.put(error)
            else:
                user_data.put(result)

        self.grpc_callback = callback

    @classmethod
    def _uses_reserved_prefix(cls, parameters):
        """Return True if any parameter name starts with the reserved prefix."""
        return any(key.startswith(cls.RESERVED_PREFIX) for key in parameters)

    def _valid_parameter_list(self):
        """Return the parameter sets that are not expected to be rejected."""
        return [
            parameters
            for parameters in self.parameter_list
            if not self._uses_reserved_prefix(parameters)
        ]

    def create_inputs(self, client_type):
        """Build the single FP32 "INPUT0" tensor of shape [1] for `client_type`.

        `client_type` is the client module (HTTP or GRPC flavor) whose
        `InferInput` class is used to construct the input.
        """
        inputs = []
        inputs.append(client_type.InferInput("INPUT0", [1], "FP32"))

        # Initialize the data
        inputs[0].set_data_from_numpy(np.asarray([1], dtype=np.float32))
        return inputs

    async def send_request_and_verify(
        self, client_type, client, is_async=False, model_name="parameter"
    ):
        """Send one `infer` request per parameter set and check the outcome.

        Parameter sets using the reserved prefix must raise
        `InferenceServerException`; every other response is checked with
        `verify_outputs`.

        Args:
            client_type: client module used to build the inputs.
            client: client instance whose `infer` method is called.
            is_async: when True, `client.infer` is awaited.
            model_name: name of the model to send the requests to.
        """
        inputs = self.create_inputs(client_type)
        for parameters in self.parameter_list:
            # Setup infer callable to re-use below for brevity
            infer_callable = partial(
                client.infer,
                model_name=model_name,
                inputs=inputs,
                parameters=parameters,
                headers=self.headers,
            )

            # The `triton_` prefix is reserved for Triton usage
            if self._uses_reserved_prefix(parameters):
                with self.assertRaises(InferenceServerException):
                    if is_async:
                        await infer_callable()
                    else:
                        infer_callable()
                # Keep going so that parameter sets listed after a rejected
                # one are still exercised.
                continue

            if is_async:
                result = await infer_callable()
            else:
                result = infer_callable()

            self.verify_outputs(result, parameters)

    def verify_outputs(self, result, parameters):
        """Assert that the response lists exactly the expected keys and values.

        Keys and values are compared as sets, so neither their order nor the
        key/value pairing is checked.
        """
        keys = result.as_numpy("key").astype(str).tolist()
        values = result.as_numpy("value").astype(str).tolist()

        expected_keys = list(parameters) + list(self.expected_headers)
        self.assertEqual(set(keys), set(expected_keys))

        # We have to convert the parameter values to string
        expected_values = [str(value) for value in parameters.values()]
        expected_values.extend(self.expected_headers.values())
        self.assertEqual(set(values), set(expected_values))

    async def test_grpc_parameter(self):
        await self.send_request_and_verify(grpcclient, self.grpc)

    async def test_http_parameter(self):
        await self.send_request_and_verify(httpclient, self.http)

    async def test_async_http_parameter(self):
        await self.send_request_and_verify(
            asynchttpclient, self.async_http, is_async=True
        )

    async def test_async_grpc_parameter(self):
        await self.send_request_and_verify(
            asyncgrpcclient, self.async_grpc, is_async=True
        )

    def test_http_async_parameter(self):
        inputs = self.create_inputs(httpclient)
        # Skip the parameters that return an error
        for parameters in self._valid_parameter_list():
            result = self.http.async_infer(
                model_name="parameter",
                inputs=inputs,
                parameters=parameters,
                headers=self.headers,
            ).get_result()
            self.verify_outputs(result, parameters)

    def test_grpc_async_parameter(self):
        user_data = queue.Queue()
        inputs = self.create_inputs(grpcclient)
        # Skip the parameters that return an error
        for parameters in self._valid_parameter_list():
            self.grpc.async_infer(
                model_name="parameter",
                inputs=inputs,
                parameters=parameters,
                headers=self.headers,
                callback=partial(self.grpc_callback, user_data),
            )
            result = user_data.get()
            # The callback enqueues the error object on failure, so check the
            # type of what was received (an identity comparison against the
            # exception class could never fail).
            self.assertNotIsInstance(result, InferenceServerException)
            self.verify_outputs(result, parameters)

    def test_grpc_stream_parameter(self):
        user_data = queue.Queue()
        self.grpc.start_stream(
            callback=partial(self.grpc_callback, user_data), headers=self.headers
        )
        # Always stop the stream, even when an assertion below fails.
        try:
            inputs = self.create_inputs(grpcclient)
            # Skip the parameters that return an error
            for parameters in self._valid_parameter_list():
                # async stream infer
                self.grpc.async_stream_infer(
                    model_name="parameter", inputs=inputs, parameters=parameters
                )
                result = user_data.get()
                self.assertNotIsInstance(result, InferenceServerException)
                self.verify_outputs(result, parameters)
        finally:
            self.grpc.stop_stream()

    async def test_ensemble_parameter_forwarding(self):
        await self.send_request_and_verify(httpclient, self.http, model_name="ensemble")

    async def asyncTearDown(self):
        """Close every client created in `asyncSetUp`."""
        self.http.close()
        self.grpc.close()
        await self.async_grpc.close()
        await self.async_http.close()


if __name__ == "__main__":
    # Run every test case in this module when the file is executed directly.
    unittest.main()


================================================
FILE: qa/L0_parameters/test.sh
================================================
#!/bin/bash
# Copyright 2023-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument overrides
# NVIDIA_TRITON_SERVER_VERSION, and TEST_REPO_ARCH (if set) is appended.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

CLIENT_LOG="./client.log"
TEST_SCRIPT_PY="parameters_test.py"

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_LOG="./inference_server.log"
source ../common/util.sh

MODELDIR="model_repository"
# Use identity model as dummy step to ensure parameters pass through each step
mkdir -p "${MODELDIR}/identity/1"
mkdir -p "${MODELDIR}/ensemble/1"

# TODO: Add support and testing for C++ client parameters:
# https://jirasw.nvidia.com/browse/DLIS-4673

all_tests=("test_params"
           "test_headers"
           "test_header_forward_pattern_case_insensitive"
           "test_grpc_header_forward_pattern_case_sensitive")

RET=0
for i in "${all_tests[@]}"; do
  # TEST_HEADER is a parameter used by `parameters_test.py` that controls
  # whether the script will test for inclusion of headers in parameters or not.
  SERVER_ARGS="--model-repository=${MODELDIR} --exit-timeout-secs=120"
  if [ "$i" == "test_headers" ]; then
    SERVER_ARGS+=" --grpc-header-forward-pattern my_header.*"
    SERVER_ARGS+=" --http-header-forward-pattern my_header.*"
  elif [ "$i" == "test_header_forward_pattern_case_insensitive" ]; then
    SERVER_ARGS+=" --grpc-header-forward-pattern MY_HEADER.*"
    SERVER_ARGS+=" --http-header-forward-pattern MY_HEADER.*"
  # NOTE: headers sent through the python HTTP client may be automatically
  # lowercased by internal libraries like geventhttpclient, so we only test
  # GRPC client for case-sensitivity here:
  # https://github.com/geventhttpclient/geventhttpclient/blob/d1e14356c3b02099c879cf9b3bdb684a0cbd8bf5/src/geventhttpclient/header.py#L62-L63
  elif [ "$i" == "test_grpc_header_forward_pattern_case_sensitive" ]; then
    SERVER_ARGS+=" --grpc-header-forward-pattern (?-i)MY_HEADER.*"
  fi
  run_server
  if [ "$SERVER_PID" == "0" ]; then
      echo -e "\n***\n*** Failed to start $SERVER\n***"
      cat $SERVER_LOG
      exit 1
  fi

  set +e
  TEST_HEADER="$i" python3 $TEST_SCRIPT_PY >$CLIENT_LOG 2>&1
  if [ $? -ne 0 ]; then
      cat $CLIENT_LOG
      echo -e "\n***\n*** Test Failed - $i\n***"
      RET=1
  fi

  # Shut the server down while errors are still tolerated so that a
  # non-graceful exit is reported as a failure (as in the classification
  # loop below) instead of silently aborting the script under `set -e`.
  kill $SERVER_PID
  wait $SERVER_PID

  if [ $? -ne 0 ]; then
      echo -e "\n***\n*** Test Server shut down non-gracefully - $i\n***"
      RET=1
  fi
  set -e
done


# Test Classification Extension
PYTHON_MODELS_DIR="${PYTHON_MODELS_DIR:-/opt/tritonserver/qa/python_models}"
MODELDIR="models"
TEST_RESULT_FILE="test_results.txt"
TEST_SCRIPT_PY="./class_count_test.py"

# Build a fresh repository holding the FP32 identity model and a copy of it
# converted to a STRING ("bytes") variant.
rm -rf $MODELDIR
mkdir -p "${MODELDIR}/identity_fp32/1"
cp ${PYTHON_MODELS_DIR}/identity_fp32/config.pbtxt "${MODELDIR}/identity_fp32/"
cp ${PYTHON_MODELS_DIR}/identity_fp32/model.py "${MODELDIR}/identity_fp32/1/"

mkdir -p "${MODELDIR}/identity_bytes/1"
cp ${PYTHON_MODELS_DIR}/identity_fp32/config.pbtxt "${MODELDIR}/identity_bytes/"
cp ${PYTHON_MODELS_DIR}/identity_fp32/model.py "${MODELDIR}/identity_bytes/1/"
(cd "${MODELDIR}/identity_bytes" && \
    sed -i 's/identity_fp32/identity_bytes/' config.pbtxt && \
    sed -i 's/TYPE_FP32/TYPE_STRING/' config.pbtxt )

SERVER_ARGS="--model-repository=`pwd`/${MODELDIR} --log-verbose=1"
for client_type in http grpc; do
  # CLIENT_TYPE is exported for the test script; each client type gets its
  # own server/client log files.
  export CLIENT_TYPE=$client_type
  SERVER_LOG="./class_count_test_${client_type}_server.log"
  CLIENT_LOG="./class_count_test_${client_type}_client.log"
  rm -f $SERVER_LOG $CLIENT_LOG
  run_server
  if [ "$SERVER_PID" == "0" ]; then
      echo -e "\n***\n*** Failed to start $SERVER\n***"
      cat $SERVER_LOG
      exit 1
  fi

  set +e
  python3 $TEST_SCRIPT_PY -v >>"$CLIENT_LOG" 2>&1
  if [ $? -ne 0 ]; then
      cat $CLIENT_LOG
      echo -e "\n***\n*** Test Failed - class_count_${client_type}_test_client\n***"
      RET=1
  else
      check_test_results $TEST_RESULT_FILE 3
      if [ $? -ne 0 ]; then
          cat $TEST_RESULT_FILE
          echo -e "\n***\n*** Test Result Verification Failed - class_count_${client_type}_test_client\n***"
          RET=1
      fi
  fi
  kill $SERVER_PID
  wait $SERVER_PID

  if [ $? -ne 0 ]; then
      echo -e "\n***\n*** Test Server shut down non-gracefully\n***"
      RET=1
  fi
  set -e
done

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_passive_instance/models/distributed_int32_int32_int32/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration used by the L0_passive_instance test: two INT32 inputs
# and two INT32 outputs of 16 elements each, served by the
# "distributed_addsub" backend.
name: "distributed_int32_int32_int32"
backend: "distributed_addsub"
max_batch_size: 1
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
# One regular CPU instance group plus one GPU instance group marked passive.
# NOTE(review): the accompanying test.sh expects that no request is executed
# on the GPU instance, i.e. only the CPU instance accepts requests.
instance_group [
  {
    kind: KIND_CPU
  },
  {
    kind: KIND_GPU
    passive: true
  }
]

================================================
FILE: qa/L0_passive_instance/passive_instance_test.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import infer_util as iu
import numpy as np
import test_util as tu


class PassiveInstanceTest(tu.TestResultCollector):
    """Run a single inference check against the "distributed" model."""

    def test_inference(self):
        # Any exception escaping the helper is reported as a test failure
        # carrying the original error text.
        # NOTE(review): the positional arguments look like model-name prefix,
        # tensor shape, batch size and input/output dtypes - confirm against
        # infer_util.infer_exact.
        try:
            iu.infer_exact(
                self, "distributed", (1, 16), 1, np.int32, np.int32, np.int32
            )
        except Exception as ex:
            # self.fail() states the intent directly and avoids the misleading
            # "False is not true" prefix that assertTrue(False, ...) adds.
            self.fail("unexpected error {}".format(ex))


if __name__ == "__main__":
    # Run every test case in this module when the file is executed directly.
    unittest.main()


================================================
FILE: qa/L0_passive_instance/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: an explicit first argument takes precedence over the
# NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
TEST_RESULT_FILE='test_results.txt'
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when one is requested.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
TEST_SCRIPT_PY=passive_instance_test.py
EXPECTED_NUM_TESTS="1"

pip3 install perf_analyzer
PERF_ANALYZER=perf_analyzer
MODEL=distributed_int32_int32_int32

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models --exit-timeout-secs=120"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

# Start from clean logs and make sure the model version directory exists.
rm -f $SERVER_LOG $CLIENT_LOG
mkdir -p models/${MODEL}/1

RET=0

run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# From here on failures are recorded in RET instead of aborting the script.
set +e

# Functional check: run the Python test, then validate its result file.
if ! python $TEST_SCRIPT_PY >$CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
elif ! check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi

# Generate concurrency, check if only the CPU instances are accepting requests
if ! $PERF_ANALYZER -m $MODEL --concurrency-range 4 >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** perf_analyzer for $MODEL failed\n***"
    RET=1
fi

# A matching server log line means a request was executed on the GPU instance.
if grep "(GPU device 0), executing" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expecting no request sent to GPU instance\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if (( RET == 0 )); then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_perf_deeprecommender/run_test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Runs perf_analyzer against one model for every combination of static batch
# size, dynamic batch size and instance count, and writes per-run
# .log/.csv/.metrics/.tjson files named after the combination.
#
# Inputs (environment):
#   MODEL_NAME, MODEL_PATH, PERF_CLIENT_PROTOCOL, CONCURRENCY   (required)
#   MODEL_FRAMEWORK            (recorded in the report and final message)
#   STATIC_BATCH_SIZES, DYNAMIC_BATCH_SIZES, INSTANCE_COUNTS    (default: 1)
#   PA_MAX_TRIALS              (default: 50)
#   BENCHMARK_REPORTER_URL     (optional, forwarded to the reporter)

STATIC_BATCH_SIZES=${STATIC_BATCH_SIZES:=1}
DYNAMIC_BATCH_SIZES=${DYNAMIC_BATCH_SIZES:=1}
INSTANCE_COUNTS=${INSTANCE_COUNTS:=1}

# Fail early with a clear message instead of running `rm -fr models`, `cp`
# and perf_analyzer with empty arguments.
: "${MODEL_NAME:?MODEL_NAME must be set}"
: "${MODEL_PATH:?MODEL_PATH must be set}"
: "${PERF_CLIENT_PROTOCOL:?PERF_CLIENT_PROTOCOL must be set}"
: "${CONCURRENCY:?CONCURRENCY must be set}"

pip3 install perf_analyzer

PERF_CLIENT=perf_analyzer
REPORTER=../common/reporter.py

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models"
source ../common/util.sh

# Select the single GPU that will be available to the inference
# server. Or use "export CUDA_VISIBLE_DEVICES=" to run on CPU.
export CUDA_VISIBLE_DEVICES=0

RET=0

for STATIC_BATCH in $STATIC_BATCH_SIZES; do
    for DYNAMIC_BATCH in $DYNAMIC_BATCH_SIZES; do
        for INSTANCE_CNT in $INSTANCE_COUNTS; do
            # With dynamic batching enabled, only combinations where the
            # static batch is smaller than the dynamic batch are run.
            if (( ($DYNAMIC_BATCH > 1) && ($STATIC_BATCH >= $DYNAMIC_BATCH) )); then
                continue
            fi

            # max_batch_size must accommodate the larger of the two batches.
            MAX_BATCH=${STATIC_BATCH}
            if (( $DYNAMIC_BATCH > $STATIC_BATCH )); then
                MAX_BATCH=${DYNAMIC_BATCH}
            fi

            if (( $DYNAMIC_BATCH > 1 )); then
                NAME=${MODEL_NAME}_sbatch${STATIC_BATCH}_dbatch${DYNAMIC_BATCH}_instance${INSTANCE_CNT}_${PERF_CLIENT_PROTOCOL}
            else
                NAME=${MODEL_NAME}_sbatch${STATIC_BATCH}_instance${INSTANCE_CNT}_${PERF_CLIENT_PROTOCOL}
            fi

            # Rebuild the model repository from MODEL_PATH and patch the
            # config for this combination.
            rm -fr models && mkdir -p models && \
                cp -r $MODEL_PATH models/. && \
                (cd models/$MODEL_NAME && \
                        sed -i "s/^max_batch_size:.*/max_batch_size: ${MAX_BATCH}/" config.pbtxt && \
                        echo "instance_group [ { count: ${INSTANCE_CNT} }]" >> config.pbtxt)
            if (( $DYNAMIC_BATCH > 1 )); then
                (cd models/$MODEL_NAME && \
                        echo "dynamic_batching { preferred_batch_size: [ ${DYNAMIC_BATCH} ] }" >> config.pbtxt)
            fi

            echo "Time before starting server: $(date)"
            SERVER_LOG="${NAME}.server.log"
            run_server
            if (( $SERVER_PID == 0 )); then
                echo -e "\n***\n*** Failed to start $SERVER\n***"
                cat $SERVER_LOG
                exit 1
            fi

            set +e
            echo "Time before perf analyzer trials: $(date)"

            # Run the model once to warm up. Some frameworks do
            # optimization on the first requests.  Must warmup similar
            # to actual run so that all instances are ready
            $PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p5000 \
                         -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY}

            # pipefail makes $? reflect perf_analyzer rather than `tee`.
            set -o pipefail
            PA_MAX_TRIALS=${PA_MAX_TRIALS:-"50"}
            $PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p5000 \
                         -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY} \
                         --max-trials "${PA_MAX_TRIALS}" \
                         -f ${NAME}.csv 2>&1 | tee ${NAME}.log
            if (( $? != 0 )); then
                echo -e "\n***\n*** FAILED Perf Analyzer measurement\n***"
                RET=1
            fi
            echo "Time after perf analyzer trials: $(date)"
            set +o pipefail

            curl localhost:8002/metrics -o ${NAME}.metrics >> ${NAME}.log 2>&1
            if (( $? != 0 )); then
                echo -e "\n***\n*** FAILED to get metrics\n***"
                RET=1
            fi

            set -e

            # Describe this run for the reporter. The first line truncates
            # the file so a leftover .tjson from an earlier run cannot
            # produce invalid JSON.
            echo -e "[{\"s_benchmark_kind\":\"benchmark_perf\"," > ${NAME}.tjson
            echo -e "\"s_benchmark_name\":\"deeprecommender\"," >> ${NAME}.tjson
            echo -e "\"s_server\":\"triton\"," >> ${NAME}.tjson
            echo -e "\"s_protocol\":\"${PERF_CLIENT_PROTOCOL}\"," >> ${NAME}.tjson
            echo -e "\"s_framework\":\"${MODEL_FRAMEWORK}\"," >> ${NAME}.tjson
            echo -e "\"s_model\":\"${MODEL_NAME}\"," >> ${NAME}.tjson
            echo -e "\"l_concurrency\":${CONCURRENCY}," >> ${NAME}.tjson
            echo -e "\"l_dynamic_batch_size\":${DYNAMIC_BATCH}," >> ${NAME}.tjson
            echo -e "\"l_batch_size\":${STATIC_BATCH}," >> ${NAME}.tjson
            echo -e "\"l_instance_count\":${INSTANCE_CNT}}]" >> ${NAME}.tjson

            kill $SERVER_PID
            wait $SERVER_PID

            # Reporting is optional: it only runs when the reporter script
            # is present.
            if [ -f $REPORTER ]; then
                set +e

                URL_FLAG=
                if [ ! -z "${BENCHMARK_REPORTER_URL}" ]; then
                    URL_FLAG="-u ${BENCHMARK_REPORTER_URL}"
                fi

                $REPORTER -v -o ${NAME}.json --csv ${NAME}.csv ${URL_FLAG} ${NAME}.tjson
                if (( $? != 0 )); then
                    RET=1
                fi

                set -e
            fi
        done
    done
done

# The framework name reaches this script as MODEL_FRAMEWORK.
if (( $RET == 0 )); then
    echo -e "\n***\n*** $MODEL_FRAMEWORK Test Passed\n***"
else
    echo -e "\n***\n*** $MODEL_FRAMEWORK Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_perf_deeprecommender/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

RET=0
REPODIR=/data/inferenceserver/${REPO_VERSION}
TRTEXEC=/usr/src/tensorrt/bin/trtexec
MODEL="deeprecommender"
PROTOCOLS="grpc http"

rm -f *.log  *.csv *.metrics *.tjson *.json

#
# Test minimum latency
#
STATIC_BATCH=1
INSTANCE_CNT=1
CONCURRENCY=1

# Create the TensorRT plan from ONNX
rm -fr tensorrt_models && mkdir -p tensorrt_models/deeprecommender_plan/0 && \
cp $REPODIR/perf_model_store/deeprecommender_onnx/1/model.onnx tensorrt_models/deeprecommender_plan && \
(cd tensorrt_models/deeprecommender_plan && \
echo 'name: "deeprecommender_plan"
platform: "tensorrt_plan"
max_batch_size: ${STATIC_BATCH}
input [
  {
    name: "Placeholder:0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [17736,1,1]
  }
]
output [
  {
    name: "fc5/Relu:0"
    data_type: TYPE_FP32
    dims: [17736]
  }
]' >| config.pbtxt)

$TRTEXEC --onnx=tensorrt_models/deeprecommender_plan/model.onnx --verbose \
         --saveEngine=tensorrt_models/deeprecommender_plan/0/model.plan \
         --minShapes=Placeholder:0:1x17736x1x1 \
         --optShapes=Placeholder:0:${STATIC_BATCH}x17736x1x1 \
         --maxShapes=Placeholder:0:${STATIC_BATCH}x17736x1x1

if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed to generate TensorRT Plan \n***"
    exit 1
fi
rm tensorrt_models/deeprecommender_plan/model.onnx


# Tests with each model
for FRAMEWORK in plan onnx libtorch; do
    MODEL_NAME=${MODEL}_${FRAMEWORK}
    if [ "$FRAMEWORK" == "plan" ]; then
        REPO=`pwd`/tensorrt_models
    elif [[ "$FRAMEWORK" == *"_trt" ]]; then
        REPO=`pwd`/optimized_model_store
    else
        REPO=$REPODIR/perf_model_store
    fi
    for PROTOCOL in $PROTOCOLS; do
        MODEL_NAME=${MODEL_NAME} \
                MODEL_FRAMEWORK=${FRAMEWORK} \
                MODEL_PATH="$REPO/${MODEL_NAME}" \
                STATIC_BATCH_SIZES=${STATIC_BATCH} \
                DYNAMIC_BATCH_SIZES=1 \
                PERF_CLIENT_PROTOCOL=${PROTOCOL} \
                INSTANCE_COUNTS=${INSTANCE_CNT} \
                CONCURRENCY=${CONCURRENCY} \
                bash -x run_test.sh
        if [ $? -ne 0 ]; then
          RET=1
        fi
    done
done

#
# Test large static batch = 256 w/ 2 instances
#
STATIC_BATCH=256
INSTANCE_CNT=2
CONCURRENCY=4

# Create the TensorRT plan from ONNX.
#
# The model configuration is written with an unquoted here-document so that
# ${STATIC_BATCH} is expanded to its value. (A single-quoted string would put
# the literal text '${STATIC_BATCH}' into config.pbtxt.) '>|' overwrites
# config.pbtxt even when the 'noclobber' shell option is set.
rm -fr tensorrt_models && mkdir -p tensorrt_models/deeprecommender_plan/0 && \
cp $REPODIR/perf_model_store/deeprecommender_onnx/1/model.onnx tensorrt_models/deeprecommender_plan && \
(cd tensorrt_models/deeprecommender_plan && \
cat >| config.pbtxt <<EOF
name: "deeprecommender_plan"
platform: "tensorrt_plan"
max_batch_size: ${STATIC_BATCH}
input [
  {
    name: "Placeholder:0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [17736,1,1]
  }
]
output [
  {
    name: "fc5/Relu:0"
    data_type: TYPE_FP32
    dims: [17736]
  }
]
EOF
)

# Build the engine; the shape arguments cover batch sizes from 1 up to
# ${STATIC_BATCH}
$TRTEXEC --onnx=tensorrt_models/deeprecommender_plan/model.onnx --verbose \
         --saveEngine=tensorrt_models/deeprecommender_plan/0/model.plan \
         --minShapes=Placeholder:0:1x17736x1x1 \
         --optShapes=Placeholder:0:${STATIC_BATCH}x17736x1x1 \
         --maxShapes=Placeholder:0:${STATIC_BATCH}x17736x1x1

if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed to generate TensorRT Plan \n***"
    exit 1
fi
# The ONNX copy was only needed as input for the conversion
rm tensorrt_models/deeprecommender_plan/model.onnx

# Run the benchmark for each framework variant of the model over every
# protocol listed in $PROTOCOLS
for FRAMEWORK in plan onnx libtorch; do
    MODEL_NAME=${MODEL}_${FRAMEWORK}

    # Select the model repository that holds this framework's model
    case "$FRAMEWORK" in
        plan)
            REPO=$(pwd)/tensorrt_models
            ;;
        *_trt)
            REPO=$(pwd)/optimized_model_store
            ;;
        *)
            REPO=$REPODIR/perf_model_store
            ;;
    esac

    for PROTOCOL in $PROTOCOLS; do
        # Settings are handed to run_test.sh through its environment
        MODEL_NAME=${MODEL_NAME} \
            MODEL_FRAMEWORK=${FRAMEWORK} \
            MODEL_PATH="$REPO/${MODEL_NAME}" \
            STATIC_BATCH_SIZES=${STATIC_BATCH} \
            DYNAMIC_BATCH_SIZES=1 \
            PERF_CLIENT_PROTOCOL=${PROTOCOL} \
            INSTANCE_COUNTS=${INSTANCE_CNT} \
            CONCURRENCY=${CONCURRENCY} \
            bash -x run_test.sh
        # Remember the failure but keep running the remaining combinations
        if (( $? != 0 )); then
            RET=1
        fi
    done
done

# Report the aggregated result and propagate it as the script's exit status;
# without the explicit exit the script would return the status of the last
# echo (success) even when a test run failed.
if (( $RET == 0 )); then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_perf_kaldi/create_data.sh
================================================
#!/bin/bash
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Needs to be run in asr_kaldi main directory and must be copied to
# draco for benchmark test
TRITON_VERSION="20.05"

# Run the dataset setup script inside the asr_kaldi container. The ./data
# directory of the current working directory is mounted at /mnt/data and the
# caller's uid/gid are passed to the setup script as arguments. Expansions are
# quoted so that a working directory containing whitespace is still passed to
# docker as a single argument.
nvidia-docker run --rm \
   --shm-size=1g \
   --ulimit memlock=-1 \
   --ulimit stack=67108864 \
   -v "$PWD/data:/mnt/data" \
   "gitlab-master.nvidia.com:5005/dl/joc/asr_kaldi:${TRITON_VERSION}-server-py3-devel" \
   /workspace/scripts/docker/dataset_setup.sh "$(id -u)" "$(id -g)"


================================================
FILE: qa/L0_perf_kaldi/test.sh
================================================
#!/bin/bash
# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Test with 20.05 because kaldi image for 20.06 is not yet available
TRITON_VERSION="20.05"

# Shallow-clone the release branch matching TRITON_VERSION into /workspace
cd /workspace
git clone --single-branch --depth=1 -b r${TRITON_VERSION} \
    https://github.com/NVIDIA/triton-inference-server.git

# Register the kaldi client directory with the C++ client build
echo "add_subdirectory(kaldi-asr-client)" >> triton-inference-server/src/clients/c++/CMakeLists.txt

# Copy the kaldi client sources into the cloned client tree and install the
# kaldi_online model configuration into the local model repository
cp -r asr_kaldi/kaldi-asr-client triton-inference-server/src/clients/c++
cp -r asr_kaldi/model-repo/kaldi_online/config.pbtxt model-repo/kaldi_online/

# Client dependencies
(apt-get update && \
    apt-get install -y --no-install-recommends \
        libssl-dev \
        libb64-dev \
        rapidjson-dev)

pip3 install --upgrade wheel setuptools grpcio-tools

# Build client library and kaldi perf client
# The build runs in a subshell so the directory change and the exported
# CMAKE_POLICY_VERSION_MINIMUM do not leak into the rest of the script.
(cd triton-inference-server/build && \
    export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
    cmake -DCMAKE_BUILD_TYPE=Release \
          -DCMAKE_INSTALL_PREFIX:PATH=/workspace/install && \
    make -j16 trtis-clients)

# Overall test status; set to 1 below when a client run or the reporter fails
RET=0
rm -rf *.log

# Run server
# The server is started in the background with stdout and stderr captured in
# server.log.
/opt/tritonserver/bin/trtserver --model-repo=/workspace/model-repo > server.log 2>&1 &
SERVER_PID=$!
# NOTE(review): $! expands to the PID of the background job just started, so
# this comparison against "0" is not expected to be true even when the server
# fails to come up. There is also no readiness wait before the clients are
# started -- confirm whether a real liveness/readiness check is wanted here.
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start server\n***"
    cat server.log
    exit 1
fi

KALDI_CLIENT=install/bin/kaldi_asr_parallel_client

# Run client
RESULTS_DIR="/data/results"
mkdir -p $RESULTS_DIR

CONCURRENCY=2000

# Throughput run. The client only supports GRPC; it performs 5 iterations on
# the dataset.
$KALDI_CLIENT -i 5 -c ${CONCURRENCY} >> client_1.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

# Throughput is the first space-separated token of the second tab-separated
# field on the 'Throughput:' line
THROUGHPUT=$(grep 'Throughput:' client_1.log | cut -f 2 | cut -f 1 -d ' ')

# Latency run. The '-o' flag is needed to run online and capture latency.
$KALDI_CLIENT -i 5 -c ${CONCURRENCY} -o >> client_2.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

# The 95th percentile latency is the fifth tab-separated field of the line
# that follows 'Latencies:'
LATENCY_95=$(grep -A1 "Latencies:" client_2.log | sed -n '2 p' | cut -f 5)

REPORTER=triton-inference-server/qa/common/reporter.py

# Append the benchmark record to results.tjson, one JSON field per line
{
    echo -e "[{\"s_benchmark_kind\":\"benchmark_perf\","
    echo -e "\"s_benchmark_name\":\"kaldi\","
    echo -e "\"s_server\":\"triton\","
    echo -e "\"s_protocol\":\"grpc\","
    echo -e "\"s_model\":\"asr_kaldi\","
    echo -e "\"l_concurrency\":${CONCURRENCY},"
    echo -e "\"d_infer_per_sec\":${THROUGHPUT},"
    echo -e "\"d_latency_p95_ms\":${LATENCY_95},"
    echo -e "\"l_instance_count\":1}]"
} >> results.tjson

# Hand the record to the reporter script when it is available
if [ -f $REPORTER ]; then
    set +e

    # Only pass a URL flag when BENCHMARK_REPORTER_URL is set
    URL_FLAG=
    if [ ! -z ${BENCHMARK_REPORTER_URL} ]; then
        URL_FLAG="-u ${BENCHMARK_REPORTER_URL}"
    fi

    $REPORTER -v -o results.json ${URL_FLAG} results.tjson
    if [ $? -ne 0 ]; then
        RET=1
    fi

    set -e
fi

# Print the overall verdict and return it as the script's exit status
if [ "$RET" -eq 0 ]; then
    echo -e "\n***\n*** ASR Kaldi Benchmark Passed\n***"
else
    echo -e "\n***\n*** ASR Kaldi Benchmark FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_perf_nomodel/custom_models/custom_zero_1_float32/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Identity-backend model used for the "custom" backend runs of the nomodel
# perf test. The tensors are declared as 32-bit floats so that the data type
# matches the model name. The test script rewrites the maximum batch size and
# the tensor shape of its working copy before the model is loaded.
name: "custom_zero_1_float32"
backend: "identity"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]


================================================
FILE: qa/L0_perf_nomodel/run_test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version (first argument); used below to locate the test data
REPO_VERSION=$1

# Sweep parameters. Each one keeps a value already provided through the
# environment and otherwise falls back to the default shown here.
: "${BACKENDS:=plan custom onnx libtorch python}"
: "${STATIC_BATCH_SIZES:=1}"
: "${DYNAMIC_BATCH_SIZES:=1}"
: "${INSTANCE_COUNTS:=1}"
: "${CONCURRENCY:=1}"

# Perf Analyzer settings, overridable in the same way
: "${PERF_CLIENT_PROTOCOL:=grpc}"
: "${PERF_CLIENT_PERCENTILE:=95}"
: "${PERF_CLIENT_STABILIZE_WINDOW:=5000}"
: "${PERF_CLIENT_STABILIZE_THRESHOLD:=5}"
: "${TENSOR_SIZE:=1}"
: "${TENSOR_ELEMENT_BYTES:=4}"
: "${SHARED_MEMORY:=none}"
REPORTER=../common/reporter.py

# Directory that receives logs and result files
: "${RESULTDIR:=.}"

# Server installation and model repository locations
: "${TRITON_DIR:=/opt/tritonserver}"
: "${ARCH:=x86_64}"
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
MODEL_REPO="${PWD}/models"
PERF_CLIENT=perf_analyzer
SERVER_ARGS="--model-repository=${MODEL_REPO} --backend-directory=${BACKEND_DIR}"
. ../common/util.sh
pip3 install perf_analyzer

# DATADIR is already set in environment variable for aarch64
if [[ "${ARCH}" != "aarch64" ]]; then
    DATADIR="/data/inferenceserver/${REPO_VERSION}"
fi

# Select the single GPU that will be available to the inference server
export CUDA_VISIBLE_DEVICES=0

mkdir -p ${RESULTDIR}
RET=0

# Stage the Python identity model under the model name used by this test
case "$BACKENDS" in
    *python*)
        cp /opt/tritonserver/backends/python/triton_python_backend_utils.py .

        PY_MODEL_DIR=python_models/python_zero_1_float32
        mkdir -p ${PY_MODEL_DIR}/1 && \
            cp ../python_models/identity_fp32/model.py ./${PY_MODEL_DIR}/1/model.py && \
            cp ../python_models/identity_fp32/config.pbtxt ./${PY_MODEL_DIR}/config.pbtxt
        # Rename the copied model in its configuration
        sed -i "s/^name:.*/name: \"python_zero_1_float32\"/" ${PY_MODEL_DIR}/config.pbtxt
        ;;
esac

# The custom model needs a version directory to exist
case "$BACKENDS" in
    *custom*)
        mkdir -p "custom_models/custom_zero_1_float32/1"
        ;;
esac

# Only pass --percentile when a non-zero percentile was requested
PERF_CLIENT_PERCENTILE_ARGS=""
if (( ${PERF_CLIENT_PERCENTILE} != 0 )); then
    PERF_CLIENT_PERCENTILE_ARGS="--percentile=${PERF_CLIENT_PERCENTILE}"
fi
PERF_CLIENT_EXTRA_ARGS="$PERF_CLIENT_PERCENTILE_ARGS --shared-memory ${SHARED_MEMORY}"

# Overload use of PERF_CLIENT_PROTOCOL for convenience with existing test and
# reporting structure, though "triton_c_api" is not strictly a "protocol".
case "${PERF_CLIENT_PROTOCOL}" in
    triton_c_api)
        # Server will be run in-process with C API
        SERVICE_ARGS="--service-kind triton_c_api \
                  --triton-server-directory ${TRITON_DIR} \
                  --model-repository ${MODEL_REPO}"
        ;;
    *)
        SERVICE_ARGS="-i ${PERF_CLIENT_PROTOCOL}"
        ;;
esac

#
# Use "identity" model for all model types.
#
# Every combination of backend, static batch size, dynamic batch size and
# instance count is benchmarked in turn: a model repository is prepared, the
# server is started (unless the in-process C API is used), Perf Analyzer is
# run, and the results are written to ${RESULTDIR} and optionally reported.
#
# Numeric comparisons use '-gt'. Inside '[ ]' a bare '>' is an output
# redirection, which makes the test always succeed and creates a stray file
# named after the right-hand operand.
#
for BACKEND in $BACKENDS; do
 for STATIC_BATCH in $STATIC_BATCH_SIZES; do
  for DYNAMIC_BATCH in $DYNAMIC_BATCH_SIZES; do
   for INSTANCE_CNT in $INSTANCE_COUNTS; do
    # Skip combinations that request dynamic batching with a static batch
    # that already reaches the dynamic batch size
    if (( ($DYNAMIC_BATCH > 1) && ($STATIC_BATCH >= $DYNAMIC_BATCH) )); then
        continue
    fi

    # plan and openvino models do not support 16MB I/O tests
    if ([ $BACKEND == "plan" ] || [ $BACKEND == "openvino" ]) && [ $TENSOR_SIZE != 1 ]; then
        continue
    fi

    # set input name (special case for libtorch model)
    INPUT_NAME="INPUT0" && [ $BACKEND == "libtorch" ] && INPUT_NAME="INPUT__0"

    MAX_LATENCY=300

    # The model's max batch size is the larger of the static and dynamic
    # batch sizes
    MAX_BATCH=${STATIC_BATCH}
    if [ $DYNAMIC_BATCH -gt $STATIC_BATCH ]; then
        MAX_BATCH=${DYNAMIC_BATCH}
    fi

    # TODO Add openvino identity model that supports batching/dynamic batching
    # The current openvino identity model does also not support batching
    if [ $BACKEND == "openvino" ]; then
        if [ $MAX_BATCH != 1 ]; then
            continue
        else
            MAX_BATCH=0
        fi
    fi

    # set shared memory output size
    OUTPUT_SHARED_MEMORY_SIZE=""
    if [[ "$SHARED_MEMORY" != "none" ]]; then
        OUTPUT_SHARED_MEMORY_SIZE=$((TENSOR_ELEMENT_BYTES*TENSOR_SIZE))
        # Scale by the batch size only for batching models
        if [ $MAX_BATCH -gt 1 ]; then
            OUTPUT_SHARED_MEMORY_SIZE=$((OUTPUT_SHARED_MEMORY_SIZE*MAX_BATCH))
        fi
        OUTPUT_SHARED_MEMORY_SIZE="--output-shared-memory-size $OUTPUT_SHARED_MEMORY_SIZE"
    fi

    # Result name; the dynamic batch size is included only when dynamic
    # batching is in use
    if [ $DYNAMIC_BATCH -gt 1 ]; then
        NAME=${BACKEND}_sbatch${STATIC_BATCH}_dbatch${DYNAMIC_BATCH}_instance${INSTANCE_CNT}
    else
        NAME=${BACKEND}_sbatch${STATIC_BATCH}_instance${INSTANCE_CNT}
    fi

    # set model name (special case for openvino i.e. nobatch)
    MODEL_NAME=${BACKEND}_zero_1_float32 && [ $BACKEND == "openvino" ] && MODEL_NAME=${BACKEND}_nobatch_zero_1_float32

    # custom and python models are staged locally; all others come from the
    # QA identity model repository
    if [ $BACKEND == "custom" ]; then
        REPO_DIR=./custom_models
    elif [ $BACKEND == "python" ]; then
        REPO_DIR=./python_models
    else
        REPO_DIR=$DATADIR/qa_identity_model_repository
    fi

    SHAPE=${TENSOR_SIZE}
    # custom, python and openvino models run on CPU; everything else on GPU
    KIND="KIND_GPU" && [ $BACKEND == "custom" ] || [ $BACKEND == "python" ] || [ $BACKEND == "openvino" ] && KIND="KIND_CPU"

    # Start from a fresh model repository holding only the model under test
    rm -fr models && mkdir -p models && \
        cp -r $REPO_DIR/$MODEL_NAME models/. && \
        (cd models/$MODEL_NAME && \
                sed -i "s/^max_batch_size:.*/max_batch_size: ${MAX_BATCH}/" config.pbtxt)

    # python model already has instance count and kind
    if [ $BACKEND == "python" ]; then
        (cd models/$MODEL_NAME && \
                sed -i "s/count:.*/count: ${INSTANCE_CNT}/" config.pbtxt)
    else
        (cd models/$MODEL_NAME && \
                echo "instance_group [ { kind: ${KIND}, count: ${INSTANCE_CNT} }]" >> config.pbtxt)
    fi

    # The custom model's tensor shape is rewritten to the requested size
    if [ $BACKEND == "custom" ]; then
        (cd models/$MODEL_NAME && \
                sed -i "s/dims:.*\[.*\]/dims: \[ ${SHAPE} \]/g" config.pbtxt)
    fi
    # Enable dynamic batching only when a dynamic batch size above 1 is
    # requested
    if [ $DYNAMIC_BATCH -gt 1 ] && [ $BACKEND != "openvino" ]; then
        (cd models/$MODEL_NAME && \
                echo "dynamic_batching { preferred_batch_size: [ ${DYNAMIC_BATCH} ] }" >> config.pbtxt)
    fi

    echo "Time before starting server: $(date)"
    # Only start separate server if not using C API, since C API runs server in-process
    if [[ "${PERF_CLIENT_PROTOCOL}" != "triton_c_api" ]]; then
        SERVER_LOG="${RESULTDIR}/${NAME}.server.log"
        run_server
        if [ $SERVER_PID == 0 ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi
    fi

    echo "Time before perf analyzer trials: $(date)"
    # Let a failing measurement be recorded in RET instead of aborting the
    # script; pipefail makes the pipeline report perf_analyzer's status
    # rather than tee's
    set +e
    set -o pipefail
    PA_MAX_TRIALS=${PA_MAX_TRIALS:-"50"}
    $PERF_CLIENT -v \
                 -p${PERF_CLIENT_STABILIZE_WINDOW} \
                 -s${PERF_CLIENT_STABILIZE_THRESHOLD} \
                 ${PERF_CLIENT_EXTRA_ARGS} \
                 ${OUTPUT_SHARED_MEMORY_SIZE} \
                 -m ${MODEL_NAME} \
                 -b${STATIC_BATCH} -t${CONCURRENCY} \
                 --max-trials "${PA_MAX_TRIALS}" \
                 --shape ${INPUT_NAME}:${SHAPE} \
                 ${SERVICE_ARGS} \
                 -f ${RESULTDIR}/${NAME}.csv 2>&1 | tee ${RESULTDIR}/${NAME}.log
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** FAILED Perf Analyzer measurement\n***"
        RET=1
    fi
    echo "Time after perf analyzer trials: $(date)"
    set +o pipefail
    set -e

    # Describe this run for the reporter, one JSON field per line
    echo -e "[{\"s_benchmark_kind\":\"benchmark_perf\"," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"s_benchmark_name\":\"nomodel\"," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"s_server\":\"triton\"," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"s_protocol\":\"${PERF_CLIENT_PROTOCOL}\"," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"s_framework\":\"${BACKEND}\"," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"s_model\":\"${MODEL_NAME}\"," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"l_concurrency\":${CONCURRENCY}," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"l_dynamic_batch_size\":${DYNAMIC_BATCH}," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"l_batch_size\":${STATIC_BATCH}," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"l_size\":${TENSOR_SIZE}," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"s_shared_memory\":\"${SHARED_MEMORY}\"," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"l_instance_count\":${INSTANCE_CNT}," >> ${RESULTDIR}/${NAME}.tjson
    echo -e "\"s_architecture\":\"${ARCH}\"}]" >> ${RESULTDIR}/${NAME}.tjson

    # SERVER_PID may not be set if using "triton_c_api" for example
    if [[ -n "${SERVER_PID}" ]]; then
        kill $SERVER_PID
        wait $SERVER_PID
    fi

    # Hand the results to the reporter script when it is available
    if [ -f $REPORTER ]; then
        set +e

        # Only pass a URL flag when BENCHMARK_REPORTER_URL is set
        URL_FLAG=
        if [ ! -z ${BENCHMARK_REPORTER_URL} ]; then
            URL_FLAG="-u ${BENCHMARK_REPORTER_URL}"
        fi

        $REPORTER -v -o ${RESULTDIR}/${NAME}.json --csv ${RESULTDIR}/${NAME}.csv ${URL_FLAG} ${RESULTDIR}/${NAME}.tjson
        if [ $? -ne 0 ]; then
            RET=1
        fi

        set -e
    fi
   done
  done
 done
done

# Print the verdict for this result set and return it as the exit status
case $RET in
    0)
        echo -e "\n***\n*** Test ${RESULTNAME} Passed\n***"
        ;;
    *)
        echo -e "\n***\n*** Test ${RESULTNAME} FAILED\n***"
        ;;
esac

exit $RET


================================================
FILE: qa/L0_perf_nomodel/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The repository version is the first argument when one is given, otherwise
# the server version from the environment
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Append the architecture suffix when one is requested
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Remove result files left in this directory by a previous run
rm -f *.log  *.csv *.tjson *.json

# Descriptive name for the current results
UNDERTEST_NAME=${NVIDIA_TRITON_SERVER_VERSION}

# Confidence percentile to use when stabilizing and reporting results.
# A value of 0 indicates that the average value should be used for
# stabilizing results. Defaults to 95 unless set in the environment.
: "${PERF_CLIENT_PERCENTILE:=95}"

# Threshold, as a percentage, to mark any performance change as a
# speedup or a slowdown.
PERF_CLIENT_SPEEDUP_THRESHOLD=5.0
PERF_CLIENT_SLOWDOWN_THRESHOLD=5.0

# Length of window, in milliseconds, to use when stabilizing latency
# and infer/sec results.
PERF_CLIENT_STABILIZE_WINDOW=10000

# Threshold, as a percentage, to use when stabilizing latency and
# infer/sec results. Values must vary by less than this percent over 3
# measurement windows to be considered stable.
PERF_CLIENT_STABILIZE_THRESHOLD=15.0

RUNTEST=./run_test.sh

# The model used for data collection has a single input and a single
# output and does minimal work (it just copies the input to the output).
# TENSOR_SIZE is the number of elements in the model input and in the
# model output. The tensor element type is float, so the element count is
# the test I/O size in bytes divided by 4.
TENSOR_SIZE_16MB=$((16*1024*1024/4))

# Build the test matrix as parallel arrays: entry i of every TEST_* array
# describes test i. Shared-memory runs only contain the 16MB I/O tests; the
# regular run additionally starts with the small-payload latency and
# throughput tests.
case "$TEST_SHARED_MEMORY" in
    system)
        UNDERTEST_NAME="$UNDERTEST_NAME System Shared Memory"
        SUFFIX="_shm"
        ;;
    cuda)
        UNDERTEST_NAME="$UNDERTEST_NAME CUDA Shared Memory"
        SUFFIX="_cudashm"
        ;;
    *)
        TEST_SHARED_MEMORY="none"
        TEST_NAMES=(
            "${UNDERTEST_NAME} Minimum Latency GRPC"
            "${UNDERTEST_NAME} Minimum Latency HTTP"
            "${UNDERTEST_NAME} Minimum Latency C API"
            "${UNDERTEST_NAME} Maximum Throughput GRPC"
            "${UNDERTEST_NAME} Maximum Throughput HTTP"
            "${UNDERTEST_NAME} Maximum Throughput C API")
        TEST_DIRS=(
            min_latency_grpc min_latency_http min_latency_triton_c_api
            max_throughput_grpc max_throughput_http max_throughput_triton_c_api)
        SUFFIX=""
        TEST_CONCURRENCY=(1 1 1 16 16 16)
        TEST_INSTANCE_COUNTS=(1 1 1 2 2 2)
        # Small payloads
        TEST_TENSOR_SIZES=(1 1 1 1 1 1)
        TEST_PROTOCOLS=(grpc http triton_c_api grpc http triton_c_api)
        ;;
esac

# 16MB I/O tests, run in every mode
TEST_NAMES+=(
    "${UNDERTEST_NAME} 16MB I/O Latency GRPC"
    "${UNDERTEST_NAME} 16MB I/O Latency HTTP"
    "${UNDERTEST_NAME} 16MB I/O Latency C API"
    "${UNDERTEST_NAME} 16MB I/O Throughput GRPC"
    "${UNDERTEST_NAME} 16MB I/O Throughput HTTP"
    "${UNDERTEST_NAME} 16MB I/O Throughput C API")
TEST_DIRS+=(
    16mb_latency_grpc${SUFFIX} 16mb_latency_http${SUFFIX} 16mb_latency_triton_c_api${SUFFIX}
    16mb_throughput_grpc${SUFFIX} 16mb_throughput_http${SUFFIX} 16mb_throughput_triton_c_api${SUFFIX})
TEST_PROTOCOLS+=(grpc http triton_c_api grpc http triton_c_api)
# Large payloads
TEST_TENSOR_SIZES+=(
    ${TENSOR_SIZE_16MB} ${TENSOR_SIZE_16MB} ${TENSOR_SIZE_16MB}
    ${TENSOR_SIZE_16MB} ${TENSOR_SIZE_16MB} ${TENSOR_SIZE_16MB})
TEST_INSTANCE_COUNTS+=(1 1 1 2 2 2)
TEST_CONCURRENCY+=(1 1 1 16 16 16)

# Backends to benchmark; defaults to the full list unless BACKENDS is set
: "${BACKENDS:=plan custom onnx libtorch python}"
TEST_BACKENDS=${BACKENDS}

mkdir -p ${REPO_VERSION}

#
# Run Performance tests
#
# Each entry of the TEST_* arrays describes one test; the entries are handed
# to run_test.sh through its environment. Failures are recorded in RET and
# the remaining tests still run.
#

RET=0
set +e

for idx in "${!TEST_NAMES[@]}"; do
    TEST_NAME=${TEST_NAMES[$idx]}
    TEST_DIR=${TEST_DIRS[$idx]}
    TEST_PROTOCOL=${TEST_PROTOCOLS[$idx]}
    TEST_TENSOR_SIZE=${TEST_TENSOR_SIZES[$idx]}
    TEST_INSTANCE_COUNT=${TEST_INSTANCE_COUNTS[$idx]}
    # Keep the per-test value in its own variable: assigning a scalar to
    # TEST_CONCURRENCY itself would overwrite element 0 of the array on every
    # iteration.
    TEST_CONCURRENCY_VALUE=${TEST_CONCURRENCY[$idx]}

    # FIXME: If PA C API adds SHMEM support, remove this.
    if [[ "${TEST_SHARED_MEMORY}" != "none" ]] && \
       [[ "${TEST_PROTOCOL}" == "triton_c_api" ]]; then
      echo "WARNING: Perf Analyzer does not support shared memory I/O when benchmarking directly with Triton C API, skipping."
      continue
    fi

    RESULTNAME=${TEST_NAME} \
                RESULTDIR=${REPO_VERSION}/${TEST_DIR} \
                PERF_CLIENT_PERCENTILE=${PERF_CLIENT_PERCENTILE} \
                PERF_CLIENT_STABILIZE_WINDOW=${PERF_CLIENT_STABILIZE_WINDOW} \
                PERF_CLIENT_STABILIZE_THRESHOLD=${PERF_CLIENT_STABILIZE_THRESHOLD} \
                PERF_CLIENT_PROTOCOL=${TEST_PROTOCOL} \
                TENSOR_SIZE=${TEST_TENSOR_SIZE} \
                BACKENDS=${TEST_BACKENDS} \
                SHARED_MEMORY=${TEST_SHARED_MEMORY} \
                STATIC_BATCH_SIZES=1 \
                DYNAMIC_BATCH_SIZES=1 \
                INSTANCE_COUNTS=${TEST_INSTANCE_COUNT} \
                CONCURRENCY=${TEST_CONCURRENCY_VALUE} \
                bash -x ${RUNTEST} ${REPO_VERSION}
    if (( $? != 0 )); then
        RET=1
    fi
done

set -e

# Print the overall verdict and return it as the script's exit status
if (( $RET != 0 )); then
    echo -e "\n***\n*** Data Collection FAILED\n***"
    exit $RET
fi

echo -e "\n***\n*** Data Collection Passed\n***"
exit $RET


================================================
FILE: qa/L0_perf_pyclients/custom_models/custom_zero_1_int32/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Identity-backend model with one INT32 input and one INT32 output, each
# with a single variable-size dimension. Batches of up to 8 are accepted and
# one CPU instance is configured.
name: "custom_zero_1_int32"
backend: "identity"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
instance_group {
 count: 1
 kind:KIND_CPU
}


================================================
FILE: qa/L0_perf_pyclients/simple_perf_client.py
================================================
#!/usr/bin/env python
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import sys
import time

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException, triton_to_np_dtype

FLAGS = None


def parse_model_grpc(model_metadata, model_config):
    """
    Validate a model described by attribute-style (gRPC) metadata and
    configuration objects, and extract what the client needs from it.

    The model must have exactly one input and one output, each with a
    single variable-sized dimension (plus a leading batch dimension
    when the configuration's max_batch_size is greater than zero).

    Returns a tuple of
    (max_batch_size, input name, output name, input datatype).

    Raises Exception with a descriptive message when the model does not
    meet these requirements.
    """
    input_count = len(model_metadata.inputs)
    if input_count != 1:
        raise Exception("expecting 1 input, got {}".format(input_count))

    output_count = len(model_metadata.outputs)
    if output_count != 1:
        raise Exception("expecting 1 output, got {}".format(output_count))

    config_input_count = len(model_config.input)
    if config_input_count != 1:
        raise Exception(
            "expecting 1 input in model configuration, got {}".format(
                config_input_count
            )
        )

    input_metadata = model_metadata.inputs[0]
    output_metadata = model_metadata.outputs[0]

    # One variable-sized dimension, plus the batch dimension if batching
    # is enabled for the model.
    expected_dims = 2 if model_config.max_batch_size > 0 else 1

    input_rank = len(input_metadata.shape)
    if input_rank != expected_dims:
        raise Exception(
            "expecting input to have {} dimensions, model '{}' input has {}".format(
                expected_dims, model_metadata.name, input_rank
            )
        )

    output_rank = len(output_metadata.shape)
    if output_rank != expected_dims:
        raise Exception(
            "expecting output to have {} dimensions, model '{}' output has {}".format(
                expected_dims, model_metadata.name, output_rank
            )
        )

    # The trailing dimension must be variable-sized (-1) on both tensors.
    if not input_metadata.shape[-1] == -1:
        raise Exception(
            "expecting input to have variable shape [-1], model '{}' input has {}".format(
                model_metadata.name, input_metadata.shape
            )
        )

    if not output_metadata.shape[-1] == -1:
        raise Exception(
            "expecting output to have variable shape [-1], model '{}' output has {}".format(
                model_metadata.name, output_metadata.shape
            )
        )

    return (
        model_config.max_batch_size,
        input_metadata.name,
        output_metadata.name,
        input_metadata.datatype,
    )


def parse_model_http(model_metadata, model_config):
    """
    Check the configuration of a model to make sure it is supported
    by this client.

    Both arguments are the dict (JSON) form of the model metadata and
    model configuration. The model must have exactly one input and one
    output, each with a single variable-sized dimension (plus a leading
    batch dimension when "max_batch_size" is greater than zero; a
    missing "max_batch_size" key is treated as zero).

    Returns a tuple of
    (max_batch_size, input name, output name, input datatype).

    Raises Exception with a descriptive message when the model does not
    meet these requirements.
    """
    if len(model_metadata["inputs"]) != 1:
        raise Exception(
            "expecting 1 input, got {}".format(len(model_metadata["inputs"]))
        )
    if len(model_metadata["outputs"]) != 1:
        raise Exception(
            "expecting 1 output, got {}".format(len(model_metadata["outputs"]))
        )

    if len(model_config["input"]) != 1:
        raise Exception(
            "expecting 1 input in model configuration, got {}".format(
                len(model_config["input"])
            )
        )

    input_metadata = model_metadata["inputs"][0]
    output_metadata = model_metadata["outputs"][0]

    max_batch_size = model_config.get("max_batch_size", 0)

    batch_dim = max_batch_size > 0
    expected_dims = 1 + (1 if batch_dim else 0)

    # The metadata is a dict, so the model name must be read with
    # ["name"]; attribute access would raise AttributeError and hide the
    # real validation error.
    model_name = model_metadata["name"]

    if len(input_metadata["shape"]) != expected_dims:
        raise Exception(
            "expecting input to have {} dimensions, model '{}' input has {}".format(
                expected_dims, model_name, len(input_metadata["shape"])
            )
        )

    if len(output_metadata["shape"]) != expected_dims:
        raise Exception(
            "expecting output to have {} dimensions, model '{}' output has {}".format(
                expected_dims, model_name, len(output_metadata["shape"])
            )
        )

    if input_metadata["shape"][-1] != -1:
        raise Exception(
            "expecting input to have variable shape [-1], model '{}' input has {}".format(
                model_name, input_metadata["shape"]
            )
        )

    if output_metadata["shape"][-1] != -1:
        raise Exception(
            "expecting output to have variable shape [-1], model '{}' output has {}".format(
                model_name, output_metadata["shape"]
            )
        )

    return (
        max_batch_size,
        input_metadata["name"],
        output_metadata["name"],
        input_metadata["datatype"],
    )


def requestGenerator(input_name, input_data, output_name, dtype, protocol):
    """
    Build the input and requested-output lists for one inference request.

    The gRPC client classes are used when `protocol` is "grpc" (compared
    case-insensitively); the HTTP client classes, with binary data
    enabled, are used for any other value.

    Returns a tuple (inputs, outputs), each a single-element list.
    """
    use_grpc = protocol.lower() == "grpc"

    # Describe the input tensor and attach the numpy data to it.
    if use_grpc:
        infer_input = grpcclient.InferInput(input_name, input_data.shape, dtype)
        infer_input.set_data_from_numpy(input_data)
    else:
        infer_input = httpclient.InferInput(input_name, input_data.shape, dtype)
        infer_input.set_data_from_numpy(input_data, binary_data=True)

    # Describe the output tensor being requested.
    if use_grpc:
        requested_output = grpcclient.InferRequestedOutput(output_name)
    else:
        requested_output = httpclient.InferRequestedOutput(
            output_name, binary_data=True
        )

    return [infer_input], [requested_output]


if __name__ == "__main__":
    # --------------------------- Arguments ------------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-m", "--model-name", type=str, required=True, help="Name of model"
    )
    parser.add_argument(
        "-x",
        "--model-version",
        type=str,
        required=False,
        default="",
        help="Version of model. Default is to use latest version.",
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        required=False,
        default=1,
        help="Batch size. Default is 1.",
    )
    parser.add_argument(
        "-s",
        "--shape",
        type=int,
        required=False,
        default=1,
        help="The shape of the tensor. Default is 1.",
    )
    parser.add_argument(
        "-u",
        "--url",
        type=str,
        required=False,
        default="localhost:8000",
        help="Inference server URL. Default is localhost:8000.",
    )
    parser.add_argument(
        "-i",
        "--protocol",
        type=str,
        required=False,
        default="HTTP",
        help="Protocol (HTTP/gRPC) used to communicate with "
        + "the inference service. Default is HTTP.",
    )
    parser.add_argument(
        "-c",
        "--iteration_count",
        type=int,
        required=False,
        default=1000,
        help="The number of iterations. Default is 1000.",
    )
    parser.add_argument(
        "-w",
        "--warmup_count",
        type=int,
        required=False,
        default=500,
        help="The number of warm-up iterations. Default is 500.",
    )
    parser.add_argument(
        "--csv",
        type=str,
        required=False,
        default=None,
        help="The name of the file to store the results in CSV format",
    )
    FLAGS = parser.parse_args()

    # Normalize once; anything other than "grpc" selects the HTTP client.
    protocol = FLAGS.protocol.lower()

    # --------------------------- Client ---------------------------------------------------------
    try:
        if protocol == "grpc":
            # Create gRPC client for communicating with the server
            triton_client = grpcclient.InferenceServerClient(
                url=FLAGS.url, verbose=FLAGS.verbose
            )
        else:
            triton_client = httpclient.InferenceServerClient(
                url=FLAGS.url, verbose=FLAGS.verbose, concurrency=1
            )
    except Exception as e:
        print("client creation failed: " + str(e))
        sys.exit(1)

    # Make sure the model matches our requirements, and get some
    # properties of the model that we need for preprocessing.
    # (The metadata is fetched exactly once.)
    try:
        model_metadata = triton_client.get_model_metadata(
            model_name=FLAGS.model_name, model_version=FLAGS.model_version
        )
    except InferenceServerException as e:
        print("failed to retrieve the metadata: " + str(e))
        sys.exit(1)

    try:
        model_config = triton_client.get_model_config(
            model_name=FLAGS.model_name, model_version=FLAGS.model_version
        )
    except InferenceServerException as e:
        print("failed to retrieve the config: " + str(e))
        sys.exit(1)

    if protocol == "grpc":
        # The gRPC response wraps the configuration in a `.config` field.
        max_batch_size, input_name, output_name, dtype = parse_model_grpc(
            model_metadata, model_config.config
        )
    else:
        max_batch_size, input_name, output_name, dtype = parse_model_http(
            model_metadata, model_config
        )

    # The parse_model_* checks accept a 1-D tensor for a non-batching model
    # (max_batch_size == 0) and a 2-D [batch, shape] tensor otherwise, so
    # only add the batch dimension when the model actually has one.
    if max_batch_size > 0:
        input_shape = [FLAGS.batch_size, FLAGS.shape]
    else:
        input_shape = [FLAGS.shape]
    input_data = np.zeros(input_shape, dtype=triton_to_np_dtype(dtype))

    # --------------------------- Warm-Up --------------------------------------------------------
    for _ in range(FLAGS.warmup_count):
        inputs, outputs = requestGenerator(
            input_name, input_data, output_name, dtype, protocol
        )
        triton_client.infer(
            FLAGS.model_name, inputs, model_version=FLAGS.model_version, outputs=outputs
        )

    latencies = []

    # --------------------------- Start Load --------------------------------------------------------

    # perf_counter() is monotonic, so measured intervals are not distorted
    # by system clock adjustments. Request construction is deliberately
    # included in each timed iteration, as before.
    start_time = time.perf_counter()

    for _ in range(FLAGS.iteration_count):
        t0 = time.perf_counter()
        inputs, outputs = requestGenerator(
            input_name, input_data, output_name, dtype, protocol
        )
        triton_client.infer(
            FLAGS.model_name, inputs, model_version=FLAGS.model_version, outputs=outputs
        )
        latencies.append(time.perf_counter() - t0)

    end_time = time.perf_counter()

    # Latencies are collected in seconds and reported in milliseconds.
    throughput = FLAGS.iteration_count / (end_time - start_time)
    average_latency = np.average(latencies) * 1000
    p50_latency = np.percentile(latencies, 50) * 1000
    p90_latency = np.percentile(latencies, 90) * 1000
    p95_latency = np.percentile(latencies, 95) * 1000
    p99_latency = np.percentile(latencies, 99) * 1000

    # --------------------------- Print Report -----------------------------------------------------
    print("Throughput: {} infer/sec".format(throughput))
    print("Latencies:")
    print("\tAvg: {} ms".format(average_latency))
    print("\tp50: {} ms".format(p50_latency))
    print("\tp90: {} ms".format(p90_latency))
    print("\tp95: {} ms".format(p95_latency))
    print("\tp99: {} ms".format(p99_latency))

    # --------------------------- Write CSV --------------------------------------------------------
    if FLAGS.csv is not None:
        # The context manager closes the file even if a write fails.
        with open(FLAGS.csv, "w") as csv_file:
            csv_file.write(
                "Concurrency,Inferences/Second,p50 latency,p90 latency,p95 latency,p99 latency\n"
            )
            # The millisecond values are scaled by 1000 again, i.e. written
            # in microseconds.
            # NOTE(review): presumably to match the units the downstream
            # reporter expects - confirm.
            csv_file.write(
                "1,{},{},{},{},{}".format(
                    throughput,
                    p50_latency * 1000,
                    p90_latency * 1000,
                    p95_latency * 1000,
                    p99_latency * 1000,
                )
            )


================================================
FILE: qa/L0_perf_pyclients/test.sh
================================================
#!/bin/bash
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# Resolve the repository version: the first positional argument overrides
# NVIDIA_TRITON_SERVER_VERSION, and TEST_REPO_ARCH (if set) is appended.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

REPORTER=../common/reporter.py
CLIENT_LOG="./simple_perf_client.log"
SIMPLE_PERF_CLIENT=simple_perf_client.py

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/custom_models"
source ../common/util.sh

# Select the single GPU that will be available to the inference
# server.
export CUDA_VISIBLE_DEVICES=0
PROTOCOLS="grpc http"

rm -f *.log *.csv *.tjson *.json

RET=0

MODEL_NAME="custom_zero_1_int32"

# Benchmark the Python client once per protocol, each time against a
# freshly started server.
for PROTOCOL in $PROTOCOLS; do
    # run_server is not defined in this script; presumably it comes from
    # ../common/util.sh and sets SERVER_PID / SERVER_LOG.
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi


    NAME=${MODEL_NAME}_${PROTOCOL}
    # The client defaults to HTTP on localhost:8000; gRPC needs explicit flags.
    EXTRA_ARGS="" && [[ "${PROTOCOL}" == "grpc" ]] && EXTRA_ARGS="-i grpc -u localhost:8001"

    # The reporter section below ends with "set -e", which would still be
    # in effect on the next loop iteration. Disable errexit here so that a
    # failing client is recorded in RET (and the server is still shut down)
    # instead of aborting the script on the spot.
    set +e
    python $SIMPLE_PERF_CLIENT -m $MODEL_NAME --shape 100000 --csv ${NAME}.csv ${EXTRA_ARGS}>> ${NAME}.log 2>&1
    if (( $? != 0 )); then
        RET=1
    fi

    # Metadata describing this benchmark run, consumed by the reporter.
    echo -e "[{\"s_benchmark_kind\":\"benchmark_perf\"," >> ${NAME}.tjson
    echo -e "\"s_benchmark_name\":\"python_client\"," >> ${NAME}.tjson
    echo -e "\"s_server\":\"triton\"," >> ${NAME}.tjson
    echo -e "\"s_protocol\":\"${PROTOCOL}\"," >> ${NAME}.tjson
    echo -e "\"s_framework\":\"custom\"," >> ${NAME}.tjson
    echo -e "\"s_model\":\"${MODEL_NAME}\"," >> ${NAME}.tjson
    echo -e "\"l_concurrency\":1," >> ${NAME}.tjson
    echo -e "\"l_batch_size\":1," >> ${NAME}.tjson
    echo -e "\"l_instance_count\":1}]" >> ${NAME}.tjson


    if [ -f $REPORTER ]; then
        set +e

        # Only pass -u when a reporter URL has been provided.
        URL_FLAG=
        if [ ! -z "${BENCHMARK_REPORTER_URL}" ]; then
            URL_FLAG="-u ${BENCHMARK_REPORTER_URL}"
        fi

        python $REPORTER -v -o ${NAME}.json --csv ${NAME}.csv ${URL_FLAG} ${NAME}.tjson
        if (( $? != 0 )); then
            RET=1
        fi

        set -e
    fi

    kill $SERVER_PID
    wait $SERVER_PID
done

if (( $RET == 0 )); then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_perf_resnet/run_test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Runs one perf_analyzer measurement for a single model/protocol pair.
# Besides the defaults assigned below, the script reads MODEL_NAME,
# MODEL_PATH, MODEL_FRAMEWORK, PERF_CLIENT_PROTOCOL, CONCURRENCY and ARCH
# from the environment.
STATIC_BATCH=${STATIC_BATCH:=1}
INSTANCE_CNT=${INSTANCE_CNT:=1}
BACKEND_CONFIG=${BACKEND_CONFIG:=""}

REPORTER=../common/reporter.py

TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
MODEL_REPO="${PWD}/models"
SERVER_ARGS="--model-repository=${MODEL_REPO} --backend-directory=${BACKEND_DIR} ${BACKEND_CONFIG}"
source ../common/util.sh

# Select the single GPU that will be available to the inference
# server. Or use "export CUDA_VISIBLE_DEVICES=" to run on CPU.
export CUDA_VISIBLE_DEVICES=0

RET=0

MAX_BATCH=${STATIC_BATCH}
NAME=${MODEL_NAME}_sbatch${STATIC_BATCH}_instance${INSTANCE_CNT}_${PERF_CLIENT_PROTOCOL}

# Stage a private copy of the model and adjust its configuration: set the
# max batch size and append an instance_group with the requested instance
# count. The instance_group line must be appended to config.pbtxt; printing
# it to stdout would leave INSTANCE_CNT without any effect on the server.
rm -fr models && mkdir -p models && \
    cp -r $MODEL_PATH models/. && \
    (cd models/$MODEL_NAME && \
            sed -i "s/^max_batch_size:.*/max_batch_size: ${MAX_BATCH}/" config.pbtxt && \
            echo "instance_group [ { count: ${INSTANCE_CNT} }]" >> config.pbtxt)

pip3 install perf_analyzer

MEASUREMENT_WINDOW=5000
PERF_CLIENT=perf_analyzer
# Onnx and onnx-trt models are very slow on Jetson.
if [ "$ARCH" == "aarch64" ]; then
    if [ "$MODEL_FRAMEWORK" == "onnx" ] || [ "$MODEL_FRAMEWORK" == "onnx_trt" ]; then
        MEASUREMENT_WINDOW=20000
    fi
fi

# Overload use of PERF_CLIENT_PROTOCOL for convenience with existing test and
# reporting structure, though "triton_c_api" is not strictly a "protocol".
if [[ "${PERF_CLIENT_PROTOCOL}" == "triton_c_api" ]]; then
    # Server will be run in-process with C API
    SERVICE_ARGS="--service-kind triton_c_api \
                  --triton-server-directory ${TRITON_DIR} \
                  --model-repository ${MODEL_REPO}"
else
    SERVICE_ARGS="-i ${PERF_CLIENT_PROTOCOL}"

    SERVER_LOG="${NAME}.server.log"
    run_server
    if (( $SERVER_PID == 0 )); then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Run the model once to warm up. Some frameworks do optimization on the first requests.
    # Must warmup similar to actual run so that all instances are ready
    # Note: Running extra PA for warmup doesn't make sense for C API since it
    # uses in-process tritonserver which will exit along with this PA process.
    set +e
    $PERF_CLIENT -v -m $MODEL_NAME -p${MEASUREMENT_WINDOW} \
                    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY} \
                    ${SERVICE_ARGS}
    set -e
fi

# pipefail makes the pipeline's status reflect perf_analyzer rather than tee.
set +e
set -o pipefail
PA_MAX_TRIALS=${PA_MAX_TRIALS:-"50"}
# Measure perf client results and write them to a file for reporting
$PERF_CLIENT -v -m $MODEL_NAME -p${MEASUREMENT_WINDOW} \
                -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY} \
                --max-trials "${PA_MAX_TRIALS}" \
                ${SERVICE_ARGS} \
                -f ${NAME}.csv 2>&1 | tee ${NAME}.log
if (( $? != 0 )); then
    echo -e "\n***\n*** FAILED Perf Analyzer measurement\n***"
    RET=1
fi
set +o pipefail
set -e

# Metadata describing this benchmark run, consumed by the reporter.
echo -e "[{\"s_benchmark_kind\":\"benchmark_perf\"," >> ${NAME}.tjson
echo -e "\"s_benchmark_name\":\"resnet50\"," >> ${NAME}.tjson
echo -e "\"s_server\":\"triton\"," >> ${NAME}.tjson
echo -e "\"s_protocol\":\"${PERF_CLIENT_PROTOCOL}\"," >> ${NAME}.tjson
echo -e "\"s_framework\":\"${MODEL_FRAMEWORK}\"," >> ${NAME}.tjson
echo -e "\"s_model\":\"${MODEL_NAME}\"," >> ${NAME}.tjson
echo -e "\"l_concurrency\":${CONCURRENCY}," >> ${NAME}.tjson
echo -e "\"l_batch_size\":${STATIC_BATCH}," >> ${NAME}.tjson
echo -e "\"l_instance_count\":${INSTANCE_CNT}," >> ${NAME}.tjson
echo -e "\"s_architecture\":\"${ARCH}\"}]" >> ${NAME}.tjson

# SERVER_PID may not be set if using "triton_c_api" for example
if [[ -n "${SERVER_PID}" ]]; then
  kill $SERVER_PID
  wait $SERVER_PID
fi

if [ -f $REPORTER ]; then
    set +e

    # Only pass -u when a reporter URL has been provided.
    URL_FLAG=
    if [ ! -z "${BENCHMARK_REPORTER_URL}" ]; then
        URL_FLAG="-u ${BENCHMARK_REPORTER_URL}"
    fi

    $REPORTER -v -o ${NAME}.json --csv ${NAME}.csv ${URL_FLAG} ${NAME}.tjson
    if (( $? != 0 )); then
        RET=1
    fi

    set -e
fi

if (( $RET == 0 )); then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_perf_resnet/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument overrides
# NVIDIA_TRITON_SERVER_VERSION, and TEST_REPO_ARCH (if set) is appended.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

rm -f *.log  *.csv *.tjson *.json

PROTOCOLS="grpc http triton_c_api"

TRT_MODEL_NAME="resnet50_fp32_plan"
PYT_MODEL_NAME="resnet50_fp32_libtorch"
ONNX_MODEL_NAME="resnet50_fp32_onnx"

# The base model name should be the prefix to the
# respective optimized model name.
ONNXTRT_MODEL_NAME="resnet50_fp32_onnx_trt"

ARCH=${ARCH:="x86_64"}
REPODIR=${REPODIR:="/data/inferenceserver/${REPO_VERSION}"}
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
TRTEXEC=/usr/src/tensorrt/bin/trtexec
CACHE_PATH=`pwd`/trt_cache

# Create the TensorRT plan model from the ONNX ResNet50 model, using the
# current value of STATIC_BATCH as the optimal and maximum batch size.
function build_tensorrt_plan {
    rm -fr tensorrt_models && mkdir -p tensorrt_models/$TRT_MODEL_NAME/1 && \
    cp $REPODIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/1/model.onnx tensorrt_models/$TRT_MODEL_NAME/ && \
    cp $REPODIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/labels.txt tensorrt_models/$TRT_MODEL_NAME/ && \
    cp $REPODIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/config.pbtxt tensorrt_models/$TRT_MODEL_NAME/

    # Build TRT engine
    $TRTEXEC --onnx=tensorrt_models/$TRT_MODEL_NAME/model.onnx --saveEngine=tensorrt_models/$TRT_MODEL_NAME/1/model.plan \
             --minShapes=input:1x3x224x224 --optShapes=input:${STATIC_BATCH}x3x224x224 \
             --maxShapes=input:${STATIC_BATCH}x3x224x224

    # The ONNX file was only needed to build the engine; rewrite the copied
    # configuration so that it describes the TensorRT plan model.
    rm tensorrt_models/$TRT_MODEL_NAME/model.onnx
    sed -i "s/^name: .*/name: \"$TRT_MODEL_NAME\"/g" tensorrt_models/$TRT_MODEL_NAME/config.pbtxt && \
    sed -i 's/^platform: .*/platform: "tensorrt_plan"/g' tensorrt_models/$TRT_MODEL_NAME/config.pbtxt
}

# Invoke run_test.sh for a single model/protocol pair.
#   $1 model name, $2 framework label, $3 model repository, $4 protocol
# STATIC_BATCH, INSTANCE_CNT, CONCURRENCY and ARCH are taken from the
# current global values.
function run_one_test {
    local model_name="$1"
    local framework="$2"
    local repo="$3"
    local protocol="$4"

    MODEL_NAME=${model_name} \
            MODEL_FRAMEWORK=${framework} \
            MODEL_PATH="$repo/${model_name}" \
            STATIC_BATCH=${STATIC_BATCH} \
            PERF_CLIENT_PROTOCOL=${protocol} \
            INSTANCE_CNT=${INSTANCE_CNT} \
            CONCURRENCY=${CONCURRENCY} \
            ARCH=${ARCH} \
            bash -x run_test.sh
}

# Run every "non-optimized" model in MODEL_NAMES over every protocol.
function run_standard_model_tests {
    local model protocol repo framework

    for model in $MODEL_NAMES; do
        for protocol in $PROTOCOLS; do
            # The TensorRT plan is built locally; every other model comes
            # from the shared perf model store.
            if [ "$model" != "$TRT_MODEL_NAME" ]; then
                repo=$REPODIR/perf_model_store
            else
                repo=`pwd`/tensorrt_models
            fi
            # Third "_"-separated field of the model name, e.g. "plan".
            framework=$(echo ${model} | cut -d '_' -f 3)
            run_one_test "$model" "$framework" "$repo" "$protocol"
        done
    done
}

# Run every optimization-enabled model in OPTIMIZED_MODEL_NAMES over
# every protocol.
function run_optimized_model_tests {
    local model protocol repo framework

    for model in $OPTIMIZED_MODEL_NAMES; do
        for protocol in $PROTOCOLS; do
            repo=`pwd`/optimized_model_store
            # Third and fourth "_"-separated fields, e.g. "onnx_trt".
            framework=$(echo ${model} | cut -d '_' -f 3,4)
            run_one_test "$model" "$framework" "$repo" "$protocol"
        done
    done
}


#
# Test minimum latency
#
STATIC_BATCH=1
INSTANCE_CNT=1
CONCURRENCY=1

MODEL_NAMES="${TRT_MODEL_NAME} ${ONNX_MODEL_NAME} ${PYT_MODEL_NAME}"

OPTIMIZED_MODEL_NAMES="${ONNXTRT_MODEL_NAME}"


# Create optimized models: copy the base model under the optimized name and
# append a TensorRT execution-accelerator section to its configuration.
rm -fr optimized_model_store && mkdir optimized_model_store
for MODEL_NAME in $OPTIMIZED_MODEL_NAMES; do
    BASE_MODEL=$(echo ${MODEL_NAME} | cut -d '_' -f 1,2,3)
    cp -r $REPODIR/perf_model_store/${BASE_MODEL} optimized_model_store/${MODEL_NAME}
    CONFIG_PATH="optimized_model_store/${MODEL_NAME}/config.pbtxt"
    sed -i "s/^name: \"${BASE_MODEL}\"/name: \"${MODEL_NAME}\"/" ${CONFIG_PATH}
    echo "optimization { execution_accelerators {" >> ${CONFIG_PATH}
    echo "gpu_execution_accelerator : [ {" >> ${CONFIG_PATH}
    echo "name : \"tensorrt\" " >> ${CONFIG_PATH}

    if [ "${MODEL_NAME}" = "${ONNXTRT_MODEL_NAME}" ] ; then
        echo "parameters { key: \"precision_mode\" value: \"FP16\" }" >> ${CONFIG_PATH}
        echo "parameters { key: \"max_workspace_size_bytes\" value: \"1073741824\" }" >> ${CONFIG_PATH}
        echo "parameters { key: \"trt_engine_cache_enable\" value: \"1\" }" >> ${CONFIG_PATH}
        echo "parameters { key: \"trt_engine_cache_path\" value: \"${CACHE_PATH}\" } " >> ${CONFIG_PATH}
    fi

    echo "} ]" >> ${CONFIG_PATH}
    echo "}}" >> ${CONFIG_PATH}
done

# Create the TensorRT plan from ONNX model
build_tensorrt_plan

# Tests with each "non-optimized" model
run_standard_model_tests

# Tests with optimization enabled models
run_optimized_model_tests

#
# Test large static batch = 128 w/ 2 instances (Use batch size 64 on Jetson Xavier)
#
if [ "$ARCH" == "aarch64" ]; then
    STATIC_BATCH=64
else
    STATIC_BATCH=128
fi

INSTANCE_CNT=2
CONCURRENCY=4

# Rebuild the TensorRT plan so the engine covers the larger batch size.
build_tensorrt_plan

# The exit status of this script is the status of the last run_test.sh
# invocation; earlier failures are not accumulated.
run_standard_model_tests

run_optimized_model_tests

================================================
FILE: qa/L0_perf_tensorrt_llm/test.sh
================================================
#!/bin/bash
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration for the TensorRT-LLM benchmarking test. NUM_GPUS,
# TENSORRTLLM_BACKEND_REPO_TAG, TRITON_DIR and SERVER_TIMEOUT keep any
# non-empty value already present in the environment and otherwise get the
# defaults assigned here.
RET=0
BASE_DIR=$(pwd)
NUM_GPUS=${NUM_GPUS:=1}
TENSORRTLLM_BACKEND_REPO_TAG=${TENSORRTLLM_BACKEND_REPO_TAG:="main"}
TRT_ROOT="/usr/local/tensorrt"

MODEL_NAME="gpt2_tensorrt_llm"
NAME="tensorrt_llm_benchmarking_test"
MODEL_REPOSITORY="$(pwd)/triton_model_repo"
TENSORRTLLM_BACKEND_DIR="/workspace/tensorrtllm_backend"
GPT_DIR="$TENSORRTLLM_BACKEND_DIR/tensorrt_llm/examples/models/core/gpt"
TOKENIZER_DIR="$GPT_DIR/gpt2"
# The engine directory is keyed by the GPU count, e.g. ".../1-gpu".
ENGINES_DIR="${BASE_DIR}/engines/inflight_batcher_llm/${NUM_GPUS}-gpu"
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_LOG="${NAME}_server.log"
SERVER_TIMEOUT=${SERVER_TIMEOUT:=120}
# NOTE(review): the helper functions called later in this script (for
# example clone_tensorrt_llm_backend_repo) are presumably defined in this
# file - confirm.
source ../common/trtllm_util.sh

# Replace an Open MPI installation older than 5.0.1 with Open MPI 5.0.1
# built from source (a version compatible with SLURM). Does nothing when no
# version can be detected or the detected version is already 5.0.1 or newer.
# Exits the script with status 1 if any removal, download, extraction or
# build step fails.
function upgrade_openmpi {
    # Last field of the "Open MPI" line of `mpirun --version`, with any
    # "rcN" suffix stripped.
    local installed_version=$(mpirun --version 2>&1 | awk '/Open MPI/ {gsub(/rc[0-9]+/, "", $NF); print $NF}')

    # Nothing to do unless a version was detected and it is older than 5.0.1.
    if [ -z "$installed_version" ] || ! dpkg --compare-versions "$installed_version" lt "5.0.1"; then
        echo "The installed Open MPI version ($installed_version) is 5.0.1 or higher. Skipping the upgrade."
        return
    fi

    # Uninstall the current version of Open MPI
    if ! { rm -r /opt/hpcx/ompi/ /usr/local/mpi && rm -rf /usr/lib/$(gcc -print-multiarch)/openmpi; }; then
        echo "Failed to uninstall the existing Open MPI version $installed_version."
        exit 1
    fi

    # Install SLURM supported Open MPI version
    cd /tmp/
    if ! wget "https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-5.0.1.tar.gz"; then
        echo "Failed to download Open MPI 5.0.1"
        exit 1
    fi
    if ! { rm -rf openmpi-5.0.1 && tar -xzf openmpi-5.0.1.tar.gz && cd openmpi-5.0.1; }; then
        echo "Failed to extract Open MPI 5.0.1"
        exit 1
    fi
    if ! { ./configure --prefix=/opt/hpcx/ompi/ && make && make install; }; then
        echo "Failed to install Open MPI 5.0.1"
        exit 1
    fi

    # Update environment variables, adding each export to ~/.bashrc only if
    # the path is not already mentioned there.
    grep -q '/opt/hpcx/ompi/bin' ~/.bashrc ||
        echo 'export PATH=/opt/hpcx/ompi/bin:$PATH' >>~/.bashrc

    grep -q '/opt/hpcx/ompi/lib' ~/.bashrc ||
        echo 'export LD_LIBRARY_PATH=/opt/hpcx/ompi/lib:$LD_LIBRARY_PATH' >>~/.bashrc

    ldconfig
    source ~/.bashrc
    cd "$BASE_DIR"
    mpirun --version
}

# Prepare the environment, engine and model repository. Only upgrade_openmpi is
# defined in this script; the other functions are assumed to be provided by
# ../common/trtllm_util.sh -- TODO confirm.
upgrade_openmpi
clone_tensorrt_llm_backend_repo
build_gpt2_base_model
build_gpt2_tensorrt_engine
prepare_model_repository

# Install perf_analyzer
pip3 install tritonclient

# Benchmark settings; most of them are also written into the .tjson metadata.
ARCH="amd64"
STATIC_BATCH=1
INSTANCE_CNT=1
CONCURRENCY=100
MODEL_FRAMEWORK="tensorrt-llm"
PERF_CLIENT="perf_analyzer"
REPORTER=../common/reporter.py
INPUT_DATA="./input_data.json"
PERF_CLIENT_PROTOCOL="grpc"
EXPORT_FILE=profile-export-tensorrt-llm-model.json
# Remove artifacts left by a previous run.
# NOTE(review): "*log" matches every name ending in "log", not only "*.log" --
# confirm this is intended.
rm -rf *.tjson *.json *.csv *log

# Single streaming request that perf_analyzer replays for every inference.
echo '{
  "data": [
    {
      "text_input": ["Hello, my name is"],
      "stream": [true],
      "max_tokens": [16],
      "bad_words": [""],
      "stop_words": [""]
    }
  ]
}' >$INPUT_DATA

# Set stability-percentage 999 to bypass the stability check in PA.
# LLM generates a sequence of tokens that is unlikely to be within a reasonable bound to determine valid measurement in terms of latency.
# Using "count_windows" measurement mode, which automatically extends the window for collecting responses.
PERF_CLIENT_ARGS="-v -m $MODEL_NAME -i $PERF_CLIENT_PROTOCOL --async --streaming --input-data=$INPUT_DATA --profile-export-file=$EXPORT_FILE \
                  --shape=text_input:1 --shape=max_tokens:1 --shape=bad_words:1 --shape=stop_words:1 --measurement-mode=count_windows \
                  --concurrency-range=$CONCURRENCY --measurement-request-count=10 --stability-percentage=999"

# Run the benchmark against a freshly started server. Errors are handled
# explicitly here, so "exit on error" is disabled for the duration.
set +e
run_server

# Enable pipefail only around the pipeline: without it the pipeline status is
# that of tee, and a perf_analyzer failure would go unnoticed.
set -o pipefail
$PERF_CLIENT $PERF_CLIENT_ARGS -f ${NAME}.csv 2>&1 | tee ${NAME}_perf_analyzer.log
if (($? != 0)); then
    echo -e "\n***\n*** ${PERF_CLIENT} failed for ${MODEL_NAME}\n***"
    RET=1
fi
set +o pipefail

kill_server
set -e

# Append the benchmark metadata record to ${NAME}.tjson. The lines are grouped
# so that the file is opened for appending once instead of once per line.
{
    echo -e "[{\"s_benchmark_kind\":\"benchmark_perf\","
    echo -e "\"s_benchmark_repo_branch\":\"${BENCHMARK_REPO_BRANCH}\","
    echo -e "\"s_benchmark_name\":\"${NAME}\","
    echo -e "\"s_server\":\"triton\","
    echo -e "\"s_protocol\":\"${PERF_CLIENT_PROTOCOL}\","
    echo -e "\"s_framework\":\"${MODEL_FRAMEWORK}\","
    echo -e "\"s_model\":\"${MODEL_NAME}\","
    echo -e "\"l_concurrency\":${CONCURRENCY},"
    echo -e "\"l_batch_size\":${STATIC_BATCH},"
    echo -e "\"l_instance_count\":${INSTANCE_CNT},"
    echo -e "\"s_architecture\":\"${ARCH}\"}]"
} >>${NAME}.tjson

# Run the reporter, when it is available, on the collected results. A reporter
# failure marks the test as failed.
if [ -f "$REPORTER" ]; then
    set +e

    # Pass the reporter URL only when one is configured. The variable is quoted
    # so that an empty or whitespace-containing value is tested correctly.
    URL_FLAG=
    if [ -n "${BENCHMARK_REPORTER_URL}" ]; then
        URL_FLAG="-u ${BENCHMARK_REPORTER_URL}"
    fi

    # URL_FLAG is intentionally unquoted: it must split into "-u" and the URL,
    # or vanish entirely when empty.
    python3 $REPORTER -v -e ${EXPORT_FILE} -o ${NAME}.json --csv ${NAME}.csv --gpu-metrics --token-latency ${URL_FLAG} ${NAME}.tjson
    if (($? != 0)); then
        RET=1
    fi

    set -e
fi

if (($RET == 0)); then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_perf_vllm/test.sh
================================================
#!/bin/bash
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Shared QA helpers; run_server, called later in this script, is assumed to be
# defined there -- TODO confirm.
source ../common/util.sh

REPORTER=../common/reporter.py
# Triton install prefix; can be overridden from the environment.
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
# Model repository generated by this script and removed again at the end.
MODEL_REPO="${PWD}/models"
# Base name for the log, csv, json and tjson files produced by this test.
NAME="vllm_benchmarking_test"
MODEL_NAME="gpt2_vllm"
INPUT_DATA="./input_data.json"
SERVER_LOG="${NAME}_server.log"
SERVER_ARGS="--model-repository=${MODEL_REPO} --backend-directory=${BACKEND_DIR} --log-verbose=1"

# Default to GPU 0 unless the caller already selected devices.
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:=0}
EXPORT_FILE=profile-export-vllm-model.json

pip3 install tritonclient
# Remove artifacts left by a previous run.
rm -rf $MODEL_REPO $EXPORT_FILE *.tjson *.json *.csv

# Generate the model repository and the perf_analyzer input. Quoted
# here-documents write each file verbatim, with no expansion or escape
# processing.
mkdir -p $MODEL_REPO/$MODEL_NAME/1

# Engine arguments for the vLLM model.
cat >$MODEL_REPO/$MODEL_NAME/1/model.json <<'EOF'
{
    "model":"gpt2",
    "gpu_memory_utilization": 0.5
}
EOF

# Triton model configuration selecting the vllm backend.
cat >$MODEL_REPO/$MODEL_NAME/config.pbtxt <<'EOF'
backend: "vllm"
instance_group [
  {
    count: 1
    kind: KIND_MODEL
  }
]
EOF

# Single streaming request that perf_analyzer replays for every inference.
cat >$INPUT_DATA <<'EOF'
{
    "data": [
        {
            "text_input": [
                "hi hi hi hi hi hi hi hi hi hi"
            ],
            "stream": [
                true
            ],
            "sampling_parameters": [
                "{\"max_tokens\": 1024, \"ignore_eos\": true}"
            ]
        }
    ]
}
EOF

# Overall test status; it is reported at the end and used as the exit code.
RET=0
# Benchmark settings; most of them are also written into the .tjson metadata.
ARCH="amd64"
STATIC_BATCH=1
INSTANCE_CNT=1
CONCURRENCY=100
MODEL_FRAMEWORK="vllm"
PERF_CLIENT_PROTOCOL="grpc"
PERF_CLIENT=perf_analyzer

# Set stability-percentage 999 to bypass the stability check in PA.
# LLM generates a sequence of tokens that is unlikely to be within a reasonable bound to determine valid measurement in terms of latency.
# Using "count_windows" measurement mode, which automatically extends the window for collecting responses.
PERF_CLIENT_ARGS="-v -m $MODEL_NAME --concurrency-range=${CONCURRENCY} --measurement-mode=count_windows --measurement-request-count=10 \
                  --input-data=$INPUT_DATA --profile-export-file=$EXPORT_FILE -i $PERF_CLIENT_PROTOCOL --async --streaming --stability-percentage=999"

run_server
if (($SERVER_PID == 0)); then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Enable pipefail only around the pipeline: without it the pipeline status is
# that of tee, and a perf_analyzer failure would go unnoticed.
set -o pipefail
$PERF_CLIENT $PERF_CLIENT_ARGS -f ${NAME}.csv 2>&1 | tee ${NAME}_perf_analyzer.log
if (($? != 0)); then
    echo -e "\n***\n*** ${PERF_CLIENT} failed for ${MODEL_NAME}\n***"
    RET=1
fi
set +o pipefail
set -e

if [[ -n "${SERVER_PID}" ]]; then
    kill $SERVER_PID
    wait $SERVER_PID
fi

# Append the benchmark metadata record to ${NAME}.tjson.
# Fields prefixed with "l_" are emitted as bare numbers and fields prefixed
# with "s_" as quoted strings; l_concurrency follows the same rule as the
# other "l_" fields.
echo -e "[{\"s_benchmark_kind\":\"benchmark_perf\"," >>${NAME}.tjson
echo -e "\"s_benchmark_repo_branch\":\"${BENCHMARK_REPO_BRANCH}\"," >>${NAME}.tjson
echo -e "\"s_benchmark_name\":\"${NAME}\"," >>${NAME}.tjson
echo -e "\"s_server\":\"triton\"," >>${NAME}.tjson
echo -e "\"s_protocol\":\"${PERF_CLIENT_PROTOCOL}\"," >>${NAME}.tjson
echo -e "\"s_framework\":\"${MODEL_FRAMEWORK}\"," >>${NAME}.tjson
echo -e "\"s_model\":\"${MODEL_NAME}\"," >>${NAME}.tjson
echo -e "\"l_concurrency\":${CONCURRENCY}," >>${NAME}.tjson
echo -e "\"l_batch_size\":${STATIC_BATCH}," >>${NAME}.tjson
echo -e "\"l_instance_count\":${INSTANCE_CNT}," >>${NAME}.tjson
echo -e "\"s_architecture\":\"${ARCH}\"}]" >>${NAME}.tjson

# Run the reporter, when it is available, on the collected results. A reporter
# failure marks the test as failed.
if [ -f "$REPORTER" ]; then
    set +e

    # Pass the reporter URL only when one is configured. The variable is quoted
    # so that an empty or whitespace-containing value is tested correctly.
    URL_FLAG=
    if [ -n "${BENCHMARK_REPORTER_URL}" ]; then
        URL_FLAG="-u ${BENCHMARK_REPORTER_URL}"
    fi

    # URL_FLAG is intentionally unquoted: it must split into "-u" and the URL,
    # or vanish entirely when empty.
    python3 $REPORTER -v -e ${EXPORT_FILE} -o ${NAME}.json --csv ${NAME}.csv --gpu-metrics --token-latency ${URL_FLAG} ${NAME}.tjson
    if (($? != 0)); then
        RET=1
    fi

    set -e
fi

# Remove the generated model repository and input data.
rm -rf $MODEL_REPO $INPUT_DATA

if (($RET == 0)); then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_pinned_memory/libtorch_ensemble.pbtxt
================================================
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Ensemble that chains two models: custom_zero_1_float32 followed by
# libtorch_zero_1_float32. The tensor "temp_0" carries the output of the first
# step into the second.
name: "libtorch_ensemble"
platform: "ensemble"
# NOTE(review): per the Triton model configuration docs, 0 means the model
# does not use Triton batching -- confirm.
max_batch_size: 0
# One FP32 input with a single dimension of -1.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
# One FP32 output with a single dimension of -1.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
ensemble_scheduling {
  step [
    {
      # Step 1: ensemble INPUT0 -> model INPUT0; model OUTPUT0 -> temp_0.
      model_name: "custom_zero_1_float32"
      # NOTE(review): -1 presumably selects the latest model version -- confirm
      # against the Triton ensemble documentation.
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_0"
      }
    },
    {
      # Step 2: temp_0 -> model INPUT__0; model OUTPUT__0 -> ensemble OUTPUT0.
      model_name: "libtorch_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT__0"
        value: "temp_0"
      }
      output_map {
        key: "OUTPUT__0"
        value: "OUTPUT0"
      }
    }
  ]
}

================================================
FILE: qa/L0_pinned_memory/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument wins, otherwise
# fall back to NVIDIA_TRITON_SERVER_VERSION. An empty result is fatal.
if (( $# >= 1 )); then
    REPO_VERSION=$1
else
    REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
fi

if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Append the architecture suffix when one is requested.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

pip3 install perf_analyzer

# Use "--request-count" throughout the test to bypass the PA stability
# criteria and reduce flaky failures from unstable PA measurements.
REQUEST_COUNT=10
CLIENT=perf_analyzer
# Only use libtorch as it accepts GPU I/O and it can handle variable shape
BACKENDS=${BACKENDS:="libtorch"}

DATADIR=/data/inferenceserver/${REPO_VERSION}

SERVER=/opt/tritonserver/bin/tritonserver
# Shared QA helpers; run_server, called later in this script, is assumed to be
# defined there -- TODO confirm.
source ../common/util.sh

# Select the single GPU that will be available to the inference server
export CUDA_VISIBLE_DEVICES=0

# Remove artifacts left by a previous run.
rm -f *.log  *.csv *.metrics
# Overall test status; it is reported at the end and used as the exit code.
RET=0

# Local copy of the custom identity model, with a version directory "1".
rm -fr ./custom_models && mkdir ./custom_models && \
    cp -r ../custom_models/custom_zero_1_float32 ./custom_models/. && \
    mkdir -p ./custom_models/custom_zero_1_float32/1

#
# Use "identity" model for all model types.
#
# The sed rewrites every dims entry to [ -1 ], and the appended instance_group
# places this model on the CPU.
rm -fr models && mkdir -p models && \
    cp -r ./custom_models/custom_zero_1_float32 models/. && \
        (cd models/custom_zero_1_float32 && \
                sed -i "s/dims:.*\[.*\]/dims: \[ -1 \]/g" config.pbtxt && \
                echo "instance_group [ { kind: KIND_CPU }]" >> config.pbtxt)

# For every backend: build the ensemble, then exercise it in four server runs:
#   1. pinned memory, verbose log  -> sanity-check the allocation messages
#   2. pinned memory, normal log   -> perf_analyzer sweep over tensor sizes
#   3. pinned pool disabled, verbose log -> sanity-check the allocation messages
#   4. pinned pool disabled, normal log  -> perf_analyzer sweep over tensor sizes
# Client failures and failed log checks set RET=1; a server that fails to
# start aborts the script.
for BACKEND in $BACKENDS; do
    MODEL_NAME=${BACKEND}_zero_1_float32
    REPO_DIR=$DATADIR/qa_identity_model_repository

    # Copy the backend's identity model, rewrite every dims entry to [ -1 ]
    # and place the model on the GPU.
    cp -r $REPO_DIR/$MODEL_NAME models/. && \
        (cd models/$MODEL_NAME && \
            sed -i "s/dims:.*\[.*\]/dims: \[ -1 \]/g" config.pbtxt && \
            echo "instance_group [ { kind: KIND_GPU }]" >> config.pbtxt)

    # The ensemble configuration is taken from <backend>_ensemble.pbtxt in the
    # test directory.
    ENSEMBLE_NAME=${BACKEND}_ensemble
    mkdir -p models/$ENSEMBLE_NAME/1 && \
        cp $ENSEMBLE_NAME.pbtxt models/$ENSEMBLE_NAME/config.pbtxt

    # With pinned memory
    SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
    SERVER_LOG="${ENSEMBLE_NAME}.pinned.server.log"
    run_server
    if (( $SERVER_PID == 0 )); then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Sanity check that the server allocates pinned memory for large size
    set +e
    $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:16777216 --request-count ${REQUEST_COUNT}
    if (( $? != 0 )); then
        RET=1
    fi

    # Any "non-pinned" line in the verbose log is treated as a failure.
    grep "non-pinned" ${ENSEMBLE_NAME}.pinned.server.log
    if [ $? -eq 0 ]; then
        echo -e "\n***\n*** Failed. Expected only pinned memory is allocated\n***"
        RET=1
    fi

    # The pool-creation message must be present in the log.
    grep "] \"Pinned memory pool is created" ${ENSEMBLE_NAME}.pinned.server.log
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected pinned memory is allocated\n***"
        RET=1
    fi

    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    # Restart the server without verbose logging
    # NOTE(review): SERVER_LOG reuses the name of the verbose log checked
    # above. If run_server truncates the file (not visible here), the verbose
    # log is lost for later debugging -- confirm this is acceptable.
    SERVER_ARGS="--model-repository=`pwd`/models"
    SERVER_LOG="${ENSEMBLE_NAME}.pinned.server.log"
    run_server
    if (( $SERVER_PID == 0 )); then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # 16k 1m 2m 4m 8m 16m elements
    # Each size appends its perf_analyzer output to its own log, which is
    # printed at the end of the script.
    set +e
    for TENSOR_SIZE in 16384 1048576 2097152 4194304 8388608 16777216; do
        $CLIENT -i grpc -u localhost:8001 -m${ENSEMBLE_NAME} \
                --shape INPUT0:${TENSOR_SIZE} \
                --request-count ${REQUEST_COUNT} \
                >> ${BACKEND}.${TENSOR_SIZE}.pinned.log 2>&1
        if (( $? != 0 )); then
            RET=1
        fi
    done
    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    # Without pinned memory
    SERVER_ARGS="--model-repository=`pwd`/models --pinned-memory-pool-byte-size=0 --log-verbose=1"
    SERVER_LOG="${ENSEMBLE_NAME}.nonpinned.server.log"
    run_server
    if (( $SERVER_PID == 0 )); then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Sanity check that the server allocates non-pinned memory
    set +e
    $CLIENT  -m${ENSEMBLE_NAME} --shape INPUT0:1 --request-count ${REQUEST_COUNT}
    if (( $? != 0 )); then
        RET=1
    fi

    # With the pool size set to 0 the pool-creation message must be absent.
    grep "] \"Pinned memory pool is created" ${ENSEMBLE_NAME}.nonpinned.server.log
    if [ $? -eq 0 ]; then
        echo -e "\n***\n*** Failed. Expected only non-pinned memory is allocated\n***"
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    # Restart the server without verbose logging
    # NOTE(review): as above, this reuses the name of the verbose log.
    SERVER_ARGS="--model-repository=`pwd`/models --pinned-memory-pool-byte-size=0"
    SERVER_LOG="${ENSEMBLE_NAME}.nonpinned.server.log"
    run_server
    if (( $SERVER_PID == 0 )); then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # 16k 1m 2m 4m 8m 16m elements
    set +e
    for TENSOR_SIZE in 16384 1048576 2097152 4194304 8388608 16777216; do
        $CLIENT -i grpc -u localhost:8001 -m${ENSEMBLE_NAME} \
                --shape INPUT0:${TENSOR_SIZE} \
                --request-count ${REQUEST_COUNT} \
                >> ${BACKEND}.${TENSOR_SIZE}.nonpinned.log 2>&1
        if (( $? != 0 )); then
            RET=1
        fi
    done
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# Print the collected perf_analyzer logs, non-pinned first and then pinned,
# for every backend and tensor size, followed by the overall verdict.
for BACKEND in $BACKENDS; do
    for TENSOR_SIZE in 16384 1048576 2097152 4194304 8388608 16777216; do
        echo -e "${BACKEND} ensemble ${TENSOR_SIZE} elements\n"
        for MEMORY_LABEL in non-pinned pinned; do
            echo -e "${MEMORY_LABEL}\n"
            # The log file names use the label without the hyphen.
            cat ${BACKEND}.${TENSOR_SIZE}.${MEMORY_LABEL//-/}.log
        done
    done
done

if [ "$RET" -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_priority/test.sh
================================================
#!/bin/bash
# Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument wins, otherwise
# fall back to NVIDIA_TRITON_SERVER_VERSION. An empty result is fatal.
if (( $# >= 1 )); then
    REPO_VERSION=$1
else
    REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
fi

if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Append the architecture suffix when one is requested.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Make only GPU 0 visible to the server.
export CUDA_VISIBLE_DEVICES=0

DATADIR=/data/inferenceserver/${REPO_VERSION}

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"
# Shared QA helpers; run_server, called later in this script, is assumed to be
# defined there -- TODO confirm.
source ../common/util.sh

rm -f ./*.log
# Build three copies of plan_float32_float32_float32, each keeping only the
# versions other than 2 and 3:
#   *_def : renamed only, no priority setting appended
#   *_max : renamed, with "optimization { priority: PRIORITY_MAX }" appended
#   *_min : renamed, with "optimization { priority: PRIORITY_MIN }" appended
# The steps form a single && chain, so the first failing step skips the rest.
rm -fr models && mkdir -p models && \
    cp -r $DATADIR/qa_model_repository/plan_float32_float32_float32 \
       models/plan_float32_float32_float32_def && \
    rm -fr models/plan_float32_float32_float32_def/2 && \
    rm -fr models/plan_float32_float32_float32_def/3 && \
    (cd models/plan_float32_float32_float32_def && \
            sed -i 's/^name: "plan_float32_float32_float32"/name: "plan_float32_float32_float32_def"/' \
                config.pbtxt) && \
    cp -r models/plan_float32_float32_float32_def models/plan_float32_float32_float32_max && \
    (cd models/plan_float32_float32_float32_max && \
            sed -i 's/^name: "plan_float32_float32_float32_def"/name: "plan_float32_float32_float32_max"/' \
                config.pbtxt && \
            echo "optimization { priority: PRIORITY_MAX }" >> config.pbtxt) && \
    cp -r models/plan_float32_float32_float32_def models/plan_float32_float32_float32_min && \
    (cd models/plan_float32_float32_float32_min && \
            sed -i 's/^name: "plan_float32_float32_float32_def"/name: "plan_float32_float32_float32_min"/' \
                config.pbtxt && \
            echo "optimization { priority: PRIORITY_MIN }" >> config.pbtxt)

# expect_stream_priority <model suffix> <label> <priority>
# Requires a server-log line that mentions plan_float32_float32_float32_<suffix>
# and contains "stream priority <priority>"; otherwise reports the failure and
# sets RET=1.
function expect_stream_priority {
    local model_suffix=$1
    local label=$2
    local priority=$3

    if ! grep "plan_float32_float32_float32_${model_suffix}" $SERVER_LOG | grep "stream priority ${priority}"; then
        echo -e "\n***\n*** Failed. Expected ${label} priority ${priority}\n***"
        RET=1
    fi
}

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

# The checks report their own failures, so "exit on error" is disabled for
# their duration.
set +e

expect_stream_priority min MIN 0
expect_stream_priority max MAX -5
expect_stream_priority def DEFAULT 0

set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $SERVER_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_python_api/test.sh
================================================
#!/bin/bash
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pip3 install pytest-asyncio==0.23.8

RET=0

# run_pytest <log file> <junit xml> <test file>
# Runs one pytest module with its output captured in <log file>. On failure the
# log is printed and RET is set to 1.
function run_pytest {
    local log_file=$1
    local junit_xml=$2
    local test_file=$3

    rm -f $log_file
    python -m pytest --junitxml=$junit_xml $test_file > $log_file 2>&1
    if [ $? -ne 0 ]; then
        cat $log_file
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
}

# Every module runs even if an earlier one fails, so "exit on error" is
# disabled for their duration.
set +e

BINDING_TEST_LOG="./python_binding.log"
run_pytest $BINDING_TEST_LOG test_binding_report.xml test_binding.py

API_TEST_LOG="./python_api.log"
run_pytest $API_TEST_LOG test_api_report.xml test_api.py

FRONTEND_TEST_LOG="./python_kserve.log"
run_pytest $FRONTEND_TEST_LOG test_kserve.xml test_kserve.py

set -e

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_python_api/test_kserve.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time
from functools import partial

import numpy as np
import pytest
import testing_utils as utils
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
import tritonserver
from tritonclient.utils import InferenceServerException
from tritonfrontend import KServeGrpc, KServeHttp, Metrics


class TestHttpOptions:
    """Checks the validation performed when building ``KServeHttp.Options``."""

    @staticmethod
    def _expect_rejected(**options):
        # Any exception type counts as a rejection.
        with pytest.raises(Exception):
            KServeHttp.Options(**options)

    def test_correct_http_parameters(self):
        """A valid option set constructs without raising."""
        valid_options = dict(
            address="0.0.0.1", port=8080, reuse_port=True, thread_count=16
        )
        KServeHttp.Options(**valid_options)

    def test_wrong_http_parameters(self):
        """Out-of-range and wrongly typed options are rejected."""
        # Out of range
        self._expect_rejected(port=-15)
        self._expect_rejected(thread_count=0)

        # Wrong data type
        self._expect_rejected(header_forward_pattern=10)


class TestGrpcOptions:
    """Checks the validation performed when building ``KServeGrpc.Options``."""

    @staticmethod
    def _expect_rejected(**options):
        # Any exception type counts as a rejection.
        with pytest.raises(Exception):
            KServeGrpc.Options(**options)

    def test_correct_grpc_parameters(self):
        """A valid option set constructs without raising."""
        valid_options = dict(
            infer_compression_level=KServeGrpc.Grpc_compression_level.HIGH,
            reuse_port=True,
            infer_allocation_pool_size=12,
            http2_max_pings_without_data=10,
        )
        KServeGrpc.Options(**valid_options)

    def test_wrong_grpc_parameters(self):
        """Out-of-range and wrongly typed options are rejected."""
        # Out of Range
        self._expect_rejected(port=-5)
        self._expect_rejected(keepalive_timeout_ms=-20_000)
        self._expect_rejected(keepalive_time_ms=-1)
        self._expect_rejected(keepalive_timeout_ms=-1)
        self._expect_rejected(http2_max_pings_without_data=-1)
        self._expect_rejected(http2_min_recv_ping_interval_without_data_ms=-1)
        self._expect_rejected(http2_max_ping_strikes=-1)
        self._expect_rejected(max_connection_age_ms=-1)
        self._expect_rejected(max_connection_age_grace_ms=-1)

        # Wrong data type
        self._expect_rejected(infer_allocation_pool_size="big pool")
        self._expect_rejected(server_key=10)


class TestMetricsOptions:
    """Checks the validation performed when building ``Metrics.Options``."""

    @staticmethod
    def _expect_rejected(**options):
        # Any exception type counts as a rejection.
        with pytest.raises(Exception):
            Metrics.Options(**options)

    def test_correct_http_parameters(self):
        """A valid option set constructs without raising."""
        valid_options = dict(address="0.0.0.1", port=8080, thread_count=16)
        Metrics.Options(**valid_options)

    def test_wrong_http_parameters(self):
        """Out-of-range and wrongly typed options are rejected."""
        # Out of range
        self._expect_rejected(port=-15)
        self._expect_rejected(thread_count=0)

        # Wrong data type
        self._expect_rejected(thread_count="ten")


# Parametrization tuples shared by the tests below.
# HTTP_ARGS / GRPC_ARGS: (frontend class, client module, default URL).
# METRICS_ARGS: (frontend class, default URL).
HTTP_ARGS = (KServeHttp, httpclient, "localhost:8000")  # Default HTTP args
GRPC_ARGS = (KServeGrpc, grpcclient, "localhost:8001")  # Default GRPC args
METRICS_ARGS = (Metrics, "localhost:8002")  # Default Metrics args


class TestKServe:
    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend, client_type, url", [HTTP_ARGS, GRPC_ARGS])
    def test_server_ready(self, frontend, client_type, url):
        """Start a server and a frontend, then check a client reports it ready."""
        triton_server = utils.setup_server()
        frontend_service = utils.setup_service(triton_server, frontend)
        ready_client = utils.setup_client(client_type, url=url)

        is_ready = ready_client.is_server_ready()
        assert is_ready

        # Release everything in reverse order of creation.
        utils.teardown_client(ready_client)
        utils.teardown_service(frontend_service)
        utils.teardown_server(triton_server)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend", [HTTP_ARGS[0], GRPC_ARGS[0]])
    def test_service_double_start(self, frontend):
        """Calling start() on an already started frontend must raise."""
        triton_server = utils.setup_server()
        # setup_service() performs service.start()
        running_service = utils.setup_service(triton_server, frontend)

        expect_already_running = pytest.raises(
            tritonserver.AlreadyExistsError, match="server is already running."
        )
        with expect_already_running:
            running_service.start()

        utils.teardown_server(triton_server)
        utils.teardown_service(running_service)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend", [HTTP_ARGS[0], GRPC_ARGS[0]])
    def test_invalid_options(self, frontend):
        """Passing a plain dict as options must raise InvalidArgumentError."""
        triton_server = utils.setup_server()
        # Current flow is KServeHttp.Options or KServeGrpc.Options have to be
        # provided to ensure type and range validation occurs.
        untyped_options = {"port": 8001}
        with pytest.raises(
            tritonserver.InvalidArgumentError,
            match="Incorrect type for options. options argument must be of type",
        ):
            frontend(triton_server, untyped_options)

        utils.teardown_server(triton_server)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend", [HTTP_ARGS[0], GRPC_ARGS[0]])
    def test_server_service_order(self, frontend):
        """Stop the server before its frontend; the test passes if neither raises."""
        triton_server = utils.setup_server()
        frontend_service = utils.setup_service(triton_server, frontend)

        # Deliberately tear down the server first, then the service.
        utils.teardown_server(triton_server)
        utils.teardown_service(frontend_service)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend, client_type", [HTTP_ARGS[:2], GRPC_ARGS[:2]])
    def test_service_custom_port(self, frontend, client_type):
        """Start a frontend on a non-default port and verify a client reaches it.

        The port is defined once so that the frontend options and the client
        URL cannot drift apart.
        """
        custom_port = 8005
        server = utils.setup_server()
        options = frontend.Options(port=custom_port)
        service = utils.setup_service(server, frontend, options)
        client = utils.setup_client(client_type, url=f"localhost:{custom_port}")

        # Confirms that service starts at port 8005. The readiness result is
        # asserted (it was previously discarded) so that a reply of
        # "not ready" fails the test instead of passing silently.
        assert client.is_server_ready()

        utils.teardown_client(client)
        utils.teardown_service(service)
        utils.teardown_server(server)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend, client_type, url", [HTTP_ARGS, GRPC_ARGS])
    def test_inference(self, frontend, client_type, url):
        """Send one identity inference through the frontend and check the echo."""
        triton_server = utils.setup_server()
        frontend_service = utils.setup_service(triton_server, frontend)

        # TODO: use common/test_infer
        echoed = utils.send_and_test_inference_identity(client_type, url=url)
        assert echoed

        utils.teardown_service(frontend_service)
        utils.teardown_server(triton_server)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend, client_type, url", [GRPC_ARGS])
    def test_streaming_inference(self, frontend, client_type, url):
        """Run the streaming inference helper against a freshly started frontend."""
        triton_server = utils.setup_server()
        frontend_service = utils.setup_service(triton_server, frontend)

        stream_ok = utils.send_and_test_stream_inference(client_type, url)
        assert stream_ok

        utils.teardown_service(frontend_service)
        utils.teardown_server(triton_server)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend, client_type, url", [HTTP_ARGS])
    def test_http_generate_inference(self, frontend, client_type, url):
        """Run the HTTP generate-endpoint helper against a started frontend."""
        triton_server = utils.setup_server()
        frontend_service = utils.setup_service(triton_server, frontend)

        generate_ok = utils.send_and_test_generate_inference()
        assert generate_ok

        utils.teardown_service(frontend_service)
        utils.teardown_server(triton_server)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend, client_type, url", [HTTP_ARGS])
    def test_http_req_during_shutdown(self, frontend, client_type, url):
        """Stop the HTTP frontend while a delayed inference request is in flight.

        Expected sequence:
        1. The pending request fails because the endpoint was stopped.
        2. Closing the client does not raise.
        3. Stopping the server raises InternalError with an exit-timeout message.

        The client is built from the parametrized ``client_type`` and ``url``
        (instead of a hard-coded module and address) so this test follows the
        defaults in HTTP_ARGS, like its gRPC counterpart.
        """
        server = utils.setup_server()
        http_service = utils.setup_service(server, frontend)
        http_client = client_type.InferenceServerClient(url=url)
        model_name = "delayed_identity"
        delay = 2  # seconds
        input_data0 = np.array([[delay]], dtype=np.float32)

        input0 = client_type.InferInput("INPUT0", input_data0.shape, "FP32")
        input0.set_data_from_numpy(input_data0)

        inputs = [input0]
        outputs = [client_type.InferRequestedOutput("OUTPUT0")]

        async_request = http_client.async_infer(
            model_name=model_name, inputs=inputs, outputs=outputs
        )
        # http_service.stop() does not use graceful shutdown
        utils.teardown_service(http_service)

        # So, inference request will fail as http endpoints have been stopped.
        with pytest.raises(
            InferenceServerException, match="failed to obtain inference response"
        ):
            async_request.get_result(block=True, timeout=delay)

        # http_client.close() calls join() to terminate pool of greenlets
        # However, due to an unsuccessful get_result(), async_request is still
        # an active thread. Hence, join stalls until greenlet timeouts.
        # Does not throw an exception, but displays error in logs.
        utils.teardown_client(http_client)

        # delayed_identity will still be an active model
        # Hence, server.stop() causes InternalError: Timeout.
        with pytest.raises(
            tritonserver.InternalError,
            match="Exit timeout expired. Exiting immediately.",
        ):
            utils.teardown_server(server)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend, client_type, url", [GRPC_ARGS])
    def test_grpc_req_during_shutdown(self, frontend, client_type, url):
        """Stop the gRPC frontend while a delayed inference request is in flight.

        The async callback must be invoked exactly once with an
        InferenceServerException whose text contains one of the accepted
        failure messages.
        """
        server = utils.setup_server()
        grpc_service = utils.setup_service(server, frontend)
        grpc_client = grpcclient.InferenceServerClient(url=url)
        user_data = []

        def callback(user_data, result, error):
            # Record the error when there is one, otherwise the result.
            user_data.append(error if error else result)

        model_name = "delayed_identity"
        delay = 2  # seconds

        input_data0 = np.array([[delay]], dtype=np.float32)
        input0 = client_type.InferInput("INPUT0", input_data0.shape, "FP32")
        input0.set_data_from_numpy(input_data0)

        grpc_client.async_infer(
            model_name=model_name,
            inputs=[input0],
            outputs=[client_type.InferRequestedOutput("OUTPUT0")],
            callback=partial(callback, user_data),
        )

        utils.teardown_service(grpc_service)

        # Poll once per second, for at most delay + 1 seconds, until the
        # callback has delivered something.
        for _ in range(delay + 1):
            if user_data:
                break
            time.sleep(1)

        # Depending on when gRPC frontend shut down StatusCode can vary
        acceptable_failure_msgs = [
            "[StatusCode.CANCELLED] CANCELLED",
            "[StatusCode.UNAVAILABLE] failed to connect to all addresses",
        ]

        assert len(user_data) == 1
        delivered = user_data[0]
        assert isinstance(delivered, InferenceServerException)
        delivered_text = str(delivered)
        assert any(msg in delivered_text for msg in acceptable_failure_msgs)

        utils.teardown_client(grpc_client)
        utils.teardown_server(server)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend, url", [METRICS_ARGS])
    def test_metrics_default_port(self, frontend, url):
        """The metrics endpoint at the default address must answer with HTTP 200."""
        triton_server = utils.setup_server()
        metrics_service = utils.setup_service(triton_server, frontend)

        # Only the status code matters here; the parsed count is ignored.
        response_status = utils.get_metrics(f"http://{url}/metrics")[0]
        assert response_status == 200

        utils.teardown_service(metrics_service)
        utils.teardown_server(triton_server)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend", [Metrics])
    def test_metrics_custom_port(self, frontend, port=8005):
        """The metrics endpoint must answer with HTTP 200 on a custom port."""
        triton_server = utils.setup_server()
        custom_options = Metrics.Options(port=port)
        metrics_service = utils.setup_service(triton_server, frontend, custom_options)

        # Only the status code matters here; the parsed count is ignored.
        response_status = utils.get_metrics(f"http://localhost:{port}/metrics")[0]
        assert response_status == 200

        utils.teardown_service(metrics_service)
        utils.teardown_server(triton_server)

    @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    @pytest.mark.parametrize("frontend, url", [METRICS_ARGS])
    def test_metrics_update(self, frontend, url):
        """Check the reported inference count goes from 0 to 1 after one request."""
        metrics_endpoint = f"http://{url}/metrics"

        # Setup Server, KServeGrpc, Metrics
        triton_server = utils.setup_server()
        # The gRPC frontend is needed to send the inference request.
        grpc_frontend = utils.setup_service(triton_server, KServeGrpc)
        metrics_frontend = utils.setup_service(triton_server, frontend)

        # Get Metrics and verify inference count == 0 before inference
        status_before, count_before = utils.get_metrics(metrics_endpoint)
        assert status_before == 200
        assert count_before == 0

        # Send 1 Inference Request with send_and_test_inference()
        _, grpc_client_module, grpc_url = GRPC_ARGS
        assert utils.send_and_test_inference_identity(grpc_client_module, grpc_url)

        # Get Metrics and verify inference count == 1 after inference
        status_after, count_after = utils.get_metrics(metrics_endpoint)
        assert status_after == 200
        assert count_after == 1

        # Teardown Metrics, GrpcService, Server
        utils.teardown_service(grpc_frontend)
        utils.teardown_service(metrics_frontend)
        utils.teardown_server(triton_server)

    # KNOWN ISSUE: the test below is disabled because it causes a segfault.
    # Tracked by [DLIS-7231], to be addressed at a future date.
    # Once the server has been stopped, the underlying TRITONSERVER_Server instance
    # is deleted. However, the frontend does not know that the server instance
    # is no longer valid.
    # @pytest.mark.xfail(run=False, reason="Python model may not load after gRPC import")
    # def test_inference_after_server_stop(self):
    #     server = utils.setup_server()
    #     http_service = utils.setup_service(server, KServeHttp)
    #     http_client = utils.setup_client(httpclient, url="localhost:8000")

    #     utils.teardown_server(server)  # Server has been stopped

    #     model_name = "identity"
    #     input_data = np.array([["testing"]], dtype=object)
    #     # Create input and output objects
    #     inputs = [httpclient.InferInput("INPUT0", input_data.shape, "BYTES")]
    #     outputs = [httpclient.InferRequestedOutput("OUTPUT0")]

    #     # Set the data for the input tensor
    #     inputs[0].set_data_from_numpy(input_data)

    #     results = http_client.infer(model_name, inputs=inputs, outputs=outputs)

    #     utils.teardown_client(http_client)
    #     utils.teardown_service(http_service)


================================================
FILE: qa/L0_python_api/test_model_repository/delayed_identity/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        """
        Mock model that echoes its input after sleeping.

        Exactly one request is expected per call. The first element of that
        request's "INPUT0" tensor is read as the number of seconds to sleep;
        the same tensor data is returned as "OUTPUT0".
        """
        assert len(requests) == 1
        request = requests[0]

        delay_as_numpy = pb_utils.get_input_tensor_by_name(
            request, "INPUT0"
        ).as_numpy()
        delay_seconds = float(delay_as_numpy[0][0])

        # The response is built before sleeping but only returned afterwards.
        echo_tensor = pb_utils.Tensor("OUTPUT0", delay_as_numpy)
        response = pb_utils.InferenceResponse([echo_tensor])

        time.sleep(delay_seconds)
        return [response]


================================================
FILE: qa/L0_python_api/test_model_repository/delayed_identity/config.pbtxt
================================================
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model served by the Python backend; its implementation is in 1/model.py.
name: "delayed_identity"
backend: "python"
# NOTE(review): with max_batch_size > 0 the dims below presumably exclude the
# batch dimension (the tests send an input of shape [1, 1]) -- confirm against
# the Triton model configuration documentation.
max_batch_size: 64

# Single FP32 input; the model reads its first element as a delay in seconds.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# Single FP32 output carrying the same data as INPUT0.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# One model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]

================================================
FILE: qa/L0_python_api/test_model_repository/identity/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """This model loops through different dtypes to make sure that
    serialize_byte_tensor works correctly in the Python backend.

    Each response casts the input to the next dtype in ``_dtypes``. The
    position wraps around, so the model keeps working for any number of
    requests instead of raising IndexError once the list is exhausted.
    """

    def initialize(self, args):
        """Reset the dtype rotation; ``args`` is not used."""
        self._index = 0
        self._dtypes = [np.bytes_, np.object_]

    def _next_dtype(self):
        """Return the dtype for the next response and advance cyclically."""
        dtype = self._dtypes[self._index]
        self._index = (self._index + 1) % len(self._dtypes)
        return dtype

    def execute(self, requests):
        """Echo "INPUT0" of every request as "OUTPUT0", one response per request.

        The output array is the input array cast to the next dtype in the
        rotation.
        """
        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            out_tensor_0 = pb_utils.Tensor(
                "OUTPUT0", in_0.as_numpy().astype(self._next_dtype())
            )
            responses.append(pb_utils.InferenceResponse([out_tensor_0]))
        return responses


================================================
FILE: qa/L0_python_api/test_model_repository/identity/config.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model served by the Python backend; its implementation is in 1/model.py.
name: "identity"
backend: "python"
# NOTE(review): max_batch_size 0 presumably disables batching, making the dims
# below the full tensor shape (the tests send an input of shape [1]) -- confirm
# against the Triton model configuration documentation.
max_batch_size: 0

# Single string input.
input [
  {
    name: "INPUT0"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]
# Single string output carrying the echoed input.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]
# One model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_python_api/testing_utils.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import re
from functools import partial
from typing import Tuple, Union

import numpy as np
import requests
import tritonserver
from tritonfrontend import KServeGrpc, KServeHttp, Metrics


def setup_server(model_repository="test_model_repository") -> tritonserver.Server:
    """
    Start an in-process tritonserver instance and return it once it is ready.

    model_repository is resolved relative to the directory that contains this
    file. Error, warning and info logging are all enabled.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    repository_path = os.path.abspath(os.path.join(here, model_repository))

    # Starting Server Instance
    options = tritonserver.Options(
        server_id="TestServer",
        model_repository=repository_path,
        log_error=True,
        log_warn=True,
        log_info=True,
    )
    server = tritonserver.Server(options)
    return server.start(wait_until_ready=True)


def teardown_server(server: tritonserver.Server) -> None:
    """Stop the given server; any exception from stop() propagates to the caller."""
    server.stop()


def setup_service(
    server: tritonserver.Server,
    frontend: Union[KServeHttp, KServeGrpc, Metrics],
    options=None,
) -> Union[KServeHttp, KServeGrpc, Metrics]:
    """
    Instantiate the given tritonfrontend frontend for `server`, start it, and
    return the started service.

    `frontend` is called as a constructor, so callers pass the frontend class
    itself. `options` is forwarded unchanged (None when omitted).
    """
    started_service = frontend(server=server, options=options)
    started_service.start()
    return started_service


def teardown_service(service: Union[KServeHttp, KServeGrpc, Metrics]) -> None:
    """
    Stop a running frontend service.

    Metrics is included in the accepted types because the metrics tests stop
    their Metrics service through this helper as well.
    """
    service.stop()


def setup_client(
    frontend_client: Union["tritonclient.http", "tritonclient.grpc"], url: str
):
    """
    Build an InferenceServerClient from the given client module, pointed at url.
    """
    client_factory = frontend_client.InferenceServerClient
    return client_factory(url=url)


def teardown_client(
    client: Union[
        "tritonclient.http.InferenceServerClient",
        "tritonclient.grpc.InferenceServerClient",
    ]
) -> None:
    """Close the given client by calling its close() method."""
    client.close()


def send_and_test_inference_identity(
    frontend_client: Union["tritonclient.http", "tritonclient.grpc"],
    url: str,
) -> bool:
    """
    Sends an inference request to the model at test_model_repository/identity
    and verifies input == output.

    frontend_client is the client module: it supplies InferInput,
    InferRequestedOutput and, through setup_client(), the client itself.
    The client is always closed, even when the request raises.

    Returns True when the decoded first element of OUTPUT0 equals the string
    that was sent.
    """
    model_name = "identity"
    input_data = np.array(["testing"], dtype=object)

    # Create input and output objects
    inputs = [frontend_client.InferInput("INPUT0", input_data.shape, "BYTES")]
    outputs = [frontend_client.InferRequestedOutput("OUTPUT0")]
    # Set the data for the input tensor
    inputs[0].set_data_from_numpy(input_data)

    client = setup_client(frontend_client, url)
    try:
        # Perform inference request
        results = client.infer(model_name=model_name, inputs=inputs, outputs=outputs)
        output_data = results.as_numpy("OUTPUT0")  # Gather output data
    finally:
        # Do not leak the connection when infer()/as_numpy() fails.
        teardown_client(client)

    return input_data[0] == output_data[0].decode()


def send_and_test_stream_inference(
    frontend_client: Union["tritonclient.http", "tritonclient.grpc"],
    url: str,
) -> bool:
    """
    Sends multiple streaming requests to "delayed_identity" model with negligible delays
    and verifies the inputs matches outputs and the ordering is preserved.

    frontend_client is the client module. Its InferenceServerClient must offer
    start_stream(), async_stream_infer() and stop_stream(), which are the calls
    made here. The client is always closed, even when streaming raises.

    Returns True when every check passed; a failed check raises AssertionError.
    """
    num_requests = 100
    # Request i asks for a delay of i / 1000 seconds. The list is not called
    # `requests`, which would shadow the imported `requests` module.
    expected_values = [float(i) / 1000 for i in range(num_requests)]
    request_inputs = []
    for value in expected_values:
        input0_np = np.array([[value]], dtype=np.float32)
        inputs = [frontend_client.InferInput("INPUT0", input0_np.shape, "FP32")]
        inputs[0].set_data_from_numpy(input0_np)
        request_inputs.append(inputs)

    responses = []

    def callback(responses, result, error):
        responses.append({"result": result, "error": error})

    client = frontend_client.InferenceServerClient(url=url)
    try:
        client.start_stream(partial(callback, responses))
        for inputs in request_inputs:
            client.async_stream_infer("delayed_identity", inputs)
        client.stop_stream()
    finally:
        # Do not leak the connection when any streaming call fails.
        teardown_client(client)

    assert len(responses) == num_requests
    # Responses are compared by position, which also checks the ordering.
    for response, expected in zip(responses, expected_values):
        assert response["error"] is None
        output0_np = response["result"].as_numpy(name="OUTPUT0")
        assert np.allclose(output0_np, [[expected]])

    return True  # test passed


def send_and_test_generate_inference() -> bool:
    """
    Posts a single input to the identity model through the HTTP generate
    endpoint on localhost:8000 and reports whether the reply echoes it.

    Returns True only for a 200 response whose "OUTPUT0" equals the input text.
    """
    model_name = "identity"
    endpoint = f"http://localhost:8000/v2/models/{model_name}/generate"
    input_text = "testing"

    response = requests.post(endpoint, json={"INPUT0": input_text})
    if response.status_code != 200:
        return False

    # A reply without "OUTPUT0" is treated as an empty string, i.e. a mismatch.
    return response.json().get("OUTPUT0", "") == input_text


def get_metrics(
    metrics_url: str, model_name: str = "identity"
) -> Tuple[int, Union[int, None]]:
    """
    Sends a request to the metrics endpoint and returns the following information:
    1. Status Code = Indicates whether interaction with Metrics endpoint was successful
    2. Inference Count = Indicates whether metrics data being returned is accurate

    The inference count is None when the status code is not 200 or when no
    inference count for model_name is found in the response, which is why the
    second element of the return type is optional.
    """
    response = requests.get(metrics_url)
    if response.status_code != 200:
        return response.status_code, None

    inference_count = _extract_inference_count(response.text, model_name)
    return response.status_code, inference_count


def _extract_inference_count(metrics_data: str, model_name: str) -> Union[int, None]:
    """
    Helper function for get_metrics that parses metrics_data (prometheus-friendly
    format) with regex to extract the inference count of model_name.

    Returns the count of the first matching nv_inference_count sample as an
    int, or None when there is no sample for model_name.

    The value may be written with an exponent (for example "1e+06"). The
    exponent is part of the captured number, so such values are not truncated
    to their mantissa.
    """
    pattern = (
        rf'nv_inference_count\{{.*?model="{re.escape(model_name)}".*?\}}\s+'
        r"([0-9.]+(?:[eE][+-]?[0-9]+)?)"
    )
    match = re.search(pattern, metrics_data)
    if match is None:
        return None

    return int(float(match.group(1)))


================================================
FILE: qa/L0_python_client_unit_tests/test.sh
================================================
#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Combined stdout/stderr of the unittest run is written here.
TEST_LOG="./python_client_unit_tests.log"
PYTHON_CLIENT_UNIT_TESTS_DIR=/opt/tritonserver/qa/python_client_unit_tests/
PYTHON_CLIENT_UNIT_TESTS_CMD="python3 -m unittest discover -v -s $PYTHON_CLIENT_UNIT_TESTS_DIR -t $PYTHON_CLIENT_UNIT_TESTS_DIR"

# DLPack test requires Torch to validate GPU tensor
pip3 install torch

RET=0

rm -f "$TEST_LOG"

# Do not abort on a failing test run; its exit status is checked explicitly.
set +e

# The command string is left unquoted on purpose so that it is word-split
# into the program name and its arguments.
if ! $PYTHON_CLIENT_UNIT_TESTS_CMD > "$TEST_LOG" 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

if [ "$RET" -ne 0 ]; then
    # Show the captured log only when something went wrong.
    cat "$TEST_LOG"
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit "$RET"


================================================
FILE: qa/L0_pytorch_python_runtime/infer.py
================================================
#!/usr/bin/env python3

# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import concurrent.futures
import json
import sys

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import *


def infer_model_without_parameter_file():
    """Run one inference on the "addsub" model and validate both outputs.

    Two random float32 vectors of shape [4] are sent as INPUT0 and INPUT1 to
    the server at localhost:8000. OUTPUT0 is compared against their sum and
    OUTPUT1 against their difference using ``np.allclose``.

    Returns:
        bool: True when both outputs match, False otherwise. The reason for a
        mismatch is printed to stdout.
    """
    model_name = "addsub"
    shape = [4]

    with httpclient.InferenceServerClient("localhost:8000") as client:
        input0_data = np.random.rand(*shape).astype(np.float32)
        input1_data = np.random.rand(*shape).astype(np.float32)
        inputs = [
            httpclient.InferInput(
                "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
            ),
            httpclient.InferInput(
                "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
            ),
        ]

        inputs[0].set_data_from_numpy(input0_data)
        inputs[1].set_data_from_numpy(input1_data)

        outputs = [
            httpclient.InferRequestedOutput("OUTPUT0"),
            httpclient.InferRequestedOutput("OUTPUT1"),
        ]

        response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)

        output0_data = response.as_numpy("OUTPUT0")
        output1_data = response.as_numpy("OUTPUT1")

        print(
            "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
                input0_data, input1_data, output0_data
            )
        )
        # The difference is carried by OUTPUT1; label it as such so the log
        # is not misleading when a failure has to be diagnosed.
        print(
            "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
                input0_data, input1_data, output1_data
            )
        )

        if not np.allclose(input0_data + input1_data, output0_data):
            print(model_name + " error: incorrect sum")
            return False

        if not np.allclose(input0_data - input1_data, output1_data):
            print(model_name + " error: incorrect difference")
            return False

        print("PASS: " + model_name)
        return True


def infer_model_with_parameter_file(batch_size, data_offset=0):
    """Run one batched inference on the "neuralnet" model and check the result.

    Samples ``data_offset`` .. ``data_offset + batch_size`` are read from
    ``neuralnet_test_data.json`` ("input_data" and "labels" entries), sent as
    the model's "INPUT" tensor to the server at localhost:8000, and the
    per-row maximum of "OUTPUT" is compared against the labels with an
    absolute tolerance of 8.

    Args:
        batch_size: number of samples to send in the request.
        data_offset: index of the first sample to take from the data file.

    Returns:
        bool: True when every row is within tolerance, False otherwise.
    """
    model_name = "neuralnet"
    test_data_file = "neuralnet_test_data.json"
    np_dtype = np.single

    # Select the requested window of samples and their expected labels.
    with open(test_data_file) as data_file:
        test_data = json.load(data_file)
    window = slice(data_offset, data_offset + batch_size)
    input_data = np.array(test_data["input_data"], dtype=np_dtype)[window]
    labels = test_data["labels"][window]

    # Send the request and pull the output back as a numpy array.
    with httpclient.InferenceServerClient("localhost:8000") as client:
        infer_input = httpclient.InferInput(
            "INPUT", input_data.shape, np_to_triton_dtype(input_data.dtype)
        )
        infer_input.set_data_from_numpy(input_data)

        response = client.infer(model_name, [infer_input], request_id=str(1))
        output_data = response.as_numpy("OUTPUT")
        output_data_max = np.max(output_data, axis=1)

        print("Inference result: " + str(output_data))
        print("Inference result (max): " + str(output_data_max))
        print("Expected result: " + str(labels))

        within_tolerance = np.isclose(output_data_max, labels, atol=8).all()
        if not within_tolerance:
            print(model_name + " error: incorrect result")
            return False

    print("PASS: " + model_name)
    return True


def parallel_infer_a_full_dynamic_batch(max_batch_size):
    """Issue enough concurrent single-sample requests to fill one batch.

    One request of batch size 1 is submitted per slot of ``max_batch_size``,
    each using a different sample offset, and all of them are awaited.

    Args:
        max_batch_size: number of single-sample requests to send in parallel.

    Returns:
        bool: True when every request passed (also when there was nothing to
        send), False when at least one failed.
    """
    batch_size = 1
    num_requests = max_batch_size // batch_size
    # The default pool size is derived from the CPU count, so on a small
    # machine fewer than num_requests requests would be in flight at once.
    # Size the pool explicitly; max_workers must be at least 1.
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=max(1, num_requests)
    ) as pool:
        futures = [
            pool.submit(infer_model_with_parameter_file, batch_size, offset)
            for offset in range(num_requests)
        ]
        # Collect every result (no short-circuit) so all requests are awaited
        # and any exception raised by a worker is surfaced.
        results = [future.result() for future in futures]
    return all(results)


if __name__ == "__main__":
    # Every scenario is executed even if an earlier one fails, so the client
    # log always contains the output of all of them.
    results = [
        infer_model_without_parameter_file(),
        infer_model_with_parameter_file(batch_size=4),
        parallel_infer_a_full_dynamic_batch(max_batch_size=8),
    ]
    # Exit status 0 only when all scenarios passed.
    sys.exit(0 if all(results) else 1)


================================================
FILE: qa/L0_pytorch_python_runtime/test.sh
================================================
#!/bin/bash
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first command-line argument, when
# given, overrides NVIDIA_TRITON_SERVER_VERSION. The test aborts without one.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# An optional architecture name is appended to the version as a suffix.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Locations of test data, the server binary, the image client and backends.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="https://github.com/triton-inference-server"}
DATA_DIR=/data/inferenceserver/${REPO_VERSION}
IMAGE_DIR="/opt/tritonserver/qa/images"
SERVER=/opt/tritonserver/bin/tritonserver
IMAGE_CLIENT="/opt/tritonserver/qa/clients/image_client.py"
BACKENDS="/opt/tritonserver/backends"
# NOTE(review): run_server, used below, is presumably defined in util.sh.
source ../common/util.sh

# Build the execution environment archive with the pytorch_backend tooling
# when the PyTorch backend directory does not already contain it.
if [ ! -f "$BACKENDS/pytorch/pb_exec_env_model.py.tar.gz" ]; then
    PYTORCH_BACKEND_REPO_TAG=${PYTORCH_BACKEND_REPO_TAG:="main"}
    rm -rf pytorch_backend
    git clone --single-branch --depth=1 -b $PYTORCH_BACKEND_REPO_TAG ${TRITON_REPO_ORGANIZATION}/pytorch_backend
    (cd pytorch_backend/tools && \
        ./gen_pb_exec_env.sh && \
        mv pb_exec_env_model.py.tar.gz $BACKENDS/pytorch)
fi

rm -f *.log
# RET accumulates the overall result: it is set to 1 by any failing step.
RET=0

#
# Unit tests
#
# The backend's model.py is copied as py_runtime.py, the module name that
# unit_test.py imports, and the execution environment archive is unpacked
# locally so the unit test can run inside it.
rm -rf py_runtime_exec_env py_runtime_exec_env.tar.gz py_runtime.py
cp $BACKENDS/pytorch/model.py py_runtime.py
cp $BACKENDS/pytorch/pb_exec_env_model.py.tar.gz py_runtime_exec_env.tar.gz
mkdir py_runtime_exec_env && tar -xzf py_runtime_exec_env.tar.gz -C py_runtime_exec_env

# Failures from here on are recorded in RET instead of aborting the script.
set +e

# Command prefix that activates the unpacked environment and puts its lib
# directory first on LD_LIBRARY_PATH for the command that follows it.
UNIT_TEST_ENV="source py_runtime_exec_env/bin/activate && exec env LD_LIBRARY_PATH=`pwd`/py_runtime_exec_env/lib:$LD_LIBRARY_PATH"
UNIT_TEST_LOG="./unit_test.log"
bash -c "$UNIT_TEST_ENV python3 unit_test.py" > $UNIT_TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed PyTorch Python backend based runtime unit test\n***"
    cat $UNIT_TEST_LOG
    RET=1
fi

set -e

#
# End-to-end inference tests
#
# Assemble the model repository. The resnet50_libtorch config is rewritten:
# its "platform" line is removed and backend "pytorch", runtime "model.py"
# and a KIND_MODEL instance group are appended.
rm -rf models && mkdir models
cp -r $DATA_DIR/pytorch_model_store/* models
cp -r $DATA_DIR/libtorch_model_store/resnet50_libtorch models && \
    sed -i "/platform/d" models/resnet50_libtorch/config.pbtxt && \
    echo "backend: \"pytorch\"" >> models/resnet50_libtorch/config.pbtxt && \
    echo "runtime: \"model.py\"" >> models/resnet50_libtorch/config.pbtxt && \
    echo "instance_group: [{ kind: KIND_MODEL }]" >> models/resnet50_libtorch/config.pbtxt
# infer.py reads its test data from neuralnet_test_data.json in this directory.
mv models/neuralnet/1/test_data.json neuralnet_test_data.json

SERVER_ARGS="--model-repository=models --log-verbose=1"
SERVER_LOG="./infer.server.log"
run_server
# A SERVER_PID of "0" is treated as a failed server start.
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
else
    set +e

    # Check correct model instance initialization
    EXPECTED_LOG_MSGS=(
        'Loading '"'"'resnet50_libtorch'"'"' as TorchScript'
        'Torch parallelism settings for '"'"'addsub'"'"': NUM_THREADS = 1; NUM_INTEROP_THREADS = 1;'
        'Torch parallelism settings for '"'"'neuralnet'"'"': NUM_THREADS = 4; NUM_INTEROP_THREADS = 2;'
        'Torch parallelism settings for '"'"'resnet50_libtorch'"'"': NUM_THREADS = 1; NUM_INTEROP_THREADS = 1;'
        ''"'"'torch.compile'"'"' optional parameter(s) for '"'"'addsub'"'"': {'"'"'disable'"'"': True}'
        ''"'"'torch.compile'"'"' optional parameter(s) for '"'"'neuralnet'"'"': {}'
        ''"'"'torch.compile'"'"' optional parameter(s) for '"'"'resnet50_libtorch'"'"': {}'
    )
    # Every expected message must appear in the server log; a missing one
    # fails the test but the remaining messages are still checked.
    for EXPECTED_LOG_MSG in "${EXPECTED_LOG_MSGS[@]}"; do
        grep "$EXPECTED_LOG_MSG" $SERVER_LOG
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Cannot find \"$EXPECTED_LOG_MSG\" on server log. \n***"
            cat $SERVER_LOG
            RET=1
        fi
    done

    # Infer TorchScript model
    CLIENT_LOG="./infer.torchscript.log"
    python $IMAGE_CLIENT -m "resnet50_libtorch" -s INCEPTION -c 1 -b 2 "$IMAGE_DIR/vulture.jpeg" > $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed to inference TorchScript model\n***"
        cat $CLIENT_LOG
        RET=1
    fi

    # Infer PyTorch models
    CLIENT_LOG="./infer.pytorch.log"
    python infer.py > $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed to inference PyTorch models\n***"
        cat $CLIENT_LOG
        RET=1
    fi

    set -e

    # Stop the server and wait for it to exit.
    kill $SERVER_PID
    wait $SERVER_PID
fi

#
# Print result and exit
#
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi
exit $RET


================================================
FILE: qa/L0_pytorch_python_runtime/unit_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
import unittest

import torch

# satisfy Python runtime import requirements
# `import unittest` alone does not load the `unittest.mock` submodule, so it
# is imported explicitly instead of relying on another package loading it.
import unittest.mock

# The runtime imports triton_python_backend_utils, which is not installed as
# a regular module here; a MagicMock stands in for it so the import succeeds.
sys.modules["triton_python_backend_utils"] = unittest.mock.MagicMock()
# import modules from Python runtime to be tested
from py_runtime import _gather_torch_tensors, _scatter_torch_tensors


class PyTorchPythonBackendRuntimeUnittest(unittest.TestCase):
    # _gather_scatter_cases: [(tensors_scatter, tensors_gather, sections), ...]
    #   tensors_scatter: [an_infer_request, ...]
    #     an_infer_request: [a_torch_tensor_with_batch_dimension, ...]
    #   tensors_gather: [a_torch_tensor_gathering_all_requests, ...]
    #   sections: [batch_size_of_the_corresponding_infer_request, ...]
    _gather_scatter_cases = [
        # shape [batch=1, 1]
        ([[torch.tensor([[1]])]], [torch.tensor([[1]])], [1]),
        # shape [batch=1, 2]
        ([[torch.tensor([[1, 2]])]], [torch.tensor([[1, 2]])], [1]),
        # shape [batch=1, 2, 4]
        ([[torch.arange(8).reshape(1, 2, 4)]], [torch.arange(8).reshape(1, 2, 4)], [1]),
        # shape [batch=3, 1]
        ([[torch.arange(3).reshape(3, 1)]], [torch.arange(3).reshape(3, 1)], [3]),
        # shapes ([batch=1, 1], [batch=1, 2])
        (
            [[torch.tensor([[1]]), torch.tensor([[2, 3]])]],
            [torch.tensor([[1]]), torch.tensor([[2, 3]])],
            [1],
        ),
        # scatter shape [batch=1, 1] x 2 -> gather shape [batch=2, 1]
        (
            [[torch.tensor([[1]])], [torch.tensor([[2]])]],
            [torch.tensor([[1], [2]])],
            [1, 1],
        ),
        # scatter shape [batch=1, 2, 1] x 3 -> gather shape [batch=3, 2, 1]
        (
            [[torch.tensor([[[i], [i + 3]]])] for i in range(3)],
            [torch.tensor([[[0], [3]], [[1], [4]], [[2], [5]]])],
            [1, 1, 1],
        ),
        # scatter shape [batch=1, 1] & [batch=2, 1] -> gather shape [batch=3, 1]
        (
            [[torch.tensor([[1]])], [torch.tensor([[2], [3]])]],
            [torch.tensor([[1], [2], [3]])],
            [1, 2],
        ),
        # scatter shape [batch=3, 1, 1] & [batch=1, 1, 1] & [batch=2, 1, 1]
        # -> gather shape [batch=6, 1, 1]
        (
            [
                [torch.tensor([[[0]], [[1]], [[2]]])],
                [torch.tensor([[[3]]])],
                [torch.tensor([[[4]], [[5]]])],
            ],
            [torch.arange(6).reshape(6, 1, 1)],
            [3, 1, 2],
        ),
        # scatter shapes ([batch=3, 1, 1], [batch=3, 2]) & ([batch=2, 1, 1], [batch=2, 2])
        # -> gather shapes ([batch=5, 1, 1], [batch=5, 2])
        (
            [
                [
                    torch.tensor([[[0]], [[1]], [[2]]]),
                    torch.tensor([[5, 6], [7, 8], [9, 10]]),
                ],
                [torch.tensor([[[3]], [[4]]]), torch.tensor([[11, 12], [13, 14]])],
            ],
            [
                torch.arange(5).reshape(5, 1, 1),
                torch.arange(start=5, end=15).reshape(5, 2),
            ],
            [3, 2],
        ),
    ]

    def test_gather_torch_tensors(self):
        for (
            tensors_scatter,
            expected_tensors_gather,
            expected_sections,
        ) in self._gather_scatter_cases:
            tensors_gather, sections = _gather_torch_tensors(tensors_scatter)

            self.assertIsInstance(tensors_gather, list)
            self.assertEqual(len(tensors_gather), len(expected_tensors_gather))
            for j in range(len(expected_tensors_gather)):
                expected_tensor = expected_tensors_gather[j]
                tensor = tensors_gather[j]
                self.assertIsInstance(tensor, torch.Tensor)
                self.assertTrue(torch.equal(tensor, expected_tensor))

            self.assertIsInstance(sections, list)
            self.assertEqual(len(sections), len(expected_sections))
            for i in range(len(expected_sections)):
                expected_section = expected_sections[i]
                section = sections[i]
                self.assertIsInstance(section, int)
                self.assertEqual(section, expected_section)

    def test_scatter_torch_tensors(self):
        for (
            expected_tensors_scatter,
            tensors_gather,
            sections,
        ) in self._gather_scatter_cases:
            tensors_scatter = _scatter_torch_tensors(tensors_gather, sections)
            self.assertIsInstance(tensors_scatter, list)
            self.assertEqual(len(tensors_scatter), len(expected_tensors_scatter))
            for i in range(len(expected_tensors_scatter)):
                expected_tensors = expected_tensors_scatter[i]
                tensors = tensors_scatter[i]
                self.assertIsInstance(tensors, list)
                self.assertEqual(len(tensors), len(expected_tensors))
                for j in range(len(expected_tensors)):
                    expected_tensor = expected_tensors[j]
                    tensor = tensors[j]
                    self.assertIsInstance(tensor, torch.Tensor)
                    self.assertTrue(torch.equal(tensor, expected_tensor))


# Run the test cases above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_query/models/query/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "query"
backend: "query"
max_batch_size: 0
input [
  {
    name: "INPUT"
    data_type: TYPE_UINT8
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_UINT8
    dims: [ -1 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_UINT8
    dims: [ -1 ]
  }
]


================================================
FILE: qa/L0_query/query_e2e.py
================================================
#!/usr/bin/env python
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import numpy as np
import test_util as tu
import tritonclient.grpc as tritongrpcclient
import tritonclient.http as tritonhttpclient
from tritonclient.utils import InferenceServerException
from tritonclient.utils import cuda_shared_memory as cudashm


class QueryTest(tu.TestResultCollector):
    """End-to-end tests of the "query" model over HTTP and gRPC.

    Every inference against the model is expected to fail with an
    InferenceServerException whose message reports, for OUTPUT0 and OUTPUT1,
    the memory placement that was queried ("CPU 0" or "GPU 0"). The tests
    cover plain requests, outputs placed in CUDA shared memory regions, and
    CUDA shared memory regions of only 1 byte, for which the CPU placement
    is expected.
    """

    # (requested output name, CUDA shared memory region name) pairs.
    _SHM_OUTPUTS = (("OUTPUT0", "output0_data"), ("OUTPUT1", "output1_data"))

    def _make_inputs(self, client_module):
        """Build the single one-element UINT8 "INPUT" tensor for a request.

        Args:
            client_module: tritonclient.http or tritonclient.grpc; its
                InferInput class is used so the input matches the client.

        Returns:
            A one-element list holding the populated InferInput.
        """
        infer_input = client_module.InferInput("INPUT", [1], "UINT8")
        infer_input.set_data_from_numpy(np.arange(1, dtype=np.uint8))
        return [infer_input]

    def _assert_query_error(
        self, triton_client, inputs, expected_placement, outputs=None
    ):
        """Run inference and check the error reports the expected placement.

        Args:
            triton_client: client used to send the request.
            inputs: request inputs.
            expected_placement: text such as "CPU 0" or "GPU 0" that must
                follow each output name in the error message.
            outputs: requested outputs; omitted from the call when None.

        Raises:
            AssertionError: if no InferenceServerException is raised or its
                message lacks the expected placement for either output.
        """
        infer_kwargs = {"model_name": "query", "inputs": inputs}
        if outputs is not None:
            infer_kwargs["outputs"] = outputs

        with self.assertRaises(
            InferenceServerException, msg="expect error with query information"
        ) as raised:
            triton_client.infer(**infer_kwargs)

        message = raised.exception.message()
        for output_name, _ in self._SHM_OUTPUTS:
            self.assertIn(
                "{} {}".format(output_name, expected_placement), message
            )

    def _check_query_with_cuda_shm(
        self,
        triton_client,
        client_module,
        byte_size,
        expected_placement,
        **output_kwargs
    ):
        """Check the query result when outputs target CUDA shared memory.

        A region of ``byte_size`` bytes is created on device 0 and registered
        with the server for each output. The regions and registrations are
        released in a ``finally`` block so a failing assertion cannot leak
        them into later tests.

        Args:
            triton_client: client used to register regions and send the
                request.
            client_module: tritonclient.http or tritonclient.grpc.
            byte_size: size of each shared memory region in bytes.
            expected_placement: placement text expected in the error message.
            **output_kwargs: extra keyword arguments for InferRequestedOutput
                (the HTTP tests pass ``binary_data=True``).
        """
        inputs = self._make_inputs(client_module)

        # Start from a clean slate on the server side.
        triton_client.unregister_system_shared_memory()
        triton_client.unregister_cuda_shared_memory()

        shm_handles = []
        try:
            for _, region_name in self._SHM_OUTPUTS:
                shm_handles.append(
                    cudashm.create_shared_memory_region(region_name, byte_size, 0)
                )
            for (_, region_name), handle in zip(self._SHM_OUTPUTS, shm_handles):
                triton_client.register_cuda_shared_memory(
                    region_name, cudashm.get_raw_handle(handle), 0, byte_size
                )

            outputs = []
            for output_name, region_name in self._SHM_OUTPUTS:
                requested = client_module.InferRequestedOutput(
                    output_name, **output_kwargs
                )
                requested.set_shared_memory(region_name, byte_size)
                outputs.append(requested)

            self._assert_query_error(
                triton_client, inputs, expected_placement, outputs
            )
        finally:
            # Same release order as before: destroy the regions, then drop
            # every registration on the server.
            try:
                for handle in shm_handles:
                    cudashm.destroy_shared_memory_region(handle)
            finally:
                triton_client.unregister_system_shared_memory()
                triton_client.unregister_cuda_shared_memory()

    def test_http(self):
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        self._assert_query_error(
            triton_client, self._make_inputs(tritonhttpclient), "CPU 0"
        )

    def test_http_shared_memory(self):
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        # Set up CUDA shared memory for outputs
        self._check_query_with_cuda_shm(
            triton_client, tritonhttpclient, 4, "GPU 0", binary_data=True
        )

    def test_http_out_of_shared_memory(self):
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        # Set up too small CUDA shared memory for outputs, expect query
        # returns default value
        self._check_query_with_cuda_shm(
            triton_client, tritonhttpclient, 1, "CPU 0", binary_data=True
        )

    def test_grpc(self):
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
        self._assert_query_error(
            triton_client, self._make_inputs(tritongrpcclient), "CPU 0"
        )

    def test_grpc_shared_memory(self):
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
        # Set up CUDA shared memory for outputs
        self._check_query_with_cuda_shm(
            triton_client, tritongrpcclient, 4, "GPU 0"
        )

    def test_grpc_out_of_shared_memory(self):
        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
        # Set up too small CUDA shared memory for outputs, expect query
        # returns default value
        self._check_query_with_cuda_shm(
            triton_client, tritongrpcclient, 1, "CPU 0"
        )


# Run the test cases above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_query/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first command-line argument, when
# given, overrides NVIDIA_TRITON_SERVER_VERSION. The test aborts without one.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# An optional architecture name is appended to the version as a suffix.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# NOTE(review): run_server and check_test_results, used below, are
# presumably defined in util.sh.
source ../common/util.sh

# RET accumulates the overall result: it is set to 1 by any failing step.
RET=0

TEST_LOG="./query_test.log"
CLIENT_LOG="./query_client.log"
TEST_EXEC=./query_test
TEST_PY=./query_e2e.py
# Number of test results expected from query_e2e.py.
EXPECTED_NUM_TESTS="6"
TEST_RESULT_FILE='test_results.txt'


export CUDA_VISIBLE_DEVICES=0

rm -fr *.log

# The unit test executable runs with both variables unset.
unset TEST_FAIL_WITH_QUERY_RESULT
unset TEST_BYTE_SIZE

# Record a unit test failure in RET instead of aborting the script.
set +e
LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $TEST_EXEC >>$TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Query Unit Test Failed\n***"
    RET=1
fi
set -e

# Both variables are exported before the server is started.
# NOTE(review): presumably they are read by the "query" backend so that
# inference fails with the query result in the error message - confirm.
export TEST_FAIL_WITH_QUERY_RESULT=1
export TEST_BYTE_SIZE=4

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"
run_server
# A SERVER_PID of "0" is treated as a failed server start.
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Run the end-to-end client test; the result file is verified only when the
# client itself exited successfully.
set +e
python $TEST_PY >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

unset TEST_FAIL_WITH_QUERY_RESULT
unset TEST_BYTE_SIZE

# Stop the server and wait for it to exit.
kill $SERVER_PID
wait $SERVER_PID

# On failure all three logs are printed to help diagnosis.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $TEST_LOG
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_rate_limiter/rate_limiter_test.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import functools
import os
import threading
import time
import unittest

import numpy as np
import sequence_util as su
import tritonclient.grpc as grpcclient
from tritonclient.utils import *

# Number of requests a runner sends before it raises the shared exit signal.
_inference_count = 80
# Maximum number of in-flight requests a single runner allows itself.
_inference_concurrency = 8
# Seconds a runner waits for a free in-flight slot before asserting.
_response_wait_time_s = 10
# Seconds a runner waits for outstanding responses once sending has stopped.
_finish_wait_time_s = 10
# Module-wide stop flag: set by whichever runner first reaches
# _inference_count, and polled by every runner's send loop.
_exit_signal = False


class AsyncGrpcRunner:
    """Send asynchronous gRPC inferences to one model from a worker thread.

    The worker keeps at most ``_inference_concurrency`` requests in flight
    and stops sending once the module-level ``_exit_signal`` is set. The flag
    is shared by every runner in the process, so all runners stop as soon as
    any one of them has sent ``_inference_count`` requests.

    Parameters
    ----------
    tester
        Object providing ``assertTrue`` / ``assertFalse`` / ``assertEqual``
        (the running test case).
    server_url
        Address handed to ``grpcclient.InferenceServerClient``.
    model_name
        Name of the model to send requests to.
    delay_ms
        Pause, in milliseconds, after each request is sent.
    """

    def __init__(self, tester, server_url, model_name, delay_ms):
        self._tester = tester
        self._server_url = server_url
        self._model_name = model_name
        self._delay_ms = delay_ms

        self._input_data = []
        self._shape = [1, 1]
        self._dtype = np.float32
        # Results keyed by the integer request id echoed back in the response.
        self._results = {}
        self._processed_all = False
        self._errors = []
        self._inflight_requests = 0
        self._num_sent_request = 0
        self._processed_request_count = 0
        # Guards the counters above and signals response arrival.
        self._sync = threading.Condition()
        # The thread runs a wrapper rather than req_loop directly so that a
        # failure inside the loop is recorded instead of dying with the thread.
        self._req_thread = threading.Thread(target=self._run_req_loop, daemon=True)

    def _on_result(self, result, error):
        """Completion callback: record the result or error and free a slot."""
        with self._sync:
            if error:
                self._errors.append(error)
            else:
                this_id = int(result.get_response().id)
                self._results[this_id] = result
            self._inflight_requests -= 1
            self._sync.notify_all()

    def _run_req_loop(self):
        """Thread entry point: run ``req_loop`` and keep any exception.

        An exception raised in a worker thread is not propagated to the
        thread that joins it, so a failed assertion or client error would
        otherwise go unnoticed. It is stored in ``_errors`` and re-raised
        from ``_validate_run`` in the joining thread.
        """
        try:
            self.req_loop()
        except Exception as ex:
            with self._sync:
                self._errors.append(ex)

    def req_loop(self):
        """Send requests until the exit signal, then wait for all responses.

        On completion ``_processed_request_count`` holds the increase of the
        model's successful-inference counter over the run.

        Raises
        ------
        AssertionError
            Via ``tester.assertTrue`` when no in-flight slot frees up within
            ``_response_wait_time_s`` or outstanding responses do not arrive
            within ``_finish_wait_time_s``.
        """
        client = grpcclient.InferenceServerClient(self._server_url)

        inputs = [
            grpcclient.InferInput(
                "INPUT0", self._shape, np_to_triton_dtype(self._dtype)
            )
        ]

        def _check_can_send():
            return self._inflight_requests < _inference_concurrency

        def _all_processed():
            return self._inflight_requests == 0

        self._inflight_requests = 0
        start_stat = client.get_inference_statistics(model_name=self._model_name)
        global _exit_signal

        while not _exit_signal:
            input_numpy = np.random.random_sample(self._shape).astype(self._dtype)
            inputs[0].set_data_from_numpy(input_numpy)
            self._input_data.append(input_numpy)

            with self._sync:
                can_send = self._sync.wait_for(
                    _check_can_send, timeout=_response_wait_time_s
                )
                self._tester.assertTrue(
                    can_send,
                    "client didn't receive a response within {}s".format(
                        _response_wait_time_s
                    ),
                )

                client.async_infer(
                    model_name=self._model_name,
                    inputs=inputs,
                    request_id="{}".format(self._num_sent_request),
                    callback=self._on_result,
                )
                self._inflight_requests += 1
                self._num_sent_request += 1
                if self._num_sent_request == _inference_count:
                    _exit_signal = True
                time.sleep(self._delay_ms / 1000.0)

        # wait till receive all requested data
        with self._sync:
            self._processed_all = self._sync.wait_for(
                _all_processed, _finish_wait_time_s
            )
            self._tester.assertTrue(
                self._processed_all,
                "the processing didn't complete even after waiting for {}s".format(
                    _finish_wait_time_s
                ),
            )

        end_stat = client.get_inference_statistics(model_name=self._model_name)
        self._processed_request_count = (
            end_stat.model_stats[0].inference_stats.success.count
            - start_stat.model_stats[0].inference_stats.success.count
        )

    def start(self):
        """Start the worker thread."""
        self._req_thread.start()

    def _validate_run(self):
        """Re-raise the first recorded error, else check outputs echo inputs."""
        if self._errors:
            raise self._errors[0]
        self._tester.assertEqual(
            len(self._input_data),
            len(self._results.keys()),
            "the number of inputs and output should match",
        )
        for request_id, sent in enumerate(self._input_data):
            self._tester.assertFalse(
                (sent != self._results[request_id].as_numpy("OUTPUT0")).any(),
                "the output data should match with the input data",
            )

    def join(self):
        """Wait for the worker thread to finish, then validate the run."""
        self._req_thread.join()
        self._validate_run()


class RateLimiterTest(su.SequenceBatcherTestUtil):
    """Rate limiter tests driven against a server at localhost:8001."""

    def stress_models(self, model_names, delay_ms=0):
        """Stress every model concurrently, one AsyncGrpcRunner per model.

        Returns a dict mapping each model name to the number of requests the
        corresponding runner reported as processed. Any exception raised
        while creating, starting or joining the runners fails the test.
        """
        processed = {}
        try:
            workers = [
                AsyncGrpcRunner(self, "localhost:8001", name, delay_ms=delay_ms)
                for name in model_names
            ]
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()
                processed[worker._model_name] = worker._processed_request_count
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        return processed

    def test_single_model(self):
        # Sanity check: every inference request goes to one model and
        # all of them must be accounted for.
        target = "custom_zero_1_float32"
        counts = self.stress_models([target])

        self.assertEqual(counts[target], _inference_count)

    def test_cross_model_prioritization_limited_resource(self):
        # Two models receive requests, one running at priority 1 and
        # the other at priority 2. The available resource counts do not
        # allow the models to execute simultaneously.
        first_model = "custom_zero_1_float32"
        second_model = "custom_zero_1_float32_v2"

        # TODO: Validate the priority and resource counts are set correctly

        counts = self.stress_models([first_model, second_model])
        infer_ratio = counts[first_model] / float(counts[second_model])
        failure_msg = "Got infer ratio across models {}, expected closer to 2".format(
            infer_ratio
        )

        self.assertGreater(infer_ratio, 1.80, failure_msg)

    def test_cross_model_prioritization_plenty_resource(self):
        # Two models receive requests, one running at priority 1 and
        # the other at priority 2. The available resource counts will
        # allow both models to run simultaneously.
        first_model = "custom_zero_1_float32"
        second_model = "custom_zero_1_float32_v2"

        # TODO: Validate the priority and resource counts are set correctly

        counts = self.stress_models([first_model, second_model])
        infer_diff = abs(counts[first_model] - counts[second_model])
        failure_msg = (
            "Got infer difference between models {}, expected closer to 0".format(
                infer_diff
            )
        )

        self.assertGreater(10, infer_diff, failure_msg)

    def test_single_model_dynamic_batching(self):
        # All inference requests go to one model, with a delay between sends.
        self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
        target = "custom_zero_1_float32"
        counts = self.stress_models([target], delay_ms=100)

        self.assertEqual(counts[target], _inference_count)

        # Every request is expected to have been executed in a batch of 4.
        stats_client = grpcclient.InferenceServerClient("localhost:8001")
        stats = stats_client.get_inference_statistics(target, "1")
        self.assertEqual(len(stats.model_stats), 1, "expect 1 model stats")

        batch_stats = stats.model_stats[0].batch_stats
        num_batch_sizes = len(batch_stats)
        self.assertEqual(
            num_batch_sizes,
            1,
            "expected single batch-size, got {}".format(num_batch_sizes),
        )

        expected_executions = _inference_count / 4
        for entry in batch_stats:
            self.assertEqual(
                entry.batch_size,
                4,
                "unexpected batch-size {}".format(entry.batch_size),
            )
            # The execution count is read from one of the per-batch stats.
            executions = entry.compute_infer.count
            self.assertEqual(
                executions,
                expected_executions,
                "expected model-execution-count {} for batch size {}, got {}".format(
                    expected_executions, 4, executions
                ),
            )

    def test_single_model_sequence_batching(self):
        # One sequence is sent and the accumulator result is checked; the
        # result should come back immediately. This verifies that all
        # requests of the sequence are directed to the same instance.
        try:
            model_name = "custom_sequence_int32"
            self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)

            # Each step is (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay)):
            # values 1..9, with the first flagged "start" and the last "end".
            sequence_steps = (
                (("start", 1, None, None),)
                + tuple((None, value, None, None) for value in range(2, 9))
                + (("end", 9, None, None),)
            )
            self.check_sequence(
                "custom",
                model_name,
                np.int32,
                5,
                (4000, None),
                sequence_steps,
                45,
                "grpc",
            )

            self.check_deferred_exception()
            self.check_status(model_name, {1: 9}, 9, 9)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))


# Run the tests when executed as a script; unittest.main() takes the test
# names to run from the command line.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_rate_limiter/test.sh
================================================
#!/bin/bash
# Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument overrides
# NVIDIA_TRITON_SERVER_VERSION, and an empty result aborts the test.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
RATE_LIMITER_TEST=rate_limiter_test.py
TEST_RESULT_FILE='test_results.txt'

# Each location below can be overridden from the environment; the value
# after ":=" is the default.
MODELDIR=${MODELDIR:=`pwd`}
DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends


SERVER_ARGS_EXTRA="--backend-directory=${BACKEND_DIR}"
# run_server and check_test_results are not defined in this script;
# presumably they come from util.sh.
source ../common/util.sh

# Overall verdict: set to 1 by any failing step, used as the exit code.
RET=0

# Start from a clean slate: drop old logs and rebuild ./custom_models from
# the shared ../custom_models sources. custom_zero_1_float32_v2 starts as a
# copy of custom_zero_1_float32.
rm -f *.log
rm -fr ./custom_models && mkdir ./custom_models && \
cp -r ../custom_models/custom_zero_1_float32 ./custom_models/. && \
cp -r ../custom_models/custom_sequence_int32 ./custom_models/. && \
mkdir -p ./custom_models/custom_zero_1_float32/1 && \
cp -r ./custom_models/custom_zero_1_float32 ./custom_models/custom_zero_1_float32_v2


# custom_zero_1_float32: variable dims, max_batch_size 4, one KIND_GPU
# instance that requires 4 units of "resource1", and an execute_delay_ms
# parameter of 100.
(cd custom_models/custom_zero_1_float32 && \
        sed -i "s/dims:.*\[.*\]/dims: \[ -1 \]/g" config.pbtxt && \
        sed -i "s/max_batch_size:.*/max_batch_size: 4/g" config.pbtxt && \
        echo "instance_group [{"  >> config.pbtxt && \
        echo "kind: KIND_GPU count: 1"  >> config.pbtxt && \
        echo "rate_limiter { resources [{name: \"resource1\" count: 4 }]}"  >> config.pbtxt && \
        echo "}]" >> config.pbtxt && \
        echo "parameters [" >> config.pbtxt && \
        echo "{ key: \"execute_delay_ms\"; value: { string_value: \"100\" }}" >> config.pbtxt && \
        echo "]" >> config.pbtxt)


# custom_zero_1_float32_v2: same edits, but renamed, and its instance
# requires 2 units of "resource1" plus 2 units of the global "resource2",
# with priority 2.
(cd custom_models/custom_zero_1_float32_v2 && \
        sed -i "s/custom_zero_1_float32/custom_zero_1_float32_v2/g" config.pbtxt && \
        sed -i "s/dims:.*\[.*\]/dims: \[ -1 \]/g" config.pbtxt && \
        sed -i "s/max_batch_size:.*/max_batch_size: 4/g" config.pbtxt && \
        echo "instance_group [{"  >> config.pbtxt && \
        echo "kind: KIND_GPU count: 1"  >> config.pbtxt && \
        echo "rate_limiter { resources [{name: \"resource1\" count: 2 }, {name: \"resource2\" global: True count: 2 }] priority: 2}"  >> config.pbtxt && \
        echo "}]" >> config.pbtxt && \
        echo "parameters [" >> config.pbtxt && \
        echo "{ key: \"execute_delay_ms\"; value: { string_value: \"100\" }}" >> config.pbtxt && \
        echo "]" >> config.pbtxt)

##
## Test cases that fail to load models
##
# In every case below the server is expected NOT to start (run_server leaves
# SERVER_PID at "0") and the server log must contain a specific error. An
# unexpected start is recorded as a failure and the server is shut down. The
# "unexpected success" messages name the configuration of the case they
# belong to.

# Case1: resource1 is limited to 1, fewer than the 4 units required
SERVER_ARGS="--rate-limit=execution_count --rate-limit-resource=resource1:1 --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server_r1.log"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Unexpected success with resource count 1\n***"
    RET=1

    kill $SERVER_PID
    wait $SERVER_PID
fi

# grep may legitimately fail here; disable errexit so the failure is reported
# through RET instead of terminating the script.
set +e
grep "Resource count for \"resource1\" is limited to 1 which will prevent scheduling of one or more model instances, the minimum required count is 4" $SERVER_LOG
if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed. Expected error message while loading the model \"custom_zero_1_float32\"\n***"
    RET=1
fi

set -e

# Case2: resources sufficient only for one model
SERVER_ARGS="--rate-limit=execution_count --rate-limit-resource=resource1:3 --rate-limit-resource=resource2:2 --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server_r3.log"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Unexpected success with resource1 count 3\n***"
    RET=1

    kill $SERVER_PID
    wait $SERVER_PID
fi

set +e
grep "Resource count for \"resource1\" is limited to 3 which will prevent scheduling of one or more model instances, the minimum required count is 4" $SERVER_LOG
if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed. Expected error message while loading the model \"custom_zero_1_float32\"\n***"
    RET=1
fi

set -e

# Case3: Resource specified only for specific device id 10 and not for the GPU that loads the model instance.
SERVER_ARGS="--rate-limit=execution_count --rate-limit-resource=resource1:10:10 --rate-limit-resource=resource2:2 --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server_rdevice.log"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Unexpected success with resource1 specified only for device 10\n***"
    RET=1

    kill $SERVER_PID
    wait $SERVER_PID
fi

set +e
grep "Resource count for \"resource1\" is limited to 0 which will prevent scheduling of one or more model instances, the minimum required count is 4" $SERVER_LOG
if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed. Expected error message while loading the model \"custom_zero_1_float32\"\n***"
    RET=1
fi

set -e

# Case4: Conflicting resource types in the config
# A temporary third model declares "resource2" as device-specific while
# custom_zero_1_float32_v2 declares it global.
cp -r ./custom_models/custom_zero_1_float32_v2 ./custom_models/custom_zero_1_float32_v3
(cd custom_models/custom_zero_1_float32_v3 && \
        sed -i "s/custom_zero_1_float32_v2/custom_zero_1_float32_v3/g" config.pbtxt && \
        sed -i "s/global: True/global: False/g " config.pbtxt)

SERVER_ARGS="--rate-limit=execution_count --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server_conflict.log"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Unexpected success with conflicting resource types\n***"
    RET=1

    kill $SERVER_PID
    wait $SERVER_PID
fi

set +e
grep "Resource \"resource2\" is present as both global and device-specific resource in the model configuration." $SERVER_LOG
if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed. Expected error message for conflicting resource types\n***"
    RET=1
fi
# Remove the temporary model so it does not affect the following tests.
rm -rf ./custom_models/custom_zero_1_float32_v3

set -e

##
## Tests with cross-model prioritization with various cases:
##
# Each case starts a server with different resource limits, runs one test
# from rate_limiter_test.py against it, verifies the results file, and stops
# the server. A server that fails to start aborts the whole script.

# CASE1: Explicit limited resource: only allows one model to run at a time
SERVER_ARGS="--rate-limit=execution_count --rate-limit-resource=resource1:4 --rate-limit-resource=resource2:2 --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Errors are tolerated (set +e) so a test failure is recorded in RET and the
# server is still shut down afterwards.
set +e
python3 $RATE_LIMITER_TEST RateLimiterTest.test_cross_model_prioritization_limited_resource >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# CASE2: Implicit Limited resource: By default, server will select max resources of one of the
# model as available resource. This means only one model will run at a time.
SERVER_ARGS="--rate-limit=execution_count --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python3 $RATE_LIMITER_TEST RateLimiterTest.test_cross_model_prioritization_limited_resource >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# CASE3: Explicit plenty resource: Allows multiple models to run simultaneously
SERVER_ARGS="--rate-limit=execution_count --rate-limit-resource=resource1:6 --rate-limit-resource=resource2:2 --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

python3 $RATE_LIMITER_TEST RateLimiterTest.test_cross_model_prioritization_plenty_resource >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

##
## Tests with multiple instances of the same model
##
# Replace the second model with a second instance with same resource requirements and priority.
# TODO: Currently there is no way to check which instance got to run inferences hence we only
# check the resource constraint. Add more extensive tests for multiple instances once required
# information is made available.
rm -rf custom_models/custom_zero_1_float32_v2
(cd custom_models/custom_zero_1_float32 && \
        echo "instance_group [{"  >> config.pbtxt && \
        echo "kind: KIND_GPU count: 1"  >> config.pbtxt && \
        echo "rate_limiter { resources [{name: \"resource1\" count: 2 }, {name: \"resource2\" global: True count: 2 }] priority: 2}"  >> config.pbtxt && \
        echo "}]" >> config.pbtxt)

# CASE1: limited resource: only allows one model instance to run at a time.
SERVER_ARGS="--rate-limit=execution_count --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# SECONDS is reset so that it measures the duration of the client run.
SECONDS=0
python3 $RATE_LIMITER_TEST RateLimiterTest.test_single_model >>$CLIENT_LOG 2>&1
# Capture the client's exit status immediately: the assignment and echo below
# would otherwise overwrite $? before it is examined.
TEST_STATUS=$?
LIMITED_RESOURCE_TEST_DURATION=$SECONDS
echo -e "Limited resource time: ${LIMITED_RESOURCE_TEST_DURATION}s"
if [ $TEST_STATUS -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# CASE 2: plenty resource: allows both the instances to run simultaneously
SERVER_ARGS="--rate-limit=execution_count  --rate-limit-resource=resource1:6 --rate-limit-resource=resource2:2  --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
SECONDS=0
python3 $RATE_LIMITER_TEST RateLimiterTest.test_single_model >>$CLIENT_LOG 2>&1
# As above, save the exit status before any other command replaces $?.
TEST_STATUS=$?
PLENTY_RESOURCE_TEST_DURATION=$SECONDS
echo -e "Plenty resource time: ${PLENTY_RESOURCE_TEST_DURATION}s"
if [ $TEST_STATUS -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

# With both instances able to run at once, the run must not take longer than
# the limited-resource run did.
if [ $PLENTY_RESOURCE_TEST_DURATION -gt $LIMITED_RESOURCE_TEST_DURATION ]; then
   echo -e "Error: Test with limited resources should take more time"
   echo -e "\n***\n*** Test Failed\n***"
   RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Case 3: resources sufficient only for one model instance. Hence, should fail to load
SERVER_ARGS="--rate-limit=execution_count --rate-limit-resource=resource1:3 --rate-limit-resource=resource2:2 --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server_r3i.log"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Unexpected success with resource1 count 3\n***"
    RET=1

    kill $SERVER_PID
    wait $SERVER_PID
fi

# errexit is still active at this point; turn it off around grep so that a
# missing message is reported through RET instead of silently terminating the
# script.
set +e
grep "Resource count for \"resource1\" is limited to 3 which will prevent scheduling of one or more model instances, the minimum required count is 4" $SERVER_LOG
if [ $? -ne 0 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed. Expected error message while loading the model \"custom_zero_1_float32\"\n***"
    RET=1
fi
set -e

##
## Tests with dynamic batching
##
# Despite all the possible bs being preferred triton should always form full batches as
# the second instance would be blocked because of the resource constraints.
# The edits below raise execute_delay_ms to 1000 and append a dynamic_batching
# section to the model configuration.
(cd custom_models/custom_zero_1_float32 && \
        sed -i "s/.*execute_delay_ms.*/{ key: \"execute_delay_ms\"; value: { string_value: \"1000\" }}/g" config.pbtxt && \
        echo "dynamic_batching { preferred_batch_size: [ 1, 2, 3, 4 ]" >> config.pbtxt && \
        echo " max_queue_delay_microseconds: 5000000 }"  >> config.pbtxt)
# Exported so both the server and the python test see it; the test asserts
# that the variable is present in its environment.
export TRITONSERVER_DELAY_SCHEDULER=8
SERVER_ARGS="--rate-limit=execution_count --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Errors are tolerated (set +e) so a test failure is recorded in RET and the
# server is still shut down afterwards.
set +e
python3 $RATE_LIMITER_TEST RateLimiterTest.test_single_model_dynamic_batching >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# The sequence batching test below asserts that this variable is NOT set.
unset TRITONSERVER_DELAY_SCHEDULER

##
## Tests with sequence batching
##
# Send one sequence and check for correct accumulator result. The result should be returned immediately.
# This test checks whether all the requests are directed to the same instance despite there being other
# instances with higher priority.
FIRST_INSTANCE_RESOURCE="rate_limiter { resources [{name: \"resource1\" count: 4 }]}"
# Rewrite the sequence model's config: shorten the idle timeout, force
# max_batch_size 1, attach FIRST_INSTANCE_RESOURCE to the existing instance
# group, then append two more KIND_CPU instance groups with priorities 2 and 3.
# NOTE(review): the second sed also matches the "kind: KIND_CPU" text written
# by the first one, so a config that originally used KIND_GPU would get the
# count/rate_limiter lines inserted twice - confirm which kind the shipped
# config uses.
(cd custom_models/custom_sequence_int32/ && \
        sed -i "s/max_sequence_idle_microseconds:.*/max_sequence_idle_microseconds: 1000000/" config.pbtxt && \
        sed -i "s/^max_batch_size:.*/max_batch_size: 1/" config.pbtxt && \
        sed -i "s/kind: KIND_GPU/kind: KIND_CPU\\ncount: 1 \n${FIRST_INSTANCE_RESOURCE}/" config.pbtxt && \
        sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 1 \n${FIRST_INSTANCE_RESOURCE}/" config.pbtxt &&\
        echo "instance_group [{"  >> config.pbtxt && \
        echo "kind: KIND_CPU count: 1"  >> config.pbtxt && \
        echo "rate_limiter { resources [{name: \"resource1\" count: 2 }, {name: \"resource2\" global: True count: 2 }] priority: 2}"  >> config.pbtxt && \
        echo "}]" >> config.pbtxt && \
        echo "instance_group [{"  >> config.pbtxt && \
        echo "kind: KIND_CPU count: 2"  >> config.pbtxt && \
        echo "rate_limiter { resources [{name: \"resource1\" count: 2 }, {name: \"resource2\" global: True count: 2 }] priority: 3}"  >> config.pbtxt && \
        echo "}]" >> config.pbtxt)
SERVER_ARGS="--rate-limit=execution_count --model-repository=$MODELDIR/custom_models"
SERVER_LOG="./inference_server.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Errors are tolerated (set +e) so a test failure is recorded in RET and the
# server is still shut down afterwards.
set +e
python3 $RATE_LIMITER_TEST RateLimiterTest.test_single_model_sequence_batching >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Final verdict: RET is non-zero if any step above recorded a failure.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_register/config.pbtxt
================================================
# Copyright 2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Template model configuration. The test.sh in this directory copies it into
# each test repository and rewrites the name line below with sed.
name: "model"
backend: "identity"
# Batching is disabled.
max_batch_size: 0
# One variable-length INT32 input.
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# One variable-length INT32 output.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# A single CPU instance.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/L0_register/test.sh
================================================
#!/bin/bash
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument overrides
# NVIDIA_TRITON_SERVER_VERSION, and an empty result aborts the test.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

source ../common/util.sh

# Overall verdict: set to 1 on failure, used as the exit code.
RET=0

TEST_LOG="./register_api_test.log"
TEST_EXEC=./register_api_test

export CUDA_VISIBLE_DEVICES=0

rm -fr *.log

# Setup repositories for testing, note that we use
# model version as hint for which directory is used for model loading
# -p keeps the setup quiet and repeatable when the directories are left over
# from a previous run.
mkdir -p empty_models models_0 models_1
mkdir -p models_0/model_0/1 && \
    cp config.pbtxt models_0/model_0/. && \
    (cd models_0/model_0 && \
        sed -i "s/^name:.*/name: \"model_0\"/" config.pbtxt)
mkdir -p models_1/model_0/2 && \
    cp config.pbtxt models_1/model_0/. && \
    (cd models_1/model_0 && \
        sed -i "s/^name:.*/name: \"model_0\"/" config.pbtxt)
mkdir -p models_1/model_1/3 && \
    cp config.pbtxt models_1/model_1/. && \
    (cd models_1/model_1 && \
        sed -i "s/^name:.*/name: \"model_1\"/" config.pbtxt)

# Errors are tolerated (set +e) so a failing test binary is recorded in RET
# and its log is printed below.
set +e
LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $TEST_EXEC >>$TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Register API Unit Test Failed\n***"
    RET=1
fi
set -e

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $TEST_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_repoagent_checksum/identity_test.py
================================================
#!/usr/bin/python

# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import sys

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

# Parsed command-line options; populated only when the file runs as a script.
FLAGS = None

if __name__ == "__main__":
    # Smoke test for the "identity_int32" model: send INT32 tensors (one empty,
    # one with 7 elements) over HTTP or gRPC and require OUTPUT0 to equal
    # INPUT0. The process exits with status 1 on an unknown protocol, a
    # missing output or a mismatch.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-u", "--url", type=str, required=False, help="Inference server URL."
    )
    parser.add_argument(
        "-i",
        "--protocol",
        type=str,
        required=False,
        default="http",
        help='Protocol ("http"/"grpc") used to '
        + 'communicate with inference service. Default is "http".',
    )

    FLAGS = parser.parse_args()
    if (FLAGS.protocol != "http") and (FLAGS.protocol != "grpc"):
        print(
            'unexpected protocol "{}", expects "http" or "grpc"'.format(FLAGS.protocol)
        )
        # sys.exit, not the site-provided exit(): the latter is meant for the
        # interactive interpreter and is unavailable when site is not loaded.
        sys.exit(1)

    # Both client modules expose the same names used below
    # (InferenceServerClient, InferInput), so the rest is protocol-agnostic.
    client_util = httpclient if FLAGS.protocol == "http" else grpcclient

    # Default endpoint depends on the protocol: 8000 for HTTP, 8001 for gRPC.
    if FLAGS.url is None:
        FLAGS.url = "localhost:8000" if FLAGS.protocol == "http" else "localhost:8001"

    # Reuse a single client for all sync tests
    with client_util.InferenceServerClient(FLAGS.url, verbose=FLAGS.verbose) as client:
        for model_name, np_dtype, shape in (
            # yapf: disable
            ("identity_int32", np.int32, [0]),
            ("identity_int32", np.int32, [7])
        ):
            # yapf: enable
            if np_dtype != object:
                # Numeric input: scaled random values cast to the target dtype.
                input_data = (16384 * np.random.randn(*shape)).astype(np_dtype)
            else:
                # Object (string) input: the constant 16384 rendered as text.
                in0 = 16384 * np.ones(shape, dtype="int")
                in0n = np.array([str(x) for x in in0.reshape(in0.size)], dtype=object)
                input_data = in0n.reshape(in0.shape)
            inputs = [
                client_util.InferInput(
                    "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
                )
            ]
            inputs[0].set_data_from_numpy(input_data)

            results = client.infer(model_name, inputs)
            print(results)

            # Make sure outputs are expected value
            output_data = results.as_numpy("OUTPUT0")
            if output_data is None:
                print("error: expected 'OUTPUT0'")
                sys.exit(1)

            # String outputs are decoded before being compared with the
            # str-typed input.
            if np_dtype == object:
                output_data = np.char.decode(output_data)

            if not np.array_equal(output_data, input_data):
                print(
                    "error: expected output {} to match input {}".format(
                        output_data, input_data
                    )
                )
                sys.exit(1)


================================================
FILE: qa/L0_repoagent_checksum/models/identity_int32/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Identity model with one variable-length INT32 input that is returned
# unchanged as OUTPUT0. Batching is disabled (max_batch_size: 0).
name: "identity_int32"
backend: "identity"
max_batch_size: 0
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]
# "checksum" repository agent: one parameter per file to verify. The key is
# "MD5:<path relative to this model directory>" and the value is the
# expected MD5 digest of that file.
model_repository_agents
{
  agents [
    {
      name: "checksum",
      parameters [
        {
          # Deliberately invalid so that the first server start in test.sh
          # fails with a mismatched-hash error. test.sh then replaces
          # "invalid_checksum" with the real md5sum of the library.
          key: "MD5:1/libtriton_identity.so",
          value: "invalid_checksum"
        },
        {
          # Presumably the MD5 of the data_file shipped next to this
          # config; editing data_file requires updating this value.
          key: "MD5:data_file",
          value: "4e41030bb1531cd68b2c0277b0aad2e9"
        }
      ]
    }
  ]
}

================================================
FILE: qa/L0_repoagent_checksum/models/identity_int32/data_file
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

This file is treated as some other files needed by the model
and thus the repo agent should also verify its checksum.


================================================
FILE: qa/L0_repoagent_checksum/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

export CUDA_VISIBLE_DEVICES=0

CLIENT_PY=./identity_test.py
CLIENT_LOG="./client.log"

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"
# Shared QA helpers; run_server (used below) is presumably defined here and
# reports the server through SERVER_PID ("0" when it did not start).
source ../common/util.sh

rm -fr *.log

# Overall exit status of this script: 0 = passed, 1 = failed.
RET=0

# Stage 1: the config ships with an invalid checksum, so the server is
# expected to fail to start and to log the MD5 mismatch for the library.
run_server
if [ "$SERVER_PID" == "0" ]; then
    set +e
    grep "'identity_int32': Mismatched MD5 hash for file 1/libtriton_identity.so" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected error on mismatched MD5 hash\n***"
        cat $SERVER_LOG
        RET=1
    fi
    set -e
else
    echo -e "\n***\n*** Expect fail to start $SERVER\n***"
    cat $SERVER_LOG
    kill $SERVER_PID
    wait $SERVER_PID
    exit 1
fi

# Stage 2: patch the config with the real MD5 of the backend library.
# Every step is chained with && and the hash must be non-empty, so that a
# failed cd or an unreadable library can never write an empty or wrong
# value into config.pbtxt. The whole chain is the condition of the "if",
# which also keeps errexit from aborting the script without a message.
if ! (cd models/identity_int32 && \
        model_hash=$(md5sum 1/libtriton_identity.so | cut -d' ' -f 1) && \
        [ -n "$model_hash" ] && \
        sed -i "s/invalid_checksum/${model_hash}/" config.pbtxt); then
    echo -e "\n***\n*** Failed to set MD5 checksum in config.pbtxt\n***"
    exit 1
fi

# Server should run successfully
rm -fr *.log
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** fail to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Run the identity client over both protocols; errexit is disabled around
# each run so a failure is recorded in RET and the other protocol still runs.
for PROTOCOL in http grpc; do
    set +e
    python $CLIENT_PY -i $PROTOCOL -v >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        RET=1
    fi
    set -e
done

kill $SERVER_PID
wait $SERVER_PID

# Report the result; the logs are dumped only on failure.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $SERVER_LOG
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_request_cancellation/grpc_cancellation_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import asyncio
import os
import queue
import re
import time
import unittest
from functools import partial

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.grpc.aio as grpcclientaio
from tritonclient.utils import InferenceServerException


class UserData:
    """Container handed to the inference callback to collect its outcomes."""

    def __init__(self):
        # FIFO queue that receives one entry per completed request.
        completed = queue.Queue()
        self._completed_requests = completed


def callback(user_data, result, error):
    """Queue the outcome of a request on ``user_data._completed_requests``.

    The error is queued when one is reported (truthy); otherwise the result
    is queued.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


class GrpcCancellationTest(unittest.IsolatedAsyncioTestCase):
    """Exercise client-initiated cancellation of gRPC inference requests.

    Each test sends a single request to ``_model_name`` through the
    synchronous or the asyncio gRPC client, cancels it on the client side
    and checks that the cancellation is reported back. ``tearDown`` also
    fails the test when it ran for ``_model_delay * test_duration_delta``
    seconds or longer.
    """

    _model_name = "custom_identity_int32"
    _model_delay = 10.0  # seconds
    _grpc_params = {"url": "localhost:8001", "verbose": True}

    def setUp(self):
        """Create both clients, the callback sink and the request payload."""
        self._client = grpcclient.InferenceServerClient(**self._grpc_params)
        # Cleanups run after tearDown, even when the test fails, so the
        # channels are always released and closing them is not part of the
        # duration measured by tearDown.
        self.addCleanup(self._client.close)
        self._client_aio = grpcclientaio.InferenceServerClient(**self._grpc_params)
        self.addAsyncCleanup(self._client_aio.close)
        self._user_data = UserData()
        self._callback = partial(callback, self._user_data)
        self._prepare_request()
        self._start_time = time.time()  # seconds
        # Fraction (or multiple) of _model_delay a test may take; longer
        # tests override it at their start.
        self.test_duration_delta = 0.5

    def tearDown(self):
        """Record the end time and enforce the per-test duration budget."""
        self._end_time = time.time()  # seconds
        self._assert_max_duration()

    @staticmethod
    def _delay_env_sec(name):
        """Return the delay stored in environment variable ``name`` in seconds.

        The variable is expected to hold an integer number of milliseconds;
        the value is divided by 1000.

        Raises:
            RuntimeError: if the variable is not set.
            ValueError: if its value is not an integer.
        """
        raw_ms = os.getenv(name)
        if raw_ms is None:
            raise RuntimeError(
                f"environment variable {name} is not set; "
                "expected a delay in milliseconds"
            )
        return int(raw_ms) / 1000

    @staticmethod
    def _count_new_infer_handlers(server_log_name):
        """Count the 'New request handler' entries in the given server log."""
        with open(server_log_name, "r") as f:
            server_log = f.read()
        return len(re.findall("New request handler for ModelInferHandler", server_log))

    def _prepare_request(self):
        """Build the single INT32 [1, 1] input and the requested output."""
        self._inputs = []
        self._inputs.append(grpcclient.InferInput("INPUT0", [1, 1], "INT32"))
        self._outputs = []
        self._outputs.append(grpcclient.InferRequestedOutput("OUTPUT0"))
        self._inputs[0].set_data_from_numpy(np.array([[10]], dtype=np.int32))

    def _assert_max_duration(self):
        """Fail unless the test ran for less than its duration budget."""
        max_duration = self._model_delay * self.test_duration_delta  # seconds
        duration = self._end_time - self._start_time  # seconds
        self.assertLess(
            duration,
            max_duration,
            f"test runtime expected less than {max_duration}s response time, got {duration}s",
        )

    def _assert_callback_cancelled(self):
        """Assert the callback received a local-cancellation error."""
        self.assertFalse(self._user_data._completed_requests.empty())
        data_item = self._user_data._completed_requests.get()
        self.assertIsInstance(data_item, InferenceServerException)
        self.assertIn("Locally cancelled by application!", str(data_item))

    def test_grpc_async_infer(self):
        future = self._client.async_infer(
            model_name=self._model_name,
            inputs=self._inputs,
            callback=self._callback,
            outputs=self._outputs,
        )
        time.sleep(2)  # ensure the inference has started
        future.cancel()
        time.sleep(0.1)  # context switch
        self._assert_callback_cancelled()

    def test_grpc_stream_infer(self):
        self._client.start_stream(callback=self._callback)
        self._client.async_stream_infer(
            model_name=self._model_name, inputs=self._inputs, outputs=self._outputs
        )
        time.sleep(2)  # ensure the inference has started
        self._client.stop_stream(cancel_requests=True)
        self._assert_callback_cancelled()

    async def test_aio_grpc_async_infer(self):
        infer_task = asyncio.create_task(
            self._client_aio.infer(
                model_name=self._model_name, inputs=self._inputs, outputs=self._outputs
            )
        )
        await asyncio.sleep(2)  # ensure the inference has started
        infer_task.cancel()
        with self.assertRaises(asyncio.CancelledError):
            await infer_task

    async def test_aio_grpc_stream_infer(self):
        async def requests_generator():
            yield {
                "model_name": self._model_name,
                "inputs": self._inputs,
                "outputs": self._outputs,
            }

        responses_iterator = self._client_aio.stream_infer(requests_generator())
        await asyncio.sleep(2)  # ensure the inference has started
        self.assertTrue(responses_iterator.cancel())
        with self.assertRaises(asyncio.CancelledError):
            async for result, error in responses_iterator:
                self._callback(result, error)

    def test_grpc_async_infer_cancellation_at_step_start(self):
        # This is a longer test
        self.test_duration_delta = 4.5
        server_log_name = "grpc_cancellation_test.test_grpc_async_infer_cancellation_at_step_start.server.log"

        # Exactly two handler entries are expected in the log before the
        # request is sent.
        prev_new_req_handl_count = self._count_new_infer_handlers(server_log_name)
        self.assertEqual(
            prev_new_req_handl_count,
            2,
            f"Expected 2 request handler for ModelInferHandler log entries, but got {prev_new_req_handl_count}",
        )
        future = self._client.async_infer(
            model_name=self._model_name,
            inputs=self._inputs,
            callback=self._callback,
            outputs=self._outputs,
        )
        time.sleep(2)  # ensure the inference request reached server
        future.cancel()
        # ensures TRITONSERVER_DELAY_GRPC_PROCESS delay passed on the server
        time.sleep(self._model_delay * 2)

        # A new handler entry must have been logged after the cancellation.
        cur_new_req_handl_count = self._count_new_infer_handlers(server_log_name)
        self.assertGreater(
            cur_new_req_handl_count,
            prev_new_req_handl_count,
            "gRPC Cancellation on step START Test Failed: New request handler for ModelInferHandler was not created",
        )

    def test_grpc_async_infer_response_complete_during_cancellation(self):
        # long test
        self.test_duration_delta = 2
        delay_notification_sec = self._delay_env_sec(
            "TRITONSERVER_DELAY_GRPC_NOTIFICATION"
        )
        delay_queue_cancellation_sec = self._delay_env_sec(
            "TRITONSERVER_DELAY_GRPC_ENQUEUE"
        )
        future = self._client.async_infer(
            model_name=self._model_name,
            inputs=self._inputs,
            callback=self._callback,
            outputs=self._outputs,
        )
        # ensure cancellation is received before InferResponseComplete and is processed after InferResponseComplete
        time.sleep(self._model_delay - 2)
        future.cancel()
        time.sleep(
            delay_notification_sec + delay_queue_cancellation_sec
        )  # ensure the cancellation is processed
        self._assert_callback_cancelled()

    def test_grpc_async_infer_cancellation_before_finish_0(self):
        # First version of test_grpc_async_infer_cancellation_before_finish
        # Cancellation notification is processed before the final response state.
        # long test
        self.test_duration_delta = 2
        delay_notification_sec = self._delay_env_sec(
            "TRITONSERVER_DELAY_GRPC_NOTIFICATION"
        )
        future = self._client.async_infer(
            model_name=self._model_name,
            inputs=self._inputs,
            callback=self._callback,
            outputs=self._outputs,
        )
        # ensure the cancellation is received between InferResponseComplete checking cancellation and Finish
        time.sleep(self._model_delay + 2)
        future.cancel()
        time.sleep(delay_notification_sec + 1)  # ensure the cancellation is processed
        self._assert_callback_cancelled()

    def test_grpc_async_infer_cancellation_before_finish_1(self):
        # Second version of test_grpc_async_infer_cancellation_before_finish
        # Cancellation notification is processed after the final response state.
        # long test
        self.test_duration_delta = 2
        delay_process_entry_sec = self._delay_env_sec(
            "TRITONSERVER_DELAY_GRPC_PROCESS_ENTRY"
        )
        delay_response_completion_sec = self._delay_env_sec(
            "TRITONSERVER_DELAY_RESPONSE_COMPLETION"
        )
        future = self._client.async_infer(
            model_name=self._model_name,
            inputs=self._inputs,
            callback=self._callback,
            outputs=self._outputs,
        )
        # ensure the cancellation is received between InferResponseComplete checking cancellation and Finish
        time.sleep(self._model_delay + delay_process_entry_sec + 2)
        future.cancel()
        time.sleep(
            delay_response_completion_sec
        )  # ensure the cancellation is processed
        self._assert_callback_cancelled()

    def test_grpc_async_infer_cancellation_before_response_complete_and_process_after_final_response(
        self,
    ):
        # Received cancellation before InferResponseComplete and the notification
        # state is processed after processing final response state.
        # long test
        self.test_duration_delta = 2
        delay_notification_sec = self._delay_env_sec(
            "TRITONSERVER_DELAY_GRPC_NOTIFICATION"
        )
        # The value is not used for timing below; the variable is still read
        # so the test keeps failing fast when it is missing from the
        # environment.
        self._delay_env_sec("TRITONSERVER_DELAY_RESPONSE_COMPLETE_EXEC")
        future = self._client.async_infer(
            model_name=self._model_name,
            inputs=self._inputs,
            callback=self._callback,
            outputs=self._outputs,
        )
        # ensure the cancellation is received before InferResponseComplete checking cancellation
        time.sleep(self._model_delay + 2)
        future.cancel()
        time.sleep(delay_notification_sec + 1)  # ensure the cancellation is processed
        self._assert_callback_cancelled()


if __name__ == "__main__":
    # Run the test cases of this module with the default unittest runner.
    unittest.main()


================================================
FILE: qa/L0_request_cancellation/implicit_state_model/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration for the TorchScript model produced by gen_model.py.
backend: "pytorch"
max_batch_size: 1

# Number of busy-loop iterations the model runs before answering
# (presumably bound to forward()'s delay_itrs argument - confirm).
input {
    name: "DELAY_ITRS__0"
    data_type: TYPE_INT64
    dims: [ 1 ]
}
output {
    name: "DUMMY_OUT__0"
    data_type: TYPE_INT64
    dims: [ 1 ]
}

sequence_batching {
  # 6,000,000 us = 6 s; implicit_state_test.py relies on this value for its
  # sequence timeout ("sequence_timeout = 6  # secs").
  max_sequence_idle_microseconds: 6000000
  oldest { max_candidate_sequences: 1 }
  # Control tensors supplied to the model in addition to the request input:
  # the sequence-start flag and the sequence (correlation) ID.
  control_input [
    {
      name: "SEQ_START__1"
      control {
        kind: CONTROL_SEQUENCE_START
        fp32_false_true: [ 0, 1 ]
      }
    },
    {
      name: "SEQ_ID__2"
      control {
        kind: CONTROL_SEQUENCE_CORRID
        data_type: TYPE_INT64
      }
    }
  ]
  # Implicit state: a single INT64 element that is zero-initialized.
  state {
    input_name: "SEQ_STATE_IN__3"
    output_name: "SEQ_STATE_OUT__1"
    data_type: TYPE_INT64
    dims: 1
    initial_state {
      name: "initial_state"
      data_type: TYPE_INT64
      dims: 1
      zero_data: true
    }
  }
}

instance_group {
  kind: KIND_CPU
  count: 1
}


================================================
FILE: qa/L0_request_cancellation/implicit_state_model/gen_model.py
================================================
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch


class ImplicitStateModel(torch.nn.Module):
    """Sequence model that stores the sequence ID in its sequence state.

    On the first request of a sequence the state is set to the sequence ID;
    on later requests the incoming state is passed through unchanged and a
    mismatch with the sequence ID is reported on stdout. The module is
    compiled with ``torch.jit.script`` (see the ``__main__`` section), so
    ``forward`` must stay TorchScript-compatible.
    """

    def __init__(self):
        super().__init__()

    # forward(delay_itrs, seq_start, seq_id, seq_state_in)
    #   delay_itrs:   number of iterations of the busy loop below.
    #   seq_start:    truthy on the first request of a sequence.
    #   seq_id:       ID of the sequence the request belongs to.
    #   seq_state_in: sequence state carried over from the previous request.
    # Returns (dummy_out, seq_state_out); both refer to the same value.
    # NOTE(review): argument types are left unannotated; presumably all four
    # are tensors as supplied by the serving backend - confirm.
    def forward(self, delay_itrs, seq_start, seq_id, seq_state_in):
        # if not sequence start, verify sequence state match sequence id
        if not seq_start and seq_id != seq_state_in:
            print(
                f"[MODEL ERROR] Invalid sequence state, expect {seq_id}, got {seq_state_in}"
            )
        # delay the execution
        # (busy loop; the accumulated value is never used afterwards)
        delay = 0
        for i in range(int(delay_itrs)):
            delay += i
        # set sequence state, do not modify state unless sequence starting
        if seq_start:
            seq_state_out = seq_id
        else:
            seq_state_out = seq_state_in
        # The regular output simply mirrors the outgoing state.
        dummy_out = seq_state_out
        return dummy_out, seq_state_out


if __name__ == "__main__":
    # Compile the module with TorchScript and write it to "model.pt" in the
    # current working directory.
    torch.jit.save(torch.jit.script(ImplicitStateModel()), "model.pt")


================================================
FILE: qa/L0_request_cancellation/implicit_state_test.py
================================================
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import time
import unittest

import numpy as np
import tritonclient.grpc as grpcclient


class TestImplicitState(unittest.TestCase):
    def _get_inputs(self, delay_itrs):
        shape = [1, 1]
        inputs = [grpcclient.InferInput("DELAY_ITRS__0", shape, "INT64")]
        inputs[0].set_data_from_numpy(np.array([[delay_itrs]], np.int64))
        return inputs

    def _generate_streaming_callback_and_response_pair(self):
        response = []  # [{"result": result, "error": error}, ...]

        def callback(result, error):
            response.append({"result": result, "error": error})

        return callback, response

    def _sequence_state_model_infer(self, num_reqs, seq_ids, delay_itrs, cancel_reqs):
        model_name = "sequence_state"
        callback, response = self._generate_streaming_callback_and_response_pair()
        with grpcclient.InferenceServerClient("localhost:8001") as client:
            client.start_stream(callback)
            seq_start = True
            for req_id in range(num_reqs):
                for seq_id in seq_ids:
                    client.async_stream_infer(
                        model_name,
                        self._get_inputs(delay_itrs),
                        sequence_id=seq_id,
                        sequence_start=seq_start,
                    )
                    time.sleep(0.1)
                seq_start = False
            client.stop_stream(cancel_requests=cancel_reqs)
        return response

    # Test timeout is reset for a sequence slot after its sequence is cancelled
    def test_state_reset_after_cancel(self):
        sequence_timeout = 6  # secs
        # Start sequence 1 and cancel it
        num_reqs = 10
        response = self._sequence_state_model_infer(
            num_reqs, seq_ids=[1], delay_itrs=5000000, cancel_reqs=True
        )
        self.assertLess(
            len(response),
            num_reqs,
            "Precondition not met - sequence completed before cancellation",
        )
        # Wait for sequence 1 to timeout
        time.sleep(sequence_timeout + 2)
        # Start sequence 2 and 3
        self._sequence_state_model_infer(
            num_reqs=4, seq_ids=[2, 3], delay_itrs=0, cancel_reqs=False
        )
        # Check for any unexpected sequence state mixing
        with open(os.environ["SERVER_LOG"]) as f:
            server_log = f.read()
        self.assertNotIn("[MODEL ERROR] Invalid sequence state", server_log)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_request_cancellation/scheduler_test.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import concurrent.futures
import re
import time
import unittest

import numpy as np
import requests
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class TestScheduler(unittest.TestCase):
    def setUp(self):
        # Initialize client
        self._triton = grpcclient.InferenceServerClient("localhost:8001")

    def _get_inputs(self, batch_size):
        self.assertIsInstance(batch_size, int)
        self.assertGreater(batch_size, 0)
        shape = [batch_size, 8]
        inputs = [grpcclient.InferInput("INPUT0", shape, "FP32")]
        inputs[0].set_data_from_numpy(np.ones(shape, dtype=np.float32))
        return inputs

    def _generate_callback_and_response_pair(self):
        response = {"responded": False, "result": None, "error": None}

        def callback(result, error):
            response["responded"] = True
            response["result"] = result
            response["error"] = error

        return callback, response

    def _assert_response_is_cancelled(self, response):
        self.assertTrue(response["responded"])
        self.assertEqual(response["result"], None)
        self.assertIsInstance(response["error"], InferenceServerException)
        self.assertEqual(response["error"].status(), "StatusCode.CANCELLED")

    def _generate_streaming_callback_and_response_pair(self):
        response = []  # [{"result": result, "error": error}, ...]

        def callback(result, error):
            response.append({"result": result, "error": error})

        return callback, response

    def _assert_streaming_response_is_cancelled(self, response):
        self.assertGreater(len(response), 0)
        cancelled_count = 0
        for res in response:
            result, error = res["result"], res["error"]
            if error:
                self.assertEqual(result, None)
                self.assertIsInstance(error, InferenceServerException)
                if error.status() == "StatusCode.CANCELLED":
                    cancelled_count += 1
        self.assertEqual(cancelled_count, 1)

    def _get_metrics(self):
        metrics_url = "http://localhost:8002/metrics"
        r = requests.get(metrics_url)
        r.raise_for_status()
        return r.text

    def _metrics_before_test(self, model, reason):
        pattern = rf'nv_inference_request_failure\{{model="{model}",reason="{reason}",version="1"\}} (\d+)'
        metrics = self._get_metrics()
        match = re.search(pattern, metrics)
        if match:
            return int(match.group(1))
        else:
            raise Exception(f"Failure metrics for model='{model}' not found")

    def _assert_metrics(
        self, model_name, reason, expected_count_increase, initial_count
    ):
        metrics = self._get_metrics()
        # Add initial count + expected count for the the test
        expected_metric = f'nv_inference_request_failure{{model="{model_name}",reason="{reason}",version="1"}} {expected_count_increase + initial_count}'
        self.assertIn(expected_metric, metrics)

    # Test queued requests on dynamic batch scheduler can be cancelled
    def test_dynamic_batch_scheduler_request_cancellation(self):
        model_name = "dynamic_batch"
        with concurrent.futures.ThreadPoolExecutor() as pool:
            # Saturate the 2 batch slots on the model of 1 instance
            saturate_thread_1 = pool.submit(
                self._triton.infer, model_name, self._get_inputs(batch_size=1)
            )
            saturate_thread_2 = pool.submit(
                self._triton.infer, model_name, self._get_inputs(batch_size=1)
            )
            time.sleep(2)  # ensure the slots are filled
            # The next request should be queued
            callback, response = self._generate_callback_and_response_pair()
            queue_future = self._triton.async_infer(
                model_name, self._get_inputs(batch_size=1), callback
            )
            time.sleep(2)  # ensure the request is queued
            self.assertFalse(response["responded"])
            # Cancel the queued request
            queue_future.cancel()
            time.sleep(2)  # ensure the cancellation is delivered
            self._assert_response_is_cancelled(response)
            # Join saturating thread
            saturate_thread_1.result()
            saturate_thread_2.result()

    # Test backlogged requests on sequence batch scheduler can be cancelled
    def test_sequence_batch_scheduler_backlog_request_cancellation(self):
        model_name = "sequence_direct"
        initial_metrics_value = self._metrics_before_test(model_name, "CANCELED")
        with concurrent.futures.ThreadPoolExecutor() as pool:
            # Saturate the single sequence slot
            saturate_thread = pool.submit(
                self._triton.infer,
                model_name,
                self._get_inputs(batch_size=1),
                sequence_id=1,
                sequence_start=True,
            )
            time.sleep(2)  # ensure the slot is filled
            # The next sequence with 2 requests should be on the backlog
            backlog_requests = []
            for i in range(2):
                callback, response = self._generate_callback_and_response_pair()
                backlog_future = self._triton.async_infer(
                    model_name,
                    self._get_inputs(batch_size=1),
                    callback,
                    sequence_id=2,
                    sequence_start=(True if i == 0 else False),
                )
                backlog_requests.append(
                    {"future": backlog_future, "response": response}
                )
            time.sleep(2)  # ensure the sequence is backlogged
            self.assertFalse(backlog_requests[0]["response"]["responded"])
            self.assertFalse(backlog_requests[1]["response"]["responded"])
            # Cancelling any backlogged request cancels the entire sequence
            backlog_requests[0]["future"].cancel()
            time.sleep(2)  # ensure the cancellation is delivered
            time.sleep(2)  # ensure reaper thread has responded
            self._assert_response_is_cancelled(backlog_requests[0]["response"])
            self._assert_response_is_cancelled(backlog_requests[1]["response"])
            # Join saturating thread
            saturate_thread.result()
        expected_count_increase = 2
        self._assert_metrics(
            model_name,
            "CANCELED",
            expected_count_increase,
            initial_metrics_value,
        )

    # Test queued requests on direct sequence batch scheduler can be cancelled
    def test_direct_sequence_batch_scheduler_request_cancellation(self):
        model_name = "sequence_direct"
        initial_metrics_value = self._metrics_before_test(model_name, "CANCELED")
        self._test_sequence_batch_scheduler_queued_request_cancellation(model_name)
        expected_count_increase = 2
        self._assert_metrics(
            model_name,
            "CANCELED",
            expected_count_increase,
            initial_metrics_value,
        )

    # Test queued requests on oldest sequence batch scheduler can be cancelled
    def test_oldest_sequence_batch_scheduler_request_cancellation(self):
        model_name = "sequence_oldest"
        self._test_sequence_batch_scheduler_queued_request_cancellation(model_name)

    # Helper function
    def _test_sequence_batch_scheduler_queued_request_cancellation(self, model_name):
        with concurrent.futures.ThreadPoolExecutor() as pool:
            # Start the sequence
            start_thread = pool.submit(
                self._triton.infer,
                model_name,
                self._get_inputs(batch_size=1),
                sequence_id=1,
                sequence_start=True,
            )
            time.sleep(2)  # ensure the sequence has started
            # The next 2 requests should be queued
            queue_requests = []
            for i in range(2):
                callback, response = self._generate_callback_and_response_pair()
                queue_future = self._triton.async_infer(
                    model_name, self._get_inputs(batch_size=1), callback, sequence_id=1
                )
                queue_requests.append({"future": queue_future, "response": response})
            time.sleep(2)  # ensure the requests are queued
            self.assertFalse(queue_requests[0]["response"]["responded"])
            self.assertFalse(queue_requests[1]["response"]["responded"])
            # Cancelling any queued request cancels the entire sequence
            queue_requests[0]["future"].cancel()
            time.sleep(2)  # ensure the cancellation is delivered
            time.sleep(2)  # ensure reaper thread has responded
            self._assert_response_is_cancelled(queue_requests[0]["response"])
            self._assert_response_is_cancelled(queue_requests[1]["response"])
            # Join start thread
            start_thread.result()

    # Test ensemble scheduler will propagate cancellation request to child
    def test_ensemble_scheduler_request_cancellation(self):
        model_name = "ensemble_model"
        callback, response = self._generate_callback_and_response_pair()
        infer_future = self._triton.async_infer(
            model_name, self._get_inputs(batch_size=1), callback
        )
        time.sleep(2)  # ensure the inference has started
        self.assertFalse(response["responded"])
        infer_future.cancel()
        time.sleep(2)  # ensure the cancellation is delivered
        self._assert_response_is_cancelled(response)

    # Test cancellation on multiple gRPC streaming sequences
    def test_scheduler_streaming_request_cancellation(self):
        model_name = "sequence_oldest"
        # Start 2 sequences with many requests
        callback, response = self._generate_streaming_callback_and_response_pair()
        self._triton.start_stream(callback)
        for sequence_id in [1, 2]:
            sequence_start = True
            for request_id in range(16):
                self._triton.async_stream_infer(
                    model_name,
                    self._get_inputs(batch_size=1),
                    sequence_id=sequence_id,
                    sequence_start=sequence_start,
                )
                sequence_start = False
        time.sleep(2)  # ensure the requests are delivered
        # Cancelling the stream cancels all requests on the stream
        self._triton.stop_stream(cancel_requests=True)
        time.sleep(2)  # ensure the cancellation is delivered
        time.sleep(2)  # ensure reaper thread has responded
        self._assert_streaming_response_is_cancelled(response)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_request_cancellation/test.sh
================================================
#!/bin/bash
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument, when one is
# passed, takes precedence over NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION="${1-${NVIDIA_TRITON_SERVER_VERSION}}"
if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is non-empty.
if [[ -n "${TEST_REPO_ARCH}" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

export CUDA_VISIBLE_DEVICES=0

SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh

# Overall result; set to 1 by any failing section and used as the exit code.
RET=0

#
# Unit tests
#
# Build a fresh model repository containing one identity-backend model named
# "model" (max_batch_size 64, INT32 tensors of 1000 elements, CPU instance).
rm -rf models && mkdir models
mkdir -p models/model/1 && (cd models/model && \
    echo 'name: "model"' >> config.pbtxt && \
    echo 'backend: "identity"' >> config.pbtxt && \
    echo 'max_batch_size: 64' >> config.pbtxt && \
    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ 1000 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ 1000 ] }]' >> config.pbtxt && \
    echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt)

# Run the request_cancellation_test binary with /opt/tritonserver/lib on the
# library path; its stdout and stderr are captured in server.log, which is
# printed only when the binary exits with a non-zero status.
SERVER_LOG=server.log
LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH ./request_cancellation_test > $SERVER_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Unit Tests Failed\n***"
    cat $SERVER_LOG
    RET=1
fi

#
# gRPC cancellation tests
#
# Build a fresh model repository containing one identity-backend model named
# "custom_identity_int32" with its "execute_delay_ms" parameter set to "10000".
rm -rf models && mkdir models
mkdir -p models/custom_identity_int32/1 && (cd models/custom_identity_int32 && \
    echo 'name: "custom_identity_int32"' >> config.pbtxt && \
    echo 'backend: "identity"' >> config.pbtxt && \
    echo 'max_batch_size: 1024' >> config.pbtxt && \
    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \
    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "10000" } }]' >> config.pbtxt)

# Each test case gets its own server instance, its own client log and its own
# server log. Some cases export TRITONSERVER_DELAY_* variables before the
# server starts and unset them again after it is stopped.
# NOTE(review): the exact meaning and unit of the TRITONSERVER_DELAY_* values
# is defined by the server and is not visible in this script.
for TEST_CASE in "test_grpc_async_infer" \
                    "test_grpc_stream_infer" \
                    "test_aio_grpc_async_infer" \
                    "test_aio_grpc_stream_infer" \
                    "test_grpc_async_infer_cancellation_at_step_start" \
                    "test_grpc_async_infer_response_complete_during_cancellation" \
                    "test_grpc_async_infer_cancellation_before_finish_0" \
                    "test_grpc_async_infer_cancellation_before_finish_1" \
                    "test_grpc_async_infer_cancellation_before_response_complete_and_process_after_final_response"; do
    TEST_LOG="./grpc_cancellation_test.$TEST_CASE.log"
    SERVER_LOG="grpc_cancellation_test.$TEST_CASE.server.log"
    # Case-specific delay injection (must be exported before run_server).
    if [ "$TEST_CASE" == "test_grpc_async_infer_cancellation_at_step_start" ]; then
        export TRITONSERVER_DELAY_GRPC_PROCESS=5000
    elif [ "$TEST_CASE" == "test_grpc_async_infer_response_complete_during_cancellation" ]; then
        export TRITONSERVER_DELAY_GRPC_NOTIFICATION=5000
        export TRITONSERVER_DELAY_GRPC_ENQUEUE=5000
    elif [ "$TEST_CASE" == "test_grpc_async_infer_cancellation_before_finish_0" ]; then
        export TRITONSERVER_DELAY_GRPC_NOTIFICATION=5000
        export TRITONSERVER_DELAY_RESPONSE_COMPLETION=5000
    elif [ "$TEST_CASE" == "test_grpc_async_infer_cancellation_before_finish_1" ]; then
        export TRITONSERVER_DELAY_GRPC_PROCESS_ENTRY=1000
        export TRITONSERVER_DELAY_RESPONSE_COMPLETION=5000
    elif [ "$TEST_CASE" == "test_grpc_async_infer_cancellation_before_response_complete_and_process_after_final_response" ]; then
        export TRITONSERVER_DELAY_GRPC_NOTIFICATION=5000
        export TRITONSERVER_DELAY_RESPONSE_COMPLETE_EXEC=5000
    fi

    # run_server is not defined in this script (presumably it comes from the
    # sourced ../common/util.sh); a SERVER_PID of "0" is treated as a failed
    # start and aborts the whole script.
    SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=2"
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Disable errexit so that failures below are recorded in RET instead of
    # terminating the script.
    set +e
    python grpc_cancellation_test.py GrpcCancellationTest.$TEST_CASE > $TEST_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** gRPC Cancellation Tests Failed on $TEST_CASE\n***"
        cat $TEST_LOG
        RET=1
    fi

    # The server log must contain exactly one cancellation notification.
    count=$(grep -o "Cancellation notification received for" $SERVER_LOG | wc -l)
    if [ $count == 0 ]; then
        echo -e "\n***\n*** Cancellation not received by server on $TEST_CASE\n***"
        cat $SERVER_LOG
        RET=1
    elif [ $count -ne 1 ]; then
        echo -e "\n***\n*** Unexpected cancellation received by server on $TEST_CASE. Expected 1 but received $count.\n***"
        cat $SERVER_LOG
        RET=1
    fi

    # Tests "test_grpc_async_infer" and "test_aio_grpc_async_infer" ends
    # prematurely before state is released.
    if [[ "$TEST_CASE" != "test_grpc_async_infer" && "$TEST_CASE" != "test_aio_grpc_async_infer" ]]; then
        count=$(grep -o "StateRelease" $SERVER_LOG | wc -l)
        # Expected number of "StateRelease" occurrences; defaults to 1 because
        # nothing visible in this script assigns state_released beforehand.
        state_released=${state_released:=1}
        if [ $count == 0 ]; then
            echo -e "\n***\n*** State not released by server on $TEST_CASE\n***"
            cat $SERVER_LOG
            RET=1
        elif [ $count -ne $state_released ]; then
            echo -e "\n***\n*** Unexpected states released by server on $TEST_CASE. Expected $state_released but released $count.\n***"
            cat $SERVER_LOG
            RET=1
        fi
        unset state_released
    fi
    # Re-enable errexit; it stays enabled for the server shutdown below and for
    # the start of the next iteration.
    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    # Remove the case-specific delay variables so they do not leak into the
    # next test case's server.
    if [ "$TEST_CASE" == "test_grpc_async_infer_cancellation_at_step_start" ]; then
        unset TRITONSERVER_DELAY_GRPC_PROCESS
    elif [ "$TEST_CASE" == "test_grpc_async_infer_response_complete_during_cancellation" ]; then
        unset TRITONSERVER_DELAY_GRPC_NOTIFICATION
        unset TRITONSERVER_DELAY_GRPC_ENQUEUE
    elif [ "$TEST_CASE" == "test_grpc_async_infer_cancellation_before_finish_0" ]; then
        unset TRITONSERVER_DELAY_GRPC_NOTIFICATION
        unset TRITONSERVER_DELAY_RESPONSE_COMPLETION
    elif [ "$TEST_CASE" == "test_grpc_async_infer_cancellation_before_finish_1" ]; then
        unset TRITONSERVER_DELAY_GRPC_PROCESS_ENTRY
        unset TRITONSERVER_DELAY_RESPONSE_COMPLETION
    elif [ "$TEST_CASE" == "test_grpc_async_infer_cancellation_before_response_complete_and_process_after_final_response" ]; then
        unset TRITONSERVER_DELAY_GRPC_NOTIFICATION
        unset TRITONSERVER_DELAY_RESPONSE_COMPLETE_EXEC
    fi
done

#
# End-to-end scheduler tests
#
# Build a fresh model repository with the four models scheduler_test.py uses.
# All three identity-backend models set "execute_delay_ms" to "6000" and run a
# single CPU instance.
rm -rf models && mkdir models
# dynamic_batch: max_batch_size 2 with dynamic batching
# (max_queue_delay_microseconds: 600000).
mkdir -p models/dynamic_batch/1 && (cd models/dynamic_batch && \
    echo 'name: "dynamic_batch"' >> config.pbtxt && \
    echo 'backend: "identity"' >> config.pbtxt && \
    echo 'max_batch_size: 2' >> config.pbtxt && \
    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'instance_group [{ count: 1 \n kind: KIND_CPU }]' >> config.pbtxt && \
    echo -e 'dynamic_batching { max_queue_delay_microseconds: 600000 }' >> config.pbtxt && \
    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "6000" } }]' >> config.pbtxt)
# sequence_direct: max_batch_size 1, sequence batching with the "direct"
# strategy and max_sequence_idle_microseconds: 6000000.
mkdir -p models/sequence_direct/1 && (cd models/sequence_direct && \
    echo 'name: "sequence_direct"' >> config.pbtxt && \
    echo 'backend: "identity"' >> config.pbtxt && \
    echo 'max_batch_size: 1' >> config.pbtxt && \
    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'instance_group [{ count: 1 \n kind: KIND_CPU }]' >> config.pbtxt && \
    echo -e 'sequence_batching { direct { } \n max_sequence_idle_microseconds: 6000000 }' >> config.pbtxt && \
    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "6000" } }]' >> config.pbtxt)
# sequence_oldest: max_batch_size 1, sequence batching with the "oldest"
# strategy (max_candidate_sequences: 1) and
# max_sequence_idle_microseconds: 6000000.
mkdir -p models/sequence_oldest/1 && (cd models/sequence_oldest && \
    echo 'name: "sequence_oldest"' >> config.pbtxt && \
    echo 'backend: "identity"' >> config.pbtxt && \
    echo 'max_batch_size: 1' >> config.pbtxt && \
    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'instance_group [{ count: 1 \n kind: KIND_CPU }]' >> config.pbtxt && \
    echo -e 'sequence_batching { oldest { max_candidate_sequences: 1 } \n max_sequence_idle_microseconds: 6000000 }' >> config.pbtxt && \
    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "6000" } }]' >> config.pbtxt)
# ensemble_model: two chained steps that both call "dynamic_batch"; the first
# step's OUTPUT0 is mapped to the intermediate tensor "out", which feeds the
# second step's INPUT0.
mkdir -p models/ensemble_model/1 && (cd models/ensemble_model && \
    echo 'name: "ensemble_model"' >> config.pbtxt && \
    echo 'platform: "ensemble"' >> config.pbtxt && \
    echo 'max_batch_size: 1' >> config.pbtxt && \
    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo 'ensemble_scheduling { step [' >> config.pbtxt && \
    echo -e '{ model_name: "dynamic_batch" \n model_version: -1 \n input_map { key: "INPUT0" \n value: "INPUT0" } \n output_map { key: "OUTPUT0" \n value: "out" } },' >> config.pbtxt && \
    echo -e '{ model_name: "dynamic_batch" \n model_version: -1 \n input_map { key: "INPUT0" \n value: "out" } \n output_map { key: "OUTPUT0" \n value: "OUTPUT0" } }' >> config.pbtxt && \
    echo '] }' >> config.pbtxt)

TEST_LOG="scheduler_test.log"
SERVER_LOG="./scheduler_test.server.log"

# One server instance serves every test in scheduler_test.py.
SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=2"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so that a failing test run is recorded in RET and the server
# is still shut down afterwards.
set +e
python scheduler_test.py > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Scheduler Tests Failed\n***"
    cat $TEST_LOG
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

#
# Implicit state tests
#
# Build a fresh model repository with a single "sequence_state" model whose
# config.pbtxt and model.pt are copied from ./implicit_state_model.
rm -rf models && mkdir models
mkdir -p models/sequence_state/1 && (cd models/sequence_state && \
    cp ../../implicit_state_model/config.pbtxt . && \
    cp ../../implicit_state_model/model.pt 1)

TEST_LOG="implicit_state_test.log"
SERVER_LOG="implicit_state_test.server.log"

SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit so that a failing test run is recorded in RET and the server
# is still shut down afterwards.
set +e
# SERVER_LOG is passed through the environment because implicit_state_test.py
# reads the server log path from os.environ["SERVER_LOG"].
SERVER_LOG=$SERVER_LOG python implicit_state_test.py > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Implicit State Tests Failed\n***"
    cat $TEST_LOG
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Report the accumulated result of all sections and use it as the exit code.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi
exit $RET


================================================
FILE: qa/L0_response_cache/ensemble_cache_test.py
================================================
#!/usr/bin/env python3
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import re
import sys

sys.path.append("../common")
sys.path.append("../clients")
import logging
import unittest

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import *

# Marker string identifying a response cache section in a model config
# (presumably passed as `config_pattern` to `_update_config` - confirm against
# the callers).
RESPONSE_CACHE_PATTERN = "response_cache"
# Protobuf-text snippet that enables the response cache in a model config.
RESPONSE_CACHE_CONFIG = "response_cache {\n  enable:true\n}\n"


class EnsembleCacheTest(tu.TestResultCollector):
    """Response-cache tests for an ensemble model and its composing model.

    Each test appends configuration to the models' ``config.pbtxt`` files,
    loads both models over gRPC, runs the same ensemble inference twice and
    checks the cache hit/miss counters in the reported inference statistics.
    ``tearDown`` strips the appended configuration again.
    """

    # Text appended by _add_instance_group_cpu. Shared with _reset_config_files
    # so that exactly what was added is what gets removed.
    _INSTANCE_GROUP_CPU_CONFIG = "instance_group {\n  kind: KIND_CPU\n}\n"

    def setUp(self):
        """Create the gRPC client, resolve config paths and build the inputs."""
        self.triton_client = grpcclient.InferenceServerClient(
            "localhost:8001", verbose=True
        )
        self.ensemble_model = "simple_onnx_float32_float32_float32"
        self.composing_model = "onnx_float32_float32_float32"
        self.model_directory = os.path.join(os.getcwd(), "models", "ensemble_models")
        self.ensemble_config_file = os.path.join(
            self.model_directory, self.ensemble_model, "config.pbtxt"
        )
        self.composing_config_file = os.path.join(
            self.model_directory, self.composing_model, "config.pbtxt"
        )
        input0_data = np.ones((1, 16), dtype=np.float32)
        input1_data = np.ones((1, 16), dtype=np.float32)
        # Each input is described by its own array (INPUT1 previously borrowed
        # INPUT0's dtype, which only worked because both are float32).
        self.input_tensors = [
            grpcclient.InferInput(
                "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
            ),
            grpcclient.InferInput(
                "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
            ),
        ]
        self.input_tensors[0].set_data_from_numpy(input0_data)
        self.input_tensors[1].set_data_from_numpy(input1_data)

    def _update_config(self, config_file, config_pattern, config_to_add):
        """Append ``config_to_add`` to ``config_file`` unless already configured.

        The file is left untouched when ``config_pattern`` occurs anywhere in
        its current contents.
        """
        with open(config_file, "r") as f:
            config_data = f.read()
        if config_pattern in config_data:
            return
        # The read handle is closed before the file is reopened for writing.
        with open(config_file, "a") as f:
            f.write(config_to_add)

    def _add_instance_group_cpu(self, config_file):
        """Append a KIND_CPU instance group unless one is already declared."""
        self._update_config(
            config_file, "instance_group", self._INSTANCE_GROUP_CPU_CONFIG
        )

    def _remove_config(self, config_file, config_to_remove):
        """Remove every literal occurrence of ``config_to_remove`` from the file."""
        with open(config_file, "r") as f:
            config_data = f.read()
        # The snippet is literal text, not a pattern: escape it so characters
        # such as "{", "}" or "." can never be interpreted as regex syntax.
        updated_config_data = re.sub(re.escape(config_to_remove), "", config_data)
        with open(config_file, "w") as f:
            f.write(updated_config_data)

    def _reset_config_files(self):
        """Strip everything the tests may have appended to the config files."""
        self._remove_config(self.ensemble_config_file, RESPONSE_CACHE_CONFIG)
        self._remove_config(self.composing_config_file, RESPONSE_CACHE_CONFIG)
        # Added only by test_ensemble_composing_model_cache_enabled; without
        # this the composing model would stay pinned to CPU for later tests.
        self._remove_config(
            self.composing_config_file, self._INSTANCE_GROUP_CPU_CONFIG
        )

    def _run_ensemble(self):
        """Run one inference on the ensemble and return [OUTPUT0, OUTPUT1]."""
        output = self.triton_client.infer(
            model_name=self.ensemble_model, inputs=self.input_tensors
        )
        self.assertIsNotNone(
            output,
            f"Unexpected error: Inference result is None for model '{self.ensemble_model}'. Expected non-null output.",
        )
        return [output.as_numpy("OUTPUT0"), output.as_numpy("OUTPUT1")]

    def _get_model_statistics(self, model):
        """Return the ``inference_stats`` dict of the loaded version of ``model``."""
        model_stats = self.triton_client.get_inference_statistics(
            model_name=model, as_json=True
        )

        # The models used have two versions, version 1 and version 3.
        # Since model_version is set to -1 in config.pbtxt, the highest version
        # (version 3) is the one that is loaded.
        # model_stats has the inference stats for version 1 at index 0 and the
        # inference stats for version 3 at index 1.
        return model_stats["model_stats"][1]["inference_stats"]

    def _run_inference_and_validate(self, model):
        """Load the models, run the ensemble twice and check the first-run stats.

        ``model`` selects whose statistics are verified: the composing model
        for ``test_ensemble_composing_model_cache_enabled``, the top-level
        ensemble model for the other test cases.

        Steps:
            * Load simple_onnx_float32_float32_float32 and
              onnx_float32_float32_float32 and verify that both are ready.
            * Check the initial statistics of ``model``; no successful
              inference may have been recorded yet.
            * Run the ensemble pipeline once.
            * Verify the stats after the first inference; exactly one cache
              miss is expected.
            * Run the ensemble pipeline again.
            * Check that the second output equals the output of the first
              inference.
        """
        self.triton_client.load_model(self.ensemble_model)
        self.assertTrue(
            self.triton_client.is_model_ready(self.ensemble_model),
            f"Failed to load ensemble model '{self.ensemble_model}'",
        )
        self.triton_client.load_model(self.composing_model)
        self.assertTrue(
            self.triton_client.is_model_ready(self.composing_model),
            f"Failed to load composing model '{self.composing_model}'",
        )

        model_stats_initial = self._get_model_statistics(model)
        self.assertNotIn(
            "count",
            model_stats_initial["success"],
            f"No inference stats expected initially for model '{model}'",
        )

        inference_output = self._run_ensemble()
        model_stats = self._get_model_statistics(model)
        self.assertIn(
            "count", model_stats["success"], f"Failed inference for model '{model}'"
        )
        self.assertIn(
            "count",
            model_stats["cache_miss"],
            f"No cache miss recorded for model '{model}', expected exactly one cache miss",
        )
        # Counters in the JSON statistics are compared as strings.
        self.assertEqual(
            model_stats["cache_miss"]["count"],
            "1",
            f"Expected exactly one cache miss in model '{model}', found {model_stats['cache_miss']['count']}",
        )

        cached_output = self._run_ensemble()
        self.assertTrue(
            np.array_equal(inference_output, cached_output),
            f"Cache response does not match actual inference output for model '{model}'",
        )

    def test_ensemble_top_level_response_cache(self):
        """
        Test top level response caching when response cache enabled only in
        ensemble model's config file.
        Expected result: One cache hit in ensemble model stats. No cache related metric counts in
        composing model stats.
        """
        self._update_config(
            self.ensemble_config_file, RESPONSE_CACHE_PATTERN, RESPONSE_CACHE_CONFIG
        )
        self._run_inference_and_validate(self.ensemble_model)
        ensemble_model_stats = self._get_model_statistics(self.ensemble_model)
        expected_cache_hit_count = "1"
        self.assertIn(
            "count",
            ensemble_model_stats["success"],
            f"Failed inference recorded for ensemble model '{self.ensemble_model}'. Expected successful inference.",
        )
        self.assertIn(
            "count",
            ensemble_model_stats["cache_hit"],
            f"No cache hit recorded for ensemble model '{self.ensemble_model}'. Expected exactly one cache hit.",
        )
        # Read the counter only after confirming it exists, so that a missing
        # counter fails with the message above instead of a bare KeyError.
        actual_cache_hit_count = ensemble_model_stats["cache_hit"]["count"]
        self.assertEqual(
            actual_cache_hit_count,
            expected_cache_hit_count,
            f"Unexpected number of cache hits recorded for ensemble model '{self.ensemble_model}'. Expected exactly one cache hit.",
        )

    def test_ensemble_all_models_cache_enabled(self):
        """
        Test top level response caching when response cache enabled in
        all the models.
        Expected result: One cache hit in ensemble model stats. No cache hit in composing model stats.
        """
        self._update_config(
            self.ensemble_config_file, RESPONSE_CACHE_PATTERN, RESPONSE_CACHE_CONFIG
        )
        self._update_config(
            self.composing_config_file, RESPONSE_CACHE_PATTERN, RESPONSE_CACHE_CONFIG
        )
        self._run_inference_and_validate(self.ensemble_model)
        ensemble_model_stats = self._get_model_statistics(self.ensemble_model)
        composing_model_stats = self._get_model_statistics(self.composing_model)
        expected_cache_hit_count = "1"
        self.assertIn(
            "count",
            ensemble_model_stats["success"],
            f"Failed inference recorded for ensemble model '{self.ensemble_model}'. Expected successful inference.",
        )
        self.assertIn(
            "count",
            ensemble_model_stats["cache_hit"],
            f"No cache hit recorded for ensemble model '{self.ensemble_model}'. Expected exactly one cache hit.",
        )
        self.assertNotIn(
            "count",
            composing_model_stats["cache_hit"],
            f"Unexpected cache hit recorded for composing model '{self.composing_model}'. Expected top-level response in cache for ensemble model '{self.ensemble_model}'.",
        )
        # Safe to index now: the presence of "count" was asserted above.
        actual_cache_hit_count = ensemble_model_stats["cache_hit"]["count"]
        self.assertEqual(
            actual_cache_hit_count,
            expected_cache_hit_count,
            f"Unexpected number of cache hits recorded for ensemble model '{self.ensemble_model}'. Expected exactly one cache hit.",
        )

    def test_ensemble_composing_model_cache_enabled(self):
        """
        Test caching behavior when response cache enabled only in
        composing model's config file.
        Expected result: One cache hit in composing model stats. No cache related metric counts in
        ensemble model stats.
        """
        self._update_config(
            self.composing_config_file, RESPONSE_CACHE_PATTERN, RESPONSE_CACHE_CONFIG
        )
        # Currently, response cache is supported only for tensors on CPU.
        self._add_instance_group_cpu(self.composing_config_file)
        self._run_inference_and_validate(self.composing_model)
        ensemble_model_stats = self._get_model_statistics(self.ensemble_model)
        composing_model_stats = self._get_model_statistics(self.composing_model)
        self.assertIn(
            "count",
            composing_model_stats["success"],
            f"Failed inference recorded for composing model '{self.composing_model}'. Expected successful inference.",
        )
        self.assertIn(
            "count",
            composing_model_stats["cache_hit"],
            f"No cache hit recorded for composing model '{self.composing_model}'. Expected exactly one cache hit.",
        )
        self.assertNotIn(
            "count",
            ensemble_model_stats["cache_hit"],
            f"Unexpected number of cache hits recorded for ensemble model '{self.ensemble_model}'. Expected empty cache metrics",
        )

    def test_ensemble_cache_insertion_failure(self):
        """
        Test cache insertion failure with cache enabled in
        ensemble model's config file.
        Expected result: Two cache misses in ensemble model stats indicating request/response not inserted into cache
        Reason: The data (input tensors, output tensors and other model information) to be inserted in cache is bigger than the cache size.
        """
        self._update_config(
            self.ensemble_config_file, RESPONSE_CACHE_PATTERN, RESPONSE_CACHE_CONFIG
        )
        self._run_inference_and_validate(self.ensemble_model)
        ensemble_model_stats = self._get_model_statistics(self.ensemble_model)
        expected_cache_miss_count = "2"
        self.assertIn(
            "count",
            ensemble_model_stats["success"],
            f"Failed inference recorded for ensemble model '{self.ensemble_model}'. Expected successful inference.",
        )
        self.assertNotIn(
            "count",
            ensemble_model_stats["cache_hit"],
            f"Unexpected cache hit recorded for ensemble model '{self.ensemble_model}'. Expected no cache hits because cache insertion should fail.",
        )
        self.assertIn(
            "count",
            ensemble_model_stats["cache_miss"],
            f"No cache miss recorded in ensemble model '{self.ensemble_model}'. Expected cache miss.",
        )
        # Read the counter only after confirming it exists, so that a missing
        # counter fails with the message above instead of a bare KeyError.
        actual_cache_miss_count = ensemble_model_stats["cache_miss"]["count"]
        self.assertEqual(
            actual_cache_miss_count,
            expected_cache_miss_count,
            f"Unexpected number of cache misses recorded in ensemble model '{self.ensemble_model}'. Expected exactly {expected_cache_miss_count} cache misses for two inference requests, but found {actual_cache_miss_count}.",
        )

    def tearDown(self):
        """Restore the config files and always release the gRPC client."""
        try:
            self._reset_config_files()
        finally:
            # Close the client even when restoring a config file raised.
            self.triton_client.close()


if __name__ == "__main__":
    # Send log records to stderr, then let unittest parse the command line
    # (test.sh passes a single "EnsembleCacheTest.<test_name>" per invocation).
    logging.basicConfig(stream=sys.stderr)
    unittest.main()


================================================
FILE: qa/L0_response_cache/generate_random_data.py
================================================
#!/usr/bin/env python3
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import json

import numpy as np


def generate_input_data(num_inputs, batch_size, output_file, input_size=1024):
    """Write random float32 inputs to a JSON file.

    The file holds ``{"data": [{"INPUT0": [...]}, ...]}`` with one entry per
    generated input. Each entry is a flattened ``(batch_size, input_size)``
    array of uniform random values.

    Args:
        num_inputs: Number of entries to generate.
        batch_size: Number of rows in each generated array.
        output_file: Path of the JSON file to (over)write.
        input_size: Number of elements per row. Defaults to 1024, the value
            that used to be hard-coded.
    """
    entries = [
        {
            "INPUT0": np.random.rand(batch_size, input_size)
            .astype(np.float32)
            .ravel()
            .tolist()
        }
        for _ in range(num_inputs)
    ]

    with open(output_file, "w") as f:
        json.dump({"data": entries}, f)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generate random input data for perf_analyzer."
    )
    # All three options are needed by generate_input_data. Marking them as
    # required makes argparse report a missing option up front instead of the
    # script failing later with a TypeError on a None value.
    parser.add_argument(
        "--num-inputs",
        type=int,
        required=True,
        help="Number of unique random inputs to generate.",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        required=True,
        help="The batch size for each input.",
    )
    parser.add_argument(
        "--output-file",
        type=str,
        required=True,
        help="The name of the output JSON file.",
    )
    args = parser.parse_args()

    generate_input_data(args.num_inputs, args.batch_size, args.output_file)
    print(f"Successfully generated {args.num_inputs} inputs in '{args.output_file}'.")


================================================
FILE: qa/L0_response_cache/models/decoupled_cache/config.pbtxt
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Identity-backend model that enables both decoupled mode and the response
# cache. test.sh in this directory loads it and expects the server to fail
# with a "response cache does not currently support ... decoupled" error.
backend: "identity"
max_batch_size: 0
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# The two settings below are the combination under test.
model_transaction_policy {
  decoupled: True
}
response_cache {
  enable: True
}


================================================
FILE: qa/L0_response_cache/models/identity_cache/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Identity-backend model with the response cache enabled. test.sh in this
# directory loads it (--load-model=identity_cache) to exercise the server's
# cache command-line options with a model that is expected to load.
backend: "identity"
max_batch_size: 0
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# Opt this model in to response caching.
response_cache {
  enable: True
}


================================================
FILE: qa/L0_response_cache/test.sh
================================================
#!/bin/bash
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Overall result of the script; set to 1 by any failing check below.
RET=0

TEST_LOG="./response_cache_test.log"
UNIT_TEST="./response_cache_test --gtest_output=xml:response_cache.report.xml"
export CUDA_VISIBLE_DEVICES=0

# The repository version comes from the environment unless it is given as the
# first positional argument; it selects the model data directory copied below.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "${REPO_VERSION}" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi
# Only localhost supported in this test for now, but in future could make
# use of a persistent remote redis server, or similarly use --replicaof arg.
export TRITON_REDIS_HOST="localhost"
export TRITON_REDIS_PORT="6379"
REDIS_LOG="./redis-server.unit_tests.log"
ENSEMBLE_CACHE_TEST_PY="./ensemble_cache_test.py"
SERVER=/opt/tritonserver/bin/tritonserver
CLIENT_LOG="./client.log"
TEST_RESULT_FILE='test_results.txt'
SERVER_LOG=./inference_server.log
RESET_CONFIG_FUNCTION="_reset_config_files"
# Size passed to "--cache-config local,size=..." for the ensemble cache tests.
CACHE_SIZE=10840
# Shared QA helpers; run_server and check_test_results are not defined in this
# file, so they are presumably provided here.
source ../common/util.sh

# Three model repositories are rebuilt from scratch on every run:
#   ensemble_models                    - edited at runtime by ensemble_cache_test.py
#   ensemble_cache_decoupled           - ensemble has cache + decoupled enabled
#   ensemble_cache_composing_decoupled - ensemble has cache, composing model is decoupled
MODEL_DIR="${PWD}/models"
ENSEMBLE_MODEL_DIR="${MODEL_DIR}/ensemble_models"
ENSEMBLE_CACHE_DECOUPLED="${MODEL_DIR}/ensemble_cache_decoupled"
ENSEMBLE_CACHE_COMPOSING_DECOUPLED="${MODEL_DIR}/ensemble_cache_composing_decoupled"
rm -fr ${ENSEMBLE_MODEL_DIR} && mkdir ${ENSEMBLE_MODEL_DIR}
rm -fr ${ENSEMBLE_CACHE_DECOUPLED} && mkdir ${ENSEMBLE_CACHE_DECOUPLED}
rm -fr ${ENSEMBLE_CACHE_COMPOSING_DECOUPLED} && mkdir ${ENSEMBLE_CACHE_COMPOSING_DECOUPLED}
ENSEMBLE_MODEL="simple_onnx_float32_float32_float32"
COMPOSING_MODEL="onnx_float32_float32_float32"

# Every repository gets its own copy of the ensemble and its composing model.
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository/${ENSEMBLE_MODEL}" "${ENSEMBLE_MODEL_DIR}/${ENSEMBLE_MODEL}"
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/${COMPOSING_MODEL}" "${ENSEMBLE_MODEL_DIR}/${COMPOSING_MODEL}"
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository/${ENSEMBLE_MODEL}" "${ENSEMBLE_CACHE_DECOUPLED}/${ENSEMBLE_MODEL}"
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/${COMPOSING_MODEL}" "${ENSEMBLE_CACHE_DECOUPLED}/${COMPOSING_MODEL}"
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository/${ENSEMBLE_MODEL}" "${ENSEMBLE_CACHE_COMPOSING_DECOUPLED}/${ENSEMBLE_MODEL}"
cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/${COMPOSING_MODEL}" "${ENSEMBLE_CACHE_COMPOSING_DECOUPLED}/${COMPOSING_MODEL}"
# The identity-backend models ship only a config.pbtxt; create their version
# directories here.
mkdir -p "${MODEL_DIR}/decoupled_cache/1"
mkdir -p "${MODEL_DIR}/identity_cache/1"

# Build the two unsupported cache/decoupled combinations by appending to the
# copied configs.
echo -e "response_cache { enable: True }" >> "${ENSEMBLE_CACHE_DECOUPLED}/${ENSEMBLE_MODEL}/config.pbtxt"
echo -e "model_transaction_policy { decoupled: True }" >> "${ENSEMBLE_CACHE_DECOUPLED}/${ENSEMBLE_MODEL}/config.pbtxt"
echo -e "response_cache { enable: True }" >> "${ENSEMBLE_CACHE_COMPOSING_DECOUPLED}/${ENSEMBLE_MODEL}/config.pbtxt"
echo -e "model_transaction_policy { decoupled: True }" >> "${ENSEMBLE_CACHE_COMPOSING_DECOUPLED}/${COMPOSING_MODEL}/config.pbtxt"

# Start from a clean set of log files.
rm -fr *.log

function install_redis() {
  # Nothing to do when a redis-server binary is already on PATH
  if command -v redis-server >/dev/null 2>&1; then
    return 0
  fi
  # Otherwise install the redis package through apt
  apt update -y && apt install -y redis
}

function start_redis() {
  # Start a daemonized redis server on TRITON_REDIS_PORT that logs to
  # REDIS_LOG, then verify that it answers PING.
  # On failure RET is set to 1 and the script continues.
  redis-server                    \
    --daemonize yes               \
    --port "${TRITON_REDIS_PORT}" \
    --logfile "${REDIS_LOG}"      \
    --loglevel debug

  # The launcher can return before the background server accepts
  # connections, so poll PING for a few seconds instead of checking once.
  # "|| true" keeps a refused connection from aborting the script when the
  # caller runs under "set -e"; the failure is reported through RET instead.
  local attempt
  REDIS_PING_RESPONSE=""
  for attempt in 1 2 3 4 5 6 7 8 9 10; do
    REDIS_PING_RESPONSE=$(redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" ping || true)
    if [ "${REDIS_PING_RESPONSE}" == "PONG" ]; then
      break
    fi
    sleep 0.5
  done

  # Check redis server is running
  if [ "${REDIS_PING_RESPONSE}" == "PONG" ]; then
    echo "Redis successfully started in background"
  else
    echo -e "\n***\n*** Failed: Redis server did not start successfully\n***"
    RET=1
  fi
}

function stop_redis() {
  printf '%s\n' "Stopping Redis server..."
  # Best effort: a failing shutdown command (e.g. no server is running)
  # must not fail the caller.
  redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" shutdown || :
  printf '%s\n' "Redis server shutdown"
}

function set_redis_auth() {
  # NOTE: Per-user auth [Access Control List (ACL)] is only supported in
  #       Redis >= 6.0 and is more comprehensive in what can be configured.
  #       For simplicity and a wider range of Redis version support, use a
  #       server-wide password via "requirepass" for now.
  local -a redis_cmd=(redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}")
  "${redis_cmd[@]}" config set requirepass "${REDIS_PW}"
  # Export the password so later redis-cli calls authenticate implicitly
  export REDISCLI_AUTH="${REDIS_PW}"
}

function unset_redis_auth() {
  # REDISCLI_AUTH is still exported at this point, so this call is
  # authenticated implicitly; clear the server password first and only then
  # drop the variable.
  local -a redis_cmd=(redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}")
  "${redis_cmd[@]}" config set requirepass ""
  unset REDISCLI_AUTH
}

# UNIT TESTS
# Errors in this section are recorded in RET instead of aborting the script.
set +e

# Unit tests currently run for both Local and Redis cache implementations
# by default. However, we could break out the unit tests for each
# into separate runs using gtest filters if needed in the future:
# - `${UNIT_TEST} --gtest_filter=*Local*`
# - `${UNIT_TEST} --gtest_filter=*Redis*`
install_redis
# Stop any existing redis server first for good measure
stop_redis
start_redis
if ! LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $UNIT_TEST >>$TEST_LOG 2>&1; then
    cat $TEST_LOG
    echo -e "\n***\n*** Response Cache Unit Test Failed\n***"
    RET=1
fi
stop_redis
set -e

# SERVER TESTS
function check_server_success_and_kill {
    # A SERVER_PID of "0" is treated as "the server did not start".
    if [ "${SERVER_PID}" != "0" ]; then
        # Expected case: the server is up, so shut it down and reap it
        kill ${SERVER_PID}
        wait ${SERVER_PID}
    else
        echo -e "\n***\n*** Failed to start ${SERVER}\n***"
        cat ${SERVER_LOG}
        RET=1
    fi
}

function check_server_expected_failure {
    # $1: message (grep pattern, matched case-insensitively) that the server
    #     log must contain after the failed start.
    EXPECTED_MESSAGE="${1}"
    if [ "${SERVER_PID}" == "0" ]; then
        # Server failed as expected; make sure it failed for the right reason
        set +e
        if ! grep -i "${EXPECTED_MESSAGE}" ${SERVER_LOG}; then
            echo -e "\n***\n*** Failed: Expected [${EXPECTED_MESSAGE}] error message in output\n***"
            cat $SERVER_LOG
            RET=1
        fi
        set -e
    else
        echo -e "\n***\n*** Failed: ${SERVER} started successfully when it was expected to fail\n***"
        cat ${SERVER_LOG}
        RET=1

        # Do not leave the unexpectedly running server behind
        kill ${SERVER_PID}
        wait ${SERVER_PID}
    fi
}

# DECOUPLED MODEL TESTS
function check_server_failure_decoupled_model {
  # $1: model repository to serve
  # $2: model to load explicitly; loading it is expected to make the server
  #     fail with a "response cache does not currently support ... decoupled"
  #     error.
  MODEL_REPOSITORY="${1}"
  MODEL="${2}"
  EXTRA_ARGS="--model-control-mode=explicit --load-model=${MODEL}"
  SERVER_ARGS="--model-repository=${MODEL_REPOSITORY} --cache-config local,size=10480 ${EXTRA_ARGS}"

  rm -f ${SERVER_LOG}
  run_server
  if [ "${SERVER_PID}" == "0" ]; then
    # Server failed as expected; the log line must mention both the response
    # cache limitation and decoupled mode
    set +e
    if ! grep -i "response cache does not currently support" ${SERVER_LOG} | grep -i "decoupled"; then
        echo -e "\n***\n*** Failed: Expected response cache / decoupled mode error message in output\n***"
        cat ${SERVER_LOG}
        RET=1
    fi
    set -e
  else
    echo -e "\n***\n*** Failed: ${SERVER} started successfully when it was expected to fail\n***"
    cat ${SERVER_LOG}
    RET=1

    # Do not leave the unexpectedly running server behind
    kill ${SERVER_PID}
    wait ${SERVER_PID}
  fi
}

# ENSEMBLE CACHE TESTS
function test_response_cache_ensemble_model {
  # Start a server on the ensemble model repository with a local cache of
  # CACHE_SIZE, run one test case of ensemble_cache_test.py against it, then
  # stop the server.
  # $1: unittest test name, e.g. "EnsembleCacheTest.test_..."
  # $2: message printed (via echo -e) when the test case fails
  TESTCASE="${1}"
  ERROR_MESSAGE="${2}"
  SERVER_ARGS="--model-repository=${ENSEMBLE_MODEL_DIR} --cache-config local,size=${CACHE_SIZE} --model-control-mode=explicit"
  run_server
  set +e
  python ${ENSEMBLE_CACHE_TEST_PY} ${TESTCASE} >> ${CLIENT_LOG} 2>&1
  if [ $? -ne 0 ]; then
      # Show why the client failed instead of only flipping RET
      cat ${CLIENT_LOG}
      echo -e "${ERROR_MESSAGE}"
      RET=1
  else
      check_test_results ${TEST_RESULT_FILE} 1
      if [ $? -ne 0 ]; then
          cat ${CLIENT_LOG}
          echo -e "${ERROR_MESSAGE}"
          RET=1
      fi
  fi

  if [ "${TESTCASE}" = "EnsembleCacheTest.test_ensemble_cache_insertion_failure" ]; then
      # The insertion failure must also be visible in the server log
      grep -i "Failed to insert key" "${SERVER_LOG}"
      if [ $? -ne 0 ]; then
          # -e is required for the "\n" escapes, as in the other messages
          echo -e "\n***\n*** Failed: Cache insertion successful when it was expected to fail\n***"
          RET=1
      fi
  fi
  set -e
  check_server_success_and_kill
}

# Check that server fails to start for a "decoupled" model with cache enabled
check_server_failure_decoupled_model ${MODEL_DIR}  "decoupled_cache"

# Test with model expected to load successfully
# NOTE: check_server_failure_decoupled_model overwrites EXTRA_ARGS, so it has
# to be (re)assigned here before the CLI tests that use it.
EXTRA_ARGS="--model-control-mode=explicit --load-model=identity_cache"

# Test old cache config method
# --response-cache-byte-size must be non-zero to test models with cache enabled
SERVER_ARGS="--model-repository=${MODEL_DIR} --response-cache-byte-size=8192 ${EXTRA_ARGS}"
run_server
check_server_success_and_kill

# Test new cache config method
SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=local,size=8192 ${EXTRA_ARGS}"
run_server
check_server_success_and_kill

# Test that specifying multiple cache types is not supported and should fail
SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=local,size=8192 --cache-config=redis,key=value ${EXTRA_ARGS}"
run_server
check_server_expected_failure "multiple cache configurations"

# Test that specifying both config styles is incompatible and should fail
# (old-style --response-cache-byte-size together with new-style --cache-config)
SERVER_ARGS="--model-repository=${MODEL_DIR} --response-cache-byte-size=12345 --cache-config=local,size=67890 ${EXTRA_ARGS}"
run_server
check_server_expected_failure "incompatible flags"

## Redis Cache CLI tests
REDIS_ENDPOINT="--cache-config redis,host=${TRITON_REDIS_HOST} --cache-config redis,port=${TRITON_REDIS_PORT}"
# Use a separate redis log for this section so the unit-test log is kept
REDIS_LOG="./redis-server.cli_tests.log"
start_redis

# Test simple redis cache config succeeds
SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${EXTRA_ARGS}"
run_server
check_server_success_and_kill

# Test triton fails to initialize if it can't connect to redis cache
SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=localhost --cache-config=redis,port=nonexistent ${EXTRA_ARGS}"
run_server
check_server_expected_failure "failed to connect to Redis (localhost:0): Connection refused"

# Test triton fails to initialize if it can't resolve host for redis cache
SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=nonexistent --cache-config=redis,port=nonexistent ${EXTRA_ARGS}"
run_server
# Either of these errors can be returned for bad hostname, so check for either.
MSG1="Temporary failure in name resolution"
MSG2="Name or service not known"
check_server_expected_failure "${MSG1}\|${MSG2}"

# Test triton fails to initialize if minimum required args (host & port) not all provided
# Only the port is given here, and it is the real redis port, so the missing
# host is the only thing wrong with this configuration.
SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,port=${TRITON_REDIS_PORT} ${EXTRA_ARGS}"
run_server
check_server_expected_failure "Must at a minimum specify"

## Redis Authentication tests

# Automatically provide auth via REDISCLI_AUTH env var when set: https://redis.io/docs/ui/cli/
REDIS_PW="redis123!"
# From here on the running redis server requires REDIS_PW (until
# unset_redis_auth below).
set_redis_auth

### Credentials via command-line

# Test simple redis authentication succeeds with correct credentials
REDIS_CACHE_AUTH="--cache-config redis,password=${REDIS_PW}"
SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${REDIS_CACHE_AUTH} ${EXTRA_ARGS}"
run_server
check_server_success_and_kill

# Test simple redis authentication fails with wrong credentials
REDIS_CACHE_AUTH="--cache-config redis,password=wrong"
SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${REDIS_CACHE_AUTH} ${EXTRA_ARGS}"
run_server
check_server_expected_failure "WRONGPASS"

# Test simple redis authentication fails with no credentials
SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${EXTRA_ARGS}"
run_server
check_server_expected_failure "NOAUTH Authentication required"

### Credentials via environment variables
# None of the SERVER_ARGS below carry a password option; the credentials are
# supplied only through the TRITONCACHE_REDIS_* variables.

# Test simple redis authentication succeeds with password-only via env vars
# No username means use "default" as the username
unset TRITONCACHE_REDIS_USERNAME
export TRITONCACHE_REDIS_PASSWORD="${REDIS_PW}"
SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${EXTRA_ARGS}"
run_server
check_server_success_and_kill

# Test simple redis authentication succeeds with correct user and password via env vars
export TRITONCACHE_REDIS_USERNAME="default"
export TRITONCACHE_REDIS_PASSWORD="${REDIS_PW}"
SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${EXTRA_ARGS}"
run_server
check_server_success_and_kill

# Test simple redis authentication fails with wrong credentials via env vars
export TRITONCACHE_REDIS_PASSWORD="wrong"
SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${EXTRA_ARGS}"
run_server
check_server_expected_failure "WRONGPASS"
# Drop the credentials so they cannot leak into the tests that follow
unset TRITONCACHE_REDIS_USERNAME
unset TRITONCACHE_REDIS_PASSWORD
# Clean up redis server
unset_redis_auth
stop_redis

# Test ensemble model with cache and decoupled mode enabled
check_server_failure_decoupled_model ${ENSEMBLE_CACHE_DECOUPLED} ${ENSEMBLE_MODEL}

# Test ensemble model with cache enabled and decoupled mode enabled in composing model
check_server_failure_decoupled_model ${ENSEMBLE_CACHE_COMPOSING_DECOUPLED} ${ENSEMBLE_MODEL}

# Test ensemble model with response cache enabled
TEST_NAME="EnsembleCacheTest.test_ensemble_top_level_response_cache"
ERROR_MESSAGE="\n***\n*** Failed: Expected top level response caching\n***"
test_response_cache_ensemble_model "${TEST_NAME}" "${ERROR_MESSAGE}"

# Test ensemble model with cache enabled in all models
TEST_NAME="EnsembleCacheTest.test_ensemble_all_models_cache_enabled"
ERROR_MESSAGE="\n***\n*** Failed: Expected cache to return top-level request's response\n***"
test_response_cache_ensemble_model "${TEST_NAME}" "${ERROR_MESSAGE}"

# Test composing model cache enabled
TEST_NAME="EnsembleCacheTest.test_ensemble_composing_model_cache_enabled"
ERROR_MESSAGE="\n***\n*** Failed: Expected only composing model's input/output to be inserted in cache\n***"
test_response_cache_ensemble_model "${TEST_NAME}" "${ERROR_MESSAGE}"

# Test cache insertion failure
TEST_NAME="EnsembleCacheTest.test_ensemble_cache_insertion_failure"
ERROR_MESSAGE="\n***\n*** Failed: Request added to cache successfully when it was expected to fail\n***"
CACHE_SIZE=200
test_response_cache_ensemble_model "${TEST_NAME}" "${ERROR_MESSAGE}"


############### Response Cache Memory Growth Test ###############

# Set server, client and valgrind arguments
LEAKCHECK=/usr/bin/valgrind
MASSIF_TEST=../common/check_massif_log.py
MODEL="identity_cache"
LEAKCHECK_LOG="${MODEL}.valgrind.log"
MASSIF_LOG="${MODEL}.valgrind.massif"
GRAPH_LOG="memory_growth_${MODEL}.log"
SERVER_LOG="${MODEL}.server.log"
CLIENT_LOG="${MODEL}_PA.client.log"
RANDOM_DATA_CLIENT_LOG="${MODEL}_random_data_script.log"
RANDOM_DATA_JSON="`pwd`/random_inputs.json"
RANDOM_DATA_GENERATOR="generate_random_data.py"

LEAKCHECK_ARGS="--tool=massif --time-unit=B --massif-out-file=$MASSIF_LOG --max-threads=3000 --log-file=$LEAKCHECK_LOG"
SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit --load-model=${MODEL} --cache-config=local,size=10485760" # 10MB cache

set +e
# Generate random data for perf_analyzer requests to fill the cache and maximize cache misses
python "$RANDOM_DATA_GENERATOR" --num-inputs=10000 --batch-size=1 --output-file="${RANDOM_DATA_JSON}" >> "$RANDOM_DATA_CLIENT_LOG" 2>&1
if [ $? -ne 0 ]; then
    cat "$RANDOM_DATA_CLIENT_LOG"
    echo -e "\n***\n*** Failed to run ${RANDOM_DATA_GENERATOR}.\n***"
    RET=1
    exit 1
else
    # Check if the JSON data file was generated
    if [ ! -f "${RANDOM_DATA_JSON}" ]; then
        echo -e "\n***\n*** FAILED - JSON data file was not found at the expected path: ${RANDOM_DATA_JSON}\n***"
        RET=1
        exit 1
    fi
fi
set -e

# Run the server
run_server_leakcheck
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

pip3 install perf_analyzer

TEMP_RET=0
REPETITION=10
CONCURRENCY=20
CLIENT_BS=1
PERF_ANALYZER=perf_analyzer
TEMP_CLIENT_LOG=temp_client.log

set +e
SECONDS=0
# Run the perf analyzer 'REPETITION' times
for ((i=1; i<=$REPETITION; i++)); do
    # Use random data to ensure cache misses
    $PERF_ANALYZER -v -m $MODEL --shape=INPUT0:1024 -i grpc --concurrency-range $CONCURRENCY -b $CLIENT_BS -p 20000 --input-data="${RANDOM_DATA_JSON}" > $TEMP_CLIENT_LOG 2>&1
    PA_RET=$?
    cat $TEMP_CLIENT_LOG >> $CLIENT_LOG
    # Success
    if [ ${PA_RET} -eq 0 ]; then
      continue
    # Unstable measurement: OK for this test
    elif [ ${PA_RET} -eq 2 ]; then
      continue
    # Other failures unexpected, report error
    else
        echo -e "\n***\n*** perf_analyzer for $MODEL failed on iteration $i\n***" >> $CLIENT_LOG
        RET=1
    fi
done
TEST_DURATION=$SECONDS
set -e

# Stop Server
kill $SERVER_PID
wait $SERVER_PID

set +e

# Log test duration and the graph for memory growth
MAX_ALLOWED_ALLOC=2 # MB
hrs=$(printf "%02d" $((TEST_DURATION / 3600)))
mins=$(printf "%02d" $(((TEST_DURATION / 60) % 60)))
secs=$(printf "%02d" $((TEST_DURATION % 60)))
echo -e "Test Duration: $hrs:$mins:$secs (HH:MM:SS)" >> ${GRAPH_LOG}
ms_print ${MASSIF_LOG} | head -n35 >> ${GRAPH_LOG}
cat ${GRAPH_LOG}
# Check the massif output
python $MASSIF_TEST $MASSIF_LOG $MAX_ALLOWED_ALLOC --start-from-middle >> $GRAPH_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Memory growth test for $MODEL Failed.\n***"
    RET=1
fi
# Always output memory usage for easier triage of MAX_ALLOWED_ALLOC settings in the future
grep -i "Change in memory allocation" "${GRAPH_LOG}" || true
set -e

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_response_statistics/response_statistics_test.py
================================================
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time
import unittest

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient


class TestResponseStatistics(unittest.TestCase):
    """Check the per-response statistics reported for streaming inference.

    Requests are sent over a gRPC stream; the resulting "response_stats" are then
    fetched from both the HTTP and the gRPC statistics endpoints and compared
    against the counts and durations the test expects.
    """

    def setUp(self):
        # Per-test model settings. Each test case overrides the ones it needs; the
        # delays are used as the expected per-response durations when validating
        # the reported statistics.
        self._model_name = "set_by_test_case"
        self._min_infer_delay_ns = 0
        self._min_output_delay_ns = 0
        self._min_cancel_delay_ns = 0
        self._number_of_fail_responses = 0
        self._number_of_empty_responses = 0
        # Expected counters, one dict per response index, accumulated over every
        # request checked by the current test case.
        self._statistics_counts = []
        self._grpc_client = grpcclient.InferenceServerClient(
            "localhost:8001", verbose=True
        )
        self._http_client = httpclient.InferenceServerClient("localhost:8000")

    # Return a coupled (callback, response) pair for gRPC stream infer. Every
    # invocation of the callback appends one entry to the returned list.
    def _generate_streaming_callback_and_response_pair(self):
        # [{"result": result, "error": error}, ...]
        response = []

        def callback(result, error):
            response.append({"result": result, "error": error})

        return callback, response

    # Send an infer request and return its responses. 'number_of_responses' is the sum
    # of success, fail and empty responses the model should return for this request.
    # 'cancel_at_response_size' will cancel the stream when the number of responses
    # received equals the size, set to None if cancellation is not required. This
    # function waits until all success and fail responses are received, or cancelled.
    # 'timeout_s' bounds the wait: if the expected number of responses has not been
    # received by then, the stream is closed (cancelling the outstanding request) and
    # the test fails instead of polling forever.
    def _stream_infer(
        self, number_of_responses, cancel_at_response_size=None, timeout_s=120.0
    ):
        callback, responses = self._generate_streaming_callback_and_response_pair()
        if cancel_at_response_size is None:
            # Empty responses never reach the callback, so do not wait for them.
            expected_size = number_of_responses - self._number_of_empty_responses
        else:
            expected_size = cancel_at_response_size

        self._grpc_client.start_stream(callback)
        # Until the expected responses have arrived, leaving the block below means
        # something went wrong, so the outstanding request is cancelled on close.
        cancel_requests = True
        try:
            input_data = np.array([number_of_responses], dtype=np.int32)
            inputs = [grpcclient.InferInput("IN", input_data.shape, "INT32")]
            inputs[0].set_data_from_numpy(input_data)
            outputs = [grpcclient.InferRequestedOutput("OUT")]
            self._grpc_client.async_stream_infer(
                model_name=self._model_name, inputs=inputs, outputs=outputs
            )
            # Poll until the expected number of responses is received.
            deadline = time.monotonic() + timeout_s
            while len(responses) < expected_size:
                if time.monotonic() > deadline:
                    self.fail(
                        f"Timed out after {timeout_s} s waiting for {expected_size} "
                        f"response(s) from model '{self._model_name}', "
                        f"received {len(responses)}"
                    )
                time.sleep(0.1)
            # Normal completion: only cancel when the caller asked for it.
            cancel_requests = cancel_at_response_size is not None
        finally:
            # Always close the stream so a failure does not leak it into later tests.
            self._grpc_client.stop_stream(cancel_requests=cancel_requests)
        return responses

    # Update expected statistics counts for the response at 'current_index'.
    # 'number_of_responses' is the sum of success, fail and empty responses expected
    # from this inference request. 'cancel_at_index' is the index at which the request
    # should be cancelled. Within a request the expected order is: success responses
    # first, then fail responses, then empty responses.
    def _update_statistics_counts(
        self, current_index, number_of_responses, cancel_at_index
    ):
        if current_index >= len(self._statistics_counts):
            # First time this response index is seen by the current test case.
            self._statistics_counts.append(
                {
                    "compute_infer": 0,
                    "compute_output": 0,
                    "success": 0,
                    "fail": 0,
                    "empty_response": 0,
                    "cancel": 0,
                }
            )
        counts = self._statistics_counts[current_index]
        if current_index == cancel_at_index:
            # cancel
            counts["cancel"] += 1
        elif (
            current_index
            + self._number_of_fail_responses
            + self._number_of_empty_responses
            < number_of_responses
        ):
            # success
            counts["compute_infer"] += 1
            counts["compute_output"] += 1
            counts["success"] += 1
        elif current_index + self._number_of_empty_responses < number_of_responses:
            # fail
            counts["compute_infer"] += 1
            counts["compute_output"] += 1
            counts["fail"] += 1
        else:
            # empty: no output is computed for an empty response
            counts["compute_infer"] += 1
            counts["empty_response"] += 1

    # Check the 'response_stats' at 'current_index' for 'stats_name' is valid, i.e.
    # its count equals the expected count and its accumulated duration is within the
    # bounds derived from the configured delays.
    def _check_statistics_count_and_duration(
        self, response_stats, current_index, stats_name
    ):
        expected_count = self._statistics_counts[current_index][stats_name]
        if stats_name == "compute_infer" or stats_name == "empty_response":
            delay_ns = self._min_infer_delay_ns
        elif stats_name == "compute_output":
            delay_ns = self._min_output_delay_ns
        elif stats_name == "cancel":
            delay_ns = self._min_cancel_delay_ns
        else:  # success or fail
            delay_ns = self._min_infer_delay_ns + self._min_output_delay_ns
        if delay_ns == 0:
            # No delay configured: allow up to 10 ms per counted response.
            upper_bound_ns = 10000000 * expected_count
            lower_bound_ns = 0
        else:
            # Delay configured: the total must be within +/-10% of the expected value.
            upper_bound_ns = 1.1 * delay_ns * expected_count
            lower_bound_ns = 0.9 * delay_ns * expected_count
        stats = response_stats[str(current_index)][stats_name]
        self.assertEqual(stats["count"], expected_count)
        self.assertLessEqual(stats["ns"], upper_bound_ns)
        self.assertGreaterEqual(stats["ns"], lower_bound_ns)

    # Fetch and return the response statistics from both gRPC and HTTP endpoints, and
    # check they are equivalent before returning. The HTTP form is the one returned.
    def _get_response_statistics(self):
        # http response statistics
        statistics_http = self._http_client.get_inference_statistics(
            model_name=self._model_name
        )
        model_stats_http = statistics_http["model_stats"][0]
        self.assertEqual(model_stats_http["name"], self._model_name)
        response_stats_http = model_stats_http["response_stats"]
        # grpc response statistics
        statistics_grpc = self._grpc_client.get_inference_statistics(
            model_name=self._model_name, as_json=True
        )
        model_stats_grpc = statistics_grpc["model_stats"][0]
        self.assertEqual(model_stats_grpc["name"], self._model_name)
        response_stats_grpc = model_stats_grpc["response_stats"]
        # check equivalence between http and grpc statistics
        self.assertEqual(len(response_stats_http), len(response_stats_grpc))
        for idx, index_stats_http in response_stats_http.items():
            self.assertIn(idx, response_stats_grpc)
            index_stats_grpc = response_stats_grpc[idx]
            for name, stats_http in index_stats_http.items():
                self.assertIn(name, index_stats_grpc)
                stats_grpc = index_stats_grpc[name]
                # normalize gRPC statistics to http: convert the values to int and
                # fill in 0 for any field missing from the gRPC JSON
                stats_grpc["count"] = (
                    int(stats_grpc["count"]) if ("count" in stats_grpc) else 0
                )
                stats_grpc["ns"] = int(stats_grpc["ns"]) if ("ns" in stats_grpc) else 0
                # check equal
                self.assertEqual(stats_http, stats_grpc)
        return response_stats_http

    # Check the response statistics is valid for a given infer request, providing its
    # 'responses', expected 'number_of_responses' and 'cancel_at_index'. The expected
    # counts are updated for this request before each index is checked. 'responses'
    # is accepted for symmetry with '_stream_infer' but is not inspected here.
    def _check_response_stats(
        self, responses, number_of_responses, cancel_at_index=None
    ):
        response_stats = self._get_response_statistics()
        self.assertGreaterEqual(len(response_stats), number_of_responses)
        for i in range(number_of_responses):
            self._update_statistics_counts(i, number_of_responses, cancel_at_index)
            self._check_statistics_count_and_duration(
                response_stats, i, "compute_infer"
            )
            self._check_statistics_count_and_duration(
                response_stats, i, "compute_output"
            )
            self._check_statistics_count_and_duration(response_stats, i, "success")
            self._check_statistics_count_and_duration(response_stats, i, "fail")
            self._check_statistics_count_and_duration(
                response_stats, i, "empty_response"
            )
            self._check_statistics_count_and_duration(response_stats, i, "cancel")

    # Test response statistics. The statistics must be valid over two or more infers.
    def test_response_statistics(self):
        self._model_name = "square_int32"
        self._min_infer_delay_ns = 400000000
        self._min_output_delay_ns = 200000000
        self._number_of_fail_responses = 2
        self._number_of_empty_responses = 1
        # Send a request that generates 4 responses.
        number_of_responses = 4
        responses = self._stream_infer(number_of_responses)
        self._check_response_stats(responses, number_of_responses)
        # Send a request that generates 6 responses, and make sure the statistics are
        # aggregated with the previous request.
        number_of_responses = 6
        responses = self._stream_infer(number_of_responses)
        self._check_response_stats(responses, number_of_responses)
        # Send a request that generates 3 responses, and make sure the statistics are
        # aggregated with the previous requests.
        number_of_responses = 3
        responses = self._stream_infer(number_of_responses)
        self._check_response_stats(responses, number_of_responses)

    # Test response statistics with cancellation.
    def test_response_statistics_cancel(self):
        self._model_name = "square_int32_slow"
        self._min_infer_delay_ns = 1200000000
        self._min_output_delay_ns = 800000000
        self._min_cancel_delay_ns = 400000000

        # Send a request that generates 4 responses.
        number_of_responses = 4
        responses = self._stream_infer(number_of_responses)
        self._check_response_stats(responses, number_of_responses)

        # Send a request that generates 4 responses, and cancel it as soon as the 1st
        # response is received, so that the cancellation lands on the 3rd response.
        # Make sure the statistics are aggregated with the previous request.
        responses = self._stream_infer(number_of_responses=4, cancel_at_response_size=1)
        # There is an infer and output delay on the 1st and 2nd response, and a cancel
        # delay on the 3rd response.
        min_total_delay_ns = (
            self._min_infer_delay_ns + self._min_output_delay_ns
        ) * 2 + self._min_cancel_delay_ns
        # Make sure the inference and cancellation is completed before checking.
        time.sleep(min_total_delay_ns * 1.5 / 1000000000)
        # The request is cancelled when the 2nd response is computing, so the
        # cancellation should be received at the 3rd response (index 2), making a total
        # of 3 responses on the statistics.
        self._check_response_stats(responses, number_of_responses=3, cancel_at_index=2)


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_response_statistics/test.sh
================================================
#!/bin/bash
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first positional argument, when given,
# overrides NVIDIA_TRITON_SERVER_VERSION. Abort if neither provides a value.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

SERVER=/opt/tritonserver/bin/tritonserver
# NOTE(review): run_server and SERVER_PID used below are presumably provided by
# util.sh - confirm against ../common/util.sh.
source ../common/util.sh

# Overall test result: 0 = passed, 1 = failed.
RET=0

# Rebuild the model repository from scratch. Both models use the "square"
# backend in decoupled mode with a single INT32 input "IN" and output "OUT".
# The CUSTOM_* parameter values below match the delays and fail/empty counts
# that response_statistics_test.py sets for the model of the same name, so
# keep the two in sync.
rm -rf models && mkdir models
mkdir -p models/square_int32/1 && (cd models/square_int32 && \
    echo 'backend: "square"' >> config.pbtxt && \
    echo 'max_batch_size: 0' >> config.pbtxt && \
    echo 'model_transaction_policy { decoupled: True }' >> config.pbtxt && \
    echo -e 'input [{ name: "IN" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUT" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]' >> config.pbtxt && \
    echo -e 'parameters [{ key: "CUSTOM_INFER_DELAY_NS" \n value: { string_value: "400000000" } }]' >> config.pbtxt && \
    echo -e 'parameters [{ key: "CUSTOM_OUTPUT_DELAY_NS" \n value: { string_value: "200000000" } }]' >> config.pbtxt && \
    echo -e 'parameters [{ key: "CUSTOM_FAIL_COUNT" \n value: { string_value: "2" } }]' >> config.pbtxt && \
    echo -e 'parameters [{ key: "CUSTOM_EMPTY_COUNT" \n value: { string_value: "1" } }]' >> config.pbtxt)
# Slower variant with a cancel delay and no fail/empty responses; it is the
# model used by the cancellation test case.
mkdir -p models/square_int32_slow/1 && (cd models/square_int32_slow && \
    echo 'backend: "square"' >> config.pbtxt && \
    echo 'max_batch_size: 0' >> config.pbtxt && \
    echo 'model_transaction_policy { decoupled: True }' >> config.pbtxt && \
    echo -e 'input [{ name: "IN" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUT" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]' >> config.pbtxt && \
    echo -e 'parameters [{ key: "CUSTOM_INFER_DELAY_NS" \n value: { string_value: "1200000000" } }]' >> config.pbtxt && \
    echo -e 'parameters [{ key: "CUSTOM_OUTPUT_DELAY_NS" \n value: { string_value: "800000000" } }]' >> config.pbtxt && \
    echo -e 'parameters [{ key: "CUSTOM_CANCEL_DELAY_NS" \n value: { string_value: "400000000" } }]' >> config.pbtxt)

TEST_LOG="response_statistics_test.log"
SERVER_LOG="./response_statistics_test.server.log"

# Start the server on the freshly generated repository; a SERVER_PID of "0"
# is treated as a failed start.
SERVER_ARGS="--model-repository=`pwd`/models"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Errors are tolerated while the Python test runs so that a failure is recorded
# in RET and the server is still shut down afterwards.
set +e
python response_statistics_test.py > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed response statistics test\n***"
    cat $TEST_LOG
    RET=1
fi
set -e

# Stop the server and wait for it to exit before reporting the result.
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi
exit $RET


================================================
FILE: qa/L0_sagemaker/sagemaker_generate_stream_test.py
================================================
#!/usr/bin/python
# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import os
import sys
import unittest

import requests
import sseclient
import test_util as tu


class SageMakerGenerateStreamTest(tu.TestResultCollector):
    """Check streamed (SSE) responses returned by the '/invocations' endpoint."""

    def setUp(self):
        # The port can be overridden through the environment; 8080 is the default.
        SAGEMAKER_BIND_TO_PORT = os.getenv("SAGEMAKER_BIND_TO_PORT", "8080")
        self.url_ = "http://localhost:{}/invocations".format(SAGEMAKER_BIND_TO_PORT)

    def generate_stream(self, inputs, stream=False):
        """POST 'inputs' to the endpoint, asking for an event-stream response.

        'inputs' is sent as-is when it is already a string, otherwise it is
        serialized to JSON. Returns the 'requests' response object.
        """
        headers = {"Accept": "text/event-stream"}
        # stream=True used to indicate response can be iterated over, which
        # should be the common setting for generate_stream.
        # For correctness test case, stream=False so that we can re-examine
        # the response content.
        return requests.post(
            self.url_,
            data=inputs if isinstance(inputs, str) else json.dumps(inputs),
            headers=headers,
            stream=stream,
        )

    def generate_stream_expect_success(self, inputs, expected_output, rep_count):
        """Send 'inputs' and expect 'rep_count' events whose "TEXT" is 'expected_output'."""
        r = self.generate_stream(inputs)
        r.raise_for_status()
        self.check_sse_responses(r, [{"TEXT": expected_output}] * rep_count)

    def check_sse_responses(self, res, expected_res):
        """Validate that 'res' is a well-formed SSE response matching 'expected_res'.

        'expected_res' is a list with one dict per expected event, in order; every
        key/value pair of a dict must be present in the corresponding event data.
        """
        # Validate SSE format
        self.assertIn("Content-Type", res.headers)
        self.assertEqual(
            "text/event-stream; charset=utf-8", res.headers["Content-Type"]
        )

        # SSE format (data: []) is hard to parse, use helper library for simplicity
        client = sseclient.SSEClient(res)
        res_count = 0
        for event in client.events():
            # Report surplus events as a test failure with a clear message, rather
            # than letting the lookup below raise a bare IndexError.
            self.assertLess(
                res_count,
                len(expected_res),
                f"Received more SSE events than the {len(expected_res)} expected",
            )
            # Parse event data and compare it with the expected response at the
            # same position
            data = json.loads(event.data)
            for key, value in expected_res[res_count].items():
                self.assertIn(key, data)
                self.assertEqual(value, data[key])
            res_count += 1
        self.assertEqual(len(expected_res), res_count)
        # Make sure there is no message in the wrong form
        for remaining in client._read():
            self.assertTrue(
                remaining.startswith(b"data:"),
                f"SSE response not formed properly, got: {remaining}",
            )
            self.assertTrue(
                remaining.endswith(b"\n\n"),
                f"SSE response not formed properly, got: {remaining}",
            )

    def test_generate_stream(self):
        # Setup text-based input
        text = "hello world"
        rep_count = 3
        inputs = {"PROMPT": [text], "STREAM": True, "REPETITION": rep_count}
        self.generate_stream_expect_success(inputs, text, rep_count)


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_sagemaker/sagemaker_generate_test.py
================================================
#!/usr/bin/python
# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import os
import sys
import unittest

import requests
import test_util as tu


class SageMakerGenerateTest(tu.TestResultCollector):
    """Check the non-streaming JSON response of the '/invocations' endpoint."""

    def setUp(self):
        # Port comes from the environment, falling back to 8080.
        port = os.getenv("SAGEMAKER_BIND_TO_PORT", "8080")
        self.url_ = "http://localhost:{}/invocations".format(port)

    def generate(self, inputs):
        """POST 'inputs' (sent verbatim if a string, else as JSON) and return the response."""
        if isinstance(inputs, str):
            payload = inputs
        else:
            payload = json.dumps(inputs)
        return requests.post(self.url_, data=payload)

    def test_generate(self):
        # Text-based prompt that should come back unchanged in the "TEXT" field
        prompt = "hello world"
        request_body = {"PROMPT": prompt, "STREAM": False}

        response = self.generate(request_body)
        response.raise_for_status()

        # The reply must be declared as plain JSON
        response_headers = response.headers
        self.assertIn("Content-Type", response_headers)
        self.assertEqual(response_headers["Content-Type"], "application/json")

        # The returned text must match the prompt
        body = response.json()
        self.assertIn("TEXT", body)
        self.assertEqual(prompt, body["TEXT"])


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_sagemaker/sagemaker_multi_model_test.py
================================================
#!/usr/bin/python
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import os
import sys
import time
import unittest

import numpy as np
import requests
import test_util as tu
import tritonclient.http as httpclient


class SageMakerMultiModelTest(tu.TestResultCollector):
    """Tests for the SageMaker multi-model endpoint (MME) REST API.

    The tests are numbered (``test_sm_0`` ... ``test_sm_6``) because they
    depend on each other: the models loaded in ``test_sm_1_model_load`` are
    listed, fetched, invoked and finally unloaded by the later tests, and
    unittest runs test methods in alphabetical order.
    """

    # Trailing component of the load URLs that is not part of the "modelUrl"
    # value the tests expect back from the list/get endpoints.
    _MODEL_DIR_SUFFIX = "/model"

    # Content-Type prefix used for requests/responses carrying binary tensors;
    # the JSON header size follows the "=".
    _BINARY_CONTENT_TYPE_PREFIX = (
        "application/vnd.sagemaker-triton.binary+json;json-header-size="
    )

    def setUp(self):
        """Define the endpoint URL, model names/URLs and expected results."""
        port = os.getenv("SAGEMAKER_BIND_TO_PORT", "8080")
        self.url_mme_ = "http://localhost:{}/models".format(port)

        # model_1 setup
        self.model1_name = "sm_mme_model_1"
        self.model1_url = "/opt/ml/models/123456789abcdefghi/model"

        # The same data is sent as INPUT0 and INPUT1. The expected OUTPUT0 is
        # twice the input and the expected OUTPUT1 is all zeros (presumably
        # the model returns the sum and the difference of its inputs --
        # confirm against the model repository).
        self.model1_input_data_ = list(range(16))
        self.model1_expected_output0_data_ = [
            2 * value for value in self.model1_input_data_
        ]
        self.model1_expected_output1_data_ = [0] * len(self.model1_input_data_)

        self.model1_expected_result_ = {
            "model_name": "sm_mme_model_1",
            "model_version": "1",
            "outputs": [
                {
                    "name": "OUTPUT0",
                    "datatype": "INT32",
                    "shape": [1, 16],
                    "data": self.model1_expected_output0_data_,
                },
                {
                    "name": "OUTPUT1",
                    "datatype": "INT32",
                    "shape": [1, 16],
                    "data": self.model1_expected_output1_data_,
                },
            ],
        }

        # model_2 setup
        self.model2_name = "sm_mme_model_2"
        self.model2_url = "/opt/ml/models/987654321ihgfedcba/model"

        # Output is same as input since this is an identity model
        self.model2_input_data_ = [0, 1, 2, 3, 4, 5, 6, 7]

        # ensemble model setup
        self.model3_name = "123456789ensemble"
        self.model3_url = "/opt/ml/models/123456789ensemble/model"

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @classmethod
    def _expected_model_url(cls, url):
        """Return ``url`` without its trailing ``/model`` component.

        ``str.rstrip("/model")`` must not be used for this: it removes any
        trailing run of the characters ``/``, ``m``, ``o``, ``d``, ``e`` and
        ``l`` rather than the literal suffix, so it would also eat the end of
        a directory name that happens to finish with one of those letters.
        """
        suffix = cls._MODEL_DIR_SUFFIX
        if url.endswith(suffix):
            return url[: -len(suffix)]
        return url

    def _assert_status(self, response, expected):
        """Assert that ``response`` carries the ``expected`` HTTP status."""
        self.assertEqual(
            response.status_code,
            expected,
            "Expected status code {}, received {}".format(
                expected, response.status_code
            ),
        )

    def _load_model(self, name, url, expected_status, extra_headers=None):
        """POST a load request for ``name`` and check the returned status.

        Sleeps 5 seconds after the request so the model has time to load
        before the next request is issued.
        """
        headers = {"Content-Type": "application/json"}
        if extra_headers:
            headers.update(extra_headers)
        payload = json.dumps({"model_name": name, "url": url})
        r = requests.post(self.url_mme_, data=payload, headers=headers)
        time.sleep(5)  # wait for model to load
        self._assert_status(r, expected_status)

    def _unload_model(self, name, expected_status, wait_secs=3, headers=None):
        """DELETE the model ``name`` and check the returned status."""
        unload_url = "{}/{}".format(self.url_mme_, name)
        r = requests.delete(unload_url, headers=headers)
        time.sleep(wait_secs)
        self._assert_status(r, expected_status)

    def _two_input_json_request(self, datatype, np_dtype):
        """Build a JSON-only request body with INPUT0/INPUT1 of shape [1, 16].

        Both inputs carry ``model1_input_data_``; OUTPUT0 and OUTPUT1 are
        requested as JSON (non-binary) data.
        """
        tensor = np.expand_dims(
            np.array(self.model1_input_data_, dtype=np_dtype), axis=0
        )
        inputs = []
        for input_name in ("INPUT0", "INPUT1"):
            infer_input = httpclient.InferInput(input_name, [1, 16], datatype)
            infer_input.set_data_from_numpy(tensor, binary_data=False)
            inputs.append(infer_input)
        outputs = [
            httpclient.InferRequestedOutput(output_name, binary_data=False)
            for output_name in ("OUTPUT0", "OUTPUT1")
        ]
        request_body, _ = httpclient.InferenceServerClient.generate_request_body(
            inputs, outputs=outputs
        )
        return request_body

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_sm_0_environment_variables_set(self):
        """SAGEMAKER_MULTI_MODEL must be "true" in the test environment."""
        self.assertEqual(
            os.getenv("SAGEMAKER_MULTI_MODEL"),
            "true",
            "Variable SAGEMAKER_MULTI_MODEL must be set to true",
        )

    def test_sm_1_model_load(self):
        """Load model_1 and model_2; loading model_1 twice must return 409."""
        # Load model_1
        self._load_model(self.model1_name, self.model1_url, 200)

        # Load the same model again, expect a 409
        self._load_model(self.model1_name, self.model1_url, 409)

        # Load model_2
        self._load_model(self.model2_name, self.model2_url, 200)

    def test_sm_2_model_list(self):
        """The list endpoint must report both loaded models, in any order."""
        r = requests.get(self.url_mme_)
        time.sleep(3)
        entry_1 = {
            "modelName": self.model1_name,
            "modelUrl": self._expected_model_url(self.model1_url),
        }
        entry_2 = {
            "modelName": self.model2_name,
            "modelUrl": self._expected_model_url(self.model2_url),
        }

        # Returned list response's order is not deterministic
        accepted = [
            {"models": [entry_1, entry_2]},
            {"models": [entry_2, entry_1]},
        ]
        listed = r.json()
        self.assertIn(
            listed,
            accepted,
            "Expected one of {}, received: {}".format(accepted, listed),
        )

    def test_sm_3_model_get(self):
        """The get endpoint must describe model_1 by name and URL."""
        get_url = "{}/{}".format(self.url_mme_, self.model1_name)
        r = requests.get(get_url)
        time.sleep(3)
        expected_response = {
            "modelName": self.model1_name,
            "modelUrl": self._expected_model_url(self.model1_url),
        }
        received = r.json()
        self.assertEqual(
            received,
            expected_response,
            "Expected response: {}, received: {}".format(expected_response, received),
        )

    def test_sm_4_model_invoke(self):
        """Invoke model_1 with a JSON body and model_2 with a binary body."""
        # Invoke model_1
        request_body = self._two_input_json_request("INT32", np.int32)
        headers = {"Content-Type": "application/json"}
        invoke_url = "{}/{}/invoke".format(self.url_mme_, self.model1_name)
        r = requests.post(invoke_url, data=request_body, headers=headers)
        r.raise_for_status()

        received = r.json()
        self.assertEqual(
            self.model1_expected_result_,
            received,
            "Expected response : {}, received: {}".format(
                self.model1_expected_result_, received
            ),
        )

        # Invoke model_2
        infer_input = httpclient.InferInput("INPUT0", [1, 8], "FP32")
        input_data = np.expand_dims(
            np.array(self.model2_input_data_, dtype=np.float32), axis=0
        )
        infer_input.set_data_from_numpy(input_data, binary_data=True)
        outputs = [httpclient.InferRequestedOutput("OUTPUT0", binary_data=True)]

        (
            request_body,
            header_length,
        ) = httpclient.InferenceServerClient.generate_request_body(
            [infer_input], outputs=outputs
        )

        invoke_url = "{}/{}/invoke".format(self.url_mme_, self.model2_name)
        headers = {
            "Content-Type": "{}{}".format(
                self._BINARY_CONTENT_TYPE_PREFIX, header_length
            )
        }
        r = requests.post(invoke_url, data=request_body, headers=headers)
        # Fail with the HTTP error itself rather than with a confusing
        # header-parsing error when the server rejects the request.
        r.raise_for_status()

        # The response Content-Type carries the inference header size, which
        # is needed to locate the output binary data in the body.
        header_length_str = r.headers["Content-Type"][
            len(self._BINARY_CONTENT_TYPE_PREFIX) :
        ]
        result = httpclient.InferenceServerClient.parse_response_body(
            r.content, header_length=int(header_length_str)
        )

        output_data = result.as_numpy("OUTPUT0")

        for i in range(8):
            self.assertEqual(
                output_data[0][i], input_data[0][i], "Tensor Value Mismatch"
            )

    def test_sm_5_model_unload(self):
        """Unload both models; unloading an unknown model must return 404."""
        # Unload model_1
        self._unload_model(self.model1_name, 200)

        # Unload model_2
        self._unload_model(self.model2_name, 200)

        # Unload a non-loaded model, expect a 404
        self._unload_model("sm_non_loaded_model", 404)

    def test_sm_6_ensemble_model(self):
        """Load, invoke and unload the ensemble model."""
        # Load ensemble model
        self._load_model(
            self.model3_name,
            self.model3_url,
            200,
            extra_headers={"X-Amzn-SageMaker-Target-Model": f"{self.model3_name}"},
        )

        # Invoke ensemble model
        request_body = self._two_input_json_request("FP32", np.float32)
        headers = {"Content-Type": "application/json"}
        invoke_url = "{}/{}/invoke".format(self.url_mme_, self.model3_name)
        r = requests.post(invoke_url, data=request_body, headers=headers)
        print(f"response: {r.text}")
        r.raise_for_status()
        self._assert_status(r, 200)

        # Unload ensemble model
        self._unload_model(self.model3_name, 200, wait_secs=5, headers=headers)


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_sagemaker/sagemaker_request_many_chunks.py
================================================
#!/usr/bin/python
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import socket
import unittest


class SagemakerRequestManyChunksTest(unittest.TestCase):
    """Send a request split into a huge number of HTTP chunks.

    The request is written over a raw TCP socket so the test fully controls
    the chunked transfer encoding.
    """

    def setUp(self):
        """Define the target address and the number of chunks to send."""
        self._local_host = "localhost"
        self._sagemaker_port = 8080
        self._malicious_chunk_count = (
            1000000  # large enough to cause a stack overflow if using alloca()
        )

    def send_chunked_request(
        self, header: str, chunk_count: int, expected_response: str
    ):
        """Send a chunked request and assert on the server's reply.

        Args:
            header: Request line plus any extra header lines, each terminated
                by ``\\r\\n``. Host, Content-Type, Transfer-Encoding and
                Connection headers are appended to it.
            chunk_count: Number of one-byte chunks to send as the body.
            expected_response: Text that must appear in the raw response.

        Raises:
            AssertionError: If ``expected_response`` is not in the response.
            OSError: If connecting, sending or receiving fails.
        """
        request_head = (
            f"{header}"
            f"Host: {self._local_host}:{self._sagemaker_port}\r\n"
            "Content-Type: application/octet-stream\r\n"
            "Transfer-Encoding: chunked\r\n"
            "Connection: close\r\n"
            "\r\n"
        )
        received = []
        # The context manager closes the socket on every exit path.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((self._local_host, self._sagemaker_port))
            # HTTP request with chunked encoding
            s.sendall(request_head.encode())

            # Send chunked payload. sendall() is required: send() may write
            # only part of the buffer, which would corrupt the chunk framing.
            for _ in range(chunk_count):
                s.sendall(b"1\r\nA\r\n")
            # End chunked encoding
            s.sendall(b"0\r\n\r\n")

            # Receive response until the peer closes the connection
            # ("Connection: close" was requested) or a socket timeout fires.
            while True:
                try:
                    chunk = s.recv(4096)
                except socket.timeout:
                    break
                if not chunk:
                    break
                received.append(chunk)

        # Undecodable bytes are replaced so that a garbled reply shows up as
        # an assertion failure instead of a UnicodeDecodeError.
        response_text = b"".join(received).decode(errors="replace")
        self.assertIn(expected_response, response_text)

    def test_load_model(self):
        """A load request with a many-chunk body must be rejected as bad JSON."""
        request_header = (
            "POST /models HTTP/1.1\r\n" "X-Amzn-SageMaker-Target-Model: ZZZZZZZ\r\n"
        )
        self.send_chunked_request(
            request_header,
            self._malicious_chunk_count,
            "failed to parse the request JSON buffer: Invalid value. at 0",
        )


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_sagemaker/sagemaker_test.py
================================================
#!/usr/bin/python
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import os
import sys
import unittest

import numpy as np
import requests
import test_util as tu
import tritonclient.http as httpclient


class SageMakerTest(tu.TestResultCollector):
    """Tests for the single-model SageMaker ``/invocations`` endpoint."""

    # Content-Type prefix used for requests/responses carrying binary tensors;
    # the JSON header size follows the "=".
    _BINARY_CONTENT_TYPE_PREFIX = (
        "application/vnd.sagemaker-triton.binary+json;json-header-size="
    )

    def setUp(self):
        """Define the endpoint URL, the input data and the expected results."""
        port = os.getenv("SAGEMAKER_BIND_TO_PORT", "8080")
        self.url_ = "http://localhost:{}/invocations".format(port)

        # The same data is sent as INPUT0 and INPUT1. The expected OUTPUT0 is
        # twice the input and the expected OUTPUT1 is all zeros (presumably
        # the model returns the sum and the difference of its inputs --
        # confirm against the model repository).
        self.input_data_ = list(range(16))
        self.expected_output0_data_ = [2 * value for value in self.input_data_]
        self.expected_output1_data_ = [0] * len(self.input_data_)

        self.expected_result_ = {
            "model_name": "sm_model",
            "model_version": "1",
            "outputs": [
                {
                    "name": "OUTPUT0",
                    "datatype": "INT32",
                    "shape": [1, 16],
                    "data": self.expected_output0_data_,
                },
                {
                    "name": "OUTPUT1",
                    "datatype": "INT32",
                    "shape": [1, 16],
                    "data": self.expected_output1_data_,
                },
            ],
        }

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _generate_request_body(self, input0_binary=False, output0_binary=False):
        """Build an inference request with the Triton HTTP client.

        INPUT0 and INPUT1 are INT32 tensors of shape [1, 16] holding
        ``input_data_``. INPUT1 and OUTPUT1 always use JSON data; INPUT0 and
        OUTPUT0 use binary data when the matching flag is set.

        Returns:
            The ``(request_body, header_length)`` pair produced by
            ``InferenceServerClient.generate_request_body``.
        """
        tensor = np.expand_dims(np.array(self.input_data_, dtype=np.int32), axis=0)

        inputs = [
            httpclient.InferInput("INPUT0", [1, 16], "INT32"),
            httpclient.InferInput("INPUT1", [1, 16], "INT32"),
        ]
        inputs[0].set_data_from_numpy(tensor, binary_data=input0_binary)
        inputs[1].set_data_from_numpy(tensor, binary_data=False)

        outputs = [
            httpclient.InferRequestedOutput("OUTPUT0", binary_data=output0_binary),
            httpclient.InferRequestedOutput("OUTPUT1", binary_data=False),
        ]
        return httpclient.InferenceServerClient.generate_request_body(
            inputs, outputs=outputs
        )

    def _post(self, body, content_type="application/json"):
        """POST ``body`` to the endpoint with the given Content-Type."""
        return requests.post(
            self.url_, data=body, headers={"Content-Type": content_type}
        )

    def _assert_expected_json(self, response):
        """Assert that the JSON body of ``response`` equals the expected result."""
        received = response.json()
        self.assertEqual(
            self.expected_result_,
            received,
            "Expected response body: {}; got: {}".format(
                self.expected_result_, received
            ),
        )

    def _assert_expected_tensors(self, result):
        """Assert that OUTPUT0/OUTPUT1 of a parsed result hold the expected data."""
        output0_data = result.as_numpy("OUTPUT0")
        output1_data = result.as_numpy("OUTPUT1")
        expected_pairs = zip(self.expected_output0_data_, self.expected_output1_data_)
        for i, (expected0, expected1) in enumerate(expected_pairs):
            self.assertEqual(output0_data[0][i], expected0)
            self.assertEqual(output1_data[0][i], expected1)

    def _assert_rejected(self, content_type_template):
        """Send a binary request under a malformed Content-Type; expect 400.

        ``content_type_template`` may contain one ``{}`` placeholder, which is
        filled with the real JSON header length of the generated request.
        """
        request_body, header_length = self._generate_request_body(input0_binary=True)
        r = self._post(request_body, content_type_template.format(header_length))
        self.assertEqual(
            400,
            r.status_code,
            "Expected error code {} returned for the request; got: {}".format(
                400, r.status_code
            ),
        )

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_direct_inference(self):
        """A hand-written JSON request returns the expected JSON response."""
        request = {
            "inputs": [
                {
                    "name": input_name,
                    "datatype": "INT32",
                    "shape": [1, 16],
                    "data": self.input_data_,
                }
                for input_name in ("INPUT0", "INPUT1")
            ]
        }
        r = self._post(json.dumps(request))
        r.raise_for_status()

        self._assert_expected_json(r)

    def test_inference_client_generated_request(self):
        """A client-generated JSON request returns the expected JSON response."""
        request_body, _ = self._generate_request_body()
        r = self._post(request_body)
        r.raise_for_status()

        self._assert_expected_json(r)

    def test_inference_client_generated_request_binary(self):
        """A request with a binary INPUT0 returns the expected JSON response."""
        request_body, header_length = self._generate_request_body(input0_binary=True)
        content_type = "{}{}".format(self._BINARY_CONTENT_TYPE_PREFIX, header_length)
        r = self._post(request_body, content_type)
        r.raise_for_status()

        self._assert_expected_json(r)

    def test_inference_client_generated_response(self):
        """A JSON response can be parsed by the client into the expected tensors."""
        request_body, _ = self._generate_request_body()
        r = self._post(request_body)
        r.raise_for_status()

        result = httpclient.InferenceServerClient.parse_response_body(r.content)
        self._assert_expected_tensors(result)

    def test_inference_client_generated_response_binary(self):
        """A response with a binary OUTPUT0 can be parsed by the client."""
        request_body, _ = self._generate_request_body(output0_binary=True)
        r = self._post(request_body)
        r.raise_for_status()

        # The response Content-Type carries the JSON header size needed to
        # split the body into its JSON and binary parts.
        header_length_str = r.headers["Content-Type"][
            len(self._BINARY_CONTENT_TYPE_PREFIX) :
        ]
        result = httpclient.InferenceServerClient.parse_response_body(
            r.content, header_length=int(header_length_str)
        )
        self._assert_expected_tensors(result)

    def test_malformed_binary_header(self):
        """Extra text in front of the binary Content-Type is rejected."""
        self._assert_rejected(
            "additional-string/application/vnd.sagemaker-triton.binary+json;json-header-size={}"
        )

    def test_malformed_binary_header_not_number(self):
        """A non-numeric json-header-size is rejected."""
        self._assert_rejected(
            "application/vnd.sagemaker-triton.binary+json;json-header-size=additional-string{}"
        )

    def test_malformed_binary_header_negative_number(self):
        """A negative json-header-size is rejected."""
        self._assert_rejected(
            "application/vnd.sagemaker-triton.binary+json;json-header-size=-123"
        )

    def test_malformed_binary_header_large_number(self):
        """A json-header-size of 12345, unrelated to the real size, is rejected."""
        self._assert_rejected(
            "application/vnd.sagemaker-triton.binary+json;json-header-size=12345"
        )


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_sagemaker/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

TEST_RESULT_FILE='test_results.txt'
# Make sure we can safety use symbolic link for SageMaker serve script
if [ -d "/opt/ml/model" ] || [ -L "/opt/ml/model" ]; then
    echo -e "Default SageMaker model path must not be used for testing"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Only expose GPU 0 to the processes started by this test.
export CUDA_VISIBLE_DEVICES=0

# Overall test status; set to 1 by any failing check and used as exit code.
RET=0

# Remove artifacts left behind by a previous run.
rm -rf models
rm -f *.log
rm -f *.out

# Python test drivers and the number of unit tests each one is expected to
# report (verified through check_test_results).
SAGEMAKER_TEST=sagemaker_test.py
SAGEMAKER_MULTI_MODEL_TEST=sagemaker_multi_model_test.py
SAGEMAKER_GENERATE_TEST=sagemaker_generate_test.py
SAGEMAKER_GENERATE_STREAM_TEST=sagemaker_generate_stream_test.py
MULTI_MODEL_UNIT_TEST_COUNT=7
UNIT_TEST_COUNT=9
GENERATE_UNIT_TEST_COUNT=1
GENERATE_STREAM_UNIT_TEST_COUNT=1
CLIENT_LOG="./client.log"

DATADIR=/data/inferenceserver/${REPO_VERSION}
ENSEMBLEDIR=/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_LOG="./server.log"
# Link model repository to "/opt/ml/model".
# Use 'mkdir -p' so that an already existing /opt/ml directory is not treated
# as an error (the guard above only rules out /opt/ml/model itself).
mkdir -p /opt/ml/
ln -s `pwd`/models /opt/ml/model
source ../common/util.sh

# Single-version ONNX model served under the name 'sm_model'.
mkdir models && \
    cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 models/sm_model && \
    rm -r models/sm_model/2 && rm -r models/sm_model/3 && \
    sed -i "s/onnx_int32_int32_int32/sm_model/" models/sm_model/config.pbtxt

# Python mock LLM model used by the generate / generate_stream tests.
mkdir -p models/mock_llm/1 && \
    cp ../python_models/generate_models/mock_llm/1/model.py models/mock_llm/1 && \
    cp ../python_models/generate_models/mock_llm/config.pbtxt models/mock_llm

# Use SageMaker's ping endpoint to check server status.
# Wait until the server health endpoint shows ready.
#
# Arguments:
#   $1 - PID of the server process to watch
#   $2 - maximum number of seconds to wait (optional, defaults to 30)
# Environment:
#   SAGEMAKER_BIND_TO_PORT - when set, the port to ping instead of 8080
# Result:
#   WAIT_RET - set to 0 on success, 1 on failure (process gone or timeout)
# Side effects:
#   errexit is switched off around each probe and then enabled with 'set -e',
#   so errexit is on whenever at least one probe has been performed.
function sagemaker_wait_for_server_ready() {
    # Read the arguments positionally instead of shifting: a 'shift' with no
    # remaining arguments fails, which would abort the script under errexit
    # when the optional timeout is omitted.
    local spid="$1"
    local wait_time_secs="${2:-30}"
    # Keep helper variables local so the caller's globals (e.g. 'code') are
    # not clobbered.
    local ping_address="localhost:8080/ping"
    local wait_secs
    local code

    WAIT_RET=0

    if [ -n "$SAGEMAKER_BIND_TO_PORT" ]; then
        ping_address="localhost:${SAGEMAKER_BIND_TO_PORT}/ping"
    fi

    wait_secs=$wait_time_secs
    until test $wait_secs -eq 0 ; do
        # 'kill -0' only checks that the process still exists.
        if ! kill -0 $spid; then
            echo "=== Server not running."
            WAIT_RET=1
            return
        fi

        sleep 1;

        # Discard the response body so that 'code' holds the HTTP status only.
        set +e
        code=$(curl -s -o /dev/null -w '%{http_code}' "$ping_address")
        set -e
        if [ "$code" == "200" ]; then
            return
        fi

        ((wait_secs--));
    done

    echo "=== Timeout $wait_time_secs secs. Server not ready."
    WAIT_RET=1
}

# Launch Triton through the SageMaker 'serve' script with sm_model as the
# default model.
export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=sm_model
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# $! is the PID of the 'serve' script, so the Triton PID is looked up via ps
sleep 1
SERVER_PID=$(ps | grep tritonserver | awk '{ printf $1 }')
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# The ping endpoint must answer 200 on the default port
set +e
code=$(curl -s -w %{http_code} -o ./ping.out localhost:8080/ping)
set -e
if [ "$code" != "200" ]; then
    cat ./ping.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Run the inference unit tests against the default configuration
set +e
if python $SAGEMAKER_TEST SageMakerTest >>$CLIENT_LOG 2>&1; then
    if ! check_test_results $TEST_RESULT_FILE $UNIT_TEST_COUNT; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
else
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
fi
set -e

# Stop Triton, then reap the 'serve' script
kill $SERVER_PID
wait $SERVE_PID

# Move the SageMaker endpoint to port 8000 and restart through 'serve'
export SAGEMAKER_BIND_TO_PORT=8000
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# $! is the PID of the 'serve' script, so the Triton PID is looked up via ps
sleep 1
SERVER_PID=$(ps | grep tritonserver | awk '{ printf $1 }')
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# Repeat the inference unit tests with the new port exported
set +e
if python $SAGEMAKER_TEST SageMakerTest >>$CLIENT_LOG 2>&1; then
    if ! check_test_results $TEST_RESULT_FILE $UNIT_TEST_COUNT; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
else
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
fi
set -e

# Later sections go back to the default port
unset SAGEMAKER_BIND_TO_PORT

kill $SERVER_PID
wait $SERVE_PID

# Set SageMaker safe port range
export SAGEMAKER_SAFE_PORT_RANGE="8081-9000"

# Start Triton in a similar way to 'serve' script, as 'serve' script can't
# be used to satisfy the setting under test
SAGEMAKER_ARGS="--model-repository=/opt/ml/model"
if [ -n "$SAGEMAKER_BIND_TO_PORT" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --sagemaker-port=${SAGEMAKER_BIND_TO_PORT}"
fi
if [ -n "$SAGEMAKER_SAFE_PORT_RANGE" ]; then
    SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --sagemaker-safe-port-range=${SAGEMAKER_SAFE_PORT_RANGE}"
fi

# Enable HTTP endpoint and expect server fail to start (default port 8000 < 8081)
SERVER_ARGS="--allow-sagemaker=true --allow-grpc false --allow-http true --allow-metrics false \
             --model-control-mode=explicit --load-model=${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME} \
             $SAGEMAKER_ARGS"
run_server_nowait
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" == "0" ]; then
    # The server became ready although the HTTP port is outside the range
    echo -e "\n***\n*** Expect failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    RET=1
else
    # Test grep directly in the 'if': errexit is enabled at this point, so a
    # bare failing grep would abort the script before the failure is
    # reported and recorded in RET.
    if ! grep "The server cannot listen to HTTP requests at port" $SERVER_LOG; then
        echo -e "\n***\n*** Failed. Expected error on using disallowed port\n***"
        RET=1
    fi
fi

# Run 'serve' script and expect SageMaker endpoint on default port 8080 (< 8081)
# is working
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain Triton PID in such way as $! will return the script PID
sleep 1
SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`

sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# Inference while SAGEMAKER_SAFE_PORT_RANGE is still exported (the SageMaker
# port itself is back to its default here)
set +e
python $SAGEMAKER_TEST SageMakerTest >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
else
    # The Python run succeeded; also verify the reported unit test count
    check_test_results $TEST_RESULT_FILE $UNIT_TEST_COUNT
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

# Stop Triton, then wait for the 'serve' script itself to finish
kill $SERVER_PID
wait $SERVE_PID

# Serve the mock LLM with the 'generate' inference type
export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=mock_llm
export SAGEMAKER_TRITON_INFERENCE_TYPE=generate
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# $! is the PID of the 'serve' script, so the Triton PID is looked up via ps
sleep 1
SERVER_PID=$(ps | grep tritonserver | awk '{ printf $1 }')
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# Run the generate unit tests
set +e
if python $SAGEMAKER_GENERATE_TEST SageMakerGenerateTest >>$CLIENT_LOG 2>&1; then
    if ! check_test_results $TEST_RESULT_FILE $GENERATE_UNIT_TEST_COUNT; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
else
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
fi
set -e

# Drop the settings specific to this section
unset SAGEMAKER_TRITON_DEFAULT_MODEL_NAME
unset SAGEMAKER_TRITON_INFERENCE_TYPE

kill $SERVER_PID
wait $SERVE_PID

# Start server with LLM and set inference type to generate_stream
export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=mock_llm
export SAGEMAKER_TRITON_INFERENCE_TYPE=generate_stream
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain Triton PID in such way as $! will return the script PID
sleep 1
SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# Helper library to parse SSE events
# https://github.com/mpetazzoni/sseclient
pip install sseclient-py

# Inference with generate_stream inference type
set +e
python $SAGEMAKER_GENERATE_STREAM_TEST SageMakerGenerateStreamTest >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
else
    # The Python run succeeded; also verify the reported unit test count
    check_test_results $TEST_RESULT_FILE $GENERATE_STREAM_UNIT_TEST_COUNT
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

# Drop the settings specific to this section
unset SAGEMAKER_TRITON_DEFAULT_MODEL_NAME
unset SAGEMAKER_TRITON_INFERENCE_TYPE

# Stop Triton, then wait for the 'serve' script itself to finish
kill $SERVER_PID
wait $SERVE_PID

# Test serve with incorrect inference type
export SAGEMAKER_TRITON_INFERENCE_TYPE=incorrect_inference_type
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain Triton PID in such way as $! will return the script PID
sleep 1
SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`
if [ -n "$SERVER_PID" ]; then
    # A tritonserver process exists, i.e. 'serve' did not reject the setting
    echo -e "\n***\n*** Expect failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    RET=1
else
    # Test grep directly in the 'if': errexit is enabled at this point, so a
    # bare failing grep would abort the script before the failure is
    # reported and recorded in RET.
    if ! grep "ERROR: Invalid SAGEMAKER_TRITON_INFERENCE_TYPE" $SERVER_LOG; then
        echo -e "\n***\n*** Failed. Expected error on incorrect inference type\n***"
        RET=1
    fi
fi
unset SAGEMAKER_TRITON_INFERENCE_TYPE

unset SAGEMAKER_SAFE_PORT_RANGE
unset SAGEMAKER_TRITON_DEFAULT_MODEL_NAME

# Test serve with incorrect model name
export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=incorrect_model_name
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain Triton PID in such way as $! will return the script PID
sleep 1
SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`
if [ -n "$SERVER_PID" ]; then
    # A tritonserver process exists, i.e. 'serve' did not reject the name
    echo -e "\n***\n*** Expect failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    RET=1
else
    # Test grep directly in the 'if': errexit is enabled at this point, so a
    # bare failing grep would abort the script before the failure is
    # reported and recorded in RET.
    if ! grep "ERROR: Directory with provided SAGEMAKER_TRITON_DEFAULT_MODEL_NAME ${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME} does not exist" $SERVER_LOG; then
        echo -e "\n***\n*** Failed. Expected error on model name and dir name mismatch\n***"
        RET=1
    fi
fi

unset SAGEMAKER_TRITON_DEFAULT_MODEL_NAME

# Test serve with SAGEMAKER_TRITON_DEFAULT_MODEL_NAME unset, but containing single model directory
rm -rf models/mock_llm
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain Triton PID in such way as $! will return the script PID
sleep 1
SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
else
    # Test grep directly in the 'if': errexit is enabled at this point, so a
    # bare failing grep would abort the script (leaving the server running)
    # before the failure is reported and recorded in RET.
    if ! grep "WARNING: No SAGEMAKER_TRITON_DEFAULT_MODEL_NAME provided" $SERVER_LOG; then
        echo -e "\n***\n*** Failed. Expected server to start with only existing directory as model.\n***"
        RET=1
    fi
fi

# Stop Triton, then wait for the 'serve' script itself to finish
kill $SERVER_PID
wait $SERVE_PID

# Test unspecified SAGEMAKER_TRITON_DEFAULT_MODEL_NAME for ecs/eks case
SERVER_ARGS="--allow-sagemaker=true --allow-grpc false --allow-http false --allow-metrics false \
             --model-repository `pwd`/models --model-control-mode=explicit --exit-on-error=false"
run_server_nowait
sleep 5
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Invoke without any default model configured; the request must be rejected
set +e
code=`curl -X POST -s -w %{http_code} -o ./invoke.out localhost:8080/invocations --data-raw 'dummy'`
set -e
if [ "$code" == "200" ]; then
    cat ./invoke.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
# Test grep directly in the condition: errexit is enabled here, so a bare
# failing grep would abort the script (leaving the server running) before the
# failure is reported.
elif ! grep "Request for unknown model: 'unspecified_SAGEMAKER_TRITON_DEFAULT_MODEL_NAME' is not found" ./invoke.out; then
    echo -e "\n***\n*** Failed. Expected inference to fail with unspecified model error.\n***"
    # Record the failure so that it is reflected in the exit code
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

# TODO: Test ensemble backend

# Run server with invalid model and exit-on-error=false
# (removing the version directory content is what makes sm_model invalid)
rm models/sm_model/1/*
SERVER_ARGS="--allow-sagemaker=true --allow-grpc false --allow-http false --allow-metrics false \
             --model-repository `pwd`/models --model-control-mode=explicit --load-model=sm_model \
             --exit-on-error=false"
run_server_nowait
# Fixed delay instead of a readiness wait: the ping endpoint is expected to
# report an error in this configuration
sleep 5
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Ping and expect error code in SME mode.
set +e
code=`curl -s -w %{http_code} -o ./ping.out localhost:8080/ping`
set -e
if [ "$code" == "200" ]; then
    cat ./ping.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

# MME begin
# Prepare model repository

# Expose the local 'models' directory at /opt/ml/models for this section
ln -s `pwd`/models /opt/ml/models
# Model path will be of the form /opt/ml/models/<hash>/model
MODEL1_PATH="models/123456789abcdefghi/model"
MODEL2_PATH="models/987654321ihgfedcba/model"
mkdir -p "${MODEL1_PATH}"
mkdir -p "${MODEL2_PATH}"

# Model 1: single-version ONNX int32 model renamed to sm_mme_model_1
cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32/* ${MODEL1_PATH} && \
    rm -r ${MODEL1_PATH}/2 && rm -r ${MODEL1_PATH}/3 && \
    sed -i "s/onnx_int32_int32_int32/sm_mme_model_1/" ${MODEL1_PATH}/config.pbtxt

# Model 2: ONNX identity model renamed to sm_mme_model_2
cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32/* ${MODEL2_PATH} && \
    sed -i "s/onnx_zero_1_float32/sm_mme_model_2/" ${MODEL2_PATH}/config.pbtxt

# Ensemble model
ENSEMBLE_MODEL_PATH="models/123456789ensemble/model"
mkdir -p "${ENSEMBLE_MODEL_PATH}"

model_name=python_float32_float32_float32

# Python add_sub model; its config drops label_filename and gets
# max_batch_size 64 appended
mkdir -p ${ENSEMBLE_MODEL_PATH}/${model_name}/1 && \
cp ../python_models/add_sub/model.py ${ENSEMBLE_MODEL_PATH}/${model_name}/1/. && \
cp ../python_models/add_sub/config.pbtxt ${ENSEMBLE_MODEL_PATH}/${model_name}/.
(cd ${ENSEMBLE_MODEL_PATH}/${model_name} && \
                    sed -i "s/label_filename:.*//" config.pbtxt && \
                    echo "max_batch_size: 64" >> config.pbtxt)

# Ensemble part
# The fan_add_sub config is pointed at ${model_name} and only the first
# 'name:' entry is rewritten to fan_${model_name}
mkdir -p ${ENSEMBLE_MODEL_PATH}/fan_${model_name}/1 && \
            cp ../python_models/add_sub/model.py ${ENSEMBLE_MODEL_PATH}/fan_${model_name}/1/. && \
            cp ../python_models/fan_add_sub/config.pbtxt ${ENSEMBLE_MODEL_PATH}/fan_${model_name}/. && \
            (cd ${ENSEMBLE_MODEL_PATH}/fan_${model_name} && \
                    sed -i "s/label_filename:.*//" config.pbtxt && \
                    sed -i "s/model_name: \"ENSEMBLE_MODEL_NAME\"/model_name: \"${model_name}\"/" config.pbtxt && \
                    sed -i "0,/name:.*/{s/name:.*/name: \"fan_${model_name}\"/}" config.pbtxt && \
                    echo "max_batch_size: 64" >> config.pbtxt)

# Custom float32 component of ensemble
cp -r $ENSEMBLEDIR/nop_TYPE_FP32_-1 ${ENSEMBLE_MODEL_PATH}/. && \
    mkdir -p ${ENSEMBLE_MODEL_PATH}/nop_TYPE_FP32_-1/1

# Start server with 'serve' script
export SAGEMAKER_MULTI_MODEL=true
export SAGEMAKER_TRITON_LOG_VERBOSE=true

serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain Triton PID in such way as $! will return the script PID
sleep 1
SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# API tests in default setting
set +e
python $SAGEMAKER_MULTI_MODEL_TEST SageMakerMultiModelTest >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
else
    # The Python run succeeded; also verify the reported unit test count
    check_test_results $TEST_RESULT_FILE $MULTI_MODEL_UNIT_TEST_COUNT
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

unset SAGEMAKER_MULTI_MODEL

# Remove the /opt/ml/models link created at the start of this section
unlink /opt/ml/models
rm -rf /opt/ml/models

kill $SERVER_PID
wait $SERVE_PID
# MME end

### Test Sagemaker Requests Containing Many Chunks ###
# Rebuild the model repository from scratch with only sm_model in it
rm -rf models && mkdir models && \
    cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 models/sm_model && \
    rm -r models/sm_model/2 && rm -r models/sm_model/3 && \
    sed -i "s/onnx_int32_int32_int32/sm_model/" models/sm_model/config.pbtxt

export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=sm_model
REQUEST_MANY_CHUNKS_PY="sagemaker_request_many_chunks.py"
# This section logs to its own client and server log files
CLIENT_LOG="./client.sagemaker_request_many_chunks.log"
SERVER_LOG="./server.sagemaker_request_many_chunks.log"

serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain Triton PID in such way as $! will return the script PID
sleep 1
SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# Ping
set +e
code=`curl -s -w %{http_code} -o ./ping.out localhost:8080/ping`
set -e
if [ "$code" != "200" ]; then
    cat ./ping.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Run the many-chunks client; only its exit status is checked
set +e
python $REQUEST_MANY_CHUNKS_PY >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Sagemaker Request Many Chunks Test Failed\n***"
    cat $SERVER_LOG
    cat $CLIENT_LOG
    RET=1
fi
set -e

# Stop Triton, then wait for the 'serve' script itself to finish
kill $SERVER_PID
wait $SERVE_PID

### Restricted API regression for SageMaker endpoint ###
# Verify that --http-restricted-api applies to SageMaker model management
# endpoints (load, unload, list, get) while leaving health and inference
# unrestricted.

SERVER_LOG="./sagemaker_restricted_api_server.log"
# The model-repository API group is restricted to requests that carry the
# header 'X-SM-Auth: secret'. Both the SageMaker and the core HTTP endpoint
# are enabled so that each can be probed below.
SERVER_ARGS="--allow-sagemaker=true --allow-http=true \
  --allow-grpc=false --allow-metrics=false \
  --model-repository=`pwd`/models \
  --model-control-mode=explicit \
  --load-model=sm_model \
  --http-restricted-api=model-repository:X-SM-Auth=secret"
run_server_nowait
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit for the whole series of probes: every check records its
# failure in RET instead of aborting the script.
set +e

# Health should succeed without restricted header
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out localhost:8080/ping`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected /ping to succeed without restricted header\n***"
    RET=1
fi

# Inference should succeed without restricted header
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8080/invocations \
    -H "Content-Type: application/json" \
    -d '{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},{"name":"INPUT1","datatype":"INT32","shape":[1,16],"data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}]}'`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected /invocations inference to succeed without restricted header\n***"
    RET=1
fi

# List models without auth header should be blocked
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out localhost:8080/models`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected GET /models to return 403 without restricted header (got $code)\n***"
    RET=1
else
    # Besides the status code, the body must carry the restriction message
    grep "This API is restricted" ./curl.out
    if [ $? -ne 0 ]; then
        cat ./curl.out
        echo -e "\n***\n*** Failed. Expected restriction error message in response body\n***"
        RET=1
    fi
fi

# Get model without auth header should be blocked
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out localhost:8080/models/sm_model`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected GET /models/<name> to return 403 without restricted header (got $code)\n***"
    RET=1
fi

# Load model without auth header should be blocked
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8080/models \
    -H "Content-Type: application/json" \
    -d '{"model_name":"test","url":"/opt/ml/models/123/model"}'`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected POST /models (load) to return 403 without restricted header (got $code)\n***"
    RET=1
fi

# Unload model without auth header should be blocked
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X DELETE localhost:8080/models/sm_model`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected DELETE /models (unload) to return 403 without restricted header (got $code)\n***"
    RET=1
fi

# List models WITH correct auth header should succeed
# (only a 403 counts as failure here: the check is about passing the
# restriction, not about the exact status of the request itself)
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -H "X-SM-Auth: secret" localhost:8080/models`
if [ "$code" == "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected GET /models with auth header to pass restriction check\n***"
    RET=1
fi

# Get model WITH correct auth header should pass restriction check
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -H "X-SM-Auth: secret" localhost:8080/models/sm_model`
if [ "$code" == "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected GET /models/<name> with auth header to pass restriction check\n***"
    RET=1
fi

# Wrong auth header value should be rejected
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -H "X-SM-Auth: wrong" localhost:8080/models`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected wrong auth header value to return 403 (got $code)\n***"
    RET=1
fi

# Verify core HTTP endpoint is also restricted by the same flag
# (port 8000 is the core HTTP endpoint, 8080 the SageMaker one)
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/repository/index`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected core HTTP repository index to be restricted (got $code)\n***"
    RET=1
fi

# Probing is done; re-enable errexit for the shutdown
set -e

kill $SERVER_PID
wait $SERVER_PID

### HTTP max input size enforcement on SageMaker endpoint ###
# Verify that --http-max-input-size is enforced on the SageMaker /invocations
# path, not just the core HTTP endpoint.

# Dedicated repository holding a single Python identity model, sm_identity
rm -rf models_identity
mkdir -p models_identity/sm_identity/1 && \
    cp ../python_models/identity_fp32/model.py models_identity/sm_identity/1/ && \
    cp ../python_models/identity_fp32/config.pbtxt models_identity/sm_identity/ && \
    sed -i "s/identity_fp32/sm_identity/" models_identity/sm_identity/config.pbtxt
mkdir -p /opt/ml
# /opt/ml/model is already a symbolic link to the 'models' directory. Without
# -n, ln would follow that link and create the new link inside 'models'
# instead of replacing /opt/ml/model itself.
ln -sfn `pwd`/models_identity /opt/ml/model

export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=sm_identity
SERVER_LOG="./sagemaker_max_input_size_server.log"
# Cap HTTP request input at 128 bytes; both the SageMaker and the core HTTP
# endpoint are enabled so that each can be probed below.
SERVER_ARGS="--allow-sagemaker=true --allow-http=true \
  --allow-grpc=false --allow-metrics=false \
  --model-repository=`pwd`/models_identity \
  --http-max-input-size=128"
run_server_nowait
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# Disable errexit while probing: each check records its failure in RET
set +e

# Small payload under 128 bytes should succeed
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8080/invocations \
    -H "Content-Type: application/json" \
    -d '{"inputs":[{"name":"INPUT0","datatype":"FP32","shape":[1,1],"data":[1.0]}],"outputs":[{"name":"OUTPUT0"}]}'`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected small payload to succeed on SageMaker endpoint (got $code)\n***"
    RET=1
fi

# Large payload over 128 bytes should be rejected
rm -f ./curl.out
LARGE_PAYLOAD='{"inputs":[{"name":"INPUT0","datatype":"FP32","shape":[1,16],"data":[1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0]}],"outputs":[{"name":"OUTPUT0"}]}'
code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8080/invocations \
    -H "Content-Type: application/json" \
    -d "$LARGE_PAYLOAD"`
if [ "$code" == "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected oversized payload to be rejected on SageMaker endpoint\n***"
    RET=1
fi

# Same limit should apply to core HTTP endpoint
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/models/sm_identity/infer \
    -H "Content-Type: application/json" \
    -d "$LARGE_PAYLOAD"`
if [ "$code" == "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected oversized payload to be rejected on core HTTP endpoint\n***"
    RET=1
fi

set -e

unset SAGEMAKER_TRITON_DEFAULT_MODEL_NAME

kill $SERVER_PID
wait $SERVER_PID

# Final cleanup of the SageMaker model link and the temporary repository
unlink /opt/ml/model
rm -rf /opt/ml/model
rm -rf models_identity

# Print the overall verdict and propagate it as the exit code.
if [ $RET -ne 0 ]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_scalar_io/scalar_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import np_to_triton_dtype


class ScalarIOTest(tu.TestResultCollector):
    """Send single-element tensors to the scalar models and expect an echo.

    For every backend listed in the comma-separated ``BACKENDS`` environment
    variable (default ``"onnx"``), the ``<backend>_scalar_1dim`` and
    ``<backend>_scalar_2dim`` models are queried over gRPC at
    ``localhost:8001``.
    """

    def setUp(self):
        """Create the gRPC client and read the list of backends to test."""
        self._client = grpcclient.InferenceServerClient(url="localhost:8001")
        self._backends = os.environ.get("BACKENDS", "onnx").split(",")

    def tearDown(self):
        """Close the gRPC client so its connection is not leaked per test."""
        self._client.close()
        super().tearDown()

    def _send_request_and_verify_result(self, input, model_name):
        """Infer on ``model_name`` and check that OUTPUT matches INPUT.

        Args:
            input: numpy array sent as the tensor named ``INPUT``.
            model_name: name of the model to run the inference on.

        Raises:
            AssertionError: if the returned ``OUTPUT`` tensor is not close to
                ``input``.
        """
        inputs = []
        inputs.append(
            grpcclient.InferInput("INPUT", input.shape, np_to_triton_dtype(input.dtype))
        )
        inputs[-1].set_data_from_numpy(input)
        result = self._client.infer(inputs=inputs, model_name=model_name)
        output = result.as_numpy("OUTPUT")
        np.testing.assert_allclose(input, output)

    def test_scalar_io(self):
        """Round-trip a one-element tensor of shape [1] and of shape [1, 1]."""
        for backend in self._backends:
            model_name = f"{backend}_scalar_1dim"
            self._send_request_and_verify_result(
                np.asarray([1], dtype=np.float32), model_name
            )

            model_name = f"{backend}_scalar_2dim"
            self._send_request_and_verify_result(
                np.asarray([[1]], dtype=np.float32), model_name
            )


# Run the unittest command line runner when executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_scalar_io/test.sh
================================================
#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the data repository version: the first command line argument, when
# given, overrides the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -gt 0 ]; then
    REPO_VERSION="$1"
fi
# Without a version the data directory cannot be located, so give up early.
if [ "${REPO_VERSION}" == "" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# An optional architecture tag is appended as a "_<arch>" suffix.
if [ -n "${TEST_REPO_ARCH}" ]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Overall test status; set to 1 by a failing check and used as exit code.
RET=0
TEST_RESULT_FILE='test_results.txt'
# scalar_test.py reads BACKENDS from its environment, so the variable has to
# be exported for the setting to actually reach the Python test.
export BACKENDS="onnx"
export CUDA_VISIBLE_DEVICES=0
DATADIR=/data/inferenceserver/${REPO_VERSION}

# Fresh model repository populated with the scalar QA models
rm -rf models
mkdir models
cp -r $DATADIR/qa_scalar_models/* models/

CLIENT_LOG="./client.log"
SCALAR_TEST=scalar_test.py
source ../common/util.sh

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"

# Bring the server up; run_server reports failure through SERVER_PID == 0
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Run the scalar I/O client test and dump both logs if it fails
if ! python3 $SCALAR_TEST >> $CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** scalar_test.py FAILED. \n***"
    cat $CLIENT_LOG
    cat $SERVER_LOG
    RET=1
fi

# Shut the server down and reap it
kill $SERVER_PID
wait $SERVER_PID

# Make sure the server fails loading the model if it has a dimension higher than
# 1
sed -i "s/dims.*/dims:\[2\]/g" models/onnx_scalar_1dim/config.pbtxt
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Expected the server to fail loading \n***"
    cat $SERVER_LOG
    # The server started against expectations; stop it so that no orphaned
    # process is left behind when the test exits.
    kill $SERVER_PID
    wait $SERVER_PID
    exit 1
fi

# Print the overall verdict and propagate it as the exit code.
if [ $RET -ne 0 ]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_sdk/grpc_test.cc
================================================
// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <iostream>

#include "grpc_client.h"

namespace tc = triton::client;

int
main(int argc, char* argv[])
{
  std::unique_ptr<tc::InferenceServerGrpcClient> client;
  // Add a symbol from protobufs to verify correct linking
  inference::ModelConfigResponse model_config;
  tc::Error err =
      tc::InferenceServerGrpcClient::Create(&client, "localhost:8001");
  if (!err.IsOk()) {
    std::cerr << "InferenceServerGrpcClient::Create failed: " << err.Message()
              << std::endl;
    return 1;
  }

  // No server is running so expect liveness call to fail
  bool live;
  err = client->IsServerLive(&live);
  if (!err.IsOk()) {
    std::cerr << "InferenceServerGrpcClient::IsServerLive expected fail: "
              << err.Message() << std::endl;
    return 0;
  }

  return 1;
}


================================================
FILE: qa/L0_sdk/http_test.cc
================================================
// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <iostream>

#include "http_client.h"

namespace tc = triton::client;

int
main(int argc, char* argv[])
{
  std::unique_ptr<tc::InferenceServerHttpClient> client;
  tc::Error err =
      tc::InferenceServerHttpClient::Create(&client, "localhost:8000");
  if (!err.IsOk()) {
    std::cerr << "InferenceServerHttpClient::Create failed: " << err.Message()
              << std::endl;
    return 1;
  }

  // No server is running so expect liveness call to fail
  bool live;
  err = client->IsServerLive(&live);
  if (!err.IsOk()) {
    std::cerr << "InferenceServerHttpClient::IsServerLive expected fail: "
              << err.Message() << std::endl;
    return 0;
  }

  return 1;
}


================================================
FILE: qa/L0_sdk/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Install the tar file: unpack the client tarball found in /workspace into a
# freshly created triton_client directory.
rm -fr triton_client
mkdir triton_client
(cd triton_client && tar xzvf /workspace/*.tar.gz)

# Do not abort on a failing command; failures are accumulated in RET and
# reported at the end of the script.
set +e

RET=0

# Check image_client and perf_analyzer. image_client must be an executable in
# the unpacked tarball, perf_analyzer must be resolvable through PATH.
if [[ ! -x "triton_client/bin/image_client" ]]; then
    echo -e "*** image_client executable not present\n"
    RET=1
fi
if ! command -v perf_analyzer >/dev/null 2>&1; then
    echo -e "*** perf_analyzer is not installed\n"
    RET=1
fi

# Check that both the shared and the static client libraries are present
for l in libgrpcclient.so libgrpcclient_static.a libhttpclient.so libhttpclient_static.a; do
    if [[ ! -f "triton_client/lib/$l" ]]; then
        echo -e "*** library $l not present\n"
        RET=1
    fi
done

# Absolute library / include paths used by the compile tests below
client_lib=$(pwd)/triton_client/lib
client_inc=$(pwd)/triton_client/include

# Test linking a simple gRPC app against the shared library
g++ grpc_test.cc -o grpc_test -I$client_inc -L$client_lib -lgrpcclient

# $? here is the exit status of the g++ command above. The test binary itself
# exits with 0 when its liveness request fails (no server is running).
if [ $? -eq 0 ]; then
    if [[ ! -x "./grpc_test" ]]; then
        echo -e "*** grpc_test executable not present\n"
        RET=1
    else
        ./grpc_test
        if [ $? -eq 0 ]; then
            echo -e "\n***\n*** grpc_test exited with 0 PASSED\n***"
        else
            echo -e "\n***\n*** grpc_test exited with non-zero FAILED\n***"
            RET=1
        fi
    fi
else
    echo -e "\n***\n*** Client headers build FAILED\n***"
    RET=1
fi

#
# Test linking against static library
#

# The *.a glob is not expanded inside the quotes; it is expanded when the
# variable is used unquoted on the g++ command line below. All static archives
# are placed in one linker group so their mutual references can be resolved
# regardless of order.
grpc_static_libs="-Wl,--start-group $client_lib/*.a -Wl,--end-group"

g++ grpc_test.cc $grpc_static_libs -o grpc_test_static -I$client_inc -lz -lssl -lcrypto -lpthread

# $? here is the exit status of the static g++ link above.
if [ $? -eq 0 ]; then
    if [[ ! -x "./grpc_test_static" ]]; then
        echo -e "*** grpc_test_static executable not present\n"
        RET=1
    else
        ./grpc_test_static
        if [ $? -eq 0 ]; then
            echo -e "\n***\n*** grpc_test_static exited with 0 PASSED\n***"
        else
            echo -e "\n***\n*** grpc_test_static exited with non-zero FAILED\n***"
            RET=1
        fi
    fi
else
    echo -e "\n***\n*** Client headers build FAILED\n***"
    RET=1
fi

#
# Test a simple app using Triton HTTP API
#

# Test linking against the shared library
g++ http_test.cc -o http_test -I$client_inc -L$client_lib -lhttpclient

# $? here is the exit status of the g++ command above. The test binary itself
# exits with 0 when its liveness request fails (no server is running).
if [ $? -eq 0 ]; then
    if [[ ! -x "./http_test" ]]; then
        echo -e "*** http_test executable not present\n"
        RET=1
    else
        ./http_test
        if [ $? -eq 0 ]; then
            echo -e "\n***\n*** http_test exited with 0 PASSED\n***"
        else
            echo -e "\n***\n*** http_test exited with non-zero FAILED\n***"
            RET=1
        fi
    fi
else
    echo -e "\n***\n*** Client headers build FAILED\n***"
    RET=1
fi

# Test linking against the static HTTP client library together with the static
# libcurl archive from the same lib directory
g++ http_test.cc $client_lib/libhttpclient_static.a $client_lib/libcurl.a -o http_test_static \
  -I$client_inc -lz -lssl -lcrypto -lpthread

# $? here is the exit status of the static g++ link above.
if [ $? -eq 0 ]; then
    if [[ ! -x "./http_test_static" ]]; then
        echo -e "*** http_test_static executable not present\n"
        RET=1
    else
        ./http_test_static
        if [ $? -eq 0 ]; then
            echo -e "\n***\n*** http_test_static exited with 0 PASSED\n***"
        else
            echo -e "\n***\n*** http_test_static exited with non-zero FAILED\n***"
            RET=1
        fi
    fi
else
    echo -e "\n***\n*** Client headers build FAILED\n***"
    RET=1
fi

# Check wheels. TRITON_VERSION is passed as the version field for wheel
# generation, but setuptools normalizes the version number, so the text is
# rewritten here ("dev" -> ".dev0") to match the normalized version used in the
# wheel file names.
WHLVERSION=`cat /workspace/TRITON_VERSION | sed 's/dev/\.dev0/'`
# The platform-specific wheel name depends on the machine architecture.
if [[ "aarch64" != $(uname -m) ]] ; then
    WHLS="tritonclient-${WHLVERSION}-py3-none-any.whl \
          tritonclient-${WHLVERSION}-py3-none-manylinux1_x86_64.whl"
else
    WHLS="tritonclient-${WHLVERSION}-py3-none-any.whl \
          tritonclient-${WHLVERSION}-py3-none-manylinux2014_aarch64.whl"
fi
for l in $WHLS; do
    if [[ ! -f "triton_client/python/$l" ]]; then
        echo -e "*** wheel $l not present\n"
        echo -e "*** available wheels in triton_client/python\n"
        ls -ltr triton_client/python
        RET=1
    fi
done

# Check wheel installation by importing the tritonclient packages.
# Note: in bash the leading/trailing """ is an empty string "" followed by a
# regular double-quoted string, so python receives a single -c argument.
python -c """import tritonclient; import tritonclient.grpc; import tritonclient.http; \
          import tritonclient.utils; import tritonclient.grpc.model_config_pb2; \
          import tritonclient.grpc.service_pb2; import tritonclient.grpc.service_pb2_grpc; \
          import tritonclient.utils.cuda_shared_memory; import tritonclient.utils.shared_memory"""
# Add python's exit status to RET so a failed import makes RET non-zero.
RET=$(($RET+$?))

# Each listed executable must be reported exactly once under /usr/local/bin by
# `which -a`.
EXECUTABLES="perf_analyzer"
for l in $EXECUTABLES; do
  if [ $(which -a $l | grep "/usr/local/bin/$l" | wc -l) -ne 1 ]; then
    which -a $l
    echo -e "*** $l executable not installed by tritonclient wheel\n"
    RET=1
  fi
done

# Check java client: the API jar and every example jar must exist in the
# unpacked tarball.
for jar in java-api-0.0.1.jar \
           examples/MemoryGrowthTest.jar \
           examples/SimpleInferClient.jar \
           examples/SimpleInferPerf.jar; do
    if [[ ! -e "triton_client/java/$jar" ]]; then
        # Report only the file name, without any leading directory
        echo -e "*** ${jar##*/} not present\n"
        RET=1
    fi
done

set -e

# Print the overall verdict and propagate the accumulated status
if [[ $RET -ne 0 ]]; then
  echo -e "\n***\n*** Test FAILED\n***"
else
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_secure_grpc/test.sh
================================================
#!/bin/bash
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: the first positional argument when one is given,
# otherwise the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set and non-empty
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Accumulated test status; set to 1 by any failed check below
RET=0

# Clients exercised against the secured gRPC endpoint
TEST_CLIENT_AIO_PY=../clients/simple_grpc_aio_infer_client.py
TEST_CLIENT_PY=../clients/simple_grpc_infer_client.py
TEST_CLIENT=../clients/simple_grpc_infer_client

CLIENT_LOG=`pwd`/client.log
DATADIR=`pwd`/models
SERVER=/opt/tritonserver/bin/tritonserver
# Arguments shared by every test case. The SSL mode flags (--grpc-use-ssl /
# --grpc-use-ssl-mutual) are deliberately NOT part of the base arguments: each
# test case appends exactly the flag(s) it wants to exercise. Including
# --grpc-use-ssl=1 here would make the "mutual" case indistinguishable from
# the "both" case.
SERVER_BASE_ARGS="--model-repository=$DATADIR --grpc-server-cert server.crt --grpc-server-key server.key --grpc-root-cert ca.crt"
source ../common/util.sh

# Remove logs left over from a previous run
rm -fr *.log *.log.*

# Generate valid CA (passphrase-protected 4096-bit key, self-signed cert valid
# for 365 days)
openssl genrsa -passout pass:1234 -des3 -out ca.key 4096
openssl req -passin pass:1234 -new -x509 -days 365 -key ca.key -out ca.crt -subj  "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Test/CN=Root CA"

# Generate valid Server Key/Cert (CSR signed by the CA above)
openssl genrsa -passout pass:1234 -des3 -out server.key 4096
openssl req -passin pass:1234 -new -key server.key -out server.csr -subj  "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Server/CN=localhost"
openssl x509 -req -passin pass:1234 -days 365 -in server.csr -CA ca.crt -CAkey ca.key -set_serial 01 -out server.crt

# Remove passphrase from the Server Key
openssl rsa -passin pass:1234 -in server.key -out server.key

# Generate valid Client Key/Cert (CSR signed by the CA above)
openssl genrsa -passout pass:1234 -des3 -out client.key 4096
openssl req -passin pass:1234 -new -key client.key -out client.csr -subj  "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Client/CN=localhost"
openssl x509 -passin pass:1234 -req -days 365 -in client.csr -CA ca.crt -CAkey ca.key -set_serial 01 -out client.crt

# Remove passphrase from Client Key
openssl rsa -passin pass:1234 -in client.key -out client.key

# Create mutated (invalid) copies of the client key and certificate by
# upper-casing the first character of every word in the files. These are used
# as the "incorrect credentials" in the failure cases.
cp client.key client2.key && sed -i "s/\b\(.\)/\u\1/g" client2.key
cp client.crt client2.crt && sed -i "s/\b\(.\)/\u\1/g" client2.crt

# Test all 3 SSL/TLS cases, server authentication, mutual authentication and when both flags are specified.
# For every case the server is started, the Python, Python AIO and C++ clients
# are each run with valid SSL credentials, and the server is stopped again.
for CASE in server mutual both; do
    if [ "$CASE" == "server" ]; then
        SERVER_ARGS="$SERVER_BASE_ARGS --grpc-use-ssl=1"
    elif [ "$CASE" == "mutual" ]; then
        SERVER_ARGS="$SERVER_BASE_ARGS --grpc-use-ssl-mutual=1"
    else
        SERVER_ARGS="$SERVER_BASE_ARGS --grpc-use-ssl=1 --grpc-use-ssl-mutual=1"
    fi

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # Client failures are recorded in RET instead of aborting the script
    set +e

    # Test basic inference using grpc secure channel
    $TEST_CLIENT_PY -v --ssl --root-certificates ca.crt --private-key client.key --certificate-chain client.crt >> ${CLIENT_LOG}.${CASE}.ssl_infer 2>&1
    if [ $? -ne 0 ]; then
        cat ${CLIENT_LOG}.${CASE}.ssl_infer
        RET=1
    fi
    $TEST_CLIENT_AIO_PY -v --ssl --root-certificates ca.crt --private-key client.key --certificate-chain client.crt >> ${CLIENT_LOG}.${CASE}.ssl_infer.aio 2>&1
    if [ $? -ne 0 ]; then
        cat ${CLIENT_LOG}.${CASE}.ssl_infer.aio
        RET=1
    fi

    $TEST_CLIENT -v --ssl --root-certificates ca.crt --private-key client.key --certificate-chain client.crt >> ${CLIENT_LOG}.${CASE}.c++.ssl_infer 2>&1
    if [ $? -ne 0 ]; then
        cat ${CLIENT_LOG}.${CASE}.c++.ssl_infer
        RET=1
    fi

    set -e

    # Stop the server before the next case starts a new one
    kill $SERVER_PID
    wait $SERVER_PID
done

# Test failure cases for SSL.
# Every client invocation in this loop is expected to FAIL: a non-zero client
# exit status is the passing outcome, a zero exit status sets RET=1.
for CASE in server mutual; do
    if [ "$CASE" == "server" ]; then
        SERVER_ARGS="$SERVER_BASE_ARGS --grpc-use-ssl=1"
    elif [ "$CASE" == "mutual" ]; then
        SERVER_ARGS="$SERVER_BASE_ARGS --grpc-use-ssl-mutual=1"
    fi

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # The clients are expected to exit non-zero, so do not abort on failure
    set +e

    # Test inference client using grpc secure channel without ssl
    $TEST_CLIENT_PY -v >> ${CLIENT_LOG}.${CASE}.no_ssl_fail_infer 2>&1
    if [ $? -ne 0 ]; then
        cat ${CLIENT_LOG}.${CASE}.no_ssl_fail_infer
        echo -e "\n***\n*** Expected test failure\n***"
    else
        RET=1
    fi
    $TEST_CLIENT_AIO_PY -v >> ${CLIENT_LOG}.${CASE}.no_ssl_fail_infer.aio 2>&1
    if [ $? -ne 0 ]; then
        cat ${CLIENT_LOG}.${CASE}.no_ssl_fail_infer.aio
        echo -e "\n***\n*** Expected test failure\n***"
    else
        RET=1
    fi

    $TEST_CLIENT -v >> ${CLIENT_LOG}.${CASE}.c++.no_ssl_fail_infer 2>&1
    if [ $? -ne 0 ]; then
        cat ${CLIENT_LOG}.${CASE}.c++.no_ssl_fail_infer
        echo -e "\n***\n*** Expected test failure\n***"
    else
        RET=1
    fi

    # Test inference client using grpc secure channel with incorrect ssl creds
    # (client2.key / client2.crt are the mutated copies of the client files)
    $TEST_CLIENT_PY -v --ssl --root-certificates ca.crt --private-key client2.key --certificate-chain client2.crt >> ${CLIENT_LOG}.${CASE}.wrong_ssl_fail_infer 2>&1
    if [ $? -ne 0 ]; then
        cat ${CLIENT_LOG}.${CASE}.wrong_ssl_fail_infer
        echo -e "\n***\n*** Expected test failure\n***"
    else
        RET=1
    fi
    $TEST_CLIENT_AIO_PY -v --ssl --root-certificates ca.crt --private-key client2.key --certificate-chain client2.crt >> ${CLIENT_LOG}.${CASE}.wrong_ssl_fail_infer.aio 2>&1
    if [ $? -ne 0 ]; then
        cat ${CLIENT_LOG}.${CASE}.wrong_ssl_fail_infer.aio
        echo -e "\n***\n*** Expected test failure\n***"
    else
        RET=1
    fi

    $TEST_CLIENT -v --ssl --root-certificates ca.crt --private-key client2.key --certificate-chain client2.crt >> ${CLIENT_LOG}.${CASE}.c++.wrong_ssl_fail_infer 2>&1
    if [ $? -ne 0 ]; then
        cat ${CLIENT_LOG}.${CASE}.c++.wrong_ssl_fail_infer
        echo -e "\n***\n*** Expected test failure\n***"
    else
        RET=1
    fi

    set -e

    # Stop the server before the next case starts a new one
    kill $SERVER_PID
    wait $SERVER_PID
done

# Print the overall verdict and propagate the accumulated status
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Sequence model served by the "identity" backend with a maximum batch size
# of 1.
backend: "identity"
max_batch_size: 1

# Single INT32 input with one element per request
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# Single INT32 output with one element per request
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# One model instance, executed on CPU
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]

# Sequence batching with an idle limit of 50,000,000 microseconds (50 seconds)
sequence_batching {
  max_sequence_idle_microseconds: 50000000
}

# Backend parameter "execute_delay_ms" = 5000.
# NOTE(review): presumably the identity backend delays each execution by this
# many milliseconds so that request timeouts can be triggered - confirm
# against the identity backend documentation.
parameters [
  {
    key: "execute_delay_ms"
    value: { string_value: "5000" }
  }
]


================================================
FILE: qa/L0_sequence_batcher/sequence_batcher_test.py
================================================
#!/usr/bin/env python

# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import random
import threading
import time
import unittest
from builtins import str
from functools import partial

import numpy as np
import sequence_util as su
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

# Shared-memory test modes. Each variable is parsed as an integer and converted
# to bool; both default to disabled (0) when the variable is not set.
TEST_SYSTEM_SHARED_MEMORY = bool(int(os.environ.get("TEST_SYSTEM_SHARED_MEMORY", 0)))
TEST_CUDA_SHARED_MEMORY = bool(int(os.environ.get("TEST_CUDA_SHARED_MEMORY", 0)))

# A protocol is enabled unless its variable is set to exactly the string "0"
# (an unset variable yields the int default 1, which is != "0").
USE_GRPC = os.environ.get("USE_GRPC", 1) != "0"
USE_HTTP = os.environ.get("USE_HTTP", 1) != "0"
assert USE_GRPC or USE_HTTP, "USE_GRPC or USE_HTTP must be non-zero"
# Protocols the tests iterate over, in order.
if USE_GRPC and USE_HTTP:
    _protocols = ("http", "grpc")
elif USE_GRPC:
    _protocols = ("grpc",)
else:
    _protocols = ("http",)

# Space-separated list of backends to test, and whether ensemble variants of
# the trials are added as well.
BACKENDS = os.environ.get("BACKENDS", "onnx plan custom python")
ENSEMBLES = bool(int(os.environ.get("ENSEMBLES", 1)))

# The following variables are required: a missing one raises KeyError at
# import time.
NO_BATCHING = int(os.environ["NO_BATCHING"]) == 1
MODEL_INSTANCES = int(os.environ["MODEL_INSTANCES"])
IMPLICIT_STATE = int(os.environ["IMPLICIT_STATE"]) == 1

# Use initial state for implicit state
INITIAL_STATE_FILE = int(os.environ["INITIAL_STATE_FILE"]) == 1

# Build the trial names from BACKENDS:
#  - NO_BATCHING: every backend except "custom", with a "_nobatch" suffix
#  - BATCHER_TYPE == "VARIABLE": every backend except "libtorch" and "custom"
#  - otherwise: all backends unchanged (note: a list in this branch, a tuple
#    in the other two)
_trials = ()
if NO_BATCHING:
    for backend in BACKENDS.split(" "):
        if backend != "custom":
            _trials += (backend + "_nobatch",)
elif os.environ["BATCHER_TYPE"] == "VARIABLE":
    for backend in BACKENDS.split(" "):
        if (backend != "libtorch") and (backend != "custom"):
            _trials += (backend,)
else:
    _trials = BACKENDS.split(" ")

# Add ensemble to the _trials
ENSEMBLE_PREFIXES = ["simple_", "sequence_", "fan_"]

# Every trial is kept; each trial whose name does not contain "custom" is
# additionally followed by one variant per ensemble prefix.
if ENSEMBLES:
    res = []
    for trial in _trials:
        res.append(trial)
        if "custom" in trial:
            continue
        for ensemble_prefix in ENSEMBLE_PREFIXES:
            res.append(ensemble_prefix + trial)
    _trials = tuple(res)

# Only the "custom" trial (when present) is listed as supporting ragged
# batches.
_ragged_batch_supported_trials = list()
if "custom" in _trials:
    _ragged_batch_supported_trials = ("custom",)

# Not all models can be tested for ragged handling because the models
# don't deal well with non-size-1 shapes
_ragged_batch_not_supported_trials = list()
if os.environ["BATCHER_TYPE"] == "VARIABLE":
    if "custom" in _trials:
        _ragged_batch_not_supported_trials.append("custom")
    if "plan" in _trials:
        _ragged_batch_not_supported_trials.append("plan")
    if "onnx" in _trials:
        _ragged_batch_not_supported_trials.append("onnx")

# NOTE(review): presumably mirrors the max sequence idle time configured for
# the test models, in milliseconds - confirm against the model configs.
_max_sequence_idle_ms = 5000


# Checks whether the provided model name belongs to an ensemble
# model.
def is_ensemble(model_name):
    """Return True if ``model_name`` starts with one of ENSEMBLE_PREFIXES."""
    return any(model_name.startswith(prefix) for prefix in ENSEMBLE_PREFIXES)


class SequenceBatcherTest(su.SequenceBatcherTestUtil):
    def get_datatype(self, trial):
        """Return the tuple of numpy dtypes to test for ``trial``.

        The choice depends on which models are available (see test.sh):
        "plan" trials use float32 only and "custom" trials use int32 only.
        All other trials use int32 and bool; with implicit state enabled,
        ONNX trials (and libtorch trials when batching is disabled)
        additionally test the string (object) dtype.
        """
        if "plan" in trial:
            return (np.float32,)
        if "custom" in trial:
            return (np.int32,)

        # Only test the string data type for ONNX and libtorch models in
        # implicit state
        supports_string = ("onnx" in trial) or (NO_BATCHING and "libtorch" in trial)
        if IMPLICIT_STATE and supports_string:
            return (np.dtype(object), np.int32, np.bool_)

        return (np.int32, np.bool_)

    def get_expected_result(self, expected_result, value, trial, flag_str=None):
        """Return the result expected from the final request of a sequence.

        ``expected_result`` is returned unchanged unless the trial's model
        cannot implement the full accumulator (see
        qa/common/gen_qa_sequence_models.py). For those models the expected
        result is ``value``, plus one when ``flag_str`` contains "start".
        """
        partial_accumulator = (
            ("plan" in trial)
            or ("onnx" in trial)
            or ("libtorch" in trial)
            or (not NO_BATCHING and ("custom" not in trial))
        )
        if not partial_accumulator:
            return expected_result

        adjusted = value
        if flag_str is not None and "start" in flag_str:
            adjusted += 1
        return adjusted

    def get_expected_result_implicit(
        self, expected_result, value, trial, flag_str=None, dtype=None
    ):
        """Return the expected result for implicit-state models.

        For the string (object) dtype on trials whose name starts with "onnx"
        the expected result is ``value``. Otherwise it is ``expected_result``,
        offset by 100 when INITIAL_STATE_FILE is set.
        """
        if dtype == np.dtype(object) and trial.startswith("onnx"):
            return value

        # When the INITIAL_STATE_FILE is set the initial value used for the
        # sequence will be 100 instead of zero and the results will be offset
        # by the same amount.
        return expected_result + 100 if INITIAL_STATE_FILE else expected_result

    def test_simple_sequence(self):
        # Send one sequence and check for correct accumulator
        # result. The result should be returned immediately.
        for trial in _trials:
            ensemble_trial = any(prefix in trial for prefix in ENSEMBLE_PREFIXES)

            # Run on different protocols.
            for protocol_idx, protocol in enumerate(_protocols):
                for dtype in self.get_datatype(trial):
                    model_name = tu.get_sequence_model_name(trial, dtype)
                    # Skip bool type ensemble models
                    if ensemble_trial and dtype == np.bool_:
                        continue
                    # For bool type control models, use int32 as I/O types
                    io_dtype = np.int32 if dtype == np.bool_ else dtype

                    self.clear_deferred_exceptions()
                    try:
                        self.check_setup(model_name)
                        # This test must run without the scheduler delays
                        for delay_var in (
                            "TRITONSERVER_DELAY_SCHEDULER",
                            "TRITONSERVER_BACKLOG_DELAY_SCHEDULER",
                        ):
                            self.assertNotIn(delay_var, os.environ)

                        if IMPLICIT_STATE:
                            expected_result = self.get_expected_result_implicit(
                                45, 9, trial, "end", io_dtype
                            )
                        else:
                            expected_result = self.get_expected_result(
                                45, 9, trial, "end"
                            )

                        # (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay))
                        # Values 1..9: first flagged "start", last flagged "end"
                        steps = (
                            ("start", 1, None, None),
                            *((None, step, None, None) for step in range(2, 9)),
                            ("end", 9, None, None),
                        )

                        self.check_sequence(
                            trial,
                            model_name,
                            io_dtype,
                            5,
                            (4000, None),
                            steps,
                            expected_result,
                            protocol,
                            sequence_name="{}_{}".format(
                                self._testMethodName, protocol
                            ),
                        )

                        self.check_deferred_exception()
                        # Nine requests per protocol pass accumulate in the
                        # model statistics
                        total = 9 * (protocol_idx + 1)
                        self.check_status(model_name, {1: total}, total, total)
                    except Exception as err:
                        self.assertTrue(False, "unexpected error {}".format(err))

    def test_length1_sequence(self):
        # Send a length-1 sequence and check for correct accumulator
        # result. The result should be returned immediately.
        for trial in _trials:
            ensemble_trial = any(prefix in trial for prefix in ENSEMBLE_PREFIXES)

            # Run on different protocols.
            for protocol_idx, protocol in enumerate(_protocols):
                for dtype in self.get_datatype(trial):
                    model_name = tu.get_sequence_model_name(trial, dtype)
                    # Skip bool type ensemble models
                    if ensemble_trial and dtype == np.bool_:
                        continue
                    # For bool type control models, use int32 as I/O types
                    io_dtype = np.int32 if dtype == np.bool_ else dtype

                    self.clear_deferred_exceptions()
                    try:
                        self.check_setup(model_name)
                        # This test must run without the scheduler delays
                        for delay_var in (
                            "TRITONSERVER_DELAY_SCHEDULER",
                            "TRITONSERVER_BACKLOG_DELAY_SCHEDULER",
                        ):
                            self.assertNotIn(delay_var, os.environ)

                        if IMPLICIT_STATE:
                            expected_result = self.get_expected_result_implicit(
                                42, 42, trial, "start,end", io_dtype
                            )
                        else:
                            expected_result = self.get_expected_result(
                                42, 42, trial, "start,end"
                            )

                        # (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay))
                        # A single request that both starts and ends the sequence
                        steps = (("start,end", 42, None, None),)

                        self.check_sequence(
                            trial,
                            model_name,
                            io_dtype,
                            99,
                            (4000, None),
                            steps,
                            expected_result,
                            protocol,
                            sequence_name="{}_{}".format(
                                self._testMethodName, protocol
                            ),
                        )

                        self.check_deferred_exception()
                        # One request per protocol pass accumulates in the
                        # model statistics
                        total = protocol_idx + 1
                        self.check_status(model_name, {1: total}, total, total)
                    except Exception as err:
                        self.assertTrue(False, "unexpected error {}".format(err))

    def test_batch_size(self):
        # Send sequence with a batch-size > 1 and check for error. Each
        # request is issued with batch_size=2 and the test passes only
        # when the reported error message starts with the sequence
        # batcher's "must specify batch-size 1" text.

        # When 4 model instances the max-batch-size is 1 so can't test
        # since that gives a different error: "batch-size 2 exceeds
        # maximum batch size"
        if (MODEL_INSTANCES == 4) or NO_BATCHING:
            return

        for trial in _trials:
            # Run on different protocols.
            for protocol in _protocols:
                dtypes = self.get_datatype(trial)

                for dtype in dtypes:
                    model_name = tu.get_sequence_model_name(trial, dtype)
                    # Skip bool type ensemble models
                    if (any(word in trial for word in ENSEMBLE_PREFIXES)) and (
                        dtype == np.bool_
                    ):
                        continue
                    # For bool type control models, use int32 as I/O types
                    if dtype == np.bool_:
                        dtype = np.int32

                    self.clear_deferred_exceptions()
                    try:
                        self.check_setup(model_name)
                        self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                        self.assertNotIn(
                            "TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ
                        )
                        expected_result = (
                            self.get_expected_result(10, 9, trial, "end")
                            if not IMPLICIT_STATE
                            else self.get_expected_result_implicit(
                                10, 9, trial, "end", dtype
                            )
                        )

                        self.check_sequence(
                            trial,
                            model_name,
                            dtype,
                            27,
                            (4000, None),
                            # (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay))
                            (("start", 1, None, None), ("end", 9, None, None)),
                            expected_result,
                            protocol,
                            batch_size=2,
                            sequence_name="{}_{}".format(
                                self._testMethodName, protocol
                            ),
                        )

                        self.check_deferred_exception()
                        self.assertTrue(False, "expected error")
                    except AssertionError:
                        # Assertion failures (including the "expected error"
                        # sentinel above) have no message() accessor. Let them
                        # propagate so the real failure is reported instead of
                        # an AttributeError raised by the handler below.
                        raise
                    except Exception as ex:
                        # presumably a Triton client error exposing message();
                        # any other exception type raises AttributeError here.
                        error_msg = ex.message()
                        for prefix in ENSEMBLE_PREFIXES:
                            if model_name.startswith(prefix):
                                # The ensemble reports the error against the
                                # model name with the ensemble prefix removed.
                                base_model_name = model_name[len(prefix) :]
                                expected_msg = (
                                    "in ensemble '{}', "
                                    "inference request to model '{}' must specify "
                                    "batch-size 1 due to requirements of sequence "
                                    "batcher"
                                ).format(model_name, base_model_name)
                                self.assertTrue(
                                    error_msg.startswith(expected_msg),
                                    "unexpected error {}".format(error_msg),
                                )
                                # NOTE(review): this return ends the whole test
                                # method, so trials/protocols/dtypes after the
                                # first verified ensemble model are not run.
                                # Kept as-is; confirm whether that is intended.
                                return
                        expected_msg = (
                            "inference request to model '{}' must specify "
                            "batch-size 1 due to requirements of sequence "
                            "batcher"
                        ).format(model_name)
                        self.assertTrue(
                            error_msg.startswith(expected_msg),
                            "unexpected error {}".format(error_msg),
                        )

    def test_no_correlation_id(self):
        # Send sequence without correlation ID and check for error. The
        # sequence is sent with correlation ID 0 and the test passes only
        # when the reported error message starts with the "must specify a
        # non-zero or non-empty correlation ID" text.
        for trial in _trials:
            # Run on different protocols.
            for protocol in _protocols:
                dtypes = self.get_datatype(trial)
                for dtype in dtypes:
                    model_name = tu.get_sequence_model_name(trial, dtype)
                    # Skip bool type ensemble models
                    if (any(word in trial for word in ENSEMBLE_PREFIXES)) and (
                        dtype == np.bool_
                    ):
                        continue
                    # For bool type control models, use int32 as I/O types
                    if dtype == np.bool_:
                        dtype = np.int32

                    self.clear_deferred_exceptions()
                    try:
                        self.check_setup(model_name)
                        self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                        self.assertNotIn(
                            "TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ
                        )
                        expected_result = (
                            self.get_expected_result(10, 9, trial, "end")
                            if not IMPLICIT_STATE
                            else self.get_expected_result_implicit(
                                10, 9, trial, "end", dtype
                            )
                        )

                        self.check_sequence(
                            trial,
                            model_name,
                            dtype,
                            0,  # correlation_id = 0
                            (4000, None),
                            # (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay))
                            (("start", 1, None, None), ("end", 9, None, None)),
                            expected_result,
                            protocol,
                            sequence_name="{}_{}".format(
                                self._testMethodName, protocol
                            ),
                        )

                        self.check_deferred_exception()
                        self.assertTrue(False, "expected error")
                    except AssertionError:
                        # Assertion failures (including the "expected error"
                        # sentinel above) have no message() accessor. Let them
                        # propagate so the real failure is reported instead of
                        # an AttributeError raised by the handler below.
                        raise
                    except Exception as ex:
                        # presumably a Triton client error exposing message();
                        # any other exception type raises AttributeError here.
                        error_msg = ex.message()
                        for prefix in ENSEMBLE_PREFIXES:
                            if model_name.startswith(prefix):
                                # The ensemble reports the error against the
                                # model name with the ensemble prefix removed.
                                base_model_name = model_name[len(prefix) :]
                                expected_msg = (
                                    "in ensemble '{}', "
                                    "inference request to model '{}' must specify a "
                                    "non-zero or non-empty correlation ID"
                                ).format(model_name, base_model_name)
                                self.assertTrue(
                                    error_msg.startswith(expected_msg),
                                    "unexpected error {}".format(error_msg),
                                )
                                # NOTE(review): this return ends the whole test
                                # method, so trials/protocols/dtypes after the
                                # first verified ensemble model are not run.
                                # Kept as-is; confirm whether that is intended.
                                return
                        expected_msg = (
                            "inference request to model '{}' must specify a "
                            "non-zero or non-empty correlation ID"
                        ).format(model_name)
                        self.assertTrue(
                            error_msg.startswith(expected_msg),
                            "unexpected error {}".format(error_msg),
                        )

    def test_no_sequence_start(self):
        # Send sequence without start flag for never before seen
        # correlation ID. Expect failure: the test passes only when the
        # reported error message starts with the "must specify the START
        # flag on the first request of the sequence" text.
        for trial in _trials:
            # Run on different protocols.
            for protocol in _protocols:
                dtypes = self.get_datatype(trial)
                for dtype in dtypes:
                    model_name = tu.get_sequence_model_name(trial, dtype)
                    # Skip bool type ensemble models
                    if (any(word in trial for word in ENSEMBLE_PREFIXES)) and (
                        dtype == np.bool_
                    ):
                        continue
                    # For bool type control models, use int32 as I/O types
                    if dtype == np.bool_:
                        dtype = np.int32

                    self.clear_deferred_exceptions()
                    try:
                        self.check_setup(model_name)
                        self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                        self.assertNotIn(
                            "TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ
                        )

                        expected_result = (
                            self.get_expected_result(6, 3, trial, "end")
                            if not IMPLICIT_STATE
                            else self.get_expected_result_implicit(
                                6, 3, trial, "end", dtype
                            )
                        )
                        self.check_sequence(
                            trial,
                            model_name,
                            dtype,
                            37469245,
                            (4000, None),
                            # (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay))
                            (
                                (None, 1, None, None),
                                (None, 2, None, None),
                                ("end", 3, None, None),
                            ),
                            expected_result,
                            protocol,
                            sequence_name="{}_{}".format(
                                self._testMethodName, protocol
                            ),
                        )

                        self.check_deferred_exception()
                        self.assertTrue(False, "expected error")
                    except AssertionError:
                        # Assertion failures (including the "expected error"
                        # sentinel above) have no message() accessor. Let them
                        # propagate so the real failure is reported instead of
                        # an AttributeError raised by the handler below.
                        raise
                    except Exception as ex:
                        # presumably a Triton client error exposing message();
                        # any other exception type raises AttributeError here.
                        error_msg = ex.message()
                        print(model_name + "-> " + error_msg)
                        for prefix in ENSEMBLE_PREFIXES:
                            if model_name.startswith(prefix):
                                # The ensemble reports the error against the
                                # model name with the ensemble prefix removed.
                                base_model_name = model_name[len(prefix) :]
                                expected_msg = (
                                    "in ensemble '{}', "
                                    "inference request for sequence 37469245 to "
                                    "model '{}' must specify the START flag on the first "
                                    "request of the sequence"
                                ).format(model_name, base_model_name)
                                self.assertTrue(
                                    error_msg.startswith(expected_msg),
                                    "unexpected error {}".format(error_msg),
                                )
                                # NOTE(review): this return ends the whole test
                                # method, so trials/protocols/dtypes after the
                                # first verified ensemble model are not run.
                                # Kept as-is; confirm whether that is intended.
                                return
                        expected_msg = (
                            "inference request for sequence 37469245 to "
                            "model '{}' must specify the START flag on the first "
                            "request of the sequence"
                        ).format(model_name)
                        self.assertTrue(
                            error_msg.startswith(expected_msg),
                            "unexpected error {}".format(error_msg),
                        )

    def test_no_sequence_start2(self):
        # Send sequence without start flag after sending a valid
        # sequence with the same correlation ID. Expect failure for
        # the second sequence: the test passes only when the reported
        # error message starts with the "must specify the START flag on
        # the first request of the sequence" text.
        for trial in _trials:
            # Run on different protocols.
            for idx, protocol in enumerate(_protocols):
                dtypes = self.get_datatype(trial)
                for dtype in dtypes:
                    model_name = tu.get_sequence_model_name(trial, dtype)
                    # Skip bool type ensemble models
                    if (any(word in trial for word in ENSEMBLE_PREFIXES)) and (
                        dtype == np.bool_
                    ):
                        continue
                    # For bool type control models, use int32 as I/O types
                    if dtype == np.bool_:
                        dtype = np.int32

                    self.clear_deferred_exceptions()
                    try:
                        self.check_setup(model_name)
                        self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                        self.assertNotIn(
                            "TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ
                        )
                        expected_result = (
                            self.get_expected_result(6, 3, trial, None)
                            if not IMPLICIT_STATE
                            else self.get_expected_result_implicit(
                                6, 3, trial, None, dtype
                            )
                        )

                        # The first three requests form a complete sequence;
                        # the fourth reuses the correlation ID without a
                        # start flag.
                        self.check_sequence(
                            trial,
                            model_name,
                            dtype,
                            3,
                            (4000, None),
                            # (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay))
                            (
                                ("start", 1, None, None),
                                (None, 2, None, None),
                                ("end", 3, None, None),
                                (None, 55, None, None),
                            ),
                            expected_result,
                            protocol,
                            sequence_name="{}_{}".format(
                                self._testMethodName, protocol
                            ),
                        )

                        # Statistics are checked before the deferred error is
                        # raised. The expected counts grow by 3 per protocol
                        # (presumably because the server statistics accumulate
                        # across protocol runs; confirm).
                        self.check_status(
                            model_name, {1: 3 * (idx + 1)}, 3 * (idx + 1), 3 * (idx + 1)
                        )
                        self.check_deferred_exception()
                        self.assertTrue(False, "expected error")
                    except AssertionError:
                        # Assertion failures (including the "expected error"
                        # sentinel above) have no message() accessor. Let them
                        # propagate so the real failure is reported instead of
                        # an AttributeError raised by the handler below.
                        raise
                    except Exception as ex:
                        # presumably a Triton client error exposing message();
                        # any other exception type raises AttributeError here.
                        error_msg = ex.message()
                        for prefix in ENSEMBLE_PREFIXES:
                            if model_name.startswith(prefix):
                                # The ensemble reports the error against the
                                # model name with the ensemble prefix removed.
                                base_model_name = model_name[len(prefix) :]
                                expected_msg = (
                                    "in ensemble '{}', "
                                    "inference request for sequence 3 to model '{}' must "
                                    "specify the START flag on the first request of "
                                    "the sequence"
                                ).format(model_name, base_model_name)
                                self.assertTrue(
                                    error_msg.startswith(expected_msg),
                                    "unexpected error {}".format(error_msg),
                                )
                                # NOTE(review): this return ends the whole test
                                # method, so trials/protocols/dtypes after the
                                # first verified ensemble model are not run.
                                # Kept as-is; confirm whether that is intended.
                                return
                        expected_msg = (
                            "inference request for sequence 3 to model '{}' must "
                            "specify the START flag on the first request of "
                            "the sequence"
                        ).format(model_name)
                        self.assertTrue(
                            error_msg.startswith(expected_msg),
                            "unexpected error {}".format(error_msg),
                        )

    def test_no_sequence_end(self):
        # Send a sequence that never sets the end flag, then reuse the
        # same correlation ID to start another sequence. The first
        # sequence will be ended automatically but the second should
        # complete successfully.
        requests = (
            # (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay))
            ("start", 1, None, None),
            (None, 2, None, None),
            ("start", 42, None, None),
            ("end", 9, None, None),
        )
        unexpected_env = (
            "TRITONSERVER_DELAY_SCHEDULER",
            "TRITONSERVER_BACKLOG_DELAY_SCHEDULER",
        )

        for trial in _trials:
            # Run on different protocols; `run` counts them starting at 1.
            for run, protocol in enumerate(_protocols, start=1):
                for dtype in self.get_datatype(trial):
                    model_name = tu.get_sequence_model_name(trial, dtype)
                    if dtype == np.bool_:
                        # Skip bool type ensemble models
                        if any(word in trial for word in ENSEMBLE_PREFIXES):
                            continue
                        # For bool type control models, use int32 as I/O types
                        dtype = np.int32

                    self.clear_deferred_exceptions()
                    try:
                        self.check_setup(model_name)
                        for env_name in unexpected_env:
                            self.assertNotIn(env_name, os.environ)

                        if IMPLICIT_STATE:
                            expected_result = self.get_expected_result_implicit(
                                51, 9, trial, "end", dtype
                            )
                        else:
                            expected_result = self.get_expected_result(
                                51, 9, trial, "end"
                            )

                        seq_name = "{}_{}".format(self._testMethodName, protocol)
                        self.check_sequence(
                            trial,
                            model_name,
                            dtype,
                            4566,
                            (4000, None),
                            requests,
                            expected_result,
                            protocol,
                            sequence_name=seq_name,
                        )

                        self.check_deferred_exception()
                        # Four requests are sent per protocol run.
                        request_cnt = 4 * run
                        self.check_status(
                            model_name, {1: request_cnt}, request_cnt, request_cnt
                        )
                    except Exception as ex:
                        self.assertTrue(False, "unexpected error {}".format(ex))

    def test_half_batch(self):
        # Test model instances that together are configured with
        # total-batch-size 4. Send two equal-length sequences in
        # parallel and make sure they get completely batched into
        # batch-size 2 inferences.
        sequences = (
            # (correlation_id, request values, (expected_result, last value))
            (987, (1, 2, 3, 4), (10, 4)),
            (988, (0, 9, 5, 13), (27, 13)),
        )
        required_env = (
            ("TRITONSERVER_DELAY_SCHEDULER", 8),
            ("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", 0),
        )

        for trial in _trials:
            for dtype in self.get_datatype(trial):
                model_name = tu.get_sequence_model_name(trial, dtype)
                if dtype == np.bool_:
                    # Skip bool type ensemble models
                    if any(word in trial for word in ENSEMBLE_PREFIXES):
                        continue
                    # For bool type control models, use int32 as I/O types
                    dtype = np.int32

                self.clear_deferred_exceptions()

                # One group of pre-registered regions per sequence, created
                # in sequence order with the sequence position as the index.
                shm_handles = [
                    self.precreate_register_regions(values, dtype, slot)
                    for slot, (_, values, _) in enumerate(sequences)
                ]

                try:
                    self.check_setup(model_name)

                    # Need scheduler to wait for queue to contain all
                    # inferences for both sequences.
                    for env_name, env_value in required_env:
                        self.assertIn(env_name, os.environ)
                        self.assertEqual(int(os.environ[env_name]), env_value)

                    workers = []
                    for (corr_id, values, (result, last_value)), handles in zip(
                        sequences, shm_handles
                    ):
                        if IMPLICIT_STATE:
                            expected_result = self.get_expected_result_implicit(
                                result, last_value, trial, "end", dtype
                            )
                        else:
                            expected_result = self.get_expected_result(
                                result, last_value, trial, "end"
                            )

                        # (flag_str, value, pre_delay_ms): the first request
                        # carries "start", the last one "end".
                        flags = ("start",) + (None,) * (len(values) - 2) + ("end",)
                        steps = tuple(
                            (flag, value, None) for flag, value in zip(flags, values)
                        )
                        workers.append(
                            threading.Thread(
                                target=self.check_sequence_async,
                                args=(
                                    trial,
                                    model_name,
                                    dtype,
                                    corr_id,
                                    (None, None),
                                    steps,
                                    expected_result,
                                    handles,
                                ),
                                kwargs={
                                    "sequence_name": "{}".format(self._testMethodName)
                                },
                            )
                        )

                    for worker in workers:
                        worker.start()
                    for worker in workers:
                        worker.join()
                    self.check_deferred_exception()

                    if is_ensemble(model_name):
                        # Requests do not get batched for the ensemble model
                        batch_stats, exec_cnt = {1: 8}, 8
                    elif MODEL_INSTANCES == 1:
                        batch_stats, exec_cnt = {2: 4 * min(2, MODEL_INSTANCES)}, 4
                    else:
                        batch_stats, exec_cnt = {1: 4 * min(2, MODEL_INSTANCES)}, 8
                    self.check_status(model_name, batch_stats, exec_cnt, 8)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in shm_handles:
                            self.cleanup_shm_regions(handles)

    def test_skip_batch(self):
        # Test model instances together are configured with
        # total-batch-size 4. Send four sequences in parallel where
        # two sequences have shorter length so that padding must be
        # applied correctly for the longer sequences.
        sequences = (
            # (correlation_id, request values, (expected_result, last value))
            (1001, (1, 3), (4, 3)),
            (1002, (11, 12, 13, 14), (50, 14)),
            (1003, (111, 113), (224, 113)),
            (1004, (1111, 1112, 1113, 1114), (4450, 1114)),
        )
        required_env = (
            ("TRITONSERVER_DELAY_SCHEDULER", 12),
            ("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", 0),
        )
        # check_status arguments (batch statistics, execution count) for
        # non-ensemble models, keyed by MODEL_INSTANCES.
        #
        # Batch size is 4 for the first two inferences and then 2 for the
        # second two inferences. This is because we request the longer
        # sequences first (positions 1 and 3) in slots 0 and 1 and so
        # after shorter sequences are complete there are only slots 0
        # and 1 to execute.
        status_by_instances = {
            1: ({2: 2, 4: 2}, 4),
            2: ({2: 4, 1: 4}, 8),
            4: ({1: 12}, 12),
        }

        for trial in _trials:
            for dtype in self.get_datatype(trial):
                model_name = tu.get_sequence_model_name(trial, dtype)
                if dtype == np.bool_:
                    # Skip bool type ensemble models
                    if any(word in trial for word in ENSEMBLE_PREFIXES):
                        continue
                    # For bool type control models, use int32 as I/O types
                    dtype = np.int32

                self.clear_deferred_exceptions()

                # One group of pre-registered regions per sequence, created
                # in sequence order with the sequence position as the index.
                shm_handles = [
                    self.precreate_register_regions(values, dtype, slot)
                    for slot, (_, values, _) in enumerate(sequences)
                ]

                try:
                    self.check_setup(model_name)

                    # Need scheduler to wait for queue to contain all
                    # inferences for all four sequences.
                    for env_name, env_value in required_env:
                        self.assertIn(env_name, os.environ)
                        self.assertEqual(int(os.environ[env_name]), env_value)

                    workers = []
                    for (corr_id, values, (result, last_value)), handles in zip(
                        sequences, shm_handles
                    ):
                        if IMPLICIT_STATE:
                            expected_result = self.get_expected_result_implicit(
                                result, last_value, trial, "end", dtype
                            )
                        else:
                            expected_result = self.get_expected_result(
                                result, last_value, trial, "end"
                            )

                        # (flag_str, value, pre_delay_ms): the first request
                        # carries "start", the last one "end".
                        flags = ("start",) + (None,) * (len(values) - 2) + ("end",)
                        steps = tuple(
                            (flag, value, None) for flag, value in zip(flags, values)
                        )
                        workers.append(
                            threading.Thread(
                                target=self.check_sequence_async,
                                args=(
                                    trial,
                                    model_name,
                                    dtype,
                                    corr_id,
                                    (None, None),
                                    steps,
                                    expected_result,
                                    handles,
                                ),
                                kwargs={
                                    "sequence_name": "{}".format(self._testMethodName)
                                },
                            )
                        )

                    # Start the two longer sequences first, then the two
                    # shorter ones three seconds later.
                    for position in (1, 3):
                        workers[position].start()
                    time.sleep(3)
                    for position in (0, 2):
                        workers[position].start()
                    for worker in workers:
                        worker.join()
                    self.check_deferred_exception()

                    if is_ensemble(model_name):
                        # Requests do not get batched for the ensemble model
                        status_args = ({1: 12}, 12)
                    else:
                        # No statistics check is made for instance counts
                        # that are not listed in the table.
                        status_args = status_by_instances.get(MODEL_INSTANCES)
                    if status_args is not None:
                        batch_stats, exec_cnt = status_args
                        self.check_status(model_name, batch_stats, exec_cnt, 12)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in shm_handles:
                            self.cleanup_shm_regions(handles)

    def test_full_batch(self):
        # Test model instances together are configured with
        # total-batch-size 4. Send four equal-length sequences in
        # parallel and make sure they get completely batched into
        # batch-size 4 inferences (split evenly across the instances
        # when MODEL_INSTANCES > 1).

        # One row per sequence:
        # (correlation id, shm region index, request values,
        #  accumulated value handed to get_expected_result*)
        sequences = (
            (1001, 0, (1, 2, 3), 6),
            (1002, 1, (11, 12, 13), 36),
            (1003, 2, (111, 112, 113), 336),
            (1004, 3, (1111, 1112, 1113), 3336),
        )

        for trial in _trials:
            dtypes = self.get_datatype(trial)
            for dtype in dtypes:
                model_name = tu.get_sequence_model_name(trial, dtype)
                # Skip bool type ensemble models
                if (any(word in trial for word in ENSEMBLE_PREFIXES)) and (
                    dtype == np.bool_
                ):
                    continue
                # For bool type control models, use int32 as I/O types
                if dtype == np.bool_:
                    dtype = np.int32

                self.clear_deferred_exceptions()

                # Regions are created in table order; the dict keeps that
                # order so cleanup below releases them in the same order.
                shm_handles = {
                    region: self.precreate_register_regions(values, dtype, region)
                    for _, region, values, _ in sequences
                }

                try:
                    self.check_setup(model_name)

                    # Need scheduler to wait for queue to contain all
                    # inferences for all four sequences.
                    self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                    self.assertEqual(
                        int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12
                    )
                    self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)
                    self.assertEqual(
                        int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0
                    )

                    threads = []
                    for corr_id, region, values, total in sequences:
                        first, middle, last = values
                        expected_result = (
                            self.get_expected_result(total, last, trial, "end")
                            if not IMPLICIT_STATE
                            else self.get_expected_result_implicit(
                                total, last, trial, "end", dtype
                            )
                        )
                        threads.append(
                            threading.Thread(
                                target=self.check_sequence_async,
                                args=(
                                    trial,
                                    model_name,
                                    dtype,
                                    corr_id,
                                    (None, None),
                                    # (flag_str, value, pre_delay_ms)
                                    (
                                        ("start", first, None),
                                        (None, middle, None),
                                        ("end", last, None),
                                    ),
                                    expected_result,
                                    shm_handles[region],
                                ),
                                kwargs={
                                    "sequence_name": "{}".format(
                                        self._testMethodName
                                    )
                                },
                            )
                        )

                    for t in threads:
                        t.start()
                    for t in threads:
                        t.join()
                    self.check_deferred_exception()
                    if is_ensemble(model_name):
                        # Requests do not get batched for the ensemble model
                        self.check_status(model_name, {1: 12}, 12, 12)
                    else:
                        # Floor division keeps the expected batch-size key an
                        # int (true division would produce 4.0 / 2.0 / 1.0).
                        self.check_status(
                            model_name,
                            {(4 // MODEL_INSTANCES): (3 * MODEL_INSTANCES)},
                            3 * MODEL_INSTANCES,
                            12,
                        )
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in shm_handles.values():
                            self.cleanup_shm_regions(handles)

    def test_ragged_batch(self):
        # Test model instances that together are configured with
        # total-batch-size 4. The four equal-length sequences use
        # inputs of different sizes, and the inputs are *not* marked
        # as allowing ragged batch. The sequences are sent in parallel
        # and the resulting batch statistics are checked.

        # Only works with 1 model instance since want to test all
        # sequences batching together.
        if MODEL_INSTANCES != 1:
            return

        # One entry per sequence, listed in shm region order:
        # (correlation id, request values, tensor shape,
        #  accumulated value used without implicit state,
        #  accumulated value used with implicit state)
        plan = [
            (1001, (1, 2, 3), (2,), 6 * 2, 6),
            (1002, (11, 12, 13), (2,), 36 * 2, 36),
            (1003, (111, 112, 113), (1,), 336, 336),
            (1004, (1111, 1112, 1113), (3,), 3336 * 3, 3336),
        ]

        for trial in _ragged_batch_not_supported_trials:
            for dtype in self.get_datatype(trial):
                model_name = tu.get_sequence_model_name(trial, dtype)
                if dtype == np.bool_:
                    # Skip bool type ensemble models
                    if any(prefix in trial for prefix in ENSEMBLE_PREFIXES):
                        continue
                    # For bool type control models, use int32 as I/O types
                    dtype = np.int32

                self.clear_deferred_exceptions()

                regions = [
                    self.precreate_register_regions(
                        values, dtype, slot, tensor_shape=shape
                    )
                    for slot, (_, values, shape, _, _) in enumerate(plan)
                ]

                try:
                    self.check_setup(model_name)

                    # Need scheduler to wait for queue to contain all
                    # inferences for every sequence, so the test insists
                    # on these exact delay settings.
                    for env_name, required in (
                        ("TRITONSERVER_DELAY_SCHEDULER", 12),
                        ("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", 0),
                    ):
                        self.assertIn(env_name, os.environ)
                        self.assertEqual(int(os.environ[env_name]), required)

                    workers = []
                    for handles, entry in zip(regions, plan):
                        corr_id, values, shape, total, implicit_total = entry
                        first, middle, last = values
                        if IMPLICIT_STATE:
                            expected = self.get_expected_result_implicit(
                                implicit_total, last, trial, "end", dtype
                            )
                        else:
                            expected = self.get_expected_result(
                                total, last, trial, "end"
                            )
                        # (flag_str, value, pre_delay_ms)
                        steps = (
                            ("start", first, None),
                            (None, middle, None),
                            ("end", last, None),
                        )
                        worker = threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                corr_id,
                                (None, None),
                                steps,
                                expected,
                                handles,
                            ),
                            kwargs={
                                "sequence_name": "{}".format(self._testMethodName),
                                "tensor_shape": shape,
                            },
                        )
                        workers.append(worker)

                    # The first three sequences are launched together; the
                    # fourth is launched after a 3 second pause.
                    for worker in workers[:3]:
                        worker.start()
                    time.sleep(3)
                    workers[3].start()
                    for worker in workers:
                        worker.join()
                    self.check_deferred_exception()

                    if is_ensemble(model_name):
                        # Requests do not get batched for the ensemble model
                        batch_exec, exec_cnt = {1: 12}, 12
                    else:
                        batch_exec, exec_cnt = {4: 9}, 9
                    self.check_status(model_name, batch_exec, exec_cnt, 12)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in regions:
                            self.cleanup_shm_regions(handles)

    def test_ragged_batch_allowed(self):
        # Test model instances that together are configured with
        # total-batch-size 4. The four equal-length sequences use
        # inputs of different sizes. They are sent in parallel and
        # are expected to be batched appropriately even with the
        # size differences.

        # Only works with 1 model instance since want to test all
        # sequences batching together.
        if MODEL_INSTANCES != 1:
            return

        # One entry per sequence, listed in shm region order:
        # (correlation id, request values, tensor shape,
        #  accumulated value handed to get_expected_result*)
        plan = [
            (1001, (1, 2, 3), (2,), 6 * 2),
            (1002, (11, 12, 13), (2,), 36 * 2),
            (1003, (111, 112, 113), (1,), 336),
            (1004, (1111, 1112, 1113), (3,), 3336 * 3),
        ]

        for trial in _ragged_batch_supported_trials:
            for dtype in self.get_datatype(trial):
                model_name = tu.get_sequence_model_name(trial, dtype)
                if dtype == np.bool_:
                    # Skip bool type ensemble models
                    if any(prefix in trial for prefix in ENSEMBLE_PREFIXES):
                        continue
                    # For bool type control models, use int32 as I/O types
                    dtype = np.int32

                self.clear_deferred_exceptions()

                regions = [
                    self.precreate_register_regions(
                        values, dtype, slot, tensor_shape=shape
                    )
                    for slot, (_, values, shape, _) in enumerate(plan)
                ]

                try:
                    self.check_setup(model_name)

                    # Need scheduler to wait for queue to contain all
                    # inferences for every sequence, so the test insists
                    # on these exact delay settings.
                    for env_name, required in (
                        ("TRITONSERVER_DELAY_SCHEDULER", 12),
                        ("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", 0),
                    ):
                        self.assertIn(env_name, os.environ)
                        self.assertEqual(int(os.environ[env_name]), required)

                    workers = []
                    for handles, entry in zip(regions, plan):
                        corr_id, values, shape, total = entry
                        first, middle, last = values
                        if IMPLICIT_STATE:
                            expected = self.get_expected_result_implicit(
                                total, last, trial, "end", dtype
                            )
                        else:
                            expected = self.get_expected_result(
                                total, last, trial, "end"
                            )
                        # (flag_str, value, pre_delay_ms)
                        steps = (
                            ("start", first, None),
                            (None, middle, None),
                            ("end", last, None),
                        )
                        worker = threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                corr_id,
                                (None, None),
                                steps,
                                expected,
                                handles,
                            ),
                            kwargs={
                                "sequence_name": "{}".format(self._testMethodName),
                                "tensor_shape": shape,
                            },
                        )
                        workers.append(worker)

                    # Launch every sequence, then wait for all of them.
                    for worker in workers:
                        worker.start()
                    for worker in workers:
                        worker.join()
                    self.check_deferred_exception()

                    if is_ensemble(model_name):
                        # Requests do not get batched for the ensemble model
                        batch_exec, exec_cnt = {1: 12}, 12
                    else:
                        batch_exec, exec_cnt = {4: 3}, 3
                    self.check_status(model_name, batch_exec, exec_cnt, 12)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in regions:
                            self.cleanup_shm_regions(handles)

    def test_backlog(self):
        # Test model instances together are configured with
        # total-max-batch-size 4. Send 5 equal-length sequences in
        # parallel and make sure they get completely batched into
        # batch-size 4 inferences plus the 5th should go in the
        # backlog and then get handled once there is a free slot.
        for trial in _trials:
            dtypes = self.get_datatype(trial)
            for dtype in dtypes:
                model_name = tu.get_sequence_model_name(trial, dtype)
                # Skip bool type ensemble models
                if (any(word in trial for word in ENSEMBLE_PREFIXES)) and (
                    dtype == np.bool_
                ):
                    continue
                # For bool type control models, use int32 as I/O types
                if dtype == np.bool_:
                    dtype = np.int32

                self.clear_deferred_exceptions()

                precreated_shm0_handles = self.precreate_register_regions(
                    (1, 2, 3), dtype, 0
                )
                precreated_shm1_handles = self.precreate_register_regions(
                    (11, 12, 13), dtype, 1
                )
                precreated_shm2_handles = self.precreate_register_regions(
                    (111, 112, 113), dtype, 2
                )
                precreated_shm3_handles = self.precreate_register_regions(
                    (1111, 1112, 1113), dtype, 3
                )
                precreated_shm4_handles = self.precreate_register_regions(
                    (11111, 11112, 11113), dtype, 4
                )

                try:
                    self.check_setup(model_name)

                    # Need scheduler to wait for queue to contain all
                    # inferences for both sequences.
                    self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                    self.assertEqual(
                        int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12
                    )
                    self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)
                    self.assertEqual(
                        int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0
                    )

                    threads = []
                    expected_result = (
                        self.get_expected_result(6, 3, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            6, 3, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1001,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (("start", 1, None), (None, 2, None), ("end", 3, None)),
                                expected_result,
                                precreated_shm0_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )
                    expected_result = (
                        self.get_expected_result(36, 13, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            36, 13, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1002,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (
                                    ("start", 11, None),
                                    (None, 12, None),
                                    ("end", 13, None),
                                ),
                                expected_result,
                                precreated_shm1_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )
                    expected_result = (
                        self.get_expected_result(336, 113, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            336, 113, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1003,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (
                                    ("start", 111, None),
                                    (None, 112, None),
                                    ("end", 113, None),
                                ),
                                expected_result,
                                precreated_shm2_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )
                    expected_result = (
                        self.get_expected_result(3336, 1113, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            3336, 1113, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1004,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (
                                    ("start", 1111, None),
                                    (None, 1112, None),
                                    ("end", 1113, None),
                                ),
                                expected_result,
                                precreated_shm3_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )

                    expected_result = (
                        self.get_expected_result(33336, 11113, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            33336, 11113, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1005,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (
                                    ("start", 11111, None),
                                    (None, 11112, None),
                                    ("end", 11113, None),
                                ),
                                expected_result,
                                precreated_shm4_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )

                    for t in threads:
                        t.start()
                    for t in threads:
                        t.join()
                    self.check_deferred_exception()
                    if is_ensemble(model_name):
                        # Requests do not get batched for the ensemble model
                        self.check_status(model_name, {1: 15}, 15, 15)
                    else:
                        if MODEL_INSTANCES == 1:
                            self.check_status(model_name, {4: 3, 1: 3}, 6, 15)
                        elif MODEL_INSTANCES == 2:
                            self.check_status(model_name, {2: 6, 1: 3}, 9, 15)
                        else:
                            self.check_status(model_name, {1: 15}, 15, 15)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        self.cleanup_shm_regions(precreated_shm0_handles)
                        self.cleanup_shm_regions(precreated_shm1_handles)
                        self.cleanup_shm_regions(precreated_shm2_handles)
                        self.cleanup_shm_regions(precreated_shm3_handles)
                        self.cleanup_shm_regions(precreated_shm4_handles)

    def test_backlog_fill(self):
        # Scenario: the model instances together are configured with
        # total-max-batch-size 4. Four sequences are launched in
        # parallel, two of them shorter than the others. Two more
        # single-request sequences follow; they are expected to go into
        # the backlog and then immediately fill into the short
        # sequences.

        # Restricted to a single model instance: with more instances,
        # one of them can run ahead and handle more work than expected,
        # which leads to intermittent failures.
        if MODEL_INSTANCES != 1:
            return

        # Values handed to precreate_register_regions for each sequence.
        # The position in this tuple is forwarded as the third argument
        # of that call.
        region_values = (
            (1, 2, 3),
            (11, 13),
            (111, 113),
            (1111, 1112, 1113),
            (11111,),
            (22222,),
        )

        # One entry per sequence, in launch order:
        #   (id, sum of values, last value, flag string for the expected
        #    result, requests)
        # where every request is (flag_str, value, pre_delay_ms).
        sequence_plans = (
            (
                1001,
                6,
                3,
                "end",
                (("start", 1, None), (None, 2, None), ("end", 3, None)),
            ),
            (1002, 24, 13, "end", (("start", 11, None), ("end", 13, None))),
            (1003, 224, 113, "end", (("start", 111, None), ("end", 113, None))),
            (
                1004,
                3336,
                1113,
                "end",
                (
                    ("start", 1111, None),
                    (None, 1112, None),
                    ("end", 1113, None),
                ),
            ),
            (1005, 11111, 11111, "start,end", (("start,end", 11111, None),)),
            (1006, 22222, 22222, "start,end", (("start,end", 22222, None),)),
        )

        # Scheduler delays this test requires in the environment.
        required_delays = (
            ("TRITONSERVER_DELAY_SCHEDULER", 10),
            ("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", 2),
        )

        sequence_name = "{}".format(self._testMethodName)

        for trial in _trials:
            for dtype in self.get_datatype(trial):
                # The model name is derived from the original datatype,
                # before any bool -> int32 substitution below.
                model_name = tu.get_sequence_model_name(trial, dtype)
                if dtype == np.bool_:
                    # Skip bool type ensemble models
                    if any(prefix in trial for prefix in ENSEMBLE_PREFIXES):
                        continue
                    # For bool type control models, use int32 as I/O types
                    dtype = np.int32

                self.clear_deferred_exceptions()

                shm_handles = [
                    self.precreate_register_regions(values, dtype, index)
                    for index, values in enumerate(region_values)
                ]

                try:
                    self.check_setup(model_name)

                    # The scheduler must wait until the queue contains
                    # all inferences of the sequences, so the expected
                    # delay settings have to be present.
                    for env_name, wanted in required_delays:
                        self.assertIn(env_name, os.environ)
                        self.assertEqual(int(os.environ[env_name]), wanted)

                    threads = []
                    for plan, handles in zip(sequence_plans, shm_handles):
                        seq_id, total, last_value, flags, requests = plan
                        if IMPLICIT_STATE:
                            expected_result = self.get_expected_result_implicit(
                                total, last_value, trial, flags, dtype
                            )
                        else:
                            expected_result = self.get_expected_result(
                                total, last_value, trial, flags
                            )
                        worker = threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                seq_id,
                                (None, None),
                                requests,
                                expected_result,
                                handles,
                            ),
                            kwargs={"sequence_name": sequence_name},
                        )
                        threads.append(worker)

                    # Launch the four initial sequences, pause, and only
                    # then launch the two sequences meant for the backlog.
                    for worker in threads[:4]:
                        worker.start()
                    time.sleep(3)
                    for worker in threads[4:]:
                        worker.start()
                    for worker in threads:
                        worker.join()

                    self.check_deferred_exception()
                    if is_ensemble(model_name):
                        # Requests do not get batched for the ensemble model
                        self.check_status(model_name, {1: 12}, 12, 12)
                    else:
                        self.check_status(model_name, {4: 3}, 3, 12)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in shm_handles:
                            self.cleanup_shm_regions(handles)

    def test_backlog_fill_no_end(self):
        # Test model instances together are configured with
        # total-max-batch-size 4. Send 4 sequences in parallel, two of
        # which are shorter. Send 2 additional sequences that should
        # go into backlog but should immediately fill into the short
        # sequences. One of those sequences is filled before it gets
        # its end request.

        # Only works with 1 model instance since otherwise an instance
        # can run ahead and handle more work than expected (leads to
        # intermittent failures)
        if MODEL_INSTANCES != 1:
            return

        for trial in _trials:
            dtypes = self.get_datatype(trial)
            for dtype in dtypes:
                # The model name is derived from the original datatype,
                # before any bool -> int32 substitution below.
                model_name = tu.get_sequence_model_name(trial, dtype)
                # Skip bool type ensemble models
                if (any(word in trial for word in ENSEMBLE_PREFIXES)) and (
                    dtype == np.bool_
                ):
                    continue
                # For bool type control models, use int32 as I/O types
                if dtype == np.bool_:
                    dtype = np.int32

                self.clear_deferred_exceptions()

                # One set of pre-created regions per sequence; the value
                # tuples match the values each sequence sends below.
                precreated_shm0_handles = self.precreate_register_regions(
                    (1, 2, 3), dtype, 0
                )
                precreated_shm1_handles = self.precreate_register_regions(
                    (11, 13), dtype, 1
                )
                precreated_shm2_handles = self.precreate_register_regions(
                    (111, 113), dtype, 2
                )
                precreated_shm3_handles = self.precreate_register_regions(
                    (1111, 1112, 1113), dtype, 3
                )
                precreated_shm4_handles = self.precreate_register_regions(
                    (11111,), dtype, 4
                )
                precreated_shm5_handles = self.precreate_register_regions(
                    (22222, 22223, 22224), dtype, 5
                )

                try:
                    self.check_setup(model_name)

                    # Need scheduler to wait for queue to contain all
                    # inferences for both sequences.
                    self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                    self.assertEqual(
                        int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 10
                    )
                    self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)
                    self.assertEqual(
                        int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 3
                    )

                    threads = []

                    # Sequence 1001: three requests (1, 2, 3).
                    expected_result = (
                        self.get_expected_result(6, 3, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            6, 3, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1001,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (("start", 1, None), (None, 2, None), ("end", 3, None)),
                                expected_result,
                                precreated_shm0_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )

                    # Sequence 1002: short, two requests (11, 13).
                    expected_result = (
                        self.get_expected_result(24, 13, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            24, 13, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1002,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (("start", 11, None), ("end", 13, None)),
                                expected_result,
                                precreated_shm1_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )

                    # Sequence 1003: short, two requests (111, 113).
                    expected_result = (
                        self.get_expected_result(224, 113, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            224, 113, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1003,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (("start", 111, None), ("end", 113, None)),
                                expected_result,
                                precreated_shm2_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )

                    # Sequence 1004: three requests (1111, 1112, 1113).
                    expected_result = (
                        self.get_expected_result(3336, 1113, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            3336, 1113, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1004,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (
                                    ("start", 1111, None),
                                    (None, 1112, None),
                                    ("end", 1113, None),
                                ),
                                expected_result,
                                precreated_shm3_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )

                    # Sequence 1005: a single request flagged "start,end".
                    # The flag string given to the expected-result helper
                    # must be the one actually sent, for both the
                    # explicit- and the implicit-state variant (this
                    # matches test_backlog_fill). NOTE(review): the
                    # helpers are assumed to adjust the expectation
                    # depending on this flag -- confirm against their
                    # definition.
                    expected_result = (
                        self.get_expected_result(11111, 11111, trial, "start,end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            11111, 11111, trial, "start,end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1005,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (("start,end", 11111, None),),
                                expected_result,
                                precreated_shm4_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )

                    # Sequence 1006: three requests (22222, 22223, 22224);
                    # the end request carries a pre-delay of 2000 ms.
                    expected_result = (
                        self.get_expected_result(66669, 22224, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            66669, 22224, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1006,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (
                                    ("start", 22222, None),
                                    (None, 22223, None),
                                    ("end", 22224, 2000),
                                ),
                                expected_result,
                                precreated_shm5_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )

                    # Start the sequences one at a time, two seconds
                    # apart, in the order 1001 .. 1006.
                    threads[0].start()
                    time.sleep(2)
                    threads[1].start()
                    time.sleep(2)
                    threads[2].start()
                    time.sleep(2)
                    threads[3].start()
                    time.sleep(2)
                    threads[4].start()
                    time.sleep(2)
                    threads[5].start()
                    for t in threads:
                        t.join()
                    self.check_deferred_exception()
                    if is_ensemble(model_name):
                        # Requests do not get batched for the ensemble model
                        self.check_status(model_name, {1: 14}, 14, 14)
                    else:
                        # Expecting 3 batch-size 4 inferences and then the
                        # 1006 sequence will follow 1003 (a different
                        # implementation could also follow 1002...)
                        self.check_status(model_name, {4: 3, 3: 2}, 5, 14)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        self.cleanup_shm_regions(precreated_shm0_handles)
                        self.cleanup_shm_regions(precreated_shm1_handles)
                        self.cleanup_shm_regions(precreated_shm2_handles)
                        self.cleanup_shm_regions(precreated_shm3_handles)
                        self.cleanup_shm_regions(precreated_shm4_handles)
                        self.cleanup_shm_regions(precreated_shm5_handles)

    def test_backlog_same_correlation_id(self):
        # Test model instances together are configured with
        # total-max-batch-size 4. Send 4 equal-length sequences in
        # parallel and make sure they get completely batched into
        # batch-size 4 inferences. Send a 5th with the same
        # correlation ID as one of the first four.
        for trial in _trials:
            dtypes = self.get_datatype(trial)
            for dtype in dtypes:
                model_name = tu.get_sequence_model_name(trial, dtype)
                # Skip bool type ensemble models
                if (any(word in trial for word in ENSEMBLE_PREFIXES)) and (
                    dtype == np.bool_
                ):
                    continue
                # For bool type control models, use int32 as I/O types
                if dtype == np.bool_:
                    dtype = np.int32

                self.clear_deferred_exceptions()

                precreated_shm0_handles = self.precreate_register_regions(
                    (1, 2, 3), dtype, 0
                )
                precreated_shm1_handles = self.precreate_register_regions(
                    (11, 12, 13), dtype, 1
                )
                precreated_shm2_handles = self.precreate_register_regions(
                    (111, 112, 113), dtype, 2
                )
                precreated_shm3_handles = self.precreate_register_regions(
                    (1111, 1112, 1113), dtype, 3
                )
                precreated_shm4_handles = self.precreate_register_regions(
                    (11111, 11113), dtype, 4
                )

                try:
                    self.check_setup(model_name)

                    # Need scheduler to wait for queue to contain all
                    # inferences for both sequences.
                    self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                    self.assertEqual(
                        int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12
                    )
                    self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)
                    self.assertEqual(
                        int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 2
                    )

                    threads = []
                    expected_result = (
                        self.get_expected_result(6, 3, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            6, 3, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1001,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (("start", 1, None), (None, 2, None), ("end", 3, None)),
                                expected_result,
                                precreated_shm0_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )
                    expected_result = (
                        self.get_expected_result(36, 13, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            36, 13, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1002,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (
                                    ("start", 11, None),
                                    (None, 12, None),
                                    ("end", 13, None),
                                ),
                                expected_result,
                                precreated_shm1_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )
                    expected_result = (
                        self.get_expected_result(336, 113, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            336, 113, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1003,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (
                                    ("start", 111, None),
                                    (None, 112, None),
                                    ("end", 113, None),
                                ),
                                expected_result,
                                precreated_shm2_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )
                    expected_result = (
                        self.get_expected_result(3336, 1113, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            3336, 1113, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1004,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (
                                    ("start", 1111, None),
                                    (None, 1112, None),
                                    ("end", 1113, None),
                                ),
                                expected_result,
                                precreated_shm3_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )
                    expected_result = (
                        self.get_expected_result(22224, 11113, trial, "end")
                        if not IMPLICIT_STATE
                        else self.get_expected_result_implicit(
                            22224, 11113, trial, "end", dtype
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                1002,
                                (None, None),
                                # (flag_str, value, pre_delay_ms)
                                (("start", 11111, None), ("end", 11113, None)),
                                expected_result,
                                precreated_shm4_handles,
                            ),
                            kwargs={"sequence_name": "{}".format(self._testMethodName)},
                        )
                    )

                    threads[0].start()
                    threads[1].start()
                    threads[2].start()
                    threads[3].start()
                    time.sleep(3)
                    threads[4].start()
                    for t in threads:
                        t.join()
                    self.check_deferred_exception()
                    if is_ensemble(model_name):
                        # Requests do not get batched for the ensemble model
                        self.check_status(model_name, {1: 14}, 14, 14)
                    else:
                        if MODEL_INSTANCES != 4:
                            batch_exec = {
                                (4 / MODEL_INSTANCES): (3 * MODEL_INSTANCES),
                                1: 2,
                            }
                        else:
                            batch_exec = {1: (3 * MODEL_INSTANCES) + 2}
                        self.check_status(
                            model_name, batch_exec, (3 * MODEL_INSTANCES) + 2, 14
                        )
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        self.cleanup_shm_regions(precreated_shm0_handles)
                        self.cleanup_shm_regions(precreated_shm1_handles)
                        self.cleanup_shm_regions(precreated_shm2_handles)
                        self.cleanup_shm_regions(precreated_shm3_handles)
                        self.cleanup_shm_regions(precreated_shm4_handles)

    def test_backlog_same_correlation_id_no_end(self):
        # Backlog scenario in which a late sequence reuses the
        # correlation ID of a sequence that never sent an end marker.
        #
        # The model instances together are configured with
        # total-max-batch-size 4. Four sequences are launched in
        # parallel and must be batched completely into batch-size 4
        # inferences. Sequence 1001 is shorter than the others and has
        # no end marker; the fifth sequence, started later, carries the
        # same correlation ID. The short sequence is expected to be
        # ended early because of the reused ID, which makes room for
        # the fifth sequence.

        # Restricted to a single model instance: with more instances
        # one of them can run ahead and handle more work than expected,
        # which leads to intermittent failures.
        if MODEL_INSTANCES != 1:
            return

        # Values registered up front for every sequence; the position
        # in this tuple is the region index handed to
        # precreate_register_regions.
        region_values = (
            (1, 3),
            (11, 12, 12, 13),
            (111, 112, 112, 113),
            (1111, 1112, 1112, 1113),
            (11111, 11113),
        )
        # One row per sequence:
        #   (correlation ID, total, last, flag, steps)
        # 'total', 'last' and 'flag' are forwarded to
        # get_expected_result[_implicit]; the numbers equal the sum of
        # the step values and the final step value. Every step is
        # (flag_str, value, pre_delay_ms).
        sequences = (
            (1001, 4, 3, None, (("start", 1, None), (None, 3, None))),
            (
                1002,
                48,
                13,
                "end",
                (
                    ("start", 11, None),
                    (None, 12, None),
                    (None, 12, None),
                    ("end", 13, None),
                ),
            ),
            (
                1003,
                448,
                113,
                "end",
                (
                    ("start", 111, None),
                    (None, 112, None),
                    (None, 112, None),
                    ("end", 113, None),
                ),
            ),
            (
                1004,
                4448,
                1113,
                "end",
                (
                    ("start", 1111, None),
                    (None, 1112, None),
                    (None, 1112, None),
                    ("end", 1113, None),
                ),
            ),
            # Same correlation ID as the first, unterminated sequence.
            (1001, 22224, 11113, "end", (("start", 11111, None), ("end", 11113, None))),
        )

        for trial in _trials:
            for dtype in self.get_datatype(trial):
                model_name = tu.get_sequence_model_name(trial, dtype)
                if dtype == np.bool_:
                    # bool type ensemble models are skipped entirely
                    if any(word in trial for word in ENSEMBLE_PREFIXES):
                        continue
                    # bool type control models use int32 as I/O type
                    dtype = np.int32

                self.clear_deferred_exceptions()

                shm_handles = [
                    self.precreate_register_regions(values, dtype, region_idx)
                    for region_idx, values in enumerate(region_values)
                ]
                try:
                    self.check_setup(model_name)

                    # The scheduler has to wait until the queue holds
                    # the inferences of the sequences before it starts
                    # (16 matches the number of requests sent below).
                    for env_name, wanted in (
                        ("TRITONSERVER_DELAY_SCHEDULER", 16),
                        ("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", 0),
                    ):
                        self.assertIn(env_name, os.environ)
                        self.assertEqual(int(os.environ[env_name]), wanted)

                    workers = []
                    for handles, (corr_id, total, last, flag, steps) in zip(
                        shm_handles, sequences
                    ):
                        if IMPLICIT_STATE:
                            expected_result = self.get_expected_result_implicit(
                                total, last, trial, flag, dtype
                            )
                        else:
                            expected_result = self.get_expected_result(
                                total, last, trial, flag
                            )
                        workers.append(
                            threading.Thread(
                                target=self.check_sequence_async,
                                args=(
                                    trial,
                                    model_name,
                                    dtype,
                                    corr_id,
                                    (None, None),
                                    steps,
                                    expected_result,
                                    handles,
                                ),
                                kwargs={
                                    "sequence_name": "{}".format(self._testMethodName)
                                },
                            )
                        )

                    # The first four sequences start together; the one
                    # reusing correlation ID 1001 follows 2 seconds later.
                    for worker in workers[:4]:
                        worker.start()
                    time.sleep(2)
                    workers[4].start()
                    for worker in workers:
                        worker.join()
                    self.check_deferred_exception()

                    if is_ensemble(model_name):
                        # Requests do not get batched for the ensemble model
                        batch_exec, exec_count = {1: 16}, 16
                    else:
                        batch_exec, exec_count = {4: 4}, 4
                    self.check_status(model_name, batch_exec, exec_count, 16)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in shm_handles:
                            self.cleanup_shm_regions(handles)

    def test_backlog_sequence_timeout(self):
        # Backlog scenario in which a sequence times out while idle.
        #
        # The model instances together are configured with
        # total-max-batch-size 4. Four sequences are launched in
        # parallel and must be batched completely into batch-size 4
        # inferences. Sequence 1001 waits longer than
        # _max_sequence_idle_ms before its second request, so it times
        # out and a fifth sequence can leave the backlog and finish.
        # The delayed request of the timed-out sequence then looks like
        # the start of a new sequence and is expected to fail because
        # it does not carry the START flag.

        # Restricted to a single model instance: with more instances
        # one of them can run ahead and handle more work than expected,
        # which leads to intermittent failures.
        if MODEL_INSTANCES != 1:
            return

        # Delay placed before the follow-up requests of sequences
        # 1002-1004; half the idle limit.
        half_idle_ms = _max_sequence_idle_ms / 2

        # Values registered up front for every sequence; the position
        # in this tuple is the region index handed to
        # precreate_register_regions.
        region_values = (
            (1, 3),
            (11, 12, 12, 13),
            (111, 112, 112, 113),
            (1111, 1112, 1112, 1113),
            (11111, 11113),
        )
        # One row per sequence:
        #   (correlation ID, total, last, flag, steps)
        # 'total', 'last' and 'flag' are forwarded to
        # get_expected_result[_implicit]. Every step is
        # (flag_str, value, pre_delay_ms).
        sequences = (
            (
                1001,
                4,
                3,
                None,
                (
                    ("start", 1, None),
                    # Idle for longer than the limit to force the timeout.
                    (None, 3, _max_sequence_idle_ms + 1000),
                ),
            ),
            (
                1002,
                48,
                13,
                None,
                (
                    ("start", 11, None),
                    (None, 12, half_idle_ms),
                    (None, 12, half_idle_ms),
                    ("end", 13, half_idle_ms),
                ),
            ),
            (
                1003,
                448,
                113,
                None,
                (
                    ("start", 111, None),
                    (None, 112, half_idle_ms),
                    (None, 112, half_idle_ms),
                    ("end", 113, half_idle_ms),
                ),
            ),
            (
                1004,
                4448,
                1113,
                None,
                (
                    ("start", 1111, None),
                    (None, 1112, half_idle_ms),
                    (None, 1112, half_idle_ms),
                    ("end", 1113, half_idle_ms),
                ),
            ),
            (1005, 22224, 11113, "end", (("start", 11111, None), ("end", 11113, None))),
        )

        for trial in _trials:
            dtypes = self.get_datatype(trial)
            for dtype in dtypes:
                model_name = tu.get_sequence_model_name(trial, dtype)
                # Skip bool type ensemble models
                if (any(word in trial for word in ENSEMBLE_PREFIXES)) and (
                    dtype == np.bool_
                ):
                    continue
                # For bool type control models, use int32 as I/O types
                if dtype == np.bool_:
                    dtype = np.int32

                self.clear_deferred_exceptions()

                shm_handles = [
                    self.precreate_register_regions(values, dtype, region_idx)
                    for region_idx, values in enumerate(region_values)
                ]
                try:
                    self.check_setup(model_name)

                    # Need scheduler to wait for queue to contain all
                    # inferences for all sequences.
                    self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                    self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 4)
                    self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)
                    self.assertEqual(
                        int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0
                    )

                    threads = []
                    for handles, (corr_id, total, last, flag, steps) in zip(
                        shm_handles, sequences
                    ):
                        expected_result = (
                            self.get_expected_result(total, last, trial, flag)
                            if not IMPLICIT_STATE
                            else self.get_expected_result_implicit(
                                total, last, trial, flag, dtype
                            )
                        )
                        threads.append(
                            threading.Thread(
                                target=self.check_sequence_async,
                                args=(
                                    trial,
                                    model_name,
                                    dtype,
                                    corr_id,
                                    (None, None),
                                    steps,
                                    expected_result,
                                    handles,
                                ),
                                kwargs={
                                    "sequence_name": "{}".format(self._testMethodName)
                                },
                            )
                        )

                    # The first four sequences start together; the
                    # fifth one follows 2 seconds later.
                    for t in threads[:4]:
                        t.start()
                    time.sleep(2)
                    threads[4].start()
                    for t in threads:
                        t.join()

                    # The deferred failure of sequence 1001 is expected
                    # to be raised here; reaching the next line means
                    # the expected error never happened.
                    self.check_deferred_exception()
                    self.assertTrue(False, "expected error")
                except Exception as ex:
                    # The checks below need ex.message(). Exceptions
                    # that do not provide it (for example the
                    # AssertionError raised by the assertions in the
                    # try block) are propagated unchanged instead of
                    # being replaced by an AttributeError.
                    if not callable(getattr(ex, "message", None)):
                        raise
                    error_text = ex.message()
                    for prefix in ENSEMBLE_PREFIXES:
                        if model_name.startswith(prefix):
                            base_model_name = model_name[(len(prefix)) :]
                            self.assertTrue(
                                error_text.startswith(
                                    str(
                                        "in ensemble '{}', "
                                        + "inference request for sequence 1001 to "
                                        + "model '{}' must specify the START flag on the first "
                                        + "request of the sequence"
                                    ).format(model_name, base_model_name)
                                ),
                                "unexpected error {}".format(error_text),
                            )
                            # NOTE(review): this return leaves the test
                            # method, so trials/dtypes after the first
                            # ensemble model are not exercised. Kept as
                            # in the original; confirm whether that is
                            # intended.
                            return
                    self.assertTrue(
                        error_text.startswith(
                            str(
                                "inference request for sequence 1001 to "
                                + "model '{}' must specify the START flag on the first "
                                + "request of the sequence"
                            ).format(model_name)
                        ),
                        "unexpected error {}".format(error_text),
                    )
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in shm_handles:
                            self.cleanup_shm_regions(handles)

    def test_queue_delay_no_min_util(self):
        # Exercises a model that sets a max queue delay while its
        # minimum slot utilization is 0.
        #
        # Two sequences are sent in parallel and must be batched
        # completely into batch-size 2 inferences. The first sequence
        # consists of a single request and the second of two, so the
        # second execution is expected to be a batch of 'null, seq 2'.
        # The executions should not be waited.

        # Values registered up front for every sequence; the position
        # in this tuple is the region index handed to
        # precreate_register_regions.
        region_values = ((1,), (11, 12))
        # One row per sequence:
        #   (correlation ID, total, last, flag, steps)
        # 'total', 'last' and 'flag' are forwarded to
        # get_expected_result[_implicit]. Every step is
        # (flag_str, value, pre_delay_ms).
        sequences = (
            (1001, 1, 1, "start", (("start", 1, None),)),
            (1002, 23, 12, None, (("start", 11, None), (None, 12, None))),
        )

        for trial in _trials:
            # Ensemble trials are not part of this test.
            if any(prefix in trial for prefix in ENSEMBLE_PREFIXES):
                continue

            for dtype in self.get_datatype(trial):
                model_name = tu.get_sequence_model_name(trial, dtype)
                if dtype == np.bool_:
                    # bool type control models use int32 as I/O type
                    dtype = np.int32

                self.clear_deferred_exceptions()

                shm_handles = [
                    self.precreate_register_regions(values, dtype, region_idx)
                    for region_idx, values in enumerate(region_values)
                ]
                try:
                    self.check_setup(model_name)

                    # The scheduler has to wait until the queue
                    # contains 2 sequences.
                    for env_name, wanted in (
                        ("TRITONSERVER_DELAY_SCHEDULER", 2),
                        ("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", 0),
                    ):
                        self.assertIn(env_name, os.environ)
                        self.assertEqual(int(os.environ[env_name]), wanted)

                    workers = []
                    for handles, (corr_id, total, last, flag, steps) in zip(
                        shm_handles, sequences
                    ):
                        if IMPLICIT_STATE:
                            expected_result = self.get_expected_result_implicit(
                                total, last, trial, flag, dtype
                            )
                        else:
                            expected_result = self.get_expected_result(
                                total, last, trial, flag
                            )
                        workers.append(
                            threading.Thread(
                                target=self.check_sequence_async,
                                args=(
                                    trial,
                                    model_name,
                                    dtype,
                                    corr_id,
                                    (2000, None),
                                    steps,
                                    expected_result,
                                    handles,
                                ),
                                kwargs={
                                    "sequence_name": "{}".format(self._testMethodName)
                                },
                            )
                        )

                    # The second sequence starts one second after the first.
                    first_worker, second_worker = workers
                    first_worker.start()
                    time.sleep(1)
                    second_worker.start()
                    for worker in workers:
                        worker.join()

                    self.check_deferred_exception()
                    self.check_status(model_name, {2: 2}, 2, 3)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in shm_handles:
                            self.cleanup_shm_regions(handles)

    def test_queue_delay_half_min_util(self):
        # Exercise a model configured with a max queue delay and a minimum
        # slot utilization of 0.5. Two sequences are sent in parallel and are
        # expected to be completely batched into batch-size 2 inferences.
        # Sequence one carries a single request while sequence two carries
        # two, so the second execution is expected to be a batch of
        # 'null, seq 2' that is held back until the max queue delay is
        # exceeded for sequence two.
        sequence_name = "{}".format(self._testMethodName)

        for trial in _trials:
            # Ensemble trials are skipped by this test.
            if any(prefix in trial for prefix in ENSEMBLE_PREFIXES):
                continue

            for dtype in self.get_datatype(trial):
                model_name = tu.get_sequence_model_name(trial, dtype) + "_half"
                # Bool-typed control models use int32 as their I/O type.
                if dtype == np.bool_:
                    dtype = np.int32

                self.clear_deferred_exceptions()

                shm_handles_seq1 = self.precreate_register_regions((1,), dtype, 0)
                shm_handles_seq2 = self.precreate_register_regions(
                    (11, 12), dtype, 1
                )
                try:
                    self.check_setup(model_name)

                    # The scheduler must wait until the queue holds 2
                    # sequences, which requires these environment settings.
                    for env_var, required in (
                        ("TRITONSERVER_DELAY_SCHEDULER", 2),
                        ("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", 0),
                    ):
                        self.assertIn(env_var, os.environ)
                        self.assertEqual(int(os.environ[env_var]), required)

                    # Sequence 1: a single "start" request.
                    if IMPLICIT_STATE:
                        seq1_expected = self.get_expected_result_implicit(
                            1, 1, trial, "start", dtype
                        )
                    else:
                        seq1_expected = self.get_expected_result(
                            1, 1, trial, "start"
                        )
                    seq1_thread = threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            1001,
                            # NOTE(review): presumably latency thresholds in
                            # ms; confirm against check_sequence_async.
                            (2000, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1, None),),
                            seq1_expected,
                            shm_handles_seq1,
                        ),
                        kwargs={"sequence_name": sequence_name},
                    )

                    # Sequence 2: a "start" request followed by one more.
                    if IMPLICIT_STATE:
                        seq2_expected = self.get_expected_result_implicit(
                            23, 12, trial, None, dtype
                        )
                    else:
                        seq2_expected = self.get_expected_result(23, 12, trial, None)
                    seq2_thread = threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            1002,
                            (4000, 3000),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 11, None),
                                (None, 12, None),
                            ),
                            seq2_expected,
                            shm_handles_seq2,
                        ),
                        kwargs={"sequence_name": sequence_name},
                    )

                    # Stagger the launches by one second so that sequence 1
                    # is sent first.
                    seq1_thread.start()
                    time.sleep(1)
                    seq2_thread.start()
                    seq1_thread.join()
                    seq2_thread.join()

                    self.check_deferred_exception()
                    self.check_status(model_name, {2: 2}, 2, 3)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in (shm_handles_seq1, shm_handles_seq2):
                            self.cleanup_shm_regions(handles)

    def test_queue_delay_full_min_util(self):
        # Exercise a model configured with a max queue delay and a minimum
        # slot utilization of 1. Two sequences are sent in parallel and are
        # expected to be completely batched into batch-size 2 inferences.
        # Sequence one carries a single request while sequence two carries
        # two, so the second execution is expected to be a batch of
        # 'null, seq 2'. Both executions should be held back until the max
        # queue delay is exceeded.
        sequence_name = "{}".format(self._testMethodName)

        for trial in _trials:
            # Ensemble trials are skipped by this test.
            if any(prefix in trial for prefix in ENSEMBLE_PREFIXES):
                continue

            for dtype in self.get_datatype(trial):
                model_name = tu.get_sequence_model_name(trial, dtype) + "_full"
                # Bool-typed control models use int32 as their I/O type.
                if dtype == np.bool_:
                    dtype = np.int32

                self.clear_deferred_exceptions()

                shm_handles_seq1 = self.precreate_register_regions((1,), dtype, 0)
                shm_handles_seq2 = self.precreate_register_regions(
                    (11, 12), dtype, 1
                )
                try:
                    self.check_setup(model_name)

                    # The scheduler must wait until the queue holds 2
                    # sequences, which requires these environment settings.
                    for env_var, required in (
                        ("TRITONSERVER_DELAY_SCHEDULER", 2),
                        ("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", 0),
                    ):
                        self.assertIn(env_var, os.environ)
                        self.assertEqual(int(os.environ[env_var]), required)

                    # Sequence 1: a single "start" request.
                    if IMPLICIT_STATE:
                        seq1_expected = self.get_expected_result_implicit(
                            1, 1, trial, "start", dtype
                        )
                    else:
                        seq1_expected = self.get_expected_result(
                            1, 1, trial, "start"
                        )
                    seq1_thread = threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            1001,
                            # NOTE(review): presumably latency thresholds in
                            # ms; confirm against check_sequence_async.
                            (4000, 3000),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1, None),),
                            seq1_expected,
                            shm_handles_seq1,
                        ),
                        kwargs={"sequence_name": sequence_name},
                    )

                    # Sequence 2: a "start" request, then a second request
                    # that is sent with a 2000 ms pre-delay.
                    if IMPLICIT_STATE:
                        seq2_expected = self.get_expected_result_implicit(
                            23, 12, trial, None, dtype
                        )
                    else:
                        seq2_expected = self.get_expected_result(23, 12, trial, None)
                    seq2_thread = threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            1002,
                            (6000, 5000),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 11, None),
                                (None, 12, 2000),
                            ),
                            seq2_expected,
                            shm_handles_seq2,
                        ),
                        kwargs={"sequence_name": sequence_name},
                    )

                    # Stagger the launches by one second so that sequence 1
                    # is sent first.
                    seq1_thread.start()
                    time.sleep(1)
                    seq2_thread.start()
                    seq1_thread.join()
                    seq2_thread.join()

                    self.check_deferred_exception()
                    self.check_status(model_name, {2: 2}, 2, 3)
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
                finally:
                    if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                        for handles in (shm_handles_seq1, shm_handles_seq2):
                            self.cleanup_shm_regions(handles)


class SequenceBatcherRequestTimeoutTest(su.SequenceBatcherTestUtil):
    """Tests for sequence requests that carry a timeout.

    Each test streams two three-request sequences over gRPC to the
    "custom_sequence_int32_timeout" model and inspects the per-request
    (result, error) pairs delivered to the stream callbacks.
    """

    def setUp(self):
        super().setUp()
        # By default, find tritonserver on "localhost", but can be overridden
        # with TRITONSERVER_IPADDR envvar
        self.server_address_ = (
            os.environ.get("TRITONSERVER_IPADDR", "localhost") + ":8001"
        )

        # Prepare input and expected output based on the model and
        # the infer sequence sent for testing. If the test is to be extended
        # for different sequence and model, then proper grouping should be added
        self.model_name_ = "custom_sequence_int32_timeout"
        self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.int32)
        self.inputs_ = [grpcclient.InferInput("INPUT0", [1, 1], "INT32")]
        self.inputs_[0].set_data_from_numpy(self.tensor_data_)
        # One (output name, expected tensor) entry per request of a sequence.
        self.expected_out_seq_ = [
            ("OUTPUT0", self.tensor_data_),
            ("OUTPUT0", self.tensor_data_),
            ("OUTPUT0", self.tensor_data_),
        ]

    def send_sequence_with_timeout(
        self, seq_id, callback, timeout_us=3000000, request_pause_sec=0
    ):
        """Stream a three-request sequence (start, middle, end) to the model.

        Parameters
        ----------
        seq_id
            Sequence (correlation) ID attached to every request.
        callback
            Stream callback; it is handed 'result' and 'error' for each
            response.
        timeout_us
            Value passed as 'timeout' on every request.
        request_pause_sec
            When non-zero, seconds to sleep between consecutive requests.
        """
        # Sequence flags of the three requests, in send order.
        request_flags = (
            {"sequence_start": True},
            {},
            {"sequence_end": True},
        )
        with grpcclient.InferenceServerClient(self.server_address_) as triton_client:
            triton_client.start_stream(callback=callback)
            for position, flags in enumerate(request_flags):
                # Pause only between requests, never before the first one.
                if position != 0 and request_pause_sec != 0:
                    time.sleep(request_pause_sec)
                triton_client.async_stream_infer(
                    self.model_name_,
                    self.inputs_,
                    sequence_id=seq_id,
                    timeout=timeout_us,
                    **flags,
                )

    def _run_staggered_sequences(self, seq2_kwargs=None):
        """Send sequence 1, then sequence 2 one second later, on two threads.

        'seq2_kwargs' holds extra keyword arguments forwarded to
        send_sequence_with_timeout for sequence 2 only. Returns the
        (seq1_res, seq2_res) lists of (result, error) tuples collected by the
        stream callbacks once both sender threads have finished.
        """
        # correlation ID is 1-index
        seq1_res = []
        seq2_res = []

        # The parameter names must stay 'result' and 'error'.
        # NOTE(review): the client library presumably invokes stream callbacks
        # with keyword arguments - confirm against tritonclient.
        def seq1_callback(result, error):
            seq1_res.append((result, error))

        def seq2_callback(result, error):
            seq2_res.append((result, error))

        threads = [
            threading.Thread(
                target=self.send_sequence_with_timeout, args=(1, seq1_callback)
            ),
            threading.Thread(
                target=self.send_sequence_with_timeout,
                args=(2, seq2_callback),
                kwargs=seq2_kwargs,
            ),
        ]
        # send sequence with 1s interval to ensure processing order
        threads[0].start()
        time.sleep(1)
        threads[1].start()
        for t in threads:
            t.join()
        return seq1_res, seq2_res

    def _assert_sequence2_error(self, error, expected_regex):
        """Assert that 'error' is an InferenceServerException matching the regex."""
        self.assertIsNotNone(error, "Expect error for sequence 2 requests")
        with self.assertRaisesRegex(
            InferenceServerException,
            expected_regex,
            msg="Unexpected error: {}".format(error),
        ):
            raise error

    def test_request_timeout(self):
        # Test long running model that receives requests with shorter timeout,
        # expect the timeout will only be expired on backlog sequence and reject
        # all requests of the sequence once expired.
        # Sending two sequences while the model can only process one sequence
        # at a time. Each model execution takes 5 second and all requests have
        # 3 second timeout, so the second sequence will be rejected.
        seq1_res, seq2_res = self._run_staggered_sequences()

        # Without these count checks the loops below would pass vacuously if
        # responses went missing.
        expected_count = len(self.expected_out_seq_)
        self.assertEqual(
            len(seq1_res),
            expected_count,
            "Expect {} responses for sequence 1, got {}".format(
                expected_count, len(seq1_res)
            ),
        )
        for (result, error), (out_name, expected_out) in zip(
            seq1_res, self.expected_out_seq_
        ):
            self.assertIsNone(
                error,
                "Expect successful inference for sequence 1 requests, got error: {}".format(
                    error
                ),
            )
            out = result.as_numpy(out_name)
            np.testing.assert_allclose(
                out,
                expected_out,
                err_msg="Unexpected output tensor: expect {}, got {}".format(
                    expected_out, out
                ),
            )

        self.assertEqual(
            len(seq2_res),
            expected_count,
            "Expect {} responses for sequence 2, got {}".format(
                expected_count, len(seq2_res)
            ),
        )
        for _, error in seq2_res:
            self._assert_sequence2_error(
                error, "timeout of the corresponding sequence has been expired"
            )

    def test_send_request_after_timeout(self):
        # Similar to test_request_timeout, but the sequence to be timed out
        # will send the last request after the sequence has been timed out,
        # and expecting server to return error regarding sending request of
        # an untracked sequence

        # Each request will be sent with a pause, so the third request
        # will be sent after the sequence has been timed out
        _, seq2_res = self._run_staggered_sequences(
            seq2_kwargs={"request_pause_sec": 2}
        )

        expected_count = len(self.expected_out_seq_)
        self.assertEqual(
            len(seq2_res),
            expected_count,
            "Expect {} responses for sequence 2, got {}".format(
                expected_count, len(seq2_res)
            ),
        )

        # Check error message of the last request and the rest
        # separately
        for _, error in seq2_res[:-1]:
            self._assert_sequence2_error(
                error, "timeout of the corresponding sequence has been expired"
            )
        _, last_err = seq2_res[-1]
        self._assert_sequence2_error(
            last_err, "must specify the START flag on the first request"
        )


class SequenceBatcherPreserveOrderingTest(su.SequenceBatcherTestUtil):
    """Response-ordering tests for sequences sent over one gRPC stream.

    Several sequences are sent concurrently on a single shared stream and the
    order of the received responses is validated, per sequence and (when
    preserve_ordering is enabled) overall.
    """

    def setUp(self):
        super().setUp()
        # By default, find tritonserver on "localhost", but can be overridden
        # with TRITONSERVER_IPADDR envvar
        self.server_address_ = (
            os.environ.get("TRITONSERVER_IPADDR", "localhost") + ":8001"
        )

        # Prepare input and expected output based on the model and
        # the infer sequence sent for testing. If the test is to be extended
        # for different sequence and model, then proper grouping should be added
        self.model_name_ = "sequence_py"
        self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.int32)
        self.inputs_ = [grpcclient.InferInput("INPUT0", [1, 1], "INT32")]
        self.inputs_[0].set_data_from_numpy(self.tensor_data_)
        self.triton_client = grpcclient.InferenceServerClient(self.server_address_)
        # Close the client after every test, even a failing one, so that its
        # connection is not leaked across tests.
        self.addCleanup(self.triton_client.close)

        # Atomic request ID for multi-threaded inference
        self.request_id_lock = threading.Lock()
        self.request_id = 1

    def send_sequence(self, seq_id, seq_id_map, req_id_map):
        """Send one start/middle/end sequence on the shared stream.

        Every request gets a fresh request ID. 'seq_id_map' (sequence ID ->
        list of request IDs) and 'req_id_map' (request ID -> sequence ID) are
        updated in place so the caller can validate the responses later.
        """
        seq_id_map.setdefault(seq_id, [])

        # (sequence_start, sequence_end) flags: 1 start, 1 middle, and 1 end
        # request
        seq_flags = [(True, False), (False, False), (False, True)]
        for start_flag, end_flag in seq_flags:
            # Introduce random sleep to better interweave requests from different sequences
            time.sleep(random.uniform(0.0, 1.0))

            # Serialize sending requests to ensure ordered request IDs
            with self.request_id_lock:
                req_id = self.request_id
                self.request_id += 1

                # Store metadata to validate results later
                req_id_map[req_id] = seq_id
                seq_id_map[seq_id].append(req_id)

                self.triton_client.async_stream_infer(
                    self.model_name_,
                    self.inputs_,
                    sequence_id=seq_id,
                    sequence_start=start_flag,
                    sequence_end=end_flag,
                    timeout=None,
                    request_id=str(req_id),
                )

    def _test_sequence_ordering(self, preserve_ordering, decoupled):
        # 1. Send a few grpc streaming sequence requests to the model.
        # 2. With grpc streaming, the model should receive the requests in
        #    the same order they are sent from client, and the client should
        #    receive the responses in the same order sent back by the
        #    model/server. With sequence scheduler, the requests for each
        #    sequence should be routed to the same model instance, and no two
        #    requests from the same sequence should get batched together.
        # 3. With preserve_ordering=False, we may get the responses back in a
        #    different order than the requests, but with grpc streaming we
        #    should still expect responses for each sequence to be ordered.
        # 4. Assert that the sequence values are ordered, and that the
        #    response IDs per sequence are ordered
        class SequenceResult:
            def __init__(self, seq_id, result, request_id):
                self.seq_id = seq_id
                self.result = result
                self.request_id = int(request_id)

        # Store ordered list in which responses are received by client
        sequence_list = []
        # Store mapping of sequence ID to response results
        sequence_dict = {}
        # Store mapping of sequence ID to request IDs and vice versa
        sequence_id_map = {}
        request_id_map = {}
        # Errors handed to the stream callback. The callback does not run on
        # the test thread, so an assertion raised inside it could not fail
        # the test; errors are recorded here and checked after the stream is
        # stopped.
        callback_errors = []

        def full_callback(sequence_dict, sequence_list, result, error):
            # We expect no model errors for this test
            if error is not None:
                callback_errors.append(error)
                return

            # Gather all the necessary metadata for validation
            request_id = int(result.get_response().id)
            sequence_id = request_id_map[request_id]
            # Overall list of results in the order received, regardless of sequence ID
            sequence_list.append(SequenceResult(sequence_id, result, request_id))
            # Ordered results organized by their seq IDs
            sequence_dict[sequence_id].append(result)

        # Start stream
        seq_callback = partial(full_callback, sequence_dict, sequence_list)
        self.triton_client.start_stream(callback=seq_callback)

        # Send N sequences concurrently
        threads = []
        num_sequences = 10
        for i in range(num_sequences):
            # Sequence IDs are 1-indexed
            sequence_id = i + 1
            # Add a result list for each sequence
            sequence_dict[sequence_id] = []
            threads.append(
                threading.Thread(
                    target=self.send_sequence,
                    args=(sequence_id, sequence_id_map, request_id_map),
                )
            )

        # Start all sequence threads
        for t in threads:
            t.start()

        # Wait for threads to return
        for t in threads:
            t.join()

        # Block until all requests are completed
        self.triton_client.stop_stream()

        # Surface any error reported through the stream callback
        self.assertFalse(
            callback_errors,
            "Unexpected inference errors: {}".format(callback_errors),
        )

        # Make sure some inferences occurred and metadata was collected
        self.assertGreater(len(sequence_dict), 0)
        self.assertGreater(len(sequence_list), 0)

        # Validate model results are sorted per sequence ID (model specific logic)
        print(f"=== {preserve_ordering=} {decoupled=} ===")
        print("Outputs per Sequence:")
        for seq_id, sequence in sequence_dict.items():
            seq_outputs = [
                result.as_numpy("OUTPUT0").flatten().tolist() for result in sequence
            ]
            print(f"{seq_id}: {seq_outputs}")
            self.assertEqual(seq_outputs, sorted(seq_outputs))

        # Validate request/response IDs for each response in a sequence is sorted
        # This should be true regardless of preserve_ordering or not
        print("Request IDs per Sequence:")
        for seq_id, per_seq_request_ids in sequence_id_map.items():
            print(f"{seq_id}: {per_seq_request_ids}")
            self.assertEqual(per_seq_request_ids, sorted(per_seq_request_ids))

        # Validate results are sorted in request order if preserve_ordering is True
        if preserve_ordering:
            request_ids = [s.request_id for s in sequence_list]
            print(f"Request IDs overall:\n{request_ids}")
            sequence_ids = [s.seq_id for s in sequence_list]
            print(f"Sequence IDs overall:\n{sequence_ids}")
            self.assertEqual(request_ids, sorted(request_ids))

        # Assert some dynamic batching of requests was done
        stats = self.triton_client.get_inference_statistics(
            model_name=self.model_name_, headers={}, as_json=True
        )
        model_stats = stats["model_stats"][0]
        self.assertEqual(model_stats["name"], self.model_name_)
        self.assertLess(
            int(model_stats["execution_count"]), int(model_stats["inference_count"])
        )

    def test_sequence_with_preserve_ordering(self):
        self.model_name_ = "seqpy_preserve_ordering_nondecoupled"
        self._test_sequence_ordering(preserve_ordering=True, decoupled=False)

    def test_sequence_without_preserve_ordering(self):
        self.model_name_ = "seqpy_no_preserve_ordering_nondecoupled"
        self._test_sequence_ordering(preserve_ordering=False, decoupled=False)

    # FIXME [DLIS-5280]: This may fail for decoupled models if writes to GRPC
    # stream are done out of order in server, so disable test for now.
    # def test_sequence_with_preserve_ordering_decoupled(self):
    #    self.model_name_ = "seqpy_preserve_ordering_decoupled"
    #    self._test_sequence_ordering(preserve_ordering=True, decoupled=True)

    # FIXME [DLIS-5280]
    # def test_sequence_without_preserve_ordering_decoupled(self):
    #    self.model_name_ = "seqpy_no_preserve_ordering_decoupled"
    #    self._test_sequence_ordering(preserve_ordering=False, decoupled=True)


# Hand control to the unittest command-line runner when this file is executed
# as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_sequence_batcher/test.sh
================================================
#!/bin/bash
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: a first positional argument, when given
# (even an empty one), takes precedence over NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set and non-empty.
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

TEST_RESULT_FILE='test_results.txt'

ldconfig || true

# Must run on a single device or else the TRITONSERVER_DELAY_SCHEDULER
# can fail when the requests are distributed to multiple devices.
export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
BATCHER_TEST=sequence_batcher_test.py

# Optional test modes are disabled ("0") unless already set to a non-empty
# value.
TEST_SYSTEM_SHARED_MEMORY=${TEST_SYSTEM_SHARED_MEMORY:-"0"}
TEST_CUDA_SHARED_MEMORY=${TEST_CUDA_SHARED_MEMORY:-"0"}
TEST_VALGRIND=${TEST_VALGRIND:-"0"}
TEST_JETSON=${TEST_JETSON:-"0"}

if [ "$TEST_VALGRIND" -eq 1 ]; then
    LEAKCHECK=/usr/bin/valgrind
    LEAKCHECK_ARGS_BASE="--leak-check=full --show-leak-kinds=definite --max-threads=3000"
    SERVER_TIMEOUT=3600
    rm -f *.valgrind.log

    # Shortened tests due to valgrind overhead
    MODEL_TRIALS="0 v"
    NO_DELAY_TESTS="test_simple_sequence \
                      test_no_sequence_start \
                      test_batch_size"
    DELAY_TESTS="test_backlog_fill_no_end \
                    test_backlog_sequence_timeout \
                    test_ragged_batch"
    QUEUE_DELAY_TESTS="test_queue_delay_full_min_util"
fi

# Shortened tests due to jetson slowdown
if [ "$TEST_JETSON" -eq 1 ]; then
    MODEL_TRIALS="0 v"
fi

# On windows the paths invoked by the script (running in WSL) must use
# /mnt/c when needed but the paths on the tritonserver command-line
# must be C:/ style.
WINDOWS=0
if [[ -v WSL_DISTRO_NAME || -v MSYSTEM ]]; then
    WINDOWS=1
    MODELDIR=${MODELDIR:-C:/models}
    DATADIR=${DATADIR:-"/mnt/c/data/inferenceserver/${REPO_VERSION}"}
    BACKEND_DIR=${BACKEND_DIR:-C:/tritonserver/backends}
    SERVER=${SERVER:-/mnt/c/tritonserver/bin/tritonserver.exe}
    export WSLENV=$WSLENV:TRITONSERVER_DELAY_SCHEDULER:TRITONSERVER_BACKLOG_DELAY_SCHEDULER
else
    MODELDIR=${MODELDIR:-$(pwd)}
    DATADIR=${DATADIR:-"/data/inferenceserver/${REPO_VERSION}"}
    TRITON_DIR=${TRITON_DIR:-"/opt/tritonserver"}
    # The server binary and the backends always come from TRITON_DIR here.
    SERVER=${TRITON_DIR}/bin/tritonserver
    BACKEND_DIR=${TRITON_DIR}/backends

    # PyTorch on SBSA requires libgomp to be loaded first. See the following
    # GitHub issue for more information:
    # https://github.com/pytorch/pytorch/issues/2575
    arch=$(uname -m)
    if [[ "$arch" == "aarch64" ]]; then
      SERVER_LD_PRELOAD=/usr/lib/${arch}-linux-gnu/libgomp.so.1
    fi
fi

SERVER_ARGS_EXTRA="--backend-directory=${BACKEND_DIR} --log-verbose=1"

source ../common/util.sh

RET=0

# If BACKENDS not specified, set to all
export BACKENDS="${BACKENDS:-onnx plan libtorch custom python}"

# If MODEL_TRIALS not specified set to 0 1 2 4 v
MODEL_TRIALS=${MODEL_TRIALS:-"0 1 2 4 v"}

# Basic sequence batcher tests
NO_DELAY_TESTS=${NO_DELAY_TESTS:-"test_simple_sequence \
                                    test_length1_sequence \
                                    test_batch_size \
                                    test_no_sequence_start \
                                    test_no_sequence_start2 \
                                    test_no_sequence_end \
                                    test_no_correlation_id"}

# Tests that use scheduler delay
DELAY_TESTS=${DELAY_TESTS:-"test_backlog_fill \
                              test_backlog_fill_no_end \
                              test_backlog_same_correlation_id \
                              test_backlog_same_correlation_id_no_end \
                              test_backlog_sequence_timeout \
                              test_half_batch \
                              test_skip_batch \
                              test_full_batch \
                              test_ragged_batch \
                              test_backlog"}

# Tests on queue delay
QUEUE_DELAY_TESTS=${QUEUE_DELAY_TESTS:-"test_queue_delay_no_min_util \
                                    test_queue_delay_half_min_util \
                                    test_queue_delay_full_min_util"}

# If ENSEMBLES not specified, set to 1
export ENSEMBLES="${ENSEMBLES:-1}"

# If IMPLICIT_STATE not specified, set to 0
export IMPLICIT_STATE="${IMPLICIT_STATE:-0}"

# If INITIAL_STATE_FILE is not specified, set to 0
export INITIAL_STATE_FILE="${INITIAL_STATE_FILE:-0}"

# If INITIAL_STATE_ZERO is not specified, set to 0
export INITIAL_STATE_ZERO="${INITIAL_STATE_ZERO:-0}"

# If USE_SINGLE_BUFFER is not specified, set to 0
export USE_SINGLE_BUFFER="${USE_SINGLE_BUFFER:-0}"

# Setup non-variable-size model repositories. The same models are in each
# repository but they are configured as:
#   models0 - four instances with non-batching model
#   models1 - one instance with batch-size 4
#   models2 - two instances with batch-size 2
#   models4 - four instances with batch-size 1
# Old logs and repositories are removed first; the directories are only
# recreated when that removal succeeds.
rm -fr *.log models{0,1,2,4} queue_delay_models \
    && mkdir models{0,1,2,4} queue_delay_models

# Print "true" when the backend name given as $1 is found in BACKENDS and
# "false" otherwise. The name is used as a grep pattern, so a match anywhere
# in the BACKENDS text counts.
function should_test_backend() {
  local target_backend=$1
  if echo "${BACKENDS[@]}" | grep -q "${target_backend}"; then
    echo "true"
  else
    echo "false"
  fi
}

# Print the space-separated datatypes to test for the backend given as $1.
function get_datatype () {
  local dtype
  case "$1" in
    plan) dtype="float32" ;;
    *)    dtype="int32 bool" ;;
  esac

  # With implicit state enabled, the onnx and libtorch model tests also
  # cover the "object" type.
  if [[ "$IMPLICIT_STATE" == "1" ]] && [[ "$1" == "onnx" || "$1" == "libtorch" ]]; then
    dtype="object int32 bool"
  fi
  echo $dtype
}

# Create a python model directory from the corresponding onnx model by
# rewriting the onnx config.pbtxt.
#   $1 - path of the python model to create; the onnx source model is
#        found by replacing every "python" in this path with "onnx"
#   $2 - destination model repository directory
# The result is written to $2/<basename of $1>/ together with an (empty or
# populated) version directory "1".
function generate_python_models () {
  # Keep all working variables local, like the other helpers in this script,
  # so that nothing leaks into the global scope of the test.
  local model_path=$1
  local dest_dir=$2
  local onnx_model=${model_path//python/onnx}
  local python_model
  python_model=$(basename "$model_path")

  mkdir -p "$dest_dir/$python_model/1/"
  # For ensemble models keep "platform: ensemble" and only rename the
  # onnx references.
  if [[ "$model_path" == *"ensemble_model"* ]]; then
    cat "$onnx_model/config.pbtxt" | sed 's/onnx/python/g' > "$dest_dir/$python_model/config.pbtxt"
  else
    # Regular models switch to the python backend and reuse the shared
    # sequence_int32 python implementation.
    cat "$onnx_model/config.pbtxt" | sed 's/platform:.*/backend:\ "python"/g' | sed 's/onnx/python/g' > "$dest_dir/$python_model/config.pbtxt"
    cp ../python_models/sequence_int32/model.py "$dest_dir/$python_model/1/"
  fi
}

# The two initial-state modes are mutually exclusive; refuse to run with both.
if [[ "$INITIAL_STATE_ZERO" == "1" ]] && [[ "$INITIAL_STATE_FILE" == "1" ]]; then
  echo -e "\n***\n*** 'INITIAL_STATE_ZERO' and 'INITIAL_STATE_FILE' can't be enabled simultaneously. \n***"
  exit 1
fi

# Choose the source repositories for fixed-size and variable-size models:
#   - no implicit state                      -> plain sequence repositories
#   - implicit state, no initial-state mode  -> implicit repositories
#   - implicit state with an initial state   -> initial_state_implicit repositories
if [ "$IMPLICIT_STATE" != "1" ]; then
  FIXED_MODEL_REPOSITORY="qa_sequence_model_repository"
  VAR_MODEL_REPOSITORY="qa_variable_sequence_model_repository"
elif [[ "$INITIAL_STATE_ZERO" == "0" && "$INITIAL_STATE_FILE" == "0" ]]; then
  FIXED_MODEL_REPOSITORY="qa_sequence_implicit_model_repository"
  VAR_MODEL_REPOSITORY="qa_variable_sequence_implicit_model_repository"
else
  FIXED_MODEL_REPOSITORY="qa_sequence_initial_state_implicit_model_repository"
  VAR_MODEL_REPOSITORY="qa_variable_sequence_initial_state_implicit_model_repository"
fi

# Collect the batching sequence models, and optionally their ensembles,
# for every backend under test. Entries containing "*" are left as glob
# patterns here and get expanded when $MODELS is iterated unquoted.
MODELS=""
PYTHON_MODELS=""
for BACKEND in $BACKENDS; do
  # The custom backend contributes a single, locally stored model.
  if [[ $BACKEND == "custom" ]]; then
    MODELS+=" ../custom_models/custom_sequence_int32"
    continue
  fi

  DTYPES=$(get_datatype $BACKEND)

  for DTYPE in $DTYPES; do
    MODELS+=" $DATADIR/$FIXED_MODEL_REPOSITORY/${BACKEND}_sequence_${DTYPE}"
  done

  [ "$ENSEMBLES" == "1" ] || continue

  for DTYPE in $DTYPES; do
    # We don't generate ensemble models for bool data type.
    [[ $DTYPE != "bool" ]] || continue

    if [ "$BACKEND" != "python" ]; then
      MODELS+=" $DATADIR/qa_ensemble_model_repository/$FIXED_MODEL_REPOSITORY/*_${BACKEND}_sequence_${DTYPE}"
    else
      # Python ensembles are derived from the onnx ones: expand the onnx
      # glob now, then rename every "onnx" in the result to "python".
      PYTHON_MODELS="$DATADIR/qa_ensemble_model_repository/$FIXED_MODEL_REPOSITORY/*_onnx_sequence_${DTYPE}"
      TMP=$(echo $PYTHON_MODELS)
      MODELS+=" ${TMP//onnx/python}"
    fi
  done
done

# Create the input_state_data file: the four bytes 0x64 0x00 0x00 0x00
# (presumably the little-endian int32 value 100 - confirm against the test).
if [ "$INITIAL_STATE_FILE" == "1" ]; then
  rm -rf input_state_data
  echo -n -e "\\x64\\x00\\x00\\x00" > input_state_data
fi

# Stage one model into a repository with a given batching layout.
#   $1 - source model directory (python models are generated from the
#        matching onnx model instead of being copied)
#   $2 - destination model repository directory
#   $3 - value to set for max_batch_size
#   $4 - instance count added after each KIND_GPU / KIND_CPU entry
function stage_sequence_model () {
  local model=$1
  local dest=$2
  local batch_size=$3
  local instance_count=$4

  if [[ "$model" =~ .*"python".* ]]; then
    generate_python_models "$model" "$dest"
  else
    cp -r "$model" "$dest/."
  fi
  (cd "$dest/$(basename "$model")" && \
    sed -i "s/^max_batch_size:.*/max_batch_size: ${batch_size}/" config.pbtxt && \
    sed -i "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: ${instance_count}/" config.pbtxt && \
    sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: ${instance_count}/" config.pbtxt)
}

# Populate the batching repositories. A normal run fills models1, models2,
# models4 and queue_delay_models; a valgrind run only creates the "_full"
# queue delay variant.
for MODEL in $MODELS; do
  if [[ ! "$TEST_VALGRIND" -eq 1 ]]; then
    # Skip libtorch string models. This single check covers every
    # repository below, so it does not need to be repeated.
    if [[ "$MODEL" =~ .*"libtorch".*"object".* ]]; then
        continue
    fi

    # Same model in three layouts, each with four batch slots in total:
    # <repository> <max_batch_size> <instance count>
    stage_sequence_model "$MODEL" models1 4 1
    stage_sequence_model "$MODEL" models2 2 2
    stage_sequence_model "$MODEL" models4 1 4

    # Duplicate the models for different delay settings. The base copy
    # uses the direct scheduler with a 3 second max queue delay and
    # minimum_slot_utilization 0.
    stage_sequence_model "$MODEL" queue_delay_models 4 1
    (cd queue_delay_models/$(basename $MODEL) && \
      sed -i "s/sequence_batching {/sequence_batching {\\ndirect {\\nmax_queue_delay_microseconds: 3000000\\nminimum_slot_utilization: 0\\n}/" config.pbtxt)

    # "_half" and "_full" are renamed copies that only differ in
    # minimum_slot_utilization (0.5 and 1).
    cp -r queue_delay_models/$(basename $MODEL) queue_delay_models/$(basename $MODEL)_half && \
      (cd queue_delay_models/$(basename $MODEL)_half && \
        sed -i "s/$(basename $MODEL)/$(basename $MODEL)_half/" config.pbtxt && \
        sed -i "s/minimum_slot_utilization: 0/minimum_slot_utilization: 0.5/" config.pbtxt)
    cp -r queue_delay_models/$(basename $MODEL) queue_delay_models/$(basename $MODEL)_full && \
      (cd queue_delay_models/$(basename $MODEL)_full && \
        sed -i "s/$(basename $MODEL)/$(basename $MODEL)_full/" config.pbtxt && \
        sed -i "s/minimum_slot_utilization: 0/minimum_slot_utilization: 1/" config.pbtxt)

    # TODO: Enable single state buffer testing for sequence batcher
    # if [ "$USE_SINGLE_BUFFER" == "1" && "$IMPLICIT_STATE" == "1" ]; then
    #   SED_REPLACE_PATTERN="N;N;N;N;N;/state.*dims:.*/a use_single_buffer: true"
    #   (cd models0/$(basename $MODEL) && \
    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
    #   (cd models1/$(basename $MODEL) && \
    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
    #   (cd models2/$(basename $MODEL) && \
    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
    #   (cd models4/$(basename $MODEL) && \
    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
    #   (cd queue_delay_models/$(basename $MODEL)_full && \
    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
    #   (cd queue_delay_models/$(basename $MODEL)_half && \
    #     sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
    # fi
  else
    # Valgrind run: build only the "_full" queue delay model, straight
    # from the source model.
    cp -r $MODEL queue_delay_models/$(basename $MODEL)_full && \
      (cd queue_delay_models/$(basename $MODEL)_full && \
        sed -i "s/$(basename $MODEL)/$(basename $MODEL)_full/" config.pbtxt && \
        sed -i "s/^max_batch_size:.*/max_batch_size: 4/" config.pbtxt && \
        sed -i "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: 1/" config.pbtxt && \
        sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 1/" config.pbtxt && \
        sed -i "s/sequence_batching {/sequence_batching {\\ndirect {\\nmax_queue_delay_microseconds: 3000000\\nminimum_slot_utilization: 0\\n}/" config.pbtxt && \
        sed -i "s/minimum_slot_utilization: 0/minimum_slot_utilization: 1/" config.pbtxt)
  fi
done

# Adjust the model repository for reading initial state for implicit state from file.
# Every staged copy of a model gets an initial_state/ directory holding
# input_state_data, and its "zero_data" setting is replaced by a
# "data_file" reference to that file.
if [ "$INITIAL_STATE_FILE" == "1" ]; then
  for MODEL in $MODELS; do
    MODEL_BASENAME=$(basename $MODEL)

    # A normal run stages six copies per model; a valgrind run only the
    # "_full" queue delay variant.
    if [[ ! "$TEST_VALGRIND" -eq 1 ]]; then
      INITIAL_STATE_TARGETS="models1/${MODEL_BASENAME}"
      INITIAL_STATE_TARGETS+=" models2/${MODEL_BASENAME}"
      INITIAL_STATE_TARGETS+=" models4/${MODEL_BASENAME}"
      INITIAL_STATE_TARGETS+=" queue_delay_models/${MODEL_BASENAME}"
      INITIAL_STATE_TARGETS+=" queue_delay_models/${MODEL_BASENAME}_half"
      INITIAL_STATE_TARGETS+=" queue_delay_models/${MODEL_BASENAME}_full"
    else
      INITIAL_STATE_TARGETS="queue_delay_models/${MODEL_BASENAME}_full"
    fi

    for INITIAL_STATE_TARGET in $INITIAL_STATE_TARGETS; do
      # Models skipped while staging (e.g. libtorch string models) have no
      # directory here. Creating one would leave a model without a
      # config.pbtxt in the repository, so leave them alone.
      [ -f "$INITIAL_STATE_TARGET/config.pbtxt" ] || continue

      mkdir -p $INITIAL_STATE_TARGET/initial_state/ && \
        cp input_state_data $INITIAL_STATE_TARGET/initial_state/ && \
        (cd $INITIAL_STATE_TARGET && \
          sed -i "s/zero_data.*/data_file:\"input_state_data\"/" config.pbtxt)
    done
  done
fi

# Collect the non-batching ("nobatch") sequence models, and optionally
# their ensembles, for every backend under test. Entries containing "*"
# stay glob patterns until $MODELS is iterated unquoted.
MODELS=""
PYTHON_MODELS=""
for BACKEND in $BACKENDS; do
  if [[ $BACKEND == "custom" ]]; then
    MODELS+=" ../custom_models/custom_sequence_int32"
    continue
  fi

  DTYPES=$(get_datatype $BACKEND)
  for DTYPE in $DTYPES; do
    MODELS+=" $DATADIR/$FIXED_MODEL_REPOSITORY/${BACKEND}_nobatch_sequence_${DTYPE}"
  done

  [ "$ENSEMBLES" == "1" ] || continue

  for DTYPE in $DTYPES; do
    # We don't generate ensemble models for bool data type.
    [[ $DTYPE != "bool" ]] || continue

    if [ "$BACKEND" != "python" ]; then
      MODELS+=" $DATADIR/qa_ensemble_model_repository/$FIXED_MODEL_REPOSITORY/*_${BACKEND}_nobatch_sequence_${DTYPE}"
    else
      # Expand the onnx ensemble glob first, then rename "onnx" to "python".
      PYTHON_MODELS="$DATADIR/qa_ensemble_model_repository/$FIXED_MODEL_REPOSITORY/*_onnx_nobatch_sequence_${DTYPE}"
      TMP=$(echo $PYTHON_MODELS)
      MODELS+=" ${TMP//onnx/python}"
    fi
  done
done

# Fill models0: every model gets four instances per KIND_GPU / KIND_CPU
# entry; max_batch_size is left untouched.
for MODEL in $MODELS; do
  if [[ ! "$MODEL" =~ .*"python".* ]]; then
    cp -r $MODEL models0/.
  else
    generate_python_models "$MODEL" "models0"
  fi
  (cd models0/$(basename $MODEL) && \
    sed -i -e "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: 4/" \
           -e "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 4/" config.pbtxt)

  # Only the initial-state-from-file mode needs the extra data file.
  [ "$INITIAL_STATE_FILE" == "1" ] || continue
  mkdir -p models0/$(basename $MODEL)/initial_state/ && \
    cp input_state_data models0/$(basename $MODEL)/initial_state/ && \
    (cd models0/$(basename $MODEL) && \
      sed -i "s/zero_data.*/data_file:\"input_state_data\"/" config.pbtxt)
done

# modelsv - one instance with batch-size 4, built from the variable-size
# model repository.
rm -fr modelsv && mkdir modelsv

MODELS=""
PYTHON_MODELS=""
for BACKEND in $BACKENDS; do
  if [[ $BACKEND == "custom" ]]; then
    MODELS+=" ../custom_models/custom_sequence_int32"
    continue
  fi

  DTYPES=$(get_datatype $BACKEND)
  for DTYPE in $DTYPES; do
    MODELS+=" $DATADIR/${VAR_MODEL_REPOSITORY}/${BACKEND}_sequence_${DTYPE}"
  done

  [ "$ENSEMBLES" == "1" ] || continue

  for DTYPE in $DTYPES; do
    # We don't generate ensemble models for bool data type.
    [[ $DTYPE != "bool" ]] || continue

    if [ "$BACKEND" != "python" ]; then
      MODELS+=" $DATADIR/qa_ensemble_model_repository/${VAR_MODEL_REPOSITORY}/*_${BACKEND}_sequence_${DTYPE}"
    else
      # NOTE(review): the python ensembles are derived from
      # $FIXED_MODEL_REPOSITORY while every other entry in this loop comes
      # from $VAR_MODEL_REPOSITORY - confirm that this is intentional.
      PYTHON_MODELS="$DATADIR/qa_ensemble_model_repository/$FIXED_MODEL_REPOSITORY/*_onnx_sequence_${DTYPE}"
      TMP=$(echo $PYTHON_MODELS)
      MODELS+=" ${TMP//onnx/python}"
    fi
  done
done

for MODEL in $MODELS; do
  # Skip libtorch string models
  if [[ "$MODEL" =~ .*"libtorch".*"object".* ]]; then
    continue
  fi

  if [[ ! "$MODEL" =~ .*"python".* ]]; then
    cp -r $MODEL modelsv/.
  else
    generate_python_models "$MODEL" "modelsv"
  fi
  (cd modelsv/$(basename $MODEL) && \
    sed -i -e "s/^max_batch_size:.*/max_batch_size: 4/" \
           -e "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: 1/" \
           -e "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 1/" config.pbtxt)

  # Only the initial-state-from-file mode needs the extra data file.
  [ "$INITIAL_STATE_FILE" == "1" ] || continue
  mkdir -p modelsv/$(basename $MODEL)/initial_state/ && \
    cp input_state_data modelsv/$(basename $MODEL)/initial_state/ && \
    (cd modelsv/$(basename $MODEL) && \
      sed -i "s/zero_data.*/data_file:\"input_state_data\"/" config.pbtxt)
done

# The same tests work on all models since they all have the same total
# number of batch slots. Each trial value selects the repository
# "models<trial>" and derives the settings exported to the test client.
for model_trial in $MODEL_TRIALS; do
    # NO_BATCHING is 1 only for trial "0" (the non-batching repository).
    export NO_BATCHING=1 &&
        [[ "$model_trial" != "0" ]] && export NO_BATCHING=0
    # MODEL_INSTANCES: 1 for trial "v", 4 for trial "0", otherwise the
    # trial value itself. Each && link only runs while the preceding
    # comparison succeeds, so the last export that runs wins.
    export MODEL_INSTANCES=1 &&
        [[ "$model_trial" != "v" ]] && export MODEL_INSTANCES=4 &&
        [[ "$model_trial" != "0" ]] && export MODEL_INSTANCES=$model_trial

    MODEL_PATH=models${model_trial}

    # Ensembles additionally need the nop_* models in the same repository.
    # create_nop_version_dir is a helper defined outside this section.
    if [ "$ENSEMBLES" == "1" ]; then
      cp -r $DATADIR/qa_ensemble_model_repository/${FIXED_MODEL_REPOSITORY}/nop_* `pwd`/$MODEL_PATH/.
        create_nop_version_dir `pwd`/$MODEL_PATH
      # Must load identity backend on GPU to avoid cuda init delay during 1st run
      for NOP_MODEL in `pwd`/$MODEL_PATH/nop_*; do
        (cd $NOP_MODEL && sed -i "s/kind: KIND_CPU/kind: KIND_GPU/" config.pbtxt)
      done
    fi

    # Need to launch the server for each test so that the model status
    # is reset (which is used to make sure the correct batch size was
    # used for execution). Test everything with fixed-tensor-size
    # models and variable-tensor-size models.
    # BATCHER_TYPE is "VARIABLE" for trial "v" and "FIXED" otherwise.
    export BATCHER_TYPE="VARIABLE" &&
        [[ "$model_trial" != "v" ]] && export BATCHER_TYPE="FIXED"

    # Tests that run without any scheduler delay.
    for i in $NO_DELAY_TESTS; do
        SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"
        SERVER_LOG="./$i.$MODEL_PATH.server.log"

        # run_server / run_server_leakcheck are helpers defined outside
        # this section; a SERVER_PID of "0" afterwards is treated as a
        # failed start.
        if [ "$TEST_VALGRIND" -eq 1 ]; then
            LEAKCHECK_LOG="./$i.$MODEL_PATH.valgrind.log"
            LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
            run_server_leakcheck
        else
            run_server
        fi

        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        echo "Test: $i, repository $MODEL_PATH" >>$CLIENT_LOG

        # Failures are recorded in RET instead of aborting, so errexit is
        # switched off while the client runs.
        set +e
        python3 $BATCHER_TEST SequenceBatcherTest.$i >>$CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
            echo -e "\n***\n*** Test $i Failed\n***"
            RET=1
        else
            # Presumably verifies that exactly one test ran and passed -
            # check_test_results is defined outside this section.
            check_test_results $TEST_RESULT_FILE 1
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG
                echo -e "\n***\n*** Test Result Verification Failed\n***"
                RET=1
            fi
        fi
        set -e

        kill_server

        # The valgrind log is only inspected after the server has exited.
        set +e
        if [ "$TEST_VALGRIND" -eq 1 ]; then
            python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
            if [ $? -ne 0 ]; then
                RET=1
            fi
        fi
        set -e
    done

    # Tests that require TRITONSERVER_DELAY_SCHEDULER so that the
    # scheduler is delayed and requests can collect in the queue.
    for i in $DELAY_TESTS; do
        # Resulting TRITONSERVER_BACKLOG_DELAY_SCHEDULER per test:
        #   test_backlog_fill_no_end                               -> 3
        #   test_backlog_fill, test_backlog_same_correlation_id    -> 2
        #   every other test                                       -> 0
        export TRITONSERVER_BACKLOG_DELAY_SCHEDULER=3 &&
            [[ "$i" != "test_backlog_fill_no_end" ]] && export TRITONSERVER_BACKLOG_DELAY_SCHEDULER=2 &&
            [[ "$i" != "test_backlog_fill" ]] &&
            [[ "$i" != "test_backlog_same_correlation_id" ]] && export TRITONSERVER_BACKLOG_DELAY_SCHEDULER=0
        # Resulting TRITONSERVER_DELAY_SCHEDULER per test:
        #   test_backlog_fill_no_end, test_backlog_fill            -> 10
        #   test_backlog_same_correlation_id_no_end                -> 16
        #   test_half_batch                                        -> 8
        #   test_backlog_sequence_timeout                          -> 4
        #   every other test                                       -> 12
        export TRITONSERVER_DELAY_SCHEDULER=10 &&
            [[ "$i" != "test_backlog_fill_no_end" ]] &&
            [[ "$i" != "test_backlog_fill" ]] && export TRITONSERVER_DELAY_SCHEDULER=16 &&
            [[ "$i" != "test_backlog_same_correlation_id_no_end" ]] && export TRITONSERVER_DELAY_SCHEDULER=8 &&
            [[ "$i" != "test_half_batch" ]] && export TRITONSERVER_DELAY_SCHEDULER=4 &&
            [[ "$i" != "test_backlog_sequence_timeout" ]] && export TRITONSERVER_DELAY_SCHEDULER=12
        SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"
        SERVER_LOG="./$i.$MODEL_PATH.server.log"

        if [ "$TEST_VALGRIND" -eq 1 ]; then
            LEAKCHECK_LOG="./$i.$MODEL_PATH.valgrind.log"
            LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
            run_server_leakcheck
        else
            run_server
        fi

        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        echo "Test: $i, repository $MODEL_PATH" >>$CLIENT_LOG

        set +e
        python3 $BATCHER_TEST SequenceBatcherTest.$i >>$CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
            echo -e "\n***\n*** Test $i Failed\n***"
            RET=1
        else
            check_test_results $TEST_RESULT_FILE 1
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG
                echo -e "\n***\n*** Test Result Verification Failed\n***"
                RET=1
            fi
        fi
        set -e

        # Drop the delay settings so they do not leak into later tests.
        unset TRITONSERVER_DELAY_SCHEDULER
        unset TRITONSERVER_BACKLOG_DELAY_SCHEDULER
        kill_server

        set +e
        if [ "$TEST_VALGRIND" -eq 1 ]; then
            python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
            if [ $? -ne 0 ]; then
                RET=1
            fi
        fi
        set -e
    done
done

# ragged models
# Only runs when BACKENDS contains "custom": the custom sequence model is
# copied into its own repository and its "INPUT" tensor is marked with
# allow_ragged_batch: true.
if [[ $BACKENDS == *"custom"* ]]; then
  rm -fr ragged_models && mkdir ragged_models
  cp -r ../custom_models/custom_sequence_int32 ragged_models/.
  (cd ragged_models/custom_sequence_int32 && \
          sed -i "s/name:.*\"INPUT\"/name: \"INPUT\"\\nallow_ragged_batch: true/" config.pbtxt)

  # Settings exported to the test client for this repository.
  export NO_BATCHING=0
  export MODEL_INSTANCES=1
  export BATCHER_TYPE="FIXED"
  MODEL_PATH=ragged_models

  # Need to launch the server for each test so that the model status
  # is reset (which is used to make sure the correct batch size was
  # used for execution). Only the fixed batcher type is used here.
  for i in test_ragged_batch_allowed ; do
    # Delay the scheduler so that requests can collect in the queue.
    export TRITONSERVER_BACKLOG_DELAY_SCHEDULER=0
    export TRITONSERVER_DELAY_SCHEDULER=12

    SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"
    SERVER_LOG="./$i.$MODEL_PATH.server.log"

    # run_server / run_server_leakcheck are helpers defined outside this
    # section; a SERVER_PID of "0" afterwards is treated as a failed start.
    if [ "$TEST_VALGRIND" -eq 1 ]; then
      LEAKCHECK_LOG="./$i.$MODEL_PATH.valgrind.log"
      LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
      run_server_leakcheck
    else
        run_server
    fi

    if [ "$SERVER_PID" == "0" ]; then
      echo -e "\n***\n*** Failed to start $SERVER\n***"
      cat $SERVER_LOG
      exit 1
    fi

    echo "Test: $i, repository $MODEL_PATH" >>$CLIENT_LOG

    # Failures are recorded in RET instead of aborting the script.
    set +e
    python3 $BATCHER_TEST SequenceBatcherTest.$i >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
      echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
      echo -e "\n***\n*** Test $i Failed\n***"
      RET=1
    else
      check_test_results $TEST_RESULT_FILE 1
      if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
      fi
    fi
    set -e

    # Drop the delay settings so they do not leak into later tests.
    unset TRITONSERVER_DELAY_SCHEDULER
    unset TRITONSERVER_BACKLOG_DELAY_SCHEDULER
    kill_server

    set +e
    if [ "$TEST_VALGRIND" -eq 1 ]; then
      python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
      if [ $? -ne 0 ]; then
          RET=1
      fi
    fi
    set -e
  done
fi

# max queue delay
# Runs every test in QUEUE_DELAY_TESTS against queue_delay_models, with a
# fresh server per test. MODEL_INSTANCES and BATCHER_TYPE are not set
# here; they keep whatever value was exported earlier in the script.
MODEL_PATH=queue_delay_models
# remove ensemble models from the test model repo
rm -rf queue_delay_models/simple_* queue_delay_models/fan_* queue_delay_models/sequence_*
for i in $QUEUE_DELAY_TESTS ; do
    export NO_BATCHING=0
    export TRITONSERVER_BACKLOG_DELAY_SCHEDULER=0
    export TRITONSERVER_DELAY_SCHEDULER=2
    SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"
    SERVER_LOG="./$i.$MODEL_PATH.server.log"

    # run_server / run_server_leakcheck are helpers defined outside this
    # section; a SERVER_PID of "0" afterwards is treated as a failed start.
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        LEAKCHECK_LOG="./$i.$MODEL_PATH.valgrind.log"
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
        run_server_leakcheck
    else
        run_server
    fi

    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $i, repository $MODEL_PATH" >>$CLIENT_LOG

    # Failures are recorded in RET instead of aborting the script.
    set +e
    python3 $BATCHER_TEST SequenceBatcherTest.$i >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
        echo -e "\n***\n*** Test $i Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    # Drop the delay settings so they do not leak into later tests.
    unset TRITONSERVER_DELAY_SCHEDULER
    unset TRITONSERVER_BACKLOG_DELAY_SCHEDULER
    kill_server

    set +e
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
        if [ $? -ne 0 ]; then
            RET=1
        fi
    fi
    set -e
done

# Test request timeout with sequence batcher
# only run the test outside shared memory setting as
# shared memory feature is irrelevant
if [ "$TEST_SYSTEM_SHARED_MEMORY" -ne 1 ] && [ "$TEST_CUDA_SHARED_MEMORY" -ne 1 ]; then
    # Settings exported to the test client.
    export NO_BATCHING=0
    export MODEL_INSTANCES=1
    export BATCHER_TYPE="FIXED"

    TEST_CASE=SequenceBatcherRequestTimeoutTest
    MODEL_PATH=request_timeout_models
    # Only the version directory is created here; the rest of the model
    # is expected to be present in the repository already.
    mkdir -p ${MODEL_PATH}/custom_sequence_int32_timeout/1

    SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"
    SERVER_LOG="./$TEST_CASE.$MODEL_PATH.server.log"

    if [ "$TEST_VALGRIND" -eq 1 ]; then
        # Name the valgrind log after this test case, like SERVER_LOG.
        # "$i" is only a leftover from the previous test loop.
        LEAKCHECK_LOG="./$TEST_CASE.$MODEL_PATH.valgrind.log"
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
        run_server_leakcheck
    else
        run_server
    fi

    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $TEST_CASE, repository $MODEL_PATH" >>$CLIENT_LOG

    # Failures are recorded in RET instead of aborting the script.
    set +e
    python3 $BATCHER_TEST $TEST_CASE >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test $TEST_CASE Failed\n***" >>$CLIENT_LOG
        echo -e "\n***\n*** Test $TEST_CASE Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE 2
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    kill_server

    set +e
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
        if [ $? -ne 0 ]; then
            RET=1
        fi
    fi
    set -e
fi

### Start Preserve Ordering Tests ###

# FIXME: Test is currently skipped on Windows (WSL / MSYS) because it uses python
# backend models. Now that Windows supports the PYBE, we should check that this
# test works once Windows CI is stable.

# These subtests use python models. They should not be executed if 'python' is not one
# of the backends under test. They are also skipped when WSL_DISTRO_NAME or
# MSYSTEM is set.
if [[ $(should_test_backend "python") == "true" &&  !( -v WSL_DISTRO_NAME || -v MSYSTEM )]]; then
    # Test preserve ordering true/false and decoupled/non-decoupled
    TEST_CASE=SequenceBatcherPreserveOrderingTest
    MODEL_PATH=preserve_ordering_models
    BASE_MODEL="../python_models/sequence_py"
    rm -rf ${MODEL_PATH}

    # FIXME [DLIS-5280]: This may fail for decoupled models if writes to GRPC
    # stream are done out of order in server, so decoupled tests are disabled.
    # For each mode build two models from BASE_MODEL: one as-is and one
    # with preserve_ordering switched to True.
    MODES="decoupled nondecoupled"
    for mode in $MODES; do
        NO_PRESERVE="${MODEL_PATH}/seqpy_no_preserve_ordering_${mode}"
        mkdir -p ${NO_PRESERVE}/1
        cp ${BASE_MODEL}/config.pbtxt ${NO_PRESERVE}
        cp ${BASE_MODEL}/model.py ${NO_PRESERVE}/1

        PRESERVE="${MODEL_PATH}/seqpy_preserve_ordering_${mode}"
        cp -r ${NO_PRESERVE} ${PRESERVE}
        sed -i "s/^preserve_ordering: False/preserve_ordering: True/" ${PRESERVE}/config.pbtxt

        # Decoupled variants additionally enable the decoupled transaction policy.
        if [ ${mode} == "decoupled" ]; then
          echo -e "\nmodel_transaction_policy { decoupled: true }" >> ${NO_PRESERVE}/config.pbtxt
          echo -e "\nmodel_transaction_policy { decoupled: true }" >> ${PRESERVE}/config.pbtxt
        fi
    done

    SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"
    SERVER_LOG="./$TEST_CASE.$MODEL_PATH.server.log"

    if [ "$TEST_VALGRIND" -eq 1 ]; then
        # Name the valgrind log after this test case, like SERVER_LOG.
        # "$i" is only a leftover from an earlier test loop.
        LEAKCHECK_LOG="./$TEST_CASE.$MODEL_PATH.valgrind.log"
        LEAKCHECK_ARGS="$LEAKCHECK_ARGS_BASE --log-file=$LEAKCHECK_LOG"
        run_server_leakcheck
    else
        run_server
    fi

    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $TEST_CASE, repository $MODEL_PATH" >>$CLIENT_LOG

    # Failures are recorded in RET instead of aborting the script.
    set +e
    python3 $BATCHER_TEST $TEST_CASE >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test $TEST_CASE Failed\n***" >>$CLIENT_LOG
        echo -e "\n***\n*** Test $TEST_CASE Failed\n***"
        RET=1
    else
        # 2 for preserve_ordering = True/False
        check_test_results $TEST_RESULT_FILE 2
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    kill_server

    set +e
    if [ "$TEST_VALGRIND" -eq 1 ]; then
        python3 ../common/check_valgrind_log.py -f $LEAKCHECK_LOG
        if [ $? -ne 0 ]; then
            RET=1
        fi
    fi
    set -e
fi

### End Preserve Ordering Tests ###

# Print the overall verdict and exit with RET. Anything other than a
# clean 0 dumps the client log first and is reported as a failure.
if ! [ $RET -eq 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import threading
import time
import unittest

import numpy as np
import sequence_util as su
import test_util as tu
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException, np_to_triton_dtype

# Shared-memory modes are opt-in: an unset variable (or "0") disables them.
_test_system_shared_memory = int(os.environ.get("TEST_SYSTEM_SHARED_MEMORY", 0)) != 0
_test_cuda_shared_memory = int(os.environ.get("TEST_CUDA_SHARED_MEMORY", 0)) != 0

# NO_BATCHING and MODEL_INSTANCES are mandatory; a missing variable raises
# KeyError as soon as this module is imported.
_no_batching = int(os.environ["NO_BATCHING"]) == 1
_model_instances = int(os.environ["MODEL_INSTANCES"])

# Trial names to exercise; the non-batching run uses the "_nobatch" variants.
_trials = ("plan_nobatch", "onnx_nobatch") if _no_batching else ("plan", "onnx")

_protocols = ("http", "grpc")
_max_sequence_idle_ms = 5000


class SequenceCorrIDBatcherTest(su.SequenceBatcherTestUtil):
    def get_datatype(self, trial):
        """Return the numpy datatype used for the tensors of ``trial``.

        The result is always ``np.int32``; ``trial`` is accepted but does not
        influence it.
        """
        return np.int32

    def get_expected_result(self, expected_result, corrid, value, trial, flag_str=None):
        """Return the result expected from the model for one inference.

        Some models could not implement the full accumulator (see
        qa/common/gen_qa_dyna_sequence_models.py). For those the expectation
        is recomputed from the last input ``value``: plus 1 when ``flag_str``
        contains "start", plus ``corrid`` when it contains "end". For every
        other trial ``expected_result`` is returned unchanged.
        """
        batching_non_custom = "nobatch" not in trial and "custom" not in trial
        needs_adjustment = batching_non_custom or any(
            backend in trial for backend in ("plan", "onnx", "libtorch")
        )
        if not needs_adjustment:
            return expected_result

        flags = "" if flag_str is None else flag_str
        adjusted = value
        if "start" in flags:
            adjusted += 1
        if "end" in flags:
            adjusted += corrid
        return adjusted

    def data_type_to_string(self, dtype):
        """Convert a ``TYPE_*`` data type name to its short form.

        ``TYPE_STRING`` becomes ``BYTES``; any other name has every
        ``TYPE_`` occurrence removed (for example ``TYPE_UINT64`` becomes
        ``UINT64``).
        """
        return "BYTES" if dtype == "TYPE_STRING" else dtype.replace("TYPE_", "")

    def test_skip_batch(self):
        """Send four parallel sequences of unequal length to each trial model.

        Test model instances together are configured with total-batch-size 4.
        Two of the four sequences are shorter than the others, so padding
        must be applied correctly for the longer sequences.
        """
        corrids = [1001, 1002, 1003, 1004]
        # Input values of each sequence; entry N pairs with corrids[N],
        # shared-memory region N and worker thread N.
        sequence_values = (
            (1, 3),
            (11, 12, 13, 14),
            (111, 113),
            (1111, 1112, 1113, 1114),
        )

        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            shm_handles = [
                self.precreate_register_regions(values, dtype, region_idx)
                for region_idx, values in enumerate(sequence_values)
            ]
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)

                # Need scheduler to wait for queue to contain all
                # inferences for both sequences.
                for env_name, required_value in (
                    ("TRITONSERVER_DELAY_SCHEDULER", 12),
                    ("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", 0),
                ):
                    self.assertIn(env_name, os.environ)
                    self.assertEqual(int(os.environ[env_name]), required_value)

                threads = []
                for corrid, values, handles in zip(
                    corrids, sequence_values, shm_handles
                ):
                    final_pos = len(values) - 1
                    # (flag_str, value, pre_delay_ms): the first request
                    # carries "start", the last "end", the rest no flag.
                    steps = tuple(
                        (
                            "start"
                            if pos == 0
                            else ("end" if pos == final_pos else None),
                            value,
                            None,
                        )
                        for pos, value in enumerate(values)
                    )
                    # The unadjusted expectation is the sum of all inputs
                    # plus the correlation ID.
                    expected = self.get_expected_result(
                        sum(values) + corrid, corrid, values[-1], trial, "end"
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_async,
                            args=(
                                trial,
                                model_name,
                                dtype,
                                corrid,
                                (None, None),
                                steps,
                                expected,
                                handles,
                            ),
                            kwargs={
                                "sequence_name": "{}".format(self._testMethodName)
                            },
                        )
                    )

                # Start the two long sequences first, then the short ones a
                # second later.
                for first in (1, 3):
                    threads[first].start()
                time.sleep(1)
                for later in (0, 2):
                    threads[later].start()
                for worker in threads:
                    worker.join()
                self.check_deferred_exception()

                # First check_status() argument for each supported
                # MODEL_INSTANCES value; other values skip the check.
                status_by_instances = {1: {4: 4}, 2: {2: 8}, 4: {1: 12}}
                if _model_instances in status_by_instances:
                    self.check_status(
                        model_name, status_by_instances[_model_instances], 12, 12
                    )
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    for handles in shm_handles:
                        self.cleanup_shm_regions(handles)

    def test_corrid_data_type(self):
        """Check how ``add_sub`` reacts to string and integer correlation IDs.

        The correlation ID data type configured for the model is read from
        TRITONSERVER_CORRID_DATA_TYPE. A sequence-start request is sent once
        with a string ID and once with an integer ID. A mismatching ID type
        must be rejected with the expected error message; a matching one must
        return the correct sum and difference.
        """
        model_name = "add_sub"
        expected_corrid_dtype = os.environ["TRITONSERVER_CORRID_DATA_TYPE"]
        integer_corrid_dtypes = ["TYPE_UINT32", "TYPE_INT32", "TYPE_UINT64", "TYPE_INT64"]

        for corrid, corrid_dtype in [("corrid", "TYPE_STRING"), (123, "TYPE_UINT64")]:
            # Check if the corrid data type matches the expected corrid data type specified in the model config
            if corrid_dtype == "TYPE_STRING":
                dtypes_match = expected_corrid_dtype == "TYPE_STRING"
            elif corrid_dtype == "TYPE_UINT64":
                dtypes_match = expected_corrid_dtype in integer_corrid_dtypes
            else:
                dtypes_match = True

            with httpclient.InferenceServerClient("localhost:8000") as client:
                input0_data = np.random.rand(16).astype(np.float32)
                input1_data = np.random.rand(16).astype(np.float32)

                inputs = []
                for tensor_name, data in (
                    ("INPUT0", input0_data),
                    ("INPUT1", input1_data),
                ):
                    tensor = httpclient.InferInput(
                        tensor_name, data.shape, np_to_triton_dtype(data.dtype)
                    )
                    tensor.set_data_from_numpy(data)
                    inputs.append(tensor)

                # The request starts a sequence and deliberately leaves it open.
                request_kwargs = {
                    "sequence_id": corrid,
                    "sequence_start": True,
                    "sequence_end": False,
                }

                if dtypes_match:
                    response = client.infer(model_name, inputs, **request_kwargs)
                    response.get_response()
                    output0_data = response.as_numpy("OUTPUT0")
                    output1_data = response.as_numpy("OUTPUT1")

                    self.assertTrue(
                        np.allclose(input0_data + input1_data, output0_data),
                        "add_sub example error: incorrect sum",
                    )

                    self.assertTrue(
                        np.allclose(input0_data - input1_data, output1_data),
                        "add_sub example error: incorrect difference",
                    )
                else:
                    with self.assertRaises(InferenceServerException) as e:
                        client.infer(model_name, inputs, **request_kwargs)
                    err_str = str(e.exception)
                    self.assertIn(
                        f"sequence batching control 'CORRID' data-type is '{self.data_type_to_string(corrid_dtype)}', but model '{model_name}' expects '{self.data_type_to_string(expected_corrid_dtype)}'",
                        err_str,
                    )


# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_sequence_corrid_batcher/test.sh
================================================
#!/bin/bash
# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: default to NVIDIA_TRITON_SERVER_VERSION,
# let the first command-line argument override it, and abort if neither
# provides a value.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# When TEST_REPO_ARCH is set, append it to the version as a "_<arch>" suffix.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# File passed to check_test_results after each Python test run.
TEST_RESULT_FILE='test_results.txt'

# All client-side test output is appended to CLIENT_LOG.
CLIENT_LOG="./client.log"
BATCHER_TEST=sequence_corrid_batcher_test.py

# Root of the pre-generated model repositories for this version.
DATADIR=/data/inferenceserver/${REPO_VERSION}

SERVER=/opt/tritonserver/bin/tritonserver
# Presumably provides run_server and check_test_results used below --
# confirm against ../common/util.sh.
source ../common/util.sh

# Overall exit status; set to 1 as soon as any test fails.
RET=0

# Must run on a single device or else the TRITONSERVER_DELAY_SCHEDULER
# can fail when the requests are distributed to multiple devices.
export CUDA_VISIBLE_DEVICES=0

# Setup non-variable-size model repositories. The same models are in each
# repository but they are configured as:
#   models4 - four instances with batch-size 1
#
# Old logs and model directories are removed first. Each copied config.pbtxt
# is then edited in place:
#   1. everything from "oldest" up to "control_input" is replaced by
#      "control_input" (-z makes sed treat the file as a single line so the
#      match can span newlines),
#   2. max_batch_size is forced to 1,
#   3. "count: 4" is added after each KIND_GPU / KIND_CPU instance kind.
rm -fr *.log  models{0,1,2,4} && mkdir models4
for m in \
        $DATADIR/qa_dyna_sequence_model_repository/plan_dyna_sequence_int32 \
        $DATADIR/qa_dyna_sequence_model_repository/onnx_dyna_sequence_int32 \
        $DATADIR/qa_dyna_sequence_model_repository/libtorch_dyna_sequence_int32; do
    cp -r $m models4/. && \
        (cd models4/$(basename $m) && \
            sed -i -z "s/oldest.*{.*}.*control_input/control_input/" config.pbtxt && \
            sed -i "s/^max_batch_size:.*/max_batch_size: 1/" config.pbtxt && \
            sed -i "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: 4/" config.pbtxt && \
            sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 4/" config.pbtxt)
done

# The same test works on all models since they all have the same total
# number of batch slots.
for model_trial in 4; do
    # NO_BATCHING and MODEL_INSTANCES are read by the Python test at import
    # time. A model_trial of "0" would mean no batching with one instance;
    # any other value means batching with that many instances.
    export NO_BATCHING=1 &&
        [[ "$model_trial" != "0" ]] && export NO_BATCHING=0
    export MODEL_INSTANCES=1 &&
        [[ "$model_trial" != "0" ]] && export MODEL_INSTANCES=$model_trial

    MODEL_DIR=models${model_trial}

    # Tests that require TRITONSERVER_DELAY_SCHEDULER so that the
    # scheduler is delayed and requests can collect in the queue.
    for i in test_skip_batch ; do
        # The Python test asserts these exact values (12 and 0).
        export TRITONSERVER_BACKLOG_DELAY_SCHEDULER=0
        export TRITONSERVER_DELAY_SCHEDULER=12
        SERVER_ARGS="--model-repository=`pwd`/$MODEL_DIR"
        SERVER_LOG="./$i.$MODEL_DIR.server.log"
        run_server
        # A SERVER_PID of "0" is treated as a failed server start.
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        echo "Test: $i, repository $MODEL_DIR" >>$CLIENT_LOG

        # Do not abort on a failing test; record the failure in RET instead.
        set +e
        python $BATCHER_TEST SequenceCorrIDBatcherTest.$i >>$CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
            echo -e "\n***\n*** Test $i Failed\n***"
            RET=1
        else
            # Verify the result file against the expected count of 1.
            check_test_results $TEST_RESULT_FILE 1
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG
                echo -e "\n***\n*** Test Result Verification Failed\n***"
                RET=1
            fi
        fi
        set -e

        # Clear the scheduler delays and stop the server before the next run.
        unset TRITONSERVER_DELAY_SCHEDULER
        unset TRITONSERVER_BACKLOG_DELAY_SCHEDULER
        kill $SERVER_PID
        wait $SERVER_PID
    done
done

# Test correlation ID data type
#
# Build a repository holding the Python add_sub model. For every data type
# below, the model config is copied afresh and a sequence_batching section
# declaring a CORRID control input of that type is appended to it.
mkdir -p corrid_data_type/add_sub/1
cp ../python_models/add_sub/model.py corrid_data_type/add_sub/1

for corrid_data_type in TYPE_STRING TYPE_UINT32 TYPE_INT32 TYPE_UINT64 TYPE_INT64; do
    (cd corrid_data_type/add_sub && \
    cp ../../../python_models/add_sub/config.pbtxt . && \
    echo "sequence_batching { \
        control_input [{ \
            name: \"CORRID\" \
            control [{ \
            kind: CONTROL_SEQUENCE_CORRID \
            data_type: $corrid_data_type \
            }]
        }] \
        }" >> config.pbtxt)
    MODEL_DIR=corrid_data_type

    for i in test_corrid_data_type ; do
        # Tells the Python test which correlation ID type the model expects.
        export TRITONSERVER_CORRID_DATA_TYPE=$corrid_data_type
        SERVER_ARGS="--model-repository=`pwd`/$MODEL_DIR"
        # NOTE(review): this path does not include $corrid_data_type, so every
        # iteration of the outer loop uses the same server log file.
        SERVER_LOG="./$i.$MODEL_DIR.server.log"
        run_server
        # A SERVER_PID of "0" is treated as a failed server start.
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        echo "Test: $i, repository $MODEL_DIR" >>$CLIENT_LOG

        # Do not abort on a failing test; record the failure in RET instead.
        set +e
        python $BATCHER_TEST SequenceCorrIDBatcherTest.$i >>$CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
            echo -e "\n***\n*** Test $i Failed\n***"
            RET=1
        else
            # Verify the result file against the expected count of 1.
            check_test_results $TEST_RESULT_FILE 1
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG
                echo -e "\n***\n*** Test Result Verification Failed\n***"
                RET=1
            fi
        fi
        set -e

        # Stop the server so the next data type starts from a fresh config.
        unset TRITONSERVER_CORRID_DATA_TYPE
        kill $SERVER_PID
        wait $SERVER_PID
    done
done

# Print the final verdict; on failure dump the accumulated client log first.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

# Exit with 0 when every test passed, 1 otherwise.
exit $RET


================================================
FILE: qa/L0_sequence_stress/sequence_stress.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import argparse
import threading
import time
import traceback
from builtins import range, str
from functools import partial

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import np_to_triton_dtype

if sys.version_info >= (3, 0):
    import queue
else:
    import Queue as queue

# Placeholder for the run-time options object; code in this file reads
# FLAGS.verbose. Presumably assigned from parsed command-line arguments
# elsewhere in the file -- TODO confirm.
FLAGS = None
# NOTE(review): presumably the number of correlation IDs reserved per stress
# thread -- confirm against the code that starts the threads.
CORRELATION_ID_BLOCK_SIZE = 100
# Default value of the timeout_ms parameter of check_sequence_async().
DEFAULT_TIMEOUT_MS = 5000
# NOTE(review): presumably passed as len_mean / len_stddev to the sequence_*
# generators -- confirm against the callers.
SEQUENCE_LENGTH_MEAN = 16
SEQUENCE_LENGTH_STDEV = 8

# Shared list declared global by stress_thread(); the lock next to it is
# presumably meant to guard access to that list -- TODO confirm.
_thread_exceptions = []
_thread_exceptions_mutex = threading.Lock()


class UserData:
    """Holder for the responses delivered to the stream callback."""

    def __init__(self):
        # Queue of (result, error) tuples, filled by completion_callback().
        completed = queue.Queue()
        self._completed_requests = completed


# Callback function used for async_stream_infer()
def completion_callback(user_data, result, error):
    """Queue one stream response on ``user_data`` for later processing.

    Errors are not raised here; they are stored next to the result so that
    the code consuming the queue can raise and handle them.
    """
    outcome = (result, error)
    user_data._completed_requests.put(outcome)


class TimeoutException(Exception):
    """Raised when a sequence takes longer than its allowed timeout."""


def check_sequence_async(
    client_metadata,
    trial,
    model_name,
    input_dtype,
    steps,
    timeout_ms=DEFAULT_TIMEOUT_MS,
    sequence_name="<unknown>",
):
    """Perform sequence of inferences using async run. The 'steps' holds
    a list of tuples, one for each inference with format:

    (flag_str, value, expected_result, delay_ms)

    All requests are sent on one stream first; the responses are then
    checked in the order the requests were sent.

    Args:
        client_metadata: (triton_client, sequence_id) pair; the client is
            used for streaming and the ID is sent with every request.
        trial: Trial name; must contain "custom" or "plan".
        model_name: Name of the model to send the requests to.
        input_dtype: numpy dtype of the "INPUT" tensor. For ``np.object_``
            the value is sent as its string representation.
        steps: Iterable of step tuples as described above. A ``None``
            expected_result skips the result check for that step.
        timeout_ms: Maximum time in milliseconds, measured from the start of
            the sequence, checked after each response. ``None`` disables it.
        sequence_name: Label used in log and error messages.

    Raises:
        AssertionError: If the trial is unknown or a result differs from
            the expected one.
        TimeoutException: If a response is processed after the timeout.
        Exception: Any error delivered by the server for a request.
    """
    if ("custom" in trial) or ("plan" in trial):
        tensor_shape = (
            1,
            1,
        )
    else:
        assert False, "unknown trial type: " + trial

    triton_client = client_metadata[0]
    sequence_id = client_metadata[1]

    # Execute the sequence of inference...
    seq_start_ms = int(round(time.time() * 1000))
    user_data = UserData()
    # Ensure there is no running stream
    triton_client.stop_stream()
    triton_client.start_stream(partial(completion_callback, user_data))

    # The stream is closed in 'finally' so that a server error, a timeout or
    # a failed check does not leave it open on the shared client.
    try:
        sent_count = 0
        for flag_str, value, expected_result, delay_ms in steps:
            seq_start = False
            seq_end = False
            if flag_str is not None:
                seq_start = "start" in flag_str
                seq_end = "end" in flag_str

            if input_dtype == np.object_:
                in0 = np.full(tensor_shape, value, dtype=np.int32)
                in0n = np.array([str(x) for x in in0.reshape(in0.size)], dtype=object)
                in0 = in0n.reshape(tensor_shape)
            else:
                in0 = np.full(tensor_shape, value, dtype=input_dtype)
            inputs = [
                grpcclient.InferInput(
                    "INPUT", tensor_shape, np_to_triton_dtype(input_dtype)
                ),
            ]
            inputs[0].set_data_from_numpy(in0)

            triton_client.async_stream_infer(
                model_name,
                inputs,
                sequence_id=sequence_id,
                sequence_start=seq_start,
                sequence_end=seq_end,
            )
            sent_count += 1

            if delay_ms is not None:
                time.sleep(delay_ms / 1000.0)

        # Process the results in order that they were sent
        result = None
        processed_count = 0
        while processed_count < sent_count:
            (results, error) = user_data._completed_requests.get()
            if error is not None:
                raise error

            (_, value, expected, _) = steps[processed_count]
            processed_count += 1
            if timeout_ms is not None:
                now_ms = int(round(time.time() * 1000))
                if (now_ms - seq_start_ms) > timeout_ms:
                    raise TimeoutException(
                        "Timeout expired for {}".format(sequence_name)
                    )

            result = results.as_numpy("OUTPUT")[0][0]
            if FLAGS.verbose:
                print(
                    "{} {}: + {} = {}".format(sequence_name, sequence_id, value, result)
                )

            if expected is not None:
                if input_dtype == np.object_:
                    assert (
                        int(result) == expected
                    ), "{}: expected result {}, got {}".format(
                        sequence_name, expected, int(result)
                    )
                else:
                    assert result == expected, "{}: expected result {}, got {}".format(
                        sequence_name, expected, result
                    )
    finally:
        triton_client.stop_stream()


def get_datatype(trial):
    """Return the numpy datatype to use for ``trial``.

    The choice is based on what models are available (see test.sh):
    ``np.float32`` when the trial name contains "plan", ``np.int32``
    otherwise.
    """
    return np.float32 if "plan" in trial else np.int32


def sequence_valid(
    client_metadata, rng, trial, model_name, dtype, len_mean, len_stddev, sequence_name
):
    """Send one variable-length sequence with both "start" and "end" flags.

    The length is drawn from a normal distribution (at least 1) and the
    input values are random integers. Every step expects the running sum of
    the values sent so far.
    """
    seqlen = max(1, int(rng.normal(len_mean, len_stddev)))
    print("{} {}: valid seqlen = {}".format(sequence_name, client_metadata[1], seqlen))

    values = rng.randint(0, 1024 * 1024, size=seqlen, dtype=dtype)

    final_idx = seqlen - 1
    running_total = 0
    steps = []
    for idx, val in enumerate(values):
        # A sequence of length one carries both flags on its only request.
        flags = (",start" if idx == 0 else "") + (",end" if idx == final_idx else "")
        running_total += val
        # (flag_str, value, expected_result, delay_ms)
        steps.append((flags, val, running_total, None))

    check_sequence_async(
        client_metadata, trial, model_name, dtype, steps, sequence_name=sequence_name
    )


def sequence_valid_valid(
    client_metadata, rng, trial, model_name, dtype, len_mean, len_stddev, sequence_name
):
    """Send two complete sequences back-to-back on the same correlation ID.

    Both sequences have a variable length (at least 1) and carry "start"
    and "end" flags. Every step expects the running sum of the values sent
    so far within its own sequence.
    """
    # Create two variable length sequences with "start" and "end"
    # flags, where both sequences use the same correlation ID and are
    # sent back-to-back.
    seqlen = [
        max(1, int(rng.normal(len_mean, len_stddev))),
        max(1, int(rng.normal(len_mean, len_stddev))),
    ]
    print(
        "{} {}: valid-valid seqlen[0] = {}, seqlen[1] = {}".format(
            sequence_name, client_metadata[1], seqlen[0], seqlen[1]
        )
    )

    values = [
        rng.randint(0, 1024 * 1024, size=seqlen[0], dtype=dtype),
        rng.randint(0, 1024 * 1024, size=seqlen[1], dtype=dtype),
    ]

    for p in [0, 1]:
        steps = []
        expected_result = 0

        for idx in range(seqlen[p]):
            flags = ""
            if idx == 0:
                flags += ",start"
            if idx == (seqlen[p] - 1):
                flags += ",end"

            val = values[p][idx]
            delay_ms = None
            expected_result += val

            # (flag_str, value, expected_result, delay_ms)
            steps.append(
                (flags, val, expected_result, delay_ms),
            )

        # Send inside the loop: 'steps' is rebuilt for every sequence, so a
        # single call after the loop would send only the second sequence.
        check_sequence_async(
            client_metadata,
            trial,
            model_name,
            dtype,
            steps,
            sequence_name=sequence_name,
        )


def sequence_valid_no_end(
    client_metadata, rng, trial, model_name, dtype, len_mean, len_stddev, sequence_name
):
    """Send a complete sequence followed by one that never ends.

    Both sequences use the same correlation ID and have a variable length
    (at least 1). The first carries "start" and "end" flags, the second only
    "start". Every step expects the running sum of the values sent so far
    within its own sequence.
    """
    # Create two variable length sequences, the first with "start" and
    # "end" flags and the second with no "end" flag, where both
    # sequences use the same correlation ID and are sent back-to-back.
    seqlen = [
        max(1, int(rng.normal(len_mean, len_stddev))),
        max(1, int(rng.normal(len_mean, len_stddev))),
    ]
    print(
        "{} {}: valid-no-end seqlen[0] = {}, seqlen[1] = {}".format(
            sequence_name, client_metadata[1], seqlen[0], seqlen[1]
        )
    )

    values = [
        rng.randint(0, 1024 * 1024, size=seqlen[0], dtype=dtype),
        rng.randint(0, 1024 * 1024, size=seqlen[1], dtype=dtype),
    ]

    for p in [0, 1]:
        steps = []
        expected_result = 0

        for idx in range(seqlen[p]):
            flags = ""
            if idx == 0:
                flags += ",start"
            # Only the first sequence is terminated.
            if (p == 0) and (idx == (seqlen[p] - 1)):
                flags += ",end"

            val = values[p][idx]
            delay_ms = None
            expected_result += val

            # (flag_str, value, expected_result, delay_ms)
            steps.append(
                (flags, val, expected_result, delay_ms),
            )

        # Send inside the loop: 'steps' is rebuilt for every sequence, so a
        # single call after the loop would send only the unterminated second
        # sequence and never the complete first one.
        check_sequence_async(
            client_metadata,
            trial,
            model_name,
            dtype,
            steps,
            sequence_name=sequence_name,
        )


def sequence_no_start(client_metadata, rng, trial, model_name, dtype, sequence_name):
    """Send a single request without a "start" flag and expect a rejection.

    Raises:
        AssertionError: If the request unexpectedly succeeds.
        Exception: Any failure other than the expected missing-START error
            is re-raised unchanged.
    """
    # Create a sequence without a "start" flag. Sequence should get an
    # error from the server.
    seqlen = 1
    print(
        "{} {}: no-start seqlen = {}".format(sequence_name, client_metadata[1], seqlen)
    )

    values = rng.randint(0, 1024 * 1024, size=seqlen, dtype=dtype)

    steps = []

    for idx in range(seqlen):
        flags = None
        val = values[idx]
        delay_ms = None

        # (flag_str, value, expected_result, delay_ms)
        steps.append(
            (flags, val, None, delay_ms),
        )

    try:
        check_sequence_async(
            client_metadata,
            trial,
            model_name,
            dtype,
            steps,
            sequence_name=sequence_name,
        )
    except Exception as ex:
        # str() works for every exception type; calling ex.message() would
        # raise AttributeError for exceptions that lack that method and hide
        # the real failure.
        if "must specify the START flag" not in str(ex):
            raise
    else:
        # Raised outside the 'try' so the handler above cannot swallow it.
        raise AssertionError("expected inference failure from missing START flag")


def sequence_no_end(
    client_metadata, rng, trial, model_name, dtype, len_mean, len_stddev, sequence_name
):
    """Send one variable-length sequence that starts but never ends.

    The sequence should be aborted by the server and its slot reused for
    another sequence. Every step expects the running sum of the values sent
    so far.
    """
    seqlen = max(1, int(rng.normal(len_mean, len_stddev)))
    print("{} {}: no-end seqlen = {}".format(sequence_name, client_metadata[1], seqlen))

    values = rng.randint(0, 1024 * 1024, size=seqlen, dtype=dtype)

    running_total = 0
    steps = []
    for idx, val in enumerate(values):
        # Only the first request is flagged; no request carries "end".
        flags = "start" if idx == 0 else ""
        running_total += val
        # (flag_str, value, expected_result, delay_ms)
        steps.append((flags, val, running_total, None))

    check_sequence_async(
        client_metadata, trial, model_name, dtype, steps, sequence_name=sequence_name
    )


def stress_thread(name, seed, pass_cnt, correlation_id_base, trial, model_name, dtype):
    """Worker that issues ``pass_cnt`` randomly chosen inference sequences.

    The thread owns ten gRPC clients with consecutive correlation IDs
    starting at ``correlation_id_base``: two "common" ones used for most
    sequences and eight "rare" ones used round-robin about 10% of the
    time. Any exception ends the run and its traceback is appended to the
    shared ``_thread_exceptions`` list. All clients are closed before
    returning.
    """
    print("Starting thread {} with seed {}".format(name, seed))
    rng = np.random.RandomState(seed)

    client_metadata_list = []

    # Streaming GRPC clients keep each sequence's requests in order. The
    # most recent sequence kind is remembered per client because some
    # kinds must not directly follow others (see the no-start case).
    common_cnt = 2
    rare_cnt = 8
    last_choices = []

    def run_variable_length(sequence_fn, client_idx, label):
        # Run one of the length-parameterized sequence kinds on the given
        # client and record which kind ran last on it.
        sequence_fn(
            client_metadata_list[client_idx],
            rng,
            trial,
            model_name,
            dtype,
            SEQUENCE_LENGTH_MEAN,
            SEQUENCE_LENGTH_STDEV,
            sequence_name=name,
        )
        last_choices[client_idx] = label

    try:
        for offset in range(common_cnt + rare_cnt):
            client = grpcclient.InferenceServerClient(
                "localhost:8001", verbose=FLAGS.verbose
            )
            client_metadata_list.append((client, correlation_id_base + offset))
            last_choices.append(None)

        rare_idx = 0
        for _ in range(pass_cnt):
            if rng.rand() < 0.1:
                # Rare client: sending no-end, valid-no-end or valid-valid
                # on a seldom-used correlation ID should exercise the idle
                # sequence path of the sequence scheduler.
                choice = rng.rand()
                client_idx = common_cnt + rare_idx

                if choice < 0.33:
                    run_variable_length(sequence_no_end, client_idx, "no-end")
                elif choice < 0.66:
                    run_variable_length(
                        sequence_valid_no_end, client_idx, "valid-no-end"
                    )
                else:
                    run_variable_length(
                        sequence_valid_valid, client_idx, "valid-valid"
                    )

                rare_idx = (rare_idx + 1) % rare_cnt
                continue

            # Common client: pick one of the two at random.
            client_idx = 0 if rng.rand() < 0.5 else 1
            previous = last_choices[client_idx]
            choice = rng.rand()

            # no-start cannot follow a sequence that was left open since
            # the server would treat it as a continuation of that
            # sequence rather than as a sequence missing its start flag.
            no_start_allowed = previous not in ("no-end", "valid-no-end")

            if no_start_allowed and choice < 0.01:
                sequence_no_start(
                    client_metadata_list[client_idx],
                    rng,
                    trial,
                    model_name,
                    dtype,
                    sequence_name=name,
                )
                last_choices[client_idx] = "no-start"
            elif choice < 0.05:
                run_variable_length(sequence_no_end, client_idx, "no-end")
            elif choice < 0.10:
                run_variable_length(sequence_valid_no_end, client_idx, "valid-no-end")
            elif choice < 0.15:
                run_variable_length(sequence_valid_valid, client_idx, "valid-valid")
            else:
                run_variable_length(sequence_valid, client_idx, "valid")

    except Exception:
        # Record the failure for the main thread to report.
        with _thread_exceptions_mutex:
            _thread_exceptions.append(traceback.format_exc())

    # Explicitly close every client so that streams get cleaned up and
    # closed correctly, otherwise the application can hang when exiting.
    for client, correlation_id in client_metadata_list:
        print("thread {} closing client {}".format(name, correlation_id))
        client.close()

    print("Exiting thread {}".format(name))


def check_status(model_name):
    """Fetch and print the inference statistics for ``model_name``.

    A temporary gRPC client is created for the query and closed
    afterwards, even if the statistics request fails.
    """
    client = grpcclient.InferenceServerClient("localhost:8001", verbose=FLAGS.verbose)
    try:
        stats = client.get_inference_statistics(model_name)
        print(stats)
    finally:
        # Close explicitly rather than relying on garbage collection,
        # matching how the stress threads dispose of their clients.
        client.close()


if __name__ == "__main__":
    # Command-line options. FLAGS stays a module-level name because the
    # worker functions read FLAGS.verbose.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-r", "--random-seed", type=int, required=False, help="Random seed."
    )
    parser.add_argument(
        "-t",
        "--concurrency",
        type=int,
        required=False,
        default=8,
        help="Request concurrency. Default is 8.",
    )
    parser.add_argument(
        "-i",
        "--iterations",
        type=int,
        required=False,
        default=200,
        help="Number of iterations of stress test to run. Default is 200.",
    )
    FLAGS = parser.parse_args()

    # Initialize the random seed. For reproducibility each thread
    # maintains its own RNG which is initialized based on this seed.
    # Without an explicit seed the current time is used.
    if FLAGS.random_seed is not None:
        randseed = FLAGS.random_seed
    else:
        randseed = int(time.time())
    np.random.seed(randseed)

    print("random seed = {}".format(randseed))
    print("concurrency = {}".format(FLAGS.concurrency))
    print("iterations = {}".format(FLAGS.iterations))

    trial = "custom"
    dtype = get_datatype(trial)
    model_name = tu.get_sequence_model_name(trial, dtype)

    threads = []
    for idx in range(FLAGS.concurrency):
        thread_name = "thread_{}".format(idx)

        # Create the seed for the thread. Since these are created in
        # reproducible order off of the initial seed we will get
        # reproducible results when given the same seed.
        seed = np.random.randint(2**32)

        # Each thread is reserved a block of correlation IDs of size
        # CORRELATION_ID_BLOCK_SIZE
        correlation_id_base = 1 + (idx * CORRELATION_ID_BLOCK_SIZE)

        threads.append(
            threading.Thread(
                target=stress_thread,
                args=(
                    thread_name,
                    seed,
                    FLAGS.iterations,
                    correlation_id_base,
                    trial,
                    model_name,
                    dtype,
                ),
            )
        )

    # Start every worker before waiting on any of them so they all run
    # concurrently.
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    check_status(model_name)

    # Report every traceback the workers recorded; any failure makes the
    # whole run exit non-zero.
    with _thread_exceptions_mutex:
        if _thread_exceptions:
            for ex in _thread_exceptions:
                print("*********\n{}".format(ex))
            sys.exit(1)

    print("Exiting stress test")
    sys.exit(0)


================================================
FILE: qa/L0_sequence_stress/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Expose only CUDA device 0 to the processes started by this script.
export CUDA_VISIBLE_DEVICES=0

# Output of every stress-client run is appended to CLIENT_LOG.
CLIENT_LOG="./client.log"
STRESS_TEST=sequence_stress.py

# Server binary under test. run_server is not defined in this script;
# presumably it comes from the sourced util.sh -- confirm there.
SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh

# Overall result: set to 1 on any failure and used as the exit status.
RET=0

# Setup model repository. Every repository holds the same sequence model
# with a different instance-count / batch-size split:
#   models1 - one instance with batch-size 4
#   models2 - two instances with batch-size 2
#   models4 - four instances with batch-size 1
rm -fr *.log  models{1,2,4} && mkdir models{1,2,4}
for m in ../custom_models/custom_sequence_int32 ; do
    # Each layout is <instance count>:<max batch size>; the repository
    # directory is named after the instance count.
    for layout in 1:4 2:2 4:1 ; do
        instances=${layout%%:*}
        batch=${layout##*:}
        repo=models${instances}
        cp -r $m $repo/. && \
            (cd $repo/$(basename $m) && \
                sed -i "s/max_sequence_idle_microseconds:.*/max_sequence_idle_microseconds: 1000000/" config.pbtxt && \
                sed -i "s/^max_batch_size:.*/max_batch_size: ${batch}/" config.pbtxt && \
                sed -i "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: ${instances}/" config.pbtxt && \
                sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: ${instances}/" config.pbtxt)
    done
done

# Stress-test each model repository: start a server on it, run the
# stress client once, then shut the server down.
for model_trial in 1 2 4 ; do
    MODEL_DIR="models${model_trial}"
    SERVER_ARGS="--model-repository=$(pwd)/$MODEL_DIR"
    SERVER_LOG="./$MODEL_DIR.server.log"
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # A failing client is recorded in RET instead of aborting the script.
    set +e
    if ! python $STRESS_TEST >>$CLIENT_LOG 2>&1; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# Test invalid gRPC infer handler thread count. The server is expected to
# fail to start for every value in the list; a server that does start is
# a test failure.
for thread_cnt in -1 0 1 129; do
    MODEL_DIR=models1
    SERVER_ARGS="--model-repository=`pwd`/$MODEL_DIR --grpc-infer-thread-count=$thread_cnt"
    SERVER_LOG="./$MODEL_DIR.server.log"
    run_server
    if [ "$SERVER_PID" != "0" ]; then
        echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***"
        RET=1
        # Stop the unexpectedly running server (note the "$": killing the
        # literal word SERVER_PID would leave it running) so the next
        # iteration starts from a clean state.
        kill $SERVER_PID
        wait $SERVER_PID
    fi
done

# Test gRPC infer handler thread count under stress: repeat the stress
# run on every model repository with the thread count set to 128.
thread_cnt=128
for model_trial in 1 2 4 ; do
    MODEL_DIR="models${model_trial}"
    SERVER_ARGS="--model-repository=$(pwd)/$MODEL_DIR --grpc-infer-thread-count=$thread_cnt"
    SERVER_LOG="./$MODEL_DIR.server.log"
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # A failing client is recorded in RET instead of aborting the script.
    set +e
    if ! python $STRESS_TEST >>$CLIENT_LOG 2>&1; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# Report the overall result; on failure dump the client log first so the
# cause is visible in the test output.
if [ $RET -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_server_status/server_status_test.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest

import infer_util as iu
import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import *


class ServerMetadataTest(tu.TestResultCollector):
    """Server/model metadata, readiness and statistics checks.

    Every test runs the same checks against both the HTTP endpoint and
    the gRPC endpoint. HTTP responses are read with ``response[key]`` and
    gRPC responses with ``response.key``; ``_read_field`` hides that
    difference so each check is written once.
    """

    # (url, protocol) of each endpoint the tests exercise.
    _ENDPOINTS = (("localhost:8000", "http"), ("localhost:8001", "grpc"))

    @staticmethod
    def _connect(url, protocol):
        """Return a verbose HTTP or gRPC client for ``url``."""
        client_module = httpclient if protocol == "http" else grpcclient
        return client_module.InferenceServerClient(url=url, verbose=True)

    @staticmethod
    def _read_field(response, protocol, key):
        """Read ``key`` from an HTTP (item access) or gRPC (attribute) response."""
        if protocol == "http":
            return response[key]
        return getattr(response, key)

    def _success_count(self, model_stat, protocol):
        """Return the successful-inference count of one model_stats entry."""
        inference_stats = self._read_field(model_stat, protocol, "inference_stats")
        success = self._read_field(inference_stats, protocol, "success")
        return self._read_field(success, protocol, "count")

    def _assert_server_live_and_ready(self, triton_client):
        """Assert that the server reports itself both live and ready."""
        self.assertTrue(triton_client.is_server_live())
        self.assertTrue(triton_client.is_server_ready())

    def _assert_server_identity(self, server_metadata, protocol):
        """Assert the reported server version and name.

        The expected version is read from the TRITON_SERVER_VERSION
        environment variable.
        """
        self.assertEqual(
            os.environ["TRITON_SERVER_VERSION"],
            self._read_field(server_metadata, protocol, "version"),
        )
        self.assertEqual(
            "triton", self._read_field(server_metadata, protocol, "name")
        )

    def _assert_int32_tensors(self, tensors, protocol, allowed_names):
        """Assert each tensor is INT32, shaped [-1, 16] and named as allowed."""
        for tensor in tensors:
            self.assertIn(self._read_field(tensor, protocol, "name"), allowed_names)
            self.assertEqual(self._read_field(tensor, protocol, "datatype"), "INT32")
            self.assertEqual(self._read_field(tensor, protocol, "shape"), [-1, 16])

    def _assert_float32_versions_ready(self, triton_client, model_name):
        """Assert versions 1 and 3 of ``model_name`` are ready and 2 is not."""
        self.assertTrue(triton_client.is_model_ready(model_name, model_version="1"))
        self.assertFalse(triton_client.is_model_ready(model_name, model_version="2"))
        self.assertTrue(triton_client.is_model_ready(model_name, model_version="3"))

    def test_basic(self):
        """Server metadata lists the expected extensions; model metadata resolves."""
        model_name = "libtorch_int32_int8_int8"
        extensions = [
            "classification",
            "sequence",
            "model_repository",
            "schedule_policy",
            "model_configuration",
            "system_shared_memory",
            "cuda_shared_memory",
            "binary_tensor_data",
            "statistics",
        ]
        try:
            for url, protocol in self._ENDPOINTS:
                triton_client = self._connect(url, protocol)
                self._assert_server_live_and_ready(triton_client)
                server_metadata = triton_client.get_server_metadata()
                model_metadata = triton_client.get_model_metadata(model_name)

                self._assert_server_identity(server_metadata, protocol)
                server_extensions = self._read_field(
                    server_metadata, protocol, "extensions"
                )
                for ext in extensions:
                    self.assertIn(ext, server_extensions)

                self.assertEqual(
                    model_name, self._read_field(model_metadata, protocol, "name")
                )
        except InferenceServerException as ex:
            self.fail("unexpected error {}".format(ex))

    def test_unknown_model(self):
        """Requesting metadata for an unknown model fails on every endpoint."""
        model_name = "foo"
        # The try/except sits inside the loop: the expected exception from
        # one endpoint must not end the loop before the next endpoint has
        # been checked.
        for url, protocol in self._ENDPOINTS:
            try:
                triton_client = self._connect(url, protocol)
                self._assert_server_live_and_ready(triton_client)
                server_metadata = triton_client.get_server_metadata()
                self._assert_server_identity(server_metadata, protocol)

                triton_client.get_model_metadata(model_name)
                self.fail("expected unknown model failure ({})".format(protocol))
            except InferenceServerException as ex:
                self.assertTrue(
                    ex.message().startswith(
                        "Request for unknown model: 'foo' is not found"
                    ),
                    "unexpected error over {}: {}".format(protocol, ex.message()),
                )

    def test_unknown_model_version(self):
        """Requesting metadata for an unknown version fails on every endpoint."""
        model_name = "onnx_int32_int8_int8"
        # Per-endpoint try/except for the same reason as in
        # test_unknown_model: every endpoint must be exercised.
        for url, protocol in self._ENDPOINTS:
            try:
                triton_client = self._connect(url, protocol)
                self._assert_server_live_and_ready(triton_client)

                triton_client.get_model_metadata(model_name, model_version="99")
                self.fail(
                    "expected unknown model version failure ({})".format(protocol)
                )
            except InferenceServerException as ex:
                self.assertTrue(
                    ex.message().startswith(
                        "Request for unknown model: 'onnx_int32_int8_int8' version 99 is not found"
                    ),
                    "unexpected error over {}: {}".format(protocol, ex.message()),
                )

    def test_model_latest_infer(self):
        """Inferring without a version only produces stats for version 3."""
        input_size = 16
        tensor_shape = (1, input_size)
        platform_name = {"plan": "tensorrt_plan", "onnx": "onnxruntime_onnx"}

        # There are 3 versions of *_int32_int32_int32 and all
        # should be available.
        for platform in ("plan", "onnx"):
            model_name = platform + "_int32_int32_int32"

            # Initially there should be no version stats..
            try:
                for url, protocol in self._ENDPOINTS:
                    triton_client = self._connect(url, protocol)
                    self._assert_server_live_and_ready(triton_client)
                    model_metadata = triton_client.get_model_metadata(model_name)

                    # verify all versions are reported when no model version
                    # is specified
                    self.assertEqual(
                        model_name, self._read_field(model_metadata, protocol, "name")
                    )
                    versions = self._read_field(model_metadata, protocol, "versions")
                    self.assertEqual(len(versions), 3)
                    for v in (1, 2, 3):
                        self.assertIn(str(v), versions)

                    # verify contents of model metadata
                    model_platform = self._read_field(
                        model_metadata, protocol, "platform"
                    )
                    model_inputs = self._read_field(model_metadata, protocol, "inputs")
                    model_outputs = self._read_field(
                        model_metadata, protocol, "outputs"
                    )

                    self.assertEqual(platform_name[platform], model_platform)
                    self.assertEqual(len(model_inputs), 2)
                    self.assertEqual(len(model_outputs), 2)

                    self._assert_int32_tensors(
                        model_inputs, protocol, ["INPUT0", "INPUT1"]
                    )
                    self._assert_int32_tensors(
                        model_outputs, protocol, ["OUTPUT0", "OUTPUT1"]
                    )

            except InferenceServerException as ex:
                self.fail("unexpected error {}".format(ex))

            # Infer using latest version (which is 3)...
            iu.infer_exact(
                self,
                platform,
                tensor_shape,
                1,
                np.int32,
                np.int32,
                np.int32,
                model_version=None,
                swap=True,
            )

            try:
                for url, protocol in self._ENDPOINTS:
                    triton_client = self._connect(url, protocol)
                    self._assert_server_live_and_ready(triton_client)
                    for v in (1, 2, 3):
                        self.assertTrue(
                            triton_client.is_model_ready(
                                model_name, model_version=str(v)
                            )
                        )

                    # Only version 3 should have infer stats
                    infer_stats = triton_client.get_inference_statistics(model_name)
                    stats = self._read_field(infer_stats, protocol, "model_stats")
                    self.assertEqual(
                        len(stats), 3, "expected 3 infer stats for model " + model_name
                    )
                    for s in stats:
                        v = self._read_field(s, protocol, "version")
                        success_count = self._success_count(s, protocol)

                        if v == "3":
                            # Only a non-zero count is asserted: the exact
                            # number depends on how many requests
                            # iu.infer_exact issues. NOTE(review): confirm
                            # that number before tightening this to an
                            # exact value.
                            self.assertGreater(
                                success_count,
                                0,
                                "expected infer success counts for version 3"
                                " of model " + model_name,
                            )
                        else:
                            self.assertEqual(
                                success_count,
                                0,
                                "unexpected infer success counts for version "
                                + str(v)
                                + " of model "
                                + model_name,
                            )

            except InferenceServerException as ex:
                self.fail("unexpected error {}".format(ex))

    def test_model_specific_infer(self):
        """Inferring with version 1 only produces stats for version 1."""
        input_size = 16

        # There are 3 versions of *_float32_float32_float32 but only
        # versions 1 and 3 should be available.
        for platform in ("libtorch", "onnx", "plan"):
            tensor_shape = (1, input_size)
            model_name = platform + "_float32_float32_float32"

            # Initially there should be no version status...
            try:
                for url, protocol in self._ENDPOINTS:
                    triton_client = self._connect(url, protocol)
                    self._assert_server_live_and_ready(triton_client)
                    self._assert_float32_versions_ready(triton_client, model_name)
            except InferenceServerException as ex:
                self.fail("unexpected error {}".format(ex))

            # Infer using version 1...
            iu.infer_exact(
                self,
                platform,
                tensor_shape,
                1,
                np.float32,
                np.float32,
                np.float32,
                model_version=1,
                swap=False,
            )

            try:
                for url, protocol in self._ENDPOINTS:
                    triton_client = self._connect(url, protocol)
                    self._assert_server_live_and_ready(triton_client)
                    self._assert_float32_versions_ready(triton_client, model_name)

                    # Only version 1 should have infer stats
                    infer_stats = triton_client.get_inference_statistics(
                        model_name, model_version="1"
                    )
                    model_stats = self._read_field(infer_stats, protocol, "model_stats")
                    self.assertEqual(
                        len(model_stats),
                        1,
                        "expected 1 infer stats for version 1"
                        " of model " + model_name,
                    )
                    # Only a non-zero count is asserted: the exact number
                    # depends on how many requests iu.infer_exact issues.
                    # NOTE(review): confirm that number before tightening
                    # this to an exact value.
                    self.assertGreater(
                        self._success_count(model_stats[0], protocol),
                        0,
                        "expected infer stats for version 1"
                        " of model " + model_name,
                    )

                    infer_stats = triton_client.get_inference_statistics(
                        model_name, model_version="3"
                    )
                    model_stats = self._read_field(infer_stats, protocol, "model_stats")
                    self.assertEqual(
                        self._success_count(model_stats[0], protocol),
                        0,
                        "unexpected infer stats for version 3"
                        " of model " + model_name,
                    )

            except InferenceServerException as ex:
                self.fail("unexpected error {}".format(ex))


class ModelMetadataTest(tu.TestResultCollector):
    """
    Check model metadata, readiness and inference statistics over both the
    HTTP and gRPC endpoints after the model repository has been modified.

    These tests must be run after the ServerMetadataTest. See test.sh
    file for correct test running.
    """

    # (url, protocol) of every endpoint each test is repeated against.
    _ENDPOINTS = (("localhost:8000", "http"), ("localhost:8001", "grpc"))

    def _connect(self, url, protocol):
        """Create a client for `protocol` and check the server is live and ready."""
        client_module = httpclient if protocol == "http" else grpcclient
        client = client_module.InferenceServerClient(url=url, verbose=True)
        self.assertTrue(client.is_server_live())
        self.assertTrue(client.is_server_ready())
        return client

    @staticmethod
    def _get(response, protocol, field):
        """Read `field` from a client response.

        HTTP responses are indexed by key while gRPC responses expose the
        same fields as attributes.
        """
        if protocol == "http":
            return response[field]
        return getattr(response, field)

    def _success_count(self, model_stat, protocol):
        """Return the success count stored in a single `model_stats` entry."""
        inference_stats = self._get(model_stat, protocol, "inference_stats")
        success = self._get(inference_stats, protocol, "success")
        return self._get(success, protocol, "count")

    def _check_single_version(self, client, protocol, model_name, version):
        """Assert the model metadata reports `version` as its only version."""
        metadata = client.get_model_metadata(model_name)
        self.assertEqual(
            model_name,
            self._get(metadata, protocol, "name"),
            "expected status for model " + model_name,
        )
        versions = self._get(metadata, protocol, "versions")
        self.assertEqual(
            len(versions),
            1,
            "expected status for 1 versions for model " + model_name,
        )
        self.assertEqual(version, versions[0])

    def test_model_versions_deleted(self):
        # Originally There were 3 versions of *_int32_int32_int32 and
        # version 3 was executed once. Version 2 and 3 models were
        # deleted from the model repository so now only expect version 1 to
        # be ready and show stats.
        for platform in ("libtorch", "onnx"):
            model_name = platform + "_int32_int32_int32"

            try:
                for url, protocol in self._ENDPOINTS:
                    client = self._connect(url, protocol)
                    self._check_single_version(client, protocol, model_name, "1")

                    # Only version 1 is ready and reports infer stats; the
                    # deleted versions 2 and 3 must not be ready.
                    for v in (1, 2, 3):
                        version = str(v)
                        is_ready = client.is_model_ready(
                            model_name, model_version=version
                        )
                        if v != 1:
                            self.assertFalse(is_ready)
                            continue

                        self.assertTrue(is_ready)
                        infer_stats = client.get_inference_statistics(
                            model_name, model_version=version
                        )
                        model_stats = self._get(infer_stats, protocol, "model_stats")
                        self.assertEqual(
                            len(model_stats),
                            1,
                            "expected 1 infer stats for version "
                            + version
                            + " of model "
                            + model_name,
                        )
                        self.assertEqual(
                            self._success_count(model_stats[0], protocol), 0
                        )

            except InferenceServerException as ex:
                self.fail("unexpected error {}".format(ex))

    def test_model_versions_added(self):
        # Originally There was version 1 of *_float16_float32_float32.
        # Version 7 was added so now expect just version 7 to be ready
        # and provide infer stats.
        for platform in ("plan",):
            model_name = platform + "_float16_float32_float32"

            try:
                for url, protocol in self._ENDPOINTS:
                    client = self._connect(url, protocol)
                    self._check_single_version(client, protocol, model_name, "7")

                    # Only version 7 should be ready and show infer stat.
                    for v in (1, 7):
                        version = str(v)
                        is_ready = client.is_model_ready(
                            model_name, model_version=version
                        )
                        if v == 7:
                            self.assertTrue(is_ready)
                            infer_stats = client.get_inference_statistics(
                                model_name, model_version=version
                            )
                            model_stats = self._get(
                                infer_stats, protocol, "model_stats"
                            )
                            self.assertEqual(
                                self._success_count(model_stats[0], protocol),
                                0,
                                "unexpected infer stats for version "
                                + version
                                + " of model "
                                + model_name,
                            )
                        else:
                            self.assertFalse(is_ready)
                            # Requesting stats of a version that is not ready
                            # must be rejected by the server.
                            with self.assertRaises(
                                InferenceServerException,
                                msg="unexpected infer stats for the model that is not ready",
                            ) as ctx:
                                client.get_inference_statistics(
                                    model_name, model_version=version
                                )
                            self.assertIn(
                                "requested model version is not available for model",
                                str(ctx.exception),
                            )

            except InferenceServerException as ex:
                self.fail("unexpected error {}".format(ex))

    def test_infer_stats_no_model_version(self):
        # Originally There were 3 versions of *_int32_int32_int32 and
        # version 3 was executed once. Version 2 and 3 models were
        # deleted from the model repository so now only expect version 1 to
        # be ready and show infer stats.
        for platform in ("libtorch", "onnx"):
            model_name = platform + "_int32_int32_int32"

            try:
                for url, protocol in self._ENDPOINTS:
                    client = self._connect(url, protocol)
                    self._check_single_version(client, protocol, model_name, "1")

                    # Only version 1 is ready; versions 2 and 3 were deleted.
                    for v in (1, 2, 3):
                        is_ready = client.is_model_ready(
                            model_name, model_version=str(v)
                        )
                        if v == 1:
                            self.assertTrue(is_ready)
                        else:
                            self.assertFalse(is_ready)

                    # Without an explicit version the statistics are expected
                    # to cover exactly the one ready version.
                    infer_stats = client.get_inference_statistics(model_name)
                    model_stats = self._get(infer_stats, protocol, "model_stats")
                    self.assertEqual(
                        len(model_stats),
                        1,
                        "expected 1 infer stats for model " + model_name,
                    )

                    version = self._get(model_stats[0], protocol, "version")
                    self.assertEqual(
                        version,
                        "1",
                        "expected version 1 for infer stat, got " + version,
                    )
                    self.assertEqual(
                        self._success_count(model_stats[0], protocol),
                        0,
                        "unexpected infer stats for version "
                        + str(version)
                        + " of model "
                        + model_name,
                    )

            except InferenceServerException as ex:
                self.fail("unexpected error {}".format(ex))

    def test_infer_stats_no_model(self):
        # Test get_inference_statistics when no model/model_version is passed.
        try:
            for url, protocol in self._ENDPOINTS:
                client = self._connect(url, protocol)

                # Returns infer stats for ALL models + ready versions
                infer_stats = client.get_inference_statistics()
                model_stats = self._get(infer_stats, protocol, "model_stats")
                # The expected count is tied to the contents of the QA model
                # repository the server was started with.
                self.assertEqual(
                    len(model_stats),
                    125,
                    "expected 125 infer stats for all ready versions of all model",
                )

        except InferenceServerException as ex:
            self.fail("unexpected error {}".format(ex))


# Script entry point: run the tests selected on the command line
# (presumably a test class name, as passed by test.sh).
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_server_status/test.sh
================================================
#!/bin/bash
# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first command-line argument
# overrides NVIDIA_TRITON_SERVER_VERSION, and an optional architecture
# suffix is appended from TEST_REPO_ARCH.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

TEST_RESULT_FILE='test_results.txt'
CLIENT_LOG="./client.log"
SERVER_STATUS_TEST=server_status_test.py
# Number of test cases expected from ModelMetadataTest (MMDT) and
# ServerMetadataTest (SMDT); these are passed to check_test_results below.
EXPECTED_NUM_TESTS_MMDT="4"
EXPECTED_NUM_TESTS_SMDT="5"

DATADIR=/data/inferenceserver/${REPO_VERSION}

SERVER=/opt/tritonserver/bin/tritonserver
# The server polls the local "models" directory every second, so the
# repository edits made between the two test phases are picked up at runtime.
SERVER_ARGS="--repository-poll-secs=1 --model-control-mode=poll --model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"
# Presumably provides run_server and check_test_results used below.
source ../common/util.sh

# Start from a fresh copy of the QA model repository.
rm -fr models
cp -r $DATADIR/qa_model_repository models

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

# Phase 1: ServerMetadataTest against the unmodified repository. Errors are
# tolerated (set +e) so failures are recorded in RET instead of aborting.
set +e

rm -f $CLIENT_LOG
python $SERVER_STATUS_TEST ServerMetadataTest >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS_SMDT
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

# Modify the repository while the server is running: delete versions 2 and 3
# of the int32 models and add version 7 of the plan float16 model, then wait
# for the server to poll the changes.
rm -fr models/libtorch_int32_int32_int32/2 models/libtorch_int32_int32_int32/3
rm -fr models/onnx_int32_int32_int32/2 models/onnx_int32_int32_int32/3
cp -r models/plan_float16_float32_float32/1 models/plan_float16_float32_float32/7
sleep 3

# Dumping the contents of the models that are currently loaded for debugging purposes
# Primarily meant to assist in debugging ModelMetadataTest::test_infer_stats_no_model
# Diff the output with a previous L0_server_status job to catch any changes to
# /data/inferenceserver/${REPO_VERSION}/qa_model_repository that were not accounted for.
# NOTE(review): this runs under `set -e`, so a failing curl aborts the script
# before the server is killed - confirm that is intended for a debug-only call.
curl -X POST http://localhost:8000/v2/repository/index

# Phase 2: ModelMetadataTest against the modified repository.
set +e

python $SERVER_STATUS_TEST ModelMetadataTest >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS_MMDT
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

# Stop the server and wait for it to exit before reporting the result.
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_shared_memory/shared_memory_test.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import time
import unittest
from functools import partial

import infer_util as iu
import numpy as np
import psutil
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
import tritonclient.utils.shared_memory as shm
from tritonclient import utils


class SystemSharedMemoryTestBase(tu.TestResultCollector):
    """Shared fixture for the system shared memory tests.

    setUp creates an HTTP or gRPC client (selected by the CLIENT_TYPE
    environment variable, HTTP by default) and tearDown destroys every
    shared memory region tracked in ``self._shm_handles``.
    """

    DEFAULT_SHM_BYTE_SIZE = 64
    SYS_PAGE_SIZE = os.sysconf("SC_PAGE_SIZE")

    # Names of the regions managed by _configure_server(), in the order their
    # handles are stored in self._shm_handles. The shared memory key of each
    # region is its name prefixed with "/".
    _SHM_REGION_NAMES = ("input0_data", "input1_data", "output0_data", "output1_data")

    def setUp(self):
        self._setup_client()
        self._shm_handles = []

    def tearDown(self):
        self._cleanup_shm_handles()

    def _setup_client(self):
        """Create the Triton client for the protocol named by CLIENT_TYPE."""
        self.protocol = os.environ.get("CLIENT_TYPE", "http")
        if self.protocol == "http":
            self.url = "localhost:8000"
            self.triton_client = httpclient.InferenceServerClient(
                self.url, verbose=True
            )
        else:
            self.url = "localhost:8001"
            self.triton_client = grpcclient.InferenceServerClient(
                self.url, verbose=True
            )

    def _configure_server(
        self,
        create_byte_size=DEFAULT_SHM_BYTE_SIZE,
        register_byte_size=DEFAULT_SHM_BYTE_SIZE,
        register_offset=0,
    ):
        """Creates and registers shared memory regions for testing.

        Any regions tracked from a previous call are destroyed first. On
        success ``self._shm_handles`` holds the handles of the two input and
        two output regions (in that order) and ``self.shm_names`` their names.

        Parameters
        ----------
        create_byte_size: int
            Size of each system shared memory region to create.
            NOTE: This should be sufficiently large to hold the inputs/outputs
                  stored in shared memory.

        register_byte_size: int
            Size of each system shared memory region to register with server.
            NOTE: The (offset + register_byte_size) should be less than or equal
            to the create_byte_size. Otherwise an exception will be raised for
            an invalid set of registration args.

        register_offset: int
            Offset into the shared memory object to start the registered region.

        """
        self._cleanup_shm_handles()
        for name in self._SHM_REGION_NAMES:
            # Track every handle as soon as it exists so tearDown can destroy
            # it even if a later create or register call raises.
            self._shm_handles.append(
                shm.create_shared_memory_region(name, "/" + name, create_byte_size)
            )

        for name in self._SHM_REGION_NAMES:
            self.triton_client.register_system_shared_memory(
                name, "/" + name, register_byte_size, offset=register_offset
            )

        # Write data to the two input regions, starting at the registered offset
        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
        input1_data = np.ones(shape=16, dtype=np.int32)
        shm.set_shared_memory_region(
            self._shm_handles[0], [input0_data], offset=register_offset
        )
        shm.set_shared_memory_region(
            self._shm_handles[1], [input1_data], offset=register_offset
        )
        self.shm_names = list(self._SHM_REGION_NAMES)

    def _cleanup_shm_handles(self):
        """Destroy every tracked shared memory region and reset the tracking list."""
        for shm_handle in self._shm_handles:
            shm.destroy_shared_memory_region(shm_handle)
        self._shm_handles = []


class SharedMemoryTest(SystemSharedMemoryTestBase):
    def test_invalid_create_shm(self):
        # Creating a region with a byte size of -1 must be rejected
        expected_error = "unable to create the shared memory region"
        with self.assertRaisesRegex(shm.SharedMemoryException, expected_error):
            invalid_handle = shm.create_shared_memory_region(
                "dummy_data", "/dummy_data", -1
            )
            self._shm_handles.append(invalid_handle)

    def test_valid_create_set_register(self):
        # Create a valid system shared memory region, fill data in it and register
        shm_handle = shm.create_shared_memory_region("dummy_data", "/dummy_data", 8)
        # Track the handle so tearDown destroys the region even when one of
        # the calls or assertions below fails.
        self._shm_handles.append(shm_handle)
        shm.set_shared_memory_region(shm_handle, [np.array([1, 2], dtype=np.float32)])
        self.triton_client.register_system_shared_memory("dummy_data", "/dummy_data", 8)
        shm_status = self.triton_client.get_system_shared_memory_status()
        # The HTTP status is sized directly; the gRPC status keeps the
        # regions under `.regions`.
        regions = shm_status if self.protocol == "http" else shm_status.regions
        self.assertEqual(len(regions), 1)

    def test_unregister_before_register(self):
        # Create a valid system shared memory region and unregister before register
        shm_handle = shm.create_shared_memory_region("dummy_data", "/dummy_data", 8)
        # Track the handle so tearDown destroys the region even when one of
        # the calls or assertions below fails.
        self._shm_handles.append(shm_handle)
        self.triton_client.unregister_system_shared_memory("dummy_data")
        shm_status = self.triton_client.get_system_shared_memory_status()
        # The HTTP status is sized directly; the gRPC status keeps the
        # regions under `.regions`.
        regions = shm_status if self.protocol == "http" else shm_status.regions
        self.assertEqual(len(regions), 0)

    def test_unregister_after_register(self):
        # Create a valid system shared memory region and unregister after register
        shm_handle = shm.create_shared_memory_region("dummy_data", "/dummy_data", 8)
        # Track the handle so tearDown destroys the region even when one of
        # the calls or assertions below fails.
        self._shm_handles.append(shm_handle)
        self.triton_client.register_system_shared_memory("dummy_data", "/dummy_data", 8)
        self.triton_client.unregister_system_shared_memory("dummy_data")
        shm_status = self.triton_client.get_system_shared_memory_status()
        # The HTTP status is sized directly; the gRPC status keeps the
        # regions under `.regions`.
        regions = shm_status if self.protocol == "http" else shm_status.regions
        self.assertEqual(len(regions), 0)

    def test_reregister_after_register(self):
        # Create a valid system shared memory region and register it twice
        # under the same name: the duplicate registration must be rejected
        # and the first registration must stay in place.
        shm_handle = shm.create_shared_memory_region("dummy_data", "/dummy_data", 8)
        # Track the handle so tearDown destroys the region even when one of
        # the calls or assertions below fails.
        self._shm_handles.append(shm_handle)
        self.triton_client.register_system_shared_memory("dummy_data", "/dummy_data", 8)
        # Fail explicitly when the second registration is accepted instead of
        # silently skipping the error-message check.
        with self.assertRaises(utils.InferenceServerException) as ctx:
            self.triton_client.register_system_shared_memory(
                "dummy_data", "/dummy_data", 8
            )
        self.assertIn(
            "shared memory region 'dummy_data' already in manager",
            str(ctx.exception),
        )
        shm_status = self.triton_client.get_system_shared_memory_status()
        # The HTTP status is sized directly; the gRPC status keeps the
        # regions under `.regions`.
        regions = shm_status if self.protocol == "http" else shm_status.regions
        self.assertEqual(len(regions), 1)

    def test_unregister_after_inference(self):
        # Unregister after inference
        error_msg = []
        self._configure_server()
        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            self._shm_handles[1],
            self._shm_handles[2],
            self._shm_handles[3],
            error_msg,
            protocol=self.protocol,
            use_system_shared_memory=True,
        )
        if error_msg:
            raise Exception(str(error_msg))
        # Dropping one of the four registered regions must leave three behind
        self.triton_client.unregister_system_shared_memory("output0_data")
        shm_status = self.triton_client.get_system_shared_memory_status()
        # The HTTP status is sized directly; the gRPC status keeps the
        # regions under `.regions`.
        regions = shm_status if self.protocol == "http" else shm_status.regions
        self.assertEqual(len(regions), 3)
        self._cleanup_shm_handles()

    def test_register_after_inference(self):
        # Register after inference
        error_msg = []
        self._configure_server()

        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            self._shm_handles[1],
            self._shm_handles[2],
            self._shm_handles[3],
            error_msg,
            protocol=self.protocol,
            use_system_shared_memory=True,
        )

        if error_msg:
            raise Exception(str(error_msg))
        shm_ip2_handle = shm.create_shared_memory_region(
            "input2_data", "/input2_data", self.DEFAULT_SHM_BYTE_SIZE
        )
        # Track the extra region right away so tearDown destroys it even if
        # the registration or the assertion below fails.
        self._shm_handles.append(shm_ip2_handle)
        self.triton_client.register_system_shared_memory(
            "input2_data", "/input2_data", self.DEFAULT_SHM_BYTE_SIZE
        )
        shm_status = self.triton_client.get_system_shared_memory_status()
        # The HTTP status is sized directly; the gRPC status keeps the
        # regions under `.regions`.
        regions = shm_status if self.protocol == "http" else shm_status.regions
        self.assertEqual(len(regions), 5)
        self._cleanup_shm_handles()

    def test_too_big_shm(self):
        # Shared memory input region larger than needed - Throws error
        error_msg = []
        self._configure_server()
        shm_ip2_handle = shm.create_shared_memory_region(
            "input2_data", "/input2_data", 128
        )
        # Track the oversized region right away so tearDown destroys it even
        # if registration, inference or the assertion below fails.
        self._shm_handles.append(shm_ip2_handle)
        self.triton_client.register_system_shared_memory(
            "input2_data", "/input2_data", 128
        )

        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            shm_ip2_handle,
            self._shm_handles[2],
            self._shm_handles[3],
            error_msg,
            big_shm_name="input2_data",
            big_shm_size=128,
            protocol=self.protocol,
            use_system_shared_memory=True,
        )
        # NOTE(review): the test passes when no error is reported at all;
        # confirm whether a missing error should fail the test.
        if error_msg:
            self.assertIn(
                "input byte size mismatch for input 'INPUT1' for model 'simple'. Expected 64, got 128",
                error_msg[-1],
            )
        self._cleanup_shm_handles()

    def test_large_shm_register_offset(self):
        # Guards against an out of bounds read when system shared memory is
        # registered with a large offset.
        backends_env = os.environ.get("BACKENDS")
        if backends_env is None:
            platforms = ["python", "onnx", "libtorch", "plan", "openvino"]
        else:
            platforms = backends_env.split()

        # A region of page_size * 1024 bytes is large enough to reproduce a
        # segfault. The registered window starts one page before the end of
        # the region, which leaves enough space for the input/output data.
        page_size = self.SYS_PAGE_SIZE
        region_byte_size = page_size * 1024
        offset = page_size * 1023

        for platform in platforms:
            errors = []
            self._configure_server(
                create_byte_size=region_byte_size,
                register_offset=offset,
            )
            handles = self._shm_handles

            iu.shm_basic_infer(
                self,
                self.triton_client,
                handles[0],
                handles[1],
                handles[2],
                handles[3],
                errors,
                register_offset=offset,
                protocol=self.protocol,
                use_system_shared_memory=True,
                override_model_name=f"{platform}_int32_int32_int32",
            )
            # Unregister before reporting so the next platform starts clean
            self.triton_client.unregister_system_shared_memory()
            if errors:
                raise Exception(str(errors))

    def test_mixed_raw_shm(self):
        """Infer with one shared-memory input and one RAW (in-request) input."""
        failures = []
        self._configure_server()
        # The second input is passed as a plain numpy array wrapped in a list
        # in place of a shared memory handle.
        raw_input1 = [np.ones(16, dtype=np.int32)]

        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            raw_input1,
            self._shm_handles[2],
            self._shm_handles[3],
            failures,
            protocol=self.protocol,
            use_system_shared_memory=True,
        )
        if failures:
            raise Exception(failures[-1])
        self._cleanup_shm_handles()

    def test_unregisterall(self):
        """Unregister every shared memory block and verify none remain.

        After the default server configuration four regions are expected to be
        registered; after an unregister-all call the status must be empty.
        """

        def registered_region_count():
            # The HTTP client returns a sized collection directly, while the
            # gRPC client returns a message exposing the regions via `.regions`.
            status = self.triton_client.get_system_shared_memory_status()
            if self.protocol == "http":
                return len(status)
            return len(status.regions)

        self._configure_server()
        # assertEqual (rather than assertTrue(len(...) == n)) reports the
        # actual region count when the check fails.
        self.assertEqual(registered_region_count(), 4)
        self.triton_client.unregister_system_shared_memory()
        self.assertEqual(registered_region_count(), 0)
        self._cleanup_shm_handles()

    def test_infer_offset_out_of_bound(self):
        """An output offset outside the shared memory region must be rejected."""
        failures = []
        page_size = self.SYS_PAGE_SIZE
        self._configure_server(
            create_byte_size=page_size + self.DEFAULT_SHM_BYTE_SIZE,
            register_offset=page_size,
        )
        # HTTP: 2**64 - 32 becomes -32 when placed in a signed int64, giving a
        # negative offset by overflow.
        # gRPC: the client throws an error for values > 2**63 - 1, so instead
        # use an offset that lies past the end of the region, given its size
        # is 64 bytes.
        bad_offset = 2**64 - 32 if self.protocol == "http" else 64

        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            self._shm_handles[1],
            self._shm_handles[2],
            self._shm_handles[3],
            failures,
            shm_output_offset=bad_offset,
            protocol=self.protocol,
            use_system_shared_memory=True,
        )

        self.assertEqual(len(failures), 1)
        self.assertIn("Invalid offset for shared memory region", failures[0])
        self._cleanup_shm_handles()

    def test_infer_byte_size_out_of_bound(self):
        """An output offset + byte_size past the region end must be rejected."""
        failures = []
        page_size = self.SYS_PAGE_SIZE
        self._configure_server(
            create_byte_size=page_size + self.DEFAULT_SHM_BYTE_SIZE,
            register_offset=page_size,
        )

        # Requesting the default byte size starting at offset 1 makes
        # offset + byte_size exceed the default size by one byte.
        iu.shm_basic_infer(
            self,
            self.triton_client,
            self._shm_handles[0],
            self._shm_handles[1],
            self._shm_handles[2],
            self._shm_handles[3],
            failures,
            shm_output_offset=1,
            shm_output_byte_size=self.DEFAULT_SHM_BYTE_SIZE,
            protocol=self.protocol,
            use_system_shared_memory=True,
        )
        self.assertEqual(len(failures), 1)
        self.assertIn(
            "Invalid offset + byte size for shared memory region", failures[0]
        )
        self._cleanup_shm_handles()

    def test_infer_integer_overflow(self):
        """Check handling of an offset + byte_size sum that overflows 64 bits.

        Over HTTP the request reaches the server, which must report an integer
        overflow. Over gRPC the client itself rejects the value with a
        ValueError before anything is sent.
        """
        failures = []
        self._configure_server()

        def run_overflowing_infer():
            # offset (32) + byte_size (2**64 - 32) == 2**64
            iu.shm_basic_infer(
                self,
                self.triton_client,
                self._shm_handles[0],
                self._shm_handles[1],
                self._shm_handles[2],
                self._shm_handles[3],
                failures,
                shm_output_offset=32,
                shm_output_byte_size=2**64 - 32,
                protocol=self.protocol,
                use_system_shared_memory=True,
            )

        if self.protocol != "http":
            # The gRPC client utilizes the int64_param and will throw a
            # separate error for values larger than 2**63-1
            try:
                run_overflowing_infer()
                self.assertTrue(
                    False,
                    "Expected gRPC client to fail on value larger than int64_param maximum",
                )
            except ValueError as ex:
                self.assertIn("Value out of range:", str(ex))
            self._cleanup_shm_handles()
            return

        run_overflowing_infer()

        self.assertEqual(len(failures), 1)
        self.assertTrue(
            "Integer overflow detected: byte_size " in failures[0],
            f"Unexpected error message: {failures[0]}",
        )
        self._cleanup_shm_handles()

    def test_register_out_of_bound(self):
        """Registration must fail when offset + byte_size exceeds the shm file.

        Each case registers a region whose (offset + byte_size) goes out of
        bounds of the size the underlying shm object was created with.
        """
        shm_size = self.DEFAULT_SHM_BYTE_SIZE

        # (register_byte_size, register_offset) pairs, in the order they are
        # tried; the first failing case aborts the test.
        out_of_bound_cases = (
            (shm_size + 1, 0),
            (shm_size, 1),
            (1, shm_size),
            (0, shm_size + 1),
        )

        for byte_size, offset in out_of_bound_cases:
            with self.assertRaisesRegex(
                utils.InferenceServerException,
                "failed to register shared memory region.*invalid args",
            ):
                self._configure_server(
                    create_byte_size=shm_size,
                    register_byte_size=byte_size,
                    register_offset=offset,
                )

    def test_python_client_leak(self):
        """Repeatedly create/destroy a shm region and check client RSS growth.

        The test fails only when the resident set size of this process grows
        by more than 2% over the run. A final RSS below the initial value is
        not a leak and is accepted.
        """
        process = psutil.Process()
        initial_mem_usage = process.memory_info().rss / 1024**2
        threshold = initial_mem_usage * 1.02  # 2% tolerance threshold

        byte_size = 4
        for i in range(100000):
            if i % 5000 == 0:
                print(
                    f"[iter: {i:<8}] Memory Usage:",
                    process.memory_info().rss / 1024**2,
                    "MiB",
                )

            shm_handle = shm.create_shared_memory_region(
                "shmtest", "/shmtest", byte_size
            )
            shm.destroy_shared_memory_region(shm_handle)

        final_mem_usage = process.memory_info().rss / 1024**2
        # Only the upper bound matters: requiring final >= initial would flag
        # a run whose memory usage shrank as a "leak".
        self.assertLessEqual(
            final_mem_usage,
            threshold,
            "client memory usage is increasing "
            f"(initial: {initial_mem_usage} MiB, final: {final_mem_usage} MiB)",
        )

    def test_register_reserved_names(self):
        """
        Test that registration fails if attempting to use a reserved
        prefix for the shm key.
        """
        import re

        # This matches kTritonSharedMemoryRegionPrefix in the server code.
        reserved_prefix = "triton_python_backend_shm_region_"
        shm_name = "my_test_shm_name"

        # The shared memory key cannot start with the reserved prefix,
        # regardless of leading slashes.
        shm_keys_to_test = [
            f"{reserved_prefix}_my_test_shm_key",
            f"/{reserved_prefix}_my_test_shm_key",
            f"///{reserved_prefix}_my_test_shm_key",
        ]

        for shm_key in shm_keys_to_test:
            with self.subTest(shm_key=shm_key):
                expected_msg = f"cannot register shared memory region '{shm_name}' with key '{shm_key}' as the key contains the reserved prefix '{reserved_prefix}'"
                # The expected message is a literal, not a pattern: escape it
                # so regex metacharacters in a key can never alter the match.
                with self.assertRaisesRegex(
                    utils.InferenceServerException, re.escape(expected_msg)
                ):
                    self.triton_client.register_system_shared_memory(
                        shm_name, shm_key, 10000
                    )

    def test_register_invalid_shm_key(self):
        """
        Test that registration fails if attempting to use an invalid name for the shm key.
        """
        import re

        shm_name = "my_test_shm_name"
        # Keys consisting solely of slashes.
        shm_keys_to_test = [
            "/",
            "///",
        ]

        for shm_key in shm_keys_to_test:
            with self.subTest(shm_key=shm_key):
                expected_msg = f"cannot register shared memory region '{shm_name}' - invalid shm key '{shm_key}'"
                # The expected message is a literal, not a pattern: escape it
                # so regex metacharacters in a key can never alter the match.
                with self.assertRaisesRegex(
                    utils.InferenceServerException, re.escape(expected_msg)
                ):
                    self.triton_client.register_system_shared_memory(
                        shm_name, shm_key, 10000
                    )


def callback(user_data, result, error):
    """Record the outcome of an async inference in ``user_data``.

    Appends ``error`` when it is truthy, otherwise appends ``result``.
    """
    user_data.append(error or result)


class TestSharedMemoryUnregister(SystemSharedMemoryTestBase):
    """Tests for unregistering system shared memory around in-flight requests.

    Each test issues an asynchronous inference against the `simple` model
    using shared-memory inputs and outputs, then checks (through a separate
    HTTP client on localhost:8000) which regions the server still reports.
    """

    def _create_request_data(self):
        """Reset server shm state and build shm-backed inputs and outputs.

        Returns:
            A tuple ``(inputs, outputs)`` of client objects for the protocol
            under test, each bound to its registered shared memory region.
        """
        self.triton_client.unregister_system_shared_memory()
        self._configure_server()

        if self.protocol == "http":
            inputs = [
                httpclient.InferInput("INPUT0", [1, 16], "INT32"),
                httpclient.InferInput("INPUT1", [1, 16], "INT32"),
            ]
            outputs = [
                httpclient.InferRequestedOutput("OUTPUT0", binary_data=True),
                httpclient.InferRequestedOutput("OUTPUT1", binary_data=False),
            ]
        else:
            inputs = [
                grpcclient.InferInput("INPUT0", [1, 16], "INT32"),
                grpcclient.InferInput("INPUT1", [1, 16], "INT32"),
            ]
            outputs = [
                grpcclient.InferRequestedOutput("OUTPUT0"),
                grpcclient.InferRequestedOutput("OUTPUT1"),
            ]

        inputs[0].set_shared_memory("input0_data", self.DEFAULT_SHM_BYTE_SIZE)
        inputs[1].set_shared_memory("input1_data", self.DEFAULT_SHM_BYTE_SIZE)
        outputs[0].set_shared_memory("output0_data", self.DEFAULT_SHM_BYTE_SIZE)
        outputs[1].set_shared_memory("output1_data", self.DEFAULT_SHM_BYTE_SIZE)

        return inputs, outputs

    def _wait_for_callback(self, user_data, timeout_s):
        """Poll once per second until ``user_data`` is non-empty or time runs out.

        Always sleeps an additional 2 seconds afterwards, as the original
        inline wait loops did, before the caller inspects the server state.
        """
        while len(user_data) == 0 and timeout_s > 0:
            timeout_s -= 1
            time.sleep(1)
        time.sleep(2)

    def _test_unregister_shm_request_pass(self):
        """Unregister all regions mid-inference and verify they still exist."""
        self._test_shm_found()

        # Unregister all should not result in an error.
        # If shared memory regions are in use, they will be marked and unregistered after the inference is completed.
        with httpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        ) as second_client:
            second_client.unregister_system_shared_memory()

        # Number of shared memory regions should be the same as the inference is not completed yet
        self._test_shm_found()

    def _test_shm_not_found(self):
        """Assert that the server no longer knows any of ``self.shm_names``."""
        with httpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        ) as second_client:
            for shm_name in self.shm_names:
                with self.assertRaises(utils.InferenceServerException) as ex:
                    second_client.get_system_shared_memory_status(shm_name)
                # The message check must sit outside the assertRaises block:
                # statements after the raising call inside it never execute.
                self.assertIn(
                    f"Unable to find system shared memory region: '{shm_name}'",
                    str(ex.exception),
                )

    def _test_shm_found(self):
        """Assert that the server reports exactly the regions in ``self.shm_names``."""
        with httpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        ) as second_client:
            status = second_client.get_system_shared_memory_status()

        self.assertEqual(len(status), len(self.shm_names))

        for shm_info in status:
            self.assertIn(shm_info["name"], self.shm_names)

    def test_unregister_shm_during_inference_single_req_http(self):
        """HTTP: regions unregistered mid-inference vanish once it completes."""
        inputs, outputs = self._create_request_data()

        async_request = self.triton_client.async_infer(
            model_name="simple", inputs=inputs, outputs=outputs
        )

        # Ensure inference started
        time.sleep(2)

        # Try unregister shm regions during inference
        self._test_unregister_shm_request_pass()

        # Blocking call
        async_request.get_result()

        # Test that all shm regions are successfully unregistered after inference without needing to call unregister again.
        self._test_shm_not_found()

    def test_unregister_shm_during_inference_multiple_req_http(self):
        """HTTP: regions persist until every request using them has finished."""
        inputs, outputs = self._create_request_data()

        # Place the first request
        async_request = self.triton_client.async_infer(
            model_name="simple", inputs=inputs, outputs=outputs
        )
        # Ensure inference started
        time.sleep(2)

        # Try unregister shm regions during inference
        self._test_unregister_shm_request_pass()
        time.sleep(2)

        # Place the second request
        with httpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        ) as second_client:
            second_async_request = second_client.async_infer(
                model_name="simple", inputs=inputs, outputs=outputs
            )

            # Blocking call
            async_request.get_result()

            # Shm regions will remain available as the second request is still in progress
            self._test_shm_found()

            # Blocking call
            second_async_request.get_result()

        # Verify that all shm regions are successfully unregistered once all inference requests have completed,
        # without needing to manually call unregister again.
        self._test_shm_not_found()

    def test_unregister_shm_after_inference_http(self):
        """HTTP: regions stay registered until unregister is called explicitly."""
        inputs, outputs = self._create_request_data()

        async_request = self.triton_client.async_infer(
            model_name="simple", inputs=inputs, outputs=outputs
        )

        # Ensure inference started
        time.sleep(2)

        # Test all registered shm regions exist during inference.
        self._test_shm_found()

        # Blocking call
        async_request.get_result()

        # Test all registered shm regions exist after inference, as unregister API have not been called.
        self._test_shm_found()

        # Test all shm regions are successfully unregistered after calling the unregister API after inference completed.
        self.triton_client.unregister_system_shared_memory()
        self._test_shm_not_found()

    def test_unregister_shm_during_inference_single_req_grpc(self):
        """gRPC: regions unregistered mid-inference vanish once it completes."""
        inputs, outputs = self._create_request_data()
        user_data = []

        self.triton_client.async_infer(
            model_name="simple",
            inputs=inputs,
            outputs=outputs,
            callback=partial(callback, user_data),
        )

        # Ensure inference started
        time.sleep(2)

        # Try unregister shm regions during inference
        self._test_unregister_shm_request_pass()

        # Wait until the results are available in user_data
        self._wait_for_callback(user_data, 20)

        # Test that all shm regions are successfully unregistered after inference without needing to call unregister again.
        self._test_shm_not_found()

    def test_unregister_shm_during_inference_multiple_req_grpc(self):
        """gRPC: regions persist until every request using them has finished."""
        inputs, outputs = self._create_request_data()
        user_data = []

        self.triton_client.async_infer(
            model_name="simple",
            inputs=inputs,
            outputs=outputs,
            callback=partial(callback, user_data),
        )

        # Ensure inference started
        time.sleep(2)

        # Try unregister shm regions during inference
        self._test_unregister_shm_request_pass()

        # Place the second request
        second_user_data = []
        second_client = grpcclient.InferenceServerClient("localhost:8001", verbose=True)
        second_client.async_infer(
            model_name="simple",
            inputs=inputs,
            outputs=outputs,
            callback=partial(callback, second_user_data),
        )

        # Wait until the 1st request results are available in user_data
        self._wait_for_callback(user_data, 10)

        # Shm regions will remain available as the second request is still in progress
        self._test_shm_found()

        # Wait until the 2nd request results are available in user_data
        self._wait_for_callback(second_user_data, 20)

        # Verify that all shm regions are successfully unregistered once all inference requests have completed,
        # without needing to manually call unregister again.
        self._test_shm_not_found()

    def test_unregister_shm_after_inference_grpc(self):
        """gRPC: regions stay registered until unregister is called explicitly."""
        inputs, outputs = self._create_request_data()
        user_data = []

        self.triton_client.async_infer(
            model_name="simple",
            inputs=inputs,
            outputs=outputs,
            callback=partial(callback, user_data),
        )

        # Ensure inference started
        time.sleep(2)

        # Test all registered shm regions exist during inference.
        self._test_shm_found()

        # Wait until the results are available in user_data
        self._wait_for_callback(user_data, 20)

        # Test all registered shm regions exist after inference, as unregister API have not been called.
        self._test_shm_found()

        # Test all shm regions are successfully unregistered after calling the unregister API after inference completed.
        self.triton_client.unregister_system_shared_memory()
        self._test_shm_not_found()


# Script entry point: hand control to unittest, which runs the test(s) named on
# the command line (e.g. "SharedMemoryTest.<test_name>").
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_shared_memory/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: default to NVIDIA_TRITON_SERVER_VERSION,
# let the first positional argument override it, and abort if neither is set.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Log file, Python test script, and the results file checked after each test.
CLIENT_LOG="./client.log"
SHM_TEST=shared_memory_test.py
TEST_RESULT_FILE='test_results.txt'

# Configure to support test on jetson as well
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
DATADIR=/data/inferenceserver/${REPO_VERSION}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS_EXTRA="--backend-directory=${BACKEND_DIR}"
# Shared QA helpers; run_server and check_test_results used below are expected
# to come from this file.
source ../common/util.sh
# psutil is imported by the Python client memory-leak test.
pip3 install psutil

# RET accumulates the overall result; start from a clean set of logs.
RET=0
rm -fr *.log

# Run each SharedMemoryTest case once per client protocol (http, grpc), with a
# fresh server instance for every (test, protocol) pair.
for i in \
        test_invalid_create_shm \
        test_valid_create_set_register \
        test_unregister_before_register \
        test_unregister_after_register \
        test_reregister_after_register \
        test_unregister_after_inference \
        test_register_after_inference \
        test_too_big_shm \
        test_mixed_raw_shm \
        test_unregisterall \
        test_infer_offset_out_of_bound \
        test_infer_byte_size_out_of_bound \
        test_infer_integer_overflow \
        test_register_out_of_bound \
        test_register_reserved_names \
        test_register_invalid_shm_key \
        test_python_client_leak; do
    for client_type in http grpc; do
        SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 ${SERVER_ARGS_EXTRA}"
        SERVER_LOG="./$i.$client_type.server.log"
        run_server
        # A SERVER_PID of "0" means the server did not start; abort the run.
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        # CLIENT_TYPE is exported for the Python test process.
        export CLIENT_TYPE=$client_type
        # Per-test output goes to a temporary log so only this test's output is
        # printed on failure; it is appended to CLIENT_LOG afterwards.
        TMP_CLIENT_LOG="./tmp_client.log"
        echo "Test: $i, client type: $client_type" >>$TMP_CLIENT_LOG

        # Disable exit-on-error so failures are recorded in RET instead of
        # aborting the script.
        set +e
        python3 $SHM_TEST SharedMemoryTest.$i >>$TMP_CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            cat $TMP_CLIENT_LOG
            echo -e "\n***\n*** Test Failed\n***"
            RET=1
        else
            # Exactly one test is expected to be recorded in the results file.
            check_test_results $TEST_RESULT_FILE 1
            if [ $? -ne 0 ]; then
                cat $TEST_RESULT_FILE
                echo -e "\n***\n*** Test Result Verification Failed\n***"
                RET=1
            fi
        fi
        cat $TMP_CLIENT_LOG >>$CLIENT_LOG
        rm $TMP_CLIENT_LOG
        # Stop the server; a non-zero exit status from it counts as a failure.
        kill $SERVER_PID
        wait $SERVER_PID
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test Server shut down non-gracefully\n***"
            RET=1
        fi
        set -e
    done
done

# Build a separate model repository in which the "simple" model is backed by
# the execute_delayed_model Python model.
mkdir -p python_models/simple/1/
cp ../python_models/execute_delayed_model/model.py ./python_models/simple/1/
cp ../python_models/execute_delayed_model/config.pbtxt ./python_models/simple/

# Run the TestSharedMemoryUnregister cases; the protocol is part of the Python
# test method name (<test_case>_<client_type>).
for test_case in \
        test_unregister_shm_during_inference_single_req \
        test_unregister_shm_during_inference_multiple_req \
        test_unregister_shm_after_inference; do
    for client_type in http grpc; do
        SERVER_ARGS="--model-repository=`pwd`/python_models --log-verbose=1 ${SERVER_ARGS_EXTRA}"
        SERVER_LOG="./${test_case}_${client_type}.server.log"
        run_server
        # A SERVER_PID of "0" means the server did not start; abort the run.
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        export CLIENT_TYPE=$client_type
        # Each (test, protocol) pair gets its own client log.
        CLIENT_LOG="./${test_case}_${client_type}.client.log"
        # Disable exit-on-error so failures are recorded in RET instead of
        # aborting the script.
        set +e
        python3 $SHM_TEST "TestSharedMemoryUnregister.${test_case}_${client_type}" >>"$CLIENT_LOG" 2>&1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Failed - ${test_case}_${client_type}\n***"
            RET=1
        else
            # Exactly one test is expected to be recorded in the results file.
            check_test_results $TEST_RESULT_FILE 1
            if [ $? -ne 0 ]; then
                cat $TEST_RESULT_FILE
                echo -e "\n***\n*** Test Result Verification Failed - ${test_case}_${client_type}\n***"
                RET=1
            fi
        fi

        # Stop the server; a non-zero exit status from it counts as a failure.
        kill $SERVER_PID
        wait $SERVER_PID
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test Server shut down non-gracefully\n***"
            RET=1
        fi
        set -e
    done
done

# Test large system shared memory offset
# Start from an empty models directory so only the models prepared below load.
rm -rf models/*
# prepare add_sub model of various backends
# BACKENDS may be supplied by the environment as a whitespace-separated list.
BACKENDS=${BACKENDS:-"python onnx libtorch plan openvino"}
for backend in ${BACKENDS} ; do
    model="${backend}_int32_int32_int32"
    model_dir="models/${model}"
    if [[ $backend == "python" ]]; then
        # The Python model is built from the add_sub sample: switch its tensor
        # types from FP32 to INT32 and append a max batch size of 8.
        mkdir -p ${model_dir}/1
        cp ../python_models/add_sub/model.py ${model_dir}/1/
        cp ../python_models/add_sub/config.pbtxt ${model_dir}/
        sed -i 's/TYPE_FP32/TYPE_INT32/g' ${model_dir}/config.pbtxt
        echo "max_batch_size: 8" >> ${model_dir}/config.pbtxt
    else
        # Other backends copy version 1, the config and the label file from
        # the QA model repository under DATADIR.
        mkdir -p ${model_dir}
        cp -r $DATADIR/qa_model_repository/${model}/1 ${model_dir}/1
        cp $DATADIR/qa_model_repository/${model}/config.pbtxt ${model_dir}/
        cp $DATADIR/qa_model_repository/${model}/output0_labels.txt ${model_dir}/
        # The OpenVINO model additionally gets ENABLE_BATCH_PADDING=YES.
        if [ $backend == "openvino" ]; then
            echo 'parameters { key: "ENABLE_BATCH_PADDING" value { string_value: "YES" } }' >> models/${model}/config.pbtxt
        fi
    fi
done

# Run the large register-offset test once per client protocol against the
# models currently in ./models, with a fresh server for each protocol.
test_case="test_large_shm_register_offset"
for client_type in http grpc; do
    SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 ${SERVER_ARGS_EXTRA}"
    SERVER_LOG="./${test_case}.${client_type}.server.log"
    run_server
    # A SERVER_PID of "0" means the server did not start; abort the run.
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    export CLIENT_TYPE=$client_type
    CLIENT_LOG="./${test_case}.${client_type}.client.log"
    # Disable exit-on-error so failures are recorded in RET instead of
    # aborting the script.
    set +e
    python3 $SHM_TEST SharedMemoryTest.${test_case} >>"$CLIENT_LOG" 2>&1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed - ${client_type}\n***"
        RET=1
    else
        # Verify the results file as the other test loops in this script do;
        # exactly one test is expected to be recorded.
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $TEST_RESULT_FILE
            echo -e "\n***\n*** Test Result Verification Failed - ${test_case}_${client_type}\n***"
            RET=1
        fi
    fi

    # Stop the server; a non-zero exit status from it counts as a failure.
    kill $SERVER_PID
    wait $SERVER_PID
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Test Server shut down non-gracefully\n***"
        RET=1
    fi
    set -e
done

# Print the overall verdict and exit with the accumulated status (0 only when
# every test and every server shutdown above succeeded).
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test Failed\n***"
fi

exit $RET


================================================
FILE: qa/L0_simple_ensemble/backpressure_test_models/decoupled_producer/1/model.py
================================================
# Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import time

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    Python-backend model that streams a requested number of responses.

    For each request, the single value read from input "IN" is the number of
    responses to send; every response carries "OUT" = [[0.5]] (float32).
    """

    def execute(self, requests):
        """
        Stream the responses for every request through its response sender.

        Nothing is returned to the backend directly: results are delivered
        via the sender, followed by a final-flag message per request.
        """
        for req in requests:
            # "IN" holds one integer: how many responses this request wants
            num_responses = (
                pb_utils.get_input_tensor_by_name(req, "IN").as_numpy().item()
            )

            sender = req.get_response_sender()
            # The same output tensor is reused for every response
            payload = pb_utils.Tensor("OUT", np.array([[0.5]], dtype=np.float32))

            for _ in range(num_responses):
                time.sleep(0.1)  # artificial latency between responses
                sender.send(pb_utils.InferenceResponse(output_tensors=[payload]))

            # Mark the end of the response stream for this request
            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        return None


================================================
FILE: qa/L0_simple_ensemble/backpressure_test_models/decoupled_producer/config.pbtxt
================================================
# Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# Python-backend model used by the ensemble backpressure test models; its
# model.py reads "IN" as a count and streams that many "OUT" responses.
name: "decoupled_producer"
backend: "python"
max_batch_size: 1

# "IN": number of responses the model should produce for the request
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# "OUT": value carried by each produced response (model.py always sends 0.5)
output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# A single CPU instance
instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]

# Decoupled policy: model.py delivers its responses through the request's
# response sender and finishes with a separate final-flag message.
model_transaction_policy {
  decoupled: true
}


================================================
FILE: qa/L0_simple_ensemble/backpressure_test_models/ensemble_disabled_max_inflight_requests/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# Two-step ensemble: decoupled_producer -> slow_consumer.
# No max_inflight_requests setting appears in this config (hence the
# "disabled" directory name).
platform: "ensemble"
max_batch_size: 0

# Ensemble input, forwarded to decoupled_producer's "IN"
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

# Ensemble output, taken from slow_consumer's "OUTPUT0"
output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

ensemble_scheduling {
  step [
    {
      # Step 1: the producer's "OUT" becomes the internal tensor "intermediate"
      model_name: "decoupled_producer"
      model_version: -1
      input_map {
        key: "IN"
        value: "IN"
      }
      output_map {
        key: "OUT"
        value: "intermediate"
      }
    },
    {
      # Step 2: the consumer reads "intermediate" and yields the ensemble "OUT"
      model_name: "slow_consumer"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "intermediate"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUT"
      }
    }
  ]
}


================================================
FILE: qa/L0_simple_ensemble/ensemble_backpressure_test.py
================================================
#!/usr/bin/env python3

# Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import queue
import threading
import time
import unittest
from contextlib import ExitStack
from functools import partial

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

# gRPC endpoint handed to grpcclient.InferenceServerClient by the tests below
SERVER_URL = "localhost:8001"
# Seconds collect_responses() waits on the response queue before giving up
DEFAULT_RESPONSE_TIMEOUT = 60
# Value every received "OUT" element is compared against
EXPECTED_INFER_OUTPUT = 0.5
# Names of the ensemble models exercised by EnsembleBackpressureTest
MODEL_ENSEMBLE_DISABLED = "ensemble_disabled_max_inflight_requests"
MODEL_ENSEMBLE_LIMIT_4 = "ensemble_max_inflight_requests_limit_4"
MODEL_ENSEMBLE_LIMIT_1 = "ensemble_max_inflight_requests_limit_1"


class UserData:
    """Per-stream holder for the queue that receives results and errors."""

    def __init__(self):
        # maxsize=0 means the queue is unbounded (same as the default)
        self._response_queue = queue.Queue(maxsize=0)


def callback(user_data, result, error):
    """
    Stream callback: enqueue the error when one is reported, else the result.
    """
    item = error if error else result
    user_data._response_queue.put(item)


def prepare_infer_args(input_value):
    """
    Build the request tensors for one streaming inference.

    input_value is wrapped in a one-element INT32 array bound to input "IN",
    and output "OUT" is requested. Returns the pair (inputs, outputs), each a
    single-element list.
    """
    payload = np.array([input_value], dtype=np.int32)
    in_tensor = grpcclient.InferInput("IN", payload.shape, "INT32")
    in_tensor.set_data_from_numpy(payload)
    requested = grpcclient.InferRequestedOutput("OUT")
    return [in_tensor], [requested]


def collect_responses(user_data):
    """
    Drain a stream's response queue until the stream terminates.

    Items are read from user_data._response_queue until either an error
    arrives or a response flagged with "triton_final_response" is seen.

    Returns:
        (errors, responses): errors holds at most one
        InferenceServerException, since collection stops at the first error;
        responses holds every result that carried at least one output.

    Raises:
        Exception: if no item arrives within DEFAULT_RESPONSE_TIMEOUT seconds.
    """
    errors = []
    responses = []
    while True:
        try:
            result = user_data._response_queue.get(timeout=DEFAULT_RESPONSE_TIMEOUT)
        except queue.Empty:
            # The queue.Empty context adds nothing useful to the report
            raise Exception(
                f"No response received within {DEFAULT_RESPONSE_TIMEOUT} seconds."
            ) from None

        # isinstance (not an exact type match) so that subclasses of
        # InferenceServerException are treated as errors too instead of
        # falling through to get_response().
        if isinstance(result, InferenceServerException):
            errors.append(result)
            # error responses are final - stream terminates
            break

        response = result.get_response()
        # Keep only responses with data (skip an empty final-only response)
        if len(response.outputs) > 0:
            responses.append(result)

        # Stop once the server marks the response as final
        final = response.parameters.get("triton_final_response")
        if final and final.bool_param:
            break

    return errors, responses


class EnsembleBackpressureTest(tu.TestResultCollector):
    """
    Exercises ensemble models configured with max_inflight_requests
    (the ensemble backpressure feature).
    """

    def _run_inference(self, model_name, expected_responses_count=32):
        """
        Send one streaming request and validate everything that comes back.

        The request asks for expected_responses_count responses. The check
        passes only when exactly that many data responses arrive, no error is
        reported, and each "OUT" value matches EXPECTED_INFER_OUTPUT.
        """
        stream_state = UserData()
        with grpcclient.InferenceServerClient(SERVER_URL) as client:
            try:
                infer_inputs, infer_outputs = prepare_infer_args(
                    expected_responses_count
                )
                client.start_stream(callback=partial(callback, stream_state))
                client.async_stream_infer(
                    model_name=model_name,
                    inputs=infer_inputs,
                    outputs=infer_outputs,
                )

                # Gather the whole stream, then check counts
                errors, responses = collect_responses(stream_state)
                received = len(responses)
                self.assertEqual(
                    received,
                    expected_responses_count,
                    f"Expected {expected_responses_count} responses, got {received}",
                )
                error_count = len(errors)
                self.assertEqual(
                    error_count,
                    0,
                    f"Expected no errors during inference, got {error_count} errors",
                )

                # Every response must carry the expected value
                for idx, resp in enumerate(responses):
                    value = resp.as_numpy("OUT")[0]
                    self.assertAlmostEqual(
                        value,
                        EXPECTED_INFER_OUTPUT,
                        places=5,
                        msg=f"Response {idx} has incorrect value - {value}",
                    )
            finally:
                client.stop_stream()

    def test_max_inflight_requests_limit_4(self):
        """
        A full stream completes correctly on the ensemble limited to 4
        inflight requests.
        """
        self._run_inference(model_name=MODEL_ENSEMBLE_LIMIT_4)

    def test_max_inflight_requests_limit_1(self):
        """
        Edge case: the ensemble limited to a single inflight request.
        """
        self._run_inference(model_name=MODEL_ENSEMBLE_LIMIT_1)

    def test_max_inflight_requests_limit_disabled(self):
        """
        An ensemble without the max_inflight_requests parameter still works.
        """
        self._run_inference(model_name=MODEL_ENSEMBLE_DISABLED)

    def test_max_inflight_requests_limit_concurrent_requests(self):
        """
        Several streams run against the limited ensemble at the same time;
        each one must receive its complete, correct set of responses.
        """
        num_concurrent = 8
        expected_per_request = 8
        user_datas = [UserData() for _ in range(num_concurrent)]

        with ExitStack() as stack:
            # One client (and therefore one stream) per concurrent request
            clients = []
            for _ in range(num_concurrent):
                clients.append(
                    stack.enter_context(grpcclient.InferenceServerClient(SERVER_URL))
                )

            inputs, outputs = prepare_infer_args(expected_per_request)

            # Fire all requests before collecting anything
            for client, state in zip(clients, user_datas):
                client.start_stream(callback=partial(callback, state))
                client.async_stream_infer(
                    model_name=MODEL_ENSEMBLE_LIMIT_4, inputs=inputs, outputs=outputs
                )

            # Validate each stream in turn
            for i, state in enumerate(user_datas):
                errors, responses = collect_responses(state)
                received = len(responses)
                self.assertEqual(
                    received,
                    expected_per_request,
                    f"Request {i}: expected {expected_per_request} responses, got {received}",
                )
                error_count = len(errors)
                self.assertEqual(
                    error_count,
                    0,
                    f"Request {i}: Expected no errors during inference, got {error_count} errors",
                )
                for idx, resp in enumerate(responses):
                    value = resp.as_numpy("OUT")[0]
                    self.assertAlmostEqual(
                        value,
                        EXPECTED_INFER_OUTPUT,
                        places=5,
                        msg=f"Response {idx} for request {i} has incorrect value - {value}",
                    )

            # Shut every stream down
            for client in clients:
                client.stop_stream()

    def test_max_inflight_requests_limit_request_cancellation(self):
        """
        Cancelling a stream mid-flight must yield a CANCELLED error on the
        client and only a partial set of responses.
        """
        # Ask for many responses so the request is still running when it is
        # cancelled. The model is configured with max_inflight_requests = 4.
        input_value = 32
        user_data = UserData()

        with grpcclient.InferenceServerClient(SERVER_URL) as triton_client:
            inputs, outputs = prepare_infer_args(input_value)
            triton_client.start_stream(callback=partial(callback, user_data))
            triton_client.async_stream_infer(
                model_name=MODEL_ENSEMBLE_LIMIT_4, inputs=inputs, outputs=outputs
            )

            pending = user_data._response_queue
            responses = []

            # Wait for the first item so we know the stream is producing
            try:
                first = pending.get(timeout=5)
            except queue.Empty:
                self.fail("Stream did not produce any response before cancellation.")
            if isinstance(first, InferenceServerException):
                self.fail(f"Got error before cancellation: {first}")
            if len(first.get_response().outputs) > 0:
                responses.append(first)

            # Cancel the stream; the server is expected to answer with CANCELLED
            triton_client.stop_stream(cancel_requests=True)

            # Give the cancellation a moment to propagate
            time.sleep(1)

            # Drain what is left until an error, a final response, or silence
            cancellation_found = False
            while True:
                try:
                    item = pending.get(timeout=1)
                except queue.Empty:
                    break

                if isinstance(item, InferenceServerException):
                    self.assertEqual(
                        item.status(),
                        "StatusCode.CANCELLED",
                        f"Expected CANCELLED status, got: {item.status()}",
                    )
                    cancellation_found = True
                    break

                response = item.get_response()
                if len(response.outputs) > 0:
                    responses.append(item)
                final = response.parameters.get("triton_final_response")
                if final and final.bool_param:
                    break

            # The cancellation error must have reached the client
            self.assertTrue(
                cancellation_found,
                "Did not receive the expected cancellation error from the server.",
            )

            # Some, but not all, responses must have been delivered
            received = len(responses)
            self.assertLess(
                received,
                input_value,
                "Expected partial responses due to cancellation, but received all of them.",
            )
            self.assertGreater(
                received,
                0,
                "Expected to receive at least one response before cancellation.",
            )


class EnsembleStepMaxQueueSizeTest(tu.TestResultCollector):
    """
    Tests for ensembles whose composing steps are configured with
    max_queue_size.
    """

    def _run_inference(self, model_name, expected_responses_count):
        """
        Helper function for streaming inference.

        For decoupled streaming ensembles with queue limit on internal step:
        - Each producer response creates an independent flow through the ensemble
        - Flows that complete before error is set send their outputs successfully
        - Once error occurs (queue full), stream terminates with error
        - Result: 0-N successful responses + 1 error (N depends on timing)
        """
        user_data = UserData()
        with grpcclient.InferenceServerClient(SERVER_URL) as triton_client:
            try:
                inputs, outputs = prepare_infer_args(expected_responses_count)
                triton_client.start_stream(callback=partial(callback, user_data))
                triton_client.async_stream_infer(
                    model_name=model_name, inputs=inputs, outputs=outputs
                )

                # Collect and verify responses. Zero successful responses is
                # acceptable (timing dependent), so only an upper bound is
                # checked.
                errors, responses = collect_responses(user_data)
                self.assertLess(
                    len(responses),
                    expected_responses_count,
                    f"Should have fewer than {expected_responses_count} responses (some flows failed)",
                )
                self.assertEqual(
                    len(errors),
                    1,
                    "Expected exactly one error when queue full terminates stream",
                )

                # Verify correctness of successful responses
                for idx, resp in enumerate(responses):
                    output = resp.as_numpy("OUT")
                    self.assertAlmostEqual(
                        output[0],
                        EXPECTED_INFER_OUTPUT,
                        places=5,
                        msg=f"Response {idx} has incorrect value - {output[0]}",
                    )

                # Verify error is queue-full error
                self.assertIn(
                    "Exceeds maximum queue size",
                    str(errors[0]),
                    f"Expected queue size error, got: {str(errors[0])}",
                )
            finally:
                triton_client.stop_stream()

    def _run_concurrent_inference(self, model_name, expected_responses_count):
        """
        Helper function for concurrent independent requests.
        Each request either succeeds completely or fails completely.

        Returns:
            (num_successes, num_errors): (expected_responses_count, 0) for a
            successful request, (0, 1) for a rejected one.
        """
        user_data = UserData()
        with grpcclient.InferenceServerClient(SERVER_URL) as triton_client:
            try:
                inputs, outputs = prepare_infer_args(expected_responses_count)
                triton_client.start_stream(callback=partial(callback, user_data))
                triton_client.async_stream_infer(
                    model_name=model_name, inputs=inputs, outputs=outputs
                )

                # Collect responses
                errors, responses = collect_responses(user_data)

                # For concurrent independent requests with queue limit on internal step:
                # - Requests that arrive before queue fills: succeed with all outputs
                # - Requests that arrive after queue fills: fail with error
                total = len(responses) + len(errors)
                self.assertEqual(
                    total,
                    expected_responses_count,
                    f"Expected {expected_responses_count} total responses, got {total}",
                )

                if errors:
                    # This request failed
                    self.assertEqual(
                        len(responses),
                        0,
                        "Failed request should have no successful outputs",
                    )
                    self.assertEqual(
                        len(errors), 1, "Failed request should have exactly one error"
                    )
                    self.assertIn(
                        "Exceeds maximum queue size",
                        str(errors[0]),
                        f"Expected queue size error, got: {str(errors[0])}",
                    )
                    return (0, 1)  # 0 successes, 1 error

                # This request succeeded
                self.assertEqual(
                    len(responses),
                    expected_responses_count,
                    f"Successful request should have all {expected_responses_count} outputs",
                )
                # Verify correctness of successful responses
                for idx, resp in enumerate(responses):
                    output = resp.as_numpy("OUT")
                    self.assertAlmostEqual(
                        output[0],
                        EXPECTED_INFER_OUTPUT,
                        places=5,
                        msg=f"Response {idx} has incorrect value - {output[0]}",
                    )
                return (expected_responses_count, 0)  # N successes, 0 errors
            finally:
                triton_client.stop_stream()

    def test_step1_max_queue_size(self):
        """
        Test max_queue_size on step 1 (decoupled_producer).

        Trigger 32 concurrent ensemble requests, each producing 1 response
        - Step 1 (producer) has max_queue_size limit
        - Some ensemble requests succeed completely (before queue fills)
        - Some fail completely (when producer queue is full)
        """
        model_name = "ensemble_step1_enabled_max_queue_size"
        num_requests = 32
        join_timeout = 60

        # Results and failures reported by the worker threads
        results = []
        failures = []

        def thread_wrapper(model_name, expected_count, results_list):
            """Run one request; record its result or the exception it raised."""
            try:
                results_list.append(
                    self._run_concurrent_inference(model_name, expected_count)
                )
            except Exception as exc:
                # An exception raised in a worker thread (including a failed
                # assertion) never reaches the test runner on its own, so
                # keep it for the main thread to report.
                failures.append(exc)

        # Launch concurrent threads to perform infer requests
        threads = []
        for _ in range(num_requests):
            t = threading.Thread(target=thread_wrapper, args=(model_name, 1, results))
            threads.append(t)
            t.start()

        # Wait for all requests to complete
        for t in threads:
            t.join(timeout=join_timeout)

        # join() returns silently on timeout; make a hung request visible
        stuck = [t for t in threads if t.is_alive()]
        self.assertFalse(
            stuck,
            f"{len(stuck)} of {num_requests} request threads did not finish "
            f"within {join_timeout} seconds",
        )

        # Surface the first worker failure with its original traceback
        if failures:
            raise self.failureException(
                f"{len(failures)} of {num_requests} request threads failed; "
                f"first failure: {failures[0]!r}"
            ) from failures[0]

        # Aggregate results from all threads
        total_successes = sum(r[0] for r in results)
        total_errors = sum(r[1] for r in results)

        # Verify aggregate behavior
        self.assertEqual(
            total_successes + total_errors,
            num_requests,
            f"Expected {num_requests} total results (successes + errors), "
            f"got {total_successes} successes + {total_errors} errors = {total_successes + total_errors}",
        )

        # Verify at least some errors occurred (queue limit was hit)
        self.assertGreater(
            total_errors,
            0,
            f"Expected some errors due to max_queue_size limit, "
            f"but all {num_requests} requests succeeded.",
        )

        # Verify at least some successes occurred (not all rejected)
        self.assertGreater(
            total_successes,
            0,
            f"Expected some successful requests before queue filled, "
            f"but all {num_requests} requests failed.",
        )

    def test_step2_max_queue_size(self):
        """
        Test max_queue_size on step 2 (slow_consumer).

        Trigger 1 streaming ensemble request producing 32 responses
        - Step 1 (producer) generates 32 responses rapidly (every 100ms)
        - Step 2 (consumer) has max_queue_size=5 and processes slowly (500ms each)
        - Each producer response is an independent request to the second step
          through the ensemble flow
        - Some requests complete successfully before queue fills
        - When queue fills, error is set and stream terminates
        - All inflight steps drain, then error response sent to client
        """
        model_name = "ensemble_step2_enabled_max_queue_size"
        self._run_inference(model_name=model_name, expected_responses_count=32)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_simple_ensemble/ensemble_test.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import random
import sys
import time
from functools import partial

import numpy as np
import tritonclient.grpc as grpcclient

sys.path.append("../common")
sys.path.append("../clients")

import logging
import unittest

import infer_util as iu
import numpy as np
import test_util as tu
import tritonhttpclient


# Utility iterator that generates N requests with the appropriate sequence flags
class RequestGenerator:
    """
    Iterator over the parameters of a sequence of ``num_requests`` requests.

    Each item is a tuple ``(start, end, index, value)``:
      * start - True only for the first request of the sequence
      * end   - True only for the last request of the sequence
      * index - zero-based position of the request
      * value - ``init_value + index``

    The object can also be used as a context manager; entering returns the
    generator itself and exiting does nothing.
    """

    def __init__(self, init_value, num_requests) -> None:
        self.count = 0
        self.init_value = init_value
        self.num_requests = num_requests

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Nothing to release. Defined so that `with RequestGenerator(...)`
        # works; returning False lets any exception propagate.
        return False

    def __iter__(self):
        return self

    def __next__(self) -> tuple:
        index = self.count
        # ">=" (not "==") so a zero or negative num_requests yields an empty
        # sequence instead of iterating forever.
        if index >= self.num_requests:
            raise StopIteration
        self.count = index + 1
        is_first = index == 0
        is_last = index == self.num_requests - 1
        return is_first, is_last, index, self.init_value + index


class EnsembleTest(tu.TestResultCollector):
    def _get_infer_count_per_version(self, model_name):
        """Return the successful inference counts of a two-version model.

        Queries the inference statistics of ``model_name`` over HTTP on
        localhost:8000 and asserts that exactly two entries are reported,
        all of them for ``model_name``.

        Returns a two-element list: index 0 holds the success count of
        version "1" and index 1 the success count of version "2". Any other
        version fails the test.
        """
        # The context manager closes the HTTP client as soon as the
        # statistics have been fetched instead of leaving it to the GC.
        with tritonhttpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        ) as triton_client:
            stats = triton_client.get_inference_statistics(model_name)

        self.assertEqual(len(stats["model_stats"]), 2)
        infer_count = [0, 0]
        for model_stat in stats["model_stats"]:
            self.assertEqual(
                model_stat["name"], model_name, "expected stats for model " + model_name
            )
            model_version = model_stat["version"]
            if model_version not in ("1", "2"):
                self.fail(
                    "unexpected version {} for model {}".format(
                        model_version, model_name
                    )
                )
            # Version "1" -> slot 0, version "2" -> slot 1
            infer_count[int(model_version) - 1] = model_stat["inference_stats"][
                "success"
            ]["count"]
        return infer_count

    def test_ensemble_add_sub(self):
        # Run the add/sub ensemble with every output requested, at batch
        # sizes 1 and 8, then check that both versions of the composing
        # 'simple' model served the same number of inferences.
        for bs in (1, 8):
            iu.infer_exact(
                self, "ensemble_add_sub", (bs, 16), bs, np.int32, np.int32, np.int32
            )

        infer_count = self._get_infer_count_per_version("simple")
        # The two 'simple' versions should have the same infer count.
        # assertEqual reports both counts when they differ.
        self.assertEqual(
            infer_count[0],
            infer_count[1],
            "unexpeced different infer count for different 'simple' versions",
        )

    def test_ensemble_add_sub_one_output(self):
        # Run the add/sub ensemble requesting only OUTPUT0, at batch sizes
        # 1 and 8, then check that only the steps needed for OUTPUT0 ran.
        for bs in (1, 8):
            iu.infer_exact(
                self,
                "ensemble_add_sub",
                (bs, 16),
                bs,
                np.int32,
                np.int32,
                np.int32,
                outputs=("OUTPUT0",),
            )

        infer_count = self._get_infer_count_per_version("simple")
        # Only 'simple' version 2 should have non-zero infer count
        # as it is in charge of producing OUTPUT0
        self.assertEqual(
            infer_count[0], 0, "unexpeced non-zero infer count for 'simple' version 1"
        )
        self.assertNotEqual(
            infer_count[1], 0, "unexpeced zero infer count for 'simple' version 2"
        )

    def test_ensemble_sequence_flags(self):
        # Send a 3-request sequence to the ensemble over a gRPC stream and
        # verify that the sequence_start / sequence_end parameters of each
        # response match the flags that were sent with the request.
        request_generator = RequestGenerator(0, 3)
        # 3 request made expect the START of 1st req to be true and
        # END of last request to be true
        expected_flags = [[True, False], [False, False], [False, True]]
        response_flags = []
        response_errors = []

        def callback(result, error):
            # A failed request is delivered through `error`; record it
            # instead of dereferencing `result`, so the failure is reported
            # by the assertion below rather than lost in the stream thread.
            if error is not None:
                response_errors.append(error)
                return
            response = result.get_response()
            response_flags.append(
                [
                    response.parameters["sequence_start"].bool_param,
                    response.parameters["sequence_end"].bool_param,
                ]
            )

        correlation_id = random.randint(1, 2**31 - 1)
        # create input tensors
        input0_data = np.random.randint(0, 100, size=(1, 16), dtype=np.int32)
        input1_data = np.random.randint(0, 100, size=(1, 16), dtype=np.int32)

        inputs = [
            grpcclient.InferInput("INPUT0", input0_data.shape, "INT32"),
            grpcclient.InferInput("INPUT1", input1_data.shape, "INT32"),
        ]

        inputs[0].set_data_from_numpy(input0_data)
        inputs[1].set_data_from_numpy(input1_data)

        # create output tensors
        outputs = [grpcclient.InferRequestedOutput("OUTPUT0")]

        # The context manager closes the client even if a request fails.
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            triton_client.start_stream(callback=callback)
            try:
                for sequence_start, sequence_end, count, _ in request_generator:
                    triton_client.async_stream_infer(
                        model_name="ensemble_add_sub_int32_int32_int32",
                        inputs=inputs,
                        outputs=outputs,
                        request_id=f"{correlation_id}_{count}",
                        sequence_id=correlation_id,
                        sequence_start=sequence_start,
                        sequence_end=sequence_end,
                    )
                # Wait until every request has been answered, bounded by a
                # deadline, instead of sleeping for a fixed time.
                deadline = time.time() + 10
                while (
                    len(response_flags) + len(response_errors) < len(expected_flags)
                    and time.time() < deadline
                ):
                    time.sleep(0.05)
            finally:
                # Always tear the stream down so no handler thread is left
                # running after the test.
                triton_client.stop_stream()

        self.assertEqual(response_errors, [], "unexpected error in stream response")
        self.assertEqual(
            expected_flags, response_flags, "unexpeced sequence flags mismatch error"
        )

    def test_ensemble_partial_add_sub(self):
        # Request both outputs from an ensemble whose last composing model
        # produces only OUTPUT0. The response must carry OUTPUT0 but not
        # OUTPUT1, and the server log (path taken from the SERVER_LOG
        # environment variable) must mention the skipped tensor afterwards.
        skipped_notice = "Composing models did not output tensor OUTPUT1"

        def read_server_log():
            with open(os.environ["SERVER_LOG"]) as log_file:
                return log_file.read()

        # precondition: nothing has reported a skipped OUTPUT1 yet
        self.assertNotIn(skipped_notice, read_server_log(), "test precondition not met")

        # build the two INT32 input tensors from random data
        input0_np = np.random.randint(0, 100, size=(1, 16), dtype=np.int32)
        input1_np = np.random.randint(0, 100, size=(1, 16), dtype=np.int32)
        inputs = []
        for tensor_name, tensor_data in (("INPUT0", input0_np), ("INPUT1", input1_np)):
            infer_input = grpcclient.InferInput(tensor_name, tensor_data.shape, "INT32")
            infer_input.set_data_from_numpy(tensor_data)
            inputs.append(infer_input)

        # ask for every output the ensemble declares
        outputs = [
            grpcclient.InferRequestedOutput(tensor_name)
            for tensor_name in ("OUTPUT0", "OUTPUT1")
        ]

        with grpcclient.InferenceServerClient("localhost:8001") as client:
            result = client.infer(
                "ensemble_partial_add_sub", inputs=inputs, outputs=outputs
            )

        # OUTPUT0 must be INPUT0 plus the intermediate (INPUT1 - INPUT1)
        expected_output0_np = input0_np + (input1_np - input1_np)
        self.assertTrue(np.allclose(result.as_numpy("OUTPUT0"), expected_output0_np))
        # OUTPUT1 must be absent from the response
        self.assertIsNone(result.as_numpy("OUTPUT1"))
        # and the ensemble must have logged that it was skipped
        self.assertIn(skipped_notice, read_server_log())


# Script entry point: test.sh invokes this file with the name of a single
# EnsembleTest method on the command line.
if __name__ == "__main__":
    # Route logging output to stderr.
    logging.basicConfig(stream=sys.stderr)
    # Let unittest parse the command line and run the selected test(s).
    unittest.main()


================================================
FILE: qa/L0_simple_ensemble/models/ensemble_add_sub_int32_int32_int32/config.pbtxt
================================================
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble that chains six invocations of the 'simple' model, alternating
# between the latest version (model_version: -1) and version 1.
# In every input_map / output_map below, "key" is the tensor name on the
# composing model and "value" is the tensor name inside the ensemble.
# NOTE(review): the 'simple' model file is not visible here; the tensor
# names suggest OUTPUT0 = INPUT0 + INPUT1 and OUTPUT1 = INPUT0 - INPUT1.
name: "ensemble_add_sub_int32_int32_int32"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    # Step 1 (latest version): (INPUT0, INPUT0) -> OUTPUT0 as "double_input0"
    {
      model_name: "simple"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "double_input0"
      }
    },
    # Step 2 (version 1): (INPUT1, INPUT1) -> OUTPUT0 as "double_input1"
    {
      model_name: "simple"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "INPUT1"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "double_input1"
      }
    },
    # Step 3 (latest version): (double_input0, INPUT0) -> OUTPUT1 as "input0_val"
    {
      model_name: "simple"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "double_input0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "input0_val"
      }
    },
    # Step 4 (version 1): (double_input1, INPUT1) -> OUTPUT1 as "input1_val"
    {
      model_name: "simple"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "double_input1"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT1"
        value: "input1_val"
      }
    },
    # Step 5 (latest version): (input0_val, INPUT1) -> ensemble OUTPUT0.
    # Only latest-version steps (1, 3, 5) feed the ensemble's OUTPUT0.
    {
      model_name: "simple"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "input0_val"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
    },
    # Step 6 (version 1): (INPUT0, input1_val) -> ensemble OUTPUT1.
    # Only version-1 steps (2, 4, 6) feed the ensemble's OUTPUT1.
    {
      model_name: "simple"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "input1_val"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_simple_ensemble/models/ensemble_partial_add_sub/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Two-step ensemble used to exercise a composing model that does not emit
# every output it declares. No "name" field is given here.
# In every input_map / output_map below, "key" is the tensor name on the
# composing model and "value" is the tensor name inside the ensemble.
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    # Step 1: 'simple' receives the ensemble INPUT1 on both of its inputs;
    # its OUTPUT1 becomes the internal tensor "intermediate_1".
    {
      model_name: "simple"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT1"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT1"
        value: "intermediate_1"
      }
    },
    # Step 2: 'partial_add_sub' combines INPUT0 with "intermediate_1".
    # Both of its outputs are mapped to ensemble outputs, although the
    # partial_add_sub model.py only ever returns OUTPUT0.
    {
      model_name: "partial_add_sub"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "intermediate_1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/L0_simple_ensemble/models/partial_add_sub/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python backend model that answers with OUTPUT0 only.

    OUTPUT0 is the element-wise sum of INPUT0 and INPUT1 cast to int32.
    OUTPUT1 is deliberately never produced.
    """

    def execute(self, requests):
        """Return one response, holding only OUTPUT0, per incoming request."""

        def build_response(request):
            lhs = pb_utils.get_input_tensor_by_name(request, "INPUT0").as_numpy()
            rhs = pb_utils.get_input_tensor_by_name(request, "INPUT1").as_numpy()
            total = lhs + rhs
            # OUTPUT1 is skipped on purpose: only OUTPUT0 goes in the response
            return pb_utils.InferenceResponse(
                [pb_utils.Tensor("OUTPUT0", total.astype(np.int32))]
            )

        return [build_response(request) for request in requests]


================================================
FILE: qa/L0_simple_ensemble/models/partial_add_sub/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model with two INT32 inputs and two declared INT32 outputs.
# The accompanying model.py returns OUTPUT0 only, so OUTPUT1 is declared
# here but never produced.
backend: "python"
max_batch_size: 8
# "input" and "output" are repeated fields, written as one block per tensor.
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
# Run the model instance on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/L0_simple_ensemble/models/simple/config.pbtxt
================================================
# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Composing model for the ensemble tests: two INT32 inputs and two INT32
# outputs of 16 elements each, served by the ONNX Runtime platform on CPU.
name: "simple"
platform: "onnxruntime_onnx"
max_batch_size: 8
# Make every version in the model directory available; the ensemble
# configs address version 1 explicitly as well as the latest version (-1).
version_policy: { all {} }
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/L0_simple_ensemble/test.sh
================================================
#!/bin/bash
# Copyright 2019-2026, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Expose only GPU 0 to the processes started by this script.
export CUDA_VISIBLE_DEVICES=0

# Python unittest driver and the log its output is appended to.
SIMPLE_TEST_PY=./ensemble_test.py
CLIENT_LOG="./client.log"

# Server binary, model repository, result file and server log.
# These must be set before util.sh is sourced and run_server is called.
TEST_MODEL_DIR="$(pwd)/models"
TEST_RESULT_FILE='test_results.txt'
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=${TEST_MODEL_DIR}"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

# ensure ensemble models have version sub-directory
for ensemble_model in ensemble_add_sub_int32_int32_int32 ensemble_partial_add_sub; do
    mkdir -p ${TEST_MODEL_DIR}/${ensemble_model}/1
done

# start from clean logs
rm -f $CLIENT_LOG $SERVER_LOG

RET=0

# Start a fresh server, run one EnsembleTest case against it and stop the
# server again.
#   $1 - name of the EnsembleTest method to run
# The script exits immediately if the server cannot be started. A failing
# test, or a failing result verification, only sets RET=1 so that the
# remaining cases still run.
# SERVER_LOG is exported to the Python process for every case; only
# test_ensemble_partial_add_sub reads it.
function run_ensemble_test_case() {
    local test_case="$1"

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # The test is allowed to fail without aborting the script.
    set +e
    SERVER_LOG=$SERVER_LOG python $SIMPLE_TEST_PY EnsembleTest.${test_case} >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        RET=1
    else
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
}

# Run ensemble model with all outputs requested
run_ensemble_test_case test_ensemble_add_sub

# Run ensemble model with sequence flags and verify response sequence
run_ensemble_test_case test_ensemble_sequence_flags

# Run ensemble model with only one output requested
run_ensemble_test_case test_ensemble_add_sub_one_output

# Run partial ensemble model with all outputs requested.
# Verbose logging is needed so the skipped-output message reaches the log.
SERVER_ARGS="$SERVER_ARGS --log-verbose=1"
run_ensemble_test_case test_ensemble_partial_add_sub

######## Test max_queue_size dynamic batching parameter in ensemble steps ########
## Ensemble model: step1-decoupled_producer -> step2-slow_consumer
# Builds a throw-away model repository containing two ensembles. Each one is
# a copy of the ensemble_disabled_max_inflight_requests config in which one
# step is renamed (via sed) to a variant of that step's model that has
# "dynamic_batching { ... max_queue_size: 4 }" appended to its config.
MODEL_DIR="`pwd`/max_queue_size_test_models"
rm -rf ${MODEL_DIR}

# Enable max_queue_size in the first step (decoupled_producer)
mkdir -p ${MODEL_DIR}/ensemble_step1_enabled_max_queue_size/1 ${MODEL_DIR}/decoupled_producer_enabled_max_queue_size/1 ${MODEL_DIR}/slow_consumer/1
cp ./backpressure_test_models/ensemble_disabled_max_inflight_requests/config.pbtxt ${MODEL_DIR}/ensemble_step1_enabled_max_queue_size/
# Point the ensemble step at the renamed producer model
sed -i 's/"decoupled_producer"/"decoupled_producer_enabled_max_queue_size"/g' ${MODEL_DIR}/ensemble_step1_enabled_max_queue_size/config.pbtxt

# slow_consumer is the ground_truth python model, renamed and limited to
# max_batch_size 1
cp ../python_models/ground_truth/model.py ${MODEL_DIR}/slow_consumer/1
cp ../python_models/ground_truth/config.pbtxt ${MODEL_DIR}/slow_consumer/
sed -i 's/name: "ground_truth"/name: "slow_consumer"/g' ${MODEL_DIR}/slow_consumer/config.pbtxt
sed -i 's/max_batch_size: 64/max_batch_size: 1/g' ${MODEL_DIR}/slow_consumer/config.pbtxt

cp ./backpressure_test_models/decoupled_producer/1/model.py ${MODEL_DIR}/decoupled_producer_enabled_max_queue_size/1
cp ./backpressure_test_models/decoupled_producer/config.pbtxt ${MODEL_DIR}/decoupled_producer_enabled_max_queue_size/
sed -i 's/name: "decoupled_producer"/name: "decoupled_producer_enabled_max_queue_size"/g' ${MODEL_DIR}/decoupled_producer_enabled_max_queue_size/config.pbtxt
# Add dynamic_batching with max_queue_size to decoupled_producer
# (quoted 'EOF': the here-document body is appended verbatim)
cat >> ${MODEL_DIR}/decoupled_producer_enabled_max_queue_size/config.pbtxt << 'EOF'

dynamic_batching {
  preferred_batch_size: [ 1 ]
  default_queue_policy {
    max_queue_size: 4
  }
}
EOF

# Enable max_queue_size in the second step (slow_consumer)
mkdir -p ${MODEL_DIR}/ensemble_step2_enabled_max_queue_size/1 ${MODEL_DIR}/decoupled_producer/1 ${MODEL_DIR}/slow_consumer_enabled_max_queue_size/1
cp ./backpressure_test_models/ensemble_disabled_max_inflight_requests/config.pbtxt ${MODEL_DIR}/ensemble_step2_enabled_max_queue_size/
# Point the ensemble step at the renamed consumer model
sed -i 's/"slow_consumer"/"slow_consumer_enabled_max_queue_size"/g' ${MODEL_DIR}/ensemble_step2_enabled_max_queue_size/config.pbtxt

# The producer is used unmodified by this second ensemble
cp ./backpressure_test_models/decoupled_producer/1/model.py ${MODEL_DIR}/decoupled_producer/1
cp ./backpressure_test_models/decoupled_producer/config.pbtxt ${MODEL_DIR}/decoupled_producer/

cp ../python_models/ground_truth/model.py ${MODEL_DIR}/slow_consumer_enabled_max_queue_size/1
cp ../python_models/ground_truth/config.pbtxt ${MODEL_DIR}/slow_consumer_enabled_max_queue_size/
sed -i 's/name: "ground_truth"/name: "slow_consumer_enabled_max_queue_size"/g' ${MODEL_DIR}/slow_consumer_enabled_max_queue_size/config.pbtxt
sed -i 's/max_batch_size: 64/max_batch_size: 1/g' ${MODEL_DIR}/slow_consumer_enabled_max_queue_size/config.pbtxt
# Add dynamic_batching with max_queue_size to slow_consumer
cat >> ${MODEL_DIR}/slow_consumer_enabled_max_queue_size/config.pbtxt << 'EOF'

dynamic_batching {
  preferred_batch_size: [ 1 ]
  default_queue_policy {
    max_queue_size: 4
  }
}
EOF

# From here on this section uses its own test driver and its own logs.
BACKPRESSURE_TEST_PY=./ensemble_backpressure_test.py
TEST_NAME="EnsembleStepMaxQueueSizeTest"
SERVER_LOG="./ensemble_step_max_queue_size_test_server.log"
CLIENT_LOG="./ensemble_step_max_queue_size_test_client.log"
rm -f $SERVER_LOG $CLIENT_LOG

SERVER_ARGS="--model-repository=${MODEL_DIR}"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# A failing test must not abort the script; it is recorded in RET instead.
set +e
python $BACKPRESSURE_TEST_PY $TEST_NAME -v >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    RET=1
else
    # NOTE(review): 2 is presumably the expected number of test cases in
    # the result file - confirm against check_test_results in util.sh
    check_test_results $TEST_RESULT_FILE 2
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

kill $SERVER_PID
wait $SERVER_PID


######## Test ensemble backpressure feature (max_inflight_requests parameter) ########
# Derives two extra ensembles from ensemble_disabled_max_inflight_requests,
# one with max_inflight_requests: 4 and one with max_inflight_requests: 1,
# runs EnsembleBackpressureTest against them and finally checks the server
# log for the message confirming the configured limit of 4.
MODEL_DIR="`pwd`/backpressure_test_models"
mkdir -p ${MODEL_DIR}/ensemble_disabled_max_inflight_requests/1

# slow_consumer is rebuilt from the ground_truth python model on every run
rm -rf ${MODEL_DIR}/slow_consumer
mkdir -p ${MODEL_DIR}/slow_consumer/1
cp ../python_models/ground_truth/model.py ${MODEL_DIR}/slow_consumer/1
cp ../python_models/ground_truth/config.pbtxt ${MODEL_DIR}/slow_consumer/
sed -i 's/name: "ground_truth"/name: "slow_consumer"/g' ${MODEL_DIR}/slow_consumer/config.pbtxt

# Create ensemble with "max_inflight_requests = 4"
rm -rf ${MODEL_DIR}/ensemble_max_inflight_requests_limit_4
mkdir -p ${MODEL_DIR}/ensemble_max_inflight_requests_limit_4/1
cp ${MODEL_DIR}/ensemble_disabled_max_inflight_requests/config.pbtxt ${MODEL_DIR}/ensemble_max_inflight_requests_limit_4/
# The "\n" in the replacement inserts the parameter on a new line right
# after the opening of the ensemble_scheduling block
sed -i 's/ensemble_scheduling {/ensemble_scheduling {\n  max_inflight_requests: 4/g' \
  ${MODEL_DIR}/ensemble_max_inflight_requests_limit_4/config.pbtxt

# Create ensemble with "max_inflight_requests = 1"
rm -rf ${MODEL_DIR}/ensemble_max_inflight_requests_limit_1
mkdir -p ${MODEL_DIR}/ensemble_max_inflight_requests_limit_1/1
cp ${MODEL_DIR}/ensemble_disabled_max_inflight_requests/config.pbtxt ${MODEL_DIR}/ensemble_max_inflight_requests_limit_1/
# NOTE(review): only this copy gets an explicit "name" field; the limit_4
# copy above does not - confirm the asymmetry is intended
sed -i 's/platform: "ensemble"/name: "ensemble_max_inflight_requests_limit_1"\nplatform: "ensemble"/g' \
  ${MODEL_DIR}/ensemble_max_inflight_requests_limit_1/config.pbtxt
sed -i 's/ensemble_scheduling {/ensemble_scheduling {\n  max_inflight_requests: 1/g' \
  ${MODEL_DIR}/ensemble_max_inflight_requests_limit_1/config.pbtxt

# This section writes to its own server and client logs.
TEST_NAME="EnsembleBackpressureTest"
SERVER_LOG="./ensemble_backpressure_test_server.log"
CLIENT_LOG="./ensemble_backpressure_test_client.log"
rm -f $SERVER_LOG $CLIENT_LOG

SERVER_ARGS="--model-repository=${MODEL_DIR}"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# A failing test must not abort the script; it is recorded in RET instead.
set +e
python $BACKPRESSURE_TEST_PY $TEST_NAME -v >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    RET=1
else
    # NOTE(review): 5 is presumably the expected number of test cases in
    # the result file - confirm against check_test_results in util.sh
    check_test_results $TEST_RESULT_FILE 5
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# grep returns non-zero when the message is missing, so errexit is
# switched off around the check.
set +e
# Verify valid config was loaded successfully
if ! grep -q "Ensemble model 'ensemble_max_inflight_requests_limit_4' configured with max_inflight_requests: 4" $SERVER_LOG; then
    echo -e "\n***\n*** FAILED: Valid model did not load successfully\n***"
    RET=1
fi
set -e


######## Test invalid value for "max_inflight_requests"
# Builds a repository with three ensembles whose max_inflight_requests is
# invalid (negative, a string, an out-of-range integer). The server is
# expected NOT to start, and its log must contain one error per bad value.
# MODEL_DIR is expected to still hold the backpressure_test_models path set
# earlier in this script; the base config and the composing models are
# copied from there.
INVALID_PARAM_MODEL_DIR="`pwd`/invalid_param_test_models"
SERVER_ARGS="--model-repository=${INVALID_PARAM_MODEL_DIR}"
SERVER_LOG="./invalid_max_inflight_requests_server.log"
rm -rf $SERVER_LOG ${INVALID_PARAM_MODEL_DIR}

mkdir -p ${INVALID_PARAM_MODEL_DIR}/ensemble_invalid_negative_limit/1
mkdir -p ${INVALID_PARAM_MODEL_DIR}/ensemble_invalid_string_limit/1
mkdir -p ${INVALID_PARAM_MODEL_DIR}/ensemble_invalid_large_value_limit/1
cp -r ${MODEL_DIR}/decoupled_producer ${MODEL_DIR}/slow_consumer ${INVALID_PARAM_MODEL_DIR}/

# max_inflight_requests = -5
cp ${MODEL_DIR}/ensemble_disabled_max_inflight_requests/config.pbtxt ${INVALID_PARAM_MODEL_DIR}/ensemble_invalid_negative_limit/
sed -i 's/ensemble_scheduling {/ensemble_scheduling {\n  max_inflight_requests: -5/g' \
  ${INVALID_PARAM_MODEL_DIR}/ensemble_invalid_negative_limit/config.pbtxt

# max_inflight_requests = "invalid_value"
cp ${MODEL_DIR}/ensemble_disabled_max_inflight_requests/config.pbtxt ${INVALID_PARAM_MODEL_DIR}/ensemble_invalid_string_limit/
sed -i 's/ensemble_scheduling {/ensemble_scheduling {\n  max_inflight_requests: "invalid_value"/g' \
  ${INVALID_PARAM_MODEL_DIR}/ensemble_invalid_string_limit/config.pbtxt

# max_inflight_requests = 12345678901
cp ${MODEL_DIR}/ensemble_disabled_max_inflight_requests/config.pbtxt ${INVALID_PARAM_MODEL_DIR}/ensemble_invalid_large_value_limit/
sed -i 's/ensemble_scheduling {/ensemble_scheduling {\n  max_inflight_requests: 12345678901/g' \
  ${INVALID_PARAM_MODEL_DIR}/ensemble_invalid_large_value_limit/config.pbtxt


# Inverted check: a non-zero SERVER_PID means the server started even
# though every ensemble config is invalid, which is a test failure.
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** FAILED: unexpected success starting $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    cat $SERVER_LOG
    RET=1
fi

# grep returns non-zero when a message is missing, so errexit is switched
# off around the checks.
# NOTE(review): the matched strings look like config parser error messages;
# confirm the exact wording against the server version under test.
set +e
# Verify negative value caused model load failure
if ! grep -q "Expected integer, got: -" $SERVER_LOG; then
    echo -e "\n***\n*** FAILED: Negative value should fail model load\n***"
    RET=1
fi

# Verify invalid string caused model load failure
if ! grep -q 'Expected integer, got: "invalid_value"' $SERVER_LOG; then
    echo -e "\n***\n*** FAILED: Invalid string should fail model load\n***"
    RET=1
fi

# Verify very large value caused model load failure
if ! grep -q "Integer out of range (12345678901)" $SERVER_LOG; then
    echo -e "\n***\n*** FAILED: Large value should fail model load\n***"
    RET=1
fi
set -e


# Final verdict: report success, or dump the client logs and report failure.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    # CLIENT_LOG is reassigned for the later test sections, so it only names
    # the last client log by the time this point is reached. Print every
    # client log written by this script so that failures from the earlier
    # sections are visible too.
    for client_log in ./client.log ./*_client.log; do
        # Skips logs that were never created (and an unmatched glob).
        if [ -f "$client_log" ]; then
            echo -e "\n=== $client_log ==="
            cat "$client_log"
        fi
    done
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_simple_example/test.sh
================================================
#!/bin/bash
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Run the simple HTTP example clients (C++ binary and Python script) against a
# server and check, by counting matching lines in their verbose logs, that the
# expected header-related output is present.

export CUDA_VISIBLE_DEVICES=0

SIMPLE_CLIENT=../clients/simple_http_infer_client
SIMPLE_CLIENT_PY=../clients/simple_http_infer_client.py

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$(pwd)/models"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

# Flag the test as failed unless exactly EXPECTED lines of LOGFILE match
# PATTERN.
#   $1 PATTERN   grep pattern to count
#   $2 LOGFILE   client log to inspect
#   $3 EXPECTED  required number of matching lines
#   $4 LABEL     header description used in the failure message
#   $5 CLIENT    client name used in the failure message ("C++" / "Python")
check_header_count() {
    local pattern=$1 logfile=$2 expected=$3 label=$4 client=$5
    if [ $(grep -c "$pattern" $logfile) != "$expected" ]; then
        echo -e "\n***\n*** Failed. Expected $expected $label headers for $client client\n***"
        RET=1
    fi
}

rm -f *.log

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

# Client failures are accumulated in RET instead of aborting the script.
set +e

# Run with default host header...
$SIMPLE_CLIENT -v >>client_c++.log 2>&1 || RET=1
check_header_count "localhost:8000" client_c++.log 2 "Host:localhost:8000" "C++"

python $SIMPLE_CLIENT_PY -v >>client_py.log 2>&1 || RET=1
check_header_count "HTTPSocketPoolResponse status=200" client_py.log 3 \
    "Host:HTTPSocketPoolResponse status=200" "Python"

# Run with custom host header...
$SIMPLE_CLIENT -v -H"Host:my_host_" >>client_c++_host.log 2>&1 || RET=1
check_header_count my_host_ client_c++_host.log 1 "Host:my_host_" "C++"

python $SIMPLE_CLIENT_PY -v -H"Host:my_host_" >>client_py_host.log 2>&1 || RET=1
check_header_count my_host_ client_py_host.log 3 "Host:my_host_" "Python"

# Run with multiple headers...
$SIMPLE_CLIENT -v -H"abc:xyz" -H"123:456" >>client_c++_multi.log 2>&1 || RET=1
check_header_count "abc: xyz" client_c++_multi.log 1 "abc:xyz" "C++"
check_header_count "123: 456" client_c++_multi.log 1 "123:456" "C++"

python $SIMPLE_CLIENT_PY -v -H"abc:xyz" -H"123:456" >>client_py_multi.log 2>&1 || RET=1
check_header_count "'abc': 'xyz'" client_py_multi.log 3 "abc:xyz" "Python"
check_header_count "'123': '456'" client_py_multi.log 3 "123:456" "Python"

set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -ne 0 ]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_simple_go_client/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Smoke test for the Go gRPC example client: launch the server, clone the
# client and common repositories, generate the Go stubs, run the example and
# check its log for the expected marker line.

export CUDA_VISIBLE_DEVICES=0

# Both settings may be overridden from the environment. The ":=" expansion
# also assigns the default to the variable when it is unset or empty.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"}
TRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG:="main"}

# Directory inside the cloned client repository where the stubs are generated
# and the example is run.
GO_CLIENT_DIR=client/src/grpc_generated/go

# SERVER, SERVER_ARGS and SERVER_LOG are set before sourcing util.sh;
# presumably they are consumed by run_server -- confirm in ../common/util.sh.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS=--model-repository=`pwd`/models
SERVER_LOG="./inference_server.log"
source ../common/util.sh

rm -f *.log

# A SERVER_PID of "0" after run_server is treated as a failed launch.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

# Generate Go stubs.
# Start from fresh checkouts so earlier runs cannot leak stale sources.
rm -fr client common
git clone ${TRITON_REPO_ORGANIZATION}/client.git
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest

pushd ${GO_CLIENT_DIR}

# The common repository is cloned inside the Go client directory, which is
# where gen_go_stubs.sh is executed from.
git clone --single-branch --depth=1 -b $TRITON_COMMON_REPO_TAG \
    ${TRITON_REPO_ORGANIZATION}/common.git
bash gen_go_stubs.sh

# From here on failures are recorded in RET instead of aborting the script.
set +e

# Run test for GRPC variant of go client within go.mod path
go run grpc_simple_client.go >>client.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

popd


# client.log was written inside GO_CLIENT_DIR, hence the prefixed path. The
# marker line must appear exactly once.
if [ `grep -c "Checking Inference Outputs" ${GO_CLIENT_DIR}/client.log` != "1" ]; then
    echo -e "\n***\n*** Failed. Unable to run inference.\n***"
    RET=1
fi

set -e

# Stop the server and reap it before reporting the result.
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_simple_lib/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Run the ./simple example binary against several single-model repositories
# and one ensemble repository, once without memory-type enforcement and once
# for each enforced I/O memory type.
#
# Usage: test.sh [REPO_VERSION]
#   REPO_VERSION defaults to $NVIDIA_TRITON_SERVER_VERSION and is suffixed
#   with _$TEST_REPO_ARCH when that variable is set.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

MODELSDIR=`pwd`/models
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository

export CUDA_VISIBLE_DEVICES=0

# Must explicitly set LD_LIBRARY_PATH so that clients can find
# libtritonserver.so. It is exported so the child process sees it even when
# the variable was not already part of the inherited environment.
export LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH

rm -f *.log

RET=0

# Run $SIMPLE_CLIENT against $MODELSDIR for one model flavour.
#   $1 TAG  name used in the log files: $CLIENT_LOG.TAG.log for the
#           unconstrained run and $CLIENT_LOG.TAG.<mem>.log per memory type.
# A failing run dumps its log and sets RET=1; the remaining runs still happen.
# Must be called with errexit disabled (set +e).
run_simple_client_checks() {
    local tag=$1
    local mem_type

    # No memory type enforcement
    $SIMPLE_CLIENT -r "$MODELSDIR" >>"$CLIENT_LOG.$tag.log" 2>&1
    if [ $? -ne 0 ]; then
        cat "$CLIENT_LOG.$tag.log"
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi

    # Enforce I/O to be in specific memory type
    for mem_type in system pinned gpu ; do
        $SIMPLE_CLIENT -r "$MODELSDIR" -m $mem_type >>"$CLIENT_LOG.$tag.$mem_type.log" 2>&1
        if [ $? -ne 0 ]; then
            cat "$CLIENT_LOG.$tag.$mem_type.log"
            echo -e "\n***\n*** Test Failed\n***"
            RET=1
        fi
    done
}

for SIMPLE_CLIENT in simple ; do
    CLIENT_LOG=$SIMPLE_CLIENT
    SIMPLE_CLIENT=./$SIMPLE_CLIENT

    for trial in onnx libtorch plan; do
        full=${trial}_float32_float32_float32

        # Rebuild the repository with version 1 of this flavour, renamed to
        # "simple" and with any label_filename entry stripped.
        rm -rf "$MODELSDIR"
        mkdir -p "$MODELSDIR/simple/1" && \
            cp -r "$DATADIR/${full}/1/"* "$MODELSDIR/simple/1/." && \
            cp "$DATADIR/${full}/config.pbtxt" "$MODELSDIR/simple/." && \
            (cd "$MODELSDIR/simple" && \
                    sed -i "s/^name:.*/name: \"simple\"/" config.pbtxt && \
                    sed -i "s/label_filename:.*//" config.pbtxt)

        set +e
        run_simple_client_checks $full
        set -e
    done

    # set up "addsub" ensemble
    # The repository is rebuilt from scratch: "simple" becomes the fan_plan
    # ensemble configuration, accompanied by the nop and plan models copied
    # next to it.
    ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/
    rm -rf "$MODELSDIR"
    mkdir -p "$MODELSDIR/simple/1" && \
        cp "$ENSEMBLEDIR/fan_plan_float32_float32_float32/config.pbtxt" "$MODELSDIR/simple/." && \
        (cd "$MODELSDIR/simple" && \
                sed -i "s/^name:.*/name: \"simple\"/" config.pbtxt && \
                sed -i "s/label_filename:.*//" config.pbtxt)

    cp -r "$ENSEMBLEDIR/nop_TYPE_FP32_-1" "$MODELSDIR/." && \
        mkdir -p "$MODELSDIR/nop_TYPE_FP32_-1/1"

    # make sure version 1 is used (no swap)
    cp -r "$DATADIR/plan_float32_float32_float32" "$MODELSDIR/." && \
        rm -r "$MODELSDIR/plan_float32_float32_float32/2" && \
        rm -r "$MODELSDIR/plan_float32_float32_float32/3"
    full=ensemble

    set +e

    run_simple_client_checks $full

    # For GPU input / output case, all ensemble allocation should be on GPU
    # The pattern is quoted so the shell cannot glob-expand its brackets.
    if grep '^I[0-9][0-9][0-9][0-9].*Internal response.*memory type 0' "$CLIENT_LOG.$full.gpu.log"; then
        echo -e "\n*** FAILED: unexpected CPU allocation for ensemble" >> "$CLIENT_LOG.$full.gpu.log"
        cat "$CLIENT_LOG.$full.gpu.log"
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi

    set -e
done

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_simple_nodejs_client/test.sh
================================================
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Smoke test for the Node.js gRPC example client: launch the server, fetch the
# proto files from the common repository, install the npm dependencies, run
# the client and check its log for the expected marker line.

export CUDA_VISIBLE_DEVICES=0

# Both settings may be overridden from the environment. The ":=" expansion
# also assigns the default to the variable when it is unset or empty.
TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"}
TRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG:="main"}

SIMPLE_NODEJS_CLIENT=client.js

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS=--model-repository=`pwd`/models
SERVER_LOG="./inference_server.log"
source ../common/util.sh

rm -f *.log

# A SERVER_PID of "0" after run_server is treated as a failed launch.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

# Get the proto files from the common repo
# Both directories are recreated so that a rerun in the same workspace picks
# up fresh proto files instead of silently keeping those of an earlier run.
rm -fr common proto
git clone --single-branch --depth=1 -b $TRITON_COMMON_REPO_TAG \
    ${TRITON_REPO_ORGANIZATION}/common.git
mkdir -p proto && cp common/protobuf/*.proto proto/.

npm install

# From here on failures are recorded in RET instead of aborting the script.
set +e

# Runs test for GRPC variant of nodejs client
node $SIMPLE_NODEJS_CLIENT >> client.log 2>&1
if [ $? -ne 0 ]; then
    RET=1
fi

# The marker line must appear exactly once in the client output.
if [ `grep -c "Checking Inference Output" client.log` != "1" ]; then
    echo -e "\n***\n*** Failed. Unable to run inference.\n***"
    RET=1
fi

set -e

# Stop the server and reap it before reporting the result.
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    # Show the client output so the failure can be diagnosed from the job log.
    cat client.log
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_socket/models/simple/config.pbtxt
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "simple" model in the L0_socket test's model
# repository: two INT32 inputs and two INT32 outputs, 16 elements each.
name: "simple"
platform: "onnxruntime_onnx"
# NOTE(review): with a non-zero max_batch_size the dims below presumably
# exclude the batch dimension -- confirm against the Triton model
# configuration documentation.
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]


================================================
FILE: qa/L0_socket/test.sh
================================================
#!/bin/bash
# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Verify how the server reacts to conflicting or invalid HTTP / gRPC / metrics
# port and address settings: conflicts must be tolerated when the endpoint is
# disabled or bound to a different address, and rejected otherwise.

export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
SERVER_LOG="./inference_server.log"

DATADIR=`pwd`/models
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_TIMEOUT=15
source ../common/util.sh

rm -f *.log

RET=0

# Launch the server with the current $SERVER_ARGS.
#   $1 MODE  "nowait": run_server_nowait followed by a fixed 15 second sleep
#            "wait":   run_server
# NOTE(review): "nowait" is used wherever run_server presumably cannot probe
# readiness itself (HTTP disabled or moved) -- confirm in ../common/util.sh.
socket_launch_server() {
    if [ "$1" == "nowait" ]; then
        run_server_nowait
        sleep 15
    else
        run_server
    fi
}

# Launch the server and require that it started, then stop it again.
#   $1 MODE        launch mode, see socket_launch_server
#   $2 HEALTH_URL  optional; when given, the URL must answer with HTTP 200,
#                  otherwise RET is set to 1 (the script keeps going)
# Exits the script with status 1 when the launch is reported as failed
# (SERVER_PID equal to "0").
socket_expect_start() {
    socket_launch_server "$1"
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi
    if [ -n "$2" ]; then
        code=`curl -s -w %{http_code} $2`
        if [ "$code" != "200" ]; then
            echo -e "\n***\n*** Server is not ready\n***"
            RET=1
        fi
    fi
    kill $SERVER_PID
    wait $SERVER_PID
}

# Launch the server and require that it did NOT come up.
#   $1 MODE  launch mode, see socket_launch_server
# If a process is still registered it is killed; a clean (zero) exit status
# from it means the server had started successfully, which is the error case
# here and exits the script with status 1.
socket_expect_fail() {
    socket_launch_server "$1"
    if [ "$SERVER_PID" != "0" ]; then
        set +e
        kill $SERVER_PID
        wait $SERVER_PID
        if [ "$?" == "0" ]; then
            echo -e "\n***\n*** unexpected start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi
        set -e
    fi
}

# CUSTOM CASES
for address in default explicit; do
    if [ "$address" == "default" ]; then
        # without specifying address, will use "0.0.0.0" as default
        SAME_EXPLICIT_ADDRESS=""
        DIFF_EXPLICIT_ADDRESS=""
    else
        SAME_EXPLICIT_ADDRESS="--http-address 127.0.0.1 --grpc-address 127.0.0.1 --metrics-address 127.0.0.1"
        DIFF_EXPLICIT_ADDRESS="--http-address 127.0.0.1 --grpc-address 127.0.0.2 --metrics-address 127.0.0.3"
    fi

    for p in http grpc; do
        if [ "$address" == "default" ]; then
            # allow illegal http/grpc port if disabled
            SERVER_ARGS="--model-repository=$DATADIR --${p}-port -47 --allow-${p} 0"
        else
            # allow illegal http/grpc address if disabled
            SERVER_ARGS="--model-repository=$DATADIR --${p}-address -47 --allow-${p} 0"
        fi
        socket_expect_start nowait

        # allow http/grpc port overlap with grpc/http default if disabled
        if [ "$p" == "http" ]; then
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --http-port 8001 --allow-http 0"
            socket_expect_start nowait
        else
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --grpc-port 8000 --allow-grpc 0"
            socket_expect_start wait
        fi

        # error if http/grpc port overlaps with grpc/http default port
        if [ "$p" == "http" ]; then
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --http-port 8001"
        else
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --grpc-port 8000"
        fi
        socket_expect_fail wait

        # when using different addresses, allow http/grpc port overlap with grpc/http default port
        if [ "$address" == "explicit" ]; then
            if [ "$p" == "http" ]; then
                SERVER_ARGS="--model-repository=$DATADIR $DIFF_EXPLICIT_ADDRESS --http-port 8001"
            else
                SERVER_ARGS="--model-repository=$DATADIR $DIFF_EXPLICIT_ADDRESS --grpc-port 8000"
            fi
            socket_expect_start nowait
        fi

        # allow http/grpc port overlap with grpc/http explicit if disabled
        if [ "$p" == "http" ]; then
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --http-port 8007 --grpc-port 8007 --allow-http 0"
        else
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --grpc-port 8007 --http-port 8007 --allow-grpc 0"
        fi
        socket_expect_start nowait

        # error if http/grpc port overlaps with grpc/http explicit port
        # (only run for http; the grpc iteration would repeat the same case)
        if [ "$p" == "http" ]; then
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --http-port 8003 --grpc-port 8003"
            socket_expect_fail nowait
        fi

        # when using different addresses, allow http/grpc port overlap with grpc/http explicit
        if [ "$address" == "explicit" ]; then
            if [ "$p" == "http" ]; then
                SERVER_ARGS="--model-repository=$DATADIR $DIFF_EXPLICIT_ADDRESS --http-port 8007 --grpc-port 8007"
            else
                SERVER_ARGS="--model-repository=$DATADIR $DIFF_EXPLICIT_ADDRESS --grpc-port 8007 --http-port 8007"
            fi
            socket_expect_start nowait 127.0.0.1:8007/v2/health/ready
        fi

        # allow http/grpc port overlap with metrics default port if disabled
        if [ "$p" == "http" ]; then
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --http-port 8002 --allow-http 0"
            socket_expect_start nowait
        else
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --grpc-port 8002 --allow-grpc 0"
            socket_expect_start wait
        fi

        # error if http/grpc port overlaps with metrics default port
        if [ "$p" == "http" ]; then
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --http-port 8002"
        else
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --grpc-port 8002"
        fi
        socket_expect_fail wait

        # when using different addresses, allow grpc port overlap with metrics default port
        # (http and metrics server bind to the same address, so the http
        # variant of this case is skipped)
        if [ "$address" == "explicit" ] && [ "$p" == "grpc" ]; then
            SERVER_ARGS="--model-repository=$DATADIR $DIFF_EXPLICIT_ADDRESS --grpc-port 8002"
            socket_expect_start nowait 127.0.0.1:8000/v2/health/ready
        fi

        # allow metrics port overlap with http/grpc default port if disabled
        if [ "$p" == "http" ]; then
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --metrics-port 8000 --allow-metrics 0"
        else
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --metrics-port 8001 --allow-metrics 0"
        fi
        socket_expect_start wait

        # error if metrics port overlaps with http/grpc default port
        if [ "$p" == "http" ]; then
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --metrics-port 8000"
        else
            SERVER_ARGS="--model-repository=$DATADIR $SAME_EXPLICIT_ADDRESS --metrics-port 8001"
        fi
        socket_expect_fail wait

        # when using different addresses, allow metrics port overlap with grpc default port
        # (http and metrics server bind to the same address, so the http
        # variant of this case is skipped)
        if [ "$address" == "explicit" ] && [ "$p" == "grpc" ]; then
            SERVER_ARGS="--model-repository=$DATADIR $DIFF_EXPLICIT_ADDRESS --metrics-port 8001"
            socket_expect_start nowait 127.0.0.1:8000/v2/health/ready
        fi
    done
done

# Test multiple servers binding to the same http/grpc port
SERVER0_LOG="./inference_server0.log"
SERVER1_LOG="./inference_server1.log"
SERVER2_LOG="./inference_server2.log"

for p in http grpc; do
    # error if servers bind to the same http/grpc port without setting the reuse flag
    # (only the reuse flag of the *other* protocol is passed, so the protocol
    # under test is the one expected to conflict)
    if [ "$p" == "http" ]; then
        SERVER_ARGS="--model-repository=$DATADIR --metrics-port 8002 --reuse-grpc-port=true"
        SERVER0_ARGS="--model-repository=$DATADIR --metrics-port 8003 --reuse-grpc-port=true"
        SERVER1_ARGS="--model-repository=$DATADIR --metrics-port 8004 --reuse-grpc-port=true"
    else
        SERVER_ARGS="--model-repository=$DATADIR --metrics-port 8002 --reuse-http-port=true"
        SERVER0_ARGS="--model-repository=$DATADIR --metrics-port 8003 --reuse-http-port=true"
        SERVER1_ARGS="--model-repository=$DATADIR --metrics-port 8004 --reuse-http-port=true"
    fi
    # make sure the first server is launched successfully, then run the other
    # two servers and expect them to fail
    run_server
    run_multiple_servers_nowait 2
    sleep 15
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start SERVER $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi
    # Each extra server is checked against its own PID: killing an unset/"0"
    # PID would signal the whole process group, including this script. A clean
    # (zero) exit status after the kill means that server had come up, which
    # is the failure case here.
    if [ "$SERVER0_PID" != "0" ]; then
        set +e
        kill $SERVER0_PID
        wait $SERVER0_PID
        if [ "$?" == "0" ]; then
            echo -e "\n***\n*** unexpected start SERVER0 $SERVER\n***"
            cat $SERVER0_LOG
            exit 1
        fi
        set -e
    fi
    if [ "$SERVER1_PID" != "0" ]; then
        set +e
        kill $SERVER1_PID
        wait $SERVER1_PID
        if [ "$?" == "0" ]; then
            echo -e "\n***\n*** unexpected start SERVER1 $SERVER\n***"
            cat $SERVER1_LOG
            exit 1
        fi
        set -e
    fi
    kill_server

    # 1. Allow multiple servers bind to the same http/grpc port with setting the reuse flag
    # 2. Test different forms of setting --metrics-address and verify metrics are queryable
    #   (a) Test default metrics-address being same as http-address
    #   (b) Test setting metrics-address explicitly to 0.0.0.0
    #   (c) Test setting metrics-address explicitly to 127.0.0.2
    SERVER0_ARGS="--model-repository=$DATADIR --metrics-port 8002 --reuse-http-port=true --reuse-grpc-port=true"
    SERVER1_ARGS="--model-repository=$DATADIR --metrics-address 0.0.0.0 --metrics-port 8003 --reuse-http-port=true --reuse-grpc-port=true"
    SERVER2_ARGS="--model-repository=$DATADIR --metrics-address 127.0.0.2 --metrics-port 8004 --reuse-http-port=true --reuse-grpc-port=true"
    run_multiple_servers_nowait 3
    sleep 15
    if [ "$SERVER0_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start SERVER0 $SERVER\n***"
        cat $SERVER0_LOG
        exit 1
    fi
    if [ "$SERVER1_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start SERVER1 $SERVER\n***"
        cat $SERVER1_LOG
        exit 1
    fi
    if [ "$SERVER2_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start SERVER2 $SERVER\n***"
        cat $SERVER2_LOG
        exit 1
    fi

    set +e

    # test if requests are being distributed among three servers
    if [ "$p" == "http" ]; then
        CLIENT_PY=../clients/simple_http_infer_client.py
    else
        CLIENT_PY=../clients/simple_grpc_infer_client.py
    fi

    pids=()
    for i in {0..10}; do
        python3 $CLIENT_PY >> $CLIENT_LOG 2>&1 &
        pids+=("$!")
    done
    # Wait for every client individually: "wait" with several PIDs only
    # reports the exit status of the last one, hiding earlier failures.
    client_failed=0
    for pid in "${pids[@]}"; do
        wait $pid || client_failed=1
    done
    if [ $client_failed -ne 0 ]; then
        echo -e "\n***\n*** Python ${p} Async Infer Test Failed\n***"
        cat $CLIENT_LOG
        RET=1
    fi

    set -e

    server0_request_count=`curl -s localhost:8002/metrics | awk '/nv_inference_request_success{/ {print $2}'`
    server1_request_count=`curl -s localhost:8003/metrics | awk '/nv_inference_request_success{/ {print $2}'`
    server2_request_count=`curl -s 127.0.0.2:8004/metrics | awk '/nv_inference_request_success{/ {print $2}'`
    # A missing metric (e.g. the endpoint could not be queried) counts as zero
    # requests so that it is reported instead of breaking the comparison.
    server0_request_count=${server0_request_count:-0}
    server1_request_count=${server1_request_count:-0}
    server2_request_count=${server2_request_count:-0}
    # "%.*" drops any fractional part so the integer test below can be used.
    if [ "${server0_request_count%.*}" -eq 0 ] || \
       [ "${server1_request_count%.*}" -eq 0 ] || \
       [ "${server2_request_count%.*}" -eq 0 ]; then
        echo -e "\n***\n*** Failed: ${p} requests are not distributed among all servers.\n***"
        RET=1
    fi
    kill_servers
done

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test Failed\n***"
fi
exit $RET


================================================
FILE: qa/L0_storage_S3/test.sh
================================================
#!/bin/bash
# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
TEST_RESULT_FILE='test_results.txt'
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Expose only GPU 0 to the processes started by this test.
export CUDA_VISIBLE_DEVICES=0

# Prefix of the per-scenario client log files.
CLIENT_LOG_BASE="./client"
INFER_TEST="../common/infer_test.py"
# Expected test count handed to check_test_results together with
# TEST_RESULT_FILE (which is already assigned at the top of the script, so it
# is not assigned a second time here).
EXPECTED_NUM_TESTS="3"
# Backends to copy models for; can be overridden from the environment.
BACKENDS=${BACKENDS:="onnx libtorch plan"}

# S3 credentials are necessary for this test. Pass via ENV variables
# The values are also written into the aws CLI configuration here.
aws configure set default.region $AWS_DEFAULT_REGION && \
    aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \
    aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY

# S3 bucket path (Point to bucket when testing cloud storage)
# The bucket name is made unique per CI job through CI_JOB_ID.
BUCKET_URL="s3://triton-bucket-${CI_JOB_ID}"

# Cleanup and delete S3 test bucket if it already exists (due to test failure)
# "|| true" keeps going when the bucket does not exist.
aws s3 rm $BUCKET_URL --recursive --include "*" && \
    aws s3 rb $BUCKET_URL || true

# Make S3 test bucket
aws s3 mb "${BUCKET_URL}"

# Remove Slash in BUCKET_URL
# BUCKET_URL_SLASH is the same URL with exactly one trailing slash.
BUCKET_URL=${BUCKET_URL%/}
BUCKET_URL_SLASH="${BUCKET_URL}/"

# Backup S3 credentials as they will be unset during the test
AWS_DEFAULT_REGION_BACKUP=$AWS_DEFAULT_REGION
AWS_ACCESS_KEY_ID_BACKUP=$AWS_ACCESS_KEY_ID
AWS_SECRET_ACCESS_KEY_BACKUP=$AWS_SECRET_ACCESS_KEY

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_TIMEOUT=600

SERVER_LOG_BASE="./inference_server"
# Provides run_server and check_test_results used below.
source ../common/util.sh

# Remove logs left over from a previous run.
rm -f $SERVER_LOG_BASE* $CLIENT_LOG_BASE*
# Overall test status; set to 1 by any failing check.
RET=0

# Test 3 Scenarios:
# 1. Only AWS ENV vars (Without aws configure)
# 2. AWS ENV vars + dummy values in aws configure [ENV vars have higher priority]
# 3. Only AWS configured (Without AWS ENV vars)
for ENV_VAR in "env" "env_dummy" "config"; do
    # Each scenario gets its own server and client log file.
    SERVER_LOG=$SERVER_LOG_BASE.$ENV_VAR.log
    CLIENT_LOG=$CLIENT_LOG_BASE.$ENV_VAR.log

    # Put the environment and the aws CLI configuration into the state
    # required by the scenario.
    if [ "$ENV_VAR" == "config" ]; then
        unset AWS_ACCESS_KEY_ID
        unset AWS_SECRET_ACCESS_KEY
        unset AWS_DEFAULT_REGION
    elif [ "$ENV_VAR" == "env_dummy" ]; then
        aws configure set default.region "dummy_region" && \
            aws configure set aws_access_key_id "dummy_id" && \
            aws configure set aws_secret_access_key "dummy_key"
    else
        rm ~/.aws/credentials && rm ~/.aws/config
    fi

    # Construct model repository

    KIND="KIND_GPU"

    # Test coverage for extra slashes
    for MAYBE_SLASH in "" "/" "//"; do

        # ROOT_REPO is the bucket itself, MODEL_REPO the "models" folder
        # inside it; both get the extra slashes under test.
        ROOT_REPO="$BUCKET_URL$MAYBE_SLASH"
        MODEL_REPO="${BUCKET_URL}/${MAYBE_SLASH}models${MAYBE_SLASH}"

        # copy models in model directory
        rm -rf models && mkdir -p models

        # perform empty repo tests

        SERVER_ARGS="--model-repository=$ROOT_REPO --exit-timeout-secs=120"

        run_server
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        kill $SERVER_PID
        wait $SERVER_PID

        # run with a non-root empty model repo
        touch models/dummy
        # The aws CLI needs the real values in its configuration to upload,
        # so they are written back temporarily for the "env" and "env_dummy"
        # scenarios.
        if [ "$ENV_VAR" != "config" ]; then
            aws configure set default.region $AWS_DEFAULT_REGION && \
                aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \
                aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY
        fi
        aws s3 cp . "$BUCKET_URL_SLASH" --recursive --include "*"
        # Return the CLI configuration to the scenario state before the
        # server is started.
        if [ "$ENV_VAR" == "env_dummy" ]; then
            aws configure set default.region "dummy_region" && \
                aws configure set aws_access_key_id "dummy_id" && \
                aws configure set aws_secret_access_key "dummy_key"
        elif [ "$ENV_VAR" == "env" ]; then
            rm ~/.aws/credentials && rm ~/.aws/config
        fi

        SERVER_ARGS="--model-repository=$MODEL_REPO --exit-timeout-secs=120"

        run_server
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        kill $SERVER_PID
        wait $SERVER_PID

        # Real values are needed again by the aws CLI to empty the bucket.
        if [ "$ENV_VAR" != "config" ]; then
            aws configure set default.region $AWS_DEFAULT_REGION && \
                aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \
                aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY
        fi
        aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"
        rm models/dummy

        # Now start model tests

        for FW in ${BACKENDS}; do
            cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/${FW}_float32_float32_float32/ models/
            # Copy models with string inputs and remove nobatch (bs=1) models. Model does not exist for plan backend.
            if [[ ${FW} != "plan" ]]; then
                cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/${FW}*_object_object_object/ models/
                rm -rf models/*nobatch*
            fi
        done

        # Append an instance group of kind KIND to every copied model config.
        for FW in ${BACKENDS}; do
            for MC in `ls models/${FW}*/config.pbtxt`; do
                echo "instance_group [ { kind: ${KIND} }]" >> $MC
            done
        done

        # now traverse the tree and create empty version directories that the CLI skips
        for dir in `ls models/`; do
            for subdir in `ls models/$dir`; do
                if [ -d models/$dir/$subdir ] && [ -z "$(ls models/$dir/$subdir)" ]; then
                    touch models/$dir/$subdir/$subdir
                fi
            done
        done

        # Perform test with model repository variants
        # "models/" uploads the models to the bucket root (served through
        # ROOT_REPO); "." uploads the working directory so that the models
        # end up under "models/" in the bucket (served through MODEL_REPO).
        for src in "models/" "."  ; do

            # copy contents of /models into S3 bucket.
            aws s3 cp $src $BUCKET_URL_SLASH --recursive --include "*"
            if [ "$ENV_VAR" == "env_dummy" ]; then
                aws configure set default.region "dummy_region" && \
                    aws configure set aws_access_key_id "dummy_id" && \
                    aws configure set aws_secret_access_key "dummy_key"
            elif [ "$ENV_VAR" == "env" ]; then
                rm ~/.aws/credentials && rm ~/.aws/config
            fi

            if [ "$src" == "." ]; then
                # set server arguments
                SERVER_ARGS="--model-repository=$MODEL_REPO --exit-timeout-secs=120"
            else
                # set server arguments
                SERVER_ARGS="--model-repository=$ROOT_REPO --exit-timeout-secs=120"
            fi

            run_server
            if [ "$SERVER_PID" == "0" ]; then
                echo -e "\n***\n*** Failed to start $SERVER\n***"
                cat $SERVER_LOG
                exit 1
            fi

            # errexit is switched off while the exit codes are inspected by hand.
            set +e

            python $INFER_TEST >$CLIENT_LOG 2>&1
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG
                echo -e "\n***\n*** Test Failed\n***"
                RET=1
            else
                check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
                if [ $? -ne 0 ]; then
                    cat $CLIENT_LOG
                    echo -e "\n***\n*** Test Result Verification Failed\n***"
                    RET=1
                fi
            fi

            set -e

            kill $SERVER_PID
            wait $SERVER_PID

            # Clean up bucket
            if [ "$ENV_VAR" != "config" ]; then
                aws configure set default.region $AWS_DEFAULT_REGION && \
                    aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \
                    aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY
            fi
            aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"
        done
    done
done

# Restore S3 credentials
# Drop whatever the last scenario left in the aws CLI configuration, re-export
# the values saved in the *_BACKUP variables and write them back to the CLI.
rm ~/.aws/credentials && rm ~/.aws/config
export AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION_BACKUP
export AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID_BACKUP
export AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY_BACKUP
aws configure set default.region $AWS_DEFAULT_REGION && \
    aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \
    aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY

# Test with polling enabled
# ROOT_REPO, SERVER_LOG and CLIENT_LOG still hold the values assigned in the
# last iteration of the scenario loop above.
SERVER_ARGS="--model-repository=$ROOT_REPO --exit-timeout-secs=120 --model-control-mode=poll"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# copy contents of /models into S3 bucket and wait for them to be loaded.
# The wait is a fixed 600 seconds; readiness is not polled.
aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*"
sleep 600

# errexit is switched off while the exit codes are inspected by hand.
set +e

python $INFER_TEST >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi


set -e

kill $SERVER_PID
wait $SERVER_PID

# Test localization to a specified location
# The server is expected to use TRITON_AWS_MOUNT_DIRECTORY while it runs and
# to leave it empty after shutdown; both conditions are checked below.
export TRITON_AWS_MOUNT_DIRECTORY=`pwd`/aws_localization_test

# Start from an empty directory.
if [ -d "$TRITON_AWS_MOUNT_DIRECTORY" ]; then
  rm -rf $TRITON_AWS_MOUNT_DIRECTORY
fi

mkdir -p $TRITON_AWS_MOUNT_DIRECTORY

SERVER_LOG=$SERVER_LOG_BASE.custom_localization.log
SERVER_ARGS="--model-repository=$ROOT_REPO --exit-timeout-secs=120"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# While the server is running the directory must not be empty.
# NOTE(review): this failure path exits without stopping the server.
if [ -z "$(ls -A $TRITON_AWS_MOUNT_DIRECTORY)" ]; then
    echo -e "\n***\n*** Test localization to a specified location failed. \n***"
    echo -e "\n***\n*** Specified mount folder $TRITON_AWS_MOUNT_DIRECTORY is empty \n***"
    ls -A $TRITON_AWS_MOUNT_DIRECTORY
    exit 1
fi

kill $SERVER_PID
wait $SERVER_PID

# After shutdown the directory must be empty again (or gone).
if [ -d "$TRITON_AWS_MOUNT_DIRECTORY" ] && [ ! -z "$(ls -A $TRITON_AWS_MOUNT_DIRECTORY)" ]; then
    echo -e "\n***\n*** Test localization to a specified location failed. \n***"
    echo -e "\n***\n*** Specified mount folder $TRITON_AWS_MOUNT_DIRECTORY was not cleared properly. \n***"
    ls -A $TRITON_AWS_MOUNT_DIRECTORY
    exit 1
fi

rm -rf $TRITON_AWS_MOUNT_DIRECTORY
unset TRITON_AWS_MOUNT_DIRECTORY

# Save models for AWS_SESSION_TOKEN test
rm -rf tmp_cred_test_models
mv models tmp_cred_test_models
# Clean up bucket contents
aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"

# Test reload of model with explicit model control
# Build a repository that holds only version 1 of the libtorch model. All
# copies form a single "&&" chain so that nothing is copied into a directory
# that failed to be created.
rm -rf models && mkdir -p models/libtorch_float32_float32_float32 && \
    cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/libtorch_float32_float32_float32/1 models/libtorch_float32_float32_float32/. && \
    cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/libtorch_float32_float32_float32/config.pbtxt models/libtorch_float32_float32_float32/. && \
    cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/libtorch_float32_float32_float32/output0_labels.txt models/libtorch_float32_float32_float32/.

# Remove version policy from config.pbtxt
sed -i '/^version_policy/d' models/libtorch_float32_float32_float32/config.pbtxt

# Copy contents of models into S3 bucket
aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*"

SERVER_ARGS="--model-repository=$BUCKET_URL --exit-timeout-secs=120 --model-control-mode=explicit"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Load the model and verify that exactly version 1 is reported as READY.
curl -X POST localhost:8000/v2/repository/models/libtorch_float32_float32_float32/load

CURL_LOG=$(curl -X POST localhost:8000/v2/repository/index)

if [ "$CURL_LOG" != "[{\"name\":\"libtorch_float32_float32_float32\",\"version\":\"1\",\"state\":\"READY\"}]" ]; then
    # Report the mismatch; a silent RET=1 is hard to trace in the test output.
    echo -e "\n***\n*** Unexpected repository index after initial load: $CURL_LOG\n***"
    RET=1
fi

# Add new model version
aws s3 cp /data/inferenceserver/${REPO_VERSION}/qa_model_repository/libtorch_float32_float32_float32/3 "${BUCKET_URL_SLASH}libtorch_float32_float32_float32/3" --recursive --include "*"

# Reload: version 3 must become READY and version 1 must be unloaded.
curl -X POST localhost:8000/v2/repository/models/libtorch_float32_float32_float32/load

CURL_LOG=$(curl -X POST localhost:8000/v2/repository/index)
if [ "$CURL_LOG" != "[{\"name\":\"libtorch_float32_float32_float32\",\"version\":\"1\",\"state\":\"UNAVAILABLE\",\"reason\":\"unloaded\"},{\"name\":\"libtorch_float32_float32_float32\",\"version\":\"3\",\"state\":\"READY\"}]" ]; then
    echo -e "\n***\n*** Unexpected repository index after reload with new version: $CURL_LOG\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

# Clean up bucket contents
aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"

# Test with temporary credential (AWS_SESSION_TOKEN)
# Request a 900 second session token and export the temporary key id, secret
# and token extracted from the JSON reply with jq.
AWS_GET_SESSION_TOKEN_RES=`aws sts get-session-token --duration-seconds 900` && \
    export AWS_ACCESS_KEY_ID=`echo $AWS_GET_SESSION_TOKEN_RES | jq -r ".Credentials.AccessKeyId"` && \
    export AWS_SECRET_ACCESS_KEY=`echo $AWS_GET_SESSION_TOKEN_RES | jq -r ".Credentials.SecretAccessKey"` && \
    export AWS_SESSION_TOKEN=`echo $AWS_GET_SESSION_TOKEN_RES | jq -r ".Credentials.SessionToken"`
# Rewrite the aws CLI configuration with the temporary credentials.
rm ~/.aws/credentials && rm ~/.aws/config
aws configure set default.region $AWS_DEFAULT_REGION && \
    aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \
    aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY && \
    aws configure set aws_session_token $AWS_SESSION_TOKEN

# Copy models into S3 bucket
aws s3 cp tmp_cred_test_models/ "${BUCKET_URL_SLASH}" --recursive --include "*"

SERVER_LOG=$SERVER_LOG_BASE.temporary_credentials_test.log
SERVER_ARGS="--model-repository=$BUCKET_URL --exit-timeout-secs=120"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# errexit is switched off while the exit codes are inspected by hand.
set +e

python $INFER_TEST >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Test access decline
# With an invalid secret key and an empty session token the server is
# expected to fail to start (SERVER_PID stays "0").
export AWS_SECRET_ACCESS_KEY="[Invalid]" && export AWS_SESSION_TOKEN=""
SERVER_LOG=$SERVER_LOG_BASE.access_decline_test.log
SERVER_ARGS="--model-repository=$BUCKET_URL --exit-timeout-secs=120"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Unexpected server start $SERVER\n***"
    cat $SERVER_LOG
    kill $SERVER_PID
    wait $SERVER_PID
    RET=1
else
  # AWS S3 does not appear to reply on access decline, but other implementations
  # might provide extra messages, so make sure Triton will print the messages.
  EXPECTED_MSG="Unable to create S3 filesystem client. Check account credentials. Exception: '' Message: 'No response body.'"
  if ! grep "$EXPECTED_MSG" $SERVER_LOG; then
    echo -e "\n***\n*** Expected error message not found\n***"
    cat $SERVER_LOG
    RET=1
  fi
fi

# Restore S3 credentials
# Drop the temporary/invalid values from the aws CLI configuration and
# re-export the values saved in the *_BACKUP variables.
rm ~/.aws/credentials && rm ~/.aws/config
export AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION_BACKUP
export AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID_BACKUP
export AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY_BACKUP
# The session token belongs to the temporary-credential and access-decline
# tests (where it was last exported as an empty string); remove it so the
# environment is back to the long-term credentials only.
unset AWS_SESSION_TOKEN
aws configure set default.region $AWS_DEFAULT_REGION && \
    aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \
    aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY

# Clean up bucket contents
aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"

# Test case where S3 folder has >1000 files
rm -rf models
mkdir -p models/model/1

# The Python model below counts the regular files in its model directory
# during initialize() and prints the total, which ends up in the server log.
cat > models/model/1/model.py <<'EOF'
import os

class TritonPythonModel:

    def initialize(self, args):
        count = 0
        model_dir = args['model_repository']
        for path in os.listdir(model_dir):
            if os.path.isfile(os.path.join(model_dir, path)):
                count += 1
        print('Found {} files in model directory'.format(count))

    def execute(self):
        pass
EOF

# Fill the model directory with 1050 empty files named 01.txt ... 01050.txt.
for ((i = 1; i <= 1050; i++)); do
    touch "models/model/0${i}.txt"
done

# Provide extended timeout to allow >1000 files to be loaded
SERVER_ARGS="--model-repository=$BUCKET_URL --exit-timeout-secs=600 --model-control-mode=none"
SERVER_LOG=$SERVER_LOG_BASE.many_files.log

# Upload the repository to the S3 bucket before the server is started.
aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*"

# Test that the server starts up. Files will be loaded in numerically
# ascending order, so the model file is loaded after the first 1000
# files. If AWS fails to load >1000 files, the model file will not
# be loaded and the server will fail to start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

kill $SERVER_PID
wait $SERVER_PID

# Confirm the correct number of files loaded
EXPECTED_MSG="Found 1050 files in model directory"
if ! grep "$EXPECTED_MSG" $SERVER_LOG; then
    echo -e "\n***\n*** Expected file count message not found\n***"
    cat $SERVER_LOG
    RET=1
fi

# Clean up bucket contents and delete bucket
aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"
aws s3 rb "${BUCKET_URL}"

# Report the accumulated status and use it as the script's exit code.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_storage_S3_local/mock_s3_service.py
================================================
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import threading
import time
from http.server import BaseHTTPRequestHandler, HTTPServer


class MockS3Service:
    """Tiny HTTP/1.1 server that stands in for an S3 endpoint.

    The service records the requests it receives so a test can verify that
    the S3 client talks to it without advertising HTTP/2. Use it as a context
    manager: the server thread runs inside the ``with`` block and is stopped
    when the block exits.
    """

    # Defaults used when no address/port is passed to the constructor.
    __address = "localhost"
    __port = 8080

    def __init__(self, address=None, port=None):
        """Create the server socket; requests are not served until entered.

        Args:
            address: Host name or IP to bind. Defaults to "localhost".
            port: TCP port to bind. Defaults to 8080. Pass 0 to let the OS
                pick a free port and read it back with ``ServerAddress()``.
        """
        if address is None:
            address = self.__address
        if port is None:
            port = self.__port

        # Test passed when:
        # - at least one HEAD request is received; and
        # - at least one GET request is received; and
        # - all received requests do not advertise for HTTP/2.
        test_results = {"head_count": 0, "get_count": 0, "http2_ads": False}

        class RequestValidator(BaseHTTPRequestHandler):
            """Request handler that updates ``test_results`` per request."""

            protocol_version = "HTTP/1.1"

            def __CheckHttp2Ads(self):
                # Header lookups on self.headers are case-insensitive.
                # Any of the three headers below marks an HTTP/2 upgrade
                # offer from the client.
                if "connection" in self.headers:
                    v = self.headers["connection"].lower()
                    if "upgrade" in v or "http2" in v:
                        test_results["http2_ads"] = True
                if (
                    "upgrade" in self.headers
                    and "h2c" in self.headers["upgrade"].lower()
                ):
                    test_results["http2_ads"] = True
                if "http2-settings" in self.headers:
                    test_results["http2_ads"] = True

            def do_HEAD(self):
                # Every HEAD request is answered with an empty 200.
                self.__CheckHttp2Ads()
                test_results["head_count"] += 1
                self.send_response(200)
                self.end_headers()

            def do_GET(self):
                # Every GET request is answered with 404.
                self.__CheckHttp2Ads()
                test_results["get_count"] += 1
                self.send_error(
                    404,
                    "Thank you for using the mock s3 service!",
                    "Your bucket is not found here!",
                )

        self.__test_results = test_results
        self.__server = HTTPServer((address, port), RequestValidator)
        self.__service_thread = threading.Thread(target=self.__server.serve_forever)

    def __enter__(self):
        """Start serving requests on the background thread."""
        self.__service_thread.start()
        # Returning self makes "with MockS3Service() as svc:" usable.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the server, close its socket and join the thread."""
        self.__server.shutdown()
        self.__server.server_close()
        self.__service_thread.join()

    def ServerAddress(self):
        """Return the ``(host, port)`` pair the server socket is bound to."""
        return self.__server.server_address

    def TestPassed(self):
        """Return True once a HEAD and a GET were seen without HTTP/2 ads."""
        return (
            self.__test_results["head_count"] > 0
            and self.__test_results["get_count"] > 0
            and not self.__test_results["http2_ads"]
        )


if __name__ == "__main__":
    # Polling schedule: one check per interval until the timeout is used up.
    poll_interval = 1  # seconds
    timeout = 10  # seconds

    # Initialize mock service
    mock_s3_service = MockS3Service()

    # Serve requests while waiting; leave early as soon as the pass
    # condition is met, otherwise give up once the timeout has elapsed.
    with mock_s3_service:
        for _ in range(0, timeout, poll_interval):
            if mock_s3_service.TestPassed():
                break
            time.sleep(poll_interval)

    # Print the result (the shell test greps the output for it)
    print("TEST PASSED" if mock_s3_service.TestPassed() else "TEST FAILED")


================================================
FILE: qa/L0_storage_S3_local/test.sh
================================================
#!/bin/bash
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the QA model repository version. An explicit first argument wins
# over NVIDIA_TRITON_SERVER_VERSION taken from the environment.
if [[ $# -ge 1 ]]; then
    REPO_VERSION="$1"
else
    REPO_VERSION="${NVIDIA_TRITON_SERVER_VERSION}"
fi

# Nothing can be tested without knowing which model data to use.
if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

# Append the architecture suffix when TEST_REPO_ARCH is provided.
if [[ -n "${TEST_REPO_ARCH}" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

# Expose only GPU 0 to the processes started by this test.
export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
# Result file and expected test count handed to check_test_results.
TEST_RESULT_FILE='test_results.txt'
INFER_TEST="../common/infer_test.py"
EXPECTED_NUM_TESTS="3"

# Source directory of the QA models copied by setup_model_repo.
DATADIR="/data/inferenceserver/${REPO_VERSION}/qa_model_repository"
# Used to control which backends are run in infer_test.py
BACKENDS=${BACKENDS:="onnx libtorch plan"}

# Run the inference unit tests against the running server and verify the
# result file. Output goes to CLIENT_LOG; on any failure the log is printed
# and the global RET is set to 1.
function run_unit_tests() {
    echo "Running unit tests: ${INFER_TEST}"
    # The commands are tested directly in the conditions (instead of checking
    # $? afterwards) so that a failure is recorded in RET even when the caller
    # has errexit (set -e) enabled, rather than aborting the whole script.
    if ! python $INFER_TEST >$CLIENT_LOG 2>&1; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    elif ! check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
}

# Create a fresh local model repository populated from DATADIR.
#   $1  repository directory to (re)create            (default: "models")
#   $2  space separated list of backends              (default: $BACKENDS)
#   $3  space separated list of model type suffixes
#       (default: "float32_float32_float32 object_object_object")
# Every "<backend>_<type>" model is copied and its version_policy line is
# removed from config.pbtxt. A missing backend/type combination makes cp/sed
# fail, which is why callers invoke this function with errexit disabled.
function setup_model_repo() {
    # Keep the working variables local so they cannot clobber script globals.
    local model_repo="${1:-"models"}"
    local backends="${2:-${BACKENDS}}"
    local types="${3:-"float32_float32_float32 object_object_object"}"
    local backend dtype model

    echo "[setup_model_repo] model_repo: ${model_repo}, backends: ${backends}"
    rm -rf "${model_repo}" && mkdir "${model_repo}"
    # The lists are intentionally left unquoted to split them on whitespace.
    for backend in ${backends}; do
        for dtype in ${types}; do
            model="${backend}_${dtype}"
            echo "Copying ${DATADIR}/${model} to ${model_repo}."
            cp -r "${DATADIR}/${model}" "${model_repo}/"
            # Remove version policy from config.pbtxt
            sed -i '/^version_policy/d' "${model_repo}/${model}/config.pbtxt"
        done
    done
}

# Ask the running server to load every model found in a local repository.
#   $1  local repository directory whose entries name the models
#       (default: "models")
# Sets the global RET to 1 for each model whose load request does not return
# HTTP status 200.
function load_models() {
    # Keep the working variables local so they cannot clobber script globals.
    local model_repo="${1:-"models"}"
    local model code

    for model in `ls ${model_repo}`; do
        echo "Loading model: ${model}"
        # Discard the response body so that "code" holds only the HTTP status.
        code=`curl -s -o /dev/null -w '%{http_code}' -X POST localhost:8000/v2/repository/models/${model}/load`
        if [ "$code" != "200" ]; then
            echo -e "\n***\n*** Test Failed. Failed to load model: ${model}\n***"
            RET=1
        fi
    done
}

# Build the default "models" repository; errexit is off because copying may
# fail for backend/type combinations that do not exist.
set +e
setup_model_repo
set -e

# Create model with name that has all types of allowed characters
# The model is a copy of the libtorch model with the name replaced in its
# config.pbtxt.
DUMMY_MODEL="Model_repo-1.0"
cp -r models/libtorch_float32_float32_float32 models/$DUMMY_MODEL
sed -i 's/libtorch_float32_float32_float32/Model_repo-1.0/g' models/$DUMMY_MODEL/config.pbtxt

SERVER=/opt/tritonserver/bin/tritonserver
# Provides run_server and check_test_results used below.
source ../common/util.sh

# Remove logs left over from a previous run.
rm -f *.log*

## Setup local MINIO server
# Download the MinIO server binary, install it and create its data and
# configuration directories. "mkdir -p" keeps the chain from failing (and,
# with errexit on, aborting the script) when the directories already exist.
(wget https://dl.min.io/server/minio/release/linux-amd64/minio && \
    chmod +x minio && \
    mv minio /usr/local/bin && \
    mkdir -p /usr/local/share/minio && \
    mkdir -p /etc/minio)

export MINIO_ACCESS_KEY="minio"
# Specify MINIO CI env to allow using root disk
# https://github.com/minio/minio/issues/15030
export MINIO_CI_CD=true
MINIO_VOLUMES="/usr/local/share/minio/"
MINIO_OPTS="-C /etc/minio --address 127.0.0.1:4572"
export MINIO_SECRET_KEY="miniostorage"

(curl -O https://raw.githubusercontent.com/minio/minio-service/master/linux-systemd/minio.service && \
    mv minio.service /etc/systemd/system)

# Start minio server
# The server runs in the background; its PID is kept so it can be stopped.
/usr/local/bin/minio server $MINIO_OPTS $MINIO_VOLUMES &
MINIO_PID=$!

# Credentials used by the S3 clients; they match the MinIO keys set above.
export AWS_ACCESS_KEY_ID=minio && \
    export AWS_SECRET_ACCESS_KEY=miniostorage

# Force version to 0.07 to prevent failures due to version changes
python -m pip install awscli-local==0.07

# Needed to set correct port for awscli-local
ENDPOINT_FLAG="--endpoint-url=http://localhost:4572"

# Cleanup bucket if exists
awslocal $ENDPOINT_FLAG s3 rm s3://demo-bucket1.0 --recursive --include "*" && \
    awslocal $ENDPOINT_FLAG s3 rb s3://demo-bucket1.0 || true

# Create and add data to bucket
awslocal $ENDPOINT_FLAG s3 mb s3://demo-bucket1.0 && \
    awslocal $ENDPOINT_FLAG s3 sync models s3://demo-bucket1.0
# Overall test status; set to 1 by any failing check.
RET=0

# Test with hostname and IP address
echo "=== Running hostname/IP tests ==="
for HOST in "127.0.0.1" "localhost"; do
    SERVER_ARGS="--model-repository=s3://$HOST:4572/demo-bucket1.0 --model-control-mode=explicit"
    # Name the server log after the kind of host under test: the literal
    # address is the IP case, "localhost" the hostname case.
    if [ "$HOST" = "127.0.0.1" ]; then
        SERVER_LOG="./inference_server_ip.log"
    else
        SERVER_LOG="./inference_server_hostname.log"
    fi

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        # Kill minio server
        kill $MINIO_PID
        wait $MINIO_PID
        exit 1
    fi

    # errexit is off while failures are collected in RET.
    set +e
    load_models
    run_unit_tests

    # Try to load model with name that checks for all types of allowed characters
    # The response body is discarded so that "code" holds only the HTTP status.
    code=`curl -s -o /dev/null -w '%{http_code}' -X POST localhost:8000/v2/repository/models/${DUMMY_MODEL}/load`
    if [ "$code" != "200" ]; then
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# Test with Polling
echo "=== Running polling tests ==="
SERVER_ARGS="--model-repository=s3://localhost:4572/demo-bucket1.0 --model-control-mode=poll"
SERVER_LOG="./inference_server_poll.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    # Kill minio server
    kill $MINIO_PID
    wait $MINIO_PID
    exit 1
fi

# Add a version 4 (a copy of version 1) to the libtorch model and sync it to
# the bucket while the server is running.
cp -r models/libtorch_float32_float32_float32/1 models/libtorch_float32_float32_float32/4
awslocal $ENDPOINT_FLAG s3 sync models s3://demo-bucket1.0

# Fixed wait to give the poller time to pick up the change.
sleep 20

# The repository index must show version 3 as unloaded and version 4 as READY.
set +e
CURL_LOG=$(curl -X POST localhost:8000/v2/repository/index)
if [[ "$CURL_LOG" != *"{\"name\":\"libtorch_float32_float32_float32\",\"version\":\"3\",\"state\":\"UNAVAILABLE\",\"reason\":\"unloaded\"}"* ]]; then
    echo -e "\n***\n*** Failed. Server did not unload libtorch_float32_float32_float32 version 3\n***"
    RET=1
fi

if [[ "$CURL_LOG" != *"{\"name\":\"libtorch_float32_float32_float32\",\"version\":\"4\",\"state\":\"READY\"}"* ]]; then
    echo -e "\n***\n*** Failed. Server did not load libtorch_float32_float32_float32 version 4\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Destroy bucket
awslocal $ENDPOINT_FLAG s3 rm s3://demo-bucket1.0 --recursive --include "*" && \
    awslocal $ENDPOINT_FLAG s3 rb s3://demo-bucket1.0
# Test with Polling, no model configuration file - with strict model config disabled
echo "=== Running autocomplete tests ==="
AUTOCOMPLETE_BACKENDS="onnx"
# Restrict BACKENDS (exported for the unit test) to the autocomplete backends.
export BACKENDS=${AUTOCOMPLETE_BACKENDS}

set +e
setup_model_repo

TYPES="float32_float32_float32 object_object_object"
for BACKEND in ${AUTOCOMPLETE_BACKENDS}; do
    for TYPE in ${TYPES}; do
        model="${BACKEND}_${TYPE}"
        # Config files specify things expected by unit test like label_filename
        # and max_batch_size for comparing results, so remove some key fields
        # for autocomplete to fill that won't break the unit test.
        sed -i '/^input {/,/^}/d' models/${model}/config.pbtxt
        sed -i '/^output {/,/^}/d' models/${model}/config.pbtxt
    done
done
set -e

awslocal $ENDPOINT_FLAG s3 mb s3://demo-bucket1.0 && \
    awslocal $ENDPOINT_FLAG s3 sync models s3://demo-bucket1.0

SERVER_ARGS="--model-repository=s3://localhost:4572/demo-bucket1.0 --model-control-mode=poll --strict-model-config=false"
SERVER_LOG="./inference_server_noconfig.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    # Kill minio server
    kill $MINIO_PID
    wait $MINIO_PID
    exit 1
fi

# Disable errexit so that a failing unit test is recorded in RET instead of
# aborting the script before the server, the bucket and MinIO are cleaned up
# (same as the other run_unit_tests call sites).
set +e
run_unit_tests
set -e

kill $SERVER_PID
wait $SERVER_PID

# Destroy bucket
awslocal $ENDPOINT_FLAG s3 rm s3://demo-bucket1.0 --recursive --include "*" && \
    awslocal $ENDPOINT_FLAG s3 rb s3://demo-bucket1.0

# Test for multiple model repositories using S3 cloud storage
echo "=== Running multiple-model-repository tests ==="
# The backends are split over two repositories; BACKENDS is exported with the
# combined list.
BACKENDS1="libtorch"
BACKENDS2="onnx plan"
export BACKENDS="$BACKENDS1 $BACKENDS2"

set +e
setup_model_repo "models1" "${BACKENDS1}"
setup_model_repo "models2" "${BACKENDS2}"
set -e

BUCKET_NAME="demo-bucket"
MODEL_REPO_ARGS=""
# models1 goes to demo-bucket1 and models2 to demo-bucket2; one
# --model-repository argument is accumulated per bucket.
for BUCKET_SUFFIX in 1 2; do
    # Cleanup bucket if exists
    awslocal $ENDPOINT_FLAG s3 rm s3://$BUCKET_NAME$BUCKET_SUFFIX --recursive --include "*" && \
        awslocal $ENDPOINT_FLAG s3 rb s3://$BUCKET_NAME$BUCKET_SUFFIX || true

    # Create and add data to bucket
    awslocal $ENDPOINT_FLAG s3 mb s3://$BUCKET_NAME$BUCKET_SUFFIX && \
        awslocal $ENDPOINT_FLAG s3 sync models$BUCKET_SUFFIX s3://$BUCKET_NAME$BUCKET_SUFFIX

    MODEL_REPO_ARGS="$MODEL_REPO_ARGS --model-repository=s3://localhost:4572/$BUCKET_NAME$BUCKET_SUFFIX"
done

SERVER_ARGS="$MODEL_REPO_ARGS --model-control-mode=explicit"
SERVER_LOG="./inference_server.multi.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    # Kill minio server
    kill $MINIO_PID
    wait $MINIO_PID
    exit 1
fi

# errexit is off while failures are collected in RET.
set +e
load_models "models1"
load_models "models2"
run_unit_tests
set -e

kill $SERVER_PID
wait $SERVER_PID

# Test access decline
# With an invalid secret key the server is expected to fail to start
# (SERVER_PID stays "0").
AWS_SECRET_ACCESS_KEY_BACKUP=$AWS_SECRET_ACCESS_KEY
export AWS_SECRET_ACCESS_KEY="[Invalid]"
SERVER_ARGS="--model-repository=s3://localhost:4572/${BUCKET_NAME}1 --exit-timeout-secs=120"
SERVER_LOG="./inference_server.access_decline.log"
run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Unexpected server start $SERVER\n***"
    cat $SERVER_LOG
    kill $SERVER_PID
    wait $SERVER_PID
    RET=1
else
  # MinIO does not appear to reply on access decline, but other implementations
  # might provide extra messages, so make sure Triton will print the messages.
  EXPECTED_MSG="Unable to create S3 filesystem client. Check account credentials. Exception: '' Message: 'No response body.'"
  if ! grep "$EXPECTED_MSG" $SERVER_LOG; then
    echo -e "\n***\n*** Expected error message not found\n***"
    cat $SERVER_LOG
    RET=1
  fi
fi
# Restore keys for destroying buckets
export AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY_BACKUP

# Destroy buckets
for BUCKET_SUFFIX in 1 2; do
    awslocal $ENDPOINT_FLAG s3 rm s3://$BUCKET_NAME$BUCKET_SUFFIX --recursive --include "*" && \
        awslocal $ENDPOINT_FLAG s3 rb s3://$BUCKET_NAME$BUCKET_SUFFIX || true
done

# Kill minio server
kill $MINIO_PID
wait $MINIO_PID

# Test the S3 client will not advertise HTTP/2
# mock_s3_service.py is started in the background with its output captured in
# TEST_LOG; the server is pointed at it (localhost:8080) and is expected NOT to
# start. The test passes when TEST_LOG contains "TEST PASSED".
TEST_LOG="./http2_advertise_test.log"
python3 mock_s3_service.py > $TEST_LOG 2>&1 &
sleep 2  # make sure the mock service has started
SERVER_LOG="./http2_advertise_test.server.log"
SERVER_ARGS="--model-repository=s3://localhost:8080/dummy-bucket --exit-timeout-secs=120"
run_server
# A running server is the failure case here.
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Unexpected server start $SERVER\n***"
    cat $SERVER_LOG
    kill $SERVER_PID
    wait $SERVER_PID
    RET=1
else
    sleep 2  # make sure the mock service has stopped
    PASSED_MSG="TEST PASSED"
    if ! grep "$PASSED_MSG" $TEST_LOG; then
        echo -e "\n***\n*** S3 client HTTP/2 advertise test failed\n***"
        cat $TEST_LOG
        RET=1
    fi
fi

# Print and return test result
# RET is the accumulated status of the tests above and is the script's exit code.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test Failed\n***"
fi
exit $RET


================================================
FILE: qa/L0_storage_azure/test.sh
================================================
#!/bin/bash
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the QA model repository version: defaults to
# NVIDIA_TRITON_SERVER_VERSION, overridden by the first positional argument,
# and suffixed with TEST_REPO_ARCH when that variable is set.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
TEST_RESULT_FILE='test_results.txt'
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Azure credentials are mandatory; fail fast when either is missing.
if [ -z "$AZURE_STORAGE_ACCOUNT" ]; then
    echo -e "azure storage account must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

if [ -z "$AZURE_STORAGE_KEY" ]; then
    echo -e "azure storage key must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

ACCOUNT_NAME=$AZURE_STORAGE_ACCOUNT
ACCOUNT_KEY=$AZURE_STORAGE_KEY
export CUDA_VISIBLE_DEVICES=0
CLIENT_LOG_BASE="./client"
INFER_TEST="../common/infer_test.py"
EXPECTED_NUM_TESTS="3"
# The container name embeds the current epoch seconds so each run gets its own.
timestamp=$(date +%s)
CONTAINER_NAME="tritonqatest${timestamp}"

# container path (Point to the container when testing cloud storage)
AS_URL="as://${ACCOUNT_NAME}/${CONTAINER_NAME}"

# Can now install latest azure-cli (instead of 2.0.73)
# https://github.com/Azure/azure-cli/issues/30102
# https://github.com/Azure/azure-cli/issues/30127
python -m pip install azure-cli setuptools "azure-mgmt-rdbms==10.2.0b17"

# create test container
az storage container create --name ${CONTAINER_NAME} --account-name ${ACCOUNT_NAME} --account-key ${ACCOUNT_KEY}
# Fixed pause after container creation (presumably to let it become available).
sleep 10

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_TIMEOUT=420
SERVER_LOG_BASE="./inference_server"
# Shared helpers; run_server and check_test_results used below are not defined
# in this script, so they presumably come from here.
source ../common/util.sh

# Remove logs left over from a previous run and reset the overall status.
rm -f $SERVER_LOG_BASE* $CLIENT_LOG_BASE*
RET=0

# Used to control which backends are run in infer_test.py
# Keeps a caller-provided BACKENDS value; otherwise assigns the default list.
BACKENDS=${BACKENDS:="onnx libtorch plan"}

function run_unit_tests() {
    # Run $INFER_TEST for the current $BACKENDS with all output captured in
    # $CLIENT_LOG, then verify the recorded results with check_test_results.
    # On either failure the client log is printed and the global RET is set
    # to 1; nothing is returned to the caller beyond that.
    BACKENDS=$BACKENDS python $INFER_TEST >$CLIENT_LOG 2>&1
    local infer_status=$?

    # The client itself failed: report and stop, skipping result verification.
    if [ "$infer_status" -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
        return
    fi

    # The client succeeded: make sure the expected number of tests was recorded.
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    local verify_status=$?
    if [ "$verify_status" -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
}

function setup_model_repo() {
    # Rebuild ./models from scratch with the QA models of every backend
    # listed in $BACKENDS, taken from the repository for $REPO_VERSION.
    local qa_repo="/data/inferenceserver/${REPO_VERSION}/qa_model_repository"

    rm -rf models && mkdir -p models
    for FW in $BACKENDS; do
        # Every backend contributes its float32 model.
        cp -r ${qa_repo}/${FW}_float32_float32_float32 models/

        # Model with string inputs does not exist for the plan backend.
        if [[ ${FW} == "plan" ]]; then
            continue
        fi

        # Copy the string-input models, then drop the nobatch (bs=1) variants.
        cp -r ${qa_repo}/${FW}*_object_object_object/ models/
        rm -rf models/*nobatch*
    done
}

# Build the local model repository, pin every model to GPU instances, and
# upload the result to the Azure test container.
setup_model_repo
KIND="KIND_GPU"
# Append an instance_group entry to each model's config.pbtxt.
for FW in $BACKENDS; do
    for MC in `ls models/${FW}*/config.pbtxt`; do
        echo "instance_group [ { kind: ${KIND} }]" >> $MC
    done
done

# now traverse the tree and create empty version directories that the CLI skips
# An empty placeholder file named after the directory is created inside each
# empty subdirectory so that the file-based upload below includes it.
for dir in `ls models/`; do
    for subdir in `ls models/$dir`; do
        if [ -d models/$dir/$subdir ] && [ -z "$(ls models/$dir/$subdir)" ]; then
            touch models/$dir/$subdir/$subdir
        fi
    done
done

# copy contents of models into container.
# Each file is uploaded as a blob whose name is its local path (models/...).
for file in `find models -type f` ;do
    az storage blob upload --container-name ${CONTAINER_NAME} --account-name ${ACCOUNT_NAME} --account-key ${ACCOUNT_KEY} --file $file --name $file
done
# Fixed pause after the upload before the server is started.
sleep 10

# Test 1 Scenarios:
# 1. access blob using shared key in envs
# 2. adding more scenarios in future
# For each scenario the server is started against ${AS_URL}/models and the
# unit tests are run. MODEL_REPO set here is reused by later tests.
for ENV_VAR in "shared_key"; do
    SERVER_LOG=$SERVER_LOG_BASE.$ENV_VAR.log
    CLIENT_LOG=$CLIENT_LOG_BASE.$ENV_VAR.log
    MODEL_REPO="${AS_URL}/models"
    # NOTE: only "shared_key" is iterated above, so this SAS branch is
    # currently never taken.
    if [ "$ENV_VAR" == "sas" ]; then
        unset AZURE_STORAGE_KEY
        sas=`az storage blob generate-sas --container-name ${CONTAINER_NAME} --account-name ${ACCOUNT_NAME} --account-key ${ACCOUNT_KEY} --name models`
        # eval/echo is used to strip the quotes around the generated token.
        sas_without_quote=$(eval echo $sas)
        export AZURE_STORAGE_SAS="?$sas_without_quote"
    fi

    # Now start model tests
    # set server arguments
    SERVER_ARGS="--model-repository=$MODEL_REPO --exit-timeout-secs=120"

    run_server
    # A SERVER_PID of "0" is treated as "server failed to start"; record the
    # failure and leave the scenario loop.
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        RET=1
        break
    fi

    # Test failures are recorded in RET rather than aborting the script.
    set +e
    run_unit_tests
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# Test localization to a specified location
# TRITON_AZURE_MOUNT_DIRECTORY is exported so the server started below sees it.
# The test expects the directory to be populated while the server is running
# and to be empty again after the server has stopped.
export TRITON_AZURE_MOUNT_DIRECTORY=`pwd`/azure_localization_test

# Start from a fresh, empty mount directory.
if [ -d "$TRITON_AZURE_MOUNT_DIRECTORY" ]; then
  rm -rf $TRITON_AZURE_MOUNT_DIRECTORY
fi

mkdir -p $TRITON_AZURE_MOUNT_DIRECTORY

SERVER_LOG=$SERVER_LOG_BASE.custom_localization.log
SERVER_ARGS="--model-repository=$MODEL_REPO --exit-timeout-secs=120"

run_server
# A SERVER_PID of "0" is treated as "server failed to start".
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# While the server is up, the mount directory must contain localized files.
if [ -z "$(ls -A $TRITON_AZURE_MOUNT_DIRECTORY)" ]; then
    echo -e "\n***\n*** Test localization to a specified location failed. \n***"
    echo -e "\n***\n*** Specified mount folder $TRITON_AZURE_MOUNT_DIRECTORY is empty \n***"
    ls -A $TRITON_AZURE_MOUNT_DIRECTORY
    # The server is still running at this point; stop it so the failed test
    # does not leave a tritonserver process behind. "|| true" keeps the exit
    # code below at 1 regardless of the server's own exit status.
    kill $SERVER_PID
    wait $SERVER_PID || true
    exit 1
fi

kill $SERVER_PID
wait $SERVER_PID

# After shutdown, the server is expected to have cleared the mount directory.
if [ -d "$TRITON_AZURE_MOUNT_DIRECTORY" ] && [ ! -z "$(ls -A $TRITON_AZURE_MOUNT_DIRECTORY)" ]; then
    echo -e "\n***\n*** Test localization to a specified location failed. \n***"
    echo -e "\n***\n*** Specified mount folder $TRITON_AZURE_MOUNT_DIRECTORY was not cleared properly. \n***"
    ls -A $TRITON_AZURE_MOUNT_DIRECTORY
    exit 1
fi

# Remove the directory and the variable so later tests use default behavior.
rm -rf $TRITON_AZURE_MOUNT_DIRECTORY
unset TRITON_AZURE_MOUNT_DIRECTORY

# Add test for explicit model control
# The server is started in explicit mode, every model under ./models is loaded
# through the repository load endpoint, and the unit tests are run.
SERVER_LOG=$SERVER_LOG_BASE.explicit.log
CLIENT_LOG=$CLIENT_LOG_BASE.explicit.log
SERVER_ARGS="--model-repository=${AS_URL}/models --model-control-mode=explicit"

run_server
if [ "$SERVER_PID" == "0" ]; then
    # Record the failure and skip this test. This section is not inside a
    # loop, so "break" would be a no-op and the script would go on to run
    # "kill 0" (SERVER_PID is 0), signalling the whole process group.
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
else
    # Failures below are recorded in RET rather than aborting the script.
    set +e
    for model in `ls models/`; do
        # -o /dev/null discards the response body so that $code holds only the
        # HTTP status written by -w.
        code=`curl -s -o /dev/null -w %{http_code} -X POST localhost:8000/v2/repository/models/${model}/load`
        if [ "$code" != "200" ]; then
            echo -e "\n***\n*** Failed to load model ${model} (HTTP ${code})\n***"
            echo -e "\n***\n*** Test Failed\n***"
            RET=1
        fi
    done

    # Check that each explicitly loaded model runs correctly
    run_unit_tests
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
fi

# Clean up container
# The container is deleted and, after a fixed pause, recreated under the same
# name for the next test.
az storage container delete --name ${CONTAINER_NAME} --account-name ${ACCOUNT_NAME} --account-key ${ACCOUNT_KEY}
sleep 60

# Test with Polling, no model configuration file - with strict model config disabled
SERVER_LOG=$SERVER_LOG_BASE.noconfig.log
CLIENT_LOG=$CLIENT_LOG_BASE.noconfig.log
SERVER_ARGS="--model-repository=${AS_URL}/models --model-control-mode=poll --strict-model-config=false"

# create test container
az storage container create --name ${CONTAINER_NAME} --account-name ${ACCOUNT_NAME} --account-key ${ACCOUNT_KEY}
sleep 10

# Setup model repository with minimal configs to be autocompleted
rm -rf models && mkdir -p models
AUTOCOMPLETE_BACKENDS="onnx"
for FW in ${AUTOCOMPLETE_BACKENDS}; do
    for model in ${FW}_float32_float32_float32 ${FW}_object_object_object; do
        cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/${model} models/
        # Config files specify things expected by unit test like label_filename
        # and max_batch_size for comparing results, so remove some key fields
        # for autocomplete to fill that won't break the unit test.
        # The two sed commands delete the top-level "input {...}" and
        # "output {...}" blocks from the config.
        sed -i '/^input {/,/^}/d' models/${model}/config.pbtxt
        sed -i '/^output {/,/^}/d' models/${model}/config.pbtxt
    done
done

# copy contents of models into container.
# Each file is uploaded as a blob whose name is its local path (models/...).
for file in `find models -type f` ;do
    az storage blob upload --container-name ${CONTAINER_NAME} --account-name ${ACCOUNT_NAME} --account-key ${ACCOUNT_KEY} --file $file --name $file
done
sleep 10

run_server
# A SERVER_PID of "0" is treated as "server failed to start".
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Check that each polled model runs correctly
# BACKENDS is narrowed to the autocomplete backends for this run.
export BACKENDS="${AUTOCOMPLETE_BACKENDS}"
run_unit_tests
set -e

kill $SERVER_PID
wait $SERVER_PID

# Clean up container
az storage container delete --name ${CONTAINER_NAME} --account-name ${ACCOUNT_NAME} --account-key ${ACCOUNT_KEY}

# RET is the accumulated status of the tests above; only success is announced
# here, and RET becomes the script's exit code.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_storage_swiftstack/infer_test.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import infer_util as iu
import numpy as np
import test_util as tu


class InferTest(tu.TestResultCollector):
    """Exact-result inference checks against the plan, onnx and libtorch models."""

    def _full_exact(
        self, input_dtype, output0_dtype, output1_dtype, output0_raw, output1_raw, swap
    ):
        """Run ``iu.infer_exact`` on every backend that supports the dtypes.

        For each of the "plan", "onnx" and "libtorch" model prefixes, the
        matching ``tu.validate_for_*_model`` check decides whether the dtype
        combination applies; if so, inference is issued at batch size 1 and
        at batch size 8.
        """
        input_size = 16
        max_batch = 8
        flat_shape = (input_size,)
        trt_shape = (input_size, 1, 1)

        def _run(prefix, tensor_shape):
            # One request per batch size, with the batch dimension prepended
            # to the tensor shape.
            for bs in (1, max_batch):
                iu.infer_exact(
                    self,
                    prefix,
                    (bs,) + tensor_shape,
                    bs,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    output0_raw=output0_raw,
                    output1_raw=output1_raw,
                    model_version=None,
                    swap=swap,
                    outputs=("OUTPUT0", "OUTPUT1"),
                    use_http=True,
                    use_grpc=True,
                    skip_request_id_check=False,
                    use_streaming=True,
                    correlation_id=0,
                )

        # The plan model is always validated with the 3-D shape, but only
        # int8 inputs are sent with it; other dtypes use the flat shape.
        plan_run_shape = trt_shape if input_dtype == np.int8 else flat_shape

        # (validator, model prefix, shape used for validation, shape sent)
        backends = (
            (tu.validate_for_trt_model, "plan", trt_shape, plan_run_shape),
            (tu.validate_for_onnx_model, "onnx", flat_shape, flat_shape),
            (tu.validate_for_libtorch_model, "libtorch", flat_shape, flat_shape),
        )
        for is_supported, prefix, check_shape, run_shape in backends:
            if is_supported(
                input_dtype,
                output0_dtype,
                output1_dtype,
                check_shape,
                check_shape,
                check_shape,
            ):
                _run(prefix, run_shape)

    def test_raw_fff(self):
        """float32 in/out with both outputs requested as raw tensors."""
        fp32 = np.float32
        self._full_exact(
            fp32, fp32, fp32, output0_raw=True, output1_raw=True, swap=True
        )

    def test_class_fff(self):
        """float32 in/out with both outputs requested as non-raw results."""
        fp32 = np.float32
        self._full_exact(
            fp32, fp32, fp32, output0_raw=False, output1_raw=False, swap=True
        )


# Run the test cases through unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_storage_swiftstack/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the QA model repository version: defaults to
# NVIDIA_TRITON_SERVER_VERSION, overridden by the first positional argument,
# and suffixed with TEST_REPO_ARCH when that variable is set.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
TEST_RESULT_FILE='test_results.txt'
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
# A version is mandatory; fail immediately without one.
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Clear any inherited AWS settings; they are re-exported from the SWIFTSTACK_*
# variables further down.
unset AWS_ACCESS_KEY_ID
unset AWS_SECRET_ACCESS_KEY
unset AWS_DEFAULT_REGION

pip3 install --no-deps awscli-plugin-endpoint

# cli_legacy_plugin_path = /usr/local/lib/python3.8/site-packages

mkdir -p ~/.aws
# Swiftstack S3 credentials are necessary for this test. Passed via ENV variables
# The generated ~/.aws/config enables the endpoint plugin and directs s3
# commands to https://pbss.s8k.io.
echo "[plugins]
endpoint = awscli_plugin_endpoint

[default]
aws_access_key_id = $SWIFTSTACK_ACCESS_KEY_ID
aws_secret_access_key = $SWIFTSTACK_SECRET_ACCESS_KEY
region = $SWIFTSTACK_DEFAULT_REGION

s3 =
    endpoint_url = https://pbss.s8k.io
    signature_version = s3v4
    payload_signing_enabled = true
" > ~/.aws/config

# Export the same credentials as AWS_* environment variables.
export AWS_ACCESS_KEY_ID=$SWIFTSTACK_ACCESS_KEY_ID &&
export AWS_SECRET_ACCESS_KEY=$SWIFTSTACK_SECRET_ACCESS_KEY &&
export AWS_DEFAULT_REGION=$SWIFTSTACK_DEFAULT_REGION

# S3 bucket path (Point to bucket when testing cloud storage)
# The bucket name embeds CI_JOB_ID.
BUCKET_URL="s3://triton-bucket-${CI_JOB_ID}"

# S3 repo path to pass to Triton server
S3_REPO_URL="s3://https://pbss.s8k.io:443/triton-bucket-${CI_JOB_ID}"

# Cleanup S3 test bucket if exists (due to test failure)
# "|| true" makes this cleanup best-effort.
aws s3 rm $BUCKET_URL --recursive --include "*" && \
    aws s3 rb $BUCKET_URL || true

# Make S3 test bucket
aws s3 mb $BUCKET_URL

# Server and client settings; the variables are presumably consumed by the
# helpers sourced from ../common/util.sh (run_server, check_test_results).
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_TIMEOUT=420

CLIENT_LOG_BASE="./client"
SERVER_LOG_BASE="./inference_server"
INFER_TEST=infer_test.py
EXPECTED_NUM_TESTS="2"
source ../common/util.sh

# Remove logs left over from a previous run and reset the overall status.
rm -f $SERVER_LOG_BASE* $CLIENT_LOG_BASE*
RET=0

SERVER_LOG=$SERVER_LOG_BASE.log
CLIENT_LOG=$CLIENT_LOG_BASE.log

# Copy models in model directory
rm -rf models && mkdir -p models

# Make sure the bucket starts out empty.
aws s3 rm $BUCKET_URL/ --recursive --include "*"

# Now start model tests

# Stage the float32 model of each backend locally.
for FW in onnx libtorch plan; do
    cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/${FW}_float32_float32_float32/ models/
done

# Append a GPU instance_group entry to each staged model's config.pbtxt.
for FW in onnx libtorch plan; do
    for MC in `ls models/${FW}*/config.pbtxt`; do
        echo "instance_group [ { kind: KIND_GPU }]" >> $MC
    done
done

# copy contents of /models into S3 bucket.
aws s3 cp models/ $BUCKET_URL/ --recursive --include "*"

# Test without polling
# The models are already in the bucket when the server starts.
SERVER_ARGS="--model-repository=$S3_REPO_URL --exit-timeout-secs=120"

run_server
# A SERVER_PID of "0" is treated as "server failed to start".
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Test failures are recorded in RET rather than aborting the script.
set +e

python $INFER_TEST >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    # The client succeeded; verify the recorded results against the expected
    # number of tests.
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Clean up bucket contents
aws s3 rm $BUCKET_URL/ --recursive --include "*"


# Test with polling enabled
# The server is started against the bucket emptied by the previous step; the
# models are uploaded only afterwards, so they must be picked up by polling.
SERVER_ARGS="--model-repository=$S3_REPO_URL --exit-timeout-secs=120 --model-control-mode=poll"

run_server
# A SERVER_PID of "0" is treated as "server failed to start".
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# copy contents of /models into S3 bucket and wait for them to be loaded.
aws s3 cp models/ $BUCKET_URL/ --recursive --include "*"
# Fixed wait; readiness of the models is not checked explicitly.
sleep 420

# Test failures are recorded in RET rather than aborting the script.
set +e

python $INFER_TEST >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    # The client succeeded; verify the recorded results against the expected
    # number of tests.
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Clean up bucket contents and delete bucket
aws s3 rm $BUCKET_URL/ --recursive --include "*"
aws s3 rb $BUCKET_URL

# RET is the accumulated status of both tests; only success is announced
# here, and RET becomes the script's exit code.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_string_io/string_client_test.py
================================================
#!/usr/bin/env python
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest
from builtins import range

import numpy as np
import test_util as tu
import tritonclient.grpc as tritongrpcclient
import tritonclient.http as tritonhttpclient
import tritonclient.utils as tritonutils


class ClientStringTest(tu.TestResultCollector):
    """Round-trip BYTES tensors through an identity model over HTTP and gRPC.

    A "client" throughout this class is a tuple:
    ``(client instance, client module, binary output flag[, binary input flag])``.
    The fourth element is only present (and only read) for HTTP clients.
    """

    def _test_infer_unicode(self, model_name, client, input_):
        """Send ``input_`` as INPUT0 and assert OUTPUT0 equals it unchanged."""
        protocol = client[1]
        infer_input = protocol.InferInput("INPUT0", input_.shape, "BYTES")

        # Only the HTTP client takes the binary_data switches for the input
        # payload and the requested output.
        if protocol == tritonhttpclient:
            infer_input.set_data_from_numpy(input_, client[3])
            requested = protocol.InferRequestedOutput(
                "OUTPUT0", binary_data=client[2]
            )
        else:
            infer_input.set_data_from_numpy(input_)
            requested = protocol.InferRequestedOutput("OUTPUT0")

        results = client[0].infer(
            model_name=model_name, inputs=[infer_input], outputs=[requested]
        )

        # The identity model must hand back exactly what was sent.
        out0 = results.as_numpy("OUTPUT0")
        self.assertTrue(np.array_equal(input_, out0))
        return out0

    def _test_infer_non_unicode(self, model_name, client, input_, binary_data=True):
        """Send ``input_`` as INPUT0 and compare OUTPUT0 to it as bytes.

        ``binary_data`` is accepted but not used by this method.
        """
        protocol = client[1]
        infer_input = protocol.InferInput("INPUT0", input_.shape, "BYTES")

        # Only the HTTP client takes the binary_data switches for the input
        # payload and the requested output.
        if protocol == tritonhttpclient:
            infer_input.set_data_from_numpy(input_, client[3])
            requested = protocol.InferRequestedOutput(
                "OUTPUT0", binary_data=client[2]
            )
        else:
            infer_input.set_data_from_numpy(input_)
            requested = protocol.InferRequestedOutput("OUTPUT0")

        results = client[0].infer(
            model_name=model_name, inputs=[infer_input], outputs=[requested]
        )

        out0 = results.as_numpy("OUTPUT0")
        # When the binary output flag is falsy, the output is converted to
        # bytes as well before comparing.
        received = out0 if client[2] else out0.astype(np.bytes_)
        expected = input_.astype(np.bytes_)
        self.assertTrue(np.array_equal(expected, received))
        return out0

    def _test_unicode_bytes_dtype(self, client, model_name, dtype="|S78"):
        """Round-trip eight raw binary strings, built with ``dtype``."""
        # Sample byte strings that should no longer cause failure.
        raw_input = np.array(
            [
                [
                    b"\nF\n'\n\x01a\x12\"\x1a \n\x1e\xfa\x03\x94\x01\x0f\xd7\x02\xf1\x05\xdf\x01\x82\x03\xb5\x05\xc1\x07\xba\x06\xff\x06\xc7\x07L\xf5\x03\xe2\x07\xa9\x03\n\x0c\n\x01b\x12\x07\x1a\x05\n\x03\x89\xcc=\n\r\n\x01c\x12\x08\x12\x06\n\x04\xdf\\\xcb\xbf"
                ],
                [
                    b"\n:\n\x1a\n\x01a\x12\x15\x1a\x13\n\x11*\xe3\x05\xc5\x06\xda\x07\xcb\x06~\xb1\x05\xb3\x01\xa9\x02\x15\n\r\n\x01b\x12\x08\x1a\x06\n\x04\xf6\xa2\xc5\x01\n\r\n\x01c\x12\x08\x12\x06\n\x04\xbb[\n\xbf"
                ],
                [
                    b"\nL\n-\n\x01a\x12(\x1a&\n$\x87\x07\xce\x01\xe7\x06\xee\x04\xe1\x03\xf1\x03\xd7\x07\xbe\x02\xb8\x05\xe0\x05\xe4\x01\x88\x06\xb6\x03\xb9\x05\x83\x06\xf8\x04\xe2\x04\xf4\x06\n\x0c\n\x01b\x12\x07\x1a\x05\n\x03\x89\xcc=\n\r\n\x01c\x12\x08\x12\x06\n\x04\xbc\x99+@"
                ],
                [
                    b"\n2\n\x12\n\x01a\x12\r\x1a\x0b\n\t\x99\x02\xde\x04\x9f\x04\xc5\x053\n\r\n\x01b\x12\x08\x1a\x06\n\x04\xf6\xa2\xc5\x01\n\r\n\x01c\x12\x08\x12\x06\n\x04\x12\x07\x83\xbe"
                ],
                [
                    b"\nJ\n\r\n\x01b\x12\x08\x1a\x06\n\x04\x9b\x94\xad\x04\n\r\n\x01c\x12\x08\x12\x06\n\x04\xc3\x8a\x08\xbf\n*\n\x01a\x12%\x1a#\n!\x9c\x02\xb2\x02\xcd\x02\x9d\x07\x8d\x01\xb6\x05a\xf1\x01\xf0\x05\xdb\x02\xac\x04\xbd\x05\xe0\x04\xd2\x06\xaf\x02\xa8\x01\x8b\x04"
                ],
                [
                    b"\n3\n\x13\n\x01a\x12\x0e\x1a\x0c\n\n<\xe2\x05\x8a\x01\xb3\x07?\xfd\x01\n\r\n\x01b\x12\x08\x1a\x06\n\x04\xf6\xa2\xc5\x01\n\r\n\x01c\x12\x08\x12\x06\n\x04\x1b\x931\xbf\x00\x00"
                ],
                [
                    b"\n&\n\x07\n\x01a\x12\x02\x1a\x00\n\x0c\n\x01b\x12\x07\x1a\x05\n\x03\x89\xcc=\n\r\n\x01c\x12\x08\x12\x06\n\x04{\xbc\x0e>\x00\x00\x00"
                ],
                [
                    b"\nF\n'\n\x01a\x12\"\x1a \n\x1e\x97\x01\x93\x02\x9e\x01\xac\x06\xff\x01\xd8\x05\xe1\x07\xd8\x04g]\x9a\x05\xff\x06\xde\x07\x8f\x04\x97\x04\xda\x03\n\x0c\n\x01b\x12\x07\x1a\x05\n\x03\x9a\xb7I\n\r\n\x01c\x12\x08\x12\x06\n\x04\xfb\x87\x83\xbf"
                ],
            ],
            dtype=dtype,
        ).flatten()
        self._test_infer_unicode(model_name, client, raw_input)

    def _test_str_dtype(self, client, model_name, dtype=np.object_):
        """Round-trip the numbers 10000..10007, first as strings, then as ints."""
        numbers = range(10000, 10008)
        for elements in ([str(n) for n in numbers], list(numbers)):
            tensor = np.array(elements, dtype=dtype)
            self._test_infer_non_unicode(model_name, client, tensor)

    def _test_bytes(self, model_name):
        """Exercise every client configuration against ``model_name``."""
        dtypes = [np.object_, np.bytes_]

        def http_client(output_binary, input_binary):
            # Client tuple for a fresh HTTP connection with the given flags.
            return (
                tritonhttpclient.InferenceServerClient("localhost:8000", verbose=True),
                tritonhttpclient,
                output_binary,
                input_binary,
            )

        # HTTP clients with at least one binary_data flag disabled. They are
        # expected to fail on the raw binary samples (which are not UTF-8
        # encodable) but to work for the plain string cases.
        binary_false_clients = [
            http_client(True, False),
            http_client(False, False),
            http_client(False, True),
        ]

        # These clients work for every data type.
        other_clients = [
            (
                tritongrpcclient.InferenceServerClient("localhost:8001", verbose=True),
                tritongrpcclient,
                False,
            ),
            http_client(True, True),
        ]

        # Plain string data must round-trip with every client.
        for client in other_clients + binary_false_clients:
            self._test_str_dtype(client, model_name)
            for dtype in dtypes:
                self._test_str_dtype(client, model_name, dtype)

        # Raw binary data must round-trip with the fully capable clients.
        for client in other_clients:
            self._test_unicode_bytes_dtype(client, model_name)
            for dtype in dtypes:
                self._test_unicode_bytes_dtype(client, model_name, dtype)

        # The same raw binary data must raise with the restricted HTTP clients.
        for client in binary_false_clients:
            with self.assertRaises(tritonutils.InferenceServerException):
                self._test_unicode_bytes_dtype(client, model_name)
            for dtype in dtypes:
                with self.assertRaises(tritonutils.InferenceServerException):
                    self._test_unicode_bytes_dtype(client, model_name, dtype)

    def test_unicode_bytes(self):
        """Run the full client matrix against the "string_identity" model."""
        self._test_bytes("string_identity")


# Run the test cases through unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_string_io/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the QA model repository version: defaults to
# NVIDIA_TRITON_SERVER_VERSION, overridden by the first positional argument,
# and suffixed with TEST_REPO_ARCH when that variable is set.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

TEST_RESULT_FILE='test_results.txt'
export CUDA_VISIBLE_DEVICES=0

CLIENT_LOG="./client.log"
STRING_CLIENT_TEST_PY=string_client_test.py
EXPECTED_NUM_TESTS="1"

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"
# Shared helpers; run_server and check_test_results used below are not defined
# in this script, so they presumably come from here.
source ../common/util.sh

# Start from clean logs and a fresh local model repository.
rm -f $CLIENT_LOG $SERVER_LOG
rm -fr models && mkdir models
cp -rv /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/onnx_zero_1_object/ models/.
cp -rv ../python_models/string_identity models/.
# Move model.py into a version directory named "1".
mkdir models/string_identity/1/
mv models/string_identity/model.py models/string_identity/1/model.py

# Rewrite "[ 1 ]" to "[ 8 ]" in the model config (presumably the tensor dims,
# to match the 8-element tensors sent by the client test — confirm).
(cd models/string_identity && \
          sed -i "s/\[ 1 \]/\[ 8 \]/" config.pbtxt)

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0

set +e

python $STRING_CLIENT_TEST_PY -v >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_trace/models/input_all_required/1/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import time

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend test model that answers every request with OUTPUT0 = [1.0]."""

    def initialize(self, args):
        """Decode the JSON model configuration found in ``args["model_config"]``."""
        config_json = args["model_config"]
        self.model_config = json.loads(config_json)

    def execute(self, requests):
        """Return one response per request, each carrying a constant OUTPUT0.

        The call sleeps for two seconds first; the delay is deliberately
        shorter than the collector timeout of 10 used by the trace tests.
        """
        time.sleep(2)

        def build_response():
            # The request contents are not inspected: the output is always a
            # single float32 element equal to 1.
            output_tensor = pb_utils.Tensor(
                "OUTPUT0", np.array([1], dtype=np.float32)
            )
            return pb_utils.InferenceResponse([output_tensor])

        return [build_response() for _ in requests]


================================================
FILE: qa/L0_trace/models/input_all_required/config.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model whose implementation lives in 1/model.py.
name: "input_all_required"
backend: "python"
# Three FP32 inputs, each with a single variable-size dimension (-1).
# NOTE(review): none is marked `optional`, so presumably every request must
# supply all three, as the model name suggests -- confirm.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    name: "INPUT2"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# Single-element FP32 output; 1/model.py always fills it with the value 1.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# Run the model instance on CPU.
instance_group [{ kind: KIND_CPU }]

================================================
FILE: qa/L0_trace/opentelemetry_unittest.py
================================================
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")
import concurrent.futures
import json
import queue
import re
import shutil
import subprocess
import time
import unittest
from functools import partial

import numpy as np
import requests
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

# Value expected in a root span's `parentSpanId` when the client sent no
# trace context headers.
NO_PARENT_SPAN_ID = ""
# Number of seconds the trace helpers sleep (via time.sleep) before reading
# the exported trace file.
COLLECTOR_TIMEOUT = 10


def callback(user_data, result, error):
    """
    Completion callback: hands the outcome of a request to `user_data`.

    Args:
        user_data: object exposing a `put` method that receives the outcome.
        result: result of the request, stored when `error` is falsy.
        error: error of the request; when truthy it is stored instead
            of `result`.
    """
    outcome = error if error else result
    user_data.put(outcome)


def prepare_data(client, is_binary=True):
    """
    Builds the `INPUT0` and `INPUT1` tensors for one inference request.

    Both inputs are INT32 tensors of shape [1, 16] holding the values 0..15.

    Args:
        client: client module (or any object) exposing an `InferInput` type.
        is_binary (bool): when truthy, data is attached with a plain
            `set_data_from_numpy(array)` call; otherwise the value is
            forwarded as the `binary_data` keyword argument.

    Returns:
        inputs (list): the two populated `InferInput` objects, in order
            `INPUT0`, `INPUT1`.
    """
    dim = 16
    # One row with the values 0..dim-1, shared by both inputs.
    payload = np.arange(dim, dtype=np.int32).reshape(1, dim)

    # `binary_data` is only passed when explicitly disabled.
    # NOTE(review): presumably because not every client's
    # set_data_from_numpy accepts that keyword -- confirm.
    extra_kwargs = {} if is_binary else {"binary_data": is_binary}

    inputs = []
    for input_name in ("INPUT0", "INPUT1"):
        tensor = client.InferInput(input_name, [1, dim], "INT32")
        tensor.set_data_from_numpy(payload, **extra_kwargs)
        inputs.append(tensor)

    return inputs


def send_bls_request(model_name="simple", headers=None):
    """
    Sends one HTTP inference request to the `bls_simple` model.

    The request carries the two tensors built by `prepare_data` plus a
    `MODEL_NAME` BYTES input holding `model_name`.

    Args:
        model_name (str): value placed in the `MODEL_NAME` input.
        headers (dict | None): headers forwarded to `client.infer`.
    """
    with httpclient.InferenceServerClient("localhost:8000") as client:
        request_inputs = prepare_data(httpclient)
        name_tensor = httpclient.InferInput("MODEL_NAME", [1], "BYTES")
        name_tensor.set_data_from_numpy(np.array([model_name], dtype=np.object_))
        request_inputs.append(name_tensor)
        client.infer("bls_simple", request_inputs, headers=headers)


class UserData:
    """
    Sink for results delivered to the module-level `callback`.

    `callback(user_data, result, error)` calls `user_data.put(...)`, and
    `setUp` binds an instance of this class as that `user_data`. The class
    therefore has to expose `put`; without it every callback invocation
    would raise AttributeError.
    """

    def __init__(self):
        # Received results/errors, in arrival order.
        self._completed_requests = queue.Queue()

    def put(self, item):
        """
        Stores a result or error delivered by a callback.

        Args:
            item: result or error object to append to `_completed_requests`.
        """
        self._completed_requests.put(item)


class OpenTelemetryTest(tu.TestResultCollector):
    def setUp(self):
        """
        Starts an OTel collector process and initialises per-test fixtures.
        """
        # Launch the collector and wait five seconds before any request
        # is sent.
        collector_command = ["./otelcol", "--config", "./trace-config.yaml"]
        self.collector_subprocess = subprocess.Popen(collector_command)
        time.sleep(5)

        # File the trace helpers parse for collected traces.
        self.filename = "collected_traces.json"

        # Header that imitates an OTel propagator injecting context on the
        # client side. The value follows the W3C trace-context layout
        # `00-traceId-spanId-traceFlags`, see
        # https://www.w3.org/TR/trace-context/#design-overview
        # OTel reference code for extraction:
        # https://github.com/open-telemetry/opentelemetry-cpp/blob/c4f39f2be8109fd1a3e047677c09cf47954b92db/api/include/opentelemetry/trace/propagation/http_trace_context.h#L165
        # and for injection:
        # https://github.com/open-telemetry/opentelemetry-cpp/blob/c4f39f2be8109fd1a3e047677c09cf47954b92db/api/include/opentelemetry/trace/propagation/http_trace_context.h#L91
        self.client_headers = {
            "traceparent": "00-0af7651916cd43dd8448eb211c12666c-b7ad6b7169242424-01"
        }

        # Names of the models and spans referenced by the tests.
        self.root_span = "InferRequest"
        self.simple_model_name = "simple"
        self.ensemble_model_name = "ensemble_add_sub_int32_int32_int32"
        self.input_all_required_model_name = "input_all_required"
        self.cancel_queue_model_name = "dynamic_batch"
        self.bls_model_name = "bls_simple"
        self.trace_context_model = "trace_context"
        self.non_decoupled_model_name_ = "repeat_int32"
        self.identity_model = "custom_identity_int32"

        # Span names that `_check_events` treats as request spans.
        self.test_models = [
            self.simple_model_name,
            self.ensemble_model_name,
            self.bls_model_name,
            self.non_decoupled_model_name_,
            self.cancel_queue_model_name,
            self.identity_model,
        ]

        # Callback plumbing: `callback` receives `self._user_data` as its
        # first argument.
        self._user_data = UserData()
        self._callback = partial(callback, self._user_data)
        self._outputs = []

        # Named single-element input tensors.
        self.input_data = {
            "IN": np.array([1], dtype=np.int32),
            "DELAY": np.array([0], dtype=np.uint32),
            "WAIT": np.array([0], dtype=np.uint32),
        }

    def tearDown(self):
        """
        Stops the collector and keeps a per-test copy of the trace file.
        """
        collector = self.collector_subprocess
        collector.kill()
        collector.wait()
        time.sleep(5)
        # The last component of the dotted test id is used as the suffix of
        # the copied file.
        test_name = unittest.TestCase.id(self).rsplit(".", 1)[-1]
        per_test_copy = "{}_{}.log".format(self.filename, test_name)
        shutil.copyfile(self.filename, per_test_copy)

    def _get_inputs(self, batch_size):
        """
        Builds a single gRPC input `INPUT0` filled with ones.

        Args:
            batch_size (int): size of the first dimension of the tensor.

        Returns:
            inputs (list): one FP32 `InferInput` of shape [batch_size, 8].
        """
        tensor_shape = [batch_size, 8]
        input0 = grpcclient.InferInput("INPUT0", tensor_shape, "FP32")
        input0.set_data_from_numpy(np.ones(tensor_shape, dtype=np.float32))
        return [input0]

    def _generate_callback_and_response_pair(self):
        """
        Creates a callback together with the dictionary it reports into.

        Returns:
            (callable, dict): `callback_queue(result, error)` and a dictionary
                with the keys `responded`, `result` and `error`. Calling the
                callback sets `responded` to True and stores both arguments.
        """
        response = dict(responded=False, result=None, error=None)

        def callback_queue(result, error):
            response.update(responded=True, result=result, error=error)

        return callback_queue, response

    def _parse_trace_log(self, trace_log):
        """
        Reads a file of collected traces, one JSON document per line.

        Args:
            trace_log (str): Name of a file, containing all traces.

        Returns:
            traces (List[dict]): decoded JSON objects, in file order.
        """
        with open(trace_log) as log_file:
            return [json.loads(line) for line in log_file]

    def _check_events(self, span_name, events, is_cancelled):
        """
        Helper function that verifies passed events contain expected entries.

        Args:
            span_name (str): name of a span.
            events (List[str]): list of event names, collected for the span with the name `span_name`.
            is_cancelled (bool): True when the span belongs to a cancelled
                request. Root spans are then only expected to contain the
                receive (`_RECV` / `_WAITREAD`) events.
        """
        root_events_http = [
            "HTTP_RECV_START",
            "HTTP_RECV_END",
            "INFER_RESPONSE_COMPLETE",
            "HTTP_SEND_START",
            "HTTP_SEND_END",
        ]
        root_events_grpc = [
            "GRPC_WAITREAD_START",
            "GRPC_WAITREAD_END",
            "INFER_RESPONSE_COMPLETE",
            "GRPC_SEND_START",
            "GRPC_SEND_END",
        ]
        cancel_root_events_http = [
            "HTTP_RECV_START",
            "HTTP_RECV_END",
        ]
        cancel_root_events_grpc = [
            "GRPC_WAITREAD_START",
            "GRPC_WAITREAD_END",
        ]
        request_events = ["REQUEST_START", "QUEUE_START", "REQUEST_END"]
        compute_events = [
            "COMPUTE_START",
            "COMPUTE_INPUT_END",
            "COMPUTE_OUTPUT_START",
            "COMPUTE_END",
        ]

        def has_all(expected):
            # True when every expected event name was collected for the span.
            return all(entry in events for entry in expected)

        if span_name == "compute":
            # Check that all compute related events (and only them)
            # are recorded in compute span
            self.assertTrue(has_all(compute_events))
            self.assertFalse(has_all(request_events))
            self.assertFalse(has_all(root_events_http + root_events_grpc))
            self.assertEqual(len(events), len(compute_events))

        elif span_name == self.root_span:
            # Check that root span has INFER_RESPONSE_COMPLETE, _RECV/_WAITREAD
            # and _SEND events (and only them)
            if is_cancelled:
                root_events_http = cancel_root_events_http
                root_events_grpc = cancel_root_events_grpc

            # `events` holds complete event names such as "HTTP_RECV_START",
            # so the protocol has to be detected inside each name. A plain
            # `"HTTP" in events` tests list membership of the exact string
            # "HTTP", which never matches and silently skipped the
            # assertions below.
            if any("HTTP" in event for event in events):
                self.assertTrue(has_all(root_events_http))
                self.assertFalse(has_all(root_events_grpc))
                self.assertEqual(len(events), len(root_events_http))

            elif any("GRPC" in event for event in events):
                self.assertTrue(has_all(root_events_grpc))
                self.assertFalse(has_all(root_events_http))
                self.assertEqual(len(events), len(root_events_grpc))

            if not is_cancelled:
                self.assertFalse(has_all(request_events))
                self.assertFalse(has_all(compute_events))

        elif span_name in self.test_models:
            if span_name == self.identity_model:
                request_events.append("CUSTOM_SINGLE_ACTIVITY")
            # Check that all request related events (and only them)
            # are recorded in request span
            self.assertTrue(has_all(request_events))
            self.assertFalse(has_all(root_events_http + root_events_grpc))
            self.assertFalse(has_all(compute_events))
            self.assertEqual(len(events), len(request_events))

        elif span_name.startswith("CUSTOM_ACTIVITY"):
            # Whatever follows the prefix is the index of a numbered span;
            # the un-numbered "CUSTOM_ACTIVITY" span has an empty suffix.
            activity_index = span_name[len("CUSTOM_ACTIVITY") :]
            if activity_index:
                custom_activity_events = [
                    span_name + "_START",
                    span_name + "_END",
                ]
                # Check `custom_identity_int32` config file,
                # parameter `single_activity_frequency` identifies
                # which custom spans contain "CUSTOM_SINGLE_ACTIVITY" event.
                # The whole index is parsed (not just its last digit) so
                # that indices of 10 and above are classified correctly.
                if int(activity_index) % 3 == 0:
                    custom_activity_events.append("CUSTOM_SINGLE_ACTIVITY")
            else:
                custom_activity_events = [
                    "CUSTOM_ACTIVITY_START",
                    "CUSTOM_ACTIVITY_END",
                ]

            self.assertTrue(
                has_all(custom_activity_events),
                "Span " + span_name,
            )
            self.assertEqual(
                len(events), len(custom_activity_events), "Span " + span_name
            )

    def _test_resource_attributes(self, attributes):
        """
        Verifies that the collected resource attributes contain the two
        entries configured when tritonserver was started:

        --trace-config=opentelemetry,resource=test.key=test.value
        and
        --trace-config=opentelemetry,resource=service.name=test_triton

        Args:
            attributes (List[dict]): list of attributes, collected for a span.
        """
        expected_entries = (
            {"key": "service.name", "value": {"stringValue": "test_triton"}},
            {"key": "test.key", "value": {"stringValue": "test.value"}},
        )
        for expected_entry in expected_entries:
            failure_message = "Expected entry: {}, was not found in the set of collected attributes: {}".format(
                expected_entry, attributes
            )
            self.assertIn(expected_entry, attributes, failure_message)

    def _verify_contents(self, spans, expected_counts, is_cancelled):
        """
        Checks the events of every span and the number of spans per name.

        For each entry of `spans` the recorded event names are validated with
        `_check_events`. Afterwards the total number of spans must equal the
        sum of `expected_counts`, and each span name must occur exactly as
        often as `expected_counts` states.

        Args:
            spans (List[dict]): list of json objects, extracted from the trace and
                   containing span info. For this test `name`
                   and `events` are required.
            expected_counts (dict): dictionary, containing expected spans in the form:
                    span_name : #expected_number_of_entries
            is_cancelled (bool): boolean, is true if called by cancelled workflow
        """
        collected_names = []
        for span in spans:
            current_name = span["name"]
            collected_names.append(current_name)
            # Only the event names matter for the per-span check.
            recorded_events = [event["name"] for event in span["events"]]
            self._check_events(current_name, recorded_events, is_cancelled)

        expected_total = sum(expected_counts.values())
        self.assertEqual(
            len(collected_names),
            expected_total,
            "Unexpeced number of span names collected",
        )
        for expected_name, expected_count in expected_counts.items():
            self.assertEqual(
                collected_names.count(expected_name),
                expected_count,
                "Unexpeced number of " + expected_name + " spans collected",
            )

    def _verify_nesting(self, spans, expected_parent_span_dict):
        """
        Rebuilds the parent -> children mapping from the collected spans and
        compares it with `expected_parent_span_dict`.

        Spans whose `parentSpanId` does not belong to any collected span are
        left out of the mapping. Children are sorted by name before the
        comparison.

        Args:
            spans (List[dict]): list of json objects, extracted from the trace and
                   containing span info. For this test `name`, `spanId`
                   and `parentSpanId` are required.
            expected_parent_span_dict (dict): dictionary, containing expected
                   parents and children in the dictionary form:
                        <parent_span_name> (str) : <children_names> (List[str])
        """
        name_by_span_id = {span["spanId"]: span["name"] for span in spans}

        children_by_parent = {}
        for span in spans:
            parent_id = span["parentSpanId"]
            child_name = span["name"]
            if parent_id in name_by_span_id:
                parent_name = name_by_span_id[parent_id]
                children_by_parent.setdefault(parent_name, []).append(child_name)

        for children in children_by_parent.values():
            children.sort()

        self.assertDictEqual(children_by_parent, expected_parent_span_dict)

    def _verify_headers_propagated_from_client_if_any(self, root_span, headers):
        """
        Helper function that checks traceparent's ids, passed in clients
        headers/metadata was picked up on the server side.
        If `headers` are None, checks that `root_span` does not have
        `parentSpanId` specified.

        Args:
            root_span (dict): a json object, extracted from the trace and
                   containing root span info. For this test `traceId`
                   and `parentSpanId` are required.
            headers (dict | None): client headers/metadata with a
                   `traceparent` entry in the form
                   `00-traceId-spanId-traceFlags`, or None when no context
                   was sent by the client.
        """
        parent_span_id = NO_PARENT_SPAN_ID

        if headers is not None:
            # traceparent layout: version-traceId-spanId-traceFlags
            traceparent_fields = headers["traceparent"].split("-")
            parent_span_id = traceparent_fields[2]
            parent_trace_id = traceparent_fields[1]
            self.assertEqual(
                root_span["traceId"],
                parent_trace_id,
                "Child and parent trace ids do not match! child's trace id = {} , expected trace id = {}".format(
                    root_span["traceId"], parent_trace_id
                ),
            )

        # Without headers the root span must report the empty parent id.
        self.assertEqual(
            root_span["parentSpanId"],
            parent_span_id,
            "Child and parent span ids do not match! child's parentSpanId = {} , expected parentSpanId {}".format(
                root_span["parentSpanId"], parent_span_id
            ),
        )

    def _test_trace_cancel(self, is_queued):
        """
        Helper method that verifies the trace collected for a cancelled
        request.

        Args:
            is_queued (bool): True when the request was cancelled in the
                queue of the `dynamic_batch` model, so no compute span is
                expected. False when it was cancelled during the compute
                stage of the `input_all_required` model.
        """
        # We want to capture the traces of a cancellation request WHILE the
        # inference is in the COMPUTE stage. The model "input_all_required"
        # has a delay in its compute phase, so the cancellation request can
        # be sent while the request is waiting there.
        # The idea here is to wait before we try and read the traces from
        # the file.
        time.sleep(2 * COLLECTOR_TIMEOUT)
        traces = self._parse_trace_log(self.filename)
        # Fail with a readable message instead of an IndexError on
        # `traces[0]` when nothing was exported.
        self.assertTrue(
            traces, "No traces were collected in {}".format(self.filename)
        )
        if is_queued:
            # Compute is expected to be 0 as cancelled in queue
            expected_counts = {
                "compute": 0,
                self.cancel_queue_model_name: 1,
                self.root_span: 1,
            }
        else:
            expected_counts = {
                "compute": 1,
                self.input_all_required_model_name: 1,
                self.root_span: 1,
            }
        parsed_spans = traces[0]["resourceSpans"][0]["scopeSpans"][0]["spans"]
        self._verify_contents(parsed_spans, expected_counts, is_cancelled=True)

    def _test_trace(
        self,
        headers,
        expected_number_of_spans,
        expected_counts,
        expected_parent_span_dict,
    ):
        """
        General scenario shared by the trace tests.

        1. Sleep for COLLECTOR_TIMEOUT seconds, then parse the trace log,
           exported by OTel collector in self.filename.
        2. The OTel collector is re-started for each test, so the log must
           contain exactly 1 trace.
        3. The reported resource attributes must contain the 2 attributes
           specified upon tritonserver start:

            --trace-config=opentelemetry,resource=test.key=test.value
            and
            --trace-config=opentelemetry,resource=service.name=test_triton
        4. Every collected span must have the expected contents.
        5. Parent - child span relationships must match.
        6. The OTel context must have been propagated from the client to the
           server through headers. When no headers were specified, the root
           span must have no `parentSpanId`.

        Args:
            headers (dict | None): dictionary, containing OTel headers,
                specifying OTel context.
            expected_number_of_spans (int): expected number of collected spans.
            expected_counts(dict): dictionary, containing expected spans in the form:
                    span_name : #expected_number_of_entries
            expected_parent_span_dict (dict): dictionary, containing expected
                   parents and children in the dictionary form:
                        <parent_span_name> (str) : <children_names> (List[str])
        """
        time.sleep(COLLECTOR_TIMEOUT)
        traces = self._parse_trace_log(self.filename)

        expected_traces_number = 1
        collected_traces_number = len(traces)
        self.assertEqual(
            collected_traces_number,
            expected_traces_number,
            "Unexpected number of traces collected. Expected {}, but got {}".format(
                expected_traces_number, collected_traces_number
            ),
        )

        resource_spans = traces[0]["resourceSpans"][0]
        self._test_resource_attributes(resource_spans["resource"]["attributes"])

        parsed_spans = resource_spans["scopeSpans"][0]["spans"]
        # The first span named "InferRequest" is treated as the root span.
        root_candidates = [
            span for span in parsed_spans if span["name"] == "InferRequest"
        ]
        root_span = root_candidates[0]

        self.assertEqual(len(parsed_spans), expected_number_of_spans)
        self._verify_contents(parsed_spans, expected_counts, is_cancelled=False)
        self._verify_nesting(parsed_spans, expected_parent_span_dict)
        self._verify_headers_propagated_from_client_if_any(root_span, headers)

    def _test_simple_trace(self, headers=None):
        """
        Verifies the trace of 1 inference request for the `simple` model:
        three spans nested as InferRequest -> simple -> compute.

        Args:
            headers (dict | None): OTel headers sent by the client, if any.
        """
        self._test_trace(
            headers=headers,
            expected_number_of_spans=3,
            expected_counts={
                "compute": 1,
                self.simple_model_name: 1,
                self.root_span: 1,
            },
            expected_parent_span_dict={
                "InferRequest": ["simple"],
                "simple": ["compute"],
            },
        )

    def _test_custom_identity_trace(self, headers=None):
        """
        Verifies the trace of 1 inference request for the
        `custom_identity_int32` model.

        Besides the root, request and compute spans, the identity backend
        defines custom spans: `CUSTOM_ACTIVITY` is always present, and the
        number of `CUSTOM_ACTIVITY<N>` spans is defined by the
        `config.pbtxt parameters`. Ten spans are expected in total.

        Args:
            headers (dict | None): OTel headers sent by the client, if any.
        """
        span_counts = {
            "compute": 1,
            self.identity_model: 1,
            self.root_span: 1,
            "CUSTOM_ACTIVITY": 1,
            "CUSTOM_ACTIVITY0": 1,
            "CUSTOM_ACTIVITY1": 1,
            "CUSTOM_ACTIVITY2": 1,
            "CUSTOM_ACTIVITY3": 1,
            "CUSTOM_ACTIVITY4": 1,
            "CUSTOM_ACTIVITY5": 1,
        }
        # The numbered custom spans form a chain: each one is the parent of
        # the next.
        span_parents = {
            "InferRequest": ["custom_identity_int32"],
            "custom_identity_int32": [
                "CUSTOM_ACTIVITY",
                "CUSTOM_ACTIVITY0",
                "compute",
            ],
            "CUSTOM_ACTIVITY0": ["CUSTOM_ACTIVITY1"],
            "CUSTOM_ACTIVITY1": ["CUSTOM_ACTIVITY2"],
            "CUSTOM_ACTIVITY2": ["CUSTOM_ACTIVITY3"],
            "CUSTOM_ACTIVITY3": ["CUSTOM_ACTIVITY4"],
            "CUSTOM_ACTIVITY4": ["CUSTOM_ACTIVITY5"],
        }
        self._test_trace(
            headers=headers,
            expected_number_of_spans=10,
            expected_counts=span_counts,
            expected_parent_span_dict=span_parents,
        )

    def _test_non_decoupled_trace(self, headers=None):
        """
        Verifies the trace collected for the non decoupled model
        `repeat_int32`: three spans nested as
        InferRequest -> repeat_int32 -> compute.

        Args:
            headers (dict | None): OTel headers sent by the client, if any.
        """
        self._test_trace(
            headers=headers,
            expected_number_of_spans=3,
            expected_counts={
                "compute": 1,
                self.non_decoupled_model_name_: 1,
                self.root_span: 1,
            },
            expected_parent_span_dict={
                "InferRequest": ["repeat_int32"],
                "repeat_int32": ["compute"],
            },
        )

    def _test_bls_trace(self, headers=None):
        """
        Verifies the trace of 1 inference request for the `bls_simple`
        model: six spans, with the ensemble and `simple` spans nested under
        the `bls_simple` span.

        Args:
            headers (dict | None): OTel headers sent by the client, if any.
        """
        span_counts = {
            "compute": 2,
            self.simple_model_name: 1,
            self.ensemble_model_name: 1,
            self.bls_model_name: 1,
            self.root_span: 1,
        }
        unsorted_parents = {
            "InferRequest": ["bls_simple"],
            "bls_simple": ["compute", "ensemble_add_sub_int32_int32_int32"],
            "ensemble_add_sub_int32_int32_int32": ["simple"],
            "simple": ["compute"],
        }
        # `_verify_nesting` sorts the collected children, so the expectation
        # is sorted as well.
        span_parents = {
            parent: sorted(children)
            for parent, children in unsorted_parents.items()
        }

        self._test_trace(
            headers=headers,
            expected_number_of_spans=6,
            expected_counts=span_counts,
            expected_parent_span_dict=span_parents,
        )

    def _test_ensemble_trace(self, headers=None):
        """
        Verifies the trace of 1 inference request for the
        `ensemble_add_sub_int32_int32_int32` model: four spans nested as
        InferRequest -> ensemble -> simple -> compute.

        Args:
            headers (dict | None): OTel headers sent by the client, if any.
        """
        span_counts = {
            "compute": 1,
            self.simple_model_name: 1,
            self.ensemble_model_name: 1,
            self.root_span: 1,
        }
        unsorted_parents = {
            "InferRequest": ["ensemble_add_sub_int32_int32_int32"],
            "ensemble_add_sub_int32_int32_int32": ["simple"],
            "simple": ["compute"],
        }
        # `_verify_nesting` sorts the collected children, so the expectation
        # is sorted as well.
        span_parents = {
            parent: sorted(children)
            for parent, children in unsorted_parents.items()
        }

        self._test_trace(
            headers=headers,
            expected_number_of_spans=4,
            expected_counts=span_counts,
            expected_parent_span_dict=span_parents,
        )

    def test_http_trace_simple_model(self):
        """
        Sends one inference request for the `simple` model through the HTTP
        client and validates the collected trace.
        """
        http_client = httpclient.InferenceServerClient(
            url="localhost:8000", verbose=True
        )
        request_inputs = prepare_data(httpclient)
        http_client.infer(model_name=self.simple_model_name, inputs=request_inputs)

        self._test_simple_trace()

    def test_http_trace_simple_model_context_propagation(self):
        """
        Sends one inference request for the `simple` model through the HTTP
        client with context propagation, i.e. the OTel headers defined in
        `self.client_headers` accompany the request, and validates the
        collected trace against those headers.
        """
        otel_headers = self.client_headers
        http_client = httpclient.InferenceServerClient(
            url="localhost:8000", verbose=True
        )
        request_inputs = prepare_data(httpclient)
        http_client.infer(
            model_name=self.simple_model_name,
            inputs=request_inputs,
            headers=otel_headers,
        )

        self._test_simple_trace(headers=otel_headers)

    def test_grpc_trace_simple_model(self):
        """
        Sends one inference request for the `simple` model through the GRPC
        client and validates the collected trace.
        """
        grpc_client = grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        )
        request_inputs = prepare_data(grpcclient)
        grpc_client.infer(model_name=self.simple_model_name, inputs=request_inputs)

        self._test_simple_trace()

    def test_grpc_trace_all_input_required_model_cancel(self):
        """
        Starts one asynchronous GRPC inference request, cancels it after the
        inference had time to start and validates the collected trace.
        Expects only 2 GRPC stage events.
        """
        grpc_client = grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        )
        # All three inputs share the same shape, datatype and payload.
        request_inputs = []
        for input_name in ("INPUT0", "INPUT1", "INPUT2"):
            tensor = grpcclient.InferInput(input_name, [1], "FP32")
            tensor.set_data_from_numpy(np.arange(1, dtype=np.float32))
            request_inputs.append(tensor)

        pending = grpc_client.async_infer(
            model_name=self.input_all_required_model_name,
            inputs=request_inputs,
            callback=self._callback,
            outputs=self._outputs,
        )
        time.sleep(2)  # ensure the inference has started
        pending.cancel()
        time.sleep(0.1)  # context switch
        self._test_trace_cancel(is_queued=False)

    # Queued requests on the dynamic batch scheduler can be cancelled
    def test_grpc_trace_model_cancel_in_queue(self):
        """
        Occupies the model with a blocking request, submits a second request
        that is expected to wait in the queue, cancels that second request and
        validates the collected trace. Expects 0 compute stage traces.
        """
        queue_model = self.cancel_queue_model_name
        grpc_client = grpcclient.InferenceServerClient(
            "localhost:8001", verbose=True
        )
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Saturate the slots on the model
            blocker = executor.submit(
                grpc_client.infer, queue_model, self._get_inputs(batch_size=1)
            )
            time.sleep(2)  # ensure the slots are filled
            # The next request should be queued
            callback, response = self._generate_callback_and_response_pair()
            queued_request = grpc_client.async_infer(
                queue_model, self._get_inputs(batch_size=1), callback
            )
            time.sleep(0.2)  # ensure the request is queued
            queued_request.cancel()
            # Wait for the saturating request to finish
            blocker.result()
            self._test_trace_cancel(is_queued=True)

    def test_non_decoupled(self):
        """
        Runs one inference request against the non decoupled model, validates
        the collected trace and checks the returned `OUT` and `IDX` values.
        """
        # (input name, datatype); the payload comes from `self.input_data`.
        input_specs = (("IN", "INT32"), ("DELAY", "UINT32"), ("WAIT", "UINT32"))
        request_inputs = [
            grpcclient.InferInput(name, [1], datatype).set_data_from_numpy(
                self.input_data[name]
            )
            for name, datatype in input_specs
        ]

        grpc_client = grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        )
        # Expect the inference is successful
        response = grpc_client.infer(
            model_name=self.non_decoupled_model_name_, inputs=request_inputs
        )
        self._test_non_decoupled_trace()
        self.assertEqual(1, response.as_numpy("OUT")[0])
        self.assertEqual(0, response.as_numpy("IDX")[0])

    def test_grpc_trace_simple_model_context_propagation(self):
        """
        Sends one inference request for the `simple` model through the GRPC
        client with context propagation, i.e. the OTel headers defined in
        `self.client_headers` accompany the request, and validates the
        collected trace against those headers.
        """
        otel_headers = self.client_headers
        grpc_client = grpcclient.InferenceServerClient(
            url="localhost:8001", verbose=True
        )
        request_inputs = prepare_data(grpcclient)
        grpc_client.infer(
            model_name=self.simple_model_name,
            inputs=request_inputs,
            headers=otel_headers,
        )

        self._test_simple_trace(headers=otel_headers)

    def test_streaming_grpc_trace_simple_model(self):
        """
        Tests trace, collected from executing one inference request
        for a `simple` model and GRPC streaming client.

        The first item delivered to the stream callback must not be an
        `InferenceServerException`; afterwards the collected trace is
        validated.
        """
        triton_client_grpc = grpcclient.InferenceServerClient(
            "localhost:8001", verbose=True
        )
        user_data = queue.Queue()
        triton_client_grpc.start_stream(callback=partial(callback, user_data))
        try:
            inputs = prepare_data(grpcclient)
            triton_client_grpc.async_stream_infer(self.simple_model_name, inputs)
            result = user_data.get()
        finally:
            # Always close the stream, even if sending the request failed.
            triton_client_grpc.stop_stream()

        # `assertIsNot(result, InferenceServerException)` compared the result
        # with the exception *class* and therefore could never fail; check the
        # type of the delivered object instead.
        self.assertNotIsInstance(
            result,
            InferenceServerException,
            "Streaming inference returned an error: {}".format(result),
        )

        self._test_simple_trace()

    def test_streaming_grpc_trace_simple_model_context_propagation(self):
        """
        Tests trace, collected from executing one inference request
        for a `simple` model, GRPC streaming client and context propagation,
        i.e. client specifies OTel headers, defined in `self.client_headers`.

        The headers are supplied when the stream is started. The first item
        delivered to the stream callback must not be an
        `InferenceServerException`; afterwards the collected trace is
        validated against the headers.
        """
        triton_client_grpc = grpcclient.InferenceServerClient(
            "localhost:8001", verbose=True
        )
        user_data = queue.Queue()
        triton_client_grpc.start_stream(
            callback=partial(callback, user_data),
            headers=self.client_headers,
        )
        try:
            inputs = prepare_data(grpcclient)
            triton_client_grpc.async_stream_infer(self.simple_model_name, inputs)
            result = user_data.get()
        finally:
            # Always close the stream, even if sending the request failed.
            triton_client_grpc.stop_stream()

        # `assertIsNot(result, InferenceServerException)` compared the result
        # with the exception *class* and therefore could never fail; check the
        # type of the delivered object instead.
        self.assertNotIsInstance(
            result,
            InferenceServerException,
            "Streaming inference returned an error: {}".format(result),
        )

        self._test_simple_trace(headers=self.client_headers)

    def test_http_trace_bls_model(self):
        """
        Issues one BLS request via `send_bls_request`, which is handed the
        ensemble model name, and validates the trace collected for the
        `bls_simple` model.
        """
        target_model = self.ensemble_model_name
        send_bls_request(model_name=target_model)

        self._test_bls_trace()

    def test_http_trace_bls_model_context_propagation(self):
        """
        Issues one BLS request via `send_bls_request`, which is handed the
        ensemble model name and the OTel headers defined in
        `self.client_headers`, and validates the trace collected for the
        `bls_simple` model against those headers.
        """
        otel_headers = self.client_headers
        target_model = self.ensemble_model_name
        send_bls_request(model_name=target_model, headers=otel_headers)

        self._test_bls_trace(headers=otel_headers)

    def test_http_trace_ensemble_model(self):
        """
        Sends one inference request for the
        `ensemble_add_sub_int32_int32_int32` model through the HTTP client and
        validates the collected trace.
        """
        http_client = httpclient.InferenceServerClient(
            url="localhost:8000", verbose=True
        )
        request_inputs = prepare_data(httpclient)
        http_client.infer(model_name=self.ensemble_model_name, inputs=request_inputs)

        self._test_ensemble_trace()

    def test_http_trace_ensemble_model_context_propagation(self):
        """
        Sends one inference request for the
        `ensemble_add_sub_int32_int32_int32` model through the HTTP client
        with context propagation, i.e. the OTel headers defined in
        `self.client_headers` accompany the request, and validates the
        collected trace against those headers.
        """
        otel_headers = self.client_headers
        http_client = httpclient.InferenceServerClient(
            url="localhost:8000", verbose=True
        )
        request_inputs = prepare_data(httpclient)
        http_client.infer(
            model_name=self.ensemble_model_name,
            inputs=request_inputs,
            headers=otel_headers,
        )

        self._test_ensemble_trace(headers=otel_headers)

    def test_http_trace_triggered(self):
        """
        Tests how many traces are collected once `trace_rate` is raised to 5.

        Five requests without OTel headers are expected to produce a single
        trace, while each of the five following requests that carry
        `self.client_headers` is expected to add one more trace.

        The trace rate is reset to 1 before the test returns, also when one of
        the checks fails, so that the raised rate is not left on the server.
        """
        triton_client_http = httpclient.InferenceServerClient("localhost:8000")

        def assert_trace_rate(expected_trace_rate):
            # The rate is updated without a model name and read back through
            # the settings reported for the `simple` model.
            simple_model_trace_settings = triton_client_http.get_trace_settings(
                model_name=self.simple_model_name
            )
            self.assertEqual(
                expected_trace_rate,
                simple_model_trace_settings["trace_rate"],
                "Unexpected model trace rate settings after its update. Expected {}, but got {}".format(
                    expected_trace_rate, simple_model_trace_settings["trace_rate"]
                ),
            )

        triton_client_http.update_trace_settings(settings={"trace_rate": "5"})
        try:
            assert_trace_rate("5")

            inputs = prepare_data(httpclient)
            for _ in range(5):
                triton_client_http.infer(self.ensemble_model_name, inputs)
                time.sleep(COLLECTOR_TIMEOUT)

            expected_accumulated_traces = 1
            traces = self._parse_trace_log(self.filename)
            # Only 1 of the 5 requests without headers should be traced
            self.assertEqual(
                len(traces),
                expected_accumulated_traces,
                "Unexpected number of traces collected",
            )

            for _ in range(5):
                triton_client_http.infer(
                    self.ensemble_model_name, inputs, headers=self.client_headers
                )
                expected_accumulated_traces += 1
                time.sleep(COLLECTOR_TIMEOUT)

            traces = self._parse_trace_log(self.filename)
            # Every request sent with client headers should add one trace on
            # top of the single trace collected above
            self.assertEqual(
                len(traces),
                expected_accumulated_traces,
                "Unexpected number of traces collected",
            )
        finally:
            # Restore trace rate to 1
            triton_client_http.update_trace_settings(settings={"trace_rate": "1"})

        assert_trace_rate("1")

    def test_sagemaker_invocation_trace_simple_model_context_propagation(self):
        """
        Posts one inference request for the `simple` model to the SageMaker
        `/invocations` endpoint with context propagation, i.e. the OTel
        headers defined in `self.client_headers` are sent along, and validates
        the collected trace against those headers.
        """
        payload, _ = httpclient.InferenceServerClient.generate_request_body(
            prepare_data(httpclient, is_binary=False)
        )
        # The JSON content type is added to the shared client headers.
        self.client_headers["Content-Type"] = "application/json"
        response = requests.post(
            "http://localhost:8080/invocations",
            data=payload,
            headers=self.client_headers,
        )
        response.raise_for_status()
        status = response.status_code
        self.assertEqual(
            status,
            200,
            "Expected status code 200, received {}".format(status),
        )
        self._test_simple_trace(headers=self.client_headers)

    def test_sagemaker_invoke_trace_simple_model_context_propagation(self):
        """
        Loads the `simple` model through the SageMaker `/models` endpoint,
        posts one inference request to its `invoke` endpoint with context
        propagation, i.e. the OTel headers defined in `self.client_headers`
        are sent along, and validates the collected trace against those
        headers.
        """
        models_endpoint = "http://localhost:8080/models"

        # Loading model for this test
        load_response = requests.post(
            models_endpoint,
            data=json.dumps(
                {
                    "model_name": self.simple_model_name,
                    "url": "/opt/ml/models/123456789abcdefghi/model",
                }
            ),
            headers={"Content-Type": "application/json"},
        )
        time.sleep(5)  # wait for model to load
        load_status = load_response.status_code
        self.assertEqual(
            load_status,
            200,
            "Expected status code 200, received {}".format(load_status),
        )

        payload, _ = httpclient.InferenceServerClient.generate_request_body(
            prepare_data(httpclient, is_binary=False)
        )

        # The JSON content type is added to the shared client headers.
        self.client_headers["Content-Type"] = "application/json"
        invoke_response = requests.post(
            "{}/{}/invoke".format(models_endpoint, self.simple_model_name),
            data=payload,
            headers=self.client_headers,
        )
        invoke_response.raise_for_status()
        invoke_status = invoke_response.status_code
        self.assertEqual(
            invoke_status,
            200,
            "Expected status code 200, received {}".format(invoke_status),
        )
        time.sleep(5)
        self._test_simple_trace(headers=self.client_headers)

    def test_trace_context_exposed_to_pbe(self):
        """
        Tests trace context, propagated to python backend.

        The model named by `self.trace_context_model` is called with
        `expect_none` set to False; its `OUTPUT0` is decoded as UTF-8 JSON and
        must contain a `traceparent` entry of the form
        `<2 hex>-<32 hex>-<16 hex>-<2 hex>`.
        """
        triton_client_http = httpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        )
        expect_none = np.array([False], dtype=bool)
        inputs = httpclient.InferInput("expect_none", [1], "BOOL")
        inputs.set_data_from_numpy(expect_none)
        try:
            result = triton_client_http.infer(self.trace_context_model, inputs=[inputs])
        except InferenceServerException as e:
            self.fail(e.message())

        context = json.loads(result.as_numpy("OUTPUT0")[()].decode("utf-8"))
        self.assertIn("traceparent", context)
        # The version and flags fields are hex digits, and the whole value
        # has to match: `re.match` with `\d{2}` accepted trailing garbage and
        # rejected hex values such as "0a".
        context_pattern = re.compile(
            r"[0-9a-f]{2}-[0-9a-f]{32}-[0-9a-f]{16}-[0-9a-f]{2}"
        )
        self.assertIsNotNone(
            context_pattern.fullmatch(context["traceparent"]),
            "Malformed traceparent: {}".format(context["traceparent"]),
        )

    def test_custom_backend_tracing(self):
        """
        Sends one HTTP inference request to the identity model and validates
        the custom activities reported in the collected trace.
        """
        payload = np.array([[4]], dtype=np.int32)
        with httpclient.InferenceServerClient("localhost:8000", verbose=True) as client:
            tensor = httpclient.InferInput("INPUT0", [1, 1], "INT32")
            request_inputs = [tensor]
            tensor.set_data_from_numpy(payload)
            client.infer(self.identity_model, inputs=request_inputs)
        self._test_custom_identity_trace()

    def test_custom_backend_tracing_context_propagation(self):
        """
        Sends one HTTP inference request to the identity model with context
        propagation, i.e. the OTel headers defined in `self.client_headers`
        accompany the request, and validates the custom activities reported in
        the collected trace against those headers.
        """
        otel_headers = self.client_headers
        payload = np.array([[4]], dtype=np.int32)
        with httpclient.InferenceServerClient("localhost:8000", verbose=True) as client:
            tensor = httpclient.InferInput("INPUT0", [1, 1], "INT32")
            request_inputs = [tensor]
            tensor.set_data_from_numpy(payload)
            client.infer(self.identity_model, inputs=request_inputs, headers=otel_headers)

        self._test_custom_identity_trace(headers=otel_headers)


# Run the unittest test runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_trace/test.sh
================================================
#!/bin/bash
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Client binaries and the trace summary helper used throughout this script.
SIMPLE_HTTP_CLIENT=../clients/simple_http_infer_client
SIMPLE_GRPC_CLIENT=../clients/simple_grpc_infer_client
TRACE_SUMMARY=../common/trace_summary.py

# Settings for the Python client test.
# NOTE(review): not referenced in the part of the script shown here;
# presumably consumed further down - confirm.
CLIENT_TEST=trace_endpoint_test.py
CLIENT_LOG="client.log"
TEST_RESULT_FILE="test_results.txt"
EXPECTED_NUM_TESTS="6"

# The repository version is taken from the environment; the first positional
# argument, when given, overrides it.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

# Locations of the source models that are copied into the test repository.
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/
BLSDIR=../python_models/bls_simple
CANCELDIR=models/
MODELBASE=onnx_int32_int32_int32

# Model repository that the servers started by this script are pointed at.
MODELSDIR=`pwd`/trace_models

SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh
# Start from a clean slate: remove old logs and recreate the model repository.
rm -f *.log
rm -f *.log.*
rm -fr $MODELSDIR && mkdir -p $MODELSDIR
# Set up the 'dynamic_batch' model used for inference delay queueing: an
# identity backend model with max_batch_size 1, a single CPU instance, a
# dynamic batching queue policy and an 'execute_delay_ms' parameter of 8000.
# The config.pbtxt is generated line by line inside the model directory.
mkdir -p trace_models/dynamic_batch/1 && (cd trace_models/dynamic_batch && \
    echo 'backend: "identity"' >> config.pbtxt && \
    echo 'max_batch_size: 1' >> config.pbtxt && \
    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'instance_group [{ count: 1 \n kind: KIND_CPU }]' >> config.pbtxt && \
    echo -e 'dynamic_batching {' >> config.pbtxt && \
    echo -e '  preferred_batch_size: [ 1 ]' >> config.pbtxt && \
    echo -e '  default_queue_policy { timeout_action: REJECT \n default_timeout_microseconds: 1000000 \n max_queue_size: 8 }' >> config.pbtxt && \
    echo -e '}' >> config.pbtxt && \
    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "8000" } }]' >> config.pbtxt)

# Set up the 'simple' and 'global_simple' models from MODELBASE (keeping only
# version 1 and renaming the model in config.pbtxt), the
# 'ensemble_add_sub_int32_int32_int32' ensemble (renamed and re-pointed at
# 'simple'), the 'input_all_required' cancel model and the 'bls_simple' model.
# The steps form one '&&' chain, so a failing step skips the remaining ones.
cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
    rm -r $MODELSDIR/simple/2 && rm -r $MODELSDIR/simple/3 && \
    (cd $MODELSDIR/simple && \
            sed -i "s/^name:.*/name: \"simple\"/" config.pbtxt) && \
    cp -r $MODELSDIR/simple $MODELSDIR/global_simple && \
    (cd $MODELSDIR/global_simple && \
            sed -i "s/^name:.*/name: \"global_simple\"/" config.pbtxt) && \
    cp -r $ENSEMBLEDIR/simple_onnx_int32_int32_int32 $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
    # set up new dir for cancel model
    cp -r $CANCELDIR/input_all_required $MODELSDIR/input_all_required && \
    rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/2 && \
    rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/3 && \
    (cd $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
            sed -i "s/^name:.*/name: \"ensemble_add_sub_int32_int32_int32\"/" config.pbtxt && \
            sed -i "s/model_name:.*/model_name: \"simple\"/" config.pbtxt) && \
    mkdir -p $MODELSDIR/bls_simple/1 && cp $BLSDIR/bls_simple.py $MODELSDIR/bls_simple/1/model.py

# Set up the repeat_int32 model: copy it from the L0_decoupled test and switch
# its configuration from decoupled to non-decoupled.
cp -r ../L0_decoupled/models/repeat_int32 $MODELSDIR
sed -i "s/decoupled: True/decoupled: False/" $MODELSDIR/repeat_int32/config.pbtxt

# Set up the 'custom_identity_int32' model: an identity backend model on CPU
# whose parameters set 'execute_delay_ms' to 500 and 'enable_custom_tracing'
# to true. The config.pbtxt is generated line by line.
mkdir -p $MODELSDIR/custom_identity_int32/1 && (cd $MODELSDIR/custom_identity_int32 && \
    echo 'name: "custom_identity_int32"' >> config.pbtxt && \
    echo 'backend: "identity"' >> config.pbtxt && \
    echo 'max_batch_size: 1024' >> config.pbtxt && \
    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \
    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "500" } }, { key: "enable_custom_tracing" \n value: { string_value: "true" } }]' >> config.pbtxt)

# Overall result of the script; checks below set it to 1 on failure.
RET=0

# Set up the identity_fp32 model from the shared python_models directory.
mkdir -p $MODELSDIR/identity_fp32/1 && \
    cp ../python_models/identity_fp32/model.py $MODELSDIR/identity_fp32/1/. && \
    cp ../python_models/identity_fp32/config.pbtxt $MODELSDIR/identity_fp32/.

# Helpers =======================================
# Records a failure unless the last curl call returned HTTP 200.
# Reads the global 'code'; on failure prints ./curl.out and the given message
# (with the caller's line number) and sets RET=1.
#   $1 - message to print on failure
function assert_curl_success {
  message="${1}"
  # Nothing to report for a 200 response.
  [ "$code" == "200" ] && return 0
  cat ./curl.out
  echo -e "\n***\n*** ${message} : line ${BASH_LINENO}\n***"
  RET=1
}

# Records a failure if the last curl call returned HTTP 200.
# Reads the global 'code'; when it is 200 prints ./curl.out and the given
# message (with the caller's line number) and sets RET=1.
#   $1 - message to print when the request unexpectedly succeeded
function assert_curl_failure {
  message="${1}"
  # Any status other than 200 is the expected outcome.
  [ "$code" != "200" ] && return 0
  cat ./curl.out
  echo -e "\n***\n*** ${message} : line ${BASH_LINENO}\n***"
  RET=1
}

# Fetches the global trace settings.
# The response body is written to ./curl.out and the HTTP status code is
# stored in the global 'code'. errexit is switched off around the request and
# switched on afterwards.
function get_global_trace_setting {
  rm -f ./curl.out
  set +e
  code=$(curl -s -w '%{http_code}' -o ./curl.out localhost:8000/v2/trace/setting)
  set -e
}

# Fetches the trace settings of one model.
# The response body is written to ./curl.out and the HTTP status code is
# stored in the global 'code'. errexit is switched off around the request and
# switched on afterwards.
#   $1 - model name
function get_trace_setting {
  model_name="${1}"
  rm -f ./curl.out
  set +e
  # The URL is quoted so the model name is never word-split or glob-expanded.
  code=`curl -s -w %{http_code} -o ./curl.out "localhost:8000/v2/models/${model_name}/trace/setting"`
  set -e
}

# Updates the global trace settings.
# The response body is written to ./curl.out and the HTTP status code is
# stored in the global 'code'. errexit is switched off around the request and
# switched on afterwards.
#   $1 - JSON document with the settings to update
function update_global_trace_setting {
  settings="${1}"
  rm -f ./curl.out
  set +e
  # The payload is quoted: JSON contains '[' / ']' (glob characters) and may
  # contain spaces, so an unquoted expansion could be split or expanded.
  code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/trace/setting -d "${settings}"`
  set -e
}

# Updates the trace settings of one model.
# The response body is written to ./curl.out and the HTTP status code is
# stored in the global 'code'. errexit is switched off around the request and
# switched on afterwards.
#   $1 - model name
#   $2 - JSON document with the settings to update
function update_trace_setting {
  model_name="${1}"
  settings="${2}"
  rm -f ./curl.out
  set +e
  # URL and payload are quoted: JSON contains '[' / ']' (glob characters) and
  # may contain spaces, so an unquoted expansion could be split or expanded.
  code=`curl -s -w %{http_code} -o ./curl.out -X POST "localhost:8000/v2/models/${model_name}/trace/setting" -d "${settings}"`
  set -e
}

# Sends one inference request with a single BOOL input 'expect_none' to the
# given model.
# The response body is written to ./curl.out and the HTTP status code is
# stored in the global 'code'. errexit is switched off around the request and
# switched on afterwards.
#   $1 - model name
#   $2 - value placed in the 'data' array of the 'expect_none' input
function check_pbe_trace_context {
  model_name="${1}"
  expect_none="${2}"
  data='{"inputs":[{"name":"expect_none","datatype":"BOOL","shape":[1],"data":['${expect_none}']}]}'
  rm -f ./curl.out
  set +e
  # URL and payload are quoted: the JSON contains '[' / ']' (glob characters),
  # so an unquoted expansion could be split or expanded.
  code=`curl -s -w %{http_code} -o ./curl.out -X POST "localhost:8000/v2/models/${model_name}/infer" -d "${data}"`
  set -e
}

# Runs the simple HTTP client and the simple GRPC client once per iteration,
# appending their output (stdout and stderr) to the given log file.
# A failing client sets RET=1. The clients are run as 'if' conditions so that
# a failure is recorded even while errexit is active (the trace-setting
# helpers re-enable 'set -e'); as plain commands followed by a '$?' check, a
# failure would abort the script before the check is reached.
#   $1 - log file that receives the client output
#   $2 - number of iterations (two requests are sent per iteration)
function send_inference_requests {
    log_file="${1}"
    upper_bound="${2}"
    for (( p = 1; p <= $upper_bound; p++ )) do
        if ! $SIMPLE_HTTP_CLIENT >> "${log_file}" 2>&1; then
            RET=1
        fi

        if ! $SIMPLE_GRPC_CLIENT >> "${log_file}" 2>&1; then
            RET=1
        fi
    done
}

# Runs a Python stress client in a loop for 120 seconds.
# A background bash process repeatedly executes the client (appending its
# stdout to the log) with a 0.1 second pause between runs; after 120 seconds
# the process is sent the default kill signal, which its SIGTERM trap turns
# into a clean 'exit 0', and is then waited for.
# Note that the function leaves errexit switched off when it returns.
#   $1 - path of the Python stress client script
#   $2 - log file that receives the client output
function run_stress_client {
    stress_client="${1}"
    client_log="${2}"
    echo "Running stress test for 120 seconds..."
    # The script and log paths are passed as positional arguments ($1, $2) of
    # the inner shell; '_' fills its $0.
    bash -c '
        # Handle SIGTERM (signal 15) and exit gracefully
        trap "echo \"cleaning up stress client...\"; exit 0" SIGTERM

        while true; do
            python3 "$1" >> "$2"
            sleep 0.1
        done' _ "$stress_client" "$client_log" & CLIENT_PID=$!
    sleep 120

    # With errexit on, a failing kill/wait aborts the script.
    set -e
    kill $CLIENT_PID
    wait $CLIENT_PID
    set +e
}

#=======================================

# Scenario: start the server with trace-level=OFF, enable tracing through the
# trace API and verify that the requests sent afterwards are traced.
SERVER_ARGS="--trace-config triton,file=trace_off_to_min.log --trace-config level=OFF --trace-config rate=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_off.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# Enable via trace API and send again
# NOTE: update_global_trace_setting ends with 'set -e', so errexit is active
# again after this call.
update_global_trace_setting '{"trace_level":["TIMESTAMPS"]}'
assert_curl_success "Failed to modify global trace settings"

# Check if the current setting is returned: each expected key/value pair must
# be found on exactly one line of the response.
if [ `grep -c "\"trace_level\":\[\"TIMESTAMPS\"\]" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_rate\":\"1\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"log_frequency\":\"0\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_file\":\"trace_off_to_min.log\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
    RET=1
fi
cat ./curl.out

# 10 iterations, each running the HTTP and the GRPC client once.
send_inference_requests "client_min.log" 10

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

# Expect only the requests after calling trace API are traced
$TRACE_SUMMARY -t trace_off_to_min.log > summary_off_to_min.log

# 20 = 10 iterations x 2 clients.
if [ `grep -c "COMPUTE_INPUT_END" summary_off_to_min.log` != "20" ]; then
    cat summary_off_to_min.log
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

if [ `grep -c ^simple summary_off_to_min.log` != "20" ]; then
    cat summary_off_to_min.log
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

# Scenario: add a model specific setting on top of the global settings
# (level=TIMESTAMPS, rate=6).
SERVER_ARGS="--trace-config triton,file=global_trace.log --trace-config level=TIMESTAMPS --trace-config rate=6 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_off.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# Try to set 'trace_file' for 'simple' via trace API; this must be rejected
update_trace_setting "simple" '{"trace_file":"global_trace.log"}'
assert_curl_failure "trace_file updated through network protocol expects an error"

# Check that the response carries the expected error message
if [ `grep -c "\"error\":\"trace file location can not be updated through network protocol\"" ./curl.out` != "1" ]; then
    RET=1
fi
cat ./curl.out

# Update a setting that may be changed through the trace API ('log_frequency')
update_trace_setting "simple" '{"log_frequency":"2"}'
assert_curl_success "Failed to modify trace settings for 'simple' model"

# Check if the current setting is returned (not specified setting from global)
if [ `grep -c "\"trace_level\":\[\"TIMESTAMPS\"\]" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_rate\":\"6\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_count\":\"-1\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"log_frequency\":\"2\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_file\":\"global_trace.log\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
    RET=1
fi
cat ./curl.out

# 10 iterations, each running the HTTP and the GRPC client once.
send_inference_requests "client_simple.log" 10

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

# No model specific trace file must have been created.
if [ -f ./simple_trace.log ]; then
    echo -e "\n***\n*** Test Failed, unexpected generation of simple_trace.log\n***"
    RET=1
fi

# The traces are expected in indexed files: 2 in '.0' and 1 in '.1'.
# NOTE(review): presumably 20 requests at rate=6 yield 3 traces, split into
# files by log_frequency=2 - confirm against the trace documentation.
$TRACE_SUMMARY -t global_trace.log.0 > summary_global_trace.log.0

if [ `grep -c "COMPUTE_INPUT_END" summary_global_trace.log.0` != "2" ]; then
    cat summary_global_trace.log.0
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

if [ `grep -c ^simple summary_global_trace.log.0` != "2" ]; then
    cat summary_global_trace.log.0
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

$TRACE_SUMMARY -t global_trace.log.1 > summary_global_trace.log.1

if [ `grep -c "COMPUTE_INPUT_END" summary_global_trace.log.1` != "1" ]; then
    cat summary_global_trace.log.1
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

if [ `grep -c ^simple summary_global_trace.log.1` != "1" ]; then
    cat summary_global_trace.log.1
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

# Scenario: update a model specific setting and clear it again.
SERVER_ARGS="--trace-config triton,file=global_trace.log --trace-config level=TIMESTAMPS --trace-config rate=6 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_off.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# Add model setting and update it
update_trace_setting "simple" '{"trace_rate":"1"}'
assert_curl_success "Failed to modify trace settings for 'simple' model"

update_trace_setting "simple" '{"trace_level":["OFF"]}'
assert_curl_success "Failed to modify trace settings for 'simple' model"

# Check if the current setting is returned
if [ `grep -c "\"trace_level\":\[\"OFF\"\]" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_rate\":\"1\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_count\":\"-1\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"log_frequency\":\"0\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_file\":\"global_trace.log\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
    RET=1
fi
cat ./curl.out

# Send requests to simple where trace is explicitly disabled
send_inference_requests "client_update.log" 10

rm -f ./curl.out
set +e

# Clear the model specific 'trace_level' by setting it to null; the model
# specific 'trace_rate' (1) is kept
update_trace_setting "simple" '{"trace_level":null}'
assert_curl_success "Failed to modify trace settings for 'simple' model"

# Check if the current setting is returned: 'trace_level' falls back to the
# global value while 'trace_rate' stays at 1
if [ `grep -c "\"trace_level\":\[\"TIMESTAMPS\"\]" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_rate\":\"1\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_count\":\"-1\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"log_frequency\":\"0\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_file\":\"global_trace.log\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
    RET=1
fi
cat ./curl.out

# Send requests to simple where now uses global setting
send_inference_requests "client_clear.log" 5

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

# No model specific trace file must have been created.
if [ -f ./update_trace.log ]; then
    echo -e "\n***\n*** Test Failed, unexpected generation of update_trace.log\n***"
    RET=1
fi

$TRACE_SUMMARY -t global_trace.log > summary_global_trace.log

# Only the second batch is expected to be traced: 10 = 5 iterations x 2
# clients; the first batch was sent while tracing was OFF for 'simple'.
if [ `grep -c "COMPUTE_INPUT_END" summary_global_trace.log` != "10" ]; then
    cat summary_global_trace.log
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

if [ `grep -c ^simple summary_global_trace.log` != "10" ]; then
    cat summary_global_trace.log
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

# Scenario: update the trace count through the trace API.
SERVER_ARGS="--trace-config triton,file=global_count.log --trace-config level=TIMESTAMPS --trace-config rate=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_off.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# Send requests without trace count
send_inference_requests "client_update.log" 10

set -e

# Check the current setting; 'trace_count' is expected to be -1 at this point
get_trace_setting "simple"
assert_curl_success "Failed to obtain trace settings for 'simple' model"

if [ `grep -c "\"trace_level\":\[\"TIMESTAMPS\"\]" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_rate\":\"1\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_count\":\"-1\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"log_frequency\":\"0\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_file\":\"global_count.log\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
    RET=1
fi
cat ./curl.out

# Set trace count
update_global_trace_setting '{"trace_count":"5"}'
assert_curl_success "Failed to modify global trace settings"

# Check if the current setting is returned
if [ `grep -c "\"trace_level\":\[\"TIMESTAMPS\"\]" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_rate\":\"1\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_count\":\"5\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"log_frequency\":\"0\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_file\":\"global_count.log\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
    RET=1
fi
cat ./curl.out

# Send requests to simple with 'trace_count' set to 5; this is more requests
# than the trace count allows
send_inference_requests "client_update.log" 10

# Check the current setting again and expect 'trace_count' becomes 0
get_trace_setting "simple"
assert_curl_success "Failed to obtain trace settings for 'simple' model"

if [ `grep -c "\"trace_level\":\[\"TIMESTAMPS\"\]" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_rate\":\"1\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_count\":\"0\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"log_frequency\":\"0\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_file\":\"global_count.log\"" ./curl.out` != "1" ]; then
    RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
    RET=1
fi
cat ./curl.out

# Check if the indexed file has been generated when trace count reaches 0
if [ ! -f ./global_count.log.0 ]; then
    echo -e "\n***\n*** Test Failed, expect generation of global_count.log.0 before stopping server\n***"
    RET=1
fi

SETTINGS="trace_count trace_rate log_frequency"

for SETTING in $SETTINGS; do
    # Check `out of range` errors
    update_trace_setting "simple" '{"'${SETTING}'":"10000000000"}'
    assert_curl_failure "Server modified '${SETTING}' with an out of range value."
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

# There should be two trace files for trace counted requests and before trace
# counted requests
$TRACE_SUMMARY -t global_count.log > summary_global_count.log

if [ `grep -c "COMPUTE_INPUT_END" summary_global_count.log` != "20" ]; then
    cat summary_global_count.log
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

if [ `grep -c ^simple summary_global_count.log` != "20" ]; then
    cat summary_global_count.log
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

$TRACE_SUMMARY -t global_count.log.0 > summary_global_count.log.0

if [ `grep -c "COMPUTE_INPUT_END" summary_global_count.log.0` != "5" ]; then
    cat summary_global_count.log.0
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

if [ `grep -c ^simple summary_global_count.log.0` != "5" ]; then
    cat summary_global_count.log.0
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

# Run the Python client library unit tests against a freshly started server
SERVER_ARGS="--trace-config triton,file=global_unittest.log --trace-config level=TIMESTAMPS --trace-config rate=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_unittest.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

if python $CLIENT_TEST >>$CLIENT_LOG 2>&1; then
    # The test script exited cleanly; also verify the recorded result count
    if ! check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
else
    cat $CLIENT_LOG
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID


# Check `--trace-config` sets arguments properly, then verify through BLS
# requests that both the ensemble and its composing model 'simple' are traced.
SERVER_ARGS="--trace-config=triton,file=bls_trace.log --trace-config=level=TIMESTAMPS \
            --trace-config=rate=4 --trace-config=count=6 --trace-config=mode=triton --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_trace_config.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

get_trace_setting "simple"
assert_curl_success "Failed to obtain trace settings for 'simple' model"

# Every setting given on the command line must appear exactly once in the
# response. Report the offending field so a failure can be diagnosed.
for EXPECTED_FIELD in \
        '"trace_level":\["TIMESTAMPS"\]' \
        '"trace_rate":"4"' \
        '"trace_count":"6"' \
        '"log_frequency":"0"' \
        '"trace_file":"bls_trace.log"' \
        '"trace_mode":"triton"'; do
    if [ "$(grep -c "$EXPECTED_FIELD" ./curl.out)" != "1" ]; then
        echo "*** Unexpected trace settings, expected exactly one: ${EXPECTED_FIELD}"
        RET=1
    fi
done
cat ./curl.out

set +e
# Send bls requests to make sure simple model is traced
for p in {1..4}; do
    python -c 'import opentelemetry_unittest; \
        opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")'  >> client_update.log 2>&1
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

$TRACE_SUMMARY -t bls_trace.log > summary_bls.log

if [ "$(grep -c "COMPUTE_INPUT_END" summary_bls.log)" != "2" ]; then
    cat summary_bls.log
    # Single quotes inside the message: nested double quotes would end the
    # string early and print the event name without quotes.
    echo -e "\n***\n*** Test Failed: Unexpected number of traced 'COMPUTE_INPUT_END' events.\n***"
    RET=1
fi

if [ "$(grep -c ^ensemble_add_sub_int32_int32_int32 summary_bls.log)" != "1" ]; then
    cat summary_bls.log
    echo -e "\n***\n*** Test Failed: BLS child ensemble model wasn't traced. \n***"
    RET=1
fi

if [ "$(grep -c ^simple summary_bls.log)" != "1" ]; then
    cat summary_bls.log
    echo -e "\n***\n*** Test Failed: ensemble's model 'simple' wasn't traced. \n***"
    RET=1
fi

# Count every occurrence of 'parent_id' (not matching lines) in the raw trace
if [ "$(grep -o 'parent_id' bls_trace.log | wc -l)" != "2" ]; then
    cat bls_trace.log
    echo -e "\n***\n*** Test Failed: Unexpected number of 'parent id' fields. \n***"
    RET=1
fi

# Trace settings of models that do not exist or are not loaded:
# the trace API is expected to fail for them and succeed for loaded models.
SERVER_ARGS="--model-control-mode=explicit --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_nonexistent_model.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Load 'simple' through the model repository API
rm -f ./curl.out
set +e
code=$(curl -s -w "%{http_code}" -o ./curl.out -X POST "localhost:8000/v2/repository/models/simple/load")
set -e
assert_curl_success "Failed to load 'simple' model"

# A model that does not exist: reading its settings must fail
get_trace_setting "does-not-exist"
assert_curl_failure "Server returned trace settings for a non-existent model"

# A model that does not exist: updating its settings must fail
update_trace_setting "does-not-exist" '{"log_frequency":"1"}'
assert_curl_failure "Server modified trace settings for a non-existent model"

# The loaded model: reading its settings must succeed
get_trace_setting "simple"
assert_curl_success "Failed to obtain trace settings for 'simple' model"

# The loaded model: updating its settings must succeed
update_trace_setting "simple" '{"log_frequency":"1"}'
assert_curl_success "Failed to modify trace settings for 'simple' model"

# Unload 'simple'; afterwards both read and update must fail
rm -f ./curl.out
set +e
code=$(curl -s -w "%{http_code}" -o ./curl.out -X POST "localhost:8000/v2/repository/models/simple/unload")
set -e
assert_curl_success "Failed to unload 'simple' model"

get_trace_setting "simple"
assert_curl_failure "Server returned trace settings for an unloaded model"

update_trace_setting "simple" '{"log_frequency":"1"}'
assert_curl_failure "Server modified trace settings for an unloaded model"

# Load 'simple' again; read and update must succeed once more
rm -f ./curl.out
set +e
code=$(curl -s -w "%{http_code}" -o ./curl.out -X POST "localhost:8000/v2/repository/models/simple/load")
set -e
assert_curl_success "Failed to load 'simple' model"

get_trace_setting "simple"
assert_curl_success "Failed to obtain trace settings for 'simple' model"

update_trace_setting "simple" '{"log_frequency":"1"}'
assert_curl_success "Failed to modify trace settings for 'simple' model"

kill $SERVER_PID
wait $SERVER_PID

set +e

# Custom backend tracing: a single inference on custom_identity_int32 must
# produce each custom activity exactly once in the Triton-mode trace file.
SERVER_ARGS="--model-control-mode=explicit --model-repository=$MODELSDIR \
            --load-model=custom_identity_int32 --trace-config=level=TIMESTAMPS \
            --trace-config=triton,file=custom_tracing_triton.log \
            --trace-config=rate=1 --trace-config=mode=triton"
SERVER_LOG="./custom_backend_tracing.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Send 1 inference request, should expect 3 custom activities:
# CUSTOM_SINGLE_ACTIVITY, CUSTOM_ACTIVITY_START, CUSTOM_ACTIVITY_END
rm -f ./curl.out
data='{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,1],"data":[4]}]}'
set +e
# The payload is quoted: it contains '[...]', which the shell would otherwise
# treat as a glob pattern and subject to word splitting.
code=$(curl -s -w "%{http_code}" -o ./curl.out -X POST localhost:8000/v2/models/custom_identity_int32/infer -d "${data}")
set -e
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

set +e


$TRACE_SUMMARY -t custom_tracing_triton.log > summary_custom_tracing_triton.log

# Each custom event must be reported exactly once; the message names the
# event that was actually checked.
for CUSTOM_EVENT in CUSTOM_SINGLE_ACTIVITY CUSTOM_ACTIVITY_START CUSTOM_ACTIVITY_END; do
    if [ "$(grep -c "$CUSTOM_EVENT" summary_custom_tracing_triton.log)" != "1" ]; then
        cat summary_custom_tracing_triton.log
        echo -e "\n***\n*** Test Failed: Unexpected number of traced '${CUSTOM_EVENT}' events.\n***"
        RET=1
    fi
done

# Check opentelemetry trace exporter sends proper info.
# Triton's OTLP exporter is pointed at localhost:$OTLP_PORT (see the
# `opentelemetry,url` option below).
# NOTE(review): the collector binary is downloaded here but not launched in
# this section; presumably $OPENTELEMETRY_TEST starts it itself - confirm.
OTLP_PORT=10000
OTEL_COLLECTOR=./otelcol
OTEL_COLLECTOR_LOG="./trace_collector_http_exporter.log"

# Installing OpenTelemetry collector (v0.91.0).
# Ref: https://opentelemetry.io/docs/collector/getting-started/#local
# The archive is fetched over HTTPS only and unpacked into the current
# directory, which is where $OTEL_COLLECTOR points.
curl --proto '=https' --tlsv1.2 -fOL https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.91.0/otelcol_0.91.0_linux_amd64.tar.gz
tar -xvf otelcol_0.91.0_linux_amd64.tar.gz

# Drop trace files left over from previous runs
rm collected_traces.json*
# Unittests then check that produced spans have expected format and events
OPENTELEMETRY_TEST=opentelemetry_unittest.py
OPENTELEMETRY_LOG="opentelemetry_unittest.log"
EXPECTED_NUM_TESTS="19"

# Set up repo and args for SageMaker
# The model directory is recreated from $DATADIR/$MODELBASE with versions 2
# and 3 removed and the model name in config.pbtxt replaced by "simple".
export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME="simple"
MODEL_PATH="/opt/ml/models/123456789abcdefghi/model"
rm -r ${MODEL_PATH}
mkdir -p "${MODEL_PATH}"
cp -r $DATADIR/$MODELBASE/* ${MODEL_PATH} && \
    rm -r ${MODEL_PATH}/2 && rm -r ${MODEL_PATH}/3 && \
        sed -i "s/onnx_int32_int32_int32/simple/" ${MODEL_PATH}/config.pbtxt

# Add model to test trace context exposed to python backend
mkdir -p $MODELSDIR/trace_context/1 && cp ./trace_context.py $MODELSDIR/trace_context/1/model.py

# set up identity model
# The config.pbtxt is regenerated from scratch: the model directory is removed
# first because every line below is appended with '>>'. The parameters set are
# execute_delay_ms=500, enable_custom_tracing=true, nested_span_count=6 and
# single_activity_frequency=3.
rm -r ${MODELSDIR}/custom_identity_int32
mkdir -p $MODELSDIR/custom_identity_int32/1 && (cd $MODELSDIR/custom_identity_int32 && \
    echo 'name: "custom_identity_int32"' >> config.pbtxt && \
    echo 'backend: "identity"' >> config.pbtxt && \
    echo 'max_batch_size: 1024' >> config.pbtxt && \
    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
    echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \
    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "500" } }, { key: "enable_custom_tracing" \n value: { string_value: "true" } }, { key: "nested_span_count" \n value: { string_value: "6" } }, { key: "single_activity_frequency" \n value: { string_value: "3" } }]' >> config.pbtxt)

# Explicit model control: only the models listed with --load-model are loaded.
# Tracing runs in OpenTelemetry mode with two resource attributes set.
SERVER_ARGS="--allow-sagemaker=true --model-control-mode=explicit \
                --load-model=simple --load-model=ensemble_add_sub_int32_int32_int32 \
                --load-model=repeat_int32 --load-model=custom_identity_int32\
                --load-model=input_all_required \
                --load-model=dynamic_batch \
                --load-model=bls_simple --trace-config=level=TIMESTAMPS \
                --load-model=trace_context --trace-config=rate=1 \
                --trace-config=count=-1 --trace-config=mode=opentelemetry \
                --trace-config=opentelemetry,resource=test.key=test.value \
                --trace-config=opentelemetry,resource=service.name=test_triton \
                --trace-config=opentelemetry,url=localhost:$OTLP_PORT/v1/traces \
                --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_otel_otelcol_exporter.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# Run the OpenTelemetry unit tests; a non-zero exit status or an unexpected
# number of recorded test results marks the run as failed.
python $OPENTELEMETRY_TEST >>$OPENTELEMETRY_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $OPENTELEMETRY_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $OPENTELEMETRY_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e
kill $SERVER_PID
wait $SERVER_PID
set +e

# Testing OTel WAR with trace rate = 0: requests without an OTel context must
# not be exported, while a request carrying a `traceparent` header must be.
# '-f' keeps rm quiet when no trace file is left over from an earlier test.
rm -f collected_traces.json

OTEL_COLLECTOR=./otelcol
OTEL_COLLECTOR_LOG="./trace_collector_exporter.log"
$OTEL_COLLECTOR --config ./trace-config.yaml >> $OTEL_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$!

SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=0 \
                --trace-config=count=-1 --trace-config=mode=opentelemetry \
                --trace-config=opentelemetry,url=localhost:$OTLP_PORT/v1/traces \
                --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_otel_WAR.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

get_trace_setting "bls_simple"
# The message names the model that was actually queried
assert_curl_success "Failed to obtain trace settings for 'bls_simple' model"

# Settings that must be reported exactly once in OpenTelemetry mode
for EXPECTED_FIELD in \
        '"trace_level":\["TIMESTAMPS"\]' \
        '"trace_rate":"0"' \
        '"trace_count":"-1"' \
        '"trace_mode":"opentelemetry"' \
        "\"url\":\"localhost:$OTLP_PORT/v1/traces\"" \
        '"bsp_max_export_batch_size":"512"' \
        '"bsp_schedule_delay":"5000"' \
        '"bsp_max_queue_size":"2048"'; do
    if [ "$(grep -c "$EXPECTED_FIELD" ./curl.out)" != "1" ]; then
        echo "*** Unexpected trace settings, expected exactly one: ${EXPECTED_FIELD}"
        RET=1
    fi
done

# Settings that must NOT be reported in OpenTelemetry mode
for ABSENT_FIELD in '"trace_file":' '"log_frequency":'; do
    if [ "$(grep -c "$ABSENT_FIELD" ./curl.out)" != "0" ]; then
        echo "*** Unexpected trace settings, field should be absent: ${ABSENT_FIELD}"
        RET=1
    fi
done
cat ./curl.out


set +e
# Send bls requests to make sure bls_simple model is NOT traced
for p in {1..10}; do
    python -c 'import opentelemetry_unittest; \
        opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")'  >> client_update.log 2>&1
done

if [ -s collected_traces.json ] ; then
    echo -e "\n***\n*** collected_traces.json should be empty, but it is not.\n***"
    exit 1
fi

# Send 1 bls request with OTel context to make sure it is traced
python -c 'import opentelemetry_unittest; \
        opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32", \
            headers={"traceparent": "00-0af7651916cd43dd8448eb211c12666c-b7ad6b7169242424-01"} \
        )'  >> client_update.log 2>&1

# Give the exporter and the collector time to write the trace to the file
sleep 20

if ! [ -s collected_traces.json ] ; then
    echo -e "\n***\n*** collected_traces.json should contain OTel trace, but it is not. \n***"
    exit 1
fi

set -e
kill $COLLECTOR_PID
wait $COLLECTOR_PID
kill $SERVER_PID
wait $SERVER_PID
set +e

# Test that only traces with OTel Context are collected after count goes to 0
SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=5 \
                --trace-config=count=1 --trace-config=mode=opentelemetry \
                --trace-config=opentelemetry,url=localhost:$OTLP_PORT/v1/traces \
                --model-repository=$MODELSDIR"
# Distinct log name so this run does not reuse the log file of the
# rate=0 test (./inference_server_otel_WAR.log).
SERVER_LOG="./inference_server_otel_WAR_count.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi


# '-f' keeps rm quiet when no trace file is left over from an earlier test
rm -f collected_traces.json
$OTEL_COLLECTOR --config ./trace-config.yaml >> $OTEL_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$!

get_trace_setting "bls_simple"
# The message names the model that was actually queried
assert_curl_success "Failed to obtain trace settings for 'bls_simple' model"

# Settings that must be reported exactly once in OpenTelemetry mode
for EXPECTED_FIELD in \
        '"trace_level":\["TIMESTAMPS"\]' \
        '"trace_rate":"5"' \
        '"trace_count":"1"' \
        '"trace_mode":"opentelemetry"' \
        "\"url\":\"localhost:$OTLP_PORT/v1/traces\"" \
        '"bsp_max_export_batch_size":"512"' \
        '"bsp_schedule_delay":"5000"' \
        '"bsp_max_queue_size":"2048"'; do
    if [ "$(grep -c "$EXPECTED_FIELD" ./curl.out)" != "1" ]; then
        echo "*** Unexpected trace settings, expected exactly one: ${EXPECTED_FIELD}"
        RET=1
    fi
done

# Settings that must NOT be reported in OpenTelemetry mode
for ABSENT_FIELD in '"trace_file":' '"log_frequency":'; do
    if [ "$(grep -c "$ABSENT_FIELD" ./curl.out)" != "0" ]; then
        echo "*** Unexpected trace settings, field should be absent: ${ABSENT_FIELD}"
        RET=1
    fi
done
cat ./curl.out

set +e
# Send 20 bls requests without OTel context; with rate=5 and count=1 exactly
# one of them is expected to be traced (checked below).
for p in {1..20}; do
    python -c 'import opentelemetry_unittest; \
        opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")'  >> client_update.log 2>&1
done

# Give the exporter and the collector time to write the trace to the file
sleep 20

if ! [[ -s collected_traces.json && `grep -c "\"name\":\"InferRequest\"" ./collected_traces.json` == 1 && `grep -c "\"parentSpanId\":\"\"" ./collected_traces.json` == 1 ]] ; then
    echo -e "\n***\n*** collected_traces.json should contain only 1 trace.\n***"
    cat collected_traces.json
    exit 1
fi

# Send 10 bls requests with OTel context and 10 without. Only the requests
# carrying a context should add traces now that the count is used up.
for p in {1..10}; do
    python -c 'import opentelemetry_unittest; \
            opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32", \
                headers={"traceparent": "00-0af7651916cd43dd8448eb211c12666c-b7ad6b7169242424-01"} \
            )'  >> client_update.log 2>&1

    python -c 'import opentelemetry_unittest; \
            opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32" \
            )'  >> client_update.log 2>&1

    sleep 10
done

# Wait for all traces to be collected
sleep 5

# Expect the single root trace from the first phase plus 10 traces parented
# by the span id passed in the `traceparent` header.
if ! [[ -s collected_traces.json && `grep -c "\"parentSpanId\":\"\"" ./collected_traces.json` == 1 && `grep -c "\"parentSpanId\":\"b7ad6b7169242424\"" ./collected_traces.json` == 10 ]] ; then
    echo -e "\n***\n*** collected_traces.json should contain 11 OTel trace, but it is not. \n***"
    exit 1
fi

set -e
kill $COLLECTOR_PID
wait $COLLECTOR_PID
kill $SERVER_PID
wait $SERVER_PID
set +e

################################################################################
# Tests to make sure BatchSpanProcessor's arguments are propagated from cmd    #
# to trace initialization step.                                                #
################################################################################

# bsp_max_queue_size = 1
# We are sending a bls request, that results in a trace with 6 spans,
# but because `bsp_max_queue_size` is 1, OTel should drop some of them
# and print a warning in a log.
EXPECTED_WARNING="BatchSpanProcessor queue is full - dropping span."
SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \
                --trace-config=count=-1 --trace-config=mode=opentelemetry \
                --trace-config=opentelemetry,url=localhost:$OTLP_PORT/v1/traces \
                --trace-config opentelemetry,bsp_max_queue_size=1 \
                --model-repository=$MODELSDIR --log-verbose=1"
SERVER_LOG="./inference_server_otel_BSP_max_queue_size.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# '-f' keeps rm quiet when no trace file is left over from an earlier test
rm -f collected_traces.json
$OTEL_COLLECTOR --config ./trace-config.yaml >> $OTEL_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$!

set +e
python -c 'import opentelemetry_unittest; \
    opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")'  >> client_update.log 2>&1

sleep 20

# grep -q reports presence through its exit status. The previous
# `[[ count > 0 ]]` form compared the count as a string, not as a number.
if ! grep -q "$EXPECTED_WARNING" $SERVER_LOG ; then
    echo -e "\n***\n*** $SERVER_LOG does not contain expected BSP warning.\n***"
    cat $SERVER_LOG
    exit 1
fi

set -e
kill $COLLECTOR_PID
wait $COLLECTOR_PID
kill $SERVER_PID
wait $SERVER_PID
set +e

# bsp_schedule_delay = 0
# We are sending a bls request, that results in a trace with 6 spans.
# `bsp_schedule_delay` is 0, so OTel should export traces in batches of random
# size, that translates into random number of 'scopeSpans' field in
# `collected_traces.json`.
SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \
                --trace-config=count=-1 --trace-config=mode=opentelemetry \
                --trace-config=opentelemetry,url=localhost:$OTLP_PORT/v1/traces \
                --trace-config opentelemetry,bsp_schedule_delay=0 \
                --model-repository=$MODELSDIR --log-verbose=1"
SERVER_LOG="./inference_server_otel_BSP_schedule_delay.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# '-f' keeps rm quiet when no trace file is left over from an earlier test
rm -f collected_traces.json
$OTEL_COLLECTOR --config ./trace-config.yaml >> $OTEL_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$!

set +e
python -c 'import opentelemetry_unittest; \
    opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")'  >> client_update.log 2>&1

sleep 10

# More than one 'scopeSpans' entry means the spans arrived in several batches.
# '-gt' compares numerically; '>' inside [[ ]] is a string comparison.
if ! [[ -s collected_traces.json && $(grep -o "scopeSpans" ./collected_traces.json | wc -l) -gt 1 ]] ; then
    echo -e "\n***\n*** collected_traces.json has unexpected number of span batches collected.\n***"
    cat collected_traces.json
    exit 1
fi

set -e
kill $COLLECTOR_PID
wait $COLLECTOR_PID
kill $SERVER_PID
wait $SERVER_PID
set +e

# bsp_max_export_batch_size = 1
# A single bls request yields a trace with 6 spans. With an export batch size
# of 1 every span is exported on its own, so `collected_traces.json` is
# expected to hold exactly 6 'scopeSpans' entries.
SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1\
                --trace-config=count=-1 --trace-config=mode=opentelemetry \
                --trace-config=opentelemetry,url=localhost:$OTLP_PORT/v1/traces \
                --trace-config opentelemetry,bsp_max_export_batch_size=1
                --model-repository=$MODELSDIR --log-verbose=1"
SERVER_LOG="./inference_server_otel_BSP_max_export_batch_size.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm collected_traces.json
$OTEL_COLLECTOR --config ./trace-config.yaml >> $OTEL_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$!

set +e
python -c 'import opentelemetry_unittest; \
    opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")'  >> client_update.log 2>&1

sleep 10

# Count the exported batches only when the collector wrote a non-empty file
SPAN_BATCHES=0
if [ -s collected_traces.json ]; then
    SPAN_BATCHES=`grep -o "scopeSpans" ./collected_traces.json | wc -l`
fi

if [[ $SPAN_BATCHES != 6 ]] ; then
    echo -e "\n***\n*** collected_traces.json has unexpected number of span batches collected.\n***"
    cat collected_traces.json
    exit 1
fi

set -e
kill $COLLECTOR_PID
wait $COLLECTOR_PID
kill $SERVER_PID
wait $SERVER_PID
set +e

# Test that PBE returns None as trace context in trace mode Triton
SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \
                --trace-config=count=-1 --trace-config=mode=triton \
                --model-repository=$MODELSDIR --log-verbose=1"
SERVER_LOG="./inference_server_triton_trace_context.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# The second argument (true) is the value sent for the model's `expect_none`
# input - presumably; verify against check_pbe_trace_context.
check_pbe_trace_context "trace_context" true
assert_curl_success "PBE trace context is not None"

set -e
kill $SERVER_PID
wait $SERVER_PID
set +e

# Test that PBE returns None as trace context in trace mode OpenTelemetry,
# but tracing is OFF.
SERVER_ARGS="--trace-config=level=OFF --trace-config=rate=1 \
                --trace-config=count=-1 --trace-config=mode=opentelemetry \
                --model-repository=$MODELSDIR --log-verbose=1"
# Distinct log name so this run does not reuse the log file of the
# Triton-mode test above.
SERVER_LOG="./inference_server_otel_off_trace_context.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

check_pbe_trace_context "trace_context" true
assert_curl_success "PBE trace context is not None"

set -e
kill $SERVER_PID
wait $SERVER_PID
set +e

# Long running stress test, part 1: Triton trace mode.
# The server must still be alive after the stress client has finished.
SERVER_ARGS="--model-control-mode=explicit \
                --model-repository=$MODELSDIR \
                --load-model=identity_fp32 \
                --trace-config mode=triton \
                --trace-config triton,file=./trace \
                --trace-config rate=1 \
                --trace-config level=TIMESTAMPS"
SERVER_LOG="./inference_server_triton_trace_stress.log"
CLIENT_LOG="./client_triton_trace_stress.log"
STRESS_CLIENT="./trace_stress_grpc_client.py"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Drive the server with the stress client
run_stress_client $STRESS_CLIENT $CLIENT_LOG

set -e
# `kill -0` only probes whether the process still exists
if kill -0 ${SERVER_PID} > /dev/null 2>&1; then
    kill $SERVER_PID
    wait $SERVER_PID
else
    echo -e "\n***\n*** Server stopped unexpectedly during stress test\n***"
    cat $SERVER_LOG
    RET=1
fi
set +e

# Long running stress test, part 2: OpenTelemetry trace mode.
# Traces are exported to the collector listening on $OTLP_PORT.
SERVER_ARGS="--model-control-mode=explicit \
                --model-repository=$MODELSDIR \
                --load-model=identity_fp32 \
                --trace-config level=TIMESTAMPS \
                --trace-config rate=1 \
                --trace-config mode=opentelemetry \
                --trace-config opentelemetry,resource=test.key=test.value \
                --trace-config opentelemetry,resource=service.name=test_triton \
                --trace-config opentelemetry,url=localhost:$OTLP_PORT/v1/traces"
SERVER_LOG="./inference_server_otel_trace_stress.log"
CLIENT_LOG="./client_otel_trace_stress.log"
STRESS_CLIENT="./trace_stress_grpc_client.py"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm collected_traces.json
$OTEL_COLLECTOR --config ./trace-config.yaml >> $OTEL_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$!
# Drive the server with the stress client
run_stress_client $STRESS_CLIENT $CLIENT_LOG

set -e
kill $COLLECTOR_PID
wait $COLLECTOR_PID
# `kill -0` only probes whether the process still exists
if kill -0 ${SERVER_PID} > /dev/null 2>&1; then
    kill $SERVER_PID
    wait $SERVER_PID
else
    echo -e "\n***\n*** Server stopped unexpectedly during stress test\n***"
    cat $SERVER_LOG
    RET=1
fi
set +e

# Report the accumulated result of all tests in this script
exit $RET


================================================
FILE: qa/L0_trace/trace-config.yaml
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Simple config file for the OpenTelemetry collector used by the L0_trace test.
# The collector receives traces over OTLP/HTTP on port 10000, batches them and
# writes them to the file ./collected_traces.json, which test.sh inspects.
# Ref: https://opentelemetry.io/docs/collector/configuration/
receivers:
  otlp:
    protocols:
      http:
        # Port 10000 matches OTLP_PORT in test.sh.
        endpoint: 0.0.0.0:10000

processors:
  batch:
    # Batch size and timeout of the batch processor; see the collector's
    # batch processor documentation for the exact flush semantics.
    send_batch_size: 10
    timeout: 10s

exporters:
  file:
    # test.sh removes this file before a test and greps it afterwards.
    path: ./collected_traces.json

service:
  pipelines:
    # Single traces pipeline: otlp receiver -> batch processor -> file exporter.
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [file]


================================================
FILE: qa/L0_trace/trace_context.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend model that returns the trace context of each request.

    The BOOL input ``expect_none`` tells the model whether the request's
    trace context is expected to be ``None``; a mismatch raises
    ``pb_utils.TritonModelException``. The context is returned in ``OUTPUT0``.
    """

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Add the ``expect_none`` input and ``OUTPUT0`` output if missing.

        Names already present in the config (as reported by ``as_dict()``)
        are left untouched. Returns the ``auto_complete_model_config`` object
        that was passed in.
        """
        required_inputs = [
            {"name": "expect_none", "data_type": "TYPE_BOOL", "dims": [1]}
        ]
        required_outputs = [
            {"name": "OUTPUT0", "data_type": "TYPE_STRING", "dims": [-1]}
        ]

        # Snapshot the names that are already configured before adding anything.
        current = auto_complete_model_config.as_dict()
        known_inputs = [entry["name"] for entry in current["input"]]
        known_outputs = [entry["name"] for entry in current["output"]]

        for spec in required_inputs:
            if spec["name"] in known_inputs:
                continue
            auto_complete_model_config.add_input(spec)
        for spec in required_outputs:
            if spec["name"] in known_outputs:
                continue
            auto_complete_model_config.add_output(spec)

        return auto_complete_model_config

    def execute(self, requests):
        """Validate and echo the trace context of every request.

        Raises ``pb_utils.TritonModelException`` when the presence of the
        context contradicts the request's ``expect_none`` flag. Returns one
        ``InferenceResponse`` per request, in request order.
        """
        responses = []
        for request in requests:
            flag_tensor = pb_utils.get_input_tensor_by_name(request, "expect_none")
            expect_none = flag_tensor.as_numpy()[0]
            context = request.trace().get_context()
            has_context = context is not None

            if expect_none and has_context:
                raise pb_utils.TritonModelException("Context should be None")
            if not expect_none and not has_context:
                raise pb_utils.TritonModelException("Context should NOT be None")

            # The context (or None) is converted to a bytes array for OUTPUT0.
            encoded_context = np.array(context).astype(np.bytes_)
            responses.append(
                pb_utils.InferenceResponse(
                    [pb_utils.Tensor("OUTPUT0", encoded_context)]
                )
            )

        return responses


================================================
FILE: qa/L0_trace/trace_endpoint_test.py
================================================
#!/usr/bin/python

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import sys
import unittest

import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from google.protobuf import json_format
from tritonclient.utils import InferenceServerException


# Similar set up as dynamic batcher tests
class TraceEndpointTest(tu.TestResultCollector):
    """Check the trace-settings endpoint through the HTTP and gRPC clients.

    Every test talks to a server on localhost (HTTP on port 8000, gRPC on
    port 8001) and uses the model "simple". The start state the tests expect
    is the one returned by ``_http_settings()`` without overrides.
    """

    @staticmethod
    def _http_settings(**overrides):
        """Return the trace settings expected from the HTTP client.

        Starts from the settings expected before any update and replaces the
        fields named in ``overrides``. A fresh dict is built on every call so
        callers never share mutable state.
        """
        settings = {
            "trace_file": "global_unittest.log",
            "trace_level": ["TIMESTAMPS"],
            "trace_rate": "1",
            "trace_count": "-1",
            "log_frequency": "0",
            "trace_mode": "triton",
        }
        settings.update(overrides)
        return settings

    @classmethod
    def _grpc_settings(cls, **overrides):
        """Return the expected settings as a gRPC ``TraceSettingResponse``.

        Carries the same content as ``_http_settings(**overrides)``; each
        field is wrapped as ``{"value": [...]}``, which is the JSON form of
        the response message parsed by ``json_format.Parse``.
        """
        wrapped = {
            name: {"value": value if isinstance(value, list) else [value]}
            for name, value in cls._http_settings(**overrides).items()
        }
        response = grpcclient.service_pb2.TraceSettingResponse()
        json_format.Parse(json.dumps({"settings": wrapped}), response)
        return response

    def _assert_unknown_model(self, trace_call, **kwargs):
        """Assert that ``trace_call`` rejects the model "does-not-exist".

        ``trace_call`` is a client method accepting ``model_name``; extra
        keyword arguments are forwarded. Unlike a bare try/except around the
        call, this fails the test when no error is raised at all.
        """
        with self.assertRaises(InferenceServerException) as raised:
            trace_call(model_name="does-not-exist", **kwargs)
        self.assertIn(
            "Request for unknown model : does-not-exist",
            raised.exception.message(),
        )

    def tearDown(self):
        # Clear all trace settings to initial state.
        # Note that the tearDown function uses HTTP client so the pass/fail
        # of the HTTP trace setting test cases should be checked to make sure
        # tearDown() is properly executed and not affecting start state of
        # other test cases
        clear_settings = {
            "trace_level": None,
            "trace_rate": None,
            "trace_count": None,
            "log_frequency": None,
        }
        triton_client = httpclient.InferenceServerClient("localhost:8000")
        triton_client.update_trace_settings(
            model_name="simple", settings=clear_settings
        )
        triton_client.update_trace_settings(model_name=None, settings=clear_settings)

    def check_server_initial_state(self):
        # Helper function to make sure the trace setting is properly
        # initialized / reset before actually running the test case.
        # Note that this function uses HTTP client so the pass/fail of
        # the HTTP trace setting test cases should be checked to make sure
        # the initial state is checked properly before running other test cases.
        initial_settings = self._http_settings()
        triton_client = httpclient.InferenceServerClient("localhost:8000")
        self.assertEqual(
            initial_settings, triton_client.get_trace_settings(model_name="simple")
        )
        self.assertEqual(initial_settings, triton_client.get_trace_settings())

    def test_http_get_settings(self):
        # Model trace settings will be the same as global trace settings since
        # no update has been made.
        initial_settings = self._http_settings()
        triton_client = httpclient.InferenceServerClient("localhost:8000")
        self.assertEqual(
            initial_settings,
            triton_client.get_trace_settings(model_name="simple"),
            "Unexpected initial model trace settings",
        )
        self.assertEqual(
            initial_settings,
            triton_client.get_trace_settings(),
            "Unexpected initial global settings",
        )
        self._assert_unknown_model(triton_client.get_trace_settings)

    def test_grpc_get_settings(self):
        # Model trace settings will be the same as global trace settings since
        # no update has been made.
        initial_settings = self._grpc_settings()

        triton_client = grpcclient.InferenceServerClient("localhost:8001")
        self.assertEqual(
            initial_settings,
            triton_client.get_trace_settings(model_name="simple"),
            "Unexpected initial model trace settings",
        )
        self.assertEqual(
            initial_settings,
            triton_client.get_trace_settings(),
            "Unexpected initial global settings",
        )
        self._assert_unknown_model(triton_client.get_trace_settings)

    def test_http_update_settings(self):
        # Update model and global trace settings in order,
        # and expect the global trace settings will only reflect to
        # the model setting fields that haven't been specified.
        self.check_server_initial_state()

        # The rejected 'trace_file' update must leave the model untouched.
        expected_first_model_settings = self._http_settings()
        expected_first_model_response = {
            "error": "trace file location can not be updated through network protocol"
        }
        expected_second_model_settings = self._http_settings(
            trace_level=["TIMESTAMPS", "TENSORS"]
        )
        expected_global_settings = self._http_settings(
            trace_level=["TIMESTAMPS", "TENSORS"]
        )

        model_update_settings = {"trace_file": "model.log"}
        global_update_settings = {
            "trace_level": ["TIMESTAMPS", "TENSORS"],
        }

        triton_client = httpclient.InferenceServerClient("localhost:8000")
        with self.assertRaisesRegex(
            InferenceServerException, expected_first_model_response["error"]
        ):
            triton_client.update_trace_settings(
                model_name="simple", settings=model_update_settings
            )
        self.assertEqual(
            expected_first_model_settings,
            triton_client.get_trace_settings(model_name="simple"),
            "Unexpected model trace settings after rejected model update",
        )
        # Note that 'trace_level' may be mismatch due to the order of
        # the levels listed, currently we assume the order is the same
        # for simplicity. But the order shouldn't be enforced and this checking
        # needs to be improved when this kind of failure is reported
        self.assertEqual(
            expected_global_settings,
            triton_client.update_trace_settings(settings=global_update_settings),
            "Unexpected updated global settings",
        )
        self.assertEqual(
            expected_second_model_settings,
            triton_client.get_trace_settings(model_name="simple"),
            "Unexpected model trace settings after global update",
        )
        self._assert_unknown_model(
            triton_client.update_trace_settings, settings=model_update_settings
        )

    def test_grpc_update_settings(self):
        # Update model and global trace settings in order,
        # and expect the global trace settings will only reflect to
        # the model setting fields that haven't been specified.
        # NOTE(review): unlike the HTTP variant, this test does not exercise
        # the rejected 'trace_file' update on model 'simple'.
        self.check_server_initial_state()

        expected_second_model_settings = self._grpc_settings(
            trace_level=["TIMESTAMPS", "TENSORS"]
        )
        expected_global_settings = self._grpc_settings(
            trace_level=["TIMESTAMPS", "TENSORS"]
        )

        model_update_settings = {"trace_file": "model.log"}
        global_update_settings = {
            "trace_level": ["TIMESTAMPS", "TENSORS"],
        }

        triton_client = grpcclient.InferenceServerClient("localhost:8001")
        # Note that 'trace_level' may be mismatch due to the order of
        # the levels listed, currently we assume the order is the same
        # for simplicity. But the order shouldn't be enforced and this checking
        # needs to be improved when this kind of failure is reported
        self.assertEqual(
            expected_global_settings,
            triton_client.update_trace_settings(settings=global_update_settings),
            "Unexpected updated global settings",
        )
        self.assertEqual(
            expected_second_model_settings,
            triton_client.get_trace_settings(model_name="simple"),
            "Unexpected model trace settings after global update",
        )
        self._assert_unknown_model(
            triton_client.update_trace_settings, settings=model_update_settings
        )

    def test_http_clear_settings(self):
        # Clear global and model trace settings in order,
        # and expect the default / global trace settings are
        # propagated properly.
        self.check_server_initial_state()

        # First set up the model / global trace setting that:
        # model 'simple' has 'trace_rate' and 'log_frequency' specified
        # global has 'trace_level', 'trace_count' and 'trace_rate' specified
        triton_client = httpclient.InferenceServerClient("localhost:8000")
        triton_client.update_trace_settings(
            model_name="simple", settings={"trace_rate": "12", "log_frequency": "34"}
        )
        triton_client.update_trace_settings(
            settings={"trace_rate": "56", "trace_count": "78", "trace_level": ["OFF"]}
        )

        expected_global_settings = self._http_settings(trace_level=["OFF"])
        expected_first_model_settings = self._http_settings(
            trace_level=["OFF"], trace_rate="12", log_frequency="34"
        )
        expected_second_model_settings = self._http_settings(
            trace_level=["OFF"], log_frequency="34"
        )
        global_clear_settings = {"trace_rate": None, "trace_count": None}
        model_clear_settings = {"trace_rate": None, "trace_level": None}

        # Clear global
        self.assertEqual(
            expected_global_settings,
            triton_client.update_trace_settings(settings=global_clear_settings),
            "Unexpected cleared global trace settings",
        )
        self.assertEqual(
            expected_first_model_settings,
            triton_client.get_trace_settings(model_name="simple"),
            "Unexpected model trace settings after global clear",
        )
        # Clear model
        self.assertEqual(
            expected_second_model_settings,
            triton_client.update_trace_settings(
                model_name="simple", settings=model_clear_settings
            ),
            "Unexpected model trace settings after model clear",
        )
        self.assertEqual(
            expected_global_settings,
            triton_client.get_trace_settings(),
            "Unexpected global trace settings after model clear",
        )

    def test_grpc_clear_settings(self):
        # Clear global and model trace settings in order,
        # and expect the default / global trace settings are
        # propagated properly.
        self.check_server_initial_state()

        # First set up the model / global trace setting that:
        # model 'simple' has 'trace_rate' and 'log_frequency' specified
        # global has 'trace_level', 'trace_count' and 'trace_rate' specified
        triton_client = grpcclient.InferenceServerClient("localhost:8001")
        triton_client.update_trace_settings(
            model_name="simple", settings={"trace_rate": "12", "log_frequency": "34"}
        )
        triton_client.update_trace_settings(
            settings={"trace_rate": "56", "trace_count": "78", "trace_level": ["OFF"]}
        )

        expected_global_settings = self._grpc_settings(trace_level=["OFF"])
        expected_first_model_settings = self._grpc_settings(
            trace_level=["OFF"], trace_rate="12", log_frequency="34"
        )
        expected_second_model_settings = self._grpc_settings(
            trace_level=["OFF"], log_frequency="34"
        )

        global_clear_settings = {"trace_rate": None, "trace_count": None}
        model_clear_settings = {"trace_rate": None, "trace_level": None}

        # Clear global
        self.assertEqual(
            expected_global_settings,
            triton_client.update_trace_settings(settings=global_clear_settings),
            "Unexpected cleared global trace settings",
        )
        self.assertEqual(
            expected_first_model_settings,
            triton_client.get_trace_settings(model_name="simple"),
            "Unexpected model trace settings after global clear",
        )
        # Clear model
        self.assertEqual(
            expected_second_model_settings,
            triton_client.update_trace_settings(
                model_name="simple", settings=model_clear_settings
            ),
            "Unexpected model trace settings after model clear",
        )
        self.assertEqual(
            expected_global_settings,
            triton_client.get_trace_settings(),
            "Unexpected global trace settings after model clear",
        )


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_trace/trace_stress_grpc_client.py
================================================
#!/usr/bin/env python
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import random
import sys
import time
from functools import partial

import numpy as np
import tritonclient.grpc as grpcclient

if __name__ == "__main__":
    # Client-side timeout applied to every request. tritonclient expresses
    # client_timeout in seconds, so this is 1 s (not 1 ms).
    client_timeout = 1
    url = "localhost:8001"

    try:
        triton_client = grpcclient.InferenceServerClient(url=url)
    except Exception as e:
        print("context creation failed: " + str(e))
        # Exit with a non-zero status so the caller can see the failure.
        sys.exit(1)

    model_name = "identity_fp32"

    # Infer
    inputs = []

    # One row of 50 random FP32 values, shape (1, 50).
    input_data = np.array(
        [random.random() for _ in range(50)], dtype=np.float32
    ).reshape(1, -1)
    model_input = grpcclient.InferInput(
        name="INPUT0", datatype="FP32", shape=input_data.shape
    )
    model_input.set_data_from_numpy(input_data)
    inputs.append(model_input)

    # Define the callback function. Note the last two parameters should be
    # result and error. InferenceServerClient would provide the results of an
    # inference as grpcclient.InferResult in result. For successful
    # inference, error will be None, otherwise it will be an object of
    # tritonclientutils.InferenceServerException holding the error details
    def callback(user_data, result, error):
        if error:
            user_data.append(error)
        else:
            user_data.append(result)

    # list to hold the results of inference.
    user_data = []

    # Inference call: fire 1000 asynchronous requests that share the same
    # input and the same result list.
    for _ in range(1000):
        triton_client.async_infer(
            model_name=model_name,
            inputs=inputs,
            callback=partial(callback, user_data),
            client_timeout=client_timeout,
        )

    # Wait (at most ~20 s) until at least one callback has fired. The
    # remaining requests may still be in flight when the count is printed.
    time_out = 20
    while (len(user_data) == 0) and time_out > 0:
        time_out = time_out - 1
        time.sleep(1)

    print("results: ", len(user_data))


================================================
FILE: qa/L0_triton_repo_agent/models/chain_relocation/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Chains two "relocation" repository agents: the first is configured with
# empty_config="false", the second with empty_config="true".
# qa/L0_triton_repo_agent/test.sh expects this model NOT to produce a
# "Relocation repoagent" poll failure in the server log.
model_repository_agents
{
  agents [
    {
      name: "relocation",
      parameters [
        {
          key: "empty_config",
          value: "false"
        }
      ]
    },
    {
      name: "relocation",
      parameters [
        {
          key: "empty_config",
          value: "true"
        }
      ]
    }
  ]
}

================================================
FILE: qa/L0_triton_repo_agent/models/relocation_sanity_check/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# A single "relocation" repository agent configured with empty_config="true".
# qa/L0_triton_repo_agent/test.sh expects loading this model to fail with a
# poll error stating that the config must not contain the
# 'model_repository_agents' field when 'empty_config' is 'true'.
model_repository_agents
{
  agents [
    {
      name: "relocation",
      parameters [
        {
          key: "empty_config",
          value: "true"
        }
      ]
    }
  ]
}

================================================
FILE: qa/L0_triton_repo_agent/test.sh
================================================
#!/bin/bash
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Repository version: the first CLI argument, when given, takes precedence
# over NVIDIA_TRITON_SERVER_VERSION from the environment.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

source ../common/util.sh

RET=0

TEST_LOG="./triton_repo_agent_test.log"
TRITON_REPO_AGENT_TEST=./repo_agent_test


export CUDA_VISIBLE_DEVICES=0

rm -fr *.log

# Run the repo agent unit test binary; a failure is recorded but does not
# stop the script.
set +e
if ! $TRITON_REPO_AGENT_TEST >>$TEST_LOG 2>&1; then
    echo -e "\n***\n*** Repo Agent Unit Test Failed\n***"
    RET=1
fi
set -e

# Install a fresh copy of the relocation agent library.
rm -rf /opt/tritonserver/repoagents/relocation
mkdir -p /opt/tritonserver/repoagents/relocation &&
    cp libtritonrepoagent_relocation.so /opt/tritonserver/repoagents/relocation/.

SERVER=/opt/tritonserver/bin/tritonserver

SERVER_ARGS="--model-repository=$(pwd)/models"
SERVER_LOG="./inference_server.log"
run_server
# The server is expected to fail to start; a live server is an error.
if [ "$SERVER_PID" != "0" ]; then
    kill $SERVER_PID
    wait $SERVER_PID

    echo -e "\n***\n*** Expect fail to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# 'relocation_sanity_check' must be rejected by the relocation agent.
if ! grep "Poll failed for model directory 'relocation_sanity_check': Relocation repoagent expects config does not contain 'model_repository_agents' field when 'empty_config' has value 'true' for relocation agent" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected repo agent of 'relocation_sanity_check' returns error on load\n***"
    RET=1
fi
# 'chain_relocation' must not report a relocation agent failure.
if grep "Poll failed for model directory 'chain_relocation': Relocation repoagent" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected repo agent of 'chain_relocation' returns success on load\n***"
    RET=1
fi
set -e

if [ $RET -ne 0 ]; then
    cat $TEST_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_trt_bf16_dtype/test.sh
================================================
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Provides run_server and check_test_results.
source ../common/util.sh

# Resolve the data repository version. The first positional argument takes
# precedence over NVIDIA_TRITON_SERVER_VERSION; when TEST_REPO_ARCH is set it
# is appended as a suffix.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [[ $# -ge 1 ]]; then
  REPO_VERSION=$1
fi
if [[ -z "$REPO_VERSION" ]]; then
  echo -e "Repository version must be specified"
  echo -e "\n***\n*** Test Failed\n***"
  exit 1
fi
if [[ -n "$TEST_REPO_ARCH" ]]; then
  REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

RET=0
TRT_TEST="trt_bf16_dtype_test.py"
TEST_RESULT_FILE="./test_results.txt"
SERVER=/opt/tritonserver/bin/tritonserver

# Start from clean model directories and logs, then stage the BF16 plan
# models: fixed-shape ones from qa_model_repository and dynamic-shape ones
# from qa_variable_model_repository.
rm -rf ./fixed_models/ ./dynamic_models/ *.log* && mkdir ./fixed_models/ ./dynamic_models/
cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/plan_*bf16_bf16_bf16 ./fixed_models/
cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/plan_*bf16_bf16_bf16 ./dynamic_models/

# Each pass starts its own server on the matching repository and runs the
# single unittest method named after the pass (test_fixed / test_dynamic).
for TEST in fixed dynamic; do
  MODELDIR="./${TEST}_models"
  CLIENT_LOG="./${TEST}_client.log"
  SERVER_LOG="./${TEST}_inference_server.log"
  SERVER_ARGS="--model-repository=${MODELDIR} --log-verbose=1"

  run_server
  if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
  fi

  # Client failures are recorded in RET so the server can still be shut down.
  set +e
  if ! python3 $TRT_TEST TrtBF16DataTypeTest.test_${TEST} >>$CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Running $TRT_TEST TrtBF16DataTypeTest.test_${TEST} Failed\n***"
    cat $CLIENT_LOG
    RET=1
  elif ! check_test_results $TEST_RESULT_FILE 1; then
    # The run exited cleanly but the result file does not report exactly
    # one passing test.
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
  fi
  set -e

  kill $SERVER_PID
  wait $SERVER_PID
done

if [[ $RET -ne 0 ]]; then
  echo -e "\n***\n*** Test Failed\n***"
else
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_trt_bf16_dtype/trt_bf16_dtype_test.py
================================================
#!/usr/bin/env python3
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import sys

sys.path.append("../common")

import unittest

import numpy as np
import test_util as tu
import tritonclient.http as client


class TrtBF16DataTypeTest(tu.TestResultCollector):
    """Inference checks for the `plan_*bf16_bf16_bf16` models over HTTP."""

    def setUp(self):
        # Every test case gets its own verbose HTTP client.
        self.triton_client = client.InferenceServerClient(
            "localhost:8000", verbose=True
        )

    def _infer_helper(self, model_name, shape):
        """Infer on `model_name` with all-ones inputs of `shape`.

        Asserts that OUTPUT0 equals INPUT0 + INPUT1 and that OUTPUT1 equals
        INPUT0 - INPUT1.
        """
        # The tensors are declared as BF16 while the numpy payloads handed to
        # the client are float32 arrays.
        lhs = np.ones(shape=shape).astype(np.float32)
        rhs = np.ones(shape=shape).astype(np.float32)

        inputs = []
        for tensor_name, payload in (("INPUT0", lhs), ("INPUT1", rhs)):
            tensor = client.InferInput(tensor_name, shape, "BF16")
            tensor.set_data_from_numpy(payload, binary_data=True)
            inputs.append(tensor)

        outputs = [
            client.InferRequestedOutput(tensor_name, binary_data=True)
            for tensor_name in ("OUTPUT0", "OUTPUT1")
        ]

        results = self.triton_client.infer(model_name, inputs, outputs=outputs)

        expectations = (
            ("OUTPUT0", lhs + rhs),
            ("OUTPUT1", lhs - rhs),
        )
        for tensor_name, expected in expectations:
            np.testing.assert_equal(
                results.as_numpy(tensor_name),
                expected,
                "Result output does not match the expected output",
            )

    def test_fixed(self):
        """Batched requests of size 1, 4 and 8, then the non-batching model."""
        cases = [("plan_bf16_bf16_bf16", [bs, 16]) for bs in (1, 4, 8)]
        cases.append(("plan_nobatch_bf16_bf16_bf16", [16]))
        for model_name, shape in cases:
            self._infer_helper(model_name, shape)

    def test_dynamic(self):
        """Same as test_fixed but with an extra dimension of 16 per tensor."""
        cases = [("plan_bf16_bf16_bf16", [bs, 16, 16]) for bs in (1, 4, 8)]
        cases.append(("plan_nobatch_bf16_bf16_bf16", [16, 16]))
        for model_name, shape in cases:
            self._infer_helper(model_name, shape)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_trt_compat/test.sh
================================================
#!/bin/bash
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the data repository version. The first positional argument takes
# precedence over NVIDIA_TRITON_SERVER_VERSION; when TEST_REPO_ARCH is set it
# is appended as a suffix.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

TEST_RESULT_FILE='test_results.txt'
COMPATIBILITY_TEST_PY=trt_compatibility_test.py
CLIENT_LOG="client.log"
DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models --exit-timeout-secs=120"
SERVER_LOG="./inference_server.log"
# Provides run_server and check_test_results.
source ../common/util.sh

# Stage only the version-compatible identity plan model.
rm -fr models && mkdir models
cp -r $DATADIR/qa_identity_model_repository/plan_compatible_zero_1_float32 models/.

RET=0

# Refuse to run while another tritonserver is visible in `ps`.
if [ `ps | grep -c "tritonserver"` != "0" ]; then
    echo -e "Tritonserver already running"
    echo -e `ps | grep tritonserver`
    exit 1
fi

# First launch: without the version-compatible backend option the server is
# expected to FAIL to start.
run_server
if [ "$SERVER_PID" != "0" ]; then
    cat $SERVER_LOG
    # Print the banner on the console as well as appending it to the client
    # log, so the reason for the early exit is visible like every other
    # failure banner in this script.
    echo -e "\n***\n*** FAILED: unexpected server start (version compatibility disabled): $SERVER\n***" | tee -a $CLIENT_LOG
    kill $SERVER_PID
    wait $SERVER_PID
    exit 1
fi

# The failed start must be caused by the expected deserialization error.
EXPECTED_ERR="Cannot deserialize engine with lean runtime"
if ! grep "$EXPECTED_ERR" $SERVER_LOG; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed to find expected error: ${EXPECTED_ERR} \n***"
    RET=1
fi

# Second launch: with version-compatible=true the server must start.
SERVER_ARGS="--model-repository=`pwd`/models --exit-timeout-secs=120 --backend-config=tensorrt,version-compatible=true"

run_server
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** FAILED: unsuccessful server start (version compatibility enabled): $SERVER\n***"
    exit 1
fi

# Client failures are recorded in RET so the server can still be shut down.
set +e

python $COMPATIBILITY_TEST_PY >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    # The run exited cleanly; confirm the result file reports one test.
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_trt_compat/trt_compatibility_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import infer_util as iu
import numpy as np
import test_util as tu


class TrtCompatibilityTest(tu.TestResultCollector):
    """Inference check for the `plan_compatible` identity model."""

    def setUp(self):
        # Element type passed to the inference helper.
        self._data_type = np.float32

    def test_plan(self):
        """Run one `infer_zero` pass using [2, 4] for both shape arguments."""
        # plan_compatible_zero_1_float32 is an identity model with input shape [-1]
        first_shapes = [[2, 4]]
        second_shapes = [[2, 4]]
        iu.infer_zero(
            self,
            "plan_compatible",
            1,
            self._data_type,
            first_shapes,
            second_shapes,
        )


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_trt_data_dependent_shape/test.sh
================================================
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the data repository version. The first positional argument takes
# precedence over NVIDIA_TRITON_SERVER_VERSION; when TEST_REPO_ARCH is set it
# is appended as a suffix.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [[ $# -ge 1 ]]; then
    REPO_VERSION=$1
fi
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

TEST_RESULT_FILE='test_results.txt'
export CUDA_VISIBLE_DEVICES=0

TRT_TEST=trt_data_dependent_shape_test.py

DATADIR="./models"

# Replace any previous model directory with a fresh copy of the
# data-dependent-shape model repository.
rm -rf ${DATADIR}
cp -r /data/inferenceserver/${REPO_VERSION}/qa_trt_data_dependent_model_repository/ ${DATADIR}

# Provides run_server and check_test_results.
source ../common/util.sh

rm -f *.log*

RET=0

CLIENT_LOG="./client.log"
SERVER_LOG="./inference_server.log"
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR"

run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Client failures are recorded in RET so the server can still be shut down.
set +e
if ! python $TRT_TEST >>$CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 2; then
    # The run exited cleanly but the result file does not report the two
    # expected tests.
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if [[ $RET -ne 0 ]]; then
  echo -e "\n***\n*** Test Failed\n***"
else
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_trt_data_dependent_shape/trt_data_dependent_shape_test.py
================================================
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import numpy as np
import test_util as tu
import tritonclient.http as client


class TrtDataDependentShapeTest(tu.TestResultCollector):
    """Compare the `plan_nobatch_nonzero_*` model outputs with `np.nonzero`."""

    def setUp(self):
        # Every test case gets its own verbose HTTP client.
        self.triton_client = client.InferenceServerClient(
            "localhost:8000", verbose=True
        )

    def _check_nonzero(self, model_name, input_np, shape):
        """Send `input_np` as INT32 "INPUT" and compare "OUTPUT" to np.nonzero."""
        expected_output_np = np.nonzero(input_np)

        tensor = client.InferInput("INPUT", shape, "INT32")
        tensor.set_data_from_numpy(input_np)

        results = self.triton_client.infer(model_name=model_name, inputs=[tensor])
        # Validate the results by comparing with precomputed values.
        output_np = results.as_numpy("OUTPUT")
        self.assertTrue(
            np.array_equal(output_np, expected_output_np),
            "OUTPUT expected: {}, got {}".format(expected_output_np, output_np),
        )

    def test_fixed(self):
        """4x4 input holding the values 0..15."""
        input_np = np.arange(16, dtype=np.int32).reshape((4, 4))
        self._check_nonzero("plan_nobatch_nonzero_fixed", input_np, [4, 4])

    def test_dynamic(self):
        """20x16 input where every odd flat index is zeroed out."""
        values = [i if (i % 2) == 0 else 0 for i in range(20 * 16)]
        input_np = np.array(values, dtype=np.int32).reshape((20, 16))
        self._check_nonzero("plan_nobatch_nonzero_dynamic", input_np, [20, 16])


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_trt_dla/dla_test.py
================================================
#!/usr/bin/env python
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import numpy as np
import test_util as tu
import tritonclient.http as httpclient
from PIL import Image


class InferTest(tu.TestResultCollector):
    """Batched INT8 classification request against the `resnet50_plan` model."""

    def _preprocess(self, img, dtype):
        """
        Pre-process an image to meet the size and type
        requirements specified by the parameters.

        The image is converted to RGB, resized to 224x224 with bilinear
        resampling, cast to `dtype`, offset by the per-channel constants
        (123, 117, 104) and returned with the channel axis first.
        """

        sample_img = img.convert("RGB")
        resized_img = sample_img.resize((224, 224), Image.BILINEAR)
        resized = np.array(resized_img)

        typed = resized.astype(dtype)
        scaled = typed - np.asarray((123, 117, 104), dtype=dtype)
        # HWC -> CHW
        ordered = np.transpose(scaled, (2, 0, 1))

        return ordered

    def test_resnet50(self):
        """Classify a batch of 32 copies of the vulture image.

        Every batch entry must report the vulture class as its first
        `topk_layer_output_index` value.
        """
        try:
            triton_client = httpclient.InferenceServerClient(url="localhost:8000")
        except Exception as e:
            print("channel creation failed: " + str(e))
            sys.exit(1)

        image_filename = "../images/vulture.jpeg"
        model_name = "resnet50_plan"
        batch_size = 32

        img = Image.open(image_filename)
        image_data = self._preprocess(img, np.int8)
        image_data = np.expand_dims(image_data, axis=0)

        # Build the batch in one allocation instead of re-concatenating (and
        # re-copying) the growing array once per batch entry.
        batched_image_data = np.repeat(image_data, batch_size, axis=0)

        inputs = [
            httpclient.InferInput("input_tensor_0", [batch_size, 3, 224, 224], "INT8")
        ]
        inputs[0].set_data_from_numpy(batched_image_data, binary_data=True)

        outputs = [
            httpclient.InferRequestedOutput("topk_layer_output_index", binary_data=True)
        ]

        results = triton_client.infer(model_name, inputs, outputs=outputs)

        output_data = results.as_numpy("topk_layer_output_index")
        print(output_data)

        # Validate the results by comparing with precomputed values.
        # VULTURE class corresponds with index 23
        EXPECTED_CLASS_INDEX = 23
        for i in range(batch_size):
            self.assertEqual(output_data[i][0][0], EXPECTED_CLASS_INDEX)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_trt_dla/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the data repository version. The first positional argument takes
# precedence over NVIDIA_TRITON_SERVER_VERSION; when TEST_REPO_ARCH is set it
# is appended as a suffix.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [[ $# -ge 1 ]]; then
    REPO_VERSION=$1
fi
if [[ -z "$REPO_VERSION" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [[ -n "$TEST_REPO_ARCH" ]]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Only a single PLAN is created, so restrict the run to one device;
# otherwise the test fails on a heterogeneous system.
export CUDA_VISIBLE_DEVICES=0

# This test runs only on jetson, so only the jetson paths need to be set.
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
DLA_TEST=./dla_test.py

DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends

SERVER_ARGS="--model-repository=$(pwd)/models --exit-timeout-secs=120 --backend-directory=${BACKEND_DIR}"
SERVER_LOG="./inference_server.log"
# Provides run_server.
source ../common/util.sh

# Stage the DLA resnet50 plan model and clear old logs.
rm -fr models && mkdir models
cp -r $DATADIR/trt_dla_model_store/resnet50_plan models/.
rm -f *.log

run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

RET=0
CLIENT_LOG=client.log

# A client failure is recorded in RET so the server can still be shut down.
set +e

if ! python3 $DLA_TEST >> $CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

rm -rf models

if [[ $RET -ne 0 ]]; then
    echo -e "\n***\n*** Test FAILED\n***"
else
    echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_trt_dynamic_shape/test.sh
================================================
#!/bin/bash
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model repository version: an explicit first argument takes precedence over
# the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append TEST_REPO_ARCH as a suffix when it is set.
if [[ -n "${TEST_REPO_ARCH}" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

TEST_RESULT_FILE='test_results.txt'
export CUDA_VISIBLE_DEVICES=0

# perf_analyzer is used below to send requests with out-of-range shapes.
pip3 install perf_analyzer

PERF_CLIENT=perf_analyzer
TRT_OP_TEST=trt_dynamic_shape_test.py
CLIENT_LOG="./client.log"

# Start from an empty model repository that contains only the
# plan_float32_float32_float32-4-32 model.
DATADIR="./models"
rm -rf ${DATADIR}
mkdir -p ${DATADIR}
cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/plan_float32_float32_float32-4-32 ${DATADIR}/

# Server settings; run_server is presumably provided by ../common/util.sh.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

rm -f *.log*

# Overall script result; any failing check below sets it to 1.
RET=0

run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Shapes outside the limits of the optimization profile must be rejected
# with an error message that names the accepted range.
set +e
EXPECTED_MESSAGE="model expected the shape of dimension 1 to be between 4 and 32 but received"

# Dimension 33 is above the accepted range.
$PERF_CLIENT -v -i grpc -u localhost:8001 -m plan_float32_float32_float32-4-32 --shape INPUT0:33 --shape INPUT1:33 -t 1 -p2000 -b 1 > ${CLIENT_LOG}_max 2>&1
EXIT_CODE=$?
# Record the exit code and tool version in the log to help diagnose failures.
echo "perf_analyzer exit code: ${EXIT_CODE}" >> "${CLIENT_LOG}_max"
"${PERF_CLIENT}" --version >> "${CLIENT_LOG}_max" 2>&1 || true

if ! grep -q "${EXPECTED_MESSAGE} 33" ${CLIENT_LOG}_max; then
    cat ${CLIENT_LOG}_max
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Dimension 3 is below the accepted range.
$PERF_CLIENT -v -i grpc -u localhost:8001 -m plan_float32_float32_float32-4-32 --shape INPUT0:3 --shape INPUT1:3 -t 1 -p2000 -b 1 > ${CLIENT_LOG}_min 2>&1
EXIT_CODE=$?
echo "perf_analyzer exit code: ${EXIT_CODE}" >> "${CLIENT_LOG}_min"
"${PERF_CLIENT}" --version >> "${CLIENT_LOG}_min" 2>&1 || true

if ! grep -q "${EXPECTED_MESSAGE} 3" ${CLIENT_LOG}_min; then
    cat ${CLIENT_LOG}_min
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Tests with multiple optimization profiles

# plan_float32_float32_float32 models with dynamic shapes include the following 10 profiles (indices 0-9)
# min, opt, max, idx
# [1, 1], [1, 16], [8, 33], 0 (*)
# [1, 1], [2, 16], [7, 32], 1
# [1, 1], [3, 16], [6, 32], 2
# [1, 1], [4, 16], [5, 32], 3
# [5, 1], [6, 16], [8, 32], 4 (*)
# [6, 1], [6, 16], [8, 32], 5 (*)
# [1, 1], [1, 16], [8, 32], 6
# [1, 33], [1, 33], [1, 33], 7 (static shapes)
# [3, 33], [3, 33], [3, 33], 8 (static shapes)
# [5, 33], [5, 33], [5, 33], 9 (static shapes)
# Replace the model repository with the multi-profile model.
rm -rf ${DATADIR} && rm -f config.pbtxt && mkdir -p ${DATADIR}
cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/plan_float32_float32_float32 ${DATADIR}/

# Stash the unmodified model config; every scenario below derives its own
# config from this copy.
cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/plan_float32_float32_float32/config.pbtxt .

# TrtDynamicShapeTest.test_load_specific_optimization_profile
# The model is restricted to optimization profile 5.
CLIENT_LOG="./test_load_specific_optimization_profile.client.log"
SERVER_LOG="./test_load_specific_optimization_profile.inference_server.log"
cp config.pbtxt ${DATADIR}/plan_float32_float32_float32/config.pbtxt && \
    sed -i "s/profile:.*/profile: [\"5\"]/" ${DATADIR}/plan_float32_float32_float32/config.pbtxt

run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# The client test may fail; record the failure in RET instead of aborting.
set +e
if ! python $TRT_OP_TEST TrtDynamicShapeTest.test_load_specific_optimization_profile >>$CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# TrtDynamicShapeTest.test_load_default_optimization_profile
# The "profile:" setting is stripped from the config so no profile is
# requested explicitly.
CLIENT_LOG="./test_load_default_optimization_profile.client.log"
SERVER_LOG="./test_load_default_optimization_profile.inference_server.log"
cp config.pbtxt ${DATADIR}/plan_float32_float32_float32/config.pbtxt && \
    sed -i "s/profile:.*//" ${DATADIR}/plan_float32_float32_float32/config.pbtxt

run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# The client test may fail; record the failure in RET instead of aborting.
set +e
if ! python $TRT_OP_TEST TrtDynamicShapeTest.test_load_default_optimization_profile >>$CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# TrtDynamicShapeTest.test_select_optimization_profile
# Note that this test needs to check server log for which OP is used
#
# finding OP that best fit the input shape:
#     load OP 0, 1, 2, 3, send [4 16] and 3 should be used
# Verbose logging is enabled so the server log reports which profile executes.
SERVER_ARGS="--model-repository=$DATADIR --log-verbose=1"
CLIENT_LOG="./test_select_optimization_profile.client.best.log"
SERVER_LOG="./test_select_optimization_profile.inference_server.best.log"
(cp config.pbtxt ${DATADIR}/plan_float32_float32_float32/config.pbtxt && \
        sed -i "s/max_batch_size:.*/max_batch_size: 5/" ${DATADIR}/plan_float32_float32_float32/config.pbtxt && \
        sed -i "s/profile:.*/profile: [\"0\", \"1\", \"2\", \"3\"]/" ${DATADIR}/plan_float32_float32_float32/config.pbtxt)

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $TRT_OP_TEST TrtDynamicShapeTest.test_select_optimization_profile >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    # Dump the client log on failure, as every other phase of this script
    # does, so the cause is visible in the test output.
    cat $CLIENT_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

# The server log must show that profile 3 executed the request.
set +e
grep "Context with profile 3 \[3\] is being executed for " $SERVER_LOG
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Failed. Expected profile 3 is used\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Profile selection must also respect each profile's allowed shape range:
#     load OP 0, 5, send [4 16] and 0 should be used
#     (OP 5 is the best in terms of OPT dims, but it requires min dims [6, 1])
CLIENT_LOG="./test_select_optimization_profile.client.allow.log"
SERVER_LOG="./test_select_optimization_profile.inference_server.allow.log"
cp config.pbtxt ${DATADIR}/plan_float32_float32_float32/config.pbtxt && \
    sed -i "s/profile:.*/profile: [\"0\", \"5\"]/" ${DATADIR}/plan_float32_float32_float32/config.pbtxt

run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# The client test may fail; record the failure in RET instead of aborting.
set +e
if ! python $TRT_OP_TEST TrtDynamicShapeTest.test_select_optimization_profile >>$CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi
set -e

# The server log must show that profile 0 executed the request.
set +e
if ! grep "Context with profile 0 \[0\] is being executed for " test_select_optimization_profile.inference_server.allow.log; then
    echo -e "\n***\n*** Failed. Expected profile 0 is used\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# TrtDynamicShapeTest.test_load_wrong_optimization_profile
# The config requests profile "100". The server is started with
# --exit-on-error=false and --strict-readiness=false, and the client test
# then checks that the model is not ready.
SERVER_ARGS="--model-repository=$DATADIR --exit-on-error=false --strict-readiness=false"
CLIENT_LOG="./test_load_wrong_optimization_profile.client.log"
SERVER_LOG="./test_load_wrong_optimization_profile.inference_server.log"
cp config.pbtxt ${DATADIR}/plan_float32_float32_float32/config.pbtxt && \
    sed -i "s/profile:.*/profile: [\"100\"]/" ${DATADIR}/plan_float32_float32_float32/config.pbtxt

run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# The client test may fail; record the failure in RET instead of aborting.
set +e
if ! python $TRT_OP_TEST TrtDynamicShapeTest.test_load_wrong_optimization_profile >>$CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID


# Test cases for multiple optimization profiles with static shapes.
# Only the following static-shape profiles are loaded:
# Profile 7: [1, 33]
# Profile 8: [3, 33]
# Profile 9: [5, 33]

# Use dedicated log files so this phase does not write into the logs of the
# previous test.
CLIENT_LOG="./test_static_shape_profiles.client.log"
SERVER_LOG="./test_static_shape_profiles.inference_server.log"

(cd  ${DATADIR}/plan_float32_float32_float32/ && \
            rm -f config.pbtxt && \
            echo "instance_group { profile : [\"7\", \"8\", \"9\" ] }" >> config.pbtxt)
SERVER_ARGS="--model-repository=$DATADIR --strict-model-config=false"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Batch size 5 with dimension 33 corresponds to profile 9 and must succeed.
$PERF_CLIENT -v -i grpc -u localhost:8001 -m plan_float32_float32_float32 --shape INPUT0:33 --shape INPUT1:33 -t 1 -p2000 -b 5 > ${CLIENT_LOG}_static_pass 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}_static_pass
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Batch size 6 is above the largest batch size the model accepts (5) and
# must be rejected.
$PERF_CLIENT -v -i grpc -u localhost:8001 -m plan_float32_float32_float32 --shape INPUT0:33 --shape INPUT1:33 -t 1 -p2000 -b 6 > ${CLIENT_LOG}_static_fail 2>&1
EXIT_CODE=$?
# Record the exit code and tool version in the log to help diagnose failures.
echo "perf_analyzer exit code: ${EXIT_CODE}" >> "${CLIENT_LOG}_static_fail"
"${PERF_CLIENT}" --version >> "${CLIENT_LOG}_static_fail" 2>&1 || true

if [ $(cat ${CLIENT_LOG}_static_fail | grep "inference request batch-size must be <= 5" | wc -l) -eq 0 ]; then
    cat ${CLIENT_LOG}_static_fail
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

# Batch size 2 matches none of the loaded static profiles (1, 3, 5) and must
# be rejected.
$PERF_CLIENT -v -i grpc -u localhost:8001 -m plan_float32_float32_float32 --shape INPUT0:33 --shape INPUT1:33 -t 1 -p2000 -b 2 > ${CLIENT_LOG}_static_bs_2 2>&1
EXIT_CODE=$?
echo "perf_analyzer exit code: ${EXIT_CODE}" >> "${CLIENT_LOG}_static_bs_2"
"${PERF_CLIENT}" --version >> "${CLIENT_LOG}_static_bs_2" 2>&1 || true

if [ $(cat ${CLIENT_LOG}_static_bs_2 | grep "model expected the shape of dimension 0 to be between 1 and 1 but received 2" | wc -l) -eq 0 ]; then
    cat ${CLIENT_LOG}_static_bs_2
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi


set -e

kill $SERVER_PID
wait $SERVER_PID

# Tests for multiple optimization profile with static shapes and dynamic batching.
# Profile 10: [1, 1], [1, 16], [1, 33]
# Profile 11: [2, 1], [2, 16], [2, 33]
# Profile 12: [3, 1], [3, 16], [3, 33]
# Profile 13: [4, 1], [4, 16], [4, 33]
# Profile 14: [5, 1], [5, 16], [5, 33]
# Profile 15: [6, 1], [6, 16], [6, 33]
# Profile 16: [7, 1], [7, 16], [7, 33]
# Profile 17: [8, 1], [8, 16], [8, 33]

# Use dedicated log files so this phase does not write into the logs of an
# earlier test.
CLIENT_LOG="./test_dynamic_batching_profiles.client.log"
SERVER_LOG="./test_dynamic_batching_profiles.inference_server.log"

# Generate a config that loads profiles 10 through 17 and enables dynamic
# batching.
(cd  ${DATADIR}/plan_float32_float32_float32/ && \
            rm -f config.pbtxt && \
            echo "instance_group { profile : [" >> config.pbtxt && \
            for i in {10..16}; do echo "\"${i}\"," >> config.pbtxt; done && \
            echo " \"17\"] }" >> config.pbtxt && \
            echo "dynamic_batching {}" >> config.pbtxt)

SERVER_ARGS="--model-repository=$DATADIR --strict-model-config=false"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
# Issue requests with 16 concurrent threads (-t 16); the run must succeed.
$PERF_CLIENT -v -i grpc -u localhost:8001 -m plan_float32_float32_float32 --shape INPUT0:33 --shape INPUT1:33 -t 16 -p2000 > ${CLIENT_LOG}_db_pass 2>&1
if [ $? -ne 0 ]; then
    cat ${CLIENT_LOG}_db_pass
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Report the overall verdict and propagate it as the exit status.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test Failed\n***"
fi

exit $RET


================================================
FILE: qa/L0_trt_dynamic_shape/trt_dynamic_shape_test.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import infer_util as iu
import numpy as np
import test_util as tu
import tritonhttpclient
from tritonclientutils import InferenceServerException


class TrtDynamicShapeTest(tu.TestResultCollector):
    """Client-side checks for TensorRT optimization profile handling.

    Each test assumes the server was started with a specific optimization
    profile configuration for the float32 "plan" model; the test itself only
    sends requests and verifies the outcome.
    """

    def setUp(self):
        self.dtype_ = np.float32
        self.model_name_ = "plan"

    def _infer(self, shape, batch_size):
        # Run iu.infer_exact against the model using float32 for the input
        # and both outputs.
        iu.infer_exact(
            self,
            self.model_name_,
            shape,
            batch_size,
            self.dtype_,
            self.dtype_,
            self.dtype_,
        )

    def _assert_infer_succeeds(self, shape, batch_size):
        # The request must be accepted; any server error fails the test.
        try:
            self._infer(shape, batch_size)
        except InferenceServerException as ex:
            self.fail("unexpected error {}".format(ex))

    def _assert_infer_rejected(self, shape, batch_size, expected_msg):
        # The request must be rejected with an error containing
        # 'expected_msg'. assertRaises makes the test fail when the server
        # unexpectedly accepts the request, instead of passing silently.
        with self.assertRaises(InferenceServerException) as cm:
            self._infer(shape, batch_size)
        self.assertIn(expected_msg, cm.exception.message())

    def test_load_specific_optimization_profile(self):
        # Only OP 5 should be available, which only allows batch sizes
        # 6 through 8
        tensor_shape = (1,)
        self._assert_infer_rejected(
            (1,) + tensor_shape,
            1,
            "model expected the shape of dimension 0 to be between 6 and 8 but received 1",
        )
        self._assert_infer_succeeds((8,) + tensor_shape, 8)

    def test_load_default_optimization_profile(self):
        # Only default OP (OP 0) has max tensor shape 33
        tensor_shape = (33,)
        self._assert_infer_succeeds((8,) + tensor_shape, 8)

        over_tensor_shape = (34,)
        self._assert_infer_rejected(
            (8,) + over_tensor_shape,
            8,
            "model expected the shape of dimension 1 to be between 1 and 33 but received 34",
        )

    def test_select_optimization_profile(self):
        # Different profile has different optimized input shape
        batch_size = 4
        tensor_shape = (16,)
        self._assert_infer_succeeds((batch_size,) + tensor_shape, batch_size)

    def test_load_wrong_optimization_profile(self):
        # The model must not be ready.
        client = tritonhttpclient.InferenceServerClient("localhost:8000")
        model_name = tu.get_model_name(
            self.model_name_, self.dtype_, self.dtype_, self.dtype_
        )
        model_status = client.is_model_ready(model_name, "1")
        self.assertFalse(model_status, "expected model to be not ready")


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_trt_error_propagation/test.sh
================================================
#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

export CUDA_VISIBLE_DEVICES=0
SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh

# Build a model repository whose only model is a TensorRT model with an
# invalid plan file.
rm -rf models && mkdir models
mkdir models/invalid_plan_file && (
    cd models/invalid_plan_file && \
    { echo -e "name: \"invalid_plan_file\"" && \
      echo -e "platform: \"tensorrt_plan\"" && \
      echo -e "input [\n {\n name: \"INPUT\"\n data_type: TYPE_FP32\n dims: [-1]\n }\n ]" && \
      echo -e "output [\n {\n name: \"OUTPUT\"\n data_type: TYPE_FP32\n dims: [-1]\n }\n ]"; } >> config.pbtxt && \
    mkdir 1 && \
    echo "----- invalid model.plan -----" >> 1/model.plan
)

# Run once with config auto-complete enabled and once with it disabled.
for ENABLE_AUTOCOMPLETE in "YES" "NO"; do

    SERVER_ARGS="--model-repository=models --model-control-mode=explicit"
    case "$ENABLE_AUTOCOMPLETE" in
        YES)
            TEST_NAME="test_invalid_trt_model_autocomplete"
            ;;
        *)
            TEST_NAME="test_invalid_trt_model"
            SERVER_ARGS="${SERVER_ARGS} --disable-auto-complete-config"
            ;;
    esac

    SERVER_LOG="./$TEST_NAME.server.log"
    run_server
    if [[ "$SERVER_PID" == "0" ]]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    RET=0

    # The client test may fail; record the failure so the server is still
    # shut down before the script exits.
    set +e
    if ! python trt_error_propagation_test.py TestTrtErrorPropagation.$TEST_NAME > $TEST_NAME.log 2>&1; then
        cat $TEST_NAME.log
        echo -e "\n***\n*** Test FAILED\n***"
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    # Stop at the first failing configuration.
    [ $RET -eq 0 ] || exit $RET

done

# Exit with success
echo -e "\n***\n*** Test Passed\n***"
exit 0


================================================
FILE: qa/L0_trt_error_propagation/trt_error_propagation_test.py
================================================
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import unittest

import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class TestTrtErrorPropagation(unittest.TestCase):
    """Check that errors from loading an invalid TensorRT plan reach the client."""

    def setUp(self):
        # Initialize client
        self.__triton = grpcclient.InferenceServerClient("localhost:8001", verbose=True)

    def test_invalid_trt_model(self):
        # Loading the invalid plan must fail, and the error must carry the
        # expected chain of messages.
        with self.assertRaises(InferenceServerException) as cm:
            self.__triton.load_model("invalid_plan_file")
        err_msg = str(cm.exception)
        # All 'expected_msg_parts' should be present in the 'err_msg' in order
        expected_msg_parts = [
            "load failed for model",
            "version 1 is at UNAVAILABLE state: ",
            "Internal: unable to create TensorRT engine: ",
            "Error Code ",
            "Internal Error ",
        ]
        for expected_msg_part in expected_msg_parts:
            self.assertIn(
                expected_msg_part,
                err_msg,
                "Cannot find an expected part of error message",
            )
            # Keep only the text after the FIRST occurrence so the next part
            # must appear later in the message. partition() is used instead
            # of a two-way unpacking of split(), which raises ValueError when
            # a part (e.g. "Error Code ") occurs more than once.
            _, _, err_msg = err_msg.partition(expected_msg_part)

    def test_invalid_trt_model_autocomplete(self):
        # With auto-complete enabled the load must fail while the plan file
        # is read to complete the model config.
        with self.assertRaises(InferenceServerException) as cm:
            self.__triton.load_model("invalid_plan_file")
        err_msg = str(cm.exception)
        self.assertIn(
            "Internal: unable to load plan file to auto complete config",
            err_msg,
            "Caught an unexpected exception",
        )


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_trt_plugin/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model repository version: an explicit first argument takes precedence over
# the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
if [[ -z "${REPO_VERSION}" ]]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append TEST_REPO_ARCH as a suffix when it is set.
if [[ -n "${TEST_REPO_ARCH}" ]]; then
    REPO_VERSION="${REPO_VERSION}_${TEST_REPO_ARCH}"
fi

TEST_RESULT_FILE='test_results.txt'
export CUDA_VISIBLE_DEVICES=0

PLUGIN_TEST=trt_plugin_test.py
CLIENT_LOG="./client.log"

# On windows the paths invoked by the script (running in WSL) must use
# /mnt/c when needed but the paths on the tritonserver command-line
# must be C:/ style.
# Values already present in the environment are kept; the paths below are
# only defaults (except BACKEND_DIR and SERVER on Linux, which are always
# derived from TRITON_DIR).
if [[ -v WSL_DISTRO_NAME || -v MSYSTEM ]]; then
    DATADIR=${DATADIR:-"/mnt/c/data/inferenceserver/${REPO_VERSION}"}
    MODELDIR=${MODELDIR:-C:/models}
    CUSTOMPLUGIN=${CUSTOMPLUGIN:-$MODELDIR/HardmaxPlugin.dll}
    BACKEND_DIR=${BACKEND_DIR:-C:/tritonserver/backends}
    SERVER=${SERVER:-/mnt/c/tritonserver/bin/tritonserver.exe}
    TEST_WINDOWS=1
else
    DATADIR=${DATADIR:-"/data/inferenceserver/${REPO_VERSION}"}
    MODELDIR=${MODELDIR:-$(pwd)/models}
    CUSTOMPLUGIN=${CUSTOMPLUGIN:-$MODELDIR/libcustomHardmaxPlugin.so}
    TRITON_DIR=${TRITON_DIR:-"/opt/tritonserver"}
    BACKEND_DIR=${TRITON_DIR}/backends
    SERVER=${TRITON_DIR}/bin/tritonserver
fi

source ../common/util.sh

# Overall script result; any failing check below sets it to 1.
RET=0
rm -f ./*.log

# Arguments shared by every server start in this script.
SERVER_ARGS_BASE="--model-repository=${MODELDIR} --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_TIMEOUT=20

# Counter used to give each server start its own log file.
LOG_IDX=0

## Custom Plugin Tests

## Populate a fresh model folder with the custom plugin (Hardmax) models
rm -fr models && mkdir -p models
find $DATADIR/qa_trt_plugin_model_repository/ -maxdepth 1 -iname '*Hardmax*' -exec cp -r {} models \;

LOG_IDX=$((LOG_IDX + 1))

## Baseline Failure Test
## Plugin library not loaded, so the server is expected NOT to start
SERVER_ARGS=$SERVER_ARGS_BASE
SERVER_LOG="./inference_server_$LOG_IDX.log"

run_server
if [[ "$SERVER_PID" != "0" ]]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Test Failed\n"
    echo -e "Unexpected successful server start $SERVER\n***"
    kill_server
    exit 1
fi

LOG_IDX=$((LOG_IDX + 1))

## Backend Config, Plugin Test
## The plugin library is passed through the TensorRT backend config
SERVER_ARGS="${SERVER_ARGS_BASE} --backend-config=tensorrt,plugins=${CUSTOMPLUGIN}"
SERVER_LOG="./inference_server_$LOG_IDX.log"

run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

rm -f $CLIENT_LOG
# The client test may fail; record the failure in RET instead of aborting.
set +e
if ! python3 $PLUGIN_TEST PluginModelTest.test_raw_hard_max >>$CLIENT_LOG 2>&1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
elif ! check_test_results $TEST_RESULT_FILE 1; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi
set -e

kill_server

LOG_IDX=$((LOG_IDX+1))

## LD_PRELOAD, Plugin Test
## LD_PRELOAD is only on Linux

SERVER_LD_PRELOAD=$CUSTOMPLUGIN
SERVER_ARGS=$SERVER_ARGS_BASE
SERVER_LOG="./inference_server_$LOG_IDX.log"

# Skip test for Windows.
# Within this script TEST_WINDOWS is only assigned (to 1) on the Windows
# path, so it is defaulted to 0 here. With an unset value the unquoted
# numeric comparison is malformed and evaluates as false, which would
# silently skip this test on Linux as well.
if [ "${TEST_WINDOWS:-0}" -eq 0 ]; then
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    rm -f $CLIENT_LOG
    # The client test may fail; record the failure in RET instead of aborting.
    set +e
    python3 $PLUGIN_TEST PluginModelTest.test_raw_hard_max >>$CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    else
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e

    kill_server
fi

# Report the overall verdict and propagate it as the exit status.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET


================================================
FILE: qa/L0_trt_plugin/trt_plugin_test.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest

import numpy as np
import test_util as tu
import tritonclient.http as httpclient

# By default, find tritonserver on "localhost", but can be overridden
# with TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


def hardmax_reference(arr, axis=0):
    """Return a one-hot array marking the arg-max of ``arr`` along ``axis``.

    The result has the same shape and dtype as ``arr``: 1 at the position of
    the maximum along ``axis`` (the first one on ties, as ``np.argmax``
    selects it) and 0 everywhere else.
    """
    # Re-insert the reduced axis so the indices line up for put_along_axis.
    winners = np.expand_dims(np.argmax(arr, axis=axis), axis=axis)
    result = np.zeros(shape=arr.shape, dtype=arr.dtype)
    np.put_along_axis(result, winners, 1, axis=axis)
    return result


class PluginModelTest(tu.TestResultCollector):
    """Run plugin-backed models over HTTP and compare against a reference."""

    def _full_exact(self, model_name, plugin_name, shape):
        """Infer on ``<model_name>_<plugin_name>`` and validate ``OUTPUT0``.

        An all-ones FP32 tensor of ``shape`` is sent as ``INPUT0``. Only the
        "CustomHardmax" plugin has a reference implementation here; any other
        plugin name fails the test after the inference has been performed.
        """
        url = f"{_tritonserver_ipaddr}:8000"
        print(url)
        client = httpclient.InferenceServerClient(url)

        infer_input = httpclient.InferInput("INPUT0", list(shape), "FP32")
        ones = np.ones(shape=shape).astype(np.float32)
        infer_input.set_data_from_numpy(ones, binary_data=True)
        requested = httpclient.InferRequestedOutput("OUTPUT0", binary_data=True)

        response = client.infer(
            "_".join((model_name, plugin_name)), [infer_input], outputs=[requested]
        )
        actual = response.as_numpy("OUTPUT0")

        # Only the Hardmax plugin can be verified by this test.
        if plugin_name != "CustomHardmax":
            self.fail("Unexpected plugin: " + plugin_name)

        np.testing.assert_allclose(
            actual,
            hardmax_reference(ones),
            rtol=1e-6,
            atol=1e-7,
        )

    def test_raw_hard_max(self):
        """Check the Hardmax plugin on the batched and the no-batch model."""
        cases = [("plan_float32_float32_float32", (bs, 2, 2)) for bs in (1, 8)]
        cases.append(("plan_nobatch_float32_float32_float32", (16, 1, 1)))
        for model_name, shape in cases:
            self._full_exact(model_name, "CustomHardmax", shape)


# Run the unittest test runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_trt_reformat_free/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first command-line argument wins,
# otherwise fall back to NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Optionally select an architecture-specific repository (<version>_<arch>).
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

TEST_RESULT_FILE='test_results.txt'
# Restrict the test to GPU 0.
export CUDA_VISIBLE_DEVICES=0

# NOTE: CLIENT_LOG is reassigned below before it is first used.
CLIENT_LOG="./client.log"
TRT_TEST=trt_reformat_free_test.py

DATADIR="./models"

# Start from a fresh copy of the pre-generated model repository.
rm -rf ${DATADIR}
cp -r /data/inferenceserver/${REPO_VERSION}/qa_trt_format_model_repository/ ${DATADIR}

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR"
# Presumably provides run_server and check_test_results used below -- confirm
# against ../common/util.sh.
source ../common/util.sh

# Remove logs left over from a previous run.
rm -f *.log*

# Overall exit status; set to 1 as soon as any check fails.
RET=0

# TrtReformatFreeTest
CLIENT_LOG="./test_reformat_free.client.log"
SERVER_LOG="./test_reformat_free.inference_server.log"

run_server
# A SERVER_PID of "0" is treated as a failed server start.
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Do not abort on a failing test command; failures are recorded in RET so the
# server can still be shut down afterwards.
set +e
python $TRT_TEST TrtReformatFreeTest >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
else
    # Presumably verifies that the result file reports 6 tests -- confirm
    # against check_test_results in ../common/util.sh.
    check_test_results $TEST_RESULT_FILE 6
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

# Stop the server and wait for it to exit.
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
else
  echo -e "\n***\n*** Test Failed\n***"
fi

exit $RET


================================================
FILE: qa/L0_trt_reformat_free/trt_reformat_free_test.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest
from builtins import range

import numpy as np
import test_util as tu
import tritonclient.http as tritonhttpclient
import tritonclient.utils.shared_memory as shm
from tritonclient.utils import InferenceServerException


def div_up(a, b):
    """Return ``a`` divided by ``b``, rounded up for positive integers."""
    biased = a + b - 1
    return biased // b


def reformat(format, tensor_np):
    """Repack a CHW (or NCHW) array into a channel-vectorized layout.

    The channel axis is split into groups of ``factor`` channels (2 for
    "CHW2", 32 for "CHW32") and the position within a group becomes the
    innermost axis, so that::

        out[..., c // factor, h, w, c % factor] == tensor_np[..., c, h, w]

    When the channel count is not a multiple of ``factor`` the unused lanes of
    the last group are zero-filled, which keeps the buffer deterministic.

    Args:
        format: Target layout name, either "CHW2" or "CHW32".
        tensor_np: numpy array of rank 3 (C, H, W) or rank 4 (N, C, H, W).

    Returns:
        A new C-contiguous array with the dtype of ``tensor_np`` and shape
        ``(..., ceil(C / factor), H, W, factor)``.

    Raises:
        ValueError: If ``format`` is not supported or ``tensor_np`` is neither
            3- nor 4-dimensional.
    """
    factors = {"CHW2": 2, "CHW32": 32}
    if format not in factors:
        raise ValueError(
            "Unexpected format {} for testing reformat-free input".format(format)
        )
    factor = factors[format]

    # Validate the rank before any shape arithmetic so that lower-rank inputs
    # get the descriptive ValueError instead of an IndexError.
    if tensor_np.ndim not in (3, 4):
        raise ValueError(
            "Unexpected numpy shape {} for testing reformat-free input".format(
                tensor_np.shape
            )
        )

    leading = tuple(tensor_np.shape[:-3])
    channels, height, width = tensor_np.shape[-3:]
    groups = (channels + factor - 1) // factor

    # Zero-pad the channel axis up to a whole number of groups.
    pad_width = [(0, 0)] * tensor_np.ndim
    pad_width[-3] = (0, groups * factor - channels)
    padded = np.pad(tensor_np, pad_width, mode="constant")

    # (..., groups * factor, H, W) -> (..., groups, factor, H, W)
    #                              -> (..., groups, H, W, factor)
    grouped = padded.reshape(leading + (groups, factor, height, width))
    return np.ascontiguousarray(np.moveaxis(grouped, -3, -1))


class TrtReformatFreeTest(tu.TestResultCollector):
    """Tests for models whose inputs use a non-linear (CHW2 / CHW32) format.

    All input data is passed through system shared memory to bypass the shape
    check in the client library: for a non-linear format tensor the data
    buffer is padded, so its byte size may not match what is calculated from
    the tensor shape.
    """

    # (model input name, shared-memory region name prefix)
    _INPUTS = (("INPUT0", "input0"), ("INPUT1", "input1"))
    _OUTPUT_NAMES = ("OUTPUT0", "OUTPUT1")

    def add_reformat_free_data_as_shared_memory(self, name, tensor, tensor_np):
        """Back ``tensor`` with a new shared-memory region holding ``tensor_np``.

        Args:
            name: Used both as the region name and as the shared-memory key.
            tensor: The ``InferInput`` that should read from the region.
            tensor_np: numpy array whose bytes are copied into the region.
        """
        byte_size = tensor_np.size * tensor_np.dtype.itemsize
        self.shm_handles.append(shm.create_shared_memory_region(name, name, byte_size))
        # Put data values into shared memory
        shm.set_shared_memory_region(self.shm_handles[-1], [tensor_np])
        # Register shared memory with Triton Server
        self.triton_client.register_system_shared_memory(name, name, byte_size)
        # Set the parameters to use data from shared memory
        tensor.set_shared_memory(name, byte_size)

    def setUp(self):
        """Create the HTTP client and the list of shared-memory handles."""
        self.shm_handles = []
        self.triton_client = tritonhttpclient.InferenceServerClient(
            "localhost:8000", verbose=True
        )

    def tearDown(self):
        """Unregister all regions from the server and destroy them locally."""
        try:
            self.triton_client.unregister_system_shared_memory()
        finally:
            # Destroy the local regions even if unregistering failed, so they
            # are not leaked into the following tests.
            for handle in self.shm_handles:
                shm.destroy_shared_memory_region(handle)

    def _make_shm_inputs(self, shape, datatype, data_np, region_suffix=""):
        """Create INPUT0/INPUT1 of ``shape``, both backed by ``data_np``."""
        inputs = []
        for input_name, region_prefix in self._INPUTS:
            inputs.append(
                tritonhttpclient.InferInput(input_name, list(shape), datatype)
            )
            self.add_reformat_free_data_as_shared_memory(
                region_prefix + region_suffix, inputs[-1], data_np
            )
        return inputs

    def _requested_outputs(self):
        """Request OUTPUT0 and OUTPUT1 as binary data."""
        return [
            tritonhttpclient.InferRequestedOutput(name, binary_data=True)
            for name in self._OUTPUT_NAMES
        ]

    def _check_reformat_free_inference(
        self, model_name, format, datatype, input_np, region_suffix=""
    ):
        """Send reformatted ``input_np`` as both inputs and verify sum / diff.

        OUTPUT0 is expected to be INPUT0 + INPUT1 and OUTPUT1 to be
        INPUT0 - INPUT1, both in the original (linear) layout.
        """
        reformatted_input_np = reformat(format, input_np)
        inputs = self._make_shm_inputs(
            input_np.shape, datatype, reformatted_input_np, region_suffix
        )
        results = self.triton_client.infer(
            model_name=model_name, inputs=inputs, outputs=self._requested_outputs()
        )
        # Validate the results by comparing with precomputed values.
        expected = (input_np + input_np, input_np - input_np)
        for output_name, expected_np in zip(self._OUTPUT_NAMES, expected):
            output_np = results.as_numpy(output_name)
            self.assertTrue(
                np.array_equal(output_np, expected_np),
                "{} expected: {}, got {}".format(output_name, expected_np, output_np),
            )

    def _check_byte_size_mismatch(
        self, model_name, datatype, input_np, expected_size, received_size,
        region_suffix="",
    ):
        """Send the un-reformatted ``input_np`` and expect a size error."""
        # Send the original size input instead of the reformatted size input.
        inputs = self._make_shm_inputs(
            input_np.shape, datatype, input_np, region_suffix
        )
        with self.assertRaises(InferenceServerException) as e:
            self.triton_client.infer(
                model_name=model_name,
                inputs=inputs,
                outputs=self._requested_outputs(),
            )
        self.assertIn(
            f"input byte size mismatch for input 'INPUT0' for model '{model_name}'. Expected {expected_size}, got {received_size}",
            str(e.exception),
        )

    def test_nobatch_chw2_input(self):
        input_np = np.arange(26, dtype=np.float16).reshape((13, 2, 1))
        self._check_reformat_free_inference(
            "plan_nobatch_CHW2_LINEAR_float16_float16_float16",
            "CHW2",
            "FP16",
            input_np,
        )

    def test_wrong_nobatch_chw2_input(self):
        input_np = np.arange(26, dtype=np.float16).reshape((13, 2, 1))
        self._check_byte_size_mismatch(
            "plan_nobatch_CHW2_LINEAR_float16_float16_float16",
            "FP16",
            input_np,
            # reformatted input size - (14, 2, 1) * size(float16)
            expected_size=56,
            # original input size - (13, 2, 1) * size(float16)
            received_size=52,
        )

    def test_chw2_input(self):
        for bs in [1, 8]:
            input_np = np.arange(26 * bs, dtype=np.float16).reshape((bs, 13, 2, 1))
            self._check_reformat_free_inference(
                "plan_CHW2_LINEAR_float16_float16_float16",
                "CHW2",
                "FP16",
                input_np,
                region_suffix=str(bs),
            )

    def test_wrong_chw2_input(self):
        for bs in [1, 8]:
            input_np = np.arange(26 * bs, dtype=np.float16).reshape((bs, 13, 2, 1))
            self._check_byte_size_mismatch(
                "plan_CHW2_LINEAR_float16_float16_float16",
                "FP16",
                input_np,
                # reformatted input size - (bs, 14, 2, 1) * size(float16)
                expected_size=bs * 28 * 2,
                # original input size - (bs, 13, 2, 1) * size(float16)
                received_size=bs * 26 * 2,
                region_suffix=str(bs),
            )

    def test_nobatch_chw32_input(self):
        input_np = np.arange(26, dtype=np.float32).reshape((13, 2, 1))
        self._check_reformat_free_inference(
            "plan_nobatch_CHW32_LINEAR_float32_float32_float32",
            "CHW32",
            "FP32",
            input_np,
        )

    def test_chw32_input(self):
        for bs in [1, 8]:
            input_np = np.arange(26 * bs, dtype=np.float32).reshape((bs, 13, 2, 1))
            self._check_reformat_free_inference(
                "plan_CHW32_LINEAR_float32_float32_float32",
                "CHW32",
                "FP32",
                input_np,
                region_suffix=str(bs),
            )


# Run the unittest test runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_trt_shape_tensors/test.sh
================================================
#!/bin/bash
# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first command-line argument wins,
# otherwise fall back to NVIDIA_TRITON_SERVER_VERSION.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Optionally select an architecture-specific repository (<version>_<arch>).
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Restrict the test to GPU 0.
export CUDA_VISIBLE_DEVICES=0

TEST_RESULT_FILE='test_results.txt'
CLIENT_LOG="./client.log"
SHAPE_TENSOR_TEST=trt_shape_tensor_test.py

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
SERVER_LOG="./inference_server.log"
# Presumably provides run_server and check_test_results used later in this
# script -- confirm against ../common/util.sh.
source ../common/util.sh

# Remove logs from a previous run and start from a fresh copy of the
# pre-generated shape tensor model repository.
rm -fr  *.log
rm -fr models && mkdir models
cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/* models/.

# Overall exit status; set to 1 as soon as any check fails.
RET=0

# Must run on a single device or else the TRITONSERVER_DELAY_SCHEDULER
# can fail when the requests are distributed to multiple devices.
export CUDA_VISIBLE_DEVICES=0

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# python unittest seems to swallow ImportError and still return 0
# exit code, so a zero exit status alone is not trusted: the result
# file is checked as well to make sure a test actually ran.

# Sanity tests, all run against the same server instance. Each one
# overwrites the client log.
for sanity_test in test_static_batch test_nobatch test_wrong_shape_values; do
    python $SHAPE_TENSOR_TEST InferShapeTensorTest.$sanity_test >$CLIENT_LOG 2>&1
    if [ $? -eq 0 ]; then
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    else
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Failed\n***"
        RET=1
    fi
done

set -e

kill $SERVER_PID
wait $SERVER_PID

# Do not continue with the remaining tests when a sanity test failed.
if [ $RET -ne 0 ]; then
  exit $RET
fi
echo -e "\n*** Sanity Test Passed*** \n"

# Prepare the config file for dynamic batching tests: raise the maximum
# batch size, pin the served version and append a dynamic batcher section.
for dtype in int32 int64; do
    CONFIG_FILE="models/plan_zero_1_float32_${dtype}/config.pbtxt"
    sed -i \
        -e "s/^max_batch_size:.*/max_batch_size: 8/" \
        -e "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" \
        "$CONFIG_FILE"
    echo "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >>"$CONFIG_FILE"
done

# Dynamic batcher tests. Every test is run against its own server instance
# and gets its own server log; client output is appended to CLIENT_LOG.
for i in \
            test_dynamic_different_shape_values \
            test_dynamic_identical_shape_values; do
        SERVER_LOG="./$i.server.log"
        run_server
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        # Mark which test the following client output belongs to.
        echo "Test: $i" >>$CLIENT_LOG

        # Record failures in RET instead of aborting, so that the server is
        # always shut down below.
        set +e
        python $SHAPE_TENSOR_TEST InferShapeTensorTest.$i >>$CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
            echo -e "\n***\n*** Test Failed $i\n***"
            RET=1
        else
            check_test_results $TEST_RESULT_FILE 1
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG
                echo -e "\n***\n*** Test Result Verification Failed\n***"
                RET=1
            fi
        fi
        set -e

        kill $SERVER_PID
        wait $SERVER_PID
    done

# Sequence batcher tests. Every test is run against its own server instance,
# started with the scheduler delay environment variables set.
for i in \
            test_sequence_different_shape_values \
            test_sequence_identical_shape_values ; do
        export TRITONSERVER_BACKLOG_DELAY_SCHEDULER=0
        export TRITONSERVER_DELAY_SCHEDULER=12
        SERVER_LOG="./$i.server.log"
        run_server
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        # Mark which test the following client output belongs to.
        echo "Test: $i" >>$CLIENT_LOG

        # Record failures in RET instead of aborting, so that the server is
        # always shut down below.
        set +e
        python $SHAPE_TENSOR_TEST SequenceBatcherShapeTensorTest.$i >>$CLIENT_LOG 2>&1
        if [ $? -ne 0 ]; then
            echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
            echo -e "\n***\n*** Test Failed $i\n***"
            RET=1
        else
            check_test_results $TEST_RESULT_FILE 1
            if [ $? -ne 0 ]; then
                cat $CLIENT_LOG
                echo -e "\n***\n*** Test Result Verification Failed\n***"
                RET=1
            fi
        fi
        set -e

        # Make sure the delay settings do not leak into later server starts.
        unset TRITONSERVER_DELAY_SCHEDULER
        unset TRITONSERVER_BACKLOG_DELAY_SCHEDULER
        kill $SERVER_PID
        wait $SERVER_PID
    done

# Prepare the config file for dynamic sequence batching tests by overriding
# the candidate sequence count and the maximum queue delay.
for dtype in int32 int64; do
    CONFIG_FILE="models/plan_dyna_sequence_float32_${dtype}/config.pbtxt"
    sed -i \
        -e "s/max_candidate_sequences:.*/max_candidate_sequences:4/" \
        -e "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" \
        "$CONFIG_FILE"
done

export NO_BATCHING=0

# Dynamic sequence batcher tests. Every test is run against its own server
# instance, started without verbose logging.
for i in test_dynaseq_identical_shape_values_series \
         test_dynaseq_identical_shape_values_parallel \
         test_dynaseq_different_shape_values_series \
         test_dynaseq_different_shape_values_parallel; do
    SERVER_ARGS="--model-repository=$(pwd)/models"
    SERVER_LOG="./$i.server.log"
    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    echo "Test: $i" >>$CLIENT_LOG

    # Record failures in RET instead of aborting, so that the server is
    # always shut down below.
    set +e
    python $SHAPE_TENSOR_TEST DynaSequenceBatcherTest.$i >>$CLIENT_LOG 2>&1
    if [ $? -eq 0 ]; then
        check_test_results $TEST_RESULT_FILE 1
        if [ $? -ne 0 ]; then
            cat $CLIENT_LOG
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    else
        echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
        echo -e "\n***\n*** Test $i Failed\n***"
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
done

# Report overall success; individual failures were already reported where
# they occurred. The exit status carries the final verdict.
if [ $RET -eq 0 ]; then
  echo -e "\n***\n*** Test Passed\n***"
fi

exit $RET


================================================
FILE: qa/L0_trt_shape_tensors/trt_shape_tensor_test.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import threading
import time
import unittest
from builtins import range

import infer_util as iu
import numpy as np
import sequence_util as su
import test_util as tu
import tritonclient.grpc as grpcclient

# Whether inference requests should use system shared memory. Read from the
# TEST_SYSTEM_SHARED_MEMORY environment variable as an integer flag
# (default 0, i.e. disabled).
TEST_SYSTEM_SHARED_MEMORY = bool(int(os.environ.get("TEST_SYSTEM_SHARED_MEMORY", 0)))

# NOTE(review): presumably instance count and timing settings that mirror the
# model configuration used by these tests -- confirm against their users.
_model_instances = 1
_max_queue_delay_ms = 10000
_max_sequence_idle_ms = 5000

# Exceptions collected via add_deferred_exception() and re-raised by
# check_deferred_exception(); the lock guards access to the list.
_deferred_exceptions_lock = threading.Lock()
_deferred_exceptions = []


class InferShapeTensorTest(tu.TestResultCollector):
    def setUp(self):
        """Create the helper client and start with no deferred exceptions."""
        global _deferred_exceptions
        # The helper client for setup will be GRPC for simplicity.
        helper_client = grpcclient.InferenceServerClient("localhost:8001")
        self.triton_client_ = helper_client
        # Rebind (rather than clear) so every test starts from a fresh list.
        _deferred_exceptions = list()

    def tearDown(self):
        """Unregister all shared-memory regions and release the helper client.

        The client created in setUp() is closed here so that its connection
        is not left open until garbage collection, and the base-class
        tearDown() always runs even if unregistering a region fails.
        """
        try:
            self.triton_client_.unregister_system_shared_memory()
            self.triton_client_.unregister_cuda_shared_memory()
        finally:
            try:
                # Every test gets a new client from setUp(), so this one is
                # no longer needed once the regions are unregistered.
                self.triton_client_.close()
            finally:
                super().tearDown()

    def add_deferred_exception(self, ex):
        """Record ``ex`` in the module-level deferred-exception list."""
        # The list is only mutated in place, never rebound, so no ``global``
        # declaration is required here.
        with _deferred_exceptions_lock:
            _deferred_exceptions.append(ex)

    def check_deferred_exception(self):
        """Re-raise the first deferred exception, if any has been recorded."""
        with _deferred_exceptions_lock:
            # Just raise one of the exceptions; later ones are not reported.
            if _deferred_exceptions:
                raise _deferred_exceptions[0]

    def check_response(
        self,
        bs,
        thresholds,
        shape_values,
        dummy_input_shapes,
        shm_region_names=None,
        precreated_shm_regions=None,
        shm_suffix="",
        shape_tensor_input_dtype=np.int32,
    ):
        """Run one shape-tensor inference and check how long it took.

        Intended to be used as a thread target: any exception (including a
        failed assertion) is recorded through add_deferred_exception()
        instead of being raised.

        Args:
            bs: Batch size of the request; it is prepended to every entry of
                ``dummy_input_shapes`` before the request is sent.
            thresholds: ``(lt_ms, gt_ms)`` pair. The elapsed time must be
                below ``lt_ms`` and above ``gt_ms``; a ``None`` entry
                disables that bound.
            shape_values: Shape-tensor values passed to the inference helper.
            dummy_input_shapes: Input shapes without the batch dimension.
                The caller's lists are left untouched.
            shm_region_names: Unused; kept for interface compatibility.
            precreated_shm_regions: Unused; kept for interface compatibility.
            shm_suffix: Suffix forwarded to the inference helper.
            shape_tensor_input_dtype: Data type of the shape-tensor input.
        """
        try:
            # Add batch size to shape as full shape is expected. New lists
            # are built so that the argument owned by the caller is not
            # modified as a side effect.
            full_input_shapes = [[bs] + list(shape) for shape in dummy_input_shapes]
            start_ms = int(round(time.time() * 1000))

            iu.infer_shape_tensor(
                self,
                "plan",
                np.float32,
                shape_values,
                full_input_shapes,
                use_grpc=False,
                use_streaming=False,
                shm_suffix=shm_suffix,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                batch_size=bs,
                shape_tensor_input_dtype=shape_tensor_input_dtype,
            )

            end_ms = int(round(time.time() * 1000))
            elapsed_ms = end_ms - start_ms

            lt_ms = thresholds[0]
            gt_ms = thresholds[1]
            if lt_ms is not None:
                self.assertTrue(
                    elapsed_ms < lt_ms,
                    "expected less than {}ms response time, got {} ms".format(
                        lt_ms, elapsed_ms
                    ),
                )
            if gt_ms is not None:
                self.assertTrue(
                    elapsed_ms > gt_ms,
                    "expected greater than {}ms response time, got {} ms".format(
                        gt_ms, elapsed_ms
                    ),
                )
        except Exception as ex:
            self.add_deferred_exception(ex)

    def check_setup(self, model_name):
        """Assert that ``model_name`` has the expected dynamic batcher settings."""
        # Make sure test.sh set up the correct batcher settings
        model_config = self.triton_client_.get_model_config(model_name).config
        batching = model_config.dynamic_batching
        for preferred_size in (2, 6):
            self.assertTrue(preferred_size in batching.preferred_batch_size)
        # 10 secs, expressed in microseconds
        self.assertEqual(
            batching.max_queue_delay_microseconds, _max_queue_delay_ms * 1000
        )

    def check_status(self, model_name, batch_exec, exec_cnt, infer_cnt):
        """Verify the inference statistics reported for version 1 of a model.

        Args:
            model_name: Name of the model whose statistics are fetched.
            batch_exec: Mapping from batch size to the expected
                ``compute_infer`` count for that batch size, or ``None`` to
                skip the per-batch-size checks.
            exec_cnt: Expected ``execution_count``.
            infer_cnt: Expected ``inference_count``.

        Raises:
            AssertionError: If a reported statistic differs from the
                expected value.
        """
        # There is a time window between when responses are returned and statistics are updated.
        # To prevent intermittent test failure during that window, poll the
        # inference statistics up to 10 times, one second apart.
        num_tries = 10
        for attempt in range(num_tries):
            stats = self.triton_client_.get_inference_statistics(model_name, "1")
            self.assertEqual(len(stats.model_stats), 1, "expect 1 model stats")
            actual_exec_cnt = stats.model_stats[0].execution_count
            if actual_exec_cnt == exec_cnt:
                break
            print(
                "WARNING: expect {} executions, got {} (attempt {})".format(
                    exec_cnt, actual_exec_cnt, attempt
                )
            )
            # The statistics are not fetched again after the last attempt, so
            # sleeping there would only delay the failure report.
            if attempt < num_tries - 1:
                time.sleep(1)

        model_stat = stats.model_stats[0]
        self.assertEqual(
            model_stat.name,
            model_name,
            "expect model stats for model {}".format(model_name),
        )
        self.assertEqual(
            model_stat.version,
            "1",
            "expect model stats for model {} version 1".format(model_name),
        )

        if batch_exec is not None:
            batch_stats = model_stat.batch_stats
            print(batch_stats)
            self.assertEqual(
                len(batch_stats),
                len(batch_exec),
                "expected {} different batch-sizes, got {}".format(
                    len(batch_exec), len(batch_stats)
                ),
            )

            for batch_stat in batch_stats:
                bs = batch_stat.batch_size
                # Get count from one of the stats
                bc = batch_stat.compute_infer.count
                self.assertTrue(
                    bs in batch_exec, "did not find expected batch-size {}".format(bs)
                )
                self.assertEqual(
                    bc,
                    batch_exec[bs],
                    "expected model-execution-count {} for batch size {}, got {}".format(
                        batch_exec[bs], bs, bc
                    ),
                )

        actual_exec_cnt = model_stat.execution_count
        self.assertEqual(
            actual_exec_cnt,
            exec_cnt,
            "expected model-exec-count {}, got {}".format(exec_cnt, actual_exec_cnt),
        )

        # Checked once; the statistics object does not change between checks.
        actual_infer_cnt = model_stat.inference_count
        self.assertEqual(
            actual_infer_cnt,
            infer_cnt,
            "expected model-inference-count {}, got {}".format(
                infer_cnt, actual_infer_cnt
            ),
        )

    def test_static_batch(self):
        # Send three batch-size-8 requests to the "plan" model for each
        # shape-tensor input data type.
        for input_dtype in (np.int32, np.int64):
            # (shape tensor values, dummy input shapes) of each request.
            # Built inside the loop so every request gets fresh lists.
            requests = (
                ([[32, 32]], [[8, 4, 4]]),
                ([[4, 4]], [[8, 32, 32]]),
                ([[4, 4]], [[8, 4, 4]]),
            )
            for shape_values, dummy_shapes in requests:
                iu.infer_shape_tensor(
                    self,
                    "plan",
                    np.float32,
                    shape_values,
                    dummy_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    batch_size=8,
                    shape_tensor_input_dtype=input_dtype,
                )

    def test_nobatch(self):
        # Send three requests to the "plan_nobatch" model for each
        # shape-tensor input data type.
        for input_dtype in (np.int32, np.int64):
            # (shape tensor values, dummy input shapes) of each request.
            # Built inside the loop so every request gets fresh lists.
            requests = (
                ([[32, 32]], [[4, 4]]),
                ([[4, 4]], [[32, 32]]),
                ([[4, 4]], [[4, 4]]),
            )
            for shape_values, dummy_shapes in requests:
                iu.infer_shape_tensor(
                    self,
                    "plan_nobatch",
                    np.float32,
                    shape_values,
                    dummy_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    shape_tensor_input_dtype=input_dtype,
                )

    def test_wrong_shape_values(self):
        # Send a shape value (33) outside of the accepted range and make sure
        # the request is rejected with the expected error message. The test
        # fails if the request is accepted.
        over_shape_values = [[32, 33]]
        expected_error = "The shape value at index 2 is expected to be in range from 1 to 32, Got: 33"
        for shape_tensor_input_dtype in [np.int32, np.int64]:
            try:
                iu.infer_shape_tensor(
                    self,
                    "plan",
                    np.float32,
                    over_shape_values,
                    [[8, 4, 4]],
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    batch_size=8,
                    shape_tensor_input_dtype=shape_tensor_input_dtype,
                )
            # InferenceServerException will be raised from different namespace,
            # use dynamic type characteristic to catch both ex
            except Exception as ex:
                message = getattr(ex, "message", None)
                if not callable(message):
                    # Not a server exception (e.g. a failed assertion inside
                    # the helper): report the real failure instead of hiding
                    # it behind an AttributeError.
                    raise
                self.assertIn(expected_error, message())
            else:
                # Without this branch the test would pass even if the server
                # accepted the out-of-range shape value.
                self.fail(
                    "expected shape values {} to be rejected for dtype {}".format(
                        over_shape_values, np.dtype(shape_tensor_input_dtype).name
                    )
                )

    # Dynamic Batcher tests
    def test_dynamic_different_shape_values(self):
        # Send two requests with sum of static batch sizes ==
        # preferred size, but with different shape values. This
        # should cause the requests to not be batched. The first
        # response will come back immediately and the second
        # delayed by the max batch queue delay
        for input_dtype in (np.int32, np.int64):
            try:
                model_name = "{}_{}".format(
                    tu.get_zero_model_name("plan", 1, np.float32),
                    np.dtype(input_dtype).name,
                )

                self.check_setup(model_name)
                self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)

                # (batch size, (lt_ms, gt_ms) thresholds, shape values)
                requests = (
                    (3, (6000, None), [[2, 2]]),
                    (
                        3,
                        (_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
                        [[4, 4]],
                    ),
                )
                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(batch_size, thresholds),
                        kwargs={
                            "shape_values": shape_values,
                            "dummy_input_shapes": [[16, 16]],
                            "shm_suffix": "{}".format(index),
                            "shape_tensor_input_dtype": input_dtype,
                        },
                    )
                    for index, (batch_size, thresholds, shape_values) in enumerate(
                        requests
                    )
                ]

                # Give the first request a one second head start.
                first_worker, second_worker = workers
                first_worker.start()
                time.sleep(1)
                second_worker.start()
                for worker in workers:
                    worker.join()

                self.check_deferred_exception()
                self.check_status(model_name, {3: 2}, 2, 6)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))

    def test_dynamic_identical_shape_values(self):
        # Send two requests with sum of static batch sizes ==
        # preferred size, but with identical shape values. This
        # should cause the requests to get batched. Both
        # responses should come back immediately.
        for input_dtype in (np.int32, np.int64):
            try:
                model_name = "{}_{}".format(
                    tu.get_zero_model_name("plan", 1, np.float32),
                    np.dtype(input_dtype).name,
                )

                self.check_setup(model_name)
                self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)

                # Batch size of each request; everything else is identical.
                workers = [
                    threading.Thread(
                        target=self.check_response,
                        args=(batch_size, (6000, None)),
                        kwargs={
                            "shape_values": [[4, 4]],
                            "dummy_input_shapes": [[16, 16]],
                            "shm_suffix": "{}".format(index),
                            "shape_tensor_input_dtype": input_dtype,
                        },
                    )
                    for index, batch_size in enumerate((4, 2))
                ]

                # Give the first request a one second head start.
                first_worker, second_worker = workers
                first_worker.start()
                time.sleep(1)
                second_worker.start()
                for worker in workers:
                    worker.join()

                self.check_deferred_exception()
                self.check_status(model_name, {6: 1}, 1, 6)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))


class SequenceBatcherShapeTensorTest(su.SequenceBatcherTestUtil):
    def get_expected_result(self, expected_result, value, flag_str=None):
        """Return the expected result for a request carrying ``value``.

        The incoming ``expected_result`` is ignored: the result is ``value``,
        plus one when ``flag_str`` contains "start".
        """
        # Adjust the expected_result for models
        if flag_str is None or "start" not in flag_str:
            return value
        return value + 1

    def test_sequence_identical_shape_values(self):
        # Test model instances together are configured with
        # total-batch-size 4. Send four equal-length sequences
        # with identical shape values in parallel and make sure
        # they get completely batched into batch-size 4
        # inferences.
        self.clear_deferred_exceptions()
        dtype = np.float32

        # Flag and shape value of the three requests of every sequence.
        flags = ("start", None, "end")
        shape_values = (2, 4, 8)
        # (correlation id, input value of each request, first argument
        # passed to get_expected_result)
        sequences = (
            (1001, (1, 2, 3), 6),
            (1002, (11, 12, 13), 36),
            (1003, (111, 112, 113), 336),
            (1004, (1111, 1112, 1113), 3336),
        )

        for shape_tensor_input_dtype in [np.int32, np.int64]:
            # Handles are collected as the regions get registered so that the
            # cleanup below only touches regions created in this iteration,
            # even when a check fails before all of them exist.
            precreated_shm_handles = []
            try:
                model_name = tu.get_sequence_model_name("plan", dtype)
                model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name
                self.check_setup(model_name)

                # Need scheduler to wait for queue to contain all
                # inferences for both sequences.
                self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12)
                self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)
                self.assertEqual(
                    int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0
                )

                for index, (_, values, _) in enumerate(sequences):
                    precreated_shm_handles.append(
                        self.precreate_register_shape_tensor_regions(
                            # ((shape_value, value), ...) for each request
                            value_list=tuple(zip(shape_values, values)),
                            dtype=dtype,
                            i=index,
                            shape_tensor_input_dtype=shape_tensor_input_dtype,
                        )
                    )

                threads = []
                for (corrid, values, nominal_result), shm_handles in zip(
                    sequences, precreated_shm_handles
                ):
                    # (flag_str, shape_value, value, pre_delay_ms)
                    steps = tuple(
                        (flag, shape_value, value, None)
                        for flag, shape_value, value in zip(
                            flags, shape_values, values
                        )
                    )
                    threads.append(
                        threading.Thread(
                            target=self.check_sequence_shape_tensor_io,
                            args=(
                                model_name,
                                dtype,
                                corrid,
                                (None, None),
                                steps,
                                self.get_expected_result(
                                    nominal_result, values[-1], "end"
                                ),
                                shm_handles,
                            ),
                            kwargs={
                                "sequence_name": "{}".format(self._testMethodName),
                                "shape_tensor_input_dtype": shape_tensor_input_dtype,
                            },
                        )
                    )

                for t in threads:
                    t.start()
                for t in threads:
                    t.join()
                self.check_deferred_exception()
                self.check_status(model_name, {4: 3}, 3, 12)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if TEST_SYSTEM_SHARED_MEMORY:
                    for shm_handles in precreated_shm_handles:
                        self.cleanup_shm_regions(shm_handles)

    def test_sequence_different_shape_values(self):
        # Test model instances together are configured with
        # total-batch-size 4. Send four equal-length sequences with
        # different shape values in 2 sequences and 2 sequences that
        # share the same shape value. Make sure that the 2 sequences
        # with same shapes batch together but other two sequences do
        # not.
        self.clear_deferred_exceptions()
        dtype = np.float32

        # Flag of the three requests of every sequence.
        flags = ("start", None, "end")
        # (correlation id, shape value used by all requests of the sequence,
        # input value of each request, first argument passed to
        # get_expected_result)
        sequences = (
            (1001, 1, (1, 2, 3), 6),
            (1002, 32, (11, 12, 13), 36),
            (1003, 16, (111, 112, 113), 336),
            (1004, 1, (1111, 1112, 1113), 3336),
        )

        for input_dtype in (np.int32, np.int64):
            # One set of regions per sequence, registered in sequence order.
            shm_handle_sets = [
                self.precreate_register_shape_tensor_regions(
                    value_list=tuple((shape_value, value) for value in values),
                    dtype=dtype,
                    i=index,
                    shape_tensor_input_dtype=input_dtype,
                )
                for index, (_, shape_value, values, _) in enumerate(sequences)
            ]
            try:
                model_name = "{}_{}".format(
                    tu.get_sequence_model_name("plan", dtype),
                    np.dtype(input_dtype).name,
                )
                self.check_setup(model_name)

                # Need scheduler to wait for queue to contain all
                # inferences for both sequences.
                self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12)
                self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)
                self.assertEqual(
                    int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0
                )

                workers = []
                for (corrid, shape_value, values, nominal_result), handles in zip(
                    sequences, shm_handle_sets
                ):
                    # (flag_str, shape_value, value, pre_delay_ms)
                    steps = tuple(
                        (flag, shape_value, value, None)
                        for flag, value in zip(flags, values)
                    )
                    workers.append(
                        threading.Thread(
                            target=self.check_sequence_shape_tensor_io,
                            args=(
                                model_name,
                                dtype,
                                corrid,
                                (None, None),
                                steps,
                                self.get_expected_result(
                                    nominal_result, values[-1], "end"
                                ),
                                handles,
                            ),
                            kwargs={
                                "sequence_name": "{}".format(self._testMethodName),
                                "shape_tensor_input_dtype": input_dtype,
                            },
                        )
                    )

                # Stagger the sequences by one second each.
                for worker in workers:
                    worker.start()
                    time.sleep(1)
                for worker in workers:
                    worker.join()

                self.check_deferred_exception()
                self.check_status(model_name, {4: 3, 3: 6}, 9, 12)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if TEST_SYSTEM_SHARED_MEMORY:
                    for handles in shm_handle_sets:
                        self.cleanup_shm_regions(handles)


class DynaSequenceBatcherTest(su.SequenceBatcherTestUtil):
    """Sequence tests for "plan" dyna-sequence models with shape-tensor input.

    Every scenario drives four concurrent sequences (correlation IDs
    1001-1004) of three requests each, once per shape-tensor input dtype
    (int32 and int64), and then validates the model statistics.
    """

    def get_expected_result(self, expected_result, corrid, value, flag_str=None):
        """Return the output expected for a single request.

        The incoming ``expected_result`` is discarded: the result is rebuilt
        from ``value``, plus 1 when ``flag_str`` contains "start" and plus
        ``corrid`` when it contains "end".
        """
        outcome = value
        if flag_str is None:
            return outcome
        if "start" in flag_str:
            outcome += 1
        if "end" in flag_str:
            outcome += corrid
        return outcome

    def _run_shape_tensor_sequences(
        self, sleep_secs, shape_rows, batch_exec, exec_count, infer_count
    ):
        """Drive four three-step sequences and verify the resulting status.

        sleep_secs -- pause after starting each worker thread; values <= 0
            start all threads back to back.
        shape_rows -- one (first, middle, last) triple of shape-tensor values
            per sequence.
        batch_exec, exec_count, infer_count -- forwarded positionally to
            check_status() after all sequences finish (parameter names are
            presumed from the call site -- confirm against check_status).
        """
        self.clear_deferred_exceptions()
        dtype = np.float32

        corrids = [1001, 1002, 1003, 1004]
        # Control flag attached to the first, middle and last request.
        step_flags = ("start", None, "end")
        # Input values sent by each of the four sequences.
        value_rows = (
            (1, 2, 3),
            (11, 12, 13),
            (111, 112, 113),
            (1111, 1112, 1113),
        )
        # Added to the corrid and handed to get_expected_result() as its
        # first argument (which that method ignores).
        nominal_totals = (4, 36, 336, 3336)

        for shape_tensor_input_dtype in [np.int32, np.int64]:
            # One group of pre-registered regions per sequence, holding
            # (shape_value, value) pairs.
            shm_handles = [
                self.precreate_register_dynaseq_shape_tensor_regions(
                    value_list=tuple(zip(shapes, values)),
                    dtype=dtype,
                    i=region_idx,
                    shape_tensor_input_dtype=shape_tensor_input_dtype,
                )
                for region_idx, (shapes, values) in enumerate(
                    zip(shape_rows, value_rows)
                )
            ]

            try:
                model_name = (
                    tu.get_dyna_sequence_model_name("plan", dtype)
                    + "_"
                    + np.dtype(shape_tensor_input_dtype).name
                )
                self.check_setup(model_name)
                self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                self.assertNotIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ)

                workers = []
                for corrid, shapes, values, total, handles in zip(
                    corrids, shape_rows, value_rows, nominal_totals, shm_handles
                ):
                    # (flag_str, shape_value, value, pre_delay_ms)
                    steps = tuple(
                        (flag, shape, value, None)
                        for flag, shape, value in zip(step_flags, shapes, values)
                    )
                    workers.append(
                        threading.Thread(
                            target=self.check_sequence_shape_tensor_io,
                            args=(
                                model_name,
                                dtype,
                                corrid,
                                (None, None),
                                steps,
                                self.get_expected_result(
                                    total + corrid,
                                    corrid,
                                    values[-1],
                                    step_flags[-1],
                                ),
                                handles,
                            ),
                            kwargs={
                                "sequence_name": "{}_{}".format(
                                    self._testMethodName, corrid
                                ),
                                "using_dynamic_batcher": True,
                                "shape_tensor_input_dtype": shape_tensor_input_dtype,
                            },
                        )
                    )

                for worker in workers:
                    worker.start()
                    if sleep_secs > 0:
                        time.sleep(sleep_secs)
                for worker in workers:
                    worker.join()
                self.check_deferred_exception()
                # A fresh dict is handed over for every dtype iteration.
                self.check_status(
                    model_name, dict(batch_exec), exec_count, infer_count
                )
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if TEST_SYSTEM_SHARED_MEMORY:
                    for handles in shm_handles:
                        self.cleanup_shm_regions(handles)

    def _multi_sequence_different_shape_impl(self, sleep_secs):
        """Run four sequences in which every request has its own shape value."""
        self._run_shape_tensor_sequences(
            sleep_secs,
            shape_rows=((1, 12, 2), (3, 4, 5), (6, 7, 8), (9, 10, 11)),
            batch_exec={1: 12},
            exec_count=12,
            infer_count=12,
        )

    def _multi_sequence_identical_shape_impl(self, sleep_secs):
        """Run four sequences that share shape values 2, 4, 8 step by step."""
        self._run_shape_tensor_sequences(
            sleep_secs,
            shape_rows=((2, 4, 8),) * 4,
            batch_exec={4: 3},
            exec_count=3,
            infer_count=12,
        )

    def test_dynaseq_identical_shape_values_series(self):
        # Four sequences with matching shape values, started one second
        # apart; every step is expected to execute as one batch of 4.
        self._multi_sequence_identical_shape_impl(1)

    def test_dynaseq_identical_shape_values_parallel(self):
        # Four sequences with matching shape values, started without delay;
        # every step is expected to execute as one batch of 4.
        self._multi_sequence_identical_shape_impl(0)

    def test_dynaseq_different_shape_values_series(self):
        # Four sequences with differing shape values, started one second
        # apart; no two requests may share a batch.
        self._multi_sequence_different_shape_impl(1)

    def test_dynaseq_different_shape_values_parallel(self):
        # Four sequences with differing shape values, started without delay;
        # no two requests may share a batch.
        self._multi_sequence_different_shape_impl(0)


# Script entry point: hand control to unittest's command-line runner when
# this file is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_vertex_ai/test.sh
================================================
#!/bin/bash
# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: NVIDIA_TRITON_SERVER_VERSION is the
# default and the first positional argument, when given, overrides it.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
# Abort immediately when neither source supplied a version.
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Passed to check_test_results together with UNIT_TEST_COUNT further down.
TEST_RESULT_FILE='test_results.txt'

export CUDA_VISIBLE_DEVICES=0

# Overall result of the script; set to 1 by any failed check below.
RET=0

# Start from a clean slate: drop model repositories and logs left behind by
# a previous run.
rm -rf multi_models single_model restricted_single_model
rm -f *.log
rm -f *.out

CLIENT_TEST_SCRIPT=vertex_ai_test.py
UNIT_TEST_COUNT=8
CLIENT_LOG="./client.log"

DATADIR=/data/inferenceserver/${REPO_VERSION}
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_LOG="./server.log"
# NOTE(review): run_server, run_server_nowait and check_test_results are used
# below but not defined in this script; presumably they come from util.sh.
source ../common/util.sh

# Set up the multi model repository with the swap and non-swap versions
# Both models are copies of onnx_int32_int32_int32 renamed in config.pbtxt:
# 'addsub' has versions 2 and 3 removed, 'subadd' has versions 1 and 2
# removed. 'single_model' then receives a copy of 'addsub' only.
mkdir multi_models && \
    cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 multi_models/addsub && \
    rm -r multi_models/addsub/2 && rm -r multi_models/addsub/3 && \
    sed -i "s/onnx_int32_int32_int32/addsub/" multi_models/addsub/config.pbtxt && \
    cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 multi_models/subadd && \
    rm -r multi_models/subadd/1 && rm -r multi_models/subadd/2 && \
    sed -i "s/onnx_int32_int32_int32/subadd/" multi_models/subadd/config.pbtxt
mkdir single_model && \
    cp -r multi_models/addsub single_model/.

# Set up single-model Python repository used by restricted API regression
# (config.pbtxt at the model root, model.py under version directory 1).
mkdir -p restricted_single_model/identity_fp32/1 && \
    cp ../python_models/identity_fp32/config.pbtxt restricted_single_model/identity_fp32/ && \
    cp ../python_models/identity_fp32/model.py restricted_single_model/identity_fp32/1/

# Poll the Vertex AI health route until the server answers with HTTP 200.
#   $1 - PID of the server process being waited on
#   $2 - maximum number of polling attempts, one per second (default 30)
# The request goes to localhost:<port><AIP_HEALTH_ROUTE>, where <port> is
# AIP_HTTP_PORT, or 8080 when that variable is unset or empty.
# WAIT_RET is set to 0 once the endpoint reports ready, and to 1 when the
# process has gone away or the attempts are used up.
function vertex_ai_wait_for_server_ready() {
    local server_pid="$1"; shift
    local timeout_secs="${1:-30}"; shift
    local remaining_secs=$timeout_secs

    WAIT_RET=0
    ping_address="localhost:${AIP_HTTP_PORT:-8080}${AIP_HEALTH_ROUTE}"

    while ! test $remaining_secs -eq 0 ; do
        # Stop polling as soon as the server process no longer exists.
        kill -0 $server_pid || {
            echo "=== Server not running."
            WAIT_RET=1
            return
        }

        sleep 1

        # errexit is switched off around curl so that a refused connection
        # does not abort the calling script.
        set +e
        code=$(curl -s -w %{http_code} $ping_address)
        set -e
        if [[ "$code" == "200" ]]; then
            return
        fi

        remaining_secs=$((remaining_secs - 1))
    done

    echo "=== Timeout $timeout_secs secs. Server not ready."
    WAIT_RET=1
}

# Remove every AIP_* variable this script may have exported so that each
# scenario starts from a clean Vertex AI environment.
function unset_vertex_variables() {
    unset AIP_MODE \
          AIP_HTTP_PORT \
          AIP_HEALTH_ROUTE \
          AIP_PREDICT_ROUTE \
          AIP_STORAGE_URI
}

#
# Test default allow-vertex-ai
#
unset_vertex_variables

# The plain HTTP endpoint stays on so 'run_server' can still poll readiness
# while the Vertex AI endpoint is switched off.
BASE_SERVER_ARGS="--allow-http true --model-repository=single_model"
export AIP_HEALTH_ROUTE="/health"
export AIP_PREDICT_ROUTE="/predict"

# Case 1: flag omitted (and AIP_MODE unset) -> Vertex AI must stay off.
SERVER_ARGS=${BASE_SERVER_ARGS}
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
kill $SERVER_PID
wait $SERVER_PID
set +e
# The log must mention neither a failed nor a successful Vertex AI start.
if grep "failed to start Vertex AI service" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected Vertex AI service is disabled\n***"
    RET=1
fi
if grep "Started Vertex AI HTTPService at" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected Vertex AI service is disabled\n***"
    RET=1
fi
set -e

# Case 2: flag given explicitly -> Vertex AI must come up.
SERVER_ARGS="${BASE_SERVER_ARGS} --allow-vertex-ai=true"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
kill $SERVER_PID
wait $SERVER_PID
set +e
if ! grep "Started Vertex AI HTTPService at" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected Vertex AI service is enabled\n***"
    RET=1
fi
set -e

# Report the intermediate verdict; the script keeps running either way.
if [[ $RET -eq 0 ]]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

# Default true
# With AIP_MODE=PREDICTION the Vertex AI endpoint is on by default and the
# HTTP / GRPC endpoints are off by default; re-enable one of them at a time
# and grep the server log for the matching start-up messages.
export AIP_MODE=PREDICTION
SERVER_ARGS="--model-repository=single_model --allow-grpc=true"
# 'run_server' needs the HTTP endpoint, so start without waiting and give
# the server a fixed 10 seconds to come up instead.
run_server_nowait
sleep 10
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
kill $SERVER_PID
wait $SERVER_PID
set +e
# Vertex AI and GRPC must both have started ...
if ! grep "Started Vertex AI HTTPService at" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected Vertex AI service is enabled\n***"
    RET=1
fi
if ! grep "Started GRPCInferenceService at" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected GRPC service is enabled\n***"
    RET=1
fi
# ... while the log must be silent about HTTP, which is disabled.
if grep "failed to start HTTP service" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected HTTP service is disabled\n***"
    RET=1
fi
if grep "Started HTTPService at" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected HTTP service is disabled\n***"
    RET=1
fi
set -e

# Disable
# Vertex AI is switched off explicitly and HTTP is switched on explicitly;
# GRPC is not requested at all.
SERVER_ARGS="${BASE_SERVER_ARGS} --allow-vertex-ai=false --allow-http=true"
run_server
if [[ "$SERVER_PID" == "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi
kill $SERVER_PID
wait $SERVER_PID
set +e
# The log must be silent about Vertex AI, which is disabled.
if grep "failed to start Vertex AI service" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected Vertex AI service is disabled\n***"
    RET=1
fi
if grep "Started Vertex AI HTTPService at" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected Vertex AI service is disabled\n***"
    RET=1
fi
# HTTP must have started.
if ! grep "Started HTTPService at" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected HTTP service is enabled\n***"
    RET=1
fi
# The log must be silent about GRPC, which is disabled.
if grep "failed to start GRPC service" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected GRPC service is disabled\n***"
    RET=1
fi
if grep "Started GRPCInferenceService at" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected GRPC service is disabled\n***"
    RET=1
fi
set -e

#
# Test missing route
#
# The server is expected to refuse to start when either the predict route
# or the health route is undefined, and to log which one is missing.

# Case 1: only AIP_HEALTH_ROUTE is defined.
unset_vertex_variables
export AIP_HEALTH_ROUTE="/health"

SERVER_ARGS="--allow-vertex-ai=true --model-repository=single_model"
run_server_nowait
vertex_ai_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" == "0" ]; then
    echo -e "\n***\n*** Expect failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    RET=1
else
  set +e
  # NOTE(review): the pattern reads "API_" rather than "AIP_"; it has to
  # match the server's message verbatim -- confirm against the server source.
  grep "API_PREDICT_ROUTE is not defined for Vertex AI endpoint" $SERVER_LOG
  # Keep grep's status before 'set -e' runs: 'set' itself succeeds and would
  # overwrite $? with 0, turning the check below into a no-op.
  GREP_RET=$?
  set -e
  if [ $GREP_RET -ne 0 ]; then
      echo -e "\n***\n*** Failed. Expected error on using undefined route\n***"
      RET=1
  fi
fi

# Case 2: only AIP_PREDICT_ROUTE is defined (same SERVER_ARGS as above).
unset_vertex_variables
export AIP_PREDICT_ROUTE="/predict"
run_server_nowait
vertex_ai_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" == "0" ]; then
    echo -e "\n***\n*** Expect failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    RET=1
else
  set +e
  grep "AIP_HEALTH_ROUTE is not defined for Vertex AI endpoint" $SERVER_LOG
  # Same as above: capture the status before 'set -e' resets $?.
  GREP_RET=$?
  set -e
  if [ $GREP_RET -ne 0 ]; then
      echo -e "\n***\n*** Failed. Expected error on using undefined route\n***"
      RET=1
  fi
fi

#
# Test endpoints
#
# Both routes are defined and Vertex AI is enabled on the single-model
# repository; the health route is probed first, then the client test runs.
unset_vertex_variables
export AIP_HEALTH_ROUTE="/health"
export AIP_PREDICT_ROUTE="/predict"

SERVER_ARGS="--allow-vertex-ai=true --model-repository=single_model"
run_server_nowait
# health
vertex_ai_wait_for_server_ready $SERVER_PID 10
if [[ "$WAIT_RET" != "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    cat $SERVER_LOG
    exit 1
fi

# predict (single model)
set +e
if ! python $CLIENT_TEST_SCRIPT >>$CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
elif ! check_test_results $TEST_RESULT_FILE $UNIT_TEST_COUNT; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

#
# AIP_STORAGE_URI / AIP_HTTP_PORT
#
# No --model-repository is passed here: the repository location comes from
# AIP_STORAGE_URI, and the endpoint is moved to the port in AIP_HTTP_PORT.
unset_vertex_variables
export AIP_HEALTH_ROUTE="/health"
export AIP_PREDICT_ROUTE="/predict"
export AIP_HTTP_PORT=5234
export AIP_STORAGE_URI=single_model

SERVER_ARGS="--allow-vertex-ai=true"
run_server_nowait
vertex_ai_wait_for_server_ready $SERVER_PID 10
if [[ "$WAIT_RET" != "0" ]]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    cat $SERVER_LOG
    exit 1
fi

set +e
if ! python $CLIENT_TEST_SCRIPT >>$CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
elif ! check_test_results $TEST_RESULT_FILE $UNIT_TEST_COUNT; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Result Verification Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

#
# default model
#
# Negative cases: the server is expected to refuse to start when the
# requested default model is not in the repository, and when the repository
# holds several models but no default model is named.
unset_vertex_variables
export AIP_MODE=PREDICTION
export AIP_PREDICT_ROUTE="/predict"
export AIP_HEALTH_ROUTE="/health"

# Case 1: 'subadd' is requested but single_model only contains 'addsub'.
export AIP_STORAGE_URI=single_model
SERVER_ARGS="--vertex-ai-default-model=subadd"
run_server_nowait
vertex_ai_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" == "0" ]; then
    echo -e "\n***\n*** Expect failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    RET=1
else
  set +e
  grep "Expect the default model 'subadd' is loaded" $SERVER_LOG
  # Keep grep's status before 'set -e' runs: 'set' itself succeeds and would
  # overwrite $? with 0, turning the check below into a no-op.
  GREP_RET=$?
  set -e
  if [ $GREP_RET -ne 0 ]; then
      echo -e "\n***\n*** Failed. Expected error on nonexistent default model\n***"
      RET=1
  fi
fi

# Case 2: multi_models holds two models and no default model is given.
export AIP_STORAGE_URI=multi_models
SERVER_ARGS=""
run_server_nowait
vertex_ai_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" == "0" ]; then
    echo -e "\n***\n*** Expect failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    RET=1
else
  set +e
  grep "Expect the model repository contains only a single model if default model is not specified" $SERVER_LOG
  # Same as above: capture the status before 'set -e' resets $?.
  GREP_RET=$?
  set -e
  if [ $GREP_RET -ne 0 ]; then
      echo -e "\n***\n*** Failed. Expected error on unspecified default model\n***"
      RET=1
  fi
fi

# Test AIP_STORAGE_URI won't be used if model repository is specified
# An explicit --model-repository is passed while AIP_STORAGE_URI is still
# exported from the previous scenario; only the explicit repository should
# be served.
SERVER_ARGS="--model-repository=single_model"
run_server_nowait
vertex_ai_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    # Startup failure is fatal for the remaining checks.
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    cat $SERVER_LOG
    exit 1
fi

set +e
# subadd should not be loaded
# The readiness query is sent through the predict route using the
# X-Vertex-Ai-Triton-Redirect header; any status other than 200 is accepted.
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/models/subadd/ready" localhost:8080/predict`
if [ "$code" == "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Expect 'subadd' is not loaded\n***"
    RET=1
fi
# The regular client tests must still pass against the explicit repository.
python $CLIENT_TEST_SCRIPT >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE $UNIT_TEST_COUNT
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e
# Stop the server and reap it before the next scenario.
kill $SERVER_PID
wait $SERVER_PID

# Test default model as well as multi model
# NOTE(review): no --model-repository is passed here, so the repository
# presumably comes from the AIP_STORAGE_URI exported earlier -- confirm.
SERVER_ARGS="--vertex-ai-default-model=addsub"
run_server_nowait
vertex_ai_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    # Startup failure is fatal for the remaining checks.
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    cat $SERVER_LOG
    exit 1
fi

# Errexit is suspended so client failures are recorded in RET instead of
# aborting the script.
set +e
python $CLIENT_TEST_SCRIPT >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE $UNIT_TEST_COUNT
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

# Defer the server exit to test redirection at the same time

#
# Redirect
#
# Every check below POSTs to the predict route with an
# X-Vertex-Ai-Triton-Redirect header naming another endpoint, stores the
# response body in ./curl.out and captures the HTTP status in $code.
# Errexit is suspended around each check so failures only set RET=1.

# Metrics
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: metrics" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    # The body must contain at least one inference metric.
    grep "nv_inference_request_success" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected metrics are returned\n***"
        RET=1
    fi
fi
set -e

# All Model stats
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/models/stats" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    # Statistics for both models are expected in the response.
    grep "model_stats" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected model stats are returned\n***"
        RET=1
    fi
    grep "addsub" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected 'addsub' model stats are returned\n***"
        RET=1
    fi
    grep "subadd" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected 'subadd' model stats are returned\n***"
        RET=1
    fi
fi
set -e

# Single model stats
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/models/subadd/stats" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    grep "model_stats" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected model stats are returned\n***"
        RET=1
    fi
    # Inverted check: only 'subadd' was requested, so 'addsub' must be absent.
    grep "addsub" ./curl.out
    if [ $? -eq 0 ]; then
        echo -e "\n***\n*** Failed. Unexpected 'addsub' model stats are returned\n***"
        RET=1
    fi
    grep "subadd" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected 'subadd' model stats are returned\n***"
        RET=1
    fi
fi
set -e

# Server health
# Only the status code is checked for this endpoint.
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/health/live" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

# Model ready
# Only the status code is checked for this endpoint.
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/models/addsub/ready" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

# Server metadata
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    grep "extensions" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected server metadata are returned\n***"
        RET=1
    fi
fi
set -e

# Model metadata
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/models/addsub" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    grep "platform" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected model metadata are returned\n***"
        RET=1
    fi
fi
set -e

# Model config
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/models/addsub/config" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    grep "version_policy" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected model configuration are returned\n***"
        RET=1
    fi
fi
set -e

# shared memory (only test "status" as register requires shared memory allocation)
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/systemsharedmemory/status" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    # Inverted check: a "name" field would indicate a registered region.
    grep "name" ./curl.out
    if [ $? -eq 0 ]; then
        echo -e "\n***\n*** Failed. Expected no region is registered\n***"
        RET=1
    fi
fi
set -e

# cuda shared memory (only test "status" as register requires shared memory allocation)
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/cudasharedmemory/status" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    # Inverted check: a "name" field would indicate a registered region.
    grep "name" ./curl.out
    if [ $? -eq 0 ]; then
        echo -e "\n***\n*** Failed. Expected no region is registered\n***"
        RET=1
    fi
fi
set -e

# repository index
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/repository/index" localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    # The index must report a state and list both models.
    grep "state" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected model index are returned\n***"
        RET=1
    fi
    grep "addsub" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected 'addsub' in the index\n***"
        RET=1
    fi
    grep "subadd" ./curl.out
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected 'subadd' in the index\n***"
        RET=1
    fi
fi
set -e

# repository control via redirect is blocked unconditionally
# A model unload requested through the redirect header must be answered
# with 403.
rm -f ./curl.out
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/repository/models/subadd/unload" localhost:8080/predict`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected model unload via redirect to return 403 (got $code)\n***"
    RET=1
fi
set -e

# All redirect checks are done; stop the server whose exit was deferred.
kill $SERVER_PID
wait $SERVER_PID

#
# Restricted API regression for Vertex redirect
#
# The server is started with --http-restricted-api so that the listed API
# groups require the header "X-Vertex-Restricted: secret". The checks below
# verify how redirected requests behave without the header, with the
# correct header, and with a wrong header value.
unset_vertex_variables
export AIP_PREDICT_ROUTE="/predict"
export AIP_HEALTH_ROUTE="/health"

SERVER_LOG="vertex_restricted_api_testing_server.log"
SERVER_ARGS="--log-verbose=1 --allow-vertex-ai=true \
  --model-repository=restricted_single_model \
  --vertex-ai-default-model=identity_fp32 \
  --http-restricted-api=metadata,model-config,model-repository,statistics,shared-memory:X-Vertex-Restricted=secret"

run_server_nowait
vertex_ai_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    # Startup failure is fatal for the remaining checks.
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    cat $SERVER_LOG
    exit 1
fi

# Baseline infer remains available without restricted header
set +e
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "Content-Type: application/json" -d'{"inputs":[{"name":"INPUT0","datatype":"FP32","shape":[1,1],"data":[42.0]}],"outputs":[{"name":"OUTPUT0"}]}' localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected /predict inference succeeds without restricted header\n***"
    RET=1
else
    # A successful inference must also return the requested output.
    grep "OUTPUT0" ./curl.out
    if [ $? -ne 0 ]; then
        cat ./curl.out
        echo -e "\n***\n*** Failed. Expected inference output is returned\n***"
        RET=1
    fi
fi
set -e

# Redirected read-only APIs should be blocked without restricted header
# Errexit stays suspended from here until the `set -e` that follows the
# statistics check below.
set +e
for redirect_endpoint in \
    "v2" \
    "v2/models/identity_fp32/config" \
    "v2/repository/index" \
    "v2/systemsharedmemory/status" \
    "v2/cudasharedmemory/status"
do
    rm -f ./curl.out
    code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: ${redirect_endpoint}" localhost:8080/predict`
    if [ "$code" != "403" ]; then
        cat ./curl.out
        echo -e "\n***\n*** Failed. Expected ${redirect_endpoint} is restricted\n***"
        RET=1
    fi
done

# Mutating shared memory operations are blocked through redirect
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/systemsharedmemory/region/test/register" -H "Content-Type: application/json" -d '{"key":"test_shm","byte_size":1024}' localhost:8080/predict`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected shared memory register is blocked via redirect\n***"
    RET=1
fi

# Model load redirect is unconditionally blocked through the prediction endpoint
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/repository/models/identity_fp32/load" localhost:8080/predict`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected model load via redirect is blocked\n***"
    RET=1
fi

# Model unload redirect is unconditionally blocked through the prediction endpoint
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/repository/models/identity_fp32/unload" localhost:8080/predict`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected model unload via redirect is blocked\n***"
    RET=1
fi

# Statistics redirect without restricted header should be blocked
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/models/identity_fp32/stats" localhost:8080/predict`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected model statistics via redirect is restricted\n***"
    RET=1
fi
set -e

# Restricted header should allow read-only handler invocation
# Errexit stays suspended from here until the `set -e` at the end of the
# wrong-header loop below.
set +e
for redirect_endpoint in \
    "v2" \
    "v2/models/identity_fp32/config" \
    "v2/repository/index" \
    "v2/systemsharedmemory/status" \
    "v2/cudasharedmemory/status"
do
    rm -f ./curl.out
    code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: ${redirect_endpoint}" \
    -H "X-Vertex-Restricted: secret" localhost:8080/predict`
    if [ "$code" != "200" ]; then
        cat ./curl.out
        echo -e "\n***\n*** Failed. Expected ${redirect_endpoint} passes with restricted header\n***"
        RET=1
    fi
done

# Mutating shared memory operations remain blocked even with valid header
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/systemsharedmemory/region/test/register" -H "X-Vertex-Restricted: secret" -H "Content-Type: application/json" -d '{"key":"test_shm","byte_size":1024}' localhost:8080/predict`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected shared memory register is blocked via redirect even with valid header\n***"
    RET=1
fi

# Model load remains blocked even with valid restricted header
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/repository/models/identity_fp32/load" -H "X-Vertex-Restricted: secret" localhost:8080/predict`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected model load is blocked via redirect even with valid header\n***"
    RET=1
fi

# Model unload remains blocked even with valid restricted header
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/repository/models/identity_fp32/unload" -H "X-Vertex-Restricted: secret" localhost:8080/predict`
if [ "$code" != "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected model unload is blocked via redirect even with valid header\n***"
    RET=1
fi

# Statistics redirect with restricted header should pass
# NOTE(review): unlike the read-only loop above, this only rejects a 403;
# any other non-200 status would be accepted -- confirm that is intended.
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: v2/models/identity_fp32/stats" -H "X-Vertex-Restricted: secret" localhost:8080/predict`
if [ "$code" == "403" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected model statistics passes with restricted header\n***"
    RET=1
fi

# Wrong restricted header should reject the request
# NOTE(review): errexit is already suspended at this point, so this
# `set +e` is redundant.
set +e
for redirect_endpoint in \
    "v2" \
    "v2/models/identity_fp32/config" \
    "v2/repository/index" \
    "v2/systemsharedmemory/status" \
    "v2/cudasharedmemory/status" \
    "v2/systemsharedmemory/region/test/register"
do
    rm -f ./curl.out
    # The register endpoint additionally needs a JSON body; every other
    # endpoint is queried with headers only.
    if [ "$redirect_endpoint" != "v2/systemsharedmemory/region/test/register" ]; then
       code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: ${redirect_endpoint}" \
       -H "X-Vertex-Restricted: invalid" localhost:8080/predict`
    else
        code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "X-Vertex-Ai-Triton-Redirect: ${redirect_endpoint}" \
        -H "X-Vertex-Restricted: invalid" -H "Content-Type: application/json" -d '{"key":"test_shm","byte_size":1024}' localhost:8080/predict`
    fi
    if [ "$code" != "403" ]; then
        cat ./curl.out
        echo -e "\n***\n*** Failed. Expected invalid restricted header value is rejected for ${redirect_endpoint}\n***"
        RET=1
    fi
done
set -e

# Stop the restricted-API server and reap it before the next scenario.
kill $SERVER_PID
wait $SERVER_PID

#
# HTTP max input size enforcement on Vertex AI endpoint
#
# The server is started with --http-max-input-size=128; a small request
# must succeed and a larger one must not return 200.
unset_vertex_variables
export AIP_PREDICT_ROUTE="/predict"
export AIP_HEALTH_ROUTE="/health"

SERVER_LOG="vertex_max_input_size_server.log"
SERVER_ARGS="--allow-vertex-ai=true \
  --model-repository=restricted_single_model \
  --vertex-ai-default-model=identity_fp32 \
  --http-max-input-size=128"
run_server_nowait
vertex_ai_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    # Startup failure is fatal for the remaining checks.
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    cat $SERVER_LOG
    exit 1
fi

# Errexit is suspended so a failing curl only affects RET.
set +e

# Small payload under 128 bytes should succeed
rm -f ./curl.out
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "Content-Type: application/json" -d'{"inputs":[{"name":"INPUT0","datatype":"FP32","shape":[1,1],"data":[1.0]}],"outputs":[{"name":"OUTPUT0"}]}' localhost:8080/predict`
if [ "$code" != "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected small payload to succeed on Vertex AI endpoint (got $code)\n***"
    RET=1
fi

# Large payload over 128 bytes should be rejected
# Only a 200 response counts as a failure; the exact rejection status is
# not asserted.
rm -f ./curl.out
LARGE_PAYLOAD='{"inputs":[{"name":"INPUT0","datatype":"FP32","shape":[1,16],"data":[1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0]}],"outputs":[{"name":"OUTPUT0"}]}'
code=`curl -s -w %{http_code} -o ./curl.out -X POST -H "Content-Type: application/json" -d"$LARGE_PAYLOAD" localhost:8080/predict`
if [ "$code" == "200" ]; then
    cat ./curl.out
    echo -e "\n***\n*** Failed. Expected oversized payload to be rejected on Vertex AI endpoint\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Report the accumulated result; RET is used as the script's exit status.
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi
exit $RET


================================================
FILE: qa/L0_vertex_ai/vertex_ai_test.py
================================================
#!/usr/bin/python
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import sys
import unittest

import numpy as np
import requests
import test_util as tu
import tritonclient.http as httpclient


class VertexAiTest(tu.TestResultCollector):
    """Exercise the Vertex AI prediction endpoint of a running server.

    The target URL is built from the ``AIP_HTTP_PORT`` and
    ``AIP_PREDICT_ROUTE`` environment variables (defaults: ``8080`` and
    ``/predict``).  Every test sends the same two ``[1, 16]`` INT32 inputs.
    By default the expectations are OUTPUT0 == 2 * input and OUTPUT1 == 0;
    ``test_predict_specified_model`` swaps them when
    ``TEST_EXPLICIT_MODEL_NAME`` names a model other than ``addsub``.
    """

    # Content-Type template for requests carrying binary tensor data; the
    # placeholder receives the JSON header length reported by
    # ``generate_request_body``.
    _BINARY_CONTENT_TYPE = (
        "application/vnd.vertex-ai-triton.binary+json;json-header-size={}"
    )

    def setUp(self):
        port = os.getenv("AIP_HTTP_PORT", "8080")
        predict_endpoint = os.getenv("AIP_PREDICT_ROUTE", "/predict")
        self.model_ = os.getenv("TEST_EXPLICIT_MODEL_NAME", "addsub")
        self.url_ = "http://localhost:{}{}".format(port, predict_endpoint)
        self.input_data_ = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        self.expected_output0_data_ = [x * 2 for x in self.input_data_]
        self.expected_output1_data_ = [0 for x in self.input_data_]

    def _generate_request_body(self, input0_binary=False, output0_binary=False):
        """Build the inference request shared by all tests.

        Both inputs carry ``self.input_data_`` as a ``[1, 16]`` INT32 tensor.
        Only INPUT0 and OUTPUT0 can be switched to binary encoding; INPUT1
        and OUTPUT1 are always JSON.

        Returns the ``(request_body, header_length)`` pair produced by
        ``InferenceServerClient.generate_request_body``.
        """
        input_data = np.array(self.input_data_, dtype=np.int32)
        input_data = np.expand_dims(input_data, axis=0)

        inputs = [
            httpclient.InferInput("INPUT0", [1, 16], "INT32"),
            httpclient.InferInput("INPUT1", [1, 16], "INT32"),
        ]
        inputs[0].set_data_from_numpy(input_data, binary_data=input0_binary)
        inputs[1].set_data_from_numpy(input_data, binary_data=False)

        outputs = [
            httpclient.InferRequestedOutput("OUTPUT0", binary_data=output0_binary),
            httpclient.InferRequestedOutput("OUTPUT1", binary_data=False),
        ]
        return httpclient.InferenceServerClient.generate_request_body(
            inputs, outputs=outputs
        )

    def _post(self, request_body, headers):
        """POST ``request_body`` to the predict URL and return the response."""
        return requests.post(self.url_, data=request_body, headers=headers)

    def _assert_outputs(
        self, result, expected_output0_data=None, expected_output1_data=None
    ):
        """Check OUTPUT0/OUTPUT1 of ``result`` element by element.

        When an expectation is omitted, the default computed in ``setUp`` is
        used.
        """
        if expected_output0_data is None:
            expected_output0_data = self.expected_output0_data_
        if expected_output1_data is None:
            expected_output1_data = self.expected_output1_data_

        output0_data = result.as_numpy("OUTPUT0")
        output1_data = result.as_numpy("OUTPUT1")
        expected_pairs = zip(expected_output0_data, expected_output1_data)
        for i, (expected0, expected1) in enumerate(expected_pairs):
            self.assertEqual(output0_data[0][i], expected0)
            self.assertEqual(output1_data[0][i], expected1)

    def _assert_bad_request(self, request_body, content_type):
        """POST with ``content_type`` and require an HTTP 400 response."""
        r = self._post(request_body, {"Content-Type": content_type})
        self.assertEqual(
            400,
            r.status_code,
            "Expected error code {} returned for the request; got: {}".format(
                400, r.status_code
            ),
        )

    def test_predict(self):
        request_body, _ = self._generate_request_body()

        r = self._post(request_body, {"Content-Type": "application/json"})
        r.raise_for_status()

        result = httpclient.InferenceServerClient.parse_response_body(r.content)
        self._assert_outputs(result)

    def test_predict_specified_model(self):
        request_body, _ = self._generate_request_body()

        # The redirect header routes the request to an explicitly named model
        # instead of the default one.
        headers = {
            "Content-Type": "application/json",
            "X-Vertex-Ai-Triton-Redirect": "v2/models/{}/infer".format(self.model_),
        }
        r = self._post(request_body, headers)
        r.raise_for_status()

        result = httpclient.InferenceServerClient.parse_response_body(r.content)

        doubled = [x * 2 for x in self.input_data_]
        zeros = [0 for x in self.input_data_]
        if self.model_ == "addsub":
            self._assert_outputs(result, doubled, zeros)
        else:
            # Any other model is expected to produce the outputs swapped.
            self._assert_outputs(result, zeros, doubled)

    def test_predict_request_binary(self):
        request_body, header_length = self._generate_request_body(input0_binary=True)

        headers = {"Content-Type": self._BINARY_CONTENT_TYPE.format(header_length)}
        r = self._post(request_body, headers)
        r.raise_for_status()

        result = httpclient.InferenceServerClient.parse_response_body(r.content)
        self._assert_outputs(result)

    def test_predict_response_binary(self):
        request_body, _ = self._generate_request_body(output0_binary=True)

        r = self._post(request_body, {"Content-Type": "application/json"})
        r.raise_for_status()

        # The response mixes a JSON header with binary data; the header size
        # is reported by the server and needed to split the two.
        header_length_str = r.headers["Inference-Header-Content-Length"]
        result = httpclient.InferenceServerClient.parse_response_body(
            r.content, header_length=int(header_length_str)
        )
        self._assert_outputs(result)

    def test_malformed_binary_header(self):
        # Extra text in front of an otherwise valid binary content type.
        request_body, header_length = self._generate_request_body(input0_binary=True)
        self._assert_bad_request(
            request_body,
            "additional-string/" + self._BINARY_CONTENT_TYPE.format(header_length),
        )

    def test_malformed_binary_header_not_number(self):
        # The header size is not a number.
        request_body, header_length = self._generate_request_body(input0_binary=True)
        self._assert_bad_request(
            request_body,
            self._BINARY_CONTENT_TYPE.format(
                "additional-string{}".format(header_length)
            ),
        )

    def test_malformed_binary_header_negative_number(self):
        # The header size is negative.
        request_body, _ = self._generate_request_body(input0_binary=True)
        self._assert_bad_request(request_body, self._BINARY_CONTENT_TYPE.format(-123))

    def test_malformed_binary_header_large_number(self):
        # The header size (12345) does not match the generated request body.
        request_body, _ = self._generate_request_body(input0_binary=True)
        self._assert_bad_request(request_body, self._BINARY_CONTENT_TYPE.format(12345))


if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/L0_warmup/decoupled/1/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Test model that never produces any response data.

    Every request is closed out by sending only the final-completion flag
    through its response sender.
    """

    def execute(self, requests):
        """Send the COMPLETE_FINAL flag for each request.

        Parameters
        ----------
        requests : iterable
            Requests handed to the model; each one must provide
            ``get_response_sender()``.

        Returns
        -------
        None
            Nothing is returned; completion is signalled only through the
            response senders.
        """
        for pending in requests:
            sender = pending.get_response_sender()
            # Only the flag is sent: no response object accompanies it.
            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)


================================================
FILE: qa/L0_warmup/decoupled/config.pbtxt
================================================
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "decoupled" warmup test model (python backend).
name: "decoupled"
backend: "python"
# Single FP32 input of shape [ 4 ].
input [
  {
    name: "INPUT"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
# Single FP32 output of shape [ 4 ].
output [
  {
    name: "OUTPUT"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
# Place the model instance on CPU.
instance_group [{ kind: KIND_CPU }]
# One warmup sample, "decoupled sample": batch size 1 with a zero-filled
# FP32 "INPUT" of 4 elements.
model_warmup [
{
    name : "decoupled sample"
    batch_size: 1
    inputs {
        key: "INPUT"
        value: {
            data_type: TYPE_FP32
            dims: 4
            zero_data: true
        }
    }
}]
# Enable the decoupled transaction policy for this model.
model_transaction_policy {
  decoupled: True
}

================================================
FILE: qa/L0_warmup/failing_infer/1/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Test model whose every inference fails with a fixed error."""

    def execute(self, requests):
        """Build one error response per incoming request.

        Parameters
        ----------
        requests : iterable
            Requests handed to the model; their contents are ignored.

        Returns
        -------
        list
            One ``pb_utils.InferenceResponse`` per request, each carrying no
            output tensors and the error "An Error Occurred".
        """
        # The returned list must hold pb_utils.InferenceResponse objects and
        # have the same length as `requests`, so it is built element for
        # element from the incoming requests.
        return [
            pb_utils.InferenceResponse(
                output_tensors=[], error=pb_utils.TritonError("An Error Occurred")
            )
            for _ in requests
        ]


================================================
FILE: qa/L0_warmup/failing_infer/config.pbtxt
================================================
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "failing_infer" warmup test model (python backend).
name: "failing_infer"
backend: "python"
# Single FP32 input of shape [ 4 ].
input [
  {
    name: "INPUT"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
# Single FP32 output of shape [ 4 ].
output [
  {
    name: "OUTPUT"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
# Place the model instance on CPU.
instance_group [{ kind: KIND_CPU }]
# One warmup sample, "zero sample": batch size 1 with a zero-filled FP32
# "INPUT" of 4 elements.
model_warmup [
{
    name : "zero sample"
    batch_size: 1
    inputs {
        key: "INPUT"
        value: {
            data_type: TYPE_FP32
            dims: 4
            zero_data: true
        }
    }
}]


================================================
FILE: qa/L0_warmup/test.sh
================================================
#!/bin/bash
# Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the model repository version: the first positional argument, when
# present, takes precedence over NVIDIA_TRITON_SERVER_VERSION.
if [ "$#" -lt 1 ]; then
    REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
else
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
# Append the architecture suffix when TEST_REPO_ARCH is set.
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Expose only the first GPU to the processes started by this test.
export CUDA_VISIBLE_DEVICES=0

# Client-side settings.
CLIENT=../clients/image_client
CLIENT_LOG="./client.log"
CLIENT_PY=./test_infer_shm_leak.py
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'

IMAGE="../images/vulture.jpeg"

# Model repository that each test section below rebuilds from scratch.
DATADIR="$(pwd)/models"

# Backends to exercise; default to all of them when BACKENDS is unset or empty.
BACKENDS=${BACKENDS:-"onnx libtorch plan"}

# Server-side settings, defined before the shared helpers are sourced.
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR --log-verbose=1 --exit-timeout-secs=120"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

# Overall result (0 = pass); also drop any *.txt files left in this directory.
RET=0
rm -rf *.txt

for BACKEND in ${BACKENDS}; do
    # Start each backend pass with fresh logs and an empty model repository.
    rm -f $SERVER_LOG $CLIENT_LOG
    rm -fr models && mkdir models

    # Test for fixed-size data type, using the addsub model and the int32
    # sequence model of the current backend.
    cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/${BACKEND}_float32_float32_float32 models/. && \
    cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/${BACKEND}_sequence_int32 models/.

    # Tensor names used in the warmup settings; libtorch models use the
    # '__<index>' naming for the identity and sequence tensors.
    INPUT_PREFIX="INPUT"
    if [ "$BACKEND" == "libtorch" ]; then
        IDENTITY_INPUT_PREFIX="INPUT__"
        SEQ_INPUT="INPUT__0"
        START="START__1"
        READY="READY__2"
    else
        IDENTITY_INPUT_PREFIX="INPUT"
        SEQ_INPUT="INPUT"
        START="START"
        READY="READY"
    fi

    # Append a warmup instruction (batch size 1) to the stateless model's
    # config: one sample whose first input is zero-filled and whose second
    # input is random.
    (cd models/${BACKEND}_float32_float32_float32 && cat >> config.pbtxt <<EOF
model_warmup [{
    name : "regular sample"
    batch_size: 1
    inputs {
        key: "${INPUT_PREFIX}0"
        value: {
            data_type: TYPE_FP32
            dims: 16
            zero_data: true
        }
    }
    inputs {
        key: "${INPUT_PREFIX}1"
        value: {
            data_type: TYPE_FP32
            dims: 16
            random_data: true
        }
    }
}]
EOF
    )

    # Append a warmup instruction (batch size 8) to the sequence model's
    # config; the control tensors have to be specified as well. The sample
    # uses zero data and is repeated through 'count : 2'.
    #
    # For a realistic sequence model 'count' may not work well: such a model
    # expects a valid sequence of requests, which should be expressed as a
    # series of warmup samples, and 'count > 1' simply resends one sample,
    # which may invalidate the sequence. That is acceptable here because the
    # synthetic model is not data sensitive.
    (cd models/${BACKEND}_sequence_int32 && cat >> config.pbtxt <<EOF
model_warmup [{
    name : "sequence sample"
    count : 2
    batch_size: 8
    inputs {
        key: "${SEQ_INPUT}"
        value: {
            data_type: TYPE_INT32
            dims: 1
            zero_data: true
        }
    }
    inputs {
        key: "${START}"
        value: {
            data_type: TYPE_INT32
            dims: 1
            zero_data: true
        }
    }
    inputs {
        key: "${READY}"
        value: {
            data_type: TYPE_INT32
            dims: 1
            zero_data: true
        }
    }
}]
EOF
    )

    run_server
    if [ "$SERVER_PID" == "0" ]; then
        echo -e "\n***\n*** Failed to start $SERVER\n***"
        cat $SERVER_LOG
        exit 1
    fi

    # The log checks below are expected to be able to fail without aborting
    # the script, so errexit is switched off around them.
    set +e

    if ! grep "is running warmup sample 'regular sample'" $SERVER_LOG; then
        echo -e "\n***\n*** Failed. Expected warmup for stateless model\n***"
        RET=1
    fi
    if ! grep "is running warmup sample 'sequence sample' for iteration 1" $SERVER_LOG; then
        echo -e "\n***\n*** Failed. Expected 1st warmup iteration for stateful model\n***"
        RET=1
    fi
    if ! grep "is running warmup sample 'sequence sample' for iteration 2" $SERVER_LOG; then
        echo -e "\n***\n*** Failed. Expected 2nd warmup iteration for stateful model\n***"
        RET=1
    fi
    # Any warmup failure message in the log is an error for this section.
    if grep "failed to run warmup" $SERVER_LOG; then
        echo -e "\n***\n*** Failed. Expected no warmup error\n***"
        RET=1
    fi

    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    # Test for variable-size data type (string); only run for the onnx backend.
    rm -fr models && mkdir models
    SUPPORT_STRING=0
    if [[ $BACKEND == "onnx" ]]; then
        SUPPORT_STRING=1
    fi
    if [ "$SUPPORT_STRING" == "1" ]; then
        cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/${BACKEND}_sequence_object models/.
        cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/${BACKEND}_zero_1_object models/.

        # Stateless identity model: two warmup samples (batch size 1), one
        # with zero data and one with random data.
        (cd models/${BACKEND}_zero_1_object && cat >> config.pbtxt <<EOF
model_warmup [
{
    name : "zero string stateless"
    batch_size: 1
    inputs {
        key: "${IDENTITY_INPUT_PREFIX}0"
        value: {
            data_type: TYPE_STRING
            dims: 16
            zero_data: true
        }
    }
},
{
    name : "random string stateless"
    batch_size: 1
    inputs {
        key: "${IDENTITY_INPUT_PREFIX}0"
        value: {
            data_type: TYPE_STRING
            dims: 16
            random_data: true
        }
    }
}
]
EOF
        )

        # Sequence model: one warmup sample (batch size 8) whose string input
        # comes from a user-provided data file; the control tensors have to
        # be specified as well.
        (cd models/${BACKEND}_sequence_object && cat >> config.pbtxt <<EOF
model_warmup [{
    name : "string statefull"
    batch_size: 8
    inputs {
        key: "${SEQ_INPUT}"
        value: {
            data_type: TYPE_STRING
            dims: 1
            input_data_file: "raw_string_data"
        }
    }
    inputs {
        key: "${START}"
        value: {
            data_type: TYPE_INT32
            dims: 1
            zero_data: true
        }
    }
    inputs {
        key: "${READY}"
        value: {
            data_type: TYPE_INT32
            dims: 1
            zero_data: true
        }
    }
}]
EOF
        )

        # Write the string data file: a 4-byte little-endian length (3)
        # followed by the bytes of the single element "233".
        mkdir -p models/${BACKEND}_sequence_object/warmup && \
            (cd models/${BACKEND}_sequence_object/warmup && \
                echo -n -e '\x03\x00\x00\x00\x32\x33\x33' > raw_string_data)

        run_server
        if [ "$SERVER_PID" == "0" ]; then
            echo -e "\n***\n*** Failed to start $SERVER\n***"
            cat $SERVER_LOG
            exit 1
        fi

        # Let the log checks fail without aborting the script.
        set +e

        if ! grep "is running warmup sample 'zero string stateless'" $SERVER_LOG; then
            echo -e "\n***\n*** Failed. Expected warmup for zero string stateless model\n***"
            RET=1
        fi
        if ! grep "is running warmup sample 'random string stateless'" $SERVER_LOG; then
            echo -e "\n***\n*** Failed. Expected warmup for random string stateless model\n***"
            RET=1
        fi
        if ! grep "is running warmup sample 'string statefull'" $SERVER_LOG; then
            echo -e "\n***\n*** Failed. Expected warmup for string stateful model\n***"
            RET=1
        fi
        # Any warmup failure message in the log is an error for this section.
        if grep "failed to run warmup" $SERVER_LOG; then
            echo -e "\n***\n*** Failed. Expected no warmup error\n***"
            RET=1
        fi

        set -e

        kill $SERVER_PID
        wait $SERVER_PID
    fi

    # FIXME: This section of code doesn't check if the warmup model
    # is faster than the fresh model. Thus we are not losing any coverage
    # by commenting it out. The functionality of the warmup methods is
    # covered by other parts of this test, which will fail if the
    # *functionality* breaks.
    # if [ "$BACKEND" == "graphdef" ]; then
    #     # Show effect of warmup by using a TF model with TF-TRT optimization which is
    #     # known to be slow on first inference.
    #     # Note: model can be obtained via the fetching script in docs/example
    #     rm -fr models && \
    #         mkdir models && \
    #         cp -r /data/inferenceserver/${REPO_VERSION}/tf_model_store/inception_v3_graphdef models/.

    #     # Enable TF-TRT optimization
    #     (cd models/inception_v3_graphdef && \
    #         echo "optimization { execution_accelerators { gpu_execution_accelerator : [ { name : \"tensorrt\"} ] } }" >> config.pbtxt)

    #     # Duplicate the same model with warmup enabled
    #     cp -r models/inception_v3_graphdef models/inception_v3_warmup &&
    #         (cd models/inception_v3_warmup && \
    #             sed -i 's/inception_v3_graphdef/inception_v3_warmup/' config.pbtxt)

    #     (cd models/inception_v3_warmup && \
    #         echo 'model_warmup [{' >> config.pbtxt && \
    #         echo '    name : "image sample"' >> config.pbtxt && \
    #         echo '    batch_size: 1' >> config.pbtxt && \
    #         echo '    inputs {' >> config.pbtxt && \
    #         echo '        key: "input"' >> config.pbtxt && \
    #         echo '        value: {' >> config.pbtxt && \
    #         echo '            data_type: TYPE_FP32' >> config.pbtxt && \
    #         echo '            dims: [ 299, 299, 3 ]' >> config.pbtxt && \
    #         echo '            input_data_file: "raw_mug_data"' >> config.pbtxt && \
    #         echo '        }' >> config.pbtxt && \
    #         echo '    }' >> config.pbtxt && \
    #         echo '}]' >> config.pbtxt )

    #     # prepare provided data instead of synthetic one
    #     mkdir -p models/inception_v3_warmup/warmup && \
    #         cp raw_mug_data models/inception_v3_warmup/warmup/.

    #     run_server
    #     if [ "$SERVER_PID" == "0" ]; then
    #         echo -e "\n***\n*** Failed to start $SERVER\n***"
    #         cat $SERVER_LOG
    #         exit 1
    #     fi

    #     set +e

    #     grep "is running warmup sample 'image sample'" $SERVER_LOG
    #     if [ $? -ne 0 ]; then
    #         echo -e "\n***\n*** Failed. Expected warmup for image model\n***"
    #         RET=1
    #     fi
    #     grep "failed to run warmup" $SERVER_LOG
    #     if [ $? -eq 0 ]; then
    #         echo -e "\n***\n*** Failed. Expected no warmup error\n***"
    #         RET=1
    #     fi

    #     # Time the first inference for both models
    #     time $CLIENT -m inception_v3_graphdef -s INCEPTION $IMAGE -i grpc -u localhost:8001 >>$CLIENT_LOG 2>&1
    #     if [ $? -ne 0 ]; then
    #         echo -e "\n***\n*** Test Failed\n***"
    #         cat $CLIENT_LOG
    #         RET=1
    #     fi
    #     time $CLIENT -m inception_v3_warmup -s INCEPTION $IMAGE -i grpc -u localhost:8001 >>$CLIENT_LOG 2>&1
    #     if [ $? -ne 0 ]; then
    #         echo -e "\n***\n*** Test Failed\n***"
    #         cat $CLIENT_LOG
    #         RET=1
    #     fi

    #     set -e

    #     kill $SERVER_PID
    #     wait $SERVER_PID
    # fi
done

# Test warmup sample failure: load only the 'failing_infer' model, whose
# warmup sample is expected to fail and keep the server from starting.
rm -fr models && \
    mkdir models && \
    cp -r failing_infer models/.

run_server
if [ "$SERVER_PID" != "0" ]; then
    echo -e "\n***\n*** Expect fail to start $SERVER\n***"
    # The server came up although it should not have. Stop and reap it before
    # bailing out so the process is not left running after the test exits.
    # '|| true' keeps errexit from firing on the shutdown status.
    kill $SERVER_PID && wait $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi

# Let the log check fail without aborting the script.
set +e
# The server log must report the error raised by the model for the warmup
# sample declared in failing_infer/config.pbtxt.
if ! grep "failed to run warmup sample 'zero sample': An Error Occurred;" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected warmup error\n***"
    cat $SERVER_LOG
    RET=1
fi
set -e

# Test decoupled model: the server must start, run the model's warmup sample
# and log no warmup failure.
rm -fr models && mkdir models && cp -r decoupled models/.

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Let the log checks fail without aborting the script.
set +e
if ! grep "is running warmup sample 'decoupled sample'" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected warmup for decoupled model\n***"
    RET=1
fi
if grep "failed to run warmup" $SERVER_LOG; then
    echo -e "\n***\n*** Failed. Expected no warmup error\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# Test the onnx model to verify that the memory type of the output tensor
# remains unchanged with the warmup setting.
#
# Reinstall torch first, using the cu130 wheel listing as an extra source.
pip3 uninstall -y torch
pip3 install torch -f https://download.pytorch.org/whl/cu130

rm -fr models && mkdir models
cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_nobatch_float32_float32_float32 models/.

# Append a GPU instance group and a warmup sample covering both inputs to the
# onnx model's config. The delimiter is quoted, so the text is written
# verbatim; it starts with an empty line.
(cd models/onnx_nobatch_float32_float32_float32 && cat >> config.pbtxt <<'EOF'

instance_group [{
    kind : KIND_GPU
}]
model_warmup [{
    name : "sample"
    batch_size: 1
    inputs {
        key: "INPUT0"
        value: {
            data_type: TYPE_FP32
            dims: 16
            zero_data: false
        }
    }
    inputs {
        key: "INPUT1"
        value: {
            data_type: TYPE_FP32
            dims: 16
            zero_data: false
        }
    }
}]
EOF
)

# Add the BLS python model from the shared python_models directory.
mkdir -p models/bls_onnx_warmup/1/
cp ../python_models/bls_onnx_warmup/model.py models/bls_onnx_warmup/1/
cp ../python_models/bls_onnx_warmup/config.pbtxt models/bls_onnx_warmup/.

# Reuse the python backend's test client, rewriting its sys.path entry so the
# common directory resolves relative to this test directory.
cp ../L0_backend_python/test_infer_shm_leak.py .
sed -i 's#sys.path.append("../../common")#sys.path.append("../common")#g' test_infer_shm_leak.py

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Let the client run fail without aborting the script.
set +e

# MODEL_NAME is exported so the pytest process can see it.
export MODEL_NAME='bls_onnx_warmup'
if ! python3 -m pytest --junitxml=warmup.report.xml $CLIENT_PY >> $CLIENT_LOG 2>&1; then
    echo -e "\n***\n*** 'bls_onnx_warmup' test FAILED. \n***"
    cat $CLIENT_LOG
    RET=1
fi

set -e


kill $SERVER_PID
wait $SERVER_PID


# Print the overall verdict; on failure dump the client and server logs first.
if [ $RET -ne 1 ]; then
    echo -e "\n***\n*** Test Passed \n***"
else
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Test Failed \n***"
fi

# The script's exit status is the accumulated result.
exit $RET


================================================
FILE: qa/common/busy_op_kernel.cu.cc
================================================
// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if GOOGLE_CUDA
#define EIGEN_USE_GPU

#include <cuda_runtime.h>
#include <time.h>

#include "unsupported/Eigen/CXX11/Tensor"

__device__ long store_now[1];

// Spin on the device until at least num_delay_cycles[0] clock cycles have
// been counted since the kernel started. `out` is accepted but never written.
__global__ void
BusyLoopKernel(const int* num_delay_cycles, int* out)
{
  // Busy-wait technique as shown in
  // https://stackoverflow.com/questions/11217117/equivalent-of-usleep-in-cuda-kernel
  const clock_t begin = clock();

  while (true) {
    const clock_t current = clock();
    // If the counter is not past `begin`, assume it overflowed and add the
    // remaining distance to 0xffffffff instead.
    // NOTE(review): current == begin also takes the overflow branch, which
    // yields a very large elapsed count -- confirm this is acceptable.
    clock_t elapsed;
    if (current > begin) {
      elapsed = current - begin;
    } else {
      elapsed = current + (0xffffffff - begin);
    }
    if (elapsed >= num_delay_cycles[0]) {
      return;
    }
    // Store the value in device memory to prevent nvcc optimizations.
    store_now[0] = elapsed;
  }
}

// Launch BusyLoopKernel on the stream of `device` with a 1-block grid of 256
// threads and no dynamic shared memory, forwarding both pointers unchanged.
void
BusyLoopKernelLauncher(
    const Eigen::GpuDevice& device, const int* num_delay_cycles, int* out)
{
  BusyLoopKernel<<<1, 256, 0, device.stream()>>>(num_delay_cycles, out);
}

#endif


================================================
FILE: qa/common/check_copyright.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os
import pathlib
import re

# Parsed command-line flags. Assigned in the __main__ block at the bottom of
# this script; visit() reads FLAGS.verbose and FLAGS.year.
FLAGS = None
# Path suffixes that are never checked. visit() compares each entry with
# str.endswith(), so an entry may be a file extension or a complete file name
# (e.g. "pull_request_template.md").
SKIP_EXTS = (
    ".jpeg",
    ".jpg",
    ".pgm",
    ".png",
    ".log",
    ".preprocessed",
    ".jmx",
    ".gz",
    ".json",
    ".pdf",
    ".so",
    ".onnx",
    ".svg",
    "pull_request_template.md",
)
# Location of the repository root relative to the directory that contains
# this script; used below to compute repo_abs_path.
REPO_PATH_FROM_THIS_FILE = "../.."
# Repository-relative files and directories that are exempt from the
# copyright check. visit() resolves each entry against the repository root
# before comparing it with the path being visited.
SKIP_PATHS = (
    "build",
    "deploy/gke-marketplace-app/.gitignore",
    "deploy/gke-marketplace-app/server-deployer/chart/.helmignore",
    "deploy/gcp/.helmignore",
    "deploy/aws/.helmignore",
    "deploy/fleetcommand/.helmignore",
    "docs/.gitignore",
    "docs/_static/.gitattributes",
    "docs/examples/model_repository",
    "docs/examples/jetson",
    "docs/repositories.txt",
    "docs/exclusions.txt",
    "docker",
    "qa/ensemble_models/mix_platform_float32_float32_float32/output0_labels.txt",
    "qa/ensemble_models/mix_type_int32_float32_float32/output0_labels.txt",
    "qa/ensemble_models/mix_ensemble_int32_float32_float32/output0_labels.txt",
    "qa/ensemble_models/wrong_label_int32_float32_float32/output0_labels.txt",
    "qa/ensemble_models/label_override_int32_float32_float32/output0_labels.txt",
    "qa/L0_model_config/noautofill_platform",
    "qa/L0_model_config/autofill_noplatform",
    "qa/L0_model_config/autofill_noplatform_success",
    "qa/L0_model_config/special_cases",
    "qa/L0_model_config/cli_messages/cli_override/expected",
    "qa/L0_model_config/cli_messages/cli_deprecation/expected",
    "qa/L0_model_config/model_metrics",
    "qa/L0_model_config/custom_parameters",
    "qa/L0_model_namespacing/test_duplication",
    "qa/L0_model_namespacing/test_dynamic_resolution",
    "qa/L0_model_namespacing/test_ensemble_duplication",
    "qa/L0_model_namespacing/test_no_duplication",
    "qa/L0_perf_nomodel/baseline",
    "qa/L0_perf_nomodel/legacy_baseline",
    "qa/L0_warmup/raw_mug_data",
    "qa/L0_java_resnet/expected_output_data",
    "qa/L0_trt_dla_jetson/trt_dla_model_store",
    "qa/openvino_models/dynamic_batch",
    "qa/openvino_models/fixed_batch",
    "CITATION.cff",
    "TRITON_VERSION",
    ".github/ISSUE_TEMPLATE",
    ".github/PULL_REQUEST_TEMPLATE",
)

# Regular expression for the first line of the copyright header (without any
# comment prefix): "Copyright", an optional "(c)", a year or year range,
# optional further comma-separated years/ranges (two- or four-digit), and
# the NVIDIA attribution. visit() also uses the first character of this
# pattern ("C") to recognise an un-prefixed copyright line.
# NOTE(review): the '.' characters in the pattern are not escaped, so they
# match any character rather than only a literal period.
COPYRIGHT_YEAR_RE = "Copyright( \\(c\\))? 20[1-9][0-9](-(20)?[1-9][0-9])?(,((20[2-9][0-9](-(20)?[2-9][0-9])?)|([2-9][0-9](-[2-9][0-9])?)))*,? NVIDIA CORPORATION( & AFFILIATES)?. All rights reserved."

# License text that must follow the copyright line. visit() drops the first
# (empty) line of this literal and compares the remaining lines one by one,
# after adding the comment prefix detected on the copyright line.
COPYRIGHT = """

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
 * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
 * Neither the name of NVIDIA CORPORATION nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

# Absolute, resolved path of the repository root, derived from the location
# of this script.
repo_abs_path = (
    pathlib.Path(__file__).parent.joinpath(REPO_PATH_FROM_THIS_FILE).resolve()
)

# Compiled once at import time; visit() applies it with match(), i.e. anchored
# at the start of the copyright line.
copyright_year_re = re.compile(COPYRIGHT_YEAR_RE)


def _parse_copyright_years(copyright_row):
    """
    Extract the years listed on a copyright line.

    Parameters
    ----------
    copyright_row : str
        Copyright line without its comment prefix. It must already have
        matched COPYRIGHT_YEAR_RE.

    Returns
    -------
    list of int
        Four-digit years in the order they appear on the line; a year
        range contributes its first and its last year.
    """
    marker = "(c) " if "(c) " in copyright_row else "Copyright "
    year_tokens = copyright_row.split(marker)[1].split(" NVIDIA ")[0].split(",")

    years = []
    for year in year_tokens:
        if len(year) == 4:  # 2021
            years.append(int(year))
        elif len(year) == 2:  # 21
            years.append(int(year) + 2000)
        elif len(year) == 9:  # 2021-2022
            years.append(int(year[0:4]))
            years.append(int(year[5:9]))
        elif len(year) == 7:  # 2021-22
            years.append(int(year[0:4]))
            years.append(int(year[5:7]) + 2000)
        elif len(year) == 5:  # 21-23
            years.append(int(year[0:2]) + 2000)
            years.append(int(year[3:5]) + 2000)
    return years


def visit(path):
    """
    Check that the file at 'path' starts with the expected copyright
    header.

    The file is skipped (and treated as correct) when its name ends with
    an entry of SKIP_EXTS, when it is one of, or lies under one of, the
    SKIP_PATHS entries, when it is empty, or when it cannot be decoded as
    text. Otherwise the first significant line must be a copyright line
    accepted by COPYRIGHT_YEAR_RE, with years that are increasing and not
    later than FLAGS.year, followed by the COPYRIGHT body. The reason for
    any failure is printed.

    Parameters
    ----------
    path : str
        Path of the file to check.

    Returns
    -------
    bool
        True if the file was skipped or its header is correct, False
        otherwise.

    Raises
    ------
    UnicodeDecodeError
        If decoding fails after the first line of the file has already
        been read successfully.
    """
    if FLAGS.verbose:
        print("visiting " + path)

    # str.endswith() accepts a tuple of candidate suffixes.
    if path.endswith(tuple(SKIP_EXTS)):
        if FLAGS.verbose:
            print("skipping due to extension: " + path)
        return True

    # Match on path boundaries: an entry such as "build" must cover the
    # "build" directory and everything below it, but not a sibling whose
    # name merely starts with the same characters (e.g. "build.py").
    resolved_path = str(pathlib.Path(path).resolve())
    for skip in SKIP_PATHS:
        skip_path = str(repo_abs_path.joinpath(skip).resolve())
        if resolved_path == skip_path or resolved_path.startswith(
            skip_path + os.sep
        ):
            if FLAGS.verbose:
                print("skipping due to path prefix: " + path)
            return True

    with open(path, "r") as f:
        first_line = True
        line = None
        try:
            for fline in f:
                line = fline

                # Skip any '#!', '..', '<!--', '/*' or '{{/*' line at the
                # start of the file
                if first_line:
                    first_line = False
                    if fline.startswith(("#!", "..", "<!--", "/*", "{{/*")):
                        continue
                # Skip empty lines...
                if fline.strip():
                    break
        except UnicodeDecodeError:
            # If we get this exception on the first line then assume a
            # non-text file.
            if not first_line:
                raise
            if FLAGS.verbose:
                print("skipping binary file: " + path)
            return True

        if line is None:
            if FLAGS.verbose:
                print("skipping empty file: " + path)
            return True

        line = line.strip()

        # The next line must be the copyright line with a single year
        # or a year range. It is optionally allowed to have a '# ',
        # '// ' or '.. ' prefix.
        prefix = ""
        if line.startswith("# "):
            prefix = "# "
        elif line.startswith("// "):
            prefix = "// "
        elif line.startswith(".. "):
            prefix = ".. "
        elif not line.startswith(COPYRIGHT_YEAR_RE[0]):
            print(
                "incorrect prefix for copyright line, allowed prefixes "
                "'# ', '// ' or '.. ', for " + path + ": " + line
            )
            return False

        # Check if the copyright year line matches the regex
        # and see if the year(s) are reasonable
        copyright_row = line[len(prefix) :]
        if not copyright_year_re.match(copyright_row):
            print("copyright year is not recognized for " + path + ": " + line)
            return False
        years = _parse_copyright_years(copyright_row)

        if years[0] > FLAGS.year:
            print(
                "copyright start year greater than current year for "
                + path
                + ": "
                + line
            )
            return False
        if years[-1] > FLAGS.year:
            print(
                "copyright end year greater than current year for " + path + ": " + line
            )
            return False
        for i in range(1, len(years)):
            if years[i - 1] >= years[i]:
                print("copyright years are not increasing for " + path + ": " + line)
                return False

        # Subsequent lines must match the copyright body. The first line of
        # COPYRIGHT is empty and only exists for layout, so it is dropped.
        copyright_body = [
            body_line.rstrip() for body_line in COPYRIGHT.splitlines()[1:]
        ]
        copyright_idx = 0
        for file_line in f:
            if copyright_idx >= len(copyright_body):
                break

            # Without a comment prefix the leading whitespace of the body is
            # significant, so only trailing whitespace is removed.
            if len(prefix) == 0:
                file_line = file_line.rstrip()
            else:
                file_line = file_line.strip()

            # An empty body line appears in the file as the bare prefix
            # without its trailing space.
            if len(copyright_body[copyright_idx]) == 0:
                expected = prefix.strip()
            else:
                expected = prefix + copyright_body[copyright_idx]
            if file_line != expected:
                print("incorrect copyright body for " + path)
                print("  expected: '" + expected + "'")
                print("       got: '" + file_line + "'")
                return False
            copyright_idx += 1

        if copyright_idx != len(copyright_body):
            print(
                "missing "
                + str(len(copyright_body) - copyright_idx)
                + " lines of the copyright body for "
                + path
            )
            return False

    if FLAGS.verbose:
        print("copyright correct for " + path)
    return True


if __name__ == "__main__":
    # Command line: [-v] -y YEAR PATH [PATH ...]
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument("-y", "--year", type=int, required=True, help="Copyright year")
    parser.add_argument(
        "paths", type=str, nargs="*", default=None, help="Directories or files to check"
    )
    FLAGS = parser.parse_args()

    # At least one file or directory is required.
    if not FLAGS.paths:
        parser.print_help()
        exit(1)

    # Every file is visited even after a failure so that all problems are
    # reported in a single run.
    all_correct = True
    for target in FLAGS.paths:
        if os.path.isdir(target):
            for dirpath, _subdirs, filenames in os.walk(target):
                for filename in filenames:
                    if not visit(os.path.join(dirpath, filename)):
                        all_correct = False
        elif not visit(target):
            all_correct = False

    exit(0 if all_correct else 1)


================================================
FILE: qa/common/check_massif_log.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import math
import re
import sys
from collections import defaultdict


def parse_massif_out(filename):
    """
    Gather the per-snapshot counters of a massif output file.

    Every span of text between "snapshot=" and the following "heap_tree"
    is treated as one snapshot record. The record is split on whitespace,
    its first two tokens are ignored, and each remaining "key=value"
    token has its integer value appended to the list stored under "key".

    Returns a defaultdict(list) mapping each key to its values in
    snapshot order.
    """
    with open(filename, "r") as massif_file:
        text = massif_file.read()

    snapshot_pattern = re.compile("snapshot=(.*?)heap_tree", flags=re.DOTALL)

    counters = defaultdict(list)
    for record in snapshot_pattern.findall(text):
        # Drop the first two tokens of the record; only the key=value
        # fields that follow are collected.
        for field in record.split()[2:]:
            name, value = field.split("=")
            counters[name].append(int(value))

    return counters


def is_unbounded_growth(summary, max_allowed_alloc, start_from_middle):
    """
    Check whether the heap allocation grows by more than the allowed amount.

    The "mem_heap_B" samples in 'summary' (optionally only the second half
    of them) are sorted, the largest and smallest 5% (rounded up) are
    discarded as outliers, and the difference between the maximum and the
    mean of the remaining samples is compared with 'max_allowed_alloc'.
    'summary' is not modified.

    Parameters
    ----------
    summary : mapping of str to list of int
        Snapshot data as returned by parse_massif_out(); only the
        "mem_heap_B" entry is read.
    max_allowed_alloc : float
        Largest accepted difference between maximum and mean, in MB
        (1 MB = 1e6 bytes).
    start_from_middle : bool
        If true, only the second half of the snapshots is considered.

    Returns
    -------
    bool
        True if the difference exceeds 'max_allowed_alloc'. False
        otherwise, including when there are fewer than 5 snapshots.
    """
    totals = summary["mem_heap_B"]

    if len(totals) < 5:
        print("Error: Not enough snapshots")
        return False

    # Measure difference between mean and maximum memory usage.
    # sorted() builds a new list, so the caller's snapshot list keeps its
    # chronological order.
    considered = totals[len(totals) // 2 :] if start_from_middle else totals
    processed_snapshot = sorted(considered, reverse=True)

    # Drop 5% (rounded up) of the samples from each end of the sorted list;
    # those are treated as outliers.
    num_max_min_dropout = math.ceil(0.05 * len(processed_snapshot))
    trimmed = processed_snapshot[
        num_max_min_dropout : len(processed_snapshot) - num_max_min_dropout
    ]
    mem_heap_avg = sum(trimmed) / len(trimmed)
    mem_heap_max = max(trimmed)

    # Compute change in allocation rate
    memory_allocation_delta_mb = (mem_heap_max - mem_heap_avg) / 1e6

    print(
        "Change in memory allocation: %f MB, MAX ALLOWED: %f MB"
        % (memory_allocation_delta_mb, max_allowed_alloc)
    )

    return memory_allocation_delta_mb > max_allowed_alloc


if __name__ == "__main__":
    # FIXME turn to proper argument handling
    # Usage: <massif output file> <max allowed MB> [--start-from-middle]
    massif_summary = parse_massif_out(sys.argv[1])
    alloc_limit_mb = float(sys.argv[2])
    # The optional flag is only honoured as the third and last argument.
    from_middle = len(sys.argv) == 4 and sys.argv[3] == "--start-from-middle"

    # Exit status 1 signals that the growth exceeded the limit.
    growth_detected = is_unbounded_growth(massif_summary, alloc_limit_mb, from_middle)
    sys.exit(1 if growth_detected else 0)


================================================
FILE: qa/common/check_valgrind_log.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import sys

# Check the valgrind logs for memory leaks, ignoring known memory leaks
#   * cnmem https://github.com/NVIDIA/cnmem/issues/12
#   * dl-open leak could be due to https://bugs.kde.org/show_bug.cgi?id=358980

# A leak record is ignored when it contains any of these substrings.
LEAK_WHITE_LIST = [
    "cnmem",
    "dl-init",
    "dl-open",
    "libtorch",
]


def check_valgrind_log(log_file):
    """
    Collects the leak records reported by valgrind, starting at the
    first "definitely lost" record and ending at the leak summary,
    and drops the records that match the whitelist.

    Exits the process with status 1 if a definite leak is present
    but the "LEAK SUMMARY:" line cannot be found.

    Parameters
    ----------
    log_file: str
        The path to the log file

    Returns
    -------
    list of str
        a list of the leak records as strings, empty if the log
        reports no definite leak or all records are whitelisted
    """

    with open(log_file, "r") as f:
        logs = f.read()

    leaks_start = logs.find("are definitely lost")
    if leaks_start == -1:
        # No leaks in log
        return []

    # The pid token is everything up to and including the second '=='
    # of the log, e.g. '==1234=='.
    pid_token_end = logs.find("==", logs.find("==") + 1) + 2
    pid_token = logs[:pid_token_end]

    # Start at the beginning of the line holding the first definite leak.
    # rfind() returns -1 when that line is the first line of the log, in
    # which case the section starts at offset 0.
    first_leak_line = logs.rfind("\n", 0, leaks_start) + 1
    end_of_leaks = logs.find(f"{pid_token} LEAK SUMMARY:")
    if end_of_leaks == -1:
        print(f"\n***\n*** Test Failed for {log_file}: Malformed Valgrind log.\n***")
        sys.exit(1)
    leak_records_section = logs[first_leak_line:end_of_leaks]

    # Each leak record is separated by a line containing '==<pid>== \n'
    record_separator = f"{pid_token} \n"
    leak_records = leak_records_section.split(record_separator)

    # Keep the non-empty records that match no whitelist token
    return [
        leak
        for leak in leak_records
        if leak and not any(token in leak for token in LEAK_WHITE_LIST)
    ]


if __name__ == "__main__":
    # Command line: -f/--input-log-file <path to valgrind log>
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-f",
        "--input-log-file",
        type=str,
        required=True,
        help="The name of the file containing the valgrind logs.",
    )
    cli_args = arg_parser.parse_args()

    unexpected_leaks = check_valgrind_log(log_file=cli_args.input_log_file)
    if not unexpected_leaks:
        sys.exit(0)

    # Print every non-whitelisted record, then fail the test.
    for record in unexpected_leaks:
        print(record)
    print(f"\n***\n*** Test Failed: {len(unexpected_leaks)} leaks detected.\n***")
    sys.exit(1)


================================================
FILE: qa/common/gen_common.py
================================================
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
from typing import List

# Common utilities for model generation scripts
import numpy as np

# Object dtype that the conversion helpers below treat as the string type
# (e.g. np_to_model_dtype maps it to "TYPE_STRING").
np_dtype_string = np.dtype(object)

# Numpy does not support the BF16 datatype natively.
# We use this dummy dtype as a representative for BF16.
np_dtype_bfloat16 = np.dtype([("bf16", object)])


def np_to_onnx_dtype(np_dtype):
    """
    Translate a numpy dtype into the matching onnx.TensorProto data type.

    Returns None when 'np_dtype' has no entry in the table below.
    """
    import onnx

    tensor_proto = onnx.TensorProto
    # Candidates are compared with '==' in this order.
    conversions = (
        (bool, tensor_proto.BOOL),
        (np.int8, tensor_proto.INT8),
        (np.int16, tensor_proto.INT16),
        (np.int32, tensor_proto.INT32),
        (np.int64, tensor_proto.INT64),
        (np.uint8, tensor_proto.UINT8),
        (np.uint16, tensor_proto.UINT16),
        (np.float16, tensor_proto.FLOAT16),
        (np.float32, tensor_proto.FLOAT),
        (np.float64, tensor_proto.DOUBLE),
        (np_dtype_string, tensor_proto.STRING),
    )
    for candidate, onnx_dtype in conversions:
        if np_dtype == candidate:
            return onnx_dtype
    return None


def np_to_model_dtype(np_dtype):
    """
    Translate a numpy dtype into the "TYPE_*" name used for it in a model
    configuration.

    Returns None when 'np_dtype' matches none of the known types.
    """
    # Candidates are compared with '==' in this order.
    builtin_names = (
        (bool, "TYPE_BOOL"),
        (np.int8, "TYPE_INT8"),
        (np.int16, "TYPE_INT16"),
        (np.int32, "TYPE_INT32"),
        (np.int64, "TYPE_INT64"),
        (np.uint8, "TYPE_UINT8"),
        (np.uint16, "TYPE_UINT16"),
        (np.float16, "TYPE_FP16"),
        (np.float32, "TYPE_FP32"),
        (np.float64, "TYPE_FP64"),
    )
    for candidate, model_dtype in builtin_names:
        if np_dtype == candidate:
            return model_dtype

    # The stand-in dtypes defined in this module are checked last.
    if np_dtype == np_dtype_string:
        return "TYPE_STRING"
    if np_dtype == np_dtype_bfloat16:
        return "TYPE_BF16"
    return None


def np_to_trt_dtype(np_dtype):
    """
    Translate a numpy dtype into the matching tensorrt data type.

    Returns None when 'np_dtype' has no entry in the table below.
    """
    import tensorrt as trt

    # Candidates are compared with '==' in this order.
    conversions = (
        (bool, trt.bool),
        (np.int8, trt.int8),
        (np.int32, trt.int32),
        (np.int64, trt.int64),
        (np.uint8, trt.uint8),
        (np.float16, trt.float16),
        (np.float32, trt.float32),
        (np_dtype_bfloat16, trt.bfloat16),
    )
    for candidate, trt_dtype in conversions:
        if np_dtype == candidate:
            return trt_dtype
    return None


def np_to_torch_dtype(np_dtype):
    """
    Translate a numpy dtype into the matching torch dtype.

    The string dtype is translated to the typing annotation List[str].
    Returns None for np.uint16, np.float16 and for any dtype that
    matches none of the known types.
    """
    import torch

    # Candidates are compared with '==' in this order; a None result marks
    # a numpy type for which no torch dtype is returned.
    builtin_pairs = (
        (bool, torch.bool),
        (np.int8, torch.int8),
        (np.int16, torch.int16),
        (np.int32, torch.int),
        (np.int64, torch.long),
        (np.uint8, torch.uint8),
        (np.uint16, None),  # Not supported in Torch
        (np.float16, None),
        (np.float32, torch.float),
        (np.float64, torch.double),
    )
    for candidate, torch_dtype in builtin_pairs:
        if np_dtype == candidate:
            return torch_dtype

    if np_dtype == np_dtype_string:
        return List[str]
    return None


def openvino_save_model(model_version_dir, model):
    """
    Name the outputs of an OpenVINO model and serialize it to disk.

    Output number idx gets the tensor name "OUTPUT<idx>". The model is
    then written to "<model_version_dir>/model.xml" and
    "<model_version_dir>/model.bin"; the directory is created if it
    does not exist.

    Parameters
    ----------
    model_version_dir : str
        Directory that receives model.xml and model.bin.
    model
        OpenVINO model object exposing 'outputs'; it is passed to
        openvino.serialize().
    """
    import openvino as ov

    # W/A for error moving to OpenVINO new APIs "Attempt to get a name for a Tensor without names".
    # For more details, check https://github.com/triton-inference-server/openvino_backend/issues/89
    #
    # The previous special case for 'len(model.outputs) == 0' indexed
    # model.outputs[0] and therefore could only raise IndexError; it has
    # been removed. Naming for models with outputs is unchanged.
    for idx, out in enumerate(model.outputs):
        out.get_tensor().set_names({f"OUTPUT{idx}"})

    os.makedirs(model_version_dir, exist_ok=True)
    ov.serialize(
        model, model_version_dir + "/model.xml", model_version_dir + "/model.bin"
    )


================================================
FILE: qa/common/gen_ensemble_model_utils.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os

import numpy as np
import test_util as tu
from gen_common import np_to_model_dtype

# Ensemble type names recognised by the schedule helper classes below
# ("fan" and "sequence" are matched explicitly; any other value selects the
# simple schedule).
BASIC_ENSEMBLE_TYPES = ["simple", "sequence", "fan"]

# Object dtype; presumably the stand-in for string tensors, mirroring
# gen_common.np_dtype_string -- TODO confirm.
np_dtype_string = np.dtype(object)


def fixed_to_variable_size(shape):
    """Return a list with one -1 entry for every dimension of 'shape'."""
    rank = len(shape)
    return [-1 for _ in range(rank)]


def platform_types_and_validation():
    """
    Return a list of (platform name, validation function) tuples, one
    for each of the "plan", "onnx" and "libtorch" platforms.
    """
    platforms = ("plan", "onnx", "libtorch")
    validators = (
        tu.validate_for_trt_model,
        tu.validate_for_onnx_model,
        tu.validate_for_libtorch_model,
    )
    return list(zip(platforms, validators))


class AddSubEnsembleSchedule:
    """
    Helper class to generate ensemble schedule that behaves the same as
    addsub model given an ensemble type
    """

    def __init__(self, ensemble_type):
        """
        Select the schedule generator for 'ensemble_type': "fan" and
        "sequence" pick the matching generator, any other value picks
        the simple schedule.
        """
        if ensemble_type == "fan":
            self._get_schedule = AddSubEnsembleSchedule._get_fan_ensemble_schedule
        elif ensemble_type == "sequence":
            self._get_schedule = AddSubEnsembleSchedule._get_sequence_ensemble_schedule
        else:
            self._get_schedule = AddSubEnsembleSchedule._get_simple_ensemble_schedule

    def get_schedule(
        self,
        base_model_name,
        input_shape,
        output0_shape,
        output1_shape,
        input_model_dtype,
        output0_model_dtype,
        output1_model_dtype,
    ):
        """
        Return the "ensemble_scheduling" text produced by the generator
        chosen in __init__. All arguments are forwarded unchanged; which
        of them are actually used depends on the generator.
        """
        return self._get_schedule(
            base_model_name,
            input_shape,
            output0_shape,
            output1_shape,
            input_model_dtype,
            output0_model_dtype,
            output1_model_dtype,
        )

    @classmethod
    def _get_simple_ensemble_schedule(
        cls,
        base_model_name,
        input_shape,
        output0_shape,
        output1_shape,
        input_dtype,
        output0_dtype,
        output1_dtype,
    ):
        """
        Return a single-step schedule that invokes 'base_model_name' and
        maps its inputs and outputs straight to the ensemble tensors
        INPUT0/INPUT1 and OUTPUT0/OUTPUT1. The shape and dtype arguments
        are not used.
        """
        # libtorch model uses other naming convention for outputs
        output_index_delimiter = "__" if "libtorch" in base_model_name else ""
        # ensemble input -> addsub -> ensemble output
        schedule = """
ensemble_scheduling {{
  step [
    {{
      model_name: "{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "INPUT0"
      }}
      input_map {{
        key: "INPUT1"
        value: "INPUT1"
      }}
      output_map {{
        key: "OUTPUT{delimiter}0"
        value: "OUTPUT0"
      }}
      output_map {{
        key: "OUTPUT{delimiter}1"
        value: "OUTPUT1"
      }}
    }}
  ]
}}
""".format(
            base_model_name, delimiter=output_index_delimiter
        )
        return schedule

    @classmethod
    def _get_sequence_ensemble_schedule(
        cls,
        base_model_name,
        input_shape,
        output0_shape,
        output1_shape,
        input_dtype,
        output0_dtype,
        output1_dtype,
    ):
        """
        Return a two-step schedule: a "nop_<input_dtype>_<dims>" model
        passes the ensemble inputs on as same_input0/same_input1, which
        'base_model_name' then consumes to produce OUTPUT0/OUTPUT1. Only
        'base_model_name', 'input_shape' and 'input_dtype' are used.
        """
        # libtorch model uses other naming convention for outputs
        output_index_delimiter = "__" if "libtorch" in base_model_name else ""
        # ensemble input -> nop -> addsub -> ensemble output
        # The nop model name is built from the input shape with every
        # dimension replaced by -1.
        nop_input_shape = fixed_to_variable_size(input_shape)
        schedule = """
ensemble_scheduling {{
  step [
    {{
      model_name: "nop_{}_{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "INPUT0"
      }}
      input_map {{
        key: "INPUT1"
        value: "INPUT1"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "same_input0"
      }}
      output_map {{
        key: "OUTPUT1"
        value: "same_input1"
      }}
    }},
    {{
      model_name: "{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "same_input0"
      }}
      input_map {{
        key: "INPUT1"
        value: "same_input1"
      }}
      output_map {{
        key: "OUTPUT{delimiter}0"
        value: "OUTPUT0"
      }}
      output_map {{
        key: "OUTPUT{delimiter}1"
        value: "OUTPUT1"
      }}
    }}
  ]
}}
""".format(
            input_dtype,
            tu.shape_to_dims_str(nop_input_shape),
            base_model_name,
            delimiter=output_index_delimiter,
        )
        return schedule

    @classmethod
    def _get_fan_ensemble_schedule(
        cls,
        base_model_name,
        input_shape,
        output0_shape,
        output1_shape,
        input_dtype,
        output0_dtype,
        output1_dtype,
    ):
        """
        Return a four-step schedule: an input nop model feeds
        'base_model_name', and each of its two outputs is routed through
        its own nop model (named after that output's dtype and shape)
        to become the ensemble tensors OUTPUT0 and OUTPUT1.
        """
        # libtorch model uses other naming convention for outputs
        output_index_delimiter = "__" if "libtorch" in base_model_name else ""

        # ensemble input -> nop -> addsub ->
        # nop (fan out, one output send to one nop) -> ensemble output (fan in)
        # The nop model names are built from the shapes with every
        # dimension replaced by -1.
        nop_input_shape = fixed_to_variable_size(input_shape)
        nop_output0_shape = fixed_to_variable_size(output0_shape)
        nop_output1_shape = fixed_to_variable_size(output1_shape)
        schedule = """
ensemble_scheduling {{
  step [
    {{
      model_name: "nop_{}_{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "INPUT0"
      }}
      input_map {{
        key: "INPUT1"
        value: "INPUT1"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "same_input0"
      }}
      output_map {{
        key: "OUTPUT1"
        value: "same_input1"
      }}
    }},
    {{
      model_name: "{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "same_input0"
      }}
      input_map {{
        key: "INPUT1"
        value: "same_input1"
      }}
      output_map {{
        key: "OUTPUT{delimiter}0"
        value: "same_output0"
      }}
      output_map {{
        key: "OUTPUT{delimiter}1"
        value: "same_output1"
      }}
    }},
    {{
      model_name: "nop_{}_{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "same_output0"
      }}
      input_map {{
        key: "INPUT1"
        value: "same_output0"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "OUTPUT0"
      }}
    }},
    {{
      model_name: "nop_{}_{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "same_output1"
      }}
      input_map {{
        key: "INPUT1"
        value: "same_output1"
      }}
      output_map {{
        key: "OUTPUT1"
        value: "OUTPUT1"
      }}
    }}
  ]
}}
""".format(
            input_dtype,
            tu.shape_to_dims_str(nop_input_shape),
            base_model_name,
            output0_dtype,
            tu.shape_to_dims_str(nop_output0_shape),
            output1_dtype,
            tu.shape_to_dims_str(nop_output1_shape),
            delimiter=output_index_delimiter,
        )
        return schedule


class IdentityEnsembleSchedule:
    """
    Generates the "ensemble_scheduling" config section for an ensemble that
    behaves like an identity model.

    The ensemble type ("simple", "sequence" or "fan") selects how many nop
    steps each input is routed through before reaching the matching output.
    Any unrecognized ensemble type is treated as "simple".
    """

    def __init__(self, ensemble_type, ensemble_test_type="zero"):
        # Start from the simple schedule and override for the known types.
        builder = IdentityEnsembleSchedule._get_simple_ensemble_schedule
        if ensemble_type == "fan":
            builder = IdentityEnsembleSchedule._get_fan_ensemble_schedule
        elif ensemble_type == "sequence":
            builder = IdentityEnsembleSchedule._get_sequence_ensemble_schedule
        self._get_schedule = builder
        self._test_type = ensemble_test_type

    def get_schedule(
        self,
        dtype,
        input_shapes,
        input_model_shapes,
        output_shapes,
        output_model_shapes,
    ):
        """Return the schedule text produced by the selected builder.

        The test type given at construction is forwarded as the last
        argument of the builder.
        """
        builder_args = (
            dtype,
            input_shapes,
            input_model_shapes,
            output_shapes,
            output_model_shapes,
            self._test_type,
        )
        return self._get_schedule(*builder_args)

    @staticmethod
    def _format_schedule(step_blocks):
        """Wrap already formatted step blocks into "ensemble_scheduling"."""
        return """
ensemble_scheduling {{
  step [
{}
  ]
}}
""".format(
            ",".join(step_blocks)
        )

    @classmethod
    def _get_simple_ensemble_schedule(
        cls,
        dtype,
        input_shapes,
        input_model_shapes,
        output_shapes,
        output_model_shapes,
        test_type,
    ):
        """Build a schedule with a single nop step per ensemble input."""
        # ensemble reshaped input -> nop with reshaped tensor shape -> ensemble
        # reshaped output (actual ensemble input/output is not visible in schedule)
        step_template = """
    {{
      model_name: "nop_{}_{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "INPUT{}"
      }}
      input_map {{
        key: "INPUT1"
        value: "INPUT{}"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "OUTPUT{}"
      }}
    }}
"""
        # The number of steps follows 'input_shapes', while the nop model
        # name is derived from the matching entry of 'input_model_shapes'.
        step_blocks = [
            step_template.format(
                np_to_model_dtype(dtype),
                tu.shape_to_dims_str(input_model_shapes[position]),
                position,
                position,
                position,
            )
            for position in range(len(input_shapes))
        ]
        return IdentityEnsembleSchedule._format_schedule(step_blocks)

    @classmethod
    def _get_sequence_ensemble_schedule(
        cls,
        dtype,
        input_shapes,
        input_model_shapes,
        output_shapes,
        output_model_shapes,
        test_type,
    ):
        """Build a schedule chaining two nop steps per ensemble input.

        For the "reshape" test type the two steps use the "tunnel_in_" and
        "tunnel_out_" nop model name variants.
        """
        uses_tunnel = test_type == "reshape"
        first_prefix = "tunnel_in_" if uses_tunnel else ""
        second_prefix = "tunnel_out_" if uses_tunnel else ""
        # ensemble reshaped input -> nop with another input only reshape ->
        # nop with output only reshape -> ensemble reshaped output
        step_template = """
    {{
      model_name: "nop_{in_str}{type}_{shape}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "INPUT{idx}"
      }}
      input_map {{
        key: "INPUT1"
        value: "INPUT{idx}"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "temp_{idx}"
      }}
    }},
    {{
      model_name: "nop_{out_str}{type}_{shape}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "temp_{idx}"
      }}
      input_map {{
        key: "INPUT1"
        value: "temp_{idx}"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "OUTPUT{idx}"
      }}
    }}
"""
        step_blocks = [
            step_template.format(
                type=np_to_model_dtype(dtype),
                in_str=first_prefix,
                out_str=second_prefix,
                idx=position,
                shape=tu.shape_to_dims_str(input_model_shapes[position]),
            )
            for position in range(len(input_shapes))
        ]
        return IdentityEnsembleSchedule._format_schedule(step_blocks)

    @classmethod
    def _get_fan_ensemble_schedule(
        cls,
        dtype,
        input_shapes,
        input_model_shapes,
        output_shapes,
        output_model_shapes,
        test_type,
    ):
        """Build a schedule chaining three nop steps per ensemble input.

        For the "reshape" test type the outer steps use the tunnel nop
        variants and the middle step uses a nop with shape [-1].
        """
        # Note that the simple and sequence test already test "fan" in some
        # degree, because there is no direct match from nop input/output
        # like what is in addsub-like ensemble.
        #
        # ensemble reshaped input -> nop with another input only reshape ->
        # nop with variable size -> nop with output only reshape ->
        # ensemble reshaped output
        if test_type == "reshape":
            first_prefix = "tunnel_in_"
            last_prefix = "tunnel_out_"
            middle_shapes = [[-1]] * len(input_model_shapes)
        else:
            first_prefix = ""
            last_prefix = ""
            middle_shapes = input_model_shapes

        step_template = """
    {{
      model_name: "nop_{in_str}{type}_{shape}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "INPUT{idx}"
      }}
      input_map {{
        key: "INPUT1"
        value: "INPUT{idx}"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "temp_in_{idx}"
      }}
    }},
    {{
      model_name: "nop_{type}_{intermediate_shape}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "temp_in_{idx}"
      }}
      input_map {{
        key: "INPUT1"
        value: "temp_in_{idx}"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "temp_out_{idx}"
      }}
    }},
    {{
      model_name: "nop_{out_str}{type}_{shape}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "temp_out_{idx}"
      }}
      input_map {{
        key: "INPUT1"
        value: "temp_out_{idx}"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "OUTPUT{idx}"
      }}
    }}
"""
        step_blocks = [
            step_template.format(
                type=np_to_model_dtype(dtype),
                in_str=first_prefix,
                out_str=last_prefix,
                intermediate_shape=tu.shape_to_dims_str(middle_shapes[position]),
                idx=position,
                shape=tu.shape_to_dims_str(input_model_shapes[position]),
            )
            for position in range(len(input_shapes))
        ]
        return IdentityEnsembleSchedule._format_schedule(step_blocks)


class SequenceEnsembleSchedule:
    """
    Generates the "ensemble_scheduling" config section for an ensemble that
    behaves like a sequence model.

    The ensemble type ("simple", "sequence" or "fan") selects whether nop
    steps are placed around the base sequence model. Any unrecognized
    ensemble type is treated as "simple".
    """

    def __init__(self, ensemble_type):
        # Start from the simple schedule and override for the known types.
        builder = SequenceEnsembleSchedule._get_simple_ensemble_schedule
        if ensemble_type == "fan":
            builder = SequenceEnsembleSchedule._get_fan_ensemble_schedule
        elif ensemble_type == "sequence":
            builder = SequenceEnsembleSchedule._get_sequence_ensemble_schedule
        self._get_schedule = builder

    def get_schedule(self, base_model_name, shape, model_dtype):
        """Return the schedule text produced by the selected builder."""
        builder = self._get_schedule
        return builder(base_model_name, shape, model_dtype)

    @classmethod
    def _get_simple_ensemble_schedule(cls, base_model_name, shape, model_dtype):
        """Build a schedule that maps the ensemble I/O straight to the base model."""
        # libtorch model uses other naming convention
        if "libtorch" in base_model_name:
            tensor_suffix = "__0"
        else:
            tensor_suffix = ""
        # ensemble input -> sequence -> ensemble output
        template = """
ensemble_scheduling {{
  step [
    {{
      model_name: "{}"
      model_version: -1
      input_map {{
        key: "INPUT{index}"
        value: "INPUT"
      }}
      output_map {{
        key: "OUTPUT{index}"
        value: "OUTPUT"
      }}
    }}
  ]
}}
"""
        return template.format(base_model_name, index=tensor_suffix)

    @classmethod
    def _get_sequence_ensemble_schedule(cls, base_model_name, shape, model_dtype):
        """Build a schedule with a nop step in front of the base model."""
        # nop cannot handle STRING data type, fall back to simple
        if model_dtype == "TYPE_STRING":
            return SequenceEnsembleSchedule._get_simple_ensemble_schedule(
                base_model_name, shape, model_dtype
            )

        # libtorch model uses other naming convention
        if "libtorch" in base_model_name:
            tensor_suffix = "__0"
        else:
            tensor_suffix = ""
        # ensemble input -> nop -> sequence -> ensemble output
        nop_dims = tu.shape_to_dims_str(fixed_to_variable_size(shape))
        template = """
ensemble_scheduling {{
  step [
    {{
      model_name: "nop_{}_{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "INPUT"
      }}
      input_map {{
        key: "INPUT1"
        value: "INPUT"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "same_input"
      }}
    }},
    {{
      model_name: "{}"
      model_version: -1
      input_map {{
        key: "INPUT{index}"
        value: "same_input"
      }}
      output_map {{
        key: "OUTPUT{index}"
        value: "OUTPUT"
      }}
    }}
  ]
}}
"""
        return template.format(
            model_dtype, nop_dims, base_model_name, index=tensor_suffix
        )

    @classmethod
    def _get_fan_ensemble_schedule(cls, base_model_name, shape, model_dtype):
        """Build a schedule with a nop step on both sides of the base model."""
        # nop cannot handle STRING data type, fall back to simple
        if model_dtype == "TYPE_STRING":
            return SequenceEnsembleSchedule._get_simple_ensemble_schedule(
                base_model_name, shape, model_dtype
            )

        # libtorch model uses other naming convention
        if "libtorch" in base_model_name:
            tensor_suffix = "__0"
        else:
            tensor_suffix = ""
        # Not a "fan" due to configuration of base sequence model
        # ensemble input -> nop -> sequence -> nop -> ensemble output
        nop_dims = tu.shape_to_dims_str(fixed_to_variable_size(shape))
        template = """
ensemble_scheduling {{
  step [
    {{
      model_name: "nop_{}_{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "INPUT"
      }}
      input_map {{
        key: "INPUT1"
        value: "INPUT"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "same_input"
      }}
    }},
    {{
      model_name: "{}"
      model_version: -1
      input_map {{
        key: "INPUT{index}"
        value: "same_input"
      }}
      output_map {{
        key: "OUTPUT{index}"
        value: "same_output"
      }}
    }},
    {{
      model_name: "nop_{}_{}"
      model_version: -1
      input_map {{
        key: "INPUT0"
        value: "same_output"
      }}
      input_map {{
        key: "INPUT1"
        value: "same_output"
      }}
      output_map {{
        key: "OUTPUT0"
        value: "OUTPUT"
      }}
    }}
  ]
}}
"""
        # The same nop model (same dtype and dims) is used before and after
        # the base model.
        return template.format(
            model_dtype,
            nop_dims,
            base_model_name,
            model_dtype,
            nop_dims,
            index=tensor_suffix,
        )


def create_ensemble_modelfile(
    base_model,
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    swap=False,
):
    """Create the version directory of every basic addsub-like ensemble.

    For each type in BASIC_ENSEMBLE_TYPES the directory
    "<models_dir>/<model_name>/<model_version>" is created, where the model
    name is derived from the ensemble type, 'base_model', the "_nobatch"
    marker (when 'max_batch' is 0) and the three dtypes. An already existing
    directory is left untouched; any other filesystem error is raised.

    The shape arguments and 'swap' are accepted but not used here.
    """
    # No actual model file in ensemble model

    # Use a different model name for the non-batching variant
    nobatch_suffix = "_nobatch" if max_batch == 0 else ""
    for ensemble_type in BASIC_ENSEMBLE_TYPES:
        ensemble_model_name = "{}_{}{}".format(
            ensemble_type, base_model, nobatch_suffix
        )
        model_name = tu.get_model_name(
            ensemble_model_name, input_dtype, output0_dtype, output1_dtype
        )
        model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

        # Only an existing directory is tolerated; other errors (e.g.
        # permission problems) are no longer silently ignored.
        os.makedirs(model_version_dir, exist_ok=True)


def create_ensemble_modelconfig(
    base_model,
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    output0_label_cnt,
    version_policy,
):
    """Write "config.pbtxt" for every basic addsub-like ensemble.

    For each type in BASIC_ENSEMBLE_TYPES a configuration with two inputs
    and two outputs is generated, the matching ensemble schedule is appended
    and the result is written to "<models_dir>/<model_name>/config.pbtxt".
    For the "fan" type an "output0_labels.txt" file with
    'output0_label_cnt' lines ("label0", "label1", ...) is written next to
    the configuration and referenced from OUTPUT0.

    An already existing model directory is reused; any other filesystem
    error is raised. 'model_version' is accepted but not used here.
    """
    # No validation as long as the base model supports the type and shape

    input_model_dtype = np_to_model_dtype(input_dtype)
    output0_model_dtype = np_to_model_dtype(output0_dtype)
    output1_model_dtype = np_to_model_dtype(output1_dtype)

    # Use a different model name for the non-batching variant
    nobatch_suffix = "_nobatch" if max_batch == 0 else ""

    for ensemble_type in BASIC_ENSEMBLE_TYPES:
        # Only in "fan" that ensemble output is not directly from addsub. In
        # other case, ensemble should still generate proper label without
        # label file
        labels = "output0_labels.txt" if ensemble_type == "fan" else None

        ensemble_model_name = "{}_{}{}".format(
            ensemble_type, base_model, nobatch_suffix
        )
        model_name = tu.get_model_name(
            ensemble_model_name, input_dtype, output0_dtype, output1_dtype
        )
        base_model_name = tu.get_model_name(
            "{}{}".format(base_model, nobatch_suffix),
            input_dtype,
            output0_dtype,
            output1_dtype,
        )

        ensemble_schedule = AddSubEnsembleSchedule(ensemble_type).get_schedule(
            base_model_name,
            input_shape,
            output0_shape,
            output1_shape,
            input_model_dtype,
            output0_model_dtype,
            output1_model_dtype,
        )

        config_dir = models_dir + "/" + model_name
        config = create_general_modelconfig(
            model_name,
            "ensemble",
            max_batch,
            repeat(input_dtype, 2),
            repeat(input_shape, 2),
            repeat(None, 2),
            [output0_dtype, output1_dtype],
            [output0_shape, output1_shape],
            repeat(None, 2),
            [labels, None],
            version_policy=version_policy,
            force_tensor_number_suffix=True,
        )
        config += ensemble_schedule

        # Only an existing directory is tolerated; other errors are raised
        # here instead of surfacing later from open().
        os.makedirs(config_dir, exist_ok=True)

        with open(config_dir + "/config.pbtxt", "w") as cfile:
            cfile.write(config)

        if labels is not None:
            with open(config_dir + "/output0_labels.txt", "w") as lfile:
                for label_idx in range(output0_label_cnt):
                    lfile.write("label" + str(label_idx) + "\n")


def create_identity_ensemble_modelfile(
    ensemble_test_type,
    models_dir,
    model_version,
    max_batch,
    dtype,
    input_shapes,
    output_shapes,
):
    """Create the version directory of every basic identity-like ensemble.

    For each type in BASIC_ENSEMBLE_TYPES the directory
    "<models_dir>/<model_name>/<model_version>" is created. The model name
    is derived from "<ensemble_type>_<ensemble_test_type>", the "_nobatch"
    marker (when 'max_batch' is 0), the number of inputs and 'dtype'. An
    already existing directory is left untouched; any other filesystem error
    is raised. 'output_shapes' is accepted but not used here.
    """
    io_cnt = len(input_shapes)

    # Use a different model name for the non-batching variant
    nobatch_suffix = "_nobatch" if max_batch == 0 else ""
    for ensemble_type in BASIC_ENSEMBLE_TYPES:
        ensemble_prefix = "{}_{}".format(ensemble_type, ensemble_test_type)
        model_name = tu.get_zero_model_name(
            ensemble_prefix + nobatch_suffix, io_cnt, dtype
        )
        model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

        # Only an existing directory is tolerated; other errors (e.g.
        # permission problems) are no longer silently ignored.
        os.makedirs(model_version_dir, exist_ok=True)


def create_identity_ensemble_modelconfig(
    ensemble_test_type,
    models_dir,
    model_version,
    max_batch,
    dtype,
    input_shapes,
    input_model_shapes,
    output_shapes,
    output_model_shapes,
):
    """Write "config.pbtxt" for every basic identity-like ensemble.

    For each type in BASIC_ENSEMBLE_TYPES a configuration with
    len(input_shapes) inputs and outputs of type 'dtype' is generated, the
    matching identity ensemble schedule is appended and the result is written
    to "<models_dir>/<model_name>/config.pbtxt".

    An already existing model directory is reused; any other filesystem
    error is raised. 'model_version' is accepted but not used here.
    """
    io_cnt = len(input_shapes)

    # Use a different model name for the non-batching variant
    nobatch_suffix = "_nobatch" if max_batch == 0 else ""

    for ensemble_type in BASIC_ENSEMBLE_TYPES:
        ensemble_prefix = "{}_{}".format(ensemble_type, ensemble_test_type)
        model_name = tu.get_zero_model_name(
            ensemble_prefix + nobatch_suffix, io_cnt, dtype
        )

        ensemble_schedule = IdentityEnsembleSchedule(
            ensemble_type, ensemble_test_type
        ).get_schedule(
            dtype, input_shapes, input_model_shapes, output_shapes, output_model_shapes
        )

        config_dir = models_dir + "/" + model_name
        config = create_general_modelconfig(
            model_name,
            "ensemble",
            max_batch,
            repeat(dtype, io_cnt),
            input_shapes,
            input_model_shapes,
            repeat(dtype, io_cnt),
            output_shapes,
            output_model_shapes,
            repeat(None, io_cnt),
            force_tensor_number_suffix=True,
        )
        config += ensemble_schedule

        # Only an existing directory is tolerated; other errors are raised
        # here instead of surfacing later from open().
        os.makedirs(config_dir, exist_ok=True)

        with open(config_dir + "/config.pbtxt", "w") as cfile:
            cfile.write(config)


def create_sequence_ensemble_modelfile(
    base_model, models_dir, max_batch, model_version, shape, dtype
):
    """Create the version directory of every basic sequence-like ensemble.

    For each type in BASIC_ENSEMBLE_TYPES the directory
    "<models_dir>/<model_name>/<model_version>" is created, where the model
    name is derived from the ensemble type, 'base_model', the "_nobatch"
    marker (when 'max_batch' is 0) and 'dtype'. An already existing
    directory is left untouched; any other filesystem error is raised.
    'shape' is accepted but not used here.
    """
    # No actual model file in ensemble model

    # Use a different model name for the non-batching variant
    nobatch_suffix = "_nobatch" if max_batch == 0 else ""
    for ensemble_type in BASIC_ENSEMBLE_TYPES:
        ensemble_model_name = "{}_{}{}".format(
            ensemble_type, base_model, nobatch_suffix
        )
        model_name = tu.get_sequence_model_name(ensemble_model_name, dtype)
        model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

        # Only an existing directory is tolerated; other errors (e.g.
        # permission problems) are no longer silently ignored.
        os.makedirs(model_version_dir, exist_ok=True)


def create_sequence_ensemble_modelconfig(
    base_model, models_dir, max_batch, model_version, shape, dtype
):
    """Write "config.pbtxt" for every basic sequence-like ensemble.

    For each type in BASIC_ENSEMBLE_TYPES a configuration with a single
    input and a single output of the given 'shape' and 'dtype' is generated,
    the matching sequence ensemble schedule is appended and the result is
    written to "<models_dir>/<model_name>/config.pbtxt".

    An already existing model directory is reused; any other filesystem
    error is raised. 'model_version' is accepted but not used here.
    """
    # No validation as long as the base model supports the type and shape

    model_dtype = np_to_model_dtype(dtype)

    # Use a different model name for the non-batching variant
    nobatch_suffix = "_nobatch" if max_batch == 0 else ""

    for ensemble_type in BASIC_ENSEMBLE_TYPES:
        ensemble_model_name = "{}_{}{}".format(
            ensemble_type, base_model, nobatch_suffix
        )
        model_name = tu.get_sequence_model_name(ensemble_model_name, dtype)
        base_model_name = tu.get_sequence_model_name(
            "{}{}".format(base_model, nobatch_suffix), dtype
        )

        ensemble_schedule = SequenceEnsembleSchedule(ensemble_type).get_schedule(
            base_model_name, shape, model_dtype
        )

        config_dir = models_dir + "/" + model_name
        config = create_general_modelconfig(
            model_name,
            "ensemble",
            max_batch,
            [dtype],
            [shape],
            [None],
            [dtype],
            [shape],
            [None],
            [None],
        )
        config += ensemble_schedule

        # Only an existing directory is tolerated; other errors are raised
        # here instead of surfacing later from open().
        os.makedirs(config_dir, exist_ok=True)

        with open(config_dir + "/config.pbtxt", "w") as cfile:
            cfile.write(config)


def create_nop_modelconfig(
    models_dir, tensor_shape, tensor_dtype, tensor_model_shape=None
):
    """Write "config.pbtxt" for a two-input / two-output nop model.

    The model is named "nop_<dtype>_<dims>", uses the "identity" backend
    with a CPU instance group and a max batch size of 1024. All four tensors
    share 'tensor_dtype' and 'tensor_shape'; 'tensor_model_shape', when given
    and different from 'tensor_shape', is emitted as the reshape of every
    tensor.

    An already existing model directory is reused; any other filesystem
    error is raised.
    """
    model_name = "nop_{}_{}".format(
        dtype_str(tensor_dtype), tu.shape_to_dims_str(tensor_shape)
    )
    # Make [] to [1].
    # Note that this doesn't affect the naming ("nop_{}_" instead of "nop_{}_1")
    if len(tensor_shape) == 0:
        tensor_shape = [1]

    config_dir = models_dir + "/" + model_name
    config = create_general_modelconfig(
        model_name,
        "",
        1024,
        repeat(tensor_dtype, 2),
        repeat(tensor_shape, 2),
        repeat(tensor_model_shape, 2),
        repeat(tensor_dtype, 2),
        repeat(tensor_shape, 2),
        repeat(tensor_model_shape, 2),
        repeat(None, 2),
        backend="identity",
        instance_group_str="instance_group [ { kind: KIND_CPU } ]",
    )

    # Only an existing directory is tolerated; other errors are raised here
    # instead of surfacing later from open().
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_nop_tunnel_modelconfig(models_dir, tensor_shape, tensor_dtype):
    """Write "config.pbtxt" for the "tunnel in" / "tunnel out" nop models.

    Two identity-backend models are generated:

    * "nop_tunnel_in_<dtype>_<dims>": inputs of 'tensor_shape' reshaped to a
      single dimension holding the product of all dims; outputs use that
      one-dimensional shape.
    * "nop_tunnel_out_<dtype>_<dims>": inputs of the one-dimensional shape
      reshaped back to 'tensor_shape'; outputs use 'tensor_shape'.

    An already existing model directory is reused; any other filesystem
    error is raised.

    Raises:
        ValueError: if any dimension of 'tensor_shape' is negative, i.e. the
            shape is not fixed size.
    """
    # Must be fixed size
    dims_str = tu.shape_to_dims_str(tensor_shape)
    in_model_name = "nop_tunnel_in_{}_{}".format(dtype_str(tensor_dtype), dims_str)
    out_model_name = "nop_tunnel_out_{}_{}".format(dtype_str(tensor_dtype), dims_str)
    # Make [] to [1].
    # Note that this doesn't affect the naming ("nop_{}_" instead of "nop_{}_1")
    if len(tensor_shape) == 0:
        tensor_shape = [1]
    internal_shape = 1
    for dim in tensor_shape:
        if dim < 0:
            raise ValueError("Must specify fixed size input / output for nop tunnel")
        internal_shape *= dim

    tunnel_specs = (
        # (model name, input dims, input reshape, output dims)
        # Tunnel in nop (reshape to one dimension)
        (in_model_name, tensor_shape, [internal_shape], [internal_shape]),
        # Tunnel out nop (reshape back to original shape)
        (out_model_name, [internal_shape], tensor_shape, tensor_shape),
    )
    for model_name, input_dims, input_reshape, output_dims in tunnel_specs:
        config_dir = models_dir + "/" + model_name
        config = create_general_modelconfig(
            model_name,
            "",
            1024,
            repeat(tensor_dtype, 2),
            repeat(input_dims, 2),
            repeat(input_reshape, 2),
            repeat(tensor_dtype, 2),
            repeat(output_dims, 2),
            repeat(None, 2),
            repeat(None, 2),
            backend="identity",
            instance_group_str="instance_group [ { kind: KIND_CPU } ]",
        )

        # Only an existing directory is tolerated; other errors are raised
        # here instead of surfacing later from open().
        os.makedirs(config_dir, exist_ok=True)

        with open(config_dir + "/config.pbtxt", "w") as cfile:
            cfile.write(config)


def create_general_modelconfig(
    model_name,
    platform,
    max_batch,
    input_dtypes,
    input_shapes,
    input_model_shapes,
    output_dtypes,
    output_shapes,
    output_model_shapes,
    label_filenames,
    backend=None,
    version_policy=None,
    default_model_filename=None,
    instance_group_str="",
    force_tensor_number_suffix=False,
):
    """Return the text of a model configuration without any scheduling section.

    Args:
        model_name: value of the "name" field.
        platform: value of the "platform" field; ignored when 'backend' is
            given.
        max_batch: value of the "max_batch_size" field.
        input_dtypes / input_shapes / input_model_shapes: per-input data
            type, dims and reshape target (None for no reshape). All three
            must have the same length.
        output_dtypes / output_shapes / output_model_shapes: same as above
            for the outputs.
        label_filenames: per-output label file name, None for no label file.
        backend: when not None, a "backend" field is emitted instead of
            "platform".
        version_policy: optional (kind, value) pair. "latest" and "specific"
            use 'value'; any other kind selects the "all" policy. Defaults to
            the latest single version.
        default_model_filename: optional value of "default_model_filename".
        instance_group_str: text inserted verbatim after the header fields.
        force_tensor_number_suffix: number the tensors ("INPUT0", "OUTPUT0")
            even when there is only one of them.

    Tensors are named "INPUT<i>" / "OUTPUT<i>". The index is omitted only
    when it is not needed: inputs are numbered when there is not exactly one
    input (or numbering is forced); outputs are numbered under the same
    condition and, additionally, whenever there is more than one output, so
    that several outputs never share the same name.
    """
    assert len(input_dtypes) == len(input_shapes)
    assert len(input_model_shapes) == len(input_shapes)
    assert len(output_dtypes) == len(output_shapes)
    assert len(output_model_shapes) == len(output_shapes)
    assert len(label_filenames) == len(output_shapes)

    # Unpack version policy
    version_policy_str = "{ latest { num_versions: 1 }}"
    if version_policy is not None:
        policy_kind, policy_value = version_policy
        if policy_kind == "latest":
            version_policy_str = "{{ latest {{ num_versions: {} }}}}".format(
                policy_value
            )
        elif policy_kind == "specific":
            version_policy_str = "{{ specific {{ versions: {} }}}}".format(
                policy_value
            )
        else:
            version_policy_str = "{ all { }}"

    default_model_filename_str = ""
    if default_model_filename is not None:
        default_model_filename_str = 'default_model_filename: "{}"'.format(
            default_model_filename
        )

    # If backend is specified use backend instead of platform
    if backend is not None:
        runtime_key, runtime_value = "backend", backend
    else:
        runtime_key, runtime_value = "platform", platform

    config_parts = [
        """
name: "{}"
{}: "{}"
max_batch_size: {}
version_policy: {}
{}
{}
""".format(
            model_name,
            runtime_key,
            runtime_value,
            max_batch,
            version_policy_str,
            default_model_filename_str,
            instance_group_str,
        )
    ]

    number_inputs = len(input_dtypes) != 1 or force_tensor_number_suffix
    # Keep the historical rule (outputs follow the input numbering) but also
    # number the outputs whenever there are several of them; otherwise a
    # single-input model with multiple outputs would get duplicate names.
    number_outputs = number_inputs or len(output_dtypes) != 1

    for idx, (dtype, shape, model_shape) in enumerate(
        zip(input_dtypes, input_shapes, input_model_shapes)
    ):
        config_parts.append(
            """
input [
  {{
    name: "INPUT{}"
    data_type: {}
    dims: [ {} ]
    {}
  }}
]""".format(
                str(idx) if number_inputs else "",
                dtype_str(dtype),
                tu.shape_to_dims_str(shape),
                reshape_str(shape, model_shape),
            )
        )

    for idx, (dtype, shape, model_shape, label) in enumerate(
        zip(output_dtypes, output_shapes, output_model_shapes, label_filenames)
    ):
        config_parts.append(
            """
output [
  {{
    name: "OUTPUT{}"
    data_type: {}
    dims: [ {} ]
    {}
    {}
  }}
]""".format(
                str(idx) if number_outputs else "",
                dtype_str(dtype),
                tu.shape_to_dims_str(shape),
                reshape_str(shape, model_shape),
                label_str(label),
            )
        )
    return "".join(config_parts)


def repeat(obj, cnt):
    """Return a list holding 'cnt' references to the same 'obj'."""
    return [obj for _ in range(cnt)]


def dtype_str(dtype):
    """Return 'dtype' unchanged when it is already a string.

    Any other value is converted with np_to_model_dtype().
    """
    if isinstance(dtype, str):
        return dtype
    return np_to_model_dtype(dtype)


def reshape_str(shape, model_shape):
    """Return the "reshape" config entry, or "" when no reshape is needed.

    No entry is produced when 'model_shape' is None or equal to 'shape'.
    """
    if model_shape is None:
        return ""
    if shape == model_shape:
        return ""
    target_dims = tu.shape_to_dims_str(model_shape)
    return "reshape: {{ shape: [ {} ] }}".format(target_dims)


def label_str(label):
    """Return the "label_filename" config entry, or "" when 'label' is None."""
    return "" if label is None else 'label_filename: "{}"'.format(label)


================================================
FILE: qa/common/gen_jetson_trt_models
================================================
#!/bin/bash
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

############################################################################
## This script generates the model repository needed for TensorRT testing
## on the Jetson device. Generating these models requires having TensorRT
## container.
############################################################################
#!/bin/bash -xe
# Make all generated files accessible outside of container
umask 0000
# Set the version of the models
TRITON_VERSION=${TRITON_VERSION:=26.02}
# Set the CUDA device to use
NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:=0}
# Set TensorRT image
TENSORRT_IMAGE=${TENSORRT_IMAGE:=nvcr.io/nvidia/tensorrt:$TRITON_VERSION-py3-igpu}
UBUNTU_IMAGE=${UBUNTU_IMAGE:=ubuntu:24.04}

# Set CI specific parameters
DOCKER_GPU_ARGS=${DOCKER_GPU_ARGS:-$([[ -v RUNNER_GPUS && $RUNNER_GPUS =~ ^[0-9] ]] && eval $NV_DOCKER_ARGS || echo "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES" )}

############################################################################
# Check if Docker volume exists
############################################################################
CI_JOB_ID=${CI_JOB_ID:=$(date +%Y%m%d_%H%M)}
DOCKER_VOLUME=${DOCKER_VOLUME:=volume_gen_qa_model_repositor_${CI_JOB_ID}}
if ! docker volume inspect $DOCKER_VOLUME > /dev/null 2>&1; then
    echo "Docker volume $DOCKER_VOLUME does not exist. Creating..."
    docker volume create $DOCKER_VOLUME
    docker volume inspect $DOCKER_VOLUME
fi

docker rm -f $DOCKER_VOLUME
docker run --rm -v $DOCKER_VOLUME:/mnt -w /mnt/$CI_JOB_ID $UBUNTU_IMAGE mkdir -p gen_srcdir ${TRITON_VERSION}
docker create --name $DOCKER_VOLUME -v $DOCKER_VOLUME:/mnt -w /mnt/$CI_JOB_ID $UBUNTU_IMAGE
docker cp . $DOCKER_VOLUME:/mnt/$CI_JOB_ID/gen_srcdir

# Set model output directories
VOLUME_BUILD_DIR=${VOLUME_BUILD_DIR:=/mnt/$CI_JOB_ID}
VOLUME_SRCDIR=${VOLUME_SRCDIR:=$VOLUME_BUILD_DIR/gen_srcdir}

VOLUME_DESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_model_repository
VOLUME_DATADEPENDENTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_trt_data_dependent_model_repository
VOLUME_DYNASEQDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_dyna_sequence_model_repository
VOLUME_DYNASEQIMPLICITDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_dyna_sequence_implicit_model_repository
VOLUME_FORMATDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_trt_format_model_repository
VOLUME_IDENTITYBIGDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_identity_big_model_repository
VOLUME_IDENTITYDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_identity_model_repository
VOLUME_IMPLICITSEQDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_sequence_implicit_model_repository
VOLUME_RAGGEDDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_ragged_model_repository
VOLUME_RESHAPEDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_reshape_model_repository
VOLUME_SEQDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_sequence_model_repository
VOLUME_SHAPEDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_shapetensor_model_repository
VOLUME_VARDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_variable_model_repository
VOLUME_VARIMPLICITSEQDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_variable_sequence_implicit_model_repository
VOLUME_VARSEQDESTDIR=$VOLUME_BUILD_DIR/$TRITON_VERSION/qa_variable_sequence_model_repository

# Create the source and model output directories inside the Docker volume
docker run --rm -v $DOCKER_VOLUME:/mnt -w /mnt/$CI_JOB_ID $UBUNTU_IMAGE \
mkdir -p \
$VOLUME_SRCDIR \
$VOLUME_DESTDIR \
$VOLUME_DATADEPENDENTDIR \
$VOLUME_DYNASEQDESTDIR \
$VOLUME_DYNASEQIMPLICITDESTDIR \
$VOLUME_FORMATDESTDIR \
$VOLUME_IDENTITYBIGDESTDIR \
$VOLUME_IDENTITYDESTDIR \
$VOLUME_IMPLICITSEQDESTDIR \
$VOLUME_RAGGEDDESTDIR \
$VOLUME_RESHAPEDESTDIR \
$VOLUME_SEQDESTDIR \
$VOLUME_SHAPEDESTDIR \
$VOLUME_VARDESTDIR \
$VOLUME_VARIMPLICITSEQDESTDIR \
$VOLUME_VARSEQDESTDIR

# Set TensorRT model generation script name
TRT_MODEL_SCRIPT=gen.TensorRT.gen_jetson_trt_models.cmds

# Set script to generate TensorRT models
cat > $TRT_MODEL_SCRIPT <<EOF
#!/bin/bash -xe
# Make all generated files accessible outside of container
umask 0000
nvidia-smi --query-gpu=compute_cap,compute_mode,driver_version,name,index --format=csv || true
export TRT_SUPPRESS_DEPRECATION_WARNINGS=1
ldconfig || true

cd $VOLUME_SRCDIR
# Models using shape tensor i/o
python3 $VOLUME_SRCDIR/gen_qa_identity_models.py --tensorrt-shape-io --models_dir=$VOLUME_SHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_sequence_models.py --tensorrt-shape-io --models_dir=$VOLUME_SHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_dyna_sequence_models.py --tensorrt-shape-io --models_dir=$VOLUME_SHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_models.py --tensorrt --models_dir=$VOLUME_DESTDIR
python3 $VOLUME_SRCDIR/gen_qa_models.py --tensorrt --variable --models_dir=$VOLUME_VARDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_identity_models.py --tensorrt --models_dir=$VOLUME_IDENTITYDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_identity_models.py --tensorrt-big --models_dir=$VOLUME_IDENTITYBIGDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_reshape_models.py --tensorrt --variable --models_dir=$VOLUME_RESHAPEDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_sequence_models.py --tensorrt --models_dir=$VOLUME_SEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_implicit_models.py --tensorrt --models_dir=$VOLUME_IMPLICITSEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_implicit_models.py --tensorrt --variable --models_dir=$VOLUME_VARIMPLICITSEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_dyna_sequence_models.py --tensorrt --models_dir=$VOLUME_DYNASEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_sequence_models.py --tensorrt --variable --models_dir=$VOLUME_VARSEQDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_dyna_sequence_implicit_models.py --tensorrt --models_dir=$VOLUME_DYNASEQIMPLICITDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_ragged_models.py --tensorrt --models_dir=$VOLUME_RAGGEDDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_trt_format_models.py --models_dir=$VOLUME_FORMATDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR

EOF

chmod a+x $TRT_MODEL_SCRIPT

docker cp $TRT_MODEL_SCRIPT $DOCKER_VOLUME:$VOLUME_SRCDIR

docker pull $TENSORRT_IMAGE

docker run $DOCKER_GPU_ARGS \
   --rm -v $DOCKER_VOLUME:/mnt \
   -e TRT_VERBOSE \
  $TENSORRT_IMAGE bash -xe $VOLUME_SRCDIR/$TRT_MODEL_SCRIPT

# Copy generated models to /tmp/ if not running in CI
if [ -z $CI ] ; then
    echo "Copying generated models to /tmp/"
    docker cp $DOCKER_VOLUME:$VOLUME_BUILD_DIR/$TRITON_VERSION /tmp/
    echo "Removing Docker volume $DOCKER_VOLUME"
    docker rm -f $DOCKER_VOLUME
    docker volume rm $DOCKER_VOLUME
fi

================================================
FILE: qa/common/gen_qa_custom_ops_models.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os

FLAGS = None


def create_moduloop_modelfile(models_dir, model_version):
    """Trace and save a TorchScript model that calls a custom modulo op.

    The C++ source below is compiled with
    ``torch.utils.cpp_extension.load_inline`` and registers
    ``my_ops::custom_modulo``. A small module that forwards to that op is
    traced with two 10-element float32 example tensors and saved to
    ``<models_dir>/libtorch_modulo/<model_version>/model.pt``.

    ``torch`` and ``nn`` are read as module globals; the script's
    ``__main__`` section imports them when ``--libtorch`` is given.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Version number used as the sub-directory name.

    Raises:
        OSError: If the version directory cannot be created.
    """
    model_name = "libtorch_modulo"

    op_source = """
    #include <torch/script.h>
    torch::Tensor custom_modulo(torch::Tensor input1, torch::Tensor input2) {
      torch::Tensor output = torch::fmod(input1, input2);
      return output.clone();
    }
    static auto registry =
      torch::RegisterOperators("my_ops::custom_modulo", &custom_modulo);
    """

    # is_python_module=False: the extension is loaded only for its side effect
    # of registering the operator, no Python bindings are produced.
    torch.utils.cpp_extension.load_inline(
        name="custom_modulo",
        cpp_sources=op_source,
        is_python_module=False,
        verbose=True,
    )

    class ModuloCustomNet(nn.Module):
        def __init__(self):
            super().__init__()

        def forward(self, input0, input1):
            return torch.ops.my_ops.custom_modulo(input0, input1)

    moduloCustomModel = ModuloCustomNet()
    example_input0 = torch.arange(1, 11, dtype=torch.float32)
    example_input1 = torch.tensor([2] * 10, dtype=torch.float32)
    traced = torch.jit.trace(moduloCustomModel, (example_input0, example_input1))

    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Tolerate an already existing directory, but let any other failure
    # (permissions, a file in the way, ...) surface right here.
    os.makedirs(model_version_dir, exist_ok=True)

    traced.save(model_version_dir + "/model.pt")


def create_moduloop_modelconfig(models_dir, model_version):
    """Write the Triton ``config.pbtxt`` for the ``libtorch_modulo`` model.

    The configuration declares two FP32 inputs and one FP32 output, each with
    dims ``[ 10 ]``, and sets ``max_batch_size: 0``. The file is written to
    ``<models_dir>/libtorch_modulo/config.pbtxt``.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Unused by this function.

    Raises:
        OSError: If the model directory cannot be created or the file cannot
            be written.
    """
    model_name = "libtorch_modulo"
    config_dir = models_dir + "/" + model_name
    config = """
name: "{}"
platform: "pytorch_libtorch"
max_batch_size: 0
input [
  {{
    name: "INPUT__0"
    data_type: TYPE_FP32
    dims: [ 10 ]
  }},
  {{
    name: "INPUT__1"
    data_type: TYPE_FP32
    dims: [ 10 ]
  }}
]
output [
  {{
    name: "OUTPUT__0"
    data_type: TYPE_FP32
    dims: [ 10 ]
  }}
]
""".format(
        model_name
    )

    # Tolerate an already existing directory, but let any other failure
    # (permissions, a file in the way, ...) surface right here.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


# Use Torchvision ops
def create_visionop_modelfile(models_dir, model_version):
    """Script and save a TorchScript model that calls a torchvision op.

    The module forwards its two inputs to ``torchvision.ops.roi_align`` with
    a ``[5, 5]`` output size. It is compiled with ``torch.jit.script`` and
    saved to ``<models_dir>/libtorch_visionop/<model_version>/model.pt``.

    ``torch``, ``torchvision`` and ``nn`` are read as module globals; the
    script's ``__main__`` section imports them when ``--libtorch`` is given.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Version number used as the sub-directory name.

    Raises:
        OSError: If the version directory cannot be created.
    """
    model_name = "libtorch_visionop"

    class CustomVisionNet(nn.Module):
        def __init__(self):
            super().__init__()

        def forward(self, input, boxes):
            return torchvision.ops.roi_align(input, boxes, [5, 5], 1.0, -1, False)

    visionCustomModel = CustomVisionNet()
    visionCustomModel.eval()
    scripted = torch.jit.script(visionCustomModel)

    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Tolerate an already existing directory, but let any other failure
    # (permissions, a file in the way, ...) surface right here.
    os.makedirs(model_version_dir, exist_ok=True)

    scripted.save(model_version_dir + "/model.pt")


def create_visionop_modelconfig(models_dir, model_version):
    """Write the Triton ``config.pbtxt`` for the ``libtorch_visionop`` model.

    The configuration declares FP32 inputs ``INPUT__0`` (dims
    ``[ 1, 3, 10, 10 ]``) and ``INPUT__1`` (dims ``[1, 5]``), an FP32 output
    ``OUTPUT__0`` (dims ``[1, 3, 5, 5]``), and sets ``max_batch_size: 0``.
    The file is written to ``<models_dir>/libtorch_visionop/config.pbtxt``.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Unused by this function.

    Raises:
        OSError: If the model directory cannot be created or the file cannot
            be written.
    """
    model_name = "libtorch_visionop"
    config_dir = models_dir + "/" + model_name
    config = """
name: "{}"
platform: "pytorch_libtorch"
max_batch_size: 0
input [
  {{
    name: "INPUT__0"
    data_type: TYPE_FP32
    dims: [ 1, 3, 10, 10 ]
  }},
  {{
    name: "INPUT__1"
    data_type: TYPE_FP32
    dims: [1, 5]
  }}
]
output [
  {{
    name: "OUTPUT__0"
    data_type: TYPE_FP32
    dims: [1, 3, 5, 5]
  }}
]
""".format(
        model_name
    )

    # Tolerate an already existing directory, but let any other failure
    # (permissions, a file in the way, ...) surface right here.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_modulo_op_models(models_dir):
    """Generate the custom modulo-op model (config, then model file).

    Does nothing unless the ``--libtorch`` flag is set in ``FLAGS``. The
    model is always generated as version 1.
    """
    if not FLAGS.libtorch:
        return

    version = 1
    create_moduloop_modelconfig(models_dir, version)
    create_moduloop_modelfile(models_dir, version)


def create_vision_op_models(models_dir):
    """Generate the torchvision-op model (config, then model file).

    Does nothing unless the ``--libtorch`` flag is set in ``FLAGS``. The
    model is always generated as version 1.
    """
    if not FLAGS.libtorch:
        return

    version = 1
    create_visionop_modelconfig(models_dir, version)
    create_visionop_modelfile(models_dir, version)


if __name__ == "__main__":
    # Command-line entry point: parse the options, then generate the
    # LibTorch custom-op models when --libtorch is requested.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir", type=str, required=True, help="Top-level model directory"
    )

    # The custom-op library options only differ in the option name and the
    # shared-library file they default to.
    library_options = (
        ("--zero_out_lib_path", "libzeroout.so"),
        ("--cuda_op_lib_path", "libcudaop.so"),
        ("--busy_op_lib_path", "libbusyop.so"),
    )
    for option, library in library_options:
        parser.add_argument(
            option,
            type=str,
            required=False,
            default="./" + library,
            help="Fullpath to " + library,
        )

    parser.add_argument(
        "--libtorch",
        required=False,
        action="store_true",
        help="Generate Pytorch LibTorch models",
    )
    # parse_known_args: unrecognized options are collected instead of
    # aborting the script.
    FLAGS, unparsed = parser.parse_known_args()

    if FLAGS.libtorch:
        # These imports must stay at module scope: the generator functions
        # above look up torch / torchvision / nn as module globals.
        import torch
        import torch.utils.cpp_extension
        import torchvision
        from torch import nn

        create_modulo_op_models(FLAGS.models_dir)
        create_vision_op_models(FLAGS.models_dir)


================================================
FILE: qa/common/gen_qa_dyna_sequence_implicit_models.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os

import numpy as np
from gen_common import np_to_model_dtype, np_to_onnx_dtype, np_to_trt_dtype

FLAGS = None
np_dtype_string = np.dtype(object)


def create_onnx_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build and save the ONNX dynamic-sequence model with implicit state.

    The graph computes ``INPUT + INPUT_STATE * (READY and not START)`` and
    emits that value as both ``OUTPUT`` and ``OUTPUT_STATE``. ``END`` and
    ``CORRID`` are declared as graph inputs but no node consumes them.
    The model is saved to
    ``<models_dir>/<model_name>/<model_version>/model.onnx``.

    Nothing is generated when ``tu.validate_for_onnx_model`` rejects the
    dtype/shape combination.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Version number used as the sub-directory name.
        max_batch: Maximum batch size; 0 selects the "onnx_nobatch" variant
            whose tensors have no leading batch dimension.
        dtype: NumPy dtype of the input, output and state tensors.
        shape: Tensor shape, not including the batch dimension.

    Raises:
        OSError: If the version directory cannot be created.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    model_name = tu.get_dyna_sequence_model_name(
        "onnx_nobatch" if max_batch == 0 else "onnx", dtype
    )
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    tensor_proto = onnx.TensorProto
    make_info = onnx.helper.make_tensor_value_info
    make_node = onnx.helper.make_node

    onnx_dtype = np_to_onnx_dtype(dtype)
    onnx_input_shape, idx = tu.shape_to_onnx_shape(shape, 0)
    onnx_output_shape, idx = tu.shape_to_onnx_shape(shape, idx)

    # If the input is a string then use int32 for the control tensors and
    # just cast to/from string for input and output.
    onnx_control_dtype = onnx_dtype
    if onnx_dtype == tensor_proto.STRING:
        onnx_control_dtype = tensor_proto.INT32

    # If the input dtype is bool, keep bool for the control tensors and use
    # int32 for input/output.
    if onnx_dtype == tensor_proto.BOOL:
        onnx_dtype = tensor_proto.INT32

    batch_dim = [] if max_batch == 0 else [None]
    control_dims = batch_dim + [1]

    # Graph inputs and outputs.
    onnx_input = make_info("INPUT", onnx_dtype, batch_dim + onnx_input_shape)
    onnx_input_state = make_info(
        "INPUT_STATE", onnx_dtype, batch_dim + onnx_input_shape
    )
    onnx_start = make_info("START", onnx_control_dtype, control_dims)
    onnx_ready = make_info("READY", onnx_control_dtype, control_dims)
    onnx_corrid = make_info("CORRID", tensor_proto.UINT64, control_dims)
    onnx_end = make_info("END", onnx_control_dtype, control_dims)
    onnx_output = make_info("OUTPUT", onnx_dtype, batch_dim + onnx_output_shape)
    onnx_output_state = make_info(
        "OUTPUT_STATE", onnx_dtype, batch_dim + onnx_output_shape
    )

    # Cast int8, int16 input to a higher precision int as the Onnx Add/Sub
    # operators don't support those types. String data is cast to int32 too.
    if onnx_dtype in (tensor_proto.INT8, tensor_proto.INT16, tensor_proto.STRING):
        internal_input = make_node(
            "Cast", ["INPUT"], ["_INPUT"], to=tensor_proto.INT32
        )
        internal_input_state = make_node(
            "Cast", ["INPUT_STATE"], ["_INPUT_STATE"], to=tensor_proto.INT32
        )
    else:
        internal_input = make_node("Identity", ["INPUT"], ["_INPUT"])
        internal_input_state = make_node(
            "Identity", ["INPUT_STATE"], ["_INPUT_STATE"]
        )

    # Control handling. Both variants end with the boolean tensor
    # "input_state_cond" = READY and not START.
    if onnx_control_dtype == tensor_proto.BOOL:
        control_nodes = [
            # Convert boolean controls to int32 values.
            make_node("Cast", ["START"], ["_START"], to=tensor_proto.INT32),
            make_node("Cast", ["READY"], ["_READY"], to=tensor_proto.INT32),
            make_node("Not", ["START"], ["_NOT_START_CAST"]),
            make_node(
                "Cast", ["_NOT_START_CAST"], ["_NOT_START"], to=tensor_proto.INT32
            ),
            # _NOT_READY is the negation of READY, as in the non-bool branch.
            make_node("Not", ["READY"], ["_NOT_READY_CAST"]),
            make_node(
                "Cast", ["_NOT_READY_CAST"], ["_NOT_READY"], to=tensor_proto.INT32
            ),
            make_node("And", ["READY", "_NOT_START_CAST"], ["input_state_cond"]),
        ]
    else:
        control_nodes = [
            make_node("Cast", ["START"], ["_START_CAST"], to=tensor_proto.BOOL),
            make_node("Not", ["_START_CAST"], ["_NOT_START_CAST"]),
            make_node(
                "Cast", ["_NOT_START_CAST"], ["_NOT_START"], to=tensor_proto.INT32
            ),
            make_node("Cast", ["READY"], ["_READY_CAST"], to=tensor_proto.BOOL),
            make_node("Not", ["_READY_CAST"], ["_NOT_READY_CAST"]),
            make_node(
                "Cast", ["_NOT_READY_CAST"], ["_NOT_READY"], to=tensor_proto.INT32
            ),
            # Take advantage of knowledge that the READY false value is 0
            # and true is 1.
            make_node(
                "And", ["_NOT_START_CAST", "_READY_CAST"], ["input_state_cond"]
            ),
        ]

    # The state only contributes when the condition holds.
    # NOTE(review): the condition is cast to INT32 before the Mul, so this
    # presumably expects _INPUT_STATE to be INT32 as well - confirm for
    # non-integer dtypes.
    input_state_cond_cast = make_node(
        "Cast",
        ["input_state_cond"],
        ["input_state_cond_cast"],
        to=tensor_proto.INT32,
    )
    mul_state = make_node(
        "Mul", ["_INPUT_STATE", "input_state_cond_cast"], ["mul_state"]
    )
    add = make_node("Add", ["_INPUT", "mul_state"], ["CAST"])

    # Avoid cast from float16 to float16
    # (bug in Onnx Runtime, cast from float16 to float16 will become cast
    # from float16 to float32)
    if onnx_dtype == tensor_proto.FLOAT16:
        cast = make_node("Identity", ["CAST"], ["OUTPUT"])
        cast_output_state = make_node("Identity", ["CAST"], ["OUTPUT_STATE"])
    else:
        cast = make_node("Cast", ["CAST"], ["OUTPUT"], to=onnx_dtype)
        cast_output_state = make_node(
            "Cast", ["CAST"], ["OUTPUT_STATE"], to=onnx_dtype
        )

    onnx_nodes = (
        [internal_input, internal_input_state]
        + control_nodes
        + [input_state_cond_cast, mul_state, add, cast, cast_output_state]
    )
    onnx_inputs = [
        onnx_end,
        onnx_corrid,
        onnx_input_state,
        onnx_input,
        onnx_start,
        onnx_ready,
    ]
    onnx_outputs = [onnx_output, onnx_output_state]
    graph_proto = onnx.helper.make_graph(
        onnx_nodes, model_name, onnx_inputs, onnx_outputs
    )

    # Pin the opset only when one was requested.
    if FLAGS.onnx_opset > 0:
        model_opset = onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        model_def = onnx.helper.make_model(
            graph_proto, producer_name="triton", opset_imports=[model_opset]
        )
    else:
        model_def = onnx.helper.make_model(graph_proto, producer_name="triton")

    # Tolerate an already existing directory, but let any other failure
    # (permissions, a file in the way, ...) surface right here.
    os.makedirs(model_version_dir, exist_ok=True)

    onnx.save(model_def, model_version_dir + "/model.onnx")


def create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape):
    """Write the Triton ``config.pbtxt`` for the ONNX dyna-sequence model.

    The configuration enables sequence batching with START/END/READY/CORRID
    control inputs and one implicit state (``INPUT_STATE`` ->
    ``OUTPUT_STATE``). When ``max_batch`` is greater than zero the "oldest"
    scheduling strategy block is emitted as well. The control values use
    ``fp32_false_true`` when ``dtype`` is ``np.float32`` and
    ``int32_false_true`` otherwise.

    Nothing is written when ``tu.validate_for_onnx_model`` rejects the
    dtype/shape combination.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Unused by this function.
        max_batch: Value written to ``max_batch_size``; 0 selects the
            "onnx_nobatch" model name.
        dtype: NumPy dtype of the input, output and state tensors.
        shape: Tensor shape, not including the batch dimension.

    Raises:
        OSError: If the model directory cannot be created or the file cannot
            be written.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    model_name = tu.get_dyna_sequence_model_name(
        "onnx_nobatch" if max_batch == 0 else "onnx", dtype
    )
    config_dir = models_dir + "/" + model_name
    # The state dims use the same bracketed list form as the input/output
    # dims so that shapes with more than one dimension stay valid.
    config = """
name: "{}"
platform: "onnxruntime_onnx"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  {}
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "END"
      control [
        {{
          kind: CONTROL_SEQUENCE_END
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "CORRID"
      control [
        {{
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_UINT64
        }}
      ]
    }}
  ]
  state [
    {{
      input_name: "INPUT_STATE"
      output_name: "OUTPUT_STATE"
      data_type: {dtype}
      dims: [ {dims} ]
    }}
  ]
}}
input [
  {{
    name: "INPUT"
    data_type: {dtype}
    dims: [ {dims} ]
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {dtype}
    dims: [ {dims} ]
  }}
]
instance_group [
  {{
    kind: KIND_CPU
  }}
]
""".format(
        model_name,
        max_batch,
        (
            "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}"
            if max_batch > 0
            else ""
        ),
        dtype=np_to_model_dtype(dtype),
        dims=tu.shape_to_dims_str(shape),
        type="fp32" if dtype == np.float32 else "int32",
    )

    # Tolerate an already existing directory, but let any other failure
    # (permissions, a file in the way, ...) surface right here.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build and save the TensorRT plan for the implicit-state sequence model.

    The network computes
    ``INPUT + INPUT_STATE * floor((READY + (1 - START)) / 2)`` and exposes
    the result as both ``OUTPUT`` and ``OUTPUT_STATE``. ``END`` and
    ``CORRID`` are added as network inputs but are not connected to any
    layer. The serialized engine is written to
    ``<models_dir>/<model_name>/<model_version>/model.plan``.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Version number used as the sub-directory name.
        max_batch: Maximum batch size; when non-zero every input gets a
            leading dynamic (-1) batch dimension, 0 selects the
            "plan_nobatch" variant.
        dtype: NumPy dtype of the tensors (converted with
            ``np_to_trt_dtype``).
        shape: Tensor shape, not including the batch dimension; -1 marks a
            dynamic dimension.

    Raises:
        OSError: If the version directory cannot be created or the plan
            cannot be written.
    """
    trt_dtype = np_to_trt_dtype(dtype)
    verbose = os.environ.get("TRT_VERBOSE") == "1"
    # Keep the logger referenced for the whole build.
    logger = trt.Logger(trt.Logger.VERBOSE if verbose else trt.Logger.INFO)
    builder = trt.Builder(logger)
    network = builder.create_network()

    unit_shape = [1] * len(shape)
    if max_batch != 0:
        in0 = network.add_input("INPUT", trt_dtype, [-1] + shape)
        in_state0 = network.add_input("INPUT_STATE", trt_dtype, [-1] + shape)
        start0 = network.add_input("START", trt_dtype, [-1] + unit_shape)
        network.add_input("END", trt_dtype, [-1] + unit_shape)
        ready0 = network.add_input("READY", trt_dtype, [-1] + unit_shape)
        network.add_input("CORRID", trt.int32, [-1] + unit_shape)
        constant_1_data = trt.Weights(np.ones(unit_shape + [1], dtype=dtype))
        constant_1 = network.add_constant(unit_shape + [1], constant_1_data)
    else:
        in0 = network.add_input("INPUT", trt_dtype, shape)
        in_state0 = network.add_input("INPUT_STATE", trt_dtype, shape)
        start0 = network.add_input("START", trt_dtype, unit_shape)
        network.add_input("END", trt_dtype, unit_shape)
        ready0 = network.add_input("READY", trt_dtype, unit_shape)
        network.add_input("CORRID", trt.int32, unit_shape)
        constant_1_data = trt.Weights(np.ones(unit_shape, dtype=dtype))
        constant_1 = network.add_constant(unit_shape, constant_1_data)

    # not_start = 1 - START
    not_start = network.add_elementwise(
        constant_1.get_output(0), start0, trt.ElementWiseOperation.SUB
    )
    not_start.set_output_type(0, trt_dtype)

    # floor((READY + not_start) / 2): the result is 1 only when both terms
    # are 1, for 0/1 valued controls.
    input_state_cond_temp = network.add_elementwise(
        ready0, not_start.get_output(0), trt.ElementWiseOperation.SUM
    )
    constant_2 = network.add_elementwise(
        constant_1.get_output(0), constant_1.get_output(0), trt.ElementWiseOperation.SUM
    )
    input_state_cond = network.add_elementwise(
        input_state_cond_temp.get_output(0),
        constant_2.get_output(0),
        trt.ElementWiseOperation.FLOOR_DIV,
    )
    internal_state = network.add_elementwise(
        in_state0, input_state_cond.get_output(0), trt.ElementWiseOperation.PROD
    )
    # Two identical sums, one layer per marked network output.
    out0 = network.add_elementwise(
        internal_state.get_output(0), in0, trt.ElementWiseOperation.SUM
    )
    out0_state = network.add_elementwise(
        internal_state.get_output(0), in0, trt.ElementWiseOperation.SUM
    )

    out0.get_output(0).name = "OUTPUT"
    network.mark_output(out0.get_output(0))

    out0_state.get_output(0).name = "OUTPUT_STATE"
    network.mark_output(out0_state.get_output(0))

    # Optimization profile: dynamic (-1) dimensions range over 1..32 with an
    # optimum of 8, the batch dimension over 1..max_batch.
    if max_batch != 0:
        batch_min = [1]
        batch_max = [max(1, max_batch)]
        control_min = [1] + unit_shape
        control_max = [max_batch] + unit_shape
    else:
        batch_min = []
        batch_max = []
        control_min = unit_shape
        control_max = unit_shape
    min_shape = batch_min + [1 if dim == -1 else dim for dim in shape]
    opt_shape = batch_max + [8 if dim == -1 else dim for dim in shape]
    max_shape = batch_max + [32 if dim == -1 else dim for dim in shape]

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    profile.set_shape("INPUT_STATE", min_shape, opt_shape, max_shape)
    for control_name in ("START", "END", "READY", "CORRID"):
        profile.set_shape(control_name, control_min, control_max, control_max)

    # Use a single builder config so that the optimization profile registered
    # here is part of the config the engine is built with.
    config = builder.create_builder_config()
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fall back to the older API when build_serialized_network is missing.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine
    del network

    model_name = tu.get_dyna_sequence_model_name(
        "plan_nobatch" if max_batch == 0 else "plan", dtype
    )
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Tolerate an already existing directory, but let any other failure
    # (permissions, a file in the way, ...) surface right here.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build and save the TensorRT plan with explicit type/format constraints.

    Builds the same network as ``create_plan_modelfile``,
    ``INPUT + INPUT_STATE * floor((READY + (1 - START)) / 2)`` exposed as
    ``OUTPUT`` and ``OUTPUT_STATE``. In addition it pins the output dtypes,
    restricts the listed tensors to the LINEAR format, and builds with the
    DIRECT_IO, PREFER_PRECISION_CONSTRAINTS and REJECT_EMPTY_ALGORITHMS
    flags (plus INT8 or FP16 for those dtypes). The serialized engine is
    written to ``<models_dir>/<model_name>/<model_version>/model.plan``.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Version number used as the sub-directory name.
        max_batch: Maximum batch size; when non-zero every input gets a
            leading dynamic (-1) batch dimension, 0 selects the
            "plan_nobatch" variant.
        dtype: NumPy dtype of the tensors (converted with
            ``np_to_trt_dtype``).
        shape: Tensor shape, not including the batch dimension; -1 marks a
            dynamic dimension.

    Raises:
        OSError: If the version directory cannot be created or the plan
            cannot be written.
    """
    trt_dtype = np_to_trt_dtype(dtype)
    format_mask = 1 << int(trt.TensorFormat.LINEAR)

    verbose = os.environ.get("TRT_VERBOSE") == "1"
    # Keep the logger referenced for the whole build.
    logger = trt.Logger(trt.Logger.VERBOSE if verbose else trt.Logger.INFO)
    builder = trt.Builder(logger)
    network = builder.create_network()

    unit_shape = [1] * len(shape)
    if max_batch != 0:
        in0 = network.add_input("INPUT", trt_dtype, [-1] + shape)
        in_state0 = network.add_input("INPUT_STATE", trt_dtype, [-1] + shape)
        start0 = network.add_input("START", trt_dtype, [-1] + unit_shape)
        network.add_input("END", trt_dtype, [-1] + unit_shape)
        ready0 = network.add_input("READY", trt_dtype, [-1] + unit_shape)
        network.add_input("CORRID", trt.int32, [-1] + unit_shape)
        constant_1_data = trt.Weights(np.ones(unit_shape + [1], dtype=dtype))
        constant_1 = network.add_constant(unit_shape + [1], constant_1_data)
    else:
        in0 = network.add_input("INPUT", trt_dtype, shape)
        in_state0 = network.add_input("INPUT_STATE", trt_dtype, shape)
        start0 = network.add_input("START", trt_dtype, unit_shape)
        network.add_input("END", trt_dtype, unit_shape)
        ready0 = network.add_input("READY", trt_dtype, unit_shape)
        network.add_input("CORRID", trt.int32, unit_shape)
        constant_1_data = trt.Weights(np.ones(unit_shape, dtype=dtype))
        constant_1 = network.add_constant(unit_shape, constant_1_data)

    # not_start = 1 - START
    not_start = network.add_elementwise(
        constant_1.get_output(0), start0, trt.ElementWiseOperation.SUB
    )
    not_start.set_output_type(0, trt_dtype)

    # floor((READY + not_start) / 2): the result is 1 only when both terms
    # are 1, for 0/1 valued controls.
    input_state_cond_temp = network.add_elementwise(
        ready0, not_start.get_output(0), trt.ElementWiseOperation.SUM
    )
    constant_2 = network.add_elementwise(
        constant_1.get_output(0), constant_1.get_output(0), trt.ElementWiseOperation.SUM
    )
    input_state_cond = network.add_elementwise(
        input_state_cond_temp.get_output(0),
        constant_2.get_output(0),
        trt.ElementWiseOperation.FLOOR_DIV,
    )
    internal_state = network.add_elementwise(
        in_state0, input_state_cond.get_output(0), trt.ElementWiseOperation.PROD
    )
    # Two identical sums, one layer per marked network output.
    out0 = network.add_elementwise(
        internal_state.get_output(0), in0, trt.ElementWiseOperation.SUM
    )
    out0_state = network.add_elementwise(
        internal_state.get_output(0), in0, trt.ElementWiseOperation.SUM
    )

    out0.get_output(0).name = "OUTPUT"
    network.mark_output(out0.get_output(0))
    out0.get_output(0).dtype = trt_dtype

    out0_state.get_output(0).name = "OUTPUT_STATE"
    network.mark_output(out0_state.get_output(0))
    out0_state.get_output(0).dtype = trt_dtype

    # Restrict the data, START/READY and output tensors to the LINEAR format.
    in0.allowed_formats = format_mask
    in_state0.allowed_formats = format_mask
    start0.allowed_formats = format_mask
    ready0.allowed_formats = format_mask
    out0.get_output(0).allowed_formats = format_mask
    out0_state.get_output(0).allowed_formats = format_mask

    if trt_dtype == trt.int8:
        int8_range = (-128.0, 127.0)
        in0.dynamic_range = int8_range
        in_state0.dynamic_range = int8_range
        # NOTE(review): out0/out0_state are the elementwise layers here,
        # whereas name/dtype/allowed_formats above are set on get_output(0)
        # - confirm the layer is the intended target.
        out0.dynamic_range = int8_range
        out0_state.dynamic_range = int8_range
        start0.dynamic_range = int8_range
        ready0.dynamic_range = int8_range

    flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
    flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

    if trt_dtype == trt.int8:
        flags |= 1 << int(trt.BuilderFlag.INT8)
    elif trt_dtype == trt.float16:
        flags |= 1 << int(trt.BuilderFlag.FP16)

    config = builder.create_builder_config()
    config.flags = flags

    # Optimization profile: dynamic (-1) dimensions range over 1..32 with an
    # optimum of 8, the batch dimension over 1..max_batch.
    if max_batch != 0:
        batch_min = [1]
        batch_max = [max(1, max_batch)]
        control_min = [1] + unit_shape
        control_max = [max_batch] + unit_shape
    else:
        batch_min = []
        batch_max = []
        control_min = unit_shape
        control_max = unit_shape
    min_shape = batch_min + [1 if dim == -1 else dim for dim in shape]
    opt_shape = batch_max + [8 if dim == -1 else dim for dim in shape]
    max_shape = batch_max + [32 if dim == -1 else dim for dim in shape]

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    profile.set_shape("INPUT_STATE", min_shape, opt_shape, max_shape)
    for control_name in ("START", "END", "READY", "CORRID"):
        profile.set_shape(control_name, control_min, control_max, control_max)

    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fall back to the older API when build_serialized_network is missing.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_name = tu.get_dyna_sequence_model_name(
        "plan_nobatch" if max_batch == 0 else "plan", dtype
    )
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Tolerate an already existing directory, but let any other failure
    # (permissions, a file in the way, ...) surface right here.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_models(models_dir, model_version, max_batch, dtype, shape):
    """Generate the TensorRT plan file for one dtype/shape combination.

    Nothing is generated when ``tu.validate_for_trt_model`` rejects the
    combination. float32 models are built by ``create_plan_modelfile``;
    every other dtype is built by ``create_plan_rf_modelfile``.
    """
    if tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape):
        # Only the selected builder name is looked up, as in a plain if/else.
        build = (
            create_plan_rf_modelfile if dtype != np.float32 else create_plan_modelfile
        )
        build(models_dir, model_version, max_batch, dtype, shape)


def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape):
    """Write ``config.pbtxt`` for the TensorRT dynamic-sequence model.

    The configuration declares the START/END/READY/CORRID sequence controls,
    a sequence ``state`` entry (INPUT_STATE -> OUTPUT_STATE), and one
    INPUT/OUTPUT pair, all using ``dtype`` and ``shape``.

    Args:
        models_dir: Top-level model directory; the file is written to
            ``<models_dir>/<model_name>/config.pbtxt``.
        model_version: Unused here; accepted so the signature matches the
            model-file generators.
        max_batch: Value for ``max_batch_size``. ``0`` selects the
            ``plan_nobatch`` model name and omits the ``oldest`` scheduling
            policy block.
        dtype: NumPy dtype of the tensors. The control true/false values are
            declared as ``fp32`` when it is ``np.float32``, else ``int32``.
        shape: Tensor shape (without batch dimension).

    Returns:
        None. Returns early, writing nothing, when
        ``tu.validate_for_trt_model`` rejects the dtype/shape combination.
    """
    if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape):
        return

    model_name = tu.get_dyna_sequence_model_name(
        "plan_nobatch" if max_batch == 0 else "plan", dtype
    )
    config_dir = models_dir + "/" + model_name
    # The state entry's dims use the bracketed list form, like the input and
    # output entries, so that a multi-dimension shape string stays valid
    # text-format protobuf.
    config = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  {}
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "END"
      control [
        {{
          kind: CONTROL_SEQUENCE_END
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "CORRID"
      control [
        {{
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_INT32
        }}
      ]
    }}
  ]
  state [
    {{
      input_name: "INPUT_STATE"
      output_name: "OUTPUT_STATE"
      data_type: {dtype}
      dims: [ {dims} ]
    }}
  ]
}}
input [
  {{
    name: "INPUT"
    data_type: {dtype}
    dims: [ {dims} ]
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {dtype}
    dims: [ {dims} ]
  }}
]
instance_group [
  {{
    kind: KIND_GPU
  }}
]
""".format(
        model_name,
        max_batch,
        (
            "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}"
            if max_batch > 0
            else ""
        ),
        dtype=np_to_model_dtype(dtype),
        dims=tu.shape_to_dims_str(shape),
        type="fp32" if dtype == np.float32 else "int32",
    )

    # Tolerate an already-existing directory, but let any other failure
    # (e.g. permissions) surface here instead of being silently swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_models(models_dir, dtype, shape, no_batch=True):
    """Generate every requested model variant for one dtype/shape pair.

    For each framework enabled in ``FLAGS`` a batching model (max batch size
    8) is produced; when ``no_batch`` is true a non-batching variant (max
    batch size 0) follows. For each variant the config is written before the
    model file. TensorRT models are not generated for ``bool``.
    """
    model_version = 1
    batch_sizes = [8, 0] if no_batch else [8]

    if FLAGS.onnx:
        for max_batch in batch_sizes:
            create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape)
            create_onnx_modelfile(models_dir, model_version, max_batch, dtype, shape)

    if not FLAGS.tensorrt or dtype == bool:
        return

    for max_batch in batch_sizes:
        create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape)
        create_plan_models(models_dir, model_version, max_batch, dtype, shape)


if __name__ == "__main__":
    # Command-line options. Unrecognised arguments are collected in
    # `unparsed` rather than rejected (parse_known_args).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir", help="Top-level model directory", required=True, type=str
    )
    parser.add_argument(
        "--tensorrt",
        help="Generate TensorRT PLAN models",
        action="store_true",
        required=False,
    )
    parser.add_argument(
        "--tensorrt-shape-io",
        help="Generate TensorRT PLAN models w/ shape tensor i/o",
        action="store_true",
        required=False,
    )
    parser.add_argument(
        "--onnx", help="Generate Onnx models", action="store_true", required=False
    )
    parser.add_argument(
        "--onnx_opset",
        help="Opset used for Onnx models. Default is to use ONNXRT default",
        default=0,
        required=False,
        type=int,
    )
    parser.add_argument(
        "--libtorch",
        help="Generate Pytorch LibTorch models",
        action="store_true",
        required=False,
    )
    parser.add_argument(
        "--openvino",
        help="Generate OpenVino models",
        action="store_true",
        required=False,
    )
    parser.add_argument(
        "--variable",
        help="Used variable-shape tensors for input/output",
        action="store_true",
        required=False,
    )
    FLAGS, unparsed = parser.parse_known_args()

    # Framework packages are imported only when their flag is set; the
    # generator functions find them as module globals.
    if FLAGS.onnx:
        import onnx

    if FLAGS.tensorrt:
        import tensorrt as trt

    import test_util as tu

    if FLAGS.variable:
        # Models that accept variable-shape input/output tensors; the
        # non-batching variants are not generated in this mode.
        create_models(FLAGS.models_dir, np.int32, [-1], False)
    else:
        # Models that accept fixed-shape input/output tensors.
        create_models(FLAGS.models_dir, np.int32, [1])


================================================
FILE: qa/common/gen_qa_dyna_sequence_models.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os

import numpy as np
from gen_common import (
    np_to_model_dtype,
    np_to_onnx_dtype,
    np_to_torch_dtype,
    np_to_trt_dtype,
    openvino_save_model,
)

# Holder for the parsed command-line options. Functions in this module read
# attributes such as FLAGS.tensorrt_shape_io and FLAGS.onnx_opset; presumably
# it is assigned by the script's entry point (not visible here) -- TODO confirm.
FLAGS = None
# NumPy object dtype; presumably the stand-in used for string tensors --
# verify against its users.
np_dtype_string = np.dtype(object)


def create_plan_shape_tensor_modelfile(
    models_dir, model_version, max_batch, dtype, shape, shape_tensor_input_dtype
):
    """Build a TensorRT engine with shape-tensor I/O and write it as model.plan.

    Args:
        models_dir: Top-level model directory; the engine is written to
            ``<models_dir>/<model_name>/<model_version>/model.plan``.
        model_version: Version sub-directory name.
        max_batch: ``0`` builds a model without a batch dimension; any other
            value adds a leading dynamic batch dimension whose optimization
            profile ranges from 1 to ``max_batch``.
        dtype: NumPy dtype of DUMMY_INPUT and RESIZED_OUTPUT.
        shape: Only its length (the tensor rank) is used; every non-batch
            dimension is declared dynamic.
        shape_tensor_input_dtype: NumPy dtype of SHAPE_INPUT; its name is
            also appended to the model name.
    """
    # Note that resize layer does not support int tensors.
    # The model takes three inputs (INPUT, DUMMY_INPUT and SHAPE_INPUT)
    # and four control inputs (START, END, READY, CORRID).
    # In absence of proper accumulator,
    # OUTPUT : 0 if not-ready and 'INPUT'+'START'+('END'*'CORRID')
    #          otherwise
    # RESIZED_OUTPUT : Obtained after resizing 'DUMMY_INPUT' to shape
    #          specified in 'SHAPE_INPUT'
    # SHAPE_OUTPUT : The shape values of resized output

    trt_dtype = np_to_trt_dtype(dtype)
    trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype)
    trt_memory_format = trt.TensorFormat.LINEAR

    # TRT_VERBOSE=1 in the environment switches the builder log to VERBOSE.
    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    # Control tensors are all-ones in the non-batch dimensions; data tensors
    # are fully dynamic (-1) in every dimension.
    unit_shape = [1] * len(shape)
    dummy_shape = [-1] * len(shape)
    if max_batch != 0:
        in0 = network.add_input("INPUT", trt.int32, [-1] + dummy_shape)
        dummy_in0 = network.add_input("DUMMY_INPUT", trt_dtype, [-1] + dummy_shape)
        # One shape value per dimension, including the batch dimension.
        shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [1 + len(shape)])
        start0 = network.add_input("START", trt.int32, [-1] + unit_shape)
        end0 = network.add_input("END", trt.int32, [-1] + unit_shape)
        ready0 = network.add_input("READY", trt.int32, [-1] + unit_shape)
        corrid0 = network.add_input("CORRID", trt.int32, [-1] + unit_shape)
    else:
        in0 = network.add_input("INPUT", trt.int32, dummy_shape)
        dummy_in0 = network.add_input("DUMMY_INPUT", trt_dtype, dummy_shape)
        shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [len(shape)])
        start0 = network.add_input("START", trt.int32, unit_shape)
        end0 = network.add_input("END", trt.int32, unit_shape)
        ready0 = network.add_input("READY", trt.int32, unit_shape)
        corrid0 = network.add_input("CORRID", trt.int32, unit_shape)

    # OUTPUT = ((INPUT + START) + (END * CORRID)) * READY
    add0 = network.add_elementwise(in0, start0, trt.ElementWiseOperation.SUM)
    mul0 = network.add_elementwise(end0, corrid0, trt.ElementWiseOperation.PROD)
    sum0 = network.add_elementwise(
        add0.get_output(0), mul0.get_output(0), trt.ElementWiseOperation.SUM
    )
    out0 = network.add_elementwise(
        sum0.get_output(0), ready0, trt.ElementWiseOperation.PROD
    ).get_output(0)

    # Resize DUMMY_INPUT; SHAPE_INPUT is wired in as the resize layer's
    # second input, and the resulting shape is exposed as SHAPE_OUTPUT.
    resize_layer = network.add_resize(dummy_in0)
    resize_layer.set_input(1, shape_in0)
    shape_out0 = network.add_shape(resize_layer.get_output(0))
    resized_out0 = resize_layer.get_output(0)

    shape_out0.get_output(0).name = "SHAPE_OUTPUT"
    shape_out0.get_output(0).dtype = trt.int64
    network.mark_output_for_shapes(shape_out0.get_output(0))

    out0.name = "OUTPUT"
    out0.dtype = trt.int32
    network.mark_output(out0)

    resized_out0.name = "RESIZED_OUTPUT"
    resized_out0.dtype = trt_dtype
    network.mark_output(resized_out0)

    # allowed_formats is a bitmask; restrict these tensors to LINEAR only.
    shape_in0.allowed_formats = 1 << int(trt_memory_format)
    dummy_in0.allowed_formats = 1 << int(trt_memory_format)
    start0.allowed_formats = 1 << int(trt_memory_format)
    ready0.allowed_formats = 1 << int(trt_memory_format)
    out0.allowed_formats = 1 << int(trt_memory_format)
    shape_out0.get_output(0).allowed_formats = 1 << int(trt_memory_format)
    resized_out0.allowed_formats = 1 << int(trt_memory_format)

    if trt_dtype == trt.int8:
        dummy_in0.dynamic_range = (-128.0, 127.0)
        resized_out0.dynamic_range = (-128.0, 127.0)
        start0.dynamic_range = (-128.0, 127.0)
        end0.dynamic_range = (-128.0, 127.0)
        ready0.dynamic_range = (-128.0, 127.0)

    # Builder flags are also a bitmask, assembled here and assigned to the
    # builder config below.
    flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
    flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

    if trt_dtype == trt.int8:
        flags |= 1 << int(trt.BuilderFlag.INT8)
    elif trt_dtype == trt.float16:
        flags |= 1 << int(trt.BuilderFlag.FP16)

    # Batch-dimension part of the optimization profile; empty for
    # non-batching models.
    min_prefix = []
    opt_prefix = []
    max_prefix = []

    if max_batch != 0:
        min_prefix = [1]
        opt_prefix = [max(1, max_batch)]
        max_prefix = [max(1, max_batch)]

    # Every non-batch dimension ranges over 1 (min) / 8 (opt) / 32 (max).
    min_shape = min_prefix + [1] * len(shape)
    opt_shape = opt_prefix + [8] * len(shape)
    max_shape = max_prefix + [32] * len(shape)

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    # The same lists are passed for the shape tensor via set_shape_input;
    # NOTE(review): there they presumably bound the tensor's values rather
    # than its dimensions -- confirm against the TensorRT documentation.
    profile.set_shape_input("SHAPE_INPUT", min_shape, opt_shape, max_shape)
    profile.set_shape("DUMMY_INPUT", min_shape, opt_shape, max_shape)
    profile.set_shape(
        "START",
        min_prefix + unit_shape,
        opt_prefix + unit_shape,
        max_prefix + unit_shape,
    )
    profile.set_shape(
        "END", min_prefix + unit_shape, opt_prefix + unit_shape, max_prefix + unit_shape
    )
    profile.set_shape(
        "READY",
        min_prefix + unit_shape,
        opt_prefix + unit_shape,
        max_prefix + unit_shape,
    )
    profile.set_shape(
        "CORRID",
        min_prefix + unit_shape,
        opt_prefix + unit_shape,
        max_prefix + unit_shape,
    )

    config = builder.create_builder_config()
    config.flags = flags
    config.add_optimization_profile(profile)
    # Workspace pool limit of 1 << 20 bytes (1 MiB).
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback when the builder has no build_serialized_network
        # attribute -- presumably an older TensorRT release.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_name = tu.get_dyna_sequence_model_name(
        "plan_nobatch" if max_batch == 0 else "plan", dtype
    )
    # The shape-tensor dtype name is appended to the model name.
    model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    try:
        os.makedirs(model_version_dir)
    except OSError as ex:
        pass  # ignore existing dir

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build the TensorRT dynamic-sequence test engine and save it as model.plan.

    The network is not a real accumulator. It computes
    ``(INPUT + START + END * CORRID) * READY``; per the original author's
    note, the tests are written to expect that value.

    Args:
        models_dir: Top-level model directory; the engine is written to
            ``<models_dir>/<model_name>/<model_version>/model.plan``.
        model_version: Version sub-directory name.
        max_batch: ``0`` builds a model without a batch dimension; otherwise
            a leading dynamic batch dimension is added.
        dtype: NumPy dtype of INPUT, START, END and READY (CORRID is always
            int32).
        shape: Non-batch shape of INPUT; ``-1`` entries are dynamic and get
            an optimization-profile range of 1 / 8 / 32 (min / opt / max).
    """
    trt_dtype = np_to_trt_dtype(dtype)

    # TRT_VERBOSE=1 in the environment selects the VERBOSE log level.
    wants_verbose = os.environ.get("TRT_VERBOSE") == "1"
    logger = trt.Logger(trt.Logger.VERBOSE if wants_verbose else trt.Logger.INFO)
    builder = trt.Builder(logger)
    network = builder.create_network()

    batched = max_batch != 0
    unit_shape = [1] * len(shape)

    def io_dims(dims):
        # Batching models carry a leading dynamic batch dimension.
        return [-1] + dims if batched else dims

    # Inputs are registered in this order: INPUT, START, END, READY, CORRID.
    in0 = network.add_input("INPUT", trt_dtype, io_dims(shape))
    start0 = network.add_input("START", trt_dtype, io_dims(unit_shape))
    end0 = network.add_input("END", trt_dtype, io_dims(unit_shape))
    ready0 = network.add_input("READY", trt_dtype, io_dims(unit_shape))
    corrid0 = network.add_input("CORRID", trt.int32, io_dims(unit_shape))

    op_sum = trt.ElementWiseOperation.SUM
    op_prod = trt.ElementWiseOperation.PROD
    input_plus_start = network.add_elementwise(in0, start0, op_sum)
    end_times_corrid = network.add_elementwise(end0, corrid0, op_prod)
    total = network.add_elementwise(
        input_plus_start.get_output(0), end_times_corrid.get_output(0), op_sum
    )
    gated = network.add_elementwise(total.get_output(0), ready0, op_prod)

    gated.get_output(0).name = "OUTPUT"
    network.mark_output(gated.get_output(0))

    # Optimization-profile bounds for INPUT: optional batch dimension first,
    # then each shape entry (fixed sizes are used as-is).
    batch_min = [1] if batched else []
    batch_opt = [max(1, max_batch)] if batched else []
    batch_max = [max(1, max_batch)] if batched else []
    min_shape = batch_min + [1 if dim == -1 else dim for dim in shape]
    opt_shape = batch_opt + [8 if dim == -1 else dim for dim in shape]
    max_shape = batch_max + [32 if dim == -1 else dim for dim in shape]

    # The four control tensors share one set of bounds.
    if batched:
        ctrl_min = [1] + unit_shape
        ctrl_opt = [max_batch] + unit_shape
        ctrl_max = [max_batch] + unit_shape
    else:
        ctrl_min = ctrl_opt = ctrl_max = unit_shape

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    for control_name in ("START", "END", "READY", "CORRID"):
        profile.set_shape(control_name, ctrl_min, ctrl_opt, ctrl_max)

    config = builder.create_builder_config()
    config.add_optimization_profile(profile)
    # Workspace pool limit of 1 << 20 bytes.
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)

    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Builder without build_serialized_network: build, then serialize.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    name_prefix = "plan" if batched else "plan_nobatch"
    model_name = tu.get_dyna_sequence_model_name(name_prefix, dtype)
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    try:
        os.makedirs(model_version_dir)
    except OSError:
        pass  # ignore existing dir

    with open(model_version_dir + "/model.plan", "wb") as plan_file:
        plan_file.write(engine_bytes)


def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build the dynamic-sequence TensorRT engine with pinned I/O formats.

    Same arithmetic as ``create_plan_modelfile`` -- the network computes
    ``(INPUT + START + END * CORRID) * READY`` rather than a real
    accumulator -- but additionally restricts I/O tensors to the LINEAR
    format, fixes the output dtype, and sets the DIRECT_IO,
    PREFER_PRECISION_CONSTRAINTS and REJECT_EMPTY_ALGORITHMS builder flags
    (plus INT8 / FP16 when ``dtype`` maps to those).

    Args:
        models_dir: Top-level model directory; the engine is written to
            ``<models_dir>/<model_name>/<model_version>/model.plan``.
        model_version: Version sub-directory name.
        max_batch: ``0`` builds a model without a batch dimension; otherwise
            a leading dynamic batch dimension is added.
        dtype: NumPy dtype of INPUT, START, END, READY and OUTPUT (CORRID is
            always int32).
        shape: Non-batch shape of INPUT; ``-1`` entries are dynamic and get
            an optimization-profile range of 1 / 8 / 32 (min / opt / max).

    Raises:
        OSError: If the output directory cannot be created or written.
    """
    trt_dtype = np_to_trt_dtype(dtype)
    trt_memory_format = trt.TensorFormat.LINEAR

    # TRT_VERBOSE=1 in the environment switches the builder log to VERBOSE.
    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    unit_shape = [1] * len(shape)
    if max_batch != 0:
        in0 = network.add_input("INPUT", trt_dtype, [-1] + shape)
        start0 = network.add_input("START", trt_dtype, [-1] + unit_shape)
        end0 = network.add_input("END", trt_dtype, [-1] + unit_shape)
        ready0 = network.add_input("READY", trt_dtype, [-1] + unit_shape)
        corrid0 = network.add_input("CORRID", trt.int32, [-1] + unit_shape)
    else:
        in0 = network.add_input("INPUT", trt_dtype, shape)
        start0 = network.add_input("START", trt_dtype, unit_shape)
        end0 = network.add_input("END", trt_dtype, unit_shape)
        ready0 = network.add_input("READY", trt_dtype, unit_shape)
        corrid0 = network.add_input("CORRID", trt.int32, unit_shape)

    # OUTPUT = ((INPUT + START) + (END * CORRID)) * READY
    add0 = network.add_elementwise(in0, start0, trt.ElementWiseOperation.SUM)
    mul0 = network.add_elementwise(end0, corrid0, trt.ElementWiseOperation.PROD)
    sum0 = network.add_elementwise(
        add0.get_output(0), mul0.get_output(0), trt.ElementWiseOperation.SUM
    )
    out0 = network.add_elementwise(
        sum0.get_output(0), ready0, trt.ElementWiseOperation.PROD
    )

    # out0 is the elementwise layer; tensor properties (name, dtype, formats,
    # dynamic range) belong on its output tensor.
    out_tensor = out0.get_output(0)
    out_tensor.name = "OUTPUT"
    network.mark_output(out_tensor)

    out_tensor.dtype = trt_dtype

    # allowed_formats is a bitmask; restrict these tensors to LINEAR only.
    in0.allowed_formats = 1 << int(trt_memory_format)
    start0.allowed_formats = 1 << int(trt_memory_format)
    ready0.allowed_formats = 1 << int(trt_memory_format)
    out_tensor.allowed_formats = 1 << int(trt_memory_format)

    if trt_dtype == trt.int8:
        in0.dynamic_range = (-128.0, 127.0)
        # Set the range on the output tensor, not on the layer object.
        out_tensor.dynamic_range = (-128.0, 127.0)
        start0.dynamic_range = (-128.0, 127.0)
        end0.dynamic_range = (-128.0, 127.0)
        ready0.dynamic_range = (-128.0, 127.0)
        corrid0.dynamic_range = (-128.0, 127.0)

    # Builder flags are a bitmask, assigned to the builder config below.
    flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
    flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

    if trt_dtype == trt.int8:
        flags |= 1 << int(trt.BuilderFlag.INT8)
    elif trt_dtype == trt.float16:
        flags |= 1 << int(trt.BuilderFlag.FP16)

    # Optimization-profile bounds for INPUT: optional batch dimension first,
    # then each shape entry (dynamic entries range over 1 / 8 / 32).
    min_shape = []
    opt_shape = []
    max_shape = []
    if max_batch != 0:
        min_shape = min_shape + [1]
        opt_shape = opt_shape + [max(1, max_batch)]
        max_shape = max_shape + [max(1, max_batch)]
    for i in shape:
        if i == -1:
            min_shape = min_shape + [1]
            opt_shape = opt_shape + [8]
            max_shape = max_shape + [32]
        else:
            min_shape = min_shape + [i]
            opt_shape = opt_shape + [i]
            max_shape = max_shape + [i]

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    if max_batch != 0:
        profile.set_shape(
            "START",
            [1] + unit_shape,
            [max_batch] + unit_shape,
            [max_batch] + unit_shape,
        )
        profile.set_shape(
            "END", [1] + unit_shape, [max_batch] + unit_shape, [max_batch] + unit_shape
        )
        profile.set_shape(
            "READY",
            [1] + unit_shape,
            [max_batch] + unit_shape,
            [max_batch] + unit_shape,
        )
        profile.set_shape(
            "CORRID",
            [1] + unit_shape,
            [max_batch] + unit_shape,
            [max_batch] + unit_shape,
        )
    else:
        profile.set_shape("START", unit_shape, unit_shape, unit_shape)
        profile.set_shape("END", unit_shape, unit_shape, unit_shape)
        profile.set_shape("READY", unit_shape, unit_shape, unit_shape)
        profile.set_shape("CORRID", unit_shape, unit_shape, unit_shape)

    config = builder.create_builder_config()
    config.flags = flags
    config.add_optimization_profile(profile)
    # Workspace pool limit of 1 << 20 bytes.
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback when the builder has no build_serialized_network
        # attribute -- presumably an older TensorRT release.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_name = tu.get_dyna_sequence_model_name(
        "plan_nobatch" if max_batch == 0 else "plan", dtype
    )
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Tolerate an already-existing directory, but let any other failure
    # (e.g. permissions) surface here instead of being silently swallowed.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_models(models_dir, model_version, max_batch, dtype, shape):
    """Generate the TensorRT plan file for one dtype/shape combination.

    Nothing is generated when ``tu.validate_for_trt_model`` rejects the
    combination. float32 models are built by ``create_plan_modelfile``;
    every other dtype is built by ``create_plan_rf_modelfile``, the variant
    that pins I/O tensor formats and builder flags.
    """
    if tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape):
        build = (
            create_plan_rf_modelfile if dtype != np.float32 else create_plan_modelfile
        )
        build(models_dir, model_version, max_batch, dtype, shape)


def create_plan_modelconfig(
    models_dir, model_version, max_batch, dtype, shape, shape_tensor_input_dtype=None
):
    """Write ``config.pbtxt`` for a TensorRT dynamic-sequence model.

    Two layouts exist. When ``FLAGS.tensorrt_shape_io`` is set, the config
    describes the shape-tensor model (INPUT, DUMMY_INPUT, SHAPE_INPUT ->
    OUTPUT, RESIZED_OUTPUT, SHAPE_OUTPUT) with int32 controls. Otherwise it
    describes the plain INPUT -> OUTPUT model, whose controls are declared
    int32 for ``np.int32`` and fp32 for anything else.

    Args:
        models_dir: Top-level model directory; the file is written to
            ``<models_dir>/<model_name>/config.pbtxt``.
        model_version: Unused here; accepted so the signature matches the
            model-file generators.
        max_batch: Value for ``max_batch_size``. ``0`` selects the
            ``plan_nobatch`` model name and omits the ``oldest`` policy.
        dtype: NumPy dtype of the data tensors.
        shape: Tensor shape (without batch dimension).
        shape_tensor_input_dtype: When truthy, its dtype name is appended to
            the model name; in the shape-tensor layout it is also the
            SHAPE_INPUT data type.

    Returns:
        None. Returns early, writing nothing, when
        ``tu.validate_for_trt_model`` rejects the dtype/shape combination.
    """
    if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape):
        return

    name_prefix = "plan" if max_batch != 0 else "plan_nobatch"
    model_name = tu.get_dyna_sequence_model_name(name_prefix, dtype)
    if shape_tensor_input_dtype:
        model_name += "_" + np.dtype(shape_tensor_input_dtype).name
    config_dir = models_dir + "/" + model_name

    # Values shared by both layouts.
    if max_batch > 0:
        scheduling_policy = "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}"
    else:
        scheduling_policy = ""
    dims_str = tu.shape_to_dims_str(shape)
    data_type = np_to_model_dtype(dtype)

    if FLAGS.tensorrt_shape_io:
        shape_tensor_dim = len(shape)
        template = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  {}
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "END"
      control [
        {{
          kind: CONTROL_SEQUENCE_END
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "CORRID"
      control [
        {{
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_INT32
        }}
      ]
    }}
  ]
}}
input [
  {{
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ {} ]
  }}
]
input [
  {{
    name: "DUMMY_INPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
input [
  {{
    name: "SHAPE_INPUT"
    data_type: {}
    dims: [ {} ]
    is_shape_tensor: true
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ {} ]
  }}
]
output [
  {{
    name: "RESIZED_OUTPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "SHAPE_OUTPUT"
    data_type: TYPE_INT64
    dims: [ {} ]
    is_shape_tensor: true
  }}
]
instance_group [
  {{
    kind: KIND_GPU
  }}
]
"""
        # Positional fields, in template order.
        fields = [
            model_name,
            max_batch,
            scheduling_policy,
            "int32",  # START
            "int32",  # END
            "int32",  # READY
            dims_str,  # INPUT dims
            data_type,  # DUMMY_INPUT
            dims_str,
            np_to_model_dtype(shape_tensor_input_dtype),  # SHAPE_INPUT
            shape_tensor_dim,
            dims_str,  # OUTPUT dims
            data_type,  # RESIZED_OUTPUT
            dims_str,
            shape_tensor_dim,  # SHAPE_OUTPUT dims
        ]
    else:
        template = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  {}
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "END"
      control [
        {{
          kind: CONTROL_SEQUENCE_END
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "CORRID"
      control [
        {{
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_INT32
        }}
      ]
    }}
  ]
}}
input [
  {{
    name: "INPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
instance_group [
  {{
    kind: KIND_GPU
  }}
]
"""
        control_type = "int32" if dtype == np.int32 else "fp32"
        fields = [
            model_name,
            max_batch,
            scheduling_policy,
            control_type,  # START
            control_type,  # END
            control_type,  # READY
            data_type,  # INPUT
            dims_str,
            data_type,  # OUTPUT
            dims_str,
        ]

    config = template.format(*fields)

    try:
        os.makedirs(config_dir)
    except OSError:
        pass  # ignore existing dir

    with open(config_dir + "/config.pbtxt", "w") as config_file:
        config_file.write(config)


def create_onnx_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build the ONNX dynamic-sequence test model and save it as model.onnx.

    The graph is not a real accumulator. It computes
    ``READY * (INPUT + START + END * CORRID)``; per the original author's
    note, the tests are written to expect that value. CORRID is declared
    UINT64 and cast to the control dtype before use.

    Args:
        models_dir: Top-level model directory; the model is written to
            ``<models_dir>/<model_name>/<model_version>/model.onnx``.
        model_version: Version sub-directory name.
        max_batch: ``0`` builds a model without a batch dimension; otherwise
            a leading unnamed dynamic dimension is added to every tensor.
        dtype: NumPy dtype of INPUT and OUTPUT.
        shape: Non-batch shape of INPUT and OUTPUT.

    Returns:
        None. Returns early, writing nothing, when
        ``tu.validate_for_onnx_model`` rejects the dtype/shape combination.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    name_prefix = "onnx" if max_batch != 0 else "onnx_nobatch"
    model_name = tu.get_dyna_sequence_model_name(name_prefix, dtype)
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    onnx_dtype = np_to_onnx_dtype(dtype)
    onnx_input_shape, idx = tu.shape_to_onnx_shape(shape, 0)
    onnx_output_shape, idx = tu.shape_to_onnx_shape(shape, idx)

    # String models do their arithmetic in int32: the controls are declared
    # int32 and INPUT/OUTPUT are cast from/to string.
    tensor_types = onnx.TensorProto
    if onnx_dtype == tensor_types.STRING:
        onnx_control_dtype = tensor_types.INT32
    else:
        onnx_control_dtype = onnx_dtype

    batch_dim = [None] if max_batch != 0 else []
    control_dims = batch_dim + [1]

    value_info = onnx.helper.make_tensor_value_info
    onnx_inputs = [
        value_info("INPUT", onnx_dtype, batch_dim + onnx_input_shape),
        value_info("START", onnx_control_dtype, control_dims),
        value_info("END", onnx_control_dtype, control_dims),
        value_info("READY", onnx_control_dtype, control_dims),
        value_info("CORRID", tensor_types.UINT64, control_dims),
    ]
    onnx_outputs = [value_info("OUTPUT", onnx_dtype, batch_dim + onnx_output_shape)]

    make_node = onnx.helper.make_node

    # int8/int16 inputs are widened because the ONNX Add/Sub operators do
    # not support them; string inputs are cast to int32 as well.
    if onnx_dtype in (tensor_types.INT8, tensor_types.INT16, tensor_types.STRING):
        internal_input = make_node(
            "Cast", ["INPUT"], ["_INPUT"], to=tensor_types.INT32
        )
    else:
        internal_input = make_node("Identity", ["INPUT"], ["_INPUT"])

    # Avoid a float16 -> float16 Cast: an ONNX Runtime bug turns it into a
    # float16 -> float32 cast, so pass the value through unchanged instead.
    if onnx_dtype == tensor_types.FLOAT16:
        to_output = make_node("Identity", ["CAST"], ["OUTPUT"])
    else:
        to_output = make_node("Cast", ["CAST"], ["OUTPUT"], to=onnx_dtype)

    onnx_nodes = [
        internal_input,
        make_node("Cast", ["CORRID"], ["onnx_corrid_cast0"], to=onnx_control_dtype),
        make_node("Add", ["_INPUT", "START"], ["add0"]),
        make_node("Mul", ["END", "onnx_corrid_cast0"], ["mul0"]),
        make_node("Add", ["add0", "mul0"], ["sum0"]),
        make_node("Mul", ["READY", "sum0"], ["CAST"]),
        to_output,
    ]

    graph_proto = onnx.helper.make_graph(
        onnx_nodes, model_name, onnx_inputs, onnx_outputs
    )

    # An explicit opset is attached only when one was requested.
    model_kwargs = {"producer_name": "triton"}
    if FLAGS.onnx_opset > 0:
        model_kwargs["opset_imports"] = [
            onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        ]
    model_def = onnx.helper.make_model(graph_proto, **model_kwargs)

    try:
        os.makedirs(model_version_dir)
    except OSError:
        pass  # ignore existing dir

    onnx.save(model_def, model_version_dir + "/model.onnx")


def create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape):
    """Write the ``config.pbtxt`` for the ONNX dyna-sequence model.

    Nothing is written when ``tu.validate_for_onnx_model`` rejects the
    dtype/shape combination.

    Args:
        models_dir: Top-level model repository directory.
        model_version: Unused here; kept so the signature mirrors the
            model-file generator.
        max_batch: Value written to ``max_batch_size``; 0 selects the
            ``onnx_nobatch`` model name and omits the ``oldest`` scheduler
            settings.
        dtype: NumPy dtype of the INPUT/OUTPUT tensors.
        shape: Tensor dims (without the batch dimension) for INPUT/OUTPUT.

    Raises:
        OSError: If the model directory cannot be created or the config file
            cannot be written.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    model_name = tu.get_dyna_sequence_model_name(
        "onnx_nobatch" if max_batch == 0 else "onnx", dtype
    )
    config_dir = os.path.join(models_dir, model_name)
    config = """
name: "{}"
platform: "onnxruntime_onnx"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  {}
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "END"
      control [
        {{
          kind: CONTROL_SEQUENCE_END
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "CORRID"
      control [
        {{
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_UINT64
        }}
      ]
    }}
  ]
}}
input [
  {{
    name: "INPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
instance_group [
  {{
    kind: KIND_CPU
  }}
]
""".format(
        model_name,
        max_batch,
        (
            "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}"
            if max_batch > 0
            else ""
        ),
        np_to_model_dtype(dtype),
        tu.shape_to_dims_str(shape),
        np_to_model_dtype(dtype),
        tu.shape_to_dims_str(shape),
        type="fp32" if dtype == np.float32 else "int32",
    )

    # Only an already-existing directory is tolerated; any other failure
    # (e.g. permission denied) is reported instead of being swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(os.path.join(config_dir, "config.pbtxt"), "w") as cfile:
        cfile.write(config)


def create_libtorch_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Trace and save the LibTorch dyna-sequence model as ``model.pt``.

    The traced module computes
    ``(input + start + end * corrid) * ready``. Nothing is written when
    ``tu.validate_for_libtorch_model`` rejects the dtype/shape combination.

    Args:
        models_dir: Top-level model repository directory.
        model_version: Version sub-directory the model file is written to.
        max_batch: 0 selects the ``libtorch_nobatch`` model name.
        dtype: NumPy dtype of the data tensors.
        shape: Tensor dims; negative (variable) dims are replaced by their
            absolute value to build the example tensors used for tracing.

    Raises:
        OSError: If the version directory cannot be created.
    """
    if not tu.validate_for_libtorch_model(dtype, dtype, dtype, shape, shape, shape):
        return

    torch_dtype = np_to_torch_dtype(dtype)

    model_name = tu.get_dyna_sequence_model_name(
        "libtorch_nobatch" if max_batch == 0 else "libtorch", dtype
    )
    # handle for -1 (when variable) since can't create tensor with shape of [-1]
    shape = [abs(ips) for ips in shape]

    class SequenceNet(nn.Module):
        def __init__(self):
            super().__init__()

        def forward(self, input0, start0, end0, ready0, corrid0):
            tmp = input0 + start0 + (end0 * corrid0)
            return tmp * ready0

    sequenceModel = SequenceNet()
    example_input = torch.zeros(shape, dtype=torch_dtype)
    # The correlation ID example uses a 64-bit integer tensor.
    example_corrid_input = torch.zeros(shape, dtype=torch.long)
    traced = torch.jit.trace(
        sequenceModel,
        (
            example_input,
            example_input,
            example_input,
            example_input,
            example_corrid_input,
        ),
    )

    model_version_dir = os.path.join(models_dir, model_name, str(model_version))

    # Only an already-existing directory is tolerated; any other failure
    # (e.g. permission denied) is reported instead of being swallowed.
    os.makedirs(model_version_dir, exist_ok=True)

    traced.save(os.path.join(model_version_dir, "model.pt"))


def create_libtorch_modelconfig(models_dir, model_version, max_batch, dtype, shape):
    """Write the ``config.pbtxt`` for the LibTorch dyna-sequence model.

    Tensor names in this config use the ``NAME__INDEX`` form (``INPUT__0``,
    ``START__1``, ...). Nothing is written when
    ``tu.validate_for_libtorch_model`` rejects the dtype/shape combination.

    Args:
        models_dir: Top-level model repository directory.
        model_version: Unused here; kept so the signature mirrors the
            model-file generator.
        max_batch: Value written to ``max_batch_size``; 0 selects the
            ``libtorch_nobatch`` model name and omits the ``oldest``
            scheduler settings.
        dtype: NumPy dtype of the INPUT/OUTPUT tensors.
        shape: Tensor dims (without the batch dimension) for the input.

    Raises:
        OSError: If the model directory cannot be created or the config file
            cannot be written.
    """
    if not tu.validate_for_libtorch_model(dtype, dtype, dtype, shape, shape, shape):
        return

    model_name = tu.get_dyna_sequence_model_name(
        "libtorch_nobatch" if max_batch == 0 else "libtorch", dtype
    )
    config_dir = os.path.join(models_dir, model_name)
    # Prefix of the `*_false_true` field used by the START/END/READY controls.
    control_type = "int32" if dtype == np.int32 else "fp32"
    config = """
name: "{}"
platform: "pytorch_libtorch"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  {}
  control_input [
    {{
      name: "START__1"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "END__2"
      control [
        {{
          kind: CONTROL_SEQUENCE_END
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY__3"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "CORRID__4"
      control [
        {{
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_INT32
        }}
      ]
    }}
  ]
}}
input [
  {{
    name: "INPUT__0"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT__0"
    data_type: {}
    dims: [ 1 ]
  }}
]
instance_group [
  {{
    kind: KIND_CPU
  }}
]
""".format(
        model_name,
        max_batch,
        (
            "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}"
            if max_batch > 0
            else ""
        ),
        control_type,
        control_type,
        control_type,
        np_to_model_dtype(dtype),
        tu.shape_to_dims_str(shape),
        np_to_model_dtype(dtype),
    )

    # Only an already-existing directory is tolerated; any other failure
    # (e.g. permission denied) is reported instead of being swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(os.path.join(config_dir, "config.pbtxt"), "w") as cfile:
        cfile.write(config)


def create_openvino_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build and save the OpenVINO dyna-sequence model.

    The graph computes ``(INPUT + START + END * CORRID) * READY`` and names
    the final node ``OUTPUT``. All five parameters share the same dtype and
    shape; ``max_batch`` is prepended as a leading dimension unless it is 0.
    Nothing is saved when ``tu.validate_for_openvino_model`` rejects the
    dtype/shape combination.
    """
    leading = [max_batch] if max_batch != 0 else []
    full_shape = leading + shape
    if not tu.validate_for_openvino_model(
        dtype, dtype, dtype, full_shape, full_shape, full_shape
    ):
        return

    backend_tag = "openvino" if max_batch != 0 else "openvino_nobatch"
    model_name = tu.get_dyna_sequence_model_name(backend_tag, dtype)
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Parameters are created in the same order they are handed to ov.Model.
    data_in, seq_start, seq_end, seq_ready, seq_corrid = [
        ov.opset1.parameter(shape=full_shape, dtype=dtype, name=tensor_name)
        for tensor_name in ("INPUT", "START", "END", "READY", "CORRID")
    ]

    started = ov.opset1.add(data_in, seq_start)
    end_term = ov.opset1.multiply(seq_end, seq_corrid)
    accumulated = ov.opset1.add(started, end_term)
    result = ov.opset1.multiply(accumulated, seq_ready, name="OUTPUT")

    model = ov.Model(
        [result],
        [data_in, seq_start, seq_end, seq_ready, seq_corrid],
        model_name,
    )
    openvino_save_model(model_version_dir, model)


def create_openvino_modelconfig(models_dir, model_version, max_batch, dtype, shape):
    """Write the ``config.pbtxt`` for the OpenVINO dyna-sequence model.

    Nothing is written when ``tu.validate_for_openvino_model`` rejects the
    dtype/shape combination (validated with ``max_batch`` prepended to
    ``shape`` unless ``max_batch`` is 0).

    Args:
        models_dir: Top-level model repository directory.
        model_version: Unused here; kept so the signature mirrors the
            model-file generator.
        max_batch: Value written to ``max_batch_size``; 0 selects the
            ``openvino_nobatch`` model name and omits the ``oldest``
            scheduler settings.
        dtype: NumPy dtype of the INPUT/OUTPUT tensors.
        shape: Tensor dims (without the batch dimension) for the input.

    Raises:
        OSError: If the model directory cannot be created or the config file
            cannot be written.
    """
    batch_dim = (
        []
        if max_batch == 0
        else [
            max_batch,
        ]
    )
    if not tu.validate_for_openvino_model(
        dtype, dtype, dtype, batch_dim + shape, batch_dim + shape, batch_dim + shape
    ):
        return

    model_name = tu.get_dyna_sequence_model_name(
        "openvino_nobatch" if max_batch == 0 else "openvino", dtype
    )
    config_dir = os.path.join(models_dir, model_name)
    # Prefix of the `*_false_true` field used by the START/END/READY controls.
    control_type = "int32" if dtype == np.int32 else "fp32"
    config = """
name: "{}"
backend: "openvino"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  {}
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "END"
      control [
        {{
          kind: CONTROL_SEQUENCE_END
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "CORRID"
      control [
        {{
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_INT32
        }}
      ]
    }}
  ]
}}
input [
  {{
    name: "INPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {}
    dims: [ 1 ]
  }}
]
""".format(
        model_name,
        max_batch,
        (
            "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}"
            if max_batch > 0
            else ""
        ),
        control_type,
        control_type,
        control_type,
        np_to_model_dtype(dtype),
        tu.shape_to_dims_str(shape),
        np_to_model_dtype(dtype),
    )

    # Only an already-existing directory is tolerated; any other failure
    # (e.g. permission denied) is reported instead of being swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(os.path.join(config_dir, "config.pbtxt"), "w") as cfile:
        cfile.write(config)


def create_shape_tensor_models(
    models_dir, dtype, shape, shape_tensor_input_dtype, no_batch=True
):
    """Generate version-1 PLAN shape-tensor models and their configs.

    A model with max batch size 8 is always produced; when ``no_batch`` is
    true a second, non-batching (max batch size 0) variant follows. For each
    variant the config is written before the model file.
    """
    model_version = 1
    batch_sizes = [8, 0] if no_batch else [8]

    for max_batch in batch_sizes:
        plan_args = (
            models_dir,
            model_version,
            max_batch,
            dtype,
            shape,
            shape_tensor_input_dtype,
        )
        create_plan_modelconfig(*plan_args)
        create_plan_shape_tensor_modelfile(*plan_args)


def create_models(models_dir, dtype, shape, no_batch=True):
    """Generate version-1 models for every backend enabled in ``FLAGS``.

    Each enabled backend gets a max-batch-size-8 model and, when ``no_batch``
    is true, a non-batching (max batch size 0) model as well. The config is
    always written before the corresponding model file. Backends are visited
    in the order TensorRT, ONNX, LibTorch, OpenVINO.
    """
    model_version = 1
    batch_sizes = (8, 0) if no_batch else (8,)

    if FLAGS.tensorrt:
        # int8 PLAN models get two extra trailing dimensions of size 1.
        suffix = [1, 1] if dtype == np.int8 else []
        for max_batch in batch_sizes:
            create_plan_modelconfig(
                models_dir, model_version, max_batch, dtype, shape + suffix
            )
            create_plan_models(
                models_dir, model_version, max_batch, dtype, shape + suffix
            )

    # (enabled?, config writer, model-file writer) for the remaining backends.
    backends = (
        (FLAGS.onnx, create_onnx_modelconfig, create_onnx_modelfile),
        (FLAGS.libtorch, create_libtorch_modelconfig, create_libtorch_modelfile),
        (FLAGS.openvino, create_openvino_modelconfig, create_openvino_modelfile),
    )
    for enabled, write_config, write_model in backends:
        if not enabled:
            continue
        for max_batch in batch_sizes:
            write_config(models_dir, model_version, max_batch, dtype, shape)
            write_model(models_dir, model_version, max_batch, dtype, shape)


if __name__ == "__main__":
    # Command-line interface: one switch per backend plus output location.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir", required=True, type=str, help="Top-level model directory"
    )
    parser.add_argument(
        "--tensorrt",
        action="store_true",
        required=False,
        help="Generate TensorRT PLAN models",
    )
    parser.add_argument(
        "--tensorrt-shape-io",
        action="store_true",
        required=False,
        help="Generate TensorRT PLAN models w/ shape tensor i/o",
    )
    parser.add_argument(
        "--onnx", action="store_true", required=False, help="Generate Onnx models"
    )
    parser.add_argument(
        "--onnx_opset",
        default=0,
        type=int,
        required=False,
        help="Opset used for Onnx models. Default is to use ONNXRT default",
    )
    parser.add_argument(
        "--libtorch",
        action="store_true",
        required=False,
        help="Generate Pytorch LibTorch models",
    )
    parser.add_argument(
        "--openvino",
        action="store_true",
        required=False,
        help="Generate OpenVino models",
    )
    parser.add_argument(
        "--variable",
        action="store_true",
        required=False,
        help="Used variable-shape tensors for input/output",
    )
    # Unrecognised arguments are tolerated and collected in `unparsed`.
    FLAGS, unparsed = parser.parse_known_args()

    # Backend packages are imported only when the matching switch is given,
    # so the script runs without the frameworks that were not requested.
    if FLAGS.tensorrt or FLAGS.tensorrt_shape_io:
        import tensorrt as trt
    if FLAGS.onnx:
        import onnx
    if FLAGS.libtorch:
        import torch
        from torch import nn
    if FLAGS.openvino:
        import openvino.runtime as ov

    import test_util as tu

    if FLAGS.tensorrt_shape_io:
        # Shape-tensor models: one set per shape-tensor input dtype.
        for shape_tensor_dtype in (np.int32, np.int64):
            create_shape_tensor_models(
                FLAGS.models_dir, np.float32, [-1], shape_tensor_dtype
            )
    elif FLAGS.variable:
        # Tests with models that accept variable-shape input/output tensors
        create_models(FLAGS.models_dir, np.int32, [-1], False)
    else:
        # Tests with models that accept fixed-shape input/output tensors
        create_models(FLAGS.models_dir, np.int32, [1])


================================================
FILE: qa/common/gen_qa_identity_models.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os
from builtins import range

import gen_ensemble_model_utils as emu
import numpy as np
from gen_common import (
    np_to_model_dtype,
    np_to_onnx_dtype,
    np_to_trt_dtype,
    openvino_save_model,
)

# Module-level options object read by the generators below (for example
# FLAGS.onnx_opset). It starts as None; presumably it is replaced with the
# parsed command-line arguments by the script entry point - TODO confirm.
FLAGS = None
# NumPy object dtype; the generators compare `dtype` against it to select
# the string-typed model variants.
np_dtype_string = np.dtype(object)
from typing import List, Tuple


def create_ensemble_modelfile(
    create_savedmodel, models_dir, model_version, io_cnt, max_batch, dtype, shape
):
    """Create the "zero" identity ensemble model files.

    ``shape`` is repeated ``io_cnt`` times for both shape lists handed to
    ``emu.create_identity_ensemble_modelfile``. Nothing is generated when
    ``tu.validate_for_ensemble_model`` rejects the dtype/shape combination.
    ``create_savedmodel`` is accepted but not used by this function.
    """
    supported = tu.validate_for_ensemble_model(
        "zero", dtype, dtype, dtype, shape, shape, shape
    )
    if supported:
        first_shapes = [shape] * io_cnt
        second_shapes = [shape] * io_cnt
        emu.create_identity_ensemble_modelfile(
            "zero",
            models_dir,
            model_version,
            max_batch,
            dtype,
            first_shapes,
            second_shapes,
        )


def create_ensemble_modelconfig(
    create_savedmodel, models_dir, model_version, io_cnt, max_batch, dtype, shape
):
    """Create the "zero" identity ensemble model configuration.

    ``shape`` is repeated ``io_cnt`` times for each of the four shape lists
    handed to ``emu.create_identity_ensemble_modelconfig``. Nothing is
    generated when ``tu.validate_for_ensemble_model`` rejects the dtype/shape
    combination. ``create_savedmodel`` is accepted but not used here.
    """
    supported = tu.validate_for_ensemble_model(
        "zero", dtype, dtype, dtype, shape, shape, shape
    )
    if supported:
        # Four independent lists, each holding `shape` once per I/O pair.
        shape_lists = [[shape] * io_cnt for _ in range(4)]
        emu.create_identity_ensemble_modelconfig(
            "zero",
            models_dir,
            model_version,
            max_batch,
            dtype,
            *shape_lists,
        )


def create_onnx_modelfile(
    create_savedmodel, models_dir, model_version, io_cnt, max_batch, dtype, shape
):
    """Build and save an ONNX identity model with ``io_cnt`` I/O pairs.

    Each pair ``INPUT<n>`` -> ``OUTPUT<n>`` is connected by one ``Identity``
    node. A leading ``None`` dimension is added when ``max_batch`` is not 0.
    The opset is pinned only when ``FLAGS.onnx_opset`` is positive. Nothing
    is saved when ``tu.validate_for_onnx_model`` rejects the dtype/shape
    combination. ``create_savedmodel`` is accepted but not used here.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    onnx_dtype = np_to_onnx_dtype(dtype)

    backend_tag = "onnx" if max_batch != 0 else "onnx_nobatch"
    model_name = tu.get_zero_model_name(backend_tag, io_cnt, dtype)
    model_version_dir = os.path.join(models_dir, model_name, str(model_version))

    batch_dim = [None] if max_batch != 0 else []

    graph_nodes, graph_inputs, graph_outputs = [], [], []
    next_dim_idx = 0
    for io_num in range(io_cnt):
        # Converted separately for input and output so the running index
        # advances and the variable dimension names differ.
        in_dims, next_dim_idx = tu.shape_to_onnx_shape(shape, next_dim_idx)
        out_dims, next_dim_idx = tu.shape_to_onnx_shape(shape, next_dim_idx)
        in_name = "INPUT" + str(io_num)
        out_name = "OUTPUT" + str(io_num)

        in_info = onnx.helper.make_tensor_value_info(
            in_name, onnx_dtype, batch_dim + in_dims
        )
        graph_inputs.append(in_info)
        out_info = onnx.helper.make_tensor_value_info(
            out_name, onnx_dtype, batch_dim + out_dims
        )
        graph_outputs.append(out_info)
        graph_nodes.append(onnx.helper.make_node("Identity", [in_name], [out_name]))

    graph_proto = onnx.helper.make_graph(
        graph_nodes, model_name, graph_inputs, graph_outputs
    )

    model_kwargs = {"producer_name": "triton"}
    if FLAGS.onnx_opset > 0:
        model_kwargs["opset_imports"] = [
            onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        ]
    model_def = onnx.helper.make_model(graph_proto, **model_kwargs)

    os.makedirs(model_version_dir, exist_ok=True)
    onnx.save(model_def, model_version_dir + "/model.onnx")


def create_onnx_modelconfig(
    create_savedmodel, models_dir, model_version, io_cnt, max_batch, dtype, shape
):
    """Write ``config.pbtxt`` for the ONNX identity model.

    The config text comes from ``emu.create_general_modelconfig`` with the
    dtype and shape repeated ``io_cnt`` times. Nothing is written when
    ``tu.validate_for_onnx_model`` rejects the dtype/shape combination.
    ``create_savedmodel`` and ``model_version`` are not used here.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    # The non-batching variant gets its own model name.
    backend_tag = "onnx" if max_batch != 0 else "onnx_nobatch"
    model_name = tu.get_zero_model_name(backend_tag, io_cnt, dtype)
    config_dir = os.path.join(models_dir, model_name)

    # Positional per-I/O arguments, in the order the helper receives them.
    per_io_args = [
        emu.repeat(dtype, io_cnt),
        emu.repeat(shape, io_cnt),
        emu.repeat(shape, io_cnt),
        emu.repeat(dtype, io_cnt),
        emu.repeat(shape, io_cnt),
        emu.repeat(shape, io_cnt),
        emu.repeat(None, io_cnt),
    ]
    config = emu.create_general_modelconfig(
        model_name,
        "onnxruntime_onnx",
        max_batch,
        *per_io_args,
        force_tensor_number_suffix=True,
    )

    os.makedirs(config_dir, exist_ok=True)
    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_libtorch_modelfile(
    create_savedmodel, models_dir, model_version, io_cnt, max_batch, dtype, shape
):
    """Script and save a LibTorch identity model with ``io_cnt`` I/O pairs.

    One ``forward`` signature is spelled out per supported input count.
    String models (``dtype == np_dtype_string``) annotate every input and
    output as ``List[str]``; all other dtypes use unannotated arguments.
    Nothing is saved when ``tu.validate_for_libtorch_model`` rejects the
    dtype/shape/max_batch combination.

    Args:
        create_savedmodel: Unused; kept for signature parity.
        models_dir: Top-level model repository directory.
        model_version: Version sub-directory the model file is written to.
        io_cnt: Number of input/output pairs; must be 1, 2, 3 or 4.
        max_batch: 0 selects the ``libtorch_nobatch`` model name.
        dtype: NumPy dtype of the tensors.
        shape: Tensor dims, used only for validation here.

    Raises:
        ValueError: If ``io_cnt`` is not between 1 and 4.
    """
    if not tu.validate_for_libtorch_model(
        dtype, dtype, dtype, shape, shape, shape, max_batch
    ):
        return

    model_name = tu.get_zero_model_name(
        "libtorch_nobatch" if max_batch == 0 else "libtorch", io_cnt, dtype
    )

    # Create the model
    if io_cnt == 1:
        if dtype == np_dtype_string:

            class IdentityNet(nn.Module):
                def forward(self, input0: List[str]) -> List[str]:
                    return input0

        else:

            class IdentityNet(nn.Module):
                def forward(self, input0):
                    return input0

    elif io_cnt == 2:
        if dtype == np_dtype_string:

            class IdentityNet(nn.Module):
                def forward(
                    self, input0: List[str], input1: List[str]
                ) -> Tuple[List[str], List[str]]:
                    return input0, input1

        else:

            class IdentityNet(nn.Module):
                def forward(self, input0, input1):
                    return input0, input1

    elif io_cnt == 3:
        if dtype == np_dtype_string:

            class IdentityNet(nn.Module):
                def forward(
                    self, input0: List[str], input1: List[str], input2: List[str]
                ) -> Tuple[List[str], List[str], List[str]]:
                    return input0, input1, input2

        else:

            class IdentityNet(nn.Module):
                def forward(self, input0, input1, input2):
                    return input0, input1, input2

    elif io_cnt == 4:
        if dtype == np_dtype_string:

            class IdentityNet(nn.Module):
                def forward(
                    self,
                    input0: List[str],
                    input1: List[str],
                    input2: List[str],
                    input3: List[str],
                ) -> Tuple[List[str], List[str], List[str], List[str]]:
                    return input0, input1, input2, input3

        else:

            class IdentityNet(nn.Module):
                def forward(self, input0, input1, input2, input3):
                    return input0, input1, input2, input3

    else:
        # Without this branch IdentityNet would be unbound below and the
        # failure would surface as an unrelated-looking UnboundLocalError.
        raise ValueError(
            "unsupported io_cnt {} for libtorch identity model; "
            "expected 1, 2, 3 or 4".format(io_cnt)
        )

    identityModel = IdentityNet()
    traced = torch.jit.script(identityModel)

    model_version_dir = os.path.join(models_dir, model_name, str(model_version))
    os.makedirs(model_version_dir, exist_ok=True)

    traced.save(model_version_dir + "/model.pt")


def create_libtorch_modelconfig(
    create_savedmodel, models_dir, model_version, io_cnt, max_batch, dtype, shape
):
    """Write ``config.pbtxt`` for the LibTorch identity model.

    The file holds a header (name, platform, max batch size, version policy)
    followed by one ``input``/``output`` section per I/O pair, named
    ``INPUT__<n>`` and ``OUTPUT__<n>``. Nothing is written when
    ``tu.validate_for_libtorch_model`` rejects the combination.
    ``create_savedmodel`` and ``model_version`` are not used here.
    """
    if not tu.validate_for_libtorch_model(
        dtype, dtype, dtype, shape, shape, shape, max_batch
    ):
        return

    # Fixed version policy: serve only the latest version.
    version_policy_str = "{ latest { num_versions: 1 }}"

    # The non-batching variant gets its own model name.
    backend_tag = "libtorch" if max_batch != 0 else "libtorch_nobatch"
    model_name = tu.get_zero_model_name(backend_tag, io_cnt, dtype)
    shape_str = tu.shape_to_dims_str(shape)

    config_dir = os.path.join(models_dir, model_name)
    header = """
name: "{}"
platform: "pytorch_libtorch"
max_batch_size: {}
version_policy: {}
""".format(
        model_name, max_batch, version_policy_str
    )

    # Fields: {0} = I/O index, {1} = model data type, {2} = dims string.
    io_template = """
input [
  {{
    name: "INPUT__{0}"
    data_type: {1}
    dims: [ {2} ]
  }}
]
output [
  {{
    name: "OUTPUT__{0}"
    data_type: {1}
    dims: [ {2} ]
  }}
]
"""
    io_sections = [
        io_template.format(io_num, np_to_model_dtype(dtype), shape_str)
        for io_num in range(io_cnt)
    ]
    config = header + "".join(io_sections)

    os.makedirs(config_dir, exist_ok=True)
    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_libtorch_linalg_modelfile(create_savedmodel, models_dir, model_version):
    """Script and save the ``libtorch_float32_linalg`` model as ``model.pt``.

    The module exercises ``torch.linalg``: it solves a tensor equation
    against a fixed buffer and then contracts the solution with the same
    buffer. The buffer is a reshaped identity matrix, so the two steps are
    meant to hand back the original input. ``create_savedmodel`` is
    accepted but not used.
    """
    model_name = "libtorch_float32_linalg"

    class IdentityNet(nn.Module):
        def __init__(self, ref_pts):
            super().__init__()
            self.register_buffer("ref_pts", torch.as_tensor(ref_pts))

        def forward(self, src: torch.Tensor):
            solved = torch.linalg.tensorsolve(self.ref_pts, src)
            return torch.tensordot(self.ref_pts, solved, dims=solved.ndim)

    # 24x24 identity viewed as a (6, 4, 2, 3, 4) tensor.
    reference = torch.eye(24).reshape(6, 4, 2, 3, 4)
    scripted = torch.jit.script(IdentityNet(reference))

    target_dir = os.path.join(models_dir, model_name, str(model_version))
    os.makedirs(target_dir, exist_ok=True)
    scripted.save(target_dir + "/model.pt")


def create_libtorch_linalg_modelconfig(create_savedmodel, models_dir, model_version):
    """Write ``config.pbtxt`` for the ``libtorch_float32_linalg`` model.

    The model is non-batching (max batch size 0) with a single float32
    ``INPUT__0``/``OUTPUT__0`` pair of dims ``[6, 4]``. ``create_savedmodel``
    and ``model_version`` are not used here.
    """
    # Fixed version policy: serve only the latest version.
    version_policy_str = "{ latest { num_versions: 1 }}"

    model_name = "libtorch_float32_linalg"
    dtype = np.float32
    io_cnt = 1
    max_batch = 0
    shape_str = tu.shape_to_dims_str([6, 4])

    config_dir = os.path.join(models_dir, model_name)
    header = """
name: "{}"
platform: "pytorch_libtorch"
max_batch_size: {}
version_policy: {}
""".format(
        model_name, max_batch, version_policy_str
    )

    # Fields: {0} = I/O index, {1} = model data type, {2} = dims string.
    io_template = """
input [
  {{
    name: "INPUT__{0}"
    data_type: {1}
    dims: [ {2} ]
  }}
]
output [
  {{
    name: "OUTPUT__{0}"
    data_type: {1}
    dims: [ {2} ]
  }}
]
"""
    io_sections = [
        io_template.format(io_num, np_to_model_dtype(dtype), shape_str)
        for io_num in range(io_cnt)
    ]
    config = header + "".join(io_sections)

    os.makedirs(config_dir, exist_ok=True)
    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_openvino_modelfile(
    models_dir, model_version, io_cnt, max_batch, dtype, shape
):
    """Build and save an OpenVINO identity model with ``io_cnt`` I/O pairs.

    Each parameter ``INPUT<n>`` feeds a result node named ``OUTPUT<n>``.
    ``max_batch`` is prepended as a leading dimension unless it is 0.
    Nothing is saved when ``tu.validate_for_openvino_model`` rejects the
    dtype/shape combination.
    """
    leading = [max_batch] if max_batch != 0 else []
    if not tu.validate_for_openvino_model(
        dtype, dtype, dtype, leading + shape, leading + shape, leading + shape
    ):
        return

    # Create the model
    backend_tag = "openvino" if max_batch != 0 else "openvino_nobatch"
    model_name = tu.get_zero_model_name(backend_tag, io_cnt, dtype)
    model_version_dir = os.path.join(models_dir, model_name, str(model_version))

    params = []
    results = []
    for io_num in range(io_cnt):
        param = ov.opset1.parameter(
            shape=leading + shape, dtype=dtype, name="INPUT{}".format(io_num)
        )
        params.append(param)
        results.append(ov.opset1.result(param, name="OUTPUT{}".format(io_num)))

    model = ov.Model(results, params, model_name)
    openvino_save_model(model_version_dir, model)


def create_openvino_modelconfig(
    models_dir, model_version, io_cnt, max_batch, dtype, shape
):
    """Write ``config.pbtxt`` for the OpenVINO identity model.

    The file holds a header (name, backend, max batch size, version policy)
    followed by one ``input``/``output`` section per I/O pair. Tensors are
    declared as ``INPUT<n>``/``OUTPUT<n>``, the same names the OpenVINO
    identity model generator in this file gives its parameter and result
    nodes. Nothing is written when ``tu.validate_for_openvino_model``
    rejects the dtype/shape combination.

    Args:
        models_dir: Top-level model repository directory.
        model_version: Unused here; kept so the signature mirrors the
            model-file generator.
        io_cnt: Number of input/output pairs to declare.
        max_batch: Value written to ``max_batch_size``; 0 selects the
            ``openvino_nobatch`` model name.
        dtype: NumPy dtype of the tensors.
        shape: Tensor dims (without the batch dimension).
    """
    batch_dim = (
        []
        if max_batch == 0
        else [
            max_batch,
        ]
    )
    if not tu.validate_for_openvino_model(
        dtype, dtype, dtype, batch_dim + shape, batch_dim + shape, batch_dim + shape
    ):
        return

    # Fixed version policy: serve only the latest version.
    version_policy_str = "{ latest { num_versions: 1 }}"

    # Use a different model name for the non-batching variant
    model_name = tu.get_zero_model_name(
        "openvino_nobatch" if max_batch == 0 else "openvino", io_cnt, dtype
    )
    shape_str = tu.shape_to_dims_str(shape)

    config_dir = os.path.join(models_dir, model_name)
    config = """
name: "{}"
backend: "openvino"
max_batch_size: {}
version_policy: {}
""".format(
        model_name, max_batch, version_policy_str
    )

    for io_num in range(io_cnt):
        # The `NAME__<n>` form previously used here did not match the
        # `INPUT<n>`/`OUTPUT<n>` tensor names of the generated model.
        config += """
input [
  {{
    name: "INPUT{}"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT{}"
    data_type: {}
    dims: [ {} ]
  }}
]
""".format(
            io_num,
            np_to_model_dtype(dtype),
            shape_str,
            io_num,
            np_to_model_dtype(dtype),
            shape_str,
        )

    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_plan_modelfile(
    create_savedmodel,
    models_dir,
    model_version,
    io_cnt,
    max_batch,
    dtype,
    shape,
    profile_max_size,
):
    """Generate a TensorRT PLAN identity model file.

    float32 models go through ``create_plan_dynamic_modelfile``; every other
    dtype goes through ``create_plan_dynamic_rf_modelfile``, so both
    generators are exercised. Nothing is generated when
    ``tu.validate_for_trt_model`` rejects the dtype/shape combination.
    ``create_savedmodel`` is accepted but not used here.
    """
    if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape):
        return

    # Pick the generator by dtype to keep coverage of both configurations.
    build_plan = (
        create_plan_dynamic_modelfile
        if dtype == np.float32
        else create_plan_dynamic_rf_modelfile
    )
    build_plan(
        models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size
    )


def create_plan_dynamic_rf_modelfile(
    models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size
):
    """Build a TensorRT identity engine and write it as ``model.plan``.

    The network has ``io_cnt`` identity layers (``INPUT<n>`` -> ``OUTPUT<n>``)
    restricted to the LINEAR tensor format. A single optimization profile is
    used: the batch dimension (present when ``max_batch`` is not 0) ranges
    from 1 to ``max_batch``, and each ``-1`` dim ranges from 1 to
    ``profile_max_size`` with 8 as the optimum. Setting the environment
    variable ``TRT_VERBOSE=1`` switches the TensorRT logger to VERBOSE.
    """
    # Create the model
    verbose = os.environ.get("TRT_VERBOSE") == "1"
    logger = trt.Logger(trt.Logger.VERBOSE if verbose else trt.Logger.INFO)
    builder = trt.Builder(logger)
    network = builder.create_network()

    # Network tensor dims; the batch dimension is dynamic when batching.
    network_dims = list(shape) if max_batch == 0 else [-1] + list(shape)

    trt_dtype = np_to_trt_dtype(dtype)
    linear_mask = 1 << int(trt.TensorFormat.LINEAR)
    for io_num in range(io_cnt):
        in_tensor = network.add_input(
            "INPUT{}".format(io_num), trt_dtype, network_dims
        )
        in_tensor.allowed_formats = linear_mask

        identity_layer = network.add_identity(in_tensor)
        out_tensor = identity_layer.get_output(0)

        out_tensor.name = "OUTPUT{}".format(io_num)
        out_tensor.dtype = trt_dtype
        network.mark_output(out_tensor)
        out_tensor.allowed_formats = linear_mask

        if trt_dtype == trt.int8:
            # int8 tensors are given an explicit full-range dynamic range.
            in_tensor.dynamic_range = (-128.0, 127.0)
            out_tensor.dynamic_range = (-128.0, 127.0)

    # One (min, opt, max) triple per profile dimension.
    dim_ranges = []
    if max_batch != 0:
        batch_cap = max(1, max_batch)
        dim_ranges.append((1, batch_cap, batch_cap))
    for dim in shape:
        if dim == -1:
            # Generating a very generous optimization profile
            dim_ranges.append((1, 8, profile_max_size))
        else:
            dim_ranges.append((dim, dim, dim))
    min_shape = [bounds[0] for bounds in dim_ranges]
    opt_shape = [bounds[1] for bounds in dim_ranges]
    max_shape = [bounds[2] for bounds in dim_ranges]

    profile = builder.create_optimization_profile()
    for io_num in range(io_cnt):
        profile.set_shape("INPUT{}".format(io_num), min_shape, opt_shape, max_shape)

    builder_flags = 0
    for flag in (
        trt.BuilderFlag.DIRECT_IO,
        trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS,
        trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS,
    ):
        builder_flags |= 1 << int(flag)
    # Enable the reduced-precision mode that matches the tensor dtype.
    if trt_dtype == trt.int8:
        builder_flags |= 1 << int(trt.BuilderFlag.INT8)
    elif trt_dtype == trt.float16:
        builder_flags |= 1 << int(trt.BuilderFlag.FP16)

    config = builder.create_builder_config()
    config.flags = builder_flags
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    config.add_optimization_profile(profile)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback used when the call above raises AttributeError: build an
        # engine object and serialize it explicitly.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    backend_tag = "plan" if max_batch != 0 else "plan_nobatch"
    model_name = tu.get_zero_model_name(backend_tag, io_cnt, dtype)
    model_version_dir = os.path.join(models_dir, model_name, str(model_version))
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as plan_file:
        plan_file.write(engine_bytes)


def create_plan_shape_tensor_modelfile(
    models_dir,
    model_version,
    io_cnt,
    max_batch,
    dtype,
    shape,
    profile_max_size,
    shape_tensor_input_dtype,
):
    """Build a TensorRT plan with shape-tensor I/O and write it to disk.

    For each index in ``range(io_cnt)`` the network takes two inputs,
    ``INPUT<n>`` (declared with the shape-tensor dtype) and
    ``DUMMY_INPUT<n>``, and produces two outputs:

    * ``DUMMY_OUTPUT<n>``: ``DUMMY_INPUT<n>`` resized to the shape
      specified in ``INPUT<n>``.
    * ``OUTPUT<n>``: the shape of ``DUMMY_OUTPUT<n>``; its values must be
      identical to the ``INPUT<n>`` values.

    Note that the resize layer does not support int tensors.

    The serialized engine is written to ``model.plan`` inside
    ``<models_dir>/<model_name>/<model_version>``.
    """
    # TRT_VERBOSE=1 in the environment switches the builder log to verbose.
    log_level = (
        trt.Logger.VERBOSE
        if os.environ.get("TRT_VERBOSE") == "1"
        else trt.Logger.INFO
    )
    trt_logger = trt.Logger(log_level)
    builder = trt.Builder(trt_logger)
    network = builder.create_network()

    # Number of dimensions of the dummy tensor; one more when batching.
    rank = len(shape) if max_batch == 0 else len(shape) + 1
    dummy_shape = [-1] * rank

    trt_dtype = np_to_trt_dtype(dtype)
    trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype)
    linear_format_mask = 1 << int(trt.TensorFormat.LINEAR)

    for io_num in range(io_cnt):
        in_node = network.add_input(
            "INPUT{}".format(io_num), trt_shape_dtype, [rank]
        )
        in_node.allowed_formats = linear_format_mask
        dummy_in_node = network.add_input(
            "DUMMY_INPUT{}".format(io_num), trt_dtype, dummy_shape
        )
        dummy_in_node.allowed_formats = linear_format_mask

        # Input 1 of the resize layer supplies the requested output shape.
        resize_layer = network.add_resize(dummy_in_node)
        resize_layer.set_input(1, in_node)
        shape_layer = network.add_shape(resize_layer.get_output(0))

        resized_tensor = resize_layer.get_output(0)
        shape_tensor = shape_layer.get_output(0)
        shape_tensor.name = "OUTPUT{}".format(io_num)

        resized_tensor.name = "DUMMY_OUTPUT{}".format(io_num)

        resized_tensor.dtype = trt_dtype
        network.mark_output(resized_tensor)
        resized_tensor.allowed_formats = linear_format_mask

        shape_tensor.dtype = trt.int64
        network.mark_output_for_shapes(shape_tensor)
        shape_tensor.allowed_formats = linear_format_mask

        if trt_dtype == trt.int8:
            in_node.dynamic_range = (-128.0, 127.0)
            shape_tensor.dynamic_range = (-128.0, 127.0)

    config = builder.create_builder_config()

    # Optional leading batch dimension: min 1, opt/max pinned to max_batch.
    if max_batch != 0:
        batch_min = [1]
        batch_top = [max(1, max_batch)]
    else:
        batch_min = []
        batch_top = []

    dim_count = len(shape)
    min_shape = batch_min + [1] * dim_count
    opt_shape = batch_top + [8] * dim_count
    max_shape = batch_top + [profile_max_size] * dim_count

    # The same bounds are used for the shape-tensor values and for the
    # dimensions of the dummy input.
    profile = builder.create_optimization_profile()
    for io_num in range(io_cnt):
        profile.set_shape_input(
            "INPUT{}".format(io_num), min_shape, opt_shape, max_shape
        )
        profile.set_shape(
            "DUMMY_INPUT{}".format(io_num), min_shape, opt_shape, max_shape
        )

    config.add_optimization_profile(profile)

    flags = 0
    for builder_flag in (
        trt.BuilderFlag.DIRECT_IO,
        trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS,
        trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS,
    ):
        flags |= 1 << int(builder_flag)
    # Reduced-precision builder flag matching the data tensor type, if any.
    if trt_dtype == trt.int8:
        flags |= 1 << int(trt.BuilderFlag.INT8)
    elif trt_dtype == trt.float16:
        flags |= 1 << int(trt.BuilderFlag.FP16)
    config.flags = flags

    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Builder without build_serialized_network: build, then serialize.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    name_prefix = "plan_nobatch" if max_batch == 0 else "plan"
    model_name = tu.get_zero_model_name(name_prefix, io_cnt, dtype)
    model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name
    model_version_dir = os.path.join(models_dir, model_name, str(model_version))
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as plan_file:
        plan_file.write(engine_bytes)


def create_plan_dynamic_modelfile(
    models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size
):
    """Build an identity TensorRT plan with dynamic shapes and save it.

    Each ``INPUT<n>`` (for n in ``range(io_cnt)``) is passed through an
    identity layer to ``OUTPUT<n>``.  When ``max_batch`` is non-zero a
    dynamic (-1) batch dimension is prepended to ``shape``.  The serialized
    engine is written to ``model.plan`` inside
    ``<models_dir>/<model_name>/<model_version>``.
    """
    # TRT_VERBOSE=1 in the environment switches the builder log to verbose.
    log_level = (
        trt.Logger.VERBOSE
        if os.environ.get("TRT_VERBOSE") == "1"
        else trt.Logger.INFO
    )
    trt_logger = trt.Logger(log_level)
    builder = trt.Builder(trt_logger)
    network = builder.create_network()

    dims = list(shape)
    batched = max_batch != 0
    input_dims = [-1] + dims if batched else dims

    trt_dtype = np_to_trt_dtype(dtype)
    for io_num in range(io_cnt):
        in_node = network.add_input("INPUT{}".format(io_num), trt_dtype, input_dims)
        identity_layer = network.add_identity(in_node)
        identity_layer.get_output(0).name = "OUTPUT{}".format(io_num)
        network.mark_output(identity_layer.get_output(0))

    # Optional leading batch dimension: min 1, opt/max pinned to max_batch.
    batch_min = [1] if batched else []
    batch_top = [max(1, max_batch)] if batched else []

    # Dynamic (-1) dimensions get a very generous optimization profile;
    # fixed dimensions are kept as-is in min/opt/max.
    min_shape = batch_min + [1 if d == -1 else d for d in dims]
    opt_shape = batch_top + [8 if d == -1 else d for d in dims]
    max_shape = batch_top + [profile_max_size if d == -1 else d for d in dims]

    profile = builder.create_optimization_profile()
    for io_num in range(io_cnt):
        profile.set_shape("INPUT{}".format(io_num), min_shape, opt_shape, max_shape)
    config = builder.create_builder_config()
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    if FLAGS.tensorrt_compat:
        config.set_flag(trt.BuilderFlag.VERSION_COMPATIBLE)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Builder without build_serialized_network: build, then serialize.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    name_parts = ["plan"]
    if not batched:
        name_parts.append("_nobatch")
    if FLAGS.tensorrt_compat:
        name_parts.append("_compatible")

    model_name = tu.get_zero_model_name("".join(name_parts), io_cnt, dtype)
    model_version_dir = os.path.join(models_dir, model_name, str(model_version))
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as plan_file:
        plan_file.write(engine_bytes)


def create_plan_modelconfig(
    create_savedmodel,
    models_dir,
    model_version,
    io_cnt,
    max_batch,
    dtype,
    shape,
    shape_tensor_input_dtype=None,
):
    """Write the ``config.pbtxt`` for a TensorRT plan model.

    Nothing is written when ``tu.validate_for_trt_model`` rejects the
    dtype/shape combination.  With ``FLAGS.tensorrt_shape_io`` set, every
    I/O index gets a DUMMY_INPUT/INPUT and DUMMY_OUTPUT/OUTPUT pair where
    INPUT and OUTPUT are marked as shape tensors; otherwise a plain
    INPUT/OUTPUT pair is emitted.  ``create_savedmodel`` and
    ``model_version`` are accepted but not used by this function.
    """
    if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape):
        return

    dims_str = tu.shape_to_dims_str(shape)

    name_base = "plan"
    if max_batch == 0:
        name_base = name_base + "_nobatch"
    if FLAGS.tensorrt_compat:
        name_base = name_base + "_compatible"
    model_name = tu.get_zero_model_name(name_base, io_cnt, dtype)
    if shape_tensor_input_dtype:
        # Shape-tensor variants are suffixed with the shape dtype name.
        model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name
    config_dir = os.path.join(models_dir, model_name)

    # The header is the same for both flavours of the configuration.
    config = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
""".format(
        model_name, max_batch
    )

    if FLAGS.tensorrt_shape_io:
        # A shape tensor is 1-D with one element per dimension of `shape`.
        shape_tensor_dim = len(shape)
        for io_num in range(io_cnt):
            data_type = np_to_model_dtype(dtype)
            shape_data_type = np_to_model_dtype(shape_tensor_input_dtype)
            config += """
input [
  {{
    name: "DUMMY_INPUT{}"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "INPUT{}"
    data_type: {}
    dims: [ {} ]
    is_shape_tensor: true
  }}
]
output [
  {{
    name: "DUMMY_OUTPUT{}"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "OUTPUT{}"
    data_type: TYPE_INT64
    dims: [ {} ]
    is_shape_tensor: true
  }}
]
""".format(
                io_num,
                data_type,
                dims_str,
                io_num,
                shape_data_type,
                shape_tensor_dim,
                io_num,
                data_type,
                dims_str,
                io_num,
                shape_tensor_dim,
            )
    else:
        for io_num in range(io_cnt):
            data_type = np_to_model_dtype(dtype)
            config += """
input [
  {{
    name: "INPUT{}"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT{}"
    data_type: {}
    dims: [ {} ]
  }}
]
""".format(
                io_num,
                data_type,
                dims_str,
                io_num,
                data_type,
                dims_str,
            )

    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as config_file:
        config_file.write(config)


def create_shape_tensor_models(
    models_dir, dtype, shape, shape_tensor_input_dtype, io_cnt=1, no_batch=True
):
    """Generate the shape-tensor TensorRT plan models (config + plan file).

    A model with max batch size 8 is always generated; when ``no_batch``
    is true a second model with max batch size 0 is generated as well.
    Both use version 1 and a profile maximum size of 32.
    """
    model_version = 1

    max_batch_sizes = (8, 0) if no_batch else (8,)
    for max_batch in max_batch_sizes:
        create_plan_modelconfig(
            True,
            models_dir,
            model_version,
            io_cnt,
            max_batch,
            dtype,
            shape,
            shape_tensor_input_dtype,
        )
        create_plan_shape_tensor_modelfile(
            models_dir,
            model_version,
            io_cnt,
            max_batch,
            dtype,
            shape,
            32,
            shape_tensor_input_dtype,
        )


def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True):
    """Generate config and model files for every backend enabled in FLAGS.

    For each enabled backend a model with max batch size 8 is created,
    followed by a max batch size 0 model when ``no_batch`` is true.  All
    models use version 1.
    """
    model_version = 1
    max_batch_sizes = (8, 0) if no_batch else (8,)

    if FLAGS.onnx:
        for max_batch in max_batch_sizes:
            create_onnx_modelconfig(
                True, models_dir, model_version, io_cnt, max_batch, dtype, shape
            )
            create_onnx_modelfile(
                True, models_dir, model_version, io_cnt, max_batch, dtype, shape
            )

    if FLAGS.openvino:
        for max_batch in max_batch_sizes:
            create_openvino_modelconfig(
                models_dir, model_version, io_cnt, max_batch, dtype, shape
            )
            create_openvino_modelfile(
                models_dir, model_version, io_cnt, max_batch, dtype, shape
            )

    if FLAGS.libtorch:
        for max_batch in max_batch_sizes:
            create_libtorch_modelconfig(
                True, models_dir, model_version, io_cnt, max_batch, dtype, shape
            )
            create_libtorch_modelfile(
                True, models_dir, model_version, io_cnt, max_batch, dtype, shape
            )

    if FLAGS.tensorrt or FLAGS.tensorrt_compat:
        # Regular plans use a profile maximum size of 32.
        for max_batch in max_batch_sizes:
            create_plan_modelconfig(
                True, models_dir, model_version, io_cnt, max_batch, dtype, shape
            )
            create_plan_modelfile(
                True, models_dir, model_version, io_cnt, max_batch, dtype, shape, 32
            )

    if FLAGS.tensorrt_big:
        # "Big" plans use a profile maximum size of 16 * 1024 * 1024.
        big_profile_max = 16 * 1024 * 1024
        for max_batch in max_batch_sizes:
            create_plan_modelconfig(
                True, models_dir, model_version, io_cnt, max_batch, dtype, shape
            )
            create_plan_modelfile(
                True,
                models_dir,
                model_version,
                io_cnt,
                max_batch,
                dtype,
                shape,
                big_profile_max,
            )

    if FLAGS.ensemble:
        # The nop model config is created once, before the ensemble models.
        emu.create_nop_modelconfig(models_dir, shape, dtype)
        for max_batch in max_batch_sizes:
            create_ensemble_modelconfig(
                True, models_dir, model_version, io_cnt, max_batch, dtype, shape
            )
            create_ensemble_modelfile(
                True, models_dir, model_version, io_cnt, max_batch, dtype, shape
            )


# FIXME: The function signatures require a `savedmodel` boolean flag
# on all of them even though Tensorflow has been deprecated since 25.03
if __name__ == "__main__":
    # Command line: one required output directory plus one switch per
    # model flavour to generate.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir", type=str, required=True, help="Top-level model directory"
    )
    parser.add_argument(
        "--onnx", action="store_true", help="Generate Onnx Runtime Onnx models"
    )
    parser.add_argument(
        "--onnx_opset",
        type=int,
        default=0,
        help="Opset used for Onnx models. Default is to use ONNXRT default",
    )
    parser.add_argument(
        "--libtorch", action="store_true", help="Generate Pytorch LibTorch models"
    )
    parser.add_argument(
        "--openvino", action="store_true", help="Generate OpenVino models"
    )
    parser.add_argument(
        "--tensorrt", action="store_true", help="Generate TensorRT PLAN models"
    )
    parser.add_argument(
        "--tensorrt-big",
        action="store_true",
        help="Generate TensorRT PLAN models w/ opt profile with large max",
    )
    parser.add_argument(
        "--tensorrt-compat",
        action="store_true",
        help="Generate TensorRT version-compatible models",
    )
    parser.add_argument(
        "--tensorrt-shape-io",
        action="store_true",
        help="Generate TensorRT PLAN models w/ shape tensor i/o",
    )
    parser.add_argument(
        "--ensemble", action="store_true", help="Generate ensemble models"
    )
    FLAGS, unparsed = parser.parse_known_args()

    # Framework packages are imported only when the matching models were
    # requested.
    if FLAGS.onnx:
        import onnx
    if FLAGS.libtorch:
        import torch
        from torch import nn
    if any(
        (
            FLAGS.tensorrt,
            FLAGS.tensorrt_big,
            FLAGS.tensorrt_compat,
            FLAGS.tensorrt_shape_io,
        )
    ):
        import tensorrt as trt
    if FLAGS.openvino:
        import openvino.runtime as ov

    import test_util as tu

    # Create models with variable-sized input and output. For big
    # and version-compatible TensorRT models, only create the one
    # needed for testing.
    if FLAGS.tensorrt_big:
        create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1)
    elif FLAGS.tensorrt_compat:
        create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1, no_batch=False)
    elif FLAGS.tensorrt_shape_io:
        # One set of shape-tensor models per supported shape-tensor dtype.
        create_shape_tensor_models(
            FLAGS.models_dir, np.float32, [-1, -1], np.int32, io_cnt=1
        )
        create_shape_tensor_models(
            FLAGS.models_dir, np.float32, [-1, -1], np.int64, io_cnt=1
        )
    else:
        create_models(FLAGS.models_dir, bool, [-1], io_cnt=1)
        create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1)
        create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=3)
        create_models(FLAGS.models_dir, np.float16, [-1, -1], io_cnt=1)
        create_models(FLAGS.models_dir, np.float16, [-1, -1], io_cnt=3)
        create_models(FLAGS.models_dir, np_dtype_string, [-1], io_cnt=1)
        create_models(FLAGS.models_dir, np_dtype_string, [-1, -1], io_cnt=3)

    # Create libtorch linalg model
    if FLAGS.libtorch:
        model_version = 1
        create_libtorch_linalg_modelconfig(True, FLAGS.models_dir, model_version)
        create_libtorch_linalg_modelfile(True, FLAGS.models_dir, model_version)


================================================
FILE: qa/common/gen_qa_image_models.py
================================================
#!/usr/bin/env python3

# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import argparse
import os

import requests
import test_util as tu
import torch
import torch.onnx
import torchvision.models as models

LABELS_URL = "https://raw.githubusercontent.com/triton-inference-server/python_backend/main/examples/preprocessing/model_repository/resnet50_trt/labels.txt"


def download_labels_file(url, path, file_name="labels.txt", timeout=30):
    """Download a labels file and store it as ``<path>/<file_name>``.

    Args:
        url: Location of the labels file.
        path: Existing directory the file is written into.
        file_name: Name of the file created inside ``path``.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests.get`` may block indefinitely when the
            server stops responding.

    On a non-200 response nothing is written and a failure message is
    printed instead.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        with open(os.path.join(path, file_name), "wb") as file:
            file.write(response.content)
    else:
        print(
            f"Failed to download file from {url}. Status code: {response.status_code}"
        )


def create_onnx_model_config(
    name,
    batch_size,
    input_name,
    input_shape,
    output_name,
    output_shape,
    label_filename,
    config_dir,
    config_name="config.pbtxt",
):
    """Write an ONNX Runtime model configuration for an image classifier.

    The configuration declares a single FP32 input in NCHW format and a
    single FP32 output that references ``label_filename``.  It is written
    to ``<config_dir>/<config_name>``.
    """
    template = """name: "{}"
max_batch_size: {}
platform: "onnxruntime_onnx"
input [
  {{
    name: "{}"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ {} ]
  }}
]
output [
  {{
    name: "{}"
    data_type: TYPE_FP32
    dims: [ {} ]
    label_filename: "{}"
  }}
]
"""
    input_dims = tu.shape_to_dims_str(input_shape)
    output_dims = tu.shape_to_dims_str(output_shape)
    config_text = template.format(
        name,
        batch_size,
        input_name,
        input_dims,
        output_name,
        output_dims,
        label_filename,
    )

    config_path = "{}/{}".format(config_dir, config_name)
    with open(config_path, "w") as config_file:
        config_file.write(config_text)


def _export_image_model(model, display_name, config_name, models_dir, model_name):
    """Export a torchvision classifier to ONNX and write its repository files.

    Shared implementation of the ``export_*`` functions.

    Args:
        model: Instantiated torchvision model to export.
        display_name: Human-readable name used in the progress message.
        config_name: Value of the ``name`` field in the generated config.
        models_dir: Version directory the ONNX file is written into.  The
            model configuration and the labels file are placed in its
            parent directory.
        model_name: File name of the exported ONNX model.
    """
    model_path = f"{models_dir}/{model_name}"
    model.eval()
    dummy_input = torch.randn(1, 3, 224, 224)  # (batch, channels, height, width)

    # Use legacy TorchScript-based ONNX export
    # TODO: Update to use new torch.export-based ONNX exporter (default dynamo=True)
    torch.onnx.export(
        model,
        dummy_input,
        model_path,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
        dynamo=False,
    )

    print(f"{display_name} model exported to: {model_path}")

    # config.pbtxt and labels.txt live next to the version directory.
    repo_model_dir = os.path.dirname(models_dir)
    create_onnx_model_config(
        config_name,
        32,
        "input",
        (3, 224, 224),
        "output",
        (1000,),
        "labels.txt",
        repo_model_dir,
    )
    download_labels_file(LABELS_URL, repo_model_dir)


def export_vgg19(models_dir, model_name="model.onnx"):
    """Export pre-trained VGG19 to ``<models_dir>/<model_name>`` as 'vgg19_onnx'."""
    _export_image_model(
        models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1),
        "VGG19",
        "vgg19_onnx",
        models_dir,
        model_name,
    )


def export_resnet152(models_dir, model_name="model.onnx"):
    """Export pre-trained ResNet-152 to ``<models_dir>/<model_name>`` as 'resnet152_onnx'."""
    _export_image_model(
        models.resnet152(weights=models.ResNet152_Weights.DEFAULT),
        "ResNet-152",
        "resnet152_onnx",
        models_dir,
        model_name,
    )


def export_resnet50(models_dir, model_name="model.onnx"):
    """Export pre-trained ResNet-50 to ``<models_dir>/<model_name>`` as 'resnet50_onnx'."""
    _export_image_model(
        models.resnet50(weights=models.ResNet50_Weights.DEFAULT),
        "ResNet-50",
        "resnet50_onnx",
        models_dir,
        model_name,
    )


if __name__ == "__main__":
    # Command line: output directory plus one switch per exportable model.
    parser = argparse.ArgumentParser(
        description="Export pre-trained models to ONNX format"
    )
    parser.add_argument(
        "--models_dir",
        type=str,
        required=True,
        help="Directory to save the ONNX models",
    )
    parser.add_argument(
        "--resnet152", action="store_true", help="Export ResNet-152 model"
    )
    parser.add_argument(
        "--resnet50", action="store_true", help="Export ResNet-50 model"
    )
    parser.add_argument("--vgg19", action="store_true", help="Export VGG19 model")
    FLAGS, unparsed = parser.parse_known_args()

    # (requested?, version directory relative to --models_dir, exporter)
    export_plan = (
        (FLAGS.resnet152, "resnet152_onnx/1", export_resnet152),
        (FLAGS.resnet50, "resnet50_onnx/1", export_resnet50),
        (FLAGS.vgg19, "vgg19_onnx/1", export_vgg19),
    )
    for requested, version_subdir, exporter in export_plan:
        if not requested:
            continue
        models_dir = os.path.join(FLAGS.models_dir, version_subdir)
        os.makedirs(models_dir, exist_ok=True)
        exporter(models_dir)


================================================
FILE: qa/common/gen_qa_implicit_models.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os
from typing import List, Tuple

import gen_ensemble_model_utils as emu
import numpy as np
from gen_common import (
    np_to_model_dtype,
    np_to_onnx_dtype,
    np_to_torch_dtype,
    np_to_trt_dtype,
)

FLAGS = None
np_dtype_string = np.dtype(object)


def create_onnx_modelfile_wo_initial_state(
    models_dir, model_version, max_batch, dtype, shape
):
    """Create and save an ONNX sequence model that carries a state tensor.

    The graph has inputs ``INPUT``, ``INPUT_STATE``, ``START`` and ``READY``
    and outputs ``OUTPUT`` and ``OUTPUT_STATE``.  For non-string data types
    both outputs are ``INPUT + INPUT_STATE * (READY and not START)``, cast
    back to the model data type.  For string models ``INPUT`` is passed
    through to both outputs unchanged.

    Args:
        models_dir: Top-level model repository directory.
        model_version: Version sub-directory the model file is saved in.
        max_batch: 0 creates the "nobatch" variant without a batch
            dimension; any other value adds a dynamic batch dimension.
        dtype: Numpy data type of the model.
        shape: Shape of the input and output, without the batch dimension.

    Nothing is created when ``tu.validate_for_onnx_model`` rejects the
    dtype/shape combination.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    model_name = tu.get_sequence_model_name(
        "onnx_nobatch" if max_batch == 0 else "onnx", dtype
    )
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    onnx_dtype = np_to_onnx_dtype(dtype)
    onnx_control_dtype = onnx_dtype
    onnx_input_shape, idx = tu.shape_to_onnx_shape(shape, 0)
    onnx_output_shape, idx = tu.shape_to_onnx_shape(shape, idx)

    # If the input is a string then use int32 for operation and just
    # cast to/from string for input and output.
    if onnx_dtype == onnx.TensorProto.STRING:
        onnx_control_dtype = onnx.TensorProto.INT32

    # If input dtype is bool, then use bool type for control and
    # int32 type for input/output
    if onnx_dtype == onnx.TensorProto.BOOL:
        onnx_dtype = onnx.TensorProto.INT32

    batch_dim = [] if max_batch == 0 else [None]

    onnx_input = onnx.helper.make_tensor_value_info(
        "INPUT", onnx_dtype, batch_dim + onnx_input_shape
    )
    onnx_input_state = onnx.helper.make_tensor_value_info(
        "INPUT_STATE", onnx_dtype, batch_dim + onnx_input_shape
    )
    onnx_start = onnx.helper.make_tensor_value_info(
        "START", onnx_control_dtype, batch_dim + [1]
    )
    onnx_ready = onnx.helper.make_tensor_value_info(
        "READY", onnx_control_dtype, batch_dim + [1]
    )
    onnx_output = onnx.helper.make_tensor_value_info(
        "OUTPUT", onnx_dtype, batch_dim + onnx_output_shape
    )
    onnx_output_state = onnx.helper.make_tensor_value_info(
        "OUTPUT_STATE", onnx_dtype, batch_dim + onnx_output_shape
    )

    make_node = onnx.helper.make_node
    int32 = onnx.TensorProto.INT32
    is_string = onnx_dtype == onnx.TensorProto.STRING

    # cast int8, int16 input to higher precision int as Onnx Add/Sub operator doesn't support those type
    if onnx_dtype in (onnx.TensorProto.INT8, onnx.TensorProto.INT16):
        internal_input = make_node("Cast", ["INPUT"], ["_INPUT"], to=int32)
        internal_input_state = make_node(
            "Cast", ["INPUT_STATE"], ["_INPUT_STATE"], to=int32
        )
    else:
        internal_input = make_node("Identity", ["INPUT"], ["_INPUT"])
        internal_input_state = make_node(
            "Identity", ["INPUT_STATE"], ["_INPUT_STATE"]
        )

    # Nodes are appended in the order the graph evaluates them.
    onnx_nodes = [internal_input, internal_input_state]

    if not is_string:
        if onnx_control_dtype == onnx.TensorProto.BOOL:
            # Controls are already boolean; the data type was switched to
            # int32 above, so this path is never a string model.
            onnx_nodes += [
                make_node("Cast", ["START"], ["_START"], to=int32),
                make_node("Cast", ["READY"], ["_READY"], to=int32),
                make_node("Not", ["START"], ["_NOT_START_CAST"]),
                make_node("Cast", ["_NOT_START_CAST"], ["_NOT_START"], to=int32),
                # Negate READY here; this node previously negated START a
                # second time, which looks like a copy-paste slip.
                make_node("Not", ["READY"], ["_NOT_READY_CAST"]),
                make_node("Cast", ["_NOT_READY_CAST"], ["_NOT_READY"], to=int32),
                make_node("And", ["READY", "_NOT_START_CAST"], ["input_state_cond"]),
            ]
        else:
            # Numeric controls are converted to booleans first.
            onnx_nodes += [
                make_node(
                    "Cast", ["START"], ["_START_CAST"], to=onnx.TensorProto.BOOL
                ),
                make_node("Not", ["_START_CAST"], ["_NOT_START_CAST"]),
                make_node("Cast", ["_NOT_START_CAST"], ["_NOT_START"], to=int32),
                make_node(
                    "Cast", ["READY"], ["_READY_CAST"], to=onnx.TensorProto.BOOL
                ),
                make_node("Not", ["_READY_CAST"], ["_NOT_READY_CAST"]),
                make_node("Cast", ["_NOT_READY_CAST"], ["_NOT_READY"], to=int32),
                # Take advantage of knowledge that the READY false value is 0 and true is 1
                make_node(
                    "And", ["_NOT_START_CAST", "_READY_CAST"], ["input_state_cond"]
                ),
            ]

        # The input state only contributes when READY is set and START is not.
        onnx_nodes += [
            make_node(
                "Cast", ["input_state_cond"], ["input_state_cond_cast"], to=int32
            ),
            make_node(
                "Mul", ["_INPUT_STATE", "input_state_cond_cast"], ["mul_state"]
            ),
            make_node("Add", ["_INPUT", "mul_state"], ["CAST"]),
        ]

    if is_string:
        cast = make_node("Identity", ["_INPUT"], ["OUTPUT"])
        cast_output_state = make_node("Identity", ["_INPUT"], ["OUTPUT_STATE"])
    elif onnx_dtype == onnx.TensorProto.FLOAT16:
        # Avoid cast from float16 to float16
        # (bug in Onnx Runtime, cast from float16 to float16 will become cast from float16 to float32)
        cast = make_node("Identity", ["CAST"], ["OUTPUT"])
        cast_output_state = make_node("Identity", ["CAST"], ["OUTPUT_STATE"])
    else:
        cast = make_node("Cast", ["CAST"], ["OUTPUT"], to=onnx_dtype)
        cast_output_state = make_node(
            "Cast", ["CAST"], ["OUTPUT_STATE"], to=onnx_dtype
        )
    onnx_nodes += [cast, cast_output_state]

    onnx_inputs = [onnx_input_state, onnx_input, onnx_start, onnx_ready]
    onnx_outputs = [onnx_output, onnx_output_state]
    graph_proto = onnx.helper.make_graph(
        onnx_nodes, model_name, onnx_inputs, onnx_outputs
    )

    if FLAGS.onnx_opset > 0:
        model_opset = onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        model_def = onnx.helper.make_model(
            graph_proto, producer_name="triton", opset_imports=[model_opset]
        )
    else:
        model_def = onnx.helper.make_model(graph_proto, producer_name="triton")

    # Only an already-existing directory is tolerated; other OS errors
    # (e.g. permissions) are raised here instead of being swallowed.
    os.makedirs(model_version_dir, exist_ok=True)

    onnx.save(model_def, model_version_dir + "/model.onnx")


def create_onnx_modelfile_with_initial_state(
    models_dir, model_version, max_batch, dtype, shape
):
    """Write an ONNX sequence model that adds INPUT to an already valid state.

    The graph declares INPUT, INPUT_STATE, START and READY as inputs and
    OUTPUT / OUTPUT_STATE as outputs. START and READY are declared but no
    node consumes them. For non-string dtypes both outputs are
    ``INPUT + INPUT_STATE``; for string tensors both outputs are a copy of
    INPUT. Nothing is written when ``tu.validate_for_onnx_model`` rejects
    the dtype/shape combination.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Version subdirectory that receives ``model.onnx``.
        max_batch: Maximum batch size; 0 selects the "nobatch" model name
            and omits the leading batch dimension.
        dtype: Numpy dtype of the input, output and state tensors.
        shape: Tensor shape without the batch dimension.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    batched = max_batch != 0
    model_name = tu.get_sequence_model_name(
        "onnx" if batched else "onnx_nobatch", dtype
    )
    model_version_dir = "/".join((models_dir, model_name, str(model_version)))

    tensor_dtype = np_to_onnx_dtype(dtype)
    in_dims, next_idx = tu.shape_to_onnx_shape(shape, 0)
    out_dims, _ = tu.shape_to_onnx_shape(shape, next_idx)

    # String models use int32 control tensors; every other dtype reuses the
    # tensor dtype for the controls.
    if tensor_dtype == onnx.TensorProto.STRING:
        control_dtype = onnx.TensorProto.INT32
    else:
        control_dtype = tensor_dtype

    # Bool models keep bool controls but carry int32 data tensors.
    if tensor_dtype == onnx.TensorProto.BOOL:
        tensor_dtype = onnx.TensorProto.INT32

    lead_dims = [None] if batched else []

    def value_info(name, elem_type, dims):
        # Declare a graph input/output, prefixed with the batch dim if any.
        return onnx.helper.make_tensor_value_info(name, elem_type, lead_dims + dims)

    onnx_input = value_info("INPUT", tensor_dtype, in_dims)
    onnx_input_state = value_info("INPUT_STATE", tensor_dtype, in_dims)
    onnx_start = value_info("START", control_dtype, [1])
    onnx_ready = value_info("READY", control_dtype, [1])
    onnx_output = value_info("OUTPUT", tensor_dtype, out_dims)
    onnx_output_state = value_info("OUTPUT_STATE", tensor_dtype, out_dims)

    # Onnx Add/Sub does not support int8/int16, so those inputs are widened to
    # int32 on the way in; everything else is passed through unchanged.
    if tensor_dtype in (onnx.TensorProto.INT8, onnx.TensorProto.INT16):
        entry_op, entry_attrs = "Cast", {"to": onnx.TensorProto.INT32}
    else:
        entry_op, entry_attrs = "Identity", {}
    internal_input = onnx.helper.make_node(
        entry_op, ["INPUT"], ["_INPUT"], **entry_attrs
    )
    internal_input_state = onnx.helper.make_node(
        entry_op, ["INPUT_STATE"], ["_INPUT_STATE"], **entry_attrs
    )

    if tensor_dtype == onnx.TensorProto.STRING:
        # Strings are not summed: both outputs simply mirror the input.
        tail_nodes = [
            onnx.helper.make_node("Identity", ["_INPUT"], ["OUTPUT"]),
            onnx.helper.make_node("Identity", ["_INPUT"], ["OUTPUT_STATE"]),
        ]
    else:
        add = onnx.helper.make_node("Add", ["_INPUT", "_INPUT_STATE"], ["CAST"])
        if tensor_dtype == onnx.TensorProto.FLOAT16:
            # Avoid a float16 -> float16 Cast: Onnx Runtime turns it into a
            # float16 -> float32 cast.
            to_output = onnx.helper.make_node("Identity", ["CAST"], ["OUTPUT"])
            to_output_state = onnx.helper.make_node(
                "Identity", ["CAST"], ["OUTPUT_STATE"]
            )
        else:
            to_output = onnx.helper.make_node(
                "Cast", ["CAST"], ["OUTPUT"], to=tensor_dtype
            )
            to_output_state = onnx.helper.make_node(
                "Cast", ["CAST"], ["OUTPUT_STATE"], to=tensor_dtype
            )
        tail_nodes = [add, to_output, to_output_state]

    onnx_nodes = [internal_input, internal_input_state] + tail_nodes
    graph_proto = onnx.helper.make_graph(
        onnx_nodes,
        model_name,
        [onnx_input_state, onnx_input, onnx_start, onnx_ready],
        [onnx_output, onnx_output_state],
    )

    # Pin the opset only when one was requested on the command line.
    model_kwargs = {"producer_name": "triton"}
    if FLAGS.onnx_opset > 0:
        model_kwargs["opset_imports"] = [
            onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        ]
    model_def = onnx.helper.make_model(graph_proto, **model_kwargs)

    try:
        os.makedirs(model_version_dir)
    except OSError:
        pass  # ignore existing dir

    onnx.save(model_def, model_version_dir + "/model.onnx")


def create_onnx_modelfile(
    models_dir, model_version, max_batch, dtype, shape, initial_state
):
    """Generate the ONNX model file, choosing the variant by ``initial_state``.

    ``initial_state is None`` selects the model that gates the state with the
    START/READY controls; any other value selects the model that assumes the
    initial state already contains correct data.
    """
    if initial_state is not None:
        # This model assumes that the initial state contains correct data
        generate = create_onnx_modelfile_with_initial_state
    else:
        generate = create_onnx_modelfile_wo_initial_state
    generate(models_dir, model_version, max_batch, dtype, shape)


def create_libtorch_modelfile_wo_initial_state(
    models_dir, model_version, max_batch, dtype, shape
):
    """Script and save a TorchScript sequence model that gates its own state.

    The scripted module computes ``input0 + input0_state * use_state`` where
    ``use_state = ready0 AND NOT start0``, and returns that value twice (as
    output and as new state). String models parse each element as an integer
    (with a "0" prefix prepended), do the same arithmetic, and convert the
    result back to strings. Nothing is written when
    ``tu.validate_for_libtorch_model`` rejects the dtype/shape combination.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Version subdirectory that receives ``model.pt``.
        max_batch: Maximum batch size; 0 selects the "nobatch" model name.
        dtype: Numpy dtype of the input, output and state tensors.
        shape: Tensor shape without the batch dimension.
    """
    if not tu.validate_for_libtorch_model(dtype, dtype, dtype, shape, shape, shape):
        return

    # Bool models use bool controls and int32 input/output tensors.
    torch_dtype = np_to_torch_dtype(dtype)
    if torch_dtype == torch.bool:
        torch_dtype = torch.int32

    backend_name = "libtorch" if max_batch != 0 else "libtorch_nobatch"
    model_name = tu.get_sequence_model_name(backend_name, dtype)

    handles_strings = torch_dtype == List[str]
    if handles_strings:

        class SequenceNet(nn.Module):
            def __init__(self):
                super().__init__()

            def forward(
                self, input0: List[str], input0_state: List[str], start0, ready0
            ) -> Tuple[List[str], List[str]]:
                state_is_live = torch.logical_and(ready0, torch.logical_not(start0))

                state_values = torch.tensor(
                    [int("0" + s) for s in input0_state], device=state_is_live.device
                )
                input_values = torch.tensor(
                    [int("0" + s) for s in input0], device=state_is_live.device
                )
                total = torch.mul(state_is_live, state_values)
                total += input_values
                as_strings = [str(v.item()) for v in total.cpu()]
                return as_strings, as_strings

    else:

        class SequenceNet(nn.Module):
            def __init__(self):
                super().__init__()

            def forward(self, input0, input0_state, start0, ready0):
                state_is_live = torch.logical_and(ready0, torch.logical_not(start0))

                total = torch.mul(state_is_live, input0_state)
                total += input0
                return total, total

    scripted = torch.jit.script(SequenceNet())
    model_version_dir = "/".join((models_dir, model_name, str(model_version)))

    try:
        os.makedirs(model_version_dir)
    except OSError:
        pass  # ignore existing dir

    scripted.save(model_version_dir + "/model.pt")


def create_libtorch_modelfile_with_initial_state(
    models_dir, model_version, max_batch, dtype, shape
):
    """Script and save a TorchScript sequence model that trusts its state.

    The scripted module returns ``input0_state + input0`` twice (as output and
    as new state); ``start0`` and ``ready0`` are accepted but do not affect
    the result (the string variant only reads ``start0.device``). String
    models parse each element as an integer (with a "0" prefix prepended)
    before adding and convert the sums back to strings. Nothing is written
    when ``tu.validate_for_libtorch_model`` rejects the dtype/shape
    combination.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Version subdirectory that receives ``model.pt``.
        max_batch: Maximum batch size; 0 selects the "nobatch" model name.
        dtype: Numpy dtype of the input, output and state tensors.
        shape: Tensor shape without the batch dimension.
    """
    if not tu.validate_for_libtorch_model(dtype, dtype, dtype, shape, shape, shape):
        return

    # Bool models use bool controls and int32 input/output tensors.
    torch_dtype = np_to_torch_dtype(dtype)
    if torch_dtype == torch.bool:
        torch_dtype = torch.int32

    backend_name = "libtorch" if max_batch != 0 else "libtorch_nobatch"
    model_name = tu.get_sequence_model_name(backend_name, dtype)

    handles_strings = torch_dtype == List[str]
    if handles_strings:

        class SequenceNet(nn.Module):
            def __init__(self):
                super().__init__()

            def forward(
                self, input0: List[str], input0_state: List[str], start0, ready0
            ) -> Tuple[List[str], List[str]]:
                state_values = torch.tensor(
                    [int("0" + s) for s in input0_state], device=start0.device
                )
                input_values = torch.tensor(
                    [int("0" + s) for s in input0], device=start0.device
                )
                total = (state_values + input_values).cpu()
                as_strings = [str(v.item()) for v in total]
                return as_strings, as_strings

    else:

        class SequenceNet(nn.Module):
            def __init__(self):
                super().__init__()

            def forward(self, input0, input0_state, start0, ready0):
                total = input0_state + input0
                return total, total

    scripted = torch.jit.script(SequenceNet())
    model_version_dir = "/".join((models_dir, model_name, str(model_version)))

    try:
        os.makedirs(model_version_dir)
    except OSError:
        pass  # ignore existing dir

    scripted.save(model_version_dir + "/model.pt")


def create_libtorch_modelfile(
    models_dir, model_version, max_batch, dtype, shape, initial_state
):
    """Generate the libtorch model file, choosing the variant by ``initial_state``.

    ``initial_state is None`` selects the model that gates the state with the
    START/READY controls; any other value selects the model that assumes the
    initial state already contains correct data.
    """
    if initial_state is not None:
        # This model assumes that the initial state contains correct data
        generate = create_libtorch_modelfile_with_initial_state
    else:
        generate = create_libtorch_modelfile_wo_initial_state
    generate(models_dir, model_version, max_batch, dtype, shape)


def create_libtorch_modelconfig(
    models_dir, model_version, max_batch, dtype, shape, initial_state
):
    """Write ``config.pbtxt`` for the libtorch implicit-state sequence model.

    The config declares INPUT__0 / OUTPUT__0, a GPU instance group and a
    ``sequence_batching`` section with START__2 / READY__3 controls and an
    implicit state carried through INPUT_STATE__1 / OUTPUT_STATE__1. Nothing
    is written when ``tu.validate_for_libtorch_model`` rejects the
    dtype/shape combination.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Unused here; kept for a signature parallel to the
            model-file generators.
        max_batch: Value written to ``max_batch_size``; 0 selects the
            "nobatch" model name.
        dtype: Numpy dtype of the tensors. ``bool`` selects bool controls and
            int32 data tensors; ``np.float32`` selects fp32 controls; all
            other dtypes use int32 controls.
        shape: Tensor shape without the batch dimension (-1 = variable).
        initial_state: ``None`` for no initial state, ``"zero"`` for a
            zero-filled initial state, or ``"file"`` for one loaded from the
            ``input_state_data`` file. Any other value produces a config
            without a ``sequence_batching`` section.
    """
    if not tu.validate_for_libtorch_model(dtype, dtype, dtype, shape, shape, shape):
        return

    model_name = tu.get_sequence_model_name(
        "libtorch_nobatch" if max_batch == 0 else "libtorch", dtype
    )
    config_dir = models_dir + "/" + model_name

    if dtype == np.float32:
        control_type = "fp32"
    elif dtype == bool:
        control_type = "bool"
        dtype = np.int32
    else:
        control_type = "int32"

    dtype_str = emu.dtype_str(dtype)
    dims_str = tu.shape_to_dims_str(shape)

    instance_group_string = """
instance_group [
  {
    kind: KIND_GPU
  }
]
"""

    config = f"""
name: "{model_name}"
platform: "pytorch_libtorch"
max_batch_size: {max_batch}

input [
  {{
    name: "INPUT__0"
    data_type: {dtype_str}
    dims: [ {dims_str} ]
  }}
]
output [
  {{
    name: "OUTPUT__0"
    data_type: {dtype_str}
    dims: [ {dims_str} ]
  }}
]
"""
    config += instance_group_string

    # Pick where the initial state data comes from. Unknown values keep the
    # historical behavior of emitting no sequence_batching section at all.
    add_sequence_batching = True
    initial_state_block = ""
    if initial_state is not None:
        if initial_state == "zero":
            data_source = "zero_data: true"
        elif initial_state == "file":
            data_source = "data_file: input_state_data"
        else:
            data_source = None
            add_sequence_batching = False

        if data_source is not None:
            # The initial state uses the tensor shape with every variable
            # (-1) dimension replaced by 1.
            shape_without_variable_dims = [1 if dim == -1 else dim for dim in shape]
            initial_state_block = f"""
          initial_state: {{
              name: "state init"
              data_type: {dtype_str}
              dims: {tu.shape_to_dims_str(shape_without_variable_dims)}
              {data_source}
          }}"""

    if add_sequence_batching:
        # One template serves every variant so the state tensor names always
        # follow the same "<name>__<index>" pattern as INPUT__0, START__2 and
        # READY__3 (the "file" variant previously used a single underscore).
        # NOTE(review): the state "dims" value is emitted without brackets,
        # unlike the input/output dims above; confirm this parses for shapes
        # with more than one dimension.
        config += f"""
    sequence_batching {{
      max_sequence_idle_microseconds: 5000000
      control_input [
        {{
          name: "START__2"
          control [
            {{
              kind: CONTROL_SEQUENCE_START
              {control_type}_false_true: [ 0, 1 ]
            }}
          ]
        }},
        {{
          name: "READY__3"
          control [
            {{
              kind: CONTROL_SEQUENCE_READY
              {control_type}_false_true: [ 0, 1 ]
            }}
          ]
        }}
      ]
      state [
        {{
          input_name: "INPUT_STATE__1"
          output_name: "OUTPUT_STATE__1"
          data_type: {dtype_str}
          dims: {dims_str}{initial_state_block}
        }}
      ]
    }}
    """

    # Only an already existing directory is tolerated; other failures
    # (permissions, a file in the way) surface here instead of being hidden.
    os.makedirs(config_dir, exist_ok=True)
    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_onnx_modelconfig(
    models_dir, model_version, max_batch, dtype, shape, initial_state
):
    """Write ``config.pbtxt`` for the ONNX implicit-state sequence model.

    The general part of the config (name, platform, INPUT/OUTPUT tensors and
    a GPU instance group) comes from ``emu.create_general_modelconfig``; a
    ``sequence_batching`` section with START / READY controls and an
    INPUT_STATE / OUTPUT_STATE implicit state is appended to it. Nothing is
    written when ``tu.validate_for_onnx_model`` rejects the dtype/shape
    combination.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Unused here; kept for a signature parallel to the
            model-file generators.
        max_batch: Maximum batch size; 0 selects the "nobatch" model name.
        dtype: Numpy dtype of the tensors. ``bool`` selects bool controls and
            int32 data tensors; ``np.float32`` selects fp32 controls; all
            other dtypes use int32 controls.
        shape: Tensor shape without the batch dimension (-1 = variable).
        initial_state: ``None`` for no initial state, ``"zero"`` for a
            zero-filled initial state, or ``"file"`` for one loaded from the
            ``input_state_data`` file. Any other value produces a config
            without a ``sequence_batching`` section.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    backend_name = "onnx" if max_batch != 0 else "onnx_nobatch"
    model_name = tu.get_sequence_model_name(backend_name, dtype)
    config_dir = models_dir + "/" + model_name

    if dtype == np.float32:
        control_type = "fp32"
    elif dtype == bool:
        control_type = "bool"
        dtype = np.int32
    else:
        control_type = "int32"

    instance_group_string = """
instance_group [
  {
    kind: KIND_GPU
  }
]
"""

    # [TODO] move create_general_modelconfig() out of emu as it is general
    # enough for all backends to use
    config = emu.create_general_modelconfig(
        model_name,
        "onnxruntime_onnx",
        max_batch,
        [dtype],
        [shape],
        [None],
        [dtype],
        [shape],
        [None],
        [None],
        force_tensor_number_suffix=False,
        instance_group_str=instance_group_string,
    )

    # Decide whether a sequence_batching section is emitted and, if so, which
    # line describes the source of the initial state data.
    if initial_state is None:
        with_section, data_source = True, None
    elif initial_state == "zero":
        with_section, data_source = True, "zero_data: true"
    elif initial_state == "file":
        with_section, data_source = True, "data_file: input_state_data"
    else:
        with_section, data_source = False, None

    if with_section:
        data_type = emu.dtype_str(dtype)
        state_dims = tu.shape_to_dims_str(shape)

        initial_state_block = ""
        if data_source is not None:
            # The initial state uses the tensor shape with every variable
            # (-1) dimension replaced by 1.
            fixed_shape = [1 if dim == -1 else dim for dim in shape]
            initial_state_block = f"""
          initial_state: {{
              name: "state init"
              data_type: {data_type}
              dims: {tu.shape_to_dims_str(fixed_shape)}
              {data_source}
          }}"""

        config += f"""
    sequence_batching {{
      max_sequence_idle_microseconds: 5000000
      control_input [
        {{
          name: "START"
          control [
            {{
              kind: CONTROL_SEQUENCE_START
              {control_type}_false_true: [ 0, 1 ]
            }}
          ]
        }},
        {{
          name: "READY"
          control [
            {{
              kind: CONTROL_SEQUENCE_READY
              {control_type}_false_true: [ 0, 1 ]
            }}
          ]
        }}
      ]
      state [
        {{
          input_name: "INPUT_STATE"
          output_name: "OUTPUT_STATE"
          data_type: {data_type}
          dims: {state_dims}{initial_state_block}
        }}
      ]
    }}
    """

    try:
        os.makedirs(config_dir)
    except OSError:
        pass  # ignore existing dir

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build a TensorRT engine for the sequence model and save ``model.plan``.

    The network computes ``INPUT + INPUT_STATE * (1 - START)`` and exposes it
    as both OUTPUT and OUTPUT_STATE. READY is declared as an input but is not
    used in the computation.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Version subdirectory that receives ``model.plan``.
        max_batch: Maximum batch size; 0 builds the "nobatch" variant without
            a leading dynamic batch dimension.
        dtype: Numpy dtype of all tensors.
        shape: Tensor shape (a list) without the batch dimension; -1 marks a
            variable dimension.
    """
    trt_dtype = np_to_trt_dtype(dtype)
    verbose = os.environ.get("TRT_VERBOSE") == "1"
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE if verbose else trt.Logger.INFO)
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    batched = max_batch != 0
    unit_shape = [1] * len(shape)
    tensor_dims = [-1] + shape if batched else shape
    control_dims = [-1] + unit_shape if batched else unit_shape
    # In the batched case the constant gets one extra trailing dimension so
    # that broadcasting works properly.
    ones_dims = unit_shape + [1] if batched else unit_shape

    in0 = network.add_input("INPUT", trt_dtype, tensor_dims)
    start0 = network.add_input("START", trt_dtype, control_dims)
    ready0 = network.add_input("READY", trt_dtype, control_dims)
    in_state0 = network.add_input("INPUT_STATE", trt_dtype, tensor_dims)
    constant_1_data = trt.Weights(np.ones(ones_dims, dtype=dtype))
    constant_1 = network.add_constant(ones_dims, constant_1_data)

    # not_start = 1 - START
    not_start = network.add_elementwise(
        constant_1.get_output(0), start0, trt.ElementWiseOperation.SUB
    )
    not_start.set_output_type(0, trt_dtype)
    # gated_state = INPUT_STATE * not_start
    gated_state = network.add_elementwise(
        in_state0, not_start.get_output(0), trt.ElementWiseOperation.PROD
    )
    # Two separate SUM layers, one per network output.
    out0 = network.add_elementwise(
        gated_state.get_output(0), in0, trt.ElementWiseOperation.SUM
    )
    out0_state = network.add_elementwise(
        gated_state.get_output(0), in0, trt.ElementWiseOperation.SUM
    )

    out0.get_output(0).name = "OUTPUT"
    network.mark_output(out0.get_output(0))

    out0_state.get_output(0).name = "OUTPUT_STATE"
    network.mark_output(out0_state.get_output(0))

    # Optimization profile: variable dims range over 1..32 with 8 as the
    # optimum; fixed dims are used as-is.
    batch_min = [1] if batched else []
    batch_max = [max(1, max_batch)] if batched else []
    min_shape = batch_min + [1 if dim == -1 else dim for dim in shape]
    opt_shape = batch_max + [8 if dim == -1 else dim for dim in shape]
    max_shape = batch_max + [32 if dim == -1 else dim for dim in shape]

    if batched:
        control_min = [1] + unit_shape
        control_opt = [max_batch] + unit_shape
        control_max = [max_batch] + unit_shape
    else:
        control_min = control_opt = control_max = unit_shape

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    profile.set_shape("INPUT_STATE", min_shape, opt_shape, max_shape)
    profile.set_shape("START", control_min, control_opt, control_max)
    profile.set_shape("READY", control_min, control_opt, control_max)

    config = builder.create_builder_config()
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)

    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fall back to build_engine() + serialize() when the builder has no
        # build_serialized_network attribute.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_name = tu.get_sequence_model_name(
        "plan" if batched else "plan_nobatch", dtype
    )
    model_version_dir = "/".join((models_dir, model_name, str(model_version)))

    try:
        os.makedirs(model_version_dir)
    except OSError:
        pass  # ignore existing dir

    with open(model_version_dir + "/model.plan", "wb") as plan_file:
        plan_file.write(engine_bytes)


def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build a TensorRT engine with explicit I/O dtype/format constraints.

    Builds the same arithmetic as ``create_plan_modelfile``
    (``INPUT + INPUT_STATE * (1 - START)`` exposed as OUTPUT and
    OUTPUT_STATE; READY is declared but unused), and additionally pins output
    dtypes, restricts tensor formats to LINEAR, sets int8 dynamic ranges and
    enables precision-related builder flags. ``create_plan_models`` selects
    this builder for every dtype other than ``np.float32``.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Version subdirectory that receives ``model.plan``.
        max_batch: Maximum batch size; 0 builds the "nobatch" variant without
            a leading dynamic batch dimension.
        dtype: Numpy dtype of all tensors.
        shape: Tensor shape (a list) without the batch dimension; -1 marks a
            variable dimension.
    """
    trt_dtype = np_to_trt_dtype(dtype)
    # Every tensor that gets an explicit format constraint uses LINEAR.
    trt_memory_format = trt.TensorFormat.LINEAR

    # TRT_VERBOSE=1 in the environment switches the logger to VERBOSE.
    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    # Control tensors have the same rank as the data tensors, all dims 1.
    unit_shape = [1] * len(shape)
    if max_batch != 0:
        # Batched variant: prepend a dynamic (-1) batch dimension.
        in0 = network.add_input("INPUT", trt_dtype, [-1] + shape)
        start0 = network.add_input("START", trt_dtype, [-1] + unit_shape)
        ready0 = network.add_input("READY", trt_dtype, [-1] + unit_shape)
        in_state0 = network.add_input("INPUT_STATE", trt_dtype, [-1] + shape)
        # Append the dimension by 1 so that broadcasting works properly
        constant_1_data = trt.Weights(np.ones(unit_shape + [1], dtype=dtype))
        constant_1 = network.add_constant(unit_shape + [1], constant_1_data)
    else:
        in0 = network.add_input("INPUT", trt_dtype, shape)
        start0 = network.add_input("START", trt_dtype, unit_shape)
        ready0 = network.add_input("READY", trt_dtype, unit_shape)
        in_state0 = network.add_input("INPUT_STATE", trt_dtype, shape)
        constant_1_data = trt.Weights(np.ones(unit_shape, dtype=dtype))
        constant_1 = network.add_constant(unit_shape, constant_1_data)

    # not_start = 1 - START
    not_start = network.add_elementwise(
        constant_1.get_output(0), start0, trt.ElementWiseOperation.SUB
    )
    not_start.set_output_type(0, trt_dtype)
    # internal_state = INPUT_STATE * not_start
    internal_state = network.add_elementwise(
        in_state0, not_start.get_output(0), trt.ElementWiseOperation.PROD
    )
    # Two separate SUM layers, one per network output.
    out0 = network.add_elementwise(
        internal_state.get_output(0), in0, trt.ElementWiseOperation.SUM
    )
    out0_state = network.add_elementwise(
        internal_state.get_output(0), in0, trt.ElementWiseOperation.SUM
    )
    out0.get_output(0).name = "OUTPUT"
    network.mark_output(out0.get_output(0))

    out0_state.get_output(0).name = "OUTPUT_STATE"
    network.mark_output(out0_state.get_output(0))

    # Pin both output tensors to the requested dtype.
    out0.get_output(0).dtype = trt_dtype
    out0_state.get_output(0).dtype = trt_dtype

    # allowed_formats is a bitmask; only the LINEAR bit is set.
    # NOTE(review): INPUT_STATE and OUTPUT_STATE get no format constraint
    # here, unlike INPUT/START/READY/OUTPUT -- confirm this is intentional.
    in0.allowed_formats = 1 << int(trt_memory_format)
    start0.allowed_formats = 1 << int(trt_memory_format)
    ready0.allowed_formats = 1 << int(trt_memory_format)
    out0.get_output(0).allowed_formats = 1 << int(trt_memory_format)

    if trt_dtype == trt.int8:
        # Explicit int8 ranges so that no calibrator is needed (presumably;
        # verify against the TensorRT int8 documentation).
        in0.dynamic_range = (-128.0, 127.0)
        in_state0.dynamic_range = (-128.0, 127.0)
        # NOTE(review): out0 is the elementwise layer, not its output tensor
        # (compare out0.get_output(0) above) -- confirm this assignment has
        # the intended effect. OUTPUT_STATE gets no range at all.
        out0.dynamic_range = (-128.0, 127.0)
        start0.dynamic_range = (-128.0, 127.0)
        ready0.dynamic_range = (-128.0, 127.0)

    # Builder flags are combined as a bitmask of BuilderFlag values.
    flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
    flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

    # Enable the reduced-precision mode that matches the tensor dtype.
    if trt_dtype == trt.int8:
        flags |= 1 << int(trt.BuilderFlag.INT8)
    elif trt_dtype == trt.float16:
        flags |= 1 << int(trt.BuilderFlag.FP16)

    # Optimization profile bounds: batch dim ranges over 1..max_batch,
    # variable dims over 1..32 (optimum 8), fixed dims are used as-is.
    min_shape = []
    opt_shape = []
    max_shape = []
    if max_batch != 0:
        min_shape = min_shape + [1]
        opt_shape = opt_shape + [max(1, max_batch)]
        max_shape = max_shape + [max(1, max_batch)]
    for i in shape:
        if i == -1:
            min_shape = min_shape + [1]
            opt_shape = opt_shape + [8]
            max_shape = max_shape + [32]
        else:
            min_shape = min_shape + [i]
            opt_shape = opt_shape + [i]
            max_shape = max_shape + [i]

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    profile.set_shape("INPUT_STATE", min_shape, opt_shape, max_shape)
    if max_batch != 0:
        profile.set_shape(
            "START",
            [1] + unit_shape,
            [max_batch] + unit_shape,
            [max_batch] + unit_shape,
        )
        profile.set_shape(
            "READY",
            [1] + unit_shape,
            [max_batch] + unit_shape,
            [max_batch] + unit_shape,
        )
    else:
        profile.set_shape("START", unit_shape, unit_shape, unit_shape)
        profile.set_shape("READY", unit_shape, unit_shape, unit_shape)

    config = builder.create_builder_config()
    config.flags = flags
    config.add_optimization_profile(profile)

    # Workspace memory pool limit: 1 << 20 bytes.
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fall back to build_engine() + serialize() when the builder has no
        # build_serialized_network attribute.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_name = tu.get_sequence_model_name(
        "plan_nobatch" if max_batch == 0 else "plan", dtype
    )
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    try:
        os.makedirs(model_version_dir)
    except OSError as ex:
        pass  # ignore existing dir

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_models(models_dir, model_version, max_batch, dtype, shape):
    """Build the TensorRT plan file with the builder that matches ``dtype``.

    ``np.float32`` uses ``create_plan_modelfile``; every other dtype uses
    ``create_plan_rf_modelfile``. Nothing is built when
    ``tu.validate_for_trt_model`` rejects the dtype/shape combination.
    """
    if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape):
        return

    build_plan = (
        create_plan_rf_modelfile if dtype != np.float32 else create_plan_modelfile
    )
    build_plan(models_dir, model_version, max_batch, dtype, shape)


def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape):
    """Write ``config.pbtxt`` for the TensorRT implicit-state sequence model.

    The config declares START / READY sequence controls, an INPUT_STATE /
    OUTPUT_STATE implicit state, the INPUT / OUTPUT tensors and a GPU
    instance group. Controls are int32 when ``dtype`` is ``np.int32`` and
    fp32 otherwise. Nothing is written when ``tu.validate_for_trt_model``
    rejects the dtype/shape combination.

    Args:
        models_dir: Root directory of the model repository.
        model_version: Unused here; kept for a signature parallel to the
            model-file generators.
        max_batch: Value written to ``max_batch_size``; 0 selects the
            "nobatch" model name.
        dtype: Numpy dtype of all tensors.
        shape: Tensor shape without the batch dimension.
    """
    if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape):
        return

    backend_name = "plan" if max_batch != 0 else "plan_nobatch"
    model_name = tu.get_sequence_model_name(backend_name, dtype)
    config_dir = models_dir + "/" + model_name

    control_type = "int32" if dtype == np.int32 else "fp32"
    data_type = np_to_model_dtype(dtype)
    dims = tu.shape_to_dims_str(shape)

    config = f"""
name: "{model_name}"
platform: "tensorrt_plan"
max_batch_size: {max_batch}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {control_type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {control_type}_false_true: [ 0, 1 ]
        }}
      ]
    }}
  ]
  state [
    {{
      input_name: "INPUT_STATE"
      output_name: "OUTPUT_STATE"
      data_type: {data_type}
      dims: {dims}
    }}
  ]
}}
input [
  {{
    name: "INPUT"
    data_type: {data_type}
    dims: [ {dims} ]
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {data_type}
    dims: [ {dims} ]
  }}
]
instance_group [
  {{
    kind: KIND_GPU
  }}
]
"""

    try:
        os.makedirs(config_dir)
    except OSError:
        pass  # ignore existing dir

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_models(models_dir, dtype, shape, initial_state, no_batch=True):
    """Generate configs and model files for every backend enabled in FLAGS.

    For each of ``FLAGS.onnx``, ``FLAGS.tensorrt`` and ``FLAGS.libtorch`` that
    is set, a batching model (max batch 8) is generated, followed by a
    non-batching model (max batch 0) when ``no_batch`` is true.

    Args:
        models_dir: Top-level directory the models are written under.
        dtype: Element type shared by the model's tensors.
        shape: List of dims; for ``np.int8`` the TensorRT and LibTorch models
            get two extra trailing dims of size 1.
        initial_state: Forwarded to the ONNX and LibTorch generators.
        no_batch: Also generate the max-batch-0 variants.

    Note:
        TensorRT generation is skipped for ``bool``. The skip is scoped to the
        TensorRT backend only, so LibTorch models are still generated when
        both ``--tensorrt`` and ``--libtorch`` are passed.
    """
    model_version = 1

    # Batching model first, then the optional non-batching one.
    batch_sizes = [8, 0] if no_batch else [8]

    if FLAGS.onnx:
        for max_batch in batch_sizes:
            create_onnx_modelconfig(
                models_dir, model_version, max_batch, dtype, shape, initial_state
            )
            create_onnx_modelfile(
                models_dir, model_version, max_batch, dtype, shape, initial_state
            )

    # TensorRT and LibTorch use the same padded shape for int8.
    padded_shape = shape + ([1, 1] if dtype == np.int8 else [])

    if FLAGS.tensorrt and dtype != bool:
        for max_batch in batch_sizes:
            create_plan_modelconfig(
                models_dir, model_version, max_batch, dtype, padded_shape
            )
            create_plan_models(
                models_dir, model_version, max_batch, dtype, padded_shape
            )

    if FLAGS.libtorch:
        for max_batch in batch_sizes:
            create_libtorch_modelconfig(
                models_dir, model_version, max_batch, dtype, padded_shape, initial_state
            )
            create_libtorch_modelfile(
                models_dir, model_version, max_batch, dtype, padded_shape, initial_state
            )


if __name__ == "__main__":
    # Command-line interface: one switch per backend plus shape/state options.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir", type=str, required=True, help="Top-level model directory"
    )
    parser.add_argument(
        "--tensorrt", action="store_true", help="Generate TensorRT PLAN models"
    )
    parser.add_argument(
        "--initial-state",
        choices=["zero", "file"],
        help="Generate models that rely on initial state.",
    )
    parser.add_argument(
        "--tensorrt-shape-io",
        action="store_true",
        help="Generate TensorRT PLAN models w/ shape tensor i/o",
    )
    parser.add_argument("--onnx", action="store_true", help="Generate Onnx models")
    parser.add_argument(
        "--onnx_opset",
        type=int,
        default=0,
        help="Opset used for Onnx models. Default is to use ONNXRT default",
    )
    parser.add_argument(
        "--libtorch", action="store_true", help="Generate Pytorch LibTorch models"
    )
    parser.add_argument(
        "--openvino", action="store_true", help="Generate OpenVino models"
    )
    parser.add_argument(
        "--variable",
        action="store_true",
        help="Used variable-shape tensors for input/output",
    )
    parser.add_argument(
        "--ensemble",
        action="store_true",
        help=(
            "Generate ensemble models against the models"
            " in all platforms. Note that the models generated"
            " are not completed."
        ),
    )
    FLAGS, unparsed = parser.parse_known_args()

    # Framework packages are imported only for the backends that were
    # requested; the names become module globals used by the generators.
    if FLAGS.onnx:
        import onnx

    if FLAGS.tensorrt:
        import tensorrt as trt

    if FLAGS.libtorch:
        import torch
        from torch import nn

    import test_util as tu

    if FLAGS.variable:
        # Tests with models that accept variable-shape input/output tensors
        # (no max-batch-0 variants are generated for these).
        for model_dtype in (np.int32, np.float32, np_dtype_string, bool):
            create_models(
                FLAGS.models_dir, model_dtype, [-1], FLAGS.initial_state, False
            )
    else:
        # Tests with models that accept fixed-shape input/output tensors
        for model_dtype in (np.float32, np.int32, np_dtype_string, bool):
            create_models(FLAGS.models_dir, model_dtype, [1], FLAGS.initial_state)


================================================
FILE: qa/common/gen_qa_model_repository
================================================
#!/bin/bash
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

############################################################################
## This script generates the model repository needed by some of the
## tritonserver CI tests. Generating these models requires using
## the PyTorch container.
##
## 1. Update TENSORRT_IMAGE and PYTORCH_IMAGE to
## match what is being used by the tritonserver release being
## tested.
##
## 2. Set NVIDIA_VISIBLE_DEVICES to the ID of the CUDA device present on the
## local system that you want to target for the generated models.
##
## 3. Run this script to create /tmp/qa_model_repository,
## /tmp/qa_variable_model_repository, /tmp/qa_shapetensor_model_repository,
## /tmp/qa_identity_model_repository, /tmp/qa_identity_big_model_repository,
## /tmp/qa_reshape_model_repository, /tmp/qa_noshape_model_repository,
## /tmp/qa_sequence_model_repository, /tmp/qa_ensemble_model_repository,
## /tmp/qa_dyna_sequence_model_repository, and
## /tmp/qa_variable_sequence_model_repository directories containing
## all the models needed for CI testing.
##
############################################################################
# Resolve the real path of this script and its directory. Everything is
# quoted so that paths containing whitespace survive intact.
TRITON_MDLS_BASE_SCRIPT_FILE="$(readlink -f "$0")"
TRITON_MDLS_BASE_SCRIPT_DIR="$(dirname "${TRITON_MDLS_BASE_SCRIPT_FILE}")"

# ANSI colour escapes used by the log helpers and by the generated scripts.
COLOR_ERROR="\033[31m"
COLOR_INFO="\033[94m"
COLOR_RESET="\033[0m"
COLOR_STATUS="\033[36m"
COLOR_WARNING="\033[33m"

# Shared implementation of the log helpers.
#   $1 - colour escape, $2 - level label, remaining args - message words
# Prints "<colour><timestamp> - [ LEVEL ] - <message> <reset>".
log_message._emit() {
    local color=$1 level=$2
    shift 2
    # "$*" joins the remaining arguments into a single string.
    local message="$*"
    echo -e "${color}$(date +"%Y-%m-%d %H:%M:%S") - [ ${level} ] - ${message} ${COLOR_RESET}"
}

log_message.info() { log_message._emit "${COLOR_INFO}" "INFO" "$@"; }
log_message.status() { log_message._emit "${COLOR_STATUS}" "STATUS" "$@"; }
log_message.warning() { log_message._emit "${COLOR_WARNING}" "WARNING" "$@"; }
log_message.error() { log_message._emit "${COLOR_ERROR}" "ERROR" "$@"; }

log_message.status "Changing working directory to the script directory to: " "${TRITON_MDLS_BASE_SCRIPT_DIR}"

# The helper scripts generated later are written relative to the current
# directory, so stop if the directory change does not succeed.
if ! cd "${TRITON_MDLS_BASE_SCRIPT_DIR}"; then
    log_message.error "failed: cd ${TRITON_MDLS_BASE_SCRIPT_DIR}"
    exit 1
fi

# Settings written as VAR=${VAR:=default} or VAR=${VAR:-default} keep a
# non-empty value already present in the environment and otherwise take the
# default shown. ONNX_VERSION, ONNX_OPSET and OPENVINO_VERSION are assigned
# unconditionally.
log_message.status "define: default values"
TRITON_VERSION=${TRITON_VERSION:=26.02}
ONNX_VERSION=1.20.1
ONNX_OPSET=0
OPENVINO_VERSION=2024.5.0
UBUNTU_IMAGE=${UBUNTU_IMAGE:=ubuntu:22.04}
# The default image tags are derived from TRITON_VERSION, so it must be
# resolved before these two lines.
PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$TRITON_VERSION-py3}
TENSORRT_IMAGE=${TENSORRT_IMAGE:=nvcr.io/nvidia/tensorrt:$TRITON_VERSION-py3}
NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:=0}
MODEL_TYPE=${MODEL_TYPE:-""}
TRITON_MODELS_USE_DOCKER=${TRITON_MODELS_USE_DOCKER:-"1"}
TRITON_MODELS_USE_ENROOT=${TRITON_MODELS_USE_ENROOT:-"1"}

log_message.status "define: CI related parameters"
# Without a CI-provided job id, fall back to a timestamp with minute resolution.
CI_JOB_ID=${CI_JOB_ID:=$(date +%Y%m%d_%H%M)}
RUNNER_ID=${RUNNER_ID:=0}
PROJECT_NAME=${PROJECT_NAME:=tritonserver}

log_message.status "define: Docker engine parameters"
DOCKER_VOLUME=${DOCKER_VOLUME:=volume.gen_qa_model_repository.${CI_JOB_ID}}
# The container name is the volume name with the same suffix appended again.
DOCKER_VOLUME_CONTAINER=${DOCKER_VOLUME}.gen_qa_model_repository.${CI_JOB_ID}
# Only evaluated when DOCKER_GPU_ARGS is unset or empty: if RUNNER_GPUS is set
# and begins with a digit, the output of evaluating $NV_DOCKER_ARGS is used;
# otherwise (or when that eval returns non-zero) the nvidia runtime pinned to
# NVIDIA_VISIBLE_DEVICES is used.
DOCKER_GPU_ARGS=${DOCKER_GPU_ARGS:-$([[ -v RUNNER_GPUS && $RUNNER_GPUS =~ ^[0-9] ]] && eval $NV_DOCKER_ARGS || echo "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES" )}

# Set the global TRITON_MDLS_* variables that name the build directory, the
# generator source directory and every per-repository output directory.
# TRITON_MDLS_BLD_DIR and TRITON_MDLS_SRC_DIR keep a non-empty value that is
# already set; all repository paths live under
# $TRITON_MDLS_BLD_DIR/$TRITON_VERSION.
define_model_output_directories() {
    log_message.status "define: model output directories"

    : "${TRITON_MDLS_BLD_DIR:=/mnt/$CI_JOB_ID}"
    : "${TRITON_MDLS_SRC_DIR:=$TRITON_MDLS_BLD_DIR/gen_srcdir}"

    # Common parent of every generated repository.
    local _mdls_root="${TRITON_MDLS_BLD_DIR}/${TRITON_VERSION}"

    TRITON_MDLS_QA_MODEL="${_mdls_root}/qa_model_repository"
    TRITON_MDLS_QA_VARIABLE_MODEL="${_mdls_root}/qa_variable_model_repository"
    TRITON_MDLS_QA_IDENTITY_MODEL="${_mdls_root}/qa_identity_model_repository"
    TRITON_MDLS_QA_IDENTITY_BIG_MODEL="${_mdls_root}/qa_identity_big_model_repository"
    TRITON_MDLS_QA_SHAPETENSOR_MODEL="${_mdls_root}/qa_shapetensor_model_repository"
    TRITON_MDLS_QA_RESHAPE_MODEL="${_mdls_root}/qa_reshape_model_repository"
    TRITON_MDLS_QA_SEQUENCE_MODEL="${_mdls_root}/qa_sequence_model_repository"
    TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL="${_mdls_root}/qa_dyna_sequence_model_repository"
    TRITON_MDLS_QA_DYNA_SEQUENCE_IMPLICIT_MODEL="${_mdls_root}/qa_dyna_sequence_implicit_model_repository"
    TRITON_MDLS_QA_VARIABLE_SEQUENCE_MODEL="${_mdls_root}/qa_variable_sequence_model_repository"
    TRITON_MDLS_QA_ENSEMBLE_MODEL="${_mdls_root}/qa_ensemble_model_repository"
    TRITON_MDLS_QA_NOSHAPE_MODEL="${_mdls_root}/qa_noshape_model_repository"
    TRITON_MDLS_QA_TRT_PLUGIN_MODEL="${_mdls_root}/qa_trt_plugin_model_repository"
    TRITON_MDLS_QA_RAGGED_MODEL="${_mdls_root}/qa_ragged_model_repository"
    TRITON_MDLS_QA_TRT_FORMAT_MODEL="${_mdls_root}/qa_trt_format_model_repository"
    TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL="${_mdls_root}/qa_trt_data_dependent_model_repository"
    TRITON_MDLS_QA_SEQUENCE_IMPLICIT_MODEL="${_mdls_root}/qa_sequence_implicit_model_repository"
    TRITON_MDLS_QA_VARIABLE_SEQUENCE_IMPLICIT_MODEL="${_mdls_root}/qa_variable_sequence_implicit_model_repository"
    TRITON_MDLS_QA_SEQUENCE_INITIAL_STATE_IMPLICIT_MODEL="${_mdls_root}/qa_sequence_initial_state_implicit_model_repository"
    TRITON_MDLS_QA_VARIABLE_SEQUENCE_INITIAL_STATE_IMPLICIT_MODEL="${_mdls_root}/qa_variable_sequence_initial_state_implicit_model_repository"
    TRITON_MDLS_QA_TORCHTRT_MODEL="${_mdls_root}/torchtrt_model_store"
    TRITON_MDLS_QA_SCALAR_MODELS="${_mdls_root}/qa_scalar_models"
    TRITON_MDLS_QA_DYNAMIC_BATCH_IMAGE_MODEL="${_mdls_root}/qa_dynamic_batch_image_model_repository"
    TRITON_MDLS_QA_CUSTOM_OPS="${_mdls_root}/qa_custom_ops/libtorch_custom_ops"
}

function define_models_generation_scripts() {
    log_message.status "define: script names"
    SCRIPT_NAME_SUFFIX=${SCRIPT_NAME_SUFFIX:-v2}

    OPENVINOSCRIPT=gen.OpenVINO.gen_qa_model_repository.${SCRIPT_NAME_SUFFIX}.sh
    log_message.info "OpenVINO script: " "${OPENVINOSCRIPT}"

    ONNXSCRIPT=gen.ONNXRuntime.gen_qa_model_repository.${SCRIPT_NAME_SUFFIX}.sh
    log_message.info "ONNX script: " "${ONNXSCRIPT}"

    TORCHSCRIPT=gen.PyTorch.gen_qa_model_repository.${SCRIPT_NAME_SUFFIX}.sh
    log_message.info "PyTorch script: " "${TORCHSCRIPT}"

    TRTSCRIPT=gen.TensorRT.gen_qa_model_repository.${SCRIPT_NAME_SUFFIX}.sh
    log_message.info "TensorRT script: " "${TRTSCRIPT}"

    log_message.status "create: OpenVINO script - ${OPENVINOSCRIPT}"
    cat > $OPENVINOSCRIPT <<EOF
#!/bin/bash
# Make all generated files accessible outside of container
umask 0000
nvidia-smi --query-gpu=compute_cap,compute_mode,driver_version,name,index --format=csv || true
nvidia-smi || true
echo -e "${COLOR_INFO}Generating OpenVINO models${COLOR_RESET}"
set -e
export DEBIAN_FRONTEND=noninteractive
apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        libprotobuf-dev \
        protobuf-compiler \
        python3 \
        python3-dev \
        python3-pip \
        wget \
        gnupg2 \
        software-properties-common

ln -s /usr/bin/python3 /usr/bin/python

pip3 install  "numpy<=1.23.5" setuptools

pip3 install openvino==$OPENVINO_VERSION

# Since variable shape tensors are not allowed, identity models may fail to generate.
# TODO Add variable size tensor models after DLIS-2827 adds support for variable shape tensors.
# TODO Add sequence models after DLIS-2864 adds support for sequence/control inputs.
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --openvino --models_dir=$TRITON_MDLS_QA_MODEL
chmod -R 777 $TRITON_MDLS_QA_MODEL
# python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --openvino --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL
# chmod -R 777 $TRITON_MDLS_QA_IDENTITY_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_reshape_models.py --openvino --models_dir=$TRITON_MDLS_QA_RESHAPE_MODEL
chmod -R 777 $TRITON_MDLS_QA_RESHAPE_MODEL
# python3 $TRITON_MDLS_SRC_DIR/gen_qa_sequence_models.py --openvino --models_dir=$TRITON_MDLS_QA_SEQUENCE_MODEL
# chmod -R 777 $SVOLUME_EQDESTDIR
# python3 $TRITON_MDLS_SRC_DIR/gen_qa_dyna_sequence_models.py --openvino --models_dir=$TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL
# chmod -R 777 $TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL
exit 0
EOF

    log_message.status "run: chmod a+x ${OPENVINOSCRIPT}"
    chmod a+x $OPENVINOSCRIPT

    if [ $? -ne 0 ]; then
        log_message.error "failed: chmod ${OPENVINOSCRIPT}"
        exit 1
    fi

    log_message.status "create: ONNX script - ${ONNXSCRIPT}"
    cat > $ONNXSCRIPT <<EOF
#!/bin/bash
# Make all generated files accessible outside of container
umask 0000
nvidia-smi --query-gpu=compute_cap,compute_mode,driver_version,name,index --format=csv || true
nvidia-smi || true
echo -e "${COLOR_INFO}Generating ONNX models${COLOR_RESET}"
set -e
export DEBIAN_FRONTEND=noninteractive
apt-get update && \
        apt-get install -y --no-install-recommends build-essential cmake libprotobuf-dev \
                protobuf-compiler python3 python3-dev python3-pip
ln -s /usr/bin/python3 /usr/bin/python

pip3 install "protobuf<=3.20.1"  "numpy<=1.23.5" # TODO: Remove current line DLIS-3838
pip3 install --upgrade onnx==${ONNX_VERSION}

python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --onnx --onnx_opset=$ONNX_OPSET --models_dir=$TRITON_MDLS_QA_MODEL
chmod -R 777 $TRITON_MDLS_QA_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --onnx --onnx_opset=$ONNX_OPSET --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --onnx --onnx_opset=$ONNX_OPSET --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL
chmod -R 777 $TRITON_MDLS_QA_IDENTITY_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_reshape_models.py --onnx --onnx_opset=$ONNX_OPSET --variable --models_dir=$TRITON_MDLS_QA_RESHAPE_MODEL
chmod -R 777 $TRITON_MDLS_QA_RESHAPE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_sequence_models.py --onnx --onnx_opset=$ONNX_OPSET --models_dir=$TRITON_MDLS_QA_SEQUENCE_MODEL
chmod -R 777 $TRITON_MDLS_QA_SEQUENCE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_sequence_models.py --onnx --onnx_opset=$ONNX_OPSET --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_SEQUENCE_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_SEQUENCE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_implicit_models.py --onnx --initial-state zero --onnx_opset=$ONNX_OPSET --models_dir=$TRITON_MDLS_QA_SEQUENCE_INITIAL_STATE_IMPLICIT_MODEL
chmod -R 777 $TRITON_MDLS_QA_SEQUENCE_INITIAL_STATE_IMPLICIT_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_implicit_models.py --onnx --initial-state zero --onnx_opset=$ONNX_OPSET --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_SEQUENCE_INITIAL_STATE_IMPLICIT_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_SEQUENCE_INITIAL_STATE_IMPLICIT_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_implicit_models.py --onnx --onnx_opset=$ONNX_OPSET --models_dir=$TRITON_MDLS_QA_SEQUENCE_IMPLICIT_MODEL
chmod -R 777 $TRITON_MDLS_QA_SEQUENCE_IMPLICIT_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_implicit_models.py --onnx --onnx_opset=$ONNX_OPSET --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_SEQUENCE_IMPLICIT_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_SEQUENCE_IMPLICIT_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_dyna_sequence_models.py --onnx --onnx_opset=$ONNX_OPSET --models_dir=$TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL
chmod -R 777 $TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_dyna_sequence_implicit_models.py --onnx --onnx_opset=$ONNX_OPSET --models_dir=$TRITON_MDLS_QA_DYNA_SEQUENCE_IMPLICIT_MODEL
chmod -R 777 $TRITON_MDLS_QA_DYNA_SEQUENCE_IMPLICIT_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_ragged_models.py --onnx --onnx_opset=$ONNX_OPSET --models_dir=$TRITON_MDLS_QA_RAGGED_MODEL
chmod -R 777 $TRITON_MDLS_QA_RAGGED_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_ort_scalar_models.py --onnx_opset=$ONNX_OPSET --models_dir=$TRITON_MDLS_QA_SCALAR_MODELS
chmod -R 777 $TRITON_MDLS_QA_SCALAR_MODELS
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --ensemble --models_dir=$TRITON_MDLS_QA_ENSEMBLE_MODEL/qa_model_repository
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --ensemble --variable --models_dir=$TRITON_MDLS_QA_ENSEMBLE_MODEL/qa_variable_model_repository
python3 $TRITON_MDLS_SRC_DIR/gen_qa_reshape_models.py --ensemble --models_dir=$TRITON_MDLS_QA_ENSEMBLE_MODEL/qa_reshape_model_repository
python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --ensemble --models_dir=$TRITON_MDLS_QA_ENSEMBLE_MODEL/qa_identity_model_repository
python3 $TRITON_MDLS_SRC_DIR/gen_qa_sequence_models.py --ensemble --models_dir=$TRITON_MDLS_QA_ENSEMBLE_MODEL/qa_sequence_model_repository
python3 $TRITON_MDLS_SRC_DIR/gen_qa_sequence_models.py --ensemble --variable --models_dir=$TRITON_MDLS_QA_ENSEMBLE_MODEL/qa_variable_sequence_model_repository
chmod -R 777 $TRITON_MDLS_QA_ENSEMBLE_MODEL
exit 0
EOF

    log_message.status "run: chmod a+x ${ONNXSCRIPT}"
    chmod a+x ${ONNXSCRIPT}
    if [ $? -ne 0 ]; then
        log_message.error "failed: chmod ${ONNXSCRIPT}"
        exit 1
    fi

    log_message.status "create: PyTorch script - ${TORCHSCRIPT}"
    cat > $TORCHSCRIPT <<EOF
#!/bin/bash
# Make all generated files accessible outside of container
umask 0000
nvidia-smi --query-gpu=compute_cap,compute_mode,driver_version,name,index --format=csv || true
nvidia-smi || true
echo -e "${COLOR_INFO}Generating PyTorch models${COLOR_RESET}"
pip3 install onnxscript
set -e
PATH=$PATH:/usr/local/cuda-13.0/bin
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --models_dir=$TRITON_MDLS_QA_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --models_dir=$TRITON_MDLS_QA_MODEL
chmod -R 777 $TRITON_MDLS_QA_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL
chmod -R 777 $TRITON_MDLS_QA_IDENTITY_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_reshape_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_RESHAPE_MODEL
chmod -R 777 $TRITON_MDLS_QA_RESHAPE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_sequence_models.py --libtorch --models_dir=$TRITON_MDLS_QA_SEQUENCE_MODEL
chmod -R 777 $TRITON_MDLS_QA_SEQUENCE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_sequence_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_SEQUENCE_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_SEQUENCE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_implicit_models.py --libtorch --models_dir=$TRITON_MDLS_QA_SEQUENCE_IMPLICIT_MODEL
chmod -R 777 $TRITON_MDLS_QA_SEQUENCE_IMPLICIT_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_implicit_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_SEQUENCE_IMPLICIT_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_SEQUENCE_IMPLICIT_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_dyna_sequence_models.py --libtorch --models_dir=$TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL
chmod -R 777 $TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL
if [ -z "$MODEL_TYPE" ] || [ "$MODEL_TYPE" != "igpu" ]; then
  nvidia-smi --query-gpu=compute_cap | grep -qz 12.1 && echo -e '${COLOR_WARNING}[WARNING]${COLOR_RESET} Skipping model generation for Torch TensorRT${COLOR_RESET}' || python3 $TRITON_MDLS_SRC_DIR/gen_qa_torchtrt_models.py --models_dir=$TRITON_MDLS_QA_TORCHTRT_MODEL
  chmod -R 777 $TRITON_MDLS_QA_TORCHTRT_MODEL
fi
python3 $TRITON_MDLS_SRC_DIR/gen_qa_ragged_models.py --libtorch --models_dir=$TRITON_MDLS_QA_RAGGED_MODEL
chmod -R 777 $TRITON_MDLS_QA_RAGGED_MODEL
# Export torchvision image models to ONNX
python3 $TRITON_MDLS_SRC_DIR/gen_qa_image_models.py --resnet50 --resnet152 --vgg19 --models_dir=$TRITON_MDLS_QA_DYNAMIC_BATCH_IMAGE_MODEL
chmod -R 777 $TRITON_MDLS_QA_DYNAMIC_BATCH_IMAGE_MODEL

export TORCH_EXTENSIONS_DIR=/tmp/.cache/torch_extensions/
mkdir -p \${TORCH_EXTENSIONS_DIR}
python3 $TRITON_MDLS_SRC_DIR/gen_qa_custom_ops_models.py --libtorch --models_dir=$TRITON_MDLS_QA_CUSTOM_OPS
mkdir -p $TRITON_MDLS_QA_CUSTOM_OPS/libtorch_modulo/
cp \${TORCH_EXTENSIONS_DIR}/custom_modulo/custom_modulo.so $TRITON_MDLS_QA_CUSTOM_OPS/libtorch_modulo/.
chmod -R 777 $TRITON_MDLS_QA_CUSTOM_OPS
exit 0
EOF

    log_message.status "run: chmod a+x ${TORCHSCRIPT}"
    chmod a+x ${TORCHSCRIPT}
    if [ $? -ne 0 ]; then
        log_message.error "failed: chmod ${TORCHSCRIPT}"
        exit 1
    fi

    log_message.status "create: TensorRT script - ${TRTSCRIPT}"
    cat > $TRTSCRIPT <<EOF
#!/bin/bash
# Make all generated files accessible outside of container
umask 0000
nvidia-smi --query-gpu=compute_cap,compute_mode,driver_version,name,index --format=csv || true
nvidia-smi || true
echo -e "${COLOR_INFO}Generating TensorRT models${COLOR_RESET}"
set -e
dpkg -l | grep TensorRT
export TRT_SUPPRESS_DEPRECATION_WARNINGS=1
# Models using shape tensor i/o
python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --tensorrt-shape-io --models_dir=$TRITON_MDLS_QA_SHAPETENSOR_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_sequence_models.py --tensorrt-shape-io --models_dir=$TRITON_MDLS_QA_SHAPETENSOR_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_dyna_sequence_models.py --tensorrt-shape-io --models_dir=$TRITON_MDLS_QA_SHAPETENSOR_MODEL
chmod -R 777 $TRITON_MDLS_QA_SHAPETENSOR_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --tensorrt --models_dir=$TRITON_MDLS_QA_MODEL
chmod -R 777 $TRITON_MDLS_QA_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --tensorrt --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --tensorrt --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --tensorrt-compat --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL
chmod -R 777 $TRITON_MDLS_QA_IDENTITY_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --tensorrt-big --models_dir=$TRITON_MDLS_QA_IDENTITY_BIG_MODEL
chmod -R 777 $TRITON_MDLS_QA_IDENTITY_BIG_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_reshape_models.py --tensorrt --variable --models_dir=$TRITON_MDLS_QA_RESHAPE_MODEL
chmod -R 777 $TRITON_MDLS_QA_RESHAPE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_sequence_models.py --tensorrt --models_dir=$TRITON_MDLS_QA_SEQUENCE_MODEL
chmod -R 777 $TRITON_MDLS_QA_SEQUENCE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_implicit_models.py --tensorrt --models_dir=$TRITON_MDLS_QA_SEQUENCE_IMPLICIT_MODEL
chmod -R 777 $TRITON_MDLS_QA_SEQUENCE_IMPLICIT_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_implicit_models.py --tensorrt --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_SEQUENCE_IMPLICIT_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_SEQUENCE_IMPLICIT_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_dyna_sequence_models.py --tensorrt --models_dir=$TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL
chmod -R 777 $TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_sequence_models.py --tensorrt --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_SEQUENCE_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_SEQUENCE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_dyna_sequence_implicit_models.py --tensorrt --models_dir=$TRITON_MDLS_QA_DYNA_SEQUENCE_IMPLICIT_MODEL
chmod -R 777 $TRITON_MDLS_QA_DYNA_SEQUENCE_IMPLICIT_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_ragged_models.py --tensorrt --models_dir=$TRITON_MDLS_QA_RAGGED_MODEL
chmod -R 777 $TRITON_MDLS_QA_RAGGED_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_format_models.py --models_dir=$TRITON_MDLS_QA_TRT_FORMAT_MODEL
chmod -R 777 $TRITON_MDLS_QA_TRT_FORMAT_MODEL
nvidia-smi --query-gpu=compute_cap | grep -qz 11.0 && echo -e '${COLOR_WARNING}[WARNING]${COLOR_RESET} Skipping model generation for data dependent shape${COLOR_RESET}' || python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_data_dependent_shape.py --models_dir=$TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
chmod -R 777 $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
# Make shared library for custom Hardmax plugin.
if [ -d "/usr/src/tensorrt/samples/python/onnx_custom_plugin" ]; then
    cd /usr/src/tensorrt/samples/python/onnx_custom_plugin
else
    TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2)
    if ! git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT; then
      MAJOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 1)
      MINOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 2)
      if [ -n "\$MINOR" ] && [ "\$MINOR" -gt 0 ] 2>/dev/null; then
        TRT_BRANCH="\${MAJOR}.\$((MINOR - 1))"
        echo "Fallback: cloning TensorRT release/\${TRT_BRANCH} (previous minor)"
        git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT
      else
        exit 1
      fi
    fi
    cd /workspace/TensorRT/samples/python/onnx_custom_plugin
fi
rm -rf build && mkdir build && \
cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/.
LD_PRELOAD=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL/libcustomHardmaxPlugin.so python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_plugin_models.py --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL
chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL
exit 0
EOF

    log_message.status "run: chmod a+x ${TRTSCRIPT}"
    chmod a+x ${TRTSCRIPT}
    if [ $? -ne 0 ]; then
        log_message.error "failed: chmod ${TRTSCRIPT}"
        exit 1
    fi

}

# Entry point of the script: choose a container engine and run the generated
# per-framework model-generation scripts inside it.
#   * Docker is used when TRITON_MODELS_USE_DOCKER is 1 and `docker` is on PATH.
#   * Otherwise Enroot is used when TRITON_MODELS_USE_ENROOT is 1 and `enroot`
#     is on PATH.
#   * Otherwise only warnings are logged and nothing is generated.
log_message.status "check: engine installation"
if [ "$TRITON_MODELS_USE_DOCKER" -eq 1 ] && which docker ; then
    log_message.info "Docker is installed."

    define_model_output_directories

    SCRIPT_NAME_SUFFIX=docker.v2 define_models_generation_scripts

    # Create the named volume on first use; in both cases print its details.
    if ! docker volume inspect $DOCKER_VOLUME > /dev/null 2>&1; then
        log_message.status "docker volume: $DOCKER_VOLUME does not exist. Creating..."
        docker volume create $DOCKER_VOLUME --label RUNNER_ID=$RUNNER_ID --label PROJECT_NAME=$PROJECT_NAME
        log_message.status "docker volume: $DOCKER_VOLUME created"
        docker volume inspect $DOCKER_VOLUME
    else
        log_message.status "docker volume: $DOCKER_VOLUME in use"
        docker volume inspect $DOCKER_VOLUME
    fi

    log_message.status "docker pull: $UBUNTU_IMAGE"
    docker pull $UBUNTU_IMAGE

    # Create the per-job source and version directories on the volume
    # (paths are relative to the working directory /mnt/$CI_JOB_ID).
    log_message.status "docker volume: create destination directory on volume"
    log_message.info "docker run -v $DOCKER_VOLUME:/mnt -w /mnt/$CI_JOB_ID $UBUNTU_IMAGE mkdir -p gen_srcdir ${TRITON_VERSION}"
    docker run \
        --rm \
        --label RUNNER_ID=$RUNNER_ID \
        --label PROJECT_NAME=$PROJECT_NAME \
        -v $DOCKER_VOLUME:/mnt \
        -w /mnt/$CI_JOB_ID \
        $UBUNTU_IMAGE \
        mkdir -p gen_srcdir ${TRITON_VERSION}

    # Create every model output directory on the volume in one container run.
    log_message.status "docker volume: create model directories on volume"
    docker run \
        --rm \
        --label RUNNER_ID=$RUNNER_ID \
        --label PROJECT_NAME=$PROJECT_NAME \
        -v $DOCKER_VOLUME:/mnt \
        -w /mnt/$CI_JOB_ID \
        $UBUNTU_IMAGE \
        mkdir -p \
        $TRITON_MDLS_BLD_DIR \
        $TRITON_MDLS_SRC_DIR \
        $TRITON_MDLS_QA_MODEL \
        $TRITON_MDLS_QA_VARIABLE_MODEL \
        $TRITON_MDLS_QA_IDENTITY_MODEL \
        $TRITON_MDLS_QA_IDENTITY_BIG_MODEL \
        $TRITON_MDLS_QA_SHAPETENSOR_MODEL \
        $TRITON_MDLS_QA_RESHAPE_MODEL \
        $TRITON_MDLS_QA_SEQUENCE_MODEL \
        $TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL \
        $TRITON_MDLS_QA_DYNA_SEQUENCE_IMPLICIT_MODEL \
        $TRITON_MDLS_QA_VARIABLE_SEQUENCE_MODEL \
        $TRITON_MDLS_QA_ENSEMBLE_MODEL \
        $TRITON_MDLS_QA_NOSHAPE_MODEL \
        $TRITON_MDLS_QA_TRT_PLUGIN_MODEL \
        $TRITON_MDLS_QA_RAGGED_MODEL \
        $TRITON_MDLS_QA_TRT_FORMAT_MODEL \
        $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL \
        $TRITON_MDLS_QA_SEQUENCE_IMPLICIT_MODEL \
        $TRITON_MDLS_QA_VARIABLE_SEQUENCE_IMPLICIT_MODEL \
        $TRITON_MDLS_QA_SEQUENCE_INITIAL_STATE_IMPLICIT_MODEL \
        $TRITON_MDLS_QA_VARIABLE_SEQUENCE_INITIAL_STATE_IMPLICIT_MODEL \
        $TRITON_MDLS_QA_TORCHTRT_MODEL \
        $TRITON_MDLS_QA_SCALAR_MODELS \
        $TRITON_MDLS_QA_DYNAMIC_BATCH_IMAGE_MODEL

    # This container is created but not started here; it mounts the volume and
    # serves as the endpoint for the `docker cp` calls below.
    log_message.status "docker container: create container $DOCKER_VOLUME_CONTAINER"
    log_message.info "docker create --name $DOCKER_VOLUME_CONTAINER -v $DOCKER_VOLUME:/mnt -w /mnt/$CI_JOB_ID $UBUNTU_IMAGE"
    docker create \
        --label RUNNER_ID=$RUNNER_ID \
        --label PROJECT_NAME=$PROJECT_NAME \
        --name $DOCKER_VOLUME_CONTAINER \
        -v $DOCKER_VOLUME:/mnt \
        -w /mnt/$CI_JOB_ID \
        $UBUNTU_IMAGE

    # Copy the current directory (including the generated scripts) onto the volume.
    log_message.status "docker container: copy script to container"
    docker cp . $DOCKER_VOLUME_CONTAINER:/mnt/$CI_JOB_ID/gen_srcdir

    # OpenVINO generation is skipped when the host architecture is aarch64.
    if [[ "aarch64" != $(uname -m) ]] ; then

        log_message.status "docker run: $OPENVINOSCRIPT"
        log_message.info "docker run $DOCKER_GPU_ARGS -v $DOCKER_VOLUME:/mnt $UBUNTU_IMAGE bash -e $TRITON_MDLS_SRC_DIR/$OPENVINOSCRIPT"
        docker run \
            --rm \
            -e TRITON_GENSRCDIR=$TRITON_MDLS_SRC_DIR \
            --label RUNNER_ID=$RUNNER_ID \
            --label PROJECT_NAME=$PROJECT_NAME \
            $DOCKER_GPU_ARGS \
            -v $DOCKER_VOLUME:/mnt \
            -t \
            $UBUNTU_IMAGE \
            bash -e $TRITON_MDLS_SRC_DIR/$OPENVINOSCRIPT

        exit_code=$?

        if [ $exit_code -ne 0 ]; then
            log_message.error "docker run: ${OPENVINOSCRIPT} failed"
            exit 1
        fi

        # The local copy of the script is deleted only after a successful run.
        rm $OPENVINOSCRIPT
    fi # [[ "aarch64" != $(uname -m) ]]

    # ONNX generation runs in the same Ubuntu image.
    log_message.status "docker run: $ONNXSCRIPT"
    log_message.info "docker run $DOCKER_GPU_ARGS -v $DOCKER_VOLUME:/mnt $UBUNTU_IMAGE bash -e $TRITON_MDLS_SRC_DIR/$ONNXSCRIPT"
    docker run \
        --rm \
        -e TRITON_GENSRCDIR=$TRITON_MDLS_SRC_DIR \
        --label RUNNER_ID=$RUNNER_ID \
        --label PROJECT_NAME=$PROJECT_NAME \
        $DOCKER_GPU_ARGS \
        -v $DOCKER_VOLUME:/mnt \
        -t \
        $UBUNTU_IMAGE \
        bash -e $TRITON_MDLS_SRC_DIR/$ONNXSCRIPT

    exit_code=$?

    if [ $exit_code -ne 0 ]; then
        log_message.error "docker run: ${ONNXSCRIPT} failed"
        exit 1
    fi

    rm $ONNXSCRIPT

    # PyTorch generation runs in its own image.
    log_message.status "docker pull: $PYTORCH_IMAGE"
    log_message.info "docker pull $PYTORCH_IMAGE"
    docker pull $PYTORCH_IMAGE

    log_message.status "docker run: $TORCHSCRIPT"
    log_message.info "docker run $DOCKER_GPU_ARGS -v $DOCKER_VOLUME:/mnt $PYTORCH_IMAGE bash -e $TRITON_MDLS_SRC_DIR/$TORCHSCRIPT"
    docker run \
        --rm \
        -e TRITON_GENSRCDIR=$TRITON_MDLS_SRC_DIR \
        --label RUNNER_ID=$RUNNER_ID \
        --label PROJECT_NAME=$PROJECT_NAME \
        $DOCKER_GPU_ARGS \
        -v $DOCKER_VOLUME:/mnt \
        -t \
        $PYTORCH_IMAGE \
        bash -e $TRITON_MDLS_SRC_DIR/$TORCHSCRIPT

    exit_code=$?

    if [ $exit_code -ne 0 ]; then
        log_message.error "docker run: ${TORCHSCRIPT} failed"
        exit 1
    fi

    rm $TORCHSCRIPT

    # TensorRT generation is skipped when MODEL_TYPE is "igpu".
    if [ "$MODEL_TYPE" != "igpu" ] ; then
        log_message.status "docker pull: $TENSORRT_IMAGE"
        docker pull $TENSORRT_IMAGE

        # `-e TRT_VERBOSE` (no value) forwards the caller's TRT_VERBOSE setting
        # into the container.
        log_message.status "docker run: $TRTSCRIPT"
        log_message.info "docker run $DOCKER_GPU_ARGS -v $DOCKER_VOLUME:/mnt $TENSORRT_IMAGE bash -e $TRITON_MDLS_SRC_DIR/$TRTSCRIPT"
        docker run \
            --rm \
            -e TRITON_GENSRCDIR=$TRITON_MDLS_SRC_DIR \
            --label RUNNER_ID=$RUNNER_ID \
            --label PROJECT_NAME=$PROJECT_NAME \
            $DOCKER_GPU_ARGS \
            -v $DOCKER_VOLUME:/mnt \
            -t \
            -e TRT_VERBOSE \
            $TENSORRT_IMAGE \
            bash -e $TRITON_MDLS_SRC_DIR/$TRTSCRIPT

        exit_code=$?

        if [ $exit_code -ne 0 ]; then
            log_message.error "docker run: ${TRTSCRIPT} failed"
            exit 1
        fi

        rm $TRTSCRIPT
    fi # [ "$MODEL_TYPE" != "igpu" ]

    # When CI is unset or empty: copy the generated models to /tmp on the host,
    # then remove every container that references the volume and the volume
    # itself. When CI is set, the volume and helper container are left in place.
    # NOTE(review): $CI is unquoted, so a value containing whitespace would
    # break the test expression.
    if [ -z $CI ] ; then
        log_message.status "docker cp:copying generated models to /tmp/"
        docker cp $DOCKER_VOLUME_CONTAINER:$TRITON_MDLS_BLD_DIR/$TRITON_VERSION /tmp/
        log_message.status "docker rm: removing docker container $DOCKER_VOLUME_CONTAINER"
        docker rm -f $(docker ps -a --filter volume=$DOCKER_VOLUME --format '{{ .ID }}')
        log_message.status "docker volume rm: removing docker volume $DOCKER_VOLUME"
        docker volume rm $DOCKER_VOLUME
    fi # [ -z $CI ]

elif [ "$TRITON_MODELS_USE_ENROOT" -eq 1 ] && which enroot ; then
    # Enroot path: model output goes under /tmp/$CI_JOB_ID on the host, and the
    # host /tmp is mounted at /tmp inside every container (`-m /tmp:/tmp`).
    # NOTE(review): unlike the Docker path, this branch has no aarch64 or igpu
    # guards, and the exit status of the ubuntu/tensorrt `enroot import` and of
    # every `enroot create` is not checked.
    log_message.info "NVIDIA Enroot is installed." ;

    TRITON_MDLS_BLD_DIR="/tmp/$CI_JOB_ID" define_model_output_directories

    SCRIPT_NAME_SUFFIX=enroot.v1 define_models_generation_scripts

    # Start from an empty build directory.
    log_message.status "cleanup models folder if exists: $TRITON_MDLS_BLD_DIR"
    rm -rf $TRITON_MDLS_BLD_DIR

    log_message.status "create models directory structure in: $TRITON_MDLS_BLD_DIR"
     mkdir -p \
        $TRITON_MDLS_BLD_DIR \
        $TRITON_MDLS_SRC_DIR \
        $TRITON_MDLS_QA_MODEL \
        $TRITON_MDLS_QA_VARIABLE_MODEL \
        $TRITON_MDLS_QA_IDENTITY_MODEL \
        $TRITON_MDLS_QA_IDENTITY_BIG_MODEL \
        $TRITON_MDLS_QA_SHAPETENSOR_MODEL \
        $TRITON_MDLS_QA_RESHAPE_MODEL \
        $TRITON_MDLS_QA_SEQUENCE_MODEL \
        $TRITON_MDLS_QA_DYNA_SEQUENCE_MODEL \
        $TRITON_MDLS_QA_DYNA_SEQUENCE_IMPLICIT_MODEL \
        $TRITON_MDLS_QA_VARIABLE_SEQUENCE_MODEL \
        $TRITON_MDLS_QA_ENSEMBLE_MODEL \
        $TRITON_MDLS_QA_NOSHAPE_MODEL \
        $TRITON_MDLS_QA_TRT_PLUGIN_MODEL \
        $TRITON_MDLS_QA_RAGGED_MODEL \
        $TRITON_MDLS_QA_TRT_FORMAT_MODEL \
        $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL \
        $TRITON_MDLS_QA_SEQUENCE_IMPLICIT_MODEL \
        $TRITON_MDLS_QA_VARIABLE_SEQUENCE_IMPLICIT_MODEL \
        $TRITON_MDLS_QA_SEQUENCE_INITIAL_STATE_IMPLICIT_MODEL \
        $TRITON_MDLS_QA_VARIABLE_SEQUENCE_INITIAL_STATE_IMPLICIT_MODEL \
        $TRITON_MDLS_QA_TORCHTRT_MODEL \
        $TRITON_MDLS_QA_SCALAR_MODELS \
        $TRITON_MDLS_QA_DYNAMIC_BATCH_IMAGE_MODEL

    log_message.status "copy scripts to: $TRITON_MDLS_SRC_DIR"
    cp -rv $TRITON_MDLS_BASE_SCRIPT_DIR/* $TRITON_MDLS_SRC_DIR/

    # The Ubuntu image is imported once and reused for both the OpenVINO and
    # the ONNX containers below.
    log_message.status "enroot import: $UBUNTU_IMAGE to ubuntu.$CI_JOB_ID.enroot.sqsh"
    enroot import --output /tmp/ubuntu.$CI_JOB_ID.enroot.sqsh docker://$UBUNTU_IMAGE


    # The Ubuntu-based containers are started with --root; the PyTorch and
    # TensorRT containers further down are not.
    log_message.status "enroot create: openvino.ubuntu.$CI_JOB_ID"
    enroot create --name openvino.ubuntu.$CI_JOB_ID /tmp/ubuntu.$CI_JOB_ID.enroot.sqsh
    log_message.info "enroot start: openvino.ubuntu.$CI_JOB_ID"
    enroot start --root --rw -m /tmp:/tmp openvino.ubuntu.$CI_JOB_ID bash -xe $TRITON_MDLS_SRC_DIR/$OPENVINOSCRIPT
    if [ $? -ne 0 ]; then
        log_message.error "enroot start: ${OPENVINOSCRIPT} failed"
        exit 1
    fi


    log_message.status "enroot create: onnxruntime.ubuntu.$CI_JOB_ID"
    enroot create --name onnxruntime.ubuntu.$CI_JOB_ID /tmp/ubuntu.$CI_JOB_ID.enroot.sqsh
    log_message.info "enroot start: onnxruntime.ubuntu.$CI_JOB_ID"
    enroot start --root --rw -m /tmp:/tmp onnxruntime.ubuntu.$CI_JOB_ID bash -xe $TRITON_MDLS_SRC_DIR/$ONNXSCRIPT
    if [ $? -ne 0 ]; then
        log_message.error "enroot start: ${ONNXSCRIPT} failed"
        exit 1
    fi


    # PyTorch: import, create and run; this is the only import whose exit
    # status is checked.
    log_message.status "enroot import: $PYTORCH_IMAGE to /tmp/pytorch.$CI_JOB_ID.enroot.sqsh"
    enroot import --output /tmp/pytorch.$CI_JOB_ID.enroot.sqsh docker://$PYTORCH_IMAGE
    if [ $? -ne 0 ]; then
        log_message.error "enroot import: ${PYTORCH_IMAGE} failed"
        exit 1
    fi

    log_message.status "enroot create: pytorch.$CI_JOB_ID"
    enroot create --name pytorch.$CI_JOB_ID /tmp/pytorch.$CI_JOB_ID.enroot.sqsh
    log_message.info "enroot start: pytorch.$CI_JOB_ID"
    enroot start --rw -m /tmp:/tmp pytorch.$CI_JOB_ID bash -xe $TRITON_MDLS_SRC_DIR/$TORCHSCRIPT
    if [ $? -ne 0 ]; then
        log_message.error "enroot start: ${TORCHSCRIPT} failed"
        exit 1
    fi

    # TensorRT: import, create and run.
    log_message.status "enroot import: $TENSORRT_IMAGE to /tmp/tensorrt.$CI_JOB_ID.enroot.sqsh"
    enroot import --output /tmp/tensorrt.$CI_JOB_ID.enroot.sqsh docker://$TENSORRT_IMAGE
    log_message.status "enroot create: tensorrt.$CI_JOB_ID"
    enroot create --name tensorrt.$CI_JOB_ID /tmp/tensorrt.$CI_JOB_ID.enroot.sqsh
    log_message.info "enroot start: tensorrt.$CI_JOB_ID"
    enroot start --rw -m /tmp:/tmp tensorrt.$CI_JOB_ID bash -xe $TRITON_MDLS_SRC_DIR/$TRTSCRIPT
    if [ $? -ne 0 ]; then
        log_message.error "enroot start: ${TRTSCRIPT} failed"
        exit 1
    fi

else
    # No usable engine: only warnings are logged; no explicit failure exit is
    # issued from this branch.
    log_message.warning "Neither Docker nor NVIDIA Enroot is installed." ;
    log_message.warning "Please install Docker or NVIDIA Enroot to generate the models." ;
fi


================================================
FILE: qa/common/gen_qa_models.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os
import sys
from builtins import range

import gen_ensemble_model_utils as emu
import numpy as np
from gen_common import (
    np_dtype_bfloat16,
    np_to_model_dtype,
    np_to_onnx_dtype,
    np_to_torch_dtype,
    np_to_trt_dtype,
    openvino_save_model,
)

# Module-level placeholder; None at import time.
# NOTE(review): presumably replaced by the parsed argparse namespace in the
# script entry point - confirm against the code further down the file.
FLAGS = None
# NumPy object dtype.
# NOTE(review): the name suggests it stands for string tensors - confirm.
np_dtype_string = np.dtype(object)
from typing import List, Tuple

# ANSI escape sequences used to color terminal output; `_color_reset` restores
# the default attributes.
_color_blue = "\033[94m"
_color_green = "\033[32m"
_color_magenta = "\033[35m"
_color_red = "\033[31m"
_color_reset = "\033[0m"
_color_yellow = "\033[33m"


def create_plan_dynamic_rf_modelfile(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    swap,
    min_dim,
    max_dim,
):
    """Build and save a dynamic-shape TensorRT plan with explicit I/O formats.

    The network has two inputs, INPUT0 and INPUT1, and two outputs, OUTPUT0
    and OUTPUT1, holding the element-wise sum and difference of the inputs
    (difference and sum when ``swap`` is true). All four I/O tensors are
    restricted to the LINEAR tensor format, the output dtypes are set
    explicitly, and the engine is built with a single optimization profile.
    The serialized engine is written to
    ``<models_dir>/<model_name>/<model_version>/model.plan``.

    Args:
        models_dir: Root directory under which the model directory is created.
        max_batch: 0 builds a model without a batch dimension; any other value
            prepends a dynamic (-1) batch dimension whose profile range is
            1 .. max(1, max_batch).
        model_version: Version subdirectory name (converted with ``str``).
        input_shape: Input dimensions without the batch dimension; -1 marks a
            variable dimension.
        output0_shape: Not used by this function.
        output1_shape: Not used by this function.
        input_dtype: NumPy dtype of both inputs.
        output0_dtype: NumPy dtype of OUTPUT0.
        output1_dtype: NumPy dtype of OUTPUT1.
        swap: When true OUTPUT0 is the difference and OUTPUT1 the sum.
        min_dim: Profile minimum used for every -1 dimension.
        max_dim: Profile maximum used for every -1 dimension; the optimum is
            the midpoint of ``min_dim`` and ``max_dim``. When the pair differs
            from (1, 32) it is appended to the model name.

    Raises:
        RuntimeError: If TensorRT does not produce a serialized engine.
        OSError: If the model version directory cannot be created or the plan
            file cannot be written.
    """
    trt_input_dtype = np_to_trt_dtype(input_dtype)
    trt_output0_dtype = np_to_trt_dtype(output0_dtype)
    trt_output1_dtype = np_to_trt_dtype(output1_dtype)
    trt_memory_format = trt.TensorFormat.LINEAR

    # Create the model; TRT_VERBOSE=1 in the environment selects verbose logs.
    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()
    if max_batch == 0:
        input_with_batchsize = list(input_shape)
    else:
        input_with_batchsize = [-1] + list(input_shape)

    in0 = network.add_input("INPUT0", trt_input_dtype, input_with_batchsize)
    in1 = network.add_input("INPUT1", trt_input_dtype, input_with_batchsize)

    # TRT uint8 cannot be used to represent quantized floating-point value yet
    # uint8 must be converted to float16 or float32 before any operation
    # FIXME: Remove support check when jetson supports TRT 8.5 (DLIS-4256)
    if tu.support_trt_uint8():
        if trt_input_dtype == trt.uint8:
            in0_cast = network.add_identity(in0)
            in0_cast.set_output_type(0, trt.float32)
            in0 = in0_cast.get_output(0)
            in1_cast = network.add_identity(in1)
            in1_cast.set_output_type(0, trt.float32)
            in1 = in1_cast.get_output(0)

    add = network.add_elementwise(in0, in1, trt.ElementWiseOperation.SUM)
    sub = network.add_elementwise(in0, in1, trt.ElementWiseOperation.SUB)
    out0 = add if not swap else sub
    out1 = sub if not swap else add

    # uint8 conversion after operations
    # FIXME: Remove support check when jetson supports TRT 8.5 (DLIS-4256)
    if tu.support_trt_uint8():
        if trt_output0_dtype == trt.uint8:
            out0 = network.add_identity(out0.get_output(0))
            out0.set_output_type(0, trt.uint8)
        if trt_output1_dtype == trt.uint8:
            out1 = network.add_identity(out1.get_output(0))
            out1.set_output_type(0, trt.uint8)

    out0.get_output(0).name = "OUTPUT0"
    out1.get_output(0).name = "OUTPUT1"
    network.mark_output(out0.get_output(0))
    network.mark_output(out1.get_output(0))

    out0.get_output(0).dtype = trt_output0_dtype
    out1.get_output(0).dtype = trt_output1_dtype

    # Restrict every I/O tensor to the single LINEAR format bit.
    format_mask = 1 << int(trt_memory_format)
    in0.allowed_formats = format_mask
    in1.allowed_formats = format_mask
    out0.get_output(0).allowed_formats = format_mask
    out1.get_output(0).allowed_formats = format_mask

    # int8 tensors get an explicit full-range dynamic range.
    if trt_input_dtype == trt.int8:
        in0.dynamic_range = (-128.0, 127.0)
        in1.dynamic_range = (-128.0, 127.0)
    if trt_output0_dtype == trt.int8:
        out0.get_output(0).dynamic_range = (-128.0, 127.0)
    if trt_output1_dtype == trt.int8:
        out1.get_output(0).dynamic_range = (-128.0, 127.0)

    # Assemble min/opt/max shapes for the single optimization profile.
    min_shape = []
    opt_shape = []
    max_shape = []
    if max_batch != 0:
        batch_limit = max(1, max_batch)
        min_shape.append(1)
        opt_shape.append(batch_limit)
        max_shape.append(batch_limit)
    for dim in input_shape:
        if dim == -1:
            min_shape.append(min_dim)
            opt_shape.append(int((max_dim + min_dim) / 2))
            max_shape.append(max_dim)
        else:
            min_shape.append(dim)
            opt_shape.append(dim)
            max_shape.append(dim)

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT0", min_shape, opt_shape, max_shape)
    profile.set_shape("INPUT1", min_shape, opt_shape, max_shape)

    flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

    # Enable the reduced-precision builder flags required by the I/O dtypes.
    for dt in {trt_input_dtype, trt_output0_dtype, trt_output1_dtype}:
        if dt == trt.int8:
            flags |= 1 << int(trt.BuilderFlag.INT8)
        elif dt == trt.float16:
            flags |= 1 << int(trt.BuilderFlag.FP16)
    config = builder.create_builder_config()
    config.flags = flags
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Builder without build_serialized_network: build, then serialize.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize() if engine is not None else None
        del engine

    # Use a different model name for different kinds of models
    model_name = tu.get_model_name(
        "plan_nobatch" if max_batch == 0 else "plan",
        input_dtype,
        output0_dtype,
        output1_dtype,
    )
    if min_dim != 1 or max_dim != 32:
        model_name = "{}-{}-{}".format(model_name, min_dim, max_dim)

    # Fail before touching the filesystem so that a failed build does not
    # leave an empty model.plan behind.
    if engine_bytes is None:
        raise RuntimeError(
            f"TensorRT failed to build an engine for model '{model_name}'"
        )

    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # An existing directory is fine; any other OSError is reported here.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_dynamic_modelfile(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    swap,
    min_dim,
    max_dim,
):
    """Build and save a dynamic-shape TensorRT plan with many profiles.

    The network has two inputs, INPUT0 and INPUT1, and two outputs, OUTPUT0
    and OUTPUT1, holding the element-wise sum and difference of the inputs
    (difference and sum when ``swap`` is true). The serialized engine is
    written to ``<models_dir>/<model_name>/<model_version>/model.plan``.

    Optimization profiles are added in this fixed order (the index is the
    profile index in the engine):

    * 0-3: decreasing maximum batch size. Only profile 0, and only when
      ``min_dim == 1 and max_dim == 32``, has the first non-batch maximum
      dimension enlarged by one.
    * 4-5: minimum batch size 5 and 6.
    * 6: batch range 1 .. ``max_batch`` with optimum batch 1.
    * 7-9: static profiles (min == opt == max) at batch sizes 1, 3 and 5.
    * 10 .. 10 + ``max_batch`` - 1: one profile per exact batch size.

    When ``max_batch`` is 0 the first ten profiles carry no batch dimension
    and the per-batch-size profiles are omitted.

    Args:
        models_dir: Root directory under which the model directory is created.
        max_batch: 0 builds a model without a batch dimension; any other value
            prepends a dynamic (-1) batch dimension.
        model_version: Version subdirectory name (converted with ``str``).
        input_shape: Input dimensions without the batch dimension; -1 marks a
            variable dimension.
        output0_shape: Not used by this function.
        output1_shape: Not used by this function.
        input_dtype: NumPy dtype of both inputs.
        output0_dtype: NumPy dtype used for the model name.
        output1_dtype: NumPy dtype used for the model name.
        swap: When true OUTPUT0 is the difference and OUTPUT1 the sum.
        min_dim: Profile minimum used for every -1 dimension.
        max_dim: Profile maximum used for every -1 dimension; the optimum is
            the midpoint of ``min_dim`` and ``max_dim``. When the pair differs
            from (1, 32) it is appended to the model name.

    Raises:
        RuntimeError: If TensorRT does not produce a serialized engine.
        OSError: If the model version directory cannot be created or the plan
            file cannot be written.
    """
    trt_input_dtype = np_to_trt_dtype(input_dtype)
    # NOTE(review): the output dtypes are converted but not otherwise used in
    # this function; the calls are kept as they were.
    trt_output0_dtype = np_to_trt_dtype(output0_dtype)
    trt_output1_dtype = np_to_trt_dtype(output1_dtype)

    # Create the model; TRT_VERBOSE=1 in the environment selects verbose logs.
    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()
    if max_batch == 0:
        input_with_batchsize = list(input_shape)
    else:
        input_with_batchsize = [-1] + list(input_shape)

    in0 = network.add_input("INPUT0", trt_input_dtype, input_with_batchsize)
    in1 = network.add_input("INPUT1", trt_input_dtype, input_with_batchsize)
    add = network.add_elementwise(in0, in1, trt.ElementWiseOperation.SUM)
    sub = network.add_elementwise(in0, in1, trt.ElementWiseOperation.SUB)

    out0 = add if not swap else sub
    out1 = sub if not swap else add

    out0.get_output(0).name = "OUTPUT0"
    out1.get_output(0).name = "OUTPUT1"
    network.mark_output(out0.get_output(0))
    network.mark_output(out1.get_output(0))

    # Per-dimension min/opt/max shapes, without the batch dimension.
    min_shape = []
    opt_shape = []
    max_shape = []
    for dim in input_shape:
        if dim == -1:
            min_shape.append(min_dim)
            opt_shape.append(int((max_dim + min_dim) / 2))
            max_shape.append(max_dim)
        else:
            min_shape.append(dim)
            opt_shape.append(dim)
            max_shape.append(dim)

    config = builder.create_builder_config()
    # Every profile is kept referenced until the engine has been built.
    profiles = []

    def add_profile(min_dims, opt_dims, max_dims):
        # Register one profile that applies the same shapes to both inputs.
        profile = builder.create_optimization_profile()
        profile.set_shape("INPUT0", min_dims, opt_dims, max_dims)
        profile.set_shape("INPUT1", min_dims, opt_dims, max_dims)
        config.add_optimization_profile(profile)
        profiles.append(profile)

    # create multiple profiles with same shape for testing
    # with decreasing batch sizes
    for i in range(4):
        if max_batch == 0:
            add_profile(min_shape, opt_shape, max_shape)
            continue
        bs = [max_batch - i if max_batch > i else 1]
        opt_bs = [1 + i if 1 + i < max_batch - 1 else max_batch - 1]
        # Hardcoded 'max_shape[0] += 1' in default profile for
        # L0_trt_dynamic_shape, to differentiate whether default profile
        # is used if no profile is specified.
        # Work on a copy: the enlarged dimension must stay private to
        # profile 0 and must not alter max_shape for the later profiles.
        max_shape_override = list(max_shape)
        if i == 0 and min_dim == 1 and max_dim == 32:
            max_shape_override[0] += 1
        add_profile([1] + min_shape, opt_bs + opt_shape, bs + max_shape_override)

    # some profiles with non-one min shape for first dim to test autofiller
    for i in range(2):
        if max_batch == 0:
            add_profile(min_shape, opt_shape, max_shape)
        else:
            add_profile(
                [5 + i] + min_shape, [6] + opt_shape, [max_batch] + max_shape
            )

    # Will repeat another profile with same min and max shapes as the first
    # profile to test non-zero profile for infer_variable test.
    if max_batch == 0:
        add_profile(min_shape, opt_shape, max_shape)
    else:
        add_profile([1] + min_shape, [1] + opt_shape, [max_batch] + max_shape)

    # Will add some profiles with static shapes to test the cases where
    # min_shape=opt_shape=max_shape
    for i in range(3):
        if max_batch == 0:
            static_shape = max_shape
        else:
            # Skipping alternate batch sizes for testing unsupported batches
            # in L0_trt_dynamic_shape.
            static_shape = [1 + (2 * i)] + max_shape
        add_profile(static_shape, static_shape, static_shape)

    # Add profiles where each profile supports a specific batch size
    if max_batch != 0:
        for i in range(max_batch):
            exact_bs = [1 + i]
            add_profile(
                exact_bs + min_shape, exact_bs + opt_shape, exact_bs + max_shape
            )

    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Builder without build_serialized_network: build, then serialize.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize() if engine is not None else None
        del engine

    # Use a different model name for different kinds of models
    model_name = tu.get_model_name(
        "plan_nobatch" if max_batch == 0 else "plan",
        input_dtype,
        output0_dtype,
        output1_dtype,
    )
    print(f"{_color_green}Creating model {model_name}{_color_reset}")
    if min_dim != 1 or max_dim != 32:
        model_name = "{}-{}-{}".format(model_name, min_dim, max_dim)

    # Fail before touching the filesystem so that a failed build does not
    # leave an empty model.plan behind.
    if engine_bytes is None:
        raise RuntimeError(
            f"TensorRT failed to build an engine for model '{model_name}'"
        )

    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # An existing directory is fine; any other OSError is reported here.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_fixed_rf_modelfile(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    swap,
):
    """Build a fixed-shape TensorRT add/sub engine with pinned I/O dtypes/formats.

    The network has two inputs (INPUT0, INPUT1) and two outputs (OUTPUT0,
    OUTPUT1) produced by an element-wise SUM and SUB. Output dtypes are set
    explicitly, every I/O tensor is restricted to the LINEAR tensor format,
    and the serialized engine is written to
    ``<models_dir>/<model_name>/<model_version>/model.plan``.

    Args:
        models_dir: Root directory of the model repository.
        max_batch: Maximum batch size; 0 builds a model without a batch
            dimension.
        model_version: Version sub-directory the plan is written into.
        input_shape: Input shape, excluding the batch dimension.
        output0_shape: Not read by this function; accepted so the signature
            lines up with the other create_plan_* builders.
        output1_shape: Not read by this function; see ``output0_shape``.
        input_dtype: NumPy dtype shared by both inputs.
        output0_dtype: NumPy dtype of OUTPUT0.
        output1_dtype: NumPy dtype of OUTPUT1.
        swap: When true, OUTPUT0 is the difference and OUTPUT1 the sum.

    Raises:
        OSError: If the version directory cannot be created or the plan file
            cannot be written.
    """
    trt_input_dtype = np_to_trt_dtype(input_dtype)
    trt_output0_dtype = np_to_trt_dtype(output0_dtype)
    trt_output1_dtype = np_to_trt_dtype(output1_dtype)
    trt_memory_format = trt.TensorFormat.LINEAR

    # Create the model
    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    fixed_dims = list(input_shape)
    # A leading -1 is the dynamic batch dimension; it is omitted entirely for
    # non-batching models.
    if max_batch == 0:
        input_with_batchsize = list(fixed_dims)
    else:
        input_with_batchsize = [-1] + fixed_dims

    in0 = network.add_input("INPUT0", trt_input_dtype, input_with_batchsize)
    in1 = network.add_input("INPUT1", trt_input_dtype, input_with_batchsize)
    add = network.add_elementwise(in0, in1, trt.ElementWiseOperation.SUM)
    sub = network.add_elementwise(in0, in1, trt.ElementWiseOperation.SUB)

    out0 = add if not swap else sub
    out1 = sub if not swap else add

    out0.get_output(0).name = "OUTPUT0"
    out1.get_output(0).name = "OUTPUT1"
    network.mark_output(out0.get_output(0))
    network.mark_output(out1.get_output(0))

    out0.get_output(0).dtype = trt_output0_dtype
    out1.get_output(0).dtype = trt_output1_dtype

    # allowed_formats is a bitmask indexed by TensorFormat value.
    format_mask = 1 << int(trt_memory_format)
    in0.allowed_formats = format_mask
    in1.allowed_formats = format_mask
    out0.get_output(0).allowed_formats = format_mask
    out1.get_output(0).allowed_formats = format_mask

    # INT8 tensors get an explicit full-range dynamic range.
    if trt_input_dtype == trt.int8:
        in0.dynamic_range = (-128.0, 127.0)
        in1.dynamic_range = (-128.0, 127.0)
    if trt_output0_dtype == trt.int8:
        out0.get_output(0).dynamic_range = (-128.0, 127.0)
    if trt_output1_dtype == trt.int8:
        out1.get_output(0).dynamic_range = (-128.0, 127.0)

    # Single optimization profile: batch ranges from 1 to max_batch (when
    # batching is enabled) while all remaining dimensions stay fixed.
    if max_batch != 0:
        largest_batch = max(1, max_batch)
        min_shape = [1] + fixed_dims
        opt_shape = [largest_batch] + fixed_dims
        max_shape = [largest_batch] + fixed_dims
    else:
        min_shape = list(fixed_dims)
        opt_shape = list(fixed_dims)
        max_shape = list(fixed_dims)

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT0", min_shape, opt_shape, max_shape)
    profile.set_shape("INPUT1", min_shape, opt_shape, max_shape)

    flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

    # Enable the reduced-precision builder modes required by any I/O dtype.
    for dt in {trt_input_dtype, trt_output0_dtype, trt_output1_dtype}:
        if dt == trt.int8:
            flags |= 1 << int(trt.BuilderFlag.INT8)
        elif dt == trt.float16:
            flags |= 1 << int(trt.BuilderFlag.FP16)

    # The builder config is created exactly once, right where it is populated.
    config = builder.create_builder_config()
    config.flags = flags
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)

    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback for TensorRT builders without build_serialized_network.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_name = tu.get_model_name(
        "plan_nobatch" if max_batch == 0 else "plan",
        input_dtype,
        output0_dtype,
        output1_dtype,
    )
    print(f"{_color_green}Creating model {model_name}{_color_reset}")
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Only an already-existing directory is tolerated; other failures
    # (e.g. permissions) surface here instead of at the later open().
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_fixed_modelfile(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    swap,
):
    """Build a fixed-shape TensorRT add/sub engine and write it as model.plan.

    INPUT0 and INPUT1 feed an element-wise SUM and SUB whose results become
    OUTPUT0 and OUTPUT1 (exchanged when ``swap`` is true). A single
    optimization profile is registered, and the serialized engine is stored
    at ``<models_dir>/<model_name>/<model_version>/model.plan``.

    ``output0_shape`` and ``output1_shape`` are accepted but not read here.
    """
    trt_input_dtype = np_to_trt_dtype(input_dtype)
    # The output dtypes are converted as well, although only the input dtype
    # is consumed below.
    trt_output0_dtype = np_to_trt_dtype(output0_dtype)
    trt_output1_dtype = np_to_trt_dtype(output1_dtype)

    has_batch_dim = max_batch != 0
    fixed_dims = list(input_shape)

    # Pick logger verbosity from the TRT_VERBOSE environment variable.
    if os.environ.get("TRT_VERBOSE") == "1":
        severity = trt.Logger.VERBOSE
    else:
        severity = trt.Logger.INFO
    TRT_LOGGER = trt.Logger(severity)

    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    # Batching models carry a dynamic (-1) leading dimension.
    network_dims = ([-1] + fixed_dims) if has_batch_dim else list(fixed_dims)
    lhs = network.add_input("INPUT0", trt_input_dtype, network_dims)
    rhs = network.add_input("INPUT1", trt_input_dtype, network_dims)
    sum_layer = network.add_elementwise(lhs, rhs, trt.ElementWiseOperation.SUM)
    diff_layer = network.add_elementwise(lhs, rhs, trt.ElementWiseOperation.SUB)

    if swap:
        first_layer, second_layer = diff_layer, sum_layer
    else:
        first_layer, second_layer = sum_layer, diff_layer

    first_layer.get_output(0).name = "OUTPUT0"
    second_layer.get_output(0).name = "OUTPUT1"
    network.mark_output(first_layer.get_output(0))
    network.mark_output(second_layer.get_output(0))

    # Profile bounds: batch spans 1..max_batch, every other dim is fixed.
    if has_batch_dim:
        top_batch = max(1, max_batch)
        min_shape = [1] + fixed_dims
        opt_shape = [top_batch] + fixed_dims
        max_shape = [top_batch] + fixed_dims
    else:
        min_shape = list(fixed_dims)
        opt_shape = list(fixed_dims)
        max_shape = list(fixed_dims)

    config = builder.create_builder_config()
    profile = builder.create_optimization_profile()
    for tensor_name in ("INPUT0", "INPUT1"):
        profile.set_shape(tensor_name, min_shape, opt_shape, max_shape)
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)

    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Builder lacks build_serialized_network: build, then serialize.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine
    del network

    name_prefix = "plan" if has_batch_dim else "plan_nobatch"
    model_name = tu.get_model_name(
        name_prefix, input_dtype, output0_dtype, output1_dtype
    )
    print(f"{_color_green}Creating model {model_name}{_color_reset}")
    model_version_dir = "/".join([models_dir, model_name, str(model_version)])

    try:
        os.makedirs(model_version_dir)
    except OSError:
        pass  # ignore existing dir

    plan_path = model_version_dir + "/model.plan"
    with open(plan_path, "wb") as plan_file:
        plan_file.write(engine_bytes)


def create_plan_modelfile(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    swap=False,
    min_dim=1,
    max_dim=32,
):
    """Dispatch to the TensorRT plan builder matching the dtypes and shapes.

    Nothing is built when ``tu.validate_for_trt_model`` rejects the
    combination. Otherwise the builder is selected as follows:

    * any uint8 dtype -> ``create_plan_dynamic_rf_modelfile``
    * not all float32 -> the ``*_rf`` builder, dynamic or fixed by shape
    * all float32     -> the plain builder, dynamic or fixed by shape

    ``min_dim`` and ``max_dim`` are only forwarded to the dynamic builders.
    """
    if not tu.validate_for_trt_model(
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_shape,
        output0_shape,
        output1_shape,
    ):
        return

    dtypes = (input_dtype, output0_dtype, output1_dtype)
    shapes = (input_shape, output0_shape, output1_shape)

    # Positional argument packs shared by the four builders.
    fixed_args = (
        models_dir,
        max_batch,
        model_version,
        input_shape,
        output0_shape,
        output1_shape,
        input_dtype,
        output0_dtype,
        output1_dtype,
        swap,
    )
    dynamic_args = fixed_args + (min_dim, max_dim)

    if any(dt == np.uint8 for dt in dtypes):
        # TRT uint8 cannot be used to represent quantized floating-point value yet
        create_plan_dynamic_rf_modelfile(*dynamic_args)
        return

    all_fp32 = all(dt == np.float32 for dt in dtypes)
    all_fixed = all(tu.shape_is_fixed(shape) for shape in shapes)

    if all_fixed:
        build = (
            create_plan_fixed_modelfile if all_fp32 else create_plan_fixed_rf_modelfile
        )
        build(*fixed_args)
    else:
        build = (
            create_plan_dynamic_modelfile
            if all_fp32
            else create_plan_dynamic_rf_modelfile
        )
        build(*dynamic_args)


def create_plan_modelconfig(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    output0_label_cnt,
    version_policy,
    min_dim=1,
    max_dim=32,
):
    """Write config.pbtxt and output0_labels.txt for a TensorRT plan model.

    Nothing is written when ``tu.validate_for_trt_model`` rejects the
    combination. When ``input_shape`` contains -1 the config additionally
    carries an ``instance_group`` entry naming an optimization profile.

    ``version_policy`` is either None (latest, one version) or a
    ``(kind, value)`` pair where kind is "latest", "specific", or anything
    else (treated as "all"). ``model_version`` is not read here.
    """
    if not tu.validate_for_trt_model(
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_shape,
        output0_shape,
        output1_shape,
    ):
        return

    # Unpack version policy
    if version_policy is None:
        version_policy_str = "{ latest { num_versions: 1 }}"
    else:
        policy_kind, policy_value = version_policy
        if policy_kind == "latest":
            version_policy_str = "{{ latest {{ num_versions: {} }}}}".format(
                policy_value
            )
        elif policy_kind == "specific":
            version_policy_str = "{{ specific {{ versions: {} }}}}".format(
                policy_value
            )
        else:
            version_policy_str = "{ all { }}"

    # Use a different model name for different kinds of models
    name_prefix = "plan_nobatch" if max_batch == 0 else "plan"
    model_name = tu.get_model_name(
        name_prefix, input_dtype, output0_dtype, output1_dtype
    )
    # The banner intentionally shows the name before any dim-range suffix.
    print(f"{_color_green}Creating config for {model_name}{_color_reset}")
    if not (min_dim == 1 and max_dim == 32):
        model_name = "{}-{}-{}".format(model_name, min_dim, max_dim)

    config_dir = models_dir + "/" + model_name
    has_variable_dim = -1 in input_shape

    # Substitution values shared by both templates, in placeholder order.
    shared_fields = [
        model_name,
        max_batch,
        version_policy_str,
        np_to_model_dtype(input_dtype),
        tu.shape_to_dims_str(input_shape),
        np_to_model_dtype(input_dtype),
        tu.shape_to_dims_str(input_shape),
        np_to_model_dtype(output0_dtype),
        tu.shape_to_dims_str(output0_shape),
        np_to_model_dtype(output1_dtype),
        tu.shape_to_dims_str(output1_shape),
    ]

    if has_variable_dim:
        # Selects the sixth profile for FP32 datatype
        # Note the min and max shapes of first and sixth
        # profile are identical.
        profile_index = 6 if input_dtype == np.float32 else 0
        config = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
version_policy: {}
input [
  {{
    name: "INPUT0"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "INPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT0"
    data_type: {}
    dims: [ {} ]
    label_filename: "output0_labels.txt"
   }},
  {{
    name: "OUTPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
instance_group [
  {{
      profile:"{}"
  }}
]
""".format(
            *shared_fields, profile_index
        )
    else:
        config = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
version_policy: {}
input [
  {{
    name: "INPUT0"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "INPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT0"
    data_type: {}
    dims: [ {} ]
    label_filename: "output0_labels.txt"
   }},
  {{
    name: "OUTPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
""".format(
            *shared_fields
        )

    try:
        os.makedirs(config_dir)
    except OSError:
        pass  # ignore existing dir

    with open(config_dir + "/config.pbtxt", "w") as config_file:
        config_file.write(config)

    # One "label<N>" line per OUTPUT0 class.
    with open(config_dir + "/output0_labels.txt", "w") as labels_file:
        for label_index in range(output0_label_cnt):
            labels_file.write("label" + str(label_index) + "\n")


def create_onnx_modelfile(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    swap=False,
):
    """Build an ONNX add/sub model and save it as model.onnx.

    The graph routes INPUT0/INPUT1 through an Identity (or a Cast to INT32
    for INT8, INT16 and STRING inputs), feeds them to Add and Sub, and casts
    the results to the requested output dtypes. Without ``swap`` OUTPUT0 is
    the sum and OUTPUT1 the difference; with ``swap`` they are exchanged.

    Nothing is written when ``tu.validate_for_onnx_model`` rejects the
    combination. The opset is pinned only when ``FLAGS.onnx_opset`` is > 0.
    """
    if not tu.validate_for_onnx_model(
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_shape,
        output0_shape,
        output1_shape,
    ):
        return

    make_node = onnx.helper.make_node
    make_value_info = onnx.helper.make_tensor_value_info
    proto = onnx.TensorProto

    onnx_input_dtype = np_to_onnx_dtype(input_dtype)
    onnx_output0_dtype = np_to_onnx_dtype(output0_dtype)
    onnx_output1_dtype = np_to_onnx_dtype(output1_dtype)

    # NOTE(review): all three ONNX shapes are derived from input_shape, not
    # from output0_shape/output1_shape -- looks deliberate, confirm.
    onnx_input_shape, idx = tu.shape_to_onnx_shape(input_shape, 0)
    onnx_output0_shape, idx = tu.shape_to_onnx_shape(input_shape, idx)
    onnx_output1_shape, idx = tu.shape_to_onnx_shape(input_shape, idx)

    # Create the model
    batching = max_batch != 0
    model_name = tu.get_model_name(
        "onnx" if batching else "onnx_nobatch",
        input_dtype,
        output0_dtype,
        output1_dtype,
    )
    print(f"{_color_green}Creating model {model_name}{_color_reset}")
    model_version_dir = "/".join([models_dir, model_name, str(model_version)])

    # Batching models get an unnamed dynamic leading dimension.
    batch_dim = [None] if batching else []

    onnx_inputs = [
        make_value_info(tensor_name, onnx_input_dtype, batch_dim + onnx_input_shape)
        for tensor_name in ("INPUT0", "INPUT1")
    ]
    onnx_outputs = [
        make_value_info("OUTPUT0", onnx_output0_dtype, batch_dim + onnx_output0_shape),
        make_value_info("OUTPUT1", onnx_output1_dtype, batch_dim + onnx_output1_shape),
    ]

    # cast int8, int16 input to higher precision int as Onnx Add/Sub operator doesn't support those type
    # Also casting String data type to int32
    if onnx_input_dtype in (proto.INT8, proto.INT16, proto.STRING):
        internal_in0 = make_node("Cast", ["INPUT0"], ["_INPUT0"], to=proto.INT32)
        internal_in1 = make_node("Cast", ["INPUT1"], ["_INPUT1"], to=proto.INT32)
    else:
        internal_in0 = make_node("Identity", ["INPUT0"], ["_INPUT0"])
        internal_in1 = make_node("Identity", ["INPUT1"], ["_INPUT1"])

    if swap:
        add_target, sub_target = "CAST1", "CAST0"
    else:
        add_target, sub_target = "CAST0", "CAST1"
    add = make_node("Add", ["_INPUT0", "_INPUT1"], [add_target])
    sub = make_node("Sub", ["_INPUT0", "_INPUT1"], [sub_target])

    def final_node(source, destination, target_dtype):
        # Avoid cast from float16 to float16
        # (bug in Onnx Runtime, cast from float16 to float16 will become cast from float16 to float32)
        if onnx_input_dtype == proto.FLOAT16 and target_dtype == onnx_input_dtype:
            return make_node("Identity", [source], [destination])
        return make_node("Cast", [source], [destination], to=target_dtype)

    cast0 = final_node("CAST0", "OUTPUT0", onnx_output0_dtype)
    cast1 = final_node("CAST1", "OUTPUT1", onnx_output1_dtype)

    onnx_nodes = [internal_in0, internal_in1, add, sub, cast0, cast1]
    graph_proto = onnx.helper.make_graph(
        onnx_nodes, model_name, onnx_inputs, onnx_outputs
    )

    model_kwargs = {"producer_name": "triton"}
    if FLAGS.onnx_opset > 0:
        model_kwargs["opset_imports"] = [
            onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        ]
    model_def = onnx.helper.make_model(graph_proto, **model_kwargs)

    try:
        os.makedirs(model_version_dir)
    except OSError:
        pass  # ignore existing dir

    onnx.save(model_def, model_version_dir + "/model.onnx")


def create_onnx_modelconfig(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    output0_label_cnt,
    version_policy,
):
    """Write config.pbtxt and output0_labels.txt for the ONNX add/sub model.

    Nothing is written when ``tu.validate_for_onnx_model`` rejects the
    combination. The config text itself is produced by
    ``emu.create_general_modelconfig``; ``model_version`` is not read here.
    """
    accepted = tu.validate_for_onnx_model(
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_shape,
        output0_shape,
        output1_shape,
    )
    if not accepted:
        return

    # Use a different model name for the non-batching variant
    name_prefix = "onnx_nobatch" if max_batch == 0 else "onnx"
    model_name = tu.get_model_name(
        name_prefix, input_dtype, output0_dtype, output1_dtype
    )

    print(f"{_color_green}Creating config for {model_name}{_color_reset}")

    config_dir = "/".join([models_dir, model_name])

    # [TODO] move create_general_modelconfig() out of emu as it is general
    # enough for all backends to use
    input_dtypes = emu.repeat(input_dtype, 2)
    input_shapes = emu.repeat(input_shape, 2)
    input_extras = emu.repeat(None, 2)
    output_extras = emu.repeat(None, 2)
    config = emu.create_general_modelconfig(
        model_name,
        "onnxruntime_onnx",
        max_batch,
        input_dtypes,
        input_shapes,
        input_extras,
        [output0_dtype, output1_dtype],
        [output0_shape, output1_shape],
        output_extras,
        ["output0_labels.txt", None],
        version_policy=version_policy,
        force_tensor_number_suffix=True,
    )

    try:
        os.makedirs(config_dir)
    except OSError:
        pass  # ignore existing dir

    with open(config_dir + "/config.pbtxt", "w") as config_file:
        config_file.write(config)

    # One "label<N>" line per OUTPUT0 class.
    with open(config_dir + "/output0_labels.txt", "w") as labels_file:
        for label_index in range(output0_label_cnt):
            labels_file.write("label" + str(label_index) + "\n")


def create_libtorch_modelfile(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    swap=False,
):
    """Build a TorchScript add/sub module and save it as model.pt.

    Returns without writing anything when ``tu.validate_for_libtorch_model``
    rejects the combination. Otherwise one of eight ``AddSubNet`` variants is
    defined, chosen by which of the input/output dtypes equal
    ``np_dtype_string``; it is compiled with ``torch.jit.script`` and saved to
    ``<models_dir>/<model_name>/<model_version>/model.pt``.

    In every variant OUTPUT0 is INPUT0 + INPUT1 and OUTPUT1 is
    INPUT0 - INPUT1, exchanged when ``swap`` is true. String inputs are
    parsed with ``int()``; string outputs are produced with ``str()``.
    """
    if not tu.validate_for_libtorch_model(
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_shape,
        output0_shape,
        output1_shape,
        max_batch,
    ):
        return

    torch_output0_dtype = np_to_torch_dtype(output0_dtype)
    torch_output1_dtype = np_to_torch_dtype(output1_dtype)

    model_name = tu.get_model_name(
        "libtorch_nobatch" if max_batch == 0 else "libtorch",
        input_dtype,
        output0_dtype,
        output1_dtype,
    )

    print(f"{_color_green}Creating model {model_name}{_color_reset}")

    # handle for -1 (when variable) since can't create tensor with shape of [-1]
    # NOTE: the normalized shape is not read again within this function.
    input_shape = [abs(ips) for ips in input_shape]

    # Create the model
    # Every AddSubNet variant below takes a single positional argument: the
    # (output0 torch dtype, output1 torch dtype, swap) tuple built at the
    # construction site near the end of this function.  The variants differ
    # only in their forward() signature/annotations, which the scripting step
    # relies on, so they are kept as separate class definitions.

    # Variant: string inputs, tensor outputs.
    if (
        (input_dtype == np_dtype_string)
        and (output0_dtype != np_dtype_string)
        and (output1_dtype != np_dtype_string)
    ):

        class AddSubNet(nn.Module):
            def __init__(self, *args):
                self.output0_dtype = args[0][0]
                self.output1_dtype = args[0][1]
                self.swap = args[0][2]
                super(AddSubNet, self).__init__()

            def forward(self, INPUT0: List[str], INPUT1: List[str]):
                input0_int = torch.tensor([int(i) for i in INPUT0])
                input1_int = torch.tensor([int(i) for i in INPUT1])
                op0 = (
                    input0_int + input1_int
                    if not self.swap
                    else input0_int - input1_int
                )
                op1 = (
                    input0_int - input1_int
                    if not self.swap
                    else input0_int + input1_int
                )
                return op0.to(self.output0_dtype), op1.to(self.output1_dtype)

    # Variant: string inputs, both outputs strings.
    elif (
        (input_dtype == np_dtype_string)
        and (output0_dtype == np_dtype_string)
        and (output1_dtype == np_dtype_string)
    ):

        class AddSubNet(nn.Module):
            def __init__(self, *args):
                self.output0_dtype = args[0][0]
                self.output1_dtype = args[0][1]
                self.swap = args[0][2]
                super(AddSubNet, self).__init__()

            def forward(
                self, INPUT0: List[str], INPUT1: List[str]
            ) -> Tuple[List[str], List[str]]:
                input0_int = torch.tensor([int(i) for i in INPUT0])
                input1_int = torch.tensor([int(i) for i in INPUT1])
                op0 = [
                    str(i.item())
                    for i in (
                        input0_int + input1_int
                        if not self.swap
                        else input0_int - input1_int
                    )
                ]
                op1 = [
                    str(i.item())
                    for i in (
                        input0_int - input1_int
                        if not self.swap
                        else input0_int + input1_int
                    )
                ]
                return op0, op1

    # Variant: string inputs, string OUTPUT0, tensor OUTPUT1.
    elif (
        (input_dtype == np_dtype_string)
        and (output0_dtype == np_dtype_string)
        and (output1_dtype != np_dtype_string)
    ):

        class AddSubNet(nn.Module):
            def __init__(self, *args):
                self.output0_dtype = args[0][0]
                self.output1_dtype = args[0][1]
                self.swap = args[0][2]
                super(AddSubNet, self).__init__()

            def forward(
                self, INPUT0: List[str], INPUT1: List[str]
            ) -> Tuple[List[str], torch.Tensor]:
                input0_int = torch.tensor([int(i) for i in INPUT0])
                input1_int = torch.tensor([int(i) for i in INPUT1])
                op0 = [
                    str(i.item())
                    for i in (
                        input0_int + input1_int
                        if not self.swap
                        else input0_int - input1_int
                    )
                ]
                op1 = (
                    input0_int - input1_int
                    if not self.swap
                    else input0_int + input1_int
                ).to(self.output1_dtype)
                return op0, op1

    # Variant: string inputs, tensor OUTPUT0, string OUTPUT1.
    elif (
        (input_dtype == np_dtype_string)
        and (output0_dtype != np_dtype_string)
        and (output1_dtype == np_dtype_string)
    ):

        class AddSubNet(nn.Module):
            def __init__(self, *args):
                self.output0_dtype = args[0][0]
                self.output1_dtype = args[0][1]
                self.swap = args[0][2]
                super(AddSubNet, self).__init__()

            def forward(
                self, INPUT0: List[str], INPUT1: List[str]
            ) -> Tuple[torch.Tensor, List[str]]:
                input0_int = torch.tensor([int(i) for i in INPUT0])
                input1_int = torch.tensor([int(i) for i in INPUT1])
                op0 = (
                    input0_int + input1_int
                    if not self.swap
                    else input0_int - input1_int
                ).to(self.output0_dtype)
                op1 = [
                    str(i.item())
                    for i in (
                        input0_int - input1_int
                        if not self.swap
                        else input0_int + input1_int
                    )
                ]
                return op0, op1

    # Variant: non-string inputs, both outputs strings.
    elif (
        (input_dtype != np_dtype_string)
        and (output0_dtype == np_dtype_string)
        and (output1_dtype == np_dtype_string)
    ):

        class AddSubNet(nn.Module):
            def __init__(self, *args):
                self.output0_dtype = args[0][0]
                self.output1_dtype = args[0][1]
                self.swap = args[0][2]
                super(AddSubNet, self).__init__()

            def forward(self, INPUT0, INPUT1) -> Tuple[List[str], List[str]]:
                op0 = [
                    str(i.item())
                    for i in (INPUT0 + INPUT1 if not self.swap else INPUT0 - INPUT1)
                ]
                op1 = [
                    str(i.item())
                    for i in (INPUT0 - INPUT1 if not self.swap else INPUT0 + INPUT1)
                ]
                return op0, op1

    # Variant: non-string inputs, tensor OUTPUT0, string OUTPUT1.
    elif (
        (input_dtype != np_dtype_string)
        and (output0_dtype != np_dtype_string)
        and (output1_dtype == np_dtype_string)
    ):

        class AddSubNet(nn.Module):
            def __init__(self, *args):
                self.output0_dtype = args[0][0]
                self.output1_dtype = args[0][1]
                self.swap = args[0][2]
                super(AddSubNet, self).__init__()

            def forward(self, INPUT0, INPUT1) -> Tuple[torch.Tensor, List[str]]:
                op0 = (INPUT0 + INPUT1 if not self.swap else INPUT0 - INPUT1).to(
                    self.output0_dtype
                )
                op1 = [
                    str(i.item())
                    for i in (INPUT0 - INPUT1 if not self.swap else INPUT0 + INPUT1)
                ]
                return op0, op1

    # Variant: non-string inputs, string OUTPUT0, tensor OUTPUT1.
    elif (
        (input_dtype != np_dtype_string)
        and (output0_dtype == np_dtype_string)
        and (output1_dtype != np_dtype_string)
    ):

        class AddSubNet(nn.Module):
            def __init__(self, *args):
                self.output0_dtype = args[0][0]
                self.output1_dtype = args[0][1]
                self.swap = args[0][2]
                super(AddSubNet, self).__init__()

            def forward(self, INPUT0, INPUT1) -> Tuple[List[str], torch.Tensor]:
                op0 = [
                    str(i.item())
                    for i in (INPUT0 + INPUT1 if not self.swap else INPUT0 - INPUT1)
                ]
                op1 = (INPUT0 - INPUT1 if not self.swap else INPUT0 + INPUT1).to(
                    self.output1_dtype
                )
                return op0, op1

    # Variant: no string dtypes at all -- plain tensor in, tensor out.
    else:

        class AddSubNet(nn.Module):
            def __init__(self, *args):
                self.output0_dtype = args[0][0]
                self.output1_dtype = args[0][1]
                self.swap = args[0][2]
                super(AddSubNet, self).__init__()

            def forward(self, INPUT0, INPUT1):
                op0 = (INPUT0 + INPUT1 if not self.swap else INPUT0 - INPUT1).to(
                    self.output0_dtype
                )
                op1 = (INPUT0 - INPUT1 if not self.swap else INPUT0 + INPUT1).to(
                    self.output1_dtype
                )
                return op0, op1

    addSubModel = AddSubNet((torch_output0_dtype, torch_output1_dtype, swap))
    # Despite the variable name, the module is compiled with torch.jit.script
    # (not torch.jit.trace).
    traced = torch.jit.script(addSubModel)

    model_version_dir = f"{models_dir}/{model_name}/{model_version}"

    try:
        os.makedirs(model_version_dir)
    except OSError:
        pass  # ignore existing dir

    traced.save(f"{model_version_dir}/model.pt")


def generate_sample_inputs(
    input_shape,
    input_dtype,
    device,
):
    """Return a pair of random tensors matching ``input_shape``/``input_dtype``.

    Integer dtypes are drawn with ``torch.randint`` over the bounds listed in
    the table below; float16/32/64 are drawn with ``torch.randn``. Any other
    dtype falls back to ``torch.randn`` with torch's default dtype. Both
    tensors are created on ``device`` and returned as ``(input0, input1)``.
    """
    # handle for -1 (when variable) since can't create tensor with shape of [-1]
    dims = [abs(extent) for extent in input_shape]

    # (numpy type, torch dtype attribute name, randint (low, high) or None
    # for floating point).  The torch dtype is looked up by name only for the
    # matching row, so dtypes missing from the installed torch are never
    # touched unless actually requested.
    dtype_table = (
        (np.int8, "int8", (-128, 127)),
        (np.int16, "int16", (-32768, 32767)),
        (np.int32, "int32", (-2147483648, 2147483647)),
        (np.int64, "int64", (-9223372036854775808, 9223372036854775807)),
        (np.float16, "float16", None),
        (np.float32, "float32", None),
        (np.float64, "float64", None),
        (np.uint8, "uint8", (0, 255)),
        (np.uint16, "uint16", (0, 65535)),
        (np.uint32, "uint32", (0, 4294967295)),
        (np.uint64, "uint64", (0, 18446744073709551615)),
    )

    for np_type, torch_attr, bounds in dtype_table:
        if not (input_dtype == np_type):
            continue
        torch_dtype = getattr(torch, torch_attr)
        if bounds is None:
            return tuple(
                torch.randn(*dims, dtype=torch_dtype, device=device)
                for _ in range(2)
            )
        low, high = bounds
        return tuple(
            torch.randint(low, high, dims, dtype=torch_dtype, device=device)
            for _ in range(2)
        )

    # Unrecognized dtype: default-dtype normal samples.
    return tuple(torch.randn(*dims, device=device) for _ in range(2))


def np_to_dtype(np_dtype):
    """Map a NumPy dtype to the torch dtype of the same name.

    Signed/unsigned 8-64 bit integers and float16/32/64 are supported. Any
    other dtype prints a warning and yields ``torch.int32``.
    """
    # (numpy type, name of the matching attribute on the torch module); the
    # attribute is resolved only for the row that matches.
    known_dtypes = (
        (np.int8, "int8"),
        (np.int16, "int16"),
        (np.int32, "int32"),
        (np.int64, "int64"),
        (np.float16, "float16"),
        (np.float32, "float32"),
        (np.float64, "float64"),
        (np.uint8, "uint8"),
        (np.uint16, "uint16"),
        (np.uint32, "uint32"),
        (np.uint64, "uint64"),
    )
    for candidate, torch_attr in known_dtypes:
        if np_dtype == candidate:
            return getattr(torch, torch_attr)

    print(
        f"{_color_yellow}warning: dtype {np_dtype} is unsupported; falling back to torch.int32{_color_reset}"
    )
    return torch.int32


def create_torch_aoti_modelfile(
    models_dir,
    model_version,
    input_shape,
    input_dtype,
    output_dtype,
    swap=False,
):
    """Export an add/sub model with torch.export and package it with AOTInductor.

    The package is written to
    ``<models_dir>/<model_name>/<model_version>/model.pt2``.

    Args:
        models_dir: Top-level model directory.
        model_version: Version sub-directory that receives the package.
        input_shape: Iterable of ints; negative entries (variable dims) are
            replaced by their absolute value to build the sample inputs.
        input_dtype: numpy dtype of both inputs.
        output_dtype: numpy dtype the result is cast to.
        swap: When True the model computes INPUT0 - INPUT1 instead of
            INPUT0 + INPUT1.

    Returns:
        True when the package was created. False when creating the version
        directory, exporting, or compiling raised; the error is printed to
        stderr.
    """
    model_name = tu.get_model_name(
        "torch_aoti",
        input_dtype,
        output_dtype,
        None,
    )
    model_version_dir = f"{models_dir}/{model_name}/{model_version}"

    print(f"{_color_green}Creating model {model_name}{_color_reset}")

    torch_input_dtype: torch.dtype = np_to_dtype(input_dtype)
    torch_output_dtype: torch.dtype = np_to_dtype(output_dtype)

    print(f"{model_name}({torch_input_dtype}) -> {torch_output_dtype}")

    # handle for -1 (when variable) since can't create tensor with shape of [-1]
    input_shape = [abs(ips) for ips in input_shape]

    class AddSubNet(nn.Module):
        """Adds (or subtracts, when swap is set) two inputs and casts the result."""

        def __init__(
            self,
            swap: bool,
            input_dtype: torch.dtype,
            output_dtype: torch.dtype,
        ) -> None:
            # nn.Module requires the parent initializer to run before any
            # attribute is assigned on the child.
            super().__init__()
            self.swap = swap
            self.input_dtype = input_dtype
            self.output_dtype = output_dtype

        def forward(self, INPUT0: torch.Tensor, INPUT1: torch.Tensor) -> torch.Tensor:
            if INPUT0.dtype != self.input_dtype:
                raise TypeError(
                    f"INPUT0 expected {self.input_dtype} vs. actual {INPUT0.dtype} type."
                )
            if INPUT1.dtype != self.input_dtype:
                raise TypeError(
                    f"INPUT1 expected {self.input_dtype} vs. actual {INPUT1.dtype} type."
                )
            return (INPUT0 - INPUT1 if self.swap else INPUT0 + INPUT1).to(
                self.output_dtype,
            )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AddSubNet(swap, torch_input_dtype, torch_output_dtype)
    model.to(device)
    model = model.eval()

    sample_input = generate_sample_inputs(input_shape, input_dtype, device)

    try:
        # An already existing directory is fine; any other failure is reported
        # through the handler below instead of being silently dropped.
        os.makedirs(model_version_dir, exist_ok=True)
        ep = torch.export.export(model, sample_input)
        torch._inductor.aoti_compile_and_package(
            ep,
            package_path=f"{model_version_dir}/model.pt2",
        )
    except Exception as e:
        print(
            f"{_color_red}error: Failed to create model {model_name}{_color_reset}",
            file=sys.stderr,
        )
        print(f"\n{_color_red}{e}{_color_reset}\n", file=sys.stderr)
        return False

    return True


def create_torchvision_aoti_modelfile(
    models_dir: str,
    max_batch: int,
    model_version: int,
):
    """Export torchvision's ResNet-50 and package it with AOTInductor.

    The package is written to
    ``<models_dir>/torchvision_aoti/<model_version>/model.pt2``.

    Args:
        models_dir: Top-level model directory.
        max_batch: Batch dimension of the sample tensor used for the export.
        model_version: Version sub-directory that receives the package.

    Returns:
        True when the package was created. False when creating the version
        directory, exporting, or compiling raised; the error is printed to
        stderr.
    """
    model_name = "torchvision_aoti"
    model_version_dir = f"{models_dir}/{model_name}/{model_version}"

    print(f"{_color_green}Creating model {model_name}{_color_reset}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
    model = model.to(device)
    model = model.eval()

    # Example input tensor: max_batch images, 3 color channels (RGB),
    # height and width of 224
    input_tensor = torch.randn(max_batch, 3, 224, 224, device=device)

    try:
        # An already existing directory is fine; any other failure is reported
        # through the handler below instead of being silently dropped.
        os.makedirs(model_version_dir, exist_ok=True)
        ep = torch.export.export(model, (input_tensor,))

        torch._inductor.aoti_compile_and_package(
            ep,
            package_path=f"{model_version_dir}/model.pt2",
        )
    except Exception as e:
        print(
            f"{_color_red}error: Failed to create model {model_name}{_color_reset}",
            file=sys.stderr,
        )
        print(f"\n{_color_red}{e}{_color_reset}\n", file=sys.stderr)
        return False

    return True


def create_libtorch_modelconfig(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    output0_label_cnt,
    version_policy,
):
    """Write config.pbtxt and the OUTPUT__0 label file for a LibTorch model.

    Nothing is written when tu.validate_for_libtorch_model rejects the
    dtype / shape / max_batch combination.

    Args:
        models_dir: Top-level model directory; files are written to
            ``<models_dir>/<model_name>/``.
        max_batch: Value emitted as max_batch_size; 0 selects the
            "libtorch_nobatch" model-name prefix.
        model_version: Not used by this function.
        input_shape: Shape shared by INPUT0 and INPUT1.
        output0_shape: Shape of OUTPUT__0.
        output1_shape: Shape of OUTPUT__1.
        input_dtype: numpy dtype of both inputs.
        output0_dtype: numpy dtype of OUTPUT__0.
        output1_dtype: numpy dtype of OUTPUT__1.
        output0_label_cnt: Number of "label<i>" lines written to the label
            file.
        version_policy: None for the default (latest, one version), or a
            ``(kind, value)`` pair where kind is "latest" or "specific"; any
            other kind selects the "all" policy.
    """
    if not tu.validate_for_libtorch_model(
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_shape,
        output0_shape,
        output1_shape,
        max_batch,
    ):
        return

    # Unpack version policy
    version_policy_str = "{ latest { num_versions: 1 }}"
    if version_policy is not None:
        policy_kind, policy_val = version_policy
        if policy_kind == "latest":
            version_policy_str = f"{{ latest {{ num_versions: {policy_val} }} }}"
        elif policy_kind == "specific":
            version_policy_str = f"{{ specific {{ versions: {policy_val} }} }}"
        else:
            version_policy_str = "{ all { }}"

    # Use a different model name for the non-batching variant
    model_name = tu.get_model_name(
        "libtorch_nobatch" if max_batch == 0 else "libtorch",
        input_dtype,
        output0_dtype,
        output1_dtype,
    )

    print(f"{_color_green}Creating config for {model_name}{_color_reset}")

    label_filename = "output0_labels.txt"
    config_dir = f"{models_dir}/{model_name}"
    config = f"""
backend: "pytorch"
name: "{model_name}"
platform: "pytorch_libtorch"
max_batch_size: {max_batch}
version_policy: {version_policy_str}
input [
  {{
    name: "INPUT0"
    data_type: {np_to_model_dtype(input_dtype)}
    dims: [ {tu.shape_to_dims_str(input_shape)} ]
  }},
  {{
    name: "INPUT1"
    data_type: {np_to_model_dtype(input_dtype)}
    dims: [ {tu.shape_to_dims_str(input_shape)} ]
  }}
]
output [
  {{
    name: "OUTPUT__0"
    data_type: {np_to_model_dtype(output0_dtype)}
    dims: [ {tu.shape_to_dims_str(output0_shape)} ]
    label_filename: "{label_filename}"
  }},
  {{
    name: "OUTPUT__1"
    data_type: {np_to_model_dtype(output1_dtype)}
    dims: [ {tu.shape_to_dims_str(output1_shape)} ]
  }}
]
"""

    # Only an already existing directory is tolerated; other failures
    # (permissions, a file in the way) surface here with their real cause.
    os.makedirs(config_dir, exist_ok=True)

    with open(f"{config_dir}/config.pbtxt", "w") as file:
        file.write(config)
        print(f"Created {config_dir}/config.pbtxt")

    with open(f"{config_dir}/{label_filename}", "w") as file:
        file.writelines(f"label{idx}\n" for idx in range(output0_label_cnt))
        print(f"Created {config_dir}/{label_filename}")


def create_torch_aoti_modelconfig(
    models_dir,
    input_shape,
    output_shape,
    input_dtype,
    output_dtype,
    output_label_cnt,
    version_policy,
):
    """Write config.pbtxt and the OUTPUT__0 label file for a Torch AOTI model.

    The emitted config has no max_batch_size entry and a single output. The
    instance group kind is KIND_GPU when torch reports CUDA as available and
    KIND_CPU otherwise.

    Args:
        models_dir: Top-level model directory; files are written to
            ``<models_dir>/<model_name>/``.
        input_shape: Shape shared by INPUT0 and INPUT1.
        output_shape: Shape of OUTPUT__0.
        input_dtype: numpy dtype of both inputs.
        output_dtype: numpy dtype of OUTPUT__0.
        output_label_cnt: Number of "label<i>" lines written to the label
            file.
        version_policy: None for the default (latest, one version), or a
            ``(kind, value)`` pair where kind is "latest" or "specific"; any
            other kind selects the "all" policy.
    """
    # Unpack version policy
    version_policy_str = "{ latest { num_versions: 1 }}"
    if version_policy is not None:
        policy_kind, policy_val = version_policy
        if policy_kind == "latest":
            version_policy_str = f"{{ latest {{ num_versions: {policy_val} }} }}"
        elif policy_kind == "specific":
            version_policy_str = f"{{ specific {{ versions: {policy_val} }} }}"
        else:
            version_policy_str = "{ all { }}"

    # The model name is derived from the input dtype and the single output dtype
    model_name = tu.get_model_name(
        "torch_aoti",
        input_dtype,
        output_dtype,
        None,
    )

    print(f"{_color_green}Creating config for {model_name}{_color_reset}")

    label_filename = "output_labels.txt"
    config_dir = f"{models_dir}/{model_name}"
    config = f"""
backend: "pytorch"
name: "{model_name}"
platform: "torch_aoti"
version_policy: {version_policy_str}
input [
  {{
    name: "INPUT0"
    data_type: {np_to_model_dtype(input_dtype)}
    dims: [ {tu.shape_to_dims_str(input_shape)} ]
  }},
  {{
    name: "INPUT1"
    data_type: {np_to_model_dtype(input_dtype)}
    dims: [ {tu.shape_to_dims_str(input_shape)} ]
  }}
]
output [
  {{
    name: "OUTPUT__0"
    data_type: {np_to_model_dtype(output_dtype)}
    dims: [ {tu.shape_to_dims_str(output_shape)} ]
    label_filename: "{label_filename}"
  }}
]
instance_group [{{ kind: {"KIND_GPU" if torch.cuda.is_available() else "KIND_CPU"} }}]
"""

    # Only an already existing directory is tolerated; other failures
    # (permissions, a file in the way) surface here with their real cause.
    os.makedirs(config_dir, exist_ok=True)

    with open(f"{config_dir}/config.pbtxt", "w") as file:
        file.write(config)
        print(f"Created {config_dir}/config.pbtxt")

    with open(f"{config_dir}/{label_filename}", "w") as file:
        file.writelines(f"label{idx}\n" for idx in range(output_label_cnt))
        print(f"Created {config_dir}/{label_filename}")


def create_torchvision_aoti_modelconfig(
    models_dir: str,
    max_batch: int,
):
    """Write config.pbtxt and copy the label file for the torchvision AOTI model.

    The label file named by RESNET50_LABEL_FILE is copied from the directory
    given by the TRITON_GENSRCDIR environment variable ("gen_srcdir" when the
    variable is unset) into the model directory as resnet50_labels.txt.

    Args:
        models_dir: Top-level model directory; files are written to
            ``<models_dir>/torchvision_aoti/``.
        max_batch: Value emitted as max_batch_size.
    """
    model_name = "torchvision_aoti"
    label_filename = "resnet50_labels.txt"

    print(f"{_color_green}Creating config for {model_name}{_color_reset}")

    config_dir = f"{models_dir}/{model_name}"
    config = f"""
backend: "pytorch"
name: "{model_name}"
platform: "torch_aoti"
max_batch_size: {max_batch}
input  [
  {{
    name: "INPUT__0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 3, 224, 224 ]
  }}]
output [
  {{
    name: "OUTPUT__0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
    label_filename: "{label_filename}"
  }}
]
instance_group [{{ kind: {"KIND_GPU" if torch.cuda.is_available() else "KIND_CPU"} }}]
"""

    # Only an already existing directory is tolerated; other failures
    # (permissions, a file in the way) surface here with their real cause.
    os.makedirs(config_dir, exist_ok=True)

    with open(f"{config_dir}/config.pbtxt", "w") as file:
        file.write(config)
        print(f"Created {config_dir}/config.pbtxt")

    source_path = os.environ.get("TRITON_GENSRCDIR", default="gen_srcdir")
    source_filename = os.path.join(source_path, RESNET50_LABEL_FILE)

    shutil.copyfile(source_filename, f"{config_dir}/{label_filename}")
    print(f"Created {config_dir}/{label_filename}")


def create_openvino_modelfile(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    swap=False,
):
    """Build the add/sub OpenVINO model and hand it to openvino_save_model.

    OUTPUT0 is INPUT0 + INPUT1 and OUTPUT1 is INPUT0 - INPUT1; with swap set
    the two operations trade places. Each result is reshaped to its output
    shape and converted to its output dtype. When max_batch is non-zero it is
    prepended to every shape as a leading dimension. Nothing is generated
    when tu.validate_for_openvino_model rejects the combination.
    """
    leading = (max_batch,) if max_batch != 0 else ()
    full_input_shape = leading + input_shape
    full_output0_shape = leading + output0_shape
    full_output1_shape = leading + output1_shape

    supported = tu.validate_for_openvino_model(
        input_dtype,
        output0_dtype,
        output1_dtype,
        full_input_shape,
        full_output0_shape,
        full_output1_shape,
    )
    if not supported:
        return

    # Create the model
    name_prefix = "openvino_nobatch" if max_batch == 0 else "openvino"
    model_name = tu.get_model_name(
        name_prefix, input_dtype, output0_dtype, output1_dtype
    )
    print(f"{_color_green}Creating model {model_name}{_color_reset}")
    model_version_dir = "/".join((models_dir, model_name, str(model_version)))

    lhs = ov.opset1.parameter(
        shape=full_input_shape, dtype=input_dtype, name="INPUT0"
    )
    rhs = ov.opset1.parameter(
        shape=full_input_shape, dtype=input_dtype, name="INPUT1"
    )

    # The first output is built before the second one in both orderings.
    if swap:
        first = ov.opset1.subtract(lhs, rhs)
        second = ov.opset1.add(lhs, rhs)
    else:
        first = ov.opset1.add(lhs, rhs)
        second = ov.opset1.subtract(lhs, rhs)

    reshaped_first = ov.opset1.reshape(first, full_output0_shape, special_zero=False)
    reshaped_second = ov.opset1.reshape(
        second, full_output1_shape, special_zero=False
    )

    out0 = ov.opset1.convert(
        reshaped_first, destination_type=output0_dtype, name="OUTPUT0"
    )
    out1 = ov.opset1.convert(
        reshaped_second, destination_type=output1_dtype, name="OUTPUT1"
    )

    openvino_save_model(
        model_version_dir, ov.Model([out0, out1], [lhs, rhs], model_name)
    )


def create_openvino_modelconfig(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    output0_label_cnt,
    version_policy,
):
    """Write config.pbtxt and the OUTPUT0 label file for an OpenVINO model.

    Nothing is written when tu.validate_for_openvino_model rejects the
    dtype / shape combination. For validation, a non-zero max_batch is
    prepended to every shape as a leading dimension.

    Args:
        models_dir: Top-level model directory; files are written to
            ``<models_dir>/<model_name>/``.
        max_batch: Value emitted as max_batch_size; 0 selects the
            "openvino_nobatch" model-name prefix.
        model_version: Not used by this function.
        input_shape: Shape shared by INPUT0 and INPUT1 (without batch dim).
        output0_shape: Shape of OUTPUT0 (without batch dim).
        output1_shape: Shape of OUTPUT1 (without batch dim).
        input_dtype: numpy dtype of both inputs.
        output0_dtype: numpy dtype of OUTPUT0.
        output1_dtype: numpy dtype of OUTPUT1.
        output0_label_cnt: Number of "label<i>" lines written to
            output0_labels.txt.
        version_policy: None for the default (latest, one version), or a
            ``(kind, value)`` pair where kind is "latest" or "specific"; any
            other kind selects the "all" policy.
    """
    batch_dim = () if max_batch == 0 else (max_batch,)
    if not tu.validate_for_openvino_model(
        input_dtype,
        output0_dtype,
        output1_dtype,
        batch_dim + input_shape,
        batch_dim + output0_shape,
        batch_dim + output1_shape,
    ):
        return

    # Unpack version policy
    version_policy_str = "{ latest { num_versions: 1 }}"
    if version_policy is not None:
        policy_kind, policy_val = version_policy
        if policy_kind == "latest":
            version_policy_str = f"{{ latest {{ num_versions: {policy_val} }}}}"
        elif policy_kind == "specific":
            version_policy_str = f"{{ specific {{ versions: {policy_val} }}}}"
        else:
            version_policy_str = "{ all { }}"

    # Use a different model name for the non-batching variant
    model_name = tu.get_model_name(
        "openvino_nobatch" if max_batch == 0 else "openvino",
        input_dtype,
        output0_dtype,
        output1_dtype,
    )
    print(f"{_color_green}Creating config for {model_name}{_color_reset}")
    config_dir = models_dir + "/" + model_name

    # platform is empty and backend is 'openvino' for openvino model
    config = f"""
name: "{model_name}"
backend: "openvino"
max_batch_size: {max_batch}
version_policy: {version_policy_str}
input [
  {{
    name: "INPUT0"
    data_type: {np_to_model_dtype(input_dtype)}
    dims: [ {tu.shape_to_dims_str(input_shape)} ]
  }},
  {{
    name: "INPUT1"
    data_type: {np_to_model_dtype(input_dtype)}
    dims: [ {tu.shape_to_dims_str(input_shape)} ]
  }}
]
output [
  {{
    name: "OUTPUT0"
    data_type: {np_to_model_dtype(output0_dtype)}
    dims: [ {tu.shape_to_dims_str(output0_shape)} ]
    label_filename: "output0_labels.txt"
   }},
  {{
    name: "OUTPUT1"
    data_type: {np_to_model_dtype(output1_dtype)}
    dims: [ {tu.shape_to_dims_str(output1_shape)} ]
  }}
]
"""

    # Only an already existing directory is tolerated; other failures
    # (permissions, a file in the way) surface here with their real cause.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)

    with open(config_dir + "/output0_labels.txt", "w") as lfile:
        lfile.writelines(f"label{idx}\n" for idx in range(output0_label_cnt))


def create_models(
    models_dir,
    input_dtype,
    output0_dtype,
    output1_dtype,
    input_shape,
    output0_shape,
    output1_shape,
    output0_label_cnt,
    version_policy=None,
):
    """Generate configs and model files for every backend enabled in FLAGS.

    TensorRT, ONNX, LibTorch and OpenVINO each get a max-batch 8 and a
    max-batch 0 variant, config first and model file second. Torch AOTI gets
    a single model, and only when both output dtypes are equal; its config is
    written only if the model file was created. Ensemble models are produced
    for each platform returned by emu.platform_types_and_validation() whose
    validator accepts the dtypes and shapes. All models use version 1.
    """
    print(f"{_color_blue}Creating models in {models_dir}{_color_reset}")
    model_version = 1
    dtypes = (input_dtype, output0_dtype, output1_dtype)
    shapes = (input_shape, output0_shape, output1_shape)
    # Batching variant first, then the non-batching one.
    batch_sizes = (8, 0)

    if FLAGS.tensorrt:
        print(f"{_color_magenta}TensorRT model generation requested{_color_reset}")
        # Two trailing unit dims are appended to every shape as soon as any
        # of the tensors is int8.
        suffix = (1, 1) if any(dt == np.int8 for dt in dtypes) else ()
        plan_shapes = tuple(shape + suffix for shape in shapes)

        for max_batch in batch_sizes:
            create_plan_modelconfig(
                models_dir,
                max_batch,
                model_version,
                *plan_shapes,
                *dtypes,
                output0_label_cnt,
                version_policy,
            )
            create_plan_modelfile(
                models_dir,
                max_batch,
                model_version,
                *plan_shapes,
                *dtypes,
            )

        if -1 in input_shape:
            # models for testing optimization profiles
            create_plan_modelconfig(
                models_dir,
                8,
                model_version,
                *plan_shapes,
                *dtypes,
                output0_label_cnt,
                version_policy,
                min_dim=4,
                max_dim=32,
            )
            create_plan_modelfile(
                models_dir,
                8,
                model_version,
                *plan_shapes,
                *dtypes,
                min_dim=4,
                max_dim=32,
            )

    if FLAGS.onnx:
        print(f"{_color_magenta}ONNX model generation requested{_color_reset}")
        for max_batch in batch_sizes:
            create_onnx_modelconfig(
                models_dir,
                max_batch,
                model_version,
                *shapes,
                *dtypes,
                output0_label_cnt,
                version_policy,
            )
            create_onnx_modelfile(
                models_dir,
                max_batch,
                model_version,
                *shapes,
                *dtypes,
            )

    if FLAGS.libtorch:
        print(f"{_color_magenta}PyTorch: PT model generation requested{_color_reset}")
        for max_batch in batch_sizes:
            create_libtorch_modelconfig(
                models_dir,
                max_batch,
                model_version,
                *shapes,
                *dtypes,
                output0_label_cnt,
                version_policy,
            )
            create_libtorch_modelfile(
                models_dir,
                max_batch,
                model_version,
                *shapes,
                *dtypes,
            )

    # The AOTI model has a single output, so it is only generated when both
    # requested output dtypes agree.
    if FLAGS.torch_aoti and output0_dtype == output1_dtype:
        print(
            f"{_color_magenta}PyTorch: AOTI model generation requested{_color_reset}"
        )
        model_created = create_torch_aoti_modelfile(
            models_dir,
            model_version,
            input_shape,
            input_dtype,
            output0_dtype,
        )
        if model_created:
            create_torch_aoti_modelconfig(
                models_dir,
                input_shape,
                output0_shape,
                input_dtype,
                output0_dtype,
                output0_label_cnt,
                version_policy,
            )

    if FLAGS.openvino:
        print(f"{_color_magenta}OpenVINO model generation requested{_color_reset}")
        for max_batch in batch_sizes:
            create_openvino_modelconfig(
                models_dir,
                max_batch,
                model_version,
                *shapes,
                *dtypes,
                output0_label_cnt,
                version_policy,
            )
            create_openvino_modelfile(
                models_dir,
                max_batch,
                model_version,
                *shapes,
                *dtypes,
            )

    if FLAGS.ensemble:
        print(f"{_color_magenta}Ensemble model generation requested{_color_reset}")
        for pair in emu.platform_types_and_validation():
            platform, is_valid = pair[0], pair[1]
            if not is_valid(*dtypes, *shapes):
                continue

            if platform == "plan":
                # 1-D int8 tensors are expanded to (N, 1, 1) for plan models.
                config_shapes = tuple(
                    (shape[0], 1, 1) if len(shape) == 1 and dt == np.int8 else shape
                    for shape, dt in zip(shapes, dtypes)
                )
            else:
                config_shapes = shapes

            # max-batch 0
            emu.create_ensemble_modelconfig(
                platform,
                models_dir,
                0,
                model_version,
                *config_shapes,
                *dtypes,
                output0_label_cnt,
                version_policy,
            )
            emu.create_ensemble_modelfile(
                platform,
                models_dir,
                0,
                model_version,
                *config_shapes,
                *dtypes,
            )

            # max-batch 8 (Skip for PyTorch models with String I/O)
            if platform == "libtorch" and not is_valid(*dtypes, *shapes, 8):
                continue

            emu.create_ensemble_modelconfig(
                platform,
                models_dir,
                8,
                model_version,
                *config_shapes,
                *dtypes,
                output0_label_cnt,
                version_policy,
            )
            emu.create_ensemble_modelfile(
                platform,
                models_dir,
                8,
                model_version,
                *config_shapes,
                *dtypes,
            )


def create_fixed_models(
    models_dir, input_dtype, output0_dtype, output1_dtype, version_policy=None
):
    """Create models whose input and both outputs are fixed 1-D tensors of 16.

    Delegates to create_models, using the tensor length (16) as the label
    count as well.
    """
    input_size = 16
    fixed_shape = (input_size,)
    create_models(
        models_dir,
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_shape=fixed_shape,
        output0_shape=fixed_shape,
        output1_shape=fixed_shape,
        output0_label_cnt=input_size,
        version_policy=version_policy,
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir", type=str, required=True, help="Top-level model directory"
    )
    parser.add_argument(
        "--tensorrt",
        required=False,
        action="store_true",
        help="Generate TensorRT PLAN models",
    )
    parser.add_argument(
        "--onnx",
        required=False,
        action="store_true",
        help="Generate Onnx Runtime Onnx models",
    )
    parser.add_argument(
        "--onnx_opset",
        type=int,
        required=False,
        default=0,
        help="Opset used for Onnx models. Default is to use ONNXRT default",
    )
    parser.add_argument(
        "--libtorch",
        required=False,
        action="store_true",
        help="Generate Pytorch LibTorch models",
    )
    parser.add_argument(
        "--torch-aoti",
        required=False,
        action="store_true",
        help="Generate Pytorch LibTorch models using PT2",
    )
    parser.add_argument(
        "--torchvision-aoti",
        required=False,
        action="store_true",
        help="Generate Pytorch Torchvision models using PT2",
    )
    parser.add_argument(
        "--openvino",
        required=False,
        action="store_true",
        help="Generate Openvino models",
    )
    parser.add_argument(
        "--variable",
        required=False,
        action="store_true",
        help="Used variable-shape tensors for input/output",
    )
    parser.add_argument(
        "--ensemble",
        required=False,
        action="store_true",
        help="Generate ensemble models against the models"
        + " in all platforms. Note that the models generated"
        + " are not completed.",
    )
    FLAGS, unparsed = parser.parse_known_args()

    if FLAGS.tensorrt:
        import tensorrt as trt
    if FLAGS.onnx:
        import onnx
    if FLAGS.libtorch or FLAGS.torch_aoti:
        import torch
        from torch import nn
    if FLAGS.torchvision_aoti:
        import shutil

        import torch
        import torchvision.models as models

        RESNET50_LABEL_FILE = "resnet50_labels.txt"
    if FLAGS.openvino:
        import openvino.runtime as ov

    import test_util as tu

    # Tests with models that accept fixed-shape input/output tensors
    if not FLAGS.variable:
        create_fixed_models(
            FLAGS.models_dir, np.uint8, np.uint8, np.uint8, ("latest", 3)
        )
        create_fixed_models(FLAGS.models_dir, np.int8, np.int8, np.int8, ("latest", 1))
        create_fixed_models(
            FLAGS.models_dir, np.int16, np.int16, np.int16, ("latest", 2)
        )
        create_fixed_models(
            FLAGS.models_dir, np.int32, np.int32, np.int32, ("all", None)
        )
        create_fixed_models(FLAGS.models_dir, np.int64, np.int64, np.int64)
        create_fixed_models(
            FLAGS.models_dir,
            np.float16,
            np.float16,
            np.float16,
            (
                "specific",
                [
                    1,
                ],
            ),
        )
        create_fixed_models(
            FLAGS.models_dir, np.float32, np.float32, np.float32, ("specific", [1, 3])
        )
        create_fixed_models(FLAGS.models_dir, np.float16, np.float32, np.float32)
        create_fixed_models(FLAGS.models_dir, np.int32, np.int8, np.int8)
        create_fixed_models(FLAGS.models_dir, np.int8, np.int32, np.int32)
        create_fixed_models(FLAGS.models_dir, np.int32, np.int8, np.int16)
        create_fixed_models(FLAGS.models_dir, np.float32, np.uint8, np.uint8)
        create_fixed_models(FLAGS.models_dir, np.uint8, np.float32, np.float32)
        create_fixed_models(FLAGS.models_dir, np.float32, np.uint8, np.float16)
        create_fixed_models(FLAGS.models_dir, np.int32, np.float32, np.float32)
        create_fixed_models(FLAGS.models_dir, np.float32, np.int32, np.int32)
        create_fixed_models(FLAGS.models_dir, np.int32, np.float16, np.int16)

        create_fixed_models(FLAGS.models_dir, np_dtype_string, np.int32, np.int32)
        create_fixed_models(
            FLAGS.models_dir, np_dtype_string, np_dtype_string, np_dtype_string
        )
        create_fixed_models(
            FLAGS.models_dir, np_dtype_string, np.int32, np_dtype_string
        )
        create_fixed_models(
            FLAGS.models_dir, np_dtype_string, np_dtype_string, np.int32
        )
        create_fixed_models(
            FLAGS.models_dir, np.int32, np_dtype_string, np_dtype_string
        )
        create_fixed_models(FLAGS.models_dir, np.int32, np.int32, np_dtype_string)
        create_fixed_models(FLAGS.models_dir, np.int32, np_dtype_string, np.int32)

        # Make multiple versions of some models for version testing
        # (they use different version policies when created above)
        if FLAGS.tensorrt:
            if tu.check_gpus_compute_capability(min_capability=8.0):
                create_fixed_models(
                    FLAGS.models_dir,
                    np_dtype_bfloat16,
                    np_dtype_bfloat16,
                    np_dtype_bfloat16,
                )
            else:
                print(
                    "Skipping the generation of TensorRT PLAN models for the BF16 datatype!"
                )

            for vt in [np.float32, np.float16, np.int32, np.uint8]:
                create_plan_modelfile(
                    FLAGS.models_dir, 8, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_plan_modelfile(
                    FLAGS.models_dir, 8, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_plan_modelfile(
                    FLAGS.models_dir, 0, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_plan_modelfile(
                    FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
                )

            vt = np.int8
            # handle INT8 separately as it doesn't allow 1d tensors
            create_plan_modelfile(
                FLAGS.models_dir,
                8,
                2,
                (16, 1, 1),
                (16, 1, 1),
                (16, 1, 1),
                vt,
                vt,
                vt,
                swap=True,
            )
            create_plan_modelfile(
                FLAGS.models_dir,
                8,
                3,
                (16, 1, 1),
                (16, 1, 1),
                (16, 1, 1),
                vt,
                vt,
                vt,
                swap=True,
            )
            create_plan_modelfile(
                FLAGS.models_dir,
                0,
                2,
                (16, 1, 1),
                (16, 1, 1),
                (16, 1, 1),
                vt,
                vt,
                vt,
                swap=True,
            )
            create_plan_modelfile(
                FLAGS.models_dir,
                0,
                3,
                (16, 1, 1),
                (16, 1, 1),
                (16, 1, 1),
                vt,
                vt,
                vt,
                swap=True,
            )

        if FLAGS.onnx:
            for vt in [np.float16, np.float32, np.int8, np.int16, np.int32]:
                create_onnx_modelfile(
                    FLAGS.models_dir, 8, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_onnx_modelfile(
                    FLAGS.models_dir, 8, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_onnx_modelfile(
                    FLAGS.models_dir, 0, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_onnx_modelfile(
                    FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
                )

        if FLAGS.libtorch:
            for vt in [np.float32, np.int32, np.int16, np.int8]:
                create_libtorch_modelfile(
                    FLAGS.models_dir, 8, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_libtorch_modelfile(
                    FLAGS.models_dir, 8, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_libtorch_modelfile(
                    FLAGS.models_dir, 0, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_libtorch_modelfile(
                    FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
                )

        if FLAGS.openvino:
            for vt in [np.float16, np.float32, np.int8, np.int16, np.int32]:
                create_openvino_modelfile(
                    FLAGS.models_dir, 8, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_openvino_modelfile(
                    FLAGS.models_dir, 8, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_openvino_modelfile(
                    FLAGS.models_dir, 0, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
                )
                create_openvino_modelfile(
                    FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
                )

        if FLAGS.ensemble:
            for pair in emu.platform_types_and_validation():
                for vt in [np.float16, np.float32, np.int8, np.int16, np.int32]:
                    shape = (
                        (16, 1, 1) if (pair[0] == "plan" and vt == np.int8) else (16,)
                    )
                    if not pair[1](vt, vt, vt, shape, shape, shape):
                        continue
                    emu.create_ensemble_modelfile(
                        pair[0],
                        FLAGS.models_dir,
                        8,
                        2,
                        shape,
                        shape,
                        shape,
                        vt,
                        vt,
                        vt,
                        swap=True,
                    )
                    emu.create_ensemble_modelfile(
                        pair[0],
                        FLAGS.models_dir,
                        8,
                        3,
                        shape,
                        shape,
                        shape,
                        vt,
                        vt,
                        vt,
                        swap=True,
                    )
                    emu.create_ensemble_modelfile(
                        pair[0],
                        FLAGS.models_dir,
                        0,
                        2,
                        shape,
                        shape,
                        shape,
                        vt,
                        vt,
                        vt,
                        swap=True,
                    )
                    emu.create_ensemble_modelfile(
                        pair[0],
                        FLAGS.models_dir,
                        0,
                        3,
                        shape,
                        shape,
                        shape,
                        vt,
                        vt,
                        vt,
                        swap=True,
                    )

    # Tests with models that accept variable-shape input/output tensors
    if FLAGS.variable:
        create_models(
            FLAGS.models_dir,
            np.float32,
            np.float32,
            np.float32,
            (-1,),
            (-1,),
            (-1,),
            16,
        )
        create_models(
            FLAGS.models_dir,
            np.float32,
            np.int32,
            np.int32,
            (-1, -1),
            (-1, -1),
            (-1, -1),
            16,
        )
        create_models(
            FLAGS.models_dir,
            np.float32,
            np.int64,
            np.int64,
            (8, -1),
            (8, -1),
            (8, -1),
            32,
        )
        create_models(
            FLAGS.models_dir,
            np.float32,
            np.int32,
            np.int64,
            (-1, 8, -1),
            (-1, 8, -1),
            (-1, 8, -1),
            32,
        )
        create_models(
            FLAGS.models_dir, np.float32, np.float32, np.int32, (-1,), (-1,), (-1,), 16
        )
        create_models(
            FLAGS.models_dir,
            np.int32,
            np.int32,
            np.int32,
            (-1, -1),
            (-1, -1),
            (-1, -1),
            16,
        )
        create_models(
            FLAGS.models_dir,
            np.int32,
            np.int32,
            np.float32,
            (-1, 8, -1),
            (-1, 8, -1),
            (-1, 8, -1),
            32,
        )

        create_models(
            FLAGS.models_dir,
            np_dtype_string,
            np_dtype_string,
            np_dtype_string,
            (-1,),
            (-1,),
            (-1,),
            16,
        )
        create_models(
            FLAGS.models_dir,
            np_dtype_string,
            np.int32,
            np.int32,
            (-1, -1),
            (-1, -1),
            (-1, -1),
            16,
        )
        create_models(
            FLAGS.models_dir,
            np_dtype_string,
            np_dtype_string,
            np.int32,
            (8, -1),
            (8, -1),
            (8, -1),
            32,
        )
        create_models(
            FLAGS.models_dir,
            np_dtype_string,
            np.int32,
            np_dtype_string,
            (-1, 8, -1),
            (-1, 8, -1),
            (-1, 8, -1),
            32,
        )

        if FLAGS.tensorrt:
            if tu.check_gpus_compute_capability(min_capability=8.0):
                create_models(
                    FLAGS.models_dir,
                    np_dtype_bfloat16,
                    np_dtype_bfloat16,
                    np_dtype_bfloat16,
                    (-1, -1),
                    (-1, -1),
                    (-1, -1),
                    0,
                )
            else:
                print(
                    "Skipping the generation of TensorRT PLAN models for the BF16 datatype!"
                )

    if FLAGS.ensemble:
        # Create utility models used in ensemble
        # nop (only creates model config, should add model file before use)
        model_dtypes = ["TYPE_BOOL", "TYPE_STRING"]
        for s in [8, 16, 32, 64]:
            for t in ["INT", "UINT", "FP"]:
                if t == "FP" and s == 8:
                    continue
                model_dtypes.append("TYPE_{}{}".format(t, s))

        for model_dtype in model_dtypes:
            # Use variable size to handle all shape. Note: piping variable size output
            # to fixed size model is not safe but doable
            for model_shape in [(-1,), (-1, -1), (-1, -1, -1)]:
                emu.create_nop_modelconfig(FLAGS.models_dir, model_shape, model_dtype)

    if FLAGS.torchvision_aoti:
        print(f"{_color_blue}TorchVision AOTI model generation requested{_color_reset}")
        if create_torchvision_aoti_modelfile(FLAGS.models_dir, 1, 1):
            create_torchvision_aoti_modelconfig(FLAGS.models_dir, 1)


================================================
FILE: qa/common/gen_qa_ort_scalar_models.py
================================================
#!/usr/bin/env python3

# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")
import argparse
import os

import numpy as np
import onnx
import test_util as tu
from gen_common import np_to_model_dtype, np_to_onnx_dtype


def create_onnx_modelfile(models_dir, shape, dtype, model_version=1):
    """Write an ONNX identity model named ``onnx_scalar_<N>dim``.

    The graph is a single ``Identity`` node from "INPUT" to "OUTPUT". Both
    tensors are declared with the ONNX type derived from ``dtype`` and with
    ``None`` as their shape, so ``shape`` only contributes its length
    (``<N>``) to the model name.

    Args:
        models_dir: Top-level model repository directory.
        shape: Sequence whose length selects the model name.
        dtype: NumPy dtype, converted with ``np_to_onnx_dtype``.
        model_version: Version sub-directory that receives ``model.onnx``.

    Raises:
        OSError: If the version directory cannot be created.

    Note:
        Reads the module-level ``FLAGS.onnx_opset``. A value greater than 0
        is passed to ``make_model`` as the opset import; otherwise no opset
        is passed.
    """
    onnx_io_dtype = np_to_onnx_dtype(dtype)

    # Only the rank of `shape` is used; it selects the model name.
    model_name = f"onnx_scalar_{len(shape)}dim"
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # No shape information is attached to the graph input/output (shape=None).
    input_info = onnx.helper.make_tensor_value_info("INPUT", onnx_io_dtype, None)
    output_info = onnx.helper.make_tensor_value_info("OUTPUT", onnx_io_dtype, None)

    identity = onnx.helper.make_node("Identity", ["INPUT"], ["OUTPUT"])

    graph_proto = onnx.helper.make_graph(
        [identity], model_name, [input_info], [output_info]
    )
    if FLAGS.onnx_opset > 0:
        model_opset = onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        model_def = onnx.helper.make_model(
            graph_proto, producer_name="triton", opset_imports=[model_opset]
        )
    else:
        model_def = onnx.helper.make_model(graph_proto, producer_name="triton")

    # An existing directory is fine; any other failure (permissions, a file
    # in the way, ...) is reported here instead of surfacing later in save().
    os.makedirs(model_version_dir, exist_ok=True)

    onnx.save(model_def, model_version_dir + "/model.onnx")


def create_onnx_modelconfig(models_dir, dtype, shape):
    """Write ``config.pbtxt`` for the ``onnx_scalar_<N>dim`` model.

    The config declares one input "INPUT" and one output "OUTPUT" that share
    the same data type and dims.

    Args:
        models_dir: Top-level model repository directory.
        dtype: NumPy dtype, converted with ``np_to_model_dtype``.
        shape: Tensor shape; its length selects the model name (``<N>``) and
            its values are rendered with ``tu.shape_to_dims_str``.

    Raises:
        OSError: If the model directory cannot be created.
    """
    model_name = f"onnx_scalar_{len(shape)}dim"
    config_dir = models_dir + "/" + model_name

    # Input and output are identical, so compute each string once.
    data_type_str = np_to_model_dtype(dtype)
    dims_str = tu.shape_to_dims_str(shape)

    config = """
input [
  {{
    name: "INPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
""".format(
        data_type_str,
        dims_str,
        data_type_str,
        dims_str,
    )

    # An existing directory is fine; other failures are raised rather than
    # being swallowed and resurfacing as a confusing open() error.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


if __name__ == "__main__":
    # Command-line interface of the scalar ONNX model generator.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--models_dir",
        type=str,
        required=True,
        help="Top-level model directory",
    )
    arg_parser.add_argument(
        "--onnx_opset",
        type=int,
        required=False,
        default=0,
        help="Opset used for Onnx models. Default is to use ONNXRT default",
    )

    # FLAGS must stay a module-level name: create_onnx_modelfile reads
    # FLAGS.onnx_opset.
    FLAGS = arg_parser.parse_args()

    # argparse accepts an empty string for a required option; reject it here.
    if not FLAGS.models_dir:
        raise Exception("--models_dir is required")

    # One model per rank: a 1-dim and a 2-dim single-element FP32 tensor.
    for scalar_shape in ([1], [1, 1]):
        create_onnx_modelfile(FLAGS.models_dir, shape=scalar_shape, dtype=np.float32)
        create_onnx_modelconfig(
            FLAGS.models_dir, shape=scalar_shape, dtype=np.float32
        )


================================================
FILE: qa/common/gen_qa_pytorch_model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import argparse
import os

import torch
from torch import nn


class AddSubNet(nn.Module):
    """Parameter-free module returning the sum and difference of two inputs."""

    def __init__(self):
        super().__init__()

    def forward(self, input0, input1):
        """Return ``(input0 + input1, input0 - input1)`` as a tuple."""
        total = input0 + input1
        difference = input0 - input1
        return total, difference


def generate_model(model_dir):
    """Trace an ``AddSubNet`` and save it as ``model.pt`` inside ``model_dir``.

    Tracing uses two random float tensors of shape (1, 4) as example inputs.
    ``model_dir`` is created when it does not exist yet.
    """
    example_inputs = (
        torch.rand(1, 4, dtype=torch.float),
        torch.rand(1, 4, dtype=torch.float),
    )
    traced_module = torch.jit.trace(AddSubNet(), example_inputs)

    os.makedirs(model_dir, exist_ok=True)
    traced_module.save(os.path.join(model_dir, "model.pt"))


def generate_config(config_path):
    """Write the add/sub model's ``config.pbtxt`` into ``config_path``.

    The config selects the "pytorch" backend and declares two inputs
    (INPUT0, INPUT1) and two outputs (OUTPUT0, OUTPUT1), each of type
    TYPE_FP32 with dims [ 4 ]. ``config_path`` must already exist.
    """

    def _tensor_section(section, tensor_name):
        # One single-entry `input [ ... ]` / `output [ ... ]` stanza.
        return (
            f"{section} [\n"
            "  {\n"
            f'    name: "{tensor_name}"\n'
            "    data_type: TYPE_FP32\n"
            "    dims: [ 4 ]\n"
            "  }\n"
            "]\n"
        )

    tensors = (
        ("input", "INPUT0"),
        ("input", "INPUT1"),
        ("output", "OUTPUT0"),
        ("output", "OUTPUT1"),
    )
    config_text = '\nbackend: "pytorch"\n' + "".join(
        _tensor_section(section, tensor_name) for section, tensor_name in tensors
    )

    with open("{}/config.pbtxt".format(config_path), "w") as config_file:
        config_file.write(config_text)


if __name__ == "__main__":
    # Command-line interface of the PyTorch add/sub model generator.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-m",
        "--model-directory",
        type=str,
        required=True,
        help="The path to the model repository.",
    )
    cli.add_argument(
        "--model-name",
        type=str,
        required=False,
        default="add_sub_pytorch",
        help="Model name",
    )
    cli.add_argument(
        "--version",
        type=str,
        required=False,
        default="1",
        help="Model version",
    )

    args = cli.parse_args()

    # Layout: <model-directory>/<model-name>/config.pbtxt
    #         <model-directory>/<model-name>/<version>/model.pt
    model_directory = os.path.join(args.model_directory, args.model_name)
    os.makedirs(model_directory, exist_ok=True)

    version_directory = os.path.join(model_directory, args.version)
    generate_model(model_dir=version_directory)
    generate_config(model_directory)


================================================
FILE: qa/common/gen_qa_ragged_models.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os

import numpy as np
from gen_common import np_to_model_dtype, np_to_onnx_dtype, np_to_trt_dtype

np_dtype_string = np.dtype(object)


def create_plan_modelfile(models_dir, model_version, dtype):
    """Build the TensorRT ``plan_batch_input`` engine and write ``model.plan``.

    The network takes three 1-D dynamic inputs ("RAGGED_INPUT",
    "BATCH_AND_SIZE_INPUT", "BATCH_INPUT"), reshapes each to a [-1, 1]
    column, and multiplies ``BATCH_AND_SIZE_INPUT / BATCH_AND_SIZE_INPUT``
    by the transpose of each column. Every output is therefore a 2-D tensor
    whose leading dimension equals the length of "BATCH_AND_SIZE_INPUT".

    Args:
        models_dir: Top-level model repository directory.
        model_version: Version sub-directory that receives ``model.plan``.
        dtype: NumPy dtype, converted with ``np_to_trt_dtype`` and used for
            all three inputs.

    Raises:
        OSError: If the version directory cannot be created.

    Note:
        Relies on a module-level ``trt`` name that is not part of the import
        block at the top of this file -- presumably bound by the script
        entry point before this function is called; verify against caller.
    """
    # Create special identity model for batch input testing.
    # Because the ragged input and batch input are one-dimensional vectors
    # when passed to the model, the model must generate output with a batch
    # dimension so that Triton can scatter it to different responses along
    # the batch dimension.
    # 'BATCH_AND_SIZE_INPUT' is also used as a hint to generate output with
    # batch dimension, 'BATCH_AND_SIZE_INPUT' must have shape [batch_size].
    # Each output corresponds to the input with the same name, so if there
    # are two requests, one has "RAGGED_INPUT" [2, 4] and the other has [1],
    # since the input is ragged, the model sees the input as [2, 4, 1], and
    # "BATCH_AND_SIZE_INPUT" will have shape [2]. Then the model output will
    # be [[2, 4, 1], [2, 4, 1]] and Triton will send responses that each has
    # value [[2, 4, 1]].
    # For "BATCH_INPUT", the input tensor must only have one variable dimension
    # to be broadcasted along the batch dimension properly, thus the currently
    # allowed batch input types are:
    # - BATCH_ACCUMULATED_ELEMENT_COUNT
    # - BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO
    # - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE
    # - BATCH_ITEM_SHAPE_FLATTEN

    # TRT_VERBOSE=1 in the environment switches the logger to VERBOSE.
    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()
    trt_dtype = np_to_trt_dtype(dtype)

    in_node = network.add_input("RAGGED_INPUT", trt_dtype, [-1])
    bs_node = network.add_input("BATCH_AND_SIZE_INPUT", trt_dtype, [-1])
    batch_node = network.add_input("BATCH_INPUT", trt_dtype, [-1])

    # Turn every 1-D input into an [n, 1] column.
    reshape_dims = trt.Dims([-1, 1])
    in_mat = network.add_shuffle(in_node)
    in_mat.reshape_dims = reshape_dims
    bs_mat = network.add_shuffle(bs_node)
    bs_mat.reshape_dims = reshape_dims
    batch_mat = network.add_shuffle(batch_node)
    batch_mat.reshape_dims = reshape_dims

    # x / x: a [batch, 1] column used to replicate each row across the batch
    # (all ones, assuming the batch-and-size values are non-zero).
    batch_entry = network.add_elementwise(
        bs_mat.get_output(0), bs_mat.get_output(0), trt.ElementWiseOperation.DIV
    )
    # [batch, 1] x [1, n] -> [batch, n] for each of the three inputs.
    out_node = network.add_matrix_multiply(
        batch_entry.get_output(0),
        trt.MatrixOperation.NONE,
        in_mat.get_output(0),
        trt.MatrixOperation.TRANSPOSE,
    )
    bs_out_node = network.add_matrix_multiply(
        batch_entry.get_output(0),
        trt.MatrixOperation.NONE,
        bs_mat.get_output(0),
        trt.MatrixOperation.TRANSPOSE,
    )
    batch_out_node = network.add_matrix_multiply(
        batch_entry.get_output(0),
        trt.MatrixOperation.NONE,
        batch_mat.get_output(0),
        trt.MatrixOperation.TRANSPOSE,
    )
    out_node.get_output(0).name = "RAGGED_OUTPUT"
    bs_out_node.get_output(0).name = "BATCH_AND_SIZE_OUTPUT"
    batch_out_node.get_output(0).name = "BATCH_OUTPUT"
    network.mark_output(out_node.get_output(0))
    network.mark_output(bs_out_node.get_output(0))
    network.mark_output(batch_out_node.get_output(0))

    # Hard coded optimization profile
    min_shape = [1]
    opt_shape = [8]
    max_shape = [32]

    profile = builder.create_optimization_profile()
    for input_name in ["RAGGED_INPUT", "BATCH_AND_SIZE_INPUT", "BATCH_INPUT"]:
        profile.set_shape(input_name, min_shape, opt_shape, max_shape)
    config = builder.create_builder_config()
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback for builders without build_serialized_network
        # (presumably older TensorRT releases).
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_name = "plan_batch_input"
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # An existing directory is fine; other failures are raised rather than
    # being swallowed and resurfacing as a confusing open() error.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_onnx_modelfile(models_dir, model_version, dtype):
    """Build the ONNX ``onnx_batch_input`` model and write ``model.onnx``.

    The graph reshapes each 1-D input ("RAGGED_INPUT",
    "BATCH_AND_SIZE_INPUT", "BATCH_INPUT") to a [1, -1] row and multiplies
    it by ``BATCH_AND_SIZE_INPUT / BATCH_AND_SIZE_INPUT`` reshaped to a
    [-1, 1] column, so every output is a 2-D tensor whose leading dimension
    equals the length of "BATCH_AND_SIZE_INPUT".

    Args:
        models_dir: Top-level model repository directory.
        model_version: Version sub-directory that receives ``model.onnx``.
        dtype: NumPy dtype, converted with ``np_to_onnx_dtype`` and used for
            all inputs and outputs.

    Raises:
        OSError: If the version directory cannot be created.

    Note:
        Reads the module-level ``FLAGS.onnx_opset`` and relies on
        module-level ``onnx`` and ``tu`` names that are not part of the
        import block at the top of this file -- presumably bound by the
        script entry point; verify against caller.
    """
    # Create special identity model for batch input testing.
    # Because the ragged input and batch input are one-dimensional vectors
    # when passed to the model, the model must generate output with a batch
    # dimension so that Triton can scatter it to different responses along
    # the batch dimension.
    # 'BATCH_AND_SIZE_INPUT' is also used as a hint to generate output with
    # batch dimension, 'BATCH_AND_SIZE_INPUT' must have shape [batch_size].
    # Each output corresponds to the input with the same name, so if there
    # are two requests, one has "RAGGED_INPUT" [2, 4] and the other has [1],
    # since the input is ragged, the model sees the input as [2, 4, 1], and
    # "BATCH_AND_SIZE_INPUT" will have shape [2]. Then the model output will
    # be [[2, 4, 1], [2, 4, 1]] and Triton will send responses that each has
    # value [[2, 4, 1]].
    # For "BATCH_INPUT", the input tensor must only have one variable dimension
    # to be broadcasted along the batch dimension properly, thus the currently
    # allowed batch input types are:
    # - BATCH_ACCUMULATED_ELEMENT_COUNT
    # - BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO
    # - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE
    # - BATCH_ITEM_SHAPE_FLATTEN

    onnx_dtype = np_to_onnx_dtype(dtype)

    # Create the model
    model_name = "onnx_batch_input"
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # All three inputs are converted starting from the same index 0.
    in0_shape, idx = tu.shape_to_onnx_shape([-1], 0)
    bs_shape, idx = tu.shape_to_onnx_shape([-1], 0)
    batch_shape, idx = tu.shape_to_onnx_shape([-1], 0)

    in0 = onnx.helper.make_tensor_value_info("RAGGED_INPUT", onnx_dtype, in0_shape)
    bs_in = onnx.helper.make_tensor_value_info(
        "BATCH_AND_SIZE_INPUT", onnx_dtype, bs_shape
    )
    batch_in = onnx.helper.make_tensor_value_info(
        "BATCH_INPUT", onnx_dtype, batch_shape
    )

    # Output shapes continue from the index returned by the previous call.
    out_shape, idx = tu.shape_to_onnx_shape([-1, -1], idx)
    bs_out_shape, idx = tu.shape_to_onnx_shape([-1, -1], idx)
    batch_out_shape, idx = tu.shape_to_onnx_shape([-1, -1], idx)

    out = onnx.helper.make_tensor_value_info("RAGGED_OUTPUT", onnx_dtype, out_shape)
    bs_out = onnx.helper.make_tensor_value_info(
        "BATCH_AND_SIZE_OUTPUT", onnx_dtype, bs_out_shape
    )
    batch_out = onnx.helper.make_tensor_value_info(
        "BATCH_OUTPUT", onnx_dtype, batch_out_shape
    )

    # Target shape for the inputs: a [1, n] row.
    const_node_shape = onnx.helper.make_node(
        "Constant",
        [],
        ["shape"],
        value=onnx.helper.make_tensor(
            "const_shape", onnx.TensorProto.INT64, [2], [1, -1]
        ),
    )

    # Target shape for the expander: a [batch, 1] column.
    const_node_expander_shape = onnx.helper.make_node(
        "Constant",
        [],
        ["expander_shape"],
        value=onnx.helper.make_tensor(
            "const_expander_shape", onnx.TensorProto.INT64, [2], [-1, 1]
        ),
    )

    in0_mat_node = onnx.helper.make_node(
        "Reshape", ["RAGGED_INPUT", "shape"], ["in_mat"]
    )
    bs_mat_node = onnx.helper.make_node(
        "Reshape", ["BATCH_AND_SIZE_INPUT", "shape"], ["bs_mat"]
    )
    batch_mat_node = onnx.helper.make_node(
        "Reshape", ["BATCH_INPUT", "shape"], ["batch_mat"]
    )

    # x / x, then reshaped to a column; used to replicate each row across the
    # batch (all ones, assuming the batch-and-size values are non-zero).
    internal_node_div = onnx.helper.make_node(
        "Div", ["BATCH_AND_SIZE_INPUT", "BATCH_AND_SIZE_INPUT"], ["output_expander_int"]
    )
    internal_node_reshape = onnx.helper.make_node(
        "Reshape", ["output_expander_int", "expander_shape"], ["output_expander"]
    )

    # [batch, 1] x [1, n] -> [batch, n] for each of the three inputs.
    out_node = onnx.helper.make_node(
        "MatMul", ["output_expander", "in_mat"], ["RAGGED_OUTPUT"]
    )
    bs_out_node = onnx.helper.make_node(
        "MatMul", ["output_expander", "bs_mat"], ["BATCH_AND_SIZE_OUTPUT"]
    )
    batch_out_node = onnx.helper.make_node(
        "MatMul", ["output_expander", "batch_mat"], ["BATCH_OUTPUT"]
    )

    onnx_nodes = [
        const_node_shape,
        const_node_expander_shape,
        in0_mat_node,
        bs_mat_node,
        batch_mat_node,
        internal_node_div,
        internal_node_reshape,
        out_node,
        bs_out_node,
        batch_out_node,
    ]
    onnx_inputs = [in0, bs_in, batch_in]
    onnx_outputs = [out, bs_out, batch_out]

    graph_proto = onnx.helper.make_graph(
        onnx_nodes, model_name, onnx_inputs, onnx_outputs
    )
    if FLAGS.onnx_opset > 0:
        model_opset = onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        model_def = onnx.helper.make_model(
            graph_proto, producer_name="triton", opset_imports=[model_opset]
        )
    else:
        model_def = onnx.helper.make_model(graph_proto, producer_name="triton")

    # An existing directory is fine; other failures are raised rather than
    # being swallowed and resurfacing later inside onnx.save().
    os.makedirs(model_version_dir, exist_ok=True)

    onnx.save(model_def, model_version_dir + "/model.onnx")


def create_libtorch_modelfile(models_dir, model_version, dtype):
    """Script the ``libtorch_batch_input`` model and write ``model.pt``.

    The scripted module reshapes each input to a [1, -1] row and multiplies
    it by ``BATCH_AND_SIZE_INPUT / BATCH_AND_SIZE_INPUT`` viewed as a
    [-1, 1] column, so every output is a 2-D tensor whose leading dimension
    equals the length of "BATCH_AND_SIZE_INPUT".

    Args:
        models_dir: Top-level model repository directory.
        model_version: Version sub-directory that receives ``model.pt``.
        dtype: NumPy dtype; only checked against the string dtype, which is
            rejected.

    Raises:
        Exception: If ``dtype`` is the string dtype (not implemented).
        OSError: If the version directory cannot be created.

    Note:
        Relies on module-level ``torch`` and ``nn`` names that are not part
        of the import block at the top of this file -- presumably bound by
        the script entry point; verify against caller.
    """
    # Create special identity model for batch input testing.
    # Because the ragged input and batch input are one-dimensional vectors
    # when passed to the model, the model must generate output with a batch
    # dimension so that Triton can scatter it to different responses along
    # the batch dimension.
    # 'BATCH_AND_SIZE_INPUT' is also used as a hint to generate output with
    # batch dimension, 'BATCH_AND_SIZE_INPUT' must have shape [batch_size].
    # Each output corresponds to the input with the same name, so if there
    # are two requests, one has "RAGGED_INPUT" [2, 4] and the other has [1],
    # since the input is ragged, the model sees the input as [2, 4, 1], and
    # "BATCH_AND_SIZE_INPUT" will have shape [2]. Then the model output will
    # be [[2, 4, 1], [2, 4, 1]] and Triton will send responses that each has
    # value [[2, 4, 1]].
    # For "BATCH_INPUT", the input tensor must only have one variable dimension
    # to be broadcasted along the batch dimension properly, thus the currently
    # allowed batch input types are:
    # - BATCH_ACCUMULATED_ELEMENT_COUNT
    # - BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO
    # - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE
    # - BATCH_ITEM_SHAPE_FLATTEN

    # Create the model
    model_name = "libtorch_batch_input"
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Guard clause: string tensors are not supported by this generator.
    if dtype == np_dtype_string:
        raise Exception(
            "PyTorch ragged model generation for string models not yet implemented"
        )

    class IdentityNet(nn.Module):
        def __init__(self):
            super(IdentityNet, self).__init__()

        def forward(self, BATCH_INPUT, BATCH_AND_SIZE_INPUT, RAGGED_INPUT):
            batch_entry = BATCH_AND_SIZE_INPUT / BATCH_AND_SIZE_INPUT
            batch_entry = batch_entry.view(-1, 1)

            BATCH_INPUT = BATCH_INPUT.view(1, -1)
            BATCH_OUTPUT = torch.matmul(batch_entry, BATCH_INPUT)

            BATCH_AND_SIZE_INPUT = BATCH_AND_SIZE_INPUT.view(1, -1)
            BATCH_AND_SIZE_OUTPUT = torch.matmul(batch_entry, BATCH_AND_SIZE_INPUT)

            RAGGED_INPUT = RAGGED_INPUT.view(1, -1)
            RAGGED_OUTPUT = torch.matmul(batch_entry, RAGGED_INPUT)

            return RAGGED_OUTPUT, BATCH_AND_SIZE_OUTPUT, BATCH_OUTPUT

    # The module is compiled with torch.jit.script (not traced).
    scripted_model = torch.jit.script(IdentityNet())

    # An existing directory is fine; other failures are raised rather than
    # being swallowed and resurfacing later inside save().
    os.makedirs(model_version_dir, exist_ok=True)

    scripted_model.save(model_version_dir + "/model.pt")


def create_modelconfig(models_dir, max_batch, model_version, dtype, backend, platform):
    """Write ``config.pbtxt`` for the ``<platform>_batch_input`` model.

    The config declares the ragged input "RAGGED_INPUT", three outputs, two
    batch inputs derived from "RAGGED_INPUT" ("BATCH_AND_SIZE_INPUT" of kind
    BATCH_ELEMENT_COUNT and "BATCH_INPUT" of kind
    BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO), and dynamic batching with a
    maximum queue delay of 1000000 microseconds. All tensors share one data
    type.

    Args:
        models_dir: Top-level model repository directory.
        max_batch: Value written as ``max_batch_size``.
        model_version: Unused by this function; the version policy is fixed
            to the latest single version.
        dtype: NumPy dtype, converted with ``np_to_model_dtype``.
        backend: Value written as the ``backend`` field.
        platform: Prefix of the model name (``<platform>_batch_input``).

    Raises:
        OSError: If the model directory cannot be created.
    """
    version_policy_str = "{ latest { num_versions: 1 }}"

    backend_spec = '\nbackend: "{}"\n'.format(backend)

    model_name = "{}_batch_input".format(platform)
    config_dir = models_dir + "/" + model_name
    config = """
name: "{}"
{}
max_batch_size: {}
version_policy: {}
input [
  {{
    name: "RAGGED_INPUT"
    data_type: {data_type}
    dims: [ -1 ]
    allow_ragged_batch: true
  }}
]
output [
  {{
    name: "RAGGED_OUTPUT"
    data_type: {data_type}
    dims: [ -1 ]
   }}
]
output [
  {{
    name: "BATCH_AND_SIZE_OUTPUT"
    data_type: {data_type}
    dims: [ -1 ]
   }}
]
output [
  {{
    name: "BATCH_OUTPUT"
    data_type: {data_type}
    dims: [ -1 ]
   }}
]
batch_input [
  {{
    kind: BATCH_ELEMENT_COUNT
    target_name: "BATCH_AND_SIZE_INPUT"
    data_type: {data_type}
    source_input: "RAGGED_INPUT"
  }},
  {{
    kind: BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO
    target_name: "BATCH_INPUT"
    data_type: {data_type}
    source_input: "RAGGED_INPUT"
  }}
]
dynamic_batching {{
  max_queue_delay_microseconds: 1000000
}}
""".format(
        model_name,
        backend_spec,
        max_batch,
        version_policy_str,
        data_type=np_to_model_dtype(dtype),
    )

    # An existing directory is fine; other failures are raised rather than
    # being swallowed and resurfacing as a confusing open() error.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_plan_itemshape_modelfile(models_dir, model_version, dtype):
    """Build and save the TensorRT engine for the "plan_batch_item" model.

    This is the identity model used for 'BATCH_ITEM_SHAPE' batch-input
    testing: it takes one ragged input and one batch input and returns the
    batch input unchanged as BATCH_OUTPUT. Since 'BATCH_ITEM_SHAPE' is
    expected to be generated with a matching batch dimension, an identity op
    is enough and Triton is expected to scatter the output.

    Args:
        models_dir: Top-level model directory; the engine is written to
            "<models_dir>/plan_batch_item/<model_version>/model.plan".
        model_version: Version sub-directory name (stringified).
        dtype: Numpy dtype, converted with np_to_trt_dtype for both inputs.
    """
    # TRT_VERBOSE=1 in the environment switches the logger to VERBOSE.
    if os.environ.get("TRT_VERBOSE") == "1":
        severity = trt.Logger.VERBOSE
    else:
        severity = trt.Logger.INFO
    builder = trt.Builder(trt.Logger(severity))
    network = builder.create_network()
    trt_dtype = np_to_trt_dtype(dtype)

    # RAGGED_INPUT is registered on the network but nothing consumes it;
    # only BATCH_INPUT is forwarded to the output.
    network.add_input("RAGGED_INPUT", trt_dtype, [-1])
    batch_in = network.add_input("BATCH_INPUT", trt_dtype, [-1, 2])

    identity_layer = network.add_identity(batch_in)
    identity_layer.get_output(0).name = "BATCH_OUTPUT"
    network.mark_output(identity_layer.get_output(0))

    # Hard coded optimization profile: leading dimension min/opt/max.
    lead_min, lead_opt, lead_max = 1, 8, 32

    profile = builder.create_optimization_profile()
    profile.set_shape("RAGGED_INPUT", [lead_min], [lead_opt], [lead_max])
    profile.set_shape(
        "BATCH_INPUT", [lead_min, 2], [lead_opt, 2], [lead_max, 2]
    )

    config = builder.create_builder_config()
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback path taken when the call above raises AttributeError
        # (presumably a TensorRT build without build_serialized_network).
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    version_dir = "/".join([models_dir, "plan_batch_item", str(model_version)])
    try:
        os.makedirs(version_dir)
    except OSError:
        pass  # ignore existing dir

    with open(version_dir + "/model.plan", "wb") as plan_file:
        plan_file.write(engine_bytes)


def create_onnx_itemshape_modelfile(models_dir, model_version, dtype):
    """Build and save the ONNX graph for the "onnx_batch_item" model.

    This is the identity model used for 'BATCH_ITEM_SHAPE' batch-input
    testing: the graph declares RAGGED_INPUT and BATCH_INPUT as inputs and
    copies BATCH_INPUT to BATCH_OUTPUT with a single Identity node. Since
    'BATCH_ITEM_SHAPE' is expected to be generated with a matching batch
    dimension, Triton is expected to scatter the output.

    Args:
        models_dir: Top-level model directory; the model is written to
            "<models_dir>/onnx_batch_item/<model_version>/model.onnx".
        model_version: Version sub-directory name (stringified).
        dtype: Numpy dtype, converted with np_to_onnx_dtype.
    """
    elem_type = np_to_onnx_dtype(dtype)

    model_name = "onnx_batch_item"
    version_dir = "/".join([models_dir, model_name, str(model_version)])

    # Both input shapes start numbering their variable dimensions at 0; the
    # output shape continues from the index returned for BATCH_INPUT.
    ragged_shape, _ = tu.shape_to_onnx_shape([-1], 0)
    batch_in_shape, next_dim = tu.shape_to_onnx_shape([-1, 2], 0)

    ragged_info = onnx.helper.make_tensor_value_info(
        "RAGGED_INPUT", elem_type, ragged_shape
    )
    batch_in_info = onnx.helper.make_tensor_value_info(
        "BATCH_INPUT", elem_type, batch_in_shape
    )

    batch_out_shape, next_dim = tu.shape_to_onnx_shape([-1, -1], next_dim)
    batch_out_info = onnx.helper.make_tensor_value_info(
        "BATCH_OUTPUT", elem_type, batch_out_shape
    )

    identity = onnx.helper.make_node("Identity", ["BATCH_INPUT"], ["BATCH_OUTPUT"])
    graph_proto = onnx.helper.make_graph(
        [identity], model_name, [ragged_info, batch_in_info], [batch_out_info]
    )

    # Only pin the opset when --onnx_opset was given a positive value.
    model_kwargs = {"producer_name": "triton"}
    if FLAGS.onnx_opset > 0:
        model_kwargs["opset_imports"] = [
            onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        ]
    model_def = onnx.helper.make_model(graph_proto, **model_kwargs)

    try:
        os.makedirs(version_dir)
    except OSError:
        pass  # ignore existing dir

    onnx.save(model_def, version_dir + "/model.onnx")


def create_libtorch_itemshape_modelfile(models_dir, model_version, dtype):
    """Script and save the TorchScript "libtorch_batch_item" model.

    This is the identity model used for 'BATCH_ITEM_SHAPE' batch-input
    testing: forward() takes the ragged input and the batch input and
    returns the batch input unchanged. Since 'BATCH_ITEM_SHAPE' is expected
    to be generated with a matching batch dimension, Triton is expected to
    scatter the output.

    Args:
        models_dir: Top-level model directory; the model is written to
            "<models_dir>/libtorch_batch_item/<model_version>/model.pt".
        model_version: Version sub-directory name (stringified).
        dtype: Numpy dtype; only checked against the string dtype.

    Raises:
        Exception: If dtype is the string (object) dtype.
    """
    # String tensors are rejected up front; nothing is written in that case.
    if dtype == np_dtype_string:
        raise Exception(
            "PyTorch ragged model generation for string models not yet implemented"
        )

    version_dir = "/".join([models_dir, "libtorch_batch_item", str(model_version)])

    # The forward() argument names are kept as in the model config.
    class IdentityNet(nn.Module):
        def __init__(self):
            super(IdentityNet, self).__init__()

        def forward(self, RAGGED_INPUT, BATCH_INPUT):
            return BATCH_INPUT

    scripted = torch.jit.script(IdentityNet())

    try:
        os.makedirs(version_dir)
    except OSError:
        pass  # ignore existing dir

    scripted.save(version_dir + "/model.pt")


def create_itemshape_modelconfig(
    models_dir, max_batch, model_version, dtype, backend, platform
):
    """Write config.pbtxt for the "<platform>_batch_item" model.

    The generated config declares one ragged input (RAGGED_INPUT with dims
    [ -1, -1 ]), one output (BATCH_OUTPUT with dims [ 2 ]) and one batch
    input BATCH_INPUT of kind BATCH_ITEM_SHAPE sourced from RAGGED_INPUT.
    Dynamic batching is enabled with max_queue_delay_microseconds set to
    1000000.

    Args:
        models_dir: Top-level model directory; the file is written to
            "<models_dir>/<platform>_batch_item/config.pbtxt".
        max_batch: Value emitted as max_batch_size.
        model_version: Not used by this function.
        dtype: Numpy dtype; converted with np_to_model_dtype and used as the
            data_type of every tensor and of the batch input.
        backend: Value emitted in the backend field.
        platform: Prefix used to build the model name.
    """
    name = "{}_batch_item".format(platform)

    backend_line = """
backend: "{}"
""".format(
        backend
    )

    template = """
name: "{}"
{}
max_batch_size: {}
version_policy: {}
input [
  {{
    name: "RAGGED_INPUT"
    data_type: {data_type}
    dims: [ -1, -1 ]
    allow_ragged_batch: true
  }}
]
output [
  {{
    name: "BATCH_OUTPUT"
    data_type: {data_type}
    dims: [ 2 ]
   }}
]
batch_input [
  {{
    kind: BATCH_ITEM_SHAPE
    target_name: "BATCH_INPUT"
    data_type: {data_type}
    source_input: "RAGGED_INPUT"
  }}
]
dynamic_batching {{
  max_queue_delay_microseconds: 1000000
}}
"""
    pbtxt = template.format(
        name,
        backend_line,
        max_batch,
        "{ latest { num_versions: 1 }}",
        data_type=np_to_model_dtype(dtype),
    )

    target_dir = "/".join([models_dir, name])
    try:
        os.makedirs(target_dir)
    except OSError:
        pass  # ignore existing dir

    with open(target_dir + "/config.pbtxt", "w") as out:
        out.write(pbtxt)


def create_batch_input_models(models_dir):
    """Generate the batch-input and batch-item models for each enabled backend.

    For every backend whose command-line flag is set (FLAGS.tensorrt,
    FLAGS.onnx, FLAGS.libtorch) this writes, in order: the batch-input model
    config, the batch-input model file, the item-shape model config and the
    item-shape model file. All models use version 1, max batch size 4 and
    np.float32.

    Args:
        models_dir: Top-level model directory passed through to the
            individual generators.
    """
    model_version = 1
    max_batch = 4
    dtype = np.float32

    # (enabled, backend name, platform prefix, model writer, item-shape writer)
    targets = (
        (
            FLAGS.tensorrt,
            "tensorrt",
            "plan",
            create_plan_modelfile,
            create_plan_itemshape_modelfile,
        ),
        (
            FLAGS.onnx,
            "onnxruntime",
            "onnx",
            create_onnx_modelfile,
            create_onnx_itemshape_modelfile,
        ),
        (
            FLAGS.libtorch,
            "pytorch",
            "libtorch",
            create_libtorch_modelfile,
            create_libtorch_itemshape_modelfile,
        ),
    )

    for enabled, backend, platform, write_model, write_itemshape_model in targets:
        if not enabled:
            continue
        create_modelconfig(
            models_dir, max_batch, model_version, dtype, backend, platform
        )
        write_model(models_dir, model_version, dtype)
        create_itemshape_modelconfig(
            models_dir, max_batch, model_version, dtype, backend, platform
        )
        write_itemshape_model(models_dir, model_version, dtype)


if __name__ == "__main__":
    # Command-line entry point: parse the flags, import only the frameworks
    # that were requested, then generate the models.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir", type=str, required=True, help="Top-level model directory"
    )
    # One optional boolean switch per backend, registered in a fixed order.
    for _switch, _description in (
        ("--tensorrt", "Generate TensorRT PLAN models"),
        ("--onnx", "Generate Onnx Runtime Onnx models"),
        ("--libtorch", "Generate Libtorch models"),
    ):
        parser.add_argument(
            _switch,
            required=False,
            action="store_true",
            help=_description,
        )
    parser.add_argument(
        "--onnx_opset",
        type=int,
        required=False,
        default=0,
        help="Opset used for Onnx models. Default is to use ONNXRT default",
    )

    # Unrecognized arguments are collected rather than rejected.
    FLAGS, unparsed = parser.parse_known_args()

    import test_util as tu

    # Framework modules are imported lazily so that only the requested
    # backends need to be installed.
    if FLAGS.tensorrt:
        import tensorrt as trt
    if FLAGS.onnx:
        import onnx
    if FLAGS.libtorch:
        import torch
        from torch import nn

    create_batch_input_models(FLAGS.models_dir)


================================================
FILE: qa/common/gen_qa_reshape_models.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os
from builtins import range
from typing import List, Tuple

import gen_ensemble_model_utils as emu
import numpy as np
from gen_common import (
    np_to_model_dtype,
    np_to_onnx_dtype,
    np_to_torch_dtype,
    np_to_trt_dtype,
    openvino_save_model,
)

FLAGS = None
np_dtype_string = np.dtype(object)


def create_plan_modelfile(
    models_dir, model_version, max_batch, dtype, input_shapes, output_shapes
):
    """Build and save a TensorRT engine that maps INPUTn to OUTPUTn.

    Each input is copied to its output with an identity layer when
    input_shapes equals output_shapes as a whole; otherwise a shuffle layer
    reshaping to output_shapes[n] is used. Nothing is generated when
    tu.validate_for_trt_model rejects the dtype / first input shape.

    Args:
        models_dir: Top-level model directory.
        model_version: Version sub-directory name (stringified).
        max_batch: 0 for a non-batching model ("plan_nobatch" name prefix);
            otherwise a leading -1 batch dimension is added to every input
            and the optimization profile spans 1..max(1, max_batch).
        dtype: Numpy dtype, converted with np_to_trt_dtype.
        input_shapes: Sequence of per-input shapes (without batch dimension).
        output_shapes: Sequence of per-output shapes; same length as
            input_shapes.
    """
    assert len(input_shapes) == len(output_shapes)
    probe_shape = input_shapes[0]
    supported = tu.validate_for_trt_model(
        dtype, dtype, dtype, probe_shape, probe_shape, probe_shape
    )
    if not supported:
        return

    trt_dtype = np_to_trt_dtype(dtype)
    io_cnt = len(input_shapes)
    batching = max_batch != 0

    # TRT_VERBOSE=1 in the environment switches the logger to VERBOSE.
    if os.environ.get("TRT_VERBOSE") == "1":
        severity = trt.Logger.VERBOSE
    else:
        severity = trt.Logger.INFO
    builder = trt.Builder(trt.Logger(severity))
    network = builder.create_network()

    profile = builder.create_optimization_profile()
    for io_num in range(io_cnt):
        input_name = "INPUT{}".format(io_num)
        output_name = "OUTPUT{}".format(io_num)
        dims = list(input_shapes[io_num])

        network_dims = [-1] + dims if batching else dims
        tensor = network.add_input(input_name, trt_dtype, network_dims)
        if input_shapes == output_shapes:
            layer = network.add_identity(tensor)
        else:
            # NOTE(review): the reshape target does not include the batch
            # dimension, and this method name should be checked against the
            # TensorRT Python API in use — confirm this path is exercised.
            layer = network.add_shuffle(tensor)
            layer.set_reshape_dimensions(output_shapes[io_num])

        layer.get_output(0).name = output_name
        network.mark_output(layer.get_output(0))

        # The profile pins every non-batch dimension to its declared value;
        # only the batch dimension (when present) has a range.
        if batching:
            largest_batch = max(1, max_batch)
            min_shape = [1] + dims
            opt_shape = [largest_batch] + dims
            max_shape = [largest_batch] + dims
        else:
            min_shape = list(dims)
            opt_shape = list(dims)
            max_shape = list(dims)
        profile.set_shape(input_name, min_shape, opt_shape, max_shape)

    config = builder.create_builder_config()
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback path taken when the call above raises AttributeError
        # (presumably a TensorRT build without build_serialized_network).
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine
    del network

    name_prefix = "plan" if batching else "plan_nobatch"
    model_name = tu.get_zero_model_name(name_prefix, io_cnt, dtype)
    version_dir = "/".join([models_dir, model_name, str(model_version)])

    try:
        os.makedirs(version_dir)
    except OSError:
        pass  # ignore existing dir

    with open(version_dir + "/model.plan", "wb") as plan_file:
        plan_file.write(engine_bytes)


def create_plan_modelconfig(
    models_dir,
    model_version,
    max_batch,
    dtype,
    input_shapes,
    input_model_shapes,
    output_shapes,
    output_model_shapes,
):
    """Write config.pbtxt for the TensorRT reshape model.

    One input/output pair (INPUTn / OUTPUTn) is emitted per entry in
    input_shapes. A "reshape" clause is added to a tensor only when its
    config shape differs from the corresponding model shape. Nothing is
    written when tu.validate_for_trt_model rejects the dtype / first input
    shape.

    Args:
        models_dir: Top-level model directory.
        model_version: Not used by this function.
        max_batch: Value emitted as max_batch_size; 0 selects the
            "plan_nobatch" name prefix.
        dtype: Numpy dtype, converted with np_to_model_dtype.
        input_shapes: Per-input shapes written as dims.
        input_model_shapes: Per-input shapes written in the reshape clause.
        output_shapes: Per-output shapes written as dims.
        output_model_shapes: Per-output shapes written in the reshape clause.
    """
    assert len(input_shapes) == len(input_model_shapes)
    assert len(output_shapes) == len(output_model_shapes)
    assert len(input_shapes) == len(output_shapes)
    probe_shape = input_shapes[0]
    if not tu.validate_for_trt_model(
        dtype, dtype, dtype, probe_shape, probe_shape, probe_shape
    ):
        return

    def reshape_clause(shape, model_shape):
        # Emit a reshape only when the config and model shapes differ.
        if shape != model_shape:
            return "reshape: {{ shape: [ {} ] }}".format(
                tu.shape_to_dims_str(model_shape)
            )
        return ""

    io_cnt = len(input_shapes)
    name_prefix = "plan_nobatch" if max_batch == 0 else "plan"
    model_name = tu.get_zero_model_name(name_prefix, io_cnt, dtype)
    config_dir = "/".join([models_dir, model_name])

    header = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
""".format(
        model_name, max_batch
    )

    io_template = """
input [
  {{
    name: "INPUT{}"
    data_type: {}
    dims: [ {} ]
    {}
  }}
]
output [
  {{
    name: "OUTPUT{}"
    data_type: {}
    dims: [ {} ]
    {}
  }}
]
"""
    sections = [header]
    for io_num in range(io_cnt):
        sections.append(
            io_template.format(
                io_num,
                np_to_model_dtype(dtype),
                tu.shape_to_dims_str(input_shapes[io_num]),
                reshape_clause(input_shapes[io_num], input_model_shapes[io_num]),
                io_num,
                np_to_model_dtype(dtype),
                tu.shape_to_dims_str(output_shapes[io_num]),
                reshape_clause(output_shapes[io_num], output_model_shapes[io_num]),
            )
        )
    config = "".join(sections)

    try:
        os.makedirs(config_dir)
    except OSError:
        pass  # ignore existing dir

    with open(config_dir + "/config.pbtxt", "w") as out:
        out.write(config)


def create_libtorch_modelfile(
    models_dir, model_version, max_batch, dtype, input_shapes, output_shapes
):
    """Script and save a TorchScript model that maps its inputs to outputs.

    For non-string dtypes the model views each input as the matching entry
    of output_shapes (with a leading -1 batch dimension when max_batch is
    non-zero). For the string dtype an identity model over List[str] is
    used instead. Nothing is generated when tu.validate_for_libtorch_model
    rejects the dtype / first input shape.

    Args:
        models_dir: Top-level model directory; the model is written to
            "<models_dir>/<model_name>/<model_version>/model.pt".
        model_version: Version sub-directory name (stringified).
        max_batch: 0 for a non-batching model ("libtorch_nobatch" prefix).
        dtype: Numpy dtype of all inputs and outputs.
        input_shapes: Sequence of per-input shapes.
        output_shapes: Sequence of per-output shapes; same length as
            input_shapes.

    Raises:
        ValueError: If the number of inputs/outputs is not between 1 and 4;
            no model class is defined for other counts.
    """
    assert len(input_shapes) == len(output_shapes)
    if not tu.validate_for_libtorch_model(
        dtype,
        dtype,
        dtype,
        input_shapes[0],
        input_shapes[0],
        input_shapes[0],
        max_batch,
        reshape=True,
    ):
        return

    torch_dtype = np_to_torch_dtype(dtype)
    io_cnt = len(input_shapes)
    model_name = tu.get_zero_model_name(
        "libtorch_nobatch" if max_batch == 0 else "libtorch", io_cnt, dtype
    )

    # Each supported I/O count needs its own explicitly written forward()
    # signature below; fail with a clear message instead of an unbound-name
    # error when the count has no matching class.
    if io_cnt not in (1, 2, 3, 4):
        raise ValueError(
            "libtorch reshape model generation supports 1 to 4 inputs/outputs, "
            "got {}".format(io_cnt)
        )

    # Create the model that reshapes inputs to corresponding outputs
    # Note that string I/O is supported only for 1-dimensional inputs/outputs.
    # Use identity model for string I/O models and add 'reshape' field with
    # empty shape so that batching is supported and the full shape becomes [-1].
    #
    # The List/Tuple annotations on the string variants are evaluated when the
    # class body runs, so both names must be importable at module scope.
    if io_cnt == 1:
        if dtype == np_dtype_string:

            class IdentityNet(nn.Module):
                def __init__(self):
                    super(IdentityNet, self).__init__()

                def forward(self, input0: List[str]) -> List[str]:
                    return input0

        else:

            class ReshapeNet(nn.Module):
                def __init__(self, *args):
                    super(ReshapeNet, self).__init__()
                    # args[0] is [list of output shapes, max_batch].
                    self.shape = args[0][0]
                    self.max_batch = args[0][1]

                def forward(self, input0):
                    if self.max_batch == 0:
                        return input0.view(self.shape[0])
                    else:
                        return input0.view([-1] + self.shape[0])

    elif io_cnt == 2:
        if dtype == np_dtype_string:

            class IdentityNet(nn.Module):
                def __init__(self):
                    super(IdentityNet, self).__init__()

                def forward(
                    self, input0: List[str], input1: List[str]
                ) -> Tuple[List[str], List[str]]:
                    return input0, input1

        else:

            class ReshapeNet(nn.Module):
                def __init__(self, *args):
                    super(ReshapeNet, self).__init__()
                    # args[0] is [list of output shapes, max_batch].
                    self.shape = args[0][0]
                    self.max_batch = args[0][1]

                def forward(self, input0, input1):
                    if self.max_batch == 0:
                        return input0.view(self.shape[0]), input1.view(self.shape[1])
                    else:
                        return (
                            input0.view([-1] + self.shape[0]),
                            input1.view([-1] + self.shape[1]),
                        )

    elif io_cnt == 3:
        if dtype == np_dtype_string:

            class IdentityNet(nn.Module):
                def __init__(self):
                    super(IdentityNet, self).__init__()

                def forward(
                    self, input0: List[str], input1: List[str], input2: List[str]
                ) -> Tuple[List[str], List[str], List[str]]:
                    return input0, input1, input2

        else:

            class ReshapeNet(nn.Module):
                def __init__(self, *args):
                    super(ReshapeNet, self).__init__()
                    # args[0] is [list of output shapes, max_batch].
                    self.shape = args[0][0]
                    self.max_batch = args[0][1]

                def forward(self, input0, input1, input2):
                    if self.max_batch == 0:
                        return (
                            input0.view(self.shape[0]),
                            input1.view(self.shape[1]),
                            input2.view(self.shape[2]),
                        )
                    else:
                        return (
                            input0.view([-1] + self.shape[0]),
                            input1.view([-1] + self.shape[1]),
                            input2.view([-1] + self.shape[2]),
                        )

    else:  # io_cnt == 4
        if dtype == np_dtype_string:

            class IdentityNet(nn.Module):
                def __init__(self):
                    super(IdentityNet, self).__init__()

                def forward(
                    self,
                    input0: List[str],
                    input1: List[str],
                    input2: List[str],
                    input3: List[str],
                ) -> Tuple[List[str], List[str], List[str], List[str]]:
                    return input0, input1, input2, input3

        else:

            class ReshapeNet(nn.Module):
                def __init__(self, *args):
                    super(ReshapeNet, self).__init__()
                    # args[0] is [list of output shapes, max_batch].
                    self.shape = args[0][0]
                    self.max_batch = args[0][1]

                def forward(self, input0, input1, input2, input3):
                    if self.max_batch == 0:
                        return (
                            input0.view(self.shape[0]),
                            input1.view(self.shape[1]),
                            input2.view(self.shape[2]),
                            input3.view(self.shape[3]),
                        )
                    else:
                        return (
                            input0.view([-1] + self.shape[0]),
                            input1.view([-1] + self.shape[1]),
                            input2.view([-1] + self.shape[2]),
                            input3.view([-1] + self.shape[3]),
                        )

    if dtype == np_dtype_string:
        identityModel = IdentityNet()
        traced = torch.jit.script(identityModel)
    else:
        reshapeModel = ReshapeNet([list(output_shapes), max_batch])
        traced = torch.jit.script(reshapeModel)

    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    try:
        os.makedirs(model_version_dir)
    except OSError:
        pass  # ignore existing dir

    traced.save(model_version_dir + "/model.pt")


def create_libtorch_modelconfig(
    models_dir,
    model_version,
    max_batch,
    dtype,
    input_shapes,
    input_model_shapes,
    output_shapes,
    output_model_shapes,
):
    """Write config.pbtxt for the libtorch reshape model.

    One input/output pair (INPUT__n / OUTPUT__n) is emitted per entry in
    input_shapes. A "reshape" clause is added to a tensor only when its
    config shape differs from the corresponding model shape. Nothing is
    written when tu.validate_for_libtorch_model rejects the dtype / first
    input shape.

    Args:
        models_dir: Top-level model directory.
        model_version: Not used by this function.
        max_batch: Value emitted as max_batch_size; 0 selects the
            "libtorch_nobatch" name prefix.
        dtype: Numpy dtype, converted with np_to_model_dtype.
        input_shapes: Per-input shapes written as dims.
        input_model_shapes: Per-input shapes written in the reshape clause.
        output_shapes: Per-output shapes written as dims.
        output_model_shapes: Per-output shapes written in the reshape clause.
    """
    assert len(input_shapes) == len(input_model_shapes)
    assert len(output_shapes) == len(output_model_shapes)
    assert len(input_shapes) == len(output_shapes)
    probe_shape = input_shapes[0]
    supported = tu.validate_for_libtorch_model(
        dtype,
        dtype,
        dtype,
        probe_shape,
        probe_shape,
        probe_shape,
        max_batch,
        reshape=True,
    )
    if not supported:
        return

    def reshape_clause(shape, model_shape):
        # Emit a reshape only when the config and model shapes differ.
        if shape != model_shape:
            return "reshape: {{ shape: [ {} ] }}".format(
                tu.shape_to_dims_str(model_shape)
            )
        return ""

    io_cnt = len(input_shapes)
    name_prefix = "libtorch_nobatch" if max_batch == 0 else "libtorch"
    model_name = tu.get_zero_model_name(name_prefix, io_cnt, dtype)
    config_dir = "/".join([models_dir, model_name])

    header = """
name: "{}"
platform: "pytorch_libtorch"
max_batch_size: {}
""".format(
        model_name, max_batch
    )

    io_template = """
input [
  {{
    name: "INPUT__{}"
    data_type: {}
    dims: [ {} ]
    {}
  }}
]
output [
  {{
    name: "OUTPUT__{}"
    data_type: {}
    dims: [ {} ]
    {}
  }}
]
"""
    sections = [header]
    for io_num in range(io_cnt):
        sections.append(
            io_template.format(
                io_num,
                np_to_model_dtype(dtype),
                tu.shape_to_dims_str(input_shapes[io_num]),
                reshape_clause(input_shapes[io_num], input_model_shapes[io_num]),
                io_num,
                np_to_model_dtype(dtype),
                tu.shape_to_dims_str(output_shapes[io_num]),
                reshape_clause(output_shapes[io_num], output_model_shapes[io_num]),
            )
        )
    config = "".join(sections)

    try:
        os.makedirs(config_dir)
    except OSError:
        pass  # ignore existing dir

    with open(config_dir + "/config.pbtxt", "w") as out:
        out.write(config)


def create_ensemble_modelfile(
    models_dir, model_version, max_batch, dtype, input_shapes, output_shapes
):
    """Create the model files for the "reshape" identity ensemble.

    Delegates to emu.create_identity_ensemble_modelfile unless
    tu.validate_for_ensemble_model rejects the dtype / first input shape,
    in which case nothing is generated.

    Args:
        models_dir: Top-level model directory.
        model_version: Model version passed through to the helper.
        max_batch: Maximum batch size passed through to the helper.
        dtype: Numpy dtype of all inputs and outputs.
        input_shapes: Sequence of per-input shapes.
        output_shapes: Sequence of per-output shapes; same length as
            input_shapes.
    """
    assert len(input_shapes) == len(output_shapes)

    ensemble_kind = "reshape"
    probe_shape = input_shapes[0]
    supported = tu.validate_for_ensemble_model(
        ensemble_kind,
        dtype,
        dtype,
        dtype,
        probe_shape,
        probe_shape,
        probe_shape,
    )
    if supported:
        emu.create_identity_ensemble_modelfile(
            ensemble_kind,
            models_dir,
            model_version,
            max_batch,
            dtype,
            input_shapes,
            output_shapes,
        )


def create_ensemble_modelconfig(
    models_dir,
    model_version,
    max_batch,
    dtype,
    input_shapes,
    input_model_shapes,
    output_shapes,
    output_model_shapes,
):
    """Create the config for the "reshape" identity ensemble.

    Delegates to emu.create_identity_ensemble_modelconfig unless
    tu.validate_for_ensemble_model rejects the dtype / first input shape.
    Any empty model shape is replaced by the corresponding config shape
    before delegating.

    Args:
        models_dir: Top-level model directory.
        model_version: Model version passed through to the helper.
        max_batch: Maximum batch size passed through to the helper.
        dtype: Numpy dtype of all inputs and outputs.
        input_shapes: Per-input config shapes.
        input_model_shapes: Per-input model shapes (may contain empty shapes).
        output_shapes: Per-output config shapes.
        output_model_shapes: Per-output model shapes (may contain empty
            shapes).
    """
    assert len(input_shapes) == len(input_model_shapes)
    assert len(output_shapes) == len(output_model_shapes)
    assert len(input_shapes) == len(output_shapes)

    ensemble_kind = "reshape"
    probe_shape = input_shapes[0]
    if not tu.validate_for_ensemble_model(
        ensemble_kind,
        dtype,
        dtype,
        dtype,
        probe_shape,
        probe_shape,
        probe_shape,
    ):
        return

    # No reason to reshape ensemble inputs / outputs to empty as the inner models
    # have to have non-empty shapes for inputs / outputs.
    io_cnt = len(input_shapes)
    effective_input_shapes = [
        input_model_shapes[i] if len(input_model_shapes[i]) != 0 else input_shapes[i]
        for i in range(io_cnt)
    ]
    effective_output_shapes = [
        output_model_shapes[i]
        if len(output_model_shapes[i]) != 0
        else output_shapes[i]
        for i in range(io_cnt)
    ]

    emu.create_identity_ensemble_modelconfig(
        ensemble_kind,
        models_dir,
        model_version,
        max_batch,
        dtype,
        input_shapes,
        tuple(effective_input_shapes),
        output_shapes,
        tuple(effective_output_shapes),
    )


def create_onnx_modelfile(
    models_dir, model_version, max_batch, dtype, input_shapes, output_shapes
):
    """Build and save an ONNX graph that maps INPUTn to OUTPUTn.

    When input_shapes equals output_shapes as a whole, each pair is
    connected with an Identity node; otherwise a Shape node followed by a
    Reshape node is emitted per pair. Nothing is generated when
    tu.validate_for_onnx_model rejects the dtype / first input shape.

    Args:
        models_dir: Top-level model directory; the model is written to
            "<models_dir>/<model_name>/<model_version>/model.onnx".
        model_version: Version sub-directory name (stringified).
        max_batch: 0 for a non-batching model ("onnx_nobatch" prefix);
            otherwise a leading None dimension is added to every tensor.
        dtype: Numpy dtype, converted with np_to_onnx_dtype.
        input_shapes: Sequence of per-input shapes.
        output_shapes: Sequence of per-output shapes; same length as
            input_shapes.
    """
    assert len(input_shapes) == len(output_shapes)
    probe_shape = input_shapes[0]
    if not tu.validate_for_onnx_model(
        dtype, dtype, dtype, probe_shape, probe_shape, probe_shape
    ):
        return

    elem_type = np_to_onnx_dtype(dtype)
    io_cnt = len(input_shapes)
    batching = max_batch != 0

    # Create the model
    model_name = tu.get_zero_model_name(
        "onnx" if batching else "onnx_nobatch", io_cnt, dtype
    )
    version_dir = "/".join([models_dir, model_name, str(model_version)])

    batch_dim = [None] if batching else []
    passthrough = input_shapes == output_shapes

    nodes = []
    graph_inputs = []
    graph_outputs = []
    dim_idx = 0
    for io_num in range(io_cnt):
        # The running index is threaded through both calls so that the
        # variable dimension names differ between tensors.
        in_shape, dim_idx = tu.shape_to_onnx_shape(input_shapes[io_num], dim_idx)
        out_shape, dim_idx = tu.shape_to_onnx_shape(output_shapes[io_num], dim_idx)
        in_name = "INPUT{}".format(io_num)
        out_name = "OUTPUT{}".format(io_num)

        graph_inputs.append(
            onnx.helper.make_tensor_value_info(
                in_name, elem_type, batch_dim + in_shape
            )
        )
        graph_outputs.append(
            onnx.helper.make_tensor_value_info(
                out_name, elem_type, batch_dim + out_shape
            )
        )

        if passthrough:
            nodes.append(onnx.helper.make_node("Identity", [in_name], [out_name]))
            continue

        # NOTE(review): the Shape node reads the same tensor that the
        # Reshape node produces — confirm this path is exercised/intended.
        shape_tensor = out_name + "_shape"
        nodes.extend(
            [
                onnx.helper.make_node("Shape", [out_name], [shape_tensor]),
                onnx.helper.make_node(
                    "Reshape", [in_name, shape_tensor], [out_name]
                ),
            ]
        )

    graph_proto = onnx.helper.make_graph(nodes, model_name, graph_inputs, graph_outputs)

    # Only pin the opset when --onnx_opset was given a positive value.
    model_kwargs = {"producer_name": "triton"}
    if FLAGS.onnx_opset > 0:
        model_kwargs["opset_imports"] = [
            onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        ]
    model_def = onnx.helper.make_model(graph_proto, **model_kwargs)

    try:
        os.makedirs(version_dir)
    except OSError:
        pass  # ignore existing dir

    onnx.save(model_def, version_dir + "/model.onnx")


def create_onnx_modelconfig(
    models_dir,
    model_version,
    max_batch,
    dtype,
    input_shapes,
    input_model_shapes,
    output_shapes,
    output_model_shapes,
):
    """Write config.pbtxt for the ONNX reshape model.

    The config text is produced by emu.create_general_modelconfig with the
    "onnxruntime_onnx" platform and force_tensor_number_suffix=True. Nothing
    is written when tu.validate_for_onnx_model rejects the dtype / first
    input shape.

    Args:
        models_dir: Top-level model directory.
        model_version: Not used by this function.
        max_batch: Maximum batch size; 0 selects the "onnx_nobatch" name
            prefix.
        dtype: Numpy dtype used for every input and output.
        input_shapes: Per-input config shapes.
        input_model_shapes: Per-input model shapes.
        output_shapes: Per-output config shapes.
        output_model_shapes: Per-output model shapes.
    """
    assert len(input_shapes) == len(input_model_shapes)
    assert len(output_shapes) == len(output_model_shapes)
    assert len(input_shapes) == len(output_shapes)
    probe_shape = input_shapes[0]
    supported = tu.validate_for_onnx_model(
        dtype, dtype, dtype, probe_shape, probe_shape, probe_shape
    )
    if not supported:
        return

    io_cnt = len(input_shapes)

    # Use a different model name for the non-batching variant
    name_prefix = "onnx_nobatch" if max_batch == 0 else "onnx"
    model_name = tu.get_zero_model_name(name_prefix, io_cnt, dtype)
    config_dir = "/".join([models_dir, model_name])

    # emu.repeat is called once per argument rather than shared, since its
    # return type is not visible here.
    config = emu.create_general_modelconfig(
        model_name,
        "onnxruntime_onnx",
        max_batch,
        emu.repeat(dtype, io_cnt),
        input_shapes,
        input_model_shapes,
        emu.repeat(dtype, io_cnt),
        output_shapes,
        output_model_shapes,
        emu.repeat(None, io_cnt),
        force_tensor_number_suffix=True,
    )

    try:
        os.makedirs(config_dir)
    except OSError:
        pass  # ignore existing dir

    with open(config_dir + "/config.pbtxt", "w") as out:
        out.write(config)


def create_openvino_modelfile(
    models_dir, model_version, max_batch, dtype, input_shapes, output_shapes
):
    """Build and save an OpenVINO model that reshapes INPUTn into OUTPUTn.

    Each input is an opset1 parameter and each output an opset1 reshape of
    the matching input. When max_batch is non-zero, max_batch is prepended
    as a fixed leading dimension to every shape. Nothing is generated when
    tu.validate_for_openvino_model rejects the dtype / first input shape.

    Args:
        models_dir: Top-level model directory; the model is saved under
            "<models_dir>/<model_name>/<model_version>".
        model_version: Version sub-directory name (stringified).
        max_batch: 0 for a non-batching model ("openvino_nobatch" prefix).
        dtype: Numpy dtype of all inputs.
        input_shapes: Sequence of per-input shapes (lists).
        output_shapes: Sequence of per-output shapes (lists); same length
            as input_shapes.
    """
    assert len(input_shapes) == len(output_shapes)
    batching = max_batch != 0
    batch_dim = [max_batch] if batching else []

    probe_shape = batch_dim + input_shapes[0]
    if not tu.validate_for_openvino_model(
        dtype, dtype, dtype, probe_shape, probe_shape, probe_shape
    ):
        return

    io_cnt = len(input_shapes)

    # Create the model
    model_name = tu.get_zero_model_name(
        "openvino" if batching else "openvino_nobatch", io_cnt, dtype
    )
    version_dir = "/".join([models_dir, model_name, str(model_version)])

    parameters = []
    results = []
    for io_num in range(io_cnt):
        param = ov.opset1.parameter(
            shape=batch_dim + input_shapes[io_num],
            dtype=dtype,
            name="INPUT{}".format(io_num),
        )
        parameters.append(param)
        results.append(
            ov.opset1.reshape(
                param,
                batch_dim + output_shapes[io_num],
                name="OUTPUT{}".format(io_num),
                special_zero=False,
            )
        )

    model = ov.Model(results, parameters, model_name)
    openvino_save_model(version_dir, model)


def create_openvino_modelconfig(
    models_dir,
    model_version,
    max_batch,
    dtype,
    input_shapes,
    input_model_shapes,
    output_shapes,
    output_model_shapes,
):
    """Write the config.pbtxt for the OpenVINO reshape model.

    One ``input``/``output`` pair (``INPUT<i>``/``OUTPUT<i>``) is emitted per
    entry of ``input_shapes``. A ``reshape`` clause is added to a tensor only
    when its config shape differs from the corresponding model shape.
    Nothing is written when ``tu.validate_for_openvino_model`` rejects the
    dtype / first input shape.

    ``model_version`` is accepted for signature parity with the model-file
    generator; it is not used here.
    """
    assert len(input_shapes) == len(input_model_shapes)
    assert len(output_shapes) == len(output_model_shapes)
    assert len(input_shapes) == len(output_shapes)
    batch_dim = [] if max_batch == 0 else [max_batch]
    if not tu.validate_for_openvino_model(
        dtype,
        dtype,
        dtype,
        batch_dim + input_shapes[0],
        batch_dim + input_shapes[0],
        batch_dim + input_shapes[0],
    ):
        return

    io_cnt = len(input_shapes)

    # Use a different model name for the non-batching variant
    model_name = tu.get_zero_model_name(
        "openvino_nobatch" if max_batch == 0 else "openvino", io_cnt, dtype
    )
    config_dir = models_dir + "/" + model_name

    def reshape_clause(shape, model_shape):
        # Only tensors whose config shape differs from the model shape need
        # an explicit reshape entry.
        if shape == model_shape:
            return ""
        return "reshape: {{ shape: [ {} ] }}".format(
            tu.shape_to_dims_str(model_shape)
        )

    config = """
name: "{}"
backend: "openvino"
max_batch_size: {}
""".format(
        model_name, max_batch
    )

    for io_num in range(io_cnt):
        config += """
input [
  {{
    name: "INPUT{}"
    data_type: {}
    dims: [ {} ]
    {}
  }}
]
output [
  {{
    name: "OUTPUT{}"
    data_type: {}
    dims: [ {} ]
    {}
  }}
]
""".format(
            io_num,
            np_to_model_dtype(dtype),
            tu.shape_to_dims_str(input_shapes[io_num]),
            reshape_clause(input_shapes[io_num], input_model_shapes[io_num]),
            io_num,
            np_to_model_dtype(dtype),
            tu.shape_to_dims_str(output_shapes[io_num]),
            reshape_clause(output_shapes[io_num], output_model_shapes[io_num]),
        )

    # Only an already-existing directory is tolerated; any other failure
    # (permissions, invalid path, ...) is reported at its source instead of
    # being silently swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_models(
    models_dir,
    dtype,
    input_shapes,
    input_model_shapes,
    output_shapes=None,
    output_model_shapes=None,
    no_batch=True,
):
    """Generate the ONNX and ensemble reshape models enabled through FLAGS.

    Output shapes default to the matching input shapes. The batching variant
    (max batch size 8) is always produced; the non-batching variant (max
    batch size 0) is produced as well when ``no_batch`` is true.
    """
    model_version = 1
    output_shapes = input_shapes if output_shapes is None else output_shapes
    output_model_shapes = (
        input_model_shapes if output_model_shapes is None else output_model_shapes
    )

    # Max batch sizes to generate, in generation order.
    batch_sizes = (8, 0) if no_batch else (8,)

    if FLAGS.onnx:
        for max_batch in batch_sizes:
            create_onnx_modelconfig(
                models_dir,
                model_version,
                max_batch,
                dtype,
                input_shapes,
                input_model_shapes,
                output_shapes,
                output_model_shapes,
            )
            create_onnx_modelfile(
                models_dir,
                model_version,
                max_batch,
                dtype,
                input_model_shapes,
                output_model_shapes,
            )

    # Ensembles that reshape to zero-sized tensors shouldn't be created:
    # reshaping from / to a zero dimension is not allowed, because ensemble
    # inputs / outputs are passed from / to other models AS IF they came from
    # direct client inference. They are created anyway, with the expectation
    # that such ensembles can be served but always return an error message.
    if FLAGS.ensemble:
        # Fixed-size nop models used by the ensembles
        for model_shape in input_model_shapes:
            emu.create_nop_modelconfig(models_dir, model_shape, np.float32)
            emu.create_nop_tunnel_modelconfig(models_dir, model_shape, np.float32)
            emu.create_nop_modelconfig(models_dir, [-1], np.float32)
        for max_batch in batch_sizes:
            create_ensemble_modelconfig(
                models_dir,
                model_version,
                max_batch,
                dtype,
                input_shapes,
                input_model_shapes,
                output_shapes,
                output_model_shapes,
            )
            create_ensemble_modelfile(
                models_dir,
                model_version,
                max_batch,
                dtype,
                input_model_shapes,
                output_model_shapes,
            )


def create_trt_models(
    models_dir,
    dtype,
    input_shapes,
    input_model_shapes,
    output_shapes=None,
    output_model_shapes=None,
    no_batch=True,
):
    """Generate the TensorRT reshape models when FLAGS.tensorrt is set.

    Output shapes default to the matching input shapes. The max-batch-8
    variant is always produced; the max-batch-0 variant is added when
    ``no_batch`` is true.
    """
    if output_shapes is None:
        output_shapes = input_shapes
    if output_model_shapes is None:
        output_model_shapes = input_model_shapes

    if not FLAGS.tensorrt:
        return

    model_version = 1
    for max_batch in ([8, 0] if no_batch else [8]):
        create_plan_modelconfig(
            models_dir,
            model_version,
            max_batch,
            dtype,
            input_shapes,
            input_model_shapes,
            output_shapes,
            output_model_shapes,
        )
        create_plan_modelfile(
            models_dir,
            model_version,
            max_batch,
            dtype,
            input_model_shapes,
            output_model_shapes,
        )


def create_libtorch_models(
    models_dir,
    dtype,
    input_shapes,
    input_model_shapes,
    output_shapes=None,
    output_model_shapes=None,
    no_batch=True,
):
    """Generate the LibTorch reshape models when FLAGS.libtorch is set.

    Output shapes default to the matching input shapes. The max-batch-8
    variant is always produced; the max-batch-0 variant is added when
    ``no_batch`` is true and the dtype is not the string dtype.
    """
    if output_shapes is None:
        output_shapes = input_shapes
    if output_model_shapes is None:
        output_model_shapes = input_model_shapes

    if not FLAGS.libtorch:
        return

    model_version = 1
    batch_sizes = [8]
    # skip the non-batching variant for libtorch string I/O
    if no_batch and (dtype != np_dtype_string):
        batch_sizes.append(0)

    for max_batch in batch_sizes:
        create_libtorch_modelconfig(
            models_dir,
            model_version,
            max_batch,
            dtype,
            input_shapes,
            input_model_shapes,
            output_shapes,
            output_model_shapes,
        )
        create_libtorch_modelfile(
            models_dir,
            model_version,
            max_batch,
            dtype,
            input_model_shapes,
            output_model_shapes,
        )


def create_openvino_models(
    models_dir,
    dtype,
    input_shapes,
    input_model_shapes,
    output_shapes=None,
    output_model_shapes=None,
    no_batch=True,
):
    """Generate the OpenVINO reshape models when FLAGS.openvino is set.

    Output shapes default to the matching input shapes. The max-batch-8
    variant is always produced; the max-batch-0 variant is added when
    ``no_batch`` is true.
    """
    model_version = 1
    output_shapes = input_shapes if output_shapes is None else output_shapes
    output_model_shapes = (
        input_model_shapes if output_model_shapes is None else output_model_shapes
    )

    if not FLAGS.openvino:
        return

    def generate(max_batch):
        # Config first, then the model file, for one max batch size.
        create_openvino_modelconfig(
            models_dir,
            model_version,
            max_batch,
            dtype,
            input_shapes,
            input_model_shapes,
            output_shapes,
            output_model_shapes,
        )
        create_openvino_modelfile(
            models_dir,
            model_version,
            max_batch,
            dtype,
            input_model_shapes,
            output_model_shapes,
        )

    generate(8)
    if no_batch:
        generate(0)


# Script entry point: parse the command line, import the requested backend
# toolkits, then generate the reshape models. The generation calls below run
# in a fixed order.
if __name__ == "__main__":
    # --models_dir is mandatory; every backend flag is an opt-in switch.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir", type=str, required=True, help="Top-level model directory"
    )
    parser.add_argument(
        "--tensorrt",
        required=False,
        action="store_true",
        help="Generate TensorRT PLAN models",
    )
    parser.add_argument(
        "--onnx",
        required=False,
        action="store_true",
        help="Generate Onnx Runtime Onnx models",
    )
    parser.add_argument(
        "--onnx_opset",
        type=int,
        required=False,
        default=0,
        help="Opset used for Onnx models. Default is to use ONNXRT default",
    )
    parser.add_argument(
        "--libtorch",
        required=False,
        action="store_true",
        help="Generate Pytorch LibTorch models",
    )
    parser.add_argument(
        "--openvino",
        required=False,
        action="store_true",
        help="Generate OpenVino models",
    )
    parser.add_argument(
        "--ensemble",
        required=False,
        action="store_true",
        help="Generate ensemble models",
    )
    parser.add_argument(
        "--variable",
        required=False,
        action="store_true",
        help="Used variable-shape tensors for input/output",
    )
    # parse_known_args tolerates unrecognized arguments; they end up in
    # `unparsed`, which is not used afterwards.
    FLAGS, unparsed = parser.parse_known_args()

    # Import each backend toolkit only when it was requested. These imports
    # execute at module scope, so the names they bind are the globals the
    # generator functions reference (e.g. `ov` in create_openvino_modelfile).
    if FLAGS.tensorrt:
        import tensorrt as trt
    if FLAGS.onnx:
        import onnx
    if FLAGS.libtorch:
        import torch
        from torch import nn
    if FLAGS.openvino:
        import openvino.runtime as ov

    import test_util as tu

    # TensorRT, OpenVino and LibTorch must be handled separately since they
    # don't support zero-sized tensors.
    create_models(FLAGS.models_dir, np_dtype_string, ([1],), ([],), no_batch=False)
    create_models(FLAGS.models_dir, np.float32, ([1],), ([],), no_batch=False)
    create_models(
        FLAGS.models_dir, np.float32, ([1], [8]), ([], [4, 1, 2]), no_batch=False
    )
    create_models(
        FLAGS.models_dir,
        np.float32,
        ([4, 4], [2], [2, 2, 3]),
        ([16], [1, 2], [3, 2, 2]),
    )
    create_libtorch_models(
        FLAGS.models_dir, np.float32, ([1],), ([1, 1, 1],), no_batch=False
    )
    create_libtorch_models(
        FLAGS.models_dir, np.float32, ([1], [8]), ([1, 1, 1], [4, 1, 2]), no_batch=False
    )
    create_libtorch_models(
        FLAGS.models_dir,
        np.float32,
        ([4, 4], [2], [2, 2, 3]),
        ([16], [1, 2], [3, 2, 2]),
    )
    create_libtorch_models(
        FLAGS.models_dir, np_dtype_string, ([1],), ([],), no_batch=False
    )
    create_openvino_models(
        FLAGS.models_dir, np.float32, ([1],), ([1, 1, 1],), no_batch=False
    )
    create_openvino_models(
        FLAGS.models_dir, np.float32, ([1], [8]), ([1, 1, 1], [4, 1, 2]), no_batch=False
    )
    create_openvino_models(
        FLAGS.models_dir,
        np.float32,
        ([4, 4], [2], [2, 2, 3]),
        ([16], [1, 2], [3, 2, 2]),
    )
    create_trt_models(FLAGS.models_dir, np.float32, ([1], [8]), ([1, 1, 1], [4, 1, 2]))

    # Models that reshape only the input, not the output.
    create_models(
        FLAGS.models_dir,
        np.float32,
        ([4, 4], [2], [2, 2, 3], [1]),
        ([16], [1, 2], [3, 2, 2], [1]),
        output_shapes=([16], [1, 2], [3, 2, 2], [1]),
        output_model_shapes=([16], [1, 2], [3, 2, 2], [1]),
    )

    create_libtorch_models(
        FLAGS.models_dir,
        np.float32,
        ([4, 4], [2], [2, 2, 3], [1]),
        ([16], [1, 2], [3, 2, 2], [1]),
        output_shapes=([16], [1, 2], [3, 2, 2], [1]),
        output_model_shapes=([16], [1, 2], [3, 2, 2], [1]),
    )

    create_openvino_models(
        FLAGS.models_dir,
        np.float32,
        ([4, 4], [2], [2, 2, 3], [1]),
        ([16], [1, 2], [3, 2, 2], [1]),
        output_shapes=([16], [1, 2], [3, 2, 2], [1]),
        output_model_shapes=([16], [1, 2], [3, 2, 2], [1]),
    )

    create_trt_models(
        FLAGS.models_dir,
        np.float32,
        ([4, 4], [2], [2, 2, 3], [1]),
        ([2, 2, 4], [1, 2, 1], [3, 2, 2], [1, 1, 1]),
        output_shapes=([2, 2, 4], [1, 2, 1], [3, 2, 2], [1, 1, 1]),
        output_model_shapes=([2, 2, 4], [1, 2, 1], [3, 2, 2], [1, 1, 1]),
    )

    # Tests with models that accept variable-shape input/output tensors and reshape
    # TensorRT is ignored as it only allows fixed-shape tensors
    # PyTorch is ignored as "tensor.view()" is shape dependent (shape is fixed
    # based on input used for tracing), need to find equivalent operation that
    # is not shape dependent.
    if FLAGS.variable:
        create_models(FLAGS.models_dir, np.int32, ([2, 4, -1, 6],), ([8, -1, 1, 6],))
        create_models(
            FLAGS.models_dir,
            np.int32,
            ([1, -1, 1], [-1], [2, 2, 3]),
            ([-1], [1, -1, 1], [3, 2, 2]),
        )
        create_models(
            FLAGS.models_dir,
            np.int32,
            ([-1, 1], [2]),
            ([1, -1], [1, 2]),
            output_shapes=([1, -1], [1, 2]),
            output_model_shapes=([1, -1], [1, 2]),
        )

    # TRT plan that reshapes neither input nor output. Needed for
    # L0_perflab_nomodel.
    create_trt_models(FLAGS.models_dir, np.float32, ([1],), ([1],))


================================================
FILE: qa/common/gen_qa_sequence_models.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os

import gen_ensemble_model_utils as emu
import numpy as np
from gen_common import (
    np_to_model_dtype,
    np_to_onnx_dtype,
    np_to_torch_dtype,
    np_to_trt_dtype,
    openvino_save_model,
)

# Parsed command-line options. Left as None here; functions in this file read
# attributes from it (e.g. FLAGS.tensorrt_shape_io), so it presumably gets
# populated by the script's entry point before they run -- TODO confirm.
FLAGS = None
# NumPy object dtype; judging by the name it presumably stands in for string
# tensors -- verify against callers.
np_dtype_string = np.dtype(object)


def create_plan_shape_tensor_modelfile(
    models_dir, model_version, max_batch, dtype, shape, shape_tensor_input_dtype
):
    """Build and save a TensorRT sequence plan that uses shape-tensor I/O.

    The network takes two inputs (INPUT and SHAPE_INPUT) and two control
    inputs (START and READY). In absence of a proper accumulator it produces:

    * OUTPUT: ('INPUT' + 'START') * 'READY', i.e. 0 when not ready.
    * RESIZED_OUTPUT: 'INPUT' resized to the shape given in 'SHAPE_INPUT'.
    * SHAPE_OUTPUT: the shape values of the resized output (int64).

    Note that the resize layer does not support int tensors.

    The serialized engine is written to
    ``<models_dir>/<model_name>/<model_version>/model.plan`` where the model
    name carries the name of ``shape_tensor_input_dtype`` as a suffix.
    """
    trt_dtype = np_to_trt_dtype(dtype)
    trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype)
    trt_memory_format = trt.TensorFormat.LINEAR

    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    # Control inputs have one element per (non-batch) dimension rank.
    unit_shape = [1] * len(shape)
    if max_batch != 0:
        # Batching variant: dynamic leading batch dimension, and the shape
        # tensor carries one extra entry for it.
        shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [1 + len(shape)])
        in0 = network.add_input("INPUT", trt_dtype, [-1] + shape)
        start0 = network.add_input("START", trt_dtype, [-1] + unit_shape)
        ready0 = network.add_input("READY", trt_dtype, [-1] + unit_shape)
    else:
        shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [len(shape)])
        in0 = network.add_input("INPUT", trt_dtype, shape)
        start0 = network.add_input("START", trt_dtype, unit_shape)
        ready0 = network.add_input("READY", trt_dtype, unit_shape)

    add = network.add_elementwise(in0, start0, trt.ElementWiseOperation.SUM)
    out0 = network.add_elementwise(
        add.get_output(0), ready0, trt.ElementWiseOperation.PROD
    ).get_output(0)

    # Resize INPUT to the shape supplied through the SHAPE_INPUT shape tensor.
    resize_layer = network.add_resize(input=in0)
    resize_layer.set_input(1, shape_in0)
    resized_out0 = resize_layer.get_output(0)
    shape_out0 = network.add_shape(resized_out0)

    shape_out0.get_output(0).name = "SHAPE_OUTPUT"
    shape_out0.get_output(0).dtype = trt.int64
    network.mark_output_for_shapes(shape_out0.get_output(0))

    out0.name = "OUTPUT"
    out0.dtype = trt_dtype
    network.mark_output(out0)

    resized_out0.name = "RESIZED_OUTPUT"
    resized_out0.dtype = trt_dtype
    network.mark_output(resized_out0)

    # Restrict every I/O tensor to the linear memory format.
    in0.allowed_formats = 1 << int(trt_memory_format)
    shape_in0.allowed_formats = 1 << int(trt_memory_format)
    start0.allowed_formats = 1 << int(trt_memory_format)
    ready0.allowed_formats = 1 << int(trt_memory_format)
    out0.allowed_formats = 1 << int(trt_memory_format)
    shape_out0.get_output(0).allowed_formats = 1 << int(trt_memory_format)
    resized_out0.allowed_formats = 1 << int(trt_memory_format)

    if trt_dtype == trt.int8:
        in0.dynamic_range = (-128.0, 127.0)
        out0.dynamic_range = (-128.0, 127.0)
        resized_out0.dynamic_range = (-128.0, 127.0)
        start0.dynamic_range = (-128.0, 127.0)
        ready0.dynamic_range = (-128.0, 127.0)

    flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
    flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

    if trt_dtype == trt.int8:
        flags |= 1 << int(trt.BuilderFlag.INT8)
    elif trt_dtype == trt.float16:
        flags |= 1 << int(trt.BuilderFlag.FP16)

    # Batch-dimension part of the optimization profile (empty when the model
    # does not batch).
    min_prefix = []
    opt_prefix = []
    max_prefix = []

    if max_batch != 0:
        min_prefix = [1]
        opt_prefix = [max(1, max_batch)]
        max_prefix = [max(1, max_batch)]

    min_shape = min_prefix + [1] * len(shape)
    opt_shape = opt_prefix + [8] * len(shape)
    max_shape = max_prefix + [32] * len(shape)

    profile = builder.create_optimization_profile()
    profile.set_shape_input("SHAPE_INPUT", min_shape, opt_shape, max_shape)
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    profile.set_shape(
        "START",
        min_prefix + unit_shape,
        opt_prefix + unit_shape,
        max_prefix + unit_shape,
    )
    profile.set_shape(
        "READY",
        min_prefix + unit_shape,
        opt_prefix + unit_shape,
        max_prefix + unit_shape,
    )

    config = builder.create_builder_config()
    config.flags = flags
    config.add_optimization_profile(profile)

    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback for builders without build_serialized_network.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_name = tu.get_sequence_model_name(
        "plan_nobatch" if max_batch == 0 else "plan", dtype
    )
    model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Only an already-existing directory is tolerated; other failures are
    # reported at their source instead of being silently swallowed.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build and save the TensorRT sequence plan for ``dtype`` / ``shape``.

    For now a proper accumulator is not implemented: the network just
    returns 0 if not-ready and 'INPUT'+'START' otherwise (computed as
    ('INPUT' + 'START') * 'READY'); the tests know to expect this.

    When ``max_batch`` is non-zero a dynamic leading batch dimension is
    added to all inputs. Dimensions of ``shape`` equal to -1 get the
    optimization-profile range min=1 / opt=8 / max=32. The serialized engine
    is written to ``<models_dir>/<model_name>/<model_version>/model.plan``.
    """
    trt_dtype = np_to_trt_dtype(dtype)
    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    # Control inputs have one element per (non-batch) dimension rank.
    unit_shape = [1] * len(shape)
    if max_batch != 0:
        in0 = network.add_input("INPUT", trt_dtype, [-1] + shape)
        start0 = network.add_input("START", trt_dtype, [-1] + unit_shape)
        ready0 = network.add_input("READY", trt_dtype, [-1] + unit_shape)
    else:
        in0 = network.add_input("INPUT", trt_dtype, shape)
        start0 = network.add_input("START", trt_dtype, unit_shape)
        ready0 = network.add_input("READY", trt_dtype, unit_shape)

    add = network.add_elementwise(in0, start0, trt.ElementWiseOperation.SUM)
    out0 = network.add_elementwise(
        add.get_output(0), ready0, trt.ElementWiseOperation.PROD
    )

    out0.get_output(0).name = "OUTPUT"
    network.mark_output(out0.get_output(0))

    # Optimization-profile shapes: optional batch dimension followed by the
    # tensor dimensions, where each dynamic (-1) dimension gets a range.
    batch_min = [1] if max_batch != 0 else []
    batch_max = [max(1, max_batch)] if max_batch != 0 else []
    min_shape = batch_min + [1 if dim == -1 else dim for dim in shape]
    opt_shape = batch_max + [8 if dim == -1 else dim for dim in shape]
    max_shape = batch_max + [32 if dim == -1 else dim for dim in shape]

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    if max_batch != 0:
        profile.set_shape(
            "START",
            [1] + unit_shape,
            [max_batch] + unit_shape,
            [max_batch] + unit_shape,
        )
        profile.set_shape(
            "READY",
            [1] + unit_shape,
            [max_batch] + unit_shape,
            [max_batch] + unit_shape,
        )
    else:
        profile.set_shape("START", unit_shape, unit_shape, unit_shape)
        profile.set_shape("READY", unit_shape, unit_shape, unit_shape)
    config = builder.create_builder_config()
    config.add_optimization_profile(profile)

    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback for builders without build_serialized_network.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_name = tu.get_sequence_model_name(
        "plan_nobatch" if max_batch == 0 else "plan", dtype
    )
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Only an already-existing directory is tolerated; other failures are
    # reported at their source instead of being silently swallowed.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build and save the TensorRT sequence plan with explicit I/O constraints.

    Same network as the plain plan generator -- for now a proper accumulator
    is not implemented, it just returns 0 if not-ready and 'INPUT'+'START'
    otherwise; the tests know to expect this -- but here the output dtype is
    pinned, every I/O tensor is restricted to the linear memory format and
    the engine is built with the DIRECT_IO, PREFER_PRECISION_CONSTRAINTS and
    REJECT_EMPTY_ALGORITHMS builder flags (plus INT8 / FP16 when ``dtype``
    maps to those TensorRT types).

    The serialized engine is written to
    ``<models_dir>/<model_name>/<model_version>/model.plan``.
    """
    trt_dtype = np_to_trt_dtype(dtype)
    trt_memory_format = trt.TensorFormat.LINEAR

    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    # Control inputs have one element per (non-batch) dimension rank.
    unit_shape = [1] * len(shape)
    if max_batch != 0:
        in0 = network.add_input("INPUT", trt_dtype, [-1] + shape)
        start0 = network.add_input("START", trt_dtype, [-1] + unit_shape)
        ready0 = network.add_input("READY", trt_dtype, [-1] + unit_shape)
    else:
        in0 = network.add_input("INPUT", trt_dtype, shape)
        start0 = network.add_input("START", trt_dtype, unit_shape)
        ready0 = network.add_input("READY", trt_dtype, unit_shape)

    add = network.add_elementwise(in0, start0, trt.ElementWiseOperation.SUM)
    out0 = network.add_elementwise(
        add.get_output(0), ready0, trt.ElementWiseOperation.PROD
    )
    # out0 is the element-wise *layer*; tensor properties (name, dtype,
    # allowed formats, dynamic range) belong to its output tensor.
    out_tensor = out0.get_output(0)

    out_tensor.name = "OUTPUT"
    network.mark_output(out_tensor)

    out_tensor.dtype = trt_dtype

    # Restrict every I/O tensor to the linear memory format.
    in0.allowed_formats = 1 << int(trt_memory_format)
    start0.allowed_formats = 1 << int(trt_memory_format)
    ready0.allowed_formats = 1 << int(trt_memory_format)
    out_tensor.allowed_formats = 1 << int(trt_memory_format)

    if trt_dtype == trt.int8:
        in0.dynamic_range = (-128.0, 127.0)
        # Set the range on the output tensor, not on the layer object.
        out_tensor.dynamic_range = (-128.0, 127.0)
        start0.dynamic_range = (-128.0, 127.0)
        ready0.dynamic_range = (-128.0, 127.0)

    flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
    flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

    if trt_dtype == trt.int8:
        flags |= 1 << int(trt.BuilderFlag.INT8)
    elif trt_dtype == trt.float16:
        flags |= 1 << int(trt.BuilderFlag.FP16)

    # Optimization-profile shapes: optional batch dimension followed by the
    # tensor dimensions, where each dynamic (-1) dimension gets a range.
    min_shape = []
    opt_shape = []
    max_shape = []
    if max_batch != 0:
        min_shape = min_shape + [1]
        opt_shape = opt_shape + [max(1, max_batch)]
        max_shape = max_shape + [max(1, max_batch)]
    for i in shape:
        if i == -1:
            min_shape = min_shape + [1]
            opt_shape = opt_shape + [8]
            max_shape = max_shape + [32]
        else:
            min_shape = min_shape + [i]
            opt_shape = opt_shape + [i]
            max_shape = max_shape + [i]

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    if max_batch != 0:
        profile.set_shape(
            "START",
            [1] + unit_shape,
            [max_batch] + unit_shape,
            [max_batch] + unit_shape,
        )
        profile.set_shape(
            "READY",
            [1] + unit_shape,
            [max_batch] + unit_shape,
            [max_batch] + unit_shape,
        )
    else:
        profile.set_shape("START", unit_shape, unit_shape, unit_shape)
        profile.set_shape("READY", unit_shape, unit_shape, unit_shape)

    config = builder.create_builder_config()
    config.flags = flags
    config.add_optimization_profile(profile)

    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback for builders without build_serialized_network.
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_name = tu.get_sequence_model_name(
        "plan_nobatch" if max_batch == 0 else "plan", dtype
    )
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Only an already-existing directory is tolerated; other failures are
    # reported at their source instead of being silently swallowed.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_models(models_dir, model_version, max_batch, dtype, shape):
    """Generate the TensorRT sequence plan using the generator for ``dtype``.

    Does nothing when ``tu.validate_for_trt_model`` rejects the dtype/shape.
    float32 models go through ``create_plan_modelfile``; every other dtype
    goes through ``create_plan_rf_modelfile``.
    """
    supported = tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape)
    if not supported:
        return

    generate = (
        create_plan_rf_modelfile if dtype != np.float32 else create_plan_modelfile
    )
    generate(models_dir, model_version, max_batch, dtype, shape)


def create_plan_modelconfig(
    models_dir, model_version, max_batch, dtype, shape, shape_tensor_input_dtype=None
):
    """Write the config.pbtxt for the TensorRT sequence plan model.

    Nothing is written when ``tu.validate_for_trt_model`` rejects the
    dtype/shape. When ``shape_tensor_input_dtype`` is given, its NumPy dtype
    name is appended to the model name (matching the naming used by the
    shape-tensor model-file generator). ``FLAGS.tensorrt_shape_io`` selects
    the configuration that additionally declares the SHAPE_INPUT,
    RESIZED_OUTPUT and SHAPE_OUTPUT tensors.

    ``model_version`` is accepted for signature parity with the model-file
    generators; it is not used here.
    """
    if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape):
        return

    model_name = tu.get_sequence_model_name(
        "plan_nobatch" if max_batch == 0 else "plan", dtype
    )
    # Explicit None test: do not depend on the truthiness of a dtype-like
    # object to decide whether the suffix is needed.
    if shape_tensor_input_dtype is not None:
        model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name

    config_dir = models_dir + "/" + model_name
    if FLAGS.tensorrt_shape_io:
        # Shape tensors are 1-D with one entry per dimension of `shape`.
        shape_tensor_dim = len(shape)
        config = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }}
  ]
}}
input [
  {{
    name: "INPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
input [
  {{
    name: "SHAPE_INPUT"
    data_type: {}
    dims: [ {} ]
    is_shape_tensor: true
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "RESIZED_OUTPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "SHAPE_OUTPUT"
    data_type: TYPE_INT64
    dims: [ {} ]
    is_shape_tensor: true
  }}
]
instance_group [
  {{
    kind: KIND_GPU
  }}
]
""".format(
            model_name,
            max_batch,
            "int32" if dtype == np.int32 else "fp32",
            "int32" if dtype == np.int32 else "fp32",
            np_to_model_dtype(dtype),
            tu.shape_to_dims_str(shape),
            np_to_model_dtype(shape_tensor_input_dtype),
            shape_tensor_dim,
            np_to_model_dtype(dtype),
            tu.shape_to_dims_str(shape),
            np_to_model_dtype(dtype),
            tu.shape_to_dims_str(shape),
            shape_tensor_dim,
        )

    else:
        config = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }}
  ]
}}
input [
  {{
    name: "INPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
instance_group [
  {{
    kind: KIND_GPU
  }}
]
""".format(
            model_name,
            max_batch,
            "int32" if dtype == np.int32 else "fp32",
            "int32" if dtype == np.int32 else "fp32",
            np_to_model_dtype(dtype),
            tu.shape_to_dims_str(shape),
            np_to_model_dtype(dtype),
            tu.shape_to_dims_str(shape),
        )

    # Only an already-existing directory is tolerated; other failures are
    # reported at their source instead of being silently swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_onnx_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build the ONNX sequence test model and save it as ``model.onnx``.

    The graph is not a proper accumulator: it computes
    ``OUTPUT = READY * (INPUT + START)``, i.e. 0 when READY is 0 and
    INPUT + START otherwise. The tests know to expect this.

    Args:
        models_dir: Top-level model repository directory.
        model_version: Version number, used as the sub-directory name.
        max_batch: Maximum batch size; 0 selects the "onnx_nobatch" model
            name and omits the leading batch dimension.
        dtype: Numpy dtype of the model tensors.
        shape: Shape of INPUT/OUTPUT, excluding any batch dimension.

    Returns:
        None. Nothing is written when ``tu.validate_for_onnx_model`` rejects
        the dtype/shape combination.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    model_name = tu.get_sequence_model_name(
        "onnx_nobatch" if max_batch == 0 else "onnx", dtype
    )
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    onnx_dtype = np_to_onnx_dtype(dtype)
    onnx_control_dtype = onnx_dtype
    onnx_input_shape, idx = tu.shape_to_onnx_shape(shape, 0)
    onnx_output_shape, idx = tu.shape_to_onnx_shape(shape, idx)

    # If the input is a string then use int32 for operation and just
    # cast to/from string for input and output.
    if onnx_dtype == onnx.TensorProto.STRING:
        onnx_control_dtype = onnx.TensorProto.INT32

    # If input dtype is bool, then use bool type for control and
    # int32 type for input/output. The control dtype was captured above,
    # so it stays BOOL here.
    if onnx_dtype == onnx.TensorProto.BOOL:
        onnx_dtype = onnx.TensorProto.INT32

    batch_dim = [] if max_batch == 0 else [None]

    onnx_input = onnx.helper.make_tensor_value_info(
        "INPUT", onnx_dtype, batch_dim + onnx_input_shape
    )
    onnx_start = onnx.helper.make_tensor_value_info(
        "START", onnx_control_dtype, batch_dim + [1]
    )
    onnx_ready = onnx.helper.make_tensor_value_info(
        "READY", onnx_control_dtype, batch_dim + [1]
    )
    onnx_output = onnx.helper.make_tensor_value_info(
        "OUTPUT", onnx_dtype, batch_dim + onnx_output_shape
    )

    # Cast int8 and int16 inputs to int32 because the ONNX Add/Sub operators
    # do not support those types. String inputs are cast to int32 as well.
    if onnx_dtype in (
        onnx.TensorProto.INT8,
        onnx.TensorProto.INT16,
        onnx.TensorProto.STRING,
    ):
        internal_input = onnx.helper.make_node(
            "Cast", ["INPUT"], ["_INPUT"], to=onnx.TensorProto.INT32
        )
    else:
        internal_input = onnx.helper.make_node("Identity", ["INPUT"], ["_INPUT"])

    # Boolean controls are converted to int32 first; otherwise the control
    # inputs feed the arithmetic nodes directly.
    control_casts = []
    start_name, ready_name = "START", "READY"
    if onnx_control_dtype == onnx.TensorProto.BOOL:
        control_casts = [
            onnx.helper.make_node(
                "Cast", ["START"], ["_START"], to=onnx.TensorProto.INT32
            ),
            onnx.helper.make_node(
                "Cast", ["READY"], ["_READY"], to=onnx.TensorProto.INT32
            ),
        ]
        start_name, ready_name = "_START", "_READY"

    add = onnx.helper.make_node("Add", ["_INPUT", start_name], ["add"])
    # Take advantage of knowledge that the READY false value is 0 and true is 1
    mul = onnx.helper.make_node("Mul", [ready_name, "add"], ["CAST"])

    # Avoid cast from float16 to float16
    # (bug in Onnx Runtime, cast from float16 to float16 will become cast from float16 to float32)
    if onnx_dtype == onnx.TensorProto.FLOAT16:
        cast = onnx.helper.make_node("Identity", ["CAST"], ["OUTPUT"])
    else:
        cast = onnx.helper.make_node("Cast", ["CAST"], ["OUTPUT"], to=onnx_dtype)

    onnx_nodes = [internal_input] + control_casts + [add, mul, cast]
    onnx_inputs = [onnx_input, onnx_start, onnx_ready]
    onnx_outputs = [onnx_output]

    graph_proto = onnx.helper.make_graph(
        onnx_nodes, model_name, onnx_inputs, onnx_outputs
    )
    if FLAGS.onnx_opset > 0:
        model_opset = onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
        model_def = onnx.helper.make_model(
            graph_proto, producer_name="triton", opset_imports=[model_opset]
        )
    else:
        model_def = onnx.helper.make_model(graph_proto, producer_name="triton")

    # Only an already-existing directory is tolerated; any other failure
    # (e.g. permissions) is reported instead of being silently swallowed.
    os.makedirs(model_version_dir, exist_ok=True)

    onnx.save(model_def, model_version_dir + "/model.onnx")


def create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape):
    """Write ``config.pbtxt`` for the ONNX sequence test model.

    The general part of the configuration comes from
    ``emu.create_general_modelconfig``; a ``sequence_batching`` section
    declaring the START and READY control inputs is appended to it.

    Args:
        models_dir: Top-level model repository directory.
        model_version: Unused here; kept for a signature parallel to the
            model-file generator.
        max_batch: Maximum batch size; 0 selects the "onnx_nobatch" name.
        dtype: Numpy dtype of the model. For ``bool`` the controls are
            declared as bool while input/output are declared as int32.
        shape: Shape of the input/output, excluding any batch dimension.

    Returns:
        None. Nothing is written when ``tu.validate_for_onnx_model`` rejects
        the dtype/shape combination.
    """
    if not tu.validate_for_onnx_model(dtype, dtype, dtype, shape, shape, shape):
        return

    model_name = tu.get_sequence_model_name(
        "onnx_nobatch" if max_batch == 0 else "onnx", dtype
    )
    config_dir = models_dir + "/" + model_name

    # The model name above is derived from the original dtype; only after
    # that is a bool dtype replaced by int32 for the input/output tensors.
    if dtype == np.float32:
        control_type = "fp32"
    elif dtype == bool:
        control_type = "bool"
        dtype = np.int32
    else:
        control_type = "int32"

    instance_group_string = """
instance_group [
  {
    kind: KIND_GPU
  }
]
"""

    # [TODO] move create_general_modelconfig() out of emu as it is general
    # enough for all backends to use
    config = emu.create_general_modelconfig(
        model_name,
        "onnxruntime_onnx",
        max_batch,
        [dtype],
        [shape],
        [None],
        [dtype],
        [shape],
        [None],
        [None],
        force_tensor_number_suffix=False,
        instance_group_str=instance_group_string,
    )

    config += """
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {type}_false_true: [ 0, 1 ]
        }}
      ]
    }}
  ]
}}
""".format(
        type=control_type
    )

    # Only an already-existing directory is tolerated; other OS errors
    # propagate instead of being silently swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_libtorch_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Trace the LibTorch sequence test model and save it as ``model.pt``.

    The traced module computes ``(input0 + start0) * ready0``.

    Args:
        models_dir: Top-level model repository directory.
        model_version: Version number, used as the sub-directory name.
        max_batch: Maximum batch size; 0 selects the "libtorch_nobatch" name.
        dtype: Numpy dtype of the model. For ``bool`` the data input is
            traced as int32 and the control inputs as int64.
        shape: Shape of the example tensors; -1 entries are replaced by 1.

    Returns:
        None. Nothing is written when ``tu.validate_for_libtorch_model``
        rejects the dtype/shape/batch combination.
    """
    if not tu.validate_for_libtorch_model(
        dtype, dtype, dtype, shape, shape, shape, max_batch
    ):
        return

    torch_dtype = np_to_torch_dtype(dtype)
    torch_control_type = torch_dtype

    # If input dtype is bool, then use bool type for control and
    # int32 type for input/output
    if torch_dtype == torch.bool:
        torch_dtype = torch.int32

    model_name = tu.get_sequence_model_name(
        "libtorch_nobatch" if max_batch == 0 else "libtorch", dtype
    )
    # handle for -1 (when variable) since can't create tensor with shape of [-1]
    shape = [abs(ips) for ips in shape]

    class SequenceNet(nn.Module):
        def __init__(self):
            super(SequenceNet, self).__init__()

        def forward(self, input0, start0, ready0):
            tmp = input0 + start0
            return tmp * ready0

    sequenceModel = SequenceNet()
    example_input0 = torch.zeros(shape, dtype=torch_dtype)
    example_input1 = torch.zeros(shape, dtype=torch_control_type)
    example_input2 = torch.zeros(shape, dtype=torch_control_type)

    # Boolean control examples are converted to integer tensors (.long())
    # before tracing.
    if torch_control_type == torch.bool:
        example_input1 = example_input1.long()
        example_input2 = example_input2.long()

    traced = torch.jit.trace(
        sequenceModel, (example_input0, example_input1, example_input2)
    )

    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Only an already-existing directory is tolerated; other OS errors
    # propagate instead of being silently swallowed.
    os.makedirs(model_version_dir, exist_ok=True)

    traced.save(model_version_dir + "/model.pt")


def create_libtorch_modelconfig(models_dir, model_version, max_batch, dtype, shape):
    """Write ``config.pbtxt`` for the LibTorch sequence test model.

    Tensors use the indexed names INPUT__0, START__1, READY__2 and
    OUTPUT__0. The output dims are always written as ``[ 1 ]``.

    Args:
        models_dir: Top-level model repository directory.
        model_version: Unused here; kept for a signature parallel to the
            model-file generator.
        max_batch: Maximum batch size; 0 selects the "libtorch_nobatch" name.
        dtype: Numpy dtype of the model. For ``bool`` the controls are
            declared as bool while input/output are declared as int32.
        shape: Shape of the input, excluding any batch dimension.

    Returns:
        None. Nothing is written when ``tu.validate_for_libtorch_model``
        rejects the dtype/shape/batch combination.
    """
    if not tu.validate_for_libtorch_model(
        dtype, dtype, dtype, shape, shape, shape, max_batch
    ):
        return

    model_name = tu.get_sequence_model_name(
        "libtorch_nobatch" if max_batch == 0 else "libtorch", dtype
    )
    config_dir = models_dir + "/" + model_name

    # The model name above is derived from the original dtype; only after
    # that is a bool dtype replaced by int32 for the input/output tensors.
    if dtype == np.float32:
        control_type = "fp32"
    elif dtype == bool:
        control_type = "bool"
        dtype = np.int32
    else:
        control_type = "int32"

    #  FIX FOR LibTorch
    config = """
name: "{}"
platform: "pytorch_libtorch"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  control_input [
    {{
      name: "START__1"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY__2"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }}
  ]
}}
input [
  {{
    name: "INPUT__0"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT__0"
    data_type: {}
    dims: [ 1 ]
  }}
]
instance_group [
  {{
    kind: KIND_GPU
  }}
]
""".format(
        model_name,
        max_batch,
        control_type,
        control_type,
        np_to_model_dtype(dtype),
        tu.shape_to_dims_str(shape),
        np_to_model_dtype(dtype),
    )

    # Only an already-existing directory is tolerated; other OS errors
    # propagate instead of being silently swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_openvino_modelfile(models_dir, model_version, max_batch, dtype, shape):
    """Build the OpenVINO sequence test model and hand it to the saver.

    The model takes three parameters (INPUT, START, READY), all with the
    same shape and dtype, and produces ``OUTPUT = (INPUT + START) * READY``.
    When ``max_batch`` is non-zero it is prepended to ``shape`` as a fixed
    leading dimension. Nothing is generated when
    ``tu.validate_for_openvino_model`` rejects the combination.
    """
    leading_dims = [max_batch] if max_batch != 0 else []
    full_shape = leading_dims + shape

    is_supported = tu.validate_for_openvino_model(
        dtype, dtype, dtype, full_shape, full_shape, full_shape
    )
    if not is_supported:
        return

    name_prefix = "openvino" if max_batch != 0 else "openvino_nobatch"
    model_name = tu.get_sequence_model_name(name_prefix, dtype)
    model_version_dir = "/".join((models_dir, model_name, str(model_version)))

    # Graph inputs, created in the order they are listed on the model.
    data_in = ov.opset1.parameter(shape=full_shape, dtype=dtype, name="INPUT")
    start_in = ov.opset1.parameter(shape=full_shape, dtype=dtype, name="START")
    ready_in = ov.opset1.parameter(shape=full_shape, dtype=dtype, name="READY")

    summed = ov.opset1.add(data_in, start_in)
    result = ov.opset1.multiply(summed, ready_in, name="OUTPUT")

    openvino_save_model(
        model_version_dir,
        ov.Model([result], [data_in, start_in, ready_in], model_name),
    )


def create_openvino_modelconfig(models_dir, model_version, max_batch, dtype, shape):
    """Write ``config.pbtxt`` for the OpenVINO sequence test model.

    The control inputs are declared as int32 when ``dtype`` is ``np.int32``
    and as fp32 otherwise. The output dims are always written as ``[ 1 ]``.

    Args:
        models_dir: Top-level model repository directory.
        model_version: Unused here; kept for a signature parallel to the
            model-file generator.
        max_batch: Maximum batch size; 0 selects the "openvino_nobatch" name.
        dtype: Numpy dtype of the model tensors.
        shape: Shape of the input, excluding any batch dimension.

    Returns:
        None. Nothing is written when ``tu.validate_for_openvino_model``
        rejects the dtype/shape combination.
    """
    # Validation is performed on the shape including the batch dimension.
    batch_dim = [] if max_batch == 0 else [max_batch]
    if not tu.validate_for_openvino_model(
        dtype, dtype, dtype, batch_dim + shape, batch_dim + shape, batch_dim + shape
    ):
        return

    model_name = tu.get_sequence_model_name(
        "openvino_nobatch" if max_batch == 0 else "openvino", dtype
    )
    config_dir = models_dir + "/" + model_name
    config = """
name: "{}"
backend: "openvino"
max_batch_size: {}
sequence_batching {{
  max_sequence_idle_microseconds: 5000000
  control_input [
    {{
      name: "START"
      control [
        {{
          kind: CONTROL_SEQUENCE_START
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }},
    {{
      name: "READY"
      control [
        {{
          kind: CONTROL_SEQUENCE_READY
          {}_false_true: [ 0, 1 ]
        }}
      ]
    }}
  ]
}}
input [
  {{
    name: "INPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {}
    dims: [ 1 ]
  }}
]
""".format(
        model_name,
        max_batch,
        "int32" if dtype == np.int32 else "fp32",
        "int32" if dtype == np.int32 else "fp32",
        np_to_model_dtype(dtype),
        tu.shape_to_dims_str(shape),
        np_to_model_dtype(dtype),
    )

    # Only an already-existing directory is tolerated; other OS errors
    # propagate instead of being silently swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_shape_tensor_models(
    models_dir, dtype, shape, shape_tensor_input_dtype, no_batch=True
):
    """Generate the TensorRT shape-tensor sequence models (version 1).

    A config and a model file are produced for max batch size 8 and, when
    ``no_batch`` is true, again for max batch size 0.
    """
    model_version = 1
    batch_sizes = (8, 0) if no_batch else (8,)

    for max_batch in batch_sizes:
        generator_args = (
            models_dir,
            model_version,
            max_batch,
            dtype,
            shape,
            shape_tensor_input_dtype,
        )
        # Config first, then the serialized plan, for each batch setting.
        create_plan_modelconfig(*generator_args)
        create_plan_shape_tensor_modelfile(*generator_args)


def create_models(models_dir, dtype, shape, no_batch=True):
    """Generate version-1 sequence models for every backend enabled in FLAGS.

    For each enabled backend a config and a model file are produced for max
    batch size 8 and, when ``no_batch`` is true, again for max batch size 0.

    Args:
        models_dir: Top-level model repository directory.
        dtype: Numpy dtype (or ``bool``) of the models to generate.
        shape: Tensor shape as a list, excluding any batch dimension.
        no_batch: Also generate the non-batching (max batch 0) variants.

    Note:
        TensorRT and ensemble models are not generated for ``bool``. That
        skip applies only to those sections: previously the TensorRT check
        returned from the whole function, which silently suppressed the
        bool models of the other backends when their flags were combined
        with ``--tensorrt``.
    """
    model_version = 1
    batch_sizes = (8, 0) if no_batch else (8,)
    is_bool = dtype == bool

    if FLAGS.tensorrt and not is_bool:
        # int8 plan models are generated with two extra trailing dims of 1.
        suffix = [1, 1] if dtype == np.int8 else []
        for max_batch in batch_sizes:
            create_plan_modelconfig(
                models_dir, model_version, max_batch, dtype, shape + suffix
            )
            create_plan_models(
                models_dir, model_version, max_batch, dtype, shape + suffix
            )

    if FLAGS.onnx:
        for max_batch in batch_sizes:
            create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape)
            create_onnx_modelfile(models_dir, model_version, max_batch, dtype, shape)

    # Skip for PyTorch String I/O
    if FLAGS.libtorch and (dtype != np_dtype_string):
        for max_batch in batch_sizes:
            create_libtorch_modelconfig(
                models_dir, model_version, max_batch, dtype, shape
            )
            create_libtorch_modelfile(
                models_dir, model_version, max_batch, dtype, shape
            )

    if FLAGS.openvino:
        for max_batch in batch_sizes:
            create_openvino_modelconfig(
                models_dir, model_version, max_batch, dtype, shape
            )
            create_openvino_modelfile(
                models_dir, model_version, max_batch, dtype, shape
            )

    if FLAGS.ensemble and not is_bool:
        for pair in emu.platform_types_and_validation():
            # pair[0] is the platform name, pair[1] its validation callable.
            config_shape = shape
            if pair[0] == "plan" and dtype == np.int8:
                config_shape = shape + [1, 1]
            if not pair[1](
                dtype, dtype, dtype, config_shape, config_shape, config_shape
            ):
                continue

            for max_batch in batch_sizes:
                emu.create_sequence_ensemble_modelconfig(
                    pair[0], models_dir, max_batch, model_version, config_shape, dtype
                )
                emu.create_sequence_ensemble_modelfile(
                    pair[0], models_dir, max_batch, model_version, config_shape, dtype
                )


if __name__ == "__main__":
    # Keyword arguments shared by every on/off generator switch.
    switch = {"required": False, "action": "store_true"}

    # Options are registered in this order so --help lists them unchanged.
    cli_options = (
        (
            "--models_dir",
            {"type": str, "required": True, "help": "Top-level model directory"},
        ),
        ("--tensorrt", dict(switch, help="Generate TensorRT PLAN models")),
        (
            "--tensorrt-shape-io",
            dict(switch, help="Generate TensorRT PLAN models w/ shape tensor i/o"),
        ),
        ("--onnx", dict(switch, help="Generate Onnx models")),
        (
            "--onnx_opset",
            {
                "type": int,
                "required": False,
                "default": 0,
                "help": "Opset used for Onnx models. Default is to use ONNXRT default",
            },
        ),
        ("--libtorch", dict(switch, help="Generate Pytorch LibTorch models")),
        ("--openvino", dict(switch, help="Generate OpenVino models")),
        (
            "--variable",
            dict(switch, help="Used variable-shape tensors for input/output"),
        ),
        (
            "--ensemble",
            dict(
                switch,
                help="Generate ensemble models against the models"
                " in all platforms. Note that the models generated"
                " are not completed.",
            ),
        ),
    )

    parser = argparse.ArgumentParser()
    for option_name, option_kwargs in cli_options:
        parser.add_argument(option_name, **option_kwargs)
    FLAGS, unparsed = parser.parse_known_args()

    # Backend packages are imported only when the matching generator is
    # requested; the functions above use these module-level names.
    if FLAGS.tensorrt or FLAGS.tensorrt_shape_io:
        import tensorrt as trt
    if FLAGS.onnx:
        import onnx
    if FLAGS.libtorch:
        import torch
        from torch import nn
    if FLAGS.openvino:
        import openvino.runtime as ov

    import test_util as tu

    if FLAGS.tensorrt_shape_io:
        # One set of fp32 models per supported shape-tensor input dtype.
        for shape_input_dtype in (np.int32, np.int64):
            create_shape_tensor_models(
                FLAGS.models_dir, np.float32, [-1], shape_input_dtype
            )
    else:
        if FLAGS.variable:
            # Tests with models that accept variable-shape input/output tensors
            for sequence_dtype in (np.int32, np.float32, np_dtype_string, bool):
                create_models(FLAGS.models_dir, sequence_dtype, [-1], False)
        else:
            # Tests with models that accept fixed-shape input/output tensors
            for sequence_dtype in (np.float32, np.int32, np_dtype_string, bool):
                create_models(FLAGS.models_dir, sequence_dtype, [1])

        if FLAGS.ensemble:
            # Create nop models used in ensemble
            for nop_dtype in ("TYPE_INT32", "TYPE_FP32"):
                emu.create_nop_modelconfig(FLAGS.models_dir, (-1,), nop_dtype)


================================================
FILE: qa/common/gen_qa_torchtrt_models.py
================================================
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os

import torch
import torch_tensorrt
import torchvision


def create_resnet50_torchtrt(models_dir, max_batch):
    """Compile a pretrained ResNet-50 with Torch-TensorRT and save it.

    The torchvision model is traced with a single 1x3x224x224 example and
    compiled for fp32 with a dynamic batch dimension ranging from 1 to
    ``max_batch`` (optimized for 1). The result is saved as
    ``<models_dir>/resnet50_libtorch/1/model.pt``.

    Args:
        models_dir: Top-level model repository directory.
        max_batch: Largest batch size the compiled module must accept.
    """
    model = torchvision.models.resnet50(pretrained=True)
    model.eval()
    example_input = torch.rand(1, 3, 224, 224, dtype=torch.float)

    resnet50_ts = torch.jit.trace(model, example_input)

    trt_ts_module = torch_tensorrt.compile(
        resnet50_ts,
        inputs=[
            torch_tensorrt.Input(
                min_shape=[1, 3, 224, 224],
                opt_shape=[1, 3, 224, 224],
                max_shape=[max_batch, 3, 224, 224],
                dtype=torch.float,
            )
        ],
        enabled_precisions={torch.float},
    )

    model_name = "resnet50_libtorch"

    model_version = 1
    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Only an already-existing directory is tolerated; other OS errors
    # propagate instead of being silently swallowed.
    os.makedirs(model_version_dir, exist_ok=True)

    torch.jit.save(trt_ts_module, model_version_dir + "/model.pt")


def create_resnet50_torchtrt_modelconfig(models_dir, max_batch):
    """Write ``config.pbtxt`` for the "resnet50_libtorch" model.

    The configuration declares one FP32 NCHW input of dims [3, 224, 224]
    and one FP32 output of dims [1000] labelled by "resnet50_labels.txt".

    Args:
        models_dir: Top-level model repository directory; the file is
            written to ``<models_dir>/resnet50_libtorch/config.pbtxt``.
        max_batch: Value written as ``max_batch_size``.
    """
    model_name = "resnet50_libtorch"
    config_dir = models_dir + "/" + model_name
    config = """
name: "{}"
backend: "pytorch"
max_batch_size: {}
input [
  {{
    name: "INPUT__0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 3, 224, 224 ]
  }}
]
output [
  {{
    name: "OUTPUT__0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
    label_filename: "resnet50_labels.txt"
  }}
]
""".format(
        model_name, max_batch
    )

    # Only an already-existing directory is tolerated; other OS errors
    # propagate instead of being silently swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir",
        type=str,
        required=True,
        help="Top-level model directory",
    )
    FLAGS, unparsed = parser.parse_known_args()

    # The compiled model is produced first, then its configuration; both
    # use the same maximum batch size of 128.
    for generate in (create_resnet50_torchtrt, create_resnet50_torchtrt_modelconfig):
        generate(FLAGS.models_dir, 128)


================================================
FILE: qa/common/gen_qa_trt_data_dependent_shape.py
================================================
#!/usr/bin/env python3

# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os

import numpy as np
import tensorrt as trt
import test_util as tu
from gen_common import np_to_model_dtype, np_to_trt_dtype


# The 'nonzero' model that we use for data-dependent shapes naturally
# does not support batching, because the layer output is not trivially
# separable based on the request batch size.
# input_shape is the shape as written in the model config.
def create_data_dependent_modelfile(
    models_dir, model_name, input_shape, input_dtype=np.int32, min_dim=1, max_dim=32
):
    """Build a TensorRT plan containing a single non-zero layer and save it.

    The network takes one tensor "INPUT" and exposes the non-zero layer's
    output as "OUTPUT". The engine is written to
    ``<models_dir>/<model_name>/1/model.plan``.

    Args:
        models_dir: Top-level model repository directory.
        model_name: Name of the model directory to create.
        input_shape: Shape of INPUT; -1 marks a dynamic dimension.
        input_dtype: Numpy dtype of INPUT.
        min_dim: Minimum size used for each dynamic dimension in the
            optimization profile.
        max_dim: Maximum size used for each dynamic dimension; the optimum
            is the integer midpoint of ``min_dim`` and ``max_dim``.

    Raises:
        RuntimeError: If TensorRT does not return a serialized engine.
    """
    trt_input_dtype = np_to_trt_dtype(input_dtype)

    # Create the model; TRT_VERBOSE=1 in the environment enables verbose logs.
    TRT_LOGGER = (
        trt.Logger(trt.Logger.INFO)
        if os.environ.get("TRT_VERBOSE") != "1"
        else trt.Logger(trt.Logger.VERBOSE)
    )
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    # input
    in0 = network.add_input("INPUT", trt_input_dtype, input_shape)

    # layers
    non_zero = network.add_non_zero(in0)

    # configure output
    out0 = non_zero.get_output(0)
    out0.name = "OUTPUT"
    network.mark_output(out0)

    # optimization profile: fixed dims are kept as-is, dynamic (-1) dims
    # get the min / midpoint / max sizes.
    opt_dim = int((max_dim + min_dim) / 2)
    min_shape = [min_dim if dim == -1 else dim for dim in input_shape]
    opt_shape = [opt_dim if dim == -1 else dim for dim in input_shape]
    max_shape = [max_dim if dim == -1 else dim for dim in input_shape]

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT", min_shape, opt_shape, max_shape)
    config = builder.create_builder_config()
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)

    # serialized model
    engine_bytes = builder.build_serialized_network(network, config)
    if engine_bytes is None:
        # Fail with a clear message before touching the output file rather
        # than hitting a TypeError from f.write(None) after truncating it.
        raise RuntimeError(
            "TensorRT failed to build a serialized engine for model '{}'".format(
                model_name
            )
        )

    model_version_dir = models_dir + "/" + model_name + "/1"
    # Only an already-existing directory is tolerated; other OS errors
    # propagate instead of being silently swallowed.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_data_dependent_modelconfig(
    models_dir, model_name, input_shape, input_dtype=np.int32
):
    """Write ``config.pbtxt`` for a non-zero (data-dependent shape) model.

    The model is declared with ``max_batch_size: 0``. OUTPUT is declared as
    an int32 tensor with dims ``[len(input_shape), -1]``.

    Args:
        models_dir: Top-level model repository directory.
        model_name: Name of the model (and of its directory).
        input_shape: Shape of INPUT as it should appear in the config.
        input_dtype: Numpy dtype of INPUT.
    """
    config_dir = models_dir + "/" + model_name
    config = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: 0
input [
  {{
    name: "INPUT"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT"
    data_type: {}
    dims: [ {} ]
   }}
]
""".format(
        model_name,
        np_to_model_dtype(input_dtype),
        tu.shape_to_dims_str(input_shape),
        np_to_model_dtype(np.int32),
        tu.shape_to_dims_str((len(input_shape), -1)),
    )

    # Only an already-existing directory is tolerated; other OS errors
    # propagate instead of being silently swallowed.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir",
        type=str,
        required=True,
        help="Top-level model directory",
    )
    FLAGS, unparsed = parser.parse_known_args()

    # (model name, config input shape): one fixed-shape and one fully
    # dynamic variant, each generated as plan file followed by config.
    nonzero_variants = (
        ("plan_nobatch_nonzero_fixed", (4, 4)),
        ("plan_nobatch_nonzero_dynamic", (-1, -1)),
    )
    for variant_name, variant_shape in nonzero_variants:
        create_data_dependent_modelfile(FLAGS.models_dir, variant_name, variant_shape)
        create_data_dependent_modelconfig(
            FLAGS.models_dir, variant_name, variant_shape
        )


================================================
FILE: qa/common/gen_qa_trt_format_models.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os

import numpy as np
import tensorrt as trt
import test_util as tu
from gen_common import np_to_model_dtype, np_to_trt_dtype

np_dtype_string = np.dtype(object)


def trt_format_to_string(trt_format):
    """Return the name of a ``trt.TensorFormat`` value as a string.

    The value is compared against the known formats in a fixed order and the
    matching member name is returned; "INVALID" is returned when none match.
    """
    # Each returned string is identical to the TensorFormat member name, so
    # the members are looked up by name, one at a time, in comparison order.
    candidate_names = (
        "CDHW32",
        "DHWC8",
        "HWC",
        "CHW2",
        "CHW32",
        "LINEAR",
        "CHW4",
        "HWC8",
        "CHW16",
    )
    for label in candidate_names:
        if trt_format == getattr(trt.TensorFormat, label):
            return label
    return "INVALID"


def create_plan_modelfile(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    input_memory_format,
    output_memory_format,
    min_dim=1,
    max_dim=64,
):
    """Build a TensorRT add/sub engine with pinned I/O formats and save it.

    The network takes INPUT0 and INPUT1 and produces
    OUTPUT0 = INPUT0 + INPUT1 and OUTPUT1 = INPUT0 - INPUT1 (each passed
    through an identity layer). Both inputs are restricted to
    ``input_memory_format`` and both outputs to ``output_memory_format``.
    The serialized engine is written to
    ``<models_dir>/<model_name>/<model_version>/model.plan``.

    Args:
        models_dir: Top-level directory that receives the model directory.
        max_batch: 0 builds a non-batching model; any other value adds a
            leading dynamic batch dimension whose opt/max size is
            ``max(1, max_batch)``.
        model_version: Name of the version sub-directory.
        input_shape: Per-request input shape; -1 marks a dynamic dimension.
        output0_shape: Not used by this function.
        output1_shape: Not used by this function.
        input_dtype: NumPy dtype of both inputs.
        output0_dtype: NumPy dtype of OUTPUT0.
        output1_dtype: NumPy dtype of OUTPUT1.
        input_memory_format: ``trt.TensorFormat`` allowed for the inputs.
        output_memory_format: ``trt.TensorFormat`` allowed for the outputs.
        min_dim: Minimum size used for every dynamic (-1) dimension.
        max_dim: Maximum size used for every dynamic (-1) dimension.
    """
    trt_input_dtype = np_to_trt_dtype(input_dtype)
    trt_output0_dtype = np_to_trt_dtype(output0_dtype)
    trt_output1_dtype = np_to_trt_dtype(output1_dtype)

    # Create the model. TRT_VERBOSE=1 in the environment selects VERBOSE
    # logging, anything else selects INFO.
    if os.environ.get("TRT_VERBOSE") != "1":
        trt_logger = trt.Logger(trt.Logger.INFO)
    else:
        trt_logger = trt.Logger(trt.Logger.VERBOSE)
    builder = trt.Builder(trt_logger)
    network = builder.create_network()

    # A batching model gets a leading dynamic (-1) batch dimension.
    network_input_shape = list(input_shape)
    if max_batch != 0:
        network_input_shape = [-1] + network_input_shape

    in0 = network.add_input("INPUT0", trt_input_dtype, network_input_shape)
    in1 = network.add_input("INPUT1", trt_input_dtype, network_input_shape)
    add = network.add_elementwise(in0, in1, trt.ElementWiseOperation.SUM)
    sub = network.add_elementwise(in0, in1, trt.ElementWiseOperation.SUB)

    out0 = network.add_identity(add.get_output(0))
    out1 = network.add_identity(sub.get_output(0))

    out0.get_output(0).name = "OUTPUT0"
    out1.get_output(0).name = "OUTPUT1"
    network.mark_output(out0.get_output(0))
    network.mark_output(out1.get_output(0))

    out0.get_output(0).dtype = trt_output0_dtype
    out1.get_output(0).dtype = trt_output1_dtype

    # allowed_formats is a bitmask; setting a single bit pins the tensor to
    # exactly one memory format.
    input_format_mask = 1 << int(input_memory_format)
    output_format_mask = 1 << int(output_memory_format)
    in0.allowed_formats = input_format_mask
    in1.allowed_formats = input_format_mask
    out0.get_output(0).allowed_formats = output_format_mask
    out1.get_output(0).allowed_formats = output_format_mask

    # INT8 tensors are given an explicit dynamic range.
    if trt_input_dtype == trt.int8:
        in0.dynamic_range = (-128.0, 127.0)
        in1.dynamic_range = (-128.0, 127.0)
    if trt_output0_dtype == trt.int8:
        out0.get_output(0).dynamic_range = (-128.0, 127.0)
    if trt_output1_dtype == trt.int8:
        out1.get_output(0).dynamic_range = (-128.0, 127.0)

    # Optimization-profile shapes: the batch dimension (if any) first, then
    # one entry per input dimension.
    min_shape = []
    opt_shape = []
    max_shape = []
    if max_batch != 0:
        min_shape.append(1)
        opt_shape.append(max(1, max_batch))
        max_shape.append(max(1, max_batch))
    for dim in input_shape:
        if dim == -1:
            min_shape.append(min_dim)
            opt_shape.append(int((max_dim + min_dim) / 2))
            max_shape.append(max_dim)
        else:
            min_shape.append(dim)
            opt_shape.append(dim)
            max_shape.append(dim)

    profile = builder.create_optimization_profile()
    profile.set_shape("INPUT0", min_shape, opt_shape, max_shape)
    profile.set_shape("INPUT1", min_shape, opt_shape, max_shape)

    # BuilderFlag.DIRECT_IO is deliberately not set. Per the "I/O Formats"
    # section of the TensorRT Developer Guide, with that flag the build
    # fails if TensorRT cannot build an engine without introducing
    # reformatting, and the failure may happen only on some target
    # platforms depending on which formats their kernels support.
    flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
    # Enable the reduced-precision builder modes required by the I/O types.
    for dt in {trt_input_dtype, trt_output0_dtype, trt_output1_dtype}:
        if dt == trt.int8:
            flags |= 1 << int(trt.BuilderFlag.INT8)
        elif dt == trt.float16:
            flags |= 1 << int(trt.BuilderFlag.FP16)
    config = builder.create_builder_config()
    config.flags = flags
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback for a builder object without build_serialized_network
        # (presumably an older TensorRT release).
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    # Use a different model name for different kinds of models
    base_name = "plan_nobatch" if max_batch == 0 else "plan"
    base_name += (
        "_"
        + trt_format_to_string(input_memory_format)
        + "_"
        + trt_format_to_string(output_memory_format)
    )
    model_name = tu.get_model_name(base_name, input_dtype, output0_dtype, output1_dtype)

    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Only an already-existing directory is tolerated; any other failure
    # (e.g. a permission error) is reported here rather than being hidden
    # until the open() below.
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_modelconfig(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    input_memory_format,
    output_memory_format,
    version_policy,
):
    """Write the config.pbtxt for a format-constrained TensorRT plan model.

    Nothing is written when ``tu.validate_for_trt_model`` rejects the
    dtype/shape combination. Otherwise the file is written to
    ``<models_dir>/<model_name>/config.pbtxt``. When ``input_shape``
    contains a dynamic (-1) dimension, an ``instance_group`` stanza that
    selects optimization profile "0" is appended.

    Args:
        models_dir: Top-level directory that receives the model directory.
        max_batch: Written as ``max_batch_size``; 0 selects the
            "plan_nobatch" model-name prefix.
        model_version: Not used by this function.
        input_shape: Dims written for INPUT0 and INPUT1.
        output0_shape: Dims written for OUTPUT0.
        output1_shape: Dims written for OUTPUT1.
        input_dtype: NumPy dtype of both inputs.
        output0_dtype: NumPy dtype of OUTPUT0.
        output1_dtype: NumPy dtype of OUTPUT1.
        input_memory_format: ``trt.TensorFormat`` used in the model name.
        output_memory_format: ``trt.TensorFormat`` used in the model name.
        version_policy: None for the default ("latest", 1 version), or a
            ``(kind, value)`` pair where kind is "latest" or "specific";
            any other kind selects the "all" policy.
    """
    if not tu.validate_for_trt_model(
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_shape,
        output0_shape,
        output1_shape,
    ):
        return

    # Unpack version policy
    version_policy_str = "{ latest { num_versions: 1 }}"
    if version_policy is not None:
        policy_kind, policy_value = version_policy
        if policy_kind == "latest":
            version_policy_str = "{{ latest {{ num_versions: {} }}}}".format(
                policy_value
            )
        elif policy_kind == "specific":
            version_policy_str = "{{ specific {{ versions: {} }}}}".format(
                policy_value
            )
        else:
            version_policy_str = "{ all { }}"

    # Use a different model name for different kinds of models
    base_name = "plan_nobatch" if max_batch == 0 else "plan"
    base_name += (
        "_"
        + trt_format_to_string(input_memory_format)
        + "_"
        + trt_format_to_string(output_memory_format)
    )
    model_name = tu.get_model_name(base_name, input_dtype, output0_dtype, output1_dtype)

    config_dir = models_dir + "/" + model_name

    # Part of the configuration shared by fixed- and dynamic-shape models.
    config = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
version_policy: {}
input [
  {{
    name: "INPUT0"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "INPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT0"
    data_type: {}
    dims: [ {} ]
   }},
  {{
    name: "OUTPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
""".format(
        model_name,
        max_batch,
        version_policy_str,
        np_to_model_dtype(input_dtype),
        tu.shape_to_dims_str(input_shape),
        np_to_model_dtype(input_dtype),
        tu.shape_to_dims_str(input_shape),
        np_to_model_dtype(output0_dtype),
        tu.shape_to_dims_str(output0_shape),
        np_to_model_dtype(output1_dtype),
        tu.shape_to_dims_str(output1_shape),
    )

    # Dynamic-shape models additionally name the optimization profile that
    # the model instances should use.
    if -1 in input_shape:
        profile_index = 0
        config += """instance_group [
  {{
      profile:"{}"
  }}
]
""".format(
            profile_index
        )

    # Only an already-existing directory is tolerated; other failures are
    # reported here instead of surfacing later from open().
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_plan_model(
    models_dir,
    max_batch,
    model_version,
    input_shape,
    output0_shape,
    output1_shape,
    input_dtype,
    output0_dtype,
    output1_dtype,
    input_memory_format,
    output_memory_format,
):
    """Generate config.pbtxt and model.plan for one format-constrained model.

    Does nothing when ``tu.validate_for_trt_model`` rejects the dtype/shape
    combination. Otherwise the configuration is written first (with no
    explicit version policy) and the engine file second; both receive the
    same arguments.
    """
    is_supported = tu.validate_for_trt_model(
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_shape,
        output0_shape,
        output1_shape,
    )
    if not is_supported:
        return

    # Positional arguments common to both generators.
    shared_args = (
        models_dir,
        max_batch,
        model_version,
        input_shape,
        output0_shape,
        output1_shape,
        input_dtype,
        output0_dtype,
        output1_dtype,
        input_memory_format,
        output_memory_format,
    )

    # The trailing None is the version_policy argument.
    create_plan_modelconfig(*shared_args, None)
    create_plan_modelfile(*shared_args)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir",
        type=str,
        required=True,
        help="Top-level model directory",
    )
    FLAGS, unparsed = parser.parse_known_args()

    # reformat-free input
    # One entry per model family: (tensor shape, I/O dtype, input format).
    # The same shape and dtype are used for both inputs and both outputs;
    # outputs always use the LINEAR format.
    model_variants = (
        # Fixed shape
        ((13, 2, 1), np.float16, trt.TensorFormat.CHW2),
        # Dynamic shape
        ((-1, 2, 1), np.float32, trt.TensorFormat.CHW32),
    )
    for shape, dtype, input_format in model_variants:
        # max_batch 0 produces the non-batching model, 8 the batching one.
        for max_batch in (0, 8):
            create_plan_model(
                FLAGS.models_dir,
                max_batch,
                1,
                shape,
                shape,
                shape,
                dtype,
                dtype,
                dtype,
                input_format,
                trt.TensorFormat.LINEAR,
            )

    # reformat-free output
    # reformat-free I/O


================================================
FILE: qa/common/gen_qa_trt_plugin_models.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import ctypes
import os

import numpy as np
import tensorrt as trt
from gen_common import np_to_model_dtype, np_to_trt_dtype

# NumPy object dtype; going by the name, presumably the dtype that stands for
# string tensors -- TODO confirm against the callers.
np_dtype_string = np.dtype(object)

# Logger shared by every TensorRT builder created in this script.
TRT_LOGGER = trt.Logger()

# Initialize TensorRT's plugin library at import time, using the empty string
# as the plugin namespace, so get_trt_plugin() can query the plugin registry.
trt.init_libnvinfer_plugins(TRT_LOGGER, "")


def get_trt_plugin(plugin_name):
    """Create a TensorRT plugin instance through the plugin registry.

    Only the "CustomHardmax" plugin is supported; it is created with a
    single INT32 field, ``axis`` = 0.

    Args:
        plugin_name: Name of the plugin to create.

    Returns:
        The object returned by the matching creator's ``create_plugin``.

    Raises:
        RuntimeError: If ``plugin_name`` is not "CustomHardmax" or no
            creator with that name is registered.
    """
    matched_creator = None
    field_collection = None
    for plugin_creator in trt.get_plugin_registry().plugin_creator_list:
        if (plugin_creator.name == "CustomHardmax") and (
            plugin_name == "CustomHardmax"
        ):
            # The buffer dtype must agree with the declared field type.
            # NumPy's default integer dtype is platform dependent (commonly
            # int64), so request int32 explicitly instead of relying on the
            # value being 0.
            axis_attr = trt.PluginField(
                "axis",
                np.array([0], dtype=np.int32),
                type=trt.PluginFieldType.INT32,
            )
            field_collection = trt.PluginFieldCollection([axis_attr])
            matched_creator = plugin_creator
            break

    if matched_creator is None:
        raise RuntimeError("Plugin not found: " + plugin_name)

    return matched_creator.create_plugin(
        name=plugin_name, field_collection=field_collection
    )


def create_plan_modelfile(
    models_dir,
    max_batch,
    model_version,
    plugin_name,
    input_shape,
    output0_shape,
    input_dtype,
    output0_dtype,
):
    """Build a single-plugin TensorRT engine and save it as model.plan.

    The network feeds INPUT0 into the plugin returned by
    ``get_trt_plugin(plugin_name)`` and exposes the plugin's first output as
    OUTPUT0. Nothing is built when ``tu.validate_for_trt_model`` rejects the
    dtype/shape combination. The serialized engine is written to
    ``<models_dir>/<model_name>/<model_version>/model.plan``.

    NOTE: ``tu`` (test_util) is imported only under the ``__main__`` guard at
    the bottom of this script, so this function relies on the file being
    executed as a script.

    Args:
        models_dir: Top-level directory that receives the model directory.
        max_batch: 0 builds a non-batching model; any other value adds a
            leading dynamic batch dimension with profile range
            [1, max_batch].
        model_version: Name of the version sub-directory.
        plugin_name: Plugin to instantiate; also the model-name suffix.
        input_shape: Per-request (non-batch) shape of INPUT0.
        output0_shape: Shape of OUTPUT0; used only for validation.
        input_dtype: NumPy dtype of INPUT0.
        output0_dtype: NumPy dtype of OUTPUT0; used for validation and the
            model name.
    """
    if not tu.validate_for_trt_model(
        input_dtype,
        output0_dtype,
        output0_dtype,
        input_shape,
        output0_shape,
        output0_shape,
    ):
        return

    trt_input_dtype = np_to_trt_dtype(input_dtype)

    # Use a different model name for the non-batching variant
    base_name = "plan_nobatch" if max_batch == 0 else "plan"
    model_name = (
        tu.get_model_name(base_name, input_dtype, output0_dtype, output0_dtype)
        + "_"
        + plugin_name
    )

    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()

    # A batching model gets a leading dynamic (-1) batch dimension.
    dims = list(input_shape)
    network_input_shape = dims if max_batch == 0 else [-1] + dims

    input_layer = network.add_input(
        name="INPUT0", dtype=trt_input_dtype, shape=network_input_shape
    )
    plugin_layer = network.add_plugin_v2(
        inputs=[input_layer], plugin=get_trt_plugin(plugin_name)
    )
    plugin_layer.get_output(0).name = "OUTPUT0"
    network.mark_output(plugin_layer.get_output(0))

    # The non-batch dimensions are fixed, so min, opt and max agree on them;
    # only the batch dimension (when present) spans a range.
    profile = builder.create_optimization_profile()
    if max_batch == 0:
        profile.set_shape("INPUT0", dims, dims, dims)
    else:
        profile.set_shape(
            "INPUT0",
            [1] + dims,
            [max_batch] + dims,
            [max_batch] + dims,
        )

    config = builder.create_builder_config()
    config.add_optimization_profile(profile)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)

    try:
        engine_bytes = builder.build_serialized_network(network, config)
    except AttributeError:
        # Fallback for a builder object without build_serialized_network
        # (presumably an older TensorRT release).
        engine = builder.build_engine(network, config)
        engine_bytes = engine.serialize()
        del engine

    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

    # Only an already-existing directory is tolerated; other failures are
    # reported here instead of surfacing later from open().
    os.makedirs(model_version_dir, exist_ok=True)

    with open(model_version_dir + "/model.plan", "wb") as f:
        f.write(engine_bytes)


def create_plan_modelconfig(
    models_dir,
    max_batch,
    model_version,
    plugin_name,
    input_shape,
    output0_shape,
    input_dtype,
    output0_dtype,
):
    """Write the config.pbtxt for a single-plugin TensorRT plan model.

    Nothing is written when ``tu.validate_for_trt_model`` rejects the
    dtype/shape combination. Otherwise the file is written to
    ``<models_dir>/<model_name>/config.pbtxt`` with one input (INPUT0), one
    output (OUTPUT0) and the version policy "latest, 1 version".

    NOTE: ``tu`` (test_util) is imported only under the ``__main__`` guard at
    the bottom of this script, so this function relies on the file being
    executed as a script.

    Args:
        models_dir: Top-level directory that receives the model directory.
        max_batch: Written as ``max_batch_size``; 0 selects the
            "plan_nobatch" model-name prefix.
        model_version: Not used by this function.
        plugin_name: Appended to the model name.
        input_shape: Dims written for INPUT0.
        output0_shape: Dims written for OUTPUT0.
        input_dtype: NumPy dtype of INPUT0.
        output0_dtype: NumPy dtype of OUTPUT0.
    """
    if not tu.validate_for_trt_model(
        input_dtype,
        output0_dtype,
        output0_dtype,
        input_shape,
        output0_shape,
        output0_shape,
    ):
        return

    version_policy_str = "{ latest { num_versions: 1 }}"

    # Use a different model name for the non-batching variant
    base_name = "plan_nobatch" if max_batch == 0 else "plan"
    model_name = (
        tu.get_model_name(base_name, input_dtype, output0_dtype, output0_dtype)
        + "_"
        + plugin_name
    )
    config_dir = models_dir + "/" + model_name
    config = """
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
version_policy: {}
input [
  {{
    name: "INPUT0"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT0"
    data_type: {}
    dims: [ {} ]
   }}
]
""".format(
        model_name,
        max_batch,
        version_policy_str,
        np_to_model_dtype(input_dtype),
        tu.shape_to_dims_str(input_shape),
        np_to_model_dtype(output0_dtype),
        tu.shape_to_dims_str(output0_shape),
    )

    # Only an already-existing directory is tolerated; other failures are
    # reported here instead of surfacing later from open().
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)


def create_plugin_models(models_dir):
    """Generate the CustomHardmax plugin models under ``models_dir``.

    Two float32 models are produced, each as a config.pbtxt followed by a
    model.plan: a batching model (max batch 8, shape (2, 2)) and a
    non-batching model (shape (16, 1, 1)).
    """
    model_version = 1

    # custom CustomHardmax
    plugin_name = "CustomHardmax"
    # (max_batch, tensor shape); input and output share the same shape.
    variants = (
        (8, (2, 2)),
        (0, (16, 1, 1)),
    )
    for max_batch, shape in variants:
        model_args = (
            models_dir,
            max_batch,
            model_version,
            plugin_name,
            shape,
            shape,
            np.float32,
            np.float32,
        )
        create_plan_modelconfig(*model_args)
        create_plan_modelfile(*model_args)


def windows_load_plugin_lib(win_plugin_dll):
    """Load the plugin library at ``win_plugin_dll`` via ctypes.

    Raises:
        IOError: If ``win_plugin_dll`` is not an existing file.
    """
    if not os.path.isfile(win_plugin_dll):
        raise IOError('Failed to load library: "{}".'.format(win_plugin_dll))

    try:
        ctypes.CDLL(win_plugin_dll, winmode=0)
    except TypeError:
        # winmode only introduced in python 3.8
        ctypes.CDLL(win_plugin_dll)


# Script entry point: parse the command line, make the plugin library
# available, then generate the plugin models.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_dir", type=str, required=True, help="Top-level model directory"
    )
    parser.add_argument(
        "--win_plugin_dll",
        type=str,
        required=False,
        default="",
        help="Path to Windows plugin .dll",
    )
    # parse_known_args() tolerates unrecognized arguments; they are returned
    # in `unparsed` and otherwise ignored.
    FLAGS, unparsed = parser.parse_known_args()

    # Binds the module-level name `tu` that the model generators above use;
    # it must be imported before create_plugin_models() runs.
    import test_util as tu

    # Linux can leverage LD_PRELOAD. We must load the Windows plugin manually
    # in order for it to be discovered in the registry.
    if os.name == "nt":
        windows_load_plugin_lib(FLAGS.win_plugin_dll)

    create_plugin_models(FLAGS.models_dir)


================================================
FILE: qa/common/infer_test.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import os
import unittest

import infer_util as iu
import numpy as np
import test_util as tu

# NumPy object dtype; used by test_raw_ooo as the dtype of every input and
# output.
np_dtype_string = np.dtype(object)

# Allow the caller to set up a specific set of backends to test: the BACKENDS
# environment variable is a whitespace-separated list of backend names and
# defaults to DEFAULT_BACKENDS.
DEFAULT_BACKENDS = "plan onnx libtorch"
TEST_BACKENDS = os.environ.get("BACKENDS", DEFAULT_BACKENDS).split()


class InferTest(tu.TestResultCollector):
    # Exact-match inference tests. Each test runs iu.infer_exact against the
    # backends listed in TEST_BACKENDS that accept the requested data types.

    def _full_exact(
        self, input_dtype, output0_dtype, output1_dtype, output0_raw, output1_raw, swap
    ):
        # Run the exact-match check for the "plan", "onnx" and "libtorch"
        # backends, skipping any backend whose validation helper rejects the
        # dtype/shape combination or that is not enabled in TEST_BACKENDS.

        def _infer_exact_helper(
            tester,
            pf,
            tensor_shape,
            batch_size,
            input_dtype,
            output0_dtype,
            output1_dtype,
            output0_raw=True,
            output1_raw=True,
            model_version=None,
            swap=False,
            outputs=("OUTPUT0", "OUTPUT1"),
            use_http=True,
            use_grpc=True,
            skip_request_id_check=False,
            use_streaming=True,
            correlation_id=0,
        ):
            # Options handed unchanged to every iu.infer_exact call.
            forwarded = dict(
                output0_raw=output0_raw,
                output1_raw=output1_raw,
                model_version=model_version,
                swap=swap,
                outputs=outputs,
                use_http=use_http,
                use_grpc=use_grpc,
                skip_request_id_check=skip_request_id_check,
                use_streaming=use_streaming,
                correlation_id=correlation_id,
            )
            # Run once with a batch of 1 and once with the full batch size.
            for bs in (1, batch_size):
                iu.infer_exact(
                    tester,
                    pf,
                    (bs,) + tensor_shape,
                    bs,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    **forwarded,
                )

        input_size = 16
        flat_shape = (input_size,)
        plan_shape = (input_size, 1, 1)
        dtypes = (input_dtype, output0_dtype, output1_dtype)
        options = dict(output0_raw=output0_raw, output1_raw=output1_raw, swap=swap)

        plan_ok = tu.validate_for_trt_model(
            *dtypes, plan_shape, plan_shape, plan_shape
        )
        if plan_ok and "plan" in TEST_BACKENDS:
            # int8 inputs are sent with the 3-dimensional shape; every other
            # dtype uses the flat shape.
            shape = plan_shape if input_dtype == np.int8 else flat_shape
            _infer_exact_helper(self, "plan", shape, 8, *dtypes, **options)

        onnx_ok = tu.validate_for_onnx_model(
            *dtypes, flat_shape, flat_shape, flat_shape
        )
        if onnx_ok and "onnx" in TEST_BACKENDS:
            _infer_exact_helper(self, "onnx", flat_shape, 8, *dtypes, **options)

        # Skip for batched string I/O
        libtorch_ok = tu.validate_for_libtorch_model(
            *dtypes, flat_shape, flat_shape, flat_shape, 8
        )
        if libtorch_ok and "libtorch" in TEST_BACKENDS:
            _infer_exact_helper(self, "libtorch", flat_shape, 8, *dtypes, **options)

    def test_raw_fff(self):
        # float32 inputs and outputs, raw outputs, with swap enabled.
        fp32 = np.float32
        self._full_exact(
            fp32, fp32, fp32, output0_raw=True, output1_raw=True, swap=True
        )

    def test_raw_ooo(self):
        # object-dtype inputs and outputs, raw outputs, swap disabled.
        obj = np_dtype_string
        self._full_exact(
            obj, obj, obj, output0_raw=True, output1_raw=True, swap=False
        )

    def test_class_fff(self):
        # float32 inputs and outputs, non-raw outputs, with swap enabled.
        fp32 = np.float32
        self._full_exact(
            fp32, fp32, fp32, output0_raw=False, output1_raw=False, swap=True
        )


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/common/infer_util.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys
from functools import partial

import numpy as np
import shm_util as su
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import *

# The queue module is named "Queue" on Python 2; bind it to the name "queue"
# either way.
if sys.version_info >= (3, 0):
    import queue
else:
    import Queue as queue

# unicode() doesn't exist on python3, for how we use it the
# corresponding function is bytes()
if sys.version_info.major == 3:
    unicode = bytes

# Request ids consulted by _unique_request_id() when it picks the next id.
_seen_request_ids = set()

# By default, find tritonserver on "localhost", but can be overridden
# with TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


def _unique_request_id():
    """Return 1 if no request id has been seen, else the largest seen id + 1.

    This only reads ``_seen_request_ids``; it does not record the returned id.
    """
    if not _seen_request_ids:
        return 1
    return max(_seen_request_ids) + 1


def _range_repr_dtype(dtype):
    if dtype == np.float64:
        return np.int32
    elif dtype == np.float32:
        return np.int16
    elif dtype == np.float16:
        return np.int8
    elif dtype == np.object_:  # TYPE_STRING
        return np.int32
    return dtype


def serialize_byte_tensor_list(tensor_values):
    """Return a list with ``serialize_byte_tensor`` applied to each element.

    The result has one entry per element of ``tensor_values``, in the same
    order.
    """
    return [serialize_byte_tensor(value) for value in tensor_values]


class UserData:
    """Holder for the queue that completion_callback() puts results on."""

    def __init__(self):
        # Queue of (result, error) tuples, one per completed request.
        self._completed_requests = queue.Queue()


# Callback function used for async_stream_infer()
def completion_callback(user_data, result, error):
    """Queue the outcome of a request on ``user_data._completed_requests``.

    The error is not raised here; it is stored next to the result, as a
    ``(result, error)`` tuple, for whoever reads the queue.
    """
    outcome = (result, error)
    user_data._completed_requests.put(outcome)


# Perform inference using an "addsum" type verification backend.
def infer_exact(
    tester,
    pf,
    tensor_shape,
    batch_size,
    input_dtype,
    output0_dtype,
    output1_dtype,
    output0_raw=True,
    output1_raw=True,
    model_version=None,
    swap=False,
    outputs=("OUTPUT0", "OUTPUT1"),
    use_http=True,
    use_grpc=True,
    use_http_json_tensors=True,
    skip_request_id_check=False,
    use_streaming=True,
    correlation_id=0,
    shm_region_names=None,
    precreated_shm_regions=None,
    use_system_shared_memory=False,
    use_cuda_shared_memory=False,
    priority=0,
    # 60 sec is the default value for L0_infer_valgrind
    network_timeout=60.0,
):
    """Build random inputs plus their expected outputs and check them on the server.

    Two random input tensors are generated, their element-wise sum and
    difference are computed as the expected outputs, and everything is handed
    to inferAndCheckResults(), which performs the requests for each enabled
    protocol configuration.

    Args:
        tester: Object providing unittest-style assert methods.
        pf: Model name prefix forwarded to the model-name lookup.
        tensor_shape: Shape (tuple) of each input tensor.
        batch_size: When 1 the whole tensor is treated as a single item,
            otherwise the tensor is split along its first axis.
        input_dtype, output0_dtype, output1_dtype: numpy dtypes of the model
            tensors; ``np.object_`` denotes string tensors.
        output0_raw, output1_raw: False requests the output as a
            classification result instead of raw data.
        model_version: Optional version; sent as a string, "" when None.
        swap: When True OUTPUT0 is the difference and OUTPUT1 the sum.
        outputs: Names of the outputs to request.
        use_http, use_grpc, use_streaming: Select the protocol configurations.
        use_http_json_tensors: Also run an HTTP configuration with
            binary_data disabled (skipped when any dtype is float16 or when
            output0_raw differs from output1_raw).
        skip_request_id_check: Forwarded to inferAndCheckResults().
        correlation_id, priority: Accepted but not used by this function.
        shm_region_names, precreated_shm_regions, use_system_shared_memory,
            use_cuda_shared_memory: Shared-memory options, forwarded.
        network_timeout: Forwarded; used for the HTTP client.
    """
    # Lazy shm imports...
    if use_system_shared_memory:
        import tritonclient.utils.shared_memory as shm
    if use_cuda_shared_memory:
        import tritonclient.utils.cuda_shared_memory as cudashm

    def _map_to_object_array(values, convert):
        # Convert every element with `convert`, keeping the original shape.
        converted = [convert(item) for item in values.flatten()]
        return np.array(converted, dtype=object).reshape(values.shape)

    def _random_input():
        return np.random.randint(
            low=val_min, high=val_max, size=tensor_shape, dtype=rinput_dtype
        )

    def _expected_byte_size(expected, dtype):
        # Strings carry a size prefix, so they are serialized before their
        # byte size is measured; other dtypes are summed chunk by chunk.
        if dtype == np.object_:
            per_item = expected if batch_size != 1 else [expected]
            return sum(
                serialized_byte_size(blob)
                for blob in serialize_byte_tensor_list(per_item)
            )
        return sum(chunk.nbytes for chunk in expected)

    tester.assertTrue(use_http or use_grpc or use_streaming)

    # configs [ url, protocol, async stream, binary data ]
    http_url = f"{_tritonserver_ipaddr}:8000"
    grpc_url = f"{_tritonserver_ipaddr}:8001"
    configs = []
    if use_http:
        configs.append((http_url, "http", False, True))
        # Float16 not supported for Input and Output via JSON
        no_fp16 = all(
            dt != np.float16 for dt in (input_dtype, output0_dtype, output1_dtype)
        )
        if output0_raw == output1_raw and use_http_json_tensors and no_fp16:
            configs.append((http_url, "http", False, False))
    if use_grpc:
        configs.append((grpc_url, "grpc", False, False))
    if use_streaming:
        configs.append((grpc_url, "grpc", True, False))

    # outputs are sum and difference of inputs so set max input
    # values so that they will not overflow the output. This
    # allows us to do an exact match. For float types use 8, 16,
    # 32 int range for fp 16, 32, 64 respectively. When getting
    # class outputs the result value/probability is returned as a
    # float so must use fp32 range in that case.
    rinput_dtype = _range_repr_dtype(input_dtype)
    routput0_dtype = _range_repr_dtype(output0_dtype if output0_raw else np.float32)
    routput1_dtype = _range_repr_dtype(output1_dtype if output1_raw else np.float32)
    range_dtypes = (rinput_dtype, routput0_dtype, routput1_dtype)
    val_min = max(np.iinfo(dt).min for dt in range_dtypes) / 2
    val_max = min(np.iinfo(dt).max for dt in range_dtypes) / 2

    input0_array = _random_input()
    input1_array = _random_input()
    if input_dtype != np.object_:
        input0_array = input0_array.astype(input_dtype)
        input1_array = input1_array.astype(input_dtype)

    # for unsigned data type, the value being subtracted must be less than the
    # value it is subtracted from, to avoid overflow.
    if val_min == 0:
        # element-wise: keep the larger value in input 0, the smaller in input 1
        needs_swap = input0_array < input1_array
        larger = np.where(needs_swap, input1_array, input0_array)
        smaller = np.where(needs_swap, input0_array, input1_array)
        input0_array, input1_array = larger, smaller

    sum_array = input0_array + input1_array
    diff_array = input0_array - input1_array
    if swap:
        output0_array, output1_array = diff_array, sum_array
    else:
        output0_array, output1_array = sum_array, diff_array

    def _to_bytes(value):
        return unicode(str(value), encoding="utf-8")

    if output0_dtype == np.object_:
        output0_array = _map_to_object_array(output0_array, _to_bytes)
    else:
        output0_array = output0_array.astype(output0_dtype)
    if output1_dtype == np.object_:
        output1_array = _map_to_object_array(output1_array, _to_bytes)
    else:
        output1_array = output1_array.astype(output1_dtype)

    if input_dtype == np.object_:
        input0_array = _map_to_object_array(input0_array, str)
        input1_array = _map_to_object_array(input1_array, str)

    # prepend size of string to output string data (only the resulting byte
    # sizes are needed further on)
    output0_byte_size = _expected_byte_size(output0_array, output0_dtype)
    output1_byte_size = _expected_byte_size(output1_array, output1_dtype)

    if batch_size == 1:
        input0_list, input1_list = [input0_array], [input1_array]
    else:
        input0_list, input1_list = list(input0_array), list(input1_array)

    # Serialization of string tensors in the case of shared memory must be done manually
    if input_dtype == np.object_:
        input0_list_tmp = serialize_byte_tensor_list(input0_list)
        input1_list_tmp = serialize_byte_tensor_list(input1_list)
        input0_byte_size = sum(serialized_byte_size(t) for t in input0_list_tmp)
        input1_byte_size = sum(serialized_byte_size(t) for t in input1_list_tmp)
    else:
        input0_list_tmp, input1_list_tmp = input0_list, input1_list
        input0_byte_size = sum(t.nbytes for t in input0_list_tmp)
        input1_byte_size = sum(t.nbytes for t in input1_list_tmp)

    model_version = "" if model_version is None else str(model_version)

    # Run inference and check results for each config
    inferAndCheckResults(
        tester,
        configs,
        pf,
        batch_size,
        model_version,
        input_dtype,
        output0_dtype,
        output1_dtype,
        tensor_shape,
        input0_array,
        input1_array,
        output0_array,
        output1_array,
        output0_raw,
        output1_raw,
        outputs,
        precreated_shm_regions,
        input0_list_tmp,
        input1_list_tmp,
        shm_region_names,
        input0_byte_size,
        input1_byte_size,
        output0_byte_size,
        output1_byte_size,
        use_system_shared_memory,
        use_cuda_shared_memory,
        network_timeout,
        skip_request_id_check,
    )


def _query_model_platform(config, model_name):
    """Return the platform the server reports for ``model_name``.

    ``config`` is one entry of the ``configs`` list; only its url (index 0)
    and protocol (index 1) are used. The temporary client is closed before
    returning.
    """
    is_http = config[1] == "http"
    client_module = httpclient if is_http else grpcclient
    metadata_client = client_module.InferenceServerClient(config[0], verbose=True)
    try:
        metadata = metadata_client.get_model_metadata(model_name)
    finally:
        metadata_client.close()
    # HTTP metadata is read by key, GRPC metadata by attribute.
    return metadata["platform"] if is_http else metadata.platform


def _build_requested_output(config, name, class_count=None):
    """Create the InferRequestedOutput for ``name`` matching ``config``.

    For HTTP the binary_data flag is taken from ``config[3]``; GRPC requests
    take no such flag. ``class_count`` is only passed on when given, so the
    client library default applies otherwise.
    """
    extra = {} if class_count is None else {"class_count": class_count}
    if config[1] == "http":
        return httpclient.InferRequestedOutput(name, binary_data=config[3], **extra)
    return grpcclient.InferRequestedOutput(name, **extra)


def _descending_sort_indices(expected_array, tensor_shape, batch_size):
    """Return, per batch item, the flat indices ordered from largest to smallest value."""
    if batch_size == 1:
        per_item = expected_array.reshape((1,) + tensor_shape)
    else:
        per_item = expected_array.reshape(tensor_shape)
    return [np.flip(np.argsort(item.flatten()), 0) for item in per_item]


def _check_class_results(
    tester, class_list, expected_flatten, expected_order, num_classes, check_labels
):
    """Verify one batch item of a classification output.

    Each entry of ``class_list`` has the form "value:index[:label]", either as
    str or as a bytes-like sequence of character codes.
    """
    # num_classes values must be returned and must match expected top values
    tester.assertEqual(len(class_list), num_classes)
    for idx, class_label in enumerate(class_list):
        # can't compare indices since could have different
        # indices with the same value/prob, so check that
        # the value of each index equals the expected value.
        # Only compare labels when the indices are equal.
        if isinstance(class_label, str):
            ctuple = class_label.split(":")
        else:
            ctuple = "".join(chr(x) for x in class_label).split(":")
        cval = float(ctuple[0])
        cidx = int(ctuple[1])
        tester.assertEqual(cval, expected_flatten[cidx])
        tester.assertEqual(cval, expected_flatten[expected_order[idx]])
        if check_labels and cidx == expected_order[idx]:
            tester.assertEqual(
                ctuple[2].strip("\r"),
                "label{}".format(expected_order[idx]),
            )


def inferAndCheckResults(
    tester,
    configs,
    pf,
    batch_size,
    model_version,
    input_dtype,
    output0_dtype,
    output1_dtype,
    tensor_shape,
    input0_array,
    input1_array,
    output0_array,
    output1_array,
    output0_raw,
    output1_raw,
    outputs,
    precreated_shm_regions,
    input0_list_tmp,
    input1_list_tmp,
    shm_region_names,
    input0_byte_size,
    input1_byte_size,
    output0_byte_size,
    output1_byte_size,
    use_system_shared_memory,
    use_cuda_shared_memory,
    network_timeout,
    skip_request_id_check,
):
    """Run one inference per config and verify the response against expectations.

    Args:
        tester: Object providing unittest-style assert methods.
        configs: Non-empty list of (url, protocol, use_stream, binary_data)
            tuples; protocol is "http" or "grpc". The first entry is also
            used to query the model platform.
        pf: Model name prefix; a "nobatch" substring means classification
            results are not indexed by batch item.
        batch_size: Number of batch items to verify for classification
            outputs; 1 means the tensor is a single item.
        model_version: Version string sent with the request; "" disables the
            version check on the response.
        input_dtype, output0_dtype, output1_dtype: numpy dtypes of the tensors.
        tensor_shape: Shape (tuple) used for both inputs.
        input0_array, input1_array: Input data sent when shared memory is off.
        output0_array, output1_array: Expected output data.
        output0_raw, output1_raw: False requests/verifies the output as a
            classification result (top ``num_classes`` = 3 entries).
        outputs: Output names to request, expressed as "OUTPUT0"/"OUTPUT1".
        precreated_shm_regions, input0_list_tmp, input1_list_tmp,
            shm_region_names, *_byte_size, use_system_shared_memory,
            use_cuda_shared_memory: Shared-memory settings handed to the
            shm_util helpers.
        network_timeout: Timeout passed to the HTTP client.
        skip_request_id_check: When False the response id must not have been
            seen before and is recorded in ``_seen_request_ids``.

    Returns:
        The result object of the last config that was run.

    Every client created here is closed before the function returns, and the
    shared-memory cleanup helper always runs, even on failure.
    """
    # Lazy shm imports...
    if use_system_shared_memory:
        import tritonclient.utils.shared_memory as shm
    if use_cuda_shared_memory:
        import tritonclient.utils.cuda_shared_memory as cudashm
    num_classes = 3
    using_shm = use_system_shared_memory or use_cuda_shared_memory

    # Get model platform
    model_name = tu.get_model_name(pf, input_dtype, output0_dtype, output1_dtype)
    platform = _query_model_platform(configs[0], model_name)

    INPUT0 = "INPUT0"
    INPUT1 = "INPUT1"

    # Output tensors are named differently for the pytorch_libtorch platform.
    if platform == "pytorch_libtorch":
        OUTPUT0 = "OUTPUT__0"
        OUTPUT1 = "OUTPUT__1"
    else:
        OUTPUT0 = "OUTPUT0"
        OUTPUT1 = "OUTPUT1"

    # Create system/cuda shared memory regions if needed
    shm_regions, shm_handles = su.create_set_shm_regions(
        input0_list_tmp,
        input1_list_tmp,
        output0_byte_size,
        output1_byte_size,
        outputs,
        shm_region_names,
        precreated_shm_regions,
        use_system_shared_memory,
        use_cuda_shared_memory,
    )
    try:
        # The expected ordering depends only on the expected outputs, so it is
        # computed once instead of once per config.
        expected0_sort_idx = _descending_sort_indices(
            output0_array, tensor_shape, batch_size
        )
        expected1_sort_idx = _descending_sort_indices(
            output1_array, tensor_shape, batch_size
        )

        for config in configs:
            is_http = config[1] == "http"
            client_module = httpclient if is_http else grpcclient

            if is_http:
                triton_client = httpclient.InferenceServerClient(
                    config[0], verbose=True, network_timeout=network_timeout
                )
            else:
                triton_client = grpcclient.InferenceServerClient(
                    config[0], verbose=True
                )

            try:
                inputs = [
                    client_module.InferInput(
                        input_name, tensor_shape, np_to_triton_dtype(input_dtype)
                    )
                    for input_name in (INPUT0, INPUT1)
                ]

                if not using_shm:
                    if is_http:
                        inputs[0].set_data_from_numpy(
                            input0_array, binary_data=config[3]
                        )
                        inputs[1].set_data_from_numpy(
                            input1_array, binary_data=config[3]
                        )
                    else:
                        inputs[0].set_data_from_numpy(input0_array)
                        inputs[1].set_data_from_numpy(input1_array)
                else:
                    # Register necessary shared memory regions/handles
                    su.register_add_shm_regions(
                        inputs,
                        outputs,
                        shm_regions,
                        precreated_shm_regions,
                        shm_handles,
                        input0_byte_size,
                        input1_byte_size,
                        output0_byte_size,
                        output1_byte_size,
                        use_system_shared_memory,
                        use_cuda_shared_memory,
                        triton_client,
                    )

                # Build the requested outputs. With shared memory the output is
                # bound to its region; otherwise a non-raw output is requested
                # as a classification with num_classes entries.
                output_req = []
                # Output region names follow the two input names; the slot
                # advances only for outputs that are actually requested.
                shm_slot = 2
                for requested_name, server_name, is_raw, byte_size in (
                    ("OUTPUT0", OUTPUT0, output0_raw, output0_byte_size),
                    ("OUTPUT1", OUTPUT1, output1_raw, output1_byte_size),
                ):
                    if requested_name not in outputs:
                        continue
                    if len(shm_regions) != 0:
                        requested = _build_requested_output(config, server_name)
                        requested.set_shared_memory(
                            shm_regions[shm_slot] + "_data", byte_size
                        )
                    elif is_raw:
                        requested = _build_requested_output(config, server_name)
                    else:
                        requested = _build_requested_output(
                            config, server_name, class_count=num_classes
                        )
                    output_req.append(requested)
                    shm_slot += 1

                if config[2]:
                    user_data = UserData()
                    triton_client.start_stream(
                        partial(completion_callback, user_data)
                    )
                    try:
                        triton_client.async_stream_infer(
                            model_name,
                            inputs,
                            model_version=model_version,
                            outputs=output_req,
                            request_id=str(_unique_request_id()),
                        )
                    finally:
                        # Stop the stream whether or not the request was sent.
                        triton_client.stop_stream()
                    (results, error) = user_data._completed_requests.get()
                    if error is not None:
                        raise error
                else:
                    results = triton_client.infer(
                        model_name,
                        inputs,
                        model_version=model_version,
                        outputs=output_req,
                        request_id=str(_unique_request_id()),
                    )

                last_response = results.get_response()

                if not skip_request_id_check:
                    if is_http:
                        request_id = int(last_response["id"])
                    else:
                        request_id = int(last_response.id)
                    tester.assertFalse(
                        request_id in _seen_request_ids,
                        "request_id: {}".format(request_id),
                    )
                    _seen_request_ids.add(request_id)

                if is_http:
                    response_model_name = last_response["model_name"]
                    if model_version != "":
                        response_model_version = last_response["model_version"]
                    response_outputs = last_response["outputs"]
                else:
                    response_model_name = last_response.model_name
                    if model_version != "":
                        response_model_version = last_response.model_version
                    response_outputs = last_response.outputs

                tester.assertEqual(response_model_name, model_name)

                if model_version != "":
                    tester.assertEqual(str(response_model_version), model_version)

                tester.assertEqual(len(response_outputs), len(outputs))

                for result in response_outputs:
                    result_name = result["name"] if is_http else result.name

                    if (result_name == OUTPUT0 and output0_raw) or (
                        result_name == OUTPUT1 and output1_raw
                    ):
                        if using_shm:
                            if result_name == OUTPUT0:
                                shm_handle = shm_handles[2]
                            else:
                                shm_handle = shm_handles[3]

                            output = results.get_output(result_name)
                            if is_http:
                                output_datatype = output["datatype"]
                                output_shape = output["shape"]
                            else:
                                output_datatype = output.datatype
                                output_shape = output.shape
                            output_dtype = triton_to_np_dtype(output_datatype)
                        if use_system_shared_memory:
                            output_data = shm.get_contents_as_numpy(
                                shm_handle, output_dtype, output_shape
                            )
                        elif use_cuda_shared_memory:
                            output_data = cudashm.get_contents_as_numpy(
                                shm_handle, output_dtype, output_shape
                            )
                        else:
                            output_data = results.as_numpy(result_name)
                            if (output_data.dtype == np.object_) and (
                                not config[3]
                            ):
                                if config[1] == "http":
                                    # Convert the elements to bytes so they
                                    # compare equal to the expected array.
                                    output_data = np.array(
                                        [
                                            unicode(str(x), encoding="utf-8")
                                            for x in (output_data.flatten())
                                        ],
                                        dtype=np.object_,
                                    ).reshape(output_data.shape)
                                elif config[1] == "grpc":
                                    output_data = np.array(
                                        [x for x in (output_data.flatten())],
                                        dtype=np.object_,
                                    ).reshape(output_data.shape)

                        # This branch is only entered for OUTPUT0 or OUTPUT1.
                        if result_name == OUTPUT0:
                            expected_array = output0_array
                        else:
                            expected_array = output1_array
                        tester.assertTrue(
                            np.array_equal(output_data, expected_array),
                            "{}, {} expected: {}, got {}".format(
                                model_name, result_name, expected_array, output_data
                            ),
                        )
                    else:
                        # Labels are only compared for OUTPUT0.
                        # NOTE(review): presumably only OUTPUT0 has labels in
                        # the model configs - confirm.
                        if result_name == OUTPUT0:
                            expected_array = output0_array
                            expected_sort_idx = expected0_sort_idx
                            check_labels = True
                        elif result_name == OUTPUT1:
                            expected_array = output1_array
                            expected_sort_idx = expected1_sort_idx
                            check_labels = False
                        else:
                            tester.assertTrue(
                                False,
                                "unexpected class result {}".format(result_name),
                            )
                            continue

                        class_results = results.as_numpy(result_name)
                        for b in range(batch_size):
                            if "nobatch" in pf:
                                class_list = class_results
                            else:
                                class_list = class_results[b]

                            if batch_size == 1:
                                expected_flatten = expected_array.flatten()
                            else:
                                expected_flatten = expected_array[b].flatten()

                            _check_class_results(
                                tester,
                                class_list,
                                expected_flatten,
                                expected_sort_idx[b],
                                num_classes,
                                check_labels,
                            )
            finally:
                # Release the connection held by this config's client.
                triton_client.close()
    finally:
        # Unregister system/cuda shared memory regions if they exist
        su.unregister_cleanup_shm_regions(
            shm_regions,
            shm_handles,
            precreated_shm_regions,
            outputs,
            use_system_shared_memory,
            use_cuda_shared_memory,
        )

    return results


# resize the dummy tensor with the provided values in the shape tensor and finally
# return the shape of the resized tensor.
def infer_shape_tensor(
    tester,
    pf,
    tensor_dtype,
    input_shape_values,
    dummy_input_shapes,
    use_http=True,
    use_grpc=True,
    use_streaming=True,
    shm_suffix="",
    use_system_shared_memory=False,
    priority=0,
    timeout_us=0,
    batch_size=1,
    shape_tensor_input_dtype=np.int32,
):
    # Lazy shm imports...
    if use_system_shared_memory:
        import tritonclient.utils.shared_memory as shm

    tester.assertTrue(use_http or use_grpc or use_streaming)
    tester.assertTrue(pf.startswith("plan"))
    tester.assertEqual(len(input_shape_values), len(dummy_input_shapes))

    configs = []
    if use_http:
        configs.append((f"{_tritonserver_ipaddr}:8000", "http", False))
    if use_grpc:
        configs.append((f"{_tritonserver_ipaddr}:8001", "grpc", False))
    if use_streaming:
        configs.append((f"{_tritonserver_ipaddr}:8001", "grpc", True))

    io_cnt = len(input_shape_values)

    # FIXME wrap up shm handle cleanup
    # item is (handle, byte_size)
    input_shm_handle_list = []
    output_shm_handle_list = []
    dummy_input_list = []
    input_list = []
    expected_dict = dict()
    # Prepare IO in advance
    for io_num in range(io_cnt):
        dummy_input_name = "DUMMY_INPUT{}".format(io_num)
        input_name = "INPUT{}".format(io_num)
        dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)
        output_name = "OUTPUT{}".format(io_num)

        # Prepare the dummy tensor
        rtensor_dtype = _range_repr_dtype(tensor_dtype)
        if rtensor_dtype != bool:
            dummy_in0 = np.random.randint(
                low=np.iinfo(rtensor_dtype).min,
                high=np.iinfo(rtensor_dtype).max,
                size=dummy_input_shapes[io_num],
                dtype=rtensor_dtype,
            )
        else:
            dummy_in0 = np.random.choice(
                a=[False, True], size=dummy_input_shapes[io_num]
            )
        if tensor_dtype != np.object_:
            dummy_in0 = dummy_in0.astype(tensor_dtype)
        else:
            dummy_in0 = np.array(
                [str(x) for x in dummy_in0.flatten()], dtype=object
            ).reshape(dummy_in0.shape)
        dummy_input_list.append(dummy_in0)

        # Prepare shape input tensor
        in0 = np.asarray(input_shape_values[io_num], dtype=shape_tensor_input_dtype)
        input_list.append(in0)

        # Prepare the expected value for the output. Skip dummy output as we
        # only care about its shape (== value of OUTPUT*)
        expected_dict[output_name] = np.ndarray.copy(in0)

        # Only need to create region once
        input_byte_size = in0.size * np.dtype(shape_tensor_input_dtype).itemsize
        output_byte_size = input_byte_size * batch_size
        if shape_tensor_input_dtype == np.int32:
            # Currently in our test cases we are
            # using int64 outputs for shape tensors
            # hence there is a multiple of 2 to compute the byte size
            # properly.
            output_byte_size = output_byte_size * 2
        if use_system_shared_memory:
            input_shm_handle_list.append(
                (
                    shm.create_shared_memory_region(
                        input_name + shm_suffix,
                        "/" + input_name + shm_suffix,
                        input_byte_size,
                    ),
                    input_byte_size,
                )
            )
            output_shm_handle_list.append(
                (
                    shm.create_shared_memory_region(
                        output_name + shm_suffix,
                        "/" + output_name + shm_suffix,
                        output_byte_size,
                    ),
                    output_byte_size,
                )
            )
            shm.set_shared_memory_region(
                input_shm_handle_list[-1][0],
                [
                    in0,
                ],
            )

    model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)
    model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name
    # Run inference and check results for each config
    for config in configs:
        client_utils = grpcclient if config[1] == "grpc" else httpclient
        triton_client = client_utils.InferenceServerClient(config[0], verbose=True)

        inputs = []
        outputs = []

        # Set IOs
        for io_num in range(io_cnt):
            dummy_input_name = "DUMMY_INPUT{}".format(io_num)
            input_name = "INPUT{}".format(io_num)
            dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)
            output_name = "OUTPUT{}".format(io_num)

            inputs.append(
                client_utils.InferInput(
                    dummy_input_name,
                    dummy_input_shapes[io_num],
                    np_to_triton_dtype(tensor_dtype),
                )
            )
            inputs.append(
                client_utils.InferInput(
                    input_name,
                    input_list[io_num].shape,
                    np_to_triton_dtype(shape_tensor_input_dtype),
                )
            )
            outputs.append(client_utils.InferRequestedOutput(dummy_output_name))
            outputs.append(client_utils.InferRequestedOutput(output_name))

            # -2: dummy; -1: input
            inputs[-2].set_data_from_numpy(dummy_input_list[io_num])
            if not use_system_shared_memory:
                inputs[-1].set_data_from_numpy(input_list[io_num])
            else:
                input_byte_size = input_shm_handle_list[io_num][1]
                output_byte_size = output_shm_handle_list[io_num][1]
                triton_client.register_system_shared_memory(
                    input_name + shm_suffix,
                    "/" + input_name + shm_suffix,
                    input_byte_size,
                )
                triton_client.register_system_shared_memory(
                    output_name + shm_suffix,
                    "/" + output_name + shm_suffix,
                    output_byte_size,
                )
                inputs[-1].set_shared_memory(input_name + shm_suffix, input_byte_size)
                outputs[-1].set_shared_memory(
                    output_name + shm_suffix, output_byte_size
                )

        if config[2]:
            user_data = UserData()
            triton_client.start_stream(partial(completion_callback, user_data))
            try:
                results = triton_client.async_stream_infer(
                    model_name,
                    inputs,
                    outputs=outputs,
                    priority=priority,
                    timeout=timeout_us,
                )
            except Exception as e:
                triton_client.stop_stream()
                raise e
            triton_client.stop_stream()
            (results, error) = user_data._completed_requests.get()
            if error is not None:
                raise error
        else:
            try:
                results = triton_client.infer(
                    model_name,
                    inputs,
                    outputs=outputs,
                    priority=priority,
                    timeout=timeout_us,
                )
            except Exception as e:
                if use_system_shared_memory:
                    for io_num in range(io_cnt):
                        shm.destroy_shared_memory_region(
                            input_shm_handle_list[io_num][0]
                        )
                        triton_client.unregister_system_shared_memory(
                            f"INPUT{io_num}" + shm_suffix
                        )
                        shm.destroy_shared_memory_region(
                            output_shm_handle_list[io_num][0]
                        )
                        triton_client.unregister_system_shared_memory(
                            f"OUTPUT{io_num}" + shm_suffix
                        )
                raise e

        for io_num in range(io_cnt):
            output_name = "OUTPUT{}".format(io_num)
            dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)
            expected = expected_dict[output_name]

            # get outputs as numpy array
            dummy_out = results.as_numpy(dummy_output_name)
            if not use_system_shared_memory:
                out = results.as_numpy(output_name)
            else:
                output = results.get_output(output_name)
                if config[1] == "grpc":
                    output_shape = output.shape
                else:
                    output_shape = output["shape"]
                # Currently in our test cases we are
                # using int64 outputs for shape tensors
                # hence passing int64 as datatype.
                out = shm.get_contents_as_numpy(
                    output_shm_handle_list[io_num][0], np.int64, output_shape
                )

            # if out shape is 2D, it is batched
            if len(out.shape) == 2:
                # The shape of the dummy output should be equal to the shape values
                # specified in the shape tensor
                tester.assertTrue(
                    np.array_equal(dummy_out.shape[1:], out[0]),
                    "{}, {} shape, expected: {}, got {}".format(
                        model_name, dummy_output_name, out[0], dummy_out.shape[1:]
                    ),
                )
                for b in range(1, out.shape[0]):
                    tester.assertTrue(
                        np.array_equal(out[b - 1], out[b]),
                        "expect shape tensor has consistent value, "
                        "expected: {}, got {}".format(out[b - 1], out[b]),
                    )
                out = out[0]
            else:
                tester.assertTrue(
                    np.array_equal(dummy_out.shape, out),
                    "{}, {} shape, expected: {}, got {}".format(
                        model_name, dummy_output_name, out, dummy_out.shape
                    ),
                )
            tester.assertTrue(
                np.array_equal(out, expected),
                "{}, {}, expected: {}, got {}".format(
                    model_name, output_name, expected, out
                ),
            )

            # unregister shared memory region for next config
            if use_system_shared_memory:
                triton_client.unregister_system_shared_memory(input_name + shm_suffix)
                triton_client.unregister_system_shared_memory(output_name + shm_suffix)

    for handle in input_shm_handle_list:
        shm.destroy_shared_memory_region(handle[0])
    for handle in output_shm_handle_list:
        shm.destroy_shared_memory_region(handle[0])


# Perform inference using a "nop" model that expects some form of
# zero-sized input/output tensor.
# FIXME Support for empty tensors using non-empty shared memory regions.
# Currently shared memory support is broken for empty input/output tensors.
def infer_zero(
    tester,
    pf,
    batch_size,
    tensor_dtype,
    input_shapes,
    output_shapes,
    model_version=None,
    use_http=True,
    use_grpc=True,
    use_http_json_tensors=True,
    use_streaming=True,
    shm_region_name_prefix=None,
    use_system_shared_memory=False,
    use_cuda_shared_memory=False,
    priority=0,
    timeout_us=0,
    override_model_name=None,
    override_input_names=[],
    override_output_names=[],
):
    """Run a pass-through ("zero"/nop) model over every enabled protocol
    configuration and verify that each output equals its input reshaped to
    the corresponding output shape.

    Args:
        tester: Object providing ``assertTrue``/``assertEqual``/``assertIn``
            (presumably a ``unittest.TestCase`` -- confirm against callers).
        pf: Platform/backend identifier forwarded to
            ``tu.get_zero_model_name``.
        batch_size: When 1 the whole input array is used as a single
            shared-memory payload; otherwise the array is split along its
            first axis into one payload per element.
        tensor_dtype: NumPy dtype of every input/output tensor.
        input_shapes: One shape per input tensor.
        output_shapes: One shape per output tensor; must have the same
            length as ``input_shapes``.
        model_version: Version to request; ``None`` sends an empty version
            string and skips the version check on the response.
        use_http: Add an HTTP (port 8000) binary-data configuration.
        use_grpc: Add a gRPC (port 8001) configuration.
        use_http_json_tensors: With ``use_http``, additionally add an HTTP
            JSON configuration (skipped for ``np.float16``).
        use_streaming: Add a gRPC streaming (port 8001) configuration.
        shm_region_name_prefix: ``[input_prefix, output_prefix]`` used to
            name shared-memory regions; defaults to ``["input", "output"]``.
        use_system_shared_memory: Exchange tensors via system shared memory.
        use_cuda_shared_memory: Exchange tensors via CUDA shared memory.
        priority: Request priority forwarded to the client.
        timeout_us: Request timeout forwarded to the client.
        override_model_name: Model name to use instead of the derived one.
        override_input_names: Explicit input names (indexed by I/O number).
            Never mutated here, so the shared list default is harmless.
        override_output_names: Explicit output names (indexed by I/O
            number). Never mutated here.

    Returns:
        The inference result object from the last configuration executed.

    Raises:
        Whatever the Triton client raises for a failed request, the error
        delivered through the streaming callback, or the tester's assertion
        failure when a check does not hold.
    """
    # Lazy shm imports...
    if use_system_shared_memory:
        import tritonclient.utils.shared_memory as shm
    if use_cuda_shared_memory:
        import tritonclient.utils.cuda_shared_memory as cudashm

    tester.assertTrue(use_http or use_grpc or use_streaming)
    # Each config is (server url, protocol, use streaming, http binary data).
    configs = []
    if use_http:
        configs.append((f"{_tritonserver_ipaddr}:8000", "http", False, True))
        if use_http_json_tensors and (tensor_dtype != np.float16):
            configs.append((f"{_tritonserver_ipaddr}:8000", "http", False, False))
    if use_grpc:
        configs.append((f"{_tritonserver_ipaddr}:8001", "grpc", False, False))
    if use_streaming:
        configs.append((f"{_tritonserver_ipaddr}:8001", "grpc", True, False))
    tester.assertEqual(len(input_shapes), len(output_shapes))
    io_cnt = len(input_shapes)

    if shm_region_name_prefix is None:
        shm_region_name_prefix = ["input", "output"]

    input_dict = {}
    expected_dict = {}
    shm_ip_handles = list()
    shm_op_handles = list()

    # Get model platform
    if override_model_name is None:
        model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)
    else:
        model_name = override_model_name
    # The HTTP client returns metadata as a dict, the gRPC client as a
    # message object, hence the two access styles.
    if configs[0][1] == "http":
        metadata_client = httpclient.InferenceServerClient(configs[0][0], verbose=True)
        metadata = metadata_client.get_model_metadata(model_name)
        platform = metadata["platform"]
    else:
        metadata_client = grpcclient.InferenceServerClient(configs[0][0], verbose=True)
        metadata = metadata_client.get_model_metadata(model_name)
        platform = metadata.platform

    # Generate the input data, the expected outputs and (optionally) the
    # shared-memory regions for every input/output pair.
    for io_num in range(io_cnt):
        if override_input_names:
            input_name = override_input_names[io_num]
        else:
            if platform == "pytorch_libtorch":
                input_name = "INPUT__{}".format(io_num)
            else:
                input_name = "INPUT{}".format(io_num)

        if override_output_names:
            output_name = override_output_names[io_num]
        else:
            if platform == "pytorch_libtorch":
                output_name = "OUTPUT__{}".format(io_num)
            else:
                output_name = "OUTPUT{}".format(io_num)

        input_shape = input_shapes[io_num]
        output_shape = output_shapes[io_num]

        rtensor_dtype = _range_repr_dtype(tensor_dtype)
        if rtensor_dtype != bool:
            input_array = np.random.randint(
                low=np.iinfo(rtensor_dtype).min,
                high=np.iinfo(rtensor_dtype).max,
                size=input_shape,
                dtype=rtensor_dtype,
            )
        else:
            input_array = np.random.choice(a=[False, True], size=input_shape)
        if tensor_dtype != np.object_:
            input_array = input_array.astype(tensor_dtype)
            expected_array = np.ndarray.copy(input_array)
        else:
            # String tensors: send the decimal text of every random integer.
            expected_array = np.array(
                [unicode(str(x), encoding="utf-8") for x in input_array.flatten()],
                dtype=object,
            )
            input_array = np.array(
                [str(x) for x in input_array.flatten()], dtype=object
            ).reshape(input_array.shape)

        expected_array = expected_array.reshape(output_shape)
        expected_dict[output_name] = expected_array

        if tensor_dtype == np.object_:
            output_byte_size = serialized_byte_size(expected_array)
        else:
            output_byte_size = expected_array.nbytes

        if batch_size == 1:
            input_list = [input_array]
        else:
            input_list = [x for x in input_array]

        # Serialization of string tensors in the case of shared memory must be done manually
        if tensor_dtype == np.object_:
            input_list_tmp = serialize_byte_tensor_list(input_list)
        else:
            input_list_tmp = input_list

        if tensor_dtype == np.object_:
            input_byte_size = sum([serialized_byte_size(ip) for ip in input_list_tmp])
        else:
            input_byte_size = sum([ip.nbytes for ip in input_list_tmp])

        # create and register shared memory region for inputs and outputs
        shm_io_handles = su.create_set_either_shm_region(
            [
                shm_region_name_prefix[0] + str(io_num),
                shm_region_name_prefix[1] + str(io_num),
            ],
            input_list_tmp,
            input_byte_size,
            output_byte_size,
            use_system_shared_memory,
            use_cuda_shared_memory,
        )

        # An empty result means no shared-memory regions were created.
        if len(shm_io_handles) != 0:
            shm_ip_handles.append(shm_io_handles[0])
            shm_op_handles.append(shm_io_handles[1])
        input_dict[input_name] = input_array

    if model_version is not None:
        model_version = str(model_version)
    else:
        model_version = ""

    # Run inference and check results for each config
    for config in configs:
        if config[1] == "http":
            triton_client = httpclient.InferenceServerClient(config[0], verbose=True)
        else:
            triton_client = grpcclient.InferenceServerClient(config[0], verbose=True)

        inputs = []
        output_req = []
        for io_num, (input_name, output_name) in enumerate(
            zip(input_dict.keys(), expected_dict.keys())
        ):
            input_data = input_dict[input_name]
            output_data = expected_dict[output_name]
            if tensor_dtype == np.object_:
                input_byte_size = serialized_byte_size(
                    serialize_byte_tensor(input_data)
                )
                output_byte_size = serialized_byte_size(
                    serialize_byte_tensor(output_data)
                )
            else:
                input_byte_size = input_data.nbytes
                output_byte_size = output_data.nbytes
            if config[1] == "http":
                inputs.append(
                    httpclient.InferInput(
                        input_name, input_data.shape, np_to_triton_dtype(tensor_dtype)
                    )
                )
                output_req.append(
                    httpclient.InferRequestedOutput(output_name, binary_data=config[3])
                )
            else:
                inputs.append(
                    grpcclient.InferInput(
                        input_name, input_data.shape, np_to_triton_dtype(tensor_dtype)
                    )
                )
                output_req.append(grpcclient.InferRequestedOutput(output_name))

            if not (use_cuda_shared_memory or use_system_shared_memory):
                if config[1] == "http":
                    inputs[-1].set_data_from_numpy(input_data, binary_data=config[3])
                else:
                    inputs[-1].set_data_from_numpy(input_data)
            else:
                # Register necessary shared memory regions/handles
                su.register_add_either_shm_regions(
                    inputs,
                    output_req,
                    shm_region_name_prefix,
                    (shm_ip_handles, shm_op_handles),
                    io_num,
                    input_byte_size,
                    output_byte_size,
                    use_system_shared_memory,
                    use_cuda_shared_memory,
                    triton_client,
                )

        if config[2]:
            # Streaming: the response is delivered through the callback queue.
            user_data = UserData()
            triton_client.start_stream(partial(completion_callback, user_data))
            try:
                results = triton_client.async_stream_infer(
                    model_name,
                    inputs,
                    model_version=model_version,
                    outputs=output_req,
                    request_id=str(_unique_request_id()),
                    priority=priority,
                    timeout=timeout_us,
                )
            except Exception as e:
                triton_client.stop_stream()
                raise e
            triton_client.stop_stream()
            (results, error) = user_data._completed_requests.get()
            if error is not None:
                raise error
        else:
            results = triton_client.infer(
                model_name,
                inputs,
                model_version=model_version,
                outputs=output_req,
                request_id=str(_unique_request_id()),
                priority=priority,
                timeout=timeout_us,
            )

        last_response = results.get_response()

        if config[1] == "http":
            response_model_name = last_response["model_name"]
            if model_version != "":
                response_model_version = last_response["model_version"]
            response_outputs = last_response["outputs"]
        else:
            response_model_name = last_response.model_name
            if model_version != "":
                response_model_version = last_response.model_version
            response_outputs = last_response.outputs

        tester.assertEqual(response_model_name, model_name)

        if model_version != "":
            tester.assertEqual(response_model_version, model_version)

        tester.assertEqual(len(response_outputs), io_cnt)

        for result in response_outputs:
            if config[1] == "http":
                result_name = result["name"]
            else:
                result_name = result.name

            tester.assertIn(result_name, expected_dict)
            if use_system_shared_memory or use_cuda_shared_memory:
                # Recover the I/O index from the output name to find the
                # matching output shared-memory handle.
                if platform == "pytorch_libtorch":
                    io_num = int(result_name.split("OUTPUT__")[1])
                else:
                    io_num = int(result_name.split("OUTPUT")[1])
                shm_handle = shm_op_handles[io_num]

                output = results.get_output(result_name)
                if config[1] == "http":
                    output_datatype = output["datatype"]
                    output_shape = output["shape"]
                else:
                    output_datatype = output.datatype
                    output_shape = output.shape
                output_dtype = triton_to_np_dtype(output_datatype)
            if use_system_shared_memory:
                output_data = shm.get_contents_as_numpy(
                    shm_handle, output_dtype, output_shape
                )
            elif use_cuda_shared_memory:
                output_data = cudashm.get_contents_as_numpy(
                    shm_handle, output_dtype, output_shape
                )
            else:
                output_data = results.as_numpy(result_name)

                # Normalize non-binary string outputs before comparing them
                # with the expected array.
                if (output_data.dtype == np.object_) and (config[3] == False):
                    if config[1] == "http":
                        output_data = np.array(
                            [
                                unicode(str(x), encoding="utf-8")
                                for x in (output_data.flatten())
                            ],
                            dtype=np.object_,
                        ).reshape(output_data.shape)
                    elif config[1] == "grpc":
                        output_data = np.array(
                            [x for x in (output_data.flatten())], dtype=np.object_
                        ).reshape(output_data.shape)

            expected = expected_dict[result_name]
            tester.assertEqual(output_data.shape, expected.shape)
            tester.assertTrue(
                np.array_equal(output_data, expected),
                "{}, {}, expected: {}, got {}".format(
                    model_name, result_name, expected, output_data
                ),
            )

    # Tear down shared memory. Both the input (prefix[0]) and the output
    # (prefix[1]) region of every I/O index must be unregistered; previously
    # the same name was unregistered twice, leaving the other region
    # registered on the server.
    if len(shm_ip_handles) != 0:
        for io_num in range(io_cnt):
            input_region = shm_region_name_prefix[0] + str(io_num) + "_data"
            output_region = shm_region_name_prefix[1] + str(io_num) + "_data"
            if use_cuda_shared_memory:
                triton_client.unregister_cuda_shared_memory(input_region)
                triton_client.unregister_cuda_shared_memory(output_region)
                cudashm.destroy_shared_memory_region(shm_ip_handles[io_num])
                cudashm.destroy_shared_memory_region(shm_op_handles[io_num])
            else:
                triton_client.unregister_system_shared_memory(input_region)
                triton_client.unregister_system_shared_memory(output_region)
                shm.destroy_shared_memory_region(shm_ip_handles[io_num])
                shm.destroy_shared_memory_region(shm_op_handles[io_num])

    return results


# Perform basic inference for shared memory tests
def shm_basic_infer(
    tester,
    triton_client,
    shm_ip0_handle,
    shm_ip1_handle,
    shm_op0_handle,
    shm_op1_handle,
    error_msg,
    big_shm_name="",
    big_shm_size=64,
    default_shm_byte_size=64,
    register_offset=0,
    shm_output_offset=0,
    shm_output_byte_size=64,
    protocol="http",
    use_system_shared_memory=False,
    use_cuda_shared_memory=False,
    override_model_name=None,
):
    """Send one shared-memory inference request and check the first output.

    Two INT32 inputs of shape [1, 16] are bound to already-registered
    shared-memory regions ("input0_data" and either ``big_shm_name`` or
    "input1_data"); both outputs are bound to "output0_data" /
    "output1_data". The first output, read back through ``shm_op0_handle``,
    must equal ``arange(16) + ones(16)``.

    Any exception raised by the request or by the check is not propagated:
    its text is appended to ``error_msg`` so the caller can inspect it.

    Args:
        tester: Object providing ``assertTrue``.
        triton_client: Client used to issue the ``infer`` call.
        shm_ip0_handle: Not used by this function.
        shm_ip1_handle: Only inspected by the type check described below.
        shm_op0_handle: Handle of the region the first output is read from.
        shm_op1_handle: Not used by this function.
        error_msg: List that receives ``str(exception)`` on failure.
        big_shm_name: If non-empty, region name used for the second input.
        big_shm_size: Byte size used together with ``big_shm_name``.
        default_shm_byte_size: Byte size for the default input regions.
        register_offset: Added to ``shm_output_offset`` when reading the
            first output from system shared memory.
        shm_output_offset: Offset of the outputs inside their regions.
        shm_output_byte_size: Byte size reserved for each output.
        protocol: "http" selects the HTTP client types; anything else the
            gRPC ones.
        use_system_shared_memory: Read the output via system shared memory.
        use_cuda_shared_memory: Read the output via CUDA shared memory
            (only consulted when system shared memory is not selected).
        override_model_name: Model to call instead of "simple".

    Raises:
        Exception: If neither shared-memory flavour is selected.
    """
    # Lazy shm imports...
    if use_system_shared_memory:
        import tritonclient.utils.shared_memory as shm
    elif use_cuda_shared_memory:
        import tritonclient.utils.cuda_shared_memory as cudashm
    else:
        raise Exception("No shared memory type specified")

    model_name = "simple" if override_model_name is None else override_model_name

    # libtorch models use the double-underscore tensor naming scheme.
    if model_name.startswith("libtorch"):
        output_names = ["OUTPUT__0", "OUTPUT__1"]
    else:
        output_names = ["OUTPUT0", "OUTPUT1"]

    input0_data = np.arange(start=0, stop=16, dtype=np.int32)
    input1_data = np.ones(shape=16, dtype=np.int32)
    expected_sum = input0_data + input1_data

    if protocol == "http":
        inputs = [
            httpclient.InferInput("INPUT0", [1, 16], "INT32"),
            httpclient.InferInput("INPUT1", [1, 16], "INT32"),
        ]
        outputs = [
            httpclient.InferRequestedOutput(output_names[0], binary_data=True),
            httpclient.InferRequestedOutput(output_names[1], binary_data=False),
        ]
    else:
        inputs = [
            grpcclient.InferInput("INPUT0", [1, 16], "INT32"),
            grpcclient.InferInput("INPUT1", [1, 16], "INT32"),
        ]
        outputs = [
            grpcclient.InferRequestedOutput(output_names[0]),
            grpcclient.InferRequestedOutput(output_names[1]),
        ]

    first_input, second_input = inputs
    first_input.set_shared_memory("input0_data", default_shm_byte_size)

    # NOTE(review): ``np.array`` is a function, not a type, so this
    # comparison never holds and the raw-data branch is unreachable. Left
    # untouched on purpose; confirm the intended behaviour before changing.
    if type(shm_ip1_handle) == np.array:
        second_input.set_data_from_numpy(input0_data, binary_data=True)
    elif big_shm_name != "":
        second_input.set_shared_memory(big_shm_name, big_shm_size)
    else:
        second_input.set_shared_memory("input1_data", default_shm_byte_size)

    for requested_output, region_name in zip(
        outputs, ("output0_data", "output1_data")
    ):
        requested_output.set_shared_memory(
            region_name, shm_output_byte_size, offset=shm_output_offset
        )

    try:
        results = triton_client.infer(
            model_name, inputs, model_version="", outputs=outputs
        )
        output = results.get_output(output_names[0])
        # HTTP responses are dicts, gRPC responses are message objects.
        if protocol == "http":
            output_datatype, output_shape = output["datatype"], output["shape"]
        else:
            output_datatype, output_shape = output.datatype, output.shape
        output_dtype = triton_to_np_dtype(output_datatype)

        if use_system_shared_memory:
            output_data = shm.get_contents_as_numpy(
                shm_op0_handle,
                output_dtype,
                output_shape,
                offset=register_offset + shm_output_offset,
            )
        elif use_cuda_shared_memory:
            output_data = cudashm.get_contents_as_numpy(
                shm_op0_handle,
                output_dtype,
                output_shape,
            )

        tester.assertTrue(
            (output_data[0] == expected_sum).all(),
            "Model output does not match expected output",
        )
    except Exception as ex:
        # Failures are reported back through error_msg, not raised.
        error_msg.append(str(ex))


================================================
FILE: qa/common/inferentia_perf_analyzer_input_data_json/non_aligned_validation_batched.json
================================================
{
  "data" :
    [
      {
        "INPUT__0" :
        {
          "content": [1, 2, 3, 4],
          "shape": [4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1],
          "shape": [4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [0, 0, 0, 0],
          "shape": [4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1],
          "shape": [4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [-1, -2, -3, -4],
          "shape": [4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1],
          "shape": [4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [-4, -3, -2, -1],
          "shape": [4]
        },
        "INPUT__1" :
        {
          "content": [-1, -1, -1, -1],
          "shape": [4]
        }
      }
    ],
  "validation_data" :
  [
      {
        "OUTPUT__0" :
        {
          "content": [2, 3, 4, 5],
          "shape": [4]
        },
        "OUTPUT__1" :
        {
          "content": [0, 1, 2, 3],
          "shape": [4]
        }
      },
      {
        "OUTPUT__0" :
        {
          "content": [1, 1, 1, 1],
          "shape": [4]
        },
        "OUTPUT__1" :
        {
          "content": [-1, -1 ,-1, -1],
          "shape": [4]
        }
      },
      {
        "OUTPUT__0" :
        {
          "content": [0, -1, -2, -3],
          "shape": [4]
        },
        "OUTPUT__1" :
        {
          "content": [-2, -3, -4, -5],
          "shape": [4]
        }
      }
  ]
}


================================================
FILE: qa/common/inferentia_perf_analyzer_input_data_json/non_aligned_validation_no_batch.json
================================================
{
  "data" :
    [
      {
        "INPUT__0" :
        {
          "content": [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
          "shape": [6, 4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          "shape": [6, 4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
          "shape": [6, 4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          "shape": [6, 4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [-1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4],
          "shape": [6, 4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          "shape": [6, 4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [-4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1],
          "shape": [6, 4]
        },
        "INPUT__1" :
        {
          "content": [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
          "shape": [6, 4]
        }
      }
    ],
  "validation_data" :
  [
      {
        "OUTPUT__0" :
        {
          "content": [2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5],
          "shape": [6, 4]
        },
        "OUTPUT__1" :
        {
          "content": [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3],
          "shape": [6, 4]
        }
      },
      {
        "OUTPUT__0" :
        {
          "content": [0, -1, -2, -3, 0, -1, -2, -3, 0, -1, -2, -3, 0, -1, -2, -3, 0, -1, -2, -3, 0, -1, -2, -3],
          "shape": [6, 4]
        },
        "OUTPUT__1" :
        {
          "content": [-2, -3, -4, -5, -2, -3, -4, -5, -2, -3, -4, -5, -2, -3, -4, -5, -2, -3, -4, -5, -2, -3, -4, -5],
          "shape": [6, 4]
        }
      },
      {
        "OUTPUT__0" :
        {
          "content": [-5, -4, -3, -2, -5, -4, -3, -2, -5, -4, -3, -2, -5, -4, -3, -2, -5, -4, -3, -2, -5, -4, -3, -2],
          "shape": [6, 4]
        },
        "OUTPUT__1" :
        {
          "content": [-3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0],
          "shape": [6, 4]
        }
      }
  ]
}


================================================
FILE: qa/common/inferentia_perf_analyzer_input_data_json/simple_model.py
================================================
#!/usr/bin/env python
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse


def gen_pytorch_model(name, batch_size):
    """Trace a small add/sub PyTorch module with torch_neuron and save it.

    The traced model is written to ``<name>.pt`` in the current directory.
    ``batch_size`` is accepted for interface compatibility but does not
    influence the result: tracing always uses int64 zero tensors of shape
    [8, 4] with ``dynamic_batch_size=True``.

    Relies on ``torch``, ``torch_neuron`` and ``nn`` being available as
    module globals (they are imported by the script entry point).
    """

    class PyAddSubNet(nn.Module):
        """Two-input network returning (input0 + input1, input0 - input1)."""

        def __init__(self):
            super().__init__()

        def forward(self, input0, input1):
            # sub with alpha=-1 computes input0 - (-1 * input1), i.e. the sum.
            added = torch.sub(input0, input1, alpha=-1)
            subtracted = torch.sub(input0, input1, alpha=1)
            return added, subtracted

    net = PyAddSubNet()
    net.eval()

    example_shape = [8, 4]
    example_inputs = (
        torch.zeros(example_shape, dtype=torch.int64),
        torch.zeros(example_shape, dtype=torch.int64),
    )
    traced = torch_neuron.trace(net, example_inputs, dynamic_batch_size=True)
    traced.save("{}.pt".format(name))


if __name__ == "__main__":
    # Command-line entry point: parse the options, reject anything unknown,
    # then generate the requested compiled model.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_type",
        type=str,
        required=True,
        choices=["pytorch"],
        help="""The type of the compiled model. Currently,
                        only supports \"pytorch\".""",
    )
    parser.add_argument(
        "--name",
        type=str,
        required=True,
        help="The name of the compiled model",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="The batch size for the compiled model",
    )

    FLAGS, unparsed = parser.parse_known_args()
    if unparsed:
        raise Exception("Unrecognized options: {}".format(unparsed))

    if FLAGS.model_type == "pytorch":
        # Imported here so they become module globals only when a PyTorch
        # model is actually requested; gen_pytorch_model reads them.
        import torch
        import torch_neuron
        from torch import nn

        gen_pytorch_model(FLAGS.name, FLAGS.batch_size)


================================================
FILE: qa/common/inferentia_perf_analyzer_input_data_json/validation_batched.json
================================================
{
    "data" :
      [
        {
          "INPUT__0" :
          {
            "content": [1, 2, 3, 4],
            "shape": [4]
          },
          "INPUT__1" :
          {
            "content": [1, 1, 1, 1],
            "shape": [4]
          }
        },
        {
          "INPUT__0" :
          {
            "content": [0, 0, 0, 0],
            "shape": [4]
          },
          "INPUT__1" :
          {
            "content": [1, 1, 1, 1],
            "shape": [4]
          }
        },
        {
          "INPUT__0" :
          {
            "content": [-1, -2, -3, -4],
            "shape": [4]
          },
          "INPUT__1" :
          {
            "content": [1, 1, 1, 1],
            "shape": [4]
          }
        },
        {
          "INPUT__0" :
          {
            "content": [-4, -3, -2, -1],
            "shape": [4]
          },
          "INPUT__1" :
          {
            "content": [-1, -1, -1, -1],
            "shape": [4]
          }
        }
      ],
    "validation_data" :
    [
        {
          "OUTPUT__0" :
          {
            "content": [2, 3, 4, 5],
            "shape": [4]
          },
          "OUTPUT__1" :
          {
            "content": [0, 1, 2, 3],
            "shape": [4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [1, 1, 1, 1],
            "shape": [4]
          },
          "OUTPUT__1" :
          {
            "content": [-1, -1 ,-1, -1],
            "shape": [4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [0, -1, -2, -3],
            "shape": [4]
          },
          "OUTPUT__1" :
          {
            "content": [-2, -3, -4, -5],
            "shape": [4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [-5, -4, -3, -2],
            "shape": [4]
          },
          "OUTPUT__1" :
          {
            "content": [-3, -2, -1, 0],
            "shape": [4]
          }
        }
    ]
}


================================================
FILE: qa/common/inferentia_perf_analyzer_input_data_json/validation_no_batch.json
================================================
{
    "data" :
      [
        {
          "INPUT__0" :
          {
            "content": [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
            "shape": [6, 4]
          },
          "INPUT__1" :
          {
            "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            "shape": [6, 4]
          }
        },
        {
          "INPUT__0" :
          {
            "content": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            "shape": [6, 4]
          },
          "INPUT__1" :
          {
            "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            "shape": [6, 4]
          }
        },
        {
          "INPUT__0" :
          {
            "content": [-1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4],
            "shape": [6, 4]
          },
          "INPUT__1" :
          {
            "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            "shape": [6, 4]
          }
        },
        {
          "INPUT__0" :
          {
            "content": [-4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1],
            "shape": [6, 4]
          },
          "INPUT__1" :
          {
            "content": [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
            "shape": [6, 4]
          }
        }
      ],
    "validation_data" :
    [
        {
          "OUTPUT__0" :
          {
            "content": [2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5],
            "shape": [6, 4]
          },
          "OUTPUT__1" :
          {
            "content": [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3],
            "shape": [6, 4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            "shape": [6, 4]
          },
          "OUTPUT__1" :
          {
            "content": [-1, -1 ,-1, -1, -1, -1 ,-1, -1, -1, -1 ,-1, -1, -1, -1 ,-1, -1, -1, -1 ,-1, -1, -1, -1 ,-1, -1],
            "shape": [6, 4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [0, -1, -2, -3, 0, -1, -2, -3, 0, -1, -2, -3, 0, -1, -2, -3, 0, -1, -2, -3, 0, -1, -2, -3],
            "shape": [6, 4]
          },
          "OUTPUT__1" :
          {
            "content": [-2, -3, -4, -5, -2, -3, -4, -5, -2, -3, -4, -5, -2, -3, -4, -5, -2, -3, -4, -5, -2, -3, -4, -5],
            "shape": [6, 4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [-5, -4, -3, -2, -5, -4, -3, -2, -5, -4, -3, -2, -5, -4, -3, -2, -5, -4, -3, -2, -5, -4, -3, -2],
            "shape": [6, 4]
          },
          "OUTPUT__1" :
          {
            "content": [-3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0],
            "shape": [6, 4]
          }
        }
    ]
}


================================================
FILE: qa/common/inferentia_perf_analyzer_input_data_json/wrong_validation_batched.json
================================================
{
  "data" :
    [
      {
        "INPUT__0" :
        {
          "content": [1, 2, 3, 4],
          "shape": [4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1],
          "shape": [4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [0, 0, 0, 0],
          "shape": [4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1],
          "shape": [4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [-1, -2, -3, -4],
          "shape": [4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1],
          "shape": [4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [-4, -3, -2, -1],
          "shape": [4]
        },
        "INPUT__1" :
        {
          "content": [-1, -1, -1, -1],
          "shape": [4]
        }
      }
    ],
    "validation_data" :
    [
        {
          "OUTPUT__0" :
          {
            "content": [2, 3, 4, 5],
            "shape": [4]
          },
          "OUTPUT__1" :
          {
            "content": [0, 0, 0, 0],
            "shape": [4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [1, 1, 1, 1],
            "shape": [4]
          },
          "OUTPUT__1" :
          {
            "content": [1, 1, 1, 1],
            "shape": [4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [0, 1, 2, 3],
            "shape": [4]
          },
          "OUTPUT__1" :
          {
            "content": [7, 8, 9, 10],
            "shape": [4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [-5, -4, -3, -1],
            "shape": [4]
          },
          "OUTPUT__1" :
          {
            "content": [-3, -2, -1, 0],
            "shape": [4]
          }
        }
    ]
}


================================================
FILE: qa/common/inferentia_perf_analyzer_input_data_json/wrong_validation_no_batch.json
================================================
{
  "data" :
    [
      {
        "INPUT__0" :
        {
          "content": [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
          "shape": [6, 4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          "shape": [6, 4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
          "shape": [6, 4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          "shape": [6, 4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [-1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4],
          "shape": [6, 4]
        },
        "INPUT__1" :
        {
          "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
          "shape": [6, 4]
        }
      },
      {
        "INPUT__0" :
        {
          "content": [-4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1],
          "shape": [6, 4]
        },
        "INPUT__1" :
        {
          "content": [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
          "shape": [6, 4]
        }
      }
    ],
    "validation_data" :
    [
        {
          "OUTPUT__0" :
          {
            "content": [2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5],
            "shape": [6, 4]
          },
          "OUTPUT__1" :
          {
            "content": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3],
            "shape": [6, 4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            "shape": [6, 4]
          },
          "OUTPUT__1" :
          {
            "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1 ,-1, -1],
            "shape": [6, 4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3],
            "shape": [6, 4]
          },
          "OUTPUT__1" :
          {
            "content": [7, 8, 9, 10, 7, 8, 9, 10, 7, 8, 9, 10, 7, 8, 9, 10, 7, 8, 9, 10, 7, 8, 9, 10],
            "shape": [6, 4]
          }
        },
        {
          "OUTPUT__0" :
          {
            "content": [-5, -4, -3, -1, -5, -4, -3, -1, -5, -4, -3, -1, -5, -4, -3, -1, -5, -4, -3, -1, -5, -4, -3, -1],
            "shape": [6, 4]
          },
          "OUTPUT__1" :
          {
            "content": [-3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0, -3, -2, -1, 0],
            "shape": [6, 4]
          }
        }
    ]
}


================================================
FILE: qa/common/libtorch_infer_client.py
================================================
#!/usr/bin/env python
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

sys.path.append("../common")

import unittest

import numpy as np
import test_util as tu
import tritonclient.http as httpclient

# Hostname or IP address of the tritonserver under test. Defaults to
# "localhost" and can be overridden with the TRITONSERVER_IPADDR envvar.
# Only the host is stored here; the test appends the port (":8000") itself.
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


class InferTest(tu.TestResultCollector):
    """Synchronous HTTP inference check for ``libtorch_int32_int32_int32``."""

    def test_infer(self):
        # Sends a single [1, 16] INT32 request and verifies element-wise that
        # OUTPUT__0 == INPUT0 - INPUT1 and OUTPUT__1 == INPUT0 + INPUT1.
        # Any failure terminates the process with exit status 1.
        server_url = f"{_tritonserver_ipaddr}:8000"
        try:
            client = httpclient.InferenceServerClient(url=server_url)
        except Exception as err:
            print("channel creation failed: " + str(err))
            sys.exit(1)

        model_name = "libtorch_int32_int32_int32"
        element_count = 16

        request_inputs = [
            httpclient.InferInput("INPUT0", [1, 16], "INT32"),
            httpclient.InferInput("INPUT1", [1, 16], "INT32"),
        ]

        # INPUT0 holds the unique integers 0..15; INPUT1 is filled with -1
        # (not +1), so the difference and the sum are easy to tell apart.
        lhs = np.expand_dims(np.arange(start=0, stop=16, dtype=np.int32), axis=0)
        rhs = np.full(shape=(1, 16), fill_value=-1, dtype=np.int32)

        # Attach the tensors to the request as binary payloads.
        request_inputs[0].set_data_from_numpy(lhs, binary_data=True)
        request_inputs[1].set_data_from_numpy(rhs, binary_data=True)

        requested_outputs = [
            httpclient.InferRequestedOutput("OUTPUT__0", binary_data=True),
            httpclient.InferRequestedOutput("OUTPUT__1", binary_data=True),
        ]

        response = client.infer(model_name, request_inputs, outputs=requested_outputs)

        difference = response.as_numpy("OUTPUT__0")
        total = response.as_numpy("OUTPUT__1")

        # Echo every element of both results, then compare each against the
        # value recomputed locally from the inputs.
        for idx in range(element_count):
            a = lhs[0][idx]
            b = rhs[0][idx]
            print(f"{a} - {b} = {difference[0][idx]}")
            print(f"{a} + {b} = {total[0][idx]}")
            if (a - b) != difference[0][idx]:
                print("sync infer error: incorrect difference")
                sys.exit(1)
            if (a + b) != total[0][idx]:
                print("sync infer error: incorrect sum")
                sys.exit(1)


# When executed as a script, hand control to unittest's command-line runner,
# which discovers and runs the test case defined in this module.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: qa/common/nightly_email_helper.py
================================================
#!/usr/bin/env python
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import glob
import os
import smtplib
import sys
import tarfile
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText


def send(
    subject: str, content: str, attachments=None, files_to_tar=None, is_html=False
):
    """Send an email, optionally with file attachments, via the mail gateway.

    The sender and recipient are read from the TRITON_FROM and TRITON_TO_DL
    environment variables. If either is unset or empty, a message is printed
    and the process exits with status 1.

    Args:
        subject: Subject line. Also used as the base name of the
            ``<subject>.tgz`` archive created when ``files_to_tar`` is given.
        content: Body of the message.
        attachments: Optional iterable of file paths to attach. The caller's
            object is copied and never modified.
        files_to_tar: Optional glob pattern. Matching files are packed into a
            gzip-compressed tar archive (written relative to the current
            working directory) that is attached to the message.
        is_html: When true, the body is sent as ``text/html`` instead of
            plain text.
    """
    sender = os.environ.get("TRITON_FROM", "")
    recipient = os.environ.get("TRITON_TO_DL", "")
    if sender == "" or recipient == "":
        print("Must set TRITON_FROM and TRITON_TO_DL env variables")
        sys.exit(1)

    # NOTE(review): "alternative" is kept for compatibility with existing
    # reports; "mixed" is the usual container for body + attachments --
    # confirm how recipients' mail clients render this before changing it.
    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["From"] = sender
    msg["To"] = recipient
    msg.attach(MIMEText(content, "html" if is_html else "plain"))

    # Work on a private copy so that appending the generated archive below
    # does not leak into a list owned by the caller.
    attachments = [] if attachments is None else list(attachments)

    if files_to_tar is not None:
        archive_name = subject + ".tgz"
        with tarfile.open(archive_name, "w:gz") as csv_tar:
            for filename in glob.glob(files_to_tar):
                csv_tar.add(filename)
        attachments.append(archive_name)

    for fname in attachments:
        part = MIMEBase("application", "octet-stream")
        with open(fname, "rb") as attachment:
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        part.add_header("Content-Disposition", "attachment; filename= %s" % (fname))
        msg.attach(part)

    # The context manager issues QUIT and closes the socket even when
    # send_message() raises, so a failed send does not leak the connection.
    with smtplib.SMTP("mailgw.nvidia.com") as mail_server:
        mail_server.send_message(msg)


================================================
FILE: qa/common/orca_header_test.py
================================================
#!/usr/bin/python3
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import argparse
import json

import requests


# To run the test, have tritonserver running and run this script with the endpoint URL as its only (positional) argument.
#
# Example:
# ```
# python3 orca_header_test.py http://localhost:8000/v2/models/ensemble/generate
# ```
def get_endpoint_header(url, data, request_header=None):
    """
    POSTs `data` as a JSON body to `url` and reports the "endpoint-load-metrics" response header.

    Returns the header value, an empty string when the response does not carry the header,
    or None when the request raises a `requests` exception (including an HTTP error status).
    """
    header_name = "endpoint-load-metrics"
    # A missing or empty header mapping is sent exactly like a request
    # without any custom headers.
    extra_headers = request_header if request_header else None
    try:
        reply = requests.post(url, json=data, headers=extra_headers)
        reply.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        return None
    return reply.headers.get(header_name, "")


def parse_header_data(header, orca_format):
    """
    Parses an ORCA response header into a dictionary of named metrics.

    Args:
        header: Raw header value, e.g. 'JSON {"named_metrics": {...}}' or
            'TEXT named_metrics.a=1, named_metrics.b=2'.
        orca_format: "json" or "text"; any other value is rejected.

    Returns:
        For "json", the value stored under "named_metrics". For "text", a dict
        mapping each key that follows the "named_metrics." prefix to its float
        value. None when the format is unknown, the header cannot be parsed,
        or no named metrics are found (a diagnostic is printed in each case).
    """
    METRIC_KEY = "named_metrics"

    def strip_tag(text, tag):
        # Remove the format tag only where it leads the header, so payloads
        # that happen to contain the same characters are left intact.
        return text[len(tag):] if text.startswith(tag) else text

    try:
        if orca_format == "json":
            # Parse the header in JSON format
            data = json.loads(strip_tag(header, "JSON "))
            if not isinstance(data, dict):
                # Valid JSON but not an object (e.g. a bare number): there is
                # nothing to look the metric key up in.
                print("Error: Invalid data in the header.")
                return None
            if METRIC_KEY in data:
                return data[METRIC_KEY]
            print(f"No key '{METRIC_KEY}' in header data: {data}")
            return None

        if orca_format == "text":
            # Parse the header in TEXT format
            data = {}
            for key_value_pair in strip_tag(header, "TEXT ").split(", "):
                # Anything other than exactly one "=" raises ValueError, which
                # is reported as invalid header data below.
                key, value = key_value_pair.split("=")
                prefix, dot, nested_key = key.partition(".")
                if dot and prefix == METRIC_KEY:
                    data[nested_key] = float(value)
            if not data:
                print(f"Could not parse any keys from header: {header}")
                return None
            return data

        print(f"Invalid ORCA format: {orca_format}")
        return None
    except (json.JSONDecodeError, ValueError, KeyError):
        print("Error: Invalid data in the header.")
        return None


def check_for_keys(data, desired_keys, orca_format):
    """
    Checks if all desired keys are present in the given data dictionary.

    Prints the value of every desired key when they are all present, or the
    sorted list of missing keys otherwise.

    Args:
        data: Mapping of metric names to values.
        desired_keys: Iterable of metric names that must be present.
        orca_format: Format label, used only in the printed message.

    Returns:
        True when every desired key is in `data`, False otherwise.
    """
    # Sorted so the diagnostic is stable from run to run.
    missing = sorted(set(desired_keys) - set(data))
    if missing:
        print(f"Missing keys in header: {', '.join(missing)}")
        return False

    print(
        f"ORCA header present in {orca_format} format with",
        [f"{key}: {data[key]}" for key in desired_keys],
    )
    return True


def request_header(orca_format):
    """
    Builds the request headers that select an ORCA metrics format.

    Returns None when `orca_format` is falsy, i.e. no format is requested.
    """
    if not orca_format:
        return None
    return {"endpoint-load-metrics-format": orca_format}


def test_header_type(url, data, orca_format):
    """
    Sends one request and validates the ORCA response header for a format.

    Args:
        url: Endpoint URL the request is sent to.
        data: Request payload forwarded to get_endpoint_header.
        orca_format: Value for the "endpoint-load-metrics-format" request
            header; a falsy value means the header is not sent.

    Returns:
        True when the response header matches expectations: it parses and
        contains every desired metric key, or it is empty and no format was
        requested. False when the request failed, the header is empty
        although a format was requested, or the header cannot be
        parsed/validated.
    """
    req_header = request_header(orca_format)
    # Use the arguments rather than the script-level globals so the function
    # behaves the same when called from outside the __main__ block.
    # NOTE(review): presumably get_endpoint_header returns the header value
    # as a string ("" when absent) and None on request failure - confirm.
    response_header = get_endpoint_header(url, data, req_header)

    desired_keys = {
        "kv_cache_utilization",
        "max_token_capacity",
    }  # Just the keys, no need to initialize with None

    if response_header is None:
        print(f"Request to endpoint: '{url}' failed.")
        return False

    if response_header == "":
        if orca_format:
            print(
                f"response header empty, endpoint-load-metrics-format={orca_format} is not a valid ORCA metric format"
            )
            return False
        # No request header set <=> no response header. Intended behavior.
        print("response header empty, endpoint-load-metrics-format is not set")
        return True

    # Kept in its own name so the request payload `data` is not overwritten.
    metrics = parse_header_data(response_header, orca_format)
    if metrics:
        return check_for_keys(metrics, desired_keys, orca_format)

    print(f"Unexpected response header value: {response_header}")
    return False


if __name__ == "__main__":
    # Script entry point: probe the endpoint once per ORCA format (plus once
    # with no format requested) and exit non-zero if any probe fails.
    cli = argparse.ArgumentParser(
        description="Make a POST request to generate endpoint to test the ORCA metrics header."
    )
    cli.add_argument("url", help="The model URL to send the request to.")
    # `args` and `TEST_DATA` stay module-level names under these exact
    # spellings; code outside this block may refer to them.
    args = cli.parse_args()
    TEST_DATA = json.loads(
        '{"text_input": "hello world", "max_tokens": 20, "bad_words": "", "stop_words": ""}'
    )

    failed_formats = []
    for orca_format in ("json", "text", None):
        print("Checking response header for ORCA format:", orca_format)
        if test_header_type(args.url, TEST_DATA, orca_format):
            continue
        # Keep going after a failure so every format gets reported.
        print("FAIL on format:", orca_format)
        failed_formats.append(orca_format)

    sys.exit(1 if failed_formats else 0)


================================================
FILE: qa/common/perf_analyzer_input_data_json/float_data_with_shape.json
================================================
{
    "data" :
        [
            [
                {
                    "INPUT" :
                    {
                        "content": [1.0],
                        "shape": [1]
                    }
                },
                {
                    "INPUT" :
                    {
                        "content": [2.0, 3.0]
                    }
                },
                {
                    "INPUT" :
                    {
                        "content": [4.0, 5.0]
                    }
                },
                {
                    "INPUT" :
                    {
                        "content": [1.0, 2.0, 3.0],
                        "shape": [3]
                    }
                }
            ],
            [
                {
                    "INPUT" :
                    {
                        "content": [1.0],
                        "shape": [1]
                    }
                },
                {
                    "INPUT" :
                    {
                        "content": [2.0, 3.0]
                    }
                },
                {
                    "INPUT" :
                    {
                        "content": [4.0, 5.0]
                    }
                }
            ],
            [
                {
                    "INPUT" :
                    {
                        "content": [1.0],
                        "shape": [1]
                    }
                },
                {
                    "INPUT" :
                    {
                        "content": [2.0, 3.0]
                    }
                }
            ]
        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/image_data.json
================================================
{"data":[{"INPUT":{"b64":"klkPAP/Y/+AAEEpGSUYAAQEBAEgASAAA/+En4kV4aWYAAE1NACoAAAAIAAYACwACAAAAJgAA\nCGIBEgADAAAAAQABAAABMQACAAAAJgAACIgBMgACAAAAFAAACK6HaQAEAAAAAQAACMLqHAAH\nAAAIDAAAAFYAABFGHOoAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAABXaW5kb3dzIFBob3RvIEVkaXRvciAxMC4wLjEwMDExLjE2\nMzg0AFdpbmRvd3MgUGhvdG8gRWRpdG9yIDEwLjAuMTAwMTEuMTYzODQAMjAxODowNjowNSAx\nNzo0OToyMAAABpADAAIAAAAUAAARHJAEAAIAAAAUAAARMJKRAAIAAAADMjgAAJKSAAIAAAAD\nMjgAAKABAAMAAAABAAEAAOocAAcAAAgMAAAJEAAAAAAc6gAAAAgAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADIwMTg6MDY6MDUgMTc6\nNDg6MzMAMjAxODowNjowNSAxNzo0ODozMwAAAAAGAQMAAwAAAAEABgAAARoABQAAAAEAABGU\nARsABQAAAAEAABGcASgAAwAAAAEAAgAAAgEABAAAAAEAABGkAgIABAAAAAEAABY2AAAAAAAA\nAGAAAAABAAAAYAAAAAH/2P/bAEMACAYGBwYFCAcHBwkJCAoMFA0MCwsMGRITDxQdGh8eHRoc\nHCAkLicgIiwjHBwoNyksMDE0NDQfJzk9ODI8LjM0Mv/bAEMBCQkJDAsMGA0NGDIhHCEyMjIy\nMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMv/AABEIAMAB\nAAMBIQACEQEDEQH/xAAfAAABBQEBAQEBAQAAAAAAAAAAAQIDBAUGBwgJCgv/xAC1EAACAQMD\nAgQDBQUEBAAAAX0BAgMABBEFEiExQQYTUWEHInEUMoGRoQgjQrHBFVLR8CQzYnKCCQoWFxgZ\nGiUmJygpKjQ1Njc4OTpDREVGR0hJSlNUVVZXWFlaY2RlZmdoaWpzdHV2d3h5eoOEhYaHiImK\nkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4eLj5OXm5+jp\n6vHy8/T19vf4+fr/xAAfAQADAQEBAQEBAQEBAAAAAAAAAQIDBAUGBwgJCgv/xAC1EQACAQIE\nBAMEBwUEBAABAncAAQIDEQQFITEGEkFRB2FxEyIygQgUQpGhscEJIzNS8BVictEKFiQ04SXx\nFxgZGiYnKCkqNTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqCg4SFhoeI\niYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2dri4+Tl5ufo\n6ery8/T19vf4+fr/2gAMAwEAAhEDEQA/AON86l86mYjxIfWneYaYBvJq7b6Tqd0R5GnXcoPQ\npCxH54pXGlc0k8F68QGe0SMH+/OgP5ZzV6LwVqJH7y5sov8AekJP6KaXMiuVlhfBTY/eapbD\n/dVj/hTx4MiHXVl/CA//ABVLmHyijwZAf+Yv/wCS3/2VSJ4KhUgnWP8AyW/+yo5mHIiX/hCL\nc8jWBn/r2/8AsqkTwOnbV0/GAj/2ajnDkLH/AAg5b7upQH6qR/WoJ/AN+4/c3dk3Hd2H9KSm\nPkM6TwFryfdigl/3JR/XFVp/COv243NpsjD/AKZsr/oDmq50TyMoz6fe26gzWVxFjqXiZf5i\nqzMN1WmS0NL80okpkjhJ708Se9UIcJDUgmpoRIs1SiamMeJsmrMM3NNEsvpMR1qYTitBHkQf\nPenBjmuM2NHStPu9XvUs7KIyzP27KPUnsK9H034d6faIH1e6a4kxnyoTsQfj1P6UmxpHQwxa\nVpe0WVjbQFRgOqAt+fWmXGqlgT5rk+mKLdy79jNku2fPJqAyE9zSGG8gcd6XzCRjJpAKGqRZ\nDSAlWRqnSZx0J496Bkyzv61MLhqAJkumHQkfjVpNQkH8X51Iyyl+2edppktrpl6S11YW8rHq\nzxqT+eKNgMe58D+H7stsSa1c8gxSHH5Nmub1L4b6jApk0+4ivFH8J+R/1OD+daRn3M5Q7HHT\nwT2kzQ3EMkUi9UkUqR+dMD1sZMeHpwamhEykkcU8bqYDkYlsVfgHzCqQjRlXCKR9KRQfWtbE\nXPJl+tSrj+9XCbnpnwqvNPhlvbdmUXsm0ru6lPQfjXfahYXKIZIVMidyvJqb2Zoloc5M7biD\nkHvmq7N70wGZpN1IYu6lDUgHbqkU0hkqtUqtQBOrVIDQMkU1KKQEg9ql
VmHegCVZWFWI7gjG\nDQBX17Q7XX9LdLhVWdRmKbHKn/D2rw5y0crRt95SVP4VvDYxmtQElSJJk4rRMixOJWXoKkWR\nm69KdxWLETovarcc6iqTE0aK3CSQYzzimRzrjrW10Z2PKA1PDVwHSNM09vOlzbStHLHyrocE\nGu60L4yanYIsOp26XSjgyA7W/wAKTVy0zvbL4ieEfEAVJykEhXkTjbz/AL1aI0zQ9QXfZX4G\neRtcOKjVFaMgm8KzjmC6hlHvlTVKTw9qcf8Ay77x6owNFwsVn02+j+/aTD/gBqAxuhwyMp9x\nRcLCg08GgZIpqVW5pATo3FSqaAJFNTLk9KBliOKVukbn8KnW2m/iXb/vHFAhdsKH97dRL7A5\nqtc6/oumDMtzHu/22H8qcU2xN2OU174m2wt3h04+bOwwHH3U9/c152S7sWblicmulpRVjnu2\n7igGpFyDmgZKGNSK9AE6NU6GmhMvW4Plk+9MbKyfWtFsS0ebClrlNSaGPzElHouf1rMlTDGk\nWtiHocg4qxBqd7anMNxIv0amM17Xx1r9pgR6hOAO2/P862rT4teIbfG+dJR6Og/pS5UF2bVv\n8bL9f9fZQP8A7rEVow/GyFv9fpv/AHzID/MVPIPmLi/GHQ3H72wlB/3ENPHxW8MuebVue5gX\n/GlyMLj/APhZ3hUjmAD/ALY0n/CzfCv/ADyUf9sTRysOYT/hZ/hgdI1/78mmH4reHU+7GPwh\np8jDmIn+L2iL9yNvwhqpL8ZLPnyYZj6fKBRyC5jJu/jLcEHyLUk/7clYN18VdduMiPyogfQE\n/wA6pRQrmHc+LdbvifNv5sHsp2/yqoLmaVt0kjMfVjmrWhL1NCxffOi+pFdCKpkDwKdigBae\nBTAnSp1poRfQukIzgLTWORn0q4bCkcjc+GNWs8/aNOuowO5jJH5is9rN1OCCPqK5LmtixYwM\nJpFI+9Gw/r/Ssi6TDkU+pS2KbCozTGMNNoEJSUAIaTNACZNG4+tACZPrRk0AJSg0xCUUAPWp\n1bimI1NF+e/XJwFBJrohOM8KT9aGwtceLtF6qPzo+3xDrspXHyoeupWufm2fnirUc1pN/q5c\nH3ORTUiXEkZJUIxjB6HsanhJ43Y/CtCC9JMHVQv401DniqgKZ7Lsz2qvPplndArPaQyg/wB9\nAa4TqOf1nwdoy6Zd3FtYRRXEcLsjR5HOD26V4DqMeyZh71URMy2FRmtBEZptAhKQ0ANNJQAl\nJTAKSgQUUAFKKAHCpRzTES29ybeRip5IxVk6lIRzJSY0RNqB/vMajN856Z/OkMYbmQ9jTkvZ\nYzkMymgDa03xPNbkJN+8iPUGuutLiO8iE1vJuQ/mD6GtIsza1LAc5qxGe9aRIZ7dilIwK4Tr\nGTRCWCSMjIdSp/EV8ya5AYr2VSMYYinHclmA4qFhWoiIimmgQlNoAQ02gApKAEopiCkoAKUU\nAPFTQrubFAiGbImYUiRtIwVQST0AGSaBmzaaESA1y23/AGF6/nWpHp1pGoAgQ+7c5/OmkS5E\nv2O1x/x7Q/8AfsVBNo9pODtUxN2K9Pyp8olIwb7TZrGXbIBg8q69GqbR9Wm0u7Vskxnh09RU\np2ZbV0egQyrMiyowZHGQRVtDxxW6MWe64pcVwnWFfO/jq0+zeIr6MD5RM2PpnimtxM4qUYNV\n2FaEkRFNNMQ0000AJim0AFJQAlJQAtJTAKUUCFq9Yx75RQAp064udRkiijJIPJ7Ae5rorLSU\nso8AbpD958fy9BSE9i15R9KPLNWQBQ4p6oapCYl7bR3WmyROPmHzIfQ1w8iEMynqtZPc1Wx1\nnhC+MkMlo55j+ZPpXXRDNbJ6GdtT3WlrjOkXFeIfE+18rxJO2OJFVh+QprcTPM5l+Y1VYVoQ\nRNTCKYDabQAhpKAEpKAEpKACjFABS0wFFbmgwebdRrjqwFJiPeoPhjbCzj
KXbRzMoaQGMEZP\n+HSqtx8Nbxf9Tc28v+8Cp/rWakU4mVceAdXhBP2LeB3jcGsmfw3eQZ82zuI8d2jOKtSJcSi2\nmHP+NJ/Z7gdKtSJ5SnqMRtrbcRjLYrg7zAvG96l7lLYv+F3MevwqOjhlP5V6XFGQucVaegmt\nT2/FKBXMbDq8p+LVt/plrMB96HGfoT/jQtxM8euB8xqk4rVEETCmGmA0000ANpMUAJTaACig\nBKKAClxQA4Dmu48AWIvPEFjGRkNMufpmh7Atz6epayLCikBXlsbSf/XWsMn+9GDWdP4V0W45\nayRT6xkr/KncLHk3xVs9O0WaysrNpPNZGmkVmzgdF/k1ePXTbpwfU1oiWbfgmxm1HxlYW0Cb\n5HZiFz6KSf5V7U3h68hBD2cwA7hSR+lO9hWPSQaXNYGotcD8VLfzNItJsfckZT+IH+FNEnhl\n0uHNZzitUSQsKYaYhhpCKAGmkoASkoASkoAKKAFxSgUAOA5r1j4Q2ok8QRSkZESM/wCmB/Ok\n9gR7wHpwasyxwNGaQC1Fc3MNnay3NxII4YULyO3RVAyTQM+WPGHiR/EniC81E5CSNtiU/wAM\nY4UfkPzzXJud0/sK1Rmen/AzS3u/Gk+o7CYrK3b5vR3+UD8t35V9GColuXEr0VBQ6uY8f232\njwpO3eJ1f+n9aYj58vkw5rKkHNaIzIGFRmqAYaSgBppKAEpMUAJRQAYoxQAuKeBQAuMGvV/h\nfq1rpF5D9pIUXYMSuTwp4P8AgKTVwPbg4p6vWZQ8PTt9IYNKsaM7sFRRlmY4AHqa8H+KPxHX\nWi2i6RIf7PRv30w/5bsOw/2R+pqorUGzyl5MKSevaoUGBnua0IPp74S+GG8O+Do5biPZeX5+\n0Sg9VXHyL+XP1Y132aye5oivRUjFzWZ4hg+1eHr+LHWFj+XP9KYj5x1NNsrfWsWUc1oiGQOm\nFzmoSKoQ0000AJTaAEpKAEooAXFGKAHAVIooAHGBVldVkga3UH5Y6aEz2fwn8SIfskNtqZYq\nAAs6jOB/tD/Cu8ttf0m7XdBqVq/t5oB/I81Eo2HFliTV9PgTfNf2sa+rTKB/Ouf1X4l+H9Mj\nPlXDXsvZLccfix4x9M1FrlXPJ/Fvj/V/EqtAXFrZH/l3iJw3+8erfy46VwsvAya0SsSymwy2\na9F+FfgR/EusLqV9Ef7KtHBbcOJnHIT3Hr+XeiTshrc+kwafmsjQgNFIAqOZBJDJGf4lKn8R\nQB8261FsupFPZiK56Uc1ojNld84xUJqhDDTaYDaSgBKT60AJRQAtLQA4VKgoAdMv7vNZ75LU\nAXrDUZLQ7eqE8itlNQimXh8H3pisDXC/89B+dV5buIdwakZTaZpD8oNVZCM8nLegpgdf4G+H\nl/4sulnnDW2lo37ycjl/9lPU+/QfpX0hpmn2mk6fDY2MKw20K7URR0/+v71nJlxLwang1JRH\nSUhiUhNAj5/8ZwCDXr6MD5RM2PpmuOmHJrSJmyq5OMVCRVCIzTTTAQ0lADaSgAooAKcKAHCp\n0FAF23gE7BCM7uK60/CO6ltVli1OFJmGfKkjOB7Fgf6Ur2A5vUfh94j04Mzae08Y/jtj5mfw\nHP6Vz8ttc2zlJopI27q6lT+tO4DQxHUH86cJD2jz9aANvS/D3iHXMR2VhK0XXeUCoP8AgR4r\n03wt8JLOzkS612VbuUYIt48iMf7x6t+n41LZSR6pbrHBCkMMaxxooVEQYCgdABVlXrMolDVI\npoGITTc1IxM0wtQB4t8R4PK8R3DAcOFcfiK88nBya1jsZvcqNUJqiRhpppgNpKAGmigBKWgA\np1ADhU0fWgDofDdv9p1i0jxkGVSfoDXtYepYx4emyxw3MflzxRyof4ZFDD8jSAonw9obnLaP\nYn/t3X/CrFvo2k2rh4NMs4nHRkgUEfjigZqq9WI2OaQyyjVZVqQyZTUymkMT
NMJqRjSaYxoA\n8s+KEBF9bz44aLGfcE/4ivK5+prSOxnLcit7Y3U/lh1T5SxLZ6AZPSrS6PGykPcPG3UFo8DH\nuM5A9+lKU+UzlKxDceH7+BSwRZFAzmNs/pWSaqE1JaDjJMSm1YxKSgAooAKdQA4VNH1oA7Tw\nLHv1tHx9xGP6Y/rXqceSKhjROFzSiM0h2HiM04IaBkyKasopoAsoKsL0pDJVqZTxSGJmmE1I\nxpyaaaAOF+JNqZtHhuFGfJcg/Q//AKq8WuR8xq4kSKgkeGVZUOHU5Bras7mO6QukG+RSpIwx\n24BA+7yRjj9D7qotLmU11NDT5ii/ZDHcsMEiR4iqj2HcCq3gi0sbn+2nvbKK7FvZtMiSDuvP\nB6j8KmnpcVPdlmLSdA1jw9JrcNlNYLZ3KJcwrOXV0JUEgnkHDZ/CnP4Dtl8XXdlJPLHpUFt9\nq84MC2zHrjHUHt0FbXNLFS28G2l3pejXqXM6/wBo3ZhKMB8iZfnp1wo/OrKeD9AvNZutFtNQ\nv01CAE5mjQxsRj0570XCxl3/AIbig8PaLJBHI2pX8zIQW+XrgAD8q6O98JWQ0bUbG10yRbqz\nhjdLwqx+0PjLhSfywPWi4HmzxPEwEiMhIDAMMZB5B+lNqhDh1qePrQB33gGMia4nxwFCA/qf\n6V6JFNgVDKRZWYVOsgqRkysDUy4NAyZVFTBeRgZoGTKKlFIB61MtADc001IxtMY0AZer2Ueo\n6fNay8rIpH096+ftZsZtPv5rWddskbYPv71UGTIxnqLcyPuRirDoQcGtSC/Fr9/Bgeb5i/3Z\nBn9etVLTUrvT2nNpO0XnxmKTAHzKeo5qYwS2JSS2JbbXL2z0e70qJk+y3RDSArk5GOh7dBWr\nN44v5vDp0hreAZgW3NwufMKKeATn6j8TVWKuMtPGVxaWGk2gtImXTpjKrFjl87uD6feqaTxy\n6TXdzYaVaWl7dZ8y5DM789cZOBSsFygvi/U0i01YxAh09GWB/LyeRgk5yM4rN/tbURcPcLfX\nCzOCGdZSCQeozTsBVZ3kbc7MzYxljmgCmIcKngR5pljjUs7HAAoA9X8Pad/Z1gkX8RGWPqa3\n0JxUMpEyk1MjH1qSizG59atRuaBlpGNWUY4pATIalFAyQY6mpFoATNNNSA01GaAIJBkVxfjL\nwmmu2/nwBUvYx8pPAcehoTswaujxW+tJrO5eC4iaORDhlYYIqk3BrdGQw0w0wE+9TTQIQ0lA\nBRQAU7NAEkMMlxKsUKF3Y4AAr0nwv4S+wqt1dgG4YcD+7SbGkdlHBgdKnWE1mWSCE+lSCI0D\nJFQirEeRQBajNXExSGTripRSAeOtSqaBjaaTzSENOc0w0ARP0qu65pAc74h8Kafr8P79Nk6j\n5JkHzD/EV5Fr/gbVtFdnERuLcciWIZx9R1FXCVtCZI5YqwbBBB9KY30rUgbmkpiEptABxRQA\nqqzsFRSxPQAV0ukeCtT1Iq0sZt4T/E45P0FJuwz0jRPCdlpEY8uPdLjmRhya6FLfHas2y0rE\n62/tUywe1IZKIPaneTQAeTQI+elICZFx2q0i0DJlFSjpSGPFSLQAnTvmmk80CG0hoAiYVEw5\noAjK1E0YYYIyKQHP6p4M0XVWZ57NVlP/AC0j+U/pXIah8JkfJsr8r/syrn9RVqTQnG5zd18L\n9dhJ8ryJh22vj+dZkngPxHH105z/ALrKf61amiOVkH/CF+If+gXN+lSx+A/EUp/5B7L/ALzA\nf1p8yDlZp2vwv1mXBmkghHfJJNb9j8KraPBvLqSUjqEG0UuYfKdVp3hPTNNA+zWkat/exk/n\nWutqq9qm47Eqwj0qVYaQyZYhUwioAf5YpfLFIBPLpClACqtTqKBkyjinAcUgHCpBQMaabQIK\naaAGEcVGRQAwimlaAG7KQpxSAaYh6Uwwj0oAaYR6Unkj0pgJ5XtR5QoAPLo8um
A4R09UoETK\nlSBaAF20YoAQim4pAKBzUqCgZKBTqQCingUxn//Z/+Ex6Gh0dHA6Ly9ucy5hZG9iZS5jb20v\neGFwLzEuMC8APD94cGFja2V0IGJlZ2luPSfvu78nIGlkPSdXNU0wTXBDZWhpSHpyZVN6TlRj\nemtjOWQnPz4NCjx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iPjxyZGY6UkRG\nIHhtbG5zOnJkZj0iaHR0cDovL3d3dy53My5vcmcvMTk5OS8wMi8yMi1yZGYtc3ludGF4LW5z\nIyI+PHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9InV1aWQ6ZmFmNWJkZDUtYmEzZC0xMWRh\nLWFkMzEtZDMzZDc1MTgyZjFiIiB4bWxuczp4bXA9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFw\nLzEuMC8iPjx4bXA6Q3JlYXRvclRvb2w+V2luZG93cyBQaG90byBFZGl0b3IgMTAuMC4xMDAx\nMS4xNjM4NDwveG1wOkNyZWF0b3JUb29sPjx4bXA6Q3JlYXRlRGF0ZT4yMDE4LTA2LTA1VDE3\nOjQ4OjMzLjI3NzwveG1wOkNyZWF0ZURhdGU+PC9yZGY6RGVzY3JpcHRpb24+PC9yZGY6UkRG\nPjwveDp4bXBtZXRhPg0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAog\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC
AgICAg\nICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAK\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC
Ag\nICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAog\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAg
\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\n
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAK\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAg\nIC
AgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAog\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAg\nICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgPD94cGFja2V0\nIGVuZD0ndyc/Pv/bAEMAAwICAwICAwMDAwQDAwQFCAUFBAQFCgcHBggMCgwMCwoLCw0OEhAN\nDhEOCwsQFhARExQVFRUMDxcYFhQYEhQVFP/bAEMBAwQEBQQFCQUFCRQNCw0UFBQUFBQUFBQU\nFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFP/AABEICdkNIQMBIgAC\nEQEDEQH/xAAfAAABBQEBAQEBAQAAAAAAAAAAAQIDBAUGBwgJCgv/xAC1EAACAQMDAgQDBQUE\nBAAAAX0BAgMABBEFEiExQQYTUWEHInEUMoGRoQgjQrHBFVLR8CQzYnKCCQoWFxgZGiUmJygp\nKjQ1Njc4OTpDREVGR0hJSlNUVVZXWFlaY2RlZmdoaWpzdHV2d3h5eoOEhYaHiImKkpOUlZaX\nmJmaoq
OkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4eLj5OXm5+jp6vHy8/T1\n9vf4+fr/xAAfAQADAQEBAQEBAQEBAAAAAAAAAQIDBAUGBwgJCgv/xAC1EQACAQIEBAMEBwUE\nBAABAncAAQIDEQQFITEGEkFRB2FxEyIygQgUQpGhscEJIzNS8BVictEKFiQ04SXxFxgZGiYn\nKCkqNTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqCg4SFhoeIiYqSk5SV\nlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2dri4+Tl5ufo6ery8/T1\n9vf4+fr/2gAMAwEAAhEDEQA/APCppT5aknLsagbnBGVycZp7/wCoAHJ7lqduG2NWG1e4zyPe\ntLHnD8hsmIfd7+9NEnmKd5G71NIdrDCjaT3z1pnnBosmPnp+NMkfIudrKuc8baWZiqhV+fB6\n1HJG7MCxKhOcD0py4VvMZ1Vj0BNFgJEZG+U9W4B96aGWNdhyXz2pDtj52/NnOaf5ygMy9+tA\nD12mTP3jj7tN3HJizyBnf/Sm+SyqWHyjrSoxbO48459cUwFjYmUsRiNf4vWnqVk3E8qah8wF\nWbGdp65pdpYcfL654zSAFXaFYZZs/pUkc25SjcEjIqMnEgXGSaasWee44FFwJUQlEY/Ow9aY\nse5ZG2jdn16USMVjC9W7+lPjVY0BQ8N296aAD+7VNo3FuDntRGzswG3JBxxxx3pVLH+HBzgE\n+tRCdmZg3UHGamwXZYZAqnZ9wnpnmiRfurn5xzkVGwVWyfvkcCnRqzZ3HB9KQvMZJ97KtiTr\ninFj36/pS29qG3O5+fpSACPK546Aj1qhjm3buQFTGaIxuJcnBb1prL8yIT5meCKAwwd68Lxi\ngBZCUbAO4+npRHMdpjP3qjjVVJBG4MOD6UM20AoM7RwaYFlG3R4DbCozUKt9ojA8zDlup7ih\npCWBxsyOc96TenmEBOM8CpsKxPuG/g8DjAp26Pdhxlh0qCMIGJI2855NKZdzcjmmKw7crSBy\neOlIc4UL1DZJPpUajzN7YwO1SMuRGV5X+KgY9tkilgNp6n6UcJgJlu4aoI5CAxlH7puKlOVi\nLKePug0AP+9GSTl2bG6iPEMnA3EcFqZ5hhUqx4XpgUkWArFzkdQBTDclzvjHck9KaJNyk55z\ntxSHOePukVG/zKFYbec8fzoFYmmwrbS3DDO6lmc5jBTB/nQyrJJvY4OMA0JIGbd0jHH40FBL\nllZyNmTgCmsoVQRzmo94kznnvTmcKqlctnrntSAkba0YdMqVODTiqpJlW3cc4pArMqgHPrQQ\nh+TPTndQIeZPIUkjlumaiQBEIOTuOfpT5Nu4c7sjApvMLeW4znHzCgB5jHVjnPG0nrSx79oz\n8oU8CmKfMmIcfKtI0gZmG392OjZoGEavtk3HAYkgUbmBwUyQOtLuDEAHJHIpyzFmIcbfY0AR\nrhsGTgA5ApzMHzJjJqMKWd+foBSpvVcdaY2OZxgEruGadIu75QuQOlM8zy7hoz06ZHTNKrTb\nSzAKM4BBouSSx/NyvLY5XuKjXaEYjg96csbrIshYFj6UrKik4GOeRSKQH5VGDlm/hpjZZdg4\n74pTzJk454C0ehUFRnHvQUNZFD7oxxj5qXarbcHb7mk2mLcWfg04yJ5ahhkCgBNwbPPI4FOj\nY7SOMA9TUWTDInO4MetPbb5j84bB5pgJGF3biMMR0FM4+ZzyenNPjUQxoO2KayDDIeT1DCkI\nSON3YqOmMmiTO1doGe9NZ3jj56HvQu1+FB9zmmMdIyp9wfvWqJVb5j949DTpVDcDg/3geabH\nuWQ84XH50AOcJ8vGCaFba5BAyB1FIx9egpvlqDwcE9aQC+Zks2cBegFDbdwkCjcR603asjEY\n+b9KfHgy
Nlc7VpgNdlZNqnn2pEVWULuIwalZVXDbec8019pVjtwR0oAPMLfMV4BxTZGXpjrS\nnDMdx2qBwKa5TIO7tzxSAF3KeOVNO8wJEwOSc9KauWRFH8XNP2hDnG7HrQAR7W5PWiPL5UnJ\nznHrT8lc7sEHmoTGSTtYHuKAFVn3PkFSeMUiR7lOPvY70Fz95uvTijzCpII59qAB1YbNgyB1\nNKsm+RgQFY1F5jjhTkdcU9seYCBnPX2pgKrqd2QTxjNMWZlj+6Rjoae+3gdRSMocsM7FxSAa\nzEMQwJFTfLuAPp0qNXdVyVDY5qblsMFwWoERjbuy2QnSkdhJxnocAU+QlkKOv0psaqMEHkda\noB7MdoHahm3jGM96YytI4APGc/WpOcgAYGfvCkMdG3y/Kv1qUNuQkDPvUZdlY7OVHUVLH80f\nGAGqgHbTtVhzx0p8fbCkCo1yCFDfXFOXcWGfuj0qWAFS3bHPWrCueOhOOTSFQACDwe1ORV3E\n9xQPqOkYKq55JppRd6kc0nO4bjhfWlUhm3c/LSBkpYsQSu0Zpsi7uQcetM2nJG77xpwQbjuO\ncU7jD5VUcY+nWl+R1+9xUO4p849cYqZMhsdj2pEgrKP4c8UxYzywp0g/A05VOAc7f9k0hkaq\ncfjmpdrduD1pJJGZSoGQeM0Sfu1Ub8tjoKZQjFmbcTg9ABTVbruGTnoafvzIQRheOe1CouSD\nwD0FPcAD7mK42+hqRo8FSWwaj2n7pAAB4qVcnB6+9IQN+8kyO1NZhtxuOQc5FAmGSp4OcZpW\nZUxjkZ5piIHIY5NEY6kjn0p/ytISfu+lIzBs4pCE2ZkOOlN3BQ28YweDSp90MDnmmSKeAeWz\nkUwHKzKSByp9aezLleM01tyrxyc80xtyqAoyxPIoYEizAZV+tScqvA3EioXJbHyfd6tTtrTN\nkNhaBDixeNWPHahZAPuru96b/HgdqFVthwDnPakBKsnyYB201pDkDOR60qk5HHHQiiPDbvl6\nHmgYjxDaTnJpB8zLt4Yc1LuVmJ6cY4pmX2hmA9AapALN8ylj17VHIuVBB57gU7O5dp9aRQY/\nmzQwFZtmCOeOaXzMgENgd6byWDDkdxSKPlbtzn8KVhakqsuNp4FL5anG3gqajbPy4O7vS7xk\n/wABP61QyTdG2SoIbNNY+/FJu4J2kDpmkyQuzbnjINAEqlsDace1NbduyRlTTQz7QMYb1o3M\nzDBoJsODjkhsY4ApkcgUMT0o2lfmzuJPSpGZCoBH4UAKdpQMg9iaF/d+4xTVbCnb93vSGQsg\nKjjPegQ7aV+YNmo2mKowAye+aWSTG0L1pvVfmfB9KBiedhVK59xTnUbtwHBpGxGDtOTSx4O3\nLdabAVpAuAoxxQrFeAuHPc0i7WMi9SOlLuOAc8YqbFBhl5JwD/DSSLnbu/8ArUFtybmprZZc\ncmmIkVtvBHI6URsPugcg5qNg3GG49Kk9D0qmKw9sLgdWJzUq5bcD+FQ7grrj1p6qW37T+FZj\nLcLlkHQEcVZVjHGCx5NVbddygHjirGOm7kdqQybgKq475zUUjHccDGT0pzMduCeDStlwqHgg\ndaXQAZQ2SOoFQTSbtoAwakf7vByOlRTE+UABj3oAhuN3bn1qrtG1gCQT3BqfdtG4NkVW+6Cf\nU0wGHMcYQcnPNLuCNkDjoaGzvDHgU75SBuXqciqAFj68AilVgsgOM+1C7VYAjnNPkUdR1piG\nM3zk4yKNytg9DT4U+UnHSpI1Eik9xzigLirGXYDdkUjRl5ChQDHQ0/YWwQMelCNuLH0GKoQ0\nQlZBHGML1NKIiQe9OVQu3L7SetO2FuQ22kwGKo9ckelSKu5ssMZ6UKh2nI+X+dPjwRnGAtAh\nsce1yccVJ8oHC8UKx2nAyc0/ywY8ngDqKQwiTDDPWpEVdpDdc0xV3RjHBpzYHuaoYq/J0Hel\n4kdhjjrSQ/
dbPTFKvCs3pwKQmNyIyNw49BUrqA59McU0OGQf3qG+4BjnNNBYcQTCPzNSsQwX\nB7CoVUuoUHGDU8SEHIHGaRQ9W+YYXJq516Et64qvGoVd/XnFX7WEqpIxgioYEMg3N0yMVUmi\n5JXrWiFwuABVaaPzG44qLjsUzk8k4HpTXj5LA8DkVaMJbt04NMEYVsY60rjsQhSy7j0p+35c\nt96pWh6LnpQV3LgjkUri5SBhtjAI4pH+VcKuasqpb5iuBimKpETH1q0ykiuV3Z4waX7+V709\nkKxg/e5pY2284570riYiMSQpXkVJsLNjG30pqgDc6t97jFPjH3QPmajmDlYjQIfkLcmoGtwq\n47etXWhRmIbgimbNylAM+9HMKxTWNdo4yaTyyFyODVpUAXPekaL5ee9PmKsVlzgdC1Nkj6r0\nGeas+WdwCAE1G0e7dnrVIlxKx+XqOPapVXqAPxpGYbgf4elSJ90rnntVkDMlWVgM84NPkb5y\nV4pyqVbaRTWQ8/yqhalaYbs+tU2ZWY4BXA61blUlTjg1Rb3+7nmpKQ2Ta0Yz8xzTtwkUqwIx\n0NNjXaxOOM01cncBk896QhzLtwAcgc0En72N3rStgYw2D6U3iT+LApjFky3yr96jzPl6YIpq\nn5shsFe9DAqu496oZJHJvyc8035lOAeppm4GMjpzStnau3ikIXc0bHdyKUMSMgfhSbucj5qY\nMs+6kMl+aRgCcCiRlyBu5qKHeAdxyM1IqpsL0CE+8uS/OafgY+br7U0N8wzgD6UhYhueaBDo\n2C5w3HpTtxZTnpSKFZemAaXyyucjNA7Dn/h46Ck3YPA560kYdVI654pzYZfu4YcZpgK0yqoy\nuc00SAZ7D9abuO4AqRxwaXy/MXJ4PpQMAxbOORT42LKQ3SmIoXPf6UsfcjgelMBfn3FRyuKT\ncV+ZPmx29KUqNnBw2aRclSQB1xUksd5KtkkfMepFIqlEI70q7omweT1pkc26MnaQc0CJQm5A\nD8tIN20x7ieeopPvMM5IPanxgqpB4bmmUhB94qDjFJu8uPleaDIFXbj5vWjB6MelLUA87Ayw\np2Y/J9Gpm35B35oeItIp/hHegY9eI8bsUoUq2SQQKjW4j+Zev4Uu0cMpIGeRQKxMuVQgDg8i\nmnDRgYG6k8zdkjpnAqPHfOOaYiRl+XOcmk3DADAn60b16Ac04t0HegBysi4P8NOEm35egPNM\nXjP93rilaRfTG7pQIlkV2wAMgjrmnHO5QfTHFQx7jIcZO0fhU6+vtkVSAdx0AOaTblc44Bpf\nM+Xn5ajdvmI3UyRyn5uTj2qNmPTHHrSsyOpNN3H8MdKY7CyKMAbsUNIFby93JHWljUK2WfKH\np60nl5Y7k6dKkYnzx8AYp4bqSRnH3aasf7wsx5pSsb9QQfagQ1Wbdlvu4oWRlkC84xmnYy3X\nA6YNNXKZUnLZ4qhj2zI3XGO1PX7pBGTTWA+VwcH3pxby1yeTQAu7jAHSo2IVtxHFSMw2g5xT\nZGVgeOO9DJI4WbnsPWpVbeoAO4j3qLyz5fDd+lIqqrZXKsKAJnl2rjbzQ2NynOeKh3EqSV3Y\nPWgP5intQyR8nseD3p0eISBupoZWjAHzNTmjKsN/TGTVASecDJgc4FJnHU89aiZhwQMDHWkT\nLMW6/WpYiVZFwS/NMDLJ8vQjpSSyL5f3e/Woo2DKT39aCiyG+6cd6VW2l+c89fSovN24wM0s\nYGCGGGPNNBZEit3JyKdCwZyAQT71AS3l4U96ejBfmxnPelcLIcpbazYAojZmXBPU1EzBUbjq\naVZBtC80JisT5xxn60hkCkhRu96Yrrjpk9Dim42scdKsCQzKVU46GlLEMSOQeahbrz92nCTP\nAGRRcLEgIfgZzSqNwPaoVkIPqKe2fl5wetNCFVm3dMDvU6ybM4GagVjknNEbNjbnJNICQON5\nbbnjpSyfMyvn
gVGr+XnjJ6e9KG3LsJoAduLcZG2h24H+RTWjGMenpS7iykZwOtMB4kLDBYil\naTJXIwh4pvnFkw3HvikU5j56elArDpPlUjdilIB246d6i3bVJOMk0ZU9sHrTAerOcjBqTvgk\njioo5syHBOKVpOndqA6jl+bj3pS2WJB+XFIz/Mcj5jTWZWT5etIZIGOVf05pzMG6HvmogxIH\ny8d6f5ijkU0SOZjG3Tdn0oWTYuO2aTcWlIB59qHxuA647UFD+Fk3k9ulJG4YnaME9Ka0n7wE\nLxSSMu4c4oJJdxbG4YUdhQyhskH5fSmRr5mTnFOj+aT1UUAOVw3zE4IGMmnwsACeCAetRnaG\nZWHvR8u1sD3xVREO2MwLn8qUdgnXuaY0yLgg4OMYoXCPn1HFA0SfdBGeaRpNpB7YpAuZACwz\nRHkZQ4xnOadwJI24PzZ9qQSBhjONp6UnVunFJgLnA60XJJWZ5GwOM1H5hKtG4+hpd23A6ime\nWVZiBkdc0BqK3yxqCakUhcK36VFJkqMDc3oanWEbgWIzjkGgCKRcZTuPek3HeOoAHWn7fm39\nuhpcqylv4elBSGr+73ljnmjr15xzS4+UgcsTkgUqtt+90oICNm8zldqtzmpDJuUr+Rpkjbdr\nDpnFFwjeWGA6HmmMkT92o53HNOV8MSR8xOBiouoBU4qSFm2njDe9MB7A7QT19akXaOM81Xdm\nVQRxk1PL0zjJx2oARgF+Y/nQcsBx9KbHwuwnPf6VJ5mSABgUwEK7mDE06ZfLAKGmhc/MDhM8\nmnLtyfQ00IQPnOBxSLyx4AxSsvP3vlxmlGGPBwCKWoxVmwpO3Pamqzx9TgHtSqoaIgnpzQ43\nBSBzigQu8lgexpIpPmbeB14ppzuVRnPem7sygnk45xRcYsc3nSnjaM4pzKqk7jn8aiwWI4wM\n0+aFfMAzii4akikMvBpWk4UFe/WoY924r0HYU7naM5GKNQsWQpXLAe3/ANeljby1wcH3qCOS\nQ9walhT7wY4BoCw9WLI2Bz1B9aWOR9oyMMaj2twqPjBqVWMuT93bRcVhNysMHkA0rOGVsfKo\nGajyFw+Op6U8L8u3tnJpFCRuGjDBiPan7hJHh+BUUnJz09KVWPcc9qYEzyfIM8gUxiAoGSAe\naG5PPFDSFGA25zQxksca7ycZGO9EZ3bkIyvtTA5PBXYwoZjHgDjnNTqIlG2NvbGKlZd0Ywdt\nVlYS5DrgZ4pdpZm7KBTAez7WJHPGKI22MJCTnGMUQ4kTb0PUGhctjcMj1oKsPSYbTzz1pTIS\nw7GocruGRjntUkcZlmJzwo4oGWOScjoBUbN5hCjgnmkDHyyQcVC8oAUg5bvQSWADtxnil3eW\nd1R7PLUtnOelLHIzKfbqKBC7V8osDkE9DU6jMIA4Paq6/u/lYEjrUkMgjOOWPaqAk24X0IHN\nO3KeV54qNpHIGONxxS7TGxX+KmJj/MAjwaZsLISxxjoaa77Y/u5b+7SHLfwkU7iHgGQBg1MQ\ntJz1pEwp5ODnmlckMMHAFMQOvlsp9+aa0J3MRjHelclj075pm8KxByCaBDuFj4bBHYU+BjtO\n7oaRmDKAF+alZSflz2oGSRsobOCabtJbJI21BCrxlgW4FTRqH696YDlxtxnvUhYAZxjJpvl+\nUwxzQpVWIHIznmgCVXPOeBjJp3mDbtU/eFRxt5gYY5JqSG3OdwGQtAE+QqrtB9PpTtwVsEnO\naTaWbcOB1xUkbxyIWB5B70hjg5bKgfSnqfuk8VGsm4cdQaeiuzHsCeKpAWUkUg55Xqal85Uj\n39Cx9KrwL5mQ3GOKm3/KF2ggGrJaLG8rGDt/GnRSBsnPHaoQrbdx4XPFKVWTBQ7RnpTETxkq\n4Yjcak3APuPemqyxqVRst70KAPvck0Ek2D91TwevtSOGjwfvD2qMbtpwcMKeuHUMeWFUA5pQ\n65ZalZVIBzhaYG
UqSMn2p42iMFhxTAFkKsAKtBt2M8VUaQNwg/E0pZyoHX1qgLKskakuQadC\n21ST0P8ADVcSIylWHIqWMhvf0p9QJhIki8HG2kJRueRUUKjzCW44pFIKY3Zo0ETNlFwG3KT0\npds2/I4AGc+1RwvsDYGTjGaNz4APC07kjy25OE/GhVG35u9NB28DnNISwkC4+amFiTheeiCl\nYqJNo6etMY7VK5+XvRt2qGwAT2pASbihYLz6Cl3FmIHykDnNMydwZeGzSNGfMLk5qCkidpMw\nqwwRnFO3/LnvVWT5gD0A7VLG4/DvSZVix5qMPlUjHWoigWTOcjrxTN6hWx1oWTfB+7ylJjHb\njNyowfepFYqQQKiVXVc44/vUKxYYzUoLFz7ZJ/dWiqe6X1WiqDQ+IPMkjbO3AI5pQhmjkTPz\nA5H071G2fLUhuCMbe9OLKfmByW4r547mNUxswQk5QZ+lKqh48L8p6801chmSRMDrupTiThW4\nx1oCxNMR5W52+duwpGb5ozjc3Tp0piSbQQRuKnApf9X82cnqR6UNjHyTFeSuT6ClO1V9QeaE\nwFAY/e5zTWVeVU5PYUwsPeQxxrk89qT7QWO8Da/Ax601x+53SADHrTFO5g+Qy9OKQrD2Xbxn\nHOacQzAF+uccmmLtYsn8fqaDub7/ACw460ibE8xVWBB2nGM0SNtVQp+tVWK/KGOAD1NSbtrO\n45X0oHsiXj5nHVugJ/WmK5wAo49aRdn3j1I6U/hkYu23A4FBIkm5lL53EdFB/WjgOu4cZyfr\nUPO5SnzBhzt61JGom3lTtHfd60xkjAqrMRljz9BTFkVpOTgEdfShWD7tpJIGM9qZGwVfmXkd\n6ZRM7bZAIzlMctSx7cgksqduO9MiUswYvhQMg0rStJHknClsUCHxtjcwO7sPWmO56A89KTzF\nAztwFPDCo2lDRbz8oBzRcCQSbVIHBB5PpT9xRTgjHUnsKg3ebIW6KRzTIcs0gB74+tLqBPKx\nlVAR3yPenZwFJ+bHHHUUMfMdeOQMUizmNmBHHQ/WqFcc+0qMDcWPGaUK2794cnH3RUUcm4kE\nfSpI97ckZH1qQQqsVbcQqqvvSSSA7ckrznFM81AWBHyjtSSN8oPU9qoRJMS0ioT8jcAUpbGV\nGSo4wP50x9j4DDY+KSORtvBGM8n2pDJpCVYEHzD3FHnl+2BmouE3uh+9wM02NRlVc4HXNAyd\nXaRXGNpHPNL5zCInbuNJI2yPAB92FObayjuvqDTAjVlaPcQ3J2j61I7FVVAcj+7SL+8VkjIU\nL83zUz942H4OO9Ji3Hq25cAfN/SpIyzoVYZIPFQ/NtAON2c4Wm+aGYlSQg+8tIZPueMsOop+\nRuRVGQRy3pTF+VT81FvhvuHv3piJWjMTZ28djTOWzjuc05pGLYB3D1FRqp3cnFIaHLH8xfdg\n46UrMSq7hg9/eoizFi+MHptoVnkj3HimO5NLsjUHdye1L5oKgffb3quMSYI7Cnq+GBBGaBD1\nkcZEa4bvQvyNluMd6iZmycHHOM07zSrZ4BAxjrQA6RlXdu6n5s9qa7kBQ2cEZxTMlnHmfN3p\n/wAyqWYB1pCsO8or84fn+7SyNvkVc4OOW9aYuflYgqh/Sk+bO1ecHdk0xkm9Wbd0I4pit975\nix9KcN3zZxjG4VDIFdcg9uRQMk3dOCT6Unmbzg+tJG21QOgAyaaAfvKRjOQBQUT/AMJ+bI9P\nSo5IztyDx603cVbDc570SZbYozj3pAObdJGVJ5pyOV5A7YyaNzFsKwAA4zUTbpl2g7T3FMB7\nFdo43HPWmxsySOwGOO9N53BVXJ7+lN81SzAj6igCfdvG7o+OoqIsScD5vWnqrLgJ0K5piS7Z\nwOpxzQIaybcGM7iTgg0f3weDjApZG+cjZvX2pjNwQUPoKYwjkGdvVguG9KkSU+WcJwOKjjdF\n9+xFP4jJx0P5UMB3
mfMeMZGeaYsjpG27B4pojLLx27mldd7hQc8dqQDmUKoO3cx54pzfLtIA\nPtTFLR5BHPY1LtGUYdOpxQNCIh/u9OnaniRWUAjJY9KVdskZ3AgZzikGybAK7thyMdc0mIZt\n27jg5HQ/0qN8BshSCRwamkYySFjwevt9KYsjpkD5twpgMXdAoZT9T1pN33gy7CRnik+dckjB\noUbjnGKYCSD94rKvGKFYxn5uRQY2fnOB6ChZDynb1NIB7OyqOAM9aZGxbCk8Z5pVwwyDnHWm\nxg7mCjrQA8YZjh8e1SH5o927BFRNtEgJHan/AHV4PB4NA7Ehk24IIY4yajWQx5Z04Y8GnCML\nkA5461GuWVl6nstMRLuYNg4I7VJGwbdhgQB90dc1GN0ahSNwx970pdohk3Y3ccUholRioyF+\ntK2513bcLnoKFY5GBtB5Ip4V2ZiT8uPu0xAFQL97HPQVKjEsQcKCMAj1qukZjZtw47VYhb5R\n3I70hokXLAIOcdTS7uCCc0u1mbcD164pWAwSy5oKGsqSKi55oOdrfw9s0vlrw2MfSjo24jOe\nlBLFOFRCRn/ap27vznNR7dzYYYx2NIzMCxGSKNQHKwVicBqcjB35GFxUUbDhtu76U9WKq37s\nYPb+tLUBWA4G7OOcU5/mZecsaTG1VLLtbv70iyDLEUgH79g5GMGo9wPOOM/epUztJ+9u9aVV\nIyq8j0oARfmXDDCj1pY5DIx2jJAo+ZmG7nHYil3qm7aNp/nQUO2ng5z607l+AcelNh+Y5YkD\n0FP4LcAg0wASfwlcsDTWY+W7NhSegqVDuZl9s01lEnJ4HpTJZD5fzLz2pJCy8Lx65p+zDKvd\njhahZclssVX3pCDcOc9MUpbKq+QPrTd4dNuAv9ac0e4AdT/dpgJu2tkHI65NShQrB8dRUTRq\nw2n8Kcdu0LuwFpiBpHZCoTb7d6auVjH8JzTsqzEnk+tR7WK53cZoGOSba7DHOOvrU0bMOR1x\nTI12ryOMEmlfaqqUbHHSgViVxt2k8+uKVPl3BRkmmLIeMn2FOT5l2jqDmgY7aAOBTVXOATn+\nlO4Rg3P0FNbKtx9aABsc4HWmq25S4OMHFO53btuPpRtGCccegpXAZIqh1weepNNO0M+05zTv\nLGAM5NN8sJJtHGaVxoduxHtVctjGfSk2nYqjlu7Gm7iSR90+tOZm24Yceop3EOZpIyAMMmea\ncWHylRjjmoVCt8xzlf4akBSRQy8egp3AVWJkG44pJFG0kHGTmiQswyQKbnnB6UAOBGcAZ4pC\ndrZ70HpjpTD97JNMQ/kQ4zgE0qMzcY4AzTV5UFuB2olUnBDYUUxWH/zzmk27g2cGoxhDk9Tz\nTgPPyCdvOaljE4AyDx3pY9qsADmgup+RTUbHqB+dAyVGzznAzik8wIrfLlqjhO4YPBzmpJPm\nyBwaNQHx/KoIIYnqKfuJOO+OKgRRt/umlWbnkcqPvetNE3FRmTOBn1zTWBb2Ipd29cHoeaXc\nCCO/pTAfGSsgJGTjinxo3mZVsEjmoFkwysOcVIsh3En5SaQy/bNnBPzAVa37cgLwRxms1GCK\ncHrV5ZBtUEZ460hjlyI8Nx3NLuCnhs5GQaPl25XqaXb8oXjjmpAj3MFwKiMjkEEbqlLYfn6i\nmlSGz260AVS3IG3aKj8smQ85qeSQyZ+X5c0kkKqodW9+KBEG3fz0PantbvKuABv6k0rNiQcZ\n71MoKxswPJ6irQEKwHALDBp/l4B38DtUrbWjG4446Ck27+D0FMQiqy5+UYI70+NQke7AzQsh\nk+UdR60uVb7w4XvT6CIFZ5M7eD6VKUGFGMN1p/l/KSFwCetMmZeM/jikA35hwVzzU25JF46d\nOaYjL1A/OgMFXGKoB7BmwrH5PWhV8sspHUUjcsoIytLL87YXhhQMX5+MDFTMp2gNUBmIYJ3q\nXcduM57GlYAbPBX7tP
X5snFQqh8vYM4zVhV2xgYwfWjUBMFVI9eopI9xDKBlT1FK24cnmnBX\nXGCATQMase7lBxT/APlmDjoeaRQVyo9e1L5fynJxntQAoyxGDtB6mpo24cbc45zUMaleMc09\nctJznBpDLKxgAAHnPSr8K7eNvGKp26gHJPOavxlskDlcVEih7YIUjgCo2QBy+PwqVVOOTSHL\ncD86yKsV5PvZ6Z9KgVCWOTV6THJGPwqIIWOcYoGQLCWYA/rSiNmJz97v9Kn2E/e496Vo9rDD\nZzSAphcLjnHWlKswwMY9KseWdx9DSCM5JGMimBV29RjGKGVG+bueoqwsfmc9s5pnknczbfl7\nYoAj2jfyNvFKq5K9hmp1j3KSTzjvS7DwMgigZGF+Ygnj3pyqvODjdxUixhc8/jT9oDA4zSER\n/Z9igY5prQ5bAHWrKp83BwTzTtvmEbeGzzQMotb7VZlOCKqSKVGcZzWrJDlmOeKpzLtXApoL\nGVwjE9QD0NPVjknGG/lRPH8uOh3ZyKeuFf3rpijmY9ehOcn1pis3O7oKk2kqDjBzTG+ZcZBG\nKonmZVuXDKeSM+lUHwsiLnPqKuXHzZx/DzWYzbmJHHfNSWmP+0FsqVwAaRX2ljnnrUZby+gy\nWPalC7iT3pFD5HA2+rcijbuIBHBpg2jhjnFIG3NTAc3ysB2pwzI2CcIvpTeS3zDNIy9CpwT1\nFO4EkY3MRjv3pzNtOeo6U0twCM49Kjm6Dj6ikBKuF+6RTW+aMqo5zTFcrgMv0p+0jJPXrTEI\nBt75PcU4ru5UcelNkcDaUG4dzQw3SAbtueaQDkkyMPx9KdgE/LyKTKspXP0OKcq+ThZOeODS\nGPTG7bjAxmpVJ2Z71AqMBnOB0qXayMoYcevrQAhUNznBqNc7sjkdwamMm1WOMqahO7PuaoQ7\nB4YNn2NJuO/J6UhUxgBjx60rM3AH50MYZRMlQc/xZpJGBYL075WnMyyL8xyBTdo6jp6mkIaP\nlbI+YjrT94b+H/8AXTFXy36/WpGysfQbTSJsxZchd469DUaiQ8jlDTjjaCqlfUnvTgpYkHIx\nQOzF8w/d+770bhnrn1NLuWTpwR+tO2gr8wx34oKE2/N6seanEJbHGAe9NtwApDLk5q55ePmx\n/wABp3HYreTtbA6UeQdhFXljXcM8/SnfZxt4+/npUthYzRGv3lQZFL5BC5/Gr/lFmAwF9aY0\nJ5UHOT1pA0UVyc5xjsKZtBLL09zU80bxryOh61WyOvQ9qtGYkcW7IVue1Ob7oGDn+9SKu3lf\nvU44aMYbJ/u0wFiQ8knPHFOYZ2/3h1FCrhAc45qTcm4sfSmA6Nt2QDjNSjBApkLKrKcgjFSf\ndXGM96BCP+8fbtyOuaZMp8wYQEYqRlYtgHgio2UiTPOBQMZu3Agpj+VJko6h+QRjNSO25ccZ\nqL5mwG4xQFxjBU75OelS7hGQxfcccr6VGWC5xyO+RSxqGUH17UxC7TySe2aajFgBjI/Wnheo\nIwD/ABUrHao42kdqQxWAXCkfNjio9wK4IO/PahWbdz1NO4Vsg/NTuAqsqLtOcE96cdrNn7w6\nYpu4Nj1+lL5m3gD8aQDvuqq45zTplO0nOBmmKWyGxnjrTg25eTk9xTJYwZYr8vA701DuZuMH\ntmnh8/ePyjtTAqtIcfUGqAbuaNdvUZpC21TgZzSsrydOB9etNhLspBXZz3oAX7oHOPpT/MYo\nS3NRNgL680pynzIME0CHtcblwF47U3liBtxSbh1brSbmbkelIRJGy7tpH/16aPL+YnrnGB2p\nkaFoz61KuxQB070DEDKOQfalaQ+WCeaaudx4Byc09m+amHUcp3IGA25o3EjZ0UUxGIO08qe9\nOxnJI4FADOWXrTY8LuZnwOlOCls8bQKbHhjgDJzSFYlT5Yzg/N2NKrMPm20wsd3A4pQWZsk7\nQfWmMEZmkORxT12jgj8q
Y0m3vkg8kUsjF2zkLnpTAcqleR0PGKVWPOedpxUYVgwOae4LfcPz\nUxCs3zHnIo8zaeOuKFP8JUA96TaVJ9OxoEO3HdvXpjFEjcgg4PtTcqygNwacmVPC0wHs26RS\nG4PFMfcrMOtGR1Ixg0isPMK7sk80APVn8skfN7UvmBl54460xWaNsMD0pNwVV4wSaQCsysRg\nhlHcVMrYQuBxUC7Q52jIJpZG64zgdqpCZKu5VPQ5705WyvJx7iq8cWOWJPfb6Ui8n7/fpQMn\n8wA+9LuCrkdKjfbuHHFNydhyOKYE8bFWwfmFG4ZPHy1Cu75Sp+uakVvlYYoAVpNuBnBzT/MO\n/cOtQtls+pHFIpKq3c9DTBllZG6tTFTa7EndxnFRqx2qWOacSCxPRqQD/MLKGcfSjzQrZxg0\n3G2POQTUayF+CPrQInaTbjIzupdxaMuDjtiombMgDDj2qTy1YbkOF9KESOiaNgMjLCnbwVK9\neaghUh3waUScEnNMZYbauNv401WDZK1CM/ez8tPEiFsYwcUhEz/dXD++KSOTZkfePXmoVxuG\nT0px+Q470IaHqzPk4xQXPY4b0pjsYx13r7VFuO3kHdSAslmZx39qUkKxy3PtUO75eDnFP4bo\nABjimIsGTbgEcYyaauPunGDziolcMQxP1pNyrkqec0AT+aNxZBg9KYsnmOflzTGcoABn5jQu\nFY449aAJ1YyMCBxQzszFT+lMWfHQ/LSeeS+QNu7jmr3AsKPu7uac+CxOfSoN3VRzUqyKF4GW\n9KYh7bZEUKMkMM06TaG+U4Oaihm3cHil3BmCEcjnNUBLgr97p/epWkO0AjJ9KibHmYPHfbmk\nWRjJzyv8qQMlb936KT2qWNlEwJGflquqhlyW3c9KWIE8kkHPFAkTDocYB96GVl2k9O9Mbbyf\nvE9aZNnYvFBQ9pNuRyee1SH5VB3kA9Ki3lVyenrQFeSM46DmgVhzMGQEcHNNjULjb1oefcoI\nGKTzcEEHrTEPZSZlDHihlxn0zjNNZj1PJp6uGXGC2Kdx6i/cYMp470vWNs/gaIlVX3dmzxTV\njYE55PalcByKh5yRVmHacBhzUKx7lwxwR0qTJKryM0hiyK0cqlcY70qfNnsM0GT98NwwvrSY\nwzHPA5oEP4OD0ApGlAU9fakbaIeOS3NIG2svGVpgN2nBPrSxLwWzg96fcLjgdTzgUx8tyBgU\nFCsVmxipOFUAjJzmoxhVzil3FmCj86kpDnLBlZuQx/KnMC0hIPAoCsFG48UsYLORjHFAgMgb\nAAORR83JPB7UxWILErwBjipJPmVRk5p+YwXORk8dCaUM65UH5PWmnCsoI696cWCsx3bvakKw\n9ZA3G0bjxmnL+6x2PSomZfl2gr60M4PfPPWmOxMAVk9iKNgGVxz60zrj5smpGk2rz1BoCwm3\noQcr6UuMShg2OOfSo/MKL1GM5qOScOw9etMVi3uDZXOT60q/KysBx0qsM4+Xk+lP85uFA5HN\nA7E5kYNxz70xmY/MPvE1A8j9V6HtUyN8pDHBIpkND3lQSDJwfamxkmR8tlQeDUckiqvy8gjB\nNNLAKMHv0pkliHYqsG+9mkaYNkAZ5qFXBc5OB3ojYNnHr3qhEkbbgT3zikkkDMARypzTfMVi\ndox604YkkJJCrjk/yoESLltxB96TcSu5ecnrUS7o+G4BpFJxgcDPFINSZm6jqelLFxt5wfSo\nhLlmx94VIZNu9ivIphqWpPvbgQSO1NJzhs/Jn8aiRhtU7ct160skjbgoXC9eaBkyghmCk81L\nHK0XA6ehqvb7tzE8qafBndgnc2c0DLKuzbwO/aiM+XGVC81BJljx8vPUU9HAI3E5z1pgWIi2\n3AGDU1q5dsHn3pkSLu+bp6UqMY5GB5PagCaN8tx681LHiTcxbA6VBG25SFGD3NSx427CcE85\nqiSw8gSNBnOe1PJUKGUfhV
eGRQxGckDrUxbcDnoO9UImaZVUYXnvSrIGGBxVYNvYgGnL90An\nHvVAWx+8IO7AqRsq3y4I7kVU3FRjODUrM6sB2xTJJ/NO0c9TjFLlipHcVAjbuWHIqRZOvIpi\nJVB25yKdA3Bz1qCNtq8d6kik3MQwA96oWo4sd4BHNSqxRuBUTMWcuBgdKlRdoPOaYyUM20nq\naa52hWA5PWmwt/Dkk9OaczGNSpbNAxWkxtC8DrQWZjk/cpgLONgHNSbgE2mgkTIYfWnqojmV\ng3FRSN8gxxTosfLkZJp9AZN8sbMd2SajUrGoGdwJ6U4bVGc89xTGILbhUgWEwrkk4FNkf5QE\nB+9yaZ5nIGOetOaSQtuJ49KCrEjRgtu7e9M5xlTxSbmK+oNIF4L5wP7tSAqttzxkU+NtjYJw\np7UzAEe79KaNzYJBK1LKLDMUG0HIJxQq/wAJ4qIt8vy/w+tPhk/dMW696B2ViXev94UVT8xf\neignlR8RRvGVDOcNjj/ClbKxhNuRnIpPMXcGC5DHkU5nDNHu4BFfPHf1FC9dx+Sj5GbJO1Rx\ngU1gzMsbHGPmz60qsNxG3I68UAPik++wGQf0pwwpLH+Ick1Ckhiz8uSx5FDMNxZiQBTuA/yz\nJJtVsBVzmjfna7cAc5qOZleNGXhgfzoAYbA4+RmyaYgaQSMSoJDHOCamjyqsEA5HFVo5EG4n\nIBOQfan7XUYjOT60hkxJZsgYwvNNYRmMyMSMHAbsaarOEcZ6jFNb/j3KE7k64HrQC2JRGmPn\nPyjn6mgynbwpHPHv7VGu3y1XOTUiSNztxjt7e9AD4JELtx8/fPQUK26NkJyD0NRLJ8zDguRz\n/jQwDKNvSkImQvAu1cJkfjUbKFxubLUk0m0rk7iopm4SPuxnHJApgShmwVxtB6GnbmaMKeAD\n+NMkb7uwnH3jTcncxYcN39qAJjJ5ykoNuOAvrSxuGwDwMfrUO8fLgFe2aY8jKAF5DdcU7gT7\n0OQvJ/vU37y4I3c80yNfmCr6c+9EZKTYLdDyKLgKZNrHI+TpihsfaG28BhkUxtqyOS2VPTFK\nJB5YUfe9aBh5xRQnJf8AlUn3cHnHr701QWU4A3ULIWxhsrnFAErKfkY01WMUhBJ+bkc02XLT\nFi3ygcCmQ47A7xzzQSShdylhz65pd6hUXrk9abHNtYME4zg+1NEitK7MPkXp70ATO37wkpub\np7AVHu3MSUKxnilZtu8E5RugzTW3bUUvlcZwo4oAduG3CjnpTusoB6URxtCFyy/NzTDC3mEG\nReOSRQMnwdzKxIB6Lmk5hXaG+btTF/1hLMQcdTSqwbaAct61NwYs2W2GQ7eR8wqTKKTGMsM8\nkVDuEgfjnphqFk8pQOSadxInkk8yH5GC845FEzIqrt+6OvvSMvnIFkO0Z3cUxZQs2H+cdKBk\njSBY1I4H96mq251B6dcDvRHiRXQHOCcA0m4xr8qZYGjbUZKkqLuJ4fOaIZvMV22Hd2qKSMyK\nWYgEnt2p+5oypzwB0o3EObcz7tuSBzTgxZW42g9KZ5h2ZB+ZjzSeYy8/5FA7kvmbMZjxUSsM\nlm4FIqbcgHIbn3prRsHABHrTCxMsgeQccY61DG37wgL36Gkbd78HrUjS/MRxgrQGob/3hDD2\n4pwRlmAz8mOaZkxbTnORj6VJzuYqN2BzQgEkIB+/+FOWQR5wCzNxzUHHlk/xHuaTa5kDEjGO\n1AixgKMjoBk5NNZh5PmY4PQUxQscbFm3ButO2rcKrKfLVeM+lA7Dj86orLsJpNvlsSnPbFJ8\nsmJHO4j5QR3pPMYK4xj1HemUIWO0Keuc5PSnLN8r7jn0NM+TaF5x1NSeYnRsFOwHWpAbGoaP\ngse5JpeduUGT3pVj/gRtvc5PahZBGxUDjHBpkibf3eQcilZVRThfnxTE+7mTccnmpD8p2gcU\n7FEfzLjDYHWmxRiInaS7seTQ
0/7wEJkdMU4yDdjdgjnpSEBVo5Du4+lMMkkKjd82T1NNZhuJ\nDFiewp+3dgMxI6470ACxqzkg7TS/NHHjtnpTWUvkBPp604x+p244yPWgYm8lsKc+oocnzuI9\nvT5hTAFViA/HepVk8pSAcp3oAViY8unzjutLHllZR8pYZHtTTt/hGQafDJuGzPzLzn2oGTI0\ngjHAwKD+7yQvLc0mR5XzcO3OKcpeUKCAO3NIBJP3fIHykc1WUAOyqct1U9jVhl8nIIJPtUDM\nEQdj3NUgGmTOBgM4POabkNz2zjANNZwrHPAxwaauxU7565FFxC8o21TmnOxPG3J9aayrJhM4\nc8j6VJDuX5GHWkAxpSpHygA+lHmbQWBz2xTHcyZUJjnbU0aJDnDhvUe9ACE7YuRmnrjaDxzS\nH5lC/ePXHpSjy1Td1AoGNXcqk570sMgwz4II44o5ZgB9w85pPK2hueO9MRK0zADHBxmnbzI6\n54bjmmqwZhngY9KWMZYlh7CkBPyyl8bhnGaOi9wKSNH8ry8gc560vzbCW7dvWmA9cM+VbOev\ntUmQsgGfl9KjhjKgHGVPNP8ALG3P3iadiiQzGMMF6Gm7SVyScGnKVUEA7+PShl3YIBUgfgaQ\nxNu8ZXOPelWXHIHAqNcMvXaPrT1kZRgjjsRU6kscJgec5LetKFdcoTgYzmmsmeDtJ6mlh5J6\n4xSGEeUYKo69zUrKynqD60zH7vryKRvl2sOnpVASFWkU55PQVGGbywHQZ6U9pDkYOFz0pGZe\n3A70AOXEa57UbgeuQfakOMA/fWlZUPXg0rBYVcIpLZJPSjguoPSmSZ+Qqc4pzMAxJXgigB+V\nK4HzHPrSh92MLgioodqtuxx2qTzDuPrSGPMjNkJ8poMhxg8ds03zN0hJ44pokG3BOTnpTuIX\ncmcKS3v6UjKsi/MQOeaazqoxyB1qN3VlDEfhTFYlZVjBcDIpjMcDBw9N85VynbrRk7RJ1+lB\nI/cUwRj3FMk27gD1z26Um59xIGFpseNxBGfemBKqbmJxhetSMoZRhcVFuPILdqRX3cbsGgok\n8wxyAj8qOW3ZK5PIxVfzFUZGTzUxZW6CmgJF9zz1pxY5HOPUVAZATnpjgUuRgHJJ9aQWLLfN\n0O2m7f7/AOlRecVAxyB3xTmk+bdnrUsYvmeXgYzTyxbocU3duzx9KR2MeegbGetNjsG0Lzux\n60DLOJO1MXazA559qcW3SEnge1AWFaIndzz1pFVmTBOKVWD5JXAxTFPcnigXKxVYrkjr0NPi\nwkZ24JJ7UwsB05pykEcDao5K0XDlG7gzc5602bDE7eMdxTiQq8DAPNRcKD82QeuaLj5WLuPG\nfTrTiwVcnk9qUsvljjoKazbdrbcjilzBysWRjtUMc/SkZfmIzxjikE4VmGAR0wajbGzj7+ck\n+1O4crHqv7ndnn0o8z7uWwaTzEfuQO3FMYq6479j0p3QuVk28tkqAaSQhsEflUa5VtrcDHOK\nFkUZ4ORxQ2Kw9fLK5VuachIQgnmoCwOAvy+2Kc0m773H0ouIlMy89hio/OVehyKikdS/ytxj\ntzSKpl24Xj17UcyFYm85d6kfjTjMFk64PqKgTjJcbRnjml3EDAQMSeadwsWdwkUgcA9aSNht\nWMtznFVfM2yEZG32qWFwOq5I5DUgLsPyqcnIxxVuN8KpByO4qhFKWztXPHPpVi3l5DdB09qV\n0Mv+YNvAyPWo1bBLE9eKCwClRxmq+4KxHQHrQBc84CPlfmxxTGm+Ubl4qNWXywME453VEztt\nI7HnrRcCaNtsbDGdxzn2p6NGY8AEAGoPMDfdbC7ad5hXaFQkYzTRJJgZ4Gacu3v970qFWAXr\n1/h709JFk+Xr9KoNxePMx7UhmG3Yfu5603zEVcZOAfvULMnmDJ59KY7MlVsKSn3P1NDY2qR6\n1FujGSSN2ad5gY+i+npTETOu5w
Gb5Qaj+QbweQaiWYpk4LfWnM+4qFXIYc+1SFmOBVQAefen\n8OOOtQ+YiLtB3HPQ0GQMpxwaEwsyVvlYEHOetKcLzjLVEzAMFIwSM0NIpbr2qgJVIxleW9DT\n1U7uuD6VAJESMKOSec1IJ14HU9qd0MsxvupyspfBaqayNzjGM880+NgzA8bRSuKxaVgzc9PS\njhj6GoPN8xmIGAOlKsm7novegstIdvue5pS52471WQtJuPQVKWPljByfQ1IWJPuxtniiEtww\nOQaiSY7jvGR0NSRN3GFX0pAaVuofBar0W3yyM1mRSA4VTk+1WYX2nrWbLWxdRlzwOB1prD+I\ndKrtJ+7JBw1HmNgYOfrUFDyx6gcDrUmwPgk57iot4denNCybc45/pQMmIEq8DHrSbVHShXOc\ngdulIjDdkjB9KQwZTG2c5pqKGfJpzSYPtTd25txP4UrgCwruPPB7UNa7sgHpyKRGDZJ49Kl8\nzb0P40BYhMar83PvTtu7gDC1Ky/QjrTGBbg8D2piGqgwe4qaNWZcgY9qYuGGR8o6VIq4BG7N\nIQjcHO3P0p20rz0pF2qxXnpUi/KoAGaBkLn2+tUbvKjdjir0xYKTjjFUrj94gx+IprcUjLYF\npMn7tJg9SM805mPmMpGAKFfqM/hXVAwY9tgAOefSo2+6eMDtTvTtTM7lI7g1ZBUl+RTuPNUd\npBYY4xV2aREYhhubr9KqbTuy3K4qGUQCH5+fvYpeVBCc+opwTdJu6Uv3GJzn2pFEbLtVc4NL\nJhehwKXg52qCe+aag8xSSOR60XAYJCr5PpSldmOcs3NPjyc4HbmnKpJUg5xSuIa2/qo496Nv\nBLGl3j7zDHOPrTcfvCrDGelAxPMGzd2qUSHycjqeM1Fs2oQ3AzUq7uMDAHamAgbb8vTIo3eY\nB8uMdc9aG6Y27j60salmPI3Y6UCY+OQcYXJFLt8yNmIy3amRqQowCfenj5FHrnmgZLG22NSx\nzilXaz7nbv0pvMYwRkUKiSEL37UALMokbC8LnioVYsxAGB607GFKbsc9e9Rhv4QCfegB/lhj\nkk7QKduDRgKOfemHOdp5HtUkZG3BHFADVj4O4c+lJjbgZyBT+WGc4NMx8uAMHNAAxHmHHSnF\nzsUDpTVBbIUAj1oZQ23HagBUYnJBwM/dNTLK/TIJNQLhpCD07mn84wOD1zQA51O75Rgd6lX5\ngGY5I4AqJW3DG7LUqSKwKngigC3FnbyBu9PSrNuW3bjVSFvlGRu5qxG205xgVEmBaibAI7nv\nTmyoUsd1QwsGRsHinbt2B0NSWh755KrgdahZvLAz0Pegu4UnOR0xUTKzJjOe9NEsbcsB935h\nVKSQHnaBipXlVWYdSO1VpMFd4HJ6VoTYGYbeDzUsZ4+UYbuTTfJU8htvGcURllXLDGaBEm0h\ncDkGnr1GRmhVB4PC4zUwU7BsPHvVCGNGuAM4JqdWKjke1NkHzKWGVzTmBzz0zxTC4rZfAHC1\nHM3zKvQe1SqofnOBUUnUlTxmgLkMu2PGDmmMHMhbdipf4s8ZFRgEyHccmgBscju/OCvejcsZ\nOBjnFNIKrx0pVXZikxWF4zw+MdakH7zaqtj1qDy9sgZvmqViGGFGxvWmWtgDtucE7SOnvUiy\nfIMDL1VZmVfmPNSRsXT5vTtQFiaQMoyTg96YrDbt6EnmmqxbgjtxQqkL1yaCR+7ywAvIFJ5g\n25AwTSK+1tp6U5WXaQKAGvtVM5yTxS78YUDkjFNjVl4cbjnimq3mbucYOKpCCUH5QoIweaSY\nMf4tx7EUBWXjfwaRlZWAU4HajUY6Nn2kEge1KqksWPApqtz0596aJGLAZxzQSyVypUgcmhR8\ng520udwweDnrUcmDxnjPNAhd27hOT7U5ceWSw2nNNVevl8HPFI2Wyq9O7UAOVt2dvWiRD5e5\nu3aoWZmVSoxzg4qaRm9flp3AWM7W
AHSnZ3fLyRnoO1MSQRrwMn1NInDFgcE+9Ax8kmGIx26U\nittAG3HFNWRjHyuWBoSb92dwyc9qAFdiqnqaXczx7m6f3aTe20ZFLvDAevegBVO5c9hxStIG\nCrggg0n3Oh+WhSVYHru4oAduyzAc0jNzgDB9aQM8bHgFs4xSMzsxAwMDNAiVcKpYnLYpNxkA\nUjK45FQx7iwUnJbpRgq5IJJzg0wJWLDpjZ6Uskw2q4Py9MVHGvmTFAeNuc05G2x4Zc4o6iHm\nQYAHOaM7QSTz2qOOT5ufwp8ny7WxzVCHM5+U80zeZN2TjB6GlmfcwXOCKi6t85xTKsShTGwI\n5HXFSNI23lcHOcVG2cYDAimhm3daZLJslj6mlG1Cc4zTGOCD3p3lqoJbDMfSp6jQq/N06U4k\n7Rmq+0Z9Fp6hpGO4fLiqESNJ8nTHPBojkbcx/OmN91cD2oVgo5OCeBQA9mLsGxjHFIzrvZcZ\nB5NKrCSEjOcHrTgwGAeSBQO9wCfLx+VK48w4HB7Um8qDtTPHemtujRc8t1wKbRPUfuVTlhnt\nRFubPzcCmNgQndwc55pkjb9pT7vrUgTY+UZ+9nrSrIfut93PaomAYcnjPNPjk25APH86oSJP\nL2swB5xkGkj3LGS3K0LndkHqOlMSNgSM5X9KGIk84FlXOe9L96Td2zzUW3c3TA9aejZJ/KpA\nedqyMcggdMVHtd1LE96bkFQFB61Ku4cHn2oRSHJjacjoM01ZC3z9vQ9qa0ueO3TbSgfwnhMU\nwHKDyScDtSiQLHtxk5poYclyAV4VaPLAXdvy30oAezI2B93NCrujLL1U/pTV+ZgAcmjlWYbs\ne1MNBdrHnv2p7Mxkx2xUO5+cDLChZCyqenekA5JP3m1jgVK0gaP5hk9jTCyBScbm9KXzfmVQ\nozjJHerETxs20A4x3oRtqkH5eetRc+Zub7tEn+sPPbpQImDHduBBHSntlcDODnORVbIdAVHT\ntT9vzcHPGSKAJ2ZI2JUZJpUOxDmq+8r7E1LI7ZAZecU7iFU7TxwTTxIfvHkCoTkthvlNKrmM\nkZzlaECJEkCl8+vFOZi3yg/nURKso5561IOFBxmmUOWTsfvClEjKxYDjHSmLt6ZAJpvmbc5b\ngUtRWJvMHkgjmkYBdpxlT39KhaUtwq7VIqRWG0KeR70wFLJ5hbt6Uu75wwJVTSFwoyyZppmD\nKRnA7YpATRyLtPXOe9PVg3U8jpVeNiGDHkegp6OZiWOF2nimBOzrt469zSw5UEscioZFXbwc\njrSrLlV+XIBqbjLETGRiM/KOak87dHgjBqt5m3cMfSjduQN90njmqETbiy7kGCP5U3zNw4GK\nYWY4CHJz0p3mIFwOX7igpEqyYkRj+JpykeY2W5zwKh8z5VA9aVGKzMW59KBEvmCKQs3T0pDI\nPvetJuWYnA596Ryu3OPu8CoLJFV/lYHcpHT0p7ZjUB+ST1qJZBtHGCBSSTFmUYyOtMLErMA2\nBwKNzdBwfeoZmAAOdvrmmLh1Lh/pSCxbdxtXndzRwrncOnPFQrJtiUMAcGnu+xsgbh61VwJY\n2WXJU/N2FO3DG0jDnrUKsFUODz14pwf5gzfWi4DuFbG7nPFPJOSz/dqLarc45znNEk6ykKvr\nk0wDibjODmnL3OANvrUSttJUDqetKyn7g4zQG4+OfLMWXGO9OSRUIbO3PNIwI2x/cHcmmM+5\ntvGM4B9qAJhleV6H1pE3SK3HtupnKrgcjNO+YRnb1piHrGCuA2aRlLH5U6DNIJBHGAT8x60r\nblyynPHaqIa1CKNed3XrUZy/HTnqKTJDA5yKNwDYzt9KCR25OUU5IpzYG0jgio1YquSAfUjr\nTuH4DUCJMbnXnK5qV8KxxyOlMj/vdMU0ycEYyDzQMlWFVbhuTQWDFgRmo4u3GT6GpBuWTmgQ\nRxFckHB9DU/mfLhlz2zTGYvg7cds0/
IaYk/cXjj1pgJGpjU45Oe9KD8wGMe9DN8uRz3pqNuU\nkLhqBlpQJGwDt4705XBJ3noMY/rTFVsKx607ad2T370IB4kxGHZ97g8AVJ525gxBDdjUEafv\nAq9asR4SY78elaASxtu/i2jvS+YBnPIqHaOecA1YhQIypgEY5JoJbCOPy8Hue1WfMKsUwT3q\nEgs2O1N3OsinkqDTQMtZ4AAw3elYBo+DubuKiZmY7iOPUU9FCK8gGOOtUBYBHAPzHFNZS2W3\ndPWo45P3uR0x0NPZdwwPvGmQSRyHuc0/cvmD3qGKPysKfmJ7imh9u72OAaYFhWKbtjZGeQam\n3MykhNo61UjkRQW6Y61MJv3JCdGPU0wJ2m8xV5xUirtdSH6VCq5UHjHtStJv5xgKapAStJhz\nyevWnRrubO7IqszFl4OB70/eqYP3T6VQInXKqSTim+YJGB60xpEk+/u54GKXaY0+UZ+tBJM7\niSLBGPpQzN5OFyD60xd7c9u9SmQyfKBQIRGZiN33sU+NvM3AnAXp70xyep4pY8Fcg4akNEpd\nW2sOO1IrCTKrwRTU+dzjkCgK27sGpGiHKxC8DHsaQjCkZ4601n8v7xyfUU3zDswMMaiQidm3\nKDgkdKdu+U4J44IqKSULCsY5bPamtJu9vXFT1GTLu8sOQPpSht2D09RUXCjg7gaTd/dHFUBc\n3J/zzWiqmTRQB8RGQRhI0UgZ3E+p70M23buXI60xpEXJJzjt70vmAOxT50H6V86d45v3kaEH\nmnZ8lR3bOTUY24XBxmlX7uDy1AD45G2k45zmh5MnngHrUCszMAeFYGlMyw7dp+UDFAhXdWZu\nMFR0/rS7hJCAH5HPNDKY1LKdxI4FV2YiJR1LcGmIsBghYFQ64zj0+lJDLJu+4yqud3pUbMIU\nCk5weRQwO/5ycDmlcCdbgLy6/LnrmhpArMTxu/h7VC7kqAQFYnj6UKxkU4ACKeafqVYmZPlU\nEfNntRbcO/OEXq3rUTblYlWycc57VI7FFXcdoxSEHnRi6xtzuGNw9KfyjMgG1uqmoo/l3E8+\njUk02/bIDkDj60BYkZGfa8Yyx6q1OVS0m04jP93tTI5unzYBPPtSeckkjR87h3oAkEhV2wDn\npuPShYy7dck0wZZCFOF6ZNAARkye+Bj1qhEnzfMo6JyaIc7AxYHjjFRRxssjYOWzz9KWRfnL\nAbeMD0pFEm4gLjHzdfb3pqsVYoxAx0elQKzAtwoHJpFZE+bG4dR7ULcQqqsXyBgx6/WmmRZG\n2ng5pflTEihSzfpTZmLR4kCkZyu3rVFDm/dspkJ2+gpc/e9+aYrfLhiBx0NJGoViSSRihksm\nV22jdheKNzFiQMHHBpuG2/PyexoWMvIu1s4/hqRj5GCIik8sM/X2ojAK7uo9KhYblVjztbAH\npU7MdvyjAPUe9MQgcPwFy+elOTMcjKc/MOVqMcTcNt46VLhly5OfSmId8/lg4xt7Uc8tj5Se\ntNVA+SXOT2pY1KKEb149KkYrfMuWOcn8qcsQVgN2PQ0xpPnKt07Y6UMBIo2vnbQDHGNWkJZs\nYHJpAHVNpOV6hqTcdx4xkc4qNZG2mJhl88UwJAduH/j6VJt2tuB59TUe/c2wc+tKfmQg847U\nDsSqzSMzEBUA/OmRtt3Luyp/SodztGRjCginj7vp3pCsPhj224Offmgrtkz5mRj7tJGwKnBz\nhaRpNu35dxxSExfMCL15p42sAcnIpnlrFF9zr60qujKVZMZHagfmOaQLJnJ+YYzTYv8AWEE8\nKM0xWXZ09qeIy2cja2M/hTGSc+UVPBIyBRgMEZvlprKWUFSSSOlAjcLtHIHP0piJF3rIQ5Vh\n1C0n2hY42CqQxPDCorhnfDJH83TOalZz8q4ywHSkMj8w7RvOc8U5plPABAXjNDfMpZkw1NZT\nHGXI3DHIFMkVXA42nbUm5PLwikHuDUYY
silDjIyKXEm7czDaRSGPViVOBgDtSSS7myR14yai\nVmkAB+XH61Iygr1+U0FCMu3GEw2emaFXZuz68UOx+6TkjoaaueA4yaQD9w2++aVpBtEhOQDw\ntG3nngdhTF2qGDLwTgYpiHiYyLnO1c9KGT5ywOWA4pkn7tfUdKfsbZgFdxGOaBjZMhUL9WPO\nOgoGcMnAI6e9NDfII3bgfjzQx7YJbs1PoA11CgKB83UmkLH7sYJPqaX7xOc5xycUqBkt9x4O\naQCZbaTu5FI2W6HIIyaH3FTt5pCxRcKOcdaYCmXa2NnGMGnDG3HcDvSQyb9w+/6UeWec4292\n9KaAcu+P7uWyOlTRo2McB8dutVlzGB1OKlhZlX5Gyx55pAWwBJj1Apkx+YEdaRSY9xIzkUvm\nblUDlscCpAazsuSDge9Q7sghvnDGp/JLD5jgDk5qOYquMLx1yKYEG4+YTtDbemaVG3R7lX5i\neRUYVlzlcZOcinbgnK9c0xsRpNzJgEnrmpGIYMf4TRx5gC9hTlQqpAIHegQ0euOaTafJyFzu\n4NKzj73SneYQF28rQA1F8tdo5fHanxr+5X1U5NEbKSG+7g0M+CxC8GgYr/MwIBAPpUqqrt0w\nPXtUCyBVyX4pN7GM4OUNBLTLDcNhhgetOQjyy4OCTge9VlbcAeueMU/zuwxt/u0DsWBlsZ+X\n/Gnqw5YdOlVFLNyWC8fdp6yN24Hf3poaRZjlOcBse3apVYjk8HvVQThs7lXFOinG0jdn61RR\naabbyg4pfOMsfI2nsRVRpcMqj736U750w2ckHJFSBLEyycO3y56YpDJuYjnYvQVE1wok6cnn\nFO3568buuKQtBzuVj3kcnoM07zpFVCQMnsKrySeXjgHHvSxyO3Lkbs/LUjJ5GZlOOuadv24Y\n/dAqAtuUuDtAbn/P501bgBsj5x6D1ouiuVllZAwLdeeaBLuLY5HXPaqq3sKZVjg5wcjgfjU0\nUiyJhCSP9lCew7UcyQcrJlk3KAR1oEm1uef61Hd211ayAtDKgIBCNGQ2PXHpVzRfD+r+IGlW\nx0u7vXjHzRQxsxx68UuZdx8kmV45ztZuAOu2pEbcwI+tasPw58TXkxt4PDuqSTqfueQ/+FW1\n+EPjx3NunhTUBKTkFoyKnmQckjBEg3E45zSKp8w/Oa7y1/Z3+JlwqbPDF0CePmxj863NL/ZP\n+KV0wjbRFh3ckzSgUudIapyPKFYJuBILUjsFXLFRntmvf2/Yl8fMsJt1snG398JpsEPnoOOm\nMVatP2HPGk8p+1ahp9sNvAXL/N2B46e9R7VXK9lI+bfMG8oDuHpTGkVec59yf0r6v039gvV4\n4Qb7UbN5GB3MhYYPqDjtTh/wT9vJGBk8WQxAHPyQEnH1OMmq9qg9jI+TWuAzdQN3uP8AGnrc\nuvA5QjORjp+dfbeg/sC+HrGGQan4gn1Qt6AJj8a3bf8AYf8AAFpgvPfSMB/FIAPpil7ZB7Fn\nwIsj8k9MZFEbFd4JYgc9On41+gg/Y7+Hzrsmlutn91ZAPwrX0P8AZR+F+jJJnRXut3U3E2+l\n7VFKifnK0yNg7se56VEZN3zBx/ujrX6VH9l34VRyLIPDsCtnPDHFW/8Ahnb4WxyM48M2spIw\nQ5yPyo9qP2J+Z0cgXvn2xU8asqbmRijDOUBP9K/TS1+Cfw20sAw+GNPz0/exq/H4itU+BfBU\nbRoPD2msI12r+4XgenSpdZ9C/Yn5cW9m90rrDukYDcRtNSjT7gsMQzMMfdKEGv1Dt/C3g7T5\nHeLQdPjZv7sC/wCFW4dP8OR48rSbJD2YW65/lT9sxqgj8t/7Lu5VKx2N03+7Ax5/Cpo/DOr+\nQNmk37Z4ybd8fhxX6mI2jW/P2G1Hcf6Ov+FKNS09fmSxh3dsQqP0xUe2kHsUfl9Z+AvEl9Io\nttB1FiRjP2d8fyrUu/gz430uQef4cvmLrk
ARFmx9AK/S5tUgblLeNG9kAplrraW0hyik9Qdo\no9pJi9mj817T4N+NGz5fhPVJS3T9yw//AFVbh+BXju8UY8KakgPTMeP51+kcniYtHgJ8vbjm\nmN4lkk+XYB74o9rIapo/O+2/Zv8AiNJxH4cugT2Ygf1qaP8AZj+JVwd48L3BA4+ZwOntmv0I\nbxBJtwFBb6VEfEUysFYKfx61DqTK5InwTD+yz8SpkBTw+2T2ZlH9asw/skfE51P/ABJY1z2e\nYZr7xHiCWYnAVT2pf7buMdUB9cVXO+oezR8Kj9j34l7V/wCJdbKo/vTipI/2NPiVJuH2OxGf\n+m+f6V90Sa1Oy43L9Kg/teYqRvXNHtGV7NHxVa/sT/EaTA22KjuGlI/pVr/hhvx4o/1+nsfQ\nSn/CvstdbnRvvAGhdcl3H5vype0dg5UfGi/sLeOW5N5psf8AvSH/AApT+wv46hbm90yTPZZS\nOPyr7KfXnjjJ3Z9jUbazJtVj0zU+0YuVHxx/ww3487XOmle370j+lV2/Yl+IMLEIdNcD1nP+\nFfai61JtVgw59RSNrciycSD8qpTkLkR8Rv8AsU/EBeClkcnO4THj9KVv2KfiBCNyNYyKe6yH\nP5Yr7bbXJcH58mqx1q5ZjhsD6Ucz6hyRPiyP9in4gNgt9jKntvOf5Vej/Yb8eKwJlsFB7M5/\nwr7Ki1SYjLyYx0qVtUkbAEmeO9LnfcXs10Pkq3/YZ8U3Uax3V5YWyd3iJLVPJ+wZrX3YPEtv\ngdmiP9K+r/7UlZMGT5aT7bIo2eYcVHvXvcOVHyS37A/iKVju1+x2juVb/CpB+wPr6qB/wkdm\nvqpjb/CvrVbl15DGo/thQnMnyntVKT7lqCZ8mf8ADBGu7vMHiWzEnT/VN/hTv+GDvEO7nxJY\nkj0jbmvrD7dxgOwqVbwqud+TT5n3IcF2Pk2P9hPX05/t+y5POUapbf8AYY1hA6ya9aoM/KVR\niD+GK+qDfP5n3ySac102/h80c7H7NHys37DfiBf9X4gs2H+0jfyxULfsP+Ii/Ot2JHf5Gr6x\njvGVuWzSG8YMfmx6UczF7NHyn/wwz4jdf+Q1ZJjsAxzT0/Ye8RshDaxYD/gLf4V9WLqEkeBu\nwKd9s3Zy/WjmfcfIj5Mk/Yf8QyQjGqWBIPAAPP6VEv7Dvi1VBXWNPI/uLnI/Svrhrj5cBuKV\nbzauCfpRzMXs0fIkv7DPi3hv7U09hnqxYH+VMm/Yi8Xt+8XUNOlfoFVmB/lX2C14d2Cxxio0\nvGXODjmmqkivZwPj0fsU+Mo8hbvTyh6rvNR/8MXeNlYFZLEtn+GYg19jyTPgkOR+NOju34Ik\nwaOeZPs4nxndfsa+O+QFtJB/124rOf8AZB+IMeQILTaP4hLmvuT+0HKnDkimi6lUBg5Bzxzz\nS9pNC9lA+Gbj9kX4hxx5FrbSZ6qJhuqp/wAMn/EOJTjT4yw6RiQZr7wa+lBypGKdHqUgPLDP\nqRVKrMfsoHwK37K/xGt9zPopZj2jkBP5VDJ+zH8Q4SP+JDI3GSQ2a/QFdWmVsgj8qT+0pmJO\nRj6U/aTH7KB+eV1+zv8AEGPh/D9yQvcDmqk3wH8cpgr4bv2J4GEzX6LnU5I+QefpSf2jLIvU\nA+uKXtJk+xgfm5N8G/G65L+GdQjC9/KNRN8KfFqx5Ph7UCPUQtX6TLdTK2dysPcA0/7fIw+a\nNPwUCl7SQexifmgPhz4ojYg6BqHI6eQ2T+lMk8D+I4Qc+H9RVu4+zt/hX6YNfjkbFGfYCnC6\nDrg26H3Iz/Oq9tLqT7GJ+Yw8K66nL6NeKo/6ZMP6Uh8M6wdoGlXqhxkfuG9cenqK/TvzrQJ/\nx5wk/wDXMUwLaOMm1jPplBx+lV7eQexR+ZreGdXhiVpNNvIkYfIxhb5v0rOlsL0EA20xA6/I\nQa/T97HTmJ32ELA9tgxUH9kaKikNpVtz38pf
8KPbMfsT8yQsi4QxSKW7bTmpltbnyZJfIkaO\nPlmVCVHPr2r9LH8L+HpgC+jWbN6mFf8ACoF8I+HRHLGdHtRFKMSJ5S4YflS9q2Hskfm5FI7R\n71O0qcVbWRsD16j0r9Dv+Fa+Cdwf/hH7MY4x5QxVd/hD4DnLZ0O1w5ycp/Kp9pIXsj8+9x3B\nc8mpIy2Fy3J6V98SfAvwDcRPD/Y8ARiG+UYIx6H8aqSfs1/D65mB+wIIscx5IJPb5u1CqB7M\n+EvM3EfNx+lSodqFc9819uTfsteAdxHkToPWOQ1Rl/ZS8CtkpPeZ7DzTR7Rh7M+NGmIxgE/S\nkWXc2cYbPSvsJv2S/CSTK/228C/3VkrNvP2P9HuJi1rrdzEN2fLcg4HtS9o+oezPlB5CwyFJ\nOcECgybeSMc4r6ivP2ObVhm31yVW9ZBms+T9ja83HZrsRGP7hx/Knzh7NnzcsgO0YxnnHrT2\nx3O30r6Am/Y/13ywY9WtJWU7Ubay8flVOb9kXxQBkXVq5z0OQPrmn7RD9mzwzzMcnj0pWYgZ\nr1+8/Zd8Z28hSKK3nC91kyDz9Ko3P7NfjO3t0Y2CyytztjYnbzjnijnJcGeYO4baMYHU05SF\nj9QetdtqPwI8aafHufS38xm2LEpyTx1qqvwf8YBURtCuhuHB20c4uVnJtIMDHWnea0Y69a1r\nrwLr9tIUbR73CttLeS2MjtVS68L6taktJptygA5zGaakLlZlzTNGmCetU5WOPlbJq7cWkq/6\nyCRe/wAyEVnTJJE+ZFZM9FIxVKSuQ4lGaUs5BODSbh8uD81Qzf6wlm+nHNMkHlsMnBI6GuqM\nkYOLLHmMc+3eoy5RWbOTUStJtOQVPpjmoJHG0jd83Yd615k9ieXyH3MiupfPzYxtqup3AfMQ\nKifecsOn+1gUglLKAwx71MgsyZtu4tnOKb524lgO1RLtU8t19KflVX19KVyrMZ5mV4/E+tOW\nQhvQEYprNs+UYA9M0izLHwwO73qREq4ZSN230NL5hRgqjJ6VAsm4g5yM9Kc0j7+evrTHYkkc\ns2T29KdjcpP3j61XhmVSTjcKVZHw3GKBFtsGIA+lNjYEYJxx2qtzIoyeO9P8wRqffpQMcrP6\n4qT5WHy8g/nUPmjdxxmn7tq0CLBclQM9OtG3chIOcdahR9wIzikjlMT7s7R0zVAXIyGXlsim\nnHlZT72ePWo45Fk+8OSeKc0bK428UADsc9MkdxTWYnAA+bPJ9qBIeUAzzUW4dScHpQIeZfNk\nOPkHQe9St+7GAM+vNQrGki4yMrzxQrKwGcg+9AyZmLKSCPpUDscAD8acyqPm7e1IXj/g59qZ\nQ6OTadq0jSrtxjFNjbaN5GDUxKsuSMCkIhZvmAxn1pY9248YFDkZ39FPFPywYbhhPWgQv3T8\no6d6kjYMx3Lz60za7MGTG2nRkMuwt82aQ7FiORFPfd6VPC3GQM+1U93lyAYyfWrcfTnipY0i\nYHcPlGD1NKznINMjZdxIPNNZhxkc5qSiQSctiq8kmd0mduO1PaRVU8EjP41CMSNt6H3qkSNL\nDZn+JhUAzu6cdqnyG+UDJXvUcpAXeVyaYhFB29PmNODDcCRntTDOPLHVcnpSMxjbI+770ySz\nHtUHnJz0qRJB06e1V4sK545x3qVcHHf3piJlZ88cj0pWXzOM4PpTYV2tnODUjcc9/WqJsJkx\nqMnjqaiZvlIboeafv3nA59aZK7xhhsBFMLDN20+o9KGxGNx5B9KiZ2fGBgU5m2qMc1IxiYbO\nexpfMZlZiCF7ULnDErgmk/hbLnGOAelAxFBbYWOSDmnbg+8bTjPBpFA4b2708MAMA/Wi4RI1\nk3KR94981KoBU59MjFRqAqnauTmnLuERDD34NIGK0iqo4II9qazEqMcMT60hxJGGGacFOVPX\nFMLDlj3KcjJ7mkVGCnJzntSyk9BwTQuRgE/nQg
GO7oAc8CkdsR7lXhutK33iDSHO3bg4HOat\nCGNGOMtyakbGwAD5h3qHcGU787u1JG38O6gRM77iOeKbuyp4x6GmSIRJ8vIpqptU9cZ5pCHt\nlkAB5HNP8zdtBXIxzVcHkhTk1NGwwRnGB6U0wtYTzDyvUUgkODu704YVckZNM24wWXOOeKoB\n0XGVIwak/hO0jPpURG5S4Y+uKETevOQKmwiTC7N544xUSx8jJznmpSoWAKDvBP5VG0YkIRcj\nHemMkWRlVuMKeM09mVY844FMUZUqMHAyaRiQFJ+76UdRkkbGRtoPOKcqruAzz71CrBZsdKGx\n1x9aYiZf3Z3LzinCQtlzgHsKqq3y9QM9M09W38DmkAqnMZdsBs80g2jLBjmmyK0ZBxkelKmM\nMykAHqKBMfHj7wODS+YBkDrSQqrLg8ntikZDg80wbJEfbnHDNQuTkEgU3jHHPFMRcsSynAFM\nVyfCiPggnNHzde3pUS/Kdqj3p6qPLznHOOaYC+WsmWDfMOab5hUB+GHSkVPKYsOX9qcPmHH3\ne31pj+ZH84bG/cG/SpBlWGTlR3pNvl8Yxnv6U9VVUODn196BDmYFsgcUm4LyD8opF5x6dMUu\nxMlTz70g2JN27ooPpSux2gj72eRSKoXpz3pqbWkyeDTEKu7aQOe+ac6bUUED3qKNhyFO2pC3\nyZJyaYMcGjSPP3VBwaIV754Jzk1FuQ554p0in5V38HrimJC+Y6yEj5lzTpZgWDbccYpse1Gw\nRmhpDwMZXNSxpDJsOyknJFO54A/KmM2c49acvLEt+FIGPb7pGOcdqVI9y9QDimowXJbqeKXP\nUA8mrRI//V8dTjrSRuVwAaapkZSvftUfCAAD5s8mgLFkfvPlJ96FbqVGB0NN3Dy1JbBx2pFx\ntw/P070DsP8AvR+2advB+RelRDCptUYzzQuFPfH61IDnzuHy4xTmkJQuw+Tp+NIG285JBpue\nAAPkzz9aYDmbdhgMnpTsHbgcA9aYshVeRtGetNXc+VPIzndTESiTapyd30pVxJkNxjpiofMG\n7CqcetK27dkdaBom8xlY8cdOKUbdoHfPIqNWO7jnHX60Ix3ZxmgkkYHduUfhTuhLEAnpkVFu\nKt/eHtQvQsOnYUASD5ZME8GjcyyZ25X+9UW/aWU8H3p6tmPZ0JOaCh4YxplW5NIrF23EnIpm\nSvTkVJkbeAaoLD1fzZSei46Uu4tgvksKZ5i7vlODRueRsdMUiWTR8xnPWmrKGOep6U1pB5e0\nn5qjVtuCBzTGkTKNzHIwPepY29GGO9VpGO0ZOcml4EYA4yaa0ESXDAsW2g+lMV2UAnBT0pGB\nEmQciiOMnAyME07jRN5nzjstP8xQpbPBPFQeZ5eVbpnrQZAzHI47UrgTmTdjd97tQ22NgQBt\nNQs4AyR8wFMxu2uBgelUMtqwVvlbBxTdpEbHkCo42Dtkduc1KXznPQjpSYCI5WMEHIqXzAsR\nK5fsQKgaGNdoL89dtOy68ocJ3FSLcc0rsq4GD0qWQsFXcMj2qHBypzUhm3AITx2pgSxsFOMf\nM3ejcS+RwOhpisseWJ3U5W5LAYGKBkqgxyYKgjrTI3YSP8tNX5upJNP42kchqAsSqVIA+6e5\nok3YAAzzVfft3FuABUqs0qLJnbxxQMex2r83HNJMp2hg3I5pvzLHnO856U6TGAVGeOaRXQb5\nhkj57HvT9yvhF4PU0zaPLCngmlbEceUXc2KAuS7BuHzYGKerFW25wMdKg8xWVc/e4zUkjfdX\n3oGKzjp+eKkWQbsMMjFQKwiZlbpSRybQcnINMROJgg9cmn5UAsFwDVeMGTHGDmgzNuI6c9+l\nArEwUL060jMWTP8AHnpTXk2ou1vmzzQ2Gk3Lwe9AbD97Ny2SRQWEcgB+bP8AFUURMjt8xA7U\n9m2qvfFO4h3KhgvOaFkO3bkg9zUTZVix6Gn7icfN
k+lMB27jj5iPWpvNVWAHUjFVtz5IOAKR\nXG3JGcelMmRO6NDgMMD2NIzblFMaZWCkDimSfMxIOBTFYmSTcPu85xT1Ozp0NV4843KdwbrU\nn3c5OcdqZJNuC8LzxzSrICvIxVduW+XrSK3zMA350AWfMXqT81SRyOcEnKY6VV8zao4Ge9Oa\ndtoI4WkIswyruKtyKsRgSOwA2g9apxyKu3ZyTUjTNgj37UxXLJZQo574JojYKxyM88Gq+7eR\nn5B2NO3MYyepHFBSLSP83J4pVuCWGOOahjbzFUlee9KrK0hU8Z4FNBYsmR/MLKMgjFOWNmUk\n1CvyqVz0pyyFQXz07VQFqJdygN+FSOxWEgj5t3FV1k3AHqBzUzO0mGxkDmmTYsRr1z6U1Zic\npggetRR3GOD9c1J5pjGVGVPemBLFJ8hGfl6UiyDyyHzt6YFMZt+AeBTyyKCDyMVQiSNwuP5m\nnTSbpAVODjFQv8yjnbmnltsYyN1UiSTcYl4bJNK8hZTs4Pf60fIqgnk4pV+Tk4BNUAiluARu\npJmKdyR/dFPZgrrxkY7U+MBXJYgjoKQDY26YJPfip93HB4PUVBkFiFGKcvzcZpgTfeKjOOak\nl+bORk1UEhbH+yc08S7ssGI3cU0ySykm7A6cc1Yjk2843CqMMm33PTFTKwVuGx/s1YExmfZu\n7VJvVtp3ct6VXjz5hUn5etL5wVuRxntSegmTtyTzxUkajrjFQqAuMdWNK0xIYdAKV7jJtxRh\njA55pZGDH735VAoVyrFufakZUVygPuaGUkLtLZGMmpIwqocfe7+1Ry7lkQA/iKc7eUDtXc5N\nQULHJtYnAp+7Ct6mq7A5G7hup21Mrdhzn1p2ATmRRxjHpS9FyPlPvUQdm3gZXnrT2UyYwcjF\nIB3nH0FFQ/Zz/eNFO4HxJGP9HZtuSRg/407/AFXyLgKwBpo2xsqFGwaJFfePm4B+Wvmz0B8j\nAYwoyB19KHZnkTP3vUUknytyQQRzUCyNtLoMPnApgT+Zjdv528cVGzpNGdse3joaY0m3YxAO\n/lvrSyzFWJO1h1GKCWPMrN5ajggYzUchZC6g7goz7UxbgMoBTDnk0CZpEbC7VxQInj3OFb5W\nyOhokZ94JHy1BGzKEIHHc1I1wqkKcuGH45pgS+YsbBpF9hTWnzlQu0jqv9aYpKkqT2zz1qVH\nj3ZUcEYAPrSZYjTL5seY/MHcg4p0jb7ll/5Z7c89qjQlv4QuPWnx9S5XhuDg8UEsFYhVZT06\n+9OSNirncN33gO1IWXcqAfu+9Isu2N8Dcobp7UwHFSyhgAR1K980xZyJCcAH+9imRSHcFx8z\nc49BSsAx2525ODQNFgAxr/e3cmnxsqoQR89VokaPeu/BUZ5pyMGH3vmxnJoIFjYuxLHaMYyT\nilaQxptIZ9vtTS0bBRkOhPNJG7ZZ8/uweM0FEodCVAznrg/ypxVgzsRweKqmRY5Czjqc+9S+\nYcnn5T0NMXkPjVo1+bAHWmnEceGbJzkH+lKy/L8pDMfemSM20K6fIvPPWhj2H/I2c/McZzSm\nRRhcfLUTYZWwpBPIpU3Od2RhQMLUiHwtlSCSOcCn7wMR7vnzTPM6EjFHLMWz8vU0FElqxWN0\nfhQTzRGxebCn92oyfelbIZBtJDelEjeW53HpxxVAOU7W+ZRk8qB2p6suzceecYqIcszBTjOM\n5p+wFSTwoOQwoES7RtJ3AkDkUkjFx7Ypm4GTIX7w5xRvLscDG3rQhCgqI8nnHrRH83T7lOjd\nBHkj5+3FKGIXnHPoKTGMZhHGT09KYspcfdxx170uUbOznnvT1y0e5B32n2pgEatwEXHHU0Ln\ndwcN69qeu1skZMi/gKaAI1/eHg9TQUK0i+WCT14xTI8rkZzz0oA+bYuG7g0qnOePm6UgGySD\nBYqUC85p3mAd93cUMWaMKcEngr60NgMRtwfSlYBWmf
ei/f7ilfMi7AeM5zUQwzKu37tSyHLb\nuhHFOwDGjfGcYX1qRs+YjO3sW/pTdpWQtn9aVVKgM3JJpkirIQu/Pln09qFc+XnzNoJ59KXH\nmRkEbB0AqN18zCsBtXpQBIp2MT19BSmQR/M6/Nzikb5toAAFN3FW2tzjvRuPoEW5RufIzU8b\nFVKhhz/FVdpArfP869qfMreWrL25NDJHtITnKrkDHFMSTdyDlB96mRvjczAkk5o69Btjzk1I\nDivzM2cE8gULu8st19FpspO/IHGeKezHJ2+nSgY4ydN4xxxSc+WrA4Oe9DcohYE4pRHliXBA\n6haYDnj+YFnx7UyNtzEB+OnNOWPMoGd+BnNIEVd5bHcjFBQb8qQecHFJt3/Nu6DNMWQSttHp\nzTo1BUg4xSAcy+Yq/KPU/SlYKrADI/pUZ/1iZJBpw2+a583j3pgLuC8IeT1NIG3KOMLUbF8b\ngMHtUabmcbiRu7UbAT7jHGSGwajViqjcOOtDMPunGAfxpV4yCwaqARweAvGOnvTy5aM7sU0s\nVUsDxTON2c0ibkis3H86lRyr7iNvGMVSWYx7yTlfSpY7jIA52nrmpY0WMnualkddygHC44aq\nnmlfvHIpTJuU7eR2XtTKsW5DuxtPHeoZ9pwwb2xUP2z5QWGDnGBTPM3M2DmkMkLbj97FNVtz\nYB/4EelRmbc2xT+NRx3PnEqvGKq6BllZAGbbgdsZqNpGxkfiAeajfManIBbqCKjMuOQGBPt/\nSlcOVosNJtUjqSOlSROTGAflHSs5L+Hc2W2kdj3q1E1w6h1hkcqceWiktz3A7ii5XK+xYWZV\nyhHz55HtUn2j5mBTAxwRV/TfBviHWYjNY+HtQukXkstu4H5kV0fh34K+O9avVWHwxeiMnazS\nLsH4Z9KnmXUpQZxIaPbgY/Clmk3MqqcL37V7Sn7GvxNkuCILC1WM4IaSU9+3Are0f9g34gXx\nDXd7Y2BZsbeX/Xilzov2bPnlm6FBkdAe1NeYIwUDL+oIP/6q+x7D/gnuVs4hfeIpWuv+Whhw\nF/Cur0T9gPwbaWzjU9SvNQY8qQ4Xac9f59an2geybPgr7QJFWVlcAHHTNP8AtsTR5WRUPo1f\npT4f/ZB+HehxtGLSW6jLbytw/mfN65/p0roLT9nP4badMkg8NWjupz8yAgn1NL2pfsT8uUcS\nKhQmQkdAp/SrNvp19eNi1066uPQpA5z+Qr9Y9O+GXg+xYPbeHNPRh/0xUf0rYjsdJs2CxWNp\nEF/uwrx+lL2w/Yn5S6T8NfF+snbaeGNRmdWClhCwAJOOc4wPeuot/wBnH4oXlx5EPhW6Bzhn\ndlVeOvJNfp0t5aKpCiEAc/KoH+f/AK1Qz67b/wB9QO/HBqfayZSpI/OrT/2O/ifqDEf2bFak\nfNuuJB/Suk0/9g74h3KLJd31jahjyhy38q+55vE1sgIVtzHj5arzeLURcAt9CelTzyuWqaPk\nbTv+CfertsfUPEduoz/qY4jn8zW7a/8ABPnTBlrjxPcK3fyUwv06V9JyeLieEXI6VFJ4nm3D\navH1pc8iuRI8Z039hTwBZsjTyXtw3cGbAyRjPSumb9jn4YCNf+JY4ccE+a3NdxN4iuWHylVG\naZJ4guNoyfxFTzMdkcxZ/sw/DLS5EdPD8Ujp0LtvH4giukh+GfhHS9nkaBYx4GAVhUfnxVZt\nWnkbmQ4zTptQeQDcxwOOtS22Kxb/AOEX0SOdpU0uzV9u0N5Kk/TPpVuw07T9M5t7S3gYj5vJ\niVc/kKxlvDuwz7Qfek8zy23CQn8aEPQ6uPWLe2jICorE8kKAambWrYruTGe+EGa4xrqMSEbu\nfenJeKPmDcUwOvPieKFduHb2pi+Kiyn5CB7muWXUopsg8N61H9vjBK5BpAdQviVnYgp196Y2\nuNjkLXKNqY65A+lQtqJ5bdRYZ1ja9LyB83HfnFRLrFzI
pPmEH64rmF1csDzT11Q7dwOaVhHR\njWLo5XzSB6ZzUSancxsd07H6nNcxJqp3cHBqB9XdF4bOaLDsdRPdSNjMhb8aa98SvzSNx71z\nQ1UtyzYqJtUMpIDcUDOoF95a8tuzyOajbUQrDDc+ma5ZNS6qWwaik1DEuRkmixJ17agr8t/O\nov7SRRhRmuWXUj3OATUn9pbeE5OaVirnRnUEGM8Ui6knJ6CuYa/LMdwwaYt3JgjtnNUFzqV1\nIbSWJ+lPXVcDK8fWuTk1Ddgl9gFINSLcBs0rEnXDWPMQ9veov7UVsr+tc21+CvPBqI32FOKB\nnUf2grNnPy9xmrcmrWckJKttcDpXFC68zndTJLgqpxzS6gdgupfuwfyNUmvHeYszZHasBNQP\nlgZIxTV1A7TlsGqsB0Ud88cu4NirH9sEnrz3rm11IKnON2KrNqau3eiwHSy6tKz/ACk06PUG\n2/McGueW+SP7x5p8eorkjqaVhHQSakRjmo1vnWYHdwetYE195nU7T6U9dS3R5zz0FOwG9Jde\nYSc5FK2oHaMZAFc7/aZjxzmgam3HHFCVhnRLqJZRyQKkW83YJNc/HfHaSaVb395ycD9KYHSi\n9RcEHNMF9nJzgZ/GueGoDqvOO+aX7f33ClYRvtfYOCSR9anXUI8AZ5rmvtfy5JyRSLeGRuTg\nUrCOmbUFZOuD6Ukep7G5bNc39rC8s3NJ9uOzNUB1X9pGRh83HtQ1+m773SuXW9OwMGpyX3vz\nSKOma/5BB/CmNfsGx3rnm1SNJMk4Jpy6gpYtvzSsI6BdQOd2frTlvi3OeTXOS3a7Thue9Kmp\nLxgnNMZ0n27jK/jT/twbGeK52XUCuCvApg1BdwYtzSEdNHeGQk54pVucDBH41zs2pDjHHHan\nx6ltOWyVoA6VLwLwaPtAbO3iud/tUHnHFC6wF6CmM6b7R+P1o+0/NzxXPjVhjByM0p1Zdvzd\naQHRLcERnnNNa8Ma+prA/tb5OOKjm1YNGXGT9KLAdEt0zcg8VIt9k7a5TTdceRmV/u9q0l1J\nNu49KLBc2FuDu+9waY1x8/Xmsz+0kVckcVC2ohmyF5pcojaW4y3LYFTfaCkeCeetZMd4jYO3\nnvStfpI3PA6UDNT7R5nTrR59Za3gXpyKX7V3HSgaNIzq3BbFI14Qw54qj5wbkimecFzmjoBc\nklLHdmplumXA61nNMGxtBq15g44NLoMt+Y7c5p6yHpnmq8bYUHHFDTYPyikBaF00Z+YZFIZv\nlIJ5qukvmL8wP5UvmJsPB9uKewFhZOlKXPrxVRZAy45B96PtG9sCjcNS2snBHSk3Y4FRZLY4\npxVm6GiwEiSbQfWned8o/kah8s5JDChYyF55pAWfO9aZ5g7UwRlu9O8lic9AKYDzNt7kfSmm\n4fAIZh9DTWiLdDQYG28mkBJ9o39XP5mpPObruyv1qp5fI45qbyWaPPQUAT+YzY+b86e1zL9w\nv0qExsy8YBo8ttuT1oAm+0NgZUHBz0ohupFZsYAYelQ7W49KOVzQBYkumm271VihyuVHBpW1\nCVvp3XFVtx6UbSVJxzTEWvtiseUH0wKjlNldwtFNYwzRnqGQGoUYrwBSq3lrx1osIibQtDmX\nbJo9o4/2oVP9KguvA/hW9I8/QrKQe8K1cWU56U49qBo5m/8Ag74D1Jj5uhWq/wC0q4NVrf4E\n/D+38wR6LbhWUg7l3dR2zXWMO+0GmyfuwWPAxRdi5bnmdz+yz8PbqYy+RLDzyqSHHpUmsfsz\n+Cr7w9cactnHbCQYjuEX94vAwSc565/SvRVbzYz6MKdtKnPPP+cU+Zlcp8+/8MO6Oyr9n1uV\nXxn51GP0rJ1X9hWS4jJttdRZSePMBK/yr6ZKOy/K5HpzU0bOiYZiWqudkuEWfHsn7CfiWFmZ\nNWsnUdOG/wAKyLz9jPxpbPkiG6iAyPIfqfpX28txLn72Kf
HeSK/Yd+lP2jI9kj88Lv8AZd+I\ncO+U6I6hTnar7sj61zeofA/xwjGRvD94vHQxntX6b/bHwAQCO3ApHuxtClAVHXgVaqsh0Vc/\nK+f4e+JrXl/D+oYHGfIbHvVCbRtRVmH2C7Qrwd0LAA/Wv1aSa3UnMIOfUCo5NI0i6/1un27j\nqcxLz+lHtpC9ij8oFsbqFiHhf6BTSTKY1IcMhzyMV+qt14L8LXSssmiWTbuv7lf8KwLr4I+A\nb6Rnl8PWfmPzu2U1W7i9gmfmKZFhXax2jqDSKzmMuUYIO9fozd/sofDe+8wrpnkM54MUmMVz\nepfsS+BZi3kXWoQFhgATAj9RWirIn6u+h8GBtyBkzIBT927BH3v7tfY837BmnFma28RXMcYJ\nwrIGNcvq/wCwhr1u/maXrNvcA8hJ1Kt9fSrVaInh5HzGlxtzkEn+VIu1myxJHXFe9XH7Gnjt\nWZU+xSrnHmLIQP1FctrH7L3xC0tl8vSzdx/e3W7Bsiq9qrmXspHmsc21SCMUqyb2yjHH1rpd\nV+FvivSFR7rQ76Nc4P7kn8OPoao2/gvW7hmji0W/Z1OGTyGGM4x/On7Rdw9mzG3Mzcce/rSb\nh124PcVf1jQb7w/efZNTtJbKdescowRWSoZmYYO7rn29qakn1F7NllX7Y2k9KdJll4Xp39ag\n5TAOVkXqe350quJMnPTnGavzRnboSM21cg4HvSqy4zkD+dRs4jwH+XPrTvLRW5wOOlFxixyC\nYvjn0qTdhBk546VEqjccfKMdaRMI2CSe+TRcErkrt8oIp5ZvlfGageQMhwMClWQbV4P40gJ1\nH3stgk5ox0Oec1AjeWxDfMOtPjdZFyMgE0hlpd24Mx47VZWQgjI+buKo/wATMWz7Z6VZR97B\n/bFA0T7tjkkcU5ZAynIweoNQ/wC996pH6AZFSMazYbpjNNkQ4zxn360pzwTztND4J3N0PNAW\nINwUkKeaa0jBsHnI6VJJtC5PHoah2lF3jnBzk1QnYkLA4yuR6U2RVGCDz/dpVfLAg8EZpChZ\ns45oMxVXc2TU8eVYqANvrUCqPM5JNTKGj7fKaaEybcBu9aI2ZSMdCOc0KpHHGaTaTkA4NaIQ\n8MFf3qK4ywbt9aerLGDnkgZqKZ3kADKBxkVWgDNw+TPy8cio9xVmGMHsafjG0MfmpuFVjuOS\nakFYTdubdv46bTSlgCQKj2+Y3HBpU3lctzzSGPVQq5PzEdBTVkfaylQKVmWNsnBoOCp7k0xI\nc2VUBTj/AGqbHIyyEEblxSbD5fXdSbTsGOKBj9xVRg4FP8wlfWomUyDr27U0MY9o60hltS3y\ntjP9KRtzZ9fWoiz7CAcNnH4Ubjktn5RxTEKysvU1HLI3C5wPUUrM3JPOe1R7V28UyRN37vkZ\nOcU1f9YQOtIjbecc0/afL3fxUAOhzg8jPpSMTtJzhx2pkeM78Hj0pd2+XKHjuaBC+WNwbvjJ\nJpfMO0beaY2AAfvc+tLJ2KtgenpVWEK0nGVOe1KSdsbkcntTGQ92Gwc0qsGUFuABxQFiR98e\nd2Nh44ph3RoAM02SbzFXggZpf9Y2Q/zf3aB2JlwyjnAFDtxlTlarON2OSMHmiPcFbBzzQNIm\nZw3RSKbu3qNgyM0it+5ODyfWmqu0cHBpICbdifG3jFKDtRjjJJ6U1ZPLUF2yaaZsgEcDPWmK\nw5QFOSAfSnRndwOPpTeZGOBxmk2ukpORg0xDt23cH59xTfLRcN2J5ok2KuAOe+KFUbs8+1AE\nqMAWYcdlpPM3A8cdD9aarB2YHrjim53HK/ex92kIlUCPHNOVm7c57VFuVuCDn19KUgqw4+hq\nkBYVTnkD61GW+UluRngU5srwT1/Km+Xlwc49qYDlYlum00ZLfd4GabncD+lKu4Jt6E9KBDmY\niYZXcuMGklyqjC00FgoOCe/4U0oTkEkjqGoAkcGTBU4IpwlU
kfrUA+8QSR70qx985pj1LCyB\nWOKVWyQxOKibKqPbijnjaBx0PekIl8xWDbhxSbuAo5U85pNo6uBk0Bvl2A+wpiFkYcbV56Gk\nkBZcLx6U7/VuO4wc/WofMLY7CqAm+bILNyeDTfOKybRz25pPMzwV79acuF3Hr9akdxr53fKM\nc0924DAcZ5pisZF9CtIP9WQBk96egiZVL7mPCimcrtYdByc0qq3l/McA9qSTCqBvxTAlkkMe\nDu4JzTA4yeN2TxS7cqC/rwaRMxuTjr0FAhdyNnPB6GnR53Lx8o4ppVVkLZ4pR8+McA0DHedx\ns25IpqlpFO75T2xQAGOBhT60rqJGyDj6UrAKD8nXLdADSMzFPTbyaYv+sZsHB45p3mfgh4pA\nSbxuz6jIFMbKknoG60g25A6Y70SNvYAj5fWmIfuZYx/dFEcnmSBAcZ6Uu1lXaW60jRmOMjd5\nbUCWosa4YqT35p+OoB5NRL91RyT03etKuVY8dKChfmRtqipEIWTkZAWmeZ3Y7SKSNuq9S3Oa\nYh67W3MRyeaUbWHXtTfLYsw68dqQ/KoG3mgByMzL/tVPHJuUdu1Q7gv40h6BQeM0C3Jnwq5A\nyO5oDMq88DuaYj7kI3AAHkUhwRy2CaYh+zCM2QfQ+tNUY59OuKjxumUK25umB/OnHMcmCc0w\nJTtIBPyk+tKyeWoBO4DnNMYh1weTnijccbeo9KdwJNykZ25FNJ2jK8GkhBdsltqelDMoXy+W\noGSyAtGGyCw5pWbzdu7CmmKHjjAXgUzerKjYJLD9aLDsSsr+YSSAvQCnQqEBBOTjIqFf3oxI\n2GHpTpCFIccjpQA7BZDs+UVLtDhQ3GRzTFkDfLjApd24hc80CDaqjDnK/wAqa2Wj3Kflz93+\ntNlB5BHU4qYgCMY+UdM0DQqsvGc1JGykE9qrbdzYD/L2p744UHBPFAiRW/dmpI22jcR7VAre\nUxQjcRUsTFskjjHSgpDmYpgryCealX5+Qeag8zy4vu5qTcgLY59aRQ8RiVSpbNEbYUo3CjgV\nHt3SKy8dsU9WG1gVPBOaBDlztZ930xQsh2ncMMewpiyBSMck9qNwmZgow/egCR2G4HbnaKZG\n26Ns+vSk3FY8N1FKoZWyRkn8qAsOb5WAA+XHQU9ZMHJ5YdqhUlcnOSaVpfLJfG6gZK37wE9x\n82KSTmNCvTrUazGNf9puaekhCHPINAD5MtGuD78U2RgAATgU1TlicnGKUqZYwzDvQMfHNHgk\n8n1pFYFSQ2QTgU0gKCAvUdqRrjEaIY8D1FUhMkjdt2OgAwacsocNheOmajV9ynkZ604yY24+\nVG6/WkSSKrLGoY7lp5KxnB4NQqpijIMmVzxmnKzdXGDimAqMpkOB+FKBtZv7tRiY84HzHvSL\nIy/K5y3XmgkXzTH0OU7+tPWfbIWBDKR0qJf4jjg96G3lhtwAfamSAxGAdzZzn2qbaG+YMQPW\nkyrKcDOKe25lUgYHcU7iY6Jh5pZfmXFJmPcMdc80znd8gxjtTz8qluB7d6AHFixZQPpmk3lQ\nqnnccfSk852jUkc0sgzIrAbu5FUDHx8cp6YqSPft+eq6se2R7VK0h+XtzzmgRIWb+BvbmrMJ\nVbcK3B7YqkrdWXkE1IyhdrY+XNAFpZNqEjj196duLDspx1qBGHOTyxp3LDaefemhk29lQKTu\nfGc09fmVTnluvtVeOQR5ySVI71J8rqCjbR6YqhF0MqAKhyDQJZOduMA4NVkDeYDuBQDt1p8b\nFWyD7mhCJ45N3ykZzT44m24LfKO1M89Y5AcZ3CnKxJOeCaoCeI9WYcY4FNDNJGQcI+elNjUl\nWUtuHpT4UG3gc+tUSTrtk2gHJHGTU+wK208ZqtASvGMkVOMs2XpiJASsYH3jjNNXOS0gwOlD\nMABtGSBQrb4skZ9qpAJyQoDZAOeKczHzAVHGelMiYRAnHzHjmi
Tf8q5wP71BRMTtkGflzSs2\n0FQMnPWolYupDcsDxS+btUHHegQ7zA+VztNKSgwC20CmrJuYnaPWo1DZPmdzkU0SWtyeXljg\n1M0y+Sh6ntVL7hOPnPf2qQSCXCk4XFWSW1k87I+6wGacrGSPpjAqsjDbkHin7m8rGcCi4FqO\nTzFwv3gO5pWY9WwB3xVdZAYxjg460m8+XkmkUi0+wKNjYOKZuXb13yVEvzY5zTGnWOYjawzx\nnFIZYn2TRoFJVgc8UrYEwKluKj3bYycfN60Rybztbj3pDLPnOwyygCm7mPzMuFqLhd4cHKnC\nnPFN80444NNjJPMJbjOM1IMpINrcGoeAPmOM01JI9pDHJHSpEXMP60VX+1D1NFLQLnxRHuji\n3gbz2z2pvmfIWJ3Ecn2qJZnEm8tuUdlpnnHJKDIbtXzx6I5WG4MPmHUZqVZ23BSuAD6VHGpZ\nApKqo5z6e1BcM/DZAFMByj5R/Fu7elQncucsEHQClWTLAr90DDULs2Pkbg33aLEsWRTG+eMg\ndaWTLKCSMkU1mO0gLu4yRQAswiaPhVOSDQIc0okUBtwC9h/On5CkDG7A44qPd/Enp972zUn3\npMryR3pDsNVfOY7uWBxx29qauVkKjgf3uwp4J+0ZQ7QxycjqaiaQLI2RgMaAJfMaVQw+6ePx\n9akiG2NkPX9KidljwUBG4dPapBtYMwbhRx/9egB6yeYm0jDevamjZCoUtncenrTQwaAOOfXF\nOlVThehxuDUxCfOitkZCjgZprSAIrFcA9cdRTHkeOMEfvGJ5FO9MABW7+lBSJGZWVecljjdQ\nF3MQ/K4x0qKQNJIQfkCjgf1pA25gFb5lXHNBJM23IBGewpGcRqVYfL14piyGMKG6twaTYFyM\nZPY+tBXQkJO3eRnjK09iHiyBnPUioVdAoDEq/vT1I5Xt2oJ6gz7VyFYEdc0/cRDhj8vXmjbu\n2qT7mo5FMskWcYb+GkFh7MNqnqvHFPUFW+X5VNN2s0blhgrxge1RhmkZQ2Seo9qAJxIsasrc\nnufSnxkeWBnAB/OodoEwwMq33v8AGpJI0QMmcZ6GgY5mdV3O+FJwpFPyy5OATjk/1pnmI1rt\nc5HQUnTAzxijUokZw+HUbh/eHFO+XaBnbnn/AOtRtJUbsY9RUbMyyB1TcnanqSyVeOR8seOB\nRuATP96ljmSYcrgDnNIGDg+XzzmhAAZxgbhmnncIyenOKYxDcBvnzmnH5GAIJ3dfShlANu7B\nHHqKeuxF+XdtPYVHEoLMm/GOakGFz83zY6UgEMfUh9pIpowY/LPzMOlIp3AqxwaGcQsoJz2O\nKoBi5jEbHjd/F3pzbouAMgnORSKRxjOO2acCDuOM+pNIBW/ecLxu/OhXJUluCvFRGQ7RtGcn\ng1JjzScnHrRqAi7nXdjax6Gl5Me2VsMDxTkIkVht4HTPemtuCcnJ7D0oEPKhtrkg8YIpYT+8\n2kfL71GGaMBuGOeRTppnyG2YDcUBYlkXeSAee1N3fwsMGmBjjK8EHmnq2WLsuc8UxWHoqsAW\nOQOlJt8tm2sMHrmo2J/1ZHUc0uPubl+VR+dIYjDzCM4IHQCnq4kUFR8ycn3oxFuyMqSKZGCr\nNzSESNI7tuZcDr+FMkYeXmMbVbik2HnDZpFLKnPzD+7QIkkPkKPl38dqXcNoYDB9KSKQow+U\n596FXaoZjk7ucelMdhc/uy+7IJ6e9PNx93li3emtIRlNmdwznsKdhWYEfMQtBQ3a0f3Tjcc5\n/pTREBzu5P8ADSsURsAZC8ncaYzbY2LHO7nj0pAKzLDweM9cU2ORdxBGfakZirhSu5MZ3U1p\nNoJUbz2oAndNwBC/iajxt4J3ZqL7RIw+YHjtRHKZBjO3vVDHtk53NkKO1ReaJPn3c4wKbJMA\nhYHAPFRGTYyscKmOSaXqBbWYKoLDBxQ0gZSVHTmqct0hG4lVU8Co
RcGR1SH94ScYUHP5VPMP\nlZdaXb8x6HpSLMWX3q3Z+HNe1ZUNroGoyKxwGW2faTnHXFdx4Z/Zv+IviaZmTR2sIxx5l2So\n/LFVzLqPklLoecJL5m5SMHOaWOYSSbR+Ne76T+xX4/vrqNZ5bOKInLuhLYHfsM13Wm/8E/bs\ntG8+sTTRMQZUjXyyVOM4J/Go9pDuUqMz5Q37W2My89Of6DNCh3uEhhDSNJ91QDyfQfka+8/D\n37A/hCzbfdxz3KjlftE/zdehx+Fer+H/ANmnwB4ZuI5YNCtfMiI2NJGH28ep+tRKquhvGk+p\n+ZLeEvEMN1PanRL1riE4khWB2K56ZwOMjmtfSfhH4617b/Z/hjUWDEjfJCYx/wCPY/Ov1at/\nCGh2ckkqWcbSScu5QZb61pW9tY2aYigjUZyBisvayNfZRPzJ0D9jv4oa5MIzZQaeMZMkj7wD\n6HFeg+G/2APE11eQ/wBtarHDan/WfZ1wRxkHmvvk3QVN27YO571H/aR6BsjrxUc8th+zgfKH\nhj/gn3oVrcO+q6xcanEBhEZxHg/3sgdvSu1sv2H/AIf2a4mhkm45ZpCzE+oz0r3U3yfxNj6m\noJNQjUHc/TnOafM9hOMb7Hnmj/su/DjRY1FtoUQAGCzDcTnqTmu10/4c+F9Kto4odItSsYwu\n6Fcj8aWTxNBAME59yKpT+MFU/Ltx7jii7HY6OO2s7RBHDaQRx/3UiAFCzRRElYo19RtFclJ4\nqYqSgAz6HiqTeKJ9xywANAzthqnlDIwefWmrraR58xvLUnIxXAz6tLJyW49qyrvUp5DkSHjt\nmpA9Lm1mLOVuDj61Wk8TWqr5SysxPUV57HqzKw54PB5qSZ2ADE5B5zTLO2m8UbWwcFfQ1Uuv\nE0jL8rYX0FcdJfNjA6ioXvG27mb8BQQdRceJppI9qyMuPSq8eoTTLlnwPrXOR6ipkIxUj3hU\ncGgZuSX8mRl8CmNeOz4DErWTDdrIp3E05b4JnOMe1OwjVafkfrUU1x0rOk1JVjZx096hh1Jb\nhc9KVgNeO6C8HpT21BQMIKwW1AMxA61GdQKjnANFgNw6k+egA71FJqh+6KwZNSJbH3RTP7QJ\namO5rrfSeYey1JNqTbQBgfjWG2pMowWAFRtfh8gHmlYLm9JO9wvJPvS/biqiPPFc4t7KvO40\n86g33iQadibm1NffMFztNL9qfhd/FYMmoBm+cgU1tQw2A1FhXZvC7ZWPzcVE125OA2T+tYUm\npYHBpP7RC7W3fMaLDubRvdrYYkntUjXRC5B4rnprwcEEk02PUHRSCcjNArs6AzHaTupn2wog\nwx5rEF8zN1wtDXDLznIoHc2muDHyW3HFQG53cniseS/LdODTReNIG+anYVzbN1jPzYFR/bl2\n43c1ifbDty35CmfbBJkgY9qQXNsXSsP9qmfaisg28+tY/wBqPTOKPteGHPNBVzWlkMgJyQvX\nNIk5DAhvcGswXWGKg/nSG7GCQeaVgNSW8ctzyTzSnUG4C5rHe8LbcUv2rL88DFFgNRrgSN8z\ncCnfaI/LyjA+1Y0k5bJP6UsLHbTsBqNdlR97NRf2ltYjkmqYkOR8pzVaZirFuc56UrBc1V1J\nl4HIp39qblxytYv2gq4wM5pxcgHsaVgNR79kOQ+4HvSnUN3esjDbeDxUikNEcnBoA1EvBg5a\nk849d2BWRGxPB6VKzMq8n5RTA0vtXduTT1vjGxx+dZfmlhwcD1oVmDY3cUAa8t4XXOcmk+0G\nRQQdvtWSwLbuafCzbccmmguaZlz0qdbnYnWsiOYr3JqVCVbk5FSFzTF18hycg0qzlhjdxWYJ\nAvGe9K03Py5xTFzGoJ9uRn8Ka10xGGOPQCs5ZmWTmpPNUZyc59aLDuaBvD8pzimtdt/e+lZx\nlJIGOO3NP9M80WAv/aHZcHmp/tBaHg1mmT5c7se1J5xUEZ+op2Fc0h
dCPAzkGka6IyAcZrO8\n4NgcjHPFP85dxOeD0pFXLrTepzUhulVMDnNZzyBW5Izio/tG1eRQI147rbwWo+0Fe/FZK3kc\nnANSmT5dxPApDNQXR8uoXviOpB9Ky/tHmZO449Kj4kXcTRYLm5b35kPP86ka9OW54rEgVVbJ\nOPxqys4Py5osBqR3W7DMcCpFuk/GsaSb5dwPApvnF1yGxQBvNeDIyOKZ9qZpMg5WslZm2435\nFL9o2qcGgRq/amZvvYFPkuxHwD1rHE/yfeGaTzSxPNAXNZpuhXip49SaNcMcisSO4I6mjznb\nNBRtHUiNxByO1O/tjbGpByaxFlIzz1FN8zb1oA3P7Vk28H3qX+0JMDLfLjmsWOTOCamMx2+1\nIaNeLUm6I3WnSalKy43YNYokwwIOKnSZWHJwaBGqmqSKuGelOqSKcg5rH8085HPajztyUWC5\nttq8isMNVqHW3jb5m3+1c2JPlz1p/njrjnFFh3Osj8QPnIOF9DR/bHmNnODXKRysvU1IJm4+\naiwtTrf+EgKrsBqeHXlOFJya4n7SfM61Kl1hvalYZ2r+IEjcKUVlqT+24c58obT09q4uS4Zl\n+X9aX7U8ZGTkU7Cud0Nbt40wVH1py61a9doI+tcHHeFs7j1qWObaCS30osM7j+2LVvk2AH1z\nUi6jbFemPbNcC99gr82RU6X53Z3YosO53S31puHzfN3FTefav8wYkfWuDS9Y/Mzc09dUfcAG\nIFTqO6O63QsflkyKY0ccgK+ftrkI9SZRw5px1BpGBUkmjlYHXqirtIk3D1NTqquvzSBR2rif\n7TkdsbsYqzHqjYAZsrRYR2Hk9g4zSuoGMOK5Ma1LwA3FC6nMJP8AWZ9aQ9DqMM7fKQaVV+U5\nK5+tcx/bUkOdp5qVdYLLu3YzSEdCueyg0/a69VyKwI9YLL96po9d2ryeO9PURsbSqn5cUxvu\n5IrKj8Q4znle3NSJ4gjkU5pgaCYYcZzT1IbtiqK6vGqZIAzSrqkZYDg0mUXNoPU1HcW4aNjn\nNJ9uh25PIp/2qBlAIz9KQwhg/cgmn+WTwOlNjkj3Z3EL9aXzojnD4FAai+X8uV6CjaSRxTo5\nIdhG/ml68hwKQyPaTSEnr3qeNCy8c0vlnHTNAEO8hRTmfnkcU5o8YOMimsNwyQQM0hEa5LH0\nqTdgdcUrRoeRmmrGuzINIPMd5mF6ZNJuyM45pwUEA96cyhuhGKYhgwTikWVl6OVPsadt64NN\nWH+I9aB6ircSKchjmljndRjqPfmmN3PSmgH04pDLH2p+hUMPoKUXOP8Aln+VVd5XjFLuPFMC\nyptplxLHvH0p6x2G4EQJGenyqB+tVtxC+9OVgzfMtAFTVfCHh3WJvNudIsrmbGDJLECSK5jV\nvgR4E8QKq3WhWuQeGi+Qj247c12e0N1XAo8kKwPQU07BynkWofsc/Dq/aQiG6h3c4jlwK53V\nv2G/CckJOnalf2kvGHYq+B3GK+hFLrysmDQZps5Lk496r2klqQ4I+U9W/YVkWFzpviBXk6qL\nlB6/WuO1D9izxna7hBfWcpHO0gg/zr7ijvZI14A96c2oSSLgqar2sifZQPz01/8AZV+IelhN\nmmC+j/v27DH061yt58DvGmktLDceG9QD7uGRN69M4BH48V+mkd5tjIzhj+H8qkjmjkhIkIZh\nyM9vpVKrIPZwPye1Dw3q2jymO70+6t3jOGWWB1I/MfSqPzMoBG1iT1FfrFdaNpWpQOt3Y291\n5gw3moGzz3rltV+BfgnWriOW50SzYorKoWIYwwI545x29K1VZnP7FH5jtG8RXcCM+39anWP5\ncMAvGeCK/QjxB+yb4B1yMLHaPaHt5D7efeuMu/2FfDkkcrWerX0D4wm9twB/wq1W7keyZ8Xr\nCrMpBz61OgMMZ+XvxX0tqn7DWu2sZax1uG75+7IMNj2rmNR/ZJ8b6WGk
SOG9Reio2Cfwp+1Q\nezaPFG+ZRn73rQrfNgjivQr/AOAfjjTY3mn8OXjRLyTCu4/lXK3XhbVdPKpLpl5GWGRuhbnm\nj2iDkZjltrgn7ppJMbcZyP5VPLb+WoVzyDhhyCPbkVVdCpba2MDBB45qk77CcbbiniPGRkVH\nI3y5LdahZmXr8xojctvA6CtDIWNhuAX6kmpvMwwy9Vj8+MDik3sjE8baZFi0rAMMfNVgfMhw\nMc1RiZjJkdKtxszA81XUnUm2uQAKNpAOeOMZpm7oSc0N84xmriSKp+YYO7196SZtxGTt54pA\noXkGmyfO+7IGKBkXJZufxpnljcGBJyaG3bsg5NODcjatAhsnDADI96C+0FvvD0okmbbkDg96\nY0wXaMZzSHcdG3A3Jge9DNtJ2HOe1Ju3cNnNNPyyLzR0HYl+4oPTHJo8zzJM9jTNzYLMMjNK\noZm3AhR2pgO+65xngd6OJF3qucdqMPuzuwaerBV5GaYajfM6HB5OMU5lxlM+/tTVwQQeDUay\nFmIK/KtITHqWaP1amSYXaM8+1D7g+RwmKZv28+9MAYYYgtxQzAgKrcUkn7w8Y9aZJnao7d6A\nJo/3eQTnIpqr1G3AzSnopHPak3YB3HI60EimNBnHT0pOqYB+U8UitzuU4NLuDZPQYoDqJ6L1\nHSnrGFjIboOgqFTuwy8AHipy3Q7sgmqGHllgp6AUxiu4kYB70oOd3pnH1pFjYqxCgkdaTE2J\nuO3aRwOlAJZSF49c0gbzFxnnuKU8KWPTPSkIWRQWCD68UMqthgB+FOXCYIXjvmm7Vj3Mo4PT\nFMdx24MvA57U9QAFB5PeoNyqyqCdp71JxgnpTC49vnG1BjJ60jbVXHp1NN8wowIGewFLI4Vg\nDwzenamIdu43fwdKNxZRk4oU7mK8EdqQybUK4wexNAA4+XCmmxnd04HQGkEexQRnd3NCsViK\nqO/OaQD2j+Xhvm9alikMa4cBm6AmmKpb7zALRIh2bjyR0FMBy7lG1yDk0gysjqfmyODTSy7A\nzZHtSrkMfzxQLqSRLuHJ5Ao2sWO87Tjim7doBb5c0hfzTyKBjtxVSu7OaPM2gAjNIsfBxims\n26MD+LNMRLkNJwPkxTtwMnzZC+1V1uBzwRg09vmXIbtmmMQtmQ5J2+9SK6nGFwBULSELgDcD\nTlLPEADls9aCWSjYrZZifQUsfzZOOnNMJ2sMc0NL5bbiePagQrN8231pQMvs2YP96mKzrHgr\n1Oc96erGP5mORVDHbjuyRtxxildlZgc4X1qOMndwd2ecmlJKscjcP5UBckbjb2B/ipGX5SVO\n0Ugcbdr8c0jZ5GcjqKQCrkruOTSE5+UqSD3oX7q+vpSyMXhzu71QhQ2MoenapOZFB/iHSoGU\ngnnJxkU6OR8gZwcZpEjtp3Mei9SKVclcYx3FJNIzlc8L3x3p2Q2Rk56igpDiAqnJycU126YO\nBjmlOF4HWk8ng5PB7Uhaiqw3bN2T1ojYMzBugHFJ8r4IGCBim7fmYjpjFIaHH5sAc07+EA03\n7qZ6cU5gJtoD4zTGSKCRuJojzgu2WNNkB57jpTEyFfaSKYEpkGMUuDtPzYPWmbAVyeCpzS8F\ndxHJHSmTYcnzNnGR602Rtrccj1FI0xCjHAxSxkKnTINNh1JI8sueAcU3IMIb+PPQ00/6w44F\nLH87buhpCsR/eZQy/MDmp1+9gcd6RsbiSRmmqp3DJxQUPVdzFRyD1NEjBWAI6elIzBUxtOM8\nkUv7piqjO3+8aCWCbg29BjPU0qKrAgk76iwfmRG71IjFfnH3sYpgO27WzkdMCljxtGTznk1G\nq45PAp0bfu2BAK/3qoBVkY72HC9qWMsyls89qYoEcZK9PSmqzCPOO9IZOM7Rz89OXdt5PK84\nqPcPL9/XvRu3N+HWq1EOVgzbh1brTmUqCCOOtRqCudoyaerSP9O9IYLuL9
eMU5dvJJ+Yc0Kw\nLYPy9qY2GDA/KM9R3oAm3NIu49+gpdjeWFZsNnNNXa0YycN2oRgXwcsfWgCbIVvkG5qbJxKN\n33qRY2XIYY75pm5ixOOB0BoYEseN+M5frT1dWjPYk4NQiY7gcdalRl3LkcLzSGHzdPunHFPW\nQl+Rj1pkjhcM3POc96X5/LLY4NSBPt5BQ8dajaUhXzSbjHHgcjHHrTDIFXBGc0wRMZCYwQO3\npTI23R/J1zzTMeYmSSMdAKRSQ2F+71ouUTKx+6PmOadGzo5ycj0qKNzHGxbP4CnpMAysTz0p\nBcf5h3HaQOOaapPlgFcnqTURKbn2k4JpxYIcZycdqeoD/ODEZ+mMVIr7FJIyO1MZgqofXiiN\n25J5UjigGShtqgnqaTzjnb2pkbDA6sR2pUkVtx469qZNxcnzABwCetPmIWPb0bPNRF9pVSMZ\n5FKWDNkjce9MBytFvC9DjNSearKw6jtUYVdxxTFcMN20rg9KaAlUiTbkHANO3M0hLHPpTPMM\ncR3DBJyKSPO7fnnsKQEhbYu7Hzf3aacsAc5NJ9+Q+poaP95wdq45polkkfLZBxihfmyGHOet\nNTG08/l1psbGMAH5x1zQSTI/Xvg1JIz+SPWq6sVBYjatP84llBOOOlUALP0xnd0Jp5x15Yns\naiGdzccE9aWKQ84UnHegRNCqqhz3NLEyKzIPqajMiqoA6k81K2F5UcnimARttbIXjGRTmZ/l\nAALZzj2pqtu+7xg4OabjazENuI6GmBNG2RgfLzT1PzeoBqFZATtPBp/MfA579aALMjoqg9ya\nQZZTtOPeq6swYSYwR0FWY5txyV68fjVASwnyjsI3d6XzAkmMZBpEcbS3Q9DSKQvIORTAkhwV\nOAVGcZqUn5gB06GoNzLGecgmpVYKwx0pk2JshgQR0p7MNyZUnPANIrL0PpzUh+6MHPHApiFg\nbcW2HkdKnTftD8KvfFNt4dsahhtJPbrmrKwmPJI3DuKtBYUkbVZOSTjpVjySykN19qIS06lV\nUYxxUi7oxtJGMc0ySAx7RkAlehqMyNEzIFyh71amTYWGcjtVWb7g2imA5j90gDaOoprSEZG3\nPcUyNTI2AcetOPmIAQe+KB3HxgTSD+H2p6xgOVPC+tMLfvBzzinBd5yTz60AJ5ZbBU0km4gl\nuVpWk2qx9u1NhYSMpbIWkIlj27BtGBik2iPJPOaYrMzlUHHvQGLMR0NXuSydWDA7R0/ho++u\nW4PpTI28t8E4JHWlUjzOSWFIRLHGNvD4HpSk4bhvwquOVO3rmnY+Tcex4NMotMwUDPWomkC8\nvkrUTSFs+nrRGSyH5tw9KQFuOTdww256ZprN83zHIpgcSYZyRjtTmAY5HQ9KBkiE7CeoY5NM\nDL0brmk3+WQvQYprfe3Ebl9aBj2ysmMkik45K/SmxzLv3MPlx8ppFBPJbGTmgRJuNFN/4EKK\ndkI+KvL2BWA4/nS5BJIG3+lR7itqQcs27I9hSTv8pKgqcZ69a+bPRBgG5APXr60jSBpBgc9N\nopnmMPmK546VJCxkc7WwccgigBY9jYIJVwegpzMVZinC9wajWZFmdV+Xb1XHWjcsbbnO5eu2\ni4xQ7xyD59g6nPQ/WjI3gngM3IXpTZlBXOCGPQ06R1jCgED5enagXUk3BdwJwgWmLOY5EDrg\nt0waZu2MOdzbeOOlBiVdpibeR8xz60DJd28suMjOQPWhp9+4HDDpj0qIxudsisAmMUsQRgyk\nYz1osIlLdY3GflyG/pTlIbEg9OahOI0CrknoKdG3yjkKM4PuaY7EqSquXXlGXBxQrK0Zy2Dj\nFRR5J25CqPyNL5jjduXgdqQiVWbaVA3DbTFjJi3qcv3zQCJmDDcExjHvUcmUbKNlM4yKBDm2\nrGW8w46kVIrAFVIypGainmXPyjKEYNMnbBTB+YcDHpQUTF42ygbI7GlbeuGJ
+bptHNMEa4LL\ngv7U2JtzBk+79elMZKJG3nIHHWla4+YDI5HGKhk4+XdnJ+96+1SnYwCqvtu9KaIH3DFlDbdp\n7GmqVmX7xO0Ywe1KJDGvzfNtP4UzYd3zOPm5wOtSMk3mONBywPapFkDTZQZUDH41GWffgYTj\nGO9Sp+7GzAPfdTQC5O0jBDe1O+8qvtPXvTIVKzDc2Ocg1JJsWYt/EDn2pDF+T5m2AEc4qOQ7\n87R37dqWSRywYjIb0pwUx5AIIY8mgZLksu0/eHNMWQrFuHJPUe1JG0i5YDK9M0i7oyhU8epo\nFYmTaFGw7c+tP5WMtswQecUkaj5mIy3Wm7mVceYHHU0AIysdzDgkc07HyruYn1FR7/MjBySM\n4zT2ZlZfpzQAm7bu+XPvTtv7wc9ulNO3duVsjutNbCsHUkZ45pjHtzGcrgjkU4rkqw+Zcc+1\nDRyRzLkZG3mlVvkYqMLnpTAVoxGqovIHOaQKNuG+U5zilLYIjI4I7mjjaRnce1IBskgDqq8j\n0FOT7zA8CkaPyenBAyaQsZAxwQO1ACtj12kcik3fKSOe55pZkDN1yRxTRlm4XEeMGlqA6Nlz\nnk9qerL5ZRjl88e3vUDKoBy2PapnZfJB74xnvVCEjV1b52+U+1OkYGMIvPNRiQrsjHTvUrEc\nkDjvSGNaHMi4Yn/ZprSK0oRSVJHJPTNPjb7vPyZ601mXyyABgGgCXb059jUe7buAGR60SNvR\nSBg96jTYWC5wetBLJeFxlsBuOaNyIuepHGKhz5ikbtwBzQ0m3Dj5geKQiZpmVVCpknq1IkjZ\nyq8d/pUKzGT5o9ykHp64p8kr+UeOXPI6cfWmNIla7PzFY8Z659Kb9qCgNt3EdgO1Vp2O1VZy\ngPGcU2O6ZpIzaxvdDowiUsT74A6UF2ZO9wu9x/B1yajd22jHrn8K6rSvhf4v8TXEf9maBcOs\nhBDvGVUZ6de1epeFf2KfGfiVRLf6jZ6arH5o1be6jtnpjn9KyclcqNObPBVmjiU+Yec4PP8A\nIVavoorW3gZHGJBkE44+vpX2Nof/AAT/ANDRoZtS1W8unT5Xj3qi57sMD3P6V6z4d/ZP8CaL\nDHGdFju41wTJcSFi2BjOM1PtEjdUXuz8249P1C8uNlpYT3hdtqeWhO76ECu50T9nT4k6/bi5\ntvDs0ELdPOIVj7YNfptpPw+0LTbaO3hsLWG3i/1aJEAV/Gt6Ozs412CLcB6k1Pti/Yn5yaD+\nxP441i6jN/dWtnFs3MqguV6cfWvUfDn/AAT20i38t9V1q91Bxy8UeIgx9PXivsyR4412hflP\nYVFJe7WG3hvWodRyKVNI8F0T9ifwDpe15NJWdwMASylhn1JJNeh6F8CfCGh2sUMek2i+Wdwx\nAp5H1612El+NxPWqX9oySN2xnmoLUUWIvDOk2rCSK2RWUbfugAj0xVmOG0t1xDBGG/vbRms5\nr5gxwcD2qFbx+pagteRrecqsNoA5zUv2sqnDbV9qwlvGbI/WhrhujH5fanYZtNdeYpG8nPXJ\nqB71F+UnOPXmslr0M2F4NUprgs/UmlYRty345IP5VUmvjgsCR+NYUl4VkKk8U2a8yuAeKW4z\nWm1YhRl+1U21oxgk81kSzb1wT0qnLMfLIpgzZm1sup9PrWbNq7zqVEmBWaX3KQDVZsr0q7EG\nhPMZo9rPmqzzfLtzxVaR+2cVC0hVDg07CuW49QKNtPSo59SwC23dVLcqrnrULtt4PNOwjSi1\nIzRnHyimNeEDGeO9ZnmbeowKillO0haVhXNUXSluRQ+qCJipJKelYslwRjnHFQyXj4PFAcxt\n/wBqLIpIbJ75qOW68zoa59pmxkGpFuyoCmnYd2aovjG+euKvR3ySqOcGuc+0bsr696Rrho+/\n0pW1EdDdXyxpkdvSqj6su7OcGsQXTFipbK9aTzBJ8w4FFhm7NdedCeeMVWtrox
xbSxU5rPjk\nCjO6kkb5fvZosBqNfr1B574qP7cJecmswZKnFOViq+lMC5LeFu9C3RUHJqmrdd3SoGuOcdaL\nCNCS43ctTftO37tZz3AdSegFNa56ADFFgL73Ug75FJNcF0wDis2S4Kr1phu84waAuaQYk+tS\nTPtUetZK3LF+TxUjXBL8cigVy+Zj0PSofOCsMnvzVYXBU8HPsaZI4bBHXrTC5ofaAWznjNPM\nyq3J61l+aeo6USK7x5BpDNLzsxsQenWka4PAzwRWWu5eCetPWQds1IrF5WPPPNMSbLZxVXcd\n2d+BQs3l5xVAaEbbmyacMb+lV4btdo3DFWFmVlytAAwHO6gcYO3I9aWXlQc9aaWUYBOOKB3H\nbTv3Z4qMruJIGKTgqSp4pytvUgcCgYjYOOcAUZyvtTM7j7VJIOABwKkARivHUU9JGVNoHze9\nRh9vGKf5hZwRigCdZn8sdqrzO28FjkNT92cZPFQySLk96QhsiYGOhpf4cE80fakkOzHOOtRN\nKyMSaAJ4YR1JqTy0PtUEM29SaVboDIbp6UDLCqFximytjqOKqtddW5A9Kb9o+Y7mzmkFy4WG\n0gr9BTvNU4JADelZhkbcPn3DPSgybcgdKQaml5it7U9cR5YHctZazHaMGpBLIYyA3FUgNSHa\n3z9KerK4bHNYvmvz82Klt7ho+SaLAaQ2n2pyrGVy54qj9sC8jmkE369qYGjBHEd20k/7xp0Z\nST7xxWe0/OwHC/zo+0GFs9vSgRrpbxlgMkipPJRVYdDmsj+0n8rC8HOc1NHeSdXbrS6jNLyY\nWXAPI6mhreP7u4/Wsxr5sHbyBUqagzADgEimIu/Yo15DE+tJJbx7geQuKz/tz884FObUZJWA\nJ+UVIF1rdGU4PPrVaVSBt60v2pmXA4NVGuHZ+tUMkhXywRjBJqZeMhs1Cz8BifzpQzFfmOKX\nUZKFVs44NDLuXK/lUe054NSbgynaeKYrDgxLdKd5gYZHBqJn6c0m4FsDgetK4akufU8VLuXj\nbVfb83LZFSow9OaRQ7lc45BoZvypnIJFIW+X+dNAG0k5BwKeudwpkWdrEn6UuSQDn5qQE67u\npHGaXcM7t34VEN3IY5HWlyDSGSlTuz0oUFsg9e1RxsdvJp+dvzGmImLHjFPRmz7VVVz1J96g\nkll8zMZ6djQM0kkBk54pwds+lVLeRplDMMGrQk6CnbqIly27BpXbaMU8wliEDhT71ck0WaSE\nOvzD+8vSgRngkLnsaeWLYwcVYjsZUhct0XvmmfI2B0OKkZHuLZINLHIQeRzStheBxS7Rt+9k\nmmNhx1zQH3dKbgRnBGalWB1ZcqVz0piFSRqduZu9K1tKq52nb60/7O6qDg560rajuCjK9M0N\nlqSOGQ5YDAoZWVc5qibsjCjksO+aVX207afu9e9N/i9KXUdyVZiy80ok/Go/4AcUq4Vs9RTA\nmWQr06VILpkPBqvvIwoGfel2hFPOe9ILlnzCzbgeaf8AaPMbngVT+9znFOx054pWHctNcbWw\nO1O+2MwyetU2O1c96ccbeuBinYC6snHJojl5YdutU0f5QSaXzNuRjmlYdzQjm6c+9TRynJ56\n1mRynINTRuRye9TYC4W+bjrUkMhXIJFUgzbsjpTzIvPY0WAuecZGxnNSxz+X1NZqzGNTkc05\nrj93gDmlYq5rrdbVAPNSi425wxwaxre4PRuat/alUYyMUrDuaP2p/wC8QKkivAqjLZrJ/tCO\nPljkU1byMnk4pWC5uSX442ce9OjvirD5s+1YceoxZ4OQKeuqxGTAHPrRYOa25vJfOq5BK+1T\nf2ge7nNc+2oqzAhuKc+pxrhcZajlYXN9NULKFJ5zVmPUNyHnjNc1DfK+SRinLfY5yOtLlHc6\nf7ajL1FC3kOAuMH1rmpL1MZLUn2wNgKxApcojqUmQtnqtHmRZIHBrno9QCqct+Aq
WG+O3O4f\njSA3lIYdaeIyxxuBrCGrbVAzipBq3fNBZsbCTtx0prqWxt6VTh1NZACzYqb7YqjA5HWpAk2h\nmwaI/LXrmoftYVgcZzQtwCST0pgT8MTg8VJjcox1qKOVSpp6zpt29D60wJtp9MikbO3ikjde\nR5lN8wbcZ5pIBdrZz0prA9xipwTJHx2oweMigCIL3pNx5qVmAyCKbtDDJGKBDcBlPFM9TjGa\nftBTavWjbtWgQxY8YOcGnNuH8ZFLt3YNNkJyMdKAI1u5PP2q2Rirkd5L5eTk/U1TjiEbFh1N\nS5OcE0wLZvsIGKflUi6jGxBIwfcCqfPSoZF+bkcUC0NoXsbAjOcjB96pXGn2Ux3PbRSNjGXQ\nH+lUl9qG68sRQFijffDPwtrULx3WjWRRzltsK8k9+lcDr37JPgDVWxHYyWrMCBJDIQc+9enR\nySno+RUy3k0YAIoUrD5Uz541D9hfQJlK2mr30L9N0jBgR3HT9a4bxB+wzrcNxNJomtwS2eTs\nW84kAzwCR14x+tfYYvuu8HnrUq6lET2HOelae2aM/ZxPz71r9kn4h6TAJYLKLUFwciGTkfh7\n/wBK4e8+DfjbS2kW58Nakmzlj5RIGAT1APYGv1Eju1bHIBxgetPKxyLh0R19wDWyrSIdOKPy\nUubO90y6MNzbTWzY4jmjZT1PPTnp/wDqqxbx/KPm69sgE/hmv1K1DwXoWqc3Wl2s3JI8yPdj\nPXGen4Vyev8A7PngPxBblJtCt4pCMCS1HlsDnOeDyav2zI9knsfnI0e4A7sfTkUx1YMCVKp6\n+tfb+sfsY+FL51a2lvLSRRjcrgq3Ocn9a4zxF+xLdeWp0TV1C5wyXSk59CCOn5VpGuRKifJ0\nshC+nNNZsY717TqX7Jvj2xkkxDa3YXO4pLggDnOCK4bVPgr410kSebod2yqRtkVNytnGMY7c\n1t7SPcx9jPocR522RvSl84NgfyqzeeG9VsRIs9lLA2WB81doyoJIBPU8dKynuEikVCfvZ9M8\nde/+eKpST2M5Qkt0XPN3HYc7RTfv8jgr2qKFjIu4dDzRu3cqMqOpzzVEcrJxlV+bkt0ok+9t\nzjvUUbBQWY8dRmjzSu7uegNAE7ZZRg4yDTlx5Qzyai84rjd09qRnOTzyaOgiyqhl3E5NJ5gK\ngc5zUUczKuMYpVYnJzjFA7jo2+aTI5HSmM20HnqKkCqY+WwzUxo9iKc7vWqEIWYKOfl96bzI\nwz92pJGB2hRnNNb7pH3QKLCHSN5bcDAx271FLjcuXx2qQs21QV5pFUKpbGSD3oAGzGNg+970\n0FWUj8KayhcMDg5zimr8zsOncUAII/mwc7RTt+WCqvy96XDeX1pVVwwTjB5zQAi7P4RjtihQ\ndu1TzS7VIOGwc0LHtySePWqGxMGLqOO319akVti8HJahvu4bkjn2pocE5zhum2pZIbV3HHBp\nORICRlRS5+Zl24PrSeYNuD09aYx6zHkDgVGd23hutLu+YYPGKX+He33KYrCbW46Mg7DrUjqS\nynAyaYriSUFflU8U9m3cLztNAWEVwzZZcDpil9cDntUbFnb09aWEGP5iaBCxx5VgjYPWnMSx\nHOcUw43EgYGe1LvUr0280wF8xmU8+1NLZA3DPsKSPbhgT8ppIwIzkDketAE20H5eoxyD2pEZ\ntwOCQOOaXIkUgHDdT60isDGQxIcdBQA4F2ZnAAIpiuwhyeDmlDbUJIOW4203gKf7yjpQxEsZ\nEuDvz7GkbPXNMjZVX/aIzxTmkZowMYYetAx+7coAGPeiQFWOMEimjK4wfvU5VaNiByT1NVcX\nURnVVA6n0FKFRfmAx9elNdlZvb1FDHEO3+EnIoGG7bxjHNKrN9FpdoZhuxkDim7f3ec4yaBE\n7bTH8v36YGDc7c/WkjXa4YHcO9OXbuOWAHUUCAs3JJwtDTbVBYZXpSD52CbgT3+lPe
RVGQmS\nvTNA7iRKTlemecCn7ivypz70i5ZWkUjdjmolkEce4narfzoC5NuKne4Gewpu7O4suOM0wKWw\nGPzYzzUjNwFI3dyapEgVzhlPBpikchh36Cl8zcFxwKeFXdljgEYNMQ1oztyWwaew6qv+sxmh\nim4KDkDoaQyBnZl7CpELkrtUnJ64pVmDMWxyKZAjM2SOeuacBnLH5R0wKY0OOG2t3p38OcZq\nHcI5sA5GKf8AOv3ehGSTRYESbhuyV2g96VueMcY4qONi0YD+veny8MozytSMazDnIyBwBT9o\nwoGAeoqJsMhB+madtjjxhsk8UDH7XKls4FCMVTn+Km7u4b5R2pdpKFg3PZaYxsjEFmJ+c9B6\n0+N25Zxg4xj0pgUkDcN23nNPTqSOB/WmSyONvnwxyPSpA4+8enoKaTuXGMNnGRSSBt3yj5Qe\nT2pk2HFucAYJ70scjKCWOTSu3zAZyKYsm5yCuFplDm2lgTxxmnrI0y9e3FQZ8tMMST2qQKyx\njkHFSJj1QjB6+ozTQxbKkY5pVU7cs2R1Ipqg+Q3OCTx9KYyXcVB459qbHIMENxRgrtUtzilW\nEZGetVFAOViW+bijdt3f3TTG29MHinNnaF6AmgBq7SCGzijHcHPtSSSBsgduDT/K3LkcfSgB\nY95TOOPSntIM4x8vXj+VMjwrZ3dRjFKqb8DOMVXQBd3Qjp1PtT2dtpZPu0xVBBw3y9/el2gL\nkcr6CpEPE26NQRzSjbsORUcjowDrkDpThl1HbmmA6NdzZzj/AGaUsfMyBTJFJbPftTlLEEDJ\n7mmMmikY5ydwFIrDnPzGkjU7Sy9KdGu5y2QAO1IRHJMGVVAxzT/MO4A/d7mk2jox255Bp21N\noUNuOeaQx52M2C2MjvSx7mGwHGBxTZFSRlfI2rxj3oebzFG1drKetA7AhbzMLyf9qnuyeYoK\n554qNVZcE/NnmiYFhnPz5oGSNyxKnjptqJcRqcg7mP5UrMu0fKT6mk3Nt7kDpQMlLOoJA3Do\nRQqBjzwMd6j804HpSFw4C4JNAEsbDdnp2xSswVtxXg8A02OMswI59acuGQjPGeKBD4V3Ntbq\nMmlDjZ09qhV+g5z1p7DaOep5xQKxIZFLHkdOlLGwYZAyccmo2wWyAOBSKfL3c/Ke1AWHtIGx\nnoOhNNbOcK+0nmmCQsxTGF605WDZJHzehpjJsfMu7jjpStIFjLEDOcVByzBmGB60vznOVyM0\niSZWEi7upoEgLYPBqHAVjn5e2KaAy7sHOO9AFzzIsnJw1RKvO4NuFRMymHkZaneZ5YUdWPXH\nSqJY+MkMcDinbuSCdq0xW6Ddg5pXjO4YwfWmBKyBkCklgOadtEOHI3VGjGRWAPzCnlgynJyw\npgJ5irJwDz2o3yM5A+VaZIjDvzTgjPGAThhQSTxgYIbGf1oZtq5546etQGMqw4zx96plUdea\nYgVyRyNrd/enriNcYpjMsz4zhh3pyE+oJpgKQSoXO1vWpVjC9TlvWoeFUk8tThI2wPjjpQA4\nMy5J+b2qxJnajdwOlQBhwzcE9qcuSpHfsaoZMGLKD055zUu0sQFOKhHKhSccZzT40P3QTnNA\niy2Vb5SCO9OjP77HX0qPZtOc5pSrKyMnBpiLQUx5GRzUu0llI9KbGgY5OPxqZSFypqiLk1vJ\ntUbsN71bjYMwIbIxVFFRV9au2YfGPl2/rVoRYh2gZUkL3pwQBSG43UiKVBGflJp27JIPNaWI\nHXAaJ16EEVWk/d87csfSp23SYOeAKhlYrH8pyaB3I9wVSMYNRrIxYruHToaaefmyWNPUiTOw\ngnHeoKBMnh+vtSqGx852jPWodz9iF55qxu56huOKoYuNzFSOMdKjZRkNswPSlRT689RS5ZST\n2oQgDKnKflQzfPuA4x0pjbtwfI+lPbBXcaoTDJKlifoKabgFQoznNIrKo6ZppxuyDtNA
E5+U\nEAZ9xTo13KFGai8ltpKv1pVmKYUct3NIQ+P52KvwRUittXaANveoGjO4nqDQi7fl5HrSsMne\nYZwCAKVGwOTjmotqlfM4ODS+WWUHqOtMCQSbsswyM4zSeafJIzzmmBTuAJwvpSn92cY4pgKW\nZio24HYU9cZ+YA803zAuB39aDImcHp1oAXcPQUVJvh9BRQB8SM5kZVVGC5+8aGjZ2A6KD96l\n+0NIdqjbihnVY23g7jXzR6QjbdwdWJbpt7U/YysWA+U9TTUkLxgIgQY+8aSRkOza21weVz+t\nArCltrNvACt0NCsghAA3sDSSXCKp3c+jYqLb96QE/dpDLHn7oyccj+GmMg4BGGYcZqLDMoC8\nNnJ+lHmbowTlnP6Uxk6srXBKnhflNMj4ZtvA5pEbLHPyJ1OOtL9o+QtHhsnHvQAKvy7V+71x\n2pThl3qRnoaGL+ZsBAGPTigRpGM4JGKBDVZtxwc8Urfu1QEZUnk02E7SPlwPXvTiw3Y3bec0\nCHzMsnK8KDTmLHYd3HrUSqG3AtkMetEhEfyjkryDVWAlRhE2WBznj2pI3RWbBJ3dRUNzLJ8m\n1cxdd3vT2Zt+WGD6VIBsPQfdzSyMPvKm8Dv3p0ZZpFwwQ0hHyko3zE9KCghjG5pFfbg8inbV\nWNoxyH6AdqYBuDPnbg5A96cN4iDFPmPJ56UyR3LFF2/dHf2pfLMjFVcjjOBTIpgzMzEqSMKS\nOAacuYRhzhs9RzxSEPdBLGCrnHenceWSOmMZ70xFKqSOVJ4qUDzNqggFfmxQMXzDGiqikjj8\nKVZA2/B4HGKFuFkJVgdx429Me9MjiEY3Z8zB6UbDJuGChuCBx60kjfvACOSOajz5qlv4j27i\npPtG19uzedvU0wFjby168UkUhPAGB1HvTQzTKVwEU9W/pQrIkmwHkcZpCLCs+CelCDj5uV65\nzUMZbBJkwuabuSNfmJ2k8UwZY3mK4Ln7jDAApY1VWKkcHnFIJHyoOMYyKev7wNxgj+KmNDuW\nYLuGPQU1pDjaOTnk0u3dgcKeu6hwrktjYF7eposAeWFkK9CehpMFlzt3KDzTdobJDZ7n29qc\n28EAD5SPWlYZIrH746jgZobaVUDjJ5NNVSykg8L2pP8AWNkcY7GkBIyguI8ZI6ml8tIVIXIG\neveo4HdtzjgdzTwx4YtQA+VmK9S5749KazDd3GR1o8xlYuPvEYzSySkFAoHI5poBV2IZTx8v\namLcKocLkcZpZVTcdi7QPmPv7UxpPMbew255C44FNiFPzMrFckjpSqdzfSmJM21gRx3NNaQK\nwKN8npUajHKwabJOQKVJGVmAbJ/Somk2kvikabcwAXnNMZIcbTk59abI+2MAMM56U1iG3HOM\ncke1QxTRyMcYZ9ucdaYrMm8xz3K/XpTfMXbvK8Zxntj61q+H/CeueKrxY9K0u6uZOSCsbFcD\nuTjivXfBP7HnjbxZcE6g1tpdoeWYksxHqoOM0pSSKjCTPCpbj7OokQjyhx7H296ekkly221h\nkmDHjYhY+wwPavu/wT+w74Z0HbNqbPqbMmySGXG3OTkqB0yMdfevZfC3wR8LeGoktrDQ7SG2\nXkB4VZvrk1g6hvGg+p+cXhz4M+PfE88EVl4bvAkhBVpI9i49ST0Fes+G/wBiLxXrBjk1rUYt\nOtmYhoogS4Pp9MZ/Gvv238PwWyjCJGo4AUY6fSrf2eJf4c++az9ozZUbHzB4W/Yc8G6fHCL9\nZ9ZbliZX2k85x2xjNeu6D8CvCnh9oTa6JaRJEuwAxqTj3969H+0be3y1BJeY561PM2a8iRQs\n/C2mWmPKgWI9PkXH4f59aurY20EgeNQrDjgY49P8+lQveHg44xULXGTnOKgLJGjLMBjI4H+c\n1Wmumjz3GapyXR4JORUDXBz97rTsVdmi1wN2RwD1pPtRBPoao+ePXFQtdbSRmmBflm9yDV
ea\nYdCcVTlvAfmz0qnNebhkmgRoySIq7i3FVhMrZA+XvWa1182W+7TJLpShGcg1QmWpbsxq3HJN\nNF5uUE8Vky34Ztp6dqaLkButFguajahuBKkg/wA6b9uZ+Ky5LjY3qKqzXzLgA8U7BzGzJcHa\ncEg+tU5Lx92c4NUFvWC5ao5rrI4HNPlFzF9rgtkniq810FUgtWcb0sxTOPaopJ1HBNVyi5jQ\nku8x9fpVWS5Jxz25qjNNv4BwBUTTbV5PfrRyk8xfSYKCO5pqyFY8k55rOkvNsbEDcRUS33mr\nkfdxV8qC5oS3Ss2QKryzH8KrrNkYzUMkxbODxS5SblrzgF56VF5wbqKrM/7v15pGlULTsK5P\nJMAoB5qNnXdntiq8jbvpTT833TS8haksmOjCovKaQEAfnQSx6nikaYqPlOKLIpFWTHO3tUe7\nGCTSyH5sg/Wod4L/ADdKAH7t7daVn25GOKhb5ZAQeKXd82e1Ax0cm7OOtSrkDP6VArqudvWl\n87b945qbCLHG33pqjK5JzURm3Dg0xZsbhg0WAtrIFHLUw3IdSAOaoNMsgLA1HvLH5TiqAvtc\nKpBLc+lQmbdnAwPWq0PL5+8BTztZvl4HvRcoSSQ9ulBcMoOabuG7aMcUbh6VLEIF+bjpT1K7\nj8lR5K9+KUfMRzxQIVpPUcUbscd+1K/fIxSBKLDJV6cj8qYw4JzQrnBFCpwc0CGx/N04FXLe\nHzeKgiRVdNx+TPIre0xbNbqHzcPbBwXAPOPSi1xkcmhrJabi+3PfFZ91pws0yZQTivRGm8Ix\n2c1yk8n2oEiOzf7vTr0qOX4jadY6e0Ol6BZW8zja07qH3DGCceta8qRHM7nm3l7eDyf8809R\nuyMY4prsGkG0bV+lLHKDuBIrItgWZFAI4pXd+NpwKZJMsjYJIHYUwcNnqMetAFv7QWwtSYEw\nxnms0Ha5wcDrVq3uFC/1oC5dTGOlI3ypxxUXnbSDniklbau7dxQA4sFXJ45pwYlc9s1VmbzF\nqRWO0D0qRXLDtgcGmbguCp571C3zck4pocdc8Uii3JMixkkVSklUsWXpTZJi6kADFV2HtzQI\nmST5tx4xQZu+arrjPHSneWdxIPHvQMk8zHIJqL5iwbNKPvU5cbfU0AP3lutRMrcjnmnhty/L\n2pzKduRSAaq7SMjBp20hjuoZSxBbNEitJxjA9aLDuHtn5afuO3appix+XgFfxqVVGetMQKDs\nAY5pVbb15zSGTawULnNJz5hyce1AuopyOnWnI53AnrTNpaTg0oY9xzmgYvmHOT61IrFlOcE0\n1lLZx060GErgnvQBJHnaMnmpEYSHBPSo1jPWlVQc8YNAEm7cpPahWUjnik6LR5ZK9qAHNhVB\nzxUm7KfKKZjau0DIpy/e9BQMeszMpHelbIXt0psfzMTnAxSfN25qQGxuzfe5FT7iVGTxTRHu\n7YoVRuwo/OgCRpeoByKI2GMj8aYykdAMUi0Bcmzub2pY9vQ1CuVyOpp27A5PNICwqjcAKenG\nT1FQM5DKAc+tPyV6dKfQCfcD25pm3K4zzQ5wqkGk3LtOeDQMXZ8vWhW5GRzSKwRcdT1oVj2H\nXrTFsS/e4GTmlZaZHlcDp9KkOfxpdRjht288Go9xxgn5aRuTikVeik5FAEgYbcdajC7myKXB\nXJxgUmTH0HWhASqfLTAqRJCFzjOKh39AOlP3H7oNUMl8wyJyauQalNDb+Urssf8AdzVBcqMY\n5oDHOM0AaP8AaEskPlB8L6VCZcj3qoch8il3HdSsBbjmycEGpfMGRxxVOOQ7s0ryMef1pAXY\n5C3A69ia6i78YR3mlW9k+nxRrHj94vUkdT+Nces3y4zinNI74z0poR29v4k0WGEh9La5cjlm\nfH6VHc67plwoEVqbcdPvZrj1bb34okkJVfTNMDae/iZm2/lUD3aMv3eMZrKDnBxwTT1lC8Ut
\nRam6t1YSWqhlYyY6r2rPk2ecPLyM+tVlYSDHSpMndwe2KAOj0vTbFtFlvru+SN0YqsPc/hUl\nvDpczqPtAUt/eXj865VmPmluueBSspVcetUB2yeHLC4m3RahHj+6pBplx4bikLKl1CpAzyw5\nri8eWODgd6PNdV+V/l/WnYDX/s9/NEaDec44rYj8J3t3aieOICPOOtcvHfSxqpDlT61dj169\nSEIl1IqjnAbilqBrTeGJ44y7qwUd8cVm3Fi0bYAyfQUN4m1CSPypbl5Iz/CTxTI9YkX5WAYU\n+gCeQVzkfX2qPa69eAasR6z5XGzcO+atTeJvOt/J+xxgY+/3FK7KsUY1Kj0qTnbwc0keoQ4+\nZM+lJHdRPJyMA0gF3OvPIpfmPOcelSiSJuhzinb48Y60tRkPmMRwOaTcy4FWPLRud2D2qvN+\n7XrTAczE89KjZ8YqMOT1NStkqOARUsQm716Ghm+WlZScdxTlVQuTSLGIo2k0qqVYk8fSnKit\nnHShl42k0EsdnjIzSLuDZzzTlToc8U4kenegpDtzL3JpBMfU03J554zS7lVfU0AK0zqeMkVO\ntw+3HeoE44NOUepyamwFjzG3BicEihrhgMg81CSN/oAKTFTYdydroleTzTvtuyMc5NViAc1H\ntOMnpSHc0Ir4Be9XrW+LDl+KwGbgelWEkLbccYosB08MnncjoKJZmVeW+WsezvGibH3lq9JJ\n5y4PA68VPKMvwXJdQe1TPIcdcVRDBUXBxx0pWk+YDdxRYC6Lk7hkVKtx83tWerBs55qSM/KB\n3NFhmqt0oYYNS/ajnI/GsofKRg5p7M3rRYk0/tXzZxxSC6U9qzVmKd80LNuz2+tFgNdWVvm6\nU1mVQcVQFzxjNPFwWXtS1GWlZj7ClWq6zYxT45xna3BoAmzQelM81eAeDTi/PI59jQITnb1o\nGSDQv3TTlb5fSnYLEcjMmAq0/hlG4c07cVOexpWO7oKBjVUduKGf5s04AjjHFJj1ot1DoJvD\n+9G1MHAGaTA5xSqM4oYhGjViDkqaYsLxszLI1SlDtJHNOGVXOGx9KAFjvJ1wpGQKlTUmBwyY\nqDcNvvQZA3BxTXYDRj1SP+I4H1p4ukPKsMnisnCEY24pTGONvFAG0uzaWba2T0wKJoYrmMrI\nu4MMFe1YbLKudrHH1psmpXFrECe3ei4EmseCdC1a0kt73TbWeGQYKyRD/Oa4PXP2ZvAHiGJo\n5NEhiB6NGdrD8c13drrD3A3yjeKvw3i7gc7QfzpqVgcebc+cta/YV8MSwyGyv72FjwqhlYD0\n49BXm2v/ALCfiCxiMml61b3kin7si7CR6dTX28t8pYqpJ/GnLKW6mtY1pmTpxsfmxrv7Mnj/\nAEdXkl0Xz4R1eGTcD7CuD1HwVrultsvdIvLMn5gJIucV+su5ZIwMAL6VX1DRLDUgi3VtDNGp\nzh1zmtViO5hKgj8j5ElgYLKMYOD8p6U1ZB1HzAd8V+o3iT4JeEfFquLzSbchupVACeMdfavL\nvEX7E/hS/Vjp93eaZOcANGwI4HoeK2jXTMZUWtj4MEasowxyTmpOeRleOOTX0t4s/Yi1/TY9\n+katBevub93INrH0wcmvOtW/Zp8fabGSdIWfgsGikB6HHIrVVImfs3c8uTuDxjvSxk9Ccq3e\nr2ueGdW0ORodQsLiylH8EqEEj1rM84eUOMkcYXmqUkxcpIZGiBPANRrM7R7ZDnJp0yOVGUI4\n5yMH8qqRzHccDeAa1smiXGxb3DjnBHvUqyBgccjqarnYvznLN+lHmjtxSJJmkjXqOT0ppbC5\nHDU3d1ONy9s1GshVt2PzpXAmSU5wTil+6vPX1zUW/PBGecg05nJzx9adwHP+5HTdQpDnIHy+\nlNjYsMlSeDTY3IYdfpQBKMr82cr6VEZA2SRyT6VJwGLHp6U1sn2oAXd5ZHy/Sm7myT1FLI/y\n
oc5xwaTzCufl4PrV6AGxmw4YCnxtkkA7jUMeWfGMVImFOFpbAOaVeCTmhSeSp98U2ZUZeDg5\n5FNVdp3ryTwKBi7mIJzg+lKuWU9cU1mKt83yihfu4U8MetBLHKoKBs7jn7velkO2Tg5XHSmB\ncN/dx+tOCrkcYpiARp5WRkEHNPVRLGGVuemelNZtykD5h6U3zD5ag8gd+lGoEm3y+S2T0BoZ\nsKT97jmgMwULjI6io3YqMgd6BEo+6u0/N15pVUs2See5pv3WDZ4xxRGxbjGDSAc2wLuHHvTo\n2ZzkjGRio2+9tIz3+tK3zEEttOelAx7csGByAMfjTF3vnB496TmNiFAo3FYjlufSqEOl4TH3\nSOc05ZAVDnn1qNcNENzfNT/usMn5fT3piAMgPQ5o4BUZypNO2bVYsPnH8NRu6bQAOc5pj6Ek\nI3MTnB9BT93mK3HbpUQBG2QDO09qRpCNxBwzH7tNIkkjwH3HrjFLHNufB6dKZGuwBjz61JL+\n9RVj4Ock0rCHcrIx6DoaTjK8Zx0qJ3dVCsQcnFHDMSvHbntTsMk+ZXPO5aTcxUbqbkKAu7mk\nXBPdjmgLIlhweoxz2p7fM2FHIHeoizQ7sdM04MWcFumO1MB6sPK3jk5waRmO3AAGab5ZYlei\n9sGg/NGFHJB+9UgPOS4AboOlHDfMeR0psPzMTnOM1Ifu5BHX9aGITy/LjJxlqVmPlhWypxmo\n2ZmYc9OtSSONq5Oc0g1EbEgAxwBkYpwk2qDty3SmRyKrHceOgpxJOGzgg0xj2ZmIA6dxTl2u\npBAIB4NRBwr8H5vWhWG87evegY9imw4GKFxuypNI2CwwOM0ryLwdvPtQAqnYxJ574oyJFOBt\nGc0hYFgzDANObC4C8DuaoQ1hwM/n6UIy7SwbABpTnGU+f3qNmCKH2fMe1Axx/eOMnAI609VM\nWB29TUUh8wxvjjOKkx95i+U6BaEJimTcxzginRsnO4H1qPdGMbRk0STCSMFRjnmkIlBXyw2e\nM8fSmxsXc+nak3rsKkcdqSOQbDg9PWmMk8s5YnOaduC/NjmoxI2AT+BpGm3OUCnpktVIRJuw\nhbPPoaVpf3at05qBssdq8HHJNPRlUBMbsck0hki7TG3Q96ljcGMAcE81XbaOegJ7VIoXd8wz\nTJAMoUj+LNOCmopFXyyUBJp65kVQCQaYaj42VeCKXaORnAPOahA2Z3fezintIVUlh8vpSKJV\ndWXgbQKWOTdgMucGo+GVHX7pp6ttySBjpSEwYFWwPXNTMxjT3xg1FywK/rSvGDGBnvVFWHxs\nePl+lN+6u4H6ijzBEMMOD0NN25BdMdelILEqvhNrLnP6UeYuOF6cCmMrtt54zTpGRXI6mmJD\nocCFwy4bqPamF98i5yAOtIrbZiCC2R96nBmJIC7sfxdqRQ9VZlZlznrxTWB3dyc0kcjFsfdp\nsziHIByWOSaARMOpbqMU3czdWx3pFUHvTZm3MvGSD0FLURJ1XGQD1pwZWj6/jTBh0ORtOenp\nQkaJuJfqMAe9AE/nMsnGGytM3DqGAx2qKNjH14IFHmGVuQPbFMRPG7HBJAxxT4d0zHdyQMiq\no3LNk8oB0qVZgp+919KAuPjcsPTnmkdQ3BPzZyPeo87PbJo8zZICcbfUUAPkX5iRxxScKvzH\n5j1oPyg8gmjzAGHAoAfIxWNeeCetStIFXk544qu0mSc/MvYU5MbCeo7CmSxyR+Y+7IJxTId2\n7PfnOaa8isvAKsPSkx935sbuq+tFgJ8M2Rt2t0AprR5YBTnHU+9HmYO7otKq7Y9+7C5p6iCM\n+ZuO3G2nuS6xiNtrscUkjCKMYPU07cPv7fYUxDkkMO7aDkjaacmducbjTGdlI6Yx0pA6qvJy\n/pVIESElinOCDzTpJBk/0qCMlm2lSe+6pIWGQo555zRsNku5kUbj8tSIwC9cKe9R7cyMpOBU\niA
5YHlMUyBPl24+9z96nrFzkjj2ps2A8YH3cdBSsxYFd209qYw3IuQDz2qSNS6iMthqj8soV\nOBU6/MpbbQAipuZNzcjrinyY3FuTk4+lLGq7fTAxQ37yMLjAHekihVzhh9/IwBVmObavK7Wx\ng1UVWK7gePSrCnLDvxyKsRKrNt3Ecdak2MTvBytNjbzUbPCnjFPjxtKqSRnmgklWRfLCrySe\ntTiQrjAqtHGGjO3GQat2rLJHu28r2PWqEyeMK6k5+Yc4q1B87A4wMVV2OdrKNvPIq3AMA54y\na0iZlyMEL0yM1Iuw5A4aq6sxYL6c1NDmSY4rUBFyoYHjPSonLxowOOlWGj5JxTJoTIATiiwi\ngsgUY281GVDDKjBzzU11GOCMZWoOR0JwahjEZh5ZzjOalLbUTA470xlj2gueaa0iyLtHFCGS\nmTKkjg0n3uGIHvUSgM4XGF7mnbVySxPtSC2pMkO3HP4Us3TYajjZtw4pF3FvlO5s96YpA6D5\nQsmz1pArMgBYHmiaMuxGOKkWP5fm+aqQDHkdeAtPjkCruI56Ujpuxt49KBGyqewPOaAJI8M3\nzHFICQzB+AOM+tN4VVZm6H86c0h3YY8UAP8A3a7Sxwqj8KfCxddwOPaoGkHmrGfmB7U/7r/L\n9M9h70wJGyrEMMnGRTYZPMB3cYpSdu3D7iP1puDy5XaT/DUgPgjZslhSKVZ8EfjQzMo3A474\npvCqc9TziqsBL8npRUGX/uUUWA+LEcqrjPI4DYqZrkycmPay8L/te9NmPlxYyvz4OPakmkUs\nmCoH8q+ZPSHNhncFvLZeTRiPae644qLzg0jH5WB/i9KcNy7g6jLHhs0w1G5MyKerL+gqNx5y\ngOe+cD0pzIVkOw5HRsc0Rv0Xau0nG6kAKg3FkO1R6npREwY7Vzs/vGlVdqsiOAueG96jVDCM\nPkqepXvQMm3EZbGVPG70pi/LMFGB7+tOMiKq5bAz0FRed8jv5eSrfnQA9yN7AA8nA5pSxjkE\natkng+lJHubLnhiOBTS23apX5hzmmBKzencUny7V3pnB70zzDJiMHB6g048Rrubcc8+opEsU\no/O3pmhZHQklNwH8VNCljy29B0NK0bxwjP3WbigEOWZVjLAcelO3AYj68ZJ9KSNVXcQd3+zQ\n7Fo9xA9DiiwDo9rRlw2WX+H1FKzZUdj2pgUBg6nIX5SPen7WTcCFbA3H2oKGr88gxxmn7yGA\nA284IqKNg0mVB3Z5+lDTDduAwh4H1oETq24tjg9BmljyZmDYZeuOwNRQgFXYsRTI38lWBbKt\n09aBE235ix3LjogqWNPLUFxl2IOM9qjSfeyRuu0r19xTmkbzH4G1etNFEsi7rh+QU9qQKu4f\nMR6io4cR+aucqxyBTmd/3bfgKQh7Sbec98CiXowzkEZ96jZtzgsu4HqRQ8YEeXOGB7fpSGKZ\nGaNlwd68Cn7n4DrklucVCsbbWYnAanBiuO2CKYiZvlXCjHNOYiQcjFRxf6x2dtuemadbtsVt\n43imG5JIQ0ilDwBzT0YrJ1yG7VDHIjIdgPXvT1YhSqjBz8p9TSDqTxuVXzCOvApjXBRgAny5\n5pojKFctn1HXFISOWByoP3qoB8mMYPVj2pAknlj1U9PalaVljGOeadHlZGBfJYcmgYMxEkZR\nSQTyKFb5icfN1IzQ7FhgtwOmKj24YYznAGKBdSYSFMoB8rjnNOj2rHtbkgdqiWYq2OCehFO+\n9k9+uBUj0JfOCptC847jikjkXaDnOBUe/ZmQN7nNRCbdyItrNzVICUNzuYkj0psruykgjA6+\nwqNm4+f5W7Co/OVnIRd7L156j0oY1qMeYqW+cjI4B70+3y2VxjHr1rpvCvw/1zxldLDp+lzS\nhsbpthKKucH5vUZr6I8P/sR6hqSwTajemEkAhY1wcdPm9qzbSNFTbPlNrhA2xjtPocnHvxXS\n+GPh
n4p8bTRJpWlzMGGVldSqt6ckdK+8PBn7J/g/wrPFOljHdXK9XuFEg9+DxXrmmeGdP0mN\nYbayhiQcfIgGRWLqdjaNE+H/AAZ+xL4k1q5S41u/h0+3zh4YwS54r3vwN+x74J8LtC81iuou\nASd5PUn71e+pbqVG1doUYFTW8YDBWXIrPnZsqaRjaP4H03SjF9ls4YhGNo2xhR+OOtdHb2Cx\ng7tvzcnAxT2mVV44qGS66HdSHaxZDKmcKMfSka62c4GOnArP+1bm5ao5Lj5sZ4qbFXLjXm7P\naoWuPlzVF7jy+vNVp7wj5Qc5osPU02uAF64qu1x154qlJcFh0qu03y4PSrRLNH7VujzniovO\n3LWabgjAxhaZJefLzxiiwrmg0nHzfhVO4uyJV9KrPe7o+OWqvcTmTB7UWYcxekvGboahkutn\nJJNZ8t15Y461Vkui3U1XKPmNSS+BU4FU5r4yr6VQlu/3PB5zUMlzu56UcouYvvcEry1V3uwF\n+9We10Tz271A90q8VSiTzF9rjkZ6Uxpt2SDiqHn7icdKa0p+lVyiuWZLt1xzTJLzOABzUG7c\nozTG4c5HFWloLmJJr5tvPNRi6PJX9ahwHzzikkwkfvQK4yaIyOXyQ3sacrHbhjk+tIpIGMc0\njHHWgVwaQdDk0hkDYHpzimIR1JGajMh3EgZp9CbsfcTIVJAxVSJQuQOh5qSTDL/tVAyntUsa\nJN4VeDg0vmhuc/Wq7SDp1NMUbVz271I7F1WDBj0Wq7Hd0HShpMJtxg1AzBW5NVcLCSSHd1xS\neZ8uD1pkwXcDnikLA4/nSGTeZt45qKS42ggdaj3fMfm6dqheQMSw6UWGh3mfNihz6c4qJstg\njpRuLMFHWkMkU+oxS7sVFlmIXNLyvXpQKwrZVeKjkVux4pTIOM8io2bcfvcUAG45FIuWZm6r\nUm0A8DIqPd8hXG1s0h2GqMMc8L7Uisc5AxmhQ3IAO3FRSFlkAHFMGWYJArHPQ0zzAzGhYzt5\n5zQsJXOOnvU3AF+YFu9JkKeWpVi4680iKGcjHOKaJHHHljPU01flbg0+SHKgDr7UqwhSM0Ma\nQjYY8mmNJtYgcVO8K7sg8UeVubgCmMhWQ9OlSfMwPPUUsaoJeQcU9mEfIANSxlUM23Bqddy4\n2HaKThunGaOQuCfxoESBi2csRnrUjTBsY69qgVh2GT705SvOTg07skbMrbxnvxSND5Z4anN8\nxBznFRtJhju5FIYfxAZ609kwuMfN0qldMd6EHAzmrG4yfMeBU9RhJGeB0FOjgbO0Hik3Fgcj\ngULIcg9hVAW/m3Be1LcKGjKluagV++etKykHpk0APjH7sDuOlKzttBWotp3ZB4FDOOvSpYxZ\nC3CnimSHLDa1Isgb71J8o7VIEi4PHenA44qLb0IanpyaYCbF3ccCnKFdcg0xsbs8kUvG0tnC\n0AOWPrzzTvlVeOtNjxjczYz0pJExgA80APRgpOFpV+ZfehMnik25brzQBLuxgN0NNVgygdKZ\nkt90fnS8cD0oHclOduTTBtb60GQ4NIeFBxzQIkXqCeCKbJlcsRTPMO8qRUjPuIB6YoAFbAyB\nzilRsLlhSfw+9Jz1J/CkBIrBWz0pxLH5j0qGT7oxzQJN3fimBOsnBKjmjcdvTmoVm+bp+VKr\nNtYZyM8UDJEdl4apOM1Bu3ck1JvbouDSETRtt75FLu3MSRxUIbEmDyKVSW74FMZNjgmjKrzn\ntTWcbcZ5FN3BeetIRYVtvbk0NNhdoxmq6yFhweKXcAPWmMm+706UgbDcCm7iFwaI2xzSCxIG\n6kkg01W65ppO6jtx0oESKxHA5p6vhSCck1GrZXg0ig8g8HtQMsbjsCnvTkwxJJ4HaoGfaoPe\nmyS9Nv40FFjcNwx1p6O2cfnVXzhuHrTg20E5piZdV/lPPNOdiY896rKu5d2cGnKzLnJ4qeok\nTKwfrU
fmHzQQM1C27seKeuGbngYpsosFgw+9g+lIr7mx2qKNlDHFEj478UITJeduc/LSqw3D\nsagaYcAc0hyz5zTBF7e2ORSKw3YPBqAStuBPTGKFY8kHNAydflJBOaUEBycVGS7Dd1pu49+K\nlgTmZcAdyadv3ZGflqqMFsjrTt20nrikBa8zcu3gVIrNtAqqpDLxUm/avXNMRYOCDSM3yrzU\nSttHXIokw2CD+FAyQtubg08sO4qBflp4bHGc0XAmXAGelSbsEc8HvVXzN+VzS7jtpk9S28m7\nGO1OaTJHOaoxyHby1SBgV44OaY9ywMHOfyoC/u+RioWcMcA4al3NjBNAWHMx6ZzTg3y470zz\nA2ABz605/kGRzQBLuzggfnSsd3IGDUatt4PLdaXzSy5xQAuTzkYp6n5etIrBgO9O4xtHBoGM\n3FmODUyuduMc1GAp6U4ZxknmlzATK23nOPapBNuzjiqgk6cU4M3mYHSlcZZ+0MqjJpskm5eu\ne9QlgTg0jN1AWgRNGxb5jwal3Fu/FVEyVPanKxKgE4pFF5ZRuCr1xT9x24qnCec96fu2nG7I\noAsh+2eaflR1NUhJ83y/rTg7bs44oAubwCBijdxlarfaCBjH40m8rtwetIC1uHpQuCMnqO1Q\nhj1zzT1PzA9fWmMn7Dil24GV61GzA4+apEbgEUCH9V5HNNyc9KFbbzkUu7f3xSAZtPanNwuD\n1pxIXHem7Szc0rDGNzH0p0eWUYOKF54pdm3Ax+NICaM7SMHmp1u9vHXNUHbbkA9KXcGbIOKA\nNUXQVQSaetyOpIJrIJbPqKkVtvOcn3pWGjejuguFxUouF+lc/wDaH7HmpI7p/L5PNIZtLebe\nCKke/Xb71hNdFvmHApyzFj14pkm39qXqDk0R3S9GasZZvfFKzGiwzdW6jCnOBSLeJ2Y1gZZm\n3HpUiyEjGeKkDpobnZ82VP1rT0W7sDNIb2MSbhgMOcVxYuGGcNipI7yWE7lbb9Kdh9D1EaPp\nFwqhCEOMnY+D+NQXmg6erIsdy0JPRS24V5//AGxLgAPTv7YfZkjL1NhI6+bSnhjMizJJH7Hm\ns9i6qxI47VhLq0m0YPynjb2qb+0MDDNxTK2NNJi0fPXPFOW4wxHWqEd0jfd60sbBSTu570hm\noJFZMZ5pu7PI5qiswXGD+FSrMedvSgks+Zt4PNPUhsgiqisW+bpVqNWK/dJPsM0wHKxXoauR\n6o0MYQxqy5yc1TMMifeikH1Q01uMfKyn/aGDTA0YLy1aR3ltgQRwoqG48iQLsTyznOF7VU3c\n4zxTmYD+Kl1AsPBDuXZISD2NRmP72G6VCsgVgetKsm5z2FMY7ayL1yKikjEgIYZBqUDdQOpp\nCI44xGuMAAdKeqqT6ChmO0ZGKeoxxjigCT7MqjdG5z71GzXMfP3lpzNx6ULKV6dKOoCLfMAA\nwxVmPUV+6wxUG5ZDytJJbIx3A7TSA04r1Nv3sD61Os4ZCd+fxrmpYGXBzn6ULfSpxmgnqdCc\nvh8gDvT2WKRGXaAf7wFc+urTJ3xVix1R5tyyc89aabBpMr6n4F0HVtxudMtbhmbcfMiDc/Xr\nXAa/+y54J16SaUaZHaXDnd5sIC984xXrEMwYcYNPE+3ILBWPYmqU2Tyo+TPFH7FFvl30XU5o\ncnKpIm8cnOPWvLfEX7HXjXS5Z2sRbXq7S6BTtZsDOOehwD/kGv0GEmchumOtCqkm71Pv9P8A\nCt413FkSpKR+V8nwn8Y6bvjuvDOoRleWxHuxgEnOOgwOvpz0rl7mHybghjtIGMHjnuK/XCXR\n7S6heMxRqjqQV2jB9q4Pxd+zv4M8YwEXmj20MrYzNbqI3HOcAjFb/WEzH6ufmMu9lAUZGeT1\nxQ21hh856DFfbXiz9iPSpLZzoN/c2l3gskkxDx9fukdfxz+deF+Jv2RfHWhzOI4IbxW6GCUY\nzzjOe3T/
AOtVqomYSpuJ4xErNlTwV9aegAmBBz6g102ofCzxZo7SLd6DdAxhslUJ6cH9ePxF\ncm7NHk7SeegBzW0Wu5k4u5a8wKWwcE+lRhjvViMGo1VpAWjyxHDAdqTzNrbidzdAK0JJx87N\nluOtSmQMwGO1V9xUjOMt1FOeYblA4peQA2WfDDgc5FOjUySZUZHvxSMxCsc5yeAKD+8iCj15\nxTAdjLNg8D0pm1ehHBp+0BuHwMdKSNkbO7JA6UrskZymVpTlmXBHy9hSMcvg8n0oj+8xIwcU\nFIC3zAkdaVpNvy4+XpTdxZQPSnsplUq3DLyMVRLI8AsRuOB0NPVtqkEZHrRFIpO1lO3tSMSM\ntkAUASfwgdB1PvTZHBycZ4pJCJFVs4OKFXfEQD81MYK3K87RjFC5G5T1pshLgMwxinsu4Aqc\n5pkiBv4TwcdaVWYsC3THaiRQ7bS2PWiNfnAT7o70rDJCxB24yMZqPazLnOBnpTtx65yCe1Ir\nDJJzjpigBQw3cpgnvSqipu385pskjOwXjPahQzMR17/SqAUKp+VuCDkUkjbvlByRzTgAzYzn\nihUVuQe/WgQg3tGHZ85/Og/wqAKBwrA8EHikU7jh+cUATo67sEfjTVZVY/32OBUSsyttAwM5\nzTmk3N2yO9ArEiNuba3VeM0xf9Y2z7h4py4DYLdRzSqd7KoXAWgLDY0EchDjIxwT60is+0s3\nAJxTo9zeYm7OeeaauZdqkfKo6+pphYkRRxxk0uwI/DEZ6YqNs7lPQd6lZirME6dKLEsZ5jmM\nAr3609JMN0yvvSbicAE4HWkUh2wvzAHFMaHllViUyff0p0chEfq2eaWPEOdxGT2psbbd5J3r\njp3FIbH7iD0wKTy++cLRC37vI4B9aEaNlbd0PamxDedwIdQO49aFkLMwI+UHqO1Mt40Gcjd/\ntU5n2t8owrcZpWEPRFbnjHXmnlSR8p4JqONRg4bcemajXdsYFsEGmGpO0RVtxakjXcvoag8z\neuDnA71ZVlZc84HpTAIv9bnHI6inKwXOBuqLdvkLhgpx0pI8orHqBSGickHG7oe3pTQPmx15\n60wfMu7GSOopWZvLAXnJz+FMbHr8mQDTll28Ypi7UXax+c8inMdowSKBjHZTwuRzSptjQqfm\nbNLtLLgMAetHKksq7yaBDExGxXv61Nu+XJ5x2qKPDMcjmnFWfGSEoAe2fQAntSRqrKQeh4pu\n0eYXY9uKbycMG+XPSgLEuAq/L0HFNJZojjnucU7aNuQQVNJyvyrwKAFXHHyc4o4yAFwMc0wl\ngy+v9Ke7bExt4NUAu5IWUkb+eBUrAsxZelV1YL/Dn09qd522MsB7YoJJIWK7kTnPrRGrFSW4\nx6U2Ntq9eTT2OOAOvUUxjt+3bke+aGbfkdaRmVeSCR6ULOJMnGMDp7UgFOVVIweOtOaP5gM5\nPXFN3xsysDkGnbhHNu7jpSAmaTcu0jp1qGNhtYKeO1P3FssBTBhgSq7QO1ADyp3IDyMd6VZA\nuf7oPShcquSMntTPLKkliDu9KYiQSYjYnPqKRGX5XJwe2ajRW5QnJxxS7mjUKMMe+aYEnzNh\nAcHOcigbi5IbP+zTvO+YfLg03cFYEcZpDEZjk9SD3pylV6jd7UvzFiAOKaYwGweDQBI2EIYE\nH2phYsmd2OelG1FZQPlBONxpqLtZ8gHnj3piuTDLQ56H09aMjjIwcVGygtuX5RjBp24bBnoO\nppDH8NHuAz602PG9fSoRu8xsHAIp8cm5VH92gZLIGWNjnkmmlQmBj5qcDuk+cYFRswbPGTmm\nFiZcyZAAOOeaP3bSKvY80yPCnIOPWlkT+LPHbFAhw2Kx4JHY0qEE5IwKj84rhQOPWnMVHy9T\n1IFMZJ5arGSTjNIyp5eCflpjSlsfLgU5l3YU80E2Gn5SNo+WnbVUnP3qFxg4zxxil4YhiPxp\niY5lDYGMUD
gEMM4/hFRNIGlGeq80Ky+c7ZIyMigRIfmIKnPtTmLMo28DPNMTPDKcUqqGY/Nt\nAFAhzMGK8HINPGwzbsYqFm8zAHUdxUu9djYID4zViJZWK8JwDSR7llVmIC9qSPLKjvycfdpx\nx5bAdeuD2oAklYElj+VOjzwO2OlRr8ybicnNSRpIufQ0CRJHGFyRy2OBT+WYZXBxUahuQxxU\nojO3duwBQXawFPM4c4wc1NHINgGCpz+FRlRNgAZNSspMf3cYoFuLwevSnkHb149KZt3YVuKX\ncSCPu4PahFDlbdwB361KFGchsNSLtAyo4701Y9zb+mKskmXgcHj6VKkiryvIzzUca7snHWnx\nfKuCuee1IksxRjzOOB1xVm1xvJ6fWqqSFnUheatL824uu1vatIklqNvMyKsQqJFxuqtbgPJx\n6dqvw+WvBXBPpW0SWOjzuA/WrAjQHnjPpUawkLkHrUoYAbSM1okS30HLtj4A4qJm2qyE54zm\np921lJ+53prR7skkBSaqxGpnzQ7l5NUt5jyoOVB61o3Chm3Zxis2YJHk5wc1FkWhrNuJ3DHp\nS8M4Udcc0gYcgHcO1KjEqWPy1JoPaPyxhDzjNNCllOW5xSKz/f69ttI5+6TwfQUuohzu21H6\n9sUbm2l/ummMCsQZhgZ4pVXjO6qSExfOPG7OBzTzcfdbHB4xUcgG9c9KVVKuTnKdMUCHLI/J\nAwvIpY3Kx9d+ecelNX92jK5yOoIoj3RxsQAd1FhD5k3KvHBqT72ARgDg+9RDeMbueO1SSKd6\nnO3igQKVWRsDk9DQsjJ+8XnHUU3gtn+H1qUOsO51XcpGKLDQseGyxGCaUztMNrLtI496hZyA\nVI/GpZAvy5GeOWpbAAU/KSwY0ryEbI2X5i33qVUWNcDj0pGzuXPzHNUMd839+il8n3opgfEf\nzyKM4wDwfWmOqycMmw5zTiwbKSYC+gprKvmAKc991fMHphtaNXxx3K1Osm7aW5GOhqFfllZc\nd+KTllO75CKQEkyLuXY3luW7CldWVyDt3dtv86SZjhSg3uAMgelJ5m5gw6np2/CmAS7mjG0b\nWB5prKcbSxBPvTmkaZd2zaQSOtNdVXB5JxzSAGzNGUP316Ed6kYuyrErAs3AApkbIyglMEcd\nadCQsi7v4f4qABX+Xc33lG0r9KQMZMdh+VN8v/R2DZZd27jvSqVZfm6DkGqFqAJQksdppfNV\nm3ZCjHDetIuJAM4LE9+1JIiRyDf+AHepFYcwHljbwM805stIpL7lToKZGpbljgMcYIpyxtuI\nxtP8LVRQsOG3EDMmc4o3MofdHz2AqNd5kbYcv0LU8SbcbW9uetLUQuGVVXv1NSLufk8oTz7i\nolbdtU/M2cH6UJtdWjXPUjd6UgJPuuWQ8E8DtTWZ/MQ7MoOKZtCxrgHHQVLHkvuzgAZpjHSM\nzXAwcZH3cU1dvlkNgMD8oPWjkMrMcqx60hx52cZPqaBDnkDOuBlu5obakZ55brQzbW3qOO9A\nkWRgAMZ60hDo5CoIC4f0qTc2xQMZzk81HGSqiPIxS7BGwBO8Z4plEkPzNncRH0GfWlCrIGBY\nkg80xsfdU4QHp705dyr8v8XGfeoESy/Kqng+1C5LBz8val8sAAlG83oVpY1O0juOc0BqNWJW\nbaHyvX2p8alQSh+U8U7yf3ijGOM5p+zawwMgdMdKoNSNvlnCnjjnFJGxVmTf19ac0LNIXDAy\nd1btTtobKnrnsM0mBGIzGpY5HpzzSbUiUIHyx5/GppEIZSRkdfwqJ0EnG3HO78KoB+4+WD3H\nWhZFzyO3NDybY89UY4GKjmYR/f5HSmMss26NTjAppZ1OcDHQVDJIjQ8HBAyBUXnMuM/KMZNI\nC1JsVS6t82Oc0is27zAeAvOageNI1GSx3c//AFvrW54U8I6z4svUi02zkuGJ2Yxx+fQVFzWK\nvoY32hDGQdzA
HI2jOauabp99rDr9kgeb1VUJx7e1fUPgP9juKaCC51m8kM5+ZordcKPVcnrj\n1r6G8F/BDQPBwhGn2kaqByrKCzE+po5kjX2Z8b+E/wBmfxJ4kkt7i+tPJtG2l2DAHaT0Gf8A\nPFfR3gf9j/wxoshubu1+0iQgjzD8yYPTAPrXv9holpaR4VAccZzxWgsXp0/OsXUuaRppO5ha\nH4F0nw+ix2Vmtuq5IRenNdJCDHlVyFxjr/n0pI4SvNTLIFXGOaz1NRfs4Zs9KmdgqgBRmoJL\ngBelRvcbm4NKwXYtw7McKQKrx3TqcNjNErhW96rSHJ3ZpAWmkJYls4prTDbiqjSle9V5JS7Z\n60wLDyZYHODQ0x289KptMAPpUMl1u4JwKYi7JLv4zVSRwzZHaoPOO3rioJJu+adhXLbTMo64\nqrNcYHXJqtJdBT1qvNcbhkCqsK5NNet90daia4P8Z4qlcTNuGOnrUfntIvPaqUSepekuAOQa\nge5CrnNVSzMg7VGys2QOavlBsnkuNy5Peo2Y8HNVt0m3B4xTi4LZJPSq5WRzBI3y5FROzY46\nU9lDLUUmQwUdKtRuTzjc5jIYYNQSLkDjNWXjZl45PpTVhOBu4P1quUXMQK21gMECndGJPIqQ\nqGbBPPtUZUK2D0+lPlFzDN31pJJODmpdoLH0qC4QiPoetJoVxhx8ppZF3N2+lCgBME0qqD3P\ntWbKG7tq4NQO24gVakhK8tVadgrBehqRleTKt0prSBVJokc469DjmoTIOc80DsJI+0bh3qNC\nVbk5pHbc3A4qFWLMaRVibGRzSS/Kg9KgMnUZpVnLKQBkds0h6kqZkU8/dqKTG4ZqEzNHkA9a\nGl3IM9e1NASlh3Hy9KbIu3gDg1F5m1TzSecWwCaQCuhWMnGWqNiegHBoMp3ZU5XvSMd2MGgB\nGPynHWgg8betO3bRyKa33sDipGKVD85p2VXvwahUlWIFO8z93yKWoyaRVVQQMjvUaiOTIA5p\nrMnl7lPPpUccxZeBg0CsTqyqvAOBRJtkYMeKZvYLgioy3X1ouPyEm1BLVSQM9qr2chnXfJ0z\n0qC8j3Lk8iiwbdEcdc0CNLd8vFBc9KYrNtHy1Hnkk8UFFhWH3icU3cFbcDmmK233pC+0HPSn\ncmxJ5wX5hnNN3E85qPB28nntSorDrzQMnVgwOBmkMpVfQ0xHb0waGyy+9AxWY5UnpStgsCOa\na3+rAzTgV2e/rSEIGHpihR1H5U3zh0xk0Fj1FIQrErx69aViAQuO1G7ocZpJvu54oEIrDB54\npH4HIpWQMoPQUzAY/epj8yNoxKwPXFSN8vQcUpTCgrRyAMYPekG4p+YCnKAyelMLbecdaUzF\nRjGBQUSxx7eSc1Jt2oSarx4U5I/Wn+ZuyAKY0LGw7A470wfMxzyBTlLdO1OXHJ7UmTcYQG6L\nS/exkYpu4qd38NKxLD0qRjtoUE5waI2wuDjPamH5m54FKANwAoAduIBIHHek+9wBxS7g64HS\nhm2rgKaQDhj04pFXc288Cmbm69FpNxYZzxTAnXvim+ZtbBojcH1FPDKy570DFVffAIpOVU8d\nO9Lu47U2Zswkg45oJHKT5YJ59ajdyrYpI5PlK9Khm3ZAHWkMk3ZwG55qff8AMAozVL5woAIz\nVtZMKP72KYD2+WQ4NORdzEHGai/h+9yaQEpigB7ud21RxRuULijzOc4pGUq2CMk0gFViG3D6\nU75iy44WomOeKenzd8UajJT8ynHGKeVyq4P1qBsrjHIzzSxgbjlutAibaAODS53Y9qiz84BO\nBTyxAOO9AD9wDZ60My4HrVWRiFyp+tEchcZNFxloMFU5NKGAXPeokxuO7mlyCeOlMRIrfxFv\nwp8bBlyaiULtb1pUYKpBNAyXcAuB+tICSMVEx+XA/OhQeOaAZZHyqFxilkk6evtUXuW4o8wd\nQOKARI3zHjNI3t
0qM5U81Ikg29KRQgJ3ZAqTnjkVGHw3rQrDeDmgllkNtxjJpxJZTmoVY+uK\nTJkyM4NIaJRnfnPFDNjjtUO4qwXqacWHQ9aOgD1bGTml++oz1phbP0pwx9aAQuDG/JxUkfXJ\nqIbdnzDFPWQjjHWmCJQf3eSe9OVgBkiovvHnpR5mM0DJ1kLZxTsnaOM1UE3dgQKmVjtUjket\nILE64XtzSbjsORgVHIwOMdaF+YkZzigCQP8ALnGKkSTK8c1G2So44pqN8uQcAUAWFbb70pwF\nznDU2Nt60yR+xGKYEsj9MmkaQseOtQfe46U/5uMc0gJVYbgf4qlaQbeBVdWCsDjmn/e+tAWH\n8bRipFkG2olzt4xTDIQwG2gLE+4bgalYFcE8rUCsOuOKcshk+8cL2pgSK2Ogp/3Pvc1DG2Oc\n0/zBJ7+9ICRW3ZOcU1GPmEZpuQvOTSBu+KA6k+7aRg896XzNx9DUMbd9tS8bgTQOxIMLxmnC\nQqASOKYzLjOKYzB2AIpCLDKAelCthuuKg56ZzRn1FICwueSeTQXy2BUHmFWwD161Ju7D86aK\nH7hg80hctyBUfb+dPWRNuDQBKkm1ATxS7/lxjmqxZcAD8ad5noaLjLKyBUx0NOWQt05qsX45\n6UbjxjiggtM/GPShZBgY/Wq6t60cbx6UrlFreVk56U9ZF5wTVJpMNzSqxz0oJL6yK+Djj3qR\nZcLgGqLSE8dB3xSq2D14plF8nK9aVpAsY5yfaqizbuM05W70AXFc7Q2eKPO2nA61UExVeDmh\nZC2d3BpAWdzfSneblcdTVdJDtx1qSORcc9adgHswx60RhTTC3Ge1IGHFSBajbdle9PSPKlqr\nCT5hgc1JHIW6cUDJlTc2M7ad5JXjdmmAnsee9PZ9vJJpAMORxQ2VxjrRuH3ulJIc8jmmAgdi\n23t1qZZA554FV2PORxRu3DHekBejBMeccU5seuKEYyRgA4FNyF68kUAKW+Wm7mbqeKTduOcU\nyQ7T160Bck8z25FS+YWjwvFVd2WzUu4Lgg0FImWQ5wetKZDtxUa4JyDT8YOaVhkyzMmMGpFu\nW6Z+tU+dx9KdHll96LEF0XRHQ06O8m6dB61RPChqHdgQM0gNaPUGXA/Wr8OsMpByBj1ANc5n\nbg1MJMrxSGdlH42niUBtkg6fdxSXPik37KWSP5RgYrjjMOnamrJlsgkfjVIZ1TXAlJIwDTPM\n4weTXPfaH4+YirEd8y4BOSaQjaVjSGX5qzVvdq4LZo+2Bu+KBGq0hVgwb8Kd5oLDHSsqO6Ja\nrCzFVpFGjGRKwBOKvWenzX8hWEqNozuY1iJNt6Gp476aLJilKHHY0yTWk0q7jZ0MRcr3U5qq\nymNcujKM9xTLPWryFi5mZm9W5qVtVedz5gBz14pDEjZRnsauQ6ZJcAFWQE9ATVCSZW7YNOju\nNmOv50XEW20m4jZ18vcR12nNUJrNlPzI6fVcVpWuqTW3KvnPUGrUmufaIjG0Skng5pAcw9q8\nZJPzCkUHqOBittwgX5QD7VWmhR+CMUDM+C8khbKnAqH7S00xlkJL5qzPaiNTjIBPWqjL1709\nRGuuposK5OTVuG6DAEcZFc2q/MCc8Vdt7z94Nw4FLUZ0Ecoz1qfzunNZMVwrEHdirAlPBzQF\nrF55Aw54OMVGI0b6+xxVZpww680+OQL160XYuUj1DQ7bVIZFnhWRXXYwb0znA9K818Ufs3eC\n/FDSyT6YsU8gI3Q4XHfjGO9eqCQlvapFmA9M0KbixcqZ8geJv2F7dpHfRdXeEso2pKuF685/\nzzXk/iL9lXxhoCzgWyXMMQZkMIJeQD0Ffou+JBnHzYxuqOWJZFKsFx9K7YYi2jMJUU9j8iNQ\n0fUtHmaPULCe12k5aSMjH1/SqhcOrMvKjjcf8a/VfxD8OdB8Rwvb32l288TnLMU+bI756/hX\nj3jr9jPwlrUM0mnm
TRrsjKtb42yc8gqeOR6Yrp+sRZyyoyWx8CxyE9G5qeNm3bs4XvXvHjP9\njXxN4fxJpbjUbYgjYgzKjHoCAeh4/I143rXgHxF4NjkOr6bcWqK5jE0iER7sdM49jWvMpbGT\njJbmcrbmO1Qc96b5hXhhz2xUHnMZ9hVg3cKOcVNtOckc4zV2M+t2Ot/lYu3LelLktlu/pTSd\n3zAYbuKarFeSefQU+Uq47zBjpy1HnNxzUZyDnGKlj2nJPXHFOwCrIeVHTOaFzyD0pu4BRlea\nXYyqOeaBC/u+eDnFEbMPu8D1pq52njnpSp8uEY56nigB04KSBgfypu8bv4iT3pGZSo4yc/lS\ns25flOB70BYHYbhlS3tSsxUAKML7UYbbuBwccmnrtj29+M0XEIZPLGAMn+VMEmCcjd7Uu0HJ\nyOuaa67e+SfSgQsYLSZUgHrmpPM5xjGep71D5e0k5wMVIv3QSe3NUINq78M3PpUn3pB2qJij\nNkcE09pA2Mt7CkAu9DNk9BUjMvLEYWo5D5hBVPypJDiQAgj+lMVhyg7Scce9G9CuFGTRuJmD\nY3Ljmk+VSGAwKChWXDBjxxxmhSc7j19qY2585O5QfyqTyzuDI/GOlAhHbHzg8dMUvCuQOVz2\nok2lM54700HavA++elAhzKFYZbO48UBXVACeCaiX5o40JIfNTMm1ShbLZwKYCsSoXt705d7Z\nwny46VDy2T1A4x705ZJUyQTjpRcByHarAjI96cpVTwdoPamLMNqhkNG0qwY/hSAccyH5Txmn\ny/IQVGaZvK446nNAkaRz8uAOKAJvljYFB2/Co2YyDb0Pc00L0C8e1OZVMnDbR3+tAmC5jwA2\nQvPFORjJuY8HP6VBIdmdv3c1MzBVXc2d393tVlWHqj+XjjnvTFmP3PTiiT5l4PQ0jOvnZ7Yo\nBokAU/MU+ZeT9KRHPLA7lPTimxt8jN2PahW8xQCSvYGgWxIMqu1zhjzkUi/u2+U/UUjEqw2n\neehBpGcqM45PVaBk25dwzzTXjyzN1A7Co0UbjnO0c5qSNsRlh0zyaBCBU3KdxOR+VSmRVX5R\nj3qOTbhijd+lN3A8k49qAJV2v82cAdhSM25jtG8URbIow2eT2pXbKrgYXOSKAFkXEOB82DnN\nNjYNliOPak8x9p7k9KSTIUfwsetA0h6qMbf4c5zTg26TCnjFRBhHgHO31pVbbICw2+tAD2Yy\nSKf4ehFPX5o2ycqDxTAyNnbzz+NNXEcLjqAcmgRIuYmDKNxpflZt459ahQFYw6n5Sf0p6kOu\nV7HoKYDsLjj5jmn52z5YdO1NbMa7yoC+1RxOGyCDnqKAJ2bcpbGOaF7OR+FReYRGd3CipGYe\nWp3daAFVkxwu0ntTx+Oe1M3bpNuBnFNWR/OOPu9OaYE5yWB3Yp+7cp7GolZ2HIwBxQud33qd\ngJJs4Qq3PcUSEZAGfmp6bVYH16mmbxIcEcg8GgBcYHA+YULgLnqfeljA2k9xSo6OuduKEAmd\ny8cnvTVYMoUjnNKuFYnGabtG7cDg9qQ7ErMw+UHHvTfN6ZGT601tyAfPxTmUNICPu0BYcr7l\nB6MDUbqC42nnrmkVtwc45pqt8oGKCbFgZ2gNyO9M2rNkK3I6VGGYRDPIp8KqOMfN1AoGO8xt\ngU8AHrimhgm4LnJPXFMMpfcBnj2qVSZIwGXgdxTK0H/P3Hy92NG5RuOMkjioYvl3bif506TH\nmDJ4xSIJowSPMYDGMYoDbFAIxnpUW75Tj1zUrSeZtLr7cU9QA53DjC9DTI1+cnoenNPZtyJk\n/N7U1Cysx61QEqht3PNLu2rnuTgVFHlVPzZzzjPNSMhaIMMYNAmDPtYBevQ005VCAeM5pBhs\nMvXoaEU7GLjikIcqhm38AAUI4VTxkHvUSNjI6k04LhcDmmA/5jls8Y4FOTDR7jkL70u/eAuM\nN69qez+biMnAUdKYhB
IBCcrsYc/hTIsTTcA460jMRIARuPtUkeEUnO0e1Ah53l9wOO1SMwzt\nH3+59qg2lVB35yelPSZd0YZGDf3u1CEWAmYyA2BmpudoKtk9KiUA5+Q4zT1bbntk9KtFWTJc\neZIpboO9PVM5AJ2+tRqu1sn8DUqKYwWbk0hj17ccDo1O9Wzx6GmRZVSVXfnrT0ZX4YfN3p2A\nfHlgAUz33U5yI+ODu9ajQMuDnK0jfeAJyKYEwYKQv608OFHQntUIjMbZJyKm27QAOjGqJJIX\nMnA4xUyuVwQOO9Rq207QPxp8cZbdk4WmBNHu35HIz1FXoWw3z9T1qC3QKq/N8tW2Hm4OMD0q\noksswquRt496sJgrgDv96q8ShcBRx1NW41VlIDY71uiCZcbQRzU2fM4xUS/dB6LVg4C5B6Ct\nkQRLuk4HA70ph/dnfyPanqw25AyDTmXcw3cAdhQSynJF8voKp3UIkBHU9q0WZY8llO0mqkzK\nG6YNJooyVQBWDdQcUpVgoC/dJ5zUtzGGwU9cmovKbru75xWZYrKEbJ604ruUEnBBzzUch4G8\n9+1PWESbmJ6DgGoEKwMjAbs+gpNpwRjA70irnALYApTIY8hl3DtimAxnCzAAdB1qbeq8Y685\npvkNwR0NNDM25JF2kdDVAOZg3GOPSm+fvACjaBT49qx8fO1NZQzE4x7UaiJN3zFgQQe1HnfL\nhjnP8XpUW3djA21YhjReGGcUxAF3KNvGemaJFH2faQd2cGnttZlG7Bpw+ZzuFA0MYhScr97i\nnrH+5KE+9OAbJyAeKGUrtJGdx6UWEDKp2nkcU3yzjIOBU0jDJQdPWkZRJgqenSkMj3H1P5UV\nPtl9aKBfM+Fgu9t78A8g0rMYpDhcIRwajZdu5ScHOKP3m4BRv7Zr5i561ibdJ5fyne33iKXc\nQ2SeGGeaj85opCATux2HalfEkKnnGcg0CJVc4xjaccGl27W+c5PbFM8zzlyW2kCmeYY2CFhI\ngGCfencB6/vJCArHI4xUsyny9jDa2OtNZ3j2rGQv0o3PIoLjgnHXpSvcBQNq7FXJxmm8SKQR\ng459KkwYdxJUjGM5qKOILtG4kNyc0ahsEO7aADtHvUhj3KQGAz2pvlhZN7HcOQFFMjkCRncM\nnJ6fyoAfKxhjAADbjjNS8jaCASe/XFRxy7tuSoz/AAmk3Ou7L7QTgcUCFRfMkJ6jOKGyXLbs\nAcAU1WOcDOAeafuDMTjGTwfSjUY0K3ceXQ+I2A+6P1zUrKW273GwMDkd8U4OJpjtCrz/ABUA\nQQ/ulMmNzHgZpVZ5WJRdm0c07y1eR9pzzxTGDMr4O3bwT6mgB3mtsDY49KWFXeT5uO/XtTVk\n/dgBRuIxzTUzuVWPOOaaAkjb5XbOYweKCwK5Azg4oZYxDywLdDTdqrnPyqw4PvQMTaVbG75D\n1qUbdw4OMY3etM25x825lGOKRV+YNknH8NImxIyhV8w7iyjAVe9K0w8tCFZADnmkEZVjh+vJ\n5pZF3KGyWzx7UFImKjjH8XPFL80RUo2OfrUCv5JznaSMD3qdMxiNSvy9eKQizvf7QpOQrL1q\nTaGX1Ge1Qr80bfNwvQU6Fv3fyHdnqKQmyRcebkHKdMVNuBOzGB1zUSum4AAbgKVpNy47mrQD\nsFGZlAJPBNNydpydrD5jimncqqegB4o8z5pFJzupAKwMe47y2eQPakjJbeWHUUx5F8zDZzim\n+c6K3QjtVrYLAJEVQn8P8jULSNG2SM+jMe9OkUSLmMENjJGOnvWp4d8C6140vo4NLg3NvCFj\n03E469O9Z83c0UTELHzsSKWkY4AXmum8O/DnxF40vIYNOsmkSY4DHjA9cfnX018Kf2SI4VW6\n15Rdzuw+TB2jj19a+nfC/wAOdP8AD8UUdtbRwlF2jy0xgfWplNLY0jT6s+ZPhr+yf5VrGurA\nXMysMlWGOmfX/OK+lPCP
wo0vwzZx2trbosKcgMAefX65ruo7SOEfKMHGM09sq2M8Zrn1budK\nSiU1tVtflTkDil3lcAc1PJGWOMZqaG3ROSMmkMihVmOOgqwuEGKVmwvPAqvJeKAcVSGW3mGO\nOarNcHdg1X+0hutQSS/xLSAttc5zmolmPPeq5lB6tioZJgWwposItNOSxJ4pnnDPPSqLTAMQ\nWBqJpy3Q8U+UVy61wsmTnFVmmKtwc1VaYbevNV2ugq45z61fKLmLUkoOcmoGmCnLHiqck28c\nnmoTN8x9KfITzF2S4HrUEk3y1R+0Fm9hRJKxHWq5Q5iZp0J2luaa0h6dRVJiu/3qVZDtx0q1\nEhyHySbo8VDuK9jS7iGyRU8caxty+8e1Wok3IWkzHgjmlWOTaDtNWPMjVT8vPao5Lh3+ULz7\nVajZkNkDW75weKclptXBbmht7HcTintMI4x8wX3q7CGyRJt4XBqNlMny4GKt4juFyHwQPzpR\nGnyjcozRyokLWyUwAkcdhRPZmb5UTAx1rQN5p9ku2W7jB9F5qs3irTLViv8ArW7Y6U7oWpSk\n0J5WXacN61sWfgrzbfzJWJPt1rB1Px2yyJ9mtkh28/Mcmmt8UdTUhojGjn+6vFHMgszQ1DTY\ndPjywxzg1i3l7B5YjV1B71i6h4gu7yR3kkyGbcR71ltMTliSc1jKSZqjba7iKn584OKhbUhD\n93k1j7wP92mSSDcOazuXY1pNWY53c1Ua7MnzE/Ss9pC3XjFKrDy+etSOxba4PAzxTWuOMdCa\nqsx2Y6e9NDFzgnpUsqxPJcHcAPxNRmTbyDkd6iz154qMtzgA47mgZO3c9RTFlypAOKiZufWm\nqvOaQkSCTaCPvNTmbbhjxUbMNwIA3Ukhbbk8incY9JA2TnijqpJ4qJdq8HgUrFscdPSkBKqD\nZkHFMyF96j3HpmmtwOtAErMeMc0i7udxojIZQAcGlb5Tj86kAVW3ZBpF+VvmFJuCkANTdxd8\nk9BQO4pkVwR0+lCDb05pkbLITxtIpysV5H3T1oGiRWZsrmoTJ14zUjNu6cVFuG04pi8yvNG0\n0OFO05pbdBDHjqxqbdtUdjTTxIM9KAJAzZwOlG0BTkc0zeV70qsSCeppDHLgDPrTc/NzwKI2\nVtxbqO1DLuAOaRIucDOKFDeZz0pNp454p2dzbhQND9xZsdKRcgkZyKbuLKR3o52Z7UXHcN3O\nSM07qucUmQecVE7bVznFICRWIPApCcdDzSH5MZPUZ4pjH5vaqGTK4245zTGzxu6VGvrmlJ7s\ncelAiVXbJQ0w55HQ0f7WaTqc9KQxfM/hJopMBunWmncy5FIViZUDKQODRtLDkdKarE4J4oCn\ndhqBhGp2ncP1p6sVYgDBIpi5L4NKPm4HHuaBEsbFWx96h1Oz3zTC3lrgdfWnNuGOaQxkmdww\ncinLJujOTg01SOcjmlU7s5x7UAPLZUEmnLt4qHPy4NLuAxQA7cW3BcA0KSU7k0mVZjjinLu7\nHigfQOkfqKdvVVA70YLZ7YoGG5xkUhCrIAp9aUSA9FwaZwzD5cU4KEOetMCTI4qteSFowqHB\nzUoPy88E1GyjbyM0AVJA4kUo+T3qwrEdetJGixMxPzHHA9KlVemeuKAI48ySEH5amPGFDcjr\nTWTjOcGnLu+8CM0ANZD5gw2fWp8/w96gX5myRUiqqru70mA7b607zG3etRD7pOc5pysaY7Cj\noOMnNOXbgk0gBbocUbdoIJoEKWVl9qRiDgdBSbT5fTFIy7sYqQJeq5PWhtzLkZFN27eRzS/e\nXH6UhoHXqQccVFbt5inHQHvT5FZkApkURjUgHqaBFgD5fek+6wHWlZdqDmkU5pjJVYdAOaTg\nZOM0hV+MDinRqYx8w5NACbjt9BSq+5c44pzruX0prfdxQMC2PpTueCOnpR8v4Uka78kdqLgK\nCfOOTuGOKk3buOhpq4LdMV
IRjmqAQnbj1qBmfP3eKm7kngU1YweRSBiqxZsipI855NMRdp6Y\nFTJjPrQJCbWjOW5NKcFulO3h+vQUxsdVpFDlb93jFORfxqPlfcU5c7s9BQhDmCvgEdKdj+Lp\nS7SF3CkwR9KGCJRIBHjHNM27yT0ppHy+5o5VfWmIVuGwORUgXGOaiVunapFwMZpFD1O5jngU\nq7Yx8pzTVHUnmnkBUBA5pAHzSLtDYpd+3jGTTcALuLYpW+6CPvd6AH+ZuIA4oZiOvNNRRtHc\n07ccY75pgCktnjGKcpPagKWc88YprZUe9IB+Tzk4p6vu6HFRbQeScilbtjrQBPyoyKcrZxkU\nzdhRnk01pCrDjg0ASb9ykZ4oUKqimMV249fzpeeh6UwHhgzEdBToW+U4HGe9RL6UK2TjsKQ7\nFkyDb2NMOTTB82MmnYbPB4oCxKzcAZqTdtXgcVX+9yOlP3FloGSeZu75p3Udeaij7CnKw78C\ngCRTwKdu2kbuRUS4GcGl+8pz0oAcWGN2MilVj0Xio/4uOlG8sfSgRJuEa+tJ7nqaYfve1H3m\nHPFAySQ5xzx3pC21Tg8UcdcZpu7k8cUATxyh48Y5qXcN3B4xVWM4YYqXd1IoFYcxCrxTkYsa\ni42jJ4odiMbc4pWGTFt+eKFYrgZ4qOOQsvT60i7mz2FArE/c45zS5P4iq6MVanKx3HmhjsTI\nw3c9am5C8Hiqy4JzmpV65J4pASIwz71JuDdzmq+4bh2HrTlkG04pgWR8w4OKRWCtjqPWolJY\ngjpRuCtgCgCxu3fShJMNiofMbYRil3cKR1pAWl688Uqt04x64qHzPmGeTRuJbI+7QBOZSx+X\nrT/MOME5qt5mw9cCqf2h5JiF4QHrQBsB9ygGmrIFzzzVVZirAmpl5b3NAydW+XpScHHaojJt\n47Uu8MOf1pCLfnHhQaTJP8VV4+WOKkXHPPNMCRSVzzS+YeFHWo1znjml8wE7scUAP5xg0K2a\nFyzZ7UJjcaRRPG+F6YFKWGeOajUnHJ70snXjgUCJVLM3JGKUydhx9KhjODg07yzk96YEvLIA\nOaVvTrimKxDYpcnd6UrCHn7vIpVYn2pm4460fMDzRYY7B25PWnhgFxULZOTnpRHJuwexoAld\n9wFKkhbg8Gm43HinMw3ZAwKARIu5aVjzyajDFulP2hhk9aBbE6Sf3etTxzP97tVJTt71K0hZ\nQM4osMuJMcDA+tTCYelZvmFTjNSozMeTRYRejnHdsVMs3PHWs8MGPSpFznINKwzSjm6Z5p3m\nHJweKoLIyr1zTvOPGBg0hM0Ypjg806OQqxJPNZ6zbae1xzxQCNIXBAz1NOaYYBPWs0TEdTgU\nG6Dcdfegdy9MQ8ZyaoSYVgAO9PWfoO1NZtzZzigBD8uT1pN3y+lDNt9CKh3Zye1AJFhZTHyK\nsw3hb7xxWZv39Kk3FQOOaRRtJcBgKmSTdyDWRGzblYcetXVkwQetAXL/AJjA8GnLIcZqqpJ5\np6yAYBPNJiLYuOnaneduYVXWQd6lUr17UhlobWAyMilIDKcjg9qredzgdKd5x4z0pgElqGj4\nGPTisHWPA+m+Ibd4NQtY54m5w6hufXn610Il3YFLu7YzWqqOJnKCZ4V42/ZT8J64sktraGG4\nI3fuwEJbsB+NfOnjT9kfxNpUbzWU8d5GHACLksOeR+H05r7/AG+ZR356VXms45E+YdueBzXT\nCvJbmEqMZH5d638J/EOlW0s7WFw0UbBZC0TLt5xuPHT2rjbyxudOuRBLE3nL1RQTjjrX6v61\n4X03XLRbee0V1X+HoG+vr+NePeNv2ZdC1y1misrZYUcmT5eHj5JwD35rqjVUtzGVG2x+fr/P\nnnJ7DP50RNjAHIr2T4lfszav4RjF5pgmvMf6y32fNjPUHoe3FeQXenXGlXbwXlvJayZwEkQq\nT+BFbLY53F7ETbd53H34pyq0
m31PTJpmzasjEYx69PzpYm+YEcjbRcnlJVPy7Ryc80KwVjkc\n9M0wOdp7Ukee/wAw60wuP2hZDk49qWNgy7McA9aI9qzZYZJ9aa3zM20cUEk0illAHT0qNm6K\nRjtTWwqbg31pq4Ztzc+ntTAm4UFioKngU1lG3AOQO4oXhc9qcsXUAgdyKABkIXpk49aQAbgO\nq4py7iMR9e9RcbDg8k00SL5eVJ6nNJuAbA+Yd8UoZ0+ZTu7GjaOgPB5NMY5HZciPt2p/nHbk\nj5jTF+UjstLxgtyMdaAAt5zfJwPU01sYCd89aTcqruByvahtnBTh+tAhWYo23qvcinrIWU7e\nG6cU1fu7SMknO6kkTyuR8v0oAkU+YrKT+NR7vmUYOR3oiG4kBgW9Ka2TwvX1oCw92Hy55INW\nOGYFuuKgXYrKCee9P8zDcjP40ALu+VsdccYpI2bpjK45py/L94YJoEZZCVO7HNAxeVfOMpjF\nRb9rYOTzUrYXbgcEZxTG3k4OBQSSqo3qxbJbgL70zeWdlQYye9RpmNTg5z6VIMNGWzjJpsBz\nKVIywDUiRMrkHGeuaXcvmFMZOODTF+RuVLUwJGjUqMD60RnPQZB7VE0ivgqNg6YNK3YK2U6G\nmBKWEK4+9mndFHHB6mox8p29QBTUZ+QQWB4FA/Ift+XKjK5pc7Qyge5pIWZV2kYo5Ctlsk0A\nDMeD39BTizMpxzSbQI1IPNLIRuGBgnjFIB2GVVIoOBk4684poYLlf4sYANI7Esi/cOOfakMl\nkby/bdzTGwPmHNKWAYZXcMYpquN3I2jHFMLDmkZlXC5PSlJYKeOfemSq6rs754IpzOJNgPBH\nH41RI4FmAwcccrT2zKwHQ4/Co412xknG7NOyWxgn3FKwwXd90HIBzzUiSeZuLJx0zSJt3fIN\no/iJprMFOMkqeaAYqu0bE7cH1pyONrE9etM3Fjxx9abHGyyMTznjFAiWFlmgYK2M0o/d7UHX\n+JqasIVcbeKQny5CByccUwJG2ndGR8nUNSR9Q6jBA6UyPe8ZyORSuHZVO7ac1RI6Tewwejdq\nc2DERjLDge1I/JBJ6dKB+8XrhutArjo127WIwfWpMbucYX1qPcWUADIFPVQIyFOe5FIsVSWy\nOxp235guOSKjjbGOMCpMNn5TyeKZQ7PzALzikGVyerGlWMyKxXjZwaRldQp3Dn8qRI+QFpEO\nML3p27byq4A4pdpbDdfUU0Zj+8MKe1NAI3ysRu+8MBRTVVY1AY5YUdAc9KSQZAKn7o71Nxj2\nK78A545pqh1yV5FEX+pww+cnrSq23JIyR2phzCQx/NjO0tzSyRkEAHPOc0/apjBU/OeTx+lN\n80knuAO1AhVj2xdc80hkCruamn5VOOeKVf3ihcZ7tQBIs3mHfgbjxRk7Sp4J6UxIxuII9xR8\n0jBsbcUCYsQCMWXkgY5p7ruYFsEmmJuTNMLFmUoMCgESrtOSRg9Kfwi5zkdai3EEg0uDGoLt\nknjFVqFiRdr8hsACmxsXDbuM9qSNVycrwBT84O4HHHNAhixq0mAcAU9cRKw38+lKoPXHBHBp\nn3uQnPemA+ZDuULx3OKUzMvyD5uelNOGbJJBA7UkTZJYHntTEAXyyXHX2p6synIGKi9wec9K\nVSy/K2frQImaRpOByaVVLSLg4x19ahVT5mQ3FPWTaw9etADk+WR2Xr3pTlxgj5TzTBubOO/J\n96mjhBh8xe3BNFhMSJVdmyCNp+U1YjBzjrt5FNiKbWORnqanjYK24HK4pjsSRkgB2apFZZOM\n4NNRcLSxn5Dt+979aaAfEu1Tn1p8fUktxUcI+ba3fnmnqoy5zxSGTHMagr8qn3oXb5uM/Njk\n9qjXmPOM+gp6ncPu5PcVSAftTkHce9Cru5U4B6U6NdyMp6gUow8KbRgg4qhDlVvLCgbiOpqU\nbm5XB9qihy0pXvUrL5bBh0700S
TRRs024/IuOlWEy2V6ZqJCGyQc1YXC4f1GMUxiriNckcVf\nhYMgZlqmyqCGH3TViNiGUqMgVcSWX4xHwegHNWY8eXjHDHrWepEjEscE9q0If3ihemBxW8TJ\nj48DKjkDilVGBYE5FJCSobipFA4BrdED9vlxpx07U6OQN1XHHek8z59qin+W7KxIyAM07ARz\nLujx1qlcKC2R09TV7cGjG3r6VVuGDcAcd6ljM6VQSShAPpVVsKoV2wSea0HgQMSR/wACqlMo\nkBHVh271mO5HISSFxgDpUjE5U5BGelN5kIyADin+Woh3bsjOB61FiiNgW3fwrmpFb5VB6j9a\nYmQxVvmGKnYAsgUZAGSadgIiGZmw34U/ZtX5hkjg09QrZx96ns2CN3U07CI4jFGGwpJNCqrf\nNgfSnzsfLypB46URlPLQ45PU00QNaPkZOBU6MI4yQM9qiZg2O/NPkYqMBcjNUMdLIjOgZcGh\nsMxP4UjQnzQG4GOtOjH3l79iakoccSN8vXGKVlKsoUqSB1pkeE6DPrTmTHOOtFhDWj8wjpn6\n0/b/AAkbcU2TIZNicjmlb7/chuc0aCHY96KMp7/lRTuSfCTttO0nc+M7vSlX5Gz918cgGotw\nLBd3zMc5x2qSOTbMyxj933yM18se2Kx2OCF+fH3/AGoVSuAG3c5zmmSSLGeEY7uOfT1FOBSK\nGPjc+cHHekKxLIdzucBTjAIpn2c7Y06N1LnvTbhfJ53d+BnkUqsWhwXYHPO4c0AShpDkAgsD\n+OKcsn8KkuDzu9KjbEkg6kY6jilDgKSDz2A70hDtu3O75s+tLN/CqnBXsKRlLBD1P92kdGWQ\njbuZuABT1ESspjbaoweuTTVlRlaNlYE8mmyxllVWyNvXmiORyAqLuYr1NBVh33lIVcrTcjcE\n2FkHf3pfmjAy6+mBQNxATO4E9qYrEqyNC3ahGLrtPynOaYFbzAuBjOM06ZG83apGP71IQ6Tf\ntVCuYyenrTJIhCwO/AbtnpTmM6xhw27acgU1VWT52XDqc80CF3MrbSfu9Gp20M3XIb+E0Pv+\n8UD7qYy9GDAqvWkMf0yoTDqefpRIgZRkFgDkkUu4s23IyR+lELeUSAOCP8mmAzMfzBU4P8VP\n3FYWjONqnHvQF/ufOppwJWEvtyWbGO9JgMK/vAyDaoXqT1pVIXIJyetOEZWbceeOKRoQzBsg\nf3qWoDWj+UoDjJpzNJ8u5uBxUzKpZdp+ZulJMd2M9M4/GgBFkEnMnVegNO+fO4HjvTBC1wxH\nQr+tPYZ4H3MZz70DJNxK4UYyeealOcsAccYOKrKVwBhievWnNKxjLouQeCaYWJFkjRVBOPpT\n2mYMQGzxxVdc7VLHOOgp+8M28ja2OKZNh3mIyffLFevoKXzi0II//XVaaQJCwxknrx1phMbR\nlYzhlAwKYy1I/nQjzMow/j9aZH5rzBLaPfIxAUfWpbW3udSZYoYGcA+lfUnwP/Z2Hn21/qQW\nW4RvmhAI2E54J74/Lik9DWnG+5wXwx/Z81bxNdLJqcAtVPVc5JHA3ZHGOelfZPwv+D+n+FNO\ntkWJQy/M37sYZsff6da7Lwn4Zj022RAgUoQD6HHTiurhtwiY6muSb1Oqy6DLPT4Iog3zb/XP\nWrO1YwQBgGnYEaEn8Kp39wV2hTzigY+ZhjKmok3SH3qONmOM9KuKo25HFDAeoI5IzUbSbSSe\nKja627vmrLurw/3qkZauLw888VntcfhVWa4LDg1GshLYNWrgaSzDoajaXbwTwKqfa0U4PWom\nuAVwDmrURXLUkwb7pqpJc9dp/Go2k2qfSq8zfLx0rTlIlLUQyMJOScmn/aNvFU2mJanq27gc\nmrUTPmJJZmY8Gq7THoaf5bLnIxTGU7eavlRDbI2kOTg0wbiKkjt/ObA45qO9Z7WZMcKeDSAC\nvamt9047Uxpi+OeopjkqvzU+gxrO
BzjNGTJgLVWR8OBmpobqNCSXHAosFi7HDI7Yxk1ej03C\n/vn8setYdxrAjQbW5NUn8R3EysDJgelNNIR0kskMTYUh/wDaNULrU1jjYjGRXOSahJIdxbNU\n5Jm34Y9aTmKz6m62sIw+/k+lVbrUnc5XJWsZWbdjpUhkA43YFRzlcpfk1eaOPAO0VXm1iVmB\nySMdM1Wkk3L7VA7BlGDg0nNj5bMfJdSScE4B7Uzdt6n5vWoVbe2aV8+ucVLZpZE3nfMBnj0F\nS7lZfT8apMe470m8MuCehpCZJJIFYqWNR+d8pHUVHNIWOF4oUjyzgZouIUyHHHSmlhgcc00s\ncDFDtlgKChJGY4wKOQ3tTmJUDuKj5AOTQMUsdvXPrTtwqPcvGepoeQbh2qQBZPmxjjNJ5g3Y\nJwuaNvU5yKZncGz0oAdnbk9qRSS5wab5g4DdKO+FoFYVvvcGhmPK5zSeWeSTTVOE7UyrAzfL\nwe9PLbMHuaj29GpWYscnrSCwD73PSmu4B+Xmj5v7wFMwwbI6UwRIMseDinlyzZIyKrrnnPFO\n3heC+KhgiVlHVRzTAh45waYZecZI96VWK9efekMdJwv+FHmce1IW96NgC/zoEtxyNmhmAzgU\nzcN3HSjdl8ZqxsbndjPBp0jE9qd91jlcn1pchm44pMBir0JqTeOgFNJznAwKZzxxzSAe2FU8\nc037uOtD4FGTjOKCR+f3eetJz16CmlisdOwdu4nikPoIjFecZpy5bvgGm5KjjrTuFGM80CG5\nbaT3BxQFXq7de1BkO0gCmhBu5HXvQUh7dh3o27eO9IMFuRmns209KYtRhX5dvem8/wAfNKp3\nAnvQR36mgNRJFG3IpyH5QD1pJGGBin8Ow4waAGsAv17UigH2PpT9oY5BytRxsOVxU2GOEZkb\nHQUMu043cUgzzzTQRuwQTSGSfeIC/nQxPKkUAY5HWhGyxxQA9fu4pWYsR2pqrzk9KFfvikIc\nJBySKRVDj0pXUFcjoaTbu4WmMTooJNOfCr/Km5G3JGacihhnPPvS1AABweoNPx5YAIyaYrYH\nTpTvMOCzCmA8sBwBxT1QKuNtRZLJxyalVisYyeaCdRrsVzgDFC9eTimsx59MVWhmMjlcYI9a\nTKLTr05oVQST1NKcNjvSBfRuKYg2Dcf71Nzhlx1FKy/Nwcj1pY1Cx5JyaXUBBlt2RgUDavQ9\nKM8FjzS7lK5xxSGKrAgD1oY7eMUu4FRtGKQybeSM9qYBgDpT1xtPrUfMZwR83WnIu/2NIYMr\nFRzgU76cmkUjdjkil469qYxMHjPC5p+3rzSZBUKRT9rNwCAB1pCEXIwAaTJ39OaON/PBp2Qr\nZzSGhF3eYWJ49Kcq4XnpUDSFuhNSLnuc+1AEn3ueoFKjbecfhQw/djPyj2pV5A5xQFhzMWAz\n8ppT8zbgc49aY3ykkAZ9aFz0frQIfuP50h+U89aGx2PNAzwWoKQoT5afHhOTQOuAM0qIdpJ6\nUEjYyVyCKdISenSlXBB9R1pWYYApoYjEbcGl57DAppY7akXA69aoTAtvQHFO42gDINNRj2FP\nLZ60DQHHbrSqm5DikVgrdKcpbbxxUDsIV2rjNKp6Z5pNoOSetPUbcVSAXd8p9KT74GDg05m+\nXp19KZx2GTSYD5GK4qPzRk81J3+bpSKoIwBzmkCJIsMvK05cNgGkjjO3fn8KezhlxtwfWgQh\nxztpu47dp4NS7cGmt/rOetAwWPaQpOaXncBjND/dyOTSGNmwwP1oAfEwTIIoXJYnHWl2+nNL\ng9KAEUFW5NSfeXIwaaP3f3+aGAbAxg5oAaV3LyCBntUqsOKYrfvCKcFBJ7UAO2/MTnimsxVc\nk8UpRmXjilO0pg0AC9umaezbuCPqajRccn8Kkb0PFADdrZJyMU5DhcHrTcBeOopVb/ZoAfxt\nzjAoBOPl6UnLcDmnKPlzgUFCbueKcN
y/SkVcrnNG4kc80APf5eM4qJm3Hb2qU4ZRkc0zBHOM\n0ASLINoFPX7vLVW464I9qm27uh4oAd5gB64FOVtoI65qNlC8dqRpOuAQaAJDlVJNGfU81GrE\n9TxTgoxyfmpASCY4x0pWYY5NLgbTkZNR9sEcUAOJUDA+9SjLDBNRBgvPepAzMM4xTGP3buMY\nxTVky3WmNhT1INM75xQBa34+lSI6hagjZV7ZpwYbwAcZpXAe+V6cilZTwRxTfMKjBORSxsWy\nOo96B9ATCsec81YyOKgTBzkYpVb16UCJZfvYHIpVO1eR1puRt9qFYs33hikBKO3ancbgOlRF\nvWnqRs5PNNiHyNt6GgPuXiog3rR9OtFgJ/Mzx0PrUgbb1NVxluKN21sUgJ5JBtwR9KbsVeQe\nPSoyx9RinqTuyelAD9xXpUytx15qsoJ5JpWfpQBPJIRjHSneYX6cCoVYNgEUvJxigCwkh6Cn\nLlVyaiVjHgetDSBl9TmgCYSFTkZxT45NzH0qoGOeW4p5bbyTgUAXWfoBwaTd2HWqwk6MTmpP\nNG3caALW7jk4pVbdjBzVUNuXrT45F2gdKAJ1cdRzU6sWXrg1UU/Lx605pNx4ODQBMzHzMA/j\nQ0m/kGoS27oePWkRjz0xQBaVsKKUSdc81XVs+1SI2AO9Ionzhf507aFI29Kj4DA9aUueg4pi\nJlXnNBPPPSmRyYT3peCvtQFh33cZoZu9N3ArTlYOnTkUALGwxz1qRSPpUeQ3bnvQM96kRMsn\nqKmRgvJqrzuz2pwbGM0wL6sD0qWKNmWs77QN3yip4bptqkHimBc8vb70bl3cVH9q3NTPOXJA\n61IE3anK3y5zVdZOo6inxt8pzQBKWJyetNyeP1p6uNoBGKJMY+U0DGJIecmhpMqOabtAajb6\nCkBJv+Uc0hfbwBxTDnj1pGb5vQVQ7aD9wUYzinrJ8wJPFVpGFR+YQuOtFiTSWYKtWIZvlGDm\nsdrj5gKswXAHBBpWA37e4VcbvmHetaxFhcqVlj2Sdd2e1ctDcBl96s290GyCcGlYZ0V1pMUc\nDSwzBlHY1T5VVB6ms77U3I3celO+2M2OM0rAX17nNOD5wDVZLhWQ+tO8zdg55oAn3MO/NKJS\nrVCGy2aY+d3BNAyysxHBpGl3N7VArFs54qVVBXk0CQcLzk1Lxn7o9aYi5xnkU5fmqlLlG0iu\n2m2rqplgWTBzhhkVy3ij4V+GfGkM0eoaXBcpIpU7kG8fRuo5x3rtRh84600R/KfetY1WYSpp\nnx18Rf2NLeJbm70YTQszb440cuiqBjaM9efftXzt4y+GGq+D5WeSCWeDzCvmKp2qBnr6dOvS\nv1ODFhsYbk9CK5rXvh3pfifzTdW0RJBCjbj8Pp/ia6I1dSPZo/KeS3cdmVf4jg0q5UFF6Y61\n9nfE79jC1+zm50S5W0blmXrGTnOMdsc/XNfMfjr4P+IPBOpSQS2s13GFDfaIl/d8+9dUaikc\nk6dmcbGzR7g4zkdaegKptJ7cVErZzn5CByG/KkZsdDnp35rUysO42425alLFjkDjpUaMRJ6e\ntPTq3PagViRSeCR8vanKOcg/hUUYZsANjvmgN8zEcp7+tAmP5Dddh6cU0R7ZMMc4qTzCoGOK\nYQfMLNy1MBVO5flXndil3LIzbfl2cEUis4zkYXrTG+XkDjvVASqQQQ/3MZz6Ur7doO4YqLqp\nz+GKVmRgPlywpMQ7aCu5eucGmtJtl3Y9ulLuY42jBpNzNkMcDvQArYdfl49qQA8j7x7UmWbn\nGQB1ow2Nx4GOMUxDsDjdwo7DrmpP9WRk4Ldc1XVgOW7ipvvKpPI9T1oAdxIr4xjoM9aYQoPJ\n46Ui7QrActngGnKrSKVA5HNPoMkGdwJb8KUSfvCB8oxTOMBh8pHWjO7hm/KkA3aSuNxJJzUk\nij5MjFMLKqkDr60p3+WOenrTExzPyBt2
0sW3D55U03Pm98fWk3Krbc4zzSESKxGCi59fpTJG\ndSwzhuoX1pY9zfMDgU3lpOfmftT1HYdyrAZDd/p7Uu0d14J7UxUVtzA7VzjHv3qQgeWEBz3z\nQALkKcnFKsmchfwpu5TuwMinAKqLtPvTGxBIcn9aTbvDN2PSk2buS3XpmpMbo2BbBXmmSNi3\nBgpNPaZZGViMe9MVTsD5waJGjbahB4GcigB8537ccc0/YNoYnfzg1DkbvXA71JF045+tIYih\nRuy+70pkq524HJGPpUkce1to4HWpGhKzA53DHSmSIzEjnIYHH1pvl8sd2B1p7OWySRnPC03z\nBsJIyaBjQNyBlX56lSUbtznaccDFRrIzKrYyw9PSgM3L53DPekBPGy8vuGPSmCRG/hJ+bGab\ntCrkD7x6UszH5cdF9PWmA4IGmDZyB/CKeqmS4JVzUW7dt2Hyz3Jp65ZjzhfWqAeqltx3dO1N\nZmUbnwB2xSbVj+bOaGiVsN3FMQ4s0nz9BSrl02tyaNwVN5Htikyse3HJamSO5bBPToKbHlnI\nxg+9H/LThcU9i7HORnIpB5ixKzMcGpYVJVhjrVcZPQ4bPSrG07wOnekXe4+JkaPBByDg0+ON\nGbofal80+X8q5NKu5ufTtQA/ywuSWwuORTdqNHj+HPApzRq2DngimbR5gA4UCgBimVVKg4XN\nPbp6kU7njJ49KXlVztz7UARKrbTz8xpSoA59OlTqytjjBpm0R5LLuPaiwDB93PcdqReDl+G6\nipWXcFbFBUr/AA5NMkiZUCBQST1/E0qq6qBtBz/F6VKshjyduN3AoTLKUzlgeRQURbjH8rjI\nPSmYHm55Ax2qeQqzBQcv/dp7RhTsIwetAEMcKNHnJBHUk0ctlc4A6U/bu5xhc80+SRZFKovQ\nY6UWEQlTtG4flTdvULyAc1JzlDgg+9OWPkleSTTAZgeXvB59KbLG0kLY+8ehqz5YXlhQFDJh\netMRCNywggZ4xmkKiRTngVIoIyT90H7vvSbhErDGR1oAQMojx/COlMO+JGkX7pOKPLVVBI+X\nvTT8ysOq5yBQIcrDcBjginRusbFB+dMSTkgttPagSbVyRls44p6iFhjKtkDJ70+QknnpjrTH\n3hdwH4U7cfL46Dk0CG7gOB361MrhZhtUNgfeqIYONg56mn8bdwG3PBpASLvcHn8RTl/dwFBz\nzUQjEEYOeKmZzGF2nINUBLt2gKAC2OasKDt/ujFV4XLYJOD3qzGpkkJyCgFADkz03ZNJtKuc\njg0oVV4BNTb+AGFMYm0x7RuFSc7WX1FRmPOcfNSlyuAOWoAn6RgA9KFzGCQ3PrTFfdEcg5Jx\nQ6sVCKM+pqhEy43Byfl61JktjYuATkVGq7oxx0pybtwxyopgSLEfMLq31qeNvMBx0zjNMZS/\nQ7TTl+UYHC4pgSL+7yRwM1aYlsc8VUjVWxg7iOq1NHGPm5+UVZJaHzMOMD0q0oCLtAOaq28u\nY1JPzCr7k7gw54q4ohkkKg4JXBq9HiReDyKoqzbhxnPWrUYbO0cVtHcgnUNtBHCg809SrZPU\nU2Fj8zZ7YojIChcZPetbdRWHlgpBxgVLFODCRk7qi6MB1HenwuORjPvVIQ7yiyEggHFVWjJ4\nznNWznBPWo2ZWOQMAVIilNGyj7uRVBo/Jk7FSOWrRuGcMOflrPvJk3EAY44qSkQqymQqGAGO\nKav7tdv8WajGflIGCetWIV+Y5GTUAO2hmApUTaxw3JFNDbm5+8KkXbGpJxkigpPQcI2Vlxhh\n0pjZLD196ljCiMEttakLBtoU5I607aiGhVZiDyO4qVsbQcZx0pAP3nPI9qaFeRSDwoPQUwFX\nbgE8YFKoIkAY4HWh1P3R07U4R8gE8AdadxCqreZuz9KcuRkcbvWmqvzk4/Ckj3M7EfdoAlYD\ngY570uN0eQRnpUUqBnU7uKkaNZEwDhs0BY
Vi0m0Ae1RCIRyFVzipFVkbIPy9MUvK/Mc1ImLt\nX/aoqHP+9RRoI+EN4chh+7THBPWmrJ84wmcdW7U2bEa7pF+Vf4e5oc7VX0bkD+lfLHtknnS7\ni3XAwBjNK5Z44Qy7SvJ9aa7e/lqB3piyNLhd24nq3tQIn3hQh4JYZIPNP58wMpycd6hfasoL\nY5GOKN21tkbZIPU9KBE4kdicLuGMHFDYjjyo3DPTvUWQsmc7R3b3pflRlXknOaAJ1k+YtjjH\n5Uo+WQMWOG746UwFlkKBcjqcChWEcJbDEg9DQIlGAuxnJGfvUm4bSVYhRxSxfuwW2cNSbvLy\nSw4oARURVCDuc7vSnq43NtX5R0PrTZGOwZALsefpTW+5ndg57UAWG2CFTnhjlj3FDIu7fGeP\nSoWDLtKjcD0zSjcZGG8Bsd6GMmf5WyD8rDmj5NrsW+QL0qFWdoyhK4HvQrFlBCgKOgoJ1CKR\nZkBG5T6VJHlcDZwTzUTZYrk47GiKRo2JUfdPegY9ljCggHdTiQsAHBXdg801cSAsxwD04p6i\nNZAIx82M4NAD/LMK5Byf7vcCnBhG2C3PUU3e7bio28YKmovLDbNgyRxtakBPHHuY7mxu6UHP\n3RyehobDDdjLjg0iqeMfNJ6UAO3eWeewxuqTKHgtkdahkZ+VcKwPGPSiL5sxZHpuFPQB3zNu\nIbZGfXvT2Yi3Bcg9hiq7SMV8pVGN2Mk1IqvL+7z8q9e2KksfzgccdadJJu3Rsu3d/F2qIMPL\nX5iwHQ0q4ZNzPuyfu0xCsrLja3C04uNq5HJOKjAcM38JP3fpSN8xIkbnsc1QiWPDK2QCrDIy\neam0LRrvXrwR6dAZz1YDA4z71FbWUlxcLDEpdQOT6CvrP9nL4SKdN+2T2vzN/G6fl9aHJRRc\nI3dyx8DPgPDZyW97qFq00/DIx4RX+ntx1r6u8N+HxZxhAqontTvDfh5LKFV9s11MMfl8YAHW\nuOUnJnX6DYwI1weT0qf+EY4prY44ob5cNmoAjlnEWQ3PFZbM0jkmpb6bcdtV92e9UMtRNsXn\nmnSTNGvDY9qjWRRGB/OmTyqO+adhojmkyp55NY91P8xGanvbwDhaypHLMSeBVqJJMJi2Owp3\n2jGe9Umk+XFR+d5Y5PWtUiLsus9OWJmXB4HrWdJeKqgnpSTawdoVetUkSzUaL5cFsVRuJBGc\nB9wqtNqD7OufxqkbpuSRkVZmagZNuWPPoKBfJEQAnPrWWbg7fl601ctyT9aYGnJcGbnOKAHb\nHBqsJlVeT0ps2qDI28ECjzA6TTbWOP5pHUD1JrN8RalZsphi2s6nO4Vztxqk8gKh8LVKSYvy\nTz3ougLjXHzZzgVFLqZHX5lqhNNu4J4qJpM/KMVFyki410XUkcGqMsrM2c4FIsgDYzmkPzcZ\nxU3L5Rj7mxk0BNnTkmmMxaTHTFMZmHIPFINiVm28VBJJu6g5p/mDAbqaikbctA9xS/vTOv1p\nnp7U1pAzZHFIBZJDwtIGyeaY0g3AetIz/wAIHHqKQx28b+OBS/x+tQs3mAHpS8+uDSuMkbIq\nE559ac24cbqFxxk0wsMZjjBGTSMSYxj5accDJPWo9w6AZNT1JsJ5gDetOYhvmHX0qMtt46Uh\nbjI/GmMeZecdqaG59qaxHXPWkVjnHUUAKct8392j73OM01220AkYxSAOd3HSjPX9aapO85NG\nfyouAp+XntSrKNwqM4U9ePShW+Y8cGi4yVn/ACqNsenFNLNjimrkjGc0XH0H7vlOOKdkgDHN\nMB42jr3prKW4B5oQiT/V4LDg0h+9xyKavHy8mnKxVc4waQbCMwZff0qKYhxwMVIpxkkc0vHf\nigEN27l5OTTlY+XkdKQr81EbgMRSAa2OPrUm4ljgcU3cOmMGjBUdaBjlxz8vNN4wRjBpVJ7j\nFSLICuWHNFxjPmxk0vB5zg0biWxjIpFyc54o
EK3uOKQ47HApB94AmnBV3YPApiG/xeopc84B\npxX5uOlMb5ecUgF29jRt75/CjcPSjaFyxPHpSAGHyg0pXLZzQrKzEdKFXeTjpTBDue1I3TJ7\nUjDbxSBR3PWgSEaQs3yjAp24bMZyaFAVuefSmY54plIkT7tKzDbgdaTy9pwDTHUq3pQMVMbe\nRz60Z285zQD8vPOaVR7cUhDsZjG38ajydvTnPalhk35wfwp6jj1pAJtAII5psjhWBAxzT2Pc\nDFMH3STz9aGMcX6sBmljbavC0g7Y6elLu6gDFIBQ24YApwHy4xTN23a3Tmpd3p39aAGNkcAc\nUnOD2p23acHmkZdzccCgAC5IPbvT967tqjAqPnPFSsAseMZPrQAz8KdxnBOR6UjN8uBz60o2\nhgB6UtQGjuM4FP3BuKaP9rpTlQMuR2pDsLjcoDDpUbLtkyF68E0/73JpV5HHSgQi5XPHHrQy\nlfmAypqRuFxjg0hyYwOnNUAiA+X+NLkKvPNKFK9entRn5ScUAI21VPFKnzIV7UjYZRinbT2P\nNIBWIAA4HrTNqq3Jz3xS887hzSMPmBxxQPoPZtxz1NJtPY0NHnBI6CnKOAVH1pCGKNrbifwp\nS3zcjAoB2sSeaXAbnPJoGL9/tgVJ91Qe9N2N36UjLu+bPFSMXcOcj8aI8cEDNKuTHmmsOBtG\nPWmIeY+rY5ojCjqOfWnLnYBnmmtk4HYdaCiRo9xx1FIwx2xikVmXntTs7+vSgmwnIXP8Jp2N\n3JPNKuVGOq0KNy5PrQMCA3IXinc98EUu/gqBxTNvzDBoGSLhnyOKCxPbjNCrtzzk0/JZQDwK\nCSN884FOEZCj1oK7umcULlmHPFADlAVeo3dKX7qlTyx70KOSSOaGcHAxzTGNXdtOPWpBkY45\noGIxgcg+tHuT9KYth7Y445pVbkZ6UirtjHrQ3zcZwaRQ1WG6pY8lueRTVjVeo5pOUPtQA8tw\ncdKT+Hg80qsMY6A0AFe1IBFU4APrTmYM2MbaRXO48c04AtktxTGG5t2AeKlXBBHQ0yM84xT9\nwGBj5qAH7ty571C2fTNSbvMX5aOFwTzQITYVxu4qaP5V46VGMs3Tj3p5Yx44zSAcrdwMUjN8\n24UMx/hHBpqZbtg0wFY+bz3pWOMBu1Kq7l44pNpYkZpDHR4bJ609VLLTRGqqAp+tBDdQePag\nRIPvAg8Uvl7lI70i8d+Kdu3crgEUDI1Y9xzT/mk+lN7c8mkj4XHQ0CHIp3+1Px8v9KbzuFSN\njAIHNAEZb5eODT8/KKY0g25I56U9VGBk0FDfL6YBp20jvUjZ2GhYy0eegpAMXnANG0r94808\n8NjimswbkCkA1RznFO4UgUqLhic8U8J1xQAxj82TzTsUKvqKQtsl9qYhcqM4pvLHJ6Ubf4vW\nhmGcUMZJ5nUY4o5C03hsZPFP3DpmkPoNZlVhx1pytuYDOBTdvcijA25xzQIViGbI5pNpPtR9\n00pkHpQA5lKc4zSKpbJPHpThJ8opVBc+goACQVwetPVvl44NN6ydOPWnKvBxQMVWzkd6VVJ6\n8Uz8cYp/mfu8CgQ7heCcUqgLztzTDjA4zUyttYemKBgF28daXcG4AxTcncSKF5yxIxQxD9wI\n4oztpmAzYHAp8mFUUAGMH5jk0rZ+lRbi/UU8MfwoAk/iAqU/Nxmq/mfMCKerlzwMGgBwYqxH\nWn5yoDCotobrxSMSp68UAWDjbgdqQt1waiWTOBT2PPA4oAdllYZbNSN6CoG9ad5hIBoAkI54\n5okj3JgnGaYr/NgdKUId24NmkBPFjZtPGO9OXOQD0qHcWUginByTjtTAmHynk4FKhHf1qPcW\np3mZXntQMsBwGIFCthsEZ9Kr+YGHpSqSjdelLqCLWCw6YFHcc1F5rYPvTgxVQSOaYMc2eRnA\np8cp6VXYF+c0b/nAHT1oEXVuB3604yluBxVQNt
OTzQshkOaBl8MO3WpEbCnNVEbBz2p28v0N\nAFlWAAz3pQ208HFVg/c81IrfNk8CgRMGGfmODTjIqjg5NV8Bl96cuPSkPoTq3rUjHLdBxUKP\n3Io3bhVCJ1IZenHrT12jqcCq/mELjtR5meKQywWDNwadjC8darq459KezDrk4pCJUyqcHn3p\n2T1zmo1bAz2NCn0OaYEnmMTnoKlEowDmotxKmm7sLgDFAFnzR1zTxJ3BqoGHTFAz2OKQFot7\nHNBcbeahaQ8E9elE2ewoH0CVht96gZiGBqVnDckVE3zZOcCpCxGZMtz1qdZ24qBl3ZxQuduD\nVIC9HdbfvcGri3ACj1rGHX1qxDKdoHQ0hGqlwF96Vrjrg1n+b6UBj3NFhWZqQ3GfutVqKYsc\nZ5rEtyV+tWkutq4AOaQzXW4LNjtS+ZjjOazo5uhyanjmC5PekBoLJ8gJFPHzJVVbgYGSKlWY\neoxSYFuPO0mpVbOOKrKx654qTzNilugpjJsbW4FKrbj05qFZieetPDGkBKcD60xmHORTFb5s\nmlMg71XM0KwokDJiRdw96yNe8J6T4ih2yQ7JRyrIBy3vWmxPIzUax9CcVtCdjKa5j5t+LH7J\nunatC17p8H2Oby2y1koAkYk9V6Adcn6V8uePPgf4g8FzSSC1aaxjx+8jy+PlzjHXpX6ex3BT\naDyOlZt54ZsryORbaCOJ2yeVGDnqK74VEzklGx+R+478PGyOOoYYx35qTa0kYZTgNyD7V9w/\nGb9muy12O6ubSxj07UHQMhiXC7h1OAOTivlTxh8KvEHhNWlu7RntVPM0ZyB7kfw549q05jK3\nc4tVaNQp5x6VLGo8ohhtpr5ZASAMcdf5f57U0yZOTyfSrTIsPUlcg4OOlCyFj5mcgDBWpdyE\nq5GB7UbRyduBnjFMkiG1kBIz3NJ8u7d/B2FPydzDaQOtN3df0FV0CwhU7Sd2T6VF90cD5j3q\nRc7yW4GOtHOMKc5oFqIueAD83rUm36M1IrMrBCBnHWmldqk5+bNMQ5ZNu4dMDmmrJuUZ456V\nGxOcsfwpT/DkjB/OgQBt2W2/LmlWbaoH8WelOwOzY5pv3ZPMPU8YoGSDGd5HWkxtKtnHNL5Z\nAPvzzTZg4VaYD2YGNhyQDmiNk3ZOduKjDvj045p6qWiO4jPYUyhZGMce1Buz3qRmLKqt0x1p\nsXChQPmpvmFsoTikIU/MARwM4ojUeYQTn0NRg+XIGbpjBFSZ2j5VyetIQQzFflA74pysY2Zj\n1o7gqMH7xpWYTHAHLHNUhibcMpB4IyaFY+bzSbd33jjB6DrSt13BsimSLGwViKRvmXGdp6j3\npAQcDGQepqSKPcxPAA6UWFcaFaSMZTaQaVmG4k8LSbnzhu3WnbULHjIxQMWP5o2XduoZdoxw\nOO9IqiJW3jIPYUyTLtGoHGaaHYejIrZxz705dskx3jjqKiZQqkng7sc0rMdv+FICyx+UbCAK\nf5hXsTxUDbtokUZAGDSecd3Gc9h60CuSY4z0f0pm3JIAx7etLvVZd2Sz45HpTVYNlzwRRYaF\n3sCTgrx+FIoMi4X6mmxsZmxn5T2qXmJCFwVPBFVYBHkWTCq+GpWR2AA+XHWhIEjQAdRzT2bc\nwOD81BAkaqAQ43Empfl2EE7T6U1sQsQeMdKQMJACyZPrQIVmRY8g5PpSxt/e6UpMUgyqYPQ+\nlNZd5AIximMfublBRt3qSrYC0sbNLF0xioo48Zzk81QEtvho2zy3XNLH3+X5sUiKRg9T6U1m\n3H5eBmkxEsbfLuYdKfmRvmT9ahVTu9QakUv8vOF7igpFiKQt8xGDT1+VgwPyNwfaol279wBN\nTRYkYjOAOeelIOpLDGNp/iCnipCo29KihCs3DcdzUyqo680hiSRjKr+VEg2nIGCOtOK7dpHb\nmlXAJbG4k9KBEe3GXxkt2pJMiQH+H0qRSVyeozUc
ieZk559aYw2vvP50okXBOeTTf3iEAHAx\nT8hlBI5HagViKThlXGc8mpnwFVl57Ypu5WU/3qcmcAgcdKYxIwGU9iKSMt84de3WpF/d5DD5\nutG7zPmUc0gGbt0aj8zTwu1GYdKIyVPCAn19KGyzfKNzd/SqF1IuZGB/SpUjOBtwrL+tHryF\nNDfdAznNAhy8qwdqMbVO0jcB2pEUrIRnC0jY3n+EUCGZCyKeoxyRUTMVZvlyDTnIVOuVz2qK\nZm4ZenpTAFccx8kH1pnlyMSCB8vpRGdzHf0qQKegbb3pARxoAu8c+tKxLYK4+lCgrkD7p7UE\n7cfwknFBI75m2ljtFLGSrMoPy4qLyT8wJzTyMcA4OKYCxsMHdjI7UqttUcZyabGhwHAyelSw\ng/Nv6DoKAHDK7h949dtTRlJcDvTQRtEgHzdKmWMoRkgqR0pjJ4Y15bZlTxTkjQfKpINP3bVW\nMLwOlAYxtgDPc0wsOKnzB0+UcU9iJDg8HHamru8wkpt74qXaNoLdSaAuIqfLleKAFVTjGT39\nKfuG44FESsN+UAx92mhCrLt2jGQO/rTyCqnHJPOKYMkKcck81Iqhc5+amSIQdhPC+1SxqJIw\npzTIyD8rDAqz92MtjA6A00PoJH8yhAcbetOlnEeEAx70iYVgcZz1p0ijGCO/FUA+Ftr7/bnF\nWI5EkJA+lQKdjfdIyMYqZIxuyODnNMktW0ZaXGzIxgVfgbcozxt4xVGMsZA6nbV5fnbPFXEk\nlH3/AEFWI1KsD04qDaSuTzipgxbAzjiuhIROFZlwFxUgj2p9e9NhZl6njFSxn92c9a0RAjRc\nALJn2pVKxyYByO9IhJYcY55p+3y43785qiRZPlz6Go2b92Rt5pwzNDkDBHWhmGRz8uOakaIJ\nEEuD2xis+6X7OxU4LMPStXhhx0NZ90m6TeBuC8UFGaQRtYn6ipI2+YuTx0xTZm3ZIGOaFKrG\nSx69hUAJuG3bzknrVjC7xkZ9KqyKYo1I+Y9adHMxwDjOelIRYb58YxuBpYG27uzU3cORjDU5\nEDLg8GmAqu4VsD5fWnx4aSMElTnJx3pqMiZJGDUg8uT5+/SgRKyZZyn3R2qLdujBxgZpCzxk\nqD1o/wBYRn+E0ALHJnc3rxT1LY4XAPFRR7F3nb8uakVmQHJwpFBOpIiryo+bAoh4Y554qFQF\n53bc05d24BTuHc0xk4wq7uvtTBhiQGz3NI2yaRU6LQq7iynrnqKQCbx7UUfZT6UUgPgeKQbT\nyGwOhpUmLYkWNf8A69MkfL5VQi5+ZgKQy/vsqP3OO386+WPbJkZ0iLSbZATgj0NCsH4b5V6E\nVC2GYkL908j196ezJvbOce9AAqttKqvCj5c96cWMKrxnvgdc0L1Cb8kDIpseV/ecgZ5oESRy\nRkFuMf7XrUsse3bz++OPyqv5n/LFkyGPDY6ZqTbGFCg9Djk0DLbMFX5m2/7vWkSTy9qkbt4z\ng+lVXBcjyxlAOCaeznzF4z8uNtBNibdwMMQmeGpThQTtz33j+VQj95g43BecevtU3mLtBjjK\nhusZNAhV2tjapAxnFNDD5lAOcc08ebHJuO0BRtIHrUZfZt38Fh+dMBI2bGDnHWlVfOeQgndn\ntTeY364zwRSLITNsHyk8g0APC7mDAcDgrT9zbQcZXtTfmLZAIOfXrT/uYXdtx/DU6gLlplH8\nLUsZMaNuIYtxikTPzbTuOOtKVRQrN90jkHqKAHr8rJhMJjJzT48o7FuSeQaj5kVeyjpijztq\nkk57c0MCRS20l+Nx4NKJNpdSDuA602XeGUOwK7egplw2/YVY7hxTHYcgeaLyw2xh1pN2Wyh5\nXg0L8svyv84FJJ8ylMAEnLH0oQgW4XzNw5B4ApDGVfO7YOvFLEpcYK7mXo3rRGyhWG35s9ak\npD1xtBXDMfX+dCxLsO/JG7nmmnDoAeTUnmGQKCRtx3
609kIXq52nK9hUjSeWMEBcj71V49sj\nAnggcgUlx5bbS+cryBUiHGRmQnO4j9KijXzLhI+cv+ODQ0jeWX3AHuK7D4T+Ef8AhJ9eVGRr\ng4z8vRfTPpnmqTtuaxjc9m/Z9+DMWqRi8vofNdmGEblBzwc/Svtbwr4bh0u1VUjWJD0VRj9P\nwrkPhh4Rg0PR7aCKPbtjUMcd8Dj9K9Vsrfy1BIwueBXLKTkzqUeXYuWkKxdDzVgj0pkIGeel\nSsdvaoGNU4+tRXEgWPngmpR2Iqtd5kjOOooAyri42yNxkVHFL8vA596gumKsRmo45tpwTVpA\nWZrjYv8AtVRurk7cBqS4l3E81nXEw5HetLAMmuCT1qFpjtyTmonfdntSLljwR0qzOTEkmLMM\nHFQySE96XynbOBx0p4tmyCRVkkBYOADzikWEscgVd8lI1zgE01QvSqRJA0LbQGx+NMaLj0qz\ngbsE5FDRndg8g9KoRV+WMYA4qNn2qSOtTSQhFIZse1UpbhAuFHPrUsLCyzncOOKikl6+tQyT\nFlyPxqEyHvyKi7Cw+WT5eGqrIxYYFDsOgNNVsg/wmkMRuwzmo3YbvQ05mG33qPNSUhG+U5Bx\nSM/THLGh3DZwOO9RmTrk5HaqLFaQdCPmphbHrijzAF5HNN3jGD+tSAi5ZjgYGaj3bs4bPNSZ\nXGR61GNoJ96QDW5XpThnYSRTGLCnbvl54FADNobr1pVIX5c1Ex3MSDgUZHrzQMJWy3Tj2oYb\nSGJ4xScspAPNNkYKvrSAQPwTjNAHqcCmnJHoppv3lAzQO5JIRvApm7FRM23PPIo3DywcmkIk\nOee9M2kx49TTUb/6/tT2fOAKAZGCRkEcCnedlSMYFIx55pv16U7iAFtvNK0ncdRSbgwPOMUg\nUduakoRmLEcYpuG5HUU+QbWx1pFU59aBC7iy/d5pBuwc4pf9WTnpQFAbNAkLH8ue1MXC5NLu\n3HBGDQuVz70ygPqOtIPlGTwaVsdcUiAH7xwKYAuRTmb5umai3f3Tmn7T3/KpYgfLcdPSpNyu\noJGKiDD8qep3ZGMCmPoL5nPTFM/i4GaOQ2McU9R1OaARGyluaeo7d/elz2ApVBPJFSIj3M30\np24svNK4xg54ppypDHpQFxUVt3tTtowe1Ju/ioyu7niqEKuGx8vPrTzHvJG7pTVY8nHHansv\nyhqBke4Zxmlk+ZR6CkRRz7ml2lTgnNILDWb5RgdKc2dowBikJ2kg9KbuJTrQMeTx0yaYrHPA\nxS7scinLhutK4CBlPJpoUs2eABUiqFxwKb95c9gelMkFTPJzQcKOOaN25vQU4r+7yBmkNDVx\nnilf7oB5pq8oMdaTnr1NAxV2qxxTs4WmsBtDAc0m45GeRmkMUKiDdjBz2pVYs3BxSlQXyOQa\nCuGyOKAF24UgnOagUlZOean3bug5qPA3c9aBDlUty35CkHTng07zsEZXinDGSDxQFxDhmAxg\nU89CKFXC56j1pMFulACqeD3pVfv1pqr1HekXAJxQMeDkHtSMelG3+Inmn8c8c0DG7T2o+82e\nlC7iMdPpT1X14FAhjDPc0fdHFOzngikjYK5zyKTAdjdyDkGj7i460iqMHb0pw6hscUkA5pNh\n9qaZBtwKfkMhOKibA6VQEibnA5pOQ/XPtSRyBBRvDNkHbQId6Z49qNw3dKZsG7dn60vzK56G\ngY5k55NG4hct+FDMNoHejcWFSxiRsx68A+tPVhtxu6UhXzF9BTUiCqT1oEOdd3fAogPqM0ka\n7l609W25HSkOwSN6HBND52j0pFX5v7wpzsChGMDtQMjVmVcdqn2kMO64pkUZVd3UVKOV9Mnm\ngNhCNw4prERKcmnD7vy54PNKQknWgPURHPy8ZzTtpUt3BpseQDxgVJuDNjpQMTO2Pkc1IvC5\nFNX5id1OOOtArgWPXHFBOR05pD/qz+lLGwOD7UBqKvrT
ucU1R2zTsbeCc0CEyfXFO6EfrSEh\nselDfKAQODQA/nd7U1Yz5hJ5FOLfKe1COu05P4UFIBGznOeKc6/LgHJpA3y8Cnw+pFBO4Ddt\nznmnDAXkc0jgN16UjEc4HFBa2FK/MOetSv8AKBxmo1+4CVye1KpLd8UCDIpdzHHFG3t0NA3K\n1MBx9ehoZSoGOaA+7ORSfMGBpDHxsdhJHejce4oX5u/FPClvmJoGNRir9COaevfOMZp3bB60\n3AxzQSP3jAHpSKxdjkYFOXGM03cN55oAVVK5bPFOyzKNvBpJFXZinKTHgDmgocFKrzQhxz2p\nc9c8g0bdvB5FIQ3O5zUix/LgUnyZBA5qUfKB700JjWUKvNCqOtD5+tKnLADrSGNb2H50uwsu\nTSuwbBpsjYUE8ChCAMCwGcGpJOFAB5qNY1XnqaeMMvvVDDbx605cg4IpyrtHXApd+7gdaQw3\n4zxxSltwx2ox69aQqQD2FADWz6U7ay4yKFjPXpS+YQCO5pEjcGnp8vU5xTAMd6WMBmNIY4tu\n56Cmk7iARTmU/d70igBvmPNAgDD04pMDdzjNLx1pvlllJoZSDZ81SZC9skUka9CTzT2UbsjI\noKGMS3OcihiW4FOSPb0pQR0xg0EjNuD8w5pApUY9afyxwcmn9GoGNVeOmTUi/KpyacucEgZp\nGHzc0gEVW69qerBPem7ioOOlJuB5xTF6jiQzY6Un3WwRmmqysuVHzZ71JuDc4waAAfMwHanl\nfmBH0oUjGOjU5fu9eaAG5xkd6cNp/Cm7vlyBmnxlS2DQMTb82RwDSsSccdKeFzwO1K2emMZo\nERhs57elO+vSho+lO424zzSGIOFPFJjbgk05u1BQ7RxmgBzbT9KZgkEipCAOKZyWGOKYkhIw\nQM55qRffpScK3tTJstjH3aBjxIAuaYsm44zijyQF44FN27X3GkBPHhV55NSxjrmoFccVIpIO\netL1BkudvX0pN2F9KiLbmwTUmQy+ppiF8wrxtzmnZKvimjB5PX0pY+hpjHqR3pVkDLnPemqv\n7sk8Uix/J70gJvM+YYp6zEcHB9KgPy8HinKp5OeKYWJWbdjmhffpUY5I7UqttXnkmgCRfm5b\nIUU/aVxxx61Fv9elP35H3vloAe0hWPjmnq/yqBwajVtvfik5X5vyoEWN4JBHBpQWZuTxUayB\nuT1o3gtnPFAEwzng8CpVk6iq3mfKePyo3NwR0oKLLyll64x1p6Nke1VWPy80qykfSmIubtuc\ndKI5dyYAwfWqokBx3qRZArH0NDESyTBADjrxUiP0DVTZt0nHAFSbt3SpAteZ8uOop8Z+brgV\nUjk3DA6VYXvnn0piH+YcnFKJCfc1Hu+XOaRZPm64pBqT+aDgDg09Tu5zzVdZNowOacGDMMnB\noHcsht2OM4p/m/MTnAqurlScGl8wbMHp3pgKsgk6DFO8vI561FG235R1p4Y7gcc9KnqMEh5P\nOKd5ZKmjcA5yakSZWXbVCIWTvtx605I9qjnnPFTrIOh5FOVY2kyTipJIwQx68UYP0p5jXccc\niozuyeKCkPjc7uBT/M5qL7vtS7e+aQyysh4OeKmE3vxWduKtyeKcrHr1FAmaSycZ61NHcjHI\nrNjk9+tTK23HNKwjVhvAygZxU7MZlA3HHesmNgW96ljutvfigZsxNt4FT78dTWIL3ZxnmrUV\n0JMZ4JoGX93Q96N2R05qv5hYYxTgzBRVIW5IWIAFOzhumRUW4jmlDHnmgRMuN2SKUOUbg81C\nshbinMx/GtIyaJkrl5LkTKIpUDDG3J64rjvGHwu0vWLSZIrZGjlO4x4+XPpjua6EPxkEip47\noqQGOa64zOWUWfFPxR/ZsttOhlm0m3kEh3M0TLjB6njFfOWteGLzw7eT2V5C3nxjPTAwee9f\nq1qOhw6rC8yYEvQ8Z3CvH/Gvwn07xC8iahZJMGGPuDJ9Oe
2K2UjPlufnYrllZNjDHTdTmmLR\nqFOMd69l+LP7P+q6DqIn0q3V7eXLeXEflRQDyT614zc272c3lyptPoa2TRlKPKOZh3cksMHF\nDEEjLYIHHFNz6cdzRI+6TA6YzWhmPYFdpJ3ZFMDbWGeBnrTkYsmRg+xpVkCrkjd2waQmKWVt\nwDc9qiIfBBAz6ilIThiOAaeQdvAxzTJIVwV+c/NUqtgAbMt2NJiM8sORSMx25BwO1SA5SW4Y\nYprYVuu49jTly0fJwaTIZRxj0plDo90gOW/Gn/MGyfmwOKhVQgZi3y9xT9x5C84XNMBwZmXA\nHJ5pybGB4w1MkbdtC8tjJIoGA3oW9aBD9+M7fvY60yVgyjA59cUxpCi9CDnAqR2dlAPFA0NY\nHbzyKdudkbyxyB1NIrLt2s2acULShVPy4zQJkZ/1Y+Y7vWnxuTwTjHcUMgVNgPOcmmAqON2R\nmqQiSOR423FQwPGaXjBzwetOXG1h26VH5WMnOfrTGh+0CMHPHWol+ZhgkAHJ5pWZeQego3Be\nVXjHWgRMGG456+lCszYG3FM5bkLk44NKGO4FqCR0jcHPUHFJtDfNv2gdqRf9Zx/FwM1JJEQx\nBAYUFEcZWQNu5JpYm2dRkUsfLM3ehlBVuMhTzQA/zFXEZ7mnxt5fyng9s1Eq7hluo5HNDzCR\nsFSfbFADlYF2GRuHakODGRnPrTISFYnbv7mnHHXbtB5pgB2quV4PtTtw+UA54yabGNiszfNx\nwtOjQRxbuCT0NMCf7oHTcab5jNIQDt+tNVi0Sbh8+KDH+7bPzHrmgnqKZg8ZLDkHGT3o3NgM\nBx0ojjKoFdlx1yaebcBUYMcsM47VQMSObzAccBetSrGfLDFwe4FRmFvO4GVxUm4qcEY4piEW\nYLnPIp6puXGdo61Ey/JlQSM1JHIWXI5HvQMfuEmQvDDrUartDAnJYU/jcWzTIwJOQDjPWgB5\nVlQKB0Gc96VWKvkng0vmFQ2eRjtRGoAUnluuDQBMr7ep7UkbZ5Bw1EZ3OcDjrzUpBdfkABpA\nNUhmG3IbvVjzBu68dKrKxVuvIqXIZfu80rFFpF6b2wewodtrYU496rfNJgsc1L5m0YH0zRYQ\n8gLHtVs5NOVdwbPCdD9aZuwFyvP96pdwK4PGf4aLBYjMJXaxbJHTFDqVXcepPWlY44x81PDM\nygAde9FgZEFBfg4BqRW/duq9V5oaPJGenammMqx4zu609QRLuLwgsvzEdaPM3KAeDjFRhjjg\n9KXYSA9MQq4ikCFevU0fdYhc5Gak+Zo92NzDk0yb/ZPLjOaBiLllXf1znNP5LcLx60rMUjVc\nDFRSzMqbRQJgzBtwPAB61FPJlhzkYpPM/eYPQj9aY0gkZgnAHX60CI/MEakdc0rE9e1R7STl\niM+lNmY7fQ+lPoA8L5asfvMacqmbHUFaRX3IvGTjilTIYFetBIHcFKsNrZpJn3Kh2736YpCz\n+YWLU5gY1fce+BinYY7lUO7rS7vm4HzYpjOflAB6VNHndkrzilYBsbSK2R90ipF2vJk+n608\nqCwA6Y7UsMYRvmB65BpAPgjZiA2F74q15aFQeuKgYNx83406EllJJ59BVIVu5aaQDCjg+tAY\ntjHT1qOPLKd3H1pF3MSP4e1MC0Sok3Fsv6Zpd25t6nrwRTEhTzN5HzAU/blsthV600IfGGZi\nM5qaMr5jKV5xxTIceblTketTrtkkOOGFUIiKHgZAY1OsexwByMc03yweepFSx853JgmgQ5I1\nbvk5pfL/ANI2t/q8dKWCPazc8UjN5cZZic5poYfd+UDGT1p7Ydip/hHB70jSMqrhd2e/pT/L\nPmDK4OOaYySIDdgff96mkUFiAe3JqBc+dnFS7ct8w+9TuySaHEbL71ZRnXPHFUVxuGOCO1WE\nmZGyTgHtVxFY1LeQsoz0qUYdmA61lC4K4VT1rQhHygg/N3re
JDLdvuEZc5IFThhL82CBVeOQ\npGQaeoeRAedtboT2Jtp8sEEZJpJGO3AOT6UowOnUilAC89TimZi/NtUgYQ8fjSSuABtHPQ0q\nyF1KtwF5HrSR5nwCtFihnLA5475qCf5lBTgd6s7euemcVFcxhV5bC9hUtBcyZl5xuHPWq5VV\nxzz3qxIqqx9c5qvIFaTBOBUBcVmAQhiSPSkZwrq23gDimPMMcDinRr5ihmGBSAlWQsct8tP3\nDdkdai27Qcc5qSHbJwOMetMB8b+dG3y9DTlZshT0qEM2Djhc1M37tQ3XNADmYbuTkipIwI0J\n3Es3aoYex65qRZA3Cg9cHNAthx+Vdm3r3pVjMibCQTTljzxn5uppqr1IOG6UAHk/LtPUUnzK\nBjK89Kfy2M9QKGJypPJxQA9FBYk9hmmKwLcNgnkUjHKk/nSRxrGoYDOentTQD/3n96il59KK\nYHwJjcpIfzFYZGOtIFClSp9tvakLbo1ZF2jPNO+9HtQHGc18ke2PLbckDc6jkUyPlSTyGyeR\nQ3+sdhnYByvr70uXkdQWG0r8uKBCbi0a/KNwPB71J5co/eJyueVqEKqKFY5OelPSJkP7slie\ndmaAJDIVCsy53DOf6U1hubK4HopoUdV3ZIH3feljXzJFLLtOOc0AOWQydWJPQ0FcMGPyR9Np\nPek2iSQc/KcgY9aR7dGkXO7ntmgVx6tujXPPfipDlP3mMg/w1HtAYBBgKMGhZHhHK7gTQBOs\nZdQyvkDllzQrjptLFRkZ5xUP7vzDzjd/DT/M3EBOO4B70ASSEsoc8luaZDGVbfL07e9EbK2d\nxPHrSR/vhiN9wz0agY+RvKAPOM5NPYMpLA7kYZXHWlEf7vcvY4Ipqq7YLYBPagQR4DY5TI5p\n25FwNnmFjjdTUWRVbLAtnGPSljk8vcD8x9aBB5gRjhsLnBpY5OrP9zOBTVxNiPbhuTTZGLbV\nK7cHk1LAnbcpLFsjtUYmA+bYWOKCXdmHVAOtI29cBDnAz7mqGSRncNx+73NICNvByxqJ3G3K\ncjutStGEVQoyp60rhaw/eWwQ2CBzihLg7MgZPrTGAjwoHU9adJmN/lGVXqKQDlYcNjmhgm5Q\nBj/apofEW7OAzUcDKhcgHOaYMfhd5B4/rSOzrHswD3p33WX+JT1B61XMjYbjaik0gW5Np9q1\n9dIN2xWHXP8AF6fzr7E/Zz+GMei6bbzIp8yZt57kA+tfM3wt8L/8JBq0MYYoyYYgjOeeAPev\n0K+F/h0afp8USp5Um0Aj09qznLodVNdT0rQtJS1twByvTNbyR/KMnjNR2Nr5duo7DjiraqNv\nIxWBsIoXdgU7734Uox2H40KrH6UhB93kVVuf9WzdMCrdVLtSysOxoGcxeNsy56Vnvcbf4q27\n+0EkZGea5u4jaJju7VrEQ+W5LNxxVKST5j69qfzJ0p0dn1Zjn0rRktldVaXtU0NqVVjnrVmO\nMKnvTsDYcnFNC6Ee0FRkYpu089hQ0uFwetQyXBVT6VRA+Rh5Y45qvJKisSTg0kk+QOwxVKaU\nZJPNVcVieW8RVyFqo+oNu+ViDVeSQN04FQswqRkk07Tbizc1BuYYXsaNvyHuKjEnzc80hjnk\nC8VH1PHNKzDBOOahkfaud2DQIDJ8xxxUbEdutJu3SHH40N1wPxpAEjcjjFRbwzcdKJHHOfwq\nPdtxxgGgYrbucdKj3bScipC23I/WoTls5PFBQ/AK8c+9Rs3PUZpokz04FIcdcZpAOzheOaaW\nG0+tM+70z70oZdpJpAJIx4K8imNIeaCw6A8035mbk4oARtwU9qYshb5eppzSbuvSmKvzc8Gk\nMfnacE4NRt96h1DjihVx1PNACNI3TFMbKjFOZux4pCcr70CI2+9z0pynaM9R2poIXr1pVI9M\nCiwwPzZNH8PJxSs46c4pjDc3WkApbK4NITlcZ5prZ6dvWnfdkG
DkYoDUa5BXAWjdtUc0M4Xr\nTF/eYIoGSFvmHv1o8wK3HNDfNjK9KVvlxwMGglvqKW3JyOaQEcD0qNs7s9BS7TtOPzoLFLbp\nM44pTk9TimRklMZ6UL8zc80rgP2/KecmmbQPvdKd/FQy7+M0CG4HBXpQeWBJx9akVMR8VHgZ\n+YfSgB7rtHHJpVb5cVErEkg5FPZQqg5z3pgAJ5yaXcVAxyKXbuUnI5ppX5Mc5oDYXc3pR5km\n3AFMVT3NPVdvIPPvUsNxsz7cAqab5yntTpG3ZHPNQsu1eKAsWScYFBbv/OoYPlAd/m+tSNjP\nt1oGSFtvHY0SMGUAHpTVIbmgnbzt5oELuYLg9KRCw60p+VQxpSvmcg4NAAwzwKVQMZGMe9MO\n7ae1KijIzwKBhjv2peOcZBpy4bdjjHrQrnaSRSAbg7QD1znilZuScDFP3/KGxUZYNxjFMQjf\nMue5pfmxgHjvSsvHpTQpWTIOVoYCKNvOaVWxn1pzMvORim8q/BGKQx6rvUKTTR8rletA45xz\nTUznNADo9244HFOZTvPcGkbKjOefSmKzcmgBV3K3HWiThsmlhG1WfvTziRRkUAM2+YAewpcD\nHBpoO1iucCnLGfvZ4oAc25uAcD0ojJxik3Ecjn2pV+XmgBvmbCT+FKrrs470vylTnBJ60MwU\nBVGM96AFySvTilRSeScCmkDjDZFSKPl9qkBdwVjz2oZjnJ5GMUnysARTGbpigB+cBQTzQvyt\njGaapVue4py5znOBR0GOC7cjselKpz8vQUzccZNCt8wOaaCxIW7AU04YntShgrZI4poAzuPT\nrQFhdvHApDxjApySfMfekU84PTNJCDaQ3Lde1DZGD2pXbzJOBzikDeYtACNyuRS52gU7dtTG\nKjDbs80DJN+cYNPVdy5BxUS/dGODT/uxk5wPekMVcL0pzRnPJ4qNMGMc5NPMm7AHA70DF4Re\nKVMFeuRUbOu4DNPVlPA60Ej1kGD6UM25gegoH3cHFIvzcYoCw5VKq3OQelLD05NNGVPH3acu\nGzg5oKBVznPSkYjnHWnq20cjFIRt5AzQK4Lu2nPJp69s9aXb6elOXGMnNA7iKp6mnqoVQD60\nrc4IPB60NgLnGeaBCbguQOaTdgClXOTxgUnHPPNAIUMPSnqflx3qPkLntTmG7G1sN3oAGz6e\n9C4kUcYNDDPB/Ol+U8AUuoWFUlWPNPV/fios916UbhzTK6FgNuxgcUclvaoY2PUnA71MxGzK\nnigQsk3krkioo5jKd3QVT1C4aNjn7uOKnsUaWNT2NMC22d4PWlyWPWmNiPrzT12vgqvHekOw\nKpVSzHilCDqWINEijHHSkGcZoAcsZToc1Kq4PNRK3ygjrTy+4bhxigZIueSfu0b1wMjio0+f\nILcUpX0+7QJjlHcGn4+XpzTEXLelKXZenNBI9cjqtL5m7vimCY9D3pcDANAx65UjPIpyyHOM\nVG36Um5mXg89KQ9SzxjkAUZ/yaiUkrgjkUpYtjjFMRMuSTnpRwrFifpTAzBemaNxZOR9KLAK\nzfMOKHwy4PJo+YuAOmKkXO4gdfekBHGQopy8HJpdtO+tMoXtz92lXbxjrS/wgD8aPlXnvS9B\nCc7yOtGSQRmg8cgU7I25I+tIbE3buCcUfeYc01l3tkdKew8tQ350xC7PlOabxxgc0of5SRzm\nm4KHJPJ7UmMd5btJ1/OnNheCMmk+83NIvGc0ALtHHcGlZtvTpSLIM9KOd31oGAbd2p38OM8U\n1c7sHpTmbcMAYoAkMfyg559qF2kE9GpvG7rS7RwcUCFUFeT0o29xzmkLds0ik9BQUh6sVI5p\n+4MeTSLGWI7008ZyuDQIfjkDt3o+7nHFMGevanjL544FACH7uF/GjbtcHFOXbuGRinpGN1Ax\nh/edODSq21QMc1KML/DzTdg2ls/SgYvTaOlIQGBPQ0feXJ5NOTbt
IoEOwNobjNK3zgDvTNoI\n60/d7HigQD5cZ7UfeJwM0n3hu7UhbH3TxQAv8PI5FOVyVoUlvm/nTV3ButT1EGRnHenDjrzQ\nibskjFPVMe5pjBVyKRvl7Zp2T0xQ3IyKBEEz+UCx59qTzA0fTmpmh3ckZHvSeUV5HajqAsan\nrjAp64X2pUG5aGbnpQAbV3ZXmkXg8DmhSf4RTo15yaY0LxuzQr++BRuBYr3prLubpQNkzMuO\nDSLk8jik4XtzTgdoJoAazZPNLz0HFDfNg45pd27jGKkY7nAGeaA21vm/CmrjoT81N3buvSmQ\nTh1zTVz07U3+EYFC5IJximA/cV4zxT1OVNRZ556U5CzLjgUDsSKeMH9KeuCvJxUSsVPrTt24\ndMGkBOpwMdaQkKox2qMMR0o5agRYDBlpFfbx1qPOVGOtSDaVxmmMVm8thgU/+LIPWoj6Hml3\ncelDESLjcxzSrJ8gx1pFZSvPamnBPHQ0ATq2361MjEqOxqnu6AnmnrKRnJ5oGWlOOvNP4xjb\nVZX4Gad5haTg0Eu5L93GDSg5OetQhsPyaesnJPagr0JYpNoJI5qTcGGcc1AHPWl3Ed6BEq5Y\nDtUm4tx0qsm4ZBGacj880hk24AnuaWPqcnFRtjg07cu3vmgmxKpPY81LkL94VVz83Bp/mFcb\nuc0hk4k25PbtSpL1yfwqBiCvWiP5mJ6HFFgLDSeYoGKdtPrWfMzbSwb7tS2t0ZYfm60gLDKR\n1pV4+lPVsoM80/aOM8CnYBseM8CrC5K8iohGrNleopVV42yfu0WAkwRggUnnfNjHFMaQYNVT\nME78UhF1pBwScVLDMWbrWWtx5sgBOBmp4ZgshGaEh9Dchujjk5q3HIWYDGBWLDJkcmrSXBPA\nODTYI05GPTPFRtIGbrUCXBZQDzUTyryaOoGgrFcelO8z5hzxWdHeErhuKeswY/eqhMvPKPoK\nTduY8VXSQMuKn3BcH1FXEzLEDGPBz3zT7+xi1GIkAK/XI4qBZFUDvUtvcDHPAzW6kYtHDa94\nXWUOroXPTbnAHvXy18bvgCJrhtR0uNiHfLxoOVPr9P5V9wXlqlwvmKAWBya4fxFoyX0MgXqR\n+XPNaruRa5+Z+peFb/Rbh47yKSJs5w47e3tWXxG+xjkqK+2fid8KU1nT2RUAmILLIFzg+hPY\nV8iePPC954XvIxNCUPKscdPeuuMro55U30MFZB1HBzzT1YckHNQK2cgc+9SFt0eNuB03UzMe\nBtjPO4t0FIzF1U/3TTD8sZb+JRTWKsqN93POKBWJC+5nzx6UkmNpXNCuFRuMn3qPftyBySc0\nCsSKrfdz0FSbtrA461HGT1JyvejDYJHKimMccMCCetKyldvlnB6fhUbMf7oLGnhm2gkY96YM\nezbFOw896axLdR8wpijapI5HU0NnduAyMcigTFWQiZlILD+9TucEM9N3beFHGMimeYec4OBQ\nUTCJVBYNnsKVZCqlCc1HHJujxtzTdxHXgdKaAkDbSecZHeiNdq+vPWmKoOGzupzSKPk55qhE\nvzq21eR1psjFh1zQGK4APNMXBYhchKAF7cLk1JHjnexAbgexpih1ZgORilztUAjJNSSH+rVk\n3Et0zSquyHlssabuJIJGFzigY6FcHOc00FiaGQr/AA54pRlgcnNMhdizbWwMcUyOVlBJGaEO\nxNtDEHtj8qbG2N+D97rmog/T+EZqZ9u/dxuI/CqsFhqvnCqec4NTeZsycfN0qtHF5OXY7sno\ntSjLybD8xxmkFhY2EbHA69fSnck9wGPSkCtzuHOPvUivuXn04NUIXoCGNCsGYM3IXtTW2tkg\n7iB0oZ9pGBxnFAiZSrSDkrT92GZFOOPzqNgoYYP405pFLbs+1UA7achSMjGeaN27BB6U3OAW\nYZOOMUiNna4UhKYE/wA/kkgknNOTdJhvvColmxkHj0pBMu4KoIGKCS
ZWKuMkBTxihFMe5c96\nau0ZfGRjr709skKRwTzQMdtIAxj3zTpedvHyf7JpgxnnnFOYmTAAwtAC7jtzjHYD196ax6sw\nIUD71PbPVD7U1squD91utAD13xvhR8uMhvWpBhmJLYyOPrUHznkNx0p8bLn5uT2oEywqjC9y\nPWnNcAISyd+tQMN3Q4OOlISyxqrLuYnt6UAiwjDaCp4609pgVwBiq33lIB2qKBldu1t2TS1K\nL24MikjNEcvmSkMOKiOV5bn0FJ5mxeAM5zTGTRTBWYMM9cVIrnaFTv1zUEbDd1BBNDShZCM4\nz0NArE4baME85pysC5bOB71TaYvHu3cg4NSBtwAY8UATL8uWfoTUgc7faq7ScENjikaQYwOm\nOlIRP5pWQEH5cc0M6zR/KduDVPzNvPbpTVkDfKPrmmBcN1uJXH40x234fOOxqu27nmmybuhP\nGM0CHSTfNwMntTUbrzjuahWRmkAxweuKljUvEdp4pgO2lpBhc96WSMyZAXgdKYuY8MDkEYom\nkb5PLP1xQSOB8sKccE4oZ2hkY7f9kChV3xnd1zQGk4DcqoyaLDGqpKOz/Ke4NORvM56r1pqs\nNjO/IboKd5ZWNAvBPQUxEiZkXnjuMVNIGDLtG3jGaWFHgxkAk/pU24+cCwwBTGNhhKgL3Izm\npdvKlsBRmpoceWGVskdBT5IY2Uvn5vbtSAj8nzFBU8Zo2LlmXjtTxG4RWXr3qVIwyBipBzwO\n9MRGynaFb71TeWWyVXjFK0Kq28HPY05oyq8EjNAhQu5d2cDFKUBUMfypRGPJ4OaVSF4bk0DC\nGQDJC4qdVO5XH3TQBwCBke1O3grj3x+NaIhg8gViVNO3PIoLHB9Kj2BVLA5PcU9Sp/eHqeKQ\niVGCp1p7TBlwy5FRbcrg/XilztIIqhlhsLs4x3xUituLE9e1QNJuYBlz707zCzDcMAdKYDkU\n5PJ9akyV+fPGOKjUtHzjOak43gdqrpoBKgG4HjJp0jB02nhqrtncccY6VKrAyZzk4qkSTxsq\nsmRjFaVsvmscGsxdqj++Ac1fgYlQcbSa2iSy2m5WOeRVtdzR4B+WoLdo36rzVqHaqkKMitiR\ndwR1AGRjrS7uPfNLGfO6cEdqbtbnPFUSOC/vAzDFEbHzGA+7mlVg52scnqKWNAku/PB4xSYE\ncm5lxjBzTLqEnbjnjpVmRiowBk1G5+XkfNSYupkTRiRuRx61lzM3mBcYjBxmt2RQFbavPesu\n4ZfK24zzxUMZX2ovTtUkbLtIJI4qLejKwUcrzT13S5B+XHSkA+NgVwfzpSrKwDHHuKbnbhOp\nzmpZJEzsOSaAHRybY9vUZpY58yOCmVA4z61BHIi/IeeeDTpAfLwp5JzQMsnEePXNPkyCNvBq\nJclQxOWJ6U+FZMsT+tMQ+TIO5T2p33cMwwxqNW8tjg5B6il8xpCdoyFoAnZsRFh1qPO7btb6\n0xZVZcbTmn7D5eF+U5zzQBGMoxy1PjcL3zTvLU1E3ysAMBu9GwibLepoqPzG/v0U7jPgpm8y\nEFV2N7U1oyeVmxkfNTo1+ztwxK9aI2VmJKbs18me0IsqzSsMkbVxnsaTcVwfuoPbmlMatg42\n89qIy6ncCCG4pDQ51GOV5bn3o3eaOpyvzACkCtncWx2ApSrR7dxA3Hk0gFZt0h27RKDzSb2m\ndi3JHpT9wMxO7jpuxTYY24zwN3HvVAPMgiTGDgDdxTV8xsuF2lucUMGaRs4EfZe9K0jFsE4T\nFIBVUr1G0n7wp0TLlh6d6OGbbnqKSRg0WxOMHFIQbjICV4PQE092MkirjnI7Uz7q7FPAH3qe\nytLtMb/Oo5/xpkibnWZy/J9af5hkXOzaB17ZpjMYUUsN0pYY9KkVt1wXbB3L09KYAsxjkDfw\n9cU2HEcUhZjubpUgR+fLO4DvSMQzbmbA74oASInaFwxYdWFG47jg7hnn
FETNhyCVXpUrMkMa\nqo5xksfWgByKIlYlcHoHqLdJuPzfKDjmlLHaSxI3DG0/zpqKTHtz9f8AGkAoZpGZiNg9BTvl\nt5JN3JIwTTlJ3BMbu5PtTV+9JwHDetIAPEYKrtA6e9OkkjRRyWyPwoaOSONTwe230FSsoIGC\nAMY2mi47kaxuu0/wn1p0m5YeDh88j2oMZm4LZ29PTNSbRhix+YHGKBMbjzMRqOF+anvCXG7O\n3d0X0p6f6tiyc5zx3p0mOuCBnikBWCMuC5zt9KiEaz3CKQSCSSM4q35MgboQc8Cn6ZZtcXxE\nhAQOF+tMaPoT9nfwbHHGt6RnfIG8z29K+0PDIWFRtO7aBivnf4F6LHZ6HbKEK4UMdx9RX0Z4\ncj8tkkxkdTXJJ6nfFaHo0OPJXjAxQT82MUyyYS2oO6lZsNipLH4P4Uq5BwOlNz23c0oyOe4p\nEjtvfmq8y8mplYtkdPWopgc9KLhqZV4u1ScCsHUIUNdTcW+7JPSsW/sMqSORWkZAc9FH5chy\nMipmG0c8GneWVmGeKklh3KTjk1rcgpsSpHeo5M9zViRfLXLdPeqcjDByaokikl2tz0qlNOwB\nHUU6aYbsE59KqyP83J4qhFa4mkOcE4FNW4aRRuGKmdQ2c4qqykDpigB8mdvXmo2kXgEUDG7J\nPFRStlenGaQhZJGYcdKh8z+E/nSljtx0qJg3egLj/M65OagZSy8+tPbG0EUwsWJApD8wbCsC\nDxTc9fmpG4UimcEZ6GqKBvlySKaZA4GTxSyNuPNV260mBKx+TAOKZu46c96axOM9RR/DmkAw\nNjIPSlyQp29aYzAcDvRu2nPekIXkbSaRz2A4pGY7M9iahabDEZO2goVm29Bk0kmcZJwKJFOR\nUbZXPepbCw5mDLxzSbcdWoX5eajx0Oc80wHNhvlAI+lKzKVpBu3H0oKjHWgaQjHnOM0g9aVl\nA/KkJZhjGFpMQPGFIPXNJhuQRSrntzilLHbntTAiVjnGKeq7mPajOF5FNY/rQwFZQV68elIj\nLtI6HtSN1wOlJu2qfUVIDmw3bJpjdscGns2QCentTerHA496YBuJXGaBu/CgEHIpgbbxzQPc\nerDo3X0pFkJyMfhQI/MyT0o/1Z9qBDd3bpR06dKXOF3Y3A+tCr8uCQPSlYoduywwKRiU6DrS\n57rxSMdp65zR0AF9qXaGzng06Ngucimn94xxUgMbOOetC4xg0rL82ScUqr1z+FO5IoH50uct\nzxUZLMCQMGlUnIzTuMCo6/xU9mLLx+NM3At3pYz8pGcVIEiqNgqI4ZulSL8q9M00/vAMcGgY\n3aPXil3FRSrtxgjmh2DchelAAjKSdw60v3W/2aRWDe1Iud2CaCdR7NnpyP5U3K7s9aGYcgUu\neOBQA7zNuOOCaXPmNgcAUxlHG403cVbjoaBkjfMvB4zTto59KZjauRTQ27gUASmQBtp4FJyO\ne3rRuXpQ+O3SgQijqc5ox8uQ2D6UxX2sc9KXILY70FDnXIGaanzKQRjHSlMgyAe1ODbs8UAJ\ng7c7qbGxU5JyKXaFzTVbDZI4pE6jpcHrnNCtleaRmz9KXYSu4mhFD41PJPIp4YHkD5ajic8+\nlL5g6dBQIRh8xBFEeWyM0gO0H+KnrnaGApgIGC8Dg0bhj0NNDfvORx60eZ8/K5ApFD9vcDij\n7zc9fSjduXg4FG35uDSCw3aeeOlOjk9elAbsaBHtbORikFhd5/h4FOwWzkZxTMdeeKN+Rgcm\ngQKu49MU9gVUA9aay7cKOvWns25QT1plIX+HrS/Lt4FMX5+p6UrdPegA+bv0pzMdpyOKRmMa\ngdc01Wyev50gF4xnvSr655phYs2Ogp7KFoJDzNnPWlbaFUg803IUmjcCoBGKAHdutI3y5yKH\nbkbeBSfeNAwDfKGyKJo2kX72FNCgZwOgp7LuXHSgNSGBWhbaasKTyPWmrk
84yaU5XkGgAaHe\nhxxRHEV5PNO3cZpfu/xcUrjH7hxmlZueDiotw5I5qTbmPOc0JgOLBAMHNLuCc46+lRRsO4/O\nnhhtIP50CCSQ9OtCSsxGBxSdCRmnqM9Tj6Uxki7jkd6Eb2oVt3Q4pOWWgWxIrYY54poyynPW\njdvXpg96OjA/pQMIWIZu9SEBgCeKYG7jjPWn7gxx3oGG0ZHpQWCtwCaNpH1owSu7GKAFbpxz\nRGpGT2pq8qR0Whc7cZzSGPX07GpFA21Fz5fPBpY8pwfmpgP3BV5HFSLjHIxUW48kil+YnJqR\nBLGk3ysuakRfJACrgCmoSc9qfuznPSgLDW+bnPFSRt5agdqYXVlxinFt2KAFLHHHehiQgGaR\ncls4wKecL2zmgoQtyOwpSxZsDpSYAI9KMbeBxVAL93PFSBmbtximLk57ilXcvuBTFuAZt2Dw\nan+7gVBsO7d0qVZMdRSFYdgbSaftHl5qBWJb2qZm+WgYrqTj0oUfMAOKG4OQe1NGcZ4zRcZY\njPGetG3LZpgp65ORg0EgqluvSkX5cgilBJ6GkZ/mPFSA4Nj2pjM2fak5kHTkUDO0A0wJVfzP\nan7gDjrUUZUZxzinqy9hQA4SfNjFMbO7OKUNycUf8s8559KRQ5W3YGadJ2FN+VVz0pd23HOT\nQIeregxT2bnaRmmN2A60O248frT6iB5Co4UD6U0EyNuI5oXjhRkmpGx2pMocvTnFNYnGaQZb\nj3pww6+2aAHKvy54ppJ6YpduGHpSsPTNACou3k0gbbnNGdvU8UKfmPcGgBVxnPWlOecUzlen\nFO53deKAEXnHapVAWoNvzYzUrNtT3FAxzfKucUZ3LnFNRzIeelBb5z6UgHh9oH1p6t8rHn8K\ngLByCO1TR5GTn8KADaN3NG4u/oKb5ny8jmnqQ2MHmgCVcs3I5pHJ24703zPn9PpTz/eNAxqs\nFGAOak49OTTFyzdMUBsckc0wYq5VsY5qRVLAnPFM3d8cmnKoVcUDDIAwRTev3RzTjwvNCgHn\nFIkFUgY70rDoKD8rZxxTiOMjrQMI5BznpQjFSVxyaVcbMYpducUMYN29aVGG7FIxC8URkFic\nUxWJNyt8opM44xTM/wAXel5YA9KQh3K9KCpLUuD+FMLc9aBjo1bnjFIwIyKcudpIPWnK3ygk\nZpdRDNpPOKVn4KjqKcsg8w+lInLN2FUAFunrinqDgU1e3FC7tx9KQCuwVuOlMEny470/b1z2\npuwbs0WAX04+alLBWwVo/A0ZP8S0ADA5B7UKx24xS7dwz0FKuetMQu8MuCORS+9G5d2DSrll\nJ6YpAKctgAYPWnMwXFVWErOCsm0d6nVt3P3uaYEvY80K25eDzTG9acnC8CgB6sYxQrAcmmbi\nzYpQvzYNAx6sTlqU5wDSNheetC/vM88UgHZ3dRT9vy8VErYwDzTnbavB5oAlTPpmkU9ciouw\nycVLuwnXmgCSNj60nJPXFR+Yccrg0bvl65NMTJ92VIxihQWjABIpicr1pe33qAtYlVyvBqRW\n3Kd3SoFYdPWnLlQRnmpAkaX0PFPRhtJzzUBI6dKdwRgdaYydm+UHNC/MRzgVArbeCKcrfN0o\nGWM+lKzHbzUe8DPagSBlwaRJKsg44pxbGTzio1cbcdKUNxii4+gkjhlYgcYzSW2OB0B5FRzM\nFAA6k09flUAUydi8jbe9SbmbmqyseCRU0bNls9D0pgSJMBIAMgVZ+0ZGGNUlIzzT9wb8KAJp\nEDdOBVG5jAHXmrYkytQSHfxjBqWMqLG33uvNORiuSRzVmFNuRjrUnlhl5FMLiwz8A4571bil\n+bNUvIZVJU4pUdl7cetHUDUjk75/KmtJx1yKorMyjr1pfMOQO1IC4shz7U/zM9qp7/m4PFTw\nt78U0DL0bkjOKm3FyB2qrGxU9eKsRvxx1rRGZMqnHoalVjtNQodzZzUqc55r
REssRzEx4Gax\n5tPm81ty4BrXh9qs+TuXcTyK2jcxaszhdU07zF2bc7j8ymvGPid8N7TXrOUraLIyhmAUDO7B\n59z7V9H69ZAokoXb9BxXEaxo/wBoifa2B/dreJFz82PEmjv4d1ie2cGNgx3oRyv+cis5d0i7\nOFCjOM19b/Hj4YQ63p4uYolF8q5Mka/MV9/XqK+TdU0u40m58idGXqgZenFamEo9ivyzYbni\nlTpkrn0FQx7kOAPmAzQrbW35yvcUzIkCEKcj5s5xmlVyW+ZRtxSFjnd0GKizIykdutAyZQu3\nOCOc09VLH5OA3U9qYrBkC9T3FI2YztDEHNAwfG7A/OjzNwwDwOtHmEnIA9zTQwqkIn3KuR2N\nC7lY7RxUWcjB69af5+EHYetHUCQY3dcHrUe5cMAuc0wMG+YGpGYINwHJGMUxAuFXABHpRu+Z\nRjg9aaM4ByCccU1mLKecHrxRcEOjkKM7cMccClz81Nt1UyLg5YDNPjcsp43FaLjsKPU/hin7\nh0HehThc7eDzUX3t5Ixmi4EpO0hG6/3qFbK4weDxUManygScnNP5Dbt4A/u0hD23rnPPcChZ\nG8sbxgtTC5+bdyexpVZW5YElRTAcylFUjjBzml35YnPHU03zGXr0x+FKxDYAHWgRI2T84646\nUR5ZeetJ5itCGXrnbikMoxgHHY1QDlZvLbLLH9ajMjKvX5v71O2mYeXtAK/NupyqNpzjLD73\npTGOmkbyxsO/kA0knygVFHtYE5wuefrTpCWx9aBD42/dEDg55PtSqyr1bg9Kj81WXAHOeakO\n1tp24X1poLEn3o1jz3yfWlY+Wo4+UjNRjJmG7oOQafvKcdRVgJGxjYkn6USb1+ZuQe1NCnHz\nHLZ6U87iVGN205pEjthYKQMH0pUwZGZuAo6Uh+aTLEqfamsy7iCOfWgRPE4EPH3ielTLIyx5\nJw3vVdWHBZeg4xTPMLbwRhfWmBP5vByeSOoqfePL2d8daqoV2KcBk9ak3qcjdwBmmMlUEHCk\nZqPzSNwJ3VFu6Opz2p4X5hlvegCbG/DdAO1PjTDFwMGq64bODnmpGkCjOePQUCsStMDICww1\nPWbfJjoP71VN/fGfrT7eQrtwuTnp7UhWLXCydeKUuu5QOXPtUYmVpWxyKZ5zKxOdrY7VRRaM\ngVTubLUwNuUHHNUjJ5gUnrnP1p/nbWwoNIC3yyjaQApzRITyo+83INV2YbRk8Uqycg55FBJL\n5xIwi9OD9acJCzLuOMCq7MVYlWwMUoLKVJXI65pAWfNMjFSOMUknzrgHaw9Kh85t2dowaQYW\nbPduKBEoYs21RzimqxzlVx60zcA2G4APXNS52k7R16800Go6RwVwDmmfN09qjUny2AA45zTl\nk3bWPyjGOaYxRugXzANx6YojlLBmHy57UkTZzlcHqM0gXdJkkAdaCepKrYUAVIrbOBwahOVP\nAHuewpVlE2d3yj+dUOxIqldzBs56rSxqCMkkGmoob6dRUkfzSBui9OakLD1Rey5fHSrUUbNG\nCRUax/xH73TAParKrwOSFxwKQh+0dMZbFPiQjBPU9qbJIVjTjBzyRT2kDYHU0x6j1UM5XgDu\nKfGEZcKeBUO1wwwMLUgKKwUNyaAsSRt+8IHYU5ZHbLA/N0xUSRvGx/nUu5Tns+O1MnqOA+YE\n8nuBRHKZWKkdD0py7doUdeu6mqrqxcAZHemMk3KTleF6UpUBDxuNNhwyk4G4nmlxuXaOD3qg\nFTMKAh8n2qVDuYZXHH61FGpCjavfn3qRWO85XHNUId92TcFyMc0qqOfl+9SfeyMc5zTl+Vsk\n47YpAOXEbEZ3U5d0i7gNo9TUS4jzv+9mnxsznaRhe1UIdu8x/LBzx1pzSYwCenBpmwx4Jbbz\n19PapI40k3GQfj60ATbiynb82KAvy7s89xSRqF5b5U6YpVKqMKOKaELG3zHPT3
p8eEfKvnPF\nJgSfLjr3pF2IpA5I4zWgFpD5fQZFWIZTGuSc/WqaqWwwbCjvU0cwY9M1cSWaqzFJFOcAir8c\n22MAdfWsi3lGzaxyP1rRimSQAk8KOK3RmWFkCsePvcGnyNu2oTgjpioI23rgnrzTmz5mQ3y1\nYmTBdvOKX5cDJwaarAx7V+9Q4zhQOfWmSSc5BV8j9aayZJLEjtT4cAnPGBTWmVuTzSGkUrj9\n2xU1nzx7m4X5RWlN8zbsZrPvYivPQe1Qxme7bn+VABnmhWLSMc45pkjfNjGDTdzdR2NSFiyu\nfNbJyfSjaucseai27Dvz89G8yMTjjp+NAWJlj5wBgdakSRRIO9V0yq8525xU6oN2RwaBEg65\nXmptxdcE8modwJITjHapV2+YMDPFMBcqpAK8dKQ/KQEGBUUihTtPDZzntUpYMMjhcUCJPMTH\nBpdzLgdQRUYUY+UDHc0M21hg8UAOwFwN2Bnr2pPk+7u3ZPJp0n3QAML1zTY403ZHXrQNDtsX\n91qKX7QPT9KKQHwUy+WrAtuOKRZPuA/dx0x0pWxHGHIyMfiDSsSZI2wArDBr5Q9saI9q71O/\nJ6+lNVjFt+XgcfX3qdYxAxQAFOp+lDbQFC/NzwtJgQqrTMNzhY+vT9Kase8EBgdrflU0jH5f\nLXay8USRou2RBhurfWkA1Y13N7d6G3SLvHzheT7U5iJ1BRuc/NUibBvw2RjtTAhUFlDYyzdB\nTtrrNGhXdjkk96kWPeyDdtxR80inDAIG/GhgN2lWaRV46Y9KcFG3e3/fIo2HBK8L6U7e3QLh\nSODTAjWMSnG0otO4bfgbdq8NUiKVj8wtuA4pvliOMkn5j/DTFYjVSww+MDoam4ZVUNuAHBxS\nbVZcOm8N09Kk2nyyccdM0CGcq2T1/nThGm7DnYnWn7VjUMfn/wBqpDGjKWYAjHA9aQECYZXK\njAB6djinqJJEAVOSM59Kmdd0aq4AA4IHejAWZhu2qF4ApCIdoVgDnpSBSpL4xzgVPj94meR7\n1KxGfnCgEZAFAEfksJAynqOcUkcPz4POOcVPwVwOGHens8e07hlccbeuaB2K7xhsPjYen/1q\nPLOxgy7j6d6kbiMZG4dQvpTl3HBI+c9CKRIvkxts2/LgZ2j1qRPLfO7hyeQBmgRqqdSvPJpz\nKdo/hJ9PSgdhq4jIwvmIeKf5eG4PU5+lOihWPcqcketTRjaeQCh7e9M0sQNbsmWjkDKetWvD\n9v5mqQhlJXfuOOSKjZcbjtwcU7R7to9Sh2v5b54P9KOpSSPtH4UN5mk2igEfuwozz04/pXue\nhTYUDd8o4NfP3wc1A3Wj2+5dpC4znJ+te5aK2wIpPU8+9cUtztjsej2My+WMHirnmD04NYln\nIqqvbIrRjk+UCpAs7uMgc1IrHioFl5zUiuTkdKQD921uKkZgV9aotNgkE1Xku3iXCnvSA0yo\nZTmqlxAGjIxgVFb3Lybcng1cXkENzTTHY5u+sRGxx9ahWI7c+ldHcQB1PArJms2iyVGfatoy\nIaMi6jDRHI+lYF1tjyu7JrqdokjZWB5rG1DR/M3OnFbJ3IZzU2d2QcU0kfU/pU9zZtDnf0qp\n69Rir6EBJleagMmWPpUkjEDPXNQN8q4qQG7tykYqOT7uGp+7jGOaZJy3tVANYjYOwqNmzyT0\npWIOR2qPhevzClqA1z0IpFbrg4pdw78CmM2M8Z+lIoVsBc1C7ZPApZG6jqPWmsrAZHIqgFbp\nmoW+br1oJ5yTzTe/vQIj3FW6HFP9ewpWbCH1NRtkLk4qWMbt+b2okXnrQzccHmkyBSAaGGCM\n8UMVwvHFO4CjimN8pIJ4oGHU5HSmt7daVRzSbhzgZNLcLDWb5cY5NIy7eR0pzLtxmoww9M0w\nF3Hn1pAdo+7Qu1SSecnilClW6ZzQIa2WwAaaMsMU9QIsk8ikKgNnOM1ICqWb5e9G
GTjr9aNx\nGTil27l3UDuMZiwwOTRuZAAeaXaVpjMwYDGBQAHLHFQ3VwtvGx6t0xUzdOKZ5IaTcwyKQDbV\nX+zgOevNP5Py5xTzH6U1lBbrg0wG8px3p24sMH8abwVIH50nKqDnmgaJNo2nbTGk3YyKFyik\nU1gSKBi7iB0NGMLk8miVjxjp3ppPcgmgCT8MULg8n8Kjjztz2p/oKkB/O3nmmhgoOOtDZzTC\neuB9aBCN8vzFs+xpySHAwMimFQ3JpwyORwKTAX5hkk4HpSiTdj8qaW3KARzSr8poFYXaVyOp\npFBbOeBS8ryR+NGcqccmgYbmDYU5FN3lWA70rZVRjgmkK570DJQwXAxyaazFWpOOATzQp656\n1ICFST6U9PvYxk03eM9c0bh1xVAO4GWI+lJuOcd6b/FxyKXcF56k0AO24OWGaVscUbzj60gy\ncjbQAqsZAcdqNpXnjNMXjtgU7zF6YoAdxtyetNydpNOU5XkUxshvagA525pVxtyfvUbh0NJ0\nwevsKAGtndgDNSBjwO9MZhuBB+tCuN2cUASnI60H36U3ll3ZpqybjipAdIp5UfWmyMVUdxTc\n/MctzRM25RjtQIWJh0ant8rAEZ71CF389RSTPHGhZuo4FAyZpBnpTlJVevFMhfdEGZcDFKfv\nANwMZoAeVO7rnNL/ABHPAqMMeOMGlZjzR0GCt6jilVu1HmALxwaIcsM4pBcei7RyaH+VSRzS\nMu7Jxik6L654oGDsGjwKI12kE8UnC4GPzqTnBJGaAEKhuQetKN3FNxt5A47UBmX8aAJNuOc0\nNIA/A+WiORSxyMmkP7s4wcdaAHO27nnFJxwc01GJ5PShmGaAF3e1SNll+7TeGOP4qd5ZTvmg\nLDQoYdMGmkhTyc08klTk0xmXaB0oEKvKjvT87cZHFN5K+lLt+XBbNAhI8DOaU53ZXikZdvJH\n0p0eWU8UFD1zt5OBTh83CjNRRsZABnFOZduQrUwH8suNoFIyjsaRWPpSk/KOMUhjjtjX0FCy\nBh8tN4brzQqBjgdO9AyRs4x1zSO3AU8UsjBVGKTac5OGqSRdwLc9KGkwDtXNLG6vzR7VQbEd\nur5JfpVhW+UYpoJVcYqTcAAAKBjd52nHWhd24E0A/N/jT1b5iO1T1EOaRVPSkj6l8Ui4kbHc\nUhJL5Bwo7VQyXcTmk3bslhmgrznOKah6jPWiwC/dXbSGTC4AwaFXb15pHyrA4DChgPVixweR\nT8AE4NM8zbSyH5eBUgSKv8Wcilx33YFRq3QDgVLldozQPoG4Y4pVGM89aafl4B4pVzw2c0Ah\n23tT9vy9aQtnpwad25oEG47cUu7avTmkj+YdOfWkO4deh707DFZ9yqcYGaXdlsHpUM8bNgI3\nQ5qQfMOuDTGSRqVJ557UqyZXOOT1psb7hkckU9GychePSgBUJ69frSbmbnpSDJk56elLu+Yg\nDAqQHxrsySeKckny5xUYJ2kHihXGA2DQBYyWGelCx7TknIpN25eOnvTcn0xTsA/fubgcVI02\n5cBsVCOOPWniI8HcBRYB24ryBkd6buO3pUmf3fB59KYzE80gFXcuMdTTu4B60m4qAe9PDAgH\nqxoASNVjJ4znrUgxs44qNW2OQehpwwMgjHpS6gPVe4601V8tjnvTo6fx/EKYEXlls+lSKo2g\ndx60u7aucUjEOvHFACbvm+lKzH04pVXaOTSfxDJyKbAF+XvinJNuBGOaY21mwKXhW96OoD+V\nxz0oRvmzjilVgxz2oPXrikA5lbdkHilEh24Api5ZuKcPl59OaAGtyD61NGRs4FUoGaRnkboT\nwKsq3y/KOaAJV/CkVtoO4cULw3Tml2nkHpS6lCFRx6UjRliTQvC4NDNt6UAOjXaPU0E56LzT\nVfLU9iFPA4PWgV7DV9APrT/vdOBTFHTB4pzN8wBOBQA4Lwe4qFXO8gVOuV47VBJGY8
kd6Yyw\nny8ZzU3mDac1Whxjk9KlVt3bg0D3HRk7SetCEqORmkTAkK54p+7t1pdSRQOeTRksTkYxTS2G\nHPFKzAMSO9MB+dxx2pf4qjYndwOKft9elAmOeQbfakOW5XpSMfkx3pVUL34oKHIx4zTmbIOO\nKSNhuPHNJvG40gHAblz1FLg7eBgVBhlPDZFTBjwtMVhkyttUj8afDuZfmH/1qGY8kdKVJeAc\nYNIY4ptTg0DbtzSrlyWPXvTCoyQBSBDoyWTg4FPX93njikjXEfFLy2fUCgNGIu1l9KNvynHN\nNVgy4x9akBU8YwKEFgKnyxg4pcBV680KoIx2o4XkjNMQise4o3Ads0eYrfMpz7U5uPmHSkGo\nhYN14pze3WmNT1ztycCqATBbApzKRwOlC/K3I5prEs3tQAq0qq3TPFAIHbJoXJ6jFIBGTPQd\nKWGMrwRTw3zeopyjqaYBtpvKrwc0oLYz1zS7Sp4FA9hA3y+9KFZcHNMWNt249Ked3Skxhyxy\nKd3x0pGUqopcjqOaYDlNLt3NgU1RgZNHJyQaAEbDMc/hTh82BTNvekGevSlYklYY9qcq/KCD\nimxszDpmnY3UCH9qXaVUnNM3Hdt7U6jqMUOKXzRuzUbcUuFZfegRL52VpwkCLmoVXC+9KpLZ\nHTHrTAm3FuTmneZsHTrUO8dD1p5+ZM4pDJSxK80K3PBqFdzKMmnIfbBpDSJkztyetO875qi5\nGSTxSLj60WER+c0lxwOKtx/LnOKgjUcnoalDKcA0CLCtlsdqk3bsbScVAqkqeaduIXA4pgT7\njuoRtre1QqT1zT1bKnFMLErSkr2pisWPPWk4PsadjaM55pWFsPVtrDJqXIPQ1UDdT1qdWxig\nZNGx705WDZGMCoVk61KAOBnNSBJ5a7c01oCVYg4OOM0Zw2ScClSTLdeKoCEZ2jgg4qWFiq8t\nipvJ8znOKU2u7rVCY6K4G0c81Zjm3YHas5l8vIIxT4JmziqRBswY3A5yKuKB6Vn2pG0ECr6y\nBm46VsQyRf3fTmre4bR6YqpGw3j0qSRx0rSJnIvzRreWWwruIGa4zVrT7MTkYHWuzt5gqD6V\nR1a1jmhaTGR3zW6Oc8u8Q6TDe2pYrkqMn3HpXzn8UvhXb3JN0kQG1snaMY9K+rNSsQ0bLjiv\nO/F2kmfT7lQMkjaAR19qtCPgm90S6s7u480BFViB/KsmZBHJjj6V6d8TtJuLLWpGCMkRBycc\nD1H6V5fITIxZFG3607kuOlxGJOUzn+VLgyY2nHGDUXmEttHy7eaU5VgoG3cMlqohD2XZkD7/\nAEzSs2+NPmycc0ke5GG4Y9/Wo/m6Ej5Tnigodg7tuMDpTzlVJxnFN8wY+c4LGjzNrFP4cc1S\nJ6jtpZc5ycc4obK7VHIxUe4KvXnpS8Rgc4zUhYlVvkKsuD2NHmMyqMgcc1HIxZsP6cUu3aCN\nuTjNMLC7sBlHp0qS1UwZJ+cEYx6UzdhAxA56E9qVG2tkc9znpSCwiodx5w/alVjGOBn+tRhj\nuPHPWkZwmAoIJ5oGTrMFQlgaXzA21tu0be9Rsu5VJpZsbQn8PY0xMWMny93TnpR0zu61HGqo\npy2e9LuU4IB6UCHr8p9aerbo+ODUeerdT6UqEFA3KmmBK0gZgR93GCKQE+dhW2mmqwjbGM96\nVgJCGH3+2aZNhxY7zlCP6e9LtGRzx/OjcZMNkbhwRTGd1O4jPONtUh2JmJZiI/TmkjbczZ4C\nnBpFyxznb9KSRvmOW6npQMXavJJxnt6+lOPQAnJ9ahDExt8v3TgZ609HRsDOKBCqy5O3Iz1x\nTlznvt9KiVv3npU8ismPnHIpiE58vOe9OZSVGRTF+Xqck9qfIzySAlflUYxQAsZyORtAOd1C\nyHdxwvX60g+9hTx6HtTicZx1700IeJN0gJO2M0gYt5hC8UjS+aoH8INLubIwv3jSuFh4
4RcH\nPFKvzNjGAe9NVl8zBOKkaN1XIwVJplWGqmO+4DtQ24ybscY+7QcbmkRe2MUSMflIXt1poVhy\nsFjOOPal3Fdu/DL+tQbjghjnJpzI2OvymmIkEilSFOMmnl1Vhj5jUMKhQc9elCzAYL/Kc1QF\nhZdrHI+maRZP3hY/dx+tQq/mLwc96bMzj5QMd8VLAtMGZSwPOajaQMc9D3piSKu4sCQeqiot\nx2/KPlJ6UATszybdhxinwyA8FsHvUW09GGD2ApeI5N5XigRPu/d4U555p8bq0LDoQePU1CJ1\n5K4UkflSbt21h170CJmb5eRx3pyXC8DsahWQMxVlyPWmNOGU+WBtH60XAtyN82FGRjOabIuV\nVg2R61TadjtQDCnqKm84ZUg57UxMnWNfvFs5qOORgzHnC06OQMrM4A96Z8y4z160xk0bGRJA\nvCqcHjvSrtfAfnA4qOFpMvn7nb3pFcKOFLc8mkFiWPdtO48ZzmnKqySHd25pq7vMH92pQ37s\n8YxQiraA/wC8jHO0VNGibNxHOME461HtBC8YzzinQmTDDP4U7k2JY/vAr9wcVNhF2nque1Rw\n5Py4wKlh2puUd+aVwHLCDMCM7if0q2ki8A8f7IqATeZ26cCk3hV3KenBouKzLi9C2eOwNLHI\nG5x81VFYOoOdtKskbSHJPHegdi4su2TYWPSkzsfehB4wc1H5oLA5zzR5imQ4PFMC0jMo65NA\nb5shtpqBW25wcnvQ3z9OvpVIRZ46Bs0FmVdoORUPmAPnBxT92GOO4piJ1kC4AGP507ILHDbj\nVaN2bj9TUhYxyAKB05NMLE8bmM9OKkkbdIPQ1WV9qktyaej/AC89f5U7iJfmV8ry36U8MO47\n1VE5JYD5R2qxGwMfPWgQqyCR2B4561IWzJgDp+VQ7l3bc4zT2HzLt6etWSSmN94YDd61I2G2\n44X0qCNjErbs49aVZvlORxQUWCpkcE9aFwp9x1qAM+eOSO1Tqyyct8vHNNCsKsmV3Buh6U+N\ng2S5wp6VXTDZwP8ACnbvMwmPc0+YLE6uY1K9R7VNHnHPy1XH7vDAZp8ckjYJFXFkMt2zFWOT\nxWtHIFVcDI6msaBtzncM4q/ZyfLtB710Ihmsp+bI/KnSMG6cAVAuVXNO3EDB5PWrJJlbyznr\nkUK3YHce1QAlcEjjOadDIh5zhs5p3CxNHOVbBXOTyKJsbiBnFJIxyWWgNnGOvWmhCsoZORzV\nC5iLybccYq4zF84NQuzMd2cKBg1PQZlTx/KAwwVPWqcwznaDj1rRvBkH1rNmaT5UUZ5zUMLg\nqvJHtGSw5ptvcMuVZCDnvUkchjbcSM9KZJI0chaQZTHHtU6gTrIfmDH5asGTa6AenOKqLIzq\nG429qkjcM2C2aYiUyfvAoGDnrU0OYt3du+aqoy4Oc8cg1OsyMAytnPXIq0K5N5qzRn+Ij86a\noBCg8DrSMwTICYJ70iMdxLDKgcUxk/8ACVU/LSjapAPQ9aZGojXcOA1OXaVBzkd6BD2b5uOV\nHaomkZWBRcEn8qbgvu2nB9KI/lxnrSBE3zf36Kh8xv7lFSM+FWjLb1A3MoyR2pGjLQoDznn6\nUikxscP5gYc0ixhcFXOe69q+UPdJAxWQxE7jiiQEISGwccgVHuZlb5ec4yKc3meXwMt0IoJH\nFk2ozHav973om8xERsrtb8cUkmI1UdFx92k2jaAvTHAPapAljcBhsCljwcGkbG5sJhh6Dg0j\nFDtKlUZRgnFOVgsIUZ3Z5HrTATaFYZbnGd3p7U4xmL5mXg9hTfMG0tgqy8AU+OQxxhcnnk+1\nIdhwYTMCo+YDimtGS2d+GVegpypuYAfK/Ue4qNkzKzKdjEdKA1H4Ei7OWfqKnkJRl3YY4xwO\nlQxy9Co78mntLkbExnOTmmS7iopaFlPPPAqXzF2DO4vjBwOKhDNIMgkHPapImPmY6ZOM0B
Ye\nI0YhlBIx0NKuEk3Z6DnPSlZSvTjB+92o3qSwJ+U9TSARVTyy7Kzg+9OZVVVAXBPQmgSGRzGc\nKuKdKyIq7sMcYGKBCrG3nKWIG0Zx60RssnLclegxSQkMjZOH9TSrlmU52j07mmFiaOQrllXn\nvmkSNnLyR8jGTTJH3qVyQc4NOVVhIWNiFHX3pBYerlSu5F3HqaFfLE7dseeDTpG3Ebht9Gpq\nyCNQOrZ5oETg7Z1KOpHdaGkTcwHAzyPSj90yhQMA8g0gt925VHQ5yKCh6MuTv+XPSnKwTn73\notMlVflz8x6gdqdMgLAxna+OeKC0Ok3Lg7cg9cf4Vi3l21vMGjJzu+6BzitYRs+TvbCjrWY6\n28qtIWwwJ5PWmPqfTP7PPjSCS1+wvIxZcYyPlXjoTX1BpGqJhM5xX55/DjxCnh3V42VmaJjl\nlU/ma+xPAvi+31KCMrKGDLkMDwa45prU7IPoe+WF8skSmtq1n3c7q4bQL3dbrk557V0sMhXr\nxWW5rY3luBtwfzqSGXzCQG4rH88bcE5NXdPXcc7se1AvUteS75A5FM/s6WR2A5BHFa9vD8vA\n/GrlrCsZJxmgkyY9PKoq7cEDk1P5JC4JrWSPevTihrQP0FAzKSHHDdKgkhDMcda2ms+MVRur\nVlb5R1pokwbqxOWIGKy5YW2sPzrqZoWXHGfrWfdWm4EgfWtkyWcfqVkskbALzjNcneQtDJgK\ncHrXpk1iGydvbrWNfaKJskjHHFaqRnY4KRSPfiolbpnn0rZ1LSZISwQZ9awCWhYqQT71VxWH\neYdxzxUBYg+op0r9OKYzDJx096oLCcMfQUyRh/Cfzp7MCMVDIobgUDCRuAOopi7gppGb5SKa\nrEqDSARvlUrmkGBjJpWYZ4FRsw3YIzSEKxXJPWot/Wg/exTNwVuooGIxJGO1M3djT2+ZsZ+X\n1qNWUg89+KTAFxuJNKufWkYhR0pAwwTnmmMGznFNxvbHpTlO7BpHC7qVxis3zcUg+bnBFHy4\n96Nx+6OKQw8xWyueajXCkjINLkbue1DBVyR1qmQN3D0xS79uRnJpmD1IyKX+LdjioGHbk0jS\nDdxSS45wKQDdg4x60BYRn6U7ew6dKMK30pVwRgZpdRjmY7QSaZJu4z/9ahh69Pej6NmhiEOK\nXHyjGaiV18zZnmnAswzu4oGS8dc1CzCTIxQXyfUUgOGGOaQ/MX+EDGKby2cnvT2zt5OKZgqN\nxHWgQoDfhTd3UdKd1bAbIoVRkHGRQGow+tO3fLjsaTcNx44o3e3FACFjnC9KdzmhCNxA5HqK\nTBX60guSbie1H3eTTVYqMGkxnqeaA1EYjOacTuUYPFMfCqMmnbgE9u1AhCxJwenrQvzcd6Pv\nAc09QduMYNBQzzuq80Yxyp5o5GWoUlWywyMUAPMgbHrSKuAe9NX5hnGPanOCvOcDHQUAJy3o\nKQsVbjn1oVvlxinqucAUgEZRjcKUfdpnKtjqBRkbsnpTAf8AKcmk3bsZpGUNyOlDdAfwoAar\nDd1wKk3HPtUbAL2p0eQpGcmkAdO9HDDpQo6DFLtKt14oAcuduRQ0nTjmhc49vWowwbO7t3pA\nOI/vce1K3y8jimNjCsDk+9I0hZsdaYxRhgcDFLu2gg+lNjbc+MYpxUbetIQ6NuN2OKbt9DTR\nlSMD5RT2bjIHNIBh+VuOSakZg2F/OopC3BUc96dy3saBjlIjyAOKgugsjJ2Gc092K8dqGXOM\n8+lAyX73AGBTtp2gk5NRxyNnmpOT3wKRIM/zDikLHlsU1TuJz2p3zDIP3aBiIdx5xilZvKU4\nPNIuF5PrSZD5wKoCRWPlgj8acxC7fWoQxU+1SbdyYpAPZTJnn8qdG25go64pkO7ac03tleT3\noGTH5VPGKQNxzzTfvN6UnKseOKAH79vbrRuaRcUqtux2NJt2gnOaAF2fLweaaeOoyKVW3DI/
\nI0ik9COaAH4+XIFKv3evNIT601cdc81IxSxXGSKRWHI6nqKax7sM0qYwWwaYdB7MxXO3ikSQ\nSEmkZy0fIIqNFG4lRhTTJsTO+TyeOlKrkdTj6VAvytk8+1TLg8ng9qBj419s5pRHtc570qny\n1DUZ3NnPWgB4b5enNBbcmDimYIbmneWScDpRcYR4XORmiPuMYpgyvbNPzuYE8UASYHcZpFYH\n60AnODzR0U5XH0oABgdTUh/DFMGFwCMnrT2UFc9DQN7DtpznpS7htORzTRknmgg7qXUkXduA\nyccU3JPY07d0OORQZM54zRYYh+UjB5IqSNSAc/hTB94HHan7dx+9T1AOdmCcmhiOB3pZF3Ll\neo60zaODSAevzMQacVB+Ujmo2z5gPQU4yNyQM5pAOXaACPpStu29aYrA43LinSZ3A9qYDjjb\ninYGBTUGVz1qTcMY4PtSAVRySelNVsKTg4pVy2f5U5uFAxQUIG6EnrUi9TmogA31p+dhx1FB\nJOX2rwMimbiflxxTllC9emKbgg5z1600MFG1s0igbsNwKFO7PFCg5+YZphqOKqpO080+PJTj\nrUDKfMzxzUsZ8tiBQA5s7venhSBuNR5zk5waduPBz9akY1sswOeM04nDcdKT7zZA4pY+5YZo\nAcZN2O1PkPzf7NRIwbrTmO1ck1QBEwVjuNSrLhumar+SVXnv3qXb8vFSBKOhYdakjYbfmGKi\nj+ZfQ09ssuKAHYBUmmqp455pOVG0daVuOvBpAOA5weaccMcY4pqHjOOaUsCwINADxJ82AKd8\nzd+KauFcE9KGYLuPemMdJJ2FIrHaMjFRxs23cRTipXrzQBIzbvlWhcjnHWmLwvIwaft6EHig\nQjNtAYjFL9/GeKap8z7xzUgxtwaAQfdfFOYqxyTUa9cnmh8FhgUATKxToOKN2c5HFM3FcHP4\nUrSYYcHmgAVMHPb0qRWCjPemA7hkDipMbsZFA7DlY7h6U0PubBNKwPXPFM2/NmkMf/FxS8N2\npm4jpTgTkjFAMVfkXnGaevK8niox3Lc0r4CZFBIrqO3FLtXr1qLzCwGRT4XBBGOlMZPncoAF\nNb5garz75CNrFRntVhCW49upoKEWPaNxNSrllB6Co1xnpkVJwR1wKVxBjnpzTk+X8aa3zEY4\npec8igOo7+LkUm4B/mH0oGV/pT9p69TTGIW3DilB3LgnFLj5RSEetAAp9eTSsCo9aaB+8zni\npGPPFAgxuPHWlbG3AGPemNnHHFOzx1oATHzVIOx6Gho+Rg0rfLjIoHcOdwNDdu1HDc0bcnPa\nkL1HSZX15pAp/GnZJwG60H72egpWGKqnsaVTtzmmZzzSh88dqbEP4zkCkVcNzQ3y8CgKSue9\nIY/+LngUgxjk80jMWIokU+ZkdKBWGxxbW4HFTZ5ximq2O+Kdng4oAXA9KE/1hJ6UHlRQOmR0\nqhBtK985OBSFimfypvNOdAy5zzQUAYEggYzSeYVySMjpUM8rIYkA47mpwo60gFGVI96e3txT\nuCPWmFTkc0hEg4UdzQvHWjb3Hze1FO4DmYKoo+6QCc0m4MMYzSbSeTSC+orD34pF2qpwKXGP\noaX/AFcYPWncHoC9MnpSZwcilWRX6dPShcZ5yKQwPqBzTlA3fNSNgY9KFU9TR1JHcjpyKRfk\n6dacvy+9OPHLLxTEIpG0+tM3EY4zTgvzZHSk2kZpAJuGSSeKUEHGD1pNoZSO9KqAYNADtxVu\neMUizHzCMZNLwSSc00Z27hwaYD2+Xnv6U/zDsx3NRKSy5PWl+63NMCTdtPvTlbv0NQrJ82Wp\n3XvSsBORu6nj2pFyFzimfMFyf0o3llIBoAl34HSpRhgPWqy84yfwqbcOOMUxkzOcYHWhQ3eo\nQxbnPNSrJzyaAH528ipI8MvTmq6tljg5qbdt4bikxC8Z64pxJMZ5qNvmyOxpMkAqaLAP5246\n
cVJk4Az0qMNjqelKhBBOcVIEokycYxT42Ktmo1H40bju5HNUBYZi3VakTHWqnmEnHSnxMd3t\nQTqaCt8oAqYSDIHeq0cm5c4qTJZu2apARXTE5NMgJ8wAZ5qW4xspkeFIbpVolm1bJ+7Vc9O9\nXlUdqy7C4H3c8VsxqeMc1uQNQ0kj7T0zUrIBxjFQyKQ5PaqRnInhuCoHzYp15dq1uVHJNUGz\n9BRGpZgBzW8dTFoq6hZl7UyqO1cVrmnhlY9scivTmVWs3+mK5K609biQh16HnArWxmfKHxq8\nLG4SSZFwFXG1R16818tX0b2s0kZHCnHHqK+9PiVocVwLhFG6McDPavjH4jaDLpuqNIi4XnPb\nPPWmgOOYhGBJw2OlNYFiHckp6ZoKnyy5GWbgUKpUbd2R79KkmxIJPMbOcY6CkZs89DQzKyhg\nuAv86RtyqCRnPNADkct1TJ6CiQ7htHGO+aYp6ndjtimbVOGLd+R6mqQiTaOcnBxRg7QCfoc0\noX5h8vUZ60jMGGSMsOwpDJCU8sZfLdDSrkjBPaot6iMHblvSnsvzbyRjFFwHvtjUBPmI6im7\nsMTtyOuaczdGGBxUO0+pAFMmxJG5lQNnJB4FL80i5YDdntTFZXXr/wDWpy/KuVztzQA/zD/d\nyOlICGYowx6U1XZc45Gae/zLknGPzpdQGqD3GRTvXI/CmSKu0KXI7kU7zAy748kDtTASPjpw\n3bNPXc3y5znr7UgkV+n4mhcR8nvTGPDeVnA3EcGnGRpFBzjA71EvzNknCj170febKcAUyRYp\nAV4zgHrThKZGIPB65FMjC7eW+opImZXfjimBNuK5A6kUMQVHIDd6hEnz7+i96fwWZh1I4oAf\nGobo2T/Kk2FWDfex6UyPIXkZ9x3qQKQuVHH1oAVJEY4PWlkB7ncFpkZ3cAc1IzbeGxihtiBp\nF3KQfnxUiyYYsW4xUY28Hb+NJjBGcFc0w6kyyFuOnrRuCqc/L6D1qJ2DMcLkd6f5qsnzD5O5\np3Ex9vlQzMMA9BU/mk7G28rxUAclAV5Wp1wrAgZ3DjNBViWPy9rBz8zGn7dsfHOajjUqDuUZ\n61M2GUAH5qY7EcihVyBjIqLjy0wxDZxVjaZDt6H1qNdysw25I6UCIyo2sM8g01G3t94gVYhU\nSKxJpjWqtzv+7yKdybEG3dIuGPHWjc7ZZwqjPFSSheAPlz1NNkAYooGQKdxpCmMx4+bHFM8w\nzjzHXjpinRqz7gw+UHildflwTgZzSuBHlm2+W23n7tKV+UsOCKTzAqlcd+tNXCqVJIGc0h2J\nY2yvzN83akSZo1ZXBY04bei/NSNlVOByetArBIVYKcflSeYQW6kY4oWVeEVc+vtT2yI9u3BB\np3Ack5UnaNwx19qbuGSR0xmmiQcKo2qoxTv9XwRuJ6UhWEY741IO0jvT45FX5RjHUmm7T5fz\nLz6UsEe1s42+meaBWF3bZtuWKN61KpkDHceMYpu0iYbmDZ9DT2hduQ2fXPagdh6ruUHPT86F\nRtu0c85zTkjO3H6inx7ZWIzggfhVXCwlvubd2PTbU+0hRnp7UsUY3AIf+BU9Y+CB8wz+tFxi\nqrNg4z7+lSsV3Er1xgVFISgwPlOetSKDxuXnrmlcVgzhQhySeeKkXBcnGOwpjMW6DafWlkG2\nRR7UriHxlcEbip7U8LuQtnC/zqLlvlX86Vm6RkZXrmgCReVxnipQwEYT0/WoAFGGxgj0qTzB\nMvOBJ2p+gErdCqnDUkMjcA9qi2gIW3/N0NG48DPSi7CxebhgdwHHNN85W+7x/tVCrRyYZjhw\nOFpI5GaTDDC1VxWZZMghIyNwYZBpMklQDjJqNdzLsY5wcg0K+3kkZFaElnH7wrnik+5J97io\nY3ypIPLUr7oyFC7vU0hMtKwVT824U9JAy88A96qge2PWnIpddp6U7hYuQkeWS3OOKjWZhnoQ\nTT
N3/LMHB9TTQQrqT9wdxRcdi1uZs7VxUivKUwpG39apykYYK/OMiiH93ghufc07smxebezA\nO2V7D3p3zMuNwAFVhKxX5s0x5ixAJwP507gWVk5AJzTo5i0xRuEHSqoY9RUnmfuwG4YiquMu\nbjGQFHy9c0LOF3Gqm5/LA3E05TtbJGTjAWi4WL8btFD8x3E8il80soYNj1AqhFcsVIfnnGKd\nHI2zYB3zVKxFjVhfa4YkEVbhl8yT5Oo7VjFmKrgZAPrVy1mK4LceuK2jIlo6FJysY3nLelPj\nkAJLZNZsNwu4EHA96sNcCRdyjvit7mdi6ZlZfkPFNT/WbiM1U80fdX5ad9o24Bb8aoNjR3nq\nDx6UqqdwGcVSW4CnP3velEjt0PU0XEy1MyqoKnvUTfu1JP3TUTyHjJ4zRO3m8Dp6UhJFWaNm\nBB/Cs+4VkK9jVq4nZW2qMGqU7O2QTnvn0qSlEahWRmK/MR2pEl3OQmGHfdVSOQKw2HnPP1qR\nXWNz6mpCxYEjbcjgCpFnD4wMmoudoyaUfu0JQ55p3FYu+WLj5ZF+QcnHGakkij6jpjFVFuG2\nZ9uKma4Bh5HOKshocrHo240qyEArkD0zQsnmQqQRnvTJo/MjADYyaARZgJb5mb5egxUrZyFx\ngd6gX90q7V+UVKvzIz9GoBk3Abjp61XaTnAGR61EsiruJJ5H60yPzdznfj0AouMl856Ki8w/\n3z+VFK4z4eUblYq3HTpSdgMgDpimbXXKFg7Yydvak3JjIywAxz618ke2iRGEcLkHcM4Ap0bl\nCpAymOWqN4w0Crjb3PvQmOPmxQFhzrubJPfIbt9KduLSqTwwP3abu8v5cng53U+Njjg7l65o\nEKCJWk2R/wDAT1oXKkKW3NnpUcLlZAVBG8ckmlR1iRsKTJu6nv7UAP3DzDjrnAHalaTadzcF\nuB9KhZSy8Nt9V9KkTcGCFsp1GeaYyQKz4P8AEBgMaQLMyHev7zpx/OkRvlIJYDdwp/lRJvjZ\npWLDPy4HNAx+QyhVG3Ayx7ZFG1WKsOvWkVlCKRwp657mkVwx9R/d70iSRpNu8n5T0C06OMoC\nWbbx361AsnmK5Y/eO5fYVJuSXAkyWx8re/pT6DJ/ndURyVB7UkPMTox2MDgE9xUG7y4d5OQt\nKzK0Hmt8yN7UhFhZVxheGxjmnI4Rd7AMR6VVjfGCvzcfdxUkayOvyhQRyeeo9KAJw2SOOTzj\n1py5ZiQOB1GarI7csW+8fujqB7UnmbCybip9+tO5Ja80NHgDA69eaVJDCue57+3eqgzlcc7u\n9TtN5uVVsqOD7UgHq4Ecke4kN8wz2qWGQyRJs6j7x9qpK4Zcq2T0qQlt+4DCAYOPWgZfk9W+\nVW+7/jT7eRoR8z89PrWek7spx90cgNTvNLbCy/eOCAf1oGaCzLhhIMbeQ1SCT91ycE/wjrWc\ntw8bfNyvTFOZmhlRmG0n8qBovLIsULdmz+lc9ezBrqQq2CeBWusjyLIMLtxXPX0yR3H3cMpw\nTQVcSPUGs54JFB85Tn+lexfCX4iT6feFJXOxyC7Z6duBXiTMGI2ev5Vf0e+/s2/ibe20sBn1\nqZLmVi4ysz9NfAuufbLCBlcMD1Oetd/DfBsc9fevjz4LfFbyYEt5m3svCZPUDr/SvpbQfE0O\nowKQOD0rhknF2Z3K0tjuY5PmDA55xWna3vktj865e1mJbGeOtaUEzKoBOaAO4sb1XXCtx71p\nQ3Axkde9cZY3RTAIrYtr4haAZ1MUgwB0q1GfQVz1vqAK/MeK07W9SRR81BJfEYfPemNa7+va\nlhuUXIzkmpVlB4FAGfLY+YTjkVTuLEp/DxW+p9qY1uJOvIqkyWctLahVI28Vn3tnuPyDaMV2\nRs1k4IxVO40wdhmquwsef3ejibOV5NczqXhszMeOV9BXq9xpe5cDj04rJudL2qRt578VpGRN\njxi9
0SWGQtzisuZDHwetet6p4f3MzKp57YrmNQ8OYyGjI/CtOYhxOGZt2MCmPkc54rWvtFmt\nCQBu9qy5oJVbYVP4immIgbnntTFbbnbzStIFODx7UxmB9qYC7izEY4ApjZ78U9SSuO9MLevJ\nFIZG3XGaGVVHTJpGwxzjBpQ3ykYoAaGzxjim7QoPy4HbFLgjnoaOWXr3oYiEzDaMrlqTcG5A\nqSQfvDgcVDyq46ipGOVtrcdKQfe68UrLtTnv1pNo4waCh+5V6rzSH5mpkhwDxSqSQOeKVxBj\nPuaZIwZvSpWwrZqFiOTincLAMq2AeKkXknHI71GOzdjTyA33OM9aAuIxCqBjNMZgq8d6eQEO\n0iox94jFAdBrMAwxyKeuOucH0pkiDdkUsb8+9IkXaWbJPFHAzgYFOOSetRMeuPWkyhjxrvLg\nVJg7dwHFObO3pxUfmYGAaBCBvlwo5pSrYOODTl5yR1qPG7k9qQAGMgA796cQemeBSLkLxTQx\nzgj8aBj0x6baew25GeKiZ8ADrig565oK6CycYNJJkpQM854FNbavTmgQgYqnHFLu9DmkGVx8\ntORQGPYUACtnO7g0isSx4zSyYZgaRZMNx+VACMAy0qqG68CnMQcjGO5qNmCr655oAm2Bj6mh\nlJ5BwKjD/L6UuWzQAMdzYApGfsRyKRWHIPWnAZXGOaAFUgtyOKb95zgcUhJ6DrSq21eaAJH+\n7kDmkViq+lM3E5xS5yMdalgIOvtSq27jFDdAKaM5xnBoQD+RwOlC8mkGWHNI2PpQA5mG4E80\nuQ1NCkjkU1CN3WgCVVC4+anfKT61E43dBQpz04NDGSkkZ5wPSoDkMc/dp+3d1Oabt69aQxN2\ncYXgUMxV+nJoMm4+mKauJTkNnFMkf5gx0O6kzjjrQWPWhuGB7GkApZtvHSnds5xTc7TyfwpV\n/eHkYxQA3Jx15pyn5TTmAo2/KeOKAI9wGBnOfWnbwGGRxScYzjntRtJUfnzQMlzu56U09hTV\nYtninjO3pzUsBTnoDxQvy8Hmm7W4NOUbqoBVwW56U3aeQvSnBducGnKdyjHTvQHQbGoVfm61\nIGCtjHHrTNwdsHgU5vlO3qKQ0OYjjmo3wrZ9fShSOeaQ8c460gHrIU427j7VJyGOBTF+WMEd\nTT/MCryOD3pgI2fMBzgUqsMkYpoxuPPFITnIHUUxkhA9KZ/Ecmgj5fvZ4pnl5XOcUrAOZzxz\nUi9AQOlVmXbIM1aXrkHiiwyJ2705ZAVwBinOoIJpNu2Pd3ppADcr1p3XheF96Zu3dKdt+XOc\nUCEVRuAHJqZYxu5xmoYztPHWpY853GgB23GQTxSHG3PbtSNyAT1zRzg5OR2oEKHLdelLzuBP\nSmr93/CkXccii4yTnaSSMUR470RqshznjvTY8LkZyKVwJCVA4OTTtxbGTUSyAMRt4pTnqBmk\nHQmMm4jHUcU4gkjNReYigZyM05fmJGcGgB6MVYhjx7UZDNkVFGxDFSOPWlyV980ASBfmPOac\ncjhcZqNDhuBmn7duWzVgSKwHXrTeGbHSkQbue1OyN1ADwqquM5pnseRSsy7TijcOgPFTcA2g\ntktx2pSPm603GBk8ikwTg5x7UgHlht5FO3FVB6imBscEU4fMuO1ACeZtOcdabHGGkMhzmnSN\nt2/LUu3avB6880AKp3DgYpzfOwb0pg+VR3FO3dcDmgNSXnjFI2FXpmmbznngU7zBggjIoGIr\nfMcjIpdx7DNA+6SD9BQGO3g8UxitngZxTl3buTTN3TdS7juB7U7hccv+syeaeDtOaYSF780u\ncrjvUhce3Y4yKA3YjFC5Xg0rN8oBHOaAHbc9aQseF7U0naOTxT1bfgiqARCWwNuMGpGIbOR9\nKRlPc4PtTgpx2NACruZcHpRu2rnp60Mx2ilX95Gee9IQsa7mHOae27t070zAXGODRzt65NA2\nPKjdmn
Mu1QG60xCNwzyKkOW4J4qRiKw2sc59qFUMopqqOe1LgrnHNMQ/27GnbgVxjmmMenqK\ncPm7YNAx6t8uMcChW3Lnoc03+PA4HelZSze1AWG8+YSTwafu+YdqF+b5cU5l+XIoAYrbmPH5\nVLGD1NRLlcmhWO3IPPpQImbGeDQQSM96aVONx4NMXeaAJETdz3p7YH0poB4HSpGHI9BQAgf5\ncAYpy4wM9ajXLdRQGA96kqxNtwvXNRrQWx06UvXmmIec9aQSHkkUinc2OlOClc55FMATjJJ4\npkh2qTilfc3bApsnzQ460gCNty57VLERtx0JqFUwoVeDUy/KvI5oGOC59qXJAIzxSlgw44pd\nu3HemMWNdyjnApzYVSKSPAU7qkRQ2N3Sp6gN2lhknb7Uv8PrRIo3Ddx9KXhVqgsM3FTzUgYb\nfem5DUqgde1ADuQetJJkLmk77jSnDDqfpQA6NtyAgYpygLnJ60xPu+lLtyuT0oAUMMEd6dyw\nzTF55xilVuetAdB+ctTmLHimIfMyc1IrDH86BWEVRjHenJ7nik75FGCB71I9xx696Xg4HekQ\nmk/5aYzzT1EPG1R0piEcg07IyeaUYyeKQArH6ilDd+aYr5UkVLHjoKAuItOjBamsQrcCljam\nxike2aXOe2BSdTgUM20YPSkAv8PXFOU/IRUYIdc0qFsdOKCR235gSeKMlval3HacjFIq8Z6U\nBcRl+YdOKegJbJHFNVd3HQ9al5ZcigoVeT6GjIpCw3Aj0pcBm5pAOztXCjn1pu0soGafx15p\nBt79aBDQm3pzSrwvPWl57dKXluo/KgAf7vrSkAHBPOKB8rYY0u4c8ZAp9BjfKAUetO3fKARR\nv3YwMUhz6UCFk5AAFJyzY6Up+6Oacfug5FIYcdKdu3Db1FAwy00c4HSncQu07QB60M2PekPy\nPknjpSsueM0hIZjOSOad5ny429Kbs2nIzRnjrTEhytz04obJYc8U7cNoApNo9cUygVQFNJuz\n2p2CWwKXbgjpkUxBtHHf1okU7ht+7SZIPP6Uq5boaAFcnpnpTVOVJHBpWBLZXpQo+Y0B1H/w\n570ueM96avy5yaGOfpSuMfG25eDTmbDCoUYLwBipGYeuRQIlDrxxj3qRfmbJPBqtG2RzUqyf\nMKAJd27jOCKVWy1RN8zZpsbfOTmmMsYGSetOVh+Xao1OeR1pV+ZeaVhEySBj6U/d8xPWoFYL\nnjNKZgpHc0CJGIYZPBojbKkZpjNv5NPjx6UwL1u55A5GKsx9j3qpBhQDirkfHBqiQlTctV88\nlTVljuGKI7cySABSc960irkMksE3OAtdVbR7YwR1xWfp9klvGHIz61ckuCPujFakDZmw2Cag\nc7j6092GaiY85FMQ1unNJEwVjjNNLfNknimNJkHHHvWsDKRoxN+7IPTFUbxUjjY4xkUz7UI0\nO45OKxb7UXmJ5wMY4rrRys878UW4uVnRx949/rXy18bNLSYFljIdfl3juM19U+JN2JGI5UED\n8q+avi8rSwtIDgAZI+lVyibaPnZZCrEE4phz5g7rnpRdM32hwEyc520B2C/eGT+FZPQtaig/\neVuBmkZdoDbu+OaTdt5I5NLy4x1XH5VJIMfmGRnnimLgsyjlTyQaG28Jzke9IvUnO3tTuBIF\nd0CsNo6U6CJgCAcgGmtujwM5p0fygknJJwBQA6bZyN2DjrTHyIV7jHFEoCoSPvZpZFyQB/EO\n9ADl/eKM5BxQCVOxjkGgNhiD2FKvy8k8nvQMRRvBQHKj0pyyAAJnHPSmQ/efHSnrti6/MT/F\nRcQu7bzt3c9qTau75uMnIpWK4+Q/WmLjILDigCRcK/OMepoQjY4B2kHt0pg/eK3Q4PAojPUA\n7W6GmHUfGF57EjkU3eTtAGCOOaI8RqSDtA60fKyNuPzg/LjvTAVcsCTy69Qe9LHKMAtxkcL6\nU1TtyT6c
inM0RVXzw3AFUhdSNt2QFbrUkbHeASQOhqN8bn2gnA7U5d5jjboe9IB7KrsGH3em\nKUL5jEYwoFNQHy2OOSaRWPJB46UxEu5tpwc4peFIHJyKYrbF46+lLJncPQc8UwsOT5WYgAnH\nSlYHcvy/PjNRr3JGGallCvtIdlYdSBQA5t78k4HenZwo7imc+WR396Xbui2k4x1oEKrFWyDn\n2p3VShOOc0kPyADG5R3pcb2JbrQDJlzuCquR0qyy7WRQOB1Peqit5fVthX+KpkVu52knOKZZ\ncXYxOP8AWYp7KW+bGOOahaQjJ4GKeGbYFUkFuuaGMfGr+mRTeY2LNkikZjHGADnnBxQzHqOV\npCA/Ku5DweMUrMfL4+8O1NVTyeMdcUYO4HHLdKAHTKGVSF+bFQRw4fzN2akdmZhtOdvB9aVR\nukI2nbjOKdwI9vzE7yAenuaRF8xg3UipeC0ZAypbA9qdG37xsDFFxWICnnOe3rTVjDtyfk9a\ns7QZNy81HJCqsQTnPPFAxFQIpKtz7io1Y9SeoqbavmLlsKBnFMZD8zkbx1FFwEWMyIMYHrT/\nALqjPPbimti3jHGS/PHanLGduE69TQKwwKJOCcfSlWFvmXPQ9acsJRVJ6McDFSSLtZkB+ZTn\n60XCwjR7ZF4LHHGKIwP4wakTzNu7GTT9zLtG3cTyTQFhsaDcDtyRUrAN+8C8dOD0oxiTcgwD\nxTljWM4A5b0ouMFX5QTyTSwqFyD09aRZI1YgZLepqVdjRn5sCgmwiR7WUjgDnIp7fu/MbGWf\nvTYnZe25afCp55BBOQDQASKZZFAbjHNSqx8s7j7VGB1GMe9NQjbgHnNAyVmwoGTlqdKThQeS\nO9Qsx3dOc08ttBZunSqFYEbqy8jPOacjGQkqMLTNg4xwB196epHlk5wnTHepFYeP3kinO0Lx\nTmO35+wPWoUVhCWUHdu4+lPJ8zJJz7UDsiUBZFwvDMeT2o3CMFG5HSmRg9TwPSkZjuY1Q7Eu\n4MoHGM4qWA4U5Pyk4UGq/l7cYO44yaNzZBU5oEXGUJkb88c+lQrKshAAwRxzTFbc3OSKeGVm\n5X5j6Vd2TbqTQN82WGV7U+Jfmbc2F6mogwZggOPWmmMqzEtke1AE8Uh2tk8dj60sLNklmwMV\nVLBpAOox1pxYspPRaCdS4sgLYYdRwaMhlODhR1qsd0agghulPkbcpAG1iKBpE6skjDaO2M+t\nEkisMgbTu6VFDkxBwAWHFSGRXQfu8N0qkybD8s27BP8ASpEY7V4ziqom24Jz16VNuPVjgelM\nLEqyA7uTljTmYq21xxVcTFeGGMU7zn288+9HQCwpzINp/Cpt3z5PSqgcquAck9aVpUHBPGcc\nUJhYm2gsSAT3xT93llTjlqrR3B8z0C1KZNzHcQV7YqxEobbMpzwD0qaOZ1kwuTVENtj2c7s5\nqaOQjaW4pqViDbhlZgBjINXPM8ohQvFZFvk9Dg1dWToN2TXRGVzPYnSf96RUjcISPu1SSTdl\nWzvJ7VKrNwTyOhWtlIknW58yMANirMch8rryD1NZYPzfKO9TrMSuB1JpcxSjc0FlHV/yps7/\nADBlOKorPuXJOXPWl88nAFCYcoXDeZnJ5HSqcgKvu6cZNTSSM2cnBFVLiRghZjweKm4WIHkR\nnXHDdakSRcbiM5PWqQbavHNOjnJkCAZUDOaOYZbW4KAlhnnpSQ3AWR1PTGc1V3ncx3cdac0i\n7doPLVPMLlL3mEg7WyBUiOOgfPGaoxyFd4znjpTo8xqDgMT3qlIjlNONsqSM4xwBThJtZRuw\nw5qjHMY1yBwTiiSf1+90NUpCsasj7o1Y/eJ496Q3DRqUB+b2rP8AtgmUKSQVpNw253nNO4WL\njOZN46nHNTxmNbcbmwev41mR3AX1681I10DkrwOnNF2LyJ/l9aKqeZJ6iinzMfKfF6/u2kcA\nkYwKby0Sqe
Mc5pGVllKlsj2pMYRkAzmvkj2yT93sZQcnPemKysqptwd3PpmlddwDA5I4xTme\nKNlxkHv6UwEM2JchcqDgntQArsRmmyKfKPTrnIpzHzDhQR2zQKwFRtfcTsVcLjrmnKwZY09s\nn6+tIPm+UcOOBSqxZtyn5xweOtAxjR/vNwbK9KfGpeRSf4elBbzDtzj2FATy1aNjjnkg0uoh\n8szTHcyBE7+v1pqzSLNuK/Jj5T607ncykYOPw+tNWRvLVd2WznFMB8cjSZ8z5u5XFJtWNDz9\n7pR5mWZtpDdMUyaMRKZP4c4ABzSBoevzKY1GPr0FO+VYwM/d7d6Z80hRgOCvalhhMgLD5WBp\njBmERORhGHX3pzNIqpECNmPu0kiFmUKuRu604cSsqHB96CWJHJE28rwwHUetGP3e7J9wKRtu\n0oBtZevvT4yBJuXhdtMAHyqqDkdd/pTZd8jcjJx/nml3FVZgQ2BTI53+RQf3Z61OoiZl2lcL\nkYxx2NNU+SpDDOepUU4ZQMFGUBzmm+ZL5ZdSFboFNMLEqSKsbBflI5zTI5DKoVHKv15pqr9x\nQMk9RmnSZ3K+3jHbtQMkVjGWDHcMckCnqytgBgyDrjrUagMN2dvHWlO+ONiECuxyfpQA+Moy\nkZOB0FOaRmwMNz3qOOZhlcc9zS7jGFffkg9qRRLGwt8g7t3UntWJrEg83zDwJOfpWrNukYvk\nsGOSRWP4gRgFdMEHB+lA0Z6sFBUPxjdmnPMsrDe/zBchsflVZf3eCWB7ZxUkeZGGCNvTbQUd\nZ4V8VTaTJGHlaPa42soyc+tfSXgH4zC28pJ7jcQcsc/K/wD9evkhj5cgznr1rXtdZnsZYypM\nYUg+oNTKKktS4y5Wfpf4T8bR6xbo4Yof7pPSu+tb1JV4bJr8/Phn8ZpdPuESYs8LEKVV8sDk\nZJ9sZ4+lfVngHx9b6pGpEwbgMCxxkevNccouJ2cyZ7XazFlGa17TLRhiK5zTbxJkDBs10VnI\ndoOMg1KK0sXIsr34NW4XdTkNgVVSTauSMg1PnoMUyLGhBesn3jmr9vqnGTx9awyxGO4qQSHg\n549KQHTx6gsigdKu27CVevNclHcYOQTV+y1Ly5OSc+lMk6f7M+3KjNRyQ4X5hUtjqitGpIwT\nUl5cRrCRxk0BqZklvuA7elVJtN35ONxPTmrTXA9aFmDMPWquFjHm0dtpJHNZd9opliOVwc+l\ndi7Bs80wwh/92jmHY8v1Lw7HIpVk/wCBY5rltS8KnnYN3oO9e2SaOkhY8Gsy68PgqdoFaKeh\nDiz581Dw06lsoUP0rKm0mVBnaQPWvfNR8LhvmIHp0rn73wmdrfu/l9hVKVyeU8RkDrIRjGKb\n0YcZr0TVvBobcyrgVy1z4XnhZtqnA7mrFaxh7iwPy4pGc7cAVNNZS27bdpP0qv8APuxt5p3G\nJnuTTfMJ60Sc5BHNCruHBpEi7ugzUedueM08gbsGmrnbnqKB9BT/AKv603tjHNLu6A0jMGbA\noFqBPykU1clcL1p2Aw680gGDk8fSgYrNtBx3qPjvUsihunNQhSrEkUBqLu7dBSq2BQpBXFNc\nApx60rgKzA4z1pjDkkZoUA5zRvwpGaQwVhTAy7vn4FPHzIOMetVpwX4qibEgmGT2X1qRmCtn\nFQ5xgMOMU7k7T1HpUsZKWO3NRnHUDFOVCXHPFN2levApDFBGTjrQ2NuM4oaPGGzx7U3b6igA\nViM4GRTuMHjNMMmOAKXnuaQAFITB5pG+ZfShmO7ApO4GaYDi37npk1EzfMABxUpHPPSo2/Wg\nB3zbRzSsp69qb5gb6U1dzNyeKQDsbelIyJJwTTjuzxzQi/N0pgNkXbGxB6dqb1QFfm4pWk5P\nHFHIjyOnpSAFDbc/pS7yG5FKVBjznml2cZIpARtgN6mpFyrg5/CkRcMc80zlmJNAErNhs460\nyTgdOaQbnjoV
X3c80CHA4Xg0ifLnHNIv3utPwGzgUhjfM3Z7Uuc845xSKAeDQsbLz1FABtbj\nnikyRnnJp0xx0pu3PIPNADlyqk9zTI/lyStKrNtOcZoAbaecjuKAAsVcDrkc0bhu460g+VhT\nWXB96BkoYK2AeaUSA89qiVd3SiYHbxQA4OJBgcUqrtG3pUEUZRMk1MV3YINACp9OPWg4Y4DZ\nFIwK8HpQML9aAHN97pxTu5IPakXLDnpSMpB60ASf3ffrTHyWxmgGkxhsk0AHKnpT2bcvoaYe\nc07HyjA+tDGNTDdTinfw9aJMZ4FJuBUYHFSAqls8HipUbGMjNQx9xUg+VTTEK8itwMilJPao\n16g4qRlzTGCg4PGaU/dz3pArKnBoB49aljGrt7j6045ZTwdvakAK896cWZQKaAFf5cU9GDR4\nNNONuelJyOnNJgO9BjAp3fPbpTSrMBjkUH5WxniqATJ3cfdpy52HIpNobvScp05FMkj2lpOp\nxUyLyRuxTVJLZp7fdyaCtR2V24zzSq4VTnk4pkeFTkc0bS3TipZQE7lwOCadkbMHrSRjBFKF\nGSxoBiFQxz2Ap6sI1HoaRCNpJFJu3cgcUEkjfNnB4pRtAA71GvII6CnR/d5/CkNDguQcHAoV\nSoznNNGBgZoRTuPpigQ7cME9KFwxoKE4I6Ui8g8YxQBLtC8GlLFR8o496i5zk1I24qAtACMN\n+O1OX5l/zzRgKCerelDZ2jHFACqo6Ac0vVsZpFbB5PPtSrtXpzQUSK4Vs8g08fMCO1Q8lSSM\nc07J28VQhRkL1waFYN1FIfmHvTlHy+9IYRsvPFKqhepowdvyjnvTdobJ70gJFbt+tKcE5ppI\nzjNH3T1wtBIufvd+KVWJUYpzYXH50xWIBx3oAkPoTmpV/wBVtxzmqyrhR35qXd8w+bmgCVl2\nk49KjRiX56Uob5sEUzDE8CgZKwycDpSbscEUgJGR3FKrfLk80DEb5QQKfGv7vJNNzu5xgUu3\n8vagB33uTTtwVabglc9qVe4xmgBjtvYbfxqVMr0OahC+Xn1qW2AYkGgCVtzKCTmlCBu+KRgd\nuAaSNgq4PWgBWXdyeRT1XGCDgU1SeVxxQrcYNVcCT73bNOHyt3pmeODT1b0PNSA7O7txT8Dj\nApi/6zBOBT9xVsdqYC5yeeKTa2Tjk0nl7iSG5p2SsJOeaQBwsYIHenKwbJOQaZ97HYU5WFAC\nryuSOKPvKMcUu7dwOlO2Fsc4oARscetP37yCeKQbVbHU0ioTu9KkBzcvnGDSnryaiZm8wAdO\n9G3DZzTGTRttYgDNKOcnOPaoF3bvk/Gptm33NMAZTkc8UbRQ2W5Jx7U4LkDJ7UuoiQLnGelI\n338jpSbiRtpfvLgde9AC5LZPpRuzgnpTFYoxo5VSevNIZIR+8yD8tLtC9RTNxTjFSq+3mnYY\n1lGOvWm8rxTmUYHPvQp3NzzQIFJ45p3mDGO9L/CQRSNgYPGaLjFILR8HimsuyPilB2qBSP0y\nTxSGLH9+pWUMMVAGMfSpS+VGaYCqNqgd6ezdetRBst04p2DwN1BJJuGQaXzvmprLwB3pSu1Q\nO9A0PDBuCaUHdkGo1UcZ5NO+6xOM0dQJE4PtTYz1Apv3Uz2p3vigNR6jrjgUqhfSowpPIGKc\nAQ2D1pASc7hTW3FsDkU0dzmhfmXJODQA8ORyRScSDpRHjmnADdnpQCHL94ADilj+XNNX5Tml\n3Atk9aBkmSRkChW3H1pq+tNbPY4oJJv4iBSbtvbmkGFIJ60N8z8cfWqGKx7kYoVj2pNxyQac\nuCDzg96liGKf3hAp5fDYBANRRthSelCrk5pAPRjuJPSpV54xz1qCPKkg8ip9xbjtQMdyRxSb\nvlIxmljIVval2jrmgY2NdpAAqRT26Cm7ivNDZGMdaAJfvE96RvuUm7pgc0bsqKBDwwC8rRHz\nk5/ClwWGd3FMxy
T0oGyQt8oGKRSKBGyuGzx6UFd3TikIXzC3ygUeppGYqvAoTAXBPNMCTB2i\nlU4+tIPmbrzSrlWO7mgYjAd+aXovsaNpYj0o2k0wFCkKDjihiTnHSk3Ed6ApzgGkA5Rkcil4\nC/dJpqqcEZ5p6sWXrigBB0GOKXjrijOEPY0KOetACBhnJFO4zkUzDbvY07aeOOPaqshAoK9T\nTcDAxyTUm3Jzn8KPukDGKBehHnYwHansNxBxRsG7PWlbLZ9qYXHHPpik3Bjg8UDdtHNN25zQ\nMc2NxpFPXFIVwuaVDjrUki89qaOV9DSkHdkGkC88UIY7nuOaQKf4jSbucdxTiu7mmBH5gYYz\nTl+7gdaZHEFZs/WpFx270rAK2VpwbcuRxTBnbg0AH04pgSrn3xRHjv1qLnbgMakjBUc0AWEb\nsKcCVyKiYDZ15pd4IGTTAm3DbxScelN3Dg9qf/rPagkZu46cirMOWCj+VMih81hjkVo29usf\nIHPvTsK4tvGyjkGrakFQOpoVWZhx+VaFlYg5LDnNaJEthDZmTbuXArSt4Uhz8oyaFUBcCjdz\nWiiiSXzAFNQ7iwzmk3FRkikLcVQdAbIWo2yo68UFh0NQXD9QDxTZJI8gVTxVOSYtntR9oyv0\nqpJMME5rSG5jMdNJ8mM1m3DbVBIwM1bX52AzU/8AZrXikDkLya7Yo5JHmviq4zCzAdcmvnL4\nsYFjIfx96+h/GT+SrgdFNfNvxaulNvOp+8OgzVshXZ8+6kGa4bIw2f0qD7ykkg1Jqij7UdrE\nueqmq0jHYAcCsDUljLLkYyuKdkeWDn5fSmK+0tj7uOhqPcyqqke+KQWJ+Gk3qMr05pu35ipH\nHUGkjk3MxBwfSm8+Xktu55oAlkUfKG/Q1Jz5Z3KF54NQ/I6jBwKYy+dkluAaBFiLO4/L26k0\n1SZZ1xgKKYTnAzkdMCl2nBAbjtQMeGCyENz6D1pHkypBG30ApkedxBPagEKHUrkjuaGMkaYM\nwCAn1xUrTLGpI4WoIWKxttGMHikZWWM7l4b/ADmkIlWQbADxu7/ypjHbhSee9JHtwFPPGP8A\n69PmVfMB74piFZgCNoxnjNKq/KxY7jnhqRdzdhR9z5gM/wCzVC6kjhJvlx2yaav3grKQDQGb\navGBSCRt4LDLdh7UihzKWYr90Gl8sI2zjgcUjMpmDE5HpSSEMjH+L+9VIQqvwNw79RTpCN2R\nkL7UR4YA5yQOaGTYGY8huQabEIshx1+U96dkIvt0HvTGwqLkdetORN6u2eFHFIQMfLOVHzE4\nxTkkBbJUqc7fxqLiTaQ2eOaftxGF3YOelUA6MtHu3Hoacr+ozmoo1LSAscgVL5iTOQMLtHel\n1AXcX69e/tRv2sV+8WFMfAX0Y9qdjqQmSBgUxjvnn+VeMcHNKzfKqH5SD1pvMmB0PcU6RjkK\nF460tQsSsp27ywU4wKn3Kyoc7mI6iqqnzRyeB1zVpWXsOg4xTAkcgoRn61IrfIpYkDpUW7hf\nl+b1pcBsbzQMkGI887iDmjcMB84Hdaa3yjIO7+lAzsPIoKvoPKtyVUcj1pzZ3Lg4IHeoFZlG\ncEn1qeNTInPOetAgA3SKVIBzyKk+cOe+KhZhjCDaVOS1O8xvN8zOAwxRcLCR71Qd8tn6VM5C\noMfMRUMwKrgcAc0rfK3ytn+lSFgV8g849x29qbtCtluTTFj8klWfO47sU9iG7fSrCwKyNIS6\n4BGNx7VIJC3yqMAcfUUzbnCnk5py7WZgxw3SoANq7SCeM8NTuFjbnaCMZpm35cxrhRTk/wBW\nXA+f1NMBIY/Lhyzc9AKlL7thUd8HP86jjH7vPViaeuTJhumO1JgSOxViq8ZpVYhTztB+Wk+b\njf1FE0JbaVPGadwsPZTGoU/NTUmjiuArZIxy3pStuWbH8PemxqNzc4GaBAyYHyjPOQak3Jwq\njL/yprKIySfvYpYf
l2kr97vR1Al3KqZHLEYxQv3ec7vSosFmYDpnNP8AO2Zwe1UT1H/Mw3jg\n07cM8AFqiWQ8NjCmggAgoee9IocJmVsiPd7U8yMy8jBzSbtrAg805W/fEt91hinqBKzF1Cgg\nH1pnmhE2Ovz54xQkY4wfrStGDMJAPlA6UhCmZgoUH5jTlZY2CkYz396Zu+bcBzT0wwKONxJ4\npgPLfu8lTkcmmK6bQWJy3RaF3LJt3ZwaVuVboWB4oGHKA92pWYLH8o2saQOqgSc+9MZmkbPI\nCnNUIsYPl4zyetLGxWTJGR0qFZmkxngZ5qRidygcCgVh0YVQ5L85pzMI16nOKYrGRmCrnmkd\nvlO1sn0oDlJY2LccbaRpPMVlIwKZEx4yMHuKfJIq4J49qNSRyzDYDtyvTFK0kjYZVGM8AUxZ\nA0eQuWzwtJ52zcAuB/doAmdWcrtPlHPNSsgRmxJuOKjWb5cE8YqNYyfu/e9KoRPuJTJGRjvS\nwyv6ZWjcsnyk446U2JAmQTVgSQ53FyuaVmCrvPzeppElLLtHG3k0iY6joeaBEsEi44596X5X\n3HGMc7aZGcMTjOaaZT1Hc4JoFYlVxgc8HnNPYhQAv4VCxK/dGV7YpJJWZQo4YigCfz1bJJPH\nBqSK4BwCCzfwr/WqsbDChRuHR6mEm1iCeegNUiGXbaQoxZzyD07CtBS3l4VvesyP5lZQeKs2\npONrcBe9bQepDNCMlhnNP3E5dTz6VXWQNkZyOv4UM6rJwSB14rW4FhW83AB25609ZF8wA8bR\njIqDcjSKQCXxwtSnH3gu3ii4BGqL+fWpIhtLc59PWo/LcLnPPaj5t+4dcc0xdRWTb9786o3Q\nLQ8NlQe9XNzspyMdhmopLcyROhHbNSMy2fzOFXHHJqLzRGpABIPp2p+0xk4OAOTUXn+YzEja\nMcLUlDWTZ3yDTjJ8o9uKjhYyRjd35PtUkahgcHheaQieFTsYp1xk5NPj+ZSd3yqN1QRyAfNg\nkegp245GwcMentQJosrMdwZfmjYcU6ZhIwcjCrxn1qONVWMxsQEznFNjMjc43p0q09CCVbhZ\nJcqrAd6fuJ5YFVzgZquMjrxzkHvR53mHZzjuavmJ5S1JG3G0jHrmkjJ24ZcnNRDDTYpVkbzC\n/wDEOKfMPlLexfX9KKr/AGiT1oo5h8p8ao565BIp2SwywwRzTVWLzN45bbyO1O2vCd/Xjrni\nvlz2BPL2jdnhud2eKV8bcPz3AXp+dNZU8xcglm6YPFLtdMrjKg8in0Acv3gqYy3Wm/8ALRt7\nZ2nOVocgsgB2n/ZpQwjVgBuPoKBiMx3bgdzHkAUu7b8gX52BO7PQ0KBGyE8/0pdoVg3Z+1Ag\nkDqVwQxAydvFSxtzJsG71Y/rUHloqkqcnPrRIfMZmVynOKQifb8gCt74/pTYW82by1VQvXrS\neYomLh8LjpihVGd4HJHFAWHhiGcPyrcDFNgxHIUyNhOMH1pgZsFh83bb6GjaGBTODjJPqaBk\nuPlC/dKnFCKdrFJMHPNNULIgCNux1Y05tg+cDHHIoEO3AtuHC9MUg4Jb72ORTYXUKruCcnGB\nQxYybEG1s55oEP3Sdk5B5yKahLuQxxjpilWZ23CRs5PbtS+WmMbtuOuKTAXb5ahMYU859fak\nVhu2AcHn6U2Rl2g/fPYU5m3BBHhWP3hQgFj3ySfN8ir096XzEdwWHOcChlTzQWc4x0pNw4Y7\nQueppjQ51Eb7sZOe1LHGys28naw4FISPOP8AFgZzS7iyNnPy/wAVMlhgsAQSMcUrOWIQn5et\nMErsoyODxtH86UKGYDO0AUhjixjwVxjvT5GSEYA3MwxxUUcasvzZOTSxShgQqcgc5pdSrkvm\nPHgAZXGDWXqkKtFkHaV/KtFZN2QD0FU9SjLWuEQsOppjWpz8uIzvYgr0qOGQ7GJICgUOA4yw\nwAeF9agmZk25ICtzt9
KRrY0DP5ccbOQcD+Gplk8yHrtYc89MVl+YeA6hs+npU8cztk4yw4H0\npAkbek6muny4wqBuen6ivXfAfxQlsbi3O/YsfBz0cehrw1JnXG6Pec8ue1aNldPbyF85XOQu\netFuYZ+jvwv+LFnq0Ef74KG4+Zsk+oz7V7boviK3mhjO9Tux3r8s/AXxCu9BuGaKVkIcN5Z6\nV9J/Dr43PJMRJcqxwCY88fgfWsXTNfaWPtuG7jlGQw+lWFmHHOSeleOeFvihY6gY8OCoGGbd\n1PpivQbHxFb3i7onU8+vSsnFotTTOmWTLe9OXDOazre8WSQMGBFWo7hTn1zUFl+NTt6VPHwc\nqOaqQzMy8VPu54oEy/a3Lx96umZpk+9WZGw2gVZhcKo5yaBFmOGR2Hep2t5Ym5p1nMFYE8n0\nq5JMjZY0uozOSRx15qVLv5QM4qORcMT60zy/QcUwLfm7ueopxYdqoqSPYVKGLKDmgonkjjZc\nMm6q8+nRTZIAAx0pwmYHnpSpMe4pksxbrw1HJnao3e/NYt54WEinzIgfoK7iOQ9+BQyrIx7i\nqJZ4xrngxGZgsJU44wK5h/A4VixRie9fQkmlxyNkIGxWbdeGY5QzgKOelVzE2PnLUPC7W+Tt\n/KsafTXssqUPPQ19Faj4NjlUkIN3tXKah4BMhY+Tubtz1quYXKeJvHgHdxUXHTPFelar4FKq\n48oRt12nrXO33g+4jjwY9p68UcwcpzDYbvikGOSK0ZtDmhXkf0qnJbvGhyhyO2Kq4WGLhl5G\nPegELxnNM3Mx6ED3pOCfSi4hzMQ2R0pkkhbjHHem7mUYzTWY4xTAl3DbhRzTVXrk0wScDtUi\nsGyB+dSAg+XjtTTt2njmpGAAwetIGG33piGA8dcVEyFjkU8ru96euVXBGBQMhlYGMimwkqvu\nadcbcGmw7eo5FICVe/YUxju70FhwAKPu5wKAF3ce9I2VwcgmkGcZxTWjLNkUwFyM5IpWbcM4\nxSdPrQzdKhgGdxpoO49KdxuNNcZwRxQgHbcx8k5pjEnnoaG3Lk5470KN2MNQAmdvGM0oJZuK\ncyk5ojBbIxzSAWPOc5oYndxSrhcg0xmpgDL8p5pFztABBNDLuXrxUaY3ZpAPcngAc0oZl6nI\npN3XrS8CMZHNACq27PHHekZt33aUNtX1zTfutgZzQAq53gCgM28jFH+rXn71AzjPU0CFwqrw\nfmzQ2UOd1NX5ic9acPU80xiDG8YPJqXzMMeag3DOcYNBwx6ZNSxh5gkLf1qXjt1qPAXGRxSh\njuJwKYWH+WetJt680B27jAo3ZzgUCG4GKTnJzSoRggjpSHL4PegYRAs3XFJ5m7ORxStlseXx\nTdpDDNAheD7U4U1/mbnrUmAPm7UXGMbLMOelPznoOaRmVT15NIX2tikwF3DcFIoLlpD6CkY/\nxdaFZjzjA70x3H7/AGx9aT7zZPWkkyxHpSbOpoGS7OAetIGyCO9KrEAZ6UskYU5zyaTAh3Z7\nU9TuxxiobiRoW3Bcj2qG31JJJ9h4J9aEBbByxH51IrKOM7qTKr7Z4pqr8x7UCFzu74p28Lx1\no8vPbijywVIoGO3YXOaRflGe9IYywAFOx82O1HUQDlcjmgZ70q4XpTevIoGPGAMn8qb/AA8Z\npfQmgdz+VMAVgx9DSq27ryaYAc5PSnR/fNADhgfLnilX7xUHikYBuTxRtIOQeKAsO27ec0vm\nYX5hk0KeuBmklkw2NvHrQMVXIGMcU4Z25pA/ygdqHIHOeKkYZ6+9IoIUZGacGDDgUo4+brQI\nbuPpRxt4anGQfMcZpvlgAe9AaD9wxtP506NvlOBmmSYwBinKuzA3UAC4J5PNK2V6c03Hzcin\nqp70gHcqhI6UxeV4pw+XjNRruZ8dBQMmUDIzSu2xvpUKOS3pTpNsy55oAdHuk+6OTUrfd561\nFG37sKpwaeQVXk80AAjH
JyM06MNt+lMX9315zQWyvFMSJASxGBxSv8vOcUxZRgDG2mu24cUh\nEsYLMc1Lj5cmoIWKqMmnsxYkYwKYxyscHBpdpOD3pqr15o8zLDPagBSVUkEZNP3bgMdaYPmY\nmnNkLkdaAH7ugYZpv3W56U1t2zPNKrHyxx9c0hWH7T1Xp70rKN2e9RqxHy4qTaaQxVzu5OKV\nSGbbTD87dOaTlWxmqGWNq9zzTVxyvX0pACOn60g+VuvNADwAq880isqKDk0m4rnHJpdu4cnI\nPagCRX8xePwoVD0NMRQrYJz709v9mgCN/lf7uKlX24prqTjmnDkelAEvK4z1pdny4x171Hu5\n5qTJ2gg8UAPZWXAHIpq9Tnr70u5tvBwKb7nk0gHKwVipHyjmnIqyMCpwajLfKcjHbNSwrsX5\nTxTAev3Txml5am7j0UYpY+c96aGPjAH1py/eAqM5C4H405D3AOaQhXQ9AeKfsHTNNU5yDT1x\nu59KABV7g08DoM4qPO0kYwaXgsMnkUgHfxH2NOAPamA1IjbVyaQCMpGT2pv3uQM0u4t16Ucj\np0pjHMuThTinc9c80zGGFO3DOCcGmIaep9akT361Hu284qRSc80gAkqfWlZ9vTrTQ4ZsA80M\n3zAAYpgOZTs3A801mKrn+dSfKq4Y4qOQgqV5J9aljHLlhzUi/dwfWoIWIXrUwbd3zVBYe+Dn\njFAYbQDxTNw5z+tO42880mUL/F14oZR/EKbu9KUMWoF1FVflIAz6UjY+6eTTuRjB470rbS+a\nBgq5XrQvzHA605SozxSRt1OKkTHbCRjOKXb8woT5h/jTdx3YzVIZJt5zmlJG30NRnI4p5bav\nIpAAG3LE07IfFIxVsH9KXO3GBTHYVUI6jIo3be+akVvlOelNG3OccUMGG4svXFLuLcmmnI3Z\nFP4ZRjrQDAbWGf4qOGPoab15p38OcZNBIuRu46UBg350m5Rgd6Xb3ApdRj+uMU9l6Go9xXtT\ngxP0oJHdGwORSsp+XHHNIjA9RQR8wIHFADlG4nJ5zSO3T2oDAHOM0rSDbkjincYi+uc0jt8u\nOlLGwOQeDSOdy0C6gudvtTsfKD0qPJ2+lSquVHPJpWH1HbSFBp275cYxSFtvy5yKRpA3Q4xQ\nMfgLy3SnnGKYvzY7inJja2eeaAHFcrweKTnd0pPftSruzzwtAhyA7TkinZywpMblIFCsq8Hk\n0hjiu0ZFIzA49aduHvTI1O4k0AS7ttDHjg0zllPal5244pAKV3KKB2JGKQEo2OoqQjC9KYCs\nvygrSgHbz1pq5280LnnPTtQA8c8Zoz2NM2/NT1YLgnmgBpUKM0rN3xxSsR2FClXzxxQAqsBg\nEfjTt2G4ximcKvFG0cNjNADiN3J7UvH1NJwxOKfgZGOtMBm7jGDTw3OM/hTd26lxznigB3uR\nimljnnkUpyy4NGN3HTFAhdvFGRnAOaRenHNOKhmGBg0CGhjG2OtA4PNOVeuetI2QBkUDAe45\no4PbmgL/ABZzQT07UMQMeBxzmlyA2B0pG96TacEH60CFUbSenNGRjApF9+tDcNhRTuUIM8k0\nbh0AxTgQUODjFNB3A0xXFUktntQHPTvUfIGaVRuYntU9QJOF61KrZX2qH7w5608DK8dPSqJZ\nIGDKTSYDKMCkjToBVqO1LYxQDGxn5dpxViO2L4wcVatbENj5ea17XTxu+6MUWIZTtbPaAVGf\nWrkdkxbpWnDarCvAqcIAM4xWpJXhtgFAxgirUSoq+9HGM4oUBl9KBC01uDSZ6g81HJIR25q0\nND2kH1qBpOp6Co5Jiqt2NU3vAw25q0IteaCCOrVXe43NgCofOx0qCS4C8Z5p8tyWPkYc84qj\nJJTJ7g5IByagaQbeD+ddVOm9zknUWxMl55ci8981tQ+II7aFyACCOc1yNxIUyTWbcaoYt6H0\n711xVjklLU5nxzeKzXGAQh
O7kcV8o/EjVhd3twASBux19K9++IniBLW3lTfyy8nNfK/iq6e6\nunkzjkmrlEcTj7tmaY85B5qAp1BIqw2zzd5POMVX2+nLZ61zSNiRSGVcdO5NIzBpCCDuHSmx\nsfMxjjvnpUrSbvuj5hxuqBjNqLJyDkj9acsbKAOpPOKRlZvQmmtJvQ7QWNIQnl7t2D3pQysc\nk4CjGKVlRlSQHLdDQ42ZXsfamAgbYCcYPYinoBgO3T2pYiVQ714pJFGzGeM54oAecKf4Qc+t\nMClZJGbkHnpR56tgY5FIrEyZXIU9aBh5jEkDoRnFOkbzNu4/LjBpApZsnt+lNkcnAxlj0xQI\nkRlTAZstnA4pwjLctwM/jTF+baCuXHWpsDf6+ooAY0e5/vbR60hwGAJqWbbJ8o/KoGUSqcHB\nHFAEu/cuV455z+tKxMjZB5A4NNtxuAycBeo9aaygIw3bQxpgBzGpYDOe1CqY1BHLd6kZRHHw\ncjHU0yOMnGWAbqRTGKPmUsozJ0PNSLKVj2k71HHFRsoAwRg5pzYQ+hb8qZDH7gIyQMj3pil1\nbGMKecUiBlGG45xik+Zpm3cKB1pIB+xm5wFGe1O+ZiCE+YUbhvAA3gUYd23Ifl9qYCszKpGc\nMeTShg+cLtOOtM255cZfNJtZWU5wP7tBSHN+82AtllOc06OQy7f4VXIzQyvHHlMZznJ7U3YW\nO/dgfzoAliOM54z3707cyL8xz/Om7C2G9e9IrhWIPzfWmKxJGwBweBU8LBF5ztzVeNg0TSfx\nKcbanXzW2EsMdaYyz1PoPWkUjaD1HrTV2zLkdQcmkLEcMODyKQDmZJWxjHv60RyBmK9h0qNc\n98celSKqMu88GgBygMpDE8c09GdVAUjBqASlmOThcU8SCNcr8w7Y6ihjsTIvmIwbjFRbtygY\nzzgU7zMjJHXqKerJI25VwPSkDDcUBHbpTlUMD2PWoxLlnBGRjINCncjHO00DJVwpyVyaRmG/\nKjJ7rSQuuAG+/TWjPzHqc/dqg1H+bllxggnt1p7bZGyvUHFRLtjj3MMPSRMWjOFwSc0WEOG9\nVIznnn2p8aiOBkduc5pm4MxA/OkZvMU542+vWhlEgYRjjjPan8rg9ajUhlU9SPWpAy7sg5NS\nIkRyxDMCfajz8OAQePSm7hu64FIrNxj680BqSvu5xz3pI1LoSw2570Bgy8nIHPFJuM0eVB25\n6UCJcbcBufenL6Ofl7VECscykKcdDk9KV2T5lzz/ADqhDl2pcZ3fIaj8tm3DGFzktTY2BjAb\n5cfnU6kjhTkMO9ADFkLEZ4ToPwqYMIj/AHgai8sfePOOKRcvCqbSCxzQKxNIxaMfLg5pcbY/\nnOW7UyJ90hj2kIB1o4dhu4INAyeMGTr8pApyjLHrgCojMJl6bSD071PDgo3PzEdKAI12qOM5\np7khlfFRpMFUkgccULJ8p9T0oAVlImJByOtPbAUkjk96aqlsLng88U7ceQcEU7gLDxhc5FSS\nDkAD61GqjCqMhs5okmKyEZzmgVhfRQeKXdukDFfYCmKHXvipNw4HrTFYdsZBvHHP3RTVlCt5\nRAP8WccUiSSclT7YpyqVUfLvJpjsSBh6jHrTWAVfmwx7YpPLVVLMMr2ApBhuvI7UCsOVgvzO\nCD020rNuUqowR39qVZGkcLjgd6a0e9nZWAGKBWFh+ZgAOB39KkRsscEnmmo/lqvOSeMUu452\nkjPtVAPfaCADinv8sbOmOBzmo96tjjnPWpS26RUCgA96YA3ysnPDAE0co/B+WljZTgPgDtml\njZXdzjocAUxCB2cZGc055Czr8u0Ck3Oqj5Npz09qcFCq+OT1waL6iE7Ag96dC252DHLfyqNS\nPLHBPfIqVW8zgJg/3qLjsgk2RKOfmbqKFBl+XGMUnDMo2gmnQ7o8k9ziqIcSa1mZevA6ZrRj\nmJwJBxnmspfmyucAGrMXmPjc
MmtIkWNQSKWATgGpGHmMNvGDzUG05Vlxgdqm80rlivGMcVqS\nP3lskcNnAapiwbqe3NVnXy1XLYJ5xSRtu3Keh5zTFZllfmb7+RRgbmCybj0NQptjYFQW461M\nsfVuF46VQWHyBwqgn5RwKYwaT5Q2GH8qWR1ZApY49aj+VcsPTGakCldAMvTHPWqRRmyR61pz\nfwpKMA9xVRgUZwDt7AUmBSkhKqFXHXmlVlUkZwOnFNZTG27qO9IHTcGA2g1IyfOxgqHccUqy\nbm4O0gVDHI0cjOgyaGysRbeGdvTrQMkRg6mRwcqe9TMoEKOjnJPK1VDFsAnJx1q0twUiQEZP\nsKaJJJAyq245A6Uke4qMEEEZqLzi2QeN3rTirLHhOvpVCFXzSxBPPqKeJ+2ATUZxuUs2D3FJ\nDDm5d+gxx6UCLfnf7P60VX3yf3B+dFAHx6JCmGA+Xpn1oWUKc5LDOdtCr5inLADrk0kY/iJ2\n/wCfSvnD12Pab5Tkbvp2pq7mVVDkN1OelNhD/MzDDdgadKS0LqzZ57daCbin5F+h60M6x/OX\n2s3H0pxxGqF23ex703cPMO9cMfX0oKQkmQFXcMH+P1qTc0kaKcZUfpUe5WJ+QllGaepzgMmw\nryPpQAq2/UgcZyTntTmUZIzkE5HFRsCqlxkvnP4UGRsBj0J4NPoAuUkQBR0PcUoLoMAA/U9K\nUyHcT0Q8cetJ94LkkBTijoAQ7m3Mvyn6VJJJuaMKOO570yPC8IxPPWmeY0e4Mu1c9e9TqBL8\nnzFWwM9BT9yyNyvHTHeo41CZ7A0ojYHcHyaAFKnzFRULJ160qeWPnX+LjntTI2kSb5E47mhf\nlHGOKOpLHqrNmReAvGaWJVjzzx1x61GxbaytwT0x3p0XqVxjrTAX5ecrg+lKuxSHRsH0xRky\nMxx170Ky4Knhf71IbHNIrLuI5HPSk3CaPeF2exFJHuZVXgcZpyyNPtCjtyT7UISuO+8VPBzS\nbmO/ccKT0pkbs2doGF6UrSFsMOc9V9TVA9RyybXWQE46GkyZssw2LmlZjsHRTnlfSlVkw+4n\n0pFWBZfmRk5TNPlmAlQ7cD271DGQnA4IFPh3nbuG7B60iWiSP/WMxwAePpUGpI8NuMP8x4wP\nSnFGeUqwxz2pt6WFswK4Pr7UxpWOauE2SctkCqsnDLtXnOBmrkhDbckEluAKg3fMzAbjnpSN\nkyszbZiHkw/rUkLeXIEZsqecZqFIzJK4K5IOaRmCyEnkgcAjmgsvx3gaRlZgBjk/yqRZhJgg\n4HasyReCGPJ7CpPtG2Mb+EA9OlIDWhvXjyfMwa3NE8RXmj3H2qCTYQMNu5GPXFcdHcDA2nPq\nT6VdW68zCg7lAyKQj6H8F/Fme3EBaQqzYIK/xH1r6A8G/G6G4jG6ZI2yN2eua+CrXVmtlSNH\nZN3Jx2rq9G8Uy20yFJCyr2bv7VorNakSutj9JfDvxIguo1WSUCdmwirzuFd7pXiKK4AbcrL1\n68ivzp8OfF+ezkVYn/dsMq2eUr3HwP8AGiKXCNMzIBhmJ6H1Nc06b3RtCp3PsqzvBLHkEdM8\nVaiuN2MHnvXi3hf4jQXEYHnjOONzcEetehab4hiuFDI6N/tBuK5ddja6OtWYq3FWIbjbyxxW\nCmpqxzn5vTNWFvAw3Zz6rQBvx3m1hira3eFwTxWBHdAY6g1PHdBm65FBRurMrHBOakyOOeKx\nftW3vzU9vdeZjLfnVAaPrjmnqp24qskg6g5qxDJu60DF8ppFx0oVG6Yq/ZqsjBa2UsovLUFA\na0IZzyxk9TUsdtufGdtbM2mR7coMNVJYzHJtbikIqSWrxrnOartJ8u09K1LlwsWD17VjSMd3\nPSpYx25W4IzTJLWKX+AUgb2pd/y9eKQWM+80SC7wWQHFZNz4LikJJCkdvaun3DoKcnPOaCrH\nm+p+AIXGPK3c8elcrq3gby2xs6
H7oHFe4yRiTO4A5qpJp8U2AwHHAzTUhWPnW88DSLE+1GXk\nnkVhT+E7lI2bZx7V9O3mgwzKRgY7ViX3g+KZBvX5f9kVopkOJ8yXGmy2vzlSRnGDVVieQRtP\noa+gbz4crcQyIV+8eM8Vy2p/DcbdqxjevHPOarmuTY8jX1IyKevy+1dlfeAriGXG0gew4rLu\nvC80Z5DY6ZxihCszD3jPPSmNjt0qa40+a3kKsrHb3xULRsEwRg9uKq4DuMY70jkjBY8UwNxg\n9aGPy+opC6jJGDKccimwn5do4xTpMLgKPrRCh3elIYeZn8Kcr9yabIwXcO+KYmGUZ5piJN5B\nIGSKdy2Oxpo9jSc8kmkwG7trEnpTdxbIxUnEi9KT5RkmpFqIqtnJ4FJuO4LjjNKWJUDvSSbu\noqh6jmcc9xTI3G7pzS7QVyacpGMgUgGq3zd6d5m08Him7g3tTWUrj5cjuaQajmJzwc0uze3G\nPek3c5ApCDjg80AKMcgdPem7gvRc0rNlM02JXwdw5pCHsecdKOdw9KYF5HrR5Z2nLYNMYrNt\n57Uud2Cp5NNaPcozQrBc8UDFYZb56GYFRjI7UN97JNMC+vrQA8Nt/wAaTDdjSMMMQeQaVV+U\nAUuoB935ifrTkA+8KVgMANzSKu3gDNMpDZtxUDvmlRP4s8GiSRiRkUgyp2ngdaLgPYcfepVb\n5epqP5frT2Py8daRIDO3GOTR93HrTXY7+OlJ+OTVDBs7euKchDEZNNbG4ZNKqjrSEO2gEk9K\nRsLwWpuNy9aPegYjY2k09ANuaYp52npTgdnXkVIh33l4oxxwc03dyCBwacymNsnpTAcjfLgi\nmKxbjpTuwIFNJ+Y0DBXbkVJ1HWmLn04FPQbl5FIojklRRg/M1RR26eZ5jpz2oCZkPy4Iq0yn\nAb9KaARm3Ee1P+Zmz2pu3Cg/zp+fypMB4U7TzSMpUDimrJ04zSuxb2p2AFO3JoVtynHWmIDs\nbnNCqduO1ICT0Ud+ppMbWK02Pt81SFfm600FhFXnJNLuGaZtKkccZpRjmqCwm75vm6UpXADK\naXYG47Un3XIxxUhYczBwOKaMrk9qRc5IqQZ4FBQ5GLHjgUsi+YBnmjo3FDcAY5pAN8sL+FDL\nu27ulOO5+nSnMv7vpQMZwrDbTt38Pc01sleOfYUirtGTwaAZKF24bqDTQ27OeMUnmFlACc07\nCqMYwaCBA29qPl2k5oJ4xil2hqCh3UClfleuKaeF6UcD7xyKAFRt3H60cNnmmMwXBXpUgj3f\nMD8tACBR1HWpOQuRx60xPvelOHPUcUgF3+9OOWznrTVXnJHFKQeTmkDFKF2BzzRtO7AGaaoI\n5zmpFY7SB1p3FqG3dggcCnnazZHSokZucjingFUGeM0uohW4b0FPXLcA0xlK7QehpzOFbp+N\nMoFyuaUfKvTNJuPU0u47eOlAxei+lO7hgaYvzEelLtI4xjmgXmPZmK89KjaVtw+XC08yFUxj\nNLtMkfXFAxSCfTNOUbOTSIuR6091HGTn2oECk7jnge1CgbskZoZemTxTZN20bMEUMY7JPQ0r\nLlQf4qaMcN0pW3bcgZpiHNkL701flGDyaUE4BPFL977tAD1Az6e1IGwTxk0MpalxQA7A256U\ndOo4prHMfNCk5HcUAP5Kn0p8YGBzTcque9NX5j6UDJN+PUrQsm7k/hQw4wG4psa7jkGgCbO5\nMsOKkUkKMCo4W3ZVuaeuVOQcigQ4/KoNOUlTuAwKjz7E08M2enFAx/mDn5aVWwOe9J654PpQ\nG3dRQBJ93ODmlWQbRkc1HvBHAxUuF2jPWgBjOeT60ICrZ4NC4Y8dKRD+85oEHV89ql+8Mdqb\nuCgZ55pVyck9KAFbGMg0pbauDwabtCnlcrTmxwccUAKvzd8UPgcmm7lP1o4ZsGkA7kxjHNPT\n7uGzUcZ25HpS7iDuNAx+0Qn3NOPL
ZzUe/OMg5pzAEA9DSAVl3AZ9aXdtH3abuyM96c0nyqDw\ne9ADduecYBqRRxxxTVIpclsDvQUOZvUUgb5aPu/WkXv2oJHqflzTo2Clee/NRj7uKeqZ54pj\nJWUc4HBNM+7k4o3/ACkGlUe/FGlxgsm3r3p24KeO9JtUofWkZeBjmgB4Y7R29aRsbuB+NNz6\n04HH0pAAJIzmn8svrUI+97elShyzegpEjhwQdvNS7g3GOaiD+nOKRpNrZxQUiVCW3A05ehGM\n1Grd164pVbgHuaoVxxmwpzzQrfKvbmm7uxGTT2jDICO1IpAMMxxShsLwKjTinKx5GKYg+9yB\nmjc+3HSpFZUXHQ01vvZAyKT0AAx2gnmpd3y5HSmRtnginj5VoYgznrUq/KvTOaqw5BO45Oas\nE7W5pCDdtGCKUZb6UFlbIFNzt96YxSvPTNDfdpFYueDSsPloEJH8wyTkU5QWbJyBUe0luDip\nvmOO2KEME78U3yMPnOafklqTO1simw6kigqwp/3WJ7Gmq3GcUDLH2qRihge2M1Iq/wC1mo9w\n3AYpHYiUnoKCWTA/LgUnB5NMjPIJ6U89DigofwflBoOe1MHKjjBp3I55xSbEBbb15pN2e3FJ\n15FLtJ460xkiyD0pzHPfimgZXGOaPu/WgBS3B5xQW4ADZoLZzgYNJjPbFADlYpnuTTj26U3B\nzkUoUd6QCt8o4PWhGx70BeRSbWzx3oEPXg8c04n5sdBTQP4Rx60ZJU5oQg/h96eo+XPQ1Grb\neD1qSmigVTSkjbg9aaPv9eKUY3YPSgXqPDbsZprfe4pG+R8Gn7vl+7zTGLlVx+tLnqRTC1Ir\nfL70MBrO6/d5PvT2kJ6igLwfWolk3Nwdw9aEKxMvIwTgUkh/d4A/GmtINuMc+1KuXHoKliBQ\nSM9aOSvWjJVvak+8cLVAKG3fUU5lLe1Jtxj1qTqOuKAsQgZUjHNEfpTurHtgUbS33Rk0XAYy\nnoDRzwB1q3FZuyA7T71YXSyxUhetAiikbSYCjnvVyGxeRvu4rXs9I289G+lbNvp6M2SvAFWI\nwLXTTwPLya07fS2Ybgu0Vrx26qwwuMVZAUUxFS309I1Hc1ZWEKSAMAU/pg45pPMC5zTMxM/L\nSFiF54FI0yqp9artNuxmrETtNtwMUzzc5Iqo0x65zUXn9s0AXGmC9+aga63knpVRpueTxVeS\n4AbrVICzPIVG4niqDyBZKJrjcvXNVpHB71aVxE0l0V56iq7XAbOaiDbnxnilaMMCa66VO5yV\nKgsaF2LUSKqqBjk0eZ5WNtHbzHGFX7x7V6MIM86c9TPvh5cLM+QuK878WeIY7CxkkLZdRkV0\nvjLxdDY2skRIVgM7e/1+lfOvjvxr5yGNXOGHPNbxp9zDn1MHxt4uOoNIDJk4zt9BXjutXrTM\n55wv8q0da1ovcMSCV5Ofb0rkbi6e6kkO/apHyr7Vz1bI66Y0XCzNlBkHjFIqOFPzYOegpLeI\npHtwFHWnyKQu7PFcZuOE3PT60ij+H3zTFcZAxmpFkZiAV2ioYxsa7pCCDinNJh8Hg0m792fm\n25pWHyr3I70IBxx5fy9c9KYuOcHPcnNACsGH40iqGUOTtbpmmBJG3mN/s0m75cDnmmbTJwG9\nzUvkqRvT5gO3vQAiAsoygHoaaxwDk5we1Cn92FYYK88d6WHDxls5UH7poAViByMDjBB609VP\nGXHTr6Uz70fKjOaXjZhR83XJoGCysdwAyOm71p8Pyr1ztNRN8uAxwTzxTo2RgRv560CHzNtY\nD+InPvTY8fMW+Veg+tKrCTa38YPWlkYNIADjvQIVYpNuQelP3fJk4JXmo1Z1bco+U9feneZn\nkjn0poYrZPDLksMjFNVj5atnHzbcUPMVMTDrnp7Uj4ZR823JzimANhdxYZOcU9G2xgsOCeM0\nyTa20D5sj9afGxYHK5A4+lMQ8YVzk5
YjmmyN5agfezwaZwvJHPvTmzwdpI7UWGSrgMFAwhGS\naDJxwDuz09ajO5uSTx/DRHceYxOzjGBRYCQOF3HGCRSEu20Y+bGaFk427Tn+VBZUJDHI7etM\nBV5+ZuvQ0CMKp28DvmmcKoAJPOcU5u7ZGKXUkerBoxlSAvNNj+WYHqG7U5DtzuYEU4Oq4OKO\noxY4wuSercVZVhCi4GV6HmoIWxwRuU96czDdtC5X1pjJVfeG2jA6UcRsMvlccUzztpOV46Db\nTVXywC3IzyKQEy/NlV5I5zT/ADOOAR6io1Z1cgjavan7vLUk8k0FDmdGAODk8bacqhpMAjp2\nqJNqsSTl/SnR428cHOaBjxiPGQTzS8huRgZpsmUYs4GMcClVlba27t0oJFZsScDatS7vmbBC\n5qJzgbCeaa0YkbGO1IdyT7knXcPapt7b1IGBVdWBtyVb5wcYNKshwF3Z9aYE1xI5BIKkZ6VG\njFQck7j2piqi5b5s0Z3kHdz6U7h1JdyxxggZGelCqXbd+NNg+ZnycH0p8ZPmbW+UsKQwKr05\n+bninbQjBlH1pdgV1y3Tjio5B8yjJwfWgSJ4+hYgFKYCA2VPz54zSLIE+XGKYrK/ylfnznNH\nUZYjZpDwdvPNOUmPj3qGEbAw6Zp2VjBL/MfWn5kksinofunrzSrjbtbkUyOMHDHLj60pUEjs\nM9qVxhuC5xzUitnDdFA5qKZgrDYMkUSExoMNljzxRqMlkk8vAxknmpAxAUL94d6i2+dHubiX\nOKRGDZKHBXg0ySQE7mAbcQcmnOWYrxk9c0wEiTcDuOMGk3eY2CcY7CgQ6SQrMWAyvrU7A7lK\n/wAQycUxFKqVC5B/vUiyMsiqfurQAH/Wbhz7GntgNnJOeMUxsMCAcEn8qcFCsp7dzQIccQt8\njdetJGx5JXnNJkEkjjnANOYeaCSuGHH196AJP3m1HyAvX/61KzHPmOMDqBUPlqgjQnj0qTcD\njP3scCmFiVfnBz1IpF3bQu3oeppMuwOBtOKb5p24xteqQyffhjlQBUahtxBJxngUjM8mO5GM\n4pTNJk8dsD60AGDI3OQfU0u5n2rtxj0p/mEBRj5sfNSJwx525GKBO46TcijDZz2HWnRqrbkV\nfmIzUS/McjqvHNSfMg3EYpiFhj5z3WmqcuzKvzDnmpeRH159KZt/dkE8d6aEOgx13cn1qRVX\nJ5y3rUK/vNrEc9KewKMU6Ec5qgHjftCtzg8U9nMwZQQoY81FGTcKNjYI6k09QkbDeTx/OmBI\nR8qIpJbHXNOMiu+CcEcHFMhI3nGemQKYzKJOOC3XNJiJ0xjKnjPSnLl1bBI+gpFjzONvCgVH\nuBkYZK80LVajJlZowOm3PXFJCxVmduM9FpvnH/V447UO2CrH/WL/AA1QEgzIdoba/XBq7bSN\nOoBba3SqMjGRwyjDdffNWbVXXLP19KpMzZpQsVGMZbvUqkspwRj09Kgt5ykYJXmnwqzHOQuT\nWiZBKseVDscnpzTgobqdgprK0mQnOO1DbtoB4zWgDmUHhGxzinlVwMHLZpm7Z24Jozhc4zzm\nmTcnVlbIZcY70xlLIRt5zSNICo3cCjcGx83NAhkgLEZOdtZ7LulJPJ/vVo7ygYMM5qlcLu4X\n5RSYzPkAc4D856etI2WIAUHkZFTeSI33DrVdXO4kcsDUFE27y3OV+gquztuJYfPT3PmYbndm\nnAIu4scyHpTARZMyKqdSOlPYy7vlIB6HimKBww4ZeaGmIlC927imSyfd86EnPagqd7ln+VqR\npNylcfd4pvy/3jiqAcj7cRr8w6+9TLlZNxbAxiq+0LIDnD1JG5OQTkE81IWJ9yetFN2j+9RQ\nKx8cqoyfQck1MqrMsjAbZF+7z2pnG4AMp3Lnn+VIqiRGVjnPfpg+lfPHqMVhuVRHyxHrUg3J\nEdyZZuCKYu1JAzDJxgAU4SMxYZ57UiSK
OOPd3bHByelSjcWxjApnDIoTqWy3FNZikgOCQW59\nhTLJtzsvDAMeD/hQ+9nGW5J5+lNkkj+8eFB4NPRF3FgwBbpmkIcyttJUhj6elMG6RMOArKc1\nFET84IwwOM1NJtUruU9KYXY7lnaQDZxwKRWMmwk4djjNIrGSE7WwW9aRim0Do3SgLjvMG5dv\nIboaPLG0oQS555pys20YVQMYFMVHyzA7mA5I9KAJOFZV3ZyOT2FI37vdtYYPTPeiPlWU/cIy\nGNR87QjHJH3aBk25wCQcDHNI0mzgrtLLxQzEYdj8/QYpdylhkbj/ACoEIp8zGeXA5pVY8pwO\naZ8zNtzkjoVpWUhi5GCG4oGDMsce2Td1wGHr6UvEezDb16kgdD6UPljkMR3PFMbPB3eXkY6c\n0E9SRsts3DLZwMU7cFkLkYdeC3tTRnyyyHyz/dP86TzDIu0ruPUsO9ADo1SVmKclfvMDwaRl\nKjdGgODnmkVkVg0ce0dGBpV3Dc+f3anOKBWHAD7wySeSrdqI23glj8g5oXJyGPDfMTQrGPIZ\ndoYcEdqBj2ZWDZXacDFKGBm8pQQAM7qhaQFgmCpXna3p65qQ53Fs7STwR6UCHea0kbbjhvrz\nUciLKpDtyF6Z5NPHlLNhweB+BqFsSbuq4ORQOJkXSyRxlQi+3qKqGFoNqlVAxyc1qaiseBt4\nfuT61RMTqpJGcnvQdEWVmjZsKAq7jkkGoJYmkYruAC8gmrskf7tt3LAZFQMpkfaRg4pFFFcR\nFsbnd2yT2prOwVlbuasSbFOBnrgiq8hLDGVAzjBoGNYmOMKjAOeM1bhmD5xgBRgsfWqaxtuK\nlgwHT2pqlQrPtZR0LZ4zUiNaG6O3AbCDqcdasx6kQuFPydQfpWLJK67E4Hy+tPjkwoIX/wCv\nTQbnVWmqmHEknzDqFX1rqfDfji5tdw37GY5ye1eZrcOsgAbK9cVftLoL8pyW9VqiWj6R8K/F\nifT2DRT5Tdghvu+5r2/wz8YIXhJkk2sOcZwv1r4YtbxrdiY5N3OevQ10Gm+Nri1b98+/B3EK\n2Pyo5Yvcm7R+iOkfFK1a3hk88bHHDSPg59PpXaaV8QLW8wv3Xx13ZBr84tO+KFzHKrRSldvU\nSdB/SvSfDHxqmktvKSXMgbDO/X8BUSoxtdDjUZ99WviSOZR82D1OTWtDqyTKHVsfSvknRPi8\n91bxPJMrCP5X2nBGPavT9F+JkEkMZWQMMA7dwBxjriuJwkmdUZJo9wW6EhDM3NWY5juAyR6V\n5hp/jyK7j+WQKeuD1/Kt218VxsFYyKeMjDcVNmjRWsegLfE4UnFX7W6XYpI5zXC2/iSKVshh\nu/u5rVt9YGVLYHfrVJMlno2lRCTBzit5V2KADxXCaNra/KA/PXr1rr49SUxIWHOOa0uSyzNM\nIhk1z95fo1wcGmapriNIyxtx0rn3uC7ZzzUuQ0jZmv8Afj0qDzN7ZqgLjbweaPO+Yc1JRfMg\n5FJ/BxjFVFm+bPrT/N+XrQVYsbsUq528VB5g2g1KsgpASbiPpQKYrnbzzQPmx2pCJmb8aTd8\nvPSoxnPXinbunH1pANmjEigleagk06GTJK8/SrJ9RxQvHOc1QGVP4dtpRgruH93oPrWLfeCb\na5VlKlj7mu0VVYg0xoxtOBzmi4rHlWofDqGRSGiAA79652++GsS5MQ/Blr3OSFdvIB+tVW0+\nKZSPL5q+YjlPnHUPANwrHYimP+8P8KwLjwhcx7t0b7B0IFfTt14fjZt4jUevFZl34UjmVtq4\nJ68U7i5T5jn0e4t48mJgPVhzVTypFU7kYYr6LvvAy3C4KbgO2OK5rUPhrt5yvJ6Yp3FY8UZG\n7qeagZjHkV6lffD9lyBxzjpXP6l4BuIsBNx55wtO4WOUjYcdzTclX68Vpz+FbqEAlW/rWfNp\n9xDtGC30oFysbt75o3DuKa29SAUOGGenNN
3HpjpQKzJW+tMXdtIzkUit60zcF75FAEkbFcZ6\n1K4+XOagJU9OtP3K3yg4pgJ/yz59aeZiwxim8betNLbvlXgUgHcAGlx8uc0zIVSOtNDcc0AP\nKjrTQ24HacU9cFcZqMYVsA8UAObrmk+9nNG7aMdRSMwwD3pgOVtuARSSY8zjpSyZwDSFvQUA\nDFWXpSdhTl6Y2070GBipYDGcZB605W+UtjFNQdttPYNx2FMaGqfm+brTlfaMHrTM7uMZpWxn\n3pXKDcAcnpSAlmyRRyFPpQuFbPUYpAJz9KXyWXktk+gpdwY0ojKjO7NMkRiMgEc03luRyKcP\nnJFIy7SV7UxBtD46GnNhlIzzTVbcpGMYoBOOnNAD1UdhTOWUg9qkbnGPyphXqR1oGKv3QxpG\nG5s0u7C460n3SGPT0qeohYyfoKfyysD60xs+nHtS7vlzVFAGLcdPpSrlSAy/jTdvfFK2Rgmp\nYDtu/g8VIF6Y7VEuUfcRxUitt5PQ9qBjeACfWkbIJGR04o3DBwvFJGNykEUwB+dozUmcfL1F\nNCgtyOlOyrEZ4NIACqo5PNJuA5xxTm+9k80n3uOgouADDKMcU88LjNN3fNgUZ3Kd3WgfQaig\ndTxUwA253ZNRfdAzTvlLZ+7QIGYlcelN6L704txxzSL3JFMBscv7zZipmbapJFRKn70NjJqR\nQB1o0AamZPn6U/aS3oKUA7vRaftB70guIoNJuC5yMCnrnkVGzfK2ecdKQx8UhHFEhbbkcj0q\nGJjt96mbryaQxQo44IJpCufrTd3zZpS3zEdTTAcrlRytKc9MfWjcWXHSk+by/WgA3UrfJ24p\nn8NPb/V5Jz7Uhi7gy4zTR701lDYPINSeV8uSaBDdy78beKft+UKKGICjApW5XpVAKPl60q/K\nnPWmKCPen8heaAEDls84p+3C9aQrmMnvTd21Rg0rAPT72MZp427ulMhf5sKMU8/Kp7miwIau\nFyS34U7HmY4+X1p3G3AH1pqnPHSgBygbdp5obgYB4oP97+EUBd4zjrQAhYs42809vk+tRxjY\n2O9TsPmyOaYhP4cgUpkMg5GMDFMOelKq+9AxVfOBn8KVHLccAULgSdOaZuO7lSBmgZZRmYEr\nwO1IpGMHk0BmbkDim8jnvUjsPbHTqKFCspBo4VfekVgq8d6BCKvYdPWl3Fc4NOGcccCmqRwS\nOKoBysWHPNSL8vQVH15XpS7mxxUsB0cnz4I4qMzgOUbg+tLuH8QzUbxq2etAhXk+XaDk1KuQ\noGeah8kY+XrU8cZGNx4oAkX7vIp2AV4pp4zg0ij5SRQMk3bVxjOaFbap470m7tmhVIBNUOxY\nXGOKN3IA6VFuxipc4/GgViQYxzS/wg7cgVFuIyD0qRXKjg8VKCw9f9ofNQqhhnOKaxMjAZ5p\n7qFAqgsPCBVzSH5j6UisPWkGWOO1AAwOc5/KnI/cijIDHNOT7pIGRUiBVDNUm35faoRleR0q\nQn92CvGetADzwvBpOFAHUUxlPY8Uo4weopgLMoU5AwKI2Ugk9aQ57mmA5wRQMmjXapzSHDHj\nmhm3cdqSNSPunBoAfuOM4xTtueaYGJ5PQU/dkZFLqGomNy7e1P4bII6U0A5HpS8cc4FAwyFX\ngU7zOAcc0mBg4HFNyeR0pCFP3sk1JuG35lHtTVxkAnigqOeaBAy7V3A07eCOKOFXnmkVec9a\ndykIx3Hninxjb3zQyjcMijjbxSGOVSrZzmjzPyFORgw6U1iu3pzQA9cMPQUq7doz+NMx0Ap+\n3afY0CFYA5Ipit19KWMlt3HSjBJxjApdQsABXBU9adt/vHNIJMMF208ryaZVhpLdjipFJaQZ\nGABUAYiQZH0qdW68cUCF2k96mXjjvUCsX5VfrUv8NADZOvvmjPzZ6jvTuOMjNM6ZOKaARTyW\nxkGpUbI2g02Pgg9RTuN2cYoYCncM7RSscKe9
Iu5WznNIx+YUgC3HHPODU5bcxzwKghJ53DHP\nFSlQe9AAAetByV6UFjwBSq3PNAhImAXkYNN3E/Sl2tzQsZVc5zTGPVed3TFSbvm9qjHzIAet\nPH3TzzQIVV+Y4zSYxxilXNJ83JxSEPXKryKcrAr1xTF3Y5NBI6A0DHljjinbiFHvSLnaO9Kc\nqORQArMcZpd/IIH1oXDA00KMn0oGS8FetIwO088Ui7VU0oHGaAYiru+gpycDFJu2jilFAkAY\nhh6+1SY3c+lMVcD39ad/CPWgQrfezStnGetNXlvajlevINADlyxHNOz1FRg4X3oky2CODQND\ng56sMGn7vkyKjboM805c7cY4oEOX5uAaXbuOaRcKR60hUr3zQMX8KkXHFM7inYCsT1oAX+Ek\n9c0L82CKY2S2SePSnRnC0DY7buznqKN21hmhWyxNNkZefWgQrZPAGfSldWXBz9aFbIBpiMGy\nwORml1Ad5g2nFRqqxZA4HWljGXzin4C5zzmmIF2PjI60sbLkgjFN2kqNq0+GEsxLA0AxD9c0\nL94YFXI9PZ9xIx6VZj0t2QcYNVYNjPUBj14qVLcydvpWvBoe1RjkmtO20sKACBxQLmMCLSzj\nJGav2+i4wWABrfhsVHJAqwIRgcYNBJm2uliFdpGWq5DYqrAbRVrb0Oc0oJ6gUwGiML1UYo2h\nfujinmTPU4FRb+TjpQTqTKwpGb14AquZh0zg1HJcDbjOTTAmeU7h61E1x8xHeqpuz0NVZbzB\nJBzTEW5Lrr2qCa73YHQ+1UGut2STxVaS8HmE7uKtCNOS58tck4zVVrsc5qhJceYpy3FVJb4L\nkU7XC5qTXgPGcCqj3W5utZclw0jcNxTBcEZGefWtoxbM5SNJrsoCMZqL7VvI4waz2uNx5NTQ\n5bpzXfSo8xx1aqijRg+bgHmnvIFQqPvVEoEce7dg1kXmuQwhtzbcHrXqU6NjzZ1rmysyKm5y\nBjrXLeL/AB1HpVjMqyBGAxjuc+1cn42+IUNnblYyRLnhc4H1zXh/ibxxNqTMrudpySWPU10R\ngkYN3NTxl44e7jZFkbdnG4nk+1eN+IPEBuJDvbcVGAPSp/EGvN5LELjbyDnvXBXuoM/LMd5F\nZzkraGsY6jry/wDO3LnA6AVnxNtyP481XmmG0Hd82alhIkJk5zXnVDsirFqNuu7qaDhY/vbk\nzTNpZgw4FPZ1xsHArmNhMmQhkGBStv2qQdzZpqhvugbBj71OXpgN+PekOxIWRmB25A4Ipplw\n33fk70kkmFXbwM8mjeZWOBsQ8ZPegALDdhec9DTvMj3KCM84xUQ/1hYNtUcUrL8wkPHagRJu\nK78fhTQ5Azux7UokCqVIzmkiYgkMnbimSx8OFHPA9TTmLKh2/LSr1CPyCM03LbgCQV7Uih6s\nDhwu7t9KbhmZs/KewpQoYEIMAHmgKGGF5aqF1GqwVuee2KVmRm252nrjFLtXqw6UxmRge/qD\nQMlUncOM8dPSmttK/MMc02OQ8bfu460513RkkigQ4l+I1PvmlkbKqF+/nk1EmVUHcQak3PtO\n7B9xQMPvXBJ+6Bn60/5cA9RjpTIy27n5l9Kf83l9l5zj2piEj2qwGc9hTSz7io45x+NL95QV\nwAKa3zSAgnI7UFEiqWkz0VRzn1pySbtw5HtUakq3LfL3qXjb8ze/4UCYn3yDu2qOtNVVKkZO\n3NEeGRs8c8UKcggGgWov3Fbafc1Io2wqNuS3O40yPcqkH5h3pwjyP9ZheoFAxCmzOfT7wpVj\nBXn7vUU5c+SSQNvTFI02zYCODwFFArCL82SfSnqu5Rg7VJ70vJ+UDBz0p6v/AMs2GR1xSDUe\nd4UnGBToWG4NuyQOlIrBQQw+X3pBt/4CewpiBcQs23lW52+hp7IzcY5xmmpjOBxx0NPhUqMZ\nwaGUOJ+Tf129afjdjK/e5FRQsQzDt396duJkwu
cAUdSkSKqliCcECkAVSF3Ypi/Llm6+lK7b\ntpZQOO1MQm3DsTlvSpGyVBBAI7UwucDA46GkAJjI7daTAkDFWDsc0u5t24dTzTV2rGNxz7Un\n/LMYJ256UhD1EeC5PPcelC4Dei9qjzhwAf3Z7U9mRmA5FMbJg3cjgVHuZs7hjnOBS/KzYHzU\n4ZRuG+b39KLDQLJ8pAGAalaX5E3djiq7FxNz0PIpfMLMQq7/AFWgRPjbG5UZ5yKYrExr3NNW\nXy87RvGKcGznd8gxxQA6WQoNxBNG9GX5l+f1FRhWZcMccZxT0hTjzJgrdelBQ7ggENz0xUig\nfxH5ar/KmcnIJ4NPV1kU4Py9D9aAJ42DDGSO4FLzuwGC8ZwKr7ShBLZPbFP+0MoYuu49sUEs\neilm5PTk09I1ZSRxt5zmoeH5zjcvSnK37lI9uDnrTAnCyKolJwD/AA96dG3lkhlwWqBc8nPG\nfWnyMXUHHzZxSYh3KDDLlSetPdQuAi1GpZFO4cA8ZobLMSr44yaoCRixVQBnjml8z9xh1/Ko\no5PL5bPPoKkYBJPmOQRkUCHqoWNfm79KezjOQOnao1mULt28rzTn6KyDcz8UMURx27dwGWPP\n1pWl3Y/h45NR/dIOMFeCKGw559OKkpAW3qD3z96p92Yxsw7DvUEe7oBgZxUsTheq4NUMVGJQ\nnf8AN70+OTjDDcajbG7j5B3qVWxG2PmGKYh6s6plcAZwRTWkZGK4FRr/AKkBjtGcipnK7lyc\nZHWqEK+VRTnfk80O7BVBAweRTVKFTgk47dqc21gjf3e1IQ75/MO4YVhSuzrJk/MAO9KsyiJi\neQKEkXy8n5u9ADFUL8wzk81JGxkmUDC8ZpFbb8g6k5ApjkQkAH5ietMRJIq/6wHDA9KdLKBg\nYJzzUAVxJ8zbuamVt0u11yB6VYiSPDN8h2j+VKqhpQH+Ye1RjawKnjByKlhXb09e9IYsX3mf\noegp6qOT1Yc7qj3iNWJ+YZ6VLG6tHsZccZzTFsOfOwNnLHvSFty8gA0KnnKFVuBRgRvw25B1\nplCxpztdSOc5FSBVkkH93NNMhK5Q59qI87CS2D3UUwAs25gPu5496njmLAbux6VXKhcZzipN\npLLt+7mgVjSt2WeTav4+1XGXKjav3T1qlb4iUheGJ5q2sbhQc7gTgLWkSHEdHIYySOCTRu4J\nYfSn7RHw/Df3aJGBO1OoHWtTMazeYMLT+flUr070xVCx7m6ipYScsOxGaZI3d5mQw2gc5NG9\nWQHGMUrN8hVjuZuBUbSAlRjDL8u2gY9MSNuY8CqfnFpDlTjNT7tsgBGM9qYu5m4GFzjNAWKv\nBk+fpmqsylZGCKRzwKuNGNzNxmqkwOQwbatIZGqvtznDdxSMyhlAG7nk0szH5Tn5qXHl44xn\npSARRuZ+MCn/APLMMo+lQ7pVkJXgdDTmkMe1gCSTg/40Ej14+VuG609sTR/KdpFIG3NkjJHe\nkVjuLE/KO1NAPjZsjC7j61HGzKx3HvUu3aoVRk9RQqmRvmOD79qQXDzH9aKPJHr+tFAz4/27\nZPl/hHGe1IsgAKg/Mep9aWRU+UjcGJwfancRqU3K7A9vSvAsekIu/aNygHs1Obc7AhNvGBQG\nG45kDqeaY2RGCeQe+elFhIkWR0dSg6feFNOWYqG+djmmrn5Sr545J60/5+GIUDp15osGorMr\nME6fw5PrRlVJTGNvftmo5HKucoSuPvds0/y3+0ZY8Y6UWAcuQpD/AHuuKRpGYgScbRkcUbhI\nB5fY8ihSzDruVT3oHYcz8lgjDI6ClVsKMr16etKzhnfYeo201YyoUb8v6UCHMvmAbThs8j2q\nQK0GDGMH+dRvujYkDnjNJ97PzkZ7/wBKQErGSQ/Mvy9zUZ/eKoU5bHel6FQT09DQhxIImIHp\nQBJCzZKn94uPu01X8uJi+ACcADtQyyAsmQoXnj0p
u4LjHzKe1ADs7FUqc7TTpMFgWPGO9N+6\nDkbmI6VJJG4VGYqQB0zQUhgcuuwLnFKyxtjli2OG9KPur8gyT3FGzfGHKn5RnGe9AmIzbgoV\ngNvWgTNJMDnYoGOnFCtvXzGUpH7etOUmQ7DhQBximIaRJ90lW5zkUL++3dVPTnpSR7zIu3hs\nZOaEXllZhljwaAHLGiAbm3Z4OKcv3iAeP73t6U1OZioTauOT6mlVV5XbjvxSAVsiQHbuz0Pt\nTmYYJ2Yb68UYVeh+XHeo12s4Eu7BHG2gCWRpC0aphuM5prZVsn86XaY9uBwOaj5YnDfK3PPa\nmNFW+TdAGDAnd9096zlZ9zeZWndZY5ZcoBgD1qhcfJg7ML1Y+lI1iiHazKwB3H0NRKhQYb5u\n/wBKsFlkxscEdyKZtA3qflzxn0pFkDAKjptwd3WqcsQbggFhWjLGfMG5srjriq12qIobHzbg\nKRRSmhO0eV8pbhqbIRtCDoBjd6VZuGkix5fzc84HaqpyzOpTaM9aYyFN0f3cFj60q/Mxy/Pc\nelI0ZX5mP0NAZFlyVJQjmgkkikIYxqCFHercdx3HHGMVTik+Y5O1OgGOKbuHKg5IPXNMDU85\nlBCNtJOc+tWYbrzGw2Fx+tYkcgGVLYPUVMs/zB9wbsaLhY6WzvP3ON20k42t3961bLVvs8yi\nOYrtOSnT9a4yOTPz537evNXmvEhjV1YENQTax6VY+MJUvodrtt6bQxxn1NdlpvxQntcBLj5s\n7vmGQcV4LHqDshVDjnHFXYrq4WQbZPurkEfypoD6h0f41LNMjM7CZRkyqcL/ALtegaX8aEmj\njbzAqNhdue9fFen6pNZSEgsd4yVzWyvjS5EexXbcpwD0AFRyxe5SfLsfdGl/FqATBJbrYSeg\nOcV6DpnxFt5toEjsGAxuOMe/0r87tG8eXazqWk3J6Zr0bQfi5P5iqk7Fozjdnt6c1nKn2K5j\n780PxfHJJgyhR2bdxXaWPjORogVlYxkcYOeK+I/DfxcM0aK1wrkn+Ec/TFeqeHfilAqozSlx\nnbuU/d+orKUWjRNPc+j11ZbjLq2R35qRb4swNeU6b47twy+ZNw390ZP5V09p4iSbb+8CsRnb\nmuezbLujtvtXoeafFKepNc5ba3HIeCCoH3ia0F1KNVXJ689aLvYpG1DMG69actxuyMYxWWt4\nrYKsAPrT0utzYBo1LNPzgKcs3HJqh53IpwkLD0qlqSzTW5C85qVXLLkcmsnziijjIoFyVYHd\nxUjNqGXdwwqf73NZMN1t78GrsNwNwyeKBWJ2U59aFWnRtu5B4qdI9zfL0piIF574pPMx0/Wp\n5INqlqpeZ78UICdiGUetPUZbPSoFmXj+tKZORg9aoZZ2bvp3pHtVZThaZHcD7pHNSfaNrY60\nmBFJZq0ZAFVZtMRoypUH09q0Fb5uvFOO3oKZFjmZtBRpAWX9KqXnhuGRSqx5Oc5IrsRGG5Ay\nKctshJPQ07i1PM73wLb3C/Om0d9orFvfhzbmMR7FMI6dRmvY/wCzFmfpj3qvNoRyeAQewqgP\nBbz4bxbQqQ4P94c8Vg6l8MW3M6wsi9OOSa+jm0IqCVTDZx0qnceH1KkFQT6YpXBnzDN4Anj4\nKHb/ALXFZd14VktQwEeR69q+n5vC0LM2VHP8O2sq+8F2rQ7Xi+UtyRxVXFY+ZG0uSP8AhYH0\nxVVreSPJaMqfpX0VefDyB3YKFKnpuGTWPdfDXzGzGikZxtbkU7oVjwouy8Y5pYup57c163qX\nw1Mci4tlY+xrEuPAJRsCII2Oc9qNBWOAGdvTj1pdwVeldbN4NnDELG3+8B1rLuvC9xC3zRu3\n0XNGgWMcSL2FR9TyMVeuNHlj+bYR+HFV5dNn2lljbaOpxxQFhjfMvFIw+TpzQFKrjBz3oZva\np6isx4xtweab91aYDu74pd/zHAzTEPDHbwcU/mo1+Z
Mng0vmdAD9aYDlOW4PFG7jk0mAckEU\nzcCuMUDRIrBQfU0n3uTR1PpTGbygXJ6UrFEm4MpGPrmkONvHFRpJ5iBsUv3hjvSFcVVO7BPF\nPDbhwDimsQAvHNO2Hkg49RQINvpSyelKp9aST957UAIudvIpD1yOtODH1yKdgbifWjUASPaA\nxpHXC9c5o3EZzyKRcFuTVCBflXAFG4FeRzTj972pjeoFLqOwufk+9mgtubGMEUnA6DAp5XOD\n+tDHYQZdemKTaeATinMNw4NN8sDp170h2FZD1B4pGyc4PSnAblx6VGqljxTsMeZOFGOtPVgq\nkH8KZtZsDPIpfvckUgYqsWGKcxG7pxTQp3ZzxSK2Ae9BJP8AdXGKZIpXkc0ROWUZNK3tQMaP\nfilVRt5oEZ4ZjkUu8N0oGMZRu6k0pUSNgnApW4Yds0hIXPHNADsMvTpS7gI8ZyTTUXfznHtQ\nqgNQIVeB15pd3Q0cMcilkUcBRx3oAVstjmpFYM2M8VH/AKvjqMVGpP8Au0DRM/K8HFIrDvTD\nk470DPOOaQBwshI6YoiYtkN1pFy7YPFWFjXd0oAQx7VHehflamN1IPShQC3ynmgZNt+UnvSM\nxxgUi5UZNJNJsXcTQAyOQ+ZtxU1VYdxcydqs9eRxRYA8z5+RxTtxYA54pMA8HmkWlYY9lO0Y\npCzcccUK3PJ4oO5V65NMALbWx2oGWfk8UoXPpTuOlMQHK8/w0gQYBzkU4fMgGc0gX5iQPlpA\nIxG7OMAUsfTOePem7TyCKfsHTNMB+T3PFDttOSOPWhV46UvLNt6ipsArsVwuOKXnApGzwM0D\n5vlBwaYDmbLdKOQCc0gJ/D1pduMk9KYCKxZQx609lHUmhWDqABzQo5x1qQEzubI5qQtSKvoM\nUu3n1oAf5hAwtM8zruFKrBcrjj1pGGF60APXHXGaAeTTBu8s4p8a/L05oAVs4x0FNLDGO1Ox\nubHUU3joBxTYx4/1eAcUin5cHtTMD1pT2z0oYCZ25p6ybsClZdyggjimM4jXkZPapECzHzCo\nB471LHIW9qiXPP50+P5vmoAn3bmx2oI24OePampyOTg0/gKc+tNAwxuORTtwJ9DSbehB4pqj\nlietBRNtwMg5Pen8NzjGKjQfNTtxAwOSKQEm8d6epDAgVD/rGwBgU+NtrYxQBNtGznr7Ucbe\n9CkMOeKX+HJ6UxAo3KMdad936DrSKpUZHIpjrlsDkUCHqVPNPXI6HimbgTtI4pcfL/KkMlXC\nrimIxbIxxTFznml3FTwCaBijLZHNOXdtNMVicnPPpT0Jbg8GgQuMrjmm5Bzng4pcleTStjsO\ntAxir5abupqQZK56ZpWXYvXg0feUfTigBVf5SMcU7dxg4FCrtjx1OaSSMtzQA9VZec8U7aAu\nTSL90Z4FJIMtwcigQcxt7U48NuHPtTVyeOtK67VDUuodBfvDpzS7c9sGkViccYFOb72SaYCt\n8o6cUeYVUZ4FV7iTYq88E1KX2r83NIYrMcetBzs6URycYI5pd3NAx0LEZOKkk5UYHNRBs/Sp\nFYNhcYpgKMLJ+FKqnIJ5oKkZxz70u4jAoAcny7uMbqgkmKdqlbO3k8VHyw6ZoAfDIGUHnNSS\nA5460yGHjk49KeDtXmgY1wVZc9Kdkq2D92nffXBOTTJN1AhFcLkZ+b2qVchRzioo8bQcfN61\nKqtkZOaAHLnv3oYF+BTW37sDigPtO0nBp3sBIqkqAv40v8XXio/MO7b936VNtBI9aQARs69K\njJJJHepGba3zc+lV5JvLblaAJ1c49hxml4zk1BHIZIx2XNWVXcwx0xQA4n06U37wyDginD5V\nIpFwvSpELz9TQuAuO9IcgZHFLnODVBqOz8u4dKXjbuHWjd6CkOV/GgY8Biuc/lThlvao4yeh\n6U7cd2c0AOxu4FCqPTJpo45NPXKnNIQ5flY46U773JNR
5GeBxSsxYDigWo8YC9aNwLYoY/Lj\nbg0BQvU0AHl4xyCKeuNvFRqvvRHgqQOGoGSbgvbNKzA4I60hwPrSnG3gc0CFjzimjO4rS7mU\ncEYo37s0XBhGpjHJzk1Iy+h4pDjbQO27n0plCBj6U7B2UjKMbhxTeoBzzQGw/PA9aXnjrTf8\n80HOeDxQIf8Adb2ozzQp3LjqaFXBwaQx6570Dcqk/lRgsfSnMhYcHimFxvG0E9aRgTyDxUkM\nLc8VZSydl6UElJWI5xRtMjccVqLo8jr79atW+imTHP14oAxvJLKFAojs2GQE4rpIdCbcflyP\nWrtvo+FwU4/vUCZzENjIy/cxmpodLeRumCPWuvj0tYzkDI9anFijMOAKBnNWul56rk1pQaLG\nF5SthYI16LxUiqBgDpQSUItJQ9hU62SL0GR05qz0zTN457UwGJbxxjgU8qvGOtN85VqF5hnP\nbtTEWgwXjvQzDqeM1SN183QimS3nXB5zzTAtNIF5z9aVrgYznArMkuC2aia7wMGgRfe7XmoG\nu++eKzpLvacZ4qCS7HPpQI0ZLjdlt2Kqm8KuQfzrPkvwFxg1WkvvfFNBc1Zr/auQaz5tSyCM\n4zWfcagpXAqhJeAZzzW3KTfU02vmAOT8tVbi+VVz3rM/tANkE1XkuhzzVRiyHI0vt2V+9VV7\n0nPes83BU8dKj+1AZzXRGm2YSqIum4bcSGxUn2zKgEnNZ8b/AN6kmmVVxuxXoUaNziqVktja\nhKsRzzWnZusLANjFcX/wkMNmmZZAoXvWBq/xGjQHZKAQcDmvapUlE8upVctj0DxR4kh0+yZk\nZS6jPXArw7xd4+cSNJ5jru/5Zocg/jWH4y+JMl9b/KyoYySMnIJryLXPFMl5KztLlgMnB4zW\nkrIiKctzpfEHjA3U2ZGYjP3c5Arh9X1loXJR924ZG6sK615l3ShgVY4696wrzWpJm/1owvFc\ncpnVGBf1TXGmXEnz89c1g3N+8m5lHy9MGq81x5kxXfuHWofM4YfeFckpm6jYsxzFiASACK0I\nBnG07ARg1lxeZhty5XPFakMLcHvjrXPJ3NY6ljDDCk5x3FKWJQ5GaEWVo8ZHXik3F244A4Nc\n5sGfkBJyDxj0pYwfmC9AOv8ASmr6Y60/yyfl6DNAhP8AWBieimlZWYKzN8mKQxsu6MHajHJN\nMC7ck8p0HvQMVcbefWn8NznI9KI4gq7+Dn+GkEflvkfXFAC7+QP4sUbSOQckUgbr8gzmlXMK\nknlf5UAOWZhGA4yGOAw9aF+Xdg5b1pvLKny4jByKCiW8gK5bcckVYx8ci7QQfqKcskeeBz60\nka/Kx285zijyx8xHLjkUtSA254I5zSMoVunuKkEjsA5Tj0oaNm6/WkVcbxwQNo7jsKXlSPl3\nDtQyhlJB6DrTv9Ww3clhkUCYgTkkn5f7tOX5F3EZz2pEJ3EngUscbjLFcDsSaY0MjVo2Oeh/\nShWBkAJ6HP4UoU+Xls5Y07bt6jJxQFwZl+bcpBJyNvSlyOAw+bHWkXGTlTUnlhm65wPxoEMb\nb93Zlj0NPVlWH5vmJ6UrKUKkDJpsnUpt9xzVIBrSbwoC4KmlVDvIVSSedopyxOrBgcDrzUxU\n7Tzlie1AIiXcMjaetSMsY2lVIPpStu4yTuPGaMMq+4oGN3Lu4GcUnzxqNw3FjkUvEgBIwxPS\npfLbzCQeDx9KQCbS3zDgjigfeBx3wTQGK8AZHQmj/V43GnYCZcnIOOtK/wAvXC59Ki585lJx\ninElsYG40AP+64DA5PenMo3dcU19xY5+92FRSNvwMHIpAx67lYk9aerHqOW7+1IsbLEH8zJB\npGYuxIPNBRJCyxlgTkkdKccHarD5vam8Bhn0+9TsFmyDkDuaZIoYx5Dc5OKRRuY49cHNN8sq\nN5bJzxQyvJHuJxzSAVmEfLdc08MDkE8HvUSnaD+Rp28rGF
2/NmkA7G1TxyvOaQAuyEcN1pfM\nLAY+7nBzSK7GQ7euO9MYbW3Y3Y5zuqQATrjHzjjPrQwLKueo5pJFVfnYkNjtRcBBGytjOWHN\nSMxLZJ69cU1SGiPzfvPT1ohJ3NuwMDmmDJd4HJHA/WkkZWkAPTGcU1hznFDr+9DA5XHJoAfl\nfvFc9qSMeXkMAVP6UkeWXcDlc9Kcy7v4vw9KRQsQO0Yx1xg1KypuO0Yzx+NRyxumMDoOoo8w\nqygDI65pkirukyirhvWhlPJU7j0NRzfNgNlPmzjvUnyLkg57nFMQ75mjAU456UBl3Ek9KVcl\nVJwB1FNUKwZifakA58KuACc81IqlYQxIqJcptXPP9KVm4PBYk8CgCXADDeSRQzK0mB3qNlZ8\nLnOBmpVxv3DjjFLUZMq+WpH3vamMBuDFvwpeFUryGPJpu0Rjvg1Qh43urEHipVfYqgjk9Kjj\n3Mp2HI9KXYeCThhzigWxKD5bFepYcmo49yRsGbj170wGTcRH8x6nNS4JQ8ZOKAHxyB5MHgYy\nTQsgyWIzzkUx5BIq4Q9AOKdDtViScAdjQA4zBpd2OtSgEjPTPWo1ZWbaBk4z7U/d79BnFMY5\nlbcCQAnTFNP+sXjdt4pG3ZDMcrjIFTiTy4+OpGaYyJWdY3wPlPanpCI0U5zn+Go41bOd3LZO\nKcv3iw4boQ1SS9STiPaP4WaiRhuCkbRnIxTY88DPPJGaerfKC4yapIQCMsd275s0PgSFtuew\nFLHJtbAy240rMsJKnLc81YhjlCBnp3FSDC8Lmlj2Lxt+gpwVhtYnigBq5mZgeMU7aFI7/WpV\nx82OuOtRRsZo2354ONtAupIqoF4OTmpXkP8AdGOlRQlWXYBtNOyFYAg7lqgFMjbsnhRyAtEU\niRwk9Sx5pdpXL/xMO1KsYGzJzUgOj2bdydqeThN45BPSmKgOVUY5pki7MhM+4FBRJukOTnnP\n3asQh5SABg9eKgjYsQcc461pW7LFtYEE4z707lFy1tiseTgmr3lBYxk4FV7VS3zBsA9Qask7\nSA5BHvTQnsVpmEkg38Y6U1VKtwMA1YdAyEuvzZ4aoIw24kfMlboykriSLujKA4wwOaAxU4Bz\nz1pq58tsD588Ur7ty5XkdxVowFEwkk+deR0NM3Ll8DGakVgzHkKPU1HJheoKmqKQuRtGFy1R\nszfc+761LGRt+90psmd2R17GkJkU0O1PXvxVGZecMM5/hq8rM0hBO6oZo9uc8+tICksmM5TJ\nxgCmqxyrHrjG09qR13Kw3YUcikh2oufvdzR0HccpfbtYcA5pFkKyYUe7fSoxFtD/ADHcfm+l\nPhIZSCST1zSESbRuJ3YUimxgb8bs0ZUgEcr6U+PbnBG0d6YxZtpGQ2f6U/gbFJwD3pvl+Yf3\nQ3e1L5YePldxU9KQD/M9hRUOw/3T+dFArHyGdwUEjPGWpgMSbpHGRjhVH60is+5d33CMEg0/\nJPyhlwOleB5npCYbcB5ajcOM+lCpt6DDLwFJpq5W4UsSc9PSpZF/eAMnPUmi4DSxBQqvzgfN\n9KVVA2uO/QHrTVYQyNgd8D/CneYI5EKjcFGD/s+1MLjlUnepbC5yxNSbjJMX3ZTGA1QOc5BQ\nnd3FOjZPLKbth6AmlqALIzZjXnnkVJ5hUNk5GNpwKiVRDGQkmDjBVh1PrmnyM6qj5BIxkjoa\nYDreMbmCjnP3qSf9452Ehl70bmbLDoWzTNu6PcOQeue1OxJKrM8YVsEj+KhAzMRjj+6KjWXz\nCMHAAxz7Uom8lycljj+GlZASyLtIX73rQyjjjOfehcPyvOR1NJIyCIqy5PrmlYoUIq78PuHd\nvb0qRMlN4XC9qijkDKAvzn0xilmk2/dJOevr9KQrjpHWTDB8epFN2qHK7t56g9qGaNNoVdue\nophkCqcoU9KY0TqZBMiI3GMkdqTzgwdQeRxQqHy1dT+8z0pV
ZjcbTHgY3E0WGxIyyA84780R\nzKY1UITu53AdD70RyCSRmC9OaElKnAXDdqBD1JTAVgzevrSSMdvzx8DuKWPds3Erv/u0sUki\nklhtPTJ6UMOgxpCWXaO1ODGNgOpI5pskpWJcD5WG4DvSKTGodjuB5wKQ0S8+VyM9z60MCGV8\nYOOBUcis8igEgMNwx/Km5l8vzCdx3bduaAJUkbndlT1I9qSNZFVlJVd3IpB/x8CRm+bG0r60\nqwjzch8IOeTQMhvjIoUHGwdcVRdmXODuAPStWbfNG2VHkmqEgGQNuVznikXEhj2sojMPlspx\nn1NRyRmXORgqeKvqokYEr7AevvTCu0nA570GxnsN2NrYI61BIjjd8o29c1bMZ8vcTjJPTmq7\nKWUerdVP6UAVCwjYKcuf0qCZGM27bVmRWX7wXg9qZuZl3Hcv1oAqPGY3I6swyPSk2uvAw+R0\nq1Iflyo3cZBNQxxnbuP8RoGVZdoXbghs5oaSPjtjv/SrLxtk7armPbcbGGSRn2pkkSyN5gHy\nvuPI705QIwRtwSfwoVvlB2rmmszeYMHgfw0hk8UhRWBGARzSRN5bBcZB9T0qGSM7WRm2sed1\nN2ll2qPmxyxNAF2G7RWYfMV6VoW14qqFBIxzzWIpKIN33farMU7ptxjAOee/tRcRuLdP97OS\nf5VMt4F+ZPlJ61grfGRZCy4cHIx/KrMdx5iqSML14o6ks3EuA0e1iytnO5eKvQ37DaVc4zzz\nzXPLdnehJ2jPWkmum8w7Wwc0xHpGmeLri1KkswaP5gV449K7TRfijJbSAvMYed2F53eua8St\n77KhTKCx4681fjvJFY4YD/a96Bn0vovxtSO9XMuYOp3sfyGK9B0H43LcqknnYixkZb049K+L\nodcmjZELIAfatCPxVJuURSNEq8HaeGIqlbYTkz9A9N+LkeYwdzknksdorqLT4nwSTbFuN/c+\nw9K+CND+I1xZyI/2l5XZcZZzwfTmuk0/4qzm5jXzWRQfmkDZ59KydNGqm7H3da/EK2m+5KTx\n0PFb+keL4bhdzSYPpmvhyz+Mj2smxn37ui55rufDfxiiliWZTI02Nu1vX39qh0UP2jPtGz1J\nL1gEPfH/ANf6V1a+F79Id4USAjI2nORXy34H+LwmvolcqqyH728EfTNfVvgX4hWfiDS1YzRu\n0a4bZ1GPb2/rWLpOOxoqiMCRmhcxOCrDrkYxUbSbunIq58QNWtftUMkcqgMvJ6EmuXh1Rmjy\nDx6Vk4uO5opJm/FN3bpVmO4IUkNiubh1ZATuJ96kXUQzZDYU9KQzpY9QKkZNaFrqShhk1yf2\noSAc7cVKtwy4Gc+9IZ2M2oL5BGaoNMNoPasX7cW+UtnFSJeDgE8dRmmBqfaBt9qWObvms5bj\nvnHtSm42t14oHY0TeckDrTluSO+ayvMP3geDUscm7tQKxsfalLcGpftHTmsdZCuM05bj5jk5\np3FY24bjjHTFTR3KsORg1gLdFe5qRbtt3zNRcVmdNDcLuA6VMsgDdOlc1/aRHQ4xVmz1bdkM\nc1XMKzN/z+xPFQuF7AYqidQT0z6037dz6L2qbisTXGApAAx24qi8auoDKCKfLccY6ioGk4oK\nsONnGykFRUD6XCQf4T7VZ84fWmhtzcCpCxmf2ChXnG7tmqs3hsbeQDjviugEg9c/WkZxIMUw\nschc+F45AXEezPpzWVeeEUkQgR5/2ulehnbtx3qNoUPbnuaLhY8qvfAavjcoLdsCsu8+H/y5\nEWTnkH/CvZWtI25YYPrUf9mxMuSMn1o5gPBrj4dfNK5tyo91wKxZvh6WbdswP9la+jZdLSRc\nYyR61QufDyyMGCgH2quYk+brjwHIrMUX5ffisubwnc2+SflXoTj8q+mLrwuJFIcZU+gqhceB\n7ZgRsyuM4bJ5p8wrHza2g3SpnyyecVSk0+4jcq0LL719ITeA4p
EG2EZrJvvAkfOItueDT5g5\nTwJY2XOEOfpUPz5I2MPwr266+H6tGqonOeGwM1lXnw+3IwKMGHotVzBynlXmfJ0zUUjbvu81\n6PN8N2VcLEUbrWbf+Ap7fJ8vbkcYHFO6Fys4vzA8YxwBUmQuD/FW8/gu4VRtjcn2FV5vC95C\n6h4yDjNLQLMyl6de+aeWHarUmi3SqWCYH0qL+z5o1AKEk+gpE2K5YbuORSbt2SOlStBIuQEb\nHrimGN16qfypDsC4K+lO6LntUOGPGCB9Kk/h25qh2G889cGnbl25xUbPubaKfGylNoHNMmxI\nrevSm5HY03nuaaZFVh60ATJ3yM07d1FMVi3PalbrUlBxtyKF+Yg0MvlpuNNDBQCDg0CHN94j\nOBSL8p4OKGbcuDSNtoGOZwB059aFJC0m7jGKfwooGLuPlmm7h93HNPXDDPemKCrdMmgkeq9A\nOtOZgp25oDhXyOtLkFhgc0h9BBJ8uCuKasYFP+8+RTl4yaYiMoSwJPAp5I5IGaBIrZA6U37n\nI6Uh6irypI60gUnrRk7cjpSr83TrTARlKrxS8hetEindyeKCAfu9aTELIN3Pek2bqUt0yaTf\ntyaGUOjbbjinFfKG7oD1pgk3KKdw3HWgQfLuDdBUm4N0NQtjoePapI15JPAoGNZSmCTx1oTA\nk3AYFI3DYY5pd2c8YpiHu/GarybpMA9KscNHScFT60DDgIMdPSlSRuwpi+1SKTt9DQMQE45q\nTcFUHH1pv3V25zmgt8nI5oHYRm3N0wKcsg6Hk0gwwOTihQu4YNSIfwuODS5zz2703B9aRj8v\nHWmBJ5Y3DHGaf5ZXA6io1+6Cf0qRW4HNMQ1sK1R+csikrUrndxjg9ajVUVtooAcjM3epEY/j\nTduIyQcUKu72NIB2491+alPT+dI2VwTzSq4bhhQA7I49KVyDgDkUN83H8NN245BoGPWPB3dB\nQx28gcmkUNknORTl+6R2FKwApO4U7HzU3f7U/OMA80hAo2g5pGkVulRecWkK5p/Ct8vNAE0e\nOnahpNuQBTO1O4IznmmA2NtzcdKdGhXOTkZqONTGTnmplwFz0FDKBo93NJ25HSnRqW5okbPA\nFIRCzHdgVIy7VHembMtupzKWIoAbgpxyafD6c49aUHg85PpRGxwVxQImA65HPan8rxim7sJ6\n05mKrk9KABgeNvSgr83B4oHAAHOaaoO7rgUwHrkZwelSIe/c0yNf3mMdutSxt8pOKCug77qm\nkWT5QTwaZknnNDA7hkUhFlWXucUqqGUjNV/qOKmVunpQMliwu4E/SlK9MU0ja2ccUm7qR1pd\nRjuB9e9P6KKhz3qVl+ZTnjFMA3FV9jTkbAPFNzxnHFJz2BxQA9AME96XzBjnhqTaetDfNkZ5\noF0FXlsHpTwyltpFQc8Cp8EjI4pCHMFLAHpTGYAcdRTdxbjH41J8vYZplAjevSn8seOlMA9+\nKVT6dKBMkZQG56YpiqSMdKduDrgU3nikwHbuc07IOARxTDtJxnFP6LgZNAhc/hTC21T60K2e\ntH8RI5pjGN+8X5hSquVzmnK2MnqvpRuG3gdaBiqpH1pduPc0R/dJPWkbJxzigBWPy46Gpd3S\noWUHDZqVFzipAejbeAM0fjk0i5VuDTgo9OTTAAfMUg05AF60jqV7Uo67qYEiMOflpFXv2pmT\nng0/O6MY9aAHoNrciopHAU7mqTdu471DMq8A96BsfGuRwflFS7flyDzUEeduAcipvuYFAhzE\nqqnvUbMN3OM0M4/EUxk8xgx4pdQHsd0gqxzjpxUQx+FSfw9aAEZs/MeTUcxH3m5FSrjccDtU\nbKGUg9fSmAyFSeh4q3tKrgVXt/l6ipt59cUFDunHWl4bGKTleTzmjq3A4pEjfm3Nk8U5V2jm\nnMgK9cU1PmOKAHBSx68UoI3DjmhVK0qrjnNMkdJHtbGaQgelNXnH
OTmjfjII9qCh69BT8/Lz\n1qvJdIsgjU7n746VKG+XmgTHs20AChWKr1zTOVT1pyj5aAJVO/knFG0belRp8wwTUi424IzQ\nMXPy9KhjcOxPSnqp3Gk27jjFK4Eit60u6otxLdCanSFiMY5pC6jFfdwRxTwM8D9aPIkHG05q\nVLeRsEKQPemDIdx5BpV+7g9atrpckgznHNWo9GdxvH0pCM5fu4IyKYqM0hPb0rdi0NkOG5qe\nHQyDgDBoC5gxxMy9KlWydl+UGupg0HgZGD3q/Ho6DAAyRVBzI4uPT34zxVmLTWk4xnFdiujo\neAv51MmlRqoG3HrikFzlIdJdj93irEeh7vl24PWupS0jjOAKlWFd24rzTEjnodFVgCVq7HpC\nKAzKM1qKqlsYwPanL940ElGPT0VRgZqwtvEvG3FSyfKuScVE0wUE9aZQ6NFVsg/hUu1eSetV\nTeLtzjFN+2Iozn86YmW2PcnApnmrt44qlJfLzv6elV5rzcgC4xSDU1fMHbpTJLhErIW+2jJb\n681FJqAbdx16UhGm1371GbgtnJ4rN+1qqkMfmqGTUB5eBVXA0XudoJ3Zpv2oY61htfAtgE/j\nUD3+3JzgUrgbzXW4+1U5b8LuHesSTVHZgATtqG41PpnA96aFc2JL5lX5unaoG1DaeuaxJNS8\nzI3ZA6Cqs2oFN3PFVYLo3pr5eueaqTaptXB/Ouel1Zdud31qtJrKSLkH5avlJujo5NTXy+uR\n3qlPfCQZDYNc5JqhkyofAFQyaiOu7b+NXGJEpG7NfDOM9DVaS+3SfexmsGTUjtK7+e9Um1IA\nHc+PeuiMTBzvsdDLfKucdRwaqSaoNvJ79q5K68UR27EF/rg81j3XixGU7G+TuRXXTp9zmnUO\n7u9cWIHa2RUEOtKVJ3Z/GvOZ/Ey+Udz/AC9tx5rIbxlGtw0aMVwOpPFd8KaRwVKlz2T/AISV\nY4ypYZHvWBqnjVBC4aQRYP3q8m1b4hPDG26VV56etcbrXjkSZzKVz0UV2x5Y6nM22eka98Qo\nWLhNzsvYnA+ua8w1LxxM1xL+/wDMDnp2H0rjNX8TNMzHecf3c1g3GqfIcEsT/DTdXsXGmdXr\nHidvKEQYlgeAD+tczdaxPvY7htbg+9ZMmrblIYH3HeqP2vcWx8jfwjNc8qlzZRLlxcMPkPOT\nkCqyks0igEcZ5qEyCXEgYvJ3Bp8ZaNhgbs9ea55PqaqKQisdvy9fWp7ZV5LtxUf3ui7ATjNL\n3LH7o4xWLZoi9bkqpHU7t34VqwrldykgkZFZtnktgjIrZjjGFGMd6zZokNVixyFPTFJt+YYq\nbDIcqMAdqjk+6Gxg5wQKyZQsW9ZCpUHvSYyxboPSpWHzEMecdPWmKvOApHepAXjd03H0pGjD\nDGcGnqCqbgcEmlEbdSwNUgIRbFQMndTmVlcNj5KnXbJGT3Wm/eUgDBHNDAhZlG58EdiKcsQZ\nSTwOv0pVXdjJ680rIV6cigBjRt5eAeMUiLtdGxmppFQ/OeDjFK67dpA2r0pXYCMuHyjZJ607\ny9z5HBA6U6RdiFlXinY2qrfxEU7iIlQSAhRz60zq2wnOBU02I4xgZ59KJIvmBXq3amPUj8se\nWAOlHlncu88LwKkLLuwBtxwaWTLkcYQc7qYhrID07d6UsPMAPII5pQpVRk5UnNLjcDkcnpQM\nds3SAscqOlPkVNpIGD703kKMrzQVaVyo7jpQOxE8DRyAE53DjHanRxjkZzj+dOjJzn+JeKA3\nXI+c96BApJDHoRxUWFK/MpD561MoKqNw6mgMWYpt3Ac5pisMYbgApJNSLmNhkYHc0bWj+ZeN\n3b2p7rhcdalh1Grnqw5zkD2oyzAjpmm4ZhndnsKfHE2eTg4NUMRoypC4+Y8k04rxgcnrSeZ8\nmSMmkUBWJzgMMUDFzjcCNwzS485GAXOOcUiuR8u3Pqakjz5RCtyexo
AYG3MSepH3aezKqqc4\nY9qj2de1OKDA7mnYQ9mLSKR2oZ9sh9TRGoZiOVxSr94jbgj+KmAL8uVDbh1xThtDAgZ7U1cr\n8wGRnk0Mu4M44xyKkYSfMpzgKKVdxiGG49KYvzLyMfxVNCwkOQvGOKYgjU7sdcc0jKCxLHIx\nkY9aJF2wna+TUbPtQYTcx6AHpSYD45N0eWGPpTlYtyTTJNwiDKR75oiZpFyMbh1FIB38O3Hf\nOaeF2jewzgfeqKNpD99cLUu4iJkbkEflVACSbeASeeM0515JPQ9cVGwJVT/CKVGKqzHkdqkC\nR2XIVQSMdcVFxGh3de3vUm4xRoXP3+MU5QADkbgvegYjSNtVtuT6U2LeuWJ6npTi+SATweQa\napXzvl5HU0xEqsV3EEbvQUqKok+c8t3pIlX5m/MmhWDbWbgelMZLyuQp3LTUb5sNwKZjy2O3\noaJDub73SmIkkbdklcEH60QsrKxwQAM/WkV/LXaMZPUfWnQtsVkY8jn8PSgYTYaJCB8p6+1P\neTasq4GwdDSK5KkkZBGQKZhz8oXlud1ABCzBdzYJx09KlUysQ7DYB0qGEbE2be/WppP3gUHO\nKBE24qvIGTTMnhQOB09qYQ3VztK06OT5Q3r1oAlXPU/MfWnI3mKdwqPdtJGeM0jOWwFO3HJ9\n6BIlUGKQEMFGOakzuBONxPFRqVaTaUzuGaf1cAHaAKB2TFbManAwehpN7sVU9MdaBgs568cU\niqdinO1qYyVTsKhTkUPGd3AzzmmLIfmJHTvUscgIDbsZpkknmFQ3ABpEk8z5vwNDAPg9/WmI\nxCvlcYoGThtwJAzxikUr5ZVj7H2pFb5TgfLnFRlhuCE55wcUxEqxhUA+8MYGKeuHwFXaenNI\nN24gjafWk3Bc5O49M0hEvlsrYUZxyaartgLjAbnJpqscja3GeaVhnBPy4OAasVybcV2lRj+d\nJuzMQVycdKMCPkg7hSKxbLFSc0AKp35BOGpyspb52wtR7isg44JqR/8AZwuaYWJVw8bBWyO1\nIsZZ1YdMcmmBdqZxlhTlZRtIbaD1oEOWRVfHUN/F3p+1uD1OM1HMojBY9d3FOVWRhhvujJzS\nuPcdH80mXzjHWnLIN/TCDoaRctCSRgk8UqMq5BU5xwT3oAcc+ZuxmiMlZMtTF3M3Bye+amZT\nwCMnqDQMWHc24bcYOc+1XoICfm7YyPpUFuvlcY5I+9Wnaxh1CufxqWzRRLsMJVF7ZANSSRq8\nihutMjXbujyTg8Gp0w7N34601IOUJYcKSThaq7NvAOR/eq4qllJPIFVnVDGVIK89q6ImL0IY\nPmZsLzn71PyfmPtTGkZGxjGO1OWQSTc5CY4rQwI9pjUFY97dz6UjfdPO4jtUxVsEK2KjaPa2\nOg9adwE4bAC9RzTXBjXcBnb1qSJirFfXqaezLtHc55oAoxqMsAfnPzY9qiZufmOA1WnVfM3D\nv39BVZl3Rk7uAeBQMqvGsMUjfePaq20gLz+FXbsY4B2jGTVXftjJzkgUaki8oxdjkEYpqMVQ\njGD1z6imriVc5wCOaRYyEx1OeMntSAmk/eQoAuMmhQVUnGQRijcyspAx2x2oVWVjnJ4xjtTQ\nDo2aPheFA61Is22HYDlScmmBh5JZWyqn5vwp+C0anG3POKGIT5P9qineYtFID46ijQsyyEqi\nDPHehfKj5VWJ7rSeX5YxGN65zjNFupaYnPy45z3rwLHpkmfOyQNncA0YdWZpGAUjjvSbUVf7\n6449qdtRIyoXLkZFNAIZwsbb0BGeCPX1ojmFuu0MMH5iaWPG0HjcPvA0wRhWJO1mzkCkBKpb\n74Xd3x60ods/cV0bjBqJwXl25+fqQpqRcySckhsYAxQIeWK53RqSowO+famf6vaxXCt1UdqQ\nyeSp3k/5701trBXY7Owb3oAl81BzyBnpihWOwMyBFzjFO2lpAzLyF69q
b5bPCWxtTPTNMTFO\n1dqlRtzxUTKGZwpHB/Kl3q6FSdzKfvUi8BwwwMfeFIaH+czRKWbc2cbR6U0urAqMpj+8OaTb\n0lVc7RxipZPNjCtJ1YfLjrzTGCuuxSQVAPIqSRt0jbxwOlQM5VflIxnkVJNKoY722nrj0qRB\nHjySGwV7r3o8xnPqvQZHApPmZQVACE8tTwp8wshDqBjrxVIQxtzNtGTz1HQUrZBBLnap+tLu\nHlkRv838TL2qNQkDKYskHrk8e9BRMzCPDo23J/1femIyybi5PWnblaQuO3QUxR5uXX5R1I96\nQDxtHzhzkfwYp0uW2kEAHBHPeoxMJdzFSu35eaXd5e0kggUgHqwk3ueo44oYbkGzgdwaYJnL\nj5RsJzilVsl3J+8cBfSmBK0m4hpSQwGBxxTICSz7yAcZFCsVkww3rSTyBFXIBJbGRRYBY4y0\noaRcHHWpE2rJluajkBZcFiMHkingfNtBGccHPNIoe1w2COCvQe9UPOIc7VI5wcjirzIqoAW+\ncc4xVOQswIHTqM0hrcexxIq5wPWhVKs8bnCt+VJCJGOOgxjcatLHx5SYPfmkze5kTjYrIilS\ntQsPLj+UjBXnPWtO4jyGwu89/WqEiiTqAo6CgZRjtysbMXOSO4qDb/FI25enBrQMPnSYDY2j\nBHY1WMexS2MrnBAoAgRVCsg+6RwabIggjWMdeu6rAUbggXCkZ96h8nq7HdzgD0p9QGyfeAVs\nZH3qrSRllHPzKfz9qnZQ4OTgim/dUY55zRuBUVQ7Z24xSLGG6c88irTRnzt20BDyc9jTZF8t\nWbGST17YpbAQMq+Y2GLBRmom+ZFIXnOc+1TqpVm2LkAZzS+WFjUsfnbtTEVWw2VDZJ42+lRH\nEWGMjOF6D/PerMalsM0e5R36c0hSNg+eG65oK6EYmJbcv8Q5BqxDKYWXkc8EZqtJCqbcvjvT\n1+ViHXKt0OPyoJLS3G1Npyxz37VLHJtmG45GPvep9KpeWVVVXgjvnrQz7SQpJH8qYGgsmJiy\ncOvPrVxdRMkSlmP06EmsbzVjC7G27iAT7+tT+cVdk+8qn8/egVjV+2MvJGSRip4Lr5AAOemc\ncGstbrbJtK5TPUGlhuHLeUo28k5pisdNb3SoqDG5jxj0q/DMseSpMeOetclb3hjwWGW65BrT\nh1SOSPDkuT93/CkB1C6mHKMr5xzWtD4meHaVkZSeGwenvXCreRow5x321Mt1uw3IJPJoDyPY\ndD8f3EeSkmxV4IHGfcelew/D/wDaf1DwvcR27+TNkBdp449z6/57V8k22oBc5Lem3PWr9vqk\ntuu9CAe4I5/OmTyn2zq37Rf9t30ZiYrGgwIzjJbuTXZaf8WkktYNz9hn5ufyr4CXX5reRJFk\nKg9feuw0H4iXlqu1rnjsW61MuWW44txPuNfiIiruWdXz0UNyKv6b4+SZNxf9eTXxfJ8RZ5mj\ncygL3YNWnZ/FKe3UCSdlQnG/Nc7pLobe0Pte18eQNtG/qcVt2PiJJz98A+gNfHvh/wCJUgj8\nwT/OvPJzkV2Og/FwSNsaTDMf4eozWTpvoUqnQ+p11KPgtJj3zUw1VJMhW/WvALL4lKrbludy\n9CD/AIVs2/xFhxkTbFJ43NimqbNeZHtq6koUZOTUsOoKFIJye2a8d07x8txcmMTNux6g10Vr\n4kGAS2T6k4qHFlcyPQ01LtVuO8Dcnp2rz6LxJE3Bf5vboa0bXXI2UbZMrjIJ4qRnZ/avfip4\n51bkcVysGqBlDBs+tWY9WXhc8HoaBnRmcL05p3mDGe9YK6kGOM8D3p39pDceeKQG6k2cd6l3\nbSMcCsWG++X5TkU/7c3TPFAzY8488nGeadJcfKMMSDWYt1twW49KkS4B5PSgVi+twQM7uPSn\ni66EniqCt5mMdaczbevNIC8LoevFSrcehrN3jpSpIQ2BQI0jOqmnLOOpPF
Zpl+b1p6y7lwaY\njQaXzOnSjzcjiqvmZjC9DTGuNrDsB1pjLyyHqeaesg6g1Sjmzkg07z+MDioYi/5gxjvR5gAx\n3qn5wPPelWYK2DQIutJuUAYzTCqt1xUHmhm64FKJgaY7EhhjY/dxUcumwyLkoM0ed2NS+ble\ntFwsUm0mLqVB+tVm0CJpG+VQW9q0vM6bqcrDrVAYUnhaBRwvzdsVQuPCRuOo49667duHpRuC\n8YyaVxHEzeDFKqfLwwOcVSm8GpuJ8orJ69a9ELhuDUMiBhwOKBHmcng1Zt25Mgf3hWa3gWPa\nxSPcfXGK9cW3RmzjmmtZxuvIAOadxWPFZfApZmHkkDHRRWVceAVUhghbJ9Ole+/2XC6kDgnr\nVSXw7G3yhVA9aV2Fj5+uPAIyQ0bBuxqhJ4DeNSWk49AvIr6HbwqjZDHdUD+F1wdiDHuKpSGf\nNs/gy4gztG5f72KqyeFJ9oeMgY4Oa+jZPCbMuTt+mOlU5vByuDhEz3wtUpCsfOUnh66ViNrB\nvccfnVcaTcABjE3J4OK+ipvA6+XtSEKc5z1rNuPA+3ccY3c/MvyijmFY8INtOqkeWQRVdlkP\nYkj0Fe4zeA49+4gE4xtC8VSbwFtRgIlAPX5eadx2PHF39CCR6U0rtbDDHpXq0nw9hP8ABg46\nlTVFvAXUMvmAdMLQKx5wzZYjGKXcDHuPFdzefD1QpYb8+mKz/wDhBZohvddsZ6UiTllbHbNP\nZg3Wt9/B1wgBUFlqpe+Gbq2XKoeafQepl7xkUok3NkHAqf8AsG8Vh8hwe5p0ujzp0Un2xSCz\nK6Y4J609mC5IPNH2K5XkxEUn2WcHLRELjOaQ7Cq2FPFKrY70iqwXoT+FIyNnIU4p3BD02r2p\nrYUZPApsbMcmmMxkycHbVAOEw3YzipY2FU417nr16VMrhe+Qe9IRKTuyc5FLxtGBUSyqykD8\nafI6qowc0mIV03YGfek29j1NCtlc55pd3QkUFIYsZV8FsCpmABBQ5ao3YtzjvQrbT14pgS5+\nbB5NLx3qJfXPNKOnNMB7L3zn60LjsOaQUqqc8HNJiHKNvBp3GTxxUYz3pysHwM4pjEDBeACR\nTy3fH0pVUc5PHamMuY8A85qRj+CvPWjjqaZ0XJpVw3OcUwFbDr0py4Cj5cGkDfKePxpWf5Bn\nkUwHD72evHao+GzTlbqcce1JjK7sc0AODY7cU/d8uABUO+pfvYApAJzuxTlVfMz1ozjrSceX\nnnNIRJt9ORTWYrTY/lbBNSZB4qgE8wtg09vm5FRqvzcng1J7Z4oAUScYNKzqjA44NMODgU7A\nkyoPSgY/lQc9Kcv7v3qGMkn5mHFSfLupMQ4kbunBpGPGetMaTa2AOKkUrt9KkZCF+ckCpVYL\nxjmnN93K0gXcM5xTGLtLc5wKftAXI5zTOvBHFKrBcCkMd1b2pPUdeacvHPUU0YBOTzQIeW2j\npTfm3ZzkGlbDdaQsegoF0BDnI6UKx3E0xZA0npUqj5TyKYDEj+c8Zo8zy5ehNSdDgHj1qBm3\nPgg/WkNljzP3ZHQ023ut2VcZpvG3inJFtwcdaCScnBGOlKynOSaauF5am4546GgCVJDtwOtO\n8xtv86YGG4ED605lO07TyaBgs24470/ecZJqkRiQEGrKN0BoKJt2+pVxjmq8bbcntUme+akC\nfcVXJb8KXhmBxxUJYMw9KcsnzYI4oEStt6DinbvxFRFu/anKwK5ORVDJQwKEYpUYsppm7d3F\nPjPJBoExV+XvzRwre9Nded2acGG4MRQIGJHbmpeNtNJBGc01CMmkMQHk4pxkA6dKXafmwetR\nspVSD1pjJSwDAdTTTnoDUaRsnzZ3Gpv4gQOT60AKikDPrTwTkjHFM525zxntTvvLw3NT1EOR\nQz5xzS7tmRTVGAORmlVRuO48U2MRec05OnoaTb6GnKowSetIBrFdpHRqVRnt
0oZQ3OKXBXBz\nVAEeFkx1FK2N2cUkagZ9aTlegoGOUAEZpxY846UbgVBIxSZ+VqliBWp+7nvmm/ej5GKFwcHp\nTQEytvyM801ZP4D1qMPtkOaXgtmgZJLIIY89TTVkZwMjbSlSzdOKXA696YnoObK/d5oYCSPP\n86Td8o20FieelACxrtxirXEiE1T8zb1p8cm5uDgUgJcD05pVRm7gCkyrEc0j4jPBxRYY9Qkb\nYHPrT2+ZuMUyHDfT1NP/AIuOtAhFU7snqKaz/KxNAkJzSSLuU56UXGELF1zU7bWxxzVe1UoM\nY4qyFbrtzSuMQ8nHQUq/N0NSC3dv4eKeLV9oAGPpTuSQMex60qn937+1Si1ffhl5qRNPlJ6Y\nFMCvuzQFO05rQi01pO1TNpM2chCRUiMpQB0HNNbduBroIdDZgCV61IfDhLA4oGchawlJHc9d\n2a0IY2bnqK6KLw383CEjvkVbTQjjCKAB60XYjl/Jfd8vze1SfY5NvAJB64rr4NDVsbV+bvVt\nfDuw9MD2p3BHGw6Y7clcVOums2MCuzh0YRsG2gj0qb+y44m+6DSA4kaLIucg81JFoLgDjDHq\nK7ePT05Lcj0qRLGMnpjA4oYXOSh0Ao2NnP0q3H4f53EcV06QhVFSBQzcjj0oEc4mhrwWWp10\nUMwIGBW3lNxGMDtRx0BpiMyPStq4wKtR2CKuNo/CrLOq5GeKb5ydm5oAWOzjWlMaqflAJ+lQ\nveDovbvUcl4kYO5stQIu8Lwaco2msr+0l2jL/NULa0M4z0oA2lbB96DIOmcGudk14M2AfmqN\ntaVVJY4NCA6PzV5zUcl+u7ArlJNcJyQ2VqjJrW1s78k+9UM7B9URW44qL+1lV25ycVxba47b\ngW+lVW1ZuTvOaQcp276tuYbulVpdUUMef1riZNcaTAySO5zTZdWK9Dj8aB2Osm1pVwM4JNQS\na1uyC2fauKm1ks3Oahk1Y9mwaBNHbtribPmPzVAut/KxzxXCSayyYySxNNm1d1BUN71WhJ28\nmvK3Q8U2LWVkfaG6DPNcGuqnfn9Knt9SEhyvyk01Ek7X+0d3zM2agl1Re7cVzP8AaW/gN+VV\n574beDk1VmK50Umplucke9QvqW7OG4rlZNWkwVPSoJtWCj72OPWjlDmOobVDswD0qnNqxO4N\n0XmuUl1JxnD4WqEmuGTI8z86uMTOUjqJdc3MVGTVW51dgoLN8ufu5rlbzWBCww25sdjWXdeL\nIYpNhZXf/e6Vqot7Ecx2VxqyIvDctWfc6luQ4fYuOtefap40S2xucAEZyTXNat8SobVQA5Le\noOa1VNsnmsj1ebxJHb/KZAGx94msq/8AFxjXJkG0H71eKah8S4mVyZXYnquMmsLUPiAVXbG5\nUMOPMP8ASuiNE5qlXse5XHjhVkLebkY4IPFZGoeO3uIdqSYP1rwBvG00lwyNLlcdRnGalXxZ\nKud4wMfeBrqjBI5nNvY9F1Dxy/2whZdxBwXzVOLxtLJcNskUqODz19xXlt3qhmkZ4z97rzis\nhtYdGeMvheyqec/WupWMZRPY5vGG6NmMmT061z1x4mmjZsPudjgOTk152mvSsCsmflGMZ/rV\ne41p1xgld3YGnzGUYNvU6zUPEUkm9ZJPlwWX1rDm8Rb4w7BmPsf0rD+2STIQW+ct0qAN1HVX\nHIHrUynY1UO5qXWrteScJ5bdwe9RS3EnDBsN02+tZayMvDHgHG70pWmyCdxYA8Vjz3OhRsWg\nXbc4OG6GllGERVXvyKhjykhXcSPvZ/pUg3GbcW+lRfUdiVZCiEYKKD96l8wZVtxx6Co/O2kA\nc/WlklRm+YcnvVC6lpm8xQpO0k05XdtwA+XoWqDv8x57EVMrE4HT0qGPY1dLjZmAJxnmt5id\ny7RxjHNY2lRsrAn5mx1rYG7IweO4NZyLQ943XJ3BiajaMtnDAmn7SGHrQyBcsv
SsywVdyju/\nqaFR9p7k0bvmxjnqalK7uQDkjpUsREny5yaey+Yuen0pFUO3A+op+7yz8vIxQgGBOgU/N6Uq\nsI3y35UoUOuM8+tLt3844Xp70xjVwMjb1p23aAVGecEUgjDqWJGfc04RhcjcemRQOwKo2yKV\n57U1EMaqzcuPU1INvVj1FMCKX65PrTuFgfczn1YdKOSqqDgjrS4LSA78nNPiXazn7xz+FFgG\nbT0wWOaeo3dOcU7JWY/NwRUTfIuwclu9UJtoU7WZypx2FL80cJbGT3pflGM9KVmO7bjhqAuM\nUltodcZ70sjHoACB0pW3IxXbkAZo+RV3YwfSmARqdu5uKVlYHaMjI5NC5wG6+1KWZWz1z/DU\n9SBuzaoAP1pzKr8g8UMckbuF4yKG/d7l27fSmAiqe4+XNOjzDvyw60qKwwT3FDKnPegeo2FG\nZiZG3elSLhSxB/CkyWZdgxjrSBT8xYYpiE27Nv8AtGlkj8xjhuRxTmXcvIyw5pGxuAYZXrxS\nHciDbSd3bin+SJlxuwSOafGI23Y+UdeaauZ48K2ADn3pjGurcAcADGfWnNIN2AuDjipOsag8\nPnj3pGjK8Zyc/lQITaZFC/dI6098RhecsD0ppUxqC3JzTmdO44xQBHIx2kg457UbvMbBO0Y5\nNOjQspIXCY4BoVdsYBOPWmMYrFxlTjHanhnZgufkpY4xubpnHahXJjZR2pALDFjd82R3pvmb\nXwq4IpYCWyCcY9akVeNy8sKADbtQgDPeo3X5g7fK2OKczErkcDuKYI97EYJPXPpSAU5UDPTI\n4p3l/MwHyN1/Chcu21xgYxmmhRHkBiwpgSRytlcrx1pW3O24DardaQsFjyc8jihWdcbRgds0\nwDLDjadp4DUvLLtY0LvEZbOefmHpQvyAbvmB5BpAAVvMGWzin92y3y9xUSyFFJwC2eBTo8q3\nofegBY12rwcqec/0pWYIpbGDnGKY2dxDAAe1BLNjPzUgJSzNHkDPqKVcsoyuabuKkjbgGgMq\nsFIIHrQBJ3GBk+lPjYHKuApFR7NrD5vxodixxtOSfvVQAF3s3dh0p0cflplhudu9IytyQRkU\noYEgdqYD+RHgdQc0RsOcHDHkD0po+djt/OlVgwIx89ADuVbcACSKlRvMDKGz/So5XTABJDU5\nQuFZGIBGcUAO2/MAx3EDBJp6qh68rjtTAz9AuGz3pXc5LL93ofrQLUWKLknr6ZpGcBSzDBB/\nCkj5jcs3OOPrRDH5kYU9V5PvQKzuTRq0cBLPyeaeoLYI+Vsd6TarKCeQT09Kc25uGGMHtTKC\nSPcFO7BHU0qsNpBHy/3qVv3jdQBjpRuDKFPyt60gGeXux82fepDJubBToMZ7Uqr5ZJPTGaQM\nDHvH/fNAx4x5eRzt5xT/AJpsEnAI+7UartUcfMfTtSsrkY/izTES/dUowyeoxTVyrKT8o6mk\nbKsGNOb5tzKOaoRI7llJxg0z5ViBK5FSeYm0KB26Gm7CsfsO1Ah8cZkUFflpvnBY2/jw2D7U\nseJeclfYUeWSTtwF/ipiJS22P5vlXtQxAUnJOe1RtCZYlB5AP3qk3bnG0YwKYCbVTGTliOvo\nacFXcPmz29s0CNSh3deoxTPlXt175pkkyyDdkfMPu0u3LFey8k00KpYqzYC9CKdNIw27ACpG\nDSKJAS/zn5qYtwckAbg2QaRGJjOMLjgVJDCEwx6dcUxbDlBVP7yjoamZvlDH8B2qHduVg3Ln\np9KcxJVQJAy4xQJMVTnqwVial9Vzu9KiCKrKANzEVYjU+YPlytJmkdyxbnapwOfU1pafGsre\nYxx61RtwWO0DNadqgKle1ZM3joXItzbuRjOakYr5J2kK/XB71GrYXjjHHtSeWHbB+960IZLG\nA0YYNgHqKbMVdV+XDEcEUSZVwqr8mOTTgSsIGBnOBXXBnLUKX2dl+VfryaAol4yP
lqd8LnB5\nHWq5iDKCDjn1rUxsOb727GPaomLTLkDpzin7/LmLfeHShsDcw+Xd1xRqIYy+ZGxU4IOad97f\ng4xTeY+T0ahPmU5GCaYEbttj6AjHPrUDNuUBBg+lWdqqgBHfk96j2jzCVbODyD6UwKd64dlG\napuQCydeM1euYT5pO3IPIxVOaBVbcTy3AoJsM2+XCox1oZd6gn6UxiWYKF4H8XbNPCMjEYyB\nyakBQwjIw28k45p7bmz83zZ5xTM/MBtyetPkCvynynGTQIkVUWAKF3c5209lTzRk87c4pIZF\nUgEYyOtCwl2O4becCgA8xP7hoo20UC1PjpwryHapEbcj2pYcbiWGcfxZpIWVQqqp24znPSk2\noucnd/FxXgnqsemfLIC9TnHoKRtu3O75s8e1Ioflsg7hng9B9KYymRggXCHke9CEOkARgxfP\nqBTmjZtxUq3cr3ApWh3fKNq7ecd6jilC7sqSxoAlj8xxhACuOT7UNxGu35c9eaVQoQGMGM5+\nY9aNytcY2lgaXUCUMcYkHy9MGovlbeoOWx8uegFBbziWb5EA60yRVOCvIP8AF7UySVSUULuO\nccqac21sYbaR2qFx+7Lb844Ap6oUhViQ6+vpTAGYyZI4Ud6eVyxMeGAGNuaG2MqqrZXvim7R\n5haM/L3JoKFOGDA8fLjC9qCrsoDPvEY49qcz5jOThfXHSmSb/JMaOMk5zSAeFBwehYY6UxlK\nxFFOQOC7ClI2jLNkAdKXczgxscllypoEIFVYVIfzD0o2uisiDk849KFRdg3Z+UYIFCMkamUg\njtTBC7lSBUQc9WbFP+VWwBuOBioJHkjCBRuJO7aPSrDMI8MSMH+EdqBDW3K4G3NP+VN/oRj8\nfUVDI3HlBCXYfeBpfMMkYbZtKcZ7EUDJIV5IOG45qH724hc+vtT/AKc/xcelOZwqs0Z+YcYp\nAMzuUOnTpSxsY4zldvqaj8ssFdeO/FTGI8sXxuoBCkhtuQwbHGPSj5XjQMw2tzn3qOGR9/PJ\nUcN6CpDhkj7jPOKBCMxjznBWnMyiPIOXHIGOaNu6OQuMgHjPamKPlyc4BxuxmkWSfMuJmG5m\nGNvpUUqkxqfMxzgD3qRVLSbM7TjIIORTeJMK3ygHmkUSKvRs57ED1qbbt2qeCabaxjdz93sK\nuybfL+cfNmokbIzWj+b5PlBHNUri1PlFs8ew5rVdWZMJ8uB19KoSHapJfnpQUZ2Ay5Tdu/Km\neYMNgYHQ1ZClpB2zURhKF0I6nOaAKjqdy8EDuaRVDO20ZX1FSzLIrAr846AVEqmGQop28flT\nEVbiHdjnbz1PWmuny5KkBOlWps7cSAMo6Gq7KZI+pG05x6ikMQyZjKbSGcfxUSbvLVAeFHIx\n1pJG3bXZcc8Y9KJnO8EnCelMREVHl8cNnpSN8vzbuKkJVCAD19KZJ8zY4FFhkMgHlqScR/1o\nK8AMmARmnPlY8Hpng0mGZgGGMD7vt60ARsu1+SOaeqybgrcjsRTth25C7gOKb8yBfSmBEqAs\n3O1icc1Hg7tiFt3epWG2Qg4P8xTgsnn4JBUrzimMgjA3Ng52ipUlZ8543Dml8sBQFypB6etN\nkXbhui5xSEM3NGrbc7D6etO27VVt7ZPXDUjKSRh/lB6dqb8uS5O4HoMUwLqzPJajK5K/yp63\nPyqRkDpVT/VxhhnaTyKkSRckOcAdKQGgJELKGYjHI55qX+1WYYVCSvPzGssSAKfl2kn7x60r\nP867sgjsKYjdj1MrtB6t82T29qux6gNu9m2K3HPPPauYFx8jYXPbB9asRXYKxr1XGD9aRLOo\nhvmbiToOh7CprfU4ly7Fg3161ycl1JtO18Ln7uact4x+WTg9QKYHew6tJJb8NmPqMmtKy1gx\nqwl5b+HvXC6fejJUn32/1rTt9YlMgIjUjOQD6etITOxs/Ehj2MoaI5ycHrXSaZ4saF
tzjJb0\nNeax3Uc0hwQrYz1qzDqDNH80u5R/D0ppjPZrPxs6Yb7SWxj5R/KrjfEN2DEy7Fz91u1eQad4\nhKfKqKc8fManfUlmZiCc+h7UAe9eHvHm0bmbnP8ArM5r0HS/iGs0RiD4bpu9a+V7HWDaSLIJ\nOD95e1dZpPjmCGQrI2G6jnHNQ43Hzcp9IR+MVGT520rwFzWvaeOkWPzHl344zmvmabxZdySk\nrIST82B6eoq/p3jB3ZVZiFzk5PFRyFqo2fU9n8QA7KQwAxxg1pw+NQ+CPn9818vR+ODayBQV\nz1BVs1tw/EL5Rul2+4OBUuCNOY+mrHxhGq/OcfjWjD4pilYAnj0zXzKPHyxbN9wSTwO+K1bb\nx8xUDzt5HO6s/Z6lKR9KQeJI2ATdg/WtCHWomUEuCB1NfOdj8SFaNB53TjaDya3tM8eRx9ZN\nu7tIccZqHFormPfBqiS4CsCPXNTLqW0bc5HbFeRWnjqJpAEmVuOGH+FbVr43t2XHmh/9rv8A\nlUBc9Og1IFM5xmpftRyCWyK8/TxNFIwPmbRj1q3H4kSY4L89sGlYu6O4W9BU84NKt0Rg5rko\ndcibAMg/OrUesJuwzAL65pWY72OpW4G3jn1p6XC9O9c3/aSpgeZwf7verEeoAAHdyaYHQece\nCDTfOBPrWNHfFfm3celL9uBbIYfnUjNxXOOuKFmycGsqPUPlyx4qRLob85yaQjV3fLTlfaBn\n5qox3isuRxT1nXIJNAi35hapFuPlqkswbJB/WmiYd+KLiNEXAY59O1ONxuA28Gs9ZirYxT0k\nKtnrQHKXvP8A3gJ5FSGUYJB5rO87bTjKW6cVQi8txtU55pfPwQfWqO/t3p3mhuDxS6gW2mHr\nTlk7A5qmzfLxQs3PWqAupIBnPWl8zaBVHeeuaUTZHXBoGaAl5HFO8zvVAXDMMA0ouMcGmI0V\nYbSaYZOcDrVJbkk47VJ52cmkMst8y+/tTNigdMmoPP8Am60/zRmgWxP5Kt1XNMWzh7oDnsea\nZ5x9aUyjdkGgBs2lxMDhADULaLC3fLYq55x4JNJ5hzkcimMzJvD6OCCARVb/AIReJskfKe1b\nfm9TmlWYbQNv40EnOTeGA+Rt3Gqf/CGpjOGJIIxjjpj+tdgsvOP1pwnxxmgRwVx4NEZ/1ZP0\nFZ8ngtW58vHua9Mfa3b60myPptB+tO4I8rk8EnlggbHPAqr/AMIaZGLLGEz1yK9bNui9FFJN\nYwuOEFIo8dn8FpIMEbsH+7VZ/Bq7inkhgfavZBpsPcACmPo8RUkMM+lAHikvgeDBxBx3qm3g\nVW3YhAHbJr3NdBibI2jmo28OxsPlC/jSuB4QfA7KuGjA+gqvJ4F3A4TYnrXvTeG4tuCo3VC/\nhWPBwop8zA8DuPAobHlrk464qm3gdowQfl9+tfQDeE0OCyA9sLUT+DY3O0qCOtHMKx8/t4Jk\nVRhdwPOcVWk8GzjPBwenFfQJ8HouVCnHoKJPByeSMR8Z6mi4j56l8I3MeMA9O9Rjw7cMpGCD\n05r3uTwnvDbk2kHjAzVWTwjujAEXQ/exincZ4U3h28hYDYTx2pv9h3cbZMLH6Cvc/wDhEgr5\n2Z96iHg1ir5GDnincDxD+z593+ofP0pn2Gbdgg5/u17ZJ4PTaA0TN61DJ4Nwcpbr+XNO4His\n1tNGuGTFRbZY2G1WI7kCvabjwWhj+eIP+FVG8EknCIuB1GOKLi6nkihyDlcmpI4+oKkH3Fep\nL4KCA5hU59qhfwOkhI2lPXIpcxSPL9zbcEEelPjctxt5HBr0tPAqrw4DjGAcVA3gpMldiqOh\nZaVwPOWkKqQRn2oDHA4wfpXev4DAydu8f3qhPgoEgBDn1zVXA4rByByKXcem2u2k8DAEYZsf\nSov+ENCKf3XPYnvRcDjs9cUvmHbx19K6tvCbqCNmD9KT/hDDu3Y3HHGKLiZzEce7GKcd
0XJH\nFdMPCcqqMr5YzjPWo28LzqWypaMfxYouI5zhmzmnhv4ela3/AAjEigg5YdQcUz/hHZ+uMUgZ\nmKp6saN/zgDrWpJ4fn24VSWo/wCEbnVgxODTuUZxULn1o81V6itKXQLnduCcVBLo9w0fEbA+\nmKAKn3uRQD+B9qvx6HdhceWfWhdBuZcsqkEdaLgUVAZc96kCdM8VbGiz/d2Et3qOTTZ1+8CM\ne1IViAj1OBQCMClaxnC4ZSPehbGfIwuRQMVR8xP8NO2AdKk+yzRdYyw70n2eWQfKpAoAj6nA\npWHPJxTvssi/wk/hQbWWQbthB+lICPPO0HigkNj271L9klWMkxmj7O+3ATj6UwGM2BgDNKH3\nMOduOtK1tKu0lPlppjaOTaV47UCEKBmJApzfNjHAqTa6rjbg+lMWN2XcFOPpSEIpOcdqjkkA\ncL1FSrG3Pykn0qKSJxIp2EZPpSAnVuMYwKkGVABPFEdu7dFPvTjbuv3kNDGNk+nFJtIxzxUz\nQuy8qQKgVG3gEED6VQibHy0AZJAbIp2wmPofcYoRfmxtK8elICBYxJJlh930qdl6EDinrC7M\nfl7daVY3IHHFBQjY2im7vm5yKma1kaP7mDTZLSRcZU0gFjz0xT1bdSLDLt3bW/KgRuxHykfh\nSEIu7dz0pzMcYzSLFIckr0pVhkbBwcUxhG3PWrKksmelM+yv2XIpfsswwApK0MTH5496NwPf\nBp32aTjg80n2Z1P3TTAUENn2oEgztPekaGVuEXNJ9jnI+50o6jJFHX5sAUM24dOacuny9eac\nLOYtnadtIBkOc5AzUxy2OxoispmmGBx6VM+nzpIcRkigCvj5vl5HU0SfLjA61aj0yVuoKipP\n7Jk7c+hoAzmbDAHipMBlzmrjaPJjJX5qWPRZcc5x9KAKWdvTpTkb5vWrv9kTowUoSKc2jTqS\nduKQFBm28dRRuAHNaS6Gx2k5PtT7jw9JwNpFMDNUd6OT2rXTw/Ie3GKmi8Pv90qaAMPP8JGR\nS7T0xW+vh1ouq7qk/wCEefgbSDSA59YSynINMZT0x0rrI/D7Kozk/hT18M/MWxx6GmBxzAk9\nP0qW3j3MQR2rrV8PbmOU49cVPD4b2sMLke4pXHc41klVgApqX7LJj7vWu2j8OqZDuX5cYFSw\n+GRGM4JFMRwsdq57YFLHZyTAkDAzXct4dPzbYzg0/wD4RvbGpA+WmI4iTS5T0FEOjyDoMmvR\nodBTyguKdH4fCycjav8AOjqFzhYtHkmAKjp1qP8AsGRnLHdj0r0hNGWJSABmnR6TH1C0mK5w\nEegzeWFAp48PzbhtXNd//ZqxtyAQaEtQp6DFIDhf+EdPVlIqc+Hv3QwhNdulqi/w8+9S+TGw\nGRgj0oC5xEfh47QPKOfpU0fh8/dZcfSuzWJSenFDQhWzx+VBSOYj8PjO3HFWo/D6pgLyK3VZ\nc9OKduX8KCWYsmhwlwSoBqVdBjIweh7Vps3zZ7Uecq9DTApQ6XFAgCrxU6WUSgnHP0qZrgbe\ntN+0r60AySO0j4GKcLWNicAcVVOoquT1FJ/aqNCQOGp2AuCNQCCBSIqdhWYdSPJzmof7YTqD\nhqAbN0bNuQAppfOXHJya5x9ZC8E9aRtSJAKvgYpCOlMyRrnPWqzXSrkYrmZNaZWwTmmHViF2\ns3fJpgdOt0u3jrTW1JF7jPeuQm1SQMdrcVE2onYcnmjUDrX1pVb5ufSoG1tm5UYHauSbUWZe\neKibUmX7vr0oGdgdWOzkkepquNcddwz+NcvLqjbQM8d6ia+PZuKVxHV/20zKP3nPeov7W3ZO\n6uWW+LhsdaSS+KqoAOaYHRSa4VbaSQKqyas0g6nrWE10XXLHkVF9qLLjNMDZk1X94SCcYqu2\nrHccGszzMrjNV/MzyKBGs1+dxYnFV5NSMbHOWrPa4C9TVZ7vcuc5oGi62ot2bHPSoZL5mO
N3\nvWZNdfNnv6VA8xyWL4PpVAar6g6qcH86rrqDc5bNZM1/t71Ue4POTwaVgN7+0PU4FV5tQDKc\nNWFNdHoG4qo2oANhSdtAHRpc4XJ5qvdXm4Ag856ZrG/tZY0PzcVQuNX+QlW5P6UWA3ptSXr0\nH1qrJqW5c81y1xrWDgtk+1U28SxD5ZCVx0NUlqZtnYvqO9gQcU7+0drDD4NcHJ4qSJuCASe5\nqldeMo42YiXke/AroUbkNnph1jyWPzndVWbXtrFS+GNeWS/EWFoyd+VBwTmsl/iNBcTFYWJx\n/eP9aaizPmPWZtaPJeTG3nrWXeeJYmJG/I65LYryDVPiUsm5WkyQOqN0rh9e+Jaebsa48tQM\nls9a09myHI+gb/xlDBGoMhRM4LZrAv8Ax9CkjlZVMfTKnnNfP0nxJjWTJlMikfKC3BrGuPHj\nuzESYbOeOlaRpkOZ7vq/xG8uMqJP+BMcVyesfEBoFVRKA/3hj0+teOzeLrm43ebKWQn7veqN\n1q0ky/6xmHuckD0FbxiomEpM9K1fxwbyH727HucVyeqeJrgxuTIwJHAU9K5BtWmZdhb93np6\nVHHdq+QzsT9avYlvQ34PEFwcAyZz94nvVu51HzIfN3HGNuM1yfnImMPn2qWO63KVx8vvVcxn\ny3NdtQfG5TvI6LUw1iRlIkbco6LXPfaSm5UOG96VrjcuMlSx+anzBy6m7JqmxQOcEciqBvGO\n8kADt/jVCSXcqoSTg1G0nlo3b39qfOVylxrpl2uAS3fJqZ5zIo2j5jzms5Tl1IPDDANWI2C8\nKcn7uPenzBy2LX2gKvzDJHWlW42xqqHqflzVWPcqtn16UeYyqN33fbtSuK2pPlo9zMwBz901\nLEyvgA/eqsu5ogSdxPHNTRyAKqlcsvpQXYsMHjbAGMnnmpo8hWB4JPWqqsJXJzuOc/SpUZgx\nOcj0NArE6soG1j7k06QR5RWQ5PIxUHmHbyucnGKnVTuHzDcOlMViXcBkMc46Cp4pBvYEcgcV\nBDGGmOOvercMZMjZbrQFjY0VGfZJJxz2rckVWbg7V/WszTIAqpzuFaQjHzbyeOhzWMmapCkk\nYPpTOSQd2FzT0QrGdxyWpiW7qSJDgdRWZQocMjZARgeCe9Ee6RS278BSNGN6sxz2qSNChbt9\nKBWGRlgMtgemKXPzYC596Xy/mHfuaMfKxPBPSgBFZljII47CnbWXBBwD2pgUqN5zuH8I/nUq\nktye/egQw+nU9TTm+UgqCSe1KF2gcZ560pzuyPlNUISJepbAPem7XTIPOehp8i7iCflJ4zSS\nbgu3O455Y0rARLHtxg/PUibVUjOMc8Uix+W+8cjFPhkPIVRg9c9aBguOATkmjgKSevanuy44\nHPTNIuWkxtyKtAMbLNlVyDx9KkbmRR1VRg0u0c549KQx/NnOAKQhjKZMDBxnrT5sbvlwVXoK\nFMkqkrhB/OmIm0kMd1Idh+/5ck49Vpu12w/SPsKcqlo2HT0zTY0Masxbgce1UhWHBRI5YHhe\ncU4SGTAbr1pjjYqHbgt1xRnaxGPmpgOClVJJ4z2pFUqzAnHGTTo2I4PzL2NIuG3Fj+H9KEA5\nRtYHO1CODS8MQrnA6UIu5VA5A5x6UrOWzwCBzSJAyfdRDh8ZLdsUbi64I49aFYPhgvGKaw2r\n/vH8qAE4x0yafGoVcdKb5YwMdutOyqt83TGRQxgqFmXJyAac+NxGTjNAHXnbkUqldpHVjxTE\nRyOWLdwopFzt+7kEU/y9qk7uAKainapJ4zxQA5Wby8v0/WlkKttwvBpEDRrJgZz6mgxsPm68\nU2UOjjCqTnvRCD5jlcEYpm7bhj0706H/AFrZ4BFSABdyuPWpd23aFX+HBFRonl/d55pk0jeY\np46UASRN1Xr/ALNOX7+B8rEc1GoXGVHznvSrGdxJ+8aCWxwY8o/XPFB2oSFHbmjb8xBXtzQq
\ngLyDg9KZQoy2McCjdyQyke9CqS+VG3jpRIXnU87aAFdjgleMDmms2du05JHFOXGApG4gc4pT\nhXGFyo7+lMXURossCxGMU9YWZflOPr3pi7sNt5yakjDcgHJ7c0iiN89XQelL823IU8d6WQMF\nGQTQ4bIwcr6UCGlWP3hgeuaVMt8xUAdBmkjYsGBB3Z4p8mGXJO3sPrSAVhlcE80keWAahSvk\nhScv60MpbA+4KYx27hux/nQpO0/LiiNWTOeRTtvOW6EUCHpHugwDg9moDEYzjPfHeosGJev5\nU+RRlWBycciqAUr8rNnB6inK+2HBHzDgUgztyOAf71Ah3SZL9s4NJgPjy0iktg460/aPM3E/\nLnJHrTI/m5GD9aeqnaxxubtmmIF2tubGQf0p8I4YDjAyfWofnbKhdoAyWqeNsgZ5PGTQA5VD\nxjsTzUi58snkj3prSbW7AHpRufKr1z2oGHEijafmFSeSxKsW571HtC5bGOetSO+xlI5Q96Yh\nfLMhOc4z92n/ADMuHUKnQKKjSRWyWzkU5pjDJg5Mf8qYgVSvy56c1J878AEHrmmKoLswJxjm\nkG/Ks+RF+tOwEkeS4U856ZpE80MwxtFJwrAICak80KxDAjjFADlUTMeRleKVVZZAhP1pkeDG\nxU4anYbcCGy2KABMxMVPB6/hUobnZn52GRxUcaEqXJG7Penbt0gLD6EdqoB43bFXPfmnKmdz\nhxtHaolbDEE05U2nocH0oJY7G4DZlieacp+XBHJOKbyoO35hj71NVx5a9znk0CHIHZXXbgg8\ng1IGTcA3THSmbtoLMpLnjinbcKSeuO9ADtoUYC4HWneYVwR81N8w7kAGSakGdxIAIFADljEi\nhnYA5zihV85tueQfzpjKJFBPGDUsalcN2zTGOtyyqVX7+cc1at1KqVbls81Wkf5TsODn8qsW\nbbpd27IxyfWlI0iaUMakkp8px0rShj9eD6etZ8CrwzZXNanK8qc96xaN0PYfdXGBSxEqT6dO\naRACNwOTmgMDuDHilsUTRrvyGbPFNa3cMpjPA7UkbBQFP4VLnauM4weTXRBnPNFOb+JjwM81\nWO0tgEkjmrlwmWOeVPTHrVFk2kknHaupHLqOjaTy2XGQxyaGZVX7pP0piyFV2sDkUvmhv8BT\n6CFccbjwfehn2sOc54ps2xeSxLAdKZGFkUhhg9QafkA/zA4xhgQeSRSyxnhtvyt0NO3eXlc8\n9jUU2CqknaaAI5MrGxXnHeqEiqyjPFaTKrKydTjrVDyx5JBPINMLFYY/5aD5c9qXaFyC2fUU\nSRtyM5Ddqb5myDCr8ucH1qRWJI/LX5gckcUpUs4Gdo9KTcq/Mg3DHFKskgZOB6kUiQ4UqpGT\nntUrSszEddvNNEhWaSXbgbeBQodvmHWgQfaV/u0U3a/90/lRQFz472/vgBlQBmlT/WMAuVYY\n3UK5HLMRjvSM0kowpwvc14J6rFbauFQYZRzUjYVt7fdXpUccZWQL0DDBapFQK2yQ7lJoQhkg\nDbGX7zepp8gTaiodz+vamyMki7XTBByq0Kw2hsgHpigdh8eFhc7sP/e9KSKNZHUhjGFGfqaS\nQH5Qo5zmnSBpJtxOwChADNuZVUblHH1pd0a5XATnB+lLuO8ZPyDkcd6QhXY7seuKYhNsfOBk\n9BQ8XlxiEja3XIPFN4myy/u/9mnRsXiIcbj3NAh+9dztGMIgwV/rTfNDtgQ+WO+e9IqBcHPy\nNxtY094/3cm5tuemetAwXLQrjlvT0pHKtgAYJON1MCbtqhsp94/WpmkZlxjcF9KQDSUkUddi\n8fWlZlZ14Kj+dCqBIFA2oRmmiNGXdljg49qWoBt2xlSCVY5wOKWR9qgAbo6dt+dsMDxgYNId\nwHlAgjH3vSmA5lVvmTjjnnpUQ3Zz5Z2/r9aciBlAJ+YDGRSqsgUysTvHyrt9PelqR1FON0QV\n
GQ+/cUqlgXTbsQjNN2+ZbgIeV4IzTigk4DcKmMVSKQihtqNnZt6j1FGNyMpH40MSNinJfGOO\nlEsJjXIyW6Y7/WpELGw3kyH5AuN3vSIpOVPfoTQQdmAuxsfnTQpj2gcnFMNSSOTa2GGCvRqV\nf3buAdzfepNpmwoAB96I/mkyVOVGMrSuAeZ8xyeoyVp0TFkYr8pJ6UyNArEu24nke1LGpMhP\nf27UiiSKPps9eVpytGBI2dy54qFstudXyfanKoW1CHGXPSmUi1Yq8mPM2pk4FaMyhI2Vht+n\n86zYpfJKEruGMZPY1sxqbiH956dQOorI6lqZE8ciwZXoeDz1FUGVhLw3IX7pFblzGMBUOBjA\nGOlYzxOu4McHOM0aiRRCyNKxdeSKJVJXarZbGc1Oy+XGQ7c9DiqksZ27U6Z/OmMr+S5TIPGe\ncdqaqhF6kjuSKlYHaWRsbf4ahff5ikgqx5K0xIY0JmBEa47gt6VXkjLPnkkDip5GxC25ty55\nI6j2qPy28sBGDL1AJp6giBkYQsNw4OKjkT14OOfarDKQz5wpznimCPb977rUhEJXaqsv7zAq\nPdGy/OuGFWX3Kq7DtXjNM2AzMWXcKZVxq4TIz2yKZ8j4IOQ3H0pY3Pln/Z4ANGFRVGMKx9Oa\nQxqiTo2OuKa67cICXGc0/wAvdyME0xsqAM4Qnn2qhEZx5mGXPH5U9lRVR/myVyacp2sybcjq\nGpo3hl3MVQfw+tABM2duDtyODUbK20bhtXPB9aVpMnhMsefoKcWL5O7PHU9qAEXbHlXHOMn/\nAAoj+YE4KLjrikVAxLH7hGAvfd60sgaOLKdhgkUhDGHmMp5Rf50wELhSdxzUkmVjQKO2SKRu\nMOP4uMUxoYzH5sH5h0Y9Md6FzvADfeGRStCyoeffafSkMW0hgOMceooEDzNJnaegwMU0O0ca\nrkuueaVYuR0UetKVdQUUjA5zSDlHgeWQd29GP3c9KkkmZfvDgc9aiVQp3bdy4/I0PhgOPxoJ\nsW11CVJAVGw/eFaMWpYYSNwG+83vWGWHXOR0GakZxGmd2F/TNMVjamu90hUgjuoBx+tWodVa\nBQN20Y5Qc/rXNed0IY1Is4b5t2e2aQ7HUwal+73EbBnIbv8ASrkOrfNvC4JHOT1rjxMwUIH3\nDqVqws+5t0b7loA7m31ZZGQN83tnpVtdSC7iY8r0JB71wUWpMJFdeCvpVu31ppbg4bHOTntT\nFY7+HXJFdPLk+YDGCf0q3HrTR4LExNnOAc81wUOpb5DlhuzndVuPWGSTLMAnQL2+tID0CHWm\naQu3yv8A3q0rLxTuXDnLg46cV5rHqjyyAK+W9/StNdShaEGMkyKfmApWBnoVrrTMW+YY5+XP\n61bk8RNDEEjmKyKPWvO11UspLkAjkY9KcNWVflY/MeQc9qQ7noFn4qkfG6Y7887eM1v2XjaS\n3BMdyQx4Kt83H415A151aOQfQVOdSkjjAU9Rg80nqO7PcLb4leTt3nd7Kf61pWPxSdJgyzbW\nH8J5r57/ALWfYBISIxxTbfXnhb/WHZjOM5qeVD5j6lX4qSsqkXGEPJUdAa27X4sCWFWD5BHX\nP9a+SbXxRNDKdwYo/A+bircXi+ezD4dihOCucAVXKg5mz7GsfiT5jEGZAo5GGya3YfiNGSAJ\nsntXxda+PpYlxFOzn0J4H41tWvxHuXkSMyZQ9RnJ/A1Lgi1LQ+xo/iFDgE3DZ61ct/iLA0gV\np/mPP4V8jj4mzMyqtyoCDr7CpYfik8m5mnEgz8vtWfsiuc+x4/HkbNlZG24x2/OtS18YQSYU\nzLuAzz3r42s/ik+7K3O6MdVA710en/FIGTaZ9p7Nxj8TUezRSqH1rH4hGEPm8H34q+uvIifP\nJgjuOlfMFj8VIuUlnYgnI2ncK17f4obZNvmj1Cs3NT7OxXNc+kLfXEmXKMHX2NXU1gNxkD2z\nXz
1a/E4RxBQxTPJZSDWxbfEiOfBW4HvuNTysdz3CPVIyxwdo781MupxkYD5NeP2/jyGZck/K\nO+eDV6DxtCwCswQHtnJqOViuerHWAenOKtJqSOow1eVW/jCLcFWdcd1Lc1fj8VRPKqLJz654\no5WXzHpsN4ki4zzT1my3X6V5/b68jdJefrWjDry7QfMzj3o1DQ7IXHXPFC3AI9TXOQ6wJRyc\nfU1ZS+VVzu4pDNvzNyg9aUNnq2Kw/wC0Pm4fC1Yh1IFsbgaGKxrhiWzRuHLEcVmrqCr8oOal\n+2Dbg8ijYdi55u0EihJj1PeqSy4XOeKU3CsCAafQC403+RSrNtHXrVNpguPWmi4DMOKQjQ88\ndKVZCO9US+WzUgkC9TQBaFztU4605Zvl561U3fKCKPMG7jrTuKxd87pzxQ14duAcVU8wjrR5\nm45ouMuC4O3rTlujjBPFUd23jqKYzbsc4xTE11NTzQsY+bJo88dqzwwZRzigSfNgHpSJNRbh\nVoNwOueazRLmk37aYzSW6DA80iXBOQKz1f0OKBKVbg0AanmblyDxR5nzDFZ3nFVxnAo84kde\nKANRZvL5zSpMJMnHNZfn+/FPF3t6CmBpNJ2HBpPM9TmqH2lmXngmk+0DgUtQNNZstwOMUrMO\noODWc10exprXm3jFAF9Xx/jS+YGUg81m/aCepxT/AD/Q896ALgGVzgA0hiRl+ZaqfaOp3dKc\ntwW6mgZZWCLbjANN+yxEH5eahW4C9aX7R70CJVs4GGCoNI2mwM33MU37Qq0v2oDkHNMBp0q2\njUnbz6dajGkws2T8o9AKnFyG5zTZLgHFICvN4fgZeCPpUS6DGFx1WtBZB1Bo8wDqaAM5vD8b\nf8sxtqtL4bVj8qKMVtmYDoxo+0KV/nTSAw/+EZSQZwBUMnhzAIUD8q6DzgpGehp2/igDnI/C\nyNEcLz601vCcbfeQGumklG3ANMjkAPJpAc5/wiw3ZVV24xtxUEvhTLZC4x2ArrVmHaleQFea\nAOMHhNd2SrM39zHFJJ4WLbcKIyOo65rs9wxjNMO3bjrQBw//AAi5O7CDHuKjbwlu4ZVH4V3o\nVPTmlyqjkAigR543hEhPmTbz1FEnhPaAqxLzzz0r0N/LYcqPpTI1XoUyM0uoHnTeFDkq65K/\n3aV/Cp4bYAOnSvQ/s6bidtKYI+6cU9Quebf8IyI2OVb67eKf/wAIvvQkRlVz1UV6M0EeNpA2\n+lCW8aqRs4oGebjwj85IjZjion8J7cjyuevIr0z7OvULzSSWqZyRkUwPL28KrI3MQz34qJvC\n65wkY6+leqLawFfljCnvTG0+BsfIB34ouB5Y3hhlDYT6gDiqsPhdeS0X6V6y+nxBTgAZqH+y\nYTjIyPagDzH/AIRkcEwnrwKlbwwBjKYPoBXp39nQbQoHHvQNPRegBH0oYHmD+FImZf3dMbwq\nofKx4OcZIr1L+y4gNxXNJ/Z8P3tv4GkB5d/wiQeQfJuIpknhOMTEmLc3vXqa6egUttx9KQ6T\nFIASNp9aYjy6TwiFxiIN70jeEyrAFBt/u16n/ZMcfQA0NpMJUngNSA8sHhtY9o8kY9MUyTwy\npJAQDHI4r1RdHQDOV/KlbSYe4BNAXPK/+EYLANtGO+BU/wDwjqtjEeR9K9NbSYGx8tH9nRox\nKAYHOKYXPNV8LhcsYx9MUreGVkxmMAfSvSjZJnO0EGo206Jhz1oEeaSeGdxPye3ApP8AhEyO\nfK4PevT10+Lb9ymf2bH34HpQB5p/wibLGAVBqRfDIXH7sV6IdMiZvapY9PhUn5e1TcDzdvDJ\nK9MtnrilXwuATkFnx6V6ObOJT93ikGnoWzgAetMZ5ufDajB5A9MVKfC6MoYjA9Mc16H/AGbD\n/CtBsYdvTBoA85Hhld2BHyfUU/8A4RNY+SfwAr0NNNQY4BPvTm0+NjkgcUCPP4/CuOSuVp58\nN+Xg
Bcc8ivQPs8UYHy5oazikxkYoA8/Xw6MFtmOacvhstzsyK7xrSMLgLTWgVVAAoGcM3h1Y\n14TvU0fh+NVwUOfpXYrAisGZAQKm8mHHyr1NAHGf8I2Cp+Tj2py+GE28DFdn5SZPAApY4lK8\nACnYRyEfh3b/AAcewp6eHdpLEED0xXXnAXGMGk4brQO5y6eH0VM7Mj3FI3h9WXCJj8K6xdqr\ngUm8bsgYpCOZ/sBNuNgLCnpoStGA4AFdBkbsgcUpKHApgYP9gKmAozmpV0ReAVB9a3CwHbHp\nTCy7twx70n5AY/8AYkYHCYNL/Y68Ern61rGRR0ajzhn1qrD6FCPSo1HKij+ykU/d96uyOBzn\nFMkuF7HmkCIF0+PhtnNC6ehY7l3A1P5+aFukVck80hakZs0jXCrgDpT/ALOu0AAHPWmNdqDj\nOTSreIuc0gD7EvVlGKf5KN0G3AqKS6LHg1G12OR3pgWFhQN04xUmwDHGBVH7VzxyBStfgtg0\nAXmx2xxSKT0OAKz2vNvA5pDdFl64pkmkzKvejepbnkVkSXR2/wBaj+2MrA5yKBmt567scine\nd6d6yHv89BzUf9oHmkBsNNxyaZJdqoIHNZQ1DcuD1xVZ7v5sZoA3ReDgk8U1tQUtgda59rza\nDg/rUf27pg0DOh/tLaeTmlbUdvOetc39sLN1ok1Da2M5plG62oY470G+IU81zq6gepP50v8A\naG8bh0pAbLaqfXik/tD+LPFc9NqA3bc4FRfbwv3jxQI6H+0DnO7IqCbVDjiuem1A/wALcVG1\n/lgTz60yTfa+Mh3Hio5NRLdeBWGb7PIFRm8LKeMAU2Bs/wBoOo25zURuu5ODWR/aBHBXI9aa\n16pbOMUhmo15u75pReDbkngVi/awM81DNqOGC9qAN/zl3A0ySfb15rE/tAbOXxzxSPqAkOA2\nKols1JLzaxI6elH2pWXJrGk1BOOcD1qBtRRlbBI5oHc2Zro4BA4qNbvd3rH/ALS3RkMcVWW+\nVe+Paq5R3N/7WOec1CbgLyT0FYsmoBfutzUK6hu3N6+9LlA3FviwODg1H/aDHkkmudk1I7jj\ng9qWTUB5YCtk96fKQ2b7XxI9B61WOpEMT1rnJNTMfG4j3zUDapzu3c0uUDqZNXXgA4o/tDc2\nS2AO1cb/AGmGfJOcdabLrC7uXwMdc07CvY69tQEnzZ4qnNqQXODtrkJNfWMcSfL25qCTxJEx\nBMn1FPlFc6htWVmO45FRTaojYywArhrrxOiTFQw29Sax9S8cJDH8ylRn7wPar5Q5j0CbVk+Y\nBuPeqMmsfMctx6CvMb74hwrnDZB6HtmsK++IybW+Yo4HXdgUcrFzHr1x4hjjY4bI9QazLjxJ\nDkhZVA7814pf/EsQxgNN8p53LzXM6j8SiGbbciRepbOAK05Bc59A3HihY42JkAH+91rF1Dxd\nHHDjz9jdQpOM18/3nxSNzDsEgCgYLZyM1kXXxDbbtNyHbGdr9h61apkubPeb3x8I41XzeT1a\nuen+IkUfOHOWxnNeB3Hjya7kkkec8D5T61m3Xi6YZzO3mZztFWqZPMe56h8RozJIqs2wds1y\nWsfEh1jPlSAHPRjmvJ7zxIVj+aVtzHJxVGbUWkk3OeOwrVRRDdz0WT4hTJks21jx8vT61Sk8\neXXmkmTgDqp61wDXLNknKsORzwaiklEmSW+bPGKuyWxmdxP4yefMqyMJSMGsa+1ZpSXcF+3z\nVj+aPI2qfnb9KVplnZdzfKOOKBamhHOWjOOQx4J7UkczNlTzjp6VVWTyzndmP2pk052ArgAj\nJ5pk2NCO6MJBPzdjUxlbacnAPPB5rJFx0IGEximyT+W24nj9Ke5DRoLKvzCPO8cnNRecFcZU\n/N1ao55vmDD5EIGW600XI3AKcDvmncOUt5VsYIzjNSRynYAQQaoq27lhynYdxViOTcoYdG6G\npBom8w
ysWJwgp6sc5kIVSeBVWSQx8YxznNOYfLuPQ8hc1SKsWg/XcNvPFOZnJZBgj+dVY7gh\ngX+b+lTqyybtn3xz+FUSTQghh3Uc0/8A1beZgdztHeooWEigKMOT9w09sSMw7YwT6UBqTMBI\noI+VuppV5BJ5UmooZF2gLkqBSxsWXcOEzyDQSybcRnB+QnAqSJtq/M2GHGagjIJ68ZzUsbK0\nhJ69qtBsWITtjbKbT/eBp8Mhb733fWoVH3VBwB81WIwH+TA3tzQIeqKTu6GnqxyMrtOfzqON\nRAxDEt2qePO0K+WFMCeNSpyX59Ku2aq7DJ571QWMqoGOetamm8XC8df1ovoJI6CyI2qACF9e\n9XnUMp9ueaghhMajC+5qYyGSPJG0CudmqFyVhBbvzTPM/dvuGX6inySHCgZIPamKu2Qk8g9a\nQxisdobqM8j0qUkk44APFJLndkEbPSlVVCqrHljmglgiGORgXBxTWZ4+Su7ngCpPLXcSOW6Y\noT5QyMDj19KZQ7LHLN8rEckfypkeOMdc8Uq/Ku3nB7Gl+6gBGO+aQCsWVmJ5NNZWb5i2D/do\nDb87fmHWlYBmBA7ZzVEsbGxkyr7l9CKHUt8pJPP3qYrOw+9g55xTueAeuenrUiHrIFUADIB4\no8wLIzKD83rTtoGQQA3XHpTVUZH86pAPQL5Q3etLtKkEjaD0pJVXnPTrxT1QttLfgKoYm0rg\ndyM0mN3LE9eac0ZOMk8VHn94VxxQA7ACnBwOwpI8EFsYIp5UhgF6daYV2ISPvE8+1HQklVjt\nLHvximsSkJULgnvTsnA2gk+tKqllJcbj6imhjUQKAMnCjOKaxE2W7H+dO4kBGTjpijaFUKBg\nUxDYw64Ockfw1IFY8sApzke1NKF+QfwpWYBQB69aAHNGdp+Yc81GuG+VTgmlbbu3dT0p8bJx\nx0NMAj2sCnIxSnDAhRz2prLgtg4DVIsgXBT5hjFSKwxXUH1AHNDR71weO9Kild/GQed1OXEq\n8npSZREy7mUkZUVKQpXI+90Apu35uGwKeo2/dOaNRDY8GNg3Bz0o8srhc8dabIvmYBXvzjvS\n7WXaoPy56VSYWHMwYNsbPrS8KSB8231pBGGcqoxRI4k4xgd8UhiFSxHTBHT0p24txjBHHFIq\nl/udKcF+bGeR3oAQbmbpsUUySPcoxhivepIc8knd7UxY9rOSDtNMBylmXKjkCkDDABHzURLt\nyeVBHSlPyqGHDUhWHJlZDgHOKOWkAPC+tI2+TkttY8UiuyKBncqnkfSmASSYyMHZnmpBmRVH\nRV5xUe0ruPUOcjNOZS23JwM0alAzCPLqOtIzHg4wuOlEefMYdMHPNP8ALDtnOD2oENZyrhc4\nTFKNu4oR15BzSbfmwwyMU7ywUJXhloANp2ttyB70NHkbiccURltuGHPX2ojG5Sd27njNAAmG\nZR0B4zTvu4V13cZFG7EY4wAeR3pdwZlx27GgCNVWZhxtxyc0+RtyjH4U6TczbcY5qMAMwC8k\nGkA5eWI3ZOKRpF4OaGAVmJG0nilWMLGI2Ugk8ZpjRIvzNjGTjOaRI/mD7ucYpEUrM4A4x1pY\n9y5DDJ7LQSKQflLnHNSNtLbkbK+tRtGWIJJLD+dSrCV3IDkDkmgBsWfMJBHuKezfvPvbQaXa\nGcHGB7Uu0SI5xjFUMRXdUZDy3ZqVRJuAGGNIqjy1xy9PEbAkqO3WmA9FMin5eFp6ksu4D5h0\npsasgCA4zyc03cVJA+Y54oJH+YUjUkZGfmBp8ZG3DLhc8D1qJ2Jj+6SfepuoRm4UdaQCRuPL\n56n+GpG/eKxLAhvTtUce3aWUhuSBTpFCQqVQjnk1QEjfd+Z8AnJ20pZW6MSPehkyokAyKZkc\nDbz1qhD1UeYMHtnNPkba44yzVCAdrhRluop8bfLubrigB7g7TtHTjHrTgx2gKNmOvtTY3O0m\npFdWUIRm
gGKZPkDEDHQtihS/Cryc5zTWYtlQeOwoUgnl/m7UDHMvmkhjtPWn9Iztfkd6bIBv\nQqDnqV9aRULbx0PJHtVCsKzCONQAct1NLtG0k8gdKI2OQCNwx3o5DEgcelTqFh4VpE+Y5H90\nUn3eCDt6A0gZuoGPX1qbcjLtYZBHFMBi7+CPkOcfWpTtjU8/Me1N2lVA67aXftIJ4pgKn3Bh\ncj3qUzbWGAPpUbOGX5T9Tin+Twu7lieKBWJFj3bn24Partqv7sFhg96qhPMwqjgVoKvC8e1Q\ny47l6CLzF3A9B371c3lI1Hf2qtZxBssGwqjmrUYVgxByFrPU3HjcuWHAoRlXn72aj+dxuRuA\nakyvkbnA3FqCkyTcvl8inCYfL+tRNIUyF+71qMTAZP51rEzkywsgaQgAY9apXEiySHHT3pzT\nBVwo4qs82F3cMewrrTOSQrTbZhuORimGQBtwX5c0NITtLjqPSjb5y7FO0Kc1ZncMB2J3YPem\n7jt2kZ7ZoZtysRwM0Hc2F6tjOKQDWZvMCNkD1FOVBubrjqCe9M3e1OG7ZhXzznFUOxJk5Jzj\nA5FVz+76j73NPZiwyTnPWlMbMwLEfKKBWKkylpFXlQe9Vg37tkA3Grs3zbX3ZK84AqsrKykh\ndp9fapEIqlrYYGCp4HrQ0ySMD91lFJNlYxg5z0A7e9Isi/cC5XHzH3pkko+ZT/FSiTzCQ446\n4FG3jbGR0pPLO5dx2sRQHQl+0t/f/SimYFFAj45aQLcFVGcDJpV2vMqsuCckYNRrlV3gYGd2\nPalVVbdKCwHbivnj1iRGdpA27KqegHNKwEtw2889qbJIzbcfKO5FOlbcwYHAC8GmAhYswy2d\ngxzUvB+UKC2MgmoHZFyxO5sfdpwEko3RMAmOc0xjkjTzMZPmHknsMUsf7/GDgnp6Go23JhWQ\ntxnIpyQrtCg7Qf0pAPRi8hjHReaGjVwS6nr94HpSoDs2KygDvSHa3DPg459KQmN48wZb5fX1\npyruBxgHrn2oTayBtmAARkd6El2Nyu4460yRs2Gjwx46rxSBUYDhpCPWjPmNtI29+egqVQ0K\n71GMcfWmAm9t3zEZ9qBlcgHJbgAUxtzSMGXYD/FihiYVyCDIB2P60gJljbbsC7n7rmm7Y1Vg\n2Qx6rTVkLfMS28LkstKjbedu7+LBPNABEqLGCp3EHoaazbQFVSFZu9S7d0jSKNjn+HtTOSuC\ncseB6/hTGIdki7wDjPApy7FVmXLZOAM0uxlkxKeBzsFIrLBABj7vRe9BNhy5AJGC2aa+TnHG\ne9H+sk3D5crmj7i88g9fagY7dtYAfMP7wNKrMsnue5pnCKI0j+bPTOKbwuWlBOTtULSAduKt\ny/fvS7VGSJdx7UDEalceY+efpR5fy4A2pnOO9AxGk3IX5RulPBO3MYO1T83vScCPAXdHnljT\nVD/vW3eUhPyk0IB+/wAw4x8vXHpT/M6lQAvTikaJ+QcKAOlJHMSm1Y/rQIXmNih+VGXnaKSV\nQgjDMcryv+NJ5ytu3Ej0/CiOM+crfeDdQelSNasvWqowEf3hjNbMakRqcMgAzn29axreOORi\nu4oc4z7V0CMPs4RTuC461EnY6olfUI1OGU/MRxjv71kXS7i+RwvB+tdBcNujUIv14rGn2tuJ\nHVsbfepKMub5VGxfmI6mqXl7ULMef4s1pzKyyMSOMYCenvVG6G3BcbloAz5jnG1fkqGaRzKF\nU7lP8VWrhQ0ZKAxoOhP8qrSk7oiqbOR83Y1SAjm2qw2jK4yKhfZ34UjmpnjPnS5X7x49qZHj\nO0/M69AasEJt3YKH5fVqjmjCk85B5GKf97cJDs/2aTcoVVA+m6pYETqW2NnKbcY75piqfJJL\nAEHmpWZuzDZ2Xvio2jXgpzu/SkHQarDJA4OOpHFJtPyg8kc4okXzMEtQ7fKDj95jHWqAaB5a\nsVGDjio2Yt
tJUYApz/6yMjnseaUyushOPkHHSmMOQ5IUnjuKYMOfn4x1FS7mj3qfnB96jDfd\nUH2yehpAQlgw45A6U8He2duBjn3pocbThSCrYNKMtL1AFGoCLhVJYc9s9qBtjXaO570j7jIT\nnOPXpTmYhVHBLelAhrO7qygbttBhyoHmYHUL3FDO8e4L260pz5ZLkbsUANwfLJdjSltqrjgE\nccU1OYyMZU8GlQ/MCpGF/hNAhQreXk+n3aTAjZVX5iwyQfSg/Mztkhj0PWkZdyjcMMO/Q0xi\n4VW+8FPYGkHygDPOeWzSrt5kxuUDG3HT3pN5jVSoUjuKQDlQtG3AznmmNiOMD7xzTmBaQsx2\n98LTZNrMDuCjHIpiBmLTDdyvWiPCkqBgFuBTFyvl87j1p+1/NO7k9R7UAPDLtds9DinOSMPu\nx2UDjNRLjLgDcxH4U11LAKxyMZ47UCJ1Ow/e2jrt96sKwjJYj5fas/du6MDgcrT2mMmMDKHk\nLQBoQ3hZhkcent61M2oBWGDlT0z3/CsoyruUH92x/hqVpNxDHAfp+FAjWt7wx4fcQM/dq/Hq\nxWPMbYGecVzdu3lqfnCr3Jp0Ui7d28xq3p3pBY6qLVt8jhn+XsuOtWbXURcRHbwV6CuQjkZd\nxd8gnjFWbe5kKfe/dg/w8GmM6xb1WjBJ2kdQKswagGwpfIxkNXHTX00c4KybYyORSf2i8PIb\nj+7UPcOh2d1qG6FS2GXPQVAt0247+FxxXMQ3zFkZmYrnvVmTU2hbG7IJ60rEnTpOsnCsORSu\nzbRlsds561hLfEYyvTkgGnW2q+azJt4+8MmnYZuGRlXI+VQOmOtJHO7YZZNh7Vltqb7Rgbg3\nU+lTfaFYLtYE9MUCLq3bR71R2BY+vBqW1vGK5Lsioc8njNZscoZeGDDnkdqsfaEdVQDI/vCg\npF23vmXzZTITu5znFW7PxNNC3zMfLbtmsaRo1Rsrj29agkuFkcKF2qB0pdR3O5j8dXG5nLLg\n/KqjtUqeP7uMAxsu8nb/ALXvXACfy24OKbJMX4YkZOBiqsF7I9Vt/iVJHtUI8eOpD5Ga6DT/\nAIoCOFhJOCCeWzz7V4ZIWjABkIK9eeDTftUiSJs3MF/WnYEz6K034sBfu3LKmcFSa27X4qLu\n2/agiHplv618yLq08chdsE9Nvpmrlv4heEBvvsODu6VDiVc+p4PiEF2SJcB8+9bEXxQgjcI8\nmAR9/PGa+TovFcqfNuZR2x0qVfGtwIyGlJJzhe/1pcq6hzH2XpvxJVYwXnAi9M1tWfxKhlcK\nkhI+pFfGGm/Ei4jREnlO4DGV7e59a3NL+JqK5MsnzoMr8xxJ9Kj2a6D5mfa9j8RII8kTB2A+\n6Wz+ta8PxEhZQu87s8beRXxhpvxO+0klZdkvXZ2+ldJpfxSmhg2+dtbdkjPIFT7HqXzs+uv+\nE4Vgu6TaDzg1PD4uiz97J7YNfKf/AAtYMCYxuz0y5HNadj8VfMhDE7X6bQ2P1qfZspTXU+rb\nfxNEwB83JHVe9acPiJHClT9cmvlmx+JLbtzSbBjpuzWva/FNGUBXdj654pezL50fSy6+shwG\n2r7mnNrKMcb1x6g188WvxKSVsFnB/wBo8VqwfEiKRGTzfLUfxVLpsXOj3ePUAxGZAR9easxa\nmqtz0+teKw/EaAJGxlUf7S1esvH6Tq+5gDn5fmqORhzI9kTUkYnLZ+lKl6j5bNeVp48hjX55\nQuRVuHxgqhWEodGHTOM0co+ZHpkd+obbvxxTlukDD5q86i8XJIwAZc98N0rQTxJH/fDntg1P\nKx3R3X2pepPHapFlXGc1xsPiKOVRhtvsTVldc3LkSDHtRYV0dO1x1oVgenNc7FrHmdMmppNV\nVV+Vv1p2Yrm8lwpwCaXzB0B5rBj1SFh9859KmGoqzAh+KANjc3AB+tG48luDWWupRsQA9Tfb\nUZx83FAF/wAz
5aVJCMjtVT7Qp6EY+tK1wMcECmMt+dleetL5g6VS+0q2MnFTeevTP41I7FkS\nfL0NIJAwqr5xAxnIo8wfQUxFxZN2cGhpAQM9aqCbLDFKH3AkUx2LPm/lTDIWGc5qFpunFMDE\nscHikFi4JNy5NN8w+Zmq5c7ab53B9aTCxbdi3Q4pvnHZjvVfzix6c1H57K2MU0IutKdvNH2j\nb05qm0pLc9KRm44oEXVuPmOTxR9qHIBqg0oC9DmoxId3NAGkt0acbv0PPessylG4PFL5xYna\neaANb7UVXIOaFuDwSayRM3Td0pkt0egNJD6G614gI9aRrgbeDzWKs2QMmn/aNveqEbCXBxhq\ncLzPH51i/bGHQ0kd2V560gNuS8XOR0oW7U+wrFa6zS/aAsfJoA2vP285o+3Yb5uRWM1yzKB0\nprXHOCaANtrtcZB60guF2kk81i+czGl84r1ORTA2luuOacZ93Q1hm4fHB4py3TRt1qQNr7Rl\nQCcmnLdZ6HisRr7HsSad9p2tnNAG2bjPFK10NuCawnvmWTOeKc14dwB60wNjz8rwaVrklRzW\nMLrbnmm/bGZsDrQBtNdttpVu+gPNYjXzMu386fHc7cndQBrm4IJbtS/aN4yOKxv7Q656U77c\nFGe1MZq+fSiYL3zWS18VXjvTVvm24zzQI2PODc0ouvm+Y8Vj/bAgx1NC3m7OTmkBtfaCwJ7V\nF9oOTnpWZ/aAVCM0xbwspORigRrCbbxng077RjGTWKt93oN9nJNAjb847hz8tK067cg5rB/t\nPaoFOXUFwc8cUDNz7QOKa9wOSelYralwOe1R/by3VsigRuNMCc560nnjp0rEOrFMYHHvR/aA\n+8e9AjZNx0APFPEgz71iPfqGGDkGlXVAqnjmgDaeYr9KjEvpnmskaluXrTG1AqvXFMZuecM8\ndRTmmG3J61hLqg3e9LJqgZTg80gNn7V8p44pguA3TpWKNSOzk5pBqQVT1osxo3VuNp9qbJcH\ntxWGuoFjnfxTm1QFevSnYW5u/ayOc0faBIMmufTUtzcj5akOpK2CDjHvQFjaE2R9KPtPXmsN\ntXXd6U3+0gwpWA3GuDgUrXAVc4Oawm1Lpg5NNbVOnzZ9qYG756n39KQ3GzBJ4rDOoBe+Kauo\nCQg7s/WiwG753zdc0G56jdisGTUOTtbiol1Mr3yKdhHRi6P8RxTRfBycdK53+1C5IxRHqGzv\ng+lKwzoPtR7Gn/auOTxXNTasF4BpF1MgE7s5o6gdF9qHQNSLehck9elc6upYUnGDSHVumO3X\nNAjp3vh0znioTMSv3q5v+2CvByc9KVtUV2X5+ehFHUo6D7XtXAOTSx3x5J5rnG1AjocigaoV\njIXk1QjpGugy7s1ALobuozXOR6s0nBOAKjN802SDtFSCOmF2GU/NUa3A34J965z+0McBuaX+\n0Sz5HB6VQzekvhuz2pi324kBuvrXPXOoMxCg4Heo11DbxzSJZ039pBSd2enakW+QYLNXOm++\nU5P41HJqQK/Kc9qaRR0v27cvHA9ajkvgp+9muYk1RsgAlTUZvyzc9fWiwjqf7SG0kdqZ/aO7\nvjNc0bzbzuOaZNfMy/ex70rCOq/tFVXDNVaTUgpIGDXO/wBobvvHmoJLwMuCeaAN9tUdZeOl\nC6k3mEk8Guc+2DggmlXUAsneiwjo31LcxwcACqj6keg6+tYx1IE9OKrzXrFsg0DNr+0mJKk8\n0fbm6Z4rnnv88H86EvG2nc2BTsB0X27gfNiopL73OfWsFr75eDmm/bvl5NOwG79uLNy2DVdr\n5g2Q52+lYrXnmMTuxTP7Q2qc8UrBc25dSXr1qGXUiy89KxBf7uhyKZJe4z3pcoG7/aAZVVTm\no5b7yz1yTWB9qdTx16017w7tzHmqsBv/AG4v3wAaV9SKqMGufj1BcfMxpk+oYU4GR60WA3pL\n47fX1xTVvS3fj0
rnU1IvwpwfemNqO1tqkg/Wnyi5jfbUOoPFRNdnduyT7VzzalhjualbUkVd\nwenyktm+1yNpIb3wahW+3KSz4rnH1hNp/eY/GoZtaiK8N+dVygzpJNUXbgHPNVftjbiS35mu\nVk8QRKrHODWVceLF3AFvpVKJJ6C2pK1RtfL1LZNebTeMkDKhlxzyPSqM3jyGN9vmbiR94NVc\nrC56bPqClc78dqoXGuLbttLflXlV18QwjHE/mFeQuaxL74mBpBESS7c7c4JquQXOezN4hReW\ncdPXpVRvEkcXzCTKn3rwW9+JS28u/wA3CjqM5xWDdfEkssh+0YlzlQp4NPkFzn0XceMFVfmZ\nQueDms648aRMD+9XA6+lfOVx8TAzFfM804+b5uhrJuviZLcKVV2WNTyqnrT9mLnPo+4+ICLl\nBgn2P61lX3xDSG2Pz8E4znpXzvc/EiRF3CQqf4cnOPasDVPiBK7ESuUDDOQeDT9mhc1z6Pk+\nIgRvnnUx4+761l6l8TI44wWkaLnhfX0r5tPjadpl2zZwMBvT2qjJ4wnuN++ViAejHvT5EK59\nAat8RPP+cExsO27Oa56++JRl3R72IxyzdPwrw9vFV1MzIu6GQdAxqOPVpQm2RiZM565p2sFz\n068+Iknyx7mCN0x296x7rxayKzNIxfd+BriJbozNu38gfhUUl5wAZN3pmgDbufEUrPIWZvm5\nwvSs1teeR9xT939ev4VjtdHzHc8Dt9ahWZpIyC3lnr7UEmlPqVwJUKMFjxjaB/OoJriTzAGf\negO78aoCUn+LcxHajzMEp/B61aEXI7iSSR8H5j3FSrIzTMNwVdves2Nvs+Ewdz8A1LgMvJO6\nqETSYWP7wz/KmJll3NJ8q9B61Ft+XLnIpIV2qSo6noaoVixJdEbmHygeo706HK7CTklecVBI\nzeXgkbScmhm2Mc5OBnimFi4Zl3AFtvv61JbqqszMAA1Z+R5all5Y/KKnWRpCMseu3FIZK0wR\ncA5GaDI4wD+NRqreYw6AHvTVXzGYk4pisWElGc/w56VIzCdSU2gL6moIceYOMECnr8pYeVhm\n9aZNiQyEEFsrnv2qXbukK/8ALJec+tV2cKpVhkqOKdudonIIZiencUCJ48NyQXQ9RVlHO7au\nNo/SqMch5XdtGKlRhHhi3PekItqA0Z3HgHvU33l2gDPY1SMgIBK/Wplk3MhDYwefeqQkSwgr\nMDnHr6VISDIW3DnPNQSZ7cLu4qRVTacnApiJQmQN3J7EU8yEszHghecelRbiyAK2D2FTAgRE\nE44yaYDlmO4ED5e1SrNuUnYPeoCqlk252Mc04MFYkDK5qiSyjM6gqNo9MVKJEVi4G5sdqrCa\nTGRz6mpY2xGrrzzyKBEzyBlUgY9RUvzecrA7SO9RxqjOWYZHanDLg5OFB71RJbt5GW6DOBsP\nGKnDEyMrDHP6Vnqd3OC2DzWgp3SKTwvXNAEkTblcD8PetzQFXchcZPv2NYoXd8oXC5zW/osO\nNpHTriplsNHRGNlwrN9cUN8uFBGPQ02SXdjGfrSTMrR55LdsViaC5LHcp5HFC5ZeB9aaAu0e\nuOak427gMAdvWgCOHndkZ7ACj5GbLcHsKEOCccE80DBXPVs800AvJk5Oc+lSSIduEBPvTc5Y\nEcN3pVcruxkjFIBd4mjXJIfOM08rhSrsNtRs26MELjHc0q/vYmY4IHNMBjJjLJwOmKU437VJ\n2eopOduR0PNPEYVT/dpksVbcDPzYf07UrKJF+Y4wO1CqpXqfrS+WqDI5PWkIauY+QNwIwc0L\nhW4X5aTzm3HI49KEXHJ4FUIeWVjwD1pcNyS3NO+8oxxRJGGX39aYwY7uc4AGfrSKWK72UAH1\npN6lcrwOlJJGzck8jsOlAx0beY4QHINJvADjrQjdCuAQKfIwKrgdeppCFhIZSQ3Hp3pq7vLY\njgZpqq0W7AAP86kU
lowGH0pjCH7xYrg46U5uVJwS3tTN74Lj6U+Obd1GzcMZpkjFypwM9M5p\nrMoj3DPPB9qkbanIb5VGSadtUhd3Cn5qBjUXyyoxyO5pyyK4bcQvPSj5ZF65GaGRTy4+Xtim\nAMR94dqFYRgseCelLsym0nb+FKFZckqCopADswRAo6nJ5pFTzGDgkL12ipFjVkVuc+lM3beA\nCuKAFkUhHYqMdeDTeFVCrdRk0rSCRSO/86VlDJgjHHFLqFyP59pI5p67vKJZcNSRqQVz9BT5\nD0yCSDTGGcKuG+fvTgohYkYweTUQ+9kgHninMrNyelAh6yFVwuck0m4qpDH8aSTbtPzbSB1p\n23bF13jP40wDPzAHihGYKW27wD0prZPUdOhpN7bh2z6UCHsknleu40xsD5WHenb+vbB4pvLQ\nszN82eBQMHjYsHB3J0BpWjKyZzweOKa2VjwvTFKMsqknLdlpADNnOOg4p6/MBnj60qqFB3DB\n7ihkHlgk5z0WmMTzN0gOOOlKFwAP4s9zTPJ3MpzhvSpAu3OOSeM0aiEyQc5zg8CnxAiQlhj2\nqPCqSW521L5gUrjqwzmgA55yfmz0HSoowZJGXPyr2qQFY8tnevqKj4UFlPLGkMe21os9TnrQ\nI/mJB5ApYGCnAxjPINOEbMHAXHpTEHzOoB4x3pU4BwArdzS7MAemOc9c1FJlTk5GO9IBY/Ma\nNj1APWnqrSEEncV9ajkYowCtkHk09Iy5yvDHgCmAbzuOPl56il5ibAPvSKvln5RlgcHnvTWJ\nMnqT1zQMm2uG3joaeGZIn2rnPOajXBc55XoB71JLuWP0HcUxArFmJHRaVpPMXBBQUzeNwOMJ\njNOPEi5+YGjUB64G1ivQdqlhkViBuwOpFQrksVU8AULGqxlzwxOKpATS/K4HUE8fSldS4DIO\n+Kjb93ED1Yc+2Ke0uQAMfN2qrCF+X7uSWpy8qVwSO9Nb93gKuMVJwBgPk4yamwCIoTIAyAOM\nUsXmbSCcDqAaQo0bLt/iHFHmN95gPl4Ipk6kqSFe+cjBFKu1eV49fWo4d7jevTNSld10RuDH\nGSopgGfJQsDnPp1pIcswGMZ9aULgsRzntSxw/KWbqeM0DHfe3Efdzim79pATr3pVyvf5R60q\nyKJCVHPrQIljO5wSMrjA+tRsFl4AwynrUisWTb93HNPi3IxwoJI5qgGZLEZOGped4UnHvTNu\n7ljg5pVzuCk+9Aydo2VWGQ3HFMX93GCRk96jcOOeoJpzkxwhgmTQA+MCSQc4HenrD97B796g\nXbu3jj1qZmKsrKccdaYmG5i+DwKkHAIbB9KZ8zASE5qbCv8APnbtpE3Ygm3R7QMHvUkPzNtA\n3FRkk1FCu5STw2acEZehwe+O9BRehYLkkYzVpJB5YA6gjIqnE6twPugc1ZgZQ2cYGOtJmkTT\nhkXyzsGB3xTxcMJMZ2g1R3eWnyttU804uGxjlsVmUWGl3Mw3c0/7UeQOVXjms/zNu4n7woZy\nN7fw57UBcvvcdB049ahkmYxndwaqNtb5g2TjgUiuZIlLLhiOlaxM5GhGT5YHqKjiOC3GarrJ\n+8GCc46VLHuYEk4Ga6ImJIZPM4YEY74pu0s5xyvYg0hkWPcrZYUKNq4U7Qea1IHthU+7nHam\n+azM3G3jim72XcQQ3GME06Py2Yb89KBPQTc8cZIG5c4pVmTeB0PpTeV3ID8jHdinIu0cKM0b\nDHNjBPH0pkmWYFRkEYNOPzOfl2tjmkRTjaDwORQBDcAqRjhcc1WaQt1GRnpVmRTIMAc5znNQ\nSZ805+6tMljUk27iBxnpSNluBx9KbG+NyydDyKFU+YnzkjpQIeccKvysBRukaQFug60nMchD\n8c9RUiRjjJzjnPrTEP8A3XvRS7/9n9KKLCPjcMqsvqeMU1WK79/BU8ccCnKwyTtDKBmkZVjh\ncMWIJzwK+bPWsSK3yy
GQhlxgECmSbjGqoAR33HFOEilQGJVSeCFpjL5c+M+YD/EKdwHbgzBf\nwwB1ojVDuUPsVT09/SpNuC2CFPvxUSyDzPlA45OKBDkBRl+fd3x/Sl3eX99Sx3H5R1qNSQT5\nbbUbnbUvzZAQZagLirCN24nMZP5VGy+WxXqQcg+opH4kQbcgHJFT3DBk+7wDkmgerGqu3JDZ\nBGTilI8uQNjcrDAx70m7y8EDee7U1m8uQOoLL6elMVhJA02VbAxx71JCv7wEnIx0pG2AhD8x\nb5iaNgZSQ+OwUigBAxjc7+eeOaSSFVPJwG5I9KSJRjEq55wMU8oem35lbv6UeYC8x7TGuOOG\nFNkcbVjxl2PJ9KMhd+CGHqP5UsZZVyeTjt1pgLId9wUjlwAOfamNtkVQPvK2eT+tEkhVldYs\nN0LihnBYqI/vDJPagB8jMJFIw2T949KTMigk4JLYJ7fhToyTHvx04yeooZi0gViCCucgcUr6\ngOb5GJLAnsBTVUZXB4Y8tTVkUDOd23n3pGbdHtIwCdwANMByxkOxGZCD94jtQWG7djjso9ab\nGzTZKgoqjjJ64pyuSQW4U9B70AEkbR4AIDH5j7e1CkrGXzls8H0qR03KGPyDPIqPhlY9UHQC\ngXUFMpHPr17VJISy4K5IOabHI3l7Rxnk5pyMVPznb9OtLYBf3kmHB2gnnFIpPVCQN3U96YpL\nKjrlQxxtp0sbIrK3ODnZSAV1duAFDt1VqWNn3ZO0beDigfMwz1zyDTZNiybEJKmkxrc1beOK\nREfrk8it2xRHiA24APLe1YFrIsJQbM8ciul0qIm3ZO7c4zzWMjqjqytMJWbj5I1/WszULchw\n6/Lu710F+iLIFzgsOayr6FvJ3qMAUizGJXydpJB71n3EaIuQSy56CtK5SPzURHGMcjNUJN7L\nKioMjp9KoCgz+YShb5VOcVD9nlkYnb8n3uTVhowjp8uWZc/jTZlK4zzGecZpoCqqtIxlXBHT\nnvUEgJi8zuzbT7VakjAYEkqp6exquylW2pyv3sNVkkEkfzKP4lGfwqPzAySl2ynbFSNMQylv\nvYPWo/OIXBGw/SkAoUKqnGdwx9Ka25W2p8oIwaftZkEf3jnIb+lMbEe1mGQT070IYix/Kytx\nt/i7mowpDE99tTODtOVOwnt1FMVmCl8fJ0x3PvTQiMJJtBwBx1/rUbKyrlny/YetSXLMFB5I\nOBxTJNnTO3jrQxgVzEVcfMeRj1oaPMYHU9/XNNGGk8xwVAXgZpy7WVSWI5zmmMawPG0AsvcU\nMm0g4znrTgVy21cjNRthVzu5oAVsNwDgE4NNKhWwucjgMae2ABj05NL5crZAcMoGQf6UmSyK\nXJQLgBs5OeM06VS2GHT605izqDIoJH61E2dpHQg5C0xiyK4YbYwynquccd6jLIvymP5DwOaf\ntMxXa3I5INO8zuF35OKBjWjfai4G0HjaaJvMVirYAXkE9/amog+dVctzkkjFNJRV3O5f296B\nMeZP3eScA9cfypQF+YKflx3FMRhypXjGcUshdo1UYI/ipCDdmMKevb2pjMG/hA7FvSpG3LtL\nfdpkv7wHjyyO3rQAi9Ubh9v6inBRJuf7pJ+9npTTIrIy7fLbpn2pZFDRooDf7Q9R60xjTkqQ\nPl5z9afuVU5XaT0GaGK/f5jA4H+FJtw/C8Htn9aBCeWPN8wKFGMGmkMsfzHqe3anR4ERKAls\n45oztblfnal1ENXd5y8BhjljSNtjyzNliei0qr8u1jz3NHy7SvAPrQAu47A2Op4FOZ1dwSn3\nRikbnABPTpSSSCNdxXcDwM+tHQY/zBu7gjpUsdyqqVyQT1aodpVhnG3HUfypnmFcq4G36UAT\n+bu2hW3L6mnlz1B+ZeQ1Vt0YIAHbpTFYruO3J7CmBdaV5ZFbdtjHJ+tWIbt+GKgjdWZllzlt\nxI/KlRn/AI+uKCDXa78p
XHKkmlgvNj7jnG3Ge9ZdvIXUZk3Z7Gpdw6t+7A6UDNdr5ty8/u8V\nNHffL8p4JzWEkgHzAkBu5NSeZ5Z2rxmkBvQ3kkMgYPlm46VJ9sMcyfNnPJUVifaTIVG/j+tN\nacxqB1YdCetAHQzajIgHQ45psN8siEsMnNYf2hzyGwDwamgbEeGPXjP9aBms822RlPB7c05p\niyj5s46/Wsw3KSbgD93ika6bKkcKOtMDYSZj+AyWPWhrkDDKcnOSDxWN9uaFcBshu9PNxuKk\nHa57HkUDNpZhuMrYCkfWnRXCybgAG+XI9qxvtRkkC7gF6EGpPN+YhDgH5c+1AkaRuxtjTGSW\n69qnVUWRhySf4j2FYxmVVCofu5FO+0SttDSHpjFAGq0sZ/3c4zTmuE8weoHDCspZvMYKvyqO\noNEdxtk3g8Z20Dubq3TQ7nV2J65B6VYh190ZcBskYLk9aw1uOMD5EXk09pAFGP4jkCkBvw+I\nblUZEdw2fXpU/wDwml9tWMybo17N3rlo5JNzsWY49sU+KQMuRxn1NGojuLbxzcMEQlYx1O0k\nfrWlH49uDtCu0exs7c5B/GvNprweX0yenNC3Xl4PLHGMA0WGj1a2+Jl3CpIlJLnJCnpWjH8U\npFZFe7PzHBBNeL/apomGBmPGOKsLIo2yE8encUwZ77p/xUMaEefsQHHmN3+grWtfisGICzZU\nHqp5+tfOTai6r5YOF6/Wlj1SYcCUoe/bilZAfT//AAtIsP8Aj5aQevpWtp/xUaRMeaNqnAy4\nya+W4ddNqpCu/TPByKmh8USSg7gcZz1pWK5j6wj+JKJMuJsbvfGK3Lb4nLEwHbH3gevvXx7a\n+MLhMu5OQfl3HtWnD44vHi+Z/wBe1Q43JcmfYlv8QlkbiZTuGdobn61qWvxIto4wXu1LdyzY\n/CvjK1+IVxbyIwJn29g2DWzD8TJ9uGwr53BmPb0pezRXMfYNv8TVlbEd0F9zxVv/AIWBGy7T\nKOTgMrZOfpXx5D8T1WNhLdbSTx361PF8UljZf3zCPON/vS9mPmPsmPx8sALPLtxxjNTx/ESN\nAAX75JzXx0vxQeNWH2lsZztLZ/GrEfxT3KH85pAvYGl7ND5z7Lt/iNHMSRIjEdl9PWr0PjyC\nbAV1APvz+VfGdr8XJd4SH5wwyecAfU1qWPxZLcicmbodp4/Ol7KIKp1PseHxZbsvMwU+7VcH\niqFlAMylD3Br5AtfiqG5djtX+HcT+ua2LX4rRALtnKkc7Caj2XZlc9z6sHiKIrtS4XP1qaPx\nImBmVSor5ft/ipGdx89S2N24nA+lXbf4opJtxcHdjO0nFT7Jj5rH0z/wkEUqYEwGKsR62jAD\nzFYeua+cofiePvNIVB44NWh8UFiZRneB6NR7Ji5z6IXXo88YIHvViPWEkXIwB9eK+fLf4mFv\n3gkCjsp6Vp2fxG3sAs46Zb0q/ZsrnPc/7QDZOQR9aVNQjVQSevvXkEPxEVsBZeM4arKePrWR\nSGuOc8DHNRysfMj1xbxCDg/rTWuUPQ/WvL4vH0W77zIvqTViDxxE2cyZOfXip5R8yPS/tS7c\n5pjTfNnPNcND4vtpgu2XJ9M1ci8U23OZBv8A7pNTyhdnZIwZaG4HWuTXxQnLh1x/dzTj4i8z\npKGP90dqdmPmOocjZ1yaTcO1c1/bgYDD8VIuu7B8zgY9aVmI3pVCjg1Hzxg4rGHiFGYMx+Xv\nzViPWomw2PpUu49DQZWAyOtM25Urnn3quuqoXKscUxr2I5Af5qTAlaQr0ycVBLeOWBHGKa0w\nHJPUVCSitgnnrVCLa3xPBGTTvtu1elUfNQDrzTjPGFxnBpFFr7YxUntT47wKMtyKzTdKowDn\n2oSUdf4aaA1ftw9yaX7QG5zg1l/bI4/4x+dI18md2Rj60CsbC3JX5uq0NcZzg896yPtqMAzH\n5frTFvR5m3dimFjVW4LNgn
ApxuPLbrWU10u3l+/FDXAbnfk0Aa7XAbB4FNmvAuD0FYv2w9jn\nFC3BbIY0AbTXAXndmka+3cA81j/auOv0pGuNvzdDTsBsrdHGD1p3nMvzZrGe9+Uc8+1Au2Vc\n5yKANuO43EjPNIZjzk1hi+2tnOCaX7a/Q9D3pCNf7V1B6UNqAZcY4FYz3ynCE0guVjzzTA2o\n7wt8pOKma7XrnFYX27au7GaRrzcoOKQGzNdgMNufWo471wxJ71jNeFcEnBpTfYxnjPegTNr7\nXuXB60LfiPK9qxJLz5uDmmLffLhgaoZtveDBOefSkW8LKe1ZEl0Am4HcKh+2NtzSsI2JLsHg\nc4o+2EjgVki6Xy8980n2pVyM5NIRrrdbfvc+nNQ/bz5megrKN0W6cHNL9qVmyKYja+2hlyxw\nfT1prXwZio4GKxZrzK7lPFNW73ZJ54pgbJuiq5zmlGoHPPQ1hi4LZwTinC8ODx+VFkOxtm85\n46U37ZuDDNYovBtzmmteDbndigLGwL5o2py3w6Z5rB89yuQ2RTftgVsjk0FG8NQIfaOaQ3rZ\n5PFYj3m5euD7U17jCgAk4piN1dQAGCee1M+3blINYn2pV5zz6UgvfL5x1pC5Ta+3kY9Ke12M\ndcd6xPtw8s56movtTDJJOPekUbv21W70f2htbrxWCt4ucg03+0A3BPNVYlm62oHsaVb4yMCR\nisT7aF4JqNrw9iSKQjfnuj1DUpvPlyDgVzv2zuTzUn2zcoG6lYdja+1+jZpDqG3OOvesE3m1\nsBsGhrjqc5ppAbral025BppvGaT71c82pFcDJNAvw0g+agR0P2w/j7037WNud1YTXgx9+mrd\nGME54osBttdFpAASVpXvAvHOPrWF/aBz1/Kj7aJCATzmgZuR3Qw3z1F9oLNkHisa4uinCmmR\n6lmMjPFLYDoWvvlwD0FAusx5DYzXNtfM3Q1K19lfvUa7gbzXIXnPFRNfhVJB9uKw1vMrgtTJ\nLzt2osxo22v9rAikk1A5wD+NYH23b1OVpy3o2nDCqsxNmy14e7GkW+9WrE+2dSelRm8LEnPF\nOzFa5vNeNtIZsCo/tix85wKxmvguMnd+NRTXe7HofSmBvPfq3zZpn27cMbq583xXPGVFNTUF\nb5s4osI25tQbfgDjvQt8OFLGsH+0v3m0tgd6j/tJTIVVuaXKFzopLzoCcVBJqHUAcetY39ob\nuepqNrsEHLYHpRyhzG19sKc5ytBu2lb5eK559UCnlvl9KT+1wvU4HtT5RXOg+1MvOeO9RnUO\npz9K52TWk+VS3BNRLrMO8/OABRyj5jfa83ZJOR6UfbRIvTArmptYRslXGz69aqR+IIxld+Dn\nGM00ieY6pbzb0bipPtJdM1yLeIYtrKp5HvTH8UIFA3Fccmq5Q5jqZbzywOc81DLqJ78Vx0vi\nmHa2ZVJ7c9ayrrxlHztkwQOhp8r7C5kehtqUawjBGe/tUK6mh/j2n0zXl9540XytolUBuuTW\nc3jeGFSvnhZOvJzmqUHYXtEet/2wqZbfnFQvq6PgM+0HnOa8gPj8TZCzYXu2ar3Hj5FGFk3E\njGW6Ucltx8yZ7DJrkXlkA52+9VZfEyLtwdw6YrxbUPHwgwHmAbGflNZdx8QxBuJkJyNwUHNU\nqYuY9wn8WRw9Ths9AelQN4sVmLLKq5/vGvny4+IyspO/azddzVQn+JEW3K3G/HGAeRV8hPMe\n/wAnjqLD/MCFyAxNZzeOlVCJLnAboV6V4BP8R45oysbHd79Kw7r4iy8lJlB6eWOlPkuQ5H0H\neeOoWYgXK+YOqKeapzfEKI5xOQoHQtivmvUPH020ssqxuTjjrWNceNLi7UqST6EHvVqCI5mf\nSN18SUVTtnYE8fN2rmL34lKsyEzmQZ4Ga8EuPF1xCjM05PYgHoazLzXZ5m3s24dRg9KrkQrn\nuWofE5opC3mnk5C7qyb74kv9
nYxt83fJxivGJNSdoWIbnGeaqWuoTspMjs5IyGzTtYZ6pN8R\nnlYMMo3dkbIPtWTq3j57hUCTMJSfvjt6CuDa8kVdmcEnPA61Cztzn8qVhnT3PiW73OTM24jB\nZj1+lVj4mk3ASA4UYVlPOawXm87azkBAMe9J5giBZzgelOwWNn+3LmSQsjCPHJY9T+FNj1i4\nG/ziQ0nO7tisVSjcljmpNzSEbnJPagnU0PtbSKTkls4HNNVtj5k+YDjBPeqKzhnOOo65o+0L\nJJj+HvzUjLv2jdGythAehUd6g5xkuPl6tVeSf+6M7ecik3qx3ls7uSP6UgLMknmMjnkY/MVN\nHdJuRiMDp+FUDIFZWZsdMD0HpTVmPmSMrfKTjFIDS+0eXIVyFJ52+o7VWa6w2WGR6elVmfy2\n2hS3pmoWzkAcrnn60FFtpty4wOtQSSs3GOPWmYBkClcY5+tG8s3+wPTsadhMf5g89Bt+bGeK\nieZmVQRjHYUqsSTsKl89zQnzSFABnGCfWqESJI7LuySe3tTuWUvv59BUCkq2OgHFTRbmJygA\nIwDTAcGYMVPzfWnRu0kmOBxjNQLl/NTdiRT17Uu0SRHjAHfvTQidVZsZOdvANPYBl+9hsfe9\nfaobckqQjYX3pyttT726gYqyfMB3AqXzCy8nAPWoo0YOdwzgZOKVY1aM8ctyCKaETxyAHaCT\nxRu4BHUH5qYo/wBHVMjOefWnt8ybVXkHFAkKreZMWVuPSrPnNJIO+B1qttypG3C9z71J93BB\n2gDPFMGKkm+RiOp4wakjTmTIw55FMj/4+GbOeME+9PWPduUttPXdQQP8tdqOTwO1S7N2ZMbV\nPHNNMP7kHI69KcVbgYLFaYEkg+UDbz2NTx/MudwB6c+tVtzspwMtjFWSisqhhtYL+tMQ/kqq\nscMDzUjMApwMjGarnDbQcl9u6pIW/eZ9ulMkfHJlsn7nTHepF3DqMDPB9BUCt5cbhh95s59K\nmVhu+cblIxTQE6ncpYnco9Kcsm5sqDVaORVj244zx/jU3nfMONoHX396oknXcuF28saemI87\nOv8AF6ZqGKYOpkUMwHAqaPLQjcOScnBouIkT94q7WIOeRUx+Ucls+lMyAyEDJJxtFTnKs3pn\niqJHRSS7R05q8gDAbuMCqSnnIOwY5z0q0qiRRg5HrTAvQybsYGSOpre0V9sZ2nqefauetW6K\nGz611GlwjZwflIrNlGihYdRuWnRyfeCjtSJlGAJwuKd5a4JGayLBdy4Zlpd2VwehPNPZhuXk\n4I9KYyjp0P8AdoAXy8R4B+lNkPQAYpfm8vj8qaPuD1zQAsTbZAzdPSl8zbkjjJ4qPnLN6fzp\n8aBmVmJ245FAAAzMMn5R+VSrjzDsAVR19DSFfJbP3gRkUM4Mee3egQkfzKSMEZ/KkWRdxB42\n0KwUlVXrTi2GAxgY59aBC9GB5OafgnngVGyk7ccgdKRj8uM7TmqANu47d2Cal8ndtG7Pt3pk\nluVZcsGVutCsEYEfw8EUwJVk3sE7jvTPMYs3dakMgP3VzTNo2EdD3FAhhwjjZznrUnnbVHyE\nU6PaIznAPZqRlbBBfccUwEWP5gwIyedtGfmP9w+tKuQOV6DinRx8EN0J4NIAIEeP4s96VVbb\n70KflLfwrwTRu34I6diKYAqsMnPyelG/zFDDgg8UuGVSzHvgChR8+0YOOtAxsXzMWZc9Sadl\nnQDb1/lSnO4gDApVbnGcAU2AKVkB2DCrxQ3C9OlOAVYiAMt1pvOAwHOeRS1AcJM7d5yzDIFP\nx93kY70xdxVf4SBzStt4A5pAOhaTaQMbs8fSms3mH+dNbJwF4PrT+S2D0UVQEfHmLgc0uWdy\nWGRmnLHv+Zvl9qd90dOOtAhrt/D0PUUquNpBOSetB4YE8Z6UqkLuyozQMauS5UjAxwaWPO7J\nOQM8Uir+8GTlcZ5pdq/MQOcU9B
CctuwueKFkbGc5b1FPRjnA44waaw8vI4Uevagdhqs8ylCe\nOo+tIq7VznJHWkj/ANXwec1KuNpBGc9aQEYZmUnGW6inB/LXGPmNKynk42jHAo2qdvrjpQIZ\nz5ZA4p7KQoKc4FC7yWzlFxwD61HFu28rjtzQBLGzM3zDg8fShoxI5G48elKq8HP5ClZivQHB\nHSgdhrDO7HQDrQzBGTJyMU3eH4ClR/Olx6jac9KYCrIRnA3Z7Gnfw8gD+lNky2TjjpxT/LG0\nIBk9s0CCMk5GPk9Kc2MfKOcUnzFic42jBFGSsi7RncKQCqsfD9GHWhXk8wlWyM8UxZPvoV+p\nojkR9u0420xEq7Axdm570m5mXk/KDnNNOF3EfdbmnRybsqR9KLDGx4fqvfOTUn3W3MMHtTV+\n8STn1pw+deDu570xjPJ2yZ6jrTgp64zz0pfL2tknhu1KsZ3b1O0ZpCF2gqWHX09KfMjFVXOW\nYYNLtaNiB827nNN4bJOc4xn0oAdbqN5B5VRjmnD5i275OOKYrM0WOnt3pW+VA/J7ZqgD5sZH\nG3qPWplgVmPmnjGdoPQU1W2xqV+7nPSlVTHLI2R+86/4UxAq53gfdzx7injAkXau4Y601cop\nZuh/SgMIenzA9MUADRgNlT3ycmkEiTH0APJqQbVIGMtTlEcYZOjHkmkMVmGE546Uu0Lle3So\nskZZxuLcipTGNqZ5PU1ZI2BdxKqcRrzj3p6qgwDwepNOVdpOGwD0FH5Y702AsjhvmLYYYAPr\nT3ZzgAfLSN8xBIGAOBRGxRic4GNxqRCnCwuSc+1Iq7Yyp4b1oDM53KdoYcg1LtDNux0/WrsA\n3jaPmyRUsc25sj7uOtMb5uCuBTmbDbSRux0xSGM+++acsg78npSHJbPQdKAqhsHj0NMQ7ndt\nJxzT+d2GPFEW12Lk8rwM0i7D1yznmpGP8tNvydQe9KpMgY9FqJVXb3qRmSNcYyD19qoXUViU\njVSOp4qSJQrMHwVxwKj2hVjXdliaevy7zjJBoCw9U3MuDkU+Fv3rlugFNVd0eelIQFjKj6n/\nAApgiSPEONz4Lfyq7C23dg7xjg1TTZMy7l6DoakhUqrqp5PrUs1SLYZn2scYPal3F5jxt2io\n8blUsMhaljlBXj8akBNpDdc7qVVPzKRx3qM43g5+bsKfKD1BwO9BIzaAFGSDmpSQGGznHala\nEMoG7DetM8sxsQOSB1FVEmSZLGFZjk49xUrfK/LZHamQ5WPnjjmnnG1c4Jrcy23FbMP3hkN6\nU7KHg54FMk3b1G75T3pis+GLdc8VabExQdsuQOKe24ZY9e1NXcyk556nNDSLtKscErxWnQgM\n8gt6U7c+5AOOajBJVRg5xzmnyK3BU5FLcY+TMcjd2oUnbt69yajCt5Z9QetOVWVSScenpTAA\nyDLAHPY1GyKsZkb7rDHvT1V2RiThRTFUSQjJOzOdtMgrKpbAAyPekmmWJGJGPQe9SyJsZwWy\nRzj0qGRfNzuOABwTSAfGu7YcYLAZJp6udxVuecAimNJsjbb85Uc035VVW2nmmBP5cnpRSecv\nv+dFUB8db/L4I2AjHSn52tg85oWRd4Rx2yTTPL753Hdxj0r5o9Ue4Eq5I27Rzk9aQhX2Hbhe\nvBpOuRnOecH1pqxl12qM7fmJoEwkzMdwGOfTNO2h2wcKfRaRSfOj2fePJ7DHepEBh3FiAhbh\nsZ/CgLDVVW3fKEI6nFSBmCqFG054ej5hJhjjvSNI237u7nAx2pjGxybvmXDH0NNG5lfB7dac\nsSLERIdrjnPrSbAkYVjknncDzigY6JwYwNvzEcntSrujUtlQPelj+YlQwApv3sgjK9MmnckG\nMjLvZQF/vU1WHl7mbkml+ZozE456+wpvk7YyQQUU/douIk8xwBjhs9KSZhJLuJz/ALOKUtiM\n52lyfpTG+Vfmyfp2pXKF4fCqu055
FPkWQSKchgP4ajEh+UA7/cdqc2W9QScUEj2mVWO7jjiP\nHemxsfL2t8rHkihmZcbsMq9eKFXcrODuVvfpVAKuVXCE4zyxpIlZQW4LA8UjfMgG7jpilIZg\nAQVQH7wqeoBuKqcpudu4pm3bIoHLY/ClMi4IVjkHjFL8xBfb97j2qgJGUPIqBwT1Hpmo4+IS\nzJzu6e9I8LviIBRtGRtNPHyqpUbgoxQA5QtwwHKt33dKYpZWcBlC5xwODT2zJHkDLUkeYWJX\n5if4aQgcKcDdhh19qFUrMsjNtA6e9N+Rcx7eCcn2NOUtJIcDCAd6TGOVtoZido5wtOSMSQgB\nST15NMdH5BbccUxcGNGALDOOtAEjMzRyFgoC0qttRMYx94ZqMAr5is2B9OKeVdTGWU7cdcUi\nkatkwmkVlxgde9dVYoPJyQ2c53e3pXF2DGCZTkbGPSu60ZkktZCPn5wBnpWUzeAy+I2g7Tt+\nlZcjLcNhslOlampM/lgNlQePrWLcQNF91wB/d9alFmXqdvHDJuRMkHr61SWP5tyvk55Hp7Vp\n3EbtHuPArNmVcHC4JPUdc0xlW6UJs+bazdPpVKSRd4I+YEcr2FXZIUwrt1X5ev61UkU7vmTa\nOinPUetNCI1Xg4+v0qGbMcbbSvXO2pdxUED5ccZzVWRQGI2bEziqAjkXcpBj27uSCaavmDDb\nFwvc1LIjt1BZR1IqLB6Yb12n0oBDQsS7pDN1PC+9RCRVwvVs5JxUskce0L9xs5G4dqcy4Odm\nVxge5pjK75bcN5bdwVFO5UFiwwq7Qv8AWlUlN4GBxtYGoZFCKuM7QMetMQkeSi5XOBnnpSSf\nvV9C3b0p8m1sbVbH93p+NDLvbpg4oGRbTGuB9xT3pi8sd7BYycg1Jt3A7TtbHQ96au5l/wBW\nCR39PamIGkV5Pk+Qkfe/xokxwrfMRzxR5x42ptXoVprZjZgExxwBzQIRfmmBJwuP1pWOzkDH\nqBSIPkBPX+770KA0uSxz3pdRDCrbhzuQnn2pWbaFbIJ7Cj+FmBwpP3afHGHbldu0UxjS/wAx\nym3jkrTIo22M3UcnAqRVMcnuRTFyrMuOf50D1Ht5ciKUUrJ3Y1GzLGpdxnBxjHelXEcfQnnp\n3FAkKqSyeaM9BSASOQEMwUbiuMU3D8BhnPGRTisRA2Z83qQabIu35hkL3FMQrSOW2Kc4OCDS\nM2zdn5UJwAehP1pfLwu4NuLc0m1plEa4wejN60AxNzlegIf5h7Uu70BDHqaaxJZeuSM4x0oY\n7WBIz6AUDFaQtyV9qCqt8gO1cZ3e9N3SK2R94c9KVt0q5GPXdTAVfmI5yR3+lMZ1mYkHPOOe\n1KrNsBBye9N8wFh8uMdcdKQhY15ZflOO/ekCgsc8D1pehyqj60iqPMADbsnk0AOZieByf7wo\nRQvU5xzg0gITepBPPHNJ8iqpA+bNIdhAVjHmBS3PC/1pzI3lMG6HncaFyz7Quw9Qe1I3zfKx\n3c/h9aBDZCV2qVxx1pOMAhgy5x6YqTJkbbg8cUhTcDwAoOeaYDNv7oY+9nmmP87EjsMe9Pk2\nrsZAQc4pr4+h3cmgmw8R7Y1VTlhyTUgzl2lbcmKiZjygbGe9OBYgqWUcdaCiRX3xDjK9vejz\nzHJk/Mfem7hGSGPygY2jvTN21cOB/hSJJ1kCt06nOKe0g8xm3cVVEzFjxlAOKaHXawZtoz36\n0xl1bgK3AJ96ctwYt3eQ1T8xcplsDjA9aGmCu5KndTEXcMo3E7ieuKal0EyOevBNVPtAVfkO\nexpqt5eS3ze1MDTadGXzG+8OCR0pYJlZyQMJjgk96zHnMmB90Y6DpR5gwowSw6NTC5qNKuFL\nNjB5arMalm+YYz0GayUm3A/3upq3a3G2YDeDxSH0NTykhUc/MOajmmaMhhgk+9QrJ5jMzBvb\nFOu7jbECoyxGOaSJEkmBkUksfXHSnr
OI2LbRt9M81mR3ClSuctnjmpFugvUHPTJo6jRtpefu\n2XZuGe1DXTTY2/dX1rJW627udhB60qXm6EsO56UDNX7QVYM5JyOxouJY1Zdr8N1rMS48vcS2\nWHQGhJFxnI9TRYDUWdNp2nO2nC8KxgkgsT6Vkm4VSdp5/nT4boMSdntSA11kWNWVjg/eqOC5\nLOS/3e3pVL7QG+VuXxTWuAsKgvjnmgDVaUMwOcY5z6U1pk+ZpOSBkAd6zUuOuTuj9ab9qDZG\nfof6U0I2IZA0OU3HB6e1Pa6Vyy427TnHtWVDdOshOcKVwQO1P85NpYj952B61IGq96jYfH7v\nHAqxDqKPH8p2nsGrC88cKPmGM+wpftQZioGW9aYjVa5aZS+795nHHFTSXEj7Czknr8prFEje\nZymOMZFSfbCjBVGc8DJoC5ptdEElmyTyFPpT/tc20bW4xkZ7VlNeDht2EJ5GO1SfaG3cD5Se\nFzzikBoR308bA7tzHq3pSS6lMrnY7E99pqos6ZZR0qN2CgbWOaaKL326X5S7uuOwatFdenbY\nI9ykdSKwRP8AvFQ/MWqVZljzuyyA4O2hjN+TxZcQufLfcOxB71JH40uUkUNO8rYyR0rnZWHG\n0g9+naoZHBPyKfep6gduvja6bg3Lhup71csfHN3HJkuZAGxnPb1rz0SMqkg4XuKnkuG8tNjf\nK3WquxHp0fxFCyFfNZhnn5uKtR/EJhI4+1tjHGen515YAfL3Njfjpmlmnby1wM54NAHrI+KU\nm5EL4RvlMinIrbs/iRJDINlxvKjGeMV4SvmQ8I5bnP0qzFqjbsH5FWkFz6GsvitLCw+aPcB1\nyc/WrR+LLSR7nlbcDw64r5z/ALelhUuNwU8ZFNXWpduFJf03GloM+jn+LTSsoluct/eLYq/F\n8Xmh2L58eD/dbJ/KvmObVpZE2l9r9SGptvqzxuHaQhW4LKaYz680/wCKrMoeKYKO7Hj9K2IP\nisrqxEmdvRR/jXyHa+KLmPBjckDgMTwa0D46uCwRSFTqdrd6OVC1PrP/AIWmUw1wXC44A/8A\nrUJ8XFjfdDcZPTGSP518kr43uFkO7cwHIbfnHtinL44uBJl1Vec4FHKhXaPrqP4uEttkmJB5\n3IeM1JD8YdpK+bvbPKua+Ro/HVx8xklCHP8ACecUQ+PpkmcA4QjhieankQ+Zn2ZB8XRNhWC5\n7DdzWnD8Wo42ALducZ4r4ph+JTw42u3mHoAasL8SrqJdzyydfu7+DS9nF7hzM+0V+KsDyYW7\nLt6HitW1+J1v8rNIuemNwz/OviK3+KDzNiSQq/bPJFalv8SFhUb542kPTHBB9aXsoMrnZ9tQ\n/Ei2ZtpuQremanXx1Fux5okP94Gvi2P4qS252m4Qt3Yf41cj+LjTDmRmQckq2BR9XUh+0Psn\n/hOIWHMsalevzUjeMIWwVkD577q+Nf8AhbKrGHacx5bor9qnj+LRzuS5kPPd6Tw1upXtD6+b\nxdCGO6YAemeaavjeDczPORxxzXyMvxelPzyTlf8AbHNJ/wALYCr5n2jzZM4xS9gHtD6zk8cJ\njfvDgDHBFRf8J2G4DZ77epr5THxWkmU/vAqd9wqIfFJtjBZsHqGVsUexSDnZ9bJ46BwjyKB1\nAzg/jU3/AAmRxnzAG6AV8hf8LVkiUuZftO08jPNbdr8WY/LjMr8N0z2o9khc7PrC38XJcQ7t\n6krwRmpk8VRbGbcAOnWvmGw+Kwh3K0ixMeR6fWtGD4sJJws0c395lPAodIpTPotvFCKmQeno\netVZ/F43AqcP6E14E3xTimVtsnT+6eaiX4pW7D55VDemcml7MXMfQg8WKzZ6HvzxQ/iwnGOx\n6ZrwCP4lJG3zP8hGck9KavxOhZiTcKwPGCcGj2bDmPoP/hL4uctlgfu1LH4sjc8sQP0r53b4\njQcgTc+zdfxq1a/EaFlCea+P7vb86Xsm
XzH0G3iiLILD8jTW8SJyC/4ZrwaPx6jKWE5Vf9oi\nmv8AEBWZSkqsD1OaPZPqQ5Hux8SJn5TzTh4mQr97n0rwWbx8FwQzcnGVNOXx4Vj8zzVjTOCX\nNL2YuY94/wCEmyp5wPc0xfEytFgyBeexrwdfHyhsPc4PbHT86kXxyq8ibDe/IqvZj5j3n/hI\nY2+XzB9c1FJr27vhfc14UfHO5dxuMjPJB5pw8fESKYpTIMZ3Z4o9kHMe4J4ljHBY+1P/AOEg\n6liAK8Mb4jBWJadQc9uaZN8RON32hcN0yaXswcj3mPxJEuEDKQfej/hI4FZgZMj1zXg8fj75\ncmbcv93t+dWV8eooxI4ZyM/e60vZsnmPbl1xXbIcEfWk/t5EJbdxXiq/EKNtvluUJ4KiiTx0\nVyDOB7ZqfZsXMe1f8JBFIpw/IqP+24znEgB9K8ZHj5IlU+coycdaVfHCNlhMpVepDc1Xs2Pm\nPZI9YBHU4z60/wDtpSxUNg9K8YHjos+Y24I6E8/lQnjgKd3nYb0Y8mj2bDmPaV16OPKMfxpF\n19BwOR9a8a/4ThGALSk5/vHFKvjqJYy3nKQOmDS5GHMevya4ik/Ng+maBrisuC3bpXjg8fIY\nC5kJk3ccYoHjz92xeTae2DVcjDmPYf7c425H0BqP+2gGxu+Xvk14/wD8Jw7KpLhGx97tTY/H\nQf5Umjf13Gl7NhznsQ19fMBLqF9BTm1sSDIce3NeNP48SNyNyqR0AOaWPx7FIctchP51Xsx+\n0PYV1hFbLNhqa2uo2Mtn3ryKXx7btgmVn9xUf/Ca742CP3yORR7MOY9hOtp1zx0GDR/bCyDm\nQAV42PGk0S/8fEan3alTx2rctNuYe3FHswUj2CTWIkXCt+RqP+2I+o+97mvI38fIrEuVAxnA\nbmoz4+iYAxS7weuOv5UcjDmPXzrgfJ34Ao/txWXG/P0ryKTx7EVysvlqPXr+VMbx1kgq4IPf\nPNL2bJ5j17+2FZclsYpv9tqv8Rrx8eOQzEtNwOgJp/8AwmyxgM5Yo38WaFTDmPXl16Ikknn6\n0h10fwnA9c143N45TGFkwvX7tVD4/HIaUAf7JqvZlKaPZ/7fVX3FsrnHFI2sKpLeZ+FeJf8A\nCdjy9yzYGclc0h+ICeWT52HPbNHsxcx7e2vKqDJpza0iqPn614ivjqNUBa5y/ox6U1vH7vCM\nTAH1Rsmj2Ycx7c2spsBDc1APEEayZ3HFeJzfEKNGDSyOB03A/wA6gb4jIzZOUx91i3Wj2Qcx\n7xHrySKzFx9AaifXo143AE89a8Gk+IqSMAZwr9flbtUf/Cx42cmSckdsGj2Qc57x/wAJAqFv\nmyO2DUf/AAkHybgTzXhE3xD3KM3GAOmKik+Ijbjmb5McAnAp+xYudHvUniEqVJbn0JwKD4iQ\n8mTr/DmvBW+IicBp8HH3c5pj/ERNolFxwvb/AOtR7Jle0Pe5PEkCowLjHsaqT+JUjwwY4x61\n4W3xESSM5cbmOdoOKhl+Igjk2tJtXHHzU+Rk8x7x/wAJOHQFS3J79KVvE0e4Dzdp714BJ8So\n0kAF2yY6t1FRt8R05ZZsRN3JzVezJ5z35vFCNkK/HrTv+EqEar8wI65rwBviGnlriXaezM2M\n1Sf4lh5GJkXeBxtYkfWj2bFzn0G3i6KSRgZc45ODUT+KkZfkkGOvXmvnZviUVkIMykryccE/\nWmRfEcS/vF++xxu30ezDnPodvGEZYMGwo+8zVH/wlkW4kMG7gj0r50l+IgZXC3eSpwVbrVK4\n+J3yqqu+VPzdgRR7MXMfS3/CaoAQzKh7HdVWbxmFUv5m0fWvm5fiL525t+1Dz8x5qvcfEcLH\ns80t33Z4p+zFzM+iH8cJvwHLEDPPSqs3jyKGQHztwPOAa+cbj4mMsYKyEHONxNZ83xFfzP3h\nY46FTgVXs0HOfSd18QI9xBcBhxz61RuviA
OQJAGA/hr5rn8e3CzFy2UK8jPU1Qfx86SFDcsv\nGTVezQczPplviCqx58zCgctms+X4iea7EPtQjANfN3/CcMHDKTInYFqVvFzyMHLuVPYcYp+z\nQuY+im+IQjwPOI29s9ahm+Ixk3fvNo785r55bxNJvx5rB+o560f8JVLhy0jEg8DPFHJ1Fdnu\nF948LR/LIg77VPNc/qHxGHzgNyB0Y4FeR3XiCSZfNVmR/Ws661OSZWweepY07ITZ6fN8RJ5G\nIRxD/D8xyPwrPPxAS3YiSTeR3NeZPcPI5eRj179KQGSXKOcHqM0aBY7+bx5LJHsR8hjkJ6iq\nTePDI4Rd6jPrxXGMwC4c5561WkuGWZfQHJ5qZBsdxN4qlklKmRmz39qpyeIpF+WGRgM4GTXM\nyOY2DhyCeDQvzYDOCy/MBTGbUmvXMjMFIYg4LNmq82qTLkHAY91rMhuAzMxfj096PMTguQGP\nQ0hFqTUJVXaxIXr1p0l0ZYVkHyr09zVGS4T5VfkY5aoRcKqupPy9aq4Fi6aIKNwJfORVTHk5\nIJyxzinNMs0ifMAUHeoGuHkkkZ8KoHB9KoBJsI2/h1AyFFQLKHZ3YbW28L2pjS7enMeMljVV\nmDRibJ2ZwBSuOxbkZWh2k7T1OPShpVhUKPuYqDzdzR7vlRRytNZwsTlWypbAWgCVmATPJPUc\n01bo4wRlT+lNjddwLDBAqPzgY24IGeM0wsTt5fnKQR07UjTBo8H5snAFV/NO4cfe4zUfK4B+\nYA8kUBYsLP2K/Nnbj0qT7QVbLKQ68CqvmA9zjqMCldy7CXGEXrk9aQFg43bif3jdfQUx9kW9\nsEnsQetRNIytu3AZ5pThkGG+Ynk0gaJvMLKrLwSORTVbzIsDOM4xTFbZISp3Ec5pGkDbiOGb\n0pDJvMHftwAaI2MhAHBFQbzvRyPu96f5zbmdVzu6sKmwrEzed5m9SG46VFIwZSgJA6575oPU\nZzsA7HvSBdzKerHrVDHLIPl559DRJjcQcqSf4elNTh1Bj3HnOKesz+TGQMrnkHqKLCFXay+W\nyZ5zn1pDIfMHyYTP40770RXGeThhSruhw4wygYzQIdDgyNg4Qc808BmVx1cDhqriQRKzMS5P\npUmWDRyZyNuGApgIN3l7ivDdTT1IZcZ4HGaYzncQigcYG49KNzMp4G5eCo700A8sAhGOadCs\naRAqd5x3PemfKzAr6U5EO3ywmOd1AEiyFvlc7SR6fpTtvyFCNnpzUbTKZNh4TGc470m0H5mf\n6UxE8f8ArAoYB8elSHbI5zkn/Z7VGi/NvBycc0E5yQdrUwJFXcwUcAc1NtWSQL/EPmI9RUTS\nKdpU9OoHerELIyO4ALYxg8UxMaUUMw3bQOoFSRt5YVXDc9/akVTIkbBVbAztHXj3p43t95sZ\nOQPT2oEP3L8ipwvWnrINw2v94Zye1RRlo23cE/dHt70KvlhBgPJn5jTAsIzfKvoclqkVt0jO\nwy2fu0yHMz7AuHJ4PtTvL2ljnABxj39aCJCiX93ymGBwc9celTRttBAYYYZx/SoFcbSSfl6U\nqruyB97HBqhWLG5fL3bcKOlPjU+TvLDBPFQQydUfJAB/OlhKx/Jn73T60CLMmBgAjnjIoVij\nYx81RSEMwi24K9c1O0nm7E+7x976VQEnEe7GTt5JHSrCtnIxtGM5qhbyMwbHBY9as4eSSTLf\nux1NMllrzCNhDYIH3qsfKxGTnHPvWeMFW74PerlvMWwSO2OaokthS23cAe4FSpIq5J42nr2q\ntGzLICFz71ZXG50IyhHB96QF23UrtbhQ38VdfpduTb53cY/OuXso2eOPjOOMV2VmrLAisvIH\nUGoZSJAodcMcN2FG4cEofSiVtrfL96lQbictk4zWVyhdzbvkG33pRjHIqJWLry3Q1M3bAzTA\nhDFsgcCng7sAjGBT9p6YHHNIy7cMTg+lAhGU
qo7j+dD4wCD8o60rNty2M59Kd90cjauOtADf\nNO5QFJGOtOTaqtkZ3HgU1cqCGbPuKVvv5AONvWgZJj94Pl5XvTWyWwSDzS7nMfAwc5pE5ByM\nHvTRLDbuPB5HX0pwVUYpjk85pq7VU5pxb5d5G0HimgETdIxyML0FIyrICB8retKqn5sc+lOM\njNhMAetMGIUCDdkfhSMhfDhsgU9QN/qgOCKIl3eYSuFX7uKBCfw4X72c4NKjDec9aSVj8hxj\nP8VO2FsFRz05oESLwQQdpPFNBIlI6nvSbTuIB+ZabIxG1w+D60xkkcm/qMD0pyYj4HUnrRuy\nw3enWjjqSMVQCfOWbPGOPajnaGzgetO3mTjovtTGCSfKuQMUC1FWHapLMZO/Han7lVjj+IdC\nKanyLnBA6VIeCpPWkMbHzkkZAFIvIwDw3WlZH556+lLHjb83GPWgA3n5s84NLGQw4GzNMjYt\nlgPlzjBpy72YDbg9jQARxnbukTaetN4IJHrzUok8z5FXIUYP1pvmbWIxjNACLJuJHOPWiJRK\nxUMeOacGZOo3CkbKtlOD3oGDSc4ANLuQ43DnrxSc7ck59TRywbGPagQwBuQDznIFPG1VbBOS\nOfrTWOACp57+9G393zlQT0oGOMe6EE9fakGGOD8y07ldu0cE4pWjyjKrbeeRSKBXTH3SGpVw\nGHAYY6UxpSrBM5oiUMzY4U8mmKwobeOTnsVo+7yCcf3aSMK6kj5eaYrfMTkHsKBDo2O3DHOa\nezEjaTj3qKOEkYbI5p8kPbdn+VAh65XgkEnoaVvvfK+c+3SmNGVXqM4xTlkJjG4DIHFBYoba\nMHoaRV+VnblqT5fKCty3UUuRwVXPsaZLBvmwx4GOlJ5hwACc5zTuGIYD5elOVP50hCN+7b+9\nuHQdqUt9zZyO9NVQu49T609W3RlV65zj0oK6CPnlx0zTuCeBj1oC7j/sjnn1pik8FvlyaYrj\niM8hfl9aRV2qQGzmmbsSnHK9xThlo9uKfQRKmF4xkkd6VZGXjbyBgGo4ldlOe3vUgUhsHJOM\n0gFZgAMjLUgyFZSPc56U5WbdkjOentSLu3OXYYPFUA98ZBVs7eOtNVicrjrzS7di4BXrk8Uk\nYaRiVPC0ASfMi9Pm6Zp1uH2vuX5ad80ijOAR+tSKzKoQnJ60AMWRVU7zk4xtH86SThgw5psa\nMsjsWBHT3qWNtyEnGOlUAxWJYsRx2oDlkDBOM9afJ8sZyeKSNQ0ICn8KTAfG25dwAx3PenCM\nNkMPvdGNM2lc7F61IcSYWUFT2pCYKw3FHXK4xxSBQkmRkgDin/6wccEcURqUY/KStUIdtV2D\nNwcdc0jx+Y3yf99dqc0Y25I+lOx5yhV/d0wFkPygEc/3qc0aMgIbj0qJmYMAenTb60SZVtq9\nOuKBCxyfMBjipd21SUJX2pqYK5AwTSNnzAfwxTGSKS+P60P8z8/M1Ki5LDNRwZRWbO4g/jSA\nl8zKnAyR2pY5DG2WTg/pTEYfTJzmiRkz8ufehAO+6Wyflp0P7vEwIPamLJhcEb88inqf3Pyj\nb6rVCFOPXvnIp8bBVYsmaaqqFABHWpfM2yOuNykflQKw1SqqrlTnPGKfNAV+fPLHpUUalsA5\n68VakBjwxOaZVhseOO/OMe9BYBWUDac5OaV8qyKBk9Q1PjUs2W71IASAqk8j+9UpVZZFTv1y\ntPWMMwyRs9KlgU4IC4NSyyTyfMGQcY7etJ5e0ttHXrmp4Y9qHoD39aVYAxHd+ppDsVfILzB/\nu7RU8cYkU5OKtRx9Rt4xTlt0UjGFXrzRcrlKwUKoQ8k96iZY0yTuLVosoYYxnvuFVp4ywUe/\nNVHUTTKzMVjy3Q+lLGfkA/i6/hSyM27aBn2pFIkUbhxnqK3RhJDo3LfKePQUSLtKsW+XPNSM\nxZVAXao6UxVGSeBWhlccsgaUgcjHXtSLHuBZh9
DTWcMpQDBPNItw+3aOEHarETpIWY8duaYG\nMa5ycdxUayZypbDU9WY4PGOmPWnYYoYeW2Oc1L5abFGcgDcarrITuyMHpS7v3iqDxTDoSH94\nrHPy9MDrUSq2O7L+VSFRGrYyKcSVVVDDGM0iCs7GTJPHbNVtrHIB3DNWZlfaB90ntVY+aq7V\n+U55FMQCddzqBywxTslQE/j9Kb0ZXKbT3qQ7ZJAW4bGM0AL5Lf3h+VFN8v8A3qKBHyFC0bNl\ngSNuRxRaYyxAK9TzTt0m0AYQjsPSlKjeSvJHORXzfU9Ww3JZQuNw6iiONlbLdT3FKpCyeYee\nKIhuZgxwuM8npTASYeSu4qG54B4oLRnbvLFs7iMYxQsZVCwbdnoSc1Ii5wxHzY5FPoFxskZZ\nlMXR+eTRuPlhcZwetIqhpBgjCikGVjXcfvtgYGaEIey7uGQD1OaZHGFIDjDHjBpdv7jLMdjH\nAyOaXyVaOM+YTIq/dNIAKpJIGwAV4pVQ4Lrx9aQqsjx87Gz8wqRtrTKofAVunY07gV93ytk4\n9u9EUxOVKEHPX1qxMpWY7kyCe3ao5Gfz+RsAGAfSi7AI98RfzDuDdsZqNFI3Y5P161Mm5ZCd\n2I8fd7imqxeI5wATgcUgEbDRgodrD+EUrKJmA3/NjtSeUfMVB8rd27U7c3mN0C428fzoGEbF\nZMZ35+Ux9z70NGAzqoK459qRdojB6t93cKI4yMgvtP8AdJ60xJChQ0eAN2elNkjZYMA8DqM0\nKojOGHI6Y6U6MeYrFfmOOaQCrO+QwC+UFxyKbu+YjqO1Eu4Kh7DjbilfAQM5yM4GBTAMxqpC\nszvjjAxmkYtHCnH1A61IN4jy33exxSGNSTI3zn60gFZfLyVB6dBTBN+8UkHb1NOJ8s5XK7uo\nPYUxdyqQGzzkfSmAvnLtMmMjdjB60NNvkxtKKB0pGwQC3HOR6ZpZJFeU7zvOO1IBYy8jebG3\nBXGO1KGPkhT8rq3GKWFiI1SNMBDg0NIiSN5hJB7CmLqOWMNIQxzxx9ab8+OJCR0Io3K+Aq7T\n/DuOBQspbJIVAODjv70iixYobqQIpPy/hxXoOg23+hGRV2+nvXnWmzfvtuMDOevWvTPDkizW\nR8wkspxgVlM6KZHfea0atsVwPU9KybqE7SWwsjHt2rotSjHl4TjvkCudvoZVwzNlc5z3qDV9\nzIu1k8ndnJHBFZ9zakSA7sDGSK07y43RuOmKynkG4EknjkGqROpTaMSbstnPIXpVOZt0fyqS\n2eAavyKrN8yfL1xVeZSck9CMY6U7DM9oQxGGyzfNgdhRtRmcFsxkfrUyxEjKMoHT6UyRVXfG\nRnBxxVAVZUKsMFipXG3PeomG9RklscHmrJQxZJOSRjHeo1UcbflOcc0xEDRKzlScrjIz2pjZ\n8sDJbHpUiqVLg9yRTQBEjoByOvNAakLLiIl2z6gUmCiqxPygfd/pTz8yBNu3vTRypjPPfn+d\nAhDvKbXbJ6/QelNIETZ27k7Uq+Vu+8ZO/oKdH8ykDOM96eoyFuw4Dct9KhWYtwAVDdTVlkKs\ncAn1NJJIUz8nHTFMBqLIpOMHj7xqNd0XVg644QUgjLSYOcfWnlY/N2+3GKAGiMM/mBsLjlDT\nGjZXA6qOfwp+NzbSenT1pVbCnP0pAJwGwwAJGQahUGNdpbYOuDzipG+ZVB/I01WSQkkfNRqM\nbIh3x4O1T/GKfJxNvZtpUcCh9qqvOCO1O2rJnjg80ARiRmUMWy2e1OkbyXyCdzelNVlmVl+7\n68YoaP8A1bBuV9aYDvtWwlSmHPRqj2MY/mfAzyakKhpASQzZyDTHJYOM8E9KAEBUl2iIJHSk\n8wtGH2cqeR6H1p/3ugwqjkCm7m+zlPvHPFAgkJ3hAQO5J705lACquA3XA7U1kWZgWG3ApqgZ\nJU5PIJoGNOQyjduyeWp23ajAdM8Uuzb91cACkaLe
+Mlj1FMBjKynPRacrFTwqhD1zTUyysCc\ndsHmpPnUBjjgY+tIRHuEi7QMc9aGULGWVNvODS/eZgVCvjqKbkR/uznb1x70BcXarH+6wH50\nbV3FgcDGNvvS7U3LkEHrS+W0bksAY24zUiGHdhSMgdDS5HTHljpuppkZIwhz96nthSAGBBNO\nwCeWFkR9+F6f4U1gxEjZG3p7Zp21lZi/QdGNRKAATKdjt0Hb60CFViqqFYMQOR2/OlYsq4Kn\nJ+bPYj60ih3XadoPTA6kUqkodgJ9h2oGKrHaoTCITnmmsoHzE5H97FEagsQnzsOdppnyxr8z\n5Oev9KCdSVd3mHIBLcgDmkdhuIYdOcentTV/d9PunnikA+8q8Hqc96YCtJ8wfbjcOPrTAvmM\nd5+VuSPQ07zGZct84/u+lG7/AGDjr8tAxFj8wqoPPcmm7ip5OecVIqruJP3COGo3ZwuAadxC\nsoB2j5iBnFRyN5qrtHfsacrfvM4IoQ/6QcL1FMQkg8xfl+U9CaMFdm05PTFLtZsr94daT+6N\npUk8ikV0HeYVY8+xxTowI8SAY7detNaMKwTkZGcf0qIRs2cKeOnpSJNa31RFhKyKQQetUjMb\niR3ySP4VzUTJIqjeQVI6DrTVDKg44znNUMlKqsQ2ggE8tTo5GOVPzj0pscjMpNMjZ/m3ffJ4\nNIZMJPMyMYO7rR5pEzEN2xTAu0gHse1MdtvTueQKLAWluGjVmIBPrSR3GY9vXJyarZ38Abn/\nALuaduJXGzawPFBOxceRV4zx2FIt24kUoMr0xVLzG+63c5JpyyMSwB5HSkWaEM5XdI3JzjFJ\nJOu/5lye3tVIzKsYY5UscGnSPH0zg9frS6kMsJdSRr5W3cSfvU9bjc3zncy9QOlUZJSw3KCF\nFPWY8H8x60wNJpt0bY5Y88UjTL5YO7PHX1qgsgZiB97qMdqkaQ+YWxuGOtAF+O6OwxgZ4z9K\njF1tAyc9zVRJC24hiOOajMgkjOe9AGpHcEupBJB5BpDcIsgJJJB6Vl+Y6qMNkjjPtStdAAkd\nWGKANVroLIV25B+YelTR6gEGSfnHX2rFgvC3lhxgY/OneduLkrj0qkBqfb1fkBlJOeakkvcK\nr4JGccVl/ajx8w6YxikjlYyDnI7LQgNV7gSMG+6M8A9atx3RXDscgjFYQl65OCOxqeG6AU5U\nnjuakDW888MrY7fWl87cwwcNnrWWLkLGA7/Iew6ipFuP3eN20L3pFGqZBtx1HepBtbCKduOm\nax49QO7g7geAakW+dmwFHHFGoGo0hdQXOecYpzTbGBZugxWZHdBm2l9wxzTFug/Gfk7UAarT\n7tqDK55yDUdxKJH28jHpVF5vlDLx7mmfbPMyGO04+9RqM1VnLRZBwvTFMkmDKqngg/eqlDfZ\nGPL+7/F2+tRPfs7cYkQ9+lBJprIZIiQehxzR5nl/wgL2Has5pjD053Dv2pWul8tc8nOMUCNC\nOZlXJyFz2qVJhu25+U8+lZbTMv3Wwc/dpzXQjjUZ3O3WgDTkuERQwOCOvNTMyPGrB+GHANYs\nN0GyrcID360jXPlkODkdhTKNFpCoc9WXgrT2ZpY1OcY5qm12MK74Q9SfWo/titGT0JbpmkUa\nUlyFIdRzjGailvmm2k8haz/PYpwfl6t7CmpcDBB5GMqaq4jQ+0PExIbBI4JpTfzIu6RweMDF\nUjKrRqCSabLNuVkH3jSGaH9rS+SqAbjnOCetNGqSzSFWHl+uDWWJTJGCp+7wfWnI+7lm2r+R\np3EzQj1GRZSzPkDtViPVrlyqk5X1rGPyq4AIOM5qSGUqpUMOmDmi5Jr3GuXAjEMUmFXjNNj1\nN1XPmHIOTjvWRHIvKg5IGTT942gg9ecUFG1/as7Tb24AGMZ60w6rOGwzAq3vzWZ525lfHtim\nK25mymeeOaLsRsLrVwkxEbYyeTVmLWp4FYiY49DWLb
yI8RZvXg9KN29zk8elIZ0v/CTT7UBm\nZeODn9KI/Fd7CQEfYWOCCetcysi8KwxzgGnySHaQSCwPX2ouTY6mTxnceYEB6cHnioX8YzXE\njKrlNvPymuV3/MCnTPPvT9u1i33T3FAzr4fG9wwDtluMck/nTm8U3SjO4ZPJKnPFcgjYVtp7\nVMZvJCfMTngjNMZ1g8ZTbQA2OeCeasw+NLyEBd2BnO41xO/ayvkEHqtNabbJy24+9LcD0Fvi\nBPNzGy/Lz8x60f8ACw5G64EgP/LM4rz2WXzcgDAp0MmFYZAJoA9LX4izv8ofb6Zamt4+cs5S\ndt5HKMMivM1mMfzBunXNMW4ljyd24N70tgPTG+ITxoDM4dem0dak/wCFhTRqAJ2VTyDnH4V5\nS10AoGSMdTQdQZVAVsqT1PNMnY9ZX4iMqtuZ/NIwRnIPvmmH4jSKvlhyEx/Ca8p+0OCWLEE8\ndf6U37RL9nJLbQDwaYHq9r8RGk3qTlcYAPGKiHxE2xqqHJH3jnNeYteumzOTkcGomuCu0x9c\n4ai4z1yHx/IPmd2LL/dPH5VLH8SHjRnkKuc5+Y449q8jjv5ldvnwoBGPWoftTycs5H8jTuSe\nwt8SHZmAJjXqGV6avxKuI1LAoQeAzNzXjq6iw+UE+5Jpn2oqwyxPOQAaZJ7JD8RzlxJLuIGc\nEdDR/wALKto9oZ2Zj94KcCvHJL6WSQZJQ+5zQ15NuJQYHU+lAHtK/Ep442jRznOcZ7fWpF+I\nnmQDzJmXnOz/AOvXiLalJw275z+WKlmvpQo3sSeopjPaF+JTbXVRlOwZifxzTP8AhZDGFfn8\ntxxuLcn8K8Xk1RvlHmts6hQcc0fbHbLySl80mB7U3xEnkZXM7BB2zimL8Rt0nErM2c9eleMJ\ndSFSfNY+2eab/aEkTgmRlbpxUDse0SfEhpJDtlYnP8Lc/lTpviI80flq4PuOGrxD7UysZ13J\nIp5PqKsx3peAuXO9ueDyPaqEexf8LAMahy7Fuh+bFOf4jFpNqM4JHO7pXjC3jxwtuYsrfw96\nb9sm8k7ZCAfU0D5T2hPiFJbxh2YYzjBOf0pW+I0m4M1yq+kYHNeMLeS8fvNxo/tDcdwc7gfu\n+9MFE9jf4iSDdI7qecelRyfEqX5yjiQKdrbSQK8dmvrjzfk3B34PpT/7QMORI+4Hgn3oFqer\nr8RJFwC4jwchc5zUrfEWZZG8xgV9Rxj2rxz+0pWLf8tFxgY7Go47i4kbIlYgclc96dxHsq/E\naSRMxnagPJzzUf8Aws2REd4ZCO3zc15FJeTb9hfEZ6qKYLho4yEYjJ4Boug1PYf+FnPNlS7G\nTqOMA05fiE/lbWlJUnLbX5/L1rxsXZWHc0pkCfKQD1PrT2unZUYHapOd1SB61N8SJo5iiyyC\nLGQf8aqXHj+duUk3dyG4rzFb2TzCwZtp42nvS/aXZjl8DFMdj0f/AIWBOi/6zduOSq56U5vi\nI6ysQ2F2/eB5+mK8xW4eMY8zjOeTSNcTSZZz9FoA9Lb4hyfKFjOwn7zMc/lSt49fnzJ2jVf4\nVNeXPdSwqJCx+lEcrMzEklmGSM9Kegz0yb4iSN+7iIZcfeduTVS4+IFyyhWlxIONw6AV59Hh\nOckv35pzTeY2eQcYPOaQ9Ttv+E6uYc42uP726nN45kaMAsCO+OSK4bzNigqNw70k9wGVSvyn\nPFMR26+NJFb/AFu4MOnTFQyeNrppMGYrxgc5FcQ7PyqcnFLb/wCrLZ3H1NK9ibHbN42nUqpf\n5v7/AHqNfGd1C2A7MWPBPauOWTdj5trkcU5pGbAY4ZRk+tHMOzOwj8YXF1KTJNtKnGOmabce\nLJVcgSFTnhgc/hXIjZ5i8feojkXzmA+Xb1JpXGzp28VTR5kWSR5WOCxHA/Col8UTmYkzM6en\nQA1gNMJGB3Ek8D0qLdtUlOQDyKd2I3m8VzZw8j7yeNuS
KY3iS6m3ZZgMYIBwTWC8yyQEM3Ge\ndtRsx3ggkIMDP8qLsR0S+KLlHBQMo6fMckUw6/LcZ3b1jLfMc45FYDM67yQSw5DCnLIvy7jk\nkZJz3pgdAmuP5uRKe4VjUC+IHVyWLO4P3u1Ykkm3Ixk0xN6ghmAB5FIDak1WW6bG9gepOf0p\njak8LBXdgvXr2rKS6XJycA8c0slyFPkuMtjO4/yp2A1ZNW87y1HCkZqP7dcMWLSZXptrOWXy\nwiqvy+lElw6lmwM+1IRpy3zPDsXlwPWqqYUlnkBJ4K1W8zcM7vc0vnQtICI8RgcH3poZbWSM\n+uB3q3HcbWySSMc1lNNuyqjjrTo7jdITg/d7e1IDWa853ZUj3ohfdMihwMDNZ63G51J6YxtI\npJmO4FR0POKANCWcSbiCuFPK0xnEbHd3H3RzVWOQbC25EbruNQvdMY9gHzdfpSJZZDbmOfmX\nqKcsxZ8nqeKqQtu+bcQx+XA6U1rgfOc528e9MsutcDb5Y6A5NVWZY2LDufvVXM22PGMZ5JqO\nSV2jGwgAUAWEmOWjaQuPfvRHcgkAKVI75qid28jqfXvTWk25cFs9D9aYzQ3KobY2Wz0prTFi\nvzZx1HpVOSQhEYHJ9qhMjbS+cnPIo8hMvyT7gpwcnjiovOCoV7Ak8HvVZZiyqRk4/hpjMe4C\n+1NIkvNIvmIyjcSOfamPI7jdIcBjiqy3HzBANmf4qaQ0zSYfKoakdhzSELjZn0qMF9yDA2sc\n07jzgw4GNvWoj32nIByRVBYcznazM3zHqPSkRQ+1c/PjpTdxdTgYNCseG2ZI6GgLA5ZnU4Bw\nKezeYhDHacdKj464xx1omyjowGWIxikHoDKQV2tuPUL6UqsQpB+XPUGgqVUtgiTtRJ82C55x\nzS1GNj2xyZzlB29aeTtYHqOu3t9KRi27DKChHC9/ahfl2rnPqKYD8NuLlAx7IKXc6sMrtc9v\nSmj5cDlM0vP8Z4NMVhUZhIrKu4Dhj60kX8W/5d2SKTjcpzjn86UrukxJ8g7VIDFyy49Dipzx\ntVCAp7GomwrFc80oBmKrn5T1zQUO2hWKlu9DFXjYk4K9qNvy8dFOaVjnHG5W5oELE0i4Zf7v\nSmEsFB2/Jn86RjtU9VHOMU9jtUDkjrTC3Ueyjy8RvtOfu0MpVACM5OMU3azTK3AUjNPDYwVO\nV3Y57UMTH7duVTg/xUzyVZeXKtninqMbtrbmU5Jpy4DFmXcM9u1SS2JtYSDcQ3pxS+Q0bMxI\nDZztFLGgZSXBwpyKNpPzH5gT1pjEX7xCAbjzj0pVkbdlvv8ATFSfwAAE854pNwXd8nHeqGKv\nzAgkKM0nLwOCoJ5pdm4DbjsctUuNrHj5cZ+vtQIjWMoEOONtKse1gzc+tOKFo8Kcev8AhT2b\n7hRQzYxj1qgGqQPujqewqeJ2TKhc+tRKzRkrjeD/ABelPU7lJBIw2M0gJy21V2jr6fyqVU+X\nJPA5xUPn5jUAfdbt607/AFkhXOF6n1qiSXg9Ru3Cljh2MjMdvbFCoxQ7cHj1p64aPLKSQOva\nq6EBwrvIeMDAqSHYmWzvDDBzTFk39VXaKcWG09gT29aBDVUKSByg6ZqVflXcDvqNo2WQhueO\nlOePayjOVx90etAx8avtJB75JqVSQ3IVh6VDGPMZ8/dxgtTo23cqOFoAly3zY4JH40ke+RSf\nu7Rk0wruQsTjnIxSFirBlftzTFYmRkyrISARxVvcCNu7BY81nnB5+8D0xVlZGIGVwR2qiC75\ng24A+XP3qk8wjqNv0qpEzMxCjYP9qrLMVdQRgj06UxFxWbbuDEt0GK0IRuwc5I6is6GRuu3c\nx4q9CSr7m4IHIp9BG7pSG4uAAvcd66tYfLTAPHrmuT0ONmxICRk11ipt24bKkZ5rORaJVjVl\n44JpEXaW7CjcOox9acpMmADt9axKAxqVEmfqtP3q3btxio
VdlY7RuFO8z+6u096YiXsWC4OK\njbLYO3nHNOOdvynJprLuwF59T6VRLHDeGBPIPQUrZbKP0zUao6/MWyoPFS7WkbPGcUAMkXy3\nABxTiS3zdsVGuJ4S5GSDgU/IXkdf7tAC7iWQ9vrQyndycc9Ka0ILfKpwed2amb5R8oyT1NMQ\nzYsmfmx6UfN8pLZHc0ob5sUseVUlhxnmgaBcM5Un3zSLHu3MCQe1Iu1s9u9TfKvA5JGRRqMa\nsZVfc9aRlOcZxT23qm7qfSmyfMNxPXt3piBs7VQjI9KesWSC7EH0FNkYJgleRQzlskkg07CE\n4+bhs5p4ZljAKZ9qZHu3HnrTk+WQ7noAVuF5FIM8ZHy05cyDbu5601sn5e2adwJPlBIQZA70\njYYggbRiljUJIQvOB0pMBlG7Kg9KbAFBkyrHGelLho1xt3Yowp+UHPFKzBiOSF6HPWkFxnmE\nKDjPt6U7bldzDCH1prR4UAAkZqdV3KV6imBGsYkUL2zwalb522520znzF5yvpT9xkc4G3bzk\n0MBuMSFVGwdT9aaSq4H3s9cUu4t99gT1pB3C/WlqMf5gUDqRnkUgVW3IOB1FNlYyFSG4pThc\nE8n1oCwinzPl3Y9qMhyQGxikbG4t09BTlZUkyOvUgUBYdGyqcKM45570rbmxkgDrimKHGec5\n5FAHGT+JpBYVmXYAPvA5FG5mVhjn1pypj/aHc01lTaWXPWmMRlMiZAwcYJp33QoBzxSKvzYB\nwMUiKGXJbFMBxUYOw/L3pjYVAdmefypVY4I2/iKQ5QZJ78UEkzt8ig8js1Mkbc4XquOGoVss\n20cYphhZo8k/N1AoCw4oOMnDUjZkBydtOZRwdvOORSrt256tQUJxIvTJAoG6QqD8v0ojYO7c\nY4709Y0aPO/FAiNl+TGeQeKnLFtpU0zytsZAJY0scZjXI5P6UCE3HdycAUoJxnbj1NK6hlAP\nBJyKCRtZiDtU4yKABS8kTEfMF7d6dJ/CccqOaWKM7i6jGetJtHzFhxTEIMdQM0R7+VpYx0HH\ntT/LdlaQuFH3RQMTyQoO484yKmXAhHzc+9QRqpYI5ycfep0aBlbccbf4qBDtyMcbunWm46gD\nvkZoZvMRQoGPXHXFPVk++Rhum2jUBu/rk4XvSq2cFW+9wOKQxddre9LtE2GUdPSnqBKrNtwy\n5NOVlZVUn585xUatuVjuyT2p2D8qsMHOaYyX5Rkng56VGQVODjbT8GTcp49KTAGSRkAU9QFW\nNljORlu2aU71UDZn/aHamLdH52YDH3QtSxsIlzg+ZjOM0CBgJjj5gMZ4p247UDZIPA9qIY84\nLnlj2p3CSFmGR2piBVZfm98Gn72kmyB8uKjZg3DNhc1NJhSNg5AyVFCEx8MvmMC3AXoKbI3J\nYEY9qbJGSqv9wGiIA78kYWgfQf8AeCg/f60se0M2eBjikZTIobpxxSMxZlVlzigQ9V+UA8c9\nadjdITkfL2pki7mBBwo9KbIMzAjgelMELJcCPauMsx7elSSnnCjBPcVApZZMbS/P5VKyyRvl\nV3erelAgVNpVSfl6/jUm5N4bb8vQ01lK4Y/N7etOZjtxjYWHQ0FWCNvvFhtTtRDmRfk4x19q\nRQ/lqjHIzUseY1YgjB4NAhi452EHmpQx8vceucUxVVVxtIk/pTlzJgkYC0wB2kjxj5hmpVKr\nJ84LU1VCyDnPGTSvI3DZ2e/qKRSHhVVSA2cnr6VNCm5iwbAHBqOOIKozyW5FXoY1KqGBHP50\nmwW5PFahY1zz3q2qErlBzT4o1KMenpVlYyipxjdWdzXlIksw6gnqT1py2ZZmXdt9DVpIiccj\nbmpVjCqxIyM8UrlqJA0AhjUZyTgUGEMPu4xU0i4wc7j6UgUKuWOeeaXUdiL+EkLgCqd1gjI4\n79avTHqA2B7Vn30QVc/eHtWy3IkUsBmJ3YxzUXKkgD5WGaa+
FkVBxnrS+Y0ROPnXpmt0c8iT\n5iR85AxxUmAyYb72ajjxJt5wBzUgRfO3hsA9q0RiDbACQCT61FAu5SXO3FShiuUbg9qiZccF\ns5qxD425bKb+O1IY/lUbuRzSRsY1Pv0o3/vdp9O1UBKqgZHfrSgpkfwsBzSeYOGHJ6UrKQrO\nxx2AoEEMhkkIOCOgpm5SxyeQcUsa7e3P96lYmIFsBlPVhQSI6E/OXDEdPSmuyhsOcSMKVpF8\nkIB3zSDO1lxz2NAirNbllCgsoU0/c25UODxSyyGMrhwePmzUMbbmDtxz1pgW/Ml9BRTdy/7V\nFO6A+QRudQSQpU9M8mmLvkyF/djOSxoVQpJBBZex6mpJHfap2beO/avmj1RiHb05zxk09rcC\nQDG4jnNOGGjbdjdjr2pPmaOPccnH8PFMVhrK5XdlVOfuU7aPMLbznGOlRf8ALYseCvWpeGQr\njBPO4HpQMYqheOnPX2pV27sB8leRjtTg23GDv/CmOu9mJ+RT1OOntQT1EVWmkJD8gcA9qkQn\nzFU4AUZPvTFxuKorKwOB70qxtlQx56kEUDsPXAViwyzE49qbDGJMrvw/vT3Mm0BU3c9RUclu\n8bM3VvvcGl1DqPUybNgzx6etR/OcM64OcZFS7QoSQZV+ppq5+9uySaYxjKfMPl/Oe/pSnKrz\n+nIzSlVkmJRtj46+9EO2STaRg45PvVJXJ6j1TdITjLYyD2zTFjeJmyBvIyaVWPlg7gADyO9I\nswaTaATkfe9KkYKyxlUYdsihdrTFmBx33U6AExjHzMG6kdqDvuFIVVIVvXmgQ1YSWOTheopy\n/Ko8peeu6kZjyCNrKcc012Cg5/U0XAejN8xMgzjlT2pqkeUA6nbnIOKcRiMKQCTzmky9x8in\nGBgCgAGArYYkZztY0jRhsBHxnnHpT2Ro2QuFBXrTY2ZtzHCjPFADQv77PO4DBB5Bp/mCRgij\naw65Hak85Y+xd242j/GnlmkkXdHhB1xQAwksxjJ3DHCmnfIVC7drY+tOZk+Yqvy9M1Hn5uTk\nLyaAe5IV2x7fMCNjmmqrx7UH7zP/AC0xSShZZFycbuhxTjlVlVs/LwBnigQk1tsYNJ8w9fWl\nBTyyvl9elJIJGVSXAC8bDRveLPJkOMnFBQ6FU3DA2tXpPg1kkt2RgQyjOT+FebLMsm0YJ3c9\nMGvRPAzeZH+84B+RR+XWs6mxvA6O6jVbdoyQBz25rlb23dVIJO0HOK7C8XMOWGTniuZ1RApZ\n2HbHFYXN2cpdttMrqA+PTtWcWVmOImA7Y/nW9dIfLbK8EY4H51iShmw0fT3qiSo67ZFwOfWo\nZMeY7HhqnkjONyHHPf1qLyg8bNLlSp5FaCRSlYLIoAxu9B0NMO7ezjkk4I9PepZtu1So+XOQ\nf61D/wAe8jZzlhndVARyMkcwymSf4vWoJBuMjY2jsuanVXZzuIbbyKjKmRf3gG7Oc+1AyGT5\nsJjccDPvTJFVo2UEBsZAqRmXzAgGQRnimrt2kMgx+tAiLJYLjg4xikO9lZNuTRwybA2FBzxT\nY+JPmYmOqQajZoRIPvZwAAelJL8qrtYY4Bx2p0OVaQH95uHGajGwcqcYPKn0pjFkkOQg+YZz\nuHf2qOTK5DEHNG8FSeeTkN7UmEZvuMRjvUkiyK8e7yxk9CSaj53BCoJ4+7TvlVmLAt7e9Kcx\n5wmxgcGncBuweYzKDjpjNIx2oORgnqTih9u7DHcM5NLJ82H2q3cAigLiLtYhdpLdc9vzpYY2\nZ2zHtGeopGBdBgdu1OKh2+UlFHGM9aYxoUIzgnPPGaau7yzgYIOTmnPhtuB909z1prSBsseN\nxxQFxVJf5dgHGc9qb8qkbjtbPFL/AKtNrKSAc7gf0pWVZP4cnHB9KQEe3LPwPds0CVVXBPzD\nikZdq/L97oaTywykbenOB1oGLlkY4XJYYFDK52qvy4HNB3EjAy
KbtbcTjcD1oAGjyuCckHO4\nUrfNFtC7V74pUbah74/h9qZ/BkNhTTAWQJsDK59APSg7sq4zuHBHrSM27hfu+tPfLLleQvp6\n0ANbCvkcr1J9/SmKPMyM470r5+YfxE07y/M6HHrigQ11DqFLcjnNR5PVlye1SKo3YJyfXtTM\nrlt5IPZqQCwrtY7jke/amo53MNvA5GelPKiROGBOcUxd0vuqnn8KYMdukf8Ah3Z6t6UjMoZe\ncL0pyuIsnHyNTH4YcbeeDQAbioIB6c0jN86o3zFhuHFBj/eEMfnbrzQjFhlOq9/YUCFKovzH\nqe/9KLjMREkZBjxgj0NOjxJ8xG1epzTGjG3O75S3QUhMYMK+A3JGdwpQw24Pz56mnjAVscjp\nTAjGP5RxTAMZyB6ZoDLtG84PtSFT5X3sc80NtGGC7jjFAMVWaMsPLBB/iz+tIV2J1yM035t2\nWHHepAqBs7uMZC0ANkcNHgjCnoTRxswu0H19qBkqRj5evNDbsDOFPX6VYAWD4UHHofWlX5s8\n7X7CjeVxlNx/vU2RRtODhj1qWArNtXH8YGeKb88mPn3HGTSllWNcHbnv6U2bPmAxrnK4pCY7\nLSKD0A4FI25WyCc+g/nS8zKBu+UD6U3a3lhQercn2pgPaQqvB7dajaRWUbSeeM96eVCuxHAp\nF2N8xwDQUNljEeSp3HHVak/hVjJuGM4xQqhslSB9aaiMyhSNpBzSC4u47W3HZnkCmvtAzyT6\n07jfvKkmhVK5yNwb1piCTLcDjjhhxR5jMV35bPBakUMqHB+XoQaSPCNknCDv6UgANsLgcg05\ncbgW+UlelJtZs7h8v96l3lo8Y3nOAaACRfugjK0u5c7iM+lJuyoDDnO3Apx8sNt3gqvVgKAF\ndT5e4OMH+GkUMsQPUg8EdKZIoxlWwc0B9sYG7bk8HtTFYkDbcmNSXbjFIzHYFLYYfwj1pnz7\nSPut1H0pcnzkKoQMZ47mkMXzi0LKDhqc5+4F6Y5qPcTuyMMTyKasqxuQGwv0oCwSPvkxkgD1\noVUEZYHOOcetGAQxJ3D3o27QGBGD/DQIkjULGvYtyBTizIwYtgZphDjB2qw64pm5tu/HAPSg\nLErSbVyfm7g0kMgfnOCPzqL5fLBDZbPI707J+VgMH0700FizG25j/E/oeKFmCKcgkjk4/lVa\n4Pzcj5f60GR0wQwbPWmFifzduGK471J9oHlk87j0NVPOIVweRQ0bMfvADGcVIy6sijaCQDjJ\nqSO6ZMgHlhway9x6Hoe9O8zcuegXgUAX1mTYVBO4csRSyXQz8jFSOOlUVkUYBOGzz71G0m5m\nGeF6L6+9O4GuNQZ4cHBHrmkjkK5Z+h71lQ3RXDP82eBU0kp27Wkw2eRSA0Ptm7MYfJPJPal+\n0hVBJ+prOZ0C/Ly/pTPtDNGQpySeKANZZfMTdvzzQtx9oIH3cfyrLjkZl27tsgp8dwAxDEgn\nqtBJr+Yu4qXwx6D2pm75lIwQvGAay1vD5nAwvqeac1w3UHGelLqFjQN4itgLlvWk+07WDPkb\nu1Z8kwVCD+IpPlmUAkgjkHNAy4JGkLnJ9uakgmCcHkDn6Gs7zWB5fAHaplukVAVXaW7nvQM0\nvNZIzk4HY0faV2hWHUfrWcbr92xJJfpTVn8uP2Pb0pgaH2raTtABx0oa4O3cOTVA3QWQNnKg\nc+tNa4XdlQWU9RQNF+O8LdVxxww7UCbzlGTntn3qkLpOmMD+VOLHb8u3rS6gzS8zeDubJxji\nmrIvnHccGqEk3z7QQuO/rTGlEjEk4bt70xGl520tzt96PNCtjJ3Z4rPeb93zg9qeLwfecbGX\nsKALzSN5nXp19qk+0J5eM/Lnnms9rjcplLYB7UxbhUU72AJHHegDWW4Q5HUKetElykbKS3Df\nxVjGfy1HzYLdWp3nLIvzjK9Ae9BRrG6LOPm+VetPa6R2O09s1ied
lB8xJU4xUxuFV+GIFMVz\nSVsEbW4PODR9vXyzHk5zWa053ZLfQ9qY0zSSHbgEdRSEan2kspCtg077SBtBbcay/tfvj1po\nn3SA/dFOwzVM+3G/rjIpVuBHHwfMJPP+FZbXTSKuSMoaY135kzYbahGNvv60hGo9wW3lTsCn\nG2j7YON528daypJisWXOee1K11H0J7dKBmnHeCVsAYpjXxVtv3snFZrXT7hs4UjGab56KpbB\nL9PpQI1AxZSB1z3pJpI4VV0XJ+tZkkzSRqTJgeq0xZ+M7SxBoQmagutm4svOM59KXzhKoff8\ntUFmWRm4w+M4pPM3KpUgY680hWLzXBbg8qB19advKnJbalZPnL5ZdW8z5qnjuGO4Trgt8wHa\ngouecysQ3Ab9aGlO08gDpVP7QssOJOAD+NMW4VmdVBx2DUyS7tVowCwA9qRLqINjt/OqEbFQ\nQ3VTxg0vmh2G9QgJ5z3+lMOhorMJGLHgdKRSywhWbvn61RjmULtB4J6+lI8wxjzMoO1Ai425\nsgLk/wB2iSZvMVjyAMbarvdZXG/y8Dk1Gsh+Ugja1MZYbEjcN7/SnRkbAAdxz3qEybZtvUeg\npGuPL3bQoHcmgdi0Gd2Iznj7opfOXbycY/Oq28eSJDnOcimTTEgsxGT3qQLDTFyCThT1pvlh\neWl2e3rVZpT3YdMjFMnkO4MTuyOtUIvtOskON+0jnPr7Uzzy0fAwvf2qktwGwu3JHOKdNIsj\nDA5PJIpXDUsRtIrodxGemKdJIryZY5XODiq7XGwFi2SOMGljkWNPmG4tyF96ALcNysQaPLFu\nw61Csgmm2EAcdc9KgkmWRcKNp6NUcTqsgQ9DxQOxe8wIvnLwudv1prSHaWXj1qoZgqtGykKp\n4yetK1wFUKByfXpQBaT93IC3OelOkkI4GN2eR6iqfnPJGP8AZPBp7S7s7g2GHUdaaAkkZY/l\nSPhutCr5Me1umcgZqGNgpVNxVfemPcFc4XcM1Ii+JPlJA5xxUazBjtzg9xVOO6ZpMKctjIpF\nl+YsxySevpVILFyRldgAuAO5pJ5yvO046VWjmRpmBPyAcE+tRrc7lcs3H92kBZ5fPzEp1H1p\nXbywrZBbrVOS5OQUG1cYPpTXmb7O+1dxB6mmM0PORm3dGIx1pfM8t+F3Pis5JMQocjd71LJM\n2SFbqOaBFkt5e5uWOMmmcbQOn8XJqs8w8sBWxmm7wAuST6tSAsvjqD1PNDXC7SSenFVF2Ruz\nGTvwKibc8hzz7Z7UxFyO6VHQbcsOjGpWuGeYs7Z9hVBZF3ZHyhfUUnnFSXxwRQM0rebdHIzH\nbjoT2qE3Cy25wec9emarGTco4prALtVjjd0HemBdjnHkiQHavbmo3vGZhg4HeqnGHBO1c420\nkkqIwJ5A4p9ALm7aSVb5e1NMnnycthMc49RVdZvldQflx/nFCbvL3N0AxikBNFNtzycfWlLZ\nb5iQvaq29VVQy8mhpHnXaCBimKxakmMsZ+bjpTOGwVLbcdPSoeA65l2gc4xQHLb1JK5ORSAd\n975duWp/mGSRC4zt6/SoWYblIbIHG6l+Zvut3607iZNvJYD7oznNI0x3HnAzzxUXmOSFxkr/\nACp2/bkkZU8ipY0h8jYYIp27jkU5mDHyxnIOT+FQb2ZE4+bPBpwfbk575aqGWlmyTt+7jFOZ\n0+QI2MD86pZMUeYxkMc4qfIKnKhcjAqAsWXk3NkHhaRJAVkkU4YDNVhtyQcg98UpbGVUcNwf\npTFYtu3mR796g9KimkMi4x8vQbfWoUO3PyZVenvUkbLKihchs5KntQJoSK4ZP3QGPWoVlIVk\nU7ucnNSSYaZnQE4HNRSP5cYIHzH9KRYb1ZhwfcUrsHkJB2IRgN2pc5kBGB8tRM25ecs1AhZG\nMgLDO5f4uhNJuLKqkY5zmiaRpEjLDHqB3pkjFVAwQGOPpTAQ7mJ4+X
PUUH5VbB5PFJGpjdz2\nAxSbU5O7gj9aYh8fyyg9OMH0NN3Ku44JbPQ96au5vdgOlPDhG2kbi1BIjYRcgfeHTvSbtykb\nTu7qKXG0MpOcdBSSsNqkcYGM0FBIqnBJx3xQrbvm2+2BSGPd8ynJHUUnDcqdvqtJDHbQsZHU\n9fpTSTDjPpnApW+XJHCdvU0gbLqSM/WmIbMyGPCt85OcU9H81dzHaR0Y0nA3Njg96JEBG0n5\ncUE7C4Jxgnc3c0cK53g7R0J70M/yocdOp9qc2ZME8J1oHcarEvl27cUvl7mXcdrdcCkK7iST\nke1Krb48L8v9KChd398d+DS7yzEgZUCl3q2Nx46YPek2ncg9asVxNwbJbnjgUHEiqzLnHHNO\naNmUqME55INN3PIq4XC55qOohfvZIHzDpS7XZSfukYNI2/BKsoYdKNhZWBPQ+vWmMcpEjZL7\nF78Uu75SfyxTNrNtAxg9qcoBYq4w/b0pDBtp+UtkkcgUL90jrk0KvzAFcN3NACwh2ZsntTEO\nCgNnJJH8NLG7qp3qATzt9BSJIW2Z44yfelbd85J5I/SkIkRdvEYyW/WnKx3MpG1hUZYqqADj\nHGKYD1AOG680CLTM5jG4bTn86kVlWNe2etMDNtB++OPwpd4XoAyDqapFC/Mqkq+0gZ/CnMBt\nHfjdTY2DKxUDd3qZVJUcAjpmgRGFG1T1Ru3oan+XjjGPSm7gvAIY5xto3PGWDDCkimIcyYXd\n0+lBAyvGWpW+8gxkHrSSTIxBYEovBx1qgFCs33sgZ49acFbHqG5IHalSQlgU+4vTNKuYwQzY\nBPX1oFqKinaBwuOetKuQpYt8x/hpg2qOCpGep61I37zG7o3HSgRJHF5coVX3A9+1TN+7DjBd\ne+DVfcVbBPAOM0/zWVWBGf8AaoIHDbGvzfNkZGO1Iq7huXtyMmiFdq7cA99xpNq4Jzkt0qgJ\nPO8zDP1z970pW5Y5UhTzTcmNQQoBA7043G2EA9WPApgSq7hg/bofeljZnX5e5qBcsuQPlB/G\nnbyWGBgjmmBcjkRIm7MDTdu9SDjPU1WaTDbhkE8kU9Zd8TSZyWXgd6RI/adpVDhQenrVtZBI\nCBwMD5h2PpVSGQ/KwGMdc9qFkwzKG+UnOKBMtrK+1pMfP71bjkkkaMkckc1S87csfU84qx5m\nM9ucUx2NKGbaxKAYPTnmr0Yby8vwQM5rLhkWFgqYJPrzWrbq0zEE9RgUyTqPDzGbDbfkHH1r\noY8njGAPWsbw3CsakjkbcH61tuvVVk5XrUNjE2kLxgqeaXcB9aT+ELnrQq571mhodGNsuQ2B\ninjJG8nAzSYVV3dR3prANuyCPQUAxxIbkDb+NHIVyOMVGWeTavp3qWbHUdTxTuKw5Y9rhj83\npSeWAWYnHP40ozt3nAJ4prAMo5yelMBVXauIxkdadIx2iRQMnqKiBO7gkbaVcspDHgnlaBMk\nLKsgJbCY6etKrJjcv5UxmMb8gMac0gZvkXHrRcQeWSykjb/WgksuM8HtTmyyg9qQqvBYHYBQ\nUPVSuflwMUnmbUyy5NRq/wAwTnGeKmZmG7K7hjpVgC4ZQecN0FOYbQCvGD0piMBzt4AzThuO\n0njcc0AJu87dv45zmiPLfMT8vvTNo+bPPPSl2/MFHQUCsSbgFPGOaRlDKcfjTmUBcY/KmjK8\n7c+tG4D8lFyozTWlPAPyt3p/l7Y92cCmQqWU5GSe9VYkcT8wI+7/AHqljbdJuIymKhkbK7U6\nD2pPN3SFc8DsKAHqAigMOd3BqRgG2jOeeTTSflUbeM81FIyjIAJLdhQA/wCZWYr2/WjzG3bg\nO33aAo+YYK4XjNEcYKjdyMZNAXE3HIwME8/SpPmbcM8GmK/mYxwO1Sb1+7jnOakAkUFVAGDS\nKqKDu+lK0h3+tGxfLDZ3c8iqGEe2NTlQcc0gk3bgY8oeQe9DFWkGSF9K
Gwx3bSR+lIdxUURx\n7lG4jnmnYPlgkbSec0zIhU7V3E0RsGjCu/zHsO1MB+wqm49aTcNuD0PWhiyqQDlaAQevIoEh\nkIBjf58dsVKuNoGeMU2SJmYbQFWmLlsg+vUUyiRl2qqHrnOaQom1juzSMp2t83TnNM2tsODw\neaBWHqrsnBAHYUSAGPOCTSLJ8q8YY9D2oZzvIAye/pSFYcjd8YCinO25lIO0d6aJsKTtxnil\nZRJgfdUdTTGMeQq24KRmnTMxkHIAHPFOZ9uRgOpFIuPLOevYYpMEJ5nykqvJOKXHmLywTb1o\nQ7lTPy4PpSuw3MhH40XGPjbDcHcPakUk4J4wcgVHGfLm2xjKkdTU0agDLHBoERM3X+EE9amh\njZfmVsAj/JqNT95iCfQUJGzAurc9xQFh6ZCMAd3NPxtG3I96TeNgDcEdKa2C4IGT/epiEjK7\ngxBJzgVO4O7H8PXFQupY7s85+6KereZKcLgKOSTQFhI3w2WAAzxUu1QwDLy3IxUe9JMOwC8/\ndp/8fByewpkjW3E7Rzzj6U9VJTG3J7YobmTJG1h1pBGVO4ccUihI0ZpNoOCexqRAyyFE5+Xk\nVGwZI1ctk5pqkLGVOQ3amST+X5kgBXBHUCnxymaY5G0KMDioVYn7rHgU94ZSF/gB/iJpjHrF\nncckEH1pWySA33aRItxO1iTS4IZSxGOmKAHKF3bdmAecUskfII655oXC8YYuDkNTixkj3KO/\nSgVw81lXcBnnv6UrOzkZ6HnrSs5abBGFXqBTMK6nBxk8CmNEiMiyKzLznFPkHlsxBwO1Rthm\nBY4IFIrbmAbluvtTDcfu3KiuSecmgKquWV8qT92iVlbgDoOaBhVRunotMCXc0iFc4I6U394p\nDdexpVwikgZYmlkbPQYpiHq2YwQfmPOKFXaBn5nPSmMp2r2ZRSovG7r6UgsSySBVIUlJRT1J\nPAHJHLetRM53qMcHrTvn69FBoEPdQq5Y84xxUbMPLDMMjHHrUg+VW3Hk0kSo6tvyExxQII8S\nRgt1pisFYlTgnin8CRMHPal2/e2jBU80DH/MjKO9LH80j/ypMbpFJbAxSbgshGetPoBJH824\nsCDjmmrnOACfY9KVn5IJ2jHFSBizLs4HfikNE8KgsOclRuwa1FGVDE455FUbbb5jMR8pGMd6\n0I2LLgY2jpWUjWJcjA8sIOSDk1dizI3oo9arwRMY/M6Z4qVmGzJGOwrI3H/MuVbrnpUrMQV9\nO9NjUNgNy+OtKzgLz97NMY8x7VwvzufemvGfL6jPcU7crK2BkUyRWkCYH1pCtYqvlY2IPOap\nXTOy8nKf3atXIIywbjPSs+7CRyblJBxW8TKbK0zBuBwemajfcqqNw29DQMltzZ5pzbFUMeVN\nbxOZiRyHzAvTnnHpUjyFWePHzj8qZGwzvA6dqcWWRi2dr+lbkMSPEm44JY1N8rHYVI4HzVGs\nhjzjgEfexTlkxkL97FUQM6MSSNq9KXd82PXkGlj2qMY+c8nPao2jLNlj09O9UgJo12/vCMc9\nM1O6lmyvIxmoo2WRTIy42jinbs7RnG7k0IQNmNWKnaTRHhYyrAq3v3oVRg5U5zQq7t2PugZ+\nlNEiLJ8pbbnHGfekbdGMhuGHpUyOxgVUx1ycjrUewSK2c9eKQitMqyR5IwfWmDY3B5HbtU2w\nLHw+DmmSNv5AGR2pgHlt6rRUfm/9MjRSA+RoVM1w5IwtPDMFKnkA8+9LtaOHKfMC3epPu5yP\nlI9a+cPVGMpkdF27UP8ADTeZVY4Kqpxj2qSRfmU5GQOoo3HcWB2vjG2jUY1tihVwWXqD2pWx\nEoWJevU0KzSNtdTt9SOKWWN41UYABPHNArERhI+dXIP93tQCWX5FO496mZdzHBbaPbvTI33b\nto2nHTNAhSpKHd8p65zQ2fKjDkA9Ac800f6sAvyfWnrtkUMyYkTotAxjBl
YP8wReODT2ZxIv\nzbgey9aJVYfOpyW5KntSJhmBUY7GjzAMsEYsv157UrL5aqmMkHp60jFHjZCpYDoBSNIPKXa5\nVxj5SP60wE6ZXG05zinffJkzlF64/nUkkm/GSM4zlahaMMoRDs8wZOO9IQ1Yxy5GVbpSJJty\nQmF9+uakjVIyqMeBxilMaq3yqWbPAJpiBpG3bypJx97pTFYRqGJ2s1KzLnYGOc8+1I37yQMF\n3KKAHEeX824MT+NEgDR7Zl3fTqKasZZiygAZ5an5LZ2KysBw9SAmzZJsYBQRgCiOPafl+QA4\nz3pLclskgF+9PLKu3KnHr3qhojA3MzEb1z0J5yKVlOD/AHeuKV1C8gbXJ+6etB2rKQHxxyMd\nKBCLOWwjjanbHrT4f9YysC3Gd3b6UxotpMko+TO0bae3yxhMkp6CgoY2IkO7cyH+6OQakKmP\naV+cEc/SliBXkIQv1/WmJhHZ9nscmlqSw3Fc7D2xkCkO0sfnxnqw9afCrLISp2gDJA9KXjGO\nNud1IBmV42puLHofWmyI0KkA855UVYKhssD8pHGKjXlWIwMetUUMgwoB3bnPQnt7V6T8P3W4\nZW7MM/TtXm0anyzjv1+teifDeMKqIGBdvnznoPSsqmxrT3OzvomVd27C5OB71zF6vmRSKpyz\nciuxvV8yNm24HvXJ6gu2c4IWsDpOcnjUKwfOQMGsdrFfMTBKk8AN0robyJAzllwT3FZM2PMG\n4/KpxVohmJNA0Z+YZOcYHrUDNtny7EE8bcVp6jGqwlSWUltuBz9DVGZVQxsxBIGDVLcWxn3S\n/vm2n5QOGqGPDIBu3se7dqty/wCsOVxH61Du8xhhVCj0HatBET7YVLOpJPGar4JcbcHb2NWH\nRNzbjlD0qvKNj4XhgOKVxkSq+5in0z6VG+3aVBxJ3qXzS1ucx7PQ0xNssqMwGVHQUw1II/lz\ntXI7mmNuIwARz36U9ZPmdVG7cTgelSOsy/u9oyFzweDVAQSwn5SWG3sR1zTVUb8HkgYGKRcn\nBXp39qP9VKGbLHGPagLi4CyHI3OB2qHbJ8r8MCe9TKp2kBg46hRUePMb5jz3X0pARSyESMpG\ncnGKc0jKvzcLmpCoKcY+U9TUfVj/AB7ec9qYrBCx+bIGM9fWk29CpxzyPanr+7UhuS3ORTFZ\nlPzncen4UxjpE8tt6tt3fLj1pGjZNrZyT1FDSCQl+oX5QB/OjA3BXO4H34oGNYYJ5AHb1ppb\ndjv3HHSl3BmOcenvTmUR4wcHoD2oAac7txBKnrRIwkUA5TaetIfkbaTknq3rQjAcO2Tnjjig\nQjEmPzMZAPaov+WpJUqxHJzU+4AMjsME/eBqPaFXb94g5LdsUhiKFwGVsrnHFIzFSWxuwalW\nSNGcJwrcBfT3qLy8n52yem0fzpki5Dckcnr7CmgBG2ooABzk9KczCMfdy3ShmZVBKdaB9BvC\nyZEgKt1oRdoY8stLv27VC/pSndzt6d6CRqq0uSF/+tTGbyXG08ngipVkIwoBx601lVt2Bt9c\n0DuN4ZQU69CtEjFlVdgAzzSOwbbtG05pzYDggbvUUAN8kfKEHGc5z0oyqxsRz2ojy0mFBAH4\nZpVzknbgf1pMLkcY3QlAcjOcelOkVtyrjJzkmh93mfOvlnHIpVdmGR8v1pgRLjc2BtAPNK0m\n35gfl9qc30BU8NTFjRYyADjPUUCHM2xv76n+GnbkcAquD0I9KZvViABuA6+tNaPCkKMBjnOf\n0oAGyQdgAHQj+tKHVVwckUzIVsFcn9KNu3jnHWpFcVsyLyAq98GjbhQwPSnMobAVcNSZBkY9\n17dqoBrN5ihAMnPJpXULjaoUZ5ahQshDMdhznP8ASmtuVSWTKlulAx3EmUI3LnNLs3ZKZLDp\nupWk2nO3pwAKNx5ycjHSmAn3uJF59QaY0gkfbwdowKZGyjGQduKe0YWMME70
gGspbeGG1cfd\npyt91dhAA9aQEjGTuGcU5Qy7hkBj2zQITb5zSAOBj+LtSfMsQ54zzSj9wpVRjcfmoUDdgHOB\n9z+tAxWfMvH5Gm7UXOPmJ70pyzZ4U45pIkYr82FFAxWbaVTAIPf0p29WZwudwGPrSK3zbI3B\nLdmpili27aMjj/69IB3+sUbVLkDoDyKFzjduyAKapZZF2/KM/NTo1Ls4XJfnmmIRQ23LEp3I\npBGjKecgnvQjM0OGGMU5wflwMLjpQAixspP7wKcYOaNvkoMnJ9aG2sRuXr3NKyhSuD83QCgB\nv3TuDH5qlMYSI7cAe/c0xmEfUcA8sKRl2oCPmy2aAAHa3zfPx27Uv3PU4FJK23BAxk04qGHJ\nxn9KBgvEj8/e6A9qaG6puII5pbjKruIw2ePU0mV3B2G0Y5oEKnmbg4CsDwPakYuGZdm49OaD\niFnXPLDK+lLtby1ZjlPb1pDEVV6lvpQ5LL8q7VPHNKTuXnk0L8w+YZ9yaZKGbQzAnIbGKVU+\nUsSvfC5pVjPzZU57U2RNq+Y6YOOKB3Gncqrhcdiae2V27eWx1oXfxnkY6GmyK0fIzn9KpDFd\niY8nkd6JCjxhdmCeaRmO3GcetOYnIGPlxikBGrHGFUPxyM0HbKucbDjnd2qRPlZiY8LjGKbH\n+7U7BnPBzSERljtRMc57VLu6KTt5pq9dsabWHNP2lYyXGWbpS1ASQMwLYyB396bK2Y0JOHzz\nj0pyRhlCl+h5xTWyu4df8KYDP3bxNt+5nj60rKpUYfLZHWh48INnIzyKSRNuN3IBzkelPoNE\njfNI68FB/OmhSFx1Pp6Uu5VDMRhX5FM+5hjyfakIVpJGABHA4p2d33jgGmwtuDjnd1AxSCMF\nDlsMOaAHMWjXI6Z6UnJBYnnsaF+ZvmG1scN2NJynUZ9qAF85urfNnjFOVtuN2eD92k3BuqjI\nobJTKnBJ6+tAxWYGQgHn+lAkYfunbgcqfSoz8p2hee59KR/lZVzlSfvUAT8soUvjmlMm1guO\nOxpjYbcMg9h703b/AAgcYzuoEO+UgpuwSetAkeNgrH5elMVk47KOtG5GU55A5FAybgqRjil8\n4Hag+XFQSMeB3PIx6UrN821gduPvUgZKrckk59jSRzDJJJJz19BTWiUYffuPTHtTWbhztwvp\nTESLMxwc4+bI9cUQPtL5G7nNVpM7VBB3H+VSxsDlC4Xb60DHyMWxuBA7DNO3bpF2LlgKiEJl\nyd2707VGzMqgFMN35oET72Z8luM1J5nm7sjlRnr1qsUEny7stjmk3r8oB3HOAKZXQtLMqqpG\nWU8k+lL5gbLlwyqM4zzVfcPMKscD+9ULKqozqN2TgjvSEWWlaZXGOf4celL5mxdp7dOarLmE\nOf71N3Mq5Azk5OafQC8swjbb13foaRZBDlHJZc5Le9QLIUHzKNvZs0370fynvQIsecp+cChZ\nhIuXwM8CqjrmPhsn0FLwoUHv0FDAttIkYCeZk0zzlTaxO49jVbcOrY9KWHldvHy9KQyx5xjk\n5XczflSbi8mehpkZ434yfelbc5yOM/pQBIsjLERnAB4pftG6R1B+RRy3vUHzLhSeKYpPmPnh\nW70E2LHneWxcnODgYpFkDNhkzUacK2Bk9aVmfISX5e4YUgJlBycr8h7U7zD26VXV9rspO4Yz\nmmtMR8iKSc9RRqMnLKz4JLHripJCVUcjPUGqjSCNyCOcZ3UnK7CpwWHFMdkTSMcYVc7jlmpz\nyBmTByB0zUPnHaQxx2+tDSBSpxlOmKBWJ5nMar0Lg8gUm8MzFh9Krc7Sdxzng0b92D97HamS\nT8BSW+tCny2A+Zi3PWo/MJzt+Un1pPuNy26QD1pjJWnbcSh5HWntMihip35HXHSq/mCYjBCe\nuKWRgq/Jwi98daAJ3mEzbiSCF6dKRplRcnk5xiq7N53QbgR6UjyFhjgLngUuoE
+5CwABDnrU\nPnFl5+4Dge9JJKy4JxjpSFuQMcdcUwJGmwwAG1jUjyKg+bgY61WaYyAYwMetKxRskDkHGKAJ\nVlAwSuc9KVpGWRV6561AZMY2Kdq0zncQW5PO6jYRZdip+/xmk8wu3Jyo6VWGFwHbAz1NO4bh\nGJAOQfWkPUlmkLADv1yadDJ56cHLD1NQ/MzMzfKKa22NgCpZTydtAFhX3sQjnHc0NMzYduV6\nECoFUK2f4e9Ku1jkNtB7UFEyuC3zuQCeB7UskgaQEfTrTWQLhR8zn0PShm8uPgBiOTQSxzPu\ndR0PoKGYL5mT82OlQZaVfMUYB6ZNJG374KwOMcn0qhkoY7FDfjgUI0fPOSvqKZw25e45Bpcb\nVLovB6g1PUB5bap2jO45z6U2QM0hywCsOSO1MZfl6YPp2pCo8oMBtPQrTESKx2hccdN3rSmT\n5Tu6dM0xsO2S3y44xTGf9wcD5ge1ICWH+M9EXpnvQrPIhY9P7tNblFJ5pxk8phuJA7H1o6k9\nRrsvyfuz1xzSs2ZC2AOMcUjTeZIVxjjNNUL1b7uOnfNUA7DKoJwzelEm7K5/OmKo2lh97sKV\nD5iEj8j60DBVHnEFsMRzjvTliQLhiS/b2FRqD0IyehpVcAAFSTnGAeaA6gfUchT60gYZ+5vD\ndqVctkAfL/eNORQUGz/vqmAir8vHUGlZt/yk4GO1NUL84PB9aUnzFX5eR+tIBJVKoo70nTbk\n/NnIFKMsvA+YnHNCqA4YjLCgY5cqxC8Fj3pFwW57HBWhe+e9DRmNCOueQ1ACcqzIuADzg9Kc\nqhWG3GT2oYgAll5FRsMMCDgmgRKrAyNz06g/ypu8om0KQGOTnsKV2Z1GQMqeaPuyFmbcMZNI\nLCowkkJAwBxigY2geXkE8461FGTtLdjz+FP3SSRDHygnFMCQbVUg8E9MdqXc0e1W5Ibj6U2N\nTHEEOMqce9PfDyEnGKOgiRnaQsRtApG3LnPVaTanLg4zxims27jeQSagdiVZGWMOoz60CXfg\n4xz1FMUiRsA8DrmjcQFyPlb+7VajFEzbShG3ufeo0+dW7EDPtTj8rFWXYwGAvc1HJvjJ8s/j\nUjHRyMWYkfKOM01cFs53beadKQ7OqthsZxUXBjAztPdaoBdu1hGw3SMcgZ6CkMp+YH5kHtRu\nVWDHLenrTQzTNg9z1pkiKx288KRSNH8qbOT3FKzFkxjABxSKoVvvYz3/AKUAKu9ZMng9B709\nGIbcAAelMOWXazeWC1JtU/KjZwe9AhY38wHcfm7k0NJ+5ICbucCnbcKThT6r3piM2w4+Vuoo\nGCqY49uf3n8hSs4ZiwyoIwRQMt97qOSaNy5HG6gXUV15XjIxTQDwGABznNHmFZgrKSPSiVGj\nJB5J5GO1MLDk2I2NpdSc49KRWG1l2GM54LdKan3fk+9nnNO3K3zg5/2aADa0jgMdp9R0pdkk\nYIY7m/vUpY71MgxkZpNxbc+fl9KQxuWKkj5m9KQsPvA4BHSljYDDA7T3p3lBgzk/KO1FgEAG\nwBvXOKRR8u0Zx25obYuAXLMemKIodv3zg9qBD4k8uQnqGGKVQdoj3YUUxWPlksSMcDinNGWO\nEYDaehoExq/MSQNxzT/u7mPBJximMJB04JPQdqcxEy46MOCR0oGKyrx8+cdqdh5GIJBC87aY\nSjSbGG7jtTost82QAvamhIFLSHLDj27UICYyeCc/dNKhYK56jNJ5ZZuhDYpFD/LBIYcEe9L9\n488luBTZMNDgKQe7VITuVVHBAyRTJGBPLXaNxf07UoZdxT7pxQdyyLt6H+GlKqjFW55zj0pj\nHI5+Qg7TT96tGQR369qawJk+Ucqcbu2KdvQAoF3RdcL1zSJHRttjAHNTK6AKF6Hv79qqxy+Z\nIwVdm0fdp8LHAyvyE/r2pgWNpaYjIwRj8aRcnCnLBTjNIqg5APz57U7ajHC5Xsfr
VAOVmCtl\ngApz60MreYZE5T+VDLvGFO1+n/16dLh/4s45OOMmgB6thiDzuGRTs7Vw65phZXxxgjtTwy7W\nyDuoADDthDZBJP3aegcZHRcZqJWTrg4pQpRmBbJI4FMgljzsIZwR1G6iTMbYxk8Hg8U3aI1x\n94E8g1IzMFK/dPYYoAJGOGAwDnHWmgMBtZsYORijzN25ioJ6inDCqolbLHkL6UxEpwqFi25e\nuT/KnNNuZSFyMZqBShkYE4jKng+tSwyDy49uMZxu/pTQyR5PmT+5QWjSMnoSeG9KY3+sfzE2\ngdKbvDKQVx3piZK2ZGXfyPSn7fMkVsbWX+lU4mHLBsKvUVayqsdvzg9TTESbhJ984Geq0+Pr\nhl47MKhXy1g+XcMetO8tpFU43E9ge1IRNtKxhBkkHNWFkfIXK5PXNVowqsMNtI7k9adls7if\n0oGa1q3GXUGQHjFbFmxjwxHDH8qw7Wb5cnBOOPat3S2EnlLnJzk0XJZ3OiwiK1DFT81aDMFy\ndvJqG2P+jJkYA5qbJZRvP0FQxEfMSgn9KkSPgkHaTzimrukkwBkDrUu3uT81SUNj3MxwPlA5\nxTyhZVC8Ec01V2gLnIHWnyNlxjjikSDYC5HHrTVZZSVYbe4NO5VSSwI9KaVDhXIwKYAmVYdw\ne1SK6KCuPmPSo5l+dSvFKy/I3c4yMUASFRHGTnLU0MVX5xhiOtL0xkgkc0isTJubJXGaNQGN\nIWwMfL+tSR/PhcYUd6i2Lu4OWPNTqrcbcZFMQBfQ96cwLLtBz3oVgzPx0PJpFP7zGcHFUhCO\nu5U2jOOppx3rg7vvcU35ZMjOAKV5FbaFHOOaBoUZXAHQcZHenswjYcndjpSLIEUjbg0jNmMP\n3oELudk3Ecg8U/78eQ3zHqKYqnqeAe9IeWBXkZ60xkqv5a7Wxv60u/cAR19KjYBpGyMnFM3s\n8igHauMUCsyVd/U/lTgqmMkNh6YuEYjOTRwqnPU1QaiIQQVJ/GnN1JUD3xUaofLJxz2FOhDS\nAgIzLnkjvTGPVuBj5if4aJMcDuKlSJyzeWuOOB6UxY2YvlcMpxS5kVyiBG2k7uMdKXIjXavP\nGKkEflx/MOe9I0DMMjp6UXDlYwArhmXA9Ke21lyODVlbOaRlHBUjI9qX7C/Py5A5pE2ZTYna\nCQB60rAt3wPSrFvbmV8EbuelTzafsYHOSaLi5WZ/lhm459aagJUquQffpWounpIpDZU+1M/s\n3yeQ3B/Oi5XKZ23bgs2V70+RlXIReat/YGkjJUfd/lUkWluTuDfe6ZpcwcpRXLYOeMZxRksR\nlOc8Vbm0uSOQkHcDxwaRrGRecfNijmGkVgrpIys2F60i7VyFbLGrP2KWQKCDu9akfTn7JhhT\n5hWKDAohPUd6I5AqhM/MfWrf2OXytpX5s0g0+Vs/LhscZpc1x2KrdODnmnrG24hVyxHT1oWJ\noRtcZb+7jmnBmJAKt16UyeUjKgkAHLDqPSpThu5x3FNmjaKZnbJz6UyOTj5Q2aYrEi7o22/K\nBR5QI3P69c01vkj3N1JpAz7Srcg0yiXhcnO4e1DkrCXYbm/u1HtUxjHAobbIvAPy+p60APEa\neWA3J+8COMU/auADnceah84NGzE89Kf5z70AIHGaROo+Ftisr5JJ49qjDnzMeZtH0p6y5l6A\nse1L13NjK9OaAHeWjFjnJXpSbTGuOpP50MQpyB1pjEu3HHHUmqGSK2Y9qNk5ywpwZCrImOOu\nRUS7uFI4xnIpWDMwyfmxzQDHrEWUscY7L3p8eHU76auVYL/HQsbMxA5Hegiw/wC9ECq5wcZp\nGZtuGbPpinqHwAD8ntTN22Q4FAh+AIxk7snt2pgJ3qSMmpIpBtCquc/pSY3EKp3OBxTKQoZe\nVHDNSsjFRufOO1AVW2sflZeTRt7k7CaqxIokUA7fvA1IuPMUhefegqpk2oQzAdaTdw
ozznmg\nZIz7ScEM/U/SmJh8c8ZyaVtkjfKOc8kUHEYYKMdqLBYlL/NuC8Z596JDvkPljaeuKbCu6P5m\n59KI/lzhWzjFACszbgAm7PU1L8vnfdxximoGzvbkAfdHWkjxjJUg9aYIXBVjkDPpTlZTkMCT\n60EYwxb8aVl2sP7p5plWFXgcHJoXdKp4wRRs2vu6A9D2pQ23LMcHoAOaBMFZghyNzDrTv9Yi\nY+XmmLG/3hxmlUc7nPToBQSSum3kH2zTY2k8k7hlQeTT1PB4yetC7W+YLhu+aAsLuDEADHqa\nczEBlP3T0qKMt8+RwP4qdu3bS/TNAWEjUq21fzqZZCxfJ7YprkmQ7F46inRjHUcmgA3BVUbc\nkDJqT5ZOCMMcYzUQDJJtzz71OjeeTt42jrQAJEZNwJGV70+Iv8seMYPJpqxvGuCvJ5qSNnM2\nWAxikxrctQRGNwccE/erUiUbjx8g54qlbxiRRuyo7Vpwx/u9vb2rKRvEsbjuGx8KvapGmZQp\nZd3OeBTBHn7oHTvVhV2qox0rI1E+bcO2eaeNrMN3HuelEeRIGc5zxto2fMcjgHpTKEDKuV28\n0yZmRFwcCpo1BUnrnio5pEaMKUyV6+lBJnTSn5iRleprOl3by3A4+Wrt1s3OqnAbnAqjIvIU\nrk4xXTEwk7kRZpME/wAPU0m4hgFGcnpSrho/kI25IP4Uh+bnv6itUYXYgmDKVOOtKI3VTtxn\nGBUahVb5CMGpYnZgwNaIlj9rR4beAuMFaZgqquORmo2w2MEjtg1YZSy4zha0M2JKfM27R1pN\n37vA6g8/SmmTbhF+almIRwRyuKpCuS7l2gdyaViGbaeD2xTNizKMcYFJCPmBP3loAljkffg9\nqc8jMenlp396b5nO5hjvSltxVyMCncQ6ORnbsFxxxSndGRzxUo/d4YkA45WolUuGcnIHNMkg\nlQhWJGajkj+VTnDVajfKgYzuqCeETBQTjPemA3zX9KKi+wN/z3/SigD5RVfl2yDA6kUxVPb1\n71ZZB3cFfQUyaMlchdy91r5s9Uj5U5B2t/SkWOTGVdduckjk1LLHu2hQAcdfSgRn92pwCwwR\nSBjZN0mCW49fSiNQ3LHzMd6kWN/uKoYCoNojkHzZVzwaQMNvUkHbnpmn7SuAyCUjnAppUxzN\nGPmHrT2d3IwduOppoSG/8tGLABsdah2NGu/OecFKsNtkbDLgt0am+TtZeeelAyPY8dxt4PGD\n7GhlG1AziNs8sKmXDLyMlTgn1pjRpCPm+U9mpgKV2ZZRuOcE9jSNhlB8vjpilIU4MT5Veo9a\nbub7zZI6CgYjSOMKYwFHSlRQFUnlQOTStCdo25z1oQFMjqW6D3pCIlR/MOANmOM+lPWMeWCz\nYCnO09afDGQvzZw3P0prKUYAc5/iPXFITHbWWMsUG9ufXimbmVBs4TvToj5ZYKM+xoVV8wcl\nlPVD2NDAauyMlQpDHnFEhaRRtwpXkin7TyOrKcge1G6MfMUKq38VMQ0qcptPB5zQQPM2kkZ7\nntUgZduxTnmmyQnzAn3ge9MYzdtYhxkL0I703zTLuCrsLDBz1AqzGFwd33V496reW7u8kq7T\n0Uj0oFbUVhJGNin5O1OVXjIBfc+KQMCoDHgdzSvtdlAfB6hqBjlk3SBXbaG4IFRLGdjRhuFb\nvViSHzEyOGzmm9Ae5PtQIb5A3bwdrYwB60i/IvI3MflPtUjI6qoZdytzu6GjZ8y5wO9Sh2Bh\n5ewDlhxt7GhjHDkEbmbsKU45KDgnHvTmyuAoyq9T71QyJSrOcEoV5IxXc/DGULckcHexwe+K\n4PyzJnb1fg89K6v4fzNDfABgdh69OfSs5q6NIbnrN5G3lsoOTjPtXI6oFL/MPmxnIrsriN5L\ndCOCVzlf5Vy+q25HPGcc1zHSc3dfvI9yt7YNY06tIz4wRGcVszYjLgcjrWNM3lNIU6vz
xWkR\nGfdZ3bsn1xnpWdNGTb5H3M9K1ZNkyBuh6HNUJowsjBuF7Dsa0EU5FdDuJAXHQ1XmZFwUO1m4\nNWJH2szFf3gHANQrtMbfd5ONvpVILEErBcR4DFurVHK5LIpG3I5IqaWKOL5cFiOcmq8rCNlK\nx52jk02IWQMclxtVRgcZqpIxysgG3tx1qeQkLgHhucUnLYw4zjlcUBsQ7m8wOmPQg00xja53\nlx2PvT1T90yKPmByWpJFWR8Ac4y1AyFsYXPDd8d6ZCRuKvllPY1LMpVg6chhioeG2fNlh6UC\nHKpRiqcKO9JxG2ZF+bqG7UbWLPnIXqV9aayq23PIHPHagY2TDNlvuk0jY5UcA9NtSbRtw43K\nTlcUisRKzAZXGCtMCNiFZSw+Xofak2CMMR/+sUECSMLz1okXG1T8vPNAhqx7QVDcZz/wGkeM\nNk5G0cipFYMQoGwD5cGo2VtxUR7ueMGlcBCyxLk/Nu6n0pWVFYEFjTlG5sEAHpikaR2Uoy52\nn7y1QXG7g20EHeRkUq5UhmXA/u03AVV3fMM+tPZpNpAGeeFpBqQrjcwbjnjikVRGrAlmZqJH\nAZlMZH94+hqRcsM/wg4o1AYX+8vl5JHSmt8+xl4IGNop8jDdlW4PAxSK5jdXxz0xTAZ0YDYT\nk9aVpRN8i/LxStndkEg5JxTWYcHGO26gQvRgGPIpNw3Njg/0p25mlXgI2PvZpFbczOWHXGaB\n3FWTaoPLL16UxmAYljwwp7ZCsqnJNIxLKc9MYzjpSEM4CjrwetHRiSc98etC5WL5sFf7xo8v\nqGHXpTAVJGuJA/OWH3cU5ZPlwTgA9u1Rs5VVGNmOAKcxaHBIyOpxUsBNzFeF3rnnPWmgD5up\nb+E+lEjh/mT5S1KGKrk9PSqEN3MqlWG5vUUMzx4XCkHnGaftG3J5J5xUQVeSQVIHB65oAduV\nn3A7AKZGypEcq2Wbj296Gxuww4z1p25fLLH/AFfTd3oEJy3AGex/xprbx23Y44o4bbg7GzgU\nK3lswDYfptosMMyO2Tyo7UmwhGJbAP6Uxd0anc2FPIPvUjYXGRvHX/69AC7h5e0cHGMn+dIE\naOH72FPAPqaVRuywx70xY41ycHPqTTGOXMLLu+bsTSLyFBOMnNKqHcDuLAHPPSiVgQGAG3pi\ngQ37sjgr8uPu0fewSSCo4pWA+Rh82RxUjMHmxgEheVHekBDw7B84XNIwCszbcmnYIG5hhf7o\npqkCTOTjt9aAHLgsD7dDSIg84vgDIxycUcn5pAPehvvb2+YdAaBBt+bAXC9eaGkIU5O5uuB6\nUmAq4yWbrTtpRiB+tAxA3mRkhQB6nrRgKpBbnGRihsoxB6Y6UyNWRgVXcCOlACw42jjNPXKq\n2G247imhTtYk4Oc8dval8xdvt05pAJHucZYZUijaZVAzyBnBof8AdKFDYyeKGR9pcHheaADC\nmJA4IYjP0o8wyLvx8w4H+NIxPyhWwpHf09KcduQuOO1MBWQxqGY5H1prNj5jgEdOaa+WG3BG\nDQxO3PXnigBFb5N7Ek9hinFh95lwMc0ct2we/PFIrKPvZPseaAHNJ5ignk9qQuSwJ5Ujv60r\nN0KrhKTeWy2Mrjp70DJNrTfNnaQMYNRSM6wrGBkZzTlaThmFNVtrZzle1AhVJkYhThhTTvZg\nUxx1U0uMcFevU0MqsoUjaR0WgBGULypMjtwW9KXy9vPmZ7YNDP8AxeoxgetLtPBIHTmgVhGU\nswViSuc5okDtja2EzjmmK2GAO4n9Keu7zTkBlI4HoaaGJtEZIxk+tK0Zj2EncByRQ0Y8vAPT\nrimNgtkBvSgB8jfLnOQTQW3S4HSmFm3bFHy9OlJjyx0wOnvSGSFz5ZAHzg9aVvmUEDJprKfM\nXPA9aG4BK8gHGaBCbe4/iPSk3Krbtm7tTVduXU9Dg05m4AI4JzigYisVZiq7g1Kp8nAwG3
dv\nSl80M3Qhs4zTZIXZSANnOc0DHSdgQDj+Gk3hpAAMcZpdpbB6q3X1ppxx2GOKBCszMpYcYP3h\nR0UKTlmIOafu6IV+XqcCmDIlJQjPuOlLqIX725CDjOQ1NKvja5HrmlZmwG5Yk4NN54bktimA\nihZMdfTNLwSFU71Q9aVHfcwHAxzTIdojIC7VPGe9AxyNlJDjDnvSYLAKo4I5pxjTaArNxwfe\nmNIu0ASY7FRQDFjB65yBxt7ihXETHA3HFNz5ZGxsY55pWUnJGQeufrQT1HDa2Aw4NNXDyN8u\n0KOMdDQsnRQMhRndScyNz9724FMYu5cFdvJHWnYE0IXP3e9N2n1x600qWiABIBakMVcMxzwA\nOfSmqzNkFSV7LT+S23sKYMbsEH60AO8stEpGSue/agjqoG09iaRRhmQE7ME/ShSSikc8dTVI\nAkULGZeQvQ4NKnzMGIK4H3SaGUEAA7fWkYfKAW+XPNSOw5d3mHHGaRGj3FeR9fWlYAHAGPSi\nNvmZWTkDINAhiqVzlsvnGfahVKsVTp1pFc8MRn2oVxgEcZoEJ5gQhiuBnmnFlbcQdp9/6UnM\njMMgAjjd0pF3MoUjcw4oAXyl6Zz6c05v7qdeM5pN5G0gewIpH3bWLfeHp1ph0F2FVYBefWm7\nyPmI3dqVt0bKd2QQDilyV5IGSeg7U2JAsYbgj3OKSSIuoIXAzxSlvvHkdhRIWaROSFApdBkh\nCqy545pnKs/ynfnp7UoUMpG78aRWZWG1s+5pAIsZ8wEk/nT2VwpK8r1xTVT94xb8KczHAIPy\nrz/9akAwr++JJChun0p25txGO2AaYSW2KRvOM8dqdu/eYXoOcGmA0/MvIyehxSgGMcNyeTjt\nSbuvFIyhYPlBDHqTQAsmdoB5BP40se+RmBwoHAOf5Uhb5R1GB1NDfcAOHJ5FMA3lUygyo4O4\ndTS72/3eOaR8smGPPU02NU25L/h6UhC+YV6D5af9wA46c8UwqpUHOQDT1X5m2fMPQ0CB2Myg\nng0zdtGe386OWyPuDFN3+WAQu4HgVVx2HbgvBwuR0xQs21VU9M/nTZPmw2evFOfLL8wXIGBz\nRcAYlSSDgdRSMS7DeQB2xTtwXBJ5xjGKYo4PUnGBQUM3OykYDc4GTUoZcAANuHB9BTcZYDad\nwHNKu5SVwBnrzSEPCiQNkYHrUYZRvdV+opwb5QqnI703/loTnA70yQZwuQpyKXDxgElcHt6U\nYVs4PDH7tHkhRhRkD1ouMa2PMG8hvRuwpzsFl4XjHX3oZSGePP3D6cZoXfGrEKDk9KQw2lXU\nbv3nUjrxSFisZVRt5yBRH8vfaeoFEjFl+U5PU8UC6jmHzZyBxTdoGPrQ5WTBUEkDlaVcFmYn\nAxSGDZOSB3ocN2Xju1LvVIx3Bpg83yW/iXP3RQAsmzYDg7c9PelYhckn52449aMbo1O3BI6N\nSBiqhs8N92qQhfMZl3MMkDBoZiOGyQe1KzZYnpnjPvSY2qCX3HOMVIeYMxZQFbGOgpu6Qt+8\nx6cU5mQE9h09803/AJZ5wWIPSgYvEa8A7u1OMjyKGVecc01m3MD9zHrQAZl8wtsQH86ABvlK\nlm28cfjSrukQHeA467qFwuQw3A9M9qQ48t8cds1RI5IzhhxjGfrSJHu2g9W6e1IUX5T1IFOa\nFNu52yP9ntTATHAbPQ4pV2qTwcnnFMX7vBz6Zp0jBZFcjhaAEyBtIBBYUqsvpnb1NJt2kENl\nWPB9Palk2+cRtK8c88UAL5w3nufpjApgl2qSvSmdOFG7Pf0pdqsRs4I7etAD9xddoAPvSqfL\n3KeD/e60L/rgMY9QOlA8xGYhcjpk0AJtI6thqaqkzEhsA06NgvyilZgHwKBhuUrzy1Cphdrt\nheuaFIk3Dv70jRiMAnO7saQC7flIPBJzSNhueu3kUFd3zYx6f404sfM3CPK7eo/nTGxHZWBC
\nDBz3oZui4yDxSIwXJAJJ5ojz8zYwOwpXAVmePAQbgwxj0p8ZZWC439vpUS7zECvyvSyZh285\nb2oJYqvukckcjgZ7+9OXP8PHZs01mIZVzl8Z46VIq+Y2xuB1JFMAWNGkDcrt4AJ60pG5s9Pa\nkjk25wu89qNpz8wwWHBzSC4qsEwAPzpSwBVWfAHoKYCV2r/FnHNLI3mbvl2qDgt6mgaFxuYD\nqM/lSMRjaDhF4+tEe5JAAPl7j1pioWXOMgn7tAhx2sWI4bqcntUUj7W/lipXUNnptAqGTcqg\nA5HUChAPILYOAOOf60bt5wfkP8I9qRmEhxjaSPwpnDYycN29aYh0hMkOenOKa2FCnGNvSnDd\nJ8revFKI1l749fagQjMGYr78UB9jHcuT60gUtIT2pWysJBw3P3qBgrIqlm4bs1JzuVi2B6Gk\nWMqu0r19aVmzjzMDHApDEkyykg4IORS+YzSKWAVu5ofGcHBUc5pRh++T1DGhCFYFZd2dxHQ0\nxWdpcHqe/agR8MGOGzR1AVOSKYhdhVWcPnb1FJ5Yxt9Rk4pdwXgYz3oVWG0GgEDDgJ7YGaY2\nVj5XOD0p/d5T0XhV96QNuZs4znmgocZNzbgBs9Mc5oYHucL1z/Skb7oI7GhlO4A8r1xQIFb5\nt2Qo6DilEpkyRjevr3pF2sp/rS9X3EZ4x0oYC7lbOWJ5xj0p7c5HHzdKjZggwRlSfypzRhVG\nBg9ck0xMVVKfLuyf1prq33BtBHPHej5d4Zsg47URkBDg7+uSaChykO4b7nYiho02kl8AnFNL\n8K3fP3aeG8x8JwMZzQIe0b7s4CgDp3zTQsrFGUcdN2aT5mTJ69aTapQldw70gJE3SHZuCupw\nfSkYYDBBwvOaPmkAJO0frSrg8D5cdR60MXUaS8gXB5PSnZLZUN83ehvvER56cexo6Mp48zHL\ndvemMVn67BlWOM+9STt5QAA4HXimZVclOmOnvUrMNqENhiOfWmIYkgjCqo3b+rY6UsbCMD72\n70HcU/5eezdxTNoOctggdaALWI9q7dyjGcHrQGJjKgYzyfWq6yKqDJLc4qxJjdnd26/0piAY\nZSmxt3UGlXDFjjLAc0zc+c54Xkt/SnkuUY8LuGfemAD/AFZz8znmpVXbcAHOTxgetRwspUZO\nVxUqyBpnIOAOaQCKztH1GQewqURiP5mcNu/ix0qKNQd3z7kPK/WpMsy4PzEYJqhDmjEQ4O4u\n3Q09X/eMpfIUctUWWZi55B420wYQBV4I60E6kzLlMuuaNqxyfhQrBTg8gj7tIzKzYIwPX+lA\nrD43UR7Am5jyaJmBj2JwOuMd6hSRuSB+NOWQ9N2360xj2lLMA3JxSpuaPeRuwcURBWBB+Z25\nzRG2VYKcdqaH0AttmX5cblySOlOhkePKjB3HrTWkO2MZyB8p/wAaFk25G3q23NMgseW6RZZ9\ny7ulSrnLEthM8VBEEWQIDkY5JPepAy7V52hRjnrQBJ15kO5TwD3qxGoCnkhvequ5Nvzvlxzi\npYG82RS3amgNC35ZRsyRyTXVaHGssyOq5UHpXJ2rlsnvnFdp4agMwZ1YJtbB96zZLOzj2pCh\nI3Cnx7C4Qk7+vtTU2ptzwuO9OSRWz0J6+9LoA5SPMOMhe5pQFK8NzTVkU4I+6eop5kG0gDA7\nVI0IGVG9acWZ+SMj2ppVV27u5pEcjJHGf4aAHtH5nyn5cc07lpODxjgVFv8AMyN3elYjcD3H\nSgCVssBnjbSM3zHHYUz5mXjqaRWKsc9BwaYh8bZ3PxkjpSxnpv4B96YihuegNPKR+Zz0A6VQ\ng8sZLBsAdTSR4wSpJNSeWzovlxllJwakFq7Lwu0g4xUsBqx/e2ttHXdTWT5S38WOtSR28jbl\nIwe1SR2Mky4Y7SPvMaokrqqrGNwyTzmnKGUEA5z3FX1s4I1PzZbHBqxataBPLkX5uuaCjJVS\n
yfj1NSLtkjJ6DOBgd6uyrG7H5crnio2ZUm+UcY4WgmxV8t5/lUnI68VLHBJI2I0O3oTVqG6S\nObYDkN1YCtOxmW2yRtJOeKLlJGWNIuVYMMOMdqP7LbZlhhu1acd66sSzKOaJbhHwVb/69K5Z\nnLpLMqspGfetCz8Ni6wGmVX69M1E1yu8c4xVlWdfnGVJHIoCxLJoscDEyFZE9vWrDR2y2vlR\noFkxy3asqa+bG0SZx6Uz+0tiFXBPoQady7EpsGg+YnOaJFSPIJzu6il+3/dUHOR0qo025mZ+\nSO3pSugLMNunnBX+72z60v2ZgzOqYA4INV9581WzvXHSpXvJHOOikc0rjtoSQyJHjI5NT2/l\nKzFpceimqU10vysPujgim3U4+UgYBqrk2ZOjNHKXRcAnFTRzKx3P8uO1UvtTFQpGB6U3f8/z\nHk1N0OzNePa0chUZOePWj5Wj2uOay/7QSFhGO5zmrceoQ8pIpDjncKWgWZcWPZblsqg71HHL\nuOQPkA65rPuLksxEZLA+tQi4MMbDPJ4wKNAszYVEkXA+91pFVlyRhh61mr5ixiQNj3qRdQMS\n7RzTZOppRzBGUPgDoKjuNyzeYhGzOKo+f5jKzc8ce1SQyMsJyc/NkUwRb3Fs8ZYU+DMzHcBt\n75qusjzSFY0XPqTUbO1upLZ3qeaRRqSWKfeKKVx1xVW4tVWA4xu6j1xUX9pTrCNxyP6UjXCS\nIGY4Y0XCwjWaPt3jqM/SpbfSrYQnue1VGuEWbaGLH17VowXieWoPB6GmpakMZ/YSyQ7Cyj0x\nVN/DvOM5YVtO48vK8UqsGUPuz6+tVck5tfD9w8hMf3enzU5vD9zCnXd/sgc11C3CK+ACOOKX\nzlAZ1UlugqriOEmhaAlHQhQcGkaESEAHC9c967m7t1eNRIEO4c8VmN4bg2vgYLdKoVmc4VVu\nR2/OlWM7st09a2JPDdxGVEUgZfQ1Tk0y4eQxqjNz/CKBWKh/ePgnAXpQqHaRuwSfwqf7JLH9\n9GTB9KjYk5JOR04oKI2k2/KzfLn5dvpTyyhjzuPajb5Csypls5I9KI41bazjBY5pksdvIXJX\nOejCpFaM/dY8Dkj1prOImJ+6vTFNZUhXP97nFADxhcEE7OpFPfYyfIce3pTGUMoxwvXB706N\nSx5wABnbTZNhvmGPj7vvUyyIGDYIB4qNlZvm4CY4zzSqQAq7fn67aQ0M+YnOcelT+Z0UjrSY\n+UqeGpsDYYj73v6VRLCONs7gTx1qSELIGZeoPenLu3kseMfdFNXbnK8CmMEYqrICEY87j0Pt\nT8lY8N8zgZpjR5XgbT156VD5zfPlsn7tMG2WPMWSNjkLjv6UrSFMOr+Zx1qBZFWIsRy3FQLM\nFjO0YPSgLs0Y5l3/ALs555zThcDd83rWarMuQD8x5zUol4BYc0xF+NlbcCevrTmkPC5z2zWe\nrgY3N8pP41J9o8vj72D0osMuSMpaNWzsxnINKx7Y7/eqONuQeq44zUccrYyW69aBFpcC4Cq2\nc9falY7WfPKjvUMcoQAHjd/FUqlE3M+eh4oEL5wVQQ3JqYZZRuUiqqsrKDt57ZqwryvGB1Of\nwoGKc7do/i705htAJGWUY4pP9WwG7OOoojkDMwyACe9ADipYAqTnrTuc5A+ppoYxyZHTvS84\nIPQ80FEnnbo8Bfm9akwPLDI3PpUTZ4JxjpipFXb27ZoFYlUs0ybunSpLcI7ZzxUMeEAk5Bwa\nsWao208+9IaL9iWZdp45wBWtHGfwxz9az7BSZGb7vua1FU7B83HXNYvc6Eh64WLOfm9KQM0s\nJU5B601c9uR61MvysW7Y61JpsCMI8Enc38qeWJYMeM0xl+YEcmnGYeXyOM4qQA7gpVfvGopL\njYFUrz0qSQdlbJNZ00mGKsp3ZwMVcSJFW8kUSZHeqUrcglvm6cU+UncYzy2aiDKAeOa6Ec7E\n3A
fdH4etJ5jcoBt9aegVST1INJgeau0cFua0EyIAqwG3BpVcRx88805VdpCzD5TwKbIDGpUc\n8800YyDdvXd1x3p26TC5UhTxS5VYwUGF705f97K9cVqRuKp28xjgdaRpAWHHHXFLzHtYN8pO\neKRXaRnIGBnirRLGklm+V9vtUkeGPT8RUa4kwucMD1qSNljBIHHQ0DJiVbA7ngVIZW3eWiZP\nTNV+u1qmZWYqRxjk0xXJPlGVxx1OaRQ235XARqa5VlB3cHrSxIpjbLfJnj1qiRWCyJtU856i\nmspVQqhSyjrT1kaNWVMY7UkbbVIb7/vQBW8yb+6KKl3N6iigD5QMSqxH8R5JxxQUdcyYOzvV\nptwkO8FUxwfWm+Xnq+7I+70r5k9cgkVZNrRrnHJANNHlSSrlSzfyqdlYSLgZwMYFEK/Z1ZSP\nnY/lRcVhkkKruyW2npjrVeOFWkIkXjqK0ArbSjcjHX+tRmNFjAB+b1agCm0Um4HZx/epZID5\nG8g9atKv3l6MeaXcrMFPC4+92piKoXGWXBAHPtUSxuykythmPBxVsxkYZVBycZ6UNG6tmQgt\nnGB0oAryKUTaQMrzuqHI+VmYkZzgjpV4Rr0Y5Ge9EkIbJI5JxtoGVEXb5jRgEH+93pFYhvLf\naGIzgVYKkqQycLTEjLSDbhmxnOO1AiFFbcec0FQG+QFj2+tWGtmZSykb1O32pPJYOPQdQPWg\nCFWVGZ8Op6EHsacN0ijDLIfU8cUrxyNKFJIyeRQLTylfjofx+tABv/dBdmTnFNVdo2spD/ex\n6inrFEzDfIy46elRy2rtIBuyex9aAF2lmGWypPTpRuEa8sA7dOOlEka7lILeaDznpRuGSShy\nvAOKoLCBGjgznbk8jvSqu75VY7utOKCaMSsfmU420kKhpGkZ9pxge5oAZu3bmX5VU4bd1pds\nskYX5lU/dGKFw27ecOD+tP2NN95yH7UgK8gdSFcbZP7tSeSu5RnJY8nHFSbXmYBflj6FmHel\njjC/JwFUnv3oF1EYEt5m3aijBJ9ajXlMbs859KkWN14Y5XHFIVKqWEeSegoGL9+TLEl+3PAp\nrwhgSxxn+LNLGoVsSAA/3j/KnsqswBG0g/dBoAiAVWCZ2AjljQ7usSqhwoODn+dWDtdNuzeQ\nc+Z/SoNnz7w3Gfy96LgMWJHC8YP3hzjNdv4BhtproToA0g+8v9TXETRRsuAS5B69M+9bXhG8\nNhqqSI/ly5AP0qZJ20Ljue/NGGhQ4G3H9K5nWLXyV8zG6OuphjaSyWRiDuAI/KsDWI91u6Z4\nx3rkWp1M4G6wJnwuUzxWJcK0cjbMD3PSuguU2KU+6SOtYc8YEu0jj19a1SJMuTO4ghScZNVZ\nP9IX+8PQ9q0rqNlUs2OTgetU2QRybscYwcVSEzMk8xZjyoXH8VQSBJMsq5Zj27e9T3wRTnfx\nnqf8KjLHd5vKDGAMVp0GNaRmYlApZRjFU5N7D5jhj2Aq03l7P4gxPX+lRSSJ0II7UhFdpNu5\nSoXg80xtjxI6DYVGCe5p7IGxl8542mopD0APflRVAMZTwwBCkfeHUU3adyydz8u6nyNzhcgd\n+abzHneRtxQMrNG67mHPOMd6i5255UqMDFTSRy4LYxTipbaWIUAZx60AMVS/lszEMP4fWlXK\nvkAKSfu96GJ8zpkHFChfMZmJDLxQBFHlS672Z859hSMr8f0qXa32Ryr7RnIX19qi2nydxYqT\nzj0pAKHPPQD1zTCoXLbize4qeOFeSBuXGcHvUGDwduHY8c1QrjdokKtkDHVac2zcCNwYdBil\nZlkkJH8PfpzSbmVfM3cAGmAzy/MYMSc53GkZt2cArk+vFOcM4Vg+Cw6UyON3YqBu+U0CAK7M\nFGMDnJpoG2QnkN656U6FcIu/gZxSFCspAO4dj3pAKdqgs+Sx7jqfem87iA2T14p4Xy5AZDjP\nHFIu
7kbeCetAEfOA64C9CvvTv+Wm4txj9aGYbnI4OPuUybDqoUjd+lMQ8MfUbvXFNkUqR2U9\nRTMCTIQ4ZeM05YxCqk9OvJ7+lAxWDMxyQSPT0pqxJH1P3u3amqoHmEj5zyT2FOk/eRxq+1h6\nUBYOCz87QtNw7Kfm28cY70vlIpGWwf7tIV34ONozy1AAodRkDc3rSqzSA5xjrTGV1jcA7Xz+\nlNhYldzEhB0x60AP3htm/wCY5/KlkcKM/eycAU7arR5G3rn3pqlGyzR5GOnvQIJIwqqz8f0p\nkahvlJx6elPk/gBGQR92lZf3Z+X5c/eoAgWTaHPUAYJ70IpZeeQox9aevzR7BgtnPPemMvmY\nXO36UADRseQvBOeTRxG2e5PPHFJuPOGyAcYpfkD887h93NACsqySdMbeR2prf6QfmUZXk+9L\nsEi/McbQSRnk0m1di7Sdp5z/AEpCFRQ5LMueOPam/L5jMrHGMU/dklI87e9NCHzMF8DHAxTG\nNA7EfWmqDtYE/LnpShvkXPJ3YobajsBxmgBVbdkLwOlDR7Y1CovynOTSKhyu5hjHTvTmwykA\nlP8AaHP6UwCQMyo7OBID26YoK/edTgngHvSe7FW/T9KRmVdpRsjPQ0ADdlH3qJFUKMnDf3RT\nsCM7WPXk03G1iGOE6g0hifLuIwdpGTSRbWk+U5YfwGl+8TIrfL0ApSw3MHG0FeSooENZWO7B\nB460vLYUsN+N2KNqsqlclcdR1pWy7A7cOB1oGhNyqSCd2ef/AK1G8FWUfKwHB96I1EbEdS3N\nGGClMZPUUCEDCKFcrtkPX3NLy0hXaCO9OhLMpKjP1pih1bBwWPYUAOX/AFgPDBB0NJ80jFj8\nq/ypGVdnLfMDkrSyq/lhV+VSQaAIyVbqMYOM+tLIyxqep7gUi5aQoFzinMu+VixxgZOaAGiR\ntgIXluDntSts3E5yAKGk3Rh9uB2NIFPJYc+tABlGwSOlOEhViFUEYyWpgClupDfpTiwaXJGI\nwO3c0ANjLSscghQO1PIOzn0yMUblEY2g7m5qPzH3BgpxnFLqA5ZN0gz0IwaE+XcOoHH0pzN5\nzK+3bg7aR2LSHLYHQ4FAA29SpDc+uOKbLJsGNpKk9acSfL+U7iOlLvCOGOWBGcHsaNQFkXG1\nVOO9NXEkjsD8u3mhd33ufm6ZoOY42G4DmmMapwoJOVxx7Uuw+WGzmjBYAHgHpR88nBIVF7+t\nBISMG68ZwKRsdQOenBpGkAwO1D4RiScIf4qLlDlYhueRSfJyRxz3pGxDwD2+9Sso2qzNnHWk\nISMdcEup45pQ20cDp1FCtucjp3HpSeXGFf5mDY5U0wD5uWUDDcAU4ktGMrgLxmmbVbY+fkxj\nFIuGjypOPT3oC46Nk3DzMnHQUSMYVLBvNJOMelI7MwHyjj71KrGP0IJ4FADuTIu37wGc0gVR\nNljnjpTW+8c9fQUBXUhkG4f3aADeWb5Rj2p3yBgyjnvzQ2CwbAHPIpzBYwh28NyaAGtK6kH+\nEc1E0gkYH7tOM25yEBC56GmqB8xYZ9KAHgiPLBs7himtuVkCHch6+1LtHCg570pH7tj2zzQA\nyXesbseOeF9felj+ZVUBeRwcd6GkXy95ySvAqPO5jGFKk8igAGGbLDbjgn3pVb5gWP0pWIVt\nm3Jbj8aAq5YHqOPxoCwN5m05wcntSybWA54xTfnVdp6/yo5ZgB696YCqu6MqPl5zk0rFtuV6\nUkg3KR29qI1LfLxwM0h2DbuwS3Pel2nb0470jZ6sQE68UKxIyDj0zTQAJPvbRxjBBpWbay91\nx09KN4+Un73INIqsm7ILDFMoGxgZ6Z49qRlbhQN3NI2W28YHenfMoI/hz1FSSBjZSS3y+9K2\n/wAwHO0EYxSF3EhAJbb0pNh2kM+1zTECq3mKCNpzjPamqv3lx8qnG6gFEcbmJ9fSmcjJUArn\nI9qkBz
HOBg9eM0rKWbG3K96cd0km4kDikAKjktg/rTGBYc/Lz6018nhBuYjqadsO1gRj60bm\njjUKd1Ag4dAmcY60fL5m0/NnpSKckgHFAUBdxXLetMQv3X2EZpNr+XjpzQynaQc7qU7iOvT1\noGI27dnbjFIVDZIb/gNH3nO47TQI9q/KPmz1pAPyRJjBcrz+FLvXDYGNw+7TVO5nweD2FM5j\nxnkdOKAHsDGq4+YkYyKNoKkqBnGCTTF+WQ7eaVh5m7HK45pgBb7qu3QdaeuF+YncvpTIYRI0\nabs5pWkOSiHKgUAKpEgIPfpQOmQMY4FJtKqCp+c0mRIBubFAxzZ35XkgZz/Smsw/1n8Z60eY\nG3IpwOpakyeML8uOppCEyS3p3oDNuY4yMcj0oKMy8NjnO6l3Btwz8xHQdaAEZfLjUljz0x6U\n9lKoeMehpm1Ny7TnH6Upb+827nk+lO47iMgVACcjrQwVl+XgkdDRtCg4y47H60vGdvVhRdkC\nqG2ggH6Gk3HYXA2t6UsjMqY7elJHuVBk89qAGqy5+djuNSt8rbsZHrUfbBHOaNpVyr88cfWg\noVivU8A+nrTeeAV29zStuVBgc5wcetC/IGUcH+8e1MkFYDJxtPWlUOFznCnketHzcHbtx1XN\nDbduXUlie3pSSGMCHB5OT6HvUkjhmUj5f7wpvyoxycelAbKknBH9aVhAdjcA456HrTmyzDCl\nDnpTGjO4HhuMkU1ZC205IHTHpVgS7WUkDg555ppIbgcrUbuHk2pkH+I+tScbc/w9MVLGLHt3\nbWIIHakEnlyc9c9BSMvTPFLu3OSwAx09TSYw3KykclyaQsFZhjI7e1N3blAZu/XFOXARTtOD\n69aBMRQZF64PpSg/KxHXtQy43ADDdqF+VgTxxnNMkAxb5F7jO73oLFlc9CowfrTdy4GRyecC\nlAO/oFB9aBjocSAllDfLTRIoYRtgr+lLGuY2wcNnAFMULuU7eT+VDGOMxTc2Nw6UsbbV2nhe\ntJn5WU8DPFJtYqN3BB6UxDlf5R/SkDHYNwwtI8h87ao+XHWiTJ2q3CgYxTAcrFV5XbTdwVSW\nB9BSqxKhmbDA8ZpZSN/ctn7uKAE4jWMAlOc0M7eY2FyPenMj8iRkAHvTW2tGzNIDgcYPWgaF\n2qzBV+VsZNN3Fo9wXgHFO3EquFwuOT3pFC8BSeDnnvQIGwxGDzT1k+TnnPaoy2SxA4607cuA\nAeG7YoENyjNkcY/hqQEvEwHHoKYWDSY7dNtAZsnbwo4FMaHYUKGK857d6fJyAM4zzUalFjG9\nwDnO2kDAZbYSv6/hUh1Htjy8bsevuKQSGPaBls8DHpTZG52Jzxk+3tTUyJAM4bqPpQBKWYsA\nCFFK6hnw3BIpG2gJnkk4A/rSSSOuMLu45NMGK23kH7o96I2AQhBg9TuNKyiM52hs/pSK3zEq\noC0ACktH3znO7Hann5WJXuODTFdY4yAThuoojAH3D8vfNADo8qpXqT6+tLJEzlSwyV4J9Kar\nZYEg8dMU4sVY/NjdSEEanaxZgxB4pm4sQxJHPSnRsnmkFcAdQKTcW+YnIB4+lBSAM24Mhyuf\nu0OSrZ5Ur0AoVQZhg/I3UUikrLlRz70CEVh5J54J5NNXHO44IGRRIhkkVRwvXFNwGDZxnOCS\nelADpGXzCOcFaa7Y2fLg9jT9ygsVAK9N1ImF3M3zADikIQ5ZiU6jrTm2TRncMNj+HvTdrdMg\nFhnIowQRu44xkVQgG9gpyB249KDt3MG+4o4p33eFO4f3qQqAuF6nvSGJ/rFBBL9sUsi/7Ocd\nRSKdrjI6jGaWPKrtLcZxQMAE3fd7UFMxle3b2pvVTtOedoPenMp3Z3fKowaAFRi2MgEgUnEb\nE5A46U1h5keO9KQ3GANo4xTFYcu1VIBySMkmkXKsOd2Rn8KRsKwyMgnj3pZAF3DHzY7dqACQ\nKCGDZHp2
p8m4yZYDB5GKZ2xtGF4obKgMMEZ/KgYKp56de5pWYtkgc4ximtGjRli2Qe3vQeIA\nCcbfSgBfJCsofrjOKcvYAHbnpTWPR+d2O1PK+Xhy2HIzt7UMQjNtYFRnB6d6WQBhlyWbtRuz\nlsbd1Nb5jgHnpTEPXdHzkM1MjxLI2GwOpxQVCnBLZ9aVVXgjhep96TGP42bgOc8GiTKKBs5z\nkgelKPl+bPyfw01MxhiM+pqgHZLHCL155pGZuVP4KKPMcLwcHuabhVYluSvP1qRjowzkOPv9\n1qRZPlBK7BnFJGxjYyrkL/doLGRD2B6g0yQ53ddvPU96GK8gNtbOAKajZwJAWGcCnfKJAFHQ\n0wuO5aQY7DHtSsjSYGdjDvSMoDYzk5yKduVpApOPegBduxWbBc+ooXLFSy7fanK3BJBUL096\nbtc4yMN1zQMdCfkJYfKW4xUkaqjHJ3c5FMWQhQR0FLkHPzbXHzAVSEPV85VT1OSKfGu1cFuG\nP3e4qFm8zkDnr0qUScE4xkYx3FMkAqruQDEeeTVjavnLj5cj7tQPG2z92OAeTRJ8xTGeuanq\nImLKsKMqhCG5zUzNtDAfNu/iFVRGNxVV3buuakQnOzoByBTAeDiPHrTioj4x1HUimt+8g2uM\nc5UikYu8e/hjnBY+1AC7S6qR95eS3tSeYMlsFj1FMEhkXA3KueRmpm2hvlG0EYFMBNpZtxG0\nk4welEbBlYEZ296MSyOWDKw6YJ70+NPMjJDCNxwRQIarBuclTj7tL91vlG09frTpeTyNm0ZI\nXrTG+f5VGARuD/0qwY3zi2Ay7ST2p/3nzyAvp2pkeWLcfw0QBmwwb5qkksxsP4vmP96hW3Mc\ndKijkRmJbr6U9ZAuEQEZNPoFidXBChkwV6tVpx+7VgQQT+NV1X5mIbn7pqRouAJDjHQipuBe\nt28xdn3D2Nd54PjO0s3BHGP61wUNuPNHJYZxjPevQ/DMb+UHx83dRUisdJ5hkzuGSKGzJzxl\nR0zioB87OyZVsYIp8VvNcSJHDGTIB/k0rgkyWOQ7dpA696eJPkYk5x0xV218PXk2EmjMfGdx\nHFQf2LNbA5YAE4PekOzIWl86NflJK88U5G8wg54xyalbTnhO4ZKnjipo9PEaglTs789aY7FX\nPBOMZ4pY1YHr+dWobdWclug6fSkeb95u8tQo496AsRRqzlWX5B6n1qZrVljO4jGeSO9Oe6gh\nhXcuxgOlQveF5FdhhMYGKfQXKWI4UkhZRwRyCaNsccQbgmoGlXYVJ5PSmD92GIGQBnFK4uVl\n+O+FrHtwSTzgU37Y8zAE4GenrVGGfcox8zkZzT4bgrIpZc9ttK4+UuiQkseXUHoO1RR3wRnA\nBKn17UhuEhBIO0ddvr7VVlnLMRgLnkD0o5g5DXhvo5kIkQKSMA0yRvLXAXB/vVmxzH5Rndz0\nqdrgqvzH5s/gKfMPlJWnZSBnIoaYvj5NwzTdw2swG4t+VOgbaMZyw7UuYfKL5qqSMYPcVNIS\nqBlfa3TAqFYzJncuTmk+7IedoHapuOw+S4ZmKAc45zToZDGu18hv4ec1AzEqSo+cn71TwzGL\nkgE+tIpIervGu7Hz/nWhNq4azFuUwCOTWeM9cYXOTTVnh3Nkk/SkPlHKAwyWyRRIVDAnmo96\nlSAxbHelimQx/vDsb2GaBkpmI29Mj060bgzbsd+apNcBpf3LbsdWxUi3Bx8xyxoKsi4WaNd6\nHgHke1JJJmMtuwSeKrK25sHgdzmpXaORWUrnjhqBliOSJoyM9OT71D5kbSAh8nHHequ4qpRT\nlRwaihkAU4U/L0zQKxfK7vlB696Tyyyr83bJNRQyFlXvzSSZVeuBn7tAyYqCMMB6g01n8sHv\nUXnjcVY54qNZuuRk9hQKxNFNuwACNtTCYbTkAn1qo0hSPcvU9RTkiMmPbmgLF7dIygj7o6VK\nsynn7r47iq
21toBbjtikul8tgVfqOlArEv2ppDtxjtS7mDDceB71QjuCz9M9s06SUr3yaB2N\nKC6LTYVtuKlwGBLHKk5LDvWMrMnzcg9Kt2t08b4/hxwtMLFuSYqVQKWB71JHuVnGPlxxUWPM\nkwrfNjP/ANaonWbbtBwep5oJLMQYDLBcdqsQqSm8sARztqh8wwm7efvVat1juEGDhz2ouQ0a\nMcm0KpbrzTmkHG0/Nn7o9KzzMY5FjzljxR/x7yI5kyxrRSIsa27jr0NKkh4OcVUtbtJIyrcN\nVmQAxgBvpWlyS0im4Ut94jtU0bFoxhdx6cdqofZ5I41dGIdT2NSw3haTdIAnYgcZqkwuaMVq\nJEYEkHoQaFj+zqBH0HU1B/aqGZVUAZ4Jq0lxtUhgDzVbkle8ghkjO5MlutU5vDcM1vvhXY56\nYrTdFkXchz6iiTfGqhM7QcGqFqcfceHbuwUmRgRnOF5rOZgjYcFTmu8uZiX8vG6q8el2lyHF\nxD8w5x3NIRxLyeYSOT6ZHFODCeQK5AIGBiuqvPC1pvG1DESMjJrBuNJmhuNixErmgY2Ozkul\nREjL4OcCiGQNK0QQgqcEn+Vavh1pNNvEaRcxE4+lQ67AsGoPsXYshyMd6oDOkKxsRnDDpUcM\nhXpQsRwXwCfepFYmIKgxzy1IlgshY7n5x0pTmPk/xdvSlkjaTGWAGM06T5o0LNwKZCA/LIh9\nwN1OONrsGztPAA4pEwwP8S+npUbqRIAPlQ9xQUitNcStxjIPU5qBpEWTIOM9asyx7UYA5zVB\nYypORhhTEyf7UOQuX9+1IzjjdmoYdoTOWUk4p8gHmFC2VU4JoAlZvLG3PzdaTzmbnov9agX5\nWAzkZwKkwkefQmqESrMQVWUAbecqc1JHNujLKQzMe/aqm4jLZ5+nalDbenIbniqEW1uH+6Ww\nM1JHLuIXJJHrVNXcvx90dvSnrmVjvbYpGKAuXobpHXEhztbgVPJK537jgZ4rIRfmwqnHTIq0\nszLuZyStSBeWZyyZ+4eKnE/ylGPfgDvWb5iepx125qTztzoMbR0FMZqrJuVT09qesRK78jGe\nlUreRmbHZTV2Nt3y9yaQE6lZI+VP1pnmMAQw4FOVAc5O0qfl+tHzMpJHz5oKuPhzJGGZcegN\nO8slW+bacU+EsuS/p1oj+bgnA/WgBQvyjHpVuBQuGzle9VXb51wdoFXrbEnycetBaL9qSzDs\nlaIJbCjAx0qhHwmBV9dyqoI69awZumPUHbkcmnNuXLfeU0xVPQNinbff5RUMolGPKGPvUoAa\nMg8H0AqKMDhU+9T5GRGxuJJFA0Rbdy4XNUpyY0PGGHNX7mRYQfmOR6Vm3l0rcAhjjn1q47kT\nMxtzMWHXNRSZVgD8pPaiSRQxGSR7U5QZJApGRWxziRuVDE8knmpd25QFX5qjmHylcHr+VKo2\nyIADzx9asliyMdnJ2gcCojuZTk9uvrUkmJCGxgL1+tMlwpBPPPNUiWMjV2jwDj1qViAoT260\nwKJJHO7auOKVGZvLOO9aIyZIJFVThfl96ajAqFLbWpGbzV6bT60rfw45/wBqtEQDMokCY+Xu\n1Ojk2s2MAHgZ7UNG0nKAEelIFKrtkX5s9qoNSVWXzFVhk4yWpzP5ZYE9R92hY/kyy/dHFRLh\nzn+LrzRYknXHloAQG9KfGGWTaRlSKaoTcGYfNjmk3eZnBKgc1RJJbxgSsXGAvNOOPM3bd2Rk\ne1Iv3kBGQeaeNxY4A64FMfUi8lvSip/Lf2/OimM+VV8xnVWORtzzUaqzMQwBbOR7ipmbaDnr\n1xQrArleR0x6V8pc9gjeTbywKj0oZl3AnqBkilaNkULnvnmnyRhpl4wfenuPqNVmk3bjjA6U\n0gsik/yqbYGY8YI7+tRyL8u1fmPtQJoJF2zKSBhhjNI0IK/L91euP509WYqwYrwOPSlDFug+\nQ8EjpTuKxXkj
eQLGy/L13Z61J5RQKOM9MVLKq7QNudvAPakVV8pQw+ZujelK4aDWhDLgrvx1\npjW+1hzgdQe9TbTEWKHKYwf8aRoyUDb9xPTFO4ir5X323l2P8Ip8cZwMfKoGcVYWN/8AZyOc\n0NGzzDgEkUXArrCixkAnB6j3pnkusZ24C+9WdrbSOFAPNNZTIcbsjtSuMqtb+YNwbc3qKRrf\ny5DkfMV6+tXPJCZBGBilW3iVcgM31NO4WKclvvZGC8L2pvlxTMZArBxVvyyw+RT1x16Uslvm\nQE4AXrVCsUZBMrBRGWVhwcZqO3Z/JlSQbk5BUjBHvWhEvyllLKc/dzSXCLtwy7lb7xHegPUo\n/KwUZwpGKNm3AK4QcCrTQRTMh2n5BwKfJB5iln6A9KBGfHtC5ZhhW7VOU/eFwvysMYzU620c\nMRIHXqaSOEKABu20DIGjbbtwUUcZqOaMIm1VCgnk5q3OrZwrY9R3pvlrGgAyRnk4zincRW2t\ntPzfuwMDjvT0Z2XAX5gOeealWIbWJX6Nn+lK0LLtRBj1YelICDazqD1A5zTf9l48jOc1IkY2\nyDoFPDdvpSKGmdQvzEjpTATmNTg7V9TTLi3IVdkgDN09PoadtMYaMqZI/wC971IIVzGCPn64\noAjmUbgm0BgvVelWNK2wX9uPvF2ANQqFEpZlzu/hqWzxHfwybdqBx+FJlLc+i9LxNo9uhbLY\nzntWNrUG6Tnlc9K6LwlH9p8PxEHjZnNY+sL+82MMe571xvc6zgNUhLb1dcDPBrAuIwq7Qa6v\nWbdZpAw4x1Ga5y8jMexiCQ2elVsSZE0bvJwMqB0rPkh3YJJx0x6Vq7d28B8c1mSM29uMqOla\ngULqNGQ4YO3Q8dKqx58vBJwh6Hoav3Hl7du35upqpIp4YsAv8K1QhjNvypA2N0qpLt5AP3Ti\nrPnxxjc5BH3se1VptshZgu1H755poCtIwXIJ354BHSkZdu0kcDjiljiCRtFn5s/L2Jobd5aj\nOGBx681QyF1RpBsJ9/SlReWKntxnv7USAqACoU7uQvWo5oW5Ktheu2kIbJ2yxXj7tINny5bJ\nUZPFPdfO2f3sZ/8Ar0zd95EYY75qgE+7mQA4pp3CMsU3q38I60/Z5hO18EHBXHBoYlXyPlI4\npWGNVtylEzsx909jUahVBR8vx1qeRi3Rl/ComwzDYMEdc0BcaZNvyDlcYGaTaPLUb9pU5Ioa\nQMx4BPX603HluPMOS/zfT2ouIUkHcVG+Q8g0wtldjrkHrmn/AHum0Z6UGM7tpHHWhgyB/mcA\nIcgcUoZmjyflOcYqXyyzYQ4OM+9R7g3Jb5l6incBWU5RduSTzihmG85AGD+NIzBeTwxOQwol\nZlUlwDu/i9+1AhFBP3mzk8UsaMqhWYDvQqqxRwcMByKQZ8vLcdqAGyqAwJX5u5qFl2sVIxkc\nH3qaRt6jgntmmhGWXlSox96gBF2rsG35+h+vrS+XuB4LjPbsfWnbGjbCocY6moyrbsBihPVR\n3oDUb5Z+YZ2t/OlK42ZXHuaUPI0xD7emBSf6xDklh05oAPLMbAsM7uRSbcKQ3HP60FCp27iQ\nOA1O85VQfKzN9M0xEZ2M2B8oHX3pWUxhWOBGe1OdPMkORt4zio9uwBWO92PTsKQCttI2kc9Q\nRxSw/ukBb5s8YpXVtoIO8dvpTd3z9wDxzRqA9V5z19qaC3Cu3zFqHX988YBJXqajclpIzwSK\nYCEE5I5ekY7sDdh8ZqXaBk5PPUelRKquDlSoB+9QAK2Y/mXYSaEYRyHIx/tYpGQkow555FPZ\nvvAev3vT2oAYyueSykZ4Y09NmCoIK9SPSm7W3KTxt64p0bbd5KhiR96mIbuXbsVtv+17U3aF\njP7wn0b1pRhmyFwRwc05WU5BXtSDUSRl2oBxx1pijqcZajeML8mVJx+NJIT6YP0oGOlXMm5h\n84/lQ7Ksm3ZnPO
aXy1M2SxzjNN/vArmQ9M+lAC87ckLgHv1pGaLaCiE5PcUjZRRs5YdfalYl\noyx+U/zoGOJ3s2UGcYqLl4QpIz609vvIx+6BzSFFXcUPB55pgEezI+bLd1pdwDMzc1GfLOZc\nHOMcU4Ksigjr1oJG4+8GBCdsGndcDO046Uok8xtwXgdaQKsasB1zu47UihCwZQvcfxChWAlK\nqWJx1pzMVUDGAxyKQEjIAxzyT3oAVmPC4wOpxTFbLfKCB1OacynaWI5/u0eYZA0ePunr3oHo\nC53FW6MeDSKrRsUL5A/L6UjN829Tntgil3ZHHRuce9AmNUbZBng099i/Pyxz96kkcbQen94j\nvRuO7phev0oAR16Zb5TTXYtG/OOwpwXaHJwynnNIZPlUDqPvUANX5QhYHGO1PwFVsHK9abue\nM5Y4Vjx9KVsoxyAUPTFACQ5ZSS3twKVWbyzuwFBxgdad93IUbQRxTNyhh3HegBVJMm8coB92\nk+4pO3cW7CnNuMTshAwOKZGx2gE4P96gB7rwFz2ycUilV45I7A0hXyZAQmT/AHqVjnIYZJoA\nPMdo9xHPQLRwwIZcHHSkVRuCudvHTtTtzAHDYHSgYjyEsmOFxio41yXUtjnJp7blUANg0m3y\n13EZPTNAKIixgqy7tzHn6UcMoj6cdWpxwnzevHHrUckYkjG4Z56UhC7gUX+MKfvetK+0t8vA\nJpzHrxgYxgU1VXaQPm4zTAH6YY5weg70jZkO5Tk9h6UH5XTnacZFKueSpwPX1oEGGaQHIG0f\ndFSPuZV6AD0qKRjnKn5qeuc5xQA6QHqoyT1qOJGR2DjLdQKk24I9OvFNVhJuZAck4JNADGkU\nKWIJHt1oWQthlGV6Um1tpUDHPJpzI0bJyCMjKigYcBiWIAHOP6UiFcAZPTIJ7UkinexVPvH5\nfalKttJP3uhoAarjbkfMAeaPlPyk7d1Kc/LhRihflY5T5h0NACKpbjpxw1CqWzgfN70Kpk+8\nMleadu3YdeF7mgBrOFUdd/Q8UjEeWAAVyc0RB2YkvlSeKTYVxvOeM0CFOVbaoOeuTTVdcMeS\nOpJHenM26Mc5GabJuLhmbHOBQMAxOHHzb/XtTi20BORj+KlkDlwOOvO3vSEssnOCOmKYhuSu\nD1PcetIPfgU7ccEHBJPAoUeYGwMAcDNIAZAi5KnHXaKGVZOCRjGcUvLKBksQOTmmKvcqcA/n\nTQDiu1QDuz1HFLJIzKoLkHPI7Ypm7MhZs7Ow9KcV3KB3z0NMYjSDbtzvGcg4pdyqQc/J6GgY\nXnI3ihsTDGOf50CAttG9lKljSSYWUYO49c0hkY/I/wB4U1m2pvIyvTaKQDpBuDEde4p2wths\ngLjoKawCsozxjNNTCxnJIGaQXFVSIyA3OeM0N8ighiW70DCgsenQCkkVo0D+/wB3+tADsg5y\nSS1JHvVdqn/61LJlmAHG7nNI2PMIDfWgAZiuAQN2aVlzyGx3o+WPG7lfWhogzAg4HXmmAbi3\nbJ9aFXazBjxjNCJtLA/UYoX513fdGfzoEICVUN1LHFKeGcsudp6DvSL8zNzuPakLN8xzgHrS\nAUYVd6LtY9VoDZ+7n3zSeYFYFeT0pwDNGTu53YK98UDF5VyqYLYzmo9zLzuwP7opJiu5VVdr\nY4anSLI0f3cDpVIQL8wD7tuO/pSJkMQRgdfaliyzLF0Hc035VTaCQucYoAI0KxsdwZmPAoA2\n/KwBoHcBegpFVVUEZbnpUlD8N5hwAq4pB94mQ7lxwKf905I4piqxDfLnnimIThoVwSRnpSsw\nMgbG1tuOKRo2UYYjd/d9KTaVCpnJ7n0FIYq58ksuBzilbKxHgbuhIo2qYdxHG7AA701m8tcr\nyWH5UyWEbFmGCFxzQrBiXPBJpo4XlfnI/Gnqo2qB8wNAhFXy92CTu65p7FldQo5x0pjOPmAG\n4ig42q4OGbjPcUhD
lUYBBzzyO9Nm3jBPHoaVsnIXIPQcU1f3i4z04OfWgdxybzH02nOc0jDc\nwAGDT1YrkscnGBQZHOONzDuKaATAbao52nnNI2d5I55pUy7HPy+vNL8pdsfKAODQAxictuXI\nxnNIMNG5xnb0FODBUOfmDUxF8vPo3UUAHKoxJ2nsaVm+SPjLZ5FHIZlddwJo2tyc8DimAMTG\nz7Ry3YUvCsu45BHPtTcKUwSQaXl8OTn+E0DE+bcTnIHPsaHyxVhjeelPaNlUR4yOtNULsYt0\nzwtADpGYvg7VHcU359yAnI7GgZGdq/UnrS53KAxwR096BDdp5y2WzS8spLdMdKFAePBGxyaT\naw+Vjt54oAUfNhgMHHegoN26X7oXj60hxI2G3Z7U37ygjkE/KTSHYe2VVZPugdh3pI2LYZV+\nTOBStndgvyONxoVdzOAcADimKwMo80lR8oPOfWlbK7yTyT1pqq0kPAyc+vWlWNY3VWyTj9aY\nAY9u1gcL/Oh2ZpCCMnGc0bTsIPJzS7yMkrhunFAEbRswBz8rfpT9o4wxcjrQFdcEHK0oYu24\nkZoAQqAxPOMUnkxquBjf1NHzZJc4WnKy5U7QGHJNACHdG3yg4xw1DElQe460i52ksx5OQKUo\nWjdu9ADiw3FguARg01csyoflx0Jo2lo1Bb5RySKUvuP94dfpSAbkqu085NKVC5A/L3pVGJhk\n7g36U1WaQEqhBBxQA6GNZIhu4bPzE0mVOQvb86Ex8wZvm6kfSkWQOGGNnPGfSmKQ4HYSc/XH\negrukUkgLjn6elIFWNfnIIz8oFJHsCtg7vWgYfxED+HlaeHIMhVchRjHrTVJVDjp/eNG4MB/\nAx4zQIWN2Xa23nFLnaCzdT+lN3eYu0DDL60MDI4YEEKvSgB27y49/VcgUrKVUkZz159Kbksy\n/LjjJBoLM3zevFBSHjf5mF+7jdQv7tiWXcrDIoIO0gcN/MUioJMAtgZzg9/agYvzLMBj5j6d\n6G+ZjkEeopCuGLI546+oprZ8zdn5x196BC7hg7eAvWgRnYMHGeAaXHynjHOaRl+7l/lz0oJE\nk/dEEdQcUjKobc64bNHDSDPpkCkVgWLMdxoAdwrbguV9qTbG6EjIPUNTV3iMqnfjFOzI2EWM\nZUYNSAA9OML1oz5iswwH7elC4LgngdDQdvBXo1UAFm2jjOeMUuDGxI6dMelN3KcEevFLtO52\n5wevNAxY8HOB0NEihmDhsHv7U3cy4PbpinLGV24ALdT9KADase5uh7d+PWmjDZwx2gZwe9G4\nYJAJA6Ck2lSOx6laAHfNuVgvbpQ2DwDz70nytMSSUUDNOZgqbhyWGD7UAJt3MFb5VHQ5obHz\nANl15PHGKFAVQuckdzR0Yr6j71FxC5K/Njgmj/VqVKFgxzTJWUkBhkddtLywxvIHb2p3uIJQ\n0fAUP3AoVhMoDKVOOwpY5Fjc78kYx+NIjOI92MKppFDkXc6hmwo6805W2qxALqxwCaYuGck/\n6w9KkzJFhTh8Dp70AJvHntGeqnHNCsFUoo3HOS3pTQ3mZY/xfePfNNjYqxjzgDvQKxNjcdzH\nkDtSbztXcQR/s035evOcUirlSfu/hQBIVU4OcLngUquQzKFLetMGEyOpI6VJhtu0HbwM+tAh\nqsJFwfvHjFKq7hhhgdMgUeW/ysx+6eKRmO4Hv944oGKGlVXC42k5p3TliNxHQUw5U5K5zyKd\nN2UY3AZNMQ6T5VCEZx3pvIQttwnQGlVuRgltw49aDG24DO4Y59AaYhE3RkseMjbUyt5mflGU\nGP8A69NwTJs3bmx0pVjK+Ww6q3ze4oARGeRiCMlfvHtShipeQDcCePpTm2h2xwGGSKYzOEQI\nu1cYpgSNnaEC9Tke1PVRKxJUK68c0ifvG2Y+YD73tSlSFzj516HPagBxYKoeI85waYF8mRhk\n5POPrSnheDgMM5p+4j
aCnz4xmjUBzZUBcnb1O2l+duuNoORil8vagy5U56CnFk34d8IP4qCR\nzA7gyMAx4Iz1pWLFs7cEcU3iGQBgCDz9RTxiWX92xVT1DGgBvzeYzONuBwtLs3MhJ4HJFKu1\nWORuYd6jbcVXnYH70+ghzbNvA285pNzeYd3II6elDEDOMEAY3UxtzSBieQOR6ihDJl2+WChz\nzSsqeYWZWJI5waaGSJtyDKEfcpvmPuCquPWmA6JyjBACtO+8r91H8XvUTs7RsWO85wFHWnqp\nbaoY+9MQ5JBHGAo3NnnNI2F+Zjg56Uq4WRipGPWmYDxZYFgT+dMCW3UsrEDDZzg1IrKpyili\ne5qGPco2scYqdGC7TnJ6VIFqPGwHPzZxUhyzBnOQOMColKiLK84bOKkib5WJGCefpSEzQ01h\nJeLkc5+7npXtXg/w/BJp5klfDMvIFeLaL8t3CRy7NivoTR7dYNNi2AjcuCfep2GWLbTNO0+4\nVmXzFXoT61aXVIbWYukSgseuOtZd1I8eY2xjOc1Vuf8AUjBG7GRUlI6O68SQNCVflgMDmuau\nbxpHMiNlj1rMml+XcwIbPXtUEd+FYAMCe5qRmtJLNtDHO7tinSTMqqZDnvx3rNTVpGXC4z/e\npGu5JCrE59adwsXJNSGW2ptJHStiHwletZC7bBRv4gegrmFm+ZncYB6etEutXccbRCeXyW6r\nuOKVylE1biGCzlIY+YcduazhMWk9FB70y0zIA0h+Xs2ad50casPvHOaQcpJIxkxJ1H8qbHLu\n3DcetVprzoETHPSpGvBCroqD/eoDlJoz5f3AcZyDVmO/DAjYOnJqibgyQnnAUcU1bgDZ8mV9\nRTuFrFmdisIZ/uZ6Y71Vut0sJZCQ3pmnSO7E5bPPC+lQsyW44BIbkj3pAoiQvOrbOikdfetA\nM32cNkhl4PeqC4lZWJ4q9DNBHIFl3FMdaQWLEdwV5zk45psTNIpZpAGzxg1dt9SsYoykkW9u\nxArOmnjm3NGgVc8UC5WXo7p/MGRsxUUl15kpXPOetU2UswJJDdwaduxtDAfKfSkOxcWRY9y5\n4/vVB53zBS+5T0NQLIWLbTg56GmbHaNnGVpjNCKYpldxZe+e1RNKGbKcL0qOzL4O/ow60so+\nzoSFyo/lTAmWOSXaIXwOpzSyTvBkEA5HDVFayMwAY/u+uKW6nEkYXOWz19qQIhtleNs7vlJ5\nq/5ZjJcfOnrVJVEY+Qbs8Zpsl0Y18gS4GeaRoTLN5IYrksx9elWbeTcCCeQMnPeqUexTlTkn\nqTS/al8v3BwDQIuM6+Z97hueO1RCRUZ9uSMVUaUMcqO3P1qaH5Y14OTQxC+bJCqlenXAqSOb\n7R8z5B6803PnYGdhApoJ8vgbiODTKQs0waVX6DoasSyCOMY+YmqbMr9eCvapI5kWNZGOAp/G\ngC3G5VdzAewpkl08LA9ATzUTXXnNiIjHUUy8Jkj3q3NIRcaRupODUcjNJLtGcdSTVeFnkAPV\ngKf9pZl75B5NAi7Cu87FHPXmlaOTzQjjCjvVeC4QHdu96tQ3ReM5G45yPpTAcJQrbXXK1BNI\nVP3eOoOe1Mm1AMxIXBX0pNymPzZed36UATpMAuQTzUklx91i3y1SV134U7gOlTtjymB4BouI\ntNcbpC4PzsvWoob6SP8Adl8Enk1TXcqq7/MucAUrDdJtOD9KYcpqPN5twhz90YDVa3Ksahuc\nVgh3jk2DnHPWp5tQd/lQYOKB6GosyLNzgLjA9qnjvimOQ2DxzWB9skMZBTLeoqW3uDdRsxTa\nw4quYnlN1tWlVj5b7lPPSnrqfnYAAWQ9axocKrjGR6VPNZzMqSLlFxn3z6U+YhxNZZmjmBZd\n4q/HqCecvGVxzzXLW+oSLclGHb+KtIR/IJPxrVS11IcdTqVul2ZAG49qZFcPIuCwyD0rBhvm\nVgCWxir1rcf3iAe9aqSu
ZtM1W2tk5BY/pWjZkeWrSgb1GSx9q5xrph0IAHpSf2k0ieUrtnPN\nNiNe8ka6k3Z75A9qkjTzAAI95rJ+2bGUBtx71Yt9QdZTsbaaoRrXln9uVRsVBjHTFcz4q0+W\n1mgZB5ny/eHSthdTmH3802dBebEY5A55qRXOI+aEgsu05xSsSXLY+UcGu5g0yzJKSxAg/wAR\n7VzeqaOFmcwFim7HSnuIyduMkknPQCnqxWIbvxpGgaFSkisHDcHpTJPmkANMViVnC8AcGlkj\nC7AVwhqKPBU9ivTPenbmYgt16BT60CI2Y7j5cZwDjBqCSNmkwF2nvVxZfN25bYe4pmGfdk7v\nQCgm5WMQQkcMMVFInmMSp2mrpgCqBH1brTPs7LIz47bfrQMpxqF4J3H1NI+WjAUd81c+z7/l\nKgbfXrTJrfbIrR/MGGCtUhlVt/G1c80qxhXVt3zZ+7mrIh8sFWUgVDJGqsqlSV7VZIRksrk9\nCeopVV1DtgEAdKd9mRv3aOSBzSkCMDYhyTtOaQ9h0e5s7fu45p2QXCk9OTSeXtBwTkmniA9M\nc4zmkIVcOxIXI6kU4zIz/KMcZAPakUsVGAMentTkhO0Ns2gjIoGSW8km5ShzzyK1o2ZWzjnr\nWbHH8oIBUj0q9Gv7sqzZDdaAJo1bqxzu9Kn8wMoO75hxUIZfLVEyGWpofLZSSeaRRLv6HO9O\nn41H9o8uQru+f6U2M+WcqcoT92pwx3E7QOPSmMm/uFuQRVqHC8/dbt71VVl3LkZOOanhn+bc\nxG0DAoGakMjBhx8uOfrVkzSbgHPHas1JG2/ewMZqdZ2lRcjms3EtSNOOZWbI49TUnmK7BAcj\nrms2GY9Bk1MswHy/rUcpqpIusRHcZVSVI604KVViy5J6VEJivQ8Usky+Xwfmo5R8xDcfMxfb\n+tZd0duSRwf4hVqVm5TqaoXSbcLu+fHC5rSMUZydyurKrHDb0605Zisgz0YdqijUK3SnhWC7\njyf7tWjIdJI2zKjvijc7KMHBXpQkYZV2Hg8kUyRl+0BBkITjd2zVkC7g2BjjvTnzIhKkYpVU\neYyM2CO1IXC5CrjtzTQmRsyBB8uc8cUKwjkIPTHFJJD5b7c5IGfpmmCQt/DyvetUZtEjqAoK\nnPanFljAVxx13ComYR25yMbjkmpFUCJfTrzWhA7I3gKcbuQKfu87C5JxxTFdVJfGO1KpC8gF\nWNUhdSRvmKjoelK2EbBGPek8t9vPU96dCm7AZtxFMhkmQsOSPm7U3AUjBy2eQKRg3IPK0/ym\nwGQbtoycdhTJHCceZt546U/dvQI3BJqJdm0E4HfIqyqljvHKYpjHfYR/e/Wiotr+hooA+WI5\nDuUfdwf0qR4S5Q7SAW4x6V0V58OdUs33PbyyA8fKhOP/AK1UW0e+gWRVgcvCMu6gsFH0r4/m\nR7vKZcjRrJ82Txgbqf8ANw2Bhf4s1bk06dVDTBnB6MUI+n4VT8sQswHLdNo5p8yFZitIHLFS\nOV69qj2GNcnliOCKkBdrcZi2rnB45oRVb5d+CD37VaYWYwII1UNz6jFOjkG5ymWH93HFKVHK\nsGKscbqcsZhR05X07mgmwHcVCjA780jKI0AZcj1FCqbgjg4XqadyqEA556UrjsRCNsBkbaAR\n/wDqqRlVgWEe2pI4/tBOWAUdPrTY25k/2SRQOwx13KFwYx97NNJQ3CghvKPU96txyFlQkjIo\nT5mdZQBnndigVip5KRySAgsp6CnCNcIVOPap9uSc8r0xTGjKRtuAyDxjpTAbJGZJGC88dT6U\nnzLHyRx0HpUyxrKo2kj+tKdqq6Abmxn6UxFaCMSRthiHHIqT7OZ1+9nacnHejb5a5Re3IpFU\nwqV7N83FO4DWjC8DG08ikNu1xyvyqOuamlBOxm5Y8VMu7ySq8Ed6BFFYy0mMbWFPVfMBLDb7\nGrDw7eAQTjOPWovLd0yfud
x3FMRB9mMqbSOh6jpTvJXJH3WX9as+WnljB2DtRt8xMHt/Eo60\nXKKaQiYsz8FR0HemeSyqMYVe1XG2LtbiNjxtz0pZIwvMn3j/ABLSEUHV92HGCTj5e3vTnh8w\n8H5l43Z61akHl7FC5Unk0uxYdxIBx0FUIpiB1iCttG48iotm12baPlOBV5rdWwXJy1K1v5a8\ndc4+tMCjMGZBggOeRiljjXJbgYHK+lWFtVYLgneTnntTpLdY9wIzk4JpdQKDJtjj9jw1N2uz\nqrjaC3Xt9atLCrOQ/EQ+6PegqvzA/KvTdSewLc+hfhndC60KIjKpGnlj3x3qHxNbozPxl88H\n/P1pnwkuEl0GKJed0eQTWl4lt/3ysGzxyBXI9ztPPLuE+YqlPn75rndYhKxuFGdp6jtXS6oz\njdITg+tc9eSKy7ctvHO7rVrcbOZyRuY/LzjmqkjFtwc7e2BWpqEZLBxyh6/Ws2VUds8E+tam\nZQuFO0fLwe1VLm3Ei4Vic/w+laFwrrgspZfaqTO6qUBxg5HrVAVLiNI12oNy45zVaQCRVZRw\nOgHFXrhVkLkDaV7iq8aSFVUBWz1bpin1EV7iPzMg5UY+9UCkRsgB/wCBf1qzJv4UNkqemOKh\nx+85AYZ53dvemBG8beYM5IBzx396i8t1YHG76mrbEGMkHPPUVSY7m3BSvtVIAZirlwe3ODTW\nwzJt79aczFmBC7QTigRqWYA554PrSAZtIkmKMD/s03buVskkU5tzNxHsanL+7YkEk9SuOKA1\nIvk3ZC4UDHFNkY7ULDH06/WpDGm4FcnPNReVLHI7PMHBHyrjpTAafkzyGDH0p8mVXy9uTSsf\n3alvm2noB3pzNuYxk/MGzu9KYiNvn4C7VHGDTUyBkZ9OKV9+7ci7lzj60vmBWKY3E8j2qWMa\nyMsYx97PLUm2Pe2Bk4yT704smcgnf/cpvzMzFVALDBz0oAb8w3qOfShWZdoZQxHOGNK+FIOD\n/wDXpv3ZCzMNo6iqATCSSs27r6Uv7xWLEZGMbf60xWMkhI2lf71PXdsG8gknGaBDVLMoXoF5\noZtzFjnFKzhXTnAbjOOKRFa4WQF8IDwT1NBQ1t23huSepNIY/wDloc+makC/dDAKR0NMfaN3\nOB3yepoEDoQVYqOTye9Iz7VIA70ozIgLBmpEG0sfvcfd60CGSTOzGMHAPzfSl3DccMckdaFV\n2ySg+vtQyOzbUO4AZ6UuoDYwQuwc0rB2/d8HIpfvR5J2sO3rSeU20mPcQ33j3FADV3qxI+Z+\nm2jktydw7Uv3m2ROSR1OKOGYnJLD73FADVZwsjEkszfnTGcbSpXHPNP3dGGSo6iiTJPykFWp\niE3D/V9D1BNMcrJGwLEIadK24IWPTjA6mmJGpYE8g849KBjvugsqbn7CmjK4DDYT2FPXu2ee\nhph+6TnLCgLAihWxuJ5+7QI9oIcbVzzzSs2eScntikbLsFH45NO4DvvsQgxnndTVBjbBb5TT\ntreYQcDA6g1FIPMwoGOetIQ5vlhYjjHRabvZ0UhSWPbNP8sx58znFM8xf4UII79qQCOxCKXB\nBzytPZiPmABHvQu1m7kYwaZt/dkdRng+vtTGKAFjJY4B70bSbYKrhiDkUqjdjzBwB93+lR7V\n8s4HH90UCJJG3MFHIOM0HfFJgRqy9qTd5cakc46UFd2MtljxQUhFKsm0gqf7ooP3gACCO9SO\np3ANwQO1NYFULDk9BTII25b5hhfakRW2PgfKejVI26NQYyJPUkUkjESAE7lxniiw0N+ZVUsc\n4HSlkw0aMCCCMkH1pV3qxzhvTNMbasas43c42ikMVstGrA4Ip7MoY8YJpDs5wcKBnAphyyg/\nj+FMQuBtKBfm65pd5XYyj5TxTlZdpPY/Lk9qCpVQv8I70AMK+WWI6U5VCqVbAG3NIOFKk/jQ\n21l2dWxwaQxipvBUHHOKdtDN
sI6cGnKu4gYAIFRgCVuPldTkg96AF/5bAbflHr0owAzDIPBx\nS/NlsDIIzSSMGhDBdj96AGK2PnILHbjaaUN8gYryRTmlPGcZx2pOX7Z4oEDcLgjt2pHVF+5y\nDQv+tXIKjGD9adh49ySbcZwCtMaQbmaTOeAO9JvbncwYelDEjAJ3Y4NJhc8f99UITFwufkBc\n4o+Yr93K+1JHlpD2U/pTlBVgGOW6ZpDGNmRkbPy7sEYoY7ZCFGd3TmhpPMU4GApxx601UKxb\nWb5xyKAFZdrbWOXI5xRlo2x1I5pxxlX6nFIWHlg/x5zigQcBg27cWGQopEwoJIwe9HmL8zKv\nPtQuCu4fd9DTsA7buB29PemrhhsHLD9KXa0i4HA601lHlja2CKQDQPLy7fw9BUsbHcFPynGR\nTACrfWl27gTu+bGKBjmB3hwctn86VpBtyowSeaXYoVV35OOnvQrIflCkEHkUANbO04OTmgx7\nMHu3rTkBWQ5HTng0mCZCX+UA8U7CE8sx/KOWHOaSQ/OGc4B7etPXejSqDn0NRttXBYZx1+tF\ngCRZN47Cm4CgnnJNL5hXJJyPU0jN8y4GaLDEYFFJL4BpQegz8uKFjVjh+RSKyfOAOg4pDYrK\nFYYXikb5lCntxupNxCqy8PxgGlkYbtx+UHigkZkbTx92n/NsDDDA9jSNtX5VOT6Y603zCUI2\n7ccUDF/1uWUbNgyeetN8wuu5Rz15p25SgGKTC7uRjimAKwO5iMHFGSqqOvcmhstGRgEdajcu\n8I4xj0oEP+XdxxihmO0gHC0LhwCRx6ijBz8w3igQqoFRgTk4prsdow4ORyopFGGJPymk2qpH\nQepoAkLLIFztVQOPWhn2sMLkeopmPLXlNwzwQOtO8yRo2OwY7YpjEyi5yc59OtLGMMCUAAoX\nCkMeO1NBBzuYhs8elSMXerEj71H/ACz9z0FKudhKjoeTikymN3Q9PpQKwrNuZeMgUi7VkYFs\nZ7ml+aNQSQV6Ck+XdwvzDuaYCSFo8bhuHUEU5X+RiV2sfajnzNrNkYzSMXUjadxP8NIYo3GE\nBmx3FI2JNvzEMOSPalfls9D6Um5i24jrxmgWoOR5ikvgMeKUxiJWBbJznHahQittK7jikVv4\nWGW6UCsG0ecrdRikwCxVTz1xTmURsFP3vSmSKJF5GzmgBfmdMYAYcimhmKgBcN/ezSt/q9oH\nHTNCL5cW3Yfrmgocc7owAGKjv0pgLfOJG6nIxRGvyswOBQylYt4+YUCYu/EgAGPWmqvQBupp\n/miXaOj46YprKFQjrk0xJgmQxQ/ePU9qT/Vj+6QcfWnj5Bj+A0z7q8deozSGK67XXBIPehQy\ntjuvNOYu2MjB6k0nmGRSSTj1oAU/OxZRlj1pFJRWBXLEYpCAwwrbc/xUfM2N+cL096YCjHlo\nCPu9KUuem3CDtTWXPQcN1pWVVTGfoaRLFkZQ28t85GAKTYNoAOcHJxS8BQeCehbtRCQqlQQO\n5zTuPoNBaUs6R+WPX1pqsJMLjB67lp3zdmwp7U8IGGFbaR+tBIvmM7bcYP8AepmY9rrjIz97\n3ob92wwCxPU0NiNid2BSKQxl2q3dscYqThlQISOOaa2xcAjYucjFCoAzBMsW5oGB+6wJy1Ei\nnyxhssetOTPmAbcv3Wowp3MApIHOfSmT1FxgBWPHrSMNrbTnDcBvekcKVCgcHkkUSYRVJ+YA\n9PSgGK/y4JJZweRSM3zbfXmiZiqhwfm704MC27GPQ0wEbDAALx60NtZSeuOPxoXPnNlsZHHH\nWhkDbVXju3uaBjWjK7SWy3oKey4yvTPO6j+P0OKaM7sHj0oEO3M/0AwD60kWHyP0o8s7SAOe\nuc0ECMABeT3oBiFto3MOM8KOtKxA2lvn3c8dqOFmRz8wBxihP3cjA8A5AoQBHJjeA2D/AHqV\npFY/MdzEdcdKSMn7pXdjkCkDbG
bHKsM9P0p2AVcr+7bkdSTSsxjwnll2PI2ntSHLFR1+tG5l\nyucg9xQAMD0UcdaRflySegzTh7HJx600feOV9jQAgbcQSetOHygkHJzTtpEbbcZxTIQB9484\nyfagaBtxlBDEKOqGnPjdjhT2pMq7KG5Y96Y3ysSRuxxQFh/ybWyCeOtG0bBg5AGTSsNpEWcC\nkYqykIdoHWgQLjbkkgHoaFYs20KePWhl8xcAHaR+tHmElSV5UYNAheG+bHyDsKRcAsw6dx6U\niIUZecr1FKMNkYwTzQAIc7iBhT+lEzFEXAP4Up+X7vBPBx2pg2q2WYsRxUj3BSpY7F3HGS1L\nJKGwOpb9KdJ8seVG1SaFyu3KYUjhqoATAcsEB4xzTWkdYyMDcTxgUucqoByzHaPT60DaZCVG\n4Lx9aAHFcKCxwpGMUzaFhUH/AFgbIoVVbnfkeh7e1OIPVmGaBoJDvYlR8x6014woKHgZ+8KW\nPMfzYPqcf0pWKrKZA2PMHCmgQkkfzgEngdM08ZjUHqPT0pFj+Xk4wPXnNL5bCMAsPUn1oAau\n6Nzu5Palb93Ii4zkZNDP5jCQrwOMU5GV2OBhsZFIbANt4HI680yFnUk4BJ4H1oVh8uedwyaV\nlEYLA4weKAFeRpEG7AxwR700/L/DubsKST7o+XcxOaUnK/MSUz+tAMQ4Xa2PmAwaAMLlwF9x\n3pSSW9jxTfvNlD83Q5o2JEVjIzEAgj9Kkk4VNh+8eT3psP7tWw2FPWlXC4AHPtSAU/JJngjs\nRTFj7cbjwDQsaszADa+cmlG5U5PfH096oQNtVgpj+bpxS7thZCDk+g4o3uqOx6jj60bnZADg\ndxQMbu6fJjbyeaX5FTLvhyOCPSmEFmOGz6mpNo25IGKBjI8LhFY4PbtT+7s64IGBQy7pAY8d\nKFHm8feP900AJgbF/vEc4pqKkWVBPPPPSnbWDsqfKuOW9KGPmKMD2FACZKt7d80skmSdpHtm\nlTMikvz6ik2KV3dgOKkTDckiq2OenFBUmTrgYpN22Phcjrn0p207kG4ZIyGpgIsnXevyetOX\ncV2fd7/QUit155FOVQ7EZ2gDn60B1EZtyhDz6H2o+XK7f4TQzBVG3nsBSbWRNrY3deKEMeWL\nTHAwpPSmMSS4HHvUhxIowPm70yNQqEMT8wzViB1EcmV6gY9jTlDlshuCOhpF3KGBBOeTT15j\nLdB0ApWEMC7d2eW6KaesaomWPPc0kYZhjOCBmmx53MSNq991AC/K67gxJzwKVVaTGDhvu5oB\nMYPA5HGPSm8HbgEDHDUguP8AmVy5beB8pWh/lUAjOTx7Ui4xvxyO1KzebgBPl/vUCHuQrjHy\n8cn+tNz+7IU5J7mnLls5OWAwKbHJ5gwuQw6g1QD4vu5H3g2Ax64pVLhVJ5UtyKZCojj+YfMv\nTPenwr5a5zjHOPWmIUHdxgqMfe9KXzGkYfLkAUkef4u46U9csq7D8+celAxDIGIZBhR1NOjZ\necZYZzTOQxQ/Kvf608KVaMKNozzTQCYaSTDDIXkLUrIzMpHPp60kezcctuB6luOadGnnfPnM\ninjFDAGZ1DHOMdc1I37xOm0DufSmR/NM6Ocqw6jtTlILFRmTbSAcVBXHUgZB9qV8cYO0cc0x\n2dhgAbR6Hn6Uqsu5ONoHUmn6gK7P5m5PlQcGh2MW1/vgcChJhllAIG7IpQxjjO05YnimSxyq\nu5VcFd3UenvUcaqZnBJYrxmk8wrJzkeoqT+8pHBGc96AGuP3kez5vrSszbgxGRmkVkQDk7sY\npY8Mq5bKf3vegBPlWVipxkcURxneAW5xzTdrq2GXjP3x0NCruDMQQQaYiVfnb5R92ljkOMnH\nXAqNoxJHsDFCeppwRI/kAPsuf1oYEqybiG6kHH1qQSKWBxnnP/1qhjjXhASoP8X9KWPGCucs\nDxmkIt7SFJCdWzwas5aSPZkVSEZw
AJNwzkrnvVuMNgB+ZCeKBnQ+D4ftGpRArwp4PpXvunuY\n7Xyx2FeP/C/TxNrCtJgr0Qe9ewXCmNXKnBA6VEthrczLhX+1c/Oeu2kuI3FqD1cfwiqNw1zJ\nc5j3Z74qT7ZPC2SvbHNQXaxnXyyOhyrLjrxWTtaJWQAkk5ra/tKe4lKylQoOaoXH+s4YJuOe\nakZGzjyY1AKt3qV5iu0qp602SRI2RiCxzyO1FzM8bAIPlbpQUNk1HbLtZSOc1Jb3i3G5f4s5\nGRUDqs3UYYdzUkcShdm4BjzigAEzeYTk7c4KU6UlV46dadKiquB8rY+9UW47VPUkfdoGXlsy\n0azqvDD8ah+zlt3PLGoobue1jIDZ7kN2FTec/wAzFl9QooGCwiOYln4xTNvl8KNzZpygyLyu\nG65NRsphbc55PQ0hBJcFmwDk1NGyyYwRjuarxjhuMk98VJCpiZAqhgx59qBkpVtzmMYX0NIq\nyTNkjgCtCRVbafuDHX1qqzMuWU98YoERtaooBbcW9QaW6kb5VVeBzUscytxu6dQab5g3Fcbh\nQMsWd55y+Y4xgd6ZLdLIS4O3H8JqvIBgjO0dfakZFm2Y+U45+lSSWbeeLbu/iY8mpvtSxRsq\n/vD0xWXMixSHC4jIp0LOIyQ33hgEUBYmmmdeF4Xv7UqyTPhdwIx36VCyt5YXdl8Ukkkcbgli\neAMe9MCzEzpGS3POKcWLKAvXoaa0gZSqHbzUTY2/3T3p6jJmZo25bjGBioTIJZAQMDoabDNt\nYq5yD0z2p7TICVRArH1/nTKGbnbK7wF+lS4KgYPB4BqNTuwVHOeas7TM3Iwg4qeohI2JyOwH\nU+tOWYph5MhVGKa0ix7VxkjtTPOKsTI3y07CJIpAvBJ9cnvUhmVh8pI9qri6Xnd82enFDSKs\nZzyf1plD1b5SduD60SMpX7uVNMV48hieMcimXFxHs2YKjqPWkA6MMrJjIUHOatSMY42JOdxz\ntqC4uvMhhSPGcjOKsQ2+5mBG70oEJ9o27VC7gOeKJLsrbttXCnPy1HMvkkheB3NV/OjmPlA7\nX7Emiwi0rF9pxxjrVmORuoOwY61jq7gNHnG0+tSNcksgVsr3BpMC35mSVT15PrT8nyvmY/L0\nqnudZtowIzzVibb5ZKPzjpTGW442eB33beM570W9+yxCOVPmAzWdHMbfaPNJ9f8ACnzTDcwH\nA6g0BYvNctJyDhf5U3zSGBHIPWqUdwoG8tj5cVLdMy2asS28dMCgCzHII5GI3Z75605rhTIH\n2bR05rPt7oiNvM/1mM5zSR3hmYbxkY4pCNGGYFuOfUVKsqLICpyh4z71ThuoWUI5KHOTU0ky\nPC+zGOxoAvxzBHGDtya1LXV3b5GG/aM7u1cut4W2ZTp6VdhukZdoJUg5xVAa8yJLiULh2Ofr\nVhpuNpBAHGM1jSaoJmCj5NtEd8XkZGfgDj3qkyTVa7VpQVfBUYpBdvu2jIXPas0S7m/u9iam\nRtmRv5x+FClYlo1ZLryVCKSx7sKfZ3/XcMsvrWNHI8akFwT1qQXjKwXbjPU1fNchxOgjulZd\nxxzS2kyZJLbmzxWNb3kWWSXhabBfskx2L+7B4q1IzcTrTINuXPNWoSqD72eM1y0OoNNJirou\nirHaSVxirUrk8p0f2kNCVYfMOQ3eoJIzuXPPes6C6ZlKnqtalvtmjy3T61oibDZLOGb/AFqg\n5GPpXPXmg/vPMiPGMkV0UiquGQ7lFOiXzj0+XGOlMDhsyRxsrwkDd1I4pVR2LbmDL1A7iu/u\n9LhmtSrhQDxiuT1LQXsWLR7io/i60EMzFkjaMYX5h3pOVwU+93Bp0m3cBjB9cYp0g3KVIwcd\naCRIWGC+NrigMZNgA3FlzRny1T5PlHWlQlXyoIHYimUhxt2aQk8ZGeaRFRpFITBzilZWZi+/\noPu05VGwHdjPJoASW3baxP8ACaY1rG
uHGCzDk9ql+cMHBLDuPWnqobIIwOvPSqGU/s5iUDjc\nfSpfJJYEj6CpxtQb9uO1G4scbSOaBEX2XbIMtk9TSRW7OxO/P+yKsxx7iw6Ac80vk/eb7pzg\nD2pAV1tSnQbj1xU62xaMDv6mpWT7rIdh9KlVQzKPx+ppiIUUcbjjHGMVLCNu4hc/WnMNzlvu\nt2pVbnb1LDk0gSHqpEYdRk9MCmseFH3T0ohXy1yp6H86d5m7JC556UFWFjh/eAK26rDE7cud\nvOMVAIzKwCnBXk1LxJIu9c/Q0DJI2UKcNk0sboWwTn2pq7VYhRn0pTu2k42t60AWFlIk25yo\nFW4piGznr2rNjk5IzlsdMU5ZHYrn7negDV8xlHy8fSljkZQRjIqlBKylwfmj7eoqRZ3bjtjk\n0rFJmmlw7qAo2n1pPNxnPJPeqEEzMpG7J9BUjXCk7d3OM0WHzEskrKu0DLVQlkMZ3SD8ae8r\nMu88HpULjzMbue+KNibicsu3IVOtOkYrCUIPTIPrUXlhXLZGMcCpWYs0aE4wOc0wAMflbPzY\n5x/KkLmPPy59h2oLKkjc4PamqCvO/wC91NUSxVUvnjYRz83elUsQN1DKd21m3YHBpxUlCd25\ngKpEsheRhwMnJpRIu3HJx6UoYBVbnNNbEUe0D5m5rRGbGhjJJtC4yOc1Ko+Upt+VRxTPmjbL\nHJx2pWkaRQANueK1IFTmEnbxUy5Vl70gfbjJwg4OKI4yoYggg9DVICY4aYkg4xT1YDHH4io/\n4QKVdqtlQRH6HrVE2JEkBmC84PemqTbMxDkvmn7WkQ7eB1FLtVUILZfrwKCGRp8oAcfMTmrS\nybW2q2CRUQxwepxUgUbT3NUCJPOl9RRTdx9KKBHtN/8ADdJJWaSDbJjkVzuofCO2d2nFsquw\n2swXHy9xX022iwzKU2gH1qvN4Xt5V4xn09a+JPquVHyJefBHTm3eXCGGT8pGB+lctf8AwGhL\nS+UrBP7pXnP1r7Tm8GKyMRGp9sdKy7jwPlj+7Xp6cUE8p8PX3wKuFjBgiZNvPzYwa5S4+Dep\nWas0aeYN3+q29zX3vd+ClVDtgXHSsm5+H2/BeIKf7yjmqQcp8B3/AMPNW0qdBNbtG8g3IOuB\nWXfeG7qE7nidX25KqP1r7x1H4Z21xIWeMMF6BhWDqXwns7hQBBtOMblUflVXaIcD4ij0028G\nH+R27Gqs1q0ILp84HTbzivsDWvgbp10AzWWZFBAOdo/SuLv/AIDx27GSOVoc8FApI/M0uYnl\nPnCPbJlmU5x6c0KwERQL8zHOfWvbb/4HM0bNG8jE5GQnI9zXOyfBnUYGLBdwT+HHDe+aI1F1\nJcDzJo/uqfl4yfap5Jt0YCja6/x10+ofD/UbGYAWzzj1TtWJceG7y2VswTbw2NpXH45rTmQu\nUpxtuHAJJpGXapyue9WYbG4jH7xWBBxkio2jZWYbc5NNNESuQqrMVJX5OoFKzhX4OPanzRvL\ngq/yKOVAzUSybm2gZUdj1quYLBJGd6lW4PWpdm5iCflzj60jfMvlKffOKV1O3BfvnFFxcrQk\nKgK65JZTnbSyM7MTjKH0pzthwwG0U7gRna2U6kCnckYyEBefcU1oztXHGW6CnTL9oZAAQPX0\npf3SyYBIIPGe5oGDBVBRxuAPHHFOhCMzdI1U9PWiNipZfxIP8qjaNZJME/e/hFLqAeXFsbAG\nN2RTmz911A+lO2hYxEE47Gj5uMgOewqhFdV6g8j19KlWF1+h459amVf3ZXHJOdtP5jGxjubr\nigGVvLDkkj5sYxSOi+WrMeUPHrU5ZZmLgZXGCe9QIishH3yehoAGh3YkA4YcAUbS6hR/D1qf\n94QoQDOMdag8vereZuiwew/SgRW27WOF5zwaintwEdcliwz71eS3G/rjjcQewpSgZTuXIPSn\n0BbnrvwTRm0tFx80eRz6cV0vixfJDFOG
zXJ/AuZ2ku4WIAX5VHvXb+MoWZFAAHpmuR6M7ovQ\n8w1TGw4G3t0rAuofLt8rg9xiuj1NScgAgisK8cKvK5Hc1URnOXijD8E5HP1rF2mFgrKSMfe7\nVt3OCzYGFzWfMvaI4Pqa1IaMy4uGCuSCFA/KqrtujAIHzDAcVfu03wmMDAPB3VmsrKrRhRtQ\nZ3U0QRFQEKnqD99e9V49ytl2yPpzVqSFGjUA7Vbk/WqkkyiQADCLxk+tV1EQsVxIpJz7VXYA\nqFUMR1P0q4UErFgRj+dV2V442IfYfzqhleQr5gCsVhxn8aMHksQeO1NlXax2jIPeho1K7g2D\njFO4xdrMuH4+XioVlTasaqQe496lVWZkEnDAZHP60nmNNMcYT/apCGbV3EHLDH600xorJkne\nRjAPSnr91iW4HanK3lnftyuOtMYjQgsBkoO2ahGFZsA8fxGpWyzD5cg8g1D823Yxzz1pX1Ab\nzxvwfVRTcBtynKNnIPpRs/eMA2Fxk5pFy0ZOeOmKq4hPNbaFQED+9TZF2kYOH6mn7fOXap2A\ncikVfvE/OelIYrgzfNjbJ096YufL2lsZOCKEX98NuWOME+lHEeed5zjNNCGkImBuJAPApeF3\nMRlvenFD8xHTt9abIysC2M44/GkHUQMDHlgq/QUm3dtIGKSSQ7QNp38U5sGMKCQo+YrQANJu\nUIeCvb+tMJ4yfXn/ABp7kbjtIPP4kelN/v8ABzjjjp7UADEDEhG8dAKYVJjXeR149RTlfauW\nIxjr70xgysGZcDv9KfUCVgrEYOPxpp/dtnAUdCfWkmwsewEEHkNTRu6sg+XsD+tMQCQqu4/d\nI60yPd1DYBGOvNLuEjfN8v8As+tL5m372CvTGOlAxu0Sc9McYpyjczMScBeBSbW2/eEZzwaJ\nCWyWOD60BbUasZ2KYxhu4pysARt+bsaU7eOdwAz+NKoMe5mZS7Dt2oGxka7cjcAM9KZtDSAb\nfYDPT3pzMcYcZHt1pF4YhRl8cE0EBtVZQS2Cp49KYVPluyn5l5p3k5C4HTqPWmY3MWJwemPa\ngYrbv4cCmq2CQzL/ALwp77V+7yuM0Mqqobb17UgG+XkHZ8oP8R/wqOSTYBkYPrTpGkdV2nAJ\nx705tu4mRiT0HFO4g/hyDmk+8CCABSMuW4YZ9+KU5BTadynrQAir82Ad/oO9KvzQuJPlIPFI\nV6gHDZ4+lP2iRhhgFHY0ARr0G3O7uopqyKFKPwpPHsaerHlMDOfvd6bhWRjgjb2x096ADaS4\nGcoep9/WjgcIcdjigY3MCOccUke5WzjIAoGPjYHkHj0NIqhZju6ZzmmSfMwIOD6UqKysVcZ4\nyDTGP/iOWySevpTfN8zcmCBnoaO2c849KVZDsOU3H9aRIjLtwQ2QxAxSugX+LjPSkzt2/wAJ\nPU9hSttYBsbwV6+9ADWjKMW+8ppvZcAhep9adGSvXPuKGZlYMwyuOKaGHHnEA9aRmTacHK9M\njtQqs3zceuO+Kase0nAAj6ikIeRGroHB8vGSw6Uxd43KBujzn3pMiQHinKCi7mXcW4FACOTG\no43DrxTpGXAK8LwaSNSgQFcD1/pSqq/MoIVfWgYjKN2FOF65/pStvbL7cNjGaBGI1XndShW3\nEnoRQAm0SKuDtZR19aZIDNFxwO5p7blbG7twKbtZWADA9yKYhAm1lI+cMvJp23aobPSmfxeu\nKVvl+bOVNIBBjp8xJ5O7ikkwWC9eKUlUG4DeTxyaQ4XO7hscYoGSMPlAxg8fNUUkb8DgIT2p\n6qCyqwJGM8HvUZZtuzpzVCHnLSZGNo4NO8vbu569KauHwoGBnk0rMyKcLkA9uuKQajTgKpA+\nUnPFJyVIK7snvSyBVGAMZOQKRVzJyCB70gE8xZdwD5ZfvHH6U5WDHptYjvTVdQhRRtUnk0sr\nb224OQOtACKwXAztU8USYUgbec/h9aTIaM
Lt3Hqc+lSNhlO4cdj6CgBrMF+UDk9MUjRoGzu9\nvxo3MrcHdxQu1nwflGM0AOdghJJyFAFN4XqMMecU6MjzMbR0+9SLGdx2kYHrQJgqLknNKZBO\npyMFemKE3bhv6dj60zaFb5Qce9IBcFVUb9zGnySHkMvWotoZXwvSnqrLGoDcHqKZQsgAydxB\n449aRtrKAVINSPMFK5wQKYquOQw5NMY0ru+XIYU1ZOSPut0FPG1Ccj5x1pMny9xTHv3pCEZT\nsKAbierUoVemBnFN44CgszHrmnbfmztyo4LUANJEiYxkL/Kh1TZ1/wB0Uu1Nx3cA0bSF+58o\n70CGqxO3PIB5pXJRivXJyBTW4XJGc0rHdGAOWoHcbuP8K4bNLv3Od43NjFJvby1wdozTvMQu\nQ3UDP1pgG0I23gnHQ1GxG3g4Ydx0pytuRSVwG5OfSkXKBlUYGcEe9AhFIHUjFLGGRd/6Un3u\nCw468UfeXaASexpCtcVWc4LqKMnJY/rQoCoGZslhz7U0JuxhgQT0NAxecHcTz3FG4KqhT8po\nzt27hk/3V7UKy+YDt/GgAKhm2ds8+9LIwb5Rzt52+1EbbbjG3qfyoZdkjZwc0x3ESZic4IVv\nwpJMeacfdxk05t74Zzkdvakbbt57UguCt8n7wEhui9x6Uv3QRnoKTce7HnpxRuKhiRtzx9aA\nBQVXH8W7P4U4Sbd2VzmmrlmzuyeopG7Ek9elAC7dnGBk8ml53KueM55pAyBmH8Z5+lIv75iu\nSGxwaAF4/eZHzZ4PakZTuAAyW607/V7hnkHFGC0eCMgHNMQiKqszhs9qRuvJzmlx5mNucdzS\nyAbduPm6mnYBjqUkBPKqM8Uu4sSeoYZPtTWbLA4PUDFDt5kpKnCjgipANvlr0xkZHNDHauSM\nk9VFIwGxT1ANPjIZyccN60wGsN8YOMD260qx4IJ4U9qTcyqw6YNGRu9v73pSAOSxGzH1oyuA\nH45oIlDBj83OKNwbOeeaBh8yZ3c+lG8KuGGApxg07JYb9w2jilf7xJ+YdaZIzaBnb8zHkClZ\njuPXI4oztbjHIoMgaPgc55p9AGhieppV+RgCNxxxSsNq72OFIpgJXB79j7UAPbaq7V/eMf4a\nC/AAxlevFNx82FGD1IoGWDH2/OkIU72UsxA54HtQ2FxtPJ7mjd8oUjnFCqCwDLn0oAXliADy\nKFVznkMvvSow57k8UzcVbaDhRQUO+9jjdTZScgDKil2qmdjl2xnFIpEkY3jae9IBfMZeFGSR\n96jzDMoGMOBy1EkhAGwfLSbn3DawAPXigBNoZcHg9qOGZVPJ9P601s4b68U/5Hb5ugGT9afQ\nkRgMk9VGfxpeNoyvBFMLsAzE9fan8rxjj1pAIp29Ac0NneMcjrn3pdxfIB2joWpFjOSiNkDo\nfWmASBuARz1JpSxYEsNqngYpFUvGSG3c9aVdpJHJ+tAEbKWGckD+dP5LZA4xihcSyE4zjt60\nn3ZMqCB6GqEA2n5W5Xpx60rNtZeMgHBpMnnC9OaB90c89TQMXcPO2qcEHOaVWVWZzy3pSrtL\n5GMY5z601WK5AQMR3PXFUIADJlSueM7vShQSuB0xSbyT12D+VLu+b6dWqeoxiL82GU7u1SkC\nNsAEk9aaxMkq4+Vc9aPM8st8+Wzz9KAGghvlO4gnFO3Y+UKAF4xSLhWOW3Fh8vpSyEyRgsMh\neDSuMN29eBkA/e/pRwzZ6A8BaVmySpHGMio8YO/HH8qAJNzNvyADnk0nyrgKmFxkt60csPMw\ncHgUKxMq4HygfrTExDI3lkZ2sDyKVeVOOGPeklYPLkknBwadIRj5Sdo6E0DvYGYMwbO0dPpS\nbRtbfkUMxymFCnGc0gkB3YPHXHrQIF25VUPHXmlkYMCSNo6YoDDneNuRkYo3SkD5QAehoGhN\nwdVKg/KcH0pfm2FsZPbnpQWGCSeOgHvSJuwW
JAGMUAxdwXDEHsDRzGzhe5zj0prZkTBPI7U6\nHHl56/zoEB2sPl6dSaV1EgB25I6N2pI125wNuO1KrFeNvy96AQbQrbicAflSnA+Q/cPOT60o\n4j5y6A520m5QpdxleuCaQhh2xkllJPYCnLt4Pb0pzLuCsCAp+bPt6Um4f3eGPWmMfGpYlef7\n1MZlZ8qM9jShgGxyT0FLIqogVV+bPLCkP1E+VY1VFyAcZ7ikP3WVl3CmlmY7V4xydtSRsG4z\nhuooEMdivPbHGKRVLMQO45HpSqwhVgTls56Uiq2C+8YanYOoqr23YXPJoCgru7nnAoC7MMTj\n1FMWNYXI5CkfLSAem3a3y9KQNtwB16mkUhVIJ+bH50u0KqHq2KYuoDO85+UnpmlkUBsBwfUU\n2Nvm3OclaWOMecSeOevpQKwBg2VIyDxgc0qqvUsPTGelDSKs2xE3MvzeYtIzR7tzdW6+9Awb\nC5VSKQrtOGOemKFwykYwc8E0qndnA5H8qBgBtZiF56ZoVWCsejA4FDblypKgt8w5p/3Vz13d\nRQBGNrrtb5W9aGZWyFDAryGI60rSbeMZpu5myxG70FADt7bs7TjHNLt2wsCOCaXDhc5wvcU1\nV+Xczg4PSgQsTBN3PGMc008qSBgdqcd3TA69KazbduRxmgCRS0e3dgkdqCx2sGH3h2PSl4bc\nztk44Apq8qoK7eM5oF1G8eWqg/d5NPVgyv8ALtULnJpsXy7nbj0o3fu2aQfK3y/SgoVfljO1\nS3HUUZ3R+W3ULkMKe0ixxkIccYpFO4DK47GgljBMVRAMlscinZLdG2gDJpVwWY4wcY3UxSYy\nFYZB4JxQIcrB4cLwx7+tP3fKoddw6GkWTMmMqsYOAaRgBtY8bj92newEm0MxI4Pb6UrYaPA4\nApi7IWJD7hg0i5aMEfNSAe7szKybfQ0gIVfvfxZpu3zMYGD39Kk+X7vBAGc0ADLtcAnI65p2\n5vLDhcDOKjdRG0bK25WP5U5m/fHHT9Keo0ObbtYsD0prcKOSc4/Cn/NuyW+XHSkT5s91pgPX\nb1LksRxQFG3DnC4/Wl2s2MEbV60z5nGO2cigZKCXYE/MYx1NNWZpnc44zjIPSlKsx2suT3Ip\nykPMGUYTpgd6LgKDHvYn5n60reZuVQBEGOd1Iu0SMqjrxuo8xmO3rt9e1AiUKRcAKMrjnNLy\nTuAwOnFRJM+CxHPQe9StuYBSApxk0xCqNqA/eAOTQJI2k4BA6kGk83y1UY5zniklkO4k/dc8\nCgVyTdgZHzE9vSo1P3RnPNNRfnyPlUcGnLuik3IgPHeqQhxAdvMc7VU9KachnJPU8L3o58tt\n4O49Fpsx2Ngf60jNMY5XbzElC4K8EGpAEzz1+9imMoMIVSXfGfqaRIzuJPylWG4/0pAP3P5e\nEHU5waQMy7g5w+MlfSrDMVkcgfLjNVdxwPMbc7dPagVhVzs38hO+ac0ZklUjqBmozJ+6I5x0\n+lO+bgg84wT7UAPVwyksNygcEetPj4WN3GOe1MjYrlQPl9KdGzNKoI+Qfe/pQIsxtGhfAO48\n5qwyho42DFs8Zqtt2yccj+lT26t5qlW+TsO1ID074UxCGZnaTKqc59PpXrszCWPeACpFeSeD\n4SzKqDYp4/GvS/tjLarECMipYDLv938yYQ4rInn8zrgDqauzKbpCCwIPbOKoyWrQNtZRgfjW\nbNDOuIW3ckKDzVeRjJuXt2NT3kkjXOCvy4qtNjbtTJ+tIaI7aYBtrgmpLySM7VwQQePxohQq\nTkgcfepGmDL84D9sikURtmRfuk4PaiNsfNyzZxTgwl7GNOx9amhWOVSqocjk4pgCTDhZeSWx\n9KlvXtmUR27b2j+8wqm6tuwF56gd8VahsvtChoYyh7gigY37M8qlhg9iKkZQJAAADj8qsrF9\nn2pIdxXqKa22OTGARnP0pAMbLIGzx0NRzR+awX
H3e9S7BtLZyM1NDNHIuxlyxPWgdiG2gkmb\nygM/7XpV210RoZmkEm4KcbT3rYks47CyWVFw5GM96wL7UGhYt5mcnkCgRJevJuEbrwvQCqvK\n8kEVIr+d8/mc1C8okJXdznr2pAK2EVmB561X+2OJFCqGJ9aJ5gS0SHIx1oWFWVG3cDrR0Alk\nZmXYxz3pnnO8nzMAFGKWPJViBnHQVJbwCRfMcAKKOgEUYaTeg6N61Ou6NMFQEXgVFJMZECpi\nPnGarPdEyFC+R3xSAtzXQQqo7/xVXDGGQM4Dhu47VVkuQwzt3AdqWG6b5QFG3HINAFuZma4K\n7tqnn60/51wdpK5wFrPkvGlyGONvRvSlS+fjc+SBwKYupdMhkZm644PtSNOWAIG5v6elUmmG\n3CHbIxyR2oklaLAY7R/e7incotrMsGdvO7nr0PpT1vmSPuR1rN8xYztzkE5Jp8sw4I+5mkBp\ntcqVEhHzHqKheQbd2csT8q1lfb9pffll7Ugm+YY6NSAvfaJMncR1xxVlruNo+CQ2MHjvWN9o\nkaQoi4Cjjnqac1020bipP5UwNNZV2rtkBHemXBM8qSK+0Ywc9KyI518zPIXqRTn1KNmGyTCH\njaKBGrHIfMzv27envU66hKnO7C98Gufa8K/eGGzT474y5Q8f1qRmyup+YrI24ljUHnhZiQ+C\nOnpWM+o+VlgcP0AqOTUZDgbM7vQ0xm7Jfbv3jMMDqRTlmDJlDknkH2rn2uJoYN7sNm7AwKnG\nqHfyABjtSYjUh1STc2cnB4NWV1JpGB2bX6HFc1DqQkzE4wGPWpJLxreQsr7Qox9aoGdFHJzy\n3K/MM9/apFupBG8jASZGQork7zVpsqFbgjJFSQ6xJ5RUPywxtoGjZbUJPLOV+bHSlbU5TboG\nZtmecmufkmeMx7CQc5antqnzeseeeKBG99sEmBvH4VY3/uzhuOp5rmJb+NUbDBW6/hUy6v5d\ntiPkMM1IzZ+3OFyW3BeQo9Ksw6l5ytgtGo/hPeuYF45QksBnoO9STXP3myVJHVT096oR1Udw\n3I6H0J5o/tARtHgZOcEA1zFjqW1iN5b17g1pNqUSxkOdpI429c0AbcOsCSRlIww7npTv7SLS\n/Mv7phw1c3FfIrLCvDEcvU0l+Fbbuw69s9aoR06X6hApYsAOopF1SZ12lsCubXUdzKFc+bkZ\nz0HtWl9oRHDSdG4z2oJNxroyLkH2zV20uin+sOcjgmubt7pI5GwwO3kc0NqbByGfoeOaV9R9\nDo5mWRs7sPVrZsiB8zaMc+9YNvdlo8yDJz96r8c26P8AeH5PWqMy+LhlKgDNXI75YmUA5z1F\nYsLnG4Nn0Oaf9oR8K2VK81cZAdXb3SO2Q+H7g9KsyX29lw3y56Ka5SPUFjYEnCds1bS++fJX\nb3C1rzEOJ1dndeWxDdD2q9BeRtnflMdCK5O11J5jn7vPOa1FKtHkvuHWtLmXKdUJkfaS24Yz\nTv8AWIcKGHWsGG5j2/IcnHT3rQtbzdsyNtNEtFbVNIhumBCiNjz0rn77R5LdiSC0Y79q7u62\nzgOUB7Yqu0bbdrRDy/Q1ZJ56FEiFhkqDTVSR48MdvPT2rr9U8PRJbtNEfLb7xXHBrmbjfAxD\nqSrcg1SQDZGRVCdGApCP3SgHkmkTbsBbgfrUywjcFJ3r1BqRiRsdpVeg705VbgucjFG0Rsdu\nAmeWprb2JjwQOpNPoAuBjJ+Zc96lzj5+SG4HtTFjDYULz609QS2G+7jgUhDvmVSGHbrSNIsy\n5GSw7GkyxiyfmTONtP4b7gwR1UUgsO+VoQxH4j1pI/8AUBScHOeadvXaYx8rUhiGQM/KeoFU\niRwG5gS2Fp3DKdpzUbokeCh+XpTlhZWU5xn0pgieIq3OORSAGNi+ODxTQ21+OT3qTy3m+UcA\nDJpFCRqyngc9frUqs6xsWXk9vSmLJ5eGOWDfLUrs
WPlt93+7QISFN2DnDDvUgYgZJ5XtTWIH\nCL8vYU1n+YDH1plE3nDqRtBx82OlSNiONlwMkZyaijYNGCowR608DcpwMn0NArDgzLbr8wGe\n4pzfcGOnc1H92EK/X0pQWjjwMEHtTuMlhkEKZH3vWiOQ7WO3JPrUMfB24xn9KHZmkK5wMUgJ\nflZPmzmmbmZQcYbOKFfdDszxnrSqyYGcgikAM6rj5cHNKytMxJxhR+tMBaRmG2nBRnGcHFMB\nWjKyZb+7zTlkULgGmSNtRSNxFOPzLyuM8iiwh7K3lgk5OaRGCqQv3u+aSQlcHt/KlZkKY4L1\nSEJuU84+729aYrbgTtPWkw0ZL5wegFPX/VkHIOOwrREMazBwpBxjrT44/Mjb5uahjjC4y2al\n3FwCBtxwa0MyaPEmxQMY4JpXbaML83aq8bbvvDaFPX1qz8rK0mc9hVIB6/KhVxnjIIojRYih\nLZHUhqYpK7ATgk1LIqqw3cnNUQOdg0hKHA9KdGGMmWwO2aQ4XBHB/u0jN85cZBNUIeFDMSDk\ne1SKvGe9RYbb8ozuFSKxXBHUDvVoBfm9KKTzX9KKoD75WPC570YHB6+1KFO4+lO+7ngV8KfU\ni4+XkgimeXlcEZNKe2BxQsnrQIimtUmXaVGKqtpsWfnPHStFmDdTimHHdc+lAGTJoUPIK5FV\nLjwzFIuDHkfyrpOZFGRgmnN8q471SA4qbwbA8ZCYB9DWVe+DGU4Vcg9lr0RV+XGKTye+BQLY\n8ou/ApkyvlFRj8ax7r4fqx3CIjHbHBr25rZXwx5FNawhboooEfP998ObeSMkoBIT97HP0rnN\nR+ENlcb3eINuGCpFfSsmi2zZ/dhjnPNQS+HbZk4jGSc7jzQFj5L1H4NWl0NscIVgMfMtcpq3\nwFK7ggC59FzX2ddeFY9xVU3BuQcVkXXg1D1TmgXKj4kvPgXcxxkxRhgeqLxXP3XwfulRnRGj\nZOPu5r7ol8BkjKKWj+lZt14GWRvmhHHtQTynwbc/Du/tmJltyUYdupHrWFN4ZuLcvmFioPAI\nr74uvh3bsz4tlGf9msLUfhfb3TB5LWMuvAcIOKrmfQfKj4bk0y5tYx58fH94dR7YqBbcxsCY\nmVv4jivsLWfgtZzyHMJRs5aSJR8wrltU+BMTH5EkZOgyR0+lPmfUjkR8yxxtvZwDnP8AEKXc\nuXZl3Lj05Br3HWPgTNHGGg5kY8K3A/PFczc/CHUYG5gGASvAP5inzkOmeVneygMML1+tKjfL\nhV2nPG6u21D4a6lHMGhgdgOvHX8Ky5fCV3FIwkRjID90jBFHORyswo1PILYI6mn/ACKpXGGP\n86u3Wl3MOD5JyRyMfyqusDwq+5CRjj1BqlMlxZHEflOBgr1NIMI288n1z2qSOMqu8j5sdBVd\n41jjJZTnOc+lCkKzJ9sZDIF/dt81Hlp5e08EjhqEjaSPeqlff+tNI8zAORnjPr71XMOxMoUq\nEQYYDqe9NZSFPYH9DSgDIQtuI5BFPZlUKA2O5z3p3EQFAoB5560ySP5Gwfl7CntvZmYHrwB2\nqP5lOT1x0p3HY9A+C8vk6gyruAfgn0PrXrHiKMSKuU3KVLBq8X+F8kkfiSEx8K2AR+Ne8eKY\nTcWW1SFAAOBXNPc6obHkGoRhZCUGcHrXL6xlFfA+XPPrXZ6k6rM/HTiuX1VWOcrjPNEdxnJy\ncMVA3d91VJ4WmVsNsUd607kIk+cVWnljWPJ/Fa1IZhXC/Ip/izgk1Q5DM5GV6H3rWmUTMQqg\ndznpVOS1MzbVBx2CjrVIRlndNCTtIAPGar7vlIC/MTjJFaTRyxkhlBUHFV5MKSMD1zTQFP7z\nEEggddvrVeRSVJ+8OgUVMyqrYz948+1OEZBP+z37VYFLG1t4TLHjk9DSNH5akvgDPP8AhU80\niKxIBbI/Wq0pDMof5T1DU99QEdCSsuM9gPSmOHIJ3L
s9BUqkspAbaOu49qjWMFcYwp/i9feg\nRCyrtJ3/AExTTjaFZjjqRUywh1IA2qDyajWPdGVVtw5oGNTKoVBLDt7UrRjzMsdgC9OxoaOS\nSNSg+XoTmk2lFO5snOKYmMjI5bBJPGcVGkqKrIuV9VqVmMeViyeOvam7hswV3seC1Axke1SQ\nRu5pG+U/IMHPQU9mVMZGAD1FMkJZlcEjnOO5FMQp3DgMB3x3pqKq/f8AlBPFTbQxc7fvdqjV\nTGoUgeo9qLhsKEVm4DbR39TTFUK0kZ4LnNIrNy2do6ZoUBVOGzn1/pSGOtxtYru3EfxNTFcy\n7uBGTwf8aftXy9uCT1pu0tKTjjb+NFgE+VcEAHn73rTZB83TnPPNSLtG3enyngH0pI/vMpHH\nrVCZGx7BsD6U1m+XaBnNSIhWNnx0J601Y3XaNvzdcikJES/KWXqw6U6PEcbdSDwV60+bP8Qz\nn0puFT5T8qHp9aYDVXbHuzyP4fSmyEOuCx3MMcCpPnYFW24zimsRvUEYJ5XHHFAwwGWIk7mU\nYAoVOTuXAXoKRV/d42kjrvqeFfMBw2flJ/GgXmQL8rHHHc0m0SKSW+btS/NtXcoB7+9GVeTJ\nGwD0oAGG0KN3NN3K3Thu9Bw0mRyo55pVbDkdB16UhDFYuuAcgHpSRruZ9o59D3p+SxOFPXtS\ndFYA8t3plIjWMFRwS/8AdzTmkbg7QMHBFLtHBB5FJuyvPT+dAmBXaWPIX1pkvzKvcnp6047W\nYDBA9c0btzbjyRwDQIbuUTZC7jt60hk24IGVPXA6U5m5HGVPSkC7flIbNADeRllOOe4oKrMc\nngDnilZdylTkeuKVI1ZkRVIPbNIBAArKApCtzuNOXJJIJ+YZPp9KPL3MQTyp6elId4UJyAOT\njvTGNIZuSoDE8Z9KVt3KgcUSL5mJAec4AzTXXav3tvPLe/pQArL8wBOOKApZfmODSDG35zzn\nGaTI2kA5I6cUAO2srBj1zQyGONwTmRjndQrHh2BI6Ypm1pCCxztPT2oEObcrL0bjkCiSMeSm\n0kMD2oT58so2rnrTtx5IIxS1BjEbLbehLfeoMeWyTk0nKryvTndS7clSp+UDvVdAGN/DhcnO\nKkU+Wx3HAwfl9Kaq5JJbC5zQ2DIMAc0hjVZNjNjOePelaPy4ly/J4FDMWUMvy4ONppV2tuB7\netLUQrsG9wowAPX1puw7V42tnJH9aTLKBwNvenMwZgwOR0FMYgXfkn72e9CMJGAweOppWAky\nCSAB196VnEWxUXIOAT3oEJIwCk5yuaay7MNu56AUHG1sDIDdKVtg5OTx6UwGKwC4AwM5zSsA\n0fTHOaFDfd2Y3c0+RQgyWz22igBm0NIATiJRk8U4mONsr82eQfShmJjwGyPYUxmHCY6dfakA\nAiNCVOXzk5p7k8lRljxiomAVuTjPcCplk/eHjDGgBuPOwqDDKMsKVstkq23PamyKVO8HDdz6\n0MQyghvc0+gxm0nndjtSkFcBWyM85p7nyyA4BLdKZt2vnpzSEO8mNSZvvL029s01SwXDnnrk\nU5W2rhV/d5596azHc2ABFjA+tIYnBUZbbnpil8v5fmO0e9NY74guMEHmhgu5VYk980xDpFzH\njPHQ0uNp+7wB0pGYLzjK/wAXtSNtb5g+5cfjQA1VZWyVwvQU5l8tcn5R0K0vzSFT6D7tGQpz\n/F70tQGsC7KuSqjmnSfO2U4A70zex6nIp0bRurKpIJHQ0agCqyjbncWoY5bYBjHBJpvDFQoy\nwHBz0pG+987Y9aaAcV2jIPzDnNJ5YBDsWKNyfrTmO35VOe+aXcVwSeKYxWVuM9euOuBUZYN8\nxVuOBTo1DFzuYZ6AU6NSqgKd2KQiNeW+7jvTWA+YnnPHPSlyQVZhtbqRRn92XZPlJ4NAC8Da\noHzf3qFJBdS3XrSbl34VT7e9IThjuBA+lAxvLcsdqetK
qll6YbPH0obPOF2kCg54I64oEO8l\nd23dkdaI+FPbPrSfe3AD5sUij5hubB28gUANVjuUMQQp6mlMgZmDZJHIpHjRWwPmXGaMhpFZ\nRt9VIoATYQuTzn86cFO0MHxjt3pVz87dMnjPaiNNqHPXr9aATE2DhscdwaSJV3BunoKfuLnL\ncg0oA2nswFADAGR9w57HPpSc+SuTjJ3D3pWb92MHGe5pJIz+7Bbp0+lACbjgvnBY4FJyeoBb\noTTlysjDbkHpSdF+b5T7d6BhtbjnK9KJMvHlFzjrSZBTr3yaeN+fkGO4oENbEijHAxmlZmIU\nfezxRkqCWGSTQrBVII5zQAAb2I6FeMUhHbfhSPu0cMwKnHtQCGzjqeKYCrtyBt2/7VIvzISf\nlOcY9qXJ2h1PIOCKRR8pGOo3ZoAOcN3UGl27UJzlT0FNZvlG0Z9qP41Urj2poB0ajaWV8nvu\npIss2SM8USKN3y9KVm3HhtoHFSAz51GcZ70AB1/ur1LCnc7WUN8xHWmIG2cD5V/WgBfMBxn5\nl6dKJGKqQcADpjrSSfLGfVuw7Uvlsy/Mc4GaZIDYqhzzkcihZCqhgMdgPWkGGZgRtZR931pZ\nMuVzwuKQBghv7hPX2o2Fm2p90dWal+bIbG7b2pF/iZjlmH3fSmWJtWQOFyoA496VcqqY5OMk\nU3lYQM9+opzSDcFYYAGTRYgYoLNuKkqefSnxs0jeg6AUK5jyrHcW6fSlRRGx2nt+VAxn3sjv\nyKS3Akkzk4AxzQAwXORk96RceXgNuOfSgYZZVx6nBbvUnCthfvdMCo8bJNuc55qQOPvsAO2R\nTJEYlV459fWgfNtGCPejA25xg560BiwIUZYHO6kAFVaQn7uKUI7KCB37+lHys2C2Q3WkbHBA\nJwcZpFDlY7iV6r0pu75skHnqKV3CoR9wjkH1pm9ZPm3YJGBQArKsbbc5GMihs7SfbmmMAjLx\nuxT1ZlmDcAf3fWgVhowFwHAzzS8GNlDfN/OkIVmPGMnP0o27m+YYHpTAUL+7ALZb+VSEAsxx\n82OtMGJGwo49qcGxnPI6UhWDDLHlh+I/nTFUSrtPzNnPFOdmkUYPy9CKRkG4kFlPtVAM2Bsq\nOV69e9PbeyAN98d6Qrs5UfKR1prAEDGTQIfkR4I4b1o3N5Z3Yx3pkq7ERcZ5+7TpAf8AgAPC\n0wE4ZQApBIp5VVUA9vzprD5NgOec5oZCcZbA/u0AJsCngbwe1OU/KQMeYaG5+dCEXoTSIpVm\n3DHGQwoAcqu8LB8MewpgUFCW+XacFaPlVXJyWzihP3u7cDuHY0DFGXUEjb7U1YxguUBXP3qd\ntk2NxuY9QfSkY/KAB5Y7CgA2MqliMKOijtTlQrGWAypGSDTWIb5QCPUUKoVWx8xxSuAJllII\nw3UUSNuVQp+ajcSwAGTjn2FO3jd0wPWgbCRtwUZwe/0pAmVDs2Pmzj2pvlj5uuSc09lOSpHp\nTEErI0xbGVbnil2jy9u4FeozSL8rbgN6AYxUW3cNrHjOfYUCJNzKMbf+BUZDScjn1p7ScLsH\nblaY3LHGAw5oKQrcNvJ+bsppuWMjMcnjgUrIZFyX5x940iqSOuFweT3o6CFaQgAbPrRhV56j\n0oZyQo+6qikVQ24D+HnNIB0i4TcwwCcULCY229uv0qORSsisxJjIzipXYyqGxtGcA0AKzfvC\nSflIx0pVBUgLligznFDshfactxjikVn2bCfxFUIEZv8AWMcZ7Ui4ZSSuVzxStG0Z2j589BRt\nWZNuduO/vUjGtlI3QjCqcn6Usi7QozjAzgUszGFUBOQx5pJN0i9MH+lACLuZQxO3Pb1pY8nI\nK459eKQx7ZE7sf4aNuG2J8wznP8ASmAi7juB9eDT9wZASo3DjiopJGdsKO+OKdIF81dvUdaB\ni84ySOvPFN2qY22ZK54pyk+YTnj+VIQfLO1snPNAB8rLlz
hgOB60gy0a7jk9centQSJACwHX\nFPOACO446UAN3HkdfalEZkbAH3RnFEeZAQPl/wBqmMpbkEq2fvL3oEG0NlthBPanqM9T8xOA\nM0jN1z1pjRhiCwwaT0Aki8wSNlcMvGKbGCq5JywP3aVVO7CMW45FKD5bHP3TwQR0NMA6cucH\nrikVTJkYw7dOaEDfN0Zfu4Pb3o8sqoYnkcCgBJF+Xa4AZR96kOGI65Apy+vUDrmlX7vLcHt3\nxSATdnG2kVvMwGJGDxinbgysNpA7Gk3blVgPl6ZoAT5DIRluOufWhVDYJ+9TuMg8EDqabtUr\ngEg5yKBdRSu7IztYGldUPJJz6UMysAgGJP7xp23IIC89mNAxI+M8A8c0u0qoL8r2FI8Py43b\nTkZNK7I0TAZ3dqYhpXc20LnvS/Nv+9tOP0pykCTAbt1pqSPlyY+3P0oAPLDxtjgLzS7jx6Ec\nUb/LjBAyGGBjvTQpGBg4C5zQIXcVZcchuDmj+LcQR2HuaQldo2nJ64oaRlWMHnngelMAk2Eo\nhXJB5I9ak5VVdhvIO0euKTEe5l+53JpfKEkf7tsN1OaYhHjK5woGTUjMvlHDbDjhQKjk5Ydw\noznNSxuMDPJxnJHagoadywjJ68Z70Rttw2BgcH3ofKxhc9Wzn0FOcKrBX6dR/Sp6iY3aVBB6\nA0rIchlYEHtTgmVOOGPb+tM3InBbnpxVDHqxO5iCVxSqw2gJwG65FARlwOqt3+tEbFQwIJw2\nCP60DDb5ajb1z69ac375iehHX2pFmQSkrkBeeRTi/wAzFujcj3piuPZizDy/TlqXkR7gPkBx\nx1psjeUowMH2pQixsF37z1OKkLisxxH8uCR1FC/KxKnCd/Y0qA7SpHQcGnKu9QenY0CEwXwC\neOvFOXJZS5ztPApkZxkMvy/3qcpw2B+GaBD49rXDtjAx07Um3dtcdc9KTDO+OF9TTnQKwYEh\nc4yKYmC4WR1xkEfrQv7sIH+Y9B7UZeNi2VK9lpqsZFbq3qtUgJArLIWPGOlNG55sjC8cmmM2\nYgyjIXgZ/lS7AzK54yOfrTAXIi3EHDdvapFlwuQckjnNQu3mYbhip6e1SKysx3DOfu4phqEj\nKFGD+76e4NOkKiJWHzMD19ag3BI3AXIH3qft3fdDbB2pCHt8zZxlW7VHuWWMhWYHHOaI1O5S\noxtb1owNzbWC/XoaQIejYC5Hbt3qVF2kru3bhkVGMMVC8HFNUSSfebocY/rQhFuLeSQ52Pjo\nKv2yeWVH3kAyTWbh1dcN+Petm1XMqsBtIH8XepA9P8JwOY1fcFG3I966Oa6MbddzBcmuI8M6\ni7SkFgAnb29K6COQuxBJCsclj6UAa9tdb5GAODjIFO+1BZC0h3Y9TWNLerZyGNDvZuQ3oKov\nLJK7M5JQ1my0atxKskkroCRmqQYySHIICjOKgWZlbYhwp7VYjbcwLHZt5PvSNOhFMWCA/d9q\nl3htp2gADOKNzSKcLkHoT2qFt5bJOABipJJVkLIQQAM5GafHdeVGQ25JCcYFQKQ8gZvu9l9a\nlZQ0xZmyAKfQpAtx5cZCn94T9484q1Z30lurLuJY96o7vk+UDJOd1O3lcFhn1ak2MueZIZGy\n+Sec1LDH5kZc849KoSXabiyjIIwKjiuZY1OxsA07jNGSZOfm2r1rOvr5o3V4Bl179qZczp5O\nT+fvVdZT5ZcnKngUdCS2uv38yhZnzj3pWmhmVdzbZjztPes5pAI8fK/vmnrcRyJjblgOKQyy\nbmRVKqSuT0FSJJtH3vrVF5kC5BIx15qM3QVdyHI67qNwLrTFXyvX0qeKYowLDMbDNZX9oBm4\nOU/vCq8mpHzCBlU7HNAG/uWSM84bORg1A2oTQoYx91jzWPDehVZvMy3SnLqAAbc2SBxTQGqH\n3DqRxVWaZYsKhzITiqf9rNcbG3Db6DvVSW7RpmlZ9pJwAO1I
ZqNc4+Uths9D3pXuE8nqd+eg\nrKNwtxJs5wBkN60tteCJiWG4CgdjVWTC8NlzSw3IVGVgCozWPdXDtO7xjap6DNR/a3Vd5IC9\nDzQSagvhIquoxzjaetRyXTGQbm+bP8VZUdw90z7SEVR1z1ojm34Qtk4yAaGBtNepuAHJ70TX\nTxoVHIJz9B61kLMPKBcAOD1rQ0bULZmK3I37TuG7pTGQCeXdtbBRhkN61IJjEql2O/sKTUbx\ndU1DdBGFRRtAXv71QmJWQkHLKcHmkBY+1n5lK87s7gaaZnLOzAnd6dRUEMgjPmFhtJwfakia\nNZCqvy3Sgdi3uMce+N8lhypqSOEsoMQ5681mterCWCglumPemWupSwlSxO4np6UhF+4uJFJB\nIEg5qt5zMwy/LelOkmR5i7Nz3qG4uYmZDEuFI60CLbNJLCJBCSM8YpFkT92jqQzckYNQQ6tJ\nYwtbowIY5JPahddCKVYbcDhiMmgZt6Zapf5BdsAkE1BqVrHp7CNWY5PrVKLX3YKsSrG3U7ap\nalrEl5IQDuK8jmnYBxuGhO4Lv9VqW6vFaFFz5gUbzj+VY73UkjJzkjqDUjSsmfusG+YkGgC4\n9wlwwYZBbkg9KY80edqSdDyBWfIxYfe2qTkU23mEe7aNwzTA1VuJHBQNkockUxpGLEStw3IU\ndqotK/BJI7UkrOWUfdx696Y7mn5x6nDLjGKSO+8pSNu5v5Csprxo2JxxjpmnC++0KjR/Ic4b\nPcUCNb+0N0ZIG5SOnekmvHKIFGSeqe1ZLl4VxkndyOMYpfOLJ5fTHG6gLmw1wttGDncp4wD0\nNMXVpJiRwoXisa3YKreaWZs8c8U6JvvIRls5HPNKxFzY+3fMHJJIPWj7cN3zZC5zkc81ll4w\n+OSD39DQtwqx4J6HFMLnRf2kVXKkfKM57/WrJ1Q3EKgyfL2HrXKLfFt5/h+6fpViO8Roz8+1\nugAHSgDp47/yc5bkjH0FXJNQKxq4YHkZbFcjFJI21i2W7Vcg1EmUKzgHHTFIDrYNa8wFGJVi\nM/8A16vtqjRRHD5XGM5zXENqEK9M5HHWrEGtfZ2CAjymG45HemI7e31SFYwrMTJj1q7Hq0Nx\nGuPldePrXCR3jSMjscBTggVp2t0YZslQyE8UAdxY3EPzBlVtw5J5xSw3O248uL50FclDqZ8x\nihwjdBWjHeFZAyvscjkirEdZvMbD5gBj7vrV+2ukQDcePrXFR6jj5y3APXNWk1gCQeUCe+TT\nuS0dtDeKsY2K2c5ya1oNQh+XDYJ65rjLPxFHJaHeu2QcU9r4sxIYkZ6VqpGbieiW+rJCwEhL\nn2q216ZY8KG+lcXp18dyqW3FuB9a2IdQaFgJGyOuK2TMnFo6GNN0fIyMdWrJ1DRY7pSV61PZ\nawJHKN824feq+pWNgMYB71VyDhLyzks7jDfMBwDioPN+Ygj8q7zUNOiulWMgDnO6uU1bRZLW\n4JUbgeaBplGPazAIcYGSDTgpYFkfA7CqzF4pyrISwHQVYiYAYC4JFLUZIrHhhy2KXvy3PpSq\ngaMFSSw9KJTxtx1HJqiWKA8eTjcv92nxsFwSNp71ArFNq87e5pyyqxI689aBEm0jcSu5uxpy\nxmFgX7j8KiMpb5eg9O5p7NvjCHkDnNACx4X5ezHnPapVXHB6Z61HHIOGPzHPFPlcSMAB7mmB\nIQA3y/ezToVZpS247jxjtTFyp44FNUlmIWTjNIoszKVYDGCDTPM/eliNzHinqzLnLZOMc0m7\ndsAHzLzmhEofGrmMsR060eZ5hwRjvTU3qrBSSSckU9MMADw+elBTF2mSPI+UCkaQfL12+1Sc\nlWAPtimADYAuA3T60FjfvZPPrirDKxjToCaj3+Wm1mx6miWYMqnBBpIQ5QWOQcpRvTcCSQKb\nG27K5Kd6esfy9Bmn0EIrKjbgcj0prt97fw3UULlW2YA75pwILM
XGXxn8KQDt/wAvDZbjNAO3\nLYz2pu5lUFfvdqTLKuGO5uuKYFiNg+c9CMCo4tySFW5IojP7zO3Hy4H1pTuIAGd3c0wF2hNw\nc5VqPJjkZGUY28GlkXzJMheAOfTNPYBWBBxuH3e9UibjJIdx3E5Ge1I8hRdhOe4qTaGYLnPH\nNNk/1Y/LNaIzZCrDcdwOMcVKzOqoOgPXFIqnOwEHj71CqGX5jkVoiR0eNoye/SpBIMMuNq9c\n01QqqWxtXHWljI4xyW9askVGDbSp3mpdwO5tvzDoDzQrfKQMYxSqw2KV4zxVIljFYjDcsx5y\nanViy4zyRzUYYtIFPAHpSqxRT2OaYIeu6P5kO7PGKmC8Lk/P7VDHJtRj3xUkc21VJXJA61SC\nw/n3oo+3D+4KKYWPvtTtU5604KBhvvU1ccE8k0/nHHHPWvhj6cYVO7g8elIyjOMU9R1yc04d\nOnHrQA3ylB9qZ95gAMVL96k2ncO1ACglj06UdetO3fNimn8zTARvl6jimqp3HnjvRuPOeRTw\nPlz2oGNwNu0UoXgZGDRIvygigY25NUIaqrv4GaTy8cngU9WX6UrLubml1FZDW6fLUJhUNuIB\nOc1YChOSeKY2CcdaBkMgEhwVBH0qL+zICSdg59qtn5cZGadyvAGDQBmyaNA3GwVVbw7DNldg\nA71vBCTkfjRsVcgGmBzEnhFeQuCuOBWJc+DPvOycA9K9B2krjBFDAPxtHHY07iPK7jwIHkDM\nNyAfdFZ954GhkyXt8p29a9hNvH3Wq5tI2B3KKQjwm7+Gtvcfei2nt8uSawL34S2LSbltVY9T\nuXrX0lJpMNxjCAEd8VQn8Nwtztx7ilZE2PlzVfg3bXETBbfywfTIrjdS+AtqpJWF1Q9cEk/r\nX2TJ4Y8zgDcvoRWVdeCVkY7kBGc7RS5SWmfEN98E5rdH8o74c4DKvzD8KwNQ+DN5GpeMMEHd\nxjJr7mvvBCySMTCFT6cVj3HgMSOwMS7cdMVSQ+U+Epvh/qNmzh+w+8oOPpWVP4Zvlbc0LYx9\n7FfdV/8ADaK4jKeSAO+BWXdfCe2WFNtszfTrQrolxPh660S6YgJGVbHORTH091wrLhh2I619\ng6z8HReyZcAD9fxrkdT+CaCPbJAyBujMd2Peq5n1FynzOYfJIwrDnmmTIFJI+c17nqnwVkjY\nFF8zseSOnfNctefCOe38zyDIO/zKT+Ro5x8pyXge8Nr4ltNvyoxxzX0LqCmbSw/U46/hXjWm\n+B7u01m2bY3loc7yMDNe2Tqy6LtHVUA/ECofvGkTybWLYbnA+9uzXNXTMoII4zjmuw1uMNM7\n4IUdfrXKXknlqSo3GrQM5W+A8xiw+XPFZ94qNkDAyK2NUjEyB9uw5596yJI3k3hEz6tVpmbM\nJvMaVVRM4OG560khK7wdxI6be1WGChi2SrDjFVpg06vHvKMw4Iq0IpOZGB8x85/iqvPs2kBd\nyL+pqYRrBGAckjg96rSKBKcc9xV2GRb8IrIcs4wRjmmtKI4SMbVXgr3oZ8uABjb0A601nZcN\nt3+tUhESvuUkLhscdhUDkliTGpOMFqkEhGSgy+e9OePzAWT5CetUIpD5iEYZ5zupJBtIBPOe\nFqRnMMh4MkfTjtTGZTMRyxxxSATcBIyruBI5FIvyyfd2x7fvU8MQsasQrMceppkmYmUNzjt2\nPrSGQGQx9PlXP3aWTczAZwW5PtSvt3Hcdy9QoFMkcepWgTCRmK7d3mKvZaT59gJwccmnegTr\nimRqYvMJJOR07UwI5GEnzKuFU7iad5nmAlwG3d/QU7y2SLJ4QCoxbpJMuBgEfnQMdt3Mq7tq\nr096btO4sTg88UrbpCBtwoPfvTplkjkCHDZ6e1MWpF5MhUhX4xnNIo3MAACQOWpXWSMg54z0\nXtQ8YVhx83UkGgYiq7b8rwvGM07zAsfyruNRiN2G0kknn2qRZGKg
snzDgUhdR0iBiUzggfMp\n7GqwJZ8Hjj8Klz+6Z25YdWpqAfN3YDIFMY4IPLILcDnHXNNDPHHt5yeaXaAwz94jOPSj5sEg\nEf7Rpk7Ea49fmPrTN42tH949STT3wrE7cnHT096apXzNwG3jJHrQAYUxrxhaVn3yDcPlB+76\nUnmGR3x8ijp9KXIwNgzn1pagRyM2xwx4z29KeZV8vEYyuAM0jyeYuGGH6CkVVZPLPBAzTHYP\nmCLnr6mjlWPcetIDuYEnIxgCneW21lz8v6igNhrK3B75yPelDB2OfkHr/QUjEj5TnAGKaFPE\nLrxnINIQcsSF49805c7ihYAKM0zy8ghhgZ4I70fIw9SODQA6P5pODn2pGUsxUDC45JpV+Zg2\nMfSmqTIzEHqKYCbQy8HayjihfmUHo2OT2pjrmMLn95nlh6U5V2LgsCucUDsCsGXHGAetHnEn\ncwwnTdQWDJlRjBxSbflYvwp/nQJjY8tMQ3Hf8KXcJAGycmlVnVlHt92mgKNoXhiOhoBEvzcc\nZWmCORd/7zcOv0pseFbYCSD1Y9PpS4K8Z+XOPpQUMEY3AfjSMwGFxjnk1JtDbsc7elNZldAW\nOGU5FBLEmUSQ7EXy9p3cnrS+a0igbcD2ob95kk+5FJu6lVwvqaoQmSy9ec5Ip7EblUAZb+Km\nn5V3jkU5wMqAeeoHpUgIyhAMcEjpSouIQMfM3BprbnddoyR1zQzbcsw59PSgNRYwd7I38K8G\nm7v3akJu7GlLtkMTuwOlOWT5QQdoPUUAQ7TtYHjnNKcvGACQ3WhtyqQDuGc0rZB5/CgAZjtD\nMctnmg5ZTheDSSNtjXC4OcbqRkOFGdw3c0APDNwFw2RjbUPCumeQD6VM25kyuGPUsKTBf94/\nJ7UAIdgzs+dSefc0qDoBww5ye1C52su3aeuBSbn8vDDCjv3pgJzywIL0+Nn27CMnr0pgwzKd\nuBTmZ+H3YAOMUgB18yQHB29z6UjYV8AZB4zS58xtwO0ds0kg29xuHJFO4hqgqvB2ntmlYMIw\nxXce9Ml3cDHzNyKWPAXe5JOcUhirIVbaQAT0PpS8qcMMf40i/MzluRim/daMnnNPoBL8rYTh\njTWh2nGcmm7DuyGAYnJxStgg7WJx1pDELZY7slu1PUngDBbrTNp6j0pSu0kbgB0zQMbIuSRy\nozzSNtOMDOP4aWTdnbnjGc+tDKWbIbHHLUCBhuXJ79qbI6qoKfMBwac/3sk5GOAB1prgr8xx\nvoAGVNoIG49eKV9qsMYIPbvSttUEr3HWmRsoYPg7gMZoAf8A6tixBZSKRVHltuBK4yKX5RFy\nxL0K2SWUEDbgfWgQDAjDdX/vUiyFhu29eKF+aM5XODjrjmkkPl4Vic44GOBTGLtG45G1gOT7\n0FfLXL4bIokVmkZiu0qAOtDRlcFv4h60CBsYBY/w/hSKu3BduO2KWQjKDblT19qZuBxjn0FA\nEqnPBbqewpyqFPlZxjmkWUtjAVPXNA+WQsTkUFEMzNIQAd4HApQwfgcbe3vT2kG1o9mD1wO9\nRrz0XGB0pCF+ZmIIwcZyDSfPHGQG3FuopSTt4G0HjPekUH5sOSR04p2ENChcKOFUZpVBC5IG\nDyDTFYMuT97PWnSMCg53DOOKQCYPl7mxzxxS4yBleM44o2+WmB97PFCb9/POeKqwBhSTtU8H\nGaXcWZHXgEcUm4rnnIztx6e9AUSDbtwF4BHelYBFyqsHGSTTmy0ZAbaQM4ojyx54xTPLZXHO\n7vmiwD/M+VSF2jvn1pVXzGz909/Sm7WJwRuHWhdzcEbQaQAzhMqV8z0ApoVioz1BzT8qrcgc\ncfWkVivX5c5wKAI2+aQ8lB60gwuTkke9P2nYAfn3Gk/1fJG+POD9aAFLI2Dt2+1JtPAHyjPW\nlkbYmScg9AO1DNuIJPyY/GmgBVHmY3AikjwNzZ3EZwKRcAYVTkng0v
G4YXaaQBjYoOM/xZFD\nbeHYYDcDFOaTBwFpuSVx1wc4oGKF+baBtx+tK6lQg7DrTXAlZSH2kGnSZMnHpwe2apECBtjk\ngjZQ/wAy5T7wNNZVZQXbG7pTtxaQbeFxjmhFAoAJfp6ihV3Y468Ck2j7vcdfenL8rb1b5fSk\nIjO7OSOAcUhkaONmJ+Qtjb/Wpd+5eOM/rUYVvmBHFFhiK3ynuAaMFeA33qk2ltm07U6MDTWi\nTorZINBIm4gtv+YnqabDhbfOSzbuBTvvbu1IAflweR0oARWLM7EEDpTnwzIDwccULI3PGQet\nIcsoO3BzinYodJsVdg6jmmNIGUE4IJxTtoOT0IPTFII8AnGccgUAOUHceOlN4VSWPOefpR8y\nupPORk0rMCoz169KAuKI1fhOF7bqbtMbFOjdaUqeGHTPSiRg2WY/OeBUgDNuKnG003bt3YO5\nSelOKNwucNjIpGUiNQcAZ9adwsH8O7p2NIWEeQByeuKedqqV/EmhfmGfbpQT1EVl+VgmWxSt\nIfKIUfjTB91g/wAo9Kcsu4A7c/WnYoRlRlWQ9F6imSMkjBlyBkU7a0UjhVyOoFJHu3HA3NjN\nFhDyduTnPOR71FGxfcGH3qc/zYycg0qptUhSEHr1oC4m0+WM84P40BvMbJfnoBSKuzJI/wDr\n0kke4hug7igoVVCnGeP73vTmZpoztACr1NJ8v3sceppWxHHyc7j19aQh2d20AcYzmmZEikE4\nzxu9KVA+3JOCOPam7jtAxkngetMlht2jyskY6D1okbLAY2ntilVj5vlkcdPxp21l+VSDg4oA\nTgZYdaNp+9njrTQdzEKCCwxR/FtPIUY4oAXq2AAR1oG8L8y5Hb1pNyrGGHUcYFKjkjJ5A5Bp\nE9RNw+RlbG4YPsac23dsDZBGCv8AWmqoA3dSRjFCqN24j5sYBpjHfPGflAZQaPmmUtuzzSKz\nFgQvsajWNpMhV+bPrTAfIrbhiTB6ge1PkYv0B4/Wo4vmzgd8bqfuKhj+AoBCjeq7uhPFRLJu\nfaRhf506NmPDZJpFVdoDAk5yKB9R4bqFJGeOaG5Ugfw9aTd1OPmJ7051G3JIBPB20gY1TliW\nfnHSjzN0hwflC8mh1K/KcEjv3oXHz5U+mBQgBMLvUEsD2oVg2APlQdqUsVYMOO3FISfMwOeO\nlMBuzp5TfMvOfWnj5YzgbnY8+opNu3AU4I5oxw248t0NACeUed/KgcYoHMfTg9BS7QsZy2QO\nMgUqM20kcqOhoGCjdwg6dWoaNmVXVgcjmmrxGBuwpOTSLGNhwOh4oESg7vvcYFImGjYlsp0H\n1pp4b5uTil8tQuF79aACQbSmOGxg0vy8YO4dDQzdFIyR0PrS8LyOtACbV+bknbSlAMBuW6HF\nNZhuz0X09acwLKGXntQAwoDJjIcL0yaGkZRuY5Ofu0LhsjaF44Y+tJtLMSxy2KQD87phuXK4\nzxTdxDYUY70kajywwODil2/MO4PXmgAUockcf40ZCqMnJbrQ8Sxx4UHOaVsZBHzAUxgoKLyc\nqp4FBMfzfL83WlZV27hyx5A9KaM8buSTzQAcfZwDxk5FObr9zAYY/GmBRGjcd/Wl+4q/Nvbs\nKACOP92yseQecUhIXocY6UqqPmJ49TSqo2nd+FAmI7LlSOSeppOjBicqWxmkUmNSRk9hSsBx\nvOFA6e9ADiu2RiWwM9qTewO08KecUi5aTK9x1NCjaABk/WgAbPl7t2COvvSyYbBQ5GKVt28N\n2PG3tQAF+X0OKXUYFk8vB+97d6YjKvQfNjNG5t+MhVB6U44WQnPynrTEGWVCWOMjIpP4ioOB\n2b3o+XnGSCMLTiQqZblj0HvSARl+ZdxA9fc09WIkG8Ljt7UzG8DK5UfnmnSKZFGAN3SgQ5WL\nHnBNN81SdpzspMJHkgEk4H0p7RHc0Z4YDIA5/GkMa25o9pHy9aUpuVcH
nHQUih+MHdx1NG4b\ns5bPQ47e1MBvGSc8Z/AU9SRlyxVuhpqtmE5Axu+7inrmSQZHPZRTAarBVUhuEOcmnGR2xnlT\nzxSKFZmTq3UijB8st6cDBpgD484YHX9KVVGSepz0NEmNyAnJx+NKzCSTDKcY7UyWIyjdtYZH\nb60snzFMDaM4NL2YjAx0FJkspB496Bjtu0kjlW45oOVX7uMetCqV3ZIIHSiX94qgt97ilqFx\nTlPvLlWFJtVWxv3Fep9KcvzfMT8i9BSYXzMqMjHNINxFG1iqvzgn6U6FCvJUA46Gl+VlwOdv\n8VM/2vvIRyaoByko3zZweg7U5GddzNxu6fSmg9SzZTbwvoacuGZA6nPXd2AoAdvLbNqYI69q\nU4kYlwRg0MrTRyFiNm/5VB5x60qzAquSGZeAfX2oJBfvFWODjP4U6MpGxJHbOaVQ2xiRg54X\nvTlc/KQuR0oAQs7sueM8hRTl+Te545ztpnlhY3U8AkkGnx7MAMSMCkAdSwY4VuRTmmEaqGHP\nTNNZo14YEnrntSMu4DLZHXBpgKFMjBydq5+ag5Xch+UK1D7d2WGPaibbvjQ5JoAAxYqxG4Z6\nUrHzGcqMjPGOKN0aykEblx+VCqWyFOGAz+FMBvLMSw+6cYp3+s4HA60sLb2JA+bqKXO0ucbp\nMdqoQ1v3K7scHikaPYwAb34pdowuQW45I6UicM2F4HQUACllZgAMdyaRgdrsMliQMD+dOjYv\nJ0AUjHPrSKp8zaMhkHXsaBCj5Y2Xd+8A4x3pjMNis6AnvT8vsVl4Ynn1pZCVUFlG4igENDLu\nGPvHsKlH3t2whhxULMNgccv0FSq5wPM+X+81AWJ4bjyZCdpc9+K09PlLSbgN4I4XOTWdGy27\ngqM8ZGea2NE3ST71UbsZJA7VImdTo6Ffmz5Z9DW400kZIL/JtyfSs6ygFwoYHJNXrpGb5RjG\nMYqQRBNdBlDR8k9/SltZ3bKu2RVN28keWTmqy6h5EhkDZZeNtZO5qjWkkMciAnI/vU+S89OR\nn16VmrqCXEeWXGOfeoZrrawGTtY80FnR29wk2AXI449KlmkMEGAu/PSsWzvIVkVdw4GSDVu6\n1hJIieFGMKM80XAX7XyVkBQdcinyX6iPAOFNZS30bZRgS2MhjUcl5sXJXLEUtSjWN5xtTlMU\nvnbIN0jbR2rPt7hZIx0G0dc1Q1S+aT5Gk2DGKBM6GGdGj3CQEVVe7DK5D+XEO9YVvdstrgHH\nrnrUE2oHaMAlc4xQSb32oSDBbKtwOaikmEeQJPlHHXjNYw1I+cNnPbJ6VKZG5RzuJOTtHQ0x\nmhHcMuOm3ocUxtQk3sAcKO1Z8ckivjbkdqikkB3bmw/cd6YGk2oGOM7F3c8inq4aE5Iz2Of0\nrGhYbSXfzB7VHJmUjLlfTNAzQW9X7oVj2PpT/OEmVJ+UD1rNWZ4lODj1/wAabJIu6Mht27ge\ntAF+OTyF3s2EzinSXAkyMYB4zVNSGjUOfunO2nLIWY7cFSc4NSBKGDMFB2oOtJvHmED7uO9U\n5mEm9Ryw/hHWkgjaThc7iOhoAsrIxkIWQ7hyBU63AP3gQD1FOj02RYvNC4GOapC4ZJOOgBzm\ngC7cXRXAjbLdQoprSqqHe+C3tmqMM8azKWO44xT5JP3mVIMfpQBJCGkkJjOFAx6ZNSNcSQox\nKKWDYJHaqM6iNtwbaG44pVuAkm5Rlfc9aYiaSdZoyykhs8UNIWUKGw2KhA/eZU/epskmVKtn\nI7ihjRow3z2+DuB4xxUcbAbmDZdzms8SBsY7mniQtz91FOOPWi4izdXCeXtB4PWo47gKBjDE\n9xVaWYvIGYZb17UQeYw3FQeeT7UgLEjDfgSbs/pS283mFmX5sfKO341FtULuVcYPBqKXCLtU\n7Q55xxzQNal/Y7YIIYr1IqCSTavLBjUMcrbPKztPTdmkUKM5O5RwTTsND3
nVh/q8Du2aRpE3\nF8nJGAT2qsivucODt6jaOMdqWTJwBjA7+lAhWkO7KH5van+YwfeXVYiOeOc0zzF4KR89pKj3\nuc8ZYHdz0o1EXJbgRshUKV6Gq8k8kG5kUbWPTFDSCb7x2Hr0qKZtzBlkxjoPWmA5W3N5eMt1\n5/lQWIYL91ep46UxXfzAQMuR0FDsfMdfu/LksTQMlkkBUkElcYBpnmAqgLmTHGTURbyYkwcg\n8/Wm/MckgIvXjtQIklAOY2Ktz2okYSSAY+eMZGO+O1RLInHp/ep6ukjOqncW6MKYCfaC2GYs\nq/XOKmWQ7d5PynjFVVQ7woTcc09pjIMmMEA4z05oC5Ksiqhwx65pNxjkDbuT+lRM26MxFeCc\n8HvTeGZV3AAHkGkBb87yyI2+9ndkUy7uAiptDOpbp61B56wM7EhgeAfSmySDYnabPfpQBaZy\nsQ/gJagSOshJ5GKhkYtJhnzuHBp0Lb2yDtXG0/WgXUv28ys3mCTO35dp9ad9rWNW3keYTxjr\nWdGzKSzBSjHHHrS+cse/dguOB6igTNOO6ByA244+6RSpIkbI+Wzjqen0rNa6dtr5BQcE4wCa\nk+0LJ985VeT6UAaqX7qy4cKjr0zk5q1DrckMZQDzeMdcGucjkbhlOIkOCSOc1ZW9Z5GLFRGw\n4GOaAOrt9SM0AlR8HoVrQt9UOwEttbpgnJrjlvPLYAsBkD5RU63iswzlQo5amI7FNYG4LJn5\nf4q07e8+0KSw+UHrmuDtdQSQGDeQ3UM3etW1v0hmVGfBJ/hOTT6EanaQ3aiPYnrkVpx6kZI+\nPlHdq4lroyBxHk7ecngk1es9QMnDcMqZqgsdva6oVYODjA6jt710lvqMV1GhYEH+9615xZ3B\n3RlSSWGTnv7VoWuqzRzYblSOgPAq0RJHfrdxwt8rgEnpnpXQQ36zQJlwD0615rHdO53FgwFa\nmk3j3EwG75l561tGRztM9DZnGCGyRzUtxbxTwh5BnjGawbfXllYpIMMBwwNa1nqKXFuFOGFa\noGjntQ0mVrlxAhx2NU/sE8LYZdpPHNdnGxWbpgelQ30KXKlCo3ZyGpbiOSjG3dt4wMY9TSMz\neYBjcM4NXLqz+xzFieDVVvlZiOcnOKAGmN3Ztv3KjMbLjPCmpPlCledx6U2QOyjf8oHb1oEM\nA8vJDbiTjPpSea6jaG3c85pGkTaNq55pcguQww3aqAkjLquMZOfyqTzxuHZv51AGZMheGJxU\nuSsgJAyKBoeJTt3dEPGfQ09ZAq7MZYelReZuUupyM9B2pY5BuRl65wR60hsuq25cjk4pN+cK\nG2mqu798SpIGaeGCtlic+tMVi0kjBvlPTqaVTub5upqux2tyfkI4p0NwJG69BQVYsINuRuwK\nkKjcOeMdagj+bAIx3+tO+71ODnikFiVQskJDruFO3AbePbFMGVOSeGp53dR2oC6CSQsxQABh\nxTDI5UK36Um487up6UxZWb93j5uuaBkqyDdhxyKFZ8nHzk9fpTROeoTLdKDMqsM8Z4+WmInT\nnPHI6GmvllJPbpSr5a/KGx360rqioVDDnvmkAsLDIBHakaWQk8YHqKapMY3E7j04pRIRGWPf\njFMliyN5eNrbuNxFODD90cEEn7xpZwvyYOOPvUm19wIG5B/DVrYPmSNIEYrt5z1okVuVzgU3\ncm75l5PSkP3SDzVkMVY90fHDDpg0okJjPyj3ohIhz9KVpOMx4YN/DirRFxV2sMg7hj7vak3F\ncE9c0sjbBlQOnOKaPmZXPQDpWiJJnQKQ4G4NxT0UMeDtUfw1HGxOVI2q3anlflUdFHFWId5O\n1ic805W3HLcgVCjSRn5eVp8eWZhj3oEWV2mQEKCpHIp5UDGB35FN27FK4wQevtSNI0Mx5+Uj\nqaAuLsj9aKb5n+zRQFz9AFj6dqeeOpyKXyztBPFGB/8Arr4jqfThtG3OKXPy
D+7RztJ603O7\ngdKYCrtb2NG7IPtS/KMYBzSR/eO4cUDEX5sYGM0pXYAT1pfcdKHJbr0oENK9STxRu2rgcilV\nS3XkU4xhWx0qkBHgtxjilUEEDqBTwvPPSnfL0HWmBG3zDnAPajcNuDSbfMTI/Wl27VAIyKkB\ngUFs4JFORS3bGKeinI9KfjdkZFPUCEj06UMdpz1p7D9KZyeSOKQCq21c4pV5XkcikWT1XFKu\nVXFGoDo2JbNN6swoZSpPYUi5/GgQcdzTRjOTTztZTheKML2o1ER7+SM4zTu/IzQY8kUoU4Io\nYIReOD909qY2FbheKftJ4FLtZm5pgQNGsmQRlTSf2fE2NwXP0qfAXg07qAQKAKc+jwspwoqn\nJ4ZgcFghBx61tN8y8DFJ82OaYHLSeEQysQAtZV14LRVYCP29a77+HmoZDucYXigbPL7jwIpR\nlWIc+orKl8AhTlo8nGOK9l8pZBlkprWEGPucGkJHgsvwxgkk3PDxnONvGfWuQ8ceHv7EspCE\nVY844+lfVTaPDKoAAPrxXkvxm8N+XpbGPaDuzhuuMdqa0GfHesRmRHAyD/dNcZccMxORtNd9\n4ki8m4l2j5c1xOoQl4mJGGzmmiTmtVlCzbXdSuMisy8UeQNjFe9aOpQtIQVAc56VmXPlsuDk\nr3xxirRDMe4xvyEOW6571lXHy7wx2y9hWvMjLId5+UnjFVLwDy3KAFjxg1oSZqudoUgFyOn9\nar3EnlQnadxzzgVckjVtrFQpUY/+vVa43Izc4JGcgVQitMoZFIGRjqO9VZFaNtiAr+NXJA6x\nnOACOvp71WYmPDY4xgv60wuRPGiSAKcEck0Sboyu1gcnINP8seYjEZQnnNMaILI+eEP3VNO4\nFdHE8jYc43c8Yp0vlbWD8E9MdaQqWzt4buaimCjaSCTnmmGozeUIGwvjpxSzZkCo6HGOGPSp\nNzLkn7voOtQMskgWNT8u7PNACsWVgW9McDpUS46yYKdmp8jhnBAPythgO9Mdt0ZyvDdFXpTE\nMdyWBC/TFSbfmAJyrDkUhjbYpQhl/vVHtBG5Ths9KBj9oVtoPB4AqPa6hW6N3qT55Fy3HcYq\nLjco3c9zQMJCNyiTgZ5FDEnJ3AKDxmkXEu92G4qePelkwWU429wMUAJJnbtLHJ5zjj6VGuFA\n3Lg98VLJEjcsSr5654NQtKY3xjdzjdTESLMI7cnGXDYC0M207nwwx27UjKUB/io3Ksi4GUIx\nj3oF1GMP3ZJGI+v1pZPm24Xg88Upk3I4PGDjFLHtUKX+XtTKI/mbLMPl7UbgORktj7vpSqzK\nvHz85pvmFskjD+uKkmQyQoQNqnngYprgvCoI2tj71Sj94oKrznOKPlz8zf8AAaYDcndtIGSK\nbtUAAryO1Iy4yCfmzkGnMqqUGdxP8Xr7UwDfltgXP+1TZG2gBRl880/5d3A2imtu2l8YIPFA\nxjFdxZUwemKJCSEcDOOq0m7ozDIJ+8O1SKoVmXG4kfnQIh3llO4sOcgUrOxCZbBYc0shbydg\nHPr6Um3dtAGSF5oEL80bcEEetAwu4Hk4oYbmjVRtzzQxLSDBz6LQAbBtAByepApgYKzYTaw6\nGntuj3FFx2NGNvAGGI5oGRodzEkc0sapuMf97qD2poj2sD1U9aVYlaQ7SFGMgmkAsZB2rxkC\nmMdzKFGOe/anMq/eH31GKR23KgbjPegBdzCT5eRnApnVdjfLjijafnRVyFPBFKqllOc46k0w\nCQAMqt8/0oClFBJ43Z96cykrkdMcGmMQydfmHNAIcx/fMVXIPXFG7dk7fLPTNMZ285SowzcY\noVXZieB70wYhRCSpGR/e96C3zKzfexg/0pNyh1DHNO3Zw4IY7up7UCGhd6OrZHNPZf3aheSO\nBnrTS2xi7HknAamhlXLbiWPH41LAczHj8jQjrLJ8xwnTFHHCqdx75okj/dknhs
8CmArNtDMF\nyOmaPMU7dq5PvTDnO4gxjHPenbmk2lF/GgBPmGCxwS2DSDdxhuex9vSnttbIP388ZpuDgZOF\nHFACRyGNTLtOQcA+lHRufkB5x1p4YNtKjOR07UzajNkoQBwPrQAm0qNiDA7n1pWBlYAdhxTS\n7SRGIDDA5zTmQ+ZHJnauOcUDEVssxwQQOCaVmBMf8TnqaTOQcDgtimc5H8Kg9aYD2UyZL8tn\nApzKSxUjjHIokkJ4C5b0pse7czM2AB0oYCBQ2dsbYx1pcptXKljjl6N7KpJbC9gKDKnQfpSJ\ntqJt3HG4jHKsKY2DyDyeNvvUw+ZTgYP60nl/MMgAj86BjXUrIyEYA4NHQAdNtPBUs+Qcnksa\naVdWzkHcP0qgG/dVQw/4FS+WI1LD6UseVUhzkdPpSKojYq3A65qRkcak7gx+9xxT/LVdu7J5\nwKRsNjacjPWhsnG04OaAHK22RgeW9+1N3fK6gZOO9OaQhmymSRjdQp46dPzp2ERk7jgHbx+d\nKd2F3Y9PekXcy4KdDkNQf3km7tQAseFZuPlA4HrSA5IJGDj7opWOSQy8rzTTGxYSZxkfd9aB\nijg8/KPagZjJ5zn1p2194woAAzim7i7AsMMTwaQBtJUgHIY5pdgfa7g4JwDmkVTsffwc9aTI\nVWBJxjj0pgOaR5GdmG5c7fehoyuFwB3JpGyIVJOT7UcMFG7hjjPpSAHP7shW2/WlRfNIA6qv\nakZBucA7scAnvTV6AjgjgkUCF2MxWTbjPGKSQ469QefWnfxgRngdSTxQcsTgBiepoKANukDI\nQcdTUe35mbPLHrTsqpKR4HqDS8uMHavsKBDdvBOMkdOaGYjaRhQerUgbb94bhmnoQdyFcg8j\n2piGc5AxmOkZVjUkNljzijzHkkxjavRvQ0KoZXUY455NDAMhVBb73U0nJhLYwc9aUPsZQBlW\nHcUrSboyBwM4INACbVGCn3u5NC4HK8DuTS7vJUADO7jFDRxxspJIOOmM0AIFPzMDuXFCqV4J\n6jt1pOR05Y9gaMHcQRk0XAAuSRv9iaUru28kD2obHZMUfOuEB2t1pDEZFB+Uhznr6UrKH3MZ\nNu32pF3bSCBhjk4oaM7jsOU7r60wFBLYOQPQCkZRG2Bgr94/WnR7pJMn5do9O1RMq/eXkZzS\nAJvmbAU9M0NgKhPBbinBhktjluKRR5ZBOGwc4Pan0Ac2Q2QQSOCKay7VLAgZ9e1Cf60uF4br\nmk8tZOpJAPIFIRJyAhBycc+lM5bccbSKNvtgdhRyAT1HSgAwvlqNnI5LUpUmI45GenehY95+\n98gGSKYrNgEj5c8GqEO3ZKlhhfSlb5+F6N3pvliRSd+Sv8Io3FcHAX/ZoAco+VjjCjjPqaI1\n5OcdKbGFZSDwM5FKq8EtlCvQ0WEDZO0Lxg5+tKFVmOeO9CrJHht6tnk0gbdknkk0D3BlDeWx\n5XPaldVMh25Cn9KaVO5gCNgHSjnaP4Y+9AgZW3BV4bHK+tNaT5SCCGHH0pydG3cP2PtT1B+b\ndxigCPJ2rjhG7+tG1/NIHJ9aRenI6c0sYYsASBnk0ANkkIbIGT0NBX5QQcetOZflJUZ57UpV\nCrDBAxzQUCljIqqMqvOTTQzKNzFSCcAUKQ0bKTt2nAJoaNU3YHSmhAjDcST7E+9Kqjeec4HP\nFJ94KxGB0wKSSUh9vT1NQMXyz13EE9KRs5QHB2nmnuzNzjtjH9aT5eBmmAOrlSRyM80i4XOD\nkAfnRuxC53c5wAKVW4ztwR2oQCCQ7d38HoaVi7YCr7n6UAsY2JGA3ahWPlpjg560gBleR/lO\nQetMkXaAh+7nkCnMCzEY59c8CmrlmCqMsPypiBtm8LtOMcGjlfunkigEqQpxxwc01lBbHICn\nBpkj1ysYBFIWZsnpxihvmjPYZpM7Rg53HjihjFWM7wo+Y/Wk3qrFcliOmBzSkBYy
uPmBzket\nAIkYgDMn5ZpgG3jczBYx2zQzFTtHJPU0jckxkfhSncrfwkjqKnUBFZt+QQqj1pzPt2ENz6Um\n4K2SPl9DSFj5gOOTxwKYChH+/wDwj+KjldoAyW9KTZtyob5c5x6URttPy9aBXHKpXewIyo5o\n5HXGMZpjIGU9SGOCRTygMgGMoKVg8xFUkFlHDdqRshVK5c9cCnbSrHPQ/oKRZWgbKjAPAFAr\nitvaNcDHc0jbTlSdp6ikWPLHc+znNKxAb5h5gHpQFwVtsYH3mY447UpUo+ck44xSD5Q2369K\nTdzkZJNAx3PBzt9qRZNsZ7tuoVWwcx5H97Pel/h4A39xS1GEgJwrLtJ520gwoEikfSg5yq+l\nJ8q8jnJIqvMByq7RtIxyc88dqVhtBbdgnkU3cqggluKT5dw2k7cfxUwFZcqCOh7A0bm6hMEc\nE+lKpWNcDkg5pq72zjg9aBDlz1C5WkOc5xlc9u1Kq7v4tvrTVUx5UDIzk4oAVWCq+ByTgUrb\nmxskGAMEY70hVSwZl46ihdzSEj5U96ChPMjaMApll60uM5KNxjIpwyrlV79OKjxujIPyFWoC\nw7d8w3jAx96lbO3GMCkbDY+bj+6aEwwLEY7c96BDkY7STzjpQ4KgEcr7UEqq5A2sDRv8xsqp\nxQA8n5hkAHHQU3ncOcHGfpSZXaSTufPHtQ2W5HHr7ml1Aa0arDhzlyeaAoVRz81Cr8/zEZ60\ncqxPAPWgkcyhWUAjgc01TvZgMgNQzcENweuaUqdhYcLjqaEMaFKSqwOWxilVR9zdz39KP4eP\nvY4NNZiYeBkjvTGP3ZyqfK2KRgcDuMc02RnEyv8AdyOGpWyVZ2bjuR/SgBTt44zntS+YsL52\n57UhXIHdSOvehiVOB9wcAd80ADFdw6+tG08sxwcZzSq7GRUI57mhN4Zmk+YZ+77UANPb+91p\n3bDHd6ZoLMJCxG1McGmkDcPUjjPekAsbMoIZcjPFLllGJPlYnim5fADnacUeW5UF5N6UgEXK\nsT1Ofu0sxLbSOOcml2DzCQPmo3GSM7vWmAsg/dgng7skj0pGkDHA+YnoKdGsTbSfu9+eKjaM\nL8uflzncPT0pgPYs0flFQpzxTP4s4J7gVIqluTx2FHmK28kFFHBb/CjoMaudpJPHpTlKbRnP\nHJ96Y2PLBzkr1pyqzruC49PcVAhVyrAKe+4e1JwrGRNwYn5j6/SnMzLtULyepPpSruj4bB7r\nVDGqA25sEHGQKFwrblByRz7UqMzMHUbefmP9KXlywPD5+77UwGMd0J8sYBPI/rUqlpJFwdmB\nj603cy5ICg919qWL7x5+bqqmgRJtO7P3V6H1qMAMgVOgPANAblZM/PnBXtSs3lsxx87dqYhQ\nWkY5Crt9DmmMHjyT9c0rKd3ytgY6ChmCxjD896LiDptOdysc5obbtyQSc4ApWjZkMijdjkUL\nvaMMxBzydtIYxshTgYJOTTmjLRqD8jbs0D942UHGORUiq8mA45HrTGLH+8kkO3aD/Okb720p\nsHdaTzBkDfgg9RSbGkmIDYbrSAVldZGIwPlyfpSI/l4LDcrDhaajNkj727qalUFpAyrkIKro\nAqhWx5ny0LhNyspIPTmmyMOWJz320+NTIM5+6M4qSRcDcAeEJyacsa7CGXgnhqjZtxDY47r6\nU+NS7FhwMcc9KdxD9xKrjKsDjB605VddwzgHnrUcYcKUBw+Ovan8NHl2yRxkUagCg7sgfWn7\ntjbjyvSomAWEMNwbPen7WPJGT6dqYDlYurEAE57+lIhVtr78cU4YjYKcHPpR8qqVePAHC7e/\nvQLqCnncg+VvuilWQliXYcdKarNGCDwe3tTtoGGC7R/F70wGqQy/KMAUj7kJVT8wXd+FLy0p\nAGARml2hiwHXofpTFdjSCoBLbQ3pQmOhbgGm7kVWyvyg53VJt2nYcFWP3vSi4/Mc/w
DrFKuN\nrccVH83yhsD3ApVUBvKz8wOaVmLbSONp5HrTGI2GT7pJz0pQrRlucZGTQxcZYHI659PahWMi\nY29e9AhnzqSQrNzjAP61JvO7DjJTgH1FRsWjaMMfvfMcU/ADPI7ZBpCEZixjxwrNRJIyXTKE\nz2yTSNjg7uP5Um1G3ktnYeG9aCibzNsgj7rg1uaZdC1iZjwWOfasSNo2CsPmY/eXvWrDsjjV\nW4z0AqSWdd4buM7zI3CnIwe1dO1/bw/P5e761w+lr5Y3Z2j0HetvUr1JLF9o7YHNSNIgvrj7\nTJIUXaM8e1Y00m2PJyf73+NNW8lATP3ehqvqFwkbAocjoVpFlyG+T7gb733TSvOGbktleDWK\nsoUP2HULVq11FGUK/HHSoZSNLzGaMdsd81EJHBypII7561AlxEYy6H86u28Ec0PmjkKOaChV\nuo47csSxlY8KadNcIYAVkxJ3B7+1Z1xJs3MW+lJZwSXcZMZDFT8v19aYE4nJjypI55WpVMnV\n1LFec9eKz/thjbDgMc881r+HtUtGdlnYgscHI7UhFKSTPmKAflNS6fYz6tcGGEfd55qHW2ij\nuZDazb1Y/MfQetN0fXJNLuHlhcbyMZPQ0rDLOp2M+h3CxTqH3cqVPFVJLh5mOGKsP1FNvtYm\n1S58ycF3X0PH4VEZuisNjNzmqEXLe7MjlZHwQM/L6UxpF2M4O7PFUoxtY/3j39asrGVhGPmy\n1Ax8lwpjG0FDjk01XKKHzuJpk3zbB26H60bX35ACheKBEk03yggZLcMaiVQzjJ+7yKYNrMyy\nHHP8NAhA+XfgdqWoycyIWAifcy8M1OnZ/LbnC+oqJWSMqcZK8HA605d8kZJHyk8A9qQiRSWh\nyODinQXEkBEoXpwKjk5UEdf7tRx3EhZgnQDpQPoak2sSSQ4Q43cGqEn3WIlBPTpTF3iPn7/X\n2qKP5lyw2tnp2FAi0qmGfDEYA6019rMApzznio/OVQTncX60o5lBHDL0X1pjIZnJKMBknsD0\npVaT5Rt+XP3qJGZmYHbGPX0psDFVKFThRnJoAtRKvzbmIPqKjYI3y+bj69ab8zQkhsL94g9a\na24qowvzc89cUxjGb+FW2svWp0fbxuB4z7VCD5jMpXJ7c9qTcPJbouPXvTEOJeXLMw2g447U\n5lcLvWTA6YHeoVmfaMqsYz0qZVBdt5zx+FIQ9pWRlJ4HWomma7YtwqKeh60kv3eG38dPSq7T\nFGTpkd6Bll5kXJReaiEm7LKNnr71Ak2N25gG3ZqeMGZjuwgxxz1oGhxk3ZTcQCOMVJLCFjBE\nigNwSexqrNugYYYbcY4Hf0qGM/Kd4YrnPPrTJJpncyRxRksFP3h0qeWUSHY3y/TmqiyBYZDI\nhT0wetMabyY02HLsMgUagWnl28HaeOhqBMGPDnA3ZqESNtCrnLHnuBSLIHJIJ3DtigC3JKXU\niOQcHsKjUYzHkluvPeoB+7j27sSbucjqKUXBjY5bJ5pDuPkzIy7T908ila4CyMFO5upFRr+8\nwcbfpULQN2Hzk/e9qYiTdIyK0n7sMSAQM1Mj7W2KFz0OKqSNt8rLmPnhutDSAl2yVGdpPb61\nQtSyz7IeezetOuZD5YzxxwtVUYLHtz5m3g+mfWmK26TndIvv2pMRb3jy1B4Y9aTzdzM+NqAb\nef51XW6bygFCt3+lLJIZkjYkHd0A7VIx/mH5s4OB1xT2aORACux8cZ/nVeVpNxEe0MOOaNxZ\nhuHGP4e1MC0zbY8NTY+N2Hw687areZwrr2OPmp6zK2VjXIblifWiwy2bxpFCsFA9PWiGfLHc\nn3DkZ71VjYcnOCvrStGJgXDkADOemT6UyWTtOZiQy5Gd3sKiacNg4xzn2NN84LGCT94dqbwS\nqs4K46+lAizcyPIiDHljHI7fWprWTy4VbG7jG761VF0ZtrAZx8o3U8v+7dV+bAyDSDYt
xjav\nl79ozkN9aJL6TG0KAOhZjjNVI5HLZLdRhabPJMvyud/fgUgNCG6dV6DzivDGren6gIlIZSXX\nndisVXnXaxKjtg9am88p+6Iw3881SJZ1NnqrSKw3HBOM5rStdQdYz5XOODXFw7IZC0Z2E9Vz\nnmpo9UEYGQ6knHXrTEd1FfXFqFmRjjp6gVt2esGZvmAG1eWrziz1nbA64bbnAG7qa17fVGCH\nH7sDrnvWi2Eeh6fqTSHDHbG3INa8NygkR1fJAzlTjjvXAWerCTbulEfGB71pRXjJyH+boMGq\nIsejx3iNsCNnPIFadnrBt4ywGRnBFedW+oeUyOWIC988V0cNydqsBkNzke9aRkZuJ3djqpk+\nYS7hjha2w6nB4ycV5vp9zJaXCYbhjgg9q6qxu903zuCO1aEG5qFjHPsI/lXO6tp72JaRT5kR\n4DCugt7jzGZQd6KM06eM3MJVwCG56VQji4TuAZXAKjljSbXVdxbI6gVf1bSfJkzGp8r29ayn\nmcSeV0YDPtSAczP5igAHjpT9xeVcjcy+lV03vJnOTVlcLyOD0680xjZJFWYDBY5/Kn7huJxj\n2poYDnG4g9KBMAzMEOakYqsBkp8qnqKk+XIKHt+NMb5l+Rfmbr7UiqSAQcFeTmgRYHzAL09a\nkUFjtBwvfNQbXZmYH5c0vDLsDYPemKxLnOTuB2nHFAbaQF/i43UyFdqEKN1TAny1IAAHOKqw\nErsyqg+6wHNSeYTGN2PfiqykSLl+W9aeQ/ljB6VA/Um8w55O4dqk8/p8u5e9QrIJHAHXHNSD\nETEOeP71MLCtIOuMMTwKjkkbczMNvYYpWmXjjJHIamNL52A3Hq1MYzzH5Ctn3p4VGBcNnHao\n2AjAZRkd6k4YZTA7kUCFV8DcBlu1THZ5oYj5sDK9qrhmjIZsFPSnRszIxHDZyKAuW92Q23nA\nz9KQyLIi88VVhlLbiTj3NLGzL/8AEmkItEr8qYJHXNTbjyQ20Y7VQhmKyHccgdqnaQHBVsd6\n06Ekw+bG7IH96kZfMbAfA9ajZt0TE8Nnikjj3YZWx61USWTrIqsqs2QeAe/5Uqx+Xuw3IP41\nXXElxukGMcD2qVm3ZK/NWqJJfM6EDg9aVmGchc1DuaRgVXg9anUrIwBOGFaCY7aZJQ2MECkb\nCsCDuHTFBiIULu+bNKcRqExls0akkiyFV+b5B0p1sgLv5ZzkUzZuU55I7VIqsgQgBM8Z70wL\nJ3MoB5akZdykNyo5+lMmcoy7T9aGy20/dBphqP8Asz/3hRS7D/z1ooFqfoIyHb1pu3gA09ct\n14o3bjgL+NfEn04xpNinApFyvHc9aftJ4IpBG20nHNAxEOcgdadGRtIx0oCjaP4Wpdu1iM5H\nrQIa3TIPFAJC05gOlJt28YGKBjeRyKl4ZeTzS+WNvFI2No4xTEIyleetJ9RilOWIHanPhzTA\nhGOnSpPvLjNA2luRxSqNuaAE53cCkUH0pVDDqOKbnbnb0pALg7SaYW4xjmnB84B6UMNzZNPq\nAzJfkDGKcDuwTxTmUquAc0iqO4zTADubik2+1PZeeKGBwOaRLGDsppxUZ6cVIFC4NNyBnjPN\nA0MwMHFN3dPUVLhSuOh7VH2ww/GkOwb+TjpQu7b1pP4cdacVKjgUAIwGwjHNJt4yKkVSvWm7\ndueKBCj7oFLtLAYOBTY26ginIu33HpTEDLuH9aacL71My8f0qNccihgJH23Uqx5Y56VKq5Tp\nSdTzxR1ASNjk8e1cR8YLFptFLKgb93npyOK7yNd3fBFc/wDEKESaBKc4fGB9O9HUD4M8VW5S\n6lJ+VD+Wa4LVkzGdnrgmvVvHtoBfTBuCpOPz4rzDUlEhcEbT6VQjlL+Aj7oFc5efu7o8+YCO\neK629URhcjGe9c9fQxr8xG7PPFWiWjn7xx5gUHDDmqs0btGWcAnquKu3VtHJIXRuB1Y9aq
t+\n8bO7BbgCr9CCg+PMAbjnJ96bIwUu7HC44qzcRkoQRyOvNZ8knmN8q5ReOa0ArTQCQockZ5J9\nRTJFCnGMrVt5JZoegEYrOupJMRtGdpbsBzigQSTBgG24x2NV5v3rbvvN6HpU7bpscfL+XNMl\njLDkDcP4QeaaFYhUqZNx/dgD5h7UNHu+ZSNh6GolVmbeSNnTFLGivLsOQV+aqsURkbW2sOez\nVGzFZv3qktj746VLzJPuK/If50wKzEoeV/i9jQIgbO1jkBj2qNNyxjbgEHnPepNqyMBnGDwa\nWSM7GwMjOT9KBDRJjJUkux5UjrQzNICjrtzwB6e9ELblRshG67ad5bbiTzVDGfKYyiMVCfL0\n60mW25UZPrTwu4lWIC9M4pFt3jVlJ2ntz2pCIOfvYxk9ac2WlLF8r0+lPUl0Crg4PftTZPmh\nZuNoPLDpmmMbu2zDIwQOCe1IG2sQ3zt16U/uGJBO373pTJHbAzyezetAXGg4zzntRIzRkqBy\nD1p0ar82/gdSBSll8zcvzbu3pQBAZCzHfwP7xpSu1QD+8AOc0E7c7l3DORS/8sztPHWkIRlO\n18HbgcLQoZhnHI4+tBP7sluWpFkSRfkBDDnmgQih9pcsAM42d6arR7jxwR0qZtq7XUZ7Godq\nuu0jDFutIok2nyxjj3pjAsUAXIH8XoaJNqt8nzqpwVp3lkkhW2jrn0oYhjKxYrn5M5J96a25\noycZpzyKFDOMZ+XaO/vSNGWWMrkDvg1QCBduBjJI5FN37GDL9/NIRtm3E4AHU0iyIzdyhPXv\nQIlkYOxYcBhyPemeWhjyH5FEfy7sHIH6UiZ3OcgdqBoRmKjdj5ugFKu6JwxO7ik2+WOcr3wa\njj3NExB6NxQIeMrlm5Vjk+1B+9lMtzTpNyruboeDSxSGPAAzgc0DGNu87j7ookiJmX5flYcH\nNByzbidq56imrEz7jywzw2aBB8qtzkMOB9aRyOSzhl6H604eazkP1XvTMgPkR9eMUDBmaNvk\n+UkZoZvmUjp6Cl83apAXIAxTP3mQSeMdBQA5Y1O/5sE06NQjE4wduKQo3B4AxyKQ/K2087hw\n1MYiRyNhkYDHNOJ6DHJ4pke3nAO7oKVozIpbpgZApBYZxtVgAQvX3pzoN+G4Ut2obb5QCr8u\nP19aRY5Fb7mSeetAmK7Ns25GO1CEyYUDaR3pDv2pu5GOw705mK4jxg+ooEJnfkEbfak3hUAT\n6c05lLZGQRj73emLF8mQ2fWgB7KVUKGG3GcU3BZi3fGKTYJFAPFPyNx+XgDHFADVbbGoYfPQ\no8s7mPA5xSgbeNu/0akEnzEk8YwfagdhDns/0xT1G7Ic4GOKaJFWRMD5aMHzA5GVPagAULJg\nk47fWjaPLO35iDRtKqTkKueOKSRjnCnafUigQNtkbIbHYntmkfP2eP1zQRtjC9D1wO9Aj2hT\nuBP900DFkZZG2sdnFJvHl4Ix2pH2fKpcZYZC96eoP7voWI6npT6CEx8u0kY7U1flUgDvzSpG\nWJBbd7UmM4KnOOOfWkAOquitjbz60oJ5LfMByPWiQASKH4ApDgNuVTu7D2pjEkcBwrfxc57U\n5VxjPTHeo9u1f7oY5204MVzj5j/dpAKrIzK68Doc+tDKTIQ5P5daRVVY9rjGWqQKCx2Zz7mg\nCAKx5C/KOlKW2ru6t3qfbz+7J29800x/N7dTTEM3FMFjhTTdxb5RwxpS3mMpYZUH86cFXLyK\nhDf3e9AxBuZcb+ncVCc4BUZLcVJw2Ch2jrSxsAQVPI4FAxGj2gsZM9iKJFBTa3GOQaRh82du\nO7L60jYaQNtJwM9aGIMhmXlvTimKD5ci7iWzxUmdyszHY3rTcFlDZ/BetIY5Q1xHhvlxTG+Z\nQE5wacN5jwG6/wB7gmnNsjPQZA6UCFIIbsfSmRA8rt+bnBoVEYElucZo2qqgg89TQIGzuCuc
\nD9aAFWPgbj6ZprAcM2DnpSeSsjls4TGPxplDlYbRnkfSo1K4KkFDn7tOb5Y+Ww3TPrSLnpuG\nByKQhx3LwVA9DSLyoLcnPWnBSrYPzDqaTcH/AIdtAgSRVYgnI6gUE5UvjG3nHehgrRkD73ak\nUBdx5yRigAdtyhj1bovpUaxiKbcrZ45qRWIhQ98daTlvm2/hQMaCWZiF565p5+ZRkYpARIiY\nO35uaQZZmCnB9aYriNnzPmG0dqd82wFzkjpQ0Z8tQAD6035umc9qQChhv9D60v8Aq2Yg7i1J\ngnLfdwPzoDfKFAy+M0wEPUIPlY+tOBYtuccrxSBizYI6UFWJJJ49aQBGRsP9/wBaao8tSDyQ\nckD0p/H8A+bHSiNvmJZcOevpTAazbv48KegpVYJEyjpSLJuVg3y88etKVXd9R+FAw2+ZGCTj\naMimf8swxGSTinKxJ5+UelDY2nHPfpQIau485yPT0o6bsH3pSGyu3le9IyhWXHO5sbaQCyBt\nvDZ+lJJn5e5B59KduyW2qV2nFCxs0ZYfd70AI20ycnAojULIV5Ze1II9sX7ttx/umhlLlAG2\ngcmgBdxVTtGPWmsAxB7YxTgdrFjyrcAUsiiT7uOOKqwEEilsqPlIHWpJW37MnIVefp60pYbd\nnf1prRsoUD5yeM+nvSsAiNxt6Z6fT1p6n5WAGTjApQxXBGCMYz70zIPzKfbNUSTKEjh/vPjG\nKWTY6ghSoVec0zzCy5jG1l5+bvUu4iMMerjJX3pDsVtwb5c7D02+tObG5g45PORQY13qxjJf\nHzU1d6/eGeeAaBAdzxnaOf6UpiZmV24HYUM204HXrgU7aXweq9waGNCLlWJDEAnGOwowsbNn\n5geKXd/COEzmlXEgIzwOR70wI2+6UxuJ5xT2cs3A3KR96mLIGDHGT3pPmZVLDCk44pXGCt8o\n3HaF/wA4py43A5xn2pG/1ZVugNLz1LcUAEcZ5QLkckNmmq6MpBX5hRwjF+VHQCnBdyk4C0eY\nhEVVzz1FCttjJ7+tA/1Zx1A5BpqNjjbxjNLUYoycD8SaXlsgjgfxUZKYIGcihgyxgHODQSxE\njzkZyaPL2kAHnPFB2rg9OMUuzaoC8t1piIgp3NuOeaWRt0g+Xr/FT3LfMRgH3qPaNowc5NAx\nzKQeeaGf94MfTOKGbacD72OlG7GNoAUjn60AOALBsDnt60NGu4bjh8Z4pqhztfG8j0NIu5tx\nx35J7UgDazL14PSl+VsqDg4pSNvC/gaGysfOGaqARmP8QwMYBpWkZWAHXGSKazA/KF4HOKVl\nKzbz90ryetIBGOIc84znNObb8u3pjtQsSsuXO3jgU1gExl+ccHFACq2GAQcHgegNEqvN8h+R\nl4JHemk7VHbnpSyIBlhuPp60xBtPAUgkdadE/wC8+Y7x3J6CmRk+uGPpSruCMjLlT3pAKVDS\nfMfl6gUKV24OVbPDGk42gAZPAJ9KdxlhwRSFYZC43OvU57098+XgMAO3rSOo8sEYy3RhSZDK\nARjH8VMY5v4lyRs4IHc0m35g3VmFKjBlI5G7+I0yNCMkvuKng0+gxVYrIwOVwKWT51IAxgbj\nQxLYffuycihizscnarDGaQAVcqwA4PSjHmRrjg9CP60m7f8ALjIFOXK/MDhj0PaqQAY8Bm28\ndBSMOEDenP0pRGWjYk7moA6DdhsdKRIm3YxCjK9T9KEI/h5XrmhX3ZFAYeTiM9TUsYikbgd2\nGz0qcEtICSPVj61Dtwu4qOOKMI65b5WqhjlDGR2D89QPQUbRGgLAMW5zSecWUeXwe+R2p2Bn\nGPlxkmgeo3y/uk9+5oHzufQfw0Mr/dByOq80MvOQMnvikIdyykFcnrmkDHofu+1JzGwA4Zuv\nNObhgEYAdxSARhsbCpuGP8mmtllUZ3OOTTmk/izg+1R4G44JPHUUxXHbDIuSeScfSiT5iSRt\n
x8tIN3mYB3KKXdtbrnNMQ44aRcdhgimrCSjHfk9aMkMW+83QmjaF2jHfNAAjfKPl+Yc7qcCi\nt04J/CmEqqtyT3pdyrGBjIxmgYp/1hUgt6Y6UmS3U/KDxx/So45HmQ/I3XOe1TToFlQ5+Yjn\nHagYgXhsdMdfekUtsCZw45pPmb7hz83GaUltxbOT39qAAMFwDkN1zSNvVvvc96Ukna5G5Pej\neS4JGAxwKBAysykjoozS483aH44yKNpXdklgOMYqPb5cZG7fzQIkfdt+YfITSbQAQTk+vamq\n3AI59RSqN24KPfFLRjEO5FEecO3enNGVUOnzKvU0nmbtpPBzjPelKgKygElmznPFADm2ELtw\nARnbUUanyiGyfm+76U5tmwd+eDSr+7j65JNMA7bc5b1pcSswOQyDtSbfmPbjmk6R459qBiyL\nnHQ+tSs25t2dqqvC1E2Y48suD6UY24XGXf1pCY5pTIyDbgsOSKcq4yW6jimcheDkpSNtkZPm\nOTyRTGK3yQ8PtycVIoO4bF39jTWbahBXPPSniQMQq8Z7UAG47znbjoRUUI2q+SevGac20MxX\nhRx+NLGw3EEckUCHR/NCCQF7laRWywJ5pPMHOOW6Ck+6Dj7x7etAIcp+cEDOeCaVW2swA9gT\n2oOVyAuTjP0pURSzMuW7c+tNjI0+VmJ3McYOKkX5Y9xwh7DtTY1Kqdpye+KQKZIy7DBHrSJF\nXCMoByWNPkVhvyckcg0i5fa2FBA6+lG1uedy9TQMG2SKTtCnoR/WnKrLMODv29vSh1XzjuHJ\nGRTI1eQOH+Vl9+1Ax7LuUmNgwX7zCnqhjTzB06gUxY2WP5Tgt0pZDuVTnA6E+9UiQWMSAkHJ\nbr7UbGjVhkk4xj1pNoVW5yPUdqc0e+QbH2gLktU9RBuHyBVLDHNOeP8AeFM7SRximqRGFBJO\nelScrIPMGST+lUAg3wqh3bsnaaVgFYjbhc4z2pv+skzjaOwqVSSrZTK54+tADmyyOB85HSk+\ndmQvxxg7aYiuy8ZXsadGzRkID8nU5oASNz8rbd3bp2qTzGYsI2Hyn8RTd+ZBt+ULxinyKRkk\nKc/dZev40CB87Q/De2etOOSwz8wxTWRWUh1ycUi72wz84GPeqQwcMueODSsybuBjv6UisFbP\nY0KBuYnBj9KQmBwGCgKYXHLdhTtoET5PHak2/IQev8qcOgLDcv60wuRswK7GGG25ElPhYSMp\nx8oH3velUCYMGBBXn/61NXbtKoCAefYU7gIo5JD4BP3aSQvuGOSD+FSkeW+AmTj71RE7uA3y\n5zmgNRv3miA+bd1pzArgbR6mmN8qr2CnPFOYYUE5O7p9KQWGNlm3H5Yz3p9uG2u235T/AD7U\nbSZPTA4GaUDbJwSVx92gB8WN4cLh+9aKqvysTn8elZ8e4MAxyR/FVxo/3ka/Myk87akRuTag\nIYVDN5Z29ac1/wCZarl+aq3UipagAKcrjcw5+lUbe8EceGX5V5ApWKVzZsbYXXBb61R1iGK3\nm8tW3n+8O1RLetsz80ZPPHamQs24lyrE9c1DNEQtIB8ynJoXAbkcMMbqGXzZ8D5Fx+dIsY8z\nYS2BUlEsedmF6rkelWbG7mXIjYkEYIPeqtuvz7C3y5z1qSTcrkIASfSkA+5WTyw2fm9Kkjvn\ns1Zo8h3H3ajjbbGd2fdTVab5mI3ZUcjHWqAkVmcl2AUsaao8snaCGz170yRgqKd3AOdtP+bm\nbOc/wUgFkYbRxls0oUrAVAwDzQuFwSe/SklZlXLnCA9PWmMmidUGDjOOPenbdzbn546VAFKy\nKdoZTyB6VIrtJO69F/vdgaQh8m1ZEAOT147e1Sx3f7tWUFQT0qOVGWPMYBHfJ5zSRny5IzyF\nxyKYh4mLMB1wcVJIuFwG781U3M0hYDejN2qdMRrIAN3oKChk0g52jDHgZ6VctovtMqLL8oxg\nle
1QJAfk8xeByv1qcSCzV3Y8sTjntQFia6s1h3MvzBenv71R8xmXdtOCPXpUwv1eMKDlz94t\n6VDJcQspCErn1pASpIHt9hOJvWmRXAhViCBtOBUKs655wMfjUDXBWFkZQSzZx3oEW/MJjLZG\n5j60NL5wY5ACjtVcOSoCBeRSM3lsqsdnqKNwLKDBXJ6jPFPlYSKMv0GSwNUvORtyq53D1qsJ\nBuGxwuOtAMuKDu3E5Pb6VKsoJULJlc88VReZ5FLt8uO/tUCzHYEU7d3I9aYG3IqLAysfmz96\noG28hWzxgGs1bgpgOSeeCetG6TDFmx3p2EaG350ZG+f7pqPISRxJ8qjjb15qpb3TMMRt81Sv\nIPLyeq8k+9OwieSQblRuwGTUrSqImwwPtWf/AGgHjZduW9cUxZB13YGKGNE3mfPkSbQoJxUb\nXG6HYRtZuRUSqZI2cA8eveo1mClOQzHse3tQMsqrfKcKeO9AdhlW5KtgGqk0hkZgT5ZP6Uhm\n8tgeTxjr+tICwZ1aRgu4KvJ+tMkupDG4blOo5qPzNy8jBxg4qN5gsbK3zHsFFAFlZ9ygn5go\n6E0xmLYwyqzHIDVWbzFhO7qTn6Ur7pcK3p1pkk6s8bhd3Q8rUgmjRiFOXxiqTy/KAD8y9TTJ\nZkjhEh4ywHHrTEaCyd3O8MeTSM23ovIP3ev41R3tIhJXDqeGFOjdk+YttbupqWNF1maGQHHX\noaZHdSSsMk7up9xVdbkLGzkMW7NnIFRGT93Gx+d89j0oGXWmBfCt9zgt6U0XHnyFFwCp+6f4\nveq6ySSNlQM9D7ik8wbmVE2kn8qYi1vHVOcDHpUKzHoAQP61FLJMduWUhT2pzXC7uQMHjiqS\nAlVN0WI+SoycUbxJFHgFDx/+uoUfySMZXcvK09Z2KscAADGDUWAlZvMdixwBx9afuVUUFsJ3\n9RVQt8qkyLu6kd6ez7s5VifWmST7l4zwvam/MGCjCAVHG6q21zuX+8aarCKZflMxbj2xTGTC\n4G1mIPXCkdzT3lEcKjd8xOTx09qr7toKqACpzg+lP8zzE8xjhu3sKBDnYs2/7pz90/zpRv2n\ncFweKYNvAIK7uhNHG1NpJCNlvWgCZVxFuUbWTjPWnBjuHGwn7wzTG3tIf3iopORTJGj2nnIB\n5YUASKrRyMBwM53dqmjIbc6k+Zng54qGUny0ZWBTpjNJCpLKpbaM/dA6ipJJm2mNTnk9Fz3q\nXzPs7Lk7jj5mNU1Cxsx8zKLnFKzFlXeQ6dfl60xk4uBGpYKVOeKm808M3zjtj1qszKY8YO4j\nAYDgUrSGPygoJA9O9MTJ1YM2ASp/KtGG9ZJArjd6rWUJjPKQV2r79qmtplCkg78HHvVisb9v\nqCpJ/rPlP3Qe1btjqmNweQOOinpXFQvuG3Hy53BT1q1a3S8SJkunGexoEd/b6tH5iRuWYjkY\n6Gt6y1d1XzSSFz9zNeaw6onmbsMWz0zWzFrEskcZDZCtwOlBJ6Ja66JJQGBwxxg/yrrNPvRt\n2Dt05ryKLXRJKBgqSQd3aun0vxEsWVL5brg8ZrWMjOUT0y21R7VlA/i65rpbW8W4hRiwBrzW\nw1QyN5pYEdMNXQ2WpBVyzDHYKa3i7mOqOvuLcSjaCSprm9V0CaBmkj+cnoa07XXQsagDA6HN\navnLdQuhOGxxV8pF9TzvcWXaD82fxqbaEYOQckYrb1jQhD+/g+Y98CsNmPCNktUtGg+SRlQc\nBX6ZFORdvU7s/pUbZWMggHJx71JHHtbaDzj86QDgu2bKnIxSldqkNznrTFzGxIPHoKmUfMGP\n3SMH1oAXaFGVbginDaQMAA0y3j8tSJAeDwaWPG4AjnNSxkn3ZsE447U/eq5fP3uMVHI48wcZ\np/3lbjB7UxiLnbx07ipd2eEbBI71CjPtAK8dzSrtVTnkmiwD4yVH3hnpmjczEgnGKUKBGMDF\nNVcz
BmPHegCWQFQh3ZHemyP8hAx81NbiTjkZzijChuuC3IFAxWk3YUfKO9C/MpZTgdKaUcbc\n9D3qVlWOM7eWHJFMhjWUtwowMUeYy43DB6UsnCqOcMMn2qNY90Lg8nqCetMCy2Fxjp3NR88n\ntmnRjzF2k5x3p6N+82AYHU5o3AVkXyfkOCTzxSswLfKMKOKaPvnbyDS7uQAORV2GySFtilm+\nbnvSgkEFeD1HvTFYNwRx3pYUOehxmrRDsSbPmDbti+lOWbdkjgdBTZF/eFB8y9adDsVg3VSc\ncdq0RnyjmUxrtBO+njO37uTTVUmQsTnngetT5RI9y/Q1pcnqMBPy7uRmnpMpLFBlemDTYycE\nK2c1JCBE2AvzGgljlI257dMVKh4YY3kDPFN5G0SY69qfGgiZwH60Ma2FjU/Nu/WjcJVUBvmX\ngU5VeQ7cc1FzuYomKQx+6b1FFQ5kopgfoooHBJpFJbgCkb5hwMU4cr6V8UfSjehzS7mxuPSh\nWDNj0oY/KSDkUxjcEtlhxSq4UHAobdszn5aR3C0CAscZFK20KD601WG/BPFKw6cEigY7cPoK\nU/MMdqb97gdqRfTOaYh+doGBSH17n0p6KecnigeooGN8sUiqc5xxTmXJzQvynBGRQIVQVGGq\nNsgZxUvHfrTZGOOKEBESGjyBg0m4de47Uh3HnHFIp7mn1Al3Bu2DRg9e1RjPBByacpKr60MC\nUMPxpFIOcim/dx8uTTWk4IximIfuwuBS7T1FRo351JuYyA/pSGJnb1HNMb5l4/WpvLLfL360\nj/MvApAQrGRwDTkUtwetOCjg07bxnPNCEC/MORgihYz60/aWWkVTncTxVANGFyadwVyOtAG1\nvmOM9KXbsUg9aXUBHxgf3qHAC5HX3p3VR60xcNknmkwsIpLMOwpAxLkDn605Y+4NKihT0yaY\niWPavJPNZfiiEXOjzxkblKmtIrubgfhTL61E9hMp5UoeDSA+IviZaBdQnG3bg8L9O9eM6pCB\nM7n5Tn+lfQXxctHtdSnEibCrEAH0rwLXlT7U3YZB/GtLDOZvtzkbh8ijOawr5Y5B97Df3fau\nmvIcg5+7iub1KzMfzk4OfyFNEsw7m3hWPA+QZ5brVK4tt3YluxFad38rIYvmXH3qoySFSRkk\n9cVrEyMu8RVUSKcSLx9az5MoSMhQ3rWnfL9oRTER1yc/rVC8t0UBmO8MMBasXUrldqr3XPIq\nrcS7VLR8Mp6+1XAqyYRX+6PuntVaaNdpwoznkLQMgjk+0IMgKpb71RNDt3y79pzgtUsam3fY\nVyp5X2pjMeWVw46M2OlUMgk2bcqPlNNKsy7F4lzx9KcuFVtowAetR4LgFGwgOeadyRn7xiV5\nVMfN259qjZS28KMbuTtqWTdLxu3YpkmRnaDnH8NAEQXy+B83pTmby8xvGwVhzSHayqSv68Ur\nZ2dcqDQK5FsQc7TjPHtTl+chidozgU/Y3z454pip5keBxxxnpmmMa2ZM4+XnApnTCsMAcZBp\n7D5FQ5DAZJz3o3NJ90AtjkGjoISRGV8YwCOoqPauwoDtXqR2NSx7QOGLACmLhs7ozmlqMjeP\naQOMY+Wkz82JPmGOAKe6gqXzujXgVHjdtwcYPGe9GoxFUrnnJx0pqvt+Z1KjH3lqTa5ZVU8s\nOW/pSOxFuNqE9iT2/wDrUaoQ1iy4O4MjDAqNkVcgtjsO9SXDYK7SDx0qJN24sSAfXFFxWJtx\n3KBw4HftTEkwrsw2tnAb1pVJHKHJHU0Hb8pYZXPI709QsNkfd8qvh15NEu7dGcjDdh2pZEGW\nbGH6Cm8xqm3rnnPajUoTcrEhflHf1NDMGibHC44Wn7w0i8DGNxpCo3ZK5VueKLAMyFXLcuBw\nvoKULuyQduB3P50zcOWz9/pQw4KsMtnjFMkQ8vjBIA4pGyqkhct2PpTpCThW4xzgUi7Iw24k\nhug9KB
CyfK6jPDDlqbt2lsNkZz9aQhXRQTtOeKHj+Yc8A530DE3dNxy7GlZfLEgJ3AUrMV3F\nVBJ6Zpi524dST/FSYhVUSLu3Z4ztoDcYC8kck0Nhcrt5x8ppvzRqQxzkflTASNfbvgc0u4nK\nhiNvbtTfLXaAMg9QacMxtgn5e+e9K4CjcpG4biewphZo8qOXJxSx/KrEc88Uit14w2KBjY5O\nGXHfFG4Jwx5pdqx7SB16mkVAc7huGeKYCDzGU7uef0p/ysSRyAMgf0pFVAWPKn3pGiIKpuG5\nj8tACiRpN2wYYf5xRuKqWGCx6r6UbmErEtjHX60RjO4Y+Zud1AhOGQFMn1z0ofd5YYnIzwKc\nHCqAeCOMCkfYshy+PRaBjV37gVbcRzimbRHJvK5ZjktT1ZVJUj5iP4aRG8xVKjII59qYB/Ef\nlwSKEVljOcZPFI5LcdWHNOWQEjKYXH3qQhNu9VYHoQu2hvmV9o2FTy1GSxVgdnGdvrSqSry8\ncYxQMbGynawO4560uRGxAXIY5ohWNVI5Vsc05cpjd8xHSmA3y953BxjsppNjryzfKo60feyT\n8rZ6dqcF3Z2nB7ZpCGqSMZJC4zjFLuG4bm3gjoaczSZ25DSKOfpSboo2GR8x6N6UAQsjKxUD\n5uuTSqM8YGD1zS+WdzAv+8J7ntQCi7twyg6EUwEVUyMLyvTNO3PuEZK+WOduOaPmZg2M8cUc\nJIGPDnigYrL8xWM7UPOaZGg5OcKvO3tUhXcx3cUw7QoDc59KLCETLbiDmMHjNSszPJwAOOTU\nYjRvmU/IOcUm7EeQMjPNAxyKSCpO6k37sH7qCl+YID93jJ9aBu52KpVhzmkBGw3Y/wCefWp0\ncqpGPnqPaWK87VFOUKp+U4/CgZPFu5VmCq3f0qG4kVY28s5K8Fh3FLHIzKyuox/eFVlXzGZi\ndopiYu1nVCo+Y/w+1PXLtuDfdBpI8cF84ApPu5K8g80hDtxZVIXazU5YWyeAWHPWmCPq+cZ7\nelR/LuOGYt65oGL82SxbJz/kUrARluME9hSYGWQnkcimxh1Y55ZjkfSgB7/KqsvPY4FJ8sbb\nACueQTS/MvmKnGDnHr6035kCu7b1/umgQ4IZN+MAqM/N3ojdG2gqVYikkxjcRjngUpPmMFzw\nOd1MBuEkJCjBHemKyljwcVI2C2zOM/xUbSsoyQmBjb2pDG/wnHWkaQiJdy4UHnFKM+Z97jrS\nZ2sSwPPFMBzRhn2qwYdRmjadxbYMAYoVdrDLZ9x2pF3LI5yXXGMCkA1lwM5xQivGvC7s8jNK\now4QjJ60qZ3EOcc8UAAj2qw3bec5oUuhLA5xS4Vg2TlR2pMsF+UZBFMQgQnJ2/LjJJNK37iJ\nWJ3FhkYpPmb90x57imxqNgQEfIcc0gE5HB784xTmXzFCg7do4oYs7bt42/zpBt8vOOD39KYB\nz0JBA5zmhGLqeMY5pqwoIyME55BqRVHls53McY2jtSAYCsm5d3zDkU7bh0cZ96QMqEHkZHPF\nGTsJPT1p2AXb5bNj72c5ppw3qVx2p/LMdgypXgmkWfy4flGD900WAQY3ZDZbGB+NBysZBYb+\nhpgbzMIOD71JHD94PyBwF9aeoAwG4Fl7DntTd25uOmcY9aOpXD/KDjbTlU7iABuBqQBSGkZ1\n5A45oV3EZ24LHqKSP5gRnHPShsMm0HBzimAnlbVyzleOBRGX4bgkdKPMG5UdWCj2oZto3EBU\n68UwF3tIpB5xzmhWcjbk/SnSKPlYHJbptpoJBZwd2ByO9SA0qV4YYbqMU75SpJTikH3gcc4y\nDQW3bRISDnqKoY3C7yBkjH5VIxGFKj5emKjB3cA/N1+tA/1mVJIHb3pW6iHPhTtKjA5yOtJH\nxktkoaF+YsjHa3XJFIGbZ0xz09aYCFuVjxh8cGnlVMe1V2sO1NZvlBZfmz+VKd0fzdyetBIL\n90KOcctS
oWaRkBzzxTSrLEcHdk8+tKrHZgA5JzSAWXcMBuq8cVEzosw3HnGKcynft796NoV8\nbd47mmOw9cRsSBnIxmk2bTz94dKY+duVHOaXzd33j/jRuMcHbHOAcGkVxtAIyf7woLKOGbPv\nTgomUsBtVf1o1BjDwu4AAZxScbdpOfpSu207/vIR90dqQ/Ko2ryfWkJDQw8wd/rT2B3YBwc5\npGfcpAXDj0pG+b7vO3jPpQMVW3KzOdxz92k2jnnAPNDHEZOM0rIdoPGGGKYAoztbfmjd94MQ\nG9BSKm4H5eB1oCkLgL96i4BuH3T8rUrbplBB4HFDSBwCRgqMYoQK33PlzQDEGcDjmlbfGcdC\naaNxcliQB0p20x5LfOWHHNBIjRr5mGOTSRx5Yr78U1VHl5P3h+dSbQysvPHQ0ANILMT0YcYo\n+Xbs7k5zTi5GQw+YUzcu5WPI9qYDVVY8OuRzinrMrIwIO3Pbik34yu3IPJHpSjg47YzigBwk\nRm2k7RjimHBXp9c0D5tv8RXtUqLu3M5wKfQCJUXaGVuM5oPCZVSRn7xNOZl+8RhR19qHy2SC\nCrD7vtSAQKvBAIHQqTTWBDbT8wHPHakDZZSBz92nMflAwRlsF6BC78NvYBvSgk7gF6nkj0po\n+7kDK9qdzjds59aQxGbCYVRvzyTShmkyo6Ac+lNwqxhpRjJ4pV3ggYCnPT1FCENXOdw+XH60\nvycsRk45oVR82R1PHtR8p6fNQgFhYcDbgEflTVkKxupbinK2Tgpx13fSkaIx9wdx3BfamMcz\nbX2lsADINGf3bEjB/wBmk58gBxkr096NwikRs5DDkUCBVORtGB6UpJzz1/lTSdoO7oWzildg\n0m5VxHjkZoAQMDnOd/8Aep6/KMHJAOaG+8QCMY3fhS7mzkfN8udp9PWgBV2sxZTjd/CaYyFV\n55f1zTnPmSKWUqu3j1pGUrhhggetIdhFKjPJU9+ODTcDvjGcgChULNgfKSelPKbVYHG/OKAG\nrltpXG1zkKfSlX52IC9smmj95wDsZe3pR8rMv3hgckUNsAwOGzjnke1KwEjcNj0FLuU9ACAa\nasY2llb5icgf0pgKu5eQcMO1G0/eIwT1pMqgOVy3r6UqqY1wx3A1IDlVWy3QrzzRJuZnwm04\nzj2pGVgwcfSnfMdxUfK3B56UwGMvlnKjIPy4o5VR/DjgineScFSw579s01mO0KUwcc/WmIVd\nhY4JPfdTFbkHj0p/302k4HtTFUqNuOM8t3oFqKWfaSBuwcHFOZm3K4GfVaBgMQp2/XvTPm3M\nCdwxk0DHuqxv9/cp5yKN25M9cUn3VIKgnHBpuS8e0rg/7NAD2b5UUHC9aGB28HDfzpNiHqPa\nkZgPm2napxuoAcqiQMPamqzyRjCnHQAdzShgMsv3icYpPmSbO7KkYNAxVzu2KcMOo9KRpN0h\nJH3eM/1pwkCsWJwenA6+1R7pGznhT/nFNgP2v/E2B1/ClX5HyoBDUOzPIoIyvApP4yQoUKcV\nIgjLfvMYJP5UhbCKSuDnDAelKrMyMAvGelOWNghBHzddue1CAb8qyfKPkzxmkOcOy89h6il3\nbiuFKx9vY0vQlgMgdfemFxu4smCMGlkf94Co3DHT3ojdWJYx/MelJDlmLgYfO3AoAF+VSScn\n0pSco5OOOgo/5aEsMBT81LtLBvmADnI47UAIMZXflwy8g0u3512NwPXtQzDjC7mHANN64UHB\nzmgCTaBkHDEnPBp/zMDnaoXp2qNFO4uSAegFJxI2Cm8DqDQMcr7wNpAJOOadjZ8rDLZ60xl8\nxmO3awHApY0jZlO87vX3oARAqZTlgx/HNPYFiR91l4Ipyn5SnHXJam9ckc+poAXbu5KYYelJ\nkjDY+lLnKkqevFKjHheuO1K4huSJGK5O4ZNI3y5APytzxSsxVspxngmm+X5nO/aB3NMB7fMw\nZc7sYNIxO0
/LmkVTtIBwxPrT5P3b/Kp245oARcxqH25H16UpJB3ZYDvxQqsqKQ2BmjzGZir/\nAMX60AKrfKWb5m7UNiRgeQccmnMp3bQOccH0pqnMJDjgdaBCtGQ2wcnGR6CnEiOUKRlcc+9N\njyoG08Hp9KcysueOPWgQJMVZ024BGOfSkKv5HUAdB7inL/q8t34BpPmDfMCqA8UADMN6gDcQ\nKVhu2s5yM0m5/u7cq3cVIMcOTg/dApoBsci7WGdqgYFPikO4Io6j8Kdyx2lAOeTSn5UGPlPc\niqAf5nzEYIIGNrdfrURIPy7SW68dKGYSSckkf3u4pxEbBjkqw6D1pgEn3d+Bu7HPQ0kStJ86\ndR1B6H1pWUeSCee5UUkm0xKU+Re+KBMdu3AnPPUClG5lAB2knkUQkSKR27L/APXoERZZC3yE\ndKWwh3+rZkAycdqSOFZfusGC85PGfam5IZdo7dacih5sMNynv05pj3JXO5eBke9NXdkfICAO\n9Nl8z7m33LZpu4thGOeetMOpKj/vOVww5I70BmdWPQH2p8aMu7uTxTSpXG5unpSGNaU44Ulc\nY3VXZk+6ox3qxGzYOASM5Oe9MaIBSSmA3J+lMBsh3RkYJximM3zkJnHTnvT9oTJRm2kYAPpT\nDhmL8o2O/epsAR7VzlSG6bjz+FNKsnzSZDA549KE+bcjk8jII7Uu6PywgOH6c96ALNiVmcbR\n8inLVYvlXcMbguQVFUbMm1mwpycc56VYuL1/MGTliOGpFFlrs3EWSpcoMY/rTJ/lwxwcnpVW\nyvU3NvBEnTI6Gi4vGmkxGvyjr9aXqMuNKrMoUF2zjrT57jdD+7wGPRvSqcMijnkt6inrMsKs\nNuTnO0isyidW8yQEddvXPeiSR2mQAkDbktim7GLKcfO3O32pXklCtgfL93HcUMaJY8MSAO2Q\nafG2GDHk/wBzoaiVXDBmGHH8Of1p4jMkhaRQV6g1I2SYdmJY/L39qgEJWQFhgMflINSySP5O\nAOc/pUcbBkdmPKjIqhE8eyTcdu0r8uaa3+rKq2O1JDieNioIbH3f61E0m5Qu3lR2pATKGRlX\nGQOtLJIrqTsyoPU1Gs+07z8qYxmk3ln2A5YdeaBD5Dhgcn1GDT4vmXbuKg8nNRbkYFR8rDnd\n6+1NaU3G/eu0bfl5xz9aALk0xVAhIPbb6+9RrGdo53DrwaqPJ5kanH1YcilVyJUyzDccE0wL\nXmRxzDYvbOygXRhdGZcZOePSqxm8uRpDhl+7x3qO4k86RU2FR65oA07rVFuG/dbgqc896qGX\nLnLbk6VSOI90aN2601ZBJlASG6YpjZcabZIdgyq/xVHJMW+WUjY3NVfOXYEbcFB+bFOXDRYV\nvnblSeaQE88hiUYOVA4OetNkmOOhZifventUAc8xkZcDk+nvUckrIqtn52bBwP1oJLMtwhkE\nbbo3xnbRJcNJGDkFlPFVJLh9wUHIzjd3zTeSxLcEdqBlr7Q0kZ3jBz0FCMscJHXJ4qo7B4+W\nwfbrRHcn5RjdjjaBigC7cyFioU5VRyF9ajBZtp+87fw55FV2kDZaMk9inqaJLgfKNu0DqKBl\nr7zEN0H6VHM7/Nzv7ALVWScGPAOTnJI601ZiwLq2WPA9hVIRbWTyyQG2t0OKnnm8mPOzPGR7\n1l/vHVgpALGp5LuZ0/eYKjjbRcTGNOGjJKMWYdjRbzGMKGDFPXvTJDLMo2MqJn7uKZ5mWLPJ\nlemakNC410XiHzYGcioTdLndtG4HOKrswZFBbILfdqNWUbio+Y+tVcC3LMvEjsd5PSkCtuLu\n2R3z3FRM48v5hyOpx+lOZXdmjdC25Mr6CgY5nDR793lgnin+fv3DdlPUDnNVPmjhQfexxtP8\n6kjzsIwc9Mf1oZJJHNuGAd5B+YH+dNkm8uTarr7EVB5ZQuBk4GW296bJHF53mKGDKMZxmmBc\nSTaz79p46+pp
kZdo8IFLdgeai3RqVVAXcjJbtT1VoZNw27j6GpHYfLDLCqgny1PPXvTJJjIu\n07t+eWpkkjv/AKw/Ln60f6tSc+a2elIZINiyKFBUAc5pqv5kuAfx9qZMzlthIPuDSghUK456\nEj0pkkizOSu3GG/UUMT5jkHBxUa/Ko44XgGnbnj3AgbvU0C6jmZkWJsfM3WlkOZOflx1WmBt\n218/MDxil3rGxGG3E85oGOSb5lH/ACyHQetKhKM+0Yyec9qjVf3IxyS2AvvTvMfYQgHPUt0H\ntTGNuJFbPOP9rFSZCygqxJxtO096hSRlZUkUMB0p0a71YoQD2zQxWJFVN5XBJ74qZdyuBnAH\nODxVZMoueM9Cc0Z3Knzf8CPXNMLFhhI29n5HU461EG3R4EZI9c9KjZs4beQRwcU6bEfyx/Kj\nDG3Pep6gTtNI0a4TlD1PelkulaFXC5lY84qsjO5VWIJUY60wTBWAQE4PNUIt8uEUkAA/xVJE\nSY2yQSD26YqDeo+Yc9yDUjMWwhGCOrDpQMl/1kmzIAxmkkaSJifv8cc1DuDKVIwe3qaMM+0E\n7T3UdeKdhE4YThd52ydz2p3mDb0ITOOKgZ98QJXaG6ZpQxiw2dzYxgdKkCRpADnmLHTnP51N\nu3RbmJyvOar28gjYnGe54p3mKswxkIx6GmIke4CyZQnA60+N9p3KpIPPyjtUTbluJBsUA8g+\ntNuGOAoYqcjOOKBGk024YUjGMnPWpGaKRQUBJzhucCqBZioAVUAPJzzSSFQrNuwjHG0etNCs\na6zBZAoG0dQKmt9aLMdjYdBnkVkQyFlXsVp8Un758DbJjcPeqJa7HW6fqG6EyTHHcEDvWtZa\nszNhlXGfxNcNb3jq7qHxt+/6fStS31FtuzOwkggGmI9P0/VUlkAVivHTtmup0/VBtRGGxs9+\n9eOafqxgYpuYjPIHc11lnrbnaGXnGApPNXGTWxnKOh6xZ3yyLknbg4IznFdHp8rvsO/jufav\nJdH1YxzqMHYfvAmur07xExmwrZVemK6oy5tzmd4noKzKVIByoPNZWtWMO3fGoRjzmprK7jmU\nEkbu/wBavLCt1HtlGcdKsdzjpCO59sin5KrwwrQvtJljaTagCDkVlpywUnnPNZS0ZaJlcKzK\nfSpYcgDeMJ70xvJDHC8nvU20sFLnKjoKTY+UsKP3ZOOOmD1NI0Z2EMNtTQgKAWGec5qeRVl3\nAjryTUMvlM94AXUjrjrTAGRt2cjNXpIQoGOmKg4Vvu5X0oCxEqhpH5PAzScsWXbg54p7xv8A\nM2Nobg0wNz8uc9qoQeZ0DcbaflWV2I7cVGV3ffXmh4SqbMkD1oEKyn5SOB60KPvNnH1pOdi5\nPHQKKeq7+o49aoB5YsAoGe+aUkbgrL1pqMy/KjZHfikKkR8Z35z+FMTHKWZWXtnA9qGU9AeR\n1pzbSw5wxGcVHGTbykKdzsOppkk32gLk7Ao67aN5kkyOQOppig5QlQCOuOlSx7I1OP4j0oQA\nq/LuHHpTY87tpPzHn6U6I5Vhu2leeaFbzG3g4BGDWhMmTKVVeDupGZioAIDZpn8IXcFcH8xT\ngQWPGTTQiVSsZz1bFLDiNcRrhuuDRuidQG4PelChhuifAz3rRCHrmT5icmnbd0bLjC5zTGYD\nGzr3pd3mA5PPZR3piHr82GUHjgmrG4qykn2qNZG8vYRgUqRlly3Qdu9PUkczN0JBGe/WpApD\nAEZOc02Nd33gN+eKWFizOe68HNMCWTe0uQ+F74oUs2QB9KAfL3MDkfrSGQv6ggZpiZFvb+4K\nKbkUUWFc/RVsdKQMQCD0pdpfnOKakZycniviz6hkZy3KnBpykhSCc05Y85xTvKyM9KBEYVgo\n9+1GzC5xmpxFxknNNWPvn8KAGBeRzUmNq9SacsYXk4IpG56DjvTAj+Ut1waQVKYwQAOtJtCr\ngihgCg5HPFP29D
nihVLY9KeooGNYfKMDmmhWzk1Mfu4xUaq3UnPtVCItpfcw4FIfvDkkVP5Y\nwSePao1X5sCgCPYdx9KDGGbOKlZTzk9KRVPXoKXUBgi5449aV124wKlzsXOKQjDZNAEbRtwc\n01l461Y+9yDxTNgOfWmBGsfAxwacqnIOMmnqm1TmpFGxME80uohkhON2OacEO3ml46HrQPlU\n96VgYzb2xRtPXFO3bVz1pN3y57UwE39vWlK/KAetAx1xQe3OaAGsuPek56mnL3B5poXa3PI7\nUgFdgwGODTeOQKeOM8UxgUyxFNjFPbHGetPHy8UjAlBSKvPNIkkXLLnvUzR+ZbScZwOPrUUf\nyn1qWNicoeh96APlH47WJj1QFzuJTj6ZNfN2tW5WQyNwM5APevrf4/aesEjbkLZzhj1GelfL\nPiaIRzMp6r8tUhvY4zVsoM4yp5xWBeR/aOG47mulvFVsBsnI9K5q6zHK+MnHarRmZlzDGYVR\nBs2msa8j8vcT0z1rZuZAgDY9itZ14qSq4ztHXNWhGW0fm/xbePvVQaHhj1x2NaM0ilCiqSF4\nHvVSZdy5x8oGTiqIM+RHlU7YguTyelVZECr8oKgHk/SrM0yTMOSB14qGSNpIi8bbTn7tWBVE\nhmDKxwGUstQtjajIjFwOV/rVtEjnYZOCvBzUbb1Z1Y4UcgiqQykuZJD3B4xikdR/BgqvBxU7\nIsymVcA427umag8tkjBABI44oJGcCNn2/LnGO9RBXUEKfepl+V2ycNjBHakTaiscDp696oor\nSBmyB6dulIMqykHcmMH61O3ywhlG0n9KideFUfdJ5NJiGROYywLU2QOQCJPkB6VKyjzAgAAp\nkkZkcoF4AyKAuNKhcnOWz3pjAEgqOc9PWnKDJ8xG3AxQrN8qqoOOeT1oC41iw5PVfSnGUHPm\nZ5Pak8x2ySnlgHlaRgPmP3e+6i4CiFmXcoyR2zUZXzdh7g9hSs5EZ2g+UG6D+dO+XcAGweu6\ngY0sdpG4BVpkgLImD8/6Ypclo3Xr/OkbEjIu4jaM0hdSNo1Mm8ED2p4jO5uML6etDKp+YLkm\njeLdzuOd3UdqrUZHGpDMQ2M9qWNSWIY5B70r/dOw4BoCr5e3Yc/3qYhsnorZYUnEj4cEdwff\n0pysOdi8jrTVjMnAIPuexoGNZAzDHGO1KzBZA3T2pFXrzlxwaeq70255P8WKBXI8DuAIsZ3e\nhpYYwmSctuH3aFjGAF5CjG40n3YckkHOBigQLnadxxz900jt8pA6ep7Uxm4LHJHSnFflChgG\nPAoGEgG8ENlMde9NOG+VRkDmncCTYegHLe9Lko3yvs+ooEMkzNHxksOuKNxkwAwXjpTmk2nd\nnap4I9aRmBUDgjOBQIj3eXIMDf2Ge1D4ZsMcg0+MvtbP3AcbabtOTuxgdKAGljsA7j7ooh/2\ngS459qGjLSZJwuPwpV3tgA7U7470DG48z5l+Uk4K96N5VsOOB+tJ/ECo6d6coKfMfm3UAIAo\nb5eD156U1W3NuJ2+9Pj8wYDbRx0zzTWUn5sDavJoEMkYtufZvHQ4oUBlBOQf1p6od52gCMt1\np21lUhcKd3GaAGKP3m08qeaOmRux6YpZN0hI4VvbvURyu3HLZ5HrS1GSMjtx/EewpW2SZ3HG\nTjOO9IylZNwHINDdGJJY0wEVPnABICnr607n5lRQEFJHt3biSnoKFRYyQH3MxyeaBCARs5Y7\ni2OcU1uF4P7vrtIp23ghWwxpGVsFse1AAjrIylVzgYBPamKNiSBn5Zsk08N5dvkDIzTWHJBG\nBjOKYxV3Ox+XPGPwojx5mGY4xgUKWVeuVJ2hqVoznJIIAo6jsI3l8/xAdxQ23buxtGOB6Uq/\nu48on3uMmmvG2FQAHmhhYAdqtu644PqaNrK2XAEo6/ShkKqMAlmPJpWTbJkDPNIXUFxuIdNy\n/rSqqbvK+4rdBShj
5mFbIPQntTQpViWO5u9LqMa2cbVHyqcEmghHYe54oXHlv79qam4J84+l\nMQbJMMrEjnj3pw2xjgZ6ZNB3McBtp9TStGwf5emOcimIapKqcfxHpSup6Z+bHSlAQ5J+/jgd\nqRv9YAME45PpSDqOYs3zEDkdKbt6Dv7UnJIzng/dpSQ+XBwpIGKBiblUFWyR1+lKML5eTuDD\nINKpLRtjC4bANNBdn2kBk6ZoGIxBkZS3Q9KazNgoQBz19qdhl8xAMBj170hVGYKxyQeooJYr\nKJJlwSF29KazBnwFwopeZGKZyOv0pjK2373yn0piQ/ySFYg474NLtVwW6NjGO1MYAt5hY7VG\nDmmr845OBSKH4GzcMFu9N8wLNuJ3cYHoKF+8dnI7+9NZSxORt45oEP2sp3N0bsKb91twUsg4\n20o5QKTnilWb92Pn2rnHSgBNzNIf3eQR+VAUYAzyD+lAZ1G5eBn86ThcueWPGB2oGO4w4Q89\njim7AIzuznrk0NGu0Ybbjke/tSHLfe+UHvmmINo8tCBkZ696SSRM7TlnzT2ZCoHfoD60kjYY\nhVBZTgmgY0KCrAttz2pVjKsdg3EDPXihgHUb1zz96lUM24oSNvBPsKQCD942Dxx+dHIVc8gG\nmxgup/h7j3pzKFkwMksMkY6UwEXEW4g5DdqU7FAV85/nR8zZBAJHbvTHf5dzDntQwY5mzhgC\nB2pNyfMMYDcZqRidxHQHpUbDj5Rhgcc0iRjKqptwQy80u4YwD15/Gn8xrtk4Y8Y+tNkQCMgc\n7Tk+tMAVt0eONwP40sG+OSVd+Sf0pFXa2NoO4Z+lCsrbzjigQLk4U859aEO1mYj5TwB7UgU+\nWMn6UPk4ZBwevpTRQYy27OAOgpdoZvYj9aG+9kDNKMeYccjrSuA35W4bIYDik3sy7R1PU0vz\nTZbhVbgeuaVVZWKydVGdwqhXEDfKeMnsPenCNVbdu5Iyee9JGp2uCN2fuik2rtCgENnBWoGI\nu5t3OTnintGF5JA70FV3YxhqbhGYo3XsaAuBbdG27dye3Ue9KrkKcneq/KuR+tKjvGvHPYt7\nUKQ75JBUjJ9qYr6gqFCoA+duTjpSD5cgnYSetDKvmZLkenNB3RtuwG70xsYrfIVPzc8EUD5V\nXK555p0mZDuTq1RBv3hVuoFMkk2gHKckdBTWYH1X1FOEZdQQwBHIxTW+Vi2cg0ihWIbC/wAP\nFO3CRmC8EdCaYik7c/NTvm8t8p8x/SgQ3a21dwyc807bu5JwOuKTlV3DoRg0CMh+mRjmk2IT\nLSNjpTlYxkquDx940jSZb5hjtxSqpXI25PakOwgceYSQSO7DvQV3K3GF7c80u99xBIUdwB0p\nu5VX94SBng+tMLD9zbVwtIGVWOeMjrik27ldgf8AgNHJjDBuowc1Qgh2iTc6Z9qVZGw2Rjmk\nwq4UY5o3Kv3j16CpHqIMJ8qj5+p9KJJAWVvw4pRtVWTPUck1EnycgYA4xTAlLHYzZwTwQKj2\nqrZUcEZOaVSdw3D5hzxQcMhIPO7mpGL8pXA5XqKQYkkwGyMce1O2qykIdp65prNtxt6noKoB\nynZHkjn2pGboRwTQP3cuOvHI9KBuyCANo6mmA37zeg6U5l8sKD1HcUBRzjknmgM2Npx9KAB2\nyQT93PalkkOwkDFNCllC4wetK2ZF56ntQJgqhnBGCSOaaMP5gVsAHn2NCoI15zjODinthQUV\ncL1+tBI2RS2G696adqru+6O604hj7L60zaXVSGDAj5hQMdtyuV+8e1EbM6yE/e6CkVsqMHaV\nzx7U/B2hQu5+o+lAAFRsYOGxzSwSOsjKowwOCW9KZlV+YnJ6FfSl4VmUEtuOSx7CgCTyyhYj\nbUbTBsbBgdxS7l3fJ83tmk8o8kgbCM8UDQrbWU7RgnjbSTKwA3tkL/CKURsV6rtxwO/1pq5V\ncZzgZPvQIThkBPyKO1
I2GXeCdvde9KwMnBGC3NK2FjDr82TgigBMhmGF6cikbO3IDdfyp33D\nwcrnr7elN3H5sHcpP3aAA93Y5XpgUrKUH3cj09KRUEYYKDjrzSrmFueWPTNPoAq/PGwYEZ70\nxZF6dD0zTm3p/rGANAV2bO0VIxG7ddgpxjXcWXJH+1QxZVweBmkX5fvMCmc59/SmA5+Y1+X5\njxTY2+Yqg46EmpP4jng4yFpqqdh+XOe4oGIynaSOnSlXLMASOnHOaGdtoB4VRx70jF2Xdxn0\nAqQH7flJBJUdqZuDSAkZUelKrMse4KRnpzT2ZyoVV+TGWpiEbMmMnB9KiVTuBC7c8U+Pueh6\n05FO0vuw+aZJF94nA2gcf/XoOGwV/EClZZBIWb+LikK+Wu4D5umBQA6SMsBghtvPFIf3nQbS\nvIpI0RUO0lGPO70o+6dzP9KBithlBB+bq1N+ZeZGyvUU8LvYbjx1xTQuWPGPQUgHPkuGUZUr\n3pJG2rGMYJPIzzSsi/fPHY80qt52GVdzL39qYC5cgKn3Ced39KaqpG5C52L60NHgKUIIJ701\nl3KuOAetAC/eU7V+9QFbgEYB4zRtyDgkKOKXCrgE7vagAZTJhSuSOFpfnV/mUDjBqNi0YK91\nP40K26MPgg559aAHeXhirD6NT1yrbOp9qiDKrEMS7N6U4LlgqnJzyDSAAnykufmU5Kijcuza\n+V3Hhe31pTHtY44Oe5poPmLh8ZB60CYrHayHIyp7d6UbGZtvJJzz0FIjR/OwBDdgaQBmbOMd\nyKYxzN8oXOT1oUlv4ugz+NM2sNwxyRnjsKV1CpGD8rHuPSmxME2ybsZJxmlLbVUZyCKdIoG4\njgY5xTGARFUcd8e1IY7k8KfrikXmTGTuH8qNgKksdo7UbB1D/wAPc80AAUrls9OgpG3swfAH\nFJzkcHmhoxwD1PH0oEPEg8wlf7tMUhQCTh/WnnasagDDZ203AOWP3RxQAu1S29hg/wA6OQB3\nPpR5Y2dG5603a/l7CcDPFAXH8RsGYYb+6KaTtYMq49aXaI2OWB449aGYFep+negQquFO7GG9\nKN+6PAb3+tAjMkm4jjHNHl4jyB82fu96Biq29QMcd6du+UkDJHWo4+Cfl4pxbqVHGKQxyxlV\nKtxu5yKXCqp2jOKYqnapzvX0FOPOPrnC0C6iqV+Y/wAI9O1OKksmw9eregpNoWVgzYQjtTSd\nuAAdnTPrTGODAu235lB6etNZgjHCZJ/SljJ8sAJgZxmjlsnOcHFBNxd6xpjZhj3pY2U4LMfw\npGGOWG7/AGadCw2bQdoBzTQxm0qp9Pen8ybeMFeeaPMEkhyvy4oY9Pkz754oYCFjKTnjnt3p\n8eVZhjr1zTflVdq5xnkntSb927nI6VJI/lVHJJBwKM5LjJbH5U7HlkAHqMUmGRggOD39aYAv\n3QwkwF52mhdzR+YW3mk/5Z4bHmg8Ie9Ct5PHJB9RwD6UAOYsJF245/Sl5Vju55/yaRSPlxkt\nk5Pahg3zktnjihAOKjdhn3A8/L2pzfefK5LcgA1HwyxAKenJqVs7MFsnNWA1GDYKdeh9qeg8\nza8o5I+764oXaT/d4yPSiPAwsmQe30oAVeuQnHYelIrbYzgbaPNPlso7HIxS+Y7wYYbQaCWD\nR5s0UDHOSQacqlAAUO0dyetNeMRqPnw/p2pzfMv38g8Z9KbEIVJznIQjipYUGFXBG3kNTVUL\nFgScKeBTvMxnaCc+tMYSlVYg5LHoacN6sgXac8NxShSzDC7jimOjxgDHzg81IEq+WjM5GMcD\n3pmVaPZgnceD6U9cEZdcUsQEikDrnNNFEDMCEKElVHNNMpTkHcD0p5VlkMJ2gdm7VH5fUAcA\n8UwB42jy55BGM9s1WZmCKM7mB5NTysCxQ8Jj9ah3eWuc7h0NAA0hVQ38W7lKVlVsux+X+76U\nrFY5A7KTxximPt8z5WO0
9R6VID1YiNQqs4HJqHzJJmbcAO4Gegq5bOFjIxlicLVKVh5nJy3Q\nrihAPVVYgg49aesvl5AbnvUCEqpY/dHRaljdWUfJg4yTWcii3ZsNpDABOv1qxHu2EkjrxVCN\nky3Bfjjmpo8bQu1oyfWhlous/wDpI5JZhj6UCXy2J3YAGKgdmbawPP3Rk9PemOxLbWxx1/xq\nALSdyDlgM1Ju2p8/rxVMyLtXAYD+8KmG5sguHBGRTHcsMflOW5PcdaYIycKcCPvnv7VR+1Oj\nrFjIzhj6UsjhVYAk4PDZ5oAttNJCZCmE3fdX2qNp1Zg5/dq47VCV8uAhjkMM59KiZkyuBkYz\n9KCepaKxMAXbfEO2e9MlXfgZwP71RuAflPHfjvUczExgHge1UBYjkfht4J+71omZ18tUP3hu\n56Yql5m3CBQvOTntUrMF2kNwx7UDJ5JnVdvY88VE9xiQfOzEdOOKhnYIjZJOOnrTdwZNynaV\nHSlsIsLMpbeq7eMeoPvSfaF+Ytlm7iqzSvgqOR/CRSNM8kZAThfvNVCv0JvtCs+AcSN0X0py\nzYyGx9R61WDKxLgAHGNxFNOzy/lJ3Z60ASrI+3aT5ascndzT4ZNgHzYweGqtuL7VGAme9OkY\nMxAOT02ipKJWkKsJR87E4pkkzW8hZhlmOBjtUeWLgbShUUqxt98rvz+GKQDhcMZiCMHFPyzI\n+fT73oagDbQNzDcp5oaf5XCnKmmBJtD8LwynG6nBnbhhnHU+tQqfMYENhMZyP5VC5aYjLFDn\npQJk7XWVJG4Nn0xURn8uPDPkt1pN2WB+8oGTj1puFYkYzzSAkRz5agLk55PtTt3ys2OCcYqF\nYXVGO8/SnIqGElzjHIqkK5LG2ATjaQPu+lOVRMpl3EleMCq/lvhtrBgSBkdqF2wqxMm2TPAX\nvSuA6SRmjwoLc5z04pgj3MNi7lY8D096keUr257/AOFORQfnwcj34FHqFyJoSshJG0g5z1py\nxqs6FcyN1FIr+cpBJ3Akj3qZV3LEfuc9KQXHSEtmQLk+woW4eSIKC2D/AJxVxowzOJC0ZxjA\nqjNtVRHG49sdqoNyLG2TkkjPAx0qVWJkZmfbt5NG87QoOSTyDSeSseT94E80wGs48xlQltw+\n8tEK/M4C7vlwcnihUZThRu/nSKscTeYScHqnpQA1WV9qo5O324HtUm5XfzW6L6Ukkm5QYk2q\neS3tQqtuZeoUc8VIhU+RiQchudpoUfIWwPUsKcq+fzyUxnApvltLGQhCbeSKQkNV+fkUcHkm\nm/vBG5wpLHOO+KlRSFQ7dzdx60rRESZzsfrg/wAqCiKMfeDDdx0p/nLGoU5k3cA470K0kjn5\nQAOR70+Nm5DgDd0poRCqhbfBYFlbJ28YpSdwLNuPOadHHticMwHP3qIQxUOpyq9GqhjAWkYS\nL9zGdw7H/GnNjcAz5VuuOmaFjby2YHA57dabHtDbWyVxkt6UFIcr+WzHbk42gmmL+6VVU+a/\ntT12lW3ZLIcUbCuAoDZGcg9KCRmPmYpG2QOVPrSLJ5mwDlduC3oalWRgG2liDxx3pvlhVJPX\n+XtQMb8u1RyWxz7+9CQorF9zMR+lP3MMELyBgZoQY3qOD1zUAhqwxyK0gbLjv608IxZmYbD0\noa4Z0WNl8sYzx29DQy+VGefN38lvWnqAQEW8gbJ2j15BPpS+e9qryvkux4Tr1pm8+WA42gUk\nbCaA89/umnYkcZXk2b1yepOe9TqdzYRdrAZLE9fWqkm7KnOEX7vHNS+aXBO7BXvQ9ChxnM0a\ngnO3oAKnWQALg7yfbpVdmKxmORhtPTApOVYNuYbeN2KZJbaYRsylQ56fL1FLDIP7u/b3NU/M\nMigK4G44LAc1KsnkHaGGAOP9qgZajBJPzYwN3NM3hk8zPU5qFZDyAfvCn/LHlecigQ8yLjeu\n7HZj3NSKw+zgjaOeVPrTfP
8ALjC53r1203dHJw6lXb0oAmaYIwI4Pt3qVLho2BIyx4xiqyyF\ntgMe5h0xTwoZiVk3c8g8UxWLUcohlccNnnNTxXSqAcHbnPFZokOBj5UzyB3/ABqYZWQEHavT\nFMVjoYdQjjYuT8oHCD1+taNrqT4D7tpUbvrXLfaFiXYnCg1Zg1BlXpkD9aCOVXPSNE1JmjDS\nN8+OAOpJrd0/Vlt5EwSHz8y15ppurBJkHzAsMA+9dJpt2szjLfN6da0jKxhOPQ9l8P8AiaJl\njiKh2Zzl67XTdSM1wVdfl7GvCdMuxEyZmCndk8+ld5pviYqUZT+6yM12QkpGFrHo5Uybgwwr\netYOq6OIZPOj+WtPS9WW++TcM/rWg6pOpR+R0FNpMakcWoJkIJ6VcgbdkdcDNSahYyWM7Hbl\nD3pkOcZAz61hJG8S3Djbh24bpT1kVdyEFiBTVXZGBwXJ4pywlmYlsH7pFSaDWx/F8voKWGFe\n/IpzK27YTnbTo0Pc45zTEV3TLswG4L2qDauA+cVZkjK7ipxuPWmCM7sMPlx1p3FYqvu65DCh\njvUZbAzyO9PNmgkYo5C9QpNRyZWM55bPFMh7j0ZI5BzuXB4xQJDjaRxTN3mR7OA3enrCduAc\n8VQmRNIyHCrljwKfJIeCeGxgmhVEK5OST+lN4jyxFMTF83YR8vzf3qcJjk4X95/dpPMBwSPm\nxTofuthdpPegkcqkqCWx60sa8nBJHWmsnmbTuGF/h9akXavGCPxqhCNjYQByTkLT1zIgwtIk\naq5weeopwcswwMeuKoQeWJJcD7uOtPX/AFZjwDjoaQbgzKRx1zT4sKuQueapC6jY/lYKVA9T\n61NGAoJUcZ4NIzYYgYyO1CfMuEOD3HpWjEx6kbzhuT2qQ5WP5FxJ/epVYeWW9DtFEe5pBggL\n3zV2I1JF34G/knmnSN8uW9eTTFQsGGe9PkUMu0jI70ASBGQLgAn1pzKWYgDGeuKidyse5eg4\nxUiTFc/zoHYcygnrio84+ZuT0pAxZsYz70vlhVZi+FzQSx24+goqHzP9qigZ+jip3I5pFU80\n7cWjyKFywweK+LufTDPuktinr83IBpUGFPfmn7sdOlMEhAu5eOKav3m+XPFPHzZpFyMnNAiP\njGM89aVTsBpfL+gNM/i9aYxpUyMOwqVVHQjmk2/hTiO5NAhFU9adsPXtSY+XrQzc4B4oAXB6\nA0vHXpScnpSZ3ZGOlPqAuDtyRmmrnk96dzjrTVUjmgA29SaGUNwDT2G7jOBTT8oOKBDWycL2\n70cNjGSKeVXbxwe9CqdppiG9toHFGD7CnBd3tSr0IbrQMD97mhuGHekAyevFOOAuQKQDed+C\nKReDjrTyp25HNIRuU8YNIYbaYcjoKdt24z0pyruGc1QDGQspAqJTj5SMVO35Uir82TzQSxFX\navvS8KPU0c96c3BHfNADevNMxuJ5zTvm5+tJt2kY/GpY7hncMCkKkLzS8feHWomkAPX8KQiW\nNvm61ahUdc8/zqgr7m6YqzA3U56UAeR/tA6astr5yj5mTJGeOBXxz4qiZJnZl6ueO9fcnxr0\n83Hh5nWLJA+9+FfF3i2EvcSb1GFOdvvTDoebXkjAk54ArnL6E+ZySSec+1dTq+zCDqx7iuev\nJPLyrDJYVoibHN3+Vk2YLp13CqFxbyHcAdueQK3ri3O3ghUxyKrMQVCkbuMVaJOfmVpIpNxE\nZ/vCqKyMmFJwSMFT3rVvoVtZN27Ckcr1rKuI/lALDrxiqRBVkx5YHl9TzVeXEcoUghvSrj/M\nqhRypxtqG6US3Cqx5Hcd6q4ameiHzG3N+HpUc2epz/vCrV1H5ILAck4OKgZSrBc5OM7R1xVI\nZW8xTGu35A38NQbWVt/Vx0UVOVDtjbhAPu96ZDj51Hy896rYBrM7OqtgcbhxTZF434AP90Ur\nRkAc7hnimtGoOTw/6U1cBsym
RQ23ntUWO+MnoMnipG+bPoO+aj2qeQGBbgc0hDGy+C45Xjik\nZnjwOfqOtKrqmzzAdnRuetLIu35lHy54z2oERsDtYDJbHSiP5VVc54yTikzJ5jAMCMU8q3mE\n9dox7GrGN8xmkKj5kphQjPHyt605QVXOeDzjvUbZl6tgDtQA9dyb8MPm4C0yP5soRu7H2pvl\nleWwR/WpmjKMgB46kVNhkEahVbAyRweaPLTJYOAuOVFPkZWVl8scntR5fk4QBenSgkibHOGx\n6ChZm25Cqx/umnMpY44IA+9Ue4cn7pxgrTARMNuZ/lbOeBwKGZuo3N6UscRjYdgRnOacAF+Y\nnC9yKAGDKxlV4J5JqNtysFyduM5FSKudwz16E96AzR8YAYDv3oKGBV5YA78dM06NmjK4bgjs\nKb5e1SMYbO7j09KImUM5AO0DP/1qYCKXMjKATnvjrUaOPLxnBU8ipo5CsR9GbgVGzBZgwVQg\n4OKQhWQsAMZU85PakVvn2cbgKViu5guSMZFDD5FZcA0agxg27fmHJpvLNy3zDpT5PmAPbOea\nSTlgScj+6KYgYs7klckdj3qLb8xZvlYc4qZ2V3I5zjORUflttVSylG5/Cl1EIyqwVixC9+ac\nxADZUsByDSNyzDYoQDgUu4eWFJwMcetMBI8CPcTgnmnPllyOOOvak4aPIO4/3aayZG4Hj+7S\nAFjBXJPbkU19rDLNwBgbaXd5jbCDnHNNXC9Fx2NIaHK+/YyruAG3P9aRlVFLNkjpil2sob58\ncdvSkQHgluCOFNPUQhO5QuNqk5xQ2C+M9s89qGXJJB3EdR6U3duBxz82MmgBPLEZVsli3alX\nZu+bls9KJFIk+ZyDjjikXdIOm3nmmMdtPLqT6bD2pdrRKCed3FAV2Y8AqP4hTiuWGPTA+tLU\nBjYK4zkg0ix7dxxyetHz9Su7HBxStuZTg89BTHoCjHT7oHFMDbWADZbHP+FLysQVh0P60jkq\ngMagEHLMaAF2mTaMYOc9aRl3n5sbd3I70MDu3kj5hhSKPLIwfL5IoEJMoUtJnzFb+EUr7WUd\nVbH3acqqqHOcD+EUseyabLDChelAgbHlo4bKdCvvTW2hhztXpnrzSbEkjPy7VU5GelI0jN0w\nB2oGLk8rjvSsojI5yT2oYZjCDp6etIUC4Dc/7IoHbUVUGcNwn3ttMLBJTgYB5+lScMNoTafr\nTdqkliBv6UCYjIqgE9falRg2AvNR+YAQejZwTStu3YTgnrSGLGA5OPmOaRc/PsLFs9Go2xL0\nQhvVTTl/ds3zZBXpTEIFLKVc/P6Y4pFyp2lRjsRRHlvUEcc0R71dhINmOlBPUM/vS+/dt4o+\nVWBxkjtSqw8s7kxntTFYlQxXanSl1KHqRuIUg9wKjUsqtxtPc0uF52j3oxtX1JHT0qhiLjJD\nBiQOGFKvQIrDPX3pd5KA4xxio1cD58YbpQId5nysdmSeCRSsxKIuMLwQacrMsuxTuBHpzTN5\nVjuOcHhfSkIfuJ3HbkevpUSkbdzLk9sU/lFZQ3JGSPamDPlkDpjjNIYvyt8mMSdfpSqwbLM3\nAGDimSZj2jrIcZIpWVJFxnb9KZPUThOEJxUisQwHRepFMjHbP40ZG7BbJU5xSGDN5qlgMYPQ\nUeYjj7mQOTQH2sxT7pP3TQuFXOMjv7U9twBgrKPMOO601n8xtzYGBT4/3jGPhQDnLelJIkaS\nEKNwPWgZG3zK2w847inM/TjkjmnfMqggbhjoKa2F2hkz6kdqADc6qVDZOOnpSFv3eQec/N9K\nXeMnA+akDJnJXPrQIczndmMdulDFt3XDcZpjSHbypU9vpUkjBWVWG0sPzoAYo+ZyG+fHWl4l\n6c5GD7UNhhtPAz270vEbk5ITOABVITEbLDaT9KUIJDncPlFIGCkgj8aSOPbGcjJzkGkFwVh1\nfkgflS8sOoy360bsI7Z/4DTZGU
hX24BHSmArqY8bjyOTimPhk4OD1A9aGy2NnIJoRQpO/wCY\nZ6UmFh7Rl2AXrjpTZJM4VfuetPbDZA4UdMdaZxjk/N70DDg7sHNKrlcgpxt69qNrLuKkNgdK\narfu1Und60gFwHUbhtB/ShP4xu3DoaRm3KN/y4OcVJgFTtHHUU7gR/OWLcjFP8wbB0DMetMR\n28sg8D0pVhVmBc/gO1IAZFUEDrnk04uFjIxntnFHl+SxVzuY8qR0qOOZtrbh8vQfWmId92MY\n5PejyxIUUHnqfShGOxuMHGPxpqkrtIXHZjmkJjtqkY2dDwaGUmMsOCeMUNzt2NlO4FKyt5gA\nb5fSncYxsybNvBHFI33SwG3nBY808fNI/YAdKjiA2lWGO+aYhx7BRj/aFJk8Oq4IPSgdy3Oa\ndl41JUZ4xTBjSyyyEZO9u3SlO5WG49OmDSYHl5JJY9TS+WoAI+YdOaQ7hgLuJbOaQ5XBzlc8\n/Wm5VoypGCp4NOYldvYf1pB6jtzbmRRgkc8UjdEB5Ipshl2nccPuyD6ClUlH653d6AuGBubn\njtSrIok2HIbGc0ijCkk5PSk+Zfm25pCFVT5fydM/epNoY+WvTrSgbUDAHJ6ijzAuCTjnkVVx\n2FkT5SF5fHalyMAgdBTZEWNS6yZLdhS42wjH3h1z3pAN4ZTkDJ/Smqx8shxhugp7Mp5Py0xm\nDN1JboKYxdr7QOSe5oYrtyF70KzMAN/fmlU7Q3r0xSJuM8xVUHrk8YqTYVZc/MT29KjVWVAg\nGDnOacyssm7qO5NMYif6z0KmlDb3kwduevpSKwXLqMnqRR/ED1UjOaADIXgcYFEecZzgEcmh\nvlXcPp70McSLtOfXI4oACrLGHHIB607cFAfv/dpgYbipBIY4Apf7yHll4pgDKWUEHAPUGhmW\nTAx83QGhXAfaGyP71O3DBXHzqfzpBYThpNsYwB1z+tEn+sG0YTGKRdy7iOvel3bs71KMBQJj\nWOwggc4wabtZcENlj0p2Cq8tuGM02NWCgFvlPf0o6XAdu5KsoBPBbHel/h2nkKaTcx3KWyi/\nxU7Pylh83HSkmFgkKswGzCn9KSQhWA68dKEVZAGLcdlpG/vkd6pAJIqj5icHpgUz+8FTJz+V\nTSbfMAPIYULC/Kqfu84pICJcqzjOCFojkPU/MvoPWl2jnjJI5oCgmTauBn7vemA4K3B4bdwc\n00/u2wBg0IVTH8WaXd14BHTJ7UAN3bpG6kYqRv8AV4K7geppPL8tWO7OB94U5VPl5L4TP40A\nRlgvy7cj0NKqopV2Yk+npRNjcuDzT2xuHzZXvQNCSttYDaTz0pWRY92RhMdfSnHbhjnAzjdT\nFYSZ3Ej5ehoJbEPyqADkZwG9qlVtox17YqPhcAjOelIxMf3FPXk1JQi45Q8nPWnorK3LDFJt\nPl5KYOc0u75So5HUmiwgZgOPwBHSgsI2VVyx6n3pVYBh8o20iuZGLyDYAcbf60AIzeavygoc\n8g0Mw3H5eBxSbjuYLyrdDSSN94ZxkUwHFiuMjev96m7gq7s45wacZB5UXBOOoFN3N5m4fd9C\nKGT1EXDINvzFqXzDIwDADHBNKzb3Vj8nGPl70n8RDryOQRTKDlnDHjHAoaTJJBx2pCzKm8Dc\nw6ilbZIyHDZPVR2oEEa7U6bueafNjKso2p/dFNUiN9o3MM5NSyLtO7G4N0UdqQEKgbskEndS\nH5kOwcqelOwVyp+/14pjNkDA2tnrQArYkXcp9tvvQ7FdpDcqeRTmIjkbHJ7UmfM5VTu70gFm\nKMSR8pbkmkO5VORnjihl3NnqOwo2nDK557LQMbGx3blGMcHNK/ysGDc05iwYgjj0FNZlxgg7\nepzTATa5YEnBHO31pzssjJzt3Hhcd6V88j8vXFIzFkCsNoHQ0wYv8WcfN6CmswU5P3u9KuVY\nDoKF/dq2cMvXFAC8qpwDlupprszb
QeQtOOSwyNvf8KNyckjAY4HrQIQ4OVwR3NOKj+L+Ede9\nM3eXnIJPQ4pGby1U4JYjHFAxy8xgAeZuPOaMhm3NGMLxgdTQykqCRhu9KMqo+YKh9aBCfwkj\n5dx4BpGJ2jj5uhFL91WLYIB+UU7cQzOF4Jzk0AJkM20Lk+9C4GVPHH3abKw3IwDOrHBxSuET\ncSWKg4xjmkMVcqpIbJIwPY0iyGaNGPyv0I9aSVCSqggAcjHrSrubKHB/xpisKZAsmduSBSFu\nd394dKMbOnU8Uxc4BZep4qQHr5inOPk707aWm4PzAZ/ClZShCHkHkmjcVPDYPv6UIBqruBQN\nhic06KQliocI2ehFI0gjVj13dMUGNW2jG5gM5/pVC1HFjyCPmz1HemRsMZAxjrSsxmJIOw/3\nfSkfbuwFLN6DpQGpIFyd2MD60pIEecZGaRUCRn+I59aaNy5Vzg9RQA9lCYwSVNC/K21scUi/\nvACSI8djTlX/AJaY3Z45oCwMqELgEn3obDKQf3YxkKKTcRAEP3s59zT+FGSPmb+H0p3GITtY\nYUEYxilWNNgL/KKbu3PvYYxyMUbSVznOeimkAiMACC2B6GpI23QsowMcims3mxKSvKtggUsi\nszAMNik5piFH3UOPmY4zSt8obvtOC1DPu+QcAcgmkjjDyAM21D97NIQhZnKgqDznOKly00hG\nQFYY59ajm8xlIjZdgbCn1pyufLG7qp54pgK6iMLjscEetEi+XIDjd7ClkK7QfvMTxSsrrlgc\ncd/WgYihuQZB1zz29qePvHcoXacEZqMyFvLIG1v60vy4ZmOGJ5FAh8n7zbtYDBpjSMrZG2T2\nHNK/ypkruU8YFEZYtwoVAMYoGO3YXeBtz2pfkT77bgefamxxtg7jxnpUjKqjy8ZyeSO1UibB\nvGxSy7vekWQbE2jdu7GkVU2ngrtPC561J5u3BVAAAOKfQB0e442rnI49qeJW4GPm6YYUxd6s\nTu5HG1fSnqxDEueGHekFiVMqqsPlO7n2ps7+ZMxCMrf55ohj2qTuxkdDTWLq453DGPcUhD5c\nKACd2RzTRs4Cq2/HbpQwOTkZGMZpw3eSoB2nO0EDmmNETHbyw+UdKiEm1QqnLMeR6VK6rtIJ\n8xF6/Woisccg8tdp6kUDItu4uF5yeajVSzHIxgfhTpl2528c5pjSMUVnYZY4KjsKCrAZSigl\ntwPamqwX2LnnPcU2RlVC3odoX+tRhjJ8xHyilcViaWQBtoz8vHBqItsKeYeR0pflbgcDrUbH\nao3ZyTjNLUB6yPJJtxjPaljk2qV5yCQe9T23k7CWG1cd+tMt1WNnYAqOu01LGWFAjIyBuK5/\nCk2t8oJyc8c02TGQxYZIBxTGceZuj+8ozR6jHG4x8sgzz19amaXaCrDB7EdqrNmRc7csO1O8\nzy5AVyH6ZPakXuWIHY5cjHai4V1jzkbSe1U8+WGG7qakDfuV9fQ0hWJpH242Y2d/Wm+cIcN1\nTPftUA2Luxy2eeaGYNGw+8fT0oAe2+TeSTw/Bz1p3mfeU/ePPtVZN+1grNu6n0qR2ZCcHJHr\nQT1LVmpuctnIHGM8g+tRMrRFlJww5LetMLGNd28oepxUX2hpFJKBl/vZpgyZyrMpXJXHPHek\n3bY/vfP2piSKy8HC+9Mj27s7tw96AJ1ul3bSCzEdabHMPM27S27qT0qItuLBQcdaI252ltiY\nzto1YMecwb2Q7w2QPakjAZUG/BUbsZqOGXbMzONwxxjtTRHlXbGGY5J9BTEPOGHzHAY0/wAs\n7GjV1zjvUPO0MFLdqWJ1RXLHIxjGaQxjN5a4HOeM5708qGdS3ykDkLTdybAAoYdQPSlXG3c+\nQx5HPagBy4HTduGSDmlaRwqs2RnjOaVZmhfdH91hzn+VE0zTYdyAq9dvSgBjbo5k3IeQQGx1\n96TgA8/NnFOGEbers5Y/Lu7VG0YMXm
FSz7sGnYLisBHypPXBxTo9kLAOSQDnjmk2rtxuyeue\nlDRuzYUhlGDiiwdR6jhip2A9M+lReZ/CHU89cdKHQLJ82VBHAzxSvEq7W46cHFFhsIyGU4PO\ncE0saneoVvmzjJ5FCxouCGBDdfWrMNuu4NuAA5x3piIAD5zq/wAq9+KNu052g9lJqW4ZmbAP\nDUjW4mRiSf3fIGaVgIlba5x/rDxTYwV3KG2p0/GrCxCSbafkOM5oW3EinBJAOKka2IfuMAmC\ne4qxbukbbmG5uoNPWLDMSMPtwOKGhVYRjJ7HApjFW4MsuScZ521DyzFlThsgcfrVoIJEwmBx\ngvQuTsQqVzwGFUBBDE8Sjfy/rTlhfsAeckVLsWGY5JcqcfNUjRqzBui9WoEVht7nDE/d70kk\nEeM7SDnp61YIVY22KGA+YHuKczBmzI3lgDPT9KkZWeIxrHz8ueABTzhuoO7ODipJGwgP8Pv2\npysFUYcEdSaZPkRJCBmIDc3Xr2qLy9qlicx5x8vFSEgylo/l3etImN3zdQenahhYVSmNoOCx\n49qY0akhQwL571YjZFXJTf8AN1xTi6SSZWPJz34NSMr+W0cJRsb85Bx+lCqrsokOz3qdT5zZ\nK87sbqVtpQqBvYN83tQIqlEVmXIPoKFgSSFdrYz/AA1aEm2bcFV1XgnHFAXzCrooA6+1O5RX\nZQ2VUsFAxg9M0fZzHBtXD5PPHep2nVpAV+UZ5GKeu/5m4K/TpRcDPjhKyFAC7Ac+9CRShtuU\nQ9RirMkO/ac5bPPP50eWiFtihsNignqVB94n+L9Kd8yq2V9/rVsWqqxPUNn8KiSEiPYPmfuf\nagCnteNRlgS3TPvR/wAe6rubc27bxVv7LvkDkgFBgL1qA27MwYjbzxxQxoiXDBxH8xzz7U7B\nUYfhcZGODUjW5ZyyYXHXFN2iH5sb/Q+lUNkXllmyzfL1NSs4XIQ544bHFJgMS/VsfnSNujYN\n94N0Hb8qRIkjO0YLHgGkXlU2vktyRR5aqG3nJ60MI1KSK2wYxg0xj4ctI5Cl/wCYNKqt1Zst\n0xTGxu/1jKoGdw9aSNsLvPIqW2Fhd+1m37Qvr/SnrjavPHuP0pkbF33D5/Rcd6eQZA2Fwcc8\n96oQvG15M8gYCf1pMkr5rSAjHIzTTcttIGxAowWI60qx/uVT5cNyTQIsOUZE2jhhSu/lBc/w\n9u9V96qqqu7rwQak8zdvyu5xyB3oAsrI0kYHVxyFHFJtXll5fuPSoPO6sWCtjkdxVmFo440b\nJbJpDBZQq4HyjruPalXLRli28q2fwpCq/NgEhjwKhkxGwRcM7ctjt9aaJZdWb5yzIMqM8HvT\nRMz7HKeWWP8AEeKrhvLYEklTxn1pzSPkl2Uhfuk9qog17dxJsIlxjtjH5Gr9veeVJ5hlZ8nA\nI4x7VhRtghSwkOMgqeM1YgvBcMN3yDp+NUQ0d1b6lH5iFJFLEY255zXSadqRuIcGXiPuvr6V\n5bbTeVdDdgKerZroNN1IRy7izA9FftitYsxkme5eHvEywSKSdpA5rv8ATdU+2NuBxH2r580f\nWBI+c79vJXpmvRdH8TSwqCrAR4+7XTGzOZq256vdQiaDc+NpHcVgyQCBvmXKnpgVL4b17+0r\ncpN94HCk+laU0asWPXsDRKNjaLRj8Z3Ecr09qVpsMr8HPWiaPyW24IHajyVVctyawehvfQf8\nrZJHPXrQHCrkj2qJY8Jkg57Uqt0JH4UkMeFHlnJIHUU1mDbQykLQzheGGW7ClaRl25GR6UwK\n8g3qzLzg1WfzSysqAgcke1WpJPvKV2g96qtnbtDYY/yqiRi8SFyCQalx/D2NIv7tdueTQrGR\nxtwvtTJY7d5bKxGExjmm+YrSMDggjinEj5t2GHoaYuAwxwvaqJYxcrIMDmrAXyiQTkMc01Js\nSEHnFP8Auybt2T6UEkhRdwKn5fT0pGb9
4dv/ANeo1k8wsxOFpyqVP7vnuatADYEgz19RU8bM\nm5wOMVFGQ0rFQduP4vWjH8JbhutMhjmmk8oyE85wB6VJG25+Rg4z9aFzuDY6etPDCRvM4Par\niAyEYkLFTz3NSMmF3KfmPb1oYM3JPy+3ambVXDfeParEybOB838Xb0NKm1Wx3pithiHGSefp\nUuFDBgu40ybMkXcrA7vl9Kkz/CD81MVgxz0py4j+bP1NAyVccAjinyfu4yWXqelRKxZSTg85\nGKezuy5J3Z4pgR7XypXp3pZI1mDEEFR2p8a7VO4YGabmNlfadvtTAg8tfWin/L70UCP0fUfK\nTjpSq2cHt6UvmfKRigYXkjmvjD6cQ4ZcDIyacxA+WmSH0p/HHy0EibgMc8d6azBWDdqTywzc\nGnqqyZycCgQhbcvWmlwuMA04oFXpQvK0wFxuHFIq7WxSq3yFRyaRTs5NOwxwAZ+ORShRyCKY\nuOoOBSfaNrY25osIXcemKQBsnnBp28cjGKCwLDA5osApBK4NMbKgZPFSDB6nnNJwykjnBpgN\nZtuOKQSZb7vFK3TpmmKCyDsaBEnG2hSUb2NKoJTOOKAvc8GpAXv6HNODLt55NNyA3r70pChT\nVCE4PQZFITtGOopS2Fwv4009h+NSA5c9entS89aQZHuKX+HrmgLh/D696Td8uQMUqk/SiRTu\nB7ChD1I2bkDFPXHekb1NR8rnng0CJmULikY7aRewPNLJTARM96Reh9ad95hxSKu1ixxzSYDf\nvAYHFRNCGPHBq02AoAqNlDLkcUAV41O/k8VPEnoaacDjFSKDxigRznxMt/tHhqZMHgHBz3xX\nw940hZLhwvC5zzxX3x4ktftOhzoD8xRufTjrXxB8RLDy76dcHbvIBPcZoGjyHUEUK5AAIGRX\nN3irNbru+8P4q6jVbf73B4Fc3eKzIBjA9qtCRgzDa5XP41UDlSV6qOpq1IrLcNvB29qzrp/J\n35Q8jAParEzPvsyT7dwyeAtZbeYpJVP3inmtK5QKqGQbXI6mqvMnAG0YyfetEZlJZlaYb027\nuir6+tMuP3cfylSd3ftT5EKt5gPPIANVpG2xguM5NWBG8ZZjyfVs1RO4O5yQSOversybmUJu\nO4/eJqpI6xB8A7845piIJMeXGpO3n7/rTAQJC0vyE+gqZ8MyMBgDkntTG3eWcEDJxSC5E+ws\nNp+fuv8AWkZo9xVxkY65pGhxITtwQMfSg/6PG2dv161QEJVl+TcG/iOO/tUbONxbB9h6VLKo\nWQjdggZFQKCu8BwW69OtIByrtxj5wRyppNpUgOu5Tx1o8zJCZw+OfakjUM3BIdeR6UWDUYFB\n37V4AwTmkEYOQjng5PNBYec2fq3OBTPMLeYpTapPDD0qrCJJDuAY8emKSaNflYcfWmrF8m0M\nzD9KcCsihF6Z70agRs3775hhP7vvUZkbbyMnPFTyLhv3jAgcAVEoP1BPSi4xV+VfVj/CaazK\nuSTuX096czHeoUewz3poA3FSMMRk0AKzbo1ZAS3pmmuqrIMqEGN2Rzg0NjGUOOMEUiKxjJzk\nfyp3ENVd3zM+4HuKY6EYGN3OBT22lVyMA8cUnRsE5/oaAGyZ3kFO2M+9R7ljkAkJYdCalVW3\nbSc980rLtXLDI9aQyON9shA5bPQ+nanFWVixIC/1pGYQtlF3D1pvl7WPOC3NMQ75mRVKbHUc\nelC/Ip6BvTrSCQqAQdw6URlGj+UkJn+IUDEDc4AG48ZJ601nyxVI8Y6mgsOcrk9mx0pQzO2Q\ncY7etMLDSwkGBximsPMACnBqTayqSQoFRlm4ZVGKBMVgAoIUhuh5odSzbQQDQy8B2OQeopJO\nyg4OelIBD80gO0nb1bPSmhf30nr1/Cnbhkxkld3HSlMe3J/AUxDPu7mU856UNID/AAnIGce9\nO8sMzED5sZpm7rhe/wCNIAEm0HK7cnrSLs
Vj8uTjpTiFaQhue+KbuxJwOfWgpBkFBkZz2pNu\nWIc8EYGOgp5kVW+XlsUxycc8HrgUyRNphwMbSRg+9LtDZHCd6XEmQxdXHdT2prqwZmZc0AG1\nHjGSQaR3ONqJn1al5+feu3/ZoSRdvPAx09aCkHA+7w2Pu05WGSdu4qM8UkcgkYuVAwMCmHOz\nK9c0CFVyvJBy3YUeXhU475HvTlztJzgmiOPoWOF7igBGJ/umnxodpwcg+tNDkOSvKDoKaFfj\nJA5yTQA1VK5DA095izDIwR3FKuGY5O5aQ527fun1oEJkDksck02RfMdjuxgCnjG5FbB77qGw\n24DBbPQdKBjWBxjrx+FKWUqpZdx+7mmRg7gWyF6Y60ucbwM/KfSgYir8oTBz1pcK3qe5oVn3\nLtHO3k0ZC5LnBI/hFAC87lI45puxF3ljg9c0KqttHfPU0/aF+ViCf880Eke3LKOGB4GaFyoY\nDlgcE05W2HDrll5+vvTNwDORwSfu0DHRqA+cZyOlM8tfMwzEMOeOlOXPm8AjikZl2H5uQfvU\nAPYEzZUjpnimhQyjccZP60jgKyndtbvT1LyOSenQD+tLUQ3LSYDLgA4yaViJJimPl6DFIqlg\nc5cZpN3zlcYFAajG3JlTShtoO05OM89qJNm3dyw6e9Ob5YygQ5P8qYxF37mDAEDkelCYZjkY\nBHFEjBlKknNNZhFtx17Yo2KFVznaflehnLYBxuHQihlK5J4HU0NkFdg+SkSL5ZZlLAKMjc1O\nclWYsu0Z4Apvy9QxGfXmmcq4AJY+hpiHbfLyzDcCOvekZlCh1X2xTmUDAYk4PC05TnJ8vBz0\nFAiMR7l68559qRdm45X5ugIpfvEgNnJwaVm8nheXzgD+tAxuSrKpGRSbiXYq2VAxtoTEZOfm\nY+tJ5ZRflORSAdtDYBO44/yKFkPy/KMg0mQmGx97j6UJg79meO5oKQ7cWJwMbegFJG0mfm6E\n9KTcFKnex49O9OTcsh+YMcZ60xDJIx5xKD5R1NBAEZx8wpVYNkdCecetDDao3cbT0oARmzgn\noBSySCR0HXI61GRnJZcKadkouM8dKYhI/K3nOT2/GlXKyEEZB6+1Iy5+VRnvmnbWaIADk9aA\nGxsfnA+Y/wCeaPmVQ3UgcLQ67QpU8jinyblYFsEYxjNPYXUi27YiWPzt29Kexw21iB8vFN8t\nivv6U1VZskDPrSCwbQFBBzmnkeWnDDPem43KCBgdMCms0Sw7GDFg3BoCwq4yPXvQcbjxnHNK\n3yrgjqetC8/eO0d2oGIWC5Rcgsc5pFbfgKcDNP8Am3HGCvXNRY+UFeM0gJdv7z7/AN6hlAjK\nElm3fhil3L5i8cDk0h+VnyeOop3ARV3Lszhc/nRtUuQDxmjaWA52nrSqrycL97PSkK4m35eW\n4BoZgSTj5c4FHljzWOdoHc01U+Ve/NBIbXk3Bhj0poUeXnOCOtSMrKvJ280jKkajJwx6e9MY\nrbWVWClT/OhvlwcbDRIoX5Tz33UhyzLg7h/epgNb945C5x39adsIQg8DsPSnSbjNlRgAc0i5\nOSw5/pSHYZ94Zx8opyxkOecZHrSgM/Cnio41Usctz0GaoBVyGI/SjaeAW4zkgUDK7+MdiaSP\nPmKc9qAsKz7pBmP5RQ53PyDnqB2obc7FtoHYmjzGWHa/DdqlsA5Z8scMR+dG4cg8H1oXJwp5\n5pdp+dcDBoExGJ3AD5R1Jpd/zYySM5zSR/KTuGGA4PalVu4XP86Q7dRV3Ix3Ec0nGGGM+poa\nM+XyckmkWM8jdgd/8Keo7jF+8cDdxTlbPO3ccU6bIX92NpHymk4Gc5xjtR1AJFIRcc7uhpPl\n3K0QGcfNmky0in+ELTuy4XaAuOaOouozd5ylQMDGee9SYXzPvYAGKbgNgEgALjFNUhMLnGao\nBWEm7g7qCrJ8rDeR29KG+8UVgR3NIuBJtLbT
1D+1Ji1DYCOOR/do+XnK7j0FPU7twYce1M24\nXAPT1piDcQpXbz60qyEZC846E0pz5mQQTjihQWLIcBz82aRVhqMZIlY4EnXNOC53MoySOc00\nK3yjIHGKcN6wvGcP60hClR5Cx8eo9RTTj7xHTrTTGWwV5989PalxtbB+UNwaoaBlDBlBztPF\nOZi3zOdy9KTnccHgClEZVcK4IbnmgaIyo/vYFJuDLy2BUkg3Djhe5psi7sjv2HvSFYVlXyyC\nDualRvugDdwQTSFyu47TlRg0u8pIV5wRzjtTWwXCNd0S7hgULuVSM5B7elEkTbQgOSB1FL/E\nGHTGM55oEN3nnHOOAcU5W/jKtuFI2Fj4HP8AeHSmK5dC3OB0oEKSFyP4mPftQrKrMzEk4wcU\nPGGXDOSQM03zA0RZfkA/OhgHCqWP3B0NK2Fz69RjmhWHlnAB3DHI6U/dtXDH5sdRQMQMVUjc\nMEelLtG3aT15FBZdqjZxjv3pu9XyMfLjgUANZRvB3Y7ZPNJHGwklDenT2qQZ9QnGcU3gYVj9\n4Z4ppW1Afu/d/Kfkz1pin5wjcelNb5eVXDE9aNqsnCndUsRK33SuOQfyoJGz95nB/wA5pPvM\nNqnkYNEjDv0XrjsKYxI8Lhd249Nx7U8DAO7gL29aa4JVeBs68elBXcqkt8rd/wClIARgsZ+X\n33elBYgqBw2KRpDu27MEcYFKdzRjIGc8UAE2PLGW2HOaZIo81Q3zE84qRUZsO2PQ5prBQ428\nDPUUAD55CnGKaJCzbOnFK2RtZhhDyRSMmYwc4G75fpQIVV2s20ZwKApEIAO4seaTmMq24gsM\nYpI8bvmyqqeaYyTBjZwfuYzTdzMoAO0YyDS+XtYtnjrk+lEjFZF+6yY6igYoAHzLwT1FPSQJ\nlyM7hg57Uxm3Yxyegp2TyvQ+lAhnMjrs4O386GaNmKP9AaQxBdpHUcZpyqqvuxmgQ37rcrgZ\nx1/WhQFkYKc9+tO2hW56ZziovLRlBAwxPWpYD92Bs2En734UEg4YZC989qVnOVwDuU44ppU/\nOCchjTAVSFO3Jb0NINzcHkA80pkMe1SM8dMUshKld67d38IpDE+9uLDB3daGyvJI29OeaVVw\nxDcCkiXbkHkk5zTBiqAuAx+ZulNZfMJQjbgdaEYSK2R+6B/Wlyy7QF5xnd7UwAfM+VbjuTRy\nqg7gWzwPSkGNobqSelKcLuEi/eHGKCQyq55yncj1pVVR8oOV6570mNhGxtx29KABtBJwe4NI\nYit8rpnJznJNK235cjcvpjihVG3JIUnrmk/dqowxznv0pjFORxty2ePal4h2py5UcU3zGViV\nznPWlRid7kE8UCFz8wXO1jzQduA7nNJuZSrY7ZpF8pv3bE465oAVt2ckY77aCvmNlVxgZP8A\njSbUfLbir4x83pSR48rhyzL+o9KAJEcOH4zxnH9acoZl5AfHPHao/kwvOH649vSn7RNJujGz\nj7vrSARuVMg6f3aXcsm3cmS3Q0jNuG0Dp1FJ5eWAOfXg0DBjuYADCjnihZC2cqeDxTm/iZRx\n6elJufai52gUxCAbWDFOW4PvSswWTYp2991LvIz0weMGnRSfKQicgc0AIWYDbtyvUPTyNyje\nMMR60jb8jPKsM4oLCNS7fMOgoARSquMoM4604MGySePamqSFLN6dDSf6tHbOR0xQBKx2shC5\nRurGiOTcxbHTgA02NtkYbqvdaWTd1Hyg+lOwCeYS2GXbmgnzAM4Qg4Bp0nPPUDvScbSWGVz1\noGIFZd24Y/H9acjFir5zg4GRQCq5VenpUsjPNH935F6e1Ahkn8TMwPrSnDDKgfMOppqx7FRw\nNxz931psn7xmYZB/u+lAhSBGoV/TOBShipXcMxkdKWRd2CeuKY2Jk54I6UaiJY9vzNu28cCj\niNQWbcSM1Gp4OF5xzT8kqpBG7pjFIBVYZGRkdj
TtzAc8t6e1JIxkwc4Xpigf6zH3mximih23\ncw2A4xyTTmOUDAFTnGP603eSpUfwikiZlZQ3APX6UxEqKGBcvtboB60m7zl2udiZ5NNXYzED\n5iDx/hTmDLGDn5s54FBI5ssv3MKv60u5GZV6D0pqmRm4UKeuQe1G5f7uZM/e9qoCaONZJpWU\nFTjrSwqsh3El9q/dFJApkLHOFzz61o5X5XSPaqjHApCKao3khmXcD+YFKvlmYDBC4xzQijMm\nx2IPzHNKxxjJ4PWkAvllMjO6M8fSmtujI5xt/h9R60q/OxwSo/ut3pySO3BALD+VAakEan95\nkDJGQKjVTuAPzcct6VYb5/mHHHQ1XnY/xcnGOKCiCaORTtQKRnqx5qtNhQZCOlWG4IUtnj8q\nh2osLjaXb9BQMj3K6/cJbHQ0zhlwAVI/hFO8xuDx0xmo2lCp8qnfnBamUSKDIuOAFOCQetRu\nwkyFJxTDhVA3Z57U4LtPGMZpE9SWPcFIHXv61Iqu6IQmxc9TVdW2zNtbL44qSK6ww3HehGPx\nqSiVlCy5xuPSmGMo77fSppJFZcZKv2NUlkfcxD81LEWIzuYsxwcdKbuMkbHoq85pqh9rHG4/\nlTfmkHy8AckdKfQB0oLRrgZB53elN3FVILfMOmetMkbbFvX1HAqTqzPtBI6k1JVhittRnIO8\n9P8AGpsD7wb5tuc+tR7gx+cfJihiWh6EHoPpTAa0hlQtyg70vCruLZHrTlUMxQ5HyfhTEw0P\nllRj3NMRK25pPvZAFR8hfkUAZ6k0bVKnKlSozxwKViWWNgFIzn2piuRzMIXOASe4oZg2BtK0\n5/Mb5sd+tDbvO2ldzYzkdKAHIsoyNwz1yKjydw3nJY447e9ORmkY7RtHTPpSeWZJMkAKON3r\nQMeVTcQOSo7d6YuxvvHBPTnpTiqx7fuqc+v6Uj7HX5vlJbpigBVz5mwA4A5bPFO3eSoIX7x+\n760iBo43LH2A9qamVUAnLHsewpBsBVdpI4XPNPjQMvXcO3tSSqeFI/d9gPWnNyVXZtXvQgGq\ngXBVt4Xt70MvlKFA3CQ8ge9MjZkV36N059KnhYiNGXDZ6GkCGKpSR1xlRx9KcIwuAFLBuuTS\n/NHn+81OWM8hmO0jH0o6jIol8tsMM9yaczA8p948D8akCqkarj7owAe9PWMkZKgH27UFFaWF\n2hwwDAdzRGrkJxuYcZNWvs7Pw/yhecU9YdjA5ymOOKCGVJF3MGCDcec9qm2jll6Y5NSxnZld\nuB71N8rR7VwDQUVZreSMKQmVYYz3qzDHGsIJBWRfl396MiTbvb5h2FKrBYCQN2expDt1EWFf\nmIHLHrRtCqqqMEjBHoaWSU+WhK/IODjtT9wwMnjGACaAIXVcEN1H5mlChHckEADio9/zFicd\ntp7e9T/Y5sr8mc85OeaaBCIoVQQOvJqSPEkm5m2kdKT7HczHiMqwP3RzSR2M87nZExI4PFUM\nrSKPmw24uc1MZAqxADeCP19K0LXwzcyQjzIZT83ykcAn61fh8E3jLhkf5efm69akLM52SQnf\nlcDpgdqbLcMzAsApPyj1rsF+G2oXMcUpt5I4yThuxwcc1Yg+Fd65EjxMyqeGXmgh2OJWUfJs\nAJxkhu571CsvnEmROjdK9Pj+Dt9MobZtVhwWHJq0vwYvYYhuZJNo4Cqf1ovYrRnlBIySwxzk\nKKWEtM33Sq9q9VX4Q3sakCD52/iA+X86jk+EN/wGXOP4YhnNK5NjzFWZdyn5QOdhHWnO5dcv\n/EOo7V6O3wovntwHtmik3fccfMRWfP8AC+8jkMaxMOdxDKentTDU4RmMciujY4wT2ohIjJP3\nf4iP71dr/wAK1vl3EWUrI3VmGFx9aryeA2jSTIaJlwpDdT9OKBHJyKA4ydoI3CnLcJuHYngi\nt6bwfJCod43OOi1Tbw3c+YHjgJI/hPp9aB6lFWGx
lZehzn1oW4EiYjO1c8hqbd288bqPKaMH\ngE+tVIoZz5qtwF9ByT9KQFyPlWG3BJ+9T9pU4wq+tUoJW/1LEq+M9Oaltx5quAc7Rk85FIXU\nn8tVJAkJJPQdKkVEjjcuGXsMetUwx3IQGbFTLMI2fccMR0Y0FAyQq2CrBiMbvQ0xrd22gnIU\n+tWP3L2+Ff8AeKMgdqb/AMfDDZ1Uc57mmMikjbcAmAem0fzpDGzfKyfL/fHenFY+TIrI4OKk\nKqFZi7ZI+UetMCiyp5brjAY4A9KftOVlOAR8uatNGfLOMNjgnHSkMW1VxT6ElMxL57eYmS1R\ntGGaUMQDj5PY1daF5MqoBPX6VXaM723LtCryfenohlVd0cYDNkHg8dTT1wJAT6cU6JGZVkf5\nI84HvUnl78kNuA6HFK5JWDZkL/dPbjFO2ltx+7nk0rRmNiD93B3HrTFTzAvlsdvYGmFgC4XL\nR8Mc7SaULvVsA+mKF+aNyeWU1Hl5I8ocDuaWox/yjaCvyjpg05lHzENuI5wO9R8btqHbnru6\nGnlUjjRhlZDwR1oJQ+OIzGN1TIB5IFTSb1UgkBexXr+VV45Nr5UkHONuaVmbzOMfNyRQPqSq\n7hVwcY796kZY0kG4HcepB61BCz7umV64Penxyo0jM4OeuOwNMLE8yJKVZhyBw4PH5VGzHLYG\nRjkkUsc5aELKdzfTtUgfMy7V7dO2KZAlv5UQG2M56ls96lRnX5woG05OahwTIEHBJ4PUfSmp\nJI+SCdpHcfpTJNFboSIcqQX6H0FXYbt18sBm2DjPrWVbyAcOc7e4/lViO4BARAwYHg0yWjrt\nPvGjiJXcJcZLe1dpoerLJEjl9zONuAeM15bHespILszAYJ6VvaNqrR+WrIVhHOa3hIwlG57h\n4T14wyfvJNoB+6K9Qt7wXEY8sZUgYY183abrRjJZPkG4Hae49a9K8H+KCAVy7dxk11qSkjla\ncWejXcEkMJ6MepNZ+4Px3x1rT0q6S/t8E5c9c1Hf2w5KjAXpjvWcom0JamarycKxxkU6HO7a\n3FLGSy8/wmkbC9eCTWD0OoVm+Yk9u9RtubLN9aXaz/LnODyaVt3mBTjGOKYivIfOUnJX1qHc\nqldwOegqXaDnc22oGbdKFQZxzmmZiMfmGemetO2q25XG7/ZFRsTJkMdik1IqpGODuIH3jViH\nrhlyVBbpimrj5RuCKDk01vnYbARxyDTljKrgkA9qYEnBO7AbnIIokwzBlOc1GxKtwPm6GnKx\nDDC7PWgklZdu0Ag7qesZXo/zVGNzMARxntS5xIxx+FWIkjVmLAnjGeKTque3SmLnHLYz0FHL\nYUnYD37VRDJGUhMHP4UqKJMxZwQM8GnDEeFzv/Gl3BuVTHPXFUhCRY3Hk4AqTywcHOXPNIq5\nlzgbTQuIWZj97txVjJlVnXLNhs4xT9vluSPm4qGF3YhmGAe9Pb5SFP3c9qYE6rGWxjBApq4L\n4GSM/nS7cMX/AIe4ohm+ViBwDmkIemOVVTuokhZVC8oOppPMZo8xcMTnmn4kdcluOv40xitu\njVM4ZGHzDvTAytNj7o/vf0pW+6AzgGmsp5UdQM4piLH7v1NFUPLl/vNRTA/SoLtzxSMT5fPJ\nqTJ/CozkscCvjD6MF+bHHNP3EcEU1GPTHNKzEtjFMQMo2kimqobA70qttwD1pWwvOKAFkQBc\nZpv+r78UIxbkjNO52YxSAjDDPFDEEDPWlXH1pdoOM/hTGRuu1c+lQtlefWrbRF1JxxUDQ7uB\nQDEVulSqrKcsMnrQYRgYNTBtmAeRQIaw24IHNJld2Dx9Kc3UZ6U0rzk0AJIew4FIvzLgnmnN\n83GKOOO1Ahy/c2cjFJsO0570oYs3Apd3zUxajF+XrxTmxwCKUsCckUmCeaQxrLtIOKXnrjg0\nFju55oZyFJ6+1CEG0lcdKTIXpyabuZk3E0bMKrMcUM
Lip+85zin7j3pu0Yxnmk3bgF6Uh6jg\nvzcnimso34wcUSN0A604k7hkYFAtQxnuOKT3NKV3MT29qGwq4BzTARelL/D60YNN3cYpsY5s\nFcdDQ3y4GKbn589qRv3fU5BpAC4ZjmpljGOtMwBjpSqw6niqER6sB/ZkijgsCpP4V8c/FfT9\n+rXShtqrISD0/CvtELvhdWHy45r5L+NOn/ZdanQ/KJMsP8+lSEdz5x1hW8uRhnHSuWvV8mPI\nORXa61CeSp/Dsa5a/hVkwRjHOBVIRzd0Qy7ivXrWFfRlOcEHrnrW/qC+WCScD0rGuiFjywzu\n4xWoGRcSCYDIPPI3VmTTfKQqcjgHNa9xHuYgDbtHWqMyr5JYc461SMijLIZYxkcd196rSlmA\nYt81W7hUbdtOCvB9CapTM27aUGT6UxEXluwfYqsSO5xVLBMfzcuOvvVxj5LSIWIGN2T2qM4f\nLAEnGelWIgbIZQeIyOgqDydzNuO3BBAqzuO35hxj8aiuI872U5lXjDdMUCI2V1LHdnPSm+T5\nXzquUYYI6496XbuYEtzjJxzRHIyliMrEw/ioGVWB+bI+8cf/AF6hRgnmM3POAB/OrEjiTKcD\njmqq48vaWIYHj0poYD5DuIGT1pGVo/mQbqVt+McAN1XvTpIRCqlc5zk5q9RkLsGXBXk8n1p3\ny7QHztb0pTF99icgjqO1QCN9qA5YUhEq/LFnk4OABxxSR7GYtgp6Uvlq0ihm2DPGKc8ZEm4u\nCv8AdoDUrs3zfMdxz3700qy5dQQTxg1YYgqflAGeuOlRssqqCjbucnPegOg3efLCkZ280m0N\nsDDOeBSKwySflYnNSMD5YfcoXoMdc0CGmMKFVAATxUbL5DYLNj/ZGaeqEqQx27Ryaacbflc4\nbvRYLCgjIJ+72qORemFyc1JtEMhLsWXGBSNuA56Y7dqY7DVTMZO0buy55NIFBAXPbOP6UrRx\n4TGWY9RRIFOG5BXoaBDJJRwfur6d/pSccjuf84okXcM5689O9Md1WQk5HH60hCOrgDkRjOMV\nKyDcBkD296jXG4ll5I4JpVzwFGR3pjHNJt+UgE9Coprfu5DuAKn0pAw3YC/MeppfL3Jt3DI6\nZpgRMvlSbmB2dqP43xxg5FOOAu2Qk0jhfnB69KBDSNzEbsLnqeaJsNJkDBUdO9OWTapO3Ham\nn7wIOOKBjd+5lYA5pVbawLHIzwKRifMUoPkAxRu7n8B3oAm3BkZWGe+VqDaI9gQZRevqKdu2\nttK7SaQbsbeN+efpS6gQybo90jDcM8H1p5bjgbVBpzyfKwx8vvTT937uFPU+9MBol+ZhtyPW\npOWAXpjpTYyyqQQAO1C5VlZmyPSkxAy7SWI/KlkJ3Kyg4xzTcNhgCSTkgUp5UAHIxTGJuaOS\nUou8E/dPrSJxICwAAXJGeKeT3Jx68VEqhmVWPyEZ20ALw0e7GOelEm1XAToR1pflVQykBQcE\nUmFO7+72oEK3+sYYyAuaY3zQ/L0xkn0qQsRGNmOmDmm7wsalR8+cdO1AAshbbtXaCMClb7xR\nucYOaOPJBUfxUBRuLd6BiHdIwKnC57UpyFO/Oc0FlVVbd944/GlkDG4G/wCUDGQKYg27igBz\nj2pOFk5XJPXFK0m99y5Cg5NMZ9rEjueKQA0nPyjIpoZ9xcfQrT2YfcHy/wC0O1CLtuFIYMOh\n96Bjfur075zQrYJO3OOaPkxuzjn7tKzDy8EZWkAbTtUkAAn16UwKXUlhtUn5T6infKyYXhQO\nBScso3E4NGohysvzKOTjFMU7+GG0ZyaedpwFOB05qPbhDz3ximAv8JC8t254xSlF8zYwChaY\nzYxhODx9KeV2lgDy3JzSGNkjTaCgBXPSnDYrYXPrmmYClTjrxxT+uT2WmAxdqsdp2q3XNKrB\nfnGGHSlZg3G3G7+GmLncQRgdMUAS8buMDNNbd5mM/MPW
mrlwFI6H9KCVYEkkDPFIQh4ye/rS\nltrIwG5KexXzCpGB0oeP5TtOF6AUDI5SWYsMDvz6U1cs33+DTmA2KJBhl4xSFP4SCO/FMY7y\n2HKnLDrQzDcCmBjtQZlUEDIPQUxe7DqOvFLqSPUeZIR1OKjP3woYqAccdzQAq8jPPOPapSFX\naEAIPO2gCNQVZizb3JxtFOJ+YNt24ODmlwd5yuPSm5G07uRTAbjcj/PlQcgUrN90ctx1pxA5\nJHboOlR7vL+Uj5cdafQWoNsVgefr6Uocrxjgng+tNiLN0HHfNIpKfeXcM9qRRIo2SAM3Hp6G\nm8NgH7wbkjvQoGCSQd3T2peOmcN39KQDnLJkhOnIpp+ba4G4nmnEbfckYpqsdvy8Kvy0wYr4\n2fMeCaYHEe5X5HYClZd6Y64/OhSTyRz0oAbG2yMNnBPGD2o2/IDnDZ+9S4LAoRnvmkyWVwU+\nUYoAcq+SuB8xPP1pBl8MV785pqlyuQp+VuKdu37nLYYHO2qRI0tjDAEuTnBpzBlwFBXcMmhv\nm2kn/dFBZ2GSflUY+lIYBgy4X5SPWmf8swffBo2nbvAyO9Kq8d8HoKYDZJP3gAGQv8VG1mcK\n+H56U4odhUHHcijhmyoIwKQxGz5uf4AcHFOcBWYjOO3tTPmGWUjYetLuA25bd7UhC/6sD+Pj\ntTcdXI5PQUsYMe4A4LUm7dtyuMHmgYrf6zfnHy8KTSIA2HJKv7Glb98flXK579aczBd+VwFO\nBQRuNBDFtw4x09aDu2ocbVHFLyygjihVdQd4/CmAjfvGYsd23pihcc8ZPb2oXHKqME80bQFJ\nz9aQBg7csNwNEnzOD3xgik3naAR8nrS7lbcQMehpgDfMgVSd2cn6Uu47gN2T6e1Jy0ahfvd2\nFKz7XQt1A4NMBq7mXA4Y8CiRTuXaMleOPWhgyxjjLZ4NDYVgAcDu1ACfMJfnztbrxQykxoOh\nBzup00jpgdRjOKiVSyqc8jtUljpNzbkGHU9fSnAiTbEMblFMVUVSzHbk9KcqbZAV64xmncQ1\nULZJbBpdqquQdxPWgxlcrvyxPahsdMfWkAsjb1A/GkQB5CpYqSMihWzuAHPUZ7CkXC5DMAzc\ng0EsFUKwJ7H8KVVLg5PDN8ppWYIqqRgH1o3Nt5XaBzigL2BVO7a3zHpn1PrSbFwxx8o60rt5\naLzkkZob7vJ5PUUx3GbW3Aqdyn8qedzEbpF/3RRxsG0Ywe1NC7skgA5wDigYLGGJHel8sLjI\n3noOaJMqOue24UkkflsmDgHoM80AHGQT8uOgpOZN5xjH8NO2jEm76Uka/PuzkhaCRN2xtxOR\nim8SRkryTRGv7kFjuyTgU6NsMqqMetMAXDxgjG4DGO9Jt2yDaTkjDGl+aR2wMYPBpOWyB8ue\n9LUoXygzAluM8UgV9pJO1t3H0oHQkts29fWkVi3A4z0NBLFfsQcj0px3bQSM5OOaHQrxjLD+\nGkDbo9xXp1FA0DKWfJTEa+hoXMgKkYA+akiXb905yCeaBtH70DLdMGmDEUL5g3H5B81KWEi5\nJxuP6U1XXa6sM9xTs45bpx92mIXd8pAOR3o8wMvTDN/F7Usa7X3A5Xs1NUj5iwznuP5VOwDI\nx/F8x7H3p/3gwB2e1IV8tTvOwY6daeoLRbgvHX3xVAN+ddrL90cYoYjccDHuOlOchANuSrYI\noYtuwoHl9cd6AGMN23jaMUDJYcAcUqs/ln5QxY4BoG7r1HQigTD+L5Ru7YoVtmTjcvTmlGNh\nx0zzTQpO51yx9TSGP+9naMjtmmceZhhyPSiUjyw4OSDSuGaHdz5mclO+KZIyRd67yMAHmnKU\n3HZllx+VAj/iYZyPu00rn7p+T1FDC47cd3AwAMk05ZvLhJb51PNM4b7vX1NP37lbgc9qTAAC\ny56JjjBpA5VdvejeGhYD5j1GKbgKuW6UDuKxPyjbh/b0pR
8vAXkdKVccMD8x4oVmV239QODQ\nMCeAxwB3FH3lGV78UqKfL6B93Wo1bbgDpnFABnDMzAlfansvAU8jqMUNIcEAYoUcA5xjgk0w\nDaiyBiDwOOaQS7m+bp16Ujr1JkyM8U5m+UrjGeWPtSGIVxjLZ4+8RS5Zo+V3DPJx1pN6YLBi\nQOBxTsngOTgjOO1AhMDoePxzR+7aRVxke1Ckd1+Xt70m4Ix7Beo9famAL5e1G+6AeDQqbskE\nkg5Le1C7Fjxg5PQelBXdx3FAAx29BkZyKWTBKjOzPXHakdj5eVA+X1oYqyjcPn7H3oEMbKfx\nfKv8XrRuYKhI+g704xnzNynB7j09aSSRmZMfd/vD1pDD5lYgkqfQ0jY28j5ienanKrjczncW\n6UxQzNgpj1pAOVjtVd3zAYxSquDtDZHbPahSGclf4R0xShlbJVc+5oYDSSByMnvSRk89xims\noaQZz+FPWPO5FO0ZyTmmgY7cqwphQdw6elN8v5cqSD9aQ7Sw52gDAFG3BDKcgdc0xCg71wVA\n9DS8j5XXJx1zQ3yuoPKmkJzuDLvGeGWgBDmNgVXY1ObLcsMN04o3MUDAd+SaYi43byRnJBoA\nef3bDPz59qRNykttGR0pQo2Kc7R1poJVt4UsT/DUjHAmQks3bOAOKRgWZV3YUjPy0SDzWwPl\nOOfSk5TG7JUccUai6jwqeZxkr3zUcbKVxjJzTzuQBgMoTxRwWGQFNMAkbZg4DClVQzAp8qHq\nDTNu7OF49aH2eWCVIUGmMkX5ZWwuExjB6mkZv3YJbGOMUNtHBJD9RSqQ3ReSOlJiBmK8LjHa\nk3GNS4T5s4NI+RtyfmHRRSSbvvA7xnLL3FIY/aOWL7d3FJkhsEgjoM0it1CplW5C9vrR1IYd\nBwNw61QCtIFOcZA9RTnb5+oXPPFIqjBP8DDkU5owIxGvznGQw7UCDlGJJyAM0NiNQD91sEVG\nvmcsVLKOKlfcW2kY/wAKAFZiGO4blxSfxMMD5hmh3L8DnPFIuVfB6r1oBjhnbgDPbae1L975\nc8Y70vBbzQpPY03YShOMLnigZJ8sYCj5/XHSo2XdCoY47rQsREeVxwcmhdzMB95aACRfuAHz\nJG644qVSWUAHae9RgLu3FdwB6GnxxncTnjGNtAANqt5o5IOAKXzGhfrgtTOIkyx+bPAFPMbB\nWj+++3IagVgRSWLHoD3qPK7SCCecinMzCRQx+73FIzFmwfmye1BI7em0sqlRj7vvSrjywc/M\nOeKTlVIz7UqK27aBwBmqAVdpw5HynrRHsM/zKcDuKFby96MMgnijJPOBmgoFbqyggZ6VN/rG\nweeM0yEdVI27uOKecrJ6BeOO9IQhUMAYkywPQfzppc7jlQH9AevvUm7bnCnJ4/Co0VmYlThe\nnvTESbSqq+cfzoXDq2TtHUUowyn+905p5jGBxuHT6UEjhH+7bDZwc1I1xuUJkoMdqYeyovOe\nR61IypJw3ysvJWmA9V/cqF5XOee9SRyKuF25Umo2UFQEJTvn2pyMT90AEc+tAg8kBmJOOaQL\n5eWXkf3qmUq0O49GOA3vT0byudm4MPxpFalWbtzh8ZxVXc3lbc7mY8irjnDFT901EdkYwBwa\nBlOdTu2joPSoJBu3YGBj1qy8ZjRsfLnnFU23qpOOT3zQMr7j5YDfKB93NN3bSVx1FLMw3fMc\nrjmomYKBGMg9aBj/AC+e2P60jKpdQowB96o3B2j5sknJFSRrlfkOD3BpAPCIylCSO/TrTcFR\nleV7U15FZSCWJHanyBfkZPucce9IYqyHqT14Jpm8eYQv0yelEg65GV68dqARt3MpK9h/WkIc\n7FlZi+AvOBTllBdWxk9vcVDGu2BwGzk5J9qeq7kHIbnPFIqwu7zFY5A5+6KVmGxcnlmztPf2\npGj2fMBx/dFEZC4DgF3P5CkANIscjLnr/DQrMFQtnc1O8vdM
uRgA4Bob/WBVYEqe9UAz+HLH\nGeTzSncy4wD/ADo4ZWGAWB6U7y/Mx6460xDP9WxJ+8o9eKaseMDPLHJOelPaFm2rjIzg0FPL\nkdSNw65oELu2nljjOMUHPl7P9qmpITuBOSTgU/afmLHCD7x96QCLmLK7cKTxStG33F4Gc5pA\npCq2W2t0B5pP9VlRkiQbt3amMUqq5LgMuaeyoHDDrjvzUasOGX6YNSKqhT/AnvQIHZmKISCG\nOaX7zNj5RnAzSIyiMuVwBx6mmriUBXbjOdtIocz/ADHIpzKJArn5T04ot496uobjPFSK3y5x\nyvFIRHJhY3AXejYGRSndCFIUHjHWnbtuTjj1qRv3mzC8L1NIYixh+GbD9RTvIK4P+s5wacVX\nbuwTk4FJ5KlyqSHPegCJom84jYz+/arEcbxlecD170fPzGh+6MljTl27G3t8ue1A7iqgySrN\nt/usOTTAzNEWDbUzjb3pkv8ACyN0PAJ5NPZZJjtUZZvRcAU7kiTA/KvPTNIHEMfyklycYqxY\n6PqGoAoYnCxnBkUV2eh/CvUNUhE0kZ8sjKrtwWx60ho4aOQuoK8nJHTn605YZdijynf0wp59\nBXtui/B23uNrTx7ZMYY8ce1dXa/DPR9KZXmnWRVHEfSgXMfPll4V1O6jDwIST1iCn5fatSw+\nG+oXU+Gik24yRjgGvoRm0axH7m2DED7wqCbxIlou23iXcfamoyJdRLQ8t0z4N3lzGhli2Ln7\n3UV1EPwZhtlRp51Vs/dDdBXSSeJp/K2FtrdflqhdanJIg3SsTnPWmqcrk+06i2nw10W3keOS\nUvGOflPP0zV1fDXh3T5gYYFO3/npzWat1Iq5Viwx0zzTJ5m5BOSec1rydGS6jOlS80O3yVs4\n2HTG0VP/AG5pnkeXFawnHP3BmuRLDzVwu8evpScrnau0FuDR7NIydSTOj/4SJCp8qFVGemOK\nrTeINoby0w3XgYrH/ebWQYDUbSduB2waOVBzSN6PxNNNCCpKsvSpI/FEyZO76j1rmo23BgDg\nKeKkXOckZDc59KnlS0KU2tzfTxMzqwBbzeox0psfia4YkvgHHVQAa5/dtDsOOxqcSY3KPmXH\nFPlQe0Z0EPiiRRtZcg/xEZP50/8At6NziTGPXHNc0zGTq2FA6U7exUHAyKOUOc6b/hIklHle\nQWT17Yp9tf6WquXtl3AYBYZrmlfLDL4BHalkZWfGDjHNHL3H7Sx0jQaFdRENboS3BMfFU28E\n6JdW/wAjNEQ3VjmsPzmXchXy+cjHpTkupFUZfeM8e1L2b6F+1J9Q+EtjcNH9nl8yVhnIHHFc\n7rHwbkzvhXD9Ca34dWu7cECXGOc+1acPjS5t2VQoYYz83ep9nJD9onueQ6h8KJbUMrQvtXkn\npk/Wuam8Gy2qyFYZFPoBivpq18Y2l6oS4hWRu4ZRgVauLPw7rPyTQxrMw4ZePzrPllcuLT2P\nkNtHmjZlX5uOOec+lU2tp4cyNBhgMfvK+rbn4WpfRyfZxEyMCAwQA5rjtT+FNxG/kPAxKrje\nBwaZpY+etwjXLuVbrlP5VOrE/KwKZANeg6x4DuIFcNatAinbvIzzXPXHhW5TGQBgcqKYGC0g\n3HzDnBzgVJ9ojIJBxk8dyKS8sZbXK/6xivO0fpVNozHscKVdRgq1FhXLbbtzEElD1+vrThbu\nsYJl38/lUC7ywGfm65zU0cw7dT1pDJWx846BRkmovLxHnsTxUm4Ss8gGUxj8ai3FmBOcf3RQ\nAk0PmFc8ntgUMoYMoTaR6HvUgK+W2G+ft60gOX2HG7bQIgk3k7W547VA9sEUPvwfUVcUj7Ox\nBCtnB3daZ5O4AdV9qaAqldqjng96gWMrkBTtznI6VdkRJoiUzhTgZ9aYyny9m7aO496oRVWT\ncqYHfnNLuMMiZb5lba1PnjQMqZyQc7R3NI2EhYuMkHr70CJr2W
BoNkS/Mr8mq/lrKymM5QcZ\n70xm3bUMewHkml3KzId3yg9uKBEqk7AQ+UHWmzzR+ZuCNsIyV7/WiIr5EqoQgBpNjNCDuDle\nq+tIZPHP5jAjCkDG0intIVwA3B6tVdtskQLHaPQU6KTzEZgMhTgtQKxYjkwmEThTuD570M3m\nRyYznOc1DHMOTywPG0VIWk2uQQqdx1xTJHecP4QRz1o3OJiQ/wAvfFNDhpFf7qn+H196cVZF\nLsAByBVIRbt5t8Ua7+T/AMtBz+daVveEbFD9eGb1rDiWOOFVjB3qMls1ZSQlUdF3diOh+tUm\nT6nZ2+oYeOQN8inlR6V2/h3WRHHvMjBCfkNeS2rglm3b/YGuh0PXDHMFTJXGNjdBXQmcs4n0\nP4X8QSR3wUyKCBnCtniu8jmSbcd2/f8An+VfO2k6ukaiQNxnO5fX0r1Hw34k3SJNMxKBcls9\nK6YrmRz6x1Ovuo9sjADK+tRSfu1AI796uQ3S3sbAcEjhu1U+Y8rIdx9awlFo6Y1OYiXI3rjN\nQtMAPdafJxLkE4pJANvIx6VFjS5VmJkk3L1xkAUilv8AWkdRg1I2yFsZ+ZqiXETFW5NUkSNV\ngW6YI9elIr4LMep4AoaB3JAKleuaRvvjqVIxVCJWYcY+Y45NIrA53HHoT2NIMKy4zt9KlbIT\ndjJHSmOwgceYo+96+lOZfMJZj1OBTGY8EjaSMlqdIPMKqp7ZzTsIk+WNunI9KVFZU3ZyWPem\n/ciJJz2zSugk2uD+FUQO8s8E4yKR9p+9yMZpqMHJ289qmkUKuT6Y4oRLGKV+Vhn15qyJtrdc\n7ugqureZHj0NSbePSrRBKjfMcDJ/lTlV1yTgioEypBzlc4FSybtwQj6kVaK6Em5hIFycU0MG\ny3OM/jStvZQF52nrUiszL8oUeuaYCxt5mSDwetSQ7fM29O9QqSitnAyOKfC2cMRjjGKBMlVt\n8jEjavalXb/z049DTYlLKSfl5pMj+7THqSMoOMLuk7UjSHqOW9TTVwzDJw3pT3+6CzfMP4aQ\ntRm5qKTzUophc/SUt6UZPsDS7eh6UAq3K9fevjj6MbnHXg01mLMOflqXZ0J5pNuOhoEMj+Zi\nTTmU7eopxXd3+tNJ6AigBw4AJG2lchuOoNDtu6jJppXGMDmmAeXt4Az9KTaRmn52tjNRht2e\ncmnYZJCxxg803aobNNbIYYHagBs/NRYQ44oY7uRzTTjODShhjAIoAUtvUYHFP2jjNJtGP8KQ\nKN3JxQMX37UHDg4HNNbPTtTgNuMUyRFPzA5xSqMDPWn/AC46UbeB6UgG/wAOMUbh0p4GKZjg\n8UhkX8qPv98Y709lLYoXHI6DFBPUZtxwelO+XqeRQy7QOMUxiAOKRQ7IPTg05hhQMZ96rrJt\n6ino5bmmAbTvH9aH3L15FS7d4+YUjJ8mAaBdCNG96cCKPL28DrRtIXmkAvmetI33qXcOBSn7\n3TNMCPjaB1Gae67sY6e9G0dMU/b3IoGR8U9WGMYo2gAnvT9o46U+pJJCdy7CMg185ftAaXGL\nxpQx+bpx/CO3519IWrDcAeteGftBWbbVkHQkucDgL0xQB8ha8uwtGSDjjFcpd4jX5up7eldp\n4is9zSfXnn0ri9Tj3Q/KCecUIDmtXthvEhfcOm0CsO4YR9RnH94Vv3xZpAOuDxWVfIJhIMYC\n87h39q1uIxZAJo3bP0FZs6tHkAY3DHHetEw/u9wGATzVW4Ji5J3ZOOnSqMjPjzJGQ64Tkc+t\nUbyIw7GTdzwMVdkjMspUbgo5NVroFYzj5h2GapIllFm3hvNOWXtTZpCFZs4yccUHe+4MMIaa\nvluSh2q/UZ6CrKGXGQq4BHOMmoX2r82CZW468GrHl/aJGY8eXx14NVZFaFQZOnYCqEEkZjYM\nowejLULdwUZl65zU5yF3n5h6HrTHdQQoJOeMCgRVAdVYsVJboKR4
45Fx0xjPpUrfKrjHfB9R\nUTRrFgDnOMjNIoiWMnIGSMU2NyH24aQEfd/rUskfRFfHOeKEwR8py2elUgI2jVTtQnbjk+9I\nqsHyxG3HUVJHj5+57imsw27RyAM57UnuIgVtrbsZANLlFI3ZJPSnLHwdwHPIFRqwj5MbY96B\nj+GlBB6DpTI5GZXA4zR5n8S/KTR9DTEQrhVIIBPc96ckflxj5txznbS7RuYYy+KQYXDHqODi\nqAe7bmZs/u2XGD1qKNhHtQjp92g4RSxO4E4FEjq7DPyqvY96AFdQXVpBnIzil3FlYZox5m3P\nPHAFN+baxYfIp60DF5ZQehUdaj5yMfMrU7aucp1IyfamqwbHzc9AR60iWMVVXLNlQvGM00f6\nwgDIxmptrFSAobHXPemlfMccFMDmkBCuGLFskY4pYx5asxO1RwKSRCF+U7txxSeWWJHQqf1o\nEPWT5SWHak3KNpztJp5V9pDjk96Fw644yKoZHk8qw5z1poDLLliGXPNS4CtvJ7c+1J5Y5cnO\nR2oCxGMyTFm+6O1ImVY8cN09qbu2x4X7zHvT2+UAtyfagBuF2lSThTzTSuJPMfDHtipGaRvm\nUZz/AA1EymMBmB55IFAXHMD5gJ5zzupG+8H755pVwFLEtg8iPvTQygEr83HJoEGewHU55prq\nrTZyT/s/1qRcTc5IOOlDAAYUYJGM96AIeQ25s+lK+VG8pgdMVImfL8srwORmjjDFvu+ppAMZ\ngxVecenrTlY8kcY7U2Ntyqg69d3elDGUnbyBTAUNuYHqO4ppXCsxPyrwDRGw2453FsAUu1cF\nQuQD81AEbfu8BiDnrSt9wIDt7052Eh5XYO2aQxlsjOV9aBjd7lSxH7voVpQu1gAeOookwCdm\nSMYPtTFBVEAO5RzuoHoSNufqMDrTWKttCjGTQMMrj+I9BSsrlI+hZeuKBA3ltlQOVGc+lNjd\nVZwp5xnJoVx5jYTg8H3p7JH5TD7hJ4piGLIdg+YAH170qr8wON4HNRbGX5Scgd6fueSMKTsH\ndhSAI921nPBYnj0FI2GjQ553du9PZfmKpxxnNNX95tOMNQOwpU7ScYweBSfNGxDdCOPrTtvz\nnfzxTlxnbkN7+hoEQnbHHnP3untUm1pMIGAPUZpsK+ZHIJAAATzSGMqDzuwcbqAHnPVgMEYp\nq52qAuW70MVYhVHPWlY7dpAPXBxQMYzqv3jkd/akO4xkk5xyKduEbYxktyM0Lv8APJODkcrQ\nIXPzfJ164peJFOeWH8NKrOW+Vc5NNGPvA4b0pDAqZiMHaVHpTGdm4P3gfvVI0hZ9+duB2prE\nYOSDg0x2EV3EgC8inNIY23jvxUYyoRiCFJp0jeWTJjrxigBAuMseefu+lDE7wG4Gc0KCqkJy\nD39KGy+Fbhh3oCwMQx3Y3N3pqgtlvvKeMntT2Z9wEeAD1z1pm4KpibIOcigQjc7eeQeKfIwa\nN88H2pjNtjHfnHvSqwePbtwc80gCQgIgHTvS+YFhGOueKRo/mK5BB7im7EVyd25cYxRqAFhn\ncTmnnlQcbsdqZwFUBSxJwM1MvyttUFj0JpiI1kEal8ZHcU3eqtyNysOKcVZY9oUfe5pituY7\nR8o4oKBsqAcYHTFOZfmCoMg9aCCG5+ZWGDjtT/8AU7V/i6A0CI3jG8DGAOSBTZGDZJGD2pfm\naQkdutCllyzDcx7dhQIOAvzHn2pflYL8vy9eKTJXLYxShd0ibWyO5JoAPuMW6elRxnbH8xzz\nTpRJtX+IE8UHA+QYB9KB2GK23l+R60/Y8cgJfGaT52wAABjOKPv85wehzQAENIsnzcMaRmG1\ncjcBxik27nGTgCgR4Vjv5zn60CHD5sLjg9BQu7legB5o3GTax+VaQthWyu5Sadxjm27QD8vq\naF3Rk/KD6UkiptB27uOKYqszKSPm7/SkIepbczKu4njmm7WYZPA9RT
lXaxYnGOfwpPursDbg\nxyD6UANKgYEY+XPNL5gGSq5PQihj8wCj2Jpq9254OKADauWJ44707K7lOcNikkHmZ44HNL95\ngcABh1oAQfM4789qGk3yNk4yaVcDOeD0BpnlhW6fLmmIcxOz5l6+lLu3x8HOPWmqNsh54NPb\nehI2gqR1p9AGyEMS4OMDnFKYyWVQQQw3ZFJ8oQKvJ7+9NRlBIAJX+VLoJir1ORk9qc3ogG40\nkciL83I4zt9aavPz9PakUOPmR5+YdKRlO0c5HrSsSqqSudx/SjczZCD5f7p/nT1FYVXOcgYw\nOlKzKu47tsnakZdoHzcHvmmMoYk43tnmmOwK27g80OpXJA2M3FN43EBenAzSnCqBn5lOaBCs\nq7sg8gciiOQAkYJNJGwZsjqc8kU4FmZQuMk8j0oQyNW+V2PL9hTtu5fn4Pb60kgLO205296U\n5mjUt1NACqo25xhsYNN2q0O0jnPXvTsfNnOBjGaIhvOSfY0WEK2GjDMNx6f4UrAyZ3HOeKXl\nVcce1IPljO7kg9qkQxYXXK7hx3NK7FWVsc464oO3bk5pdrHq2BimHUauNxUdTyTTsloxgDji\nhWDEuVKoBimKo5JbHoKCxx3Fwo6Y5BoXJbDLz61Gw3MMcMOafg7ic4OOhpEjV38kHims3y8A\n5PWpAq/KQ5917UfMoJ+6M4phYRUPGDtx0o+8M5+fvSswXBQ5HqaPIZo9+cEckUxDFJk+UdaN\n23B+6MY3UrNll4AOdvHrRtVWO4ZGOfrTKBV3qQo46/WhW+TK/f8AfoKTnqDtyKVmAwpG7jOa\nCWB5II+T1oD9WY5B6CmmRFTdgsV/hp33dy4yc0gAZ2jnA/Wk3qrcr0o56dTRgtMQeijpTAWX\nacFfvMfyoKh/u/K3o3pTlwI2O0gtwPam7cLkcgD/ACKBjC4MZz8iD0p8eBGCxCMeg9qbtUqQ\nOC3PPQUBQygkYx096BArYZj1ToM1Ly0YPU9OKZJlFUbhzyaFjaOPfncCecGgAIbzQQcKo+7R\n5n7slRsYnrQx6KBg9ee9EjDjaMfSgYfe4B5xk0vReBjdyaadyMT14/KkHpu4PSgQ4f3v4fb+\ndNyApxzmlk/cSFDydv3aVMDG9eFPAoENhYwEtjIx9005exyQD1BpNx+bOCM0jY7HIxwtLYTF\nYFuG4DHANNbcsRJxwcf/AF6e2WUAqQpGKayKPk2lm9e1MBeI2G0bjimKxBEmcYPIpdwMYUj5\ng1G0fMDwCeM0mA9/mJK4AbpjtSszbQcLtPBNNWEKjEv8oGSO9DN8qiNctTKE8ny1G30yTTlP\nljLjO7pTWyVQN98HGPalQ+ZIQDwD37UgE2beOc5yBSq4x8w6HNIzOp4IxnG70pyqV4yGYnAo\nQCCRVO5TkZzg0rHcGychu1DqI5GjcZwcbh2pMM27HIA496YC4UhFAx2xQpCvwMbTg0KpdQwb\nBxSI4ZcMMf7Q70gB8JlQcZIOe1OZhuYdRTE+7tZsnrT12rGc/ezncaNQGqxX5QPlxkGgLuAL\njBpQNyMoIYetAVfMGWJHagBmQOc5GeacrBSSOQO9GAzSjG0YyDTEwmADuGORTCw4SbQRspY9\n4XOQU7E+tI6jg7u3Wk2gx/M42DoKAHcrJnByRkn1oTCuAv3ccU2RjHlQc+jUKCwH8OO9IQBt\n7Nk801WG7O7A9aeqsNwyDjnI601kG/eDlM4xjpQA7JwBj5SfvCm8bT1C5xtp0jASYBynqaTc\nN2Oq55NAxFZVBHKheCPelZlZQQnz0DdtKjrQcsowMt0oAcWUMePmHU0xGCZ8xW2HkGlklOUC\nnJxzgdaXmNz8289x6UANwwkBxz1ApcuGGBgMM496CDxg5C8mmy/OAckJ2piHbcrtYdeaFZmX\nGAyilbooHHpSd2Cc4GSo6UCEGEfevK9Oe1ObK4KHanekVVY/McJ/dNIy
sVQKox1xnrSAcYys\nx+cFcZoViobJ3HP6UFPLkcAZGKSH5t2Tgg9aBsbuPmADIXrik3hFDMeWPSnowXergbuv4UK3\n7vBCjnrQA3yyVLA8elT+Zgq2MjH3aiWRuoTB9KXceB1GaYCiRdzFl2j+7TUwq5DZJ70/70m1\nsFRyBTVcrxjcxPpUsdh+1cqSufemjdtJC554HtTl+8D0x2poZmYlemcfSmhjn+ZEC8LnoOtN\naRuW6hTgUJtjJH3WzQw+UA8c8imIQ+ZjBAI68Uu4xRhoxj6U9W8tiOW7HIpAoXOD8g5+tAmL\nuZo8nOW4p27dk5yelG4MqkDG7pioxmNSQD8x6UEkkMn7xzj+HjHrSRRllBYky9SaXyhGy/Lh\nvUGkTcrM5ORnBFACht0foW4NG4YULwOhBoXDMQeO9KxjaQK2Seo20D1Dy/L4Vsml2oxLgFQB\nggU3ays2eCBnbR5hbbkYzjPFAxqsflBJ9xino3Iwp/OmyfPJyChUYHvSrLswrgsuMUwHrl87\nTwOKc25ZEwcgjGRTFmVc4+Qeh6URY3EkkcUAPU4Zwwzk1GsnyjaPvDGDTtwT1LDmjlmU7Plx\nn8KQmCy4JwuSq0LJIF3MVWTHJpu5lfCLgE8DHFTKp8twcFu5I6VQ0R8ycnqeckVIq/6tm+63\nU+lNEbERkHIz1PenlCFIHzHP3aCRfZTk/SnRn98TnovIoXJUEHAXimriOQpt3M3U0CHqzBz6\n5wKTcqswHzEenrTlzvJPU8Aim/LGpQfKSeTQIfHLhVZ4iy/7NSRsDlc8Z3Y7iot5iXh8p2x6\n1N/cbcPmHLVQ2LtIJEZDE87qftLbSfvDg0wbo2KDbg96nWMhhu5XGOPWkJj48xkdCTTo8pJh\nVwfSkgjBwT0zyasKqsSV+9mkMhjUmORHTCE8fX1qU5jb5jgbcCkUMVZXypznFDFJIyVXLUAV\npdrKSi5wcVE3zMYwvzevpU6BZGb+ENxj3qpOpj3qpOVO0mgZDNI8zYTnHBaqsq7cENuHce9X\nHzHGF4UdvrVO4UbOHwc+nehAVmcRsW8vb61AymSPdnvwKnDMcs/IHWokY5YryhOTSuO4m2OH\noSSRz7UokCMM9v4qcpDKTwCDxml3eYuD1/vGkMFbdMWOCuMmm7hlj1HYCncllGznvSMuxtp/\nKkMQfImQck8EU3zDIu3kYOOKkb5Yxjj/AGai2mNvU46UwHqOuxcFaQYjRvlO48jFPwOjD5zT\nnLZA24OOfagBMMQCSMEUknltMuOccZo2gthhlQuRSqEV+QR6D1NIoXzFjyCcZ4xUcf7puRk9\neaTYskTbj86ncf8ACnSSecySAbRjBHtTC4/bhnbpnpT0kfdsCYwO1RIsYmbkkGpEcKoJBPPr\nzSEIsj5BY4C9vehs/NuON3NKskksnyYU4y1RkfLkfMP7vp70CFVCoJ27iD1pPL24K8NnLE+n\npUitvkLZ2pTW+9szvz/nNABt3Z28qentSFQqbTzgcCnwgKpUEMOufWlZhsUOMEnge1MBoyqh\n9y4HUUKg+8rZDetPWJQpU4VW59aRl2oUHysOaQBHtiVtvJJ7DNSssa4yBtP92kj+Uj5WHepf\nkjXHQNz70h2GRorMEQFcc7u1SR7NxJbB7fWiONnQgHA9TSrb/vIyvKbsdec0gFjj3Ngj5+pq\nZof3RAI570xd7OxXAKnJI9Ka85dmKDEf8qBiNmNQFJZRxjHemtcrs2A4IPpU3kyMqhFIZun+\nNa+j+CdR1i6CNAY48Zy6kBj7UB5mDbkTSYDHZ/EyirtvpMt0xMC5PXbjtXrmh/B9FVTdMkY4\nLbeT9K7qz0HRNGhSFLeNthBHHzfjQZNniGi/DfUdUnVZrZk7jcPyr03R/hTa2cSG9nDS4HCj\n5R+PeuoudYbzG8pVjTtgVk3V7cMx+fPPODVK4uY1bXT9F0NflhWQIM7uuT
U9x4mMkSm3RUH+\n6OK51vmPXg+tMlcx4WNeWGD6VaiJysjRuNYmaQqoIz6VTkuJGk+eTB9KgZnQYzTPll4J9xVq\nKMHIlaSRl3KcHPSj5mcMq/P3JppbbwTw1PVMSBS2BVE7ijduDYXI6g0irnO8c9RSSbuSBznr\nT8sqhsZPpQgE3eWSUXDMMCo9oWM5z5g61J95nY9R0obczLu+QkdaYxF6gknNLGpk8wF8gcgV\nJuH3SPx9aZt/eAsNoB5IoEIGJXc4+YdeacOmQ2B1Pt7Ui4kkcbc45BojXJJ3Hk5K0AOPyqQB\n170oYKFBPFOba+c8jsKa0YVggXPfdQO4jKZARxtzSbdrHnPP5U9po1O1+G7AU5VDg464yaBD\nF6Mh5PUU/AaM89Rim7zuXaMeuetOUjdluU9aAGpv2qBjy17051ZvmXPpnsaSNm5KDco9u1Ok\nUoVzuwwyKLANkyIxuBBziopDEo+UkHpipd2dyE5brzTPL24ZlwaoBocfLG5OaFUtNsC7gO5q\nTiRWP5NSLH0w200yWNZdgK4xz0qRXdSoZtg6gimMmSxY52jrUW0jg/d9WqtxfDsbdrr11Cql\nZGZlP97iti28a3GQtwqtHnk4rk+UXaB1pGL5AH3QOlZunE0VRnogfQtctZElKrkZP1rEvvh3\nZXEBMAXBGV9z2rlvN8uTg7SBnitjTdevLfO6UspHQ9BWfs+xtGp3OT1j4ctZKyOvnE8kqmMH\n61wureCZYfNymc+1fQ2m+KLO8iWK4+Vm4JbpTr7wra6nA5iIkRhxt7fSueSaOqMkz5QvNCmh\nUqF2bRwSOM1lSRyqN2MADJ4r6L8SeApY4zAsfybchiOv096851rwjPZucQ8L82Mc/SkB5p52\nI8fdGeMcUecFkAwSp6PnHNdFqGhPLCWjg3ZOSpGDWHNYuuXb94i9exBpiFz9qPXLqPmXHPFI\n7LuKhu+elR248tw6nZI3XPpViJWRWLbQG+Y4pANYRfMCNy5+X1pyqy45wMYB9KaZIw/3c55z\n6UNmQDJ2gHO6mIZI2xTtX2LenvR5P7tCoBGQSanZkZScfvO4FLHGvltt5x2pjRXcFrhuAu4c\nH+lVJIY41Z2z8hwWHOPatGZV2gdC3T2pJkAU7FH+0T0z3oF1MuRW2g4IRueaXZnCY2r1+tWJ\nD5mRn9329BUSx7WKKC7gZDGgYz5TOYwmARk+lIu1WLpIORtHp+dPaM7S7DDY6+lLg+YCdskh\nXqv+FBJCxZYzuXaOh5yM0blXaryeXu6hTxTW/eT7R8qgcrSjyyp+Uc+tUBIsG3JLZXswNPVi\n2X3bT9361A22OEIz9+F9fapPm+RgQynGB6H0oETuwgXYPm4yG/u+1KbjZhmjaZTz+NCy/vCx\nHGP1pW3XDMwBxjhemKAHMWZRjC/xYH8qdDOSpYfMG/gFV/M2qF6FR29acq5dVGQWHQetVczs\nXUnJyVG1+m32q/DIMxoh8twQS3WsYKVbaBluVye1TW7NbxptIfd83mdxVqTJlG53Wl6hlSSd\nzk4K16b4c1GFbeFkJDY5GcivE9L1LaoBGCvJ9SPWuu8P68I7gIX+U8gAcV1Qmc0o2R9A6Hrx\nmHzNtcHA966aK4S6HzLhsV5JoevRySJtI9a77Qbx7gGZ/lXOB7106SRzaxdzUmTCYyBiqbTP\nyTtOBwKuysrKzH7wFZskTRqxJGeoNYOJ0810DfOwZTnimSSbV+7gjrQkhAww4xxikb5uX59q\nQ+g6MAspUFfrSyR/N+pqNt4YKM04Lt+ZnJwelPoId5iGNBtIahTtjcYO4n8qbuQoro2QOMY5\nFOJcRjHJzyfamihQx8vB5BGKdtJUFPl7UzlowF+Xmpdxyqls8dKYhyvmEkHO2nBstnqPajau\n04G0dPrShVGSBy3HtSEOhxhgBt+lK2dg25xmmxsI/kC5/wBo0pkC7QT1PGKZ
mOiwzHDc+mKf\ntYKcnJ9KYuVLHPftTmkKrvPJq0BIedv06U6JertncDTUy20kZb2ok3Nlk5YHmqQEuGyxx83X\nFFvl2JI+T3pOflcnG7gmpYW2xunJxVCGurSSAbcDpzU2PLT5l3Ed6ZGxZRnr709c9zk5+7QA\nqhmySeP7tM2uz4DbQOTR8pLZOxqaSd2T06e9AxfvZy49qY25seX1z3pVxtwppxcLHgDcSeaA\nZN5/+ytFQbYv+eTfnRSJP0sVuDS8FunFDKcZ7Up+XntXyB9IJjdyOlJnb2pFbAPNLg4zmmOw\ncEZFIwDc0Mo4x+NA7gUCsO5AzRnnPajiSPrRjaPWgBrJ3FN2BW3CpM9fU0xcqtPqHQNpLZBw\nKcq85J6UjZZRjmnL7UCGyKGGRTVVTGeMH1NSewAppXHuKYIWPGMH86OnGM0N2ApygnntUsBP\nLJOc8U5f0pzHaORn6UcBckUAJtPbGKa2eKk2jr2pPUCgBrelImVyOtPB6560wZyaQkDY4pGx\ng+tKV3cUnlndntTADk4zUUi7hkCrAb5cd6ibKrkUCuQiPqc8+lOx6DFSJGWXJFPX3HtQPoNJ\n+UDPFLxtwR+NKcDAC5prKUYZ6GkIUY6jmmMwDe1P4VSRTCCq+ooGgKBsUvzRkHqKcoHGBSqd\nwyaBMUc84o6DJ59KC3bpSfdUUwGD5iT71IyjHAxSceZkHin7crQxix4JHr0rzH45aaW0MSYz\nwck16goCgcc1yXxatY7zwzIGG4gflxQI+FfEsGxmB5PTNef6t5nzCNsAHHNen+KIla6miHG0\n4HvXnGrKkbOjEhs1SBnIyoPmLNis24yqY/gz271sXR8mRlUbu9ZE5EjNx8xH3atCMiSIHI3b\neelU7lI4lYq+WPB9a0Lw7VVtmWHBqg8aMX/vYq0QZjltoWI4UHBPrVebEiupHzDirEkix7Qv\nTvj1qtIwO4k89SKolooBXjyAu4gc+tV7hQ0eRHsH9avyW+NrL1IzVZlXzDubDAcLVkkLK42s\nD8p6imONzZ6rnNWPMJj6hm6YqvJM/wAsapt2nk0AQoCY3duQp4x1qGbdHsKrnJ61ZkkFu26R\ntmeoAzVeTfIysvAz09qZRGzBmZXGM1XeMqxUjkjAq60Ymb1YDNRSBmwTgt229RTAg5UYAztH\nLf0pisqxlgM7uaVbc7jmTDLzgU5drqSRyT0oEIp2qCsZGeCaRflUo3I5x7U6SV1VVJJVTnAH\nSmyDf8wHfOaYEUkLMU3P8+Onajaxb5vpil8xlmAYZU0hYmMnOCG4oGIYzu8ojkDPSoIn3OVQ\nZPQ5q7I8kjKp+VsZyPSq52jcfu543e9MlhuUvnBVgO9MjQxuWB+Zu1KxDKhYbZFPI9aPmQNI\nBvBPK55ph1I/LjYMx69Me9NV+EDoM9CxpzYkYbVZAP71JJIc/MAyeuKQMGA3AeZt56Y6ihcr\nJlRhPSlGwK0g+c445pHQqwlYZH90U7gO8wnJGFbpmoo1VWz1OetSELjcVIbHTHFMwVXBxnrU\nhuLt8xSNxDZpWVlcMfuYxj3pHDD52OY+BtqIs65jOdpORntSAVgWbjjb/COtNXncHfYcZzil\nbOAd3z5ycUq53uxGNwqkIRssy/N1HQ0nyxuVxnIpyttjIZd2KJfmXpt54YdqQ7DMCNctkt0p\niqcNnIz0qRRIW37gyr60xv72SBnpVAN2jjjcVNC5WRi3zDqPalA2uWUn0pxyudx4XmgRHudm\nz0XHFNiBCfM3GehqTcdjYGFz1pCu112jg9CaTGRfKrZOSD609n24JXd9elKy7juxwelMZd2F\nP6UybDv4t2O1Rhh0JOegxStlTtA4I4Jpzx4ZBkIG6kUFJCjMfDkE4zxTDI2Qo5HWl8wbSfvJ\nkrwOTSRfuVOVwx9aAG8szNtAHQYpY+jKDyOop33iNy5I6Ypgyu4KBvJzmkIdDg
sZFAzjApB8\n3ysMDqWFNx8gIG3noKX+JlYgg8j0ouAvnRs2SuD601SFYtz7GnScsoJHoD6e1JkycbdpBwTT\nATlVBzucmho+MFSy5ziny/MwVSOO9MMqsu3JLf1oAT5ZOM49Dj9KbuKMEKlSD90Gn8SSHccs\nvoKJGMiLt2gZ/GkAn3ssf3ZzjFJ82cbcj1NDN13DcKVWO5AW+Ud6SAZwoGFP3sH1pWUMpToN\n3SndwSzEHk8U0BfLJBPXjNUMk+g4AxmmhmDKCvQ9RQ3Ch92VYcqKawPXG3dxQA6Vx869SehF\nRkhIh8vOMcU5R8wEa/Kv3mNGRsZzwAe/WgBGRflBOQRg+lEgZcjoM9KXnYwGGXOeaazFVYnh\nscfSkHQOePl/4FTjuLdcgUO53BAMcfnSNHhl3Hg9aYgZCy4z8+OntSGMqyDdnNO68g4IOM96\nFAaMeZyM52jrmgAb74Ucc9qGTyVbI3N2NNyZJNzAqQfvU7BDPtIy38JP60DBc+WN4wrcAgVE\n0aqDgEuTyp4p6kOpRW3YP5Gjadpjd924/rQII2wBG470iyOpKbcknqaGyzYDbttLuDfN/EvU\ndqBgQd/B2dqaSqqemM4oVd3Ibqe/WkZUVjt5PpSE9wclDkDnFEmJHDIOQuTTvMO0llxil2tI\nAARuxyKNQIVQ7dzDg8ipWxvHAXcORTdm5cFuB1FI0az7ZQDleB70DBowG4OOOBQyruG0ZGOa\nRsMCSdp64p64OMttyOtAhjYYAA4VecU5H+YN3PBJpojO0AfMTzt70pwWGThv7tMBVbryN2cc\n03cWbYGBZew70jYX5hye9L5g2lv4iOtA0P8Aljzv45xQzfKI+p6j1ppVWY5OTjOO1NjxuPBb\nIxQASr93HJBydtGS7dMGncKuEOBikx8yndjI6mgQ2PG4Ky++6ghFB+XnPUUYBVtgye4pVxIu\nRwMYoGNZjhFXtTZfmLN0HTdTt+0Y29O9CfdZW554FMBuQUQlWLKMHb6UH5WBA+U9KXcFyN3J\n7LTdy78xKRtHc0gY7HVm+cdd1Iu1lBBySenpSIw5POT27U8MWU4x6DNAhJFLZUDC9aUKW3AH\nAHHvSBWjdfm3UmWEmeQ785oH1Bl8v5T1x+VG4M8b5wuOVo2lQowGOaT5wcMoAI4xQDHhQ2QV\n4Y5pPLMbkMQFHIxSq3Qt8p6YqPIXdk7yxwDTEPboD039KYzYkwFyvQ1IjFlIZcFeADTFjY55\nAAoFqAOMoq/eHWmwqDkFt2OB7UuPl4BU+tI8iqV2L+FACKyso38PnH1qRtynkgL0/GmfeUuf\nvenpQ/z7FwSvU0ihfLMfLHeQfu0oZljbdhc8+tIzDzNxB2kbef50KvRTn6+1Ag2vgEsBkfdF\nNi+TjBOTgUNH+8G1vlzS87WZWwCcDNPoKw54zlgSCtBTbgCmnC4Jypxjbn9aFUFcEHI6mgBW\nYnHODnFL82WxwcYpI9jKRg565o8x4wVHzqeS1MeoscaPJGdp2jIphZ0XauOW5NOi/dp14PND\nAlkydyjsOtFwGP0VmOD1470bQuWByWNOkAZuuAOlG0FvlOeOaAEZmYFVGR1xSSbWXLHGO1Oj\nyrE54PekMf7vdtA/2vekIRdqqp5O7gikUGTcp+U54+lPVmZQdu0jqKR5GwQE3E9/SgYvlljg\nHpzj1oRVd2ycUuWOCq7QB1pPl2sQfmxQA7y0Vd2d3NIn3TgHLE0jgKu3P1HfNL0jwT8x6EUi\ndhGyCNyjNKzfMAV+XrRt3R/MfnHeo/8AWduPXNUgH7mwV52E5OaY2W5A3H0peJAPm/Cli5yp\nODUj23DkMu7G7vRzllYZ4/GmxrtI3cjPJ9Kdt3c7s+hNAho+ZQADtHrTmPlgYIPOeen0pCG8\nvBP8XahWjZsNyP4frQUBRAGdBgMecmmy5bYeSM9aeVBkCjrjPFG7bhsc9zTQiNWV
92DjbzSK\nzSx4AwAOvrQON2UyG6sOtKsca5Vec1QxBIAVL9OlL+7C5zuOeMUKdgwV3DoGpMgrtxhgeWoE\nwOOHJ4Bwae7HdnG1cUjcrhflbORtFG4M27Hy4wfrQIUnbgscDHPvUa4ZPlYkk0O26NzjdjpU\niuV8souCRmgBxfLDB+UYG2mZ2788Acg0rOFVmb9OopnG0DOR156UAOyF3HG4Fsc9aMOyHjAH\nNICNuMn3pVUcBSS+c8+lACeXtYLjczDINK2BgHjnpTcdSeGB4NK+13VjxxgmgAVvOmI7Y796\nXzF8sKBkZ6elCxqyt3K/dNNVMLlR9akGGwopw4PNP2ho8BcLjvSM0bKMDYc4NDJubBJVB3o1\nGLHJ+8O4fw43HmmRtuZ8cnpmnKSuBj5lNKBzkx4JPWgTG/wYxyD370sm9tmQAueTRkM2W4GO\nMU1VGBh8UCH/AHiQTtTPy0isygFkbHTd70nJRienv3pWUkId+F7CqGNkX95v27e3XrTnPzAN\nnbjNDEqS5IYdM0wN95iNyAc0ASKh8zp26mmbvLYKBljwWpT82wFiq/5xSnGGQn5utEhDWUxz\ncHDY4zQq7ogQed3NP3fPvYbtoxSBtzYVNp689KBirnzCcAfypC26Tk8+tMkkHGA31p7KzFdg\nXZjr3qQEkU43gkru6d6V1Xdv3HA44pNxDevFKV2jB4GORTQWEZAsg27sGm7W6g8Z6U58YATI\nwM5obKFGZd27qKQCYDbixGfalP8AAcZGMClk2x5bbgnjFPizIuTgJ0980IBAvktj+JxgU3a6\noSTgA4wKZwzEF8jOM04Da21Dgk96oYMSu3hm9eKQRsFLBMc0+SQu2M42+nSkGW+ZSVz3J4oH\ncjYAnAO7jhacpT5UKfe4Oe1G75mGBkDO/wBfak52nOMEdKRIi/LvUnK9qJvlhTHHPOKVV2qe\nM8cUKAO2SDjJ7UwDaob7pJ6HmlkA5AyFJzTNx8z5mz2oCndjOaZI/b8vQFeuKarh/lz3zjtS\ntnzBgcGkZhuC4IUVIx3PJT7/AHpFwB9/J9KFYQsGX7vekHVcjKseTTAfGxiZQo+70qOPcm7K\n/OxyaUv8+RwqniniYys2OBmgZHvIC7F78in/AHTgd6TG47lOKGBaRSvI6fSkA0fdIXhc4wT3\np3zKGz8gxziiRdqvnqD0X+dEattyeR3z3oFYI/vEMMjGcUxvkABRiOwBpwYMWYnBxgCgq7Lk\nAsoHzZoaATzDvYNnDdWp64LNtX3+tKw3MshO4gcU3eFAboW7d6BMQqDEFC4Oc0ke4tlWVWU8\nA08KVU/xH1pGXdLwvykc0FEiL/rG3AZFMOCqbTx/FR5IXaMjBOMUrIrLtA+ZW6UhikDg9Rjg\nUv8ADx8vekDbj83G3gU1toYgHk0hWHAjcT1LCgrt2nOPUUmxVXhst3NL5ZySzgjGR6VSATYr\nEsTjHPNP8vcNw575zUbLh8kg8YwOlCx7sxtujyOMUAyXlWOfWkZsSbduVI6Ugz0LAnpT5G8u\nUKxU/LxjvQBEwKKFztwcg04tlR1z/P3okU8fLhS2Mdead91SCN3OKWpPUau5ZPmOfT3p65fK\nqfcjvTfkZlCcEevanq4Qsu35/wC9TGHmLkHGQeKFjR23EEhT69KRX24wvP8AdpzKGbrjPXFM\nYR8szHOPWkV2YEsRimTEr5aqCQ1OCbc889qBCxyBlYNnOeGpfm4BIwOcikK+YrIRyecipTho\nyMYwuBQBHIBuU48zJwPaky0bHLbznGKWPctsDwHHajcvDYJYcnijqA7hnAf5T0pZNmGAcDb6\nnrSN2J+bPJpm3zdwEe1W9aGSPEZ3FPvD2PakUptbYWT3PejO1tu3cMdqGY/cIqikODtIoy3T\nt/WnNJ+8VXG1O4Hf3pvEikYww/ipys83BKnaMClcQ4DLHHzR+tC57dc8k03yyu7hlA
PFP2Bv\nmY54/X1pCHsvzEIpI65z0pnKsWGC6jgGnqvlbcE4zz70rL820jhj1/pVEiKWVAVHzNzihn53\nAZIHSn8KAQMdsGkZUVcgYYfrQA/Zuw+RnOM+lWVQrsYPj1J7/Sq0a7mffnaTwKt4WQJF0Cjg\nmpGSKzJlXOWPzDHepI97bcEAE8tUO5myyjGOAWqcKdyKOARVDHybW3MDlumaX5Uj2rwO9DL9\nnyhXLdeKMmTblNoxyaYEExUqQFwo6f41VnGWy2GZuc+tXZMFTt47GqUiJuBAOfSoAqzLj5iu\n4jtVOTKqcnhulXp/u5AzuXkVm3DbcBQQAOhoArsS8ZZvuDjFQM3lLtTIDVNt+Uk8H2qHGWyT\nnP50FEm1dy4JJA6Uu8NIVX5hjJBoZm8skja3b1pAWVm2jg8GnsKwoIf5gNsnTbmlIPmc8lRn\nmkwwIPR+nFKuGyoHzf3u1BSBcysWJ6DIpWIjQb/vnoaZ5Zi4z83apjGrp6sOualjI224+blu\nvFSbzgNt28dc0isNv3OvGaPKQocthh2NMVgVWGSSCx6imiRvmLd+1KzBpQACD3JpVRFkU/Nk\njk9qB6ieWDGExuY88UnzyMyfcCj0qQbNpK+vGKRkGw5BMqnIpANT5dpBDLjBx1pzNn5wuccY\nWnMqL+8UEc8rQzBchRhn6CkIi2sWQr8pxjNO+62By3txUg2lTx8y/pSNEuxWZsljniqAaUbu\nPl+lKibGYEY46mpuRIuXy3v0pV8wneFGGO0g0gGJHuXIIB9akkj2qpfBJOAfSlkjRVY5zjjC\n05WC7RjIxnntSCwi4RVBGZAOPQ0i4VlAjyZB+RqZcfJuONvTNG3zAAMgg5I9aRRDHCx4bIfd\n69qsSx7WHAK9BmhpV37mwF6DBprSKdzFm2dOKCQbaqkoCSD0pGxbx/eLtncG/pSw273jqkUb\nS/Sux8K/DG+1x43AaOIHJkl4H0oDY5COzmuJgsCsNxAPFdp4f+Fd3rEgVnaODozEYFer6F8P\n9M8Nxia9ZPOJI6ZyK0b7W41fyrWMIi8DK8U4pkOZh6P8LNM0RYpLuVZfLx8xH6V0j61Y2aGK\n2t1J6BsZOK5+S+eZyjqxGc5J4pFY+YGCVpy9TPnLlxrM80p2/JDzhR61TBlMm7e2COc0vB5x\njJp24MQM5zwapRI5hGLRAFvmPWlZB1zwRmkztfaxBA70kY8tsA5Y8j6U+UVxFXbH/d5pyFVY\nru+frSt+7IGcqeoqN24YqnzdKYCnHzB24B4xSIqk7lG4+lOB8uPO0EMMnPWl27gG+5uGNtPc\nlirGrMCR05NK2ZM7Rgnp9KdHGY4xsHGMkUqtuOQMHoKBIZHnawAPFEYk4O0Zz1NPfcw2jqOp\nFNYHy1G7v60DFl+6xUZYnNJMW3or+xNMVgoPcg9qfu3tkjHHBNK4hfmt2B5YNzQsiyE54NNy\nyqn8Z/zxSPlgSVwO/rQAjZUjby3tUq53ZU9qjjyVJztPUH2oUiHOT8p71QydeFGRhvc0wPls\nBsE0FgCpPp3pEkEaszcrj06UEijy0UlFyR/epQwcAD7/AFpyyIoB27uM5pildxY8nHK0D6ix\nK+5s8nGT9Kc0y7kjjQ4YUcxqNvybhzmhcxtnHzgU0A1v9YwRjxwcULn1II6Zp2dsO7HU0gyV\nAxls5/CqAPnkG7APGDTcqY135LU4gruYsAue1M535OAMZ5oEJLsjUZBLNxxTWU7UJLE5xgVK\ny+ZtwPm60KrPwGAGcnPWgQxsbWCk7h60SBJGAdvn64FPDIc7SB6GkjYISx+m/FMVx3mN5YbG\n0dADTBG+0nO1s9DUu0TAZ7UwZkBDckdKeoCxqzcAAHuacp2qPQmmqCW2jIXuxpwVZM7hhR0o\nASOT7yhcE8ir+n6tNYSI6O2c4254qk3yjKjJFOUpHglsA9c80nFS0ZUZOLO4j8WW+oiO
G5GW\nJx83rTNQ8I2Opxy+Vh2ZSTznH41xu5Jm2sPLIHB/rWjpuqTWrAeZkL/Ce9csqVtjsjUXU5zX\nvh28OG8wsM4DYxge9ef654ZihYAjac9v4vevo6w8QWWtReRcoI5G4BYDGaxPE3w5SeMvbhWV\nvvMw/lXP8L1N1bofMmqaSvnLtX6FRxWPLaMuws3yL6V7H4i8FT2bbyrmPu6rx9K4S/0FSyLJ\n+6XPSr3EcmzhowwIUM2AGp4kDKfm2gcFetW7rS2jaQZ2qpyrYqn5bQx4YbR1FArD3dViXYGj\nY9TTkU+XI4+6e/rVbzXZ87vujmnxOZ0+U4HWgCfJmVFAAHUUsySxsD0XqV609lVdpx1XHHrU\na7gv73Ix1z3oC2pFKu6NGP3W52jtS7nXIZQFbvSrGqxnCny+oobK/KAW46+lLUZX+8CWAYDg\nVCyN5R+Ybs9uuPSr0e3aeitjGaryK/yk4HH3vWmQUVjzKTjB689cUqxjzCC204zUzAtyfmGe\nSDzTQv8AD78ZqgIElGzcy5I4pdjLGh68gLU8IUSHIwe9IqCOMs2SSeG7EelMBFkUAsWwUbpj\nvUjTK2AwIbbnNNLNJtJADDjjnAp3mSQsFMeQeFPY0WELErNGY8ZH3h6fnUkbO20sdpzgYqPz\nCuVIwehpUPmMRGcj0pAIdqzSZkB9B70qyCRNxOWXpihWO0JJswp4bvTrcoJHVTnd0OMVaJaL\nFvdYmD7WwB8xHeuhsbtJ9mz68GuWa4MIGH+YHG0dPrVmwnaGTcW4zncK0TMnE9L0bUprNl/e\nYHUYr1jw3r32j7PEZOvVugrwXS9QEZDHcwJ644Ga9E0LUl/dhHzjov8AXNdMJanFUi1qe8JC\nAu/O4EcVV1BfOTK8SLywHpWZ4f1s3FuIXbeo4B962WhClMkhm7V0kqRl+YzY8s8HrmpDu3Kp\n+960txbmF2A4UnjbSbW2hT165rCxsPIlVgOG9TTFnZWbecL06U+NjyTxTdqsy4OzHr3oELtE\nbjGBkZI/rSrOGUtt+XoB/Wk6/KDx60oh2y/7OOlOxSFXDR8Dk9aem0qMghl70m3cxdeFxin8\nhRnjjmqsU+4q7n4PHORineU20qxz3pqLu5+7TzIrYw3zUkQNWMbOORmpFTa4IA54FLGzNuUD\njFH48/yqrECRncpDH5gasFGZBkDFRQqkj8j/AOvSyMyA4GV7AU0Il3BVKq2SOTxSN2CY2MM/\nSmxud5BHbrT4W+YrIvI6baoVhfMjEIVTuAPenwsY8nOc9Kj3LGT8nB706NQsg2qcYzRqMkWQ\nZ+Y/NSH5QSx59Kb5gyV253cZNOaMrtUjdtNMtCDay4ZgpzU27cmMDf60zy08z5o8bugpDHIu\n5SyoM5JPpTESJuG4nGMUw4jkYhTjtTJMqxYjAPA96dJktkgj1pAHz/3hRR5IoouB+l275RRz\ntINCqdvJ5pzZ47V8iz6EZtDMM8cU9lHSlZhxxTXbbg0CBl29OabyrYpXbavBpF+Y5NNlD1GO\nQM0si9weaRCV9waUfepCYxV+Y56U3nsOKkyNx5pS+5RigRFz2pygqoIGakUAc4qNAe9IB5HG\nRTdgVSO/Wnrxjp1pyrh+eRTQEbJ09cZpyr83WnM3zcChSE6nNUALluO1H3flIyaRj3NH3uRz\nUsBW+VcdKTB25ApN2GweacN3OOlAAzBVzgUzkgY6UqqDnNC+mcihiQfw/wBaa2dh96a/yZya\nj8zqCaAJFIVck804Y25PIqJcM23GaMiMkL1oYkP3FWz/AA1IpLYzVUyEYBNSqrLgbqRRLI21\nutIxyMnkUxgcUo3dP4RQIVl+U4NC5B9aVW9qQ/KTg8GmCEDbWJwcU5cbcjFH8PrTQpVM9qbB\nitkdqQ/nQ3ODil3EYpAKqhuBxjrSr96k3Zz60ijaM5zQwZMPm4JxWR4ytReeHLiMY3/3jW
os\ngZgM80ajGtxYyxsuRsPPpxQwPgnxvaG3v7sA5CyMM/jXmGrwtMzV7j8T9PC65dKOBnNeLa1J\n5MjAcv1xVIbOMvoWZip4bpWJdfu8c5I44610V9+8dscPWBfwusgY/JurQzZkzYlnzjp+tZ9y\nzDcVUBD6da0GUmZ8KQAf/wBdU5gSu0DIzkNTJMm6j8sBiVyewFRzw/LkrjjrV2YpHIFYbu+T\n2qrJIJN6fNtPTNUiCpN95Tnp1qnOy7ixTd2ZvarLKFUKwJHTFMZRJGQo2gdc+taCKcm2NhGq\n/L1B9KZuXc2QUbH3jVnepgZ3xw2wDFQzQ7WyOSehoAosP3ZVsOM55pWO5VCxENjn0qS4ZWbn\nhl4PFNaF/MGf++c0xkEj7cBm2lB2qAsVPB5P92p5lEe4AAFhyPSmSRGN17Db1phcbtDYZD0+\n8RTJQu4gNjPPNTJvjYhQMYz0qEyA72IyRTAaC0YKhuvXFJ5hI29DngUrZZio5ZTTGc7GwuTn\nrQDF+aSRcDIHU0yRir/KMk8Bu1OXC7QCfm6+tKy4jdFBLHoBRYCHDLkn5TjGKbIGDAY4Xtin\nMjH7z5c4ApZXbzCDy2MEiqGQyKzMDjCjmjd+8IHpT42b5gRuUDvSJtZsIcHHNACM4AJ+8cVE\n43SLgbAwyfepGDHCqmSTxTZkKsVKnePyFBIhT5SBjy80rE84bAByFFNdTHGC3C9yKeCpUY5Y\n9DSEMWNWztyH7kmlfaI8s2TnHFCqNpbndnBFOwjHaRwozwaEURYHDAkgH7tKxLZGMjrn0ojE\nkysQQmOlM+aQYbjHJxUhYaSArHb83QY71J911y2445puVlC7flwc0qqBIeOW7+1AhrKeVHIY\n/epszFjtU4GalhZWLDGecCo2QsrKeOeMUwIj8qsVUnnpnrTvvdBt2mif5sbQQowDin4LR8N8\noOcUwGbmQZJBOeRQzLuIxlGHLU1mRVZwOTxSsrNGBgL220wHeX12+nFIW+bHLcfeoO6IFFOD\nSMXGApwByaQhskZY8t+768dqQqARgYLcc0kjFoyccE05pM4GecY+lFxjHO0FSc4PAoZGlwRw\nV55pcc9PmoZvKj2v9/0pgH3m4OzPehm67m3jOA1NjJC8LjmnMpOS2NpoEIVdYyu75+2KZHmL\nAYZqRdqkHJZu4okb15qQGBTJnAwPemN5azJycjr7VK2RGGJyoP6U1s8jy/lbkUwBotzFtuSW\n9f1pdwbcBz70GQhsdscCjcV6jLnliKYDUxlhnnoDS7WxkLz0NC8A5OMnOKNoZt2fmY460AI7\nH7wG09yOtN+Vyd+Qx6HpUjcHaz4dT1prFj8xP6UAIrF+AeV60iSB1wR8jHFO3AdsCm7VYe3p\nQAo2qhDHeV4ApI8KqbxtLcgUmQzZSMkr6U4q7SBj/COtACbTuG5dqjp709XKwtGBgsc5ao2V\nmGc575pW3My/SgBArRjbnJzmlZdzdeByaQ8M2OCBn60pUxvuU845zUgJtRmOMgnkE0Lnc+5e\nFOPrTmKyIOckDtSbjIBtHOckVQxuVXdknLHNJtcLlsKPTvRJiSTbkBuoHtSMBNlm7cUgDJUg\nKuB1JpSwVQ2MPUiqFiwDkd80zchbvzxzTEB+6fmzxyRSRxt5eUxn3ob93HhORnikVVmTjKSr\n1NAx+BuxwGxyw6Uz5V3fNubHSnKgKAA7h/KmSFVb5TntuoEJGoAAK4PXdS7sqxxjvxQynJXr\nxxQedu35eMEUAJs+XJ+VutL/AMswqjOTkmkPzKhAyOnNNIKqQnAzjPvQMkO/j5dvOKZtCMyj\nr6inhmEnzNuAHIprLhhk8nmgQ1VCkheH9T3pWdztQgYznctNdjLno2P4adJvG0kj047UDBgZ\ntxQ/dOCMU0fIoYHDqcc80bDgtu5H5GnTMSFUkAsMnA6UgEWQ5Xsw9O9G7y2POSehojmLBdo+
\nboTSn7zALgHimAgJByfmJ6jsKCynaCuMfrRMAsKhZOQckevtTlmPL7Oo9OlAMbIxVT3X+9jp\nTpBujUKOOvNRoX2BAeGOaVg3mnvjvQDDc+3ATcepY9qSRt0XyqSMg8UrOzYGQp9QaarcM2cb\nuKCeoq4ZdxOMmj7uWYhsU1l+X5lLhfSlVk2ncOOymgYM3PPAYZwe1NYLGo3Zcnnj0p28vheC\nB1zR5iIrKVLMfQdBS1GMVvLkG0bR1+lKrAMehPXFG5W2FRkj+93oGEDtkYPbFMQrMRjBwrdR\njpSI2ct0PTaf50ifIwMfGRzmjaF34IBxQAKAuDjODwaXcVUkjJ6Cmx/eCYJBFAUsGUEkjoKB\nincNhUZccn6Uu0feySeooWZeOdrDg0sarvc7sgDIoAaGypyPmJpy7SwXGB1psZ8xWzwRzmn7\nSqjDD5v4jTHYbGS0xbOcUbWIbIPJzij+IAna3fFIuW3fPx60Eis4JAIpsjHkNwKVY8FUbAzy\nZPalYHkhgU7N60wGr8uSOnTmkwyqMsBz0PWkQNIrMSQvXBo4OGPzHvmpAXlv4s80rAljhtrD\n9KAwD/Jj19qRcYLFst3JpoYrYYBvMy460j42hgvzZpu3jCndk/lSwnYzsw3ccA0gHOq7gCee\ntIzBVYbv3Z60LFz6+9DbS3CnHbJqkIB8qZxgYwBQu4xkDAXHQ0rMrMNzfMPShfl5f5gT0FAC\nKcqvHQdKWP5VJA+foPamxgCR8DIoXO0so+b0oAcPlVmIyFOKRscbDyx5FEhKpj1O5qCAJhhf\nlxnIpCBmA4YYOeDSCXMJ4yV4pRlpAHx5ZPB70D93vGPvcCiwCbiGUAFQRUiqMYByaZlwNwwS\nBik272Gepp9A1F3Db94HHakOFbA5LCneWFbJG73pFHmAEfw9TQhg23dk+nNJ3Tn5c0Ha3Y/h\nRH8sm7HGMCgQvCx5B3BjmhVG5sp8o9KRQFYEncv3T9aXyyrNHuyfSkFhkY+XIXnrzTlZS3mE\n7RjpTZflULuxng03byP7gHFAmCswjLtyM9fSnlRlAemOtIxAiTkEZpX4c7hy1IY3lYzjnmjn\ncBgDPNOXauSV3HsBSKzeWWkG3PC0DBmCxlkXkdTSKdyBQcsaUfKDlc8YxSFVAU7So6EUyQ8t\nmcbfvCmqVaXEi7UHUmpI8bWUHaw70wrEyDfuYH1pgOk+UjjPcfSmL8uflzk4PpQ3+tCk5Udq\nVgPm2cgnhaYC7SpbJ29i1IrKY1Uk4z19abh9rcYUdSacBlAGwXzkMPSgBFxtYDhWNJt2xKEP\nzbsZp0mPuheT1oZljj2g/e6UAKcKGY8npTWVZNpxg+lJJGVKseAeMepqQMG4I+bGKAGF2VDl\nD17UfwlX+XJyG70iybVKA4b3pWVnUEfNt70AKq7t+0/NjAWm7g2E3bv8aeyAYfdnd1FIrqoO\nfm7A0hoT77btvyLxSSfdCKxCE9vWnZ2xhdhDZ5xSMPlKDnuCKQWHNkLs+XaOpHrQzFtr8Kel\nIwEaqwOR3WkC+YwZl4HqaYWFxzt3Yp8fTjnnFRlUU4P3mojU/MV44IwaQmJG3yuAuQvenIw2\n7lOfwocFY1T+H2po5fFMQ5VLABvmY85phc7gMZIp0LNtZW4PrSov8e7GO1MBPlUBgp3Z5pC2\n3eh4JGeKPMwxQjKtzmnL90kjJ7ZoDYN26HIGXU4/ClZB5eOFzzzTCw2E4Iz6U4IG7ZGO9Aw3\nHbk4HbANHVS3OVGSaRYRnOKNwXhjg5/CkAq53cDJI5NDBI15zu6c0i5YgA5PahsNIcjcVPSg\nAbPygfexmgMu37pJ7mlwCwbGCOBSSZU46ZP50xoVVO3IbK01WPmDnJHag4Vim7YvXFO8vbjc\ndufSjQYfLI5O7Jx0xSfdIQr2zmhWCsMKSKXcecnKjpmoJEXaGzjaT1FPVwuNwyeoNNeMqQfv\n
F/Sjdz/dI4IqgE3N8x28NSM3908elKXJ/dlsUjZXHGPegLiD7p/u/wAzQylZAevGSKXiPEhO\nVA+6KaZeQ2B0oBilGZgoYovU/wCFG7y1ZBzuO4nFBjZh13A9FPakRdowc5z9aYDn+Zl3Bcdq\nXaG3KfldWxgd6Zt+oycZI6UGNvNLg5BGM/1oEOJRScZLDjFIxKyAP/EOtJtO3ap+Yc59aPM3\nY5yPQ0APVlI56ikEZwTn589BQu6RScYPTjtUfl/MAxI96QyUBt2cDd3FNDEK527QTSOxj+Vm\n+m2nLvYBSQe9IBNpzxwcUCNg+3uemKSPLSdd3NKq7WJGd3NMQoUDIw2R95s0i4f5ep6j0ojX\nKncxweTSxtuypyBjgL1oAavsfmzzmnbzIrH7m3v60isFQYGPbvSKyyN9f4aaGETE78DvzTmH\nnYZWCsv8TelIGHlsVOx80BVk7dsmgdhVkPmHvkUbjJCOMHocU7PmqvYDj60kjLHnZ8pPGTUg\nG0fgOaezFZNwXt0ppjPkr91Wz1o+dQCwyzjA9qQxed/H3DyTSGSNJcnrSAtGCpbdgcCnKqKo\n3j5j1qgEjUfOMbt3NNCrJH8vDKfwpcbfuLvJpNpSMsoBXof8KZI5mRpHI429/pQPmUMzZPUU\nL3KcMf8AOKcMbRldoXqKADcJtwI28dMUbRtRdgZPXNJxJuYj5v4aVZG4YAKw9qQApdppGT5I\nyO9IrM68jp1FL5Z3FzyxOSM8U/zBM2CDs6bh60xDJHEihVG38KFyvzHhBwSaUAqw4wV6UuN7\nHcvPU88UDFLNhWPzDtxRIreYFT5SD90il+WUAvlU6A+9MLtG8uTljQLqOUncAWwF/KlXMkhD\nnbzSKpZMYCjGfak5yzYJ5xikwHKo3fLwSaM5XlsHsKbKwi4GWZuuKUxo6g/d46GmAu0PMqZ4\nx3p7jbnYc+jKaamGGPb7wpG+VRg7R6UxMd824M3PHIpVz8uDk4yeKbtzuRn4YfLQp6hflPoa\nQhV4VXzy3AxSYEb8/NjrQFCsoZuP5U5SFy2MnPIplIauQr4GT1AqQYUKVGMj7tJu+bGcd8/0\npdoHAB3MOeKYmSLmNTvHzueM04qD8i9c5amKCq7WbcoFKMSIoRsBuDilYQoZpWb+4OnY0rbY\n8EAluuaF2ncF5CjHNEYbaPmGDxTAcd0kacY706Nhv+cYB4H1pR/EBxxkZpqoxQbyMZ4PvQBN\nGvysu0u2c5z0qZcbQWb6HHSolDcoGzu9Km2s0oCjdgY3VIieOMh2BcMPWp4YxuK7iSp61Cqq\njZ/vH5vardurCR2jcN6j2p9BCLGyBg/ztnhvamnb0dWDdqsRx+ZOdxyCOFojjDArv27eu7r9\nKYalSbduKg8H7v0qrGxkkP8AEy98cVYkbHAHzDioGEka4K4Tr8tSMz2fazZBJztHtVGbPzDd\nuUGrtxuX/VnIzzVF8bQGO3cM0DKkjeW3mfcwMDNMWQXEYfA3qaSZBJJjqKTbtKgDH+1mgslV\nt8haU4XHWm7T8wJ5ZvlpWUNlWGOOKcqoTlnO7pQUIrBQSATjgtSED7obBPNPwY0I3A880u39\n3u6Dt60XJemo1vkwW5PQGljk4O89P1pkhyynqf7p7U6RXE2CueM4qWAjzFtoJ2sD90U4ktN5\nitlh/DSsFkJQDAXuetDKyqCvQ8ZFAXELbYzsOWJ+6BQdzc4JyckelPGGYhDsZRUZ3Nkjrjk1\nQ7j0Zo85HyZ4yO/rUnmBVPmHBPRsdfahTtREflPp0pBGuQzvnBwqevvSYCGU4TeoBYZJHaja\nIcvu80Y4HepFl8tiX5XoajZmUBV5Jb7o7VIhI4yzIQMBv4TT2i2M64OVbp2pfLDNlTx0IqVl\ndpt5O6LGCtUBAoDYZuDjOKm3COPc5BOcAA0eSHVUxuFHlx7MYCbTwTzzUgPWN1wSoRT15pQh\n69
EzkLnnFPKbmDMcSYAx61EkOyR1ZgjLyM0DsTberOAy9Qaa7o0gbkZGARUUkwUDOS3t2/Ct\nfSPDd/rUiFYmG77oUZB96BmRtaaRIBGxYnjaM5rp/D/ga8vpAnkyMueSBkj8K9P8F/Bn7PEL\nzUyY9vOxl/Iiu4+0Wmk25SyjQNjHHX60akSOT8M/D+z0OJLmQK8oHLEdPbHrXQSeJBYx+VaR\nqiY2htvGaz7u6ll3bWKgnmq/l7lYNzjmtFHqzGUh0zSTkeZLvIHc1DyygvgjOPenbSqF8cU7\nyyvzkjbj7taGdxrRsqkMcDPUipPmchfut/SmNx5b53H0H+FSuuWVxw2e9P1ARj93I4YUeT5Q\nzkAjnNK2VGPvDvjtSFf3fPK0Ei/LJIw27eeT60jN8zdiBxTtjSYwwUdRUbb5nCjkd2xVKwxZ\nGVsBTk96Zt3LhRyxxRnc5QEAjjNPYllVYyBtPJqBiiHdgOMhfQ0bWOd33s/lSKhZWPT5vWnM\npaM7eH71ZLBJP4s8qMDB60K0jqCAOucUir5e4upJ+7xT4Sq4zwBSGhsbE7iRt+bpTHZQzEqf\n8Kdu4J24wcg0rSMIyAPmYUxMjjYcHP8AwGlRi7bScY/hNOWESKCzbSvBpu393j7xzSEiXGW5\nIQDpTNjyZUvznikaZdyq527einvTiWlXpg/p+dAyRVCjLbgR1WmcSKdp+Y9Ae1DKdo3A5+uQ\nadGRGxGzjsfSmIZMx2qGGTUiliuGGB1HFMbcF4GTnpUm2X+IfKwxgDpT6ABA2/NyDSsy7QhT\n6UKPKXnG7HTNNVVVckk7u/pSEL5g+UMMg/dFKuGXP8XqKiZWaNe+Gp0jHaQqkf7K1QDm+UBc\n8GkVRuI3Y/2s0n8PRsEd6PMB4K5GKAHCIbsqcZGajcqmAclfSlWRo9pxnPHWnFSzAgqB6CgQ\n9cq28D5sYAqOOQKCxGATg0/zBI5YtsA4zTIYxk55U9KZIEKwY9x2ApWZlhUgDBOOtOVduQeT\n6Co8Exge/TrTC4m3Cnadvsaft2nryKGVVOQO3ekXO7IOc+lAwZiqhPvDOaTJLbQDjHT1qWOM\nK21R+fWhAI2xu3jP40xBHHu27iU7Yp6xbFZ9+SOlOZ8Lj+VNJLHaOeMnFMY6WZfMTncMfNnt\nSKd5Y7x7Cmhn8svtBGcdKVsFkJ49QKNGMsGSQRoByc5yO1dDoHimS1mWO4BkiHXcc1zEQKyO\nCeO1TKzrJ0wCKxnC5rGo0elXFlaeILaTykUxN/DXmHivwAVZikW5FHXbzWxputS2MoEbfxcq\nDXc6fqEGuQbGAV8YK+tcc4uJ1RqJnzRrWho0J3xFQrYHHFcrrWnrJMqviIheABX0r4i8D21x\nHchF2OoPA9a8e8SeF1hmCOrGWMYPy8VKkaHl15Y+Tahh94HnA5qs0ZEK5IRSefeuyuNBMcMp\nZPmxkH1HpXMzWIikLOu044XrirEQq6hgEb7vRaetwfMYPyMZ2mqkyyQyJhflPXAp0kjfKWOz\nJxjHIoAtSsUjUEcP056e9R7/ADFYp/AOc9/em7WLKDIpT+9Sqn3lVSAe9A7BJGIVBALE801d\nsyL5alyp6092ZcN2U8HvUknH+r+RSckj1oEUJP4j5exi33vSmTZyTJy3UAdeKuSLIysxOw55\nBGajlRmYBVBbsaAZRZlk/eDILcH2pyrj5U+cqM5BqXZKGwV2ofvGjyHjHyjCHIqiSDaFhJTK\nkngjrROzYRmbdgjA709o2ZUHmfKvOxR/Wmx7m+dEyM8g9RTAmkbcwb+8MZpFkJdPLAXHy5pD\nJ+7BK4O77vtSKxmf5QAnQYPIpCJVhXeSRmTpjsaY0YkO0naCOo7UCQmbOf3fRuPSnRzeYwyN\nrN3PpTCxGmFyw+bAxT4cK7Ng8Lkc0jeWrPu5B4yv86RpPMUfLlBjr1pohpmhZzGSZZDI8aqO\nuf0x
XV6Jqvls6jcy9M56fhXGwNIOWZQrHt/nitTT5GiZ2U7cHkitYuzM3G57Z4b8QPuh8xtq\nA4H+NevaVcpeW6uzlpMcYr5u8K6kssitI+84zt9q9h8O+IohFthcsVAOMcGu2nLmOKceU7W4\nU7AGHPXiqbEbSQNxHNWILp75C4AHH4VXkzHIAfvHjFaNCjIWQg/OBuJAzikwD947R1pwURth\nSMd/rTI1PO7BbNSXuJkMvPy47VMofYDuA4qNlLMoCgKCCafJhpj129qChpVtww+R6VIv7xWG\nTilbYF+QbmHpSmN8jA6jJx2qWFx3kjywVkKsO/alXEjeZgbun1qPzv3eGBK5/OpU2KwxwcUC\nY5mbbgt+VLIp4CD5/WmYBkHen+aULPzgdaoBJVdHAA+ppeJJMqCp6Ggd8/N6fWnIxXLD6EHt\nVIljt3lsVfHSnhWfBXjI61EVPUfMTUinamC2KZJIpK5Bxnu1Ju3SZTrjBqMxkAMB+dWAysxb\nGwEcUABYLhc80pk8nLHn0owAwBG7j71C/M2Dyue9BaDc+Cxky3rSMxkUh8kf3u1DZWTIUECl\nOZIQRw2eaLjGcMo/iA55qXiZQwyFpszfdwO/NLuXnPHpjtQA7y19TRS+X/00NFAaH6WgjA45\noxnk0kjnjApjArjnNfIn0CJG+bimOp24p2/LYA5pUl3Z4wKoCPaFPPPFLGD6cU5vmXHSj7o5\nNDANvzCjd7YpTz82cCkON3WgQ7C7eRzRjaowM0qj5ckUuz8qAFPA6U0qF/GhsevNGcYNACcL\njvmnFetNJJ5p34c0AJSsuPcUc7elLtJ7UxjTg8U1flye1ObPbg0qr19KliE4POKCw3DFKvzL\njpQv0FIB6tt46iopGHO2lZuearSFhn0NADJpN3em/ewcgUvl5HTntQsJ280wEeQr0/SgvhBn\n71SRx7m2mka3JfJHtQxDY4/MYd6sqpwM0sUIi96k2krnFIYwoV560MDwKex7UgHcdKBDOd3W\nnYGDTtoLdKUqOlMYxY/lJHFIc7RxmpdvoaVcZGRimJkTNuUDHNQ896ssATUTx56cmkIYPfpS\nYO3PJqRE4yeBSrE3Y4FAxsf3umKmjRpYpeoIU496FU4HHNWokLxso6sMD2oA+QPjbYm316U4\n+/hunQc18/8AiCFTNuA4NfT/AMfbNo9Ukk2YLfL/AD5+lfNviKP5WUAdMbqoTZwF+u1nPbtW\nJqMh3qFwzkdDXQXG6OUd1x96se6w0hk2jI6N3q7kGE2zdufKtis26+6WbO/titu6tBuDqc+t\nULwqrBc8f3qaAwpHdWLEAjGcEd6pNH5iy/NsJ6CtC7Qhio5U87qpSwzLMzoA6MOvpVokr7is\nY4+VePxqvcRuse9AG3HkVY4beu73IqOZgsYyMAmrJK0yiSTaP3YHzMMVDIu3duJZW6VPJ/x8\nAMcDsc02SUMoX7zdaA1KXCqJFwADtPrUU28SF1bKDipWKMz4QjAzVdlYFT2bk+4q4jI5ATB5\nbE7x6daNpnIZyU4xtpzMVkU4yT/EO3tTpI9sm9SFXgEGgCNcW+4eZuB44pNpyz4GMdKWTy1V\nhxgnrTI2EkZRjtdTwKoBpjAQO3ygnmo2zCPlyc1NMGfAZflPOKZLGT8wOQO1JiZGuN4Z+ABz\nT49smFJ4PIpGbzCCUwy8j3prPukJUYPU+1AiNpEaZiSTt4pFV9uQQM9c05GTLdt3U01WVUI5\nJz1ouA1m2jBOfcDvTchW2hPmPOaeqRM3THcEnFJJIDDl85zw2OfpQgGs5wOcMDwKYxeNcucs\nxwBSH5QWPJ9Kdyy5V8j0IzVAK3yr5f3x/Kom8xmVCFUDv60silU44Yn1pJOi4cBj0z6UAw/5\nZkDrmlbAZScdOcUm4c5GOwHqaXywGGeXH5fSpFcZtHzNuZRjjFNi3LgHDe9Snd8zEYz+lRFG\nbLfdP6
0DHM3RQMDnNRjC5A/OnD5omI/HNGQoQDjsSadgCNQzbl4UDmj5dpcH5v7ppvmbZigO\ne+KG75XB+tIAaJ1jyQCD6UjSKir3PQgU3cMbd2O9JwuMrkk8mhCBt0aLgAqeSfSjdu5Zjn9a\nGbzV2p1Tt60b8Yc9TwRTGIw3Rgb/AHz3pXzkdMEYNG5fNAK+9NKktxwCaQCc5wQVQcgnpTFV\nGU+pOfrUikHKnJXOOTTVCqORlc4p2EMVd0nUD39Kd8xk3YDDpupSEIwoy2c/hRhY2Oe/IpjG\nqdrFmIA70i7PKd870zwBUm0Ku1h19aiCmGMrGMjOee1Ah6sBggHfjP8A9akDOvRAd3UdxTlx\n8zH5uMqo9ajhQrlnOSec0AEitgYOQOoFOmb5lbO3I6UudrlcHLCm7QGGeR0NAhu7uBx/epWb\n93liFbNPXEb/ACNlSabsBDBRxmkAxlCsrZ3e1N2qu489eKkY7ZM4xShfnLdRjpRqMandicj3\npcN1JpT8xABxTWJ2kGmBGy7oSWcZ3U/nzBj7uKa+Ni5jOTzmmBWjyAcjOcGkMkLCJSo++xyC\nKX5ioAbk8FaRl/d425Oc0nz7QeEUc7qAEVwise33fpT1I4DHPGBQJgXGF69MDrUTK/mb36Zw\ncdqY7EkUmCRt+Yd6PMDsSx78UCRdxVVPu1OzwuwfUUhELSbVPljnPJp7SbmwPlOO1Jtj8zJJ\nFGcg7uCOlMBB8gKhd5PG/wBKIflWQEjjjFLDlV6cg54p7qqrnaNxPzGmgEwXwF5yOPrSMp3r\nvG0KMFe+aJNitncQhOB9aaqv5jAsGz+lIWoqr5jsuMJ1NHG7dG209BQD5IDbgfmx9TTV+bLB\ncfNzjtQNDlHzFv4h6Uz+JSy4DHHSnBgCTznNO8x9/HA9DQIj3OGI24CnGaTKlsAc+tO3GMls\nZzSIxUY/E0AJvaNSuMY6GhWKx7D8ynmjAUAFtxznFOjZmJ6YBxj0oAbuG3aBgHrTmULCGA3A\nkDPpQ5G4fLgrTF+6WYdT07UDEaJWdihw2MVHHkyY7njmp/JWOHKKQT701k8zHy/4UAJHGXZk\n3AIOfxoClcAjrwWprJx8vCk9qVfMbCM2AOlINhVjG5V3Y28Ck3hgdwKkNTuWYbh93k0fM0W4\nEYzn3IoAa0Y2nnvkGnsHWQKpyjDmkWWPdtbIHX/61MVn3cdBTBj02NNuGVCjkUjRncN+UDc4\no8wL+8wdw4bFJLjcwz900hDVHmPhfujimhflK+hqTcONpye9NVcDaGyWzTJ6j2Hkj7xK+tMV\nl2A43sfWlIfyVIO6NWxupGyrBSBz3oKBW2scDLY6UL9wnjkYz6Uf6vHPBPNKxSPcu3CtTAbN\nG0JVGIYN2WkZl4UD7pxSyZXAz93nd6+1HyxjdjJYZ4osMVgdpOflHYdajbbsO4Y+lSHKqONr\nEfepmV2gLkDPQ0hCfNsU4245BqVlKyghgGxmmMxmweirwRQ2WcHGKBit94gjPvTWxHyq5z1o\nLbJNy88fkaVmxtJGfUUAJuVZOeN3ApNg3Jv+bnNPTZtIY8A8UiR5xlsEdPegBVAMhC8N2zQ6\n7suwAYcYoX/WFscbeD70zLNHtIy2c59aAHEbgB27n+lDKMeg/u0c844bHIpNu2Mso3YGQP50\nxClQ2/BPBxSLt2tn6UM3luVI4bk+1KFLIcDIPT1pgMaP5Qv3W7UnysvIxinyHeF3ct04po3E\nH1pDFXJKqOAOtH3pGKtjIxQy4Uc5PfFH3cZX6YpCE2vvZWbjGTikbldo57ilU/MeeT1FN+Ve\nFGe30oAfzuB29utMbPll06Z5pzZj4zjPFKu6NQeqEVSFcG2jDKDnHIobCt0xxkjNMCv8rgde\nlBVtxMgy/cUdQHZ29BuVu1JwrZGcYwDSllChevotHAj3MSxBpi6gpdI22j5u5oEZ5wcqOcUM\nWwrf3uNt
IoBzhtuTzQNjlZOQi8nk56UxSNrZyG9ulOYDgZ46bqNwRgNuT60AKgKcbsnGaZuI\n4OUA5NOkZWViq5J9T0o8tmhA6N3J5zQO4udm1uoYdBSldzDYwQgZOaj5iYZ5B6UpVNpYk5Y4\nPtSsAitu+dV4B5pysX6LgE0ctCFHynOM0522rkduDzS6iGbRJ8g5wck05W+U5X5aZgRtkZwR\n0xRtC/Mc7epNAhp+XKkYU8il3FTgDIbHJoJDJk52/wA6UMGjyTyvOKBjQpVzt5z1FOUsoKZG\nM5GegpPMBXKdTTvLEiqT90nGKQhuGYE5w/pQSGCktk/3aGXbIy9FHFLu3sqIOMHJNPbcBQqb\njj52I+7TI/mUKTypyAfSnLGVZgGAwM4akaTcpZgW2jJpgkJjbMXB4ahojtL5+nY0fLJhR908\ngGlZiWG87cD/ACKQDNh8vhs560HG5SOeOW9KTb5a4HBzmlXYoI6qew9aYCYMeT97uD7Uu0Fd\nzLz1p2DtCgYGaRc7mDdBTAbGyspGfm7Zp+47PmG33700jaw+T3H0pSu7noW6UAJG4b7qgjux\npxxjrxSIoj3p1Occ0nVNjDODyKAFyFDZ+Qg0h6dAR2pSeDhckf3qZkhAwXJzzQMfH8y579KS\nJvm2d80pXauQehzmmhR5hYHI/vVOoXHyEKST27YqNXHlqCm7cePpTl2/MFy7n+GlVjtAHIzT\n1C4q4XOflx0B7Ui5mUnPWmtltxAwo5ye1OdVZQyffxye1MQRr8uS27HGPSlQhdny55+9SSR7\nmBHfBKrTPM2tsUFu44xigByyqwdSrF88AUbSwyw8tV/n6UqsSMe/J9aHOxckE/NgClqA3BVt\ny8KevtRkb9m3g/xVInybs/e75qOQSSKGPy4OMCgBx+XcDgDHWkYbdpzzj8DSKxbcrLu9qG2q\nuG7UgHMo3K27IpzYkjYMcYOc+vtTdyAYC5HYU0Z27Rnrk5qhAqZ5U8NwF709v3cxGRkjn60y\nRtzAD5fcU44VcDlhzk0AJGrMu089cUjAyRq28EjqKcrFYWKEhyc4NIflVcLkdDQMczbiM42g\nelNVdynuaX5lXGevek2+aoIypzUFC/MQFVuSOlJwFwep4IoVcFhu4/vHn8KXIjYDZggZJ7Ux\nCKuxclmx0AoVeSN+McnIoYBxnLKxH4U5trbMemGzTQMaSG5Jz6U35WgYE4bstSN0K4z3BqPa\nSQxG1eze9MQRzfuxjrjBJ/lS5+XaV2n+dBBVWB2juV+tIFPU9aAFbgbmJz2wKRlEbfMecZ60\n4OuQQfm6c9KGwxLbdqgUAI6yOq7G4703au8szn/dpzK0aKw/iHam8NjIwOhNAD8GLBUdTxnv\nTGHVtoUk0RxgY3FiM8U6NE80qRhf7zdjSBCD5sgnnuooKbl2FTnqM0ikMxDD5+gb1o8vcxDN\n5Zx1NIkUblxkAqacy4ySeP7w6VGrZ2r0Hr3pTEyue+R2oAWPIwAME/pSDczEk4I/WiFmX5SO\np605l8pSThmzwKOoxMHyyFbBPJNNUttwFJXue9KSHG0D60dflG7FUNCnCouBkZ6GnEJ5zNjY\nnUUnlqG2k4U9zzQcMzbvlP8AtVIxBtkG9eT7Cnj5vmBw+MdKFbc+AVAx1HFN+UAFshs/nTEK\n6nk/xY7fzpdyKQrDeMccdaVpizfJxng5oH+pYjjt7ihgIrZ5Iwe1EjiRgWfkDAYdM0Myqo+Y\nsMYzTSEj5A3AdBUlITcWOGXBzinSSFm4HApTllAHPOaTdtBBbjqeKZILu8rcePmpZNpJV2wD\nyCKTcSgHUN2pdoVgAM47ntQAKvlkAHPINSSNtUg8tnO7tTBlWaQ4Ix070qA9WOAR92gQ5SN2\n7GeeKjbaAykMHJ+7ShcKCwwV75pI2EcnOStCAdG3PB9ttHkrhhzt/WgEAkH5cc+9KJE2lRJu\nJ5xjmqGJCp
KhQNwY85NCZBKjoDS7vukLgjtSNIA2QuAf4jTsA6Ryse1flzRIm5VAOPWl2t5Y\nJOTnpRxJuLdO5NIQOpP3ssvQYoZdvV/3voKFx5eQPlzxSKrMzISM5yKQWEaRh8pGMjk04Zyo\nk5X+VJjzY/LJ2880rHYpwxYLwDigQ5Y/mYj/AFY6E9aUR/K2X3mmPIJGTd6dqdzGQTgD9aYh\ncFowD2pCxwzPxgcYpOerHAB5pd25t3VW4FAB95gCcinxgBSrEFyetC8MQDkNTQwBUOMY/ipl\nIkV8W4VRvqe1Ks25iQ+MVTRj90cKTgU+GZ8FWXaP4fpTsLqXJseWRGN1RMypIm0fKBncPWme\ncskZ5wOlIIz5e0t8vGTTGOZmjBIUMGOCfrTiN2Qp5XmmBl4H3ueDUrREDcOT35pEjip807lx\nkUscasD/ABYHHPShUUsWJPtToxKq7QAqk8+4oAmRT5BJYIxHDY5qe3tmuHwjswQdfU1WiUSM\nrqM8Y2ntW7pwRYGyuzcc0mIosrMy8GNu4I61YgCbWYL32kDtUl5IJG8tTgdcmiFdygBTg9aG\nAqRsxyhK46USsNpP/LTpkVMFdvm4x0FDRfOpTDDPNAyq0YKlAdhI5aqEkZ4Qtlh2zWi0qjcC\npB3YHvVe62qzEJtUjB781LAx7gFhsIxhuFHes68X5grDAXpWtMQzBcMB6/1rJ1DEeCWwc9DS\nAz5XMm4qmT04pN37sLg7cgE0nzbiQeM05GG5SVzzjB6UyiZV3H5RnH8RpiSEs23p71LH+7kY\nZPJ/Ckjj4fBzn06UykL5YjY5IpJGO5R91KVWXJT7zdeaRGaRju5QD0qAEjyrlSd3PFT8zS5A\n5UYNRf6vqAxb7vPSiMnc0gwB0piHSYmb5V2OOSf6Uil0zJu9ttKrLHGTncW/Q0qpjbzzn5hQ\nAKob5mOG9qbhdrc4bstSGPy23ryCaRd7Tbm/dhxw2KYC7j8qO2FPcCm+WQwCHdkcZqUs23ON\n6qfTrSyKFRSBtfHAqRjVtyuBKM9yo5FNQDazKADnjFSxgNKWkzu2446U9QBHyMAdqfQRHu3L\njoO/1qQgMwRWwcZIpUKr26/wmnKcEyYwCME0gGx53KUXCqfzpQ0QkZmLE5ztxxTSvzBQp+Xn\n2oVmYeXF85Y9xxQA6STOGPAHQUxUnmBdlxGf4mHH0rd0jwtda1cKsUEmxR8xAr2Twn8KdP0m\nNbrUTkgf6pxnnFBL0PP/AAP8M5te8qRrdljJ3eZngj0r2jSdJ0vwrCVjUTXI43EcqB2FMm1p\nNPh+yadEsMeP4ayHuJJ2d253dTTsyXIv6j4gvNQV0WcpFnG3uKx44WUb2fIPU55p6hI1LDLL\n6UkbLIoK/KAMc1pFGTbHNJ+7IU7VpVf7oYckUioJlVWGSDmpGYLuYHc3p6VoiCFWaOE7vmO7\nhafuBbpxikWQ+Yg29R1p7RfNz932NAIjk/d7DjcCe3WnxsdzEfez0NCoI1YjkU0yHZnbn2oA\ncx7E4ehnD7E3ZbPJprPtbCjBxnFIGbYUVcueSfSiwrDrhfs6nb8zZ5pzR8MAxHcdqUksu0HA\nA5JHNJcSM2G6UgGqw28nt2FI0DSKCDwey9alMe7B24+lNy4AwQHz2pjQqxmJlDHI9aGZtrsT\nk57U7cXZQV4NPY+Wr7Oh4zT6ARRSmY5OVFEjCOTaynB6UvAZFPXHakmUuCCeM/eoAfuMcnzH\nEe3getGWYYwAMUkoEYTJDKeM1GzMZCuenpTsIfJhVHB6dKT/AFmMDBNP8zccjI7UzzOVTBDf\n3qBCNHhd68uD1NSciHdnLZzSSgx8AZA+Y0lwxWFHReG5IpDFb+F+ueMUSbmkxkbetOZBlWJw\nCOg7UjR7RgfmaYhWcu65GcDtUjK25XD5HYU1cRquTgng5prY5AOVHRaBMdGz7y7Muc4x6ClG\nGyNwABzSHbvH
TBpqMis44OemKCiRtw428E9u1JtVMheGbgEnrTZP3hDK2MDBHrSuysu3HOAM\n0wBl8tiHfG0frQ2Vb5B9c0m1m4PzAcc+tJuZGIYZB5NUSxvKyM5XA/u9qXfuUBUwe+KWMM3z\nbsjpg07duXahCsOppEDVjDRtlsbfamsrEDnPcNQdipgvsfqKI1JXLcH+dACDKndG+Cw+bNLH\ntjixgqCetN3fK/8Ad/vVIm5Yk+XNMBI42ET56jkZNEY4yo5z+VOMbbXZjgn+IntSSOJogi85\nP3lp6j3FZ5NvzDC5xkdaVVCudo+WhVPJYZx0p0kZbHPB9KYEkZHllTt+ppG2nLIdoAxg+tNi\nVlGcDjrSvGjZMmT3BHemMcW8oYH3e+KTaeAOWznPtRjMm4L+7YYI9KVT5ZXcOhxSEJ5yzSMu\nxhkYA9/WnoxBRX5xxmkEuwEqOSe/WlUKyMzHaQc81QD49qyEYwc9SKsw3Vwj/u5vL9vaoY8c\nk5INJy3BHA71nJcyKTszv9F1uPUVjtpOTtwW9az/ABF4XS6t5mWIFgPvKK5m2u5rVxKjbdp+\n77V6B4f16K/tDBLFtm6E+tefUi4s76ck9DxPUPDz/vFWPlRkjuK4TUNFmYuqqBDkkk19KeIP\nC9uu94s5I+ZQa8w1rw2I94iiYjPKmpUlsbcp4hcWqqzJ8ysh2lu1ZU9kfPLqzDt5fXPuK9A1\n3w+VwqnY/Vkx2rmLrTmWN3Y7UjPC98VqRYxowq5DKQFOcE0+GfcrHPOcD2pslr5TgoPlY5G4\n5pjOrR7lVgWP3aQupbVV3BXOXxkinRnMZGMFOfrUK3ZZhuA9AfSnqyiTJ3L68cGgY2SVtu8/\nKw52im5Zt4xkkcHvUqnaxIXO736+1NWRWDrtLMx57Y9qBELIWXYozgZIJqMNL5O0fdzkirXl\nrGuGk289fakIRnynbv2+tMCiF8tTsI59e1JulUY3D5ecKO1TNEi7lLrvbofWo2aRIc4wR8uP\nWqCwxnMcgUDeZOc+lRsr+Ww255+9mnpI3GRuT27e1MXMiqG4VjnrUiYtugMYJB2Zwee/rVtv\n3bLtYEKMH6VU3DadhwAfuY5qSPLMCB9V700SOLLHCVfChm647elSbikmVb5SuFXrTfM+9tG/\n1GO1Njbdgo2xOvApjJoozCw3gmXr1p9o6wyiYOyyE4bnP6VXWQlWfJlJPQdfxqQTeXMv7jGR\n6/rVImx0elXvkswXcR1GOM16J4b1YRoZIZMy/wASk9PavJrO4C4BP3j8r9K6fRtTkgkbyjlJ\nOoHWuinLlZhUgmj6b8LXsd1CDv8A4R8p9a07jEjdQWU9K8o8H68Y4o0diQw7HpXomn3BmmOG\nJNdt7nn25S3JhV4XBJ6UOCIwQMNUkihm37eB2/rUW7zPmAwPSpNUx7yfuxkYPemtu2jB4PSl\nZS2O60rMUIXGB/KkMbtdQA3yjOMip94gAIkzxUUJ3ZBc4BzSsv3GADITikMeJCyhtuB/OpI2\nDMS447EUyY7FXHfqKdG4GRjGP1oQA2DtGdpB+8KeuWYrjjqc96a2CoJHzZyKf8zSA4+93qgF\nXGN3TtRuLYwMg0NhZCMUqMVH+yaCBfMbdtUBQO9AkWZiqrwKVozGcMcg9KBuVtpAV+w9ad2N\nD/OYJgDPHNPhBK89PSowWWM5XJzzTy+3AAI9aYhyszDnIUdKcqZjZW4zzSDJyck47U8Mr5DN\n7Cl0KQxflXaAWHqKVgdp2r9SKfG23btyM8Cgfe5bbnjb70ixrRmTbgkHFL5arnOSaljzuJ3f\nKOKj8t0bJ5GO9BIvmD0opm4elFMND9MuNvNRsdy8jFSYPekkYZr5XQ98byT8tLztwRgUrEL0\n70vpnmmV0I2Yde1PHIxjrSGMbeBxTcFV9cUmHQf/ALNBXjGKVcEZwadgHvzQhCZ/hFNViq9c\nihc+ZTtoDZxSYg
245xk0K+eCMCl2nNA/1e3uKAG4O7jpT1+9zQ2cAjrTh83bmmMAw2nIpGJw\nCD+FHVeDxSKwzg8UhsNu7BzzSMrIOvFPfHFJxt9aBDDj1pygKvWg42g4xTV3Yb0pCFZ/Vahk\nQScEVJuO3B60xmO7pkUAMEZLYA4qaOFv4uBSKfQ/WnncF65pgOVRt4HNN6NhhT1BVeDTWbK5\nxk0MB2NvFNI7Zp2fl60YwAWoAZnaM9qPvdOBQ/TrgUmOgHSmAu0ryTxS7uOlKq7uDzSsu0cU\ngEDbecUMxK5P4UrKWUAfjQV4waBMTOcUbe44pVwF6UgxxzxSARlOOeKccLjml+/x6UhQcUwF\nySOKsQgrg5471A3ynjpU8K5wT+NIR4N+0JZeW6TMOcHDV8oa5blXdz9ea+0/j7Yq2hiRo8uo\nJViM9a+PvEkI2lmAHOOKoaPLLz7zMeCCcZrGuoPl3E45ya3tZUfaGxn5Tism6AZc55qkRcw7\ntzDwnzZPDegrNvpEVMKMseprSvlJkGfu461nNhm+YgDNaJAZUy/NktnaOBVKZShOeCe1ak9v\n8rnOQWrNuM7myfujiq9DMz93lErsyRzzUMkgePB++Tnjt7VPIpkQnGXxUEjH5Rs24HJPrVAQ\nyqu1UiHK/MQabIhMAlCFew9al2gZGfnxndUOJTG3z7TnqaoCq3zNg/d6GomUkBE6g8Z9KtSb\nZCNwJ9WHeqzjCnGVGfzpgN+WSTAG3YMEetJGokBYqAcdc9KUKpXZzu659KZMGiiDAZUnkUwI\nDH5aEg9Dnaw4FMkJfkjcCOoqeRS2A/APAApnWQr9wKOfSncQxiwZcnK4/WkVeWK8YHzelIpI\nYNn5ORzUcx+XgFlzhsUgJVxIrBuoFRHPlgdhUkm1cbRzjGaj4WQLkk/pTGNYhlXjbz2pJFXf\njG3I+960oYqwVhjJxinYGTyDztxmglkJbzFAPIH86GjZWBJwO4p0qL53HAHpTpGb7QoABBHQ\n0Mdit/q92FzzxSn7oZU/KnbGVmAOD0NJwiBc89qYaDWIkyMYJ46VH5fyncV4OBUkivwDy3am\nMoaEb+CG7UANfJGzr64odsMOMc8U/c1uyoVDBuajf5lOcDBwPXNAh0gHOM7Tyc+tNaMtgK2G\n6n6UKpZgc8j+GkG4Bm+4fekAoXfuI5HWmsoYDHSnqzNhSNuaRV2qQQAB1agBJMiNgDuJ796a\n4HygHkDGaVlVWQ7iVJ+9igbfLIznPI4xTQ0Mbb1YDjj3oY+oLevHakU7tvPzE96VVKea0jZ4\nwMUIkVW3NhCNp6Uzy8MwwMLyeacF2uAE4xwRTGxIuVBLZpgHLMGAyPWkUbu/FAY7WO0hxRyq\n+Y3TFAxqld33Tj1pr7mXaV2rmpMs/GcDHApG37trDJI69qBCYXt82eCooHltiMEgjnbStjaC\nHHGAVx1pH5VnUbcDrSAP3nOQD3z7UMxwB2NNkkO2M4+9xmnL8xOOnSjUCIAJkAdOfrT925QW\nHJGaGxHHg5Yfz9qGHY85HT0o1EOEg28sFPSo1ZWYrI23b3HenKiKwGNxApI0XYGz/rOefSmN\nCJtjI2jK0u3KgA45p2d2VAxjkU1iVXJGaXUYkjbQpIxmkxtVsHn1p2xti/Nlh0zTcbioxuGe\nQKYhdpCqT1prIGYE/Lk9KGVnmcEnav3VFObLqWOVYetIYjYyQDuI+6tI6lsZ647Up4GHXIPP\ny9aYqtlm3cD+VAAzFoxk9OCRSFRvTuPSngBlxjIPSjCs3T5VzmmFxpO5T/eB7UfdUFRgZ5zR\nuRVLpyp7ZpEPyYxnPSgB+7EcgxhD/FQrDap280m0soBYBR1+tJu3KCBnPX2pAK0jBgSq7c0r\nON+2XBHtTWO2DGQTnIPtRv4GRkHv6UxsRxtXKNkZ4WpPl6ffGM7v6Uy4Vk2RgBgT2606Rije\nWfu92oERFgOGG4dR
7UL2/hJPJ9aJMh8qN69Pp70k2+ZgcABDgAd6ADaoXA6Bs0csd0Y+djwP\nWnc4LhPlPHtR91CcZGM4oBjW6Ek7znBHvTzlcA/h7Un+s2+p5x60jRFtnz7efu0ADb0XAwfU\n1GquqHLcn1qSQb5iCCEYZGO1N2Btpj7cZNAgY7V+YA9lI701Q3QcLnnFKqhlJBwQcYNKwxjb\n26j1NADmYMwJB+7TF+ZM7d2KUt0IBIC4IoATcgzg4oARpjxt4z2NOZmAAK5PrSfKu5I1LMer\nUkibo1PKleaAGriOMDby3P0pV2l1ySVHp1pyYXBUHJ6E0kaiSQ5GFPB+tAxynPJ+V8cZ9KYv\nG5j6U5Y2jVQTu2DI+lJJuddxbC5wRQA0KswDM2wj+E96RcQwu2CWzSyKGwP4vWkZtoAU85wQ\ne9AMXgMCP4xTT80WQAfm+anOwwoC8Z6UjFf9Xt2cbqBDSUXPyYX+9SHaqgdR7UnRcdVPNOEZ\n25DDP92gQKp27QcnOcHpQVGGJHzZxkUj7mKk8Uuflyq5GaEUK0Z8vDH6UGQKoyN/bpTW3TLl\ncrtOdtKmVYHqrc02AKuISC+WJwFxS+XI0a7cLxyD3pFbbIzYyc5FJu2tnafmGM0CHbmXO7BO\nOlNk52FuV46UkbKcYUtIpwRT+eRnpzikA2WNWVlVueoFCN5ij5SpXqfWlb93nn5s9PSj+FgD\nnPWgBkfysSy5TNCyHB9zTncNhVfOPakLIsZfqelABtSTKhtp60m8KpIyw6UN8jIy/MDTsbmK\nLwOtMZHuPYfLTmbe6JnjrSDklWPNOaY8K4yo6YFAhFkRQWB3ZJGTSbduAKc22WN1YYOaZ82e\nuMCgBzMzXByNyHgilcmPoenpTNo+UBsH+9TlG1SMhu+aYC58x/lHOKZuVmKhj7inyqcAgbSe\neO9R7FYSAZU5xu9KkA2lYwwzhjjbTlUxEjORTvMyyg/LtXHWmsvzZIIXvk800Avy7mynzYzS\nYUsBnnGcelLu24I555zSzKWbO7OetICNvmkBlblewFOclRs5x24pGUBj2XHFKzYAbORjH0pg\nLu2gHO1gODTV27sjcF3cj1pG38bgCT0p27cx4574oARVRlKlNrbs7j6Ubt8jfxAdTS+YVj3E\n5GcEY7UjZZgq7cHpVdCRy4XDHkelNZR5ZC8HqaVQ0JJkAIHFIFVVfc2C3Q0hNiD5owf0p2Tk\nBhsU9GpFzH8pOcDml2jafMbgdKZfqIyjpjPNKG/eEDkYwBTfMO5mIyNtOi2ZUgbfl/WgWwmD\njbnDHgUu0Rx4zl14O6mrGnz5lzhuG96aCTuyOTUsY9stHknBpGcYPGSO1ODMsaKx+QnpRkhm\nCDrSAJv3kYwdi+9DkblXOTjBNG3jnmL+LB5okjVvlP8ACc5FMljGbIXCnAHNIG8vJ6luKc0Y\n4IbCdOKQY2thcspwM0xpAr/N8q5A9aD/AHg2FBzik57cL70LhoctjbnFKwuoSH5dxUg560rY\nTJGQrDr70RoZFB+6V7GjiVQS2MGhjsNUOq5I3Hpmk2t5m7BXHFPZvv5OQaTc0jbFOQF5z1+t\nMQmAzHHHcZpEYqmZFyi8fT3p67Nu0MTSLlQxJzjqKAGBvlwwz3zSBR8uOOd3SpG/dlGzndzz\nQys7OVYAe1SAkjHkt/FyD6Um0bcE89jQyl48M2X6YFLtz91cgLVANOF+aQnGccUfKzAgEKTx\nSBiEAK8DrmnbmXncFTGc0AIGDeaPfoR3pqvuVSTjtT/Lbd87fX3pm1eQo4zk5pgKcsykcMTt\nzninsoSRQWBbHGOlIFV8qPqDSI2Rhlyc5xigAc+Xk5B459KWMllLK20dOlRs3DfLhT1X0pyD\nZGS/OelAAgVABk7e9SKx3Elfl7YpPMSWFUCYPqacP3jKsfyleTmgCPcDkEEc9KOFXhMnpxTl\nbc5yMselN+YKdxw+el
AB5fyjbk+2aVpOMscHoKPl3nB24oP3gU6ZqWPoG4c7QcHp7Uu07sE/\nSiRmkk25wvTik2ls54ToG+lFyRjRllzu+fNSbmPzZ46fjSLnjIxkVHu+VFbgdTtqhku4iEjO\nXPNIMMgIXnvmms2/YUQ7iOnrS/MUXaec9qAGhTtUouCDy1SPJslH8QPpSMrcsenekOMbd3NA\nD2+WU8blxmmKCq56jrTstzwMYph4TO0gnkN7UCHcyEu3pxSbguCGBXHOaX5WYF3IIHamsylh\ngfJjpjpQMVlAzxktzzStIfL6n020bQwXruHvS7mcZC9KkoRlVVZQduV+9SKU2oBnOMUmV4ZR\nuPpSlmOXBAHcmmiR8ikrgnEg/WmM3zBTx3NEu9sMTuyO1K6hWDkfJtxmmIarFeh4ZsH2FOYs\nu7AJVTwRQpHynHPXihTiRnVuW657UFjcgndt3LjNJ8ysCV27ui1IzKrfKN/HOKjjLeWSW3eh\nNBLHsrtgMmVBoy0kZHQZ6U1d+07ifwpwj6AHj1zQAincrEAgdMU1TvxjheqmpGZgxwm09BRx\nuOePp0pAM2Fsu2T9KRA0m5sY2jPPenbvlKDOc9aRMlhg55xtoARWBUY69cmguGB3ZZezUm35\nSSvG7inbmfO1MsvOfUUhMI9rEnG0YpFLkEdCQaTfE4LMWVsdPQ07aCUOeMZOKQDeWdQRjYOT\nSyY3Fz3/AIaXn65OQR6U0qM72yaoqw8sYVUD7pHbqKRVWM7Q5Zj7URlXjPGG7ZpY2+Zd65XF\nMY3a/rgA8mnMpKEHGetDKYwSBx65pqsqqB1GcgioAXcoVPNT5TxxT3iZsZ+5jI9qRmLkGTgd\nQPSkx8oCk5piF3BgGA+ToPrTDgFWD4OcH2p7fMuFwKaMySLuGNo7UEisoDEn5gKXzF246g9R\nRtx8m7Knnd/SgjLbeD+FIBNoiO0EsD1FL8yxkdFz3HWlCuYydwB6U0LtXJ5H1oAGYArIflXp\nQMqxOCfc0YLKVPTqDTpN6yDcwJI60DEBODuG45+9TpNrKM9u9NztHzA47LTtw+TcCdwztHam\nAFeCw+ZcZ5p0jAYDL8gx81MyDvxnbjGKVV+Zj94FeF9KFowAssnzYwF+8x70bg8ZXYFbOQ1A\ny2Rjcp9eKC3Uhfu1QuoHcTkfK3Qg0sYJXJzgGmht0mWP0qRmLYAOATg0xgzLuBVWZ/71L91s\n43A9aTJViqfc7mmKu1SSMrmgB64YhT8p9+lISm3J4OcU442KATyabIoK7G+bnikAu3DBuka8\nH60h3LDtbruyMUeXtk5Yq/QelOXuMZ4/CgQsjkqCzAgHtTOGXHvkGkdo3K7VYFeTTmwzAkH5\nuQKBCxo3zeYcgj7tHCxYK7aFQluHJNOb5sE8EUDFZehKFdvNJuEq7TxupSzyBsktTI1RlQPz\ntbI96aEPPzYV9w2jHSncAuM9Peh8MuQcFucHtR5e7Djgd/WqDqOaNfKCqcHqc0NH90DI70YL\nNukTGP1ojIlmb5icDODSDqJGBub5eBySalLIuTgnFN3JtKj+KpGkK4IXbjg471IC/N5AG3nP\nDVIrP5qr97+lJtPJUE5pFjk5J4H970oEWY1aRTtXa6nGa0I7h4tpUhmUc+lUFUl0CHb3NW1h\n3MXQgA/w0CJ45vObc8ee5qzGrs2QCFP8qrQxuOf4h1q4rlsL5nXpQMkkUD5ARj1pThCOMoP7\ntOVTG2MYAGTmkZVGBuyrDPFAiGQxjcFG5M5PFZt4TtfHyqRkfStGZMKcZUAfKfU1nSbsL5ud\nzcEUAZoIKZDFR0C461jalI0xffGdw4C+1bc7GDeucc8NjNY987DcQOOp9qnYZn/cwuOfapPL\nXhi/zdCpHSmL94Bjg9c1IW+YkKT/ALXrVFEkjfuwjDjPDrSqowAuQ2eRTV3JsEa5Pc0/zNrE\n52k9VqRq4jBfMAdDt9Rx
RkfMoHPXNPkl3Yx86j86a033sAjjHSgYgZVYsoVsjpTYVXOWXaM8\ngUpcMEKLgjqacoMjYYgbhketIBWRVUIvPOc0755MjblsdRUSyH5vkAOMc8U5mZwAFI7ZphYd\n95QAccdPSnceWqsdwU9KkVdsYZHAA42kU1kHytwvP50hji23cFBVcd6QfNJGMY/2utLuDSbd\nuSxzTkURlgen8qBBjBZieM9Mc0vl75k+RnJ7CnlgoDDhum409ZJIQQjY2/MB60CIkxuLyAZ6\nADqKe0Y2BUbaaiSUSKQRt4ye+K2vD+gXGtTLHFEzKD6frml1Azbex82QrCSs2DjnINeg+APh\nbda5cRSTxmIYzkjiuy8I/CeG0xdakFijXkbu9ddeapDYqYLNtse3HTFUJySFh03TPCkCxwqH\nnIwWAHWs6+1SW4wCflxwBWfcXEs2Qp+QcmnRsfJBXgKcYp8pzymL/ECRggcmiPiJtpO/PANI\nzdSfXJqRtm3cQc1rFGd7kX8IfOc8FaVdtw3lBdpHJowu0KPnWkk+VSFBHrinsUSLhW5GMd+9\nNVgyZ6HdnOKYuFBVV3OOdxNPVm3IRgE9aAHsh8s4OR13UyNk29Dv6j3pfuuy++frS7eA2Pm9\nasQKFZhJ0YGmMrOxLHKd9tO4LjjC+tOVRkBPWkIaFDquxQSP4qVPMZvuhSeKcVAY+h9KQYZi\nRxjrQAgUw5dvn5wFp7ruIOM+1N8zbG2TuY9PahVbjB3epqRCJvUbieM8inKq7XdhxnikEili\noB64pyvtY5+ZMcLTGhFxwSflPSkDkPtbhewpSpkRcJwOTmozD53KMCueXqhk+35c8bl6VAGI\nyAuamKBfmJ+QDrTYxtzuUFcZ96AQm5Nu0rv47VHGqRsWOc980qtsUhm+djnIqQAKcueG4xQI\niVWZhtbKg55qcyeY3AxtFIFIONu0dqGUtkjOehoEIu5iTkkd6Qyfu3zkbe1OOVXCnBNNVdrN\nuYEHrQA6HDRjC7i3OKesbSZJIx6VEvzLhflzxT1XGFRTt6de9ACNiSMZTc3UH2pQ7Djbhcc0\ngyY2BOCDzjtTkfaq4G8Hjk0CDb8iv1U8AUqRjOUGCfX1pXAZchtuzt6+1KuNuQ3J5+ntVJAN\naOfG5V+Xoad5e1OeR60pZvMQxt8vRk/rQ0e5cD5snrUjGP8AIy55XHamtxETt3e9P8kLIXbq\nBgAUxsRty3Xp6ZqgI1IEeNpB71ISGDAD7v8AFSDKqQOWamfLCSCSc9cUEi/LKzErnaepp+8s\n2cFRTY2KMQV+VucU/wAwcfxZ/hqwEkPA4wg56cUih2UktjP8J60qyu2Tjac4ApX3NHk4DfSg\nRG3zRhVJ3d809f3anC4yOtDKI9hUEseuaVmY7ifu/wB3vQA5Ofl3cA80r/MpVTg5pFXv90MO\nnekVfl4PzUg6j2Y7lGOlLJu2YCYyeaRf3gGeMdaerDO4bvLPAzTAcdm0FeucECjcORsyexpY\nwEyAefU0qq0i7sYVTihEiKu7GRzTmVG69aRWKghDyTSszBTxn096AASccA7fpU3mb/m6JUS5\nZlVvlp6qy7g3KUwDDOu/pk96t2N7NHMsq8Mhx161Tj/1OxmLDOasKEUBxkc4rGpHQ3g7M72x\n1Eatbvu65wfWsHxBpoXBQHZUen3J05XfJ29T710kmzULFGHBZcg4ry5aHpR2PH9c0sTSKHiA\nCgnd61wOraOsjFQuP4to7ivatc0826ZJ3c4ORXBa5pCSSAyDbJjIUH8quMi5R0ueSahp/wBl\nkZF4Vujdx7VjrCy7/LOUGetejalYgW5mzhz94EVyF1YLAxLBQXP3lrbcxMJQQwVsFifwqwGL\nbiDkDiiS1WFmI5QnIz6VEoSNGCZx1PtQIsK0jyRlXCJ0ORR521Wxt4bGT/SonnBhRkXvzU7Z\n8vdtChu5oGPkRfMXag3Y5B
OagXd5ZXGTj9KdChVSFG4kZXnmnbSyDaVZcY3Drn0Ip9AGYUqS\nkWcD73pUEiqyxtECPXNXGRo0iVvvMcBQajXP71RHtIIX2pAUJOMKCF5y1QyRhMHduwcFa0Zo\nXVcFMlfmJ7YqiFSRmfBBXqe1UIi+0mRtgOM8ZIqeH923zNvHSmBUVyQoY54zTwgEm1RkHqOw\npkg0fmSFsbmX36inLkwgnpnk/wBKZEx85uRtAxkU6RdpTjCntQAo8x4tikIXOCemBS+WCw2S\nFgo259fWhWLsFkXjHrTbdSu+Y/MR8oXpTuBJuHl/63cScBRWpp100UylDlug5rMUhG3Rou7G\nSvpUlnceTceYvK49OapNmckek+GdYNuyqWLsO/bNew+GdWZ7YF+WPFfO+n33klFiwC3OScV6\nd4R8TyPHGpG9NwVmHGK7qcu5xVIdT2yIyG3DN0xxUbExsMHAxzUVjeI8Q2yb8DpU6ndCSBnn\nNbMxTEgDZ3AfjQGLNg8tng0K2zJIO3HSnKpX5lXJI6VJohy7WDALz0Jp22PCZGCppm9pOiFT\n3ApdwLjsfSlYoc+5unTOM+1SJjYcHJ9aiVgkm4njOCKmWNtpAI57e1AkRr++GRx2z2qfbIuF\n3DjuKhjI2FQCAD0p6biOeAKYyUlt2VXd2oVtqqDHkg5zUTvIqs442jJFSbT5ed2T9aCQBckS\nH7u7gGiaMyzFhz/SnBHkAGeBzinMvylgcHpimIjhXHBPFTKVGFzk9ajhjCKct81SbS20EZI6\nUFJDmHzhozy3BX+tSPjdtbhex96jt1Im3uccY470qssnmI4JHY+lItImB8vc0nReBSxtHIwP\nU0kZEi4b5T/td6RV6EDv0FLUCb7p6EDvimR7pdxPI7VIxK4DHjvimSSeThVG7PemITyGopP3\nv94UUCsfphnbjI60h75FO7DPWkxjvmvlT3hOOMilbaFJ70kj7o8LwaRc7TnmgroOxuUAnHFR\n49DT84X1NImWbkUMQrZ3DFKV53Y5paXB7nAoATbuzng00HKfSpNwakbDA4FAhI+eKM5an5C4\n4FMbGaYDsHgnpR1XgUbiVJFMUttyaYIcy/KBS8Hr1pMk49aXnHapAQ+nWlIwB3oHzKe2KajH\nvwKAHY5BJ59KRlyeDTWbn3pxw3TrSATaecimYxx2qVstwDTVG3g80AN8sDntS5yMCnHFJwvP\namALnGe9OBXvR97pxSBRuz3oYAchdwHFNLbgBTmb5cUKhagBGXcMkfSnbTtHSjb6ml3DaPWq\nEK3GO1LxjPekz3oYA45oFcF+bGTQ3zZwOlDMOg60Llc96kBBnjNLwe1HJyKRVZVJNBQqjaxP\ntSLnbQ7dAOO9JytIXUdt3L71JHlWAzUY54p6sMDA5oGcj8YbE6l4RnTbuIHG0ZNfEXiqF4fM\nDrjb0XvmvvbxlbrN4fuC4Lrt+7Xw948tyLqZR2dh+tNEnkOpxlw245YnFYNxCEbb+ddPrMY2\nED71cxdSFQV6e9aiMHVlKyKOg6bqypMeZuwS2PvCtm+BlKqTxnNZF0ogkUgHPqDV2JkypcyA\nLhjyOTg1nXjfMAikHqfpVhpCRIoTADZB9TVVnkUEk5I5+lOKsZ6laRWbIPTrntVC4Y+VuPC5\nwBV/cVXH8Lc1TuPLBdWG4Y4qxleYeUrqchj0NV5SN5DMW29quopnwD6Zqq8asx3D+LqOtMRX\n4VsDOOuKjk+4VZsnPGO1PmRvO2s2cUwRj5mc9OBTEBwzARkE4556nvTRlwwxkqM7aWR1jX7u\nG7//AK6i8vanBJQ9WzRcfQSR1jbd94dQaZxLk7sA9RSso5Y8nqPSmqrdSM98UxEW4gFSeCcU\nj7iw2j7tSSMjMuVxzUagea7KG2j8qA6jWDrkvyG/hpnHk+WUO4HNOYOzbs9uBSS7xIrcjjk0\nyiL95hmk+YKOF6UseOA/3T0w
Mc052Egwx3LQd0jElgB2pAJLjnccnpmo9u2beem3aRmpsFkY\nk7do9Kj3qrIu3AIzTJZHud4yP4l4xTQu1k7nv7UqjaHJ+Zh096UhWZGVsHHNAhjMWVnU7sN+\nNK5ZlwcYY8fWpDsbLhQCB83vTX+YqDwrcigY0ltwVgd3emKispZhknoKkeTy1Bc546iopFba\nhJC0ANLfNtK5bs3vRhVYB2zhsnPrTmQM0agnON1I37wn5MrQFhZMf3sBj+VM8to+GO4HpmiK\n3LKS7cjOKTBbg5IHGTT6BYf/ABduBjFM+XywHU71/ipg+VtoHy9adJISA+3aDxt96QxiuG3P\njjGBShN2M9cUMGL4bgY6UgVtx5woGc5piFyzZOMj3pZNzMHCgnHNRnblSrEjqRUi4VjzgntS\nAT5t2c8HrTHjLgjP3TkLSqwVSC2VP8NINrcbiMCqEDNuI6iiTK5AIweppqtuUHO1c07b8rYx\nhuxoGRxkowK4MZGKcN2GHQ+nrTdrNHjIwvX605clV428Z3UCE3gMAPvCgAhiSpPuKRfmYyA5\nY8EU/nKtnPOMd6AI23FdhO3nI9qRm28p/rM8+9PO/wA5hwQBk1HwzbmHQcGgVtSXa27GPmIy\najXa2dzY7AUqrgkFsyYzRtCheASe1AajVVt2SDkDv6Uu88buAelLMxaMqoIckA5PagsYzjO8\nY2jigBP4ty/MOhFDMI4+PXtTVky2AQCByBSgoNvzYzzQMaoIOVJUelKqncAWJJNGX3E9s9Pa\nkXO7J6etIBUJjL7Tn60Bg2Co57r60hX58D72M4pVyzb8445SgYjMv3gMHPAHalkk2jaQ3znk\ngUrsqyBQvykdabGzL83YHoaYyNVUN0wfQdKeqhOmfalLI/zdxTVQyLndtYcgGpEAUH5XHIyf\nxpOTGAflPcU9VO7D4fjORUe0b+GLZ70AKPL3EbSuehPSl2nhQdq/3jSvncFJyo9etNXDbgfw\npjYqqxdipyVGd3rSquWBY5J5CU1VVuM4J6Yo4OGDfMODTEL6hTtyMn/CkLZj2oMH1pdxMyrw\nBjjikwEzzlj1xQAiKV3/AD5TsM0iqY8ZPynrSbVXfyQW4+lOZcqoPzFRigQNGFY4X6GkkOYc\nk7cng+lJmRgWDfJ2pV+YM2Bt96AEkICkrnf1DetCs8ijJxxk0jY4A/EilC5d9vK5oGIsX7li\nTndSgsqqmPmUce9I7bsBTtAPIp7RtIQ2wq3XNADVVtxYjj+tJyrDcuc06RvvHBHH60wqZEUg\n/MOaBAp+YqOHHb1pw8xmBAwvcdRSMSMMRls9aRcruAyxNABuUtwSo5wDS/MrAZ+UckikVQWy\nMk9KVS8ilMhV7sRQUB3eXwcr+tI2ZCEBCjHU0jAkqc428bP60rZZwOjLzTEM3My4IwV4B9aH\nZV5bpjNK7HKueoP3qXarOTnd70hDNzeWFUE85FKCN21kxJjG6hVaSIhexzn+lI2VkQ/e3Hr6\nUhiR/cGOc0uEWY7yc44WkZBHIRn6A0zzPlJl6Z+9QIk+bBQnjsKI9z5AGEAzj1pGAWNSD8uf\nxpQnynacHOQaaAaHIXaoILdfalUBc4O7jrSiU7gDtI70iKqgsDtHUGqYC7W2rIOMcc0Mp4Gd\n5Jyfakbb8oZyR94mkQBkdskDtUgKM/Kqn5+ppVO1mB4HcUgwygKOAPvetKoL4CsMY5oAFO7d\nnhuuaMFQvl/OzUJnAycjOPrSJjDSEFdp+7SABGy9gBupskKxsxUbvalbgDHJznml2hSSx5PT\n2poBFjZVQqeCRSCTDOoycHlqkZQyAAbdtNy23H3W/nTuAzYWAwAeevrTt26RiODjGBUa7pML\nnBB5NSPtAJU4P9+gAGdpfdktxQRsUFvnXGDjrTdwVgfvH+6BS7vlLL16gUAJ8u3AXPpmh1C4\nVTjIoc7lzj5vXtSnay7T1xkUwF
WTy12j5h0OaRcxq+RtYnNJCyHt0oTMkZZh82aaSAPl8vJw\ncmlVT3/h4P0pAR5Rdl+VOopXx8rk4VuRmp2ANx3EH5l6Ae1N2iNiVO7HSnY2sH3bQT+lKyjz\nCP4OoPrQAjqrRAM3Ockik2jbIAOhoVR83y5XFJE5Lbl69SppAEalWXe2BTtpw+18AnlhSLl2\nLdW6gUKdzcnHc/SgQi7juKjcMYzTFVVUnGWXo1OJAkZlyozihcL5ilS24YGKYhHwybgGxuwc\n0pUsoUYAz1pFJjiGQcDg0rk7gWXg8KBTAGU8liOTjilk2ttXqtNwV4kHOcUrfdIxwOtHUoVn\nMSqoyxY071Tbg9PpTG+aNSpwP9qk2s2CQUJOD9KBiqF27VOTnj3pI48Oylvu5YL/AEpz/u32\nqflHQ03YFXdjIzzQIHztjwvLHJ9qd9zLbuaCph4bucikXGDz2xSENbbGo+Q4b3pwHl8luMU1\nstGoAyV4xQyqvHfqc9qEMAFVsk5T+tK0hZgAMGlbKq2egPSkklChMptLEfMKBjfmYBSMbT36\nUrAs2FHfOOwpZGyzDOQKY0e3HzbX659qZI4rIxPOTml80NwEww603pl+MkdQetEQ8vBIJzzS\nGLkDK/e7/Sk3cfL8hx19fajhZPRcc0FcLwd1IWgIq5Ug8fyo2h96g7Tgtz3x2pVIUnIzSKu5\nWLD5elMQKymNc8k9B6UCPdGSvJU80oAjG4jgDj3NJ8y7GUgMwyfamNAAsi8NjJ+9/SlfccbD\ntGe1J8nQqdvUmhSu4qe43CgkVphyCMbuCKjk+ZUBwRj7opz5XaWGR2A60g7bRggYBoYwk+8D\nnOBil8xRwq5yORSDCKCDnJ5pMFmyenSlqIFjdo/kX5xzjPan72aPCDBPrTGWRcKF6HrmlZ/n\nGeWzTGIuFyCeM80uGy7twuMAZpZMM74wPem7VVgoJYYpAKVZ1TDAHpik3ddrEEcHFIV+VQPy\npVZd2V4/2aAEXDLuGVfNLI37zAGTjG6kk3spGVXnPFSgLGVCjIxyTQIiZk8snuO9OVlVU2gn\nP6UrLtUYxtY/5NC7mVwjD06UDGN8u4rn+uacI5FhG7Dd2FNkJQR9+cE09VDFvmLdfrQAzhmB\nYkc8e1K4O4Y+YA8GheWAxgY/GjpncTu9qYDsSFju5IHG3pTDtY5Lc+3FPVcqQDj0pFYSYQjZ\nt5z60CuDbiw5wo4pjMynG3c+cDFPXB3EnnqBR5mQW+6e7GgYSQlmwwxjk4NKWMgyRtT0pu8M\nVVslfWlY5baDkE8CkA3buXpz3+lScGTb0+XimyKUJyM+y0i5ZTu+Ud/agA2/dLcbuR7Cnb/n\nIA56cUxgv3Q24Z+Whvl5xkZwVoKHbDGzBuGx0FI2zy1x3PK9qVlKuCME9PoKTJjLEfMemMUa\niF2MrBI8/Nz7UgV/KYgjbn5hmlZQmCSd2OoNJtIUFT97tTEI275STtHbFPYeY2QMkdcUx1U8\nMS2307U52bgjvjGKLlDfLDKVYYGc0cZwQRx1pZPlxg5HenyKzNhPmbGdtIliMf3YAJC+tM8t\ntgIBK075XRcHIHUCjLxxlSduTkVQBtAbIkyMc0MRtBHSmwoq+7evalVSsLbuWz92pATjcXzh\nKauCjuuck4FOVSuCRx6DpT2U+WSMBaNQGfdhC4yvpSKxXGO5pVX93nOQTR1O3ZjHO6mA7DFT\nwDz+VEn7tsAYBHNDbTz0bGaViW+b+LGM0AM2iPlTlabGpCHDY579qft3R9cd8kU2ZQxXauD/\nADpdQEbJGQMH+nrT/unI5OMjNJ7g4bp+FJna+3duXufSgokGdvIIYjhutNjyyhfl3/7NKi+Y\n27djHQU1Q3mkg4J60gHfKGKBf3fv1JobO4eXxTWYEcrgA8EU9QFUlQWBGcmgAVdwJA+ppG3q\nNg5PrSKAYypXqM9aQyhgGVdv8JxQ
SO/5aKoGB2+tIcxtgnJJpzc7lyQyn8aZJv8Avbc+hpjQ\n7aN3znPrTQu3kn5c4ApCdqgsvPUmn4UoSBgdc9vpSELGRIu3knOOf5014z5eTxg4pfLyrYba\nw5+opVA4O8BiM5NPoK4hzx8271p+8BvMztAG0Uz0ZSNwPWnsQzeZjzB3FNDBWGHwfwpqj5vn\nQnilZf3gbjLcCnLlsknO2jcBMfNuPHbFH3cfMN3XFL80gUkYBPNKeuMKQDximITbubbtAZua\nFBZgq8DofrSlvvED5uu70pIx5pV+gHJoAXna393oaGVsKoHy9ee9I3cZ+XOR7U52wSARuoAa\nrM0gIXao6kijp8xYOM8e9AyAHDfgTSx5VgAu1j+tDAViWy5+Xj7vpSBWjUKxBLDIpFVRK2D0\n420vl5fk4YHOG/pSATc0eB90fzp33segOaFMYZgT8x4+maTZt4bhR0aqEx6sdxZhyaVo2GCz\nbj/s9aYzGORRKM99tLIxjkLJyG4+lFgD5V4JyCaGZ2U7gCF6Yp27aQuPm7MRQdz4zjI5J9aY\nCDc0fzdFHHrT4YyIgWbLE4CjrTVUN1/u5FOSMbOpXjnJoGiVshmZ8llGOajVm2vkggjPpSgh\n4AFc5Hr3ppUSQlQM85K0iR/zeUORg/d46VKm915KnHXimNIJG+YZwuKnhUxsoI+Q/wAqLDEb\nMeCpwp54pI97YwSd1LjYyngITmpo1DScH5e1Ai1axLFuyMIi5bnvU6yblSTYVjNQxxkMxcVM\nmWxJ1UdFpCLcbOFP7s7W4DVPbxiTCnOE/i9TTEzJECDwas+S6oigYBHWkMXaNxLE4xzxUioY\n1yvHHB60rR7lRdpDL1bNK0YLNv5yMgjtQBSmyyt/E/YCqkkRZchvMyOc1oynMOzdhvpVWZNi\nliccdqaA5+4k8tWbHPTmsK7+ZSuCF657VtakmTnGBnr61iyyBlYEYB/hpbgVmUrghlz0Gamj\nWUkkgDjmoeGQchue9TRsVkDEkH7uKCkSLIPMXf0p25fMJI3UN80xY88Y9s02PaknHI6E+9BQ\nJ824xtgfSlWMtkrhj6UKwjXY3GT1pSpVflG0H86kQrR+S5X8ajxuQcdO4p8YaRnSQkHGRn+V\nPVT5Yz8hHVRTCxGW3pgkE+4p204J65OeOgpwjGGJ4HY1OqZhAKMmRjd2pAMEXmNkkKmKXyxI\nyqOWPAB/nTtscarGcjH/AI9TmUeYrlcqOnPNBSGQxpGzbz5jDinD5VGBuQnAp2wosj/xMOp7\n01VIbb93j8KAHzI0arkBkHSo/Le4kwgyOuRVyysPtUyAAuuenevUvAvwsfUJgzQstuxzvYYA\n9R70bAcX4N8A3euXUTJCQuclj0Ar3XQ/B9h4StBM5Dysdy9sGt28bTfDOmraWMKvMF2kgYI9\n65S8vJ7vZKfm5wdxpRTZjKVizq2tTag23lY+vFZjn5tx+YdMU6EERk9eakk+5sI+jVskYSZD\ntPluGO0dsVJgRqyuQRjPFJtJzls8YNOjiTYcnk/nVEjI42SNWLZjJ6d6fudlYBse1Mjtyy8N\nlVPenMwj3MVJ56CqKI1Rl46Ac1KrOTyMLj71OaPcoI9OeajkUrgKMZoF1EWTcrBk2P8A3vWn\nsyvtAHAHJ75p0all+Y4CngUncsTyT0pAGGXIC8/3qRVKkYb60M2G55waTblTjoehz0oC41FJ\nzg4OciptytGrfdkz0pPLEm1lPIGCMYpGkVV6ZYUCFHCnBz70kfzRsucHrnHWkkf5UIXOfTpS\nyyYUEjHOBimIQZjTdnKn8/pQkgRhxt9DThhxkLx3HvQ3OwMvfmgAGXYscBqRmCSYPHPUU7yv\nMLHdjtg0SBivIAI5FIYgDqpY8rnpTdwiUuQQvqKdhmUDHJ707gZXOSOMVQMhlwyhyOvQU9gH\n25O04wcU9VDxqBhQp701oxvYjg4+8e
lMBrnkx7Mhed9POJJE3HAOOMUiqG2oTx1PvTmz5hKs\nFOOKkAaU7mUDKKfvmhfukg8dWalDblJ6kdT60m4Ln+6wpjFwjKFDZ3dDURA24Iww60NGkn8W\nCKeqiWMgrhsfepgx8atkZwFoWFlY4bod2KRUCqMglgOmacJPMGcYbHSggTylZRMRtYnsaTaW\nOTwuePal+6qY+YMacIy0xBZdh6fWgoJFVeOh7D3of5vkBXr831pdu2Yk8gA8mmrgLvOcE0AL\ntAYY5A60qtuYtiki+UEkYLHoaJG3Drt5wKQhW+Rsq2D/AD9qjbZNGcrlgevpT9o3DbztPSib\nfJ8yEIc/MtUgIZE8tcoxL+ntSx45ZeTjn60jMfM4IUY6mk5XpxkcimhAXeRRIF2N93FSo6p0\nwO3NNX5JAc5OKaWVlO/rnkVRIqqxU55Gc7aVnZiO0Y/hpI0AlDoTjvTygPRuM96YdBi8g857\n/QVJ5e5sgfLjqaGX5f3fyHP4U+Xeygdz3oFqMVvlwfvdM0LG5jx1OcA09NqHLHil3FVKkZB5\nG3tSAdGfIQrGu4/xNSqpZlJ5HpTFZUXbjg980/gBVB+bPNMY1ofMJBbjOcU/53hKIdqilwsj\nNgYwKEbcq7unSgkbGGjV8qQTUtuxY8r0pQriIrncwPBNN+8QxOCP4R1oGh0cjFHDAK/vTvOz\nGucE/wB32puSigP0ZvvGkZv4jtbjoKBDt2GIxjuFp6yBVzj5gagTdGgOMnP5VLGFaT5iG+tP\nRqzKTszZ08rcKImOWxk5rcsWNuQhb5AfWuSs3EV1kBiDxW5FK0khJHHYGvMqw5Xc9KlK6NbV\ndPjuoCScnOdtcVrGkp5ZVUyOgr0DR7gTMEZRvxyPaqPiDSkRWaP5R1BxXOdK1PCdX0dFZSuR\nzyuetYOp6fFsdPLAYnC+1eheJNPAYgBg56ccVy15pvmSMHf5gOvStYmMkedahYjecDIPQ+lZ\nl1CLdgc8/wCzXX6nprWyuQMKOc9vwrnriNxEcgAtWnUkyfODY+XDZ+92qfd58hbrs4I7VBLb\nvby5OHVv4R/OiOQW4kGNxY/dbjH40wL0bGRwobZuGTntjtSIHHzKm0A8/SoIzGIw6/MD0Vut\nWINyuxaVQSvAz0oGL9m+VnQ8dQO4qKVwseCevp606Nt0ZQPmdjywNS+Uqx85LdMY60CK8okZ\nVG/5vSqsiyLzt+XOCo/nVl4S2xyxjm7FTxQLdImP71m3f3jnmmBRkVUm2kbec1Hlnzh9pznO\nO1WZk2ybesh5zUaqYzvf7xOMCmLUXBVdrMCD0CjFMW4kjlRwMiPhgaVWVcbAcZyN1SowIdiB\njHPFLUCBJk3HbuQn+FhyM0uxmBGC4A421O1wjxxgrg44fFRzK8TEIeMcsKBWEUHgMucr0zUm\n9t2wnB28L3piZmiD43Se1IxUNuIZvV/SriQzU064LAK+CF58vHP511+i37W9wpjcxxjk+lcL\nayjzC/KgDH1HrXQafqEflDcwiXqCx61vF2ZlJXR7p4N183QKzOdzdK9Bt5le38sHbtrxDwXq\n3kyeX5eWx8uDXq3h++W82qzHp82exrtTucLXKzoMmT7xC8Zo8wCP5uueDSNmT5Nv3Rnd7U2F\nflLMduei0Fj5CU/vGNh/DSDbFGCVbrxmnLlsL29qUrwA5yM0DJIYxt3EZznIpY8qoOfmHao1\nAEnLsB9M0u5tpAPHel1Aeqn75G0k07YM/e6ckVEsjLHsPIWl3EQjAyW7Uxk64KnA+Xpz3pNo\nZhz8o9OtRRqzZBGPapVCxr8jYI6mgQ9cxqSM4P8AEKFHHI3N9aY0jbV7E1L5ZHzFCT3OaAsN\n4bG4Ywe9L523PG0imgH5SxyAc7aezrIxJGM8VJViVY9zK4ACUoJDMCM80blhUEMMDtTlO/BH\nc5oGWW2soyBnFIykSAjgeopmwbvvcVKA
iqSM89qCrC7du4g5GKiaRlYY9MdKmwCqgHnqRUcn\nsKBCbf8AaopMj+7RQB+l20cE0iqCxyTikU/Nt7UqthSOoPFfLHuDduB0qQbWGB1oXLYHQYoC\n7ccUCEOF4PWnD7ucYoLAc9aD9zOetUAm4LgUfebB+7Tdh6Z5pyqwHUGkMdkZ4FJvw3Sl5YA9\n6TYeufrSCw09eOlJtHUmpB16YqIsWJIGRTEPXO3IPFLxtznmkjJ28D8KdgZ65BoAMH0pFBGT\nkfjQd3r8tM2hSTmgB68ZGaYzZXGMGl4btmhSGbgc0gG9xzzT1IVcHrStH8wwOOtIV3dqAHD7\nopN2OMUBTtIHanNz7fSgYg+9zTWXIxT8bRnOajjyxNAh27gUoX5gc0u35fekwep60xi7cN60\nfd5BpcdaaVO3J6UDY7cME4pu7cOlG4dulAznI9KZI7rgHijhT60FuRxTgobOKCSM9c4pf4s9\nqewPfpTV+QUdRC8nmm5JPJ4oP3eKRe/emV0FYbuQKYMqoHUVLuAWmrytLpcQqr8w7CnwqcsK\njzuYfWpI/wDWHnvUgR6xbmbSZIQfmavib4pW/l61eBRiNXY5/GvuSZPNifbwQuea+PPjRp6R\n63frGMbn3MPaqQj571eHaGbbkdq4y8JkLZGK7/WoThh1APB7VwmoIvmORnqflq0IxLiEySYz\nkYzWbcQqGGVwRx1rebgA4wfSsS+G7duHzHkY7U3uIx7qAbiS2D1qkzKysQhatGYPkkrnjFZz\nuwjKE8ddtakFKaRozluVxyPSqbbm5YADqK0JszcMuRVeZPLjG3nB5FMVyrIxYBlwtI+x5McA\n9ammhj2425LVC0LRgLjAHOaoVyvtGHLDv1qq6u7HYdsf941am3RsB0B5BqtI8cmdrcjjApsR\nA2FIX70R6A+tMYpGw3MXU9gKl4aMh2ww6Go5EEgyQVbtjpQUNVyxaPbj0pdzbvlXIUYJ96VG\nJxnkjvQoG5pS37sc7aAK7wllJPOfQ9KTaqwgIcAcMSakkbaoKDAbmopF3NtVccZpj8yNtzSb\nm4jHHHenbTHHyuUPPWkZw2AF5HApNwm+8cOO1BOooZGPK4/2RTFj3MUbA74pWZFbjO/0pLhg\nFLMMPnFBQFjGwOBwOtQt8xbOBkcYp7bvLGeSDTZMMVbGB3pkiNGGUAdcYNMaEQjK4HrTn+XH\nBKntTWCtgZ4PpTGhA4ZQuOO5pfKLuDnhTkfSl3BdyhMuOlJnc+52wfQUCGE5jK7ctn9KR8tt\n44HpSbuCO+f0pflyGB4pjGt90P8AxfdwPSlkm24yNqjj60ud0hPIHXmhmyvzDPPB9qQhu0Ku\nWc4J4AoZjkBVPHTNGd3TnBpFk8rdu+bP8XamAx2+YZ5NNkjYsSwyo6j3p6jYOFG89AeaGBAG\n45PsaWg+g3yw0eSePSo48R5UtuyM1NI+6NduV55yP0psiDzM/dU/pQIiCjcFC4zT8fPhiBxi\nmsy+dhsjtuFLtAbccdOPWmMXmJgCFU+4zSEb14YFs0773Tr701ol5weOpAoENJO1Q6nn+6KT\nO6Fi+OuAO9KrEKu3KgGg+Yu5mGeeF9akBI9u0KxK+1D/ACgDHGeKVmPO/GT0FNVRIeTgEYHs\naoQcNJuPAUdaRSjc7dpxnk0yPKLkyYAOKkwu7aAAvvQMTzA3OCAf1pqL8rKx5xkCncjkYwKN\nrFt7cMRQUR7dsYOOvGe9DAyMuU+71pNo2sHPyr396fk7FzxkdRQSIxJ5PQcil+facgbDzTd7\nRoctkKfTrTmDthkUiM9qBDBlkBCBMenekkxJJtHIxkmpA/bkUhAbGGGB1Hel1AYzBJsbsHGA\n1O5kViOO3NRyKFXYOud2TT2LGMcjHXb3NAB8rK2RuOKRTtVSBmnIfLwTwTyPQ+1M3FgflIyc\n4pgL82MnkZzijzC2VA+Vu5p6/LuyMbqbs+
RVPJBzSLAfdVCmMH86TJXcsgxk1IvOQx+YdBUc\nuGkQqrN65pWEM24bCnbj+GlVuDkcelJI2ZQxXB9R3pWZWUsG5J5A61RI1lCMrqCSeoNS+YJv\nmRdoHWmLIyS4+8cZGaQyHBVV68lRS6jGswkbKfIAetSfIZuF6joPWjhWIA+ZTQu1WcgbfSmA\nHG/cfpikwzMQgwfWhhtjyxzznIpQNyg52jtQA3BbKjBOKayFIwX6jsKkk/d4w24N1xTHVPMH\nzk47UAAVD8yuNuMkClSTuo46gUj7WUBFAOaQqckAYBoH1AYLs7HJJxtprNtZghwSeRTj8rbm\nH3Rwvek+bAOwFvegTF2hFB+8/rRtkaRW3fPSEMW5wh70vnr5gOSNvftSGI8gWTcf4jhqBuUO\n2cDOMU7lZG4BGeQaYylF4VmLH1pIQn8QXIB65p0pXIdeOx/xpF6AAbmbr9KNofK7vmzgf4VQ\nhHlPmKI+nT60SMUbDctnoKFjZuNvIPSiSMLIc5wBkmgYKv7xix2q38PpTUUszc8Dg0uZNvIz\nu6ClcK0iFRgD7wpiEUjJGPkpq/uwyOMDOQ1GzYcMvysc9adHGzPuztYcD0xQwE65HTI6ClK5\nZF9BmiRf3oajjIbG0/3qQDVUyM24bh/epm4cLsy/b6VJ1YAHJzzR/qy7A4GcAd6AG5Vcr0UU\n2NQ0JK/ez60/advygY7imyKvVflPQe9AxTCJLcY+V88U0rjgDIHJ9qcxLKu44odgY/kGRnpQ\nMSNl84sFzxzQwMjEqfmx+lGwKuF4PXFAchSyHPqaYhI2Ea4HJPSl+XzgVyvqaToqMynrnPpS\ns+2QnsehNTqIJFCyDaxIzkHtQrHc2fm70cq2wncMZoZduNqbl7+1NAIjbnLYOKVSfu43E9M0\nFlPyqCuP1okzsBI7847UwF58vywdzjrSZ6Mw6UrSAMT0boG9aRiYxt24bvQtBh8i5Y8vnOKG\nCzZA4QfNSyZ3KxUfh1NNmYfKANuR0FAkIG+VnDY+lIoV1Jx83rS8xEAgZI5H9aFViBzgDk49\nKQxscm37vTvmnSSNNyFAI4wBjim/LuO0E98ntUmSVYn/AFf86BDVUKzBeV9abkI2R84AzTl/\nhPR+gPtSJ/rHwOMYzTATarY9G5205mRtvB2joDQ2fLGevTNA+ZsjhelFhiNyNuMHrT1X/Z5x\nk/SmLtO5mPOMClXeBkvkgUCEHzcg454HtTmZS2V7cUjAcEjJ7UbdrbNud3O4UgDzNuGBwc9v\nSkkAXdIDuGeRSbgMDbwP1pGjO4jaMdRzQAvGVGcjGQTSLIwxxz60u4P98ZfFEfMihzjinYVh\nNrFQH456Ukm9X243LSxt94sM4PX1pwkKxkk89qAsNXcX6cdqRshW2tk5xg05pS/3l2yY7Ukw\nMfztxxjaKQ7iy7Sy/LlMDj0NLI3yn0pp2tswDg9qArPluwONvc0wEHKgD9acV6DdwOvvTJGA\nZRghs9O1O2HzGbpjnFUKwqtmPluc5/8ArUqqdpOfn6ioJECLuzgMec1KzEkDGGxjbSEJuZiN\nwwSfmpwwjMSN46Cmsw24OcD0pY3CqUPTHekUJNJ2PzZpJGC7WC5XGDupzqGVQw6Hp601o2kX\nc7YYN36YoAVowxBUcU3aMEMDuB9ak2hpG2tkKMjFN3Kysf4iOKBBxJGR0OaSORUkbcSRjApi\nKdvzNhgMmnbvJRRjcx5phYQqu0lvv9cU5mWPBXPPHNDKzA4GWPJb2pFZ8ZbHXOKVgDrI6g84\n6UvDrsJJwOfSjcCQcfPTSrx8qeGPNIQ772BnG3oD/OlTmfLnKgc035FOWJ3Hlc+lLt2swHOR\nw3pTGCtvBwdqZ796E+WHnk7uDTePKCsNxHcUm35vYc4oEPjVWZvmpqszKccDNKqoykqfeiT7\nyjG1jjntTGGBtORz1pGzIMJyuMmnEPuJ3Zxw
TUaAvKW3bV9aYBHlozuPPYU5WxyBgBfxpqyb\nWywy3QYp24sTuGBipAi3DaDt4an+WUZB0B53elLtGMnjjgdqQZ42c46qaYgVwzs56YIBpsa7\no9xB/GniTCkL0Y8nFMkV9uc5wcD3p6dR2JGRABtJw3akVsLntjFOZVbDlwWXnHao/mZV7ITx\nn1oCwNuVUcD52HA9qXd8vyndnvSMrMM8kg4xmnfeUgkbh09aQWEjyiAMM85BNLtLNwcZoDbp\nmDcOOlN3BSXO5vYdc0xCrjy2IPOaCenHze9DIFwSWDdaUHawb7zdD6YpAMLEuuBzUjHdGfQn\nketIX27nUAjGBTfuqjkYzximAr4OGVSjDt7UpZGjCnnJzS/dwV6+rUz7xIxluuaQ7D1jDqf7\n3ShUZMrke9Eald7Hr2pu3cu3PzE9aBDtjBtgP40nmbOq7u2fWkTkgD05pxbcwPKqKAGIoRt5\nX5gKkZRww4J5xStIGwM59qRfmJ3DbjpimAhZcA4xk8n0pFByQOmeGpWH3cEc8fjSOudwDbcd\nfc0ig2hSQx3d6UN8vA/Ck2jchxk96bnE5fGc9BQSxy58nDDBzmgfu2IZN3GeDSL83Xr6UpUs\npwP1pbDBWwApUZPIFIFPz5bBBx+FDYaM569j6U5mMignAOelMYzeF2bTx1pyq6Kd2HY8il8s\nMx6YA5PpSHKjeWBRuFpkgw/d4J4PVRSeZ5O3OeeMUkbHy3AGAOop2SqYxzjIzUgDB45ANuFP\nQU4ruYgLtHds9KYrEDfubd057Ui/dJJyzGmAFgqYJ3f7S0/eWXcfl29G9famKVVW2rzTjuLq\nCeSOMUmA1myfu7QeQKcp4GBnnOaEYMrljtROB7mjjaQDhiMkU0AhDFjhc7uxo3eWpy2cnaPa\nlIby1If5ie9L5ZZS3VScfjUgNZQoUbfmzgt604g7flXK+lI3yrhm79aX5vmRRtP1qgE+ZQDt\n2n3ocFsHHOetHCgF23HPTNAHUc5Jzil1LFyu71Q8EUitwoRe3TNAP3cYHPINC5DbgMkUWExr\nf64KvLVJI+dylPkxncOxpqruJYPsPXpQFO05ztP8WKCBFYFvMb5ieKGbYpyRjPC0u7aynAx9\naVR8x+Te2CdppDGIwwVK7gTn6UTL1XOI/X3o5DBgMnGfwqRTlhtGB7+9UFhCpXBIBJ/lSvtD\nAMuaVVT52Ayo4/Gmj5Y1LDjsTQAqxqjE5A9qeH4KDCEdaZJnaD1XrikXDNuXpj7x/lTAcrBo\nyCCFX+KkVSFGcZP60mWOfmBJ9ulSqqtgv6Z96nzAPuq2/wCYgfKBTY8FnJ4Zei+3rUy7I2U7\nSxxkVGhHLbDuIxz2FUA0lo1O2PII4YUjOFXAXkDJbsaWMHdhWwuMYpfKG0ZGCOtAArKy4Kkk\njIFIrDaBjJ9acr9SOSON1MXazPkY9MGgBzLtOdvGKQzIdoVWJ70/bsK/NnjpQpbcyk0CG71P\nzEcZ6D+dDDG52OePl9qBt2OpTdzj8KP+WYUkDH3fSgAG3KsB82PzpVA+/nDZ5pCqqBubOeDS\nMy5wucDqKBDlUyNuJBPY0/b5mMJtNNWPgqpwzcilXfIdhbI6ls9KBCMShIPGOgoUkKJNuFFH\nHmfMcDHFLvKngZFACqxICpyf6VKygNhhjIzUK7VbdnknrUu7+LO7B5zQA6TCkDy+2CtKx2wg\nqQo+7zR5hM28/MSOg9KQsrtgphPT3ouALhHRG4Ud6tq5G3cfkDdfaoGTzFWMjn1qWNTJGU6K\nKOgxkkaq2GB2s2V57VImAXPJHZR296HjC7N5J5+8tSw7/MJjxtB+bdxmgRNFJI3y5Lcdasxk\nNwvBIwPTNRwhoXCqeW5Cn0q3C3YqGGc0CLFlGVhTccMeCtXI2f8AiOUFMRS8a4Oxuy1L8zMq\npgEdRSYEpkXKsHOO49aT5jl8YGeDSSbQv3fmzU
oi24xxuHapBkciGRVVlw+cn1rMm3qGQggH\nnmtgqE27vlx0zWbqCvIu3OO2TTGc1q8mPlJ6c4rAuGZhyFUj863dQYL5g4EnYmufY7mJXovJ\nz+tMZHiMtknOP7vQGp1UyZdjxjim8HcqrwTmlViMAZweMUyiSNcgAD8acoKtymF7Cm7XIKE4\nINS7d4HJbA7d6kBhkDLkouPzxQu7lmzkDOPSiNmZt4AAzgqKXBk3kAiP0BpDE5DLICWJ/Wpo\n1Z93HlnGeabFCq4zyQM//WqdQWO4nKnjgdKBjPL8tgSflYevelXDNhxxj7wNPWPbEdo4Bxg0\nJ94heABy1ACgLKoAGZF6N/Sgskf3uJafu2lVA6jO71qFpC0uAmSBk/4UwIcvNkZyp6itbTdC\nlv7jdErPk4VcVY8MeG7nW7rylixlsn2FfSXgnwHYeC9Ja7uVRXZeWfBx6Y+tInmscX8O/hVJ\nax/bNUPkJ1VdvzY+ld/qniRLRRZ2SLtAC/LyB+NU9U8WTX0phgUQwKMfKf61h72kB5BbPUVU\nYt7mUpjblnmlZi/zdS3qKYw8z5Fbah5Jp8UgZ8DGe3oaNu4Hnaf0FadDC/NuMVVDbon+X1qQ\n5kTLt83amJCVwPunr9fepF3M2cBu2T2pCY0Rtt5YEj2p20r82O/NNG7zHBPAp5UBVYthe9Md\niJiPmAbjNTq3HXkCmLhVzswPWmsuVPO4ZzgVSBiDLdiGJ6dqFPzNgbsd6kDMmNwyD0I7UbRE\nrJ1zyQKYiPlFB9TUiqWbJHNN3bQvB2/3fSl5Vg2c88ikApXDkdVojPlEAdKdIwXJxgHvSZK4\nYrkdODQMZzvY+p5pNpbvwKX/AFeUPIJyaVMchfu+9AhJCw4ThcUbVjADHcuM/jQHHK7ct6+l\nLgsSgGRigBhcK2c8NSmTEgB3bqdJHuVeNgHc01m3TKSckH7voKdwZI2Wj5HzVGAZFQD71SS/\nLlwcjPSjdtT5Rg+hoEhw/u/w+lM2gY3DkHtT1Aj2lmBzSO29iuduOmaY2R5f5tqnGc1I0fyg\ntJ1GdtMG4EIX+93p/lkKd3zAHANCEI6+ZsI4xSbgGJx8tOx2xt4zQ237p+7jP40xjVZuewb0\noZiqlj8w6VM2NyFO46GoxvbcMYAoAaPL8kBRgn16085ZQOfTipTtbouW6BabIxVACu1s0AyO\nRSpCsdxPU07y165IXpim7SCRntSncq5xwODQSHzSY2duDSZ2wucbmzgDvTnU7gwY8DjFCDzM\ndmoAdNhlVX/u9jSBgwROgzwKTAZjjnFOXKyBv48elUUOb95v3fKy00gScjkAUkMTGOTaeG6k\n0KrdEfAAxUiGLu81TvG0ckUNhmIBxk5FOUB8/wB71pgXaw7896oAYD8utNUFvmLDcBiiRWZD\nkFWznGacqk4fZxjnmgkRWBHCbm9alXy2+bGccH61EzMh34xkYwKfwFKp8j4zVAKxZcjG4Hpi\nmbwFAI29jTldmXIOWHWnja7NkZ4zg0riDkY3EOuOKRU3J9/BHP0FIsIbADYHXBqUtzwuD0z2\noGNIRiozuGMg0vzIrEYIPFSJsZtoUA1GB1zlUzjdTFcGjAKAjC9TTtjpJw4KNyPWnAER5OGO\neD3pobaxyp6dqYhYXEbMpXk9amhjVoyxOAOdtRR5X5tuT3qVZPMYlU2qfWgQ1nPygr8rc0rH\nYTtAx0GOtDKynG7OfyoVdqk9DjmhjRICY1Gfm9aYkYjHXG3pT+XRNq05WMjbSMA8UILEasfM\nYhWYdM1IR8yggLkenSn7zGpVT3wKcWHk7sbz0PrTDYaoXbkHnNbWl3DOoTHToT2rFwfLwQAG\n7CtDTbjypAh+70rnrK6OqjPXU6iHEMiO5x64rWvIY7+1y/O7gYrKjOxQpPmHrWjb3AVAMc+l\neRJtM9JHnviyxMcjr5JIAwGrgLjT3ikZgckDJ3c1
7frdit/GxCZbHNeY+JLfypDCvyt6+taR\nlrqS0ef6pai8y4O1j1U1y9/Y7sBkA7M1d5d2LMhLAbhXM6jYnBYkMo9PWtzI43ULF4cYQsO3\n0rPCny2RA3mkfxf56109wMIxdwp6euD2rBvIREyrI+D1ye/vVgRWq7Y/3eGI6nOce1TMryfN\nhFPfaetVoULyNLHyAMccZNEMiKvIO7PzmgRejkHG2NVPrSKQ0ZBkwd3NRhg4HHQ5U+oqV8fd\n2bCefmoGNkJWDIG75sD2HrUbQjgoWBz1ap5QPJCyDYByWHf2qJplZt8edgHUHpTAr3X+uGHy\nx4z61CyvkgMMdMEVakVV2M/3v71Ry4t5NzN5gbgFaAKUjGeHLD51bj6VL5cluqtwY26YpJEE\nKuoyxxn6UsSlvlHKehpiJGbzV+UgDPOaZGxjDORtzxzSbSUHAQk8VGWlYNIcMFONpoJHIred\n8hLFuo7U7a4K71L/ADcgUkkzNhtvlk/w9xTljZlc7jjGQFpoljmYtI5AKN0H0q5ZXDNjAG9R\nwrDrWc2FXKEtzyTVhWwFZvlbIrRMhnZeGL/dcRuNwbPLZwB7Yr2jwrqXnYRd0eOqt3968D0m\n+KuCF2ybsFf616n4W1XEar/y0X+Id/aumE+hy1I6XPZoJhLECpyB1xU0bLsLEfMawtFut2ME\nANgEVvswXkrx04rYxQM4wGzg4xT0jCqNzgj3piyBwfl4zSn95jI5HQVRY51GNuPfINK7eWvt\n0FEcgPB5A9KX5WT/AGs0mMFVgoULk9yafJ8rKuMNimtJuAweaFUMNxPzdKVxjww5T5t396mq\nu7jGQOTS+Y/k4TBfNS+d0BXauO3rSAa0wO0Y5BqdWds5OFNV15QtgdcVJ8xwSNy9CM07hYcg\nZmbnAp8ijaAfvUR7Y9x604KWjB249/alcpDGQSKoA471MW3qFQBSKjCleT0PSnw7I2LEHBqS\nkPyFOH6e1Tq2FyPoM02LO/BXIqUnoMjHai5Qsihiq9JPWjGWweTRIx+VWX56Mdx94U7iDy2/\nuiim73/vfpRRcD9J+OcnFN564p8i7sAcGk2sv8XFfMHsEkZ9RSOxjXBGaVfmXGcCjllOeaY7\nCj7uSOKaxCx8c0m5gMdRTl+9zSAVVHr+dC4VsHpQT1GOKRVbr2pooTlaXllPNG7K+9NKttyO\nlBOw/du4J5pqnr2py4H1pqDax70WAUMV7YoY7eF4BoGecnnPAo2knOKAsNVSrbSOKHXbyRxU\no4ag56MPl9aAsQx8444qdVCjOOtJgEcUgyB7VQg5FDjbz2oXByAadwy7T1qQE3bh6UYB470Y\n2nBFP4XmkMhKkrgCnbCvNPyW6UvllVGTTBkYyeBTtv7vNLs49DS7dvFMQ3dkA4oB/Glwfwo2\nj1pDG7QOg60DPTFOOdtL0Xmn1Cw09FOKO/HWl428HmhVB6UEsX+E5poyaWQHj0pm3ng0CF2k\nA88ULinNjoB9aFUYGelDKE8s9aNvzEml3Y6Uv3u9Imw3b6CnIpBzR+lOj9cZpBYlyWikGPvL\nivlv9oKx8u/kkRdryEKWHt1FfUiuPug9eK8B/aJ0c+StwQQu8fMRxmmOx8h6yJGlZcYRe1cj\nqVqdzHO0mvQNahLTSNjjPI9K4m8jzISxxVXEcvNDufBJIrIvrVlwUOR0FdHdg4ZQMc1lXUeF\nIzz/ACq0Qc9cddhbEgPNUpgrMBj5s1ZvGEMxIO5+9VbqTbIpYY3Dt2rUiT1KcxKSddm44AFV\nDNuYllycYNW5nLZ6Er0NQBgoTnJxjFBJWLN5Zc/MvpioZZBIf7pA4q6qjOFYEZ6VVuo/MUjO\nCDTQ9SncMVXH8Z59qrbR1UAkmrkjLjzCcjGMVVkZT0yveqEMQK6P5nD54FV2bY20kg/yq1GW\nAG8YPUA0NEDgsoJY8nvVCKjRlQfn3D2FBuE8sKFIHr
ipZkwoCk5J69hVZ8O2zPAPJoGK2ySQ\nkdVHNQmQhOuGz+lADuwEeACep9KJGHmNtO5R1oAY7FRk/Kewx1psg/cs+MN3FOmLMyjPB74p\nHBXBycDg0DQ2QnYCBk4A6UsibhyQectilaRmUZPyZ6U0rgYzsHcUAJtG/K/dJpnzKrLs3Dd+\nNOLDYq8e1MkU7xsJBAycU7B0E3nG7djDYxSuArNk8HkYFNGG27hhSf1pG2sGXccqetGohzfN\nwMKMZL96ayErhjtz3FIrt5RXIIPGcVLsEeOPLwuAp5/GgZHn94qgZ4wc1FIqr1XBzxjpUzR8\nBmbOOS1R45JzlGGetACsx8zDDgr19Ki37VAHJ6U/hY9qkt9aR8IMLySOaBMiZShwW4PpS7mi\nBB4X9aVVHVj7CkEazZ+fgfeJqhCKTt4I2+tEL5VivyopzzR5g2Y8vcnQULgcNHjj7opFDtwY\n9dwPIzSbZJ42TGTnJpm3Y21jtB5FOUk5cEtt4xSEMZjIdgHH0prNtTAG4k4O6pHZGyy5Xd2o\nUxo2GznGaY9yPysyEZZdopI12Zbr2Oaev3STkbupzSMVZFBB68N2oYhm0yq7A7cdM0jb9y/N\nuXHOKezhm5XgHgU1sr2wD0NAhG2hQccZ5p2GeMlQAuetMbbu+Xk+vvUglO3GBmmkAzyxu4+d\nQM0ZZlLYHApAu1eOrcn6U9GByMY4wMUAMiRpkZXO0ZzmkjwzMA3PalZid1NWMDBA9y1BQn7v\ncB1bHek3HzMOuWAzlaewUv8APzu420xVELlcEEDrSJYrSKzLjLbuppeFGFkywP3aRcbl3c57\nUhZX3bF6HBFMYpXcSu3DH+Ko8F2G7gLwRT8S/dU5Xrmlb0b0yT3pAMAwfLIyc559KWRvNdHA\n4A5xTlYsRuXII601l34VPlC9fpTEIzGTbg4UHNKWDSZD/MaBIsjMMDbjjikVV2gMwznpSAY+\nXVW349Qaf94fKMnH3qRJMceXgDj606RfL+fPB6CmMav3g4HzDqKNxQjJ567RTlZjhcjB60TZ\nUKvXIzSEJu5XGDt6iowvlA/Lh2OaezhsL936U0sVOA+T6UXAZId8gXbyRTw20AKOelLzG3UH\njJxRl9wbgAjNIBDMGlYgZUcE+9NZlZlAB9yKeCvAI256rSqPlYAYBqhjGYI2Qpc9lFNk+Zsu\ncbuMUrbtuVXBFAUBQ2MyCkIesf7zaTjaOlMXEm4HlvWjG58jr1xR8/zMvHNADVDNMVXCjHU0\nq580AtyOvpQqqVZm4YH86Yvyrv5Yt/DQMlZnbJQc9cj0pjS7sk8cZp8cYYbg+B6DiolUKrqW\nwzD0pgSYO3GcknpTnmwoj8pTzn60xty7SBkYp33VbC5YDrQIY0hLlsbSfWlWVggPZTign93u\nK84zQF+7tB2Nyc0ANUHcMHpyaRgVJcfNIxzj2qS4VY7gbTlSME/0qPJHG0g9AfagBWJjmO1t\n0nUgUrO0mR7UR5jBAHz9N1JwqBf4+/rTGO8ssm3OD0qI+WrBCSAO/rUqsWk4yCT0NDOdxCru\nOcEGkA3yjPnLBfQUz7vGcgdac+VUKx2knrTShiZuM9jQA05eQHBweOtOACSYbnbSqPMcDBA2\n8fWk48xz97HylaBDNq+Zub7x5yKf5hO5tueKNwkZSF2DpzTVAXJJ4bjj1oAVv3aqc43DoOtO\n4VVJGaACuC2D2oVsk7SoYdc0AN+SQkYyfeljUKxbsBwKWRo+Cy844K+tJ9/ZtOB0oGNG4PyD\nnGSTRgJHgj5etOZtxGMs3WmDClywzkUxDtvy46ccU3crSAMM8Y3UbvusOex9KVYgcAHvwaAC\nRipVQQW7/SkBAY4J5NIFz5m44bpmnKAI0JH3egoGIx/djccE9DihWwobG7jrT1ZpyMAbB1zU\nUe5pJEB+UCkAc8MoG49KezMEO04bvkU2RhGFI/hp3mE5
JXBamIajNnJ6Y4oZcRqAclTTl2IC\nSc4oj2upycCn1GgcrtZs8kc1BhSq5JHHapCv7kELk5596XAb5cYOOlIQqr36qwximuobGxsA\ncAU1gFUZbbg4NPEexiw5GOTmkAqt8wZvoKapfdnGRnpQu5m+7kUcrIDjC45qgAKrSEM3I520\nZQsdo4NRn/W5DZ9aVclVbB20C1HRqkW4bPm96aqlUYe+KfI5Ylx2GMUn3o2Ochjk0mMQDox5\n7Yp7M8OcnII4ApNpjUBT15xQq9X3bgePfNIXUb8gTCcv3PamRg7yCC3c5qXDMPlG7BpkbD5u\nTyeaBixtviJK4XP3fSk3byBjAHQUNIY9qjknrQ3zNvU4K9vWqGI2yBSpPJPajYWVeMnOadw0\nhYrxjp70u5vOXHpyPakA1W/0nzG+6BzSbXXLn58mlZdzeu7oKRf7pJ8vOCaCRZJDhWIwR/DS\nMQzMU9OlKG2krt3DorGk8zbyOOxGKQgAMkYJGQOtOV8AjHbtSZ2yfLwD0X1pehMnTjGPegoF\n2MUOScdjScCZtowx6e1IzKwVeQT1p/y+YT1CjAoJGNmPJAyvc+9D5YAEZOM8UvzNHhThgd3N\nLlm3PnPFBQqqXAY8CmtJubbJnHUEDrQzCOMOATjtRJ9xCp+dutMBdwXDIMN6UMozuC+w54p7\nQtJIv8JAqLZgEE8Z+6aAE8sSuy9PVqT7rFj0HG6nDlSmcv8A0pY8qABznIAoGNVS0hfdnAxt\nphztySC2e1SlWVcbdn0oRSV2AAMTS3JY35kYfJuajaWkUFsIT8y0vktyCSJAeM96bxuwAST+\nlAgaP5pD1RT8op2wJHgHkmmMVITDcN1p2BGzAEuM53UFCBh5TL/H2pTjbuZsAHH1pN37tmxj\ndShWOWAypqtSQ+6CTwP4aGbkH7+5cj2oCll+YFvT2oVQFJ53dlpFDdwjUdTnrikwn8JzuH3D\nSkSbg3T1FST7VZio3D1oAhXLMARtwO/akZn+ZM7ieTUiqXUOxwOlNjYgsOhx971o1AbuVVHP\n404gKp/iY96VlVowVGB3pB93OM46e9GpIuSy/N1H602PGQSc45IpGb96HVfqPenLH8xY4X6U\nyrjUaPcwK43cinMGG1Oo7CkbLYCqCadyzjPG0UCuxOSxA5PemtsWIgDkn79KPlh3A4fdnPtT\nsDIAGe+aB3BvlkDIu7jljTFZmyMbec7aVVaQMR07UKh285DjvQSxcfMR0GMmhCWBZQGUik8w\nq20/M2OfpQMRqNowD6mmgGp8u7ccAgj6Uu7akavuMi808BZGweMDrTVVWJVmIXtikA5pkEn9\n8NzuHSkkZPTAOORSRseQRkYxT1V1VAoBb+lAXYqyHeV+7znmj+IBl3behFDZdDxgU35lwWYI\ntAAZBnI+VVHTvS7gwBPOe3pQyqjEn5jjNTLlvm2jOOlMCFmaPauNxPf0oTJVmzgnjFTNyoI4\nbHNV4zu3bjgY6e9JgKVYqp24waJEVmbnnrQ21I1Zid1JJnzF3NgfrQAKx2gsMHuKc0x2hdmV\n9abuIVj1ycUfNu2L970NIBGVRnnI9KFx5fJOM0rLtJY8UrKCCw5C8hhVANHysWBwO9GFVcj5\ne9LtEkigYUnk0jMu0MF+cnAFBSFX0HcdaYcLhcZFOZvlDngjggU3lcuuQDQSw3suRkAnoTSL\nnKljk54oKKylnfJxxSxsuCcbih/ClYB6rl8N1z0PekUkuT5ePQelIFLxjn5mbP0p0jeWvDHO\neaAEMmY8Ljr82KVWAk27Mkjil3KwAACM3JpM/e9M0gBVZUO/p/dFNOxY/k+XnPrT2bG1lOB3\n702HEpYqNr+hpgOBJwWGR2pikmNtpxtOT6CnsA2MnJFIHwh8skHOeaAExuxIpycYJo4ZehBF\nO3PJnAHPccUKo8tmEm4jsKAGFVPz4pzZZQd2GpFy0Y+TFP
aRnQ5HyjrUlEe5mwAFL0pG1cZw\nc0BVWRX5anY3byRz/KqJ6iMN4wOw5NJ96NQrMBml5aIbR908+9NMgUbynyj+EetAMdtSSQlg\ncDjPQ0iyeYDuQoFONx60u3KZzweR7UvzN8pPDcZpACKyse56/hT4z+4YkZOeMUwZVPlJAxgj\n1pywouG3lVx93rigBsK/LjP19KVzkBc5PoelC4Eb8frTNolAXblqLlE20Bm3PsyuAKZHnySB\n/D/P2o2rtyS3UDkdKfGgM23IHpR1JHLGE2FgCx7f1pWVd25jh+gFSNhZfmGeMe1QuwZV+XBz\ngUwFWYNvEjYHYgUkbC4YqrbcLwfWhY9rEE8n8qGZdwQDLdx0poVyPaTjcdhHpT/lfndlhyaT\nmQkr90c07cNu4JxS3GhrLEzZBZAec9qQMmQE+Y98U5W3LlBge46U1RGrHYMMT096QDsq7kON\noFJwVGDtBPFK2FGPvMTg+xpxYycmPATg1QDSm6TOcA/eHrTYxHudEO5P4falbKyBk/7574py\noGB2LtBPIPakwEbcF2qu445HaiNW3YHBH8NJtEUZTc249/SlYtyGbDZxx3pkiqJFyOjfypEc\nbN5XCn5So708sYV3KcHpn1pNpRfMI+ZuNtAhHCbQp/DJp8atHuUrzjOe1IrBVHmJuXPX3pFU\ntkYITPXNAAcFggOzcO/apFK8dyvB96Yq7VaNxvbPDe1Ii+Wx/iNAyVPlO4j5jSqXkz0BB4qP\nd8xYAh19e1SDYyrvb5mOS3pTESFSsIbO4ryfUVNGVVQqHLMM81AwWFuTuDHB+lSiOFGDxAlT\nxk9vagZNHKFyvQLyWqVcSAFD/Fy2KjAw/wAq89x609U3MeTG+O1QOxcxvlJwTx+Iq1ZxFfkP\nHG76VVt1D4xwcdTWnHGysrtjYR2p9BEysDgAfPjqetWYI22k4w1QRom75jkE8VaWTb06Kfum\nhEjlVsgnAyMYNWl+ReuQBgsKhCqZA+3LGtGOPapKgNu7e9QMoNC23c/7xe2Kzr6LEMh3BWAy\nCa2DuwBjbzgmsrVlCo46n0qgOK1aPzFaXfvOPmIFYckY3Dnk/KV7Zre1K0EivGDtC8nnr7Vh\nbSwDY+UdAetAxwXyuCcEGlUszBl45596YynzFUJlu9WYseYWYFFUdKZdgjX5txHfpnrUsamF\nmXv2NNjXy5MuOGGRjtUgKlieQcfdpDGC2LoSrDPUgU5YymAvJxk06KA4BB69akVgvQEnpmkT\nsM3Lsbnk8DA4p9up25APuM/rS+WEkUqOD1WklIVtrfMfQcUFoEkQqSR905qOZjIq8BAxyCKm\nUMFOQN7U6G3efcueUGcGmBHJJ0Abc2e1dN4P8GT+JtQhieNwsh6JyfrWn4D+F154ivlMSZjP\nzGU8BRXv9pZab4B0tILWNGvAnMpGTQRKRW8N+DdL+Htr50pEt03RW5wfWszXNVk124di7GNT\nwnbPc4qC5vpb5zNLIXdjkk1Ey7VJU7N3U1SMZMi+YqdoAI6cdaFR/mcqFOMGnyII8DnjmmyM\npIZST6irMhvlrHhQOOtK67sqp5649acr44NSLCZPmGAVGfegGVyrSYLtz0A9Keyg/KDgDrTm\nB3BhjJ55pFYOrFhhs9OxoJsMUkAgAuO5p0jhgqYwPSlXO3A4pyqrbSV74zSGR4EY2HJPYntS\nlfLAYHrxTyhdmHWk48npnB/KqQDOmVJxTgoxktgnrS8hdx5PpTgS2crzjvTAYhWLKMevSmqm\n5fvYOakwGYBuvUCm+WrZ2Ahs0uohy4Rm3nKgdKYioqZjGFPPWnLAGU5PPpSH7uCvAHSmIQoW\n6gkE9aRsMp+XaQcY707cQu7OAo6ClZjLhivJHNSALt4yeDxmhQVUojj3NIqsoO5flp3yxkDd\nuXrkVRQ/y92AFyQOuagA3MdgAPTNOlVvMBD/ACdcU5UCqcAK
vXNAmRwqzsV6gU4Ybgnn9aI8\nmRgw2oalVUjXI+8OMGgEM4bChcH36U0xqJDu5zxmnyOvy8YzxTZAQ2FDMM9cUAN2qWU4+6cU\n4qSpGOp6UvKsy+2afvU7ux7e9MCLbsPzHL9vakXaqsXPz5pwYSAMDnHWljIZjvwuelMQsa+Z\n0/Omqsi5IOf5U7AC8mkj2LJsT5s9Rmn0GIjlm3dDnOKdyrMT/FTcKoCj5nNOdTJGAOoPPrQD\nFVRtwetJ83H8frSqpDDIxSxA7m2nmgVh5ZV2qBlRSRqFjZmXBzRGvmKcjIUdKcykHnIUdqCr\nCBDuwhCjvSqWdiBgnHX2pu0hs7c04Iqy55HHJqiSN08nDg49AKQ7lO7Abd6GhVSXnIbnpmnb\nfIRt44PSpExFYsSu5V9qjaE7gyvgrSsPVcnHWmlVRQE79femMGClOpxnrinMm1lHKjvR5hVc\nAEDOCR2pNxXIzlPWjUhjS29toORn0pykBiOTRHt+/np2p2F5AGGPNUAK4X5ugpfL+XcPmLdP\npSMpZcquQKmGeCnBx0oGJLCHVVXjHr1pWzuBU/KByKdGxbaWHPNN5KsQVOT0piEWNfvFsH0F\nKMPMUHKgZ/Gkhwsnzj6U5V8pWYfxHmqQrDyo2hxhiBzmmqGkw23jHPNAX58Y+TtS+WBH947s\n9PagYnATcMg0vFwpOD1xj0p/yhcKc0sZChgvBJyaRLF2/LtIwexNAjbcd3JpZirBQTznpT41\nUHc1AyNeF5+U9BU0eA6ox/GoxtBLgbgTgU5Ad+dueKBi7doYg5HWnKwjhOB83Wo/mZW4xjtT\nlkLquE+bGCD0oAawaDazEEnoKnWZl2kgDB7UiljgMvIpIT5krDIyBnmolqXF2Z09jqBl2fL2\nwK24/kkDjk4rldFufMV0dcbT96un06QRg71yM/eryai5ZHqU3eJcmZJLaRSvDDpXEa5paTyL\nIy9OK7WaToQvHrWFqckckMiNjBz19awNDx3Vo3j1SRW+THCjtWLf2/mR7VAV+59a7PWtJfzD\njBYnOa5u+tU2MMNlu+eldMSDibyxi+0ZWMhgMg+tY2oQv5bDyhIWOOn3a6i+hZi4AKhTndWL\nMr84IJPStEZnMTxyQW6gDb6rUWHSNQoUK4zWneW7tJlzkjjaKzmjaF/LI3cdKsRIvORGds2M\nDuKuRttjVZF3nHP1qjCxWRxvB+X7vpVmFniXIxLz1HWqAfuwpDDeD2PahsSR8nZFjsOajBLM\nTk5J4GPzp0kZV024JzkgGpFqMuI9sMOWz3JNRyN5bZEfyf8APTrVlY/MLrjd6Me1Qzwsqrvl\nGM5wB+lMCttkkYu55Zcmo2ZVdjuKjoKseWkbZdiowcD61XkXbhXHzZzRYLAqrMN28nHb096d\n5YjwScf1pFXllAKA8k0TAzxqjPg/w+poEFuBcTYDhFHRvf0p8u+1JLjBY4qNIzEB8m0g80s5\nYuokBZD2YdKoQ3a8vyouVLZz6U92deZGHHGO9NUFVk2fu0UZxSBM87C+4cGqJexpwyFpk8ts\nAL0rsNA1OS3uNu5ij/Mzeh9q4K1kWGRGO7gkE9vpW/puqo0KtsMbA8Ln+taLuZSV0e/+E9WW\na0AZs45Bzzmu7sZvMUKSTXjPhDWBCscbDHc16nos6yfMHyc8CuxPmRxvQ3tyjcAoye9N25RT\nnDUnmDaAR82alLCTocN3oKDYAuFwD1NI0ZVtwOWxwPWm/MvLdM9aex2sCOR6imMWNQV3/wAZ\n4K46UihZPlTnaeaa0hdumVz2qRGC5CHj2oGIq/vgqjvk052K7u/PShMRsQeT6ilQjyydvfqa\nQDlfYCMcY+7SxEhcHr1NIijHmFgSTipDHtLc5OO1IpAHRcZGOaf5nVcYB6UxMlVBIz6VI2ZF\nJxgL1pFWFjysRyOewqRVAjUPweuKYqnePMPDCpoyVOGG5R0oGP
j3MxPamovmMd/zKDT0V2j4\nXBz609iVwqjBx0IoKHvhsFDz0xSqEaZkzkquaGXau8Dj170JtUqVUu/UipBkXnf7Boq3uH/P\nH9aKRJ+kDNgAnmm7ht6UbhjmlyR0GRXzh7Io4UcZpV4GD0pG/wBXSsRtBx1phqO4XjtTGYLy\nFzSsegpGQFckUhAWPGBxSqx2470wv8u3rTVX+InmgB+CeOlOGdvNHDMAOtKRladx6ibe9Lxn\nijAVKZGvfPNIEKWAz607LYHNNbDdsGnKN3NADsbgCRRu/h7ULnbg9KRsL7mqAcV2YbtTN3YH\n5aUL0BPFG35jgYFMLEauVYgCpcDbkHmmhckgDFORdvJ6UhAu5xjvTj8uBilUd+lJn15pALn5\nhjpSMdzcHOKB6dqTpyBQAu3dRznFICTz0NGecetIA3dQP1o6KM0rAcZo3Bs0AK3ToabubBHa\nlUluCaRVPJamA7d0PtSdOKQ/KTgZpy/N2xTAOdopFXHJHND/AJUbif60wHHFNbG3Hel+7jik\nJLDpQAig4zjNG07sjpT1zuKg00cZBoJF7Yp6L8ppi4HBpwyvHWgY5WAYDHPrXl3x0sY7jQZJ\nHyWjXcvp3/WvT8dTnmuS+Len/wBoeGGwCQq4IAzmkI+EPEkf2eSR1HfB+tcVebs7f1r0PxVa\nGOSZSvO48dR1rhNQQhSpGKYbHIXbbpG5xWbcxrt3g5OcGtfUoi0jEDBUY4rNuPltiCMselUi\nGc7qMJEm4qFOcg1kSfNJ5jHvjmtvU1LW7lSR71hzq0irgZAHWtLmZBKnmMzK2D0qnyG5GCPW\nrsz/ALtNw+ToarZLMQSAAePpVoYxkAjLYwx5GKimLSc8DjmpNwZiBzjnmoJCDGxZSq+tMRVl\nIwVb7v8AdFQTN5aJxgVOyeVIQOrDIqDnbl+oOKuwBuEkbsUOP73eo1YMqBWOOoJqZV/dkrhv\nX61E0ixzKCPmPG2pEQqSudykA8e1RNCIXY/6xduNvfNTyM5kZQMgVDIxhxgbiw5qhkG5FKtt\nKnGMds0jfK6qmCzU6X/V4ByhHK+9QFV2rjgj0oEL88akEYPb2prZT7/PqKmbJmQq3GMYNIGH\nz/Lnnj2oKI3B3A4wvpTN2dxPPPI9qk3MqF85AOCO9IzHd027/SgREqhm+UZA5BNIC3zENj+t\nSKrKxAbIGePWkbAKhfvZzj2p9hDI2OMlM8dKYrl3IwBkfhUjAsrHJ+ZsEe1MT77RqMRjpuqh\njFYsyqvynPPHSpMKuGEnDcgnnihVXzPl5bHrSbljyMZJGMCp8xCt5ZUgjJ9KimjMaZXAU8Yp\nUwI93A2n7tHVSXPekBEoKscKc4pQuEx/Ex6UoYlXG47RSKdqg59xVCGRzBcbgDj+dOLbkLAb\nOemKZu2sF28bs0sjbicg/hSARQfMPQLjNIshRuQwB/Sl+9G3OTjgUnz4Qfe9RRcYrL5kypjt\nndTP9UpEb7nznHrS7tzHLbe4pq5fLAgCgQqk7/3i4yM/Sk+Vcjk59e1G7fjdyRStnqw68AUy\nxHbaoIGT61GxIbaTx96pAvG3rjmm+WWxxz1pkgZAz+g6inGQHCt8wzzTFOSx/SlbMiqy4X1F\nADSoIwvygHvS8qxJVRx2PWkmYblGMgjmkVQykbsv/SgBRlWBY4BOTSMCrMVIOefwpJPmwpHy\njnOajBDYUdz29KAF/wCWu09CMilYA4jGQOtPkYed0LKBgU5VPPQHHWgBm4yDptYHhv6UiMWU\nnGHz1NIZtyj++ORmn7WZix/iHagRG3UMT8+evpS8rn+NSefelVSp34ACjnmm425CgjdzQMQs\nwJzz7il3eYvyja2MZNIBJGuRyQelGWkZh0FACbWfG85b/ZpZAVTAHPenfebCkBgMtmmqzKSS\nMjP50AJ8xVRgDBprdS+0AZ70u7z2JI2jPApWUOvzcAelADWwq4Pz
Hrx2pdu5ht5XHIoLEAAD\n5jSSfKxw/BGNtAhYVDtkrg0c4KDgKOGPWj5V2hcqQKVdzSFiQvbrSGIynCYGQf4qj+9Jx2PW\npC0jcjGRxx/Oo/L2yEHrjOfWmDJURdrFSC3eon24dMluacI8fMTjcOgobLIxK7vT2pdRDSvy\nDkFuwB5pWYyAgcYGRTpANxKrhlGB70inzGz93jFMBysctgg4/hppJ80EDn+6KbuCL6Fm60uz\nBLB/mHSgYozllMZz69qbIxKgAbQODil3sTtZsEij5lxgfUUAIjbtxxyOAPWkyFU57nge9ODb\ngcN+FMxw3ygnFMQqsY3DPye9OkyzDkcmowNynLHOO9G0cH2pAKys0nyklehpyKpTJ4waZuHl\n7lbg8fjSecI4yG5A67aAF2M24kkr2wacwO1CDtP1pqtsXABy3NLGqs5OCGUdDQMSbG0sW3Dq\ncChlY/LuycZpDJIq4Xgdx60smWxt4wvLVSEJudk3cA0jIJQGL4HtQm0qoDbsdvWnKqyZP3cH\nIpMA8w7xGvIXnNK+WYFCAh65pgZFRiP9Yx5NMZfMjj2tscNnmkBIMMg3NkdhQrANyCT6miXd\nu4AcryTSrg5H30bnFACbR5eC23tTShAIUYB53UoULIFJx6U4LtViDmgYihneMFcD1pqoNvUA\nMcfSnx+Z5Jzzxn6GmbQqKe6jOf60CuJsZPnByAMUnlny933m71MuJFYDr70wZ+7gZxQMYrho\nwpPTovpSeXlgu7aOopNp5JGTT2Z94bbhV45oAdNIFXKLggYNR+aPvkA/LRGWGSMHJ79/alYe\ndkhQvbb6UFCAnyyo+oBpjYzGSx4PIqVj+9zjjGB9ajysfz7S7LwfSmSKoHmSBl3K3NNZh8qK\nDz3qTceNy43c4preaqNkjn8xQIcuPJJzt5wc01WVm54A6+9KNjQqGGyT1PQ0jcycLk+tIYvA\nIDdG+7SNlWO7qOMUMpZV+bHzflTpFdWBLCQt1NMQ3jy1YAtzgjtTt3mTFFGMDkDtSLyrDsKd\nDJtznkZ+7jrSGCRhs7T0GTnoKiUALJ6sOtT42sVC43Hke1R3GDIYzlFxwaYhmf3SIRupkirJ\nGvJUZwak3KqjuV/lTdzLuLfMooEBby40+bmnbsfd5PrTVcOoVlKnHFLz5ZHSkMVMfxLls9aP\n4wm4qPSmqzRhSRk077smQcg9c09QGq3zcLkYo8wbdgTGaVXMWU70Rrxlm47UAJ5gR0faxZTg\nU3zMsxI2tnIFOXJbJ5BprMefkzz1pCsODOsi/wALHq1IAzbwxCjNJIxYLnr2pzt+8VwO3zUD\nEjjLRNvG1l5piDAUFeG60LIA2Dn5jj6e9SsQCm1sgDBNPqIa0blTEp5xkE0Zbau35z0LdKdu\nKt0yT0pV3STbm4wKYMiYp8+fmIP3RT5pFjVXf5jwdo7Cm52qx243d6RogdqDlv6UgH9yR/GO\nAabIWK9eehpPJaNkbdkdAaRvlwWXcM9jRYAyNpLduM1JLGOF6oKJELLsJHrSNlztzhcdTRYY\n1VZpMYyFpT97gcZo3g8fdJGAaRdu0qOijk07dxWFaQqT8uQeDQudyqOg6/SkH8OAScZHpSkM\nzEZH4UJDGMW+YfcVjxu9KcuVyVbeQOhoZifl6/WkMfzAilcpCrucbufz6U6T5tu457EikVTE\nOSRuOKF/eNsUbFHr1oEEa/McNhu1IrfuyrEg55ohDbn+bjp70rL8+CecYAoELGXVSB07ZojU\nNIV38YycUki7AowS+R0pefLkCYUt7UgEZizBy2SowKav7yMtjYwNKsiwybV+c9KFbcWwcjvT\nFYQMOdq5ApvzCMIOpOakG5YyqjcPWkdSy4HX+VHUYzduZcHFLg+YQDuB6EUrKI1YAMf501V6\nqnAxnFPqA4ZCgk/d4xS7tr/NzjtTVbMe0HJx92hlbauOSBnJpMB27c
CB65+amMg2ja+QBjFK\nv3ATyW6Cm7QqttxnPTPSgQ/7uAtJsDhmxtcdc03G0lgPmHr0pyrkuXOc9KYkG7ESqThs9qJl\nYRFc9+4pfk8xQOPQGm7iWY/fxxn0NMBpO3H90cZ9TRuONp4PXNLtDqFI+djTuWUhuNvHAoC4\nxchGYHI6U5AFTu2OePSmxuI9ythlIp2GjVgBnB/SgGM+7JkcA8hTSrsbcWXb70m4q2WHHala\nPOSFxSAXIQIu/IPQikVfvEnJXp9aXKhQNu3+dG0K20gqetMQpXd82R/Wo/lXDsp69qcigKT0\n9u9CsWADdByKBoRj8g3HnPOKc4G4YT60n3E3E7tx6Ukiru+8fmHFBQ/aW27V6jn2psbKvzck\nZxweaIeFC87l4BpYU/fEkcd6BWFVjwDyvWhQsjfO2c9qVfmXYnY8D+tKpDDIHOOfrQLqNZkX\nlmJGdtSR4+ZVHIHrTPKZUIODg00eau4oNx9aTAkZvLX7xDY60xAuzCnvnmlkkLRAsu32FAwz\nDjaAOaYClmfI6L3JprYbGcHHSlDFo2bG05+6aOhBUduaAGhvMUg9PSjbyMNg0Njyzg8jnikX\nC4IO4nnFIBNxU5JB7Cl5VCmffNBC7cY+bOc0qvsDErkGgBzKMIcjP+zTAp3bj06Chvlxnjjr\nS8gDA3e9ACFwykbeh/WleRo1xtVu+M0SbioOFP0pNp+6F6+vNMBJAGZ93yjHC05kC42t8rDJ\n9jSI2dzAZOcEUNh++BQFhuGVRgfU1ICERtvzD1prHK7lbgDHFJ8pVQM80irIXcPLUBOM5obB\nkJxx/dpQyEkbSGUfhTVYLwTgHnmgmw5V3LuB2nPehe5VdvPSmSMQgDAheop/mOIwNuQevrQA\nhXZIF3YzTPMG8jPHTpR5g7jB9KWTLNGSNqkZoAcW3KEA6dRSBSuJNu3nr2ojYBiXyf605d5w\nOME9O1GoA+5shm465FCjADBiAeKFDbnQDjPekkQspG7aKLANhkIQp9/5uvqKk2lJTtO3vj2o\nVXMflkAYA4FIq53OcLj170xiLlN24sWboewpdzCPBYYo3ER46k03nzFGNqnvQK4u0iIlRzTf\nMJ28bVFPykYYZ6nFJgxxgdec0ihVztB64NSbo9pIYq5OMUm4yL8qk45IpdwBB259aVgGcKCG\nGFHcUpxxg8EcYodmwcgY+lK23avOAB96mSxG+dRjhe/NOG6Q4b5dvfvSDCsDj5+3pR+8kJ5y\nx6mgBwEckeCSTnvS7Q/AOSDSFWRcMcqO4pJE6bGx6mgAZSrEbstnj0pzMrHLr9TikKr5mSDw\nMZFAJ6ZyPQ0AJHn5mxyD931FG796CvzD720dqf8Aw5bg5xxSRnyYyoUneOaEAYbcTuADc7RT\nUYfM7DHGPpQihYSqkqaGymGPOeCKPUBGYKo3A7f7w/nSspVVAJbnk+tDRlvmUHb0ye1OO/y1\nJkXK9BjmkANy2U4df0pfMC8BhluMkd6TJZi2AOOTSQp8jFm8sjmmAscZVXEh+bpR5LtJgZYY\n/KhMY2Ht/OmiR/LXLYJOflpgPjDbcMmFX19aCSxUtlec04kyKC2QM0xjJJ/D+NBLHtuZmC8c\n556UbgrncNwbg4pdu6HJXJX3p20FVXGB1z6UANdSf4CxzgEHoKeyKq4xwOOvehW+XJ45wKFV\nNwIBKg0D6jGVo1yW3DHPoKf8shjbG4LwKRjiNgMbc8D1qdWDKsQwq9T9aBWDYWUlvmOeD0NS\nxsAPKKnCjPtTNr7gHXL5457VNIZIwSuDG3BoGPjzJCWHP86sLvaPYevRsVWhXZx6cgCrMJdc\nru28Zye9ITJ0CpHjOT7VetcsAXOIwMVXtkLEDGOMmtJUXy88BaXQpLUnt4U8wMQHUCrYUTTA\nlAV9aqxwrsVlHz9jV1FZVUfdyeTSM2SrGTlsAAVNGx8oYVlccgGpBGGx
nI7AVZjZp/kxnaOl\nLqUim8O5c52nqQaxdWIlY7Rsb9K6eaEMmSvGOnpXN6mqBW3Z5qhnCXrFpZmI2/N0z1rJVWm3\nD7y8kj0rY1NFa4yBxn71Z7r5TNtOc9x3ouBVhU+YPLByDk5q1HIrMxdcZPeoUjMki7SUK8/W\np2jMmCcEA5680ihfmZsjPp+HpVhYz8qI4XH94dKJFZGXYu4dc0oaQqzEgqeMYqkMGRYWIJ3D\nGSQeKVtuwOqcjnHtTpEVl2k/KcfLTZCOADx0+lDEMVsAykFQ3bvTW2ecS5ypFLueSOQEeWBy\nC3JOKfa2E15NGF+VSRgmpGPsYWumKI29Q2QvtXpfw/8AhdJrUgknV0tM72YnHHpXVfDH4NC7\nU6pqkTW9h9751+aT2A7D3rv9W1GGykFpp0YgiiXGB/KgTlYb9ss/CunrZaZEEMY5YHNc5dXT\n38paQkk/NSyZdjK2WzwfqaXaqrkZzjigx3IY2WRCcbT0xTyysgQgkdqVW+UHbhu4p54xx+lU\niGQMrqBnknrQyDzuMDPGKsyMCpOM45BqOSNeQfvdc1RLIUUK5LcFehp+xmUnOG7n1FOkVYnV\nSN2+pZEOCDhTihB0K/VR9eDTWCsSp+91qdE+VsngcCokwMkDcQeaoRHtaTBZSAKkjG7dt6EU\n+Zd0JVVLZwS2elJHH0UDauOtAETI67fmwcc0m5Wwu0nPerEiLuBHI/iNJtj2koeM96YDDDlt\nvQgcUpXaqhTlv51MuSwBbOfzFJtjXdlenSgBkuIzuZeQOcdqZIy7UK5/CpZvnTPY1HIxVlHQ\n4pEsPLeObhtyNTXVs7R261Iqn696eEO7K9T69KYEGzK+UvUnOaewVPcr6U6PnLY4XqKI/wB3\nvBXJPIJosA1tvIYE5XjFQxjEapsKknvUrR7hlsZHrRhnxwCOwWmUM8pkY7zg54FPcfJknK9M\nUu07iHGT60iw7OC2VBoAawZQBncvrStGc+px1qSNSi4XDITmmyKycd854oAEP7sFh0oDfKOe\nCeKOfLPPB60R7Gj44wf4qBESTHc3GdvrSo25kO08DBHrUqquT8uaVVG1ifl+lAXG+Wvz5AQd\nqj8lSN27cf7tTsqbRkbjSbScAnCmgQyQtuztwMd6i27pF4wOpNSnPnEE5UcAUFfOwu3YM80w\nGsFVd5GGXq3tTs+ZiRDwehpQgkjwx3BTT5N0gHAA7ECi4CBD8rE57mhcF9wGMilCH5uO3WnG\nMqsZzlAOtMZHuMa4A5al8xmc9x0NCnbknOM8ZpVGVxjAznd6UyRFUqwAz1zkfypcNuJPyntS\nrsjdsHO7tS8Z55JpoBr7GZgRtOM5XimMxAAYBlqSRd0mBxgVEWVo8MufTFAiONTsYnJpY14x\nt5AzzUm0KoO//gNNClSQPmJGc0wG7isTZGGPSgt8qkDjFOj9TwfQ00mMtgnbnpQMUxq23Hy0\n5lKsNwyfUUiR7WAJJXr9ae6qsglJPoVFBEhrCSPcAcccAULJhkQ/ex96pPlfdg/N60gVWXOP\nqaBkkWdxy2PYU2LYCxVabFhWBJ6ml+Ys20ECrEGH87JAYjotObbIxZl2tjhfSkaPdGpRvmJw\nfWl8vBCs5LA5wPSgY9UMkecfMozkmnx8YZhy38PcU18bcAEd80/zFCktw3TNAgKlVcDBI54p\nq5k2Mo3cc+1SKoZWULkgZz60DK4ycHHOKBEbNnJK4HTdT1xuABJXpupYyjbiuT7etC7F5Ay3\npTsIcNvnBRknH4UYk5A6d6VMMMyAq3+zTlAbdjkgUAMX/WBeq+lOjyckjDA4p0m2Pbgde/oa\nkVRIoAXJzzQWRljuIxihFMa7mjwTxu9alfbDGwOT/sikZflQlvl9KhlJF3T7ho8cdWwPpXQS\nXH2fEgUuT+lcsxePY6NznpWs0jmJTmvLqx1uz0KbVrHQ2ty81u2R1FZF7G
vy5BKdzUVlqTwt\nh+QTgGrmpYuLfKsFI5IrGxqc3rdoAjMF2sBlTjrXBataO8hIwgXr716bcOlzBt6k1wviiyAm\nDE7VPLCrRJxV9H9ojwMDB5x1rm7xWS4dNoCYIDV1t9Z/L+7ZQzc7s/pXOX8T7v3gw2eU/wDr\n1qhHMXSl4U3Z46tWfcR+YCEJz198V0l5a7YtseGXu2OntWHKpk3blGV4zWnoIzPl87KrujK8\nmpInbzi0eSWHQU5oyqnHQ/xHtUSxlZkA3KepbsRVCLeV84HpnjAPepUVZJAdwXnA3dM1VhYh\nZFYYXqCeKnWZfLXJGfQUgJWQrMweRWZepXvUGGbdg9+pHapPmGN5AbPanbHVT8+GzkUgKtww\nRVwGXHRsdaiaMPCSeD65yRV+QMNrSqGHfHTP+FQOxYM64jHTdinqIqNDJlRuBAHOajdX3HH1\nqTb5kZZjlTwNvFR+WPMiRzkk4DUxMkeaWNQGwzYyKbJ5h8rcxO487hjmgXCLuYDc3QNjOKZH\nHvkXEhfPPPTNMBZIwzOGPOOR0zQq+VEVfcqY3delIrM2RndIpxyKlYj7QVJDEDDKf50EsS2H\nmKGUb++41PaN5cwXdhmOCMfd96gkPlyBOifw4NWLd5FkUZBY9TjoKtMl7HoXhfVUVtobzSTt\n688V674dvi0iR7WXjPzV4Ro1xFHNGyOu1T8y9K9X8G3nnyIxlwOig+ldkGck0es24E0ayqMo\nR19KsqjMmRxVLSZCLckHMbcgVcjyzbycL/dqyEOSRX9yOKcu3n+FgPzpp2tGQRjHINSFgzfN\ngDGBQMhVBu8zOGqX7q8AZPJIqORc4B+77U1nbjJAX0AoGWGkVm3DqBgim+cnl/OucmlVRjco\nOO9DYYA9fagByjCgBcDOQKWNh8/BB9aSFjHIWx8uOKUKVDb+A3NIskSEFRz81OWQljH0xyRT\nVDyEMPlAp7IzspKjOeTQULt84KQcDOPwp5Y4APX2pqg7iQMYPFS7QGXjJ9qQyzEw4XGQadt3\nyYHG3+VQrx97k9qlRv3eW+X3oKHeZtzG2SGORSxx/KXOV5x71GTuIxnHrU3nH04qWINooqT7\nR7UUgP0eOGYkClVcrj0o2lc0LndxXzh69wZjs6UZ3KBTmTnrTOVbPagY7huvFHoDyKccBeBR\nwfu9KBMbtC9BxSlR2o3fL60g+b2NAhMkcnil5bpS7SSAeRSPnzMrxQUIyllyOKbtKncaezHG\nMYo3FlIxntTEJgnDdqcrbs9hSqvQdeKFyc7hxQAcscdKFwRxzRuDHP8AKjbtb0poBc+3NKBn\nk9KOTx3pdoXipAZ19jSsTt5HAp+2hs8DrVIQ0twKI2O7GOKdg84xTfvdOtSA4A/N6Um3avWk\n528HvTjhsH86ABieDjijowz+lCsJM+lG7HA5JoGIy85zxSqMcihlHGTQSCDjgUxMNu7NGeME\n80gJC470oHy9KGIQ8dqdwo+Xmm5468d6XI7fnQMRvl5NN69KduDNzTM7WzTYhxYnpSsDtyTi\nmcP0pwX5SKYDmYcYFJyOetBPy45pBlfpS6gIzdyOvpUiyfLycUxgzc9qTcNw4yaBksfzc1m+\nLoFn8OXKuflAzV9W7VHqe06ZMJOY9vI9aRPU+FPiBC0F7MMcKx/nXlGtMzyFVbGeTXtnxUs/\ns+pXgHzBWYfrXjWqwhQw+8TzVAzjr1XEbsDkZ/Gsi9z5ed2OK37xROwHQKelZ95bgsyHGMZp\nohnO3SrLCU6N2PrWRNGyhg3447Gt6+hEOCOvb0rLvITz5a89T71oSY10jSRgDaB9Kp4IiKBS\nW6lvatLy/O+Zjs56VUkBWUnO4dAKtE9SqxVlD/3RxVZv3jMrZC/3avqBtZCOCfyqneK/2gMB\nt561SGQPHvZcHcRUE0DNlnYAA5wKtTZ+8vy84wPWoW+VWD888k0xdSPnb5ke
FTPI9ajdVaSR\njww5XNGNwzzszjb7etK2yRm25PbJpgQbTktnkDJNVysm0OGzxirLD5yFb5cY3VBIhxktx7UD\nIJYw0ceBlmPHPao+IpN2ckHBXtU/Vcn5h29qYwTgEfK3BPeqEMVgzFycCkLbF6EKen1pxwVV\nEAx2oxuZv4iBQCIAp5zzzz71PGy7hknHYUzduUkrtYd6TzGJBXoB1qSRRhpCQRkHj/CmFRlW\nVuSPypiuNxwMdy1MVgsfyHgHJJ70wFZQFBwSe9KrfMMkEU0fvAFQ49WNKFHOFwF6+9UMFA3P\nj0qPYScjgYzTmAjw+M+3eolUsjHJ6/pSDUQRjaTuyD1FBcLIADlcZxSBhkFWyOlOVioJI5xj\nFFgGqzbgvRfvfWl3fvN2ADTolC/OSd2OBSSbWBeUbQRjimIZy0men+1RG/zuB94/rSQgfImc\nrmnNiORTxg0gGBdjYLYP8RpsbHcSpLY67vSpFjZgynkdQajVSzbjgHpimAyRV4bHDHrTht/h\nJIzinB8Aq/ToKTb8gUAA+tIBGXczHhAODTfl5ycA8YapZMN8pXjHJzUTn5QMZAOc96CheYzk\nfMOmKbIxV9iNlsdaCTnI4BPFGzKggbWPOaoQ2KQLIT2xihVVOmfYmnSMPL+RDkHJpGl3FlPG\nBzxQAn3d5GD2pMFgd3AAzkVIxV2yD8hGeOtR/eYIo2n1NAxrYaPeAQvcVHkBdynCnp61N5jN\nwRhlPPHWmYMvzEd+3agQFiI/mOD1Bojxt+8fehmPIKcDvStu8tWYZOcD0pCE4kUkcrj7x4ps\nkYEOd/zDpzTioxgHe2eaSNlaQh1wCcUwBV3QlZGw1OZRtUg84xScySO3YDGaTdsbJO7tQMTn\nccHK45NPQnbzyKCmM/LkU1/9XuBxtPKigBnyNk9+5pW3YU5AHakRAF3gYVjytOjwdrD5R2zQ\nIjZ9zE9P9mlVd3ABAPNOl+5kDjPalPzLnlM0ARx4YcZ3LQBucfL83rUrqm05z06jioVwinBO\neooAWNxsYt1zTlYYycA/SjuxIBPpSFQVzkigYR4+YNwrdcUm4uyr1VeBQpL7s9APu0MN0ilR\ngbaQACTu4ztODTvvdRtzx9aI8MwB9O1IcGTDEpjpQIRWKk7vXihhuy2cA9RTjIe64btTQxXd\nxnd60wERRsKn8KGUrsJAJp3lfL1yMdqRVH/Au1LqMbu2sJXGRnp3pWYMx6h2OfwpoyoI279v\nzEUjYaQ4b5eob+lMBVj+Zudqimsu0Z4571JH91sjI7VGMMoXuDn/AOtTEDM3lkdfpT925lI+\nYAcio1z87EY7UZ/dmPcEpAG4DI27QeRUm+MqFUbfXNMY7VXadyjgg9aRdoY8ZHvSGP3vtIXD\nEUhb5vmHK0Rqu9hnGRimldshyc54NMQ3ld3p1pZmOFbbs46etA6Y6baEEjZbrTuAiKkZ8wja\nWPApdodmG4K2c+wpW27gzruP6CkKqsfu3WgCPcA21ckk8VKznzB5ifN/dppby5QEbccYzRuO\n35RkqaQx3mnyi6rgntUSSKyj5cc9KlcHywMYJpuE+5/F14oEOZTxtO4ZyPam4VpGfpt5Poac\nmVkO4bTjhaRTtz8mRnOKkYj7pZA68DGdtKy8tlwHIwKRW+UnBB7rQ2AoHQ1QgGVaQE4foD2q\nMblJYndjvT25y33s0gygG8bVPFADpHEZ2biTmmxyfvCTlhnNK2doPByM0D5gOdq+lADVCsWd\ncj0+tJgrgscnqQOtKc4YL0U805NuXLZDKcA4oHcQqA4bGF/WmbmhYg4Oe1OZlLZkG09lFMVf\nM+YDBBwc0wF8sxhc5ZjyDmhl3N83XqefSlOQvz5VgcgA0KoeQ8biwoQhJG3qu4YHpSFhHyDy\naYAWYDOSG59qmaMHLcfKfzoYEasMltu4gZAp53LtCnAxnnpSbVbeQefY9KONoU
ZI70wEUny2\n7nOQKC7PI7A4XHC+/pSlWVFbru54obZLt25AU5/GpAcbhlX/AFZPv6UyQ+ap8wkHGaazOBnG\nQflAzSsVbcrcMuBQMJM+WrL96ljzJjgqMc8dKdujY7QSABTFJ5cFgegPrVEDmjcsF74yPpQr\nbZst90DtTj+72gZAPGPSkXdGxKtlumKXUryI2XByVwOvNEbK0o38Ac7ak+bcR/F/dqPaq5Oc\nH+7VCQKwCuz8n19qT/Vrn5mBwQMU7G4AOcZ6ULl8oWOR0AqWMXAXB+7nsaPMVlK4+bP4U1VT\nzBvJwO9Kv7uQg9/btSAYzlcKoy2eKRkK/KOX7ipMtxtGOaXG6Q5OD60w2EjGMkqC3pSbudvl\n4BGBSGQ7SdvluDzTixi5YdecUAJJuYpuG7aOD3oVi6uoyDjJJpY2IYFhwTS88qTy3SmiWRZO\n0ZPUYC04sisFZSwx94etPVgvynDdiPQ0nJbeRyvA96YEbZkO4vkoOKeW8wgkc4zUbblUMU+d\nm7HoKdISyh1GSB26UDEYlt787QMfSnSbWBUE5A6UfewwHlluB6GkVvLVgRjnFBQu5d2CpIUY\nFNjZWjfjincjocknJo+cRgIAfm61LAN29VIG0dBjrTVJ8zsMdM0gXyxuY4705iygEDKnvTFq\nG4KwHXJwTikChY8qf4iDSsyxtnO9cc4okUCPcmcHkg9qB6inEucsRt6H0okOwhmIOV+8KI2W\nRtqnAYYpWXC7gMlePrSAcF2qGwMsMCkbKsNy5XvR8wZd3zE9KJIy3BDDnOCaYhuGBLHgdQKN\n0bMB600IDGDuyS2KRVEmUC855pASMi+Zz8rUjD9ziJgCvJpN218Px6UzcCrNjHPSmhEkn8L4\n25pP9S3ru7Uc7hldwxxSA7o8le+Bn1pAh5Yxx7tu7LYz3pFX5mB4UjBpzbi6hug6/Wo8Dc3f\nPP0pgN8sKwHVAcn3pZFVVY8sSc/T2pXYFRkfLSfKrhc4z1pMBGxtBUf/AFqBj7zICfehvrhf\nWk272J3ZA70hdReOSDkkdKMYwW4HfNKZF428cYJpPJVkJZtwJ6UFDWPyr3wc7alWQBfmX5jT\nGbauQN23BxSrsc7s5ZudvpTECrtyGGDjINNCkKHY7T2FLhmYkHkUoxJIDjtgk0xCeWxQtnIP\nBNIy7ZCFbG7rSx+jHaM0fMsrPwy9BQIbgbgB8zA4Bp0incQ55HU+lOVmCnAG48jimqRkh8sW\n4PrQMQ4BUkgv/epzSOxCEDA/OmN+727sAYzTC3mKJCMD1piHx/KzEjnNC9ywwfehpG4XOFPp\nTYy3m/vBlelA0LgMmBweopeXUM+AQccUMoHIOOaVWGz5vvA5qBg+I2bIz347Uu5WwYyQG+9S\nOxU7xg8ZJpM4YN1Dc1QDtxU5j+U5xTnODsY4THNN3HbnpuoXGTnl6ACPEcm0fMpBwTSbHZie\nVHsaVsMm77oBpgZ1ZpMfuz1GaQEuWVsAfLj603bvXbx5g569qVVwxIOAOxprcybgOW4p3ESN\n8zAbcJjqaj7YXGR70rZ3YycDk01kBVWU9TxQAbRtI79aRcMoO3BpSrSM2MZ7UjMjLHkNvXg4\n6UgBlwuVO31p0mzjAbpSMv7wZXHpzShjuG4cU9RjFctnPYcUuQCmRjI5NIPm6/Kc048SZDbk\n7g0AIqLGzDkijPk7dvzZPU0u5fvn5h0yKTcU+XGCTTEOb52ODs9sUxWIjIGT7mnZ2sE/ibrn\n0pWYKy4+7jFLqNoTBjwRgbh19KazE/d55wWxSnK7c/xcjNK6kLydvfNMlC8bic+3TrTE2tk5\n3YPQ9qATsLEE7utG1VhwuQe9SUMLnzMnlT09qfuJV2PIUcY7GkyxiA2jk9aGO7CL949aYgjY\neWhODu5OaQYztIwg6UrLwoK7TTAp3DJ4P6UxsfuO05GBnilZTtBUGl3GP7wOKais
x28jJ4pE\nj+Wx2PekUFcccg9fajADnJ27TyQaN7MvfaTkeoqRh/q+hx60mVfcO3b3oDFRvB4ztBIo3ARk\nYyc8tQA4fMwIGw4wRSHBIw+5Ae9B45XkEU1VwQrUwHu6K2NuSaaGYKcgsO9OO1wV2ncOc0/c\n2z5xg4/KnYaGM3yqE3KRz1pQDuwWAY87aMKigoTu9SKauNvK7WHNILkkbOMkkenNIAGkMagm\nTP3qXCNhlOc9aRZdu5gO/B75piE27JMEnH94c8+lTecXVtx8sgcVHHny8rIUV8npzmlVd2Bj\nK9zTARSsajb8x6nNO+UuGXOcc0igM2QOAadHiPf8wwecigBjZUFt2KNwdlJJ6cgCnRlV3Fo9\n+RxRyzDAB4/CoAVdqQlWPPXml37o12cjuTTf+WxxxjgZ6UbdoAY5GcnFUhCbl+bf+GKA5O1s\nA44OaF2jAI2hhnNG0MNpk49aBjhleCSATn2pJAAxcfpS5AjKK3Hdm70m0R42kEdwakA2NtO7\n+LkeopwjO0AqM9dvrSMwZTzzTQx84Mpyyj8qoQ1IV5+/5g7VIQxCKQqH680fPIrEnLN3U02T\ncrKgYEgc8UxD2mYMf7mccilPyx7c5yetNjQtk/wUv8OP4c0Axdu2Qkkk47dKXI3jJ7flTdrN\nGT06496Vdu0EDDHrQNCsfL+ZvvY+WnxyGcfKuCw5qFWCy9PlHH0qxlZI242HpQITYGZAPmVe\nSelSMCJHXgbeM1HsztBbgd/alKsZWO4MvWgB24/IF3Z7+tTiZQqK0ZXdTVkPYDpmlcmaONcb\neeKQFuBgrkbc8YzU8KB49xOSvAFRxRuy4JCt7VJEwDArwucY9TSAv2rM24hTnuMdRVwYyFJx\nHt49qgt18vb8+GUckdxV3y1kIDHcvXgUhli1VVgBya0I1O8vjJH3R61WjBaFUjUdeauceYBh\nsVIrEgb+FxtzzirVvkr8owR1BqOLlCcZIb0q5boZWYnigRIu2ZNuNq+tcx4gjEKlfuhj9411\nJzHbnoOeDXP+IITNArvyyjFUNHC3USbCE5Ddc1l3CJDDsIxu4zXUzWK7QT1A49a52+hG5i4x\nuGRmgoyVc7AF+T5sE1at13eZlOMYyaii5YhhuOM46VOjB2QH06elAEkK/IuG9gp60yKPZlSx\nwW6n1odljyuct2pkhdtuTkf3j60BcmaQQsAwLMTtBH6VFJH84RfvdGwc/jUsNnLcXCxx5kOQ\nemfrXceEvh/eapKEEGZJmwAwxgep9BQFzntG8L3OpS24EbSiRwiMB1/CvonwP8J9N8N2Yv8A\nV4w1yPmRew+o9aueHvDOn+BY48hbrUCuF9F+lGrahNqTbpZCMdEHTFIly7FjWPETzjyIGKQL\n91V4rDbezZzkkVOYd3A5HWk8lh7UyN9yqylMYUnjtT1Rt6/IQOpzVxVeOSTptAxmoOTjJznj\nNAhjAjcyrz60KxGMj5SOalkQ4Lr0HFKsY27m4HrVIllZk8vcpbO4dB2pFUxqFDbyepqzuXcj\nMMnpTPLCMTjvyKYiJ187BYgFTwaaYzIzsz4JGMVM0Z6OMKxypprRqDk8lf1oQiLlSTgn2NO2\nkZbpuHCipUw7Aj5venRod/3ueeDVCIY4W3c/KuKCgRcZLHqaseXt5yMepoZm2FduT1DUDXcr\nqgGByB97Pp7UNtyoONxP3e1PXG3AGcnJpDH5cg3DOehpgNDLuZepXvSSErt4yOpqQYXfkbV7\nChmHUcnHSkFiJh9ojJAKjPGaJF+UK3zgdT3qVlO1ARjAzj1pJEK8J35NMRDGoZWJyPQVIoMk\nJ8w7VA4xUkaHbwQKVVJ3A8j9KAKojJjBXj1ajHzD8qsW4WPcu0uad9nXd83HpQSVY4zI7E8A\ncU/bjao+U57VIVKn5RkULgruPLdPpTGRzKS2Af8AGoiCyhtpA6ZqZVDSfKSTUjRN97
7qikxl\nVV2tyf8A69TmMNIvOO5o8l2jZlw7k8HFKytwSM8YJFUBHHDuDgHBByDS+X8pc8n2pZY9oPzY\nBFNhj8uMnoCQBnvSAXy2X+LPGSKSHl2Ynnt6U+aN4W+cbuOCtC4AyTjBpiE8t3LE4Jpsu19p\n/h6cetP8vdLlcj37UxEZpGTG1R81ADdpYnnG2mKWbJXJI61Mgwu4nk/w0qsWVyiHb3x3oEG0\nttUMOmaR1dWABOB/CKVIwAGZSq9hUioFYndlutMRGuWTAyppoDnK5+XvmpMPIpKH5Qcn1oT9\n4wI+X60DGFfMCKTgdM0NH821T8o/vVJGvmM+75TninMgVeSGHcigBhVecDIH8VIzDanygNSK\nNyMM4HpSCMMY93PqapAhdhbHHGetMXCMyOOOtTeWfMYg/uxUckaY+Y4NAk9SGNVLfd70rAFi\nFJ5pNm3DAsB796kYYxn5fpVDZF5LmbeRtQLRtEgBHX0NSrlpTuyUx3pPKCtuJzQTqMj8zJUr\ngU9EYZy2fajJAYqSxPalDDeMKSSO1MB20HgLtXrmgqIlG4fIeeKXzNqnLbvalkOFAIJGOKoQ\n2PghlI254FPZSMY+YEZ+lNWFJME5THerCxruGQSoFAETxmErj73Uilk+ZlbGG/vULzu6k1LG\nyqoBBds0ARYU5wSDnpTpIXbbtGTTgn2huVwueopVAfIU5ZKGA1lKMNpK5HJpQMKRjNSLGG5L\nbu+KSPd82RhPWgVwfYY12nYQcn3obZ5nyr9aTJWTkbV9cVJgRyfKd4xTENP7xg3XBqQY5Yjb\nTVbgELgg05mO126DGQaQWGMccY3J/eqbIaQBPSogoZYwDnuaEmbnCkK3SkyyzHggtnnvRuHl\nEuv0FNTdt2bcL1zSs22Rf4gBzUsa3HqokDK6kDtVuNfMhVSdvNV8l2JXkE81atnEm6P7uB3r\njqq52UxscvlZZ/mw2AKnnm+1W79VP96qknm5XbggGrFvMwZxlcf3TXKboz2aRSACRWR4jtXu\noQG25XtWzfARsrfMHP8AFWbcfvlKnb1x15ouBwl3b7GKqu8qtY2oQxNDgghuo9Qa6vXbc2cb\nNnaScVyE0ztx26nNaR1EYrSMwcOQoz+dY2oRBULAYycEnvW/fR+fMSqYCjP+NYeoBZXCbjg8\n/StEyTFvHGAAMKOtV1aPC4LSAnkg9KvTKQrqAOMgZqi4SCNkTGcdAelUJikh5NsZ3c8hjV2P\n92pUp83ZcfqKqRmOLy93QDk461YXEbbmkJ9F+vSgZKYi0bPgh+KI2+8GORjqaEYRyIdrMjdR\nnpUkqhYQ7AFN38PpSAZtG7lvkxyKbNtRUXafZRTyBIxwflHBpyoJC7qPmA4OetPoIz5iu0A4\nQZ5Apnkll3BxgHIY9qs3K7lVmA3NwSBVby0eba0m3jp60CIGk279o2yfTg+9DScx7ieBk7Rz\nU0zGNWQ/MAMZ9qh8wKqeYPl6Y9vWmBJLIFQtEQ+4fj9aSN04kBZh0ZgO/pTYUaMMWTKk8N7U\n5o90gIJA7MOKBPUYgKq7uQeeBUm8bSA+HYZ46fnR5aFWLDavbnqabtZim4r0xu6VaQjX0mXy\n5It33QM59a9N8M6kIZOpaMDgr2ryyynC7cxkZ+XH/wBeu/8AD90sZiCgKO5FbQZhUj1PcfC+\nqfaokBJAzgA108My+ZjHOfwrzzw/dBTBxhnIPvXosZWeDMaYPTNdBzLQe3LAsPkYVH5hZyoU\nkdmqRmKxqo5KjGKc7HapAwO4FMERvjco6Z609WBDZXbgU3cG9sHIpVlYF8jII5OKCx7dgGxR\nDleJOmabySCOdxHFSsGbdu5PagYxcqzAZcZ4p6Rjzg024DpjtSjeiDv7UvmPuEbLu5zSGhwY\n5K7iPpUkcjLzJkgHimeWCxYDGalKqqhN+W9MUDHBGfPXrkVOuQ2CvzYpkbFgVOUIqaOP
cwbO\nSRzQNCBhu3EbRip1JZVIYY60xodzH+L29KVlTgLnI7CpLHp8ytu457U9mXbk8D1piBo8E9DU\npUSMVB96Qhu1P+en6UU3n0ooKP0kydxBpy/KOnSl98cUL9OK+cPVGltygkYpBxg9RUnTkjJp\nvG72oGg3Amm/w8cDvTj97ikAx9KBCcnlRzR069acrFs7RwKXhhnvQMbyOaZu2855p7KRznih\nUBycA0CE+Z/pTtpHsKePu4xijae/emIT72ccGkZiqAE07heM80H5gBjmkWI2F6DNMZi3fJp4\n+UYxk0KPmzVE9QXKtmlb5Bk9aG7HvQ+GYZpAxNzUKfQ07+I01VCjrzTQhwU4FJkbqUA9A3ao\n1+9gikBJ3JAprZ8vOO9Lzj0pGyVxnigYinLbgeKXco5HWlUBVHHFJ64psQu3PJpOSvpzTsjb\n0+tCgdxxUiYg4b8KVSVU9+aQ428daUZqgQ2T1xTt646cUjL83XNRs21fQUDEkY/SjsKb5m9v\nanAFhQA5VKtntSLup2NwxnBpcheBz60gHrnaTmo9xPOOKkFIFy2AcUAJ1WgqN2elKPvHJpjd\nBTuA7he1SOqzW7owyCp4NMjO7A6VYj+VTkZUigk+QfjPpf2PXrmNsMMkE/r/AFrwPxBD9nb5\ncHcK+ov2htPSPVg4x5kg3E+v1r5n1yANM2/7q0/UfQ4fU4xFggdetY825VkI5z61vapD8xI6\nelYknfb8696ogwr4t5eHODWZLIFkKt/dwCK2L5PM5ZcDNZc0YUAnp1Bq0SZ0kYiQgnc3fFU5\n12qMH5vStGaELyxyWOazJlDXRGSvHU1RHUrbtzMeFUdqrTYKgtnb1qwSvmEKdy5xmo7pG34Q\n5TpVIOpnyLuG7djJ4prRm4kUbtoHXNTzQhmTAyV5K9qjR90mcYBPIqhleRjJJtC4A4471XXM\nwKYEag1ebaqFRkMTxxVWKDMhzw2cYNAmRyMbhQMbRj71QbhIjDPTjBp8khWRojk7TgkCgxfK\negU/nVCK6xvlV5APWmurRnBGfm+92qyY1RgpZicZxUU7HaqOflz2PNAEU2S2SuCuMe9I3zMx\nGRxzUjQq26Mkl153H0qNjGjKF54oAY0MhUFOijP/ANams22PgZyfmFP2nY+wkHr1pjDcmCMZ\n5IpgMbcyfKwVM8+9MUBnKhMKozu7UbUMuCmSOc0KrRxkE5LHI+lAg6Rk5CqWwKG3bhuGOxpN\nm7eQcN6Uj/LEoXn1B5pjHciQpjcevFMZWJJU7R0xT9pX5QcMaQyFV8sjLZ60iiOVOBtUAH5T\n9aY+VQrkZXjNTN3xz7U3y13LwNp5NIkZ5g5wvCjPvTNx3D5cnG41N5a53kgDOKjWXcPmHy+v\naqQCt80nAAOM1FtzGWxznAqSNtyl04XsKXcJIgH4Hb60ARzfvyoB2t3prKV4PPfIpW3ZB+8c\n4NKyswKjhaBjFYMmd3JOMGlWMs+w8Y53ULHvjBXjFJs3RsCSWouIDg5xyKQgLgKuSaUONgHQ\nE4xQ2I5SN3ai4DctJGM8bTmkRVw2WIPbNGNwyelLgNtOc84NMQ18rjDDcRRuO1C+OOD70rfM\nrcDPY0wYCBCdxzzxQMduVWwvCN+lMVgflJU7TwaMFVIQbsnNJGFVScGkyhfMLM248d/pTt44\nZV+XtTfMIYZK9KRsk9Tk9vSkSwQsrN3LHoaCu3duzjtSvHu2k/eBx7U1vm3qMtjkiqEI5G0E\nAgr1xRIwZQT0znNKu5MLjK9c0nysHyPlP6UANkl8tSIzwfWnbgGwOrAUm07SOqDjnpSx5ZDg\nduD/AIUupQL8uATnnAHpSLGJGdP4aFbcoYjCdMn1pZIyoAB+Zj1FUJkbfLGEx3pzsWQDOABg\nUm4quC3OaGyF6/NnJoJF3rtUgYA6ijO/LZLAdGoZdwwpzmhVby2O4Mw4osMbztClck9aQ5
OP\nk2gUSljtIYgdDRKflKjr3NIOopXc3XDHmmk7chiC2eg6UNtYDaflHX3pWkO4MijGO9Ax7OCC\nSOoxTI9uFABXHahlY4CtuJOcU1v3me+D96gQrEIC2CeacwLRiQDljikx8xXO/I+XFJg+WrMC\nNvBUUgG7kBY7izjril80NGDj5u1OASPJwPoO9JGoDH92W7jPb2oAMFV4J5pflV+ThqTupLY3\ncAUNGN2DywNMBPMCuRj5m4I9aGTkuy5A6U5oxuDbhuHrTWh3LnfkZ+7QA1VZnRicOfSl3/xY\nznrj1pZAcLkbcGkHKY2Z55FIYSBZY8r1PG33pjYZVG3B6in7TJlzw69R7f40Y8zcxwuP0piG\nLiQ8DLfzpdx5BGGHNCn5So+Zui+3vTY4ypO88j9TQAowz4x8zdDjilVcNtY8g9T3oXcpHfno\nKbz5WQuG3d6BjlYs5I4HQVHy2fnIXocetTNIqt8zDPTaKhk7Rj5ecmmIVVONgOQeo9Kawl27\nAMnOKAW3MBnPc1MZCxXbkY4zSGM5jYMF+7xSxMi5O75m5waaY5VjLOcZPC0nzMFyPlxj60gQ\n6RJFYBmGG7CmcbeRnafu+tL8xYHB44xQ3XIX5vWjUQK2cyEYzwAe1P3PGo/iPpUcbbl9WpPm\nXcQfn7KaB2JAzcbRtLNyPagSFiw2846mmM2WUn5SP4hStjzM5yMY4pgKq8KG44+8KjVSVCP8\nxz1qZkAYKWAbHUVEEVW5J8wmgYMp67vu8ULuMxUjEYP3qWNV8x1fl+3pRgqpCkhyc0AN3FlI\nxt+bn6UuDyvYncT7Uq5i6gNu60nzKzdweuOwoEEu5GBwNrcKcURs6scDGRx60kfmMwD429s0\nKdzMS3fimIT7PtkZC4BXk5PekC7gNrcHvR5acvIcEjGabnYpBGApwKAHcKpB6fzoVioU7do6\nUg2eXv8AvPnGBQ25VDOQecBR60gFVY23EDB6UH5EXJ4oit2Ksx57U1SFyCMletMBy/dCk47D\n6UrMNpVePU00v5rAk8LSqm7e+cccCjqMVVKSDHK46mhsq2X+Zuue1JuPkna3ygc0LuZQD83G\nRQJgyszrIo4/u96FmDfMG5U4GBxSqu3Jck98UiYZWwQD1xSAAz7iWGQeeKRc4YKvvS+aN27q\nCvShH2xHIxnnNMB0kg2rs5fH3fWowo2kHp1JoZkaMAA+uaRVCSMrHcrLlW7UxCMwTkHPpUqt\n++3CPI29aayloT6LSJJmMqM9enepGCBQhJG45+6O1DSKinhiWOMml2u7gp1HWkZmkGWYDHb0\noARV79ccCgYGWxlqQNlsnkd6enzAjHyn9KYA33ckBlI6ZpkjTYXKgDpR5Y4Vefc05mby1Vl3\nRnqfekArBl2qwGevBpNqrks+ccYpNqjLDhVpWZWUhhgYzVCEeHyyqqevJ9qXcD/FkjgUA74R\nhunrTFXu3yL60wsKkYkYs3brSfMfkZcAdMf3aemX+UEAH+Kk8wrlWHzZ5+lSMRcNuyeAfloi\nXGSXB9T6mmggqyLzk5zREu1dig570AJnCEN06kgUu4ueueOOKf8ANHAc43McCozI4Kllzxji\ngBW/1aDGH9/SnO/zYccAdqaf3jBjxzUi5MoGM981QiJmDsVC/u24WhVdYz/CF4PNEjFVCEbs\nHBxSrmNSMZ780DFjDNgrwyjtSj5m4O12HIpFzsZV6MM/4UrbdqtzkDkUgGOfurnmpGWTo3zj\nFNKx7Q6s3XlaVv8AWYBwT0pARtIN2W4PsKkB2OCOcjFKg/dlywORxTIQWIDHBHP/ANahhsC7\nSqlgc560rKPMB6HutKcOVAPTnbSDLzZIw1IliqzeWcjJzgetMHQkAnB6049CWGCDxSNIXyzD\nd644oGIqk/eJDdTSqy7ivTimtgsMA8jIpI8HOTg0AOZTsLtwnTik48tT15pFUshxxzx9aXb8
\noA+9nn60BYVV8yRh90AZpDlkyo4Y8kUbiuQcknqPWlXg8HbjsKBAqhn+TgAd+9GflbdwPSkX\nPmOcZ5x/9enMAXGeOepoGhrOVjIIA44HqKazktGFjwMcGlYeYxBIIHTHUCiRQ0fzN8gp2GMb\n5mZhy3cU87lCDbgtSxsi9vpSu3zhsEntTJYp+XcTyc801W+UjHXpR8m45zyOaVlYKh3YGelS\nMZtfywS2MHinq21chck9QaVQq7435PUZ6U3lVViNwYdKroAnysoLLuxwPWm7k2hQu456U/ew\nG/qQOfpSbjtxgE4zzQhB8pyAPmHakGOdwwTTIx5bBDxnncOcU7bulJJ+6M9eKABctyeFHAqR\nvmIwMDHNR5LNkjDqc7RUv+uk39M9QaQCYCyIQmV6cUnz7z8oIzgc0HKqAp5J6UkjFcqF+ZTn\n6UxDizRrsKbivHHrSBSyhsexpT5isTwSwzmhpJGYE4C9DigYITuJxlR2po2+U0fIUnJFLuKZ\nCj/gVOjk+Xeo5PBFKwDC6SAFclfQ1IFEcxboMZ20wt52QGAGOmKRWQAB+T0piE3bQWPyknJ9\nhTWmVs+WCR61JIqqx+Xdx1oXavyEZ4zx1pgRrnKhzjPIYUrMGSQfxDmhXy2QvHpSgOkgBIG7\nvSAHXKq+O2aJVO5SBgHB5ofDZznIowGUHPtjPSmMU7MkFc46EUKG2tggDofWiLH3uu3saYGE\nahsbix/KgYoIZRGR8i8mjefqvaldmVsleDS+YU6JjtyaQhnBY7SWbH3qdtOxF2/MvfNDbl3O\nOBjG0etC4JDEnpzn1pDYBCykEZYdN1IAwjz94HsTStGX74btSBfmAB49aZIrf6ncST6KKbys\nYcLknipGUsu4ZQep7U3LFQGycenel1GIVRlzn5hzimh9y/MNu41Jt4JOMjkYpmC6AE+9UAcu\nwQD6fSk4aUipEwsh29MVGfvEBcmgCSRdyAs/Q9KDH+8Chwy9TTV8zpjBPJpqqyofU+lIB3C5\n3HK54AFKu1sMQRjnrTA23Bxlh6VIGLnkjJP+RUgNJC5YDryKU56MRz1xScDcGbv92nfIV3BM\nHsaoBrYXoDtxShfMKnGG60L2ZjuX0pvDcq2fTmkIc0heNxjBz+dOmj3RqxbjvUcb+WxBIJ9K\nkOWIOMLimMVju2shwuMEikXauFIJUrmhR0/hJPFCtubeR0GzFMBAytGojwpB70M5ZWVB360q\ngMy5jKds0FvMU7zgLyBjFIYSb48KJA3cbR09qeMtCDu+cnmmD5dp7t3pdpjldAdwxndSuSOX\nKszLzxgikjZdrfJhu1Pkk4DKAOOaRm8yNAwwCcimMbGR+PTNBH73B+UjkLR5m6QlOHHf2oON\nyu20Z9TSsMP4clSAo596VR+63ZKqelCr+7clsf7PrSsv+jouST/dqhCN8y8qTxxmlj28rt+8\nOB6Gn/OOwJ6dc0zaBxu3OaWwDMhFwwyo/Q0rKrAZz9aFKrjjJHelM2/g8+lAA2d3GMrQm3fz\nwT1psoVWHBYgVLlNuX5GPvDrT6CIlhEasIzgfWnbiNoCgmjaiLwCSRxTvLfrjbxw2aQDTlk3\nDg5xinPuVRnBGeKVMqwBfcfSmsoDHblivO2mKw4htpx0/lQVXhyPp6UrOYyQWwCM4pFBYYJy\nmM7aBiK4SMgrncaesbMNvJI9aWTaMKEC88D0pxV5G2K/GOSvf2oJFOPMCMMYGcipNqeWNx6n\ntUSHd8rAIR1OecVMuJGAH1FAD0TGT0qfyzuUlgTjIFMjUzHc/b+GpP3cbMcE4PTuKAJ4w3Xd\n7n3q5DGQFPl/NnOKqLGjpjcwB+b6+1XIF3FMuQnualiLybQoBxgnOcVoRsqts4xng1nRq0jj\nKliD1q9bxM83mMAQTjb60i0tDStSZPlXaD2PWrIhkY5XnAx+NNt0WPgfJnoKuWyK4Zc8+tSB\n
YtYRhRxnGDmra2zK2ODxUSRBVU4yOgqzHtKjYTnvxSESeW3k7GQFf61z+rRlyyoCzA46V0iK\nzLkbiKoz27+YY0K55JY9TTGcbc2ZVdmzDH+LvXLa5aqXMcoK4wR+HavRL63RYdzOVZedpriN\nQt5pJnEuCrNgt1plnLzRna0ig5zy3pimGQLnuCM1buVJd4ydqpxz0IqGzsZbnPljO08en0oF\nYjRmMfmKMMO2M1r+H/DtxfOHljk2sc8jv7Cur8A/DG68QagrPGzbG3EgfKPavfNC8B6V4fjE\nsiCWfH3WHSlewmcD4J+DrTbJ73bb26kPx9816QZINLh8jT7dF28ebjmrE1wZAy7SiDoo4FU/\nJ3Z+tTdksz5lkm3vKd+TwT1qPALDcOfU1otGPLyQSc1H9lHOVIz600SUNpzgD8acyMqjdzzV\nowHbtyAPrSLZliNxOBTFYpFQJN2SQaVV3NkDpV8W5Gen5Ux7fauR171QWKflMwwpwTzxTZIW\nyA/Ixmrnl5XgkHHNKYcxrnnPSgXKUVXEYO3JzT1YrkyLk4qZlVeTwOlDJtBA5zTuKxXWNXxu\n5HWk8vdMw28YqxtG7HTHXFHllju5GaLisRQxfwhQgpu75mKj2qxtCxjGaYqFDwRtqkwsQFPl\nA75pZVdh6ds1YktizbgflxnrSMq+TgKW96LgVv8AUp8i9sZqNoTw2c4q6YwqgLzxyDURX92B\n0FAitIjeWcnLmli+UZ28jrVkRhlLbcmmRosjfKMAetAiIjzMjdyaTy8FVz9FqxwvOMc4zQIT\nMc5ww6MfSqArqqmRlCgLjrTo1VjgHAFTyQ7VCryRzwKgjHzEJxnsaXUB6gqT5YH1prFmUN90\nZx060+PPlgNxzxipFYqrJtyGP3qNRFW4VtoUDnrUbqT86DBHrVqWNNpGDu/vU0w5XHbtRqFi\nJFZRvPynFPWFeucZ5PNO8t8YYYX+dI0YZh8p29KopITaFVSvDN2FJ5bcjG0Z607y/ugsEcdK\nWaN2AK8UCsNm2sy5AHGN1VgnncAM+D6VaO7yyhXf/tGlW38pN4bHqB1pCIZlLAKoJJ6mmNG2\n45wQKkjXymTjLHnrUrA8rjkn71UhMr8qvTAPFK6Hj5i3bipTlWCZUg8ZpId3zAHbj1FMCJlC\nE454/GkQZxs4x+tWWUMNwOOPxqMgwY3Djtj1oJGrIWYrtyQM/N0puQ7A4wzDBFL8z87fLcHH\nNPlYxMpIAzxxQMYqmNNoXLYxim+W0gVcjcvbFTbWJJORTtpYBlGKYWG7d0gBx60nlK0LDBU5\nztp2NkhGfl707yVBypOf9o0gK/khO+DinbCxdQNu0VM0m9cbM9s560MPNwV6g80xshCMse0c\nkjNQ7RcJtIwy81MzbW2MvfOfakCn5n4UdNtUSRqdyDKn0FEajcwdd3pTt77EUp3+9SMvTtz1\noAJlbAwPwprRiQr2AHalWQbmKqX7CjaVk2DpjJFMY1VxuH3SvIPtSxZV9y5AbgVMgXcAc7ak\nMaqpXJB6j2oJK/k+YrAYyKdG2cAjbxjmpIY13HccnHOKWOP1PIPWrAd5OU2HOD0NAUjOWxjg\ninFnbevGR0FN3GOMMwy2ORTAT5lbcFGO5o2H7ynac9DRGp27cbs81IVG5fTvRcQIxjYk8Ie/\npSZEbbsgjswqaTCqc9BUWSzYKjbjIwOKBXECkJhTtdjnmlClmwh/PtUy/MFLDB7Gmhg0n3QM\nUAN8p/mZmVmpoCiQZznHJqx8pypOD14pFVWVgfm9qYxixhs/vAg9etSbGVOG3rUcce4sP+Wf\n8K0/aVi+UfN6UhCzKGxgcAdKFG23UfdUdqVmMcYLMD2pscg5B5HvUjE4MnXBxxS7hI2R1HX3\nqQtvlDrGBxj60mzHzMcH0oKRIuFOWbHpUltI6qxPzMe9R7R1X5jjoafD/rVGMHGTXNUXY6ab\nGT
3Xkx7jz/SoI7oq+8fMTyatXc6KzF4sAjoaphY9ygNtJ6iuHqdRauPLueTnpnae1ZJHlzFF\n+83TNX0bzMxbtpzwazrn5JnDHLevpTEUNatRfQ/MNxTt71wOoW5WRYxznv6V3s0x+zk7tozg\nD1NclqEbecVdcBf4vrVREctIrxs5zwO9Zd1GzHa8eM/MCa37m3MYZDyM5P0rFuGaRQN24Z4P\ntWnUGYl5GzcmLnp8prMuiryExj5+64rbu12LIV6jpWVdArIpxyRkkfrVCKu53U5OHIwTUisz\nQKRjAGMf1pkitJgROFGfummBSdwL5brtFPUZfVpWj3KNy4+6KmCiWEjb5W3rjpVWJiVQA+Sf\nTNTrI7K/mDGOB/jQMeyrNt2LlMYLKeDTRA/2jcsmFA4Sn7drGMt5cRxj3NIqqjHaOnGGNPoI\nguNwjAxtBP61E2zawYZkxjFWWJaM/Kp54Gaim3ySKCuzI5OOtSIotbtyN5YKfu0f6tfnXL9s\n9hU0/wDrXKcxgc4qHGG45IHr2qjMWRTI6gv1GdoPA/GkZgVIAJfOODS71kcxKv8ADkelI4RN\ngR8567e1MELnzmCs2XXnj0prvGXZ5AQgPAHf0pd0as46AjLe/tTfL6NHlQeQPSgCxFtbKeZ8\nzcleuPSui0G4nWSEyPyoyu3+tczHs8xedvfNdHpOqRxQqCFfb/EvXFaRdmZSVz2TwbeN8jzt\n5h6ZHQV6tp8zSWqMuQOteGeEdUUsqsdqnkkdK9q8NXSXFgQOew966tzmas9TZUbhuz8xpGT+\nAdepNLJuG0Kyjih488oM+pqhCiNYsZPXvQr4yAcjp0pFjypHTbz+FDLtwVO4HnFNFAv7uTlO\nR0NSNIZZORk9TS/e2kcEdRRyzltuPemMcDnG0Yp3mN1C/NRtAOBye+KT5l75FIseNxGTxT4s\nmQd89/SmeU23hvcipVwwCq31pASNu84FxkYxmrAY8HGagAKjA5qZVdcNxg0hkixnzCWbBojU\nliB+HvSbtxy45qWLv2xUlCFiyhSOc80m4NJjbjHenRnZy3LUvzSKWAHXpSuIN0Xo350Unln+\n8tFO4z9Jv4aU+gNNdttKoHXNfOnrhux1FCnnAFGWOcj6Ui+nekIcpCvzg0h9+lC8Zz1pG6gZ\nz7UBqCDbn0pVVV56E0vHHYU1h3oANvvSfdPSgnaD69qcxGBnpQA/cGxnikZj3Py0xvm6Uqrg\ncnNADlxxxkUuOeKQnn5RxS7gO9ACbuelJz16U7J5FIfmXNAhS3IyKGILdKGyfpSYDfhQMdtw\nvPWmKB0xRu796OnNUhDl4zTdw3YofgA0uMLnFJgDdaQkhfWnfexijaNp5yaAEx8vpQzBcfLz\nTjkYOMilzu5OBTYxn8Q7A0rHPGKcwB/xpv44FIQikMM96Xle1HGz5etJuO3aeaYCbt3TimtG\nGXBNO8s0KRjOM0AMWIZIWnYZVxipF9hilwcUhEbY2ikU89aew+TaKbtH3sZNDGObPHpQvTNH\nJXceB6U2RSuDn8KQDgOQaazEN0pV+7nOKQ520wJCu5uKmtxuG1j1NVRIVYDHFWYZPmGDijqI\n8E/aO04QyQzY46Bq+VtcjEm4kgCvsz9oexW48PxyqOY2wV9zXx34nt+WXP3eabEefat/rWX9\nfasSRfLYgYX2rodU2sjYPU4Ga564hYkFxk4qyDK1H7mAe9ZFw37vbzlenvWxdKFQgjPtWXcR\nlVJz061YmZF1N2Ix71m3OZMlhjHT1rQ5kYhl5J7/AM6hmCqAGG8g4qkiDK2yHp90c0kas0bA\ntletXFjZcqg4Y8jvUbQrbyNlePTtVJCKUgTaTyB/nioAoK5j+VByc9qsSbmXAxtJ3H8KrTZb\nafu7uaCiLzS7EgA4HBqPzpLhjvwjquc9M0nmIrsQcnoQO1MkmHmA9COvHaqEM52kPwWGeRiq\n7Ltw
m7LHnNS3e6RUwxcZ6e1MX5QTgbd3GKBDpJFnwCcMo+99KguMKFZhkn0qWSaNN6MhBbrU\nABZS+8HaMAetUIdvEeXHyN02nrj1qJpkkkJwOBjNO3luWPmP/L2qMRhV2kYyeooGNZt0TKR1\nqMpmNSD8wHOakaPzMDrtNKQvmEqMDH4UCIWjbzlwwzjJ+lRbh5Z3KWfBOfSrHHlnH3s8Go9w\nMZUja2MZplDVkQhNwwcdRSRlk+XuT/F3omhjCowDAgfe7UrZ25YdO9MRGu47txwwp6fdB3AD\nHSkILJ1yrGnsq4KqmMCkAxWAPXI7mojlicN8ueDR83lnjP0pOAuT8o6YoQCyMDhSOtOZDHg4\nDKBjbTFDquWGVzjil2mMl+cevahCYzdtTG3De1K2O4woGSPen+Yrc4+b+7UaiOMt8+/PLE1Q\nwZdrDGFYjg01kcxkK2eafuCruILL0FMUHccccZqQF44xkDvQZcZVRg9d1RvIeAozjk09WVW5\nXcpGaBDJF3YPB9PrRwVOTn1pwVEXCjr09qI1EfUgLnBqikMjZBlME55yOlN2DGV49RSfPtI4\nB3cAelKdpYhu/al1ECqTjHJweKbGuVAxuYHJFLG3zbQpTHakaP7wA4znFGoxrZWQ+X8wJpRG\ndx3HjH5U4Y3ZK4Wm7T5h4Kp1zmgQmGAAyDH39aauZEPy7GU5x7UqbVzubcWPShcMxIBDfWl1\nEK3zjht3fb60vPJQbPVaZ5YWbcBtGOlKvDZJ259aoBNm1uMnPUUbQoZe3rQdyyYVup60RpuZ\nlyFHvTAI2VVPdmPBPSmKoDkpuUjjNL90EfeKnpSlvlP6VI7iMQvBy4PWh8xcBecZ/CkLEYVV\n46mnrgtuXnIxTEMCo2ccsRupGwACxLHGduKeuGHTc4PFKdvmfMCZP7ooAb0VZY0OO6mjILld\nuMjPPShVMjYUsCePpTVXdhc5x+tMYsbbsrvGB2pgYg4J2nPWnDasZwu3mo9jyuCMbc0iRy7k\nJxz2o27FIJycc0NllILYbOAKVoWJHO04wSaQXGhQQu3p70KoeFgXAbOcnv7UOwwAB8oPIoY5\nkPHuOKYxdxVQqZGT1xSspIPUsO9KzkxqR1PBxSPJ8m0nL5wKQCYLJuA6UeYWXhiSPSl2lWPY\nelNXfuwq89aBinO1S4BGevpSlSW5OB/epu4NkOuB049aOCwwdr9D3oAFx5hB5cDr2/OldWlY\n7G7crTFxHHtYZXPanq4jVmEZUdxQFgOW2EjjptqPd8wAY5zmnrlQNh+XG4Gmx8bC/wAueuaN\nRC/I0zEMc4pshKlQvzcZP1pVYMXwcc4C0DaqygHHHNABuGQw+WQ9aawCyZbnAp7qFhViuCBS\nfebGe2cUwGCTfnIPzHGKUZ3BV+YDn6UivtUbxhs8YpFVdrHO0570DF2puPc9RmhgjNvP3upA\noVflJbikk3eUm3jByT/SgQrN1Kn5uv0pWby4tzfNkUisVO4jLH9BTWUnJxnnqaAEkYsiM2c0\nqZkjYvnb0FObHyoxwetDlgw2Y2enrSATmRI8MEUd6GcL64J4pfl8pgQFBPr0pPnCAnDDoKYC\nK/ybtvyZw1OzECzBWUYwCetNZT9wjCnkkUvzbgu7J7CgBBt2ZK4Gc5PenRorZfG0U1S8jNuG\nPrQZAw2NlQDzQAiJmTLH6Cnqw25AywPOaZJ869cEdDQWbeNnK45oC4cjLA5z19aaGZhhD/8A\nWpdoZBngk0zPltggkZ60uo73HYaR8EgY70vzcJtzzz9KYuOSRwTg7adzHHiM4QHjNMQvG4nP\nfAHpTSy+Sy+9KzbecYLcD603GIW3HDd/rQA/aY2aHbu74J6UXSqXXYCT1KkURfe3seSOWoO7\nqpxQBG2FBjGMnk07aFCjcvHvmkwOc4YEciiONWX5F4FAA3zSfI2GHJT1pMkLlSBn+I0ileSR\nkdCaVY
04G7cRzTAY3yqzfeOOTRtJxz/wE08fOuT0zjHrSmRWX7v3uGNMBEUNuI4UcbfWkZiq\n7DkE0qx/IyA8D+HvTpAOBuzg7cUgGtvjYJnIx1o3DzF4wMc0BOuG3KDjNHmbvfJ24oGJtAYk\ncg/pTsBm+U4OKRlZfQdiKFXLcYz1oJsDL8qnO0+1O2llZSwx3zTdp8zOMjt9aTbtXcRyTg0D\nDywN205FOkLY3Y2nrxSYUDGdrCnYJkCseGoAjUqW+ZtuR2pI/l3q3PHFLtDNt2/N2oO2Rcnj\nHt+lIV9QZvugpgYx9aJPmVdhwM/MKNxkcMGCr6Un3Mk/NuNAxGG1iVzgHt0p3+rXk7+OAO1L\nub5gowmMGmqxjXpg9s1QCuvlxg9Q3GPeg/6o59cfSmv/AAkNkZ70sihmYkEY6nHBFAXFk2+W\nM449KOcAE4BoXHC44boaQMfuHnnvSARjvY7VKkCl3BlYAZFORiu4BTk8c0xY8RuuOSeaBCfN\n5gCDoMn0pdwaQhT83X6+1NkZ41wDgU3hVLYywOAaBE+8SNuI4A/I1GrlU+ZsgUpj3BSob1bF\nC7F3kDaTQAkfltK4LE54AokcR8H+H0pvl4+b+Mnk1IzGRiWA4FADMK3Tgnk09SFUjqCOSaQb\n+/3uox0pMOI2dyNvcU7gH3cBDzjqaI+G3Zx9aPuxkleP50i7G+8SOOKWow5bOeacMNgjg4xS\nbmEgQLxjlu1BYZwRz220gBdqqA3K9h70knmKuFAKkZ+hpSp+9gOO603b5i/K+B/doGPH3Rkh\nTt9O9NVmxtPDe9ObHkjBG7PejlY/n5Ycj1oExNu1SC/Oce+aad2CDwOmaVgAp+XJzzSMm+Lu\nyjmgkHZlQYGD0FDZbYxHUZ6dKThl+9x6US45AbaN3FMYfdUNglc/lTcx5IjYls5+tPVmTpxz\nwKaFK87QMHlh70w1HjGGfuvTimYEjOx+Ud6dt2/KOh6A03eWH9184NKwCMrKykA5bk/SnPtk\nWP1/u0pY5xkDHagkYDLywPSmA5cbmO0IR/EaijUNlnHy+vrT1++XYbgTz7Unlq28txt5oKYg\nZeWxk9KduLkZQ5xjaO9DESKh24bGaNrli6kllHT+77UyQYfdXbyeOtNZd6jB+6cUvzfK3U9T\njtTtp3BlHGcjFTYoYRlgc8+9Oy5k3bcGkXjzGcYP+elDb8AA5GM+9BLBmzGeTjqRSFC+2QHA\nZcbTTlHmKwA5I4pp+WMEthugpiBVCLw31+npTSUxtIxGx/KpPk8s5XJAzSMB8jlNhxxSLGoo\njYDGO2407eGY/LkKOGpjRlOpGDzT0+ZcdscUCEZfmQA7u5pzY3bguR0pqyKsXK80sm7jZ0YU\nyQ3FMEDdz+NO+6rHPGc7v6U1njXHJOePxpsY3K3zc570xseG24Ln5euacrKd5Xk44xUbLuOT\nyOm2nRKY8qgyT0pAJGwVSDxkY6Uzd+8AZM44qwd7Kc4GPWq+4M2SSEH8XvSAdJnG5uBnGBQM\npkhg2RgUpzwS4Yf3aT/V53DB7VQCR5ZPm9MYpPvLk9R0FIqt5Wejbqc3yyYJATrigBCvT+Fs\n52mlZfn3bQQeeDSb0LHk8fxGlXYsnIxkZC0gDcQpOcCnNny153k9hTAFYlWzjqQKbGNsfJwM\n8fSgB3ysqkjevWgqjseCw7c0kifLkHHGKRcwRjawyT0NMB24bd20jsBRzj5ucDPSmsrI2W+6\neM087oWPIA6kUgBlZVGD85GcH0pgbdGGA5Bp4Jkk35xn19KbJtbJHCr6d6kLD5DIV3kcnt2p\nPmUDPBPehlbC4fAxnmjd8wYZUZwTigBHjZTgYGe+aXJ2HOCRxxQMGRjjcp6MaTG35t4C0xoO\nVjC7sHuaeQ7ABR81MYHgAjaTmmtI3mqTlgDgU7jHtIcAnkg8ihQqy78EL1Cmk+7u4+bOcUvX\nIblT/nFA
mC43ct15wKCoZgCAAvI29vekRlPUbewoiiKs4JIyOc0Ei/xE43E80p28Fhk/3QaZ\n5e6Mk5HYVKG2qCw2L+tSOw1YxD/DyecUu1VwFBCnsfWgLuY5J3evpSSEN1OSBwBVIAVV4UjG\nTjNKcedtOduaFXcFIX5h70MSykk7W6GkOyFf+LjkHj1+tDEIyhuWYYB96MAY5ZTjHSk4dSpY\n5XoaBD/MYRHncy9KiZj8occtyf6U6NTGzOSCfSgMHG77/PBoAVmLybQoUY70u1lOEHy461Gu\nfMw64Oc7akRgQzKSg9D/ACoAB8o2MeT/ABelOZG8zDsM44A6U1fvhRy2PvUKx3EAdOtMkdvS\nOMr1buR29qJPLZVYphv7ppu5lTBHJ6GnLiNhkZAGaBiKx3MHHIGRikXcxDSfKMdqUOBuDHBY\n5B/pSSM27YrZz19qYw3CMMc/jTjkAYXcp61DcNu2qeV6HA61NyrgK2+PGCDRuAm3c3ysOO1I\nuE+YiknZduU4A9BT5V8rY+8Ekfd9KAEdjHgvwSenXrSyKsbYDhsjoKQDooJx13UgEcgK5JPX\nNAuo770Z/vdA1O2+aFJBAXj60oYCMNKue1RrGSo3HAB/OgAdSq78YGeae2GYlMhccKKJEcKS\np3RjqtN84lgVwBikMe2CRhOAOaex2kKFxkckdKjZQIyw+XnkZoZjsXBIoEIzbm3FcKTgH0p7\nxs0arjy9pzwetNaFl5HXruqSRRuV0zjGW/rTEBhLMAxx3/8ArVYZy+1guzB6UzYHjVt2VPTN\nSqwbB5MeMikFhIGLPuJ4Yfdq1CBNdbWcK2M4HSoVh+75YAHerEO3zNxAAxjHekJlrBlQDgY6\ne1Pt13OEzjByfeoYk81cfdwc1ZgjK7cDPzc0gNSJl5dSVA4I9au2KhpNoYKW5+btVVVbyyoX\nAU521oWkBEe502tuzz1xQNmhFneMbTGBg+uasWsMk8528IBk471XRdrjHfmtGzjwwKcknjaa\nh7jNW3jDRKgHGOvp71Zhszt2gcf3vWltoRA28clhg1cDbkUD73Q1IyGOFlUuo2Cq09i0jCZF\n2gnrW2sIC44zj8qa1qkkbjf2zQI4TV7c7SduS3UmuU1LS5T8u7ceqgcV22sQeZcAru2oMEDp\nWdJavdTR/uyVJx060rlanFw+HZrqTIXzR1YkdMdq9U+G/wAJTfbrq+ia2tMHHHUV13w/+Hqz\nRC+v02Qr91GGMiu+mlC5hgURwAYCdqV2Iz7O0tdDtRaWMCxRqMbsctUbIZM55arLR7QHPTp9\naQKBJ6e1AFJo9y7SSDUTRnaQVwc1pNHuUnoR2qJkMjEjAPpTEVGjDMBtzQ8PmL83X9auNCRj\nbz70jQruySd1MVigbVVXJAao1tSI8KOGPrWm9uFB2jjFN8nEYGOv6VQjOaHBC7TkfrTWt/72\nMenetLyjyV52jmmqqNHvMfNMDNaI7NoAHeovJyoI4xWssai3LEfNmoWs2I3cAUwM426iM7vm\n71GFOflTIPFai2u35iPzojtdrZbjuPSgDJntSoCr97qaCrRrlSGXpWo0aux+XI9aryWob5mG\nKZJT8vdglTilMR29lq61v90Z+Wmy23zY5UHpuoJKbwKzliOoxTWUbAqqRirc1vtYFW3EdaPK\ndmwRtBphYpNGxYYx9aPL/dkkc/SrS20ijHUk85oaPauCCo96LisU2iMQyucY5pGUqBkZHqKt\n/LGSD8xIxSLCrIcjgcAUXDUpojMPlFPZW3KSflPYVY+zmNT2BpRGNgz17GmgsV1YhmI7elRS\nIOxGTzxVo23yvhsUzYoTGOOhpisVk3FSzfQYqWTf8gH3cVIiKq/e+QdBSbTEF+bJzkUCsVtp\nZunTvTeNpVs7iastHyxI9yadFHuXePmWgRAIyyjbzt605oztBPX0FSeXlcxj5PalZOMg0FIg\nkVVZeOT1pj
JuyQ2CKmZcnnq3FJGm3cB1HBoAhk3yBSilRTlUr1IOakz16mlWMgYKc9RVCsUy\nfLRWA9s1IMs6nAHH8VTlQsP3M4OBRNjdkjJI5NMRAtusjkgZI9ac8bLGSeT0/GnRx7c7T2p6\nx4jVmcsxP3fSgCrHHJIh3fK4p6grFGzcuf0qWTO4vj7v6ml8sdzk9aBWIGjG3OeM8+1NdSV2\ng7+fvGppI+menrSKv7wBfz9aLjEVCzYJwBTyqsBxg55pyDauCMknvQ2W+761QiPbtZSvKHrm\nkWNOR97mp1VVjwRnFJGpaM8BfbvTAheEoCc4X0qLyFVfkfDE5JqwSWUg9aTaFjx1b2piK21v\nMzjoeDSCLbJgrgE1P8wbdjjpilZMqAByDzTEQhFGdwzzTH+V+F3JUu0+YS64SjyD8xUYHUUA\nQkO0O1Rt9vanKq5AUjOKVlZmBHAHUetSRgchV+b3pgNSPLbs5UUjqSwbBJz+FPWORk2kdDml\nkUghgeB296QCf8sygT5j3pAskKYxleuTT1i24VmwW/ip5UOpyfnUY20ARqyA8nJbq1KrAKc8\nikjj8tQrNn1FTN+6UlulUhCbFkVT933p68fw7xTV+dgU4HvUmHVSx+9TEMboBn68Um1WUgDH\nbdUiyCY9NtAjKblA991BRGYJNybyCvpT/J+dgMBjwGPSnsu2Nc5d+pNJxtO0jHp3ouSMLKq4\nAyw4LUqruY7cEgcdqd91QoIJ7ilW387knYOtADecLngigNnLhiPWnNbllJQbQPem+VuUKxz3\noBjOOH2kgcjNPMZfDEAHqac21TtA4IpqM2750KjHGaGAbWIGOBnnbTvLb6gGpFG6FmTGO9Jg\nkZXB9h1pFDdh3bs/lS8LIGzznP5VIyqqgDjPXFJ5abc571jI1gMv5hNHvZc46jNY6yNLJ527\nCg4IraucRxk+WGrnWLecwTlCc81wNanYjbimVTu42+1ZepQl52JYLxuIFT29wIV2vzUF8xlk\nBA60mUZk0iPb7WUk9ePSsTWmUwhz97PC+1b0iOjYf5B6n09Kx9UIuS7LHtXpmrQmcrfTJJG0\nUqlQTw2axLqMw4WR+CeNo5Fb11FFuYy5dV6DvWJeSDzM+Wdw6VqgMadSzPuBKkVnyLHCp3Dt\nk5rduFSRCN+JAu41i3TDyQducnmqEZ1w0bwhQSgHNRxvtQpjk8g1Lc7FXLDLA5FQed5kgbHz\ndhQFh0bCRQRl8HGO4q75jiNcjLFsFfaqMK7Jdp+XJy2O3vU8czMok3blGcDuabAvSbWkA3As\nDwT0HtQNzZEZCsDkdxTPMdo1YR/vG5LHtUkysroAQWxn2qRdQZtyMeA7DlveoWyVXLfdHJqU\nuI1chcjtUcitIEd1yR6UFFS4jWP/AJaNj0UdaZNhcrKvl7uanm4Ubfm3dSD0qvNGd2wHfxnn\ntVmYxmjQEAZQdP8ACkO1fmK9v17U7yyrMrDIQ5xjjPrSMrNI2WU7j07UCHMsPyGRsD+L0PtU\nbndcMNrKo5Ce1S7jGyK4BQHrimtyZfn9w3t6UCY/ekbK2z5DwauWjRx3G7ZhcfLt7e9UPJeO\nFBK/mA8qqdvrU8ZZZhsOFxzmqRLPQPDc0iGM7uMc+/vXtvhG4KSBXcZIBwp4rwTQbh9sOxwr\nZwfp6V6/4RuEDRhW3tnPPr6V0xZyTPUYlMm5iOR0BqaPeUOBjA5psILRKzDaxAp5i2jJPynq\nasSEGHUHdhqYq7JyfvgjmpdqqxwM7Rim7QY8qdrZzxVFDwojj6Uiqx+XPXmlEpeM/L3p5wW3\nH5eMUgHQgoxHXPel3Ertx0P3qI2WPjqPWpY1G4Buh5ploFG1wcZ96FyuSBjB6VL5KiM8984o\n8k9c5B7ipKEXlsK2Qecmp4oyysc5ApI18tNu3j+9UqsY48KoGe/rSAaz7iNzYPGABVsLnHHD\ncVVXLONw4FSR
784LZC9KQExX7wYAY4zTIx0XufWkdSzZZuvpTtrcg8gCkA77D/tD86KZu96K\nNRn6Stk4GKVVO3HrTV+779qG3YxnFfPHsC7TjGeaFwv1FJ90880L8zHJ4oELw3fNRqqs2R16\nVLjaoIGBSbcqSKBAOF+UZ+tKyEr6D2oXp7U4fKnSgBm3p3FOXbzu6UKdvbrRw2MikNATxt6G\nkX5eCKdgdetKqhs880xCA44FDYDdATSYwOlKy91/WmNifhzRuXnJpVfb25pmBuOR1pEj92Vx\n2pFI2+lKMZxjik4x60DDjFDctjNIygjOOaaYz1zTAe2VTmnfexg8UMcpyKFXbSAQ/e4yKdyz\nc0nKj1p3oQaBDWB3UfdznmlbgcdaXjHvQwEzuWkZAq88+lO42n1pGQ7eDkCmA1fmXb0o45GP\nxpW+705pGzwCetMAUHaOaUKRkYoH0pjzBDnPFDAcv3txPPpTlIwcmohcxsvHrQsys2AM0gJM\nDaWpq+vSjOcjFO+6AKQDVJ3nPIp5+770bh6UhODnqKYDedvC0E9aXcS3XHqKGwo9qAGqpxhm\n+lTw/KATzVbbu+YHNTKTwaBHEfG2ESeFWLHAUZHHf0/GvizxRGVuHA7kg+1fevjnTxqPhW9i\ndVb90SN30r4Y8aQ+VdyZGWUkZ/GgR5brcI3Fccqc1zsx+YndnFddrCFzuIxuFcpfIUyqjAz9\n6tCTI1DO7cVIzwMetZVwm5WDE5xz9a2tSbZDkjntWDdT7o9qg7s5JpkspSKAcgHdjmq7YmiB\nC7e9TtukkB3FcmmtlQy8YzitCTJmLwyfI+Cap3EjMjBDkk5+taN6ELAbcsF69qzplG3Ypxzn\nFMXUhjjLRbmO05zt96iuM+SAw5HOKnzvUxnletRSqHVdnDDt600MotGVK5GC3OKX7N8xL48z\n2qY/3Sdyk5JNI258lB5Y96oRVnXMZcnDdMVXkQggpxtNXmAZGY/ex+FUnU+UW35UnJx1oJGs\nryZ3kbKhMXmBSvG09qfMyBkZQcd6jmJXBQ5LDnFUOwjxlt/lkMR0X1qNcbR3zwT71JuVVwv3\nSOvcmm712oPegLDfMdQQyn8BTdwkjVM7cmpJiVZ+c1CsLtGMff6gGmAxs7lKNgdCKTl2YY3A\ndc9qfgNweq+vY0jfvEG3KEnBz/OgQxWZkwh+b3pjSbIyw4PT8alCbVYo2cdajWMeY3pTuAR7\n5EUH5STz701WP7wMDuBxtp0i7lBPC5ob7jMOvY1IDVXy1JIxTTmQglgT1xTtzMDu64GRT2wr\nFtuGGAFplIhbO3dgq2aRmfaWDY9almc+cCTjjlajOHyAMDGaEhIj+5jsDzSSHkcfMeppXYlQ\nNv0pJs7snK8Y47UwFHzEKDvXOaauI3Z1G0H71O4aOMrwAecU2QKzOqn8DRoIQjcWbpleMU1G\nRoPmBAA6/wBKe8n3QBt2jBpoKvlN2TjI470rAJtMa46o44b0pWC7VjPzHuaVXbydrLk9TTW+\naRCPlUDiqGK0ZY7157UigrnI+bOaTIaQjOO9IFw2Nx9aQIRWyzlhhqb5Z2nnDdTUm92Us3EY\nPA71HIoUhgck96LjYp+VdhbOelNHmBTkcDih2G3ldpzwxpY/9Zgg4x96kJgVKqdqAg9aThXA\nYFQRxTl27SMFu+aUx7oxtfk8kdxQhEMbtyoOSM8kUcLHhxmQ8inqr/NiTj2oLF2Tcv8AwKqA\nYir5K7uucUMpZ9xwNvFOZCu7GGAORSSL5kajq2fy+tADUV1Y+uOtNVy3IGQv3hUrbgVHQ9Di\nmvGrSf3fXFAxSuwblP3ucU3hFwDhvSkVWjjYgFzkgfSlVsFPly3Sl1AQ9QqKVxzTi+1emWbp\nR8yBzkAZ5z1ppG5uOeOCKYC7TkEqVCjJwajTCjcBg9ue1TcgAsd3GCKjdhGgKjJBzu9vSgBF\nbdtxyM9x1oaNS2
FODuzT2YKu5RndzURwD+P3qBCkD5m3bSDgUSKVTdu3H+7SvIittAz3zjvT\nMsmSfn5yTSGKV2qoPGeTSKzxq2TkE/pSt8yliN46g0m4s3yDaCM0aiFTCnIGFPemSr0A9c5q\nTftC5GQetNDHdjGQORTAQIu3aGIbrTmkYqDkZ6Zo3E5JXr3pFbcrAJzQMC3lndjjoT/WnMw2\ngAbB2aiQFlTIB7cUu4M5IxuxtoGiNYZPMypG/uvrQu5VcAk80vKoF7Use/nJGPWgQ1YxtwTz\n2pHwyjzPm7Z+lOX5ecYDdKVVPQ845waAI2bDrtC565pTJt57t1FKrB0Ygc9MUDdEQWUHI5Pa\ngQ1VY42glOmKJFJ4HysOKU8qPmOF9KD0A3fN1OaAG8fdZc470jhSxyuV9BS91JGMnBoijZyC\nBkUAIylh8xwO39KUswZSoyM8rSL95huyegNKOFwvKA8j3oAOGYsW4Y9f6Uuf4W6A8UzeuSsn\nQn5T79hTkVo3O/5nAzikMTjcwYb2PTHUUH5cIPmzSKowxJxnsOtKzHaoJy3oKBiBUVSQN3PX\nvSnDdeDjmneYVBLLkHq3pSRDzOjcdCTQIRWbA2E5zjn0pWUR/MucD8xTZlZUx3B49aNwC+WS\ncsM4pgRQybt5J5NS4HDA5kPXNOWKPyyRwQMtUbRgSLlsHIzQIJVKONx3emKf5QbcF4Jpd7MX\n/iXOAabsaNQEGeeWoGNddxUcfLSMxkLEkqo/hx1o2iRSUyTnk0BcLvVshuD9aXUQisFVsAqW\n5pGj/d8Ak0+U7WDenWmtvwxztX726mApUqybzgUNtbLlvnzxxSE5UHO8nvigqZpAgYKaADLM\nOT944pqs8jH5AoXjI74pz/KMK3f73pRG25yM5I556GgBu1DsYnbupWxH86nb220qruXDEZ6g\ne1NceZG/HzDpQMRFDDGdue1CvsZwBnjAqRYfMbhtmBnNMYtEuG6dc0CBiSsZyBtpvCsFIxz1\nPQ0knEgBUEHkVJxIpBHK8jNADTwXYjLH5f8A69O3L8wbjHGfemn94rOM7sZNKCGC/wB1ufag\nY1e27iiYjywFzu3cGnKxKnnIzgZoeNmyAwzjkDtVAJMfvP1ZeDRKrIq4PfDGnNIrIrhOpwfe\nmjJyxbI9KCRyMq7tmc9ie9NyzLuJ+TNC7VkTccZXNCyDG1RmM/e9aNRisuZc5BzSbRwF5cNS\nMSrAFdy9jT/lRxs+8Rkg0dQBlb+EZPT6Um4yR7WITnsKb5hVWzjPXigMzD5sLu9aQhHKxqQV\nJ9WFKvb5RuHrTvM3A5AC5xu60xss25sAjjA/nTGIuQzD+90HvRIxjjUOOScUqoWhwSEbPBoj\nwzYk+btzQSNV9ykMelSfOygDmNuM0wYVjxlelAUZ2jKoe3vRqAYEac/Kc4zSsSY+G3EHIxSA\noBlhjIx+NImFGAcnON3apKHtIW2kHikZW3fKcetNb5TjBAzSsTuyDxQFhZI/mzv59KYzE8FR\nyc/SnFOA4ILUBMnJxtP8VArDUJBdtx9hmmmUSMAx2nHpTmU87Rg+ppzwFWBGGoENK4YnG4Y4\nFBXA4OGPVTRuAYIT1pNwCHByM8tQOwnz7uSfpTgN24Lyp4NLuySBJwvfFIHDAANtT1p3EGRg\nk9MYFC9gzgDHOaPlXIBLDpnFDKitn7y980DB1+UHdlR2pflSQBRhSOKSVPmAHI6ge1JyzgEc\nZpDHcFuDjt9aQqF+UevQU/Yq7gTkDkYqL/lpkkjI69hQAqqEZvQc4pV27ePmkYjPtScrHtPD\nMaRnjwFyV7rjvQIecrM397+VNY988e1GQZB6Hin7ljYKASAelMRFuT+Ec9yaD3DfKW5pVbEr\nErnJ4WmyYdjvbpz9PagB248YG4jilUssbEnjODn1oVu+Npx0qNo8EFvunk0FDtzZBI9vpTiS\nrYYZFIVOAOgz+dH3
cluQtMVhAucgfKx6sad95geAvTNNZleTA7jOaT93IrKARjqfWmLqC5WN\nl3d+MUnEKggkseuKcGVtqgdORQ21s4+UZ4z61JQ3lV2g7j1GO1Pbfwy8AjlaUsfubSA3G8et\nNUMvOd47HvTFYRRtjxyJP6UpX5g6tgDqKB833vlahI9xGW2gH86Bi7w0mSM85waG38npk8/S\nmyKSzAkNjnFOD/vEYZ2Y+7SEI23+F8jHQUixnywwPAP3TR5g5JXAzR97LZI9MVQh0mJW4GKQ\n72UbkY454/lTPMXyuTtPc0rFwVXd8vUe9SwExt7E7uee3tUm4qmB09qTKtJ0Io2bvvcelAw3\ndSV+9xS4e3Tk596RpU3YC5oc+Y2AuFB/CmII23MGHAPJBFDgqpQnO45+gp3nb5vnXauOKaq5\nY7uecg/yoBAqnucHGBUq5RcsQNveoGYrIADvOckUbgG3kZ56UmNkn+u3fNjNR8KrRE5Xr+NJ\nKpVyGbkc574oSMK6knH8qoQ3C88Fjn5albLN13DvUSLt3Ac46E0u07cFsE84HegAYfe6gkcU\n4bl2FlyAPzpPux5J+bONvpSsMY+bcf5UAJuCy7gMdQBQzDYoAwQc0u4R7mKZ560SEK2AQQRy\naQCeYA5OM5HWlUnlXXGe9BUbdoT3NM5PzMcrnpSH1FDExn1B6U55NwwAMcZpyBckngmmhTG5\nJXI7UdAsEgOV/jz/AAjoPenKQ7Ficr0574pNqrIQx4Ipse1lYdwcAnvTQuord2xuQ9KXKqnl\ngbe/zUzKrxn8PSllVVXJG7P50itgUgjDgkZzinQgfNu6E8D0psbBztIO5u/pTmQxrn7wHANM\nXmRqwVQAMk9T6VJtRMZ+cU0ny+44FNXDHLEj3pAPbcrB8gL0Apd21gp6Zy2aau5iRjkc/hS+\nYfLDEgKeCSKoAZSkzbiDj06ULyuMZOcmjlvmbGPX1pVjHDbsc/5FK4mEkYZvL6k8+1PdpXUq\nBlQMUzKsRIx2E/w9aPljUMcg9eaAHfNtUhty9OaZubdhhuANOG2Qb+qnqaSRX65AXGBjrQFw\n++28np6U/krk/LgZB9faj7u3YwDY5psjFxkcnqCelMBY/wB4hZOWHUd6VZCyHPIpsh2Ihj6H\n722jai4Ct7laB2FHzNmQk8dqazllPp3pcjaAHBOc0uER2PLA849KkBQDGqmNevZjSbVaIqFO\nOuB2pvGCMEt1BpWBXvjPpQA9dzSIVYEqKdkuzKeMnNNRhwyEA9MUbjHICvzj2/lQIIyxyFXk\ndycU3zVkw33SD+ftSsN0bD+L73ynP4UqyCSIOxzjjpTQhWffMAx2p2x2pql9zZ/1eeG96VsK\noYDdzkmlb7o2/wARyaRVhWxwGGW64pJt3DJgOTzRuUEBUJ/2fWpFTDbSNrHt6U15hYjnJbaA\nACPvGkLYX92NxY8ilXcjOJRhRwM9frTY12qyZDEcrikAg3+YzxKAuMZPepGVNkZU4PdSKQys\nFdcBsjmm+chkAVT8o5zRqJ7gWyuFbAp0Y24IPB9KRIQygL1/vdqX5WkCqdp9qNRDvmK8kMT3\noXKx5fgZ5NNkbdDt6EfnQULbTn0ABqhiqVUsrAgsOOe1OjxtVSvHQD1okUqwXAUZwaPmaTDn\ngcAUAJ5Y+btz0pxd34AGR0pWUquc5HoOtC/LliSwxngUEsaTvdn6nGOKejKkigKyHHzbqbGy\ntGTsIz61JARtAkG5u59KYD0TMZG75D0x2qdmKso2gLjpUUcny8AFSdoWnrGPtBRnywHNIZJA\nBNzjCKexq0seFJU5bOarrGY+ANueeKsKZZtqLtU0Ek8OGJI6ng1fhhffnb0HaqEK+XOCfmA4\nwK07YkqWbgHs1SyrFuFB5rDfuB4yetahLSY+bngVlwx/OrFO/LVtQofkKDPNSBbs1YSjoVxj\nmtS0ja1lDoMDODVC1y
J8Nyh6itqyhbg7t6dqQy7G244A+bqK0bK1a5ycY44+tQ2tud65A2nj\nFdJptoirsAwuKh7jSMowyCTBUkLwanuIdtqWAC8VvpZh84TA9aq6tYqLNioyD1zQVY88uB5c\nzZBYk8Ad812fgXwct48d1cjEec4IrI03SReXyxK2Sx2+uK9fjsY9J02G1h4Cr8ze9ZsZXuJh\nxFFhY1GAB0qu2Jmx/FipH8tWOBk460zcFUbeBjrSExqxBoycDAqFSVzgce9Tx7YyTnr0pfKE\njc9fWqQrFZFLcU+KMSBhgb+lWGj3YHQrSquVLKuOxNO4rFPywowR9Ka0ZVueuKt+WzOQRnji\nntGsi8t82OlFx2Kap5i4J5pwjVGzjPYVLsAUDoc8miRPL+Xqc9qaM3oQbTlhjB7mmSfLgCrH\n97PT9aGQeWNy7CexpjKkUZbIIzmpJIxuUE/hVjyzuGDwKVFXcWbB+tMXUqyR/Lkjk8YprIWj\nCnhqt4B69c5qLy927HXOaYyobdljwDle/wBaatuWAEh5NXmX0GPWhYSMkjJ7UEtGcYh0YE9s\n+lTeQjRtv+ZgODU+1m6j5acIOMngHpTuIoeSrKNw5x6Ux7ccVoPGXwKY8fOCOPWgdig1v867\nsktwKbd2+7LE5GOK0mhZuvNKLdPLJPXvmi4GN5CsuCOfWjydg4XvzWrJbqygKOnWkWFd3PzC\nhsDMaINgkfLmmJa5fIOADxWp5K7iNvU9KHgC5IH4U7iMuWJWzs3Fie1QvC5PCn0xWwqg42x8\njv2pnk7VbnOeQ1O4jHa3ZVCnineRlsHr2rR+yl1zncaY1qw5zn1pk2KLQvuK9OMg0gjOw/lV\n9l8zAA3EUxoVU9MjvQKxmtCEG0HaM5OKAvfoPQVd+zEtnZ8tOa1VnAAycdqB2MxvmY9jUnCs\nPkzxyatPalX5AWhbfG7n86YWKnlgSbOh60RwlctnOD071Yjjb7549qM5bIXA7t2oYiFow0ZY\nDaKZ5AOGUds1YZXCcjKn9Kj8vn5XIAqxeZCsbcvjtTX3NjYmT/eq0sJZiN2KaqlOpy1IGRLD\nuUZGAetNmj2yenNTuCvyk44zSbA0iMRxjpQBDtK/NjIpBb5jZicH2q4YyVzxg8VE0RztJwO5\nFUIgbjbnkinY+YEcAnpTypEg2twB3pdpwC+M54oERRxsxcN9zPFSRYZhuHTjNNGZGIBO0dRU\nyQll5G0Dmi4its3M2BznqaYM888+pFWeeQMDNQrGNu05DZzmquBDGSwbn60MDuBDCpdu3euO\nvQ0nkhVHoRyfSmFhH5TrnPaodryLjdtIqcR+XGcDcOtC7cqFbaWGRQKxH5eGU5zilbZy+OVq\nVfu4Ybj1oaFnVSAFz1pjsMMTMq/NgnmkVUj5PXNP8succkg9adgNyRTFaxB/EX7dcU+N0Dcj\nryDUjKNowvB70eTxyDu7GmTqBgRsEKSSeSaUKJXZUGVA709MLgfe9c0nyMxAwhzTAZ8u0E9B\n2pdrY3jkelSeRuJKjK4pyRhT8xwcdKpCsQrH8gG7g1L5QI5bmnhQygfd5o8olsk8UiiKMYbb\nywHJzS/LKWcYUDtipXTkqGP4UjIGJ2rgd6QrEPljqBtPrTmywVQ3P6U5tsi8jkcUqxoihqAZ\nH5YZgN5HNKY1UEbtpzgU4qA25RnngUBGI8xundfegBskQmYEfw8Gk2nOAcmpFXcuS2007+Is\nAPTNAWIFXa2OSvcUu0R4K0/a3KlsEc/WnsqsqlDjjkUhjY5N27K4Jp+4beeAoyc0KqtjI780\nq42uoXOTis2XEr6kyqoZDgsvB7CuZktwWYbyGznrXT6laqbDLvsVec1zU0yRsrEHBbGa4Zbn\nZHVak0AijXdK2Vxz9akkkHlFhwccE1Rt5VhuZWm5HQd8VZZSzAKcqOcipK1M251BmRldGZf7\n1ZN5NIVKbeMZyK17iFZm
dyCnaqMmWjdVIcqp9jiqRRzE7FXcFd6gckdqxnxJMV6sc49627rE\nIKqrEv1rIXbHcNuUptGAatMRjTRBW3kct2rPus7cbNpY4G7rWpcKxYJvBTrurPuIzLcNknjo\n3arEZF1neYwhDL95j0qqFDkktgY4q7ewyRRsZDk56L19qpKsTOA8beZ09qaEB3s8Yxjb3Hep\nozIzDYgx0yx6VGFdQx3YVeh9fansSFLcggcijcRc8zcACSwHGQOKcA6llPHfPtUDcKpz5a+n\nrU7FmXdyuePwpDG4KyfexgZobzGlQPnOc5FOTcYZVQgBBkK1Ktw21H25bHTtQMik+VDgZDcY\nxVbyzayMVXcCMNk5xVwIyqAR+8PIA6VBLD5qlxKokU9KogoBjblmEpJY85pyqTISy4Lfwk0+\ndDuLADpxj+dNCR/u90nmEc8fyqgHrGZE6bUAqMoMbSD8ozUqDygCp3dvLzUYLbnIPzAfebjH\ntSEOZUMSmMkNnvT12bSzbgc4xUGD/rEJ3kYB7e9G5t/l7vu8k0wsdNoEywyR/OrspyoU8g16\n74RnZWhlJx5h3D2rxjQ2iikBI++eGHUe9ep+GbpE8p43OQcdK2pnPKKPftNujd6ajScHpxU6\nqI3G7kAbqydBuDJag7lc4/EfWtTdnIK5x1b2rVGVrDhjHmDHzfnSsCcFRsNNZe4zjsafD85+\ndsjHSrGNYSRjC4fJ4qT5igVxjnmmdinTHOB1qXAZfl546UtyhVVWZeOAfzqVoxtyQQc8VGv7\nsZxg4qzvZoQwIzjkUAJwVGfoKcJPLXcO3GKSFl28febtUpBZth49aQxWy0fGdx71KqbVBPzH\nvUaliNincFp3JUBSV5qdRix/MSenPSpVY+meaiWNhJu61I5yuAMUXGSlfmB4x3z2pVY7cjlc\n4qPduCgA5FSbuCVHFA7BhfQ0Ubm9qKAsfpAO2Kdj15pMfKMjmlbA+lfPHrjdwNOZl4wOaZx2\nHFC4+lAmLllyD0ahVx34pWbNOVeORxQA0Y/i6VIxC8gcUmDg8U36c0AKzDHpQT3puGbPFJzi\nmIk5PTikXC7j3pPm7ClUj8TSEL/D6mkyB15NKG/AUmznJ60AKDkHIpv3frT8gde9NGM89KAA\nqeOcDvS8dKGYY6008KCKChzc9sUn8XPSl7ZJpDwPrQIUMcdKdyCMimn7oPanAHueKBoAQDnP\n4UjY7cUqhVpuw568U0Id6EUgb5eBThzkAUi52gcZpCE4Jo3Y9xQnysW9acoH0FADGY+lJ97n\nvTmU7uDSbSetMkRsr34qpcYZcZwPare3PHekkt93GKRRmKpXpzzV2BSoz09alFuFUcc0/jB4\n5oAcuOfWjHzUR7fSjbkZoAcQDmmtwny8GjoeOlIvysc9KYhGIPQc0bc8UvG3NJ81PqAmzbwO\nKljbGMjim4IXJ59KI2JajqAmsRfaNInjYYXYea+HfHtnjU7lSNpEjHH4191TK0trKgPO0/l3\nFfGXxYs1t/EV4qrt5PH15oH1PDdchPl5BB56VyV0fmZWHGfzrsvEcfyhlGAvWuUvFEgYqBmm\niDJvlSaIgjJxwK52e3bacLyvvW3cMzYA6ZwTWbfeZGp+71x+FWiGYsvy8g8j+E1WmnYQnIG4\nnpVxokmYn7voaz5MfNgbznHNWiOpRmAAIc7kYYJFUZFXDImMjgZ61pXMPlLkDA6ke9UWA3hi\nVBFUMiVXjjPGG6FahjALncCvsakkUiXapwh+YZqNsu3PzIO1MCGQmNWBUuexqpcSMzAsWUAd\nMVZmVzMJGOeOg61XumaRjkHGKpARx45y27I6GomRt3yD5cYxTkYSNl1bbjGAKbMSNojYAY+4\nOtImxC0nXI+7xyKjkUpGhUAN6Va2v0kP7kjgUzyyWZzxhcLVILELbi+489zmlypUnjpUbArJ\nIWJY4xTo2VlG7I4wKYhkil
tucjHOaZJvT94Q2M8U+RvMyHOeMACmqGPAfIx/FVdAIShw2G4J\n3CnjezDzMYxkUqqBC24YoVmVdwwxHFSAxlPJUjDDFR7eGGeBUhVmX5sL702SEq3yuOetMY3A\nZQ+cAdKZgyN75qQhVjYk4Hr2FJIUaSNlOBjmgBgH7xt/p92kjywYdR15pTgsXx7c96jxkYBI\nOeaBCyY2c/Lx1pI2yygfMcY20r/KqsF3ZOKVv3akDqGzRcZGz7gRjJ7+goZcIHzgelOkX5cj\np12imM67V7A9qYxokZV+Y/M3SkmUqoOc88470twyhgnbPelVgrfIM+opCGTFXZXQ7dp5FOZh\nJIQPvdd2OMUjMu4EDdz82KCCvBGFNMQSxkKCjdf1pu75SGU7DxQvzMFIwB3pPXc+Fz09aAG+\nWgXaMnHTilZTCuW/i4p4XA+U8nnio5ozuXn86EHUTafLJ5wODQF6Dg8U5s9uFPamtgfLgjbz\nSGDKdyiTkjotNKvuyBznmnug3NkneeAe9MXdIuOjDgk96Yh0bL5hPbuaTeOXT7ucZ9aRsRld\ni5Hc0r428JgdfrSATaZNwA2q3GaYVKrtGcLxuPelOSMqpz9aPnkY4ONv3qBoXy2j4PQ/MDTM\nhui49TTv4cbt208012VQrHgsM+1UIf8ANj/Z7mjPdVxz1ppztUlvvU4R5cgt8mMCgYjKWkOW\nwcVEq/IpDcBsgUZ8qPIG4k7aFiCtsz8nU0uoxzLuYuvC7vu+lKzszYzg9h60jOVjIUZINBYG\nTHByKYweP5uGwwFRo+Mn7w9MU4kRtnGRjmpAvzEBh5f0oJICwaTaMjd2PantH8oKDKrwcetO\nZQMkAGmsoijyPmyegoELlt3QbgM8imKPlJY8tzT2b5lDjaxpP9ZuP3SvUUANUBfuk9ORS/N5\nYOPl70R9QWXcOpp8YPzL0HULmgZBtG75TkHnFOWQschOO/tTtvPBw1MZA2cAk0hCo25SpBH8\nqFYJnaSB05pWztwvy4OaNokXPX/apgIASyqV+VTnIoVd0zMAFBzijlVJ3GiNhkc5A/SkFw+b\naRu3DPTNNkwrDA3dvxpwUgtsKrn5ufSmgNwDhTjP1oGLIdoUFfm6+9K+WY/NhiMgUhjLqCAc\n/wB7+lA+bCnl88/SkIakYGCTgAZNOXb82GMgIyMUqsFUsOxxiiFgu5lXjpVAR48wqeidcCnT\nfOhG7av0pQDyQMe1Iu0nLHBHSl1AjXHmKBkp3Ldqd0+UNhccMKCP++mPWlVPLXa3JU/epgKq\nIu4r8q5xk9zQ0e0EZxnvSO27JBB5yR2pDIVQqAOTkGgBpAJyOSo4pEbygGI3Buv1p+0Mcj5S\nKZJEGYkNtHYehoAa+ZGIwVUdKVmXcq/MH7sKUHaOfm7H60qEozH7xx6cilqV5h5gRsNw3ZOt\nJ8skmQCPUelOjkPkjCbgOS5pu5vvKvyt3zTEKyieRmHIUcGmLnaXBx2zU/liOEKvzHuB2qHC\nxxg8sQeVFAhWKBACcsOuaGG4AHkjkn1FLNyy4Abd+lNVlWYjkhRnIoAIxu3EdxwtLH9xdpIb\nPT1qNpGlck8Z5VcYqdTuYEkBtv60ARszLu4xzSIoYeWOB1570u4Z55fvQ2G4cfSkMGYrlCMd\nuaTbtVo+rY4ob7vPBzxQy5bKnLdM0wGLiRVY/Ljin7S0gAwV/v8Aem8FmDrx0pm0KFA9cYoE\nSRjbGwxzzSRqJcsE2kDvS+WVLAPgjvTEZ3jLKMdhQMBwqkqSTnmnNuKjAAA60Dc6jJGelBUc\nbyVOMmgQbUSZznjPQUMxkJDHC/SkdscheW5GaVmJkXcPlYfNigBJGc4JHy9Bj0pEA2nHJzin\nbsSDjaMY5oCkqwAxt+Y/SgCJWk+707ZpeY1RCdy/ep4bbwecdajAaHcOvHApoYn3lbrgnNSM\nPmKqNvy53eooXLLnjbt/yKTa
XVAevb6UwCNQoHykgDIpoXHCc8cVIzfMyM+U6Lj1pMEHAcFl\nFAhJWZtny5IHIpFX5d33WJ60/DMu98eppqsJFEo65wPpRqAfMzBS3zDmlbaWY469aTaPMLE+\n2aPulg/yrnj3pbAMZQ34c4pw+YYPU9CaXB55Ge/FMZVVTvODnO3vQJEu5l+RAFP96mSx+XGG\nY/e9KVYmZcbgD13UlxDvXncP9qmhsbuZtuR14FC/OrZOCpxR95lBbjFEYVmI75xmpJHR/LJg\njhhgE9KYq/NgeuOaXc7M2ei8UKhkwwO1u/vTDqKyqzDvtGKTcqx5cYGeKGwYmxkHOM0bjkDG\n4AYpFDmjl+83zjtg01W2tkLn+lOj3xsSzc9MU3aV3NnHPSgAVwzY2ZJ65/SnSKeQeoPNIWLN\nuKc4xxSJyrFm69RQAMS2WzxjBpQ+5sjIoVX3iPpGRndSeYVBXGcGgBPl+8FJf17U0MFYjbg+\ntSNuyuOKR9zOzkbUA5oGNztbJUEt1NIoMzcLjbSlV8rIPPpRGGOQPlyMfWgQKNyk5288+9If\nuqccA8Gja23HYDFHzNtCnAHU0CCRg0gcE59R0qRFyoG/vk0xt+4rjaMcUxM/getAXZLvRpPl\nGRk5pitIyHcdnzccdqVQinK9c9KGZhubOX9PSgVxWZ03IeBjK0K3ljzAAxBwM9qFXDAZJBHI\nNMU/NhRtOeR7UAL5m6U4PzY9KVmk2bwMkdqGZWzj88UbHZBh8fQ/pTAI/mwz/L3zTWUNu2nP\nOcU9l8xdvGaap4IQYI9aLFCMpkYMVwQMH2FNDAsU++oHUVICNuGb5s9KFUKxAGAKYEbMEwcc\ndhTt2xT/ABKw/Wgr8pyMN6etI0Z2qMAc5IoAQ5WNRt5Wl2CRgFH1pV+aT5mxRHHvYEHDHIFA\nhsuGwy8c4+lO4ZtuMqen1pIcbCxcZVtvNJu2lsHJJ4IpDDd1QNgr/OhSiqxJ+cnvSvGFUMBk\n/SkaMzLgfM3amFx8ar65Y0M25cE7selKqvhsqFK8DHpQuBwvyn1NMkYFYdBvl9Pal58v5h/F\n19KSQ7mZclmI5ApVjZF3hc8fxdqBjG4Y/wB3H50sbbVODxjNAkC7B97jBBpW+UEFMZ6NSAjM\nYcjIAYjNSN90bhxSNs8sHaSB78mlO0KpzkZpCYxZArCIHLdelL5ZIbc2z0z3qRcI7YUAkcGm\nupk2hvxFNAEvCFQvzdM0N93B4GKXI81gG3FeaQqxbLrgsOM0IY0szEAcjGBQzeYjf307etOX\nYkZKNlum30pN23aw53HDChjGFfusrj5vvAdRSsAkuN2VxgY7UZ2KeAOe1LuO0bE9zmmSxGIK\n7s5bPApUYqMYz7Uhw2ccL1pcfNtQkFhnI9KAGnARjtYjPUU+OPcAAMZ9aTIKY3H5TS/K8xct\ntTHpUgN3EFwq727Zpywqy8Sb+afG3f8Ah6gVH93g8EfxUAPKDaQPnGeB600qGX5F2j37Uo2g\ncMcZ6ULIY8hR8vcEUDsG0gkr6d6TyUBBL/N/dpdo2glSAfekLCNgNvPqaQw+/kYwARTTlmJJ\nxt4FPky2Anrz6U1oyvJGQOcg0xA25cALgnnJpGc902j07Urb8DkE43BD1xTcFlG8/eGcUCH8\nLw4HTIb0pjYMZcHPYD3pHUZAJGR3peGbcOvcUirio52FuQQeCRSqVaHcOUJ5YmnM23aRyoXF\nRrj75T5ew7UxDgqqrJt3k9GoGduw8qB196bksGKjqOD6Uu0IoUNk46e9MaFaJ3Od+3ikDLtK\nkZUDGPehV8tQpkOeuf6UrY25yF59MmgkXcQMDhh6ineW33x82e1JuMjZXJb0oQs0gGeRQAu0\nrs+X5V7imSGbzBhvMHY0/cVLDPBNNjXyZGKnpyRnrQAsgkDBWAU5zkdKX5VZn5+bihVJ+/0P\nOKTeFGV+YnOOKAAruVjnG0c0pY
ttKrtQryKaqYYbWIY9Qe/tSj91uX+HPT0pBYWPYqgr0zim\nhfLkztwSKeqg8Dp1FDsDHk/Njj3poBMxqo8xdinvSZCswUcjnB6Yoj3+UVYYJ6URvtjCv82P\n4aQCLleRyW6L1ojZvNZThuOgNKzRqyiMYYnjnpSrGyk7hzu4ancYR7N+0qSvbHrSqztNhlwO\n3NLIG3Y3fjTHUbgAnPXOaQh7b4eDtA9qGEa/Ixy+MkdqaW2H5RktwDRJtjkHG3PymmA5cOpC\nthfQ0fP5ZLBQM4ODQcI3HK9B6mj5TGc/NzyO1KwA2Rg55pzbQ24SZAGSe+aapLOVVMrjNO2Y\nUEYLetFihvmPIQwwy453elGF8xVRwF28N/SkUqoIkXIo8zcyhk3Y6YpkjNoDABvq1TqP3XHP\nqfX2pu3Dfdw+c7hTml2yHaowaAETEZIzjI49vam/L5m0L5TDtUm1s5K5Hr60ZB3MwHoFpiGm\nQL8xT2yKaFYNu5U9u9KuTHlecdjTkUiQJIOG7A1IwOfMwx3KD1pctIzHcCB0Jpm5Y2cKjDJw\nc0piIYDbg46Zpi6khBwMY3fpTNzN8u7b2JFS7y2OAoHFRx5hXGRy33gPWmJj23BsMdygfdFS\nRSI3y/dXGeajWHYmQ4zntUxVGwMYOPvUyrA0iFAUQqvQH3pOrBsYI6+tOgG4HIIOOM1JAvzA\nfdXPWiwFhVbgvnPYZqaLlixJyozio1UsxDcEdKs27I55XEucbgeAPekhE8LHcXO1V7CtGKNp\nIRGQueuRVYR/KWIzluCR1+lTwXDW7s7AY6YxUDuy7D5iyfeAVa1od7bJVG4HsKz7MLIpZl3B\njjjrWrGPJ2eXg4OMelAFqzR2mIRDv963rbMCDJ2/zzWXYyMjMx65rUjtzdKnBADZ3VDYje0v\n7wZh5nHB9K6PTZCzKuOCcGsOxjEOAvHFdToDJJc5CbmUd+/vWTZSOgj0lWgO4cYrmdYUq5hQ\nnb/OvT7ewH2Al/TJyK861C2F1fNGgPzNWXOapFjwB4bBmuLsx8x8dM810F0hmmIO5cete4fD\nH4L3Efg2KS5QxzS/N0yGB9/etLUPgwG3ERMe+5Rzn0qeYfKfOUkLMGCj5hTRHnGQRgV7Vqnw\ndmhyRbyH6DnFYN98K5YYz5fmS5PdcAfjT5g5TzCNlk3Dbin7CEVlxtHVvSuwvvh/dw5HlMmP\nTvWXceFru3jbEbMB1xVKRFjEZcYbf+NI2N+AeauSaTLbgeaCD16VD9ndZMCPnrmjmERqAwJH\n3h3poUxtuzUzwsoyRt5qHyZBnccU+YQyT5h0xuOKbtO/A+btVlIw6Dd1HapdqniquFrlbysR\ntg7X9qZJGXK4YcVZeEI5bNRCP5S23NPmFYYqljz979Ka8e3B609UWQkA4I54p8mXxg4xVKQr\nEXqwHPTFIkZ8wMRjipGUNhcYOc5qTZnknj2p3AhkT5Qvf1pjISD83Pap2Ur1GR60zySWTbwG\nbnPahsTIUBC7eoPXFTbBsCbcj+VTLHt+UAHHU01AMlmGDnFNElRk3DikVd3PRatqoRiT0oaA\nE4zhcZNCGV1jXyzvJyOlM2+YvIIFWVIGF6j6U1owpOTiqArhQi8jA6U0xrGmB165q2qhY/mO\nV/WhYR1J3CkBUPybSRz60ojDMG3cmrLRhQSRyeMUojLbcEcUyblNYju2g7R61FwGO4cVekXK\nt/FUa25Zc5oArrCFBAGKh8gCQlj8tXo4ieAvI65qOS3LsPrRcCr5Kr8yrimzW/3XY4z2FXli\nJVtwx6U77OsjLk8YzTuBnJDgZYkrngU5rN0Y54B5xVySJRjYce1N8vzDnOWxTGZzQhTjGR60\nT2qyRrjj1Bq+tr5a4+8PSkkt+mFzTAzWt1VQmD1zupstuSoCD5SelaYibj5eKbJAS3GTikSZ\nv2Is2XGR09qRrMrlDx6VpeS0agjJ
HcUwRnzMnpVJsLGb9lVV/wBqmGMxrkxjPbNbPk5Y/Lke\ntR+R5gKkDd0BpXJsY0tv8oycnqcimt82COH7A1rtYbdpPz9jVaS1DMTjvxT6ElNYi+Rj/wCt\nSbdqsG6dKvfZZQnTKn9KY9uGGHGSeBRdi3KTW+zZjknnFCxM0xH8PXmrnlfLjOdvc097VnjY\npwoNO4FHyzu3Lgn0ocfKzb/qtXPJ8tRgHn2qNoyF+U5GcdKdxFNlwq5HOOtG3dwD2q08O6QA\n8Ko5xTGhXaGHI/2fSqTHYprGowdx64pzR7VPz4xyasND5hypAFMjyoYkA9uTT6hYrqiN84OQ\naXyVU7QM57nrUzW6soOMH2oCESYbgj9aYrFdoQrFlzjp+NPXDIFb5DnvU3llm6/L6U1oyobf\n8wHQU7iElVlGU5FNjjLRHK4z61Lg7QMcjnilYNkYHGOlMZDtY7ugCjpSrk4BORT1UKNzHk8b\naci4Ugrg9hTFYjEYWI45Oaf5IZhlMLjBNPVRNgoMY61LEu1cE/MD3oER+TsVUQ8etPjj+djj\nIHU052EaZOMk0iqy/MD3/CndlCsQcBV+uaZtMbFjylTNGeckbyOPSo1Xadh545p3ERGI7ywG\neODUTKY/mB69Vq4MDEYBJ9aiaFl+YYXmgRCnCklc5pxUtHnGSO1L5Y3l2cjn7tOJVpiVBVQO\naCRkanYR931pWhLbueg4HrTljDKect1+tTLsKhfusetArEDoFjUKMnvStyM8BRUiqd2COPSk\nWFFUnO72plEP06+/WnrhhjGDTzHhVzwSetOdPmBHI9anYojZf3fHrikCsrYxj1NWfJP93jri\nmsGZXK8MByDWciluVdUlSSxeNV3cZBrjriZY2VmBMhHCV1l9n7EwbhQu7PrXI3DBnVmOB1Ht\nXFUOxfCEcySRt8ux+hU1ZiYKp2EjjBzVJdn3TJ82c1ajBaPAHzdagoqPdFVxICS1U2mRd5KZ\nGMVb1Lb5IIU/L1asiSV2A7AmgDL1aHahkGST0NYNxudTGwKyHk5ro9SIkjdCdqYwK5q8jkjQ\nM2TjjJrSLEZ91D5I+Ugr/dPWs6+UbgSGIZc/Ka0rkqqqwOTnrVKZ/lPGV9cVaAy5InCqqtmL\nqfUVn3W2ZDjcsmcDHetVpI2YleAozurLum8twwbcX5+lMRVJKMi7iB3PUe4q0siyZOQAeMel\nQRxvICuQOc9KkXyo9zE/PjaF96BFvlf4hIOozU6NJM3TjHOO1U7dhDEGePLE4yTVpNqqSh/3\nuaC0KwR42ZWyw4+tN2r9l80tgngrmnssa7im0gU2Pay7Y8K/U56UCBk85VJJMajI7VBJHhgx\nyCeqqM8Va3AsoA2Eck56j2qvnbFK6gjnI3UE2KzHcwbPyjIAqLlsOBl8kEAdRVgERjIGT1IN\nRNsX5hubd0A6VaYhyyRxqIyuIyeOeQfWiVFuN5bJA+UEdGPrUbDu4w69Tjr7U4p5hBZG2Dgq\nD+tAgYGGNMnHGDtqLySk4KnnqakK+avlRDdtO7NRFWeYhfl4600SamlyK1wu98p1xjmvSPDk\n5jVVJ+QHpXlVnncHOSU/hru/DM/2lkVudpyTu7f41tEzkfRfgeUyWm0qQOtdTuJbK/dJrhvA\nd9EJlTeXRkypNd1GvGAeOtbGQiqzPgHvUysvK459aYoHIB5PpT/L2sCnzcc0CsMkVtwKH5ql\nZWVTgYOKaq7vmxz0FSxtuzxlh2oKEUoYxuO76VLGvG5R8jdQarplV465qzGrsvJ5HOKQx0cY\nUYHQU6Imab5fyNOj+YZYflTl2KcqMN24pDH7hGuQvzZ+6OpqVmJCjGDUZj6HGTU0fGPlzSGK\nobJGOKb0kJOGx6U8AxtgZ5PNKYwM4AoKGK5BPGBQ3EgC/dIpVxt3Kc9iDR1AIGT6AUCYzyR6\n0VN+H6UUDP0hAxz1pm0Kpzz6Uq5ABp
CPm56V8+esJkqpOOKeqggbqRgD05pdp9aZIcNxRuO3\nrxS7RtxnDUhyFx1pCDJ3Z607OMDHNNjbJ6YFPXjJ6igYc7uDx3ob5VAHJpuRnvTtwBz1yKBi\noSpOTSL8oyetJyME9KarEnPagQ9s/eJ4xTGPvzTj8wPFNwAFwM0BYcvPvRu7dKXcByOtIMYJ\nzSYhVw2c0m7GB1obDYx1odCOc0ykDY64OKVcj3FCqcYzx3pn3T1pBcVuKXcQwyab97oaFX5u\nelAhzsDgAc1JgDjPFMb2pThVHrTDUNp5Io7jH407OVz0pOEUk0AL7UE8YxR8uNwpN2OaBDVJ\n9OaVs9hT1HcdKaQM5zQAd+mKdyDkGmr90+9AVsHuKYC87ckgimfSkf5VGDxSRqduT0p2AkVM\nfNSs2evA9qi3Fl64FJyV29qXoBKrKV4PNK54qNVCjpT925RmgQu4cADmjeF46tQ2ODTFQ7t+\neKAsOUnvTsdCBwabu3ZpUDEYFAy3GVKkdfb1HevlH48aX9n8SSNt2BvmHp1OBX1bCnIXuTXz\np+0xZn+0kmj3YCfdxxkDmmLqfLeuW5ZXTu1cPcLtBGPxrvddJVncDtXGXag5B6DpTQHLTRss\n2XGErPulJym3gc/Wti+yr4PPNZ984SM7+PStEZs566XypCR8q9wKp/ZyZGdvlVTnFX5sxzOD\nyuaqqu6RkHCk5q0TYpTASNnb8vYGs+4tw8mF42/Mff2rUuNrAsFzg4FU7gFsKp2tjJ9qYilO\n6+YGUZ46elReYX3LgDHOKWZQx6HA5OKZ5Z2ljxxkAelUIhmUmJcD2yarNlW3M2VHy4q3IRtC\njuOhqmSPMZD2GRTBEUiSnATgE4LVHJC4Yv8AKAPlPqam2yBB82F6c+tRtt3KobAHU+9AxnmA\nYZlLKowAOxpHZXIAG9zwRnFLiSMSOpG1h0NN+VYx5gzuHShCKzfvOVPTrTFjO0nOec1LNGEV\nR1/hBqM7l2qG5ziqEDOJcbV+buajdSjfPyeoxU829oyEADA1EylSWb5uMfjTHYjMuW3kcHqK\nZGQyk427qlVVZB6ggc02SEeWfmxjgYqbCSIXXcu0njrUbE4+YYxzmpZoWCjJIyOSKYqmRVVO\nSOrGmIbH80eCMgnNHysuD1Bof5cE/Kwbkeop20bmXcMtyM9qoYu0SHBPy/3cd6iPmDgckHH0\nqRc4Ib76npRuChpFOWP6UhFbhpCynBBxz3p7Hkh/nz2p247QdoPvTB8q71HzZ/SkwEHlgEKx\n6UjH5RxuJ4o8w5LgfKeKeqBWLY521QxoBfaWAIWhWPLKMZOKYq7oyeQM9PelY+WSdpzjNAEc\nfytljxnlaUuu4hgcdcVIWbydzJ15GKRZNzZcfNjj0pANkx5Yx97rTfkZtxXgjrTmZvTB6EUj\nKisfp97sKoGNjYqwZPmUfw+tJJmTcD65+lO4UEtyR0Ipm8tgKOWP5VIhJMbRg5Gcbqcyvu2h\ns0MuFIAGM80iptQk/KfWgAk3yyFh2oKs0wYD5cc+9Im7qp5xw1K+Y4yHOWPIxVAN8zbyRhCc\nfSkXcDgMCKVmDAEHjH5GgtllO0N6mkAcc9yB2pvy4PzEZ6+9Ksn7xmAzxjFOkzGV4wMZNIaZ\nEAIQu3JB7mkZVbapGdp71JHI3zKFznn8qavmNjplu2KYCDK5fbld2PpS7csQ5+XqMUsiru2l\nT+FIuV6tjPb0oAbGGwQByRgCggJjOQMYP1o3/MQQd3qKcFDMccqeuaXUBu0Kp2tkY7daRV24\nDDj1pwV2X5APlpGbdlDzzVCuyJTtjYA5Ynj2p+5pIzwCepzS7drHJAHoKd5aYDDOB1oAjOyN\nehG4cDvRHuj54KAcrSSKQ3PJJ/IUiqF2jduB6+tIYNgxnJz3HtRIdqxkN97g07hcqPmA70Pj\nahPPPSmJiNvbO3oO9CjZkt8w6GnSOFYj
quelN4U4PHpQAqxlQFC4J4oLkKFxhenFJ825tzYG\nOD60rICu5m+TodvWgBiqVYsSNoGBimsoK7nOBjotOZhtChfl680qq27lchu3pQAjsY0XAL5F\nIymJRuGGPB9qRmZo+uUVqfuTMokbYMelBQ0oN3LcY4pUb98pxvTHAx0pgfbEo/zilD7Y/veW\nT0X1oELIhhhxGTuLfN7U5n75+bp9aVG3KBjB7io85R0f5eeCKQhwYyYeNNwYZ5pHQuoUrg9S\nBTnYMx44AwCKZiRQMjJI6UwI1Vjnggjgc092KoMjcc4OKcAeVPHHBpmRE6Bly3ekMRpP3wyp\nwPypV5Xnkk0STOp+7hCcCnxtywbgjimIj27W2qcjvSFtq/OOhqTgKyk5kIzULMdqZHB4JNIC\nRm3fMRtP8PvQV2sSwBBGaQjO1T0B+97UMf3nOTk4A9B60wFVlEeQO9IrMrtJ04wKaV2qRu6G\nlkz0zuBGcUDFZgIQXLbs5CrTV2rtYBju5x6UvnYK7F9sUfOr5yCBQIOSwYDofm+lJna7FDx1\nApSp6bioY8YoULDuCnce+RQAMvy5j64zTY/9SQDg9zilZQ3RtrEcc1GxAYYc+hHagQ8h8KzY\nx0AzS7NynB5U5NIBtIDjcO1KsYhZ8LkY6ZoGNCq0vH3TzupSzMfmG1emaSM9GK8+v9Kk8vKv\n/cPJHpQUMbazBVOTTbf7z5+Ug/nSsiEqWJX0PrTnILKWHfFMBjMN23Gcc7qWPEshbcFG3vUX\n7uSRl+6qnkU/y/lV1AK54oFYRcYO45xT1+8OygfnTQpZXJAznpSSL5irhsL39qYhwi3IdyhV\nzndRtMvyDk4+9SxKDIMEsF601y33t20EZ+lSLqMaQ7cE5KnGKcueipnuxpWUOoDgBuoYd6ay\n4X5Gxnr7UDHtiTknO0c0CbPy4Kn370nzKRsA2svOKXgbQo49KAG9Y/QfxU1XxJlTubHAI4Ip\n/JPBDJ3b+lJyvLMF9qBjQvnEBuBnhfelfCyBc/NnFEMWQVJKknqaJd0bEA4HvQAjbNrY+Vs4\noMirkAbmxxSMoWRecjH60q4JDZw3pigQ1WYsAvynv3pxb94ofCv7dDRIxk4VcH1pFh+UHog7\nUwFdlP8AD34HpTipPysQx65pI1VpPvbD2z2pNm2Rt3I/vCi4CeYWBYcY4p24MN8nzN0FQs27\nAX5mHWpsMvOMjHA70dQJoyrW6sE7frQki/Z8ycsOlNhnkhRVZAwxUUis2QVx7CqFcYp+8xTJ\napNokGANnr7Uz5flx34xSKG2nBwDwW9KkQ9sFd2cr0FMydwA6jnFO4GBjLDrTSTu+U4bOfwp\nFDmJ4wRjrQx+ZcY2/rR9wEJ8wY801UG4sR8pGMCgBdu5mYdd+KTaoYk5xnk1Idm7PmALjkUx\nYz1RsinbQAZv3Zwd3fJpvzSLvXG09aeG3H92uWHBzSMnykhhuPQCpAF+ZeMge9BJC4PU96ZK\n5Qon3SRinGFosgjLdeKYCs+3KlhnHFIu7y8E/N/e9qU7fMXevBFB3YKjG0c/hTAHztXA+v0p\nu5UmJXkEcU/DeYSTxt6VGijyzk4IPWkJhtbCg8jOeaRf9W0h+Y7topQHaM8gc5HvStn5Qvyk\n9VNAWFl3M0eDzScshAGWB6U5lf5W3ZOO3ao49qyFiTnp+NAWFC7RtK9Tk05lA5HTuKaFdVkL\nN0z+NJ/q1HJbI/KgYvyrHycgmgNtbdnjp0pDzGgAGcZNK2fL+U8frTAG4O0HJPUU7AWMcZGa\nbuUqOMN0PrSOoKlQ209qCRgbzO+1lb86kfIYtjAxytNkQhI2VdoHWpN275T88nXFNjGFQpO3\nk5z7ilfcSAeSRkkUbh525k2lvTtSyYbdz0H60wIWXepY845wKdyzKV+7jOKcvyqvZu4pSpwz\nEdKVxsRn25BXk96ZuLYY/Ke3tSYZuW4Tua
X7ww44oJHAAMd2CD7cUq7lxuUBR0Ipv3oyWGOe\nBSbNrZDHaO1A9QZikZON+T1oXf8AKFXb3BpWV2Vzuwn+eKXdj5ZDzjp6UXEIP3bEhsPinfKw\nDMcsTyBTV+VuRyBnNIZN7bwuBigByj53ZV6dGpqsZuS3GaVW2rweMZPpTmQCMeWoC+560xoi\nVi27I71IzMp+UdqCqBVO4j1FKuFJCnLNyKkYzy+S5OR7U8MrYJGR/dFEa712q231JprYZgAf\nnHegliJ8ykdmpr/7A5zg0sDGNiTz2p4GZAVH4UwI2y0wAXtTmbMm1mJfHT0pykCQleD2qJWB\nkLquWzgk0DF8sZ39P507khWK7VJwKVkcqeOO1KzAsmB/D096AbIwsfmMhJZu9P3Kx2Be1MPy\nqZByc4PtT9yyLGMFuMk9KZKG/dUZHttp6jY+c7RjkCkPltk/cHvSbhtJGSo6MaCmL8m1sL+8\nxxzTmc+SqnkkYLU0qDtAOWIzQGwcAYHcelKxKDG3hRlRSx/L8zjPONtN+XaSD83pUjY3Ipbk\nDtSsWN+ZQzHBweAKartJlwPl6E4oGVYkg9aT51kKY2rjO2gS0JJJFJVQrYA61HJKFyzZLL92\nn+YXZRtwOuaSQkMAUwDTGI0ZLJIwZWYdM8UjR/KVZsNnO2n7i/U89RTVHmSB+r980gsHmfID\nn5jxnvikUpGud2XzgKadtX0yM012WRsBMMO1BPUThd7ScsCPkx096WZhtIOS2eNtJucKSR8x\n45o2dOctQMXcNuCPl9KCCGC8njPsKMEAAfLjnFKgaRj82xW6t6UhXEYkcKvOKUY4YHJH86RW\nMauC2SvANIp/dj+8f4RT1GhHj86QknaOpAoViofao3YwGp0e7LNtyccetAxJjaecc0wGJv2p\nngdB9alhjPJzg+hpu35WAAb0pdr7VBHT9aYhGb5fu5INPY7l24AJ/Ok3LIhym3nBxQBhVyCB\nnAHekMG2qFb7sg707yyVMiuCKaMMxVh900BdsLckc56UAJuJjxjJzupvK5z8wPzHFSR4HzkE\njsOlN5ReFxk5PvRYBdzfw8cZxTljV1znHbFJxuDYIPTHoKXYsnAbjORRsMMtz6KO9NTLKCGH\nzc7fWkbIQ7upPAp+3dIpxhB6UrEiD92xDYV/fml3Or5k6449qcsa+YxBO7HGaSMEKS/I71QC\nqAHJUbmK8E0qsF3BsO2Mc01eVODhTxkdqSNR8wQEnOX3UAC53BjgcUq5aTcVyOpoWblTt4zw\nKM7yzD5RnBFTqAgxI2UO3J4B7CkUfvSD93FKxDZyPmHQ9qcWOGPpTKI2fbjAIB7etEimPDPy\nTx06VI0n8IyRjNIr9ZNueOpOaEIVmZcBowQelLu8qM7l2n1FNZmlVQMEdfpSsjbhg7if0pbC\nHLIHhKMDvHKkUeYfLIJGfpSqVQHK/vAOvrUXk7Y8vxz1/pTAcsgVVAdt/XGOKGZpWJXB9adI\npkbcMHA+5RC3l5LMEXpjFACFVbcCfmHp0pWYOqlflk75/nQuPMbI+RuaRSrKT7420wH5aM7Q\nRJu/ioO3auMkdAO9KzCNvkUAgdD0pI2OCcEj0x/KgQo2JCRtIJ4OaiVg0bAp071MPmwSN6Dn\nHcUnl7myOO4UUhMCVKoqnPqccCpYgdh4wAflY+tRrxkLwxPT+dSyQhcZYvnsP0plD+WXLPtb\nOParEeW+Xdk/3aq/M2Fc/MDnFXIl3OCxzz2pMGPXMec/MTxnFT2qyLKNwUOOo9qYY23SNI2e\n4FXFxtHR2xj0qQLkO+RyzENHj5VqVdvlkN164qCDPybeg6+uav26+cxVV+bucUAatnHGqKQN\nrMKsrCEwwJbB5FUjJwmMHsD0xV6xzIdjYzn86TA1dKwxO5c85NblhhZFVASGOfpWRZW6+YQj\nc963LNHgmRh82ewrNjOjs7ZZWGMEdzXYeG9O
C3EW1Mknr7VzOlw/vUUsFVua9C0GJvtKCMYU\nCsZDidvJGn2KReF+Xk/hVD4P+BD40+I1vEU/0eGVWZcZyvv7VoajGbbRGlI3MVr6G/ZT8AjQ\nfD765dQ/6ZeDALDlV7VkbHstvoVpZWMVvFCqxRjbgDioptBhkIBRQPYCtnf5nUU0/MufSgrX\noc3c+DopD/q1Oe5rMm8CxjeCgXP93pXcqpcDHb1pzBHJJHFILs8quvh3BJCf3bBtw+8M8Vh6\nh8NbeYE7Aox/CnWvblt0xlumajksom4KA49RTJPnS7+FNu4+W3Tc3HTmuavPglBE8gRDs6gK\nTkn0xX1M2kwMSDEm0+gxUMnh21duEyPpUSA+Pr74JSL5hx8mMgdQPbNc7dfCS7t32483jPyq\nSPzr7ZuPCsEhwoA9iODWbdeCIWV/lOW7KMVQuU+Ibv4dX9vJtWInrhQDWS/hG+hm/wBWzL3Y\nDhfrX27N8PYtoDxnA9qxbn4cwzbgIcjP93FGo+U+MpvD94rFnhbb2PY+9V7jT34Cg5xycGvr\ny++FMLs4eEEAdI65m8+D9mJQEgkXPJHaqchcp8wLEF3YXJHFR5Ik3FTjpX0Fd/Bku2EAQM3A\nArE1D4LzQh/+WY6qMcmlzC5Txoxny/mGKRZAF+7k16MfhVfSOVMJjIHKuQCayZvh5dwMyPCw\nTPXH+FaKaJ5H0ORX94uM4pPJLNjPA5rpJvBc9upYRSBR7VnzaLcx9I3H1GKfMhOJmYLMyEYG\nKVl3SKiqOn4VK8M0T7JY9pPShUbqQc+1VzEuJEUPTbnnmnjaFLdakVm5wuPbFRFQDxVJk2ZE\nrb5DuGPQU9l3MCwxSSRksCG/KpV+VixbJIxV3DUj27ySBhf502TK8quf9mnjH3f4val5UgHo\nRzTuIh4YbyOfSneWGUY4GaeqY+XqaXjOe+aQFfhdygYGead9nLKSpp746noTSrw2Pu0CKyqW\nUljtIpzjcVwOO9WGQM3OKXYejdBzxTAqNCV4zSxRhQ27uKsBQy4Yc0q25blW5HapY7FRAoTb\njJHQ0qx7frVxsYICc+lN8n5c4watDKnltk9/Wj7OEUnBx1yasLDsYMxz7Ujr0Uj5c5pisVTH\n05xu9aV4yjDjNWGUMM4zz+VI0TFSQcUgsUvvM3OAeBSrCPlUjgdTVvyAFzgFqFjYqMjAoCxU\na33fMG+TNJJHlgYxkDrVxlHlhF45pBG0Y2jv7UxFFozuJ280xojyDxV94xUcgWRSAMGqFYpQ\nwvgjOfaho964K4Kmr8cYeEArnmlaFmm6YGOtImxnfZ0bgdDSiPghetW5bcrnAHFIqhkxt+bv\nTGUmt24I7nk1D9nCM6sePTtWq0IKggnGKgNqM855pBYz2gRVA6L/ADqL7G6jONoNankY/wBo\nAdKbJCZOM4qkxWMoWoVhz0/hqJbdFDgjpyTWv5JGeAG9ahkhLLvXg5wfTFVzE2ZmpDuj4O1M\ncNUkdvuTBwW/vVoyWqDbgAA9qj8gqzBRubv9KfMBSWBVYkfe6VC0RbJODWk1vhQqkMTzuBpV\ntByQOKEK2hllTu3KMcYIpvON2eP1rUkiA5ximSWiGMkKarmFyszlhMjYP3+oqRo225P3s9au\n28LpgjAbpTpbN14zwafMUolJV8tyQOT1FSiNfLGF5Y8n0qZrd1mHcY604ZXhhmi5PKU5Nsaq\nCN3pSRs7fK4wCeKt/eZQU4FK8ZLbQu0dc+tFxbFMp5fzOc7TxipG5UHg55JqXbtRhtwT60LH\n+7UE/WndgV5GCn5OajOZGAJ+XPerHkiM5U9TQ0JkyhPNVckrTYaQALn/AGhQu3IDZB9amW1K\niTb8uB60LGzQrjG+gYyOI+cSCDkVYaNjDtKjd/KkWNFw2CHJxU0aFWZs59qLgyBY8tnPNJGh\nmcheAOtWfusAwGOtLGobcE+U9SaAIPJGNxOOfu
mlWLcpIXb3p8m1FBJ3E85pWmLJheAf1pXK\nI2UzfMM46GhoQ1vjO3nrUnMcZQD60bkKleRxxUsa3Mq8hJs5F+8O3+FcXc+WtwVL4I/Q+ld1\ncKzQsrAkda86upXa8aMhUXJfmuKodUdhfkaTrznlqma4kU/I4YZxkdMVUhCOxYv8x6VYUKsZ\nIXKniszQdqG9rfYmNr4FYw+Rim7eU49qvzN5bqeT6CqBWUTMpIQfeB9asRn37OysqrleuT0+\nlYl87OykjKdxWzcSFUYnJGclax7iT7x6Z5+ntVJCMZ0LfKG4LZ21BdzrF8qL35arNwqi7GH+\nVvvEVUDBS67N8eejfzq0BmTZLNtUiM96zLi4QylFQFehb0rZnYfMcYUdFrHuF2qwKjnn3qgK\n8i8jksvoKWNTu3BSExxup207lKMMYGVNG7lyOB2oIH25eTycDd8+SD6VoSbWYsgHzd+gqijb\nFVpD5YPAwOc1NFuZB8wKrxQUW1t1SNyU+Y8k0okVVAMYXI6g0zClclmweOvFLHGsmSSVxxig\nqwMwjx5gUj1/pVe4jZsAtkN90Y6VOMbdhw2T9001mCSIQPlUcr700IqzYLKSwO3hgKZ94ZX5\nV7Z7e9Suyqzv5eHNRsv7tdy5J7g1JJD5rPswN8ecHPG73qT7u5F+buRnrUUr7flb5UB9KWNh\nLMWZtx/2aYiQyOJlYDblfuioWULIU353Ddj+lS7R5hZgQTwAD3qObcGdSoUDg+tNCexLbFoW\nLA4BHKmut8OXC+ZGyJujPXHc1xjRhQp3tnv9K6bw44jVV3kJnjI61ojGR7z8PblW1CPcCpXg\n+mK9aZR5HyncjchhXiPg26MvlQr8pMgya9shUxx7QDjH4VuZWJFYEABdh9aGmIyF4Bp6xtwX\nHX0om8sttXgUy7CRsSwG3OO9KGV2IAxzy1OjhbaSD9DQy7lGDkd/rQLqEahZCc9Kl58xSucH\nrTY9p6j5qlXbtyysTntSGSKxjkxinKs+5jgH0pqxvI6gHOTmrCqWkwT09KQ7D+NoyPnxzT1+\n6NvU0QxsHJJwOgpN3l5y1Iuw5Vbce7UjKScn5aTlZAQwJbrmiTDJt5Iz1NAg/d4z92mhpFYh\nGGfpTsBcEfNxTQ24DA+bPNAhcyetFS7x/cNFAz9HSTgY5FBJ9KFb5aNx7nBrwT1EH3RxSKcn\nFKe2eppCu3nOBQIaxDPnPSn53Dg85zTflBI4pwUdRSKBSN3pTge1NbPpQ+Vx2zTEP5prYDDN\nG4qqZ5pwbOaBi7gx9qjVgKdnJx/FRj14NAwaQE4APvS7faj+LmjOW4PFBLEVckmgDqRikb7x\nA6ULhV6HJoAVT3FIsh24PXNLtyo2jJpAOcEdKYbDl7nNN27u1P8A4cYpvPrQAi4VMj6UoQqo\nGfxob5eQKQZ2k54oAVVK9elKWweRmmZJxk8dqVgce1IQ7du4HNDD5cZ5o4VAR+NIF3AHNAwR\ng2KezA9BxSFRj3pygsDQSJ2xnFNVfxp+0enNIcIMd6QwXoKXdtJx06UfdUU1WPPFUgBgGI9K\nRuFwDmn7sj0pqlQvTmi4Aqgj3oIAFHC9Kd70AIo9TS7hz6UKvJ9aRc4yRlaRNhcDbjv3pOFF\nGcjgUjN8vSgLDlUrTkBU5pM7lznn0qTcVQUwJoWCkbua8Z/aCshJawTIO5GPwr2P+IEV538c\nLAzeFmdBuYDj2oKPiHXFd7qfPTccelcTfhRIwFd/r9uVuGU8etcZqFuBvYAHtVIRy+pQmPDg\nZ3dKxLpTMBu+9jvXTal8sYyeBWHdKskZwfmzxVol66mFMnl7t2Du5NUp12/Oowe9aF2p+bAz\niqBUXEgBO3b0FWQZzOWUqAeDnB9KgMnzEFMgjg1ak/ePw2Bk8VUJVHZM4brVIRTmjK/MD25F\nVpZnjwoXdu4Bq/MwX5upYYAqjIqvgNkbDkEGqERN
8keWG584PtUBjDSY7/3hVx5BI2OjdTgV\nVmYxjcmcZ5oEQTIsmFDnGahZURyuM5/nUrDdkqec/dqNh8rkPhvXFHURFIrNEy7wCKiVxIQx\nXlRjmkmdzIny7SRTGleH5E6VVhDmXczAsAvULUW8gqpX5D1p247Ud13b+DjtSIjKzbzkA4Bo\nKY2bhlVW4z+VNjBikZTgknIJ6YqQhWUrnnNM2CSPyycHdyTVCEaQxsyOAWPIxUEi4bB5JHQV\nZkUbyqLkryWPTFR7f3eerk5B9qAGlmh2At+FMO5XcMBtYZokUq2dwYEYqO4YFUxnI4zSAakj\ncK652jg09tqqA43sxzx2pNp2mMclW4NOmG1V+Xofmx60wI2bC8DnOBTfmU/KvzVK2FBAHz9R\nTJiyKr7vrigER4BX/b74pPmTKvxkcYpVwrNtGM8mhhtYq2Tz19qLCG+WFyMdOme9NZt0gPI2\n96kfEhZ8/h7UxcKpQDcSKQCK+5SwPy7qHMjA5Pf9KailYsY2DpShQIySeQaBjtzSQnnhaiZQ\nqgluvNO3EZXP4CkbYMgfMD3pj6ilt3fccUwKvAHKk9KU7cDaMMOKUsgjwODn71UDGsu5Qi8f\nNSSxlZCQwAApJQ0cykNxj7tNbDZyPmx0qSbicNyevoKDwvDZ7GiMfLlvlYd+1O3byRt4BqgG\niNHkXaGx0yKZtMLSLnexPSpowORnA61Hkuu+P5iTjdQNBt2shPTPQUcO5K/dzSIxjVi33um0\n0io0PBOQwzmgQ4LsYnsTxTHb5sqd2T3p23coG7gUjZ55465FACL8xIBbI9KXkqdxwe1O52hl\nHHfFN5CtuAPpQMWZFfGWO9eaQrlfQnmnbSq5xnIpMDaRn3B96QCMzZQqPnoYO21in7wjOO1K\n7fdTv3xTTwwDg7VGBzTCwDZu+Xcobj0qLcUXYR8xb71PLbiqgZbqc0SKWwM/LnPFADXjdWD7\nlIzineYTEQf71NVQxZipBpdxRGLcrin0F1DzBk4yajRCT8uDgfSnnbJGRvyDyKRsqwcdcYqR\nhGrNkDCq1I3+pByNwP6CiU7duOmMk+lCDdyBuB9KBMGZAg4yxOQe1GQGO85HqKNu3AHyjvmk\njQKzkZK+hpjFkUfJnk9qAu4GLO0ZzR91lLHgc57UsgMkm5e4zikISZxKoVRyO3emNuUAg0LH\ngb8YfOOKVoRCw3fUUwB1/eKB0bt6UNjcSeufzpq+Znk7cnqfSn4C71wck8NQAx8/KBwM8j2p\nzso2n+Lt9KTA/wB5s4o2fISwwwPFACsyqAm3Azu4pPlDt3yOBTg/zYxuWo+Y3Gemc4oH0HcZ\n3Y4PVaftZp8gfJj71Mb5n+UZLdqSRgrBQdu3qKADhpCBnbjPNIN3+8ev0HpTmJ6hNxzmkDGV\nWZfl55FAhrb9o43HORmlKsztkYbHT1pqqzNgP70vzMrDOW9KAHbcEH7r45BpsjblpZCdqjcN\n3f1pu0/TmkMFVnbcv3qVW8tSFOOafvGOBt/h/GmSsUiwUyc43CmIiC/NuK7hnipPM8tiTjrR\nIzMVCjaMDpSSMMhvvjPKmgY5X3SEqAD7d6b5YXOTlxztp7YjyVHytUPknk7t3OcUgJRkYLcZ\nGeajDZkIUduQe9SMoZhzuj7fWmMp3YzhyOV9qYDk8t8Apz70u0RbgFUhufpSLtGME4HY0js6\n4wMgnmgQNg4AyWXk1Ht2jeWOC1LysbbAWOetGfMGOwHU0hj2RemeBTAu1cFsnNCvuVmIxxjF\nBwuGc7lIwKY0GctlRkjrn0pJOjYGD1FODHd8uQAO9Mj2rcdzkd6BDmb50KkNx1A70wf6s7jt\nXP61LuBYnG0KOPamMQ+1yCT3WgRHu3KzMM4H3hU25Rbk7F3DkUir94IvU4PNMxGoY7SxPANM\nB2CpEgOC3OKWMj5Qx3bsgZ7VCDnGQWK8VK8ZKgqv3R
SAI1jG4tyPukU0x+VhWz7fSnLlY920\nA+lJL5ixqWHO7ikFriMxkkARsADGaWaRducFlHygL3NPb5SCwAXsB3NMiUsuArL1NMBPLLfI\nPlJH3qdJsSMKwwQcf/XprsGGNxPan7f3ZOd7enpQLUa58yPbnkHO6hvmwGPJ7UbgGx/CRjn1\npFIaTJ+6o6+tAxJGXeoUFmHGMdKTLbeTt5pzb1k+Vwvf8KSTDTM+flwOaAGu27BUnB706THy\nqPkI5NDZ7AbN2M02RQ8oYnGDz9KBisyqwU9W6UsmVXDfdHFJuLunoeh9qGwzFSO+CaAsMOFV\nSB838/enf61QEJD55pSeSgQuQOD6Uixv0GSQM5FAhWULNzzjilVf3xOTyMdaYV3KoIxu6k0u\nwMGIJ2r92ncYKu6VRjAXqaYqkN1ymcCpMBlOzO49D7UImNwAzgdaLisDYkm64bG3mmk+Xlfv\nHoaGwrDuW709oypPTA7igBpUeWoAOfypECtID2QfnT1UbTg5ycgUkikIjs2ZG6CkSLtViXx8\njdqa2wZHTPQUSu0eB0JPPpSs259pBOenFMYjKVj2j5WPXFR7VX7gI4xn3qcvsABy+KgkYFsA\n+/HQUgH7fMUM6fd4+al4yTkketJliNrnIzgmmktuCZG0HP1pgL948cjoaeq7chTuGOPemsxV\njIF25446Cm+WNvDe5pFCrHtkBZ8eo9KXaWKBRy1JGT5mSN645pdxbovQfjQTYTbnAK9eopuA\nVjHIYHvUoYyARp8pHPNEzOpXI4AxxQO42RgrbQvzEYFG3zZgAMYHzD3o+dWGQAe/ekBZoWIP\ny5xTSC5GSdzHqe4pyhuHYEr6D1oTDKNv0NB3GMhW5zxSExNyrkH72OlLkLnHXPSlO3yd5XLg\n4FNIJySdrdcUwCR1ZSQOO3rmlP7zbwOnLelO3KqAZ3N1x60yOMLCH3feOeO3tQK2oE79gz8q\n/rTtwyWXucdKblW6cvnp2pRtz3GT60FCeYQpyMD86VUDP6jGc9jR5hEjgdewxxSfL5Ybp9KY\nuorBnYDJz1FG53VxuyAeeKfEo8tiG2vjOfSky3G3GSKBiMd2CvYc0xV8zAxhOtGCq9c89Kd5\nbdPUZ+lIBsq5UsPlI7e1P3ANnG4dhSbgi5A3A8Glj27t2D1p3Aj5VyByV5xSbR8ykcdQ/vS/\n8tCzHcrD7o60kkfmD2yPlo1AFIXJcZ47UpkVXXaMcYp2CvQYXpikVh5RITLZxzSJfkDDAG3l\nenFNYhuEUj1Jo5XbxtC9fengryAdxPJoGMVCrc8gc80pgbyc8Yzn8KdsVsHPTv2pqr80jOWC\n7eQOaQDtqBnwfk9qa0iswx8gxjNLu+UFceoxTlZuRuU5HXFUIblty7gBjoBzSrG7SHDAH1FM\nVfIC7iDk9RSouVII5IJFMBA3l8quOxp0mY1Bx+VMwTGByeMGntGFjVAx6ZOe9ACbi/RiFp0b\nA7v4l703hpARyuORSFlWPaFwWIK/SkAL+7YFuPRfalZxJjPy+hpdu1+vbrTFYbl+TdnrSDYf\nIo8xeRtx6dKTIkwC21O+KVSWkAZSVxmmrgEts2ntQAu4FskYK8K1DSJt3YPmZx7UNvkwD909\n6Ukrw3IHU0wDhc5GQwpcL5YZDtxwR70jFVXOC2eRQzBdpPyqei+9HUbEy/DdCDnNCsxnOBuG\nNxU0Mp+Tc3D849qT7mceuOPSmSNK8D+E5yBT2J3Atk0MVZtwHQYoX7pb7wpbDuBX5ic7VpR8\nzDa2fwo5dS3TFJtO4bW7c0hiMwZSO/8AKl3bl3McAdGAp2W5+QAUhbaOPvPxTENZV3BmbGed\ntHKrk4zng05kG4LwMCotqsN2TgUtRjin7xTvznrS/wAJxlfek3bsYI54p7OPMBxsK8HFHqDI\n1jCyDdxxkinc78jgf3qc0vUqMjOOaXnzNrc8ZBpAiNYz
tOGyT1pFZFOMbhjAFSxruY7x83bF\nMUMp3YB5qkDYqwhWzuyMdD2oXcvAztBznNIyrtbKkuxxSsvllWzgDgimIVk+UsCSDzTfMwxD\nHGeBntUuHZshgENM+/NvYe3FADdrSMFAJIOC1SMhJEeRkHPXrSgeTnk7utIzAs2fmLDJ9RUj\nQxl8uPO4sd3K+lK0axsTu3Ht6USL8seDjnpThny2yu4A9O9MGJtLMW8zGF6/0pM524GJD0+l\nOwMZVCePWhWkZF3JgKeHpMELIR5ybhyopF43DdjcfyprKWbLnhuvtT1UR45HtQIY3GSD071J\nu3KMLkdSKYuJFbb83OTSsdsXBwxPUU+gA+GTG7Zz0p0e3zMbsSY5WmyEr8zgNx2pxxsBiwQe\no9KQELR7mwuc5/KpFxndjnpt7H3pyxn+FwB3pCpMZVTgZyPagBNzNkGPC0vmE7vl+8MUceSF\nbcxU5H1pOclVXG4c5PSmAu0NvCg7h1NIrNuAHyjHTHWnTb4tqggbuu2hmKzBWJU9M9qdgGjL\nkHGFzigqVYvywBwKe21WIzuHt3pfO/gUEjutQAejMM8j8KOQzc4OelMYFs7TjHrTnYbcbueM\nv6UxAqhiwDFWHOB0obEmY0H3Tn60MDHLvB3EjkUis394fvBnCiqEEWGly/ylecHvTi26bft+\nQjPHakkT7rAg445o27cruwG59qAuAkzJsdccZU0/leRwccim7RuUd+nNOdMthj8p4HNMBI4i\nuHZv3Z71KFAZxnAX+PPao22NG2Qd33cCmzf6QEBOGzyoHp3pMZLvA+bZhWHDeo9acsiwgr2x\nx3qNXXGNhAB79/epVXKEr0bp60kA7aflcZLNwBVyGN0+YcKODUMOI5FMjb+P4e1WIQJGPUrn\njmkxpkrZWMEctn71TbVkUOGAOeR71FsWNyGOHH8ParVnH5jEFM4PT1pAXLfa209X7+1aS3Bw\nFUHbjkAVRtoSsTB/lZjx7VegiEUI2nLY5ahgTxyI7YCbzjO7P6VsWe2Pnr6e1Z9tAUiVgM7q\nvwxssmc/h6VIGvZg+YjYwua6qzjDLx1IrmbPEKqzncv8q6bTGWRQyGs2M6HSVMTRKRkA16v4\nViEhHT7uc151o0CGBBtw/Uk9q9P8J25ZYwMFDgHFZSNIo9A0XQjrl5ZWIQt5jhcYzxxzX2T4\nf0aLQdFtbGAbY4owB+VeMfA3wvHcalLfmHcYEC7uwPeveZVO3g8jrWRZWZsdTTFIH0qRow3Q\n80xOpBFAEnTjNLuO0jFNXGPSgEjPekUTrnbzxSSZbrTFbcMg07dupkkHl9eeKlVCsYwadx0x\nSKcLikMHXdtzSbRj5hkU5m4xik28HNIAjVGySOPSm+WjHmMY96kRdvSlPUDrTAqyWMM3RVH4\nVVfw/DI4OFBz1ArU5XI6UjcYpMDCk8LwSbgVBPSs248IwTZ3RiuuOc4/HimSDc3TFA0cDceB\n4s8qGTqeKz7rwAszMyx7l9uv4CvS9hVc4yaiRcuT0qhM8ivvhnazRkPDk5/jUmucvPhXDMzE\n2ihRxtA4r6D8lG5IzUbabFuJCAKe1ID5Y1T4LxzSMywunoccVzV58F5FyqFlkz3GRX2A2hxk\nHjOfWqUnhKKYHACHsp7UtRnxnN8KL+HhoWYnjbisC5+Huoxu4W2kVV65XOK+3ZvA0RUZQHb2\nFZl58PY2V1jiQZ5O5c1VybHxHceELm1bDDPfABqpJoM8bfPEwX1r7Hv/AIXw7Q5tUOf4QoBr\nHu/hXDcRsr2Ww9sLn8apSY+W58ifY3SQgoQMct6UyW1YMq8k4zkV9N3HwVhK7WjVBjhmHU+/\n1rmNS+CT7WcL5JHG4Dj6Zp+0I9meD7mX5QuT3NIpO7cMCvWrv4L3USnadw/vLWHdfDG/t5HC\nozbf9mq9oT7M4Roht9c8mmY3Mcjp3rpZvCGoRyY8hqzJNH
uYY2Z0IUHrjgfWrUkHIZq4UcjD\nU4EBeatGF5DnGFHcVC9rJjKqzH6U+YnkYxpOmRilV1V9wHOKUqWkU4yQORTmUhtxTH4U7omz\nI1bL7zxS7dzEA8HnNN+82COKft+XIOPUU00FhFjJIyc80eVuznmpNuOR0o3HgZAzT5kFmQeQ\nsmTyuOtPSMbdpH41JtdW4Ix3p/y+WB0NHMFisdu/hTimtCA3DZHp6VYYquPehSD9aYyFlGMg\nZprgMvfd7VPtP8PPtRGpy2RtpiK5j+XOM+4pPI3Kc8DFWWi2KVx1qPy9uM9DQFiovy4yuD2x\n0qVsjHmN71ajVVXDevFOeMSdsnGabIKEkYkyecVGsY5ZV9s1dWP1IHtik2jayY/KlcdiERFY\nwpbP0FM2M7e1XYgMYIyfWm7fm5/Ki4WKqw8nPfimSQiNcYzV1YTtJPApvkjyySN3vQFjO8hZ\nJOTzUvkhV2FMirW0AA9+1OZSwxmhAUBbkHkd6ZPbgLkZL+1XjGynJOaAi5LDmquKxlfZVEeQ\nMc9PSnpblW8snaMZrRa3O0kjr0pv2Vmxu/OgVjP+yhyVxketNaFo12svHtWl5K7ti06SHzFI\nX5cetA0jJW3dmXA/SnzW7lhj7wrTWNtoXaoI71EluGZ2JzzTCxntAxG0jrTPIHbr3rQaEKuT\nnI6e9N8g7QTxT5hWKJiPlnJqNbY/eJO0citNkVui1GY84XuO1VzEuJnyIWH3Nw7mozHvySCF\nFaX2ct06Un2ct06YwafMTymd5YUhVGSR3pWt9vzA5bpg1daEqowOVPWmmISNg8A85pqQuUzv\nJK5I6HqKUxA4A+U960hAFYEj5abJD8wAXmjmFylJo13Ack9qQJ2YFW9O9XF67Qu7HFKYv3nP\nXFHNcLFE4YYxjHelLFlAHfg1b8sM2AM0xbctu2jOOgp3HYgWHy24/CnNEAodiGYdBUqxLuVm\n4FJNCJskcY6UrjsQq3lvyvPegxsybnGOeKmWLMe4r81LuG35h+FIfUydVuNtqFUc7s7hXn2r\neT5hYELN1Fd7rQCqRwq9RXnmrSImpFUTcpXG6uWR0oqx5/1uMg/exVkSGPBGTH3FVPMeNgsP\nzDue1TxzNJ8pGGHPPeoLHXl0sfATey9aoXEu5lkk+XJGAf0qzcOiRl+rdCveqNx++u0LJ8oG\nQDVIRXvMK0mU6fxe9YV9uw2SFJHKf1ramHnSMUY8ngGsDUrcGWR5DgoOADV3AyWkLIwUANnF\nULjzMM7nCrwCO34VocrulVOegXvWc5dfMkDEN3z61YFK8jaPaSSxPPFZ00myRncZ29s1q30Z\nmjQlirqcgZrNmUtJhsAScb/Q0ySvIA7Mu3BHIIqJXDMMtyOSvpUku6CXb3zjPrRlS3yHaB14\npi0BXKtvZfM9Fq7HJ864ixn7wqoZVd9irk4ycd6lhQtyzlD1+lK4y6cKu4t+7JwB706TOPnk\nCp3x1qJcuNikYI79qXZt2jIBxglqBjnbyZonXDL6etRtukRg/BboKkjQ7Crr84PDDkGogFVg\nWb/Zx/d96BDXAbZtfCY7iq/nMsoLJk9kB/Wrcijbhhu5wuOlVvLEcYQDL5xmgVkMYl42aX/v\nmkjbaEMY2v0zTlwkzBuijOPWmKyzRbtuznOcdKYDmhbaMnaM5z70LIQxWQZYnPrTXJVhyCoG\nAaSFlcsxBUDjHp70yBokYSOV4XGBW9oruWjSUZjUjDdPxrHhXdvVl2KBgt2Namn+bJKobb5J\nGMA9apENHsvge+ikYzIuY425YdSa9/09vPsI2AwGAPNfNvgOZbe6t4oomRZec54PNfRujzGT\nTULZ56VpzGVtS1JGQclvypBH5jfN1qTbhV43E9qWTcpBwB2x6VoUMVXjjIDZ9qAyhdrDGeen\nSn5PlnA4zzS/OFGFyG4oEEa/KCP0qaLezED7tRxL5OC/r0qd
lbeCDtHU4oKHgbmDjjFPaLaw\ncHGe1JDhc9+e9SEDdkjJ6Ui0Iu6SMr0PqacsZwvpjljScBNhbn2qX7yKp6CkMaFywYLlexpW\nA7kD2qYw7owS35VFsz9aegiNV/dswHSjIXaf4jUki+WpjA565qJmXG4j2zQSG+T0FFM8s/3q\nKLCP0jZTuyv40Ku/mjnmo5HKDGMCvAPWJMgUxkLd8URtgc9KdtzgjpQFhu1VY5Gacvypgfeo\n+6eeRSt8re9AAueRnmjnjPajocmjHB3c0DE3ZwKkUhe3NRIoPfBpVz60gJMHJNI3Xjmk3ds4\npQwA4oEDE8GnKB1PekXHJPNIwIbcOnWmSDL3HWm7yvvTzk/N3pu07eDzQUKudwPrQPm3expQ\nPl9hSLnb7mgTFz3xx3pB9/FBYrwRmkCnrQFhMbWIDZFLxyM8Upx0C/jR5e3rzQNCFTx6dqeF\n+UFuaOOlIc7vWkIc+PLyBTerDApxYDg9MUbtq4xzQFhWUjnvRuPUdKaeh9aTBVP6UxDm60Li\nkZty9MGmbivA5oAlOKN2fYUxW3DNBYN2oEKy4680qpuTpRHlhg8GpVUqp3GgZGoI7UD7vTBp\nd23kZzRlyc7c0ALxjAFIx2rjrS7ucUKvc9KBDcbhnp9Kcqj0pT8qn3pGJ2gCgQ33AwacudwB\n5FN3dM0L9/IoGWFY8YFcz8S7ZbjwjfITiQR5U46V0ithlrM8aWa3Xhm/XHzmNgD+FNAfBHi6\n38ttytjnGPxrz68nJmcEYANepeMLd9zZTkNjGK8v1S3Kzk4O1v50ITMHUnDOyevNYV4jx8dB\nnI966G8gy3PVeM1h6lEVcNnOTgitUQYlxHuRgCVJNZVw3lzADkjvW1IwjzuOBjJrIuPLZsjq\nec1ogKkkgSRgF3Z54qnJGWmJ4HGcVPPhZDj5UA5NQtJ5kO7djH60yWU7hi3QfNniqbLtkLEY\nXqauyMZHGV5HpVXbtkkVhuVulO5JAZVEbv0GcZqGaTcycnDLngcVKYysZAPC81AQWkGc4UYJ\npgQI24OyY+X8zR5oj38fLnDL71IpXDblGO1MkjCrhemNxyKYbkbANt3DB/h9qrtkfMTtXPen\nn94AC3HXdSu0fl7mJYdMUCsV5tyxqM7gTjgU1JFhBRud3TFS7SVZs7Yx0FMkbbhuCpFUPoNV\ngXYEFuOnpTWbbliMDHOKGmDLlBhhzSZDKW696CR8zFvLI4yOtNZgqknsD+FMZjIrYXOB3qPa\n3AYHaw60FWG/u/JV87ix4pGyye2fujqaV1CxqOhzwvpTSSrLhd2PmNAhsmVGBwG6n0p6xv5e\nN+QBzTZXD4XG0E7sUsmOcAqevNACqqfKHI3nhTUsVmJC6sMFBk+lU42VmD9CpyFrWtpyzb3G\n0n72PSmBjyfvGceWQeg9hRmRF24D98+lW5mWS6m2fLG3QVVA2RYx82fxoY9LDGOxgGUncONv\n9aYit5ZHepWkEfHU4zn0pu4nGG+c8k+tNEkfLRnJJ/2aQncq/Lgd/WppNvmFs846UwndgDmk\nxjGXbKeMnGD6U3leAABUkmeADn/aFNkb94AOaBiKFwCGAejauznnnJpPlViQB703czZKDIHO\nPWgQnllpCcgtjJPtR/qzk8g9GpPMMsgAGw980reWqnb84zzigkaytzyMP/Ce1IZCPkPX1FSM\noZuORjqaZkSbgV2nHXsaaGMCt5ZDdT0FSxqyxqv3VHNIzNsUnl19KGcsRycN1oGNb95uZjzT\nV+dc44HrTs7lPy9DxtNMk5B52imA7b8rAHrzSfMDkEYA6NQ7hNjD7o4xQ2VBYLuPvSAA37sl\nRuz2FBxwB6Y59aI2YhsYU/pS7G27VwxxnigBYwwjbc2StNZSygBed3Azik8xcdwQOaWT5lG7\nmNuaYgETBuDznPNMbDSbSCTnNLHhQX5J6bTTpG3bPlyO9IBCzy
FQuAepJ9KZICoI3ZGc7hSq\nQM4BXnhjS8KzIedx60ARsp2Ehsk9BSnEaB929V60MhjJUjB9KCrH2HXb6+1MErCsNyRqAAQM\nj6U3y/41bI6EURxs54/H29qdtC8D5MHrQMjztUd+eaFLeaUB25GQaVFG7BYnJ+9RIsnzBuQO\nh7igQoRsgOeeppFZljO4ZLDihlO1dw39Pn9KPkzggkDoaQAoJj2kZGMmhZNzD5vlxilX7xAO\n3cKjVVQ7Q2WNMBdreWcnjPGKV9jYZSeB3pJiqrtDYb+7ShCqqMdaABX3L8wLGmzTExhR16n/\nAApzSEMcHGD0prY3FguAeDSANxjKsBtBHJpVIWM7jvJ9aRSFjG5cjrzRGzKxLcrjIFMA4QZU\n5Hf2pdwWEFlySetIvy5aMZJ6ihR3wWY9aB3HINvHc01huOF4HcmkwcNg5Oc5oDDnngikIV5C\nrAxruI601GG7dgsD1x2pyHy1BUB2JxTN6FcRA7W4PHei4xS3lqfky5PH0oX/AFhYEIcZOaGU\n+YCScAYzTWwQCBznGaNQAL5WWdSc8hqcj/MH4BxSZL55zzg+1AYI6rjcvQGmAbwqFDyrc+4N\nMT5W45Hoac2C5bbjacfWlC/Oc9+gFLUBm51wduBnP4UN+8wF+cM3CmlVtw2jPB5zTV3FmVxj\n+JSKNQH+YGwcfe4HFMDBUIY5ycCnROfmJOcDjP8AKoVXMwDjGRk+3vQBI2S2ByAMkU5AV+bH\nzMOGPYUKyEF0bcBxnpmlcBUwuVI53UwG87eBux1NLvbJKjcmKR5gzhg2MrjGKFxggZFAhM7V\nHlkcmjyzjDfIc/d96XcEj27QueOeopm3y8YclelAxd23IPPYGlVSu35RjHIPamtIQwBxt7Zp\nVVo9xlOc9MdKYDmxI0kaZzjikLs8ZDgLtXGaAxVm5wO9NOVIMTBhjH0o6iFHzKVIwB0poUSZ\nGdpHT3pWXcpYfL60jLglXbCf36AGspk2pgqQef8AGl+Tc+XOf4cilC9ck9KXy1ONzcY60hjM\nfIoOQT1alVVwwZjzwGpFkDcvxjpTW27lIHz/AKUxDvKKZ3ctwODTmYsMZ3EdRQq+ZKRyoxnN\nN+aIYxuB6N3pAG0qp79+e1SLcFlX+7jGRTc75Ny/e24waarDyiDhsHqKAFj5yucnOTS7stlR\nhhzmmqu5GOcE+lCknI4R8YxQMHkDfMcmiSQNGqYwCe3WljVmwhO49qI9zSM23BWgBGXarHIx\njG6kC8KhwRjlqWSMKcqcluSPSmSNkgsOnAoAAwXLBWYZ6UiyBoyE45yc8/hTtxOAflbPOPSh\ncnO1flzigQrMpZCUJUDgCkLbeMYkY8UqnaxK8kDGKR5Dt3EZf6UDF3HaxxtPQkVJGRFGSjZY\nr0FQSZjy38LfrT4/kKtgigQ51fYg4zjkUir5oAzs2npSTHnJOMnAH1pGj8thEHz70AIokiXI\nGNvH4UqhVbOSpIpGDbsfeIPShs7iAvBp3AQnzV4GecZpzbI9z7sJnDUb9q8/KewoZduQPm2g\nMRSFYAu1ePlIOCaaq7h8xIC9DSndsODwuenc0bnYrkcEc1VgFXHl/vOBjK05flXIJ3ZxRIis\nV/i3DpSNtddwbBzjbS2CxGD5bMMHnuaVW2sCBkHg0/5Qx3Hg9aYMfc6c8e49KQgVWY5JC4OB\n707AZZB/H3pkmEXgZ5/KkSNm4ViZG5+ooAczfudqs2MdKRjsj7b++PSnMoVSVyR0ao1iYuVT\nkY4zQBKqqsLSAkhuo701dzRllGB0po+UKD8uD0p3DE9dvYCgGNddzDBxtGacv3gQ2R79s0iT\nLvUFec06JW3cj5Tk+1AESqY0AzjJ79ad5hVdmCQRninFldVGMsnU+1NCiSRlzzGcHHpQUK0i\nspYJtONuPX3qNVMKmNvmPWnNjJYfc7UKzngfMOwoAPlG9FHyelId
rbc5BUcgU7dumwV4xkkd\nqVRtTco4POSeaBEasv3wQ3otLsSOEYO3n7vvSD5UPy/Keh96ey/LGwGAP50xgvmOpXbgHrSF\ng0flgUkjM25UBA/lRHhs/LjjGD396NgHKP3ijPOMYpPlX5AKFXcW2nIU9aI2RlIZevekIayh\n8xkHd1AFN8orhvmHoO9SLhuvy8YxTI2VV6lc9zz+FO5Q/d64weDSrKVYjA9PahTujG8d/u+l\nNkZoycrk9fakId+7YYbI/wB3pSeYdvynAzwfWhZGGJFj4IxilC+Wy7u5zTuAm1t64HzjmjmR\ni/3BnH40K7tIzM3enrtjbOdp60aiGcs5DcEdQaYylVJxjnmpOqsw5yc01l3cAlgRnPpQKwmD\n5w53Lj8B7Uqr3Bwc05ImOMfKF6j196TAWQ4f5T3ouUDL8qorbQTk5pu545XcZUYx+FLJI3Oe\nlRlfmVwTnHNArCxttAX+90NKFTkA8d6UfKc4V4+pNIFTczBTg8gdqNgFblcLwPehmWRlwxXa\nPvU1vuDgrSlSY8E4XtVCDcw6HAJpXXdg/Q0NllTP8PSk2mPJKl1ApDsCsdxMYwD2pWkzywyM\n8e1HEYB7n0o52HpjPSkMR1Cj5AW5zTvvbQp2js1NPTd2pu1lXp8ueAKCeorYKgZyVpV/eEke\nlOGFkJQdvu01gDhW+Vs5zVIdhOflGcqBmlkYtgIPl4yKG2rhU+Zm4+lCp8qsCe/4VIxFT5nH\nIUjHPahfmj64VeBThvdslgvHBojK7SZML7e/rTAacbg7g9MUoYq2R09KCzSKnHmMD92jOcsD\n3we34UwFX5UY9CeaTlOBwCucUMoRTz165FLHgZDchhjdUk2BA6q6t17UJhh8xAxSK21t/UDj\n8qT70gbH4UwEj+U8EsG5pzyCNlZuucAUsjCSMNnaq8Zpd2VGI8nsaRQgUqW5354JqPcFJULk\n+lOkznAwPp3pNjoRIRnPG3vTEIPL27QPm7j0p4ymOmO1Iyr94EE5xTvu5Tb15GO1IBsalWOW\n56ihWLbi6/MeM05U+bcV3cdaTzCchgQBzn0pCE524+4/QL3pzERsBn5cUMpkIPDHHU0jN5mC\nOZF4IpjDIX5g2T2pdh8wMSA/9005shjwNzColj8thk5P8qYDx+8Y847U7ag+QdR1OaSLG4jb\nz3oUkoSq55piFbKEMRuOcDml3YZiR1yvFNBKnH3u9CkKznBYg8AUBYM+Yq7du5aMtywBwx70\n75goJAA9QOlRhfmJB/CpKJGxC/U5x+VMJLLsU7xnJp29mbbjJIxR5ZyqovKjnmkAMvqMA9qF\n+XBOD7UN83LHb6D0o8sO24El8UAHyhnKHg+lIqqdvX5eooVisfAGAckU9W5Zg2OM4oJGbYzy\nQeeOPSjCx/cBI9PWnxfKoPAkY/pQ2dxK5+U4LUw6iKF8vI9cnNPZufu4XGQaTzRuwy8H0okU\nsowW9Qvamh2GycbWQ4Ge/ancMz5/76ojkj+VfLZmIyc9KSJuG8z5Vz8tIQqMQ+HGFxgE+tNM\nxAIdWfBxnFPBM7YXDACmq2yYdRxyTTAUc54468Ubzs3KoQ+tNbLSbgpKk8sKdtLZ7gdAKAGx\n4Z92/C9wBS+WqxlgdhJpS/TJCKeDtFLs2/KVYt+lKwDcnC4GPrSr8qqVGOcU3dubacg9QTT/\nADG6YwF5pgJx5ZePBBOCv9aRWVD843dxSqoUbWBzjK4pEWN0BJKnPNMQ9cM+8jLYyBQ+Wh27\nMtnJFHlFiWHJ6BR6etOVRsPPA/i/pQFhu5VBDHjHHtSjzFuMBR6bvakKluFHy9Dup7Ha5Zec\nDjmgARmLEnjbxmpoUVWEhbAAzg96ZCp2bZG3k8gU9mEjBCdgA6mkMsRBNo2gZbmrUWWbYE25\n6MKqW6iORGYfKO9WYvuyN0IORikBaBijjYE8YwFbrmr1iwj+ZOGI6V
nbN3ljGXPJyKsxlfMz\ng7gcUgNFmXzFZ+W9q1bPYse77wbisyG3MgwBz1GTWnDHtjjQDjPIoCxoIxjXG0ADp7Cr0Y3Y\ncgYIrPjhOVff8vULmtWzQmQ4G7POT29qze4F2zBdiNuQvPFdLo1vEzAupJ67RWFp5aPPy5JO\nM11WhwtwCvzf3ql6jOu0ttzRw/dyMCvXPB+nvG0EaDcwI49a8z8O6YbqZHYfd5B9cV7Z8N9P\nOqa9YxDIBlXn05rKR0R01PrX4UaP/ZXhWAum15v3nTBIPrXYFgzEUyxtRb2scfTaoH196e3y\n9uayEQ/d69aQZPbmpuq89aZ5e45JxQBGB84z0peecDmlAxzj8ak6qSDSKIlUhsYxUqnjpzSc\nLzzTvagBtPXDDkc03gH1FOUHsaNR9A6H1oyD7UdwOlKy460xCHOcDNNVjt96f0PuaMbmweKT\nDoM+br1pV460n3W6mnEj0pdRIcvc4pjfN14pQxUYxxRnqDVDuQsGIOKRVAHvT9vHBpVU7sZx\nQIRVAjyOtSdvrSbdv1pFJ5BoGOYbcntSbQOTyaFHGDzTiMjpSAj3EA0h5XJ5NO5pu3nJ4FKw\nDAqScON1NksYZOGX5fY1Ls3cqMU5TkkfnTGZ82kwEAbB+IzVS68K2867cYXvj+VbPtmn43Zw\neaAORuvAsEn3TgdMjis258AxNkRqpbu2Otd8EKscnNCr2I4oEeXXfw3ic+YbZWI/2elYOo/C\n+1kYgwbFZs7VGAK9zEXeoZLdJj8yDNID5vv/AIPRuJGW2yMn5sdq5m9+DCz8LC0RbkMO+Pav\nq6TToWUgLg+tUm8OwSKQVBPYn+lAJnyDdfBuRdxhPGM4K/NWFdfCu+aIhYm3KcDdxX2lceEb\nWddgQfgOfpWZN4IgcECL/gJqkB8TX3w/1SzODFlem5QcVlSeGb2H78W5v9noK+1rjwDHIreb\nGS2fXj6Vhaj8N4JDzCFIHTbnFO5DifHsmnzR/eRgKqyQ9MjGD9K+q7z4T2cnzfZFDe4rE1D4\nQwXAOYAidMbRVcwcp847SFHykc5qHDbjlT1r2vUPgvHCrGNnWQHKq3esi6+Et8gyiiUMONvH\n50c5PKeVKyRsS3NN+83AwK7e9+HF9asxMIxnB25JrMm8H321mETEDgbRx+dWqisKzOc2ujYP\nWpegyeR3xVqfR7mPIZWyDjHWkXT3txh/mFVzk2ZAqjcAWOO1IYRtxu5zmpmhdPlI980xsDjG\nT3qlILWGuvyjtTcMzgUOpLZz+tG5mz0U9qq5KiO8vjBxnNR+WFd8DBp/OwbuueTTSzr6E5oK\nsCKYxnHymk8veuRwc5qTc0nsM9KcY/mHakSRLHtyWOQeopI492RnA7Cp2A7U10DgH7pFUBDI\nqeuDQoWTIzip2VN2MZFIFJyTjg1QEcaiPgnmhk28YABqRl8zJx9KjMLSNubOBR1AZgBtp+Yd\n6ApCnAwKcvG49aljjY4J5HpQIqxRlW+fr2pFVpH565q60e5h2IqNozuwB+NAyF4CnzKaj8r5\nSO1WxzineUOT1NFwKi22F+YUSW6rGKtAeZnso7mmvGzLnt6UxbFNcjH7vIHek+z/ACFyMFjV\n9Yi0e0cUz7O23axoApeSGO3GBR5O5eOmelXPJJX0pvlnqo4FArFRrZeQep7Uw2Py7cdqvmNW\nPIwaUoVOM0CsZ0du0cO1huNKIQQoPD1d8vaD81Isf7vd17c0XAoNb84QbW/vUn2fzGbHQHGa\nu+WFOOimjyhvOz7oOaCWig8WQdo2kdaZ5J2gd/ar5h3K3OMmljj3ZT7px96mJIzhajkAYxzT\nXi8xSNuB1zWmYSikE7z60mwqvIwPakVYzYrcBTjJIppsw6szDDYNaXk9hxupJI/JV8jcSMCr\n6BY4PXN0Mylz14FcBrCj7Q25wjdRXdeJLxZI9zLlkPT0Nee30yXEzOVy
RwM+tczudFtCC1L2\nqlXyXc54qaPcqvn689TVWGSW4UgnaQfvelTxr907txz97tQAmPMViR1H3e9VbqYxqq449atM\n0luxBIOfWqV2WaPCqCScYphcZIyxsoUFl/2aw9UjUFsbi2cnI4rVaYBgqgh84NZ2oIQ7kuTn\ntTSAw5jHJM7+ZhcYwprNuHPksqr8nc/1q9cKsOTg4zknHFUruTCkAY3cj6VoIpzD7S+Izz79\n6pXWHBjb5ucnPY1cn68jY2KpywbowHfBBzjHNMRUmcFeVPH8VMB6hiMkZFPkUSLuU4UHBX1q\nHywx+bhj0zQSS7ioBxyeKswyMrDAXd61SYhZlVgcY59qlWMOeG+XHr0pAaDRjaWGI1Y+venG\nHc6c7ge/aq6qpVYhl2PSpTH9nbfhvlFBQ+cglCBx6KaPMeR1BRemR/hTmKSKrouTjjtUSr+7\nTBy27B56UADKWyQ249WHp7VVkfcu1XBC87e9T7ipkIyR7CovM+UMsYaUjJx0FUgK/mbGVsZB\n7EdDU88f7xZUO1COV96hWMiTc43Kxz9KVmXcM5IJwfamSKyl2GRhT/EO1RjHO8EnOBjuPepF\n3/MmMDqPcUjFG+ZCcrxg8UmIiSQrje3U4Vav2gLLudWHI6HvVGRt7F24C9D2zXQaWqyXEKOc\nDIJyOKpCPRvBd2YZ1Z49wwMLnlMd6+kPCsgOirj5ifmUeua+btKW0FxGtpLuVmxv6c9xX0f4\nV3LYxxjClAOlWYbGwrH+IYpuw7mPXtUjKCCGJZutI4LbWPCitbjDHmIMDA6UnllOMlcGhVO/\ndyFzkVZJXqTxTAgMImYc5apVhPmHrwKVZPLbdjk8CpIy5Vi3X+dA0NjYhfn5A6VMxWRlKnHr\nQiiTAA69qm8kJkEYpFoZ5ap8zDOTxTl7lhhafuDRjIxinA7+o49KgoZHGWUnP0FKgLSEEc4p\n2dv3Rikxtbd1JpiEMYQ5Zsg1C6pJtQ8DrVjadwLAFfSkZdqtx17mqFch8se9FJ/wI0UhXP0d\nBBPWq80cm7OcrU+AnJ5peOPevBPVIgv7scVIzfKOKXaWHWk7daABcN14NJuGTxzS7crQTtXN\nAA3Smu3yilVyzDjIp2MN2NIOgzGcECnHO0YHFA+Xp1NHOOtO4hGIznHNJuCtyKGXccdBUUit\nzSAtRyrKCBTjjByelVoIyAD2qVulBI5G/u9aYc4x3p/AXIGKdwozyc0yhuc8CkHyjJpeFXI6\n03t60MQu+lHpnik8tl96M+2DQFg3ZzjNSbsYFNRj6UcryaAHbeckUKwDZApN276UbTtzjik0\nAbuueTQfugd6Qr+8GOlKSBgGkr3GABI680qkKeRzSZCv7Up9T0qiRJAW6d6YylcY5NSEnbx0\npobccZxQA0cqfWjaeMU/aeQBmn7Bt60AJg8dqd0OcZo524JBFN6d80DsOzz0zR94YpN20Z7U\no9aBCdME808D5fambttOOVXjpQSEudoGOaZtOBk809W9TUe0tkjmgA24PPNKyhQOcUnOORSt\nyo70FD1YY9cVBq0bXWlzRqMhgfl7n2FSr8o5qaJRIrD2xTEfEnxOtWtdavkKMvzk4Ixt9q8c\n1JtqnIya+iPjpaC18QXuF5Z+PpXzzrSnk4xg0hWObu2wpY/KM1gX0hUAuMrnt3roLqTerBgN\nueRWLeIGyccdhWsSbGFJtY7GX93WPNCFkDKcg/w1sXLHcQOmapzMFj5XlfStBGd5IZWUYPrm\ns+RUikKsu1+oHatCWQMpAXgnmqVxh3VcksO5qiCtIjLtdeMnBqqy4V8cEnGas3LFj5Y+VarS\neWkMiLy2MnJpiKnl7Y8ucgHt6VDtVWHOM9M1M0wCopGBjnFReYjKSy529KYEU8cirn5SG4qt\nulVOeWBxn2qzMVfjGxsZ61EjL5W0HLZzk0yStJCCpZe/UVGN4Qt2A6Gp5s
SJ8ny88moyu/AB\nyDxzTQXZXjO4E4Jx2psybgMdSefarnlFWHy4qLcrZVlIP9409Sivxgdmx+dNZSqhCu1j81WP\nLDIAeCOB71G24qd4yw43GgkhVGVWLtgdNvc0BT8uTlVHSpJty4ywJAqKM7izPleM0DBZlVZH\n2lvRmFNZyQqY5AzmlhXzdwUZGPu9jTVJb5M/PjrQOwbg7bsDA601mEkmVGfrSNnyipXJB7Uu\nQJU2/LmgkgZlyzfdJNWbeQRNlWw2Oc9DUZhKs2QGGc4FMa2K5kJ+UmgZNNcJN82NhH61V/1u\neeeuaHZnf5SCByTTZGKLk8bqAHbQ0at2B5HrTVVV3Ybj0pckLgdPShotyk/dPpVDGnHVuG6Z\noWTB2Im7Hanoq8bjubHA7UiZGc/KW70gZH/rDgDy/rTNxVvlAzjv1p7KFVlJ/ixTFjK5I5FA\ngSPbhjyO9JhfmVTxSsoH3QV9+1KwY4ZP++TSERYKqARt561IMBSBgd6JN23PEhP8OajjkK5b\nbgn5ckUAM/hGOR19qdKS21QMMeTTmYFShPPbHekfhd56kYwKoCMsI39s9acAJUYg5PXHtTRj\ncobkYzTo1LqXUbeeVzzTGJbsFkznaMUFlbK7eCetLCu1iGHPUU1t33yuG7ehpAJGqsORzjNG\n8SLtY7eKTcXZtv3QOR70q7WjBK7Sozz3pgKV/djYPl7imbhHnBYD0qbc7JkHGR0qCPcfmP3s\n96Qh/CKxjbdnqD2pgYSbRu5PTNTKMqSw/KmJkM5wu3HHHNMYfMdoPPOCcUrBo2yTz0xTUlZo\n1ycYPSnbdw75HegCM7jnj5f71LxuDDJIH3qYdy8Fzj6U9WCnAIIP60hApH3s7W6ikkjb75Ge\n9GPmYHn29KWRmUZxkdMUxirjYOeCckUm0xORGN49+1K5XAOeg6U1vmyBuU4zn2pCGuC2Gzjn\nG2o5JChIdvvcU/5TtD4HGcevvQ0nyAYHJ43CmAMglCjzSqqMBcdaMFtvPG3tSs6p8wXBHFG4\nMw44pdRakTbmUMjYPTmlwFYLu3N1p0i5DY7DtTMHauRmTGRTGLG/ms5UA08tujGeT3HpTSWy\nvyY4yRRDiPOBz1+lACSKWjBX5cHNDKCNyNuz1FDSE5YjPrTWO2MMmSf5UgFZTuQE8gUpYryB\nx0zSNmRhg7u9L3Ut93tTGNhyqsF4P96gKM7A2TjNLJuYge9MCqJWCgk9S1ADmdiVDJy3GQaX\nYsftt45oIDclsED5RSNsOWPLKu6kIc6HClMbSetJud41bG2RfSljZWUEgqeooTbHjA+Uc47m\nmMhXDNlgSDyRS58luBwxzinszSdCoGc//WpN3lqzE4IoAM7lYL164qNshE+U9efan8qwZ+GI\nyqjqaWNxNKc7grDHIxzQAkjjdgHk+lIpZQrgckc57UvkqrDJ3beopVk8zoNiAYxQOwZG7oCa\nTcNu3dhicc0xWUqGIyoPNOZj5mNvBGQvcUEhJG65GflxyPU02TdIAJOGxx9KUksq54YHkUfO\nJm3DII/SgY1W+62zOP1pWk3qCxwCelCszJ8q4xxSb9x4G4ngCpQiQv8AvAu0FaZ5hRmBUHuA\nO1Lyoz0x1WmPy2f4jVDHD5mJYZ4zz1o+WNVBOSx5psg2uuBk45pw+Zzgc4xTC/Qi2hcsy5AO\nAaX5X3c5Yc807adpI+8vWlZjt3lMMw69jT6CGhiVDnoRjbSMoVQo+VjTmYHb3bGKQcqrH71I\nBThmCIPmFN2/eDjd6fWnyZEwIYBT6U2dWjxgbhnJPtQwEZQyqxb5jwRSIAIzu4XpzTmKqQwI\nPOee1I7bXyyZB7UAN+RUB2E+5pfMVscduDS4OwbuSwzxTdw8tQeBnFADgzRqQp5/nQ0bKqqW\nBb7w9vahvvHsR0FNwGYkNg/WgAaQlkbAHODinR4wXwAh9KbtwhAIUk9W7U5S
NqkDPp70CE58\nsCM7Mn71G1UfJxnoT2psh87aw+UA/d71IylVCnG31pDG8qxCnHFIu5UHIJ9uaNwY5l4boKRi\n6cdB1x60CAZ2lcZpywuq7QwYHmhJBgkDB60n+0pxQUDDDFmG3H60SF12gsVUn0pVhb5nLbtv\nODS/wlHOe/40CGSDapK8HoaQOysMHG4cH1pXYr9wc4waaVjXbksxHegBVXHLHd3oRiz5A+T0\nNNDbieCq+tKzKuBuJY9BQMdt+Yl+R1BpDjnj5j3oRisigjcD94e1J8zgMmSvagALbXyMg9Ca\nWRm6EcdaRXbaSFyP50rsskgIU9MEe9MQ0KdqgjGfWpFkMUhbOc/KR7UzOXUA5INOZcSMvZjS\nAcY083I3Djp1FMXG7ORheop37yGRUboOR3okYsvmYA59KYBJIm2Py12swyRTdoAHG0E8n0p2\n52ZSWGM4z6U6RQpJDbu5B9KAInUMNyEMQehpjN82GGO+R2p/3lJ+6vUCmr8qblfLHtQSHMkY\nAGfmyacyr689lpD8yqd2CDyaR1bc2Rk+gpAJ0blsEU7aJn3NwMfnSthowpXbjp600/MhKdAO\nRTAXkt8vKg45qTYE53Arj86j+VWyrcOORSCN9pCsvy/NtNIBEDDkgFu1OLPtULyPajjZktsZ\nhnFCsA3GQu3r70mHUQALknPvTg+2TIG4GkRnZVXIXvk96OGU545+6KZQzG5SMYGegpcrGzfM\nemOKRW2KcZ65PsKXdvbITPoPWmSxu4rGFA25PPvQTyBnhflANDtuwShJpzKrM6tkEe1FihmC\niHnAHQU/OFQv17mmjLKCfuk7aFYByuMkce1AEkqmMkKRtbvUfyLjcWZv6UoIRsMMk9aXa2AQ\nQw6jPakALErbsDapOeKRnCxhVGTn8qF9cnHekViN+7kHoKYh5jfAGMjqWFRttyxAwOx96dGS\nW2glBjoehpQrMykHYOaAQknyKNwIOMkf1pcsoG35gRkk0gcLkAZbnr2oDA/N0PTFAw3FlGOh\nPSgo0ilmk3MvQUKwjxs7nmkwu7ev96kMCwkxhOaVXG1sIfc0YyzSf8s/WlbMkJT7o60xAxVy\nAp4xwcYpy7gyRrtK4zz3qNtxCvkE9CKT5CoUg59aCbajgrfMc7W/lSYU4VULMep7UuBx1Xtu\nPNCjLNyRjo1BQKGkV8dRwCaQLtjUYye9O8s/edss3T3pFUxxlgOnXNAiNf4wOjcBaWRSqrGD\ng4+YU5huUMFz3puSrMQTzzzR5AgCtHH8x3A8AUHMm0bc4OD6Uu0+SpJzk8ULiMFgPkDY/GgQ\nuPmYEZC+hpgDMoPQsentTtu1iegPIoT5SMtznpQANuVUG3j2pGOACBlqVVOxl6NmkDBeSe3W\ngAC/Ly3fgUpYoxKsFB60jZkYD7oI6VHtT7uCefXpQBLJ8u0feY/pSbY45FDNkfnSMcMoP5ip\nF2qxIXoPzo1KuMGCxxkkHj1NLl9zk/IG42CmlNrK8jAHpt9KdvJyp7UANG3bwfnHQe1KuOdw\nwOv0p3ybNoUhieTTWxtKhuhpgDfLHlTwx4NLkbck8D9aSMea2zONvIo8zc5YKNw4yKkAAO4Z\nBJ/u0vyljlsnuKSPczD5sY5NAQLj5clvmzTJYjD5WI5pwYBhhSTtzTVJJ3KAPrSiYuuFyJGO\nAT1oATAYnacoRyKFbKcE5ztpVjaNSGb5gfwobG7I6/3aBgyhGMYJLKM8Uv8ArAD909y3pTkj\nO4qPv9S1RqAJDvBYA5pgSeWFCsjBkJ496JAEwU+8y8+1MjPO/GTngdqX5lZmzx3pAMiDsMBi\naWTcSozzS7ircDAxnNIpUASbs+1ADgTy4UblpSw3EjjPJxSKCsnypw3UGnF92X4AHFNCGbs4\nDZGD2pVYBssc89qNx38DcuMk0oVyp6cH0o6DHDKyO4yVPpQG3Lx8oP506MkME3
bT/eNMXiQB\nlIkP5Ug1GKw28EkZx0xT/MUSZI288UjN+6I2/MGzmjzNzbdpZT1PpSGOZjukC+uCtN5WP0Pq\naNqg7hySfvU5i0cak4YE4xQAucRK33sd6YEDZO4nPJFK+VXkfLnt0pQS4Vj8gxzimAnG0BeV\nPX2pUi+XcvDd/pSD5T0yDwPrSKsixkScHNITHf8ALFiMZzimxoGYFsj3xSbE80Hn/dp6SFty\ndgflNAgXar7M5cHOKmaRskZyjD04qJvMXLvtyDt4HJpJtsbKuMg88GmA5WMSnIB3cZpfMZPK\nk6lhjbmmKyseEwV5p2UdlLRnIHANGpQZZMhzz6L1pWWJlAywbvnoR6UJH8rMDlyeh7U2Jsqy\nU+gnuNZVEwXd8nX5ePwp29VYqPmPUe1Cr0Cjn+tPCnaSF2gfec+vpUiBmyfv7VxmozIY/mHr\nS/fjPcDpSliyqf0NV0ATjaRtJzzwacGZeV4fH3aZna2Cv4jpS87Tk8nrQgGwqyglzk+lO3Ky\nj16GlZWj464GaG2PF6MaYCFXVlTj65p4b94yeWGPc0EbtuSAaRW27+CWJ4xQIcm6JgQ2R3pz\nDd1HyH/OaSMtJycAAfNnoKbuVhgHt07UAPaTcwGdoxtz/WmpGqv8zbiOmKkWMsy4I247/wAq\nY/YM3y56gUwJbdQGkJIC4705mSVY1cYTPUd6ZHuOCPubsU+GRJNwIyvXIHFJgWImEajPTP6V\nZ3I3zHO2qkTbgCiMv8IYjirqxlVG7AbGCy1PQZNCpVlUsQTzitK2x54C9B19KpwxqpVI23cZ\n5NXIGWaUIitH29qQWL8MkbyeWVLDOauBgysRwF6Gq9vbs1xgjCKPzNaMcaYIJ3d8UgJ7NZCs\neR244rYhVGwN2OMlhVZGEdujKc+v+FX45FRQ2wAGoYF+zkXaM5/Ku30m1G1CTtB9a5LRbNbh\ni0mCMZ611tiWaRVUEgMPyqbFo77w4pVlVTwOlfRn7Peh/wBoeLlkeJvLiTzPYEd68D8G6cjT\nLuO5CM19afs16aWjvbsr8qsIwfcYrBmqPf8AcfLUnuM+/wBajf5TvPSnMflx1H/1zTCePWoK\nG7h1HNG4dTSO21enNMKluelAEwI28CkXAPSmL8pHpUitz0xUgGQ2RihW/OkxhunNBBxTAXG4\ncdKdxjng01TjoOKXjbk96YwyCOvNOXod34Um0dQKAD1NAgHPI6il2krnHJpPumgbsnnigQ3b\njjrTv4eRSdARjml9O/tS1AXkKPSo2bnins2aZ5RDZ7UmUNX5c5p6kHJNIq4Yk80ZHTFAg3BR\n70bc896dt7kUDHpTGJ05HWl4b73Wl/nSBhnpk0xC7R2o6rjqKTnb04obO3aRzQAq+lNbGD60\n4YXr1pPvc9BS6gRKrevFODY9jT9oIpGA9M0xgi5zQ3zClXgfWl+6BjmpCwu47cdqa3zdODTl\nOOCKOM8VQWE4VgB+dI2W70gYN1pVwoxUsQ4RjAPNMkycjtTvMwMVG+Sc5pgAAJxgEU2SBG42\nKF9MU+NSc84pedpyelMZSfTYZmO4Z+gqvJocMinKK2P7wrRX17Uqg8nFAjnpfDNvNk7U/Lmq\nFx4LjYEoFUdxjrXZrB3oddx9DQI8+uPAsW75UUD+9isi78Ax8MIkmXGSGXjP0r1YxrtHyjim\nNapIAQOlAzwzUPhPaOrMbdMY5ZVrnbj4M2kny+U/PI2/z6V9Jf2bCy/MvvULabDvJ2jFAHyf\nqXwbXb+7cqvqw6/hWHe/Bu68seUQe/Svr+48OwybgI1JPdhmsy48LQvn5FY/SncVkfGOpfC2\n/gAHlPn/AHTg1izeDbyFSTBIQOAdpr7ZvPAcNxHjYp3EHjtWfP8AD2CTIMe7PAUdK0uyeXsf\nEU2lS27bSrZzgjBqGS1ZYvukGvse++Fto6tmBN6n7rL/AFrlrz4P2UkhVoWJ9AMA
1XMLlPmP\nyiu3C8d+KSTl9o+9X0Bq/wAG7dYWW3gZQTwd3Nc3c/BW4XMgVz2ww5o5iXE8gXG7pih13Zx0\nr0u4+E1+uTGisO/IFYt/8PNQtUZxC5Tpu4601K5PKzizxgdqVQGU8962pvDVzbjEiMpzj5hW\ne2myqpAVt3pitOYdiAKV5U7h6UpJbrwPSnLbvbsAAffNJywJK9+nejmQrERUJwO9O3DaByp7\nEU4Ak8rj0p2TwNv507oQ1WHORk+tSRoNpGetKy7lximhegPUU7jGrj5lK4Pr2pccDHXpT5Iz\nt3A8dKbtwwwMigVhNw24YcdOKQqfWpNvPIx3pGO7kL7UBZMQLt7cUg+Ubs1IF2jDGm7O+CQK\naAYFG7OOfekjXapVhxmpG2vyRS7AcHNMXUgkVeO3pSFNxHOKslQcYGCKaRlT60uoESx54IpC\nhbgnHtUyxmT+LA9KGBHy4oFYgEQJ2j5vemqrRsV2kD+9VhodpGBhcUu3bGaBWKqod2OvPWpN\nuWyfu1Js35GdrDnNOVQ0fI5piIvLXPPRjUE0J7cLmrm4SSYK0xoy0Z/vZ4pagV/LCuM9KZqU\nax2sj55UZwKuxoHJVx2qDV1SCwkdjhcUXsCWp5J4gt2DJvb5WOdv1rhtUTy3kATYe4/lXb+I\ntTWZnReHU8VwWqzGSZAD8qnB+tZam5WVnUBHTHqRViCRChBX7vQVBuDB2Zueh/8ArU2RQ0Ia\nMNv6/hQMkLK0JaTl1PAzzUFw0ckwdGKjH60+RU8tXLAM3p1qG4/dxEZAoRJAzeXuKEM2ck1h\n6hHNJdbmfb39q2JJW8pQoHXk1n30vmAgcMR+dWBi3jMu5UHDflWdJJ+52gb0xx61eucFdrth\nSeB71nTYjXLH5ic5FMRBJMGZFLYYDnjgVXu1/hkbJYfKVqyxWRnONoYYOao+W47hh90e1UMp\nTQhljIO0n0oYKu7epx2HfNPZRuPyHK8ZzVbaow65OG5U0xMlV5I1ONrAnkHrSgK0+R3HboKT\nc3zSeWOOlTKXaQAD5du4jFICRdqyKxbacVcDqImAJ3VWDFyMJn0qeNmfdG6Aj+8aYCZ89gfu\nDGME0/yyy5UqXzg/ShbVY41wd0ec5702RQs2RwmM5pFETqLYM0m488LUTNjacgHGBj0qZpFZ\nmAHmMwzz6VCzEYkbaAvAx0NUhMjClshWAA5NRxh1ByANx4+lPmUxgFTtY8lDTGZi444bH4Uy\nCVtrKCm7ceC2eAKhwZPMiXLITk59qlmzGenB7DpSAmPAH3jxSJGTE7QwIKNhSuOnvVi3Y280\ncjyEoDz9KqzHbtAXHOD71fsY/LkRiPOzkBf6VoktyZHovhXbLcRGLDL1P6c19ReB8yaSjZzn\n+LFfK/g+R4dSQ42BlClT6V9R/DlcaHwpx1xntzTIaOiaPLfhnIppYscYqVdwkPHy44o2HGSP\namJEbfKwzjp0owsh6VIsKbgzHPal2lgcLtwetUhgyltjbcc4wKnVW4yOc0c9TzxUqqTgHiga\nDHlzbwvX0pyrv5LZJqRV+UjNOWEfKBmgsaqs0Y3KFpwjXd6VN9nLNg8CpWjRY8scgUiyqFx1\nFO8r+IEc9jSrJuGex707YGXJ7UEkbKAobPy5xUdwhbgHipseZHwCE/nUbqygArwe9FxEf2dP\nWipvk9aKdwP0Nk45pvmgqOMUg3dzxTMcnFeEemPD44FOYjaMDmmqu3rTmB2g5pAO3AcGmtjk\ndvWnN90Z6Un06UwBP9Xx1pGby+etLxjnijbxgfrQwGiT5uRzTuGGcYqNl2tkgmnKBtNSIkCn\n8KaVGemaMlVHNG7g1QxTjsce1KGG0A0xWVsmnbS3Q0CHMoHfilyD34pPvcEZxSbgCeKQDJH6\n4FKjHbmopGy1LlqBlnnAwaaF+bPWkX5l64pV6UwsOznvxQ2GwM05cbcY5NNYbSBSAN
3y4xzS\nCTHy5p2+o9pOcYoZI8/Ko9TSP8xzj60LlsZ7UuQr57d6AsNPJ46U44peA2OgpGXoR1piE8zt\nSr3wBQ2FxkUqqdxI6UwBGZuv4VLJygHbvUfPBob+dSMcvUccU11+bNO+6tNZty8UwFGMDPSl\nbsQOKRRiPmjO1cZzQAm4NxjNOL8YHbrSZ2YJ4FDd/U0CDdu6DmlXK0Y29DzSfM3WgQu75Tmk\nGMDIwaTkrjvmnbd2MmgQjKduSKmgBRuO/NMPUDk1ZhUbuTxQGp8v/tFWL2viDzGXETgha+aN\naUSNgrxX15+0lpDzwW0yqx255A9+/wCdfKOtQ7ZyGHyngY9aoLHBX0H3iOnpWNckNk5wMYFd\nDqeGyFGCODXOysu5uM4zVLQGYs0e7KA9Dnd61TmQ7SOATxir9021m3DKtx+NZkkny/MjYXjc\nK0uZ6mYzYcDGCwyF9agboRu5NXJvmO9cBhwD6CqMwHzAc4HPvVIkp3XMi7QSPWq7hU3f3ycK\nPWrbyBgyA7CB3qjJiRlPBA6+uPQVYDLj+FQArdDnt7VAYysLKfm561aceYQFBY9QT29qiWMR\nyOXJYHrmkSVLiEbQep74qsImY/e2jPFXyFjU8ZBP3qqTRlgpPI9qoCJv3atjHrj1qLhkXjnP\nT0qxHCFG5jk9qhmQRkNu46mgCGRpI3HOR0qN2ByC2516VK21mLKMgc0x12/MMZbnFMCNn8zl\njhgO1SRZuFwfmxyVz1FQOm1mKsMYy1N3/NwNoI4amIezeZkINy54pdoZmIGT0K0A7guW2MvQ\n0z5l3Ybk55NADWZWUoW2gc4FRMMqGVTknr7U9ogkeCuDTipjQgkdMjimPUhkZBg4wx4obK7W\nAyo61J8siggAAio/lPDMR2pgMchVIU5PXINNfMkYxTiqxtjOD02il3JtaNT7lqLCIiu75R/3\nyKa8e6R4yOnenbFEf3uPU96fxhjuxkcCiwyFYysRJO7FN3KyEkkEjFKkZUjJzRtG0jp3oGxu\n9Y41PXtT2+ZecEntUagMvzjbznFOXb5eP4c5AoExBHiQt27im9MYDLlumKdJ80Y2ggZoyzNw\neexoEMZWZmVBkKe/So23H5lOQeCKlVTuDeZgdMetInysRmkAzaVYlTkLzto2MwOTuDck+lSM\nRGxGPbNM2p90kgdeaAIyxY5UhQPWiTlS4PXrTgsY3hjjuP8ACmFSFR15GeRQgHKUmXCrtXuT\nSqvykqNvbJprMVzgdTx7USZK4briqGICynrkY64pJGO3OeR0pWUjaM5GKau5vkA3DOaAEbEb\nKB95hk0pUM2GO5f1prbmzztpRHgkBvxoBhswh52c/LSMR5gYdMYP1ofn7zcAU0N91s5BpCJo\n4/3mc5OMim7S2S33u4pVxzngetM45+bnPWjUYu4DgKGNMyWZucKeMd6m8wSSfIPLx1PWmGR5\nMdCM8NTERxBWIDNtPTBpdu5W2kDb0PvRtLByy/MDmlXbwQvvtoAZ5YO1m+/3560/yzwQ2T/d\npWYSbiylSOmaI2BDOAVyMCgBikIrtjcaTaZC5JO1R1pVjO75Txjn0pGbapYcZ/hoH0EX5hxz\n/tUudp+cc9vSjyfLPzHORzik4XJO4/4UADKGUheTjNIvzIvBznpT/NG75RztyB60jZVh2b+7\nSEKFXd14z0pqjLN3pT98bOMn7tKzbnJK4wcYFGoxrLwAOT2oMZ5VB9STTgfJbcVz/Ko+WkJc\nZTGTzRqIULuUbSSKcu/aQMAZ70zy2VN4bbzTXX94HVzu6mgBfLGTg4Wk5aQEnLDotOfAkI7d\nd1I2C2VHOOCKYxFBWQknI6mmrIWUhRtGclj6U4Mozk4J4oVV3YLe3NAwkCq5weOqtQVZMHGS\nwwRigqMfMe/A70GV13Kpxx+NAhH3s3zrjHWlOONvyj+8aGk8tQ20s7DBz296aylmAxhl
5+tA\nDvkaNs5X0PqaTAbkj7q9R2oEh3OSM8ZANIrDcGBO0jk9s0AKzCZgvChTkH3pG3OpJ4cnkf1p\n0jbnO3G+owqkMT+IpAJ5ZMm0thvankjOFTPqaFUKvTHFNZSq4J9+tBQqqkhZdpAXnB4pDk5a\nVgh7D2pZHVVRRl89fekZT8ucMSOn9KCWLJuLAhgVAzTRnLIW5Ybt39Kc2GGAp3jt2pu4ybPl\n2gGgQqsWUbev8qb91Tv6560sY3NIB8pHOe1MEe5DwSevNMYZ5BZfqvqKeNobco69CabGx3Ak\nbl/WnjLbsgtu6c9KCRpBDF8n0p245OwHdjrTRny1UjI6596N/lknd8/daYwaZU2scq2MHPSm\ntu3FnkDLjr2p0y+dtP3T1zUckeNyt35I9KQDmBMg4xj0oXgEFsMKRWCkgZUk43UPgKfmyynH\n1oAG2qqrg5609VZV2sMbj3pjSF14GaGzjAckryT/AEoAXyxtPGADjJpDJI0ZOMhT17mmu3nM\nXXjjhKcpkdFCnZJ1KmnqA1M7s5LD37UcOrgjJHINJGpSYjdlT1X0pT97heM0DDy9yjcfmx1z\nSjO4fLmj5Vyc856UgU7mO44zleKQgbMh3MOpx9KNpRjg5ORg0v32yjZOO/GKjRfuENuzyTQI\ndI2Zjt609ofMcDb05601WDSFgnGeg600qJ+SScHI5xQMcMlXYcjPUjpSM25fm5anSKZCQH28\nZx60zd8qEDcc9O1A7BHGzgjo3r2pysBuXHIHNO3BWIJPPak55GMn9aAYj7UjXacvnPH8qdJt\nkZT0bufX2pqqmSQMY7UkZ2MGYBuelAgbYy5UtuU+lM37jjIAHJp7EtuOMc5FN8s7BvXGTwRQ\nAuwswZW24OelDKzKSRuct9OKd8+7B5QdBUcbGSRsAegoDUc2ySNUUcZpfmaPIPOKGVlUhmDN\n6jtTd+IRk89BQA2P5lwRjuakY5kO3AGMjFIzAL9z5u4pjSfPlU+XuKYCr97GMDPWnKxDsAcE\n/wAVCq7s20hOcik2nc0hG4d6GIYilSeSQeKcVO3P/jtNjGF5PU1J92Ysx5xRcZFIu3GPmXrT\n2kPDr0xzTvmLcYCgZxUb72YDjB5IoGPaR5F5wAfSo5FHyqBgZ+9SsPmICkHHSmspdQucsOW9\nKBE0sC7fl4BqPJYrzyPlzS4ZY8dQ3TPahFJXA5HT3+tMka3yNyNxHU5pPMdVOMY7UvC794J7\nClP8BQdB0NJgJjjIG5sU9Yw7Dc2OKRVCsSRknmhVXcSzAg9B70hpDQNueMgcU7cIQQTkH2o6\nqV796biSQbQykds0DHRxrLlmOOwHvScx8YGc4PrTspIBn5FB5PqaI2ePJdVVM/xcmkwGKvlO\nf4vX6elPmctyoCr7elMYBtwUbQTkml2gSFc9uM0wFyMAL93tS7XZjkZYnJPpTfLcKd3G05Hr\nSmb+Jg23PSncYjMGyp6dF+tNXevzEDpwKdxlgqZGMj2NHlsysWJoERvlotwUh89qevIyVwPX\nNMGQcBW3HpTgpZdzvtcfwUWARV2sRkEdcUu4PyD/AJ9KThVDY+bPP0oeEKCoPGd2c0iWOYls\nkjIIxTXQ+WuD06ClX7xBbIP3RTtpXCsOehPYUygI3OCq5OORTNwjZiBuXHWpPlViAdyrTGUL\n8wyw6GmIb91QRlgefpSkKmF4JY5OO1O2jd8/Ax0pqqG3HYc4yKXQYr5kwg4w1KEOSW4OeTSS\nSHMYUbhjk+lIyiMsoy6nnPvSAXjzyMbfSnbvMY5Xy1HX60hyyjj/AIFSFfl35LHoaoBDI24l\nW57cUbfL285DdaXdtIzhiePSkbcy4IC4OKBDt2QxIyw4HsKOu07t3tQ/zMcDjFEZMSncPoaY\nhDlQTvwT0Wm5Lx+WOH65pWjZs5wBjJpu7C4LDYT2qQDdsb+72H1pfLKx7t5bJ5GO9GfmbJ
BC\n8ikVgSz5wV/hoAP48MMDrSclyfl2novenbndSzjIxxSP8uzcM+hHWkAih13EcClzuUArg46e\npqRmLPg/lTCo3ddwzTHYQbvlBALjn6UOoViSODyR7051JXauV5+9Td23ecbiBTEBYNKo27eO\nlOb5S3OcdqbuEwAVSseM89qRsL8obOB0Wgd9RY9wO8x7jgqufemt83l7RsAODSq25Cd5DDmn\nKvzgnkdQPWgYNhmwpzzQw244/KlVWZi2du48+1NjxGrMH3AHA96Qg8zI4+8KUllYcAKRRGd5\nIxim7iy7T/8AXosA5l3KGxjsT2pq5kOM8KMLSnDxnDYzxihUPIPBxy1FgEVW25Y7vTFOw3Qj\nPGeKM7eVPGMGk5X5txAPFAgjYtIzMcKF+7QGYqOmfU0KdrZb5lIxRuVuCuAOjUixFzu+8VwO\nTUg67ScDG7d601ZCsDAj5e7Um4Nt/u9fwpkCfw5U/KehpZMo45z7etDY/hUheox2psincHTD\nN1O6mA9XADI3A6//AFqThmUKuzuc+lGwqzBs/NyN1K4bcgzg9/ekNCK+xmf72TjHfFSbf3bf\nJlBzSLtVmJ60mWVggPansOw9tjRh0YAelJ8yqMttB/WmiMLuyfm6YpdjFdqtvI7Z4/OkAGXc\nQwXIU4p0sg+UjgUN8q7QeT6dKZwRz296PMBxkeM5UZOeppeXk44bu1INy5HUEVGAVjALfiKO\noD2O1Tu5OeBQVC5xyMZpXwsqg+lCAR/LnlqGK4yOQFkHVP605lyzKi/KvVjRE23ORhVOCBQW\nO5/m3KecelMQikfu3HKscfSnxkiQjdudh1PSm7W2kKMqDkCkXPXbg0B5jiq/eQ9egpeNqqBg\n560u4ffK5B6LUZYgZA4Y4+lIok+RWkPVic+1CiPaeCWxnNGUVhGil2P3m7U112KpRuO9ArDt\nvlDJ4LCjaZV4b6560oZtxG3Jx17UwgMpYHLZ4FAAih1bkpjvSHbGqsPwI71IWO0DGQ3FNkUj\n5QBkc9aGINzcZQgnml3lodjH5c5IFL5rKu8kFumO9MZs5H3e/wBKYC7jtBX7meaXlZMgjHr3\noaMBlyflI5oVd0hC/wAPTFCAcdzudvQdc0xv3TDPyuf4h/Kn8qwJT5sZPNJHv8wk4UEZGTSG\nJsdYzGG2sTSc/M7jdj5QPf1pP4hk53DIpVcheTkmqEPby3VTtOAMfjSHPlbvu/Nz60sP7zlj\nhPQChof3gCnA9aQCNGWAAPysc0qruUrt6H7tKzLtOeGHHFKshk2spIHQmgBwzGhYvzQm4Ljb\nkEcHvmmbV8wbCSCMk4pyoGkV265/hPamBNuMaKFXL5wT709VKrHwFX727vTIl/eM44Ufd9c1\nIHWRWJU4HHPWkSWY8suCd+7oo6j3qxHuZc8IAOGxVa3y8j4G1wMAj0q0u5VVWAZSPypCHRZ3\ng7th/vYrTtpPmBK4Y8Y/rVaxjLHawyMZBrQt4NzJGvB+9zSGXrRWZcGT5vrV5cbl2ccYJqlB\nEY5CoXap5xmr0LBphhMAVBRox52hVGVA61pafayXAIdsAHis21ikTcGfOT+VdFZYaJcgf7Q/\nrSYGlotm6rg53ZrrtLjka6Qpk88risfTdkcKc/PnjPpXU6M7RyDeMo3asnuaRO/8KxlSFAxt\nO459K+1vgDp/2HwcJAuBM288f/Wr468Mp5zRfL98AA5xX3l8PbRbPwjp8Kjb+6XI79KzZodD\n5h7dOtC4Jo27uegoY/L0qCkBxuzTGb2pxxS8belAhm4Htinj6Um0Yz3pSu3HcVLGNyetIDTz\njbgUm0t25oAeOADS7vUUxelO5WhgHPUUi5bPOKXBUUoB4HQ1QCcLjuaGz1oxtXnrR1XNAhWX\nvScKufWlzgetI2CtAxPvUpOOKXPy5pAxJpAJyq5NO9wOtBz0NJk0AJnB9RSM3XH4U5e9C46k
\nUwD0PtQW+UkChWFA9KABfujNDdc5pccZBpA3YjmgA43DvS53AjpSZwxpQvfNSwGNwKUMduMU\nrYK0Z3ccimAKaXbuOc4pPurgcUMMKKChdvvmkbNItLSEJSn19aRuKVecc0AJx6c01gSvPFO6\nGhl3YJJFADVPy89acDuXBGaQLzmlxjkGmAceXgjmjn8KU/dB60nXmmFh8bgN0pCwfd2pOVPQ\n4pV5zwKBicHgc0ewpajYkNkClcQ8naBTSAQN1DMTjil2lhz+FMQi5K+nNI0absgYNOUHnd0F\nIcvnHFAxNtHkq2DjGKUDCc0Att6cUDIZLdWYkgc1ELBJWy6g/wC13q4v3vagqCck4FAjKl0C\n3lbDxqR9KqTeF4pFb92u36V0C4bPU0MDggHFMRyc3g2JlL+VGf8AdGKyb7wHE2fMRSCM8jNd\n+inkE5FOZQw2soIpDseQ3nw2t7tMSRIQpz8i4rn9Q+EsFwpKxN8vVOma98Fqi9hioWsY5F5R\nc/3qYj5ovvg3DNgLC6gcjeCcVy998GZfMfywofPYHmvrmTSY2AAC5qnJ4dilLMVGc+1Fwsj4\n5uvhPeRkiJd34d/Ssi9+Heowkh7aTp/CM4r7QuvB8E0Z/d5Oc9KzLjwJFJhfL5PQkUXkTyHx\nVceEdRt4ixhKovO5uD+VZ0mkz7tzwuqDqxFfZ918PoZ4ypjVu2GXoawp/hTb+Zl4o/RuP6Va\nmyeU+SJLaRm2gcfWozGy4UjJ9q+mb74N28hcCDyTnI2jnFYOpfBJcblRoiem0bhVe0DlPBZF\nZWHHHenN3A+6K9auvg3OoO7KEdN39aybr4W3Vt0VWHcjNP2gch5vIu7HpikYlUweBXWXPgLU\nLUsWQAZ4Bzk1QvPDVwoz5bfkaftBcpgNGQoI4HentjaMcVeksZY12MhH1BqtJCxcLtwB2NWp\naE8rIlXGTkk0bSrfMKk2suSBjnninMuep5NPmQcpGMNwflNIVAQsDlulTbNxx0FRsu18549K\nLisIwBwC2DQdwXAGaJMscMMGhGXzAPvUyWI0bAE0pj+63epugP1okG3BPegViMq2wkNSLHtU\nEj5qkC/w45POalkXnA5461VwK5TY24j5cVl+KGJ0cqnMm4YHatry3eMjrXPeO2a00VZOmG+b\nHXFS2VE8b8SYilaRsCQ9SPWuMmdzMTtUqTya67XZQYCsvJJyFrkJGP2wCIbB0INZ3KGyEtJu\nbhR/DVmHbCQ7HG7opqmPKkmZUfaQcNu7VM8ZbaVbdg4plCTQLDnK9TnPpVaZfMiG3Le9Wbhy\nznPAxxVGSV1RA3Cc4K1SRJWdfMk3BmVR1FVbm3ErYMmABxmrlx8sIZcuD1UVmzLKJA4A+h6i\nmBjTZmdsLhVOAG61TbM0aEDYc4INXr5/Lym3vuxWeJEk5Ayg6fWqArzZdlZn3AnBCjFVJiy7\ndi7j3Aq/IycqFKNj5VPc1SO5UAUbZOhNMko7TJMSBhO7VFIp8wYcEdhUriSNTj1qBh3BxzzV\nIZJDnJXPzejVInzLyx49Kg3AnITIH6U+3BPQbQxwGJqQLaxssSBXwc9O9WBI8ce1j8xPHtUE\nYaS8SPPy9GNTtKPMkjC/MpwrU7jJYwzW7HeBzUbSnB+XeqnFKuNqqGyScMaGYNG6k7F3cepp\nCI/NZfnCjY3y/SoSq7VQfKuCcVPMsm0KUUDjG01G67uVGGX+L29KAKvl5yxO6McD1pPOZpFL\nAbcYAFJcOJG29z0PanJ5v3ioAAxTJFVPtEh2k8dQaZ8vmMS+7acD29qfD5u1srjI5am+UfkC\ngMCf1piHSR+aA/3So6H0qezmeOYBU+Qjg+9V5MtLkfI44xmrMe+NlMI3kjv296ZLO88Hqy3M\nbMfMyefQV9V/DZT/AGBJtbdIhx+FfKXhFnWaEMRtzlmA619afDGELpMhHPy4J96u4jeVN2fX\n
vUksIaMDIBq5HCWz8oz3NK9rnkdfpVE2M9Yd/wAvVhzxUyruQqBg+lWVt9v3hSrbiNuD8pp3\nHZkMajqeMetSbTJwBz70+SP5do5Ge1Swgu/PBHFBSQ1YzuA6CrCqZFwTijYQcsOPWnKvmMp6\nCoKsEfyoVJye1IV5JxgGntH1wOaXYW7HincoiYgRqAO9O8n95uPTHFOkjPHHFScrtzQKxBJC\n23I5qNt0n3hgCrTRsOOnc0GNGHAIFMTKPkp6UVc8kelFBJ9+NyoB4NIvHHU0jMfxpQD16V4h\n6JIrdQ3XtSMRtK9KYy7TnrS7dxDZzTGOVhtx1pMnr6UvQHApI8dDSGPZgzcDFIFy3WjgE+lH\nG7IFAhS24kUgYdMUMwUihW545qepQvy9Mc0Mvy0cKxI5NJk7h2qhCKlOUFRmhlbcDil37cZG\naQC+YVwaRsbeO/WlZvm9qbjrzSAZ5RbpxUijjB6im8jpmnc9+tMB+75QwFR7jyTTmYqo6c9q\njbJ6cUyRyyfLyKcW+Unio1y3U0jfLuz0oDYmRgw6cUzcMnBpsbFVwacVXbx1pMY9ehx3py47\n9TUO7AGOtLk4pK4BLkjHp3pyMTjuKApJAPTvTkXnHQVRI/A/iHFBxxjgUclGGc80N90AUgFU\nBgR1A600DCnv6U5WC5wPrSevYUmAhJC9c05WXoBSH7vPGabtIwQe9UA9gOgPFNIG04605myw\n2jFNOcZ6mgBzN8wyO1JyrDmjBKdKOG5PSgQNlmBxS5IPJ47Ui5IwDxQ2doBGDQMXG1s4pOO1\nKzZOPaom3KMUCH+2aswvggGqQI71PCT680IR5z8ftP8AtHhnze6+2e9fGviSBYWbd1B4H+fr\nX3X8VrE3vhKZcfLt5NfD/iy12yzNu3hSRn15ouM811mFlkz0BrnZLcNn1Bya6bVFaT7zY9BX\nO3ytCxZOfWtEQzHvIcK4CZxyM1kS7hGw/hPOK6G8mVrcZB3Z5rAuWRmYL0JqyTLkG7LDgdKz\n7pWh3Y+Y9yK1ZG2yME5wM1n3KtwWAHcmrRJQm2yRNtOH75qGSPAG3jHepVYpMxcZDDApdoZ+\nvy4qrAVbhkTBQYz1+tQtIF+baSMYNWGjEqsisA2apbZJd8bH5R36dKYCMwVAmc5OcVEWlLhU\n5TGOelOmjKxxFuDuomY4AAwh54pgV22rIABnH8Oaq3Cl3Z87f9mrTf6wYOfY1FNtjXCgtg5Y\nUAQnLKCvBAzmkZ496+Yu0/xHHFPjZXmmj242jdu7Cl+YzBlw5xjDdKZJXMcfms8ILZ6Z6VDu\nZFZWXknPtVhlJJJG1qh3FsBvXvTEQnEzFsYwKcCY3WQ88YJqbaFZskAVAwXpnOemO1ADt27O\nzBHXn0pWU8NuDA+lQ7S7HPynGMr3p0g2IqJgEe9AxsiHyyEBJz+FI0LQt8wBJFCMzHG7anen\nKic4OW/2jQBGrKhLbcYHWm7QyAgZB5OOtOOVzgZB701WGW3cEDpQHQJGEeMpxjhajGT0IzSz\nOZFAUdBnJ60n3o1cDHNFxES7+AWwScmnsoCsjcq3JIpuz94wLHpk0ikYLKSxPSmA7ZuhDbdy\nLwMGo5Y8rn7vfFOUAKcAqc9DTZ/mPINMBFZ3fpgYpsjEKCPvdMU7aWXAbgc01V+YgtnHIpAO\nfDAbT06+1OAjKHd8w7NTY5ASWxjtSCQGPb0G6jUYrbGk4OePyqMyljyucdDTpSMEqPrTY8LG\nWzyORQIasbbWLYx1x70iKNm7dgNwaeTuA+bdnmmhhzt+Uj8qoAXAxnnHH1oyxUjjHdvT2pN3\n7slfmY9/SnbTgcfNj7vqaQDP9cuzaQezUm4leGww4xT9p3ehBwQaFhO7JOCeKVyiu3ybkzya\ncqDaCeOac3ytuxkrxmnu3y5/iPbsKYiI7gzDZ8vrUWFbO7Ix0GKsu4XAIPTg1CQcAntQIkG1\n4+
wY9Kai9S3ErfKFpDvUjLDaw4HehtysHb7oGN3vTHYeZ5OhUYAx0qIqT8xOF7Yp+4ZLN37U\n07o2Hde+KAE5ZTg45xQ2N4Q5Ge9DsqyhWHBHGKMkyj5M8UAIzYYKx4xg4ojxJ97I28CnbUkZ\nSuTtGMepoVnZtxQA/wB09qAFBI3DJK9cUwqWAIIVepzSw7i7HO0GhmHKtyvr6UCGFhwc5BPA\nzSMpbJ6KeMVIpRI/U4xz2pCp+Xn5fU0DBj8uUYccGkyWk8wjtS7wUIVAAT3pF2lWIU7DxQIN\npkAxnHUmkkZYlbllPYUKYyreaWJA4xxTEVXXqc9eTSYEzyBlUnLLt6d6YwzGuOA3UelNEjBm\nUrnHBFNUboj8+MdBTAcIRypydvvQvzsFXqO9BONhVuO5NO27cvvGfQd6AG7l2MvvimK+BtVs\nBTx705ZAq52ZVu9Mk3bVjjG05zk0AOkztVsdWppZZN0eMtmlZtyDcDlTgD+tJgpgocknk0hj\no23DbndjjPeiPGCrA7xzn2pvmKFc5wQe3rSnflWOSCOtMQD5iC31/Ch2PRz8nUHuR6Uu794j\nHg9BTZDubnnBwaB3HrHyCp3dwD6U2RQVIX5VJ5XFPwFXBbvxSBnVHA5AoAZ5e7kc4HWmk52q\nDj2qTBXJPGOtN4Vg5GcilqMa6hW2khj3ApfvHIORjApNpBLhcE9PWk3Dn+EKPzpiHBiqqCPm\nB4NDsQ2AvzAfe9aGceYCSQcZHFNObiTcDk9KQCiR93vjJpr7mbbnbxuqVo29cAdKYR5a7guW\nPHNMlDVTd33LQ+2MA5O7ptpqsflIPfn0qRnPnFlTkjHNBQg4OAvUdaaFG1CG5zTo5dynnnrT\nc+Z80Y4pEgrbZNw4xxineXhieMGlBCD5j83cU1VUMQH+b3pjEEgZioH1anygwt5Y5LLxj09a\nTy1jOwHLd8Uiq/mHJ+bH3/b0oAZHIzoYyMnrmnL95Wxg4xt9aTac7RwWpUX5CvmYcdd1ACAt\n12fMT90URt8u5V4JwR3o83ZKSCSPf+dIGO7J++T+dAAsaszLuCN157+1OALDcD7EUyVVYk9G\nXmnBzMuQVz+VACLheewP602QFgV3YJOaGXzGwx2P/dNPYEYDDJHcUAMX7yswwpXH40R7W8pf\nMyVFLtCLggtzxSMu1UO3HOAaAGybWUrn3pwXcB7LSsRtK7cN/eFKqls5GCO1ADY8tJhflXHJ\nPrSvskkBJ5Hy0u04xjC9aRo94yBjJ5oAHhXcG3ZxxtpFwNwA7ZNSDG75uYxxTNu2MMpyoOT6\ngUDBd6jbtzxnmm8xsAy4z39KXO5ixBK9sdaQ/vIy2dozigBclJvuhs9f8aRgvmYxszxxSsTH\nIDnHQcdxRISzEcYz170ANmK7jnOAMDFPLHYC3QDihl/dFCdzeopEAb5jliB0qgEVn6EEL1O6\nmsq+WDyhz0HWlbfKQr/e649qeGHJxuI6ZqRETKqtk9fuketOaJIowQ+4A5C4o3fIHfle+fWh\nCGmY8Z20AHC4fJJb1FIrlVDKOecr3ow7MpzkZ5zT9p3YLrnG7igYws7KGPCjrtpVGMhT1Gea\nZyfnHQdvWnMyyKAoOfWgBGIMYB557dqVVcEnjmlVVZG9OgIpqptAUn8aBDvL6M3QUm4rufy8\ngenpSqOnO0dMn1ppZ2zklX6HFMQ3zPmD54anMPmyX2r2App4G0jhRx701VDb2UdPXtTESIrL\nIG3bge1IuWBO7kHjFC7mUfwrihVKxjBxzTAR87iM54zin5zk4xgU1WETSZXgjhqPvZwC27vU\nsoVJEzjrxk01I0UsQMhu3elDgbm29B0xQpVRnBDEUhWBMr8sYy/96l3kjoAwHNIuG+VWKDrk\n01tzZA796Bgqndljj09KVmKttI3oOfbNJ5m7K7c+hp38BUfKAc0AIrYG7rzyKcuAxAPy479q\nbuxh
h29utJ8+3eCGH60wFRsqdwJfPX0pfMVeAOfTHeg446ov933pyqyt94E9aeoDQ27JcFW9\nqaWDZO4596duLMTnr696a3zK2eCpwKkkTJ4YE4PFD428nocGkkYsAvRsfdpY8oobrzzTuUGS\ni8DCZxuo2jdsHAIyCaTzAVYOCxzxRgqASMsKQkLBna2V+bPBPalZtzDJw2aRmJkzu3Y520qs\nmGYqcfyNMAVsqwPzc80EhunLY4pFO0Hn5iO1KPuqAcYFUID6fxY5zQjuzZVeSMYpjfM2W4/2\nqdGNytuOwAZ21JQIq8hcg+tCq/CgjGehpsO+WEjqe1PKk5z1A4amAhyjMzH5emBQuduScAdB\n601maOMheST19aaFZ8A8kenamSSK/XKbyeNtKGYNg/e6EUcsNn3j6US/6wM3DL8p+tADZlG3\n5jgZ7UrEsoGMUHCfd5z97NBjBYgMVHdj/KgBNkitu3gDHNHyNgkc54NKTuznp0xQuQpBAx0x\nUsBjbWU4GexoOF25HGc8U9SVyOAOppu7dIGGPXafSkArKeVJyG5FIqbu+3H96kPOVH3TyGpV\nxj5iSccH1oCw45LbVILf3qY0LK3ykD8acqheFBzQ0YQ/M2M80x9SJWZSVkyeegp/yxt1+ZqU\nKyqxIzjnd7VGqlgxZfmPT1FUHUcByGAxjrS8x7nAw3c+1O2twSQBjpTuBE5I3qD0oQMj3Bdr\nFevSmtv5yeT29KkLCRUGCHzkDHSmttWRtxP1oYCqu2M4bn+VIo6cDZ3b3+lO8vaqgyYyc8Ck\njUeedpw3rQAFSGIz05/Cm7tvzdM/xGn7z82FyBwaQsNyjaDx6UrhYUKMk4yp96Z/eBJ6cinN\nGPJXJy+f8imJuKuBjPfPXFAmO+U4zkcUhZs7W+7Tv3e0AZPq1IclCS2OcA0tQBm+VTtxz+dK\nzHO7AVem2jll3EZUdfc0jbZDj7uaQ7gu4EZHFO2p5mHOE7kfypPmfJU/KBg5pFTcueuKAFbD\nbthKj+7SFegXgAcnvQrDd8gI4yaYx2q4BPXggVXQQ/cF25+dvSnbwFYg/wD1qTzFyueG7mkE\ne1yM4frgUBexJuDYAG//AGqT+Esxxtoj2suSuCvWkcZ4VgG70h3YKS2TsG5vun1pGVHZcAqw\n6/WnOu50bHPQUqqJITsbL9TTGM8sySMG4OOKcsibSu3GBj8aU5H3skYyGFL/AK/nb5anp9aQ\nDY0SPo520Kw3E4+U8GkZdq4XGT1z60ZKsiqu8sMtigYKg2gHk54NDYxgHLDnNLIuQzE7B2P9\nKF+VVHDDuKRIrt8yuB7EUjLtJZW4/unv7Uisy9QQvenLGDIr5JFMQsSHc5U7u59vamMCFywy\nM1JtbGC236il3Oynay496B37jWT5eX+Ujp6U7eqqGXkKMURt+75XJBprrlSSNo9aQhWYqd23\nb64oKgR8ffXkfSjcWUEgnAxt705Yx1YFqZQ3BZt6yAsOp9vSgfMzMAAT0YUxUIjYlclj0pVA\nyAOB6UALz5Zx25xSrIIQB1duc9aXaOA4HcH3pAoZcDCkfoKQrDWA8wEgY608Mkh+bp7ChUXk\nls5HBpBmNQev4UwB1Tzeu5AAabJIPMO3KripZFEeXf5Vx2psSbCC3zKRkUAIwOwZPzEdaRsO\nFIGOxBpdpZTu7Gk2+Z2y3U81QhzFtrgAEZ6imrh+2PenmNI5AFJXjPNIo8tQ6fNzzSBi/LvD\nBt+08KKdtZsqeHJyT6U1N77kxhuo7Z/GkZX+8pznjFAC8ZJYcDo1PjkwpULkGmjdD1cZHVaj\nj+WT7xyTw1IQ9twt9vQDpUsYCwgBeT0NNjkUyElc4OMepqQfOpz8hB+7QJEkanAQHBNSIx8o\njGTnFV13tlc4YHjFTqp3sVGT2H86QMt24zkp97v6VP5gWcc5Ujbx60yFNwBPyr2PrU8aRHIH\nB/rQMs
WO8sOcKpwRWpaRrIxOSJD07VTtYQCqvxnr71f3jcAq7VB6UAXLdiZHLc7Vxtq1AoTb\nls5bpVa3jzIVzuDDn2rSt4cSKq8L1yakZpWsf2jcB97P4YrYtYynCncfesu2jCuNqjpk+hrY\n0/dNKJdvT71SwRrafJJHIpK5HTFdxo8Za4jDAleM+1crYx4YMq7vTNdt4ZjlkkXeOMf5FZS7\nmsdz2n4d6bDeXmnwAEqHUcdSM19waNbi302BUBGEA59uK+RfgjYLN4q05BHuxztHYcc19kRx\nmOEAHtxWRoBYFaRj8vAJppwq46ml3dCTUjSGyYGDjimeYVqU471FImEz3pCJFmBXnrT9w25J\nqsMk5AqVV4pDHMuWzilXcaRfm4zThnd7UwEOB259qcG4pGba3Apf60mAzaSfalJAcYpSO2aA\nQc8VQ7CkksaTlutHFN2njBoAd2xSHpS420etIOgu0jHpQzDqKbk7fakZvlxQIefm7U00K22l\nU5780hBHnPShumKM7ehpy4amhidTxjFJ0b2o+7xSbg2Qe1AAuMeppc7e1HfI4oOWpAN3dcDm\nlyVGTR/FmlwW+lIoAM/Sk55NPZcrxxTQ3HNMTFVTSt+tIuRzml+8vvTYDS2e1HG4Un3R603n\nqOtSA9vvUnOcd6VvmHFAxnOTmgLAFHUnmjIb6037vNPTk5ApgN6GlwG56UmMZ55peqn1pgO2\n7h6Cm87cYpVztGTQc5NMQjZxwcik/HFCnHBpWYcHGaBg2VWkP3evNLznrxSNjbQIVVO0ZoyQ\ntJv9accHkVIyM561I3y854pAfm6Ufe7ZouMRTx7mhs+tKu3d/hSBfvYNMQ1D1pxYYA/OmnK4\n460v8JGOaYWF3dTmgjuT+FNH3RR/DzSGC557U5c9DxSA/MCaUvyykfiKYhSvbNIFPfpTegHG\naXdu46CgEO8vp9aaqEk56U7zOgFIW6gdaCReR0pvOznmhS3TFK2etAyBVHAZRgU7yY5G+4oP\nrin43ClZfakVYryabA247FJ71Vk0CLyztAAbnArTX7vvSnIHoKYjAPhuPy2AjAJ/iPNUm8Fx\nsWLKvI+9XVZ+bjmh88HFAHBXngO3uGDeWspAwCRzXOah8M4WYsbcKn4167t75xTGt1m4NAHh\nOofCaBlZxb5XqcVh6h8GILpQUt2X+LeM5r6VNrEpJVR+VQyWayqc4H0pDPlC9+EKws6qzE9Q\nCua5u++E91vJC49AVwfyr7BfwxaMxZkVmPPTNUpfCEMx3gYHOOB3qrhY+LtQ8B6jaqwaMqy/\nw4yTWU3h+eJQZLeRT/eYYr7RvPAsM0bKyByD/Eo/nWBe/DS2b5Xj3oTnaFyKpNk8p8eSWUrS\nEiNiB7VGlmyyZZSARX1TefCWA75HhWQZ6KOawtW+C9tMu4QNAccHGa05yHE+dNu04A5oK/3v\nyr2W8+Djwscjd6Af/qrGvvhLdQyFVVW4zgZzT5iOU80ZWbDDkVKqjaMda6+7+HeoWyYWJmYd\nRt6VmS+FL9IyxgkAHqtXzIXKZEKKY2wa4X4l6gbfT0gYZydxWvR20u4t8loWCjgkckfhXjvx\ndm8y6UBiDFwwpXGlY8w1VjM+92+QD5RXPTTbmIU8Vr6rN5ceD91uM1hyW5UgjDDOKQh0MIjD\nZX5+uac0oByDu7AUpZA2RL0GKhV2nVmceWBwFHX60xk0yFZPLJGducGqbOI2xt3seAoP60si\nuuGY7j0/CqczJ5wwdoHPzVSEMWabcS3HFVGmKszE7jVqeXcMs4zjAFZkkzR7i3C7uM1VgKM7\nFssY9xY4CmqUjBVKsMEfzq1dSkSY++T0warNIcYdApzyM/rVCK8zFjiRdr4qpcZmkUBvLBH6\n1amwxPXGeOKqTTKv3s470+ginMkkZaMndnpiqfz78EYzyKuXGDcFkcY2/lVYyS7wVbk9iKAF\nEwTLKpDd
CKWFRNGA78lvy9KPMyyNxwcM3bNC4WSRpm2HOQFFAy4jDooJI4z61aZv3e1Fzx8x\nqrFIZNu1dx7Z71YhcvIUVCrKec1ICBPLQfMrAchRUh/56p17KaP3bbgy4YGhVeN9hXeCOADT\nAjkjfGR8uBnGaZv8uMNsynU5PFS8NhSenaoGdSzDkqPypgVHYeZgbWVzkKO1PdVWbKOcr1Ha\nhTtY42jP8XpSrs2q/Ibp7GgQsknlyIXY4kXJHrUcbLNlQNuzsam3I9upbDupO2onVfLUFgDI\nfm+tAMhaMyFiAVdedxPFXdPhlZgE5I++1QMjKEjUhlHORWhp8yR9OM/KSo5qjM7zwrG8M0Sc\nM6nO71HpX1t8JY3m0BuNq5wFr5G8LxSxyQu7+/0r7E+DsJXwrG5OXY5NAHWeTtbJ4FHlqvHW\nrjRjjndSxiNc5TmmXYpfZ1PIB96a1uG6CtPyjt45x1qPyeQOgphYpJaqq/dz7037GRyBzWo0\nJ2jA4Bp/kgNggnii4ym0LbRjg0yGEsx3cmrv2c+YCMgdKlhhCu2etS2MqiIKMBeaa0G1TlsG\ntA2/zA57U3ySW4HApgZrRmRl7AU6S3MjAY4FXmh/U0NFzhc+9AFBYSzH29aTyyzEbavSR4YE\nDI6VH5LKQQuOaq4WM75/7lFaflt6fpRSJsfcreuKXaT3pUIzzzRwOvSvHPQHdce3rSjGOBRx\nt5oX9KCdhQvoaayncCBSmUKOahWZnJHQUhkxyBjFB4HBpI87sk9aVfvMD0pDGnb36/SnLxnb\n0p69PlwD71G0e7GKAFOFwadweetMXCnDDFSLt28cVXQA3NuyeBTWYsfbNObnA7U1Yz1J4qQH\nbflPejjb0waQZUHuKbJ90ENiqEPyRxR260md2CDk0D2696Bhwe2cU0t83tT++F4oZVwMjJoA\nbIAv3e9QycoQetWDGMimeWA3zUCGxk7QKecU9VAXJFO24GdtFhlYHa3NSjkZPBpfL2tuPIp6\nrtHzDJpdQEIJxT29KcMbMk0hYf8A66OogjXvml2n04pdwUZx8vrTVY4IPSlLckHx601X45GR\nSM3AwM/WnhdwJ6UwEOW69KB8ueKG4yKFyPmagBOeufwoweccU7cF5I5poYspPemAvIGM0o2j\nI7Uitj7wzQB8p4wKABflYAUr5ZixPFAYs2cYFIUJY0wGjLZ2ilZvmzTlRscHml8vauTyakBj\nR7uccVKsY4I6ihvanRg4NNAZ3i+A3PhW9Ujd8lfC3jlf9OmPRQSuPfNffV0nnWM0DDh42H6G\nvhz4l6cF1C7QLsZXJ5piPHNYUhT/AHxXMXDfuzv65zXX6ouyNywy1cpdBpVIHHrVxJMS9k3M\nN68HpWTND5MmAmQ3Nbl3GBGwdcnoDWRcYA7nA4rQkx7qMeYyowBxnH9KpTRboASct3HpVy4j\n3OZduRiqshRcZGCBz71ZLM6WIs3B4P8AOoZMwxkMcPntV6eRV2rj73SqkhTziNnzYwc0ElRm\nWObdu27hVWVnRtrfd9cVeki+bPUE4xUFxbtk8fL3NUMqsfmUtllzTNyeaMNwBVny9uGHKr1F\nUwj/AGgoIiExndS6gO5dZDjO08VUkk2KMfdPJNTuxbcAxWqoVn+XIGD3qhMZ57N5gRQMjOfW\nnKdyqSwMnXPp7U1lEcwV+ndlFCxld23nOSPp61QhLiVjvbICj9ahz8gZ1JY9KWbaUCsCvqDU\nvltEgJw64pDKu9Cx4xt65pHHIKpy3celSqySFTjC52ke9Lt8tGxyynp6UxkL/vtu0428Uwgt\nJu25UVYYlVz09KibcMrj5WHSgVisqpuO7JJPTtTtreZyMqOKGV4yOMj+6acuWk6ADGaBCMOp\nHy44x2qBhulXJ5zyfWrEn3WBOTjPH8qhKs3KrzjPNMBrK8il1IUYztqOQjcVAKrjOfapFUsv\nHBA/Ok3FlT
jay8fWmAhwqjknI602PPklF9c1IoK/eO4Z/KmyY55xk1PUBo27AxHPoaSMsrMe\nM44p21kY8ceppNx4J5HSmA1QI+Rkk9fej/U/MwzTgRgqw4NKu4xlRyvemIYynarHCAmmRsFY\njbxnqaf5YkYEsVCr39aazFk5IP8AtUwEdVMgbdlelNjz8xA9hnvSfeYKDhs4B7U6MlZGVsED\nqaAI2UpKQMZx09KHUqu786BGPmBJJPNL5Y8sZbj1pDGbiY1KDAzUhxtyX2t1BpDlMdOO1MaP\nau1xuJbINAh7KbndJvCsvJ96ars43MvUUbiFYbQPc0gkZuOA3TipARwQuznB5pV5BUj5VGS3\npTmUhgC2Mc02RsggLuz/ABUxiRkMGUjcCM/hTDIZDgDAz09qergKduQQOtKuYVMmMsRkUxke\n0bt8YwynjdTpG8yNvMcDnO0DrUbYabzCCmRjNOaPLDHzR45J70CF+62wjqBz6U1YS6sFOPel\n53KTwlO3ELgDC7qYdBGwoQZyQKhJeNg5HynvmnqpYs7dAeB60nHBcZXsKBiR/LuJ+QMacqn5\nirc+9M3MOGXgnvTlY722noKQC7y3GNuBzSbV2nd8uecUjKMKgJH8YPr7U5m+dnK5H92gQ3AZ\nkwOCcClkj3SbWO1VOcCjABDdaCNymT8KYCKCytu6ZztoDf3BmmtlpA2ccc+lNXcvOcR5zikI\ndu3KQ/D/AEpW2NJnGOMH2pWYLkZ4akLbcBV3e9ACMDGSR85PehMcLjP0pVznJ+U5yaGYAllH\nB6+3vTGIq/O2RwOaidPM+YKFbPSp9yr827c2MAdj701BtTLn5/UUFCM3l47L3FHLMXPC9qRd\nzJtYYJPWlVW3qG4QcUiRI03gnfk59KaE8xT0QjjH9acrDEgyQc9PelZ1dhu+UAfMPWmIjV/L\nxhdxHWhmZlypzmiRRIpOMIeiinR5Gfm+UDv6UAIxIYAHIPrQ33i3tj60FSVGQc9hRzwu4fQi\ngYIUfZvYlR1wOlKyLyM5549xTdp2kJzzk5p+yOQ4AIOPmJPegQ3j51XgsOcmnjCxrxvOMYqL\ny8Jnb37daFVcjGVyevegYNlcOMk9NtEylWGz5vWneY3mNtXpxmkkzJkAZxSEMRT5TENvOerd\nKXzUjQdVOcfLSSZDJtpCxH3Rhs8mmMequyn0Xmo5Jt3J+UdgKfuGCqn8KbHIqqU6nPJxQJB5\naKVUfLk59qV2O7HU+tPJOFwMjNNC5UueBnoOtAxm1lUttAxnIpiuWHClcc/hUoKscLz25oxt\nHCkjPIHakA0KokDMSM+tPm+ZSHTB7YpdyybWyDkcH+lNXdHlenHTrQIRf3aggcHg02EDkHrn\nBpztuUKMkd/rS9N38JAzxTART0IyCCQKY0TpCC/DNzjuaf8AMUUY3Ht+Peja0hTJ+YcUmBHG\nMKMLkn72fSnhkkDMRwOAaGRpHMYOQe9G3yiFU7FA6Uxhx3bK464pv3lAU7eeuKkbL4ZcKKY2\n5skcHPWgRGYWy2csfWnjcBxjbj7p70LuXLkksf1pBu8vjqT0NIBVJY7ivy4/KkZgVwegOaFL\nllAGI+6iiRSnQbuaYx3RE3D5t2fwp0zSMwKgev4VHknJI7YzQqlFyDx2NAMX5ppAFGMjOD2p\nWXYxRmBGeNtJtZ33g5YrggHHFCY649zmgBUjUgjO09abGerLt3dMevtTWYeXu6sTwBT0j2yM\nEHA7+9AhG3K+5eSR6dPakZQ0e44HONvvTlT5SwbHc0YJHTI6jFADWyygkY7U3cCoUfexkmns\nw84EZxjO0+tMVfMwB1znpQMVc/KEJAJ7+tIwOGB4ycfWnSRn7wbHNDljMD0X/PNAhu0KRk44\nxmlWEqoU/eHOR3FDJnOWLegApRGVkIOcdqAE8wNJtI3KOoHrRPIjFdqbO1KvzQoFADk/N701\nlPQpgqe3egBr
bo5AOw5PvQ2FKll+YjINOaT54i64GcZ9abJG/O1M/NgGmMNwVQVjLE8Uqbow\nDjOeKVXcK3GBnaCKYXZeM/dPNAtRVwzlMFQOaaXJfcRhSMAf1pzN8pYrwepprruI+bAXkUiG\nxyqzNuJGFHehsMwCttb1p+394XLZRhjAqIR4fCnLdqChwYNMzdOMUw/dJxu55+lSSA7n7N2p\nrOflYcHHNADGkWTEaMT3+lKqtgDOMGnD5Y9+zYc8n1pQv3BsIkJpiGfqc80uGAVj8pAycUKp\nVmLfdHNJyxPOdo6etIoRmIwCcFuaNxaRmPVRxT8BlQjG7uPSlwFlyxyKBjcfKN3J64FNBO3r\ntPOFp/PLYPHSl27sMeKBDF/d4bOe2PelEjMAdvy9DT2ZUJJ4Hrik2no42sRxigBG3YxjKjkU\n1XEeAFxk8inhTwuMf560snlt1HsG9TTAj58zbt5znmnzMPL3cnnHFMVW8whjg0rMFb1Pdadx\nitngOoCEdutN3ZXBHy5zS8Lyee+2jnbkL8pOaRBHu58zGSeAtKQy4B2r/Sl5k3Oo+Ve1MZ1A\nJYHH60alD1YycBvxIpwUsQzMNo4prMGOB90jim7SysAu4ryfakAqg4YiPnsc01mO0DG0H7wP\nen4RlG5imec0xsZOATzTJBl2Rg/w9KcHCgAjrQnzL94AZwQakfbGjMCMg4oBkfmDdkpgL0pz\nKG2t91Bz0pVZXfaRgY60nzbQp+YZ9aAH7tuB1RuwqNVjVfMkDF+o/wAKczFlUHrnAoIkHyY3\n4/SgBmzftJBAAz1ocbAWPOeBjrTywyD+YqKTG/djApsBdpVxzlsUu09OWHU5o3beg3P0oYvH\nhCcvnJHpTEDhmYZwqfrQW2yMuM8UFBvJI3f0oXdjHLSfyFSAjAlV2odvfPakb7oYcjOD7U/c\nWXaWOc0m5d3Pyg96OgDQhdjn8aV9vDFCO2c05i3zNuJ3H0pGCSjCksQc/jQWKwRlBwQM9FoZ\nQF4OFzQrNJuG3Bz+FHzYyp+b0osAoXKk7iCDwtMXBYrncxGeaVcseoX1FN4dhgEN60gHru25\nHytjAHrTH2oMnOT2qV8LIGHTGCD0pjIduf4M1QDEAZhv+WpdrtbyYGI85J700/Njj5s53HvS\nrjzMk8Z7H9KCQDYjUng44xSE7l5GM8Z60rRqys20q2emaarOVYKOnf0oAftKtkHI+7z61FuG\ncYOQeT707gGIltwY4/8Ar07uQPXrQAZ+8QCBnHFIvyqmCFyO9Lh1YhDn1pFY7M7MOvApXGG4\nyNgAK3ZT/OlV8nYeHHX3pr/NGCCc+1PfGFZCDjrmkIjVFwxK474zyKRVEbZ+8x+YLT2Ksdw+\n93BphjTd8wy3RcVSGLGrednBdW9eOaau7c4ZcY9exqSSQ7lxyV4NJIp4DHLMakOozbuwuffN\nAbazDOOMAVJJtCrzzjkCjcmd4HOO/SgY2PKlQPlJ65oVTHvRyMFs7hT/AL0YJb5sZFMwXUH1\n6rQTIRd/zFhz6+1Ir5AKAoelP2qyk7sEU5VEijaw2+tPoA3aWfJ6Y4pN/wA24ru7U5cbTlsk\nfw0qqNhAyh64pAEbr5ilm6dKcsaxoc/MmfxpjoF4xhcZ57UpXYocjrxgdqYxN/l4DDcQeGFK\n58zcw+57+tIvJLbs470bWx05x+FBQBRsXcOc5p67Vb5fl45NAbcu0cnHIpqO21l7elBNhrfP\nEAeV3f5NLtQ9SRz1qQLubGAnFNjYeWeMnPB9aQx3ztlgOB39qaqltzH5W6gn0pS0yj74LHtQ\nE3MSDnjkGq6EicbfmbPq1LGA52BNzn7vOKT/AFRVSmWNKwZmPGD7UCG7vLyRl2Bxmn7n4IGR\n/fz/AEpPmCghcA+/ekU5zlsg8fjSHcAfmOZNyrztA70hmaTaR1J60u1Y29TjHFDBWA2HcBwf\nrQADG5ztJwMkZp
eJIBgfOei005h5cd/Xmh8+YcEcjgCkUPT7m5lAHQ5phDRjkb48549KfGoO\n7cCEXjaP50isu3BLf7tADivfbtU8g+lDSbiArcng8Uu5l5YZUdKVVxlmCsx9KdxWGqvzYJ3E\nevTFHPyhlwN2AR+lAkLMeBgDFLCpSTOccd+gpBYGzHkN8zZ+7S7dqZAwTTXDcArg9c5pGwjA\nfeOfwpiHKf3Z+TLdPekVBKpXdtyMMKVPukdwcmmyfe/dn5DjH1oANoMQjY5K9KXeFYbyRgZA\np22KNirZJYcj3pvzSKBx83HNMQ3akilgNxbnNPZlZUXaV560LH+8CLwV60u52kGcbc4xil1J\nHfK8hZcbR+ppU3dCm3NB2klQfkU8+madtONoOU6k96YxY4woV2OCDnNWY2THKkI3GaijXzFB\n7DpVraVC7sbz0zQxE6qJMLklMYzjoasWpXOMAsOPrVaOYsoU/I3RquRI6txhgB0HeoH0LkKv\n8rluc8KeauQM8zY2j39aq27FWX1xWjZxmOQORu3HA/xpl2LNnDtk3ZyQD+Nbdvbu0a/Moyuf\noKzLa3kkQv8AKrK3A9a07UM0qqflU9alhYvWqeXDyST2I6V1Gh6fIYS7j5S3G6sazRUjMbHd\nW1Y3EuPk3EDtUMa3Oi0+NvtARV4/lXoHh2zLTIRyOD7GuF8O3bTQEuoUq2M9zXomg/6tAnbk\nj86xkbH03+zVpavrV1ctxtTAbGSOnFfTjnzMnAX2rw39mjTPJ0me4KZ3DhvSvb2YbSD+lSwG\nEHOaaoCt60m7HBNSLjI4qQGHO6nkDuKb/FTguOvWpKGbdre1ObLKcdaVugpvSkAq/d5o3Nnj\noKMcU3aemeKAHkgjOeaM7hnFIFCr70u09RwKADvmlYbeaA2Pej8M0xgPm70uQvJFJ9Bihsle\ntMA4PejIxmgY24IFC/dpCuJtPl0m4cetL2wKa33ulIBretM8znO0ipSM9DRg8jrQAiNnJNSZ\n3LxTVo5XtxT6gKM9KFI545oCnk5oVemKQCsRtFGTx6UvGTyDTWO7I5FMAbHY0vQCk+6AaX71\nMroHrigfMpFHC0m49QKnqSH3VHWncYBpvOM9qAOfaqGBPtzTh+tIp9s0ctUgA4z70q+44pMj\noaQHnrxQApxnFGPTilZe4xSZPU80wsG0bfejtSMT1oPQGkAfQ0fdwTQCNo707jHrTQhjAn60\n77q4NIMnPPSl60MB3GKaMBfehWxn1o43AUxhkelJ74oBwp5oP1pALuxRn86VSF+tN53Z7Uxi\nhRnNHHOBik3Dpjk05vmPpSATkqM9abkD2p/cYNIV3daYxjZPAFIzDgd6dHlV55pmcvkCgBzZ\n4FNCNuwDT1GWJPFBXryRQADimld2ecUdGHeneXlsg8GgAVSy5zzTh8q4/ipv3RinH64oFYFy\nq7TzmkCnkE8Uu3ao55pD+tIAHyAccGl3Yb60KG280Mu49aBiK3JXFIcsuMU7gdDSKp554pgN\nUBV96azHpmpUXKk03b045oAauc+1PVjuxjAoUHJ9KXnbmgBORTlXb9KRVKn1oLZOO1ACeTnO\naPL+XAHFODHtinKDzzgUAVWjHPHNAQMMEVKG68UzJ9KdwGfZI1+YIMnqcVBLpkT5IPHpV1dw\n7/hQq4zz1oEZTaBbSRkFOD6VSk8K27DaUBHbPX866PaewppXdz3zSBI4u48CQNkcqvp1JrMb\n4dQcholcNxyeleiqvPIzU6xq3bn6VaIZ43rnwvtbexupJYEaBVLsyjB4Ffm18SNSNzrmryFS\n0bTvsX0UHHFfqZ+0D4gTwf8ACPxHqRcR3H2doo2PcnI496/JbxBNuuA7SgFh8ynqSatEnMTK\nqMDLzgZrKaPzGLbsEjIrQvmXYgD/ADVmzsUgjXblhyTWiMwt41jYDhpCME9qFgdWYKV46j0p\n8is0KlOFPJqJpEmV
jG7ROo/i71QhJMxxud4f09/as66kW3K7kLl/TsasIy+ZgfMcZFULmTa5\nLP8AN2oGirNMPlDDDDnpyKp3DySRKVj3gj/JqxNtkUh3w+MBvWqUhZoxtYjb8orQCmJA5J8v\nDLw2agnk8uPhd7McD3zVlo/MbC/cHU+p71D5e3n+BTu25oEQ+d5Rwh3tjbg/yqlcKcbEHB52\nehq7N0aUrsfPOO9UriSTymYnbzwRVCM/ceCybUz3qJVHm5bkE9/0qe8k3RBiwPI+WoWj8zvj\nGDmgQkyrtJDZGeRRIhWM5OADxQp2yEkbVPHrVsBcq2M9qkpE0GJWQr2X6VYikT52DYXv9aq7\nitxjaeRwR0PtU8ckYPlsQvcr6UDJOHjBB6c4p3mIpEikOe4XqKRJkZmBOzsDTYYyzEMmHHVR\nxkUCEkzb5KJuZuaquqPIykbcjPFW5FZXjMZLYPCnsO4qOeMuWKlSGPPtQBT5Mm0MuAOOKQRy\numSVIzgYp0bq8bHbjquf60jKoXZvOU6UxMk8vymCrGG7nNV9xmZnZdpQ4VKI2kWHcTuO7inu\nm+SOUMd2cFaQhrI4ZMnaeu4cgVasZRn0jDZB9T6VDIrRyMV79aWx/eMQQUC9eOtNaAz1PwM6\nTyIJjgMuArdQfrX198JYPL8LQM3QZXH0r438CzCeS2UrwGBUY5IyOK+1/hWA/hWDA6lvw56U\nmB0qqrZZRgU6FWbJxn0qwsO1TjpSrHhcA4NMY2OPdGcdW6iholO0DrU6oI+gyaesee3NF2Mi\n8rcCVb5RQM9amWPapGOM1KqLtP0oArjjHemsCpYHvU5i6c8YpqpuYdzSARMrwPSjy3VgAPlN\nS+XtbJ61IitJ8o7Ux2KojZzwMCnrEy9TkVa8k8sDxQEz6YouwKjRho+Rj3pVgLqBU7L82D0p\n+1tuEFVcZW+yn1oqfyZKKOYk+zN+KPM3AYGRQMdQMChY+MKcn0rxzuRNFypBFIyZ74pFViw7\netSKvzHJp3AryQllGaWGPa2etTsoJGOKNq5yvWmJCKB1pCh5/OntllAo3YPIzS0GJwR15pU+\nRcYzQXXIwKFPzZpXAbJlm6UyP75B6VLJnrTSmzqc0ALIwYcGlyCvHNNXCqcjIpycYoEKcdhz\nQylmHHFKpABz1pFbcuetPUYbQpz0obhuKUsDxSPnb0o1ARWK8kUHG31o2naDnilUbcnGRQAq\n/Ng96cUy3NAX5c4oJ3Y9KAGbi0m3oKkXONp6ZpDH6HmjjkZpagOYAjAoXPQ00Hkc8U7zA3bm\nmIR1OMDpSKoYHJBPpStn1pNoz6UCF5VcYyKTd8vpQrbW5zilJDL0FJiBcYwRxR91uKD972pQ\nBng8UwE60Fs8kcUN6D8aNvykdKoAHOMHNDKM56UFvlHGGoPbuakA57nihg2KOM0vLck4FMQj\nNtAHenbug6U3aN2etPXGctQMVW+Y8Ubs80itu5xTsgLgVLAb61IvYZqP69akQbV6UxE5xJG4\nP93B9x6V8f8Axy0/yPEl64Xahfj29BX2DA25sdu9fMn7RttGuvsUGA2GIFMR8uasokZwVIJ5\nrlNSQKxwMCux8QZjZwM8nOa5LUl3L061aEYF4D5YKD5s/hWLcEqXz81b92pEa4696x7mNWk3\nEdPSrRJiXGQ2VOB/drPkl8tstGSc/hWlfSHn5cAc1m3FxvMeeOemKsllZow0j+Z91ueO1VZl\nBk7EL0PerzSOrkYHsapXSq6burKecVROo2OQsxyAq9AagabCsG+7n8aftyoYt+7z0FVpI90h\nL9M0xjeJOcY54FRTqyruU7TUjF41ZlwGH8NMMjNGGf7x6e1MCoxMa72GfSoWxtPmtt77quHD\nZOM9vxqpJ+7Ul1yc4wKESyFnjk+YAkgU1VfyzjILdaJpC2AMA55I/lTx8wXn5jwF9avUOhXl\nDFhu/h6YpYZNyvwdx4
qRwsjFSCMcfjTfLCL9/acUgEljSSONFXcc4P4VDzHhh0zzmpfOKqNg\nxULfI2GbOOTQMdIxmjC5G7dwR0x6VHIn7s5Y9eBmm+ZtVio4zwBSMjrgsQzY/CmIDHvXAOGH\nQHpTJQzn5mxj0pWVioGPmHNEisF4GR6UCYjEqgdl9h/jTfvP5bdcZI/rTlYsnJ6fwntUEv7x\nlkB2kNyfUUAK2WAx95T+JFKxVpGbjC8baTP71iP4uh9BTfL2yOex70wDays5IwoPrTD8xXHr\nnNDYViScsRgUpiyvDY45NIY47mlwD24NG7d8rsCfT1o5k3FeGC8e9N8wtHhlwScCmxCMpZQB\nxg9KOI1OM9aRjtK84z0+lRFv3hCHdg/epgK2dh5yuaP94DZiopMSSbl6D1p0eSxDn5aVxrzF\nXA2gjOM0Mi9CcZpctuOwDdjvR5ZZFDNhqBERUqwxlT0P0pxxtYk4C9KVmXfuz7ULIAmNvGaB\niKAvyrli3P0p27OBt+bqfam7yoO3n09aTlgGOVYUwFZi3PBB6CmdGzjpQcyLkjac96WMMu51\n2sKQBu6seuKY3G3A6jHFEj8HKZ9hQV+VQB70wDnGGwPWnnof4lx1zQ2MLIVwDxTWUYJ5K0wI\n2XO7nlP5UpL+WCDw3NOVQuSxwT14pqzfNgE/iOKkQgIyCRnttoO3dhWyM/lQshZgTQqogJA4\n3UwF+aNSSdy5pu0bgM4zTVVpM7em7pUjJs5zwvUUmO4hkbzAuQwx3qGNVjJJJB96lMiuFYrt\n96VcTKQTubscUAMRjtCse+Q3pQ7FI0BU7s7c+tL8xIUFfl5J9KPnZss4Y5yKYDWZkIXABz1p\n/JjIxtZTnbTM5UluGU596d8uN2fv9BTEJLIqsNwySPuimcLGOd3tT1ddwKjIPr2pu392wzkg\n5x60DE8wfcKcnp9e1KfljBY47HFG5toJ/L0pG+UkucgdMUgHfxZIyvvTf4OOO1BdBgs2AetO\nkwRsU9eQaZRGW8rbn+HpS7c8njcc57UuQRtIy1M2naATkKcgUiRZJA3A/h5NEbbsZJOeVzSI\nwG/zCDmhQNgCndj07UDHHK7j1XHOPWm7gypkZUjgUqx8ElsbuMUbPlCgZAOP/r0xCkBV2g55\nyT603DDgMPLxjPrT9pVizfN23VGx2gAcqeTigQ3cGcO2Qy9MVIrkguVw3U0xlDqWJwR0p6v8\nnzjAal1AiLCTlcA9frTyu4gZwG/Q0xoxHnC/N1IqRYwrZ3Z7bfegfURcpnbk849qQszSYGM0\n9W2qQVx7Uw4kYkSbD0pjY4SNHGWcjOcfWm/cjDYPzDn2pqRsYyrHkHIaiZ3DRtnKDj60hDWA\nhx8278M9amLZ24AI/vdqHwW4ADdqi+ZlJI4ouAxsoynaQT0xUh3eZgcAj9aQOU78bcDNIGaT\nao5b1oEI7EDZyGU0jOxkG04B61MW3OwK/wDA6iYKGG4NvB6YpagKyBl9R0yOx9aezbWjBXBX\nnbnr700t5mTnZz2o8xeVIKnHBPNMAbDqSwxzmiMtJlsEZ4Gaco3L8pyV9qRW8wscgkfxZoGA\n3btgGBjqajjXy1YnnnB96kDFoyrLnng+1MTLSMAdu3nH0pjCNdpPO1/6URISzBmx7+lHnBsN\nICGb0ps5Ea785OegoEKrKq5x944yakbLMAuWGOTSSbGwNu4/xfWhmAhODs9MUhDWwsgXG4Y5\noG7J+XPajlo1CjaTR5jq2MfIOppjHbvU7QtJgbTjndzTXkG5iPmOOKXd91mHOMe1IQ1MMQwy\nO1NLbGbJJHXFTSKYwBuAP3sUxVUQsS3zMePagBnmbYTtBLnkH+lObdG3Ix6rQzHyskcDqo/n\nTWUKRnOW+bP9KYARvUkcAnB9qdNuk2qDyBjPrTtys2H4/wBkUxmIKjOAOlAdQjVVBCn5++aV\n2PzDON3SiX52XAwe9Ei+
XnncfSgYRKPu/gRSLI3mMVGR0206RvuFcbqQP87R4wO5pCGSZ2fL\n86nnNK0haRQOGIxxSKFVWJbimBxuGMhs9famA98rHgnec4NHmdsY7c9qQkRg91zyPWgrn72e\nf4aAFYlmwCU7ChWcHltwBzTix3DHK4pgztKnjJoADGNu8AsfbtQrFuc+1LtwpGeRyQOpFG6N\n/mUbMfKVNMBmdqyEjKjnFSfejR1fCMKAyM3zD5G6U35V37jkLxj+VIBxXoB0Hb+tNcDOBznv\nRtwyhPTqabtK8A9Oc0AK0e04HzYGTSSjjqOmaX7g80nBNDFPLVgM56CmK2oqsq7Qy546CnbP\nuY+U7u9R8DaxHQ/lSY3QsWYqd2QT6Uxi/fxluRRtwwDHb/Ko3XhcjntinykDb/F2IqRCtt48\n3LL2xSO5ZgFbvxnrStj7P6Nnmk2r5gKr8wH3jQMcyttIxlehNNb5tuBtOfvU+T5VyX+cd6Yr\nPu35GzvmgVwOFbLDkHt3pS374jHH3qVJAyuWGSeAaOS2MYG3GTT3DUFY5JAz7U5mCuCVyq8s\nBTPm8sYUkZ5b0pQw3OArBSMUDHPt8zPQMOB7U1mEkihONvSmK58zGQ2BwopFxySe3IosA8/M\nGG7J9KTbhgrEMCOM0xcbScFQeRmpXxsXAyTzupi8yMqWY9z9eaUYQ7e57GkYATbg2DjmiQqz\nBycsPajUdwTaqEHgj1pd5xkdW/hpHwrZPRqULvDEnOOBSEJNIyybQAnHIqNmIYErweKTeNoB\nyTnnNTN5q4JAweKQxhZTt284OPeiMlWO4+vHrQsigK23BzgikZR5ayHox/EVVhCZXbh1I54y\naWOYqpfHfFLtG5g43bv7vahWHRfmCjmpEN8v7+TkfepyY4ZsAY4aiHnOTjPOPal4aXGcLj7t\nPoAi/LISw8xSMhhTFYuxYjyxnpUiqFVimd3bNJuIXO3/APXQA+Fl8wFuQOaGZyxJ+Vc5yKjk\nk3bQBhj1qR9qKMAhvftTQCHKscjIPcUmNrLz8vWn7xuCqd2erVHtDM5LZU9qBdQ3q0hIO455\nHpQrK0m5uvQUq7kXaseR/epNoXAOCxpjD/lsee1Cswzg5Y9aGXGTnp70iB4ZOdvzDqDUsoVl\n/esc/KpxSySMyghMAHj3pACYmAJBzycUNu+Q43Ede1IWg0ycHCsrZyfSnsqjBQfe5Wl3KZcg\n8dajZvnJVSQemO1AxxYJnBOW5FAbcgYDvgijeGAbAHb3FIcHcdpJ9u1PUAibhjsJ54PahXSR\neN2V9KVSI40zu25oVVaQsDtGfuigBGU7Rubk8gUrYj3BssmM8HoaNuGLN2BxQo8zYetMGCsV\nA2qGXGc0isGVcKVOclqcSqMRjH+z60isvXd/wGmSGCrHa3y0LI24Dd8xGDik3KzMM4APHOac\noMbOYxk4zzUgIFbaQqjI560jfdA3fMODRt3FX2lc8Z96Uhm5HIXihDDopG7DHvSru8vHWQfl\nijjgBfrSbWHXn3FMYZaNcdPemqy8DGfak3Bmwx4PSnSRqu2MjB67qRI7arKMnDg9KNy7/Q+t\nMX5lLk/dPQU9cxr0Dbucd6AGyN0CAZzyxpw2sNzNiTd0x1prKWyOqjp9aNu4qSOAOSfWmMcF\nzKQwxj1pr7VTI6k96V23Y3cc4zRwc/LnHT/GgLgwJ2gfM3oKczDJ+XDevamM7JjA2seAaFjd\nt25sDHJ7ZqRjkB+ZSh56kU3EbcqPLC8bf609dzKGEmwgYz7VGflfLY8vt9aCeo4ZVcbQCeua\nN524HfrntQyqPmdifWj+Dcw+XOAaB2I/L3Z38gHt3qdVLNgf6z36UxcRyHsMdaOF7lu+KBCc\n7CHG1s0eYdoQHkmpNv7wh225GcYpqpuYgDnsaZQSyMiExqMjr9KezsSp28EZ6U3a28AEAd6X\nb5LsxkyQcBTSAQLtU5GXPI
OaApXnIx1yfWlkbC7usmeKarBt4C89WPbNMBq/eBIJYnr6VIwG\nSMnNJIwXaOQx/lRvZQWflchR6igBM+ZiQPkZxTiGXJJ/Kgr5it2A5+tISVVCTuJPQdqAFC5f\n5h8uOQPT0pBt3crheg9hRGzLvB+9nO7tj0pXyyZbjJzxQAq4UEquWPH/ANemF1jjACseaVYj\n5e9SUGcGpG+78oJ47jpQL1GnHJ28kd6aI90eCpPP3qkh/f5UHlR3pPMKOpJ+TvxSGHmBn2qd\nmeoPeh8IpOCWPGfSh2+0MMYBz+NLtLOdp7/rTFcQspj3R5YrwVxSsPLUbTnjP0pG6H5yCDkg\nCjYVYHOd3OKeo0KrgqG/hzzSqRtYZ2nrk+npTP8AZG0DPKk1JuVFIkXEY5yvNIkjRvmwo4xn\nDUbvMUAnac8e9IA3VSWB5BPBx6VJMpZlbheO1ICOQhWIY5z/ABAcU5l2sU27lX+HOPxpWZ9m\n7gxkYKkfrTFG1ijZJI4piHTZUA7ge/8A9akkxwFUk9aXCSRLtYOQe/rQyleAeO4oELx5xflc\nCpJGwu5Rk5xUSI0ffeG9ad5gVTtbduOAMUCCNRtKhssOTTlRGyRJtfOQKSMh0w/ylv1FOjCe\nXuxsGMDNXbQdiyrFmQgd8cVZ++JN5JB5XbVOFWSPDHk8hu1Spu28J74BqAsWI2DFRyOPSrdm\nu3BD4ZTxzUUfzY2gZxwwq3DEryZbG3HJA70iku5oW/8AqyWb5j1q/bxHy8bsFRnPtVFWDKoC\n7SONpq9b/vcrnHr9KkbRq2bLhGDbm64rWtmV334+XucVj2IXeig+y10PksYwpAAxkgCkxl6z\nUs3Td/St/TYR9oCnjdwBWBptvIJV54YevNdXZq00i4whXvUvYcToNNtPLOMYw2TXo/hu3VmB\nHBPAAHWvP9HheS6i8xsqSAcV69oelia4t1hwTIwX6VkaWPsD4F6ebPwPECpSQthhXorH0GKw\nvA9j9h8M6fGOP3Kn68VujJ+lZvcfQjKfvMnmpNxzQq9Rik5pIA/Cl3Agn0pFbHBFJjg1NyhV\nbPNKWHYU3OMcUbvm4pgKrB15GKX0App+lO470gALluaUZC4PSm7cL1pwb5cGkAq4x0pV9elI\nrBaGOTQAgHOSaXbznPFIqnJ9KXhuKoOgnOPl60dAB+dL91ie1NXJz6UiQJ3cCk649aUHb1GK\nT7vJoAC3Til3hWzik7Z6ikb5sUyrig+tPXKjnmoWVsggVKo60gFHbjrRj5jzinclRzTOGY9q\nQBgbsjrTgQT0pANueaRc88UDF65zS8DHHNJuxSLndk8incBfvMTwRQD1pF6daFH1xSYheMYo\n7cUHAX3pOetAw28UowBgUhz96lUgsCKYhON2DSkY6UY3ZNA+6KQxOxpVXchNAbnFObKrSAaF\nP1FIzZ4FKMqfak4LUAKeF4FC4YelJzzQ33feqQhVGaAw/u5NGCcE8U0KQ3XimMVe4NLnoSOa\nRvYUuQevNAhJMckcZoRt2KCp64zSY9TigY5cbiaDg5pMYGM80u0rUjEDDrjmiNtynPWjB28j\nGaVV2N7UrhYF6H1pAp3cnilA4JPU0Kp3etUhiOcfLjmm87vSpOxPeo5FYLnNMBynqMc0c8fW\nmbjtHc04tg9OKAFK/ePagdKYJgCB2p45pEi8UbfU0uOOaQKSc9qYwzu6dR60N8xoX5SxPIpO\nFIx1oGO5/Gmqvze1K2VbI5pNpAzQADkntS8r1PWkH3Pej7wzQLqG7atL/CWNHB5xzR9fypDG\nKrKME1KM9O1N47k0ueKLgLu9BTIx6inKxVaac4pgOUD8aHBxSFsc4oHfnigBmMc9aeCG5ApD\nxzikZG28UAKwyT2pPu0gY8A8GnPjFIBI2O09qT+LmlVht4pQN3amAR/Mfap41LZwOabEu1TV\nm2Q7lz9aFuQ0fLf7f/iyXTPh
/p+jwPhr2Us8eOWC84+lfm3q14Ly+YONkkfG019afty/ERtc\n+KIsky1tpcRTGeNxPp/ntXx3qEjTXEswHzbsH3962RMuxVvivk/vh5ZHTjmqMv7vYVYyKwwR\nVu+81jGCVLdce1Vmx5LkDLA1ojIryTC34VWDeh5FQvcOzAsPl9ankldmAIXaBVSZlnzt+Xjp\nTAdJNtcSQYYkYIrJuJJF3AqrHd/Eauxqwk3tiJFOBk8VTkfczFlwDyrdaYFGfftVjyA33aqu\nZBI+wgEcirMl0twfLXBYdTUDMFZiBvG3kZq9QIpGBjXJwzdQKikVlKlccdR61MUIUDGNwzmq\nzRhI8/M4z1zTAR2LIVkTnPXtVC6mLXgjKqFxxV6Reincd3Q1nyqs0yiQjao4x1qkSZkzJuIi\nG5Af1pFUFehDHuacJAskhA79KRm8xeTjmgCPyZVU7GCKvPNWVaMMHzlyvSo1zIzIWy7DC8U9\nVbaFZlJXgkdRSKJ4W+ZS4JbHA9PerUe5Yzwko6g9wagM3zKE+ePby1OtmHzBAdp60BYmZpPL\nRvLXeT2omy33iUOfWhQY4wGztzzjqKMmMDq4U56UgHyTlVGGyehHeqbRnzV2MAjcHb1qyrSL\nOXJUIwznH6VBx94DaSfu0CK+3arIwChRu609VdoX4Azyc9aS62O3lhSGxznvUblQyleB/EGO\ncVasJiyMygBCdu3kEcUuwosYVSA2Oae6sj7OiuOPpTfM8slVc5xgVIhrKVbYrZ+bLMe1SWrN\nO7mOVSM4DD2qGP51YEcK3X1rS+wKlqJFKqW6470gO88B3bTXUYRcydB/u191fD+1Fv4YsgBj\ndGDnH6V8O/DGOO41CBvLMe10P8q++fDq40i2wP3fljFA1sXNpVcAZ9qcq5IAFL8yyc1J90ZH\nFIoQDa3HJp4Us4xQrAngfjTl3KNx70XEG3uOaVlDDpinJnnjAobDH7w+lAxIxlMD1607aPM4\n+760L0wKcvyLjGc9aY7EeMsT2p4Vv4RinAfLwB1qRNzLnHAOKQWGJkxnOBTWX5eODU/kjcOw\npfJJAz60wsVv9ZnjmnRfe4birMcexj/KkWNfmJXFAEflv60Uu1qKYz7JVdy4pu3bxjBp3Py9\nhSn73PFeWdYIrKp54pFb5gSM0pO7gGkZSO/FFgA/e6daOYxkDdzS89qRe/NAAzdDSqu5jzSb\njsGRmkViuWHHtUgOfC9Bk0YGaYTuYZ608N/CeaAEYg/ShgWX5eTS7enpSocSYHSgBFXtQvy5\nyKfyvNIueuePSmApwy5pir8xK9u1O43A9Pam/MqkgU2A5uOe9EeO5oU7lOetKmNw4pXARl64\npSjDHOR7U5sbvag9sHimAq43AZpOMEMe/FJyxHrQVPfn0oFqKW2npRzsHGaOHyCcUv8AEADx\nSAj+8uVHen/d5xxSLhWYEcUv3eKYC/xHIpN2cnFHOKTaQvtSELtHWhnHTFAUk5oI+bikIN25\nsUoA3AUcA0w8HNNDsKSN3Ap3OOTTOWYDoKcylskVQhd27k0mRgc803gKAaXbk9KQDVPJFOZt\ni4NHAzRt6Z5FADkwpGelK21uc03ad454pduzkjigBVbcM4xTto25B5pOvHSkYhcY5pWAVfmw\nAOaep28Hmmq3fnFOz6UAWIwVYEdeorwP9obQ4oZ0uA3MgOVx3r3eOQ/SvLf2gNMe68PR3A+h\nAouJnxJ4izHK6OMHOBXKTx+ZkHgDvXceLrUfaG7qOtcdcxD5lR9retUgaOavv3eUPUelZM2F\n3DPOK1723K7mx8orJmZeMjk1qjMw75GMmCPlIxWY1q8cwLMMVvXFuZJOu0Ad+4rLvLYsxy2R\n1x7VRLM6Vg0hyv5+lU2YRttHzA9BVm5TBKj/AHt3tTYYY/s7FW/eHkZq0Ioyf64gDYij7p7m\nq+4jP97Oats67TkbnxgmqzOg+4
pBAxk0wImB2sowd35VAobaFHbpmnNG0x5GFJwaZIpB252h\nentTJYg/1WDxnkj1qurMp3ZwnpUm1lHI5A71B83AbkGhDI5mUhSideTn+dRsu2Qbx0HBFTMM\nyA7fkUYpv3VcOpw3C1YiFtscm5mxu4FI8avlC2Wo8st5YZc4OKVwQzk/Lt6ZqQI1zFtz0zim\nNFsRiw3HNORg0YLcMTwKcrCRzu4A4qhlWRfJkA3DBHSmYK5O7IPAzT5lYsZAN4zinTAYjAGW\nzkYoEJEwEh5GV6Uxg7K3OATQsf7zLDGetLudVIIyvqKBka5EbADPH3qRWKqoZOSODUshKquP\nmT1qOaN1Iw2V60CGfKMknB/iqNgFGc/T6VKctGHZOWHSolG2Nkb73b2oEJu3A5GG6DIoXjt1\n4NLvbgtzxjpQFdgA5xzxVDBQQrBjt7BhUcmQpL/NkcEVLvPKEblzjFRGLao5xk5I9KTAYzDA\nBBz0pqKnzRxls5+9T2Cq4Uhm5yT7U5cAMqAnJyBTCw3YqqMc+5pskfIydtO27V27O+aWSM8H\nHHpS+QEfBbCNyOaeZd2SCAR60xSijBNCRh8qOfagQ7ftVhjcSM8VC0z5AVMHvmpyvlqVXqvT\n1qOP94pZuDVDEMa53Dkd6RmVvlJ2ntSYAAHT+tI+A2VOG96ABmbaPTPBxzT/ACR0De5qHczM\nBjnrTy0kcJIwcHigaJWBWQN07ZqPhR97ODQsheP0brSSYZgM/P3qXuA8bpCrKfpmmSbn3Ifr\nTd3BXOR6UKyopRs5pCsIV+6pGW65okZT06ng0m5eXLnHbFDL0K8s1MAZgnQYHcnt70blHQ59\nKSb5cAjcvTPrRIylsAYwPyoEKoOwnHBPNMK7elKvzRhd2aBsXnBPbpSGNCiReSSevtS7lVd2\nCgzinsq7gFcEdvao2VtmCcnNMA2qyna2DnmlKjaBu474oPzZ4G3p6UiqMbPunsTVCF5j3ZGS\nemfSm8+WAQMDoe4okLxyA7dxxjFMVCGKHJ7/AEoHYevHIGT6GkXG1s9+KVW8w46bfSiJf4iu\nAe/vSAcqsVyvIUYqJkKdeFansSinBwDxSEHdEpOF28k0hDl+QHcAwxxUe0shyQjdc0rH7qs2\n0jpS7ssqYyc09RiLwocjnp9aGUtgjAHWlfc2WzwONpqNFIUq3B65o1AABI2VHzD7yn0oR8MQ\nowMc8U9WPmEgcY5zSQZ2kAZzTATA2n5v8aRv3e0Z4IwTT3yql9q7gMDac0jKylHY9sk0hjSo\n8s7zgL/DSKwUlcfL605vuE4zuP3aXgv5ewk43ZoEM/d7iC272oMhWUMo5x0obDDcFG7sKduE\nb5xu7CkIap/d56c9f6UjMNo28EnFLv3RkFMYoZRkj5nUUxg43nfnG0Him7g0af3n6UvmFlJA\nwCcUpzu/Tjt9KAGFm2nY3PvUjYbblsA8fWhWMsjKigY5yelI2fm46HimA1oyZDgZwKaQvlyY\nbgdKViXbAyc87elIckE9dx9P0pMBWC7VB+bjGc0uUZcq2GHFIpG4lV2euaPmY7SBgDORTECj\ny+P4e/NG8nAjxtJ6daQKf4fmPcetPZtq8Lhh0FADR94qRnnpTpG2yKQOR3pu4rjHLHvQxO3a\nfv5yKAHfNIxJHWowyowGwIvQmnMHaN3B2jOKYzhMFju/CgAj3De3X/ClVyVIwFfHX2o3FgW6\nnv7ikdcITkbm/QUFDjs2gqc8VHGzIjDILjkZpW+baRxijaWmxwMigkduG1Wdg3Hb+tNx1Zkw\nnrSRxgLgr8y9Kc7F/lf5R39KTBi8bh5jYX+9SR/K0uV+YH5V9qVRvYrjC46mljwvmcckHApj\nFUjcMqASMc00MGXBX7pximIojh+fIZuAM075n2rtz2JpoQ59rfN36HFMZFWQblLL0pz/ACqQ\nvyhR0oi3HcXONozSGRrJt8zI24P6
Up5fGcEjIams25efvKcn3pZNkku4/KfbpQIRlHAI57in\nFipB27lHaj7+ATz/AHjQzbyQPlPTNADlCtK7tlSR26VHtZUDYyw6e9LiRc/OpprZ2YLYAoGS\nTMEZdv3uDt9KaH+8d3BpAx8xOx/vUbWjJDAYzmgQjANIrEfKtOPzZUng8g+gpGIaRtoO0j7t\nN2/JnPHQr6GgBwJWMhRkdwaQOqMDg7cYz15pH37cjjHBNHK7CDtGfrQMVgIztL/N120Kx29M\n9/moIVJG3Dn86Zjc3PC560APMyrIHKlMilwxbaeQR96glZAQWPsKTjHJwfSgRJ+qjpimSfMo\nAXPcnvR8yqSH4/u0Mjr0PUdc9aAGNlosbW3g8EelSckdACuOBSHAwWbn+VIpMfTkH+KgoVpW\nZlzGOeq0xtxIJAAU0+NPvkt8/Y9qagEjAHgn1piEkztycbic4pNxfluB2U06RfMyAcU1o24V\nziP+9SAOq71GXJx9KduVUJA4/iFR8o21eOcU/YUj4OCetAgWMcAvkE5X2+tN3O7HYc4OCR2p\nYxsAGRmjy2UsxyueoFADzty2RvbHJ7YqNNjhWR9yngUFxBg5yOwo+982fnI7DGKYhxbb94cr\nwabuIJHbru7UpQurHBLUig7eG3DH3aewajl3sdudq9aSQfIF3ZbOSaau8R7eN/XntTiv7tmz\nUjGbvlBAxzyaWTZu3KcRjuKcPnUAcZpjeX5YIBEgbByO3rTGLn5AsZLIzZxTiy+YynPB4FMd\nd/VTgHII/nS7T5hcHcfWi4hzbFTzHOBnHSmL82Qy4HUVKy5jA4Y/exURw2JJW4PTFACyOdgA\nHy+w5pTtWEEHLE0qKRnJXAGQTTFI2b2GW6kHvQAFjx0IIz+NNaN12q7ZXrTl27Rk5/pTWjZh\nuLZFAhW2pMMgkelNVV6H5lz+VO2v5YX+In71JHlt4HG3qaLCFX+LYudvO6jcFx23Dr9aZnEi\nbWwD1alZSAcncM8NQDCJABg54P3qftXJOfmpu7ch2nB7051yMnnimAqshblty+3BpqgRzsue\nOgFLtCsNy/vCKG9CMY700gE2yw8fKcc0M7SRnd8p605dnBYdeM+tM+9u4JFAEi/OuAQOONoo\nOWAAwfpTSPL2ALyR2pD914sY75FIF3HOzuqjnAP3VpixhnO/PtS5zCoUfN1xSvuk2nOW7tSA\nPLSFlBTIPU5p4LfvEIAQfdOO1I20s5wVOdozUA3/ADNngH5qRSJVztClvpS/eIVxnHTbR/CB\njryKGZfMVmBO0dKCRg4LfJubuaGkG0Y+Sjjb1O5j0ApdolU4GNtBQmFXG4YoRiqlC3BprN+5\n5GSO5qSXHTaBx19aroAnyopBbOOlMwrJvXnnNDFfMA2cY60qsF255T+7QgFaQNltm4etGS0W\n3Owg5HtSS5XOed3TFO27/m6hRyaBAF2sCx3r3IpNqkZC98gUBkjjwY2YnkntSR52EZ2seR9K\nTCw7Pyqp2j0OOaaRgli35Uv3WHybiOefSlLfNuVenIFIBv7zywykY7DvTnbdEE5D5yTSAjcG\nYYOelLu/enjPHemMTA2jnlv0pUwr5ycAdKbGEZWLhg3Y06NMqS5BA6fWqAjj2j5WTzFY/lT+\ndw4yi9d1G4soYj5xwaGBdVAXnrhj2pEsXIbcAu0nmlVSuxgRnpTGXczcYPUc1Iu1o8jg45FA\nyNgFLcnPcUuRjHPSo+uAeD/fqVjtIBHGMBvWgQnyqqrnOak/iJBwccj1qI4GCVwKcCzMCo49\n6QIUt5kgOSVAx0puQACNxwemKXa3mYVgN3OKXbtV8nAxQMYwEnzdHJ/AU5WRmb5cnGAaRdyR\nr8vysMZNL5e3DLzg46daQeY1SIUBwWzxj0z3pysFYkjevTPal2vJIUO3HXFJG3Ug5I4xQFxp\nAYYOVAOSfWnHARtwKnGVX1psbP5ZyM
EHJY+lP8xZmyo2Lj5SaBCs2Y0Azubg57UnKsUH3l5o\n3BlBJOe/FGG2ccn174oKHKvys/Y02NQWJILbuWpW+8D/AMswMbqbjPyu2F9fagBy4Ubgwx0o\ndGSQIq/K3U0nlpI2VXgDG2mfNsGCTzxQA8KFYE/MQcc0+ORV3K68c0n3eThv8aYONuRyT36U\nCHAloVZl4B7U+P5f4eW5xTUeSNZM/dPAWkPMK5cmUdxVCHBdrDGVJ457UqL5hKhsIp5xSEbg\nXGR6hutNVRyVyD6UDBmQEgce+akVvLPmsS7YwBSBxtJwA3TGOtRLIY+UGFFIY8MG+ZAcngmk\nXKxlAclTkmpPM3JJDtxnkMO9IqMqjnCY5A60gGrjzgQM5H3qkMi8hhhj2/rUS8xnbwueKdIx\nkUDG589fSgVhpUbc7io9PWnSFI2jJyzddtKUDMeMgU2RgpUsN7H9KeoDZtrMwIOcbhinY+VI\n853DOM80rMYlJUB89qVo1mWP5CDnqODmgkSNjIAMc9PensxZty4AxwGpqs8SsTy/T3FKVZkB\nAz3pjGp5smWbhfTFHG0s5xngZpWAVdy5zjqDSbTNGrrzkZO7saAFAz8owZFFIys4CkZ4yBQy\n/MjKcdiaRmVc7iWAODigmw7cWAIXDYxQp3bV+7jrjtS5TbkHnFK0aFUYtw3BoGPAKs24h16j\niljy0att+WiJmidTt8xF4wP4qdGzwswYYRjnbjp6UxMkj+fEbnBzuOfSpGXdubBVT0Hc0wqJ\nPmUEkcFqnX5kUtkhTww7VAFu1Y+YnTOPu1o2u0SfK2zqcYrPiBVvMAycZzVuE71Eqjv60FF+\nJvNbft6Hp61ehiP+0o64xVBcyzIpPB5wtbcan7qjIx0qWUWdPhGQcYCtkHvXT28nmLndtJrA\nsGUrj7vPTFblqrS7ABuOeRSYdTV08PNghcqp4Yda6vSbF5FLnjI4FY2m2vlKNmMg5FdBZzeT\nIhYkg8VmyjqPD2mkSqzD5VPX3r2b4W2v2/xJptuWwBKG3f0xXlGgTMzAqPlHX61738A9Ja+8\nZ2x2h1jAlc+nNZMs+vdPQQ2SR4+7wO3Sp93PXFLjGccjNNK4cZFQMkC85zTH9aPWjG4AZpDQ\nh+ZgaXG3OKbS5LKaBgMdD1o+X8aOi5PWjb0PWgBdtIQWo5wcUiZ60AG4Uq8ZpNp9OKf1FIAV\nhjpmhfmHSjhaC3y470gHZ24GaRW5oXpyOaORTASQmmbgy5NOJDDHek3KcbRimJjZG24A5px5\nxSv8uDRgr1Gc0hiHIYY6UuR16U053ZzTm6+tAx3O2k/SlWTrxQ+1gKBhuK8EZpSQV9KQthqG\nPynjNIQe9Bz2NNRjxnpQ8g6AUhDg3r1owW47VHzzxzT+fWgBQBj3o9BSeo707+H5jQAm7cxB\n6CgZFIWAXA60rfLjvTGLkcUnQ9MUi9zS7iy5qgBTj3pVYNmm8t9aRc7SKVgHHPpxSqN3fikU\n5XmndAKVgGt+tJ1X0NPfsab/ACosAm07R1pdvvzSq3qaCRyRVAJ265oHHToaG5XI60KflAI5\noATnuaGYUNy1A7gigQ5fmA5xR8pyKTkDI59qQfe5HNAwXhR60p+cjBoyQT3oX7uTUADk+Xk9\naT0NOJDYyaRfukVSARc7j6UbtxNCn1FKpHTHNA0JuKduDRgMMk8dqXBOeKXaKQxNgwSDSM3G\nKew+XAHNMZW7imIqOCvB4qWOUsMY5pJIyzbiaFUjk8CgdiaMl19cU/dt4xUcY/uninr8zYPS\nmAbqM9OMUqrxyKQEgGgQD5m65oxz6UK1O28etJgwVML0phPT+VPOWzzzQFoGM53DjFOPWj3J\npfUjmkA3se5psZyacOOTSKw5wKYB3NKGHQcmkXJyaCmOQaY0LzjBo4o555qPlfegB/PPpSEt\nyRSLIfTil+9zQAg5601W2tzTmOeMc0cd
D1oARVOeOlSg/Ng9Kbyq4709RlsY4pMCWNRuznNJ\nqGoppOlXmoSHYlvEXyfYUqLgH0HPvXjf7XvxAbwL8Gr8wTCK8vmFtGucE5+9j8KI7kvufm38\nYfFUvjLxpreqytj7XdMQQc/KOB+FeXNII9yM3zNxmtvVJHmmZWfDDj9K5yTDrKVfbIgIDfSu\ng55asr+d99XyZl4BqOaQrGvOCTzinQ4uo0Vjgjljj9KikkCkxgYGeCwrRCIkcSeaGB2r/HUR\n8plIXkgcmnTlVhIKtIAckL0qA5RA4XGenrV201AqTYkjkEuQmeM+tZrsu07mPAwOe9XdQm3s\nsm3d22+9Z9yrNwUwTyfaqERNMruDH8h24PuajhZdx3DBHU0/yl3AA5Xv603y1+cOwU54SmUN\n2+chBY4J+Ue1IqKnK5C4wG96XeVY5A8vHFG5NvB8wEfrQJle6zLCXYEFP7vesi4jVWXax5H4\n5PateckYVj9MetZU8gk3qPvIclaBalGaMSNhl2svShmEYIADbhg+1ExHmLuJYE4/OkYKpKE7\nT6etO7ENkZo1UAYb1FSfu3UYQgEjJqEKOh+9/dqyq+Xt4wp6knpSGTWrBXeJgWxzuHf2q2s2\n1GATyl/2utVlgzk7tqnpnrUrMs64d8DIHIyaljJkDLyDncvWm4dIwwk+Xuf6Ux2eIqijkZxz\nUiho5CeCWHTsKoRC0mFbaoG7+InpTJIVjABPGOPQn1qRQtwxPTtTZVSSPJ6DgEnpSAimhkkk\nDJhdq4INRssZZUUAKeoP+NSsBIepAC/epI4S0aq5CpnOT1pgRxqxYE5A7Ac02STbIzAbhjBH\npSnO9VAwo5BodY95kUt0544pEjGkFxgoSDnO1RWgk29sZ+6eFz+ZqnYqeQy/K2cEc1ahtlcs\nocH1A64/woGken/CYeZr1vHy/wC8Ug+w649eBX6Dado9xb6XbZRkVkBCkc4r48/Zg8MDWPFV\njbbNz+YNvTO3I5HHWv1i/wCFbwzWsW5MMEB+704zWEqljSMT5fltJd+CpP4U/wCyuoGf5V9D\nXnw3EnKxqoHBO3tWJcfCZPm475yBS9oVyniS45TbzT4vmBBr1K++FZScsqMgx93vWPc/D6e3\nY4QnHJwOfaqUw5TjU/1fJpjBNwPSt+48NXSZ2wtx1zwaoy6PcMMCEgepFaKSYmrFLbtAYYPr\nTo4/MbJ6Va+wOq4kRk7UzyVXACsPWndE2YxYwHPGB7VLg7So4zzTmXkHoKGQSYOcUXQWGMox\n15p+3cuM80hJY54IprYXlTzTGPZRjg4PekKk4zyKU/Mu6k2naMttb0qbi6i/hRT8j1FFAz69\nkyMDtSbvk559Kex49aaq5bv715x1iLncOKXcOrZpWb0pGznJGRQAu4bsdqMr+NNZf4s4o4ID\nYw1ACtIQvFNUkAk804ikUc8nApMaGq3OetP5I4FJ94AquBTkU84NIQq/KuetDN8vHFCrtHtS\n8bvamAZLH2pOi7s05W3cYxTdgz1yKYri7skAjPvSvuXIHSnKp29KXaevWhgRL69+9PVg3tQe\nG4HFLtxUhcXGOtN65GaVs7s0deRj3pgLjOCKGOeKN1ITnkjBpiDv05oA25z1NDHb8+OKPvfW\ngALHbzwKJPugnrRuGNpoOe/IoAU4zgdKO23rSL94Y6UmTuJA70B0HLkdRzSYK5Jp27cv9aZ5\nnGCakBVYL2yaGXoetJvHShV5G000MU8cmjdxntQeDzzTiu5eOKYhqr81OPpRu4BxS7TwQc5p\niGKQ7YxilbIbGcinqvYjmmlSpNIBWYdO9HpS9hkUit8xGOKAFVl2470Y2qcU0BuelLhgCe1A\nAASPrUiemKjXOeeDTtzL8wNQgJFwe2K4r4zQ+f4OkKjAiIJb68YrswflzjmsTxvaHUPDd1Dg\n9M9M1fkB8IeKLMtcS84GTkV59ewlGbjvXp
/jS3+zXFxHhldXOdw56157qygxsy9aYHL3p3xu\ngOPaubumELYI/E10V1HuVjuw5rntQVt5wPu961RD3KM90zsSq4TH3qydu7c+c5PWtSchoSdu\nx+lU3U7VwAR3qiTJmgZmK8AE9aquvlnPGAcD0rSuGMh2FduOtZ91MwfytoZPp0qiSu3EpJwq\n/wA6q3ShflTGTU8sm9sY3DrxVNZhtLOpGSQKYhN33U5P0qCSMtIwHzd6n2tGA4ZcCoHVo5iQ\n/Dc8dqoTKwkafPBU9OajdQsmwMGwvX3q8du3cfxNUnhWPcw4DHNAEcm7ATeBxUTB/OUyHhBx\n9amVC8LgjBxw3rUbDEIdjxjHrzTENZSx37tq9TUUUgbcQ3mjs3/1qXb5nDHP+z6VGxZVdABx\n/EKQMezEyCRhhQuOaj2r5gbPykdKYzEIF70ZCrj1qgCHb84PA7GhY8OMHjFNZQuBu47mkZcD\nap3Y6kelUIbhWUqeCT1pvBbanO0+tWGRGI5zxkVWhk2b22YGfxqbjHyEnp90/wAK0zncpyMA\n81Ju+YEDbxnNRMRyTyDTuIbuOGUnJ/hzUcineHXggc5qcqy2+E5Y9R3qH7rHce33aBjPmZdu\nPfNKZDKVBHTjNJ5rIoBHU0vLNsB5z8tMBm0QyJIOTikmkPdT071IF2qjnlfukd802YgsR37i\ngLieYVKBkzx/DRGxhLgcE/oKRfmXKqQc8E0Mpkxk4OPu07DDy1MZGTu6imqQ8Y5y3cZp7bWV\ncH5RxUcbDzjtxhT6UxCFxIxCJ90ZzRIpZmIO047U/duVxjAY4xUQXy9ykHAGaQDmV274bqDT\nJIwq7unOSaVf9WGB4p2zIzuylIOgziQ9cgjNNZkbaQNwU+lOuOFyg49qarOzAL8q/Sn0BIYr\nFWZsD6UoUkKOu44FEibM/N8zHmmxqsany9xw3NICVWjwo2lWXg0x9qOWA+ahM/MT83YGkXhA\nc8k4zQNjDgsOpYHJp+5txUDr3NAI+bnJXrTd0m0ZHPUUhXHBUUEdSegpF5GGGfrSBSzKejE8\n+1Ob58jqv96mAvLMC3KqaQZaVm6D071Go24Xf0OfrQ7AqTnEhPTvTAX/AFiiU4QjjHrSOX42\ntlT6fyoGzZgk7h2pVzxt4XPP+NIQzb8+MdKkwZI8lPlBzTWUmQgnB9u9K0pk+XBVOmaAIziT\nO8Y7giiXczRnHyng4qTIZQFXOTgCo2YhvLXHoJPSmMcyhg2T0PB9abHmMbVPX1pcANk4+Ufn\n70n3chuQW60wBRuj2lgvPJpJGfAQE5U5pdqjdgEoelOyXYHB3AYNAhpkLybT1POcUjrtjRi3\nQ4zTmXbHzjPtTGRfL/vc5xQAuN+F4C+tIflbIIYg9aGyx+RPlakHyjgZA7Uhj1VpWZ2OPajz\nl8wK65G3kio9jtkMcdyRT8iTPTAHWgBrLhy38OOKVZVGA5w2OPSkI3rj0HFHIIzjBGOaAHbS\nVPrTOJMIWwMZ5oVd0IwSG749KLiJRIB3xnPY0DBlDIH8zDjoKazMSMnAx1pdu/BxwOtNZcqW\nyWUHpTENWQK2cYOOaPMj5GdyqvenbSc7QDkcmmxxrwcZB4OKAFV/MUKBtX+6etDYiyN3XmmE\nESAoOW9acyj5wwyPT1oELIx2jA+uKcqlW3A4K+vekXONw4XHCmlUmReRgjnPrQBGxJYnPzHn\njvT1lJZgvBIyeP0pG3mPKrkZ5PcU4M331IAx09KBjG3YUA5PXFD+YzAgnAPT0oXG3J6nrTio\nkxklAvftQA1tu/eMuM8g0g+WRjjBPQ/0pONjLnvkUNId3yLuC80ECBvMyTwRwcUoxsB3EEU5\nZMsBjapG4+tIMsjZIXOe1BQ9l+UMnOOgpgzI0h+7laZGWcY/DIp+DtZTwfSgBNm5ePmpwVtv\nBUr1Ipm9fLyq7ece9O2r5mQCFK7aAG7d6DA2
85zSvs4bHA6mlOY02YBxx1pVGV5GP9mgBh+V\nBzwDzjrTcZUZHz0btrrtG5Oh5pWU5dmyCTxigYNgheqlevvS785f7yg0H5drMSEA5NHJ4C7U\n60CEJ3c9GPWlch1b5/nUcj2pJP8AZ5Y9qDtZtxB6YIpALtWNe0oPT2obEcYKN1HI7ikVhtL4\nyucZprKQ2zbjPOfWquUhFzJ8wGOxGetP2+YpKttBPOaRUDSbghAxikWP/lnnJJ4pEjljU5y3\nzAY+tOVAiiGT53XliKb5I3Kd2SD1+nam7SzliMZ6mgB37tmYoP3ePvN0pq52rjgDvQyKxPOa\nVgepbjH4UgGqoaZi5+UikbErNxtG3AFOOGQKRgZ4pFUecU7kZ3UARws3k/OANvepWnHkgEbn\nJ7Uvl7lx26UrMy4LD7oxwOKAGSRkrnG09xTWUM6knCgcinPjaJC2dxx+dOVSzEnovBWmAwTI\nyszDIXpikWQ7sqfmI9KUr5xKgbR1oO4Y2DPGDSGHzBtq9MZzTdvmKMnC55WljxtO04b/AGqR\nrdvlBPOck/0pgxGw6vtBVh0pY23bWyTtqTlZDzsDDHNM3lo9q4ABp2EIymRixbA60LhTkknn\nj3pEYKpcnj1qTfuRZF78VLAawE6sFByT+NKV2jauSBTMts3Jwc80hIX7u4ZPIpgPwFYZpBlW\n37cn0pzY644XncaDIzbZFX5e4pgJks2RwMfhTNx2hh8pXj61IcHIDAL1qOH5wxwCoOeetDGK\nrdcDOR+tIm1lO4kMOtJu+UhRk5yRTpFbbjuR92kIbJtyD909Qac8zblyMg0RvwSQFwMZqIbp\nCvUOe1MCSTbuIHelV/mxtwajRgHbJGPb1p7O27LHnHUUgYKrPvUvgYyCKPl3fOBjplaauJIy\nqNjJ5yKFZVUhSc560BcXb8u7d0/g/wDr0pUMcBSP5UMyx9M5pVkCybuR6DtTATHBfPzA42ik\nXe0ZJK855pY1CXDMx5YdKYV3RlegzTsMciPIm0Nz0/CmyDyTsUMFA55qQKuFAkwB0YdzTI0O\n4nqO9SITbtZWQ445zSNsjAx+8/pTwq7iDliRxTU44AwR1FACNGCwZxTmKqCz5PZdtBbP3uRQ\n3ChQDkc0Eir8o2sMMelN+ZlK4x604t82QMk+tIQdo3Ng55p3Aa7BtowcDildSzEDhlGeKVnC\nthRuzx0pcoF5J3g807lEe4t/D8zDrTkCnjHyY5pVyFOAWz3FGzapGOG96NRWEj2oRkZ3cjFS\nK/ysWPy9MYpi46EfLjHFNRWDFV+5RqIUL8wP3/bvTtoVxG/A67u1Iv3VBUk0SHzPlb7wPWkA\n1SI9wA3c4p7qygqeeO1JIuQAAAe7UwA+vTvQA6Ri23H3wKc23jC4ahPmJO7tw1M+98x60AOk\n+bqMey018NkgMTilC/MAnJ609JCrnI56mkFhjBlXhSd3Oc5xQsbthOGTqWNIr7GLDPv7U777\nbS3B5pgK27hh67aRt8bDI5JxSgZBUnpzxSBhuBJJcdM0WANxXzNpG7pTVYbgTxkcmgkH5SMN\nnJNDAE7GU880ANyfLdRginKgZQCe2TmjhVBIyq8YpGUqVxyGPFADWZVbK/dxTVXzGAB6c05o\n9qsFG455X0pQwU5CY45zVBcfgspKn8KY2GKqoI7tT9zfePCkdO9MbORgYYdu9SO45WKZx8ye\ntC52hmxtPH0pHb5SqJtz+tIrDhTwPSkAZ8vaM8ngk+lHdj0Xpx3pxHUleQMCmsMRkk5PXNAu\noqnIJ67ei0M3RQcHrn0pyqSVJwuRnjrTVxtcHA9GoKDcOvVDwMUOu35OuaI1KJ/Cx/nRsG3I\nbJPf0qiQ+7gNyO2KUs2Rlsim/IrEAE+/ahUC5yOo60AOxllOMJSDgsUGG6ZpjY8sckqO3enj\nCDIYhG70gF5l5PAAprNhQOWJGd2OKSZl2hUyuD
k05mVW+YEoRxQwGtN9wMCM05vljZC2Hx0F\nOzuIAXKd80LiPMixluwzSvYBirwCOCFprOPLCnv60/5iy85Oc/h3pUVlZsqGQnpjpTGM3NHw\nOnTrnFCxiPLqWx354qRVCswxk4yCelM58tiSGzyV9qAsKzBZFIOSec/0pVl2lsrtLdDQqFo0\nwV56U5Bt4I3Y7mmMGG0DLZHemhiqhvvZGcClbG3ZjnOc0LheAcMD0qRWFT95yBwwxmk/eBdk\nhGBTlPQkfKTwvvS7Du3fex1pjIZMK/IzUjYPLfd3YpygMrGQ4WmAnynP3kb8xQAMd/K/Kc4F\nSBRuIDAZGG/xpm1lhRlHzY5p7ByNqr82Ofp3pARkJ5ZH3cDjI5PvSW8YkVdykt1HOKn3DcoO\nCAKJF2jPy49KZNhnlqpfaTJ75oZgjq6DO3qtNXMa+ZzyO1OjZeMgs3c0DA3W7cWX5j93FJuG\n4E5U9zTmjaMb1GUzxxSFWK4yM56UxjvMVizRjH8PvSoQy84RQMFzTFBj+YgKaGxJzuwcZ4GR\nQCHM4dWzwPbrTFYZKrkkDJJ6UqrHGC/zbSOfrS/My4zgN3pAJHlxtHHGeaAxUERsAWPNEMO4\nFtxYjjilhUsjMVxRYQYaPKr1Izupn2lGX5QWK9RQrI3O7HbJqTCx7VZAOfvCjUQkbZXIHUZO\nRTk+UqQ3B4ApzEyOGbkFeo/pUfylOQV5wppAPRTGxV2AB796RWMbFQu73FHlorKjli3UikVR\nmRidoxx61WoDmby2zjAxwPeo9u51VsqO+Kcu5gq5LSEYANG52cttyycHHtUgRqxyyYG3dxTm\nfcvGFOeR60reW2Hxk9TinsuVJbAPY00LqNkUMxZT2xtpyZ2KwAz93aaawLKNxyoHanQ5Zhn5\nR/DmqGSc+YuDswMU7zHXJdiV6dKafmfbzvHWnFfMZUUHGetCJY6OTCgDJA5q3HMquV2kbhz6\nVBGvluUQcg9McVahZlYghSeuTzik9xos2v71RxgYxVi1hMi4C4KnOKjjX7rA/MT06VchVVYM\nSWY8MR2qSizalRdF1HI49q3bUmTaDjJFZFrGpnYD7h5roNPt4R+9f5dowKBlqGNV2k8BTmtu\nx3TK7ovHXb61lQwtcyKE5Tqa3tPj+zl1Xgle9RIDSsFlZim7awGSa6uxQNHFlAXI71zOnh5n\n+UZVTk102mRySTLnop61gWjtNDt90A2/Lt5NfVP7Kullry5uycqq7CT6+lfNHh+PdGjAAc5I\nr7I/Zr0cWfheW4wP3pBHr0zUMo9kxhQo7f40MRt5/Ckz0J9KRQSvrUDF3Nzx+dKv/wCuj+L2\npApyc0FdA4xzSMNvTpSrjbg0D1NAkAxjB60ctyPloXG3PejcWAFAwOfrRyPaj+HINH3lBPWg\nEG47eaUZ25xxSEbmxmhfm6HikA77y+lKclelMblqerfKaQhFbavWgkhuKafpSnJ6Himhhx26\n0n4Uv0pPXigBetC59aQKaMZI7UIBO5yKM9KXnoRRQAvT2pG5wBxQ1G0/hTGSHAXHWmE9s4oB\nK0z171LAdxt5PNBlVWA6Gjg8kYqCaM7s9qQFk5XrS/eNVFZl65IqZJNq+9AEq/dOaf1Xiolb\nipOqjHHrTAZj1NKGHpS7fm460jHdziiwCLlmpdw6UK2OcU0ruOQeKZQ7hR7UuNvWkx0BFKMd\nPSmKw3pzTt+V4pCc07bt9qBCbe5P5Ui9elLjjJpG/WkIXAXtTeOnalwWOCeaPUY5pgCr74FG\n00i+howc9eBSYDuemOaQ56GnZLdOMU1vU80wE5zkHmlz3pSuBx1poHrSAUsBn3oVcrSL93Bp\nd23n8KLAN2jHqaUfe6cUp+U5xmkDjkUwFZtuB2oyN3FIO2aemNvvQMRQe5wKFIzzQ3TANO25\nAoATdzSt9aGHB9KbuH1pFDeNp4pxUMBxxSbgq9KX
dn2zSARYwDwcUcnpQeBxTAjBuvHamgJF\nzzzRu7daQ570zA69aAJFPIwKdjkjNRjpkGn7Tw1IAztPTNLzRu+b2puaYBwF6c0cjAozuFH1\n7UmMTcV680n3eR/FSpypYflShTyelNhYap2qc5o7Dmlz296D3NIQbTj2qMMBwV49akG7aeaN\npI9aYyP7oPNOX7lKyjPSkP3jjpTEJ93nrSff9qbtI5z+FOjyxzQAqthgTzVhULMRmowvOcVN\nDhiecVOoEscJk6cj+vavgr/goR46a98ZaX4ahkDW1nCbiRQc/vGJ6/SvvLUNVt/D+jXmpXDb\nYLeMyN9AMmvxw+NXxCl8e+P9Z1tmb/SrlvL3fwoOAOvpg/jVxREmefaptEgdX3BslieuazLp\nVW0JQ/MxyafdTZUlurcVmvIlw6hmOw/drpSMBkbbsvuZUzjj19aWQ7vldhleetNQJbqyruKl\nvu55qN/mYMV3HOMDtViG+ZGkbK5w5PFRxyKF5bI7Zpl0p+1BXAwvO3+tQTOJrqRn+XHQVQFO\neV9rZUKAcg1ntMxU5fcxOSTVu83C4AA3ow45qlNKEUoI+c8nrQA15isYaNPmPG70psD+XtUp\nvLclqf5hUDjJA45qDzG2gGTDVQEjMHVo2U7weMelMk3bQISp55X0pCG3ZY7cDrSqpbDIckdC\nO9ICq0i+S3JZlPQ1l3f7sM5+TcMnmtV4keTdkqRyVrJ1CSR5v3qBVz8o9qAuVVV/IOEyrHIN\nN+WRtoXc56bu9NAcyEK3HpUm0MoJBx6Y6U0DGxOhmILZA+U59asGYtCyDgDoetVY1PzDYrPn\njJq15ckeCSu49l5FDFqTRKQw3Pu3LgH0NWIWWNyCpJUbd+P1qGBB52WG2RecZ61KkrFlBXke\ntSMWOJljYsM/X0qSPCsBnGV4oaQS8Hg+p/lUe7qqj5vei4xVgljjbcoXHIb+lQyENt+TL4+7\n/WpcExZLYIPOT2qFpAXfYC2On0qkBCrlpDwcrzUm/wAwLI/G49KYyDcA2VBORinRgQsQ6+Ym\neueRSEI0nyseGYce1AzubJ2qBz6YpNo2llGCwyRTgokAyPc+/tQSJbgztuhG/A4GcCrun27r\nIGC538evIP61myRgMvlkqrDOOmK29Dk/fRK3KMdoYdaT2KPtP9hfRhqXxN0+RYWkhHO0npgj\n5v0r9XwiqpHVc1+cn/BO3w+brxRFc7CYY4tqH0YCv0idByR6964pbm8NiobWN1+7gVDJZR/w\nqAatM2ajZi3QYNSWUptOimj27QR3zVebQYJIGHlqA3HTmtM8DipE+nFO4zjW8DpIGYKmO/HJ\nrIn+HsNzJxHtTp8or05VG2mrGjNnGB6Yqh2PIbz4bxOrII9xB4LCsW++GG7aAOO+BXvEkKZy\nI8VXFinmZwOaq4rHzzcfDUq3HmBfYf0rFuPh/cwrIzO2B935evtX07NpUU2AyjHSqc3hmKbK\ngLtJ4Bo5ibHyxceGbyI5SJ8Y5yvSqi6LcQSHemSeRmvqK48GQ7j+6U+vvWddeBY3XiFQOm4q\nDinz2FynzRJbvG20Kc+mKb5JaTD7t/0r366+GyyqcogfPIC5rDu/heqhj5H0+aq5xch499kb\n0/WivTv+Fey/8+9FP2g+U9yCjPTJpjMynngVJ95cDrTGiEnfpXCbCZCqD1JpQpZDzxUbZDYP\nSpEXrigQ1ugB607+H1pzbeuMmmkFsY4p6jG9WzmnNggAjilYYwOtH3Tg0ABycAdKRWKbjjtS\n5+bjgUqZOaYCx5dW7elIq56dac2fLz0GetCnnjmpYCc7qTA2+lO/U0exGKaFcduCjGSaXdtO\nB1NNbpTMjjHWhgPZtoAIo5zmkBLHmlJw2alCDouaM7lHGKa2OTmhAc4NMY7IB46UMSWB7UbS\n3A/OmkHp370wDNOOAue9KAB1pv8AFjFMQ7gnikbG4j
JxSIpLHJ4oHcdRQIXdtYDHFG0qWx0N\nD/My9hT0f8aAIpGO0AD61XEm7INW2O75hiq6wFXJPINAD0p24txUiR/KeMmkKeXz3oGKq/Lz\nyaWmrxz2pzMABxQIO2O1KDtT0NMIPQ9ad2559qAHe2eaaylevNG4HtjFKv8Aq8saAGM27HNP\nVsA56U1cY6U7ORjbigA2/MD2pNx3deKVFO4jt2pPuk+tAAvLckGkZSeOtOVR170KdzccCkA7\nlgD3pmpRrcafcRsCAyFd1OjVgakj3NBID6d/SgR8OfFS3+y6pcoqghSQOOvNeQ6gpiVwRxjJ\nNe//ABq0w2Xia+Lj5d3HsK8I1j9yxJ5Bq+oHI3C/vM84NY96wdSigbs/erpNQjVlLjhsdq5z\nVFEK8HnvVonUyrpd2Sw56YqouB8uMD1q5ccgnPOKoSH5snn1FWBnXeFuXyQSTms+ZiQc8H29\nKtXxDyFuhXpVJmDMWY8Y64qkQyvJGXk3p8qYpskK/KhXeD708s0i7T8qeoqMncOM5HA4pkMq\nMirvG3AziqzphsA+1XpF8vjGSRk1V8t2IfaD6rTGV2m8th8uQOPxpl1lW+YbsDJIqzJ8oBCj\nI/hqq+5t3PLdQOlUIgaTzJF+8BjtUXCFgFbk9DU7RueQOBQvy7mJyTwKBFSU7cZTanYioGf+\n4M45PpVySNlCnOUJ6e9RvEVBQrlSecUWGQyBZM+hPBWl2xxnBG7+dPhA818g47HtUUqnyMj7\n5P3qoBiqm/Zvyx5HtSLu8s5fBY4yKjONwcKSV7ipJGSNhsX74zmgQyRsKGB6HFNWQNlsd8US\nD5s/e4xx60ix7Yxu9ecUAIyjdyecc49KRpt3JHygcKBSzYb7vWm5SOVS5wD0piHeeY8YxhhT\nHXcBg8+tOk/dsQvzelRrhkIYEHqPekMb5YwHzkmmlj2OFHSpWVREjIvIpGXdlj0PXFAiLaOE\nboOR9fWmSAqxZeWbqamZdvQ5bpSyRhmGT8gXJp2GQOTlQeMDkUF3aQKRx60Rtwu3pmlIKyEM\nc55o9B7givtIypbNNXBY84bNOKhUBU7znnHamsrF8AYNFxA6srL8wA96Y27d15zye1OaLO1c\nbiDmmR+Y0jY520wF/wCXjAHAFObCxkYwp/ipirwXI+duKds4ABxnpSEMVHQY7YyabJ/CVOAa\nUt8gUHODTWXCgjoxxj0pgGGkwXwADmjcPmwcA9qRyJCEJ27ajxnLEd+tIZZZlaAbf4RTZNoj\nTHQjJ9jSLndtBwCM5NI33SnYnlqLBqM3eX0H/wBegKXbY3B6084DAIN46Cl/1Ry53HvQNDBm\nPKnkdBTI5HXK7MYPWpCxVwu3PcU1VKsXzweop6kiOwaTYmFYj7xGaYzKMucFh8p45qbhcg9+\nhFN4baNuPU0DIt5YqVGO9KwMkucfe6gUoGXfDDA5p4zDIpHz7hQIjdmjjJj+9naM+lKx3KFy\nSR1GKNw3MGHPUChfmkYodyUwGqpLhccfWm/LgqeTmpN27pgelNG0N79/Y0hhsPLAc/yFIzDz\ng/Qd6GYqpbnrTlbbuO3KEUdBDNzMv3gVznilkOHX5sZ5o+6QuccU95FEmHH8PFIYzcS5JwQe\n9DH5mBAxjNNZmVFyOOuBUirtbdnKsOKYDUXauQTjrkdqTJjXgb/xpqsDkZKIOCac/wA7Lg4w\nOuOtADVbarEDLEcml2qOoycYOPWly3GB8meaRA0ZxjOehoAPlYblBB6fWkYkLjb05yaDuZlc\njau7FLuYM25sZOADTAMho12kE9CBTY2WNTgZ5xz2ob92QqgZ7kUMqrxnljQMVWPDDGGGNtIu\nFPCnJGKGAYD+6vymkBGItrHCigQj8REgfvB3pVX5gRx8vKj1pysxbG3PfNNVmZiNu056+tLq\nA3Abbn5WXt60ZAk3qfvcAGndedmBnGKjaVEC7hx2FO4h
652FiMbTz6VGzPtwAQCc09V2r8qc\nN1yaVflyud3160DEXMjf3Rjn3pFxt5OB6UozGyktk/3T0pu3LZ3ZOckHtQA5mXcuwlz3FJwy\nsA3mLnilUlSygDHWjeyYV1yWoAaFAcBkIzSPjJTPyg8n1qSQuzIMYK9z6Um3buyMhhxQBEdr\nfMCXI6mlaQFhGx4PI4pYyzbY1G3FKwZWYYDHH5UCEZhtXadp3YxSS7tzbfzpYwOH25K8804E\nMwUH5n5pANTh0IAYjqKQyCZn2gkbutKP3KyAjlT972piknIwQue3SmBJjc5B+5j86Yc7EXnr\nwacyoI8Bu/rSLncvPy9RQAvl7WZep68U1R8odTk9+elLkNIRkE54wetI2JMv909DQA3lWLA7\niTz6U8OVUhenoaZ96EDG0hunr703e2TkEDtQA8YWPJOT/e7in7zGmSGY9OKaqmTGW+UcmkXD\nLtDdTuDGgA27sBlxjnb2oViHYpyO4PpRz5vP40vyqxdQd3b0xSABkscHHy5pOPlGfvDPNKrf\nMWByMdKTcv3tufb0pgLuDNhj/wACFNaU4I6jNKzjywAMc0v8B4yBQAYV2LD5cDp60hk/d/Ny\nPSm7WTb83DUKoPzLyQcEetAC46Y+Y9qaHBZn6c4IpxHzZHAJ6U1FC7iOVJ70ALKN027B2Y7U\nk3Yq52txtpSrrGFc7Oc/hQpWPLY3UDDowB4C8DjvTVWRSUIGM5YZ6U7c3l8g8+lJt2Rhcbs9\n80CFbAb7vFNMh3EjIApFZpOVVgq9c0NloiuMNnP4UDEZf3gbG9WHSkVCGKbjwcinbiMAdM0F\nBvwp+brQLqKyyTIxkTGw/M2etMkxJ93juT7Up27Qd5JHr60453DjG6qQxrqrKFU5TOTTshtx\nXgYwBSqokjO3buU803YcAbcbueKkQoZtqgYUY5JpJG8tdxOc8U2TMkYGOKerMrsFXzBt4DUD\nBQjLgNx3FDZ+UIcU0mTar4HpSJH/ABYJfPegQSovlkjsecURp8hA4J5FOVQoKjDEnnNDOoHp\n2BoAYo2qc9cYIpWaPargMAODzRIytH06dWHemyBI1QDJ3djVJCHDHATp1pWYxsABtJFIwXoT\ng4wKaytF5W85OO3Siwtbjtq7Cu3kcn3pRIm1V+9mkYtu3dR7UxA8ijlUwc0FD2/12NuAPT0p\nNygHg4NKdxZsdTz+FMwC2QefSpJHb2+QbVPvQ0hTLsOM4FG5mB4CtSPIGjCkHcDk+lMBjKX3\nEmnxhkwcbqT5QSykke9OZgSNvNA0Ox2AAPXFIc8KPlJ60m0TSEj5Mc7qZ5h8z+8DQMlkcHYo\n5KnBqNirKpVWGc89qVJGWRlCfhinRq4iyvDDqCegoAapxtOMqDzR83nMR060NuZuDyfWiRyd\nuBz04oENbdIzBRilbPyg/MR1pyNtZsHGeue1Rsrnkcsfypi6j9xjBV+B1FJuCn5vlBHUilbE\nyBfTvQymPBY7h0wKY2M+ZYmGSuOce1KQu75CcEUJ8qMrHAPQ0m47sge+KVwQ7JhTOdwPWm7/\nAC1JHIbvSq37vJU5bqOwpDvEYB+YdhS1Bjgwk2jac9BTWwzBCDuVsmnKWZegTjANJ8yLvfAI\n4z3pCQ/O6TI5ycbKbGCspXGR03ClwvlktwvUmkh2qhO7Jfge1BVhC3mynCYjXjNO+8u8jHYC\nkCmEYbJz/CPT1o8slSSNw/u9MUyRvzqDj5T3pzKGVCGzzzSKuVOG/dr1NOC+W+FbduGRQxCK\nBg45BNBChshflBoZOikd6RXVpDtBABpjQu8qHKcE880m4lMOdx659KXfuYjGB6+tMSMR7i2W\nB9KQIkmmPGU4xnimNI24MRuyO3anrlFY7s5GPpUcbqy4Ztu3gD1pjsIxSOTChskc56U4Rb1A\nztbqFpfO2KyuAc9BTOA2c8gYyKBCyS4YsBk4wSKQfMoyuf
Sl27cgUscg684XrQIbwrKWc7Dw\naVV8yXbGNq+9NkPRR8oJzj2p6gsGKvn+dTcoQ4MnT6GnsRuDFcVHwv3juBGRS4ywQ8gigPUQ\n4kDhtxI6U6TakYTbjH405e+JMjGCvtTYlLriMjB53GgYixkBnY5GKcsauDk7QBTGz5bMfoBS\n/PleD83BFAXE2gyFg4+XjihhtGwj34oVSoJxkdTR1Xnp/eqhD48LGQTuB5FJL8oA980by2W2\nbmAwBRwkRLsN5/hpagNeQ5BC4I7Urbhl0Ue4pArsDIfTFGDtUZOTxQMd8pXKkYxytMDtvj7r\n6e1OZQpzjpS/IyttHNOwhgDszB+Ofl9MVKXY8bv3fcU2Pc6gnt0HtT1g/d7G4JO4fSgVhrfe\n3NxnoaUq5UlCM45+lMXzMOnBBPenJtBOAztjHNHMFhFmwVyPlxgin5dlKxrtXryKZuWPjGSO\ntOVTu37votSMZvYtjZx/KnqQwywIHQUm47uBn1bNIoG07iR7UCHbDGp+bcM9aFG7JIAYHI96\nRo9saqWABOaRhuVyG4FBQMTJGwH3s/lSiPdt7beo9aApaP5OFB79adkMpwSWHJoF1GtGGbPX\nuBnpR96POOaVFOwtgZY8E80KefnUqCOO9O5QsXGRnJNNZmVQilhn7zGnI0gwCoXnoeTS53ZO\nPlB6etIQwx/ugqNgk8U6FhyTk7OGFLIxZgyjBzS+b87AD7vJ460BqNTCqSrkk9B7U/d8ucc/\n55pN4Z9qfKH5PFIkTbyqtnj8qAHn5pVG/wCbrntUancztnAHT1pSN7Jk/d9KJG/2aAEVi0LE\nodx6EmpIz+5G0fMOGpNxZhhSQB0pNpiUvxk9vSgBAx2kbcqO9NYMxBU7h39qdHtjhYscIwPz\neh7GmRqIVVC545JxnNPoJkqb1XA/dnOQaRnG0s7EBjgmpN21m8xRjsFPNN3KFBwc+uKBDX+6\nY1Ax796UMQoDDL/0pHB2scfLjtTAzMyEHkdB6igCVcRR/KMv2J7CkjZypYoSemaYrZY7eRmp\nVd1jKncecmjoA2PEKMw+/S7SVGT96mxyBmYDoPWnFfMYMG+UDjbSAVcHcrcNng0ikxsyjjjn\nmgScnH+s/p3pvlgSbSNue2aACOVY8BUyfQdKV1O7g5yc4NBxGu0dc0ikqjDPHpTESM6cncfl\nOCtIsjZAZf3TH/8AVQVDc7R9BSiJd4MxYJ0XHQGmA5pG3rnuKeuWjwARg561EquibGwMHAbr\nU8aljuU8jtTQhVyu0PncxzuFWo5MNwmQeOajG51yrBxngYqdtzKQ2Ae6mkO9i5HJuXY21n2/\ndz0q9bYZTvGc+nrWfb4coY12gdSRk1oxusahh0Y7foakC5DC0kgKfLtrct98oViC471lWjlZ\nFJG1l4+tbFpOFj+VclgSf8KBmrYsUUbe54rd023MikythielYGnK0joQeB0Wui015Vm3MgI6\nYqJDR0mjrHG2xOFJ5z3rprCLO1FHGeveub0/Ekiqq7TnJ9q6rT8PJGoOOcVgzVHceHLZ/wB3\nGvzufu4OMn0r7i+Cultpfge0DFi0nztnscYxXxn4TtHkvrKALnc6rgeua+9/DNr9h0GyTGCI\ngCB24rNjZptjOCaQEhuOlIHBYjH401nwp55zUlDtxp27uKaH5AIobOevFA1qP/h9aD83GKav\nH0pQx7igSHHhelIPm5HFIzbm6/hS/TikMG9hS8KuCcUme9DEMvTmmAfeXIGDSA7e3Jp24YBp\nGHegA560oz2pP4aFbvnFIBRnNJyMil+vApARu4o1AbtPGDTg3r1o/iyvSlZSwBxTARm6Y60N\nnb0pf4uaMccGl1BhzjnikX7tJz16mhe2aYIXb3oOSuScUfxf7NH3uaXUA5bkcihe9GcDjj1p\nfQUADfdx3pWTd1pNuaBkqaAE8n5etMxt4xUqgstM2luKAG5I
6dKkU5Xr71H5ZGctxTo/u8Ch\nDH5ypx19aX7qihc46Un3femMcACOtIB2xQOoNG7tSEKc9evpUf1OKev1pCuSeKWwwPHOKTee\np6Ui5wc80fe5xVASBjtyOlIW3YzUasd3X8Kk3Y7c0iQZu/egH5ck0oWjYA3PNAxOASRSc460\n7A2mm87aYxvmEZp4YMMVBJlW65qWNS2OMUCJfur603dntRk4xSLnbQAdBj8aPvc4yKUnFIrb\nVx2oCwiNuUj3pI/v8jilVhz607acikOwqqKPvc9KU55IpOijNGowJHpTuSvFNY8ULkL1pgNO\nenahfQU9cdD3pNvpUgI3L+1JuH92msxXPelXJWiwCscrQFzyaOaRQVOc1QArDnNG4dAKXbnN\nC/L70gA4BFLz3PFIzDd0oLcYApDFZvmwKTHzGljw2c8EU1fmOcGmA77uM+tDEb/rSYbqeaZI\n2MnHNFhEgwvFLuBX3qsshZs05WNMdyX7/SnFflqPlcHFOdi3CnjvQA0NtJB5qRW28UwqF9zS\nqu7k9aADleCc0wrxwfrUuwMc0x1AHHWpENXB5p6cY9DTEXEeO9PT93x1oGSqhxU0cfmZVRzU\nUSt1J71ejQZJ4UDuaqImfOn7cPxJTwX8J5NIt5mi1DVX8hFUgEr/ABH6Y/nX5X6tfJ5giVRJ\nswAV9McV9NftxfFF/HnxTlhtbjzdN0tTBAFxt35+Zvr/APWr5UuiDNJMTt9R6mtomUtipIWj\nSRm4U8DPWqTf6RsKfd3cip5drx8uTk8A1VWQfMuzbs/unvWxkOeRchsbT61HNI8PEQwW6554\noZh8gAxu4LU18sF5xtOB71XQCJ9zTEyKQ5GNw9PSqrskMcqt8o/h9auLMfOyDk5y2elZepMs\ntxLKDhc4VfSkBQaYMrfMzgDGPSo2kdWTy1DjHNOcSIFCrw3JIqt5m6TanDY5NMQrKGZieW9j\nUKL5jHPJXgCnRqFkZcHOMmpoW65XAxwKBi+WXwScHp9KbHGyJsJ+frmpUXegYkqq8/WoJMSs\nGLYC/pVAZ94zrny3+eqEmJdznkKOc84NX5ZEhaQY3q3RqzLjdCwKthgPuDnigkiZvIiycrvp\n/MkSIj5Hc9Ki8xSwflueUPaliQBXZjtbP3RTAkjjbcCADjvTwxZtr8NnI54pm3aQVb5cfMBU\nv2iNWTC/vOg3elAEu7aQ7nk8BhUvDTYK/KBjcKbxJBu+9xk+1SNhRtTaQR1JpFisysxOC23o\ntKuGj3mMhielSQxtuxlRJtyPeot0jMA/ryB2pAMmBOW27k9OlNk+XIB8rn7pqz+7ZmaYkDGM\nf1qt521SNu9ck+9CER/M0YAY7AedwpBJGqlFG5s1FNJIxUsCi5yV9qfsSGX5hw/IpiJNvmP8\nucKMYo8xPtGNxG04FIzBY/KxgE8EU2Vty+WBkr8xHegRM0iSKY+hHFanh2CRb9FLKxyAiAdO\nRWF5i8nBK8EqK6nwUv8AxNIpFJE0jARr1J5HGKUthrc/VL/gm/4aez0S6vJV+8nB7D0x719s\nTZZiK+cf2G9DXS/hSs5OGnfIGMY4r6PkbCn171ws6I7FdcK2OtN2lifWlDdsc07bubJOKRqR\nqm3ryaRSzHHQVKq9Saa67mHYUCHJ936VIvOccVHCflYdqeF54pgOZtuARSBfzpGY9OtGc9Ko\nYZ2HAyadu6Y60bqhYncSDQFiQtv5AqRV2rz3qFXYYAqTJ79qBjXjjYgBMGoJ7FHjPyj3qxIx\n6gGlyWX2oJZj/wBmp6UVp7FooEZ+QeeQKNw2/L0pOduT+VLwFHvWDLEwCtEbZUEDBpV78Uin\nb16VIClSAMHNO4wDzmmr69qcFKqaoAxznHJpGHUN+tO3fKMcGm/Nznk0DFUgOF7YpSDzjimZ\nBYn0qRSSmaADllCnpQq7SSBxSfdXJpUbaMigBOewpeuAeKRic8
dDQFJ4PNIQx1KqSpzz0per\ndMUvQ46GkbLcZwfWi9xCH723pT+GX0HqaZk7SWOcUxXLgAnAzQCLCr6jil2fLkcGmO43DHTF\nPjbcvPWhARbSy4BwaeBlQRRx2pF74OBQArenakX5m9qVmAXGOajUNzTESKMZpPukUm7bgHvS\nluenSqAFwp55FKWAU9qRmPoMU3cN3I4pCHKRjpil+9wOlJkMOaEIC+lAEm7ZyBmmsdx9M9qN\n21OT81NbJXPenYYo4zmnKwJ5H0qPa3HpTkbbuB5pCJMEZPFA+bk8YpqsSuTSZBbGSBQAjthv\nal++ue1L7HkUIp24HSgAU8U5ugNNbjtSH7vPSl1GCksvHSjnrTzhVBFN+8M96YgOfShmAUYy\nKB03ZppYGPmgCWOY4PFSbtqkseMVWjbAHXmrEe3b8xz6D8aBM+Yf2g7Fv+Egdj8oI359sV80\n6swZpQRnnGa+v/2j7NJJ7SRRyE2k+vP/ANevknXrfy7iVNvGeapCORv2Ecfy81zd8PMdsndx\nge1dDeqR8uOOtZNxbhlcqvPXmrEYDJuXb3Ws9toVix281rTQncGHyg9ap3NvHIrFSSfTHFUg\nMWcJKzAY24yd3Ws2cuqlSpJXg8Vo8ZfdwAcc1VuIzvDIxbK4571SIMqdysgBBK4pyyDyyVOM\njip5IeOTk4xiqZXy/ldSMVRIZMx2Mc5HIqr5e1nUseBxV1XXacrg1VuM/ejHPYdqQFSRduAw\n5PUioZhtA2t8vQVZbzCpOMtVbc3Vwck+laCEWMhSrZJHSo1YM4T7uasfOw37uRztI7VVMYXJ\nPUnIoAY+fJ2s43ZyKYWP3gcH1p8kUe0A8YGcntTQ42sDynSqGSIxUSdz6VVk3IdhXrzinqru\npCt/wKo7jMe3u54yaBEDMyyeUBhe+KPvNjIb6fypscjKrc4J7kUkj42/wH2oEOkBXcAMDFRZ\n8sgZ3ZHT0p7N1ySxqORdnXq3pTsAq4TfnJNNYrJj5c0qttJBbrSBdq/JySck0AP2s25envTN\nhRircE85pRJubC5PvS48xtoO5hyDSGM+RN2xsqepNQtuVcEjYOc+tPkkU5UpyP4qGjDLk8tj\n7tUITcVHOMMMj1pgz5JLMR2CmjDNgdf6U2QZbOSTnnNIARl24I2/1pM5y2Mnpk9aaZFkk+Vd\nuOM09iFb2/nTQCDCqw+6MZ/GhtyKmRy38VOkUMoPXJpv35iucDHQnjNILiSK2/zFbB6GomUq\n28NgY7U5mdc5GT0xSMhRShH40CFZi7AnkdhSPuHBYGTqfpTdxDKo5wOKTyyrHI3MerZ/SgAY\nfuyCcEnIp27Kqqj5aSTaq5/Kk7DnHtTuMccb8EdOpqP5VkGV99vtTmjkaPk8daY7Hcp2lh3x\n1oAXhpDk7OMj6UnO0N1BPWk84neuMHHFB3OsSgcjmjoPoOaNgEZOOMmmyKuMA/KTkse3tTss\nCzEEgDgHtTV+7gjcSM0hBNlhuzkdsUm0jaFHXrQuOCRk5xS/MzEgYIPAp3AFxluMAcZoVl8s\n7mxnpTDnztv8PJY9qd/EARigQhUMuD2FMVSrAKccZp8n8OAPl603dumAIwe3+FACLvXIxu/i\nBahd20BeOMFjT9vJaTKmmfIMBhle31oBhIu3btXKr+efWmoRIrMFxz+dSR/MxxwPft7UxI/4\nTwSaYBkcMfXFIzkNtPC0LiMY4YZINMZQzBt3GcZ7UDHgBmJOTgcGhlLRgZw3UE08ZZTtBC1G\ncySKwGMevSl1GPGVZT1OOtK0g5KjAPWm8+WzAcZpjSAR5BOD1pgxPM8zIIx/KnElhsPXH3qj\nyWTO/aR04pdwWQFwd+OopCJI/lBUnjHrTGUxxgsCSDxg0BAG3EZ3Gkf7rnndn8hSEPyQq5OR\nnJUU/cJHO5ct29qYMxxgkZ44NRN+8ZWHytnk+tUA7zFDH16YpwRc
lt2T29KZ96QkcY6A9aTb\nuR1PQjhqCgDKu8fe5wadGr7tgAAApE3LCvGWpPM2rnGD6UCF3NGrAqW+lPkz8o3YyM/Sm/eU\nv+G2l2kKGC5NAhF+U9d3p71EcbssOR/DUqxll2gnPWhsqucD5uCtA7DIlzuY9OoppjAO88E0\n9lVcZOAOMUMVaTYvHFMBrSBu4z3BFOj27DyFPYGm/KsoB64+9R8pYg/Mc9aQhrbo+4x13Yp3\nzlgGyWPO0+lKrLtZzwg7UAnDPnABxigBWJ8whcHA5XNMVSrBsnaeg9/SnbU27ujZpWXbja3y\n5zQAjMVG7HPSmqw8w7224PX19qduG9gOjcc0jL5jFcYPpQBHs+9k4UnORSsBuDLlh0z0p0yF\nlxjZtHamt93APUdRQAi/K248jpjsfan9GbJ6/MQO3tTXjPlgJ0B6+9G0tkt95eTQA9vmjJ2h\nf60wYUKcErjAxQzZXIX6/ShAj2+Ac88UAN3BipwAOhxTWZVGFb5c8j1qdY4dwGQuB09aYjDc\nTgbegoAWNgrM23cPembT0P3jyBQVBDHHJ4FL/ADn51GOaBiLv3AIOO9PZVk47IOtRspCqc4J\nONvvTmjyB8xHIDAUAIvzJtP326GhQFUDOSDg02RCcsgxtPBNLuVGIJJyeaCdR7I0blyRtHak\nVgI+fvscj6Um0DczHLdAo5oVgsm1gCfX0pDGhjIdu3JzUi7lQ7fmGetNOVwM4GetKwdSVXJP\nc9qYDdpZgB1pZNpk+Vdo7+9EmGGBlaMswU9hwB/WkAqqrJjuTxTOWQ4G3acHNDPtxtOM9/ei\nVmZRuPHcetMYFdzYDEDHSkLZRDu5BxtxS/whuh6fhRwuGxn0Ip2GPRXjJBcA9R/hTG5d2A5C\n9ulDKGQvk/4U2NtykLnB5PvSEO5WNdzBVxk89aQIzKSv13H0pdp2k7eO4NC52lsZU8YoARY/\n3W/qKTcu3B+Vv4ff2p21WUlThRxtpu0biOnoT2oEIcbAxTDf3fQ0p3+Y2DkCjkzfO+4Y5b1p\nF+VSoOCWzQAu0Mw2kBuppJJCV3Z2jPao+HOxvlINKFZk+XovO2gCRV6DPQZ+tIxZUxy4b+72\npWXdycgEcUkasNsag+tABtLLtyc+vpRvZlPOCvShZPLZyuc9KTcflTb97hgKBDSpbOOrelKw\nEfygHOKeuFbarYUDg4zR8zKCcOe+KBDGXdGNo+X0obE2AAARyCaeylOS+EI7CmBdqhzwvTFM\nBOFPHzk/zo5yMfMVOOtLCp3PtUrjvSKRn0B60DH8LKWYkpjJqNcdD3OakEaKoycc9KbJ8uNq\n4O771BQrDI3KeF6mmuv3AVyp7ih49zMMYdjQobbnPQ7cGghgwEZG5stj5VFO3HBBIBxnFIBu\n/hyRwTSbRGSOpJ60hgrYUErwetORfMb5BsX0xTJBt+Xd8vf609pVyMZ6feoARZEZTheQcdaR\nVV2PPPrS/K7LtXI9R3pqt1KjYAelAaj92FIySMd6jZd0S4JBz0pWk2cgblNObdGFY4YHgYoG\nIWK9PpnGaCyqcnn/AGl9aVcrGxUYPcGgM0m3CAKOooGJI25QO/XdimtjYHb1wPenmQrJwflP\nTFIytIuduW9KBDJGEKjrt68dqPvKO/cU/oAMZb+7TFmC5DD5vSgBR80ZQdM5NJlFYMCQelIy\n7m5O0kZ4pxZPLIGWb39aepPUdnsW+v0prMy/MOV6LSfKykMMMODQdwX0UdKY2GA2U27l7j3p\nSjLhR8y+nak84yYUr+Pehlby+VPtSYA2I0zg4zwKUgRybs4JH5UjIvyABs96eq+ZKynp1oEN\nADZKOwPYkcCkIaTI3byvXFAyoyOAwpGHkEY6HqaAFbsBxTixVt3BbHSkkxHtK8E+tLIp2iQt\nxnGO9DGCb43JznJ6HtUW4rwykEnFSfOquuNwZsg+ntSqW5HRR60XYD
PL2qGIxzj6U77qsAcn\n1po3spI53cc08qAoXGWApANQMuHOWTHNEkWCDjnPFIz5IVshPQetSMzM2NuBjGc1VhMY7BHz\nIMH35xTcgZIG5T3qRkA+Vh0FN4UE5wcYAFIBCZFhUovG7BNLsEmdwxjmnBiqgOeMZpGjDgPn\nCHvTARlVm3A5GOc01SInVjyp4wtDFrfIwOfxFC7pFOF6c5pAIm09QQOtEkhWMKFJb+91p24M\n0YHQ8EGmru81VD7eelIBx3DYUH1yOaaVbdu4b6cUpO5pF5GO9IIz5frgUADbWYAflSvujKsG\nyc0q4kB+XaQM5xSKpYc8KBkt7+lMACH74cO3pSqo5Pcc9elN8xZFzgoO6460fIsYK43MegPN\nAxGy3zCT8u9Oj2qrEgE+/alLCOSIlNuetIzCTHGAzZpAKWVEAwT60/AjQbhuH8qarKLjON6A\nc+1JuCEnnLcfgaBj1x5m3tjgnqab/q2IK/OeMUrJh2Ucsoxn2o/1R2g7gR1HagmwkeW4I2p0\n5p3neXyv3MYy3X8qa+VUgDPtS5KKmxsMBjBHP0oBCSMRIpX5lPU9qdIcLhXw2eR3FDN93au1\nR19KjVsMJBwc9TS1KJNwIZQuSP4vWkGDglsUcI5IO4tUe3auckYPemKw/ft6DA9COaduR22j\nhjyd1NMgZT82W9cUGNd4cHJxQIFwxw2Sc4+gokVAu0ZznHFLGCu3A3nPI9qe3l4Zicc9KCxk\nsbbdqg5yDSMwbOQRyOlPbzFQZGSeR9KDkqSyhSRikQyNmO9kXgg4U05jvcpGduOp96VlKKow\nN+c0qsFUsvDZOT6UwG53MCz429qVWbr/AAk8Gjord+MgetNWUtCMEKvUg9qB3HRSGRlJUgfx\nGnRqCrsSQc/SmnedpPzR4xxSn5wB29CaBgyeYwVOGI+7mjzIxICAQRx7UNtJwvUUskytnHLH\ngcUBdgjDcx6A9OKeAdhK84GTTRu29eAOad/qyCBuBFAvMarY2sD1/hNLx5pJHy+/ehVCbd/z\nMTgCkXBG0nOT0oGLI3ykdG7LikjzCd5GMjApFuAJCDGSRx+NO3FgpBzhuVNO4mNXewZwOSaX\nc24h2wP7vvSsQzMM4wdwHakJLHpgnkCgQh3CPg5HdadH8zAhAiqM08NuboVGMH61Gufujhem\nT3pAINsy+ZjJzTyPmDAn0IpzxKrDb8g7jNMXfyAv0J9KoB0eN3KHp1NN3FY2CjbjnNJHxHtL\nfMvSh2XcCD83f0pBYU58sbUwW55pfvrhVyehLdaY0ixdAWDdKeAFkLMSBtyaQDWUsoO3btOD\n6mnlFjYlgdnZjTVVUYMrF88g1K0DMpZSdxHftTQDXVmDEHG3+VNzuXKklh0zTlXygWY4Zh25\np8cYyG6pj73vTY2RxsykDaGz97Pb3qaFDkYIDHp7imowaTDHP+elOEf7kbjtKnhfamIsbSq7\ncZI5wKsxQvuWTpkcKahQbiuF2gfnVpZAVG4YYHORSYFy2STb1C5PpVyKMnI/iXpmqtuCdpz8\n5Odvt/hV63zuZiNzdAKgC1A3zcjP90e9bOmozgAcetZMO9cZUEZzkVuaHhQ5JG0nIB65pNgb\nVnCqsrkYx2robOU+WR5eRnINZVivmxgsvPQ1qW7SCRIkOT09qzk2NHSWEO2M7evUt6V1uh2r\nNtG0Zz96ub0tTDlSNwYc/Wut0nLDjlgeaxNUetfCazN/4qtYgflaQBvpkdPyr7nt4ylqiD7q\njAA7D0r5H/Z10VbjxZFKynbGvmbcf5719cySFSAOOOcGkyhklM7805mDZ9aZtyM55qBjl3Dg\nn6UNJjIOSaM9OeaY2aBjlbfxU6nbwBk1XT15qSN9pzSYEi/Slz0z1qPPPXinfxbuooQDi3YU\nBuxFJx70nPTOBTEPBGcAUbuRmmp8vJ5NKTmgYrKW70AfLzxSdOc5FO3B
lHFADeq46075foaT\nbzkcUhYd/vUtQDIBpxJ28UjMOmOaBhjzRqAvy7hyfek+90pOW4xkUuCenFLqAbe4oo9qOVGT\n0qhB2pBn0p24beBzS7qQxFjNJ/FnPFDNQemO1BTBe5JoUH1opMc0Eijdjml3ZakLEnNCsPTm\nmAbhg5GPelXpgdBSZzgDrTtvzUupQu386B1wR0o3dKUv3IpgJ0J9KPvcdqT+L2pd1SxCK3y9\nOaUevShe+Onej170ADfL0OTTcHmndKO9NgMWPbznmn5Cgk9acMZ6UkinqelJgVgzbsk8VOWz\nTNhp+3B6UIBy4HFN6rS/dbmk5znigBQi9SOaFJ3e1Lu9qSNvmOelUPoKemQaamQ2MZpWYY4G\nKXcc56UB0GtSjOKGztyBmkXLj0oEKF705W+U5FNbpg0qvjt+dADh93NGc0nLDB4pcnB4xUgH\nBbkcUnrxRzil/ioYxpzz70bs0n3jQ3pT6jA5xwMmjOOp5oDFelJuOTxTANwOcZp27oMU1Qep\nNINxbOOKAHfLhuaAAoHamqw5BFLnK80ALupP4c96X73Xr7UD7pxU9QE3DZ05o84I3v6UHhcm\nodv8Rqhk6ktn5sGmtncR1pqLubIqVV20CIljxxUyRjHSj5V9zR75pADcmmlRSc9xSnP4UIsT\n+IYqRmH41Hu68fSgklenNBLHCT5T3NMbLj0pVbZx3oky2MUCE9OakVe/UVCsZbPFWYh8oGPr\nTGTwqShOOccf5/OuM+OnxCi+GXwt1vWnKrLHE0cHzY3O3ArvIVU4218F/wDBQ74rLqd5Y+D7\nKZvLtCtxdbTwW6qMA/zpxJZ8O+KPENxqV9czO5meWQySNnucE1zFxM8jFyMIDtC1oakHj80M\ndpzlh3GayGlYxkhW3bumOoroSMdyK53RsNx3HsoqN1WOEgn95196XY7LgLxuzg9qSZfmbkZx\n071qQClUw5OUB6GmspfHVVz+FMWbzEIYcdcE9aS8BVEwWUddvb8aAK027dINrFOnFUNrHcoi\nIU/xVZmmLMXWQ7QPu9s1WtZJDI0XtuPPagRVuJvK2HtnbgdKrO4WQlUyVPSpI1+8kj5AYkNT\no8yE4Xaf73rVAMQFzvZtvGR/hUkamRFfft3HABqDcAwUjcnc+9WBIpwi8jH5UDIVYybw+6MB\nsEVXkVdhIBGeRj+tWI3iaN12uh5PJ71XkkdkRAw3YwQeOKAM2+iIjaVDsQH5qqbt0YCnaM5z\nU91JulkVmLqP4VHFUf8AXYI+VDx16UCJPKDbjkhlbPPpQ0bbg45zxgdcUi5kUH+H+970pf5V\nXOCfSmAqnycbfXuKnVt20gg4bkkdKr+Xtdvm3P7mp7eMsuwrnPOfSmIsKu7cB0HXHpU/zLHj\nAKtyPaooxjLA7gBjaKkWEBo5N2PbPSkUSP5TSoSMAjGR69qbHI0fAXcmOSe9PWM+ZtRgzHkn\nriq67zk5PynGT3qQDb5jYc8dR9KYcswRRlM5NK0gdmZ328YzUMzbVjdXLdsimITznwCBwTg7\nqfuXLYHmkDqelM2MzoM4UDPNCMMfJwM9KBolKNJtChTjnrTOGuNyjGOtKqqrloxvfuM9KSMh\nZmUjGRnPagnUiWB5lyRtJ6DpXoHwx0OG+1yyMis+JAgVum7t/M1xMciqyBmy6+1e6fs9+H28\nR+OLG2hR0jmdMuvrx/Ws5uyNIxufsZ+zz4d/4Rz4T6PBIu2d0DtuIJ/OvRpGDMRjHrVHw/px\n0nw/p9oesMSr7dKv/eauM6FGxBtKsW609cN1HNO7UnTJxTLQHjmjrzjijhuetCsOnakSC4xx\nxTv4TimLjFOZgvbimhDc5A/Wjaex5prNt7c05W3AetA0PVuzUm30pwx+NDdz+lAxiqSx7U9d\n3ekRm28jmnjPr1qhh14pDytPwQvakA+X1xQBDtNFS0UDsjIYFTzg5pygMO
KQndxilY7EPFc/\nUkbGxBPel2dyc01V9D9aXnmgY9Rhac2eh6VHy3U4p24bueaAQ3rjmnlgMGj5fTFKcbcH8KYx\nCoApAPrihnwuMZpVbC80hBuDdulJzzgcUdKVmI6CgTEXgYzmlbhODg0zkNnoaPuk55oYx3GA\nepppGMnvQsgwMCnN0yR1pJagM47fjS4DHPFIG55GFo+7k1Yh689RilQ4bPamo55HH40fNnHa\ngQ/IweMGkyNvHWgtuxQdo+tAw25YHHFKy7mytJz1zgUnPXoKCR3DU0kqvv6UH7vHFHoWODQO\nwK27GaUKrZ7UZHQdaRu4HamIXk/w0HHejccdKPQk0gGufm4NKrYFO+Xdu7UmdxyelA+gnPSn\nqPlz3pBkHHU0FjnH8qBDjilXHcVEpDEMOlPVstQMX1FJEpUde9LkHrxSD73c0wFblevOaXO3\nGRkd6TacdKN3ze1IQMMtkdKUDb9aAQecYxTN2X4oAGY7ajOSaf0bB6U5eT04oFqNjNTx9jio\n1wue9SQ4zQGp5b+0Bp5Hhj7Tt3PGw/Kvj7xFGPMJIxk5zX3P8XtPGqeCroMm8KMj2PrXw54i\nRftDo3LKMmqQjzvUo83BIbAB2msTUZfLkUDjIxmuh1LY0vXjrXO6kD5igDK5x0qiTHnaVm2s\ndq5qjKp3SLglT37VqTqJd2B0PJqlMQAeyirQjKkt06KNoxyKzztjJRiAKmuLgrMdnK+pqK4U\nTkM/A9aoRQaMrG48z5c5qvIyuqgjj1q3IDKrJj5f71UiwyOfl6YqiSvN8q8kluwqqGYuOoGc\n1cm2lg2OemarmF1J3sDnoKYEE5fzM7sKem0ZqGYbd3zM3pgc5q22IYTuGM9MVVml/dgbcKP4\nqYiIMzNl/lIFN5YEu2G7AjHFPkjxjfIORnHeof4c7d7McA56VVhjWXzYWbp2zUMsg2KcYOck\nVI/yxsFzkn7tV5Mqyh1y3TFMTLW4Z4IETDgVWuAnmKhOWHQ0/jdgLxjBX0qPryUPBpCIZcKp\nGMioWb5VfALDsamaNY8s0mN54XvTJFDDKdAvemNEUSsFJxuLH8qf5ZVgfvHoaWNf3SsD1p0z\nKqg5PHWmIrxxnzHDLjNCqwXccqAafIrbtyH5iMj2prAzScncuOVoKGKzx5Yn5Sfuio2Z45JB\n0OOKlWQbtp4UDApnlnzD/E+Mke1IQzbtXc546YzTmZvuoP3nv6VE2ySQtjB67ac25V3EZzTE\nRNnOFJOTUrbtwK8r3NJsZjknLDjNIoCnnJB7dqA1G4XaykYyeooZDIvUkr3p20eWAGAOelOb\nKxs2c47CmBCpZlUhcYNHlloyG+8Tnmp1+aENnaMVC8iyAFuvTikBErHzNrHOOlO+ZSccihma\nPaCv0o3bei/OfSmIb6sflAPFEf3iNu0HrSEuYiGXJJyaUKWG5lI7YpDGBXBwfu5zinLnncmc\nngmnyKucsdg24qLh1jy5UdjQgHyOVB5/ACoPMKiTyjuwcE4qZPmdywIwO560iRbGLfdRu3vT\nERLlvk7dzTg2M7T+NOEe3dhvrTZFZmUt8q+go6FCzEkbhyOMgUn8QBYg0p/vD5V6fjSfcXrl\nj3NADZGCKDjI9qQMS3XGRyKldgqjvUEknzAkfNjtQIXy1VdvUE96kXJODgDPftUYZOARlutO\nbaMyDOaOoWB2Cq+QW7gimL8zAgdBmiRv3O4Z3Z6etOb93ImOe+2gBvnGRiM5PvTWYDOeduCK\nfJgSsoXHfNIv7ngjJIoARGEzE9F7getBkbbtX5Xz1PpTxMNoZB7Gmsp3FiO1ACbkU521GmUU\np053CnD5oA7DAzijy+Qc5Ud6BB5bsxkzk455oVhtAX5weRSM3cU5ivDdT0pgN8xVOFG49cY4\nFLJ90FuQegpdzSFtwyfUCmrlpMFT/SgY1sbuBgUKW6jlf1qRm3YCR/N61Hho
33fw45oEOJEb\nEnk46U0feDA5PX6+1BAWMAEnd/KiOPZkg8UDCTc3OM/56U0scABMe9PWJo85bBbn60w/uUyw\nLAnqORQSDbWkORvGKUMD944yOBT4T1QDqOtMRgzfdzgUFCJ0OQSfXND4O3A+agfe6dfyprL8\n+5Rx0qRDo8ZJYcUKTsIwcZzSQ5XcD8wPPNLt3RnkgHtmmAhbaxxnNKr8FgSTmiNAqHccN0pP\nLJG37o9aYxzEOfm5piMMbyNqt/FQAqwMXPfHvT4Su5QEwOu3PFSxDGwVBY4XPWmNDtYlSCrc\nD/GpCFw/mOCCetO2ruJYHO3AamAzyztKk7h0pM7c5BPHQetKCxUbGGB+tJIw2k45bjPcUFEn\nzwsN2NnemhwrF2HydqRsy5bOFA5pMjjHK8Ci4uoq4HOM5GQTRuZZFPRuu49KRs7m9hxxTgw3\nbDkkfpTARt0rl1ztPXFJnc3BAwOcilh6t82CfWmtuZghGPX1oECqFzlvk/rTlwq4I+ZuppAw\nLeXIn3RkCkkPlxqCSHPOPQUAEjBshfpimMohO0j58dKf5h7pzjA/xppYsoY8sOMUuoD1UOyu\n67EAxUUbBZHVl2kHHFPdmbGDtI7UkjcqqnJI/wAmjqMkWM5+XDY9ajx1P3l70TBdqjJ+Xlh6\n0jMZGIC/IRyRxTELzwxwV7e1M3beI2yxPehVXgD5h6Um4K2V+7096AEZizEsM9jinsq8gp8/\nXinNMuzkHg4AodWVgw/E0DGBegxg5p275jxkdPpSTNt24OWpGBSTBH3h1oEOmO48D5FH60h3\n7SCcD1ohGRzk8daViN3zHO7mgBC20KFbr1pVYxsQPnoZVjcHbncMCkU7UORxQA1cKxXt6Ukj\nK3OMevtSO21QWODuz0pzKkkhbcFWgYnzBuBlSMZo2+Wykr7fjTvMK5B2kHgUzcyzbSrEY64p\ngOMZVsZ5/SkbK7tw/CnsRuGCDxyKa0m5sn6Uh6CRs7Jgfd/WnyN/q9j7R0I9aadsfI4B601/\n7wORQIVlOCB6/wCRTm+6V655J9Pakw0kOUXLddtLJmOMEdT1oER4dV+Xn2NCMeCo/GnyRuAg\nPzjOQKbtLBuSMn8qBjZNpkAcZbPanEtkkYJ6cUbumDt/2qQHEh4wQOg7+9ArjVYBsHdUjhlt\ny2cc8HvS71dcA5qLA4LDBHHPSgB4O7Cr82Ry3bNHztl14Cjr60HOwDG09sd6RfvE4O0DtQAq\n7N0YA+YDBz60jTeZGQQAytk474pI2BXrx3yO9M2jBI4zQFiaQs8gfPykevamIu3gRkLzhjTV\n3fKxG0DtUi4Zm5PPWgVhsjEgMc5HWnZKsAVEidaVmaMhicqR0psIyGYHb7elAxpUSEsSAM4A\nNC/eCE7s8DNP4kjJwCFprfdHHPXjtQA2NjtYlPmU4+ajcCh4zu6ilOWO4nBHY0gOxWJPygZy\nKYrAy+WqhG4JGaVv3e7jcP1prFV2IPmbr6UoJWVsggnj6UhixgNh2TK9PxpA67WBBUqfSjG5\nvmyCvajzAz5PA96YArDbnoT0xQn+twyMOOTSycsxx8qnijdIxyW4Ix0oEHyqSvXuAKaylYwE\n+ZQc805WKqfmBIGeOtJ824EDAYdD60FAJAJdx9OnajlWz6/pShNrHb9Dn1o24zv+6OKQhm3Z\njB+XsO9OXdvy5KqR0FI0hMAAHzZ4+lOEg+WTBLDjHY0AI2I1Kowb3pBhosbRu6+9JLhZMY4P\nOPQ0p8mHJySSMEU7CHyfKMAAnt61FG21XGOvUEUOAq78H8KVmPmbjzx0FAIRcFSuMMecsacq\n7sBiVA56cH8aRTubkA470qhhJuPT+7mgRGysVzwD6U/zWbazjeV4wKfJ3Yng8UKmyQSKcfLj\nB70dACNdzEA8deaAo3Ackk5pGXoQvPU4NDY805YqpH5UvMYbQrFdu70FNbeuEc
AjtijaApcn\ncRwDRny8O3zE8DFAxcH92zbWX9aT5vnAXfg5z2FJt6LuJwKFDCYYJVccj1p26gKinzAqc5Gf\nY0nzxsVcY55pNoZT82xs8Y7CnNg9eT6tTAN3OSu3sPSkLDcedozSsBIxxlY8Y3H1pI124/jC\njO40hiLGjSMzEk9iOgoP3SGY+m6lVjnd0z1NHy/MzHJFIhg+FUKDn0Y0rNtYEJuOOVpJAqYO\nNxIyKGyGV+efyoKF3hnyoznj/wCtSfd3sOj8D2pVYeYTnce2OlJ0ycbSc9e1MkbtKxrGGDDO\nOfWlZTHIybgdvXbRGGb5W+vSlb95kKMKBzSAYzDZk5Y+lDAKiYXB6Zp+4mHYfm7gDtSsx8tW\n6xrQNA2WyAoJxnimbg6qV4Y9jT+JFL/cJ7Uh+fBxkY9aB2EbcOc5ZeafJMxbI+YsM/KMYpu4\nyLnbjaM4pBIUULtyW5Kn09aCSMuyXAO3IAp+4SYZowvP3hSp8ynBynTcaYu2Mkk5U8UmMduy\nxz8wx8tDMdirt+YUildwAyPekXazFiGYZpjH5YShWxH3+tHnDkbfm6c0nmfMpIyO1SpGzyF2\nwOPu0ANcrGmQMlqNu2Ic7SaaG3RBtpGDzStIUZTt3Z7UmAu45DA8jjmguG5dvmH92m/MzEY4\nHJFOVirDOD0I4paiQR7WRnDc9NtLI4X5ANx7mmbdzHC7ZGPbpR90HB+fPNUUOeROCw287QTR\njbkFcoDndQqj/loM88D0oXcsmOo/vUCF+4yKRlXGaI/lZsAY6UBWU4kGCBxSLjqQRTJHc7Qr\nnAB5oADSY3Z/CkjBbBbr2WnbnXdgD6UhoRflwxORnFN5SUsTvT0pY8r14OelJtxIy5z3oAVs\ns2W5xz707y9yhx8o64oXa3zhdmOPrTWO4AMSDnjFAhV3FuVwH4pMeWFH8J4PehmMnyscbelL\nGSuOeAOtAxTuZdnTbz9aa0i/K20r605VJB2KQvXNIq+cpw5I77hzQMRoyzcHaAM7qmz8gIxy\nMfT3qN0VowMHA5NN2hjlenYe1Ax20Km0swyME4/WkO8lRgFFHUdTTwx+8eF6U1mZuBge9Ah5\nJGJFPGOvpTFYFI2A5xxnv70/5do4IQdD70LMu0grvA6HtQA6R244H0FNTELEkckZ4pu3y4S3\nXnoKe6PsTbjn71O4rDVO6PP3Pwo2nasm7cQcDPWmuw80AZBHpUyFc8D60XYhkpPIb5fRqZMx\nO0EcDGakkXfIN52lf1pPmQnjch429/rSAbJGu5m5L5wtPMg3LznaME+9OkZuMdM8GmnCuSWD\nE8Y96AI/lXP8OTn605GzEWGG5xjHehtwVsjewO3Hp70W6skZ54B5H9aBod5a+aBgBgMkelNV\nnZTgbyTyD2FPCvt5G5Q2GJpQP3xVdynHBpiYKAgAC4HbFJ5g8txg8dTTGb5soN3Yn0p7BmIx\n9ymgBZo9ysnPAGKb5gTJdGbnkCmqp55CqDVjEnlkHAz3obGLaxIzHIxnkKad8y7H/h61D5Lj\nG5trj86mjUluu4elIbJ0lMmB1fqatxllkyYzIMZwOmKprlpdu4jvwOPpV6OSWRcxgbF5K0El\nmCX94p/vf3euPStNcMqgLtUj16Gsu3WSeQNuCH7w46VpKCZCOhH3gD+tIEW7JWaZY3kIkXna\nB1FdDaqVK5Xbg596zdLT98pA+Qcn1+ldGuyRgduAeVBqGI1IJNgUdzW9pEQ4dh8w6CsC3Xbs\nMmCOgxXVabDsQjHzf3jWTKRu6erSKAOrcAV2+h26W8iSMOO/pmuT021kWFTjkc7q62xb5U5+\nXHPoKg0R9Rfs424+13MwB+6MEDj6V9ElxwB2GK8f/Zx0UW/hM3L5Mj9PTbgGvXWyGbiobdyx\n7NtxincZ6VGjAYyeaeOpqRoTcNxGPpTj8q5PNNYjdx0pNwORmgY5enoDS524Bpm0
4A6CjeQ2\nCKAHjIYE8ipNw5z3qNfmbvigg7s44oAezdhSPnHPNNZulCtu74oAUbuCOtOY985poyOaBjHT\nNADlfC4xTt3HvUe7oaUHcpIGKAJM9jx70xiC2MfjTd3QHpTlk2npxTYDscjvSc9QKQNx6Uu7\nJJ7VIDmztAU0pY+lN77qTcexoAXcAfagHrnpSL6HkUEjd7UxjlYKCRTApY5zinY3LkUD7oHe\ngBdh9aXdheTzQWPQ0o24AxmgBFNABzz3obC9OlJuO6gQ4ZDcUfWkyc8ULkN1zQAHHpQMBeDS\nk4NG3qccUhgG4460Ft446U3jGcYpw6ZApAOb2pFwBz1pEJ6VJ8rHkVQxvPak2tjPel56ClPz\nYPSkIbtzg96Xdk0h9uaUN1OMUgF3Z5oDFm5qMSfNinjkHApgJz/FT8njjimfU0vO2kxCsvzZ\nJo+8emKSjdnjmjqMXB/KkDY+tGNvem7dpyaYDtpK0EYo6Dihc0AJzS88EdaTd81Bbk8UDFb7\n3Io28c9aMlUoxu6cUDDknOadyQaj2nmncn2oJAg44pxYsOKbggdeKBjk0mMVWGMUjLt96QsD\n2peTyRxTGLtOOuKPbqaTd0FLz260wEUYfJpxyM+lM2ndk80jZ/woAVsbct1obLdKViGX2pFX\nOMHFIBf4smhWx170dMAmhhk5oAGAYdeabtGSKX7zcihgNoOKYgjUrkdKfv2nBHFMRvm5ofLN\nigYu4ZHFOYjj1pmdrdM0uTngUhgxNL/DSc789RSsD17UMBAw6Ac0N8vBoXlsgYobluelACBe\ncsaUYb60HpSDnBxzQFiRfl57VPGAwwOKgjjPQnmrtuojJJOMDJoQGV4y8SWvgvwjqWr3Unlx\n28DPnIHOOOvevxj+KXjq+8XeLtQ1y6laZ55X+Zuu3Jx+lfbX/BRL41HT7O28D2b7TOBNcsrc\nkAjC/wCfevgHVrqKPTbmaQfcT5U7mtoxMJM5O61aW4kuu75wW/Co2ZGhOGZUQZJqhYyExShs\nsGOWb39KmS4LMEXKqeo9a6Yogc00j7NrlkPIHrUjTrJdDCFQBgikklRrcFeCpqJcs4JOO4oQ\ngaSKPquTnj2qGWRslHO4t0FTXGWhYquQPmNZ8kjzbdq4ZRndmkIqwoEldGb5FPSq8jf6Q024\npxgVNdN5xA27D1b3qKTG0RlhgnCirAgXY0hZVzkYJzTY4d2VR/LUHO1qJvKhUIp/eqc4FPaN\n7iEHgZ6g9aQB/rI2KgAg5xTVn37mVdv4UK25flXITgAd6c8joq4X5nXBUUwIZ5isW0FSSck1\nWu5AsZk43MNvNWpJFih2LGGLDvWXeBQC8hIwMAUDKlxM6qVOAmMD61U3AbHx93jaP50+TLR4\nY89hTdwVsuny42k+9AhylYwqg55zj1p8LCNCGOW9RUSrENwXnHejYyrlSGLDkd6rYkkhWPBQ\njezHO/OCKuJH5TBQ2CR61Vt2DfdXheOnepoY1aT5m+ccilcZOkpjXIU5YVOpQAOVY84J6gVC\nrNDGrl++ClWG35JUbY8ZC1Ix5kZc7NvTt3FMaXzIVD4QZyD6+1DSGTB3KpA5GKbuVowgXzEY\n8UwGSyJuKum0kZyB1qvtjViSDhhjHankBrhSznkYUUkzjdg8KODVDGpEsYZwDJJ0C9qcrheN\nmG9KZH5mQV5ZDuz7VJNte4yQUJPXNIBscAZgSdobktTFwtrIwOSX2r7+tHktNM8e5gqr8rdi\naW3jMbRxRruxyz+v/wBekQWYLN7q4VFOXUghf731Nfav7Avgd/EfxKsprgL5VsVlLdRkDG01\n8cW8OLy3lhckl+fbt/Wv09/4Jq+C226trc6L5YiAB79MZxXLU8zWB9+Pz8oXG0YIqNsA4oyR\nJnp7U1zuPQ5rE6A3Z607cu3FNX7vrQF3LTL8gHApVxzxzSAHuKXaQ27t7UMRGQR/Oj
zOKdkd\netCoNuDSARcsvTNLt2855oVtrbRwKeFDZJGQKAFBHpzQ2F5xk07auQVxjFNZtrZNMYq5bHan\n4CnpUZY9elKr7hzTAd/Fz0pPugntTgAwOaZtLdDS1APMHpRRtFFGoGTj0H1NL0Xk570jMOee\nCKdtyoI5GKxYCLnbuxignCkk5ahjlSe1C7fL4HNIAX5l5HNBIbgDpSJ97NCZ3HNMA3cAYpT8\nuM9aUL1JIpCdy4PWmMXIxnGaduBbgVGp24U0vO446UEj+vPSkPLZPSlXG3k0jfdJ6ikMSRcf\njRuG3kUu4MuSKawPOTTARPunBwKePu880ke3bt70m7CkUAKy7SAeRQPvEHp2puSaUKRg5zQA\nBfWjduyoPIpc5oZQvfFABuHGKXcPTmo1YLxipdwz057UwD72MClOe/Woy/ltjP4U4ZcZoEOV\ng3Skox8uRjNM2nINSMfuw27FKuN3NG7cuDxTc/NmqAezfLTM7lIIp4kXnIpu7tigQYCrg0Mw\nU4oYZxk/lUeNzZ7CkBJGD97NE0gXoetLu6AdKa6rupiG7wVwBTwx3cEUxl2tgdKUYVQD1oES\nrtb6U7jbgdKamMHigYVetBSAscUmQODS7tvGOtK2CelAhn3celIuM5IxTl2t1FKygnjigljF\nbLjIo3bWPpTmx+NMCnJORk0DHKp5PanQr8p9ab7d6njxGQeoNAGZ4wsWuvDN4ucr5ZOK+EfG\nGkyW+pXDH7gYj8K/QHUFLabcR9dyHiviT4nQvb6pfRMMEyt+FBPU8P1eNVuHQcHrWJeBVyM4\nOK6XUIk8x3IHmYrm75t2dvB71S3JuYMnyMRjGeaoXXyvzwpFat0Q+WUc4xWTcfvdpbnB+7Wp\nJj3Fu6OSy5U85qBl3x4Pyn1FaV1IWB4wOwrO2tt+bO0mqI1KVw3lR5A3e9VZY8sApwuMmrVw\noSZSBuDjFVmU+aRnDKDxT6gV87ty+lUriV/LyFJYHGMVaZRtwwYnqah37WDKCqnjbVDK5/eb\nTnH95WqtMwa3Zedu7oOtXJVk6SMpU9Ao5qIoIyVDDpRcChzwR8x7GnvITgKm3HJwP1+tTJEs\nSMw+6DQx8xMKcAjr61W5JUVh82V4JyDTZFZlVsYAOaS4PlsoQbx39qaJGaMENuO7igB8kjeY\nflAyM5zUHm+ZGxQHA4J9afN5nmFmOW6ADt71EuV8zPKDj6mgBkhD7CF+ZetMVn+bnnHFTKsi\nqMFTuqKS3kX5xzz0z2pjIlPnW5KgqVNIzHb04PJNLJsXA3EZ/hFJyAVPC4/WmFgX5VEnMnoF\n4pnzAbsgBvSlWQrGuRhehFNdPLycnNMTDyyq4I+bNPMwJ+TIPQ1EFkZR82N3A/xp+2VWBVQV\nUYJ9fehlDJEWSQ4wOOBSNtO0lvbFKWEjBsYbGKYy7M4GTQISRfmyPlQelIo3JjqAc8dxT2AZ\ndu7a/elZQrHy/vY6etAyu7KrEhcg8ikyVTcGwSfu0rYZgGXaMUYBhKnrnijUQ4sY1yxymPu1\nF5mY8FAM8hqe+1lX5SNvrSNjzBjBHXHpQIZ+8H3juGPyp8jKY02th80oChiTyD2prqNoBTj1\nFFhDOI22jnvmnKzNglskcVH8ocxg57k0MTgBR8tAxxXecM3IPWo5DuZcdN3IqSNdq8ruPemt\nFu5BpgLIuXYJ160jsSo5xx9339aR8xoGAyaYx+6QMt2NJgP5JPTpyaFkPRhuHQGmuGZ1wAB3\nxSzIGYEdKoGJtaSTYo2L1zQ37xSf7vFDZ2goSDml4YneQcdx3pARtH0OevqaODgqvPShmLso\nK8Zp0iqGIUYx60DuJyG3bOehpFzJ0XPOKAc4z93HrQv3cBTle9MQpwGyDjtimqpViZBub2py\nBVJdl3Y5xSIWZWZFwzHkHsKBDdwY+x60km1Rx16E0Kdo3kZz2pVby+SM8/dpDDaoTaRl
COTT\nF+VdgOMfyp7SbuXXb+PFMkbbwFB45IpMAj/eH5GyB2NJuUMctxjBpVIjChVw+M+2KI9vIxkt\nQTcTcu5Np46e1LhVbr+FMUbgRjBXoKcqsyq465waZQNhkO3IGaPurxwOhNSleT2qNSMkBsqa\nBAwO1Tu2sKd5Z2jv/s0xVHmljyfSm8srZzjP3qZVgJMa5B3AGnRxt+83A7c8H1oKlFy3fqfW\nnGR1BG0lOvFIQhw2FILKetIF/d7U4Td0ake624C5NJ97gn8qCQZRHISD83p7UMF3Bhkcdu9I\nz/vgw9McjrQd0S9MZNMLjlwy8nav3fxpIxtjCHgg4+tG072bqMcLQ2GXJBJ9qQxrR7eAeho2\nDzBz8vUUkh24wM7uKI9vmApwqnBzSARxujc7u/ansp+RuCNvUUMoyWHXORSGNnYEdMUxgrKW\nbGSQMZxTdqLEoLZx1IPenM29tq/KMc4pPLDKoK4XOfqKBByeuGb6UuSuTne7cYoXD7iuBjpj\nrTMDaGJ289PWmApt4yoZW59RTlxG2VG4hqVWwdnU9TTvm8tsDnOaBkLbuHKcMckUqksxKcqO\n1JMG2qBIfl6kChWNvJxhVPfPNIQm4tH8rEsTyKUtlmwNrHrSHcu5d3P3gO9I26SMOxwO696A\nBVEnKtlByfWpFYKpYj5uy96Z5adMFF67qasiHLYy/QUDHcSF3xhwaNmfnyS7DnP8qA5lWTdw\nGHNM+7Ch7DvTAVnKt83A6Y9qFYlhtXCr0z1pZNodVKknGQ1IQzDb360DDBycg896ULjCtgMB\nw1Jlg4Z2+TuPX3pfLZpHiIyv96gTE3GMbguZD3pse/d8xwG604crs53LQDsYArknnbQIayhf\nlGRRI6JsJ+/UjFZTtBw/vUckZfkLntQAeZyeAecipNytGQEPrz61G2xiAgG4etLlmGcFPY0D\nHQ2+2Mlzy36VHJmORQQT6E04sQTuJz09hQFKry+Tjg0CGrkNuB744p3mKy72X5ulMXyxGGOQ\n6nLHsaXy/wCEjPNA0JKH3cqSMdjQNrYBBC45NPGBkZOQOc0zdkKmc5pCESQyK23lV52kU5l2\nrwA2RmgE887exobEOCh3elMaXUaFRFIY5OOppqMPlGWHrmlYbEOCGOc4okY9WI+YUCAkxqW4\nGTjpTvLPygAEk/jTW5UMeGHAFKW5VwcEnBNMBI2x5m7gKeKUY+8Rj1NAUK7RsQVPSmMvQE7g\nKQx8busp29CPzpzYDYJwMfdpixlX4PvTtyknJycZFAiPv8zN9aFby+Mg45y3pTi53jupHamj\n94pBx1/SkArfeyB2zTJWHmKyNhcc/WnYEeAhLOTx9KG3JNygK454piEkkO0KE2t1OP50pO7J\nZvlHBHvRuRsBeGPX6U7/AFfCnevTJoGxuNqqepByM0kcriRtw2jrSqu5iHYKMcUNtVuPn3Db\nTIAfMuGP3jnHrSvGsanncT09qNvluDH6bTnkClRV83gk8c0ixm0qoDMCPWpFwrDDdRimiMbQ\nGOFz3pPL5/HgUAH3XIC5xxmlb5T98f7uKRwW+QcN1pCx4bbjtn1oEDKY1Ug7lJx9KFVI1OCS\nwPSjjbknHqKfuUsOML2bvQBHIwaQE8MeKQKJF6Hb0Jp7Yf5scqcZo5dOmBngirKGrynPJXgH\nFIu3btzuOc5NNVmbc2eFOKPuq21eKgRLMfnDAbePzpGDFfX6VGi/MvzZIHJpBvVdw9fu/wBa\nAHcjJzmgsAoLcOOadu+U8cdc+9G04O5e2aYhF2sxZvkyPvY70KzySKQAdvajaWjyT9EpQ3mR\noxG1f4lHXNBQFTuKE8tzTRgYGOnWlUeYzYPzDoTQu4KB0weW9KQhYm3bsDK9qNxK5K7cdqRc\nScZw2fvHoaNu1i2cmgLjdu5mfsR1oh3ORtTj+81IZN0YGw5HOakkOI/3e7nk4pksZ/q5Cr
fM\nP0pWUs+3dzjjFH3VBdcj1/xpEVHjLoCee1MYnC7duRjg5p6qOinB64pzJ82P4mwRSMo3NuG3\nApdRCIUkYsw3J0/Gm7fLZUwWJPX0pzSE4GOfbpTjN5ceF+8Tz7UCG/KLghQVPYnvSStgeYec\n8cU9vmjYfx44IppKts5wAOfrSGhGm+UR7cju1IoxyMkdOelCZjRlzuXOaa2c4xgGgoc37zIV\nuVGc44pJM/u3bgdPelIyqjftfPQ0FeDnkqfvUxCMiiXafm75p3yrwxwem7rSfNwSuc9KbtyQ\nACcnB9qYXHfOzbEAcjsT1HrTI28zock8nsBUm8/M2fujbimCMxqBt6ikCHbR5PXHfFLyzLgD\nGORUYKA/IGOBjmpFB2E5+agLajUdi4J4X7oNLtfzMkbQPehC5woO1R3prbtjMOUz9339aQD2\nbzAMfK2cgYpo+8WcliaI2c4QjHejKnO3OM0CF35b+6BRu2nkZHXijaZGU5K88gikX7xIPU4o\nANo+YZ2559qkbDKqZAJHfvUb4U7y2R6GhW85N7LgZ4B9KACRQFw579BTiokbbnYoGV4/nTU+\n6ynhl5GfSla43Rtx82doGKYDflVs7iVPIK/yqNd0jCRuP9lqlcBVAA285x6UoVWkChSWC5z7\nUDGLlgwwduKMCSROQUUfdoHzZBBG08HPanTMI1B42kY4pDHLtaQ4444B6VE7GT7p2YpSCoGD\nkAZxSwxtKoZlwOoWgAGVk2gZUYOff0qZhuDOWw/dfSo2zFJjO1D19c0zaGOVHAP60AO2suFB\ny3vShsJ1+alkk+bO3BxzUZG5ehXPegQ/cfvd260LhgxBy2cAUi5jbdJ93HHNCHcrqPlY98UC\nF80qxKLnI2miQbdqfxUbQqgqc+w/nQyq6F2B+WmO4CQop3EA9CTR8ynGSuemaYsYPBHXmnrG\nS4TaVKnO0nNIBzZ+VWOQD0NPaQbcEYTPbtTWXcWKjcTxmmRr5bBSQG6c1YhxwGzvwD0alZlT\nH8SsOWo27sYQHHUUkiiNgxXcuMBRUsYrZ2jbyR0pI/mbbn5uuacAd2ejYyT/AEpIyu4Pn5PU\n+vpSGIucnjJ6Yo4CqhGZM8Ur5U5U/P6UhzHGXX5pM9KAHqoZjk47EU7mEfMv0NQshjAlbLbj\nyg61JG5diyg7Om09aBCFmiYk5xniljXdIX2HaeOtMZiWYSc96dHh0kO4qqrkfWgWo5Y/vALt\nGOSxoQbSoHSmP8oUEMT3FHlncQvpQO49ceY6OcoeQfegjcwJXGBz6U2P5eYznHVqVpHmIG7A\n67e1AxNhDByflz90elLxkuuQM9DQVK8EZyegNLhGjYE4xxtoAPmZwVbA7+lP2ttJ3cZzx2pj\nJiFSvJLY2ilk3eYoQ4IOD6UxCsoMa7cbw2S3fFDqRISOlHk/MXLZkPoOBSJ83JBx0LUEsdsd\nm+b8D2pm7GWI384pVjIU7WB3HAyaYFkV8ZyBwQKQyXf5nB6djSNsMyAANkYwP50kbCRiCNij\npmnKAobH5jrQA3dtkkKthc4IpWZ5DyAqAdBRt+cEfJhecDOaMiNF7f0oF1Gs7BcgFk6H607e\nUYAA4x+NODEs4C5GaNzMMkgrTGMReCxypJpTndvXkfd/Gljz5e5jt4yBQsaNGHXPzEEtnjNM\nBFU87hginx7BN94uAuW9qRt/mN82VHNG5XXcBl+3ahALt8xuh/3m61LHtj2hRn1JqPyzKqvn\n2OOlTSEqAuQB0NBRLbpsckAgnkZ6Vbj3qzY4yOR2NV0yFQFshegFXNrCMYbOTz9KRG25PZ/I\nuFyvP3ielacTbZJEI+bdnd7Vn2K7pWwu7jof51oRwiaYfN93+Jf5UhbG/pSoqBieM9DWzb5X\nhhjPNc/Yxor5OScZCmui0+LzXLliMDArNlGlp9utx5pZiCvGK6PS5iZIwcggg8+grF0Uq115
\nZXIY5P1rq0gjjmiYD5sgVmy0dTpDeavPyjOa6/w3Zm8lXA6tgA5/wrk9KiMnThQf1r0DwTYt\ndaxZ2wOXZ1xj6ioKPtr4Taf/AGT4K06PGA0ecc59PT2rr2OcnPWs3QYfs+iWcYG3bEBiruea\njqWJx1NSJJnOOaZ90gE8dqcre2KQDs7cdfelyM5xTfUZpw5XOKkoVmPB7UFvmFN3D14o3d6Y\nh+7jIPNLvLL71Ep708NuTcBxQGoq/NxTuFGKarDdRk5oGh+4heM0LnrxSbjjnpTdpXoaBj1I\nEgzyKTrnnjNN3fN0NOHzZ4piFbbtHrSt6UmABRuB6ihjHK3ZhSKfmPpTWb1pyndyOBSCxIee\nM0ztTW/WlXIX2NNAOU0LjBApwYcgU3cuPU0gEZfQ809BzzSZIX1NJu3NmgSHSct7Um7FL2pF\n4PNACjG2l3Y5xTQwo6n2pAPU7vak/nTQdvWl4xkUgF7e9KoP0Hek9xTWl24OM0xi/ef2pzZ7\nHFIvf0NJ+FIOg4H86fu4wOaZ056ikOGxjigQu7bwaXl/akVRmg5D5FPqAi+lKrbsgcjvSbSu\naRVG4kcUxjm4XpRuNJuOMGlzS6jEbGPelkztHNI3IweKF/GkSIWO7pUjYK5pu7jApo+VT3NH\nUZJn5aMFlwaaCdoNNBPToKAJMgn2oYjtR/DxSevFAhccc9aTPY0gznnrSgcZNUUheD3xS+1J\ngbc+vShc9SKAA9DilGFxupM7vpSbSzADpQHQJHC9BxSowdfTFRyIWbGc4pVG3GKkSFycH5ac\nD8uDSZPHNKccUIoO3IwaVeB1o5ySaQdOKoBGY9qXd68ijn0o3DbgjmpATaD2yacXxzTQ2GoJ\n+bOBikAFd/PSnfdwM0n8XHAoYr0zmmAgzu9RS7xjpijp04pGzjmqHYNw9OaUeuKQcketOX3N\nAhq/rSj5qVvvZFIw289zSYwDbeDyKBk89qXb8vSk3flSAPr3o9utI3Jz2oTO4kc07CBlPXPH\ncU7cABxQDuzQ0Z6ikMsQ4ZsmqHjLxVZeB/C+o63qMqw21nEzsc5JOOAPXrWjbqOSfl2jJJr4\nL/b++PU8yp4H0e5/0YgT3UiHqQRgfz+tXFakyPkb4wfES7+IXj3U9dvZGnmupdyKxztTPy/Q\ne1eZ+I9Ra4jCNnZ02/1NTTag8sjljyG5A61k3bPudicqc4WutIwepWjPyFFk47g96c0zLFuU\nZZTiqqkFdrj5qkG6JyuxghHHpVakjzA0kICtz1NSyOPLjyeQecVGsgjZWPTviljfcZAEJbqA\nKBF25eOPbLnEIHO7jNYMrhSSrbUzkc9a0rkW9xbRiQlDnDKTWTOsca5AxhvlFADY0lZzvGVY\n9c/pUM0LI7gjhRnGadM7tIEzkN19Kr7TJsCv905JPpVAJnayu/ynGfpSO25WkH3vSlYCaIsP\nmZzwpprR4Xng8AgVLAnt4yilG+UHmolK+YEG5+cDIxUnmvCxAUsDikkwsgc7lU9qoCAMfMcS\nY3KfXpWZdAsrYcGLdliR1q5cKrD5QTvP3s9KpTzB2KdQODjoaBFOVjt3DBQ9PUVGEHzZO75e\nGPrTgxWPKkb85CHuKaqtJ1ICt+lAxqyFoxuXB74709lIAMY6nvSBn2k5yBxkCmrG8i5HJHIx\nTETW2dzsXLOONuKuW6+Wjg/N81RW/wA6l87N3BBH+eatwoqdFwuOe+aQx+N2cp75obeI/vcH\nqKEdw2+NiuBx60saOOT8wbkj0pAJ5iLs3jdkbcAURMWGz7mDgD+tLu+QbcAsMMKiaUSKh2kd\njTHYWSItnkbV6AVEiiSEtn5e+aVmEUnyggH1pmCJNpbKMece9PoLUC23CK2Nw6ikkkfcwkwR\njrRJ+5VspjacBqbHCpj3yFmduq9aQFm1/ebY9vykZXnipNuISwYBs4wBzmmLgqu3njG3pj/6\n
9EsgFwj4LMvt0qW7AbHh23+2XghZS6KRkD+Lp+tftj+xV4NHhX4Q2khQJNcKvzYw20DpX5C/\nCLw7JqnibTrZI/NNxOhbaOxxnH4Zr9zvhjov/CPeA9HsQuzyoQGGO/X+tcNSXM7I6IKx1TY6\n96h3fMQRillOeAetM/ixU6mgDKknuaevyqTUcjFTk1Gsx6nkVQFndlcj8qQN8v8ASm7i3QU7\nbkZ/OgYmR0xRIpZgegp+R6UZ7npQMbGp6mpRjb9aYcSYwaNo69qBC7hTW+bI60N7CkLYxVIY\n5VOfmJxTtoRsUK3rTuDgH1pgAxjmmrjb6Ggt8/PA7UZy2ccUhsdxRTaKBGQdu0EDmhWCx4zU\neT+FCsGUjHNYATYGBzxTQ23jimt8vGKjbIwe1NCJcqc84NOHPzA8VDx1FCNgkZ4pjJ2Aak3K\nv3hxTMHbntSrjo3WkA6RgFGBSK21cmlG2TpwB60m3saQCgbqfJ8qiolV1b2pzMOh5poAyfwp\nGUlTzmlz04pT93jg0/ICJcqw4qRgMcUudyn8qTG38qQCBh3p2B16CjHfjFI2euMimA1pAoyO\nRTWYsQaXaCowMU7b3FIA25Wl5X2NDZpdpb5jTAYR/ER81SRjauSfrQcd6SP5lxTEAAznoKdu\nNK33RkYpjZ3YqBjsZBAwTQMNxjGKRThsdD7UozuJxVEgWGSBSk5pMjaTjmjr0oGg75PSlXC5\nHem5x2zQFIbPWgQvy7sYpcHbjg0m00j5X2PpQAbRup235xuFC00uexyaB2JlHXnim8KORmkX\nJXjuaGBHGaEKw5s4J/KhWCr1pcU3bj3piAH5TkdaF+UZND5Vh6UjfdyKWwuoxmAbOMilUhua\nXduXAH1pew4pajBfl7c1JEwHB5qMt0PSnR43ZpgSgh1OegBz+VfIvxws1/4SbUNo+9ICPyr6\n69R936V80ftEaX9n8QeYqffXezgUEny7rtuiPvU4Ydq5HUP3m7Bwa7fxLHuXAGSx+77Vx01t\n85GcLirEc5Mrx5OeKptsYEMpBPQ1pyw7mdS3HbFZl1llwOMGtSTNmi+UZP1qkzjcOeAe9XXk\n2y9MqBwf6Vm34lZllGMHkqOtBJWM6yNxy47+lU2w0mScuTg1YVHVd4QqScYNVpoX804O4r6U\nxEMkeMMGwagkDEEA89anIDKu7KsvrUc37vIwCD3qwKskjYJAGemTUDKR+8J+XGKsNjaQ/Ge1\nQ5CtheMc1QivuKhx03HjNRyPviITBbvUxQySFmHJ7VGsS8kdPUfyoERyW5jddnPamBkjYZUq\nuO3rU25cBwCeaiaP5WXO7J4/wpgReSsitlixao5AfL3E78HGKI1aHf8ANjilkUKeW29zStqN\njFj+YkA/Smyxufu8r09xU7fKq5b75zSLnBKnIHFDEU2Uq2zjdjjNNZVSIB33Pn7tTTR+Xjn5\nvX1qGQhhuC7pDwB61QDpIx8y9T1FQyeY3ykjgZpxkZinBUrwVp+9Cx3DBPFAEBO6MbTjsaRV\nLYVCxOe9OVfLbpx0pXZkYMG5zjimxjZSY5DuXjGMim+WVf52GCKSSQDcepJ59KGG6MORk+9D\nJuIyquWbBz0pMpyxYgnilZhuXC7uKRpBuUquRmkh3DBwFY5Hr6Uw/vOcZ2ng05ssCCfmzmkf\nPzbW69qoQxnLLjvnrR5IPOQTnvSOrL8oHOM0YQRh+euMUvMAVSIzk4OaRWKsw696fIm3bx15\nFMmy3zg84wQKYCcOxYrhR6UgI2vg4AOMUqsTG21T6n3pvB+cKcN70AMDbsg8GkR8MeOMYpQW\nCkbeT0oWNWXccjB6ClsMXzgq/Nx2pJNiqGznPSnIwfOU4NMk3BVxjGfyp2AGG0BlzmpCC0Jf\nG31FRyPmQLnjuaTaiuWfcUPAwaYg2qCoO71JoZEUnaODzS7lwi9yMgU3BWN2756UDFVGk2Ec\nc4
xTpI/LbaSG3UiTBYxhufSlmZPMyOgoEVv3bSqFU4Hb3p6sdsjLlSDjHrS9CCBt4/Ohc7SD\nkbuq0gEVgVUA9eopd23JJ9sCmLCFGBTlUNGWzznpRqOwxV8raG4X1oGQ2epz1peJIypOVzS4\nGwA8elMYLgF1deO1M2hjnsvNK0i+SzKCSDyKbFlVPGNwzSESFkwTnc3Uio1Z2jwq49qFhG4F\njzjtTt26MkDA70xCNGW/djhvvGmNhRtBIGcCnLnzMDjjk5pm8NKcLzQBJ5wWPnkdM0CQdNvy\n1Ev7zcG4IPB9aXcWUAfLzQArSN8uRndxTpCCxPVgOaCoX5geO2aGgbO8nkj7vrQAn8ILcBvW\nlZG2gKxKZodizDIxt/hpqttViW4J+7QAjrtbrz1Apo+8CVPX1pzrhlAbgc07iSQEf/WpAIGC\nkjbx1pqsPIJ+8M9TTuV3Js7Z3Um7pz8uKBjfvY4+X1p6ska7EPJPQ01YiRuZipB6UmC3PvRc\nkeufOwOmOKRC4LHblOjUm8YLbTu9qN3ykLnpyM0FIRY1VyD/ABLkfSiNCseOV7jPelb5ck8j\npTGYhQjt8mc4oYMc6sc7Cqkng03Hq3OeRQ3lhECn7p496VVVWy3c5zR1ATbGuWLbRnFOyjPt\nA3N2Wk8sL8/D5PSkOVkbaM9896YmLt+Ulvpx1pSp3KRztHNN5b5mJLdxTVUPuySGPGKQCTF9\noYDjPQU9gGYNIv601cYCkYxTwvltljv7gUAEi+ZIC3Bx1FNG3zBlsn1p8j5+baArcU2RhHGh\nVN3PX2oAYznvkgnAapGUSYRQN3cgU3ceQgHJ6HtStvhyVIY+3UUAMYmNsOQo6YFI0ZZGAbcv\nXFO3Kiq7fM+e9O+XnAzu/i9KYxm8MoLDtxiow6MGbcUzwBUi7o12Iu4txmmDYsnAyRwR70AE\njKyqqn5ulOkU7QF528HmkUqN2RjucUrcdsxv270CDgx5HDZ60iM7hmPzY4pWJWHbGuFz3pBG\nqAYPDHrSGLktg56dhQrmNsN8hPQUYChuckccUNsk2Dqe47imIUZdsqBketJ8jcN/rPrRJksW\nHQcfWkRVOMDmgWoBSu0EE5OM09hsJIXduGKGbyztLfLnNNhmRZGXHbueKBjdoVcFaRZG3Lhg\n2Oo6GhQPM5LZ680qjzCQFAHc0DGy/OxAATnLc5p6yKshITtkZpki9hjA/iB/Sk7BNwC9R7VI\nDvM+duhyMn2p0y/MpHyqtN+XnCfNjJ96Yv3fmON3zbaoB5jUruPGe9RtEoO5eQvepMmSMMFB\nQcc9qRowyj+EZ4U96AI2ZlZVP3etPDPnaVwM5p+zbkgbsfxe/pUa7mj3E5OaCRVMcpxIcEdC\nBTWHHAzzUm0Ebeo68UhUw4kJycYoGMiU/dVcnqc0pYRsCuM56H+VOVnSYgD5mXlqZtjSNGkB\nMnt60CHKzHK7MEnr6UkkJkhwMKq8sfWpFLEkAEHGfrTedpV/lUj7uaB3ETb2IAzwO9Oab52C\njIxjApisPvHCYP1oI8vr/Ec5FAdQMJMILEE57USD95jrxTvMX+7t7AULt3FS3OOaYEXlqWBz\n05p+9NjpjJb24FNZd3ygc+tOEfByMDGc+tNgRjITajZX1pylmwcbQe9OyeC/I/uikaMbgVP0\nFSLUJmbIyu5MY/GkMbLKMNyBlqckq7WBjO7pmkZSrKcFV6e5oGNeMspdWz3peDINnTNNVdsc\nmTjn5e1LH8rD5gwIzgdKAJZZkZ2yPmUcVH5itt3qcjncKNp5JKhT1XuKj3Ej5fu/0oAkizuD\njLLnnNLuLblXhM9D2pQw81SDgheBTVw5LEbT0J9aAEbmQBfl9Rjg0SyPniMFfanLKWUgcjOA\naa24ZKnB9PWgBrKm3dzj0HrRuEZBBy56+lSMxaEfwc5ph/eE7UJOP1oADGHbafuDmk2jzMRn\ndu4P
pQuOOO2D9aXiPPHPbFMBrfM2Q3A459advJUgYGBzTB1yF2hj+tK38QbljxgU7gKrOygh\nccdaGVjCdo2kn5s0iEMoXkFeuaXcWU/N82cj/CpFccAQy7sMSNoH8qaD5cjIThsc0jfdw49y\n1L5a+aSGySOtMYhztyp3DvSrJ5eOT5nUf4UMwVlUNjmkbLH5zlgevpTExyyBVLOCe+QOntSN\nKCuQuMnqKAvylRnGfvURthtrqCOgapKAfLkbs5HFIoG3BHI7mgKFJLknHTinmQOBiPHrzQQx\ni7XT5l+Y/rSxqYUBPc42elJyyqTx3FG1my+Mt1xQArzFI8Z6jrQ23cuBvGOtNVFZUI4Poafw\nH2Z256mmMjXgE4yCaTafLYHhc59zTpFKBVVssDT1/etgsNw9aAGBV4fdk9ge1Iu/kFgTnOKA\nA3QZOcfWl2bTuIwc4p9RAp+Un72TigyFVBHygj86aq7mI6BTmnSRmRVI4UH8qYBtJXp1pvG4\nAsQ4FOYZy3zZx+fvTlUlQ4YLgYO6oGN+9MwDEbexpOGzIp9iPWl+Y7iW+Y9SKRdu3IGB0Jqr\njFRSq7i2UHJpGkwp4LhuhobhsIdoxyT0pV+8wPG3+GpAZ86xg4wRTlZ5CMjCUhw3yhmGeadu\nKsM5KA9B3oJsJk5Jf50FBfbgkZ9PWnbldZMIQeuKY0ZkIJONvSgY9yyqB5eBnINBkZ1Bl654\nx0pkOYmGSSe3NO3EZz8zZyfSmMaJF+Y4yW4Ip2CqnJ4ByFpd2z5yOGHA9KQryCR+JpBYaoZs\nBQeueae3+uKltyMO3akZnbAA56fhSBtrFBg+g/rQIHUSAljgA8n+lHl7cgDbnkBumKVtoIAf\ndx2HFI8nlsq54x1xn8KChIw3l8HgnGTShWjkI3ZA75pRgkgLjcOVz0pFUEFAMbfWgA2jHIO/\nqKXAA3AHk9Kc2QQ2duBjHemL+75zluvFMlCn7wIGV6Ed6T+IAtkZ6dqVjtHyg5NJj5QXwyAH\n86B2FX93IQhy/cNSCb94XA3HoTmjcdqll57r6U5Yw2CFzQJoYzrkbGyD1OOaVs5yG+QdRTlj\nSOQnaQ393NNYrHhyfvHGR2oYkSLndvHI9KVM5LLytEfDFsbmFOj3bGCgEdaRQSDlWXGD0AOM\nU5oxKxDHacZ3YpOdoA5J/Sk2hZPkfk8Z7UwI42Z/MU5yBjHr70RqyhV7epp2GX5MADr8venD\nKMC/zFjjntSFYauCdxbPcYomUsfkOc9OOnvTsos0hKny2+72pkjblDJ16EUDFMaj5l5XoR79\n6FXe2MbMCmK25iMFSOnvUjRssinjPrTEG0x/N95j3puG6qPm7AU1VeTOWwpOCO+aAWXIB+7w\naQLcfIPuKTls5PpUnHzYH/Aaa03lqoHJpzKGAdTz7UDETLSfMcf7R6U3+PJP3TkmiRCxUZyu\naQL5jcggg4I7UCsOdgkjE8nvijbmEt0Y8Y9B601n8tc43r0pRnaSR94dc0AIUVcAMT/WnQlG\nYDZhh60MxRVUj8aHI6hTn1poY5WLEqTgUBtozj5F649aGx5Z4w3Y0cs2cbDjp2pEjFcs2d2O\nw7U9Q5Tr0PI9aTKsyufmX+tEaljz8xznIoAE+Y84GTRIrfd5AzkNSqw2ElctzSSM5UO33em2\ngVwblgMBhQqndgc/SnRkFCrLjAzkdaS3aQBgHwOuMUALDGTkiQ5JwVpz/NI6MokAGNwGKYY+\nvOWIzt96cjS7gcZdhgqKQB5bKvyLkeoNJCflKgAAnnPenHO0qp4701cqeQCVpgLgyDaowy9z\n6U1ZPLOQPkp3Kr5m7OTgr6UOA0mOq9gOhpjGMT5Z3jDE53CnR5ZTtG6m4XIAcqQejVOMwqdp\nDFvzpi6i/wCs27icD8OakWYyADC4zycVHtO0FjuUHkD+dSblaI5Uqc5+opDLcLBmXcNrZwOO\n1WfOjj
mwgyM45FVYtskZx94jg9xUsCiYgEnKd8UhGiqhflU/P1rVtWQlf4jjBUCsq0ZWJaQY\nOdorXt4RuDL94cigll2zkDXWNvOMDmt+xaQIVIwSc5FY1nbtM7Ow2puxjvXS6agVed2OwIqH\nsaI0tMk8mRSMk/1rsdPUyuAy4fqK5zRVW4Zg6gKp4Ndtp8EW4OOobk1iUjY0vdGoBGD6V7F8\nHdIN34r0t13Hy5lJ4ya8u0+2+0ZPTuDX0D+zLp7XXigsykxxIXZvw6VBoz6rhAW3RM5wOMU4\nccGl2iNQByMYHFMbHODzUDHYG0E8mlY7fmPSo+h9qXYzNycAUgJFb1oVvlNN6qRjn1pegHIo\nGhwHFAP5U1X+anKfloBsVcNx0p4yDjtUa/d44NOLYwM0DHL9OaXcFUZ61GxYNxS9jmgYqsef\nSlJpi5OOaVm2n1oESfw8Gjdt4zTVY+nNKvp1poGOOOxzQzY+lNUDk0Kvy0MYr/N9KVMj6U1g\nduelJv8Al4NFhadSQc4Bp3JGO1VlkO7mpY3JJ+bigCQYzx1pAwycCkHpSj73oKQD2JK+9Ivy\ntuzx3pgbcx5pC20etAEm4ZyDmjzBz61Hj0FOwGoARWx9ak48vFNxwT3pxYbcAUAIvzfhTs9a\nbu+Xim4+apYD+nTmkOdtKcryKMnr1qgBf0p/HSkX7vSlxxxSGNUHo3SnHH0pOooyO4piE9Kc\nV+XOaTgdRR6cUvMYBuAMUhyvPanfjSbcdTkUxoT7vB5pcbulDMMcCkB9eKkBerDI5pd3PApF\nPc80K2eSKBWBQd2cYo4LdKB3OaUNxjFAAvyikwW70nY0q5Y5H60AH3V65pfM96aF3ZxTcYO4\ndKBkh7GkVjjpQCd2e1A+ZSaYh4+ahvl6nNRr8vOcGpPvcnrTGH8NH04pA/zdKGkHTFSA7btb\nJoGAKYZOlOb5ulIA24b1FG4FsCmgndincLyBzTGHO4+lJxShfm5OOKOB071QB3ODSZHQ0J8t\nLnqaAD+HijnbSJksc8ClX3PFJgJ0HtRwMHFAz14xSthuBSACM8k0xs8HFPXO0qeho247ZqgE\nHXNK3PQUD34pyrhs5wKAEGBSUY2sfSjeG7cUmAvODk0m4L1Gc0jY7dKGXavHSkMbxznpScq3\nHSlUccc09GHIIqhBGvORyKmVCfU45psMeBnNW4l6ZXOT05waAOS+K3jqy+GPgLVfEF86IttE\nwRWbbuc52jP1xx35r8XPiN46u/GniG91F3O68laRmbsCc4H0zX1p/wAFBvjtb+MfEC+ENLnZ\nrHSH/wBJ2n5ZZfTHsCK+HJmdmba2054WuiCREnoZ1+4WMxQsRzyx70t1dbbWMIVJHHvUF1IY\nd+8fvM9RVa4MUYiZss561sZMVmabADbDjPSnbnaMYkLEc81FwyBsZyMDBpFcLjjaBw1PoQW3\nYCOPcQSx/KoVmRbj5MiTH3u1KXEbbSuS33c1Wk8xVSJFG4tgtQA6SSW8nCSEYz1pt1HGrAKS\n+ODUb3Hll0Ay44JxUMbbldskcYxQBDNIbddwBKtwPWmJMrRqB99fanzMMop7Dp3qJmZlY+hw\nPpTAc5hhkK7j8y8AdjSsGVFVBn1NRBo1XEiHno3pUttGNplGQvc0ASRzHAVXG/vuFRXl0PLw\nclhRH+98zG1WwSD3qs/yW4kkDF842gUxIp3MhSIeYcDGOtZ7YjXb1JGQc1duJvlYSJuz0z2q\nhKyySGNsbFGd1IOhCynavzh+eWqTITO44PY0xI1Rz/FuHFPWNShVvldecmmCG28Lsrsj4Oe/\nWpQr7dxPzf3V/nTY1LIzHkse1SR7oFycEHjHWkBLteaP5VwF52+vvWhH5nkLkjb1Of8AGqcM\nhZgqj5du3FSrAVYKWYlei9qBkqM3mOpX5COAKVlEQAcMuR+VSxh5GTzGCntt9qi86SYkuNyj\n0oAj+Xvk
Mz0SXDJ8qctnbnFSO0YUsW6/dqK4uFkKbQAwGG4oGRSbz8jEFBzwKfxEyoPlOM+t\nRSO6oHhOzJw2aWGN1bn+LofWmJksjeY4bGTjBqFlcQ7Yxzng+lOfcr8Agnjb3HvQv7yAlHIC\nnGMdaBEgYrIgGMdTTvMkZlwuMtxx7io2jEkaAcMeAe9XrHT0vpokeTYwOGOeTWcloM+tP2Cv\nA48V/FOxRojNFbuJCfb3/Kv2Et9sMPkAYRRhfpXwh/wTM+Hi6foupeI7iH94Ylt4W2gHaBwe\nB16191rJvcn864Nmdcdhzyqq81Ek3PFDYfIxxQsZVeFp3LHPll96ZsbPtTi23AIzTlAfrxTA\ndgquOlPX5e9Cr8vPIpy4/GgBOoIPHpS7flHcd6MdzRQIY33j2qQN1FJuDGhl2tkc5oATtRuF\nHPSnBQoHGTTAUdKU/Kc4zQ2cZ7UhbHXrQMXG4ZPWjkdRSLy2elG4rk0hsTA9DRR5vtRQTYx9\nvTHNImVz60rZXaCfelJCqcDrWQw+vJoZcx8imkYUdc5oO8Nk/doAbt+bINLtGckUpUDjoTTu\nQcZzVXASLO7BPFSDDZpcDOQKaVxwOBUgHtQyncMml27enNNXk7j1FUTcc25uRTGUde9Se5/C\nkHIyTUjEA28GnbhTcjqSaVcbckVQxu7gjGBTlYAHvmkkkG0DFNVvSkA7bnntS7vl56CmeYM8\n8Uv8OR0pAKvtTd25c04Y28cGk5Dc1QhRnAJOaXd3zxSFlxxQvK9aVwHMcjNIG9OtIvzNwaRy\nFPoaQDy3PJ5qN2PWmtls0qRnZjrQAsbc5NTs3y8VErbVwRSht30qkAvzdPWnN1xkChc9etMZ\nOSc0uobCLnNSdh9aaowvXNOXtQAK+GNJu3MSRkUMRu4oVsKcCgkXflvaj5dpwMU37oyeQacq\nkKaCkCttWnKRIOetNIPHHFIGw3tTJFLnbiljPqaafmzik3dBikBI33s54pJGAHFIx7U1srjv\nQIXdt6U7cNvPWmyHAyKTO7GBTAXORjFKmF4HWkUH14NOVevrTYEkeQc5614v+0hpbbbScj5W\nXb+Ne1RoABnrXmn7QFm9x4ahlCmRUOeB0qRHxV4lVvtDKvDLxXA6pblhw+PWvR/E9u3mOxGD\n1rz6+BDEke1aIk5yZAoOOAe9UblTsyD9a1L6MqpK8ZNZk8ZKnccD/ZrUOhkTbRcINuFU5xUM\n212JUA81Ymj2yFixbjHSoWOyMfKFoMygyfaJDGVw2cg1TmtTHNKAeVNXpcLlWPJqrIygPtPJ\n9aYFGZSuOQwPeq13Er4KnDDrUrbTwOx4pDsLF8Z4waoCh5e078biP4ajkLIvEYO7r7VakkCs\nxXrjsKqTNuUEkj2qiSNT82WbBAxTd21SqDk84xxRx87Dn604rJHGshIweOnNAFdifl2KQO/1\nqNRJGzkY2981P800DRg/MGyDULMM5U7vVRVInYqspkUqg+dv4jS4KsSRzjBqSZV8wMkm49SB\nTEw0ZZht3HijUdxke4Lsx7n2FNjDLIynhev4VLJ/sth8cntVdtr5LHcRzuXpSAW4YMoUevFQ\nvlZNvfHUVIzBZF9SMgVGyNISSctnt2pjsJ83J29RxUMiu69MY71IzMrLsOQDg01pGbdu4A79\nqYBJlkQjkk4xTfuOyFuO9OBEahmbn27U3awU8ck9e9MCNYwzFCdq+rU5YQWKE7jjtQ2cnzOQ\nPSnlQihkOQ3egCHL+WTnBHApW+VemDjOKVsZCHmm+aBGwyOTgZpCIycNuJ+8KFZWUgH5hyKS\nSPbIq46c59aFHzblGM8UAIzmRlIbJ9RSNkMVI4xn8akYKwwo27TxTJsZB3ZJ64qhgFLKCXGc\nfdqExyKpIIB9Papdo2sx6dhUaqWQtnafSiwyVmfyAqYwD1qOSMLGFQ53HlabwzLsJC9805sL\nyG59akQi7y
xOBgccmk5ySjDPoKm+QqMc1GsYXcWGFxQIZuIlX++evpTvmUuHXikjwu0YwOzU\nK5RmU8k55PSqQhknlsgJO00xmMbLj7h709o0k2seMdKbtLFD23Yo1GPLERgtgjoDTWy0m3OB\njilSPdOytSSDcSOjL6UyhseVZjsB96fLub5WXhucr2oePaTg5XP41HJmL7vXP1oELI2VjUH8\nTTVdvMJPPYUsrI0wynAHWm7Tt6ck9fSgQPGFOS+DSKywyEspxt5Gafn5cEZP9+m+XuXjnBya\nQxzLiMYGN3IqJk2gdeOaeqeZGd4KntzTJPlUZOB3PWjUBwcRqNo+Y849qDypbdyegpRJtdQ8\neU6n+lJ5mJGdlHt7e1AhF3MAQM4ob5GOBy3VaSMho9+c88LUjSfL8o5oAiXEcmJMEkYAJpfL\nKsGLAY6ikbKgbhuJPUinqq+ZlBt/3qNQIyflPQPnIXrxTcdPXuBTtwjYjAB7Z7UvneXyOT60\nwECgggcgdM+tLzuwSW9DSRqzHYBu/izSMSsigD5RwaAFJ2o24Z5pWwmAwzxnFN8wtMRj5cda\nN+5N5GO3PWkAnmDzcIOo59KAAHI3c9wKfvX5Sq9Kar7Q23lj3oAarMPmUleaXcXZhwaDuAAI\n3fShm6lV9s0CHMrBiN2cjFRo4GSSSF44FKjoGO7PIwDSqEEeFOWoGKfkfPqPu0ZypB5Hcihs\nPgudrAYpGYw4CfxCkHURmZc55HUfShlPmDoN/f0pf4cdORkmiRAeM7hu55/Kn1KGNlT0yVpW\nYOoyOvJp0jHzF2ryvPsaYcSNuHDZzt/pQIcw3MAnAxRCw8wljnA5WjeZGO47B6elIFTzMrzn\ng0wuCSMwfbgc8Zpqt5as8nU9xTl+8wPrTfmzkjK+lAgYKzDHOaeGWTBU445zxTdzFmGzcuMi\nmFmkjXeQGA6+vtQAOxI4XJ6j0NBcyLk/KQMbRTmG9fmBAxn0pi4ba23BHNLqMdKDwxIAC8ml\nVhkEHBIzTDJjJAypOSp/lTm+7jIyeRx09qAFzn5iM5OKarAEhRgDjNL1UENk0MflIAwKYhOV\n6nD9veiRQqfLgt1aj7yqWJ4pnl+XvydqtzmkAMfl2KMURyMFI+8fQdqXaZI1OcAU7n7wXj0F\nAxr5VuBzilbEiqOhpjxt5wwdqYyeaccL8xz6AUx2AgKrMx3BTjAoKs0XybVYc+/0pqkssin5\nR97OKdgt1G1+gP8AWgQ75WjAwc9T7UxWaOPJKsCeMDtQGWRQFY9eTjqaEyN7nlunNIBY1bzN\nw+7Tizs3lbVA6ncKRvu4jbg9hUe5klDMSxxzTAkbkB2OFBwMUkbRnOThugHp70skZZjsOIx2\nPrRvLLllCnp0pAMkzHGB95egVR+tDL5m0BcE8H1p/wAwVQoxz96lRn3MzDdjjb3o1ENVW3PH\njJx1pFJWMZXoeaIpSzHB2gck06OZWYOOe2DTAa1v5gbaSqe3Q004cAHqowD6Ukn7tm2uSGP3\nafvZY9pHPrQMYeoCn5cZOKc8nlx8fNnnimr8ygBgvPzN7U6QqvQbvQD+dBAnylflBQ98/wAq\nbtTaAMnn5hT1BaQswwoHT3pBJngcMDknHWgoYzeWyjcS/UemKRmOc4z6VLM2ceoGOKjXGRuH\nykYxQAvmFlG489xTY5GT76ZPalLKqDdyB0peHyCMd89qAQr4+VtvyHgqKQxkHaG3v/d9qGyz\nLt4TvTRtWYyb9sfQ0DY6VhIpVhhgOi0kSg9RhsdKJNqbWTk5z9adMcYaMYyQT60wYjrIcsq7\nVxg0MwYBM7cdaV/nkK7yFIzik3xyNz1xtoJGLxlznHrT9xXOFwexpGO2PCnGDyKcNp+bkntQ\nDGBi6ZY47Y75p67nUfOQ/vTVY5JcZXpgevrS87ueR7UhoayMGDlfl6E9RRt3YUKfqBxSq3Ug\n8AdDTIyGVnJb
kdM0DFP3g2Mp04NODHGNox1pke0R7ex5yO1A+Rdy8ketAg2rwy8gnGaftDNg\nLj3pu11X0LDkULvjZSxznqBQARnAboOeWpCpLYJwex9qXaY5PlUnPIJ/lQ25WwoyDwy+lADY\n1Vv4s47GmeY24pnAPpUzRsg+YAc8UwxsrFicA9P8KAGxqAcLkr3p2Fb5uh9TT4n25AGGI6Gg\ns8ZydpGMYYUANLHaBncCencUShJJNoGDwKk+YsrDAJ4qPhgDuzKG5FO4C7SFKkZQdRUcahsg\nrheuKl3fu3HO8mhkJwcNnbjPakLqRq25cjlRwFPahNjHIG0+/egxKyAZ59RSjDHPVOntQMRi\nGHC42nqxo3GTCA7GY5BPekOzaCuTzhuOKdNJtwrjPptoAeYz0ZseoWmbjtUAA5FL91lOcZ6g\n0kiNHna27jKigQ3cM7Twe9OUbmPOWQc0nmCaIOIuQcFvekbiPLcHPT1oACAFADZXHHvT48LG\nWDfOP4aN7FwzDK44GOlNCnnPyqetAWFYlhHk43H8qVvuOoIZgcbSOaaYUVV+bIXmm8ecGJJ3\nUxj1AdgWOD9aRlyCANpzn60mRCpMisQDxinNIGlGF3bhlfYU2SJleCFx9KXcvIJzn9KQsxG6\nMfKDyaRsbj8ucjOakBVHltjIbAyaCrMR2B54oRlfJPpihWMcbHHzA8LTAVcRyfM+G6c05lXz\nMA5WmsyZUkH5hlTSD5oumWJwB0oGN4j+fOXPSm7mKjK43dSakGJisZG0xd/Wj5ldQ3zr2HpT\n2Bsbs24RJAzE4OaXlCwI3HPODTWCjAI+ZifwoWMrkbsr2HrUjAyD5hg5xxmjhFBPDEYFDOYx\n8o+9xj0pApbBYH5f71ADsHzAUBBxhjTW/eTYX7mafIzZJLBVYYwtIq7l8tThhzmmArEbmO3P\np7UFV6kbAwwRTWQLuO/PGfxo2mSNu754z6UAGfLVjn5VGNtJHvaMSEZGOQakDcr2yOc0NtYE\nbiKQhqsWUkckd+1Iudo5BXvSqvy8Z2+lJyRlY+M0AhVA6xrkHrntTlIwyn7vTPpTVVWbb90H\nv2pAqqx5PXkDpQMVY/3eAwQA8e/vSHCsV5b0amf6zd/Ee3oBUm1o8AHOaBDEB2s0pw2Pxp+4\nr91T0pXkDKQWDH+9TchuOVLCgBWUhSxYZzge1Cxvuwi9Rk0RqqMwyGU8n60kbFQw3ErjlqBC\nsJM7YunUiljYDhjg96bGgVAVbav1pG2jbuPO7n6UASSYkkHBz/eFOkVVUMFyp4A96GJjkbZ1\nHGfWmspYAEFT3NAMUSbSWVdxxtOKYybckE4I7U9cLjacDHPvTlU8krtPUL6CgY7G3DD723g1\nDJu8vCnlj81WDEWkEik5FJIo5OMknnFAiNl2xhkPzZwFJo8xmkAkIHb1p3k+WzDPLDgUyO3d\nlIKjd70wFkYseQTg9B2pGb5wVOB6Gnq3lrtJw9JzJgEc5pAN/wBW28fnQI96qAxJLZxTmClS\noUuM9jzRt2uCh4HagLArbid2AA2D+FO7Hoqk/ephVWbdjj+dDI75YEFMdKYCNH+8CoQ5Pf1p\nd6qxGG3Dgiljj8lUIzu705WBZjnlecetOwDGY7js4BHJNGWWLywOM8mnbm78Kx5FCsrbjuwM\n4AqHoAixjkbScdKcyqMkBunO6j55FGDs296X5upfdng+1MpAflUbvmXHGe1KsjYI+9kflTV2\nt/rARjjHr70u9d/Jwg4NAXGljuDAEEDFSM4ZURRlhyaZHiRi4OI+m5qIVcsRkYz1oJDd83yp\nhc04KYVIBAJ5pFzIzRt8ijvSrHs3sDnnHvQAqsZigdQvqPWkZBtZTgkH5VowCwG3ZgZpdy8E\nj5qAGvvaQfL+Ap4IXJBw3YGnxGRYXbblP1FRvllJPK9uOaABi0mWC/ie1Ejbl+Vtq54am5fh\nMjFOZgqgA8LwKA
sP3Jt+Y5J7jvTW2Rr6N/eoZi/IqNpFYbFU59+9MB7KODhSW4HvQoaMY6np\niliZfO3hckDBHpSbUVzKCZCTgUCDb1H8XvQshZQEXL9z7UkaswYHB5+9T4cqrchGzgY9KLgS\nBmHzhfkpGmyowDu9+1JGoYMi5Yj+EGnRqpjdQOc/L/WgZYgU7g2cHrir26NWIUkbhk1QtVPm\nAnqPWtJW+X7u7PWhgXLVjIqs3Dr09hWrZ4lj3MSOeGrIht9yD5sZrZ01tsPkn5488PjvUso3\nLFTHhj8wzkit6GMSqskZLKT0z0rDs2MoyASVGDWzZkW7IgbIPGPrWIHRaGWdXzgDdmuv0qTM\npj6jO6uR0mMeb/dHT6112jFlk5Xg8A96kaOxtHDKoQ7e4r6t/ZT0gppt1fyKfnOFb25r5S0y\nzaSRNw+TIya+3vgFp6WPgeFo1ZRI2ag0PSz8q5JzxTRjOQKcwz9KZ0qRi5HbrS7u55NN70vG\n4dqkB249uKX5cc9aRDtBJpy4ZeRQNDeOooyAoprZXO0ZpU6YPWgGPVuc0/jb70xflPrSls0D\nHbc9TxS9Bg0zijfgc0CuPbDKMDBo2jbgmmxnK9eaByTmmMeD70u7GcCmbgtKH/KkIXnbkcet\nO3fKMcUzdjJxxS5546UDFDFlprdCOlC4zmlwN2e1ADNvHvTlU9OgpsjbcULnaO9AEysPTFNa\nQ7unHrQsnABFIrnJHagB3G3+VMVTjrT+OKOG5zigBwalHA7CmqRmhiPQ0DJlH4ik3cZFRb9v\nenA+g4oAcGG3mkOfMz1pC3YrxQD1zSYDt/7zkUM+0jHc0i8c5pMbsGmA/PXNHmYwKbuw2TTg\nwZuRmgB3WnbgMCmA/lT8bsZ4FIQjHPajdggHkUKuDk0DBU+tIYrdRjpSbuwFIG28Gkb5s44p\nlDtw24IpR938KbuHl4PWhZAccUgFDHAGKCfm6cUm7qe1OwTyOncUCQjY257etKSBjHNNYhVA\n6j0oHy5x09KLCQuaduwvvTd3qKaynrTEOXoKOrYxhTQvI60jZA65pMsdjrmmj5hkcGjdxzS5\nBTjrTRIZGcYyaU5pMfxE0bs5JNDGHfikkztOOKcrDacGkVvm55qQEH3R6UvTNGeoAxQFPSgY\nIzL1FOGS2DwaQKeSTxTQwGaYDm9epprYOKU9h2pW47cUAL70Y7rTW4IB4p7A8Y4FMA+914pG\n5wO1OON3NIRSAX8MUzd1IFPV+uaQAjJ7UACj5eetHJbuKd0UGkz0zzVAJn5vUUi5PWhchefW\nnZG0nrUgNY0H7uKVfve1AHzE4pgIaNx29OKMYNLuIU5oAYuQPlqSNefm5NKucdMVNEnPTmgQ\n+FN5wBt9+1ePftYfHS3+CXw5klhmVda1HdBZp1IHRn+gyK9h1DUrXw/pdzqGoSrBZ28ZklkY\n4CqOc1+PP7WXx6u/jV441G7SWRNLgYwWMIJwoBOW59f6VqoiueN+INeu9Y1W5uryTzpZZC8k\nmeXbrk1gXV4ZF3A4zwE7moppy0IG5jJ1IJ681VkBbDZ59a3irGTfMJNJu2nk4/h96jkn+XJT\nJPY0SSGRgFwozxTcFncMNzj7taEsYq+Wi4OdvJHeiaVZ5MN2649aZHgqxGcHnP8ASoGyTsQY\nXqTnrQSTbppJCS4OBwDUzM4Cv91BgbTVGSdWVlz8wrShtzc2ySrIBtxlaAIdQWO2miO755Pv\nL6DtWZNcLDHKRk84DLVq9c+crZyCcBaqyYZtpTy1PNADpGXyU8yT94OM4wRUdwQku7Py4/Ol\nIEhJ+9g96TaGyGTDD1pgJHvmjVWH3TkGpWnMisoTYc8NnioizMxReFxnNOKb4UxyCce9AMjZ\nGbdk/vAM8VB5gjVgwZty+uastIEXduG7O3b3NZszfNLGd2eoI70EIpSbpI8A7mJ/KoFyluAB\nuOeRT2kSBlUA5POR
60jYa3VgwZs9O9MtgFWVlXPzdSo44oym05Py+p/lQuHjyByDwvekZWuG\nJ24wOQe9AkOj2bsFiB1A7VNDGGzvfZu5x6VFHG24bWAwMkn09KtwSLNuXy8JnketIYi/N97g\ndARViKIt8rMdjdCKZh5FZVQKg+6CakjXcQCCcDt0FPoAbWhiYLJuYHAoSQr8rNjcuBj1pI2U\nsARgA4LZp8sKLlcZzzupDI4QqsFK7+x+tNUK0jHOcZFTwsm4AAkY+96VFkLHuAyWPK/1oEQK\nrQxngE9fmp8hdViAfB38mo9rSfM3zJjtToJFkZwi7iBkCmMS4Zri6EwOMnaanCxquI22rnmo\n5Yy0S7X2nPNMj+VgD879RQInjZljztJC5+aum8EaLJqmsWKxIJpHkyVJ24GRzn/Oa5WG4Lbi\nylDnBx0r3/8AZL8Bz+Mvipo9uqiS1WZS4YZ9D0rnq6Fo/Wn9l3wHF8P/AIN6Tahds86CV+Md\neleqx5Z8gfLio7eBdPsYLVAqiGMIAvtxU8H3frXIdI1t0fI5p6yE9aftHOaNo2igNRrRljnt\nUi7ucCkU8081QaihjjFLj5Sf4u2KavMZ9acOg5oATpgdaN3bFLx2o3EkcUDDaOKdx0oxilPz\ncAUAJjvnil9WxxQOBSHtzigYobjpx701vmWnfdGOeaT+HFACtgKfWow3zYPSpv4eOajON2cU\n0gHeWvpRSeYKKdgMjrimn73tTWGcHNDYCnnNYjY5mDY5GRRu3DGeKjVQ2CKkXG0g0CH+/Wmq\nfmpqqdhAPNMTcvXrTGWB8uRn5aibLMcMSBT9ofgmiOFvpSAVc7en40vC8GhlO3GOajPHY/Wk\nIdu28nn2pzYVc0mcc4zUMztuAxnPPFMCTzizYxmmqzbcHgUzByCDgVL95eetACN83UVHt7g4\nqZgOgOSKYuN3NAdBsY+bmpt2FIzgUIvzZ6jtQeewpAJ95aeELctxRs6cZFLt5xmgBu3rxxSF\nR1zxT+MEA/WmY9elG4DN2DkUbh1zk0/aGXOOKTaPTBpgNX71ScqMg8VHtI69O1LI+1QBzSEI\nzZX3FKu6kVS3GKnVcEUDG529aI1Jye1Pf0xTDlVxVEh93mnNheSetNbjFJy3zGpYajsg0v3R\nTEyG5pT60BYTzN3bFKJCoGTTdvFG0sMjkVQx6vuJz0pzbVU81BltuCMGlUErg9aQrEg+ZSBx\n701Qw4z+NOVcfLml3YwMc0CBlIUUjZXtmhs8YPNJz1zk0wsL8z9sLRnatHJXrRyQaAF29D2p\n7ADoeajYttA6Zp8age9MB6seD0rlfitbi68I3GAcoudo+ldVt3d6o+KNPGoaHcrkriJsc8dK\nEI+CfFmJFcE4YV5rrTDdtUfjXq3jO12ahMikkZJ+avNdUtTIr4HIOaok5W9iZoSD0z1rNMIV\nGDtnjrWzcORGwJ49KxpHLblxxVokxrhXWPGMgHr61TaPzMsWxx92tG8zGCAOvSqLLt8twcE9\naomxSkhCtlm6CqFztY4HB9q1LuNblG2nHNZc1u8cfTp6UCKjqqKW7ep7VC2FXKcqe1WDb7sM\nW+X096hk3NGWxznbVIRTOVwSw5OOKikjLSHHPbFWZAEY4G1gORVVWO09hnIpjI2VmdsfIAOl\nSL90bsNx0qLaJCXdiEGR+NNiBjJxzxTRJGy9dpKP15/lUTNthOF3M3Uip/8AWMc4wozSbSpZ\ncbQep9PerEUo1WMhI+rdR6UNE0auX5I6U/Hk7mHBzjPc+9Ql/mPfJ/ipgQRvt+YfN6g0Ku3P\nzbYzzn+lTMudysNmOePSq/mJ5YQP1PpSAkljMzK/VBVdgI/mXjng5qwuHkBDEEcDNQ3EJb7w\nGAcigBiqPvMMEnmk8tdrckp1xT5OcdcEfiKjb5CqIN5PWqQETId2WOB14qR3Em0hCGHWntg5\nAG49MUSP5igDjb1pAQ
7twbHAPHNMZnRcKRjFOkZfMDH5V601j5mQvBNMYKT+7OBk9zQV+Zz8\no2jNOMfnbATgL096ZJtUFVBJPWkIRFMisx+Zu9N3FEwvAzk57CkbMZPzbcjFCRrzzyByaBsS\nVdrb1+bvSN18zOCPanKqqwIbdnikjbargnO3rVgMbf5OScOxzSfLncOfWn/exI/Ix0qPzAJc\nhfwqOoBuX5TjAJpPNVHKbc80M0jHcygJ6UsjFfuqGNOwh6YKsd2DnhaN3XJzTNq/LIQQ3QrT\npMKcD5ie/pTGIMhiT07U5vmPIyuMUjENJhjwozxUazFlIUHHWkDBsquCmcUzMmcDCH72DUmS\n3LNzjimSqd6uzDAGKoAYNyD95uaTbhcKPx70rsNyvgt2oaQg/d2g9KADY2fMXG73prSJlQq4\nHfPrSlWznoR2pVwVbI5oGhuQ0u1jjI4pkkmz5Ry1OUfdRhjdnnvTQrInycn9aQhiA/Or/eHP\n1p/ClVfAJHGKVv3m3I3Y60xyBMV+7/dpDA565zzihVKudr/nT5EZVAHLdRTcmT5duDmncQY3\nMMDL9qbtYMysMt1NJIrNnJ2n7ooTfknPAG3NAhIwGOVGCKe2Ub5eABk5qG35VifWpPlbqTtJ\npDIlY/eBOc5GakhddpZzuf1ppwxyOgP3aFjRhIehz0piHMqvtLHJpFAjyGOfSlVkRc9c9qRW\n3ADZ83rQMcmFB2ntUZZfwp0cgVmUDDCkCtJnsO9MBPKVVBwcdeKN25SwGE/iPpTk6YPB/SiN\nHKBcYye9IQm1Qg3LnPIpDkHcenSlkJUhTy/f6UkiYbAb7w6UwGx7VztJ8zNOGUkYgfKetI3y\nxKzdd2BSNIVVmAyCcfjQAmQIyOQM5pVCjHGN3rSmRcYZuQPu4/ShcBmZug6A1LAGXflQMhaZ\n8rbdoIPfNOX5mOMqCOtG3y5FDNkY4oRVhzbW6ceoprKrLgrjnIomBjAC8kn5qVWK7scgdjTE\nIcswA+6KaFI3YGzvT/LBXcflU/nRHhVYvk8cGmA1l3c7u+DQ21eg4BoCjdgjBPNKyiNmUfMc\nc0C1Gs2Wyo+U9cUqszAlTlfehS+QqgBv73ahTnKBckdSP1oAQZaMlfl5po4JTgZ55pzDkIBw\ne1BH7vI+hZqQDFjOGLvvI7ClJHG7J45xSxxty2cEnpRtChj1NAkJsDMm1c/NnaD+tBwzvg4y\n3Wmqp5DgqSMgilGGZN3GOKZQshH3QME0jZVgGPPpRzt4HznnmkVt6hmHIoAcMliB8y+g70zz\nPl+ZMFTTiy7gUbaRQW+Zn/iPGKQgfgqSc0EAsTtx/tZokwmwld/9401o2lyfur1C0AIN2Rk5\n7YoU/vDzk9xUm1WUM42DH61EqGMjcnLDO4UAOk34Dfd4xT/vn5WG3+770xmfamTkZxSNuaTG\nNozgmmMfuZsrIwZfbrTMou7a3U9DSbRuJB74FL5Yj25+cN1xQAsmIygA59u1JF+7Zw3zBqWR\ngmV5OOlB+bBGAelAgUHcyEkDqKGycbv4uAfSl5kBU5UjqaYzDaG7jgHtQA4Pyq/eAPNAm8oS\nZ+Uk8HrTcBvvLgk5p8jGQgIenXigLEDZXgHhutTMFLqQOgxxTlVYRtBDBucU3dsXcFyScbe9\nLUQc9hznvTZN0ak4z6insxfJUc+9N3sqgj5lbjNMZFGvy5I606NvLIcjJJx+FLsZYXOOvQ0B\nisajv70hWF8wrIQn607dtRiTkkcAUyRmVW7EjrSoDhGVtwGM8UwuyOMDeCrbSBzmnNJ8oAPz\nZ607AaQgDJzzT9qSyE44AwfagpIhZUkblsL3NCptyGPfhfak2ouNvGeOKXaqtgnoKABfmkbn\nA2/gKRVXYCTn27UNhE6ZJ70bhIoUZHr6UAIwRV3nPmZ4NP3szAD5jjk0eZuOAcheNtM2nscZ\npiHPsVlLLg98UrfMvJDe
hApG+TLj5mXqKSRwpEgJAx09aQBt2oM8knHH86Ar9EO0g/nTmUqu\n1j94ZxUQy2BtKOv8NAEm7LMc4I6rS79xBX5VI/Ojl2YEc45ambiyFSMLjg+lAC7lVfnG7J4p\nSoXpwTxxSrzEqYHHO71phO1SWGXJ4xQAKgXau/DDrR5YMhzyv86ftJXcy9uT6VGzHJK8AjGK\nBdQjkBUseRnAFJwkhbcd5HHtSrhYwrcknGBSMjKBuHQ9KChVVtpBYkdST2pN4VRtbcGOPxpy\nxtMr5+VSOlMRQsQwvTjFAhzLtYZJYjtTV3fOo+dG9fWnbjJJkDnGMU5Y1ZCoBLA544oAYyj5\nSMq2cUkjHcd45Hansp85GyGz2Hak5eQnGWBxQAmBIFIY+u0UfcLkEBSOaGQo24N1NOWMLJtB\n+U/ezQAiP5i5yABxSKz55JPp70Tfe2gDHtSvn7uaCQUmNSAPvUNmNCgGTjNCqWHXhT0pCrbS\nxYg5pjG4MseG+Xvt6ZpN2/BPAHAFSyRuVQlQT6k80kky52NxigRGqnLFuh4FOEZ24ZfzpyyR\nsQpFKFk3uGbdG3pSGMTK7gTu4yVFJuXyduN0mc/QUu0bcL1FGQkqnGEYfjmgYj5OChIXGTRx\nI2TxxkULuUMpPfilbduwflIFACbkb5WOfcULhuny49e9CP8AvOgPsKGby8t09jQLrqOVt0Th\n2BbOAKb5mPlJycYGKRj324b0ojQMwDHY1MBWaPcg2ssY4x60oGPkVs89abJ5ysVQZ75NOC/M\nA5wzd1pDCUpnG3gdcULgxjHzE9G6UpjCqMsN3pTV3MrBmBK87v6VViUmKU+Ubhxn1pPlJZBk\n7hkUMFZScbuPypNp2qE6+nrS6lC/LgLnYMct70kJGMquWz37+9OZmXaoTKjnj1p3liRdx6Zy\ncUMVhg2+WxAyScZpJFDKArcHvTjnceBnHGfSjYIcFTgHkigYx2C9jgCh9zld2RgZwakkiVsu\n2Q2MhaGy4Rs7j3FAyE+XGuTlj1pyFdzN2I6+lEmI5No+ZW/nTGIDZ2kDoRRchi7vlXC4bOfr\nUisWYAdVphdtoJO1/X1FLIpYjjaR1xSKHKfu7hgEYzTljEUjdJB/eFRk+YwwOU6fSkaQc4zk\nnOKBMnZTIyoTs4z7Uw7ZMrjJHb29aSVj5gVRvYjp6Uka7uB9TQJDtqFTt+ZB0NRD5Y/MU/xc\n1KqlVPy5OaRcq5AI47UFDVYbXwNpzTdpVdpbPfmpN375w3VenpRs/du3/LTPFAhsasq5KBjn\n6UeUOTy/vTnVZkXB5PWjczNt2Yx39aQDY9isuCS3vTvmSIgAHdyfWhemQMc96FUKrAN1pjGt\nGDt3kbeopuxmkGMOBzk1IVG0hjnvgU3aWjZ84GOBQAq/M2d3zjtT+TN88mFxnFNX95g4zx1F\nPRcgSY3t936UEhH1IHb5s+ooUtGzErkM3ApPmRgDhiR1FOkYqRg8rz/9amAvnMrNzjtRGRnI\n47nNM2+p+983PahceXhgfrSAfJ8uGY7O4pqyIyMzEgnpTFVmVtw3d80/5ZBkndx27UwEkLja\nQobj8qdEw+bj5gM0xNzMqnkdeKe0iCYkDGBikARsm5goOPWmRqOSMlQeaau5skYAJyalhUsp\nwMjsKB3G+ad+5+Kb95TtXAJ45qRQOcgDjv60za4w+/DHjgUATFHVd+cDGMU3y0ByBhiMVIqp\nFGAPxBPeo5HVZBjnufamIRVLLhhyp4JpQNgbjA6j61IsiyfKDz6mm/OpCqOfWkwEO5tpLbOO\nR3pdyqqSk4Y8fWhozGw3nI64FIIfnBA+XHegaEbdyvQMMijzGzgJtQfKKkbbHEuD+8zxRuaR\ny3949KAQ1ljkVWDbVU8jHehgdrHoScjFEjBWZQcuB6UvHljA+bHSgBWYMuAeT1pCpZs5yxPS\nk8vy2RTyepxziiGYNK4J3H
nFADpM+YHHIU4IFOXhmYrz1HtTkk3RHYAgpq7eQylmA60wHqrs\n52kMCMkZqJWDKBnameoFH3k+X7pPU0jIWUq3Jz0pBYB5asBz1zuxQyiLdhtwY5z6VJujVWfB\nBHGKiH3cnljTAnEYhlG079w7d6ZgfOHU+31pz5VUKjA6e+aZ5hVsEbj059aBDPLG4DBU4yfS\npZP3TDOPmGM0EFF55bvTG3M4VuAPxoAR2VY9qHcB97605TuwWXgcGk8nEYbIBDfd9ac3yrj3\n6UgFjxGvydzU6xnBYcvnJ9qg3HeRsyB37VJDmRSScL9aYrlgbVPILH17VdjwsaksdzcCq0eF\nhAHc4+lWYI2kkDHmJeBmhiNS3tzLCqlsAHkVqQxsikoCCo4IqlppORH1yc1oqowUGQfSoZSN\nPTZWbGzG7uK29NVWk3Yzz3HOaydH+6zRgKT1rfsbfcwbdgn0rMs6bQ7f9zuDbzk/hXV6CrtI\nr4xt9aw7CNLeKMpyvf3rqdMkXYNi43cVBSOq0uCS6u4kQ7ssBivvT4Y2gsvBOnRpkJ5YIz1r\n4h+H2lm81qziOSHmAb16ivvrSrVLLSbWBD8qxgZxipZWxYy3PNH1NJzihmHHQ1mUGeOlO3Hb\nzTGY4xSqD3oEKrbl5GTT9wHBpi8HjpRIfakUSBgO9I3PIpFxjnijjPpQDHZGBxzQrE5DHimh\nst7UMRjnGaBIeGXpTOVPPIpGI2jHWj76MehFAEm7vmk/i60m4NGOOcUbsKMDnvQPoO56np2o\nz0o37gBRgHvmgEOJ4xnmhWP401sZBzinZCDOaAHbiMDGKFbqKjRmbkmn443A0AKW3NzxSfdP\nWmM2OTTwM896CR3VM0pOelR57Zo8zHGKRRIFPUnik6ZxzSKSDz0o6njimA5WO3kU9j8o5pnO\n3Bo9c0DF7ZNO3Z+lIqhhyc0q9cY4oANxxg07IwT3phJY5oDBmHFT1Acp+960qr8oo3du9IDh\nTmqAMdacrbfrTUYbhml3bsgDBzQIcNzZHFLxgA5JpudgA70qsMY70APLHsKQ9vWkJwlIrg8d\n6BjgOuaOi5PWjdt560jc9+vSkMSRgAPWncrTGwCGJzSqwakwF+vSnB9oprNxg0bcpxVAhWYY\nzjmgt0x1oVvkpCD96kMVmwvI5pWO7BzTd34ik2jrTELk54FIzfKDnil3U1lODipYxvmYYjtT\no2IXJpvlluc4qReAeKEALIxPTincspyMfSkyOM80m8rjiqAeqjb6mkQBqMbjwMUrMd3AoAee\nEz1pOdp5pvLY54pWHpQAgHBHWkwKXB6qeaDwKADdjtSsSUpNwA6c0L3JqWMVWDdeaCx2mkVw\nM5GKN35UIQ4ttUZHNJuLHpikY/U0u7jinYBdw3AdqP4j6UqsAD3puRj0oATcWzjpS7vlIo+X\n1pOQOlACiTK+tKrBh7Un8Jxwaanzc5xSsBIcFcjrS8nnoKQ/dwMGmLnbknrQA7gEU/hjnvUY\nx3FTxrleRTAWNSze1WYoWdiBTYFOCO4ryX9qf492vwF+HMuoRyQtq12GitIJCdzNjGQB6f0q\nkrkdT50/4KEftMLZ2g+H3h285kHmalcQtn5QeIsj361+cTawPmRYdwb5QMYxxj0rT8XeJr7x\nNrl3eXk7S3d2zTSu/XLHJH61zzTC3UuihmHyj2roSM5MgXMk0m/5WHTFUppArkBvl9KkkuWQ\nsQ20/wAWBVf+EMeRnrWtiL2HYO0mQbMcjNMRZJF8xXw2OfXFE27y97NvB/h74pXAkjQRcPjq\nD19qYEbNlc52jPQdqbJMOW24GOtLLny2P3D0/GopG28lNzY6rQSV4R5y4VeSepra0uYSKtsj\nJjOM55z3FYqwrGfNLE57CrNoogmja3bZg5J+tMZc1GzS0lMco2yq2Rjmst2MinPLKTVp/PMs\njyMZCzcGq83zXAcDanQ59aBj
UZJNp2keoNKrFrogIzFhjinhXVnaRSgHTim2V0IpC5zkenpQ\nIX52JVvl28fhS8eXuj/CnXC7N7iTerfMG649qqSMsYLkkluBz0oAbM37vGwbz/E3asuRVUZL\nkS5wAD+tXLiUw7V3bt3GetZ9xINokJy68DFBIzapkxn7vUmmsp3gEbBjgimRsZoTu4yealVT\n/E4KjtQUAUr8oGHPQ+tKymWExsdxHNNZmbkZ/wB6hPu8cUwFjQx/MeRjkVZRSisEXBNCqJYS\nMjdjtToWWRgwbkdRQMk8sJt2t8/WnnMmWJwBzUSOG3nOGBzmrUWeHkH3hwKBkZkTa+OdvQ4o\nViep496iaTKyKflYmpJGLKGbA46UhDlkWOFuQBzg+9Vo7vG4Y3Oo44p24RyI6hW9VqLzhlVA\nC+/pmgQsWB9xuo5FC7AuR+6foR7VJ5IRVDMGY87VpVVFYNypz1NAhu0rkHo3RfT3pYYvJiA2\nYbpnP609wFkHlncrdjRFz88jYZWxj2oGKqElUUAMx53HAP41+lv/AATK+E77rjxFdQbRbrtj\nfH8RHv6jNfnh4Q03+0tSgTG+R2AVD3ziv3A/ZT8BxfD/AODulwlWW8uIw82/qSMgfhjn8a4q\nkuZpGsD1tozJI2Omf1z/APqp4XbjnFLDlySTSs27txWXU3FDDFKy+X160xG+UinZG05OT2qi\nh6kMpNHp6UJjHJx7UZ7UgFXO4jHFKM03cc09VNMVhAw5Hen7ulN+Uc96Tml1ESK3tRuHQCk5\n6ChQVbnmmMXI49aCPmoPzNxxSHO7nigYrD5qTktQ3rSA8+lAdB3T3oA+U8Ufe6dqMnPJ4oEM\n2+woqT5aKBGIycdaQrnsPeljG7nNG055rMobHGF4zkDpT8CPknIoUlF5oXcyknGKGPYco7jq\naVh0z1pFzt60K2T6mgBeBzSqwaTg4FHG7kUgxGvXBo6iELnfxzS4O3J59qGb5gcUfe5Bo6gJ\ng7dvekYdMdqbkrUmPlHamBGyinbO/al98U5ugI6UrgV2bDArSLJ1UjJqUxhuACaRY/LfigfQ\nfvMaYApEz3p0ny8g5zSZB7YpCEjY81Kp+Un1qMMu2kVuAM80ASrhckjikbbtBxxQfmXbTXXa\nOTkVQCZJXjgZpdpZc0K3y4xxTdxx1oAd1XBOKaR6cgU3lmp7MBjH40gELHrUkbDr3phTvmmq\n3JA5pAWSy9aiLgtQuWzmm8btuPxqiWPZjz3oX/V4xQuR70jMSo7GpGOGGbNO24Q9yaXg4IGM\nCk6LnvQA0qStG3aoxzTuWUY4FIzBD9aYDWU5HalVdxpGbJ5p4xigBrL/ABCk3Cn4Kr60xhtX\nPekSKuDTcjnFKOFOKRVKtnFMY7qvBxSFtq9OtJ/BnvnpRt7mgGLy2OeKchZVx1o3Dbimbjkc\n0MCZWP0NOnXzrOaM85U9s44pijzCCKlWRVZkI+8pBpiPif4laX9m8Q3mVCnzD2xx7V4xrTG3\nuJe4Y19E/HjTZbXxReAgqowV+hr558SR/v8AKnjGDVActdsCC2AwrGnzliFwDW9JbrJGw6Yz\n+NYztnpVIhmLdSfOFZTxVG4wuMD5a071irHJ61lXbDnGeOea0JKsihpCBxxmqUj7Q4znIxip\nfOMztuUx7hgYqCRU3ktyFGM0GZS2vH8jYx1qGSNzG0g4Ufw96t3EWFBUZbqAemKovI7R7SOG\nbmrGV5MK6uTvc8EUbQ2VTg+/apZEC4VVwM9KgkkWNwB68kU0Ax1CFP4gTyKhlmWaaQD5ApxU\njsI2IzwTkGh4xtJ4yw59aCSptVWbbknoDSncIgGYdeRT5lAUhD+NRTMuB64GTVAMaT72/lqq\n+SvknDZYHIqeTG4kr2+9UczD5QoIPUigREytHtLjJfgmofK2xsFIwvT1+lTs3IZ1JHTFDf3s\n7SDwKaAqxg5ODhx1B7U/5JEBZs
Hpg09trMxU4Dcn60jQ/dHU1QDUyoww3sOlQ7n8wN0Yc/L6\nVJICOVOJBxtFI0ISYnccfxUgIjIjNu2EHO7jvROmXyBkN83/ANapnXb82QFJ4OKhbZncWJA7\nUAMb958q9R2NRrH95s/NUiZbcSAgx1pvmKG2gcbcc0AIvyruZhntS+WfvbqBGg2j+IninLGW\nYgH7rZP0oYFdueT29aahMikhfbaalMayr3BY8Ypm0ruUtnsaofUZsWOPcrZGeKdw0bueOOFF\nIVEmBj5ulKVKttHzDpSHYbt+RDjtmo/LO7Hc81Ky54PUcY96ZK33SBtwOfXNIQxVO3Pb+IUu\nS3JIUZ7044WQBW3Bhk0khErBGGO+aYgQncRgNSBdq+5NHG0leOaBtXcQeT2oAGbbH6Gk3fxL\n8vGCKcoVlKngjnJ71HHnyyC3z9cUANbdIcY245zSegZueoqWONZQX67TTJGU8gYUnGe9MBEl\nbLDGD1polfIDJnnIJpzoQw3Hdj86Iy0z+ijue1Go0C7vm3yhCfamyF2bBwpAzu9aU/NwRuKm\nh3Un5wTgUwEZzwSecZpFZlK9jj9KVH3x5VcsOx64qLlZC3UYpMCRvlbCc1H5e5WA5YN96pOY\n1yD8rCmFgsZO3nPO2gQu/aQQDn+dKXZSSetLHtZSDlR2pNwXK9e9IBhkHltITuYdqTnywD2O\naWR90e8rjPGBRyykAdaoYOvcfd9qSXPGzp3xSFf3YwcGlGVXaB8p5zQArN0bHI9KbMAzAx9+\ntPjycDIpgQjILcg0AI0Z46ZpGkcyZxwKf97HGR60zOGYDoKAHKFbceFPc0K3O0j5fWmxneDh\neab95ioPPvQIWSRjnbg89KVwDtGT2PXmgwsr5XHSlZVdlDfKV6kUkAF/mO4bs8YHWjKMzL0K\nDj/Cj7snUlT6ims+442cUAESlIz5h3hucgZ20KpbgEZ7D196X5VZT0HoKSQ7sybsPjimAPEW\nf5iM47UD/WFhyuMnNNB8tQVG4txTjuXKkDHfNIBGbcvf5u3tRJiPC/eA5zQcjbgYJ9fSkZVY\nEZ4zSGKrYQszAe1GE8vrzSDZksVyAOKUFTjA68mqEK2Aw+fGF69qa+XVVc575pVjWXKkkD+9\nSOxWMjZtbdj8KCkIpx82OhxT2G7oMf7VBwzDv7U3aQpK5dScGgTHMq7flOSewpkijaqIwRzS\nsyxbMbmBO3AFOkVF5KHj3pAiONcZDEGlYiQknkYo+6uBg5GaaeB97BxnFMocrfLlxgjvSMRz\nwcN+tDEqgYryR/kUimSRgSNoHH4UupDEXLNg8bejCm7t8uGOe+7FPVWjchTw1NHDMSmSvUH+\ndMAkQtGDuwR2pA3y7OR3FOVv3bP6/wAJpNxkKkDBHFLqGo9f3h4Tbx1NNEh3cryKCx+baDxz\nSDMkm9evTk0xjolZlPYZyKRwTGf72fyok8xZAAcepoZmZhg57EetIQu0MqdWI70yTcqBy2VB\nxT8+SuFb8vT0qPcZBtyBjnGKYAATI7A5jxwPQ0LJuxuJJI6VIqfKG5EfsO9Ru2V3joDjpQAq\nrlOCCVOaIx8ykEEdmFJKivjBzGTk4pq7AzlCenRqXUBySMsjpgbO6nrSKwMTIB84OVzShRGy\ny8nIwaTyyfmDZ55UDnFMBcNgktlj1HpQy+WmDyBziiMoygjcARx25pBHyQCcBuSfSgY+SQuq\n9FJ6Z/lTdzOqlBhgcNSMqtuwpBPTPpSxr14Kv0oEIQqqS3GDnAoX5l3A8ngDpinI2GZdgZuz\ne9IrmbIMZKjr9aBCsBhUU5wefeozuYhulClnkUqwyG5/woC+XK5Y5Xt7UhgfMHG7jqRS4yM4\nwBSBlVQcFuaVNrHcxxnijUQ1mMjfLwMdTTdxCjs2ccdDTs/KUJzjpSjsDRYTFdWkwpOD1yKV\nY9m8lsKaZ+9UnaNw/velK0Ybgjgj
r70yhY3Zeo+VhgL60n+rj2k/N6eho4aIMX2gcYoLLt3E\n5Y9sfrQMZuIyHyQvGaUMWU7T93mk3FlwMNlsmlT5ZH7LigBFQRfNncW5GKUMVyBw/UrSKWGF\nbAVuRS5XzCCcHH3qAYg2q24/eJzStJ+7YMOc8D0pHcLhuoxjpUq27m3knA3RxsFZqCCNshQQ\ncMeKMlfUEcZ7Uj7dyrg8dW96VVZ8gHIHamUKyjnPGeTTdqyMqp8pzk5p33sg53q2KY5YqzZy\nRwSRSAe6r5uVXIBx9abuzLjbtPWgSBI8HOcZwKGKyqpY7SBkGgBWbdnPzDOcUwyGVtqDac5z\nUhXP7wgYxUUihWWRfummAvEc20k5bnNIzZVgwy2cA1JtCklTknkGo1Zc5YksTz7UAh+51iAX\n73UD2ppk2qGQgk8Yo84I7jG/IwPSmj5kDbf3g680CHMT5gDA0rMGbAYs2aSN3kXIB4Odzfyp\nTlskEAGkMaT5bfLwvWnRyEMSrYJ6+9MLkdiV9KYP7uflJoGSx/u5OB8p6hu3vRIRtO0cZ6jv\nSxqiwlX5OeD7UMDtwhABOAKCBBhFBbk+1PXEkZ45z96mSRssYAJw3NEeBGS5yDxQNB91fTPB\nNDRjgmQMvSk2s0Z9V5yabtOfmYYIzQSOmULtAfP86V2dW2EKQeScUiglM8Z6ZpsituZlGQAO\nvemMFXL7se3vTyCnG6nK4EeCjbiOo6A+lIIx94AnjB780wGy7Nm3PDenWkUeZt2j5V7Gl2t2\nGfXA7UIwZcYIGeKkaE483zSC65xs9DTvLK7gx560m7DBApKjke9OkkefMjrz0oBO7IxhV3Bf\nm7U5WZmTcvzmkz8y7jgEY4pv/LQfLhl/ioGKrnBwMqex9aX5+jjcaV42XZgYHWo16ku2Mn1p\nkscrMGyQQy9j6U2UFuRhT1FSIzNg9AOMk1GsRdWJbnORQCHfeVAy+xHc09FVdyAYQjuelNLf\nxEYY0hZI1dhy2OBQV1GRxlCyZ/GpAoGGL5x/COtMQsqrKvEpGOaRuWG35SetIY/5l+5wD2pd\nojxs+YZ5FJH8o659KaoK5VeD945pkCsT5m5h+HpTlAZPukbTuzSMwwCzALSyMXGwnanfijqP\nUd8zsW6sRnbTcBo15+f+VI7Bk2g/KB+NG4ExnGTjHFMYiNhSqj5uzHtTMNkAHv8Ad9amONxY\njK9MKKYiAthOfc0CsOZgzBFwGPUnpTVjdMjGEPFL5fynGOOaVfmRmzzmpYxija21W5XqakUB\nVBAyxpjqNhC8vnJPtT1YyYRQMYzQIbvHmb8fNtxQ37tgAASBmkXjBGNvegr8+QD8xxQFwkkd\nsMOBj8qWRWjjUEh2/vLUsrbpfkxlVxzUa7y2MgHrQMJPmY5GDS/Mqluee1N2MZMOOp/GjaGJ\nRTtcHuaBgqjptOcdadubbuXtxgmk6lGJ6jBpkiqFJUFmzgjPamIkdfmVmOMjmkVC3IxjtSGM\ndDn2Oe1Ku5VDHAXpjPWhgMaMqxCnLEce9Jlozhk+bHSiWFcFmJZz0UcYFTNGGVRtKNjuaQiM\nbokAX5M9hUrZ8xQTs2jcV9aNpVRhsMOMd6PLKsPmwSf4qBDZcbTsPy9xjvS7fnjwpxjNJOrb\nhggAr+dPRWbaN3OKB2EYHcxIBXrim78ZGe1PeNY/4sMfyqNY3Ow7gABu5FMVtR6gtGSeO5Pt\nTV2sRs4Xrn+lPTc8LBjndnpSOuFjVQCFHOaY2KsY8w4UrnqaYwznaRjNO+0fLjHBNJkNgLxz\nk0MkcsMYiPOM85pV4IEeQMc03y12uXY46hadgqBhckrmpKsIzK0bbj+NOyI9hydp5BFIpaRS\nSgK4xihcoio5AG7IFAuorMW2sxyeuKdyC6MFOBzQwBlQsucHikkG+TLjA3YzmmDEkjXA7bQK\nZ5oyc7iwOBUzKIYf3nzZPBFRfek9ie
KEBJt+YMTuz/D3pp3OrEZEic7SOMU5o0jO4jLjjANJ\nJjcQzAFh1pDQsmJEDbAGB45pqjyjyOWbigKZIioOcc7R6UqzBo13Aui/dx1oGKPvseN2ec0s\nfLEsM+lIdjvu+6uM803axC4BEZ6UCH7tuHjHQ01cNvaQ4J7gVJGq7fu5x19qYqj5o+hbpQIY\nztsAxn2HFTBixGMfUU2FUIKuM4457mpFPTK47bqAGtHtjZT8yKd30pww0Yb1pjSOu1cZB5J7\nGnsm88HaPQdqAGb2jB3HKZ6YoYfvCQOo6AVJJEF2kuDg/d9qNzrI3HOMAetAELA43A/hmlwS\nu9gQuelPVSGyF28ZOaiRjJMXBzz+FVYB7Eoww22ldljzs+b2pEz5hJwe4JqNm24GDuJ7dqQE\nhQHEhGX7UKUMf73Ik3dMUrR+WuQRke9IuQpaTLEdz1oAcybk2oxGT3pwRYmWPknOOtMVtzYw\neuamVv3rEJk4yDQBNtCtkHgHGB1q3buzN8y4WobVfMdfUjPSrVu4BBB3nP3aQWNSzXy5jsyR\n79q3IZdxGeKx4WYcvgZ5PrWrbLwuBnNQyjotMt03biNoK9K2rOIK67Tg9qx7Z22KV6qMGt3T\n2cbWONqnj3rMZ0Wj2891ciPPy469q7qzhFvGmACAME1ymhAqA+7BY/drp7dXmTd91R2qS0ey\n/BfTI9Y8SWC52ssivn6GvtgR+Wu0jJHFfJ/7MOkl/EZlkj8xIojn2bsa+sGYgY9utZyKI3BH\nek296Gb5femrxyeakB2DjdmnBtoz1pB78ikGMUdQH570j03PQ9qN3XmnYYbhjOc0nmFuMUir\n8tOxt6mpELtPY0qgNkmmbtuDin7h271VhoXjn0pGYdKTduIx0pG5yMfjUi6kit8uDS5qJfl4\nNObI6cZoLJGb8BQufxqIFuM9KlBB56iggGX5dxpvHrxUm35TjpVeTjgdaCkSqxWn8t3xVdZN\nrZNTZ6H1pCHbc8daVcoDzk0bvzpOe44pgLzjPc04kEY6GmbxwM8UZ4z1NAhWG7AzmlBK9sUj\nBcZ6GlQfL60ih3O7HWnZAz3NNRtoxSqvOe9MBACq7s09Tt+93pvQc0pG7nNJjHDHakzzkUKc\ndR0peM7s8UwA9z60hYbaPMB5obDUCEX7wp5Y9ajp3IXNICQnOPWlz83AqNWJ5xzQrHbk8GmB\nIX/yKFJ3ZApqsu3jOaVSd2c1LGPX5VweRTSwYg44o53e1I3TAp9QEwu31NJGNpOe9OziloYI\nUkM3NL264pF9SOaXG5sngUxIVFB70NleO1Cqo560dVJNLqMQMMcClb7q8U3dtWk3dzTGPAHP\nrQeF6GmowBJ9adySDnipYyJ3wuaS3ukm+UHNJdD5SBzn0rKt4ZEugVOB3oJN1lCmkAJbtik3\nblGetIqk/Si4DsnJz0pQ3qaY8oVlFLjeSelCKHleABR904PFIzbcd6a2Tkk5z0psZJ24pnmD\noQaPx5pMHqaYiRsfexRwQDTAx6EUu7IGPu1LAdkM3vRxupgjxIW7Uv3mz2oAdu7mm8g57Up6\ncUpBV844pgIuBQewxQSN1C8MS3TtQwF20ZO455FJ95cmkz0xS8wGuflznmm43qPepSu7rxQv\nC8DmgdhyjauO9KmMkGmp945qRV7ikIQY/Gp0Xco47jNNjTdyRgfSrSvFbRtLLIiRqNzMx4AF\nWkBmeJvElj4I8N32tapKkFpaxl5GbnkZwuPc4/HFfjh+1B8cNU+Nfjq61i6lMenxHyrO3LcR\np03D3PNe5ft3ftUL461m48H+Hb9v7Fs3CzeWfluJAQTk+g9PX6V8SalqEjRpCzo0GNxbHOfe\nuiCsYyZRkujHNGQvyjjJ64qO6kDRtjAf+EDvVbzBM29yRkcLTPOWKPafvc4NbmYkjedbgquH\nHWo/NdkGCCVOajjx5f8ArCgJ5NJLiNgO
p65WmSSLMM5QESk8lhSiUNKWI2gDAI9fWo2kP2ff\n2Y4zikEm3txjG096BjDub/pqoPLVNHDHNZ3MpfY8fRfWoWkDI25ghHBC960NN01ptNup1KKe\ng3tgGkGpnWku5U2qWT+LNTRyR2dxI+zeANw/wqCOaRW2bRtU/Mo6fnS3EiPI+VZlVh83t6Ud\nALDWk8umyagJRHl9pi7/AIfnS2cYeMtK6hkHzA1Tkk8+NkVtsYbpUcm3y1O7OOvPWmBJczlp\nisb/ACAcg0Q4MhcYHYCqqL5XmfNuzzzUvluWwBwTkCgCf7RHbwuqAbm4P1qqzGOIDKkEZCkV\nOysFKBBtP8Y61Q1DEbIC+cnA29R9aWoFOW5IQcZX9c1TmXfxuxjmpvOVF2tyAcFscVAzKuYx\nyG53UySUxqyqd3HXNN8ssxdW3KeKihXdIUXB7CpW+WNlPHqKChcnzChfAUfex+lIqtPtxhWJ\n5HtTgN0aCNvkHJPepGjCrlM4bqaAJLeESM6spJAz7VMf9Sr7BvU4yPT3ptvmKNiWypG0VL80\neWj+7jk0AO2YOdq+ufWnyDcwIyXbselRBtzDnbnpnvTpG8khj8xHUUARthmCOu09SKfGq7cD\nk8nn0pMhGDk7yeg9PamM7rksdoHp/KkIbGgkjZQRvzu/Ck8t1ZI9q46+9N8zdtIHluT0HenK\nHFzy3PpTAe0ZLLkkFe460NErStknaRnrRvHnOB98H86a8czNtZCued4pDYRSLGyx4IBPXHap\nSjzKxblM4FCwzbc7lZV5z3Aq3YWs99chIULeWwDNjg1nKVkNK57/APsh/Cubx78SdGslh88R\nuJDIwIXj1P0r9p7exFhZw20I2xwoIwAMdOOn4Cvir/gmp8JV0fw/eeLLqMiSb93bgrnHGGx/\nKvuNhtkcg7snrXEdEVZDYlKw4xyaVvu4pWk6AfSlHAOetCKGr0GRStgLwOaUUfhVD1QI3zAk\nU/7x6VGGGT60/wC73qWMF74PNOXK9Tmmr2OKf97OBVDEb95wOKF+6PWjaSvHrTiPmxQIUEt9\naN2KbnCk9qTeG9qARIexpWO4e9Q7u3epAccZoAGG5cHrSRoV+9zSrnnmjPze1AxFPJ9qduFH\n3c0mfamAZFFJuHpRRYRjKfkBxTt5PWkDZXGMClVgxNYjEJJ5J5p/DdeKb6nHSnL84BoAXhVp\nm4qwwMillY8Y5oXIXNBYpzu5pG/Ol3E+5pNu7ntQQHLY7U77ucdKTy/mGDgUp64pgNbHQ9aC\n38JPNOdfl2/xU3A79aQCeYW56CpF+VQSaZIOy9KReOG70AShsDPSmsflJHNG8dCeKZGwYkCm\nAKvanK2/OKVcAnnmkUnJPQUuoCfLt4HOaU4znbTlA2k45ppY4OKfUBxXcAc4pgI3HPQU7eMe\n1Iv3jjpTAUnIyBScL15pQoP1pNvzHJzRcQ7b82R1xTCrAjPApWlVe+TUfmhj14oGNk3Nkc0s\nMbKN2aeuWPHNSKhXORSAVmxgqKU9yehpu47DxTlY8A0hWEwV9hRkbuRzSeYFbB5p/wDCCe9M\nBfQY4pT8tGcD2o+8ppCYmRjihgdtIAQuSKGYsvPAoGhM7QO5o3dfWhm9Bmm+Wzc/dp9QuOkk\nKqOcVEJjng0/aHO3HNN8k/hQSSfwgZyaTkP6037qggc04fNls8+1MaDDHnHNHbnrTtx96Nu4\nZ70rjGAYNL0XJGDS+uRUbNu70CHBmDZHAqbaJMH+I+lRL1HFTRZ696YmfPH7RuniPW1mxzNG\nNxNfLuuWO6SQbhnrX2T+0Vp/2jT7W62lQo2lsce1fIXiSE+fIRwBwfeqA8/umaPeA3Q9qyZF\nAyP4jzW5fRHziAeKxrpsKQwAbnFUmS9zGvl3YU8HPWsy8jCkj+EnrW0f3keWHzZqjfRrIuUP\nIPStCGZ32ZLiRdzcYx0qleWUMLbUY4z82a
vybVbOckmq0luJJmyx24+8aCDLuGCnHPtiqsyN\nMCei91q7JH5UbbuR2qnLmONvmwc9qpCGriRRhlB6YNZUmY5ZD3BxtP8ASr8kap+85U1XAEkm\nwj5c5HtVIOhWXLQkt68g9qQ7eWPUDrUsn+sOFyOcrUKRlmw/A7UxFaSQSfdOPVarNH5g57Hp\nU7wDkBSSWwae0PGQMBTjH9aBsqPkt0+VeQPX2oSQqrOOSwPDdR7VNtEkmATjscd6ZcK0YDBV\nZqZJGvmyKqt97GahKbQWON2cYNTiZecna5602QhozkZHegCvJmPjaMntmkkUsuWbIx/CaklV\nMxnduPtUW0lywG2NuuTQMbtZWVicqwxgUNIfnG3KsMcc0jK0a8HcD701lBXKHG3naDVCGOR8\nqZLAUSIoOY/u989aeg/ebkOAeoNSTKNzJnHGQR0pjK3mfKpIzzTsiXCsAG9agJ52uSBjII6U\n44ZlYntSELtPKn5sHil3GMfK3uacsfzF1YBRzz3qFn6knPPSmMVt0gLIwCqOBTJlO0Mpznk0\nqx7vlB2nrikYFTkrxQwIuOOdvOd39KkZjtJQ7XP8NJNhowFXkHORRIyMYyqk+vNFgsIzBvv8\n8c/Wmtlk2jhcZzSzb/ujBP8Ad9aQNuYDBXsRTsGozy9/APbrQMsvrjilb5dowUx1pzsojwoz\nnvSAj3FeOB701t24Dj6inbdyqqrkZ/KhuWYYzgfhSAF/iDLhO9MRt6khdvpQu7bycg9qU8YL\nHtx60AKsYXAYbTt9etRBizeWR8vX6VJzJjd1H8VNMm8/KNvrVAN3J5uOd30p/wB2Js469Kaz\nOynpx1pyq3zgcnHyimAigqwIYE9wfSjhFYcs2cGkb5cCTBcelLkrnndnt3pARserKPm6U6OE\nnacYXvimL8/GcN1wadFIoYJhvm/i96BDzATGRjcF5AqvuxCW6e1akcKNkklTjBIrPMYjZsHI\n6CgBsiq6qCSp60MV/h+8fWk3MxGR07UkmY5F3cljxTGODFsggDimqrIdpb3zS+Yctt5PQg08\n5ZFY8CgBkkLsMMcDHFGSu3vgYxS+ZvwWbjPSmv8AKwKgspOCc0AyNtrSE7tmOal5VWfjOenr\nTGX5dpTr1yKdJs3jjmkIRMKdnQMd3vSrhurAD1o8wrxtCj+9UciqE3E9T0pi1H+WV5Hy/jTW\nwzFSuFPelcKcFDk9valkRlIyPrSGN+62C3y9qCw8v5h82elLxuAxkHtTW2+ZktgjimAu4bXX\ndz1qNmG3djCd/WnOBlWC9eKYhZS47A0DsPjwnLcp2pvG4sRkelEcbBt0j5H92kbCAsFyAaQD\nlPQ9FpFYfMc5zTpv3bKoUncMk0iSGMNtXjpTEJuLruIJPQUp+dWXbtwM0f8ALQKwxnkGhRu3\nHBVlPf0pAG7cFUD5cZNNyG+RQd3X2xTmBCkgfN1xSb9q+YOvTFMYuGjZQvXGSKCxPA4x1pxV\ng25kbeRQo2BsjqOKAG43LhsY7UnEq7c7GB5PrR5fyj5s/wBKd5gUnv8ASgQm4RvycR84z60y\nRvLRVbNK21k3N8xoaTauXXcelIAZQAuxxxTdu7kEHPf0NPjVY+g3AjNNEZjXk/K3SkMPOVWV\nZMhCME+9Cr8xI+bsD7UYI++QAKFyz85XPQ0wEOHXBY46YpsXmeWRnODgE+lSKDuOec8UOp7n\nC0EiSR+q8+vamqpzhThhzTsb1A38Uxo9q8nIzg4oKQbnEhOc+wpGj+QqCMMc++afzt+TsOBQ\nvyjIG84/KgOo0k7gzEk9CfSkcrnnpjBHrTo1baRnnuPWl3FWOV3D27UFDRGJDkjbtHFLtLbS\nMZ7ml80SMQB1FIOny8np7UEh8xbAOQDwKaVbdnZznn0qRUO7GMDFJIwU7wSTjAApgRKU3Esu\nTz0NKu1u2Fx1al3Fm9TjsKRnGQrDBB4xQTrc
RW/gB/2h9KUMVwQPvd6cUMmW4V88+9JuKyAN\n07UtR6jVX5EV+CrcYpWXaFUj5vU9KRIy8fzP0bOKUjdGeenRaYCodrHcxz70izR7mLHc+OMU\n1mJIIHOOSe1IqhWLhc+9ADlYNgg7TT8Hyyqt82aiT98hYnbz92pcFMM5Hr70ARY8twyjORyB\n2NN8sLkux3DnilXnkDH+1TmZWjY7STigAZy204GO9HDKQB8w5xSbj5SgL8x4pYydzZTaQMBi\ne9AC7x8rA9f4fegsxyHHXmo2b7rFdv8AjS7vMt2Zs5zigBdxJBXlm4pzHK7R87Dg/WkEZ2nc\nu1V53DrTY2jVd6Zyf1oAU/NKF2gKOp96JsFvlUk9M0rMv7squBnketIrNl89zx7UAMfLLgfK\noPOB3p7cKNrYQ9aSWSQMikZHcAdaSRQikYyTwMUFIRog7EqcheQKXjaXxjtSRqQpKdR1zSu4\n3MFb7v8AOkxCMrNEdvEg6GiTKwhPNYb8EjPBNOdSq7/vMfSo12/xNwelMQ5sZBwSVHI9fek3\nspJXnNPVhjvu7tSKg8vcA3JxtNAA7fMSMq3U0clVAywzzSbWXcpbIHIpq71XIJz1oGSqvls5\nypDcD2PpTfLJYFgGxxik2LtYscLnI+tKeFXuW/iWmK4yRdmWU/MeM+lL8659V4zUnlEKWDKD\nScqxIBIbqfWhk3Itw2kfe5obG8Ybb6cUv3d3THTijhmIB4xQAvCLucY5waTBjDKhyW/iNDyb\ncE/NnikjUSNuYE4oKHKpwE9OT6ULCW3DPHU07y2ycNjd2pv3F3DOOlIY5Y/lOT0/iqOHCthh\n8vY1IUG4Lu4f8qbIytJnGAPlC+9AgJJycYHvTV2567CaVVxktyhPTvTWUecSRgAUwHspU7d2\nT1FI3zLkjHOcUKx24HzOOePSnbfmPzcgZx60AMLbl3E/8Bp8YBX7o24pu8sWYKBxkCkVdy73\n/hGcUCsOyyOQgxgZNJFIDJKXRmK8oewpN5VVbOS3GBT3BVWL9+gFIBisW3b2298U9ZH2gCQA\nE9h1pg3cjZuPUmlblQB8p6igAUSM0ighccn3x2o3HaeCFzw39KGz95jtzxkU3cxjKFsLncfS\ngEOiLMMd88VJsVZA7jHqO1Rsx2ho88+tRkbuQSrf56UDF2nc393quaczKyghckjB56e9N5bn\nqR1pqKWZtpyPSgWwoVlGCcnpz6etLnB2iMNilVWRd7YHb8KNwEhOcrjr6UADKf7u4n+GnKys\nfkYOFHNH3cEnJPSnbkU/LHnjLGmLYhXbI+/DBc9Tzin8ffKgHpSDYoErEgE8AUFfM37BtVec\nGmAM3zCQrx0pAp5JXA60rDcVB+9jIFNLN5nOcntSC4q7dw/h9jSiMRqzyfdJ6ij/AFzbn+TH\nemITuZsZHbmkA6NiuUBGD0Uikb5dpUblxz7GjeG2BOGU5JpVwFY7srnoKZQ7b8u4sCx647Uw\nrgoU7dx3qSM7cnHXpTWCnaEX5yeaLgR7HVjkgZPNKzeWxVMnPepNudpbCvmmOu0Ng7iDnI7U\nANj+X5icnpipFVk3biMMc4qNsFiScFhwKdGoLbScvikA9VKL8jABuDSLGqDcx56fLTtm1c5w\nc8ZqOMs0mQec80CYp8uRjnKdttLlwNoHC035tzAYAY9D1BpwXbNuHYYOaYiPzNp3dO1K3z7Q\nOeOTSggK+V3c02PLAbgRzgAUDsTfvGZMMCF4UVFsbkHh93U0oXKtgEN0pGJ2rkH5f50wFhUe\nW29sMp5xT5PLVQFyr9zim/fjMgUfexzTSTksBk0AOjA2kqxBzzkU1ikmWVcZ4p3mbW6c4oZv\nX5Bj8M0hjkVWAYE8DkHvRI25VKZ3LztaljGVbdx2K01QwZcnHoaRPUcqjd94lmOS3pQ2ZmA3\nYYcn8KF4YqzYKnPFIpO1iRlz3oDqOeRW24XkdK
aWIO8cjHSnJ84+VcseAKYF8pCT64/GgYm7\n5UVhkE5Ip20AEPkjouKTa2xmVfm4FTyMySBTyccUDIlcsgwcEfLT937vDLlu5Hek8vyVVmXB\nJ/Wm+YFk7kjjpTJYu1dxIGAP4TQqlt5UbVxmlYE7crkjk57ildQ0Y+bY2cj6UigZQrLnnAzS\nDPmtncgK5z6Um47g+SRn+IdaVcMpkGWOejGgdxfNLQqSceoFLGpZiwUtn/OadxGCZPuEY47Z\npEjVUXa+SCAfpQSOVS20pnaG+YmmyACTcDmPPSkkkxnbwAccdqNwWPJGTQMXe3lg4yB1X2o8\nwsAE2sp5DYpqyBsZG1s/dPNSFlmKjozUCBsxxrG/DMcg0Kq8q6bue9IJByrDcytinLt8w7if\nm6+1Ag8tc4GRnsOtRBX8kbQevPrVhf3cvynbxkbu9RrvxnO73FBQzcyRlVwwbrxSygxpHsbh\njkfSjcAcZ/4DRuDSFAOvOfSpJJF+XKsv3j260FfLkBJzu4GKRG8xGy3PY07I4Zl+UCqARgWB\nz/D6VIyfuQ2dvvTBJtlDqMAjFTtavtzjIxk0AQQgbhkg445PWmzP++YIrHjripFj8xhldpHY\ndqdEpaNuT1xQAxQPndgSuMYpqsXVFD8j9aWaNgvLYJOeKSNj5hOzGOlAC8s205Ud6jYBJiqB\nsEVOuWyXyeM0xFLR5zjJz9KAsOELSQ5woRT94mkBLrgrj/a9aT5o49ucc5C/1pG3zNtAzj9a\nYDY1G0tj7vekVjuVQwZevNPCsVYKNoPUUoZAmwryO60C6jl3eY4C5HUmpIwFZdoO09Se1Njm\n2shzgkU/ayqZAwNAEokkjY7G2r71fsVjt8BjlsZyKzo/mVc/frTs18tgGHzHj2phc0bXY2cD\nLf3s1sWjCHb8wJx1NZWm7PMIIyx+Ue1atvDumCBcnoWFZMZ0WiNtMpGZDjit2CGSZUGRluo7\n5rO0bZCCoXEh71tWMZluEiaPOTk4qConY6LaGG1jbpnHXrXWWcgM0SIu7kFhXOaa5jyJOeMA\nV0GjW7zTK4UlfaoLPrX9l/StseoXhPyqAuBzz1r3suW6j68+9eWfs52JtPAMZdNjStnPc8Cv\nUJG64PNZS3sMTOKUk7TgUzdz7UCQc84osUP5ZKFbn3pFal96QB0brxS4VqCwbHFIF9aAHqVX\nAxSnG7PWo/u8k0v3uRTCw5vmFHVQAOaRfU0quOcUgQDj6Ugyx64FHPXGDS5Xp3pAKuO/Wjf+\nNJtznnFN27VGTzQNMf5nFKvrTc/L60FttAyTfzgVFJ1PrQGzntRuG3HegRGTxirEbdM0wINu\naVTS6iJt3U0uemTxTVpM7geOaYAxC80K+48cUxl3d6VVKsMUFEwX+InNKp+Xjio9xxjvTl68\nnBoAeB680vmDOO9MZjuoLYUZHNAD1b1peuOMU1sN0pefXigBy5B65FJ97g0Ckbg5PSkMfx0o\n24qPo2QeKViemKYC57mjfuFDY2g00/dzQMcrGnkjHvUSvlsDkVIrBu2KBB90bqez4wCKQc/S\nkZs9RxSEDfWj8eKbKpzkHAqs0xQkYoKRb3DJI5o8z5qiim3HpgYqXcCo4oF6Cs57VIuCvXJq\nNVOQD070LhXwKQEoYLwKMhhimqPl5pB3xQA7BYU30zQPl75peWXIHNMobxup+Cy4701jjpQA\naCQ2hl9xTTt4GAD604HA601o9xBzUgLuyTwc0/ftX3pyrtWk2nqelA0RON2CRShvwpfvZ7UL\nnb0/OgY/+H2pd3txQrfLjGaaoPU0wFY5+YClHbmk+9kUgXncetADtv40Z6AUgU8EUvKtwKGA\n4Luy3akDDpTVcoDnpmgsF57UASKwU9c06NgfvVC3K5UjNLncBximgBnG7pzS7h35pj+nU01V\nbr2oAmznGOaX+LJqPcVqRsYyKQCbt3ahcn2pv3cY
HNP5VeRxTAI2DKT3zViIALmoo8elWreM\nyZGOf880CJYYQ+ATkV8X/t8ftJWvhbSx4M0K8P8AaMwY3kltJgxjH3TjvzXqf7Xn7T1j+z74\nO+zWjxy+JL9DHbR9RGMcu2OnfH0r8fPFXja/8Sapd6lfztPe3cjO1w2SXJ69fpW8Ikt2KWra\nk7TZLBnJ+Zic4z/PrWXM7O5ZeQvAY96jmKfMpfBI5z196rrLtOVBKHC8mt0rGDdxP4SS3Pt2\nqB2kkkUnkH5c1ZyjKwUjd0xUTrtVAG3q/A7Yq7CGbCzCEr0OT6U95DHJuBXHQU15GWQl+QOA\nR3qKWUHbHImxs5H0oYEr3BEDKcHuFFVmnG0H7rEdTzSiSJZGZB143UxnM2MrwOlACSYLAH5z\n/Fipcv5ZRGLRZzg9KhjZUkOQWNSRyIIyQTjrtoAljaKRgqhldjyvarV5YzWrFZdqORu/CqNn\nNvmTYcMxwGqaZZJJgXm3rkAljQBE3lSR4wVLHlqda2kU02GbKkcIKnuJN0ytEolRfl2+9UW3\nK5DKY5M4GKAEZUaZ4yjDacAVbXa0LFoyFUZLDtVK4kKHO7kenJpy3huLd1D+VHj5gRyaQi6k\n6/ZXYt8qkDpya5+7dWmldAWRTjmtq81aJNNSCIbS4AG4cn3rnriJtrRANjdk45JoCxBIp2gZ\nG3+7UcaqGPYLz9allCxsSgZhj8aZHDutmZ2YZ6cUxDY4yJC6rhX9+lSiRQzYyTjaajZsKoC5\nPtUxjEduQThj6fyoGghRo1Kj5RnANTRxuuGZgR3Woo4g1uWc89QDU8mzbG+7AYDGfX0oKJwB\nCD5gGOwo+9Giq+wA5/8ArUvlHdgurE8n29qduD5DAbx0XFAhjBXhYH5m649KWOMOUG4u6dU9\nTTk2xyAkYdhjFO8vayDeG9T70wIhlvm2/KfXsaSQ/MWCHG3Bb1pzKV6yA5OM0SKVUr1HUnNI\nZBJiSJNqfInLGpNwlmZYgcqOaXaU3EKQuORSeSJOWJDNx8poJAQ7WEp6nq3YU5szTnliq8Bc\n4z71HtCKyk75M4K57UN5k2AT8oOdq8HFAx7SiPK4I3cKfT2r0T4P+EbjxN4gsrKIO8ssyqRG\nOuecV58qi4uEXbt6EM3+f85r79/4Js/B0eJPHT+IbuDfa2IDrIR8pIGCMdOv8q46rNYbH6K/\nCTwPB8O/hxo2i28Kw+VAplRT/GeT+NdbyefQVLcHcWweM+uT1qHcdoz0rKx0IRscnvQuep6U\nnC85zS9QPShgBk3MMcU9m4NNVl+hpccnjNMYL2OKfwOSKbHnucUM59MikSO75PSnbgVGKaq+\nZz2pW+XpTKFVtuD2oZtvbJNCsWXBo5NIQA54FIE25yOad3zilJO0mgENVRmlK8k0K1G35snp\nTGC8dBTs8Y701sjmj7wBxTGPBxSZy3PSgdqXd7UiRPlopN3tRQMww3TJx7VNtBOBxVXG6TPa\nrQ5WswFaMbcUEE4C8UhyRtpcngd6lALx0NGaRvl68k0KpKkk0+o2G4Z9Kbyq9adwVxjHvSjk\ncnNHUQgPtzTCTuqQ56jrTR3yPrTAdu/i70gb5csOaUH5TwMUgcswwOKQAo45NBG1ee9Kw4pu\n4jnGaYC7QvXrQpC5JAzS7vl9+9JtyxI9KQCcAZI5NLuBXGKM4xkUueeelIBw+YYzxTV5GB1p\nTiq8jlGJ7UwJGdTkjApYj83WszzD5vPPPSrse7vwO1AFpm2g8c1HnqB1NIWLHpxRyckY4p3A\nZ5ZXqOaGXCgEZqU5fvzR5Z9aBCxqRjaaexxnnJpq5Wlb1ouIO3JxRtG0t1NJuDcEZFJI37sB\nR3pFCrnuARTpXG3GOaapLewpWA3ZPSgBAzYHFSbqQSLj0qBmHY0xE6k85NDNwoIzURb5Qc8G\npFbODQAD72SKQ/lmgn5uvWk+8eaYh7Lt4B57Ui/jml
chsY4NMXdk80mDF+vSiRtqErwKCwAw\nRzSHkBfWgENjY5yTUxNJ5e1eMZpwVtvNICPd1A5NCqPSnKirk0px1AxQAv3TkipY3GVGM1CG\n3U9flzjkVQzg/jpp5vfBEzAbirDHoOepr4p1+2IlkDfwkj6196/ES1+3+D7+MAlWQDA9ex/P\nFfDPiyLyZpgxztYjOPQ0yTzPVIgzOV4WuZuogeSce9dVqTAs6r0Y5rnbwb4XVV5z1qkTYy8l\ns4/Osq6kCtgrgk4yK1mVsHAGPWsm6kDYUCtEQU23L14GcfhVaQeWG5LLn7pq/JDuiwxwexqt\nOA0IUsCRwDVEszp2EwIA2kDpVGZNynDALnJ+vpV1o2jYSEY7VWntepB+XrQIqSxttGfmHr6V\nUkJVdwOOcVedjjYOneojHHI65ICrxxVICjJGWXJ+9ULKRMJCc54PtWh9lDMxSQe+6qckarkH\nIHXPrVisQtnexXLBT82OwpryMqlxgq3T1p0yhcSB9meOB1piIzI5XHHvUgRtJ5ceHwc9MetM\nUGSNlcY288U1lZiBt6jOfSnSgxlWVuwyD3phYrtauzFscYzTUU+WecnHQVdN0I+eMEccd6pM\nGDbifLB6mmJDNwO1guPao5F8zhhtWnybolGcHH8I6kVHtZlLA7hjOzPQUAhGRZI9oPXjpSsp\nihyyqqjjNOBYDp8ucg0zAkUg/OM9D0qgK3mhucbccH3qRGb96SvyqOOaQx/PnaAey051Q7jn\nDHqKAI3RWjOWABHAqE5XAPDqPve1TFFaH1Yt+VEkgwQRnb60AyNVVlUM2Qf0pI1XeyqPn6c9\n6ISEAYjmkOPOJP3uoNAglVxyOGHU1G0gfbknB9ae26YlgcAjpSxruYZHbpTGNaLLFVGAB3pk\ncSBR2cnGM06Mu2Wf+9gU+RomIcrhxxn3oAi2ngE4pm4szhckDoaWNht2kEkHk0RblydwVT1p\nMYFg37xznJ5odo9xG3gDIIp3DcEAr2IpmEkY4zjuTTAjU+TGG65O7FK0m1Sg5Z+QfSl29T1A\n6f4UmC/zFgR1+lAhu3btwwPFEg2srHk0qR7VOcHPIFMf5iMnaM0gFkz5bMBgdaTnjsG6E0rS\nZIVW74qNkwAXb5s/d9BVAOxt4YcUsbBRKpBBZuCKGIZshiQeKVJNuT1OcAUCGs3Uqpxnv60s\nn3s9+hpwYM3+yOv1pjYXJz+dAhCx34X7uMURt+8VfuqTy1CgbgrLwedwqNVeNiPvkn7vtSGa\n67fmw25eg96y7iMJIQPXJFTRTZB2HpzUUkm5i/rQMZt/eFifkA6Gmlti7ivSlkkLBeMg96Rd\nu0rJ61QCLtUZUHc1KzEZU0/cSOOoHA9RUSqOcckc59aWoC7jGmCMg0q7vJ4OPam4IGGGd35U\nFA33D2oBjWY7ozuY87frS7vmyOcdqadzMNp2ledtG5cj1PWlcRLztYEe9NKdNvzA8mkOdxcc\n9sUqr/CDg460wDanzFS2Bz0prc7dztgjNSjYq+WvPGGxUcjBVCgbu1MBAoWTAO9SO1Cqi/Jj\njPPoKj5XAXgipSS3zBgvY49aQCLnLkDePX0pVCMv3+feol34YNlCTT1jGcdcUxrQdKw2Ftvb\ngetRsy+u091oZlBw2cZ6etHHLBep70uog3MG4Py0ZKqA3QnrTFYdHzk8Cpd25wM/ItMZGv3i\nWPTpTjKcg4z7U1ZFbPODnjNO8w7wQBg+tSIG3RtuY5z6UvyqvALjvijcdxPQiiEGNCN3zNzQ\nUOzsXOWPv6CkkCqo53A8ikabAVc4XPNI+YycjK07iF8tmYfJk9qOdzZAVqRfMjYtnIHOO9Ny\nZGOe/wDFQIcPmcDAAxUbsZG5yMdqezEqCGweho3I0gZidwGAB3pAMyFjYfMM0pYeWuQaczEn\npluy0oYj5W/I9qYDQNzcncOwNG0FTuGfUUhXdhlbAU0p
JfdsPDdzQIRlHIXg0vmMZI8n5scn\n0pOUhUHl/X1pTkqoGAvTPemUJ8sm9s/dNKo2xkDoec0nDZB+Xb6URsDwB+dAxWLBgyntyKi2\nkqATsbqKWRvMIAbGD2pTlkGTg5/A0hCNu4Udc9acqsu9c7Q3rSbvuAcgHpRJu+Zs8D86AYzy\n3XGRgf3qk8yP+AZ44pOgG7ODz7U05bkAKTTAejHdg53YpDJyMbQPQ9abNIqyLnIccGmuo2ll\nU8t1oEOjA3OgO3cOGPY0mR8m7PTg+tCqCxxzgcGmKrSAksfw7UASI6vIxyQMcimyMS3Q9MCn\nSKOMHDbeTUbcEMeh60IAZ267Rnpj+tSKNuCTyeOKZ8xYk4AHAB7CpNyxYOBQMjk2byxJ3r29\naN37sFTksenpSyMGkORjI4akjI2/MOezUCEViSdw24PWnNndnfux600kgEFdy460dVVAPm9a\nfQBkkhZQVHzE4zSrypLP847etO4d9uMbec00sH+ccEVIEn3oweDx0HUU1Qy9DlCO9OXbEpYc\nEjJ96btAVS/C9aYajvmdcM4bjoe1IQioFztwckUnDNuXJDfwkUfLuXcCG6YoADN5jPht2T0x\n0pWVW2kHYV7etJxGxXPOOwpG2/K47DmmA5mXJOT6j60LJ8wc8HFK37xlA4X1pq4VuCDzgVLG\nOGfKyGzzndTUZ4/l6nORmlRQyvliqikZxEindkgfWmMNp55+tJGy7mGOD19aFYNx1PWnMzLg\ngBvcUCI4+MgKQw9fShZQFVzgrnGMU+MAKTuyc01VMhOAV28jI70CEZTzsOQefSn7trhsszY+\n7jgVH97G7jA5FJG4bGMqM9KAJOGwiDczHOKSRPmbDfdO3HpTmkw2TlU6bR1qJmbzTx9WoDUd\nIQvynHNIzkqo6KDSspk57Ui7CAOpz3oEOUq289OOtCrmDLEsetAjwjBeFJ70gjZMKWBPrQLY\nTlWA2huM0FlZSxXAB6UPhXOQVAHUUn/LMAt8rdeKYxf4NqYBzmnc8qhD880mNoCoMkUpjVcl\nDwTnjrmkFhzsIyGb6YBpmDgjHynnrQAN3OMmnqpmcBj93pQGxHtHlrubBzgU+SPLZ79KbtXc\nTnkfw0NhsEt8o70DGney/dwR+tOOWYK3ysRjPvSSbsKwBAz19aQ4+Zi2TnjHXNMY7EgbjaWX\nr701izqyYEWOv1pG/wBYGccYwfc0GPcQPvL3OelBIHPknnJU4pNxbGenTimq3yuC+ctxRyrE\nKp292NAWJtzL8rEEdqZtk429jk5NKyiSMAdulM+7HyTnOaQrD42YyNt+T1pzHzMFRl+hFJEr\nydDg4yee1MaRGw25lI44FIYu4/xj7v3cU5nBwG5B6jFB688L14pWfLbiPpimMRSdrAH5Rxg0\nOpZhg8Y/AU2RgoIc4Y80JllzkbCKAG/fXGTk9xTosq2RjpQxXcqxkg+/Sjb5ib85z6dqCWJu\nO3d94dxR97B24THNIIzgBMg5p/KSZz8pOMdqCgDBsNtwtGOCFYE+tO8sq5btSPEFJAA3EZpi\nEWQNtB2nHVaG37sk/hTFVJI1YuA+Og60vIX0btTENUBuXGWXvT1YOw3HGKQruyc5YdQKdIyt\nsCjLd8dKkNhuxQrb+S3T0pAxO04wq8dKdlpNwAzznbTmZ2XZtCgDIFADOrFx8qjrTvLFunmK\nMhuSvpQxVo04wV5PvRIzKzOX3bhjYBTuUKuZFIP3Rzupka/MMHBY9abztwOV7ipI0Ro9wLH0\nX0oAa4IXdndtOKdIq/KfxIphPzBex9KczeTjPXpmgBGTdgocknv2oVtgLFSG6EmnrG/JBGM0\njL99N25qACRVwpY59KI2QblA20SbiFwvQYpo3NIpZTgDB4oACrbt55xwKTdjLPnJ607zQW54\nC+nem4+ZihzIRkD60hWBc7gG47gUnmbS5K4Hb61J5jFVVu
Co5A60yZfunPB74poVhEIkwSSc\nL9MmnKxVVZRuz1z2p8zBlCbcRL1PqaYzFtuzLUih7KNxIYbW+Y4pnK/d784NP8tfJOFwwpqw\n/dcN/wDqqkSxG5U7cHPejeoGTzjFOVQrZKrs7Y60m3KkoO/3Wo1GC4WRiejfkKXsAzBMHg+t\nI2V24TB780sm5VGV/eDjmkLqK3+vPzKykfeFDb/MyflIGNvp70kgDMMexxSspfeo4TNIYkeV\nPyNk9mpsa7nO9cr/AFpW3ySqAynb780OuVI5B70CFjDbdob5x0HqKkaTew4y2ck+ntUTMHjU\nMcc9alc5Y/wso5+lADGXOQWyT0pz7tmWHQY4pittYknPpT1YbstwMce9AxsjKjBk3M23vTlZ\ngoB53DkdxTmkBXcXx26UzllBMeSRnrQMXcI0QbiTu4PakVTyWX/6xpyr5mCWyo/DmghlYKp3\njuaAGKpyQ5yp61N8sa4ZfpimMo8xVLZ560rZVyxbcq8UAG1SqOvPHNCs3LDg4pGx5QJJAoaN\n4wGH3WGKYg84jDDDDp0pwxsDEcr3FRqo27A2CDzT1+bIXlc8j3pAI4/ceY3yAnGPU0q5dcD7\nuKSfcVBLfKOOelSLGnlw4fJ6sKBDm3fKzBWAGM+lNVcIBzgnOaRflZx05xVjaBKMLyF+92oH\nqRLH5yErw6cj3FQx5aNgRyeRUzDbIHyQWoX5MkNQISPaNgPB7cVNMU8lwefUUAOVVCvX5g3t\nSvkD7u2Mn7x70AR2sIe4jQjEeMjdWjLII5Cw4RRyetUiSuF6H0qRZF5cgrxgGkBAsxjZgq7C\n3IpB5y5G5QrD9anWF1jWTG4McYNRiMQ7jyzfypgRt8qgE5J4P1pHUqygEsMfNUuNnBGB9O9N\nhtlh3BpfnzmmMQOFLK7bY8fd659qTllC+Xj/AGfT3pZNzOELDdjOPamxN++JJOMfdNIYrOZJ\ntuM4GCKFj2ruBwOg5pwynmufkAOAO+KaiSKOBgN03UEiLIEQHJIbn1pscxVcBc5PGRSqzqxz\njGcelOZmDc8Acn2pi6jg24n5cbR1qRdjKMHDY5oVfLyC4Zm5FQtH3XlyefSgCxtB2xnmTsRW\nhDIBICfm2jBAPIqjaq6hmT7w6BqvRzLtEijDtwRQSalukm+McEn5hj0robKPbmTNYVjhc7mw\n2OGHb2rds45fs4Yqy9tvr71LGdBo8gkkBByQc49K6zSVM1wzbgAB+Nclo8B5XO045Wuw0a1R\nGX+FsdKyZodPpUJkmXBytd34SjZrgQBdxP8AKua0NFgRXwDgc133gm3WbUmkIwiDcT+BqRn2\nd8J2tU8H2UMEis6J86qehPODXXSYViByK+YfgV8WbPS/FtzpeoziKzvHC25PI8wkAZP0r6dk\nX5iQcioaLGHGKORg4pGXbzTshlwDxWRQvPrTuNvTmmhcYpygmgAD/LgDmnHO33pvReo3UgJ6\nmgBW+lO3DsCKBnhutHJyMUymG7I60EjbgDmjbt+tJu2mgB+SvPUUD7pNN525zxTgQelBIrfd\nyBSYz0/GkOaX+EkmkAhkO3B5pj5Yehp2RtBzSEBs0AQmQg5JzTklAPPenlBUTR7mxikBMrbs\n+lPyMDioQu0Yo3Hj0pjLCsVXPWhc8knrTI24qRmGcCl1GHXIxSqeMd6aHA6DJoUnqDimA/B6\n05iNoJ60xWbPNP69qAHNnaAOtIuecnmg5z1zR6+tIY4dOlJyFxRuIpx+70piBGwvPWkOce1L\n6nHFIzZ70AG/oMUpbcwP4U0Dv0pA3oKAHn5vpSjHAOfwpu4Ac0cnJFAw+50pwY4qN2O7p2pu\n87aALC5zntQct1qISELUm4kA9aAA+/SoXiDtkdKlJ9qQfdyDQMbDHtU1LwYwKbu9RR5ny4Ap\nCHZ4PJpR8rbu1Mz8nNKrZpAL5jdKFk3Ke1NK/LjP4077y89a
YhVbg8UBiOhpu0qOtJzvxnBo\nGS7vl6cmljznByKarFeDTtwLDmkwF49KFwVY0bcHrxQW5wBx3qgsPDbUOKa2WUAHijHqeKbu\n28dfSpGH3c05Vyvr9abu6Z5pfTnFMY1Mqx5wKk3dTUTDaeafk9/u0wJFZd3HSkBpFIjwOtHm\nBicDGKQDujZBoYlsMKauBznFKvHfikIXHy9c57Ugj+bnikYlWxj8aTzBuHc0WGLxuwBilwRx\n2pu/5jTly3OaY7C8LigNR68ZoUFV3GmIQkZpyttQZpudwzjBoViSc0gFJJxgYqdV3qM9KiWP\nf83SpY1ZyQM0wJ44N3PboPrXnf7QHxz0j4E+B7vUrqeI6pIrLZWpOWd8HBI9BWl8Xvi5onwW\n8HyazrFxGrnKwwswDO3PAH5V+QX7Qnxz1v4z+NH1fVmZbbJFtb/wovbjpWkY9SG0jlPjN8Sd\nW+KPi6+8Qa5dNcXUzZBydiLn7oB/zzXn11ceZtKsDH246VsahcR3kMauin++3f6VzrKqwt5c\nZK7unoK6lorHPdvcZu86RpAPnHAyabdMZAjbSjL1x0p1vHvk8pSXZz3p96vmMIydgj6470wK\njnbncu4MM5HFOC/6OGyW3H7p7UMQFPz/AC9gR0qHyxJGCHK/N1FMB8s2ArE/MRjNF0qxTIrH\nLBc7s84qGVBhc8gN2706aVGYEcn3pCIW2GTO0qeoyOKbvOBzgHrRJI5YbWzjPFNjUfMvl5kI\nyPrQFuo64jZY0RW3c5OO1PZTHtAQGMfxD+VRKxXMZ4kxktSqRvGz7wGTQIfC5WZZFATB+7U8\nlxHI2T/3yPWo4pIokLH5yx+73p0aiI8MofOfLNMB9sy28c0btgFsqw9fSgs0kmZOUXhfr60z\nzBGsgI4Y7jn1qwqrsy3CsMk/0pAU2ciOWTCllBAx3PrWZPNIUiBHzYzu7Vs3EUSqVj+RyP4v\nSsa4hCxdW3FsdOKYEV7vkkjn35CjG30pnmMYWGTuY8nNRsv3yz5C+h701lUcCQlSuSP6UwEX\n92G2yc+lO8w8ADcmOR6UyNlMJKpxn739KSNhGxZuG7D+lAWJQVj27D16560u1t21m+X19Kjb\n7wVh8zfpU0cZWPcMAg4Oe9LzDYevAHGVB61YyFjYMivHndz2pvmGRQzR4AOMetSRswV4wFAY\n/WkO45OQVxs5zuNOk6Kytv3HG4cU7lmKSbcY4ojBWP513DttpjCMZkLOd5HAX3qJrcFhkYYd\nVqwwRV3blCL129c1GJBv+T5mJ6n0oGQs3z+UE+XHBzTwp34HPHT1qPywzbgdq+/WnQv5jMqq\nSp7jtSEKxLqQD8zHkZ6UyNVyPmwV6mpGhEK7+cf196bIwk5CrnHO2mBD/rFL4wu7IJON3tUs\ngEjbiWU4yQOPwp0ypJCI16H9Klghi3KjfvCoyeetS5WHY2/Bnhn+3NSjtxmSWbaEX3Jx/LNf\nuB+yT8LbX4W/CDTYxAIr+9jE07dCc5wMelfmr+w38G5viB8TNOa6h32NuyTtIQQqL1ABx1Iz\nx7V+xSwx2cKQxBVjjUIqgYwB0GPpiuKT5mdKVkKz7ucc1AZDu68U8yYBA5NQsu4HOc0itSdW\nDcEU5vlXpUcX3duM1JnavNIY1WO30qRG+X3qDeWxxjmpd+QeOlAxcjoadtJXjgU1mHGBmpFO\ncCmIRTtGKczDjFNbCtgmgg9e1JjFHTNO5xSDPHal5z7UgBW9qUNkntSbgvHWgY5NUA0ngkCn\nA/LQevTinL/q8Y5pdQGnJxng0Bt2f1pSDuGRkU3OQ2OKYDwwWlLbsYFQ5DEcYqZVP0pgHHpR\nS0UWKMXyxjgU5fk69KMY4HSkY5GKxAUckmjfzwMmkVdpz2pW6blODQhCBhnml3noOBTV27cH\nrTgmADuyKAGNhfUE1JGw27QMmopDv6ClRSuADQIft7nkUSMWHp
QCdxPal6rk0w6DW9zSx/Ly\nTxSMo2+9N+6Mdc0Ah5UsPalXhSCaUH86Zjdn1o1HYbyeSKfGVyTnFGT06CmyKQmBxSEP3c4a\njgfQUjEbeR81J/CMjikAN3OaryKfSrTR8Z7VEwLEEdKYEf2b5hkZFTbQMAc09eFGDS7dvOM0\nANwQtGMLx1qTOc5FMYFV3dqBBwrDv605mB5HFMz3705VG3rTGPY8Yx2poUrgkik3cZz9aV/m\nNIBSODtpI85HIxQDtxSKAufXOaAFydxHbNG3cDihuKF69aYDdvX1oVV54p5O3nFNVW69RTF1\nBVDD/ZFKp2jFKzdgKTbhcd6TJAYRsnnNJxknP0p23p3PekZQpB5IpjFVh6U15Qo9T7Up9ulR\n7QWz3pdQsCsWXOKkj5bmjaVXjmgfK3SmIf8AxdKcx7dBTN2Tx0pHbaetKwD+1IO57VCJt0m3\noKlVuo70WAVfvZ7U9W20yQ7celC4NMZX15ftXh+9hPV42r4X8fWYt764ibkhzlvxr71aFJre\nRZBlCuCBXxj8XtHa08QX6OQWDHp7dqOpJ4Rq0K+Z8o5BrmrmMeYz5OFOTXV6xbssjDoW5Fc1\nNCRKyNznqKtbg2ZiQiaSVduMEjOawLpWhmaI4LL7V0KFYJiwOBnnNUNSWOSR5Bw2D0rUkyTI\nrL0yv61TkCqGI+6exq1hoYWbG4kcCq90zSQruABApmb3KGC33uVqK6U7QFGRmpriTy1UKA7H\n+Gqx+XjoM9CaCepXulZdw2hRjPNUmRY1G0ck8mtG4hZ1AY5YjP4VntllUtkJVDIriTyYwycE\ntioJsbueXz0qy+RHsU7lbnkUxbdJF65ZevNUSzPuY92VYYPYelMVQY1HIPc9qvalZtbxq6Nn\nPJGaoeYdpO7KZ5oEMl+Y4AYNn9KZ99ufoAatHLMCxzxx7VWk2s5UHJA+9QMY0e5Qu0jafmqr\nNtk3AbiO27pVzcGYAuS+MgU2Qt5ZZegPJpj6FCQN8m4/LnGaNud4Q4HbNPZj5m0LkZBqST/V\nkoec00IgkZ2A3+nOKhjzlkQnHY1LulEzZZTgcUyabcqbRtLcFqYxufLwW3Erwc0mwM2WOAea\nXYTtDHIPcmm7QxOOo4FBIjKynAXjGd1MCEsCcMrDFT/OJOp2gYqPb1H3Se9AyM/KRxhRxTSo\nbd/dPFTNhoVTcCV5NRCQhSMZBoAh2kR8HIB6CpVy0m0HO4YpzbPlA+U9qZ8qjf054oC5H86q\nyZztP5UjEM3v+lOaMbuflLc7qTcq5TduOOtMaFRg75xxUcijaeOAcmnBSkZwOD70gXdJluUx\n0p2EN+WLJVSe+aWdfmU9c88U/cNuD0I6Ujbl2gMOnegQiA7dp4HcVEyrtkCNnHWnNuVt5Jbt\n9aUgYLlDyMbRQMhERbJJ+QcYp7Iq7QDxjpRGfmIOPUikJEhOPl9M1KF1GMhPXoORQqLsXdyf\nU04YiUmTgUjYaNW3bSeAtWMjmQdT+7I7DmlTHHb3o+6rZPyCkZQ8IIyBn71IB8aszEopY96G\nw2BIoUnoKasku4ANtGelSbVbl03YPBoAQKNpB+YdBUXllsru2/jUihY8lj8uaTbGuWHzelIB\nMYUkfwjmmL82cHilkZmXjhe4ph5mYgFU65qhkwbdbhTwAaiyhjYsxAzj609iH+YZwBnFMkYL\njbyp5+lBIArwcHGMDNBYnAK/N/eFK29sHIK01mPQUAOfO3/Z700Mrfc5XuKdJncoxkYp0Y+f\nJAVjx7UhkWwMpC/nTVAbGBwvansvythiMnikZuBgbMDGfWkMVc+Y2Oefu0h+aYg4Hcik6chv\nm6mkJWTrwTwDVCsLjuo25obO4LtKr1zSnKheBxxxRukCsSRgc4NAhCvlgt1Ddc9qj5VAF4Ge\npp6S7lOcnuab5Y2/M/JOdvpQOwHKqp+8DzTV6szcrjgU9VKA88Af
LShwqrxuJoBibgqgbevI\nJ7UxgQwJbdzwBTvmdSGTgHANG4rMQVwgH60gHSfvMjIXjH41Hu2r5Z5PelZcEMB94/dNNIKs\nvy5OM5pCHxKFk2YyMdalj2Ou0DkdagbcsgCrg9asKw4K8ZpgR3EIiTLcntUMall3Y/xqyzM2\nFY7sHNV2UqS6nGTyKVgHKwZcgYGODTVYrGRuznotKysq4xletIqclx0HrVDHMuJlXOCy8mmD\nIjOOB03U5ceYSD1HelH7tlCnJPXIpaAIijYChGF55705ZNnzyKCG4AHWmNhYyuN3OaVVyQT0\n7CgBGXy2BXPuKbIPQ7ie1O87/SGfZx0Ipm0NwxJOcimAhZONoOR14qRmDAMvDe1KrEtxznrU\ncb7lfdjav50CHLu+84zjimzIu8BEO3ruzS+Yir8xJyOKGysS+vWkMN67xu+7j9aTcAxXODjt\nSs29s4yOppTIJBvVelMZDv8As8ZUoWYmlIWRiM5KjIqSSU/xH5fpTYQ00mSO33qBCqW6jA4y\nSfWmKp2lnUjnP1p/3sr/AA9KRVKkqQzn68UmAbC0RIwV60Nu3rgckdqCrJFgJhc+tC43AjoO\nKYhzYX5dmAO9IS23BICsKMN8zE7V6BjTW2yMuG3etADWi8mRS4+XHNAXLuQMqf4aViZuH454\n+lKp3DKnaelADRt3BEGP96nNliGUfMOwpzN5f+tToPvCmhPMUtyB1FAxqqrqXUhlzzzSPtDA\nAHBIFHLEDbgGlZGV/KxketIAkUZJIO1SPxoZw7KFUsjH8qQg9GyVHvT1Z2VVH3VOaYhkivGW\nUjK+3amL+75J4IxT9pjkOSeTknNM4aRgVyc9aAHBScBsAdM+tI7/ADLGq8Z5b+lDb5EAHrn6\nYpRH0+bqc0CGvnlT8ppZlVAiH5896dt+Y5Xc3b3pisCoJGcHG2mUEknylQdoH60LJ90FPnU5\nzmlkjTliu0/3SaTju+TikIOFbcOFY85oZwvVepwrUN+8XEYOR600qwwc7s8EenvQBKyldoZg\nc8ZFMbBz8v3fSgdNvRlHHuaUluSw2gHH40hisnlkEZcEZI7Um4KoOwZbgDHFI27cQDhT19KU\nBpFXacKvY0CHNv5Krx0OKazBcFDkdx6U1g0Y37/cgVKypDjB37hnNMZGrDblBtU96VvursJI\n6nPSjaRGxB4PUU5pAMYbAxjFAhkih1DD7n60bstlhgAcEUbdzjn5f7v9aCu1iMcUDE/ul/3j\nZ4xR5a4OCOeefWlMkm4LhR703yxGhB+b5s0EXHbTIwDHbgdB3p5w7EKFRcde9MVdxDKMN3ya\nbGrEyY+Ug5oKHSsFVVAxzktTVXa3zAsWPBpW2yBT1yeaOcF15AOAKBdRWjkOQw2r9aN27Bwd\nuMAU2Q7uclj/AHaHk2KATz14oGA+b5MZ9qRW2kgZz3qRiIzv5BI7UiMEUDaWZ++KBjFddwIX\nDA5oMY2uTu+c54605ZMsmVAHf1pC6SH7xRT0NAhXx5aA/J70sirGVQHIbpTVZFbB3SL23CgZ\n4GMqe3pTAMb5CpJFIqnecYVewNO6LnIA6Y96RmPmBuvH5GgY1mVec7jnpSsnzYKnJ/hFJkNg\ntgsTkY9aNzK28PluhFILCwwF/kyPl5wKdtKW5DNwT074oX/WnHygj71NyI87mwSMHI4piA7o\n24B244HrQsfzfNwxGevFIxbCIHyx7e1I0IjXhiT6UmLUUq00h3NhAM/L600MJGJPYcml8wqo\nA4z1FK/+s7Fe4WgBrHzExn5c+nNLIgVVBO457d6VXZnI+6AOPenK21t3BOPl9qRRHtMcpfII\n6cikkkLbc43f3QKe0m7DFd6ddvvTWOecY9MUyWSSfu5VROpGS1Nbcqgt0zgY702JAq7mbnOD\nT9rMvJ3L2oABmJmwOeODR5gkfaFPHPPrQrl3yw4C4psjfKCqlSB1oA
khzICSeO9TbQFGSCar\nrJ5MYO0kMMsR+lPZgcfwjHWgoTYIo84wOwxTZG+UkDk989KX53TaT8pPWo3JHQfdPC0CHnnG\nzhhyT6+1Chmy2FXPVaAxkYsflxz0pFw3zD5WY85pksTaFmGx8kDgU8M3knn96D92mlU4CD5j\n1NNVd+SD8ymkMc5Mk23b2+6BTg22Rdo6cYNIZWNum3iTO3d3pq5C5xz0znqaBjkO6RsDGTjP\nvTRlcjHyg8kUbvLXP3Rnlfel3bVwOuck+lMBdrTbfL+VMZ6UmVZgxVcr70ixt8zF9o+tDLuG\n0jDY647UdRitJuXJOM0sWxSwdudvBxSeYWVcgbVGOKVV8xc5Az270MQ1ZpDJuBATFIzluFB3\nEZzmnLIGymMEcYApp+8V53dPoKQDo2LQ7tg20xWEjDH3uwp6s8cBUDAzjHr70LlY8hefegBr\nZxtZcHrkU/c8m3jcV56UkaHALk5P8NI0cnysp25HY0ALIFKjqDnNK5PGB82Of8aT7sIjIJkz\nkk02FsszEFsDgCgCSPH31bcmMVEzZjyqkDd0p5hxHuJ3K/BPTFIuEWNVy3bdTJYi7FJboP5V\nL5bsyhh8rfnTGXaGyNwzxThJ5jK7NtYDGKdwECLGSoySp5FJtL5+fkDNOmbaQ6vuP8XvUeS2\n4bsEjOKQ0Kyv8rEgDvRJlSc9CcjFMbCRgs3y46mp9u2PayknHB/rSGxjMFx8nzetLt+Ugdc8\n05cYUHlqRYzHIR367aBPcQKCu4dAcGpHh8tQE53ds0i8BspgH+HPWl8syTBQpzjp6UAMaJtw\nDHBxzSs27OEbAXija7ZXJKZ496c7Mo2g4daBkbSDbGwG3jAPvUjLlk3NhQMbfek2p5oXbnPJ\n9M0MArP5j8ryBQA9VLNjGU6jNDEbdudp65peJmcpw237p9qiX+EgBzQMkRgysSnOMAf1pFjT\n5s5wDiljdld3Y7VIx9KaZDztOB3agQMpjjKsOc09vMYqcnHTb6e9DEBFjHOeTSuwyGG487TQ\nSRsvzEEe27FPhtwozIRtpJVZiSudvdQelO43KTwvc0uoxrfvFLsN4zhaRogCm04AqSTPVBsP\nWmM+1d4XJ7j3pgTj5S+MFsZ2tSLuEZfadhNRqw3fPlnzkH09ql5ZRhsAcUDAgMw3Einr5e4q\ny+4pmTGuMAoDSyMxXaTjuG9qBCKw2knd6AUjy7cFiw7U5W+bDZ2noe9ORiGbCgn1agT0Gqw2\nZIPB/Gl8vcFXdlDTVmZZNzYJPGakjY7w5GB0xQMf5hU7Rnao/OoWGWB3474YVIqOrNuI9R9K\nSa4+YfJuGOKAsR8NDkgkk9O9K+W4Ayc5560qsu4swO7sopvlgEElsk8n0oGIyhRvfhycCnsr\nlcFV2jtSM37whzlV6Z6fWnO29QCfmx1oFzDXB8s/JuFQ7pFBUHIHNTNuRRsfjqfel8sHcYzg\n+lAtyudszHCYAGcU/bE0blpTvzjaaa3yzEqc8Y46imyYeE52+Znr3qkSSRYDKCAT2Y/yqWGQ\nyRqu3aOc5qBYyu1gcj+dSom9l3ZA9KALEbMhALcD0q/EVZAiJgHnNZ7FfN2kFewq8m6Jo0V/\nmB5pWGzTspTEpBTC/wB6uns7rzIVA5QLxXOWkYZvnPfJrpLPDjai4XsRUsR0/hld5851+UNg\nZrrdwkcSQjoK5DQVeD7zkhj8qmu00yFVlQuQU7j3rNmiWh0mghm08Kx+cnJ9q9H8IzNpWjaj\ndyFQ8cJZWc4X2zXDWKjKbBiPNb/jq5Nl8Mrpfum4dY8A9s5rPqVtqcfovidzqwYSbUifKmP1\nz1Br7y+AHxOTx54W+y3Vwr6rZ4Qq33mQDg4/KvzTtZPJjgOSsq4U+/Ofyr2j4R+Pr7R9Xt7+\nxkZLm1IaWPdjeoI646jit3HQz5j9E9x28A/570hf2rn/AAH4tHjDw7Dq
iBQzja4HTdW+zdeM\nd642rM3HBt3BpzFgeDxUS/OMjpS5x3pDHsRu5peMZBzUTN27Uir8xIOKQFhSfWjc2761Gsnb\nvUitu46UDHc5x3pevPUVE31pVJXrQMk3AduKQseo6U3PrzR1HBwKsBzSEY4pWO5femZ7ZpSx\nUVICtjGKAeKZjHJpePWkSO4Kc9aaGx60egpDwTQApY/Wq010UkGF6VP97tzUEkJYk4qraBcm\nhuFk6Hn0qXzMtmqUcZjff+dWkIboakdyVW3Dj8ak+9zUQcLxjmn59KRQ9fShWO7aeKb94cHF\nC/eyT0piHbsNTg3NMwN/Xig+nSkA9m+bg0NlhjOKNo4pOoPakwHbz0xxRuGOBzTVOFxSbvm4\np9QH7m2kkZ+lLH93NR+aykjNNWTrTH0Jl5o25waijYsDUvI70ALkN1FG0N0pN/yn1pA3SgYF\nSq80K53dcUhY4OelL/Du4z6UCFWTJJNB60xeW6U7adx5oGOV8D1NOZzmo2U5GODQcnpQIe33\nTjmhfu5PHtRz2FLkEZI5oEHuOBT1bdzUe/5cdqUECgYrNyRihcck9aTdxTd3XikBJy3Sm/Wh\nXPpTieOuaQD1ajPJ5pgpf4uPxp3H0FZt2ADilyQoHam/xHApdxwRSELztzQ3Kj1pOwpzHcQQ\ncUIoTryTQzHoOlNPWg802A77y4Bw1MRyG9qXbwDnn0pu3bSAm3Bue1SrjbnHFV1+705pxkKr\njrSAkb5sd6Yy4k9qVW+XnjFNaQnAApoBccU4N1wOaYzFc0obPIqgH+YVUUc9+lMz8tSrluO1\nSFhN2W4qWOMNyRTYkUNU8KlyQPwoAcsJ45GKxPiJ8QtG+FfhG817W544La3TdtP3pD0Cj3Jx\nSfEL4haD8KfCt3ruu3scFtAMqpPMjdlA9TX5HftQftSa38dvEEpMslpokbsLeyDZG3IwSPfi\ntYq5LlYp/tG/tGaz8cPGFzf3kzxafA222ss4WNcenr1rxTU9Ya6X94TuXggf0qncTtJN5r/K\nT/CBVPO5sqxbaSze9dUYnO3qB+b5iSQoxn3qPaWkUl+R1UdxUk11vQRx9B94Y5J9qge6VZlw\nucDJP9PrVJEgLn7KzeUuJWPB9BVXzpD0bezckU+SWKOXzX6noMdBVe3mEJznIk4FAX6EkjDn\nPQDLccfSo9+UVB8qtyBSbjDDhpcqeo65pfOHGQDxjHtQHqM3FLjycYbGc9qrzSYkChuM8Njq\nfSpZJPu8HJPDe1ReYYlaLYM5zTC4rSBpGUrhu9JvZWEgHTgYqSTdHCEUY3cnHX3NMKAyErJ/\nDgA9DTSuSLIu6QZP3hSNiFRJ1OdtNhV8HcAX7U+NUWBxI/73rtpDsPmtpLSFpEQAueGboPel\nhmK7ZXYNLjj5e3rVma7P9nCE/OzLwPQVBaW6suwHc6jcT6AdqQFloo14l7nIbPr04qy1kEYv\nIGEA556Gqdxcf2hFJK5SIgfe9cVPbTHULVIg29nGPmPpTGU5p4mZkUEs3TIrM1OQLMqoPmA5\nBq/fSxrL9nVN2zhnBrKuB5chCtvJ/ibn8KBEBReXBUHP4A+tN/vbiFLfxgUjK2zcyYfPRaVQ\nXkAPzemaY7CrGEgwP3i9Tj1qNVA+fqG6A+tPRs7liG0DqWpqsy4bqfTHFDAlhiRoyzht46Ma\nkt43lUIyfKDncTTI5NzKwJCscVYLqCxYNjPGOhoGLbq7P/e5ICmpfLYTKHOCOuOlRtvkGFx8\n3TbS+QW2iMkBT85Y1IEvlryzjJ25P0pu9MqYyUFI2I2BBJA6N/Sn8/fI8tj37Cmhh5m5WSPA\nb+9ihgD5bAgYGCRSNhJGib5iRu3LTpgqEDHybc/jTEVZNsanLHOeMdKleTYIwmQT1FLho0UD\nDBjyG6j3p0qxecEDfMB971pAD7Npbf06j2qHyzGg2n5FYYx2Bp6qI5PmRi
rdQBmpPLLLIoXZ\nuOcd8UANfAk8kFtzN8pX1rV8OaPJqF1DBtYTM+FHT8c5rJki81dqnLN93HXNfTv7F3wduPiV\n8StJtzAJYbd1nmeThVGM/jx2rnqaGkD9EP2Dvg6vw7+E8GqTRst7qCggtgnb6/59K+kpF7ZP\nPvTrGxt9J02CztUCQQqERV9B0OO1J97J2kVym+4zbtZcDPNOC8sfehVIHrTs7ep60FDC21sU\nqr170iqd3I4peSR2FICJkJ74p4XavrT2jGM5pV+XjGaoYqsAtJu96DllPFNVenqKCWP8vJGa\nep/hIpP4OTzRuzzjigoN/wA1KpNJwvbNCseSBx2pWAeCrH3pPal47cGkVRxzyKoBzYpcjdkd\nKaetNyV6dKTAXzgHxninBs9uKrtg59c0rSHgdu9MaJ+PxpQy96apB5xRjLZPSgQZNFG6igDK\nRRzluPenZUrimt7dKTzOTgcVmxjs8cDikZj0ApNxbjOBTu3rUoBnH8Q5qSMAcHpTW+mTT1+7\n07U2CGsuPu9KQ5UdKlVRtxTFyOe1Ag6qcDtTS3yYB5pZCV4pI87un51QCdvWnBtqg7eadx6Y\npjLleGpdQQuMtmjcFbsaD90elMVNuXPSmNkgO7Pams3ak3HvQwDKPWpBj/MDADHNIPu5po+U\nU4ncemBQIGbau0HOaZtKjr8tOGNoOOaNu08nIpgKjZHApdzFuTwKTPy9KjWQbvmO2kBKrHbk\nnk0bjwOo9Kav3uaWQ/3RQAm35zzgU9eBzTPdqdtIzznNADtvy9OKZuG7inSf6sYNRRZ3Z5oA\nl/hzzTuNuT1pGBpgU9aaAcvzde1OZl5wOaYjdTjmnN29aTYABleTQrHGO1Iq7uCOaMbcg9Ko\nkfu3dB0pzOPT8aRMY9qj3buKQhf4uKXPy4zTN3XHFKqjbnOTS6linJb5TxSYDdOnrR1j44NK\nBjgdxzVEhuG3ApN+3APNA+XtxTiRtyBzSEHJPoRSS4ZetKv3eaRQu7Jo1GVdjMxIBx61YhVk\n5NSK4VduKafmYc8UwHHPI605CMEU3O7kdKRfUUxFuFgxGOSOcV8qfH3SzD4mvJEORI2Rx0zz\nX1RAyxtuP6V4D+0JpptryGdFwkilmY/U0hdT5L1uFvNck59q5LUMxksBzXca5G3mSDbznOa4\n7VI9y7nHA9KpEnPXyhsMOh5rPuF3NgN8o6mr7L8rAnqaz5AqyOp5rZCM5t+9kJyh6fSq0jKs\ne0jK1aukHUHGe3pVFmG7H8I5x60yWVLqHfIMDBx1FUrq3/edyFFapJf5gMEdqYsQaSPzW+Vm\n54qiTPnLLGsjDG0YGKzZJkDZccgceldZqi2n2dUj2iU8FCe3rXH3VuY2bJ3KDQJkU8rFo2Aw\nB3pobdufkc9u9P2q33OTjjFNb5WwR8w9KBkN55k21GJAxzVKRPLJRR+7FaMkhkXLLg9Kz5sr\nnK49DVEicYX1HWopIyHCg5XO78Kmj242vngdaSdfLUOg3ZFADFcrG3y7lPKgdRUHzPng5P5V\nKrGJgPX0qIsQ5bPfA9KYyOZX4/55n5eKj8sKuA2Fzg1ak2tB8rfPnpUO3+POPQe9GxJBJAVD\nFicsfvUxYSq7OGI561bkb7VCM8MOMe9RSSfMhUYbvQMj3bwBtA2jjNNT5lbnD+lLJM3zE7ck\n8AU1M+YHfr0NABMRsRmHIqJpAJSw6dRUlyjs20EVXbdGwZemMZqgJJGVmJ27GYUzaY9oPPNS\nQjfjHAUZ5pk6iIgHkkZJoEALRozlcjOBio/lVWcnIJ6U59yyA7/kx0pAP4dvXmgBAxkyu04Y\ncN2qJiu3lCGUdalM3ykb8KOiil58kBjjPU0xkEgDsjqDjvSMSGYCpCrSLtLBMdD6005yBtyc\ncmmAFgy78Yx+tCsu3fj25oDD+L5R6e9Bzu254oGJHKS20KPrSyfvlJJwfSmr
hSQBk9c5p3O0\n7x15FICN8kYA+WmNGdvt2PpUi52swOR/dp7KRCGPAP8ADSJKxdZozHj2omXdtwoJUYpxXaww\noPfikVm/g4U9zT1GN2ibao+Vl/Whs5G3AbOKdIPL5HI9utRJGZMkH8aYhThctnAP8NOUMqYV\nsZpsqhlDH7xOMUq53AKec9DQAKxkyTjywfu0TEcAL8vrSqyKx2ruz2pG3upOOOlMY3KLlRk5\n5pu09h8vvS/dGWHIGKb96QMGwMcCgQq7C27c3ApFwq4Y/Kx49qk3KrEkDGKjdl3HIyrcCkAK\nu5gOQD0x6Uq7vMYZ+RfXvSeZ33fNnApW3yEKPlGc5oAjm3NwDz1pfLb5SfvU5c7XBOOaN21Q\nMf8AAqXQoZIzsMqOB2pjNubavPGean3boRt+8e3pUUkZbGTg+1OwCorBckge1LzvBxj/AGTT\nSAEw3HPSlbC7SPnc8DFIOou3arHBPYUhAaEbs7sUSqVPzblGeVoVl3HAzRcZC7tFHwacpDLk\njJYU7cjsf4vT60nO5dq4UD5jVEgPlXAJx6Uvy8E5GO4o807vkXNJjKnfkDrSENcuxDK5z1xT\n4pN2ScMe+emaayoobYcjGd1BZI4lTadx9KYDlYKxLctg00L8qknLDpihjtXaw7daI+FHt1NI\nBWV2bIPPc01SVkJYHA7U9F+YjdlG5pJFZGwcnvSsAjMWVgmdwNPYBYtxGeOQaRW/ecDG7k0K\nxYPnjPQGqGhFMcg3biB7mmR4VCB8wo8sLH069KTyyFBBx3OO1AhV+5hGwB1pdyhgVbDYwc9K\nMLI3A28ce9EfPUZ56GpAe0ibPTHGaYw2KM9fQ0mMyYQd/vHp9KQqzZ5wV5INUASKyYfG7tgd\naX51wqqC/pSF/wB5xndjIFNUKrBic45Oe1IBw3qTx0pGjCyZU/eHTFA/1mQ2WfkUrEtJsYAm\nmAm5FXDg7umCP5U+LaNwX5h2zTCu0FmbJGR9KFLGNV6A8DBoATdtQntnpTlYHAT5R3pVwDtx\nx0P1qLIxjdznml1GTKoIIDDGec1FtfqrY9FqWFQyu23C+rdKRYh5gkLZGPwoAaUZCoJA7mkj\nD/MGbJbJFOkjZpPv/LjOaZLtVVZD35NIQ1428kAHGDkinb488AgeuKPLLsSTwenNCq33SwI7\nCqECgN8oXcOpJNBQjIUctTWV+FDd6kVUWXgtntzwaBkMgKyKWGMcU/zI8mMjGec0LGdz7zkd\nRSqvmBQuB60DFibe5wd1EbOwY7Se2PWk/i+UAY7+tB6nnb/tDt7UgHF1ZOM5+nSmJKWUlmAY\nHjnrRtBRlySCeGpjOVcKU4xgjFAh+dyHAx3JpqMN2DnGKI1C5A4Ujo1NDBGVO7dKoAZQFwDk\n5p0aNJ1Xn0pWQRxnALE96V87QFb58cr3oZIkkaq+cbVx60zywc5B56c0uOpz9KftZtuBlz3p\nDRHJliowR7GlKvJ3wVNK/wAze+c/Sl+65bJPrQMjyG+YAkA5NEbIsauMMWOcGlUMobafkPem\npt/u8YxmgY4MGzj7/agBtxJ+XnH1pWXeowQAO9JHGrK6HPJyGPSgSG4LMYzw5/lTmB8wxrgq\nfX1oW3LfOW3EcUki/wBzj/aoGKyttw0nyrTuGVSQcU2RR/fOO9LIxbDDt0FAhTt3NtGeOlRo\n2eCNop2S2CRt9+maSLuwUiTsTQOwp3JHuxtBOBk01lK5Dcn2pzR7h1wvU+mabgBt55+XFIBz\nbmxj/gRpN4VSBy3bNODGNUB+ZiMgUFgylO45piI2bbtLqaTduk2nI9fanqd8agnoc0oUrMzk\nYzxTsBHxJvy+AOnvTlcxt8p3L0zSbfLJyMk8A0gYcvjaT92iwhdjKGb7w/vUir8oLH5evFKv\nnHHltgdTmmyRhmJLexpMY9ZUC8AndTf4flTIFSs0awqnoPvCkjXcrsMqMdc0CG
SLlFIO1880\nSFvvZwe9LIgZ1dhj5cY9aSaM+YhwcY+bFAyNkaTYQeDwKk2JtCsfujH41MyLHGoUEMpziolk\nKyZK4LZ60AyP5goKpyp7nOafGxGRjcSOT6URkJGVIPGTR5g2gxqFVvvDPNAIRVXYR1wKdJIv\nkqNvzHimr3VfvDkcUEFRvk7nnHagBWRRhSvOKZyVCjhqftYHdneO1GBk7fvHuegoEM8ny9pY\n9+lSMwkjYFd3fJ6fnQuW68t3zSlibfyh/eyaCiFVPmA43IRwe1O+fa2E+RffvTtx+6x5Jx/h\nRJGyqobqD0zQSN2sY9zEBfQ0MvAK9h0pJFJ3Z+7TUVY4+rF26UDHtuaMZXBpPl2lwpbb/D3p\nFJ2klvYigRnbvR+c4NNki7twC/cxzS5SZQF+XtmiQGOMbWHByQab5KJIZBwW+bApDAINoIYM\nTge1PZTH8pfj2pqqWZM8DrikjYbpQRnHNAxFCscHLLngins44WRtoz1psbKsQ/u53YpdoeMg\nLkscgGgBzOOVBGc0yMDDZJJ9aNw5DALx164oZc7QPl759aAFDFdq4JPcGjI3knle9OaRjy3U\n8BqaVEciqTn2HSmANuh+UHIPTNGz93jq2eaCCCSo3Be2abk7lkYH/dFIQ/sP4cHFJu+Yru+h\nFPKqVJxljz7CoVUqpIOcc0DHLtaPyxkyE9ewp8isI1QDJUdaazFslRgMM5pVXy2BVsBhzk0A\nJI2WUAbmxnHrSAOxO7r2X0pUYNkowLZxRJnYOCcHnHXNACRKozuO7uc9/alkU9Cfm6j2HpSt\nkqGc/SkDeYpOPmHWgAZv3fTC0jLswMZbsaFlbaO59+lKvMjGRiSRjgcCmIc2Y8sMbxxUbs23\naDmTGS3tUgQMhBPmAnntTZseeCBuIHb0FIaEZztVg4IApzlmTbjJxn8KYqiSPGMDORilYNJg\nchetABuPHGRjOaRVIUZJU5yOadxJ8zcEenHHpQ0zBSuMk9PpQIduZpMjGO+etNXDRPyc5zha\nVcbkyPrSbUV2wxJ9qABgu/n5sLkYPFKrZXYMsVOAaYvyx5IzzUhxu2qdgbmgQ1mKkg+vSmhg\n0mD/AKs9D9O1Cr5bsGOZexFEeMMS3zKM8UDBpFU5K45qSNhkjbuYjrTfOLKSF2qRyvvTto4A\n4yOaBg2WUxlcjHNOXczBiMjFN3AAZ+UYwB3qRRuUFQQR/DQKwTOZHT5dvpTWVd3m/dfHG49a\nmjf5V77jjkdKYsX3y3zDOOaBEcWZHxu3NjdjNTNIzH5jj5ScUn2fZDvTCkVGuS4JbJoGOVd0\na54J6r3qOEeW7L94561YaTarMEw396olX98Wk+UEfeoGLwRuXp+tNZVKlidz+gFORlHJ49Pe\nl2nOByW4oFciMZUtvO7dyR0p0bHczAZCr+VCqXUs3G35QKF2qoTb8zGgY8M6xrv2gNUbKJF8\nteF3cmlfbHFlskD0p6tu2qeQegx0oGNVd0jHH3RxUnCtlj8qijnaACBwck0yJdsUu4+YT2zQ\nQA3RrlFLFqdH+8b5OCOQD0oWTZGNrYYDB9qcsfy534DGkUL+8KgAYx/CaYsff+KpWBXEchye\n4B7fWmSx+aowPLCnOPamA1pVdmwwX1x1qRQW27lO3FL5KE7gu3uAf50SN5jDeSY8chfWkGoq\nxiYYdsKpyBUmxXjBkO1c8Co7ba2UbOAeKGZmwrD5AfzpgAYnfgfd6Gl/hwTktT/ljjYjocjF\nRyKFKjocDbQLcUKnluXX5RxSyYeSMfdjx2GaNnmfK6mPbyVPc/4U9B8vZR1oATa0jscH5f5U\nNNtZfk4x6UqR7lYIMZ6801lMDIrHH60Bditt428Mec/0pigszHafTFScqoCkctkA0FishPfk\nUFDJAHbd2UYxRJt4JPblaczmTbtHA6tSOoVS5HC8k0EEcaYi+9g54z/KmySKrMAS
uTzmp1jS\nYhs8EZHtVeTaMEnLZzmmA1ducr1/i9xSFVZjj/gLVIvckcmnRKD93hf7tMBI9+0cc9zUyqu3\n58q1JIRCSGOD9KezhtozubtjqKkVhQwSQA/OMVdjmWZuMKwOOetVo3+YhgAxH41dsRi681VG\nNvIPei4dDYs1+VFI5P8AFWzp8ZM4GcHPasO3kZZEYr8vXFdDo+6Ta+3HzZ+lSxI6Sxut88Kl\nSOPvDtXd6HZyPcJIOUVc1xWnSJNKuECkcZr0nQYiLUIGG/GTUG0TqdFaNZAki5Y9Kp/HC+e3\n0PTtPtyxeQmR1HQL0HP51e8PJ/pJzyelcl8atcjk1ZIHHESbVVe2etEY3khNpLU5PS4YNV02\nW5jJS6hHKHoR7VreE76fSrqK6X5S7bOTwwJHFcfo8k0Mcsw+SLG3g1Yh1CSGeMrNuTO4r1Ar\nq5Tm5lc+zvgT8SpfC+rRwytv0+5k8uRHbCxknGfbGR+tfXO0TKGRg6MMhlOQfpX5neAfEQnd\nraSQHccqG+lfZv7PfxQTWNPPh6/mJv4Bm3eQ8OnAxn1/wrlqQOmJ7HgjpwKXIHTmhvm3Y55N\nMBK4HQ1zGjAPwR3pVbI5qN3G7PeljYEd81IyVX+TpyKerfKDUSseufwpy5wcHIoAePU03lhz\nTVJ7mlDFlyOKaAezbuho+8vFR8Y680ocDHNBRID8o9aC+cCm7lVvWjduzxTAdux15FAbd7Uw\n7vqKU/epMlkh+Y/SkX5uvWkAzjnApemO9IAGQfahpO2KN3FCsOtMYfw0IwAxjmk3Dd605Rxu\nFOwhd2B05p+4dO9MLbQMjmjcPTmkUPUnI9KcrYJJ6VHz6cU5mzSEOUjPHNBfJ60wdPSlK45x\nmgZIslOXn3qJeOppwyBgGgBWyM0bsKDim7vlx3pGztFADWbaxyaRW4z2o27lzSBSV24x70uo\nx8cm2p1feoPcVW2tgYGfWpTncMCmBKMEHjmkB56U3cVB4pd2KQC43ZHQUoHQ0ikHnOaP5UAD\nZXOKdGxI54NIGLfePSj+HNMGObJHFHGQAaRWJBoXHGaQChSq9aNoUdc5ob5d2DkCk27tp6Ua\nlC5G0joaM7VwaXhW55zQ33vX0pAG7dSq3oKTG4E9KFO3n2pEj+etIfmpgfb15FOztOKAF5xR\nu2nJ70xW+YjNOyrDaaAHFvl44NCtxz+NJSMpFAx+8MuMYFKV5GTTOG60HOQcZpoY5V9elLzT\ndx6dqdmgkG5xxzSMfm9TR9Til+6cjmmAvJ5pRhqTG1etIv8Aq80MocuOR603B3AA8UnVQQea\ncrbVzipAVepJoVTtPvSKDuGe9Sbd3tVXAFXdx3qZVw3HSmwqV+tWUgOeec+gpDGRqG46c1if\nEL4jaL8KvCtzrutXEUEEKHZGz4aR+yj16Vm/FP4raD8H/D8uoavOjXJTdBZK2ZJTnsB296/J\n39pD9pDXPjP4ou2upZIbTcVt7Vm+WJAeAV6Z9/arUW2TKSRP+05+05q3xx10tK0lrYRSt5Fp\nvPlFff8A2q+fpLxpGkxw2OWbpUlxdGOQMy+Y6/xnnms6QK4LFuD15711xikjllK49LhvPO44\njWoWdVmkkwVDHAUGlbYnLnIPYd6rybZHYgEHtVEiTM0Shc89QVPNVXuGXJdee3YE1I8iIxC8\n46saRpjJ821SgHG6mANskiiWQ4cEs2ev0qGNRIFLDCZwBUi7JEJkyD1GPWo2m225G0Et27/W\ngCKNdqsij5N3SiaZZjhh5ZXgCpGCNGgUbeMmonkHBX+I46ZoH0AOZEyo5U4wKasIuFLKzb1P\nIpYWkSSQHAAHB96likWC3YRndKxwaYiq6lWk+cndxn+lOkVVCKvpz9KezbFXj5w3K4pW24mc\nthiOOOvtUgQ4RnOX2qo+VvWhmYrnyvlIyc0iFW2LjHy556U4Oy/MPuf3aooSM/cKtz
29hVxJ\nVViSMZ4PvVZbn7M7FI1kzx/u1dt4oI4VmnjaR2ONoOKAIAyXELx7FRM81TkJjkItyRxgHNWd\nVnFxqJ+zqIY1UDbVBGAWQyJkqOvpS1As6gkcdhAUbEkvLYPNZ0cgt3VX+ds8UGOTajhevTni\noJGEzqDye+PWjULD2z83zbSW4HpTGSTdgkKV7A04qATwV7imxRp8wPJ+8TTAWGN5IGG7aPvM\naXldrLwhHDGnLIxj2jqfbtTlVX4bgdAvegY63g2zNl8sy8egqVY3XCkBpgM57Y9aZH+8+YA7\nV4z3Jqdf3in5sY+8D1pdRD43IkMmzaMcN7/SmNMom81jjHXHc/ShtrBzuJKHgD0p5zJsY4xn\nb0oAI2O7LjaScge1OZl2kM3y55WiIq7OuSMHHNJIkZkIU5kAwPSi4yPzFLMyjJxwDTvlcjYN\nyjrnsaa8hV8hdzY2k+v0pIoysZUhVLHOAaYBNiRuSMDjNJHHukMZHy4yGoVNrsueV659PWlV\nTcKzLnK8g9jQIejO3yq+MU+G5M6sWfDqeeOcU1YWZWAPQ5296PKKsWVixzjy1HP40hou6Jp5\n1DUo/JzK7OFjjTBByR39a/Zb9gv4HxfDP4c2+tXMGzU9TjDHeBuRfQ+/9DX5+/sVfA8/Fj4g\nWIMLJDbzLJKyr8oUYJPsf8a/Z21tItPtYreGMRxRIEUKMDA4GK4py5nY2iieaTrj8KhZiq4z\nz70Ow2jHJpr8qCetZGvQbvwCM1DNJubAxT2iLc05YRnJHNAh0JZuT6U9iMAc5pEwpx2p3/LT\npTGG3jgZpVbCkEc0vQnmo1bcxJHSmMdggEZpF54Jpd3zdOKMDle/agQ5V28nmgtgf4UY+UDv\n3pOQ2R0pjF6L70HPGKO5oYlwNooGP3AdRzRxnmmdOvWnrhlzS6hYTHy5JpScrgDmk44zS7tx\n6cUAMC889aXyxjOOaVsNTfm9c0yhRnpS9eM5pu7jFIvyUAS7aKTzPaigDI8s7cihM8qTT1J2\n8dKTdhhkVkITdtGMdafyvsaVtu3rnFJnaTzwaAGrIevFOZjik4VeByaQnC57ntQMcsmaZgq2\nSeKcxwvA5ppVvLB6mkiRxcAdzRk8HoajYHOM8VLt6Fjx0qhoTox3HihE2gkjiiRQrHOaXcdp\nU9KQhu4cKemae2AuKb8rD0pGbLZ9KoBdo60oG7IXrik3fKaFJ29MH1qeoCj5uwpeFzk00sNv\nvSZ9uKdgFbhCucmmnoKcmOc0ir82QaACQHgioWtyzAsM1Z3E8cZpDu4oAZg8Z6CnBgyY70SN\n8wwM05u+BzSER84qRTs4xTRlcmm/MoBPIoAczbh605PlzTerZxilVtoORxQMcvzHk8URg8nt\nSA7hxSrny8GqJBQKD+valZOmKVVPekxoQBtuTTGY7TgU4Me54o25brQIEXcuM0u1QcE80AhR\n05pv+0etAgT+L19KXcO/FHHpg0Da31oGGPal3eX3FJy2eOKYY6YDlZduDwaNwVtvaho84owF\n69aBC8+nNH1GKcc9zjjimk7evNMBR/ez+FIg+U0q/KeBmk2/MD0pDuKfunmmKxRc4pzDHQU3\neeFHQ0hEituye1eVftCW5udBtJ1GTG5Qt6Zr1NV+YDNcZ8ZLBZvBk0vI8tgMdjnvTGfEeuIz\nXE20Db6CuF1RtrMpPA7V6R4gtzFIR0GCfrzXn+t26KjsT83U1SEcvcANIWwNo61lXVu3nM4P\nB5FaEjb9wU85qlcNnjPzVqjN7mbcICc4zxVOQxpg5wavyxO25Ox71VktwOPvNjBqiSvJ5cr7\nlGDt/CqUgP8Arc529BUjYj3qeCeKgaRCpiyN6n6UxGdLCHnMm5gVHI61A2WZgWDHP4kVbuIf\nLmLfMM/xCqzb1kZioO7jcKBFNo2WMmJeM0p+VgSOPSp9oC7Q2G61FwXJJwFGc9qoRBID
5jqO\ne4+lV5Y3ZQGGVznjqKnZTuV1bBximRbgsokOMAnNMRDIirG+0biwqKZwqoGG0gYxT1kPToOw\nPSqzRg5LdQfvGmAxsIWc/dPFMhXzFAXpnOKm4VeRuOeCtRsW3FgOMYBxTGRzQho9wyTntUbS\nbZFjYA99oqdnLYVeeOoqu0L7SxGeeX9KQiOaT5pEY7dw/EUrIPKJ35G3GKV143Z3EdDUIwsg\n4LE9aoQjRLuAHQL+tIqkQtyeB+tSSRlEbAyOppBGzKMNt3DjNAELjbjedx6A03fHt289aft2\nyMJDuPc1Ht534+QdqXUZJGuJTuPGKjwkgYuc84qaOZWVSE68GmsVaQhcbFPJo6gRNgrycL0x\nTS7qvyHDZxT+GmPIK+lNZWbJU/LTAiaP5tw+UN2NSSKuMHnt9Kc+MxxgcY6+lCybc78DGR9f\nemBXYKx2ctjilaIKo2sQ1CyFk+XjBpW3cc/WhjI5AN2fvdzSrJtYnoD7UrZJUjAGKa7NtI27\nlHegGC/JyQNzUpk8t2yM7f5Unmbhx0xyaczlhu747+lBOo3ncqjHJwSPWkY43xk7ivaiMCON\nSeoprfLIGPUntSFqM3BePmBPFOVTE5Q5YAUryKq7uoNG1mXIf5qdxjCT91OfU0z7q8AjnGKk\nKqnyqdxP6Uiq209x696AG9VJPA7GkHzAMpBNBkG1TnIzwaNpTPGG67qYw54ZBhu9KNytuDfK\nexpF4bcGJ9QKbKmcPj5c55pkoeSRu3YP0qIgyYCnB9KVm+XKjnNGQI8Z5bqR1oKH4Cxk8e4q\nNmDMRkEdf/1UKhjUnqPQ0jRlFG3+E5pDE3bV8zbx6UDDZGSD1pWkM0h/u46D1phjbd1GaAYr\nZ+bcMKePpTmXbsLNkL2oZSww3PvSqjbcg57YoExY3Czcc8cYphkCkk9z1pHXafk+X3pxi3ds\nDHSkDEdl8z5+SRwKGUDtwB2o+VlJIyyjhhTfMIjGe/50xCqSenJPZqUMcbcYB60NhlAIwfUU\n1lMfIJK+uKVhjOIs+nQAUueQU7jBpwxtOFJ781GdxZSBhe9MQu0nODggUmfMBdn4AxinLlWD\nKfwNJtAVySE74pgIxClSOEx0p5Y+Xnqc01isi4PUdPpSsysQwGRjoKBisyt/Dn2pcMoOAFOM\n80z+H0FIfu/MSTQAnzfe6lh1pzbm2nd2wafGxVNoXrxzTSNq7QOaBifO+NoyR1xSnDD5vX6U\nMvy+WH2seTzS7Rhf4mz1pCGlQV3ZwAabsPzZ+8R09RTuGVsnBPO2mrlmBJwRxR0EKrDaAVJb\noPamgSMMbeh5alEmGPPHSmtv3bQp29Sc0kMcrncdnApWx5n7wFFx09aAUbBAwB2oiYvMxbnj\njPSqECsEVi3cYWoFjLIVyBzyM1L8jId/zID0FK23bwoHH40ARtk8kZVeKdgfeUbRS8D95yQB\ngCmZJ4X65pDFI6o1HmDKBhgDhfrQu8Z3HI65oDHqvJJ6UxCP8rHjKtxketOZFXhOeKRso+Wy\nB02+hpfLG0KDwOfagY37vzHp0/yKeyquC42P125psmGAIB3d/ekj273VkPlkYBPY0gJGUyNw\nQTjpTAoVSFwRjkU5flIUHjGKjVz8y4A5wKAHKBvAXPToaRpF2uF5I6NQ0ziEhwAM4B9aYzqy\n5Ay3ANMLi7sKrDnPY0gZlbA6k5GaN+TsYZToPalwZGCvwg4A96AEZWiOSMEnmnBkDHnOeuKR\ng2/BPHSmjEfzbs47etADto3bVzj3pR+7zuHakdvmXIz9Kco+Y919TS6i6jY5Nz8cqBkinGbz\nG+7t28ZNNbhS4HydPrUske5U2kA45FMZBjdjLc9QPWkY99uWJ/KnSDqC2OOKauflx93vQQxX\nHk/xYJOcUYTzBJzv/wA8U3mRGzwM5BNOKny48daAHMuOF6k5xTQO7HYue3ak6uRnHrR/EO
dw\nxgCgaHSEfOM7uflpEyc5JElI2BISF+v1oXczALyetIoaV/dtgHIpTGFYEnpzmnjd8w43N2pG\nUMyhgQM8qKYxPs5ZgyvtPWhchfLc8sc7qJEwWIyDngUrIJHUjqP4qCRVPyvsGVFRp8i5LZPZ\naXZIJHCg4I/h9fWk3BthaIiRR270AP3AxEDgkc8UkhbYuE2tjA9aTay52rtZueadI5ZlULjA\n60AJ86qYzjGM89aar+YpKHacfxVMq+W3PzBh0HWodwZsdQOhxQA2Niy/NlVPWiNkb5UzgHvT\nixCFSMDP3qaz7SMAeme1ADv4vMP3fQUNIGcog2jGcmnMpUAdO9N8zzgxUY+tAAkZkTJO3tg0\nRx7Rknc46Ypdx4ycjoaFWIBsklPbvTEM+RuSxY49KftIKqFyxHGewpI2G0MFyc8Chh96RvlX\npn0oGMVdrMWPP92nNlWQA7/rTFYSMSAT2xTtxkXcy7WXgGkAAmMH+NF6U4lWjzyT3ApnzL34\nz0p0edr87cmgYFi0IAGeefWpfOXd8i4GMHNRsiRkbsknnK0o27C7DCdKCSQsY2Gfm96jkIZ1\nJOOfShceX+oPrTHcj5cFlNAAqtErBm6t1oy+1hgEnpxRgsvlgZI7HpSMxDBWPTpimMSP7pPK\nvjmnLuZduAW7k0NiXkHaM0M7KrFiPbFFgIwx34yVA43U7IkyvKkH86Ur5m1cb1PJx2pyt5e/\n5d3YYqdQE+9g7vbNNXC4zz2yKC5ijAC5pygMo3cA/nTAV93m7QRgcmkdjIfNUMB0IbrQyncS\n4yOmaUfNgMGz7VSQhu0yKFIOCaCqx5Jbnpt68UhZZCVOSc8AU3b8xJbCdMd6kYMo2ZByFPFO\nkb5sFTzyD70x9u5dv0p0jbWIwWz0oEKzJjMnHovfNMkPlyABGLnnb6U5v3m1vu7e7dzQ5fcM\nH5s0DGFht3lieO3anqVbBBxx3pSqry5z2oVVEmT+VADdyquDwc9aVfM3EfeB54FNWQMjjHy7\nsmnNmRdwbaP7ooAUqBndg57DqKGbcoOMEjBXoaPOjCjg5/rTCm7KtyevvQMQt68Ff4aegVVY\nupZu2KNwXou4EYpWH7s8E85IoAQARMMnbuHIoIUgLu2ikKhCQ3Oeimmqp453LnpSJZYbKk7C\nCqr92omULtXnc3JApgBeX5PkBOMmn/MfkYHGcZpiEabAZCVGOQM0xg7KrFP+An9aeYc9ei01\nYfMkABK5/iNBQ1VDupC7FJyuKkYlYHdPvBuakhUBnUcLjKH+lK3yqCwwMcrTQiusiu+HXJxm\nntMVUDbgdyBUnyquD8zMflx2qMyyKp8rBGcZPY0gF8sh9oYGNhndTy2MovQihlYMP4kxhh70\nxt+9fl2le1AidoQ0a87Qo5qNtvlnBCjs2KlTbPJnO3HVfWkZSx8uVdnOVNAyruHyiPsckVMZ\nGVjxgHnmlaP5yeBJ0qKRSygE7ix59qYxWTzIyNwBX5s9qYu2TDnjH8qfKyL8uzaKUqGZMjae\n1JgIrCFSxw+7gfSjo+0Hlh1oZV2liAwzj8aF3NEWbGfUdRTAApVd4OQDytKuW37Dnv8A/WoX\n5myDhelMC7ISeR83BWgBUQ+WWPLev9KHi8uIFDweTTpFKxgZwDzTVhYctnB7VPUB7RFmDE5O\nOgpygsuCRQMbcqMkcYoRl/1aJkscYNAAsZkw7YAB+73qTzN27spxgUxUO2R3PK/LSxEqoUjP\nHWglkkgDzglsbeOPWhZ1wy7SRzlveo92xgAWDZ64zS8yKFAwTyTQASTFmCKDnH4UyX5RkLgD\nqak2Dp/F603jy2x97tTAaLjbHnkjoM05mZdv8WByPSlwNoWQbxjPHrSLH5Kleob8xQNDDI0z\nBWXBzmpHYZZ1GFBwaakm7Py4K8FqbCxXec7tx6UxMVxuYDG04yaczDcjZPHQUjKYYum09880
\n2RmYxkDAxxSGh/O4L0B61JkQgM7ZAPp1pqyHsAdwzTTllKj5h60DHgIuWblT0x1qNtobOMA0\n9l+ZCGGF6j1pVK/OoXLv0NIBjMS20JhQcGpNwVsDOMU3kffO1ejYFJu3SRgYZTzxTAf5a7Np\nbCkc88g1IGdcEgNheKhjjWQMdmNrZqTIPDD5uoH8qCQlddqBhuDf3eg9qVE8ticZTH3ak2mZ\nTkAGmxsiKwYb29KB6h5Y/hfgjNLtDRg/dC9u9OkUMgO3mo9rNgDjnrQIf87KWxvz0HSm8LCH\nPMgPOfShlKrtJLox/KnSErN5ZPCrnJ70APkLRrkENuHCjnrUaqqKildzZ+YUQ4Znx8rDpmpp\nBshSVMM+eQKQ9xrYjmbIKrjk0joJIxnO4HIPtQzszBX5HU5qWRiy7M7RnmjUCF33MDkDFIq5\nUEdSclvan7I24KbR2NNhYqyqCHQCgZIyBWUdVY8KKiCvJwAFwe9PZs5I+Vh0NRxxssYZm59e\n9MkfcHzFGMK2fSovL/eEbQVbuaXcyrliP96lbhfmJx2agBu3yyVZs+h9PamnPJxhcdvWm7nY\nFcg/7XvTlXzpDtP3Rhj7imA57h2jjVlxtpY4TvZw2O+e9Qq3mZGct/dqX0PtytMRLjzFLHdx\n0PrVqzkbcXDcEY2+9VmkK7QhAX3q9CEb5QMA98d6kZq2k0uVUgbR611WjtKy/MMqf4scVzFs\nyzxjqP4T9a6TQ7grH5YU4U459qTElqddoliV+Zh1616B4dEsYLDkkYri9BcqqsVZxnJFd5pr\ngsGUFB2FZmiOt0thbFpjwAuT+FeKfEDVzq2uTzbs564/lXrV3efZdJmdm+YivBfEt9HHqT+S\nxZvUVrS3uZVGWbe3kex2lmIc/dFNjaO3yu7aU/hqG3vpoGhmJ7cBuhqyJFk8xtoed/mIHSuy\nxyHWeEdQljvA8RVXXnn0r3HwR4km0PVLPU7WXbNERIvPcdR+Ir5q0nU5A4TZsyea9O8O6+0X\nkjkuvGWHGKxqQ7G0Jn6O/D3x7Z+P9DF5bcTx4E6f3Wxk49ua6GTnB718Z/Cf4jSeBdUN5BKs\n1rPiOaPdwMkfMQPSvrzSNYi1zTob23kSWGZd6MhyD7CuKUbHWmWMfMTT1baeBmk+7u+tNVu9\nYWNCRm56cUqny268U1Txz+FL97rxQA/eAppqyHHXik9u1N9uhosA/v0pV5PNMJZWAxT2yFzx\nRYAb3pysc+gFRn72T0oUmgCxu+U0nI57U1WwvNAbcPapGSFzs44NBbdjPFRljux2pfvHimAp\nbaM0q/MppqtkbTS7cUwsLGBUi5Bx2qLt16UokO0UriHn73WkXnJ5pF4G5utLu9DimAqsWxkk\nDvTtvTmo2JbPSl579KkaJN3PtShuveojnaKVXHGOtAyU+3NAHzcmmhh2oNACluOOTTqYr4zi\nlWgBCflwKdu3EUK3tRjnAoAdHlW46UHOc01WIY+3Wl3D1pDQ/rg5pS2eo4pq89uKXccYpiAc\nA4pVYgYIoU/L6Ubj0oAXIJp2724pgIVulLu5PFIA7ZBxShhnGKTdjGRSs3Q4pdSkLz6U4Zpp\nbdyDR/OqAfx/FRuGeKT60M3pSELtPXPFBG7gU3ceDTt2ckGlYQituHSlzzx2oo524FMCPB3k\n0/gUgA3AZzTipDY7UhgzYxml3dz0prMvGRRu+XrSKHMQOaXnGc03d0G3mnN94E0wCgfe9qN2\nego29iaQBkHg80/gLwcGmUshG4U0AjAhvVcUmdvHOaC3YcClHzYJ602MXHpxTlBPB6etNX3q\nZRxhuhqREacqB1q1DHuXPf0ogjGemBVyG3CguTtC9f8ACmARQ8jI68Zryn4/ftKeG/gPo0wl\nkS/8QNHm3sV9T0LYzgV5v+03+2zpXwptbvRvDEkOp65s2vOGzFbkj1HVhnpX5p+JPiZqXjLW\n
r3Utcv2v76bLF3zn8M9B7VpGDZLkdD8XP2hPFPxI8U3WsajeNHNMSiwqcrGDn5V46YrxiVTc\nK0kjuWkbd5jHJzStPNeSPOcs55PsKje4kWHyImBBGQ2K6oxsYN3GXSrGwbeCSM7c5/GqZ8uZ\nvKVGLfeJzUs0XlzYxliuTt7VXUhVcKDu/iPrV2sZjpJA+Qy7SvQGqsjtEqyDruqbzjNbbR1z\nzuqGSV4lXkeX09aYCTKLpiyBeeuKr+ZLuMYKrGgwCeamZSqo4O1D2WmyIgYsoyO9A+pC7BLf\nJbIJxupJFZihVcnpx3p8+35QV3qeNo6fWkYGRTEu5SvJoGJ5LW5IPDehqMs6yDCAN2FSlXZQ\nHYhehJ5zTJFCxAljnOAaXUBA28y7Rkjg5NGFXaWG4E5DD+VCx5Zm25XuQaPuryN7EZX2pisN\n2fvXZuVxkEGomX5mAOSOal+XYcAlT95u+aRZByBjHdqA2Gs/mW7Mfl9B60ivv8vcuQRjHpmo\n2QkKoPU8elPkUPImwHOdrc9/WgNySNPLmMQUE9M9q1JbFBDGWPlyDnax4Iqjb3DWLGZSspB2\n4bpmk1G+ub6YsxCEryq9OKBkF55M14QjEjvVGSXbJIgGzj+KnBY41DyEr83bmoLhleV2/nQA\n8SNtVmbKqKgj2DqMcZzTXRWkVSxGBuA7U6NdkhLfIzA0AKF+0rlW/HFIuCpVfvYxk0SK7bU3\nAjH8PFLIowIwMBDgjPJoAesmNqff4xip1i+Y52jA45qBeIwQm0A/eHUVYtkVjIIxvIG4k0DH\nspkXMZ3ZH3iMdKbMy7QScPjFPjV5N248jqe1NC7kjLDI3cGgdgbzIVQqPvH5qmZI9wj3ZxyC\nOtNLeWxCruVmxn3pJ1yo2LscHnnH60hWHSMNwCMCw7Hg0rNt+U8sFyTio/LLdOc9Nwwc0bf3\nwHQfxA0wB4yuxyQyelMTZGpZj8248UojWYNwxXPbpQn3lG04z+FAhk0n3mb5MdG9R6VMGbbg\ntsAHyhaZJ/pFtiUqoycE03c0YRhhtxx81ICzHJ5kf7xtgzndjrWx4b0ubWNUiht1bdM6ovy8\nsScdKx7X96ypnGenpX2h+wN8AH+JfjKz1K7t2OmWB3ztt9RwM9s5/SuWcjWEbn3H+xF8DU+E\nfw1jubyAR6rqAEjB1G+NeRg+5r6NZgzNgY9ajjjjtEEMS7IoxtVcYwAMf0ppkLZx1PpWGxuh\n3C8YxSLls549KQt8vrSKxZgDQMGJHHanHJIpJFPTNIclcDtSCw/IU4xzShjk8UzadwYntUme\nKAGlhwe9LuwD0pu4dMc0L830pgODcA9BT8g0zHyjmlj+Y47UIEL1Pt60v14pPbpQoJX5qoYj\nfdIHFO5VQOlJtwQetBbcelABxt96QHbx3py/SkVfmyTmkA7aXXJ4NIu7bjpShvypV+9yKY/M\nZt6elHKmlDN07UnKn2pDD72TmkX5gKV+mOM0nI4pi1FooooC5mbu4OKUtuWhk5HpSsOmOlZD\nI1OTtxT1QDknn0pR93p81Lt7mgQpbJ6UmN2eM0x2CtTlYlc9BQMBnd7UM3pxSrhlz3pAAylT\nQgGttVcnims42gD8KJC23ZjgUmwNyFxQwSJN2evX3oc8DHU0cc8ZNJt6HNBIjZyB0FPTnr2o\ncfKO9NV/mHFUA7cOT0pQp25z+FRn5mIK8U4NxknmpYxu09aeF+U0BuOPxpdw20CGsRuHvQG2\n8Y+lDKcgGhVKqSTzniqEgXO4U4t8xBzjtSbu/Q0ctyTkUmMXcFPrTfmGSaXjdkdaTnccHimA\ngc7SKf1U49KYvXGKcp+XGMn2pAJuyAKcy/J15pFPXij7wJIpdQCM7RgnGak3Kqmo1UEZzmlX\n73TIqiB27oaQy7RzTgw2nIqMr5nNJlIUN5nbA9aeqjdkVHkrhRUmNi570xCFt3GM0knYDrQG\nO7
I4pWYHJ6EUAG7zFyBijGOnXvRuCYJPBFMz3HSkMkXhcUgZVJGeaOe44pNu+TGKYXFZt2MU\nr42gnrS42qRTQpxnrQIGzJtPpQrZyCOKOeaVtxTAoEJz60dBk0nOcAUhO04PIoAA25RzSBhz\n60jY7UuwbgSaYyRVDMp6c1leOdNXVvCt9E24ELuGBnOK14cMai1CMzafcQk8NGyn34oEfCPi\niHZJMpXlWK/rXmutW5l3MOjcGvWfiJayW+o3SkFSsp+U8HGa8t1bDcIeQaoDjLu0NvGziTnG\ndtZkgHl7upx+NburQ7VyRn3rn2Vlzg/j6VRmyspL/Iw981Rv8opO07farwk8t9jfU1TuiZMq\nGJGeKsky5FZlD4wPeovuuXOASfSr10weIBR81UH4UsenTFMTIL1nwXzhRxVZ48YbdvyMmrUi\nHaF+8BzmqssZL4BIUihCKzH5SwP1qGRjt2gbR3WnOoVtuc1CyvkgEFgMn6VVxDZGB245PSq0\nmCepyfWp1yzZ3dOTTZo9u5lWmhFWQDqeQx5qBlA4zxnirckgMYGQN3A9ars20HdjIOKYEO1M\n+b5hHYUjb41OfmA5P0p7sVwjJhW5psjD5cHLY5H9KaAjXYrKF4RjkGlaZULKV2rng9qCyo3y\nDnGcelQeb5ud696BEbJ+8wG5Iz7UscnljcoDY4pWfzJNu3IAx0pkcRjJ3NhB1poBGb5MgEgn\nkUeSNvzH5sZAFMXkEK2Mng+tIrHzCWPtVDEkzuXK4Y8Uka5+YnAxjApzMCid3yaaGDDceoXo\nKmwhERRtdm4zx2pknzyO6Lt46UjfvEUMcZOcentTNxjlcgcEdDSHcbGpOG3cAdKep/dgk7cn\nFRMd3P3T1p+NyglsiqEK0g3AY56GiRVLbfunoKXb8p2jJxmopNzruB4AyfWmAcqxUKPTNNWE\nDqxbHPWjAOCBlsZzTWAyAc7jQO4JhgCDkA4xSyMyt5Y43UsibV2nr7UzzFEY3A/1oAQMFcR4\nyaTlmJIzzSYCsTjk96eoEe0k4BNG4DdrHtuFLI248Lxims7Rv8pwOtEn7sli24EZo8gHDDMM\nDaAMbabtKqSTj09qRZPMQYyT1zSzf6sDnd1BqgGt5fyhBkA8n3pMsu853FuCadG63BXbx6n3\npVj8uYnselAiFVUoQARgZwaRWBjQgZ55zU0inZvDYKn7tNZTnOOvb0pdQGzMgbAO0H0FG4Rg\nhjvXHB96NwUgj5sVGq/uzg43GmAit8zDsB+tO2qvAVi/c9qAmIhzntSbmjyxPzdhQA5T5jFu\nc4oYlcNsynf2oVlZRjOT1pF+Vtyj86CkNVNudowTzupPmVuSCKVwWjLZ5NAQlRx26UCJOBkj\n5lC5NRt8ygqcBhnNOjKhSQ209CKCFj3qTncMCkBC2Dgg5I6U5W25LHJb+Km+XtUKOvQ4p6fK\nSMfLj86QCJH8p2nINKy4xkZGKaflwTkD0FOCiSPqOT1zTEMXduIYcDpT13BAvY0m3ggngDrS\njEiKTwFPWmArM6r1FQtiT2YCpFZWaT36UgOMYGT3pdQIgW6EZpWbzIz8uG9KfJvjyy9elIrC\nRcE9+aOoDWbd83AJ/ho2+WQSNppFVfmIXaueB3pzDIJPzbfSmINu3O7+Kmyds/KPWnrJuUYI\nJ9PSm7gAWPXNBQ1jhlZW8xe5FSrjfkrlCetNXHO0bfVaYjFuf4QeBQPoPEaqrAHJz3pJFG0G\nI4we/rSZDsBg8nNDKq7lfgE5pEiIGOQOG/vNTywbao5OfmxURION2dualj+dmXG30YUwEk5U\nt/CD2pn+sb73B60/cfLZQCR69qST7oAHy459qQCKylcLw44FIvzrwTkdaf0BCDHHBpq8AsOB\njJX3oQ0NTAG0KcHocUSDlWLBcDBNP37lyG254x70hwYySo+XjNMnqM27djKdylsGnN8rZJ+X\n0py/
wdAOpNMjVfMIYnB6ZoKE3Lu68Uqqy5AbC9qFXfuUKCfWm7WwO+OPagCXdtbDje2OPSmM\nucYGPanHK8+nAoIaSMJjLA80ANXd8zY24HC0gbMy84DD9aXzW3ED5T0GaQ5EKtnnPJpCEkKt\nIFJKn+9S+SWZBnj+dKzhU3Ou/ccACkaR5nxjaFHFMBjfu3aMjI7UfdYr3xxT23RyKDhi3vTJ\nEkgk4GT15pDDaMYQ8DnB9afI+Tk9COvvSqfMx/Ce9Nzu3l+MdqAGb/lHc9DT1iy38OBzimiT\ncoZQAD0BpHy7h1wF74pi6jlYeYdn3cZwaV1cxbguUxyF60nzLHkJwT1pFPZSV9aQxu4iPady\npjIBp42SKHxtfGMUjfvMAj5RxSMRuLfeUDpSYrAcL9z5h1yaUy7pJNvyrjjPrSL80a5bBbij\nhcrw56jiqGNZSYwuchjS7k8tSTyvek3GBVyM4FEeY4wNmc84oJsL8q56kt3pMyLyoxjpSruj\nA5yx/hpGbcu4nbzjb3oGLHMWVsjf3JFNdT8rhWTv9acMIwCn3HpShXbLbsp/nigBGUgBwuWb\nkihmIXg/N+tCsoYlyytjt0pq7WGW3A0DsKzNEo2/MzdzTvl2ej4ztFNZiqrg0pVmlyCA2OWo\nAbHIyx5DkVJIwZBk5b1pGI3DIBHTp3pmdpwF+Yd/SgQ+RWVMLyvdvSmjMftu7+tKFLZy/UZN\nDpls5yQtNAMZmYgJww6ml+625vvY6jvSFSq4VuT1zTFk/dlQu4Z5JpAPVk37859aXh2ztwvb\n/aNM43L5a57YoZSO/U7c/wBKfQCREEmeu5evP6UyWYMwEY24IzxTuflY5DAYAHelJDDcq/Mv\nU1LEG0fMwP51HG3Bx8obrT2Yt14NIGEnAG0d/emMGUNjHBBpzNJsIIGTTWypGH3KRjkUSK0c\nifMDnvQMRmViqopGOwoGVjJzu56VKuBId/3MVCrs2IgQzFs9O1AuoNu3Ajj8KVi0eWYhs9qW\nRcM2TgA9zTB8x3E89hQA6RvkRlOGzwtOKiaTAyCOTmmeYvmKrLz60KzRsynoexoAUq0hJC47\nc03cSQoXkHGaliCyD5mKAVF5ikbl4OcUAL83zZPsaFZljwoDY6Z/lTVZju3cj1p8chUFTgHG\nc0CQjfd470sabGEjHGBjaabuWSMOCQc4IP8AOnSfMCFbzB3pjEXAPHyM3NLJ9/cPl2jNIcth\noxlQMc0nLbQXBx1NIBS43Z25UjJ/wojZAob+JuNppdw4XPGc/Wmh0ZXOPm7cdKABtkjBctg8\n0rMVUjcOPShWRFJ+9uHbsaY2UJPXHHSgCTc0KDG0k/xelRlsEADcT+tSCMKocLgLzz3pjbm6\n8Z5FBI2TaGXOQe+elPZfmDqeMcGkVip5GUIyppgPyEtnBP4CgALKyDepJBzuHTNSbyJlLKw3\ndP8AGmOzLgA9+U7U7593zZbPYUAMwG3k/Ng8c9aefmGMcYzn+lI2zB2/KOnSnogJUZBAGaBj\nFYLkLhUb+H3oXCkgD5hyTmklYqwBwATnIpFIV85yW/iFMY7hULdQ3akjkC5Xmn7WZkPTjksO\nKYQ0jhAQCpySR2pDF3YbaOBjNNV3kCvwDR1YlmC8+nagoJOi+WBQIkJE3LLnnNNkkKthOWPP\n4UqgLH8rYYe1LJncoO1t35gelAhqMnGfnzzxT1fzN4B2yAcCmrtDNswGHHNGVVAXO6TOOKBW\nE3blDjPoc0N5gUqxyfapGbLKrjOT97tTNjLIxDb1zjNBQjMyxgocBvXtTmkbzEVTufHOaTaU\nRsEMM9KjCqqEYyxPWgV2h/7yRSQPkz+VIM/3cNnr/WnxK4DAjcxHORRu2rkct0yfSn0ERNvD\nHnn0zSyEyc7vm6GpFURgAHnB60MuV2kDd1zUgMQ+YwTPygdKnkfMK87iDjDdqYwCjGB9aTcp\nwQCAPb
imWIzHdk9emaiQFWbBP19KkVnETDpzkeuKFiYs+85X1pgOVXkdSAGAHemEuWkJ+XBw\nDUsKIynnZIo4jz1prYkwArYJycUiegxcR4AHzdT6UrseVX5s9dtLiPzGyGeP+96Uu3owbPoc\nYoGNwpZ3GdgHb1pY0MKueWzQ25X+7+6YY/GkQt/C2F759aAHyPuhCuMN/KkDvIu3I3L7UMpz\nlhnnmnJj5kYlD0DmgTEHliFskhmPNLCrcBeVAzvFNwUyjsN3anbBtUEkc87aBD1XcuAdynk0\nS75MbBtAoDJHEEiPz5yVPpT227SqqQ56CkwZCcrIqnJ96UebuK5HH8WKerFVXccEHBpGJ3uA\nwJByMUAI0hbP8D9NvrSKmIC3RieKazbmBJx9fWlYnOOjdSPX3pgPjt2SQKHBLc4pjLypBJLc\n89KFk6sqnPTdTizsqpj5fT0pDE8tWUsSQB29abFs8wnkrjkf1oXDSSZbPGAKVfl3diOBVCJF\nY+SVY7hnP4U1o2aYMDtjIximrGxYd1xwP6UrZ2DjaBxzQxjvJSOQc70PB9jS+X5ali2CD27i\nlaMHy9vKd6TBVhuH/wCqkMesCtuIX33U1lVsFWyvQ05GZFDjKjOMGhSiq7MeCeuOlADQQzFV\nHy9P/r0i4XHONnHTqaMLHjadwPepIs7icZXtkUAhFZuhXPOeKcJixIPDZ49Kci+ZI4PBbv6U\ngAjITdwTy1BNgTbuJJ3VHk5aQt+FOVWZTtGO4zUjruVUY4HXj1pD1IkmMybiSuOMVZjjlwd4\nwpHBpixjb8x47t71LOrNGAOmfWmIb5biPC4/Co2UNgHI96mWXZ8gU5PTFPjYRttYZLDv60DZ\nVkG3Kjk5yanUMAvRV7AU1v3ifIcNnBp0XGwFz70CEDoGAYFi3twKVlO1mHzY605tu7IPJ4AN\nOWRVUqVwc9fWgZCqhNo3EjrS4+VnwOuKe2SrMF47Co5Msqhhsbrgd6QLzHiEs3zkRrUaq3Lk\n/KOhp/mMfvKSOtMjRkVmI3hjkfSmINvnR8ct0Gaj2krgjDjinvJ0P3ARkL6UjLuYuH4X0pgR\nhTEpbHzetNh3NI67dhb5jnvSb3GDkbc55qRl+VJMtvPPvimAMiF+FHTlvWpCwG3A4HH0qrz5\nhBz9amG/y9xP3eRR0AfbticRuBsPJJrRgjVWKjJXsfQVn/LJhyMnrj1q5ZNvYgAqDUiNzT2R\nSB+VdBZyeXJGenPNc3YRjzEY8he1dPbol5sCLhc4zUsEehaCpa3VhyCK7DTpB5ZX+P0FcloC\ni1t0TflSNtb9u7R4VGzzWZoaHii8jh0mViSABtz714pcW4+0yN5gbPOK9G8c6of7NeGT7zni\nvJ5pJNxZFzJnHFdFI56j1SNOOR5lUA8ryBWlptwVdjIFJYY3ZxiqNnPMsKySxqH24K4/WoFa\nS4m2r9w967Ec7OgdlijV0bDociuj8P65ujkEo37jxzjFcfpiqyukso3Kf4u9Twn7PdRFGO3o\ndtTLVAt9D2Dw1qgt7pk3FWb+LPB9q+mv2efiq2l3C6DqMu6xuHxH/wBM34AA9K+R9JvIYVWM\n5Lt0k9K7rQ7oiSOeOfY6/eINcc4nZGTP0VdCv6c56+9LxtJxXkHwL+KyeJNLj0C/YpqNqhMb\nyMP3iDtknk166p+Udx68VyyVmbph70hbdk0rfLwRTM+lQMVX96ezhhx1pi/ezihutAx4+Ycn\nmjqvHWkBxQDznpQAvuelLwzccU0crzR/D1/CkBKxwAM5NAyqgVErHGSKUsWzSAc5/KhWbsKa\nOlSbvlwOtIB31FOU+vNNBP8AFS52gnvQPUQ4U56k0vmZ+XFNDDjA5o8zqMc0xD9wZgKGYEk9\nBUana3Wg5Ybe2adgJVGzLU9WFQ8+uacjDv1pFD8/N7UDAbkUqtuXnik8wNzj5aQCjjJBoLFl\nxzmk3DJ9
KOeo6UAPi+7jvS7scVGrnH1p3O3igB4FL9BTN/QAGnM2KAFzjpRnPahiOMGkDd6C\nkOLkDFG4AdOaZ1+alxnvxQIccsODinKNoHNM3Ae+KG+7k8GkIkZwMUjNhs9qYM9+aeeFAPeg\nEPVs8kUjEshpm4npxS88ZpjFTjGadUfXig5VqTAk5FG4g0xsqeTS53UMRIPvChSVJGM5pjHa\nopGbkGkBI35fSgLgZJ5ppO7jNOyFXJ5NMBuMdDzT0c455NNY9CaMAtx0pMBzZZOOeadtC4oU\n/KMUnTrQWgbd1xQM8D1puS3elORg9RmgB24qcUbscGoy2Gp24nqOKAHBvlzigvhs9abu3ZAN\nAYdxzSEIPc55p+QWHahU9BUkUJZskcVRSHKu5hnip0j3ED+VLDEX+ULuP+Fcr8Uvi14Y+Dei\njUNfv4oZXB8i1BBkkbGcAf560WuyWzrr28s9D0+a+v7iO2tIYy8kznAUAEk/59a+Cf2pv22J\n9UtpNC8FtNa2xYxyakrYMvAztHpzXjf7Q37YXir4uXT2FvcS2Og9PsEGV3+7HPNfO11q15JM\nFYt5cQyC5zitowMXLsS+KdSuW5uZvOaQmRmY5ZifU1ynneY2QuwY69KszXQm3u7ErnILZOar\nSZkXJARs52j0rpSsiWNjkdYgyZB/nVeSUmUYOH7tVln8wqoPloo+90qiWDOVU5X+8BTJEkYQ\n5yTjOSRUYbzcqrYB5H0/xo2vuKlfcMfWmNtMZYgiQfeUUwEmkf5k4HHBpPMC2+HALDkcVHuE\ngO7DccYpI0kbKtgbRwKRIfvJn2qpYhc7VpLORJbeSPYTI3APvTVUrIQufM29c0nzyAZ+90+W\nmMQMhARdwZRk8U7cWiYZ2lunrSLEqsUU4zx7ZpxR5o8btrJx0oBkMkn7tYyT6VG3LFSmCBjn\npU0Sx/Kwz5ueSelL9ndrd5JNoXfgHNITKz+aqoE5XOKk2npyrxjOTUpXyygA+XPrSSM0YD5D\nMei0DWww7JyoQ7H6HnjNQReY6HKjk7f8ak2GRWLLtxzUKFlZAGwG+6fWmIcW2xqS6tjgAdqi\nkWWNtpwC3OaZKqRL5YGdw3bqiZ/NdSTnjGQaBiPI6qSoPynkA9PepRcNJbkbizdS54BHpVdZ\nBDICpMj9GXHak/dK5MZOGHQ9KfQZYj1CL5BHFlh7ZqrPmXzARs3NnA60sixeUkeCrZyWHFRq\nojYneWB4FIYZO1WY/KvrSyeXIUZcgsw60bjE2MBgOvvSeZH94t/uleooIHbcrI6/xcgDtTYw\nZPmVcvnnPanxriNcDAByAKn2jcxHzL3A4oKGxsZMhE3Y70+Tb5YJGwtx8tG3ah8ole5anptQ\n5I3/AMRU0DJGl+QZfleTx1FIZgWXaMh/uj0pI/l+QqA7E9elHltGR8u09BQArSqj46qvb3pz\nATn+6x7GkYYk5Xcg6/X1pUzzjDP12k44pCGs3zIgbef7wpzRs2cgNn+Md6iZkHCoTjmnrIQ4\naQbVxn8KAEEu1XTGAVxgd6dERJGI92Cozj0pmV3An7jcjB7UjwsFd1GQ3TBpjF2ByqLyPQin\nybImhCL87NjcecVE2QqYHlydgTVy3t1jkCu4kYtwMd/QVnJ2GkdX4B8H3PirxHZ2lrFunnIR\nVAyMZ+8fbrX7ffs0/Bu0+C/wx0/Sooo1vpY1e5lj/jbBx/Ovi3/gnb+zj/az/wDCV6rb7rGB\nlaJZBjLYBx/Ov0lbMeF+6AOF9OTXC3dm8FYrzZyaYrNtA209mLMeKUZ3YzxSNUhRkvxwKAwJ\nIIxRwuccmkGOh4oAX+VO5X8abnHQ5oBPJoAfj5cUN254pF+YZFH3RzzQDFZhngUnOeOlO2nv\nxTFyQRQJDgp6dRRt96M7Bj1pP4sigYoPzEU7lgRmgdenNBOeMVQApG0egobpmmyIQevFOzxk\nimAoO5OuKa
2FGaXHfFGD+HepGLH0xng0vK55yKZjHXgU6NR36UDE5bODQPU80d+tKy56cUwD\nAamtnd3oLbcDHFP8s7QQc0w3GYHvRT6KAMyOmhhtGOe9Ln5cdDQo2RgDnFZAhQS3tQJByOpo\n3DvSbR2oDYaGAbOM08SDc2Pu4qPGWx0pIxtBB60ySTd0IHGKajLn5uDSOSeBwBTerA9qLWAe\n0y+ZkH86SRi0Yx1oYA8YyaeF6HtQAwbsDIqQPhsYzSs20+1NkB2+9FgFXDDINL90dKaFG3Oc\nGl2lsZNIYZJOcUm5Sc4pWDL1wRSBfWgPId/KkChVyRwaOG46Gk56t0oEG4cButO4Occ01mGM\n4BpD8nFUA7A25JFKp44pFUY5oDBWyOKTAYq/vCe9SbdvOc0m7qTQvqelK4CAn0470ob5iV4o\nPzHI4FN+brTANpXnOKar9e4qST5l245pI08teeaAAY4xxUisAME4zSZUp6GhFBXpzTFYXrJg\nHAxSfdYZPFOHytnvTHUswyeKAHFhuBHrTmbcORTFACZAzQMnBoGOVuRxSNtbJoDHvgUL1I7U\nhdReGwCvFJgdVGO2KazeWucZpwBK8jBpgDN82M0jNgcE5peMgtwaTGMk0xhuLY9aVVO7GaUg\nbQQOKAuzk5JqWAn3j05FHPXPFC8qTnFA6+1MgMjOc01k455NLkK2SOKdt4yOaBkQwvA655p/\nTJFJt/CnLGRwKAEjbaOasqwYAHvwaqlSGwanjOcKDgUCPkT446SbTxPfRAeXHuJU9eteE6hZ\nuMkJwDjNfTv7RlisXiKSRRgugYj36V8/3Elusc4mHzsCM0xHnWrx77ZtrfOp+761z03yo2fy\nro7638mRgTuXNZE0OckDj0rREsxWB6jGDxyKqXHysAPpV6+YLGFHDZzgVRmPnMCfkI5qyClc\nRlSR/F2qgw27vMOD6VfuG+0NwOh+8KzZd2Np/wBbnrTQiKNlVwTlh2FVLibzsnO0hvu1YlkZ\ngFwEKnmqip98k55JFMRXPzMx6evFQqoZWJbnHWrKx7lw3OarSRoMxr8uOd3rTsIbE52lCB0z\nmm4KyHHIzzmnjHKshB7EU+aZFkZSecUwM65jWRh/D6HsKr5+Zsjay9Qec1YmZd3zH5+oFQt8\ny9MbqdwGSTNJjjBHQ1FMrqwyCOOvpUsgCugPIFG7BYOdwbptoFfoVmVtjOo2Fe5PWmMzPlj0\nFXGSOSMLjDdM5qrdR7Suxup54phcjSRmRmD4b0oyWjJz+FJGhVg45U8Ukilup4zyKoZGc53F\nAQOOKaoDhimWbPINCxlWkAbK4yMfz/CnrGdyqp4I5b196TAhikwxAXdSMrRx/IPmqVoWWNtm\nODwKj+fZzwRVCGbk+7tzJ1NABk+ZxjttpwVGjLEgHp7moyquuBnIPANIQMQHG5QMHGKRfmWR\nVAznIp7MJG2k4+lKyhG3r93GKAGK5Iyp5HBpGj2kNn8KbzHkYwGpFUyMVOc4wKdxibUYli/l\nqOmBTW9SOaRv3kYOCuDimqrbcLznnFAEqqjNuc9uKrruZifvD6VYjYZAYhacuS+ByoFIe5Ds\nDYLHA9Kb8p4J4XkU512lg5+bqKjIHUZIxTQhM7pBu4yvFNbfI24AJt9adJH80Z5HGTTNqliz\nEnPFNiHqnynIwe9H+p/iG7HH0pGYqQpY46cd6MjYdsZb1bNHQYsZLMGCbfVhU0ipv3k7ww6V\nAclQqnb60kkhjxjJPSmIc0itJlVwKawWVT2PY0bX3lencmo2UiRhncpGaBj1UJ2xkYNRNnaN\no4zg1K2Pl+bJ9O1DMF6qcE8YpCQkoVpQQPk6YpJGIwR90U6LLM20bV9O9IzjcFxhqBCAj73A\nJ7UxmZsgjC0GRWkIIwRSrtfNMoAwjUOeh420jLuUleGFIu7cw6jrzQuGzk4NADVx5gYIDx3N\nO2DaDjLdcelH
Ma7cZz60M3lhflHPH40CGsSxP8OevvTuY48Zyc9T6UHC5A+93yKiWHa6g5bv\nnPFADpJPLOCu/PSk+5GcDgmnRhlb1XPU9qTlnKZwp5pDHFflClqau5lIPCrzQGVBh1LHtil7\n5CEADvRqAM3yg9O9N4Ck46nk0MRIvXbTF+aPDtlc0hD9rcHoAeaiGGkYRsCeu2pVUKAdxOTz\nmmNGq7jgKpPJ70DEJ3NyMDH407KKxdvlA/Kmr90Nu35HGaX+HOMjtmqAj+7yQTu6FakO5FCs\nBz0pzbWcE5zt7Uxcp82M0FCtJ8mSpLdKajqzYIK0+SFlG485HAFR7i2FbBaglj12liCc80bV\n3bSOF9aMZwQvIo3jJOCSaBC7sMN23P8ACcdKRG3fMq+xxSJhWB6ntQxwNyjH94jtS6gJ8x3Y\nO0j9aGbcvtTmk3Y2uG9MUmPLyMZoJbGq3tgUit5e4/ePanBTwT930psjDczYIwMAUIpC7eik\nfMaOFLIAXGOcUq/wvvxgZNEcnzM44HdaYhvyLED1GelOVi0ePvEngUyOMqMEdTkCn/eyMZWg\nY2NfvZzuz2pWjIwDwnXA60ctgFqbE5ZTt3Eg42mkND9w2kqMgc0eYzZIG0EfpTVk8xc7cEH6\nZpOZSMDkdRTBDWjLKCD0OSacWTaR0Ddu+aRV3Jwcp19KT5JcckHoCfWkwHBSzLjoP50kar5h\n3Mc98UjD5QAee/1p3ys2Cdrr+tAhnGNy54PfrRIxjXeWO41I2NpbB2/1pud20kZPehgNZhCq\n/Nkt14oGWVwRk/3qcwGMjAOc5pkkm11Y8Z4z2pABZ49pI/df3qEYKrMh5z0qSZwHUYqOSNjl\nsjcOeKoQ5mD4XdhqQM21iBu/wpsf+ucHGfenqqMwKuUbuCODSAb8sjf3VIoVtp8sLuanYZmI\nbge3amcjkkkA9qChu5Vb5s5BwBTmkKycj6ChiSuGXA60zKhtyndTEO53E4znoKRiZUAyQwPI\npQfmIz1FCKdu3v14pdQHFTI4KcqOPcUSKqsAR84P4U1UTJb5guOeaRWEknKsoIwM8596Yuoq\nqrKykA96ady85yAMk05crMyKMjFNdSxIHyqOooGIu7aTuye9Pl3rFwvWk/hUggcfnT/vEBm+\nXvQAm4/LgZOMUrJgBSfmo5VDt7Hg030YMM+9MBVVWc5OVTnimtIqsxX7pHfrQylV3Bvvdaac\nKmXGQDgbe9AxjucZUEDHWp/mREKnO7rR1wuBtPPNG4+Zxjj+FaOghMnaxA+ZfWo/+WeVPzHt\nTwQ0hySrE9CKljVJCQFwRSGVZGkXaDx7U7azSfMm4Y60+4Ty2yq59Qe1NUSqo5wpPWn0AUqG\nZdjEEDBzSAiNGAOCafuO4oVIao2QNuJG09KQh24sseD9fehmG0sADg8VCq7Y8u+MdAKkjUSY\nYAjPJFACruZgAMr1J9KF2thSvzZ4akLDICKc56+1NLHzBkn5WoEOyzN8xzjjFO3eXz933pAQ\nzNtbIznNRyHcpK857UDHL8zEY3KRkGkXb5R4+b1FOjTClUYE45poZQucEDp0oC4jMfLL9O/N\nLIc7XbOWpzIX8vdnaDninNy245yRkLjigLkTKx+QDk8k07ywseWPIPAFJw24gZHQNmiPJh2g\n5bPJoC45cNvXOw9qaHDJyuJPX1pPmkLYwWJxinNCsn+sOwLxwaYuogDcHP1pGba2UG3Pf1pz\ndgmA3qaGHmRj170hi7pCQP4RSMys20J06+lIFC7dozzzSlm8xsGkAv3XyoyFHWmq4Zj8uB3p\nVzH0Jx6UqzMVbpjoOKYDNy7ixXC9AKezFomyRyc4pP8AlptYgjHNNIYKXC59BQA5mO0KDxjt\nSNM3l85J6Yo2+XLHjIY9qSRMMwUYAOaBWEKFYxjjJzg9hS/MnIGec5HpSuA3zlj+FGd0bbXK\njHFAWEUOy5UAjO
RmkVwSSxwM9Ke2A0bK427eg9aGURkBuCeeKBWHR7l3hjuXHpUMO1DtKnPY\n0+M43E5z2OaTHmSDcNqj+Kgod5a7g/3uMAU1Y8LxwCeaGUKp2nB9KkcttBUcqOCaAI/Lyqkn\noOBSMd7YPA9aWN2jZSRl8crRgSYcD5c/jQAzcPmxgnpmn7W+UL8wxytNki2scJgE07BjbaBt\nb+9QMcseVPcjnrSfKrYIwcZzSeWy5yuG9c04n92xZe2OetAg+RtoC5ycbqaUAcZbADYoSNzG\nGQbSOP8A69ISSyj7zg8t2piJY2WQSDgAHqaadkceDnr0Hel4WQkrkHrSMxjdV27gT2pE3uMa\nMFuGwBz/APWo3bmUD7o56U/KrlWzweMdqcrFMFRlj2H86CxCrJIOdvOc+lNCht5Lbuc5FP3f\nMSc5qLftjZSCu4/ep9AHr8zENnpTlEggwQMfrTSoVcnqozmk42hxITu5+lIVhEJkJDDAUcUL\n/rCGbC7eB70pGCCrYHc04LnfuXJAyGpMYnPlBt+T92pMkdfunjb3NMVl3IshAUjPSmKwkZmL\nbSp4zVXASMg/IBkZ71IrBd43YPbNII90O4HbzSxxkZJIx696QCFd2FB2rTWZsYb5eMnFOkZi\nGZSF4wDTGUKiMDgKPmX1qrIB0bD5WA57Z7U5vvnzGyrDt2NNEiNHvJ+lPX+6T1GSKkYxfmUN\nuyFOD71JJu4DYz2NRbS2eNv+zT44mbktuIHC0yAjjCtk4b1o3EfdPFNjjbYWPBB5qSTEDAAq\nAwzgmgAm2wzKcAu4wPrTo/lcShsZGNvpTVVXmBZeg4Y9qcFXcd3yk85NIBj4ZS7ZdemBSts3\nLsG1yORT9y53L09KiVlkDE53Z4pALuB+V1yc9qXiSYlCenU9aXa/G5QVxzg0c+WSARzww5J/\nCjUY7B2lDjCfN6ZpjAKqych+u3tinrlmCN0I5pONp25LY2jdTDYamNvIxk53Umzc394k09I2\nZAhXGB3pI16noR0FMQ6NTHIVJ2hfmGaRmzlypLddv9aduLtu27jjGaaylo9/8C/eC0MY4soj\nBQ579KMBZE3HcpGM0ip++facdqdtEihehU8D1pDBl9++MUqMOW4KLweKQ5Vsry9O2vzgAHv6\nUwHR7WjZsZDHApIY2jjZsho8/rSchfl6dKfDCBhHOFz+tIQskjMVGMAckCkk2syZG0qd2Knk\niTlywUrwBUHyt1+Zs9qACP5Vfncyn8KWItJtGcc801W3qyjjb1pkefLyX4J4AoGWIlDyOM8L\n27U5VEjbjnHTNMt1K4bAYL2p7HaBgblb5qBdQjZY7gM2SF4GPWpGYRxgBt0n97FMYRMpfBB9\nBRk+V0K570AEbL99/wDWewoUI0R4Kt706NnhhYsofdxSLJtTeeB0JPQUDEZRJiQDJXtTvNy4\nyMqajVTI7Op+RhiptoDBVU7VGSCeaBEUa/NIgfftP3qJG+53PdvSnLhZHATZv5pCpt1w/B9q\nBMkUFmZh8pA5qJmEe4nnjt2pVDxx8cgnPvTJNvUjBY4oHYiwVi+bDbjw1LlQuQpz3xTjCylm\nbBXpt/rUeGUKynbk7QT296BWEYF48kY5zk012+XdlhjoafhtzI3Y8+hpFZjkghVHBqgCKQso\nQLgtzyelO5848YXoaaqj5gq4OMg0/wAl9qjcMnBApMBYWMKsdpYg1fspnMIOApJ4B61nzMYm\nLOOnG0VZhZpOQOVXNIDobH7xx6ZrqNGm27VA4HIri7ORiNrtj+tdVo9yN2wDdzwKhgd9Yxy4\nj/hB5rpYZljTJGNoyT2rmNKvi0sZXqoxhvWtG81NYLWTkGR+OtTa7Gmcx4zvzJcNIHyBzjri\nuGmvHjmMsblc9GrUvNQ3T3BB3DnaGrAikdjKkqhUIz81ddNcu5zT1Z0lvqgvrWWNcMAvJ96W\nzkIjT5uSOtcykyWq
l0kMcbDJxV+x1MXkwQKSABjFb3MzS85pGJViJQ2AOxFbWl3b+WQQA68D\nPNZl1DCGHlncfSrljlbf5hkqTx3FAjutDvTPa/ZSu1lJO9jXaeD9Ygjtp4rnMcyNhWPcV5Pp\nt5Jv2KSTJ0Fdtpc0FxCPlK3CDLKx61nOPU0iz1/QNel0q9tNRsnMVzC29HBx/nivr/4W/Eq2\n+IWj+cCsV7FhZos9TgZI9q+D9Pvz8gL/ACDHP9K7Lwj4ou/CeqR39jcNEykNw2FbHr+Fcco3\nOmMj71ydnIyaZtPTvXM+APHVn440GK6guFkukAFwh4IfGenp7+1dOflJBJ+tc7jY2uJyORSj\nB5IpOecUH7uO9SMVvlXdRnK5pv8ACATxS57CgY7O2jjqKRmH40u72oAVW9aGYNnikOM9DSdM\n5qeoD4fm69qVZDuPHFM/iyKPu59TR1Ak3c5pGcnmmKp5psmegp2AlDGl3HqRzUDbgwxzUu7f\nxQA7cTzgUqk8Z6U0fLwelKGGcZwKBijhutPVhtyQd1M3DPHNO3bmpWGLuIFKGHAFNDBuKRcN\nnsaLASbuxGRShiF9vSmH5VApRIMY/WkA4t8oI4pytuXINRqxGB1pyrg4FADlbb1PFKkgkHNI\nVA/rQu3nFAh7KOB2peQT70wt0o3YxzQMccr3zQPm60meMmgNvyB0oAXcduR1pSTuyabu28fl\nSZO7nigCTd3FLu+YE03f2A5oZiWz0pAP3Dce1N5bNIG5zSZ+b2pAScqo7mjkgetNVj2o55J6\nUdQFVtxINLuKim5HWjO5dvSmBIcsoJNHWmx424zRu3YNKwEm7pxR90/Wmrlm9qVmBxmmA5ue\nKTdyKVfmyR2pGkDcgc0gFaTHFG4kCmNzSBitAyRuuRRvLfSos7uM1Ls28DmkAu8HtS54HNCr\n+FJuB4/lQNgPYYqTyw2DmliVuR1FWViLKF2jfj6U7CGxoG9sdeM1c8sWkTTTOsUSKWeRmwFx\n3PtXA/E740+Evg7pb3mu6lCswB8u2Rg0jnPAwPwr4B+P37aPir4rW8umaVcnw/oRB3w27Ykc\nZP3mHr6VUUwvY9z/AGmv21IfDNxNofhWTdeLuX7VCQVOQRnr2r4N8dfEjXfHepPqGv6lNf3U\nhwplckKMdh261ywuwykySySOCdpc54zWdeTASbmPVcKvoa3jHqRKRLeatctuUz4GeFVapeZ5\nah4yzlj8+5qZfSRtHEF++B82KrO3k42/cPO2t0kYWLc00F0xEfyjGM4qjF8mAwD7W9etIsiu\nCGDKxP3uwqCWQKxAO4Z+8KYai3jLNKZGPLdIx2qs/wC7j6bSOQFqzDcIkzZiypGKguJEjUIg\n3Hqcnke1F7jIA0s+4AfvAPvE1HkMBtBDL1z3q3Y2M10zhDhkUlvaoGj8tnUsAynBGetGoETM\nnksxwvpUJJj2ynLHGAtT5WbIVdvHOaikRV24BIPB5poVmR+SJF8x2MZ6Yx60kf8AqSUDDHBb\n1qTyV3IrZ24z17051whAbA7DtmqFsJ9n/wBHT5trdeetNa1PmBDLuduvPAo8szqUlIypwTTV\niSRfkJXZ1PqKQbke2NWEatgZ5arDOzW6w4BGc5P86Fmik8sGBto+6w9femyQuMvu+XoaQDWD\nzAtHyV/WoSAv7/O5l4C1LtaGNz1BG0BahMQZcqCu3nbSKGSyHzMl9znkgHgVE0Zlk4I2qflb\n0p7sGjZnhH4dfrUDZ8ndn585P0oEJLJ5ilWAQNx9ary+bNGqjCxo2BgDmnz7GjDBy0g5AphC\nyBXAIyPXHNMqw2PKtIuQuTTlyN2wKz9NpqKQGRkwwTb1NJDG0jcnKEE7qaAe0xUKjfiSKbsG\n44HPXcKayL5a7iS4bJ+lSKd3mICR3BxSAcyYkXnORTV+9+8XC5/hHFNjUbf3m4uTx2xT1Zpv\nkHUdKCRyBmYMDsTHXF
SYdcx7c7ueO3vQsbqgyR0654p6KXj3q4THVG6n6UDBt7JH8wkUcHAx\nkVLIyLncm4kdvSo5GRHRipAON3t7U/OxTEBv3cqBSYxBIVRQBkdQG600s8yHPJJ/GnNlowJe\noOAaNoEzSM3zEbdvvTBj5I22odwBGMj1pGiLSSAFSuMdaYsbCFmUEvnFMaGNs/eMn3iPWgQ4\nReTtUuT3z/Snbn27pEG1xtGaZCxYmRcfLzt9PrUqqJI/Mc7nA49qAGt5cca5HI+UUpwyooYn\nnp2ojZWxvj3Z/iz0qRYxHwOcjjIosMY2JNylPnJ2rnsc9a9j/Zy+EOo/E7xtY2lpbyuzuoY7\nflYd8+hPavNfCmhS+INUS3RGMo5X3OcV+wP7EH7PMHw38IxeIdQtDBqt4gESt1UYwTj1/wAT\nXHVlfRG0In0R8P8AwbY/DvwbYaBYKEht0wcd27k+9brMWY80vmdj97r/AJ/Wo9xXrzWFjZCq\nxyc80h+904pF4Y9xTmbcaBhtyeDS9+RTORzTtxxmgBPLAzzxSq1Azt+YZo9xQA/I7dqT73Wk\nVh+NKGByMc0AAx9aVm7YxQv3aRW5x1oAGbbgUmdvI5pWUNRt24oAcCSxOad/k1GflZccjNP6\nse1MAzu5pf4uelNkYKOeBSqdwqgFGeaPrSM3OKNx6UtCkDe/Shvl4Pel9MikIOOaQhQo3cUh\nyrZNOHbFNyWyxqgDqvtTVk2dTx2p24VDL93gdKBku8f3hRVfj0ooArSfez1pAx57CnbDt+tM\nKndgHJrIkUfNwRSt8pxTH9c9aGkAcJ1bGaVuoCsORSyY28daNpUjNHHegBSu3B9qYvHbIpxG\nF5605eEphYRgFUE8GkaRVbDGoby48tt3UVSbzLjJFGqHY0/MDthRkU7+HDHIqvaK0K4Y9qnj\nHynNPUBevGKJFwc5pGcNGAODTlXKjBzSARcYOTQvzfSlVAzY6Ckb5Fx2pgIw5yOtKuG65xR9\n/pwaX2o6h6ibRnjmj3IzTjhO1MaTavTg0CFUsFJ4pjHd8oHNOVjxxgU0ZaU44ApAHIbaRx61\nIrfLzzTW6dc0Lx2pMBQN3Tik3FQQRxSrycihlZgfSmAMflUjqaX7vPU0EYj4HSgdM9aoAXGe\nnNPU7RnFNRvmJPFO5VfXNMWozd3zTgdymk7HigDvn8KQClvkwvWgMVxkUjHvTWb5euanqAjS\nB5CBTlba2MVGq/vA2Kk+8STxVDQpz0o3EAHOaRcs3FNXLNignqObc2Dnin7ty470cKMDrTec\nYoKHeYAoPbpSM5XPPWk4oHOcDJ96liDAA5+9T+ducDNMU/NgjJ9aVn+amA5uuCODTTxxnim7\njSx/dJI4pisL6egprMWPBoZwF6cUeny0AP8A4emTSR53HI7f1FIMnn0p6qeuaBHh/wC0VYhv\nJuD92RCpY+oJ4/WvlTXYdsjADGOTX2b8ftP+1eFIpFUlVY84zjpXxzr2SZCVznjH9aAOGugb\ni4ZQMVnXEHkqxJrWmXByflINZWq5ClgPl74rSJNjnbyNeWOGYdKz7gmNS23tVmaRZLjg9OTU\nErq0rAng1sZdTOYBVL5wDzWe22ORWUkg9zWlNCW3BapTbhEATllPQCmIpTASOccH0NQS5+6q\njGOtW5gy5dwCccbetVTJyu4Y9cUCK0hAwxXHGDULLEzMrZ5X5cetWZP3kucbR0C1Gyx7XDHa\ny88CmhGcdy/PkkKMGkkfhmHzFuc1NIxYDaQAe9V2UhiynOeMUyhr7TIrMvbtUa4jbzMcg4Aq\nV2BQDp649ajZhG2WH4UxFVmLsWILHtgU2PKhiFxzjP8AWptyrIUR2BIyf8Kjmx5mFlyD1X0p\n2DoRu4jZTt3nHJqN3LFQoyM1I53bkTjHQiol3R4ycEHmjQkRWCSMvT0zUUjmPOeuOakuCu4t\nnqaib/WMrDc7Dn0FMZHt
KsWADAfdHofWn/OzDI2MRkse9OWPbMMYxj8/am3D9N/JJ6f3RQBE\n+S20nB7imrIwyF+5TmmBlA4IPGe9IWj+ZYyc5x9KCeoyTy1VQq5PXmmtnaSRjmpH+82FGMYB\nprAzDGcYHPNBY2YgSKFA6fjSeYH+TadvJJoLAyKcZApPM3OcHGTytMLCOPlGe/TmmeZ8xwMN\nTvLZj83AHSk2/NwMN71NhDWYSMFH3e4pi/6w81MVO1yo+bHP1qKRMbQgw45JpgxWQNh8bexp\n3K53cegFK0nnKRjCn09aby2cDJAxk0DGkfaC+BtZeMHvUaKTuBIXjHFSuxIAGfM65qDqx4+X\nOfxp2EDE7FJJ3Dj2pJW9twPpUuPUZHoKYqHaSPlFMQMu1kxyOvtTTJ85AXilULt5O4ihW+Xp\ngGkAm7dnjrwDThvUKvU5pOZOhxjihuW4JwB96mAnmEO2BkUiqGkxjB9KQfe65oZV3Yyc45wM\n0wGSbmYhsIenFDbjjDZ4xThho8elBZc8cDGMUAN5iYIckEcmnyMQoYD5e1MWTEZYMNw9ab5w\nMY4yDQAvlhuMbWPOTSPHtjyOuen9acuOhPFI0i4bP0oJYkbCQOCuGA4oVY+Nx284zR5m0ZHQ\njGfakZQzM+fl60DAEtJjG7B4qRmyvI5zxx0qPzMKc9T/ABUKh3DccehqSxV2qGyrEnvSMwZN\nq8nGeKdhmVgeRTfkkChD5dO5JGi7k8zGB02mnSY2j19Kew+Ug8v7dKYuEUnI3YouMVlCkHOR\njjHrSBg2CudxOCKQK29MNgEdKQDblgctnpTAc2W4IyTUcmNh2Kcjt6VIMMw65Udaj37Wzuyv\ntQA7zBIyleCOuabIPmLgbjTpMvgjpSOrLjB+oFADedqnGOMU3dnAHyrmpmIY8NkD0qLyxtbc\nccd+tAhu4Hn04FSbPl3A/UUKAqqGHy449aVY9qkk4NJjDeTGWY4boAaa6k4PH+8KcHyy5Abj\nFDLhSq4Ppg0wG8qgIOTTDJ+8G35R6EU+NWJfAwR096aZG8wb15x09KBD40VFaT8KQKGXJ+97\nUKyrHtkYgdRxSDav8ZYnnpUgODBm+VeAPpUafMSQ/IHWlztHI3A+lDJjjOWx2qgEZTtBB3fz\npd25tjDHuaSPd1+6uKQL5i5LZwc80mAoxuIYcEdqSPacM/A6cVIx3R8jCnpTPuqEGcd6YCbs\nknOecAetDMVxz3xSbQVLEn0AUdKfHHuYDdigBG2LI0YO7nO2jlvlTGM5peI8qeSTy1NkxGql\nV4PpQMcP3q+Y/rjFNVgoyDhc/lRIoaIYyoJyVpf3e4cc5+7QAf8ALMHO5R3FRl1+XuKkI/ec\nJt+vSogxz8pVjnmgBVZA27dj2Ip7Dc3zdRyDUPqCNxp6ruHr9aAFYyMu3hh/dpeEVhj5vSm7\ngGdV7DOaNrcndwDyaXUQrsWhOF6DrUayK6qBg/3qcEO4kN8vWpNq7WwuD2ApjIpmZBvABPqe\n1Hy7uH3bhzTpH3L06jAWkX/V4dQCPQUhCSyNGwXAIJ5p2VaSVCOMZFJt2L6H1NIy/KNx3E9P\nei4AqlMANluuKT/WK2eCetL8seezClLZIBXGed1FxB5Y2gE4+tMkYr9wdKeW2/KeSf4gKRdg\nkOWLKBzSGGHLb+Mevao4ZBHIxByepqTcGVRsxHnpSLucAbQmfSmAkj+XgbeG5PvTWUwSblbJ\nxx3wPSlL4k2suR0HtTpMbtv3eM5pgMUlVyF27uQvrSowKM3Ttj0NK3z/ADHIAGKOG2rHyq8k\nmgBFUGQZPOKdI3mEDbyOnpSeZ5mWJwM4xSfNu4+tAyRcqxIX5cc1XZeoI9xipHBdc89eKRpD\nkNgYoEOb5GjwOMfeppLK2SN3PAqV3Bw27zOME0yRiVDY2EUAK2WO84Rc9+aTjeTtwT3FL5SM\nEdXxIT0o/wCWhJUgdMUAN3
fN8oyR3pYpXwcrtzSZLK+1fu9MdaazMwVgcbhyppATPtkiky3O\nPvVV+bbtzkZp8ihUAXpR6E8EmmAu5mCMx3eh7U9fnRt3LdeKZgGIKByvamsxWPJBD9sUAJIg\nyHx8oHNSoqIyyIxbjle1MYFlALfL3UU5l3DaB0oEtBySHyyD8uTnOKaC3m4Vd2R+dJHIUU/J\n8vT5qVvmGGbDe3pQAq4DnHy+o7ZqF5BGDgEk9xUm1VIXHBPWn48kbdgYk4I9PemMiaTau0Da\nw70qM6twBg8mlbC7s8sx+7j9aYqmNgytk9KBMkZn288/Sk2B1yWKsBx6UgRgxDHDH9ae8oZS\nhAH/ANagLEe4RqigZXGPf605m8tsDnjtUbErtJOfX6UZ8zGOADkfSgLCh48sGXtn3pPLIh4+\n715606Rl2M23Ab+KhFO0HPOOKGMRWLDDIduKFbGcqdoGae8LDcAwJI+tIDIuN3p9ykA3eF2Y\nO/PQD3pY1HzbxubOBg9DTFX5sr8vP5GnbWi3/wAXPJpBqICdrZU46Ef1pyRhggKkL0BH86cz\n5jG1cE8HPpTd20qCxCjgChCE5kYA4xjikWby22qCSxwRSOPLYHnHapWmDNuz8wHAxTGOZgzA\nvlGAyKh3YUkZZmPemeY0xQEkup7U9MpIw5YHnbSAe3+rwOCOeaaZMH5l4I/h6UvmMTheB1wR\nSLu3Fc8nkCmK+ovl4JVhjjK01TvUknLDqKknkwFJ4PQ0jN828Dp+RoGMXG//AGMd6cp+QjGR\nnihiSgZhwTS/KsYbnGaYhyjbI2UydvamZVtu/gHgUrSAMTnnFQsyqwYfMmMUWESSIZA2Buwc\nbqQIFkVs7/p0p7SeZ907QoxikXcqhgMKRjFIAZjI2O2c5oZQV3E55xSDnauNtDZZvlAOOSKB\nji43KzZYZ2gf1pG/1ZUNvHuKaCWjTMf8VSdwFOWxg5oEIrSBcAg8VHjcq4OxvSnhT95vlHSl\nXa7bSmRQMaz7mweR0+tKrAZPRqRVVW+Yb3BpRvVmO3A9DTDcFLeXvX5j39aXcyj5V29yBTSo\nx83GfSlXasxYklcbeKYw/wBdjnardeOKTyyfkVtzL0b1pV3Ku4uAOgzTIsoN2CMnrQA+RXaR\nWABHcUsnyMflz9OlJJsViwYkCkWUSKADge9SAKu0AEbWznBoWQ/MSCM06RWY+ZjO3g5pS2+T\np2/A0ARx87geOOacv72FQV5x1NKsmJARgE8GlWMqhGcIOlAmR7f3gXO0D070sajc2flXOc0L\nsHQ4JHU0zaykNn2oC5JFmRWRcP1I7UbT5YGQXPHSo94bGRtZh2qRZCrbwMleCadhjWMa4wuB\n0IFK/UDZtipdxZtwHy9elJMpZVJOB1yD0oFcVW3MQOgH5058Ddt4weKaFMjcNtPTdTkycl8l\nx/CO/vSQh8Y4YBTgng0rKFXG0Er0J7GiNvlVycoBg+1NX5nJfp2X+tA7DudqKwyxPJ9aN4b5\nZRt9DSurugI+4vT1p7RhQ+/B44zQOxGrHkbdoPP1pu4NGT93B+9Qsm5VOTkdV9BTSpbhfXIX\n29aAJPMcsQSACv3qZu+4Ap6cn1p/DZUDcSOTSx5ZSjZXaPSgQyJdisqqeueeaHUtICowB2qR\nldIeOO9M2bgBvIbvQIk3O0eZCM+lC/K2VGDjv0pPOSOQHO4KOc96RnPBC7kP6UAKrMFO47QD\nnAppYoxI+4eCtOUpyxZgOnTrTtgZOcEjnOaAEjVlPyqc9STSKpVSVO1y2Tu9KcLjy2HX5qTe\nwcrJ8zdB/SgBw3mQ7Rhe5pOflPvyaVstHtBKgHmgfd+vA9zQHUfGpYMueSc/So8MvIbnHepm\nzGyjoQMN9ajRf3Y3OCw6UFhJGjKpUkd9po8zZkr83PYUnls+WR8A9c0IHjVUPCk4A9feglMc\n2NpCrhj1PamtGFIG3noBUiD5
XAGAn86QndDxwx6k0DHsqtDzkjoyipEO+IsRjHAWovO8xPl6\ngdPpTlc7cMeaCepLubbjAU+9JuYMDkt29hSRPvzzuxSqsjMBuC96XUobueORtzbx2xTY418s\ngk4Y5K0+P5Qw4JGeaaJN8ZO3LA549qYhPL+ZSrBV96fuyXxnePxpcqyqc7e5zT+Y4yY1xGec\nd6QyEN8+8kjHFSuSzZIIJ705ocOCeM8j0pJ167uT60xdSLePMBGS3YU7cZpnAGAvBVh3ojkB\n4xgCh2bkkct3pDGtlWIAznrUDE5KnqOlS5IcBmODxTVVo5GB/ee4oJG7gEIPT+tV5Afl+U7m\n49qnY+VGm4Z8w/dp1uzAu2B8vABpjIgrx5OMjOKPnJy/3e1P/eNL6vjO2o1zjDndzmmHQkhy\nz5xll7NViGSQ5ZurcECq653SKCGI5/Cp7Xau5g+UHFBNy/EwdThcP2rp9KvGhZcLs+Xlq5eF\niWXB2nHHvW5Y3LeWmwgt0NDC52cGoNCEI+8RyRS6hqH2lgx4AFZlrMGhZAcMp4qvqN8Wt2jx\ng460vMRn6gvVoxlpDkelYd1dlUZSM44PrmtOe4eKzWCR+AMiuVmut1w3zsxxzitYshq5qxyQ\n3ViVRPLmHO4nr7Umn6jNCpgBXyeuR1FYcdxI1u25sAnAPT8qlhuDHKpUhVA6+vrWikRynXxX\n8jXCclgeS3sK6HTdSCqXxld3IPeuGsdZDKsaxbwDncOtdR4ZvIdSumtZMxhgcVRDR1WneU90\nWIwxG5QOldFY6gYWZx8r/dJ61yNrizCkvwpwPeti3mNyxePo3Vf609xanc2d4+4EHK5z8tdV\na6lG0caSgg5+73Ned6beGFVAy3PK5/Wt2G/NrcRTP82Dz9KwlHqbLQ9V+HPxE1D4e66b3T5C\n0Z4lgb7rrnkV9m+AfHWn+PtCXULMhZB8ssO7JQ+n0r4DbU4ZUMkAG0HP1ru/BPjLVvAt5BrO\nmZ+yuQJYw2BJjGQR61hJG0Zdz7mVtufSm7vmOBXNeAfiFpfxC0v7RZzxpcBcy2275k9RiumY\nbDn/APVXM4myY0YZcDrQrBWxzmk55IPWlU/N70ihT8q56mnK42njmmHK5zSRg7aQDwx79DTv\nwqPcW4p3PTNIBxxuGDS/xZpONoPU0MehNAC9+DSN7UL7Cj15pgKF2tnNJ0+lG3oSeDR6CgBd\n3U0v3scc03+VL/CMdakBwwox3pd2AaYG+bOM0pwWzQA5XC9smjeOeOabuO44FJ95sZoAUMWb\nrmpdwUbaiXhsYpxB7jBpj1H/AHeQKeGDAHo1RnjBJpTJxxSAf5h6Gk3BfWm5HfihWH1osDJF\nIb1NH8HTGTTV+Vs9KUNuHPNIB275gD0p23jNMP3QPenMSR6UtRjNx3VJnK5NMYHaOaN3HTNG\noDg/zZ/Kmb85yTTNx3il9+9GoDmYhQBzmpFYBevNR7T1p3yjGRmgQ9T1oBOOc03kcjpSvKTi\nl1GOXjk0LINxOKQAvyaTIVuBVAPX1/lTsjtTN3y8cUu4beaAH7tvFIuWGMU3d6cijzNrDmpY\nDzJgYFCnuByKbnq1JuOMg9aQEgbjJpO3vTVztG7r1qTHOenemA1V77alUnuMUqx/Lk8mnRxl\nuM/kM0WAZJnZx0p9uu5yMHFSlUjjd5XWONerMcAfX0r5z+PH7Xmh/DmdNM0K4i1TU4y3m+Wc\nopxgDd3HP6VSjcq/c+htW8QaT4ZsZrzVb6Gzt4x8xkcD8K+Q/j7+3YdG/wCJV4GgJunBRtQk\nXIHBGFH9a+T/AIqfHjxZ8SNQnm1TVGaykPFvHwoGc4/A5rkWje6sUFmpuLhxxuP3fU1rGJk3\nYg8XeL9Z8YazJe65eyXNzM5Zy7HgnqBWFqEaWLKSV/eDlM84qDVLe5ju3S5ixt53g8HisS/3\nXJjklbDgdM54FbqCIuxbiKOJS4
LJG/8ADnOKpbvMZsJvcDCgmi7lkmVccRioBHiHzDJhyeFB\n5+tWZsQMVQcli3JGP0NMaR/LaZ48wLwGHQe1PdWZTubhv51JPqkkmnixKKkHdcdT60DuVLiV\n3jZYzkHrxTFj+VVUZ465pybZXEA+VM5INVrjLXD4+UcDI9qQyW2yWIZtuO9RTNtzlBnPPvSt\nIIyA/wBz+8O9EMZuCSwwi8lz2FMCK5uEVg0O9eMHmhI/M+ZxnjnPAp80g3ARgNE3+c1HgCME\nPlieFxwKYgjZklQrGJY88jPWnXEz3V0XS2WML/yzz29aZ80EgLsHPfFJ+7MpfzGUtwaBjlt1\n8sKRuOchh/Kq1wzR5iMbYPTIq2qndhWPPC1Z1GffbxI5AdBk0CaMaNmZ/lG0g85PSluEJkPz\nbWQ546GpEVvMYmPCsOG71XbbbqTGrsrNgg8nNAWJ1d5mDr8ox0qFmLZbJYDn60NtKjBO8dwf\n0qWaFfLjKttYjLRg80CKs7LP82Tuxwqmmbtu2MBi2MllOamjYxOyH94uCQBwfzqG3mkXAiiw\nDy2DzQNCKzSRsm7du654/Cq7N9njMb8v2p9wq3EbhDsI+7nrn6UTeTHHFE6N5yj5296CimXQ\nYAJD9KdKEgUDAZm49hU62sLK+3O/G75ulQzDaseRuAGcUCIX+aQRhdpx19ajxIiIrHC4wan+\nduQnOdwOf0psiHeVILkjOPSgdwmkYsq4VMgKCe9EYZWKyZ2rx8vOaW3CRuJpMsV425qaSZ5G\nLxRGJCeMnNAXK5mEkjBlMaf7XJqZNyMu07VbjGKcqsWwdu/FOVCQBnaF5GfWgQKy7SOBg4Kt\nTmkYMpK/L0VqXy1kwWXe/WhX85sty4ONuOKQAjbsl/nwcmo2y4Ei5zu4x6VLHtVpHzhOm3vm\nljUQgBFyDzx2oGO2qzYIzGOcUIoXLs2GByq0qjzFKghdvOabIVaVZD6YBNMQ1ZF24aU7Scn6\n0nktG0cq7SC2QpzzT1+dnAVSuMHb296bImNqo5bb/HnFABK7CSTIVZCfm21LI0Yiypw/QCoB\nHGZCE3c9TnNSb1J8pRvdR1oAI2aNlZhip7O3F/eRw8tI7fwnHFRLb/aJkLblYDgivYvgT8Lb\n/wCJfiuy061tWaWSVVBVT+HPoeaznJJFxjc95/YX/ZoPjvxpFqt7bY0m15kcHJyOQPfP9K/V\n+G1itbdLeFBHFGNqovAXFcf8I/hlY/CvwTZaPbRxrcKoM0kYwS2OefQHNdkzfMRnBrhubrQj\nY9fSkQluKXI8v370ikdc4NItDmHQCn8L1HFM5XrTuOvWgYZB9x2pBuJxjihFKqQeppdzL9KA\nD7vJP4UjfL0HNCsvJPJox+VABG359xTujZ60i8HpgUobnpzTAXYW9qX+L3oDdzxSL97dQwBj\n+FKeF5GaT7xJNC7snnimgAffBpykuzdqb05PWg+3WgBWXcpyOlG0Kwbt7U5fegKF68+lIOgY\n3L70cbfU0YIxzxSdORSEhTk8UevrRzjpxQFxk5pjBvuHFA+9mkwd2egpen1qhhxnpS7D0NJj\nawz0pd3zZpCD/gIoo3UUXKMz5t2GfIoT5c1IsYZcNSBQrdazArzJu5pkcHlzB344qw5G3nrS\nfewCcgUiWK3y475pr5Zfl4alU7WPHWm85JqkUKN+4Z5FPY/LgcU2OQ8gjiopZj5oA4oAWWE7\nADg80ixfNheBUwAOeaarYUg9aLCHFCuOOKRWOT6UquTnJp3DDHagBq7epP4UMxUccCnbgFOB\nikX5lJIoEIjFlyeKVm4weaGQtjBxSYIyD0FBSFDEfNTOd24nimyE7hgHHenLnaAOKRLBpMk9\n6RXDKQaN3Ud6WJdwI4+tAAzHp2pRwOOTSbfypApU9eKTAdyuOKeW289qhbO4HOQKf98+1Owh\n4YFeODSKx289KOM8UKpbPpQAMvvx
ijiNQB3oyBxSPgEE07jHKu7qcUoboM0jEqPam7huHGBS\nAfuO7npRnvjihR1J6Uxm2qQKYCkhuhxRgKcGkTBTpSgZ9zQApbHcClGSB60jYYjtTeVOc4Bp\ngO3Dd8tIeM+ppPLO4ZNO460CF6LnvSLJnr1oYMenSj7nbNSxiHrnvTlz9KZ3JxSsw25JoEPk\n44U00dPel+8mR1phbGKAE53Yp7EqoHakPy9OaUmi4hhO4U9VYLnOab944BFKcpx1zQMCSW+W\njeVbmnD5RUezJGTzVEnKfFpGuvBNxEDhS4LCvivxNZ+TcShvlGSAO4r7o8Y6aNS8OXVuw3hh\nux7ivivx9ZkX0qdPLbBpi6nl9+gaTgYOOtZE0bLuDtlT2roNQw3+rNYeoblPBz/OqQmctfWa\nqzFTjmqDRrnLDBrcvGKsxZflxWZNCixkk89a1uZMypVfnHB7VSuo2GOqk98Vp3DD5WVtzena\nq10jSwgMcZNVcRlS/wCs2g4bGCc1WuAyqgHJzzVi4hxJsBz9KhZSkbFT83TBoEQySASBx06Z\npjR723Zwx70ryLyrcFewqNJj1P3e9WBRmV5C3+yeaj8smE46seatSOWU4YHP61A8hXGFIJGN\nvf60DK+3ncD5YU49jUbq7b3421JIT5LfxgHmkXmM5+Ue9ICu7lQpAGG4NRNH9nbd1XpmpGj2\nLk/OrfMPao45BIcOeOwNMmQxcbsDIbt6U1stHJuxu6Gpmbdjn5KjjClnIOR1FUBCIx5Y3fLj\nrnrTQv2dgoGd3T0p/mbhnOd3ykH1pgZpJArHaBwKYEbMVkKleFPU96STbIc5BNSsrMz5G/b0\nz61FIolVQox6+5p2ER+VHHgEESUjIY3KlMZGaFzJIu8YbHSms27fG2Q7d+tLQBFUtkbsfNwD\n6U3bmORm5x6U9ol/iXI9aUnav+0BQMg2+WoYLhSuRmo9wVlULlzyWqdsyQ/M2D6VHtdY9yDP\namAqAyY7FTnmh23SbyML0+lLGpb5ifmIwB705kZRgrk9/agYkcY3cP8AL39TRIpDZAwG9aF/\ndsSD1HWrOxHiVXJJxnikIpMreWdvykHgVGivKvX5gckdOO9WUbrle+ATSyEsqLjlutAMhIDY\nCKdp/iqu6YbbgkZxuq7tLNndtC8VAy9zyFOcetUBD8qxKQCzimjP3s7cn7tG1WjwCUyelI27\nmM8DoGoENf7pbGOaXduXdzt9qXczYVj04pP4sLwKLD6CPlXAxSK23chUk/pTnZiobGSDil8x\nlVwRtB70ANwrpjG05pEXazOpx/WlK7YVIfIJ/GmyD5cD9KLgIrZ5PU0xcBTnk56ilyxjyBgj\n1peqgldgP60agDMN23GadJujVQqZXpTFVMDIbIPWhd4BAJIz1PamIDhQGCFlBwTSMQvbPPbp\nTofMKfK25c9KczK27auGHUUCsNZhIDtHIqJfmyuM8c1I2YmyBnjmo+FbHTcf4aQCiRYmAPI9\n6cxMmSfujpQ3+syR932pqSZyN2FNIpg29lwvGaUKNypjnH3qWEDJ59qarbmck47AUwGhW2Yz\nhd3J7mlADnOBtzSMwbaD90dcdjUnlbGJHIZeBQBC/LFcHPakTK25Qjq3NPUMWGGBwOnSkw/Q\nnHfFACjKtjbj39qPl3GM8KeRikZi3f5h2pf9WwcEEHtTAadiHDkr2GB3pFBVnB5I71JjdkdX\nJzz2qOORvmD854pCDOyMbODSMoOSeQR0ppPlsST8opzsgVdvznOTTESKf3YDD6U1pCVIxxjk\nmjczLuxnnoKRZBE4YsG9BjvQWMXBX5Rg981IgXaix/fVskUxlLEvIdre1OChVUsQGzwwpEsj\n3PtI605cNzuwaVo/mGPlxy3vTXX97uUcGjUBW2knPPbNKvyNgHK0mRn725PXFG1s4AyOooAN\n2+Nzjbt70BPl3q3zKM01kO0qF5NG5u
AFx6mgTFO5lyf4uaj8nK4Aye9S7T5YyM4pAxaPdkDH\nFIYkJzHgcketIGcAkru96U48s4yJOzU0HzOekmKokUNtOGGM88Um5lVtuCv60qt14xxy1Jwm\nFX5s87qRYFg0Sj+LvilVoyhIOO2KH++AFyM9BS5HZNq5xmmAisdhxzQoHcfMf0okyjcfPjpS\nb1Zd20rQAgUSfxYK9c0iKoz5S+5b1qSGONyXwemKbJGpyikgdA1AEa4U79pI7j+tPWTzMkjb\nGRgVI2eFDdtppGYxOq9QBjNAiMHAUjg5280ojLSMD1zk0qlein7vO09KRgxcseQ3I9aAGsdp\nYD7zDintlVBBwTxTAcMNpz7mhV3McbuvPt70DYrRk7GJzjjFDr8rBjjjoaTyzIpIzsU/nQzG\nTk9+AKRLEjXzMLndkd+1NRztCFSAp5zT1HHA56U1t7sf4T3NGohVQMzbemMnNIrHdEQpyRxu\npQwdiF/hOM9M+9IrEqNzbtpwDRqMe6tHknBJ6gU3amQOpb0pNv7w5OM96FV1kBxkelMYxtzM\nhAyo6gUu1m2knDfeH0pyoMg524PIpGUM2V7dieaBiSZzuzgN2PalkU9ACwxx70m3a21uWYZ/\nCkXexBGFA4oJHSM3AzximneY9u7aD6UqgrwTgetKrKsfI5p2GNkYBQWGR92lWQMo6jtkUjMT\nGFVQxzwadHwTuPz+wpAMyyv8jbqTaFZct949KkeMiQnAwBkkd6edrKp289RQBE24Mo4CdaJk\nFwuSStKcMw28igEtnjB/vGmAKzLll6YwDSNjcS2S2ce1NyY4xIAfmO3np9af87Fg49wKQA0Z\nHEZ+buvpTJD5UqcfNt69qfGH+UYO/Oc0ybzO+MUDHbd6A/3utDABlzg9qRNscgGSWbjbimqw\ng3BuecA9cGgBWZWkDbsKDjGOtKGZuhwM56dKNqqoVjuIOR9aZIx3ZHLd1FBNgVP3hA5yetSy\nZXIX5jTeV27Tt9jSv80hCcZHPNAxPvq5B5HY03hlLScHHSiQgq+1fm7jtSNtfaW5AFAD1UeZ\nGN2DjIpWcqNxYYP8PemctzjjHak2rtUhunQd6YDs+Ygxzxml3Zx2OKQfKM8rgZ6U1FLKX6KO\nd2KLgSAA4JPJPftSMwO/jJxxT1KzKr4w3QrSSI3TIU0gGZEaqSeSKYpcMQeBjPFSRblVgSOe\nN1JsO5dvCjqPWgBseFUYBwelLI5YA8DtTtwfIU7Vz37Uki7sZbGPSnuMiYvwyn5vUVKhOQ5N\nJg7QwPBOKfHhcxhskc5pAMOWT5fvE8k0xsqwUvlR1NTLu5LffPYUmwKpAO49d3p7UhDtj7mI\nA2Y/iqCRCUB6joWFSbnfJxksMAk0jb4XQE7hjnFPyARmR1wOq9/Wl8xPLBA56dKSMKpyvOT3\n70btzY27VJoAQsI34OM8A+9OuMlgCeFPJ9aTy/LzgcDncaGY7huG4N8xagTFLFmwSAvU03ee\nAefRqGCZBLD0HtTmKyKpxjbxmgTGuqBc5wGGT9aeu1UUg7sjpTcpnJXjGMGhV+cADHvTKE27\nG2lvejyyXKjJ7k04Exsz43Fe3eiQyOx/hZjk0bEkcivuxKQ2RkUcLMobI44NOX9+/XkdKSZt\n3TnBp3KF24PJ3E9SBTlZ15fAXoOetR+ZjBxkZxUg/eMSQAFOApqSRvmbU3FvlzRuZYywHHYi\nmyZbgkKD2x0p6xbNr5JA70D0HKRGATkFhgimbAzKSwBT/IpWG5DycZz7ikMYba46n1oEMLnv\nwuc06RlUKyMRk05mK5DjaO1J8nLL83H3aBoTcOWXkmnq2JCztjK9KTakcm0t83UU+PG4g8kn\npQMgjZX2+ZlfQ0scZcnnGDnce9SSELu/v5pNu2TDnG6ncBQS8LdD74pUt3+Q7t3qKTnJAynP\nC+tKu7DEtx6U7gNCyKrEAZz9004KZGJZ
doA4/wAacinqwwMfezUagRrwWYtSYCxlWkyxJ2jJ\n5pWUrkZwRzk9KGVPLB2kkHNK+WTDAk+nekJEDMfM+Th+ue1PbdIQwbO4Zb0qSH5Vyy44wM9a\niRs7Scoo4PpQNjt26QAAFRRIrHJBUehpyn5j8uB2zSqBtYMuDQLqVWxwEDNt6+v1qWHOPdwf\nlP8AOpNyfLk5PTK0yMCPzT1zx83b2oKFVfNURrlSvPFOVWZWJA9qSNTJGqgEHsen4UeWy46g\n/wB0UEsfxuOwYbbzTd7RqCF3jHSk8wRsvOXxnihmO9WZTjODikA6MeZGy529wvalQlotxPI7\n02XaGyCck4ApdpVfkznpimFh8uzICN8m3OfU01XE2Ffp3zSvl2QKPujkU0bpGLD7+fumgY+Q\noq7SCT7UjYjUAfKD1pGVpVw3HPakZm6k5QHoKVxXEzsG3JXHIwKkbbIocyYP+z1qJ28xlOcY\n7+lP8oSKXDAN3NGogjVEbC7mPcN0qUuGBKjnH4miNfLRv4jjrRHLtXDcA8AgUwIfLkZVAG09\n808blBUHb2qVdpUHccqcEVHJsfkg5U0DFjgaThjlBzmpFwv3179KYzLtUEFPapHUqw43Z4FA\nWIWX95geuQPSpNsssmCdwH8VCqRIQOo6mmozryrAf7XajoMcM4JbJGeoGaWMAqUPJVsimNE0\nhGTgk/MM9qlVRubjC9BmgBjN5bH5twbuadGuxc7lNO2qvyMOG6ZpWVJDwNoAx7UARgFmwDhA\nefel3HaW6kHA+lDqFkQBsp6d6fHE0avuw6dRj19KAGRtt3EAlsVIjDDBvvDmn7jwSu1cc/Wm\nsrNGw2/Mx60AMVhkgL1ON9BVpGPXr2qfaMMoIHHHsaYoKoxOd4+Xr1oAdsVWynIxg01V3LgH\nlec1N5aruVhge1KI1DBVGOM5zQIjjVCoY5wetEeLVhj7pp5xt5GPRaTaWZcfU5oEEjLIoTH3\nmpJssrqMq3QChV+0zsduFUVLGAxEm0kjj/69IZHGuzbuLMQOnWn/ADoysPmQ9iKVm2qdpBLf\nxf0pvRRn76igYi2/mpITiPnIHrUa7n+Vl6dqf5sjsQVyuOnekLRK/wC8LKw64FMkjjZXbYVx\nk9aiyIlZcnOeDUjRjc+WwAeCDyai+6pycj9akLCSRlWRt25gM4oK4wxOSetK0f7ssXy/9Khj\nXamSDjNaFDmLK27HGcUmSzMyR72b5TSs20OM4weKWHhvlbHHUUEj9pZvkUIOA3NSxBRI7k9O\nmKg+Zsc85z+VWox84wu6M/xD19KGT1J7ffJ87DcegxWzYy7FHybCeKzLZsoWJwgzwBzVuKcR\nouTk+lSSzYWVoQcv97qRUF9IJE+RtxxnnvVdZB5Ydm69qpyyGZSzuEjK8VRTIb68ZfvPvQjB\nX0rHuI1G9VfbuXg+lW3lSGFkGS+OuKxr87HQk/eYLjPaqWgIfczLZ26L5wYsAUXr+NEWoPMm\nJkzJj/WKOKglZVuWEkPyYwpNFrgsYmGAP4s0xs3tKRLvTp5Ih5c0J59xV6xunt5IrhXMWxsh\nh3rmrVhapNJHIyu/BGeoq5ZahJtCuSvpleK0jLuZNHqVhrwvLTy1UOzDnI5+tbemTR/Y16xz\nKcEH0rz7R9RRrjY/Ei/3T1rurG7hktzID844AIrRGfU2ra8hXghnYnAwa3raRLiPLkl8bAua\n43T7j/STvB4bJz6V1tnNHcTL9mwCPmGfWoaC5r6SDbSbCT1yUNdJpfiiTTlkgEoms2OWib19\nq46Od5JHkkOT/Wrtv8sfnPzzWUolRuei+G/HN7oerJqGkzG3kj5HPH419cfCf43af8QraOyv\nnW01pRtZDwJj6r/hXwnYzlWDKMc8/StvT9Xn03UEltZmhlU7kkQ4I+lc8o6Gyk0fpEYfKbB7\nGmMMnIGDXzR8K/2kp7L7NpviANcxM3zXm7
LLn1FfStvdW2oWcd1aTrPBMAySIcg596wcWdEZ\nXF24UlqaueKeo7nkU3jkd6mxQ3d81OXP401lI5HJo3/MCKQh27aMY5oLcZ6+1MZjI3A4oZSp\n9qXUZIp45+WlwAPc1EW3N1xUgYfWjqAEkLzS7hye9IxPYZFIPvZpgOZs46ihj6Umc8Uu6pYC\nhtq4FDE8DpSMD1pT94E0WABkdTSsRx2NN3fNkilx5h4pDQ8bio9adkt14qNMnOTyO1OZiyg0\nDFDHdyOKGP4elHPShs9+aYBng85zTt2FwKb93mkB9KYMkVyuM804N+AqNMNyTzmlcljgDFDE\nSMeMmjJHzA8UbhwKYzYwAMipKH53UeZjGKaxw3TtTVbbjcKBihstzT2HbtTA23PvRkbQaBEm\ndo65oB7j1pOOQfrSBv3eehzQIl9+1IRtBP5UiMMdDSyNuXFIBy54prMd2SaYGKt1/Ok3b1PH\nNICVcYyTTPMypG2kBGdpp6qTVCF4bBB5FKW3KBSwx7evWj2x3pDGkkgjqKFU7R2qWOPBzS+W\nS2RyKfKMfHyRnt1qwqjqR+QzVdZoo42llkWKJBlpHOFHrk+leS/Fb9rLwP8ADGJUivI9a1Ir\nkWtp8+B0yWHHpTsSz2XbsUFyqjGTzXj/AMXP2p/B3wnWSBblNW1XYdtpbMDtOOCzdAM9e4Ga\n+MPi9+2l4t+Inm2unmXQdJY7UhtmKs6+rP1654r571rUJJmW5vJmeWU9WbcaaTbEpHu/xi/b\nK8W+PbrybOVdP0xeTbQMyB+MEHnJz9a+fNYurvVJDczySFi5J5PHtUGpXAUwEDC4woHc1BcT\nSzwhd7L/AHq6YwRm5XHLI8cILsGT1Jyau6T4wXRppJURnbYUUY6H1rKklK2vkiPd3LVQMZyz\nqAD2XNUSWdS1q41QPNNKSScsMfpVGS1e3to5AAIpeAxOcUszRZCxKfMYfMtJtiFo5lkbzQcI\no5waYFOSNobclCAxONpPOKrBUDMxJVMY244Jqe7by1LhclVy1RKyJMrsm4DnHbPamSV1kB3r\nn5Vzj2NNMYkkHZsc7un1q9qF5DeAusHkyE4aNR+tVljby3nZdyKuC3b6YpjRFt2yFX5PZgP5\nGnxNGYwxGHUZqNcyLk8qvHWmoXkj3rgFfl+opDLUOnSXkfZYT82/tVOSVWhe38z5c8n1FH2l\npMRl2XJ5UHjFQbxH5wIAGeMdaYDdoQJsO1VP6UbdsmQc1OwRgu5cFgMEnGajjhjjk8zbtOcE\nZ60CIj+7kyW3nOaczeZMRjAIzyKlkkHmPIyKqgcUwRuwVvlx13GgY61ma1YumHkwQAah8wMp\nWTgnr+NPlVI2+919KiOPMKgZPWn0AazMjBckqo+9UfSMvubYKkZQkoUOZFYZI7UNCFTkkJ1z\n2+lAETLuUOFAQj8frVd5AtwGUYdurdqmlmVmVhkhl+7Ud0sflqqN+8B5HtSFYj5YHzAAN3XO\nAajkV2QIkmFY7eP8ae6M0ZlRfkU42t3qt5YaFisTxyE42k9DQMvbxHNuwGZV25x19qrXFuXj\nE8YYx5+cd6STHkg/x4+Ug96JneSGOBX2rnc31oGRzIBCsof5CdpU1XYnDoQdvQZrXk0ZF8uW\nS6Ro1Abaf4j6fWs68kN9Kz+X5S9FjX+ZoAqM6xyRrk/L1p33VL4LgnscZpyY28oE4xuqOMmB\nZAy+Zzwc9KBDpP3jJHgbM5wv8jUrbg20NwDwgpiMPkBUr3PvU8Kux28Mc8ewoExNirLuTLHq\nWNLJh51AHHWpeBkSD296aflbK9AOM96BiLk+YdzRt91vSiPcAFzhR1b+tNy7YWQZTOSe/wBK\nexEv7uMZw24igB6nax3crjj3pAXWJwi8dz6e1DLvQoThA3zFfWpIxiNxkFeo96AIt21Ys8+t\nI7FoGjIwm7g0XG3IBO319qVovOEfy98KPf1o
AjX7qrny88FfWpFmDMEZcHGAPUUrBGw4j3yt\nwMn9aF/dzBVO7jBb0oAYCVkQImw5x9asWsYklZUXluj9gPeq/mSLHhhg7tob+tbGg2rX14sc\nQKHOCuMk/Spk7K41qbXhLw5Nq19DGgc+Ydo+UkYz7dq/Wj9ij9nOHwB4Zh13Uof9OmQNbbvv\nIDyT0rwD9hX9mP8A4SPWG8S6tbP/AGbaYI3D5XJ/hH4/pmv0njhSzt0hhHlRqu1UXooz0Fed\nOXMzqirBcN83HIHSot25ucUsj8gY6mmOAzccD1pIoeMK3IpchsbRTc++frSj7vXFUKwezGlX\nG3FIV3fWlwc+9AxevPfvS87aYelKD60hXFUDdkD607hiRTVk6jGKVfU0DE3E8Yo9Mmlb5aTj\nbx1oGPVtykGlVuoPApoPzdKXIznvQAJnmlH3TzSBvmJPApdueapAJ97g0HjtS/xccijHPJ4p\nMBwwQTnihVzmmDK4HanYyeuKAFzxRt6HtQy4Wkx8uRTAc3zd8CkYFqD90DuaVs7qEMTnaBRk\nYA70EHd6Ck4XDUxin3o57daU8delJyvJoAfz6UUzzD60UAUS4/E0z+LOaasgxjHNOUbuorOw\nDT8z5P4UbQmT1p2B27UhO4GkhbjWJ2kn8qj8z5QMYPXNSMCIztOSeKjW3ZTljnimMeG7dqcy\nq3JHNNx8oApQpXPekAvRcgUmQSc80q9zmk/h4HNNBuJgfShW5OKVvcc0owB6GkA/qpzTdwVQ\nBSL8p56UcFhTuIdv+YGnPjHWon+XpSbz0xmgQ/cW4ApGzwKFY7uKSQj15oAUkb/wpNu3PNI3\nKD1psj7VHFJDJdwHGOKYp9aFb5cH71O2leTVAxGX5RQrKDjtS7u3WlCgdaXUkTaA2QSc08uQ\nhI5Ipu8dhTVbaxx3pMZJGueccmmPktjFP5AFIMg5J4oATnA9BTmbcvy9RR1UnFGQq56mkAgL\nMMt+lIFJ54pV+77UcL3xTATGFI70R8H3pWBFIuS2TxVjBl289aBmRQD0pzN27UisRyB0qWIU\nMOhHNN74oyGySKQMPvUAP4Xoc0hyo9aQDJBpJvkY4P0oaAduGBxSS8rilVd8Y9aAvvQAvC4A\nGaZIdp55FPbAyM0n3eThqliDaNuf4qCvyjJxRu46UbuxqwQfKoHGSaXdjGTxTf4s0ueQSM0h\nkbsd2MUgZt3FStz2p0YCZyM0yWNkhM1u6tyCp/lXxX8SbF7XxDqETA5MhPPrX21H8w288n/6\n9fJ3xy0p7fxZf4/iJbP15/rTEfO14qwXDdtpINZF8Vdi6jHtW7rEPkyOWGeawJJFViD196oT\nMy8RGjwVwKwrg7ZwMc11V5EGt2ccrjrXK3DMuc8nOfwqkQyheRhZnCELxkiqMjFY8qd3bmrt\nwpaUOBndxtqlNEysSOAONtWSQxlGZkB/eYzms64yzZZeB1q5MvkKrfxMcZqGdXZTlc89KoRS\nYbmZgML1/wDrVTm5XptcnP4Vb/eHAZcHPCmq1yG5JGWzVCIGw2BjAqOdn84BSMgdadIyRyBJ\nGxn9KgXf8w6nPFPzAQY2lQ2R3qOZt2VJ+U9SallXdGQvysBk1VmcrtYrn6UwEkJ6oAwxgYqB\nFXJ3Lz3zVsSNwwUAemKjdQ0h3DkjiglkEpXy/lT5OlRhAMYGfYdaccxZbG5fu496jZ32hkba\nw6imIj2MzMdv3fSmR7mk5yW61aVhJwPlc85qBt7SFlO3jHNMYxWEjMpc4JyaVoQylweV5pPM\nBT7oBU806SRbiaMDIUdRjikyiB8ttbb2+9mmZHK7cv1qSZgI9o55yMGm7t2CBgHg0E2sRybN\nowfmJHFNZtvIPGalm8uNflOR09waahRGy5zgZx60x7kW1WkJyRnpmpFjXZ97C+lKuNx3Lhjy\nD6U/cNmcZFAiGMgKDn5T0z61NGn75UB4PLZqIs
sg5XIH8NLG38RIK9BSKLb2eIWxwM8etV2y\nyjbkcZ6dqv29wDF8/PbFN+ziaTbng/5xSJM4HKkAYzzzT1k2W+CNzDoavTaOFjeWB/ujkNVD\nYyhdwznt3qhDFxsGeCeabNt2sqdMVYW3AkdU+fI69hUF1CysqDoByaAKMY3KDjbjj2pZMKij\nBY5yaQxO8nJANKyl89S3rTAQMZI8kYOe9NKnBA65zmnMA7AE7SBSMdzYXkj1p3AaCVy55A4x\nShhnruPakVmZhuYKKRlYvjt6igBXU+cpfGO4pu0JuYklWOBUjYkK57cVGuG3Lu5H8NMdxGy0\ne3imsziNBsyc80pkDMxHAXiguoZWDFmPGKVwQm0+aMdCeRUbBmD7WPXGKlaE8kNl8fdpu3pg\n4fHIpiFWNVUBSQ2KX5uqsM4wabIoG09WHUg07ad2DjnnikHUQfeA6EjBqI7lUxDkZ+96U5VD\nBjzSbG3KgbA6mgByOZCQH3BRzTdoZSvIPUcVLGAsbg4Bz2pjNtXjhjQMagDKpPK9CKUYXcMb\nh7UM0isPkCoBSr97Kng0gEw2MDj0FHsSSRyKY4dpC+eeg96lST5Su3ce4qhEQVmky3JNG0bm\nAJ+Wnck7xwq9RSbTGwIOQ3NIpCZLMMHk8UjKG+UkfKcdacrBeFX588Uki4ypHz9cUxMRi3y7\nefYUzcY5ANuST92nyFppFCDYoHPvSbSACmM559qQhsmB8pHGepqRWEJLAZXpTfvAqFzzxz3o\nXcsZB4GeTTAYjO2Tny6evzfLtDikyZGGPu9vehUKKdz98jH8qAE5kbvtHVaX5HkDAEdsUNG3\nIV8A8mlRiWGDjAxigBp2hgpJ69BUjcKQMFe1LGxX76jPt1prkRsNvBPY0gI1bLDGQfTHFINy\n7snLk8YpSoyc5DE8c0rKEwX+5nn1BpCuwSRdnTju1MQFWP8Adb1pX7EDv96lbEjKM4+tMYbC\nw2jnHv1qNtwBIGQOq09sNIMEsQOR0pAp8vJG0/3fT3ouAKeAGHuRTVILNgYx3p0WMsf596Fk\n3KwK4PamAibhnAyaJJDuUbfqaSMs2ecH1o3ASFv4cYpAOyqkbW5zTG+aVSo+opFIUDbyM8tU\nsLCR8enGadhiB3IJX71MGCvzqd3cULDLHuVj1bI+lI29kOeu6gRJFMY/kwOmCDT9yspCjAPW\nm7irZIAGMc96jjOFdT35oANnXIJJ6HtTdu1sEZAGSaerCRVGcEVE6vuKhup5NIB8mQu4MNrd\nBijcke0qaeyhmVQclBmmcrkldwPamAuzawQ8bvmyKXJZsZyvc+1M5hKHGW/lStukLHgZoATd\n5YCjJUnI+lIdiyM2doIwPakkDBto5OOtObLABVG7vmgBkcbtg9R2NO2vIrDO1h3qWNipwOnc\n9qa7bmOTtX1pCGFhJ33H06ZpPl27snce3amupbPPyjpt4pdp2qIxhByfWmFhY5BuCuCeakZt\n20hvzqMox/gzzmkDIygBSCpoGKv7wnceAetKdsjb/unpmgHc+cbV9DQyjccMN1Aw285GG9KT\nou8tjHG2gfNuGcU5lRFHcegoCxCyu2CRgehqbJbd/simMwbZyeR60u1ixAbA6UxDRl2CgbVx\nkH3p6LIpCkEHqT7UwZGQTkDimBm3K+7oeff2pATSMI18wLk9PwqNm24OOf7tLkgsT0PIFLh9\nwJGRjmgQi7RyM5J5oUkbsNupyqEXgZHWhl+XpszzQMiB8xhuO4ds8U5pix4yW6H2pZI96qGI\nx7U5FcKGUA8d6BiKXD5c9scU1VDoUTIYHJY0fNsJz83eh33FSQenRaAFZgzEuOccEVGxZMFV\nznoMfrT4V+Vm2ke9Pk8xUUAbm9KVxEflburAt3oUfOCMY6U4R7VIxg5yfakUBo8huQeg70IB\nvXIY5x09aFjjXoct1FDSDbuC4P50bypGNpyPxoYx
d2W6FVPBNMkWNlOD93t609iGboRgdPWm\ns5I+QZPpTECOVGVHBHNOWELFgj5uuabwowfmP5U4M+4KAfxp3GPkUHMgOMKBzTDM3DLxgY9q\nSSQOyk/Jg4Bx+lI0YUsmNvf2pALg8njOeTS7G5ZjlvQGiQMysVYKp5IoRg2GHLEY9KBCQ7Rg\nZ98tS7WUmTOST0pDGSjbx06YpNq7l5IGKAHeWrN8y/L/AJ5pAy8qXA9/WhWDNxxjuabtDAtt\n2npigQrKZGChsLjOKWNkbOAVYcc0wKVk2jO7HDGnbX8oMH5zg56n2oGKWf5mGMD1/nSviRm7\nZHT1NNblRzk051KZP3j1+lAakY2rnBII4NSMu/APQDIpFXdGCR87H8KVc7AGGMUAIsalSzHM\nvqfSkk+WRNw+lOVlkycfLTId0aNIz78ZxntTYCLIm5gMnnFRn3Y8GpFVpl2hQued1KiiMllX\nd2JY5waQbkcamPLFcqzfe7CpZmXgAkY/gPf3pGjd2IBwcYIpvP3cZwOvtTJF8wsq4XJzyppZ\nBsYH7nP3aRYzt3Z47ZpWZ3wFHIoKG7v3jfLk45IqRZD0YbjjJPambsAHAJzzSMDHIQehFDFY\nMADAyGPRhTmO2EjZ8w7juaM/uwN2D70AE7GU5wegouISPG3aB7n60/J5Bxk98VG0hjZgPWpV\nfKEk7WxnOKQyJndU5+YdsDmnq20KW5NCqxkXA5I5FLwG2sD+HagNBFZ2znBY8bvSgsJPnznZ\nwRTgURiBlhSLB5alj36LQAjAM3J3buRntTXwFK9j3FPj2tGyleM9aFPykMMnse1Aa3F3RtDt\nZfw70D5uU+Ujpmk2l3DZ+YCh8nYQM46470DBfn3F0BOetKzKoVs4OetC7d3BIDUnCK28bhQA\n4t8rAn5PWmeZ8w2sNh4wKXbkAsu30weKb5Y3Fl6hsYHcUAIy/Lt3EnPHpTyzw4YcgcUAsrFi\nMLnp6UqyEMy7MbTyfegY0ICTh8nPQ1Js2tuGcY6n+VC/KM5+fuKazFnXLHHYUiRpk3q7DK89\n6WHLKcDOORUh+USB8Fm547VEEl4KsAQKYMkVi2WJ3kjpTOH2jkSZ5FEeHTO0/J1poVlk64Yd\nqAHMo6KQADk+1Em18jrhhz60iqu5sna3oe9KyjfkjG7tQK4SeZ0GAq9hSthpAGyUxniiONot\n2QWGKIv3ihuhHAHegBGy7K237vAY1JjOVLZY8mouNxU7ic96kUfvMA9BnNACIvV25A6U5AfM\nLOcDPFRhmYj5Gwe1OYBmxnBz0NBY4x7SxXJB5NK6tuOPuVGzJGx4Y5+XjpSo77yc5Tp+NIlg\nvzcqxDA8jHQd6eqj5kXhDzTUUtlifypyjAzjoaYhChjTCjJ65p2Nqk8bmHSlZsthj16Ypu3d\nwflYHkewoAdt29Txt+7UcfEYBIJznbU0iBl3A4j6+9QKq9TyO1IBzHNvhlOd2Sc06SQrgj7n\noRSFW3Ek5A5FLksrs4yQc0FBtAbk5LU9nZI1DfN700Ky7XA59/6UvllghB3YOQDQFw+XawJ2\nknipHVViXaCo75pjR5+8uG3ZxTgzSyZJyOmO1JCElz5YVBgj+Klf98VU8YGeKFU7nMmNv930\nqSNhGufvnHSqGRsi7sOc7uhHUU4Dqqj5R096Avlr5mSWPYU6ORXB7k9/6UAR7l5Gct157UOx\nMeFbgnmnqMqWk+VR0ApGHyh9v7voBQBMJvtCbc4UcDNKq7Mqz89sVE58uMAjnrinb42XJ5Pp\nQMVY8K28gt2pyhZMOOTnmn/ZY/J9G69ajjby1yBwOSaBCtnfI27C5p3mBQpX5u9JHHI4zhVU\n8hj1p2GPfcw5P0oAEYrlmUlm6H0p64WTeT2x9KAq53xnn+61RtHyr5J5yeKBdSaNQrE5DHuK\nc0iq+V79u1QrmWRkXbzzUyxrDGoIwWNADo40jwzDdJ
ULRv5m9+561LuOcLxkdaZJywLcj0B5\noGMZW27Vb5t33j3qNVL794GOgFOypHy53Zyd1Njb5SS3fpQBHKpWQKoGepqNceYW34GPmVqs\nyN5bbegPU96rHacbkyc/ePp9KBNkbEKxZs7SOCKRgWjCqc1IzmFsGM7DxUc0YRfmbI7KDTEM\n5hzuO4H1ojYdAc+tHOxRtyPQ0jE5GRhc44oAkaQNhF7d/ep45BsUOWPPX1qFcRsGK9DipFJZ\ngRwueDRqSy9DlVdgDyc7T0NWI5kWMseT3Wqkcx5Vzz2pd+2NR1ycUhWLnnI6Y3bX7L/Ss+aT\n9yGbpu2496kklHyALn+9UMyo6kh/kAzx61SC92VryYQsrHlmXbn0rIuJpJWZjg84B9PerMt6\nI8FlLHoCaosC5II5zk1QD3ujJsRsv/tU9lBkVRkL3NJk7XKgOxGPoKYkzqFLHI6YpXGXM5mV\nUII9+9WVkaG0aJvmffkY6CsqRxncMnH8NbMNs9nGty7ho3XhWqgsX9OvilwHwN/867HQb57i\n4VkIznLRk158k8ZGSMbTwRWzpF82JHiRkeM9+/41pEzkj1EXH2ppJWCo4baqd6u2t8bZB8hE\nqnNc5pV+btELPktgkjoK345meT5sMgHB960MTrINVhihjh8sNJIN7NnpT1vhLIEXPlnj6Vx8\neI5t/mkSLztrVhvHt2Riyvu5yB09jUNFXOuj1KO3ARmC8YDetT2OoJPGRn+LFchPJ9oZJCwI\nP8OasJMbJtzMVGM4rPluVdnYG4mjhd4iSQ3I9a9C+Gfx11/4e3sKpO1zYt8jW8zblVSRnAry\nO318Jsjc7VkGA/WrLXEW4SRsX4wSOKwcTWLsj9CfA3xu8O+NoUUXCWd8wX9xI/BJwMAn3rvy\n277vU8Dsf88V+Xun+KpNIv1k3N5a8rtPKn1Fe1/Cr9rbUtAvDYavBLqlk3Idm+dBzzWbgWpn\n2wuOvP5YpFXvXGeBfjB4d8e2pazukjlPSGQ4f649K7gKGUFcFT0Oc5+lZOJuRLlc+lMVvmIq\nUgq2DTNo3H1qLCG7R1xTyQq5HFJ91STxigMJAMDikUG4qoHc+lIrYbng03lc8ZpGYs2CMUwJ\nAPmyaXd83Skx+NKqnBOelJgG7nrxSo24ZPWmjCrkjmlHzLkHBpIBx9+lG7b0FNDDoacwO3PT\nFDKF3fLyMGnBsgCotxJFKMhuD9aQE24dutM3HkGmqdue/vTt3y560wtoGD3NOztwc/Won+bG\nKV25xjNUHQlX7x4pd2FyRzUSk5B9KcSSeTUiHqRs96XngCmEjbQM9c4pDuPz8wyM0pwzZ7Ux\nZBnkUrNleRQMcW9RTN2SD2o5PPQUnfOMCgQ4vxijcSMY6UY3N2pyxnBNADmkwMKKZvPcVIEL\nLnFCrlcgUAMGSwJFP2ntxTtvHPWpFUYpAMVOQSKlVe2aVI2bpyKlEZUMzDagGS3YfU0wHIgk\n5A5pCmGIOBjk5IFcp4y+LXhPwBbM+qazbrtGTHG6lj09+OtfNXxa/byjsbG4TwpYo6FTGtxN\ngsH5GR14+op6hdH17eX1no9lJd6hcRWluiFy0z7RjHbPP6d/xPz18TP21vB3gm1KaZ5mrX27\nARE+Q45zmvhHxj8d/F/jgSf2vq11Ok7BzCsh2qcYwD2HsOOtcDda1c6lKGvseTHwFC4yff1q\n4xFzWPefid+1l4l+LEksa3UumaSowLW2JUfj69TXhWo6x52oSbHLHqff2qnPdNEvmWkqwhhs\n5HNUbbzbf95tB3tkE1rymUpMtX146yRzBHVeuwDgGmpY/a7hSxLO38THhasXjT2Z+ZlkjYZK\n9xVSa8LeWuQqemcGqSJH6o0UgSKCXzSi43DuRVa3kkLxxhtwzlgetSramYboY+OmRUWox3Om\nxQsY1i39XJ5qgRHdQmWV1Em1ex9Khht+u5lbHA+bk1St
2nmYOcuNx46CtD+z0k0+5lRGjZRu\nEnv6Cgm5m3WLaTJYlG+6wHT2qFVPnA53e2envVuOWPywQhEpHMb/AM6rx7fLZNp+Y8+n51XQ\nZCcMzxq28Z5Y1Gq7VYgbgMnJ6mrToY7eQ7MZ/iFVm3SKNxJyPvdM0dBCmVRbgrH1PI71WVnb\nKy7lDDO3tVnzD50UaHIxnGOhoWMzZ3Nlfu59aBlRpdiruXfniop1bzjGDhduc9qu/ZYfJLib\nDocGM1Cyq0ZDZLdVx2pBqV4I4Yl3ygs+OFpBsfbvQBuoz6UvzLGGb51B5z2pskwlb7uXx9/+\nlUAfu2lXGZOfu0kjOzvhNm09COlPMkaxgAHdj7w65oeF8CRW4xls0AMmjEiqzfN6r2qOLbJI\nylSBj7tPOWjYKeR81NjVopFcsAzDO7+lAiJpkmmAWP5E71NIuZIwqqSTnj0pI4zKskm3arHB\np8ka2pwp3v069KAK85aO8YLgkcn/AApYprSOGYX8jocZSNBnntUV4yQzj5uSOmeaqv8AK+4c\nA9C1BRCwKqigHLH16VMr+YgQAZ6H2qIsVwudzg5qKBT88qLgYOS3Q0tQHzXkkaiMf6xT8p7U\nzUrmSa6jRF+8v8P6mmgNLgFfnI4bPApY1lSQlRjjBP8AhTAhSN96xZ6859KjunaNTt4Iblqs\nyTJasMnJjHzeuaY8y3W8tFszyoPb3oAhE7yN86bU2/L/AI01myy4O1h2pysZF37twPITFPZm\naSPKhe4+lAEcizTLtAGM8021gaQPuXbhulWWX5T/AHgc0h3rHud9pJ5FADdqfdkfaueKfKwj\nmb5cJgAYP60hjMaeYTuGabIg+UjcqE9xTAerGNSZPmPQU6MbQCeVznNNX/VkspJGeKSRdqx5\nXOTyAaAFZ2+cOMZ5U0/BhEahfM3csKhZiZCgTdjnrxUqEjDkMD0DUgEXYknOYyeMHvSxqPOy\nzcAdPU0jnzmCuQzhsUeWilpMNuHHTil1AWQbmXeODTZHl2gIOnOR2FKGMkOGPft2p2FUApyc\n9aYDJVjfBQsq4yV9aekQ2Z43EZHrimtGY1BYYLHk1b06xRlMsku48qsdK4xtjbtNg43ZGB/h\nX0f+y/8AAK4+JXia2s7OB513D7Q+PujPIB/LmvOvhT8M9U8caxFY2sMkkhcIYUXLN9R2GM1+\nxX7NvwLsvgt4KtYhCp1q4jBnmxyoODt+tcdSp0RrCHU9H8FeDdN+Hfhm20fS4/LtoFwAQM57\n59ec1tFty57dvamu4ySBx2poYsvWsLmwv3vek27cjHFISfl46Uok3E5NIoQKFxT16GmryMdD\nRg8DvmgBVb5uTSbiX9CKWTGc+lI3zMM8fSgBw4Xk/NS96Md+1LkfezxVAI3HNHmBOACaTd5h\nHHFOb5sEYxSATcduTSqR6c005p33+RxRqMVSV60qsF5PNIuWBycClH0pkjsjktS7s89qYxyS\nT09qNxXgiqQxNwU+lSYB6Go+M5NLwevFS9wFOS2RzTvu8mjcOMCmnC8t0oAk3EduKa0nGMUp\n+YDnFIqnqTTAVfmwTxQfm9qT+dOX5lBzQUgbOMHrQcbgcZApPTNObkcUwE96c3bIpq+/FDsT\nj0pgGBRTdtFAGYv3jUm4nnpQMc9qSPHQ1iAbh24NI52cd6D8vv8AShmLY+X8aYDF3N0OKerN\ntORz0oU7SR1FJTAcy7cUm4ryTSfMEx7019zHgZFSMduGc9qe2VXJ4pu3dGARjFRzMzMcfdFW\nl1ESs27BBzTC3eooGbn5ePWpkAxk9anqAv3lo4A6c0GjjsadhAvzdxQo25yeKjEg6gVIchSR\n+tIBS3zYFJ5e49RSLlgMmjbjk0xCYBxinKgZsnnFCqCaN2MjvSAbw5JpWXbzR1XIpN2enSqA\nU/Lgjp70i+mc5pQw24IpY8LyRU9R6Auc8DpRtOScYoXgZz
3pJJOMDrTEO6rjNNHTntTEkOcH\npT+enagTCSRuNpxSpkqdxoGGGD1pxXauO9AxOVXHrSthscc0u09SKbt6E0gFbjnrS7gzH0pF\nA8zb2pyrtzxT1H0GHvikjY7iMYFPxnpR6k0xAy9AO9Kyqq7Rz601ATz+QpemTQAdBnPNNbpy\nMil2nvQvzZPalcBVwOM0gXarD8aOF7UI23r3oAbuHB609V+b2NI+F+tAUkZziiwAzBV4o5bH\nFH8IGOTSAmmSDfKPWjcI155Jp4YMM0yPr6mgYofpml3UjgdD/wDXoGec0gLEbbNrHoPSvnb9\no7TfK1gSKRmZQa+ho5NvFeP/ALQmliaxsrojnJXP4cUyWfG+vx7rh1zkrxXL3Sqh+cYNdn4k\ntSt9Kp+U7q5DUbZC5bOO3NUBm3c+LdlU/K3BFYEp2qDjC5wSa3blhD8hXGKy540kikOOe1Wi\nGZ33mcdR0DVlXWFON2ecVdnZlkCNxkYz71m3XytuwSRxj39a0M2QyMs0mAN2zjFV2m3ZOCNv\nNTyhlDdCStUP+Wflq31GKBEbZkkL54IzVNmJUo3yjOc1dmdU2xgYOMk1UdvMwOzcUwKskazY\n3DvULwOrFo/mb0q7NG2wHcCQcVWuFe3kBA4781VySkWBbduOwnlgO/pSbjIxUqAF6c0+4+9u\nSNlHrmmN8sYygLdaYxsjE4bGMcVFN80mVP8A9anxoFYq3Ab5hSNGd4b7pB60xFdxvAIbK459\nM1XkTYqsnO44NWF+aU7TlOajZRHGRnHPSgBZFCp833V5zTFaKTbycH+EiiZS9v5YbjvTNiqy\nqpJdVycjigQkkIXAJ79aj2ttwDkk1LIyMVyvHUikVjsZwCDnimMiaNUhB4+XuaijHmf7Kqdx\nqa4QBBk5VuSPeo9gYkDOcdBTAbtypYgYzkD1prFcgEfLnJP9Kl27SgByG4Ge1QruG8ZBCtjH\nrS6gPmlx8u/CjpUe2SSMAcAnvTdokk5GD2+tWNxaIFjtIPJpgR3EYUbwdpxjio1jHlj86lnK\ntGFxnJ600IJo2x93oCKXmBIk25ArDB7EVNuaIcEhcZ3A1VhZ1ABRfl/izVk5eMsAQ3U/SjqM\nWO+eHdHk7GGcnoasw3MMtuGCAvnHPaqRjLyqhA2sODTY1MG9Txt4z60xD2Yw79hxk1C5Yxtu\nzuHPtU0jZVSBiT+lQM0scYA+6wzmgZWmKsuQuDjk1W+baNlTtloWGRURXLccLigRH8o3E/Mc\nUscisTjnsKfGqQp5m3kdV9qNqtK7BdgJyBTQDZsSEBvTtSSK25FyACMcUrbZFbaTuzTWO5gH\nyrHgEVQgf5mUgcKcYpWYFtwjwxHWk3eVxuyOh96XlVVs4jHAoFYjwqh+cdyaczBo0YqA2M4o\ndfbLdeBUfEkYHQ571PUepJD8xJK/U0ifxYPFKwLfODsHTHrSqD5ZJO05/OmBEqjaDjLZ/SnM\nqxOGB3A9h2pVlOOgOeBmkZQGAB25GOO9ILCblySflHpikb92wKnNKzFpAHAyowRS7Rlix6dK\noBnDFsKcdTTWUvgfd708dOvXg0Nl2CBe3JpDGyfv8OCcDhgaPMG7CLxjBpfLDbQDtYjOPWkb\nLIFXg+1IBpYqeRx0xTpCy4KttHanNGWG/rxg0xlbbhh1p3EIzDHt39zTVO1vm9Mj0qRlX5Qv\nzLjn61G7K0e88EHGKYxCWLcnb3qSNiygkjIPWlkjKxqSDj+8abtIIH3u/HegBzMoQ4ODmosq\nHwO9DMQx+Xj1NRtGFlHJJ60CHjCsX3EDpilVTywYY96FURuwLctzTDGVUuDkdMUAPX94dpbk\ncg0yOROqgkZ5+tO3FMEjtjilC/NwNqkc/WgAIbkgd80bixHGFPU0gYqw3fexigfKuc49TQMc\n7fNmNDt6ZNN3Y5HLe9NWTzPug496eGxksAU9KBDXZVYuw5xg
eme1EcY2LubJ6k+9PkxJhQcJ\njOKieFpmXawwOaABmZmypymeacCsjhiRkUeYNue3ovrTdo4BQrnvSsApzu5GR6Ck3EA5/L1p\nVHy7w3GcChpMt93gHk0xEZX5QQSGP3aMZ5J+boakkkI+fHPYU3IbaxXnPNIYqhUTLDOTQ0n8\nITPfPakOGmOeFAzQkm1iVyRjpTHYOFb5cbe6ijhV9jzRGoZwRndjJoaRmbaBhetIBrA79pJz\njOTTt+75g3HtTPl8tuuT3o8vy489CR0oAcylVAJDnNOkYFSFwrYwTTdqqEH8ff0oY+WxIXOe\nBRqINvQqR0596MqwyD9aRsn5tuCOopWVHfYy7B2NABhWjVk+8RSSKW2kH259aU7lUbeFUcYp\nQpfazHCdSaAGrjcSTx3NJ/B1z9KCqncqngnIzQ7FRs3Aj2oARW2Id4yW/SneUY145ZuAaTAR\nQDySaZ/E25yyjsKGMOfusDhT0FODLI52g8deOlOHzISv3fU8U3zDGqgcjrjGKYgXO1nK89MV\nGWPl/wBakLHJy2FPX1qIIWYsCCmMlaAJFYj3GKRtzKCo5z09qFkGwKM7j/KnNgMu0hiO4pdR\nhJt+Ubst3wKZsWJs4yRS7iNxUilYHqR82Pu0D0EBTGWPI5pisp+ZQzDNSxQjB3cFh+VMbK4U\nDBHTFMQrMi7c8he4pRt3ZxkmmqAse08knJpJMj7p2j1oEPUrK24ZGOCKjKp5nzAhPU0+Qoyq\nOQcckd6N2I8D5if0oAbuVyXHBzgr/Wl3BGIUElvWlZV2o6gKvQ4pixn5nVuc0ASb9hXDDHQg\nUxiXjZWbe2c/hSMpVs4G1uvtTpNzYO35eme9ADWUeWrJ8oPBpsmVYDc3TAx0+tOVV3jJwo7U\nFVZ/vbSOf/rUDG7SzYH3P7x70rMMgqCAODUkeeWIypGAPSowzeWcIW5waROo6PG47smLHSm5\nLLgMR6Uu4/dPB74pGXpubleRj0oGAZPLGRnHp1pWCocFR83WlbCDd0XrSKw43DJ9abATanIA\nIXp9KTgLlcEj+lLtUZAJBJ/WgxDeCBtOecd6QEbtu2EHBY4+lSSJ/pBC8AcbhQmAHJ9eR6U3\nYPvZNMAZwpxt47k00MFlAViF/vHmlEayfK2efShY1++Bt25HNMY4tuJEY3453UmcruIO2lVi\nisQvLDtSLjbySCB0PekLUH27sg5GOlK3zFVK/ulPShF6BkpN581hhlI9e9ACxqwZiWzk8ZpN\nzjcMbT3prMzN975ew96TJaQAgsf4j6UDJV3bcHGD3oeReuPmUYC+tNmjM3ylsAcihccgqfXd\nS6iD51Rc/eJz9KCp8zHUfyoDFmCnIHUUbv3hY8KvemAbTt8sAjnJb1oRWj3HqrDAFLuZlb5v\nmxnFCKj7drHfjJWgOgiqPLCAkkHkUrful4OcD86DujZj0wcEHvSLndnaQvoelMSEK52N/q+9\nPj2BHOzLdz2pjMQ29lBPQU5f3al3zn2pDGHClXU/KR19DTlBVmHt8zHvmjH8XY9KVQZMbhj3\nbvQIafmhVVOGzj3xRDtjZgB2796CvzfMMMDxTS+5iuCHoGHmbnHBC07+JiDwOlI0oWMcZPSl\n2spGOQR070xCbV2gnqaJNoVepxSsVAwB7Hmk3Zxnk9BQMJo+eCDnnFDhVJKLt45pZCerNgDg\nfWo1jds4PP8AFmkA4YWEMoDNnqetKZTtJ3cn2pdzI4wnbOKGZW5HIoAMErhASepNSZHnNjOB\nwc0ikmHLrtH94U0R5YPngnnvQFgbbllTj39KQ8bTy9LtDK6fcYHOfWkWMK5Ib5elAC7nwxA3\n9+KGcNGMoRH3HenR5jPLZHtTNzRvgHI9KCbjshegx/CKVg0SsF6qcGhmWPlxye3p70xmLSMM\nMMjOSaBodt3OgB289DSYxM5xlB1ob92Bu+ct0GOKFxHuLj5SMb
VPNAxu4tEQR9D7VIrbvmC/\nIy/eHrUbYEACLyO5p+X+ygZ+RaBCruLZzwB6UxmwwUggDk8UpUKq/MQGHNOEg8vaDlum496B\niFTtYjlm4FCsF+VhkrxuP+FN2/Iys2cdxShjtG87uOvegQo4Zmxu3dM0MxbaoG0jvSSbWj3k\nEEd6fGXZWbjaB+dABtZWwBwBkkUMxK/P1xnjrSbxtBBYd8U4fvYyQMN/KkwBlMkYJAwBndSL\n8zK+OOn1odjGmwelJF8zAD5uMYNAWJAxV+PnOeFWmsBwwT5y35GhleFsFsH2qJ9zYQHJzmjU\nB+7qc5LHinlfLXJTGepqORv3h3D5qVWLbsjtjnpTGK8zsu5Tz/CPWnbtg3MM7uCPemLtCgH6\nAe9OjLfdwG28ml1Aaqs3ygcA1I3lx4U4wOSBTZssAVyFpjNlFGN6MMFR/jQA+LETsB93dTpG\nVW3MuCPypGhCkgn92D8vNIu3cS2WHpTAPOORucGM9aXdxuPK5+9/SmqqfdKEE9BUskfaM/Mo\n5Wgki27lIddoJ4UVKiBY8hMbR0amwyGNQ8q/L0A96lyW3fLu45oCwxl/c/3WbtmlWRXUDuRx\njnmjq6nhjjkUv3WAUDIHSgoaoO0Nv+cDkHintISQzZC9elNXywwKIQ3QselL5jRoGH3M455o\nExwOxvmO5WHAojk2sF24zTGjYJkDc+cjmnbtuWdcHvSGN+SJpFHTqD6GnNGWZfLGZO/vTWXc\ng4yGNS8eYxwdw49qYDpMOoC8Eds96iUtIpLLhv71PKhkyo2HvikVizEL8oPGDQA5E8vPmNkY\nyDR8rKCpJJ/KnmMf6stuOOlCxhmIjG0Yx70AKGR+XBfHbFOUpuZ+Gfbwp6Cj7owPxz3plvGz\nb96/J3pAM+dVXc+HxyoqQMGYFkPTp60Rqj5OcEcfWnlzJhe4GKAGxusn7zkc42+lSdcMBhuw\noiURRj5cnPFOkAaZWcYA4wKYC+Wy5ZvlOO3anLGWU87sLxmneZ1BHPb2pG+XdtOB6mlqA2Mb\ndrKuT3xTpmEm0BPmzmpvsrLlicFRkhelVGO6QbecfNz7U0BOq+YCqHcVGcGo2UKzFztJGKfC\n+4vJJ8pI52mmSYbaMZJ+br2pDuN6rxwBxUSwn5vl5AqZY2jyw5B79qbNnhh0oI1GtiZVbbgY\nwXqopYNuJ2npmp8ssjLJ1AyPeomK7tx6d6YXI2fLbSx45PpULK3L446/N1xU537TuP0xUPzO\npZjh8dD2pgIJDt6gdw1KpZmyfu0Rqu9VK5zwM0bT86rztPIoEPHLsGPOOBTlA+UAlD1bPSop\nOv3SQoyT6VJ5mVU5yOopiJ42KyHIyf6USS7/AJcEAc7veokZpPmIKNnkmiSTfkK2CKBj2kkZ\nVVTjjl6hlmjSMxj7gXlu5NJHlpAU+bFR6grfMEAKdWpisZ8ybtzr0U4IqONTGxR/4hnd6U54\n3+VwcJnlTUDMHyzk88fh6Uhi7iruYX3kcEdsetSIpmGVHyg4yKjUudzJxGeo70+OSaRTKmAw\n7e1MBDNJbsRGuGbjca04yuoWoJJ85P4SeOKy3ZpZCTk1LDM0T5f/AFZ4LCgLlxZRAhjcEEt+\nNbVjqEKhvs6sEx8wY96x5tPmmkjlgQ3EJ5yh6fWlVlj3KA0L5wVNWiXqdbpmrS29xE0R2qf4\nK7vSrpbm3M0jlRnG015hZusP2cMG80nI9xXZaDN9otZJVBBVseWTk/lWqkYtW1Oq27W8xwMn\nofWr1rNG0geU8jo2OKxkV5o1bB8xeqjrirHPmKhOxDz/APWp7kXN2O43SbQquD0YVLc5GC/7\nzisu3k87BH7rZ1WtJbhVizLuz1GKRVxMqcMchVHyr6VoaffYmVJFyu3gd6y/tCGQFXGSMEEc\nVK2IWEm/LAdR6VDj1GmdBchJowsIBY8lsVShzHcMEO2TpnpVS31C
e3bar+YrcjFToyz3H7wY\nUjO7Pes7Fmxo/ia50F2YtI+DnzIXKsDXvvw3/a8v/D32S0v4mvbNeJlkPznpyDnsO3vXzRH5\nlvJscbw549f881EzG1uhlsPkHPrWUlc1jLQ/Srwj8fvB/i6xMiXq2k27HlS9uAc5/Gu5tZ4r\n1RLbTJKhGVZDnP8AhX5Y6d44udGvnjG2eN/ldVGOP8a7/wAN/GXWPD+LvTNZmtgfk8hnJJHp\ng1DjoWpH6LyKWTaw5zzg01FP0r5a8G/tiXf2eCHWdL/drw1wp6j6V6rZ/tMeBr60dlvvIudm\n7y5ODnHQ/j3rLlLuj1PyueaYygnkVzfhf4kaN4jtVIv7eOUruK+YCuOO+a6S3vIb+PfA6umS\nNwIwfoaOUd0Cr74FOxx1pN8ZZkB5B4pzxllzzUNFcwjYbFHHYU5Y8cdxTtpPtUARbe9K+fwq\nRo88UnHRqdgI8/lR97IPFSLGu3Gak8kbfelYCBcqMUr5ZeKfsCnGd1HlnrSAbwFpA1PVexHF\nO8vnpxQIi/rTu3rUgUNxjmlWM9KCiLaWJ5wKXkpzUwh3d+Kd5PymgZDt6fSnL8xwakCHuKf9\nnLENggfSqAYsQ2k0ixlhjNTKu1TnBpPLb+Hr3HoKLC2GLHuPpTxCezcCmNewwyRxyyRxl+m5\nwM9v51zPjX4o+H/BulvcTanamXONiyqWHYnGeSPSjlA60wlkIzimhdmAD97gc96+fdc/bQ8G\naDBKsbXGoTjhWVeCffGcV4140/bs1a7mDaTaxWIOUEkxPTIPHoTirUQbR91yRiKEyMQi4yG6\njpmuR8TfFzwd4Lt2k1fW7eJ14MaMGfpnpmvzl8VftReNvFztDc65cRCVcII22AKT04/nXlXi\nLUNRnvkNzJM8zjO6Vi273yarkSJufeviz9vXQrFpLbw/ZefIpIF1MfkHoRjqenFfPvxC/ay8\nX+No7i3GszRW8jYFrbrsz/tE9uhrwGFvJJbYCoHLY71FeSPbsssLlWxnKgcZp8thXN3VvFV1\nfQtbTXzTCZgz+aST9K5mSR4ZWheQt5fKx9RVScNPIz78yNyWpVnlwuB06+tVYktafIiyM7El\nwNyjHep1sn1RjPdkWsR9Twfp71VklZYzINqMg3Cq0t1JeKksrl2Azt7flVJCLF/c2KQbLcNI\n394jFZTySXDbi+GByE7YqaS6RbURhAA3emNDmFcZ3L29qYixcSNJbIXyrYzyaozTzScIg29y\nf51YuGEkYRCSq8c1Umj8yFSiMAWwTmgZZhZ2UR+cwTqEU9TUn2WeeMRyOzDqUY9B9adaRBFZ\nFK71GRnvVVJ5QpMr5Bbnmgku2+l2zSIXvFtY0+b94c5p+ozWqtJE05O35g0Y4I+lZck6XCyl\n/ujgMw/IVBGxkQmQ5dhgEfyqgC6uFjmHlFWEnI7ke1Q/vIVJJxn1HFIwht4412bNxyC1OuG2\n/IwJDccUAEjbVBBDhh3NV5PO4LkKq9KdJCLdcEHj7vvUUymSQEPtb07CkIRpQkbBiODj3pHw\n0aqz7i3pxWlYvZw6fdS3SrO+7CketZvmtGxEWOefXFIorTKEVUaNt3UmpGZWZlIIj+6NtTGa\nTaN43Me/tULMV+Y/ckG4CqAgmy0fl4AVeOtRq5mUqsfK8ZHTNPkmRUJYjceAtOVx5JA+VMc4\n6k0wIGxuyo2hT89NlaRpGwx2Z54qRo2++F2qRkg1F5Yk2sSyntz1oExYWMjOgXJA4I70i/vE\nGRiYdB2FTxxosbMj/MetNuNtrCsgHmN3HagkqNNLE5ZeWb5StM8l1k2H5SfmIzTkmSNhO4LK\nx+72ps9x9olLxhlToC3WgaQm1FmZtv7zHJbk1QdXuhvY8Zya0jaxzFGEwBHBaqExVJmRWzGv\nVqXUsrbo49xLbsmnySbrf72fYelNtYw1wQ4wDyB2pZU3geWdu1+cdx
TEQi4STZjOMflTpJHZ\nUVidind9aSNysiYjwjnAOODU0mI4TL5TNjqvagAXy8lpwDM3IJ9KhmUSGUs2BnAH+FPaZHkZ\nnXLAfLSTMQqF1GXbjjpQAkUpWNmZcnopoVlZA7jcS2P90USKEZs/eHbsfeliX5ZJB68elA7C\nMyqxOf3ZOCaVo1wAw3OT8ue9EytmIq6pGfvZGeabNGnmddzMfWgB2SBsx1ODjtUsjbV8tFMs\nY43E85qHb5ceWk2jOOnOac0jwn90+Vbg8c/WgbG7S0bl23MOmOKcvnLPiMDaoGWPrRJGNrAN\nlxxSx/K7ZOEI2ke9BIm1mZjjcd3I6VIzOIyiHJz3qIqzBsAhsYBFExXyQrKdqjLt70DFfPmx\nuRwON3vRt3kKHKvuyx7Yo8o+Wu2Rih+ZRU0YRVklZiGC/dx2oQiPzi0chChXB496fGvmKXC4\nX196SOREYKqkoxz+FTIxbzEjXhjnb2o0GNiUtDlwSAc/N3+ldN4W8My+INSt4IoWYu4GVHTP\nAP1z/WqWh6MdQuIdvmANJtCLnLt2AH1r9EP2K/2XhqWoJ4i1uPOmW7iSJHXPmHAwPqDXPUnp\nZFxjc9m/Yx/ZptfAPh+38Q6xZq2qSLiFJE5x/eb619TyShmLLzxgfSm/JHCI4wI0T5Qq9Bjt\nUMkm3aO/SuOx0oUN8wBpCxZvu4pm/Puc0/cSTnpRYYvLd6VV7Y5ph68Uq5I9DRYY7HcGlB6d\nqbhl6nik5Peiwh+Plwec0u0/4U1idue1KD8g55oANxC4602ZeAAcCnIDuNI3rQA5QfL2qc0o\nYAjtSHK854pv3jx0ouBJgs2e1Bb+764pq855IFIVzyOOelMB3LZHpTs7qjywPSnjI5NAh6/d\nbNN3NxkZNG4+nFOXPB6CqQxMrjnrSk5NG0MM0fdINSgF6KCaBICvTIzSbc8dqG/uimMfu6nG\naFJ28U1vQGhflb2p2AHzilH3Rzil4bNGM7aBjh82M0DlvQCmsSvbigSZjyRTAcpGD3NLyO2a\nZu2kcdafnHJpdQD5vail+X1NFMDNbAYCkbDHpSFvandAPfrWQw2lk3AcVG8h6DkfSpAx2kD8\nqjYnaSPyoAQZHalVsE56U2Fg0RbaeuKUDcc9qAHDlTzTVYrx3pWUKfl6UifTrQhCnrnPFLj0\npeF69BTeW57VQCghVo8zPQZpp+7weTTtqovXJqQG/Nt6Uki7eRyaBkfSnr8rc9aYDNp28HHt\nT9vy8nikVeSfWl+6MHpSARWXaTjil3D8KbuB4A4psaYJ54piHM4JGOKe3zcrik+Xpim4245x\nUgLkAil69BRjIyBSMcDiqQhc54pXQZ5NGAwGeKTjsc0dQE24GR+tHHmDPSht3l8UqEYpMCPa\neRT8nHTmh8Nxnmk9ARTQDkIxmn43c+lBxt4FJuIqgEZiOpp27GB/DTGHGTSFSwFSBLuHpz7U\nrNj6VGc7sDpQSe9AC7sEYFMY568U9eFpPlJ6c0gDB6g80c7s4pS3y56UKwK5FMBcZHBpFB28\nCgZU5PSmqzLnBpAOZh3NDKWGQaaFO08U9U2/Wn1Aaw70vHY0cbaaq981QDmbkflSr3pvH40M\n4HHakIXoMd6aPkIOeaGJPNDfMvSkxCDLPuPWlZtvXNDH5hjpQ6nIBqhEiN0Nef8Ax0sTdeEI\n5VPzRyfd/Cu/B3EDbzXL/Ey3N14O1FSM7VDj8OtAHxB4vs2+1SSk59q8+vA3zEncOten+MIy\ninjBABArzrUcxqAqZBqkBztxiaZd3UnFVrtjDGY2XIb0q1fWzLIXHbnrVKTM0O5ztK1ZLMO7\njMe0EblPOaoTMrSYLbRnmta5VWQqzlVHINY7KDMQy/Oen0qjNldozG28EGPPFVZCY1ZsBQT/\nABVZu7WVZMlsIO1QzNuhMbjep/ioJKE++43DjAH3aqvEE4XgN0NXJozG
MrkOvQVS8tlVjI3z\nsasTGTQyKF2tuK9feqEzGVxuJTJwQa00ztzk7l4IH86qTsskzgj5gOvrQSV5F3knOYlqNlMn\n8WT1FSq5VsNjbjkUiK+4sAFAFWMrtuKqdnU4pC25iMHaODUkaiSN2349jULY2snTA5PrQBWk\nUq+xdqr1z3pu5lyoHncdakkiJkBDYGOB60jbRgfdQ8fjTAjxtI2j5f7xpu0mMqRls53H+VSr\n+838Y8v+KmSMJGBU4OMkUtgK43PuRWHHNISwVT68YHSkEatcEjKj3p7RqrBSeM561QDXXaoW\nQAtnjFRrIqxnd1P3TU7SCRW2/eHrVdmDEZ6elAB80v7wfMe9QbW+YKeOpqypKMCPlHpRtVnb\nHyofzqRN9EQQZ4ywI74qw7DyygwBnJ96jWNUAIFDyfL93GT1qh6kcnzEY4x/FTiwVTt++Rgr\n/Wm8beOBnBNMYkkbR3wDQBYjyrAswYYxjFWrGMsMEY3epqlAxDHH0Na1lnacHbx361IFe4s3\nVC3XHSq0kI2oGOSeetX7y8SONY87nPUVmrtZiGY7AeAe1MB0iHyyw+bB6imTAeWJG+VVXAWp\n12lRhsn17VHcZZFWTAYc4piM7ywRkDBPrTWU8Rtgc9qkkkDsVJGPeoc+Vx1/GgB65LMoORjG\nKay7V+UbiTRudYxgAMzDP0pJoxG2QOAexpiFX7rKTjHNRSnywoYbiTxinsdpHcNScx44Bx1z\nQUMKiNX3LuBOBUqiMKSGyAMBTTFjBLA8s3I9KYgCtnG1h2oAleZlYHPygelRTLubbt685pUZ\nnUiRcHOfenNkqqnkdd1ADd3ygMMgUiTfxDO7pzT1j+bpSMu08HiqAj+bdwMnvSyLtwT1pcpu\nPHPWldv9oAYzUgR7QshyN4Yfep/DEnGCKXuh/gpFynmLj1xmi/YBqtuZT1T+VRt5sbHnknP4\nVLGvyEbccfrUezzMv82FHNO4xMDcGzk44xQGAUvyB606FQqgH73UZ9PSjcF3E/dIxj0pgyMo\ndw+clSM0qfNHu3ZJ4we1OBG0KemOD60nKsCcFe9BIm75QoTcO+KQ7UB2j5OvNP8AMDELwBTb\nhGb5VHvuoGJ8/Ukkdlo+bcGzg+38qYysyqVb5vSn/fUkts54X3oEMYngbS27rRx0zntT/M8o\n7gOehpGjPSMYJI/HNIBoKEYb73TNIn+s6nHpTmVvMCsVDKfmAHWjdukyBxnAHtTAYrMO+OeK\ndjk4bDHvRJ8u5g25c8Gk8x12vxQArJlh2UfrQW2FvfofSjzF+Ytnnp9aRRyobj1HrQAbtrYy\nppcDBYAbj27VF8q7jtzzUqszMS+FGOKTAXd+75OOKQfcG0dOtJn5AAcnPSndZMONopgR/wAQ\n2rn2pY2ZRsI4bksf5UHDMV3EHqFNI5O3JG1/WgBoB5TOO/tSrN07gHn3o25bc3BxzSZCqoxy\nDQA9WPzOeG6haFVn287cjnNNfLx9dr5p/E3D9RwP8aBjT+8U4GOcFqbjzMEHbjjmnbVXCDKp\n3J9aQqOCcnB7UgGr5vLfd5xzQrDO0ck9aPMIj5J4OR60rLuO4naTzimIGXOB05pM7ZCcbz05\np24bjzv+lIrDacff9KXUA2tvJznjtRuO7YmMgZyaVZPJ+bnngj0pGOBlTyeB70wB2Zl46njG\nKVtuGYkFvSmvIVTIGDjBpE+8AB1GRQA5FKMeeaa0JZgPMz6inMzcux+vtTX2eYPRhUjEbA+Y\nD5xxtNLlVIGDuPUU6T+7jkUA/LuHXpgdaZQxFCo45Lg5BpnmOCqlMhjzgU7blupHrSxblkGe\no55pkMGVQzAndQPmXbtwOooXhC3dj0pzbdo5O7oVoAi24bLcUnmBW5YirDfeTIDBaqqpyxxk\nMehoEiVVMkZkBUKeMHrSRsI5ORubuKTCtkY2/jScMrfLn1PrSGSKitkjlc
5zTPmKsQcknvT4\n8eXlhwPzpJGV/uLsHXnvQMbIznYMfL0yKf5il/lbOOoxTY1IbOcKeaarhVcHqT6UAxxZOTk5\npscn3Vb1zmnqxW3ORgUi4WUPjcMYxTAWQAMzBvmPqKarfMATlvQUrOzkgjJ7U3zNsmWOFPBw\nORQAowoxngnp6UBdqlgflHFL94fN93Py0FgPlA5HWgBPMwoJXc/pUm/c2X44+76UxpVfCqPm\n/vUDeQWA471SQhkkatKcA4xk0cMu8DIPGe31p+QzZVscYP0pFkJUop/d9MVJY3ncefujoO/v\nSljJ8qjHGeKNyqRxyo4NIxZcY4d+npQSIq7ZFZRz3FAL+Y+MMOv/ANanndHgng9xSfLzg4zQ\nBHHvnO4jC9h2pzMyqWdMY7U4qd4x8qZxtz1pWkO9iV3KBgKaAGHOVJXg8D605yVZo3GD7dqR\nGdVABwTyKSNSJCWO09T9aADzGjGFAK5wTTmcbApO3nrTeFUZGSx596X1VhmT+FaAEZSCCBgd\nc02TYVGDgE80pwzbmOOMEe9M/wBWmfvgnHTpQIduIcHOR0FKFZkICkNnOaRZCsZyNp3cAelC\nSEtIWyTj5cdKAEb94uGbkdGH8qeTiNGAxxjJpiN+7Ujgj+dKoZgTncRQA5WDJhlwV701QWUs\nGAboDTuVVhjd7U3oAE6UAJ1wXOAB96nHOzOdobtRuKyYVcmiT5WGRjK9M0DE+UjknNOYq0Q5\nyM9KYxDMAjfLjrSsGGCgyucHNIlgZCrZHA706Ty2jLKu3PHBpJN7OQGGKZ5Q3cNjHJFMQ6Ri\nqFR0brmkV9q4JO1R0okYs3GGGM8U5t2FULgH71BQ0bZF3bSD1Ap5YOq4bK9xSSSLuK52lf5U\n35G+4Cu4dM0ADMPvEEDoBTplAXJOQDxTWYSYGCNtSSyDcRjAzkUAyP5iQ7jYegzStu+bAy3U\n0Moc/M27nOKPmcNg7R03U+ghgwnO3mmNvkkJA5AqdI3ePzMcLxTcPuyOePypAxDlXPAK4x0p\nPLVACchhTv4mVm3fxHHakVWVlZm3Z6UAg2qdpx8x6H0NAX92yNnGefrSeaVl+b5ueKd5g2sW\nGBnA+tIY0cRFck4OaVlDEhTuOM/WhtykqU5J27qVlCYwN3VaABNy72J2/LRDhF+V/LJ6MaMZ\njK9hxTWwQmVO4cVVhD1/jyPmYcChWJRQy7RmmrmRsJlf6VI2TtBIPvRYQ1UO5wuD3FODq7KS\nmcdcUzai7nXOe9EJKfOOAODSGIzLJvy27PtT8NIUVeQFx9KRpRu5Hl+1OX/Vl1kGcYOKAGq5\nK4+9tP50M/yE9DnAWnLhBlOV6YprLncpGMc0AIXdmKlfl74pwkU8BsY9egpuxlbzFPP92l27\nSCq7j1xQIVQf4cEHq1KsZaVR0HWo2RmGM7SxyaexMRIznA4NBQnmfMU24BPJpATu2EcKM7qX\ncWUuVxzjik55I6dMmgBdxWFgVyT8wzUjNtUDGCRSeYy/fx6KMUm0jO5tzdNtIBzAfLn6Ch2L\nfefHbikZvkztxjrmmqq7wCu5TTEhWV/uscn1pB8qjHPOPSpI2RFZh95emaJNxUfLuO7PFBQx\nY1K8uGbP3R2p2/5tqJ8x4NIWTcBGMOeW+tMbKyHDDJ5oEPUdm6DrRwY2DLtKHnnNO3DAVQCe\nppouFiY5+ZwCGalqAu1t4wVy/RO9ChUZ1dSrHj/69J8oaNlYFev0oZTuYfeOc5/pTAWNdzrg\nkKOBmnNGArIeWpuGEY44pdzKY8ckjLUAN2+ZGM5Kg8tTmZdqjkcZB6VJ5m1yhHye1Rtt3Dgg\ngYw3ekA7b+9DFxgDvSfezsx5n86Qryu4YGacufMJIGB3WmIaCWdVPBB/KpM5kY5wvf3pFTzI\ntw6Z49aUYClgQNvJzQIbg7iqcKe9LuDtheHUYyaJGMihlOFb+KhG2t8vI9aW
ox6fc+ZsigYx\ntOAvTJobYVClsDNDLufaeFUcUwBs7thzxyGpI42aM7gCxP3c9aduG5CfnDcEinTfdXIyc4BW\nkAI5XIC5PTHpSbiwKZ3EHkdM/jSn92A2c9uKZLsXDFSD29KBjjjdwCBjjHTNLIsjKFyAepx1\npsgbcny7t38X/wBanMu2cAnHbFMBAzqxXO1uoNSfPH/vkfepP9WrKxBfPFOhw7f7SjNAhfmC\nqdpZh1ZjQ275eTnPanKrNuZh8pGeKRFaOJdnOTkmkMdJb5RiTxntQvGSyt0xnFCrk/ewGNSL\nIu7acnb0PrQARkhfl2sOOppd5k3HIODwO9MLGRiVHuafE4UBuMHo1Amh7M25SwwfSkG1pGjc\nZwN3+FPhmLyc7XKjg9acVC72U4PrSATe+4sGwHHNDZ5GNppiSHcH6/7VLw0n38CqHqCqrNsI\n+WlkYQP93g/LmnLEvmEMwwvzH1pvySYVzjJ3LSYtSNHaNFAb3K0jSGWQdkx29alOB8qj5veo\neOCgOWPK0CIpc+YMjcKjaOJvlZgF7CpGXzJCqPgEZqHf5OwBQ3HJNMBkkZC8uMKflqCTdtL4\n56E1ambzBhyBnpjtVcsW6nKimAkjD5SGwQOoFDyCSRJRlHH3gO9IWdVxsweuajiz8zsNvotM\nRIGPLcd/lzRuVV3FcCmKMEyEZ9qkdQyqzfL6jtTELIwkt96v8/pTOvQ4YjNLvDfN909MU2R2\nZVC8Me/rQAkTfI5yVI4PFOb9xExaIuWX5cnj86t6HdQabqZuriIXIReYzzWVqV6bi8nZBsjZ\n8rH2AoGQXNo8ccMjyBWbgKpqs0iIFRhhV5albdIxfPC89eKEjLN90MW9elIBvlrb5c7trc/Q\nUseEjJXJ3dPpQ8jSKcDdt7HvT96jAUFQy9PSlcY3bIsh2HjGaAp8sqFLqOTTkQ7sEblx0zTE\nSWFPLJyuc7R29qoCazu5bbyxbsypIRnkiup1DRDeWrXsNxGxhQF1HU1yLs6oCfoMdqt2Uk1j\niUpIsLcEscBqZJt6bJd3n+qRZRbqMknBre0zUjpupKZWaKF4yzYGQW7CuUXVpo/MMbrCrABt\nvetC31ZhCY7gZCjKn0qkRJHe6LrbLcYDjYeTzkitUT/bJfkJUZ4JHWvPbG4l2LJEwRc5LGuq\n0S9Y258wkvuyre1aowaOqWURw42EqpqeTUPNKIOp/iqnY3iyHyi/3xx9as2diZrpoW4kRSVX\n1oAnWKKbcM8dD/jU9nC0MRYP5m08L3qGGN2Uvgrt4ZSKfb4N0GOVA/LFIY8XRuJJnWMx7eOP\nWrETBmjkjf8Ad5wyn1qH7QJZWxhEY4DD1qGRtq7BkhmyGApajOghaOZmLkEjgD0pstrB5jTz\nOTIDtX2PasZZvsjBzlm74q8dVikwzRcNwR2+tQy0UVKpLNLcR5Jb/ln296aboiUErtLdFH6G\npZrNbhWe1XKg9M/nUcKtsBRCwU8MR09qgouR6pPDsjmkYxA5Iz+lWG1+KONy5+ckbcdh6Gs+\nFYryYLOxijJxvxkBq0dS8Pw2CCYSLIwABQ8ZHrWbWo7l/SfHl3DCkCySLExwG3kV32j/AB08\nU+EcfZNRkEOPuOxII/z/ADrxySNUjBQ/KGyFzgk/SluluL2RfPZstwGY8ClYpH1f4N/bO8QR\n4+2WNveRr94n5Sa9BtP2zraOETah4fkihY4DRPuX+VfAk8dxCsgaQlF6svFbeg649jHKlzcs\n0G3IU/piolEtOx+h3h39qzwNrUyxySS2eVLGSVTjI7V0lt8e/Ad65WPWokPXMnA/OvzKivA8\nzyC6eKNRng9z2qpNr199oZUbepPDNU8iLufqrH8XvB7hSNdtirdOcGrC/ETw3dElNZtRjuXA\nGK/LyTxlqNhYJAZFZG6DGWH41ah1RrixLnUJIbjr944+lL2Ycx+oo8V6RKm+PV
bN4/7yzCrs\neuabJD5g1K3xjr5g/wAa/JiPxlqBnEBvJAqZw28j8adJ451K1GYNSupUzg/vSB+VHsxc6ufr\nNHqlrMyvDcwywnjcrj/GrguLZ+PPiH/Ax/jX5OS/ETxJpsa51K5eGTBUeaanuvHfiWzhimGr\n3Shz0WZsjI+tTyMrmP1f/dqOZoh/wMUeZAyEi4iwvU7hX5Rx/FTxbdbn/tq8KL/CJT6/WiT4\nseJcAx6pej1xMajlZV0fq01xax7Sbq3x6+YP8ahXWNOb5ft1upPfzFx9OtflrcfE3W76FUe/\nu2dVyGSZl/rWJceNtTu2Fu19do+MjM7H8etHKx8x+sk2uaRC21tUtEYDJzKD/WuV8RfGrwbo\nNuxl1y1km2MUjjcHcwHTP4Gvy8k8VX8M/Oo3k6DlsSkZqleeMt0csJgdyw+Vs5K1ahoLmP0l\nT9qTwXHoMdze36wXzfetYju2jPBz9K5PWP2zvCWm8wXE16w5CLgLX55299LdSGN32Rtzjsfa\nrdnam3aRpVwq8rkdfQZp8guY+zvEH/BQDY3laXoCjd0e5OT9QBXAa/8Atu+Nby222fk253cN\nEMMPSvmyd45plyGTcMsyDO32qnHGsF0wQsFPO5jT5bEOR2/iD46eNdcilku9cnlDsTtZyNrH\nupHSuJvvFF7qG03Mk0rdGZnJ/KqtzhlCRsSmfX9aeoZY1cqDtPc53Yp2Au2urWwkMMrOqMMn\njisqe2My7y2+3Z/lPpSy3KR3DEJvV/0qK3jWa5WNpGii6Bu1MTRN5I87AYtHEuVPuK0or648\nStHaAq7ryjMcYFY0kyJcOEchFOOf4qr4MR3rMYOCBtPNMDSmVY/OW5b94jYKxng4qo16sOfl\nDIR1zyKpNBJbBZPO3+rMc0TbVkGD5jkenFMZNayxLazZ5dgSCe9QeX5luhJ5zyveoy378ADK\nY59c1LIrSMpjYAf3c80AxrR+ZJsUZBPTNO2utrIjrtcHHHYVJD5fA2CRQfv9Kbem3jAkTcST\nyOwqkhEAuEMaR+TuI6nrSXCs2GU7fU57VDJsb95FIU2nJx3p6/vIy/mYJPRqACaKWNBKMFDz\nwaj2yPCyiT5evFXI9kdmzy/NJ91BnisxW+z72PDdAo96YDkzHvBY4XnJ4NS/almUCJBtUbif\nWqnl+ZDuYNJKW5Ud6m+VlzEgj28FM0uoDLjbcMMExnr7VRbzVnKK37sng9h71qJGbzcVTy+M\nZFUPIcxyHaS68cn9aoCdZlhk8yaNbiNegqNtlxKzu+0HlUHao4ZtsscW0Etxk9KS8Y28gDBT\ngZ3UAI+UXDBnYfMKhijDEk5ZnPHtVj7Q0q5QYbb+YqIzAvEBGQxGfpSEIY0WF1UHaG5HvUMc\nahgwOSOPlq1Jbt5isX4J5Wqny/eUbT1P50DGSMzb1H3vXNMZh5SqSOOx/lVjzDtJYDOePpUU\nkkKMx2b5ccR9h75oAhMaK+9wpHYY6UxcSKwBOAc9OKRpG8xUKcnn6+9SeYWkJ274sYyOMGmA\nhj/eYaYbx2U/pTp2DJkDnue9RxMsaksoLs2QO5pk3mllb5Qm7jFAE1xbxmOFPN8tjzx1PtVm\nys45tQt7e6fbbycsTwBjtWdcSJNcK4jO/GVbHHFQ3GoSxxbZADORkbj+tAiK+bytSmjhw8Qf\namPSq7KVJXJJPYdvWiKZ4lClklcDJYdeaWSB0h3gkMp5U8cUgGqoWAqGyf71QJGNm1mJVu4q\nwmWtWJdVIPLentTrpTDZQSH70nSmMgkRFtfKIYy5xu6H6UTGSOO3h8nZ3BYYzUU00sqoWYAq\nMj1+tSTtcXBhknl8xkHyn19qBjLibzESB42QL8wK9M+tIkhgjdYtziRcHP8AOpvtTtGyKAG7\n8Z/Coo5TIrlhgAfdoAiVk42YYjg545qRA06qAcfN0JpiRjcFdgpJyfpTydpjaM4G
elAhEZVL\nNMd2eAopkf8Ay03525yKnaJWXJPGeeOtQgRrJgIxA6YNAD42DxtsXIxUYQhgVG1vSnMwWFnC\ntuUdKj85ppc7dp25pjHbsr+8GX3YH+NKq/vmbPltjFJ8zAEYz1JNCx+Zcbmk4NIGEZPnH5Q7\nEc80jSCPPykkHLegp2IhK3BZc9qbG0jW8j43hj09hQFkOZmXY4fAY9hT42eRmB5QH86QKsOe\n5PI9qfITjB+UnqaTAa8fmPn7iZxTljErlB1PAxSyEkIQNy4zxUkce2WTyVLM68Moz9aZPUsa\nakcl15TIIjjAY8jNaGn6et5dCKOFs5wzJyTz2FQaTppvJBGN0RT7w649yfzr6d/Zr+AupfEn\nXLa1gtj5SsHkuSvyhODkH161hUlZaGkVdnW/sifstz/ELXbe5vLeSOwjHmz3EiYwM42j0J7V\n+o+h6LZeGdLg07ToVgtYU8tY1HYevvnJrJ+H/gnTfhz4ZttH02FUjQZkfHMj9ya3/wCM1x37\nnUlYHJ9Oaa2WwT1pTIM0fe5FADFwrE1MmNpJNMbG04696Pu9qYxw6Y6GlxtbFIrbsE04/NnH\nakAm0gEZoX7uD1oHOB0460K23OeaYCpkgjqKe3GKjT7vXjNPZeMip6gDZApWxtGOtNVucetL\ntO70xVDYcN2pf4hjpTen9KVWCrg1JIm3c20HA61IpC8npUe3dTkX5vm5FPoMWNs5NOZj0Iya\nj3jzDxinhjSQC5BOTTWztpcevejJqgBMtntT8+opgw3enJnkE0DFaTbwBQfl5pDnd7Uuct0p\nDBVyu7PNJGetLgdO1GOCKZPUcrDYSOlIWBXIpAuFxQueMigBw+ZeTTv4famk0p+7TY9Q6kds\nUsjbsY61GucnNOXOCe9IB24+gopmT60VQikoyM5zSKDye1OX5c8cUm7PArIoFA4pWkUL93ND\nN0FC8Nz0oAONuMY9qjCfNmnMwXpzQg/iz+FAChBnaetJLuUcjAo9xQPm5bn2NADFQgHJpyj5\nfaiNl3nPSn8FePWgCPbuJ9aNp25JqThX6cYpnVTnpQAse3BBNM3hc96VWVVNNjJZtpFADlY7\nM96OWAI6d6cV29cCgsO3A9qAGj5TQrAZzSv93gc0ccZGDQITdlfekZQWzupzYXkDJpvI6Dig\nYuSOjUADI96DRsLYJPHrVCHO209KRcdaB05ORQWA4xSCwm7A3D7tN37Qc805pFHAFIwBUCgL\nAq85qTjseKQEbfemx53cDI70CH4/i4xQzcZxQq/KRSgjkYye1MA3DuOMUjEfLSHO4ehFN6tT\nAe3zt6YoXGDxzSH7vvSqNuPSoYCMx+lOTBIobK9aQg4yDQAnUnI4p3G3GMfSkZuAPSjdu9KA\nF3bkx1NID6jFIzhRSbtw9aaEO34pUfLGmq2eo6VGpLMewo6gTMuGHPFN4+tDNuUACkPHIHNU\nMGYKcmjgimtlvrSxpj71IQoxtxSdO9O3BmIAprcZpAK5wOBSxsTxTMk8mljB3E0ATquAWBGa\no61ZJqWh39vKNyvCwI/D1q3j0qQruhk91IFMVj4W8b26QzPEF/iIXPYfWvKNVL7iDwATXuHx\nS04Q6vfEgqwmIAA968X8QWu1m2HnOOaaA5fUIzK3B59az5l3ZRu361q3R+zxkN8xrLkYEfMa\ntEPcyJsq5z8o96zbgBpNyda2Lxg3CnI9BWTcr5chJO0egqrkspXEhOWY47VRkctIAoBAFX5h\nyjHDRsM1TkC7i2NnpTRmQtMdvmMu5+gFUZC7yE7PpmtAwb4z821vWq0zFY22ONwH8VagZ6zu\nkbEx7dx2t71Eyr5ZYDc69F9qnZ3WPc43A9hTFUSgB8rk80CsVi3mEjjaPQVFGdoKlzu61NK0\naXDOpwoPPvTDGZkZgMKx4NAiKSPzG3H5cDpVeSTy+AN+e1WzGGkxuGcY46moTGZJCM
fKpwT6\n0wKMquGAZtxI6elExCogZM47Cp2jJ3gjPpjtUQk3NkfdAxTQ+gyFgrbhkZ6g1FuaPIVN3OQK\nsNlt4CYI6GqsFwScsMDpkVTJHyNtk+ZOo+7UTQkzKMZPXNWWYs28gHjAIpkf+sLF+o20hkbQ\ns8x8vkY5qvI0TKeMMvGauRoY237sY4A9aY0Yjy5AJ6kUBsys0kcigZOe2KVW3NgnbjrmlKmN\nVcKCzNmmNIdwaQjLc8UmIOGwMHGefWh28v5CcnPytS7maQcYz/F7UsihmOcEf3qaKuRuo8zO\nOf0JqHzD5jAjbjmrG1o0PzBqbzMy7ht96oRHGF84O3Q84rUt28td+cg9DWZ80YJY55wBipVv\nAVK7NvsKmwieSb99vA/Md6iVUwztySc1PHJ5qkhfkxjmoFUrkYwPWkMmj2R4U4KHkVBJGrNK\n7OQB39qGd5lI2hVWo7jLKOwI5qgKr/MmcZycg4qGQHzAxAI6bRVg7dowD6Co2ZGk+X5T0OaY\nhkylJFPUUpXcCxwBnjmhmYcD8qRkJXJ4OelADV+TA+8abLndhk571IOxUYI6k0M24li2c0hk\nYjCn5s+opU3NEcgA565pwAC4CjPU89KYFKkluc9MdqYtxG5ZWPDdN1L0yM8Z60g2Ku37z5zj\n1pdxPBXb7UFITcyyDP3emaTbhic5p0KiRn/2RxTWU7RuJ600IRdrZJ4GP1prELGvynnqacqh\nlJYEgHtSqAz4XJXqRSAYq58th3H3aJY92edpp3fC9ccUin5Tu6/3qaASTCrt3EcdaYqhZNrO\nQuM06R8oEbk57U+TG3BHGMfSgd7kR+6uBznrSvhYzk7mJ4WnptCgEbl6fL3pjbdoJG3JwoNM\nQjkR7VPzY547UMyswBXHf60nAUk8L0zS/JtO45bHFAyMsWyx+UZzjFL93O1iQ3rUrHdGpY9D\n2ptygXBRwc/wmkMibEfH3qFdVXoQCeM0/wAtYhlxub0oXIbJOF7CmSL5i91yAaFk3OVb+Lpj\ntTFYNkHj1HrQFJfgbFHr2pAKpCMw/DNEoRYlAGGJ/Gl2lfRl60x5MuoK89AaAGGENkHP404h\nY05HPb0o+cA5z6URk7imMnHemA5pAI1yvXp9aTBY7mO4KOTUa/LkueP7vpTo4y2GB4PBWgQh\nj3KDu+VjmnffU4P50nKpwcqD0pSRgvu2/wCzUsBTlguV24/OgMQ3T5vWgIGXcxJbHbpSL+6z\nyM460+owTIDBx9DSTMnkjKklecUoDkghtzdcUvzZPPP97+lAhhYth89fug02SJmYKG/efyp6\n/KpZvnPTH9aYuR8wUkDJJFMRJI+5eF+ZeDSMTJg4ximsu3BByW5NEn7vHUA8CkMR2/hIzmm/\nMhJJxj0pXY5UDG4enenLGWY87T1pgI2PMyR16A0nRgX5NB+ZQAmQoxTVRhgdB1NAxWA25Awc\n4+WlkzGwTAY9SaP4hj5QaGA3MT8x6Z9KknqEmCeTge3SkbaOR0XpS7egZcr0zSKnkox3Drjm\nqKELFl6e9Kp3MCRyOlNcyMy4OQBnAFGdpBce4oEStGJBuDfKT8yGm5U9edpyM+lMWTjJBU5z\nT/kmQrjHcN70gBpBuUHgsc+4prY8zIP0xS7WKkPjcP8AIqP2H4gUDuKu7yjnl85x3p7btoLE\nD1HekBKDGdp9aQjY24Dc396mIAu0lZBuyeAv6GnLJtj6fNnBNMb92o2Nz1xTpGZlU8LzmgCN\nomZg4OCOooTG7cRgdDUkrBWORuz6U0fMoUjCk5NArDJImRt2G49aN/B75PWns5XlRuGcdaYz\nHbxHjnNLUZIuQucDGcU15Nq8kFQefWkVj97qh7Z6UKoXGefamAjEBgQSd3QYqVmUABsYHU+9\nJIx8sbeGHIphjPl7iQT1oBht6nOe4o3Y2sF2setBY7i+MJ3o3GSQDbg9Q3tQNDpF6c4b
PShV\n3K/IBHGD60SOWUso+bPUVE7NuBZfnNIZLu8vaWAbPFIzNluNtDfL1XPvQrAjBOSaaEKw24UN\nkdQRTdxjPP3TwT2oyI8HBCE4xjNLJgAsrAkfwjrTJDaNmSo29BjrSbmjjwBhc0gj+VRFySc4\nb+VSS4jbGCBj7ppFoYzRleD8/p3qON2WAqRuYNkZ7UjYMwK/KxHWnebJu2qAT3NAhWdlXkty\ne9I/7ttzfMfQUmcr84JftTspuUquT3J9aBAy7oyRw/uelN+7Hggj/ap7bPOyQd2OtIrfeyO3\nVulAm+wRyAqMdAcfWneYGjO5duTikf7yELxwT9aRtu53JJ77aAHKoHcYx0prSMuCMNnp7USZ\n8lWBwCeeKbIPmxuOe+KBi7F6nr1oZX3YBG326UoQnBJ4oeRWUxkFTnORQA0OzSdumBQiMud3\nB54oZEKkKPl/vUKqhS2TkDoaAuGw+T83GD2pu1GjIU5JOCvennlWbqo5FNDF0LKMFTyfan0A\nVlG3ahJ5wKRdoyT8ijj8aFZY2453cg03jeQQX5zikA9d4G5Bk4yM96aceUpLYfvkZ/ClJ2Md\nuSD60uOgJAxzSEg3blG1cY4BpsLBlbcCeCMZ70+Pd83GR1zTHjRQxHAzgj1piE2uqqO55+lS\nSsGYfLg4/OkjkAOQDt6CmsrfL83O6gBZHO4BE2ccml3bbgEvvAXoKCxPJcE55X0pF+8cHaTy\nDimUAYyyjYMjqc96cwZcjsx+96e1NVjtZWU7uw/rSbt2Ecnd1xSGLIrFSpbj0FLuG5txwM4F\nI+4sVxk9c00MJMDaeTmgBzIPLJB+cnA+lB+YKoOwLQXDbcjBpDIeQP8A9dFxEu5WYsZcj0qJ\nZGVQ2N3NJuCssbfdb0FP3DbtxwDjigXUBthkJx8xpVUqzArk4yKTCv8AOy5deOKY3dlJDH1o\nGPRdqZypz1PpSldqjA3HOFpGAWHaMDPU9xTc7NpJyelBI6Hd5kjPj/aJ6U6N13cdKZzuwFyv\nUg0QlUVnIxg4Ap9Ch6/cLkZFJj5iy8KR3puP4WLAN0pV+aPAG0r70gDdwc9euaTh1zj5fUda\nI8sDuGHPakZQq7VOPUUxWHZXK57cGmtJsVQDz+tCzBoSEIWQHGcdaQZVkBwFxjNAh7KBjIDr\n7dqZ5awksBywpVRomAY9eRT2X5uD9Ce1IBGJ9flPLMe1OZWPLL24xUas3mZLduTilVVX5wSV\noKELEfNk9MZpdwVDg5z/ADpVwmDjAJzQqhmLsOM9BQA1h8uD1zx6UsbFGweQeCB/jS7Put1V\nehoYsrZK5ye1AhQ6fMGyAeaTou3qGPWjcPlGOD2xSzKWUD7u3oaBjlcqcsuSo4qNpA2Hz8x4\nxUp2t90ksRTIdu47fvdOnA96CUK3YAZ4wc07a2VUHA6/UU1chTn5l7t60m/P8JHpzQWPQjc2\nxRtIx83Wk/eNGRHx6+tNKBmyxwfb+VLkthc7R6igBFxuLE4Hp3p2QxLYAPvSGMBMHls9QaFZ\ntuduRnb/APXoEIzNIucAbedy09SAyhUB3DJ96aihUc7uO2KRXb+AYbGfmoATb98bff6U9Ycc\nk54ySKWGRt21l3ZHOKdGwXA6HNBIsishU7s8ZFMJbKsvPOCaZ85+91z1pSxiUKoz7Ggol5G8\nBhjORmopGZl3sdxJ4xT/AJiMbPmxzSxNukUAYIHA7UAJtC8ufrTTgEsgYKRxSspkXDHD5qbc\nsPBOVxwR60ARlguxVGXI5okG3Bxluh9KRVaR9o+U9d39KkaEZ+/82OnagnqJg7dgXOemBTC+\n1wo4Xo1TfLtB5z2waa21mRnU9aRQnmruUKufapckLnP/ANaqyfMzYOFzgGrQZI4wGbApgLGq\n7C38a8ioo0faM/K79jVt5EK5wBx92q8hLBcnDMcCgTG7n27MbccfWlSMrjJzkcZp0jDaq7
uS\nMH6037zAjovFAIcA27cASyjFSwxjdvONuOnoaTjzMZxxk0m0SZYAqvegY+SNGVjglw2BSRxi\nPd5n/jtCyMucnIpUJ2g5xnqTQA14/wB4qhshucVJF904+UKelOaRVO5FyvTdUcke1iGOFPzf\nWgBQo8wLvwWOdp9Kllwjb9hxnAC1CpEhU7NuORmpg5VsZznvQSRLnzDlT1/OplKb8Ebie1KF\nYx+q9RRj94uBtPcUhix/JvAGDS/MrFjyp/hprN8zADHOOafGzAcjgdKYxxXbgY+U9vSlkUiQ\nnau2g71AcfMPSpOqkt8vHegRG0cbSN13ds0wr5jqeMJ+lOgYlSw9eDSGP935jD7x5oGNZQYy\n5PzdqjLLNCPmwo5PFTAKJBzkehpjL5iuu3HOcYoAguY4xs2ttZhgVCdv2dIzyR1NTODuVmHH\nb1FROwTjZkdm9aYmQOMqVC9e9I6bMMPl7cCnNKfLxghqbmToTxjNAhjY2uy8rjge9QRb9p3k\nEEVNKzSRlBgHtioVBVdpGAvJNUA+PYxAJwvQr702Yh0CD/Vg85oJXcduDkZqNpAZM428crQS\nPklXcFC5C09mAj3HoONuahVlbaNpJzTmAk6cEHGP60ASSAQ2oYr8zcBe9ZjMu8hk5B6NVm+j\nljnHzhhjJOaqO7PMAec/qKCiN2QBjjc+eMdKd5W6VPnxxTVJV3Xb8voe1SLH5i/vCF4+WpEI\nxEMLqwyzHtQSwjCqu4AZ+lOiV2GVUFenNIWIyivgnikVYW4mVY8BTu45pqtIHB24GMkUrRtH\n8r/PjkmkWNjKN7ct0ANVcQR7fJaMZ3Mc5NWJvNuo7SGINPM4AWHPAqLydszIWCEDg9cn0q9p\neqnS45JIowL6QERyHnywaYhby1ltZI4ZYdj45XuPrS8RrtL8Hg57GolabyvtXmmSRxlgxyT7\n1HIzSKGZMKepqgept2McieXG+TkcHt9K39PvJYmc7lZVGMA8j2rk4dS/drmTGwcZNaFheInL\nspWTq2e9NMxlE77S9SjeSPd8uDkZ9a3lunNwjMWUk568j/61cBZ3iBcMASG4bNdXp90rKhuH\nYBud3crVpmdjpptUa+YqJNu0YyB1qa3Ztu2NdxI5NZSyQvMFjP7vHB9quJOsx2wMY9nOf72K\nAL0un28dormUrNuB2ZqGzeNvP3cK3rTLp1EizFuHGM+hqta3UfnkOpZSDjFFykXbcrGpBXzI\nwM5PamMVkZk6qT8opi3XmKY4gVXv9KWGWNbqNZQQGOQKkaLdv5oO0LsAGML3+tXVungt3jO3\nce4qi2oC31RXkj2RBeCDxVO4mhuJmZSeuQRxWZZf0+G4kuGO6PyxyVIzTtS1J/tTB8HjCj2q\nC1nSRQA2Djkg02/s3NwlxE+719qVigjtVRS0ijzn+6uc4qz5E8lqrgq6jnrzVGKRo5l34bdz\nkGlt7hVklMSMMZO3PFKwEfmPeyHb8gB+Zalj2Tu5YA7eStMaTzFMjR+WW421AqySXQjt1IOO\nvrRYe5LNIqW7Rqvz9CKIpRwSPmUZIpLhdoJAIkXgjvVORhIC53Y9RUFXJryaeZcvtHcc5qo8\nzsyMXYADAHQZojhaVSwk5JxtPpTpo2ZAAuG6Z7UagSL+/gDvxx8wA5NOhmiVgnlbW6bjzz6U\n23txcW74l/eIOh4zUFw8u9VZlDgccfrSA0rm83RhWGSvbrRDqX2ieJp2IhUd+lUDhVBAyx6j\nPBpt4qtbxSM2xB/DnqaoDQmuvtDOIT5SjoQeopfOBVI3YRuvI96ypZtiHb8jN0NEcJaNJQwc\nY6nsaTA1ZLyRrzIQnaB/+qmSXP25nVwI5VG4Y44qiZmjXDybXJ496jXIZmYMBjlv7w9KAuXW\nkVUbeSQOOD1qktwfnXbgEcGhGDLuV8r12GrVuu+0uJTEpHf/AGQO9IpMqWbyWbeZAdzq2QWq
\nb7bM87TSyNhjkrngVS+0DaoLcM2QF9KkWRI+c8UBc1bfxA0cbRqiLEOQdvJNULi+kvHdyMHO\nNwGKoHKlnIyuc1NHcTuwLDyoSecVLAlW1eZWdHUD+IE81C8TRxkO5XPC5qH94MjqrH7w4p0y\nvcbEJzhfl5oK6EyaeZPJZiGRl5JPeiaF4Iyo+Z84+XtVa1mMYKTIxjXgHsD601blpGLZbyc9\nfX3oETARWsf7wZfPOecVDNbpDcMDIshdcqw7ZpbiBE5STfu5we1RRxiOLO3LN3BzVIQ+Qsqi\nIgMm3OKqyHYY415PXI61Za3khtQwz5jDOfQUyOBxuljXzCvVqV9QIJt6zeYF+XpSjLSBz8hU\ndM1MohUtIXJLDJ/wpZLdLe1EkibQ3O3OT7U7gRKX81Ec4Rjk+wps0PnXD7G3RLwPSn3FhcKo\nd0KoV3Ln0qGaKaxt8p8wYZCjk0AHyqrAJlc4pu1Jl3YxtHem27KkQdjhmOWX0PpVhURZDLIp\nCddtAGYx+UZYsvXFWpIRC6EFXRlyOaJpol8yQRkwhuFNWbeRbuPEiKvGR2xTAprIVwVPzZwM\nU6SFI3E5LI4OCnrQ6w2u58iaX+8vamrcr5auDuBODu9aYD23RZydpfkD+tZ8shZSN7NuOCy1\nLdM8zuS2V6D60y4jVZItqbvlwcHHPrQMi8sK7Mq4AHrSRW6zxuH3HI7mnSZaYqq71x97OMUw\nyMFAQNnvxQINwXYqRnKjHXrSySF41GR06kcj2qwsKK4LnO0546Cq19eRNcNHEpyADvx19aAG\n/dQB85PRhTFXzA+RsToWPrU0k0DKqIG8w/NuPYDrVJ5lkXH3QecHv70wGzMTu2nPqaj/ANdG\nyxLyR1/pQzhVB3Zy2NuKlWQQ52r82eBSAX+z7lo1YJyBy2eBUc0bFWVGzJj7o71ahaQ2z4kO\n5uq96zroKwIJYHH3h2NMCNlmMjRqp3gdxTFjmVixOAp4X3q20gRQfNBlPBNMv8Wli0nneY7n\nBUfw+9IZTuLi42maM98Ef3aozOfMR3JYqMFR0x606aTzmVQ+UxzjvR5jQjbt5kXGOtMBftAj\nhDAKzY59qS6mN1ahSzYYj2qKWOONsjliMkHp9KLiQqsWV3KORjt7UC5Rstuu1C5yAM8dzTlu\nVkmjDvtK9A1C3SLIFVGyB3HrTY0/fEMoVw2QWoAa0aTyCSNjnvkdabHGbfJyJRnKpnp71NJI\nm9mVchRgr6mnLHIDkx7I25CkUAQSny9jRZJ3ZLDrRhWkcMdvmcE1a8syRmRtqBOdppPs8TQq\n2csx3bgaRVyGNUVWDIxcHp3pGb5CRkMOi1I2UUqr7d56ikmLJGQDhiMHFAhrMm5VPJY/gKj3\nvFI3K89MCpWkHy7mBC4zgVFLK7KrgALj5eOtMoVslW3kBiMYpI93mAZCZHXrQW3KrYJOeSBT\nZiV2FiE+bGaCQ3vJhJIdo/vCk2qVAUY54560rIQyIrblYcDNHKqq7l2dM44zQIeqvuOxQpxz\nUMSlY2BJLs3QelSxoCxUEhs80KzRylTESrdDQAbRuJJ3Ljg+9TLD0aRsKp79xTY/9U/mcD+9\nipo1Miqm4Mv3snvSYDY235CjapPb0rT0uzLMWhJVj/k1FZWM94xCH5evyjpz3r1/4X/De48Y\nX1ta2URM5wAoBxITxgH/AD2rOUlFDSuanwR+DuofEfW7bT7C2aZZHBd1HHXg5xyPX61+tvwb\n+Emm/CPwfbWFukbagwzdToMbn54HsM1z37OPwD074MeGYgY1l1W4jDSyEf6vI5Ue/r/9avXW\n+ZyT0rhbbOqKsrA7buaXd0wM0xvuilRipz19qPUsONtLwqg96TcG6Chm5HHNAxS2BkdacTxm\nmq4Xg80Ht6UACsdtL2PNBI4x+NC43cjikIf95QOeKOF6c0xZOSMEc05QF5JoAU42+lLuO3ik\n
HP0peh4OBS6gIWO3ijc2M9TS/KFyaRTzxVD6D2PyjPWkbDHApCwL8ik3HJyMGmShfM2tjtT8\n85HIqJT83rUkZ+bGKQwCjgjmnZx1pvqM0ENnpQA7rSjpTc7e2aUEYzTAU4PAFO46d6bz6cUb\nh070ih3OfWlU7vamrn8adt4HY0Eh6kdKRgWUUp4pu3oc0AKR849KU/KRmnbtvykfjTSQymqD\nYXcdueDTt2QMjFNXDKOxpWyMZ5pjF+7k/lQvzL05pGYY5pm5uGBPXpSCzJNvtRRuPrRRqGhR\nP3cdqAyqOeB3oVhgjFM8vqTWYx0bBj0+X1pGxtxmlXCLjPBpF+VskcUD6DmxtGBio2XpTmyw\n9qaqk4Lce1AhWzninNkKW4zRu2g5FC8qTnn0oAjGOPTvUg46DrQu3bgDmjcF69aBCkFQM9aa\n2d2MUu7PJ6UY285yDQIbtyp4zzRtC49aXcOeeKHwVz2oGNbDA8UL8oBHNIsny4xmnH7vpTGN\nZskYHNKzDGCMGmM26lA3d80gH42/Sj7q5zmkPTvRwvzHkUAA5Oeo9KMM3sPSjeFUMRjPSjcc\n8mqENVRkq3BpSQmO9I5Y80rLjk88UXGJuGTRt8znkGmmRdwBXFOBzjbQA5V7Gn42jpSY3DA6\n0biFPQ1JI4Agbs8UzcfMJxxT1+ZQOxpuCsgweKAHKw5JFCkbs0NhqN3tiqENZjjpSM58sAda\nVnpOF57VLEDMac3y4GeaZuHGKUqM5pFCclsmlA5zRz603aQvWqsA4qTmmR8Meadv3xnHWhV2\nqCetMBORmlVdowetEbfMc9KcrHJzSYDW+8McUrN2HXvSN6gcU04POeaAH7vmx7UrDGDmmheC\nAMGlZ+OnHSgkTd3XgU1mCrluTSr6HgUYD8ii4wkbgYGQO9MUk8g0v3QR2pV+bjpSEOR+atWr\nntg+xquoVeO9Sw8ZGetBR8r/ABysWt/E16CuAx39Pxr571td7OxHIJ4r6i/aUjK6hG4yPMT0\n7Y4/lXzNrkOxW8zhgOoqyTiNQ3NJnZism6hByDwetb1wW3YcZz3rFvo9xD8irQjJliEK5ByT\n0qpdL5qD5cSjnmr1zGrMWByw4rLfzfMcD5hjlvSrRi2ZtxbPMflJCioGQK2C2VUZrQmf5SFO\nARj61ntG33eRtGSaokrSJ825W+Rup9Paqdyu3K4+Ru9WiVkkKngY/CoZlMi8LlB/EaoZUlUR\nKoTO0e9RLM0inAGc4zT2l2rhVyc4qPl5srhGx90dKBEEq9QwyO3FJLIV244C9hU7W5kjDBt7\nLyRnrUTfvF8wHb6A1RJBJEGkRo2w7HnNV55JmYBB8oPNWZowYwcnA5NRs6qyr/DtzQIbG43H\nGenzVDJG0fIXKt3FOBZpGKgDaM0RyEFmJy3b2pjIwySblIIYd6rblWMj8iPWp+ZA+eoP3h3p\nh8tmXcMA9AKBDNuxQVbk9QaZHtXdl8GpbhgWwAAy1TZAHPO1z/DTAtBSwG58Co5MbDkkhegH\negbflDPj1NOfO3GQ4J7UxFeRhMU28Y5/+tUQwmd3Y8VLMoRSVXCjrTJF9BnjP4VLAPmQHq+f\nTtSyKVUIjDGM1EjeSoYkgsOnanGcKoXbupoYh3Njqd3BqXhmMeeg/lTPljwU6ZyaE8tdzFiX\nY4xTEO8rMIIbd6gVFJnO4YxjFHmYjAAwxO3inoyu+30GPxpjHwqzRqpOAKfdMVj2KepobdGw\n5wp/SnPERk5+ZeakZTaTch4wRwRQs7NGeOKJtzNvAFMVdyndxxTEIJGaPO3aTxkVDJCTGSe3\nf3qWNWX3GPu1G27dsfp1O2n0KIPvckMRjrSlBGFbeTn+GpFQrCwOQCaTCx5x8xpAyJRuYjce\nT6U/hVJ4wvYUj71bOMk9qBtVmGOaBDQiy8qSG6nNOKENtHPvSlm8wcZGKGwsmASeOcU0Gg1m\nVV
+7n+dL5gZVPRfejzPLkAC5JpuCQQw3YORTANz+XggDccbqRFIjfec44FCsdvzcjOdvcUM3\nzZI4xzSBCY+7g4yOaSPhjhsDtmhG+XC4/HrSt+8xxnHamA1GCM4J+bPBoZCqHvxyKc0i4IAw\n+c49KT5tu5j+FIfQjdm+Ty8ZHXNK2Njbyc98UoXc2Cm0YzTVwuATjnihkjmUNtKExjHGBTow\nNjBzuOOD703IkjGGPynp60rHcjED3/GqLK+0+WN3yrn5s05mYkArz1X6UrK0kCtj5s804qzM\noPLd/pSEID95lXfxRvVtgYcjqaCxjzjhu1BZ/LBJAbORgUw6AVV3zncp60o8vZgggZ7UnB5d\nduaawZWwOjUhDZGBx7nijzGaQblGBxT9uVyVOccYpm7o/p1FAh8SqrNlufam/wARB544xSeY\nJGztx3pWURx/fBdjkDvigZGGMbNkEAjP40iuw2senQ1Iy7QMNnceQeookOGK4yBzRqIR13th\nCAewPejcZX+Q9ByaZHhs89806TO3jhhySO9MBD+7jJP1NPlhzg9MjP4U1WGQSu4HkUoZs5DY\n7YNIBkeOCvzc4xSxhm8zAB570+RljHyrg9TimKqtndnLDIFMBZM4B6cYzSiMJCVGT3NMTGQC\nfkxS4PAU87vu0DELeWeFyGHNI33GVGO706UsjPLIwGEC8k0jMdy4I+YdTQIdtVtuTs4xTdx2\njqVzjJqTcEYR/KxxTXX5doOCOR/hSGkRlWbJOMetLuC4UHIHeiRfNVGU4IPNPXoMDBPOKNRD\nZGLIERfmY/gKViB8n3m6GiM/vCCenemrGjIzuwIzxigBH+Y7ABnsaTyyvy55YUsa/eJI3Y69\n6MKrLIWPTGKYDySwVB1FNZTllYdqbuKruHJJ49qI2O773zDsR1oGIpbgKeR1PejO0YI3HOea\nSRmaTbtx3+tKqgSdecdKQhGcNlyfbBpS0YUI5GW/hFJ8pyTwV6Uo+fLsq9OvSgQL82QvIzgg\n9qZEGjcv97tTiuI+f9X1OD3oaTyxvPynpRsMUJG0jMDkgdDSYVFUk7vpSv8AOoKuAfpSMQCr\nfcI6D1plINuGJJA7j3obMjeYw2rjgCldRt6ZbvSsMspU5I60CI5MMw2nGOtDKZmDA4UU6MpJ\nMVYYY9TTlDFiAMIKBDFUsGdRge9PjJaPJwG6BTTY28zec5X09KXg7lB7ck0DIkRJeFDDB5+t\nPYqk2CCeMVICIwpVvlPaiVhuOR9MUCIdgUgklj0IpDsjyM57VLzGhb5Qx6e9Qhl2qFXJz82a\nB26ksa/uyD8xJ69qaVHlnYcnPNJvwgQHCnmlXCoUUcGgQ4IsgOz5QBnrUbbmwSwznpS7dv8A\nFk4xSeXu+Q8t2IHSkxjiG3ctx6Uvk+Wobhj2x2pqoOSCTt65FKrFY3w2/d09aYhNzM249R2p\nWDK7Mo3MR1FKqs0SluGpG3KpKcFTTARWdFAzk9SKb8ruCxZQffNP6kMQSD1FDKHYtkLH0APW\nga0GEkbhjIIwDSeWNqevTIoZnQMvbsaIpA3C89vagQ7aQdwb5Rwc0jMvmLgfL149aSZjtC45\nBxxTlXyJB9M5pDFZZfMyeFNIjMyMr8DoBimnczEqcRkZ+hpVk3Llmx2zigmweXth3fe5wVzS\nM25NuNvOBTtrRqH+U5460jDcMEAc9KAEVvmAJ2BeCaVflZthyp9KRmj8vG4nnHSnY8o437R6\n4oKBSW69fSkG1ATkAk8k01VClpA2T0xTtvzDJ9+aBDSwUYPzD1FGCCSf7tO3bsgDnP50kuX/\nANjtmgVgVtoBUbi3WkKkZwfnY4KjtTwvlxnByPTvUZjZeQfxoAXopLDJHy5FCuqqPSmoxVSu\nMnrS4P8Ad79B1pANZy8gL/M1OEe1sScHqppV2eZuHLDnmkVsszu3ynp7UABk+9IMnPy7abJH\nk7c/
e5Ip+1thfdtXtkc0cKMgbmPekMHJZlRTgY7dKTyyqg53YPT1pNj7uPlPpTtrg5zx3Aqi\nbBtXacL945J/pSK/7skDDg4A9BSbGVsB9vcUIqbyQ2046GgZJIw2LgcHqPSo2Ui4WVeFHFKr\nHDnrxRISIUDfdOOnWgYrAKDIMtzggdhScLsKnbzx9KG2LtVWPutKVTy+G+XP5UAK4XYWYfMD\ngU1WVO3mex9aSNyFZhhgDxmkjz1boaBakm7aAuByc4pDhiwA2nrTdrRuDkFW/SkMYVsM+Cxw\nMUAP5Rt2eCKFjG3zC2QOcUhLQth+McYoaRSnK9sgdqYwZRndjcOu7+lSMnmAPtyP1pm5Wh+Y\nHeR+VMkZ8rtbAA6GpAd95sY2kd6VstJ8w+in+dIG25bnJo+XaF3Zye9MY45TG/GOzdqbhmDZ\nwGpy4ZgM4BOM+lRrGUzk8jpQDJH3lkZfmUcYPU0Knl7ieGz92oo2bdgnfT2QHIZuW9KZNhqs\nu/lcehp+QquCM5PGabtHyIeAvIJ703l0JPY46VQiWVM7GyC2MU1l+YDJx3XvR5e/AYFh/Kn+\nS0W3+NuuKliI1VGLEgkf3RSrHshGDkHnFP4YkpxIf4aZJJkDYOOhoGIrFmXdgrjjFKrFtzK2\nD0pFjEewqCpHQ0SEqwZhlc5JFOxVwjUjKnvxTELx4dSRg4weal3bpC2/901Luy2E4Xp7VACh\nssG/u+1DBjGw6BucnmlDF22EbQOtMZjFIGR8rnGKYEiq8SghghFJu+blSV7r6010HmZ3ZH93\n0pwVv3gycZxQA4sv3Q4VsZDkcY9KYBtUjr3CnrmnZ2Kufnf1xTd0ayED6/Q0AOSPdcJ/tckV\nIwUsy9MGolCsAQ3znvTmjTgZwc80hjdoC5Bxz1pHJLhQ+w/pinGPcMo4UA4x2pyxjzCX+bjA\nNMkjSM7sFsxk9KM/MwZvu0keVYcZANP8zzN2Vwvc0CHeWGU7H7ZzSxLuUZX5hzk0142jj2A9\nRkYpNoZASzDA5+tACFA8yn+Ee9G3OSvPPOaU7VYdfLxk/WnLIq5BG3d0FA7saweHADAbqex2\nlizbUHG6m5DsVB5Axk9jS8c8ZOOT70CCEfIcnHP6Up2rhei560woiqsgJz6VI2VYMV3D19KX\nUoTO4lgp2Zxn3qQZ54xxTHYeWTyBkfSlVWbJB4HG6mK1wkU7t4GRjBWk/wBYoBb91nNOVfl+\nccDrk0pjEgCgcE/5NLUY1UEoZejZzTox8rKeSKayqzFRn5B971pyq3DbgOKYAq/u8Djvmmqr\nMuSw3juOlT7lRQrDI7tUbBBGNvr8ppCF3hiiqA2ByxpzOImA25B4pY4T5YOMH+KkG0MH/wAi\nmMRVdC3zeYTwFFLCXk+6MKOpYY6U5okzu+bd1yKVmMhUK2B3yMUAIw29ss3UCp9y/LlcLjrU\nR2hiAwZ+3/66fCGj+9gjr8vNICRESOTPVT61HNHmQqW8w+vpTm+YDgkk96RlKNknI74pkjw3\nllRtGAOtLHtk3nBBxkYqPcNxwM5HSnIz7cBtpxigB6kRrw28N1FSMoON7bSOlRbt3y427Rwf\nenxIzKDJ1pDHSY+UtyTSYbcURsL15pc7skcBRnNSRfvWU7gQRQHUVV8uMgsSf60HdsA3Lv8A\nSl2qrYU5OcnNCqNpc9W6cUrjGYdoxkbXz2pGbkDPHepo97Z3FQ3vURyF2shJY5BoEEf3skbl\npLwRzsmNy89elKrLucBcDHGTTmU+Xjrt5pjKU8eJVCj3qCdW2nB2c96nk3SMQi8rwc0k6uYw\nGXk0xFdkLFdxGxhgmqsa8YGc9M+tTja7ZXqpwAelO+904xxTJKrF1jAVdzZ5+lNeZVUgjcvU\nmnSZXCZJVuWqORRvVQQF96YgjVpNygZjb5laoNsm75sAHjNTSN5PzNnYxxkGopghbh+B0WkI\naV8ttu
STnqKW4+Q8sCx9O1OXDPjOWx0PSkKbvmZfm6Y9afQZUvbd4WBlPy5457VC/wA2V3bR\n2qzeK01wRg57Z7Cqsqsr4YAA980FEseJN5xlFG2kYrtCEfMowxpFkQR7i2Oc4pVUtEN3MjHO\nB3pAPkmDW6RqpTn71N2jevlgM/X2oLMyDJEYU55/lRHwQMZLc4pDQwMzD5WLDd82ad5YfdJ3\n6CnRTCO3kWQZkLfjQu1UI38ntTuJjNjLIjb+3DHvTuW5I288UjshUA/d7e1LtaZcrz7UgJJX\n8yRTv2RAYNPVvMzkfIvT0NV2xu2kbx/dFPmVwEZDgFfu9qtPQCeNV5JXr1Xtj61e0+P9ySu2\nVA2PxrJaSedgGAVMADbV1ZJo9gjG4LwVH86Ymb1ndeSJba4TCvzuBwRWzbajIkTRIGEQGAzc\n4HpXMmZZRHtXzJduT7VrW8yXTM1ucnb86FumKqLM7HceG7rzowGw2zP3jjIrYbUEhjG0eXtb\n5XIrhtFd52Nu7bGYFuTitz7cjQrbyIc55J6VdyLHS2dx9oD+YV24LbienvThJGrEjlT7Y/Gs\ne2YRLlSNvQ89q0tMurfdNFfxyPGy5jK/wnsaQiaO1a3cTB1MTj5VB6D3qeG+DzhZEV1UYD47\n1DDEJIiVb5wOn9aijtjuYI3HQtUjRZfCTEOnzemc1ArBVckDOaLqZIVVMlnyBuqGORpJBxxj\nJ96hlE8ccUd4hV/kbA2j1rS8ox3DIX49OwrJuIwse5hgnkY7VC0kkqh4ZGDYwaQy3JZsrNlx\nx91c9aW0u2UACP5c4ZRVRrh5o13D9+p+9nrT1hmf51+Xd1XP60ii81ykhfeNr9AoFV4LiW3u\n1eM7QP4veq093HMwQEh16gd6WKVfldeYycUhdS79qEk80rrvLcb/AHqBVkkZlQqNg3MhpzfP\nGUIGAd2QabdWoNqZo3zMeo9jQUMXasgnZiF9u1Okuo2RozgKeQw6msxpTCvlKxZPXPel+Vtv\nmHBU9qCkmTXTO3EQ7DOKhtWaNJA6MHDcFqv3F8iYKJgEY6c1Sa6DRy5Yk5zUgOa4PCqdr+pH\nFNSNnPlsd/8AEDSSSgZJ5GO1MhvE5KZJ6E9MUgFnjWd1kOVCnp2qdpBDEsOfLUnNVJOWJ3nZ\n1NPJ3Abznv8AhQBeis9qrOGEqZxkc4qvNJI6lHBKE5DUyG6aFv8AR2KxLyQaRppZJNsjYQ8i\ngdhJGWOxDAEBjwBSW8xjjKvN8rHmOlbZuIeXCdlAyKfbRQXczIW2bVJ6ZNMYy3mg8zGzO08D\nFJdeSknzD5O1OEaq4ZJB0xjuaikUrmJgD3zQwIHZEjddxbccqKmad5YwrOAFXjIpscI8xDtP\nFStcRtJJug3ADr71AiWzVJFSOZtnGA1V2i3OojztTOeetWFaaaMtBbtP5fJCLkiqt9cf6mTy\nvLkPDJnBpDJ47wravGOnU5HSok3MCCuwFc1o6XotpqatHf3TWeeUdRwfY1XutJfT7uQi4+0x\nAbFY0AZk6rcBQVJ2/wAS1KluFjD5ITParMN09qvP+qHXC8moxc/b494AW33cDoaLsCCGAP5k\nkjuQD932qwlwYMG2yobqrDIprMmGVRz2OaasgVQi8Se/f2pgV13zFtjLE2clmFCtGm5WJdm/\nizUaQy3k58weUQcYHSrbRWUlvJFuIvF6E0wK8aySMI/Od4+uG5/CjzPsU3luHVypI8wdPSpY\nbTy1BWTEm3JOe9F5v1CzSe5nzIvy4xyBTYjMaMLIHJ3ZOce9W4PP1DfLJiOIjauT0qI3C42R\nxK+3ktntUN1NLdgKCBGT8ka8UDCZgytEzKQPSq6Tna25MhehHSn3US+Umw7XJ5XvTI4XYlUO\n09MUDF8l/OMhG5T/ABYp0yCTdgrtA4AqEhlkO5uB/DmkVkkY7Qdo7d6ZJHKqiMMSVC8nmieV\nJFxjBHU0
TbdgBGR3XvUDSr5C5Uvt5bb1xQMerRhhl8+rUZC7mBO0dCDUO6MKCRjI3YA/SpPM\n823ZVXYSM/WgBi3knzANhTyajlYtt+bO3kU1lUj+468fWo5pGhkP7suVHQdSPWmBOsyeahm4\nHtVZlFtJI5O4BsKKIZkuME8nvmmRl5hLvK/e+WmMk3ks4ZcMBuFSxyxFDARiRuRJTZGFvCHd\nw8jDbkdqp+cCwZ4yyg0CuTyTGKQ+WcnGN3rVaW6Eq4HLHqMURzm6klZz5eBnnuBUKyPMv7pV\nzjgZ5NACxgSbieGU5FNuBHdNJGXaMY5bH6VPdQTWm3zVEbOnCj/GsyYLHCXUMZCcHJ4oGit5\nzRyNF/ApwvHWpRiXZncG6qf6VHdO5UFWwx53Y6VYmuLi8nQrtjEa7j/tUAMhDXF0ybCrj1FN\nm3Ybf/CeMVdvnS2tRFHPvvGOZHx09qpxphdoDFz175oAfbwS3ULOHjSRTtZD1pm2Tlnjy+7G\nTThC0TYdTCeoY96kk/eMrZ9iO9AhsMILOuA0mM4NXNqNYxFm3SIMYzVRWljkZlQkYzt65qxN\nJulzIih2AA29hQMbIrNCd3zofQVVmQeUF3fN2qXc0KkYynTjp9aJHWSDZGMuvO/tQIgKEkYX\nPqAfSkYGTLEggVEuVbcW4Y4pAu6EysxHOAaCh3EkjAJ+7x96lkwzRKG+RRtFEitGgG/nHQUi\n5X5VOGYc7hxSEAjMY+Q/xcZpZAJEw+Hfdk0yWMfKV4x1+tCsGbapyG5De1AAyrhNzqEzxzzQ\nELNgN3x0pqokkmwR4Udz61JExT5nGzHBOc0xMRk8glA+5RySKsQ/vIg0ZYjvmkRSrKvB55FX\nAyxrsaLZk9VoGRRxhoSvQHv1q5a6Wbm4RE4O3tzz2pLK1DyEpk+g/wA9q9H+Hfw3uPEmpWaW\n8ch+0SBECLlmY9OPTPes5SUVqCVyb4Z+AZ/E15Baw28k1zI6okcYzvY4r9Tv2Yv2abP4T6LB\nqerQRS69KoIVlBEA7f8AAv5frWf+yr+zDafC3QbbWdZtUfXHTKIyjMPbd9TX0bJ6qc88n1/z\nxXDKXMzoSsAJySB9ff3ppY9KQseMGmlvmosaIA3Ufzpdx6jrScg80je1IZLGw6nrS5Dc5waa\npwMEUeXgZ7UADKF560q0fyo5X6UrgODBVPHNBb5femFvmx2p+3ueRRqMVVyM0oYMnzCmK1O3\nDJ44pgODbmCkYFNcEUgfIpwK9zQSKfugt0ojG1gaMgtlunpS8fTFIYbgrcjikY8ZJ+lK3IyR\nxS4O3ng+lMBDwoxyaOcZzg0ik4OTik5bryPWgB0ZCsAaezEtntUYbt3p6NhsHmgB3mbc8cUi\n42kH8KFzuOeRSrhc8UAODcYoU4AyKYrZzxUn8PJoAG+U56elHmEqfWk5boOaVV4PrQFgXO3m\nnL0z3ppyBTsfLigNRWOVPemKDzx8tKPukenWl3fKM1SAAdzccACjcQvNIOOnelXgjPIFHUpC\nhhnHSl3Z6jimLg5x+tIuSvoM0x3Jciio/L96KCbFRWC/KetLu55/Km5wcnmlHOTWRQrbeMDF\nHmBlPrSe4pNpZTk4oAUNu5H4UrDd1o2hcDtSP8tAhO3NLxjrzRuPUYOe1Km3lmGKADoBjrTD\nndkjihnCnNCMcHJ4psBzdBg0m8lcHFIPmXcOBQo5yakBvHcYFDSLtwOlKcM+M8U0Rjd7UwDk\n84pcnYC1D54C9O9IvJIPShuwC9x705VG7AoH50fd5oAUkq2MUjE7Txx6UisQcmlZs5waEgG9\n8449KfuDdqCflyeKQY9c1RIfKy4zyKTd8pLdKXb5fPemtlsgjNIroDduOo4oRcE54p2P3Y9R\nTffvRqLoLuO3pQuPSjdu60MwCcfeoEKrbT1pV4fJPFM45HelXKqe9IfQkX5s4HFNkzt60buw\nppBUc0XEIz
ErgDmj5lx0xT93y+9NXBFABwwx92lOFA5zRu280jZ69BSAGzwMcUh7Aml3E5wM\n0jYY4qrAOZR9BSclQCfpQ3QgU1cMvJ57UwHFcKRQme4ob7vvSKp25zzSAN3y4PrS/ecDHbNG\nA3XrTt4Tp1PFADVk+bIoZty4NO2hs0xulAhuNzBv4aXIXpSqPm64FNVTnpUpgK3UjrTGUqBj\n1p2Np9aVmG3+dMB4O1gTzxU0Z6t0ODUDSBgDinqdx9BQM8e/aK0qCS0tLvazS4bceo2jgV8m\n+II1cOCcgGvtT45WaTeEEl2gvG5HvzXxnr0e2ZkZfm700Bwt8p28f3uK5++YyT7AeF7Cuquv\nk42574rndStxHudRtbvWhDOeu2kgkYt9096pTOY4z8m0dcjrWjdxGZdu/K9aozRFo8DJIq0Y\nPcypN6x5Hz85ziq8kr8nG7jkVemyyjtGP4RVORSuWX5QeMVYipMy5j3cqewqPb5zOOpPtRvx\nJk9F/h9akG5sOPlY9jTAzr5AmEGPz5qt8qR9MnONwrRu9vOSu5fzqiYklbZK2zjIpi6laRWP\n7uMhQec5pfLLpycFeMk0rQFZNgfkc801gyZRz97oe1AyJn2jaeQ3A4qpIzMSCduOKtySeXNt\nK4jI+91waiuYxJOBggkYzVEsgjUqp5wW4z7VGyFWYAHf1yKlWMYYZYgcD60jTeXHwcccmmMr\nqq7STuTcc0yaMrgo2ec1YkmTaocYOM/hUcm5HZWYKD9ygCpgrcMQckjmnKRtwACP1qSZTG42\n87hzTNowMjapHNMSImwvOMEc4PNII/l3+Z15qQFFUlmxxgZoeMRqpBB47UEjFw+BgkHnPrUO\nWLMAvQ1ILjYMg5bPpR+9wyg9ecmgsibKgEngccikXuNm8dalcB1BZsY6g0ZK4HbrQSRyZ43A\nB6VW8z74xzikYhSNh3U+QrGdzcD0FMZH8jXGEXC9OfWl8tUY4IU55NJ5iJkSHBI4p8UaSKqk\nbc96BE6s0YySCvQn1qs7BVfaTjtk9qlZttuFUfvM4HNVZt7su5cEHBpjQyQ7lDNkYHFNbcfm\nD5BqR8nJOMdAKgXezABRjOOKXUQTMF5Q5zxRtbABY06aNSMAbCDmhgFYc5PemUhqblDZBNK8\n3kL2bsMdaRmVslmK9himNGImQM2R196QhCpVgwbOaazeXlhyD275qTyxtbZzk5pjITwAN2KY\nWBjuZHB4pAp8xienr2oTasY4PXlaVfmOAflB5oFYaGLSYUFiO9NZjgr79qVmI3+WSuOfwpVJ\nZ1C4O4Z3dBQAzczc4wR2pCTjhSy55NS7SsmVIDdKV08tsE7hjPHTNIZAyqrc8H2pHZVPJ2nv\nTm+RiR19KZ5O5hx7+1MNiQSDgbfxprP+7z1bPSlaTMgJX5elOZhJkbRgc0DGq2GUSf6wnIz6\nU0yALyASx4U9qklKTKmTtK8j/Cq55wR90HrRuIfuH3gcdto9aVyeGCmmgBpQRxjk8daA37xi\nSR7dqoYrLubAJ9TTc5XqSc5zTueMHA7nFITHuOdw9BQDGIwbIOfanc7l2Hk9c0sfzjO7Ht6U\n10xyGynekAu1wrAjjPFLj5ck5b09KUwsqhS2MnIppUxsQp+Y9aAsMO7hc857UrIEl684p4YR\ntubn6dqRlVskNuNJiI8b4sjg5oZVYGQDLLwKMdAflpcqudhJqhCbNrZHznHNJIG+X5eR1p27\ny1A7mgsWXDHIJ6UCI12rJljx6U4tlgvTJ6d6GUKyo/AHzCmMW8wyE5BPB/pSGKyGPOG4WnRg\n9MZb1xTRkN1yT2pdz9xg54pgSMAwxnGOtIqZ3EcHoDTW3rLyQRjnFCKHPoopDIzxJyC6D0p2\n87Qqqpb+8DQu7yz82Bnml3bGzHzjnpQAKVUuQpxjBpu3cvIyeopzcrkDhjkgmmKuxiMkH9KY\ngXaqn19TQVK4
53DrSjayY9TQvykjHOOKXUYhxt3Aj6DpTwd0i5P3V5qJcKRkYB7Uu0jJX5x6\n0xD2Ufe79qj8sO2FYE+lL5hXIyM5xt9qTcN3ygKc9TQApj3R7lTkdaav3iOvGfpVhW2tgHcO\ntRtEd2Rzn9KQEZVfL5bnORinHvkU3/lnjIDBuKeuGOSSeOSaYDWBjhDZyDwAajX5myvC9/Wn\nYO7BbjqKdKu5gy9hyBQBHj5gq8nPBNOlAX5DyeppGk2npt7k0kk2fkCZJH3qAFZN0YUcKxyK\nUssfyt83saj/AIVUk9fyqRAobLfOwPGaBjQejoOPelmm+UFkJH970o3ndlk3SNxS52oQz5xx\nQAqx7sOW+TH3qaq/MQRgdQ3ajzflZVGRjqKbljt5IHrTJHqy7Qx5ycZFLJhcqTz1zQkyRxnc\ndwHIFJG+5pCV+U8DNAw+ZGxjCsOwppVlDA43E9PWnMxWFxu4U9Kj2uwVhx6ipAJoQWBb5B2F\nJtcqw3cetG1mOzO7ng05mDfdJwOcetMBvltvUBt6gciljVeRgxjORml2hsDcQT07Gl3BhtP3\nweh9aXUBP4QKGVlA7UrIXwGOH746UKu2QHOKYw27ewB780RllcHPGfvUjsY1ZWXqc7u1IIla\nMBSRk9KQgMjKCQuFbvShhkYHamMdzxhW3beCtOVg2WXAG7FMBV2jGc5brmhjzwdpI5pu794Q\nVPXg05VOzH3jnAagYm4qvHJ6Gl8sbjuboM9KNxMgBGAvU0u7evI5J4z6U7gM37sM6nb60mQq\n8jKZ420/5/LJGGIPAxTdrK21wPm54oEL5Z4AAUnnFRk7ASeecURqPMIOcg43GnsuNxCZGcCk\nAkalsZ+UU1kRmb5Tkc4pdxbHfb2pz3DbhsUF+n1oQDY23P8AOvy4yMU1QzNv+6AeadJvVShG\n0df/AK1KuJIxhtoP8NMYO2OGxg8g06SQjauFAP8AH1pI/wB2uTz2FIsW0+Y7bsdBSAbGpXHG\n1OuD1pyrhi2Nw9aafmjyKeCQy4O1Mc0ADKCqsCQ2elKzYbBGBj9aZ8pcbmOSeKVkUSZZiuel\nAg+UKvP7xjg01WCnBOVP6e9KqkSjIB96Xaw4YjA/lQMY+6TMcakAn86RlRvm3bccEVNu8tRJ\nj5s8AelNKhjyoG45pEjVYmXc3BA/OkVTIrZX5SeM0rydDnjOKarNIxx90GgYrZGPmyOm33p7\nYfopUjrTNp2sQTkVI0JZmYenJpANkUZDHPtim7gq5A69acSBsJHQdu9NkU7SdvzZzgVQxTMA\neCCO1IzhsPjc2elD7NyYUJTmPySfKCQORQIayj5wmMdfehHVgFHbnmlZlWNFBwxHBoWPbhic\nN0IoAbO298j/APXQqn5Qozz3qTyo1yc/iaI2IXOB6ZoAFhiMm3djPrTJFOQg4Ge1SMqqq/IS\nfWmyfKQQu0E9aQDWjIUktk56Um0Ag7SX/lU/l9XAJb3qP59vzLt55o6ANYblK5yOuae7BYwo\nTt+ZpJF4B6knHFNPzYRzlQegpksVWbbiQgN2XFM9CMMrU/zDtZcfMTjPoKasYYcYABNBQkeY\nySAdxp7CNc5bL9cimqGj437n9KczbQAVyx9qQxUVdwz8ydaY3yMrK6kDpzUisF+Q8H3HWoxH\nGy5245zjHSmA7B8t5QcL7UbUEaSJ1PrTo2ZiWb8B603Pm/LjDfpQIWONXbec5zj6mrMMYXcG\nAJ7g9qb8ixBeuCCT71F5x+ZurscA0ATsqxTBsErjpUEykNw2Gz2phk55yXBx1owUTOOp5pMA\nZDK+zfyOSKZIojwSSeeAKkOI2yevU01k2t5hO49VWmKwqqzJKpIz1Bo5VVU5NOZSzKpIjJ5I\nprDkqScjutAwZPIR2H3sfKDSorMqMdykj5higMG+Ujp36809mWOQZ3MSOaAGtzJgBiR3p0m2\nNgGXD/3aTbtYjO
0N/FmhVRVxySOm6qARlwCu3Df3jTlJOUBIbqTSb2MhDZZQOnvSRLIZCyuE\nQj+KpEPRi2BH8zLyaMq247ajRVzkMcg5yp4NOzu3oflY8g0Ah3ktvQZAPYjpSLtbeduQp5aj\nlJOMH5etO52hUOA33qBsZ8pUsjbeM0i/vNhByehpdrbj2x2py/cJHGOlBI+RCvyvxz+dRrkY\nVBuzyRRIGwjM2456ZojLqwK8EjFA7BMx2cZPpRIwjaMj5jjpTNxVgp9anb96p6Fh0FAiLLHc\nr9eoam+WJChkb8alkjMnLcyEYK9qjMTbPmGT/dHagoe0YXrxjlcUhZmYYOM9akcGPGeW21GM\nlVb7p6GgB/mBWOYy2OgFPWXzY92dvoO9NDMuCA3X7wFM8sRyMSPl5OQaPMB3llnLFuSPwp24\nMy7W2rjHP60m5V2lzjjpRGFJcADg96CRJYlVsJuZe9WcAMOO2ODUKttwAN2eBQqjDk5+X3oK\nJYh+52fKW3d+v0o8tV+Utz1qFN0iZCdG606RCJmIA2nvSGK7BhlQRk0/gShyoHovrTOVYD+H\nvS7SvzFdw6c0CHJv55xnrSMwjTC5LZxyKWNGWPupznaak+baMtn1AoAR9y8+2KEkBXawwe5p\ndyrnpn+dJgMAM/N1pgM3LFGSPv56Y7VLGrR7WYFYj3po8zkuoK4wDT0O6M7zuVRwKRJLN80m\nFOOOMUz5disPmJ4201HZioB5xTmUqu4ckdqYDvIHADbW6k1I0J8tSGHX71IoDIDtYsDk09V8\nsE92PHtSBjVQRycklT0qaSRSOBt7ZpOMYVskcGnSKuwK65HqKCiFz84GOD196kijjRjtymaU\nxK2McFefwpdpYggkjHT1FMXURm2gjv69zQPmRVYYJHH1obJVSBgg559KdGw29NzD2pFD5Fb5\nBkE4xgetRxqdvL/xYz6GneYWbzNpA6UMyLJlCNvv60E3BtytgBeTyT3pLlT2I5PIFKzqsgxz\n6k+tIyOdxxwOc0agVDbN6EKPeojI4O0Hcx9e1WlY9Gb3quu9nZymMdKBkL/NNsByAOWqq7Oy\nkKMjPWrEhZmBRdvr7VE6kZU9G/iFMkrSl1VVC57ZpnBVucY4OaftaIZBLhjgf40kiptyBkg4\noASfDogY5UfdqF03SbuEUevrT2bLAEUzHmKS3zbT0bvVEiByZiTxxxim7ismQc8d6dErfOcA\nADvRHDLJC8oXcUO1iOgpjK02+TJDkEVBg7cnHXrn9KdGvnNw/GefanTwgMqgcqeo71IwGWwg\njGSc57UFQjHkllbik6xklsHd9709qczFQN3Dds0gEXDKSw3R5z+NNWT5GCnDddxp+0KjIRt7\n+2ajVXaNWVd3qBQMUsH+dhggdaTbtUPjcOvtinOzShflBAPOKarlo2AGADxkdKQwb5uAAqHG\nFqXzPvKrY4xUe1FjAcfOT/D2oXb5r7SFb1NMAO5WQdAOSM09lUblPyvjI9KiCBlYAjavOc1M\nWVo1ODubv6UySWxhCzRnkkkZDdKt3ly0GoYVNp/2en1qGzbMcu8hcdM1Esku3oNv94nk1QFy\n3uBbrlSXY8GrNjdGzVwE3GRvvA8isrzlVgPXjirkFuP3eG2oeTjv7UEmrHMfNOZCXxgc1vx6\ng32OI78leGQ9a523ZVIER7kEP/jRNvtbpYxLuSUfez0oEdpYXyyrtOQew9609PimvpDG0u0o\nNxyecDtXJWckp3xtIGB9ONvvW7Bdta2abuT90SL1zVIho6K31PyrqKMnZn5WPtWjLdpbuWtn\nEkbHLLXKtdCVVx+9bGeOtaUEkeI4w437eV7UE21Nfz1aRCI/Mwwb0BFR3lwNzPEOM4EYPNQF\n4VXy51LvjIwcCoFu4kcttJCnGaTKHM8yyYlYhMZx6UsMitH8oJJ6YqzcSB2VCvyMAc0lmqr5\niq3OeARxUDGeb5PD
rye61OjC8iYIGjK8F/X2ptzamSESqQrr2J5q7Dbn7A7xsWZR82BSKMwf\nLId8edo4UDp71EsE7rL5cTMFOTtGfxq9b776H9yNvYsT1x2qS3urrTZdygxv+YIoAqWd4ZJl\nt5cIc/ePb3NS3UarM0andj+6evvVW93XjyMF2bmyx6c1W2N0L/KflJPWkWloSsrRqSyg55Bq\nElryZPLTB756VI6u2xQ24bepqx5YiATdjjJYUhirai4kYLNGsijpI2M+1V45JIkljjRQ5O1m\n68UjWccsbE/K3Uev1pq5jjwvRj1xSES+dHbOobEijr2H51LBpUN5JvsZULE5MTH5hWZcM0rG\nMxbVH8QpkNulq5mgdklQZVqaGW7hRHNLG3DDjOOvtRIrMoY4CqvX+lLfalJq0cIaNRIvLOP4\nqbEom3KflB7UgImlLbV3iPf0FP8ALwQrNwpwTUJjC/KBlwcgt6VJI3kW43fPuOd1MoWFAzOM\n8A55ptvj96ZpNjYJRl/lT45PMUrt+lRKqM3yZIHXI4+lIQj5aPrx2NKsjSBRtxt6ZokkVtxP\ny5OQp/lSo0TOsefm6/8A1qAHbnmy27y8flT5AyQsrcNnPFRxyp84X5gTgEihd6M5ZduBk/Sl\nYC/pWpXOk7mtJ9s0o+YAdaR7OW4jkupSN+emMnNQFUmt0eB1Vv4s9fwp7R3MdgWWbBb+E0gH\nttjhjS7lXcWDBO4qW5ukkC7T97nkcCsUM00wjmPzjn/69S7mVfs0oyg5osMmu2F1NsztGOXF\nMht/lMMreWkb42jv70yRlZgI2OR1x3FOt1kufMVYmcryCRzQIZdhbYjay7c9u9QcxySecnmK\nRkc9KfcafcRPGZoHiRzkBu9SQxjks33hgg0wJI5RJ5XG3I61W8t5N7sfnU8fSrltp8k0aqIy\nyk4DHtUa6c8sxSZWj2nGR3pgU1kcZ6AAZJBpkMm5pAW3HPH/AOqn+TFbeasxbrgCoZLjy4Xm\niTfPjG7HGPSkwGQ5XerKRNnpnGRSMyOqorZkU5OT0pFmjuraOV5Ajk4KZ5z6VBJH+8AbDM3V\nh29qEBanZBMGJGDz7CqPmG4WXZwf6VOVi+4QSigkCqiSb5HVE8sEY60xjrf97EisMH1zSsxV\njkbUb5SRVdIm2SbPnZal5ubeIA7cHJHqaAK7bl2bn4HSljmFuWwm5mG3FHCNlm3LnBWqc1yG\nkEgB3D+H2ouLYmunJjXjYc0jzozBA5Jxyaja4G5D5e52GduahQlUIdc7snjqKYC3UwbcQ2U6\nHFbPh9tF02xuLjUGaW5bKxLyc+lYDbFjJyFTOOetNYws27GHBB/+vVDJkk3SMZVKhjxjt7Vb\n1RrT7KiQowkUckDqapTsrRyTFvLhHAVf73rVZk8xVcSkbOc5pB0JZI2hVGJ+Vv4T/Kobq4mZ\ngm3BA6L2o+2SRw7DJuizvCMORVW6uJSoYD52OTjrii4iSSQFQCMA9fWiSRA21fkMY5kH8qbM\nnlqZOrqR8vrTdhh2yZ3BueOfwNFxiG5OxTM7OrHoTkioJ53dxsgaSEH7vcn6VK7+dMMIAvue\nRVi3HmXSjefL6/KKOghlvHE3mSSHbKVwqnt7VCsaWm1JMl5PmCj0q9cYuVPkx4VWyc9fc1Wz\nHNI0kTb8fxN29hQgGSW0bTZQ7c8nNLHbnzFIbBU5zUkY+ZyQSvQcc1aADqzlQpjHamBFLG8y\nE7vMAbgGj7P5Um9hhgOMH1psckkjl14J6e9K7BWBcEKeCe2aAEKhWADnp0BpsisFwwALHqKb\nbxb8p91N2PMNN+VlKs/I9aQDJG8sAAbsHmq7NukYnKgrk4p80exQoG7nPWooo5fM3BgEXs3p\nTAbHtZo8Z47EdaPM3TfKiqmdvXp7kU6aVpOgKlvunb1qNVEiqVwpyc+tBOo5o48nYPmzjOeD\n70q5kclzhE65p27y9u
fnLjGKRVZoj/Cc7cUDTBo2ZSxYpuxge1NmjImVPu7hw3pUkYaS3fZz\nt4ANMZZFCAgHI5yeRTQO45Y18xcNt7ZqdY0WMqybecDJzmo5YxHCGjHX5Qx6k1PZwbVVZBwx\nyCecD/GkMasBUrhcDoe5z61dFu9wASC6jjIqW0sGkuJXTr0C/wBf1r0HwB8OtU8UXUNlZW0l\nw1x8u5EJwQfT8qzclFahu7B8Pfh/eeIL6CCC3klE2AFAySe34V+of7Lf7Kth8NdFtta1y3Sb\nWJBuihYZEWRwRT/2V/2XrT4ZaTa65r8Ec+tyKDFCQGEK9u3Wvo6aUsxH54NcEpuZ0xjyhJMO\nQOmP8/yqMzfNUeTk+nb2pOd3PSpLFbd1p5+Ve2aPvAGjaG5J5oQwH3eab24pysOnWkPHWqGO\njkCggjNLuJAHem7fQ8GmFyrjPWkBPS7cAVHg9c0q7upOaYD3YdhTuNvJqMfMdo60rfe4phqA\n5UYHennp600AjJpVJwRikIcmNhxTfu0i46g09fm+lIYjgL15NO3ccim43Z9qXJyOKT1AXkgZ\nNL90gmm/c+8aduwcEUwsAk6+lLyy4BxUMjBe/FSCTcowOKtbAD4Whfu570ZoGdxOBQBIOmR1\npfmXr1pittJyKTcWHzHipAkU7TzSHLLRu3rkcUg3D1zQBIGORSK2OO+aRWG3njFG8biQOKBj\n29jQGK4yKav3cnv0peWHBoGP37l6c0Fgy+9MLAd8U5cnBxxTAXbgj0oJwuRQrcnPSkUZ+lAC\nL9MUu7dwBmnZJ4ApNu2gpITa/wDdFFP3fWigZR3deODRt7ClaQ/3eO1Ij8HcKggUfMu3GKRs\nevNHJUkHJxwKrxhvMJbr6UDLDDpg5pS3PSm5/KlbuaGIUKAue9MfBjwaDJuxxjNNPzSUAM+9\n0GaeSdoBHGaf5fI4xTZIeMg0wHbx07Gj7vem7emRikdgv0pDFUDJOKdwvFIvOATTR8u7PNCE\nP4C5FNPfHemqxxntTwdvJ60+oBk+n1pgYkEY5pwY9D0pTgNmkwGuG+lOx8vahW3Lz1o+Xb1q\nkA1vmUAUFduMU7cpXFRMx4HOaliH7iz4PFG4Mwo2dDRgKNzcUDHEdSOKA4ZcY5oZh0HWkCnk\nsMVQCso25FIcKOetKrLnPQU1mCrzzQSO245zSZ280bgwG3k0KwLEMKkBEYN35oLFsZpFAwSR\ng9qd1HAoAXjnIoPyr60xmZVGaeoPXtSe4AvTmkf5sD0obqcdKFTqxPWmAcqp96RvlXA5p2OO\nlIo496sAOFx6ml+XbwOaa3zdKFUqtIB3O72ocAKD3pNx24x0pVYHlumKYDdzbScfSkV/bmpF\n5Uc0mAuCetIAVSuM9aYfvEZyKfnuGyKaqntwKTAb8vrRz68UjfLk4zSbeB1qQH7u+KbkNnHW\nlQ5yuaPLC84piDHNPXoAabuJOT0qaNRuyaBnLfE61F14MvcruKqD+uK+MvF1uqXDkMMJ1/PF\nfdHiOPztA1CMR+YWiYBf618Q+NLXbv3jB3Yb1poVzzPVoS2GHGehrnb5nCP5nzA8DFdNq3+p\nO0E4Nc1fKZIdp4Oc1oiWc+0ZBBXPHaop2Hpgd60JAFbCnJbn6VmXjbZWVhwe9WZ2Mq52qxVT\nuye1Vpn+UqvHHerLIo3FThl5IqiykN83RjmqRLKqqPMBYjdjBGKjbMe5XbK1PJGwkZh1IqrJ\nKxhYMnIOCaokpTeWpJCn156Um3zgoIUkjIbtUxXccgduQTVdsQxlv7vI29qZJA0/zFSOOgY1\nEqvIuHwCDmpGbcudnLDNOjXcApO1V+bFAalc5ZsBRtHLZqNm8uTCkkds81JMp84uckY6VECz\nKG27VziqQ/UYVfa8sbZU8EVWZN+Mn5ugAFWdxj3kthTxgdKapSFssNxI4+tNiKc2VjKAZwaG\nkXo/zMRVi4kM23KAf3vW
qrRhZVdeQRyKQXGljkBuvUYouG34GfujmopFPJDHPU0OxKY3bM/x\nYpjRI29mRCgAYZyelJJHsZdvSm8tsKtvO3oaCWjcYP1FMlIjmVUVRkb2G7gUNH5iAqcY96QQ\n+Wx3fO2OOaDlfvHAIwNtIoa03CsUyucfWmSM7FuOG/MCpdu6PaEyQfwprL5fU/N60xDF2rtw\nCFHGaFZVDMRuzwPamSK0m3KlCPU0N5a8jI/pQHQc0nzBHUFuz0zzG3njOOhpC3QAc54c0qoW\nVmGCQefemA9ZXK7pMHb0FQ+YW5IxnoDTmUsodTx3FLJIVjyBigBjKVhOQMk847VG37lhgk4F\nTs7n74yuO1Q7jtkGdvO0UBYbIw2szN34Heo2mxFkLjtUjqJIx6jk+9RMm5cjjNAxzRssasTk\n9cUuxcgZ3M3U0qMQwAGQB1pm48qDjvQIY6Fn4Y8UpyuAv3vWlkcqMkZbvihkIZcHcpGc0wEb\nKqCG6nmmkFCxCk9qN0nJKjZ2FO2ldvOO5ye1ACsRwVG5SMGk4bqPmHGaFddpxwpPApm4rkEc\nDncKYhykbh8ucGnM3zEKMnqBUY/hIbg0LuWP72G68dTSAaxDuXK4P8qjGdpIJUdAp71I0fnb\ncHHOSKZ5YWY5fPpTAf5paLOzHGPxpsjMVVl2+4pejc8qf50uU8w/Lzj8KQ0IyrwdvDc03auP\nu49u9LgrjByKTerICw+c0FDQsoj3cAZo2/xbs809pAFJYfJjkU3hQpA69qZIpLLkE5B6U1gQ\nAWHHc07BZct97PAomYjG77ncUWAiZxgDGCTxTd5bAkO1SOFqaRVbYw4UHqaZKvmScrkAcNQA\nbjuAY5x3p8bLIzEBjjo1RDLDpg9Mmkh3x5Tdhj1oEOkXMJ6ZJqMtwoU5P61JuCkuTlfSoz8r\nqQuN3NSBJJ/qwc5buMU5fmXJHlEdKjlG1C68t3pzKX2uvPs1UMY26Nich+/FPVjjOMjvTI40\nWRio3MQcgmlGdoGdtMQMC7e/8PPSlkIZQGGPemLtDkn6Chd+WJ+Zf1pC6iKoVsgnnpUjfPIi\ng89D7UkZZm4Hbv2piuVVwPmJ70FAFXjGdxPNJtKsTuwoPelkQIsRzh880SY+VOMsck0wE8ws\n+AuM9RipA+HKgAEcUNK3puIPBWmuufduuRQA2RTxuzu/SkVnkXnBJ4HtTlkE2QDkd80jL8oV\nTgZyMUAKE2tnrxg4pBn7x+90AoK7I2P8WaSHEjcnbSEDBfM65HejhfM28BemKdJGylkQqQR6\nUm7fEHYYwcEe9GoCeXukVmK7jzxT/LUblZS3oabxK23+I9KZIsigbmwFOB60xhuC/JnB/Wnc\nllbBB6D0oXc2VIViejelOaRmXbnOKQDceWxOR15PrTf4iG+VPX1ob7uPxIFI2BIoUlkxyDQI\ncuxuDyR0pvKrjGDnihWG1uKBn5SAfpTAcuOUbazjkMP5VGqtndjce3t7VIw8tSQBnrQqgx56\nZGTigBvK7i3AA59qXO2IbcMDyaOTEVXgdeaax/eKDxkcUtR3HMxP3RtB6U1Y3KkAD3zTRG67\nlbj0NLj5XBywXgetAiNVMOVGADUiN/D7UQqsi4xt4zzUalXXg8CgCX/lmWK/KPzpFHzOy8E/\nNj29KOR33j+VPUnjB3HsfSmBEuQ2FQlX+Y5ojby2Y8596WNyqsSMihtvkgE/NnpQAzlvnQHp\nzT2kDBUYfJ39fpSLmIkh+D2pPLMijOAc/jQAM4flVwg6DvSZLJnoM96VnbeUC80EbMiT7vrS\n8wHbjH8q5JbvRtxkt0oVVAUB9x657UjqGwoJIJ65pjBmH3sEn36CkKNITzhh8wPqKPN3bgeF\n6U5H+UqV5I60uoiMcrkjBPYdaQoSSWOG/SpSNudg3KB19KRZGK/MnHrTGLuVRknt96mLjnnO\neQe1O8xWkAxuTFCq0hYD7o
BoAMs4HdT2oIKSZxwOMUyJWiUMz5OcgHpUvJkLB9wbt2FAhowq\nttJ3evakLMq4YEkdDUjMix46gcZFRtuWMfPkUDEDho/kY78+lKsinJfvxnPSmsuNpxz2xTGY\nCXy2A3DmgCTywrA44+tIVEchHQEZFM8xSzSk8gYpjOHXBzvbvQIfNJt6nPfk0iMN2doOahbZ\nyg5I/OiGYtkHHy9j1oAtw/K5z0PY0x96TbTznpiljOV5GMjIp7tJ+76e9AxOV+RiDjpQpESn\nJ35/hpi/KzYHJPU0+RSV3ZAcUCE5jUBlzk/e9KNvljk5Oc7TS/MV4bBxyaYzDbnaWXofegBX\nOOcZFARW+83B7U9X8slTwMZx6UxFDSSAsNvYtS1AQrJGyoMZY4B9qkkG6TCn7ox+NR7huHzY\nI4oWMmQnpu79qAaFO0gM3y9vxpVYqxIOAerdqVkMbAj7oHNMXDKS3OT2pAOWTbGxJ3c43YxS\nhmHDD5W5JHakbnBbhc4K/wBaVflVkAI/qKoBFzE2B83PfpSkFdxxuakIbg78rjGO9J95QF6j\nn3oAakhjPCZXPIanOodjzznpSt++T+4Qck+tJt8zDHG096AHsylQdoCimNuZstS7V8tgnK9q\nFxuweuKBiFz5mwJyOjGkkmfYA/Dk/dqRY1lX5uG6mo1yzHI3DPyqaBByrE5Kqeq0rfMuCdpP\n3TSbiANzYOTkUEln2bs0ASJKdvzMdw43VYZS0BBYZ64qkq7cjd7U6NREAGc+1ADUkCsTtzxw\nBQ3ySDjtndTk2eY2R7jFRbyG4BIxyKAsPkbadoG4nqaVGJ3jGABxTY1brj73Slh2rLtP3u9I\nBWYLGny4Y9TSNiOTGcr1AoZQWfnik37ownQdM+tAClS2CzDZnOKXlZG+bKN6dRTI4wzY6KtL\nuXexXjHU0xkiL+9OTmMjrSLGq5YHC56d6aqFvmQk55FL/rmJ+6B/OgRJgMrKBz1NQLMG+7wB\nx0qxI2funZjg5qONVRixIxjj60AKqhQDtzjksaAqyAkHB67TSLGWLktluvB4xQ+WZQBg+tSI\nPmJDEg9topdz7vLPTrmnJGEJx971qPewJbHXiqBDvLUsMtuLcA+lEinfs2kMKT5WjYlsAdl7\nUrOxwUfI2/eNAkI6SBmjZecbuOopsbFVBXkeppzM0gDhisijP1ojO4SE8d8UDQbfLxk5BpVi\nEZJ/iakR38tMjKsePYUrbsOzH5lPAouPUCxbJj6qcYPenH5ZNx64+7UaAFS44PXBNP8AKdth\nkG/HO0UtRCfd6nKkZPFIu9TkEHPb+lPZdyqdwLZwG9PajKow2HjOMH1qugxQ20srD73f0oVw\nqnDZx1pfO/eckbehHfNRtlCW2f8AAaQD2boNvJHBpigMSuO3PpTvMTCBVyc4PtRIWMrJs6nA\nNBPUPs+2NWOd3Zqf9nEm3DEP3NDPmJEBL7W5XvTvtPlvIrDYcZBpFkc8QWNl35H05pit5aBf\nmAxxUkzibkdR3prN5jKW5FAgVWKjb17+lOXLqcOFYVKqhcoDgHkmoNqsxI5ApgPO1yHBOV9a\neV85MMdoznNMbDr0CnHIowfkRTuP92gAbeuFX14prMwmUyJlc4OPWpPLY5kcgZ+6vcCkhUqx\nJbrwO9AwbcPMJClB/E1J13cZyfvCpPmVSilXGPmNRK0YkIVuW60iUSIxPUYPTHpQmM/e5Yct\nSNMFIYjbg4+tHmZ+UrwecLTFck5aOQ78D+7602FtygEYwPzoZY5sfN84FMJLqAOB0P0pDuWE\nK/KpbPcikbduO8gA8j2pmY4pPLZu3Ax1pQxaP5Rk55U0xD2ckgsdyj9aGkLrsCmPNNzlcHlf\nSldyYwq/MP5Uh6j4kTyzgbsUu3gOOUUc0DCqNzbUPp3pBEduPM79BRqMe33fvZyMj2pihl/d\n+vel8zyiu1Ttxz9alCKwXfnD
dqZIFSmAMH3FLHJhiCvJ+Wl8nbGY1+UZ79qX5RH13mgY5WZY\n2ToV4pCxMZBGGA4pVIWNt4w2OTUalgyBQW7H/GkFieNcIpTA9SetSufnXb9wdTVWSNgMgcA8\n4NTLjGEHy4yaQx5UovPzF/yxTVd1ONuwr/KnMcgLuwDzUsMA++x3j09aYiP59hPBz+Yp45JY\nqSMY49aVlZXLDr0A9KQBgy55VT8x9aQyVQVUJkbduTUflbtoCe/P86cWiZie3QCiRjzmT5ex\n9fagkZIymQ9Nq/w4/WmO21fmyR1xT5AXkBHG0jIHpS3A3q2SMnp64plIqSKI92Oec49BUEjt\nKymMcdyKtKxyRjORgmoJkLLnPloOAB3pgVNmzDE9eajjjLuW37QeDmrAhBfGcbailJWPIOMN\nnpTJZBMm2barZ29KrzMYyPl3euKmky0m9lO/0pj5VflGDn8aLAQswkxu6npTGzI3lqMjqxNP\nkGPmwQR3pYpRHuDDdu5zTEJGyldwzhTxVm1mS3sbkSuU+bc3uKpLJtDbjtUj8qivI5GdD5m8\nEZI9KYEDSL5jFFwn96k3HCsz98ZxSKwVdrDP0qbCzLtYZHt2qRiKpUuF5z82KZcKWaPPHf6V\nJuj+8xbjjgU7ygEHzbS3QN2pDK7becNknuaQgg4UnG7lc1JKqlezDpkUyRR90IznOSy0hBLi\nEsN7FzzmljjbycocHuD1pnmLhlVT65NPEjfe9BTGEg3R7U4K8470scRZTuABByPWmK48sknl\nuKd5bKM8jPQ9qYiVtps8BQz7ui8UrSeWyKRkY6UxSoLD+D1pjRnaJDnOfvGjqMn2FkLYwc9K\nbDC1zODj5RxSrvdtqqc4pA7lQp3IO+OpNMQnKyMM42+laFqvnW4BUkoMjbWeUG0FQdhOHOKl\neKSEqyNhOg2mgRqi9h/s6e3mXL5+WRuMGg7PJt4FDPcvymRyBWbJfStIv7sbRzt/LNaY1aF2\nEVrFiX7oZu2etMVh8l48MgVQTL91sHv6Vq6fcyz27p520rxg9RXPSKYnxI2516AdzUkMktv/\nAKSD5suOVzjH1oEdZa6r9nkEiZLAY5HJrQtbxpJEdmCszZxXH2uoSXUwywLEZ+Xt7VoQ6hvu\nER8g+nvQI7lnL3Ctv3IxxnsKusolkIdTs7kDrXKf2nJKypG/3PvAVtx6o0iq/mA8YA7igDR3\ni3kzvMkYH5e1TfaTNHsjIVT3ArHhaRd+ULEnJIqdboBQ0RKt0osGpsWsiswR22kdcmmzaiIb\n53tztUjBQHg1kRwoluxlkLFmyeanjjgiUZDMzD5QDSAuQ6sYWCvDhXOSVqxcaojLH9mBZd3K\nsORWdukmi2om1VPOecUv2mRfuYHfgVBRJJI5yit8rfNkjinCGKSxCg4nVslu2KR71brCkAKo\n+6O5qq0jow2A+X1xTGSfZZFjDAsFGct7UrW915JuEjYxqMeYv3RViG88zMZPlKezDiup0GN1\n+G3inftJaWJIyOcEnnH4VLKRxTeZId205xxirS3CJCvG3jkEUCN44VEn7snj/IqNY4Vx9qyV\nbkA9cUhELxhrhnQ5yMn0xULR7UJJDZ5wDzVuGTy2Jt1zH/damR25uJS74VevFUFim0JhuURD\nuVhu47e1PXHz7wVPY1blhh3YUEjrkVXaIrIdn3e4bsKRRXkVZguASmfvZp7btpQYU5707loc\nLgZOQB3omVZrQsrYkDcrSGPtJ/JkVHjHPG6nyXTx3BJjXyh0HvVN2bcpEZI7VJIx3oZP3mTy\ntWII8bm8xhtbJwR0qNvs0ceR8xPT6+tWZbaWTe8KjaB/FUMcMkuxfLG7OTmoYFiK1hxF+9wd\nu581HNceZIFUZzwfpUe5IZCJV3Be1SAoJpJQ4UY+6BzikgIpSyopQ42jJ9PrTf8AXRxtLIwJ\nXIqWOVJmSNlZRIMBiKkZZYZWA2
tGemPSmwGTIm3eATNj7wqFWMDNJjMY4IzzVj55oyQh9N1R\nfeUxmLBPRmOKSAWMLtM8gwOy/wAqDczy7yHMBB2h/So4b94R5QClgCCfaq/nGa4AiOzr8tMX\nUuyRuzr5txJcsoyu45/CoIv3zDPOD930qKGWe3YLv2ruyaLu6iS62w52sMtj1pdRiCSWOV4x\nPIvP3Qxx9KCqorCS5ka467dxqKJHMjAtuXqKY8gjyTjze+e9Ahs0p/1u7Ix0NWoWWRQr/KrL\nkLVITwPMN+RH3SnXWpJI+5V2RqMDigZXuLWNI/RgdxAHaoVm8xsqSdo7+lSCaZh5235AcVXl\nXysFfmaQ5Ue1MBzXcc9yFyTGOCKk2/vH8tf3YGdx7VQfZHI2WwGOc4/SrHnEZCccdDQIekvm\nKBkR5/u9DUP2lvMYkYPQfhTftkeCXj4HGegzVV5THhyrbWP3gKAJriZo1AAzu56VBJM0hQEK\nu2o57kGQnG5V4yKjuWVtpGHI5JoGSW6iSUs/zbRwg9Ke1xE0ioWIGOJMfpVfcFXZ92T7xPqK\niOcZHPvVDFdDNKc8qpz0qe9ZJronIjj2bQf6VVV3t22sCu7p+NTJOkO8vF5zZ+71/GmK4R3E\nUcOHj+98o/xqK4s4GX5ZcsT82DTrgi6j804CjovcGpbWO38wEvlsZ2+9IsLtbbycNy8QwSwr\nPjUR3QKt8rLksemPQUsuVZklY/N8zHrUYJWJV3YUdcigRP5sUXmSsGDH7oqO3MhD7Rtizht3\nelaMsNxG5SMH+lWLdWaJvkPlP8v4+tAmRfZR8zRsqvt43enpT2jiWNXhDrc4+Ze31pfs5SFk\nb5iO9OS3bzFG7GR9719qGOxHHHM0bNJ8hkGOtPhjNnG2FEu4bcYqxIoO9mHbaFzSLbSMqpkK\nAOuaQEaQoVwH2se1NbMa7cNIzNztFW7iOF41dG2hSAagmk2gyRsGxwPWqEiu6NGWaOMFV64P\nIppnK53rtRhkbutT+as22JlYNjlqrXStcTFFYARjjPtSHYcrBkXa+HzkY/lUAZGkdOD8h+9w\nM/WmTSlGRzjcCKj4aR1JION2ccGgQkm5fKZzzjGF5qIny22sCGJ60gVpGyT16D0pFAlUuzHr\ntye1Mm7JWZtwBbkcbvSmH5pgwOOegpGWQHO7MfrSqgYbg+SO9BQ/lmABzjv6UR+Y0bOY/wB3\nnANCqVYHb75qUqvTeUZueOlAiONSVULj72CKmj8q4LonyunBz7//AKqdDbxx5G8H5chR61PH\nCFfCLn196Llkcdn5aBz8yrzjPU1qafZmVQdjKWGQCKs2+ji6liCjyh1APc+n+fSvV/hh8NdS\n8beJINNsrU3UshCjavC8jj269aylJIajdlH4a/DS98T3cNpb2jvLO+xEQcuT2z2Nfpp+zX+y\n9YfCzT7bVNViEurMBKiMMrC2MY+tan7Pv7Nek/B3S455oo7rXJFBklcBvKzn5RkdfevaSx2l\nfwrjcnJmvKWGb5cL0Xgc9KikbnIOKjeTb0OPWmq2Vyaj0LHeYWyKFGD97ikY7Vz2pPvMPT2o\nsBMrfJ0pWYBcnrTVYdzincbvUUw1DcM8U7J68GmLhs+tNX16igCX7qtz24pjoGCk9cVJuTaO\nOaOKZYir0pd20kdqFIVulN5796QiQDuDil5UZpivtIFLzuzQUO3ZGc4pdxxjpTOW4IxSlTmj\nyJJFwrdM0nLN6CmrhmxStlhxwKAHjqaN549qbk7fQ0bcLmgB4+bjGaRWycGmR7g2egqThVOa\nWohj4PbNOUgL0oTlenFIueQRTuxj/MKr2pqks2c49aMDnNKMbfQ0ASH7uOtIw7d6au78Kd95\nc45pgKg29aVZG54pnzKM5zUkbfKe5oAMetJuGOlG7OTijdtU4GaAHdOe1J5mM4pFGevApeMc\ndqAFTaygmn7ucdu1RqVZcYxTsDqD
zSAXccYApyv1BFNTJXJ4NKTt7UDF3HscUBsjnrTdw25p\nzDdgjimVcXA9DRRzRVBcpsx+tJkjqO9KVHAPFIo2s2ayGg2j1wDSyduPypue2SBShdvTmhCY\ni54zxUkhHQCo+cj1oxjrQxIRvmAA5pVjC/WlLL5n4UmfzoAcW+bFJuABJpNw3FqZuJyaoZHJ\neLnjkU6ORZlP931rDv8Az4ZjH82087qdZySrCsY3bs/epMDdxgjj8aXjrUVvIWX5vvDj61YD\nKFPGTRYRGU9CKUY6HnFIrBqX7q/NxRYBCMkUjZVSMZNO3BhnpTWPQigBysNvIwajbHJp6sGz\n2pOKQEb/AHRipM/KCRQyqOOlMLHPtVASNkdD9aG+bgdKapDc09cKvoaQDV3DqaUN2pSePaky\nN3SgkDggimFl6dak29xSCP5jyKQDGO05HApyZX7xyKJOoB/GlWMevGaBg1OD8ZIprfLnuKRW\n+U96YWH43Lk0hfcvSmk5FIueQvJpCHLnGSKCdwoZiOB+NJtIbNNAOzim7gDS7v4R1NNb7vTm\nkwHE4xgUrZ25zRHygz1oVtwIK0wBiQo7mmsh/A0p+6PWk3YpjFXGMU0v1zzSs2Fz2pzMGxgD\nmkwsJGvymlXuM4pvKtjtR3NIYpIxkc03d155o4A64oYjqKRLGbtp4FOyfLyaYu0ryec0S3Sw\ngBufSgB/3eDUmcmo/ODLkrTo/vdaYErotxbTowOGjK/pxXxR8S7IW+pXEK9FkIK98ivtiNsB\nsDp37V8mfGfTms/Ed/8Au/LkaUsT1BzTQrng+pDyvMQrxiuZ1BPLgwed3Q+ldVrUm6dwBtau\nZ1WN5EBPBPFWiTnbq329Gx34rHmVzy+Sc1tTMfmGOVNULhXkjZ8cLVkMzZCsmCy/P04rMuUL\nSYxg5rQuJtrh154qrJhl+Zs7q0RmyC4ZtvEY6YzVBvnX5ufareoKdiBM8cZqn5ZVuJBjqc0x\nFbO5yIxtbvkVTuIy33fu55FWri453RnBJwcd6qSNhcgcd6YDGKJGQR82eKjZw0gKgkAc1Y2q\nWU7RuxnBqBl8vOR8rHn2pgRXEgUjDYPYVWmV5VLlsL0xV2TAiIVVc9vaqUkZBznBxnAqkIdt\nHlhC273qKT5UO/JXoTTGJVQ7Hbmmjc2MMSp420MYiyIcZJx60xmXhVbke1SzW/lx4VwwP6VB\nJGVcEEdKBWGzqZcZ4HtTGjMKggBucBvSplB5I4UjvVZY9innleRVCAwyQyDJ+XtimYJOd21c\n5/8ArUiyljlmJ55pzbNrAtw3r60mA7fuYSBcKaii8z5g5BQE49alRisfzcdqgTesj4GdwwWp\nWGO8s7lLvgZ4I/lRuIYk4YE/lSK3yhWOQOAKdJ+7+6wAbj/GqEQEnc+DkVGzr5YA4JPWprnM\neB1XpuWouFxuXIz09aY7CY64+Zu1JMNs0ZPGRggd/epDJtUFRgN6UyZjKq/31Oc+1NiHSOu/\nB+5jGBShU3Kob5enNMVgJGbGQR2qVl5U/dOMiosIr7mjY85VT09aSSRWyzcbj0p0bNuZpBuL\ncVECVQjbu2mmA5WCxkqMnpTAwfl+MDgUqyeZll+UY6YoX94vI+agBvmfKwHcUrfKqEjoMYpY\n8yZPCqBg1ANyqzds8H1pjJnAZieChxmoZsoeG3e49KeuA2MbQRnn1pvlvCNoXdu5+lACR43b\nTnGOpoCiRgN24+1LtboD+dIV+UFTtOe1HQYrKUBwCw9O9Rt90hAd1Lhly2T6CljZjjaMYPJo\nCw2RmIUbckCjBwpBwcc5pxIaYk9SKZwvuf71Ah7KvmBmbAxjiiNSrAAAqOM0iqFb5vmJ7mmM\nu1QxPOaADY2XzwM/lRJ8sOUbMnf0oM2WBJJzQrNgt26YoECq6kDPXqaSVSzhl5CjFBDRruX7\n1N3Fvmfgn+7QUgk25CNu9eKVWMw2DA
96CxdP0yKTaygYUAf3qe4DQjSSFfu7e9P8wY2/zoba\n7hmYgDtSMQwK4+bsaXUQm3zSN3GOcUrKMjDDHseaWTDdVI4wMUyT93jahJH3iaYgb/WBmPy/\n3aXcAudvPqaWQrt6bj1BpjKNy5fjrikMdIvlhfRuKZt6nqOlJj5SpfcxPHsKTlQRTsAuArcj\noOBRuVGByRn+GkaR1X5sMvr3oXGzknr0xQMPlQNvBDHgGkw0S/PzinsQw2bTuHINIzMpyxHT\nn3oExpYrk4y55z6Cmq248cMepqRcN7bhjJpiqI15JDdKQuojM+4EHHahT93ac5PanNzle470\nbRGzKCNvYe9ADG3RsuRvJ55py48zBHyAcnrzQrGNVyM4457UoyuSlPqAitHHkFtvHApykxkK\np3MwyajWONnySQO3FPj/ANYSeQOGHtTGAVSuF5DHG6kOY1ZSDgHg0m394VVuh+8KdgqxBJYH\n1oEJ65+9/OmAgggcY5p7AFOuHzxSbvlO1eOhJoGR5KuJM49u1ODM67VHXnmk5I2sPu8+1Sbm\nZMkYB5x3pDGMxG0g4I6+tAwzEknHvQyhQCnPOcelDNvky1MQyNvNJ2Arj1p8ZLLtxx1pGXIJ\nHDelPjy0m4DAxjFAxGYLuCjtk0wNsZQBjjPNOXEkmGGO1Nlb0HC9M0CYbm3cKHY+hpdrltik\nD1NMV9y8Jz+VKqfvCXPJGM+/pQApULu3ncR0xS+Y7bVPypt6ikXKrhW570pYkAN69BQAkmZN\noU5YDO319qbNgsm4cjkr6e1OVtrEqCTQsY3Z6E8nJoAZM26TAHykZApIzuYOjHOehp7MsihS\npAzwaRVHDBslu5pCBvMkXJOHz6UhYKM4CjocU8KDlt+XU4AHShgI1dT8+TwO4pWAYOTxwPQU\nqkKzbSQP7pobLMABtXFCxk5AbA7E1QDdgST1OM4pd6MMsMN603aFxzg/3j3pxVdpJ5PpSGDQ\nlTknfx2pOnlnOSwz9KerBVDL97+77U1cSMSX+XGAKYgWNTISpPPekMfCkHfn+GnrtXBB5AzT\nWzKofoB096BiR/MAMfuwfu0MuzMnfOAKVx5hHzhXxnNIyllyTye1IBolCruI3LnB+tLkEZIx\n2xTGKqzRgYGM5qWNfMdVZsH+GjqFgRtrcNtGKAzK3I3KetRssnmNuYEKe1DMWwScJ0piFbIT\n5BtBOBS8+WRuxzggDvTT823sF6GnMd0nJ4xyOxoASb5iCv0ojYQ7uN27ilbCYTduX09KXcF4\nHUHNACKD5a7QCMcjuKGVsquQwPSo8fxq2GzyPWnYK7kVsbTkGgB207sLzt9agc5UkjLeverS\nxSSdCB3x70NCVxhfnPUUAZrLwM8U0SnftBz6GtGSAtGNuGPcetVJLV2bAj2E+tAEBUKGXOSe\npFPVRtxt/Gp1siBtbIJ4yKfDakbt5yq5FMBVjZfcgcY5pWYyYzx+PNCxiOQc7Tjrninsu5WJ\n4IpAMkdo4gARvLZFHmsyjJUMeTTc7WV9m49KRtr/ADFD8pxQA5S/zJnaTzSndwCQP6UxSec8\nEnr7U5VKqWByuc7aAFuP9WP4ufmIoOFdiy/KV4/HpTmk3ruGCO4pGbr/ABDGDQA2Tc2xkToM\nVJtPfJcjpTfMIdAvIxSMxDMwJU4pDE+9kNnjjIPenbdq88pjH0NMaIsm5GO3rz3oO8IcMMHq\nPagGK0i4B6jpt9aeZGxkA5PAHpUTEk4U88Y+lTbm65wFGTTERpGzSFh8xA6CkVtzAICHPHPr\nSxuFzsyGbv6Zp/mMDzglR97FArsT5jKOOKXcDuUrtbHFRLJ6EuWOcdMU+ZSv3+CDk0AmJCrs\noJTDdAuaVUCIoK7ZG60ZRhv+YNnimmTdjI74zQMkZQ6h+6n7uOtNZn3kbtpIzRw6kdR04NKc\nbQMfIo4Y0AMyW2Z5Gce9Na48lmIXcQf0
p0bAxtkHc3SmKNqhCMuDk+9AD+VjEg+6x6U9W3Mx\nI6Cmbc9R8uc57U/jyyV+XNAxscahSVPB5JbtS4VsHP8A9ekYg8gZ4xTjiNQNuSOTQIjIPLJ8\nx7U5nljdTsUqRz6il+Vt4RinGQMd6b5Y65y/QikA1Tt3tt3D2pxUtGoUgilUeWgAHBOBUbNu\nYqvHamTcd95SqDB/nS7dqn5flxz70nO5NpAGOtSrnaRQBDtLAGM4C9qf97lV+ZuP/r0rK27A\n4prBplIHDLyFHekNDsGTK556HPegERrtC5xSsfM2jPzkdO9MUvtZ3G05xgUDFYIsaqBgnrnq\naFULghvbbRuVuoJbHGO1O3BmweMLQIXcvRTn1pq4YlWb5ccCmtk442J60qwlsjOM96YAU8sD\n+EEdaI40Uhuo9KUOBGdwyoOBml37cYG7tTQgLFVYdQORTPMDuTgAN6U/5lYkjIPHvTY4scZ6\nc5pAPkjJXAJG3B49KZ92RieSegzQkj7WBGABnPqKFcFirL9MUDHRwlyTwGHanbXeRl3bGXvT\nR8rKOgpvzNMxL43HrQMk27tpAwP7v9aJtqtnOVHp60KuX4yUYcN7Ui7BtAGcHkZoEJ+7Vsld\n/wDOpCWwD94+tM8xt2CnGeDTsOqkr83t3oGHyRhmP8QwR6UgYttC/TdTVm3A569xTsgKvzHb\nnOQKBCSBo27K/r60/wAsyKCRweuev0qIxySPtI3DOQ2elMaMCQvIx46c96A1LLjY2AmD39KA\nGB3bOPSo3Y7UPY9ee9PkUlQzkow6EdKQxCpYqVOGzxTt4j8zIy2MHHrTlZJPmBwcfnUe4BuR\nx60xMbFnaN/Ix0qaNo9zbQQMcGmnDNtB696TauSc4Cn9aCkORvMViqHOeAaYwDSBt+w9PpU4\nkG8nouKhbEr7Nn8VBLJmYMoXGwj+L1qJfLg38B3I4qRxyVbr0GOlMTbFHjAyf4jSEEf3sNhh\ntyB706NFMfOQx6/4UmBHsIILZ6dfxqXcVkJYYT+dCEMkjX76LtGMYpQo8hTg7t2DQ6JuyDls\nZ29aPMMS8fMOppgSOxZUQgFs5zSGQcsvBzSTKF27Tg44FGGiGCPvHmgdgXC4JPy9akZlVto4\nLc4x2qJs7SoG5c5NJsKtkHPf6CpKJWG+EcE4OAtEe5WGRtIPentuXaFJbuQB2oZvNYEjame9\nO4hhkO/YfmGan8xYsbiCOwpmQHYg7QeBxTpFXbGyDcc4NJiBpdrZYlj6CpPJLLgcZ6Go3YJJ\nnq2KIWY43Erk44qhjZQygZ+Zc4JzT4wysMHjpnuaeI1bczLjb/Fnin7FEYYHcTzSGO3KGUEH\nIP4UpVQW2cL39qI+VXqWJ9KbvIjMblULNwc1IxWKqo79t1PjVt20ng9McU04hQKT15Ax1qVZ\nDgGRTjH5UxAuWLIX+ZafJhAoDFsjsKTgthDx70rKYWLZwlAA2z+BSQRyTSRgOg3/ADimvIJH\nCLxxnjvUvKqoxg44o0J6jtuVYqOSe9Cxr95xk4xx2oDHqV9qVipjLqMEUxlMrtcr1B7+1QPj\ncck7R09quOwuFyo2n6VXkjLKSOMUBcp3EajBJIb2qsyyKpKnGattH5g9PrVaRgqsTnGeKpCI\nmZo3BfBTH8PXNQS/eO75Rjg1NuG7avI6kntUcsZZVbO4etUIg2lW/eHc2OFz0pW3xfORz1zi\nlZcxtIF9s03znePYXG30oAbId2XPPPI9arSSsZmIHljGCpq4rxxyL5g3jrtqrcmOSbcnXPQ0\nDIY8BgW+Y0/zCqlQAMnrTGzhmA5JqUxo0eWOT2FSA77qhCwI7molWPdvlJJxx7+1PjZUjUYy\nQfSkZdpKn5l6j60gsM3Ii7tu1Afu+9LteDLBsBhnHpTpI92MjnGStNyjKWkfaV4AAzxQMjWQ\nMxJUsuOTildgoKrgp2NLM20BgOPSl+XzAr
jMXfFAEB/eYwNvGCKkjYbkUk/L0z/Kn4+ZmQcD\noKgyV+Zjk5wKAJdvyBU+c55psiyyN5b/ACgcjHSkhwZGKfI3Qg1Ivmb2HX680wY6GR02srHe\nDgVZhuYlaSSZC8rDauOgJqmd6hWzld2M1ZiYRszMuQvVaA3IwDhhncCM9ad5M0MRG0uByPT6\nZpqsrbymQJDnHpUv2gwpsEu4Z+4emaoQtr5MYdrs4JXIxzz6U2ZPLOFOd3Idf5VBIRJkv8o9\nfepVneFgV2k4+61SA9MfIXY56kelaMNiW0WfUN4CCTYYV5/Gs8eZH5byRgbhyF7CmzSR/aFS\nJmS3Y/MueKYidXCgNG3lyY+9mrcV9HC6BMkkdfeqatAuc8jGBTlzsBO3axxjvQB0K3ANwkuf\nIXGGPqa1LPUljm2Pzn7h9TXNWd1HPbGKd8BOnHBp324GbbnMaU7gegf2p5hhjizHMfvHtSzM\n0cbOxwxOB9a45dVSOONoZGe5U5IYccVej1m6vv307h3Y4UgYFAjptPuvPj8mXhwfvEcVdSKS\nNy8ki7R91Qc1zH2qZpg8R+cnaCfSta3ZpGUTypAn+0epqWKxqrcrtGwlWHLehqPLM24jYrnj\nNU8n95EH3Ln5WAq4s0X2DMsnzxn5Se/tU9SiEKsbA5JDZ4FWIYZrh1jgfLtwE96pxu9wOjRP\n2qxbLFb4lkbEinchB60yrG1caTLFaIkwBmHBb0NQrrF/Fpf9lxvstXbe8f8AeYY5/Ssu78Qa\nhdSOGmI44OOlRNcTPEVdt7quN1IC+biW1ZQfvkYG7v75qs8n2y4EDfLPIcKSflqs07yKI5Tl\nV6bqTzPMUSBv3ing+lILF4xvGriRmj24CqOM02KVfMHmysS/QZ7UserSNDJAYUdG5MjckfSq\n11IjJG6qN27AxQhk0LZmdY3EZXoH70+O6d5PmVducEjvVM27PIHZwSepx0oj3wsdjZRzg8UA\nXVaNrokRDYnAYGpP7Oljum8ob9wyVzVG3m8lpFdN/feO3vVn+1NrIyfvSOcigBJF2t85IPRV\nx3qsuY925SX745q1cSSzKLqVMox7dqrnUUkYMI9qg4zRcBfNZriPymwF6jNDSNNO5O7d7dKR\nvILA7jgn0olkMLbUfbnkr6CkAhV1jLbcL3PWmK6hfNkYKHH4imx6lI0YhkYPGGyvFLdWqeWp\nD+YRyQO1AE63TNbsjD5R91qRmRIFKtlnHOO3tUMIRUZ5X2pjIBqK3vIo/k8oy5GFI7e9AWLi\nXDLauA/Cjhc1Vvb55LeNimW/uHgVUkuJJWIC4A4AHc1DdTzrMsbk4xk8UWAvw3yRIfMhXzG/\nizyKheQM3Xy/Tiq8LRtuMjg7eq96dPIFQPkNzg96YBIBt3byxXnFVplbcWVgrsM024uVjfK8\nOBkjtUbXZk+ZwEk6jikBbs2h8tNrOJScHcaJljEjxXEoVuoJqjFdK0m9F3Sfxc/ypl3ctLNi\nRQWT7xz0osBYeRFhVoXEjZwQaYbrcVyVUt0HaqhuflKIAxJ4x6VA9wFt0Vk3nOAw7U7AXMtN\nJJvJOOPlPFNj2+WS7HbngGqq3DQgAEhWH3veo/NldUVj8rk4FICzNJ5is6lQVHU9hVTzGXad\nxLMOtNDHhSMY69xU91Nb3TRtFHs2DDkHrTGR3EjMyowwi/kfemyXzZEe/wCTsuKheZjIV+8B\nxSF41QiQZlJwp7UwYgmdicBQnTbnk0m6K3GGOd1Q3En+kELGQyjHt70qhdud2/jrikBOJPLj\n3oobnnd6Un2gy4UjC9QO59qdshXDTSgqP4R3pJI0kjEsbYn3cJ6CmAWaKZGVm2seAev4VD9q\n8pZ4EVWbOd2eale1drXJOyVySOaSSAxxRrtXzGOCwpg9yKSSOVYmjj2YHzse5qSzli+1K0qA\n7DnBHB9qg8t42JPzFhy3qKnhV1hJWInccgel
LqBf1TXEv3iCWcdqY/7ozn61myMk+CrCQ+q9\nqsw27yMVyD8vJA5zTlt5IQsewbj3UUDGSRbo8Z5AHA71NE0lwyquUQDHI706HZCuAMyZ+9/S\nmtN9+OUMoI6imIdLbjaVxlhz171LIWaSN4k+baAxqhDJHbxsASWHRSf1p6XTSMGDbeOcVI7i\nzAhhEynOdxxUb5MjKvyIR94mlF9IzKpUlv71U7iaGNWkDh2ZsAGkBPHMqxgEZUHls9aikh3b\ntvAzkNnt9KYuphYFBhV16nB70rTQ3aiRw2TwNpxj2qkSRLIPMG+TlT96k8xZGdl4yfvUk6I2\nJIlBZTgqetV5JDJlFXZnqT1FMonZw0oyAQvb1qNl2ofm25+Y7j2qNbpocsi7gBt3MOeacfOn\nijJVSFH3j3oEK64XfG2eKasblACMbulRxbJIxIWKbmxt9Kk8t/nAcsf4fegRG+V8tlQ56Hmn\noo81vu7c4IpUiVS0eSTtBJ9KtwxwszIw4OME9zQMq5+fBUoue9WzH828EhCO9X7W3i81Y5xh\nTxuUZxWpcQ29nbxjyxKrH5Sev4ipbsMxrXS2uBHlOWGSV9K3dH0U+cRAjP6A9c1oWemy3Sot\nshU4x06/QV9Gfs6fsy6h8TNYgzBJHYwkPLd4O1evA9+vHvWUp2KjFnOfA34A618Rtahgtbfd\nK55aReEXoxJ7cGv0s+CnwJ8P/B3QRHZ28UuqSAGa7ZfmJHYe1dD4D8A6V8ONFi07TLeKNlA8\nydVG92Hc/wCFdGZN2TXK25O5vayJo5gykHrSs20c1BGxx0pwYtkYpAL94jpinfxBabtGORT8\n7sYoKEZD68UKu1hzgUrL82M5FKzAALjvQIcyfNSrkr0xTOSc+9Lkrk0hj+e3XvTvu9Kj3Fct\n69qEYnJ/KgGPZSecUqt8uO9NjZtvzUBhz60AmP4K9eaXHr+FMx8vSlVsnk0DFbpnvQvzL6Uv\nC9eRTerYpgP+6uCaU/d4PNIvoRTvpSAcuOmMmhcHrTFcr0peWXng0AC96Vs9+lM3fLx1p4zI\nuM4oAOeOeKXcN2D0o+8euKbjnkZNDAerHOOgpQTuJxwKavHvTv4etNjBgWUmmqx25K0q8qcU\n7nbz0qQE3My8HipFbjioo8DJzUgYBeOtWIGHOKdyq0xmPGRmlX9KQx27ceOKXBxntTOFXnrS\nq2FwTSGOX5+O1LIwRSQuTQzdAOKRT1FAA33QcdqVcbRxg0m7ctK3zHFBNh+4N0pG3ZPp600s\nEQDGT3pdz4xjiqGHHSnLn14oPGPpTc44pjJOKKTaKKCCm7FdhHXoacxbdyaaRvPTpS5znNZG\ngqL8rMTz2FJ5hXtSLjdilzuJ449aABcnnqaP4if0NL/ECOtIx4yetAhqpwWoU85xmlbPl8cU\nnoAaYDd21SccmojL82Ap/pU3GNvem5O7GOKAIpkEoB7e9SeQOCOlOZRxinbeODxSAjb5VOBT\nzwoIFL91KazdFFWMVV74xSt83U5x1prMV5LYFAXbyTnNSA7sTimuAqdeaCxzhaRl454oJGLh\nl96kbpxQvoBxTGLA5PIpDHsvmMD2Apn8QHalaTEfAzS/wg0AOXHPFOC8kmmk4460rEsuOlFw\nEGGXrnFBbjI6U1V2gjFKuFXB5p9BWF9gaNuOtLnd7GgYXPvSQxN3+zStj8aU/cNMVSSDnGaY\nCsvyjB470BVX7vTvS4w3TNJuG3OKQXG7ctwaWPPPb3pS3y8cUmBtG2miRQPlA70udnWhvmHH\nFIqn1yKYCLgNn+KnNnd7UNgEcUcsevFSwArjPOM0R4Uc5pGJ4ApFXa3PNBQpcEkqKbncMYxS\nFiAR0FKrZWmSIMcg0q4zjNKx29qQbfSkA1n560M2OR0pjqVyc8VW8xl4xyaB6llJA3GM07bt\nIqKGNuucVMzbevNAhu35iQPw9aQxRswLj5qXdt
78HtTuC3pSAQ4AwBgUqMEbsTSEjPWmcZyO\ntMC1G53A9Bnn6V83/tBWJh8TSH+FwG4r6Pi+ZeR7k14j+0Zpqx3FrcqSxlTn2xT6ko+UvEUa\nrIzd8Zrk9QYMFLlgvBrutYVJGbco3dCPeuF1ON8ucfKD93tVobOdvrgm4cBdq+tUm+6xzkEV\neumEYJIyrVQdTgbOhPIrQzZRvYh8gQjA56VmMB5jsMBelak38W7rnArNVVyygAc85prQyZE0\ngkUKO3Wsu4Y4O0YGfvVfm2ruLce2KqTRpIoIB2HirEZ1w3zBlQcUyPOMnFXJLXYDGx4qpIqx\ngxqelUAhbzGY45UVWnKXCsDkLjtU/EUeeSTxVXzDyD93070wGxxkQhgCBjHNRMflBY7j6irL\nTLtUo+dx2/SoZlTzGG7bimBUkjDFmdg0ecgAdKYqiFvlb73T2qVVMasCRk9R601VURlXXL9R\nQIiuMW7AZyDySKhDLMdwGB71M0YaM7W59G5qJU3tuznHUUxhcfvI1w2CBUMzZjAY5z2qdsLu\n75HAqOaMbV3/AHsUCsQ+WY1C496h2s0mAMrU6oZGDK+QOoNRhTtY4+XPbrSFYcrDaAysWzgV\nHJHsBJPOe9SRgMGXJBAyM1BMoPzg4YHnd0oHYG3ecvGwntTto8z1Wo5Fk2iY/OD8vvT/ADF3\nKo+Xsc1Qh0qloQIyDg8GqbMxcA9ScbhV2VR91eAOcDvUPl/KT9wk8ZqgISrKfL3fMvah/lZW\n9+R605FVXyGyw5NOj8ySQkjBPTIpAPhURsSE3bv4fSlmwWA+971ZWFoWCyMPmFRzKNhA9eWp\nXAoSSNHMQp+UiosvuG0EnpirF8pXBVenWoUY7lyx6/nSEEisjLjGO9RENuLKOAak4G7r97OK\ndtLMWX8VpgMXZI7ADZxkmo2IjIUngipzGO/PfbUP+szuHSgBrIeCcsRzSx7lZifm449qGY7w\nRz/hSspOCrcfyp7gNX/V4P389KRshgTxQV5yTmjDMxU9uRQMWPLMys3HWkjZNnJzg8e9JyVI\nQHdRHC4iaTHAPWgQzcCpOfnXgGk4MXT96af7n5e/1pZlVsOrZb+7QBEyLuB/ixinSYypOSPT\nFG0thiNpzimNuizk5HvQAjbM5UFueB6UgXb8u/5ic7acp/dqwThjgk9qbw0jr/COlIAkVt3J\n57AUxSxXDNhuuKc2GK4bAx909acyNKAqhV96pFIRgwwHO1eoo8xmBiPPcY7UcMgGNxXgk05W\nAY7U/E0hDEJ8vBOX75pzxlk+8Bj0601WLb8gHPOV/lSr87K/GOmKBCNI2zBzkcijzGZeDx33\nGpGyrNu7UxVVjwPlxnNACNmRQAAuKQAM68cAd6T73OcnpxTvM3MAEyOh+tAyLafMLP8AL2GK\nkEZUZc4XpmnMV+bBBPQKe1M2tuyzdulAhNwXO3nt9aTBZgrHA9RTuD8xOCBUSyfLznrVDHbQ\nuQTnJ4NDrk5HIHpTssqkAZ74pI/9Xhjg5ycUgF5Zd2OMdKazCQ5IPSjcY9z4yBxgUkjDj5vl\n9cUxMbtKyYxncKd8wBAIB6imsd3zZ+7096cAGUEDApANaR2+ZsEDqAKcqHcSRgFcjmjb5ecN\nnPWk4f7xOe2KAEVeFDnHOeaXc2WZu3PHekkO4AAZPpTnbaxZR8uMEUximLeu5V2qRnj1pm0y\ncg9KGwqhWZiOoxT1APIG3byRS1AYzA4BIYD+dNkbzMj+L2ok2qpyMDrnvSLsVAzHBbvRqId5\nhCqgHB6+tOWQMw67unTj2pu4tg8ZHFLuZQS43HsFoGRqPmK9weSDT2Yb8BePQ96b2wDljz06\n04tKxBwqgcc0CHSYG3H60wHfxu2ehpdpwF6nOcU3y2DHfgZ7CgYu04wTls0MoZCu4kZ5NCkh\nTICWxxxTvM8xSowB14pgRMvzYJ+X2pxJ3BVXKjn6
UzbtbzVPtUiqTyzHPcigBoZhnjbk80pV\nTt3DJ7MDSLlm64/2qZsKyLznn/JpAOdsKBj5ielP8tW+XjIHQ0j4LEH86RvmVdvDd6BCKOcs\ny7V6CkKgrheBjvT1U84xg9z3pI1+Uk9uMUhiGMrCGz8xOOKa2dpYkZzmlVvMjO0/xU5YlRiw\nGW+vFAhj9sHLdaSSQyfKFIPqtK23dnaWbHNKjNGoYcMaoAkwyhvvKOPxpPM3KQR+8zwKGUeX\n8zYz1FJkqylVyOxoAVmG4AjaTxmmsqsuxB82cU6SQs2cYGfxpeFU/LyOc0h2GEMq4HA6H3FP\n3YXZ/wAsyOBTTH5jcNgA0NG6tn+HtQA5UCcL864/KmebsII4fHSlUfIwb5AaYuGl244HQ0xE\njHbhiO2cetNVl3528nn6UFZArAjd70jNgbgDnGKkBWk2j7ue9Mkw5D/dUcmpsfu+vbmmKu3O\nD8vU1QCeXubBOM8ilZSp7YPBpY1H3xjYenNNy7ZXGTnimABdrbe2Kcq7kJ6eppr7o25H1pXU\nbSPxzSHYZtDMGIxg0+OMhmzyGYk0oyOcY46Uo+bcA+Rng4pDJFEvmLzgDvViJv3hDcluA1VW\nztPO7jGPT3p8QVF3bsKBTEWlQRMyMB/SmyTL8q7Mn+9VXz3kww5WnLMX+TZkdaBCSdwGJOfy\noddsed+fUU2XEUm/769wKQTKshXpxn2oARjF82471P8ADURYt8yr+7XtRIWjIYICjfzo2ssT\n4ONw6UAG7DLtGQ3PuKe3yqw+7k8rUajaqtntjHvSH5ZBuOWJoAk8sFcryR1BoXbuyOgHamKw\nYM65BBwVpqSeXuyBk8k+lIBW2opQcM3NCq6KBjtSPtbbwXZvw4p2TuPzdKYCiQbWIU7ulBbP\nB7ikb5gu1gC3tSqFZgG4bH50mAjcKo3Z9AO1ObaoGcZPoabu+YBQCaTcWkyy4XPTFMBzKI2G\nDtNO/iAx+Z60m5l5baFJ7nmmOu7JU/hQA8HccEBT6VHGpDcnIB6VIoXcOcsOfpTdirlw2cnp\nQKwn3d+fu+3WkXdJ5akjrx/hTztdCF+V+9J5YjdNpwRQFh7c3DA4Ur1Wo2BMnQbW9KmaPBb5\nwvce/tSSLlV2sAT/AAikA1VWNwNuaY2Y8qxBX0FP3BJgoUk96ZtIc/XFBQ9ZCrrxkHAFN+Tz\nHbJVuhzSrGEJw2cdqOrcr0OaYhvIUKrZz3Panhdx5GaRsEHPPPSl2oBuyVoAZDI3lnKc5wKd\nu2/d5bqaXb1JPyjnIpFbggHk89O1ADWbcq5Hz5zTt3llmX5g3B+tG7bwTkmkkUtFgrkj7v8A\n9egAWFoeXcFSM8GmRMBt28q/XPWpXBjhjDLvjbmmdVRiPn6AUAJ8qyCNAdpHJpYyY3xnvTVa\nRWJIBHTNORlVicbv8aQrCsu5i4/h5NOVhE2/ONw4qPLIDxkGkXPlg5z9aBEolbcFGAy85I7U\nrq4kLcFSajml3tlSBxg+4p8JBbKn5T3PamWIoZM5BY5+lLuVmx973HFObLBmY5A4C+tVmVlP\nzc/TtQJk6szSKGHy0m9m4wSeuaWNvMAwcBfXvQ2ZMHO0dPrSBicsM8AnpnsaBKd0byLu2jnH\nrTZcCRflz9KkbLbQCD2piG87y+/J6gUkhMu0phX70q4WRWC8Y6e9Hy7pA3DZxilqMIyI9zSZ\n54203L7SDtXccjHUUvy5C8nBxRJuZd+77pxnFMBYw2JPlDoBktTspGocLktUccZRjjgk5qZf\nlByOeoHrQANH8hbG3PQZqNDhTk5ankFsgHBBzxQAPM5GfWgBNhaTcxwqjp60u0B/TPOf6UjY\nZsbflpfusdvOe1ArEeP3jSD5RjGKcxK4CY47d6cj7cMVBOe1NyWRieJA33u9AyVpBtBI59vW\nmMy7eQCM9DSqm6ORdwMmOKGYKEZlHyjk0uoApZpuU+
XHAo3BsqDuHdaHO5RuJBJzjHanxqu0\nBBtGe9MBqv8A7PH9ajVh95u3UYp6tuYvjaueFpdrGQs7AJj7o70CGxt8obtmpG24JPy/5602\nNU2nOQoOcVMzD5RjduHGe/tQMiXKk7kyCPzpWDTOSfkXufShdysSc4H6e1Ksgbcc8GgkayBU\n2Bsc07duUfKDx+GajWbDBNuSDxxTmUovy9c520DJWjeSI7OGB/MUnzSNtb7xHH0ojYiJAh+b\noRT33b84zgfnSAY0ex08rhOhH9aWTarEf3eg9aRcyLszt3d6Xy41Vsr8w4HNFwFh+cK2Pm5q\nRo9mxmb5m5OaiKq0K/NtOcmpAoeMbQz7aZQfxZGcDtSqu1R/CScgUjb5V2qxYjnFOWToG+5j\nsO9InqO+0bZCCpbIx16U2MAqxJ6fwmm27F5NvRe4qQqrFOCFNAxIy38RyMdPSpPLbYcEr6Ux\nQySHH3c1PKzKNrNnjtSCw1oxI27sn61KrecucBMc1CxDDa59/rSwL8hJO0DnFUASShSG2sy5\n596d5gbc6g4PHNN3ErhR3yTUy/6sBMbM85pC1uOjZlEZPToKWWMSSscAVHCx8wrzj0Aq0tuY\n1DSAAt70DI2UEqRy2ePapv8Alp8+SCMVEq7tzP8Acz2pNrdFPHamKxPHbllHI2qevekydhTd\nwDnBp8cMknC8r1/GlmYRqhdMPu5wKQWGKvlMHVd4btT2kd+APw9KZH5rNIQcx54AqUZVlG3m\nkBGJCRjAC+pp7MPLAHApqsNzfWpVRgxJIIA600BHu4AGBjqcVUYCNj3yfWrZYKrRucFhkZqr\nIq+UrNwDwRTAqTMfIbbjdnAFVWZztAXeVHIFW5Y0Vt20+gwelQyKFQsDt/vUuoihKojyobEr\nckU1gyjaRlvSppogwZVBBz96opMSFUDdBz71QEIljWNo2U8nO7NQtGY5iVYFMVcRUkJEq8Ed\narLCyz9Mw9BmqAh3Dkk5HoetQysGkUKuAfzqdomZHddv3sDmq0kbhgvIfPJqRDk+XcCcnoPr\nUkiMkIOF3HuDTI+HK5+vFHl5YM3KfzqWMkikChiy/wANMaNMo3IRud/Xmn+WZgCCBz932qJg\n8Ssi/OpJx7UyhZF/erID8w6qaW3USTuSOoNJtG4MiMRjnPSnq3GEjw3tQBWZdvy5PB+7Uw2e\naPlO3HNKzAsflw3Yn1pkkkvCkDPr60CsOhj8+4CICu7oT+lRYZZpFdcInBz609JXtbiFz0U5\nz6VZ1vJvY5lTfHMm8GkxlK28pt2/JJPWpC3kqvlsd2cD6VF5efmLFQe3pU6lY8EESMOgPWmA\nyOISSx7pCyHkr6HtSz+Z5wYjyz0IJ60yTEjb1+XHG4dverX290RndFkJ4DEc0DRA/wA5AC7O\n/tTYpT5RilTAJ+9jpSKziMlvmdjT2jdZArnJx1pkiLvaEsqbVQ4DetSZMsCkBS+7JU9SKdI3\n2lNucKvWo4bUySDYW9RSESs4DRpna2PwFI0Y3TcbmUfeqM7lbD87egqxKrt8q4Kty3+FA7Ec\nc23BypaTsR096RmWTgyBWU/5NS2F1HpN5ukt1ukxgJ3FRtM7qxeHKOTjav3famKxoCUC2DLF\nvRhgH3pu59o3rhSecCoI7UmH5PMVI+OehqSZpNsa55z0/pTQEkLyLvMQyo4OTWjpsyhgjuA3\nXPYVliPyoC0hPmhucdAPSjzEEfmx/MnU0gOii1tvM+zYASM8tjrV2W4lmSQDGc7eT0HtWBDb\n7VjnY+ZDONxHpTrXUUibbcIzx5wNvUH1pAb8GrTrgySBHU7VrXhu47uBvMlUSg8j+tcPHN5d\n4zvmSH+FPetOLUEReI8MaAsdUNRMVuUD7lxgfWqxkLQhjnGee+DWTZ30bQ+TJzvOUf0qdLr7\nHNvjmD7OqnoaRRrx72y+3IA+960rM3mNIWKq3BwKzY/E
srMY4wY4m5ZSM1O2qLJ8yP5bHoPa\nmBcfM6oCvG7g04gx7h1PcVmJrUnnIyASQEYZQO/rVp7xdpUfdzncakZYjnliYNEvyYwR1qzd\nXyT+QlvAsXOD3GaqRX8M04QJtVhghT+tW9NsbK6jumOoxwzRZ227Hk4/xoAdBuDMSVX5Tyem\nahNxdCwVhDlQcsy1lT30dwu0yGMY4B6Z7VNHq00dvFCAAijDHPWkwNGGQkNskHzcmkVbebdt\nlEJUfePTNZn26GNXwSA38R7U28aCWFVgl3qwwT/WmwL0l68aPAr74244Pf1qukaIYwZNp6fj\nVVWNspQMCT09jUTOsNwh3c9Sc96SA1o7iJYfLHE4bBY/zFDyDzQEkV3HBNZ82orNGHlOHJwM\nDtVaO4+zSFkG5WbrTGbRu90nlFVEef4etVorhGD7HICsRiqF5ueQMG2c5OKm+a0jR0QFZBnr\nSJfkPmklZiEO4+ntUclw0agRMPO/i57UnHBd9u/NZ7NCkmUkPAoA0o9QeJ1ymcjPWq41Jprh\njKeBnC1WgV7iQbAeB19KXmGMkoJFZsM1AEz3SEgSfKzeg6/WmR3jWshwV2fdHOarG8j+Y7On\nyg1DJOzYJXt92mBdvrpLxlKgIOhx3NJIkgYsSCqjg1nreMYwixBecsTRcXXmqEByKBk27MYd\nB8w5qNpnkDMOXHJI7n0qq1w20ru4HQLU0My7QUTB6NQIkkky27Z8voOCDTJJGGWUHqDu9KbN\ndPDKy9f8PSq63DL5hHK45OelAEzZUBd4G7o1Nhumj3BIwWBwMmomjYIjgMwYZDGmecY5MvHk\nscHHX60DJWx8jurIW+XrUKOrOpB2Mjc+9WpLfFqk0koRc5CmqEkKySNg4yOD61QyV5IvN+/s\nLHO71ouWSRo9pBwfwNRvEFGCAVbA/GoNu3BUYAbGO9IRfEfnHJYbey1GEfyZEjwWY4p0doJC\njKTw3Jq55ZSV2XKLjhsUAVBAzNHiLLt8vXvTZFeDeR8nopq+lrIytIJPmHRj/nrU/wBh3s5l\nG59vQ9aQyhHavJ8srnZ1z/Spri3EWCuV/wAK14hbx6cdoxchsdM8VWltmMDsH255+bsO9Aim\nukmeRDBLn1Bqw1tJbgxtncvT39Kkt8cFTkAcY7+9LcTNeNFGWI5A4p9QImaS3dd6gORzSXE5\nVcjg+1bM3hS+tdMkvpWjCKcjcckj6Vi+dGHHnLguOnpQUE0kOwbVKjoW96jhw00UUi7pJPlV\n81BHIWwCcIDuZSM80txqSmZVXGFHy8dDRqAusaHeaDqH2e9UCI/MrpyDn3qLy/JjLIQzA/Lj\n+VWtQ8Z3GpRJb30OY4h8rDnmseS4Vvmjzs+8cUElm6uImYJlk3D5sdAapX8CqpkT94irlvU1\nH8/zHdkNyM9RTJZBGXO7GF5HrQA9Y1jhUo21Oh9jUUIZVCg7dxyOP4qWOd4cr5Pmq3zA9vxp\nJLiRpFK/Icbfp70xjuuZJP3b5yBTTMdoMZ3kn05qEzeZG8bDdg55qSWZyseeFbptFAEjeWzj\nByW45HQ0zaVmK87Rx16+9KsBb58sHziplhEjAEk7T1pCGRR7YQhTLZx9avHTxIFkLhExgjpS\nQ3IhmKJHvBGBI3Y08ZkkHnHcuMqqjj60AM8mCO1YxnzApKs2KljtWKoWAKfex3q3HYFbdP3g\n25zt7mr0Fusm9UU5xzRdDRVeHdErpnbnAwa6fwv4duNSt1TynlVnwFC5OT2HrW54F+Gt/wCJ\nNRtrOytJLp5TtWOJd2WOP8etfol+zj+xvY+DrWDV/FcKz3qcw2YOVjb1Y+tcs5GiWp5D+zV+\nxve69JBq3iSJrHRlIfynU+bKByAB2+vvX3noPh3TfCOjx6fpdslvbR9FUDP0JrQDBY1hUYjX\ngL0GKZ97jPSsNTYZgselJtLcA1KvzZOcDpTV+VeOtGoBuL
LwKfuO4GkXOOT+FKv3cHrTAUOd\n3rTw2OBxTfucYpy55OM0gHZ+X3peCBTVXDbjxntTl+6aQwX9KAfm96RR8uM55oUfMaBD+2TR\n8rAZ4HWmr8vvSsDt4xSAXHfOKM9Djj1pN3y4xSqwPynpTAfnjPX0pB83OOaYp2544qSPOckU\nB0Gs20ZPOTjFPD7V5GT61HL22/eoXcOtAiZc8c0fdP3s03txS8dKChysOeKdznJpgPGMUbj3\n4o8hjudpOMGgDawNLzgGo+rYpWAlHy8HrSGZVOSM0H7tEag9etAxQxbkDr2pW4ak3DdihQNt\nGoDguCSDn2pGYso9KQdcjpTlxyO9AgVRuyRTlO5TxSllwAetJ2JBpjAehNOH3RTON3rS4wOT\nQA9aZu9RzSrRnPOOaAuOyTlj6UMSqj3qPcXOBTxiRs9OMc0AO2jaGp24dqZtwpO7ihc9CaBC\nsxZhxinN19RSBgpwacucYpldBy9T24pF5OMc9aTcfrQzFm4oAdvFFR0UagRbgqjuaUMCvIpr\nY+8KDJtUEjIqBhhVYkcijcMCjCgcUHG70oH0FZfSk27fek57dDTm+XFBJCwYtx0704D8qkdS\nqnmmY+XGaAFKhfm61GpKnHWn4OCc8+lIc7RjgmgBxUj2pqfLuz2pWJ288mk+8Cc9aAE+YnI6\ne9Jz1I/EUuflC0mCVIJIHtVB1GlQ3B5p0mFAxTl2np17UjfMKljYvBGcc0zjdS8quabuCtkr\nxTEObj7vShvu5pdwYccA0xTuXjpQA7+EbeRSFWz6ChX2naKAxI9RQA4dPenbT1JpikM3JxTt\n25uDkUgHZ3KfSm9uKbtG3k8Uqtk+1UMft5J7U1qXyyB14pF4ODSsIG+ZsjpR9R9KX7ophy3G\nTSEPycUbuvFLndjHSmtu8zcKLCF4LAngUH5eOopu0vmnfdX1oARgBwDTS4RTxTiBgZ601h8v\noaBsFzJ3+UUuTs4IyTTd2OB0pWXqQeKBAcjApfutSfdXLcikweGNA+gNhfegN0GKX7w4pei9\nKoENakjkLbhigMOx5psJ2yNk1PUQ9iGXGKY+IwSQD6UNn607yzkZ5oYDVJZeeDTmUFeaTpSh\nR3oAQkFMEcig4LUrEHB6U09zQAz7rZ6ikDDqRzTmYbSelCMrfNigCWOUMu31ryn9oKzkm02x\nuEDMApU4HA5r1eNvm+XoeK4z4zQef4Gl2nHlygmqFY+J9a3LdSMwwWbpXH37f6wEcE4r0HX7\nQNcMwIyOSK4LUlIkYj7rc1SEzk7yMJJtJJWqNwBHllbGO1a2pRv94n5RWTIqNzjNUZlKbY3L\n8DGc1lbjMXKDaPfvWpdMA2wdD2rPmBZdw4CnqO9Um7Ela8/1Qyu4gc4ql5bvGGU7QD92rz79\nxG3cp75quR5Zbby2OaogrM/yuzjLYwM1nlvlLNgmr11IWXZjK9SazpgQ20D5KtANdZQofGVb\ngDPIqouFJOWLqc5xV1m8v1Legqq8ibyqttz1zTQhhVWxmPBPzA1BdKDyPxFSN5hcZ/1Y/iqB\npvM391XoRQArQiRlOc+hqtJG4lIL5I5I9qsnb5aqOh55qDlmIVuOnIpiGbPKIOfcZHSopF2s\nQT8rHt3qzJGflVmyBTGQLkA5OOM1SH0KvzRjaGBweP8ACnY7ufmIpZMMFIG3b1OOKY33W3HL\nDkAGhkiFSz5GAgHaq8hdRwdozUjTfKoHGThvUUy4IbB+8KRQ1jxkg/X1qPaWkwfvZ5qdmbC8\ncKKZuEi7/uvQIYpKyMG5J4FOj5yDtwP4j61H5u2QE8kU9SFVlVc5OSDQIUszSAk4B4/+vUUk\ng2gkhj0/+vT5VGwKmU7k/wBKj42gYIYcdKoCGQs2CMbc9upq/DuZVJIH+z6VTjVI32g7snJW\nrdnMsVx5brgk8UFFry9rbCcjqN3emXeYI8KoU/xfSrurWZa3
S4Xlk6gVj7Wut5+9xk81IivM\nrSEKMgZ5xVSTMYIJ3HPQ1qBWlVYgny4796V9HDW4cng9V9KBGcrYkGfuntTmOWIVtoxTBbtH\nu5OAeKd5a7SzHnt607gN2kcZycdaZuLR4+6gPJ70okZsYG09M0ziPOBvf3pgK8Z52An3pFby\n4yOpNT+YFwSeCMHFQMwVmAzj1pjFChl6getR7vm2np604bdg3Ac8Zpuz5WA69hQIRZPLBU8D\nPAqRnMi5zwPWmqrMMsq8DHPrRtEnU4K8mgBY/vc/MetRM+7PG7ng1IkgYkZxnoKbsO7czYI6\n+lAwyxwq9OppJFMzAk5UHn1pUbYxkPrxSNwzDPDc0xAgDZxwAeKYzgybW496VW3ZA69qRwcq\npXnvUjIpFKknHNSoxEWcbWI60TZfnv2FI2V2tnI9B2qgBWcRhep9aApL4zhccmmxsAN2N2eS\naWOPaxOCc8jNIQgUKpKfdJwRSsFG0EbBn71NVmdtwXbzgg1I/wC8XGePX0oAY2I2OG3k9DQc\n7QzcChAehbPYVJ5ayLsJxRqBAvsCM84pVk8v5R8ytUkhDKQRntxUTqGkypy2Pu96YCbVZxuX\ngcCnt+5TPbPSmgloSyncAac0gbHG5u4FMY2SQOqscDJxgULt4zyD2pfL3bSRhuopqhmIycY7\nGkADPzAqd38JoO5SMpxnrmlyd5LH5T0pvAXkkg9aNRCruSRlI/wpd2YgMZ5pPRWcjjg0c5Uk\nfL0OKYDeMHIxuOKT7rZ3YxxjsafuSOPJX5m/hobqowM9aQxEXc2SM8cCmvGG6LsKjJ+lStI0\nnQ4GMcVHww+9lRxTGEbCSMBRjJ696Y4dOVPttNOb7hVTg+vpQrNsV2GSOKQhwWT7y4YDnFLI\nSq7icMepqNoyRgMVLc8UrfKFGcjoaNQEbDSDHz560s2JPkPG3pTXx8hHA+tN3BVJ689fSmAr\nNvbccgdOOlOXAyM8n1prZbBB+QD86FjA5/ixnmkA5nJ+ZiKYxMi/Mm3HQZ605futuA9hSEYC\nkHJz+VMAGWcsnBx/FSKPlOTuJqRQPmJyRnjFNZFBGDhTSENGI4dgB3GlXIb5+McDHel+Zclj\n0/lS7SPmH8XPPamUhFVSGB6UyNmXJ3YB4207ktt703yTuLscYFIkTDScKMZOOaTcZGwvLLwW\np7Hdg5wcUirtU7SSe4o1AJnC7QBlW4z6GhfkhVsB5em4dqUjzFHzAAUxeGG37o6/40agLGqq\nwyTnvSsxZs7sc80isBuwpIz1pQ2/IXlvSjUBN21T5fBz0o2jbxyx70rfMq5G1ulNBXdk/cHp\n60uoxZN+VA4P86CrIpDfezk+1KVIkDEHB6EUIDncW+b3qhDRgtyMqeM0eW3GTtUcChssuMfN\nmmrgxk7jj0NLqA4bRv5PynpSRnPz9OOeaQqcl805V8zhRjjHSmAm794XI+9Ru2v1+U0bWX5e\n49aVQp+YjI6UadQI1Y/Mc5HvS4HJxuGcUrKYWKg7t1Iyv8wBAXrn3oARFZVZe/WlU4h9WHNC\n5UEMct7U1v3cmAw6UAOMzKQUAHHekVcqZO/vRuXYAR830pGjHGzOc9DQJir8se/H4USZAb+E\n9qX5vm42joQKU/MpJGSeOaBiDPyow3epo2RrlFLL3JenIwVRz9MVH8kjfMSScjFSA/j5if3i\nYxwaPPww5AbHpTVRgoAxxxSbvmwV7VQIkkZd3cgjBNJGDH8oXIpm3eyx84HNPjVmZhwOPvZp\nFBtKttTjvQZCFHPOafFs8k8gMOtRN8qgr8ysaYhzSbWbC/KVzUSlvMRtmOKfJtmbI+TtzRuO\ncD7poH0Gyhf4wxyegNDFWOWJROlLwrsRzgZoRSwJIBGelIkb8jMCOg7f1pzYO0sMkc0eQNvB\nwfekOGU5PHSgdgxtk3nIDdqSTZ94cn1pTKcKeTto3A5O3nrTAR
gM8bm9x0p+4tITjlhikDl5\nBxhcZ4pof90Sp+Y8c9qAJcZwVG7AqNR5igovIPWkaRvJGx8nODjtRs+TCjaO7UASqNx+XG7r\nzTN+F+Yck8ikZgBlVYgd6aucE43g876BCsoSX5umM808OgYkLuBHQUxpFZTj5geD603lVyp5\nxwO9AEisBwVznoBTGXbIpUbQaFbpKCeP4akXLL8y5Oc0hiZKhTj/AOvS73KsjY9aZhnAXcPl\nPy5pV+RmLct6etMBZJBGqhwSD/DSHAUgEdegpEX92cqTk/eJoKFtpQbD3zSECscFgMY4p2Wa\nPcw2DNRiQeWzOMLnFO2llxkDjPNAC7nWN3AGMdT1pIchSOrGlUyNkvyG4pvKxswPzA9qAH7Q\nuTjc1M2ruAJPI6U4Kd2VOV25JprM5UttG3HWmA6b92Ao4Y9VJprbgDkgcYApEKNJl85xwxpB\nuj+ZhlfpQAqSFUARPmA5JqVGZceaT8w4ApsY+U8MN3TilEZYlTJyRgHHSgBqb+ed+05xnge1\nOaTaMj69KZbxrsDY5zhjng09W3KwHHPFAEMbdQDjd1Bp+UjbYOvXPY01lLYPAP8AOlOOAF3D\n0piYMdrAsclj90U5toJO3I9KF/3eBzTmbgkDaTxQIZHbg9OOfSpbhhjY3BHQLTVkABB4oyW4\nOCT370igZ2J54VT1ppcOWKjawz81RuDuKFuhzT2QmLIdVB7UAxY5GZVBGfUijy8tgjHPGafJ\nGcLtOVIx8vamljv2PlwoyCKQhVJYMfunpQoRNoJy3X8aQMGyVHAGaNyjluT1FAxNpUKM8Zya\ndyjeYepOdvrTVIZXZkJPakCxybdzlTjJzTAXzJdoBAJbnjtT2b7pC546D+dEbru+UbhjAFOZ\nPlyOHFADVd2beQC/Tr2p27kndywwQabuUsBj8aft3HmPcR6UhdRq/e6bW7DNIud/T5iccmms\nokBByGHehUfd5ikAY6GmMfJiN8HIweSelDRllcE7T2xSsu1lfdkt364NN8vcyhW68saAHPM7\nKIyAr4+8BTZIzHtP3u5xSvKjOxHB+6DSrjhgR6NmgQ3cinJUru6HFI218AAlc4Oam3p8wL7l\n7YpB+8wFTag70CCNjH8pIfn7xpvPzHaQM0uEG5Cfl60MDJHkqdvRff3pajHH5PvAM57DtSKp\n3AfeHXNJygCEZX9aRo0EmeS2M4zTDUk8wyAtGVIA4NM3B2UHlgM+1PC4i6YJ7io8NGCoGT1z\nQMnjztZscNTTj+5+NN2SNg7SqY55qTglY8Z3Dr70CG/eb7hXnANSIwjYyOvyjjdmpUUnaCVy\noIJqoyBkZmyxB7UDF2rIxkB2jofWnMAq7N3B4yaajARr8mH/AL1PyJNyscr15oARcQylVX5Q\nOlP2h1GOp5waadu0ckHtSt95XIyOmBSAcse5XBYDA4UdacoZUXZwcUQxCJmYdCMUn3iqj5T3\nNLUBY8xQsynk9aajO0YwMBeTQoK5/jHTb609SVYKD83XbVCY47/Kyw56hqdwqIM5PamyTKz7\ng3zDqBTlPmsCV2lRmkMdtO0gHB75qViHwAnBHWo1IYZYZYmm7i0h+XOB60EjfusFbkZwDUis\nwV2644zilSVFkCMOGHH1ojYszKD8o7UxolVvuqY8Ej71RtBuORuBBzTlaRfvLnjFP3EKOeeg\nFIYquvzMOMelTSzCWFXfOV7CoZIzGuSNvPNPjUZY5yMcUEsRpMMhY4Vv4afJ8pBXt1pgZCpL\njJHTip4V53EheOpoKuSxzFOVBx1FMnmaZ9uCOM0uCrElxnHSmMrSRDaTjPOetIXUFf8AdgZw\nwOalZuj8jpwKJog8IC4T0J61IVEWGJDYGM560g1EeNSwYc5POKVvljIQnIPIIpqYYYwUOcg9\nqkuSuxVVhvzjdTAhmUTIkjjlTx65qJrcSDeTkDrxnmpW/d453YOD
TG/1jFW2LjnFO4FO5XLb\nACe/FVLhmkUYXuM4q80PlruX7pPXNQSQgScHaD2pElCaTLOzLnJ9eapuu0FFyG6/StMwDkPj\nFVpB5Z2DBU/rVhcqNlmR8YGMD60y6B8sjO498VIWRV2FipJ6EVBvWTzNpIPTI70wuRTER27A\nEjjv2NVuWwxYs23GKsMo8sEnOTg5qJQCWQKWA/i71IC8jaVwM8bvejlWJZcHp7VKkMCYVVJP\nU88USKdzFjvHYUFDfJLOAQeaWT93kMCTjFSSS7oVYj7tRNPhQ3vz6UDGbmVVUEgH1pVSWGTf\nu2jNKW8xsA8fep11C8OcjduGT7UCGySBgSRyDxTNpY7CfvDNP8pvJwo3YGc0NlY1JUjP8VIo\nibbNE8ZUptHc1oXExutFjwuw27BCvt/hVGSEGQPuLk/wjvUqyeSNzHczHBQ0wIuWXdn5c5B9\nKayjerFcFujetWVWCVZH8xoewGOKgjXoXOT25oAi8t1LKwwpNTLA6xhmAeNeBzUTDdvOTnPT\n0qS4eWSJCFwq44FBQ3zD1C4Oe/akdWEgVDu3fxUZVpM5PPTHakTPzc42etBA9snJJAVeDjua\nmXfDtCy7227gB/Koo5vtEJQL8wqGNR5hbJAUcnP6UCHmQlQ7bt+ckY6U928yExM+05DK1Jbs\n3KEg55BJpSrRyYkXco9ulMZJCq+USHBLHDA+tOF5NGzshIXOBxUUKgEjb83T2qSWZt4GMIvA\nPqaLCLb69PJp5tSq/MwYtjBqhJI2DgnDH71OWQLvYkAjjpmi4URqodSGbFAjQEU0sfmr9xBt\n9aiZfOhKBQrkdu9QqPJVf3jAvyRmkWQs4LvtyOD3NIo0bjU5V0mKGJ1AQbSuORVCOSWSTeCY\nwTy3UUs0O2MKxBLHgCrWn6nFahreaLoODjoaRJI8fltHmTe3arFtcWwXy5Ayzs2A4PArM+0A\nMAx3OxwG/u1djSGaGMiVC+cMCenvTAvbo7WCaJW8yXOQfeltd029pD5bIPm3VTkhLXQjVhz/\nAB/1qxNcCBS0jZTozetMGXbGYNIzNICu3ggZx7VHDeI07LymBWXYXy2chA2hSSy9/wAKfca4\nnmgCARSd09fekWjdWTFqqxsoRfU9ac14A2HdQT3zkVhC++0RgomCvVaga62tGTyynO2gRu3E\n1zbyb1fDY5YelQR3D7jL98k53d6X+17K8hCXBZTnBI6Gs+RhZ3A2SGRG5+gpAaf2iR2L4GzO\nStXEvlmjI2bR0yawpL0KivyFYUR6p5cIRj907uf5UDNW4nfynJOI1GKbb3xVQQAyL2HWspdR\ne9kkVImkUjcY054pr3MRlby0aIsMYJ6VLC5tTXglbzmcBMYxUJkXje3yN3zmqC3QhdwPmQjb\nzSSLNHbxIV+WQ8AimI05pFkijTcWXsRS/ajGCh+Y9KzI5ssqg544AqWW6i24xmXPNMpGgrNJ\nHGd21ged3NNumeOQbpCdo454IrPW7HmAbtp/MfnTDeySTLk7gRzupgWm1PfGCgyc8LmiGbzF\nLsnbBWoHuIssojWMD09aas2I8livbFAFuO/ezwY8nPDD1HpVyHVoJLcrHCWYjv0FYYuD5mxv\nmCjcKX7Zs4YFe25e9ITLTSIqlcZbOfao0aSYYTPGSTUCkBsB899pNH2ht4KBkLcDb0NIRLK6\nxrychurevtUe5FjbYS0XQ+o9qZiK4UbpVz1wacZvJXyYyoU8k+tBSE8xN4wNvHHpT2Kwkszf\neHA75pki/uDNIVjAONuagGZmVido/hJoESq424lbcw5/wpu4bzn5VYdqVlMatyvmHo3pUK5V\nS2fMfpmmNj/MlYIpJwv3cmnGZ5JDuODQlu4Ul+SDkMKdGit8h+997nuKVhEV5M8saiUZSMY2\njv70vlubcuiNt6BvSpo4RHu8xst/CfatC0VlhJH+qbqpoGUHhSZFDM
SQvBHHNT21sY1DFPmx\njLVd8hVTaqc/eAJqUW9zJCylTnPQelK4Iigt1uJs/cCDkikvLdlcCKXIatrw/wCFbvVRIlu4\nHVjv4z7VevvAMljotxfSuytbsA4Y4zk44oKscpBE24tGPmB+bcauxszbwOXbn6Vr6L4Rj120\ncWd6sdyoz5cnG4+lRaj4S1rw3E0+qabcQwj5VmCEpz0+YetIRm2qiVGIG+TPKg0+VvMVQ6ZH\nYelUZJPKjXyGYHPznv8ASksJ3W8jikIaJm+8xpoC19oit4xGYSsrHO4H1qs0i28oZH5610vj\naLTtP0+wFrh2kG/zh7dq4ZrlZGVmz5ZbkjtViN2fWrq+27piUUYC57VkfaFdmY5cryaRrgRt\nsjILY5Haq0M0UULDOxicBfWgQMvnXDupLMy5AU4FMJHKj50H8R9abE0SwzbyyyYwMdKg8t5V\nYI64XkjPWkhk0k3yhOp7LjmouGTpt3Dkimt+7kzk7yM89qbJMMrtQtj9aNRApRWWMtuc9Dng\nUxZEkLrINpA64oZQ2HkGGB3BRRuPmdjkflQAzzHmYRDcit1x0oYksECkgcZFSQh1XI6Z+760\nrRlZOV2BuTigBi5yYjtGRkj1Aqxa2f2hsKcKo3AE8CpLW0QTJmPIP8TH9KvzBPmhWMgj+7QI\nqLGlvuTP73dn2xVpY1jhbywMnk/Smm38y4DcBQOcd/apgu2YbUbbnBPoPSk2MX7GTHGEbYG5\nORVq109Y5y4znBAz3qW0t7m8mZYUJTHBbiuj0Xw/d6lJHHHE0rFtuAD/AEFQ5BZmLaWJum3x\nEq4+Tcfrjivcvgt+zz4h+JN8ltp1k2zGWmkUhQucEn2617Z+zz+xjqXiO4t9U8QwvpWjLh1R\n1xI5yDnafrX3Z4T8I6R4F0ldP0a3FrEOW7lj6nn9KwdToaxicD8BP2c9F+DOkpI0cd1rbrtl\nn+8o56rkcV660hXAHA6fnVfzCc/MfzpTJ0Oc1lY12Hf6x22jaM07vjvUO7dyMgUqttIx3oGT\nbtq+tMMnzZP5UCQbSOhobDKaAHB+c7aduAxk8UijavJyKVcY5FJgKflzmnrhsEGk+9z1pB8u\nSBSGO29cmnAluKiViwFSMxDYoAdwpOOaFYKpz1pOWb+7608ttbgZoExEPbrRvGcEGk3HHGM9\n6XPI7GmIfH8wJbg0DLHpTSeDjmnK2VJ6GkUIcnA7Zp+ctTBnjilX7uehzQA91+bPApGOcADN\nI2S1KoPOOKCROeg61Jj1GKZtKt6mnszCPmkUg8wbqN4YGgY6gcUqgLnjrTGG7sOlLyvYU1vu\ncetG47h3oYEmRtGeKTd82cU0sDIRT1zxkYouA1VJNOHpSbdsmd3FJuKtkCgZJt468U0kKc01\nc0o4bmgCUfd5wDTWYqM0mQTk9aduwelAgX1zwaDzmm7sZFKGA7UFDl5HSnKyq3PNNHyjngUf\nLsJA+btTAeowpPTPahgVUdzSM21hnnikVtvUE02AbSrA4yKcrDceOKXgMATx1oX5cmkSw4bt\nT3amRt6inbssTimxgW2qD+dIGzyG4pTgqQe9RLHs79aQyXn1opN1FUBGjqe1N4Oc8imQg+WT\n1IpG+771iBIsgU89Kcjr82Rk9qbtD7R2605V5IFMY0N+754NKswwOM4pPvZI6UmMpkcUCEZj\nIATwM0gYbsZ5pWwVAPNBj+bNIAVTyepp33hyMGm7fl5pcbm4pgGCzDHSkb64o3knpio9u44P\nJoAbvKtjGfenl/lzkYp8ihgCOlRsgZgOooF1HZGxSvWkUFFOTnNKMbQOwoZd3fAoGIWLrgYF\nRyBo1HOakaP93kHJo+8uaaJY0Elflp6gbcD8aBtGMDFAU7aYxv3ZMmncdqRvmQZoUDd14oGL\nnjPSkZjsyOlOyGyAMimup2gdBUiEVsj5qkXG3jrUTKQQuMmnbSrUAP3H
gU3zvmIPWmMxXnFO\n2AgN3ouBIxwBtGaapwzZox0B4Wo9rKx2rkGgCZVBUZ6UJgZx9KbtO0E8UdOMUxC7lXigsetI\nWzwBzRnjHekIFYbOR81Bwx56U0HdzilX5VwRzQUHl55HFJHnv0p8h9CM1FDuOQfWkOw5nG7g\nfLR5mSOOKRlJbAOKd90deM4zTFYXj6UhPbqKUNhun1pjv8x5p3GhFVVznrSkgc4pu7POOaXO\nDzSJFBzyPxoJLZIPFG75eOKQYSMfWgAI3NTVyTz0oV92ecU5eI+epoGLxgZpkmWBC9etKzZp\nN2aQxp+YZpdo2j0p2flpu4dDVIkcvUBeOawfiRbfaPBmqptyfK3A/Q5rd5CnB5qtqVub7R7u\n3fkNE3Xvx0piPhrxM58tSMdOT3HJrz7V5Ft85ya9H8ZWf2O8vISeFkIx6e1ed61Gu7jkL1pi\nOW1B/OU7nwOwrJnUrtIPFbt5Ckzbcc9RWI8gVnXbkA1RL0KN0wZcgY5wfWsqU7UZS2OfuitK\nRso4685qjOo3fKMtjJqkZsgaMFcv9wc4qrIuZmkXgEYJqy2duCc55xVOaT5iFOAO1WQVplIV\nipz9KoXAR5PLDndt3Z/pWhJJs+YdD2qi2GkYhDn+dUgKrOyxs68Y71A2GUOE+b1q80QIOwAj\nGcZqAKHTf0PdfSqEVGcAtnd06VAqKsRIUqCe/eroZVjdz83HeqzRll2lsA80dQGOjGMSEYPY\nVE0hYEAdPmp/2gRqBjdg0iyAZc4GTwKoQrL8ud4ywzj0qpKzrGoyGOeop+7YzM24o3BPpSNG\nkLKseWzzuNAIhYkTYJoeIqu1eD1BFOWNIZDuBLMcUpSVVJBGFPWgRUbLMTjd60nl7o1ZRkqe\nVqYOI1POWY56U0pyxPyLjhveiwEUgaRzkEA/3egqOP5W2gFql2fugd3PfmmH5XDRnOeD7UWE\nRyIGUMvY8+1Mw27cG3dhippGZd+1VOeKjOI4lwuG7ntVDJI2+8rKXK9ajmbaoYHv070/e8eW\nH8XJz1qJmU8g/M38OKB9AbDIcLk4zmo1uNuwkfOvNP8AMDMcfu16UhYZ4Xceh9cetAHQ2d2J\nlcY3rjBqhcW5tJnRSCnYj0NMsZ1XcF4P86lk3Sf6xdsZPFSBC0joEC4OKk+2v9kKnr24qCRk\nDlozwvWhpGkXeAFH6UrjKc67uSc57e9RLCFYuQcDjJNW5GZRxj5qqvG47nHeqQrDPOD5OOOg\namLlXB6mpCQcIoye9NbhiRjgdKAEZdi7gu72pskjNheDu60xuCDmhmG0Ko3ZOTVCE2ouQ2M/\n3aSMhhnbtGf4utOwAXYcgdj1pGIkjBJ259aLDBgzc45/SnRkbtx59R6e9Icbh8//AAGjcfvE\n4PTb6ikCDaNpBAx2PfFR7cLjLEelP+XYefm/kKYVdV4bNPUQu7PXkDtSNGW+YHj0HWlZdyqg\nfDdaRQH3E7o9tPUNRrx7tpztHpTf4SEDZzS7epA3k9Oafufb6t0oGhvBk55GORSNvjZdopy/\n6ssBz0LUz5e3WgGIfmwAvzdvenKWSXLnj+VG/wCXgnj2pFYtGVKn2pCEjV5GOXx/hT5GIIAH\nQcUnlsoGeBjmk3kv6jGOaAEYCPY5+8Ow70cz/eOP4uO3tTWU/ezkryFpqsyybg2Ax5NO4Dvm\n2kJjH5Zpu8LgFMfzqSSN2+7/AKwdqRQWyTjOM/Q0gFP7ld3duNtMVgq7SdpJ9Kd5u5VOMmmP\njk78nPC00O45u6g5BPb1pspHOWwelJ8oXaDz3HcUKpLDjOP1pXAX7u053L3pjNJuYFcDqMUx\nlZ92SV56VKu5iOc5GKoBI5mXfvQNxxntSxvxtJAAGcUrY5yMEcVEyhVDEbjmgBzZaMZHzZ4P\ntQy+X25od+uOmOKcrlgMdO+aQC7/AC23oMpTGVhGFX7zHNOj2KCVznPSmq
v3nY/NnimMV8eV\nnb9ajMzGVAMBT2pwb91/sk5NHXoAR2oEKwKyeuKa3IOVyOtL833SDnON3ajzCpGP3ig4IqQG\nbVG3P4UBN2XzhentTyNmWBG386Fx03bQT901QxiMrRkZ2sOx9aHYMSwPQcj3p/ljczFuegxU\nXl5zk9scetAC7sjk/MaFkPIbt3pGV/lwfm6GkZlO1SMkjmgRMGK8K2O/Pem5EyjJygP3h60m\n5TDjG5B+Ypqt5fI+72oAdx/Fzz19aVpG6EbR1FMZj93HfOae0pZcn72aQEZkLckfL2ak+YsG\nLfJ6U9mJwxGATgU1lDK3OWXtS6iHSbGwQD74pCpjYMo2+tOjznAXkDPWkaQyf1pgNXAjbAPX\nNLgbefTNDfKoPXnmjcGYDGaYBGx8kgH3/ChcbRtU5/pSHKqwUYoVDlXB4AyeaBit94nPyelA\nb92VC9e1EajcXcF0bkD0oVdq4Xn1bvQIVWION3Hakk+aTLHA7UisqMGQ/MOMU5SZG3SfeI6U\nFDRh1J+6D0qMqzOqbdqZzup+eDH0brzTdpZQpO496QgEw5GTjOAMUsit5ZJPPp60/aFOVX8K\naMSZ+YhuuD2pgEanyzzgH1pu0bVBYgZ4x3p37tgVPIHNDMh8v5TwelIQxvlbhce4NNbBUAHg\nH86lJXzTu+U9RQCxAGzg9SaAI9m6MBgQc5BBoFuqsz4znnrTm3BTjkClE26Eow+XFADZGX5G\nCEjoaX5BNuOcdjQrrtDMxzjFNaM7MDketMYHCktnmiRdrLzlsdKeI1EYC8nvmkbc2GIwR6Ug\nEXMXQDbQxCEcAseaJEVWAySz87aTYJHO3GF9+aWoCfNt3jjHWnt/rI+fvdqaykrnpzjApPmZ\ns9cd6YDtzyOSuAAOfwpHb5go4yM0r/KqkDHb60qt0xjPTntQAxscDqO9IXEabQeR6U9mVjn5\ncUMhVTIVJAHIApiIgxU7X4zyKckbnPzDj5sU6TPGNu7j86jLeZI3r0Y0DFZvlKgZDc7gOlJ5\nbRqVBLEnJzT/ADArABAUx0oXO7cx2kdqQhsmGQbcjnBJpDtwysMgelPkkURhXbaM8HHemRoy\n5Kt97t607aAO8wsoVDnvtpQM5yMbv4vSmbdo3MQD6CncsFycmkAH5JOD8uMZHekCgOSOjdqd\nEAysp4560iqsoUgkbTzTGEbL8w27eMfWl+ZVAz24pJCJtwJAwelBZ2A3MFVegxQIF+9gnnHb\npQ2+XJYgAcAdKUKSoJ6Z4IpG3bSSOccUAOO1oeuz8Ki+7IjKMjPNSRsGQMBkY5z60i+h4zQA\nrSFZDxlW6GkYliONvbPahuSAMkL2pdqyJ04znANABgvDz8uOKbkKFw25um6g5Uk43DsO1EbY\nwmwZY8e1IB3yqOXyFPT3pM7lBDEnOcU1v9Uy4EbA8t1zTtvyjcSHx0pgOk+ePA+T60xQJODy\nf71DbSmMHPeg7VUAj5O9ACPjEmCVIOBRIrbVweMYqTzAcgLwe1LcMpKrnAWgYA/KFT5CBzmk\nP7xQXGBjkUnmHdng9xSswkYccdKQDWYMoAGF6AUkUnmbmHUcBTS8NjAwPWmmM7iRwM80aiH+\nY0iqScP0xSJnzPk5PekMg83gEDOOlOA2TYQkf3vegAZduMjC9KXjZkHGD96opd7YYjC55GaS\nEJHvcZ+bjFPpcYbDtLMOc5DD/CpTlWAHGV7Uqq2eDgheDTI9hj3q+Gzg7qVwBgRlf/HqFPlu\nDjJHHWmEuqkn0xTo1HljC8dKYWFmTMZyAWJzT8FOQuDiol2yA4JwvBqRVDRs4fHHG7vQFiOH\n5XDONzZqSVk5Yrhc8ClVlTr1Hr601tzRkZwSf1oEKrbY9ysQCcEGl2sw2q2Pf1qJlH8T5C9V\n9/WpPlKkAnZ6j1oGMWMhgCc+w7Uq7ihBTPel+b5H6HOMD+dSnK9T0GPlFAiL
zCY8kY5xRIcM\nNo3ADO6kZ3WQKo570iq0zEBthzyD6UALGrM6HG12/lUqtsR85yp6Hqfeo45T5h3AkrwD7U5m\nMivuHTnmmwuLJgFSOQRmmSeZIVIOATgkcYpN2YRnBP8Aez0FPaQllZcFQMfLQSPWRMNtG6Ne\nDnvTBGi7sE9eFojZW3ArkEUoZZG+RcZ/ipDYx2EcY45zkYp7HOx+39aXP3ty429M0qD5QD8o\nY8ZpXGNOZNxI2460RhpFJ2HB43GkCli3OQKkeTbGq557/SgVhFYZUbcKvp3oDCNWG44JzxRw\nuSBzj5aTa7LtJwMZye1MLA0m8fPhV6bqVXPBBymcCmLtQAsN65p8a7XYMduPm+tAhy53OCMj\n1pksciOpBB9DSwyHa+BuY8ins27az8H+6KRQmH8kgY9Tz0p6sWUc8EcNQCqtleh6nt9KiZCW\n+UEKOc9sUASf6xSSTgcBfWp0b5ecAj+GoBsxnf8Ae5A9KUMDgdaWoCzbCcj5QevrTFX5fvcU\nqtukbdgRqOKGY+WNq4YEGqAX0BPsAadHtGVUYH8W6k2gSM2dz+9EkqbtrNg5xmkwGLMdzZXJ\nHAB71LCzSLyNgNM2sxbHMgxTljdmwDyOTVdBk8WFAXt2pN4l3qyYb1prNgDHP6c06MlhycN7\n1ICiPZGCrbselIrbuinOOfWlVz0VNgPp609t7MxX0xTERBQoOxNuf1qRIRhXlOD6Uwsd4Bbo\nMU9vvFX5OOKQhJF67evbFOfbuRUPzd2PepYrdsJggOOTTGt/nLBgwz2oCwyVozwRtx3qSLO3\nCfe70xY4wx48xs1PH8uSy4PrTC2okgJKHccA4NIrHzC7nAB+Xin7FZflBOfepFZdoDJjHGPW\ngOpDuZ/m/hJ61JwsgXdgnmnOR8oK4APFKIxK+XGR2xUgL5bDgncAcipFA8w7/l4zxSJiM7un\nah9zLuB3fSnceo2GMxgYbOWyc9cVYDbmBx8vWkRFbyvm568CnqI9p3HkHikIcuJG3EfKPSme\nQnmEnLj27U7zCsoxt54+lIvmIhTcCc5GKQajlVCfmBHpU8caMpLAD0oSMydSAwFEbLncwy/r\nQO5Alk+4nICdSxpzRAFSuGBGAT1qxtab5WXG49RTktz5jELzjaOaLE9ShcbRtIXgHms+6VsD\ngnccithoVk4DZxwfr3qvcWbjaQw6/lTAxrhCAdxIH92q7BI14QuDx+Nas1vGytzlgc5rNOCD\ntOwZxzTAhkhKwoVjy2cEsahMixW5gK4ZjndUzL8rI7FR/nmq6WYjXDSb1XHzGqEZvC71D5an\nJEyspDcn07VLcbRJIyrs5wKSJfPUbBz3NIpCSH5sZ2t0oO2blWwFGOfWnoqpG7uMYOM0vlhU\n9eMjFAyFdw3Rn5lzUbxFFO45Qc1KzCQbl/HFM5CkDgnjBHFAya2jNxJHsQDdwGNWpo1SYiR9\nzLwQOlQ2tx9jlXcu4qMjFMkxJPNI7YB5BBoF1GzxCFZAjEbu9Mt8SOqO4wB91qVjujVRyp6M\naWOMLMpaPcF/iz1pajIunK/MF5O3rUfl+YisWyM7qtyeVtfafKdRn5fT0quSjKoKsFYUxXEM\nO5j12huV9B60slqGkdWfYo+6fWlaY7WVDhicZ9qGEm4MxAcdKQDVVmYDABA5WlkRlRWxxnsa\nVVaKXcQNrdacqxhmUKSOoyaBkEAzMeSe/TgU3AbcuOWOAc1Mse0/e4PemtC0kbEFUweKYh0f\n7jIAKyDj60M6vb+WsQyDlietJ5zAhmGARgMaiLNtBUZbPzCmA9VG0le3NP8A9Xwr5dhmo2kX\nywNvOc8Uigu24c+1HUBygbSGbCHt6GmKzrk53n0Pp61I0Ufl7Wb5v7tLJhk2nH3cDNFgHneo\nRtpwR97HWgLJJIAPmXt9ab9qmmQR+ZmOMfLx0pluzJkgsBjIPvSF1LTRtCsZmb
51PemSqzIT\nhWXqPaommM+4y59vWlMgKKW+VV6r6+1AdBzM6qpI5xlTSszSzCQvtfbgiljcSSKH+7nO30FR\nj5bzcPmjLfkPWgm5Yihd4tw2pt5I705PKCLtXdI3p6VMy20aOyXHmsT+FMT7Gkf7wnpuU/0o\nGh0jY2KVI+hpTGsqkSBvKzkL60RzO6qySK6txjHIpFVZJm3hiB05oBlcOY8kgLzhW9Pap7fy\nmheSdCLluI2PQilYGJWDcJjcqn19ajM091GFkO8YyFA6UyieSzurOKF5I/LMmCOc8UWtxHBN\nMZlD5BQN9aYZ55owjEnA2jcelVI1Mezd8yHkE0AIsZll2qeVPStRZbaaMiZWWaMbeDwaz/M3\ntnbg+q0saFWKlidw6EUWAnuL6W62WzjZAoAG0f1qGW1MahGbeWPyA9TSRQ7Sd52pkqOeacSJ\nGAdtwj/TFICW1urvSpM2nyysME+1aa3Vm2+S4VmuGGCFHFY7+ezGTeAH4X2FTwqyuitjYflL\nN60nqBqTW6xPDcWrMXAz5cgGKjvtWuNSmV3ULhNu0DAH0qlJcJHImxiQvWopLgyyBQNrZzTA\nduljZPK2oVNS2rOkyyMyu7dqq4CSBQpYk8tmnqNrKpG4r0YdRQMnkkzIWXCxqeR71E0jXDKw\nIRR3zyabIfL4IyhPB75p6qjS5H8JwaQChvLhHm5CMeGPc9hSxyMzGNuE/WiRVUsST04B5qLa\n7qm0YzwTRqBO7/vmxKqgrgEiokJ/1jHcpGQuaLeKTcI2ZWLf3hVoxKsxRk+bsB0FAEEYj25J\nO5h6fpU7Xk01uilFSNeNq9frUTRkNmQ7h0z70PDJvwkmfWmIiZmjfeqgnsPar1valonuHbDK\nflGP0qnHbiXzFZunVa0WjdnUplVHGKRSKSWLSSMWyVxvIXoKWGEssoY59Pb0rTiurmOOS3RF\n8hhk4HNVI0bzDsYBX6jH6UwGW9rbtbEqS0qjlT0PvTl04KoAkADHnPapJdPuANoxtP4cVch2\nLGcnzMcncOtK4FVrEKoO5sZqT7BGsLMW/escA56VYEUsn3OjDOD2qW1gi3bWJZlOCTQBDHps\n11II0QbcZLHjNdPH4Fv5NJjvLWaK6XPzxRn5l+tZDXKMqg8BSelavhrxU/h2689cyxuPnh7G\nkMS18MhZo/tyTRhm+baOQK6yw8M6Nd5NvfzRyKeEZecVhXfjm11a8klcNbx4wqYyc+lZ2p+M\nI7OFVh2O8ndf4fxrNq7GjuYfBqwXOYtV+yru3LIFzg+9b8mh3ci+Xdahb6xZuMSlcK35ZrzP\nRviZcWsiR38UN1bk4bb1I9a35Ne0tdSgmtYJDYsp3dvnNKxRLqHgOx0zS5brTb8pcM+FjY44\n710Wg+Ov7H8Kzafrsc+p27jIQnOR26+lcjq17bybfNPlwOuQueQPWqZ1i2toVQOtz/dUtnj0\npksxfHNz4W1CFrnSWlgvFODDjjjqM/jXFtJ5kQUhg2dwNd75mkamskCQRG66njBzWZ9ijVj5\nTRb8bTwKpdySrY61HqmmpZXMYTyz95uSPesjWNNg0e58uOQyBl3DArcbR/LQ3KR7lXggVBMy\n3TGRkA+XaA1VcZy91KuYoYk2kLyc/eqv8oXP3xnHHatu80a2TF1AzK54IbpVK3tfLhGV3Fmz\ntqiShJ80ZEjbIum4c1YjtZVs0kEfyMMrz1FWFig85yAcH7q1LDdFbc2z8RtlQ390GgDLmkST\nayt8w+VlPaoVWS4jMkSlUjPDHv7Vp3OjxWuwbvMXH31p7qj2oiHCA8Acc+tAGRbQ/aCzsxDd\nWVuOPSpdokTKnaTwF9qtOIpioOY9pwW9am+zRqxkA38cUhFMxFdjZwrDAPpT/LZsoDkrj5vW\np0VZMsSRt6Bf5VLDA0mTnaWHA70rDRWjjeV2KHM0ZztJ7etXoI5ZCGzl26npU1tZ
hucYfOC3\netAW7STCPZg9Md8etJyQWKqWrLGpKHIPBUVp2+mmaRWLEOx/hXNa+h6Bd6goSOCSQZwFQZPt\n+dfWnwB/Yn1TxOIr/Xk/snTmQOWmTdKxPOFU9sdzWMplxj3PAfhj8FfEXxI1CCDTLZ23uBgg\nj5ehbp61+hHwK/Y48P8Aw7htNV8QKuqa2oD+Vj93G2fvf/Wr2bwL8PPD3w50eHTNFso4YkH+\nuZR5j+5Ye/bNdC3fI4rG7ZryiNIm0qnCDjC9PwpvmM7ZHekYKq8DmmrlepxSGPVwuQetNbLc\nqMDvQuOcjNG7avPNSxkgYlcY4pVYDjHNNjb24prepPNIonXa2Mnmkblcg8VGMYyeppTwnFMk\nm3AqQtKrgcnnioo22Lk9KVcLweT1plEqyDaDjApyt3zTM56HHtS7vmxihiY9s/wmnFWDKx5H\nrSM64HHNByyjJ4qRi7g2ST3pecA54pnGeBTiw4HY0CCX7oOO9LletN2npninLgHGOKBoU43A\n+vanbxmm43Hj8KQxngn1oEibduQ/pRgNHnHNIvOeKFyRjOKBsYxO7g5qQsQucUL9OaFDMetA\nDlk6k8Uqn5ueRUYYs3tUmR60AOzhs8baNx78E00YbvS7uPegYbR0pRhaSMhs0u4gZoAbuZX4\nHy06QFmBBxSYxjHSlZSrL3pjHbSR0pxPQg01WLUrMMgUhAy85zxSySDcOKbncoA45pVUbuT3\noAXqOaRpAFyaQ43Hml2fKM80AKrFlxjml3FepyaTkdKd35FADvMDLz1owWPHTFNyAeBTl6gC\nqQxONuD1p27p9KRsbiOtDds8igQ8MH470L1xTVPzD0NOyFakPzD75OBzR32jg0bueBgetHqf\naqBCj0IJp/XoOKbHny8mnDngVIB5YopfLaincdyoMxqVBznk04cjHSkZdrA5zQ25oztGD6ms\nRijCenpRv2jGKgj8wf6wfSpt3B5zimA0Kd3XApW+7gGlbDR55BpnG7rTELtHQdadywySBikP\nytlemKVcEc0AIzhU56Ukft0obnjGBTMlEwDk0gJGJXp0qNgWIcHB705ctg9qc33TkU0gG7ju\n46Uq/ITkdaRZlZQuPmoUHaS3HpQAxgWxj72aeIzyD2pykdQKRsnoeSaAEPGMcCjYQvFOdeMY\npjFkwByKAHYAwKD9aRfmOQOaTaG5zQApBYc0kceFO4cUL93HelZiy7R170rgxCdrDHSnFt3X\nrSfwgAcepp2CPSmMaG+XdSqw29ee9Iw64PFJEu1ST1oEAYbue1LgZz0FNX7xIpxI44xQIYZv\nM4A5p/PTvil2hJM9qarDceeaACP5hS7SnHWnKwXim8saBDDntRztHGD60+QkMoA+tLn1ORQM\nZtJ56U7ac5zijo3HShcNljxQAvG6m9s+9KCuOTg0D5V46UCAMM8daRvflfSnccZ/Sm5HOaAG\n4OM5ppxS5LfKDxSMuM84NLqMdxwf0pk0gUjjinjHTFDIHxg0CGsflBHQ0i9uc0NyAPTr9Kdt\nC/TrTARsLzSFvmAxmlBBbI5xQrdfWgfQbIPm4oX0p3JpSo45pkjRgrnpSyKvFHHO7gUjYbpT\nGKcEjA4p/ltJFKv8LIQc0J8qc81JHJxjOAeD+VAHxF8ULGWx8R6nC6/8tTt+leW6jGjNgHJB\n5zX0L+0Dpfk+LLoqpXccivnzUITHvJ45NUSzmbqRY5Ww2PmrDuiv2p3To3at3VIVUZXhs8+9\nZMsfzMduCBnNMiRlzRhTlOh61lzMVkaQfKPWtTiM5H3c5INUr7EzcYx/dq7GJmsxLbwDzVW5\nVWQsCQ3pirskrMpi3ZxVafMkexhtx6VSAz2kAARvlPqRTQsLEnOWUdQamnt/3YLtkHpVbCIm\nSuDjJqxEDTLkoVwGHpVZQWkYJxxVtrjzvn27VxwMVXTMhUhcH1pgQSrtK919BUDxsV
2gErnO\nfSrfl+Xv3HPNR7kZdyPtwMGmBW8kc7FwfX1qBlZW2quWxznpVmRmVlLcDHBqKeXfGVPOeuKY\niL5mjIZcA96jBZcnO4DgZp5k8qFQPmXPT0qIhmY/wpnJYUAIvcs/zdcVFcRn5ir7cdqldR5j\nFeE28N61GVkbDAcsaAIyDhW2YJGBmo9jOXEnIHQZqwx+UqeinkelNmgX5XVsNj+HvTuIrxsP\nlwnGcc0kmI5mXbtyOKfIpmVSOO+abvWXO4ZPQGgCFCem3POc1IwyodGzz0NK0Z8vy85PU01s\n/eI37emKLgEil5VU88ZPv7VGzMW+SNdo/SnGYLIDJkf0psi/N8pwuM5oKQ2PZuYYyW7U5lEe\nw43FuMjrSKzScuqjHAx3pspLSKVJ4FMQRsYWweVPetJZhJt+bCYxtrMG1VGDuHepomVFCh9x\nzzmkUWiVXMaqOOee9ROqyL5e7bxkCofMG1jj5s9aimcMPmJzQSK0jKpjQ9OpIqESpIpEm4Ee\n9LG7lGUMExztIpjMHfG3jH3qYDGZWddvBNK6puKk7W9PWgKWkAVvmAz0pjKC6FmZnznJ/lSE\nG4spwMADrTMMzJgc+3epgNrMPX5vpTHkDR5Xh81SQxCo2EBdpzzSAGTLfdAp3ooPFDM8Z4TK\n4wf8aQDAwkwYugoyGOerZ+7TY1zu2tt/rTiyqQSpz0x60ARbh5xCo3uKlhIZ24IHoac29cMm\nFHvQGba24gMRxirGN2x8DJBzwaRkK78nBpJZPur/ABUxpOC2SW6UCAsFjBPB7MopCxWQ4IXj\n1pN3lx5x+9PGBT9reWMLuoARWKg5H7umB/M68d/al87y2KFTg+3FI20rg8DPFSxAMjJboe3p\nR5I5KMSPrS8tyw47UnRuP0oAbjEZwW3DmlWXCq23LNxSszMxC/iKXnhhjA68Uxkf8chx+8Uc\n07G2JSBg9TSKuwtluvJ9cUiy7tuevYeopbAKzAoDGSrZzTPODRkLw3cGiRWVSGGCTke1OWNY\n4zghnbjpQFh0cY8klTz1xTAnmDdnAHrT1Y9enakZvLzu59qAGHaDuA6/nQvzZBPXsKcqnAyQ\nO/NNaZWfJj2n1pjG5+XI4GdtP3fLsBwFPJ9aRg2WYYYHt2FBYKVxwvQ0CE8zaACOCetO3EyY\n2jA60bt2VxkA5zSYV1++c+1MBnG/cOU/u1J5ZZgi/XI7UKuUxnA6+9CoqqxYsCemKQBIuzJD\nUzdwvHXqadsLfL94YycU2PazjIyV6Z6UagJnYpIO5SvYdDRD84BA6ck0M/lsVH3R1+tJGsmw\nqDgZzmgBzMcEAYzzTVZm2nHy9OKeQzZbGOMUL90r0waYdQXdGHKAbuu31pGYyqrBAmeoqNZG\n+dQcE8UqKWVQeq+9ICRFweVx7mmhAc4OD2NHzNnJ4WlYiTBA2rQAzcrd9rjrSKwbG4598UOm\ncnuOhpWZlXJwVPt0oAFXymMY2/Nz1pfLODg8n+Aik2/vgAMHGc0BdzM5bHYmmgEmIVAytuA6\n+1JJtbGOOM0fLtAH3aPLDsB0PpQA1lMZVgN3cL2o/ibIxI3alkVlXcDkA4xQoGCckHFIQ3zF\nkYbQVP3SfWn7V4Q8HpkGoxHuKvkYHPBqVysnzDr/AHT1+tADVb96VbkqKFYiQFgMdM0CMshM\nZGf7xprORGMDG7g5pgPbDDH3hnkVGTHtZgWLDp9Kk+UNyfnA4oGI1GFye5NIBp2uyoH+UgHH\nanbQrMwy/HPvSY2vkAAGlEgRm3Hj9KYEe7ao2rnnOcU5WZQG6cU7nlicn0FRMpdeCSOmKQGl\np9rHNE8j4LZxUV9bC1EcidD1qFWa3nXY3TkimTXDXEjFzk9cUDEZvn4zt6ketI7McH7vcUxW\nLZ356ZBpVw20sflPX2pgPaRNjcfNnApq/Mp3Hb6GjBKs3AGePWl3M20nDL+tLUQmVlYN
kHtS\nnO3eT3x16UjfNnYAgHPNK8YkGSTz2HSgY7AOCw3etRyAHPBx6CkljVYQQeR2zSrnjJ5PPPpQ\nDGop25cDK9BSR5k3AfKDyaWRdsg5wvXNEZLZOeG6UaiFjO1sM3H93vilaTau5VIAPH0pp2Kv\nfzM4yRTUZvLYdecUwHbQ3LjOaVl27WzgL+dNaUFsMu04pV/1yMRhD1DUgFXG7Ifg8/jR91yF\n+6RzTVAQuCvU8U7bt+cjJxgqaYxr5B68Y4FOVw3JABXk5pArBQ2AF7c0zywdxYkkdRSEPZW8\nndwBnPAp25RyrnPembmEeUPB4C0nKRlZF5B5IpgKw2xKDhmz96m8+WRtwc5JqR8NjAyuOhqH\nzirBSpBJoAczMy5C4OaVtyyAleopSzK2GHFH3mO9SQOlADGUyAAjJzTtokkyTjHekZiYQT8o\nJwPWn7TGuFIOaQhPl+9wVHYUgG5txO3n8KVW3chcY4NMYKy7lJZM9KYyNVO5lY981NlmjwnI\nz1pFI3EgZf8ApT/Lby/l+UDk0AN2q8ikfKc8+9Db1Vh94luc+lDkrHjr3BpJMtGGBJpdQEhn\nPzbxtXOBRuG4scgY4PakZRnHP0NP2llCBgV7+lMBuw4VSdvfjv709SpdjnIxgZpW2rhgctja\nKiYDIHJ+lLUBY27Kec4p2xSxCnae4pTu4YDYv96k8tlmOWVs8bqBiq2wbAfl60jny9rjkUAl\neMc4pqqY4lJIIB+7TEK2SWJBKHrjtSj5W8tTjuM05z8okXjPUVHtdsv971PtQBJHhuPfrQqi\nZWIXcnSkWPcp29O/rTWLKMKPlHPFAxUjJjOBhh+dOaQiPt9DTF3bshu3Ioij3qTnNADNrSMj\nbsKOKfuXzCwbbt5xS7T5JAI60+MffUpuYcZoBjFcLh4/vetNfKsOy5yfel+VVK5yp7/0prKV\nYAnPHAoAdtDyblcBveiLCRkkHIP4UNgrtwGK89OtLnOdoKx43YNIQinDSbskDngU5ZEEeFwx\nPOBQcruOcg9Md6RWUSCMDbLjOAKYDeD0PfmnrjaUIBOadJncRjgdcUg2LynLHrQMaF+cKDxn\nr3pLhtrEMuPp0pYSNrevvTVUSSZznbzQIkjbyZF2j7w5wKViW6r0PSlj3NnDBcfMKjkBVvmP\nXkGgAdkOWbO7rinbW78EjIxSFg0i8buKdsEcu77340ANiTfndk46nFCxosZK5C7ssafG21WI\nJxmmSfKu35irHNACoDyVOE5wDTY8qxCsVkbgUu7zSwHBPFG126D7oxmgBhPzCPO1x1+tA24L\nOWV89hT90mAy4DdN2KXb5YGQDzyKBiRqu3cxO7PFOZcMGY4P1qINnORtUHipDt4YDnPegkRo\n1ZQwPU4zUm7bnavzdPrTH2ZXsCeaJMs3yn5hx+FMLDFbaQQhDe9O5ZecA9aGZ1ZRjdQillKZ\n56k0MVx+dhX5t26nM/7v5m6GjeIYwwwSOMY601pAxOV2d81FikIkg8tyRnsNppyjhQwB45Pp\nSDdJIuMFeu7pSyRjzGJGF69e9MBdxbAVPYP2qTI2sM/Ljn3NR27cmNQ2W53HoKUMkmU24PXc\nDxTAB2UDgjjilZGZlJAPHNNy0bKQevangM7Luz6UCsRhgd+MIBz9aNxIQ7dpPb2p0kOWYAZV\neaULuX943zY4oAUsscmFGeKbHIfm3/xcYpdx2gDnFBBk+deO1AwxluB0HNSYCgMB82KTax5j\nO3H3vSkMxYkg/KeAcUAEcyKpJjJPrSK3lq2BjJ6U4M/k7duSOc0yTL7HypH8Q9KAHNub5AuX\nbq1LJGpJULvwdzZpi4G51fkdKczNMu6NsevqaQCxzFpBj5c9KllmH3VUHsarxndGyYw56E07\nA8k8jeCAf8aBakn+sKszjctC5RPmO9mbimpEWXOcnt70pVjIshXcE/hHXNMB3mJuGEYnvT
4b\nny2xjJzwKi+boAwzRt6Z5b8qBkjRHcVx+8+8RT2Yx43j73Hr9BTrdmMn7zO4DimEmLc685ON\n1BJLExhZ2J2nGCvpTF+4cNjvmjYzKrD5h0NTCFY42Xy8kjIalqPUaqiNCeRg9QKmJLR7ifl9\nKjWSSSNRJ170rNtlyVwgHSgY6NlYYIwD6VJtOPvg5/OonYyMNq4qZH+7tUZ70yRduFAblupp\nkchZmxwKlGWDjGTUYVuqcCgpD8bvl6n0pY93QKVGcGpI2IkG0Y45NSMrsjgHb3JoEmLtWMZj\n6e9HG7JGD3Wm9doJyCKXaVYIBnPQ0FCtEIiTjI6+9ObBYMQQMU1dkYIckEnlvanyMrYVDvHX\nn0qREkb740CjvzTgqqxJGR2AqCFAi70Qkd6nhYqrZTJ6rQImT96oAJVRTnTy5Dht3PTNRQgs\nrbgc+lWY8KDhfmNJiGmGPDsFIduc0SWbNbliec9D6U9YztKA5LVYihd1YPwemaBmXJAgXIjw\nMdaxriyR5SwG0f3a6s2qyYDcqvVR0qvcWKR5Xb8vUNimFjjXUnO9e+PwqLyUxIoDELz04roL\n7R5IpEkwTk8NTodLkexuTlRJ3z/FTFY414luJVZzhc5AqZY1Hyrkc+lX4tPRGGOWP3lPaiTd\nbkKF2qePxpjRmSRRMvUgqfzpkkm6QHGOOmKtTHbNtAySf1qFlLBsnc2eTSGQqojG4YEec8U7\nzD8+7kN/D6U3AZsRHAxjd2Jo3MzdQCoxt96LgRDLKR+ANDFR/D9SDS/OhKtgHG6m/JuJwSfS\nmABZI1+UAjsajZ3kkAI5HUU/5zt5xzSyRF2OWKjpnHWgBse2Nm3r26VGshkHoO1Squ7eC4Ax\njimrGwwCSE/vAUAKf9WdvzHvik4mwH4PrRj+JdwPQ0jIWkMY+XvuNAxWURYQNnPOaURrtwJA\n7Z+6etLHGvncn/69NZQzFUPzHndj9KQhCrOxDn5M0LGrM+4YXoGz0pWUMNvPHORRuVl3ng56\nUhakciny1UnzCp60qAtMcnbgdaeJFX5m7+lQ8PkBsljwKsonmmE0wKR4ULjiolC/Z1LqdzHj\nbSxO0LMSNhHemBzdSBEJ3dOKQhWUMwJBDL2p9vZi8ukiSTc0h7/w0wwmCRt78jq39Kjberbz\n8hI420ATyWbxXUqJIBtbH/1/pQ29Y8g7lB+8B1pqqTGGHPr6mm7zI6qx2nPy+lBJLITLksNh\n6kn0pqOpRg44Y4DY6e9Onka4wxAZk4460xnwy+Yvy+1AxPJbzNqnIxjefSpV8uGTp5jYwMUR\nxjexz8uP4jRHGx5jwrDkbqBj0ZOuCHzgjFPZMbgYwynnPpVdmkYktxk8MfWniQuhUPjHUUCL\nKbNoKAIB/OnKrLHtzhmOee9VZMJblUJIxnNSx3LxqiSfPxwe9AupLcyO0iIy4YLk0Qosg3eZ\nsIqKTeFMm5pCBxUJuHkiXoq55HrQUWpY1aM/vTv64FQRSHy8gblHrRCwcMeV55JqRo0jZTgl\nDzhaAHq6pHkDLPwKLgrcKjHOV6BePzpyJuyyj5VP5Uz7O27g8k0xiOy70DKQQOM0quu19yE+\nvrTpptsygrngfMfWhZjlhsBZu5FIBsLK0ILZBzgDFPuIzblNzeZtx07Z6VMrRnAK9sfjRGgA\n2yg47EDvS6jRD5eZNpPzEcU6NOowNw4Ld6GVmmBTll/DmnKplJEnyv3A6Ux2uEcbK2F65zmj\ndknCY3HlqccnaC2E7EdamhUykbmC4PAoAbGqecuz94Au4hulE6NJMCkW1chiex9qf9liW4aZ\nGKlDtHcGr7W9pfQL++KT54HYGgRnNIrSFWXhuM+lLJDtI2D7v60gUpv8wfdO3jv70KxXDODk\ndKAHoiSQLv4lB4NOVmYNuTcwPX1pouPNlIAxwO1WY1YqQGVfX3qRDOFO58MzDO00xI284Orb
\no+uKcsimKRHXcexpExHht3zjjbTAXYJFdvKwc4qZSflBY7ccL702O6RQwHOac0q/MBz6ikMe\nyrI5QyeW2PXiomaOCMMuc5xt/rTIMTTK/LL0FTNs2sWbPOCuMUDLEJeeMvK2CvRc9fpSTLLu\njJGSTx2/A0yV41aP5w8foOMVdiQ3OnzziQMiEhUPX86oCKSdUkCspBbpt9Kr+dHHMzk7Vzkg\n1A10v2dTnPPA7ioTJujznAz39qBGrYK15cRxWoWWWQ/Kh4pbmC4junguGTMRwdvSsuHUJdNu\nI54W2yA/KfSq+oahdTXUrtL+9c5fb0NQxm7cTWqw5CedMG69B9ayZrgiRiyjn09KoyXTsAoZ\nlDdVFOW4RYTHnDevtTsIt7/tC4jQZXkM1WrTX57W3aOIhWU9z1xWU1wzRsI2CLjGaoMJWZQR\ngHkt/Wnygdhr2tWutWtv5cu5lUbyOnuK597hrNsxH92DkdjVO1jVVkBO0ZyQP4hTJGG45BWI\nNg80rAaGpXcxKOhMBPJ29fzpYdYkTEm1nyOo9aoTXktwoR+YhwrU1GltVAJwh9KdgsaVr4u1\nMr9k3hYpG6NxVnUdYaFIIim9gu59vH4VjBV2k7vm+8GNRtNJKx4J77m5oEXbrWZZLdgke3B3\nAZpkOofKVwzydRtNVvlXPHLnb9KIQYm3jiLO0kd6AZelvfPjWQpseqzFmXez4yeMVK6gZiKZ\nUjcKrpCzZAP7tG3Mh7UxFiS5LQbSxJXnilWP7q78tIM/SpPLWQElf32cD3FDIzKMpj+HI7UD\nI0OZtoVgw46cYqxDHt3eWSVx/FRFzIGGSfumr/2Tcyp8wzyMDNS9BleGFV4dcgj1q0sJkZVD\nc9AAOladvobTLu2MXTnb3HvXY+CfhPqvizU7e30+1kuZJXwyxpkj3rOUh8pymn6PKzAiNyVP\n3sd/T8a9u+EP7O/iP4kahD9g01/IZvLlncfLGD3+lfUnwX/Yhh0zyNR8ZMSYypSxQgBxjPzE\nep7V9YaTpNjodmlrp1nHZQoNoWJQvHpxXPKVzRR0PJvg7+yj4U+F+Lu5gi1TVDjdIwyiYA6D\nHqK9qWNY8rAqxr1AUYx9KazHaaj3n+H8aC0iQSD6Yp/mnbxUOfmyRQG79qQyUv8AKCaPMB4x\nk0xmDKKVSF60hjt+F245oPzdeKPuqWIyfemjJ+Y9KQEgbHFK2FPIpm7oe1Lu7nkUAPOOlHCr\nk8UmV78GhgGPJ4piBeV9ql25IPTio1xu9qfuDYIpDQuc445p+4bhzzios7SMetPXoT0+tIY9\nfmOc8U5W3fWo1OFIxSg56ikMfu2jBpS3TAqMKPrT8rxSAXdlhk8VIx2tgj6GoFAaQnP4VKrf\nNg0wFXI6Hinj3OTTFZQdp6e1LHyxJoESBs/WjbtbJPNM3dcUvSMMTyTgUDJF6cnmgZAqND82\nT9KkPyru7UgEBo9QKVcc+tIG6CmAigr1qRSW6jFMLdsU9QcAtQA5W25GODSgjbjGabk9acvC\n+9LqJi7t3ag5DDPSmgndz3pzdeaYxGAH3e9KvvQSNvFM3Hd60APyOOM07+E4pgJDY7U5mAWg\nBE55PJp3XORikUfKKczYBNADd3vShvm5NM3LxnrS+WW6HigZISG7Yp27jiog3c5xTt+MHFMQ\n77nNORgRTWzTgo2igQMrbuBxS7Txjn1o+cd6bzkD3oHuS7sZBpO3qKXbjORmhf09KCxBzjk4\nqRcnB6VHuO8gLx609clc5oEP59aKZ5o9KKQFf6UKrPxnimtIo7/lSsp4IbFQMGXd3wKAVVT3\nNKo6ikbKnpSEIVLR0m0IMnrT+WYbj+VIzBkx1qkMccGP0zTexFCqWUZpHY5AxUgJztIzSPH8\nuQeBS5H1o2nb7e9ADDzjnA9qczBh1pWYbcAc0wKCAe+aY2NtlEbFsZJqVvm4oPCsQOnpTf4l\n
BPbNArD87Yxg5OelKq9+lHHpzRuAUgnmmAH1BzSZ+al3fKABgmmZK9aYhd3zYBo2hTRtDc9D\nSmP5RzSAG+9gUnlnJ5p23ac7qjZvLQuTgUAOZv3fFIAeCDz71Gqszg/w4zUwG5d3Q0AG0cYP\n1oUhsjtTWYKuDTRnJI6UAPRdrE9aDhlzTd23GDmndVPHegOgjEycY6ClyFA4GaCecZwMdqRR\n8pPegQu1cdaFU7sngUm7C8jNHmFuTxQO4rNuXOMmkVvXpQh5NJ97cM4oAViGXKnimN932pR8\nq46Uh+7xzQAu7K8dTSKG24Jp38PHWlX5ck0CG78A4zUe7GM5NPZsjHao1kGcDmgQ/d6DrSFi\n3AXJp2DtzQKBjIWYgllxSs+3AxxSls9DQOnPWkIF5bPY0x22kgnPpStz06VHJGxYGmBISABg\nfWnbf4h0pqL0zSMdqnByM0nuA/G7HNK3tULSeXgjvRlmXOOKYDm+ZeTupwx7Zpm3CjBxS9Gy\nRn3oAfH3NP8ALDA5OKi3buhqZWAXB5OKZLPnn9o21EOvQSlSqTR/ez3r5n16MtMw4G2vrj9o\n7STNZ2EobCfMOnfNfJmtwFriTqOcGquBxt5iRW3cFTmse98zacDgjNdBqUccasxBBFYGp3G2\nMZHGOtUiJbGFK7cgLgfSqlwyqo7seuKvTM+1cnqao3mn7Vdgf9rrWiMimvzRttKqRz71UfdI\noHU9zV1VV1Dk4WqM428A4yc5q+pI0AurLjcB+lUbiN1HP0q2szLJtQjb61DcKR8zt36Uxmf8\n7MAThRTJIQzDJwD/AHfSrEyLt3n7ueaYm3zZFX5l7EVRJUW3DFwpYhe5pivGV5HbtUm7bG+0\n8Hg1DcnbbhSuAPQUhogmUSLjzPmB4Wopv3n7tflZu9WvJQKAFOMZ3VHt/dlNgx2J60wKUcLw\n/JJyefmFOVQvcn19Kmkty3KZBxSLH5nyM+3jNMZCyfe3k468U2NXZQxPyU9MtvXIwO9ByuEz\n8rdfagBm/aG+UMCMVFJIIwFU/MO1JJtjbKtyD0Pf2pFh8tHLfNI3IPp7UCGKTxvGwjpTEYKx\nyvfrTvMZvlJLHoajljKZOcjPSqQh09wqcohL54PpUCyARsoGGzksfWnYfkgdeOaOVjABzIO3\nrTsMXylRMt+8z3FMjYFh8uOwyae2772ML04pseGVxIcMvIx6UhCOoWT5QS3fPSmH5VbPHPFS\nybRCGD5LDjFRljtGDkjqDQA0SoVJUEjH3cUMpU+g9DT8FW4YNuHToajm+UnJO89c0gEE3zHH\nHtRwyfOCRnORSCTsBz/Omk7SRglcZ2jsaYgf72VP3uBmiZdpVQPz60N95dq+9NU7tzyNjnrT\nGLIERmJJ44yKgZiuOQw6ipDIWYrgEZzzSLIrRg45PT2oAbN0GOCeTTFYdj+lSnc+z+HIprsG\nbOcgccCkAi537QPxNJ5h2ydhjGaWRugzSeXjgYAPcmgCNdwiUFcgcAin7dvIbefQ0LmJSrZ+\nXkN60m0ZLk5ye1MfkG0MoJbaPTuaTbtkywwO1Ksa5BzyfWk27srycd6AI2YpJuPTpRymQeR1\nGKOHYl+OKE6Z6oeKBBH8znAwcfepG3KntmnJny8dwcUjc8A5ApiFye57fnTHcooLJls8Y7U4\nFt4GPlxzSSRhs7DjHPNIBnzSZyVA9aVVaPZzk0vlrGwyQysM8U1crnLZB/MUxiu5VjuXgmkf\nDbdxYHOcDvT+JG29cDIzTld9pcEFgPSmIhIR5GIyAfX+VIylcNjHpTsBlw2S3U0Y24yS46Ck\nwEMjNIQvzbhQcrHkD5umKQgRtnOPTFP3KsZIPzdaQxmNwXDfN6U7I8wjbuP86azn5Sq4DUfN\nGrBQAf4STTAaY924PwScinNgEEDJxyKTeGiG/JcckkcURsGGcYDcBqBjTtD459aRsxxkDvyC\nan
RPLkJLAgDBxURBjYhjvHWkMRQWw7nAHXjrSsv7ssvyjNNaUPxkgelPWNm6n5fenckGkDKC\nBz60oZscrk9cetNTHzBQf9000K0YJdsqeKYhGLsybTtyc470pXMpyCADn2qJh+7wrYT1NKsZ\nhX5WLK3vSGSbo25XvSNt3Dc3B7Cl+7HtKf8A16HXauAMYFMAVj5gK529OaRpdu8MfmzkU7lt\nrA4Pdaax+YuPvdhigGJHIrA/Lz3NCAR5I4zQ2A3B/eMOlKnyjYw7daABScFSOeu6liUhSV5w\nMYoXMi7NwBprBt+1CRj0pCF5KZzg9DUbMqsAzEg8AAUrZOB36U9uUQHpjJoKGD5c7mJOMY9q\nGX5QEP1BpdvX5sdxTOdwyMj1piFRNzNu+VcUyOEYwz/NnhqkYmQ4C4XoR3ojiHOW4xgH0pBY\nj3EBgDnHrTm/eoAB83eiTG0BRkr3HehR1IyRnn1oE7hgMflHA4A96Vxt+8u5WGRTiuV2Z49R\nTtm5QF49z6UxEMjMId23aTxilDBlCsvB4pd2WOfuDoaDtkbOcpQMYZEWfYQS+Kc/zDA5p/Ea\nBAckn8abs8osAc0hjVGF9eO9KpHlAFec85oxvjHOKFbcSDQNgznO4DcBxxSf6tgQm4deDQFM\nanuCe1OVvL4H45pjsLueRsjaCKb8zKSwG7NG0DOeM9KOT904+tBI1kXBXBbv703YOCB8vTHv\nTvOCsEXr13UrZZicEg0AJtO7e3HamD5ZAVJJ/SnbgyBSCMGlDRsWOO/WgQ0t8rHcDnqKVGPy\nJnLHjiiPy3z6duKcyjBCngjGaQxHzuYcELxuqFZNw2kk89amjBUbMc4701bdFdix5xzQAbUE\nO1sg56npTgx7gKB0xTNy7Sp4wcA0ikoxz80QH3RTEOkxuCkdRxmo+UYgEYIxTvlkQkAq394n\n9Kbs3Nnp9aQD1Q4wTkgUvy+WC3zNTSSqhiMN0P0p0LJlue3emAm6PfnDHjj601+gOMsBzSsV\naJSMg5wR/WlWRVJkzwOP/r0ARxx/Lu3Ef7JqzGgHz8AdGzULFtwOdyk/w9qkjbfuVvu54oGR\nn922Ac5PpxSswDYchhUkw3oRgEdqiKFQAfl79OtABI+2NgwJGelI2flKkY4+WnrtRvm+cmo8\nLtPUNnigQYE0j5+6PzpvzKuFO/mpGUou4fMe+KTcYl3A5HbFIQKTjkcA8UrMV5bjvTFVtv3u\nrdKdI21GY9V6UAI029gEGB396cFYqAMbPamMu1kG3l+S39KdJGGBCNswelMYu5VUhRj+dMdm\n3Bgc46ilPbccEDORTflX5i/B56UCHsx8knb1PFN443Aj+VKV+fI4QjilCsyn95vZf4aBjWwG\nyB7DNOVl+6MZ6nFNZSzpn7gHel2BJ9+3AIxSAVpE+YCPftPNR/MEXCkDduI9BTwp2tgcFvzp\nu1pAzZKoTjFMBSrJu7o/bPShVBX1NKrFIyqnb2JYZo+8ozwgGMCgYjSHzV3feUYpWjC4PUMa\nUt8oUD8e9M2hSODigQqtiTheR+VK2fLYZwe9K0e6UEnGB2pCz5D4/dkY5oAGYMFx/CKRsiM7\nBkEURtu3DHOPwpHXcoA+UYOaBjEOWwW2npU0fyrtUcE03KhI1HzEdaWTKqxPQnqtACSIzSL5\na8N19qGLhiA21c8t3pVHbJz1zmm7maMjHOaAY4MrKyBePehGPBYYb19aUsyptP3hzmmRt3Y5\n3dKWoh+G83O4KBzx/KmiQrvOPvU6TAOV6/xfWmfMqljhi3G0dRTGPkYqF2lcnk0m4RSEgliR\n6Uf6uPacA+9IFO75SSe9AAc5YseG9KSSRvkK/Ljp70+MpuIYfKRSIgMZwwKA8GgQmHJO47dw\n7fypPLZQMMoIHJoXEmSSdxNIy/NtAyc5Oe9MZJklhxyRxSvjaQ556ACk+9weAfejbtXrjBpA\nN/eR
kA+nan7vLbd0Xv7UZK4O0kZzStIEkLlSDjp/SgjUFXdEzJwCelG59o3cY9aUMvlrtGC2\nSV9KTKeX/eJON3/1qQDUVxkg5DdMDjNKF2vjzCvGSD7UpzvwDs9hQV3ORzn+8aYxMNtCowye\ncUvyopOwsw9+lIzs8y4XjoTSiRgW+TdwcAUDE3jdkruBpF2R8PktnikaZo0QBcHuvpS7mjc7\niGJP1oAccLnPPGackfGfUZpy7G427m9abkbTsOOcc0AIpyOAev3qkY7VyhG/sKiXuFbGO1LI\nUG1lb5/XNArDwGWTIwT1psn71WJXnsfQ0LkfM4Oe2KdtHzdsjvQMiT7o56Dn608R9C3THU1J\nGgUYQZBHXtQy/Mkb85POOlAiONWEm0kgU5GO0gjauamW3LSSAHB96hePa2WJzQAqZHJG7ng0\nkmJvlZjjOeDU8kaEbR3Gar+Wm3DBh6Gl1AXhgdoKD3pzAqV3Dk0m3cuWJbtjFBZWUKSQc4DU\ndR2Gqj/Mw5APSnLuVSfvDuBSrsjYjzG3D0GcmlUFotwO3B5HemAsGGUYOFz09adOG8sbhsXP\nAFOfO1FHyp1LCldh5ZP36QEany2AB5NDQfxKQy+lMbONxwv8zTogy/dG5G7HtTAGx8vyYTNL\nJMAuVj2rmkVX+ZCdoHNJG3y4PzDrigBXVo1UnBJNNk2yYKjBJ2mnMp288sentSwpi3Ic5xzw\nO9ACxrwMtypxx2p6rIr+hqNZBuA2/e5JFSvukUHGG6D0pBqIrO7kNlR70scScsTgf7XWkaM4\nB+8wGM0chQXHzdqEIcyiSQyR5UNxkmnxKVyM/L/d9aSNQ0e8NnnnPc1JDGzsMDPPNMB9u+I9\nxGMHG2nea27KHC+lJIrLuDcLmozINhKnaBQUSqodSSd4zyaXyckNk5/umo1kVo8oGA61K+9t\nuemMnFSIaI2KsS23ngU/fhgAMepFFsvynd80fbPrSMwZiqrgHrimA7zuvz7QP1p6jzIQM8Z6\nUJEjRjegz0DUm4qwVeKQErMY2VAuGH8FTQj94wbhSOTnpUHlmOTKyblbgD0NCttXLt8pPSmI\nm2s0WUIwpx9fepFXoQ2fX2qGMMrEk4T+EVJBt+YYOSetArirGNrZO4HsaQMqKOO9T7gseG/D\n1BqPy1kUY5OecUg1JVYyccjPA96sbSyIu4DaeWqvl2bKtsxxUm3eY8DjPJoAnwXl+ZlVOx9a\nWCFtzfN82eKYyheWXcM8H0qWNWLDapwec0gJoYnSYL29atLv2hWGe/vTcg7QcU9oWkO5ScqO\nnelcCaNVRsZG49cVK6pNCVx84PFRx26LIgJyTyTV2GNTNuQfJnmkUhVtFubAZQErwCRWBfaX\nIq55wvOFr0nw/pomZyVBjxmsLxBam2DFvlySQB9afMM80nt1t2mO3cuM5A6Gsm7jHzKzZzyT\nXSXG5Z5Y1QnzOAMd6pf2bJczRo8RDn+BRuPXoPU+1HMI5yRY4o9shyvY1BdRlotyHIr6G8E/\nsh/ED4gLBdfYLfRdKkAZbjUCASv+71BP8iPUV63pP/BOP7Rbt/afjCO3lPPl28GV/WncD4Tk\nhkk52cDutPSERyBhhnI+6TX3bdf8E2YUjBsPGSm57ebb/L9OK818Z/sC+P8AR9x037Frtvkl\nfsreXJ+Kt9KnmQz5VuQtw3D8r3/pUDbmU7HH3uTXfeMPg/4r8C3EsOq6BqFpswJHeEsgyM/e\nXNcSdPaPCsVVs8jPP5U1ICMKfL3E9+velQlpCScjp+NMWXZdFHO3jO0ipWwcMBxnkVVxEEi/\nOx242jqPWrKzgAnrIBgYqIsGZioyvSh4XiY8ZGe1UtQHsxLYQ4YjJWldtrAP8xPNN2lsN/Fn\n7w9Kk8kMrAvgsMq1IBjESKxU4I6cUkbeSu0nG7j3zT1B2ou7O3qcdaay/N93cT09qYxrqVXZ\njbz1pV
Eauw25wOfrTZMySI7ElcYIxUm4YYADOO1AiBVOMYBUnPNHlozBsDPtU+44T92Nrfxe\nlRzMWXaB8oPUd6AE+Vl5QsFOStWdM1SGztbmKK1L3DnCS/3arvG7BSvAyPl71IWCtK3R8dRQ\nMryyFQDIvz92x1NRlcSDBLt16Vce4khiRdgMefxqEriYtnDHr9KBEW/GSvU0TLLb5Zlyp6cd\n6mh8t0ZclCTgEilkVzC53kjOCDQIrx7trLwHz94075vkMnzjoAKTacfMAMc5p0knyiNeeMgm\ngOo57gsqqqgfNilmzeTMygRrEuSvvUasI2RgOOppGYeeSSTnt6igoWXdJiR2JHYU8gcHGTjm\npUZJm2khfRaiDGPdu52nP1oENuJD8oXgfyqTezTGQMMBcfU0xwZtrKmzPcmmndHIWKjgZIH8\n6AsTxtKsWXT5c8DvTvMR+HTbzndTPNMro7IxTHHvU9w0MluISDu3ZwR0/GgBkcgk81Wxjrtp\nkasOWf3C1JJNHc2vlBdkiH5WHGfxqFmaSTd90quNvagCeEiGNgSeTnaKuRW6KsZupCCTxGv9\napurRPEzL8+Mgilh1QrdbpU3J0oAtXF8l0DaxoIxHxu65/Gq7MiYDHBUdfepJLVpkdootsWd\n29ajkhNj5UjjzBIeMdqALNpGzLIH54+X0pk027C5Hy4PFJ5webex2Mwxt6CmyMiR5f5j935a\nQyaNGa8GxwFxnmgxbGG/O5jioI2Td825Hx+VK103zDdkYxnPNMCeZRDwnJ7ntRbxNGpUD/a/\nCq8RU4LN07Z61OxDYEL7s9d3akNFmDy4W653nIX1pkjIkbMg8sMeBUatuwrHayc7QOTUl3b/\nALyNJAyLszj/ABoGMj8yCJ5GUuA2N1PMvzDj5MZx6VAWubeFXkV/s5PDY4NMkmZmOzj1FBNz\nQjkXzBLGcjGOnFNmLM67eXY4A9apQ3EscfloML15qWG4ZY3J/wBZjA9qAJ1LQzZlXCe1OvBD\nOxdUYFuRVSG4KvhjuUjoT0NJ5km7YsoBHzf/AFqYEgU+X8/XOMCrbSKLV1KjcTWdJdZXOPm7\n0lndCaFoN22TJO6gbLUV06wfIPLAbAFF1erJMqOcj+LFVpLgQogYbjnAYVXWb94pJCu56kUg\nNaO4WFsbfM3/ACg+lVbmVrVmiVmZW5Ze1UvOnEg2jYwOcN3pskr7pN7ctzn39KYjRjuvMjUb\nPm6H3FW7y4sfKSK33NIvLsRj8Kw47zb8jAlgcA1agkVlmLnEvbFACNMY9jE5Vj071Fv8vepb\nCg/d+tQLL5Owyc7eNvrQzBmLOgOe+aAHySLwPmXtntTWY8qThcYIpJGLQqr84PAWkuJs4Urt\nHTPrSEEcO7DscHGNoPUU3zi0wB+UY24pZAGKLnAxyVpuMv0Ckd80xk8Ny8McqtCrKDgN3xUD\nSrNGGX+9zTlPzvsk56mkZS0ZKYIPWkBIwX5Sr7MGnMzQMof51YbhikWP5FfZkKPvUIq+WCwZ\nyvIPoKkBUYMSXwFxhT7mkEbzSFR97HzZ6U9Yg0h5OAcqMVKqKVAVT15FMCGOPdIfmG76dKd5\nJ8vEjbWDZ2r6VYmt5I/lZQsfU+p9qfbw7Y97pwfusT29KoRHKN0gXnONwbuR6U/7AY/32wnz\nOx6/SpbOzZpAXUN3DGr1wrzTId22NRlc9jSKSuVvJkZuVDcfd9KswwrNgspVAMH61btdPNy3\nyMWdu6jvXRab4RvJGjWJGldm+WNBlj7Y+tQ5BY5y20tQzAx5kUZLdjXXaD4Ru9S8hYIJJXlb\nCKoJY/h+dfR/wf8A2L/EPjDyLy+T+ytPZl3yXS7G5wSQDySP1r7O+GP7PvhD4VW8TWVml5qa\nj5ru4UE5I5wOw9qwlI0itD5S+DP7EOs+Ixb3/iCJtL00lZNsgzLIvHbtwe/5V9p+Bfhn4d+G\numJZaLps
cAU5MzANIT6luv8ASuljdlVlX5VbqB/T0pDIWbnGPpUFpEguGkJ8w7h1weaDIV5x\nwai3LvJI4xTt26MZpMqw4SMV6U6NyEwOtRxseQTxTpj5fIORQBN0XOc0i/MOBUayLtxUkOFX\nGTQwHbip5FKgzyeM0zccnBzigNzkn8KQEkhOBzxSrjGM1EM8jOQaduKx8L81AyRTgYI5o4Gc\nGmLITg8A4704KGX0NIfQcGDHb1pVYEcimqq9jzSL8vB5zSJHqw59KkT7tR9eOKd16dqRQ4/d\n68in/ejzzUTLuUdjT1J4UdKBkm7pinFgAT3qLnnnpS7s4780wH7gpBpyNlj2FNVl24PNAXn2\nNMB64VqXBL5pvNAYo39KkkefmY9KbuYt6U1WG49qUNlemTQND93Oaf8Ae61Gsh7rS5yAV60D\nJB8rHPFKue/So+WAz+NO75zxQIf935s0qZ4Jpn3uO1O3flQMduBpeW5PSm7Qq7j+VOUECgQ7\ndub0oPvwaN4x70o+Y0DFZvl6U7PzZ6imc0rSBRgUgFb8qPutnFGd2M0fdySaNQAZOaGHy470\nreoGBTQ+eMUwFG5VximSwtNgBiuOTipFba2TyKduPODg0ARxxhVPWpdx2e9HKrgnNA9aBoZy\n3fpTueM0iRlQTnPNSbTt3A9KBAVHr0pVyVPrTdwPI4BpydMdaYx3LKecGkUlgOx96RgOmeaV\n0zjmiwx/OSc8UZ2jgUYO4D+Gjgj29qLAKckdcUjH5cd6AwPHbtTfvNwDRYBdtFG6iiwEHkDz\nCwOQfWpOD1pFXA5pq/Nz0xWYxS3Xjml+8uTTlj3dDQ23adp4qRETbeozRHGOopdx2+1OVhjm\nmhib/lx0obKqCDk+9L8pxkUnOeBmiwiHzPMkJXgU9d3OelHk+gpeeOaQBjaSTzmlDe3PakbJ\nHSlwciqQxm1ypwaVU+TBGMVIqjBzQyhR9aYhp+8BnHFBA64zS7SxGR2pi5XjrQArnaoJPems\ncDmlkYHHAzSfewMUAK3zYOOKdnoOtMKvwD8opygtk9KAHH5mBxj1qObDKcjj0peW70142bAF\nACq25MgYoWQYpApVto5pFj6g0wHsob3p27aMKKOAvAo3bskDFDF1AjoehprEqoHXJpcepyaT\nb70hjmYbcd6Zz2/GnZDHpQM84oAYHJOMcUjSDZ0yfQU7d0zxRtG7I4agRErHdkVL796TaFHS\nj7y45FDGG75eOaQleoFL3pQAKABWOcgimMG55yKXlT04peSCBxQAm0eXg1X2ssxwMCrS52ju\najZR5npTQDVc7sU9m24705AAOmaFAyTSF1I3jP0pWbawA5p4AkXHT3prYUbQM+9AmH3VzQCW\nHTFI2du40uN2CDxSAbtKt9aVgoX0p235uTxTdx5BGRTJI9uRzwKeD8uP4aPl24IpFbqMcUDE\nLD7vekZsLkmgqF5xzUTHzDg8UDH7xtFSRMQpx17UzaOwGcU5MEAjrQI8/wDjnC154Rj+UsY5\nMsw4IHrXyF4ggbzpSpCgcH3PrX2n8UrdrnwhdtjcqkNt+lfHXiZUDSSZ45XHrQB5rq0bzZ5y\nRWDfwny9rfrW/fErk5xnoKxr7LKOhwc1qjNnPXEe1iCcDtVW7Lx7EPRvStOfBkYEcdc1QmxI\nw55U5rRbmTM52EC4deD2FVJpEwSRk9RxVu+ZXbn73b0rOuDIqDjd9K0BFSRf3qkHafvVUuJJ\nJJNsjYHUcVfmhO7ceDjOKr3BEXvnnOM0ySKMIMlTuUjBz61ULp8/DK3TirjN8qgjCdfrVJo2\nZmJbv92qGMjjGwAcD6U2R/vAfMvTkU9mK9Bnt7Uit5mF4JH8K0rgRLIHUIMD3pjYWTcOT3NO\n2/u2ATDdaruwbIYYOKBCNcCPggnLUjv5HzIN+Typpsm5doK/KRgH3prqsyKQwJHBHvTAeypG\nOFyDzVOSQN
uG45HIOOvtUzW5UlmYkoevtUbMzRo+Ms3pQFyA5bCscjr0qTzTypXeAM7vSl3B\nuiZPt1+tJtJfCtx12+tAiCNhI3ynaaGV2k27eep/xqRdoBwpCnjHvTWXbFjfz+tAiu8BEbBX\n/eZzg+lRmJuHTlumanVQyjcdx7U0lj8uGBHWn1GIz/u+Wxzj8aaVK53AbjSpGiqf3mW77qQ4\nZto5HXNMCTzEUeZgfKuAvbNQeYyknbxtz+NDbN2ACKcuI4wGbcT04oEReXlAwbLkZxQzPsOR\nu/2qXLK3BwBTY5l2Mm0jnJNAxqozhSeWFE0gV9yKQe9KMvIccADg9qdGxCkMeSOaYWI5mbgg\nYBpsrBQOQc09sY6Z7dahUY+UjPNACbsyCThmXqB6UsnykcfKeR6U4FI5HKggkYORxTTGVXaD\nuo1AJFXYjA4K1EGw3IO3rgipOCQjjkc7RSlhu4yOO9MCOT7uCMHsBTGV1YHqQMipJEGVxnPT\ndTWAyST93ikAm3dnLe+c9KWNNsbNndz+tEagqSF69DTVTcrDuBz9aGCBchhkZbvT/OPmZIwO\n+KY67wHZtuF7UmFyoPzAjNIRJuC54GDTHAVhuywHRQOKWTGRgYHemfLuzu+amMdgyMcJtwM+\n1V1ZmVio781ZjY+Z8pyO+aNqKrkfdo1BjNx+UBsk8UzyxHI2/JzxgGn7Q3JXBHSo1XcThyzZ\n5oEM24YgrgKeCakbO4Ar15oYt86tzznHrSq21Q332J6f0qhjGCndxhhwKUZVl2DHYimzH5/Q\n9T7UnGRtLNnnI7UgJGG5SScMpxTTtXnfkYzxRJt4KvuPQ7aNqL8gGe5agLdRm5mjIx75PpQz\nfd+XIPepNrNG+1sN2qIK0bLuO4DvQASY+ZSSFA7dqVWVo8A9P1pZd8JBA+9zTdxHLDP0oEOW\nMthRwOpJpqBYwQ3K54xTvO3OAAU/2jTtgXPzBx3b0oGMVmikIJGGpob5mC/P2zTs/wCxvHam\n+VtBA+RjzxS1AHUrjao468c0gdiOTgfypfLVNoyWY9Wo844bjdg8ECqAXcxkwFOcfe9aR/k2\nggjmneZtIYnJPekZZGkG4ZPXmkAjN5a5K5OeaZIGBxhQG4HNPYM2WdcIPSk3YlGBuVhwSKYW\nEVfL2ktkdPpSqu6FgTls8GnKvDHGOcCkYM2cDnoTQAyQmNlY9SMULhZMlvlxyDTmPygEgt2p\nGb94MdMckikAijcVYEBQaX5QzM3XtTZHMh2bcAc0ix7lGcsM/rQIdIUi27urdx2peVbcOB60\n2RV8xAy89adIN4PO3PY0gECrI2S2DTo490f3gAvc02TG+NQhx3Oaa8bLwDg7sgZqikCoHViv\nNJubapJ+VT0qR2OchQGxyBTDncMck87fSgBzNl9wGWJ+76Uxm25VV57UFgjE7sP3pDiQfIeQ\nKBXFjb9590swXnFDOOf4SeTTi5jIJB+7yV61EsZkAyME9KBj8gkBTnipHVlXDcfSmModwGOw\njtSlwx+bPynikIbvAIUDH16U5gGlCqB0oVvN3b+vpUa43fNkEd6BDtoLF89DimNhWHPXrTlO\n7OTleopir+8YHkZoAduCsV6UhY7T60rMrMBj8aOJGOeO2aAYgYleBj+tLuHc5bsKXyxsG1vm\nHY0MgVc8E9SKQCKAzEF8N/KnLMpYL2A9KhjjJyAOScj1p3K5x+dUAcR5A5/Dmmqz7gxbco/h\nFSK0hUlflOOc+lI21QoU89c0DEM29iCnHU01tqrhf4ugpPM2ngbh71J5YkUGRtqqcjFArjPM\nJk2gbQB93FPjy0TbgAvoaRFMmWzgZ49adtDtuY4wMZoKGiYyoAikMvrTN2WYFTyM0pLDIJ3H\n1pMGNMdO+aCWIWVlx09qVWADHpgdaTcNobGTnHSlOWZkbB46CgQkLGReUwp6bqf5a4A+8M8G\nmbi+xcEgdhT1
JaRd2MMKQDZwckY49aRpccgZwPTiiQKxbLEgcYx3pyxvkZHCnIPamMFY8bu4\n7fypGxu2kckcUjfNJIzHafSl3bdhxu4pARqxGAhwc4I7YpyykvnPy9KeuAzEL8uOTUXlgr6e\nlMCVlVFODl+u2nJI0mPlyMZAqHA3AAEtjkmnbmYjap2AY5pAJt+Xcpyc0m6TPOdpGc0jbtp+\nbDdie9L9yFQo/E9KA1EyFwCx5pT3A4FOdjIgBwAOBxzmmwhm3HoO+aLCHcjaJPujv60Mx+7g\nlGOcGlCibp34FDb2zg4K8UykRmYKgBBVevNJjnkctzmnSL53y/eIGcUrMGQbRlcYzQIUZJdM\nHGOCaYCfKZCmeOtO2svAJxSLu8sgZ+Xk0CuOZl8uJVBDd6YpKM4jOHPeiOQbQx5NCqMnJwx5\nzS1FqOzvUKx2nPNPUltwIyMfKTSEbV+8N3Y0kofAKnefSmUJCroQC24/3aZuPmMOgY9Kf5mx\ngQCuetOZUaYkrjjIFBVhpVguWb5ugPrTeI0+ReM8inFgo2r07+1NmUM3B+UD86QhfNVWAOef\nbpSBRF1bIJpd7ZTPyL6GkK+W2f4qNREjfu/m2k46VC24bd7ZHUKKk+ZImUcoaT5VCc8Y6UAK\n2x3Izjjt0pRGZPlyFPXBpgZW3BTkdcUND5rZwFzyGoAVv3hGEwelJu8s7lPsFNObMi8DaR3p\nny/Nk54zQAuCykr8vqvfNOib5D3k9MUwq37sg44/KmI+2Yklj/tYpAPVsHdISCO4pxUY3phu\neDSN8uOQ59QKVdxI+UKAc4qgBlbarEbAwyR3pYVVW3OME/yprMZDlj904HNLIfU4/wBqgBZP\nlhIHOW4PemPG642v94YyKXI8vkHJPWnNHuX5ThcYoAWPkA8ZHykHv71HHHtBUcDOcU4t+5AC\n5xxupWfBVyMDGKAGhiWI3DOKUKNr/Mc9Caa205AX5vWlJRiSr7aBirGk8gZRuCj8zS4OMMOT\nSYIjG07Wz971p0ig/IzZc9GFAhQzBd0bfMvFL95QO3X8abs28A5OORSMFVOPk9/WgQu0+WTx\nnsT3oXC8ufnIxRuz5YIzzyKVgv2jPTJwBQAm07Ad3A6etDBfk2kh855PFP5ZmBAG2mMhLDjI\n6igYu5zuQ8tnOR/KnrIv3VDAnr7U3cyruwFPc0bpSCcfKe+MUtQItrSZ3HamevrSqpV0YA7e\ntJNwqEklc9PSpVYhQDwvrTGKruqsVAGe1N+ZGQ/fHcd6BIyyE44IwKBu2lSdvH3hS6kixrGy\nyPtII9KcqYjOApGOM+tIVwyxo+SRncKGULGVOS+P1pjHsPlTap39CDSbtv3hvA9/0po3llIP\nOBmosOu/cTweB3JpEli3dmkZRhUbkr6Uu7zOAMBelQLIdw3KVOOWqVgpxwQ3bFMsk8zdgswD\n9+aUtHuD+3eoGUHJIyfapI2+Ujb7UAG5mUuu09s5pqseN6bV96GjO0Jng8k03aGm+YEcY56U\nhEsLbcn7yk8UgfB2kDaeRTclmEanYvvTshZsdV/vGiwEUmFb7pB/vVJhtwKtnI/OkO1RuGWO\neM9Ka0SFtzttPajUCVm8sAKCy9waRpBt2qOPakwFYDO5j29KXaigEnHOBtpgLIh8xdvIx+VM\n5eTBJwvPFK0YOVZzjr70nyxru5xQBJ8z5CgljQOyjIZeopu0tht+0YzxT2BjkQE/LjO6kDBH\nMbEbSSe5py5VzHINu4Zz2puxjkbsnOalOXDZXhRnJ70xPQjD+UnABbtUi8ck7ietDKqyKEGd\nwycdqR4xt2rndnrQMfkxnaT8p9elI25oSOdg6UNCfLGTu281IwDfPgscUhg65YFf4e3TNSLK\nWPmKNuP4aib1IzmnwkHodoA6GmBZkG6NWLAg84qIxooJA5JpFk2RhgPlJ4prSMVLMMHP5Uho\nmZuBkAZpI1cTY3
ZX19KhWY7ySu4AfSpI93304JFMGKpCs24554YdKkmieRg0YCMAOfWm/PIF\nG3jvUm0xnLnBHApEkkcYC4LdDk0/aHjbopPRqiIKKCWznr6U/jg5+XrmkAtuhAG4hiO9OhYK\nzq4yGORmmR/MS/vT1j3SByMY6570ASqRM4UEDbUsUbLLg4PPFRqI0nGB1p0inzGPII6GmIdP\n5cU5RzuYjNEOyNQC3zZ5FJHGJcb8bscE05YU3bgOV6ntUsCQyK2Tgq3binqz+VtX5j16UKfl\n34+vtUvmsq7guPQ1XmIbAw3fKpfP8NX4YmaMqOo5C1TtV8ndLyD6VZSQ7yQ2R7UgJ1WNYlTY\nTIzdD1qwrbS5B2so+6epqONRMVJ+UdmqVbM4y+en50gBoTJlw2D3HpWzpyrGqR/LISQTj0rH\nYJ8oRst/dra0uAR24Ykbw3X2pPYaO68OwBlYqdoI4rlfH0LNfRrwDtOOw613fhG0DQzzgZRB\ngj+tcp4thk1DUMxIWZjtVeO9ZFHH6L4Xv/E2rWumaVaSXOo3DbERPfqSewx37V9wfBn9mfQP\nhzb2+o6jHHq3iTGXuXGY4v8AZRTxxxycnv3qb9nv4M2/w+8Prql3H/xPr9A8hcDMSHBCj06V\n695nlrtzk+tMCxwoXA27Rgbew9B7U9HVfpVVZCy460u7C4oGWWmUtzyKf5ir0JU+xql5nbHN\nIJM9TzU9QRdmht9QiMFxBFPH3WZdw+nNeP8AxF/ZF+HHj9JHl0hNKv5GyLyywnP0PFeqCQr9\nakFwzLyT6UPQZ8IePP8Agnbr8dxO/hXWLTUrPaSkN18kv0z614L4s/Zn+Inw+ONU8N3Zh6ed\nboZU/Eiv1wSQKB83zeuakZg8JR9ro3VX5B/A1pzCPxCutKmtJGimjNvJ12SAofryKhWKXLrI\nee3Ffsh4s+CfgL4gREa54ZsbknrJHGI3/wC+gQRXhXjT/gnz4L1TzptC1S50Ys25I5cSIn6Z\npqQj85GV7by4lO4Mf8ipEjdshgAvf2r6b8bfsJ/EDwvBcXdnHba3aRtki2fEpX+9tPavCPEH\ngHX/AA2/l3+jXtmB0MsZAx69OlXcRy24xyZAO0cfWnvHuwwbbUzxllXZgsTn+dM2jYwYYyeF\nPf3p3GVXBwqDJwc0qylg0ezHcY61PIyxq2V+bsahZlG1hlW70AKrMzOj8hfSm5PmBcfd6gU5\ncRwuSPMLNk/SlZlZ1AXZkcetMCPb5245wfWnQxBmXAzu4NKoDKzKNgHUmpFYlsKPekIgkT5i\n6nLZ4Ham8tICx3E9D71LJGOFJyOtRf6tmQnLHo3pTAVsuzL5eCo9anjVBbgbszYyy+1MXO9T\ntyPr1p3yKu7OGPQUgIRt8zjr6Y60LFFHuY/MxBwf6VKkBSRjjlhjPtSRxrHIsezcR39KNQK/\nJUYwFHJprb4WwRvVudwHFWJIwZeeNpya1RaxXWgvKYtku7j6UxmG8R8xVznHO5e1DZjDDG7v\nz1qxHbBVYuSCDxjtUe0eYXHzE/LuoERKzN82OMcVJa4t8OV37sgoKSS3bzBjgr1z0NPklJAC\nHDDqMUhDpcMikPjuE9PaoFldmJXl89O1DTNwAueKVdrSpHGGZmPUDkGmIPn5ZkO3+L0pVDKV\n8sfM5xtNPEYbzgZCVAwQfWoFaSNVZF3EjAbPINBRPD+8kyxY4HH+FQKXKnA4znOeRT1hkVNw\nY5zz7GhkRZCjNknk0DCGe4aNo1ZwOenSp5biaWGKN2GFpgbyoyEchQKrRTtJMxdd3GAD0+tA\njRhxcWsz71MaLgE9z7VW3AW6oScdcUxocEnquPvA8Zpsf3t7Dd3pdQJ1kKyja/mgjB3cGk3O\nrEOoI6DBqSaNLpVwMS9d3T8KhkgXadwIGccUxhGTIdoPl88mn+YsP7oIQWONxNROpWONSh45\nxT9yRqJM7mJxg0DR
Yt7p7WYBmDcZDdcU59Tmu3LnJk6biM1UWVV3ADJ9DTlZ2VUA+ZvwxQI0\n7vX3m09IJlBCHgRjrWdFcFsnG1ifXNNV1to2BOZQSCtJdRyWS2++Pyt/I9OaQFmcuPlB+Vuo\nqNpZRIAONo5HtTBI7ZkmTYOgemSK6E+am/cMbVNMCf8AduuUb5gck0x2YqS2FHUH1qOIeXhV\n59R34pdylMbsknhT1FAEjlXi3Lk56mmq6sPLjC7z/FmmzMyIMRkc4Pt701YxtG8AJnIPQfnQ\nBIyyBWVmDbTkAVHJcmXYF+UevpQygqcN5a0zy+Rg4KjPPekImkuDIytI+WQYDCoo13KwKkhj\nncelNkdPMwRypwyirNuxZQ+N6YyU9KYDWUSOd2FQ9KdNIG3LF0xzUcRxIcLvQ8j2p10ybkWB\nSrt97Pp60gGNGZIyVBbbxg1GG3bVxwpyakkbYv7tztYgGnsu2bcRtPt3pANdjwY8kH0HSlkZ\n5lUcHHOKXcZN4BKv6CnGF22gkAMPSgBsFuu0s5GQfu5pFjZm6b+c4FTNCuSpGD1BFPWPy13K\ndmeDTKsV9ixyMCNu4Y+lSrb7U8v7zqMqVqVMR5LHevTFSKqOCM4bHGKQhsMLIwDZyOo9akVQ\nrsSMg9u/0oijlDAZyT932qzaxh13P8pDfMPWi4ytHIFaU4Ddx7VLDDiQYAz19qna3ij3nHDn\nCj0q1b2yM37tN394CjQChNbllyWbfnofSrkNq8JCxDzV9Hq9Z6fNcyBNuA5wAfSuq0fwjf6n\nIsFlYzSszBFVEySc4H5nFS5FI5m1snkYBSNvpjqfSt/w/wCDbjUp3WK2aSTIyH4xnHQV9RfC\n79iPxJrUyXGvINKgXBPm4Dgd+M9f8K+uvh78AfBnw4t1Njpsd5d4GZ7pQ5yO/PTOKycikfGH\nwn/Y68T+LvKup7b+zLDcd01wNowPb+X0r7G+Gf7NPg/4eRRXD2kOraopyLu4iB25HIC9OvNe\nqpLtQrgKmPurwB+FMeQnJHNZyZVuo5JjCwUYAXpgU7zP7xqHr1pNwLZPQUFE3nYIHc8Cn8+9\nQr/eB57U9pixGTUsYqse45py/Ngd+1MZjnINCvtI45pDTJwdoweuaWTCsB1qFW+bH86lyNvP\nNAhWG3BAyKkVj1AqFmBCjJFKzbVBGcUxkqyc5JprfN8wHHrQgXBLd6FBVMZ4zSAVR0x0qRsq\nxPtimr8vPalxx1zQIVo9yqe9KTtfap5xTVyw9MUn3WB/WgCRWAbpTlUgk1Ex+b8amUjaxzSA\nVVOc9qcrAfSo942inYC4HU9aRSHMwaTpRHlTknikXs3UGnKoYk0hEijqaBnikjzgcZpW9c0A\nPBG4cfWl37mHaoy3zZ/lSg59qYDnk+bk4ApA2RmmlN3B59acVGBzxSEIuGbNSqO4ORTGUbcA\n/NRgq3tQUiQkjBFLyuT2prfKcZ4NK3MY9RQMcGKqSec9qVXK53flRH97PWms25uR1oAlVtx4\npc7Vx1NQ5KtjqKkVjg8c0CJV7E045/wpm1mXkilzhsZoGKDt60BuaM/N0pq534xQBLGD3NLt\nApmX8zoKcrfMc80wA544py45yfpTecnJprD5gM0AS7ztx19qFPqKa4xyKaGLZ5oAkGeaPuik\nyfxxxQuduKAJM5XijcNuMc0g+VR604HtigBpbb7UJI33cfLQfm70hfAxSAeqjAFOjX5iB0pi\n/Ng+lSKdynHBNMAZxxxzTlVjzSbdzjb6UrSHGNtAx2TjmjI28UY6dqXdgEY6U2MFx3FIpDZU\nHBoYbmU57U3yfmzmkBLj2FFN49aKofL5lYudpwKdn93xQw+XHSmDJ+grC1g6CrII1PqaSPar\nEH60DBBOM0u4beRRuIVW3dsCl+UqTTW/1IOec0KjYHHFIYuGzntSox28fjTsnkdqbGDtOeAa\npDsLu6UEbee1I+QvAz
S/ewD6ZosITdyMDFKzYIoLgkVCzfPigRKynv0pp3Y607cWXHWlb7nv\nTAjkdmUY4NLjjrzQVPFN2Fc4IoAduVscc0LlT14oj+5tHWlVgc45oAFbnJ6CkZuxoA3Dnio9\nrFuucUgJSuO9Gfn9aRsMobvQV70wF3bOaaeRweTTWIbgcinKvGQeaAFAMa8nNEmFjUUu7g5p\nrYkUD0oAWPBU+tNOVPXNO/g4qNvu9aAJGJ28DFG49jRztCnpSMB68UAMcFtp70bjuJ7Uucde\naaFIGTQAq/MvPelyd3tTVA3Zzin59s0AN3BeT0peCeaRvm6CjaW5zQA5QfXiheOaQnbH60K3\ny4AxQIM7nwKXhh64oX5s44NM2456ZpXAf0UAUMMZNIpJXGKTJ3e1AD9vyAHvURBj4J4p3mA9\n8UknbnmqGvMN3Yg4NN3bV2jpmlYtsxijjjNSITJ4xzSSZXgVJxzgYNIMcdz3pisMwevSm+Yd\n2O1KSd2D09KTj7w/KjcLAzZ5AzTdwbtg05etNLAUALjjjg+tI25cY/GndFyeQaE+VQvc0CuV\nNftRfaFe27LuRomOPcDNfEfjC3ImuGA43cCvuK6jb7LNk/JsbcfQYr42+IEC2+pXgzgeace4\noEeO6hZ7iwz8w7GuevoXyewror6OZryUpkq2a568L+bg9OnNaohmPIdzFGyAapXUaooA+7V6\n8O6QjOPeqU+yW3IT5iD1rRE2MS62SE7xuGeQOtRKkathW+XtntVq4hjRunzHvmq01sFj3Fdq\n+uasmxWm++3I29DmqLyMuSowOw9KuzRxs25RkcfnUFxHuwNuGpklOTErjdxUMq/vGUkHA+8O\n9PuMsQAnQ4JqKNicsMbV60xDJIgoGD8v90etQNmPGwbXzU8hbcDgDHI96ikwsYc/NKT0Pan1\nGNmy8ZKtTXjABIPakWP5jj5cDPPemSOduPmB68UEkLKMYaT8KZ5e2YRqPmxk0/hSGb5sjNM3\nbm3ZJY/nQMGZnygHOctmmE87YxwO9IQFLHO4jk0nPkLhNpY8GmSHmGEFox83qah3F1DuNjZ5\n296c6lUAIy3Umo13dhkH1oAdHMFY4Py+lRswZj8uR61Iw9vyqHy13AH5c9KAI38xZA6DjAzT\nHAWQ5c5bvVldyrszyTjdUVwuYlUHJVvvUDsRyJ5kI2r908n1pvLKWA2r6g1YSJV+bPJ9+KhY\nbG5YdelCERyOuUAB/wB4Ujbo+N3OchSKcGBZtx+Wom3thz9BVAOZvmIzjuabvZVKsoPp9KeM\ntHlRnaec9ah+9knIJpjJ5ISVJBwg7HrUbL8wDc8U6Ft7FMEh+MVG8Yj3ICTt6mkBGriNWTGT\n2p8bbV3cfL1z1qOUcgA4I70Mob5hk/SqAWRwxBQ/Kxzk0KxDdflHQ02Rlwp2/L3FKyZUIQRu\n5FIBpDs+/blemacrKzEMCABxmljYqxRcqvWnqfOYMSGp6jK8gLKQpAFNz0I+6OCKttYpOrBG\nJKnOFqOSz24JJxSEQ7RuIViBjimtGVQ44PU1JtBX+IsO1JuDNlgQQPvUgG+XtjWRTn1BpPlO\n1xw+acWMmMDGRUasu5TngUxA0rbmwPr601vmk+YbRjNPwdxJ4Gfxpq5kwCdpDcbu4p6gN46F\nSq9cGpM+YyknIYU5nLIC/wA5pn3fmHyHGBSuwFAk/iXkcfSo3VkjUrwQefepJFZkXa20nqKY\nsbRsCG3expAEkocqxJPP3cUrLujJHDZ5NLu2ZGwbs0xt7YO392femAjZkyM446Uka7ehyuOo\npflUgsee9AXYrKDwfmFIAhZV4xj8OtN3bsqrbfanc+XuGDjmmuo8vzAclqdwHLu25bheg96Q\nkKoXueaFUbcAE7eaOCu/bls96oYsjbpOcnAwKYc+XnPSnt8zYK89eKQbVDY79RQBH5jBlI5F\nMjT92/zZYtkip9g2kMuF
71EpIwy8AHp7UtQH7vLG5T2qOTeRkHJxS4G7DnAYZxTY2ZIyVJO3\n1pjYuPlUl8H+7TlmJVs8bfSmLgYd/mB7VIdkayBOg5INIki+Vgu5SueRT2G3GXz2poJkXdnl\nfTpSqrR/MSGz60DDnIIY8dscU7zGY5JCgdhSr+8kXHGAc1GoLFmGFPejUB8m1ozz74FB/gLD\n5cU1tqnJXcfanR48wk8Jt5WmIifiQY+91p0bMwJHzKeoNK0fy5x1HBph/drgAj1pDYF/LwcZ\nPenSMpbPKjrgUTScCMAbCOPWmfMUQ9SvG0UxDvoMv156Um0swJGR3zTs/KTn5jx9KUL/AAtJ\ntIFAwX92DhSx6Uzy1YZbJI70sjYwCDkdxTZGClSjErnkGpuxkq9u9Mk+8dvykLndThv3Myna\nVBOD3qJZGkbdghcZK0aiGr5aqpOWc8mnMqqu4Nlifu0u8NkABT2HpRtbjKA88k0CE37G3bsc\ndDQG87luc+lOZd+MFWAPIoaPZJlV4zzQAELyAecd6RW3Lx0HU0MHMjHHy44PrTFzkbvlHr2F\nMY7HmYK9R3pQ25d/Vc4NLJ8zZBwPamrGW4U4XPSmArLtyAevQVFGQshz0NTMiqc5yPWoZFO4\nEHCsaQh+3cCFI455pvmFo1ULz1p7Dy5AucZpWAViBwzdKAYxiRIpPcUm3BJDc559aVUKLuZt\nwHej7o8zHzH+Ec/jQx7CrlGzndnims22MRDpnIpWIOH6J0FH8IXHHdqEIeyvL0IUY596YU2q\nCOPUGg/6wbScnjDU3nd85OaYCS4xnopokc+SeOfeiTeU4Gcc49aRVXy237tzDIHpQARszMA/\nHFAYHK5yT/DT2ZdqgOGZT1o3Mkm7jB4yBQBAsiqSGHtxTgp3HcMZHHpT+OVdc45FI258bj8h\n7UAIAf4iF284qLOwMQTk85qeRckFD93rn0pix7n69RkCgEC5wGxj3p5mMhXavCjpRu2hR1bO\nNtMaPfnnY4OTQFwPyQs20qWOetJHMW+85w3b0pCrNGADwT96nsvkuDgEY7d6XUByokgZtp3D\ngNUG1lYYJznmpDvYgK2zPJFLuJ3bc7emaYDt2eOArcU2T5uQflHGKNoSEOy98fWlUiZ8fdA7\n0AHO5ecMKaxMjHcxBx+dCjezKCQe3vS/Nglxg9OaQhkqqZDngYxmnL03bSyYxilG1uRwepHe\nkxu3FSV5GRRYYMwdEB5Bpyur7QBhCMhRTJF54cBVOMdz70jIscy7G+ZulFwHqwLfIcAZpGZm\nQlVySOTQVC5UHnODTmwr7Ady4pjIo/f0p2AuN2cZzSjEZPvxSzsZFBJwR0xQIYWbdnOVzmmM\nzjLAZDGpPJHytnpyRSqy+YBjqTigVhq/dyFwPpSD96xbOxR/FiljaXcwIBGM8dqWFdqsCOeo\n560gsIwG35RuY96f88aBCOf71JG/7kEjJzzxyKX7yE7sgc80xjY+VY5winpTmaSRgMbt3ehQ\nxk2jBG3caPMHGcgtxS1ACpXGwgnOAaYrAxvu+eQHHFPJAVvL/wBXjBz602HKLkfKv900agJu\n3SASA9KGwoIPUjr6UrBlbaTgGm7D8wxuJ60wGP8Au/LO7K5xUkmM8c+tCKrLu25HQY6Umx2c\nNuAOMY9KQ0KU2ooHHPWlAfaMrlQeopJVPykdR1NKMEghmfJ+6KQhGPzEsc9gq0M67mAX5l+U\ng07ay7vmC+2KZuZs4X738XemAIeit2NPkDeZt24X17UDDA5GG6fWmEbf3almPU59PSgY9flY\nHZhc4/8Ar0yQqzFVUtg9KWSZ4VH8Weg9KVWDZdRhsUxDNoiDemOKcuJI443XaOpanKw2jK4U\n9SafJhVAPIHIoAiYO0OUHy579aRZB9/BAAwKk2qy7hleMDnpUTx7m+b5gozkUAO+XjALHuM/\nrQEHkl85HvSbSyBduBnJPr
TmRzk8HPAoAFYblI/Kh1HmAKvJPLUeXu5C4IXBowxVccgdaAHM\nvygnk46ZpVfeikJj3zSbVjjy5x6DHWo/NYNgKNvpSGSJIYy3GN3HNIzfKiLw2etOfdJt+XGR\n60H5greWRjjpTEKqiNd54ydoX096acK3B3Y6fWmsVkbG45zg0hUrIqp869TmgB0Py7mJJfuD\nTnbzOCDGoPDU1shywGB2FNM0kjAMdozyKXUBxzuznHtQZBuBIJB/SnMpjzkYXPGOaRZtyk4G\nfX0pi1BVV+pyucCmOxV8rxxinKo3BhgY6Z6GkKFZFZuV7YoDUeu3aFHUjkVJDGy4Y4J9KiVV\n+Z2BHPBo2o2DubjvSGLJ5fO1WVs55pRIG+ZRz70351UDqCetHGGyDgHrTAXaZFEg559adJjo\nDz1NKuNwZVyntSDa3zOeCeFoAEZmxtIx0DdKNr7SMbVU8k0bxJu4x706FTNFuUZI9aAGiRTj\n5gGz1p+4fN820g5pfLEkavtUHr0pNoZQc53UhjIf3mQTjP8AnNOX7zBm3Z4B9KPm8sgHD56e\ntO8sbVbKhx1piGJGVbJbfninLu8sqGAHc0i4mU7vlJPIpvCsFXKLjGaQDo5AzYIymOaNyyfw\n/IDUW0xx7UHy5xtqSIHB9u3rTAFKsd3Q561JuUSZxgngDt9aYsaR53HOedtIZC0ygY3AdPag\nLk0a4Yk884NRy/LuIGRnbUiNwGGNueeaWRgFyTgdOP50BcRlKttznb3pdxbhmz70hZVyu/nq\nD60kjdDnOO3pQMUfu3JB3Ailibch3A7h2oV9yZA3D+I+lNaQ8bW9cYpE7ixzGMHCkjpup3mb\nRgnb3z60ecODt4UU1flxvTcTyKZRIvzRKwODnlal3BWO043Co0fbIoA4JwaftG8dFJ6elAhu\n1pAqhfkzT/mKsB90dM0qrJGMbacWK/Ofud1xQA7zD5Q2jHt15oLO2C3JzRHIZTnhT2+lPyPM\n+bg9qBjVYtIcjIzwac+Rjcfm9FFELnb93Ge1KoOS3egBY5eoUcdzUwUSY2/McfnUSsVZduD6\ninKCu9s5GelSIeuUhLyEA559qVPmHI3qf4hTPLyMA8dajVVzgbi2fuikImVv4c9KkVxIwQnj\nHBpjQ7lCj5mzz7VLEgYsGHAHGKYXHJH5i7s4CnrU1xKu1dvzA8fjUS7I1Gxdw69aGm8sjMJU\nnnFMZKqmSMvxheuKfI2Y1w3yMeRSQuu188AngVIqxsu0nFG4h0Odz7l4AwDU8sbBYz/Djmmx\nIVzGTnPJpfmztYfL2pEjlAbcrHbkcGrMMaAqGPUdqhhXfhccf3qnhUBsAbvSkMuwqPORSQFX\n0p7SM0jHf8p+XFMZVVQV5xyc1JGsdw2c7GIzikFhRGEZcDKj863rHy5WXjDY596xSV8zywdz\nEda0NJY7lQDezHaGqRnrHhWI2fhqeRzgsSB9AK7H9n3wF/wlHjB9TvoRNYWBDENkbn/hH0zz\n+FYNnp/l+H7eNfmdwMjHc9q+mvhZ4bTwr4KsrMKqzSL5spA/iOeM/TFZlHYNJnI9+R2B7imt\njrUa/e65pz+mcUwCNvm61IG+cYOar8qT3pY92cnikMsH73XrTWXnp+NMjU8nPNSqwZTmmMYw\nfcCG4PWnr8uajbBwM0vVjk8UmBIJAy/1pVY+pqNPTPHepNwBAxSJJo5scA4707f5mC3Jznmq\n+75dy85pFfLDk8daB6mgs/lqAHPuASM1DqWl6br1t5GpWNveQ46Sxhj9c1BuHWlW4Ibbzind\njPJvGX7H3wz8bGWRNKl0q7kOftFnJsx68dOa8F8ff8E5bqPzZvC/iCK5i5eOC9BVv93Prnv3\nr7Z8z5SBUizHb8zYz+dVzAflF4z/AGX/AIg+A5JJb3w3d3NuBg3ECeYOntXmN1oM1iz291BJ\nbSKSCJEYYP5V+1yTYDKTlCMF
T0P4Vyvij4U+DPGUZj1nw7Y3oYYJMKhuevP5flRzAfjidLmg\nQ5IBXrxkVC1uZFP7vDDnI7V+jfjP9gPwhqzSy+H9TutDdufKYiSMcjgDr6968R8WfsE+O9DW\nWbRruz1y3AJHz7HbnpjnmrUgPlGONkVgw4PO2km5ZcLtbpXoniz4M+L/AAfhNX8P39sevmeU\nWH0yK4qWzlZiH+VkPKsMGnzEmfub7pUdcZ70SRJsKMRuPRqnntzDInBQk/ebpUE0bW+9/L85\nWOAR/OnzARqrwsF289FNSKjMu0Ju9yKsNuXaT8w24zjvTFifaVzh+vX86rcZXaZgxWPhQMMP\nSnMo2o7sVk/hp0gCybWIyw/Oo41y7l/mKr8relMB0m3DAruduDntWxIy/wBktDu+UKMkVkw4\nhj+7n196GV2cor4ifg+1IRGrbZWCuduOGIp8hDMqcnjJIFLJCwZQoBxx9aN2392Djplv6UxE\ncltvmPzk5GVpnkyEbQuHP8WKnVdsz57DimNIWyjvs4yCvWgCNg20LggHjOO9TWeIpxOwA2cF\ngKlt7eWdtiEFl5w3pT/JMTEuRtbrTEzPlVZLiV93Eh/OoNrw5XJ9varzIQxcrkL0460nms/L\nRgL6nv7UDM75jNjJ2t1H9amYfuiir7lieTWstrYxpGW3SSHHTtntST2dqssjdEAGFNIDIVWR\nFGQQx+6actowBJ55zWnNb2900Z8poxgAHP60l5ppiuiC+1FGcg8H2oApRW8rSbF+ZX/AVBa+\nXhioIPOD61omzWOEhJDk849KqJbBmKAbWAzu/pSAYf3q9QOfWpVvGhx8uCDxmmW9usbkn7nQ\n5NSTRmNtu9ZUxlSBQMiVpMMS/JPLGmsytDtf5Ax4IHOaWaFVQBX3SH5uOgp8gdkU7l5496YE\nZUwsqHlwM47mlmYptZj1/u/yp259vXcVHII/rUK/dBI2pndz60tQJVk2sJHCtIhyq4/SpLrU\nLrUyrTr/AKv7oAwBUPleczY5GN271oSZ2U54wOo7UAK294trHevue/pRJhcBAW2nn/ChlaOH\nCj7zYJ6nFO2hc46jtmgQjXCKpj8oDB5OeaiBKyAKp29QamkiMbfMoYNzx1HtU20RqRjHORTA\ng8+RZCy5Jxznv7VG0gkZie3TPQfhVpShcmQbFxngUsccTM2F4zy2KBkSs5jAaME9mNRNGZOX\nPHQ5q7NHG6A7upADUktuskhCrnjgNSEVoI1yV2bm6nNTBSzKqDaAMH0xVi3WOKHzH6Y28etD\nYjjCBdwPWgZE6xxI8gOI15DL60slu8gIWPIVct9aURDzMYCj+92H4VJHJIrusfQj7xpjK8ML\n+XllDDOAvSnRq/mFD8xVsZP8qm8vzpNrtg9cVMV+YuQODy1IRVt0eOSRDjzGqQxNt2k4bpuq\nSNXaR3kAHofSpYocoSzDGOD60CIBCfMwSCQOGPenRxt8qSfMd2TirENq23ZIw55BHIzUy25R\ngCMhfm+ppXQ0itJGuC23KAZpVhSaHeQIzHyff2q7b2P2pwcYjx16Vqt4bkuLSN1iYEHjA+9U\nuRVjAgR9xXaxy3p3q9YwtIGUpg7uM+td74f8A32tSRQafp9xqV1J1S3jYndj6V7l8P8A9iPx\nd4nWO51KFdIikGd1wcMPbHFZ8wWPmLT9JubhJGWHcpOMtwM16P4B+CPiDxi/laXp81xlh+8C\nbV59WPHWvvTwD+x/4H8GRxS6oJNbv1QKzSHagYc8AV7HY6bp+h26wadaRWkQ4Eca4AqW7lHy\nX8Pf2BJLfybjxTqexThjaW6gsB3BOfWvp7wX8L/C/gO1SPSNKt43QbfNcb3OOevrmuma4aTj\nk5oZxu96VykTpcO2Tn25qVXAPXqc1VQhuSOPWhT261Iy3u3ZJpiyZOOgqGN9uc845NSqwYBg\nQVNIpEisSx+lICGXOKTowNOPDn
0PagsFbv0FICZGwO1NZl3YHSnqcHI4qSRzfulwxy3tT4WD\n8ZzUUbHkld1PDcfKm0UxEsjFcnGWpP4M5564pFYZ3d6VcHO4YNAIf5gZMkc09mPbrjp2qMKF\nYc5FKF+Yk/WkUSSSeYoOMdqf948dAKh3h8dhT/M3KVHHvQO5Ju2xjP8AFSY5wOlNmbbtA56U\nNu69DmgROpyDkUgXPBPFRqx6bs05WXk9xQBLtDdOtKuEXHvTFk45GOKUsOM9KBDtyg9c0udx\nx0qMruJ2nAqThcDvUgOVtvAGRTxhs9hTB8q8nrTgw+lDAFkK5ApfQ0xHGDnk0q/vOQCKAJPM\n4AxijnHvTAxPBqbcFX3oATc23k0qbdvrSKp7ninKu1fUdqZQq5GPWpM7W+amqpDAkUL8zHND\nGDY3E5pdp2k54ppwAD3pdwHXpSAcJCnCjIpd3Wm7srnFNjZt3zD5cUASQ53nPSnqx5zTUwMg\n8mnCQ7fegBwboO1Ln86YxO3kUv3lB6UCH8t3xTgT9TTNu1ck80b+9IZJu/OlztOcZpic804f\neyTQA4t8uKDgHgZozntTPmViaAsP3DZ15pBhQCRmkboCOtSKeB3pgL5nSl46kUxW+bpT6QCn\n5+/FKue350g/Snb/AJsDpQMTb8ueppB05p2NvQ5pGTcp7GnYBiM+7pUyNhqamV75p6nLHI/K\ngAwTk5xS7QoBpPu9elJ5gPuKQE3mf7PFN3fP7UmSx4pzMpHSmAvvijI5HWj72MDNIzbeelAw\n/Cik8z3op6kET5deeKI128mncFj6UqKcHPTtWJqGwLnng01ox36VJwFOTTGl/h6imIQbfSnn\nO0+lR7hyR1pd52jg0gsAY7cY4oXJHPSg5I9KQ5C+1Fx20uOLHov0qMqc9/wp7H5RtoVjQSM+\n7QF79TStIF78+lIvytn1pjHLSbf9odeaBjPPFIqquWJzmmIViOec0kn3RgYoYDIIpQvPJ4pA\nHCngUxlC8r0PWnLlW9qXAIwDxSARcrgHnvSrwpoUjgY5pWIXrVAJn5TTGZsDAzTzjbkdKYzm\nMDJ69KAGxFlVsjOaX5tuQMH3p0bfnTm4x3oAXhuM0wMrMVz0605V9R05pqqFZiRjdQA/ll2g\n0i46YzRjHemhtrdOKQD29KHj2r0zTGkP0NNmuDHgAZJoAGyzZAwBTTkHPan8tgZ4pWHy9OaA\nGLiTntTx97ikBA4HSl/nSATJYkAc0mws24dPSnK20nj8qWNttMBshC8Ubj17UEZwxoXOznim\nAu8KcikDDBJ60Fdy5HFMweKQDkz1J4p2evHFG7gDGaTce1MWoq7WTpimN16Um7HWj5ug6HvQ\nMV8hQzHApNw4yOaHPyhW5FJjc3+zQLqLuO4gGhSN2G4NN45zjNNYnPNAxW+6T2pijbkGncuC\nO1JuG7ZjmgBeOuaibA6c1KenyjimL0YVICq3y8mkVvmznpSdMDvTsnvTJJo18zgjIPJB/lXy\nR8aNPS18T3wK7AZCdvpX1rwcE8c8V83ftGae48SSS7AkUiL857tjp+lAmfN19M0bSKg4yRmu\navIUaQkt2rptSj8nduGOcfjXHanvS4bP3e1apikkULmOMZbOMVmXkgQEIMEjJq9cRsoJJz7V\nnXEDKwOfritDIzGkVhuYc/wk1BcMXZWb7i8k96uXEA2ksOc1Rk8u4j29fWquIjvrRoVU4G1x\nuyDVJ4XxkvtA5571oXVw32dI85A496zJV83LFiCOMZq0SyrtLIwznqfpVeRNse1fxq08TRPk\nfdI5qE/MQducdQfSn1JIWt1kUYfJHOKrSMFYFxxmrkvALKNvf61XZlk52c980+oEfmbmOORU\nW1lkJPCmpJW+UgHDVFGzSNGCcrmi4hrRlmwx96rfOzMFwrn8qu3CkyMFHTmq/wArc5zkdKAK\n+dqjeMMDyKSRmUcj5SRins0iuuVy
o55NRLJJuPmEEZyPamA+VQqxuv0OajYIykDO7qaklyyA\nj7uMVAxA6MB6GgBY9q44Yn0pkvyjlu/pzTzMUby8nLAc01pE8w5IZ8YI9DQgGE+ZGxUlQDjB\npYl8uPL/ACnvSFo9oypLGmTMY85bDd6BWGxusm6NU2gHp61HNGdoGOnNO4SMMgOPem7u20nv\n1oAjVSMsy5B7ClZTJgY4zx6CmlXLbQeW9KUt5bAM2BjBx600MJGOwtHjZ91sU3aIlGWye1C5\n2tEGAyM+xpkfCjcMY65pgKpZZBu4A5zTm3bsk72bjdThGGIIbcG6imFj0P3VP6UMBGCrgn5i\nBz9aZtEcRw3zE5IqR2VdpUjB9aZIRuBLYGeKYiIITEQTjmllLqRlstinNgsxJzxxTXyNr98Y\nGaBjd21ST1al8vaowvXrSsSmwuM9qPvMxzgDigGG5rVQYztz1x2o+0eUChywJzupsYyxOcN0\nANI393GSOpoKH7jnKnBNNY+ZGVPXOaB80hHRSKQBdyqMhs0iWOktyMMDkYqPyh5eCAee1TNI\nvklcn73UVHuCk56daeoEZ37h0PNAbMhJI9PpT/lDMN2CRkVHu3KGU4cdcimITaV4XoOhpSok\nypfoM5pshdfmYZU85pFy3O3dGe9ADuZDjHOOppu0cDv2NSSZeNSF+ZTjj0psiszBjgAGkA1F\nkjYsGB9aFVlYZbHc0FT8y7sn72abwFyMtk81QCsqr82N/qPWmtISpITFO27cgGmLzuB4qQFk\nwyhsY7Upk2sNoyg/SiT5cfLgelMKk5WM475NMpEiyeWpYD5f1qORlBBB7ZIpWkVUB3+2KOAo\n43FqYDwrgM6nHHQ0hBSHLISepNIXVVVnySDjA7UNIVjwTleppCGSSNsBJH+6oo3IXU8qvr70\npDL5bjA3etKQV3p8rbuaBjXbzGDKM9s0o+VXD8Ghl2orAfgKjYFeWOQfWmJgcLHjdkGlaQPj\nauSeGpm758AgrjNSbl4IOQe3fNK4hix4GV+UenrSqeASMnPPtSr8qkDjnrSxhhksRjHSmMbL\n8vzKN3sKFjyFGcjqTSqw2lgPlbjFBYqmB9496QhMENxyKWQeY3J2jHJpCxX5R09felVXkBAA\n9SaAGsA8ed24L0Aobjv/AMBp+Aqgjj2NMZVGTnLGgYxI8njrmnH922P4valaMbcqeOhNIcyx\n/MM7elGogBEfGCxJ60rSGMEEAsxxThIeOfzpr/d685+9THcQrjluT2FNOIdo698VKWffuGCM\nYLVDN8pVlXdzjNIBerMeSuc0khDLub5FpeVPPCA84pnyzLt2nG7imMdgSbNuBzg05dzEjdwp\nxTZCzKAcYB7Uvlhm7gY60tRCowVjxt75Peo03KrEksWPTPSnbTLt3HhTx9aay+Wx4yW9KVgS\nHBGjTazcnoKUAKuW59BTVwHZm5wOhpWY8BEwcZ5pjYLyXLDbno1KmMgZIPSlWQtGQQBzwtIH\naR27DFMLDYF+WQOTijhlXnIHapI8urKpABHzZprZjUKBigRAWAkU435bH+FPcZkOTjB5alEa\nlc7cEHNJ1xtGTnPAoEKFKygoxKHqMUzzhHuVAQxODn0p3mE5P3WpDliRjtyxpD3JGCrGoILM\nTxTPlbJJ5FIz7o1yOF/WnM54wAE/XNMQ3IlUMCWYdRTVXAIMmO4/wp23lgQFBNAKPubrt6k0\nAKFf5SG3dytRSqVX5x1bIUdqezNNwp2evvRjcRvG4L05oAFZnZ9wXcfSkyFj2qcc8A+tOVhu\nIA2yDkUkcituzye9LUBegLMQTjBA9aHbb5Z4x3pXYKynGeMmmxt5m7AwPSgA3BmYE4/rSGTy\n414wGOMmnq3zbTjpxTWVv41wlMpA27cRxhTTGzuLDqaFkMj7SCAR0py52nJ4HYUEvcaHCkFu\ng7jpTj8y7hgqaSPkFTgJ+tI24LsGOv
apEO3M2FxtPqaVmKJ12gcD3NNO1vlydwpcLtULzjmm\nMZ8yuE6k/Mac2WXI49vahciRiR97nNBZs5QbhimApwo3EZOOOetAXdjJyD/EfWmqrNbFmHfA\n9qQBmfP3lHGPegB3KqzMMgHBxR/AAv3m4xSDcpI/Fh7Upxu3evT2pAIVO3DDac9acyjAYYJ7\nEUO275urdKCu7GOM9aYw+fdz09cUwyBVOTz6ihZCCwbOBwMU+OMgjaVduvSkBGikcj94vr3p\n+3neTk9AB0p/+qyRxk1GjARsASwJ/KmLUbsO1jnce4pI2YuAOF/2qcVEPzIM5GNtIuV688fl\nQF31GqS0jZkAUDAxT1hVYVyxLA7s0j7ZVG1MH1pc5x3Ucc0DFkO0AjJVjlqQYeM7RjnvT1aT\nO1gCBQ4/ebVGBjOaAI2YKNwXJxipPM8v5SueOvpRu3MFZdvfK0iNtZiu1+2aBXBSdvqO1I53\n4AcZ70u1vMI28gZzmlRY2chV4zzQA0fKwDsHfqPSlkVdxJJGfvAdKRdu5gBx70jSE5G3B7UA\nKjPGm1RhQeFpxY84UMfWmbj94fKf7tI3yNtB2jHNIYrcKVI/GkXDqWVtirx9aayBVPzbi3PN\nK2fkAAANMmw7zNzAldi/3T3pAg/eg5BxlW/pSKPlGVJP6Um1mZg3yqByTQMf95dxOSMDNJ8s\nfCHcepamqw2EISQxpVUKjc8+lAB5pODgZz3pRJtXAG0s3XFKqlCPMIVSM0rL8gwevY0B1JJF\nKsT1z/FUarn5QenOaI8xoeTtPHNKrbX8ts+vFIBMjcwJwT0I6UvC43A49R3oTHmYzznjApVL\nbjjBUHB+tK4CMX3fKfl64p2doyGyfT0pq/u423NncenoaT55dq8KF61Qxd2CvzZz1FJuGWXf\ns56UeWqbXz8uck0FkkmaZUJXH50C1HZDN85BXoKQxkrzgelNEiM5wpEePloYFtpBJVaADaUj\nO0ndnJ96NzKwOSinrzxSt8xzz+FK+DH8g+f1PegAcrtOBk+ooKgKAXJ7nHenJlskEA96j3mN\nc7DnPFAEjANGQ2RgZH+FRso+QofmI6+lSJI8mFOGPUVGSfmO0gCkMYuI18sOeuT6VYjKkFv4\nelMjbzF7ZUZxigqVX7v3udtGohWj7End149KXB5baduM80katJkjk5oiLSyFGP7s8qc9BTAV\nF8xArcg8j2okkEY2kDHTA70b/LcoB7UhXdIAwBakJjW3Z2rlQf4vSnOBCm3O4tzuxTotq9uc\n9TTppNrKMgOT39KYxqtnpw2Pwpil1YBsMO5HanD5Uw4wvUAUKFVCFOAeaAE5+cFcqRwRSjzN\nibG2DuO9LtKkFGwDwc96VVMZOVz/ADpDQrO2F2H5RxSZXaoyQy9MUu1Vj3gkpnpTX3GPK8kc\ngCjUQ7hmLMSPwpjbZEKuCPXaaezbgjFseuaRt6sSxBz0pgOXaqoFGQO9Ey/NuIyOwppH7sN9\nw9NtNO8qMHJyOaQEjTBmUBcEDG6mjOSx+ULzRxCZM4k5wo9KVcFSCSe340C1CZTwyrlmGc+1\nMbHlYKE88+tTQ/um2v8ANx+VKWJC/LtQUARMojUKQcHn8KnkZeNvXoMik818gJjB/vCmLJiM\n7lw4OKYx7BeSy9uaSPO1vl3DGKWM7h83Ld1NEPys43fNjhaQCNHglVyNowRnqakPyxoAoEn8\nS+lNXKrw21y2S1O5jAk67uDQIaygnarbfX3qXykUguc46YpjKNwBP1bFK0gZMP0Xv60agKyk\n8RjBzmpNhaQc7hjp70z51VWyFHGF9qkBC/dGGznJo1GN+YMR5hGOrelKP3i7ozujHJal3ENn\n1602EhYmONoZuKYyWFXEh+XhhkNTHLLMG9BUiyhTuZsKOAcUMd6liQR29aBD4pN4DE/MtNwz\nY+fHOfwo8piu5B8vrSRssbFGG40AWPNj
jL7Dh8fWiJWZRvO0tyDTNwtVB8vO449cVLJMV3Er\nntt9KQC8FgA+AvX0Jpy4hmVmPBHLfypYVDwDJ5PaozICx52442mkIlhUt5gX5cc9aVnz5Y3b\nDUcPl7RtO3nlal2+ZGznGc8A0AOEXUZwO1SSTBnQ4yF4/GoYd3Rucnuak2lZjxkDnHrQUSKr\nNtbaQq80/cfN3Lzu6e1OWQSKGzjH8NLDuy2QOeQKQhfMkUkLhR3NWEVmCM7ZXvTfIHl5Xqw+\n9RaxhYWXqfWpJLYjOMBtq54qwreW4A+UgZB9aqqTJhxwF4xUzIzgODgnqDR1Atx/veXGEbrU\n9vEqq56gD5ailhhbaylsAcrmpNqoYmiyATgigosJbskaM6ZLc4re8NWZvNSgAwrK4Y8ZFZLX\nErfJs3gDB9q7r4Xx7TK7RjKH759KQz2rwXoceueILO1KloYsSSKn90Yr6LEaQRrGi7FUYAry\nv4H6WhXUdUfaVkIhQY59Tx2H+FeoyNnp0HA/OoGPVtpzilZhIoPQ1B82MZFSZAXB5NIZJwOM\n07GUAz0qBju6cGhWbI/WgCfcep4p24Z5qJnDMB2pQd1Axx55o5AOOab354FJ0b2pCJNx67cC\nnGX5sAVFvIXApVbuAc0CH7hk4pVJzz0pu4cnFMMnagZO8ilvSlEneoeOD3oVjznpQMn8wqcA\n0/eeh5qt6c4NOjbjk0xE+77uRmpDM3bpVZ5NuPm61IjZ78UgJ/O3Yz1pQ3csefeqrSBQTnmj\nzuBjrQBduYIbyPypooplIwRIgbI9K888V/s6/DrxgzvqHhy1SV12NLbjyz9eDjNdz54GM5DU\n7zuRtPvTGfLHiz/gn34XvA82ia1c2DYIWCdRIoP1x0rx3xd+wp460MGTSXt9ctzxi3+Vvrg1\n+hIkIQbuVqTzTsC7yPoaYj8k/EXwO8Y+DGePVPDt9aR7seZ5ZdSfXIFcPeafLbuFeMpjq7fL\n9Rz3r9o5VWWLa6rKh6rIAwP4Vy3iD4S+CPF0ZGseGrC4JHBWJUP14q1ID8ffs6XC5aNhsGSc\nHFRDTX2nG4DvjkV+lPib9hn4e6opbTftmlzZyGEoZT3IOc15T4s/4J/6rbsW0LXIbpd5xDIM\nMq5PU5wTjHaq5hHxUsLkNk7T/dbqKaiGPp1/r6179r37IfxG8M3Vw8ugyahbpyklqwYN7+30\nrzPVPAGs6HJvv9Ju7XcTjzImA46jpRcRx+0zKWzulAxx0qvJGRtVuCenpW4unpBuRxtO/pjt\nVOaxaOZiqkgjhSMD60+YClsVpMEHcBz70+FUTEjRblz3q1NA7KuQU6LuHOabbJ+9xJnYo6Y7\n0cwE6X26NlljCkHlgOTVaa5bzGjVcrjIz3olk3J05zgH1qHzJGkJkjO7GAoqlJbiEVvMYY4Y\nevakWQyA7U5HY/zp0NvKylnUqvTFMZW8wlW2gcCjcQ+K48xBuXdzjb0P1qc+VI3Kbgnf3qtH\nGN2/O5CcY6c0+YDzOhUfxUwHRyKUbdxk556U9WXcrSlijcL35qDcuDgbj2Wl3NNkMuzA4HbN\nAx7LwyHLjv61CIlRSyNtU8fjUomIUADEv96k2YZR0Y/rSAqvCuNsmd/Ugd6cdijc4wTgFR6d\nqtMTD+8VVLLx71D5hDK5h8okn5etLqBEse2RjjGONvvR5LsrZUFe4qx5bLC3y5LDPuPenKpD\nfM2cDH1p9QKdvb4kAz2/OlW2Lly/Ckbdv9andg2HI2pnA+tLI/nNtJ2MvXHSmMb9mktYdpwE\nC8GoNpMe4nCfTr7VZlkXlWc5xwtRx73UKo2nNAIjhjZZCXQgN90Z4pWtDHJuJDEck9Pwq55Z\naPDHIHJX0x3qK4jEixSZzzyakLEEUf7xjnBHzVO0YmuHCnhx36Um3fvONyg/mKdGjyMVUbEB\nzz2FUOxF5LSBhjdtG3NSRqi8KdrdSafhRv
8ALz6/WpGZVVHwMkfjSJIk8s7mVdw/kaR9jTKY\nVI6ZPY1MFWSQKv3mPIHSppbfazRou1RzupFdCr9nbftT7p5x2zTFIJXDbjj73v6VY8xoG+f5\nWYcYoaxkbaZDtHB3DuKBIrtC0aySYyGONpoZW3IUy2RgjtV6G1LM5IO8nIB6U9Ld2TZtKknP\n/wCr2qtiiG3h28EAuKljWNQoKknGT7VoW2ku7BVTDnpnOOlaVl4fnmAjSFp5SNoWNSc8+1RK\nRSSZgRxhuF5bO0k96ka3lz8qhx0+leteFf2efGPi6GE2OgXqox2iWSIqv1ya9y8F/sE67qUK\nSa/qEGjRKRmEYkdvfjGKy5yeU+PbXTrmRRkbo89UHSur8M+ANT168MVrZT3RY/KsUZbP144F\nfob4N/Y88BeGoYjeQTavMp585gqM30HavZNB8M6P4bj22Gl2tmgGF2RDIHbnvUuRVrHwP4D/\nAGLPGXiyWNr+2j0a1Zdxe5+XA+nvX0h4J/Yl8H+Hfs76xPJrMqAbodwRCfw7fj619ALdHjrn\nPY9vSlWcbiSDn6mgso6D4R8P+FYY4tI0q2tFjGEZI1yPx61pmd2bA6dCB3qDzvU5NMaRjypI\nNIkWVRuGab93nrz3pVzjLc0jY8wkAgUuoyRZtrcA4NSRsueepqv5n7vHvTlypDHpTGWlJ28n\njNL5mFGBg1Ej54NSlc+1SMc2JAccVWghdZCynCnt2qwmNvHWmhhtIzjmgokjL7T9aesjbs9a\narFW3dB6Uobd0GKlgL8rLxxRk8DPFC7ehFEarJnPXtSAlX72M8U5pCrKP4fWmRkchjmlblR6\nZp9RMWOZfOwOal+91NRKqrgKMGpOG7896oRK0e0cc0nmZPPakQkH2p2FZv1NSUg65boKc3yb\nfem/w7QOeuKGbv14pDHkgMOe9O8zLHPSodw27e+c1JuDKuPXBoESqmFyBQfu/dpFY8jOfSkM\nhCnI60DJEw3TmlYCQ5PQcU2I/KCMUrdSe1IQqtuPHApS3zZpFwucelIylkC+tAEwxu5PFOaT\nc3yjio5I9gAJyaI9w9hQUiX+HIHNG0jvim7tqnvSrlhuxQJDs9Kevr2FMVc5b0pVZtpIHFLq\nIl3rIO1LHn1qFMLyRU6yA/w8UxiqzRtyc5p4YdSMVFu3NS+Z83zDmlqGpIwB4x70m1ZFxSbi\nzEngU7cNh7GmMQfKMUpbjpxSbgcE9aPvD0pMY4sY8HGQacCevem7ht5NCnp3NAdCR88HqKVW\nVlwM03YRyT06U7kKCeM0MQu3cct0peGGRwaRc96FcbjxzSGOILcDpSLlQFPWmlyoqQMMbiaA\nF57E4obIA9KA21cd6cpLdeKAE8sqvWljXb3o/iAzR/F7UeYMczbfel5HNJil3ENjFADl+daX\n73AGTUaybeSOacrndnpQBIuPoaGOKYp2tzyacWPHagBXwyjAwajjzDIcnhqfu3MAelOwC3Io\nKQvO7GciljxyAM0m4YwByKFX+IGmMkA/A0uBvweKjOQ2cfSl+Ycn7xoEPVtuR1o4bHFRtnd8\nv409chcHmmgHeX7UU35/SikBB5h8zYBxUi7tpGaZFCwJZutPDbRkDisrliHPQDikZvlB70GQ\n9AMUKoX73NArDSflBAxmnq5yBihWTOAfzpdvftQhCsd2cnFCdwSMVGwLcAU1eF96oBwA3Eg4\noXLZXOBRkYDAUrYY8dRUsRHtxT9wIzij72MjFIynaaaASVuRxwKN67AKdzIvPFN2jA7UwBfU\n9Kc4PToKSQOzAjlaJFLLjNIByj8TSFcNSnCcDrUat8xzQMe3PNR7WbkninvkLuHSmLIGXJGK\nYEi/dpsmGwcUeYFIBH+FDHdnsKQhNoTBxxUkjBRgU3uO4pW+UYxk0wE3Hbg0jxmSPJbFKzKM\nAc0knzY5/AUgFztUY59aHf5emKBtCHmmMp3A
g8UgHFt3ag46nBpE4zmm52uT2pgC4wTTyxVR\nnk1Fu5OOhp68rzTAcjKFyRSK3zHA4oGdvNHTGKnYAbsc45pj583/AGacw3rjPNInyr6igB43\nD7w49qJJOgoaU7TgbqjjDqfmGWo1Acz7e2B7UzdxinffyCMUiKAxzzTGIpbtwtLznqMUit2P\nSh2HpxQIfjPek+63tUcbA8ZqTbtHr9aYCFvmPpTfmx7U5hke9RFjk5OBS6isSMvmYqNx82M1\nIpBXrTZPlYdKYwUbcDNIzdTjkd6azZYCjacHDUAgZiykLnFMjU7feiNW6Zp7ZU4A5oAH4YE9\nqbGwaRsjINIWMmccCnKpGcEdKQrFiNTjgcV4V+0hYj7RaXXJDKRg+2a9zic7AOleS/tC2v2j\nQ7J2PKlhuHuKEJnyJrtsdxbOR1xXG61b/MD26133iOHy2YA4GK4rWIV8nfv3Z4rREs5yQK3J\n5rMuGXcVTqOTWlcQBO+38aztxjY5AYetamJmajmNQQ+N3as3G1iN35VpS7ZGPQkdM1Su/Kt8\nEHLN0HoaYrlFIJGJ3dM01reKNSW5YntViaY+X8jfMDyKqXDOzA7QT7VRJDIobKxk59zUUis3\n+sPyDqBUvkkMXDYHpUXmbid3TvVgV5FVlJX5BnC81VZiGG5eOlTMmWGV+UHNMlxv4JbPShiI\nZFMb7kjDHHUmmbBuRuhzk/4VPIAvAPHeq6qr7gHwevPamIRnKs5RuWPPt7VCsZYHHIXmpZl2\nRqN+W9h1pvKrlfSgoYyHywwVSe/NQyQjgMcd6VpgqqB827nHelcIy78NG/oe9MRBllfnhe1R\nSW+JFcrwfep2Bkw23K96ZIdpOxcmgQ0kM2Dncp44qGRvn+4ASefWnn96B94t34wKa7OFBQeY\ng/iPUUDQ3hVzgnnkGmsx6YBVu7U75/vDB3U3bv4Iyme1AC52qUI3Af3TUOcAehOBipdu0N5b\nbee/aol77jsPr/KnYQnlkyPtbDdM5qtI21FLDa1WuJCCq45x16mq8kbrchiNwUYxQJilSoyV\nKs3RqNpYfMMnHFLIxkfJOTj7tOVCoDtyvagY3liAo2tx0pXU4IPHpinZHJUdeBUTSvEMMMY7\n0xDGRTjJ+b0prAFUGcAGpGYj58bgaao8whc43dM0wFX942E2k5wKPLJbnB280jZhBG0hlqOS\nb5lbb8hHPPShgK25nCt6ZFEaqVbe3NR7wsgwxJPShs7TgZbvSAdx8znHTimK21QCGZqVVxjv\n320jSbnLk4HTaOtAyRcJHnBJ96jwR8/QmkZmSPaTwTmmll6c80xMk3jjJ2j371GZCVKscDNO\nZvlXjeKZtXazOduRVDEUouSTn0pGZpF2ghMnmj5NqNtOOmKcFO7gAKPWpYDm/fY2HgDHWmtv\nVPLYY54PaglVUjBx1JWl3FVUnJU+tMkCrR54I96bHxIdwLLSljuIL5pNjBA24delAwYlVOeK\nIZDtxsz6GkYfKR1J5waWMsWDAewFADV+aTDHJzTXkCbsIWBOOO1C7lySTwaepKtkAKT1z0NI\nRGkhkX7pU9CfanD5lKg4A4BpPmBZWOATxR5o2jghQcY71RYCPIA25560u7blGGRg96Rs+WVx\n3z17VH9yRs/NxkUCHo5XBVOoxim9Fz/Dnmkk/hbcy8fdpQMKCTtx2FSArZbtgjjHoKSLHI3f\nnT1k2rgDg9ajZhuxt+lA0LvPDbtq5xilZgxBb7mcDFM2ncO9LwY2Dghu1UITDDcSgU5xxSMr\nNggdam2HHqMcmo1z5jrkHI49qQAe2fujg/Wk8sFvmOB29/apN+3II3KvBqGMeaHYngHK5oHY\ndGAM/KQM8Z7UDb5gGcjOaVScHJHPrTEIXdkbVHWmSODD5tvK9aRAIwec7v0pdhYMig47EU3J\n5x82OKQBJuKjAIwMZNKgIkQ43N0zTwzO4Ibb9a
Rt+7eOWzigY0qEYnPzZximsAucnmn8yZIJ\n3jmmKpWPd1JPen0GKoMYOWBBHpUe4sqnORnFSt8uEZc45z61BtMkmQuwZztpXAn3AK2DjNNZ\ng0cYUY5yWpdpZSPvZprLgdcDpto6CEZyrMMcHtTkLNgkYApW2MAT94HNKufmwODzSQhkeNjs\nefQUI3Uj0/KnKQykdKZgp3xx+tACNgIzZJOadI29cp6ZpFVo1VSN3GMUn+ryxG1Txj3oKF4+\nXnNKxYMWCHjnaabt24UfMOpp0bEyAFs9vpQIbtEkgdR2xiiRtrDA6enekH+sK42kdxTtobkA\ninqA1yJDx8h746UpYyEP6cUnljysNyxPSnnJAUcDNMRG2dp5JPYUu4rt2HAH3gfWhVX33Kea\nSds7toLL3oGOkVdvP1NGE244JJyoNRBdoBzknoO2KdHHv2k+uAKAHDd5hUKMd6arBQwVC56c\n0qs7O4+6B0Heo+MZTKtnBJ7UASYXIBXtz9aFCcr0YdKftKyKfvLjr60xctuPl496TAarHfub\nlulJHJGvy9XzxTip8tWX7wPNN3AtwvzdcUxCs3zZK8+tB2t91ee9N4IPPXmnIQFwfvikMYJF\nVmIBIx1PanbgseU5LUfLg56VGVVV5P8Au4oESDa20hs0rMJFKk5OaT5Nqpnbk9cdDSybRNjt\njHTqaOowaMlhl8EdKY2NuWJzntTlRmCgmlkiDqcHpQAyT5jtJHrxSFgjAE89R709B5i7scgY\nzSFEaZS53cdKQiNZGVXcLlmODT/MRBjbtOBxR5bZIA2r2FIFAYsTn3qgJFYOhCnNMDHa2OPa\nkVQqnJ2nqKTO0b+meCKChx5QfMdp7e9KsZWMhWGO4pjJlB2H1qQMu4Y6KOfrQBHIzgcLwOSa\nRmPl5Vc5p0jMwAXkHmjlc7Rwe1AherLtGFxzQ0ZXnNIGd0xt2nNBXzDuJ4HH40gYKHX7x2jq\nacNwyRwD1FDx/Ny2VPFI2Y8Nu3AcUwE+5/FtUetL+88ssuCvt0pX/dHICux6LTXV1yWbaf7v\nagBVVv8AWKdrelDSfeJ+9jG2mY2hW9DS+YHZsIeeeaBCLuQZbp2WmtEWbHI9qWTdwM47il5w\nT36DmgByyBWBYcqMGnrIsjbkPQccVA0gaN0I28c57mnKzNMp+5t46dqQDt4+8QfN6YpyruIH\nQ4J29M/jQyhFMmcIexqNHMibdwKg5pg9CRpCr5XuuCaaoCglSSxpXb5wQcAjnPp6U5mZmGBg\n7flFIBszYwenakbqMZ96jXLK2TyOSPSpeX2sDv3DHpTDUTAgwzBnye/WnOVkOB34pHY7dy5O\n04ximMx8rJBHOcd6AFlz2+4OPemNH5ax7vvdRmnjM8nygKvXmmN8wJLZINLUQ9Fk24U5IpJJ\nBIrfxMnJFLIHkAYNjHUUx8ZIU5LdRTGOQjzQOBn8hSMuCVzn5ufepWEflbumOqio5FPBQ/L3\npCY1l4ZW79KVd0ZRXO9iMcU5k2sHPzHHTtSfdwxXJ7UxoerbQxKkhe1PYBk3AjOOOarLIVlI\n5JPUdqlhYeS5UYOcc9qAEBEXIPJ6+1KFHmBc991G545FwA31okG5t/Q0gFYeYTt/d85O6kRR\nuw54PGQOKV/m4Zc8dRTe6xknb1FMBzfdC4CjPrSuw4Iwn0pphJY78H270MN0ikLjHB9DSGSb\ndyr6dAKZIQJNo+XHUUL8rYZsL7U0fNHJxkjoaAFZtql921c7T9aXYGYbvlHUKD1oTCICV3KF\nyVo8tWjVuo68UydQkj2yYUnaT096FYbm3HcegFCscFSep49qa2fO6bFHO/1oGJDIFYbfl9d1\nSNHI0bYOG6+X7UIfvkj5yOFpkYMeGPUfwmgBzKJMbsDI5AoCpuwoYbeQDTUC7iTwWPAo2HLM\n5+YHikBKi5YsSQ54bihcKW2rkdh6VGrBmDMT9M0s
auG5Iyx6Y5piF8wyfOR+FJtZm3j5eMHN\nTKpZWJ4CmmybnyGIXvmgBuz5gB8+3ninJGrsXbr1G7+VJ/GmD5fH50qwh5uGJPXHagYNtXGA\nT7HtTViAIJb5fSp5I/LZQzhu5xURjMbthsK3XPakAIu1CCN2MkYpRu27g2SevtSMWhVUHzn+\n93xSsysSy/dx3/wpjBfuEMTjt6UNiNYxnk9SO1IrNGgzkg/w0DG3c67MGgQsgXnAyM8e9CdP\nulm65NDBtihh85G72xQQ/fpjINAwL7l5HPc0pILAopUUrBWBAPOOQKb5nlx8gkYxxQSIzANl\njjnOcVYMbNhkK8ckGqwO5wFXcCOc0qu+Mqfn6YpDHcuuSRnPWn7nXGRkHjFMZWVhkgdyKXZv\nXduIVedtAD1VpMhhjHNDrtUAHvSxuFiJBJ3DgUm3Ea5YBh270APZVmyckHpTZMbQMZx2pJGM\nLI4BZ6UsGk3gfNjkUAOjb94D14zTWZdwGc54HrTfM2nITHtinbPMkQkYHXIpgLNJIy5YgIvG\nPekWPb1OQRnFSlQ0wYAspHSmRsWzuTauehpC6km9I/vIXkx2p3mEx7c+9Iv+sVyNo7UihWUg\n9d2eaYxIlPmBvu57E06aPzMAv5Rz19KV8YIPXHAprEyZRVJI45pdSbsVYPmJdmZO3vUzHdnA\n7cCmKzsCmdjKKVZBw5G8MMGmFxVbeNqviTP3fWpNsnDEAgcMR61ExQkYXCk/r2p//LTZywxz\njuaAJFJjk+9lj/Ee1LsV5D5bbj/nNRtHtQEDnNSx/KxCqSGGKncLku3H8XH90VFtWQnI+Y8c\n9qduCrleccD60q/MhPG49aBipCY1APB9akWNWzlttRqRISS2cdqn2lSuU3g85FAxrbVVVxlg\netTxsC33vmpgJfk4UDo1Tfu1yXBznrQSxobgkdc8j1FTwYeMkBgPeiRwzDywOmM4pQHAUFs/\n7tAyzHkbQPuLyBUoz94qFDGoYo2WNgfwNTKvDKG5UcMaQCeWxlG0kc429h7mrYZh8mAT6iob\nZh5LBzjjJAq7FsWwVzyCfxqQEjUeW6gc+pqzCzLEGPPb6VAm6RgACO+KsTKvlrsYBs5oAsab\nI0kgxnG7nnqK9Y8E2qRWM7Lj5iMcde+P0ryzSYh9qV15Pp617L4Jtt8ljbEY86VAfxIpFI+n\nvhjYjTvBdogjMZdd3I+Y57mumD7s9qh06FbPT4Yl4CIFx+FP+7zjrUgiXctIMtyKjON2c8U9\nZO4pDF3ljyMUiycmlILMDmkZaBjkODTtx5wKjDYwKect0oEOPalzu780ikN1OG9KZ91qA6Em\n7bJTy2OnNR7T1zmlThufSpAeudhJNHAYZo8zCjIoIG3PrTGG7nOOKRsNyCaX0FKceWQeDSAa\n33gafjI9u9MVTu65p4bcMdCKAFOGXGOlIJMcA8UgwMiho8qCKY+gMvy96AuB1pVLBeeaM+2B\nQIcG4yeam3L9OKgj/Wlj/eKwJ+akUh7PldvOKI5AvB5NMH3sGl2DdnqaYFhX9OlI0h6c49qg\n8zy/l604sGGAcGpETRuwznilwSRzVYzHp1Pel3HAOeaYiwHKMRuP4mq91ZWWoqRd2VvdKf4Z\nIgwoVj061JG23jpVD6HAeIv2fPh14nYPfeGbdZTn95CAhz68V5h4i/YS8D6nGx0y+vNKnY/K\nxcOv0xX0b8/LZxT43yoH65pCPibxB/wT+1iFGbR9fh1DYciO4+T8PwrzjxF+x78QtGDsdGe9\njiG7fa/NuHpX6T+YQgOcnNJHKW/5aNwMdaLiPyU1/wCFPiPRXP27QbyxKnAYwMcHsOn+fxrm\nZvDd1HIVnhaFh8xDgg1+xlxFDdwtHLHHIvOdy5JzxWPrPgPwxr0YGo6BYXIzkBoF4p3Ez8gB\nbyrIY1II6ncCBio5bGXnbF5hYYAVfyr9SdY/Zh+Gms
SMz+HEhZju3Qts5ri7/wDYl8Eahfk2\n1zfaenXyw6kfgatSCx+dD6TLCu0xsu5cEHsaq/ZJ2xEVGPu5PrX31rf7AthcTM1j4pkjjzlV\nnhDEfiDXJ6t+wHrqqzWWvWU/YfIVJ9DRzMLHxpLYPbsoKbcjg1EFkIw6ZiBxk96+ndQ/Yb+I\ntq4CraXYJwu2YA/rWBrf7JvxG0W1VLjw/JcRM2CbZg+D/hT5hngNxbu8b+WvllRn14py2fyJ\nIh3165P+zz45tlUSeF77DdCqFuPwFYt98J/FtlCVm0K8jIOSfIYY9ulHMM88eEQ5AQszd6hL\nPJGnyYVeortrjwDq8cYMum3auDniJv8ACq83hW+RowNOmjweWdCAf0p3Jsci7FAPmJLDaPpR\nPDJBkBx0znGfwrpZPC93Gx3RSdQS3ltjpyOnamweHZRN80Mki84DKR1/CjmHY5iFfMQEn94v\nVccfWlEZVjtVl98da6mPwoQpXypQPTac/wAqe3h91KtKjkLwGKkfpinzBY5SECZXUJhx0Y/y\nqaG3MbfvRnODx2rpf+EfkUBvJYgHIYIf8KB4dubhldIXZ89Npx9elPmQ7HONH5m7G5SfQdqa\nyqu1FBJHNdjJ4LvpHxBaSzKRltiHr+VT2vw81i4ZVh0i6c/9cW/wqOYmxxqWrycrhQRu+tI0\nMjkoEy5PDf4+1en2fwa8VXiqlv4cvpu29YTitvS/2afiBf3WI/DlwqseHkXaufxpqQ7HjMem\nyqSMrkckUyOyXLeYSn8q+lbH9jP4jyf6zSo7dm6iaQA/kK7HT/2CfFd/DGb6/wBPtR/FGSSQ\nKlyA+P5NN8sb42OScZ6Va/su4UqGYsn95MEfTrX3TpP7ANl8rX3iNY27pCnb8a7zQP2JfAGm\n7WvXur8g8jzMAmlzgfnDDoj3U4wV81BwCOp9Pauh0f4f6vrBVbXTLu9mkPy+TESM9x/9ev1B\n0f4C+AdBSNLfQLdvLHDTDe31PNdfZ6Tp+l4FrYWsAA2jZCo49OlHMOx+bfhz9k7x7reTb+Hr\niBRgMLjCjn09vevWPDf/AAT91q+WJ9d1e201GHMMHzOv419uC4MmATg+tNmc+XjfnJ9aXMM8\nF8J/sS+BtF8l9RkuNYdFwMtsHTHb/PFesaD8K/CPhZVGneH7FHUKA0kKseP610CyfLgE4zTz\nIdpCj3qRkqFETbEiRY6LGNo/IUbgTgk5781WVvn3UN8zc1PUZYBJVsnOOnenbskFjjPYVXXj\nmnq3XPNAFiOVN2MUPKWzngVEGXGVGD3pd28AUw1FWRcinFv3jA/hUW0KSoqVF+XqDSuMFDdW\n4p6sXzkdKbtZs80hyoGDxQMHbDY/lUzHhcn/AOtUTYTBPU0obdyelMCynDDuKVpPMYEcVX3H\nbTVZ2bAGKljLav8AK2Oe1Kcccc1ArGNQffmpvtA5+XmgLj2Xuxzil84bc4xUCttXnk0qsWxk\nYGaQE65yM9amjYLkA81X3AucdulOVickcUDLK7RyxyaTl8ZBAqFflwc/WpfM3fdPFIGOGC3B\nqToKh42hu9OSXcCuKYiT5lJXPbNKpbhh1prttUbefUmlbqCDSZRZ39TjDUwsNnTApMjOe+Ki\n3SbSMZFICUlWwelPXG3PbNRRsuRnqKlLqvQZBpjHbSuCeBTlJYgfw1GsnYninKvHpSBLUcuc\n56fSnqffFKq96Y6/MCKYMdxuPrT/AJg5I4A4FRbSzDIxk44qRid2M96Qh65aMHOWB70LIedw\n/KhRuDY9KcrHb0yaACPnknAqTcMfKaZyqgetKuO55oAdxtHU0AkcA8UoYFeeKFXd04pDF+8O\nlO5KDFNjyrHPAqTPGcUg1FbGeKRTuwW5FNDegpFYqCOlAyX7zbc4FO5RgDUI6e/Y1N1XJbNU\nMX7x4GacFzio/vLgZBzSruHAGcUrAKWxwRxSLJubApB1
5p0YGCTwaQEqyBuO1K2XbHaolJXj\nsacrFTyuKQEy9N2floZcneKZGfkIJ47UrZ2gZqgHIc5opuCi+9CqfrSAk29yadHICCCOaYDk\n8dPehc7jxipAVVPJLc1I2OADTeFU9zRwvvTAfjBGTxSsx3e1N280cq3PIoH0Hlc4oHyrhqZu\n+XHvT/4eevagQoyzf0pWBDZP5U0NtwKcDluelBRJ90cjmmhyzcjFJvPGeacv948in0JFG45N\nC/KuP50isWJI/KnZBIzxTKFZipAPNKsnVm57U07QxPOKVMLn0PNJiHMxXGB1peMZIyfameZj\njFOXuw/GkMXeKKN/+yKKAGKxfJzRyowRkUhBRsD8aXJ4rK5Y1lYnNKrbcmhmwRu/SkYbMtnI\nqieoSAMQcc0MCq8c0seNtIWZWz2osPQa+4NkHApVzt6UhzwD96nAHv0pAJ09qQ/eyCKXIbIz\nTEQDJJ+tAhzfdHOeadglabIPl2ge9EYKr1qgFyAmCeaRVLc0jEZyeacuSBnikIQkrzTtucc0\n1yWO0dKcRxx0oAFwsnr2oZQr4pvIzSyYBUjk4oASTcuMDIpApKn07Uqu3SkkJVMHgmi4yP8A\njCE5qT7qkYyKgggaPO5tzVLywOT0qRDlX5van7htBPWkV+mR0qORhnvVDJG25GKjZ0R89fam\nrnncKa8e7nNAD2ZWbIWnBvmqP0pe4oEOLDfTcZYljxSyMq9KQLuXNAC/Lt460GTb2o6jpULS\nFSTn8KYFjdnHrSnvg1EWHBHQ+tLG3ynA70ASfdXJpvHbgUhbd8ppO5FIB5YDgUgzxikcHjYP\nrSc8nNADSTuODkUbtvFKIwBnOM0w4XB3Zx2pAPLBRTGYEe1OkwyhulRtyaAH5AwcUgkYsRjF\nNjfPIH50rNngGgB+7g45phZXXHX1pOVI9Kcq4Yg45o6hcRl444oHzY3dutLtIzmq9xcCN0A+\nbNUBPxyelNK7R605m24OOKa2VLHHFSAKu7nODSMe55NIqndkn8KVm29qLgRyMTwBj1xT7dQQ\nTnmlCgNk96T+LIHFAE6/u2UHmuA+NNit14OZicCKQMW/TH613n3lHPNc/wCPLFL3wjqKMPM2\npu2duO9O9iZHxJ4ngzI4X5gpOTXFalBGYWJbvnb2r0HxAixtIOwJz+dee3auzNtIKnnHtTVy\nWc3qCp5eWP0NYt0PIjDfeBrX1pY48bQQevWsm6yydM+9dKM2Z07RpIdh3DGTVK4VXj8xh3wD\n3q7NEjYVOves68JyoPyqp4pk3KT+YY8hdreneq20rHuYNuzgnNXrnMbJtOAR+RqkTu3ENuHQ\nriqJEkQxwMucgnJqqzeTHuC/KT0qwGLIUzx6mq027cMNwOxoEQ3DmYrjKg9qgkyuSpAwauSZ\nWMZb5j0Aqs8S7dpGd3WqERNnuMn+7Veb5JApfBYVPuCzbADtxgsahkCiTDjjtQBEkzSEYXhT\nj5hTt26Q44HUA00YTc4DD1zTYbhJJOTjPQ0DB1jblPkbHQ0wxjbvLZbH8Rp7BmYnqPXPNNxn\n5ux/vdqaEVt00GFIY56HHFEysT3yepqeaRvMQ7ht6c1WuMhWVWyc53VQhm1MFSSPb1oYbWDI\n+3I70+Rgy7yhBXgD1pkeZMlgDjnHenYYySMuysH5PUUsfltIU+76URs2S236UwOFmOV2t1qR\nCSI3Q/Kc9PWo3jOM9efyp83+kLvzhhz+FR/dYtnK4qgEaMx/vM5FEXyyHJ+/2o8zdnJyeuB0\noeRDIuAdzDA9M0wEkCpkHAwcU1sso+bJp7KuQH7dT701cL86gMe1T1Cw2NhwhUqc5+tNb5s5\nHOeSaWclgWYn6dhUMa5GUaqAlJbbgEHNMKtwwIODytBxHhj98nGKVlXazdD3FAiJZCZDuDeg\nFOZdqkHk44oWQH5Y+vXmm3HDKA3ynrikMb5a/Lkml2kTYU
8AU6RljkGF5xwaYsmThjtJP40w\nsAyqnp15NQnPPHy55NSyAk7QMDPT1pjZjcgnaKQCxyMoEQ+dWoYmNtm3FJGR1ZufanKQVcsM\nnGAaBDJJgq47HjilaUeWQUDnpTVdI4zuUHIwKRflYEH2NMYu4sQ2OPQ0JlmIYdaPMUK29icd\nlpFcSKvY0wFjwuc/KccZ6URodqlgWU5B+tKYWYbeDzmmxu6tJuOB2FAWEki3DrtINN8sN0PP\nXmlP7xVJPzZpp/1hJGe20UhEjdyOWBxTVbEgGcij+8B93OdwofttXk+lAxc+ZI3GVNI0fOCc\ngd6MkQjI2v1IHpSJlmZd3B5x6U0IJWIVT1IOaRt1w5IKqKXaJOAT7UEeZyCFxwfegYuFDqeh\n6Uxm8lzgZY/xVIDuT5eq9qZu3LnII9aAvcZI5yD1Jp8bHlim444qONdqtuGRn73pTo2Hlk57\n0xCyK0ahuMMaZIPu7RkEZNWVYNhQcr1prx7cgNkYpDKu4dVODUruVVSDkHg5pmASgbAz3pNu\n1WVj34pASYPzAMdvSmswVcBegxuFKM4yHzj+GmhiFKk8n+GmArfvIzsPzHrTcHy8bfm9aAzN\nIAAf8KcuRIxPIAoAaF/eZyGOMbRQrfIwI+b1oX93GWR/no24G52HvTARiEjUFiJCeopw38bh\nwRkbaQ7TjBDke1HzBSvrx7ikDQiruXGO+d1K25VHUgnqKI1xyFJIGMCkjLKx+X6g0xDlbr2J\npq/MCrdfanPhVwBjjrTVm+5xnPBIFJjHbiAB8oI9abuO5nHzccU1gXYnYcDgMacFPbGPahAN\nCsOQMnrStskKMg3Nnn2pVkxGC2etNXadyo2Ae1AhGwGY+/WhWJB2v07EU5f3Y2Ngj0pobA5I\nG44oGLJskYAcbTzTGjIBPUqcilyGck/KM4xQqsrcnikAjOHGVJHc5pV2SfK/zentRuO7ao49\naQ425x1PJFMdh3Kg4O7FNjO6YlvlXGT9aGlVtmDj1PrSsyqzHkqenrQITzPmy2dp9Kcu5Cfm\nHrSdMDHuRQ2d67fxpaiEOXbcR70bRtYdOMigKUbGTycmhZPvdBt6ZpgH3owRgZ4JpY2LRFQ3\nCnH1pJAdxBIKLThJx8i4P92mAnl7hkfKPSmRgtuIKg896dGxLYJ+YcnP8qYFDKwJ2selBQo+\nUAqMt3b+tJuZQzN0HQ+9NSTdhQCpHGf6VK2Dkg5B420AMjLK2S5I/SnBywIyQfShm+V8/KFp\nN33SF+90NAgVflyGwM0Roo3Ek9+DQqjJU5GOc0fvmjBHzbu9ADcBsBCGbGS1AU5y3U96Cn8I\nQxnu3rSyKqrt5K/3velcBFYFdg6/pTmj+62Aw6YzSRthSB6c01cpkKQSaAHK3zM6LkDjn1pC\nrMoZ2y1NbHBwUGexpzrhmxzxTAaVLPw2FPUU/czKEBAK+lNYqu1cfMOTS42tgZPuKBXFCuFb\nGM+hpchgOCGHFR7mUMHGMihTshQL3OKAF8tt3LYGeppFJijYFcgmibH+rb8+1MXAb5sgnjFA\nx27zGUNyPWnHC5GNxpq/JGUJ5Bzz6UjMFkLEZGOMUAxeoBAyR/DR5wj6JgtxSeZtYDuRkilX\n5mUD5h29qQx4+Zc5wRxTNreWDnnOPeg5VmXbz60xkIx8/XtQIlb5Tlm9qajbVZWGdozSMR5I\nAGSDnmlIGD+f/wBal1AYrERrleWOeakIJkK9TjgUbd0a7jx1x6e1Jn94Rt696oQh+RQW+8Dj\nmpGYNu3nIHSmBQv387RTQu3JHzLQBJGyLznJxxxUcib/AJt2P92iNfMjHyYbdTtgjdtr59RS\nGM8vcvLYHrQFBbIBx2+tOX942duE/nUYVk4TnnJyaAF2NOTuPy1IGdWbpyO/WmnPJTp15oZZ\nMIxXO48nPSjUQvmGMLkZI9aTyw5JUck54okn2yBdu4E8n0pi
sVUtG2QT93FAx7ER4ONwz8w9\nKJJGzkMetKOYxk4xzikGZGDE5U9KYCqrMNueW67aViGYAfw9F6UyPdKCeUwSN2aI/mU5GVBz\n9aBD1cM4cnilkkzuyfemRSEyN8vykdDTdrjL4DfWgBdpXofmIzSIpjYgjKMOfY0rMsq5HBpV\n+bIX+IetIkNu1hsbPrRjGcDcTTkJ3FQMYHPFPSRdpAXjvTKGNiE5IwCOnvTQreVtOcn+Edqc\nG2uG+8BwM0fMpIU45yTSsBHM6lhg5CjFMdiNoyMGn4+Zl29smkZN/IGV27qYxHf5dvVc9R1p\n+Pm29BjOKdHErIMnCHn3pEUfN12jjcOaBDmCN5Z2kEHrT2kjyGKjkVHuUyKWOFHA9CaF+fOf\n4TgUALG/HH3c8mmrIWjZj8/zcHpTvJ2xhTyM7uKaq+Y28/lQBIuG3MfvYxTWk3RhQOOmKauG\nHLbW9KAB5e9XzhsYPpQA9ZAMDbj2pjbdxByFPpT9wZmJOOOPekR1Ee9hhcfrQAyIhF/vc4xT\n0YurFzg9KVmyQzEJxwoFJ3DfjSAPMEhwMg4xkUvms8agKPl4qNBtDE8MTnin+YeCBzjB4piH\nMNuN2QevTpTfL+Vt7bgTxinyMY4yN29z0zURbcyuGAI4I96QCxkyLt4GDn5vQVJJhsPt4Y4q\nGJhI5yu1upzUuTwueeoBpjGSZVDtXLfyqWM7EBJy+KiXdI4UA7e7VIrNu4OT0FIRKsnlqOSQ\nTzmmyHzcnoKbhw/zDIpnl7pWUHtkCgZLJjcpdccYzSpIVkweCeCPamj+HeTlfQZpGwNzr9zP\nT/GmA6bcJflwE6Uiuqq7MhbjAz1pMBYwoO7PP/6qVWZGDbCQP71ADdyqqEKQ2OrU5Y9rq8h+\ncmnyMZF3AZU0yQqMNJzgcEUAL91my/ToKYswYgscnoBSs21Rx82P51FtXcq5+ZTml1GSNuK7\nm/1ac7aXnYGI5bnn0pZmEnAHI6ilLOxUdu3tTERssS4Ksd7cbak3GORScjsV/rQXxJu2Dimm\nQ7TvbaCfvf0oJHMArEfc560BfM3BeeKFQNISSc45B6YpUYR8dP5UDQg3vCAo+6cc07crSbGP\nOP4aU5UHaM/ypsajqeD3pDBflYhRg+tEqvGykjzCf4hSwH53OMM3CmlG9VxkOVPNMBIpGG5S\nN6559akUAcZwSfypNrLM20cEbs0sZDLl0w3Xd3pAPaRt2CvHSmg7mJVPu8DnnNCqXyS5Y0px\n5gyMMOgBoAVd00YYfKQcYpRuk3qhBb+7SKJFBdDlz/B/WhVZpFJUjHUjvTAT/lmgzk92p0mG\nVwfz9aI8KxLDLdqTyxzubnvQA9jHtCJ8x243dhRHlhkHkd6QLsRR+QNOXByQOOn0oGPaM7+T\n8xH3qdFGFOBzk59qZs2sCSWGKdHtb5VY4z16UEExwY2wRjNNXcq5zhW4zTI22yFCuRmn+YGj\nZiv7sHgDrUsCW3J8tQw5xTkUjd1BA4IqKNiVQg8/zp4BBY7jj+7RYdh6MGjBI+bvR8iqTvwT\nTN6MpPfstSeYvBKjIHK4p7C3JIyI4x/FxzSrMycxj5cYK0CQMwBHOOKIyDNk5CjjNIAhZXUq\nR8tT+XvTcTyO9RRgsWUYCZzT1ky2zbgH170DsTqo8sHzPn65AqTeZI92No6ZzTNgchc4XFTy\nQHYvZegpDJI3O0EjHGM09gzEFTnnn3qJYwzgct2z2qxHFtbcvA9zSEWVKw4LKCzcYqZQzsNy\nBUB+6Kr7Szq28bu1WPO253sTnqaQy3G21uvzE9hSSW+5Dtf7x6ntUNurGQNuwo5z6ik3M067\nRmMnI5oA6PwbaxyX0ZYEqcpk+vrXu/w40r7R4s0qEchHDHPtzXkPgmxMbGeQ7P8AZP8AOvof\n4GaT9s157q4BCQxZT3bPWpKR70uNpBGeaRWG7moy2xsJz609mV
z6GkMaeMkjiljO7gdqRSWP\nPShRtbI60APVtrA4/CpOGORwKjY7ee5pFJPegRIMYJxk0373SgMV5HIoB+Xjr6UgF6ZJ5NO8\nwbc1DzuzilJ3cEcUdBh5rbhjpThJknmm9FOBQwCbTjrSAsLjHX8KRnwQuKhVstnNKpJYmgCZ\nvlAwc0jZIyRzUase9Sbgy9c0hiqTjk00OVJOeaQ845p+3jcOlMB0bAsSaVpM8Co2bHbFNOSc\n/wAqBkyyEcYp5O5cYwagVtynnmmiXa2TQSTcjvyKdtO4MKhWTPPehZC3HakBYZhkc0pcBxgV\nB5gLAU5Wwx7+lAx7DLZ6Clb73Apmc44oO7BOMUD3FXarHI/Kn7gxJHQVGv3ck0hk2/SgCTf3\n6UrNls81Du6UvmNtNAE4k3DHakVgcjdUayAw7sc0z+HPSmIlMzRtjqKkU7l3D5agwrKMnmlB\n2nk8UhE6sFWn+Ye361W3BsGpHJFBRY875QGGR7UKx2kj171WEo2jNKzfN1zQImZ933gM0qSM\nuVPBNQKw3cjmn7jyT19adxk4k+UbWI9eaGbPG84x61WVssBjFOZWOR0AoAmWQhduQaY0ce3Y\n8Ubr6MoNRcjPFDZJGDzSFYVrOyZfms7Vj7xKf6VC2haVICJNLs3J5P7hf8KdnLHGSKkMh696\nAsZ83hHQ5n3vo1iecnMC0h8EeG5MCTQtPce9uv8AhV+K4ZshhinNPt5PSmNGV/wrvwox58Pa\nf/4Dr/hT1+HfhWPgeH9Pwe3kLWukhZ+vanly2e5oHYx4/AvhiHldAsAQevkirNv4T0OFyV0a\nxGfSBRVz7RtUDGT3pyzEvnoMUARJoulxt8mm2aH/AK4rVqOztUzttbZQOgWICo0wwJzzTvM2\n4A60ibEw2j7qIg9EXFQtK20nOTnrR9oLbh0qOOXcx4+WmOxMrBsHcR+NP8zg5Ofaq0edxOOK\nVW74pD5Swzjg7uB70zzcDrx9aiX0IyKVsYwOtAiwrBlAPLUjMOpqONcck4NIzfKc+tAD/M3Z\n54p6qOATVcLtPIqRe/egZIuVGT0pysccHFQtIeBjpT23be2KB2E46E1IpO0Cm8MoJGKaG+Y5\nP0pAOIzwGqZDhcHmolPy5280qq2AAfrTQmOzuUjFEQKnrxSj5M85NJ0WgomANHmFVx0NMVic\nc8U1c78t0osBMD8uCafuztP4VFGQehyBT1zu6d80h2FLBuTnil3A5wMLTRjJHfrTkbK/jQA9\nVCqM80u4JgnijcOhFI3zHpSYEhPy5HWkRv3ZHfNRq55BoaQ9OKAsPL9iKsLllB7+lQxqG78d\n6lBHUGkIkTCnJ607q2O9RCRe/JpysD8x60xkp44I5FJ1XaODSlvlHfNOyNvH3qRQ7J46YFOz\nxkcU1vlU54p8bqhAPNADuCp47cUixhcHNL03EDIx3pnmfL0wfSgaJFf5jnpTmy3Wo/v4x0p/\nsDk0AKrBWA6nFPRctTCgSNiPv4pYWIXkYOKAJWUK3FTJH8v86qKx2496kWZg54O3FICwv8XP\nFM84eXj+IUwHcmAcc0jKFYHrQA61mdt2R7ipBnzM5x60yPKAn16U4Dcc4pCJo1AbA/WnLlWI\nGajXduFKsh3betAD+Rz1qRSrZJFRlQR1p8eFXmgCQcx5oRioxjIprY+7njrRHIVByKQDx+8U\n84xT93Tv9aiBIGQM5pAzjAb1pWLJmYjPYe1IqhvvHIo6sQy8Uucdvl9aAD+HA60qkbs0bec5\n4p24NjtTFccsnTIpFLFjik3Lu6daBg5GcYouK5JjavP3qMcZJGfSo13cktml5zg9fWgYrZxn\nNPG4gZORTQpHXk0/cNvQ0hixqce1Lt3d6b83WpBhVzigBSxcc8AUqAqM80m045NPb7vtQALg\n9OKVdy571Hu+XinKGC5oAevze3rQfUCm7ivXqaGyMUgsO56ipApC
/WmbiOMdadl+CRigBTwu\nWHNKrEgECmsd7ZzxS7sAA0AOwW5xgjpShT+JpqsKNx9cUyiRlIwMUb/lx2pN20Ak5pyYVjnn\nikyQKk84xSqy8ZGTTUkLfKOfrQFP4VRV9CVVG72prHbnjJpBweKUhtuBzmkLoKGBUHFKjYzT\ndu0gE5pfM6/LxQxjvMopm/2NFUAMpUZ604L/AA+vNJG5ZiMcUc+vQ1zDuKGHJNJgsBjn60jS\nDbjFN3noDVIBV+bOOlKwI75HamnhuDS7SqihiFUYbcaazZzjpSDO496UrzxQMTB3EihY22nc\naev6Uu0ke9Go0NBbr1oBVQec0DG0gnik2hfp70JEjSvzelSrg89aTHc0buOOPpTANvzYyKYW\nKx596DjqfzqPzMsFB4pa3GSq+V5pAoJ60rBccHNN52cLVAOjxuOTTWz3OeaTjaM0ik7uaVxD\npAA2RnpTVwy5PBpUbdTuDjjNIYhxuBzxTZPu5olXJx0FR7Wafk4QUxskZmZVJ6d6PwoPz8Ch\nm+cUdSRrN0AHNOPcUqfMSSeabngn3pgJ8vA796coG3rgUbQ3IpGxjGOKAA/Ou0HFJKoRcYyf\nWlXaeP5USDLADmkALjbnr6UKPlNLx34oC4pgN2jdk9qd0QsB+NLu25JximSSblC/w0AK2Qow\n1ORlVcA7vWk+6vAyKY2VfAX3zQA5ju+XFQxR/vG3CpfrTdpLEipAVlIUY5qHJZvSpHcoo56U\n1cBemT1oYhfu96RNpbGeaZy6kjgGpYlVVORzTDUQ/Lk/hTc7FwTTnPHH61GyFmx1FSA5WLc9\n6TyV3ByORQp2sQelIS2fY1Vxi+dhuRSl9q/N1NA28A0MN2M880AJgNznmo/MO7jnHapHxtYg\niooQV+bqaQiRjuJNNjUqck5p2wtznFNYESYJwKY0O3Bc+lVtUt0udFvomyyyRMGH9amGP4qf\ntjeOUN9woQR68UCPiDxhafZZ50Qc7yP1NeY3m6FpBjdzivafiZYrYazdRD/nqfwFeParCyyS\nlAQFbNWhHLao0ZiZdu5j39Kw2h4571t6iAjbfvbuSfSs2TEatjk1t8yGZ8kDRqWBGP1rNu5I\n8DK5PpWrdKPK/usfesW43+ZlgBimjJ7lOVMIdxxzmq7SCNmA+XI64q3Iu+QOTmopIyzFV5yO\nc1YijcLJuVcZB5+WovLKq5JwpGBmrEr/AGddxYsvTioJHM0PPA9KZJAVVIVLA7unvVaUBVLI\nSfRjU8+YlyTgEYqsNyKCWyncUAMldmCxluozUMiszFQM7eetPf8A1xJOcjA9qiUbZQW69N1V\n0AVGz8xXK+h71GGRVYooyf4akKlcrtyc53VDMjNGQF8vH8VSA1Lox/8ALHaPemyfvAcfMuc4\nz0prKdw2tuOOlOkjOFw3HpVCI5FWNl53H07iomMTBzgkmpyqlskZqGbMhXnDZweKYkV5Ebzl\nLPtBGaeMMhJ4HTd3pY8BWVgNytncfSmsVi+YfOWOcelAhdu5dinPvUM/zSDIOF4qVmQ7X6MT\njFPPzKACCp6iqGQsyyYwenGKgaRVYgLkZqZmXcQhwgqHYoYZ4HUmhAMkVtyqg2Dqx9qkj+8G\nH+rxkHtTo13HOfwNNaLDAq2YscrSGNVirMCOvJqMbVmAzgHnmnmTcTz2xQgEin5OMYAPWkxD\nHQyAmM/eOCDUTR7YyA2HB6CnoyxowYnI4oVg2CvJ9K0voNBuWXBxk4pu9o88ZBHNOjG7fn5R\niog2YxzjHBpCY6PEcZwMt1qNWO0gx8VMqxyRg7ih6ZxVdF2yESHIz260IY9dyqwYZNM8xZOB\n94ckmn8EknjsM+lJt3ZCjIx96kA2M7mGAW3ckn0pGjdVIABOeM+lN52jHY/SnCQRsCdxzQA3\nJReBx3oZt0gHQdP/AK9DMBnsfanMSy7sbuMD2qiSHkMRjIB9KWNjIS
hCnHJIqRv9Um08jGTS\nIC3mFlC57D0pjGRhY1ZyRimNIWUbenWpmhXYoVeo/L3pJLcllC4U4wT2pAIzboyOVUjmkXCK\no5H4daG3YwQGGcfWnbRJBuHBzjB9aAsNclOQMHPHvSNH8oB4YnJ/wpxdGXdzlBg/Wm7TJhyx\nXHzc0kJCPkq5IIb0FLGpUjgrx3oV/My275qTJZgSx47U0MGZGZcMCRxSSM2QBzzj3pvlqzja\nMMTnNPWPapLHLZo6kjtpjbC8tjoKauG4Ax3NJHIFZsZ309SGJAbbkUDSGSN825eARimtjasY\n/OpDGu3aDx3pscPykAbgaAGq2W5O1QetCrtY/L15zTpdrAYGD0xTfvYbJyOOaWoXFWRNoUjH\nPahpAFZSMnqaTcegGc03aFbdj5ulO4xZWRuFXIxTVDeWc8/zp7SEHaRu9wKauVywJBFINxuf\nmyR8uOtO5j7fMD93vihX+UlU3E+9NjLCQlhjPUUJMESK2xjvPUZBqHaSCzMMVJITzghl64qP\nbu5kbCnnAqhAIwsZYnI9qVVTABOfXNHCZCHK0jbehzS6gDMVkVAo2nowpG3Mxwfanc7fl554\nzSyKVkGGxnk4pgI2SwAyuBzimPIFOQN3r9ac0xXtvHekdVjA5z3oGHLKOdpPJoUN97O1egzT\nm2r94Yzzmo5mEj9cjHC0mA7aerPhv7tNyOR/d5OO9AjDnJ645FJt2tw2F9KBArKcBvlB5xTz\niT6CmqyMC5OWHAOKVWZUwy4LHrQA2X+EryOmO9MkURkA/MM0+bMY4HH86QqrY3Z2nmmAFVLA\nbuSeRTpG2kqOQPSkEgZfu7VBxmhR5ZAUncetIAVisYy3B5/Cmu22MAD3FKVA69M5xQ6Eck8N\n0FGoxW2DGR8+PzpjAhVEY6nJ3dqc2MhhyRxg0443AE8nmgQ2QruZup6HFD7NyhBnjOaR/vHj\nj2oDBJG/hG3HSmA5nyct2FHCqCwpVVlyH5XHWo5BmPJ+72oAcAWyhI67qeud2QcN6GmtsZFG\nNrf3qWRBGFJPJ70uoDDGfnHRjyTSFSNh2+mKWQK2cZB700J8uVkyemKYx/8AEy/dHUk+tM4E\ngBOD60qrtGHbcn96gKqyDcfoKQAy7gV6jPJNLzwVOQP4aRs4Ow4JOcU1m6ZX5c80wHhW5AG9\nW70hbzF+XKheMUBvmzknPpTJMbhgkc0AO3MuHPI6UKxKsDnPUZpdo8tiSSaWSQ7VzwuOGoEN\nLDAAAB/nSHH+7kZ/+tScyEZHTnFJHEu5cnpyBigocceUCOcHJpEbaMH75OfwojZm3AAZzyKG\nGxgTzxj3FAmH3pGFBZV2gZHP505lI+uOfWmGbdwBgjpmkTYc6h5WIPy4pu4KvPB6Y64pu7GC\nOD3akijK8j+L8qBjlQeaf3nC9QaQbQSwbex70vMbE43MTzTRGNp2t/8AWpgK6qMFhznrSnCs\nBtx9aa6MzBQwbinqDLIAzDIGDS1GHysxDDk9xUe8sPlwqqe9LIpeM7DkeoodPMxx1GAKNQH4\n3HjjdySaa8e7ac5we1JxkKV5HBNOkwrKoOBTEG4MHwMDpmhl2xjDZbHAoaSNl2sNvbHv60iZ\nUnd8y4xmkAN+6iDbuScbfWkVyrEDkjrRwMIclT0+tKv3jsG3aOfemA7cfugZY85FIJCu7cMH\nNIxPHlnG7nJp7ZBDH7/TFIBpbEhz93FCsnAHBb170/arZU8nGSTUW35enK8gimArybegPXGM\nU3d+8GB05IPpQ7O2MfMRzTBNyzBaQEhVV3FeN3OCaZgyAct8pzinvgyLkFgw6+lClo2chckD\nn6UDFVm8tgCoU8nNN2gL8nL46+tJtDRANwh5J/pSQt3UYUcA+1FwYLl13DHy/eHc0qqrAkHa\n3Ue1JHhWO5eDzmpFxu/2cUxDACvLNvHp60M21iVOFI4ojYAkY5PAoyIw
RjIbtQBIrBl4OG4z\nTTIu4qQWxyMd6TiOTkbM4FPkkaH5ioIzj3NLqMFbzApRcEdRSCPMhZTtX9acuWjYYxzkDvSO\nq7uhXjpQSKrDcTnHYk9KihkG8kn5fSnzKOA6sFxwT3qNtxYEjan96jUZPuG3B4wc0xs9AeDz\nSR4wSV83jG4dKekSNg9GHQUwBY/Oyyvg45zUcZLRnB56U9c+YeNvtRu2545PYUARs2F2sMCl\nj3R4I43cCljjYqx6huMelPEe4qU+8vFACMQq7QmMcc9CfWo1b5gDnd0HvUoTcAzEK1DDcwUj\nBzndQKwjONoU/K2elRtIFIx8q5xmpcNtxkZzwaYRtUhxu7imhijhyx9PvelJlQoZvkUdfeiN\nztxnIPB9hSou2Mr/AAg/KKQxWZZlBB47VGwK8AEj0NPZgAcR846+lK+eMcH1pak3BmAAdsE9\nBSeaxblQccHFPmUQsrMOOPl60qOAGbHDUxEaxhmJzwfXtSRqWfr06e9P6R72+UZ4oDBfnBwa\nQDWX5iQCWpOkmdoKkVIsxRsNzu71HHGcuFPflu1MRLuXG7O1uh96WQ4QbOvqetJu8tc9QeM0\n2QLncG3bRwKXUaJF2nO35SOnuaaWPmMTtC+1JG4VQx4zyPrSopjG5iAcUyhWX5AWb93nAx1p\nxCKu4AB+lNAWSPdn7x7/AOFO3sq4CYPQHFSAzjoz5I6ketP3DdjHOenamZeNTuUHjNIGaQqw\nOB3zTAkGW3Ko2H1qJWLStjJUDqaeWKtuGAM43GomyvUgZPOKYEm7zocqCPYUnCMnG4DtUgG1\n8qcLik3KGUgZ9qAHM3mZONo9DTFfy1AxzmpGK7zjL8ZpxUMN5AHbbUgRBTlmJ+9QFO3YTzSs\nVjkKn7wHSkk+TJcFuei9qYCScyDDYA7CnrmN9wUMD69qJQNy7G4HrSAmQbS2D2pki4K5575N\nNZirYY5XGRQcM21OG7570LJ5ZAQ7ixxhutK4x0jKY1aMdTyewpysoUp1Ld8U0kRuQAQfTtUr\nunynI/rTGRKQzYHBUdaVvWM4bqfenMpVgq4x/epsissoY/L2ApAOV24JyCOal2s0RZjvLHII\n9KhWPdHlzg7uxqeTzBtwfl7KKBEEh3ZRSQM9RUmdp+Zg3GFAHOaGYMM/d5+7SEpINwBXmmMe\nI+mWw+OVokcxxqzMS+ecUvzs52gMuME01/lIGBjpQA6Qh2QqG6cijaF+QjHel8xll24+XHWm\nSMfMPU4x9KAE2oG387uwzUjZb5R0PNII23+Yw2J61K0Y4296BXImyF2s+PYVIsjNGoPJ705m\nGdxT2zTVJ4QN8+eKBD9si4KnNTeTtYEcjGSCai/3SXIPKjiljw5Yjcxx0HrUCJIRE0uVDIBz\ng1OZNzPtUA1By+QowRx+NS8Llzx249aCkK2zaMjDnr7Uv3mwFyB096YrDzADgtUjKW+dRgg8\nVSJuN/ebuE5BzUq7pEOCExz707DNhh07kUSeVu+U5DDr70ADLyQjbX/vHvUjI7qC5ztH3qVF\nEjIJOVznI7VZ2ofM3HCdgKTLFgdeBt7VYQZ+9z6VVb5CuB1FS7zGN4GR2oV0HUslgIQV+UZx\nipvJWSEtkk/3f61SjjHkqzOXGclRxVw/vJMIfkx1/pSYrEyxiHGyMscA5p/mMGGMHv8ASooV\nmIwSY1HQdanjjWaXfGShxyDUMZfVCItxAYd6fp9uNwBXOW6DtVdZtofn5BwMd60dHCz3UY3h\nQRzmlcZ3fhmIrbFThipPHtX0t8EowNFe529f3efpXzjoFqy5aPoTX1R8MdOOn+E7Qdpf3n50\ng9Ds4mG4kn5qFbLHI5pkeN/pSsWMhwOKCh6Z9aNxXjtSZp25T8p64oAGbceTxSJkN1pCp4p2\n0dT1oF1HZKtkdKazFAc9aRW+bGaVvmXrmgYbztB6kml+bcOaRcLgZxTlxu
59aQDmYrSbt3Ha\nlJ+Y5HFMG71xTAcBtP1pxYqcAcU2Nv73Whm3AnpzipYEqk8cU3+92ozxw1IPUUAPXAHJx707\nIGNpyKjZc8t92l4XA7UxjmY9TzTQ+c+tOT5gacuO4oQCK21eRUbtntT3+YEA80EHAyKVgYDj\n6dqF3c4pPLO4elPX7p55oAEXPJ607d5bYxmk6KM0bt3FIQ/zcN0pWkIXPaotpU80FjtNA+g9\ngH6HFLjKkE9KapBHTmnFeMZpgNb73tinKo2c0xuVA7k0rNsYdxikMflQcgcd6RW3L7Uhxt4P\nFBH7sYoBj+M0nGTxkU3G0daduyoxwO9BIyPJ4PHpUpJ2j1pqjpzzSKxX5m6CgaHyErggZFJ1\n+71pGk4OOlCSBiOxoGPX5ieelLuYtjnFM5xnHFO3HdkH8KYDvM2t05p6sXUgnmoidy56GkVj\nkUi0SsxUDNI0gIzjmjO5ueaiJO8gdM0EkisOg/GgORn0pVwqscc0xvukngUwHrhxnGKOOMim\noQoAyTQWMjcDAFA0Ko3EnP0p67ljyOtRA7fzqRct1OKoQ5T8uPWhlPTtSKcKQB+NLGx7ipAR\nSFJwelS+YGXH61EyhhxwSaQdhSAkLHoelDsVUBRimnI6ninKd3B6UAPViMDFG4t1496RW3Ky\nn8KRVOzrnHamA9lK980v8PTmo23MwIGB3FP3DBHepAQNhdx6A0qSbmywqPkgjPFLuC4GDmmB\nNuGCCeadkY+XJNQtIUbJFO8w9KAJWyQBxQD0GeajVjuOaasgD5bgUDJuckE59qdx8uBg5pin\nDetSyYZQQealiH7vlPqKYuN/vSKp2mnH+HjmmA9uuKBjpmm7gpyTSrkSBscUFCqecCgr8vrR\ny0hNC4YkHigAt4Sc5O0GpgxDYJ+lQAhW5zgVLw2WJGaQDvWkZircGm+5pWXGGHJ9KYD1Zu/J\np+48etMTKMd3BIp3zct1pALt465NKF3MGxn2FIrYXJHJp6hozx1oAerBWxjg08KF6dfemqNz\nA+9ObPJ689e1AEZZc88CrEKl2qDjnI4qaGQKuc8UDJFAVWYHPtT42VsMVNRb/lBUZFSZAX5W\n5PWkPUfkMucUn3cepNNVQCCxp+4MPu49DTGhzMyrknFC5k57jjmkIMn3jT8Yw2eO9Sx2Gqpj\nPBNTq3y56NUTD5gc1Jv8w/KOQKBCiNmBYnBpVbGSOeKQScc96WNfXpTvoAKpz1p8edp5yKi3\nMFKnke9KqiBQQcg1IE6qxp6oRwaYrb+M4qRenXJHSgBzELgYpy7lxx8tV9zKTkZ5qdcEDBwP\negCTh+eRQuVXpzQvCEHmnBt+O3FOwCM24YApOdw2jP1p67VGaVdrDikOw5cdCeaX7rYJ4pGb\nMfA5ojztG5aAsO3eg49qdjc2fypgYcgcUkbngUASHKsc8g0LkqSOnpSBgxJJ4pFYqDjvSGSr\n2IP4Ug6ljQoyuO9EYK5zzTBDwCw9vekVfmx0p+7sKRc55NJgMTPXtUmSVxxmk3biTnApyqFJ\nLDtSF1F3lWGOak+7gHrUMcmW4GakbcwzjmgodwSBk5pVw3A9aazEMM05cjLAc0AIylX68+lS\nM2aYud+T1p4UK/XOfWgByrlfSlO7AFMVvnweae2Tz6UgEb5lUn7xo2NvyW4oRxgBgcinb+2O\nKAF5XnPFObDZPWm5A7ZFOU/NnHWgAC7lwOBS7iWxjil8vaud2KbuyeDxTAU/J2p+7C0L055p\nVIYGkGo3DSY9KevoabH+tOLBcjrTsANj2Bp6HimfewSMU8Y6dKAsHJbA6Uu7a2KOg6UxSOSa\nQ9iUMOp60bunoaYV+XPrQMKBmnYZJxRTdwopgO7HFMYccdaVXBHoe9DMF45rGw7WG/e+VeTS\nN8nzYzSqwj6U3ls/N+FLYBPM6cdaeOnJ5pJMDoM0AnuvHrQOwqnGSetHAxj8
aaxyu7HFI2Qv\nI5p3AQ5x171M5DL17VBgtQCANpouImXaVyOhoYqQAetCqopjAMxGfpRqOwM/akVxtOTTQTJH\nwPrS/dOABRcBOGYjORSiNV5Iz6UiqVYgmn8FSM0XGHAbhcCkbcpwDmmq+TzwKWCTrnjHemIX\ndtGe9RtIfrUhYN2x71HJHyMHNFyQWYhjwFp3mEKTmmn73SkY9qTGP3A44oYjgYwKZ5igqpID\nGpN2/IxmmPoJ/FweKRPmJwOfWj7v0xSZZowAdv8AOkSOKlaGb5vagqxUc0cbT60AB6bV6U3L\nfdAyO9OB70M23p1NCGIq4zTWYqM9PpUmMp71HztweRTKHK24Zxn3pN5bGabvO4BfujrTm4I4\noJFXBJB6U4r/AAgUgIwaP4c0wGuxXikVmI5NNVXVcMc02Rjj5OR3pdQsSFtq+pNG7De1HGfX\nihlJTPRh2oAYwywPbNAbqBSt8x9CKRD7c+9Ahsmei05Qy43EdKY2d3FEilsY6D1pgKx3MRjF\nA+UYFRurNjn8qd5h2gkc1ICrls5GKY0jdKc0gbluBTS23qcimgHcsADwKRd+84Ix703zNxxz\nilXjqaAI2VmU9+aQPtIBo3Ffl6nNKeOepoAlZlDD1ppy+SeKjOGbNPjyVO7pTELk8CpY1DPG\nCeN2Kg3BgRnvU6NtXdjp6VLA+W/jlo4s/Fl4UJYN830rwi/U/aJifu46V9PftBWe3WorkKNs\n0PP4V81apGVmcngHmmnqI4rV41WPIGGByVrCmyrE45/u10OrNtkfK5WsCZxIRgHIrZEGVPna\n2VIYcms2aSTcC43Bhw1at7JtLBWO41myqXQp0JrRGb3Ksqqdo29DUckqtISDhRT3LSN5Snaw\nHWq14Dbrsxk9ashlGa48wucZbPWoJF3KHyN/enmVnWQheAKasYVUDDJYc+lAiOZFUH5t24dP\nSqr7vkRRuJ44q5dQiNfl5OePaq3mMGDJximBB5LMeudvWmNuWQnPXgCrEkw4C/e71X3blwyH\nHXNUBF5jKpx98HBNN2s0nzfdxklqeyljlsIM9KryL5isFyzA0WARN38HUHO72pJWPD44zUkw\n2oMcH0Heo1YhDE38RyD6UxDdsm3J4Vjmjl89hSsWDFdpYeoprlSo6hf5UCIdqKMsd386jaLc\n3ynJAzSyKWUFQNvrRAQ3QFQfWmFiCTcGDq2cdqN3ljgkselTsFXcD0x+dQ+QdqvnHOBTKI8+\nZGFb5MHg0Oy7h82D644qSSAiQg4wOfamLtOeynsaCRsbP5hLcn1oLYjIRTyaSO3O7LnknjBo\ndTHlVYnHXHSi4CNmOMbUwT2pNw2hjnIOMipJOG3HltuDScLFtjHzdaAZDNlhtXBzzz1pJHU7\nCFw49KlkUN98jdjnHam/6wKEG5+7UwGeYGxvPBPSkVQpZsbR05o2qckr8y87aVT5zZ/j6hTQ\nDEbMkZ7helNbEjKGHyfxHHJ9qV9y5GcOR0FN2/KuG2ovJFT1APu7lOFA+6PY1GWI4zz7U5WC\nZ3J5jHkGmMpK7iME9quwxeS244NJhm6HAz0pDtVSTlB3NLuCuCfpUiEJyTtTcehpq5VSB604\nR+Z8zHAz1U0MuzKoMdyfWmIYv+rKleWOaGVed5+YdCKlEg2gFck1G25mHHGcUXAXk4Y8KeKi\nx8hUtuGfyqRs4OBlQaRVHGV9zTuA14yuEClhnrmlb5VG0cA9DSqp8x2z8uOlIrqFHzZJ6n0p\nDBUVWIcEA80xlG1snPpUgVi+N2Qe1QN5gZguPoxoQEm7btOMqB6U1WcSfMcZPWjLtgAE47U+\nVQ7Ljle/tTYgVRI2AwUL1NMAPmBQ3ehoxGPl5OacVC9Rg/3qkBJQPOHzYOOcU5guFb73bPY0\nibWVmX7w5OfShYwyjnAPIHamAjkAkevAFHIUhG2MB3701VC7gQS3UGldvu5UF2
FUAm7dKFK5\nx1pAu1nXqcZzSfMWY9DnrT1JEnHKkc0mAyRvlTHyHvS7tmP4mPAoyMupXJx1pAOFjH3h1NLQ\nBEk3bvlwvQmlUcdfl96Vm+UqV+UnrTD5aqAOeelBYv7tk25x9PWnR/6wIvzHHNMKk7lwP8KF\nYkjaccY3etPUQreqcc02EruJ2lzS4CqFHrzRvGGVjjnjFAhu0fMMZ7ijdnA6eue1O8wltwBY\njqcVEuWfecBqBEu3KlcqRnIp0cir94gL2quAVySfmPpT3x5YUoS1MdhVYFWwMik+XcEIxxkm\nnFVjwA3GOVFRsx5IGWxikMdJlmXIyMce9NLGOP8AeLk54pcB4hyc45xTjzsxz2oENjbbJyPo\nKGXJ5Tbmhs+djrSFDvbcfmAzjNADdpRTggr2pV+4DnOPWnNj5Tngjp71GjFmOF6cfjQIWRj3\n59vSkRZGkHGVHNOZRJGWzgjrmmjiPglvbuKBg0u4kbcgn7op7R/McMBkY+lN3cgocU0xgLjd\njJyaAF8sLGQfvdM0rFUAyctjApu52V0Xk9c04F+FKjpnNAgG9vv8H2pN0YmXcevGfSlEm5QS\nPalWJfOGQMmgY3cTIV6oOjUu75s9TTtow2OQDTVZgpIXBpgBctlhzIeAKaVk43LxnFOZtqxn\nnOcHFCs8h+UnaT92kAMuYyM5GcUY3R7X5wetEkaq2C2Oe3rSRsW3KT908k0CEkxuKg5J56Ur\nKZNqAALjNNY7nIDcgUjLuK7WwO9ADgTyGP0NNYL8vHX0okwG2HOaVYiIQd2GzimMXazN8h+X\nvuoyNpQjANI3Egz8xA6DvS5VlYY+b0pAKp3jC4+WlbEmG+6aiSNVOVBBpzE4HRxnpRqAySR0\nHK854xSFg2FLfN1PpT2kJ5Byf5e1EeF3bkBI9aNRCJIfM3fw9DxTFUb2dsjJ4p+8xkFlwG70\nqny2YMd+4Y/wpgNLhvmXqDyadIwbnHI5pG3JCMDpxjvURD7GzwM55oAcuGXd0bvmnNtTAx8v\nXdTRIiqfMOPRRQihlZAeMZ5pAJ80inHCmgZ2rjIGcUbiYBzhs9Pakm+ds5yAOAKNQJPusX6k\nelM3BSU45G7jrSrtjUblOcUhZvMf5MnoDRqKwKqAEg7SOmaGyo2tgs1EzBdqlcg8killAk8t\n1x6YzTGOWQKRwRjj603coZmk4P8ADT/mU9OnTNMkOYnJA3daQCR/d4PJ5JNKu1iecjHH1oUh\n1GF5xzimMzdAoVPUUAOkjbcpKktikXcse3jBPTqamVurFvlK4xUaqFAHfoKYAZFdRk8rxilE\nu3GRjd/DQWKtghSAOtNXbnOMnrk0hjeGztGOaVd0kilT2xk0p+bJUgCk5Zvl4WmAbiV3FckH\nn3pwXbGxLY39qTcVUgpkDmhtp2E5APpQAOrQZYH7q5ye/tR90cjczc8dfpUUmChByTmpVUsx\nYcEdKAGyMcgJ8o75pSskbswO4AfpTCH+8zc0PEzSZ3E5GDQIeJNv8ACkZANNklPkrxyTyBQi\n56kgDpmlY5f5fvdSPWgodtMi7SBxzTA27OVJC9BSMpR9zFuaVmAU/KRk9KA0F3AtuZQOOOel\nIhKq3B3dfwoaMnk8KOgp5LlQ33u3FAhqsHZXPQUu4bi2c/WlZfLXkfUVHuTYHHEROKQiX51j\nwG4POaVZiV2OMlTnOKaO+Dhad8wUE80wGyMZOHywHRe9NkVGXGduKGkePLYCmkb94y8kBqAB\nlK7VU4XGeKXzhxt6j1p4zuxnjoKimxwR93OKAJGKswL/ADE9SKVm+UqmN45Appx2U8Uku+Re\noDDk464oABIVY8/Mw5GOlOkYeSNjlWzRvZvLAO059KcY2O87xjPQ0AIAqKN5LjtTshRu3ZI7\nUit13MpC85psT8GUc9//AK1IY5pFZcjrTBlZFUn5hz9RT9rMQQmcjhRTVjb755NN
CDyPMcsD\ngHkCjkFegUjIxSY3ccjBzQ74kwMg9Rx2p9BXZKrAttbkHvUUjfedDnBzil3BWOR8xHWkkCbQ\nFHP86nULD2B2jdxu5AqIS4kOTkDgLU7RhsEtjK4A9DUbKpVGx1HWmJiJ5rOQzfKegpZM42sQ\nT6gUq/M24nGOhpGYqwVmyuc0h2AKWVFJ/GlXO0A/KD1xR8snIG3BzSmMMGJPbimMViNp2k7R\nwV9aGZI2yAdxGNuP1pjEpjaAVwM5qWGR5GCtwDwTigBvmAbSV9qGRTHtfj5sigPuTABwp6Y6\nUrZbkkbMfrQAq5ZS2ec85obayswkwccK1R7WVDtbJ64p7MCcgAN6UCsHLKv8DY5BoMZZRgYA\npCp8vaWx3oYFVCjIc9fTFIYsfzZ/rTV2NhcFR39acrDYR93B+9SbhIenfrQA6N0LNlSQo6Z5\npW+ZVBHLHI9hRNnooyG447U1sBhGrdvvUagPZ9rZz8/QU1mZVweWHJxQoVpQzHhe3rQoCyNJ\nuyOy0WAGUMdzHDEZpdvBO7k0SfMCCuOOtISFVD1PQrTELtRVLhskCnKyqyswycZyOlRSRtEv\nCjOaXcyKAOAe1AiVAvzODj3NNjA8zcRkH+dPSFPJLucDOcUwDcwIU7eooKHNGFkADFucmn+W\nm/cwG0+lNkOIyVGB3p0UZWNQWDE89KAGr8uWUHdnqPSk8wbmwNyn+KlDfKyAFCSDg0lxmPh8\nc4+7UsY5VDRgHI7fWnqpbIfOR6GnZVdvcVD8ys4ILLnk0EkjYKnaPmz0ojbqm0LjrmkXC7ex\nJzih22Luznn7tUNDo/vHc3FOC7JCCc8ZFNyi4wvDc0sfzKdpBBOMelSwuDM20F859qWRWfam\n4kUir8uMEnOBml8s4LRsflPJzVCBmJUq64x0FShiIwep7U3y/MZ2LYX1PejaI9uCXb0NAWFx\nthKvzznil2kYYEZpokRWJyQ3QKPWnxo0wDMBuzjNAxwA7H8BTt/lxs7fe7betMEh3EgblHBG\nKlhl2LtA+gIqWIVWEbMygjcMnNK7FlDP17ChvMVn3ENhe1CkeSuRhzTQkL5QR4yWyGP3ql3v\nnyyNp9qi8s+Xuzt54p4O4hSOg+9npSKJlV1XrsTvTo1jTCl8AnPSqwVhGGz8277p6Yqx5LMG\nZsFuvHpQSTtmSMqp4zx71Iq7m2EE8c4qvHCY1R165+7U0k0jKQDhgeoplFiKNWbOeAMHNPjY\nbQGPAOMZqO3kj8tvlYsetWo44I4yijqeM+tJhYOPMMeBjrjtViGUK3lg7XI4bHaoVB3BSMNn\nG6plI3KW5HdsdKgZJEzbBGMnacgmraqeeMEgZPpUUcm6YMuOeB6VKoYSmVvpgdKBEitFj5fu\n5ya1NNhJbKLt3HJ+lZKp03DhjXSaDujwGG8ZwKQzvtAtpFWHy2+Z+AK+vfDtm1rodhC42ssK\n8Dp0r5Z8B6TLqWr2EAJyzqSoGcDNfWcKtDCkRYMEAUH6VIAMKxJp+4nBHeo9u3OeQaXaVQEd\nKQ+pIx9DzS7Sw6YPrUYYZB4yakjY4J/nQMbuI65Jp6yH0qME7ie1P5PTpQAqkFsmnnCnOOKi\nUAZ9aBIcDdzQwFLBugp8bEkbuopD90AcUvmDv19qGgF3HueppNvBNRv8rc5qQZXnPFDAcrDq\nRT1w3bimcHOKEYocY4qQBht4FKOnXHtS7lyfXtTGU5zTGTxkkZbkUjOoU5H0pnO0YP1pOWPI\nximA+POBTucZHSoi5RenNSbgVGTj6UCFVl3biaezZjyDmoRhm9qVsgcfShgSKdyg9KPlViKj\nBP3TxRuBNSBKrdyM07j71Rbv0p+5XUdqYxJG3N1p3mZXpUSjcxx2oV9me9IB27aelDZbvUfz\nM2M4HWnCT5uBwKQDt2R64oD/AJ0kfQnFKfpVAPH3cmiNt/TilCjq35Uir97HSkMU/e
wRxTgu\n5SDxSNnAoxuzzQAu3Gccmgj5cn8qYpaNs9Ril3FsEjigkG+WkxliOhpzMvfmkblCRQMUZZeG\nwKXhevWolyOe1LuHXqKBkn8PSncMvWo0bLZzxUoZSCcUCBW2dTQDu5BxzTGZWUmowx3Z7UDL\nDBgOvWkVtpGTn2pIs55ORTpVXj1oGO+8DkYpGJUKOgpFzgjGaYzblHXrQA9vSnqhzndkUnYH\nqaFI6DrTAlVvm29KRTwcE1GzZ4HWnxnpkYoAQttH3c+lH3h1waSR8fSl4VQeuaQDipYHLc0o\nySMimIOc5pWk/eBckCgBd2Se1PRgGyeKhwytnGVqVsNgEcUdQHbjRuEbHdzmm7h25pfl3HPP\npTAMgRn60bzuz2pM7V6cUmTQBIFD80g9DR2AFOx3ApDFjbqCKRVDNhjS5xim7iWyRxQIc2S3\nFOUnaCetCgleRinrhV9aQWHK2R1xQ4ZgWzTBjJ9aVT8uQfwoHYcq7kJ9Oad5hGGOcUIu3Oem\nKbu3cDpQMkVv4s9elJ97p96kPygUKM896AFRuCDUuDjOOKZHtbIP3qlViABRYBd3HAoYkJkY\nzRv3cbcChSVA4zTGKJC6jIy1OGVXk4poUnn7tKc9+aTAcsg3YZfypBMM9fmp27dHwORTMDO6\npAsLIrLkNzThIeF/hPNRBtj5xxjmmMwboKYFgAtnJGKa3+r29Kbxzjjil3b8DrmiwWLC/dVR\n6VKqA9Oo61BCxDbvTtU0UgdWB4JOaCh+3rSsDx2pFcrhSM+9A5Y56UAKuexp5ZtvPIpqthSo\nAIpVk4xjHNT1AXlRyM+lKjuMnAHFN3NuJp7KzrwcGmAkbZGCeak3dBTEXGCR9aerCTnoaTAc\ncM2c+1BK7sHpQgEec807aC+7HFIYiyr0UGp1IY5B7dKr7gNw5FOhY4yKBFpc5yTn2p27rxxU\nCyArnPPpT45NzYPSgCwrK65B/Cj5eoqONcBhjGakVhsxjmmMVsOuelPT5V4qEbtxBHFOjbbn\nsaQEkascnqBTo3LKcnioo2dQRninqNq4xnPrSFceQu3JPNJ0YdqTIZsdSKOM4JoAkZVzuOMU\nEjAOcUjYXA6g0/ClfagYJk9BgU5OMkn8KanpnjGaTcDhgOKCiSMjd6fWnOd3yqce9NyNwJp6\ntu5GDTAjZDu5FPGW4Pek3EjJB605cspOeKVgHKAvpxQJT6cUiruGe9DqSQQPrSAdkSZOcUo+\n8MnihlxjPSk27VINIRLtC8npTyBkcVHHIuMHmlX5uQeKBjv4iRSsx4569aRTupkinrnigCVi\nN/FKzrnrxTFUNz7UoRWj54NAEm7C8DilRtzcGkUKmB94U8YDZ6CgYu0t1FIF6kDBppba2Kdx\n6kCgQnJP86RmaNcquTnvTowNpPc0/O4AGmUNjlJXcVxT87eaZuAbpTjj8DRYBwbeue1Iy7nz\nnimsxUYA4pVYbuhNMB24+vFHVuBSjGABR5nzDHSiwDue/Sm+Yofaeadu3g01owuGHLUWESbF\n9aKbtPpRRYY7aG+bGFpud248YFO9D2pNu3I9awGhGwy01flbOOKXO2kU/wB7oe1BQ7gKQRk0\nH7oAo3HdjHFIzbWyTxQJjt3btUL7vMDA8dxUqON3NNf5mJAqkT0EZu4GM0isN446Ush+UUir\nhsnpU63KHKW3e1NZe4PNHzckdKAflGRzT6iF+9jjFRq20nAyfSpSCJMKc0yRfLPQk0WC4feY\nH9aVvl6cimx4XjHWk3BWxnikMVjwOMCkDbmxj5e9K4LMB2pGBVWweRz9aaGL0bk4U9KNoDda\nTcWVSvfmlUkEjrTJYfeXjpQvpjmmlmPA6UpJAJPBoEN+zxNOJHHzKOKmX7pNRK/zZ208L78d\n6BkPlksMnjPapNwjyCKXgnpj0qPcVznnNAD1Y9e1LtUDg4PpSx9scCmg7idwyc0gHcqo
FJuX\ng9aGyeM4pq8AU7iFZwrAdA1IxCH1NK2d2egpm44OeaCiRVVVzjJNGR3pqtjgdaRsbgSaYiVs\n7TgU2NtowTRuI4OaZtyST1pAS7guTjg1XkwoyBT2ZuOOKMliTikBHDL8wJ4pzMWmpDGM9aZI\nwL4HT1FMRLu2tzTi6gYJHNNyPlFIxRfrTGL/AA5FMbLLjORTFkEmccCnqdo65zSBibgOKj8z\nlgFyKldQzA9KauFPTJpdRDGy2BmlIPcA0P8AM2c7TTWkyo2nnNUBHKx+6OKVWI7HNJK22XBG\nSaViVAYDPqKmwC7hyOhpPljOT8xpQyyHpg0kgx+XWjYA8wZ+UYBpk2UPDcGkDCOPLZzmldl4\n4yKL2AWHG3nrU259pC9KgDYYdql3E7ccDOae5J5N8f8ASwdHsrvJxkoTivlHXt0VxIpHQ8V9\nkfGjSzeeD3ZScxSAjnjmvjvxLEzXcuWIPQfhR1EcJru9m2LwTXOSb0ZgemenrXU6ssnnDI4X\nkGubupVZn3LnP6Vt1Ie5n3FuW3H7vHFZ7Q7CAX3OevtV+Z9iBcnHYVn3UJVW25V271SM2UWj\naNnyfmJxmqtxGZBsZ/m9atM5iYK3Ldy1Q/IzFs8k4qyDLmXaCpfBpVUSADfhulWp44po5Afl\ncVRaE7lI+YgZOKYuolwwRtudxFV1t3mLNv2jHyr71auMkAMPmPSmQ/JIMN34+tUMpQnapU8S\ndDmmuw8wAnI6cVPcQlpXbIz3FVWj8tGw+cc5oAbdLuYccA1GV2Atn7xxUshj8kEuQp6+5qvJ\n8qHnIHIpiBlHmbs8AYqGRSz5jPHpT2G1VJbDEfdpFVo48nhz2qrB0GI2xSwbjvTgV3Fi24Y7\nCm9A/wAu844x0qMNiMk4VgcUxEc58vlR5gP8NN8vaMj7pHI9KmMY6l+emBUUygP8q8L2zRqM\njwGRsLwP4utNYs0KjkEHrShgkgUttVucikkT99w+Gx36UagPkYcKFxn1qLdt3KRnt0pu6RlJ\ncZfON1Bk6eo4NPUWoqKGIB4I55oT5iwVgFzzSDMqkqfxNMjQruHAJFFxj2mG0HgjOAaY1wuC\nu3BzgnFR4CkENx3FPIfb5inoe9AgVVjbaBz/AHqiLHnnAz271KpkzhiGY8ims27leOxFADZG\nGUOdnOSaRsNIWXJye1PkQcEkMSO1RqroR3J447UANUBd5HDfrSYIZC3zKeopDiPdx83fNG5t\nqsBxnFIQ1U8tjjJUnrR5jljg8AY5qQMVGAc5PSmOhEnz/dPHFMq4zcdihVy2c5PNKFLTHd8o\n64PrTuMHHAWmPINwYZx60BuKGCsVI4602Rtozn5DyPX6VKytLHzgDqD3qFgeB1Oc+1MQ0yBl\n5yppu47Quc7jjNLuEkxUNlhz0pWw3Xaee1DAPMMbgFfkx+dIsqsu8KQOlLJJtWNQOR60bW5C\n8AnNNDGyZkjDJ8uaav7zgn5RwcDvT5CZExuwvpTUV1ZQT9KkXUn2/N87LuUZB9qiZUuCrpwM\n8nFAZzvLANnj3oVPlCnKDqKpDGqn+kMI3w1ICGcrnacc+lOwI+SNpx19aYzHYABuz932pCHj\nG4MvPHHvUS45LnGTyDT8rvyOqjBX1pFUyOc9BztNIBVZY2y3RuKVt3lkKRkGmvuwPl3DrTVb\nfK2eOOMdzTESKnXa+4MOpGKbJjg4PAwDSLhsZbLdyfX0oG5iQTtoATb5jEKcHH4U5SGQlT90\nc00fNkoeh596ZyshGQN38NMY4qWXchznrSsPM2gDDqOT6Co1AVdme9OBCHjgdCfWgQAqchm+\nbtRxuHrTNvmDaDg+tHmeZtB+XBxmkVqSM3OCcHuaY/yqoV+M5p0m0yEA5HejcrDbjBFAxGx5\npweD1zURXZJn8qfIvz7XGVxmhdvfrjFMkYsm+R8Eg08SblEbr83YihmdQmF3E8cf1pOVbb
6n\nk0hjiuMkCjc23JO4HjHpQp25Dc0kiEMOccVQrDWjFuGK5fimx5ZVP3W6805ZGjUkc+ooXPOT\ngt3pAKsu1js257jtQhK72JHTselM2lc/Lu9aBFlTt6UwG/Lw2W2tTlKsSoXcTUkkeVjLcLjG\nBUa4jbIGFz1oEK3yyKu3IHel2N854UHoc85pu5sYXgnkg05RjJXrSAazbo9u3Pdj9KCF3CTl\ncipOWYHG0jr70j5nU5+4PSkAxmMaDCqQTgGkeMBhnmnqS+0EDb2pnzRsUAyxOfrTGOXG44GB\nim5CKpJ4P508yenRuPoaZJGOCevTH9aQCfdIO3AzxStncGIxzwKWRWk2jOB/DSKpZiBw2OtO\n4Bt2yN2Dc0jZU5DAr3okfzMDuKFVFfb0U9aADa2RgZ5BpXY+YQqMAOd1LtDbG3ZXFOXfsIz1\nPagQzcvlh/U0D1Y8Z6U2PG4nqAelPxvkBHy880wEmVYye+BkYqNkCKCPlZuc0jt5nzkE09lC\noMjdQMGywXecnsaaVRV37yRnGKTcWJ5wuKXAkUDGFxjdQIcN0QIOMNyDTWYjBXDewFKo4y3z\nBeAfWmwyLtL8rzigB7bdoDNtNM3bd2zqBngU843YYcdc01VdgzYEZ/umpGR+Wy4ZhtON3FOV\ntzZKnaRnNLGQ2d2RxgZpWG/bzx3xVCGNInlqCST2WlVZJPuspFKjbtzBcKppq42lgcIxpAOX\nd5nJXf6A5/OmzbipjLDJPpTo9kOTt+YigSbMNgenNMCNVwoyoO2lkkMeOMFjjiho9y5zgN0o\nXllXGfrQMZs7D5QpyaUt97aPvc/QUrJlfmOTmj7OrOD91R+tIAyVUkP8pxSsx5KnJzz7Ug5k\nJVcR9qSbOxuzYzj+97UAHl5jYhsY5xTh93nr1GBTIcPGWKkFlxt/pU3lttB2/N3Wi4hp2sTt\nJLY701Y9qBmyRnH40HrkLg0qyFeV+YdwaYAzKkm4nYV44pNzL0GR1K0PskB3Keec0+MDaGPU\njGakBxdN2G6g00qqk5fGajK7nOW+lNkjYhAOTmqGOXGCeoPGaftK4BP4ULG3IJ4z92kZTzn/\nAPXQIa5HmD5T7YpxIViMENRhowMndGeQB1FKpaRSF+ZOpb+lADVY5OTnvt9qezFApXapY8Zp\nir8hbeOuBRJnpneaAGlXZiX5cHIp6yklnxjcfu0ixhslm5xT5PlZZFGFxgrQA1kG4s+CKRd2\ndxOFHJ+lDKvTdnPbuKUNlQrfL2Ge9IAk+ZsfwHke9RxlSAwOWzjbUzSYHK5ZeMim7o87lHGP\nvUxjXjLLlxwG4GaJMhiRx9aEjVpMGTnGadt+bBO/Hei4hrMcgj5hjmiN8sCOgOcUsQIY8YHU\n0jbnYqvfpxjilqA2QkycHJY8c9qUR7TjG5f60v2dSoyOFOfypsnMuWcEN6UalEknLK33e2aS\nTc0ZfIznFPwPL56jp7VHuBXA+cH+KjUkdt3Fg5z8uaj3MsYBzz3FLs2rgHPPWgcqVzz2oAVk\nfjaOF5PPJqNWaRTjuenpTw4YuRkY4JpikqzMOMjFMByqYMhvmB7inFV3KCMP1JFJGX8sI3pk\nZqSFirDfnJ4PFACLL6DnPFCqr7mL4YevSkUyb1kwBHu28UkkO7fhj8rUAHlqDtU/OfxFObYz\neWykgDnb3p20rIrLgsRzSMzKnBGT/EKQxq5bb5bmLHHNOyy5x+dDjzIwWP1xTUjI+fPy0MBY\nxuc5445btTVlZ23MNig8UbXVeCCSaWNj/qxyvfimIR9qqCVy2c5pzNuyWGG7CmeXuDEs3XhT\n7Uy3XcxZjx7Hmglk8MhC8tu9c9aSR38skfKn92kbG/GfmAzTnxHgs27cOFoGJD80eCCVH4U2\nZiuVI3k0uUjVGUne4yFNP4ZvvAHHApajGrI/k8YAHG3v9aRWJUlBuXv9aF3fxLhuh56ULGI8
\nnnYTgDPemAeYfJwQC2aUtvwFbaBz70khG4LnB75ps8as24ZXAzkUASoxXjJLY60smG2k4H0H\nWky/ljpvb+VK8hjwQ3z9h6UAJuDSZbPlDvSxgSSNg7lPIFJuIUZOcnJoXzI5M4GD1x6UugxA\np247Lmnsr7o2XoeDTWzGpAHyk8c9KNr7k2tnnH0oEEilWKZyaWF9uQwyT/FRGSm4kbieM0KD\ngrjvmhgPRfLUMDlielEmMgkcZ+7ScKhLcAGjaWGM8nlc+lMaG7mTKgDOc/hToynRu/INJuDS\nIWUqq8E0pk+82Mxg0iRrYz6nOcVKw83HBUjnIprbVlLD+IVA6Fl+dmjJPajUZO25SA3yjHJp\nvMjBPLKtnrTUk5IY5PQd6l2tGuGbIzkMKYCrGd3lsGK5yTSiXMjBDwozSl3VZC2dq9DTIXTc\n0mdh6E+tAEgBbJ6exoWTy2BYfTjimw43Pu9cgU0B5mKkZApAHmFmdxt4PHrTrfdJkjlT1zzS\nIvDb0G3+6KIWCrhAy85xmgdyw20BFZwW7cd6iWQ7ih65yadMN2GyCuc7aRlMnAz83SkAbioP\nRyOR9KSX5VBIzkcGk2KqjCbWztJzQoZZBGTkZ4FNCHQv8ynaSvQVIsPloxU4b0NC4kyR8qLw\nfY05sOvyyMwB60hW1GLn7OGJO/0HahWCwcAkFqXc5YZAC+g601k2sAp29zmncZOubjcq9ByV\nob5dpZdzYrSs7MNmRBkdCwpNWsCYfMi+UYpklBJPvhU9+nNJH+7VhzjrvpsUjYRQfmPBqbpl\nM8elIBsRdDtXGCM7qcFeVR8wXn71N2uSMAEUpYc4O0A8UgHbWkRlDEYPX1qS3gAUk5yelO3D\ncpIO8DJpsc4mBI5OcZ9aYDnUh1G76CnxxsobLYPbNJuZsBwD/SpfvLggH0oGNjYbVXOQTyKk\n+b5lQ7VBz74pI9jKCvDZ/OiRmkwqnD55+lIdtCx5ina4fdjnFSNsViSgXdzUaw/Ng/KByCKl\njcySFowHI/vUAPgZVIUqST7VZjTzcsemeKrIxVvM3ZZjylTEeZIVVcBRkmgZajUNtxyVOTmp\nCokk3K21RxioIRtDSK2RnGKnULGyluPX8aliJQrRrkrg+1WI92zYoyxHNMj28hTxUwkPy7xn\n36VAEu4LGFI3HGB7Gui0jdC0APDnFZdjHGzhfbOD0rf05nF3CiR79xFFxnu3wN0trzXjPtKi\n2hJDdsn/AD+le/biHxz715P8CtNEMN5dfdBATryPUeleqq+5iBzg0mBMxwuO9OVtzbScVD5m\nTTuOT3qChwX95nsKkALDrioDkAe9OVtrAE5pgWPbrSZ7Z5pik5+UcU/b82aQCmPPA60jf3T+\ndKPvGkaRc7e9UgF2llGOKE+XIxzSCTbjgkUu7dzUgOXGMdTQp8xumKQ/dGOKftJ9qYCj34NN\n3FutBw3WkZumDigBGJV84xTvMLdqTcGB7mhVPUmmAvzb/bvTySwznpTOWJOcU5MYI70mMGYS\nDk4xS7htHrTeB70fxA00BJGdueMUjZZeuKRs9zTdx3YA4pMdhZJPM29iKVWzSBQaMbT09qQh\n6yBmNKHHaosFOMdaGG3HfmgB6yFZOlLHg5BFRM/yk4p0bE44oAfnnbTv4s5wKRQRk01cnPHS\nqsBKqktjNLu7EVG0h4IFPVvX9aQ2KzbnpRJ82OwpCRzjrTYx1oJJGbcwweKGO0YHemkenBpv\nOeaRQ/OF6+1IzfKRRwyZI5pOe4oFYTAwGzT/AL+AOKbGQYyMc5p6sOnegYN8vGOKQ9KkPKim\nyMuKCSJlOcjgU/cYwMcjvTTylKAcAdqooVZAzAUsmPXBpNoXBo2hmPrR1GPjyCCelPWQeYQR\nn0pp+Qc0qNhuBTAdu3dDg0Mo28nmoyu1s+tLtzgk1ID9wXjPahM5JUc0wChZNvy5xQA4bc56\n
1LnuDxTGdduOhpq+/FDAmXDKcjNG4rwPu4qFZfwp7SbsYHPSkAq/eBp0nzYx1qNlbzOOlSrh\nV9TQAMRtAFLG3zDnI7g03ny8nihceZkdMUAP3LyBxQv3c9qZuCtT05UDtQFhWPy8HjNNHzMT\nTW4bBU4pVx1H3aAJAdrDNO56g0xsquT0pd21QMUAPwWOO9Ge1JyvfrSMfmHHFIok3N36Cl9C\nPxpEZeSQaVV3NwcCmA9znBxiiIg5PSmEnd1z7UoZVU5HWgB6tnqacuMAjpTG2tgD5R3o3fNh\nRkUmMVo2YZzT1xt2ngmmbjuPpinL82OO1AE8e1QCRzS7uhNRI2G2nOKc33g3UelAh+47iM80\n5SQQx7VAsn7zPX3qVlO3huKYxwJ69QTmnhWZuDkVCMjgc09XxjBwaAF+ZSO+TzShCxx709FP\nWgqy8+tSIGxx6U1l2txxS7iygY5p3llsEnNAxFzsOafGvQjtQE5BY8elSK3UDg1RQ5csSRxT\n4/lGKYjY+8MGpOQvPWkSSLwvApFkODnrTd5A6Um4MuAealgSpIVXpzTtwwM9c0xlZQB0zT0j\nA5Y5NDLsP29hThH5bDB4PWmo3UsCB60u/uck0eoh6f6w85ApjY5xQihs+pprL5aYzg0rDHKz\nbl4z2qRmYMRjAHempJgDjHvS9Oh69qQxWfcvTmhWCsB0pu4qCMZFO+8wPTAoEP3HcMDFTAEM\ncdaiiAkPJqfcucrz2oGPDlhycYp4YZ3CqwZlU5xmnK25QM80CLGSWyaXeepGajjbywed9OZm\nI4GB3pASs2U3AU5W2qDnJqJTtUdue1PZcEEEfSmAsTYycYNSL8ynA+brUbMSpOOfSljkPJxt\nNIQ9mLLkjPtSQy5JBGKFYjr1p33Xy3SkUO4A9+lKvygZ6Uxv73ajaTyOlUBMSNuaFG0DB60h\nYccfhTshTjHFIBzfdJ3UjHagA5zUbD25o2s564pgTwndyDSljyDxk01ZAvG2lOGapZI9gGUZ\nFIy8HFJuLcULllPqKChVjOAR1qSP1HSoo8tweT7U+RjHgnp6CqAlBHWk25OetMVvm5NOXjJ7\nVIhOVI9KkZhjgY4pnCjBpY/m69KBj9x8tRjmn/ex2FNRtzc9B0pwLenFFiugvVunFKrBVz1p\nI8ZJxk04tsUBsU+ggUDliaX7vU5+lNXCk5GRSou1iakoPvN2Apc8c80vHQijjcMUw6CMd2Bn\nGOafGMKSeM00qM4NJuIHy8mqEh20454pV54xSFuhPWnN82KQDk+Xr0o+VuO4oH+rxSKp9hSG\nHlGil/4FRQAm75Fy3NKrY46/Wowo5NPXBrK2oxWFKy5UelNfK9OaX+Hk80FC8Z460113Dkc0\nuQv+9TV37c5NIQi/KMEZNGdvJ5pzNwPWo2bb2+tFw6DvbHvQ+OgOaPvLkHmmou7gcEetVcBx\nkAUDoKMrxg5+tMZj6DHelOM5FIY5geo4NJuPQ8ml3naBTAw8zOc0ybA52ngUuFPUUqspJ5oU\nFlIFIAbPPPFIpyo+XmhlzzmlH3QM4poY1lKcg03JVM4/xqRhg+1V95eTOfl6UMRJGu0HJzRg\nlcmlkynA5pFb5enWhAHtmhm8tQetAXrmm7G5PakIft+XrzTGUkfSlCkketKzbc56UikJCS68\nVKBtqNc7crxRv2r6mgliyMC2CeRQ67mGOP6UjYGGxk0H731FO4xJmCxnnPvTMsuO/wBKewG0\nLTWYKp9aaH0FXs54NIzDk9RTCWZdxOR6UNiNfXimIcCWjxu5xmlCnaMnrRv3Rj1PFDfdC55p\nAwYDAAoK9lOKFbml2jdnNIYIBzk0nkhUwB3oZMdOtKpbbkmkIacMRxUcgG7calyRnjioW64J\n4oEN3DB2jg0+PCrhuDSJtVeRkml2qWGadwFkcbMY5qMT4+7TuQxxzSr905AoASaTKKc0ke11\nwe
COlNkwoUCmvJt5pAOc7ZPWmyfN04NIG+cEHNSN/e7UARbivBXB9aA2VODTpHEnXoBUYO3H\ncGhgNaZsHjjvSnCqueKOGY5p4ClenApgRMxaTIGanjYnqMVGq/NuHSpVG/HekBz/AI6sn1Dw\nrexL83y7j+FfFPirdDfPuHAB/nX3fqy79Hvlxg+Sw/SviPxxZsbuRyMAHBWqRLPLdam3crkZ\nrmL6GUKGI27jzXUalGY9zn67a5m8ma4AUjG08VrEyZl3UL4RsZHbFVZVljVmYhnzwta9vDvl\nji9+aq65bKyl4nxglcVoiGY1xH58fJ+frg9qoZMeTjcVPNWjE7KCW5B7VFNGvmZDbe5zVEkE\n8ZfBPyA9arNCFbIfAB5NSzEh2UNuDDgGqz42+S+c9d1UIZI2ZRleP7xpkmw8KuG3ZxSvEzRk\nnJHr3plzKMBUb5tvfrTArtlt56c0jYjXy9ocGnt+7CY+dSfmaoGLrvwvOePpQBBNGm1U6KDn\nmoWV2JyNq54qz5mYzJIR6YqF5CzAgk47UwI2bu43MOlRiMls7vcJUjBpgcYx1FMXjkDGetPq\nSR8fNlii9TiopvlZwWHl5zuq15aKvJznp9aimhYAhlz3z70xldT+7Iwc+tI0aGMNli1TxkSW\n5IGG+7u9DTNpY7v7owfeqCxFj93kAYHODTJNkg3kcHBBqUKGkCHgsKgZZImKY/d5xjNAhnPG\nTgZzmlPzbQnL5yaau9FfJwmeAacZBuDBsNjHvRcOg19z8AgZ44qJF+ZlMnK8E4qV17L271HG\nxXdgZyMEikPoKqgOwAwc9f60kjgL8zZGefelUIynn5T+lNUIQdoyPU0xCttRgVYDHNEm3aWx\n8xOcUYXILAMD2FPLKxPlHK4+9igCFpN/Mce1e4qMMdpBbHcEVK33Qu4At3qOaJVIReo70CGs\nC+0gjOfvGkZdjEFiWJ6U/wAsbl5wOn40gYKrbjvZfXvTKGOvTOcZztFGSoIByCcrTvmjIJON\nw4HpUbSlVJ6nsKCRpmMcgyuQ1DsAxj3YXqadtSbbkgP703jzCMZHQ0DHNGXkBXLKAMYNHluu\n4HvztoK7GAVvkPA9ajyV243E+tAhcDbgfu2HekbpuYKB6d6czZUg80hUlR2470DFO1cDG7Iz\nzTI1+YjcSTyM0u7DDvx3oCvuzgZ/uk80wItqgnApQu5VIOcc06Rd0gwcr6e9DfvFzIu1hwCK\nQCR/MrELgmnblX5HGTj1pJGZV44pY/uhmGTS1AYM5BxkdOfShsqxKjHvTmVtxwcAjpTGYKvP\nHbNAhVyy8c8ZNI0zPgrwtI2QylRk98dKcnyq424B6UAhnzZdSSS3Q0iOIxlRjHB+tP6RDPzG\nmlWWFmcAlev0poqwBdykjoefxpGYbl3g56Um8gZHTtSsw6AEketUIay8n+EqeMU+T+E7c+9D\nONvI54pGVvlGQec/SlYQzdtJVhwxpNw2kp846dMc0km91KEd6crN/GvbqtIY2PLDIOcHmpGA\nb5dpz3xT1ZeAMLxUQZuQAWIPVaQCMdqhQNuSBk0kn7tju5HTIpzNvhLHDEHG3uKJsjBIyvpT\nAY7mQp27UpUiQjoRSt8zdMcdabuOwNkk5pgKrHIYZHPIpXUKhbGCTxSl23YJ2rTY1DZYtlKG\nIfny1yw38VGwDfPuz2p7bNv3ju9KYTHJGqg7CDk0hiN2x3pqj5drNk5qQBW6Mev6UyVm2kgb\nlPHuKYC/xbgNw6UKdsbevUikV9hiAQrxnFPJxg5Dbjn/AOtTEBG2FN3Jzmk3BSW8wL6LUmAr\nMHGD0FV2UKNzrkrQMeqbtp3YTvR8u47TxScSKMnHfAoj/eZAHyilcTFbezqSMqBgYph3hW8t\nce9KuQMLwvP1pw+ZFG7bg8elLcBCvmYJ4YDmlyGCs2Rg56elMcHcxB4HU0qxvxvXcp5Ug8Ub\nAOmZ
WbzCuBTSQzZxtOOM01s5O4koPWm7grDK59WPamMdwqndnAGfekCjYuHKhqUOCrZ+6RjN\nDqCrY+ZV4FAhd3zLlOBxkfzoOHbaw75BpzEqc43JjFR7pG4OOOgoGKcbgQMIvakEu3coYhia\nXy96h3cIem096Zt+bOMY9e9MCThQpDd+aRmyzZUt9KVmSRs7TgDpilWTauQDkcmgBkCl8opC\nHoN1Iq+XuV8kg9fWjeXzKOCKCG8tAx+ZuTS6gK8n7sJjbzk/Sk3DaQO44xS7R5wb+HGM0zy1\nWQYPU4zQArtvcMrbdq4x705juXoCSOaTy8bmDDjgik3DAPU56UAKxO5SvQcUpXc33wSOcelH\nynOSMKM7aTaMyKRtbbkYpjQjNvjKtyOtKwO0NEcAfw00NsUcZ4/WnHnCjqeD6CgTGtvCgxjC\n9TQyeYoDDbz+dSOdrGNW+XbjHrUMkimRMjDY2jJxSAc2Y2Hy5HSjaI3JJyO2e1IzFR1bOduK\nbIh2ncN23rQIWSVVUN+lAmPlhsc5xj0ppKcFvvD5h9Kcqsx3kARt1+lMQ11wzfNg5496kkj8\nsjc2XPpTZF2qXxxnAJqNs4BJD7ec96BkkfJwWwfc0sm5l6hgOKi3Ky7mGG61J96MHGf5UDG7\nVKqPujcMipPOKSSKMlc4D1GWB4I2j0oXLKW9D0pCFEYDhc5Ocj3prMoZyAd2ego+ZclhjvTo\nyMDHybuuaWowErKWQ/MtKFZVAzwajj/eO4XoPWhThlUgls0xDt26QA/KB3PSlbdu5YFP1oYL\nvKkcZ6UFUU4YfL2agY1mbcFXI7ipWAzkfMo6/WmqzIeDn8OlG4eWxB5/u+tAxiqPLOTwTmn7\nTzhgPeo4lkZvm4THSnLGOADx3pgLCQ8nl4yP881M9u1uBlNu4ZDHoabCBHMJFHCHIqxdagZr\nUBhks3GKAKUeFmUhvqfSpI3EiuM/LnpUZVGwOVbrQrD+7hf79IQm3aT0IPf2p0bYBLfNxwKM\neWdpAYNwMUgUxrhicrx8tFxCxtyCR7EUu0W6kyAGM8DHY0m0EZY8dcg01l3AddvNACso+Uld\npHFG77yLyaSM9lOcc4pyyEcgfNmmMbGrRtvVc9sURzF2Lf3etLJIfl2DbJnmnH7pAxg+lLUB\nVIZSO7HIFRLt2sm3nOdwqQoGZe2B1WoxlQ68EHnNMEOaTYuSNrY/OlhbbGF/vHGaYzLJCNvN\nSKm3B3D1C0DEJGACu4L6UxV+Y4+vI6UrSAruC/d6ilG+NfM6E/yoEMjY7/lXKk5PpTmkjDkq\nOPShW+Yt0HYURQ+VIc/MGGQPWgRJ5ZbBXjvzTCDtIz8x/SjcGhxggL1FIrbsKxwew70APViF\nKn7vTPbNJ9w4Y4TGC1N3lPlI3c9B609kXbnkt6+lNABz95flwKTb+7AJ2gcg+tR7j8wAwAO9\nSIQwRt3uPapC4ikNwCdpHOfX0oKkxrnOMZIFK0Zb+JSSd2BxSYdZACG96YxNo8tefvDOKF/d\nxnBzz07ihcbSrA8dKX94eGAUenegTI0UFzl9p6804yoxUKcHPJxxSsy7Cu3L/wB6nFQ2EwEH\ncZ60iRWVTJvB4oeQKAw5b0pm3y1GGBGfurTwFduSBgZwelMoRV3DDqD3B/pTeFkB2bSOlDNu\nXLcE9Kk4DjLbmx3oENXG7I+6Tz7U3aBuP8OcDPY+tAjOCSu5SeMevanDzflPlkgcNzQNif8A\nTN1DN14p+AsYBPfnIpkZXacZ+U/e9KFZi3H3WPO6l1AVgGXrtz0pfLDck5OMYp0ZBLA/w9ae\nHHBAG80xakaKY9wbABHGaarZB5ztpx/i3cEHvQisytgAD+9SGCkN/PnpSiMyYCtjbz9RSlsB\nVxiomTbIcFtp9qYDi2AEUfNnjdTlZ41JkI54FAHmR9ww6A0bdxU9e9IByuVxu2nPUGkVTtYn\nkZyCO1
O8xTIQFGW65pi7/MJJ2J0yKYD9wbBVvl9KZG58wh2Cp7048yKxHH5U1v3hOVGAaAH7\nVK/3l7GjcSRgZKjGe9Kqqd6ICF9aFIG1j/DxxQARNgmPbt4y3FIrBV2kEx9adlmQs3y7jikk\nx8qg7l6YoGEYeRmyeDzz0xTtq7j0KBqQnauEyV6HimrEgzyGB5PPSgRIsm2Tuxpd3yuqqdze\nlR+YNp6KMdKkVt0eVOXxxQMZs2YcMemKk8sTKuTUW8c9SepHXmnA4yc8Z4ANSMcVPmYxzjFO\nXcuVD8ik3fxucDoKWRS+Cpx2NAtxGXOwsMnPT+tSbVUMWOSOh9Kj2mFsk5ftilaN1kO3GOh5\n4qgF3NGwKruVhjOOKd0TI6nnHtStISpQPgdMU1GVclhlPSkA9ZM89UxjJ4pWbzM9FXGKYH6L\n1HUU6N9udwyzccdqRJa0/UpLMmIruRuBTr6+kuFIU7VAwQKqDK9ee2ajb5tu44qg0JVyyq+M\nYp4ZwmQuSTUMSnOUJ2Z6E1I2Q2FbLMPwpMB6kqOBubPODUxm+dCq5Oar7Tb4BByafGrPIONo\n6fjTEJJnzm+U7CealiZGJ35GOmB+VMVnbd8/AOKk3huSO3FAIlGMttPJ5PvS4McQcfMxOMUk\nMjsuHwrdjjpUsn3wNmzjJ+vrUjQMqlUXGOMsR0/OnwJld4+VQep5pEC+WAx3ADtVn7QjWu2N\nNrYxvpDGLK+4DqCeafIxjcALkMcfLTVXZGqMoD9SM9aWFfmYn5ODwaALkS+XGuzBccml8wtI\nxAyGFQx/uY0VTg9C3erDIu4bTkYpgOjtsPw+Yzzj3qxu+Xbt6etRsWjjHQ45zUqyeaxUqR6U\ngLMLIYQ5POcbRV22QyRbQMg/xGqEcPlsuTgg8itK3ZRMpDbecgAVDQGhp9nG8w+bocfUV2Gh\nWoa4EoG0KQAQM4rk7QSq8bBQvOK7jw6rNG6bsF+AakZ9L/CjTHsfDIdxteY7ufTrmuyjbaxU\ncCsvwrbi08OadEOixDJ9eK1Ahxx1pDJlI27R19aeqnbz1FQxt/DnmpFYsWBpDJFy3fNJ5eZO\nKaF2YbtUgwVyKQuoiuQ2BwKlVgIzk/NUGzc2Q3FITtw3UCgZbU7VG4EGomUM3SkSUydeKepP\ncc9jTF5iLJvBBBApoYZ9RTmbcxzxik3ALgCgZKsg6GnNOF+b9Kqkq2Wz8woViME8g+tIXUtL\nIGHOM1HzuyelJ90Zx9KQOSpyOKYyVT1wKcuJMHPFMHEYIpYW3KQBtxzQMe2DzTduWBqPcVbl\ns96VWLMe1DAdt29Kc/IC9KRe3NHLZOKBCqfMYgHmkyyKRRGuDkdacxJGe9IfQFzt460pY4Hr\nTGbbjikyWkAFAh+7nuTSMw49M0M2GI/OkXHcdaAHOB2OaBnaO1CkDIxSHO456U0A/cWG3PIo\nViwJHFMU/NTkkB7YPaqAk3DAHf3p+R0PBqJl75p33lBpFCgbevWhvvYzjHNEjBWxnmm+YOc8\nHtSEOXIIJ6U7cOp5FRbvMXGelJvPYcUAtCbzhuHHFNLc9cmot/bFKDznFDESxNjmhW3cYwaZ\n93nNG7PIODSKSJFyjHNPLL93GWNQtMCMdaXd8w7UBYeq7epzRuK9BxTtqsRzzTN2G9qYxfMG\nBkUbfm3KaTaW5oVdv1psBwYsST0p6v7VHyv0pysCvFIB0hJWmIxY47UZLkikjIT5T19aQD9u\n007ywzcDPvUPLE46U+OQr/WmAshC8Yyadu3U2POCcZNP2heaChGVV6mkSTd04IprfMR/Wl2g\n9OKRJMjEruJp3Bz2qKNxtIzzTyfMTPSgLD1+YYJ4HrSkKOF6VHgYGTgUD5RgUDHFuDThlY+D\nk0oZFwM0NxyOlMA3ttHeo1Y8KelNa4jtsGRsVKF3YbOQeRigQ5vmIHahSWY9x0p20cE8YpqK\ndxPQGkA6
Mlc9x70/cdvtTdo24zQvQhqAHrxSr1Oc00nPC0oyo5NBY44RQc807adoYfN7VFtP\nU9KkVtvsaBCMCzdMGnLujbnqaTf601ZC7bW4PrSGSxghGJNKueACMmmsnYNmnog4/vCgRJGu\nxSWbmnBjtFRBSsbE8k0+HPljcaABdvIxxUm3AxmmMwXnGaXeflbGc0xjm/d4I+93ojYdTyaA\nWwCVxT/lwSB83ekAqtu68elOZhgLmkwP8aVSqtnFFhjjiMZB3UqBhjHIpo+ZiTwBUibu33TR\nYQDrnr7UvzNhh64pRhVIJ5qQMNvA4x+tCDUaWYsccnpUsOUUBvm9aYrbYVBGHJpeWY44FNgL\nLIobuQeKcuenem9hxxUhBLAg1A9heWUsOccYpySKpGSPrTcbCSDwaTavTFMolebJ29RUiqXG\n5cY9Kj2/uQFHNPSM/wAJ5oBjhnduAwtO4YHPJpMttK44HNIzGRiFXGOKQhYwrLz1xT1Xcozx\nTGj+Qdc96OWYYPHcUhjlbB29aa+dwOOaXB30jKZHODSEIm/zTt6Gp4wynBPFNhXbG27kjmnL\nhe+QfSgocFWZdxy2OKljVV4IwRSQ/KhYDj0qRdsmCTxQIFAjjJ6805f3nJ4FHC/SkZc/LyaA\nJfLXduPAxTY8LnHrStH8oJP4U4D5RxSGOU7SSaVSDyTSLkk4PajdhcEc0XGSM2fQUfMy+tMb\nAXpzSJvwO1AiVZNxIxSrhF5b8qSNsk05V6gkYNIA570rbggI5pF+VTnrTVk24B5zQxjt3zg5\nPPWpAvfPFMXDYJGDTt25SoHNUJjsnHSnBtrDP5U1GbaCRgdKVl6BvXNIAwS3A2mpFOcmmSHn\n605M9+KYxYyM4HWnMpbimbep70oyFz1NJiHDG3GOaVNyr0psatv96fyx2nrSYByeCuB2oVCO\nCeKfnHGKVV3Lz1oAVR6UvIXvmhRtz60wM3PXNIZMrFSorHuNKvJrwSLOdm7hela6ueDjmm+e\nFkCk8HmmIWCKSFSsj7mp+3ceTxSbg2R2pxzt5XA7GgsdtO3NCrznpTVkO4DkinbwDyvFUAu3\nL57UiqygkUvUYJxSq2QQOgpAOI+UHGTRyuM96aGHU0hO8e3tTAl+7wKVe+elM3BeKd/DzSAX\nCUUzaaKLADMNuCeRQrYUBuOaI1+8T0pqlMHJyc8ViUkOkYsTjpSp933pRjGelDcAUIBdu5+e\nlIsgXOTjtSbiT6Cmtg8sPyoGOZgrA4GaB3J6U1lx0OTQuNpz1pCFC71JHWmDA4PU0LIfTFN3\n7uO9AgwQcDpTjhcUzJbp07mnnb0zmqQxGxjg5qHaU6GrG0LjAprQ7m4oDUbtH3unFJHIWzzU\nnljucimMu3OBg0wHLHtQNuyKRiGXOabtbHoKXjbgCgbBm3R4xRFCu33oXpimbWVs44qepI8Z\n53c0eYM9MYpMljjNHKqBnmmO4m/dJxyKeSd2MimSKVwB3oA745pWEJIwVl55FJ5m5SGGKbxJ\nn+9mnMnmcZwKACOT5sZ4oYHt0zSxrt/hx705funmiwMcOQM9KCwVc96YPm4PymkjYOpJPFAB\nkBsE5Jo8stznAFJuAYYGfelfO3gZpiI9hwwB5p3RACOaeq7eT0IprfewaYyOTKqVI59qRWx1\nOTipOOe9RbMncKQdRd3XJ60LJt6jNPMILU1vTv7VIXHDc3JxRGN6nPaolYq3J5p2TzzxTAc2\nRznC0jAMN3Sh2DREEdKbzJgk8UBYTkrgdaesY6k81Hko27qO1ARm+YmgCZFwcjmnMqhSc5pG\nby4eOTVfJVd2fwoEOkj2qM1BLGS3ByKmb5sFs7arswXIGcZqdR9CVWAXA+9S7WWPBqFWypPc\ndKc07KoBG6mSOO3nnjvSZTaQKRWB9vWmMeoBpDFVNqnn5qMHAUnGaTHy570u1mOcimkAisI+\nGbirMEgXdg
5OKryfNJgjiljlCZwOelBJNMrXEEkYbbvQgtjOK+OfiZZmz1K+hB3KshGMdfev\nsiFwq+u7grXyz8arP7D4gvR1LNjp09KYHzxqkJaQjoBXPahbrG3yjcT+ldRq25g/dulcwZD5\npDAk9K2RJTjjEKSTZ7YBrNupDNDtZfl61q6nmK3EKjGeTWXL8sJ5yMYq0QzAZTG3y8q1VGk3\nEkjJBwBVuabY3lDjH8VV5JArA43Z9KszZWkjfcSwz7Gq2zzGYdB2z61cx5jMW5XspqHy9uRw\nB2poXQjSN5FG4YXpUE9r+9yQGUdqfcNJxy2xuKRX/dYzwODjrVCIPl2uWUkH07VUy4YnqO1W\nbhtoAQkq3Wq0io3+rc5PrQIrSHYu4jIz0pqzbWGU5qxcA+XjsPSqchKyDdz2phqPLCOYkEYP\nYdqikV9pcEAk9Ke6/dDfu2J4btTGYK3JJGccdzTFqRtlowAQOfvUjyM6kByy+vvSyKJOFO0g\n0xiIyADx3IplEe028mwnhv5+tOfOEAOWPBoYmXY+7B6gVGzSKwO3IHRhTERMuYwwLNg/e6U6\nSP72PmI55p4kKPmT7pGGxUX8MrBiSen0pB0BW3b8gMMcDNRx24TcGB3E8ccAU+RA0KmIgEcn\n601pnVQhbkDlqGIaVVUYZyfaolUrCDvw2elPjceZvI46U2Zt0nGD3pjG8CQhewzSqyHBZuMc\nrT5I0+9naP72Kj2bWBwDmqYribo1YZUn0FTfP5eV+U+uOlQqqtuDDB7GhlVsbmI9/wClILjW\nZmw+zcM496dtMOQTljzxTWlO5vTHFOULkY5JHP1oGNkYrGoXnnkd6ZtXzQrDg05S5y3C4O3m\nmSOWb5cEDv60CCNfm5+YqchfalaSMsTEuSx5yKZyM88/rR8yr1y54zTAZsSHGfmbNPMZCnYw\nx6d6FVeCfXBzTeIsbcsCck0CG/e7fdHFHOBwcAc4o+9Ido98Usqh9p+6KAI48qx9Md6ey9Ce\nVp+1WUt+GKjk+ZVYdR0FBXQThsLjcSeDScLISfvkYGe1OMijdvz83p2pgHy4wD3FFhCPlZFw\nQOOmKUq8hAPTrR5hfnHen8MCAxXPX0osAzmP/bpXYbcq2G7rTRu2/KenGajXIkJaPOeMigCV\nj825eXPam8qrCRN/fAoVmgJ29x19KFZvLwXG7qSaBDVkCuNoOzFPEZCkK27NRo3ltuz8jdu9\nSMTHHgcAnikA1lCsCOQOxNNkY5dscNxikYfMQQDt7j1okViFIyRmgYisfLKgYGep7U9vmjBV\ns461FHlGZmJIp7R/NlecjJpiGrhmG5WA7sPSpGI3IEywI+9QxLL8uR3ppRoVT58Amn1Ac37x\ngQcKODmofL5Lg45xg1KcHcCuTngjvSSdN34YpMBjRnavB69TS7SseFJ3buxpfnYANwp6U1WH\nIVgxU5LU7DEXPlrISDk4oPyquW3KeelObO1CRnnrSmRjux/qgMZxSBkUce1GZm70u0bSxOP5\n0rYEanH40FgVBK5oAYxbp19DSdmwc8crT1wWbH69qZ94Y/h7mgQsa5X/AGj3NLF8rcgEDqaR\nflwMb17e3tQITGv3hk8kUDBDvkck/SifauxeSrdSO1OmO3aVXHFMQsMc/KeKAHONpjCtu2/y\npZI02Ko5IOaRSxZieig/U02NgwHy4UjnNMQeZ5asGUyOeQfaljX7p3bvY0nRAo5X1HakEpHL\nDpxn1oKHY2yHjg0wx+SQcENTm+ZlBPA+Y0pY7sswbIzzTAR28xgxPGMYWkKlUIB3KOtKzJtI\nG4NTuFkJA+Veo9aCWNaRWAABBPoKTEe4OWbg/d9aVS23IGCxwAe1NdvLG4AllP4UAC4m6gqA\nc05mLIQ2B/tU2QNuDr0Iy1MX5OWywbotICXCtjaeOhNNUld2R1JpqrnORt45FHlt2+Y9s0xi\n4H3em49qaGBJ
XHI6570jL/Dswwp6x7o96+mOaAGKhf5n5C9KUSF5eG4Ixg0pxGoDnnFN2tHJ\n9RxmkGw5YwWIYnI5OKTemCVPU460DcG2nlsdRQxyiqAB6nFAMG2thDkHPT1pJm8zAxnBx+FO\nfbJHvY8L370LGFUMD8hGRQJCKyfMp+VcZBpvKqhA6+tSKF2NuIxTG3Hjfnb0GKCgZNoy4x9O\n9OLNIFBTYOp+go2yLKCpDAj8qjLPu65GenpTEO4kVnX5R2XvS7Rw+cue1H3Q2D1NM3LG/APN\nAEu1SxzwmORTPMULn044p8ZdMgcrimKoVsEcUhB0bOMjH44pkf7yQKBuHJG4f1qRWKy7c5XH\nWo9nzAAkZPTNMQKwbKhCGHX0zT1+Zyc54o5VQykYPBWhtqknkcYAFAxrfvlHTrz9KkbCoQpz\nnpxUTRnaoUgHOTToQmWD8SZ6ZoAjaNSCWLfKM89M0Lh1xtOM9cU/aUhJXkE8560ruwU5GQTn\n2FBSGyIGUtwvYe1CLtjwxyMfnTc7mO7nuDTnwdu7AGOtBIilSqjOCOQKFB+YOwP8WRSfKQN4\nJXPWhP8AXFWPyt3/AKUAL8srDJ4I4pPM2naVLdt1LsXBI4GelKGWRAFbHrxSGIGKswQ44+73\n+tJACuSHG89PpRuEaeYThumfUUu3au/GW7AUagxnmHaNxyc7c09owflzjHUevvSN8q7dvynk\n0uzbjupHrTED7mXrjtxQsZXOPTGDSsCkqKrZXqalY8lgc0uoxq/eUH5QOOaYQrbuvJxxSsrN\nwTgHkGmHeWGG2nPIFMRJ5bIpG4McdqbHGyMqkAHru7UZKKzFcnNHLLg8LSGO2nsfr61Bjy2K\nqeM8Cp2YKpC9TxUTblVX2ZwcH1NMOorbdqybSGzinSR7d5Jxz2o3IzFQCp6kelMYBWLnLbjz\nSAcNxj6DA6VG7FgMcEnFWOWYdlx0qJmBbGMBTRcB+cjAABH8VNLeaSwOwjj8aQ/KNgG9jzTo\nzuj3EggdaYCbSuPm3seDTN/8JXHapVA2nPBPRqYvzqfXoT70uohFYHBXhuv4VI2w8kEA9aar\neWPu8+tAYhgJAcd/ajUCR4UZvlOB7VHjG44wfWnhVTIzke1NfEcYx0Y0xjPu5Kjg8Zp6fN95\ns5GMGm75FYqR2zQWKhWyME9utImwpjVSgQZ5xzTsb/lVgCp6Gmbgz5QMRnrnigQ/vGYr82M8\nGjUYSfdYhsjqVFJlVcOV3sR+VN+6wHTuad8x3dADwMdaYCoUM2NxKsOtBjMbbN3y5zUaqI1w\nWYAeg61IqljznnocUAHmZXDDv+NM8z+MDCg4205mK4J+YrxmkZVdieSh/nSEOZt/zjhvTFP2\ntu2liSRzimru2n/nmv60u0x7vm+8Mj2pjEYfu2PDFeKUHy0UFsyNz+FRsu1EAbr2x1p21mZc\nckjn2oENBydzDgHoKc7HuAzHt6UtvCzFsEE9eaJEbeG4B6fWkKwqxnIUEBuuQOKYZPMlIbAO\nOgp3IDbvlGaRfvZ2A543UyhV2SY/jHZhxTPL5yBhuuTTtuScHkdcUgXdyxxigkkDBmC7tp60\nwbnY7mJU/wAOaVs8ZI2n9KGUFcZKtnigd2RtCqqQGO4mnYU7s8Y7HrUiokjMB91eDzTljj5c\n8rjaFPWgZGqiRgdp3/XrTmfHLHHONo609thjAwQB1NRt5e3HQdd1AhSDGxLHeMdKUhWUc8kc\nqKDJ5bKcblPQ0kahZCVB5PJNAxI8DA5Jz3p0kjtmPICg9qBIMsvRs9RRD5mWO9Se1AxwYbvM\nbsMLjuaOFRxnDZzTkj8xWV22g+2Kasfz4zyP4fWl1Aao2gtu2nrg0Km1SXbknIzTl+Zm3j5h\n69qQBkUFsPk8A+lMBXxksVPPQUm4Kq9iTyKGkJOAfnPQelOf5CFK/vCM57UhgsYSOXD/AHjn\nk0m4SKRH8uDg+t
HP8QxnjmnqdpK5ADdWApkgzD5lGTxxmjyywU7BtK8fWmlduVyXQ9+9G4cA\nMdoPSgOo4KZSM5DL15prAJuXHB5pyuG8wheDwKF2nGePegoUMUQYUMW4qVY3baMfL39aib93\n14zSxs8fIbFIQ3HLYG3B70tudxAZRx/EKSeYqRzuY/wjvSR4OCD8vfFJgSLtXOPn54U05QWV\ns/e61GzFl447YFO2lcLn5aoQ6Ngu7J3PjAFOt8t8jDBzn602MFX2qBn1pJFbzCGPLccUCJN2\n2RsjnPORRHhVJ25J7CgLs4OTgYwfSkRt3CvtPpUlC7NzB8hVUdKVVw4YHqKFiyp4460vLbWj\nwCOgNBIo/dt5Tc56Ed6cZEk/h2kcGol3KpZvm5zgURuq53/NuPSmIeioEYkkDPGKdHhmIPyn\n+HFGf3f3dq571Jtjlyx49BRuWSxwnru3gfxHtSnLcE7T/eFQru2Eq+T7Gnq3nNlm2gCkSxwV\nlym3lhy1JDDJGfpRJH5rK2WAA7GrMszFVXOQB94CgLDVzHKG2knqD2pZGzJnp60iMSoLH5fW\nneUrKxzx2Ip9BkkeI1JC7v8AaqaFWWN3OBnoKbb7Tb+W3FSq43Im7IHbHFIBsSqFy3zetSeX\n5mBH8x9KRo1+cJ94nOKmgO7hSA4HNIm4b0Vl3fKRxntU6r5kaSL681X8tnXaMbM5/GrEe7y2\nA5YHjFMB7hvmyfkHarMMnmJuJCt7iooNwmSVkOFPzA+tWncTsr9GY5wBxTLQ6LLSbiTkdsVf\n07EjYYM3OQcVCiv8wYZX1q9Fb7pIyhIZRkms29Q6m7ZyYkVWTI7ZrvPDMImvLdEThmC9PWuG\ntXaa6iKKd2B2r1n4c2bSeI7JNu7a4PT9fpUDPovToxHYxRqflRQo/IVbXcTxUWxY1x3/AJ1K\njnZjOKBIVXCsCVp6tliV6e9Rs+9h7dcU/wC7g96llEzbthA+tGcxgYxUQJVutTK27HOKLACr\nt6ihdrMFoDfvMZpGT94GFOwiQptYFjx6CpI33fhURG8k89KXcFVfegQ7KyEjPNM2soIHzUjK\nd5ZeKl5HAGKQ9SsJvLOAufWpV2smOlDR7kYjrTFXC4bipAkjUkYJ3elP9qbGojxg81Kvy8kZ\nzQMFUFWz6VGo2qCDg07cVJGetK3zL06UykG0btzdTTZM9VNOZSyg9aTy+uDQFhqE7sipA5+p\n9BTBGRxT+SMqcGgQ5W2njmnMSwwBio1+U5NKzkHIPNAC7WZR60bSWwOtKWYewoXO7OeKYDeV\nyCOadjaPWl55z0pisWXNOwWFjlUMM9TSySAnimEKzZ43YpuOmaQiTggg9cUyKTaTuHQUYO7i\nlK5PzDiqAk5+91Bp3OQ3SmhwF9qEbJ5NABndzjJoXGfmGRT/ALp9qYvQ5OKTHsK2FUnPNIGP\nBxikVT0605vmXHSpEH3s56mkB2nBPFG05605QoLZ6igB3K9qbwvufelVmYikK7WzigYqrjqK\nfweQelRiTd3pW59qQxyg7iRTmJxjHNNUhVHP5UMTgEZqhj0bt3pGPPFNDDcD371IcHpQIVXy\nppm07cd6N3IBGM0m794c8DtSAfyo6Uz+Pmnh8qQDkUzpk0FCjKjOcCl4UZIzmmqysvzcGnE7\n9pHSmGw9PUHaO9O3DbnqSaiYHHI4pUYY9hQGoPJnoM4pxJkAI4xUavjt3qZV+brxSEI2OCBg\n1KucYpvO0YHeljkyc5oGO3ZXBGKaMk880bSGYkZNNV+c0AKxDKcjBqTzPlxUO7dzjigENkt0\noGV9StftMJBbae2Kk0mabyzFMvKjhvWptoZeB0qX5QpI+9TES7txFKHPTHHao0Gfxpwyyk5p\niFjG7OeCKBliaauWGacv7tSc5qRofz5eR1pAhZcmhcL3zTMsrYzwe1AyWNscMeKVcEZNR55w\nRzUiEMuTwKAEzz6i
pY8M2etRrgU+P5egpMB/HYVIi7SCe9Q7ju6Yp4l9aEBJ/Dgmmo37wDHF\nIMbs05vueho6gP8AMB4PWnMTtAFR7RsU45py9eRimUiUKdvX607aqLvHJ70wthiQfl70Bu4H\nBpiJFkJ6d+1KwPU8U2Nf4gKepVlLE8UrjQ4Rhl6/SnxqyDb96oyw2gFalXcsgK1IAoXoecUu\n4q2cYpBjJ9Kc+5uRzQAqfMpJ5NO+4uQcn0qNJD93binhirZI4oGSoPTBpZlbaCBimZA6HFSt\nIDjJ60mAkfI2mnrhRg9e9RrIFkGDxUn3mbJ6igaCOTDEE/LUsf3sg4qv5Y3DJ49qsRj5WOea\nQMeuFzuORSKfLkGOMmm5HpTgeQWIAFAIkUbpMk4FKqjceKjb5TnqtSKwbBPWkSIyndxxSblj\nYcVNGo3ZzTdqMx44oGJuLYCjqaeq88U6MjAzTmXI+U4oGR5cKV6U9MbFABxR5nmAdeKfu4Ao\nKBVJbOeB2p6juDzTV4bNOVcMT2oJZKzZHFNDD8aBj1oAAkHNLqMeo3dKXaxbgc0iLtzx3p+z\noelMQjbiQAPrTlf5WP4UBSvQ5zSSD5cA4NSxjlUKvPGad04pow0eCckUsjcrgcY60hi7T3PF\nEoXK+tI2VwRyKBnimLqKc96ep+X3phXey84x1p653YxR5jHKzk4I4pS25v8Aa70Ele+KdwWH\nuKoBrZVlzwM0/wC+TzgZpNvOTTh15oEHGeuBSxsrbsNk0MO4INJtxk4x9KlgSRseGY4qRhtf\ncBzUWDxvIHHSgu24YNICRSVzk5o8z5sgYo2/NjFGFyM0wJNoakaRRjj60bttNaPdGSMUDsSK\nwOSo7Ypqj5gCvNIv7uPFCs1NgP6++OtP3dFA701T8vp60L26ikBLnYpXvml+9jvTN3PPT1pQ\nxL9gKooVsF+n404MvbrSN8vvTSpUZA60AP4OOxpTiNcYpgBDZJzT1w4yTz6VIBkntS7lZvQC\nh2OOaaqkcEUASeZ9KKj/AAoqgHcKu3vSbAuOOaZI4ZwOQPapPvd6wGBbpSsh6E0i43ZPNIrB\nsgH86BiHfGpRuR2NIJOdpHNKxIxznFI8bZB7UAPXG0nHNNK5wd2BSeWd5x0xSFRw3YdqACQ7\nQcDdUO1sbyuD6VMzFsY6U5mBpANC7kzjFKDkUhDEfLTWwhGKpCFZj5gx0p28joKM4wMfjTWk\n+XaeuaQx0QP1zUTt5UmCCc1Y3Y+7jpULh2GSRQMb5h6UeZ5a7cZz3pyKNrcc1G5K/LxmmSSc\n7QR0pG5XHNKrErgY96RSWbg0gFZv0pdu/BAx3pNxySMHtTQ5U896LhsOyd27v2pWY4FRSMPu\nnIJpysFwM5OKZQix+Xk9SaPM7dDTkY7CP1pqIGYkkE0EjZWbyxikV8Rjj8aezYbA5FG0OoBO\nMUhCbWZs8YpdqsxAoZyo+U8UwF2we9CRSH7MAEZxTlYpnPSmMzEYHTvSNJhSCMVQhTu3D+7S\nfMQQv1zTdzfLj7tPMm3dnpigBi7vqD1pEYAHjpTo5i68jAxUTSbQ20ZqQ1JWb5h3PamqDvJP\nGKYsp+VyNvpUnmNtZh/F60XENYK7ZJz9KdsVRkmoNzdPWnfdjII3N2ouBL97OOlRcKcE8UjT\nAIB0pG2fK2afQB+4FeOlKxLfKvHeo9x/h9akaTGGA4pANMjPwMmhuVpIJhJmQLsGcc1HI+GJ\nBzR1Am3AqB6VE2JAexz0psbnOD0PWk3459OKYxcCNcds80FgvA69aRG3KdxpeQpzRYQMQI9w\n6mmfKcevegsGUAcChWXkY/GpANvzcnihm2kUdOSM0wMGyTQAqndk0gkGelIuT0FKuT2oFYmj\nZsjjivBv2hdNMmrRSdWaEZPsM17x5h7jbXlfxy08XWm2V590cxt6kUCPj/WIzGZGHKg1y7YF\n1gqP71dn4qtiJCijau
7n6Vyt7GIbUso5J61uI5bVrh5LuQsPlPTFVPLCwH5s+xq7qYZkwV43\nZzVOOIyRuA2FrRGbMmdEdmHb1qi8fk4I+Ut3arUiqrsV+6DSXEqiPcRuOeBVmbINqL5cbfMX\nGdwqtcWvkrkEnB69qsebwAseG/XFVry6k8kI3ODlVNMXQr3KkKDv+X096hceTgq+WP8AD606\na6LqoOAM1XZh/GMLn71MQ5i8khUDYSOKqyRnbtdcAdMdalEpXkfMuePWm3LP5oHRsZ/CgCq8\nzRKQBnPUVDcZWNSVJUnkipZrd13ZIJPIaoIzLErKw3KaYD1RF3M75XspqFZCNyhAwbvUj/Io\n2rnIwfaq0cix7g2QM4/+vTAJpCsRCgZ6VE3yqpK4XqQKnZ1RSFXJ9cVCGeTh8LmmA3y8sMcH\nHT2pu7y1MQbJzkAU6QSv8+4YX5eKXy1MZAAVv71AFeQNvO4c/pSMx+XBwMYp7I/BB3L1H0qI\nRsOpyrHJHcUCHMzLHtXaCajCELtYgsvelk2ebtB3Y5oknEaKETlj1NMCHacggBiOoo8kszBO\nrdTTtnLEZDdSTSqpjYEHtkimMZL8qhVUyAnn2p0Y8zqM4pQTtJjO3JyaryYhV2V2Yt2UcUxM\nsqqrn5c4HH1qrtZZCrH5WG8CpI2xb5PzH68ikkzJJjPbr6e1ITGZ4AIyG6Gkz8mQNpB6d/rS\n5IOAflA4prCTzFKdMfMW9KAESRid5xgH86GysnTAbpSnEsiqWC56CgFg5DjO04FAxrKFXc64\n9PWiPaW+Z8HFEitHnzJBg+3So2QMQTkr03Yp2AevK89u1BcbtoOUC9qAyq58sMxPB3d6TYIw\nQB16igQjMoyVOeMcdqaUCKBvLdzTlVZEJC7SeKZHIVbbt/E0wsOwW+9+lIq4LAHctDhl5XG3\nvTZJGUhljAB6gUgGFvl56Z4FO24b5h9MU6TbJIGABUjA+tSxot1C4lGzYOuaAK8gj8xRhlHv\nTdwZgCuBnAp8ysP3Z+7H3FJlF245PXmgA3O2QqYQdKcdvk5wQeppqs27IYFaTzGwTkEEYwaQ\nEUhBbKZxTg0SMVbk4z+NOWTbsydqjjpQZo43K4BPckVTECqPJBx971pkreXhNxdfWljwoZi2\nCTgKaay8ZA+UdqEGofLlRyRnmjbIu4jhc8UscipDzxuPcUzP3h5hGP4aGA/JXjHJ6YpArLOc\nnJxn/wCtSFigVlOWPUUR5Vsk5JpASsTt3dc9h2qLcMAk8+hoOQMAfNnNDR7m+bgY60xoduVC\nSwx6etKyjcADgnsaiWTcvdmU45pZGMnLDkDANDAVy3O5eOnHrTVUlOANq9aevy47k9qjb5fn\nxjBxtFLUB6lduG+oFNkVmHmbsE8YpVVnIYn6Co9jbS5OMHGKBDxub5cZK9aX72G6dqQswQkH\nvz9KUbWfyycDG7NA0DKzttK7DjqKRQrJ15Bp7TfKMHcR3FBYM24YCMMEd80ICNxuU4Gxs/nT\nZIwPnBBOKkky/U7T06dajyWUkYA6Ed6QAxLRguOMU1AVjAU7hmpGLKoI4Y8DNIqllbb16GqE\nIIz5m4Hik8vcwXOR3FOjYLEgPzetIrEbiPurTAFZVU7V2tnH1o2tJ8wXLDkj2pXYbd23g/xU\nzJVt4XaTwakaAuGZkKlSw4oPC4CY570rM33XYD0Ycmo5FLdHJGetCAdny8ZU88U5Su5sOSvb\n60pDxyZHIxTdwVOUGc9qYBuBkBkO5wMijJ5LLgEEiiRNjFk59RUUjMY+GyT2PagQ/k7RnHHS\njDbfLz71EvmeZ833cfjTwu1uuO+f6UxjuCoX3zk03eTwBjB+92o3LxnJz2p25R8oyEbqKQhg\nz52FO49STRGylnR8pnkGlYBfunBHFEpZW3bcj1pgIuNoUnLDrSqwdjlt2B8tLGQzMFGDtxUe\n3bGEbr60hiiQrkMOe+Kc
GJYsuOlOVW4DYHHFNSNmyTx2piDKSck4IFNXOVGec5/ChsrtQDA6\nZIpGYRyZwWOMHjikBIshZmOAewH9aapZVY7gex46VHHjbktjHAHepeDgdFPX1NMZGGbb8vY8\ne9SbtxJxyRjHpTY8biM4x0p0bFGZjzuGKAbGnOc4yq8GndF4O7vmmK3yncMHOacGOAQh2HvS\nAJVCqGXnPakWVZBtz83THpRDjczEZC9KcshlYZUbuvyjFMQ3d82zZntxTnk2gYj+bOD7U1m7\nqcnPPtSriaTg7WH60gIioVR3GcA0u4AttPTrUkiLuLMce3amMo3bFXhhndQAm4uAyDk9aerK\nvGzheaaMbTn5ccA5pfMKlVxuJ4pgBYLjPIY5B7U3dJ82TkdlNKFGNmcAHIFNkXjduIpDG7Du\nAB+8OlLuHlMrDeV5wKGwzKpB5H50KoVW2Hac4OaNSRcs+G+4m3IGKCyzbRnAXnilYkbQ6kKe\nN1Nki3OBjYVOcj09aNRjWmbPHIzzTnb5cDjPPA5qWRlRQcAOx4Pr70xk2yNj5jjlu1HQYi4k\nQMeQBjFIpZWT58k9aRY1UA9Q3OKdtKr1xz90UtRi7XZiAce1KqxsD82OxNOXHYHd6U0gFgQA\nD+lUIGwF2od+O9K0iNGCBtI4NLtIkOF5xmoyoMZcrx3zQA9tx2nolJvIYnG/ng+tOZA0avE2\nT020xMc5GF9aAQ7I2sGBz+gpyTBSufTAxUDKJe52r/DS78LwP3fpQHUJlCDYBtOfyp8LpHtw\nWcqeT70zczqdoy3vTY93ClcEc8UgFY7N7t8x3ZIHWnvjezg/u+gWgrvyxwDTPvKe4znFMlkh\nIWPGc45zUKyFlCjnnmpIwzOTtyh7UiKXyU+UjqtIYrN5ec/MM9qTIjcleT60inoycml3jhpE\nzTGOEgbKZyepqPcMFU4GetPk2RN90/MMVHtPykDABwc0D0JG2ouCM9MfWldtzHfx60nl+Zkk\nkDt6Uq/MC2O2MjmkJjVby13L8y98UMwaP73J/h9Kau5f3ajewOGoEJ5YtgDrQIeCx25O4ng0\nuV3kMuVHQjpTU5UvnB6Cl2pHEW+8/wDd9aOowUmNflHGc7R3pdy7SyglSfx+lRqX2YC474NK\n7ndvPzBRyKYtRXWNZOT1FN28Bw2cD7tJGwZATx3yakZuPlwCe4oAIpGkVQ6gnPFG4uz5bCrx\n+NNkkzwvLe1OV1lQKEwcc5oAazDhd36Ubh0PToBSsx2/MMemBTh2IGaQw2jadoI455o8wLEj\nMvzdqa2cZA5zzSq25eR8uaCRfmb52xkdBmmqyMMl8r0OKX90uWbn2pscayM7bgvHBxRqAqk/\neHDjgH1FNLGTlz8wNOLHKsTnA5FSK6LuVI8sRzTGRMfM+8MN2x3p0gyqBsqv8VNdf3Yzyfal\nYSNtwPl7UCDheVPyg0gk3ZGAAfWpGK7dzHAzz71H8uW2cg8g4pdRDo1HLAYHbNTeYrAKfmkP\nBqBF3sCCc9Oae0YTkn58fnTHYAuN+FDDNKq7uNhH+0O1NZl2qVzt/ShXkl3YGB2FADl+Y4Jx\nzzR8vORv96YuIwWI56GnS/KoKAMe5FIYb/nyenTb60M5fgkg9lHWlWQbQG6mk8zdKOMr3o1A\nXOXBC5HrTJi0Kk+XyTkGnhifMVlw69WzSbCFO6TcMcUAIZNsi/xkjOKdvBb0YnNMAXaOre3e\niSRFI4ITPXvTAl37oyM8Z696Tae5AXHfpQzpG5yTyM4FLuLspODH/dpDGSN82GyoGN20VK6q\nuNzbgOmPWk3CNjhQyHoKTaRHwd2PajoFxfMPmMSu4EYpY2+YArtwOtDMCu4sNwH3e9JGCMEN\nuyM0xMkWRpG3ADb04pgk8tT8m4ZpVRURiTj0FDIfJ+99fakBD87S427e/HSnK0kbDbh8nFSK\nzFflO1ulJGoX5+AB+eaBj4
4Su8Mvy9cGhAWdtp5HGG9KI8hTuJ+Y53Gmbiu84zzkGmA6TDKM\nfeHIpY13sVJPzenrUag43bc5PPNSRsfLIB2tng0CJZLd7X5m+UdDupqNG0Zw2cmi6uHkCBvm\nXHNMjjUqNgwRSAcqhZCcnFEc2GyVzk9DTtreUQAMUxY24LAlsYoAkkkLNtb5R7Uqrnd09him\ngMyE7enHNNDOzKq5z3oAlVGjUhmy3XC0oVlUY79RR5iv+72EHoaRflVtp3helIQRkZJ3YK9s\nU7bu+ZQpHcUxfOYBhFhG6s1OjxHGCwwTxzQIl3GYhSvyj8qVR++wfShE8z5TwvrSMTIAgA35\n60DF3FY3KqAc8CnRlZeCuPU0gA5BGW6dabtbyyG45ph1Jg65yTgfdqS2kcMQvQdqrqvlsCo+\nb161LA/zEg/N6Uhss87jkgnHNR7l2hmOAOvpUa/XnqadMCPlYBlIzx0oAmbHloScFjxUzKRF\nhuRnGarxMrNGSvy49anilDRgMvDH8qQEkKHy+Dkk4AqVY2jYArgHjdRDJHGyqy8A1JPMJGPq\naYh0Me35C20A85PWpI9yqxz3zUCKGXaThv7xqbyz5YCnOT+tPoBdjmaQKD0HJHarNvG4k5xt\nxxVNV8xQPugcE1ejRpNqgjjnioaGXI4yo8vILHmtK1/cRgAl39qy1ZpPL2Kcbhu9cVrxkSSR\npANqseGNQO50fh+NomDyjLt0Fe2/BeNbrxBI5XmOIsc147psYaRWJ+76V7l8DbEu19fDO0Hy\nvbjmkB69u5xnJ6UrKdwqNVxk5xSqwOQWwe1K4yRAGyOlSbeR6VBG21sHmpNxZuDmgCYZ6AZp\nRk8AdKarFZBg8Y70/wDi4aiwx6rtXJHOaXJU5I4okkXbwRmmLIdpyc80wJPMKsR60vDcYwRU\nRfoQMmpBkDPFSxj1J2ZP3iaPmzyaRevB4pzEKQaQhfMAAGKSTDdVNG7cDxinrlsA80xkeN3I\nqSNjtIPNBAGQKUNwMZB70BYbUin5SM0z/lpgU8EMucc0hhH3xSfdyMZNLHx16ULhpCcUDIl3\ns2WGBUvA+5inN1wKaY9oz60CEOZOAOKSTHGKfIroqlaYuGbkYoFYWTdnacml3BYwpGKXczfL\njj1pu0ntmmApJXJzkUgf5RgYp235RnrTA3zcCnuAbP3gPrTWG1jnpUirnoeaSRT/ABDNIQ1Q\nwOe1P3butJ5nQYo+8cYxQMF9O1DLtyQaVsrTWbC80B1JBJ8oPWjhpAexpigcdqN53DjAoYwy\nY2Jzxmpd25eetRtGTxmnKOx60hbhuyAB1o+tIpwfen+WM8mgQiuR0FOYsV56UNHsXO6m85B5\nxTGJGvynjB9acqjvk+tMjPzHJ4p6ydRikUPTaWwOKRmLcZ4pnPTOCe9KwbBOc0CFI2EY6Gl8\nwbSuMUpT5QTQV3c0AHHynNKy57cHrSbQwI70itxjtQGwq/I2ByKWTA5zQq7FJ4yaYVLY70DH\n7ArZIzxQvHOMUjKXHJ6U6LlsN0oGO5ZhzwaVo85C8GmMpIODz2pyybUGf/r0ABX1FCkll570\n5mPHNN3FXJA7UCJWfa23r9KY0nzH5cCkEe1d2aC2QM8UDsTRt8vNK3Ulce4pjKWXH8NCr17U\nxiKW54+WnbQwHHFJtMa5J4pVfjBGDQA7/Vtk9PanLIvIpvO0jrSMpZcn5TSAliXKnnFOkXnA\nNQxNuyKlkyVHrQwHhhtweKarpyB2o8snB/ixTY8ZPOCaYDmm2qDjFOXa2eOetMaAN3pY2XzA\nu7mkBIsgfJwRTto29etRmRfM2jpT88+9AyX5SuMfjQy7NoHPFRpnZ15pd27nPSpEOUNnc1Ox\njg0DLc9qT+LBpjFSTPbipt/ynjNRxgBvan7guT2pgKmWUk/hT1OF5OaamCpzxmgKdnqaQEik\nbSNtIu88EYpsmWUZGPepU6Dv
60hEi7lQAdKXqxyKbu6ZGVpxYKwx1NMa7gzHgVKueAOKh53E\nnrThJ0FIolB+fA6nrS7jvHOAKIznnFBYNz2oAViWbPpSszKucZzTN2GwaWTPBDcUAOjX5Sx5\n9qkTcq5xkGmq2VGPxqYfcIBzntUsBVYFdxHFLwqnJ+b0poXovWpGORu25xxQAkQG3OPapsrt\nweGFRKxVFz1J6U/JY8rzTAlVd3A9KYF8wcr3p6/NHlTzmiSULHgc460DHHDybVpR5e0jnNEL\nLtBHGaUZibgZXv70hCoo9elOTG7jGKjVTu9/Snxx87u1AEjJhcjvThz14pokIbBPFOeQYy3P\noAKQA4yvH6U/arYPtUbZVQ2KUfKv60FDywxg8D1o3fKCTxTEIbk04AM/HSkIkVl3dcUNH/Eo\n5pUVW6fe9Kl+7nHPvTAbDuZiTnFPLMzYxwaXdhdo6mnbT1PIoGLuHbqB2pF55xQv8XGPSk3f\nLjHNADg3zKNtKzA5H6U1fvcHBpVVskk5oAXANP2rwT9400MO45oBAkz2pMBZMsRjtTlU9ScG\nkVvlORQudvNIY/lT8xyKAoPzbqbuyvqKFHOMYFBI8AdySaMgKckn2peucDFIcZB6GgByjgjv\n1qKRp1kUryh61IqnnJwO1O8wNgD+GkUNXLnLDBqZMdT25pu5eeOTSnaI9xNBNmSKzHk8CjjO\najBPQk4o5b5RSKJeShPemRsV9xTiSgAPShceXwc81SGhQ25cAc0LncCfu0fNtJFLt3MO9ICU\nKNpFPyOAemKiOe55peWPpgUdQF6rgUqsGJGPwpASijvmmncuTgYzyKYyUfeAzj2p207Tk9O1\nNJVlU4wAc05vvcd6ADllyRto+VWBFMcM3U0YC9TmkBKrbjzyO1HfGec0xcL05xSSNxu6MTTH\n0JNwopn4CiqEN2Fm4P509FbB60gUqp9Qaf8AMB8vJrnZQL8oyeKb5YVv8Kevqw+tAYdfWgBq\ngFjjnHrR5hXjqKa2d3A4NL0XHegYvmZwx4pOGBPahBuU5prKRGdpoAdnavTihgGUY60xt3l/\nKcjvSjd5Z2Hn3oEP2siZzSBQ3OMmmqDjnPTmljU7vemFhcHaeaay7mXPB6UsjDHByw7UrN5m\n0igZGFO888U8n5fajbjcRzTdhbvx70ACHdn0pJcMvHB9akVV4ORTWHPB4ov0FqRRJhcZ5o4U\n8nBqR2Zeg4qObP3sZosGob/m6daHw3VsYpC2Fz61EVLKSDzTELG4ZiSckdBT8BsN0pnlmFQc\nbiaaqvJ3wM0bgPaTb8vUU6P72egIo2L0JzSrIqqe9MOo9FG0/NTdpUDuacrIOnUihSN2M0DY\nfLupuNrZzxRvXJyOaWTHlhj0pCGtgqSpoMfmKTn3pse1Bll6nFO3bSQBigAKjYMHimMDu7Y9\nKkOGIx+VR/8ALbDDimNEcjkN8o3D0pUQjkD5SKcwwCEPzUnzeXzxioYhznAz6eoqnb3Esyye\nYNu18Ae1WTKJlJY4XpTCo2nAxmhD6CGYYPHy0m4thgcClXG05GacF+QYGDTJEUD+IZ9DSSRl\nl645p24/dPSkb5mGOnegBpBBX609pBkqORTWOWwPwob93kjrQgHfeGeg9KhZe6VJHMpQbiN1\nMkz1XAzSGEiquDuwaY3t3p4U7Ru60x1ZnGOOaYCE7sHoRR5o24NPb7pwKjWMj3o6iDjyuD3p\nNx4A4pS27J4AHamofyoAdyMDtR8ok460A7h1zRt+bdmkIXo3TAp6AYx3NRPvbkHimyXC28Ly\nHlgOB70DHtIJG8s8qOvvXDfF+1abwk2F4jbI+ldpa7mhWRuGbkrWd42hW98I3yDDMqM3SgR8\nMeKoHSQ7shT0NcRrA2qse/8ACvUvFUJt2KSJvUjIry7VEeS4aRxha0VyWc/Ju3tmqu3bbyE/\ne7VYmzGxCqfmPJNRPlY5ARgYzm
tkzNmE0HzEE4DVTfaCQ/VeBV2bl8ruxVCdE5Ycn0NaEWI/\nnjj3RsMg5PriqszI8hdzhjyPSplk/edDzxtqvJ80rgp+FUSVfL3uQvJByGpjRqdxbk+varDu\ndvyghehwKjktz5IZWyg65PNAFVpDJ9wdO9N8394d2SwOM+1LKxhi4yA3tTdoALE4zyT60xDC\nxkdtxCx9qZLgYTOQfSnsy+W25evABqvJn5QRj3FHURGp+f5Ttx1zTHYTMWHripljLb8r8pqs\nAY14+9njNMQu5mU4OB0zTGXcuH69jSqryE7mBbvtFLJ80bAHJA6UFEW3dkKMD196Qn5NpGTn\nBYU1mMfyjONuTTJziMbW3dDxTAG+ZWCtt2+vFRrJ+7+bk+3NIsBbnPHUk/yoKq+GxsxxxTEL\nGolyI1OcZyaY2eFUfN1xU24R5YcH+960yFh55Z8lcdaYDFYu5V8DjPNQPIWZSo3HOCM9RUrI\nDvOflznFRrIQHKAFW4zQFx24M0mMo3QZHApitt/d4w4GenWpCrSQnjHfNRtv8xcgMQuc0BuP\nbI6AYxzioF3RkHAK9Kn2sy43dRkcdahK9Tu+YdqQgkdWLAJwvH40yLG0gn6ig4ZgwyvfHrUq\nOHUjaCT3H8qBEciqsgYLgY60shOQQc5pXyyqhyDSAlpQ3UDgAdKYxjZIYAZb1psbHbuJxj9a\nc3y9Tgs2OKJv3eABu55xSERNcBv4PrR91eD7jNLuVpDtVjjrxTmkC7dy7sjt2qhkasdpUck8\n5oPzNsU9uTTnZFUkHAFReWynep3dyRT1AdOjxx44NIV/dhW+Y9yKb80kg3MQc9KHXa7BSQam\n4hyRIGCr8uOaGLqw+XjPOKYsyNlcHf0+lHmLlgGJwMdaLjFZWZmLHg96ZIjg4xgY4YU+E/IQ\nV+XH8VPbDKNoZQOoYdaCURIoXHG0Yp3yCMkpls8e1JtaTIY7R60B/LG7Hy9DTKI2UnAJy4bI\npZNvmFsc+9LxtZj+ApFYsucgD0oAJAu8FcEY6U0KyqzckfSkjlG4L6nHSnOrhfkbC96YDUf5\nlBG5ev40755t2du7rSKyr8hPJFLJGZMEcYGD70wI2U8Mo5U5Ip52MwY8HFKG2xlRwV7UxAVD\nK3JOaQh4Ygjcc4/UVHtbcWD7l649KfCpXhuQRjntTc+XJsAwSOc0xgxCsM8ZHXsKaqlYiD0z\njNLIp+VSPl7saYrMNxZsAdBipGSBd0i4HHT3pu0I7c556mkVW2hiN2e/cmnFCy/Octnt/KmI\nazbVTB43ZJFSsw8w7W3KTTB+6DDGR3pI/mztGAvakFhXI5ycnPApjYmXIznoPb3pyr5SnPzM\nTkNTTnawYfiOlMdhynciqjBscHb396QLtVsqRu4yf50N94sBsfbyy96dHlTuyenGaQhsymPa\nScgDhs0xd8q9VUk/nTmyoLEbgeue1DqFVd/PIyRTExqnAKkZfPNKy4KsjYB605l8veysCmeP\nWlChlAYYPXmkUIyquG3ce1Js8xj5YI/CjYsfAGe9MeT7pTcCDg0iR7S5wu3C9MUxlVc+a23b\nyDThO3mP93A9qacsWDLuPQ/SnuMXjaslIxwuCMtmk8ofcXAC/MKdk53BwR3GKYCFcqDvwPSg\nL0yueetO+V++famo3J+fAHVaQAqhWY78AUBTtwx5Jo8kdPXnmkjbc7c5Kc/Wl5gNZTuyWxjp\nTmZWHIxntQep3EFGP5U1tvAP4GrAXcCMY/GkDHcGC/u+g+tOXbuwW+XGSfam28iSb2JKqM7V\nP86AFDB5GzxxQVCLgncvvSRquxsNhiKFjcsAPnwOlTsBJuWNhtGdwpDllyVwRTGzuMZXkHJ5\n6ClX0H/AQaYIfyFOOff0qMKzKWz06c0vAY4GPSkZMH93zxzzTEMcibLMcDoKdJnCFfnH3So7\n0ihmHKgjFNV1DgoDgfzoAV41VR8h
B67T2pNxwCOOe9P3SsvzDjPU0m1vMYnsOKAF5XOWBc+l\nIq+W2T81EeJHz93Hp60ENt9eeaQCbd6uSevSnLuLKit8qjmlbbxtbGeopqvvyRwAcZpDDJ6Z\n2ljSf6tmB6etKxDJj7xBznvQv+r5GRnp3piGvvRsHHBwMU/avlsccg80FVZuXOeuPSk3FWO3\nBNMBhkUghfmwPumlVRhR6jP0pzAbRJt2nvUbKW3EHjv7UgFVQvfIJpRuRSzADJ4FMGF4J96F\nLSDLAkZ60ajHNmNeuG659KYzSN8wHzdh6053WNkGM85p+S+Spzn17UwI5GYsGPykjH0pzHdy\nOccGl2kIN5zk4AobJO0LtTofekIa0hVVYHPPSl+8fu8DmkjYKu3PTnFPAbZjO4dRTATjlnGV\nPT60u/MLKAEbvTZWTaODvx92hcMm5lw3fNJgKArKApy9DLt4YfN6imyHDL3z/DTt+wevYCgo\nRs5DY+Xv60hxknk5704S7flPBPajJ3A98YGKBDXPnKNr4IoLbtqY6cn3pfLT5Np+cnDU35o2\nZTxzw3tQA1pAqkRk9elOyyxjaRs64NQeWFGQed3NTKv3/l4A60CHK3mcsdoHelVWbhfl7nNN\nONmGxj2oZWkY7GAwf4u9IYqSM24Kwz05pGG5Qq/fxzzQylWYbe38NNVmChlO1e+RVCDG5lY/\nQU/5ckbzkf3RxTGyrMOmRkULjaF6HqaQrDlkJaPdlUPUDrTpDE037rOzuKiEhLDf24GO9PZW\nRSrEA9cCmMFkC7j74oRiynPIz3pC+6IDbjHNG4Ku9jg+lABks58wY9KXzN2MHPanTI6hOd2/\n+IelNdUZtndaAGybjtGMR55OaeoLNtD7Y+y+tNbHzdXGPu0p2GNT909qAEdGCojHC57dSaYy\nESPn95S5MbNzvUjr6UKCq4A+Y96AHDzCgUcbeeaduL9OG61HtIyobk09kO5drcAdaXUAk3vH\n83yufSho0YgKTuxyaNjN8wx701d0bMq8Hrk0AO9AeVPAoaP92QjBTmkX5l6HJNChY93XOec0\nw3HL5e1SDhh+tHDR5Py5PTvTVVdwJBGOaa3JDEZJOfwpASDLN8pynpTTnaHLY5xikZi0h2gq\nPYU5o32kgE5pgDEbDjJz3FDMF/hwQO9ORgzY+5tXj61Fl2XJHHTmkHUcqNuzgc9d3SlHzBg4\nAOeAPSjado3ckdBnrSdJN7KRxwPSgYSfK3WkTbtK7Tk96bw2VkUk9RipI9q7VwRQIdxG6YJk\n9abGBI7L5mGJyFpN3BCjafSlVNrHA96NQGsNspHXH8NOZmZP3abB/dPahU4LE455FJEzzHr/\nABbQ3ajUBVk3Q7k69DSsrfKrtkHrntSKqyNkJnnB9KkkULGS3zD1o1AnmZfLVIiqxYwVx39a\nqpu2kg4VTz70EjaoGSe2fShpAu0bTsHWgQ/YHbDYIYcU3ckK4Kso6U6Rh94AK3pSmXcuXA2+\n9MYzYNu4ng8CnRgqyZdWx+VMX/V8L8vqT1pFTgcZGfwpAOXdHMxkHUfepq7GX585HQilDHnc\nc46Un078mmA5dy5yq4HNO+bcfMCtz94U1iI+WPB/OpVTcpyNq9TmgBHXDhiuFPA9aFYc7h7C\nmoSISu7O48E0p2yEFlJZO39aABmRlyoKbeCDTVwuTuz6CpVVWVm3biT930prQZJ2/KO+aBDG\nQ+ZvH3unFPMflsHGTxjP9KbH+7Z/nK7uaU73t/lbBBzSKJJFXy1+TB64PNM2ZIPKoeSKVLh9\noAG713Urq8inP3fWkAMRxggDPWkjIjT5gM7qAyyqqbfrStlWwcEDpmmIAo8w5y2eMUqqMkE4\nXoKSOZY3fcD06j1p5U7QQM5GaAGLhHwd2R+lPj/eTEjhVH3fWmvunYAA+Z9OtL8zSYchRjkU\nwGMwViAD0zU0SBIdxbB703y1RBt9cY
7077uXODjgAdaSAjWQMM4O3NTOxC4VuPXvUayYPmAd\nDg5py/vNxf5SvzH6UANmY7MN1qRdi4ZeHzz9KZsbkH5iaPnkdcrjB/ClsBIdvUNhmGaWNVVl\nCjg01kHC49s0m3yUyRnBxgUwNBbpI4/LcY2/lUDKJI4+Mgn7p64qs7vtOcFW5x3p8LGPB5Ix\n+VAErR+VG4Q5HUHNIFeMb+oI5pI2C7uCKl2uyEDn2oF1Hqq7QY13MRyaOZNwZgRjvTE3bQpU\nJz2NOj2bmLj5OwpjGRhliwpyM9T2qaPCyZAznjI9aauEUjovXFOiYLMUHCr8xJpCYsZdtxxj\nnGD1p20LGSQfbmmKG3ZA+8c/WpdobIwT2/GkIBhVO4bcDcKnt1Lxgoc4qFZBIMFevA+lTn5G\nO0cY7dqBkzSCRy23BUfnTo0DMdwpq/NFllK9qfCvJ+fBNAFhZhzth7Y9qtxyKxwEI7Yqosh3\nEA4XFWkZ2kB42/3fegB8cxkk2BcDuSKu267XxGevBNVSDISSee+KuRgLhgMqDUsqxfVY2ZAy\nldvda19PhVpw+dy461jxyFZAB3+Y+1dBpYHl7QuWb061IGtoeGkYMxBXnHqK+mvg3CsPhFFQ\nfNI5cn/P0r5ys7JQwRDg5wWr6o8C6f8A2d4TsIVG1vLDdOuakDfyOc+uaUHzDxTVYscsMU9c\nM2AtIBdx3YxipA3ljd1NRsvy+nNOVtynFGpQ/lhuHSlVu/ShSfLx3qP5u/FMCyNu3p0pF6ZI\n4pEbC8D86U9MdR1qQHbdy5HFOXjlhSthUBHWlChlBY0hjt4JGKc0nQbaSMkKQRSZGT2oAdhl\nOeopTnAPTNIBu+9mgkZ/rS1GhVz1NKWIHHenKD6ZFKy/hTELG2W9DTipz6VHDJvOcVK3zN1o\nGNbH1pdx9MUBQGOKMFs4oAFz1I4qVW3KM8VFk7QvQ0E7VwetAbiyKcZDfhSfeX39KRCR15FK\nMHLHigY4IQBkjBFKOFODQVEi5z0pFxt680EsUN8p3Dmm4A981IW6etBTv+dNaANjC9ehpGY8\n07aq89ajOSpxwKAsJgYGetKFCc9qGU7c0ctGcikAmd3PUUpXcMkUFB5YIOKTHA5zQMXdjnPF\nAywOBkU7Z/D2o27eD0oGDHBBYYpVkGTS7gVwetN2gNjBNACtgn0pwU5OTkAU0ryKXaVOfagl\ni7htzj8KU5YA9M9qjZjx8uaihlmkkcMmFHQmgCcIN3TFKpH403zPlHrT1wGyaYxAuckkdaFY\nlsYyKl8vjjrTNu3PrSGOUFlIIxRGhDsSe3FPVgy4xTSCOCaAGMCOlOX7uCOaF+UHHNTLHhd3\n86AI3wOKTaApxwadjdyeKRm+XkUAR7W29M05GboBQGbjggGl5XI6UDQvlnklqTAzml3fKc81\nGoyeTgUwHnJwKXaVb5qTaWGM8ipf9YMA4+tIBnyr0NL97qOKXbuXgU3H6UAOSTzCVHFKshPG\n3kURrt5xyTUhU7zzgYpjI2Y5Bc8elLtDNuHSgKfu9TTZGKsFNNi1JDw3rTtvy+tI3QYpwUjv\nxUiEX5Rz1p65fAPApIwvU80p+6SaCgkkIYKDz7U4Jtye/pTU+bBxSElm5PNACkFvbmiNFTcc\nc0JkN8xxTmXjA60DHLGJYznpT1QKvy0iAquB0pVkCjkZoAdtOzdjFGwHkHn0pXZgvynFKnzA\ndzQII5AcqeMVJGPlz1NNEW7kjikZXBAU8UFIc6lSNtP7AdaEjK/MTS7sGgGJIp7DipFUq2c9\nqb5h+4evWnugIAPpmgQ9VDLhjTMMp7gU7arbcHFSFDkcikxkfzbeOmanjVS2W7UKhC+ooB+X\n7tMZGUO8kHOTU/4fNio12t0GKkXsRxSYCqxXbupz56Y/KnBRt555p/f6etSAzZ0YnB9KNobI\n796kVQ/zEjFCoeSKY0MTP3cEip4/lHTB
piqR3waerE8Ec1LAk2h8DoaG+VuOPanMyxFWNSK3\nmLu2cE8UCI0U/eIBP8qdkjJBz3oClmYjpSSONqgdehpgSKxH3flzRuB+UD60qoY1Cnkmms4V\nwoHNIZIpVR609ZPQZpoAZSwHWlMe5ODzTsIRZir4IPPepWxH35NRsqLtUHJqRVXcc88UrDsC\ntz61Jyw3YyOmBUaqFIIPPpTlby26UhjmXqDwaIlGN3anY8wnfw1EZG0D3pDCSRVYccHrTo2X\np0btTTy2McVKqggY60AKsZRT/ep8X7tcHpSq+e/zelGfl570yREBXk8+lPjkZgw70yJSWHNS\nBtzE9KAFTdyTwKQZz0pQ3rTt25c4OBUiEjUiQgng04qxU5OKYr7ucZFKp3dqdwHfwj1oLBQx\nAzRtIBNNUllx09aY9R/3yoBIXHNLu3PjnFH3YwPWiNT0zip6hqCgKCvapFKD75HFQshVhzjm\nkaPzMjBJphYm3AYIPWnbc8kc1F93HHTjNSndtz1pDFAPc8U1csGPfNOXJBzSDGGY8UWBEkbB\nV5GTSL8/X8qjVgoyBk+lSqwZi4446UWKY7r3pd2SMDBpisccDIpGXnIbBpiGTXHk7VJJye9W\n41eSPIAAqk1n57jc2ec1dbK4wcLSAUN8pXPzZp5+8MdBUKFeTyTmn/XOabH0H7/mzinH+90q\nJT0zTg2eKVhdCQNnG0dKVmU/U01VKjH40rY46fSgoQg/dByPWnq20jJpqt2ApvG7PakBNu7m\nkEY53cCm7huI6cU9cMMZoGJtG3jihtnAAyaOnXpQMbs9D6VQh+4elFN3H0opjuKzBuM9adnH\nCmowFU56k0qsMZHSsBslX5V3EZ9aT5fSo1JVuTxTgBk+9AIRn7AUnO4E08qcL69KY+aBXFVh\nypGMmlwI1ZT1z2pPvr2yPSmDdk5/WjcBSv7v5ePrS7vLhx1ajI6McCo128r196Gh7kqfd5oE\nu3qKb8vSj5SuG4+lD0DYGjHLDiiP7wOcimFTjg5FPC+XHkcZoAMlc4FOBG3mmk/L1607bhee\nRQLqN2qqk0isN2CKNyyEgAgCo5M7sigolYlunaomRtpOeKdHlF461ErGRip45p2AVMMp/u0i\noueD9aJYwRgNRt2jg80wFk3N8oFI0e1cd809WCjpzSOwODUEiSsI4+KFG6LJAJoXbJkkcCiN\nvlJH4imtBh95lz0xTVypbdwaGJXoM0x8yMAaLgSD5uTzRFuZSM/hQqmMknkUR/dLCqEKud2G\n5AHehpPmwMU1maTgAg4psf3RxkikIeqFoy33ai+ZnBNTNIe/C46VWVtpYt1xxTKRMw6kcUxv\nu4OSKFyoBPcU0qyqCeh7VDGHEi7VXmkXHSk3lWyKMHdkdKBMXjaBjDU7nG7d+FAYM5PtUSkC\nTnpVEgGJbPapdu09O1J5QzjNHK8HmpH0EwOCvJFIobacijcF6HbTWkY4UcigAjRN2WGDRJ8w\n4HPaiRlUgDk96R3KkHHy0DGpnGWpxbD+2OKWNBtJY5prAYwevWmJjJN0jDbx609WG04/OiFf\nm3E8elRk4yB0zQMdNHwDxg8UxV4Kn71PkYfKO1MbdyQPxpAxApx8vWlTkENQpIzSKBzk4oJD\nafcjFUb5g1xHaqcyPyR6Vobz5fBqnb24ju3nI+dhjcaAJo2AVV3ZHQCo9bG7RL2PZkGJuKmM\neZPkH0pJt0kThcbmBFPbcD4y8eJ5ZkKN91jjPevJ9Wm3ROv3Tmva/itYNa6rexsNpRtpFeRX\ntmjKwbr/AHqaIkcbMQ7EZye1U5Q0dvI5OT021durcQyOB1Bzk1WXDB1xuOM10RM/U5y+eVmU\nqfLQcVX8kriXOfatGZF2F2X+LGDVGaRo23MN6jg4rQgp3DGZQCQMHqBVOSTdgqMbTyasNIzz\nFVGwdRUEw6h0+ZuM1RIzzCW2g7V6/WmbWQ
PuIETdTRcXEXlhVzuXjpVe65UAlgnsKSYhs8kf\nl8fOF6VUk/fEF/kBOKlV9rEc47ZHWnPIueF3PjG3HT3qgK8ymRhg/KvRj3qEyM2frVllEigM\nStRq0eG3Hbt6UEleWQqvJIHrUTsGXG4SDrgCrDMPLbIwCKpxj92wU7W+lUO4jSO3737u3onQ\n1KrpIoYDluw9ajZmVcN83FEMbxqNv3W/h9KAuPk2ccbWPBqrLut3wgzk/dxVpod+FDZxzmgz\nD51dwTjihCKUeWZlYYK8ml3OrAHB3dKebfPLdcdRTVAVgc7sd6fUCGaUHccfKvBo3kbTjKds\nVKwDOwC/N1xVcg7eOxz/APWpjE+Y9QBuGaZu8vChdy9yKcMGPnrTtwVTyNuOlMVw3ZDKhORz\nj2okVWQc896iVTGwZTyetJJGwTkYOelICyzHZgHnGQarbS6ljwTxinrINioCd2eKj3eY5BOG\nHNIYqxk5OzHGBnr9abGGhjxuAGfTrStIGAcMS5/h9KkmYeSBt+YcikIZ5o3DCkOe+KRo/LbD\nHHOdo706a4aYLGMZxmoGQhizHkDgUBsPkYszYQBOtRTSeZ91tvFM3cHfJhW7nikEZXqd644I\npoNBDJthAU89zQsgGFPPGQaIwm7B+UqKavlhWPDyd1FMLMWVl3BQc+tSQp50qw/dQnhhVVg2\n0OBgr+Y/+tTo5njurdw+0hwcevNJ7CbstTv7P4Vy6zp6TW6Srn7xx1rn9a8E6pokjJ5LOAM7\nu4FfR3gaVrjwvbM4Xew3celaF5plvqDnei8+3SvGeMcZNM8+WIaZ8fySNbyKJVKHPO4YzUG5\nY5wAd2fmr6j1j4X6TqgO6H97164BrzTxJ8DZlkklsJiSoyI+SB+Nb08ZCZtHERZ5jHMzIcHC\nn2qZZC0Y+mPm61q6p4I1jR9reV5ik4CqDj8axbqGeGNfNRkIOScEYFdsakZbM3jKL2YlwuUB\nHJ9aHfd945GOnekjPzEMwAxkAc01lO3Knnufatb6FkjbWUBaaIQiliaWb5UyOVxxihGCKqk5\nOKZY0bWUYAzmkKgQlM4P1oZ93DLkdytIdu08cds0yWNkj3YZRzgZokZm+VCQTzUu+NpQNhwB\nzSGQZAXAGeB3oBjY8DORgdDSRq3zuT9BQzMcqT0PbuaVlaRQCdpJ5poQeXuYMRj8elG5lYkj\ncxo8kyMIg+VHNPZNoyrYakVqQ+Vk4ZjjOSM0sqtt/Sg5Xk9e1CMduM+/NJhqJDJtBQn5l6Us\nknIXOfVRSbSqsTtw3HFMjjO0yBgpHGD3qugiRSIcqPmz2p0UbQqFb72c571GwkZVO3aAcbqf\nJH+8DBskCkJjgWZSzYwP4aiALMDn5PSlCkKXHO7qKQv/AAkFRjimMccLj5cn0pFPmR7sYBHF\nMkY4XHHrSrLwFwXPY0BYTf8AKqn5j3FDRvJt+bCqfu+tJ5LxtnOSeQO9NCyGQo3B65FADpFL\nZUDoc4pzqr5L5Ujoc03cQ2c4PTNEciKoZ8+xpgKoLEnoMcZ71GG+Ulnxz6U9ZvMyDknsaPLK\nsWTbuxzmgBFj3k8YBFRjDSNlvmHoaVpOARx/nmnLGGbIOFIzuoAX+IKQFxzxTiw3bcAZGajj\nXZ+8Y7geKXdjB689KXUBsedxBHGaTb8zAJwed1Sbhyc0jFtuTwntRYQSALIrFsrjAAqMqr8o\nNpqTaAygE9M/So5G+YhRn3pgO2jbgDhjnPvSD5UJI5z92l3bUAOcdqEVtrE8jrQNAW/d7gM5\nONtDKVO4emPmFC7lUAYBJyCaY8m0sWJweMGgbHyIS2WKgkY4pgQq+EdiSMdOKesY6qNzY5z1\nAp0cgVgmdp6g0EjNqRtuLHng5pW/1uAAR2IppbbuAXczN+FNOYOBlst2pDHhlXGc5HagqGVs\ncA+lKGAzg/NTZNjMApJb0pgCsUYYyf8AepNw
bIU8se1J5rDcCvGOQacqCHGDuyM47jNAWEb9\n2p3ybl7YpFDKoP3u9J5YZMnp2pzMdysDyODQIUMi8nkMabHtTO0HOcc0rbWkIUjHX6USHc6h\nT0/WkAm1vmxzxkZpUTaigYHdqWTC7ecknkelNZBlgDgHoTRYGJ5nzMVHHWn7h1ySCucUjLtX\nZjj1FL95cIPmFAAQXYhW4HbFGzd93g0kcjSRs3bPIFIwIUbTjnpRcBxymFfnccAU2QbOvQHG\nKMYYZfLds+tCkLuDdT1z60AJIDkEd+1A3lymMIBkmhSflG3Hzct6UlwTHuQkkE5yO4pALuDF\nsJk9Bn0pXkj2g8hum0VGzfeOeD6dqI1AkLKM8d6voBJtRWwDkdT7Uu1WAHPWo1wzEMMUq7m4\n24akVYU/K2Md+PelGUZgg+VuooYsp3ZyKazDgr+dSIVkVzvU5A6+1IyhlySeDkD1owq4Cn73\nUUvkxtg4JA96NQEGGVg3IPODQ+JCpc4XpS/O69doHQDvSfLMoG/AHJz39qoCPhXO35hng1OW\n75wew96aFDsFUgY+Y/4UhkDSZxkHpQAm0ZLkYKjmlOGt9wG49OaayszNlcMw6f1p3kmOMktl\nF4oAjeNkxvH5U+IvtYFgFx0qHDHuSPU1Iu3gZ+tAhSFVgx4B5FBxJnDYbO7NAYtyVynTNMdg\nu0H5uaAHr+7LFiQ3UHNKsfmsCeBSPgMSw3DstOWfJwU2qeA1ABIjy4ygx27cVFIR2wT61Lcy\nPJhQcKOC3ao5FJbeBntuHWkMSNTuDkHHalkTdMPnwx/WpWjO9fmzxUe1d2c5YGgnqKqmNWw2\neec01k3Pu3YPvTpNrFmTJYc4pDvOGOGZh2pjH+crYI69KTIRDg/MeOetRiMswGAD356UZJ3b\nUyR3oGP8wsOBtwMGjarYByePwFM3bVUB+T1yOanbLHCj7o60DsRKv3sIBjo2af8AMWJxgdM1\nHtZ1KM21f1o8vAZGc568UCH+XgDPJ+tJgxxtj1pv7tsc80zczKWHA3YxQIe7HaW7CnlPuknh\nutNdNrAD5qcJQMq2QBzn+lABtwjHOSDgCnP86gNjI6YqGMfP5mM7u3pT1U7XxgnrgmgYm4HP\nzZHrSM2VLflQ2EUD5Uz0FDfcUg5YHpQIngkHc5GKc0isw2HaMdM1WZkVSSCHPSnSsGOCuFIG\nR70DRF5jFePnXd171MIyr/ORtPT1phULhweMY207b93DZU9z1pXJQiqG43HAOCKk3fKQ3OOl\nR7FVmy2Gpm75Rk89x3plD4yjMFB3H1p4m8tmYntxxUbHdARGNrZpzfcXPzY6igQLtAJTPWpO\nBnKnPrUMbFncsdq9cU8SEhGZvlzigCPyQ0nysd4PNTyRr95GC+q1EAzbyHHXtSTsHKkjOByB\nQA7DJGCG2qKZvDRkKcjOaeNzw8HdH/KlVVzkY+lAxM7lXAGe/tThIjfc+hNN2bZG44I5FOjC\nrH8oGMdKQCsqqpCn8KQ7ZMYOPahSsKAn5mbij5RkdweAO9MQ5l24BxgmhQjSEISif7R70fJu\nywxu4pGjAG0+vHvQA7n5uM4603a64I4B5+anMp3Ln8aXhXYEbl6UgGBRypG9mOTnpTjGqxjG\n4nPrRg9ztHTik3Md2wj8aYxeHK/LlgchRQHfcwOQepGKVkHyFm2kjqKY27ZuVtqkck0ALHcM\n2QSuwe3WnEhi5D7QOi+tQrCyxnIwSOlTKwMY+XnNAhM+Zt7kdc05Su7OW+gprHy8hhjnrSLI\nWbCsMYwRjmgCZm2AhCruei1Gr712ZLbTkj3pqhAxOCyjsTT3Q5D4baSPu0ALJJ5ec/KSO3al\nLr8gzvPc0MVZvnQ9eKbhGb5c4B6UgHq4UEH14pjOWXBbac84qRWC4AA655pRJuZhx83FMBi4\nZl5Kt/SkkiyCVzjPenKqwkgNsT060/evBY7CBx
70hDcnBZF3hRk+tOBO7JAX1qNlG3O0jP3j\nT0QqrM3PHFMYCPgoCAGOdxpxYcMcEDjr1pvMcaJ1bOc1DIokkIA68mgCdl3bcnb3FSEBkznI\n9qikG6WPg+Xt/GhAkiMdzJz0pASK3QDpSMVT5gKAyr8q8n1pVKMpG/GOTTGJGys5ye3p0pyK\nwU/N8pPXvSNtycDaD3pWZ5SFiPyZ+8RxSEx7SqnAbPHNEe6Rdzbgo9KU5EgJAx0pZWC5+baS\nNpqRC7ldlw3OOlHDyAE7eKt2bW1vZmKWPc38LiqwYIsgVc84FVcYKN3IJ5+XmnxqDJ5bHcfy\npiyfu2H3pB0VanGOHA3t3OMYNAC+V9nbafz9KChVcKeD/dpGDSEl+eOmafDHtYDp7+lArEsJ\nG4Kwyf7oqQoNwTO1SajhYQzF1OT0p0zFguw/NnJNBSLOxtqqSGyOKl+yjzIwx2ZPLelVWbds\nwM7f4vWpVuH3llyUxzmkIty25hl2wyq4U5D9qtRrJu8xmUH19faqNuqtHtOQTycVZhjL5LN+\n7XoP60hk8OG3FcgEndWjEqtt2HPqDVG1zHE3PfjirTNNC0WU+90xSYjZt7fcoYAbupzWrpbC\nRS2Nr59e1ZcbbpVQnacVq2rCO4CqQCRg5qGM7XwrYrqF3GxG5PMUEZ9wMV9WWMYs7OGJeiIF\nGa+Z/hepHiC0DD5fNUlSPevpwfMfm5HrSAcvGM9akVtzAA896hZ+4FPh2tk8g0upSROrDkkZ\nHSlVVK5U8YqKMlgQRxTolDMVzjvTAkjO1DuP5U8fMACeaiPPAFScqAcc96TGKgIcjORUwUbu\nelQ8kjHBxUseJehxikwsSbvlGR0pu4NjjAqSYAKPypvlHj0pASZIjz3pPvKDijlsDHFP27VG\nME0DEwevahsbSB1pd23AIpdopACsfL25wakjYBSCMmmBVZuDTk+8QR+NMQ4bdvHAoV1yOKFj\nxnJ47UxSN2PSgY7aT83Q0BiP60pLNmhGAUgjmgYNjqaTHPzcinyRnaCfu0iLtXB5J6UAIvpT\nmXbyaNoQDv605cMM9aAEV9rAEZpkuFJPrUrYOMU2SPzFGKABctz2o3HdzSx5wBUrLlsYpgRF\ngKAPl6/QU5kAXJ6im7sY4zSExrK4PtTtxPGOKXaWGSc0m3+Ed6Y7Dc56HBo60qoA1LtzxzSA\njGd3PFSjAHPI9aaq5+VhzT9g289KBAyhj0/KkOQ3AwPWnLkcdaft6knii5QxJVBIPJokHzDj\njFJJFt5HSld/nQe1Ahu0scdhSsu7AxUsaEN160yQnBAPNDCwir5Y9RSlQ7DsKj+bjFSowJGR\nzQA7ftobDHIoY+2FohZVkznrxQOwi7uvalZtrdM5p/3Tg01oy3OcUBYF5UjFBfHHWmrJznPt\nThlecfnQFhf+WZOaRQM5zmk4AORkdak+Q42jGaBict1/Sk3BiSacq+hpfJKgseRQMjjAbJP4\nCl8kZ6gipFG1T70xYxk7aCBGXoRScquelSCNjzjilbAbB5oGJGxDcHtSYIPJ4qQKNpIGTSAB\nsZoGMTKsQac3zMM0pQANjnHNA+ZQc4oAcvyqeOajlwihjzS7gSQSagkkVcq7CmBPGwnUEdql\nwQMms2z3faHKnMftWoqbuWPagljRyucYpeNxz0xRH9057Um0txSKHBeQf4aPlXLEd6GBjOeo\nNLj9z0oAOJGBxxUuehxTV+6McY60+PsaBgFJ6CnBQF+tOLfMcdKZwy4PWgBwT5SSc+1KvyN7\nU0ZwCOtPGemKBCCQ9wQKcrCRhjhqN2RjFNb5cHFAydVJzzj61HkeZtp0fzcsdtSLHzux+NA9\nxsPX1apjllz07UixlQSOvrT0+6dw49aQgU7UBxTdq7uDzQpBT29qWNdxAxigYq+aW+UfLT9z\nMm3pUuWVeOlNmX5RkYamBGu/OMYHrU8fyrzzUe3c
OuKkHJAGcd6TGLuypA7UvVTzg/zpSu3O\nOT3pYwdoB49DUgPjjJQbakb5TxyRSRs0akZyaaynhgetAEn31BFOJI/hz70xRxyOvWpFI4A5\noKHIpUDcdw9KVGKtz+VITu6HOKQLxySGosSSqcH5eOeaQqOcDJNRsWOVBwPWpIQqsB1Yd6Qx\nwVuuegprfw8fjUvzBuvFKqB8kdOlIBn8X+zT0TAJU9acqqYcfxUsaiPHOc0wGIoDZI6VKp+b\nCnNAwNxDdOtCyBl4ABoKFVl5O2kXczZ6r/KnqeuaP4Sqj60gASKucnmpvlOCB8uKhEYT5m57\nVLGRtx/DSAC21uRnNOVtrYxgetCgqhGcmnfKUDfoaYArBm9D70/BZcHrTdoYDnBpWk29F/4F\nS6k9Ru3LgA4HenKuWxmkVf3mPXmpF+ZWIGDmmOwrZYYxShgqjJNJGM8c5p7dNu3mgBpYrg9B\n7U7lmz0FGMYyMilVPlPrSsFhrS/MFFPdiBheDTVUK3Iy1O2nvQMVGfYAwzQH3cAYFO2noTxT\nWXspxSASRdrButSLyOuO9MGSo4qTyw2cGgBS3zZHShX3KpYd6QsAuR0pP9awOePWkBI3yk7O\nR6Ui/Mv86XhW4H40KvBP4UxDBt8zg4FScnjvUewKuD1qRF4yetAwwVGDUg+ZeOMdaasYzy2T\nQWKxnA5o6AORfmzmjcqghmz6U3duGehpzBY4wTyaAFjbdjAxg805dzTMSOO1LGdzbgOMU7Ab\nGetMBV+X71KF64/CjeF4Jpy89OlKxQm1lXrkmlVeBk5NIxzStjg5pgP6qecUmFGCaODlTxnv\nTsdMYJFSAnG7nk04DbjjFRlsn5jg0vPNAyTG3k8ikwGYGlX9KRvl6cmncZJRUPmNRTuTYeuF\nkHHaoo43WbJOF9Kl3/MAB+NOYZ7/AJ1iiug3aQx7mhmIXGMUhcfjUZU4LE/hTES+YWYADPvS\nlux61AZHWPjg0qkqvzcmlcCV/wB2AQOtDZ25qLzGPbIqbdke1FxiNtZR7UrYAHHWmRsJFJ2H\nAobLfNjGOlIY/Kjgim44IobLLnoaaqMhG/kGgRIuMY6mmtgn734U/wCVPrUUcYjbc3OaYxrZ\nzx0oD/MAW4p0zjd8tCooGSKBAzCRvlGKTp7ChumBxSM5GF7UwHDCH1BpvyhwfypP4hk8U3aV\nk9R2piHtGud3Sm8R5f8AnSl+ORTGw2VX5hUj6AG3MST8vtSqvmL/AHaZsKx7RQytsyelIQ7m\nNgB3pPu5NRRsQ208mpPJO/rkUD6DuZVwPlpVU+n/AAKmMxifA5BqVWLL14poAkZVXnmoWmXb\n8ox7VJI+1cY4qDhW3feNFwHeY28EcChn8twR3pY/fvSqypnI/OgQzaxO48jNJIp3EEUgbcxI\nP4dqdhmbJGadxg0i7cFTikRs8NSqv3i3Qc1F97JXrSGO27uR2p2UWM4696aqlTzSxMPmG3NM\nQi7V5Pemuu7GOAKbDJtkYHmpWxIwCcUhCbvSlXj7xxSFSmASKVgdpYnIpAJuVmIIB460nsBi\nmrjjAx60bvMbHTFOwhjYj7ZJpzZxtPenHqM8ikkbHBHNIY1sqvWmK/ynccGl8ztTCpC5PTPW\nlqNj15X2pNw2kcUFW4wcijbtYcfWmJDVO0cjntQGLRnmpmxnOOKY0e3gdaAY0FdvPFJt+XI7\n0OyquRx60oX5cjkGmIYxKqO9N5K8ninvgrwOaT/lng0rWABlQOcUbxjphgc0iqWbnpQ6sBwO\naoD5p+PNqsWvXMhGAwUnjrxXgWpK+5vlwmeDX07+0Jp5kvorhQQjwYZuoDAdP0r5s1gYV0bO\nQeKYpHG6xCysr4LAtg4rJXhmZT/9atrVJpNqqp4U5NY5mDqwUbTjHPetUYdTJu5Bub5d3HSs\nxhiIjOFPrWhcSCKQgEFuhqjcKjkLnAzxViZlT755cB
goX+KoriYMFHXHWrc1uGYhvuk1BNH5\nDBRyMY+lUZlNWMjMVGz61DIzeWyEEk9al+bkbcYP4YpWVsEn6/hVoCmiglVOSn602SQKDIBg\nDgetTLtYMwJUelUp/MC4AByaNdwFkXztq7scZNMdULBQMhuOnepV24Y4OQPvU1m3dQd4/iH8\n6aERyKY1GRkg4qtIN7HcuGz1q5cWs1rGPMkVgw3CqzODAScM/bmqEQMq8oeCe5pkkgjyqHno\nG7VNhQoJPUcikVI2Vg/TFAIjX5QrKeG5pgdQWwm4n9KDiFQUbcM9KazM7eWRjnrR1AbI48wA\n5U9j2prZRc43AjPHY06Tc2IzwQaa0ZjwQckccdKYDY2JYMPTrUKQsEYF8c5I9an3A9eKQybY\n2AHzHrkUwI1JVugKAdPeomUsrFsqBzUoYNtUrtA9+tNVl3n5/wACKYXI0DModhlewFIVYqAX\n2jr+FPeEsxK7l4/Cm7VLBDnjvSEN3Jgjfg+n9ahYoMjByamZFkjLZyQcZ9qh2mPcWYEjoKEr\njRIoSPKg4yMe9RmbOxOSFPzetWLPQr7Wps2sLumcEivRvCfwhuZv3t9A6BhxurnrVo09zKdR\nQPM4IZLtnSGJpHHOAP61d0/w3qeqfKYZFXdt+7+ma+iNC+GOl6UuPKWSQH7zGung0O0tyMxJ\nxzgDArzJ46P2Tkniux85L8INTnVQsRV+vzdK0o/gfqEhUtJnHUdMV9CbVwWA5HPSmxSBQSB9\n71HSsnjZsyWIl2PA1+CE3mk+czvjO3HFQyfA+68xjbSGUsMtxj8K+gXj25YDbkYyKcrLGwCu\nucfNz2qPrdToP28ux8yap8JdU0+DdHE8r8/d5AHvXJyeH9Rt/wDXWzYRuWwa+wLiOKRgCQFb\nsOR+dUr3QbK6+eSBAex2+n4VrHGTejQ/bSaszC+Gcyt4YtY1P7yNdrKe1dQnZsY/lXl3izxH\nJ4FvFNuGHmD+EcVY0H4zW99GkdxDtlxzzgH6+lckqMpPmRlKm5anpe4yPuwMUkkmAQF+U9az\nNJ8TabrMf7udYpVbBQnvWi0jSKdo3JnrXJKLi9Uc8o8pWuNPtbtWWaIMp59K5XXPhjpet5cJ\n5LHjLZ2j8K7L59oA4xR5hwQf0qoVpQehUZNbHz/4g+DN5aRTTWh3BTwrdSPauGvPDuqaerCe\n3ZAem8Yz7V9c7V3At8y/nVK40S3uWbhdrdRjNd0MbNbnZGu0fIAkWHKSqRJ/zzbjFR+ZuB+X\ncn+e9fTusfDXTNUkZ5Il34xuYVxGqfBO3WNjCzKuc7Y+ld8MbGRqsSrnjvmFlUKQT/dHWgMN\n24PyvFdbffCvVbC5YRBmjOdjkZArmbvQ9Q09x59s5DD+FSR9c4rtjXhLqbKopdSAE/MGOXxk\nH1oQtkkYz06VHH5kWQ0Uinpu28U+NhJuQDD9c962Uovqa8yYqyAMvvxz601sjk7vvYJFPQIy\ngN8238806RVk45BxnjpVjFC/ey4U9jUe4LkA7mxye1Ig8wZYgDpUjqW+UJtVRgt600UIuCy7\nhkY6UivJ90Ku2kVGEZ5yn97v9KVTgblX7o5qRjGyARtyP5Um4Oo2HjOMY71Pu3jnkkZ49KjX\n5dpC45yKZI4FxG6k7kXt3psaoI1ycHr/APWp+CVJzlickDvUbqsbkDhuoz2pCDH7whTkY/Km\nszKpXNO2s0mSMZHJ9aWRlZgQM7Rg+9Uiug3afL3bskimSblVDg/WlXbu2nOeoFDMXXAOOcYp\ngNVD8xw2+hcquepzg+tOjYhsEk4FN+43LA4PJFAhWYLtH3gRk+1PXYygH5h2GKi2hWIG7d9O\nKF83owAT2PSgQ9oxg87fQU2ZsR47t1p8m5icNwF6VA3ylSGy2PwpDHfKrKEHAo2tGxCgFjzn\ntQWywycDFIzdNp/xpiF3nc3PA5pNvmRh9u09adt28Fve
m+YEypyWagBMnys9R3pxwQu07sck\nUMnkxkAHbjvTlztwuCPWgBrbWcsAQuKRl28L067s0SMVU5GCB1FIsasi7yT3pAA253f+O00k\nsxIk2g9VoHO5s807aeARhjzTGkxNplk2g4VecmnMDJ/AGx2NMfO5VBwO5pzEq/B4pDdxryBX\nDBSjdMUsjKqsEXA9T60qShlYOMP1z3pF6D+8e59KCRN22RQeGIxinfMNu07XDcilR/4QvyDq\naVkCysSR93KkUANyJJSwHJNIVYAq/HNKrHywuNrmkdmfGVxjrQA1HDxuuMEd6MlmB3DI4Bpr\nPwzqvJ4ApcrtAZCGPJx2oAUKMO24bR2oXLRktjpSttUk4wGqPc24DHHpTAFYYG1DnvT2xvwP\nl3DGKXd0ZemcUn+sYt909PagYhPsSAMU5gNqrnJPeiMv5xTPygc5pvCtgDA9aAHKpORngUDC\nsxOQcdqTCcszZHtSbtoUj+KkAZ2y8HgjrSlTnHUetEnzNxwvpTVdtrhRwPWlYQ5okjQvkljw\nMnvTmAdlBHIHNNwrL83GBkUz767h8rUwFYhpVw5RM4OaGP3iwyOgP8qZlmBDLijcW47fpTAW\nOER/KzDdjNOU/LkDApWXncgyMYzTlYmNSTgdCMUrjGc/3gWFITtT73zetBzuPA9jUbD92QBm\njURKP7hGT6CmssZABbaAaT51K4PzY608KkbAuuT1NIYMqfMQcn2oVfL6k4ojyu4no33aai5U\n/Nhwc7T3piBpDGeRweQfShcMvowOaerSLGCQCHPT0qPquOh70yhz4K9Np65FKrbVwvyg9zRv\n3xH5c9qP1GOBS6iCRgVKLjZjqDUSsZCSTz1FPnViU2gbc8qopWUNJvAwv900aiG+YGcfLkMO\ntI37uTOO3FH8Jbt2A7Ukqt5q5b5cZz6Uxj8fMu77vXFJuBYlF796Tc27OzI9TTo+OMgDGc0A\nxA26YMQfTFDbcnHzEcgUgm27uM8UsZPDAYVutLUBeflDYBY9KUMV3tn2prxlfvHPPagfJ5hz\nlWXBHakIRQwZ2FJuOwgcMaFUqo57Uu37jN/EKYCR4VlY56c4p+1GXGcNngU1R5bZB3gdRUm4\nH94owM9DQNEMilmVgcEfxCnR7lbAX5mPWns3yttwOckdqR2M20Btq45piD51B2qJMHBOKRs8\nE5UE84oTMbFixB6ZpCF2Z3E465pdRjjGhZlLbu4NNx8oYHBHHNOaRWZcDkUisJN38JB+7TAR\nWU9F5B60LhmLHCgcmhYzu2KnJ5NKMKGyOM0CFG9QG4wx4pokEkxBGVX+H3o3L94jp27CnKwb\n5sfN2xS6i1GqD5mceWvWj5UmDp8wbv707e0kihj+FJ91io5569hTGKGWRWym7nvTUP8AD91T\nQUbcSDhPWlkVsId2FJ70AIzMxGRhBSyHd5bD7oNB+8yEZ75oxuYgHAB6dqBoSSNegfGWzxSB\nXVmJ5QdCKd8qsSeSeAKXyx0VsD+tAhEw6gsfvdhRGpXJZVI7Zpv3Q64yxGQtLu4+Unb0pDFj\njG/cMjvjtUnJBIOOc1H93aFJIJ5p4Y8iPgevemA2QscNgBweQaWR90e7GQf0pyqCNuQr+lQ7\nzGzADccYHtSEP2/MNgwg7+tKqb2wDgd6arOzR44GMEe9Ob5mYE4PtRqBHGVaR1AIUGpjtWPh\nSHpi/MxcfKMUo8zIZTvXuKADByD94tQMoxjC/PQdqtkZ3deKFkzk569R3oGJvBfcF2gdaQyc\nKdu4E4LU8MMAINvPeiYKE+7uVecUwEG3CIEbGc7qVupBOefSlXKwoVyV9KVc+cxUYIFSIdKG\n8oMRsz2pDGrouDhs0m4NGMkkdcUjcruyC3tRcBVZQxLjaCduKay8YxnBwDSozcll3D+tCKVU\nL9TzVAKvK5Bz2xRIu5k54xg0Kyxrkjgn8RTnJ25X7vegBk
i7WVC3fikG5WbHP8qf5akAnq3Q\n0kYd8kN5ZXrmgBGQdX5X2oVSi5PJ9RS/Oq5Y/Nnp7U/rSuMahWRmwDlRmnee0iEFsBeR7UmD\n2GGFJJH5u0IuO5pCAt5kIPJYHP4U+PG/Ptk+1MaR1kG0bVHpUiqW4P3mOeKYwVmWQ8ZBpVXO\nUdgEHzBsdaTG1mBz0wDim7dq7XPmLigQqqjZkjGOM4NG4mMMeVz93FScDPGF6CmswVSQOnak\nAMo8wDdtLDNG1lYn7+ByRULZkckj58/d9qfBJy3GVAoFclj+ZWGcZ5BPakkjCxrGeVJyWHrR\n1VxjDDkUrZbaP4sU9Rik7+V5xxk0KiytyMYHSgxsse0H5s596WI7V3OflzTAA2PmTB9VpVYM\npyoIHOcfpTHgwwKHEROcU5cHcfmB6dKAGAoVO8sDnH0qeNjHGFLbou3YikWMluMY64p7ESPg\njK47UAPZtoLDk46VHIysqsPvd6crHdyMDHINRqyshYAhScYIoAkRWkOQ2D2FSyLtbcwwT1Iq\nBdu7IYg9sVOpHIDEZ4NTsMGXcwKHANSYywA53Hk0238qMbEPGeB700Yi4LcsTwO1O+gEu5PM\nZVPIpVjbk8kH9Kbt2Rjoc8k96mQlfnD5jx92n0JsSs3kqqcMW6U5IxIxeMbdvB96ijVX5IPs\nc1Mjfudn3W9qVxjiACvljnPIFWbeMYZRy3XFQiMxyZHpzzUu7bMhH3ccrSAtRp86ncCQO3FS\n28IiYnO5m+9VeSIsxOdrEVNbkrbBWbBzx60AXIyxj2cZzVvzjMvYMo4rPWQblJGOxq1HbGRy\n6Hd7VAG3pMYMYeQ5k9617JVklwV753Vg2e5owzn514xXS6esbKFTLNnBGOaljPZfg9o8eoeI\nCzruS3jDqc/xZGDXuqyHjvxz9a8n+BWmvHHd3QOUIVeeua9Z6N6Uhkm3gDGDUiHavAzUauNv\nPJp6nLfLwKCx6j5iRx7UKBu9D1pY8qCSKbgM/XBoETbdyZX1oYH15pQ3l/LnbTW3dSeKlj6E\n8cQl6ttNIo8tjgZA70its56inKxZf9mkLUlaTdhuw7Uu8tj0qONgy4A5zU+3awJ6d8Uw6j1+\nZsChThtppA2GJHSpU2nOetJlDeGJ5yBTuC3FNhUAlecnmnnCttpMBVIQ4xkmnhGPfimZ284z\nUvO3PSkAKoReajaMsSRgVI+G460KN1O4JDNoJxTHXOe1O5Dc96kbB7UgGKz8DIxTd+5xninM\no2gDNPCZ6jtQAhXPTvS+XzgcUoHHTFO2hjyaAGrGST6ClGB19KcrHBXHNN2YYZPaqQBGuRnp\nT3BI4NKy8Aiho/LPHzUDG8dxmmbct14p4+vJpskZC/jQAFRtwDzRH93kYPSnxrtOTyKOWYnO\nKBERi2DI5pQ27HY08gj3FP2hSAQDTEReWW5pdwXAIzQzFe3HtSttbjNSNCx4B+tDLxjHFIrD\ndUhJIx607gMbOMelIgAU560vllTljTli3UriGIcrk09kVvam+XtHHJHWpDhvY4pFjVXb2pVU\nlSOhp6qeMnJ70pzzgZqgGGL931+tQsoXDEVY2sFOT+FKGDLtNICH72AeM1IF5+lK1sGbdnFK\nsYUYJyaYEO0ZyBzUsi+YuRSqpGMinsu7p+NAFdYzsPWkkUtgAbanbsFPNKcHk/hQDI1UHvg1\nKrheDyKY8Y4OT+FSRwkD5hgUrgMVdwJzSBTj3qVF+9gYp5UNg45oFYibdtpFTcMmnlfTilVN\nvWgLEaqVyOgNRoreZktgVa5bpSNGBxiqGRhRt5NOaPcvA4oZcKM0/cP4aQFVl2r71GtrG6ne\nc+9XljB6jPtTfJRmKgfhTAihtVtk2joe9SKmT7VIVDfKKFjKnrzQKw0R7uB1p2zbyDT/ACyu\nT/KiRQ3SkMbxtGOaQZ2k4xzTl+V8DpipOenSgZGrbT0qWPv2
oChT0yaV1xHQMQcnHSl2nbuH\nFGcr05pUYlgW6UuoxcdOfrSqoZiQ3TtQrDc3HSnQqq5J4zzQLqCjb7mhlEnBGMU4ofvA05Pm\nU55ajUAjUE88AVKqbs44Wo8HoBzT8NtwTj6UxCrlIzmmK4ZdozilLKwxkg980+MqowB+NK5Q\n5FO3gcd6ceW9Kd/DkH60xssKQEsbAthhnHakH7yQl+DTV/1gPtUm7cSxXJoAVsRqPrSq3yHH\nPNN25APUZpdpRmI6UAScqwPTcKR24xnmm5cuOMipdhfnhT3pAKqO23jAqQr2A4psKlELM2Se\ngp+COO1MYmBtIB5ohDMvHApRjftp5wykLQAqqVYZ557VIxU84zTFUrtFK5DYXH1NJiGeWXcM\nOnpU6hc4HBpFj2rxRGw3gFc+9LqUObCjBbmnxk+WcDFRSAA5A+tPT1xxSYth0JLsSyU8Ln2w\nab8y8DgU0SHcfSgCRsZyBhe9LtXdtxTMkrjP4VIsePmzk0BYXywrA5yKduHXBz0qLzfkIx0q\nWNS21jxQGw9dskZyeBTUTCE5p8Kncc8AnmpZsYAA49qAuRIx25qT/WKCBimDKjBFP2jbw2KB\nihew5p7MVYYxj0pFxCvrTgRgEjIoAFXqe9KFk5YYxTlVY8knINR7tp+XgelAChjyTkfSnCTa\n2TSNlcZ5zSKoByW4PakxjwrMvB5pfmAx3pFYKwwcCnt/OkxAu4KfWgdlY04fdHemKGaTphfW\nqGOf7wB4pxX5vb1peFXJGaam7cQw+lIB7bugFDMuMAY9adG55yKXA60CQwRDB+bmkUGJcAcU\n4jy129TSlSYwN2KVhgoLLycZoUHpjvUb5yo/WplbaOetIAZhkDHNLnJBORTWGcNjJzTlYszM\nOmelUA4feyKdxyTyO1N3My8jrSAfKc5wDSAkjVcYNJxyMZFOVR94UbgM+lADUYK2eQtOPqOK\nSNd2TninpjbzzQA4hSoHenbgMED2qPcC3TGKfu+XdwfamUP+6ucc5puVaTngUm4MoLZx6Uiq\nFbHTNICVWG0g8+9Lu/u01jtjPGaWOTcoGMEUDBgXOSKSPduO7vTwxIINJtO7NUAu4ZI/Ol3g\nK2fSkHTOB70jDcvIBqQKn2w/3aKs+Wv/ADyopAPWZRIF70/hV55pg2N82OaWOPbGS3OayKFj\nZep6dqWNTySM5qLeMBT1oaR4VA+9uoEPZVbjoaGC7cGkVtwyRzTFB8zJ6UDHw7dpGcUebsUj\nGTUZYeYQVwTU67U56nFACKwwM8E9qXcGB7dqZGuAWzkmhY9mRnOeaYCs2VBHSl3FqifftOBx\nRvU45zQkg2Jt20jK5NDjauD1qMYZtx7UjyBep/GkAhXcue9Ej7FHHNMjk3cq2RSufuq3BNMB\nGZmUHp60wyNycU/PUE1E+7jPApoRMMNGGPUdabuPO3k02PcFOeRSSOeNny0XEKzbV+Y9ajWQ\n4OOB61L5fmqNx+lLH8qlVOfUUhkalmUEnmn/ADjIbpSbTnI+7S7ixG3nFMQxVzIcHmm725Ck\ng1LHhst0NJuySAPmqRiRqd2TzSs4TIpwBH0NRSLtkBAyKaAbIxxg0oZEjznIpce2c1GI9ikY\nzSGPZtyZGQKFwygdTSbwq7cY4oZtqrjrTAcuBlQOlSsT5eM4NVmTjrjNEQK5z8y0WFcsxqMb\nWOahOIWwFzSRsZJDg4FSTbSvJ59qYEDsfpSRsy/WpFjRl9TSJ8rE44piHxovU/eoVSpJ6U5Y\nRt37ufSmt9096QDNobcW7dKYrSbemBUiqVHXtSbgcjNSOwu4jA25HrTGYLkj1qRuOCNw9arq\no3Hd0oEP5VT3pFZixDCh2Kv0oaTcucc0hiZDcAc05sbdowTRt2LkY5pi7VyxPzelAhJGZmCg\nYPtQ2VXb39aj3ESZ9aCX2+tMB3mFVyfu0eZuOc0xP3mR0FKu3O
D931oBiNtYkdqO2BxTVjG4\n4ORQRtIJo6iEX5PmJzTvM4JxSYK/eHBppyoyBkVQAJC/y5wPagOV5LfSgKB83akkdQwOM0ri\nPMvjvpay+Ho7wEqYzsIHfNfJ3iKNRITHwWOTX2h8VLMah4NuxgtsIcj6f/rr478SW5jaQL8u\n000S2eeaorR7t3PNZAzGwIG5fauh1BhcQt6isER4YEH8K0iSZUs0ckz748HNZtxCI23buM1u\nahaxz7mT5HHNYt7l4ycYYdGrVESM+4dmbk4Gaqzs21vl3KvUmrLxhlXLbm6tVeZgwKA4FWZF\naSRljVQMK3Oe9QS5YgFiwPG0VOzfK2R8o4pjZtl83aGB4pjSKsnzyJHnaoGcCoT8zblHQ805\n1/fjsuPSlf7p2DCnvTJIZHwpUEAt29qZHchskjaRxnr+FNkYLESwy2eg71Gs2HCKoUnnApgO\nvJzP98Y2iqu7GB1z2qZY1+YtkDPSoY1XzNucY53GqE0O2jdjbj6mmcR7v4s9fanMyMfnYlSc\nZoXHmMqj5OxoAhXDRygLnHb2pjkMpBOBjp6VIcKzcgZ9TVYDfGwZuhoQxvlvGwkZ8r0FSR4W\nQgdPSmrho8dcHiiPajFw2T39KbENkYqwYjAJxio5pzJJuUcrwfepWy+FYZ5yKjUbTuHTP60h\niFS2dhOcemfwpmzy/TKjLEnmplkUY/hbOT/jTXVVZmHJb2qkBD5m7O4/J14NKuw84+TFG1Qx\nBGRjJPalIUKpxweQtAiAKFTAbv0qrcDEZJ5Bb8auMjFTjnHOKjdUmZW2bU70N9B9LH0X8J9D\nsJfDsciqsjAA7cdz6138isqiNVwvTjtXinwL8TTJfT6Yy5jZQVwe3SvbpFkVtuM+lfMYq/M7\nnkV01LUhVRyV4HenM0ccJeRto9SadLIIbdpCvKjlR3rxL4hfFBpI5LWzOJASGYHpXNTpSqys\njGEeZ2O+1z4j2GixyHCybODzzXmOsfGy8EzKgXYTkDH8NeY3WpT3cpM8jPuPrVeR1kZTjAx1\nr3IYOMPiPShSjFancXnxY1dnJWXy1bptPQVRm+J+tTMg875l5Dg81x/lEDJYnJ4JJp373l0i\nZj93gGtVTporlid3D8WdUVgSVlm64I4/Kt/Rfjhes5FwBKoOfuhR9K8nEckODLHsk9PSoppD\nHGpkVuvCgfrSdKnYlwiexeMI38faSLiyVjMBuGM49wK8sntJ9PfyLyKaOUcZ5xXuvwi2TaDC\nTghuqmus1TwnpeoJIHt1MjA84rh9uoPlRz+09m7HzFY67c2EwkikYyqcZLEAgV6h4M+LsttH\nGt9J+4LBSzc4/wAaXxN8GYZLeWW1ZklAyvPANeTX2nXmlTvBcBo9hx83RvcVp7ldG3uVFY+s\ntM1yz1qMm0l8zcMlT1+tW1BTcCOegNfLXh3xtd+H7vfbs4X7p3Hp9a9r8E/FCHxE4t7t1jmX\ngehH1rza2GlD3onJOi1qjuWUqvXijKryep9Kesi8sNpQjIGaiZgoI25J5xXJy9zn1BpCZCrd\nO1K0asASvtUT527s4HShN3TP+FPZ3JYz7PFuKkA9xxVe60WzvEIuIVkPbI6VabPUcNmpl55J\n6VfPKOqBSa2OK1b4a6ZeR/6vc55LAVw+p/BmNlleB+QchWypP0Ne0rvPcYz6U9kUYGB610wx\nE4msak1qfM2rfC/V9MjMkQ84sMhecfTPrXI3VlfWcxS4tnQgckD9K+wbi3jnDK4464PSsLWP\nBun6kyO0KsSMNxjNd9PHW0kdMMS9mfK5IeMJ90/7XBp6t/DnLD8q9z1j4K2WoM7242gchQeR\n/jXM3nwNurdD5Mm3dyu7JIrvjioS6nVGumecTNlV4+oprbUYuz8sMYrc1LwLrNn5gERm2nA2\nDJNYNxp15bW/+kWsiOp5OCfwreNSLe5tGafUb5wiYDYRxwaQSGVS
COPWo97KzKw2/wC92pYd\nrYw2O+fStTT0H7vmVcFSDy1KcPvPYcClU72MinPbmo0EnmOWGDjlaYhu47cEnpjPp7044jVM\nNuA/iHelVgY+uB0LetRNIkZBw3HGAKChVbcWyQWzlTUbTbv4dvq1SJtYbx980nmNyqjI96oQ\nvHB68Y3UZbgBPlHBNOWRFmAZckjt2phVWUkMR+NIB/mcHpnpTFPILHJzzS/fVQqjbTY8+c4I\nwCPu+lMXQGkOSc5Ynr7UseNxBj+lOSP5MEY9DTZnK7Pm6UhDI2KKwdMnOAaVX8xtrYVugxQs\njHd/dxzSCEKytjBPSgY5lIOG6Z5NLuG0HbuweDRuwrADcem01H86sN3ypjlaAHeokYgH9KRV\nCq56DOcUPk47ADvUiyGRhuXjGMijoBEI/NRgTjoaa25lyvQcVLIOh6r0zUbKwZQp4NADmXrx\nyRTJFPloDkydRUiK+XBHOMA0zzPlwckrxu70x6ileCOWIGcChSVjAI3se3pSbiI+v/AqM7fu\nHr3oQCsuWXv2JokPdSMrwKI1O4gEcc5pFwxKgZLfxUhAVOzOcFsA+9CKqSfd3KBwSacA24IP\nmI60xsR+YoBJPIFAiVn3qrH5TnmmPmR878kclR6UnmR7QDkkjHHrTTGXYAv064HJ9qCh2MME\nIG4cnHSmrKNp3AjnrSMRt3xjjoadzt4OPQUBYjYkYOcqTT2cs29cZXgim+WTweZAc4pMbEkG\nwqTyc0Ekm8cqqFl60RyfvNuOGHTtSq7xrGSMHFI0ipICOp6UxjVj+RiThu5pdxZdyj5Rxmhl\nG5SzYHShFA3hWDY9OlAg2rGxyeMUH/U9OQaWLDZJAahlRl6+WCcUgE+8vy8gkChVWJmTBb1I\npyxiPccg7R0pBs4Ygj6GgBNxVcoM8/pUcny8oMc/dqSNQqtIG2A8Babj5lcHI96QDt3y4c7a\nYrCMbupHSkkxuw3zZNGQrNleAP0pgOVvXkt2/rSYyjE/IRwB/Wk3rIinaRgdO+KUHcuB9336\nii4CKrZVTwzclqcc7vkXAzjmjOCPm7YDCjG5Bk7mz1NADPL2RurNl85+lOjG3HmDK+tChQvm\ndweppSpbl+OelIBGZVfYDuXrQ0fJYDdn9KMhQODjOOlHOSGJ3ZyMUwHhjGAuckdvemyZP3zg\njoKRfmUkj6DuaRsTKueCOaY0LloxknHsKVsx8k4BHSkaXcGGOvtUfTDMjOB70APZnVcA0pI2\n8HI703dt6AjPrR/q1IJyKQDWy0OIx35NPaLzHUg4IHNMBUpt5B604/6xHQ/L0NMBdsnmmPGV\nI+8aiWN1kBPCL2POakMbsWYPtHpSRswjDg/NnHPWgBz53N8uPX2pI9iwkE556Uih485GQ3NG\nzcwO3A9aAY7Bibj5uPqKb5e5c7ty0IwjlITpjANLwrElcAjnFAhvkfu1KfNg/wCRSqojZVOT\n3HsaEYQrsDbVP6GmSO6rngr3b1NJjHrlpQMcE5OKUZEzA/d7YojP7verbie1OLFV55HtTGR4\nC7hu59KR22lV/ixnFO+8ykDjPJ9KdNbqsm5Wyw5P0pEjQy7T5hO3sKAqyA44GOB60pcErx8v\nUnvT2ZZFxg56j6UDIkVo1U9D3qUruUEnnPWmPtXaV+bPYdqRmJYbzj0pgKS6u0qHI6Uj58sb\nutLgxrgA8mmONrcncx6CgCTjrt6ik6bTt/8ArUi79uTxjtSy71Ckn73FIBCyXDkkGMr0oyy/\nKAee4pZJPmUbegxSbm++oyOmKNRDV3Kp3ElfSn8TYGMAdM0K77eBjnpQ6liOdvfNMADHaVwR\nnvSFjuIU43U5v7275enHeozGdxkRuh4U0DFcjzQQOOlLtVJj1bd/CKdhuWbhKau2P5uj0CFb\nG4ELvkXnFBYsytKoUHjGc4pqttY56etPWNljG/G3PHrige4jZUHacg
URvuzgY9BRwoPOW7el\nIuGVt3GOmKWoIF+WQFvv0EsmSRinQlOrHGfWidhH8pG5OtMCPcyrtPyselOX930+Zum6kZx9\n7kvjGKI23JtKkYNAiTdldvVscilRQjA7toxUfHPrmnCNN21xg9aQCt8rBs49MUhXBLHDH+VA\nYBgrcMOR6UqsHUsT37UtQHrzyMAgdKiMhdRtPyng0rsV5HylqEYY+9sx1pgLGxjUN94KcAU7\nzn5BGFPJb0po+Vwu4knnNSL95lPPegCJX28qQU7VI2VZQ2COu7FNUtJ8oQLTm7BuP9mgAViq\ntk8Z79qRZDu5+YdRQzq27KU9XBX7uMClqBErGRiGOCDnipEYeWQ55Jz+FKuGYkfKcZz7USSK\nGByGXHYVQDdok+YHavQUkhPBB5HX3o+WNsdUIyOe9OQhpME9s5oATzAu0k4A6UKd6biSvPAp\nyqJFYrjP8qX7y9cY7CkA5sr8zHHrUa/vCcHIFP45yzMMelJ5LMAAdvegA2nywrDHOc06NU2n\nnOenrUZkZXUyHb2A9qs7d8gYDheelMCMDAyThV65PNJGqspIG4enpTmxMrgjac5B9qZv+Xac\nqQOqigAIbjK5BNJJsyV++fQUjLuiHzcZ60RRjcQD19KAHbG2g7ec0ghYFyx+Q8GnrHIMqflb\n0pFVmXDMME447UtRjlOW4+Vf71JFG6xvuPIPXvTgnBQngfrSS5Y7iwVgMBaACNssS2enWho3\nwvYE8CiaRygwOF5OelJI6MokUswyPw+lMRJMWkCrnaFOB9alhV55FDHbjqRUSsdxYkFcYA/r\nSrI7ZTfjjI460gHyLICw3c5607IT+EqAPxpkWdpZjuxSq/2jjOWouBJtkkX5SDnqO9RxxgsQ\nX59qkh3IS235umKMZJRCB3Le/pT6XAWNflxnaQfvGkWQR9QWBP3vWnlW68dOT6UAB1CkhmHb\npUgIpVcnb3zUzbFj3qPl6j61XSXa23B3d6njZJAoxsAOcUANjhdtr9Dj7tOReAVG9ieafGvz\nby29M81JGvmMZNu0g9aBjnG2MIuc5yV9KljYs5HZRmo9u6feThe5zS7vLkGD8pOM0AWI4xJB\nvyeW9alb5ZgpHH6UxYsOEB3DOafcIWU7Gwc8jvRqBKlw6liRuUA4qSE7l5Xk881WC8bQevWr\n8UaqUYngcUhFiQIuwkksentV6zYPGXV9p6H1qtCySPsGC2e9WrSPy5HQrlqkDU01W2sXw3P5\n11uihf8AlkuCf8K5PS3jb7yFmLla7fw7aiTLqfmTv+NQx3PoL4O2bW/hgyngvJ6/Su+zubpn\n3rnPAtisHhey5G6Rd5/E10ikqcZoLQqsGbkVJFlVxnOaiGHX0NSRkjjFSBOzcDaMgU5V2sCR\ng1GrbTjqKlj+Ukk5oGP4kb5jk0rjPAPFN2BSMc5p235vakMdw3y4+tPjbb8vak/hPalgwxHP\nNACqgjJYAjPWpwdoBPNCjqvWnLypGKBDWRg3H1p4OcjocUke7BbrT1HzAke9SxhtK8gc0uQG\ny1K25RkflQQNnzcGgAOR16U8sW5B/Cmx5CgHmnLHtbrjNAxqtgtipN3yjAxTQAmcjNPwMYJo\nGLtIjzjNJtJXOaTJX5QcinMPl45oJE29OOak9eeaYGJOAOKc3K88GgYpYeX1pEUfeByaVV3L\nxxQOOMUDHNIMjA5pjKZGzip41BbJpzyCMgEUAQLkcGnZ4GOlOZtxbA4oEYaMc4oAjkXdkJ1o\n5aMD3p8UY3ZzSMuD1oAavUingFmHYU1VIyfzqQfMuO/ancBdpPXpQ0eeAamXpg81Hj95QKxD\n97IxgjqaFX5h8vBqTBCnI708fKuD0pANMKhcmm7WZuBxUrL5i8Uzy39cUDE4PBH1p24M21Ri\nhuFAojz5q54oAQxFe/1psn3gD0qWRSGJ6imr8/UUCI1yrHuDUyncoOOa
GA6Ux1IX5TVIY9gN\n33STTGA4PfNPVsLyeaY7Dj2oAcy85HSmsCxyOnrT4uOvQ1I0e3lTSuMbtO3g5PvTgF203nq3\nB74p5UbflBNMRF5fzZUZqRo9ygHg0zaQ2Twe1O8xQuTwaTAVoyowDS4IXHJ96OHXIORT/KZu\nhwKRRGCVbPGTxSjd0607AIxj5qFO1TQIaoHPOTScsfanLgc4yaXhhjpTGNXO8cU7dxwcikIK\n9Pmp/Gzpg0EjVxuHFC/fPy4qRSPTNN5yMdc0ALgZOBzQvysTjJPrTgpDc01m+brTARVK5zwT\nTVYbTu60/O75jTW56cUDFSTdjipUbKkbaSPCpkDrS/wcdTQMREVl54NKSN2OtHlj1oYKOBQB\nLGu7nGcUrqCuTUasY1yDTy26LGOaAIgx9OKXaZBlfypw/dqO+akj+Xkd6QxoQ7gSMDvTpGRv\n4cVLg7eT2qLaetMByASIcnAp0Z6jFNOVHHenK8alV3YbvQA/O1d3ek3E807yxyByDToxjOaA\nImHy8YzUkasyrxTljG7ceRUm75TjigBFj3ZWnRp5fDEE0Q43U5VyCTUjGFd3IPSlC7mGeFoW\nRI1b1NIrd+tADtxjODyAacjBgT2zxSK+5j8vB61NGu7gLgUCGIG3+gqXjqTTmAVaavT2pMqw\n5drDj1p6sgB3c46UkMRA6d6UqNxGOc0CBdzEHpUzJhcrwe9MVic8VJuAZQTxTATPy88nGaI2\n3YzQ/wArH36U5VJXPepYx0cm5vu/KOKfDwCW4GajRvvKeDSuyfKN2B0IpjFkUBiOopQwjjCk\n9enrSNhfXFJ5YYhqkRIZDlV25OOaTaZDkHAHamK+Mg/ezU23vuwKAAYMmWpRlWJHQ0LsbkDB\n96kj/eHpwKBhFCrLuJ5z0p4O5euPaooFZXkJ/AVKuOrjFBI/d8oJ4pzY55yaijzvZjyoFOwC\nwweaQxVQnqTinJ+lNXcrYNHnfMV29+DTGSMpYU+NPlO48U1CzHGcU9nSPJJoAPLyAp5pcBZB\nkfSoYXMnOcCpmj2JuY5FFwELFshuKMjhQcUq9s4waXaWbcDUgJtDcGnn5uOgpFVTyRzTuBgg\nc0gHLjbg0eZjHHApFyxI7U/ZGVyOaYCBjjB470seWyT+tJtHWlYhVxnigQsZYdSDS8rzTFU9\nOlSZzQCEOc5PSlVfMGAOaU4zyaOcE/dFIoZ079Kew/ixQuOhwRTjndjtSAaMlTg809W2Hgc4\no8vZk9c03bkcHmqCw/5hjPU80rk9B0pGkZmD9QBikJOcikKw8A7Rzx3p33ug496jV/lJAz60\n4N36LTGKAFyCeKeijkDp2pjYVT/EaaGO3OMUhWH8nJB4p0Kk9RwaSGRenc0rTFc4G4DsKYyT\njp1pW+9UVvMZI97xmM56NUu7NMYvSTPaljO5iQKbuA7U5evy1I7i7tpPrSfNxzzSberUKvdj\n9KdxDhluBxSAKxwRk07vkDNRLI28/Jj3oGS/P70U3dL60UBYcoGD70u4Koye9Keg4pFXnOKy\nsMGC7x7daazB2z0x2p7Z4NM2qW3Dr3pDG7iu4468U1lLIA3BFTMwXHuKi5Zsk0CERQzbu1St\ng49aI02x7cUrsMAAc0+odBVU7jn7uKar44pWkO3GcCmPhsHtQA4hk+YtkelNVUHG3HeiSTuo\nyKdtLLnvSKFZQuPftUcykR7ccGnYEzelLtHbkUAVl2jgdfpRIy7h3wKnuMKmVAFQctHyOaYD\nxhVy3APSomk+TawPWnGQuyKw+Wpio3Hufei4iF2KoAvWk2jGd3FOYj7wFR+cWbGeKQh7ZZRg\n01AseTu5NNjkLbh3pnlhiGJwaBpEnmN5uOi0vmDadoIPrQFDjkc+tG52+UDimIFbyx608Pxv\n4FQq+1yCOaDmTGOBSAtLINucdajZhyR1PWm4bAUcihYyV5NUMFbjgfNTZF
+XH8XtTt/ljOOa\ncFDfNng1IFfaedwoU7WVG5NTunYnpUTR5kz2FNiHyFVbn8KZhwM/w1M2GXAILelNUYXmgLEc\nY2oSB1oyV5P5U3cV6cU3zOrGgOhIPmzzg1Gdy5OeKVMgZPenRqwX5xkU79CQ3FeDzTmY+XgC\nkmkXaAoyaQtvPoMUgD5mHBAOKZHIGk2Y3Z70xnKLntTlOGBUYPXNIoh/tGEyNCj72B59qkX5\npMHvyKTyYmYuEUHOTgU98MN6rjFAuoA7Wy3IprMOexzSqw2kHml+VupGaQMbId2MGo3wefwq\nRcK2KY6ndnqtAhG+T64oVzt4pducHtSdOegpgI8ZK8HA70iH5dlPDBe+ajbr8vWgYNkNjFNk\nZg44yO+KRlZuvBoXMZ6/nSJ2JWbJwRUbt8u1elLwWBOaGG1eDwadxjcHaeahxu7Zpz8McGnR\nqVxk0B5mZ4miW48P3sDruV4zla+MfGULLI4Ycrn+dfcc0AuoZI9v8J/HivjL4mWT2uuX0QBO\nHI54wOapCZ5FdYXJjBI6EVz95vSQhDha6u+hCoRtwD1rmtRhETfKcoOQue9aIzZS1BSsBK8B\nuKwpldowh+Vfetu7aVrNSxHy9VFZt2guIQFHJPStEQZdxGIwCoGcYrPmjO0gj5var3kuxkDE\nHHSqu07mDllJH3cda0IZSuswlVJ3Z6gUxm8yErjjPSrEgVmbaD8gyQaqYRoxIp2E1RJDNGnm\nffLHHSoGlG0Kfu59KkkyrBRyT7UjMZMrs6elAEDIGOV5H92qsj7WYkbecdKsMrNMqgYPU+lQ\nNia4K78Acbe2aYiJJJNxGfk7n0pGkCtyd6/SpdjbW5Cle1Rneq5Vc+pNMREw+Y7QUA5FSQyK\nUYk4P92pc5XDN82MHFVmBh2iIBlxkimMjkjaP5wN6HqTUkaorDC5z+VKzbgBj5T1ApADHkk4\nUHikIgljZd7Dhd3GKiWYBtvY8cevrU8nmfLn9OlVcBWIEeNx60xEsMf7vLckHrUSxrIkmDjH\nPJp+4vIArbAvG00xlR0ZevOaBjNqNGoOW3dT3pyyFYyAMhRgAnmjzDtDAYHQ8fpTWc52qAB3\nbFUUMKHHIOKmi3SRrHkdeaSSRt27blafGBjeePpQBAu9vNymNvAx6U3ny8lPlHAqRtyoSx4P\ncU2R2baoOEPpR5kGz4N1qbRdZt7iLACEfd4PX+VfVWl6gms6bHdoQCeuDmvjmPdDITGdrenc\nivfPgv4mW60p7MtlUOfLY814uLouS5kceIhzK6PRtSjM1u3ljc/93OM18oePLOax8S3PmKV3\nMdwxgCvriTGAowEYc/l0rxL44+FmvLNL2JMTgBS1ceFlyzsc9F2Z4vHglQRlScAjrXZeHfhr\nqOryIwQrbnksR2964q23STRKBtVHGc8fjmvrHwKEk8MwNFtKqAN396vWxNV0o6HZUnyrQ4jS\n/gvY7kaZmI6lcZFdOPhjpEcagW6kDp7e9dYv3sMRjHHFI0ix/NIyogGcngfnXje2lLqcLqSZ\nyLfDTR5Y2Q28aju2OTWdcfCjSmiaKOBQnr1ro9Q8Yadpsh3zx7D6Gq1n8RNGnYxiZVfOA2eK\nalVe47zsYcdnB4BtRyrRKR93itnR/Hmk6xKqrNGGPH3uhrmvizILvw9I8XzBh8xXkY9a8Ett\nQksdmx2Uo3bv+Na06LqLU0jS51dn1/J5cinYyup4GKxdU8I6fqkMqTQJmQYb5QTz3HvXj/hf\n4xXWnLHDcnzIM424ya9Z8J+NNM8U7xBLtkX/AJZsQDUSp1KbujJ05U9jw3xv8Mbzw3etNZf6\nRAwyMDrz0PvXE295PYzMwLW82fu9MV9j3Wn2t9CEmjEkYP8AEK8e+JvwlSWRr2wRd0hydvX8\nq6aWIUvdkdEK19GR+BPjBIkdtY6iyy8YB7/nXr2m3kOsWfnWrAg9RnkV8hSW
rWV0YrjdDLG3\nRuDmu+8D/EO60e6hSUvJGrdQeaK2FUtYBOkpK6PoJjuwMZIpFLK3Iqvous23iK2a4tXVsjJG\ncEH6VdZCinJzXkSi46M86UeXQh2OWJ6/Wl3cYFOXLc7dvFMHLFlO32qSQkZV+UNg0bi23uaQ\nrnJJ/SkXBjJHWgY6S46qU59aA+6RRt+TFNXC9Ru4xTlVdhw3PagQp+TOPlFOVgysGXdxUTNu\nUd/Whc4JHJ6CqTsHN2Els4pYwTEqnr0rK1Tw/ZXELFoV3kHGFGM1t/w4JzVXUt0dqWUZfHSt\nYVGnuawk0z5g8faXa2OtymMldxw6qPlzXMbFj3Z6D+Ku2+JFqF1RhJwzNuJz+lcc2GUgjqeP\nrX1NF80D2KbfKJvZ4wi5yG+8KVs7WDKQh6nNNSQ7WBG0luTUm392cnPPaug2GsV8hPmxzj/C\nkj3yLIOAVFKQPL3bcv8ArTSo3HbmMYzilYQjMSqlUxiiLJkLA4U9fY0q/Njn5fX3o8zadpHH\nWqKGmT5jjjtuNOVGkjzj8aQMHXBxtzwtPm2owwMf7I6VJLG7vlChRk/wmlbLJknLA4GDTJJM\nRHHJzk0i7Y1yWwh5xTEPbcMDGWpMDcSenvQ2JMf+OkUgj+YqzYzzikA1Y1jjJJIJPFSSZkzn\n76rnNRlWZct1B45pV3FWY/K5/hqgGxyBkU4+frTZGbzMd+tTswkbCjoKhbDneTtbpSGPGWU8\n896Tc5jG0gdqI1LdRtPQ/SlOPm5wFH3qQySFiseG+73qKRhuVc5yeuKGbdGAD971p21/MVQQ\nQvegTEVmjlYkk7f7tKVHl5+6TyT60R5ZnGR8x60iksGG4FF4JFA2Rsp8vAOFHNPh+6AoB7/h\nSSYVQp5FJ6KOB1JpiYNuj3fLlTQhVSCfukcfWn8MrAZxux+FRSKoAP3ew44pCHdPmAxzTlyJ\nGIUdKbtdo94PyjjFHXBQkM3amAKwaPIG3B60RqNpJ4btStCRkn/VjqKSZgWXggEcUaiGFOFV\nTt55p33iTn7vQ0KT5ZIGcHBpGXew2jGaBiSZZS4PPrRu+QAtkkd6VfvFFOdv3hiiSHoSc/4U\nhgoAUlvmOMcUvHlhj91eC1G0hgqEKPehdyZwQD1qhDcpJGSV25/hPU0q7PLIj+U+lMYfMpB3\n5NLGGDybRyD3oAGhMe0ocEnmnNGsjcngHv3NJu9WwW6H0pHlU467s4z6mgBVxnDr3yTS7cja\nOCTkH2pQfm37enBpjEq65+91VqQCrIA2cZXOPrSsqrJkN8vcUu0FgCQD1Ipkir5mc4HcUAD7\ndu7rigr8o2NkNzSMoBB6ehpsarHnnBzQBJu/eEnrjBprAbkUnBPX3ojO9mz9M+1IzfvBg57C\nmNIXcm5kXhc8mnOxZcL1HX3pOJGyF24OCR/OkVgzOxHA4zSYAzfu+Bg9cmiNXMXXHOQKGjO1\nehHU0jMzsqKe/X2pCHFtykHJI54piliox97vnvUhIfAThMctRuCqehIqgD5Q3zoaThVIC45z\nSw/Nkrye+abu2K5U5PTml1GJ5ny429aVXMfAOfao9zKUByx9PSlfczZRtpPHNMQojH+s3ZI6\n5pW2SR5A+YHPPWoxEyH++OpFO3Bx8vXrtx2pDF87nJwAetKxTGxR/tcUyTDorgZGcEUMxhYB\nBl+vFAMVRlSQNvr707G5Rs6rz+FMLP5jNtKtjNO2smw7uTzgelINhGcqxycJ1FOVm3F1bt07\nU1zvkKA4HUZob7u0nAPemIduLRglQp9aZG5kY5ORRJEAwZTkjpSorDIBw3WjqA1oyWzjB9TT\n3jUMA/pnFDIY1zK2cjgClYBWRid3GOaAGR7fvA/LmnO4j6HaCfunrSMRtIK7Uz+tOkUeWuSG\nOaNRjd2/IAyOuB1pW3Kqk9M0bvmIzt4+8OtK7Bo1H3yO1AiOMN5jEcLUgkfaS3
0o+8pX7jY3\nAU0sX+8RjOOKBihmjUAAcc05lVycDORnkUrKcoR8wzg0xv4z1APBBoARXZhy3OcUbd3y56d6\nRG+7uTnPIWnfI6lsjDHA7GgQLJuXcp6Gl4kkdupxTOY+AMle2KXYQwYnAPpTAEztHPzdacN0\nakkYUnjHSnsvGAcHGd3rUTL0DEhcbttLUB3K5JGc9Ka0YALbsmpCgYJg8daijD7X3celMBWk\nTIODxS/KTvOQtGdyhx97vRu2sGZsg8D2NAAuVwRwT60hXbJ1APWh/mKknOOM96HO58kDNADN\nzTNt4Azyac2OBuyc/pSKrF+V+XoMU9EXcWHOPloGg5XgdCeKGJjkJ2/L0OKcYzjHXtTFzJEc\nA46H60gGyZkxjhs5BqT5pAWLcfrmkPy43den40q4VgN3XijUkY3mLjkKfWhZC0mAdxHWlkZt\n/Ck7eKXClSQGB9qZQp2sc496aoDKM8EnHvikUFVwASx7+lP3KrAupzSEIzFW69OKVchQQvAP\nenL80uSNq9RSbh82OST0ouAr7X4dcNjIxTU/eLhhg5zTmL7T8u40c7uOTQMWNhsZ9pIWo2b+\nMMTn+6KejPtdPXg+lJA3ymL7oFMBELbwMHJok3ZyTkg/eNDN/Fzuz2qR8KvUbD2zzmkxCs2/\nDDGOnFI2VjLen86bjY2M4TOM46U5isyFByFNMBrKWiBbg05VVdqqetLv8vIxu46VE6lYw2eM\n9fSgZL+6Z2JXpxzSfIrHPKAdqZHIqzFWHmY5+tPjk3EttAU/w0uoCKizREh9rdaftVVXdlcj\nrUay5cgpsGMU5ugD5JUZB9qYhFZ0Xkbl7Ypy/vcplkPXn+VJvzgg/T0p6q+0MG3HP3h2oAR3\nc84HHG1uaQqWU8kZ6c9fagyDzcqOOpzT5GWTBK7dvKkHvQNAqNtXnJHb09qcS0ce8KDzjk1G\nPkUkAktyaCfMwV7fwmgQrMrx8Jg55qSNzC7BeOMjiiTKtk8qwwPY0xmLKoHJXrQUOM0bnqwb\nuMcU2TCMoU574HTFPaTD7sAoRytQlh8yr25z/SgQ4gqBhtwJ6jsPSpZlaTEW1VXtzk1CyrGO\nQShHb1p+F3DyySQPvGkxArNv2fdRRgmlYP0yCF6gU+RQxDZyMfNUe4fOVbO3jigByYjwrfMO\nvy/yp27bkkYz09RQqbWG351Yc05WVvlf5QOhpgMjlGCG64wRUqKGii2jAb86RV8tWOMMfxox\nt2gvuwOKAFeUttIyg6DFPKmJcsvy9RTUVVZQTkY6e9Ok3jAZiB2YDP4UrgKjhf8AaDdanaP5\nlIA2AYyKgwRtLKFBGSvepfN2xhQdsQ5xQAqqvmfKuPc0542DZPyc/lUZkMilWwFJqT5o2UE7\n8HgUgHx5k+VeBnn396fuCth2ZAOmKI1eS6ctgMFyMcCneWzbjjJIwCORmgA2ll6b6kRfM257\ncAH1pI4W+QnHykbuakdFfc+crnoKBD/O8teTkqeRViNBIwl6L1HpVdf9HBkVQTjjNWkUlQxO\n3+Lj+VUMRWMbE4LMeenarsP7yME/K3XFQhicSBsc4wakhwcvgsTUsZPGS0iGNdxDVrwEbmYc\nE9TWRbqV2yg7XB5WtrT13Z4yW6fSoEXtJw0zO5VAvIA9a7zQWaHy1CcMeTXE2FnhvLUfL1yf\nWvQvCtq015FG38TKo/OoZaPpbwmuzQbTcu0rGMCtRWXrn8Kp6ba/YbWKFc7VUAD6VcXDBQOM\nnnNSMc2cgqPrU+0Nt9aWMleO2KNvSkMD8vWpYwOGNJw0e3vSxgNgEYIoGS7huyBxTmIznOKY\nqlVPYe9SLz1UUAOReMZzUix4bOOaSNdpB21Jn7xoAVF2twam3BlP86gXDKP1qVV+UY4FIB6x\nkoSKdwsY55pse5CeelKMqM4yDSGLycc9aGjJk55FHnN0K1Ir5b2oGH3elLkN7GnL
jdntTlVJ\nF3DrQSHl/LuJpuRt560rLu47CkyN3zdKQXHFdy8VGsZTv9aezBQAtDfMOM0w1Fjztz2pduWJ\n60qsuzApPoMGgNRM45x0qVEy2c8UiZX5jyPSgdd33RQPUXkc0ZMnvQrdqczhV4FAhG+8FpzR\nhmxilGGUHPNPXO3pQUMZdi9MCm+WRk53e1T7TjnmolXb+dAxFRtuTwPSljXdnb1qXdu96Agx\nkcUBYbGf4T940gB3H2pfL+Yc8+tAJjkKk596B2F5ALHke9ND5XFAyzEE8CjdtyoHWgB8bfhT\nsM3Xp7UyNN2exqXaVGBQIiK+o+lOePPXpS4+YE9KRjuU45oAZtI+lDKwwB071IqsyqW4p20s\ncCgLELJil8voRUwwVwaZ/q/l5xTuFhFQLweaasY3ZIx7GpWx1FNVTI2TzQAnl5OQ34U84VaT\ny/QYp7R7kx0qQI+WFOjB6E4pVToBzTyqquSetMQxueccUxo1kXaRzU4AEeRzTQPlwRzQAyKI\nLwBxTlY7jkdKkMJ2jHIoVQoPGaRSGv8AN93ik2hl9PWhQc8DIp207TmgBowDkc07bu7YNKqg\nLnpinLzyTTAZtaNcjrTVBaTnj61KqFhzStCGxjqKYEX3SQO1OZxxkYNOVe9KycjIp2EyNmZW\nAPOaXcNu3p70Sfu+TyKSNfMYgnigBv3Y8DmlU4xlacFKtjtUrKGbI7VIEbNnFLu3KccdqXaG\nzxSLkYB70FWEbKxrt/GnhtvUc9qFjMjYXoKGDEg4zigB7Y2jHNEhJXGMUm0ggDrnmpGUjORz\nQBGFO0VKw2hTjikWM44OTQ3zHr+FUAya7ECnIyar/a2ZTs71JNb+f9481ZhtVWEbUHH50mMo\nmS6bAGPyqW3tzG+5lyx/iNWlZVk5G6pd3mHGeBSQD1hGzIPPepFUeWR3qGLG7nOKexwcDk02\nAxWPIxT1jZU5FKOm1gc1Mq7Wz1HpSGR7MjinFWCY+8etP6dBn2pz5jQYGaQiBVOcEdeanSLL\n47Ypu7y127fmb1p5Urt96YWAx7e420sWQvoKRYj5h3ZxTlG5tvSgY7hl570pxuwDinbQoHPF\nIyhmGQanqIPOYfhS5LZbOGpP4ce9SxpuY7R7UDG5JA7E9aR4+mfWpNvlkbhTo8TEgdulMZF/\nGNxqcYCUwRLtw55BzmnRnO736UAI+TzSqvmKSBjFLwaeF3Mp6D0pAIG3L83TFH3Ru7elL6g8\njNPVQWx2pARxx78npnmpQ3mNjquKFUeYRninqu49Ao9qAGrH84GaniXcx7YqHcFfDfhUyx5+\nbPB7UANkcqoaNcDpSuwbGc05VEa+tG0MoBPzeppCBfuk9BUi7QQcVErCM7W5FSbdxyDgUwHb\njv6ZFJvHzfL05FG453GjlvrUjHKPlDZ5x0oZQ0JVhyRjNAYHGAeKkVg0fPJpi6lGGaW3UxlC\nWA4q1avJJHmYYPYVIwJ57U4AgBmPy0DEDeopVJ7DAoZh1J5pLeXzFPtSAf5akkg0iyFuByRT\n1yqnCZNJGuw7hyKYCRgljuOKV2IUbRQvy5JXilZgy4XoKQCR/wC1zTiVaTPftSL90k05V/iH\nApAOXIYZ6U9trKD0qPczZJ5FAycHoKBCrt3dc0rKzDGeKQseABinl8kBhmkyxOOMUKzfMTnH\nQUcKCcHrTlJGDTAG3hRg0bQy8sRzQuR75NAHlqSeeaAF+78qnK05e2f0pu5jg9qdGwx83BoC\n4fdJC08gCPB/GmDO446Uodu4GKAHR7VJPrSr8xwTxRu3YAGBQy/N1oJCOL5umKUYjYlhkUqk\nqadJhl9aAF4kUEcU88ruHUdqjXBIx2p/RfQ1VyugZO3OOafuwuQKajDvS/w+1IlDgh53HHem\ntjiiT+dKq5xn0pFgre1DEeWD3py/Nx0qJlOelMB3me1FGG9vyooAmX5m4pGYjIxTyw
VQfeq0\nzMZSM4FZsdh+7Gec+9MB3McfjTNm0EZwDTh8hAHIpB0JGx90c0ikbTkc03eFyzDFODAKD3NA\nCqcc+tOBHXFRSOFUcEZpis8eecmgCRm7inMpZc/pUfmFgMinMXI64+lIBMMsZAH1p27y+j/h\nSEkAZ5NJJGHbhsCgYvmenFEbnaTkY7Co5FXoh57mkC7UBJyc0ALtaRetOZQqYzmkZxwc8+lB\nb2waTEOUfKCRx1qOQ7nOGGKfvRV+9k1U5ZuBTGSbiDimiMNkDinqw389aXaN3oKAGrgJRxkE\n8Ckb5Y2CjJpm1p1HONtMB7MWfAGFpJJGjxt6d6cowvJyaYq7dxc9RSARphIDtGGHr3pVZvLB\n9euaSNAVDDmnupOcHj0qrCJVxyB3FNUHbhm4qNf3YBB5p6ruxk4NIYbhu2j5vrUqoF46HuKi\naTa/TiiN93GeaLAIJNxPoKUt0wc0Mo3FcimsyhlVeD607ANaE7twO00eZtbG6nkjYeckVWZN\nw3dDSESSMOn60vy8BlyKikbjaBg0z5hjn8KQE0jKqleppqyMsfTIpyIrcnmkmG75T0FAxYmG\n3d3NG07sD7vrQCFUYAIo3fKTmgkYT823qKJGwucHA9KYsyLgll59+ae53KAnIoAj5fGPlzQ0\njJ8vUUjc/ebA9qA3y8c5pASKw25xxUbN8wIGKN3GKVMMOecdKYw3butTwMjfL37iolXC9jS4\n2/OBg+tAmNfhjnj0prNnIPK0nPmc80+RkjXCjOaYiNTliMYHamt8r8mpGIwD0pu3dz1osAo5\nPJwKYFDZ70/gjFM4XIU80gGtIQQg4pS25Tu59KNobBHJpPXHWmgA4YcjFNkbbg9sUeYFfHWo\nZydvHIo6h0J4bjdIq/eGa+XvjlpfkeIdRlIIPmce4r6WjjaPLA/N6V4h+0FZtJIt8CAs0e0J\n34zzVIk+ZdYUxxueig4Arj9QUt8wXPr6V3etIJLeRX6d64e+Zo5CF/1foa0RBSmZWjBHA71n\nsyLnsByGFXbhsKeNqnvWXLKjbhjI6ZrREsp3TI0ny8HOapTLumZjk/7XpVq7Ty1B+9gdqgZi\n2NowD1FWZPcpXUbLNuU5QjmoZlXyhuTYCM4qzuPmAHrnoarXP7uMlnyWFMRno2IXcnJHTioV\nkGF5OW61YwzKFxsqAW7SS4XgjoaYEEmSx5we+Ka6lV3RrvHdac0LRuGALjODTwwjjdWI2kdB\nQIpyfvG+b5cDA96bv3rtYfKvBpZIWMalnGR0oKhs7fTnPSmIhZgMr/F2A9KjVTwI2OAKmRj5\nxbaASMVEz/6SUjbkj8M0xjsbY9i8t6mkYSNGUI5AyaTzAffBwKVRmM8nJPLd/pVDK65lwAGJ\n/lRJG6yBQcnrUnzrKQMkN2FSSQsse5m+ZRn8KXURTkUyMOMNnkilVVkymdpHOSKkZg24jo3Q\n1BMBGud3P15pgR7mAJBABOMEcVIMsDjr3PallaJl+QluOaRbiMrnkYGA2KYhrNyFzx60skxb\n5VwoH8QpqTJIuC33eGJHek2+XhY/n7k0mAx5N/AyopjnYokVdyjgmpBINzA8t2GKTcYzt3Db\njO33pMAJ3KzEEjHatjwjr1xoesW8nmYRmBJHpWOZPlUbcjqVzRy3zt2PHtWdSPNFoTjfQ+wN\nOvYdX02G4t8NG43Zz+tZ3irSYdY0qeFhuLDH+771wPwV8WrNpp0+VwzqcqucGvU/LEm8YJUj\nBr52cXSmeTKLhI+QPEGnnR7+4gAbbuwGPGea9f8Agr4qa4hFhM4D42jceD16VQ+N3hPEMOoQ\nRnapxJjsPWvNPCurto2sW00bs2GyQK9J/v6budek4n1wYwjAde3WvN/jDrF/peluYMmNfvbf\nT3/HFdtomqR65pcFzCd27htp6GqHjrRINc8PzQOuZCM/iK8qnaM1zI4425tT5Un1O8vZ
TJcz\nM5IwuO34fnSWslzbzK6FiwPPPH1ovrGSyvpYWJ3xtjnjv1qJGkEhdWzIPyr6C0ZJWPTjFNaH\nuHw7vl8RWE0N2fMnUeW2TwR06Vz3jj4RvYW8t1pz7+d3lsMfXmq/wgvv+KgYbsZX5ue4r3q7\ntftNs0bAYYZ+avLqSdGWhyzk4TR8cTMVkEMyNFKTggCtLRtcudGuRNbthl4BHb3r1L4mfDX7\ncpvLOPy7iPuFwr+31rx6NisksMilJFO3b0xXoQlGrGx0RkqiPor4b/EyPWoUtL6VBcr/ABZ+\n99a7+ZY7yFcEEjpXx/YahNpN0k8b5UHlc4r6A+GnxAi8QWcVpK5FyhwdwxXm18O4+9E5q1Pl\n1Rj/ABS+F8WoEXtjF+9K5AX1rw+4hns5zHOHgmXjHQ5r7Hmj85fLbAiPTNeNfFX4dteQy3kA\nIuUGVcDhvatMNX+zIilUs7M4Xwt48uNFuYi7Nhfl+VsBvrX0D4W8T2/iCwaVcCRcb1JyB0/+\nvXySouIW8q4TypYz81dn4N8cTeHdQGWOJcAeh/CtsRhlNXibVKKqao+olyScnoMj0xVcNuck\ncDrVLw9r1vr1itzAwI25ZAckVeUFn+X5VzXhyi46NHmyi47jdxfLAcU1e3P1FSSMVzj7tMjO\nM5HJqWSDMGX7uD601wdoAO0DnNSFjkKR260xo93yk8UhDNwfoafGuCeTSDbu2omSe9OwW4FA\nhVyrAntUdxELiNl3cE/lUy/7XX0pFUBjxgdfrTRSPAPi5brDqYIXCqMkeprzhtyiMtghufpX\nqHxsRmZZycKpxt7j0rytSNoSTkjnd2r6vC39me3S+EkELPk5C85qMO7Ody4H0p0cyyZC9PU0\nbdvO8lq7Ddjdx83d2/rTny2w5y3elEZ3dRuxUbQ9cSZXuRQDBm2t0KNng01mMyvuPT+KnKBu\nGzLjpuamyNt3KRlf4sUdBajXkDKqhfm9qfC4j3AsHKjkelVmvoI5Q7vtwOBUCzJIGLsBuOdp\nPJ9qQFvdlcRnjqSe4pY1Hm785GM4PYVGsomUkYyBge3tT1dlXBGDnFMRIuF5xgnoKdGwaZVK\nsWzk1ESCo+bnPT0pySOkmc4APX2pDJlVX3Y+Uk9SKiuFVR8mV2npUqsJmK5+QDikZgzY6nFG\noFaT9ziRXJLHoBUxUFcAbjSeZlVV+noBTkHYDjqDQIjKyPJuA5+7inbTD1AxjgVOsWPmzw3z\nD6VCy/Lwdx3UFBncyZXLd/agKUkJH3Dxj+tNwWnIf5BjGex9qRV+Xax5HFFhAGZIZNgy36/W\nm7VCoFXAxlvel8xWkUjPy8cdxS9NzEAKRwKAHLnaFboaYuwKRgkdMmnIxaNc9OlObjjG5cdK\npANZuSQM+goXKwhsBsnoaV1Xg9EYUMqyYBbOBUoQjdSQQBj5lqNV3NkcYGR9KftCyhcEgims\nE6qMN69vpSGN8x2U89eop8bD1zjpkUbF8venJNLlmxxjiqAiWP5WYgjPajbIuFYgA09m77sn\n0FI5dsEkEdN1Fu4hvG4LjB9RSlRuJVs/7NG75umT0zRnb8ykdeaLDE6yBe5HApGTK/P8xB+7\nmnMxZSd2G6jAoWTywHxyxweKYhfM3SEbecfLgUnmOMN1bGNtBYr+7bkk5yO1BcHJ+6F9qAFK\nhlDNx6r3pJIy0iqMCjjblcHd0yaOV+Vs5PcikAjN5YAzknqoo8zdEVBAAPSljzGWI+83GTRy\nu87RtUfmaYDJMeWrYPPGCKeVP2cgetOzvATIB6896TaVbng9wKBkTKFUF/nx0WlVgedufalw\nGY7ck03c+SqjB6E0hJjsHhui+gpCwkkIVdqqOvvQW2t1wO+KF/dyCSNueho6jFVW2oc4BOGB\n700K3zJnqeBSqeNxH8X4UrRnYx6PnjFMQxc5xnBFSRlDuLDa4/iFMljDLuDgPj86cu7AOQ
Fx\n+NADVjXaRg4pflbktzTZiQqDODnmlyRISF+XHYdaQCupwGD4A67aY0arJjPUbuafCqyKSfl7\n4pm4MjFuucUwHLJt5LfMR+YpMrM25vlPQUinbhfxBo3bo845zS6jHR5ZSobDjpSMz8YKhvUU\njKI2yp4bvQsIjbPQnrTGIykx/IMDvT/lbDY7YNJuyjgDavY0u5lZVI4I69qkkGbzFQK3bp3p\nFBzkfM3TFJtVlw5AK8ik3H5WB+ZuA1MY9gob95kEU35edpyfSlBPIb5jSZESjI5zTEOVdrEu\nQBjGKXzF8sY+lRsg3570/wAvy8YG4t1oSJuxGX+PPGO9N+aRQT0x8tKxCuVPI9qe0LHBwcL0\nA6Uihu3zVAJ59KT7ihQcv/do3L91R9afCqs5I6DrmgBCF3E428YIpqJgknkDpUkKq8e8nB3Y\nCmmtu5TIGDmjUBhjVo22s2/uTwBSzH5VcjK8cL605lZgSDx70zywGYqxYLwaAH8rGxzgt2pu\nBCqgLgnqabjfGT1FO3FVVC29Ox96AA/KPlO3B6VJu81g5UZ6ZqOPbuY546U7KhSpOBn71ADm\njMfJ+YmmGQbc9/SnNJ+g6+tV5HXcCTw3b0oAlWTC4HzZ6CnbjwDy38qiEwX92F71Ngsx2nAA\n6UxjfM2hlDc07ztrDcdwA5x3qD5JGyTjb1I9KGePeAG9gDSGS5Pb7hPT0pVjCMec1D5hViFG\nRjB+tSQ5ZhuHz9qZIpw2QeTSOi7C2cCpdpMZOOc1Gp3YyPwoAXYeVBJA/ioMaqwJ+72x3NDM\nVky52kmkkXawyeOpoGOVvmLJyOhWjO7cvRccYqvDJtZ+flqZZMsEzsFACeYDlguQBjmg4G/K\n5B7ijy1jVl7nvRGpxvIxnkL7UtRC7XblX5UdPWlViFy/yjGeKTcWXg4/z0pDlsMQQVNMrqOb\nG3J7+lKv3c4zgZpu1vmzycbvwpSCpIH8X8qkQ5WMi5XrSZ3ZCDdJ70u3yvTb7UoyWZlG3bzT\nQIbtl7yDf3xQy7Fxnn1FMOx8EA/Mcn2p4+6wC5x/F7UxCiRto6Ej+HvTMPvPHXmnbQOR/EOt\nNJZdqryO9IB/BjGH3P8A3fSmsodSCcN2HrS4Ei5Hy88UMhj4PzGgALbcrjL+lO+fHzAB/akL\nd924Y6YoXJwpOAfzo1AVJRuAxh+hobEasOSueVpqr5LZb5l6U5fMg3pw4J+77UwGplW2bfrT\nyo3MuPlHNSNJvIY/KFGMDqaYqmTJ2nrigBskgVQFxk9Aacsgb7xIIGBgU77O0hJHVfUdKSSN\nlUEAcnBOaQaibRCxDKCTg/8A16eFMYZV788dKjXLybWyxA6+1SejDAXp170kAxm/iA7UpZPu\n5+YikUCMt/FSLuDbimQPWqGJvy3y/eH8XpVhsqPM2cMOGqBWKqSBjNJ5jsoRjyORQBKjMpCu\nOvepMIXxkbsfhUMmY5Qcb+MmpGPzKij5m5zSARtjTDavtSxkKzFl2tnjcKj3DcAvB6n0FPMw\nk3BwSVHP+NMBV3SEl1wOtPZQoVd3B5qJZnVG3HKkZB9BQvzZJA6cfSkIdIqMMI+O5p652sFA\nIYcYpsZGBhcGlVcNsLbWz1pACbo9oHJ6H2pGbbIFxj0NPXjcm/A9etKu7q4wq9c9aYEi4WFQ\nPXLE0xlBJ546jFC9CY+Q3PNK0kgYDgeuKBg0YXa27B70rO4UKH6nP0pDt8wMRkVLiSabOwRs\nB0/rUgG4ttK/MqjqOtJ5gmBwMqejf/WpQoYgHgjnjjNHyKuzuxz0qhDlw20jnHUepqSNnZXL\nDawqOOXa22pI5Qscjkn2FFiWSQ7pj94xnHUd6nRzGxwc7uy1Cc8BU5Xk1Kscj/OqY2/NSZQi\nsN5L56Y9qdGxSMkYK4xSt8uSeVY9AKZnzFBB2j0oAsW7fcMg/d1ZVGjYEthTk5qFAyRj+6fX
\n1qxt/cnj5v7x6UCJIVC/Ow3Y7+tW4OVCL25qqkbeWGLAH+7ViOQSKRG2DQxlgN5ecjvV+3mL\nSIkY+U96z4ZAshBO84+7jrWpYQn77nAAyFxWQG9Zq/2iMOMk4Feo/DuP7Z4is4cbirZJ7cV5\nto0he4JZe2B9a9a+DKibxI7+V/qk5z2NItHvcM2IgQcsOCasQssq571TwEPTmrMTHjjGakZZ\n2ttOOlSrlgMcGmKxEZBPehZGONozSGTLlW3DpT93zGo42wOcGlSR9xBAI7UDLHDRgZqQZUH6\ncVDGx8sno1SK7MoLHmgBZJDtGDz6VJA+5Dnrio8/NT4UHmZzip6jRPHhV6VJtPB5xmo0baeT\nkVM0gZQBxT6gOb5s+poX5cKTn2pG+Tp+tEZ3ZyvzUhjjyPSnRsEUgjJNR7huGaVGDMSM0AO3\nFVC4681IuFpsbccnml2nIoEP37ee3vQWDAHFMY7jgjIp6rheelIYq43bgtPT0IwKhXK9DipD\nu9aAHcKcYyaYxIbFPxjnvSqu5iTTAVlDIDmmNllx1pyqFXGeTSOwTjvQAbTnpipI2BBDDFNV\ni3Pb3pZNzMMVICovfOKkbKLnNNGDwR0obgc9KYCYk5+an9WHFNVvmBobIbOc0xkmfSiM7hyM\nc0xYyBnNP53begoGh7fLz1FMYjIJGKGYrx70jKWwT0FAhdw25xzSrjjP50zcN1TR8Z7UFCKp\nWTGcjrQu5ptwb936UuC7HLYFNkPl7RjNACvID8g605eWwBxSLH83P407GwZHJoEHRjnkULIG\nXjihThTkc05WXH3aQxF+6T3oVT97I+lKuG79KFAHOee9ADFQu3B5/Snbdp4/GpMbV6Yz3oX5\nuP1piIJJI4hkjNSrgqCOhpPJVvvDiplVeMDigREqt5nHAp6427WFNZ238CpFG7JIoBDFG00K\nPn5qQx7jwaZJywHYUDJIQRn0pDhcjrTVZug6VIEXPIzQBEuduG4Gac3OCOlSsi9sfnTNhUZ6\nilcBrKrYGfrT2jBUFfu+tKI1JzjFSbcLtPWmMgWNjkk4p235Rk/NUm09MUKuT8wx6UCYnyqn\nPWmYZlz0qTaHwCaPvEqentQIrMpbikjUDODVnyflpPJULnoaBhGu5RmmyR7funqakjj2Lyc0\n/aCuAMmgpEKrjIpixvvyRxVhYirBjyKUZ3HGaAYyFMLnoad5fcnFSKM8Y60CM855AoCxEIT1\nBAp/LNz90U7aFT3pXTzCApx60CEVRxjqaUwAc9D70uNvB60hzkZPFUBG0K+YMHJ71JtaPJHA\np6gNyBUh96QEO0fwjinJH3PAqRVXbg8Gk3LjHekhjApjye1Sx4J7Zpu0hgT92lEeW+Wmxju2\neuKfCCq7jTVjbyyP1p0bLlcnipuMfvGCAMUqttX2prL8/PFLtG05PHX60AP3bmxjPvTMNu5B\nFKpV/lHHepo/m4FLYCOORt2amUgtljimLhecU7aJMDHNMBGXaMdzyKTzCuB1NOO1WXPOKeyY\nyQMZpdREfLKGHHOTU8bryy8cVEFODThjy9q9aGAFs5PfqaWI7VDDjNKNrYXocc0q/eAHQUgH\n4DLhgc0kJwxJHFSsxIzimKN3PY0xjY1+ZiPu+9SK20DecH0o4VcUrRsVVuxNIAIO07eM0xgU\nAfrUuNqkdeaRgd3AytAAM/K/rUig+Xk+vFN8st0OCOaesb7AD3oAfHhnyRkDvTVlXzdu78Ka\nykYVuKkihjX5ivPrQBIWCnI6+lIVMinjnNIyqzZFKrMrHI7UgCbGxf1pyN7Yz0pEXdnJzSjG\n5ecHOKAQ4/LGQTzTljz0ODSNhpM8YHBp6k7cLxzS6iBmVfypYx8p4680KDuORQ24MOuBVAP3\n4XBpcZXB5FMRi+cinqxbjoKBiL8q88k0Roka8cH0obCDJNKVH3+9SwF/1jLuPGelSfxHB71X\n
2GTBDU5QQetAE02SR6UMq8KpGabHkDk59KNqrlu9A0iRFGMMM0rMFXBFIqiRTnNNZNygZ5pC\nAsCxUHIoVt2RQsYjPHX3pDgZYDPtQIkaRsg9qP485C1E6llAyamWLZjJ7UFoAxwcDNCybpcH\nihj+75PGe1MkXcyvjmglkigq2C3FOZhtI6ikVhnB5pduN3pQMasZAHNPWMZzmhmPy447GnK2\n7OQMUDEyegoVSRzzQjHcRjgd6WJgwLAnA7UACZYkdKefu4/WkUhmzn8KUtjjPFBIqqVXrk0q\nt+fpUbbhIGyNtKqlpee1BRID2xT2B2g96Y3zZxninltu3PXHamA5sKgzzQWCjpkUnOOeTSew\nOB6UWEPHyqN3Wgye1J5m5tmMnsaTY3XvQMdyc0q+nWhW2r8wzijr14J7Uhjvn/u/rRTfL96K\nAHrtKgnqDTZ1Cr8o5NM+0ZwoWlDKz5L9P4RWQ9RAhZueR0p20LyeaQOy52rkevrTXbenXFO4\nagvzljjNLuUMMjHFJH0PaklTdjnmgQ52ZlPHFNDDcAKU5ZBz9084pYo1XJDbgaBhIfTpSpjB\nZuBQVIHy0wM23B4FIY8fOpwaZyqnjJoGS4ANP2upz2oASPMYJIyDUTEj7xwPSpH3MRzTeCfm\noAY6nAZVz609wdwAOTikMg8zI6YqJmkZuOlOwDpG3NgfjUasQxOM1NtJOSMDvUZYbiidfWjq\nIX5VIYjFNaTp6VLhcAtzxUEhw3J4oGSswGCp5NM7YHBPU03zl3DtUiR7sc8UgDheAc+9MaNp\nXGT8opxHUAYNNzxjPNMB+5Y/kAxTFYLyTkU5sHlu1PVEjAPUEUhDBhxuAwadx5fP50evYUwK\n4XA5XvTGICfK+9mk8xo/mzgUfxkfw0j7eTnI70dQGtNn7w5qVGWSPp0qhdalb2bCOU/M3Iqx\nbOZIxsGT1oAlXlW29e1N+6vJ3Y60oZt24Ltx1pQgbPYZzSJGcSLu6MKGzjgDNB+SQgDI9aWN\nQuWz1pjQqKG4B5pwRZNxJ+6KYNsfB69c02WQeQ6qcFu9AhqkOGORjtWfPeu0jxW53YHLdhTo\n4RFGVZyC3Gaa8ItYzJvCp0OeKkDJnt3V1MZJkzk81vW++S3BfjNZtvqdpNdpEisT03ngVuqR\nGuO1Moq/Zw/yg1OsYjj2jkipI2XlsdKYznduzxQIi2MzEZwKSRTGmFOT609m27scnFQyMQik\n/pQFyUZRQOooZiqjnrTfMBjAxS/NkelOwhVYck9elNYBsgCnNjJyMUi8d8etIRGy7RwaTmME\ngZPellU7Qg6+tLtO0E9e9MBkbKy5NOCllOeBTto4GKRs8jNDAa2Fxjio3YI2adJkLgfrUYhK\nuO+aOoCMo6jr6Um3byx5pTxIKbMg27jnNDGQsDKSobk8V5b8eNPM3hmOZQS8D4+gNep/dYMo\n5rkfidprXvg/UAvLbC5/CqJPjfWI/wB5Lu/1bZOa4nUrVZM/MQM9K9A1eENDIrH7pxXE6lbv\ntYqePatEZswpLf8AdPv4CjrWDJ+9Uui8ZxXRzfOpBbK96w3KxbuMR56VaMyjKm7bgGqUyGZZ\nGQ7Hj+bHqK0JGK5OCEPAqpuKP8w+UnBNXcVjP85jJuYAFT+dMmXeuXXAJ4q1drGZiwHy1Tml\nadwiAKq85NMkqXP7sHBJdeSahaQx/MBncOtWZAWmywPTOcVDKwU4P3SOtMRWeUllCnCk/MKi\nuFA6cKTyaRS0aucZH8xTlk8xQoXBI4zTJK9wxVscEY4NVSGlwiye5471ZY7mZAnzdzTY4w0e\n45Q/Sn1GQeZwWzgYxn3pFjMe1goDjp71KcFggXtmqrbvLVFP8XINUMdJGI5A4AJxhvb3o2sz\nMysAf50GIDchOG6nd0qNiYlJA4xgE9aYiRc7cKwC9T7U2O6a3JRhuz/Ko/MzESWHtUUis0it\nI2
F45oEWry1dYxKq4VuSKqeWqtux83pWtcXX2qwVOBIDxz2rL3s3VgoU8kjrSGiJm2lsnYMc\nYFDZaFccKOadLjLENlTyd1NjjkVjuI2EZGKAG58xQdmF9PX3oChXHzYHXinybpNhU4A4puxd\nrE8noqr60wE8wqpCgE55NROV38AE/wAqduZV2Ff3i9cdBSbQgBIyaYrajYxmTeRhfXNIJAzk\nD8KRm3SAKCEzwe1OwHY4ByKQ2aHh/WptH1SKe2yCpBbnrX1L4T8RQeJNHWeM7bhVAdSe/r9K\n+S4SRjJAOc8eldz8LfHMmg6rGJpT5DkxsG9K83E0eZcyOStT5lc928Q6Qupae0UoDo3ylfUe\ntfL3jHTDoevXVt5bRw7soemB14r63jkjuIIpwVdJFyCOmK8w+L3gYappc91bIPPi+dGA5x3F\ncFCpyvlZy03Z2Zk/BPxlHK729xKySMPudvrivY7iGOaN8kEMOD2r490fUJtJv1kG6LLgNt9u\n1fT3gjxfH4m0uJWZTcoMNg8gCniKfL7yHUp21R5L8ZvCslvqLalFDsjICs6jgmvMFA5xweBi\nvrXxhoCeItFntCQrSLhfr2r5T1zSbvw9qb2t0jI6kqCRgde1dOFqKSszpw9RWsdn8H40k1oO\nzAIWwuB1b0r6K/1sYVuCea+dvhBIkWsbGPyEgj/ez/Ovoncdq5OfqK5cVdO5y4j4kQ3Fsk9v\nJFJ86EEAV83fFLwy2k6s92sWI5DtEgHJ9sV9KbSxK54PFcf8S/DSaxoMxKB3jGRjqD61jh6v\nJImlPkZ8zhdwDhuV/hxmr2i6rLpV7HNHM0ZByMHHPrUUmiXBnMQRhIp2tt6Z9a67w/8ACy91\nIwPMrgbgfl7/AFr2Z1I8uux6E5x5T27wH4nXxBocO9/MmUZf03d63rq3W6i2MuUPGDXPeDPC\naeG4wgyeMYzxXTNGGcEHA9K8GUlzXieO5e8eF/Fr4bosEuq2K+Vt+Z0x1+lePmTaquPvjpnr\n/wDrr7J1TTYdSs2ikjLsRivmz4ieEbfw7qzyW5DW8nzGP+62etezha3OuVnoUJuWjH+AvHU/\nh2+iYlvs8jDevc89K+jrHVrbWo0ntXHlsORnp7V8gnDYcPhgfvV3/wAP/iFLoN0VmO6Hj5Cf\nve496zxOH5ldFVqPMrn0L/CUII5pJQwG4Dp0pLO8j1CziuogSkqBhntRuKkLuJzXhNW3PLku\nV2BdzLnP1pUUjk4IpEYLu3DOeKRshQVXipQgUEElTg05Wxkt8rYyKRW2nA5WmDgHPf8ASmIk\nUlFywBJ54qGaVvJLr1HrUoXy8E802bmN9r4UDNNDR4r8Z5FkhG0hwCFfHr6V4/520BW//V7V\n6r8XJvIlkgC/MJM5PfjrXlxj3KGxyea+swv8M9ul8I5tkiDYvPU035WbcD8p6CmlcqWUc0LG\n23B+UNzXUbsdJsaPIOTSKAoyX+f0HpSqpC7D09KWP9zkJ1PFO4EDTMrHC7ovSmSv5iqoXAdg\nu2pZOMrj5fbpUaRlpoW6IrUriufXXwv/AGQtE8feEob9y8e9FxPGw4PGQR+NLrX/AAT92zk6\nZrEiIBn98uee2CK+i/2U7wXPgG042KIVGAevAr2tvKUjK8L0zXM6zTNOVdT8yta/Yo8a6RgW\nk0VwjMQ7Pkc+3WuL1j9mfx/oold9OeXDfw459wM1+sEkUToy4BU9sVUl0u1uo9jwI2epI5qo\n1k9yeU/H2++HnibSdyXWkXCyE8fLgGsJYNSjkkWfTbmArwyPGd2c4P8An2r9lLjwTpc8Y3Qp\n1yDgZH6euPyrl9a+B/h3WLVY2sICFYt8yAk/j/ntWntI7Csz8kReRqBvcRt0w3GD3FO+1Iyj\nMgG3k+/41+l+t/sjeCtWEhl0GFDIwJkiypzkHt9P1NeeeIv2CfD7Rn7LJPG/8Co5P51XMu5D\nufC0
dxu/eLjHfHPFThwxLDhSOgr6I8cfsUa1o9tJcaNNLPsJ+ScbRwT0NeI6h8MfFGhSEXGn\nTOG4B2n+lVbqMxFkGWxlgOBUTSPs3bcYPalu7O90e4EVzaTQuw3FWjII/A1Ct6k27DABTjP/\nANapGTqEZsPkcZ/Gmq22M4wWP96kWQK+X702STbIobG1ulUA+Zsxg5VSO4oQu6EgLjqSaRW6\nqeFpFkByCPl9uKTAljb5iAnUZpTkZJOfYdqjjk+UgDK/3v6U7zGjVlC7gR97+7QHQYrANhuF\npu/cCG49GHWo5Lg/LgZ/2qbNIfM4Q8jg0bCLEMhXP8RodtseFG4ZyarsZI5FfKquOR3qdXCr\nvGVU0DGpJ1xn1pyuY4ifvljx7UvXIXjvkCo/vMM4HpS8wsSxptBzjdjJqORRIuxPqOaeynzF\nCg+4pqrtmLEfLnFUJq47y3eL5cZHc0xSsaEld+Tg+1I2OgYj5ugpscm0+VuwTycjikNkzsyN\nlQMEYxTCzRmM/wAPQ01iYyV+8w+bPbFCt5n480CHM37x2K8Dt605i2FZyMY+6KieYsuOoPGK\nJMMo2rtIGAKdwJQqyfLtwvWnBsEdlB71CrblC7/mHWpfvEDOVxTAWGRWZ9ylhzioWb5SpPlr\n609csCqAimjcuflzt67vSpAcqrNFluMdzTmBZwQdq45oXyxHkj5eoWiQq0fXB/u1QDF+82Dn\nik9B/EeuO9J54UKEQgd6c25cZTI68UAJtXcQPXGKRsLkH5cdvWn5CxnAAJ4GexpGOeGXOBy3\nfNIBu3aowefTtQ25ck4wRQoXYQTg4zSCNvJ3nkUagNjVVUFhkjrQ0qrIAVJz6VIzMqkN19BR\n/Ei4+po1Aax3SHPCkcEikWZ2m6gADFWlVJFIx82MA1WuYxHtBG3HpTAGjUBjlgx9OlMVV6Zy\nKkJDbct9KY8jiRlGNmOOKQCTMOWxj+dSBPl2MdisMhqiUBfnJHTljUm3dHuPLEcMDQMEjTyi\nXPA6J1NNkZXYjaVOfunrUisYIT8nsW61HJIqbSMgt170AO6SYKfJjkelNYAx7UPGc80vmeXJ\ntZsN2pqsGLgnLelFwH7DuAXoRSHPmdgMYojZljx7YFIzKror/Knc0CDIUFRyfWo8N9923Dpi\npd/lyH+6TgUOucgjP0oGNRR91jtPrTmAOcZz/ezTGIUDaMHH3qRpFVt2cjHNMQrSIxUjp0NO\njlJZkLErUTAfKoYBetO3gyHA4xUgO8zB2cjHIIFNh/fAsSCSegpikfu/3gb+LriiGZRI2GU8\n8Y6fSgZJvI52HAPSpC3ybwBn0pEaXLccHnNNjjLZbrVDFZXkjLAjnjHpUsdu5AiUAluS1MCB\nfl6E+tOjwrArIRIv8PtQIW8tXt3RVX5CM1CZMqC6fNnrUjTNMwYt827pUc0O4Fi+WzyKBEc2\nJF2pweppu7bhT9zFO5fIUfd755qTQfD934r1iDTrY7Xmfb8/Ao3DoZ/2yNTgliMZ3dhSyfab\nqOM21pJN6ELnJr7f+Ev7J+iWtnb3N/BHdMVwQ67gefeva7H4K6PY5Nvp1pADx8kQBHTjpS5l\nHcWp+Zum+FfEGrRs1vo10xU43Kh/Kug0n4F+OtakaNdPkgP95hj8K/TKz8A6fp+PLgTfjHzD\nPB6jFXrfwvZ25GyJUHqOKj2iLSb3Pzft/wBlvx1NFtjjG/cB5Q6kHOSc9AOPzrudH/Yp1u60\n1Z7vVVe+x8ttDHyeDgAk456c8CvvddKtI8fKrfUZq3DbQQqQkeMjBxxUuqgsz8pPiH8KdZ+H\nN4yagoCh/LPqGwDg/gR+dcvG23BjP19a/QD9qr4dHxF4WnnWNXEK7o24B65JP0Ga/PtT5LTK\nxwY2Jz078VfNzbEljeq5ZD35FK7KoTADO3OO4qBZm4dkADc05VWQrIOHXpz1qhg5G794O+TR\nt85fMH
Y4C0/c0uSQPyo3SKS7uNijO1RRcCHyx5p559O1DBm6phc8mnzKzKNnIByRRG2GJKEd\n+tMQ2ZWGAvOeMint8xxvz6H+lNZQVV3G05+6KViu3AXaCeOe9IBscnmKfkIOcc1IWbaAcDHJ\nprK27Bzz7U0R5cjOFUZ5o1GOSTbk7fmb19KdJ+7Xc33s9KdMrSRhtu045weaYZQ6hSN2O9Go\nDpD8oyu1fSlDHcoHQ9PemnG0yD5kxjFND/ITk8DgCgQ9fmY8Ac/pSv8AKqgkIc4FN2r5iH+M\n9qaNqTFHOVY5DGmA+RQY92/J6AY701oisYO7DtzirMyQ28IEb7sn5+/5VA4fcMrvjI4pAJta\nMjcPwo2gzgjcOKSQEFTuO0dfWkVjvJHzdz7UASjeFwu3HUZFNbP325bsKPmXHfPIFN3Boyz/\nACnOB60wFjPyANk5NOlba3PDk8Uv8Awc+lJICzBTjJ/i9KAHR/LOpK5xzTJGWOQn5tzHP4U/\nyhGNzSZzxkU0O0mSyjCjA3cZoF1GwM0gf59vPr1pWYFUjwwctgelNWPccEbT1xipTH5jqxwx\nXpSGCs4bbnn1x+dK0StMd3PFDL3U445zSLG20YbnvSAOVKrjgd6SQM0qlmwPSnq53EffHbNN\n+baWl464Bp3AXbtJRl+mKfH8rkBRkdaZuCxq4BII/Wmt5kLqd2N1MLjmfDFACSetLJJjYq8F\nTn6Uea0IGV3t/epIVLTMSMEjqe1ILhL3ABwfSljO11Yfe6YI7Uz5j/CSw7ipvl27yfm6YouM\nblsMW6Z605dq7n4bP8Q6Gl2mMBdwKtSJtbI2gY7UhBDGJEbe2G65pxYKnzfrUbf7XQ1I4ACo\nfmPaqAWPLMrRja2c805fLeZ45tx3dCKCoOMZyO9RiXaADy270oGTPGscioMlAOaQbtpdYyEz\njaetBlbzBtHzf3u1KzEsMt81BKByiqu0/N6VNj5Rhv3h6VHz5wY42Y5GKRhIylAOeoNIoljU\nMDx82cEVNgKwDLgdjUK4jhz39KVndYcY3DrzSELJjcWx7UscZmIRlwOvWmpIZOOBt4Ye9T/K\nJByeelAEvmfvANwx0wvaiOQxZXcQTx+FRbxu2oufXipI8lmyN3y5+lCAkVfmK5OAcg9qkjfd\n0UAr0qFN/k9eeopZpG/1hwxxjA60dBFgSAAjbu3VNAHX5S20e9QQsvklDktjOfSpYZGZlCjD\ne9IZZ2uvKnIX9asxgfIUGGb1qs3K79/yg4OO1WEjK4kz8pGOe1AXLNvGrMVDgOeRn+ValqrK\no3HKgfdWsuGH5Mo2Wz36Vt6dGs0wRjhSMcVHUDY09lhjL7yvIPNe7fBWIt50zKEZgPm9ea8P\nht8BkUZKcKa+kPhPZrp/hO2Yr++lyzZ+vFIs9AbOQw54q1DJ50eCNpqrC4bBxz1qVJAWBXpm\noY7lsfLweakBweOtUINShkvja7v3yjcVx2rQDBWJPX0pD3BRhcDpnmplJZhjj61EOOR0NPj+\naTB+7QUifzBjBqVfQ1VXIlwelSxyhvagZZjUbiW6U9dnWmRsGU96WNVbLZwtJgOdtwAHFCsy\n89R0pFwy5Jxmhi0bMF5GKSGS7jImD1FSxvgcdagty6rlxjjpUu1s5A4oYiVssvpSqw3deKi+\nZuM5oDDcARigCY/Nnt9KeqlsYNCso6jrSBQzYzigAXf1IwKmj+YDnIqJWPK05AV470AOdd2f\naiPOcFuKbu6inRsu3BoAkZd3SmeYVBDUqsV5Bp24bcsM0ANXJxnGaeMbidoNJ796Ysmxmznm\ngZP/ACohYhiSOKj8wMBgVO+Gjyp61LKBWDBv0po4HIoVgmBtqQyA84NBJHz5mQeKcshJwwpN\n3c07cWfjjimMTkHBNShf4s1Gq7fvcmnbgV54pgNxuzQN4yCeKkjAPAxTZV+YilcVhFj796l2\n4HJpm3GC
DzTuWXNIYm3+LJ5pz9VOM0u1tuOKZJOY+2RTGOZgx2jgnrTmzjjrSLhsPilVjuOe\nRTEOGMD170oIbAbgUkbIc47UjL+VSMftVQMdKRtufQ01FLkY4p0ibW3dW9KAF3GRNp6U5P3Y\nx2pFbpxT3XcnFAhq424P4U/naOaau3Az1p7DgHj8KAYqfKhzyaYT+GaVfmbjipMBuKBIjVd3\nQ80jRl+c4p6sqtjFI0QkPU0FDRKVjOR0qTbmMMO9AA27cU9Y9re1AGcunymYytIdg6DNaEYD\nqtSLGGjPr6UkahTg/LVAO2/MQRTTncM5qTftXHU+tHLYJGKVwGD5mJzzRuHHensFY8DmkfkZ\nUcUwI+NxJ704L8vNL5Z25AqZhmP3oArvnb04oznrkVNt4WmSLjBJ4NA7C8NzTl9Ogp4RVXOc\nmkPbPFAbB5fYHJ9KFy2Tjbin7Apz3oj+ZjnpQBCp+YknjNPUHcfSnbT0KYFPYFVz+lAyECkK\n/MDnFTMu4ZximMpx92gLAQH+UdaFjKrhl59alRUZBx83bFKWxGQeCOtBJDGpQ/Maefu4/izm\nn/eXfjmhUO0k/eoH5jV+bk4BpEjDNk9e1TKqbQevrS4iHU80FDQQykEc0LkMT39Kl2hPlA5q\nIRtuJHJpCHK37sk9+1IqLt4FTr8sWCBuojU556e9SA2SPdEuOTTPJIwWx9KsLGV71G8JkPXG\nKAGLt87OMdqdGw3EA4zUca9Sxp8alunHpQBOqllPpTlyF9KFdlUKcnijdtXmmIh8wbskVZVg\n8ZzwRVXcGYnHFTR5ZeBkH1pCAKSue/anJH5bbialVgrbWx0601cDOfmNBQwNlifXpUm0qOFp\nVUdRx9adw0nGQvvTGKuVPXimSRN1B4qZVznAoRCGOTg0WAjjfdgFelSr8y8nA9KaX20u0sxP\nbHFIB27KjAo2nbjoDQoZRnpRy1A+g5lbbkU6Ri209AuOlAyqbj0py7toPagkaw3SDcuMnOac\ncrnuKTzRHjcM0sZO/cp3D0NIaQojLd8d6TcXwxJ2jtSfOvzHgZqV1WTDfdHtSCwL83K8/Sgr\nt+bGTTlAjb5eRSSDc30p9Q2HBOhYEBqdyrAUig/KW59qdv5wFyPWgQ7ceaQOehNKuDk+hpkk\ngLA7eKTGPwVbr1o+de9IimTG7g07Gec+1UK4u4lcEUjNzkdO9SBduCeT6UvAz3J7VLAarLjg\nY9aXYCn1o/4DzSxsWBHANAxBjGQT8vWnBAeaVOU9OajzuOOnNAD+e3SnE7SCRxSN90KDSDj7\n/T1oQWHPlugwKSP7pFDDcvB+WmqhyAvSjoCJlf5QQOOmKGb5eeeaevuMGo8Hk470gQ9ccZxi\nk29QTUdxlI9y8+1Cfv2V+VXFAxy4XrS7n4wMjNOCjJz0pB8rYB4oGOZScHpzT2x68dab/rFO\nOCBSrjyxuHNAkIvzLzUix7VCg8mk8wbcYoB34J4oGP29M801WDMdo/GlckKABznmn+WNnylR\njtRcRD5fzdaljwMjkkc00YO00/I7UgF52H1NPwducZNNUZbjin8qMVRQ3J696crDHP50jKdu\naQMMAGkIU5ZuDj3qQ/e5NN/iC0DAoBCs233oXDfWkQE5JGRQq8lgcGgYbW9aKduPtRQBVkUX\nHy7topbeA2xyrbvrSgDcMmnE7UwvJrJ6BcfMrEjDY9aRU3rz1zSrMGXOw+nNOjK596BisoC9\nKgYlV5NTPnbz0qFVJbc3IoGPUpDGfT3pzfcAUZPtUWwOx9PSgExsVz16UAP2tt+9zT1kDLtY\nc0xVzyTg01WCsSetMBw+V9wH+FNk3umc0yZS3yqeetPVW2gDgd6QCKzLtBaoWk8t24zmpQhk\nc4HAHemgfNyOKYAWVVGR1p64VgcYGKJEDRklqSEq6hS2RSAam5nbutKyDqCF4pdxhR/yFRsw\nZBtOaoB3mb
lBxx0pjEbenNKqllAY8U7y+/aiwEDQBsHFW+FjA703jPDc+lMZ/nyaVgEG4ZOa\naWCfeGTTnYLjPJ9KYy55Y/LTsBJ5isvT8DSJIvl4IzTRjOBRgKvJqQFZtuFLUqybTgnAFMcb\n1Dj+HvTGY8E80APZ/QUz+E5Gc0biykgULJ+7AxzQAhtLabbJMgLDvUa3XlS4UbQOlTLGd3zD\nK1DNGhxkc55oYFl5V2g7s561HDcRTPsDDcO1NblfRRWVqlrJ9kmkhyHxn5eCaBWNvcpbpwOl\nRzN8w44NZ2iaol1bpHOdlwihWz61oviRhuPA5FAxiylsgpyD1puA0Zx61J5YZtx4XrUfys2A\nOKBWGyL5gwRkDmsswi7kMs3zxg4VO31rTmX92VThjxmoPsrRQ7VOT3oEVGs1FwrFBnsQKuje\n3HXmpVjPlbu47VLFs8sMepoDUcqrsw3WkT95kHgDpQFVstjBp8fzADGKB6kLR4PU5xR5YGM8\n1LMyg7R19aTCxrzycZoEVdrsxHCr2p/3VALc1BPCZpo5Q5C56VOypFud2wuepoDoOY5xQAVY\nkjIprNtQMBkHpSjO3PUUABPfqKR/mXFLu/d8DmowQvWmA4MeMnAprUjHdkDp60bfl3ZyKBDZ\no2ZRSD5WGaeW3R5zg0inevvS6gJtDNzwahmYcr0IqRs8joexqCb5VBblulPcCFlbaSG5qneQ\n/arOe3dd/nIUx9RjNaEe3OGqK42w4Ydc8fzpgfFfiuyNrfTxY+WNyhyMZwa891lpYbgrGBsN\ne2fGix+y+Jr4qAyy4kXAxwe3868Y1eN/LYp6d61RlI5sIWST5fXIrAvoysgHQZzXSo47nHrX\nM3WPtMrFvl3dKokpXCtcZIfEY7VFJaie1Dq+OcUSfvZGCZCA1Y8yG3sWUfM7HpVxIZnPGVt2\nAbzMGqJG45zhe9aciqqKB8pPWsqduWxyoPQVRI2ac5byvm4qnJI5g+dc81ZjZWZioxkdAKhk\nV0ZWH0FAivICUBXlelV5Fc8HhqtzOd3oO/1qvI/mcZ+f1qgI5IW8tdrhF7k9agZtzYIzU0Cu\nwMb885GTTHzl1bAPbFMRDIzRvu2moFwrsWX7x4PpVhy7QlSw3H9Kj2lkXHLL3NUMgaQs+M7j\n0oZWiXLDIx07ipZBGr+ZjJ9fQ1HI2ZOmCR+dDArsEDDKbmIziibdGoVSPm6nrilYtszwGY9T\nTG/cg7uT0WnYkvaTDHLMqKN2OCfT3pNUs5bOYrNjb94YH61DCxULLG2G+6wFWdUvJrq3Ug7i\no27vSiwGbxtOPmB70jb2IXOeP0oMUka78cHjIpsjFQGHQDJoAVstnPyADFMgYrCSSAQeDSyM\nHCEkhTzTZI2Ygj7n0oAN2y4JJzuX9aR2HPB6Y603cD3z2pcrJHv24AOPxpDuM3ZQFeUHGKST\ncjDnC9SRT48RxhYmzk5NRNI7FuAV9KYDZF2/MHDBuaeC9u0UgO1d2aaSPLYbMt/dFKVMkagr\nkehoceYVrnv3wt+ISX2nxWN2vP3Iznp616FcwpdRGMjfG4/MV8kWuoPprI6MyshBXaTwa+jf\nhx44g8RaStrLKPt8XUHq2BXi4ig4vmicNany6o8n+KngcaHqRurONlgYbc+lc74L8WTeEdai\nniJdgACAeCO4xX07remw61YNFPEsg6j8q+Z/G3gW68N6k88as0EjEjaPu5opzVRcshQakuVn\n0nofiK28TWMdzaFSWHzKvUe1cf8AEz4dweJbV5ocrOq/KcZ5FeQeA/H174P1FkeTFox574Nf\nRGh+JLXxJZrPbugk28rkVyTpujK6MJRdN3ieHeC9Hm0nXIVuY2QRNzjpn1r6FVhLCGxycED8\nBWBqWhwn/SPLVd3PToc1oWGqW8sexpFBUAHJqKknUJlJz1LjbuCBzmmzqrKUkUPG/DKe9WPL\nCgkNu47dDVcq
rNuNcl30MTn18D6ZHdNN5ag9elbdrYxW8IEYUHsasKqyqFx3yaU4X659Krmb\n6kuTuIj+XFyAG+lLGdyZIBxyabLcQW6FpXVeM/M1eceNfilDpELR28gbsGU8n8K1hTlN6IqM\nHJ6HTeKfGUGhafNMMGVfup3r5z8YeKm8Ram8uQIiuCoqHxF4ou/EWoG4aQgH5SM8YrD3BVJA\nBXoa9zD0PZLXc9SjT5FqCYVgOCtK2WlAH3xyP8adDaPMyrbo0jNx0rs/B3wzu9Ynaa4jLxKd\npXpt966alSNNNNmk5qKPTfhJrF7cWawzZkjjTO8n+leiAng/jWT4f8Pw6DpKW8Y5XjzMcmtR\nd3Javl6zUpXR403zO4hHmZ470uWH3eRilUZVlzg9aj3hB7msTMdGp2/KcUvLYz680kefu469\n6VM8gZP1pDQ+PY5IZsjoKhkjO1gDn2qTazLgcEc01iSCwbBxzVos8U+NdipaBs7fl3b/ANOa\n8jyNqrv3HPH0r3H40Wh+wIwbIxkr3xxXiMSnaox7+4FfU4X4D2KPwkXO5xGfalO35MgntUvl\n5bjk5yMUL83J+U56Gu03YKu3C7dzt3pzYVfkxkcHBzUZY7SQuccYFJIzeTtXCfSkBGsgjQjO\n5T60loh+22ylT80gXg/rTCXbaGXaeuKm0mTbqkR+8fMGPzp9wW5+qP7Odklp4Hs1j+ZREi7w\nuAeBXqcz7sivLf2eZH/4V9YliSTGD7V6ZINvOa897nQMXOSaeGwuRTFK4OTUyxnIHXvTEODt\nx6UryELwaRpUQEFgPQZp8aedwnzA8/h/jUiYgkYgD+lO3Erg7c59KkS3ZQDjt+NDW5U5PHpm\nlzWHYq3VvFdR4kTPb5ulY03g3Tbpj5kETKeCpQYI9OlbzKWHpTW4A5qlOXQnlOE1T4I+GtUI\nkl063kdTkM6DcuPSvnv40fsT6Nrkd3e6NJJp2pSOZEdRmNieSGHb2xivsDd8o4z70qqj5yAT\n9M1car6hy9j8m/iF+zf4u+H9uLowTapZxvtl8tNpX6c815fG3mTGKeN4njOCjDGD/n+VftHr\nHh2z1OAxywI0RbeV7E5r5l+PH7Idh4uhutU0eIxaqxJVlAAIJzyMY611RkpGLTR+fgjRmBL5\nUd6STL4H8K9PetTxd4V1fwDrFxpmr2jwXFvzux8rLnGc1mxubtVfAUH361YXuS6bZ3OtapDa\nRhvNkIUbeepx0r0Ob9mnx8sayW1slzEwBLI/tXI+DZVsfGmkzo5XEy/oa/VP4T2tvrPg20uH\nijd3TLttHWlKUY7js7H5jXX7P/xCt4WQ6HLIFOd6qcfnWFN8LvHOntsn0aZX/uMpBPvX7AT+\nFbAqy+WpB7dvyqo3grTHbc1rExxjLKCaj2kR8rPyAm8G+ILGYR3Oi3W88kshx+B71Tmt7mxm\nEN5bSWcpGQkowSPXBr9f2+Hejtkm0hJJ5LIG/Q14J+0v8FPD154TlaPTkjudrMl0oy6NgnOe\nuPbNaRalsQ0z8+Vbduxyw6il2jGcc4ztqLDwzzKzZKSFNwH3uetS7g25un8PNGpfoDTlWVhk\nMw4xTlV+SzfKR3pPL24LEYAok/ew7QwU5yDTGNZgrArwR1ao93IyQVXv61J91enJ4+tMBbaS\nq/ux1b3oEN3BF6kMT0PpTGkCPtU+YcZ+Wun+HPhBvHfiqGxYKiZz85I3DjIGO/Wvpa3/AGGb\na6gW9iursQSBiLdQA6cHGTj6Z/Giy6knyCs6NwSFH15qVZI14aQMp4HIr6ub9g1o22xahMPY\nqaj/AOGEbvkLqjIOg+Un+tFkuoHymJQzNhl+UY+U1IkqqoDHK+tfR91+wjrNuvGrxYbu6FlH\nPXg15B8Svg1r3wtlaPUik0O7CSochl7HtRvsBxu4ycKSreop5YwyY++uOSarBzHhgeCBkE4r\nV0Pwfr3iksdKsZ
Ls4OCgyPzqbMCi0hjjIIwvUEUySRogAw4ByCa61vgv442qraHdRyf7Uf5V\nBN8FPiD5j79EuNvdduTV2YXRzYLOV4xnnNPVmkYgHGB3rpl+CfjkqiLoc+73/wD106X4J+Pr\ndZHfRZ41iG5iRxj86LBe5y6kqRvIZcdPembtylWk2t1+tW9U0u90e++yahavb3AAO18cj14q\nm6bmX5fLZhQvMB+5RIGByoXkUR3C+YAoKjtmqz3BVkCrk9x611vhn4L+MvGlu11ptqPKzhVy\nM/X6YpajObbG8szbie4pnmbVIHXqSe1ehx/s1/ELYCbLYnXLH/CrcX7MPjuX5xEucZKrn8qd\niXJHm63BVAEXIxkn1qAsJFLM2STXq1r+yj8RMhRDCgm/jll+7Utt+yJ44mUFpIAuTlgxK/yo\nsPmR5IHHKAc4wPSjekOcnJA5Fe12f7GvjCbb/pcCIxwXyTj8MVqQfsU+JJZHX+1Y2IHJkjYK\nfxxRYLo+fJLmNVLAKVxj6UeZuQfONo4G3vX0JD+w/rXyiTU0ZieUETYx7k1Bqn7FniexBltr\nmKNsnbuUlDxwOOmeg+tA7o8CfMfzbiPTNNSQfdY/N1Nexr+yH46nkbzWhjCjO53yD04AAq2v\n7GvjRU2re2qyt03bsdOh4oC54o80ca78bz0oacDcVUr2H413fir9nfx34b5XTmvFDFAsAyze\n4z1H8q8/urHVdDd4tZsJbRlf+Jf5ikIsR/IvXJqQkbCzfMPQ1WjuFaYBTx29/p7VYYeZsweM\n81KAOdu7ow5qPc/LE/M3OaczK4UEkE1GWG4gc8cVQ7jZZwi46VY0jwvr3ibd/ZmnS3IGSWjQ\ntVG6kXyE3hss2OvT3NfdH7H/AIXgvPDsdxIQGXhAvBweOfXP9KRJ8l2/wI8eTKX/ALIn6DjH\nA98muh0X9lnx9qFyheOOG2b70zc4HfgdTX6Z2nhezhjJKAluu45q2uj2a4Xy1x1wBxWTqIvl\nPz50b9ifxBcMZLrV0jgXoIYjk+mQSMVtXP7DLxWzSPq0zsoJLND/ABY4AGenvX3eLGJX2oiq\nvtVPxFGsOg3CKMh1xz1Henz3C1j8mPGHhW78G642mTOMR/fccrkdQD65FZSnypSM/L1r0v8A\naPuB/wAJ5Mowu07SgH615j8qqrBvlJ4H4VqIkaTcNxHfApVZ8FnXI6CmSAbUzhe+accqSHbK\nHoaAF2/MDuAH92myJ5bbwyuCMbFNQyfKPl+Zl60LK0igqMepoAhkCwq7HIPpXvP7NPwnbxJr\nCalK3+ixnHy8tu7YFeG6bYyaxqkNlGT+8O5uM8V+if7NHwqPg7wwklyn+kTKjd8AYz+fY/Sp\nk+VCPXfDdidPtIoQMCNQn5DGK2vOwSDSeTtxjjioyu47c4NccpcxpYa2TliMUxW7E1BqmqQa\naqG4fYmOWPSkstStdShE0DBlPpUlbFhWK89aes2Tn36VHyAccnpSKw/GkBi+PtH/AOEg0WeI\ngEqjEK3fj/P51+ZXxU8J/wDCI+Mry2aLEUrmRMHjBJr9Upl+1W7oRyy4r4Z/bC8BwaPqyalA\njeYozLI3uTgD8Sa3pPoS0fNG390c4LE/Ln0pkjBdnykEcEikQlkDHdx61KrYXcBuHeuozQ4N\n+7Zu1M+ZlAGMEcmnZ6jbmm7gsPGR9aAEkDY4YYOBj+tNEhXIcfu+lWpEjXYRyCMmq7Sjy3GO\nc/LS6gAywCocj1PajcdwHUdPqfWhnDR8ggg4OKQqsjcMQvTntQApU7j8zAilV2YEg4PrikGV\nbCncOgoXd5OHOw5pjCBWVdwyG9OxqZWVuQmCRzTd7MyqD14VvemlWj4c4JOcjmgBGcqu0nA+\nlIVdQAH5PP4U4/MjYBA77higRPhW4PbikINycBztZujYpXXZtVhk56UquM7mGdp59qbllcsT\njuPpRqIIXH7xVBJ9
aYrK0mGJUselSrGwTKtgnmmsr7lwoPPLUBcazEtkfcxyaImVUY5BJpQv\nLEEgdOanhaNcl4wTjg4oGRKoZc8nHakjX9z5mNyZ4zUrXBWPaFwh6+tRuhVQx5WmA3cFTIY4\nzmnq3mbt33fWgBRnETZxyKRS0gO1cL3FABnEYwMc9+1I7TsDuGRn0pfMPBVNz55B9KsNPIY9\n7DeB/EBQBErO23bwMc7qb8jErlsd8U5SSTu4HUYqNgWyQDigBd+TyQFx2py7mXbnDdfwqJT+\n84XK+9SbvLZhyTnH4UhEnmALtZOP7wpjOGxv554NPLbkwgyD3Pam7V87nGMUxq4btyFBkDPF\nSSSFI0PUim+YfMP90dqQOsm7JJIoCwrAyYXqzc57ik2kOuWw3cdM0sbhVJcHGO3WmvGZY0dT\nxngUgJJJQCckKq/xd6buGMsOvK4puG3sdql/X2pGMkSgFeD+lIA8xfMx1GP1qQSRoynBYjkj\n1o3CMKAA3vQE8yZ8gcLke5qgFkjbnPy7jux6VJ/q2BU7uPvVHuMcaNu+c9vSlhyyrzk9TQMk\njPmRlmJGDj3qRZI5BuP8Ix75qJpAysi5609GGUDL+FACr/qsLwc5oi8xoz0De9KqhC5Y4B6D\n0o+8jY5YD5j7UCAybmCfxAZNOU7lDKSefyqOPiMAHYOzGpPl3AKcr3IpMB0yHGVGeeRT2Uxx\nheSOp/wojQGP5R8xPXNSZKoq/eweaQCww7pQ3bGPqamnjCtu3BccVJpMis027lQOAagb72Ax\nY5+7QMartGSAM853CnQeZJM8mdqKcAeppfKOGffgjr9KVSjYKBsnkjoPrQIezfu2CuEOefrU\nsa+TgNiTjO6ouC5Ujap/iNSo6bsAHZjGaBCqpZ+DwT096tMhWUEgYIwfaq1uqrJuZSV6DHb3\nqTgKPnz83JP86ALEKqrER9AeVNXcGSPcHBGcFcVSizuVk4A7n+Krynb80mBnkAd6Qx8R2ZCg\nk/pW/DlVjMfynbnd71jw7I8M3PcrWlHLGgyXIVueKTGdDYtJdXcEKrmVyCTX1d4et1h0Ozi2\n4aOJVOPXAr5i+GFnJqXiCPAZnjOdxHQe9fUelgw2wZ+cjA96zKNSzyG27scVaXPTtVC0+aQM\ncgVeXKt1qWO5YjVGm8wIu8jBbHNWM7WAPNVYX2rg/nVoj92GHfvSGSRn5tp/AU5lbgDg96gV\niWx1PepcvkFTgUDJy2eO9LDIEbBHXvTIZF3FWOWp0YHmHcMigZNGTHnAyO9TRbZBn7tQZKtj\nOPanxSeoxSYyRgYm4PFTL+8i9xUPmcE7NxxQsrbgMY9aSAtowm9mxjmlVzuAHIFV3kbjbinK\nzYKrxnrQBYb24NRlmXkjJzTkjbofzo3buKAJyN2BnFL8v4iotx3buo6UpXac5oGSbsrnNK0n\n7vJ61UVtxIzipxlo89RQA+Nht5PNAPDZHNR4HGOT6U8OWBIGOxoETIw4p7Mu0DqO9QLIB7U7\nduX5uKAHZDPvUkCpAVYZPWo+GOR0xT1XcuAcUFdAjyFOKf0A5/CmBQjHniljYOfftUsQ9iVF\nSQksvJxUW1gPU05Sy9RQAtwwVx/dpWBbBU4o3ruG4ZoZScleKNRjjL5aZbk0zaW5HIpnllsE\nnK+lTK457DFMY9NqsMHFB+91zmoWzuGDUgbpzSYCspWT/ZpxkK8YpjSFeSM80bst8xIoAlDl\ngPWjqvIpqyKrAZ5pxI28c0wFUbmIBwAKdwq7j0qN3CgDBqTfuHI4xTHYFGM07cFkVcduaai/\nNntSsQfY9qVhD8lXPpSJMu7kc1Gqurct8tPxzkCpAkHPOaVgexwKTfk8jml9A3BoAY6jvUiH\n5AMZpHBfAUZNKflYYOaAHcEZPBp642461GwBwMkZpVjC8bjmgBVXb70sisI8jgk00Mc4o8zz\nMKelACrlV57VIvzc9a
a2BwPypUyzALxTGP56inebngjNGQvyk5pu7ngUwsP5Zc5xRuzjJoX9\n4OODTFO2baVoTEPyD04pWYbMAcigfMT2xTQwHXmmOxIjBlyAcU7Hy5PWk/h4IFKOaAsK27aM\n8Cl3blx97HNG4ScGj5VB45pDQm08EjrS5bf/AEpVYbRk7RUghz827tmgRGqngEHrUjLjgHAp\nGkORnIpNhdSaYyJt+3g5NSqMoC3UUqqAAetO27snFAxFHydOKft2jLGmxs2SCNo96Vm8xs9A\nOKAAAbAQPmzShRI3z9KRVKrwc0qb1GPegQ5Y+ueF9KaVbyyffpUh+ZutNkcliOnpUjIu4wMU\nrRgyginrGWAxzUojEm3HGKYAmd4A6+9SqmATjFJ5bB92eafk7h6d6YMZHGDkgZ9aNw2/NxT4\n3G58H2qPbljnOKkB7OwwR0qNmfnFS9VAxxT2QBff0pARxx/KXIB9qarbm6YHpUo/dn+lKqiR\nycYFAiLd+NOZg7dO1OWM7iVpvlecvB2nvTAj2fKT/CTU8Ue1uSAMcUIo24B796e0fygmkIar\nblIYZ96dvC5IHIpzRjgd6JFCtwO1MZEvzck4qVSFIyMrRGoXAzmnsp2k4+lIYrZZuDhe1LIu\n4gnr0ppG7HPFSlM98LQBHtwp7+lTIpDZI69KoahBPdWkscEvkyf36spv2IjNuZR971oGSq37\nxg/bpTVZi3Tk0JlFwRksalEfzbsgAUCE2g5Gc+1OVucdqi/i96l8wKwLc/SgQEps+YZo8vcN\ny8U6OHdmQDilPzjjg5pDCNQqMS2R6U7IUD0prSIq9M9qWMBl60AP2t5YI6UMcL3zTFJxjORU\nik9Tz7UwBWLLuxhqdjemR09KRJAuWP5UnnLGQvc0AOjUR++adwflxxmmv2bOAO1Ct5nTIFT0\nGObA6daT2H508427wOaRcMoHegCrdvJHIknVV6ircMyzLvxgmjB6MAfanbAo4XFISDeOTux9\nacqhl4PPfFIUxjIGetL5m5toGPpQACTpxkelHG0kD5s9KUYXhRzTVyrHd1NAC7hs9fehfn+U\n81GkbMpUcc5qaMe2DTDYCu1cAiiJiuCTikK7eepNHTGeRSAlLfLkjmkXdj2p3TG6mxgfMT+F\nA0JnnaSaVc5244pGb/69C5YnHA96QxcFnHPFSZUHAGajH1qRI9vXk1QDNvXrUjZ2hiOPSm/d\nXJNPbPOelIPIawJwVqVxuwMVGjFm4HAp6kjOaBagzfvB6VF5AGXLHr0qVV8xumMU7ywaYyOF\ngowetTAjjA/OmsoXtzTuRgipAcOGzxTt248jgdKEw3Qc0M27KryaYBubb60cEfzpnmBWA6Cl\n3hT0+WgB/IYMOacreZKeMACoi5VTxToSFy3c9jQA7cVX+VCkd+KI17t0pu3zGwelA+g7HvRS\n+XRSENlzsBUZpgm5xjJqUfKoA655qLYHYlR05zWZQsjDy+OlJHGQ249KFUKoPUNSiQxthjkU\nDFkkOSvao5GZFHanEFk3Dk5ppbdw/XtQwBZv4V5bvQx7hcmkUhfl6H+9Q0bLgg5BPakgJtpY\nZIqNo2Zs9al3lY/xxTBIS2FHfmrAc8SqFPU96iZzGT9eKlkYNnAOKjYZxSAGLbdynB9KVU+X\nDcnrS7QrYzn3pJWPl+hp2AaykqRkYpqqsa7DknrmnqBjrmmgEkgnFIBrfNk9famRx/LyNtTf\ndwBxTDuYNlhjNAEYUqzck+9OjG2MkvUnATHQVDIUXAznmkA9QFIbrTXkWRiF60snUBR2qJt0\na5Uc5pCJG+ZQSuCO9Ej9McjvSrGWx7imMgXIByaYIf0xjpTG2+ZnOc0vMmAv40kkIRRt5pDH\nMypGVHeq8gdQCTxmraoDsPeql9IhuVjzgUwFVmjbGM1PsB74OKZDsVTzzT2+7knBxTAbIzBA\nFbcRTFUFhu780qyH+EZ9
TQnQkjmkAbd24ZwKb5Qxlj8vpT9w6nvTGJUdc96YGJrEYGsWEVuv\nllwWdh6CtiFlXcG7+tNzllYqCRwCRzQu2RTxz0osBMzfu/ahYw0YIbj0pjK20elCyBVwfwpE\nsco+83ShOzUisPLy3FKvLAj7uKBC7TyQcfhSGIRoB1Oc1PuJj2ggU1fm6nkdaBkLNyD2p6tt\nwc/hScd/Xik+XcWNIeobTu+bil3gqT3pFbGCwy1RthicetMLDTIzjG3AHpTZP3mMjIz0p/mb\nmwD9ajmI3DbnigRIzNJxjGKTeEXGcU3ce/Sm5BU5OaAHA7oyRnFIqiNTk5HvTlyIcdc0hjDL\ng80DsRL93C9zSlGBx29KVm+YADGKc2AwO7mgQcMpG3BFMVNoPrTskvgdKSTOcUAV5mYREDqK\nhdw0J7vVppAy/KM+tV2Vc4x81AIiDAdetRXQM0ZycDr9KkkZV3ZOWqK45tsk4Uin1JPBvjxp\nRK2t8Fwrr5ZK9zmvnHXGYyMgHavs74iaW2s+Fbm2jQO6/Oo9MCvj3X7cxzOpO0bsVomSzkZo\nlRgY8kd6xL7YsjnaOTj2ro7/AGhWVW+Yd6w9QhXaOecZqzMwmiKRgx8buTUF3IJEUKNoH86n\nuIZFUbG5/u1Qk37ASdjVcSGNvE3xqrHY/pWdKskWUxl8/pVxpJ5C68fKeCah8uVsyK2T0K96\nskrqjdUIIxzVUyMxILnryPSr3EKoAMbuTVWQRtkngkdaaVhFVsKzH7yMetV2HzMVBGOvrVja\nVbBASMDJBNNKlVWSN1JPT3FMSKiq6SBijMMdaRl3Mw2496tozQ/MxBJ7VBNliZCMe1Ayr5ch\nmIwACuKViV2/JhVH3ql3B9r9CKiZ2bduHB4NUJlaXLL8nOTkGkkhNxg7uQOtPMnlsNvKdAKj\n2nDlcjnGKYdCKRA2FAyB3oZW28AZXkCk2tG3XineYGYBeAOc/wBKYhjqQmVfDN/DT42eNUGe\n/PvSN++9iDximq23g5Y54phYfNudzHyoUZ+tQNHuhwvVeo9RUvml2LHk9OaYy4+bJXHtQBAS\nzFScYUcD0pW85uPNXymHbqKURB42IY8/nTWUYx9044oAauCpRBwON1G8snC7cDH/ANegZWMh\nTyP4jTvvKctubGSwoAi5XkA4PZaCpGGxg+lKGXaroSpxjmlVgyurNlgM5oQDJMcNnBHekUs3\n8JQdiaPLX/fTvntTYpCcqRlV7UASqFkBjH3up96t6PrN1ot6kkL7CGBD5rPbjJxtbsR6U24z\nhdrZbr9KiUVLRi0ejPqLwL42tvF1qYmkUXCAZHTdV/xV4Xg17T5IHAEhGAw9P8a+YvD/AIhu\nfDd99stm3MB8w7V9E+B/iBZ+JrDEsixXCAb9x6t7V4lai6b5onn1Kbg+aJ4Z428B3Phq4aSL\nM8GcFwMfnWZ4X8ZXnhm8DI7FA3IbkfSvqLXvDNtrVq8M6ACRcH/GvnXxx8M9U8N+fdRxebYp\n3XkgVVOoqnuyLhNSVmewaB8TNL8RWbQjdHOVwePlz9a8i8beJb/w/wCKCsbnYp3rzx9a5rwn\nfLDewhGZFaQbkB4r2vxJ8K7XxXp9tOZ/IkwGWTbngjoafLCkzN8kHqY3hv47pDbqmoBiynDk\n+vc11Nr8YNHuplRS2G53DgV45r/wp1rS7idYovtir8w2jnbXGyxXOnyMssM0Q6bWBGKfsac9\nUx8kJan1OvxM0VN22dGl/uKa57xB8ZNOs43WGTzH/ugY/WvnZjcCQOrSIuOSoNSbZrhjkFjt\nzuxVxw8Ex+xitTuNc+KF5qpeSNPLBGApbNcTqF5NdMHuXycevSn2ul3zoiJC8rkZ+7jj1rpN\nL+G+s6myvJAY4yPlbHJrrh7Omrpm0XGCucnHIZMRhWZgM/KO3rW1ofhG+1q42JCwhB5zwSfp\nXrvhb4Px2ccbTR78j5nI/S
vSdP8AD9jp8McUUSoR1bHNctXGRjpDcxlXS2PP/Bvwzi0+FC8e\nM8kt1Br0KxsY7GLbGgVW6YqzIojOxDgetDNvVlzyDXiznKT1ZwyqOT1EkJ2ndySaNpwzZ4A6\nU4YB5PaoNS1a30qH94yl2425/WoScthRi3sSBW+lBX5tzgMBxisPTvHmj38wtxcxxyOQF3nv\n6V0Mu3AKjIz1pOLjuhOLW5Eqncedgpo3byV4XNP45+YgZpD83HQHofepDYdG4k3Ec9qhkDLG\negp0SmNiN2B3pzZkV1B/SjYZ5p8WoxJpLMwJKqWyPpXgcZIYMDkYySf5V7h8WNTaHTniUA7h\ngnNeGqpIx9znjj9K+pwv8M9ahsPY9tpjyc4zTfMaRmXDFOmMUjRsr+oHP/1qcsmZDzs3D7or\nvOkBthOASCfal+zs6MrD5uuc0jbo/vnJyOlKdsm7LFSeM0IRWbdJhM7WH8VJbAx3kIQ7Qsg+\nbueRUrfIxjbjI5qvHGPtcKBtpL/f9OKljW5+rX7O8iyfD/TlB+TyFZfxr06UDdzXkn7L1z9u\n8AWMr7R+6CgD0AFeuzLh/UVw9TpIFj5zjirMZK4x1qNcrj3qdRlflGDmi4WPnj9p74oah8Nd\nLa5tnZHbHK9QSccHtXzva/tneIreOIvOzqGy3yjnrxn8K9o/bct1m8HsjrvPGGI9DX5/+W7M\nWbgDjito0+YzPtbwz+3Iyssc7Rw8D93IMk8dc47V3uhftnaRqDeXfwxQyYzt84IG6c5I471+\ndMkPzLkdOc1J5k3LCZiRk/maXsV3C5+rHgv9oXwt4qultIryKOQnb87ggH0yPbFenrJBMT5U\nscozxsbIr8btF8YaloVws1vcmI4wxBJJ/CvZfhz+1VrGgarbyvczxDdsYyvuUjPdew/xrN07\naofMj9KZFKnjp6Dmo1+VsiuG+EvxZ074l6P5iTRpeQgGYBxtOc4Kn3rupO3b8KgZIsxbPYUM\n3mBvlDbv4TVfdtbk5FKJCucdDTvYTPAv2lvgRp/xE8LXc8EXlX0fzoUG0FgMDccZxk1+cOqa\nPdeFNYu9LvsxzW0hjIfPODjIPoa/Za4jE0bIRkEd+lfAX7a/wzsdD1Q6/aRrHJdSYkRF4JHf\n9K7oS5kYuOp8u/aDbtFPGdrxSKyuBkrzX6O/sh/FGLxZ4aaEqBeKwVwoAU8dRjivzeaZJocg\nYGeMfxDjP619F/sY+MW0Dx5/ZzbjFcqGAz90jnp9Aamorotbn6RSN8xJOTUO88im2sy3FrFN\nn765G7rRyze1ctzUfnnOawPG+gjxB4dvrbYsnmIRtccZ/oetbmT0xTpo91s+MhscAd6qMrMl\n6n5D/F7wP/whPja/tot62juHVX+8ueufx71yx5jCpHv2nH196+k/2zvh7f6Jq41K3HnWHR2I\nwck5C+/Q18z2s/n26MNyrjn2Nd176mSJV/eZG3OB+tMbCLtK5GPyp0bFSDn5j2pZIwznOKQy\nCRPlT5dyd/WoriPzpAFdgOm3NTySfKyxnOOfp7VN4eh/tDxFY27/ACxSSKGbGcf5xQI+uv2L\n/AEUlv8AbpIU88t/rmXLAdx7V9wQ28dtbpGoBC9MDjp/n868e/Z78GW/h3w9byxRLEGXdw2S\n2ec/T2NewMx3E9q5Zu7KSQBo8AFfmHc96mXYMEov4DFU2+aSp4wxxngd6yuUVNWWGCxlkZVE\najJ3Dge5r88P2s/GUOparHp9sVmgPPnLxnDHoK+5/iz4ki0jwrfKpXzShVecYfGQfpX5X/Ej\nWrjXvE1zPdMBIrYAxgHHeummZSZzV1KVQE/cPHoPrX3f+xv4Xsb7w3bzyoDdAbvnAPyZ9K+D\npFMkbbRnuR6Y71+hX7GUKx+D7GVY9pdSzLnnHTP0zTnKyGkj6Jfwjpy4DQo23plRwf8AOKaf\nB+nbf9UvOTzWxNlcg9e1RFmZ
a5+eRbgjNXwjpixhfs8bH1ZRSXHhWxt7WYxxKu5SCR9K1424\nApZpB5EhJ7Yo5pE8p+bn7XXhq08P+JbWWBt7yMxk4xhc4H55/Svn+eTpg5z0HvX05+2TdTTa\nlDazMpjjunK8fNt9M+lfMKqV3YJZc4Va7lsTYrpGrahbLnBLFefU9/pX6V/sy+Ekj8B2Uksk\nUkmzBZB+HWvzWhRpNW09AcN5oJ6469PxOK/U79nZRH4Pt1CshWNVKtnsB/XNZTlyoaR6K2gW\naDCoOD+VSLodnniIA9T71dYjcciotxXJzxXJzsrlIP7Is1YOVXK9OB1pi6TZMxPlLgnpjirS\nsGWl2nfgGjmYcpXGl2qLhIkHPOAKetnDtIKDr6VYWMhvmOB64zTghYgY79cGnzMpIqGzgXkR\nL+Ioa0hkP+rXp6VdaI8/lVdk8sZ596LsCr/Y9ovWJPT7opF0m0VjmNefbpVtTlhikk70XYjO\nuvD9pccMitzwQMfyrgvGXwJ8N+L7Oe31DTYrlZG3ltvzA46g16T5m3ApVmNNSmnoS43Pz2+L\nH7Hl14buGufDrSzxqWJimXbtXGRzj/OK+bm+02NzJb3MTQyKeFYYzX7I39mmoQvFIituBA3C\nvkD9pj9mFPEER1nQoRBeQqxZI1xkAEmuuMlLczacT4zVhLGR1YCnNCNquW2g44pJYZ9OuZ7e\neJkliba2RzxxTHuMxg7ciq1Dcq3S7gfnwwOQuOtfop+yXbeX4TtSmFXy1bHucZr88JfmkUbg\nPmFfo7+ylH/xSdqB8uFGQfeom7IaR73JJtXFQiZmbinzYyx/z1qsu4H61xI2LMbFmPNZHimf\nydEuJ84Vf8a1FQ5471zvxCuDbeHp9pwdpP6VpHchn5pfHBpbj4iXnmtukZyQw6AEmuEZDGAp\n5C11XxQuRc+LLiRG3sJDk5z1zXK7sFVPVj6V1kAAOv38DoaJD8o3N8hHy4/lUkiNvCErt69e\nah8uNmYnJx0zVDG/6mMooyD396glmEEJJOCo7f055qXzlbIP3R3z+tO0bS31zWrSyBDiVgN2\ncAZ4oH6nuH7Kfwxt/EmuR6lcwi8lDkJuXKIwz8rcdemPxr9DtFsxZ2aKE2nbyMY57/rXlf7P\n/gG20Hw3ZSRMQqwhcMOuec8/Xr15r2PAjxj6YrjqScnYqOm4itheab91WOMmn7ctjt1rO8Ta\nomjaTdXDSRw7YiVaQ4APbn60tkUfNf7UnxUuNBsY7az3pIjlvMyMdDgD3q9+y38TD44sSsj4\nnCbZFGOGzjJ9zjP518v/AB08cSeIvEU0DyLIoY5IPBbnpWv+yv4tn0nxYbSAmKGY7mUd2BwB\n+Oa25PduI/RdlK8etMHyg460trIZrWKQ91+bHY9xTJPvZAwK5hk1uh8wHv15rxr9pL4c2fi7\nwvcu8ebvadqtgAnHHNezRkjAHI71S8RaXDrWkXVvLGZA8bBgPTGMVcdHcD8i7qwk0u8ntH5M\nbbCR0JHFRwlvLcBenBHevQv2gPCqeA/G11HErLZXDB4Y+Tt4Ax+BBrz/AAWKgcZGfTJrsvcx\nY7K4UHK460/aF5HIPTimrGqMXdTu6cdKDtVuWIA6UdRgSSQp4B7ntTdmWJY5TGMUvBbDZYY5\n70nzdBtYdcUCDaxGAR/u0bl+UEfMO1IDujOV2CgECNQenrTKQqjczc7DSbQwJaTc3QelKyFx\njn6UgVI12xks2fSkIcy/MhDZHTHvT49+QwxnOTimLhZPmUjbzSK393nJ60xEkwZ3LO4ZTTGA\nU7s8YxtFOWQR/eXfzytNXneWGOeKAEZw0iqFIX2p7J+7Zt2DnAWljzMNqjaRzTdwbcc5I9qV\nwA7goxkYHOPakib5Tn5c8gHvQysvzg8U6MBlO75vT2pCsOkYttQ8E8nFJJNsxySnANNC/IfM\nbaF6Gh2Cxq7D5T+tUBJwEZmbB7
Un/LM7xz1Ap2zCggjJ55pCzKu3OQaBiEuwVgdpPB+lCuOc\nZCjj/wCvSbfmGTjjg+hpWwy/KMseC3rQIRm2jLDc3Yr/ADpN2/8AiZSB/F3p6t9nYNtztGBS\nPIpj3P8AM2aBjo2HTByBmolmO08nZn71SNKDjd8q9MetN8stkEgg9B/SkIXzCrNuUDsKQL8p\nLnJNOmnCklkzjA+lO5WTGMrQMYqogCk/LTzCGXHVj0pGwrDI3HOcU2Zj8vGGzkY7UagS48tR\nkZxwWFI2/wDgX5c9aRWALD+Aj9aUEttAb5AOaYwX5lYqBnoeagCk7uWBA6VLt+z4A5Gc7qWS\nMfM7DaSPWkIFTlOBuPUinnLbg4yg4J9KakuwDjtxTV3/ADZPXtQMcqhMBCCMdKbI5VV3cA96\nJF2yLuXDEetP/hAb7vbPNACs6MEUL360qgBjg9OlKY1UDPLH+IUNlYCFyWzigB8WDle3VjSb\njKm0dF6UgVtoQnD9c0+FhCVyAxzzTAVVaRcbSFXuaQRhuV6dD/gam+0fM/y7j/AF6Uq/KmZF\nwOp20tQGFkVlUjO0dulIT0ZQME4NNjYKGCnG7kZ60/yxJGGHDdCtACyNu28FSOBipImClYy3\nzMeSKj8wNmPaaerCYKFIXaeOOSaQErKN6gcEdcGnsxVchcMeCvrUSg+ZtPEmKXzG3HPzY60a\niEHnbSZBtBOPwqZsbhkGMLySKYN0iYUkn0pyqWj3Icc4K0ATGaILyOTyF71Lbp83zrgkd6gj\nVGXYq/PnkmrG/ceuMcbe9MB6qUYgHIJ/yKesgYbPJ+bOMj0qLcu0E8c5p6yL9oUg/WkJlxPv\nggfKo5z2qxGjZVgu5TyHNQxyRtGTggk/pVmNtuADlT0XtQMsom5euTnvWkscbBSoBXpj6Vmx\nOfL3AZboBUltM6yIApJz0qAPbfgvprG1ur5OoJQN9Oo/lXu2i3gntVVxh19a82+GOivoPhm2\nWUjE580ge/rXo1jCY8vj73IpFLU2tw8sKBz1qRnbvzxUNv8AvI9y8leoqSMktzwKiRVi0qfK\nvPysKuRsPL2dMc1Sh64Y/SrKzCMcjIqRg3XLDafapRhlBBxUSsJlKjl6eqgLgNn1oGPwgfOM\nGpirL901FHIMcipFk3c0BqODNkHqasdVHc1W8wbsCnK3QA0mMstII48Dk5zTRJ09SaapG7Jw\naVV3fOvbtSGT8lh2pVztOKijYO205BqVlO4YbGOtDAlhY8KWomyrcc1DG4Oc809ZCeKaAnU/\nLnOB6UPcA4AGfeo8jbgde9JGwZSFFICQODk9BUsUw24FRx4VApwT3NScN7UAO/i+UfNSqxXJ\nPFQrv3ZzT2yy9QTmgZI0i/wnJ70keJmIPFRq3zcipeOoOaAHK3lZ9BUu4Mu7bj6VCOoDHrTZ\nEfpu2igCYTbRt27x61NkFQVGKrQ4VcZzUqyblxnFIRI8nyhicGnKSP4smo2C4IY5FCyKqgBc\n0FCvGytuJ49qVX3RjLflS+ZuOAeKVU8wELgNQMaGVsgGlZ9oAxkmljVVVs9fWmxSAfMefrSA\nkbMMi5XIxR5nfb1p5cOoOeT2pqtyeOlIA2nj86XzC2QV49aFmzn5cmhydgHTvQSOVcjng0/Y\nVUEdaRWDLnGSKcgKgk9+gqyg372A705ZGcYYAY9KbHmNjnrTly2aAE3dgad86nLDK0Lk5xgi\ngMxUjoc80h2FaTd0WpV7cYFR/d5HNIshXg5qQsTb9ufWlz5ilu4qNWDEnFO+79DQAqnv0ob7\nx5o2g4IOaWPGSWFAC9lzkD1oZWbODmnbhuO7kUZ2rx3oEIrbU96dF8wbjpSL60oZvMGOV70w\nBc8kipFGEFKOuaVSGBB4pDQrKFXI5NMjYsx7VIF2tznFKwAY9qoYzcFxzzT2fdzik4P8PNNT\ncqnNIQu08nP1pVXjI5pUZGHzDmnt
JujCqMUDGkZXPanLjbmkxwP1FO49KLjF2FsHOBSs4XOR\nmh96xgqdppu0nB6nuaYAyiQgH8qkhVlbYSdppIskknr71K2d6844yaAE2jf1zSLuVz/dpd3J\nz3pT0wOc0gQse1cjOc0u3BPzYFIiBVz3BoZwzY6A96EIfGrMp3HKiow2OCOKWRvJxg59ak2i\nRQQaOoxx6Bh070Fge2KFUopUjrRgl1AGQBQAgUjDg9acNzdSAfekyMHtipVUTcN8pxQARqcb\ngeKcE53/AKUsce1dtJJnPHC0AKzEHIHFLuG7PanQqJF61Gy7WwaAHn7pGMmk3Bo8EUJv3Dby\nDTlweOooASOMsuVPFOfs34VInc428075X6jNICHaNwz1NKuVdsnj2qTCjnGPekfay4HBzQA2\nP7uTRtdVJ7elOx5eD1pfmbpQBG0iqoBpr3UapndxUVwGWQ88VXRRuKnmgC3FciVhzwKsfe5r\nJa2ZWyp+XuKEupEbAyaANeM7gWxipM7lHpVa3O+Mfyqcu3l4x0oAcqBGJ7H1pe20c801f32A\nxqx5YbIXg+tAFUTJk7Mueh9qkjXdk4wadDEIchRkdTSrMGzxigYxt5VWPXpT4t0ilCP+BU0D\ncu7PGakDbfk3YoEMWMlgc5Ap+ASfT0p0agKQDzQMcjvSAaiEKDnjNSDAHJxzTWPybRk45qTh\nlGeCeaYBkeWVC8Zpdq8DpTY2bkZ+lKe5PWgCQqFjx0FLtxjHJqBpTID2+lWW5QbeTigCLeDj\n2NOddzFioz2NMkjPy9MHrT93l43ZK0ATLtPB5OKj3DyT2GaWRTuDZwKQrzntSYx6N+55GKP4\ncg80M+ccYFDMOAq1IC7txyBmjBY8npSsDvyBgUwBuTS6iJlADdc0fdNMEqoBu4p/PGTkUwDn\nbheD60inccnpS8NxnikaPkjPFBQNuxuUiljZtoJHWmqPfPtUi/MoweBQAsZ9etHls3PakOOS\nadEx29aAYMwcjPWlb5ccg0u4LyV4prFW56UDEGWY9hT13cccUnI6U5TjPOKCdQK7eccU5d2M\n+tMWQMpXNPZtq9yMUDBcbiCBSnjAIyM0xckciiSYswAHFMCXoQOlK3A3Dn2o2bhnOaAdmKQC\nqx3g5wPSndW9KjZ+nABpyt3IoAevzNg9aXgZI5qNcZLZwaesg/hFKxY5WO3IGDSKCvfnrS/e\n6nJ9KaylaCRy/NyelN+XOM0/crLnOMdqiZUHzMwX3PA/OmIlKkLSc4xjnvTIrhLhdsb7mBxx\nU/3s8dqZQitnr07Uc/Sn/wAPH5Unys3SkMXZ7mil3UUwI15kGR1okVtxwcLTTKOuefSk3DqT\nisQEU7eSc47U6TDfMPypsnztwOO5pZM+WuF6d6ACRkWPaOtJ5fAJXHHWkztyMcEVJktGM+lO\nwIixT2baABUYVi2P4RS7Qzcc0rWEP8syHg4FOjVo2bjPvRGwUdKHyGJBwMdKdwFbO3rUTOVU\n7eWpUUtgk4FL91W45pDGxfdyeKVWDHJo5Zc44pJCMDigY15P4VBoVsrnPNO3BY8gZNR8cYOP\nrRYQrkcYHPem+WeWzxQu7ceaevC5PQUAMbLH5TRHGu3a3Jz1pSgOMHjNKW2hscHNACt8uVHJ\nxUK7lXbjmlaZi3A47mkkbcy4PNOwEnmKuATlsUxdzqSeKAvJbo1M3NEpbIJpCBmZW46d6kVj\n8v8AdpqssoDA59qseWGj3A4FAEHmNu2qeSaz762CusuSzGrx/ecgY560rKGI3HcRQMqW0ZVS\nXPPpVzcGUAjJqFlCucc06NSFAJx3p3ES8bcY2mmf6uM5cE56Y6UkjeavoPWmsx2kAjAFIYis\nrIc8800/M2PWmRuCfansMY7ZpgDRmTAQ0q5Rhgcd6lOFHy4qJmO7A6UCHySEqR1qNYs55p7H\nnd0zSrHj5s5NADY2DDaetPkk2xqmec
8035uy496Yx3Nk80gHrubkcCgIV3EHk1H5jKuB0p6M\nFUnPWgBCw4XqaXzFUBT1zSBgVJxyKj27l3DrQMlc/MRu461DHuDHJ4p7SBlBK7fU1EriU/K3\ny05CBY9rnHJqRE+b3pMjkLyTT1zBjcMmpJGtGdpB4qKNQOCOKcxaRjTRuGVxQAskgVhtOPWh\nW3bjTG2K21j83apFxtI556mmA37vcGmM21Tjml8v35peqkYwaBi+YpjwTziqzSFuM8etNm+T\n/eqNd2Sx5HegBysG4VvmFLs7k/N3pkahiWXg0+NdrFyc+1A0VGwsjA/epJvmh2npUjR+a+8j\nA96imk8sHuPX0oBmdqFml5a3EJG1XjKk/hXxr8QtONhrV7EI28tWIUsMfjX2hIxkTAHy+tfO\nXx+8PtaX6X2cwSj5Qo9/WldmZ86XcLQuctnNZWsx+Wq89+orc1K3UzEodymsm+jeSPYOFFbR\nZLSMK6ZT0HOKx9zeXgDGDkk1rXjeXAUPD55NZU6/KCXwQeK2MmRMombLZQMc1HLi3ZlRizH+\nIVLIpkYkNk4qrFIq5BbI6H2qjO5DNOTtIxleMGqw28qRh/WpWUxqWOD6E+lV2XzAXDcirEMk\n2qyqV3GotuyQZAUZqe4Y+WrkdwPenTKskL+ZjfjjFAyjJBnewOWB4qM/vgAMZpx+QKScHHNQ\nNhGwjfK/Vu1MXUSRWKk9PQCq5ZWYYLE96t7jLhWGVHFQttWTai7VJpjIdpiJwPMbqBSszGMB\nB8zfzqbaASR+BoG3YVAHuaYig25vvHJzgqKc0QaXLAKmMAep9ac0Ygxn160mxmt3lPGDVIRE\nWKqwAwemaj3mOPaV/eZxmpXdNgZuMimeaGkOMsT7UAIvywjHBzSzzyDJGCpWgBWO3JHPOac+\nM7QM4oHYjZldY8cZHahlWNwSd+0dqR5FRuVJBHygetNYqsPQ7wcmlcQyRlXIcYQjmmiMNCHU\n/KeKJG8uTBG4sMjjpSbtuMfMo5ouIc2do4HBwahz5cp3DIp0zb2bHIPP0ouJG8pFC++7FFxj\nFwSy/wAJOSc0jxhlG35efzpGUO2PunGaX5ZV+f5W6CmA7dtypP0ps3zHA7d6ij3KWyd+KkXd\nu2sCcjPy02MSOTy1JQDHcVa0rVbrSJ454pSm1t429OvQ1VaLKjBwc9KRiVYhQAKzcVJWYrJ7\nn0P4A+MFnr1uLW/IS4U4G48mu+uIY9QsyioksTrgbuQfrXxsk0lncBoyVP3vlr1XwX8XrvRb\nSKG5cTQ91kGSBXj1sK46wOGpRtrE6fVvg3YC+N3CPs3O8qg4zXbaLNBa2UcDzglRgEn+dXdL\n16y1rTxNHIjFgPlBH614Z8StcvtG8TP9mmMa/eBGdv0rnjGU/dZz8kqmjPezbxSW5YbZOedp\nrD1TwXpepEPc20brnOD1rx3w/wDHC9sWRJo/NHO4k8V6Novxi0XWlEbxm3mxk85B/GiVGrDU\nHTnEv/8ACudG8r91apGM8r1zT4fh3o6S+YlsiseC2B0rYsdc07UI2eO4Tb14arX220ZgouFD\nEcehrHmqLYnmkZlv4N0yzQ+TCoGea0o7SFAihANp/wD1U43dupz5i7PY1Fdala26qTOoDHhj\n6+lZ803oyfeZbcD7xbBpm3LfdyD3rJm8XaXasyS3Cbl44Pf0rn9Z+KVhpp8uKTzWHXHOKSpT\nl0JUJdjs5VSPG7I+gzWbea7aafuDuu4diw/M15F4i+M00ku21YsM4+XrXBa14t1HUpHCuUjc\nZZSefwNddPBTla5tCg29T13xB8VraCJo7ZlnbPT2ry3XviBqGqNII1dF3cPuyR7VybXJkbeu\n7OeM06SYqvyrz1xXr08LCmd8KKhqXLbUpLOWO5QkyBs/n1r1zwL8Y41ha2vW+XGF35+TtivF\ntxkUndkj+D1qJi8crSKjE56DgCpqUITV
hSpqSPs23kh1C1jlgkSQMoPyHNR4JyD27V89+Afi\ndceH5oreRt1sx5XPQ173pWoR6tbpLE6lGGeua8GtRlSZ5tSm4MsoSWGePepv9WDvOMjpTC20\nlSOO1Iyl8K3PcmuSxn5nlnxehMmnlEUMcbsY7Z9a8ItWJYYbIyePQ19D/E6NZNFJX5TllBPW\nvnh2Me8OfmztytfUYV3gerQtyk0a8sxbODz6mk2hbdjjBLdO9OUx4VQTnvTGwkhIyewr0UdA\nITuAJJ4x0oVG8zbjbilwV538Got7eZuU7h05pAMmUYYDpnNQoBJJFjj5x/PrVuaNWyoPaqLN\n5csSN03daXUa3P1L/ZUkWb4e2xjIKLGADjrXskinrxXhX7H0yv8AD+3UDJWNQT6jtXvE3Lcd\nq4Zbm9ytnbgmrcLZA5HNVRjdhqnhYBhkZGakaPmf9tLLeCZkJBH3gw6givzykzGvzEjfzgiv\n0c/bMUf8IDM2wKmGBOP4scfhX5wrMZiuW3jHWu+OxkPT5/lY544pfuuAUyG4DA0N8zB1IAHW\nmOu6JSvyjdxTJIJIQmUKmQjmmXCuy/Inz4xg/wBastwzDfiT1xmo23Ng5Ifu3tTQj0f4F/Fq\n/wDh34r08yPutVYI4bOwZ6k/57V+ofg3xBbeJvD9rewbcSpnAr8b5xtCvFJtcNuVfUjvX6X/\nALJHib/hIfBEJY4nPzkL90nAyP51zVI9UaRZ71IAoGaZ14PSpJclcEc5pnO6uY0JVjyvWvGf\n2kPhjb+NvBWoL5X75V3K+O+c17OjEcDpWT4vhMvh+8jJyJEK8jOM8ZrSnKzJlsfjrq2lPoep\nSWsh3mGUoTjHGa3PhzrVxoHjTS7u0J80TBSd20bTxye3+T2qx8Ybcab8RNVtowJUExxMqkBg\nDjOP1/GuWtdQOm6hE6ozjou3/Gu2S0JVj9e/hvrCa54UtJvM8wqoHIwR7fTjg/yro2+XnPFf\nPX7K3xVPifw+lhcIplRU2t93AxjGPrX0VIg6Nzj+dcWz1LI1bpk81OrDb8x4FVWHIHel5VvU\ne9T1JPn/APa50c6p4JnYQq7Ju2sy5KnHHH51+cLNFHJJEP3apwVxjJ6f0r9hfGOi2uueH723\nuYw6vGVORmvyl+MHhH/hD/HeoQxqPK3tgDovJ4Fd0Hcl6HJp8rDjd33elIcbtwzhhnmmwqzK\nhBJJwTnvQzGSInaAFPAqtSNSP5Y0BK4OcnmvXf2cfBsXinxkBMpkVCHjXGAfXv2zXkG8XBij\ndcBm5avu79kf4aW9rpo1AhWlcA+aDzjHA9utKTtF3Gtz6h8O6TFo2mQwIvlqi7QM54/OtL/W\nE4PHWnqgWNIjztGKFTBxXFJ3NBqx7m6VYWMiNyScAU2NfnOOmKy/FV4bTQ7k5Ak2EqODk9hi\nhbknyp+2H44uNEsfJST5LgFFw3O7vx9K+GZt19M1xMcMe1eq/tF+OLnxN46uIp5NxjUIrY6D\nuK8sWNtp3nAxgf0rsSsRa5A0ht0Y4z6tX6E/sbztN4N02Q4zHGy4zngsTz+dfnzKSqMnUsM/\nhivv/wDYrkV/BdpgKqbDn/vo1FTYpW3PqVtvBzniomYjp0p8oH8PBHFQ7ux61xo0sOXJbjgU\nr8xlTx/tUi+o6UsjD7O3rViZ+fH7ZpX+3lBbEiu3y/1r5tYbUwo2gDOfWvon9slgniYSYyvz\nA7q+dfuxoc4PQiu9bEG38K1W4+I2mNJEjhXUorgEA+uD9a/VH4b6PBpvhuCOEYkf5pHA+8fW\nvy5+D8ZuPidpSMv3nBbtwM96/VXwGqp4dtsg7gmCfQ+34YrlrMaNiVzznvUYPy9etPkUs+e2\nKi4HJ6Vgak0YyPQdaq65rEeh2DzuQCozuPQCrMLFVINcj8VovM8J3aMSAyE/KSMYGf6UJXYj\nzTVv2pdP0bVntJjG7L2JBYD1x+H6VzM37Z
cK+YY47ZmUnbIWAHtXxD42kll8UaijTvIiynDB\niOpz/jWNtO0KGcgetdPsULmP0H8L/ti2c2oBNTa3uEkHPk/IYz6+4r0Nv2kPC26Iy3MYWQBl\n2nJr8u7cyQSgxOyMOQ2anXXr4yc3LEodwycgfhSdG2xPMfrvo/i7RvEEKTWOo20qyD5QrjPH\nUEetaUg+TAPFflJ4Y+M2veHJo9l6U2uGE2Mlff3FfbX7Pv7RcPxAX+ydV2/2oigrJH92Uc5O\ne3Ss3FoZ7tISMjvTFmHGKtTqrqGU9areTtOakRKjFjzSXNiLq3ZRgHb36fX/AD60kThWyRVu\nN8L354p9gep+cX7Wnw9u9B8YNfRw+TFI5ZmVfvZJ59K+fVIxlepPPpX6S/tZeH7bVPBNwZo8\nyxxs0bYPBPJ/QV+bEbfu2UNtCsQB612xfMjNkbR+Zd2oXl2lC49q/TD9mO1mtfBdusgAChQP\nXpz/AEr83/Dy27eIrAzLt+cZ96/UP4Jw2ieDbEWu1VVQSq+v41FT4Rrc9EkwzYB/zmo+Vzg5\np7foaYV284ya5UWTxqeCT9a434xX0en+F7qTH8HX+n612cK/KDjmvKv2h9QNn4YYZEayRuNx\n6A44zVrcD83fFNys/iDUJFGQ8rMv51lblZkJQ56f/XqDUJGku7lScqHbGDwfepIZN0ajuF4F\ndRkPDCOQ8ZPqailPzYC5Wn7vQbmPX2qOZTtCg5I54pgVZFeMAKnzd19K+gf2Uvh1Lr2vJdXl\nqptkYbPMTOef0614PYWbaxqdvaJuG5hllGTj6fTP5V+kH7O/gcaH4VsnmhCTOASQQcr/AAnj\n2oclFajPX9FsI9PsFjRRGqgKFHoB0q/uD8AcUSKi4UHgDgURocgDjJrz29SyVTwFxzjrXzX+\n1d45Oi6S0Au8bgVFuG5ZsHH9DX0Hrl19h02S5LBWUcBu9fnB+0h47udc8WXVkZEkjWUgyDqO\nDhSK0guYHseSXt3JfXDO7Fm3Eljzj2qfw54gm8N+ILO+tSVKOFznA9yaorlZiuCHPUGmXAZo\n5IlAJA6V1dLEJn6r/CTxMfE3g+xmLrK4iUMy9CfrXZSYOf7tfJX7GHj6efwymlTMpaFtiNu5\nfuRj1FfWUmOxJ3c1yzXKzRDI2Bz/AA+lSxyMMDt0PFV2xuFTwsVPFSM+UP2zvBNteaX/AGtH\naIxt1271XkMTzx+FfE1vMkrbWyCvX8K/WD4neG7bxB4V1GOaBZEkiKtkcnII4/E1+XHjTw63\nhXxJd6fLC0bxy4x229q6ab90iRQU/LjdlT2FCD5SU+8OME0yFdrdCg6gU7YVXcmWOa1JHSRs\npEifKmMFaj2/N3LdqkT5eJMnPpTmbOVXGemTQIi3BcjHPf0FKvl7eeR2oEf3UD/XH8qTyxNG\nyN8oz1/pQNAsv7zbnDL7806PCxOGHXtTISqqFIyQePWkz94HhhzigBecAKpJ6EVKqq0hbdsw\nMbMdahRTs3Bvm6+9P+aRcj+Lg5oELGNrEZ4bn6UYGcdKYrIz5UnC8ZqWNdyN3OOtMQsbDnJw\nopNxaQbgG7U1eIdwPTqKUYRd3cjkUDEyWUg8DOMUOqrhc4OQcUKqkjgsB1FG7bMQy544HoKX\nUQnysxQj73IPalST5gGGSO1Kx3LuTnPA+tKd0koG3DY6+9AXEVyVO4HJ/SpFZY4+fmwODTPM\nIkCkgN3xQuR8pI59TTGNjbcG5zT1bfww+6PvU1Y1h+XPJ5zT2K7QecZxigBFYrAc/Mo60bRG\noYnIb+GnDO7aVO2mfdchuV7e1LUBwAdVJ4wePak4Vi7tnml5+cuPlB60m7fnaOh59KYx3zeW\n38SHtQvC/rmlyG5T6EUYXe4XqBnFACNhthIAY+lOeTawXof71MLHzsAbsDrSnKxnADNnn2pC\nBVSU8sVX+dJGuWOQV4OKD1ABBJ7U8yblbk5F
MBrbvLBjPPQ560km7cCFO7HANOXHHG096VnM\nibjJu2nIHegBZBhlbrkYYelEbn7gXvwaSKM7mYD5WGTk9qarn5QRtHZvagB7FeMfO+efagqd\nu4dc9KWPHmMw+VCMYPekj2+YEJJ29BSESbijHAyzetLkDA6HPNM+Uk7Tl+1NDPHvJG7jH40i\nrk7Lnc7E9OB3p6qrw7yuGxUMO+RcO3BFSBjGdoPUYAHegXmJG527wNqfrTpbhhhQML603zHV\nAdmV6fKadN5fyNnLZ+5igB6xqZA+7IpJsSTD5sNjPFJ5zKwDKNuf4aXcd4K844zTGOGRHkce\n9JvWTaoO3/Gnpl0OPmweRTYQrbiAN3QfWgRNHNtkBUZZeoqWSRGYy5wx6gdqrIufmfqRkYqc\nSLHlFwQw6YpagPjbDqc8nsBTjxJt24fqAOPxqOPKfMDsOMc1NIyyLy/zY596BiqrKxA2467h\nT93mfMByeN1N5eNsdduBRGGMaoAxI5IFMQ+FWZkUnJPOKsxpumQqgbnBWokXylDn5T0BNO8x\nlA8pt0ufm+lIRaf90HKvgk8DuParCSB9qOQvGfrVFdreW2ck8kVbijhnDh1O4DIwcUgLxzHg\nhfLQj1roPB+hyatrdvb43MxB2+1cvHJ52w9FVfm9q9N+GcB8wXkW5PKJTJHJz3FSxnuOl+ZB\nDDA6fu0AUbewrso/kjTy23DFc9pUOY4+hIwc9zxXRWu0Nt+7mky4o2LNkWPKjDEc06Vdy4BI\naqsasp61bOduVOR3rNlklsCmN3JFWsBlLDnnpVCPDMADzVj5lTGe9IRL5wXkDDU+OQH+HAqN\nLcswO7in+WzcK2aBku7anA4pscxUgfwmmHCjk4NPLlBgDIpMos/LuB9afuVRkHNVlZWAwCD3\npY12qTnNAmWlwykjgUNIVzjge1V1m6DtUwY7h/doBEyzBmU55qTzPm9vWoGVVUEdfSolkbcd\nw49KCi2JBuwvSpFUsSM4NQR42/L1zT23ZyDzR1AfgRtg5J71JvEONo4701WDYOMnvQzIvTvS\nYh2884qSNjtyTVR2G4HdhTT1O1SN+TQBdHQkDjFK/CKOnvVTzGVcB85qaKb5QrDNAyV8Nzni\nl3hABnFRtjGAeKTapbJ9KAJncFuuKbxKeG4qJcMuSc0+MqWwoxSETR7Y2wSaWRlZcg1FN8v3\neTU9vIrR5ZPmoGOSaNlxzmkMg6803co/h5prTFskLhaZRKkxVRuHHrT0fdJuU5qBskKAuVPU\n+lOaIR4K8D1oAtM+RyNops2WXeowKYv3fmpu9g3+zUgPEm4AqPmqVWMin9agaYRsFUfWn7/n\nG3p3pAOVsd8HvTuHwS3FMkmiZSAeTTS21QNv41SAnSQxufSpVYSONrciqrAHnnFCuA3BNHUT\nLo+82TlqTkLkd6hVxuDZ5qfzlUEjk+lMoa7eXjYevWneY27AqOE5U5GDmpNpHJ6VPUY9WAXG\nPmp3mjYciov4gu4UsgPAHNNjHKxHTpUinDDeeKg3nbjoaduDAKetSSTfdOVFKGPTHPvTFbP0\nHSkaST0xTAsSN8oBGDRjpg5qON2bqcinZzIOcUgHswHHSkh3K2BzTmZC3NKkio2T0oCw8ttz\nxTtx2gkYpW+blaY2WGOpoDYk8zawJ6UqsHJz96olZWUZozx6GqKJWU4puR06E01Wbb15pdh6\n96QEihFHXJpVY8g4pkYVW5NOKKrFs0AKzbQG/OpQpYBx0qM7SMBs0qgqoGaAJN27rwOlNRmb\ncPSlj+YY/GjjbkCmBIq7mXJxSybQ2ciiPnGTxTpI1IIUZ71IyJWO056GnorHkcgUKQVxihGb\noPloEKikDnjvTkAY803ccgN8zdBRGGXdle9MQyZW/hG4ZqVSYYxuHNO8h26H5e+aNv8ADnIo\nGPiYn73Jp6ydQFOaj2k/dPNSLJs65zSAQY5B/W
nr2ycgUxpPmI25FO+9jBwKAJOfoKWT5uM8\nUfebBbik+VWwCGPSgYxmKsCvAFSB/M5YcUcb8EdBQrBRzQIk2/LxxRGu2M4wWpu7MgB+6af5\ne0HaTVALHkrz1p/mDy+OCKj8wqo3Ck8sN1NJjJFbdw3Q0nqFpI49vWnRxszcGkA1JuMMM4p8\nIZnLk8Y6UMvyttOacm5o+TzQIrXKDrmq7ID82MelX127SGGM1VmtgyYGRikBXb5cqTzSxw7F\nJzzUbFVb5j0qG61FLaMtkkDmmBd09ZWkbJAArSjJZfbvWdpLedD5yhtrDNaYAkwcbaAFXCrn\nFPYs2Ag4J5oVSvXkdqcNrYA4NADNvJOaaqrtbdxUzLj3PtSNjaNwxSYEcbDoFqZY0bBP3s80\n2NkLZXpTcZOenNMBwO3IPJzxSbWbOBtapOFwSKI8bixOaQBH8qgE5b1pYmKsQec1FcXUVrF5\nr5255wM1JuEypIoIDDIFGoDmUK2VODRtKqWPPFJIpdCOh7GnbRtVS2cDmgBirsUDqTzUkbhV\nYYxTfKIYN2qTcikHbk0AIAdpyeMUpUtHjNNZGLH0NS/Krj6UgE8zO1SM0r+wprBeT1Pajc/O\nTzQFiQt+75XNGQOcZpqsVzk5GKcmWWpAUNtXHU01WbHIwKkP3gQKXcPrQMjYBsqRmnMwRQD0\nojYFeOooVgzYK5pjHrgDgZqLe27BGF9ak+XOKNoGQTxQMQRjgkZBPNScliTx2pjSFVAA4p6M\nXOTQAxZOq461JGdp5FBIHak25xQA5gc80oxs4601ZAsm3qx7Uu7ccDrRqIUsWHoaFx34pP1p\nshLDBG0DvSAUJySakUHo3OPSoPMLgcVJGAOh570wJBjzM5wPSk8sLk9jSMwUcjJpY2Eke7tQ\nAibo8Z6VLkN1BqMyfKDjnNDSFiCOlAEjLupV44bOKaGpysW6rxQMcjDdgrkfrTmAVsr0NNWP\ncuTwfamRr5SkdR1ouBY2YII6dzTAoVs54J6UyORjHhuvUVKx2rk0eYhJFXgikZVZQHQOvoaV\nm3c07a3agYkccUZ/dxCP2WpjkLwM1Gu5eTxSq3BGTQMeuVXOcUka/KeRSDO0Z470MVz0wKQx\n276UVDgetFIBGt2ZgQOe9LMo3etPG/aecGj+EZqAGbQ23HC02T06gVI2QpB4FJj92MCnYREw\nzGO1OXKr6r60jqQh2ctTossuGHNAxFbdwKVY9q4BzmlaH5gBwKOQaVxD1VkXimtudhn8aVlO\n7CnimnK9aQwbhwB0prSbZMZwKXb5meR0zULKH45NNAThvl20qrj7x4pjffXA4xT8BlJU07AR\nABZPaiRhuGentTmXdwBg1CybTsPJ+tAxxwVbn6U9SqrlqiTgjcuB60SMQ20c0CFb/WZB49qT\ny/MY801cs2AcDuacZQq5J4FADB+7bGeKn+VPmxzUJKsP1FOkl3YwOKaEPKhlzmomVNwGM560\nM2WyOlREMenWkMhhYQu6g4BapZY5mmXZLtXHINVdQV/scnlDEwGVJ7n0qbTJJpLCMzr+9x82\nKBFpUAAXdkDvTJtqsAuc+tP3c/MOOlMYhWx3o1AVk3LnbyKWLG47uBTh83J6U0HaaaAZJJtB\nCjIzTZl8xdo+UmkaT5uOlSKAxHrQwK6oY02kc5qQkHr9M1K3DcjPtUXk5zu9eBUjH7Mrwaau\neF/WnfMFwTikTHJx9askWRg3y5oU446UfI3C9aWRfm4OagoTzPmwMke1NkxtJAxSD5h8vymn\nHB+U/jTYEYXy1BY570N8wHpTmXauAcikCjaSvakJi7vL7daRiDjHAppboT3qRsbdp6UCYyT5\nlKA5qONFjGAMU7blsqakLKq4IyaC0V2YDOw4Ip3mFtrHpQy9MLzSbwWAxgUEseW/eDjigyDz\nAD0o3HJqvI534AoESsqeaGPPpT2kMi4UcVHtyc4y
adu8tc/pTARfm6jBpJPm4B5py/d3AYHe\no5V+bAPPekANGrKe5qttYsVPyipzJt4HWmhfO5oC5BDEI8sTlfSpNvYHBqTy1Y46VAWYkhfz\noKEuJQq9M1mXG6ZtiDBNX2BaQBuKp3jFZQEHfmkSR3K+Tblc/U15n8WtFfWvDM6RxiTy/mwe\nwzz/ADr0+5VVi3bhmuf1SFb6GSOQfu5VKsfwpknwt4ghS1mMUeSQxDH3BNY9yFaFWbqa774l\naRJoOv3sE0WCkm0e69QfxzXm+oyFFXy+B71cSGYuqRjzGyM1lT7dg+TkH8K2dQysIlPIbg1j\nXCu0I2kZzW6MWVJP3eWOUGM1WaaLgqOvVvWr903nBTgbhx9az5rcRMAGyG9qsggaT5iJF3Y6\ne49KlkCNbhxGFP8AdHao7jAlVW5UD9aRpkaPyx361QFW4i/dje2RnPHao1ZFbcTux+tSkBfk\n7k96rSxhZCAABVC6kFwpeTzANpY8VDGGZnyu4dCKmmY7sEY9KjWTdkZ2t2pjBFZjt6AVG237\nx+gqXduxsHU43VWZH8xicYzimASSYwuc5OMelIqq6upbC9NwpSoaYKGyR1PaolUqGzjk8UiR\nLlWZkRssqjg0xm3RSRs2GI49Kka4Owkct90r3pWjIjzKR04pjKgYoOVyoHfrTFjMkgLkBWHb\ntU7IBC7ltp7HFNCjam/A74qhkMaqv3fvKeM0s8iMp258wnmo5mw5UAgZ6kU5dzSFUOOcCgTG\nyqBtBGQwpQiHdjqF4BNPjwXCkEHqSTUbKqEuxyGP6UtRDGkKqVZPnHFRqoMhdTgAYK1MrCQs\nEO5SeGqFm8yRlQFNvVjSAQAqpkB20rsdoEjn14ppA2k9eeRRKCn1NUFhrKGbKtkVEyFu3frU\nwU7d5IGP4aGbd8rDafvA0ARxkbHGAfXmgfKuUJC9RmhVXLEH73BpQVaNlwSq9KADcWmJYbeM\n/jScd/mbvikaMSNuLbVxzTvNWNBgBvehARqq7ctkHP501I187qeucVNJukYbRxjOfSkWRl6L\n25aplqBqaF4kvtJuAkVw/kk7mTPX3r0rWPBt1420O1vLdl88KF3SfTv+deQ7QuHB4Gd1fSvw\noAvPB8O45VTtU9eRXm4l+zSaOWs+TVHz7qngnVtHuGSSIMoJBlUZH/16xXjurOQMI3gB4K+t\nfYs+m2UqMJbdG3DnIrk9e+Guj6kuBbANjjacZrmjjE9zCOI5tz5ut9fvVjdILmSLaf4TVyHx\nRqULJN9rkYjg5Y16PffAGEzSXFvLJArfMsY5A/GuW1j4Sa1p7sioZsjcu3uK6I1actzWM4Pc\nyrnx1rhjVVumCZ9ar3fjfVLqJAZ2EmMMWPX6UqeBddJ2tbS7wcqpqheeH9RtpiLq2aJ14wRW\n8fZF80CKTVbu4Xa88jLnPXoaasjTKfMZt3Xg0q6FemNtkchI5O5SBUcdvdbs/Z5AOhYqcVon\nF6lXiOx5K7toI9qNxMZUY6d6f9juVbCRuzEfdwSKbDpt9LhI7aR+TuJXvW3NFdR80SBpAzBh\nnfjB9KNxZhg7SatL4bvmYqkEjIeTkck+1WF8I6xNHujs5Cg9sEUe0j1Y/aR7me8xj37QOeKY\n0gWP72D19q6TT/hrql4iXEilEJxsY8/jXU6d8FLm+i8y5Plx5wBjNc8q8I9TOVWKPLFZJMGM\nHOeR616h8Lb7UY72JYmm/d8sOdu30rt9H+EGnaf5W4M6rywIxk12Gn+G7LS1xbwrGO4Awa86\ntiIyVjlnWUjSW5VsN+Y9D6VKZuDhcA1CuduAB+VLJzGe/wBK8focfqcd8RIY20W43feHIPpX\nzfcsrTEiPHP3vXk819HePryOHTbhZRmNoz+eK+cWkVt2/gdB/SvpMF8B6WH2GW8jfxAdeDTx\nJhXycN0FMQAxk9O2BTl2HO1dzYr0jtFCMxA28AVEPl
X5R0PINSGQhhIThehHpSRlFkKjof4j\nQIZcbhgbeP73pVOZlZsfk3vV7aiqxbLGq08JwuV3gnH0zxR1H1P0h/YuvBN4AtkVskJhh34b\nFfRcjEj3xXy5+wvdJP4KeNGDNHw/4NX1Cd3A7YrglubkKk9M81YVcY9arldsnNW4QOue2agZ\n8/ftfwn/AIQm7kzkeQwUe/Un8hX5qx4aMLHgJksD36mv0z/a2Jl+Gt4qr8+CM/gf6Zr8ylVI\n5tp+6pwfr7V6ENjIlVTJbsT8pz2pd6ysmCNo7ZpPM2klUJPb0qRtu5WKgcVSRNiPlVY8YPHF\nRSMI2XAOBwamZ9yqCMHqRUcmGJYdOnSjYNSG6j/ctjp1DAdD2r7z/YX1b7RofleYFXBAX3BO\nTXwY0mGXOAM7QPU46Y7/AOfpX2b+wlP/AMS2WE5XbOUAPX7o5/PP+eKzqbDifbmCWOe1Rgne\nTjNTmMrnsOwpmfmOOK4dTUIyVrP8VfPoN6QuSsZKg9z6VpL1Jql4gj83Q7pAN2UI/r/Sqjug\nex+T/wAdrNk8fzthoXBYyKwwc56VwgjCc7gT3PvXqf7RkIHxQu+mWXeQPQkivM2hWPcFIIA+\nYV6Bie/fsjfFJfCfiyDTL14/JuZcfMQNp6qM+hPH41+julXC6hp6XI6MPqOma/Gvw/dtpfiC\nzuocBY38wlhnGOc1+pfwD+IA8X+BbB5sPOVOWU5LYJGSO2etclSLvc1jtqelOo+93pkbEtg1\nYZfYVEcZ9KyGLcQ+ZbuuMhlI/wDr1+c37Y3wzv8AQfFDammbm0f522KR1yTj35r9GvMKqMnF\nfPv7Wel/2h4QuHMQ2r96QjjGD37V0U3qZyPzbXdtPzZj6A9PwpWUvD2B6YzURD5nDDjzDj/P\n50siuUIwRgZz610kGz4E8Ov4q8TW9iiFt5wFJwDjk8/Sv1B+Cvg8eFvD9rCI/LHlrlcDjjjn\nvXwj+yn4Fm1vxNBqqNIrxSq0JAyGGcFT+dfphpdubPSbeLkELkgnp7VyVZa2HHVk0zZz601Q\nQwOKYT83NSx5bnrWBoTRJtXPOe4FeB/tQ/EKXwj4VumtkZ5BwZFJG3jJ+nSvc9Suvs9pJKo4\nC8cZr8/P2rvihd319JoxnJgZvliQcMPUnvW9NXJlsfN+uap/b2pT3rDe8rEkk5681UIKlccq\nOtRqXG5zycnIp7ArGGwc/wAq6iENujIxxt6jAIHIr73/AGLlLeD7CMjYI1YsvdsnINfAV9eC\nNdzNxjFfe/7FaTR+HbYXCukmzChhg7SeD+RFZ1PhLR9XyfdBFQZG71PepDls5OPaogPmPauI\n1JEJBxjg0TSBYXGM8GhR8o5OfenzRJ5B3DOe1WiZH53/ALaW6PXLRncyKdzsW7ZJwPoOa+cH\nYGPdnLdh619IftiXCt4iCyp8zsQq46L2r5tALRjhQVrtMzqfg/amX4jaYzS/JDIHYkfe9BX6\nveDf+RbtTgbmUMePbpX5S/BffJ8StKR8BZJMMG4yBziv1Y8HOG8P2eCcCMYB7DsK5qxaRrSq\nOe1V9pbjtUznnJqElt3HSsSx3K4Fcn8Wv3nhK525L7SoXPXPUflmuuj5PIrjvi9N9l8JXMgO\n0hCc1UdxH5Y+M5o28UaoIUaGE3LkIzbiMseMgDp0/CshMwyEH7wXPtV/xFIZtdvUX50WU7iT\nweTVIKsjEL6YNdpjqOkLSOMEdM1E+xt20YOKeo7spAzjFMkgTcWOVz0K0ySLaWQAj6813nwX\n+ITeAfGtnckGWOVxG8bMQOeASfY9vrXCMrFhggACmQMIdWtpCNzb1A+uaJK6GnqfsL4U1+Dx\nHosE8JBXyw2RnHQfp1rRbHPOAa8t/Z3cy+CrUqGLFAGz+Feq3Eag7T0rhtZmpEoymB2qVZCF\nHsag4VvarCYbHPFAHn/xx0P+3vBN3528oEKAp1GRjPWv
y38R6X/YuuXdmUIELlRlgfoeK/W3\nxxg+G74P8yLHuC+pHOP0r8qvilHt+IWrsflSSUuFHQZ5xXTS2Zk9znNDjM/iC08tDINy7vUD\nP+NfqB8DbNofCdqQm1MDaexGBX5r/C2MSfEjSYpOIpJkDjHQEiv1K+GtqbfwzaR9FVMLxjpT\nqbDS1udTj1NN3bWB7e9CsORnmlGM4YVydTUsRHdgngV4t+0ww/4Qu7VvmzGSOeg7mvaY8cHq\nteKftN+bF4TukjADyQsN391cc/0qo7kvQ/N64soZPMkR23K5wW6EZ6VDGCRvHVuPahnaKSUM\nclWIx2+tC7YwuGHPOe1du5kRtLI0eE45xk02STy1BPXviiSQqp9M0/R9Lm8RatBYW+d0h53c\nCnqB7L+yz4Cute8VHUJLJjDEwVZW+7k9vyNfovounrpemwxKoU4yRXkn7NfwxXwR4WtWfcsk\n0YLRyKMgnBz9K9rkwfu9F4xXJVm3oUkNRhu6Z9an3ZXLHA9fSq4XkkVDqFwLe0kdumO31FYP\nYs8c/aY+Ii+E/D7lJ9sjxmOONWwWbFfnNrmrS6rqElxMmHkcsxznB6Yr3b9p7xBq3ijxWNPs\nrS6mMbMjbASGycDtxXkWnfBvxzqzsyaVJbQhgvmTeuQP6iu6lBQV2zNs53zgrA5/XmqzXSxu\nxV0Ztw53e9ez6D+yP4v1JiNRuPs7EfdjBJI74rutD/Yf3xgXd1NjBwf72exzWl11Jujhv2UP\nGkmi+OEhPMDvv6c52noK/RnT7ozWsUp+6yg/pn+v6V4N8N/2YtA8A3Al0+0bzgwZZpm3OBjp\nn04/nXvNpZtb2aRdlrmqWexomP8AM8x+BgVJHId2M1XkYocAflSCT5gc1kUaEkha3ccH2IyK\n+Bf2wPB0+k68urwWjfvmIlZV6AdT9ORX3qkg2Y968h/aI+HI8ceD7uBMo+3h8kfNjA6D36d6\n1p6EyPziiuNz7OCMZX6VJDIyxNGv3mP5VHcWc+j6ncWVyMNExQZyD1xnn6UxpCjDB9q6CCwH\nVflyMjnNQ+b8xQgbyc7s9qq3UgjXa7BRn1qP7Uqxq3OwclyOMdMfnVDL6qzMzfcGePb3oaIq\ny8ZXuB3PrUVvdI+HBJVfvBgamXO7rnnilqAKqlWYllwetNyhfcw+9xnvRtG1sn5VOTTGU7Q0\nZ564oEOiDvjA3L/Onh/lZVOFz+VN+VANobngkUn3UZQvye9MYquNoXb1OM1NGpZgF+4DzUck\nZ+Uq3y4zSKxz8v8Aq8c0Ejtu5scBWpWwpypww4AbvTYZE+UYxxjPapNplwsoHtj9KBjOcl2+\nQjgqPWlI8xhn5vmxTcg7htO0nBNDt829YzxwPQ0AKcM397b+FO4bDE4xUbRttH8J4p26OTIw\nQ/TFAEa4Te3dvWplUNHtORxk01l3Lhl+daarg/KuWbvt6UhkisrOB/Bx70/zDKzHGNpwKamd\nuOmf4abgbGV22HsaYDmmkiY5OSw54psbPIrcblX+KkYgbRjcSPvetPgRRjDkDutIBvXGWUbv\nmP8AhUqRyhS5G1R0B71GqjawY98gkU7zmCGNstkcGgQqnG3CZPWjzFUtj7zHHvUfmFArAdOC\nvrUwXdlT8p+8BTAa2Ys5HftUgG6QMflOOaiWQ9McYzRIHbIGT70gFVdzEg8A9KU/eUdCeRQM\nbeBggc5pF6hgck9ENMBrOHckkr6CkVgHOc5PYCntu2kNwfT3pI927LH5V7e9LqLUQKfuCQ57\n+3tUvzKFG0HnqajTocgqQcgVKytgOTweB7UACkHAb72aapjWQsyEOOAaGwz5H8I7etKjefgt\n68jvSGPHyvlOuKUMF+XOS3U+lIrDavQd6FYKwYR5OeaaAkXb5BCjHy9e5NIYg8KCTl+2Dgip\nf4wueg49qYsyRks43N0pDQfNJLhztI4z605cQsVZdxP8VJ
5qsrHBLLyM0nnSSMQx59PSqELH\nL5ZdVX5SODTjGdyfNsGMnimqw2oDyWOBirEeyPKtueXbwRUsBjMse0I2d3VqkEcZbKHjtgd6\niViqksV3DsKcsabQSDuPOc0IY5H8typBLdaWM7VbA565PWk8vaoZlYtnqKfIzbQGG1fUimxC\nbnMYyuX65qZsSsjlcFuKhbLLwc7uAaf5bxlQzk470gH27M0xA4A4qw0Rt12+ZvducrUcOWVs\nAfUVN8rY2njptqgBfmKIeeQTmrccKuzsWEZB4Yc/hUCyND95MrilkZkX5U5YZP0qWMfGE83B\nyhXkL61LHtDK4B+bn3H/ANamTMggBX/Wgde2Kmt43ym04G3BJ6UgLWlwtcXTREcP8voK9+8F\n6XHpdjawquXxy/Zq8g8Baf8AbtZzKuYosMMjg89K+htBs4r2MyJ8sv8Ad7AdqlsDq9Et2Vhg\n5IGTW9CPPcqPvqfxrndFkeG6xnIxg+9b8cZaYyxEq1SUjSR2UhCMmrUSFozg7TmqKKSFL9au\n24bkk5FQyh+3oF+961Km9F+bkUKyK2D0qRJA2QeBSAdDMJEwOKl3fP8AI2Kg2gN6DvUkQXaS\nPujvQNDpN3DEZOeQKlWYtyOlQ+adx28jHepIlPkkkYoAkX8qRZhHnmo4rj5SGXmpeOy5yM0D\nHRyFozlafnsePeozMAACmPeljmBz8ny0hkygZyT+NIPmYjdSNyN2PlprTKMFOTSAnVvL+78x\nFSxsWweMGq8c2e2CetPZ9uNv5UATwtJGCwGeaja5L8bcGmLcAMVYkegp28c45oGSxnzCDtOB\nT/lXJI5NRNcheACPwpBKZBuPJ9KLBYnZV25HBqMMxkGDS53A4PPao1ba2NuT60AWTKP4uPXF\nCzB8c/LTFw3DDikWNZCVBwKQFiTbGuV5JNOWZmxx9KqhhGCD81O875RxxQMtiUsxJwopfn7N\n8pqrI25c45pUnVMBsgUAi5u3Y/pTGkIwM4FM+0J0GSadJJEfvDPFAyUSBOQwxSiZrhMd/aqU\n1uPLB3YHXFOjZY9pD8UAXlUhcE5PUUnzHHzYqFpNp3Bs1Y37owwWkxCq3mH7ufrTm+RScYqP\n5JOOlPj+UH+Je1CGIJk3bSMGmyH5RnNJ5i7gGTB9acz7mwFOPWhAWI22qobkCkdo1yc4BqPY\n24gt0pH2MuD970pgTRtE/CvQuMkBuexqONUjYcDGOacqovIyaAJ48nhjx6ipI3GBk5GelRRt\n74FO84YPb1pFErOm4gcn1p0Uit0ziqhl3NwMVOJFVcihiHyrvwAcUKu3r+dNWRZRnoaVcvGR\n3poQ5pliXJpnnTyZ2rxUqxiT7y54pm7YpB4IpMBYpHz9Oop/2geYFKkGl27owV4Jpj7t2Sue\nOtIZYUhx15FLjcvTIFMtpEHbnvTs/NgHC0xkkU3yhc4571IznadvPvVZlDL0xiiPcq8Hg0WG\nTswaMBeG7mhX+cblyvrSf6uHaRkmnLIY2AIzxTuIlXLMSB8vakbKsOtNklZlwq7RSq5kUbuo\npdQHeWDzkChmKgd6azKcIOppu4px1oYyaNlXhv0py/NTPlbHanq21goHFAEqJtAG7/GpGYH5\ncYFV9hLE5xUisWjxj5qGIVmwoGDTopQvb5sUn3R03E9aa2zbkUiuhImBjtTpWDMB39aijZdo\nzSFzuwRxQIshQ20nkjuKTIboeM80IFjBAb3prKAwGMCmIlTc2QOlOjjZcjGaTjaRznNOhYRu\nMtQxj1XLDHajbvOPShZF3sBQCPXGKQWGSKTkcr9KdGxXH8VL5xpq5k5JwaBj2fa2Sak4wBt5\n65qJ8MwB/Ons35UAPjxJnFIGBbaRzTo1BHHApGwpB9+1AiZSOOOKRQGJwcU0ybnBHSpFxt+Y\nj8KAW40OsjFBxgdTTlU7SSOPWkYBdrAYNOZizH0pjBZFbkdf
en7Tu64JFR7ljjyw/LrStnqD\nx2pMB3yIMZyTTlbbzjg1G/zcleaazMwAVsetTqMmJJXBGaTA28im+YXbrnFP3bsYFUJlSTTV\nky27qelU7zQ2mdQvC962lwGxUjt5n0oArWNuLO3EKjgCrMal8L0xTdo6g09XC4zxk0hWHtHh\ncHg02OPryM0rKWflqTa3ncD5PWmFhI8x5B69jSudyEHmlb/WAnml3AdqTGMh2rkEcU5clskY\nHvTolDAn0o3BvvcUgEw3fpSOTGw44NO3BVyW49KVu2TmgAVBznBH0p27dgAbRSQxlN2eeaNv\ny80xCkPu4GVo6MQeCegoUjbtGaesYbDHlhSYAqmlXG7dg59KVnOACNvNLtz82RkUAIyt2NJ8\ny+g96dnJ5GRSso2nB4pDGYK85pyvuTk80it+55FCruGO/rQIVo23Keop+4qxwOKahOeT8tKG\n3cUD0YBWByDxS7cHJpvOSAeKfu+Ue1IqwmAqnBwKdG3y8HJo+bjgU0MMkpyaYEnljbuPBpNp\nC5P3ablzH83WljLMuDxSEOKBmpdxU9OKaMqDnrQGKqe+aBhncMnNPwQOKjU7lweKfu2x7jxT\nEAX5t+cH9aeqgtnoahlbO0YxnnNOX5sEdKY+hIzDcQF5ox5i8/iKVflbdnimbupHOTSEOZQF\nB70jhVHoe9HTBNLJ/qwp9aYAqDGRyKUAKpA6ntQiqvAPFHEmeoNIB8eM4Y8UKOeelEaqg5yf\nShTkFiMDvSAfHhiTxgClBOO9RyMqrgcCpMlgBkdKYx21h34peGK54ppfoKdtH973pAG7dkY7\n0rMGHtRztOB9aTbuXpimIVvlA44pY2Ij+bg9qQt8m09c8GnoC3B6CgtAzFlGfvGlX5Tg9fSk\nJwwOKdty240AO3bl5HPvSFgYzScMQCaJDtwO3tUiG+WKKTzD/cNFOwC/My5p02NyjOOKZuPK\nZw2MmhUVVJc7jjiswFLbmGeB2zTpJAq4HSmSL8inOSBTOZEAK7eaYx0b/MD0pCGY5BwM09I9\noLFcrTW+9x0oAkbOB60m5ABu4+tM+9IOeKbNtkjO3rSt3AkFwpyRwKimk3L3x60xlPlgYwaX\ny2aPaRSsAK+0A461IcryBweKQlVjCnGaPMIIH8NMBY8qrbutKvygFPxpwZMZzkmo2yvThapA\nRzSZY1Hu7nrUjAbcHk0CEN3OKQDUyygg8elLt6g8U5o1jwQc+wqNmEmcHFICPn7qHqcmnMoZ\ngMHFSKAmABz6mkZD2NAFc5zjtUzME2jtQzBFwMbvemyAYXnNUBIzHhI1yeprJ1e9lhmjgjyj\nY3M1aSs0akluahkhWbBYBtooAri4e8kjRhtj7mtNVWPCxn5TVO0j2MTjip/OG/A6ikIczbGw\nxzQuN+5hTJMOpH8XWjLSLkDkCgB03+ryM4zUMfzSYzSNuOBnatSQyIrHK/Q0IYjKAuPek8wL\nJgD5sU/aGO49KhkxvB70gJVk3MG71K6grnoarp94HpU0jDaPemLqGNzAnmneX8pyBQFCx5zS\nM+W9sUIGCIIxxyajZOOuKkZgqjHemZGDnrQwIo/vDJ4pWwGx2pVXkZpdmSc8UwDcM4xSKvHo\nO9KwC8jmm7jnGOtIQu1eAeaY2MnmlZirbF6etNYbeCdxpACgL7miRfOUjO0+tLjDA9OKRY2I\nznjvQUKvyrjOT60bV6kUw4TGOak+8MigkZ646+lKi9Sy4NJ0Xnr60rFmAJ4FMYgAVgc01m3E\n8cUbeDg5qPawyc4zSAWSUogGc0yNs9Rg0kildvem/wC8eM54pCFlG1j3pqybMJ3p8zArkVBG\nu1stzmmIkuFdoQVqOEsPanTTbAcVFDvlyeg96BiTSBfmbk1UmcSyfLxViSNDlWOKpMfLbaoL\nGkBBe4WPaT83SqF0hjtVGcnNaMyhkyRk1RaQEbSB+NAHz58ffC
5uZE1RB+7k+V+Ojf5/lXzl\nqWnl3KB+navuXx5oNprWg3EM+cKpYMv94dK+Mde0+TTbqVJhjqM1pEho4/UHVbcp6dBWLLhR\ng9CM1rzTK25ZE+XqAay7jap55Y8/h6V0I52ikzbMgNg4z70zZ8pMrdRx9at4Ty2+TcxPXuKp\nyRlsMw+TODmrJKl8wVkHDHvxVSVhztGKvXCtuO3aR2FVn/dQsuwFz1piKMkbFg+dwx1pGVZF\nPyncOhpxbyxt5Cnt3pwVo14OVNMXUqSK6plz+NIEIjBwA/8ASp5YVnZlD8qM7agjlyrMeccU\n7jIZVKNlc7CMYHrUTqI1XIKY6mpJd5+YjC0srHaGwCMfnTArBvLV0KdDkMO9M24+cvnPRama\nQqpUj5mOag3KU4+ZgaYCO3lyZwAzd6csjS5QqcevamzR7cNkk9aczF4QUO31NAETt+7KFgBU\na/dVFJCDqDUjMjTAEAAD73vUckg5b7x9VqhD5WLK5YjZj5Vqqit5mV6g1O2ZGGxN/fHT60xv\n3kjELgnnjpQAky4dQTz1OPTvUc6yMI2jwY/pUsy+XtbByep9qTzvlKj5Fxge9AiGNm2tHwAT\njIpn90k4KnBapNny5AxmlUGSMqT8o60gIeWLlwGz2psYfaSRkgcLSkmP7vGf4qa2REWzhj1p\niFdg5jyvzfxfShWJYkjg9KVN0+3HTjgUyVRG3B6HgUDGt8oZWXHHahmARR/epZJF3AZKMT8w\nqKaMMxLMQR900DGySKsb9Tj71OMqSQodpXoQMVHOG5Xdk5HNWDcExLwMfd6dKAG5K8sfLPXH\nrSP5j4zgIRShRt2sflHIzTW+ZQueeuabGMXarFlOSOq+or2v4KeLrexsW0y4lCZbdFGT3P8A\nWvFFVNpK8jo2KfG0lmyTQyvG4/iXtWFWkqkbMznDmVmfZPnW92rNE+cdee/cVHv6YHbgDmvl\nnTfHWraJcebHcyMM4DbievqK7vw/8bZFYR3cm5QOcr1NeDUwUovQ810HHY9qDFlwTjnOGFNZ\nU8zLIrduRXNeH/iFpmvnylkjSbqFB6108U0U/CsDxn6VyunKm9TCUZRGfYbeVghiTnnpVK68\nP6fJIXa3Unp8wBrRWNtu7OSDSTY8wsxzxyKnmcdidTJk0Gx2GNYYwpH93NRf8InpLYk+zq0u\nME44/KtdUBOV+X+dNkO19w5PTin7SfQXNLuZI8L6ec7LaNW6Y2inHwzp/l+WLeP5eR8orTXd\nvHO05odmRiepz09aPay6hzMz4tFtFyDHGB/sqBzTv7Hto5A2PxFX/mbLFdlMVQ2V/Gk6kgu2\nV2sbZdrbFY/Sp1O1flA/EUbDIvyqMilaMKuCcms+d7EjtxwFYZB7ijaNxwcVGv7tz3XHFJ8w\n2gtjjmp1ESBcMWJzx0pi5jjJ69eKcxUrtzg+tEmQuwjLYzTQdTz34pQ+fpMiFWVyA21fSvnq\nc7pXCj92uSBX0r46t5LrSWEJ/fBc8917ivmy4jEM7hQ20Ejd+PSvo8FrCx6eH2IiwjVQ5bee\nmKl27ZBxgGoHYqgYrv5/EUKzYJy2ScgV6h6BIrM6ujYAB4+tNjUZIeM7/Sn71C5C/vu9OSUr\nI52YGOfakBDIzKQRyKieR1V9jc44B96mkyVUgYBNRyBTM6rztXJPrQxo+5f2AbppPC92xyDL\nJyPp1/WvsJvl4r4q/YJkmtrO7y263kf92ndT0P8AKvtaRdzcc461wz0ZoQmQdO9Tw+hOKrth\nXyaswHk9OlQPoeI/tVRyN4EvMDA2MM+201+YrRqJmk4O4kiv1A/amZF8B3LOQp2sAScDp0+t\nfmA0OJZ2PO4nHtXbT+ExH+a3lgADbRhFhA685IpiofJVepFPyMIeAzcCtQHeZ90EY2ioLhjt\nUAdT2qUsfMZnPTg02VcSctgdqQis+2TcrKvA54/z/kV9m/sG3kkk
Vx5kisFcqp2gEAjOPc59\ne3HTFfGnmFTIxUE4xmvr39gmX97exYyfMRj7Y/8A11nP4Son3owwoPbHAP0qqxG7FWJWVxkc\nD1qoxLOfauG5qSrlSPSodSG2xmx1Kn+XSpFzVfUmMem3B6ttyAfrVR3Ez8tv2hYxH8UL8ddo\n+YenA4rzIMudxB2j5cdzXqX7S0sbfEiVoAdxyJc8ZIJryYOzHdk4zXoGIr7drBSVfH5V9h/s\nK/Eixs2bRbves24/M7YTngfyr48YMzFuCW4Ndf8ACLX4vDXjizluLhre2MyrIR02nr/Os57F\nxP15b5lBByMcVXbjtmsjwXrUWu+G7O6hdShQBApzwBx/n2rYfO7gZrkNAjxIx9O9YHjrw7be\nIvDt7Z3CB4pImByM9ASP5Vur8vGKS8tVvLV4SWBcY4PJ9qa0kKWqPyL+LPh8+F/HF/ZGFYow\nzMoj+6RnIxXE7Jr2WG2hk2tIwBDH1/8A119GftjfDi/0HxN/ayy+fbTuyeZtICHk4J/z0ryn\n4N+H017xtbLPHvAIAQDPzcYb8K7t1cyPsn9kf4ezaD4fSUwGLc25hKOfqK+qsfKuDgY6YrlP\nh54ZXw7oNtEccKCR6kjr+NdUz9jXDOV2XGNhrIG7c1MibcHseOlN53DaamkmEKiRjgL1PYVH\nUdjh/in4pHhvw/cCNSbjymdFzgYxySfQD+dfln8RPFMni7xPdX0zeYckJt4GAa+zP2yPiJ/Z\ntj9kgmysgIVc8Yxg4+lfB/n/AGhixjB4+9muylFxVzFu4LjhiduF+7TZgzANuPIzinfKqgA5\nUHmmuAucv8nbHathiabYf2pqlrAQAd4Pzdx3r9Pv2c9Dt9H8I2YjUbvJXLHv0FfmLpNwbHWY\nLhAXZCCBjgjIzX6f/APVk1TwtakQPbDy12qTnPTmsqnwlR3PWmZRk47nFQjHUnmnSyEnCjio\nGk3cE81xGha355HFJeSE2regHNQRntS6lI0NjI6joM1cdwZ+eH7X1wJPF0CsNzxytkeg7fpi\nvA7qSM27kKuc8DHPSvWv2ndQNz8RLuNG3BeTn69a8bkYK2M7jjP4V2pGTOr+DazR/E3SpIW/\neK27cBnAx6V+qvhVANFtxnB8sE/Wvyu+CcgHxN0eRXMalwGP+yeDX6neD2STRICAcFeCfT1r\nCqio6mu3zcelR7geBxUsjHsM1FjGTisS0Swg7ge1cN8bpv8AijbxMbW24U46+td7bsoGcZ9q\n8t/aGmdPAuosrEOsTAY9x0qo6smR+YWovv1q/Aw2JWBbB5OTmooUZpH+TauB0qO4k3XU3ljB\n3knA6nNSqzeVvLbSTjbXZsZDSpPBY02X5V9AetKwVej59aibaxAz70DYyTD8gcCmRybry0AA\n/wBaoz3HallcNGzZCp2zWz4C8Iy+L/EVvBAGbLhNqDJJPAxTewj9J/2cVEfg+ByxDlVG3PsO\nf0r1i4ZWGPevPvg34fk8M+EbKymDbo1BUsBkcDg+vINdyzHcfzriluagy4YVNEvOKjVt+Oxq\nwilsYqRnI/FTUk0nwzdMZFQFCCTxjivyt8Y3H2/Xr2fcf9aW+b6mv0k/aQ1b7F4LvFZcq0TB\n2/u8cGvzH1CXz5JZQ3ysxY56n/61dVLZmbOn+EFoJviFYsfmfIJHfjp+tfqZ4WQx6HaDGCsY\nz+IHFfl38C4Xm+IVnnAlB3qx6deh/Wv1K8PKTpFqB/zzBJ9TilU2CJabG7OMGljXzGyG4FEv\nzDHekhj2t6VyGpcjG0jjI614n+1JIF8B3jkkFVbPuMdK9utlLR4HOa8I/awvIrLwLd+cPkK4\nyPUjABq46smWx+cXPnSZHc80mxtg+TO2nSZjuHiB3H7xxTJGZhuD7VArutYxIpJFkVyCM+hN\ne6/sr/CxvE2sDU5JDGYmDHcp6c8fjXiGhabHr2u2um7/ACGnYDfjPX
2r9Hv2bfh6PCfheOVk\nUvIc7sc4wB09MilKVkG7PXtA01NL01IV+VUXb/8AWq5vA4NK64Uj2qPbuQEHmuN9zYTlW68U\nyWMyLx8w9KmWMswqdrUsvyYFIk5Fvh7plzdNLNAjM/3mCjJ+taln4M0204+zo445YDt0rdEK\nrtVmVc9Mn/OaWaaCBF3yLu7gH+lLmY0U4dMtrdsqi9MdKkW3T7qIq/Sqlx4o0m2mKS3SR45O\n4gY+tYGrfGPwnpGDLfI/GR5ZzkZx2+lLmYHVfZhuJC4UU1k/d4xx16V4/wCJv2p/DujKgjHD\nfMQ3Jx+HSuY0b9rbTdc8TWum24RhNJtJyVwMdveqV2B7zIgGRVYxlWzVuzlTUrWK4j6OM89e\nlEkYVsYzTWoyJZPl4qPULRNSsJbeZd0bjBz/AC+tTrD81SCHc2M4GDQnYlo/NP8AaQ8Av4L8\neXXlxMtrcMXVgPu8nivIpJ9sY2jc5OFz3Nfff7Xnw3k1PwsdVhVGa0VmZmPOOMge/wDhXxD4\nN0mPUPFmlpITtaU/Ljn0C/XNdiaepB6h8F/2Yb34gtDe6jcy2to+JNqxkseen5Z/Svrqz/Zs\n8HSWsIbSIE8tQgbBJbHQnJ68eneus+Geipo/h+2kAMX7tT5JwQpAxx/9eujn1WObeoG0r1JX\nAzUOeugcvVnyL+0N+zNpHh7Rm1PR4/KUA8HgI3XJ+v8ASvk6Heow55T5fc+9fdf7TnxEsLPw\n2+nqytcYw+T8qg/1r4XkIWSRkTO5uOMf5Fbb6gkJjcuAMjPOaGI370HI4xRGWdGBGCOaTcqn\njjPJpCHtmMFugYcAdjQxaONQSGcimKVUbicKwz81PjU7t3UY4NIBBhkwRtI7GiN2ztAYJ6Yq\nKUsrK7nPPNWmPl4OcMei+1GoEPLAqACc06MBlbghuwJpcleG4BPWl8lGJVmyxGc0wBM9huHT\n8aXcecrUbYZRksdv60oYSsuAwYdvakAFTnj5hnNOjXOWA+bqKchbkKwx0xSrGNpw2SPelcYx\nZtwfjMmMU2H5Y8rlcUTMsbA/hxS8qrLk46g1QhVHHXIb8xTlj3xgA525JzUZURhcHqfzp0jb\nI3wM4+8KAGkBnGWI5+7Uh+VuuV9qZ5hQ5Ck8cZ7CpI1VI8ZyWOT7UgsQHcucZwDkg05bg7w4\nHA9adLvVWAG71PqPSt0eH9MPh37VBc5uw3Kt/KmVYxGkyu91VixxkdhSyfeG1ue1RNjGCO/z\nCpo8bM87scHHQUuohRMzM2SApGN2OlNK+Z/GfUGkhA8lyvzc0/zAzBuM4+7TJ1HKVZT9OtMj\nwpyBuOPxpgjkZsLhec4pxLeZu28dM0AO875yX4VRQMbhxhuuKbIyfKpGSTUnAXMnLL0xQMZt\nYy89TxVhl8p8ZDAdu1RbcsJNp3YpfufePb9aQD16sVXI60scIC5f5N3JPpUcanYvz7QG/WrB\nA3YJ3dzQMg8v5QQcAfxGnbTGgfOSaV2WTIbKg8YpQ6lgnK4GOaS3EPVQhz7c03cIVLEBgemK\nC+3b3GcGnKy7WG36VQxUdvKfjtzmmqBIB2JGRSKrNIAcnIp7IFI3fIOmRQDGKoEmVO01LyMd\nN3ZqNgQ5PI9utM+RP3ZznOd1IAVD5nzDPuKnjYN0OQO9MhV1yinLEVYiQ3kaoieW0fX0NCAZ\nHmT5XYrkZXFK3mTIqhwwHX1ojj/eAMeR09KVYGTzZdykKegpMnUCV2qV4ZacoYhcH5yaF/1Z\nO4A9cHvUsMYkWQtIB2APamMRfk4Zx15YetWI42WPqA2eKSMRp8zqC44DCmiPzZmCHBPPtTAn\nk3fu8nCg/MKJHVroBSdvXj0pIY2+Yk8heV60sQUKJOnrU9RkzMtruOQE6E9eaetx52wr93gV\nWTZ5jIfmUjNavh+xOoapBCgyAQTxxigD1X4a2As7ETsnmeaOB6CvVPDq
mFsJ8kROM/0rmNLs\nlhjjSBMKABjHFdnpEZitzvXJBzmpA3ooWjkUxjcpPDVusGhhVl5fHNY+nzFNq9d3NayS+W2H\nU81DLLtpcJcJtY4ZfWrSttHBzVGFUDDI6VdRQy5U4qGUWDIvGBk09pleMbeoNQIoVsjJU0/z\nUVQFxuJpAWFYzLwuR3p7Luxt4qtHM0MnH3Twan+Y8qeKQyXcSoGMGmhnZwtRFWYgqTmlWRlb\n5lIPc02SWlVsEEc0LIWbbjpxUK3DK2eopWlO8uODigssiNgM9utN8x16AbahE0rYPbvT3Znj\nHY0gHtctxwQKSEKrcHhu1MXerDjcKn2qy5Aw1FgGfamj+VU74qWOZ1+8KNrL1Ge+aCxx0pWK\nLEi7o94XB96Rd3GKiEjlcE8dqV1PHJzTsA/zn3ZfgCpBMnUc1WhYsxDdc/nSMrQncRxSGWvO\nC9utCtt55OarJMWOCPpVgSAMF+6aLCJlb/gS0JHub5T9KFAjU4OR6UzJzleKBkoiO4gnDU4Z\n9OKryuWXI4NIsr7cEGgC1u25xzTVm3MPlqBJiv8ADTmmGRjrQBZdz5mRgjvSbSzZFRse4HFJ\n86jcqlfrQBKszbuRkdMVNtXjK5GelVfvch/m706O4ZWCsv40AW45I/mBG3B4qRrgL8vQfSqy\nA8seQKHkMvoMipsBLGwZsDOPWpEmVchgapKzw9TkVN5yzL0wfWqEWVZG79alEgTjPFVV2ryB\nu96dxIvB5pDLEkLScI2KjGY+q7vfvQu+NhtfNOCumSF3Z60MBsdxt3ZBFSwzBlwOtNKOdoZQ\nRT/LG7jg0MB+/wCfGOKHUSDKtQrbvkA5pqqo5zt55FLYdx6uOM9akjkUN/jULD5gw6d6E+Qk\n9TTHctLhwdvJpqyNE2V+YHgiotw6rlX9qerFFHPNAFkzOYwF4oZldcZ5xUcdx820rSxbdrbh\n+NIEixGx+UegpxkwSNwNV14UHOB0okAXrnnvQIlXHbrTwuRjdg1EoXGAaXH8QOfakMlPXGeK\nfG/yYxzVdmUqM1IZAFBUce1AD0kYZBGeetTLJuX5h+NQbv3eCfmoiK7cA5FAE5JZdmfpRGzZ\nAPJpgfcQR19KXay5bPFAD5GHJP6UfwhRwfeoz8q9cmlKlWB+9mgCblVGeTT925wM4FNGQvHN\nPITjjmmMTc0fXmplJxnNQtl++F96VW+TGQfSmwJZGCr8uSfWo1kx94cd6RXITGOKarFs5HFS\nBLlcnFTxgZ3Hniq6r82cYqYLnknANAiRWBUkilXc2OeO1QfMu4Zp8cnFBRIrbuD1qTyxuLVA\nsbLJljg1JuOPrQBJbyryD19ak2qrDJyKrPheMcjqaUP8pHXNAdSymGY4pV2sxPYVCu4KOPmx\nT4t0f3h1FADw27jb92pM7hmolco3tTvMEjnbwB1pCFOWxj8hS8dT9KaqFWJFKGO37vPvQBIs\nYTocnNPPy8MvNVoZ0DEM201ZSYMCSaYD+uA3BojYSAjoAabu3fM3A9aUbYsnsaZVriIwZWY8\nKDgZp3mLgAHikV0ZcY4oKruG0YpBawqyHnPIpVwWz0FIo255zTl+9np7UgQQYXcpp8ePL680\nxpNnJGcmpQoL5H1ApgxXwMcfWpUkQgj0qHcXkIx05oVRu3ZyPagESfe5UUsi7lDntTN3zbQw\nB9KczSMwBAwaQCrGN2ST61Nu/d4Jwe1QSt8vXBU0K2eTTAesZVgxbIp8kZYkrUe55EOeKWN2\nZePzoAWJdvBPWnFQWAPAoVSuSeTTsgtyMikwGqBg8d6a+7dkmn43dMg0zad3XdUgPimwvPWl\nkk2kADdmhk5HynFKyk8ocY4NMB/3Y8hd30psedozwaTcw4H406Pv/EfekA9mXapIzzTjjGBx\nk1C3yjk4weKfGplxk4HWgB3K8d6TG3IHSnc7jnpSbgMelACpIAMEU7hgQK
Yy8H1ojypx39aA\nH4CrjFJICxBUUM3U9T2psTv3oFYe7fMMCjdh+mRRt+XHek3bOSKCrj+dp70fJuGDz6U1WD8d\nKcvfOAaQhTjbuUgijcWXjimqqpwo60vOaYCqQv3jxSswUkDnNJwFzilz8qkLg0tRjZJRBjI3\nU3zvMBDDAzUu1WbnpTPKDZANMOoSbdoAbJ7VIMKMA0yRR65xRFnj0PrQBIzbVyRle9DYDZHS\nmFD5hycLTV+ViM5oESqCfmPOKe0h24A/Oo1yufQ05nU456UCFCsByPxoV9rAGm+eZAVXpTep\n4PNAyeSQFeBzmgt261FzHjuDT2xu6UDHqpbr0pwQLyelV92DjkCpo8N1bigCVVQg4pw9xTY9\nvPGKcGLZ9BQIRdyjijqxz3pxYMMAYPrTGAjb+tIBzgMBnqKkGdvWoeeueKeBuUYPI7UxkjN8\nyjFLuHSmFvm2/jSr13AZFLqISM/vDuqUr69aYFy3vTt25sEYIpjHZNFN5ooKE3eWwwKBhlJI\nzg0BiT6ik2DoD1rMQh2rGWz+dDFGjXFMnCbQqnnNMUlcocetAyfzv4RUJk2yEKM5FGCHz2xU\nSkRNjflmNIQ+HcwAPGKkz8ucZNRsxVdoI69aIcrnJzQMe0hbjHNN3PtJJApzLuX5TzSNGNuT\n1HagQixg4JOai3edceWoOB1JqZNm3IBFSL8jEjkEUDGHbwCaFYyNtxwKdKAVyBio1mMbDaOf\nWq2Ait7jzpGTbjB6mpixX5QaayKGLHgnnNOK+aRtPPSlcBn3xnoelMVRypHPY0SRNCwzzzRu\nZeSO9Auo9lUKuW57io3Uq2FyaMl5hkZ70jSSLIWPA6UDI5HG4cZBHUU5Soxu4anIit9360qM\nJWOF3sOMUgIw21iSNwpGGxTg8kdKbI22Tp+FSSruTcOpp3AaGCRAYw5p3keWu/q9L5Z4OM4F\nOySfSgRGq+YuRwacgCsy55NNuZHjUvCm8jqPakWUywK7LtY9PagNQfDZwue1MVs44xintmPA\npOcdKQyVF3LimuiHFEkgVCBwaQD5RmgBzg4yMYpQRtwRk03eNwHFKrlXK460AK2cBRzSswZc\nAfWmxr8xP3RimrG5BANO/QQuSyjGKX5Wbn5TTN6qMehp+4S8elK1hCNIByvPtTGZn5anydAV\nqIyHdzQUK7FhjoKa8h4C8n1qRWEnPY02QBW+UcUEjGVlYAng07buYsB7UMQyj1pQzYIH4mgY\nEcjvQqkluwpPMVRjPPrTfM3DK5I70hicjgCnKAFG7rQrLtODTV+YbiMU0IGX5SQe/SlXMmM/\ndpnPbOKcr/uzntQAeYseSvJFQ4aRdxGOelBYdQMGnLIeh5BoAdtDDJ+7VPa24g9M1albbGQo\nzUaKNnJz2pdREfll268U9rc7RzxSn5QQBULTbRjPSgB9xGNqkde9RiTBx/CO9E3zR5zio/OC\nR7cZHWmBDfsu9QOp7VWuJvs+wFcFh19KmAyxkPzN2qOZlmUbyMjtSAr3D7k+QZzVSS2DQM5P\nParE33NqjGahupn+ziMdR1pAzA1CH7QjRE5DcV80fGjwmmlawTtZYZF3qccck19PlG3E1wvx\ne8MnWvDErxxh7mH5gfRfaqWgHw9q0R+1FiTsx1qhJII3OFySK6TWtLNrcSEtvB6qawZrV4xu\nYZH611I52UfL8xTLnHbFVrqQblQtkdCPSrEzMrbR93rVWNlZX8zGO1XczZXkjjk35J8sdCDV\nOYCPG1tx9qnkQ8IBiM9TVaTcjBSfoapMgaGOfNwNp4wetV2YrGzMNuT/APqqcyLtYMOnTHWo\nZFdlKL8/f6UxkG7owbDZ5NR/6tssAQTnipnxjbxjFVpGSRdpBUirAZNI8gKBeOv4VWuEdZMB\n/kwML6VOrdGU/L61E6rIxIOWoAYjMHYEbzt4qPAXKrgGn+diTevyjG00jRAs
7q2QeRSAVpNk\nIj6v1qFpflz1z1WnzsoA2j5jy2aiVtsZB69QBQhdSPyfm9T1xTxGMF8YPpUm07CM4JHA71Em\nfKyW+Zeo9KYyW4hJtdqZBzu4qHcVbdjB44qdZv8ARwQe/JI/SoNrzS7lIXHJoELuYr03Lnmm\n3AHysOAvanLuj3qH3BuR7U54zHHk/OuOvvTQrFaXDSKSpXd2FJt6qeDn71OlkO1Wbp0zUb43\nbs89jRYBrxllOF3L61GxAjIXk461PcSfLhQTtPzEdKjWFcbCcL97FA7DA3l4ZX/ebeVFJ5oW\nQOBlsUXL/LuXGBxxSLKiEbh8rL096oBke+YONoL7s07y3Vju4GMmjaWjLJ8vYexpE8yL5Wbc\nP4vc0dRakUihWC4yGOd1EiGKMnduOegqR/Lkj2sce9RyKy7DjK9MU+oCyKJEVwcDoQaaqqZg\nRzjipfLRcs4IHTAqFSNwwMKOM96TASOL55gDnOaesI25L9Bmnxud5+TtnA9Kax+XJXaM5+tI\nBPO43FsE9vWo/K3tv28exxTmVT86jv8AdNNVjyQOvalYC5Z6hPYO7wyGM9QwrrND+LmpabD5\nU0uISMBmGSTXDq219/8AABjb701VEq/MPz7VlOlGotSZQUlqfQuh/GqyvPLW7XylxyTwM12d\nv4r0rUI1kimXB6hjyPwr5Ji8yNSFGQ3APUD3rS03ULq3/wBVLIqL15zn1rzqmBT1icssOnqj\n61jlikkAjYMCMqfb1p8isrcqPXIr5p034pa1p11GHcMkfADc/L6V3ujfHaCTC3USmZjtVUHI\n/wDrVwTws47HNLDz3R6mwLLu6nNG3JOR8tYGi/EbRtWUqZPs0zDhZK3LXU7K8jYQXMcpBx94\nZzXJKnOPQwlGUdwk3MowcDpzTVAZgu7B9aseS4izjIzzUOwtkjHWs7PqZ9NQxtyRwabxzmpM\n+uDTDk/LtoER7htByeOxpN3mN83Q96kaIFTn72MGmbSrADGAKVwDaFU4O6nb127jywHP0poT\ndkDg+9Rt86EAc9DQBj+MBv0WZ4x+9VCV/wB3vXzBcMhklwcDeT6d6+ofEm2LQ55GJbCkAD3F\nfLOqTK2pTBlyrcjaa+jwPwnp4YgctMuGxt+vNO3GMAAZXHFRrs5BPzAVJDGoUYPHq1eqegOL\n7dpHJyPwprbm8/k8jtSCMxt85znoBT5PlOF64wcUgIo8+UpAYjGOlRNm3UNty3f39qmSQD5M\nnA5xUbMyzEOrEYyM9ORSDqfX/wCwX4ijlvLiyI+bdke2eD+X9a+8JISuCvQgGvzB/ZH+IVr4\nG8dNLchFSUhGLNjvX6P6L8SvDWuWcc0GpIoZRhW4NclSLUtTRGyI8vgjIqWOEA9KbZ6pp14o\neG6jljPIZHB/Wr8flSDerZT1GDWJaPnX9rxivgeRSRs+8ePSvzSZWa5l3YB3Hofev0r/AGyr\ndf8AhBZC24NtLcdMe9fmdlpZmcnHNd9P4TF6MnVjADuyVPf0puxSyjOSBTFm2jnmnqysjSE4\nB61ZBLGqksMZOPWq5Pvn3NSogKgn7vUVDJJ8xRlznnd6UxkD4O4gkfh1NfVH7B148Wu30RwG\nBBPPv/8AWr5bkHKnPlN29DX0p+w3KP8AhNryBuQ2D6dTzis6mxSP0ZmYbFAHGKrnLCrDRnyU\nK9McCq5U88155qPi+b61U8QRldJmbPO08etW4fvDHU1Q8YTPb+Hbx0yZI42KgDJPBrSG6JZ+\nW/7Rtx9o+I908gIaRyQB+Veabiyhg2fUYrv/AI8XUl/8Srp9ylVww29uB8p964B4yX2qNqE5\nr0DEJPlzvJAYenSod224jYEbxyCD2A6f574qWbIRhu3dgpqpcfLGWQYO3171Mo3K6n6d/sm6\n+usfDzTQJVnVII1YqwYK4UAjP1617lI29m2nGK+E/wBin4nWfh+aLQt8jG5b5g
xwg9T9ev1r\n7rGxk3o2VYZHHUetcctDYjWQ9hmpN5bI9aRVIGegqN5NmDjmoHY+bP2ztDa88D7o7aSVics0\nYzjHJ4r50/ZV+Ht1eeMGvZFfNuPmDfwjPGPz/Svv7xdoMPiLSJbS5jEsLjBU9643wD8LbLwl\ndM9tGIomGAAOa3VW0bGfLqej2OUs4UYcqoFShhg55NJ91FGelOjUMd2M1zlk8KhlBAqj4k1J\nbDRbpwu6REJCn16CtCMFQOmDxxXiP7Tvja48L+ELi7s2KyKuwso7ntVxjzMTPh39orxRLr/j\ni7hdj5Mb4EYYsCw6mvK4cJCxxkDg1Z1jVJdZ1SW5kfdI3zFsd8nP41XRg25Tkc4+teh0MCJS\nApGMUrYxkqUB708bX3EnleNtMmYsvJyB3pDG28jQ6lZT7lASUY3LkD3Ixz9K/Ub9n2aO68Jw\nzxSLOpUDzVXAb9P0r8tJAr3EAUEyFhjA4OK/UH9mdsfDvTY+CfJX5VHSsqmxcdj1y4YBjioM\nd8ZqeQbicDPWogpXiuM0HxjPPrUGvFl0e5ySMLzj06f1q1CvzZNU/E0ir4fviw5EZORVIln5\neftCSbviZfcghXKnBrzWRdytg4613fxp3S+PtRdsj5+M9SPWuB+b/WI2HBxtx2r0DLU7n4G6\nLd6h46sLmCFpI7dxux6Z5r9SPCce3RrULxGqALX57fsfXCr4zuY5UUo2CoY9Gz/9ev0atIkW\n1i28DHOOOe9c1V6lRJjn1wKNvvkU2Q91OaFbHbFcpoSxnb04NeM/tOXgtfBd2pYqZIX2keuO\nK9mRRg56da+e/wBrzUFg8DzjzFRUVsEnrnIA/lVw+ImR+dX3ZJCw7nkd+Tk0NIq5Y9NvSoJr\nhYWQM+WK9P8AP41WmvoGRyZQm0dOOf1r0LGZf3ovy45oaQYOOCBk8Vmx3yXDfKpZ/wAOB+dd\nZ4Z+F/ifxsrS6XaFokIRpM4HPTrj2oUWBzDu1xIIoY2dnICpjOT6V9Wfst/Bu9GoJqF7FsRh\nuGRgj2HHtTfgl+yrqFveR3mtR5VWzg+mCD9f/rV9reFfDFt4fsY4IY8CNAq7h2AqKk1FAouR\npWdubKzjjA5UYz3qff6Dmnbfm5ON1CH5sVxM2HQqNx7nFWomKpwuPXJxUNv/AKzGO/NQ+Ita\nTRdKa5bhDwfp/hSvrYdranzf+1940hj8I3dtbSoN+IpufmxnkAV+f8cSKvlpnYvAz1Ar2r9o\nrx5F4x8RXEFo2yKGQgjOQx74PevFtxVfUEbfeuyCtExep6H+z6kM/jq2X/lqzDGf7u4f4V+o\nOix+Tp0QAwoBCj0GeK/L39nXYvxOsY2+VpTs9lz0/rX6h6V8unwZbcdvXqKzq7pFwRKSpyB1\npo9R0pWX95nIpuTuxiuUstRTH5QOOetfMv7ZE07eF5ommIjJGYux9Gr6WiDemK+Wv22rjydA\nVo2DSzjZj0xVU/iEz4ajXazHOWPGfX8arrtEb5bLgk4P8qeWaP5WOCvB/OpdM059Y1qz0+Jf\nMM0g+UdTnt+Wa9BmB7f+yv8ADdfEHiJdWuoFeC3ba0hHO48cfT/Cv0H0PTo9L09IV4XsK8z+\nAfgG18PeGLXybbyV2g9ORwDz79K9dkjRdoVelcs562RUVbUjHLYzxSxp0wO+KVVI/GpZp47C\n3MsjCMEYVm6ZyOKzehRzPjrxpb+DdIkuGP73YxUcZ4GTXzL4l/bGk+zPFbHy7lGwZEUEYHX6\n1jfthfEXzLqHT7eZlZkZnZT8oGCOD6/418kKzd3yzNlj/n/PFaQhfcTfQ+i9f/ay1y6UlbmQ\n7jhHyFx+XauT1L9orxFJ/rXllHQlZj+ea8mC+Tnsw6d6jCqf3ZJyeSTWns4k3Z22p/FzXtQZ\nmF3cQegWU8/XmsK68Z6vegGa8nZehUSEDn8ayUWPzACTkUvyN8xXqcfSqUUtguRN
dSXBYtcS\nsBxtZiT+dTaDrR0HX7K6LN5cMis2CemeelRLEiycHd3plwgbbJ5Ykw2QCOtUkgvqfqj8F/F9\nr4x8EWNxbJLGREMLN1I9Qa9Bjtw5wcex79f8M1+bfwE/aIvPhOrWuxZbdyPnfJEfrx34zXqW\nuftpalNHIlrIUDhgCkQGTg8g5Nc7g7mkddz7MWJI/nlYIn1H+NUb7xVoek5M99Cjg4wzCvzv\n1/8Aai8QalcSfM5VhtLCQ7Rxzj1rhNZ+LniHWcILyRI8giRnI/l1pKm2N2PuD48fFrwte+D7\n+wj1OCRmHl5U7h6n9M1+e1nfTaXq639r8kkEpZS38Qzn+X8qsX2rX2pTHz7l5SeTk8Hj0ql5\nKtGTubcD0ycY9K6Iw5dDO59e/DX9rpNN8LxWWq2q3MlumBPvwc56Y78VheM/2tJ7i68ywURw\nsML82QrZPP1r5eZCyjLE54wDin7drAFun6UlBbiudP458dXnjC682Q5WU5kVjnJ9a5hSiKq5\nIG3FRZTdjJB9T0pZWGFYrx0FahcX5VAyTn1FJDgKS3zHPGetOKZ2joM8+9LIqscDgZ+UmkIR\nSbiP5wN6nCj2p/8AeyccdKauI+vfjPvSRngiT7vT60wGgbcAncScVZXCqY2Xcg5z3qvtC4yS\noB4AqXDNGAjEuT+lAhHkBAYHLZpGkJBfZnJxmnHZg/KQc4NC5zt6gDNIYu4YxjHtTNzrGxVs\nHoP8KeWj8rCsd/c0kimSHaCCPbr9aBEW0blx6fMPenLCttJw3ynp7UY3bcHZjrTpoypyeQeB\nmgBVxJllI3A96QGSPLE8nilXay4UgEjBoXavLfMFGAKAGrnzMbdxxmlVmVWwuWbtSZGNqkqx\n5Jpxy0bFD8yjrQA1sxqv8bE8int8ygquARTJF3YfcDt5/GpEV/JPc4zigQ0/vFCckgURyMys\npznPI/rSnLRttkVgD2ob5WVgMEcD3o1GNVt6c9Acc9TStsDDk7vY9KQ43nu2enpR9595XAPH\n1oAnRPs6sVbORximhvlIyCM4yR3pQDvwpA45X2oYbpAm35GOT70ABZlQ5G1/880zjaq5Oc1M\n0gaYhVzgYHtUSoygsG+XP40wBc+YpxuOcZqw/lu29R8oGPxqrD90g8c5BqeHYseD3NAEqMY2\nBPzcZqvJN8xbbkE8Uq5xu3jr92pfLOQdw9hQA3ey4iYbD1GB1qUsgCkkbunHWo3DNMrNw696\ncyj7zMOucgUgEkXzEDE85p8ZWRmT7xPX1pVDTZdeVTkbai4k5AIbOcrRqA7Druiz93nFTYEi\nB1O09CKikXLFh2pI3Krlifl5yKNSkSIrOxxx/tGljSTy8Mf3ueEpJF3J5m7YSN319qVcRsrh\n855oF1FyvLsdrKeQfWjYHyTyG5HtTN3nb2ddoFPXbJsdWpgx6xBVBckMDxjrT1kkjVjCWUt7\nU1/nkVi5AJxxTWd/lXduweopCFUERsJM788mn5WQMB6fnRuCoGY/vj0WkHZ2XBz2pdRdSYQ7\n4xtO4HqPSpo433MwUYHSq6MzN97dk8hegqfzQkLbc9fWmMeW+dWf73dRT4W3Ry8FGzx71FHn\nO7aS2KI8NcBpG/4CPWgRJbKVBGccVKuA5+bKjjpUe1VaTnluOP509S6L8h3DNJCHW+GU7UJI\nNej/AA70MMj3IYKp/i75rgNNge+vY4gCC7BTzgc1734V0m20jSYoI0+Xr5h60ho6nQ4cRlCc\nsBn610dtu3qhXAFZOhlBcBWXG4ferpVjInEmeOMVLZaRsWao68rg9quxxlm2k81T/wBXtzxV\n63kDJx9/tWZdiyvA5X/gVTxgbCSeKghd2xuHHepkh3fMWG3uKke25NHIy4C8oe1Txxrv3BOf\nSqbYiYYNPWVm+62KAJ2kXcVK4qVPnAEZ4qBd0keGOWz1qRf3Wdv40APaN4m+V6fud1
w3Pqab\nGwkGScYqJZmjdgRlc0h9Cf50XOMrUyqsi/KcZHeo45GZSe3pTJGCnKttNMZPtb7o60sZ2jnJ\nANN81mQZIzR5jRp0zmkwHMWz1xUkUjeX83JqtuBTJbDHtU3nBVAHL0ATrMz5UHnHSiOQr1/K\nq7bt3mJye9CkuNxOGouMsmY7sil+0FuCp5qusyIpDDPvT0mG3ikBP5Ybndg0vzNwTuFRxybm\n6gCn+YI29RQA9mC7QBzQV8zLNw1Ju3fMvBpucfeakA8fLwGpY2bdz0qPdt4xk09drrg8GmMk\nXLMeeKA8i/c5PpUSLtzhuKduKnigCRZmkXldp9aWSMheOT609dpwVOWpjSGR+B9TQALI6qqn\njmpvtjyYG3CjpUKsXXaeSKGDRtkdTQA7a4JIU0qsWYBuKia6lUcjP0oZmbDAc+lIC6IzCSob\ng8nNIzK0gCt2qHzHVAWzmm+bHkE0xlgE8jNTKu1QDwetQRMjIdvWpFuFCgEZPrQASMVkHOKk\nSQbjk44oZkmXHGcVApKnB9aT2AniZBn5juqeOYqCelQHCndkEVH55Em0jP0pAWZJXZwUbAHU\nU/5gd27GeeahfC4cDA70MyzcFiBVDLZk2ositnsSKevz8FgM85qlHhRtDZXPSrMcZb+LikwH\n/Oudp3U1VAUlmpwB+96cU3b0GOvekBMrlowQN1G5m7VBDMYnIzle4qbd5n3eBQwHqdzDd1qV\nRwR2qrI7o449qngkLbtxwKAsShdygE8VIrHaFNVhJsyfvVMJvPA42mkMcQN2TTt3zA54qNsx\njJBZad5qOh2daBkpYA5AzTlmULhVpifMOaajCPIA4oEThQ/BOGpjK0eSvTpRy8ZKnBpsfm7s\ns3FDAmVgnI+93qRW+Ubjmq+0sKXBXOelA7Em4yMewFPjuNq9MkVF5bABlOaTcFXryaALkdws\nikjrS+aMAd6qpHt5WpWA2g96Y2TMd2Bmm/6tsgZAqL5/vDmliJYEk/WhiLKyeZk5wPShW+YZ\nPFRsoXv25oVRwc5xSAsb89vlNDP8oA55pjN0C1NGiGEnHNAWEWQliSuKk/hJUcVGiFfmBGKN\nxzz60ATLJvVkI3HtTo2PQDAUdKhKncCOlSqByF+8aAEZv3ZB6mnxSAMuRgUzIjfDimSZVt+M\nigC620uTnGOaVsMoI6dc1DHIV5A5xT2f92Qx+bFSFiZCMfMc+gqMbuQRgdqjV/ujBHHWnOzS\nZGaoYquwYfNxU5bjOfwqBcBQuNx6kigHcSpGKQWFmjjf5iMGljUqBzxSbQy4znFTKVbpTCxP\ntb73RcU05Zef170yT/Vhh1zSCUOcK3zD+HFArDomLZyuMVJvC4LH6UxVLIT0buKkC8glelSM\nB8vJGalGZFwPlFMWTdkHrTdz7cBeO9MQ/adp4zTo2C855xxUQk2nknHpT22jDZ4oKWpKOrMe\npoU/LgUxZty5A5pwYbduCGNAhNgLbtmWHepVZjn1pi5CHJ5FJHlm3DgH86BkmB1J3EdaRiW5\nUc9hTdyxrk85PSiPdyT+FMB8krLhWFEe49B9KMfLzTlLKvqDUgPUknkk09SSAuOlM5QFvTrU\nkbB8MBg+lAhFyHPGaFzG2SMU5pFYHHWmq2Y8uvzUgHqx6ZzSqnyZzUHmeWwB6mpFk2jgZoAV\ndyybs5FLz5mQMDvRu3Z+U5oXc6jjAoGO2lsHORQsi7tvQ0K6nIxinGL+IjmgAVjyDyKMqynt\ninbhtAxiox+7cqwzSGPU7sHPFG3kk9KRR+7ODkUK24YPSgBFQqxK8j0qQOGXGNpJpEYr2pGY\nMuc9PSmISFpBI2/AxwKkyfLPGab1Xfil3MyEDvQAMysofHSgMCwY/LSqTj0ApNgk5NIBGkKv\nuqQSblBBFMQiQE45FRyW4Y7s4NMZOfu+tO3ZUUzG2MYNL7HgUAO+6eSCKXb1YUzbjrzm
l3Nt\nwOtAAV4BIxS7gqYNJyy4zmlZiSoK8UhCNwvJ5qP5idwGak++CD1pNu0YzzRqAqyM2QRinIo2\nnI5oVRgk0isW5xincBY16gcGpFwoyabI/wC7z0JpduQp7UAO42ZpryEZxTiAQaRMMucUAMZl\njjZ5DtVRlmPQVT07XdM1LKWd9FO4Ygop+bj2rRaON4zHIAwYYI/pVGHwzpVvffa4LNIbn++g\nx+FAzSWRWUFRzj8KVcqcnpSMynbt+hp6t8vHNIdh0jYXpQcFgDyKaJux59aUSA8AUCFPzZpX\nAABA5pjfe6Ubvm60xirIfMA259TTpAy8KcDvSbd+KG+9x0oEPDEDPenLlvrTOPrinBnOOMUA\nSbz6UU35qKChq/KDSRnaSCc5qGSR9wwuQeKa3mBwoHzd6yAkHyk8c07O45bpjrRt+8vUnvSB\nWTg8kCgljcCRiScKBUflEOCORUq/OvTHrSzHCjbzQNCNCmDgZJpxYMoTGDSKx2gjr3pGYK/P\n3qBjljG7JO3FPaVC2MZNRSAyL1wKiO5TgmgCfPVsYWmq5Bx1+lI0m7jtSq2z7q/WmIdvzyRi\nmySKMAdaguJ2hjkk2M+1SQo5JPpVfS7ma9t0eaHypG5KZzimJF1ZPMYjb070gDYJB5qTgDji\no/8AVqxz1qdihiszN83zc1JMNqZHJzUSqdqtUpY7cHoaAGKysygDmlm2nI4xTGUo3Xmgrgeu\naAGNHu2hTjPBpqr9ndipIqf7qgHg00yEk45b3oAYse87waX2HSpIVPlknr3qLdtBUCgQ9dxU\nqeDngio5mLHYOtPh37fm6CkdgXHY9zTsNEe5gvynjoaWFm27SMjsKfhVbkflTSGaYDcAlMB6\nlXjJI6cUgb5dppu4sxAGE6AVJGAPvfrSYERZdxyOOlO2q3HpTtqNIQO9NVSDzzU9bgLtQtyO\nabIm7GOOetHmjcRigybVKmmIe3zL8pwopu75cjA9aaCSpHUelRn5l75zQNDlCrkHkmnKpble\nMcGnKiqd34U9eclT060CZGy/LgUzyyTinM27ntSNJ6DmgEM5i71IF3JxzSrhlw1J/q+Q1MQi\nr145pWGFBPANJuCjHUmkLblA7UrFEQjEk2cYUelP3bWIUcUke4ZpVcdAKNxAiheoqRiChwCK\nZuC9+aR92wHqaA1EDbR9aYcDOaTy2YbvSoHWSSbjhcUC1HyqSVI4p7NtAwMmm4L556CkAO3L\ncHFACNKFPIpUIIOQPWoFYMWbGRUXmM2cGkPoWJJhuAxgVBKdu47cj1prlvlLGiQuw2Lyvei4\nhzSbrcD8qZ5QdcZwcU4sDHgdqaoCck5PtSGRyfKpCnnvVNoQvJbJPNXZFG09yaqTQqxB3YOM\nYoFYi/1jEE4AqGSLzJOD8mOamljdVAPU80yRzHGfXvQBmTKqg47VQu/LurcxMobdwR7VoMu5\niOue1Z9zGIpCD0NAj49+Lnhv/hH/ABNe26o3ktJvjdl4wa8v1Te0Q2nB719e/HDwePEGgi7h\nXN5a8kjqUr5G8RWJs3bdkYbOK6IyM3GxiXcbiFVK8sOtZ8yjyhgEnOPxrQupWeNcJu9vSql9\nuhiGMY+8RWkWYso3LLAqCRc7/vAdqrOxVsgAx54NXbjbMisTlMfrVGVWl29dpIG6tiBt1Giy\nbhxkZ4FU4Fdpd4+Q5xk+lakdvLNC8keGjU4561m3wfdu3DAH3aBCXDIrZ2854qJY02uDw3Wk\nZtxzntj8ah8wM5f7pXg5qkw1I2jTb+7O0k/dpkquFDnAOcYApzusrK2SuD6U/wCZgSCMZ6Gk\nw1K52KpJddvXB61Hl22sG+Re2KfJGm0tsGO+abw7KVBCgfhSDUbMombPaofI/dOUPPbNPXdH\ncMZOFI/KkmJkQIpwg5D/ANKpCEmUSMrbto24Puaj4jXGOvU0s3y4bG4AZqPb5gDNwppj6k
it\nuQgsAR0pqYRdiNkHk/WmzI/mKMYOMbfalkzGoEYGTwfWqBjo1WJACcknOaWNi25FOQpzVZpW\n3Ybjb+tSNMEdipx2zS6gKyiTfzlMdarLGPLJUkoeDnrVlmJQZUetN3Zh+dSFznIFUwIlRpEe\nNeTnI9/am7vm2t6dPT2qVcMDk47hRx+NQLhFyjZYng+tJiHbR5RBATPUVXhiIYliDtHHvVnB\nkyrDYvc0wRhY9hfK54qQEWOVlIOFXrj0qNcN8jrg1NIhMJUnDdBTUC7QZAeBg4qwIMI3y9T6\nUp3KuSPapFj+bcAo/HmnSW7KThi64znGKAKxAbsxPfJpvnIzD5Ts6cetSPIs0yAH5QOaQsFm\nO4DB9KQg+YEHcVB4LCo23R5UlnGeOOKklVmmXa3yY5FI7BnwxOP4cUFESK249s96VWO3dkZW\nlYNzkd+1R/MxYqMUwE3jbvPyjNOBUxuGPzMeKarMeAowfWnbPmBx82cUCHqrrbhhjK9s1asp\n3jX5DsDfxHvVT5Vc8cj+VRq7Mc5wQflFIRr3ljKsayCNnjbrtHSsyaHyZsgkOeh7113hvVPt\n1u0DACROCT/OsbxRp72M6XAG+JjjNLlGjMjaS3fcJmx2XPQ1oQ+L9Ss5EaObDq2c1ltIigbT\nw36VEzblYoMjpmodOD3BqOx6Tonxy1OxkKTHz8jA3Dg16N4Y+KlhfWZmu2xk/cXGV+tfN7QH\nYo6N1xinI7xyEKzKpOSM4rlqYWEloc0qUWfX1pqVnq0Cy2kiyoeMg96vhflUg818kWPiC901\nh5NzIu0fd3Haa67RfjTqmmxpFMjOsnKDG4j65rzZ4KX2Tllh+x9EtyQzL97kcVWkTaxrzDQv\njjZMuy6RpGY53Lhdo+ldjpvxC0XUFJF0qN2ZuPwrhnQqU9WjJ0Wb8MYYHmlMIxtB+fNQW99b\n3ILRTLIcZ+U/1q1BsmUSKcueBisreRHs2jk/GUhj0i4UhkHRSOhNfM2qQ7dQuQMZRvventX1\nT4yjb+xbtQgJVCea+W9YBXVZictE3IJHU172B0R34dGeqkt8wxuHWpIwJMKH2gdQaWND0bgf\nWlkRVIZiASe1eudgySRmDcfJ90E9aHJiVB2HDY/nQ3Az/DmkaQTO2DgYxg0gGNuz83yc8e4p\ny7+SzDB7+lDZZAX+bbwKj8sB+m5fSgByM9vIsyL8wOSFPWuhtPiBrtlLE0N/OgQDYrSNgAdB\nmucyyyEH5VApI5uCB8w9+1Jrm3HzM9Msf2hPFemgNFftFg5I3Eiuz8P/ALXniCwkRpbm5yvy\nuqvwT6/SvAVh53M+PQZ60oG5zj5QvTnrUckQ5n0PoT4hftQ6j8QvD72F0T5bKUCsevua+f2j\nDMyocLjvSeYQQSnbmhWQrk9zkZrRLsLmb3GRqrRnH59KfE4XB6fL36UzcRHuf5n68Uqq0iqC\nuB0zQIWOQtkEEd/aoZJWzhOpOBmns/q2MHaFpcrx/eHX2oKRGrDzv3v3vu/jX0B+xjefY/iQ\nwkJCMVC7RklueK8BkjUOmTuY811Hwz8azeBfFEGpQYdo24z09waieqGtz9io4W8iPIZfl+6e\n1QNblSflP4ivlrSP23tLjsLeO4hztjGc7c8e5NdXpf7Y3ha72NK8Me4fd3jP6V522ljbVo98\nhiO4ZHNZPjKFjoN5tRnbb/CeR7/QVwFj+1B4Pv54oo7lfn6kSAn61d8SfGjwrq2i3UUV+q+Z\nC209+mB/Wtobol6bn5l/Fq6M/wAQNZchVVpiRgcdSM1yKy/MM8jP+TXS/E6aOXxtfzxss0bt\nnKjgnpXMQ5feoH7zP3a7zEfLjdgHg9feqzYw6hcDqCelTllKuGGGA+7UcePLAIwOtK4XOk+G\nXiZvBvjLStRH+rtpN7L2JIPB/Ov1f+HPiqz8ZeF7PULfapliD+WrbsfjX49SMVbcpOOhAPJ/
\nz1/CvvD9iP4qWl/osXh+4llmuoge3yopbhc/jXNWNIn1y6kZ5qu5DZyOKvtB8o449e3WojB8\nprmTLKLAsuO1RbdnAGK0GtxjAqJrfafWmMrL83bmrMa7fp7ULDyeMCp7eDGR1J6ZpNjK1zIt\ntaySMSqAE5Hbj/P5V+ff7XPxGl1LXTpFteM8DEExq3QYxyO+a+2fi14kTw34VuZfO8hhGTlv\n4cdT+Wf1r8qfHPiC48SeJr65nm8+RpD84HB57fhiuqjuRIwMrG0gCk9s1JGxUcAMT603yykQ\nUgqPU0kbBQcfN7103MmK2ByRhyajbczYLfKe1PRQ+Se9M2smVKcf3qBMrMzLeWqqcDzPve9f\nqL+zKuPAljg/LsADDv71+XVxs3xAnkNkfWv0+/ZLlM3w7s2dNpaNZAu7djPNY1di4voezOpX\np1z/AFqEbtxJq7NHtxUXkg5zzmuI0GRnaQSMZrO8TyCPR707cq0ZB+netZYxu5HFZ3iqEnQb\n5QfvREfpWkNxSR+V/wAam2+PrtWATeSwx025xn9K4GfLJmLjnj/arvfjkob4h3oH/LL5Pw9K\n4OSEgqQdyjnA7V3GZ2/wJupLL4ladOA5jJYyKhPBxx+Rr9SfDlw1xodm7ksTGMsepOK/Lr9n\n27eH4hWsBRW818Hd3z1/Sv1J8Mwj+xYdgwgGB/P+tc9buVHcsyYOdtJuPIzmrDQ4GcU3y/M6\nDAHeuYsarbsEdRXA/FL4a6d8R9DnsL6IsHyUdTgo3r9Pau/MTHkdKQQ55xTUnHUNz5Hh/Yf8\nMNt3LPLOeS5YgH3rRj/Yo8LK2ZICV/ukE19UxxhVIKjk9MU4N8u0DP4V0+3kQ4J7nzjpv7J/\nhWxdWGlW8gXk703flmvVvD/w503Q7FIIraOKMD+FAM127Rkr6UjQ/KcGplVlIagkUbWxhhGI\n1VVHGKuN97PenLbAKB0p6Q7jgAnHoM1jq9zREDKVwDzR5O7IUc1dWHC8jI7nFYWteLtL8Pxy\nS3V5DD5fyspbmj0CxfkmTT4mlkdVVRyWOBXy7+0v8fBo9m+k2Nz875RgOo9D+lYXx0/acTzb\nix0tnDD7sm7Cg54PvXx94k8RXvibVpLu4me4djksx6nvWtOnfVkykVtQke7uGlZznfu+pPOa\npzRsFPJCnj8ac0vlr83QDtTX/eKhVycHIX3rqt0MUemfsyyJ/wALWspfK3naUUv0VvU/hmv0\n+0WCOPS4lDK55BZenBxX5afAFpYPihp0Kv5fnNyR0H/66/VjQ41m0u3A+8FG78vSuesVAryR\n9sYo8sjJHWtKS3CqSf4etQr5W4b5FH1OK5XoaakMKMR8zbe9fI37bEytouAR5qOCn5HJr63u\ntYs7GJpJLiEKv8RaviD9szxNa6vbpb2kiyIku4kHv0ralG8iZHyHJK0e6YHduO7J/LpXv37K\n/wALX8T681/fWTSJGVdJSOMZ5ANeD3EPmqWQAHvj8Of0r6//AGb/AI1eHfB/hq3tLlFiaNWJ\nkkkABz3x35rsntoZLVn2XoOipoukQ20CbUVQoUc49Oavpbu3bP8An0r5s179sbT9OtzHbLbT\nSZwhikEmR6n06H9K881D9ti71DzVkZRIARHtAUAf1rz1e+xvY+2Y7dcfMygDnrXE/FrxNZ6b\n4ZuUeeMfKcKrjOccV8N6l+1r4guLmRjK79tynaMemK4HxJ8cte8WLLFPI4ik9D0rZRb1Jehj\n/EjXJvEfia8eaWQonyojEniuUwisARg4zyKsPO00xkfJfPUnmkbLSFsKeOAa6loZDSRJHnO0\n+ppHLtjB3H6UHLphhznij5ywDcgdMUDQ/wCf+Bcjru70BywztxnqaRlaRWJkwfSo48MQpcqR\nTAkMuzBVSVzjIo3bl5OFzwKYxZV2g5BNERf5gQAAcUrCGyRs5DZJOemaeyvly+4Ajmn+WzNu\n
U844zQ0jKy7vm/lVFIjjQcAcjHA6VIsR2EnovO3NMVv3jk/LjkemKc8yso7Aj7w70CDd8zEn\novemxs3zeX8w7k0pHyhuh6bqTO6QjiPPUHvSEG7LDHzHrTVw27PTNP3In3OSBimuvRVGN1Lq\nIGbEewx7hnrTnWFNpIy3QHsKbt2nBb604EYY7cKRjNUMVs/fPUcFf603O5grZDE8LQFIt8Yw\nnpSAnYrbsdg1Ah2UUuCpx059fWlkHKIB8oGS1AVl+U4z60xCcMxODnG3+tADiF+Vs5zxS/eR\nSp5zxUaltpUevBpFbbboS3OeaAJNxVmyvJP4Cn7ZEjJUhQeCT1pdylPmbb35qNpCGUhtynjG\nOlAEioqtxzxx2BpqzpyrHDdwBTi4IOeM8UxstgqgAHBNICRGSTI6oOc0kkgYk/f46elNjkY5\n2gFehFN278kNj0oAdGsa8qhyRSSFuCE2nPTvSs4Eiry3rTtm4nc2CDxRqAZLnLHBFCsu7rjP\nGAKWPbyDyScU1VJXYVzIpNMCTEW8qTtGPvdjQAG2o7YJ5BqrJtXJw2zOSDUse2Ub3U/7NIY/\nY8jSDaoHfHFL8skZydyqeDTWkQKSQT64o4xkrjdTARmDSAIO2TSqwUYxwfWnBWwH6FTjb609\nlVmyoyD1Pp7UCGhcduSOtI25SoYZA6Yp6sJIdoPz54oeRiwUnHHHrQMRi65IBDY/Cm8qijHJ\n6gVKqyJGBkOO59KjZG3DJ57mgQrIdwGRgDtU4YKilhnJwAtMbGA/RemO9IqKyspbkDIoAdtU\n5DLnccHFBA3eWBnHr6UsSY+ZegP4fWiGQ+Y7EYPcHvQMXaOFJ2r13GlwI1JA3VF8salj8wJ/\nKpsfMAThiPWkIijyq7lbk8FRxT1kMhEYIXvio1bqCBuzipVXYC2Az98UDAjyYx8pPOcetS29\nr5wyVYoTninwr+73MMAjHPanLM8aYjb5BwPrQA27WJt6xHhB/F6+lQQqnlsW4wevvSSMzt8u\nRIOeOlP8xfMLMMnPQDvTKHRsNuGb524BojiZWJC5PTil3ZxI20bTmk8zchcHljxikKwFpI/l\nYBvpSxeWsjLv++ckehpsalR85zupWjVWI24GPvehpCJ1wsgXbvCjHzUkP7uQsT8np6U8lVlX\nad+0YLGmFUkjl5IamTYQs6o4PyjPB74qSMhVUYyByfekk3uPVuB0oSNy5AO7Ayc0DLEcvkyY\nPKnnbSyFZGbA2cZquysoyTx3qfyVlQCNSQOcmkJkm5NqHGGPB9asQptYInC9arMgDqcZHT6V\ne0uF7u8hh6Kz43UhHXfDrw+1xNLczAMidB716/ptkW2InK/yrA8M6IumaekS/K+fmb+97132\niRBUWTaAM8rUsaNPSbddyxtwSODWzDEzTqN24Cq9vtYZ27eetaMOyEgj161nqao0UYMAjjmr\nlvDgZ3DB7VQ2mTDY+lXbdj0zikaX7FmFTtKbsU8RFGVt2RTOCeW5qQybmXJ+UUiepPGwkjO7\nqKb5m3gL+NDRBlwG59qXaVwrDI9aBiwsd5IPFWVkPGR9ajLiNgqJ8uOtOW4UJtC5PrSAfnuF\n4qZpAIyQuTVZZGblenvUqg5yDTEKrLJwx2E077OOPrTGw+BtBNPOQOT0pMokC5bgjA4pfOUZ\nBGRUYxI2F4NSIAudwpjEbCtnjHWnbvM5xgUz5X4HT1p3PJpCJVZo+eg70zzhuztJqP55G3Z7\ndKnjJjT5Rk+9BXQVV8z+Hg04RiHhhuFM+1SAAFcD2pdsnBPINIkYF/eHBwtTo/y4BzTfKDHJ\nOKSNRuODimUiUZK+lO+Tyzk85zUSseecjPansYz7+lIYqz7n2jGam3K5BxjiqccYVvMHPtU6\nSBt2eKAH4VskHO3k09QuwMvIxVfbt+dGzTRcPwD1oAnVnXBAwal4bhuDULSNxkYFO8xWHPWg\nQ+
JirCpJMu2c4xUUfzNgdaVHV2weucUhjlX5iQ2ad95s7sGolUxuQe3ehmHXGSe9IRLJcvgc\nZXp0pNwEZ3Jk54ppmMZVdufWpApPK8dzTHYfCw6/dz2pV+XqcqahYbW5PXpUscTN7gUwY/cF\n+7Um5dwBOM9zUe3acbefSkkcNgFeaQInXH3e1LuRWx096hXJbjrUrMNpDL+NBVh0cxTKvytO\nVopmwvWq6sQDjmnRkHno1IknTYXAHBFSmYp83aqqsNxOcGpNpjyG6GgZbt5PMUndx6UvnDZ0\n4zVOOMqflbjvUqkHKlsCi2oWJWmVTuC8UvmeYuUOG9Kji+bIBBNKZFVtxUg+1UIsbzKVGDnv\nQJDyBR5yyx7kO2o9uOSaRRb3qFHPPpSrJ13DBHSoUhRkG5tvPNSQxtHkFt4zwaBknn4XY/A9\naqPcGNf3Y3EGp2+Zip5NVJCsOcHFIDQs7xbrtjaOakZT1XmsrTboqXTZ8p5zWgjkjIbHqKaA\nlVvwxTRMwkP92l5PTrTWY9D1pMCWOQMn3ufSrSqPKBY9etY32djMGUmr5kl+6eAKQEnrg9OB\nR0xuTJ9aRcsvoaRZGGFYimBPHkY28UK53kEUijbzQGz8wpAPDNnk4qaFQVOKgVd0m4nK1MuA\n2AcCgbJRtK8rzSkjpjA7VFu685qRZAyhj6YoAc8nllRn609rjapUdKhO0j1x+dQld+Rn6UCL\nqfMB3p+zqelQQk44anMxkU54PtQBOrCNeeaAT9/HTpUEc0asFlbaTwDVjJ27QcCkOwksvnRn\nOc0kZLKCW4Ham8qQScin7VxuHSmMnVj74pxXzF3Zqv5mO9SRkY+9mgCbdt2nGeKXanLMcD0p\nqsvOGy3pQx3RkH5SaAEWTbhs5HTFOXJ+YDn3qNflxnkCpAzu3HAoAeG5xjHrStktlRgVCjE/\ne4qVmaNcD5loDoO3tu2sMf1qXcVxtX8qgbJUcc1IsmxdrHNICXcQ24nHbFSCTPyk8VApO3n9\naemFXnrSGTBlX5s5pBJu6HrUKnLDA4qQqEbp+VMQrLu4709drRbSPmzUfLA9sc0keW9s0g2H\nqDkAfpU/Ktnhh61AJGTJ21KrFY8EYDc1QDt2DycGk3HGSeewFMbLMpp27B3EZNAhyru+ZgOK\nTd+73M2BmgqXQtjNKdskYTbUjJYwGQDn606N1X/dqJWMYIYZHSnw4bOV49KQBuOCGGc8Cn8K\npYemKaqr8x4B9KIz5mVbgetADlYIoHU0CUMzZ59qj8zBAHJB61IwV5FOPmNAD02suSMmk3Fu\n3GaM/MQRxRCxVuRxQMkkBVQP4qd5jfKv8R7U1m3NwcmgSFJF+XJoEK54yDg1KuduScmm7SSS\nVpVU7ieg9KnqAZG4ZoI+ds96Rj82B9adC/BVh1qrAMRQucH5ad8v0FDrjgdKGZmxxikAiZbJ\n3cUqqvAHU0qycFcfWl2qqll60FCSr8m08Ckjbb9OlKGZtpPNG3bISDxR0EJ975fepWyo6cVE\nMfjUnzNg54pAMXDcA4FHlncOcil2qzcdacq/Ln9KYhwHzYHbmkH73kcAU3JVvrSr8rfWgB+3\n3o6HI60uBznik6CjUBF2g4zgnmgZbJzmnLjy8svNG0/w9KYBuBUjbhqb5Z2e5qT7wwePekUl\nvagBEiBX5m6UFAenSneWTIBjINEisxwnahDsJtG7LH8KfG249DimxrIG+fhT0qRd23HFADee\npIFO3H8KXy+tC43bTwKBEC5LHcO/UVOuNwGTSxqDuwc02RPTg0DsStt7DB60fdXI4pkOOcmh\n1fn0NAx64bkdacrblyAARUbR/KATg0CN9pI49qYEiv8AN0zmhWVWIP4ZqOMMnBPWpGjDgHHI\npAthxwGA6E05T+8HpTSQqg43NS89SOKBitjfx0pQW3ZJ4ofHHShmGfSgQ/fRUPmUUDuP+eNQ\nByue
tBIEm4Hmn7gFAPJpskPQg9TWPUXQRV285zSmQA9MjuaTZtyM5qNm2SBRyvemAobOdopq\nMfLO7j2p7rtbPT6UxhuXrzmgCSMbhuHSoekwDUOTGMZIAoiZH4NAIk3beMfLSMq4HenMoMeB\nzUS/K230poYHg5xzUnmLtxjmo8lWBPINSPtC8daYhiqGbkdOaXftG1FxQsvGMcmmtJs560MB\nQG43YApm7nOM+opq3JnU/Lj60rR4QHPJPNSgHtIZF2gUx1MZGTxT40CqSOlQsEY/M3NMBZsD\nDZ4NKrHbu4xUfDLg8+lK5wuwUDDzsryuWoWQRsD1zSR4j+9zTwGY4Kgr60gGGZiSq8VNCFVR\nnmotrLn09aVGKHJ6VQkTNIOg6VBMhcggcUm0tkg4yc1IrmNRkbvrSBCttKnBFRtll+lPUL17\nUzbuPGRQxjS37srjlu9PhUqu09Kax3KQBgr0qMb+7YzSAm2jnBwaTzdoIxuPahQivktzSuQr\n4QEsaQiBG28tnJOBUpU4wRk0oXJ55wetOaQjPHzAUxkasGXj5aeigDJ5qD5XPynn0p8fzEDs\nOtMCRvlViR15pEXdHn+VDNuc45GKYZGWM4AwKQhGJXI6LSM2cY60xiZFBz3qT5VGWPNAdAjB\nY8tikk9OtMj3YII69KkGO/WmArMqxjI5qOPLA54FLw2e9NZgxwDgelJDHKPmwelNVdrHjvSs\nrbl54ps2A249e1NiJVwvJFKzGTGMYFReYTxjOaRm8tc9zxipGOMgXjqM1H6npUYjL98Yp0jd\nATmjUnUa8i4wKa7bl5IpJ8BQAKbgbCCDnHFMNRg+bIXp3o8kNznbT4VCrkckdaezblwcEetL\nqMryR9P4qJPlOVOARUohCmopVK44zSGRrtxtFEYUMQetStCvG0c1FNiOPA4Y0ARxsGY85Bpj\nQqMlulCr8u/OBmnNL5g46etFuoEG/wC0y4Hy7RxVK6DZ2/nV0lNpC9c/eqvNht396gCi1uQh\nYfeqmynafM5Nax3JFjHbGaz5o8Lk9aQjF1CxilhYSKGDAjmvkP4veCn0nxBPCFLRSnzIm24y\nD/hX2TdR+ZDgcEeteXfGDwi2saF9pgQNd2w/Fk7ge9XF2E9T4wvrX7K6Mh3HoRWPeRyTSOSd\noH8NdVrFvJaz3KyDO3I6YI/CsF4d0D4PIGR710ROaSMgqOFwQlQMCuVxjB61YkmVoN27a3Tb\niqrxmQb84btWxmO88x5EfK45FU9nmZDL8x5G6rLMWUPjZgZJ9ainG2MSyKTk/LimSUZIXtWK\nFMk87fWq7whpD5v7okcD1PatGaH7XsdWO4cNmqVxGclc+YOmaAIPnEYBICg4LGkkLebkfdp+\n0bACCko9fSopOOB0pjIbgFWJI+lRea0kQjJAOamlYfIOpByaSaEMxKDJ6imBBIxVyM57c05m\n2qFC8n9KhZW8sMT8xbpVnywWOeXxyaYitKjMNx+5nFI2C2FO3Hp6VLs3x7Q27b2qGWN1kHam\nA7PynccDqDRMEmxImfM6VB5jeaVfkHgCpd2AMthugFMCGRWWQAjg8UKpVpAQF9qlUZ++2TnF\nRsu1yQcnpR1DoIrLuyxycdqk88rH5bEZPIB6UxWEb7ZV4IqGMgSKrZO5vlJ6fSn1EP8AKO4l\nj+FKqqzBimG9qVpPvF1YuD2pI5NkkhUEgDqe1MY5pDyByeuKZszIMnjrSRt5kb/MBg9T3pjO\neNp9tvb61Axd5IcfeB6//WqNVXgBjt9D1FTNiSH5TznrTAvy5IyP880XF1GsGaNzxgdPU1I0\njssYHXbyPSkUptcHkZ4z/OoY0IjbP4VQiOZI2JDJg54IqNieAenUY61I0iyHy1PzgfNntUCS\nnew2nOcDigCTiNQ27IzSqq7QT8x7U2M7sDZhCcrmnRhmZmDhQoIpjCRioP8AF61Hxb7sHcDz\nmmrvjZ
WbhD1pdp3ArwueppkjIxvkOfmXHWn7vMA2ON68N9aVnWNtwxk8D0z71FJH5ewPgMB1\nHc0hisG3MXXrxxToY85yu1cYDU1eSVJJ4zT/ADTGqKd209WxxQIls5vs8+5WwCefeuw0+7h1\naP8AejzSBgq3pXEojSsSMBRV3Tr77LMDjKd6LjIb6xSO5kXJSMMcHHbNVGI5jK4B6MP0rrpo\nRqjNMFAGzA9a5bULeSxujE8TAd/egCFMsrMSWI+XIowGIBBHuab8pjCru5/DmnrGuUBb5s9D\nQDH5VVKKef71JJudcjluinOKGUqWh3BHPRjUbKGhVT1ztLA96mwhszKyjja/fbxzUi3ksIVk\nkZZBwDmmIdoZSuStOVRJjf8ALUezT3Cxs6Z40vrGIIZZSScMwbA5rodL+LGpabMrykTrHwm4\n9BXCLGsf3htGeKcrbdy4z/tVlKhB7onkTPXJPjKNQ0+Rbjy4ywIYdCa8p1TUBqGoSSr9xjlQ\nBVbarruA5HPWkZvMmOD5eOMVrClGGw4xURqszMWAyc85qRfmDGQbfSkOGYFTznmkkZlY981s\nULudpFTbjjO0mo1lUyH19cUu1Pu7WBbnJNMuPlBcj5h2xR0GSlyuCPmUcnNC7ljLkAbuntUX\nmIIw24ZPO09qf5YaNXVvvdM/yqRDSxYDcd2ODSvtkUFQVXvTlxImQm51PzL3xTWkMrbcERr1\n7A0xAWXarBckdjT3Eflhsg98U1lMm1l+XtTdz8p8o9DimA91LKCWxkenQU3h2EZONozml3si\nnKkMeKZIxUKv8GeWFIZJ8qhcDI/nScGM5Y/Kc4oWVWjZEPQ5BNEjecoH3R3PrQAiqWYDjB5B\nNOkby+oU8c0bQmQOeKGTYygIDnnNOwyI4EysGzgcfWlj+9ym3ceR7+tNEbcg44bOKmOFGUbJ\n70CGz/MzBcnI79vpUC3Etvs+YlgcD2PrUrYEgOcZFNkVEbeG3gDn2osgu0Sx6xe2+4ISrc5Y\nOQasw+JdVVVH2mQR427d56fnWcPmTAbA68jk1Iw8qRW2lhjselKwN3Hy3Usr7pfmk6fhTC4b\nMhyDnp3pPOdmYlTzxTeY4xgbiOKYBEzeW2zBZjzu7UjRusi5+YY5p4l8xyehUYxipBCGUpuw\nw6mkHUqbcSMQ+Gxx6e9dv8F/iMPhv40tNT8mSa3UFZU8worZHynp2OK4xoV2be3emorM2cgj\nGBSceZWHs7n6heHf2sfCs2k2zXU2y4aMF1b7o4zjP41qQ/tOeFru4WNSFyM7vMG3Fflj9qu4\nY1RpXZCemcD8qaup3sc2fNfg4AVj0rm9g+5rzo/Wm3+P3hKYsovI9ygFgpB6+nr0q9D8ZvCl\nyu9b4f7pxuH1FfkdDr+rRTbVuHjXP3mJyParsfjHVFnLi/mjkXjbuP50vYvuPmR+t8fxP8Mz\nRBk1GNh65/lV62+IOgzAmPUIc4+8D0/D1r8j/wDhYmswAYvJWI+6d3Aqxa/FbxBbybhduPox\nGaPYvqHMj7I/bI+KMdp4ZNrp17H/AKQCqhDkbTwwY9if6V8K2+UbzNoUleT7dv0q3rvibVPF\nLl9QuJJgCMI7kj8qzbfdtdQ3Hr1x7V1wjyGXNdkt0d0ibTn+tV5Gww2/KT606TnbuOPRqVkb\nazgBgo6mgTDjbwfY0v8Ayy2k7cnqahX7hAOCx4Oal3CTau/LYzzT6Ela6iEkirGNwxgE9c+1\nfpP+x3qVtJ8OdPiNxG0iJiV9wADf3T71+bkqs8bBOXYcYrt/AvxY1rwDbqlnIxJ7k/KPwrKo\nnJWRcdz9c2ntWU/v42brjcKb5lsrEecp4zwQcV+X8P7U/iWMtiRCR1xkf1pZf2pfEqrhpeW5\nyjHNcXspm2nc/URFiypLrt+uM1k+MZIF0G+SKZJCyFCVOQG6f1r81Yv2qPEIUZmbzP8AaYnj\n6Ut5+1F4
kvoRHcTExjumVP4gGqjTmmF1Y4748cfEjUFDq3lMUbac7jXn8b+c7dV45Gelaeva\nlP4g1aW8nOHc5OTyfrVCOLdMWjyvGOa7tbGBv/DOT7J4500gFAZV+b8RzX6z+B9lz4ftHhKu\nkighlOQfxFfj0s8treQXETATRMGR+m0g19LeAf2wNS8L+HYLJpdsoY7hs3c+o9q55xuWtD9D\nWtTnBI9KatodhI6Drivhb/huDUGkbGfukH5R3GM4J61Uk/bV1FsGC7mwByzqGI4rn5ZdEaaM\n+844d+Qv6/X6USQqoKuyxnrycV+f8n7aupmNj0LL/q14LHpk+9Zdz+2Zrhk4Eh3DBCnJFHs5\nseh+iHkoqhmkXb65pyeVsJDKB61+a91+1rr9xkJNJv8A97Kisab9qPxBebvKlkLr1feQM/Sq\n9nMND9OmvbFV5uFBJqCTWNNiwGu4V4yNzcn8K/ML/hpTxKGUly7n34qm/wC0H4lMxY3B3MwH\nGdw9Bn0o9nIPd7n6h3XiC1hUOPnT1WuG8efGzS/CNjkHyrg87mAYDrXwtp/7Tmu28LQtPdRN\nnCmOTAI75zXF+MvipfeK2kWS5nk+XAWQlua0jTb3E2uh9Ra9+2ncQNLHG0RycJsiyfr1+tfP\nHjz47an4m1CaRJnDvnfxjJPfH0ry6NvMXaTtIPP5UojwmcfKOhPWto00iXMkuri41CRZbmVi\n56biaiZtyg4Kr3qNpDJtUfez0NOVwu/dncPyFbbGbF8vcpVFyh70it5ZAA46dKdyqR7WxuOc\nClCkM+PqaBdLi6deyaXqUd5DK0MkY4kXqOlfRXhP9sLWND0Gy0x3OIBjKn747EmvnIKy57Ho\nRTDFlcY4HJqZR5twiz6bf9sm9aRkb7Uy4OEZsgnBwc/XH4Vy+sftTa/eSFwSQfvKTxx0/wA+\n9eFsuGXjPenbtjA7Wzjrn/P+RWXso9TTnZ6lqP7RXia9VdkvIOCqjI+leeeIfE994ovjJeOz\nM/zEdAOaoIzJEPMGG6j/ABpik89gB1PWtYJRM3K4iwhG27srjpT2maFWxuG4447D0piAbuuO\nPvGpI8tgE5AOc1Qhp5kQI7Rr3YnpSzeScBgCoPBAoZVZSd3fOKRiGh6YOeQKlod2gGI1LFut\nI2zd3yeoqQZbO0DGO/ekViVw4+bHBoSAd5ZkXZjDDufSneWI1U9GqJZW4JOXU9PWlkkZlOcZ\nznbVCY6N2c8jYOzd6iSP94WYbmHenOGIXnb3Ip0aMG68EcUh7DZGLOCDkdhihTtdQ+OTjpS+\nb86j+7z0oRvMd+M9xmmMXcWU7Bgd803A4w2fWk/1KjLdetJudckr8vQYoJZJ6r1pAC0JXBBz\n3pFy0eMeW45ye/tTtrNgs2M9fagCLc27KjnGPWnSKJNoB+YdxTlxGx5YMPSkjw29jn1FMXUk\n8zpu4PQECmfe5I5BxSHHqWJG6hnzgrx6ikygkO2TZ1Rh1Ap0v7uOMZypPAFNVhLAx6c4x6Uq\nrtZS3O0cCkiRcBWKdT15pp+b72R28sUNhpmB6E8e9Ky+X/F83TjqKeoxX/1gVvT7oqOZhI2A\nMkcE+lSxLGoHPI6HvSTLyFQ/7XuaOgDWYryD060P+8Y5G/I4HTFLtaRVCkK2efc+lOVjtXA/\n3vajUQjsF287m9hikzuZc42dwaWRyyhyNwPanMu9QCNgxTAe6RNhivK9M1HJIsY39CeDgUvX\naS2CtBYGQMAMdqBoTyWZi3DHHSlGdoHTac5owS/mBue9K0ZwRkbeo96AY1GPmOxBXjJpdwdP\nlOKRs7WJkxx0NK21o0bG04qbiCP90pZhuPagyFlHcmkP3VYE5zjFLIrr0AGOlMCRWCNubggY\nHFMWR+D/ABN3xSnLLhvqcUY24AGO+aOoyRWAVQ4B5+7UZkXy842g8cUKwjbIIznJJoyrTBgv\nyjpxxmmFhy
7OXC/dH60rEbt7DcSOnpUbFo2LBuWPIFJsdoz9fvUgJSzeW+1ep4ZqRd0cQUdc\n/MKey+Yu7Py9OD3qPBLbsZReCxNMkdgKpUd6dsKqAFzjoajO3dkDKnPSpIjtjKk7xj8RUjQj\nNtI78/dFI0m7PdfXtRHGHj/mTSw4J45JB+U96BCRscAdQfWnQcl0Pye9TnyvLRolyxGDmoeD\nknIOKYxS2FKA8U58yBT0IHWmrJtUYGT0wRSLIWkPBwvOKBiblCgL8rCpdyrJEcZDdPrTW2yK\nWI2k88UqrkqW428rQA/yXEh+UEEZqZU2R9Bv7r3qLcWjIL4Y9Ka3oWYvjqKAZLdYyrhuMY4p\nEYyZIXKj0FMjkEinIG7GKLZdvRyFHJxTGOVmDEKBlh2oUh3ALYbOaSGMxqXZsnOVPtQ0imTI\nwWPYUhAsS/vDI2Vzn6+1Sx5lYCMAZ6JUW1mb2HOBTxny8k4kY8e1MB/neWp3ryDj8aPLMaiQ\nngnNNGAuCQW9/WiVQzL8/PpUsRIy7VIU7i3zbqVVKqF25LelMTE2VXJf09qdgrgxHc3T0p9B\njmDq4+Y4HNOO7h89T2qNnk4OPk9T60c7l3NsIOSopCJmVmlLY2rinf6z92G2SAZ61HJsnwRK\nwUHIHp70fKVc5/eHjc3GaBMtRmRkAPyndgY9BXoHw70IzXJuto3A4XcK5PQdLk1DULeEIz5+\n/jt75r3Dw3pP9lwxxRp+nJFIk3tLs2PAXIU9K6jTbMbXBO1s8Vn6HbujO7jANdHaw4mBx2zW\nbLiT2qAKUJyMd6tJbo20HOKhVQpyehOK0YVV4SOh7VBtsWbdjKpU/LgYFTWasmd3rwagjXbG\nAOtW48mMDNIosHGB3NOjXLYxkUkO3Gc1Ksyo3HSgBVXacjoKekwcYYHg1G2NwIJweaeuc5Iy\nKBkq525HzegoUgLnGG9KN3l4wOadHIp4Yc+tAhNzE8CnqzIPak8sopIbOaUZPDcGgBY3Yc4x\nVhYTJGWJwfekEyquMZNHm7uSCPpSAcyiOMdj60iscbc7u5pqq0nysM5o8ryuc59qYx0knlgb\nV+tCMWbIGBR5g7LxSrhkO1vm9KAF5UnAPNSN8yjBwaj2txuJFIzFWx29qBksZIznnintv4w3\nWo1k3YFOTcw4HtSsAsilVHNOGFkHBwaFVT8rNzTgxjOMbh2pjEZRGeOD3oXDcAcU5d0mSVwa\na2Y+O1IBy42/LyalWMdzkEc1XZlVcqfmp4ZvL9qlASLtXgHgUgz94VGvy9eacjbcc8UwJPMP\nyll+tOZkl6DBpj5Zgf4faliYSNtHUd6B2HKpWTcRgUrKMHHBzxR5hX5G5NKrLJnt6UhDmVlj\nBbkGjzBgKF96YrHjdkinYLPwakCQSZ+XGD60nlyK2c5WlR1YkEdKRWbBwcrVIZKqLsJJ+lMw\n6rlXIoVNw68U6Rf3gVaLiGteHPI+Yd6kWTdyRnI60sb46qMEYNRrGV74pASKxyMdakWXdw3H\nrUYbc2NtDMobpyaYEjE28pAGQwzSoGZtw4p8cwdcdxUayPk8cUASb15yPmqWOZGwslQFXjYE\nr70jKWbceD6Urj1J2Qq/yfdNJuwSGX8adG7DjGRTWkJxnrTGG7GNh2tUyv2c9e9R7Y2jOOGo\njYbQDyc4pdREyyJbsd68dqfcL+7Uofeq+4ybsjIFCSNj5RkGgZchlEy4JG5aRZnWT27E1DCB\ntLdDmrbL5kPyn3oABmSYsR0HJ7VHJtZeeabFPLbkgjcrHHNT7VZeBTYEFvGI5Mqc+1WZG6bR\nUSqsMhIGasRsrLxyakYqSFmAB5qTc2MsOKZuGB8uOetK0ityGz6jtQA9WXjAyae7dCvemRsu\n0Y6UqyKOCMUATtJtjPy5NCqkyg9G9KhZtwzT1JVRx+NMCVRvbO4YHakjO3IHc96YAu30PrRH\n82RSAlZmj4Ay
Klb98vyjDCq3mENtI49acf3bA5OKALD42KelMaTadw5HpQrFs7WyKI5A0hVh\nxQBYh8uRQ2cH+7QQjSEkke1I2I2DIMcUivuYZ6mgpIag3Skg4FTgFjw3FQZKyHuKmVguD1oE\nOVkZvmjDY9am81cnC1D5y7uhpVkjmYpzuAzSEPkPy+gNKuSmByuKbHjbg05ZSikYplEcbcfj\nViNR1BqNVVlJxziiHhgO2KQE4xu4XDetO2lsbjTQzLypyKXzDIQD1oQg2n8KeWxD1Oc9qZIN\nje1KVJzg8GmMlVw2D1IHSpNu6PcflXNQRx/uzg/MOtG5uNwJT+tICzHhV9RUMG6TJZTwadFn\nA9j0p3z7jk7fagCRfc09QN3UVBHLyQRinNIoAyOfamIsR/KDkZFO3EyY/hxTLeQHcGHbilwF\nXOcH0pDHhwSSRxSLnAP4CkjddvzetG/cSo4KnIoAmj+VWz1prSHaAxpWYMCBycVEsa8t3+tD\nAnEn8LLg0nRj/P0pY18zBb8KS4y49BSBj0wR7U5ZssUAx6GmbhtK4wcUsa4xwTxQSPb06nFI\nsm5sLTPmZsfdNSLGFUg+nakPUUptXeetJ9/ABxmk5UBOxFOUBVyVwR3oGSRr823bx606PKsc\nHNJHJtTbnOTnNP3LyAM0DsIinJz35qQsVUEdRSbR5YKnFO4yMjjFIBv3iX6GlAL80ikc8cU4\nyAAFTgelAhRubdztNJCrw53fMaflW/8Ar0i7vmPakA773zYpqBm7U6Jj5ZY8Zp6/6vPemMaV\nKMB1FO/iyfwpCdxxwKRgU5J6UgHbepxijaAppMMOS3HpTlBDcDIqkNDNw3DHSnDvmlVQuSaa\njeYxYcChgKu1WzinKw28dKa2FUD1NL5eQFP3agQ3aV+bGPQ1JgMRyM0xsSErkqO1AjwuT1qh\nD1UHOTSYwpIGTSx7CBkn3pdxVWI6UAK33Uz1prH0pu/OG71Kq/NzQAb/AGyKM/N6A0oO3IpF\nb5Scc0x9Bd3rzTjnIOPlqHG5c96dnotLqBJ5m1wfu0bx5hxwuMU2RA68CnMny4HGPWmFheGT\nA5Ip24Lz2pkYKt92n4+X5vyoCw1WON1OjywLd6VVDNj2pVG1T/SpG7DUzGwJpfvMXPSlwWTJ\n5+tCr8nFUwQnHlnA5puHMg5+WpPlbvz6U3lfWpQyQZYYIJoZ+nPPSlyVAGetCr8xNUAz7zqB\n1Oc0/cy0hXqR8ppy7tvTn1pgIylW3dKkYvjmolzu+Y8in7mdBzg0gCNQZCc9qftLEYFQ7iW4\nHtU2SFwDjigBv4UUbT60UXAkZRs9KjBK/eJo8wOoJBXmkkk2knqKzsArOPzpojOc9e9Rt8zZ\nH5U9vMVt/Qe1IB2fMYHoBRxv4HekJxHluDTGkPDgcUASyMGYntUYOF3Ad6M7sHbgd6VhleR+\nVAgaQ9CcGiNT95hxQqhpN2MVI0m3jtTBEUknXjFNaQKBkdadOEf7p5pCr+TzzijqMMMvzdRT\nVwynPHNDscccEU+MqygHrVAMXHmDjinsvBNMVW3Hjp0pQWCncMCo6gRqx24/SkMY3bsVJuVe\nq5z/ABVGWLZxQArqUI7UgXcC2cGn+YWb5hk4pI23A5Hy0wIlU7STjNSwnghjxQ0A3cmo1TDn\nHTNIB6tubGMCh1OdtOkxt680nLYycGmAbdynaOKQEspOORTmZlXbjFN2tuBHIxQAxS235xtP\ntSXDSfLsPFPdOBu5HejA2g9eelAESHbncafGAuNx3Ck2qZWwMLjvS7FWMkHkUABwW+UYFP2k\nHO6o9rFcr1o+ZR0zSAdvCscU+OTr0JPFQxkbsnrUiqFct0FMBv2fYwOO/WnMoUnsaZJM8mNm\nQM0nnOzc8kUAJ2wOtMXP3TyTU7OSuBjmoNuyTcT9aLAKYxGoxQ23aDjmhmLZKjNIzCTrwKAs\nLuYfSk9cnk0i59
cinsp3gEYPrSAZt2k47ULtxnHNKG25Gc0kagtyaBDtpfJz8oqKYblHP0qT\nDbcZGfSmR56HgZoDoLuC47tUbxsHySMntUzrjnvUR5bLGgCPzFhT5uWpqjzmDcqKJFDP0+Wp\nn+WNCO/FADH+bA9KMMWAIpz5GBxmkZtzAmpAa2Ebao79aTy/L4PIpisfNYN93tTxIsafMc0h\ngylcj1qPLiYE4K4p0kwZAQO9RSS84701qAsjYk3VBIyu2WHFOk9aFZeM8UgG+WPmw3yntUO5\ncbUHzdzVtlWQZBwoqFFVN22gCCNVjDEjJoeFZVyopWXK+lLG3lg88YoEULhhtIYdKoMxbn9K\nu3UmGxjPeqjbW7YNAytcIrLuzg+lZl3bh0ZX5jI5rVaE885FUbhiqletHUR8e/GzwK/hnxFP\nJGG+xTndCerEHk8fWvIbpQivgZGOa+2/it4NTxb4XuITFmeEeZEf4uOSM/Svj3xHpEun3G1w\neTg5GCK6IyRlJHETwjzljZdp67qguG8yJgCAYz+da91Zi4haQMflOKxb223J8jc/xCt7nOyt\nJN+7CMd4xzTPtBkTb8zKOB7UkluFUBMqfekDDyfnfa2cfLVkjEj8tT8zZ6tik27lLZ57f40S\nNtkCxtzj5s0CRmyVGB0+anYGUpJC2W+82MCmMqtJ12gDn3qWRTI2e/T5ahyWd1ReAcljTsSC\nRj72flP5mo/O2qVI/Gnox8zkcHoDUUynaTnnPIoHqQMkhYlTgKc4oa4G7gnc1LkrId3yjtUY\nUbnKrllOTn0qkGoKr7CUbJJxxVqe1EcKySNuHt1qssm12Cng9KYWlUg4MnouaYtRJc8ng56H\nNRxttj35JY8HjpUxV2UoQAc521FJIYXPkoc5wVNIohz5cjHOT12+vvTWj2qG3ZMh4qWSLpuO\nI26jvTWG6NQn3wPyoAQNtkLldxVcYPakbcy7x3/Me9NXaWJ/i6HNO2tn5CWHtTESbtygF90p\nGOKMna3Psai2srH+EY/GnyRmNRhgQRyKLh0ISA0eT1J5peG2heMjkUrLlAR2OSKGYfKwXac4\npgKzbIXcnLAgYHYU9s8DH3h09qj2iPbnJDdeKAWjZjvJAGPcCnYTEVV8lwepP3qbtKhWYZOd\noH9amKrDHhWySOv1qBsqgQ+ucCkBBJiN+VwWH3u9N2t5fLDI9O9TMgkj67ueB3pNgWQAjjFA\nEDMWUbcqM5Oe1O5jbjHzcgetOkUzRsV6HouKjEDqVZuHUd6LhcfIyySDg78UyVjuRu3Q1Lwp\nBJGSOvpUZYd/w96dwGybdudvI6UwMJMFwSy8gmnlGZ2OPlxSlSUK5G7GaEBHH8uZW4JPXvUq\nsvlnqRnv61GqqyuDn0xUsce2PC8jGSTQwF+dY/m4UnqKSPo+wZHf6UYzH1+bPBPTFR7m/dnO\n0NwcUgNfQdY+zTKjplV5Bz2ra8aXFtq9lDNEh8uMZLYx+dca2I1O37/+z2rpNM1aFtN+zTYM\nuOuM5FAzlvMVu/CnAb1NCs6qwAGWPJNW9RsI1w0fAz0xVZV3AriqAbwsg8zkkflS557EZ702\nbDGP5SGB2mhlMnJ6AcY9aQDJlkWYZ4Y1IqrIM9Wp/LFc8kCmAETbj8vH3RTAcyqysS3zevvT\nmZgqMUzxgn+tNWRVYjsRTN7NIrY2bR68flQIAyybiybPQjoad99TIp5NP3blBxkHg571HKGy\nEUYXrTAjfauB/GeoFPkdVIck4xjFIf7qp82c7qb833sDOfu0imPaRWK/z7UjNmQu/wBAPWnt\nh9se3g859KhXcjFG+bByDQISa3zk7QSevtQi7lXH3V6U/nLHOMnOKY0TNxkLuo6gO5LmX7nq\nuetJI7702oXGOaa8Z+8HyFOD70q7kztY5b+GgRI2FUbnwmetKQNpOcjtUe0hTkb5O4PQUgD7\nlYoXx3WjUQ9c7t5c
HHSmN3JPzdcetDYeTPl7AP1peGJHv1oAaWXggckc4FG7y0T5eMZJp3nH\ncVUAAdBimMoVgwfctIY9d7MSFxgcE96VJQ2UbO2mKDJyXLe+MYpOeqjgUxgpLbtqlR2btSSD\nhWxhc809pAy9Ce2KQ8R/czt7UagM8zzJiWP7oDtSMB2QqD1zSBFEmE4Y8h6k+9uy3AH60aiG\nZ3KN3rxj0o+7nLHA5NOXf8m0Dphvb3pHXy8nv/OkBJHvkmyuNpXOaiGcHbkHv9aI27sM56Y7\nU5my2xTnuTQLqC4VVcdeh+tOLeQuSxLemKjbbtO085/WlmLNsEi/vDwMdKoYvmgnIXOe1MaP\naoXcFPWhlZWXjtjj1o2ncDjjv71IEgTzFyxJ7jFRqrSNhQ27qacf9YxDFRjoO1OjbKhkY5zy\naAEkXcwdm54AxShBu+fnJwGx3ppYbmYNlc8CnrLtj44Yd6Y7sY8QRtucY6g0gIbq2xaTcCp5\n3E05mAxkZ4wRSC7Hs2HYDPy/rUZUhlWNvLcnOf6UMzK27fgLz/8AWo2/INzgs53fSgQijaSO\no3cinXDbYyAOCelQllVuD0P3hzUiM+871O3saGMVmDZCYP8ASoyu1Sr8AnIb+lP55LD5P9nr\nTTG7bVU5UfMVpiBR+76Hr0FPUhw3UnHTNRs3OB95j+VPC7UwTjdxmkIhaMScou3HJJNSNEje\nW2c5/lSxxlIycZHSnMNkKkMDz90U+oDWjJYup46U/wArdwow/wDeznNM2oJCG4PbninIrN8r\nE7OxFIY3yecFuTx9PeiRmlYgH/Vjnb3qRm3YVex5qNdscjuucnt6+1MBu1SdxOQRkilbc2PT\ntxQi7QW2kL157U4N83XIPIFKwyIRGP5lGHbgj1p33SFA2nvT1G5uThiaAA2TyxGaXKPYiaER\nyDauHHO6nsojILPkN3+tOwJoywO4DuOKj2LIn3sbeaYiOSMNIQF4/Kmxjy8tsz2+XirW7awJ\nxk/lTY4A0jjcQaBCK4nKnoo4xmn7lbZtBJx+NMztUjOR049aBLIyFsbWA27vpRqAuPM3Njac\n5xRg7DghAR1pkbFvmPXoc06ZiVHG0DhT70AEkmYwQV9ttL/Fvb5famMqfKu3aerN6mlVRKcG\nTP1phcaNqh2HLMOtO/iQEgqRz70bUWRVPy03Kxu24E+hoAcrKMYHQ4xSbnZmOenO2l2lsbNv\nPPPWmMD5pPODwaWoD+PNBZyPl59KRQVjJyTz0pJAQ3KkqOlKrloy+MAcUCFR02liTu7CmM7B\nRuPy+opSke4gEhmFHllWDK24AYxTuA1ZGYkY57MajkXoFO49xU7qdgLHazHApm5txUjjoSO9\nAxMHjLDZ/dNOOCMID9cU4bO4yvekP7vA7nnC0yhTHn7oLcdKFYrGoC4bHX0prBgQ28/hT1G1\nchtx67qCRoC7sIcn3oMhO0EghT+VO2q5y77X9BUCr5cxcqSjfLmgCxKyNjA2sf1qObHUMAow\nPxpV/hXIG5cgnsKGYbjgElDjOODU3Ae2fMy67F6baazfuyB0z8vrSNIZmI79QKRd3ClMkfpT\nAVl24ZuWNOVQwODhqaMuNrHleRmlU8Ojcd80wBowy5IwRzxQznjJI56U9kXoG284IqNSZGIY\njAOMUCJWjCspB3NndTZJWEhYhcMaWPGC4cDB2kGomVVYhhu560DJdzxMWJHpmmSbywI69/cU\nkkm3GQSCac8cxZQCAvXd2oEEUn3zt56Cmhjwcc09kPmEq2FIpuG+8xx2xSGKrBlYN060nEO1\nHO526Y9PSjGGxnp/DSfMJBJ1PYdxSsIHAaLg5GecfwmheJFJ/OhNu0fwqDy3vTxy5JwD1pjE\nb+J2XP0pRIjMGI8sqOtRsxZcnrnNOkk3R7wvFHQQnyNlt2xuwPQH1pBHLFGq7gxJ5GeT70Fk\nwqkYJ5206UFJAf4/6e
lIBrK/mKQQoXjFP3M27dlh6U1vlO04Zj+AxT49zk8jFO4xIwFj/eAl\nqbu28cY9KkXcrEt0xTRmRsgB+MY6UwEYlsgLtUjpTuNm1ch8c1GrliGb7ynkCppGBjD45J6U\ngEdTI8YwMdKdhlkKkbccUbQpyDyBxSfM20jkZ5FAhWwqg9TQrFflxweSxp04VXXZ603zhuO7\nhicCgYIw3EA7x04pI1aNSSfYZpV2RsAxw3cikDL1wTk8E0AO+XbggF+4pFLbsKdq+lStGFG3\nHJ6t6VFGmCVJ+hp3sA7d5JyOrcbqRidwPUU9Y1WHcTk56UwH5PlXP1oAVkVZPl4xzS7ii/L8\nwfrSbg2ATtYmlwUU4OW7CkAbfmAJAKjOaRGOdyj+LG31pQViYK43Fxn6UAbNxDAr1KmgBY2T\nzWTOSpzx0od0lDEL8wpJOGVtpVup9KcQGUsvyDqc0gEG5t247WHT3pWyu1mbPsKRpGjhJI3E\n8inxxtMoZemKoBI/unecDPFOCsxKY+U9TTFKqcMhKqeSfX0pdxyxU/Keq0hE21QPLXAUD71M\ne43YGOOgb1pkbHaFAzk8H0pzELIRtL7eeOlGoChgzYK8+oqSaFVuF2tztyRUKTCNix+63b0p\nJWYyAj5x0P0pjJWb5SSMfT0pjRpLhlyG9uKNy/OnIC9KeyryqZDDtSAcwEkQJAULxn0pVUbM\nqwK9zio5FJUOWx2205oym0jn1QUwHMAcshwMcc9aevz7DnK96JFRmCouBjimLGBGD911blTU\niHGQ+Yp4APbHSlcBpMKM5OCaRZFXeG9MCo48w98g9/egC023yw2Cu3jC0J5YyCe2S2ajhcry\neR709mCnakZPcmn0AXzWjjHzBs9KdCfmYtwSOpFIDiTPVSMipFb92wLDP9KAQ2RX2jyxuHdu\nxqS3hS6mUjkLjI9KRmMmzado7V1fhTw0b6SJwcNnLdwBSYHW+BNFFtF9pZWVnOAx7j6V6ppe\nnl41ILEY+96VhaPYqqgbCQDha7rSbdrdflXO4AH2qXuKxPpsPzDnhf1rdt1P3ulV7K1G0swx\n71di+9gdMVBrGI9IGkQ46ZrQjiChTUMD7PlIq1Gw8z1FRY0J42Ct93IqeFAyk9PrUMfytzwt\nW1x90cmkAQw7DzyKmWMMuRwajWRl7VKx+YNjahpAIuWbnoKf5bBGZW49Kbt+tSRxjoWwaYDU\nk+UFuvSpVHy4NOQLuwcEdqG+XnORQAozyB1pqylpNrL0705JN3IXmpFnK5G35sUAIuEk+Ubh\nin7vY0sJGwbhz3pdyY3Z6UFAGMhPOKTc64DDIo5JyO9KzuTz0FAg2/MABTmUxtnGKbukHKji\nn58xenPepYCtMWUcfWhVVuR+VML7MDFO2lO2M96oZIhOSE5Hc05m2nPSmRL5YOGwT2qRVVWy\nw3elDKsAj84ZB96VWI4zhqRl2vuzgdsU4MHHqwpMQ3zJGzk805OevJ701mZR3B9qRJG64ye9\nICTarZGMUgyvyhsinb19PrSxxqG3A5HpQAzHIx070/huAOaXYFJI59qI1D5YHHtQAmCvekjX\nD/LwKkaMx/MPnHtTo5QvGMj9aBjg4bqMN3p7MBj5Mr60nlp1PfvSKGZtmcqKBC/ebjp2pUcI\n4DLwe9NbeikYpPmwN1IQq8M5A+Wn4DRZX8qNm9flP1pV2Z4bFFxkasY/epfRiTmm/wCsBVOl\nOXG3DD2otcB2d3TgdaXhmDZ4pE2qDk5oO0sCvA9KLDSHLJskO3nNPWQLncMZpoCtyKXjGGX8\naQWHeWjKWU4xRtWQHJ57U1FZQdo4p0al2GDkjtR1ESLI23g5YDFAYtlmFMXKs2eKV/m79abG\niVdiqNr/ADUnWQZOPeiNQwBx0oVeueaZQ7ydv3TuqSMoylT8rVAJHjJKrn2qeORG5KcmpAcn\nyjBPXvQYXRdy4PYCpdu7
G0ZpG3RyZIwRTAdGvmR5xhx1Wlt5jC3PQ9qfHIsp4GG7mo2K+cVx\nn3pEk25ZAe3NRqpjkyG+XvzTtydO9JtHegomeNW5VqI4iF4PNRJHt5J4pVYrnaSfaiwE8cwX\n5GNI3y5PUH0pkcis3zDHvUjSeXweRRYRJHn5Tjintyx6HNRriReuBSsN2COcUxiwxHlt/Hoa\ntD7owcjuKgG1s4OPanCYFdoIDUgHGTqAvNCs23BGDTYpieCpBp0bsc5X8anqDHKdzEZzg1J/\nFtxUCYbPlnDZ5zUoXJUOPm9aYChWjlBHFTRsBlWHzZqNZNsgBGcGrMyqyg4+brSHYYZCvvik\nY7gStCsFYrjNP25jCjgk0riGqxXAIyTUq4XpzUUcLeZ8w+lT5KtytGoxeWfKim7/AC2JAxSv\nmMZB/CgYUZPQ9qBgMSKccGpvKDRr8/zelNVRk5pw4Hy8U9QHrhVJH0pu8GTA6UkbFpACOPWp\ngF5KjoaWoCRusOQOaVW82QnocUx4/lDbuach2qcdTVDJl+VfmOc9KVenAxUWclR/dGalVSyZ\nzgnkVIhVztPGPelWXJ24+XtSRt8pUnOKGj3DIOPamBLnavvSNIWP9aYpHc+1LH8o2nn0pgL5\nflkMRknrU6ru+cjgU1ixwCMUsknlx/KM565oAduHVTzS+ZlCW+9SDChcAbqcdrZHV6kLCp+8\nwTgUq/K2ByaRYT0Y9KcmBkn71ADocqx9aVITtYsfpTedx5xx1p6jBG47vftQIartGRk4Wp48\nOOT3qJoxkA+uaXcFzigol+7KQQOnBp8cnbHSoAC/salRgsfzH5vShkkmdzAnFNk+YkL0pksn\n7vdjPsKI2ZYV3rhjSGhOrAt1FOaYR5yuQe1CqW5YhTT5FGcE8Hj8aAGbuPbuKmtzmP0NRbM4\nBHI4PvUit6DmgY/cD14IFPik8yPHTmovvKcnnNK2NoAOAOaBFoRlgAePeo1x5hz0HSmRsyoQ\nedxzUm4HIPFAWE3L680/PzdeD2pgxuwRkHija28J0ApDJtpXKnmlXduI/h9aTiMkbsn3pecD\nJyKYxEAAOW3GnMpK7sZFII+pp244wOlKwhu4sp4/OnMxBG0Ypo+Rsn5hTl/1mf0pjAqT34ph\n6nHAqR2DZApqoduetAhF/eLu9DUvO32qPtjoKWNt2eOO9IBfvYJp31ODSbjxtFJ5bNJuZjim\nA/duUAilwVXnpSRlVjJNC525PWgYi4yfSnZ3rwaRWVxkEYoX7u0ClYB23HPeljjdpOvGKYM8\nDNP3EEgcetADFb5iGHNSfeYEjGKQndkk5NOVugpdRiMCvI6Ubh8uQTSspDccijlpBxVASKwL\nHbxjrzSSMQwBHWo3t9zZzjHpTixIIpagPj+XtyeKeuVBOKiwV2npUnLZBbqOKLCEYliABS8b\nPlNDfKoA5NMO5VyD36UDHRr3pyvtOWGaZ8zDHvTm3IpzzijYRIVG3rSZ29eD6U1P3qjPGKkB\nBIJGaYxi5JxjA96lPy8DpTeSTk4FKo7k8UE3Gsq/ePJpW27cLwe1N4Zs0L97caCkHVeuGFJ9\n4ZLZNNuIy8L7OG6gVDCVaQKwIdeopjLe5vQUUbl9BRRYQsrjyyO9RgbVXJ4qCS4SK6SN/vnt\nUs7DdjpzWROpKpVWJxxilZuAhPB5pvnDn5cjFN3BgCWwKCh00YkQEnJphZVUBQTT1k+UknJ6\nCm7jjPpSAUBkYBuh7VGS24jOAPWpTL8o7moZo3YkjnvgUCBZCcnPFKJNxAzzTdv7vGMe1AXc\nwYcUAgZjCpyvNKsnmRqwyO1NkmEvfpQoK4A+uKBjmbyW579aNwGWUZNNmkbZgpkilUjjHU1T\nAkEx2r2PekbdypOc80vl+tJJgLyefekA1m+UgjgelMjfqcfhTtw2DHJPek27sMG/KkA7Ydyt\nnANKGK52gde9MeYDPGSBUa
b+GPAPQUCJnI3AZoYBc88Uxvl75NGx5ATnjFAwVlOBUshKjoBU\nWxfLC/xVI2eh5GKYDVwxznJo8wfdBwaWMLy44pskaKpxyxoEAkMkhVehpsknKjHI4pixsmCP\nzp+HLBuCKBhuO0/XFRr8x/pViXC84z3qGNdxLE4HWlbUCZXXaOcdqbG27IJqKPEyknKjtSj9\n22aYC+X82TxzSsOwajk9+M0pHykEfSkBHhl43YpFVgSfXvT2+7QufujoaAGrhT8x+tG0DcCe\nSaZJ8rn6YpVZsfNzjpigB7ZVcDioAe2M1K26TGKBARyePWgADL0AwaaJCud3NLt29eKSMDcf\nWgCPaVbPShlPapWXc2BzSMcCgBu7LADlhSMTz3pFLbicU1ZAued1JgIdzc557Uz7rEt+FSjb\nuB9qR2KqTTAazBhgChctj0Hak8zcQG4an7wiFuvagBjtvyAMH1pkOY87+acm45GMcU5lzHj0\nFADZpAq5xwarrIsi8DIp8m6RAvTFRSRmDhRwakByL/tfhSbctg9aM7Yxx81JyzA45pAJIwVg\nDzRs8zJI4pWO1jng1JagEM2dxz0oEVVDvIQOF6U/CR8Dr3qRpFM2xeB3NQSYjZh1HrVWArXB\nLMApyM0qr8xQntT1ZYwXxkVWXczE59+aQwuIfmz2xVAKrNjnPerlxN8uG+VumKijlMa9BluK\nQFeUBVIHWqMsZ67avupmbjgjrULkKxBPtTAw9QtxNG231r5v+O3w78u4fWkBEEh+dQMAcdRX\n05NF2XpnJrA8RaNb65Ym1uIhLB/cJ79M/hmnHcVj8+r6MwAiHkDgqawLiYNIVxsIP516f8Rv\nA9z4V8S3MMofygxKuwxu59O3avNr2PZMz7c4bDY7GuqBzSRTlhG5mYbSRVBoiMFcHmtGWQ7d\nx7nvVHzPmdc4A5zWpm0QXJXA67yewqCTLBtpPTvVhiJVbblSDnPr9KhcARlicbuDVEFZd8a5\nB5ztqaRdm1fvFlzxTGbbMgCgikhZoZ3OOMYFMCu5AbJz9KgkkeGNlxnnPPpV2SNGiyx+bPFV\nZFEzcdutMLkRGdwPJC7yD0oO8xrIBt38laXaGl8zO0DjHrUUkhl+fd0OB7UwJPsqMqvG5Lt1\nT0qKT/RVAJJLDGfSpbO6+y3EZ257E4ouGDM0oIwR0oArQ28qq5DEkc+tOMj7SwxuJ+8fSrtn\nIlvamNmBY5bPtWfcSBo/LToozzSBjJI5JSQwAXtSPCY885D8k0NI6qeCrqu4fSmyR/KGZ8HH\nOPWqAjXCoSWJXp9Kas3l/L5ny+opy/LIRuwcenFIqx+Y2fvYyDjiiwajo1jjVmOZDTZpECoU\n+Zj6UnCttB3g9Sv8qf5cSruUHdnGCKA8gY+WxGGY46jpTW2RqrM+e/0oWR13A9P5UyTjAPIb\n+dICVWbaCX3Y+6tRfu13Oz7ZP7tNiZm2krkjtU2wGM7lwT3qgDcrKrgE9uaayryS+O2MU/cu\nxc8EdKazB5FLKQM4xQBHwq7l+97VFM7KwwSZM42n071Yk+WQqCMMMHFRYWZxG0mMDI460hEj\nMQh2rlQeTUEjZJ6ke/ansMKdrE54NRSsWXKrkj5QO9O7GPyjqNy/Q0xpDIfmXZjgccCnMWWN\ncLnnk0sgZmXcQuKYMg53MynI71IG+82PakkyylRxzk8UFsDI/iPSkIIgGUnoc8+1IpznJwnb\nFOb5V3KcnNM8otIcnbxkimFg3eYgB4ApN38Sr8q4GP60qlHyobGelNRihOM7fumkJBKxypTG\nR19xRDmKTIJz157CmSN5SqAPnB6Y7U+HEkjHpk0XKLFxI8hBySo71BOViGFJVt2Sfap1kdcx\nHBVqimbbuLDdj+GmIZks7Mx4AwKYIQMfN8uKUMf4sY9TSbt0mF6e4oENWM53BfunrT1D+YT/\nABe9R5ZeBkLnqad5L7gfM3rm
qGGw7XJG7nvSKBt+fMZPQ05ciRlOdoPSnysVUqw69M0ihu8E\nL83PSnSRtxnr/Oo12t04YUnmtjcSR2zQIVyVb+8ophQN8/OCe3Y05JGzyu4fzpAQrZccHooN\nMAVvKfDdO+D1pqyBmZgOM4x3oUhpMt0HaoWBWQvjHPSkCHLlmHy4OcU9mXdychetMWTO5gOe\nxNP3DbtH32FMBVjHPIx1z2psf3dw+bngU/BjYhugXjHekkVVKsGzkYOKBDfmDkbvc08NIu1V\nOR1wOKbt+Uruwq85xS53crzkfnSEIWbzGycVE29tuMrz0qRl3MFHX9KSSYDcGyB03YpDEclX\nxjaPWl2lpPkAAA/Chv8AVhQQ/embijAPx7CgBTnBYsAOgXtT/MWJgHBC7eqjIpNq7fnxxzk+\nlG6GaQAMxBHUjAoDUBhVGAAT2pI1Ys43fL70bdsgGCUFNZn+7jCVQMU7myxO5OgqNWEbeW/A\nxwBTstjbu4HQU11EI3Yye9ACocqxJIpd2JMOSwK8D0oyNoYnk8gU/axViB8+OKAB4wuBnk9h\nUbKi5POP50jOUUEj950NKcI4GMDGcUhCkorB4+GUZxTvNO5WQfL1waj2kjpgnvTtyqwUsQuO\nTQMdwykhsPTHbcQOgzzQqlTlOUHVqXCso+YnvQA5srkhgUpFyiYXBLHJpEHqpGe1OGOTt4HF\nBXQadrqV9DT1kO44TOOPamKfurjgnkUTKOQvykH7wNBI2UFl4XYO9CqUjwBv56+lOaRm2qR9\nTRCxVnVRzjvSATjgY3DNITmRn2fd6e1KfuKCcYOelIZHZjheaBC7tw6hWYfjTdzrhQ3y46Gl\nZcyq7fdHpTmYxsxIJU0CGttERCnJpGk+UHaVwMZpP4QQcMDzTslpuRlTTKCRtqoF4yc03lpd\np6Z4z0p3nLJu2/eXgA0NIWUKQMdSaYhHLopIyFzgilfb8gXhe9OaQNHsPT0FIdrbccjPakAm\nUUkFScUfP5abePmztNI2GZ+cUuQSGc/LjrRqAjSBJGJ4/ClVtqkqcbRmk3btoP8Aq2OAabtZ\nQ3PCtgn1o1AUytIvzd+cYpGZh9wcdvanFmbnGU9aau1ZSrA4I7dzTGLGzfKW4YnANLu2huc/\nN2poy2RxkDpSR4/u8d6GMcjIm58Z3cU3ersWxkdCBTW/iWNgT3p8eOMsCfapED/vFUFcLmlV\niuDgBRxnNK3yqrjpnA9qjEnXI3EjrRYAdyMHGUB+9igyHaQW98VIuAo3cjHSonUBdzdAcimI\neip9xgc4zmlGCoVufamNIWjLr06+9Pj3qscjAZ6igYLu3EEkAHpRIyySbh09BSnO4gnJY5yK\nZGd8bsyjeDwBQgFZmVg23dnimKJPnUEZ64PpRyzsQCo6DNDFiwUNk9zSAFDhlx1xn8Kd50aw\nsQSxJppZm2qQdmefelbCxtuIAzwBQICZGVTn6AUhLAle3UqKcrOVG4UgkaNioGDjOaoAj2nn\n7x75qNS0mcJtUn8RTwvzBf73enEljjgbf4hQA1WwxU/Me1KvyqyAZY/nSxyBmY8Aim8DIcE5\nHXvQMk2sqhQu2Q9z1+lMjjbJaNflHBOe9Kvy46jA4akAKwuCcnOeKACNfLyuwl85J7UeWGww\n4/3TxQztHswPvcFjSL80h2rtCnlaQCYYs74z2p2SygHOKQMyqR91c5FCMd24+vSmIVYUWQkt\nvYDipFUtuwQGbnbTMBcgnHPSlMZ3Lg/KeOOtICPakbfOdrep/lSyKY1H+1/FnpSyKrOBu3AH\nv605WDTOC27C9MUgImhaRg7MAqjt3qSMBpy+eCMioxEWUEMSv5Zpy5XIYYwPyqh2Ht5cU2dr\nYfoB2NG0qzI23IGQ1M525bJbghu1O2FtwZ9uRmgBCvmKWAxgZIoLErGxOCMHinMAqAA5JH4G\nmKAY93QdKAFLEs0gwTmnYHllgc
kjkZ6U0O0QGI1we9IsYb5x8rdxSAbuLbO47VI/TrzSKDH8\nxIIpRGHbeTgZ6UhEe88Hq3rT1U/KSR83B9qduXcVGAq0xm3KNp2rnr709QF2KJCm8DHGKOOM\nHvjbQ5ZsSYCgEA+9IqqsjMq/OfXpRqPzADzFIB2896eZBFk4+UL+VM3DhiuVXrSv/EPvK3U+\nopgMkuA209sdfWnc78luKVlRlyMYUYAPakSMdyT6ZqQHoNysGGQexpkO/lQctSEN5LeZ1zji\nnRyFY8jgdM+lIB8hZW/2QOp6URzbQm9cIT0FIzNsznK9KjfKqA0ZUDnb3pgTtsYsd2Pm60nL\nTKqjn3psUfmxkH92etTcRqMjGBgVQMaGVGyRle1M3FsqDhuopN24Mir0ajhjxxil1EOjkMik\nngqKdtH8Y3DbmmsjCTOMLS+YWY8cUDG7R0b7oPAp7KV9MYyBRk+YQcc+lJvGdjfM3emDE8xm\nUMoLMDgj2qVmRs7XDserAfpUanbJJFuxtGR70942jXIGD7CkBGxZVViMgnHHanAlsj7q9N1K\nd20DfwR0pVhbIy3ygdRQG5GVVQN2Tg8Falxu+U8Drmk3fLlgM+lLIrRKOchv0pgJGyKxdBt5\n4zTGYD5wP3hNKrRs/wAxwQOacsiLwrAv1GaXUQjK6thDuXqR6U+Rg0gOMJjDKe9M3bvm+6e9\nNkPmAc5GeQKYFjaVO0bWTH6VH80OCMgdVpOVVQAQmcc08swjYAZGevpQA395tUMeCd2KdGM7\nn28enr70hBZd6kE+nrR87cfdH92gB6RrI3z5T0xQuxUZSN3PGOtNj3K2A4JP8B7U1PlZjnJH\nXHrQBKuWjaLaDjkU6Nt38OxiMGoopDsG4ENnIPc0NuZX5Iz/ABUASPGY8qTuHXiljbd0+Q5z\nuNRqzyMAFKqB/F3pzB22rtBPfFJgIWMjdjg5DetLuMeTICN340BSyfKuAp5NNjkO4yn6ZpAS\nrIDh2JI4AFS/efLfdFQK3zYUKeTk9qlWR1Urt596YCrtycjJbgU1VLqB3B+7S+YzsDt2gD8K\nRZPmBwcN1FAxwkWOTB4B49s0oZ9+S3loOCajWQcKeg5pV3rjzPlaQdDz9KQie3lEMxY/OnQV\nKsXnK2euc/8A1qijVZFiK8MPvL3q7bxoGbYzMzr0xQIv6Pai4nEAHmMTgccYr1rwz4eNjGqQ\n/LuXkVz3gLw+sax3MqsrA8Ljk16vptqI0VUUKzHk0mM09K09VVAcB8D5v6V01vblMKDx1NUd\nMtgIwvoetbFvGdpB5PY1kzUntYDIpUdBzU0cfknPXNS2a+XGc9amWLpkYFSWh8K46jJxU6ru\nZVA5pYV8sHOKkhXMm7tSGSBvLYBhkVN5fl/MpFMDK3U8+tSqm5CAaBEseG6tT2ULz1HaokVo\n9vHbmp42zJkjigY7cQRkUqqcnaQfehss2etKpCcjikihFRUP3smnKw6bck0IoZgOx71KcI3t\nTRILx2FOwWHPBNNXDLjOadkdjzQIftH3aasQC4Bp0cgGd1Idu4YPNBSFEiq23HIp3bJOKUAb\n8kZNHDLgjBpDGjeG6/LS5+YAHBpBnBGaVFVjmmwHBl5yeaFl3rgnNIuxVJPJoVC2Sq5oAerB\nc8VIsgaPHOajj3r8xXI9O9SRsrtjG2pYxG+uacoCgY+9ScK2COakBXI4waAuNDFgcN846ClV\nWXDNwcc00IAzHNOXMke0mgBTgkMp4xSxseg60irtwAOBTzjbxwaAEyeTSrjkD5SaGxGufvZN\nL159aQh3+qj4NM3RnnkGnKvzYzTjtWTjp60APAxH6ilhYbsg0hkODgZFN2huUODTAlWQ5IIp\nGdNuCetCsVUbuaYzA9BxSHYcqDdwcU94fmqOOQ/xinsxbBHSncQK69MbT61JvKsCwyKZwRtI\nyaf/AA57elIBNo6+tOEecYpijzBg
Uqg7Tg+1AEihjkqOKduC7S3J9MVAjOOAc1KHbcPUCkBZ\n2hxvBzmoNvlsCDzmmbX3ZU4HepNw6kc0wHsDvy3SnyRhiSOBio42DttLce9S8xx4DZFSNDIg\nWABPSrC4k6HB9KqsndTtqaSE+WGXOfUU+hQqs0ZJPNTRsGUnbUR7HrxToZDtbcMelAEqEOMg\nYapM7kYH5mqvGxU9akhcLuOTkmgBFVoG3dB3p8p+dSoyTSOrcZPy+9SccbTQBKVTyeeGqLy2\nZQAc0Mw3bW/OnMCvQ8UgFkyu0YzQZB90DDU8MPLVs801lLPnqKYCKw8v9519amjZWXBGaiSR\ndzJImBSxSfvCAOO1AEqr3Bp6t5ZAweTUaqfTFOkYqoNMCwcZ9Ka0Am+deMUqtujyRzSWznzC\np6UgHRtltr0+OQM5GDimsFbOeKk2p5Yweal7gE0eeY+KVTIyqWHNPMZxuU54oVtvBI5pjJNq\nSNyefWkEkicHketIMNkDg0k0jx4ULupDJNpbkGm/N6ng1JDIrAAjHrSyEKpwM0wsSZV+hxQr\nndgmm2+GU56LyaJANwPTd0oAWTO3d1FIvZmOfanxoWyo5pWjCJkc0AErbsEcChZP4epoU+Zj\nC5FIT82VHSgCZZGZQMbcdal3/JuX8qhWbdHgjj2pqyfLhRtPapGWWy6cDmlTHCnj3qFC/OTz\nUo2qACc96YD1zk7eeefpUgb39qjjwmSDwetSKqbwwPJ7UhBJiPb25oRc7sHNIf3jYbop4pwU\nLyvRqaAb9zbu59hUyxmSMgfKwqvnLZznFSxzM2RjBpjRIrsY/m5PSpo/myD0qJWHA79qeyyK\ncLxnrSYh38YyKdGpLbzgGmbcnmnQgBuc7aknqSNlm9qXaGAbtTVYsGwMU9f3i9gKBiMwXO7k\nHpTCkpkQghYscipJEwcd8U1G+YA/jQOw5G8tmzznpT43XAyMmlRQc9xSoo3ZHWgY8QspLHhe\n1N46kZ561JIzSMF6KBTFU+Wc8Y7UCH7lYAKMmiTLfN2HWod67QBwc1ISDHtHXP50ASSMsiqQ\nMUirtXJzig9VwMClXdnaWwCaBiqp3BjwKRmMZOOT3pPMAbBbIpdp3fL0pMB6LubcfyoU72xw\nOaWM7ZM/epSwOcJgmmIcsnr0zilbDE+9NIHHFPjyFz60D1CMqPlz89Kr7Tls5phVo+SM5NOV\nMHIwR71JQ8sj5OOaep+UdxUfRW4oi4UFhgdOKCSTdt6nj9aer/u+elV9iecADyeanGOe61Vx\nij5RuB/ChchdxxmmttZcA4NJHnvyKBjlyqkMOaUsVwc/hQCd2SeKRm3YI4FAh/DYI5HoTzQW\nPOOBim+WhOaGYcDOOaTGSKOOtKCSuMVFj95ntT1kYbTjjODTECqMHJp6Esx9AKAp3HpjNNbP\nToKBiJGFUjjJNOU7sjGDTtq4OGBPpSdDgnFBIKDn6U4k8nqKReD7UH17UmNCKhbkCneWy4Pf\n0oLFehwKFj3ZZjnHShi1HDIPPFKuRkjmhWXdyOtN3eWxJ4FMepJuLckYPagZIz3pi/NhjnFK\nrDcQSetAyTmRaTH7wDsKQZV8Hp7Uo9MUDsLuEeabyzHA4oyWODgHtSqfnwaAFZtmPWnIPMzl\nsE0zaWk9qkVV53cCpYgjBVcEc+1OaTHyimox3ZzxUi8EEAMfemAyUlXAPpSxsGHHIp3UHcKa\nsYVcr19KY2KoG4jtSMw9OaUq20nGTTcbl5xmpEPUru+bp3qKSFQxYdWOak4HvRtJ70wI9poq\nbZ70UwObubt7rVUONsCDG71rY2FkDAfLUVhpqt/rPyq8zbU2qOnFZgR7iq4x1qFV8zjrg8ip\npGOBnimRxjdkdz1oAkYr2wPalVRtx60zEUeectUiyKq570wDaI19c0qsYST1pu5Wbk0xc7iO\nopAPb503HvTGX93kHFJ5i85HI7UxV4
zj5fSmMFVQQQPepY2DZLLj0pFbowAA6YqRV3KSBz6U\nwsMkUPkgGmsHCcYwOuOtSqR05qLlM7u/pSEOjkEjc9cUjbTncPpQ2yNeo3Gkdg2MjgCkAkTD\nZtxj3pBgZ3HGO1KsqyZwCoFQ7TI2RzTsBLIA6BgPrSR4XJJ57U9oy0J7e1CxgAFqQhhXa27N\nOTOQKSRTIvy+tSjcuCOmKQxCqr160Mz+X2IpsgLAZPOaVcjIzgUxAoQxYxjnmoVXAJPPNOaR\nt2COKNqodx5PakMashbIz8tPViykdBRtDLu4yab5ZXIzTGSMw2gevemGPC+3rSN82FHAAprO\nVjx1ouIlaNWjwOKaFXby+DUYkHYmllOcMRx0zQA5ADkA8imm4KttK5xT/lSEsOaYF3D5hzQA\nisRkHjvSby+QAcCpMpt6VGrCNc5zu4xTsAMqqAx70M3QAc0qlV+8e2aYJFfBAOM0gBSynjk5\nqVWZo23HntUbSCPBA6mmyuc9cdxSAjKusu5249PSpl+TJJGaax3e5NIdu3nrQAvOck8Uz+Eh\neuaNwxgmkzxlTye1ACsQvyscVGsbeWeMA0KTn95z9KfuJ4B+U0AJtwpwe3Wo45t33+MdqcwP\nTHFNaMj5u1LqA5sNhuopvCtz0pzMZMADAqJYys2GPHWmBMvGCabI3zHDZ4qZnVox2qsylm44\n+tSAmG8s/wB70psoZoc4+apJJFU7iO2KYpZlJHNICGNmcjdwan+Xfwcmq+87ju4p6OOcHB/n\nQIGh84tuO0U6PZbrlai3B+MmmtkOOMp6VQxPLzNuIwOvFKxJJB+6aknxgbOlQyL3zg0rAQyR\n8Y6D0pFgIViTj0p+1pMkDkUnmeYDnr6UAZtxHubccsRTY5FYfToKu7duT1FQy2oEe9aAK8uU\nbcBjPXFV5V8zJ21a5GN30qGZXTII4NAGfdIOqnHrVKaMKp7itK4h3ciq7Qh4ytMGeNfGTwQ/\niLR5prcD7Xbxs3Tc0i44Ue+cV8dappstq0kboVb+JTwciv0VvLUbjnJJGCPWvlv43fDEaRdP\nqVtGy2MuVLKMhG9D9a1hIxkj5xmx1f5R2zVXjayr8uf4sVtatZhEaNxypx05rFuFcqEUcdxX\nQmZOLsUZG/eHAOSePSo7hdyjauecfjVht0aPnkjoPSqoYPIuxunJq1sZ2BlVSVYYOOtVGj3c\nlstViZvlK55LctTFwqvnkj+IU0A3a8Y2kZA5qFYx+8CtlcbqmjkCttKnJGdx/lULJuiOG8sE\n8e3tViGMqybHAwuOTUU0aK3A5PRanLDyztbGOPrVdy6ttI2ZGfXFAAEO7AHI61XaQKpRhg59\nKmZ2jC87vRqiDlnOQTnngUEiiNVU44I5qrG21TvGRnn6U92LsoBGc8807/lttUgAnmgBq/Mu\n0tu4+Vvb0pAqlgrZHtTVEayFW+Vg3FLIzeeGJwc8VSYxZm8xSGXYAce9V1Hk7hywJ6mrXmvu\nYSHcTxwKi8wcF8dcHFO4yEYX5920A809f3qb3OU9u9DKNsgZfkbpimjcVVVGB3FIQGN1UbeR\nnOM80xmY9RtAOR3Jp/mbWB6/TvSrEZ1YtkL1BFSIjDbVDAfMfSlMYaHmU8n7tKv3fvAnFMkY\npIuwb/emMk3KqBR83oaPMbIyMsppqMPNwRhs0IzecoJxu7+9FwIo5DDuVhu3Hikj3NIdoBPa\npJJRHnK52tTFUKzEt8u7d9aYhysF5wdwPI7CmLIV3kD5+uakB8xmZeF9AKjcGVfvAqO2f50D\nEZ/MXPty1IzFWUEZGOvrUk0yRsu7liMYFRMGDgHk/wAqAEky0XDfealmw21Vbp6UrIMFs4/2\naZ5bLGGBAYmmHoG/y1KAbhRNujVRtzzyaVMKpJOSOgqRPnQEnd3+lILX2ISvl8EjPWmSMyxq\n397nmpSyL99SfQ+9P5kUDaCFGM0C9CrH
IdvmN8wJxUv3ejZHYikjk2hkYjHpimx/eUk4SmA5\nmKSZzll6UCZmyW4JPenFkaQlHUHqAahkYyLgjO7gfWiwA+2KUBk3fypAxYsFbc3XApY32rtH\nB/iLetAZN4OMZGDtpgK42IzhdwBxTZFZsAttLdMdKUkxs4GQOtLtYYDHcpXPHT6fWgaGMwaQ\nAFlZR0x3qSTe2M5wvemHbuDD5QRjmoizLJs8z5yaBkm3Ycld+e9Iu5iwC8Y+7SNhSGYMMcEV\nJLuLDptxSF1GecjKVAK4GSKPMXcnduy0vkho2P3R6ikYAIvyZ2859aYhG28+rHrSSAs4GOak\nkkWRg2z7wxUSyOY+Fz25pjERkUkYLp2PvSL8uef3nbjpUit5cY3HknnNDKpzIQT2GKQdBrAj\nGW47kUKqlQUTKsOaXDNtG3AHO6owpJOCUx3HQ0xDzhW2A7m9vSlRSuWLrxwBnmkXKr6Nng0T\nEsuD+OBSCwpw7Ltyv1qNsjacBhnv2qRh5jJ/dUfdqNSm35lwqnpSDQHkP2jaE+al5bOAc/xZ\np7Lv5Q89d3pTV3bT1+vrRqBGF3TBj2GKc0nzdPwo27gBuwTTWAba2MMrYpgPWYEFQeOnIpJM\n7tm/a2Pl9CaRstlWwFz2oXH3ME7eaBCRyC4h2yfKyjPA5pBHt2MDlW7etPViis6rvGefamsu\nw7hx3FFxi7UXbn5nbp7Use7adr5KtyCaY5C4GPlI60+P5cbACP8APemAjeZ5xJAIxkgihmMi\nngZHIprMI2TcGbcckLzj8aVlHzA/73HWgQFf3ihOp9+lJHMfOKldy4yXA705dqIjAfOf5UFg\n33D0NAB/rAQmQRzyOtKVKx7y4jbt6/SkeRkYhRyw60k38G/n3NBQqz9FLbR9KjjYxqSMsN3J\npZMbhuGaagCq2Mjd29KXUCUsp2rgls9e9Nm24CgE9yfSkTORgZkUZGaa4DMquSCeooAduLcY\nApS7BixHQUxvmwo7dMd6fuC/vGPbGPSmIJmj3nGTx93tSFtq5xh87TTYywWXcM7hkU5drqcZ\nB77qQgaPcNo+VetOjk2g8b1xjn1prbpFG09KBlPu8+tA0IvzRu/UdD9acQfkxxxzSTL5aj5W\n2noccZptuHkkO5vu/lSAXzFEhHlkHP3hSkkMdwGD1ApJN2GZDn0FG7dxtOSOWPSmA+PCqCw5\nz39KYSi3DIPlQDdSM4ZTyTgYpq7WVdrcns3Wi4iRexU4Y85PpUO75WyeS3BqeRsRgleSdvFR\nBR5hiJwV5oAWTCuuVwMde1NDE5wOP6VIv75CrcL/AHjTQGCgEcr6dxRqA75ioOdiHnf/AEpO\nGGWGDTFjXDDJbnIPanRAv24pjA4aPIYK+e/enNu3Ko+UdzTNu5iG4xSzNtdeCT60AJlGZ3Vc\nL90e59aTaI4VK4DdPepdwXlV3mmsMONwAHb1pCG+ZKmBgHBzigNtP7zDL7CnNjGCcP2qJVLN\nt6oP50gGrGdu/oxPHpUjbmx5nA6UMrOMA5C9MUKy8ldzHoWI4pgN3bHCsM46rUkWWBDLtb+G\nmiQl/l4GPSht0nIxkdTSELtKtvIJ47GhgVBcDqO3akZz5fyZwaXaVy3fHSmMImPzfNuGf19K\nJPnHyttPfjge1NfbgMW2huCBSszNGUU89NtMYeY0UajeAM96csaM0j5DHbjbUaw7tqthmz0a\nlMPzMykbvbtS6ghPLfyd5bIC4p7Z8lc9TwGFIIjt2b+O/vSttYnZxjrnpmgQyRW8xOcqCBgU\n9WEQyBhsnNIpzIGX736Uu3aF39M9TTAi2/uSuMHdu/Cl3BmG1izHotSrt8xgjZDcEsKZkAsF\nXa6jqPagB3EnJbauMhaiTAYYbB6j6VI8waPIwDjpihY/MYZ5CnigBI2MbEE726jPahlduDxu\nOeKa20MxI+Unkin52RnBwD90Uhgy5ZQT7H
2pWVQpHUZqPcWUfNuPrQNqsrkfOKYdRIy3O3kZ\n/iqeNdr/ADcbuKY21jluBjOaGZ2VSpyMcUgAKu91VMhf4vWlj2CNi3zHr70xSFVcHAzkgUiu\nJGYIMjPXtQIkWQKclTihnGxmUZb3o25XBbjvUbI20uCAoHGaYxxZ1jXK5J/hoZT94Yb1NKqk\nYWRi+3pUbearbmHU8qKQEjzKfvH6betCYRQMjbn+KnNGVVRgLuPNMdlb+HCo2eRmjUAcBpPm\n+Vc5G00rYZjtOeKRmEjFdu0nkelMRWbKqpxj7wpgOaN41CnBJ5FKdsnGcMoycUgJcAHJ55zT\n0ZFBI9aQhnlsrDeuQRkMO1ObPl/7Pf607a3z/NnI6U1lLP5bNhVxwp5piBmVY1DttT+96mjJ\nToNzH9BTcGRWU8MDxnv707ziwURrjB5FAxeF2g9GGTjpSbWXhfuD86FPlqV3bhng0m4FlBJA\n9aQBEqPztz2560LGFBXJ4yQSafGrRzFeqdh3pWxIpI4bOKAFRvlx14/ipqhCp29AMml4VtzD\nPFM5CDZw+d34VIx8ce4bg/y4zRyyg9E96X5GkJwcMKaqs0bK6/IDwaYD9pVQWO4Z4GaWQGQ5\n6iojMi4B69hUiQkRjyzlycnJoAU4jdXwSRyVFLcMrAEDAPp2pykqwOcHHH1pONu7bxnn60AR\nbgqBicknGKd97hiE9u9R+ZhcNzg5qx8sihgm5m/ioEN+Xkg5FIqo3ABBxwasRqilC6/KeRUU\nkxdlZE+SqAjjDKdxbc3en3Bkl3Mr7O5X1p0g4AHX2pNp2qXXO7rzSHcaqk4JHO2nwjyUADkM\n3J4pjAybkxtXoKRceWTg/JxSH0HBlbHQAcimtK5YfPk5p8bJtUSDHHGKjWRfMJVeFODmmIVo\n3llwQAePu04OFZk2c9+O1OEhCblGHY4zSNnduPOO/rQAjPGV5HbOR0pfuW6uONw4prbfLXby\nM8ikIDRxxgdOaAFbc21XYlcZNLj5SxBwe1MVjhjnJB49qerOAc4LHn8KYC+UqQ/KxI60vIAk\nPA7r1qMHbhQCV+8T/ShuSSCVY9D/AEoAniYt+7A+fIao5H6oBtOeeKdk7YzHgDOCKVoyu/5h\nnP3cUgE3tu3HlVGBQqmZsIfkP86NwZQVXHYEU4bVbbyM/wAS0xDmAbcu7AX19aZHGWxl8DGT\n9aMsoVGO4Hk8Uu75CNoJz1pAIGy4ySMdWqRY0VSXORuwQBUbkyxKQNq5qaRVcCN22leSw70x\njGiEayMvDA5CU5t0kaz5PTBqOMFmO35CD901ITlMbsnP3R0oAcv7tShbPfJpu4DBzhs9abIE\njdVXLMeuTUjyxmMBh+nSpYhsqnarow3g446VIrMVDL1XgVHHGkiKi8/NnPtV22URofk+TPXv\nRcCWzs3m2hF5I5auu8L+H2uJF3p8qjg4qnodqbq32xx4716p4b0nZaxqE25H4mkyjZ8P6Oq2\nqbR8+eDXb6Zpw+XfjNU9N0v7PCjAbPbua37G2/iPPvUMqxZghSFSq9/Wr9kgKcCkitY2Zecn\nqauqFQFVGBnNQ9y0Oij+YDFWBHuAzTV+WLJHPap4YyzDPAqRieWWPtU8QVhxwKfAp8zbjin+\nWI2YD1pdQGxqmckZHtViGIbTnjPTFMjZcYVee9SZ9BzTGNOY29qljJzkDK0iqW5xk1KoBXb0\npACrtbd2p8gDdOKZt8vtT9u9gSKZQv3RtNAVmQ8incZ96NvUjvQA9VVRkCk2hlyBhqaq4Xk1\nJG3GPT1pAIq5ZQRjHrTiqhuOTRu85Tg0oXC4ouAp3bjinqwXGetMXd5gAOB70u3cxyOaQD2H\nPWjarNu6U1Wx95aXd0waAGpt3gMtL5mGwpxS9WwVxRxtzigBV5bOTT/LZicnGDTF5zx0qQMy\nw8c5NAEkihcMTRG4ZvmHPak+8AGXmg4WPHfN
AC7Quc9TTtm0cdKa/wA2G70u4vQAuCeA2KVg\nDherCmtuLADrTlXbzSAWTDIAOKBJt7ZFNYNuGKccrx2pAPXBbd2NKdqr6imquFKtwPWl+5g4\nzVDHbSvSkVjjGMU4SANn7x70rfNnBxS6jsKqgrlulHyFcgbTSLhk2n060gYIWVxx2NAhwTO1\ngd31pVVlckjjNEZDR/KMnNHH1NADivBGeaX7oGeRQoVRkmneYGwFXmi4DeVbgYpPvMWwcCnq\nzNgN19aazcnnvSCw9XDdDilwd2aYABnpSbiyHnFHQZMnzcE4pMFevIpowyg9KeJOpI6dqYDl\niUkHOKmRhGQHGR61DjzFzjB7VKj/AMLc0AL95yR90+tETtA2772ab9446Cnt+7GMZpDHtIGI\n+XFIAM9Tyaaqg5yNtPjkzxjiiwDlUJNnJYYpZGwuQMGmsxXov409VSXIbrQA4ZZfUYzS4IwA\nMHrUartG1WxU6uSoDDt2pCFEXnL15o5U7G5NMt5TCuGODmpFIWQycHNMY5WVsgnbimrlVZs8\nVN5a7csv40kLoylSvy0bASriSJSSM96j/wBXJ8qcU75QuUGBQkgf7vUUh3HKTK+Tx7VPCysx\nV+lVI5v3m0j8akkdUznJHqKdxEvluv3G3jNLGo+Yng1CuMqV6GpfLMeT1GaQEmBImOhpPLMK\n5blacq+YmehFOhbauyReCc5oCw5X/d4zg0kbqJMN96h1KNyOnTFMbLNuxg0AWtoYgZ2n1p8j\nMvynB46ioYmDRkMOfWlWUFsMDjpmhlE6HEYGOab5nzACowrRsedwp7sGUcEEmpF1LEaAMecA\nimy52dNxqNctHjvmpeVbr25pjC3n8tS5Hy9KfgNEdvU1F5gb5cYHepo5Ij8gfDUxNjLdmt2Y\nHnilVgyk45qTaevWmx5X8fWkNDN3l4AHFTkbow4/SkkXefWljY8L0pDHcpjHJqRvuggfNTtq\n7dtL0XA4FUISI7o/mFOUMp4Wlk+YADg05SzKQ3FAD9w2crz3NQ7t2dpOynMxUHB4pqEo3PSg\nGJD83Q4IqVQZHyDzSxxqeRwaEQNnsakZIoKsuF5NTN/rOTk45qNZPkLdGHSlgzsJPJ9aCR7M\nI2B28UMxwSO9KsnmJjb0p7fdwOppD6Cou6MfNikGR2psamOMkjNLHkruNAkOXdyd3JpIt3Uj\nNI0m8jAxUpDM/wAp+XHNBfQerbV9+ppsWGPmUiybUJxnnFPZgsYAGDQSEDeYx5xz3p7Z3Z/i\nqKFwJCDmnN80md3SgaFWMSMCetSMu3oPxpm/cxfGB6UgZ2cMeEpAS7juPcgZpjFvMV84B6Ch\nHViwU8+tOC7sMeQKYWGRgtI+RkGrC5dMoKjHEmV6VIrbTnoKQAGMOCVxT/MDc5AzTD0wx+am\nyKO64pgSqMNgHINO2tnk49KjRSihs8U8/MMnpQMFkxlXOfeh/u7j09qcqBl3dRQpVgfQdqBj\nY5DjAHFSpIGXJH4U1lXhlP4UjZ3AngelITH8b92Panr93nimMyspwKdGO+OKBD1B5II/KhVL\nHrxS8rt+XKseaD8uf7tAaiNtUcmhQFHPIpMUu44GTQA/5c7ueKTarMWNNDh2UdBT24X7uQKY\nxN30FPjcbivamFVk5I6U9SPSkMQsN2CDkU/cRGeMmm53dKduO3niqAiW3AYSPnJqXh+XGB2p\nJOVC8g0nzdzkUriY9WAbHahQevWkyNpJGaF3DkdDUiRIFGOeaFYcnpTVyuWpOJF54PtTKH8l\ngcZHaldgx6UyMEMeflpY3wpOM4pgSqx2gZwKG56Dmo4Tu56e1S7lXbnrnPvUgM2sjctn270v\nLHrj8K4vWvF/iLS/Ftraw6F9r0ad1D3keWZQe5HbFd023LAAbRx35oGQ7g3JHPrUilW98daT\nhRjHFK22PO3pQIUAKx5pw2txUQ+8AefWnOuW9q
AHrheDSIQJOTgUKhPcZpzRg8E8DnNOwCsO\nOlN2FCG3cGk84Njad38qk+9yRxSAaW+bI6elP2Lu3dBTSh69KRFY4DGgCRFDLnpSKoyQRget\nGNrYwSKaXPAPKk0ALz70VNx70UARx4RQR1pkg3MSOR1pkUhPAGBSxbjkE45qLgRlfMznHPSn\nj92oAANIGG4qF5HelGeuM0xjJ4TJz0NOVMRhcZNSbmZgMUqZGWAzSuwIfKwx746YpJVKqNvf\nrSyMU5Hc9Keqkxk++apAVzCWUkdadkhBjmp9q7fTvzTdvzcDNMQ1FG33oyc8Nj3p4Q7sn5Qf\nSgoNp7g0ANMny4HX1phLMx9Kk2+XGcnpTd6SbSvzLU3Abt+UMy/nTtwZcZoba2fm/OnLCI4D\n3PrQAyZiMKg+9S28e3rxzTioCAdxTGZuOMii4D5f3shIwFFM9cflT1xtBqF9wy2KAJN37v7u\nDSqwjjwck0nmEx571HGOpOTSETNtkUcYqswPJJ+SnNIwXOfwpXBkQbjhaYiCTMjKA3OaXcc4\n6gU6NVD80TfeG2mPUco3YGMVIHDSDHFV1Y9ScU/pyKTAe7A/d9aiZSuMcqOtOMv8O3j1pq4R\nDvbLentSGIrBc4okYyKFJyM1G5PUdDUkUYVhjmgCOZXkjKKdozU0OY4wGO4460swBhI757Um\n7CrxgAUmIagwpDdzxT2QYznmmeZ3xSPIGIwcUxjGywyKcrbVx0pGO1eP0qNd8nOKbAfv9uPe\nmiMtyw4pxbPXgetB3KOTxSAdkdB26U1sBNx5NOddsfH3qh2svLNn2oJF8tGG5s57UQoPmx1x\nSzOGxgYxxRtMfzA8GgoRkPB7U37qhicYqRmyAe1N2iTqOKYC7xJ04qNmUEgnNO2qqk9KrCQN\nIQVP1qQRYRhtAHAolXd16H+KmL9zpSMxZdtMZK2FwBzgU24yyjaPrT42RSo60Pn5wOhqWSVl\n/fQ9OaaspUHH5VK3yLwOcVGq7SC3egA8sOpJ6mmqg8o9KY7mR/lGFBxTpRujwn40DIIW8wk9\nFHFLKwjXrkU9Y124bj2FIyK/AHFAiVceTkc1Tl+bBzg1YgZUXYTVeeEtIQKLjHQt8rDdjNQb\nSJDyMVOiqFyDnHWoWXrzxQJjFmRsKDnnFJdAIpCnOKWGJVy6gVDJ/ERz7UhlRzkgk8VJOrPG\nOaRsOvoaFJVcMcigCm3y9ahkX5sd6tSKuCcVUlUhgQaEMq3UbSdRjFc54m0GDWNLuILhfMhl\nXawz+v1rrJD5kPUVQurfMZzxxxVCPh/4reAX8M3zQohaPdlH2/KQecfXrXlFxCGkLJ1znFfd\nnxI8E2/iTSXtZc+bhjFJ/dbFfGfizQp9FvbiGWB4niO0gjGccZHqK2izKRw0yL5zOGIJOCpq\nGVRG5XZgMMZA6e9azWsaqRngHJz1qheXHdT8oH3fat0czKJhCqUDbmH8XrQiDzcOMADnFTSK\nZlSJBsPB/Co5IzCzKWyOlaEFdn+dyeV/hqBldiARwTk+lTXR8tcdSvJApvmDeq5LEjcP8KYy\nGTE02B8qiocNCpbIZScfSp12xuSy5yM4pHRWjKY2E8+1UBUbLN14HfsRUXzSbijFFqZVWMZb\nhlPHvQWLRMzR4fsRQIhX5towCOv1qLayqTtwwbPFS5Em0/dJ4x6UjebGXJX5emfWgYOyqdxT\nOfzzSFi0Z3p83amyMqxRFUJJPJNDSGEoW5YnpTuBH820H+JjilRW3sRhWX7v070m7fIV6HPX\ntTW3R5y23nrSAfI4VmXGM9Kh3OJPTIxTZPlfc3Iak3ZuELNkCruA6QHaqLwx746U5GEcbqXO\nMdqi3Nub5sZPWpVTbjuOrL3pCGhGK5JwQOB3+tDNgq46Y/WmeexZmLZ9MDoPSlVlyyBe3WkI\nX/WOq7eeu6hVPljJ2kfnQsfluBK+ONwprIGVcNv3
dD7Ux9BJGDLznpyaau0nCjI6ladI37sn\nGRnGKInUbyOuOtMkWR9vzD5fYVHJxuULkkdMVJGzFg7YztpFmxhiTk/KDikALGvHG4gDn0ps\nKszZ3Y5yfpSZZXIxkr1IpvmqzNuJHp60DEVDI2VIJ3dPansAcgHBz+VQ7FXOCRUx2tjAyB1p\ngKsZfeRjbjrUWf3KkA9cE+tSIwVsL8ozxQ0ZEb7n6+lIQ5W3ISRxnHNQsZGjDBtqA0savHGq\nMc89aTcPnB49PSgBzsjYYDI6HApnyjeFHy+po2/u0AGWzzinfKrMoB46FhTHcgWLdktjp96n\nSjYqEDcgIOB60rZyW9BSeZuVVK8MOT6UDuOKgszyDdnt6UxtuwhVyR0pm47Qp5APFPWMRqHD\nZOelAxrSHoSQMc59akLjIRF9z6ZpDlt6NgHGc0iu3ybl2jGKAHSESblZflx+tRsw2eW2M7eC\nOtL8qKSzcdqNzFSQgVh0Y00BF5gWEoSSTStNnB7gYp7MvD7SxYYPHNJIyxuvG7A5WkT1Bf3m\nCTjHpSj5VIJ+Y1H3z+tScljgcEfeoDcVgV2+n86j8ssxK/KF5PNO252Z+YjpikULHGx3cE/e\n/pTHYazAsGVRjqSalWQAYY/MRkHtUIIyI2Xax65qVmCoVI4B49aQmR7gVDZ6cFfWmGQjIxhT\nS+esnLDbzgcUqkyESKvzKOaNQEVdvVtw7inBhHgZJGacq5cyPS72HzAABuAtADWTbKAX68gU\nzIX5CuTnOKVx8uc57Z9aGkbhgPu4HNMADKrYwVFP27ct1Pb3pWR1UlhuB5oWPPzMpIxytMEN\nU/Nx8pPPIpJI/N68L1PvUjffIVunQYobCyKetSIZGiMjpnHcVHC0gDOcAKOakZgzEjgZ61G2\n3DjOdw6U7gI3+uGzIU1I2BuOMqvGKareaoK/wj7pqLzFRcZznt70ASbmZWI4GMYpvzrs3HA6\nU5GDMU/jA/CmbWZsu2B2FAxWjGFK/M33RzilkbGUxh+hFDr5Z3K3GOM0qyNgOzAAjHNMBsao\nqEdWHJx6UbVVj5a4GM5oiDQsSCGI/OiRcNvzkvxtWkAvlkDcxyuM8GmyKETdnLHnaacqrkId\nyc42mkDecxVlwCMGgBFjaRiQfunOKH+8T1BpDnayq5B6Ukajy/3jZZRigYrfcyvLds05WZuD\ngdj9aYv+rI6+hoVdjf3mxwM0XELGu1mTOV7H3pBjySFXJzzTtxb0XdwTSKjNiNTgZwWpgOWQ\nquMkEjB4pjKu0nd8xPPNK0h4Rm4HemLgxyAjDh/wxSGP5jKngDPNKZH8shBg5yTjtSSOG9DS\njdGAwOc8baQxGboBuYnp6CnRzBclj82MADvUfmfMwbKjH5Uzd8/zDjGFo1ExXJaIsoKtjBFO\n5a3Qbs04MUTdwxAwV9aG80KoULz1x2oJG/MASAB2qSJEkZe/HXFRfOshRunrSqFjhKhj1yTR\nYCNDJIDGBjaxPNSbjkqwwx/ipgkCxn7xyfxp24KuW5Xt61QwZF25LHb/AHaVVWTK7scdKY0m\nAX6L7UCE4Ei8k+lISHYVlBIK9qOFfyx9dwpfL+XczEOP4aUMdu4rzR1GNbbgnd7HFMk3Y+QZ\nPTmno24OHGABnNNb/VnB388UCJFJTaAd7fTpRLjIyu71pBIRw3BIpiZ+b5uezdqYwH3jkfL2\nPpSqowV/g6lqa7DCgct69qcMJIF5LHjNABJGqMOMHqMUn3lIUHk5pWUhfLB2kHmkZvLO4ZYC\npYxyvuk68dMAVCAsZZ8nINSsqqud3OMio+Ii+0kg9jQhCzSN5iqnTGTTyzbwVTcOvNIrZUAj\nHH3qcV3RrsyGHWn1EMT95kOvGfu0u4HK7sBeTSbt6ncrdaSRcKUbgd8cmmPoPjZZ9rlcDsKY\ncBCqDawOd3en7duNjArimquM7hywxQAiv+86fe4605
VixmPc7L/EOmaHVGRU6Ec00fKpCv8A\nI3HHrQMcZEQmTgMR0FPj+ZRu64yKj4aPb3HFNUEZwdvHWgCRvvZH8XH406SQKwwuGHDNTV2q\nMBgcj72e9NXNwrFfocnrikSK23n+836UNvVCFPJ5pFjMyqcbWU80/azbgTjHf2o1AhZuDj06\nikZtmGwSOm2nSOMhQQAv3vc1GrfNlj97o3akMkQoCQqkHqQadHhnZiMrjj61H5myQg8tjBNS\nRuCoBXB6g0xdQUfNkjrx9KY0xQ7UUk5xUrfKgY/KScYpnIY+WxPODSGN8t4cSbeOlOVtqErj\nnrSrFu4ZixB6U8xYyvUDndQIaqbMDq2c0MqtnzOS3p0FLINrj0IzuprMqvwFPHUUxi4KsVA3\nj1HakO8rhV3AUJj5tqtil+b5W3bV9PWgYrbhhVw3GetCsVG7bg9KZ5ZYEIp3Z3dadt3bQ52B\nuc9qYhCWMQVkwBzn1p4ZgoUnaOox2pFQNl9xKZ2+uKZsAbaxO49qBDkG2QqCfm9aRtqjGDkc\n5pR8rE4yR+NFwxkVUiBLHkntSAbIwbHzH5h0FKWDzZI3JjBK8UqsQwBGTjg0ixhcktjPOKYh\nJducA/KBy39KcVXAXPBGdopNyqQqcFunH6UuGwVX5T7dqChBlvvdQcZ9qXYfmB7dAafJncox\nkqOaRm2p+9Qk9qRIgZlUt1YUrbmVdo2E8mnRx7sHpxUbHdJ1xjpQVYkUmZsEdDz6Uzd8x+XP\nP+RTl+4SRlm6bTjBoUmOPaMMoP3j1z3oKB1DFQnyt1pzMFGM5J79jUZ2ScbtslOVdoAYZUc8\nUCGrGm1Sww2eamUjeWBIPYVHHiYMeo7HNPP7sA8r25pAOjUSSEbuO7UskKqhLP8AT3pFZWXC\n8EdTTtvn7VxznqaZJDuOVYpjFTqhjVtzctzx2pJImikO75lHJNKwPmeb9xiMhaAF87zNvqvS\nnOieWZCcjvUPnBxll2t3IpkgRuC3HXr1ouBL9lnaPOAA3PB7VCzYt1TZ0709Zpk48z7o3e+P\nSmcsOMyMRnbigBJFKhWd/m/u1NuDboyc7ulN5DAuME8fNSKqrIQ2ARyGpjH58xUOACowRTV/\neOSo+XrimFxkAdR1qaRvnCgcY5IpARtMrN1wuelOIKzOSfl6YpNyrmPr3BpQx2lj8zddppgH\nk/NtxyB0Hehsqw2rgKuMUgd2bzFGDjvRmQKd+NxGDQGoxtxY8bRnmpPlEbMy7iOhFESpJJhj\nuwMk9qcu1VwRhM9aXUCNoyuC33c52ipfu7iOR3oDB1Yr65DU3b97cQfcd6YD/mWAPswR+dRe\nWd3C5ZuSWNTLI0ykA4YDvUfls2SzbWxjmgSHR5kyq4OOlC/3ycbT0pjSCKPg8dMjvSKw4B6e\nlAxyqJPmcsH7baXJPHVl6nFEjOwRkXZg80N+8cj8QRQIcxMkgBfbH2HvSc+Yw3f8CpVk3SnB\n/h4470ineeVOR1+tIB7SjYG6j7pA6/WlZtoHljd/nrTl2RqTjDMMZ7c0fIwznIQ7T70ARqR5\nivt3HpmjzHjkO1dxPADVNtEh3HqOnsKT5t6kPnnJGOtSBdhhi2bv7q7i3v6Vbs7Oa7YJGm4N\nzx6VBbQs6yLGNu7n2+lel+C/DPl2ytMpLP1f0HpQBueFfDC2lvGzx5LKK9A020EbRkJlvaoN\nH03yYF43KBxXW6XZLHGHMfzUnsUWba1dmXHIxWtaxAHGODRbwbUBUYB61bMfl4Kisi0S21uF\n6ipVXdn5TUqxjap6euamjz6fjSKI442bAPSrbRspHFIsbDqKlGeNx49KRRIvyAEcmhV3cnrT\n1w3Qdu9L5eGzmkFhsMflMe/epF+Y8HFO27RxSxqGAB4PrTGKqtkA06Ndpw3U9MVK0YCjPNJt\nRTx1oKGlSOh/OjzdvWpAq7ck802WNWTgkCkIcqhxuHWn
E88cUxFCwr1qXaF+8c0wI8bc96cq\njgg5p6qNpPekVVC4HFJgNT5TkDAqaNRI3HakUZ68CjHl9DyakdhzELx3pPmPLDmnBV25zlu9\nLHuOd3emIazeZxSMgKgq2DmpFHJzzTWQZyBQA5mK9txI60LH27d6VH+XHenNnPWgBjgl+OBT\noxlwKdtwoBIzRtPAAoAfIoXjdzRxjJpFTaMtyaDhlIoAXBx0zSqe2Oab6ZJoZtmAoPNK4xeA\ncZxUu8beBnApm3gkc00b0ctng8YpDHiQNjHBNG4dDSYVl9MVJjfyOKADBk4P3adv8taYpbdj\nOTTvXcMVQBt/dkgdaRG2x/N97NP3BYx3PpTWXdnikJjlPykZHNPJ24Ld+1MWMMMA4Ip6j5QG\n5b3oGCMfM44WneWeTmmK235cd+tSyptIINIOg2OQLnIpdoVtw6HpTiRtUY5zmkkxlfTFIQGX\nHBBJ9qTaB1HPWlOFPrS7jgg0DG/hxT9q4AP3aBjb1ojcFfmXpTGOaMSYwfpS+V5QbnccUn3t\nvanY+Y5PGKooWM5UEHkDmlLDd60ijC0gY7uBx3pEk0cxZT0DU5WP3WOajARhnIBpRt3fMaVw\nJOAdrDCn+KnRvtQnGRTPlPy9T2pWjCx5JpDY/wAwtjAOKBgyAHg0yOZVTAbmnyZaNWA+YGgQ\nm07ju496kjl8s4xlexpE+ZTu60zaf73NAFlyGGCM0BBsz3pqYVPm5NKxVj8vAxzQMsRs/l/M\ncrSKpU5HSoY1aRcFiFp8b9m4oF0J45Sq4Veackis/AC57VAqtHHuDbvpUW7DZzz1oEXJozGu\nT1pI8SKVB6ipNxuId/GcdKhSSPcCQVPtQMntwIzsbp2NSyRu6kqeMZqsjK/3mxmpkYpna+R0\nxQA0SHcCDgVb+8vz8/Sq8ib1wBii3kI4Ztw7UhonXcwOOaOWbBGKapYMQDz2pVjf+JsUwe4/\n15waktWEi+hFRb15BHPrR5fIcce1LqMmdiG5HPrTtw289uaiEpEoBFSSyAMMDIzSAI5N2e1S\nhiFyBmmPs3DFKp2jCnj3oAFyWDBeM0+5jkMqPGvy55NJBJzhuAKtNGGj5Py9aWxPUaPvZpnl\nl5AAcA0qgbWKnjtQMmPr81WUSNG0cgAO4VKo+bBH0NMViqr2J4zUiqu3I9cZoAcsZViH47ip\nZGIwAKRcsdp+buDSb8k8YNJgLuDMAvX3pGb+8DSLGWG4daWeTcyoBg0APjf5fmH0pG+8P7vr\nSspUjI4pFyeMYX3ouBMFCt9aT7rAds0iyKFx1PagsJYvu4IpMBzRkjcKkXI69KbDJ8uOtOUb\nmPagBVzGMjpUysGGcc1HGCi/NyaVWG4jvUjJWO5QoOM015NseMdKiZiJCScAdKfG26PmmA1W\nA+YnP+zU6sBgrkbutRsqR9g3HWlPOG9KAJi3UKKZt3KCRzSK25iR/wDqoEbws3zbhQUOU7lI\nPFLwMDp2JqNF3NmrMcKspDnkcigQeWkjDD4AHalkXfGUBzximxsFHpTQzR5YnPPSgY+GNIY/\nL2nf3NO2llIHQU1ZCq5YcmmlihHPHegBys23OKliZmjOetNWReq8fWnhtxyKYkObjquSKGyV\nz1b+7SibzPlZcGk4VgV4NTqIX+E56UyPc+QfuigZkyG+tLH34wDxSKJF+VCcZPpSeZt6DBqS\nMlGAxzSyKrKSThqAGc4yetPX94vzfKRSKnQt1pzNuzVCY8ADHGaN5VTng+lMRmTBNSK3mntk\n1IiNnO0buBUkbEEKRximzLnk0q/dGDQUKxITAGeaJOgpArKxY9KVk+TrzQALGhwzZ4p3PRTw\naZwvHNSBwMHrRqApX5doPNKqn14prAtkjiljX3oAUt0B4GetPLfNwN1JwQB15pP9XJ8pyKAH\nNk/1pdwXgD86IxsbJ5yKZkluBxQIVvvZHHtUkbfLtYcVFu
J/CnLNu4IpXAkUDHtQx2g46mmc\nlcCmLIfMw3SgB6SDOCKc2dwxwtRRtvkIxx61IrZ6H2phqO6DGO/FSZXcARmowe2cU77ze9Ax\nzQoy529KcEAwQc8Y6UxW+XBNC/KBnpQA4EeYM9qkOZG56dqiC7mJFPwOBmkA0RZ3ZPWpFA2g\nZximHlgAPxpW+VueaaQD+G4HWmbSuTnP1pyrtHpmpAAV561YyOOER5bGKkXPXHFN+tOAJxg1\nDAZ5m6Q8844FKuTweKeMK3zYqMDk+ntQIfk45PFPGXXnjvUYZSuDTWmLNnG3FAEvPrRTfn9B\nRQBGu5/unBB/CkaQluSDn0pm/wCcDpTzCONvWpBDlkG7PShm25J6VE0LMQOlKMrw3IoGKJQ7\nDBwB1p7TBfunGabGoVTleM8Uu0A5xn60CDaHUFuo6UiSKrNzxinMT/Cc01I90m3IJx1oAWJk\nlU+ookk2r8o5701d0cpBUYodhs+TpmmhB5gjTd1NNjmIXJ5GelCNiM5GamWNdwDDAIzSuMbM\nwkXAUgetRww7YyAMDNS/6uRs9cVEWZ354GOaAEaMD73SnQsdmM5NOX/V7Tgim/KuAv50XAC5\nIAXrnvUcjGOTnkHpUhI+8DjtUc3+rBxkii4AXLqNowB1pNzMwUtjNS7v3fTg02SNVwzcigBx\nbZGRjLCotzLweDTZJjG/ympYezFcn3pkkM27sOKdn5QpPFTvjb6eoqFiN429aLD6CceZgdKe\nWWP60yRhjLnHpTNwDDNIB8h+U7R1pBuOC2AKeCMHBHtTW3bTxQMcuCtRyY3dKcCy8gUmzcwO\netIaFVRt5HB60gXacjgU5jtjIAz60hUths8UCFaUqmMc0m35Rk4pp7r1z3pyp8nz/pQA1l6n\ndxTI4AvLHNOjjHPPHvUki+XHuxk5oAiQBm4GKY7FHIU5FOZvQ4NJ5Rds9KbAazFiDg7DTt22\nPkFqdtLfLnikVW5UnikA1mOVHTNOx170mflwfXrSbRG3HemA7YGX5Tmo/Lbs3TtSriNuOBSn\nHJzSAa0in5QOe9LjMZI6Coi/XaM5pwYiPpjPamwHSMF2gnr1qHbuYk9M0sn3RxlqTf8AMQaj\nViCRuqLwKaA+0Ecr3pcb269BStlYQoPfmgY4FVXcBnHNN8/PPrTdx+6OBikyFX1NADWZpGAK\nkUsihe9LuLHA60Ns3cnJoJI35wR0oWMqxY9PSpGAHA6U1iR7jFBRHJgYKjjvTWyV+U9aRnVx\n6EU2PczAZ4oBCInlgFhk1HcszfOp2n0p80u6TCjpQyn1oGRQrJGuS2Qe1LcKsi7R8p7mnN+7\nX72aQfvE5HA70iSLIWHYnWqsmYWwR1NWlbbIewxUNwwfG05agZFJsYEgcDrTNse3g84qWNvJ\nBD85qBl3PkDApgJ/sgZ4yaqTINxIGKuhdoPzYNRRtvZuOcYpDKHByAtMkj8wcVaaNuQeKQwm\nNQTwKBHO6pY+YzcduK8c+L3wzj8U2P2q2Rft8Q4GOW9v5171dMOcjJ9awdQsRPnIw3XNUpCc\nbo/OnxVpD6ffGIKQUJV1xz15zXMXUP7xhn5f4RX1d8bvhCkl1cazYA5dcyqq/QfhXzR4g0x7\ndzGmTgkEEYIx/k11qSOaUTD2kgHftkHbtikZDJHlz05zTyqmNR0emLvTejMCp56VoZFeXb8r\nhdxzg1XmDLJuDDHReKtBj5WCvBP3aZsPIccnoDVJiKkmXm8zOOMbf61DtkbeD83vVmRQCG6E\ncGo5l2r8vHOT71YFTbskUMc+9C7mY7s+gxT5F8qRiyds7fShi0YU+vNICuivtZSMP24phkZm\nChjtA5Hv3qdzuYhnw1Rf6uYvs28Yx60C6jGG7Cqc85FNdSsgHQ9CD/jTsEn5enXFPVXbLKwL\nYxg9qYyFUPRHDDPTsKf5QkjlDjCnvTdoTAXr1NMklZc84Gev
agBqKNynbu2r0NMVRxuGGY9D\nUzFvLcjqR96q6/vJFG/KAc07sAPyqMLv+bnnpSq2SXA+bH6U/aGUkfL6LTFjLwg7flBwQaLs\nWwgUCD5QG3HGOlRsPJ2hnwzcn2oLGZlHII5xT1XzAEYAt29hTDoMaNZBukIbB+WkA+RpQPl7\nU/ywylVO4L2pEZTuTHOelAg3eSytnGe3ao/LkXcG6k5GKV08ybJbKqPu01txcFTwOozQA51+\nUg8U6PaxO04IFReW0mMnj+lLjy2LAZUUASvGqqynIYjJNRzR7RGy9RgH3pWkcgMZMMen+FIj\nNJGAjYC8lT3NAMRum8rkZ5pI2K5boD2pDtLHllB5P19KcIxJliMADoaBD1YMoZjjaeMd6RmL\nSNjgHFEedwdRjAxxSeYFbCg89V7UxodNKRjamRUcgbd93Cd/Wns25Rggvnp6Uzc7ZyQnp60D\nHGQ7jhcAdKjkG7PP4USEqwLEg4o2hmXJ2r13d6YiKRWCD5sY6ileTzFG0hsnpRJtaQseU6A0\n2ONo9yrjd16UDHgqww7bOcAYpGjMK7mIODgYNLtVedpLZ5NN2+ZvDfM2M0DHbSzb2GaXaJGI\n3fMBwaIZAseSQeKTaFPXANK2ohqqhf8AvN2+tTbmZWTaC/TrUMalVbaOG706RUDb+h7mmFxq\nr8wQthxTmiKZcrz+tR/M0h24b3p6sytmQ/N6GpF5jIiyuCV69Rikc5djgjHNTSTbYwARvJzi\nhlDDHALfepoOpBhGJH3FXk06MlQ+CGXG7NR+TtUDdtOOtIw3KQG2j+dNgx33mzgMevNC/Oyn\npn1pm8lQNv3jmpJMSKAnCDk1IhmDu5XnP3f60vzqRnkgfnUjqjDOd0R6mm4aZsDgLyCaYwPz\nKQwwO9AjDoueSDnBolkYrhTjJ64pC37tgWGcc4oEG/zFIPBz09aGzu2N+FPVgI1XGY8cn3pm\n3ylz97PSgZNbsEbLfOB71MzDzgM7gw7VUXau4AFjipFIh2bAMnrntR0AVty5YL8mefUGmJtU\nfMNxznNTRxmYnPyqTjd2qBoXQsH5OcA5ouIe3yuAR159qhL5bKp0PepI2beA7kKOlIygBgeC\nx79/SgBjOV+YYHNM8uJW3dGbnbT2hbt3GNvvRuULudM/4UAM2gAuD83Y1EsfmfePPXNSpgqw\n6jqoph74PTtVDFk3Nt4G0dQadt8xQW69fpQjOYwANzdeaVnXccnHGD7UgGeYWYb02+jCjKou\n734p3zMwGcL2pI2TcQ4zg8UAOVl65yw7mmj5U6kBTSNhI23DcfapJE2oqj7oOfr7UwIjm4Vc\nMFAbP1piMo2sU+8cVPtKrjGRu3DHamMC5Yk7eOCRSYCiNl3kDPPJpI1KNuDAr/epJGZlX+Fu\n9NLNGBgcZ4oAWNSyMqrtBJwzU85KDdgFR2NNZlkmADkHqfSnRiNd2453UDEWNG2qCd7ChVIj\nEbID1JweaawZSN5wvtSx/K45ytADIgGVXUEE8YxTm2/N83PY+ntSNtfbjKc8EU+QeXtHyt3p\nAg+dYWkjXex4560kmGkQnA45PoaWNgGLO20Hpg9KXylmUYPIP50xDJAeQR970pUUeWQDtbFS\ntGFXagxjmoHbCnJy3pQPoEilY0JO5umKXyzyCPl701W8xfkGT1NOaRmQ/NtVugFMQyNfJYbk\n3Keh70/d5Z+6GHXJ7Uis21lHO0460jZICnlsj8KAGKhTLuMqecetTcqAAcE8ihsyNlT93r9K\njA2t3bv7UhFiQ71DYwehquylgB8yAHPPenGQ4Gfue3UUbUYZJIBOADQA2RTuyW+8OPSnRsWj\naThWXqhprQkKxyQBxyelRrGF+QnctAEjfMQFbcW5zSAbc44A6g05UVlLZOVH3qQqrMxGPlGf\nrQAjlDt5x/Kk3Bc4bJPApE/e7SRz12U6H7zMqYA6+1AxrfKyq3
JbvT8hWIXsPu01mGzJ5LdD\nSyKCqPtIYfxetJAgLZGTyCOMdqVY9zEls59qbz5gCj5Tz9KXlT97I9KQEioXYK+FHb3pqmWN\nmDAZPTFEsZIBLbl9BSt0G35ewBqhERY7RubCZ7etP8wxkZO459KaxG4KOQD096RpM7wFycZ+\nlAxWwpZiny/1ocKkaZO45zmlV2XZu53DtQsO3dnof4vSmNBt+YngA9GPemFjtAUYAPQVL5Ik\niKt8zDkUgTAXd82f4RQIXad3K4zTnhCqpBDHuKiXeynqMHhTUnlkoVBw3egAVVjy+Ac9D2o2\nhUVSQXYbigFEmNgEzBmzwMUM3mKAvy47+vtUiE/iUFSG6jHSlZWRug3H1o3MFAB3Pnt2pJGE\njkOaoZAzbVAZQXzy3rTvvMp25Qc7Kd8u3eBux2pvmGSPleScAUgGtAWkZB95vmFOWN9zeYQq\nYxtz3py72ZsHhe1O2pkZ5PWgQzBbAzlcd6VjGy5G5XHp3qSRlVN+fl6VGV+UKOuc5pDAYEgI\nbtzT4lO7apyTyM8UkjJI3QjaKZ5YPJB6cc00Me7MrFQcn1owFO/aOnNRx/u1yeST1PpT9wZW\nIHbGKRLBZG+fYdu7p6U1mIVULhivpSBcKuMH2zRGu6VsLtPaqGPADMwyRxuIpY5izB8bjxgn\npikVip3bQrHjdSNGyqNmCQc0hD/9X5hXgN0UdBTNu1Vzwe3rmpNxZlYjGT+FLwVbcPmByDQM\njYlkZvukDBp0e1VAQliwpPM8tm385PHvTtpVgAQCRn6UCE5ViM5AHzUOqswUt8zDg0NGrONu\nckYOe9OikHzRtjPZvT2pgRSQl2XB2svQ9uKczNuB3Z9eKXcWwV5XpS/dIU4x3apGNaMtyW28\n8U7zHEm1uYz/ABYomUFlLHPb6e9CgmN9r7h93FO4AJBDw/VjximsFWTeDnb1FOLfKvA2jAx7\n0kcbFsbh8x4xSJEXa0Z3EqxOR7Uu5hsLDe2fvdjTmyyknKsOMUbWkYL2bjFAw8vfJjv1/wDr\nUMpWI4GWzyKbxuPHQ4B9KezAyIG+8aZQ6JEjZQyBcDJFOeXdgsMJnhqZ8qzK38WMHNIsisvz\nZYbu1MTJJGWKQsDuyOeKkjO1dx4B6GoSV4/eDDHlSORQ9wHQjOSp+VfWlqIGlw2N2Y+/vRHG\nPMBBJPXLdh6VC7gR/MNuTnj1p8iMqhS2GIzQUSbizFRwcntTMsrKHVXPU460H7xXOQy/rQ26\n3GDyMdRQxA20k4Qr3LH0qSC7eCYSDHA4GO1MwGUcnOMmm7fm3Z+Yjj3FAie8uY74qQdmTzgd\n6idRt2EAc4Ld6QwggKW2k/zp/wAzMRJjIGAR60wGiHDAkgp0LHtTmUBdqnv+YqJUZurblHap\nPNDEOAxI4OKBjclW5UDHP4U9UCuSDlm/So1Y8nbnn+LvTsJ5mFbaTyc0AK7bWI6jtimqN6ZJ\nyf5VMw8tSn8JHJ96jWTYoK/KOlIB8OzBBGDjpSIWaFskZzgfSkVvLVcjhhzSr8xXqOcH3pCH\neYiryPujB96bIoLB053EcU1ULbtpztOdp9Kdt3ENnB67R2pjEaPaHy+z39KbNhjGDlwwxmkh\ni8xmG7cG5JzxTi2/cCv+yMe1AAeqxn7mcbKVmUzEKu3bxtNI0YYLjjHrR96T5myvr3pgP37i\n4f72OlLtKRrtIY96RZMfKq7yDnPtSTSCRkZRjI7dKQCbhhiOq9/WnJMDIHQckYPtTFwn3jxn\nPtT9wDYC8HnI9aBD5dytyc+ooaTcuFXLU3y2kZmLZAH+RVqOAO25CcqOe2adh2FgVtpkK4XH\nf1q1DAbi4jxF8xGNo/nmlt4dsiqCd2emOK7jwv4beeSGSRcAtnb60twLfhDwpsk8+YLIcfdx\nxn1r1LR9NLKikAEf3aZougNHINwxnGPpXb6ZYxp91OenSpegXHad
pn7pQBwK3ra3G4BsfSnW\n1qWhz90CpbG1MkpJJwKyuaWuXIYQvtV6GMfWkitRgnPPpVpVAUDGMVDKRHtLcdal2hlweD6U\n5lPDCpIYyc560ihYYyynPHpS7C0gBFLGpU1LtLZI60ygVSrHPTFCrt68jtTwueDzUqJu7Urg\nJGwfjtUm0FunFReWYRkjjPNTAllU44JouIVQFODyKeyqee9LIvTAxTB8zAYpjQnGeBxT0b93\ng96U/JwRQpO7BFIGPQZxuGKNoKnIpd2aX7rDIJBpXERmM9QcU5cKuMZpdhkbrinRqGNAw27s\nUuC2ewpM/NjHegkJIB2oGOGMUigk+1PZT26UAEtgDAoJ1EX7vrSyEIuQaXY24gEUvljy8Yya\nQxmTuyelPC/u93vTtwxjGTS7c4B4FA7DDhiCRTljLElW/OpCAqgbcrSBf7vFMBp+UYPWgKGI\nNCrhiTzSohVQ2cc0DsB460vPVhmnE7TufkGpN2FxikIhjk+bgcd6XjzCD0p7AeXwOaNoXaD1\npW1GCrhvu5FSRgKpB/i6UjLu+goAG7PagA8obtxNKFzx1pyqBz1zTWXb0PNMWonlnnB5pNrd\ne1SDOzI61G0bsxIPFMYsafNyacrZcZHNCoWXHf1oCnd1pDFZfmHpUn3uOuKZH8/U4INO27W4\nPU1IAueo6UuC3GaV1ZV9abnbg4596BCqoVueaGT+INkGmsDIvpS43YHApjJY1XaSetOX5h8w\n20xOMg9KXcR3/CkAMQH6/hTl2scnrQuJDuOM9hTo1DNknFMeop4GSKarDdhjg+lLyxIHJzS+\nXtkG9fqaQhu0Bc96kjVTyRzQ23nJ4phYr2yB6UAKY2jb5cmpNxaAt1OcYqPzTIuen0p9vIGG\nw9DQNgtvzkcmpYmKybW6Uzp904Wnsw3ZK5PqKBDGXcWIbIBqTgx56YpP9WPu/LTmUmPcvT0o\nAf5gZQT1qTg7iBjiolUHBX9aAxVjTAI5NvBzjNTMd+CtRhcjmlGei0wJ0BiXj7vpTRGRliOK\nY0bFflNPjZt21jxUgTLlowA2KYFO496R1IYkcCl/gyGzSGx+5ZIcAYcUqyDHA5FAHzdOTTSd\nkm2mSWI5RkhsrmmMvPXFJIGbHGaHYNjsaQywGC4K/eqyJBJEN3DVT5UA9antz5inPXFAEjKG\nUEDODTmZS3HTHSmR3CMOAQelKNrAigaEEfY1LGwVdpTjP3qgZtqjvVtX8xVA49aQxY3WNiW6\nUhXJznHtSb43fHXmnzY3DPFIXUXAdRkYxzVlMGEgnqOKqNvwCp3D0qSNy3DLxVMY5YmWMqDR\nGh6mkDbmPzbRTn+VM5wPakBMrDbhxxTtm3GOVNQwyCRQD0qwynaSvSmwJmXy1Xb1pyyCViMY\nxUVq2Plk5qVVHPPJ9KQCSN5eCOKaSG5xn3pV6Ev0HTNRtuU8cr3oGSCYdAc4pyy7shlz6U3a\nrfdGOKBbSbdwbGOlIomWMbeODT0644xUVvJ5ed4yKsR7W5xxQSKu2LtmiRQ/zdD2pVkTaSw4\nzTpI2kZVU0AQKxzkc+1SKehI2mpI1C5z1XilZRuUn60DBI1ZctSr8rEbcoaVnBbgcU44Xqcg\n0C1EaPdH8oqNdzJgDJB6VKqFoyuCG9qEjKNlRg0DGLjcePm71Minblj16UxYD5hxwe9STMeM\ndKkNRY9vUfjS8ycgUlvKAj7xyBnFLG25crx7VRIhdcbcYI6U1lMkm4cAdqcArckZIoCkElR1\noKRIWBx34pNobqPmpQDGqkDJ9KFO4n0FSMAu3qOPWnrGVIAPBPWmrudTk8U4fcwOQOtBJK2O\noNN8s4BFCvvbYF4qRZNudwxgUwBlGemDQ+VAOOD1oVtwznOaeu1hy2KQ0L0UgctSCHcBnqOa\nN3PB3GkUvyT1NAbi/NzxkU5VxyeD6Um4FcYyR1psm5mBAoGOPyqVzu
705SI1BPU0zyzzgj6U\njYlHvjigCQMWUg4JzSn5COKrfZ2jTKNl89DVjc8aDcuaaAeufvHJB7UsffNIZfLXp1pI2+Xn\n71IB7N5meOKbEpVc4+amp8uQT3zxUi5ZskcdqABOY23ULxGOKNp/vfKaevYDpQAiHBxjimrG\nBIW3HnipFbHyg03o3PSgBdxiXDHNPRj14ApjZ46U4qHXbzxQAN8rKc556U6Rg33VwM9abtPX\nNKW2rkCkAxTvbA4x1pzLlyQc0igKuc8mk27VJ6mkADO44GFpY1/eZpI8kAVIF7nimhagu1Wz\n1qTLNwBgGoyw/Gk85iuAcc8UMY9ht+XHzU5WPf0pP4ct1pNqhTjnNAE2SvuMdu1ACldxOD6V\nVmglkC+RN5Uq8+uR6VZRsqNw59u1HUBOW29hmnsV8zcOg4FG4KuMcmjlCBTGOZvl3fpSYKjB\npSrHcfvCkXJ+ZvyoESqdy9KQA0SfLjjFI7Hr90UAKdpHzfUUkZCsw9aQLvUetOVTu9fWgAYL\nj3pwZWUEjFRsjfeUZpd4kIyNvtQBJ5q+popm0UUAM8sbgTycUkkjRj5OtOKk98AUAhcY61IB\nuXbluDSttC/MMnFBZWU7uTmkfC4OeKQDGUso7VJ5JdcZyaGbow6Ac0kcu6Q/Nj0FVYBoY9Cc\nFabGw3kg0bhGW3UKAYyO+aQCySNt9aGXCAikU7ZeV+XFAk7kYHSmAL0bPIpTMsajjJFNZwr+\nxFLsAznk0D6D0k3Z3YziojuVeRketNRSzVLzyMZoEIpG0EjIpzKG6dKQyKDs+7TY3LZzSAWT\na2AORTC4VjkfLikkOMBRk0bvNXaeDSASPc646D3p7KGXrkjtSfL5igPwOtJuXe2KYBLEBggc\n0m8sQAcCjkNk9KcmPTigQyQhnx1NMyV4ApZAd+VwBUm3d+FAxix7k3dSD0pZI/MkyB2pFyud\np4zzS7zuGKAEVc8EYYU5WboTT/OA5xzUe/qSRiiwCBim4dRTnk2qq55BzUTSKzEDsMmkMLMu\nWP0o1AnWXH3hwe9NEnDAjAzTOSuOoFOVSE3HoaQClQyEjrTdwK4HJ705F3AnOFpnEfHegBy8\nR9KDgDDN94dKI+5PSo1UN1OeaAE4VlGOadJLlTjg9KWVNrdPpUBhDgkFgSaGA6OQnqOaeFMm\neabH8pIPNI7bT8rY9aAHJMBkMOlMEzAfKOKPlzk8ZpS25cGgAbnBprMDhRyc05MOcHg0xl+b\n0pdQDywDTfMEbMDzRkiQgHORzSKNvUfNRqA5v3ib8YxVdWDvjt1zUu4tkUsZVU2jhu9MQnCo\nMDknmmyZ8wHP4U5QeQeabu3tyMAUAhrB3ZtvApqqw+XGe5qaPgNg5pIyQdmfm61NhjFXaQSM\nH1pjLmQuefpT5t8ZAPINIo2sM96Qhi/eOW47UoVpOM8USKvJ6U2IHOAc0DEaMRsqnk0Lz7EU\ntx8sYxyc9aSFm8vJ55pgRSff4/GkbKgYOfWrEjRxw5UbnNQ2qfviX6YpAQyN8wGOKVBuOF4B\n60+8i8nvk9aSNgY/emIhvmC5jXjAqnsKqpxipJJA8rFjT1XcBnkUDIpseWM9ajHzRHA47VPI\nok57VFbOqqyE96QyE7eP71NdvLwy8+1StGN2R3NRx5WRieR2oERSAy89+tNkYycdsVK77VJq\nHO5cgUAQPb9D1FULq3Kt8wrUHI2/jUFxD5p5PSgpnLappInXAUMv9xujHsK+Wvjl8LXtNYk1\nKxh2Wk2TIqDOx/pX19JandnqK5vXtFW7t5o3jEiMpXaRVKTTMnE/OfxD4bbR/LxJvZsuueOM\n81kRoI1BZc7ufmr6P+M3wzhsLcXkCbYDkMp/hP8AhXgN9aiKE+cNwXhPf0P866oyuc8o2Mm4\nVd2Ebkc5qKSFvOLMxJwDirywrcpycbBlj6+1Mmj2nIH+7WyMzLkwY2zk
45NFwsDNG0RYMVGc\n9KssjTKyhcPVeZdkbxsvzp3pkoqbJFlMJOQ3TvSzWcmwuDu8vg1Las6MHB/eK2ee4q3qF5Hd\nOCg8ot1FMDFfd5y8Bi1IJM/6zh6ezNvQYHX8qUojkgDcemafUCJgrcqCD60jL++XnnGfrSXG\nFUgdBxULOskaq3D56+lMLgZWjVhjcC33gP0pfLCxuhbKmpVzIyxMQC3TihVCqwIyQcZoAgaM\nKwBOBj8KYQEOVXCinyKxmJzvTHApm0b9ucfWgTHbvORZBxxmoixWQOT+6/rTwuxtoUj+tJKg\nZiYxuKr930oAgkbziAR869MVKivHIX4xj8qEiVecgOei01ZAqnexbDchRVDI0Ywq6yOFDDIa\nlki2wxtuzk8qO1E8fmkoee+McCljZPIKjcxJ+8R+lAhizZyV+Ven/wBek2iRSy58w8D3pu3y\n1Ziw44xT4cnO84GOMUAAZpGVcYxwacf3kxUDavQGo9pVS272AoZpNypjAxk0CJGjPmYYAhR2\n7moWXzFDK23+tShQse4Nk0xWVQNi8g5IoGSbt2FfsuePWowxfG4H5uKcuJXPYkd+1CHgAfMB\nkg0ASW9rK0YZSMA4INMkVo5MMODUkM0sa5Iz6gUtxcJcYPzJ7GmMqvt+6oxng5oaNmZFX73T\n8KU8yAsABnOKRSNxkTsc4NIRIyndh128cYqFo1ChicYqWS4ZVHG7d+goLblKryyjOfSq6AV3\nUcgr7jmnwYbzMnDY4p2EO0jpj16GkDDftJC8ZzTAjjc7gVPP8Xpmlz5bZyCG4yKTzPKXAGQx\n60gYbSjDPcHFIYbFDDjIHNPz+83KNpx+VNK4UKfukff9KarbtqAELn7/APeoGPYlW+ck59Bi\nhZPLOMCQetK0asCEbJzzuNMlHzKW6+tBIKwWQZG2lKtJnJ3kHPvildRywXHHHrTI94284Pc0\nihThgARnnJPoKdsO3ggpnINJJGQGKnvmoWUs33tuaBaE821AoZflPeoZ/lGX4HTihUODvbPG\nMdzSY8zCs+DjBB9KoQqrt/hLEfxU12O1ivy04CSP5VbP17ilOGzlqnqA3advLZTHKgVIuEjx\nuDbulM851QjgZGBTUjG1VIKkc5piHrCrbVPbqM1ECm9wRt56VKJAjZIyBzmodvVyuXznHtQM\nm3KvKjtwtMCMxB53n07Ukjbcgdf0p25GZTIxQdNopADSEMUyN46inKn948dhUe1lYlcZHAp8\nko8sZOCOpNAiSNtykE4HpTnXftDNuHb1phcSBSpyTxTY90hcZAkU96YBIBGSsh2+lNZnBTfh\nu3PanSZfD9SOM0yNRvIZuOozSYyUYZ3bd8/Tr0po+RiCRt9KQ7HIDH5umRQseZGdyCoGAopA\nNUNtblSMdqjj/wBHwSxP+yRSNt+npTlVWkywOcd+lMQNlXbd8nPDehqNY90bAndITllP86eu\ndjnk5557U0tGu0Dkdx6+1AyRlAiBPIXoFqNsBWOOo4HepI1Zn2seDyB7elDNtY4OKoNQVcrG\nR0xhjTd2GkXduPYU5WEisuMH0prKDJnoVpBqAVFkUnIOOQPWhlZc7vXseQKPvjPI5zuFKpXk\nk5b3os9wI1+ZcEZHvSx7Y87jhu1OHz4OcexHakaReTjcQeKAE8tto3EA53Gm+Yu1gRkHnil3\n5bOcHrj09qftMfz9S3T1FINiPasfI+ct3NPaMfwnccZo2hOWOcdfWmofkLIvzf0o1AVcs6qp\nyOtNaQTMccnoD607K7g2cLjr70DDMWKjd2OaoCNgqsoZD161YYHbhevqKbuT5ecnPRqSQ8Ni\nTdn9KQFjTVWWQiQkj09auNNFHxEqqOh3dRWQ0gVxiTYQOo9acsjzSbuvGDmgZJeQpHhoSp3N\nyVNQnGNuOWOOf6U9FVoSqrt5zmouTERu57etHUQzb5bKpDMW5PpUi4ZeB8
y8UuEjVSGwQOc1\nFH8oZgxBJzTAmVmWRhj2OKXIixuOVPb1o81lVsgHd1NM+QtluSo49qAByA2F6Z7dqGXfJvBp\nkcQikeQKWdhnOeKd1YSng4wFoAcYzI543Cm8KuwuGP8AdFLjcF2yZYnkCnq21jtVSq9aQiFW\n4ZBn/dp20IfmHyYyRSeYdoOOSe3pSNMclAMtnr60DFVgjB0GCBkfSkVnPmBOMHLZ9KVRtc/L\nyBRJIAzEDJI7UCFTOA/GOvNNXJZixyp5FKrjyxg+YMelIq7tp5Xac4oAVWB56Z4IPWkj2xR7\ngck02Rl8/cQSzc57U5juKhOAvUHvSsANId/ynjHGPWpGcMYt45HTHrUUKiON36AngUqqpVcO\nd3cVQCbX2ucYUtmnso5Kjb8vemNbluGfaD6mlwWjCbhlTnOe1IA3eWgUnBxmhWBw6klPQ0rf\n64Bscjj/AApCzJnam/sQO1MYn7yScMvDDkj2p7LtgaQcnPXNKWOBIMowGMHrUfmKy5UbWzQA\nm7K4DkjGSD60CdkVQDyTgn0p6MtwGBO115yOh9qWRDsGABnk564pAOaQtIFKKT/e9aVWaTeG\nGAOhFQ/M65J2gdDUyNujH+c0hDcDcNsuGxRuHzNjOBgfWiRUjzjBOM+9NlieQL8+xf7oFUA1\nfkEan5S3WnwqhkVQcHuak2svysQwHINRbmWVU6ZXcPpSGTNHsUgHJJx9ajwyrnjcDilRtuST\njHrUeVjkLNk5pgIzGRipwO5Ap6ZRcr3GR60yRSNrZyW4461J5I5DSYK9KQDBIzAZGOeeKFBb\nzMggH7pp8iko2H2gD7xFClmYHbwOD/jS2AibczgKMhR82ae3yqDsIBFSswC/NwOhxUTKfLK+\nZkHiqAjX93FuwA2epqRmJ4HBxnIoVY3T5zkKNo4709VCgKeoPUUAIq7ZFZhlcZ5pm8eWxbhi\nam3bmJHK9ATTJEO0ZGRmkINrKq7sEDninMy5bqFIH4Um4DcR95abIx2ksM/SgY3yzIGY7gD0\nzUiLHtXP3umfWkXMyrhqVcKrKPzNFwFKgEr075pnlhVAyCTzmn+YrMAx2kDnNIqllxJ0+8vq\nKYBJtjVufmAzuX+VGfNAGwnsKYM7SyJhO9PDs6nBxn0qGAnC8P8AMQcGiNkWZ2A+TGNtK6gB\nQRg/zpgBXB5AY1WgDmV1VWABpFkDNkfu3HIHanqx3ZK5QDBU1FJG8aqxK4PAxQMeJmkcnfuA\nHNSRsrH5W3cZBqHzEt1GF3jODik2sgZwRv8A6UCJVIOVXvyc05lVo89CehqOTgjIx6460rMF\nkUfw9loAdt2oI85461HuB+4MY4/GnRyeYr8YGcDNOX7oZBj1ouIRVy3zj5jRlNucFuy+op0b\nLGDjJYn71K7Rq+c8Hp9aYyNUaM7SQF/2u1PVgrBCD83Q1D5P7w5O5euSe/pUkjh+ehxxQUKk\ne3dk7mzgUgcspUnC559qcqLuTLYYc59ai+8xI+dWfnFLqSx/J+UYJx19qVtsboD0xgUzdt3o\nOGJ/SnfOsigryozmmAsu2TBGSo4pJFG4FQQcdT2okkOBgDJ6+1Ku77uc8ck96BDdwJ4UnsW9\nant1Fuq4PLdRQxX7PGgODnNRNH8xClmYfpQMuYUMzxJkgdG9aqsSV3sMMTyKeJD8ro+SONvr\nTGkCsS5yM0gAbmXaX6+tOZgNzf8APNfu+/rSGEc5YDPIoZUkyFyT0LUAwfeVUv8AKF6ntmiO\nVkkywKe1M2gt0KgCnbgyN64wAaCUI+S2c8N6UrIm87STg/KPUUq8MrqmQo5BpNzedtYYAXK0\ndShWZZOANgzwBSedtJK8sDgiljyvmHGQPu/WjcuSSvXpj1oEIrHnORk5oeRlmxt2tjvSxyHJ\nyOfc02aQGTfI249CvpQMcpZT12nu1M85ZFKg/KvQetCH93x8y54qRUUMzkKG
IxSEJHIyLuVA\n6e9Sx/v7eTb8snYeoqO3j3S7OVXGCoq3FbhZOmB0oGEMIaEMB8wGMdhWhBZ/aFKEEFxkimw2\n7NGUVy+em0V2WheF3uZFmk3eWqfSkMk8L+HmuNskkYlGACuOa9S8N6DuZQRtMZ4HtUfhzw+I\n4lUbkG3PPpXe6Jp6wsoVfvDuKAJtP0l2UEjjPHrXSQWItowBz+FSWdqEVccVoxwh8Anb7Vky\n0tSGGPdGBgj1q9bx/wB0YA70sEG3POauQxgLjGKzLGN82AOtOjXdx3p+wKPc1YjVVA4yfamO\nwxIzuAPSlaPbg7uamx8xA5HrSqm5sAZqWUiNV2r3NSxr3zgVLt8zgUgi2LjNUMjX5cjqamVT\ntHNEaDGT1qRVB7VLCw7fuUgjNAX92PWkDfNUq+vpSEI2SoPSmtwoI+9SvIW7U5QsgwRzTATa\n3JNOj4wSCTTtpHbFOVWxzwKAGhQSTSls8UL8oOacvHJHNACce+aTaFyAae0u0YVetNKt1oAR\nF3Yz2pxAZuhNCsVbg05W+bbQOwKfmx2pysQvJFIoDcZwakkVVOD1oEwVgq5Yc0kbruJB496U\nSDoTS+WOOM80CE2q3PfvS+UeCCKf5XU560z7vagsco7E8UR5DHI4pVUtnHNOZSwxnHtSATAb\nLAcUikScfzqXaNo4waF29NvJ70gGFBN1pdoXvkU9V2/KKXaFXBHNMQFeOOlNbG3LdaeWBGaR\n13YyKLjG8bSc4FIuGXOcGnrt6d6ayFmBzgClcLjlYR4BG4+1LvDMcDHpmnIQZB0HrS7grFSM\njsRTC4z+LHalHt19KNoK8cNRHGwU54PqaBhGzHJ6DvQvzcYxQVJHenquVHFACAEMRsyD3pr8\ncdDT2jLd+KTAXJPQdaQCx7y2COfWnSfd3dTUTXkYwd/FRrqltI/lrIrN0pATrlvrS7McNRk7\nhwQKUkuwFACxMA2007b+8J6imLGTIWBwKdtLLgHmgYLjdyMU9o8gYGeaBjbz1psjEsADgUAO\nK46duaVlIA3nJPIpI1BXkk4NO2jOeMUCYKw3fMuRTsOrdOKOi5NNO51zk5oAVowpGBjJ5pwj\nVWZe9MGeBjmpuGjzj60ANMZVVU8D1p24cLnPvT13cA9KGQMMryKAFjUOCGO6oxJ5ZGPWhCUB\nbOOeadMQzBkGBigCTgqSRjPPFIx24AOacJSygYximbfmG480ASzR7MfN1pqqdvenYD4Gcket\nOb7pIP4UALGBj5WwfemH93JyN9Ebr0I2tTiNygdD60tQJJFGNw/Klt8FeePrSxrjqQTSE7mK\nk7RTKCJW3cHcKezCQjPyH1NRxswbAP41JMRIpVuKQiRHMfJGad5ccqb/AOdRqv7vA54qW1eK\n4hKsChFMQ7ytuCBkVLCo+8DgGo42CnbnjFOjUcqGxjmgCbywvIxig7dretMdtqgNxR5f3SGy\nCaRSFXKrkjIqZWVjlTgelRyS+W2xDyeOaSNjHNtcfWhiZMkS8np3qVpgxACnAHJpqIqSFg2Q\nw6U5mGcYwKQC+YI49xHehdxXdjv601VLMFYZFG1Q5A9e5oGWpEG3KHk9qZC3mKd3TNO3eWAe\nMdKZEg3MSMZNABJGVYNGcYOcVajml8nJGQetRKoXlhkelOWQRSlA2VIzQwJ42Vl5pcbmJXmk\n2hsFRg/3akibau0jDd6AGyASQqpODmk2vGMEZBpfL3tgnBp7524zyKQC28iLlXIz2zVjcDHg\n1QSHzv3jDKg1cV1jYLj5aRXQYoL9emasowVTsXJpqKGY5oZjBIQvIpkj4cOrbhSR5jXrzmnJ\n5ezBOG6moxIGI3ZHbFAE0JDcn1qTqrHORnpSL8pBXgdKRVdmOemetAySNkbg9fWms27O0dKH\nbapOMgcU5WHl5HA9aQ0PQjGTw9RXDTibjHl44okYZDYqQKJo+hz2pXAdFukUZO
CO9OUN/Dgj\n3pApWPr+VJzt+WmCHTMvH9/vQgK4Ixg0qr8pY9aQrtWmIUHazZHB5peeo6GmjPUnPHaiNt3G\naCh+7sTxUasVYqOVqTaF+9QCVOTz7VFgBdsmRnaaVc9AMrTFZgxIGM1Mjqp4GeKoBIZDHIew\np5bqxOc0KBwSB+NBkUYJGR7UAPjjP3h0x0p6qMEnig7lOVxj0ojYySHAwKQAzD5dvBpzNwc9\naYGVmZcYPTNPAEbZY7uKAGqBuHcU3PzkjOacrKobHNOVh3GPpQMbjvu/CnH5cMOKFjEzHHB9\n6cqnJHpQSMjVpGJPr0qWNidwI+XtShf3eV+UA0qHOfSqQwChgcmk3LvU4pJH5yFzSlcrnpSY\nCH7xIANP3NgcVCGZTkDNSrI30qUJiKBu2k08sFbGaazDdjbk+tGwDk9aYxTjdu6GjcuM0fLy\nQc8UnIQMy8VICqwmB9vSnq23g9TSKNq8DGaRs8UwDeckdqk3BQO9Rbdq5Y8+lOBDggHijUAL\nr2GRSuCqhicChVWJMZzS8Py3T0qQFR1Tlu9KMBsmk4kwABik3KwKgZINMAYqrkgZ9qfgHC8U\nqqGww4HQ0qxjcDnAqgEZgGAIyBTl2qxPaj+Ek0z5o5ACoOaBkoUeYrd+tKzfeA6k07uM4xih\nscnsBSDqES+valbruxn2pU4wT3FKqsW9qZQKwMf93npQxHUDI9ab5e59x6Uu4CI8gD0oJHrn\n7p59zQ5GBu5po+6hI696kZsf7QxQIZ5m4hRxTl5NMGNvTBqQfcGBQMcowOBSNhmztwaQ7ucc\nDvSo27r2oEO2n0H5UUbzRQBXDFo8mmswLdakbHX8xTElEgO4CoAj27eR81O3fLjHTmn/ACqp\nAGTTdpChs/hQAqYZSCaTy1Vg2MUKfMYsOFFNwG5DUwHsqO/971pWYrkKvHrSbxHgk5FN84np\n0ot3AVlG3cTSKqqo3c0qNjcPbpTN25uTkrRsMkOzoce1J9wc1EVBXcWx6UrMdoB+Y0MGP+8R\n/DnvUmSuABmo413d+F/hNLvJycYpCBoQzFm/Cm7Sookm24GKaZOg6euapADBlIPrSqu+IkHA\nzT2YPioz+7U4Hyk80AIqqFOetNRR5ZbHPapEZSxOOBUfncYIwKVwHsu4EZxikVhgnPSmbw0m\ncUMBwR0oAVX3NnbxStIOVHJpy4CcVHwhJAxTAeoHBxtNKsI5JbtTPMDoC30pPM25446UgGMy\njqQppJI1ZRg5pskIm+lOjjEOD1pACx9cirDfMmPXpSfw7j0qJid3HIpi6ih9pA29Kc2ZMZHF\nM3ZwO/ekk3bcA8UdQLEihoSvQetV4VDNyd1N8xlXBORTkykYyMGgRJnblTzUbAK2QKfE2RnG\nTTZZAfm6L3pDQ12ZuAaUyjacD24qJn3cYwD3pzfuvlHPtQMCWUD+8eKRgNw3c4HP1pyc+9NV\nCyknrQIZIweTp8uKdwy8UeTu68L04qTaEbaOlAxm3kHpUUjNH2yPWpv1pGztC4zSArq37zJO\nDTriTgY6mnSQ7VwwPsaV4x5YJPNMBi5ADDk1FJncSBUoZcNg1FGGbOeR2pAS+aCo6Z70mRg8\nVG0ajoMHvSq25SBTAmjVFU9CaiZt2SRgio1fGMZznmpmkDLtHUikAxnVuTnpSL0Vicjv7UyP\n+71xVnYfLJK8elAFZl3tgjj1pMiNiFGcjtSrN/DjI9qWNvLDFuD6UgGSNuVUx1pU/crgUu8f\nexxio2YP0pAIX8ybaPSg/LgKckd6DGVx296jhzJIyqMY71QDplMgGB061HEPLyznC9qVi4m5\n+5jmornBwFPFAEMkKyTbl4FPmjO0FeMdqfGwX+HmmSb2bAzSuAkqFoxgYNVWjCgv1q0rtyMZ\nFV5OXwooGMVcL6qaR1KrwO+adIHjYY5U9RSqw2kd6AK8i+nK1EihunX0q23ywkcY
qFUT7wOG\n9KQityZCBwPemOuQR3qaTMjnjBpu3Cjbye9AFVlLKeMVQuI+PwrWkXI4GKqywiRcUIDgvEGh\nxXSsPKV0cYIbkflXyp8avg+3hyaLVNKhmayYN58bciNt3Ue3P6V9pXtqNpTqD2rlPEWgpqln\nPZyqGglBVwFB3A9quMrMiSPzpmXyZriIqcdqjK+bHgNgg16/8XvhLP4L1L7TZRmexugRGxGP\nLPZT/wDXryCZHgmkWRPLlJx7V1xlc5pRGybWcbewxx3rOmh3M7FiN3c1oPC0cYkDbuedvaor\njOB3B/WtTMy9jpMHCl4xycU26YzJxhQTkEdqtOXjVkUld3GfaqrRmBWEfK449M0wImzGUVXD\ntjAbFEsnl7Qoy+MN9abkqTuYbFHP1pjsqlB93cc0CKuTLlSrFs+lSuu1tpj3HHOR0qz57RqG\nGCuOOKqedO0hJ5BpgOizH85OaXnrjjqahZvIVtvzgt0oXzJNpViFB5X1piFuMBgUO1gecVE3\nysTndzmrSiPeGI6DBHXNV2VmYso+UHgGmFxZJHkKKv3xyKZC/lyycncetO3bAZPuk8DNLGx+\n/jdzgmmUBhWblMszGqaubYmOUFcZ+X1rQ842sitGB5inIBGaueZBqMDPcRoszdWUdKGJmJC2\ncg55GcUrR7oW8uTDBvumklxHIY9+cLy4/lTfLypOMZ/WgkFRJBsPzHqQOtNf92wy3DcDPanC\nM5Vo2CHGST6d6RWMxLgggHkUDGsp3lQd2OhFOk+Vl/jz1zSfOsZyOWPBqNmcIXwCE4Ge9BQ9\nsnfk/OelORxtRlPJ6mo9+4OeA2OpPT2pzfu/LUDg80CHyRBt678MBuLe1MZRInyH5e1MW5aS\nV1C8ngmmqhUkhu9MQ9g+5QrYxyaf5Z5ZnznkCo42bzMkZHcU6NtrMGHOenpQULJ6j06mmNll\nycBal87bGTjeM5xUe5WxkbM8gUiAjX5WxyfSlXcMJjnHLU2LDsBnBBp8mRICFyuc4zzQMbxG\nzKOfl/Wo2kSNULj73GMVLsLl1Ucg81FKNykkgL0A75pjGGNmUGNtqZqbJk6Haem3FMjXCKqt\nnnJp0qsZ0AYF85JHYUARt5hby8YxyR7U4L8oOcL1A9vSnzb/ADlkI4YY96jkh8vYA3Lc/Smh\niKN0hY7sHpjtThFtGwtg5zk0qOskm0H7vBFJs3S/MD9KNxBIcyKynI6GlVQpZSflNJsCrjHA\n5oj+Vd6gdfvGpAlZgqrtGGFVlURyZk5LH73vU7gsuU+UZ59agkJZioGccjNNAIx3Bm/jHH1p\n24/KWTk8E1FIzKowMHvinHDcbsfWqAHX/SFAJBHftj0p+U3SISN3rRyV29x+tRtg/wDLPGRi\npEHEi8YBXvTnk3Ltbr1HvSLGpByO3ApN+6RCkeFA5zQIGkG0gpn2pkp27FALSNycelOY/MG2\nsN1N+dW3R8sP5UIY5lQyAltw649KNrK3z4bPINMMmASnG7g57VIvI2hsnHSgRGoZWzuyaRnL\nqRs3Hv7U9GjfJdsRjv701WaNXRRt3dKBakm4NGu0bQBwRSeYVkUEbt3GaYu5QqMQOeRT955a\nPgg8ZHWi4CbiJOM7M49MU+SRWY+ij9aYx/eZOfm6ketP+WdeBtI4NBQis0asSgPpSb9r7VX5\nsZPpRJhdpVsqD81IufLJJ3DceR6UCDaUmyyj5sYz2olUZZS42+gpdueQd23kKaFHHUMre1DA\nYUKMF6hhjimNHyVbjbTwAGOSfl5pqxiRTknJOeKoocuM7o+GAyWo+9IWb8KR125RRjA5pZPu\nqYjn1HpU9QuJkNIDjBPBojz5rHYZM9qVWKtkqfShvmeMj+FuecUCCP5kOBtHJ2kYpF2My8HB\nHNE27znYncpPSmRtJNuAXy9vI96AHeWr5XmmSQ+XhWHHUYqYMrRgtkSelMVNkeTmUn
kD09qB\n3EbbHjEZbHWkbLdeD2btTlkl84Arlcc+1DPuXbwRmmSIzDb8xHPenceXg8DHIprY3ZI4xgGm\nRKNp457LQMk8svGBtB+tCqvyleTmkDeZIR0JGD6ChiIYyg+8KBDWUM7HGVHehsNgIMD0pWkU\nLsAxxn8aapLPgHD479KAEeH51I5wd1SNGd3mk4yeAtNLBom+Uhh3BpfMDMoB2rjk460xirhp\nOu3bzuz1pm8eY/HJ7mnMwbIVhweDSNGJGJcZOO1AhGVJFjz8rHkfSnSKqHk7+PuihSqMC3BP\nAz2pjAtFkH73pSAaG/dbidoz0NOkYhlXbnIzmnRojxtuPGMU9VRYR3ZRimMbnLHa3QcY6fSm\nnAUEncf7uKSM7Rt5YHnaOtL987R93+dLqAm2ONt5BU+nYUSDdFnGRnsaVj5ewE7gy8g9qYx3\nfux+Y6UDHcqFY0SN/Eo+Wkjc7tgAbjvTfKPOWxkdPShh1F3CMoT3Oaf91mYrlDSchVBIak2H\nc3HuDQJiMSm3aeSfu+1PlZo8cZJ6UigLIWYbnIxTsHb8w354A9KQhP8AVr2JPO2klVdwbOTT\nVPlTDn5F45qUbd3A+9TAroyGQyY46Y7VNuAxlNrdaZtCsSpxTmULyXDAijqMazAKxI3ZPX0p\n5i2+WQBluufao2wyA5wM8GnNtUk78n2piGGQfOwOWB4GKe7FeQDx396VR8xZUxR+8aTaGG1j\nkigYikhQWO4n9KaqhMhm3HP3adIyq/zYCCkgYS/Mv1WgYvybwifKOuKJFDMRnL9/pSt8rhiu\n4+hqJ9yqxx1PWgkmWFX25yGxkemKRsL7HsRSR3CryWPIxyKHk+XdjIHagY4lGbeW+YjGDToc\nHBzz6VGcKyZQHdyD6+1DfLMu3g/ypAW5AGVu20ZPvVfJYI3G7oFx2pWuBtY5OM4qJJuS24Ad\nmpAPG0Mz4LKOPxpwPnJl1IbstM4RVJbljynf60rSSbcP19aYXG/J86uOaWNUMZwG/nSlo1+8\nCPamRyFeYzhiaAJl+VxxuwvFNgUx78kAsc4ph3edw3GOnqadE4aTLAjjmjcB+07fmGfrTIwC\nVJIG7j6GnXVxu2hBgdWzUPythT0z8uKBDsyeYw4VRxnFPGW2hFwvekdsgbzgZ4prBQ2N5C9S\nRQJstNHIkRyAE61WhkeRQNhyD0p/nb8JglPU0yN8MzK5AHRv6UdRjxukLc5U8H1FEkO0kbsF\nqZFcCLKjnf8Aeb0p4YNzkPjgUwGwR+W7AdAO1INxUs3PPpTY2LB8ZDZ6Uu3ODjOPegYqqfM3\nHbvxinLkkgnaegHpTGmUkbeQPb9Kezo0iYb6+1ACKGDMhO5cdqdMy/JtXj0HemtuPKsqDHQU\nke5WC7ct13UnqA6TBm4+UY5NJtbysE5U8g0Db8x+8B1+tO5jiQA5kJ/ACgQjM/VeVxjNDSR7\nl38jsPehgPLYmTPP8PQVDFGqyYB37uaB3JtyGT5enf8AwpPlYhlX5eaRmUsQvylaXIVQ2fmb\ng47e9MB3ngsSynFMVljUk9f1FOVSVO87iPSmq21S7DeW4xQA7dENzEkAilfcwBX5T3HbHrSS\nKW+clU2jp7USSBvlJ/dnj60CHuW4GeOxxxTZPmxkAMOB9ad5fzqVfccbR7U1VHllt+Nhxj1N\nBRI+xsBQd2Pmz3NMVg+SBhlGCKGkxgg7n/zxQylpEkUYI4YCkMF2IvTJ9aVIyillPHUqKYzG\nQjByM/dx0pwjGWYnH0oJYnG0t0Y9KWPepOBubHNMG5owx+Ug/lVmN3i4yWVhy+O1MRCybsn7\npXv60MHkQYA6jOPShsLINrblYZp+7KEK2D0AoGNZlO3jK57U7d5KnJ3DOdo9KarBI9oG1l5N\nIzEybMhjjNADyE4cZ2t/FSIp8sjooJPPen8+WSTwvp2qORUkZG3Z46epoGEy/Mu7rjNO
27mZ\nS21R0x3pjNuzuPzAYI9qaqmRcg9eQKQh2XbILLTT8y5ZsMvYU+RmQRhF8tjyW6/hTGdWRieG\nz90daQh6/Mi5PzUbizMM52jvTY5cTKTwCMCpbdWkkHTlufamhibizKR8pHzbaP3e4EfNkflT\nGkJcqUyc9+//ANal8srISBw3RR2piBfmYDHHek+RZmXGM8D3pQuWLBtpxg08osi7R1Xr6mgB\njL5CZKZ/hGOxqePbtzt+bvQAGTGCQOn1qyq8xrGBz97NAxLeJdwwcMT0q5Z2j3LOXX5QcAep\nqT7G4dPLhyxONw5rsfDfh37RcK0iMgxyPeoGO8NeHN0yPL8rDpE4r1Dw/oJ8ss67g5546U7Q\n/D5kySoUDpkV3ml6WYVRIxytS2A3T9MVFRSOB3rp9Oscbcrx2NLZWDsqnGRnmtyCMoo4wKhs\n0sMht/lxt6GrUce5sAfN2pyndwFqxHCVbJFQ2URQwu3GO9WPLZjkHpUqrwQODUkeWbaBzUlE\nQiG3nmpoV2qKVY+oPWn8FV2/jQMdgDgDikWMkccGnsG3KAOKexKtjGKNwGfc75o2no1L97tT\n1A3ZbmgfmMXk8Cpkyhz0pygbuFwKcV45oC4xQXbJX8aXb8xyeKkz+7zSIvJJGaBDFTqRUkI2\nnJGGp0bBhzxUiqHBJOKAG8sMHuetIytjk1Jt3AD8ae0eeTzQBXZd3FP2496e0e5emDTo1K4F\nAEcMZPuKdIpLAdAafyo60vHHrSAr+SVk/nSlSxxjA9amK7WHNI2exoGJHFjk4zTzgqWbrS7Q\ny5zgilUKFG7mmIjWNfvDmpF3b8cAVIu3acJj3po+YZxzQAxmKtxyKXaMA+tP280n3eMfnQND\nlAXgHFKV3MD0prZ4yOM9qmxvqSiL7z80vIycZqSMAseOKdnbnHIpAQMwbHGDT26dOal2iROV\nCmmyJuI2+lNARq3qKc7ZHtSeWcD1p23cuBT6iEUDbkDmn7V8vOcGlC7VAA5p7gKPu0mMZGoV\neF3ZoICnoRS7hxxikkz+FIWohXbgjnNLtYgelEnyqABmhA7cdqYxyt1FOVlXr1pq5VuRmlPy\n89RQAnmBmI24pskbL8oOdw5FPVQ3IPFLsIOc/jSAzL6EeTjbt9altrG2UIY4VD4+961bkVXQ\nhvmpY4+BhSBQAoOMDHNNPyt0NWFVuABn+dJcKzcYwaXUCFW/c9CGp8aYXIPNOTg7TwKesYEe\nc4pgNWMsvNKVUDaR81SbdoA5zTGw0mc5FADVwo25zmgBVBUjmlMZ64xT2XcoY9aYEfHdqkYE\nooTp696BEm0nqTSwqcgUgG88LtOf71KzPwMVLIpWQdqVYmLbmbg0kHQYG3J15pVODkcetLEg\njLMeVzSx7ZMk0aiEkTcpOOaSMHucUvye9OXCg4FAxjH5sCnsw2l+9JxwWYU5drffXI7UBYRT\nuTIHNSYzHk8Gn+SFTKjim+XlfvfhQBJt3YyB9ad5YbJTkelLHGNvJ6UkcjDOAKYEZzg4GKe3\n3FJGaZuY8GnBmxg8UFIUN/dFSRssrYI+tIFBbHShlG7gY+lSxakyoY2yOlOj/eMcrin7t0Py\n8Gmp8zAfdNPoMTbtYAipGXGGqNpgz7ScGng7epyKVwJPMV1BcZ7Uqx7cYPvTH2yYwcVJHt3c\n8kCmARjzH7Ajmp2UTLtc/P61XjYJJnGPWp2O7kDNMBiq2AVOcGptxkbmo1zGCoGCaVSyEA81\nIFhZHgbI5HvS/LM+4d6I1MgOD17UzbtPPUUgJ1jCyEe3FIzbiuPl7GhmfardadJu6g470wJF\ncM2Cme2aeY1ZdpGBUPzbgQfl96czcgbvzouBNbq6ZTqvXNTs3y9OahjbdFgGhmOMKcnFIfQe\neVDkZ5xxUqxrLG27gj0qGGbcwPQelSB+pA49KVhEaqYcAnC9asq8bLll3CopIw0fDYNPVh
nr\nwBTKJi3Qr6U1flVWJzz3qJJhgKRyTU/3myBhaAEVD5hYjipBMgfa68+tMWdhlSuBnrT5FDMD\n1NAE7bZE4HFM8vdzuwaWKQKdrcAc04kSL93r6UgsO8sGPrz3NL1Xb0HamL8oKinqvqaQC4HQ\njtTVZ1zjikX1JOc07jk0xCwt5akHNSw/Mu7pTVOQARnNP24Ugj8qY0PVsL93NM+9z0PpSQ5R\neODT9p5ytAx0aDd7daPKXeGU8Mfyp7Y2Y6Ejmq7ZSMeuakCYhZHIJwKVVVT1qOMfKTmnsd3o\ncUAJJHt+YnipFjj8nfnilRd0ed2R/doYDaB0BoAdCqtyeFpI1G4jPy0FRtGTgUqsNu0DJoBB\nnZjJyKkU7OQM5qFvm5zxS/ecLnn2oAfjLEY+bOae0bNJy3GOlM8sxyE5wfSnrlcZGfegZFGu\n1sHmplUKcgc1HyzfdwKkVtvWgGOx3xg0NJsYYGVPWhdwY5HFLt+Ug9KGITeMlfypF6Be9EYW\nTJzmkVh5hxwaAJcBVOaRvmXjgUwZlb5jQCc7CeaAFG1V9TTtmec4NIGRWwBk0/zBzxyKkAWQ\nBuBTWZvMKnkGnrsWMOaaJNuMjOaEPoRqqx7v0FSsxkUANxjpSY2sfWnRqVPWqEIrFcKetS8E\nEnAxUKkZPy596eyq0ZBHPWkA51LAFRjNIFZBjtUE0Em5ZI5ii45X1qbbIqDc1AxGHzetPK9x\n0pFXbhuq0u4svHSlYAA2nPYUrdcgYzSEEqMDjvTwcuB0WkAD7v8AOnbi2AV4oC/MRjikL7lx\nmqEOCFmxnihlMkh/Smqm3oakZec96VwBR1FId2Bjn2pWX5Sc803a/lgA80XGPRjyCKdGSxJP\n5VD5jxr8wqVZgeQMGmMk4KH1NJtAXB6U1W+Ulhg04ZVN56UhDiy7cY+lOVR+NR/xcjipFyFY\njvTEM+UMSeKl3ZXg4pmBt65NG7CnrSGSR4Xqd1Iv8TAcVW/eoCWPHpUykbQB3HNAC+Yf7tFJ\ntHrRTuOxXmjPqRmniMKoUdaY7Ozcc/Wm+c6yjcMjtUisP2lsDHKmpA3ZhjHNCzK/BG04poYM\nxIHAoCw9cqh7Z7UixrySOQKjE37zDDC0+Q88Hin0EIu2SLa3WkboFzjFIrKvUjmnSKF6nJo3\nAibO456U6OQKDjGScmmYaTgc4pxtyCDke+KoCKZ/NxtOB3FSbh5YJOStIyJuOCM03b8+KkB/\nnMvOPlPcVMsmVAzyagZvJUZGVzUiSFeduV96QwfCynIyOxpjAMpJyfenGQelRQyOqkMT14pi\nHxyDb607eFTBOc0IoGQx+tQs4Xk9AeKYx7dMdPpTmXcvJo3DaHHIprXGDwKQmNVOQQfrT2VV\nxz+FNWZmzlcUiwszlyeKBEjsOSoxUapuXBNStkYwMikYYO7v6CgY6JY/Lw/GKhkYNlVwRnmk\n3FmJPSl2Ko6c0gEOIeeoqRdrYzxUS8sNzfLT2UuuQeM0wFaQMNuKYz7VI4BokjY8JTAoUAsc\nnvRcRJGwzudQfeiRl5JOB2FCr5ikY702RQqgqMn0pAIkagckk9aFcc7sn2pjF2GRwaGYKqkn\nJ70AIs27Khtpp31OabIo8wHHFPRCy7iMYpghkkZYAA0i5wWHXpTvul+aT7nPakxjZmZfujbn\n0piyMAQ1TbhIgy1QNmTIAwc1I2PhlO3ax47U9mKg8ZPemRx7VDHrUmCuVPXrVB0IxcBSAelS\ns3THeozD8wOM06NguSRn2pCFmkPl7QPxpjAsq7vSnMwZdvRhTC3mKQvJpiI2jXaQp5qRYyqA\ndhzSKoVMEc1IG8wHkAVPUCnNlmLDgUqnbCSozUs0fysM4pm0xx8dKBXCPJHzjj2pNg8zcDge\nlO+ZlB6DPNJ1YnHFMoQMokIx83rRlmQ5YjNOjjLMT0705k+WkBCqKuAhxzzSzR7mPNBKxkio
\nVmLcdxTARVLsVBwPekdDF8pPB707cMgg8elMmbzY2THfikISSRmZAVxjinKrRIwHQ9+9JgKo\n3HJokztwOBTAiVj3NLhWUnGKWFQ+4dxTWbb8retICqNzScnoanlkDIR0IFOO0tgDnvTZVAVj\nnjGTSGQYZk64GOajX5VBzk1aiyLbpnd3qnMC/wAopgTBh5Z71XUDdwM0p/doBToyNvXDUDEk\nj4x/D61UkKxsPWrEm5uM4WopoQMHNILEIzJniiFCjc8jpUsYbOBwKZtfeRuBNAhlx8o29vao\nFGFODxVhgXGccUxlwOPyoGUZLfzFJI+asq4tdrHIytdFJtmIx8tUb2164OaAZ534v8NQa1Yz\nRSW6zKRwrDIGORXyZ8W/hG+mzNeWgZk3HfG3VfofSvuZo8xshXtXJeLvB9trVk0U9ukocHOR\n7H/GqjKxnKN0fnPcW0sGYgCDk844PrVWNyEViQQTkL7V9DfFH4LparJe6ZG4YD5kPVfp7V8+\n6tay6bcNHIp3nnpiu2ElJHLKLRBs8yRj13dM9jVSaOVoygA+Xt2q0lwPso6ZIzv/AKVDvB2c\nltx4atkQUm29DHlW+8Peoiob93t5B79qvzQszME7jcM1VmjZioHD96kCkd0cBj3fdORSAblO\n04dT3p7ZC5P3jULEiRi7bcc1YmQhTG5XOGxQm1VwST3xT5F+1Yb7pHVvX2pWh8tQyck9fpTF\n0F8xuPLAI/u0jNncc7Qen1qBcqvyD5+tTKy+Scp+B9aAEkYMFV16daFyzZXAjPFOwWXHXI6m\nmDHzBvu+o7UDE2DYwPL9j7VHt8uQlOF29M09VKrjGW7Gm7WYMrDB65phuROqsxcnAbt6Gho2\nbHPPenKpcAhs46mlZdvyocRscbj1zQIr7XVIjjjHzCkU7VATAZjyMdqkZm3ZBzxg4pGy0iuR\ngfdzQMbIHRMZ3HNMDqvyy8Ljge9TKpWbYx3dyv8AWoXI27jyN3SgWoyKMLIRjcrcmpY49u5c\nkuRkc9BRkbhJG2P4TTFlaPcAuB0LUBqLFhcjPbg+pqKZWijMe35t2S2alVlcqEcFRzzxSyRi\ndi2eSdwwaYDWmEHJGePyrZs7y0Nn++izLtwc9TWHHH8rseWJqULu6cJ0oe4CtiOPaBtRj09q\nSYKVRk4AOPrTvkRsHkDrzUMe4zZxhR93PakMGwSwDe/0odgqksxBU4B9aXau1jxlj8xFNkUq\nc53fT0piYvO5WjJHr7mieMqpc/Lu655xTWuUjjCgFAD0qwz7ehJJHHtSGV1UD7z4wOcUijy5\nGA6KcZ96aq4jIPIJ605ozHgrk/3vSqQD4ZhJ/Fu2/pSSN0XdwOjUKBuAVQD6j0pDGA277vPf\nvSBD4yqjIIznlqZ5g2v/ABueAaYzYY4GB2NEKZADNu3daYyTzhCiAryevPApjszgJ/Du4wKQ\nL5yhRwM96R2ZTnoRxtFLqBJklXCtyvJpu48nGdvQ1EVyy4fBJ+Y1LvCcI2d3O2mLqRsw5G4b\njzUm75W3KOOPr71H5Y8x3Y5O38qI18xd7Hc+OnoKQuorL/pCrnJYZ3UhyzZA+UcAmneYPTkD\nigKe33SOlAxMlXznIFJ5byMPm2nqBj9Kd91QBgfWmRzBuWbDoeKBC+cjRnBwV4NM3Bl3oxD/\nANz1pWwWwF4Y5pdoZg2RkcEYoARlGc7eT1FOZVI3Me3TvTVZirlflIPSk3DcA/DmgBsalfmx\ngAdT1qRVPBPKHk5pGxuGW5HNKrGSQjIBY4J9BQAqsoVwfu9nqNpFaPKk7gcDP86UL+6O4YbO\nCPUVHs2qFUd+PahCJn4ZcD5O/wBaf91hjkMetRmXao4+UkcCnzYVCM4B6H3pjI9pW4Y7cgfr\nR5gWNjgn2UUjNtQrzkdRT1+Vl5+TFFwGiUrHnZntimqxVSH5ft6VIrFWGRuY8e1JvO8E4IB7\n
UAEkiumE+8fvCnOgVQUOD3GeaikbZLkrs3dqmKJLHlgQ3ZhQIibd5mSC39KVv3bgpgkDoKXc\nI2OSTxTfLETZQ8kbvwpDDcT1cDvzSbRkncKWTY0bfKC33s9qRVMI2lQd3UUALtKyDHKsKVh+\n6Q7unLKPSmxq3mBcYprFFfAZi47djTAe+SuSApB45ofdHKzBtg6FTTXUeWo/1jZzUrMhZuNx\nPJB5oGRbmZsK/P8AdpNodhhNw/2aRUd1YgqHB/SlZmXO35UxTEKdqxtubC9qF+6OzYzTlj82\nEZOZeufUUwffwuFYdQaQCL3I5XvTf+WJI+Uj1704sAHHIFAYhQOGAGcUCIwzyx7gM+5p7FpM\nbVw4GacybnG1hEhGc9qb1YBwc9M9qLDEkaRcHgj+6BSG49Pud8inHcmUT5k9ajVAzKxO89Ct\nA+gjKkcY4yWPG01LGw8vPZTjNSu0cqqcCMrwQahby5G4+VOuKNRCRzGTJcjg8DFP2GPC7uSO\nlA+chWQDvRJKWVRkHtx2FAIbGT/GpwOM0/d83C8dd1NdsKQDuHekUkw8DB67W70xCRyuZPlQ\nkE9fSlJ2yEE5fPGKUN8oJ+Vj/DQfl6nlvTrQA11Y4YnkDBFHKqqIRuzlmNKzEtnnGMEUbk8s\nHBwDzilcYku1mCfd5zmmuojUMgOG7Gn8lmw3J6A01hJ5akgHaMY96QAIwzD5tp9Kc2dvJ2jO\nM03cisd3BzxSMrZ2KNozn5ucUwZKnzKXyARx9aSL5m3bsHHSho2Od+CM8D1pAQyMqD5uuf6U\ntRCyKpcANkdcd6bI+1lJ5B4GKcGEahyMseKQSFVOBuX+VPYBrSBWIAA9c96RlyeRtPajcGfD\nLuPWneczRt8u/HQd6QDJM4AK0u1oVYuvHY96d/r9oztOPm+lCjfn5vmHCn2pgIu9sEyYGOVp\nXj8vDLzz+NQFWib1UdqmYlsMMofXFAxo/eI0jYAzgZHWnRyqsgcgbcY44/KlWQbSoG7uaaF2\nqOO+QfSi4DpY/LYmI5GMkt/KmcBSQTtbgD0p+795tHU8nPek3IkjMy4GMBaYg8o/KC2dooYw\n7C2GLYxx0qPcvyq2ealT5WwR8p9KXUBvMoTbgleg9KdxhmUHzD2NJGv2djhs85GaGf5c5yc8\n4pgNbMjDJ2n7u33p/k7lKsoG2m+YGw33fTIpxXapYtuB60ARlUUbn5bs3WnltygK22TGMUo3\nRthU+UjrSKowAV78VIXDaFEalvvfzqDcTMVHyk8VYG1o1DDGG+7/AFpZF2vuxgKOPemMZzEw\nyCw9aesu3jHyE1GJN75X05FNG9o9o4Oc80gJZCpXLNtcHke1NWRHLsBx1zRIWx8y5YjBNHLS\nqmNqKM/WmIVWEeF2hg59c0q/MzcAKDjmkbJc4IUZyD6U18Dn+LtQMcqt5h3GkUMqbV55yVoy\nzYIO8nv6UMvyElzHz94DNCELISynOFB9KVVWHAU5PUmhcN9P7rUn3mJC57YpgIGLZdQeTjFP\nCleoyg6LTQ21tw+ZRxxT5G3LjHzZzQAJ/rCWXntSzJGNqswDZzgVEWVmwGyw5pybMjcvzNwT\nQMVfKbgnGD1NOTbv3O3GeMUm50UALujI5OOlM+Z1AIDDPDCkIl2IvmbR15NKpWNQX647Go/O\nSYFWBRl6gUiuJD02nHBpjD91JCVUbWPelDMFCoowo5Peo/MUqS4J9TikkUyfKWIiIySODQIV\nW3SbsbVxz61M6BtrZ2p3PrVaGRtpjAymOPWrMbF12nlFH5mgY0lfLLA8Z6Yp26MbcFvmGMUx\nWC72ZTjPFKp/eKSAfSgQEKNpZSxYdaa4wCCcc9afuAhOX53YFIrAKEPPP3qAEVwyoF475p6M\ni7gcnnIPbNNjj3MV25HTNMjYtlEbleMYpDLCsucD73UjsaAzxwkucjd0FMCkK46kLk5pWUbU\nUv
jjNMLjmys2cALjOaYuFkJ3bh1x2ohkYyM7L+6xtH19aVFDAgLtI6mgQjSfxEYNNLeVICH4\nxyKflpGGApVevvTdpZs+WSueDnpQMbHlVABBOMlv6UoVmXIIYZzT8DaGcblXjij5BnC7V65p\nADFZBnn0IpuwKyhgd2eG9qk3KsTsG6HFJMxk2jqMUwHfLGrHPGe1RSL91jgHOd3pU8UYjtzs\nbee+ajJTYWPLehpAIy+Ym5iNw5z6+1K+7Yh6Y5GPWkb5tvOcjij/AFYyrb0zzto1EOlO6ZVC\n4IXO7NRxqFl+YYBGc07aHm5BOORzRKyyTAHIyOtIBm0SSZ3EL2OOlWFU7RgZx1YcVG2JMBSP\nT8aXd1OSCBjHvQMe0i7mIxlV4zUcTExrJv3HGcYqP5mjXj3Y4qeGFFBbaRnvnigBP+Phw4GF\n9Md6sR27MzY4PrToUIj+VN2Oxq5awmRWbnaBnd2+lUIZBb7EyeHHRT/OrVlZtdSKVXErdM9/\nep7HSp9QmQOmN/8AKvQPDfhPMu0Q42rjd6VA0QeFfDKiN/NYk/eyBXpGh+GVVdsS/u+vvV3R\nvDioqJtO3OeB2rudK0FYyu0YU8gYqHuXYg0zRVWLaOT0NdPY6eV28basWmmLCTxx1zita2tz\nnBHHaoYcpUjgKcLwKtrE3Azx1qwsLNxip/L/AHfNSaJEdvCFyVHerEcbO3Jx7U+PacAcVMsX\n8XakWQeWd554qVIdvzA/NU3ljjjApyqqybscUgGCMMc0vl7TwKf/ABHGKN3zbcc0AJ5W3kmh\nV9etP2/Luzx70inOe5pDQbdwOeKcrDbgjNO27iCSKkjjU/e/CmIYzfMD2p4XzOc8UjZp0WOR\nmkA1o+/anAEsMdKfjdx0HrShewoGNaMjBHHNP4wQw+lOZehNO7ccmkIaV2j0pVyyHnBodQ3f\nFOXaeg47mi47CKp9aFU7+tSr90YpvPPOKBg3oFwaXB4bbT41DdTzQVZlODxTCwxVGc/nSMqm\nTjkU4KV4HPrTkj5AHWkHQZtyORj6U5kwvA+X360bTuxnvUqxszNnkDtQKxG5PljHNOVSy5qb\naFXj0pqnfkjigLEch2cjpSKpmGeKm2jjcOKPJCgsG/Ci4DOFXaKcF+XrzUixgx9aM7Y8hdxp\nFDVG0ZFKrZ4xzSthk96Rs8Y60AAxgbQfxpyqDuHc0jBsgCpFXZgkUAQ7CuATzR/FjHzVZMYd\ntwNRMpOQvBpgEf3eKjLMWKnmnxq3RhtqXyx5gIPFBJBwq5JxTGycZ5FTtGGYLt4zStDtOAeK\nQXId2CPSpGA47Zp3ljIGKXaAwz0oGMZf7p+tN2mrQKbgvSo9o8wgDOelAxv8SoRx6094jwqj\nIqRY84H8VKR5jcHGKAKnklWOfWphv5CjinqxUcj8ad/DyM5oAijyJBuBAqVoxvJPJpyrhQet\nD5OStAEbRsFzSRszcHgClZn24BzTlA2Z5oAU7z07UxcnPGTUzbWUc0xWOMLwaXUAVWztzmka\nMkZHrSBW3AZwetTc424/GmMaAFHShVIQt0qTb+7Kjmm7hxuBxQIGUMuScmmJ97DcDtU6r8xz\njFO+Tadw57UFEbptQEgbTUe3ABAxU7SCTahHApm0OSR90UupNgRdq+ppzZHvTtu7BxikVtuf\n4jTGM2Lgcc07am7pilEmzPFPA3rz0pdRgrMFOBxTTGOp5zTyvy8qR7VIqrtA7UEjWjVXBBOM\nUzZufJyBUoZTkUz7zEE4pMoPlX5hRIrSLlRzRjapFSI2IxjrVEkatt6DpU+4NJkLkjtUR+TI\nHeplPoOTSsAisyTEkYB7U9fm+bmo2kKKQy1JatmM55xSKJBsZvmXHvSSKFIwOBSKC43Hj2py\nruYAnAqgFRlXB206RPL/AHgHFRurDO3kVMJN0I/lQBJGqyJk8E01SYm65FLbrujOTj0pphZc\n7TnF
SBMvzDOaVlIYEnio1ztGeDUsjBl560DZJtMWWDYpVxMpOefWo/ux4I3UQY2+3eggsx/P\nHtz0qrdXMiTeWRxiplk8snFNmUSSB26ngUiiZbgLEqsvNEUZkyzcc8VE5ZWUMM8VZVlZME7R\niiwCLMYpBgfLVrhmDggH+7VCOUZKn161aAVcMOaVxj2I6gflTow5k+VttN48kkdc02BvOyAc\nNTAuKCucjJHrRCu4ZC5qNpCpAJ47mpI2Kj5TQGorIZG4+VvSnR7t2D070JLtyW5I70FfNXcr\nYoGSElskDjoKlXlQSPmqG2fzoyACOfzp6t8xBGQKAF4ZiSMU9XwpwfzqGFgzngg56U+Rdpzw\nVpMBWmEEgRhlmGRirKttQt0quEyu8Dc3vTyrSY/u5pAS/eXd39KZuDKc/LTlKqp6g1Evz5zx\nVAOTIkDZJFTsxUgY96YAFUAjPpUp+ZQOvFACoRnJHFK0zbsgUxd+4LikYO3KnHrR1GStJlTx\nyaX5cbXqIsY2x14p43KucUgHKqqcZyBzTm2HkHGeagiY7sk5qXcO4+lKwEgUryO9LlW4B5qM\nTYkAIyKerHd0oAVkMuA3FPVhkADgUi5ydwxQzDO1Rg0AL5W0bwM+1LHk9TgUMSuME1GG8zGR\ng57UDZMVw4HT3pyZ5y34VExOScnApyyDv3oELuPqaGXowNOLKyE5xUauqsPRulADlmLJk9qk\nWQyJnim7PlIzgGmhvJXAGaACJPLU54HWpDHlgwqJdxU7h1pQ20Dk0CFVg4J285wKVV2ybgOR\n60qsw5xx1py5bLE8H0pDGvCGyx4NSRqAcjnjGKYFyfvZ9qk6fd4JqQEGMYPQUM2SrcEChhg4\n60xlJ6cCmMexLcjrTtpwSfSo4dy4J5odXViQd1BRKg+XmkXaASBSJkAZFO6ZHSgAT5lOOD70\nkknlxljz6ihgWXjjvxSZHmYIyCKZI2OTzIQclQe1Ob7n1o/DgUrMRgEY70MBIWLZUjGKTzBx\n978KUoWyRUUgkZSy9MYFSMso0obJGRjgClXpk5Gay5Wu4wEVj5nXNX7Zp9oMgAPoaYiwFxjD\nc1IvzcGo1wckntSb/lz0NMCRhngGj/loCT0qPce3T3p64Y8/mKLgKuJMnr7VJxxxg0zb1AOK\ncvC4akMVvnyaXd8uM8Uit8o9ak8sdaBdRituypJzT0UqoVmyfajjd6UqqNx5zT3GC4Ck/pS7\ntq7sUiuMkbefWkZdwJJqRDizN1pU3bhSKo7c1C9x5chByeOMUwLPPpRVD7c3oaKLBqT8HBxz\nTJrYycqcU6Q4UEflS+ZswT0oAYIyn3jSxnGR2NKy/aPmBwR2piFudy4IoANom4Yjap7VLKyi\nPAFRwxhoySMc80nG47jle2KQAqiSMNt4PrT422kKOQKimfywFGTmmK23uQaBFlpNrccGkjjK\ntlm602ONsKc5JNPkVth3HkGmAyS1Gc5xSbfLXDHmnMp+Vqe+JIxu5qgI8BgqntUm0bSW+7TP\nkVhg5pwztwTxUsCNhtkHy53DtSnIOMcUMw6AZqRiAgA60ICs25V+tHl5xnBFPkUYPzc0gxHh\ncbhTGDLhCqjjrSRqTyRniiSRlGQMChZgigZpdRMU/MMjpQG7D0ppHmd8CpViEae5qhDAGVAS\naWR+gI5pdu4nnNMOFUnOTUjG7gzkD0poZ2X5jkZqFvM3BkX5ScE1IVK9uKQEn3cdxSLNkEYx\nzTVfgsBmpCwcAqPSqEP3FUAQDd71CW4Axk55pzybm4GDUYHmMSDjmkMm83apAOCKa0hZMsdp\nxwabMm0dQfemMztjBGPSgBgmxH147mnlQzDutQswHyleM07b8vB25pAKzbSU5J7UNMVXZnjp\nUhXoRycVEsYaTkZoAljUfdbqe9JNHwVJpykNndyKYGXnJyfehgQrDtXcxxzT4+JNynNPYqyk\nGm+T/d
5NIB0hEa5IqMMWO4c/Wn49eRTMlW6cUAPRirDJ60Fhk8Um0bhmlkj5yDgd6kCJpMt7\nmkVhGwx171E2IdxdunSo7a6W4jZguDnHNUgLe48+lJHtKsAOaYmWGWOMU5ZFXIHcUmA3cZPw\np7XCrFg0zG1Tj71MaMzIQ3akKxEJGzyeKlRl2nmo32xrgmnJGPLIHLe9UMmVsR0kjN5YA61E\nW7dKm2llPr6UwK+Rkg8mkWSONvqPSnkBZMgAsaSSMScKRkdakBioJGLBcL0FJMu3AP51b+WF\nAw7Cqkz+Z855pANyN3zUNls7mwPamCNt2WPAp1wRtJHFMCKM7X4PNW3hUfMRuJFUIWLMCDV6\n4dtoGccUgKu3ywWPWk3cAMMUrfdGTmmyTBUAYc9qoCKS4EeQelLbbGjzjk03y/MYAjNJL8qk\nKcEHtStqBFNhVIK5IqKPc2SOnpUzvu/GjyWXkGhgG1Pp7VHcR5wRjFOKkkk9RUcnYA0ASqqN\nGFPBqo0QikLMfpVh8qoIb8Khmk343L8tIAhcdMDk0jLH5hNBjLgFRioGfMhAGRTGNdSc4Gaj\n28HjNWFbCkDimlhu4HagGZ1xbjBOcHrVKaHfwemK2ZIw/wCVUpowJOBSEcRrHh9brflFbjv3\nr57+M/wdF5G15ptrvuW++ka88Amvqy6h3DhPlzXO6xpYuFPG30OK0hJxZMo3R+bGq6FNplzK\nJTsCcEeh7g1mLNtUsPlC9c19gfFj4I22sxvf2MYjvwpO1QAr/X3/AMa+V/EHh2bR7trW5t2g\nlVuV75+ldUZXOaUGjImmyyFWyVGDUduBHIcnknljRIypIFKktjk+9N3AMByMnG70rUyEvId1\nwqDB285Hes66jbzhvXG4/drT3eSwAOBjjmoLzZM0bZPmA4PFUIoH91C6n5wpyMdqlnheztI5\nXdTHKePxpjqY5Mggb+1SR7PszpMS6jop7UxFJduwgjYmeHp8XzQjGcdc1FJtaMDB65UelTFz\ngchU24qkA1nVsqpOOvHWo1myp2dDxg9akb52i2YHHOajbK8uNvOBx60gHSSM3yjh8YzRukUs\nGG4DimKQpKEYlXg5701JHG7jJPUntTKLDMAjFQMYz9KrKQV+c4GalVf4WbqKj/1O75N2T+VA\nh62wMe+M47fh60xkbCx8beufWpYpjt5XYvT/AOtTJlbdxuVgM49qBlf7k5JGWxj8KReuQM9t\ntPWFYmUIdwPUmo5iI/lzjcM5qgG89Fx7j0pnmHYuThScZNTqowrnmP1qJozJJhTkDp7UibiN\n5fTbyDjcKdFuWQf3QeabGpMnznKgfrShiIWAbDnpQUEkm6chTgdR6U5VKttPf5jUYydrKpOR\nhh70qwtzufDmgTHRgrkkKTmnGSPcfM+YnjI6Uzy2VshtxC8ikUmHaNoYdfpTDoLJHGvGw468\nGkjiYRlgdidqlDBmJxuPXFMY/dUkqDyfakIgZZFYbiME07bnOWbjn6+1OmYMoRRwejU3a8bA\nZy3GDQIPuZ4yOv0pN2ehwepoZGbLA85xikkJ3A9x2pj6CkRmT5tw3EfNRJHtLgAnAyKYWDbC\nfm+bp6VJktJgNh84+buKLAQwyptUnO7PWns0ayK4csTz0pfJWNsNgYOetMZNysdwQf3aQCgi\nRvnUhaRVPzhBjngnvTh8sfB+XGQ1Jk/K4cgMcZxQMRQu75u3XHrQqqSMN8w6U1lMbHf97P6U\n122thVwT/HTGTSSp5m0nJxz70wMPOx0OO1D7XYEYIxz65pGDfIGG0UdRaiKHRzlgvtSou1SQ\n2T157U54t0m12U45wKiaNvLKZJUmkIkX5JD5i7h1pJGjkJGzYxHBxS+Zt37lLYHFOdd0fCjj\nn3pARNnywr5VsYHFLI+3Hy4bgdOtJtAUMzsDnnJpWVZMMGYqD19TTQhqthiJDjPRqkIVlIOG\nfqG701ds
hUOMsRnaabsJkOG246j2qhj4trqTjkc59cdqjeZdpkEXJPanbf3QUYUg5/Cj5lbI\nG5G9OlJgRnDMuc5xT9/zbW5PtTY+/G85oVm8okLuOfxqQEWXyWC/xdqRW2hmYcg8UrKFkGcb\nvrSpETkl85NUAke7qv6U5pkkAB4Tuw9aFQ+YwVgij9fakCqYcRgoQctxTAnWMMpK5YY4NNVu\neF2jGCPU1b0rDt5Zbndwvc1YvtL2HcThs5Ax0qQMeQBQFzuI5K+lWYwjZQvgHpTGAWUhlIc9\nc9xUkFs0gygwuc802DIrhRD93JA6tSLIWxtXOR941bkG6MBACc+vWopd0MmGAJxnApAVVTcu\nzGw56HvRISyNnjn7/wDSpJZWm5wcj1FNGzaY3JQ54HrVACyFYy2N3GMURsRIcpvHamN+6XBb\n2pNjKxYHAJ6CpAcyqG5zz/FSIwjy2Mk8EUsau5I2fJ1NMWRXRiRwO9UAfNG27bkHjb6UoDLn\njPel3blXa3JHPFJ91SgJB9akYrSEseenA9qQKY4yWXc4P3u9DrwePlPWowjJtO7C9KdxEu5m\ncMFyCPumkVkjySPmP8NNyyShNxKn+IUrRskhPUdjRqIcqqCwHCnnmk2lsn7qUSZViAyOSM7C\neacjLLjdGVGORQMauT8oG1c4yaBGPukY2n8aGj8tQY/mz/DQyiX1WXPQ0wGsp2tlflB5NIr7\nnVVX2DU6beNpXhe4pku7Y+0ZPUMKQhdwRmxwfu496RVIYuOHAxtpZMhUUjDEZz70SK20sOWI\n5oKQqqGIUttJ5x2pCzhmVhu91pGXMIbBp8fzSDBwMUxDVzJjkEDtTiflIK9OlM34EgK/N2xQ\nGLR5ZfmHHFACq+3JJ+WjJ5CHBxkqabuO4bMKAMmmrzkg7O//ANakIa259roOSfyqx8rMr+Zg\nj7y470gztK5wW5ApqHC7cEgdaQDuHKsOCDgGlwWlk4ypGAPemEruIxsHXPWhHK5Jzg9F9aYB\nGv7zeZPcLSr944IT5uR61FtCsV3cHkjsKXjkv0B3ZoAfIWKnpGQe9HMIJC7lPPHeh1ExGOWb\nnax7URrlQofPPAFACJtaRiQyYFP27IyThc/w96Y+Y/nV884Jo3KxyT24wO9AIau13G35QO9S\nIm5Cy4LZ6U35toDED0GKjbAkwBsPXjoaSGP2lmManMjHhakkDjajEA45x60zaUIOMHqGprbn\nbgEbf1pgHlmP7n3up96VgZMBeMnPNPjYspctt9Rjmodu7LbvlBpDH/eb5xtI4zTpl3RgZUnu\nRTVxJu53PjhaAodQoPQ4OaYhSwbB6DGDSspRSQ2MdM96a8YXcccf3T/OhVEj4LHbjI96QhW3\nMoLD58dqZyCvAU55pZIywDhuV4xTS4ySRz/WqAljl3FgwDdenakZ0VBh+M80yRnkjCKuGPUi\nmtEGXyx8wxy1AEgYqoYncCeKcu5uQcIp5zUdvKpUDGccdOKkXDTZLYHYLQMk8xJOUG0dt1Q+\nYGjZ+uOMd6RmbJAwB/e70nIOAvOOtLUCRWAjU5245+XvQpZ5DlSMjIpFw2AD2pY97ZAYbsUg\nGhm3Yzu/2WHNKwG7b1IOfpSzMybM44796bFvVi7HHHK0+ggXdu3wnH1/WlILebtZQCvpSGQq\n2ANoYZpkMLtv5waAJcNsUqRhRgD19aam5sqvPqaV43LZIzH147GgLuUsM5pgK6llA/hU9R3p\nBGRlQ3y5zio1dmiCj5cnFLwHDLn0BoGPWNPM3EkjGNuaRm+VSpyyngGmsqq3yrtAGfxpyyeY\nmWG31pALtG7zlI3HrHRGOkhwHznbQqqygruHrmhtu0NjA6UALuLQySq2AB070yHJi4PelMag\nYVs7uKbtUYQN1OePSmAsR3OzH8TTl+aTqFGOKQwl0JjI2ZxzSCM8BuMUuoC+WsbMzDI9jTt2\nxcsOf6UrfM
oGMDPNRMQzfKT14JpgIoYqAvBJ4PfFTxqzYQcEHNM3f6RjdzjtQ0xVfl+9nrQI\nIpHjVkOH7DjvS/MwBJCD175pysGX5XxnrmneUv3mw4/u0DEbaxChQO5pN46Zwc0YC5ZeRSbt\njHeOG7igQLLtZiMqOu7+lLGRuDOuFPpxQy7txJwAOPam+XllLnK7fzNIYCUMzKVIX1pzSO3O\n1V9GoCjhQMCmKpjfaDnHrTBkuVXG1sk+vam4AyuThurUNtjmQhCd3X0pFfax3LkAZ2+tBIbl\n3hxHsVeODT/M2udnGec0iIFYKxyW+Ye1MkZmZmPAHX3oGPTaznLZBHIFObG3DfL6UyNhJlkX\naCPzpf8AWSKRxjgg0DFZVERUkEsc8etRnLKvt1NPWMj94B0PGaWQqwK7SGPP1pXF1GSMYYlW\nPqx60rZkO1hlem/3pqoY93Bz0HfFETN5JiGSd3PFIZKmVZRjd2qLcHkAjGzuRT9zKwA6+lJH\nLnDhcgjjHXFO4A77vnxlumRS7wMKOm3rjvTIQWV8N82eFNP2leVPz45FMAkYKqKRhj2X+dS8\ntGw5wo+761HuMZzGvJHINSLuOc8HGeKBCYMkeY5AoUZPtVuG0/eKh/eDG7PvUMcQaMmNec8+\n/wBau29vJ5IkyS3QYpAOVdylW4DdAPStzRdLmvtiIrLGOOB1rQ8O+F5ZUjneLczHjvivT/DX\nhd41XMQGTkkDFS2MxtF8LgRx7Y9rr3Neh6J4fVCrplgRgrWnpXh3YqrsAbOSa7HT9ICogVMe\noqOYpIpaXpGYeRtOa6TTNOKsO3vU1tp+2QAcL6VsR25XgDipNEtBn2XaoTcCKsQ5VenApqxs\n/arUUeeDwcVLGMhQqd2al2mRenNOMe4YHBqSOPaoZhkUi0MhjT+Lg04A49RUqIpUkLRwvPSp\nGKZAxC47U7ZtXJ/GmbgcE+tT7Cx5oAh2fL6NUix4HvUnl5604R+9SMg8tn6cU4Rlfr3qb/Vg\n8Z9KXbvOcYNAvQakYY5PGKkcfhmnbVX7zUjKZDweKbCxCseOvNSww/N7etOKjjtUnAjx2pjs\nJtXbg84pQu4gBcD1ojAVuvWpN2W4/lQIZtb7o5pVU9ccU4KVOQDTlzIvy8DvUsCHd1GKkhU+\nWwI4qVI/LBPWkZyV2jH0pANKkJnpQuWXOKGY8Ajipo2DJgigBjRsF3KKNx+UNwDxxTto6ZxT\n4V9Rn0oBERXa2DwOlOXCE5ODT+/zCjYHI4zQMj2/KWPrUqgqQc05XDNtxxTxCWzzxTAhaPbz\nnvSttUfL0pVXLEsOlSR4Ocj8KGMhKhsLmnCMbiG4FSRxoBwdxomw3DcD1pEjI492TngU9Uyp\nAODUUSFc7X4FX8L5O7Pz0xlSRQqjnafSmhSHGasBVPU5NDQmT7vB9aljIPLbcecVJ8xXaRk0\n4IcZPJ70+NWbLYxTAjjXaP6UbmVuBxT1x3GD70ZZgQo49aAGHdzjrTY42YZzmpVXavznmjmN\nioHWgAjhZ+BSsoUnvTow8fy4P1qRYwBjGSaBETIFVRj5jTDG3IPNWFQ7uetOZe/Q5oHYgKlf\n4ecUkcJb5u9WGV2UjpSq3QEYNAFf5t20cnvTlUZJxT2jKyggZzTnyy4XrnoKXUY2OMNGc0rK\nCgGcCnLH0HpS+XuUkcmmKwgjXdhTxik2j7ooRQOT174pwGV4GKQWIFjKvgjJqRsjj8BT1Xd0\n6+tK1uduWNMLEDxnbyuB608RoFGOadLlfl6r3pyqNuFpDQ2QbecZpFG7qakXGTuNJxs+X9aY\nMSMHdil2kNyOKeoLY9RUhzxkcmgTI+F6rkUi/vBnGBT2Y7SDQq/KFzxSAZ5eV6496I4Rtxnv\nmpvunGRikkUnkUuo0xjRhc4NNRe4HNB3dhUsY24JODVDImjZcAjFPMR29MgVYaTamGGTUIlL\ncE4FSwHN++6Z
yBUbSFRgDNS8hhjgd6i484kdDVAEYyp7c0rYZxkc0/yxt6806Jd3Uc1LAiYY\nYnOBTkUjnqPQUk0Jf8+1OZWhwR+VAEbMQeVIOanwOCaYZC55WlyW4pASphmOcYohjIzzj6Um\nwKOaepOcg8UAJGrZJPAqRVHOeTTWbdjr+FKzY4xhqroAKQg+fpT1x/COKYys0YG3NPg4OGXF\nIYLujzk8VOJDjjpUG5FyDyKmU/LwOG6UhCSZ4IG6nNkxqR8rUerDoKNu4ggUwZJtZYyQO1Ot\n/mUZGCaSPO0gmhf3JQsOM0MVh8iGGZV+9T2UTNx0U5FOkZX5X7xPWoQzQt0ytIZJvyCWpyMG\nzkZpFj3Lnue1KjbXKgZoGEcbHjbwfWpl3RSY27lpm54255FSxybSGP3aAJo2SX5R8pzSeSbe\nbjo1Qt/rNyjvmrP+sXdnPtQAhVkYg8nrgU+GQP8Ad696PL3fMCQR1oh2jttPWmO5J94YHXvT\nbdjHLhjxnpUsKhlLA1HPET8ytg0hdR6ybZjs45qzCxk3MOlVLdjjLHmraoqrkA880ihsn7ti\n+Pyp0YXb83ANKsm75T8ppdgZcMc0E9R6gJwOKk3BVzUCzbZOOR0pVA3YLe9AEvmGRW7cd6ga\nPzFLoeVOKkLbeOueKak3lsQoyWpajF3fKCTlqs7SAACfWoeOpUAinrJtX73PpRqANIzN6Hpx\nT95T73P0qFMq2QfrUyyDaQVyfWqGJkudw6CpY2LLljgVD94fKNopWk3MRS8wH4CdqUynkbc+\n9Mjk+XOM9sVIqnae+aYDlAbHangKWwW5qNlCxgqfrQCOW71LAl3/ALwqDxS/J/EDUKqfvj8a\ndu3Ac5PpQBIpO47jhaecMRt6VFu3qwOM0qAtxnAFJgSLGeRnIoX5uSOe3pTI2Iyc8U4y4XIH\nOelIBBhlPFOVFaMA/eXpTV/dy5IwDzUiKGb6mmMcV6A+vNHysxXnA6UkbCRn9BwKVWcYC8c0\nCDkMOOKcwBwT+VRW7yLcPHKNo6huxp7fMN3bvS1AkZm5bGB0pIxtUjPFAXIGDhaCBJu2kjFM\nBq7Y+e4oVixBFIF4GRR5ZVwQ2PamOxY271OOppqqPLOTj+dMWbaw9elCjzM7gcg4qA6j0ztp\nVzu6YpqtsJI60q/dyxzmgocJCvVeKbNL5MRkZS3sKVV2g5OaeWEiDjj3qiSK3mE0YmUnY3qK\nVuu4cU5sS8dCKhlD8LjjPWkUTpsdST9KbJnoRwBSxoNpPQ0Ixbg5JFADYs8joTUm393xSqC+\nWUYFIisq5HPtQA9cp2+bHWhk+Xio0lZWxIMGnqx35PQ0ACqMHH405du3nmmBRk45Jp7MAoyO\nfWjUQw5UcHB7VIq71LA/Wg03lfUCqEP2nkk1IWXaAOvvTMDbwcinxruT5uCOlSIerbVxScLI\nCaRc+maG+boKQDmXdzmkUe9Kq7unWm8YyadwJGyuG4waazbhT4+24/Sk24y3amUOVtuFPBpA\no570LhhuAyKMhfakMbtP90flRTt59KKQELBsbhyKaGL7R0APSlaQIcN09qXKbRgc1SJFZhuz\njHPaozlpC2am2DHJxmkkh242mkBEsqBW3Z6Vn/2somwoLdsYrSaJNpLdKjW3h8wYx9QKQFJr\ni4uJQAML61fjtH8ks5yaHVYeR2NLJMVh3b8KewoAeWEajBz60+SRW69MVBBIjLu+8aReeG6Z\n6U9wJnYMoGeKbMCiqEOfXNNGFIGMjPFKcrk55z0piHbSAScGkQiT5e9OOREG6k9qjH3uAQTS\nAURHOM0442+tNXduYDkd80SKVXA70bAI+w84xUUTFshh+NPjU7cMec0MuM7aYCMcqFUU1Yxt\nwwp6oxjB7il3MBj1pWGx0cY8v5uKerY60xcNz2prLtbjke9AgZgq5FR7SSeOKlZkXimxyFmw\nBxQA08rgdPSnzH
Yo44pkjCNulNZiy80eQDDMu0hVyakjcqnPp0qJW+U7RgmlRSOrUWsAgYyO\nBjFS+XtYEjApcY+bOaf80iAE89qAIpsfSmbfkOOMU+RlTcGHNR7iy8DjGKQEcrbVHGaeuZMY\nNOaHbAHz7Ui/dyBtWkA/aUPXNRiZWZtowOlNLdFDE96TiFSPvZ5pgPX7vPAoVUaPI65oWTeo\n5GBQvynIHFDGK5XIAXPFMjDdFIDU/k5JXApqqq/dHNIAddv0ojXK57USfMODTASqYoEI2WcY\n6DmgSMMjr605flbg8UHC8460gI7grKvIyB1pilRGdqYqRlLR4AxUfzIoHU1Qh2V25YUyQBVB\nXg0rzBSqjkmnTR5YHGBQMTadyntUjjdH6GozJnA6inSZwAOT60rAMS3U4ZxkihmVizEc9Kcz\nlVKkc4qEZ70wFZBGAx5PanxzEMSfSmgKwAJzilaEyMSGwMUAQMqtcjPPenfd46EjNSShQq4H\nzDvUe4HhutSwHq37vJGVPFRts6dBTmDbcAYWmbSynjPNAwyNxOOKiVlkyG5GeKklYqmB0plu\noVQxGaLCHGNU24HekkY8qw57USznbuA+btUbCRgpIOPWgAVR06kdqgfG/Dc1Zkbb93huhqpN\nH/FTGP27Of4qgALMT1z1pymSTAbgetPdRwAcN/OpdwsNWM5xj86jeJ4WILe9SQM247qjfc0/\nPIpiGqTu3HnNKyru3fpTnYAYFNjG1uRkHrSAr55OeD2qJizHaeKlkZfMpzBbg5HygUDGIxI4\n6VFJHtbcOKfuCsFzxSSdc9RQIiYHHSmhRzzipGzwetMZstyKYEb+3SoiowTVncu0ioehK9RS\nArzRBlO0cVlzWokbLfdraZG2nHWqckZVMY+b1oA5K/tUmmMP8Y6fSvH/AIr/AAk07xQGkki2\nzYJV1HzA+p9q9+n02NpPO2/vO5rNvtJjm5ZcmnzWDfQ/O7xt8P7nwvKEuYfK3dJFIIYetcek\nZjUEjkH73HNffvjj4d2utWdxb3ESvFIORjr6Aehr5Z+IXwZvdCkea0DS2KnAzyyfWto1X1Oe\nVO2x49eR8/KMe9VJPkwzE5zW/qGjy29qFZSrZJ3DkfnWPNHhQSm9a64u5g0Z98pkkRD1U4yO\nKkmXc64ThfSn3W1m3t6YGP51Ft6byV2jqPSqJImcMxIT8PSqqs0keeM7sAVq3WmmSPej7QBy\ncVm+SVbbjGOfrVIA53KGfadtOT9zdQsy+ZGCCQe9Qq5aUPtxhcAd6mEjSDduCuR900DLmoyx\nXkrSxgIuMA+tUgQ0YTkc8nHWrbRr9hU7djscbf61V2qqkK53Kf1qkLYgY7Wwwxg9qnks3aNZ\nVI2f3s9KgkmkuFdgRhTyMUrNuhWMScfeAoEKI9zOVOf9oGnSoJVwvOwjNRqduQDgEZalGFjB\n34DGgY05YLt67uvrTZIxIzHy9wXjJp+0srZGMccdveiPdgqPTn3oArPH8pCgEKemaEYOuM4O\naftQIe+7nApEjby1VV6nO70oGCxjc0e7bkZqFtqqqZyc/eqaSGTa+Gw/rUAG4DA9jTAl2KZB\nGWIJHamJsZmbkhOKItikGU8/3qGwpLKmwE4I6596YriqokXfu2tngUqqecEqM8mnuyNtZBjA\nxmmeS7Lwe+Q1K4iTyzDMMNkYycDio2jfydrDbLv3DHPy09cW/mMW3SY6CnWLSXE6xcAMM7j2\npD6FVWb5CVPpSyM+HJUKc4BqWeZg7BgoVTsGO/vUEgbbgcn0piE3mNxkH69qZ5j7mZV75Oe4\nqyRsjUOQaGtQFLtJu44VaYEBCliWG3cOKd5gWR1c52jGcU1UCv8ANzkcZqSNfvndyw5+npQM\nbvDRjC47YPP40z5o2O/bkDNPXMakKPmxwM01lEjfKvzY5zQAhb92FB+9zS7Qse7PyjjApG/d\nqrDselG4SK5A564p
DIVfafLwWHUGnSZPyk4okVmxtTHHSg42szIQaBDo224QIdzD71IzDjdn\neOD6Uqyb9wdSGUfL2pvDNhgwLCqAQZ8we/epGZ1XHQ96SNtwXPRTRGw8wiTnNSwE2tsLBvxN\nJ5g6kjA5NG59pA4XPQiogytw0eCaBD1kwrAgOGwce1ETbNy5wqnNSxrt4BA44zTN0bRsHQnH\ncDrQAnl7oiRwc9c80bdsY3MMZ+97UmzzPKGNhJ/IU4Kqs7bMuOimmAkewyMG+bjikX9zCVVu\n+SPSiRQ0LMp/eAZK1JDJDNEi5xxk5o6gRNnyy24Y9qcrMjByNzEYx6+9N5VWQBcHqaazAnOe\nVHDelMQ/DK7gJ8xGemcUm5dqDaWYdTQy7lADtuxnPr7U1mVdq7dp68GlYoVGHmtuGEbg0kkj\nR9PuH5R9KCoUk8k0m4yMFXHTkUtRGjps32a6Y4EmK3pYxOsZI3FxyM1ykbAyhChX3HeuitzJ\nIiMclgOGosBm6gmHYOvQcCq8GoeRtU4dm46VfvoTtJDb2Hr1rGaIQcbdzlsgelPoIuNdKsiY\nwCR8wFT/AGuP0B/Dms1sRtgcso+ao2mPmBejmpAs3jIzFd3BGAR2NV1ypwTlh3qSIecdpPBP\n61CsajzHZiCrbSKYXJZJQyhiOM9SKWSQmNTnGDknFDHZtXbuj6g0SHZtDcjOdtUPUb5mWYlt\noNR4VYf3edwORnvT2K/Mx5OentR96Pnp1H0oEDOFKk4DseaWRmUsCcjODUfyKV2ru79OlSKP\nMZl29RnJoKQxgCwAJA9Kcvlhzz06CkTcTjq+MfSlHmbSwUDb1bvQSDTNkHATHam7jLkE4bqa\nSRsD5U+8MmhVC8nlsYpDFlbzmDcDA4OOaZGxaNhv6mnzM0MQBXP0FM2j5P4ifwpgOJaMFlbB\nXpTGeWQoZOGY9RT9p3cDgHBJ60siluRz7mkLqNOV3EuVwcClhZW5Dex3d6RgFlznPH60m3ar\nHqzdPrRcYrMI/nbJApPMO4OvLE/dp3B25bfxj8aRfmwq8leppgSTN+AHVajXMi8AKB/FSvjy\nR8+CWxUf3W9h1pdQFcNuLEhf50I5kyQpL9SKRZAy7GX5c8ZpwZzjBwCvQUxoFmV1G9Oc8gdq\naMeWVxyDn8Kc+VjQfhTkxCzBuQe4qQYxV/eKc5yOtJtaNsPxznikLhVKDhc5NOjZiDuwVPCn\nvQSKW3PgDDjq3tTc7pQq5Hc+mKRWcZ9RwaGXYzbX3c8j+lMYNlh0/dsfvUokRcEPgZxjHOKk\n3Ky427V7f4U2VWZtpVVcj7p7D1pCGsvmKTjEfYd6eGYoFQBSozupFVlUKW3KO9N+ZpCMfL0x\nTQDRuYh127T1FP5EYyAEz96m8RqwCZ4xxTBGFVVfcT2FIdiVcRqzE5JPeleSNWXHzZ61GWWY\ngMCdvf1qRsDBxtHXNNAG5JNwzgY60kbFYQGJDdfwpv8AeCna3U+4pEUyL1+UdKQD3AYjklj3\npkajnJ6DLUqswdflAI5z2pefmIOeKAGA7chRuDc5qUsJFO1QPU1HEu5c7sgjkCmhG8slOcHp\nQIdu3tlm2sOM9Rj2oSPac+Zxn71J83LbdzYocM0aPj5+hWi4AzHduU45wPQ06TeSSR78UmNo\nw7DPYUNhYwu47s/e74qgFGUjbccbujU0yCPbGv3WPDU4L8wfB8v+8etIn7xuDvz6ChjHoxEh\nXhVpMovzDO/0p21STuHK8EUjYP3k59qgQigrnI4bsKUMqof4u1CqAu4/ePGBT2yrou35MZ47\nmmMjgyrHaQQOaVQqt5rcN1201o/mYYAB/hHajzkaMgAkDgimIViWZSVJDHhacoZmHmMFVuM0\nyOVjIuxlz6Gkk2+XIrnvxmgCSTC7lHPbc1JtDAMXwfen5UwgE7toHAHWq64cHbkHOdrUgJY2\nKNtD9TyD3pI5WZm2no
cFakaIAGQ5C7elRr8jgHgEA0wAlVY5GN3Gf7tSLGPvHj2/qKbJlYm2\nYO6m8qoUHBpgHC8HJUnNO3YjKhT1z0prfvFHVR3JpwJKhiCnGAGpDEZS2WYhF6AZpI5AIztX\nn1ojjVhyCxHb1pWlh+5t2t3HagBV2SNno2OlNO0MXfKr6Chl8vdxyRkNntSSKZFU44I6e9MB\ny4KbV6ddpqTaNuQdw7mokYMO+em2lP3R8uHXk+4oAN27adpOaRh8wVeST09KdGQpIY7gwzSM\nN7Kqn6etAAsW6ZsHbQshXA2h3zSswZcqPmBxSbgASQcjsopAFu6MJAyYJ65p6xx7xGG+bHyk\n0xl2xu687eQBTVZ5DkLnjFICZZNrFRgrjBxTFYScL/D2pwQIoKbQoHzc81HGwkLBhtXODjvV\nASco2Nu5W60eXuhBUHIPSmq3zKhGFXofWpvP8tSVO3/Z7mkIibG3zA+OcY96WQGMMrjLMOop\nrMswJAwh6L61Io3scHCgdD1oGMG548ZC46ULk8sMjbjNFxjci56DqKbliUC8gHP0/CgBzMI9\nqk++fSnMxVhIPmU8fjUczeYCwGec5PWnfw4bHWjUTJFG63ZQu2XdndmoWbawCkk9zinRsTIN\npyuec96NyfMAQy56ZpALGAMqz5bOdtKshbe4XcOn0pvyLICBxjr6VGrKnIfAJxt70ASyfKuR\nk7uc+9RMx8yJlyrMPm9BUjIVUAHODkCgKOjHaG60wEyscx4Zxjk59aRWMSkKfpjsKcyssWxB\nuOcbqc1uUwTxgY4pWKGwRlGBbnPJNSLH5J6/eOAeuM9qIkEjhTye2OlTJD8sbFsMDnGKGA5b\nRgp3cMOq+tLHGfMIQHgflVx1W4+6Tn+53zWvovhi4vZl3Dys9vUUElXT9ON1kQjJPLN7V2/h\nnwqJI8ONoB4JGSa1dD8E/Y4wUUPuYEnHSvTNJ8LiOACOMFmGd2OtLmHZmP4d8L/Z8KeWByuR\n2rvNJ0crwFzzzVvTNFeEKuPmYcmun0/S1jI3Dnvis3IpRZXsbELtymPat+1tOjHj8Klgs0AB\nIyavrD0wag1URIIV/hXn3qaONTkE81JF7dqlVF8zOKm7LtYTb8p5xTo13JwOe9S+WGb2p6wl\nRSAgZfmyOKeoYr7VJ5ZVenNCrnKnrQNDPLbjBp7Q9Fx+NPhjIk56VL95uOgoKK3khW5OanRT\ntz27U5o/UUKrMvtQISPO7Jp0gGc4p8ar97OcdqC3mSY28UhiquV56U3ad4J4FOZBjDGnKCSP\nSixI7YGPAzSqo3c1Jt4BUc0ir82W4NIBrRHdxyKeIVUYNS7f7tKU9SKopEMSheOtO27W+anq\nvbpTuQelBAyNDgoDz1pY0bbkcetKH+beOtKPmBqWAq/d61Gsaux2nmpYlDOAegoWHaxI9eaR\nRHt2jBGaPukEdan2g84pd8ecdSKAI403k5FKsbc84qRfyp+CvIXNAEaQs3JNO4EeFHNP9ulK\nke7n8qBleOFjntUyruHp61K8bMpVRhqYi44J5oJEMZ6A496b5bxsO/PWpGBY4AzSsGC4oAZG\nwBIWmtGGUkDA9akjVVU5HJoj+ZiP4aBlZR8w7irO3djOKkih7HAAqV40bAH40DK42Bsd6erY\nj2gc0oxHk4prNIWB24FIYqqS2cYNSxqdxB6U6OI7dzZo3N6YFMRXdAzZPrT8AcK2Kk2g5LDA\n7VGp2ncQKBCsiueeaZye1WFj3fNSQwurE54oGiMM3AqaH942TxS8+YQRT/LztAOM0gBsKOtR\nMo2g5wTT/L6oTk5pgUtMF6jH4UxgcKOuaeoLIeMe9KsIPyk/jS525XrSGQ+XtbqcetLs28qc\nk1Iq8YPHtT1VfLJ70CIvLby6VU2Kc8U7ceg/Wja0i9MjPWkGonk5Wm9iCKdIGVsD0p1uTIPm\nHNAETZWPg809JDtwVzkV
L5KbM5ppwo4PFMRAqNwM8k96k8va2MfWpFYKwyOfWkBLSEfwetK4\nC4RRyBiq7DnA6VYMYfocinxxhjnGKaArqpyP6VMdyoOM06QouccUnmFeGGQaQxNp796Ryu7b\nS7trkNzRtX72efSmFhIwFbJ5qQKHYnOBUa4xxUirtKkigBh+bt+NOVQAcgGnqOvYHtSfLnGT\nimAZDRkGo2+50yanCjkjpTdgzx0qWMZtIUAmk2r1xmnAbhjtTtmFz2oAa3rSqOR15pu3nOeK\nfnPA4NIBVU9qWZt2M9aYMr6mlhVpWPYUgD7uD19qNu45HFHIJpysNvPWmUw+XHJzT0j9DTNo\nYZxjmnookbOdoFBI0xtHJtPTrStluQacrEjJ+bnAqSE7ckjimISDI+8cGpGxwcUx2RmyBgmn\nRnb15pDImUfMv8VSR7kVQwzS+XubjvUsjbdq4oKQxVPzf3RSq53DH3cUEHnApI13MP1oEybn\nbuAyPamrLuHlsMt2zT4yytgH5aJMLJnHXvQJEsK7I2TPbg1D+8j4kp8bfwueetTrIJOHGewJ\noAZEzdAOlCKYpix5BqTYMYFMEBTg9aYySOQchm5qZVHl7jjJqCNkDbJPvVOyjZx0xUiGIxST\nkZWrLEJHuXk+lQ27fNnGeMU9W5wR360wH7/3YOdx9KnWPd82McVCqhH3AZFSJcEoVHXpzQMI\n22t6Usjeo5NNLLx2f1p2VZst8x9aTGEa42/Wre1ZBuDcelQxvEOGODUylQpCjFMNQbHAbg0L\nz79qg85GbEmQfWpIZEwVxx1zSYA2QuFXn+9SLGWww49af5wLbQvy5xS7RHKdvOaYh7HkDr60\ni4Lbx0pH/cnI5z2pV3LgKODyaQE64Zxwaf5W4g8AA1CJywPGMHipRlt3pmjoPqG3a5OM5pjK\n2DtNSbj07UzaeQDRcYqhlYdxTlZWyGG3FMU7Vww5pYwvOSKAJYzg7gOKfzuB6L7VEuW4U/LU\nnnBVAK8UgHYCnGc5NBRR0OTmk+XgY79alKjcT1FAELBhCw6c8Cmwny/mbg9Kldg0nAJAFNjY\nNJgr+dACkBeSaVcqPehsMCvU04ghQCDUsdhVwck0olyw4yKaGVVXJ4pykLk5zmmhDsBmx360\nbish2jjFKsbMu48HtTtu2QAg49aAFh+7k8E05hs4xuPamKqq2WPA4p/mbT13UAIo39aeV2qc\nHOaZuXJO7HtSqMMWBzmgBRhgOxzTvljfA4BpjMF6jBp42lcn86YCbstik5Y/d4p7EdhSTN8v\nDbeKTGHlhlxt+brTgxIBHas8X/k3KqzEA1pbueOlIBmM7uOe9OLrtAA56UNhfm7VGp56YFMB\n3mAMAak5JGBio/JPLgginKxXqOaYC7gGwBzTiwzgjIpi/eJ9aXnac9jUgObG0YHU077pyOtR\nh92D0oOeGpFDkzkjP4ULuUjHShcbs5xxSnjqeDTuApwzZk5pfMG3kYNQvlmp8fU7qBC9GzjB\npzMS2B0pFJzlh8vrSqQFYDlqQwkZscc06HkfOeKZGzbCDwaVcr2zTJJAvGOlP5kkwTgAUyNg\nxyTg0ZDc5oGP3MjDmlRwOv401pBIuO/rSrjbntSESfLjcvWkALHJxtpEX5eDQynlu1PVCFXG\n7k8U8x7hkHio1wuS1SKR1HSi4AzYGF4FJxxxSNGd2B0pdvBJPApD1F3e4opmB6GigNRssYY7\n2OB2pqsq8dalRlkTB4x2pi/OuCMD1pgSM33T2zUUjOzMowacqjbjP0qIK2fmFUFx0hyqgH6i\nkhAjjJJ+bPFLwygimsBHh89e1SAsgMnUYqHyieDU7DzF3dMUzzFK9MUXExIvlUqBjFObsVBJ\npzYjcd+KQSDntijYByZDdMdxilZh1YZJ9KYBkEqeetGNyklsCgBS2VJA+lKrFsHvSIuV4PAp\nY12+9IYMpVMkUzbkLzjmny
yDaQWxUYUsu4EEd6pAPiKs7DHHqaSNwAX7ZxREpjjIPPPFCqNp\n3HA9KYg+YsWXpTdx6HrUi/NjH3RURXaxYj5aQCj5VzTmycYpsjL5Z7c0Kdv+0aAEZlbntTin\nyrtO0mmrt3cjj0pwYM25jjHSp6gQtluDk804qdpAHGKeF5LKaYrGXOAQOhNUgFjO2MDApWjL\nDoOaMjoRntQoVU5J4oYhGj28ZzS8hqFYbgSc5odxIxAIqRkTtvzkc0i/u48d6fnsRk05syAj\nHSgCFctwVyKVpNq7VHBp0e5UIakYKqgk9sUMY2MbYS2OTxQ0irxjIxzTGdjGMDjNJER8wPU0\nkIRY+CV/CnrLt2gjrSBv4RxSsM4BbFMB8jLtO4hc1ArkN7UsgE33hz2okAVcDgUAO8zcvA57\n1F9DzUkRC89T6U2RdremeaAGRLulOTgYqdWG0gjNR4VAvelfhDipsA9zuwelBjwpPtTAxaLl\neKUSblxiqAqqvO8jBFPkLSAktkYqVQMYI4psi7lO3gUgBdsSAkcHrSjbtyM4zTJPmTB6dKRn\nKgIKQCTyKGwDTVz5e7AHakaSMSAP1p8aechycdgKAEbCtwc8U5W8wAr071CEZVK8Zzg1YEIV\ncA9s0wFZlUbaryMsfzHmjducYOeKi3fKc96YD/MMy9doqNXzhVbilVSynjApLdVh460DFYHa\nQaRf3a0rf3sfWmtIGxgcUgYfe5AqWQ7oQM81Hk8Be9OEZLMOMqM80wK75HJ4FNhUTSdcCp3j\n3bQ3TrUjxrwFG36UgK0iiPIJqGT5l3AcipLpuobrUUcbZU54x0pCGruZc1G0nODwasru5GOK\ngZPn3MOaQDdx5wMgUvnfLnHNSMRs4FRL+8bgZpgMmjwuMDJ70iR7VI71LJlsEdqikkKrk4oA\ngVQzMD1okwqgU/ym3dODTPLYMd3YUgGrIV5pjbeT0qbb8o4qFl3SHPSgBuVkT0NIYwvPb1qU\n2+5SR0phVmjx2FAyNoyMc8VCww3TNTyZB2mm5C0CKc0NNe3XZxjNWriMMMioZFO08cVIGNqG\nmrc5OOPSuP8AEnheG7tTE0SsWHcZr0DHmZ9KrXFokynIzQB8g/Er4LzzR3F3pm0OoyYFTap9\nT9a+ddY8Pz6RM0VwCsueFxX6S6poIlt2wvJNeG/Er4N22u27uluI7oN5iyJ94n0/GtYT5WZy\njc+L7qFkDEkE4ziq/lmRRuHHfmu+8c+C7/wxKBNatGjLhXxkfT61x2xZlAMZV8YNd0ZKWxyP\nRlc3/wDo/wBn2buwaqBhk83D8Dpu7VpS2oSVQvy8YNJeWu6BQOWB7GqEZLRhWxI3Gamht4jK\nu5CBnpTmhKyKrjc2cgGtNQBjC/N15FUhFW8dfLkwvzKMCsQFlyFB3ZrRvJHPnfMMenes9XeP\nadud3WqAfPJ5JDK2Fxgj1NQJzjbzk4z6VJcbvulBgnrUTYyMNtGcVaANxG8ck9Bj+tSrJuVT\njHHGajiiCGUtnPXrSxqZJGKnhe3oKLCE8xrdWVeh60snzMGjJ+7zmk3N54O393jg0jNJIQvG\nzHNIAb95gj5sfw+/rQ03zfey36U35o2+RNoHFI0ZXAIyCe/rSGPfdld3XHApigKMiQZPzYpW\n2ort827GPxqNGDR7mXn3qhEuZGwVAKgcnHSmLNIsIHylM5zjmg723YPltj7oNDuW2hRk45A6\nUhhDL5m4KARnpTlYqq8EbutRnCqcd+c0rOyw9NzAUrCFm2xzfd3ZH3hSNNszsG3IxUbq+FXG\nO9DRgszqOgwOe9AMbIwfg/MqjP5U5RwxU89fel4UDecPj5lpISqZlwSrnB9qYw3HcHIygOKV\nm8tmGcsRx7Uwyq2CGbYegpy4ZgCf+BA0CHsy+WN3LYzTYWDFeBz29aaJiWJ2ZA4PrinLGFId\nRwenNAxVBzuKHIOKNxJ+7t9B
To2HV3C8Z5qEsGHmbtxzgDNADsp5TAEjPeoZGGcE4yMbu1Pc\n7YwCcc5Iph3Sxugxxz07UB0Hxyho4z1wcYFSrGEzls5OarwyIq4K4JHWp1xGwJG7jp/WgCNv\nMl3HPWo5iYY0Aywz94VYK75HVjsUdPemk/u/VjwPeqCxDndlZAf72OlLFmYZOF9BSyMGXeAc\nj5T7Uz5s7D16jFIRIJiuVlAcDjFRsR5igDbSx5Vsnb83I56fWo2YXMi7c+596AuTbQ2T1+lQ\nRMGU8Mz9NvpUuwqGwffinGZizBQA/YYpCIo5GjkDgE9s0/kSbwMgdRmkXpkHLelBkU4XuetM\nYhJb5tu1C2fY0krKu/zFXDHAZT0o+ZY9jN8rfdHvTvVmA4AG33pIZF5ZBUbSeQMe1TLIIpT8\nu7b2pyt5bu3T0HaoOI5C4zubrmmLqDSGRiVHfjnkUqMJFYsPmHFKyiVtxYDsMCkVdgfodvJ9\naQyJi3GTk9KcsG4kj7wp8hXaQUzn06U6M7nXYc7R0p3ESx7SwwRwuTmtix1COJULHKDrj0rn\n/LEjNuOz2pyny4VK88ZPPFLcRu3F5HIpEaKR3Oec1keWfMZSd3fPpUQnkht4wrgA8HPWkhY/\nPv3Lng56GmArR7WcAggVExDAMRhgKkfc+TjjpxRhSyqy8rzSKtcbbk7SMHGaWRAkZGzKk560\n9JHjY4b5M5xTGICyAjrzknpQJixlmTaSNp6U+SPcqBD+8B5qFg2NhGNuCPepGzDIzED1FILi\nbm6Mqhu9Eke3aAxAbqAOKjWQKF3HMjcU7zj8vGDj86oQBtwIHC+lND7V2k4BGetEm1VA75zS\n8bQ/3sDAJFAxjuGUbPl96HlZZgygqMYIoZUyMbi3tR/q/mC4AHft70XARvlZQg2oeaI1Mas5\nbbzmk37mBY5BGA1Lu2xqZAfcDmkAvnBmMkjbgozinEsu5tuxlHAPvTWWNgx3bUPG0ikkxKyL\nlmOOuaYXHeW3OXw3Umm7sKxDktnvQ0exXYfeYYyab97Y7rk9DTDzHNwq/MDk8mnPtZgWOCOl\nJuJYAKAvbik2hnZgMlhjmpsIesYTDBvlY4/Gm9Dt3gc/nREvRHPQZpsakAkjg9KBj2kRWVMk\n7j93b0omXdISMAY5xQRuX5vmbPShVIUkjCHktQxsadu5P4hjn2psrfvExkDoPelUM3AGc/yp\n/O31wcY9KoRG0YJY4O5eooyzRjYADnOKSRnJJHC+tSL5FwwaNSpUd6QERJRiXKqadtXzkOTt\nIouFQHb1Ycg02NjJEWHLZpCJY5BGzBhlfWkb5supHPJ9qav7tcD5h13dqPMDR/d5JwMUxjZM\nARspwCcmpFbzJSBluOtMkVcfNyR2pVDeX3TPTbQwFXEbY7Y4B9adH8g5fJHzc9PpQqgKoZSX\nFDOiqVznvt9aYgLbWJJxu5AqEqWckn5sU6SVWVQzbfT2pHIZgrHDetIBwYbUGcAnBHrQM5IX\n5s9F9qidduOd/OKkKCMLGOTTGIqpCSoGeOtMyrAFWwc8D1qRVDNgMM9DmmMAWxjDA8LSAcP3\naNkHeTzmkYbVLZwp9KEYyBt4w3SlWQKAM8dCCKOgg2KynY6q3vSqzRg5IQ4zhaarHzDGq7nz\n1x2pJpBHlSmR2XvSsA5FJUOOBnIPr+FPclXLjk4+6OtRW5YNuY7n7Htj0p4yr42grnOfSqHY\nanzcnB9CfWhcqcOAW+nSkZNyuS3HpjrTo8jY7HBHSgAWYSLhtygdPemog4Bb5R6dqc7F23Zy\nM5IFJ5aljuPyt0xQIIlVmIQ8Dkk9zUkeZpPbFNUq7YQFFUYzSxsyyY/1inuKQCeWfOZtwG0d\nPWn/AOsRSj4K/eGP880imPOUUlj3ofsAeO5oGI+JJGbIAI4I60zlWODkY696VeFK8HB4NI0W\nxVLna2c0CG/Z084F/mXrj3qfKv
kH5iKjbCyY5JIpHk8rcq9WHLGgBfMYrtQd+GqSaMSNg/ex\nyajQBQo3bQRzQNu4fNnsPegY5ZI1wrNlegXNOAe4wxKhOQKaoEbFtobHUU7dub5QQp5pjGLF\nJuJK5Cjj0p2F2kgc+vvSSIT6n1GaSPEYKEYB7t1FAD43bILfcUfNUe4+SxGTtPSpAp+cp0HB\nPqKajZjAHyj+6e9AhWuMsDyvHT0pIPu7pBvzwKaGPznGG6AelOztjG4EEDikhodC6lcc5zj5\nqRd7ZB5bt7U4RhuScFTyKZO22TpuRvemIGw3fkDkYo4YLsbCHueuaUcKNzZ7g/0pZYUnYZHz\ndSOlIBu0Bum05pskWWwp79RUjgNOAD8mMbT1pqk7gFIxTANwVdqDac8Um7IJPD9MetSorLNu\nkwR/DULsCzEdaBD/AOLYCU3DnFN52t/Djrg05mXcrEHeBS+WI1PO0N3/AKUCDaHjVwdnqB1N\nNlfcwwOOhp7Hy49mMcdB61HDkq2eq/eoLF8wRqOpdTn2okC+YZmJGRQ0anb83U5HtTGUq5Uk\nlc9KQEu4MFBI9j6UQsfMYk4xxj1qLIDYx+dODFynZvTFKwCqRubavT1pFXzAQp2mrG2KK0bh\nvNLZqDdt5J3KeMCqEOLfL5ZI44JNMTduLe+Kem0sO46EGmeWF37RyeBzSAI+Y5A3zEHIFO8s\nRrv2Ak8AikTMcBVj94YYmlj2qVUZK9mo1Cw6OYIjKw46njvTJNrZYdSORS5PmNt6jq1NjjZm\nOwgqeKAHQsq4Zjk4wKlXC7WOGHdaQQFsI4H1+lWBaogDo+Vb7ooAj8srKWj+43apo4TJhuS3\nTb0zT1hPl71P3TzVq2jadjkdsqTQBXaPyTt4A6k/0q3Y2ct15SRxMxJ4x2961NF8My6gFklU\nMuctur0Lwx4TEfziEEHpx0qR3Ob0nwWI2G990mMlyOPpXfaD4TZpELq3mJwOO1dBpXhopOGI\nyM46V3mlaOI48MPve2CKlsErmXpfh3asW9QEznbXX2OnFVzjBHfFXtN0dR5aEZI9a6K10ldu\nRjANZXNlEy7Wz/dhSuB61rW9ntAwCAKui1+XIHA6VMkfygHioY7DI4wcBqlWEA+1Kqt2WrEc\nZ4BOaCrDNoxhRUixhcZ64qZV2ntinMMjAApMCHad3XFPRm3YPSnrHuUnuKesfc9KGVYQKWyc\nYoWPkEin87cdqT5lXPU+lMBdgwccU1fl4xUgOVBIwaeq9DjNAyONGaTpU0du20qTUka4yTUm\n7acgUCK0cPl5AH41KFCt0pzq7E7RilCuuM8ZqeoyKSMK2DTYsuxAFTtHuPJzinRssfAHNMQx\nVfbx1FPb5wD+dSBfMbP3TTthXIxxSGQxqqHIqSRA23PGTTW5U4FPj+ZQD60AL8vrk+1CruU5\nPNIdqk/WnquATiglkflDt0qVVUKRTcHbkdDUm0fw9vWkMY0eV+Q7WqYD5R3aotxOD/KpjIq8\n96AE8ncpBOB3qFYdmeOKstuY4IxmmeSW70DGRjMm3HNPKOxIBx7VNCqrnC/N60jKWYNnFAmI\nEOORk0kalvu8EU5mOCAfrSwttTBXn1oGJIzSDIODSBRt5HNO4I9DS7lLAdTSEN2DPLY96Fj5\nwcmpfJG75xlfapY2CtnHSgCt5JAII4pFjwy81ZctLzjaM80ww/N8vNMAJXdjGTSeQeW6CpY1\nCjJFKylvmDcelBRH5bbQNualWNvLyRT414+Y1IzjbtU0CKzSM2AB3oU7nKkcVKqhW6k07kMF\nAxz1NAETR7l20rRAYO3p2NWRx9QetRlmMjEj6UB0I41MkmAu0U+OM7jk4pyyPtBY5HtSGUHn\nbSGhNoVjk80rQ+nX2p2AzAgZFP8AOEbYC5J70xECQmQn1oa364OPWpvN2tnHNNX7xJHWgPMR\nV4Hy9O9IMM3yjn1qXDdz8npTeMYH
FADGi3LuzRHEvc8mp1TCnBoUD+IUmBDMoXpSK3y7c4FW\nJNmRUDAGQADvUldBZFzj1psalc/rU23zG44HrQITuIHTuaBdCJgHzgUiRDPPFTLbuFbnijy+\ng7+tBKIzH8pFIiEqB94U6QCE55Ymhc7Pk4oHYBCsLFs4HpRvRm64pVU8hhk0vkjdgCmFhPKR\nsktQ6lAB27GnbT93tSbSuM0AxuBkHFJJGB8wHXpUvDKcdaSMr0Y4NBRD5R6nvUi8rijzAJgB\nzkVIFbb93gU7gKsRaLd3phTC5I5qT5mwACBSY7LyfekA2McZ6U/g8D71Ix+Wk8sSLnOMUgEa\nMqB+tC5PHOKcFHTNOX5s9gKBW6iLGTkYAHrTDCd2BzTwx2nJz7Cj5lIweDQUQxqdxJp6qwyQ\nMVOpXnI5peG6DOKBFfyT/ezTd21iMc1a2rzk4PpTFj3NlRn3pDI8FV609VO3jgd6X5dwpzYK\n5zxQIjCjdnjApy5J9OaTaGOFqV1LKBnBFMBhVlY0+P5kwOoppkAIB5qWNgrbscYoAciNH82a\nUsW4b1zThzyW49KJFZVAC5JpBcb5hDZHIpuBHuI7ikhXDFScVJyVO7qOKYD4fnULnHFJteNs\nOMr/AHqjhj3k/Nip2OdqlvypjE3K3Q89ial2Dy8A7qrIuJDnrVuBgcg4BpANhyWXJ2r3qcjM\nhOcjtVZGEkhUjC1YVcoAnJHSgREsLLIzuODT1wFODk9SKlXduHmA4702RAsmVNAElttfdk7W\nHSppF3KA3HvTFhURlz1J7Uvm+Yu3GTSAnX51wvIWmFdknAytRxQtDnLcNT2Y7QM0AGA2SOtL\nDH8pDY9aakZkzg4NPSMruB3HigonbayqSAGpSy8J3qNU+Zd3pSTEbhtOTRuHUk8v52GO1INq\nnacKxqMzGOQFmqWWNZ/mGSe1G4xZGKsMjk9KfGTg57VGMsgJ6ipPLZoyBmgB5Xy1LHkCnLIV\nUE8AjIqOOTcrBhT9m+PA7UwJE+ZTk4qbjb8vDd6gRWVQTg0+Njl8nkdqVgHth1wuaYud3Py/\nWnr94Z4GKY371yuciiwDz+8GfSk2iMbiuRRu8mTBHGKVDlfm5GelIB0bIFyOoFM3M3Pb0p6R\nJGSw+6e1J5injGCOKABSW5zjFN85uMZ96UhVYA5YZp/ykkAYFACrIOMHvT2jzlmzTFhG0YPR\ns1MUcFmByD2oGMjUHLKQKXzjswRk01cjaCOvXinhfm5Py9qTAFVWjz055zT0UOmSM/SnBVMZ\nGRzUfzRIQGwppXAcN7TYB+WpWY7tp5FUorh4SS/J7VagukmbaOvvRcB+BuOeRRtKr8vXNKoE\ngfHXvTVb5c55oEOZFDYA5x3pzbowCBnFMXdvI64PFOCGVjyR3obAkLeZjI470jRnuNuOcU1l\n+ZSOPU04yFcjHB/SgB7HbtHqKcwBTBpq4bAPJpQw8zkcVQyjqGlLcRkqfnHIbNMs9TaONY51\n2sOM4rUVvlYEZFU5NPVpM9eKRRZRlZMqQyk07h+2PpWZHYm0YlGPl5+7mtFl2qMdTSENj6E5\nyPapFYbeuajih8vjoKk3KqnAqhbCSDgHNKvzLx8xPak5+ooZgq7s7aQxwGOCOaXGFO3nmmxr\nu53UsaGNSc8k0hgUPXvSFTjOeR2p3Jo6HaetFgEx/F0XFLxszTvlZsE4GKZtGcU7ASrjYQen\nemDCtkdKZ5hJIOcU/A+XHWlYRLu3IMLzQqnv0oPOR0OKZHuX71MCXadoPUUjKvuM9qbv7jr3\nFZ97JcrcLuGIz0xSGaSqv3Rzg9aWQbsgcCo7FnZT5oyfWps7AM96QCRt2qTd8nPXNMX7x9KU\nN1yOtWIdtGdrH8aXaF4HIpinPWpQyjvxUgI3XHWl52gE5FG7b75pefqDSAT/AIFRRtFFUIrx\nrtY7h1p0khGAvI6UK24HJpGyo+U5NI
B8bZjzjlTTZGLdBzmkVmVenNG5SuejCmFxjMWJbGB6\nUMVRl3HjGaGk3Lg96b5bKvzc5qQuyRD1x909KZu2rtK80LJ8oU5qQg5y3GegoARcLgsCaDtG\nW6j0pPM/e+WfSlChuQefQ07sLDWcdQPanwlWUqelRso3dePQUpCrkjoaQEka7R7VIcbcjpVZ\nWZiB0Wptyqu3qaYDJmHHy5NJAhySeF9KftD8g8U7A6iqERSEpnFJHjyyzDrRINxKj0zUEbH7\npzQBP/q1yTim7nVs9VpxUscHmozL0XHekBLHFuO5xgU35Y8880s0jM+1V+XFRtz8wGTQBIFX\n7wqJc5PNOZ90eQPrimpk0gHHeq9OKd5gRAPzpBnPJ+WnFhtJIz6UbCGN83IGKduPQc0oXzI2\n2/eA6VDErF+v4VLEPXD5LcEHiosBWPGMnrUi/Kx3Cl8sSdaYxdwX5uScVEkxbOeKezlV2jr0\nFVvuvg8mmMttIuNueageQHIY0/auwk9agTaW3Ec5oYDlTaqnOQRmkaIsRtOOcmnSOu4KBgUi\nsI1JzknikAhUsuRxzilYbY/m9aJFO0fN05pkkzbORx2oAFPzAjp706RRJjJxUcLHbljx2pzb\n+CvSgCTai898UxiCNx5Ipu8Pnqeaey4XmgBqsJOOhpRubsMUxZBJ0Xp3qcN5i8DAoAY7dVpk\nMmflxyKj3HBJFSeYFXcFzSEP2lVLHpml+VmBHSkjkPlnPAPakXczAgfLSGV7g7ZAFGcGpQu6\nhoWZ92QM1GodWGT7VQDWtRJISwyBT2YbTj6Zp3meXJjqajZA3AOKVgEVRuySakkA2MFbJxUL\nLtIGcmjzPm+XhqQwhjy/pxRwzE7ehqSFiVweec5pMeXN1+UmmIjYM2cDApkaBTg047pJmx90\ndKXgpuYimMjJwxHWhfukYA71LbqHJbFBjDNzwKQmVhGzEFWwPWpV3huRn3p/l7SCpGPSlbJz\nxRqCK8kwbeg+8DSeaVYBicY4NIbcRyGU/wAVLMy+SAOKNQKsys0gIbNPWTqCfm7CljX5vWmN\nGPM3dxSQiPzCzbTwKc6nIx0pwO1hxnNLMw7UhkBmO4qB8uKI2MaHjrSqfKUk8g8U4jO054oA\ni3jv0pjIJixzkCnSInPJpsaiNc9zQBJGpKjJxUEikueeKsAhR1/OqsjbpDk8UwHyA8bag27V\n571I3T7xHHFMTORu6CkAMzR/7vpSRncSRwKey729F7U04KlQMGgCCVS/TrTTGAoGeamCZXJP\nFN3BmK45pjGFPlqLG3qePersO1oyrCoJoflYjkUhFby18vgc1XdSB8oyO9XUUUxogM4qQM2S\nIHk8Cue1bTwW3DH5V1c8asgGMGqc9sm3pmgDx7xd4Ftdas5oZIlKSAr8wBxmvmjxz8DbnR7O\n5msfMkVT8isOcfWvum+05ZoyCq/lXG674ZjuFYNGCpPAPT6VpCTiRKPMfnLJZTw3ElvcoY5Y\njtYelQyoIyrFSM4X2zX1V8RvgvbauJXtolt7jduLqvXjGDXz54q8B6hoNw9vd27jZyrKcjA7\n11RqXOeUGjlrSFbi+EkmE8vIFTXCbpM7vlY4xTVty0hAPzY78U64X7qjqoywrdMyMDUY0hbK\n8p3qDyR5gUPhCudtWbiLy7hkk5wOh96qyMzbTnJU8VoBDLIGU/KxwOT6U3cNoRVyg9e5pzSG\nQuARlvSozGRGxJyP73pVXEJJvyU6E9acYzuMkZ2gDmpW2tCjLhiBye5qrISqjqqk0gHspVfl\nk3lvmxTW2vsOWXaOdtHzKCFXeucbhTWJ2nPytnA96AJZgZJRtOMrkD1pvMkY3/K2D8ooZmkG\nFb5UHJ70SN5UKP8AxbtrCgQkjeVahlO8N8vHrUYiG75jk4+6O9P2FWIVvlySPxojYo4Rlyx4\n5oQ9RNyKFYfKQelAXBba+0E5zStGNu0j
Ipu0b8dRjoaYDSo+VscZp8g2yZ3dvu0rKBGQTkY5\noWMFWUDJ6Ci4iPadwJJfFRLlVdwdi5+61TM0O75EKjoxJ703ywiuPvI3OT2pAMXaYyCuMngg\n07JhU7lDjHIWlCr5ALnP0pBJhdzrwO3tTAG8twqn5Sq5A9fakb/VLkbVz2pp2qzeZ3+6KWVD\nGoBb92ADj3oKHqcxsD8rH+VNLrgIBgjuabGJOuMFuKkWT92u6PLjuKQBGqSZPVcYBNO4kwuA\nm3kkDsKaeEBI2r/s0v3soeDjiqEGxHzKDjnlT6Uixu0h2sE789xSRRhAN5y1Lu8xW/vLxigY\njxnYWAzjqDSoqupJYhscfSpY/uhSNx7moOhdS2McigQMDjLDg9KGA/d49M1I2GCMPuY7+tEc\nJ3BCeT0oEVtzR7gTgMelCyEN6+lLuaTf8m3adp3UR/OjMOdoxt7/AFoEMdlXzN45zke1I2WC\nqi4QDrUnmBmIfAH0706TesP3cqf4aQyALtbduAqRvmctn64qMxqxAPHovvT/AFY8rt5X39aB\njZFPGDvZsdKX7zKB6ZpY8tERnEuM5pqtsRTtJwME0ajBsgr3x2prSfNkLtpuCrNnJ704OvyH\nG5h1oAd827ruU8mm/KSIywZc8elKsh2soGATy3rRHs8kkcndgU7khCWYsgXCjvjilMkm3JIG\neOnNLG5VQpYkFuMU1mDTsCM0AJlkYsfu4xTVYKu9R83QU+SQM21TyOq0OhONpwD69qAF2kRq\nDyxPJpkg5kU/dUcHtT/LLKp67RkmhUHl4X7xORQIiRg0PJxtPFPfIHLH5un1pXG2QjbyetN3\ndVKgqOjehpDHRyFSBjJ7+1N+ZdzFdwNNXcrcjd3OKczbpCM4XGdtPUYLHyFAPr16Usce5Tj8\nSaaPlBYE7aZ5bS8Kww3SkIkDIrFi5L9w3TFKoLMw67uc0vyuoVgCBw3vimr8rAbvKGfu9aLA\nIuPMVvu44+tObK5LrkDgGhdyscj5c8H1NR+W0jYdiV60egDiSyguAqjuabt+YZfcev8AhQFj\nZdwbcQfwxQu3dk5Hc/SmMUhlOWbHOaPMDSb+djcGmqAyfLySeM+lSbXZSAQO2BQA3yxHgb8L\nnIYfypA3kqQT8vX3/KnSR8oC3A9KQMqSH5csTjNIAHzYZucckURbZWZ0cIo9R1pAQykR8AEg\n02PDMPl3DpgdKAJXZdwYn5FGajyYRuK5Dfd3U5Il3Enrn8KRsvkscnpTAXlYhnr/ADNIzANg\nDkdTS7/lw3ReM+lRNGW+ZTnuF9fekSSGRSM8jHfFOEgkiLHgrzmmxuZI9q/KfUjiljkZm+4D\n2PNGoyP5WUv94Oae0asoUtu55YGnbdxbBVI15qGT5lUxnaWPFAx2185HIzyaGjDPgNt9Kaeg\n3MRngjvSyALHjcVK/rQIVWZdpIyo4I7UNMMkr9AvT8KYrSOwwfl60FVblxjHT+lUAzcWb0P8\nWe3tT0JXO35o8dKVvlUj15zSJsjXLbl78+lADmbzFTBG0daTzgd21DjsKavzRYBA+blTTkG+\nNCTt5/GkA75mjCldr9RSmTcNxO3HGKGYszNkjHemNt5ccbvWgepJuJwytkdBnrUMkw5CoMjr\nxUjZRQdvIHQUwLIx5UJnkmgRD5imQKBtB71KF3bgp6dqVVKqcHjuaRW8ojeu8Y+8OKGILck7\nWPT0pZEKmSQ8jtQMrgZBHYLSeeZH2gbV/vEd6EASbELjnO3PSlt/mUNnK+rdaayltzfMzE+t\nK4DSZI2/L07UAOjYiQt0X3pExuJI4J701ZByu0k5/Cn5WRcY/AdqYxQzRtkAfNxz1pZt2zDB\ndw96YpZn+Y7gOlIvzT8NzjnNSAf61l5y2OcUv3iArcdDSiT5iSBnoMcUbfMTCDDrzigAkZm/\ndsRuXpSBjMql/wCHoPWmfM0hLA9KVcSSMV
QgYoELL80YWMbWY5NOWESRo2eOmBTo0+ZSeCOa\naVdonGPLGeMUDBlKyCMHbnoaFZI2c9AOo9T60xFEaZcksOtPcRLtZiQWGcd6a7CsIsx3YBD7\nhngdKVVDx7T8rjkE0i/Iq4XAPQ0qq8ig5yc9KYxGj8xvboeaX/WqeNzLxz6dqRW2nJHBJHPr\nTGXfJ1YFfSgQ5c9EU59KVY2lbnDL0zTlZgGONzYo8wKpcntSKGiJ3UNs+Q8D6UpymAYwdvSm\ntNuTKkhep/8ArUjSCSQYY4xnNIRMVCjLH5jzxSK7Bhzle1NT94u3gyk9M9qctu8K7idy5wMd\nqoQvknYU3Yf734U1WEsgVRuPoaWPcPnLdOp9aQR/vC+c7umO1A7CxtvUspBG/B7USKzSMxw3\nYLRJj7uMY60bnjUgr2yKXUBEj+Ybwd3XinvlWBxkfSmRSZXfzilYB4xlsZNADWVjIfm2nqaT\nB8vapyM+nWntIeFBwFPXFJiNlkYbitMGNZvnACbcdeeKdIzHdk7j3HTimBo2XBbnH3aBCJGY\ntyR0AoGxy4ZgDycfLxTV27XLDaew96PNEqAgfODj0xTpgT7DH60CFbB2xhjuwDn3poHmLIAO\nR396d5X7tWyd3r6UMGiJYMA3SkA7yz97d90cNTNy8/PuIppy27cxyeSFpu4eXlR0Oc460xD4\n2xGzbvm6gU8TBoxu4LcfjUccm5s428cmnIqy5XcN3Ue/0oGO8s7QO6mhmeOYAsuG9KTI8xtw\nbOKVY9qjPJPIoAjl2l+c9fyqQSEKSPmHfscUrrubcPmIHrimw7klkLjAPA3cUh3HyyPtCoQC\nRxmmJhRjGW7mnxrGME/QnvTE2r0ORz81GohzRhmI/ujPFM3AKCvVjjntSeWdwYP90/NUqx7R\nuxnPJ+lMBsmcnBDBRg0iI7YB6k8elO8vlto/dMOtOa3aYIhPA6HOKQ0JHvklIChGztIzU6w7\nY924Da33fel8tYWCn7/en28StIWLHC8hfegQrbS3LbQwyajtnAVipJUHsOg9qvxaf9s8tT87\nuegHAre0Xwv56sWG1EOMYxupAZNlYzXEfmeS3lA4JI5J+ldf4f8ABctxjdE0XOdrDrXZeHfC\ncjeVH5W1R90kdq9D07wxEMEqd6+9S2Pc43QvCZ3Ko24HJrvdL8P7VAC7Vrd0jQ1idf3WCewr\np7fRWkAAUD+lZuRUYmZpejJHCu9M89RXQ2uhoy5QYOe9aFnp20KhAOOtaosyuAoxWd2zflKN\nvbCNVXbk1pLD5YwowKkhh8sHPWpsHaO2T1pDsQCPpk4z2qVYTux+VSLCA248+lSc7idv0oCw\n1YymA1P2Nu6U/B6HrU207s0rgRRru4IpwVi3yrTtx3cjAqT585WkAxYmV/50/wCXoTT1Zudw\nxTfLy+QKYyPyz0WlWLc+W7VMv3sgc0/acdM0rgJ5atwox7mnIgXGRg09AF5bp2FLt3Sdc00M\nMDHSnqq7dx/I0xpRnA5PrRsbbzzTIBSc5xgUK27OeRQsmEx3qSNQeCcUihQo28CmIis2fTg1\nMF79ulHkhWznFSIi3fhSrIx+XNP8vzuBUghGMYxQMgWNgfanquV5FTSIOAetKsZK89KBEIhB\n6inYLfL92ntngCpT09/WkPci8kKmM81GsbM23rVjBxzyakWFc5HWmBXjQKpXvTmUDG4UTMWb\n5BU9sodG3+lAEQyec5FOViBx1oZWzt5pZI2GMjAoFqNKugwTTljMjA9FoHMgDc1abYq5B7UC\nK4QbuDmnYO0549KTzOM9alJDc0DI1x3FTrGrLkDDd6bGN3bipVK7STwaBjJMQ5AO4+gpizeZ\nj5Me9CrtyDwT3qVFC/7tAwSMtnPSnxwrnO6mls5A4FSBR2PNAERVVyM5pI4gcZqy0JWPOaZH\nhmBPFA7DJBu6ClihaP8AGrDbegIJNR8s2CcA
VIhjfuZCGOabn589R2qyqox5GfemtGobK8ig\nZCqsMsT8vtT92/HYVMqpjjr6UeUGbnimMhbhsAcU9VAXkc1K8YbgCnLDtOTSAhC9Ow9aTy+S\nD+FSMuRg8HNJsO7rQAwKB1pQm5uOlSbVPGOfWo3cqxVOaBWF278g8UvljnNRQyMykOuD61Mu\ncbelArDWK7QBw1G4t0OB3oZVZuuMd6VdrZ5FAwwO4zTzCNoPQ0RsDgEcU6QhmwDSGN8kFsdQ\nfSl8kq3XAqRcoQOBSOGc4FAtBq/LkE5FRtjzhuPbtU/yr3+boaQrznH40ARLGD94c0eXjpUu\n08Ec+9M3bGwxOGoGKgD7uOQKbgM2elSBArfK2QaAp3HjtQBDjdyOtBbLYapfJwBjv6UxVG45\nFAEXl/NweKcyquCRmlVdyk1IsZGNwoJKwxuyF78VOGYg9qfL+7wcUeWZFyOM0DGK2FJpWP7v\npyaFXjaelKuGb72aBkar82OopGU564FPb5SSKGAaPnrQMbwuMjJNKyhOCcU5V+bJ6U7aGyTi\nmBHtHUHAp+5FpdpyAOlMaMsxB49KQdB33ucYp6LjnpTOmPWpI256ZqRDdy7iQNxpAT0HGacq\njcfSmKu1sk9e1MBW9SOaSNlZWVlwRUnysMnj0qNuWwOtMY6OPad1PjjIOX/CmqNzH0p247T3\nI9aYEbYjbIGSfWpO3pQq7VDNyfSpEQsh+XmpBgVVlBxgikO5VBBzTlwVye1LuXHAzmmRYjaF\ntwY8HrTgxEmGHWpI23SYfoBxRx3OMUmMDGq5AGTmnZCP8y8UsPyqznn3puTJJt7UANkjHJzk\n+gpbWRRLlqdt2yYx+NPjt8zg9qZW5IwG4nb8tPgX+IEg06RCwyvAp1vhpAp49DSYh8r9QDn1\nNQQxFmwxzUrj5yAR1pZBtUMKQDj+79x6URt8vK4ohU9Sc/WpGUqpyfvdqYB5yEYxnimrF53z\ng4HpSMu1eBx0qRSqgelA+gKoTqefSpI5NygfxelRMPMbIp3lmOYHqO+KQ0SxxkNuJyM1HMR5\nhVBhupqaNxyRyPSmeU+7OB15poOokcZkT7vze9AZlm8snA9alVtzHacGkkQM+TkjvTBkm7yx\njIIp+7A4b5TUULJuweKc2d+3GRSuA8lPvEVLGyyoFHy1HGiYIJ3YqQYUgjpSDUVo2h+YnIpg\nXceDlj1p/C/e5zzikOdyMB35FNDFXKybG544pPu5+XnNT+aDlgAWFQhtrbj+NO4D3JaMHODS\nrIQRxkU1pE3AnpTkkDZIGMdKnqAbgeW9elKdrR5PXNBG7nrnml+VuM8jtTYDdxRlYD605MfO\nyjlvWgruVWByM8ikKsvIz16VKAk2kjbjtzTt/Qchu2KjMjRtx82alGWTJOCe1HUBdxWQLnI6\n/Skb+LceO1DLtUEHIFCx7+opjQ6PbtPHNLuBx6UkalMt+GPWhR3K81PUBi24bLsM4PeptiFh\nsHPtRGSc5+76VK0YXDLx7UWAjVWjkJIwCKdy0fHWnYDjg801gynrxQAi7lyRyxp8bmPKt8zU\nYBwc/MKTzSrYxmkMdIwY8cChmxzjilbCfKRk0u07Oo54oARZDnDD6Gnl9xwozTFVhHj71O4X\nC7cNTAUu68Y60CQquWFRmUKevzVLCwkUluaYxylGjA4xnqaRpB0PGDUa4247U5sN70ASbtzH\njij+A+uabDja3NO3egNIQm7De1JJhjjG4elLt5pFyWJxiqGJHGGwW49qk4VqFbe3PHFL5m30\nzSsAqgk1DMzx8qu9qm5JJDce9N27BwcmkA2OUSfMVI9fapFZW6fnShsD3pi4wcDmgBdpXODm\npVAXbxk0i+lDMcDA6UXJYudrYbqac3JBz0pm4hgzUNhn60AhQo9epps0RYjPrxTiojBGCfpS\nZEmMZBHrR1KJIycbe9OZx93dzTNp5zSrGrjkfN
QA4tub5uB7U/buX0pAuMBsGlbO72oAGwvb\nNLwy44pNwKn19qVVGOOOKOoDgueR0oGDkZwabC3ynJpRjJPY0AG6inZjooEQMVh7bqZHJ5gP\ny8VKq7OD3piDbwvPNFxMdLIyhQvy8c1FETJu3dPWnzN5mFPWkGNpxximIYqjzAM8e9SNINuP\nSmNkYwM09V7EYNICJf3kmc7QKlQ5JYjJFJNtCrtH40uCseRzmkBFCAxct35qRfmXcPu01sbQ\noGPWlVQqYb7tO4CxqOo5NIyjnH40nmeWxP8ADRyH65BGaYD1kPAFJJ83zdDQp8s5xmkmYjAH\nFLYB8a/KMHFNk+b5c/NT0kXaBmkaPa2/Py0wGH5D6nHWmKu6QHtT1bqCevSkMgUlQPxpgOZl\nUEkdarrhZPWpFb5gWGaSVl3nHSpuA2eY5zng/pSq4K5XrmmNGHQHrzzRHs6KcAHmi4Ei4ZCq\nnHOaTaWxg4FLtC4C8mmSSbW6cUhdB7DnHakmZtuMAUjdhmlaTd+FMBMHcMNjI+YU9GVGGDgU\njIN285xURiIbHbrSYWJWZXU7TzTV+VTk/P6UGNVIPAPWkkk3MB39aQwkzkdjSKoY7sc96YzB\nZBuJNO8xdvB4qrgNdvmAzTHJRuRxmhF8xck45pv8WKQx1xujbccFW70KhCA9aR2+UKxzg04t\ntAycUAOkwFAxihVEcZ3c02WTjHfsaYG8zhjzQIlj245GRUW4q2CcDOaduCjaDS+WN2WPFADG\n+U5UZqTzFMYz19Ka7fKcdBQuQAduR60tQE2jjAxTvu9OKQMQQ2M0nLSEgcUx9BmX3Z6VMoHl\ngEfOTyaaylCNx4pytuXjnFAhpUfMO1H3V68U5nIxgfWmSdDk5FICKST5iRT+JEyDUWwb8jpS\n7dpAHOaQAq/MCxqU/N0pPJO0nHI6UxVJb+lADfuyZ4yaVo13Bl5J60N33c00MFx82aoBGl8t\n9uOKcu2ZePve9NdA3BHzdqVB1ycUAPfCx/KOelV9u5cHgdzUizBm44Iouo/kyvBPal1AY1ws\nThU5HrSpJ5incMc1FCVUEFct604sEB3fWgAClG9qmWYIu1hzUKyI7A80/I5L8DtTAinkDLtA\nqKcBIck0qxl5OvGaluFVcZUFakCtC/7s54Jpn3cnPFTbR0P4UyaEvGQhG70pgRRt5knsKc3y\nnIpscTLz0460+Fg6Hd0FICOb5l9arsHRalmztyPWlWQJH83OBTAhVT5gXHXk0rZWX5uQKSGT\n5ssetLIw3AseOlIBJCGwFqNVBJ9KlaI+WGHTrTI4WdTn5RSAi5ZgDwKUqq4A5NPbDNwMgcU9\nR82doxQBEzHbg8Co1jboeMnrUkwoXPyg80AMkUxnHGKYke5icjNTtiQsOgqtyvU4oAAvPOcU\nMuF4PFSJk9ximsCvy0AQsMfMBSbh/d5qToSKY3zNjoKQEf3mxiq1zGQD/dq3+NRzgZHFAGVN\nHu4A5qF7NZV2yKD9a1fLyx4pGiDLTA4TWPD67JMpnPIOK868VeALXUrWRZolMjDl9uOK9yub\nUSMQ3K1lXmirKrMq7uORRzNBa58FeNvglqFndT3FmpK8nygOSOxU9xg15lJY3Gn7luVYYON2\nOR7Gv0R1fwmsq7tuAo4FeOeOvg7Y6ksjRQCF2JLMo5JPrXTGp0OeUOqPjrVlVnRt29iMZqrI\nqtHtMeTjbgV6T4t+EuraF5kgiaa3V8KwGTXCX1g9rcBnUhOh9Ca6YyTMeVmK9vFb/Kse0dea\nWPCsdxGSMYNaV1biTBRSSvaqCRbWk8xcg9OOlapklaR0kjx/qypPaoZGRUj3DLN074rQmwsS\nCRRtHfFVZkxIOM5HDe1CArwuv2plU4EdO3CWTOe+dxpYY9rvJgEt1qFm+VlcYBPGKoQ/bukE\nZGFY59jRdbwyxgBlX8zTpH3woqpwvVq0ZNNhktQy
y7ZMcj3pMDKRguGI5P8ACKdJuaZWH3vS\nkRVSTYSFi/ve9MZvLZiwLIPSmMRs4YbtwJ5p8shaFQFwVOAabkzLxkluhpZI2SJVZuQaBAoU\nliSQOmO1P3DaWUkSdMUxmeSbbIO2QAP1p0iDaPmG71xSENkYJGF6uTz7U9HRFO7ndwRVi6sY\nhao6sFlPWqTYVSScuOAP60ygZW2nbwo9aauZWHzckYKnpSeZuibIJVeSPWkkycqBhQRk0CFX\n/WyKV3n19KfFmRg/RQMZo+79xfl71G7eWvydDwBSENZ/3mxCdo5H1p6y4w3Ttx3pJGkCgqBn\nvTAyqS5+52X3oKHqw2kYyuc09lMuWA4xw1MjxvDBuoIK0sch5jYbUpkj5G2+Xk9OaI4x5xY5\nIY5zntTZmEcf3flPf2qJmWaHGfLUHqaBlySRG3AEqMYB9ah3KAC6ZbpRG/nLljlenSlIbys7\nec4yaA6Cqu1cfxelIS5mPy7gDxQqszMT1PGPWiNm8uUuDsB/WhkjZid2UI2MeaYzHcQ52rjG\nRTyrPnJ465pTt5yCysMUgImK+UARx2NObd9nQM245pkbSuxGFKDgDvUVwzY+ZsbeeO1UO5Ms\ngTchXDUNJtChQOmCKjWQsq4GSw6mkjbvjpxQMljKBtzAge1Jv87/AGRmo2ba4DNgdQKWRWbA\nGACeDSEOZsNn04zTVkWSTKLwOo9RTVYqHB+dP71EarGiyZJyf0ouBKrfu9xTAJwAfSmPGFXa\nBhqTcWYbmwpOcelI6guTvwaBDuY9tJHJ5atJJzk9O9CqW5BL8dzSKJGUnr2PFAEkaq0zSYBJ\n4zUjyqoMY2hqhkbaqlRyTjFM27sMqg8c5osBoQ2skkKOQSg67arzWcyyB0HB5Ge1Ot9Ra1j8\notxnpTzfNcBtoyo5x0pgQMS6qysN+cGkaPdwcMPal8yOQ70+U+tLGy7gpGCetFhstDSZI4fN\n25XGePSs9mRWDvkjP4/SulttUtv7Okt5FIZRnOetc9cRx9VX92x49qQEUMyqx4KljgJjOafJ\nGVkHlnbt61FyxdBw/dqlaNdq4JBxzQAxSfMLKMAZJ96VmAcfu8BhkCnqx8tyGG4DAqK43Kil\nt29euBxQAFV5wTu/unoKRsspVfu4wT3qSOQ7iXAAxxUOVXcwyM9V7fWgLixqMRqEIA5JFEcj\nFiyD+LA+lLFJtK4brxzSco/HAz2oHcWTHmdd2OaaV6MnyevNDEnkcc8UhZfLOQRzTESIyjgH\n5G4P1pu8N8qN93qOmaYq+Zjam3HNSNskYDv3NSARuQpCqGTufemBl8zCnDelOaPcpCvhs5oO\n3dkjOBy3emIe8JV1KnIb+GlZUZzGOJBzx0+tQtMXj5HPY+1Jt8yMbyVXOTjvRqMlhikw4G1l\nIz1pJc5XI2HHSnRyrIrZ+5jjbx0oRvOUSFSQOMUAVjlZNzkkrz7GpIphzIvJbjbVyxjjvLsx\nSKNhHWorvThaqVRvcgdxQIrTKWQqwwOpxTlcSrGMYC9D2H1pMNIhI+UAcH09qf5O1evzDnmk\nMYsrMys+MD2/lUwmTy33oSW+6fWo9zSMrCPDKOfpSJIDG5cEk9qsBqxurIpO0VLLGN7b+3OB\nUapuZSTgYoXPz5YbsdaQBJGwded27kL2phk3SMgyZUGfbHpTsMqbh8wAyc0uD5YlQqpYfdNM\nQf6xI1fCr96l/eMdqhSF5/CmndMpO0KQOOaWOLy5CQSQy4zQUKZNxynOaWRtzqGGP9o00bo1\nZhwo4pWj8yP7wYEZP1pACq+/Jf5ycbh6U7jzBG5ATvmmNuVVCjDYpsmZsFWyOhz2NAhyxhWc\nKcJ/epeWjHy5xxu9acxLhc9cYB9aaqltpYZCnkjtQIYNqMC65weMcUK2UAGQC+dpFPZjJI2f\nkC8jPemq5X5yMj0oGNV+M9ST2ok5U46kY5pVGI9o6H
86JMbTxlqSAbHIIUwwZhnJ46n0pyFf\nvhSmf0pF3R8B+W7HsaXYJM7myP4hRqA5VOzghO43U7aPvOCp7+tQhUOSoIPQHNTMxb/rmFxm\niwhnlhmVwcLScqWdTt/rT9qKmQ2Tj7tJtVlRgcjpSsA0MXVVPD9QfanNvVtpbcG6EUxt4m2t\nwfWpMlMhent2qkA1VZ1ODkjihpJFjAc5OccU87yDgqqY/E0yRiFQ9EHWgoOSyoV2qO2O9IwW\nQZf5WXgetOkWRZDk7lHGBScmQl/u44FMBFb5RuB6/epJF2Kuxsc4Bo+bgcAdSaWRvM+ZG3DF\nArWFXa0gjlGRn7oH8XrTWXlwfvA9qkb91GhjP+OaaWO7K8f3jSAQyfu/3Zw56q1NwFUEjJbq\nKVZFUln5/rSqEAAZsnOeOgpAG4DYCuF202NFKsW+8OQuadJGwkG1hsxgNmmGJNpOQR3xQBLG\nFX9/IMHHAWn/AGolhgYDcHNQKRt4OFPC8U1m/dgHk5xx3piLbSBiUX7g71CccgHGelDR7Ic4\nwSM4oikyoUgD3oGIwaNPnTLHgGjc0PJBZMbvr7U+VS0mRJ0FJGpkDBXBbGcGmMjeaNoywB2N\n0UdQad5inGAcYxz601R5ZARck9c0rYL+WTuY9MdjQBIpaTkgLgYo+8wCn5O7YxUTNuZAD8o6\nt6mlLtJOXH3ulAh0khjZkii37xgnFMQyKu3+Ic+/0qaMtIzqxCfXvQrLHzuG70NIoiWJjH8w\n+Zj1HahSVAGeR19zTpGYKWRsBuCCP1pkqSSR5DBVB3ZoETtJHNGADtkHNQsFZj5mc4+9TFcI\nxB+83p2qSNj5io+N/QUajEZvL2iN8yduOtJJI7AblwoPYU6MrG7kr84OM0xGfzjtUqD/ABe1\nGpIrHaDg9vxrS0/T4pY/PkfDY4GP1rO3eXwRlSflJp8kjshQcLjIxTKFeNd8rq+4D9aYvMS8\nMwz97PSm+WOuSWI5p6IWjJJwgHSglihSpYKSFIxmm+X8uPvL/eJ5zVjywyLtOABkVH8rR5Iy\nc9qAEVfuv/d7mneYFhPybgTSjuNvAHK1KYQrINwyVzSGQJGFlAVue5qVUAkfLbgozz39qk3R\nrIpChWHRfX609stJjAw3P0pgJI0bMrsCzkYCjpTFhP2pVEm0dWDdqsNbliAvzFjmtfTdAuJo\n/Mccg8jHOKkDFjsXnWTLMHJ4b+VbGl6HcXEiHBDYx8ozmux0HwY1zcbdh3t03ivSNH8DxwmL\n90GcDll4NAHFaB4LddqtGHHXeRg/lXf6D4RWBg3l+Ztbkstdhp+gDcAy7TjB4rqdM0LdgKNo\nUZ6VLYWMTTdBDMJDhWzwFFdDbaGGZNq4fr061rW+klWTByM5PFb9tYrEwIGTWMmaRRnWOjnb\nnAU4rRgh8lghH5Vbjhxn1NWVhDYyACKg2SGQw7ecYqZO4xzUqIGUdqkWMM2KVyrESLxyOad5\nLcDqKmC7+OhFSqu3jk0gIFXbnjinLJ12jpU2VUYZDmnxgZGBx70AMjhDEFvSnRqA3ynincnj\ntUiqka8DJoERtEznOKkUYwPSpV6c8e1N27c80hjFUyMQBUiR/KSaF+XvilbC9OnegAMY8sYG\nTT9u1QAKjaQqoYDNSR5k5J4oAcyjO3GRTdvl+x9acMsCOho+8QD260AJhSMAAe9SRYxjrSbQ\nuCOlHl7s7TSvqIjmjBxtqSONtvI5qRbdvvEVKP3WM88c0xkfl7cE9KeNu4d/rSbtwPGfShfl\nYHGaAJHGzBAoU5bJXil3F254p3LArQIZIgznqaVslPQU9VIO7qKftMijjvS6gMjQeXyeabId\n0Y2g1O6ruAxj1pBHjkHj0o6gMjjzgmpvLWIlgcjHNI3bHftStCWX5eKYyJFGOBTUUsSDxU0M\nZ3YYYxUrRjqKAGGPaFOc0yZTI2AalaM/KO9O8na2QMD1
pAVgu1OBk+tOWHIG481a27s9hTPL\n3cA0CYRxxquCOKbtCn1NWlhGzjr70nk8jOBTArrleB3qWKPJ+YcVL5aR9TxTdwUnH3TSGRSA\nebwMgU7jYcUmOp5qzHGDGMjGaAK8MBkUluKljAVsHGKfIcY244601EDsPSgY2RjnAPFNCqx4\n5FSzRg8A81JbwLtJxxSAiVAzc9BT8dVxz60kkYOAvrT9rCQDI9KBD1A8nkc96i3DoF6095Du\n29qFj+UHOKBjfJHBNCr3qcoNh71CyseRzSuC2JFYcU2aXgdsU9VAyW9Kiny3bigGSIu5tzHq\nKb5XeiJztxjFPVec80DQCMdc5pqxoqk45p2OcAECpfJDe3vS6gU2U7vb1qWGPcxJ5qTYF4J4\npvl7TjoKYDJIQvGeD2pqxRxqM96nji+UlhQY33Z25FAEKqB0GRT/ACc/MKfkn0B9KVmMeVAz\nnoaAGMy7OfvUsf3evNKIfLAz1J70x8buB0oACp3Zxk08K3fpSDdyD1pVJaM80DsNEm0nHTpU\nbIZGH6U+NWC880cq1AB5bqtBU9M1LG3y470hwVyF5oERqhReOtCsZB0qyqhlVs4PpTWXy+Qc\nZ6igRAqjdgVKTuOO9Rx/M3Q1KMdTQBFIxbORSLJnAFP+XkEU1tqsMcmgYseOd1M8osSVXGKf\ns24J70rRlm3A0ARf8tACDilbbtIx1qQMFUhjzQVHBHPFAEY+UbetCrvycbR70+EBmO4UrMu3\nHSkAgC7gpp2AuSelNC7u3NAyVKkcUxjUx1NSMwVsAYo2g9sCkXHPepEIWIAJXj1pdobkcn3o\nXLDBNPdtyg+nHFNANCqMbvrSfxMcYpw3dMc0bGDc9KoYkYG0MVxSSTDzAMHJpxYr1+7TFYc5\nGT2pAPVSzDBHHFTjKcA81XiX5Tk4NP8ALIXfuyKBDkUAncaVW38Lxj1pVZWjClfmpxUL0FAD\nVBC7sZ5p20MNxOPUU7zPmHpRIQMccGpYxx+VcY47GhYwvIOT60zbuBDHpS7QCD296BE6YbKj\nk00ybVBA5zzUkMaFt9LNaq/Ktgd8UC2CGZm6JlSeBT+dxOMe1RQMYJSjDMfrUolHpgCmMbgK\neRTo5BMuw/LjmoiPMJOcUqrtfdnjpSKHtNtbgcCp4285d+3GOTVabjHFS2twAuxqAsOOWyN3\nB5pN6phSu73pGXHAODmnRtuXaeuaBBtMcm4fd7inxsWYA9alSPYrbueKjU+ZjjaaGUWYRtOQ\nMg0jEqGPVjxipIVKrxyabLzIrCgkSENtBxyKZJM0ylFG05qRWeNiNuB1oYFmBHWhjINu1hkc\nipkkLOp5HNOkTYuHHWheMDGRQMlT5GZVGAeSaf5nG0jNJKw2gKuaduDAZ4NACx7MMztgdqSM\nfMWPNPSPco4DJTpMBSc4zVJARqoXLr1PNL5vmKTjilW38tQSc5pu07cHgVLAI2WQH+96VKuR\nyQB7VGsK4OOtOjUqfm6dqAEwdu4EjmpXYOoYrzTGYNwKdHlTz0pMdiXaFXn8KXaxUEcjvUat\nu+7zzzmntJ8qhW4I5oQWE8sbsjgCnL8pDtg9sCoFVl75qZv9Vu24PfNAg/1ZJP3amjy4x+Oa\nhjYFSG6dqAzIo7elMZYlYlSAeKjU7eOtCbiuSce1ObavNINSLO3rnr2qdZQSM9KRANxyODTl\njG4jbSEP3ovQcVGzFht6Gk2ndgU5oyvzE0XAVY/3ZcnHpUSyetO5IAJwM5IpzbVbpmkUOiB2\n5c8018q5HUVJlNozSMu7IUj60wuPj+7k/lRjcc0xDkA9e1DKyyZHIoBCqiMpz1NPx+7IHcYp\nsfGcUMx3AdKdw6DVHlhcnJHWpA3BIFMePcxZeM0rcYC80hD0I2g45pTJt6CmrhVJfgUiSrJ0\nNIY5WbzAOgPrUg+904pjMFI3c0kbbweTQIlA3cCjao6/rT
Y8pznmkGecnjrTGLDIG3Ky4Hal\njxnmiPG3JGKc0YkYN0osMcuMk96a2AowMNSsx70OobHalYBUxuBp2Tn2piruHJxUnUGmKwjY\n44yTSKyq3I5pUUZBJxTf4yT07UAP3fvCM9uKR179PelVhnO3mnsx6EcGiwxqgqBk7qcCc9MU\nxQyyYHSpV4Y9zSAeMbeetC5LfSmhWZiTwKFYquapEisu05HWoo5DKGypBBxUx3D5hyKDlgDj\nFIYi4U7dtPU9RjpTGY791KxLc/dNAw2pRS7k9aKQETSBu+aaUVuS3FOl8uNRgZPpREpk4ZcG\ngTGMuCOc0u3apIPFSbe5GR0pky7cDPanqSOXHU9MU0bipbkiiNxyCKkjf5sYylICGSRTgZwD\nQ0jR8DpRNAmevfNLyynuRQA5JAzAFefWibGMEZqJGO3j736U/cW244waq4CbQBydwxS7gvPq\nKUurA4GBSAHjkBRQMTLLjLZqKZzIwAFWMFlPANMZPlFMQ1Yz94DIHWhidvPSnqwZSCPamSKe\nM8UAGArAE00oWzt5X1pQB0PI9aAxRcKMip6iEbGBlsGmvtxjqakADN/WmyZ3dMCkxiRcxkkd\neKRY9sYzj3FSfQ8U1mWPvuJoEJJJtYIq/jTNwbLMMexpQu9vXmkmUBSMc0w6DUkDdRzUi8qC\nRyTUD7mxxtFTZLkDpQCHlui54FDevamqh3MT0ppJ5BpAOLhm56YpqsRxjPvUUjnb0471NsG1\nWz17UAhm5Xb5utM8gsV28Lnk0+XCnA5py72XHUUxkMikMRjHvToVHU/qKrX2opb5yMlew71z\nja9eT3wVR5a/3aQHVtGGHoTzSMi7gc5x2qra3rPb4YYanRtJJICo4NNAWJIfM5DYFOjAVeRk\n+tM3bH2Ac1JyIyM80gId3mdBxmptpHytyKjjUxt6g1IzfMMmmAyMbt/6UhJMWAcCpeANo796\niVCQR2BpAPjT93j09abu2qzYp2Dt9KjhjZgykZFAAZhIFB+YGpR8qnjAqBsrgAcCplyVpANu\nWVVUryfaoVnEikMpXBqy0aryeahl5kzj5cUgCOPK7tvFRN8sgIHFW4cyKR0FVgu2RgQTQA6Z\nmMJ54NNhbEYI7Upbdx2pdpVcrgn+7TEMmOza2M7u1NWNcM2PlFSyfdBb5WHamINsJUfxGgBq\nkM27PINPb72QBimLsB2980D5WIPIHegYyRVwXTg0LK0iZPAFMOZiQOBUkoKxhFpgQtIZGGFx\njqabcfN0NTMvkIueh61WZRwQc96AJ4duwADB75qKbzGkCqMgdaEk2tuakZd7bwxoAVMRnrg9\n6WdvmCDketNaM/KDUEm6NiD1PSp6iHuCqk559qRVxGG3cmiMkD5hTZJhjYMUhCxyb1KUydfL\nIVehpqZV93WpXRXyWODjtTH0I7ePKt5vI7YpksYWMgfMDU+7y4gq0xvmb0piK/lgr0wahuoz\nJhVOD61fKgGoLmENjBpMoijb/RwjdRxT921cetMVcNjHFLtLZ46UgFiUdAakkYLhaYuI9vqa\naVZmL5oAdIFbJHWomXYQ2fekXcQSaWQ7cE9u1IBrHdnJxmmvGWjyvOOMVKI/MUNjAPemNE0L\nDDbgaYEHzKSDxUhUqgzyaRsnP161J2GaYESrjcSaZww6VOy7ju4+lR7Que1ICIqq9P1psg3L\nnHNTbdw56e1CxjaQetAyu+CoAHOOaYuEyKk29fUU0oGbJFIRBIgbmoPLIVhnrVt8eYEHA9aJ\nIc8ZpAY9xah8fLXO6loaXG8bOa7JojuxnpVOWEAnIyW6iqA8X8SeCzdb0YhQOi44rxLxx8FP\nt3mSwQ7GGWIUcfUV9hahoq3EbZHbiuV1Dw2JoyrDnr9fari7EuNz89vE3g++8N3jJNF5YDbc\n4OCD3rn7lQsjIi/N0LYr7n8W/DW21BCZohcELjDDpXhHjP4L
SQyNPp6Mz4yI2H8jXVCoYSgz\nwCS1KyLv+bd3qnNE6Suvb/Cum1jR7iwugsltNEw5O9cEiqLwxybwwKyHpW3MY2ZzLH7RGWVT\nwfpSzRlVXPJI6VozQkPIrr043etVWhfzFO3tVc1xFZc+VgKcg5+tOEkhkB3ZTuO9T3CFFHIB\nx0FRJGFw/Vv7tMBjRjyWYdCSNpqGHbGFjyVUkbj1q5cfvMMQAOhjqrN1CRjJz0HpVCuTX9u1\nreBUO+PblcVWeMSJt3BuNzZNSNJJu3Nzjrz29KieHaxcfIWHWmIWOU/I4J29M+tCssbbhwd1\nMkBWFAB8ueo9acux9zMMgfzoGIZWmdskhVOQO1IZdzGQrnPANSKMMrKCAR37UwOIZg78k/KF\n/rQIOVYYx7ilSVhI2Rj3NIV6yj5lzjdT42Lrgx5H96gY3h1UhsDODmkuFLSgD+DmkVz3BIU8\ncU/LfMeCM8etIWpGm+YZxtJNNG51wq55I5qaRVyu07c0h2YC/d5/M0AJ/rACV2Y4OO9KyBk2\nckDnPennagMeTyeSKj+eRyn8Qqrj6B5jtGwYbUXoT/KoeZoRuJ3dh3qyIy8QD9FNNKqzxsJA\nc9TikCIpGKsBjjGDinFhgBGI9RQ2JJG2ZCqaVm+Ukr5XGSaAHRs27k4Hr/KnmRXXYw2gnmoI\n9q7iH+dT1pfMPdRzzSETSRqqsF5Hrmq7HG0KOfY1MZhwRxxjnpTJB++XoTj7woAhkjC5/hlx\nnK96YwzDlurCp0Vd2WB4OTUeGVmYHKscj6U7gRiNWUBXwMYBpzbOQBjjqelC7WkKnBX0HanJ\nnyym3PfJouBF8rDH/LReefSl3K2Dk0rKC0YZl3dvpT/LXgKcgmgEBaOGJiTsU9TikUsB5YOe\nflzTW/d5wM9jnmlZtx/uimAOCX4+YjqtEzK7KwGPUelDsNu4ZXsfemSD50izyRkrSAdyyjnH\nGcetO8xjENgx220q7FXKlRgbeaj3BXEbN1GafQRKoV2VjjK9veo/LZW6YTPrQ0e1887e+O1O\nGVU7Tz6mgBvlrJ8zZXHQ+tJztJByf7opGbzuhbPc08uqvwpYAc+tIBjJ0LNz1Ip8bM2GYYU9\nDTxIEQ7k3egpFYmPKEFc9+lMA3qy5VuemKjM25d4BCjjFAynb5SckihSAxBHyt0pMY1lVFVi\ncu3Vqeu92C5KKOd3rSMhRTtAk+nYUcuwYHPH3T6UwYnDbivy+pNNmV22bm+UnPFSBQcMOV6V\nHu+SRsFWXjPtQIdtZ2ZSOD2NHlujYAzxjHtTQ7bVxw3Y07LFjIz4xwaQC/IZNpHI9ulMDOrF\nj8q5xu9KN3Ib7zE4BH9adIrnhmwvXHrTY7EJ+WTLNuHY0qqPLwSWLHOO9St8sI2jcx/zilkx\nIo+Tawx1pIBkbFiVBBPZumKWP7ingEnp7UrYk3NtzkYIFNVDx/CF7HsKbAT7RtJJ6ZwKFDlT\n8u5uvHpSsomVVwBj5vr7UvfIbA64/pUgMVTkkNvPXHtUjSOW37cIoweO/wBKC6+YG2EjHJFI\nysrBvmz/AHqsBi7WwUBB709XKqePwFCr91U5bOcUpAud4U7QDz9aQDoZZG+cYQjoQMUSXnmL\nuOd/3Saj3FFVZDwTjdScngLwD+FLYGOjcW8bEfOTSrL8wBXOTkiolYmFieFzgeopHU71Od69\neDSAl3nc21+OuTx+FNSQnlVLKaZJJJwFTJzyadzlmU7B6UwGsUWRSV4/Okjbb5hZsBunFDIy\noMngml8zbn5cDoDVAxdz+Xt6Ajke1B2tb7S2RnhcUv3o1XHGaYqGFSQd3zUuo7D/ACxwB0xg\ne1Hy4IBwMY57GhtitzjnnNNYZB3KCnU0xCqP3aIThs/epQBtcAhfRvWkCKsZYenFAgz9/wCY\negqRCpvWPZgsc/epnGcEbCTipVkMTYYbf7uOajuG3zYXqoyaYD
zII1wSX7YoMi+S4IxHS7FU\nOrNlDz0pJYyMsBuUjA+lAEcrDzFJPmADgetJ5u5ShG057dqaF2r8zkHNOUtHOpYB0YY+lACx\ngx7vmBNNChlwFIPdvSnrGY2I2kjPBp0bCMMCfvUdRjGI3BdodSOW9KFG1duRt9e9NVgrAsvJ\n6UsrKrDfHnPf0piHrHlhuPHoKVo9wKg+Xjo3Y0xI42VVLtv3Zx3xUvysxByq5xt9aBoaMrMG\nYfNtxgUnlmMlS2O4ApHZoQEAwpP1NKmHkUYYDGfcUCHQx/u8t97Pc0TK6yFgNoA5Hc0Ku5em\n4A9elHnJ5hK5OP4v6VIDZI2uICR8mOVX19qe0SsoDc7e1HmYz5g/OogzMW+bGDmmMepYq+ec\n+tQ7tkfzfO3pUu4eYCeeKayrDyR859KBMZGwyAybo+rZNTPIFl+VOWHH0qHy9rAEklv4aUgb\ncBwCvY0ajHKdqqrDaQdwodh5fTKs2elKW/eADsM596UMeS2COuBRqIiaMK/yjKdy1P3RfK2c\nL09M0vmCNQc/Oxzt9qJGSTonTrTGNaPYpIcbc/d+tKuIlZTw2cUjeW0fy8qODu7U9sAcjPpS\n6iG7AuW6lelG1j82BwchhR5gMQXfyDk/4UsZ271A5LZGewoAQMTtDvnNJzJnHJzwO31pGccs\nx74PFJJGWhVd3yk5xTAURuz7V4wPvUbljbcRhlGN3Y1JHMscvmI3zAbTnkUSyvMwMiqVPccC\ngBisW53ZPpQ0LLIAVG7IJxSquw7gAV64pQoG5w+Q38JPSkNCtHuzx93nNRNvkZQh5HLNT1B3\nbOxWmrIzRkECPb+tADljByrAsW6UznYFOAVNOUEYJbAPQVFgtuY8kdu1ICaRixGOBjkVH5bx\nqrDJ5+73xQpLSDcMADNPVTJJtc7O45/KgCPcN2VQhCeCfWnhTLIxYdBSCRghBGDuwWpyqY+2\nU67qoB+d0YXKkDkmo2kkZTsGFxw1D7I5AVO3d/DSDcQ5RsHnI9qBX1GqpjkZXy644apFjZVO\nWyeuDxQ2dgbqMZIp7BZTGhXhueTQMEI8lmGQe5oZmC4J5HBp7bmYjooONvY035twU+maAJl2\nlVULhm5PvQV+8R9wdqfDv2ghgVPGDTZEPK9vTPWp1KHruyAhCjGST0+lWLi1e3XMkZG8ccZF\nQCNXUrjoMnFdBp1jdXVmsco3Qg5Cn0pcwjnYbaWQbfvnoMCtiz0JpGIcH5RngZrs9N8GFo0Z\nF8sNyc84Fd54Z8A/ZtzTENvPVVzxR8wsef6T4SlEaboADkHn0PevQ9I8Gq0iqU5UZ244Irt9\nJ8IxxfM8JJU8Lj9a7HTdEj8n5E+cnJ4qHIpI5DSfCJjZWKgqefLNddpeh+VtPlbSe5robPSN\npQlBnoRW5HpqMB8uG9KnmHymFY6PwWYZatq0sxGmKuQwnpt71cjtt3B4NZuTuWloVobMHGOB\n3q7HBtC96mMYjUKOtTxw7sAkAYqC0ReSGOQMGnrGGIHerCxqoqQqqrwRk0FkHA421KoG0cVK\nI1wARzRIrqRjBHpQwIzGd2R0pF+YnDYxVmNCcnHNDR7fuqN3ekDFj+6GI5xSBSx6cU+NcD27\n0q/N0oAjEJHvT0C9GHNSNH8u7v7U1csfmFAEjKMggUbA3VelSDhelO44GetAFdlVlzjmnLH5\ni4PFShBypHFSRqEjO0c0DZCIdseO1KsYVfapATI2M8Um07hgZFBIgXacnk0gXeDipkjOaSNC\npyeCaBiLE2B3HenLtFSRx9xk0jDc3HagCVQcdcimvFtz3OKVZBHyBzRDllyx5PSgCOKPaefS\nl8tto471MylVBpG3M+FPFACNjdz+FSI3y/dzSrCW6jJ9akVDt9qXUCL73yjgVKq4XOeOlN2d\nc06OMDpz7UARbWViSeKfFGWUnsaU5b5ferCxjHXAHJpEsjhjGTnO
ac3DAKM+tIy/vARyDzTw\nvJIOKYx3HIqJVO7jOM1IsZ7c+tTqAuOhNMZCsQb86VR1UmlDbt3Y9sU0Z3HIGaTEQncr4/hp\nyK3mY7VJw3zAcVIhXvxmkIT7p+9TZFbqvJPrSNtViOaeueMcijUYixmTqOabtA7YFWVcKOe1\nDMGOApFUBAuSTj+VSLvyCeRSpEWYVN5bJnj5aXUCLy1G49jTMYAC08qWU0+Mu3UfLSZaI8Io\nyfvVIzbFwKckILHcPzpNgbIznFIQRqrLuzT9q+maVmXb93BpBkqOxoEM4ZiMU7ZlOegqdYfm\nHOc/nRtG08cj1oAhKlQM0qxlRu9ac33eaRVfseKZXQRvukEVG0ZkPA+Wp/LaRsdu9J5bDg8V\nJIRwhVxilbMa560jfKuOcmnSRvIoHoKChqkYB6k/w08yDOAPwqJYj1zzU8alVCj8aTFqCxgq\ncjIpphPXrzT1cRsQc0scgfpVCGqM5HUU1WJ4PPtTgGVzxSqvJI4NMojMYGcU3BjBz1qx5JKk\ng81DsdVG/rSAYsbMuScmkVSmVYH61KpPpx7VJLllyBx3oAgVNyjmhuuMYoVwoPFNZT1ByKBA\nq5BO7Apdu9RtH4mplgwoBxtPfvSTQFWG3BX2pDG7doxjpSM2/dtB6VJtIHA596RFIGc4Henc\nCFG3RAHrmn7s4HpQy7mBAxStjzOnagQ1W3knp9Kk2h14+9SIvyYxT03ZHGKBkJh3detJ5Xlt\ntIy1WuCw+WlKZ5IzSYFXHADUiruyQeM1MYSrYOCPakjUcgHBzSAhkj3jpzSL8uQwyanGVYjt\nSy7SAR+NO4xkf3DnrUbKNucVNtHrTvLzGec0rgVY9yNkjilUb5MnO2peSoyM05ostlelMCNM\nK2T92kjXcx7LUm1cHuKGQbch9o9KQDNoLdCadgqAMZFCMf8AGn7sKc80CGkZJIPOOlCqdvNI\nrMGPHNPO3jj60xjVjOfUe9NX7+COKeclvlNIg2tlqB2BkH8Ip6pt4HT0objJXkURsW9qLiHb\nfmyO1OXhevWmRNwQRktSY28Gk2ImVF20mRxnrmkjyzY6CpnX5QRgt6UwI3I+9jPrSrIjYVhj\nnimwsFkIYfjUjsJHUnGelIAhzlwcYzwKPN2kjBHpStH5WXxSM25RkfjSGSx5ZRg805pFcndh\naiXdFGQpyT1NKsAb5u9Uh7jhH82RytJMw6Kven+Zt+XFKq5XPpTEJH+8BVjhhSRhVYqwx33U\nFCrbgc5pVTcxB70rASrB8u7dupMHcCBzTOYW+9x6VPHJ5qnjHvSGSmYHae9SRqrDcevWqe7s\nOue9X41Vo+DjimIZEpJyH4zTwdrHPI9KYv7tcL1JpYsvuDmmA6PY2Qx+brz2pn+rIKjj1p8a\nrgnqKZIpZRtbj0qWUSfNMvzcntUfmFZgpFSL8kOScU2JfOkDEEj1oJRMv7xSemKaw6c96eu1\nVYVGRtXnlqGUTrIYxwcCpMB1GcE+lRqpkiUDr15pFXbJkc0AWVbHXGe1Ndhuz2ph3GTdxgUv\nMqkDrSuAuduSo4o3eo4NIrNsxt46Um/5cMpHPGKAHmMcnNLC6scMaZwucjrQigqOOlMZWuo3\nhuvMQZQdRTobpZl8vYyn3q4YizZPQ+tCW67Txz60CE2opUE9uadgyZ+XijYqrx8x7mgMRnnA\noGIZhnZ39qf5gkAOfbFRbVL5Xj/aNOVCzfKMUgJSemeKWT5sADn1qORcKMmpfNVduetINSMs\n24KOT61ZEhVhg1F5i4zjj9aBs4INIRP/ABD1NNZtjEk/hSblOKVAM5xuoGIu1l560rZZen/1\nqVkHbmkhOVxjHPSmUHHVwNtP2jydyjGaryMwm5GV9KsE7o8BtvHSkIFUbQD1p4XHfaajjf5R\n3apGIZvmGKBCRfK3JphkbeaeI/lB6c9aUruz/On0uAKu8kdOKiUNEM
kZGetP6cFqVVPXP0pI\ndgZDJkNyKaiFdoC4PWnqx8sgj5vWlEn7vB60xWHMvmLnqaauVAbFKnMfXBpyrj+L8KYaiNIF\nUMR3pTg89qa2N2GHFL6r60ASfKy9cmmbuxOBS7duCq0rKG6jigodu+YcZFIMyLgUrKF6nNMV\ntjZAOKWohUVgpDGpN3lr70bvlG4fNR97ljQSKpypOMn0oYBssM/L2oQYJNKucEnrR5lCLnAY\njGak3AjkZqMSfLknNO/hDDjNMY4FGOcYamxyFmbnJHakZu4/OjyxG+9TyRipYiRpCzYH3cda\nbHkggnipN/yhMDimjGcYpgPGelJuLHBJwKdnpgZpisfMIPSgB7fIBnrSs3dhxSfe4Byac5DK\nABQIh2GipqKAIpEBOO5o2lflzzTJMqOlJFvck+g7Uxis0it9aczFmUsRgUxs7Rk5LU/GY2BX\nNIBm4ecdgyKlhZgxBAI7VFEQuMDmpd20nBzSJGkdcnntTW3thFGT3oaM7vMPSo0mK5x82e9A\nC7trYxipJZB5ZIxnHaomx0Gd3pRGjRMWbp6U7gL5w2jg8ikLFl2kc091MnTpUka8ZbqKbAhU\nlo0IOBjpT93zcDjvUjRosY71HwuewPrS1AUSBeQuaa2GXLGn7htyDUMihsn+HpigbHR7WjPH\nFCSLH8o5FOX5IsdKaw2gUxCsoC5ziq8iOw6k1KWEi9OnNMaYqVA4zSYCBT5YIfg05lG0EZLC\nlbsuPlHPFNhk/eEHj0p2ARAVJxwxpWYdXOKlmZWxt+93qtIoYjIytAh0jnH3cikRd43H5cVI\nuD1IJpHYA5xQCCRjtHOKYyMcY6VJ8suM9aT/ACKQxoXbGNwzzTWI3biCB2p0ZK5JPtg0xmAf\nnmkAL8+SvNPZG2jHB705dojG3g0m9lzleKAMrUdM8xsjnJ5FQLpgVVyuXzgMe1au1mbfnjNS\nIoLZJyDzmkBBFaJHweSOtToBEPkXBzT2kHQDAqNm2tx3pgEsZDBs5pGk3MMD2zTjmSEgDDet\nNWQKu3GRTAcQNvDe2aTyx+FNX7pBHBqNn2jnIHakBOzcDApFJ47CmRyHGNv0pskjIdpGTTYE\nsg3LnNMDfKADjnmhG3L6D0pka85NIB2TuO3vT9wV+abxnjg014zvbnO04NJgSs4GGDDB4xUc\nkgwe49aZIoKjHFNaQJwBwaAH+cdvy5pAxbocE0xSysP7p60u/a20fnQAuAuctmljZdu7HFQm\nE7id1TKv7sAdfSgQSYZgc4FEknHPAHpSSrn7oz71Ay7VO7OaYD/LH3gOfWkSQbiG6U9MKigt\nioVhYMzMflJ4pDHptXOOlIZAJNn61HCx3N/dHeptqlQSfqaYDJcyADGeaZ5SwjDYzinxybSf\nl3KDUV04bIA5NAESktkjpS3CFAoBJNLHhHB28YpWnXfhh9KAIo52GFYbqLxcsGp/lrgMRzni\nkuFZo/oe9LqBEJNx2tx6U3yQuSw5pkmW4HBFTMxCYPIxSsBHGwX6VIqjbg8t1qqpOasK5Xnj\nmgBHztJpvOCTzQnyZJ6d6nmVVj3BcnHagRB1YY6YqOT5k2gYPem+Yfxp64YnnFIYxPmGAeKl\nZWjXBHGKgMbZ44xUwLyDHU0wK7cckVICzRkDgY6011LLnGKb520bT070gBd25AeeeakuI9zE\ngZ9qcmB8w9MU2R/l44NAiMyFU2kdOlCyeZnIxxSTMq7QFzmmK21+OlMYnLdulDZYY6VJH90s\neKibKzHPSkAqqdpOQMU372CTmpAp8v8A2ai27V4oGSvhQcDiowyKmT8xp8q7k4ODUCsvQ8UC\nGtH0YevSkZSeccU5m+bIHFIs2W2ZwKQEZUHml2gjPpTmj4pD8qhT0JpAVnU7i2ahaMSHJGKu\nNH8xwKZGAcgjmqAqyW+UZSRWVcWC5JxkdK6T7OpjPrVGS2GSuciqA5fU
9FHkhgoPORxmuPvf\nDsc28lfrxXqE0Z+7jisubSQwYnnPNHUZ86eOPhZaaszedB1/iHXHpXg3iv4L3Vu8zaeMANhY\n5DyR9a+77vR1bgpk49K4/XPBMdxmSMqG7LitVMzcbn52ano15ZXE8VxE6lDhuOhrPeMsy7gS\ncYr7Q8ZfCmDUFk82BJ5TzuRcE14d4w+Ct3p85lt1JhkXcMD7vsa1jURhKJ4w0ayTEyDb2zSF\nYjGVGSQea2dS0G8099txEVbOU46j1rKuIW8zazbHzXQpIxsyk2PLkIRs9iTUUaiRQQeTwcVd\nmRQyDLE9zjAqOS2K3AWMYVj1qkBWaEDbz1420kkJ3AtNlV6CrMcYZV7FTjdSTW3GQwHOSfSm\nIpSN97b8oprZSPGM991SNC0bHvu5B9qZJGVB8z7rccGmCQ1WLOpdvlbt6GnMQzfMRtBwaTy0\njKhTuX0pu4N8wXHoPpQ2Ahy244wFNX7Wxjms3Z7nym9f6VSTDsFJzu6n+lTC3VWYDnJ6ZpiI\n/MdFOegGMY7UK6bRn5OeKezI3mI2U2dKiQhtm7mNuM0WHcVd3mSNjKqcUjSBg24Y4yKTY8cb\nA4I3Z3D0pXQblI444NAEnmfKpzlsU3bI0gIfLY6VJFGjMJQdyjjOO9NmhV43eMksrc0CGh8L\nzlmPBA7U2QcKqAALxmpRmGTZs5PcUkn7vCuPlB7Uhog2siuGbdnuPWoy37sAcseDmppFX5lR\nsE9BUYVpG+UAMBj8aBDpFI/hxxyRUfl71RN2MZOakVmmUqx2HGDSFR5fL7DjgUDFZVbaF4x0\nY0e4/OmM5Xb82cjAIHFKrOPvYY+opiFdSxL7go6Y9aa0hjyF++RgelIzs8vMe5MdBSNIAu9D\nls42+lAxI2Ei/dxJnG73pSrR5Lnf2JFLIf3bMi4Y9fTNMT7yxtxkdaBDpEC7ZQBjOPel3IiD\nd8ozx/jTSAmSrZXp81K0fy7ihLAd+1A0I07KvHQnrinoq7WLN8x/Gmwtv8vy+DjlqUMilgjY\nOeRjigCN8sdhO5Mc0se3zULLlsYB9qk9cYPHNQKCIyW5JOVb0pE31HeWgkKnjJ5pJMRyK6je\nQdo9frSsA0i5bDY5p0O+MuQVB9+tMr1BSxkYg5/2s96YjHoclj1NERPluV6Gmx73XKnGRj5v\nWixJN5IjjIL4bsBUYzuJXg9Ke+FYbvTlvSmDLRMS2B2x1NFgHKpBbe2R14pI23Lv429vSiFO\npB+bHIPeozGY1yPlHYelAErJ8oB+XPJx0pjb2wB9BnsKYpCcM5CseBUjo24budvpSGG0R8E4\nK9xQ+AOnyNyPc0M26MuBgDrTgw8sMwyoGfwqgG7RJIM5z6UnLYIO8AbmA60rsVjV0XIBzz6U\niSLHISgIXHUUgEXa2ZFLbOo3DpSNtES5/iOcetGRtwuXGc8elOdxIyErhB2pAJ5gjJVVOe1N\naN1kG5vvfpUiyKGJC5PYUyRXdQ2c89D1p7jHyROHCblEIOaazNJJjHHv0pVaNQf4mHSiSTcS\nSwPpigQrSAcgY/2qjbc8e+VwATgYpSpO1QuF6nNM6MwK5GaGIfHHuj8sH6U5lLYyCmOCo/nT\nWkKlNpIDHAzUqq7bmBxjlTTsBXMx+You9PugUqsYwFcNnsCadlthZxtbPKDv70DehOdrr79R\nSGKzrHySS3TC0scm3cg496iwFkXggdcn1p8ZklY/JjvzRqAjRt5e92Zkz09KPLIwUJKmlRmm\n3CVtu3+GnqzIBtX5cYLemaAGsoaPEhAZedvrUO0Io2vsBPSpDiE4PLdiaSRcquBl15p9AEiU\n71YnbTyxZmAK7aTyjIo8vjAzt/nTDHsfbtPPJpD6DmVnCgtlV5pud64xxnPFS7lX5tvyegpH\nYmRcAD/ZpkjDtZvlfcMfrULf6sLyzdfrUr7I42Zl8kA/X8KFWTrgKGGR9K
VwH5Xy12pkY+63\nalkfbsIAYg846Ypnm7lxgBehxTVZN/GY0H8Pr700MtQsu8syDYT+lQXU37z5F3Iwzj+lKrGF\nioOFIyH69aj8vy5AXYFsUhBnYF2g5PY84HpSLKFmBce1O4jJ+cs5HTHWkVcZ3AY4yfSjYexM\nrebkAqGznBp9u2WOVaQe3GKpgSJKxJyDxUiM6Q8Nhh0xTC46QKSyyLluopqyblVtuD0x9Kc0\ni43AdsmoXkwHOfkxx7mmBMrPJIQTkDnFDOeRgH/GmqqNHgkqDyPX6Gm/KowH2uvRKXUBzfLk\nk5OOfrSLIYwN45IxmlZjJtIIySBinTyxtlck44NHUGNiJUnauBjGc96Ux/Jl5gW/uioWYKB2\nTPTPNSR4CsVXc2c5PpQwBGMbf6zK54Hel+8zHnPQkmmKyupYgDce9STRxsEyoBPy4B5pCFba\nVVCh2qfX9aZgqrBWA53BjQ25Mq42gfLmpPJTy0LMCtUA2YPIyDO9iMmk8yJmHmL93sOtJ5wj\nLseCOBipA0cKHcnPrjmgojZnaQFR8n3vwpXYyKc8N6UwghvmGwt27U/aUUjO4LyWpXE9RWk2\nqmCMYyaa2GU8be5b0pH3BRhQccYPelVZdqnovf6UwFwvI3ZIXOfWl+6iMxABGeKjLKqtGpzG\nTke3tSxpu3L/AHT92gQsOE/eE7mYkc9qFyJN2QD3Ge1IG67j+7PGKfHGs21Qc885oGMlDbgG\nXKsuQe1PLMyrtXK4x705rfGTvLbeM/0odRGEPIcruK0AV1ZNskYXYfpU6tuKhh+8K8ZpPM2z\nMc7Rt6elCR7oyD90dDQIRGHKyDHHpSAOq5YFl9KkWEScMONuBtpi4EY5Z0Xgg9QaAQ3GZB5e\nCtK23cQvTuD0FKXZMYGT607bGZOOTjJA/wAaTGNWBRN8jbgR09DTGzD8p4Gc8jmnCTb80Xr+\nlLcM11cAkheP0pCIVf8AdyHDFwc59qcp6B+hXdRJ/qSqN83tSLkxBd2QOpFFgHbg+1XBGPu0\nbE2kbsv6VLcYW3AB3Y5xTGX/AJabRgcbqLAJ5e5wV+bA5pGVZJemQ3B9qap+UBCVLGnuTu4G\nOMk9qAFbCtjk44HrSrPtkVWBQY69/wARTW3syc4j7tipzj72cOOMVQEaqCcMNp+9j1o8ss3y\njGeTipRncW6hRjbTNwhZtvVh+dAxPL+Y5+7jrUiwmOEB14PIamxxqskYbee/TgVYVJ3yI+Qz\nYFBRBN8y4Qcf3c05MFBHySoq/baSZGbjaFHODWta+F7mRgiLvjPIOOtSIw/sjrb4CFmPPy9q\n2tH8Pm8ty8iF4sc7hjmu30PwY6+Ss8WQSCOOld9o/gRZZCTFmPpjpmpYzzTRfCUMgLeWQ33c\n7e1d94b8ENM33FjhBx8w5rvtE8GiFT+5XZ/dxXY6b4bVWU7AvfdUNgcdpXgmGNACuAvfFdjp\negom1lGD34retdE3Mp2963LbTVjwdmWqLlIwbTSCJB8uB2yK3bPR1Vt2Av0rRhtSzBdtWobc\nsPu96VzSKKkVqAccAetXFtxwwHPrU32Xdx2p8aFFAPK1Fxka2vOFwKn8hTtwOakWI9BTuM7c\nc9qQEPl7ZD0NSbQ+MDBqVYznpnNPER546UiyOH5uDUyRqGPFLtVmPGKd5Y3cHikxiKm5QBy1\nO2srDeMU/wC7yvFLIp7nJpgM3bW4p6gE5H3sUvlhV3ZyaXG1RnqaBEUbfKR71KiqFJoUKOGG\nPpSxnkjqKBoTcc5H4VJs3Y9e9ES1IVIyQKQEW8rkEd6mQBm4HNKkYZR61KML1WmAx4yGXutN\nm+b7oxzUiqTGeoFG0btxpANjj2/MeAeKXy9g455pfunGOCc0rZ5A6dc0dQDAyAvWh1Al5Oad\nCQpBAqVl3NkCmAiMqnHQU1JAXJC4FC/MckYNE2eVA59qAECfN1yDT8A/SiFV
8vnlhUqqvH8q\nQDA24hQKl8pVI9e9ORV3elPYj0zSv3AQybPu81HHMNnI5zSqwyeKbt2t7UB0BsM+TwKkVsk8\nYoWEyKcCpYY2K7iOnagSI4VAYk80/wAwbSKXcu/aB1709oecZqh9CONfm4GakKhlOcg0sYEb\ncc1KMenNIRArFF5HFSxqjYz1prYfvg+lKjfN0xSKFWMLmmSKD061LhiMnpRHGu7OfrQxFV8q\nMdFqXcGQDHNW2jTaygA0m0Mo45FIRXCjfyvFTKi7hijyWztJ680SQtHGCG5zgUxiSYZgBxUr\nOMZA4qJYWkYADPrUskZYYHSgRH5gHbFSrMV/iBHvTTA3y5FSraow5oGHyupbtTsjZgcGo0QR\nggdKcsZkxjgUgG7DtI3c+9M8sqMj71WfIwvXmmyr0GeaV9QuV2VtwBGasR7PMAYU5lZlGBzS\nLlfrTAfIoU5DcelMP7zNDcru60qEbuKXUBqxhfmbml6c9KlVQMk9KikU5459aoocrOq4Tp3p\nwYL94c0ihsDFSLgtyM0gI3KkkHmolYnqcmpjH1UDGe9KsO0epoAjVCeKeqOqnB+tPVircipI\n1DZy34UrAVSrMwY9O9P8offXipig5x096i+bkY4pgLuOCQMikVg2C3FLHwjA8mhYypGRmgBx\nXaeOtEityeuabIpX5s5oZj5dIBu7aQKa7DcM8j2p6gdc04quMgc1QCR7WBx0qTaqxknGegps\nIAUnHPU0r/MuSMHqBU9QIeqgGnj0B4p8WGbkce9JtG/PamAzG4nnFOVd3UZFShdynaADQWLE\nA/LQBCyr171GvIxjJqz5fzYAx70wIYz1yaARGqspznA709VByTyKd/HjHXrTvLAHAoGNU7gc\nDApu47fT3pGfbgdRTh8yk4OD29KkBjKzAbT1pqRhd2eTT1Rgpx07U3lM5Gc0AG07hkcUnlhu\nCvJqTnqDTWZuGPFAhhjPToBS7ucDpT968jOTSbQoyeKAGOuzjrUas/IFSr653U5M7d23pQBF\nsbPP40rINvtS9iScUcbaBiZB4UY9ab/ETnFTQoFYdwaYI98pUrx60CEXarbid1O3bl4HFJGq\nqWzTl56HigBscQbknFOb5cYXdT1xsxineYR90UAR9+BQ8Y3AZ4NOVGZsHgHmhhu3DFADY1aM\nknkU9YzKM9Kaj8DJ46VJyrZBwKAEVlYgZxUu70HI71FsDbiadGSvQ0wEbAHT8qeI1ZevPpSf\n6tsnpmk3b5G2nOKQDx1YMcr2okxtx69KaGGRj8qlZQyjdx6CgYkcY6AkN0INOXKt0yOlIoDS\nDP3qVpD5m3GAaYCTNjG31p9rho3UnihozsIptvMoQgLt5phYUOFjCe9SQ4Z+OlSRxo2WY9uB\nTYlEb7CMd80tgHTRYfIXIpyt5O0beajkLNkZOe1OVjtGfvdM0rgPmUGUOy4LccVNHtjVgTz2\npsOMYk5PWnN5ewluaBjJGUrhX+bPSnRsVU9+etM8oxsGx8vWrMG3qFwD2pgMEnmYBGPpQzHc\nQVxxxUqwrtJUYbPNKzDcpbgdM0mMjClo/m+tMt1JZgoIz1q3HsbJp21FYspzxTAhaIRKATkn\nmpFIwBtyexpY9zLubn270kTbWIx1pMBGzuAzg1K21o8AYPrTeeucHNTM3zKB070CK648zhs1\nJ90EkYp4jXGR1zUbNukUEfLmgYhfOPTPNO/1jAjoOxpfkaR1ztPao1jbqx/KiwDtyySfMcU7\nlHwOh70Kq7ANvJPWnfdBB5AoAF+8AckVKc7SP5U1f4T/AA4p8atu5+tAEX+r+VRnvSLkj5uB\n3FTK26Qnbj3p6oOe5NAIrRrhT8uVzSxyF2CrkVIkLR5JPFCqd3yqAaXUbHyW+GBboaQJh8Mu\ncUpkZvl7U5ZF7g5qQEjVdxLDj0p2RIwXaABTWiLHlcA+lOXag5HNOw7DzCORkA
d6Qssa7VJJ\npG2Dkn6DNO3BlHGBSFYbbj5m38GnqoznpRHyxB6U7y/lBX8aAGGMDPOSelJCuepwR3qRCmDk\n01huCsPuk4p2GKF2qcHdQpBU5PPpQv7v5dv407AYbsfMKBDm3fLnp6VBLcrB947FJxn3qbzA\nVy3A/Wo5IhNEytHuB6ZoGPwHXOQfenoBsxUSR+Wm0jC4p23dGVXr2NAiRG3KCB9abkMhwOc0\nKpjUKTg0q5Zj6UBcF+XmncOwAGKRUbBJ6UiyDd1z7UegajwAzHIoIVssDmgLtLdhik3ErjGB\nQMWFjyScimqxDEGkX5cAcetKyhmPJpjHFh1NPRhzn04qIKAuCSTT/mbGBzQId97HPNLjbknm\nk6Nk4xil3A96QrDt27tgUvcZHFNdiq85P+7SqxI2/rSGDY9KfG42kEZpu0Ng5pwdegoAEYNG\ncCo9xOOMYqQkBvQ+lKzDnaefSgYqstLGjM2NuRTHbcoIFODbiDk49qAJCrD1GKjGd57g09ie\n54pcBkzmmAHIYbRzSrnOGpx+bjZ81IAV5akxC7h60U3a3oKKCSE7t53MMdhRANrFw2RjkVHc\nMD06imrMFBGOTVDuSswbBUYFODFVIGSKaqnyR2J4oRmjTB4OcEUWAdCy7sYxQqhc8inxquM5\nyTUUsOTnOKLAP4ZSp6kVBcD7iouBiptojxk5NPaTzG4UKKQiFZAFAbg0sihkwDxSNCGbPWpx\nCNpPbFICvDuaMnOMd6lXAj3dfWkGWXYo+XrQGDNjHy0ANLDjaefSmcnIbr6VNuDKF24xTWUM\nfSmgGRj5iD17VIVG0gcGmMyqf73pTlddue/TFDAYPlXBPNCnc/zHil2BlJboKaR3X8KZNxrY\nVjg/hUTSc7StSzZ4IPzd6jb5uSMGkFxGJ6dKRfvggYpGbd1pyMFweoNNDQ5pDC3zAEmmjJ/3\naVwJfvcUxVZWIzQhjWQq45zzU36Cjavyj+KlHGcjmi4hFjVWyenakyVm2fjTm+bb60M/ylhw\nfeluMVdrKWY9OKYF28YyD3NN3loySKQfd2nrQA5gU44PpTWZyvJ4xRt5yBTsEqSR2oERszRx\n4XnNOjkG4Y+73HpQy7lU9valjhJ3Hp6UgQFRzjjJpWOz5duaXYQo3flSbv4T2oGRs21h1xUj\nSK0fAxTEHy5PrT8bVOQKAGrjqcgUpxs+7n601SSM96ThpD60CQvTG3rSP+86HB9aPMJUnOKh\n3HfnOaYyb7oHc0xsrIOM5p/mDy8dKj2nIOSBSAk34k+7mkOMHJwW605sxqW61CVLyBj0oAGX\nYuG5FN2twTUkmCuOf600ybWB6+1AClQfoOtAwqnB70iMGDe9MZduBjFIBWkLDpTwxZSB1xUa\nsTwBketTDCc0ICBSzKcHGKd/rMHtTdwZ2OdqntSMAuAhOKYhsrqpIPNLC+4FW5QjimfZzIx9\nPWiQeSnHNIYSfuwAOhpzfMQMYFIr+YqkjBpZN24UANX7xHQZqE7mkweuankYNICT+VJFs8xn\nY8A0uoCTtyqkY461FJGjck5NS3HzH2qvsP1FUAuC7DFS3XyqO5xTHbyyAKa5AI+tAETL8ucY\nFAyq+oqaSQSDb0FQSIRxnigCGRR1PHNTFP3JOOPWo2I4DDIzVjcklq4VuR2qQKseG6nipvM2\nKec9qZHg4BGKfIm1sHrQBXKbpAR6UqKQf50pwh3ZyfSl3FsEUANmbawAHWpI9yx7iNtMEhyQ\nalZty4HTvSAh81W75Haoiq7uTzSugDgAYFJPCV+bGKYD41+YgH8KQ7d/PBNMt12yg7ue9OnA\nZ845FIBsy7Rn8KhLBF4OamZv3eCOai4XgrQMm3BolCjPNRzqX+YdKFXbgj1pjZ59M0CHSSna\nq9Kaq8gsajk+bBHHNWI4fMbg5oAawDc96YFDMRjHFEm5ZSDx6UxQ4mDdRTAftAXb
0qCRQrdO\nKsNksR29aZsO3JNIBCwC5A4pGXcoLcelO2fLnOKj83Pylcml1ATBB5PBprBUOVOafuPUjApP\nl3DPBqgGs4K46VBtzICOTVqVBxjpTOOo60AQMg6EYaqrW+FYnrV6TLnJ4qKSM7uTk0gKEtss\ng5UVmXGkpICO3pXRHGOAAe9Qyw9SBzTA4G98PlpSCvFcr4i8IpdKQq/P09sV61cWhlzjp3rI\nutLMqk45qkNpWPmbxZ8K7e7DhoAeCBx0rxbxV8D545JJbTcGj+bDD9Aa+7LzQ42jZWQHI5zX\nJaj4NSaJwBg9gRmrU2jDkPz01Tw/eabJtntpIV+6DIMZrKks35Vcs2P4a+2fE/wti1JWEsKy\noOSrDP5V4v4s+CTWsjNa5CE5wvYe9dEKnciUDwhUDRhRgMONv9alihFwpBbYV6cd66nxD4Hn\n0nEbxtvPKjHb61hRRtCojcbWwcnuK2umtDHlZnzWZ5Qt83Ur9aptZq21d+GUbi1arFGbh9zj\n+VV5Lfy1A2ZyeTVXEZbqVjJAzJnoO9Q+W0jAqR7rWjcA/Lsjwd3LGmSWflqXGQGP40AU9oEi\nPuyQfvU4zMkj4PzE8Gpfs/lw7lXcO4qJowNu/q3KkDpTQBJL8xyud33mqNVO0hMf7p7UxQ0b\nEr82Cc81YaBlUYHB+amHUiWN/Jdg28Y5qe3YtGoYDGMDjvSFDG2ANxPJFTfeUc49qBNgrjYc\nDOODjpSRyMu5GX73PTilhbysKuPm657Vbi/fnLjy8DH196YrkMca8kDcOpNRNGJFb5e+eau+\nStvjccr1C9zVeTJBCkbt2ePT0pDKksbowKqGXuO9M8sySbl4X+tXJWG4OvY4NQso2EDjnJPp\nTGVZFO08fMe9Rfu5FAd/mHFTtC0gOA2P71RFAy4C4PrQSEigSKqtkAfdpMFVyDt56UMvy7sb\nD03U2XPWTggYHv70gJeWGU4AFRMRuwF2kHHHrTUdtiheQT1p7blAcrkDuKYxrZZmEbfOeNp7\nmkTcY9zDac/pUnyHa0Z+fvxzTFZhu7jPX19qBDJNvlsvDr97NSLloxhssQOKXiPAOAG5z70g\n+XccjcvJo1AbiRuUARFHJpVz5gbOBjLDvihN6ru6oaVUKKC3QjpTGN+ymSMlCeG+ahplWPI5\nRT0pq99rEdjzT2wURVA2g9aQhsynOWwwPzY9KSVtrB2HQcAUNH8zA/M/ak2OEGRlh2osA4bN\n2YwQWGfxpGd1t9jDnPanMXeMY+XmlVd2F3ZLDrQA1cq2c4bbkZo3fLtU853H0p3mIqlD8zZx\nTPv7yRtTPNAh3zbCX5J5XHYUkas0ewYYe9C5ww3/ACegpEkG0uF2EjAosA3ymkIL8halb7pb\noOp+lCZ+UfdPfNIV+8ByDwTTGBhVJB5rYRlyAtNMZXEatnHb2pUPlsFyXI/KkkWOSQkkhh6d\nqQDW+ZCzAhV64PpT/wDVn5GG0jIwP0pdkcanD7lNRKrwshxvjz69KAHW5xGecMxpFVckKdxH\nUUrbVdiJNysfu+lOjxHG428N0NAEa8kvt2nHemrJ8vKHJOQ1KyAqB/D0zQqFeOqjtTQDlZFm\nDN8q49KP3cZClflH8VKyllBK554prMckhRlhikA6NQrtl2C9T3ojYbmwcZ/vDFEcZuI/3IKu\nvVT+tOaNhKwcZcc59qBDUYbXQruOOvpRkfcyQ46Uxjt5P3m6YHanrJ95j1AzmmMVs7i2xgEX\n65piIA27GB2bNIjkrKrOcf0pyoMKPMG3qMUgF87coDhiTzwKT5trF2IHsamktX8ve0bMccY6\n1Uw4XHf/AGqALMUKtjkDIzTWCqwG7C9cDvTYAVwCdzeg7VG33csdx3ZB/pVCHKzjcHG8E5UU\nqs20gj6r6UiksQS23J6e1M3hmbBKdvqKB9CRJPLUbf4jgUgdll2N8x70cNGCAcA/d9
qSRQzh\nj93ttpALICpY9F9KVfvZcZRhhSKaRjDbW3fWk5XDZ3D+7TAWNduAfmPX5hR+8ZfvYJ6Uv7yS\nZRvVV20zySu5Qx9d3vUisEQXZgMUkGR061FJN0ULnHXNWI2Z1wygN60LD5uQR8w9DTGRIXmT\nKR4A60MSwI27Tjgdamjj2knPCjIWmI29WI4fHftQAjFn2FU/4FRJJ5mF2blBpqEmPKnOP1p0\nZDfwnnqKYhGTzlKr8pHUmhVGxNuMg4+tLGSwcAA9go60rRlk+Vht6EdxUi1I1XeZMkB+QFHS\nlTKxhcZB+YD0okA3Beg6EjvSR4MrYO1enTpT1GG14wuxg3Y08qu7kfLjoO1RsqRtsjJPf60r\nhmwythuhX/GkMFWMRkg7KTOWGFIBHJ/rSghfmIz/AA+1MVSrmNWPThRTBkgjAyrEMRyT3xTU\nXauOeTxj0pDatHErMxL5+b6U/wA0bQw4IPGaQhFCxSHgZ7Zox+8L4A28hTSTKftDEjgc7e9L\nIyR7mY/SqGK25yAQW43CkRlMIycEHgUqsYirlDsOASO1MZQxmUfKwb5TigBWwWCEblY9fQ1L\nCr/OS+QD941CW3Y2nnpt6ZPrTkUtGS3C9wO5pMBNxMmxhwT1o6OzN8oHAWjd+6LN94fdp0a+\ncULDbjkt/SkIRpGZVIbe2OfWmsWb5i5HtVjaF3KBluoqukjMCrLhgaYxC67lByBjlcdKmUqu\nSDnPU96jUOq4ABHZfSnKzc5646EUtQGrIq43DcQ3AAq22Y3wAuCM8VWX7u3PJ6U8SeZMqEbU\nX19aBDR8yllYrzzSnMigA/QGkz5ikfcXdkCnNkEFnyp6+1MCFY3k3n7u0VaViF4IY98Uxfuu\nic8VBEQm7aeMcnvTGTrIckj+EdKZNluV/djPJpGUJCw3fvQvT1o8ttp3OOnNAhZGbr0YDqO9\nJJlY4yBlc/MFpoUlAu/5f7lPzsjaMAr6ikOwrMWYqOD24prSLtQLktnk4qTcFDLjIPAbvTFX\ny127snNACFgZMjgH0ojKxgr1BPNL5bqDuwoJ+9SyKjfumGD1JpgAkV2I27cDApsmWUgDenXA\n9ac8ZjcGLjikmO3a6nB7igBzNtKcBQBk0jRBsDou3OfWnbMHcxAz0pcDzCknD46UAIvmSRow\nO1Om2nRn94uQfWnKp87YvzBfbpVmPT2nK/eU54oGV0f98X2lhn04qf7KrSAgExntjvWxBoMk\nzp5uck8nHGK6fTfCb30JESDap+v41HMM5Gz0VpWj+Vtuemea37Tw20sigx5Hp0xXc6D4FJY5\njaXnhjwK77RPBrQxxiWBUHTJGTS5hnnmgeB3QugQNG33VYd/rXX6J4Cmc5ZMvnIzwcelelaL\n4ThjQsQfmNdPZ6HHHwqcE5FQ5AkcVpXhFflLjOB0x09q6qx8MmCNNo9wK6W10UrzgBfQVq2+\nmg4A4as+YvlMCz0g5xt2kd617TSwvOcr9K0ltWH8NWIYi3ynhaVw5SnFb7V4FWoVO7OKsfZl\nX7oOakER28jFRctIbCDGxOeatxrx9aSKHOOMVZWPaevFIZGIcAE9Ke0Y2gdjUgXdkZ4pRGX4\n79KCiNIRu609Y2Vidu4etSxwqrYJpWXHFICLkDPQ9ak2/dIbr1zUka9SRxikVNy88+lMBzqg\nzjk9KjVPlqVYvlwetPMe5RUsZH5fy0iqSu4dalkAzheaIUK7i3pQncBm0L+IprKzYxzUqxll\n55FOjQ7uuKfUBkYG3DCnqqN1GMVL5e7INOWIc5pgMRe+cipCw44470xvlGAKVzhTg8+lAAzB\nTntS8sAe1Rxtu5Ix7Va27o+fwoAjZ/lC4xSGM7RzxQ0LMcg81OsfQHipGRkEkccVJHEGGM09\nI9qtu6UsciBCAvJoQiPygsmB0qQsF6CmhWVsfeHrUjKfX8KoCPZzntSLCwYknNWki3Rj
I5pj\nRjnnH0pMCBTtOTxUuFHuaEtwy570+OE+hzQgE+Z1xjFOk3iNQFqVCR/DnFMff5mTwPSkwGxq\ndwyOKcyYkz1qSFcsRipFjGN2OKYxhUheDtWhWOPapHT5cH8KTyzjg9BUgN2FVyACamik28vz\nTY1aRcjr3p6w46jiqAiVmZiMcdadghuRg1JDEW5JxSyr8xIPHekIg+XOR1pWY5xj6URqWyoH\nvVnYFUAjmgpDI2YtjbxigxgyDsKkVgwI702OJ2PPSkSOO37oXn1pVjPHPPpTkXa3JqQgqwIP\nNAEDLhj81D4281Lt5ywz70kkYePI4xQMijk/uE1MuMZzimRR57Yp4UuGAHegQPnbweKRVbZn\nvUjIwUCn+WOBmgCNFBUZFOL7ARwKcsbAZ7U3bu+8KBiRk856U/5TgkUsTAcHpStjBGcUrBYd\n0Yc8VG0Z3kjpUiqGU8g0ijbgE8UMLEb4Ge1KkRKZHWlmUHoKmhypHHFILEQjLrgdR601AVXB\nHWrX3TupjtuzgY71QERVgPao42cseKsJiRcnjtS+WUBA4PrQMQr0J4pdndTj60sY3feOTRy4\nz0I7UAMX5jzyaHjxgjgmpEjPJpVBbJPH1pARY3fKDzmlVTkkDgUwRGOXduzzmp1+bPOAaaAY\n2DHkcGk5KcUcLkCnRp8uM4pMBnlnbyab5LYz1FTthcc5pgkwTngUADRbVDAYNOVRsyxy3pTA\n+5s7iRTg2XJ6cUAMXnnGPanBsycjPpTWBPzd6RWKvyKXUBWzu4HFG3bySKPMHQ0si/KQfrTA\nRv3a59aey/KD/FTY9pZd3PFP3DOSKaGNTKE55Bpyw+Z0NOAUJn1NRs3zZXimFhduzOaU4UYx\nzTVVs/NzSqCz4pANVBtIIyPSkYlV2g/hUm1hnHJ9KBGWbpSYFYgr/FUhwFAYZpzxovzMcGnb\nfM4XkY60CBUwoPamNz8pGKmiHljnn60u4PyQaQymyFWyakOGwB0qaTDMTjiookLE4oAPLCrm\no2QIOOlWBH8pB4qPbnIxyKYFdlBb0qwqpswwOKReuCMmnPhe+RSAiwV4IwKaSOQDg08gyYPQ\nVGYCrc9M9aAGKSHJ259akaTcMBMCpvs5bgNmmiMrIUxgCgAjA2DIOakXC9acu1gT0x0qPng4\noEKy+hwaQrg8fjUhxIchaQg5oAYIxt245zmlb5mwBxUjDcwI60wE7/6UAKg3vj+GnsvzcJRv\nG0BRz3pTncCaABWzxt6UzZ1YAAmn7BGpZjgGmqvl8g5zTYhEj56c1J5nUEUqFRICaVgskmUB\nB96BixqGkDAZFOuECoXJO40zy2XjGD61Pu3JhvmoGUzcCJgScmpHZWXeoznmka0G4lvwqWNQ\nkeMYXpVAOjzIgC8Gl8stkjk96FUx5wee1TRsuzGdrHrUCGrmEEsNw7Ujjy2G7k9cUjducrTt\nokbPfpmgCT/XcngU3725AOaVWEToG+YE4NKvySFt3DUFEkLFgVkPGOKn43D0xUJjEi5DdsUq\ns8Crhs445FAD2UtuC5A708xCWNVxjHrSKzbDkc0355FAHB680DEWN0b5fu+lSMoRck/Wlhbc\npJ696SaQIwUrnPegY5G3RkqMH1qVVB7800LuXjp0ojURnnkUWESKgWP5jk015AuQvPHWmLhg\neeKI8kHYaAGMWUYzjNP4PJOPSm7i7Zk4qSOPcn0oYEafeXnLZ71Pj5/npu3dIuPrUz+Wzckn\n6U0A2QBk46UxFXBJPHpU6w4xtOR703bGFOfvA0NAKVO4Y6Yp27cuOd3tTfvKBn5if0p0C+Wx\nY/TFQHUYu9eG4HenK25COlPkbd/vUjDC4xQAg3LwTmmKpZ9xPTtTlYBQGHNOVSrcnANIBkhY\nMCTtGe1TEbskdMUxlC5PUUyOZmyDwe1MCYMcgFuKeu2Q4NUml+YZHI61Y+bAcDJoQDzBFI
wy\nuMU8xjAwcgVBNIcrxx3qeOQBVGOtADiw/u89KVQc8nikbG7inc+lK2pTGsqk4x1pSpUAjoOl\nDLt+bvQvT5uR6U+ohOd2Cfm9akUhU96j+TJySKcqheCTigOoxtgyXOPSnmQnbt6UvyMnrUSq\nVYntSGTD5t240iuB2waYzfdBXBp+4H3FBIN8zBs5pW3BcL09ajyVB44p3Kx/McEimA/cTCNv\nzAnBpE2rnHHrRD8q8DIqXb8pPrTQEbfKDnrSq3yjNIeme9Iud/TNIY7buZjnGOcU5s+WD0zS\nc7sg47UqD5ueaRQsW3d82aduCjGaRmx/DTZ1ZSpUDDdqAJDjg44pHRXPAxSFSqjccinZHAz+\nVACqu48NzSfxHHX19acrKu7ucUblVeh5oARGC8HpTjtXFRxttyjde1G8dOtUwHSONrSHhQM1\nBZ3izSssY467qkmhaa3KjkNxVbT9NOm5IbOTk+3tUiNMYj3DbwajCMnXoaeF3Kd3JY5peOM8\nigYeVyCTn2pQdvbbSdDkHmnMd2C1O4DmIQBjyaGxjDUMx420FdozjOaYhvlj+8fzopcN6Cim\nFiqWXgAZNI0bbFO3nNSs2/bgVIP3ny5571IiLa25enBpz5eYkr9KjupFRlCjLDrTt3y7s4oE\nOYhenAPak27vcdeKaqkxknk5qWNVVQQ3PcUAKzIHAI7UzAAzjGaGVmkJ2/LjrSbvlHzfhSAc\nPbkVKMBCO3vVePcynA6GncyDOMCnYBzMEXIOKi8wF+Kd5/l/e/KjaG5xigBdxKnHWo2BVCe9\nSZCsCeB61FI27PvSuAzcfL3Y4NPEbHDdqaFyhyeB2pfuwHbkkVQDid0bAkjNM5Xbg5HepTuK\n4JFRuvBzSATdufIGabIwOQB8vejzGjy/GKj3FsAdD1oEOjTvn5ac0e1cZqNW3EgL8tPkkAUE\n8UDHZDcbeajVtzkgcdKfuBGelN2/MGB4oAcRlhx0oKnrnik5ZjyBSN8i7Q24UCYqkc5HNMbJ\nY/3aep3kEjFD7lkwVpoYY2qQwpFYY3YpWbdjJpmCpbP3aTEAbdgDn1pcblYKPxpuflLKNp9q\ndG2wZz8p7Ug6BuIwoPGOaevscHrUbEN93gUz/lpxkigRNKxkXI6+tM2gHJOTimSMwxt6ZoU5\nUk07DJFKZJzio/MLAmm/Kxx7UKpKgDk96QD0Jzk0xR8xPbNG5jkhqFY+YFboafUCWRQNuOM0\n1VXdilkjIbd1FNZdoG08+tDAf8qqcrk1HJ80fFNLNySc54p4Xoh/OkAhkHlhcnNKrDeBjNJt\nXn1FJkgg9qBjpOucVEcFsngGpmxuyeTUEeVkbPIz0oAdGixx5zk5pW4GSN1KGVGw3PtTdpP+\nFArkcO7n09KdMhXvzSKx8ynyLkZz3zU9RkMgHbilhQ7cE5pxXc2TTkjMfIOaoB0i7Bx0xUAZ\nccjNSSzKygD15qFcFj6UgFOF5/hp23cuQ3HvTZfugdRSrGPLAXOaQiCSRDLgdutSbUXOenWo\npIdvIHNNXLNz0oBCyyg0kDbgQRg5pr7ZD0xim4KyDaD1oGSth3z3Wo5FDxNg81J5ihWzwc0R\nQ7lI6A1QEdpGP+WnSm3GS5A6dqf32jkZ5pZQWxjigCv5KhCWPNR26nnLcVaZdybT1qFlG7I4\npANjU7mz2pWmwVBGanZQqjnkiovLOCT07YpAIqrJ0GDUMWVlORyOMVZUjbx1FQCbazHblvWg\nAkbdHkLjtTUYqowM4qXcrd+euKhjh3SEliBmgBT8zbsYNSM5kG08AUeXtbJPFM8s9d/ekAMq\nFgen0pkwzyKJFePBHIobDAY4oARQJKgk+WQ8cCplXyx15p3l8ksQRTAhVDJ3x3phVkU9zVqO\nNSpO7iq0uc46CkA1oz5YJGDRFIYWIHenSfcHPNMVwW5HTpQA+RvMwcHNNyNvB5p7SHa3HJqK
\nNSV45NAIN3Rex60/blSM5FRfxYHNSMvl89RQAkjHaB2qDaFYuRU/mKzYxTZAN3A/CgBpwYxj\noetL8rentSMo2n3piKcZNMbJNpbJ7VCy88VKuVB3Go2bEeAOaQhmwq2ScikKFzkVKuHT3oVc\nHgUDIJIx0xUUqnb1wKsSfdx0OaGXd2oBFFV4qKSH5cY4q/JHjGBxUSqcmmIy5LETVQm04M2N\nuB9K6BlKtnFVphuQkUAcjf8Ah+NtxXr15rkdU8IJIzsqj6Yr1KS2Lc4qpNpYlUlgBinoKx89\n+IfAMN1hGhVihyu5c8+leV+Lfg3HJ5twIvnb+FFxz6V9d3+go2dig5rFv/DqyKVMYPHWqjJx\nJ5T4L1r4V6jpOVjtzKHJYDGMKCeCfX2rlJtPutPZhPbOgBx8ynAHpX3rq/g+O4VhgpzkDHSv\nNfFvwhS9jYQxthmyR2J9a2jW6GUqfVHyU0KuzcZXqOKgcx+WyAbiTjntXsXiT4S3VlOxt2UB\n1wVYHj1xXn2q+FbrSpvnTEGPvbTw1bKdzFxaOWe1k3bAdgIwahkt1iXOd7KO3ar0kbLKquPm\n7t2oeBFG5PnyeD61pckx/KZUJIw2M9KfAu9Qc7eOlXp1D4J+ZWHylaqRxtvKEYYH72e1ULqQ\nxoUbeGyzHHNSxW4lyJM5zwVpfllkIGRt7f1qWGMxrnfwx796aGIyq3B59sdfen8NIv8AEAPw\nHtTtwaYKDj5eRTtu2Tb/AAYzTAZcM8i7wNh7e3pUclu0i7gcDGWx/EamVRzk8ilEIXbhyQ3r\nQBnSOqOFI2oefoakK7kG085qaWKPBG35we/ehlWLbIpyxODigQyZd1uxPyheoBqt5Y+6vRhm\nrDKFZv7p6/WnKo4YD5icZpiKUqGBSRgrjBJ7iq8yqyxhOR2Bq1Mv2iRwxwB0FVIY9zMJDsjF\nAAy7nRAwHPNEjfeAPyjg0eWWkDxjCDjP9ac6locBcgnlv607BqJuIbpsG2q6/MhG7IHIx0zU\nnCh2kLZX5VPvTMBIclfnIwR70xoWRvuLwckEmnykOWRQMZ+9QsYZY1X5SP0pPs7RqzMeM/iT\n/hS6kjpCFwEPyEUsbHrt3gcCl3KFygwe+emaamWkO3O7GWFAwa3aRskBB1qPasgwoyueBUyv\nvyr5DelMVmVtgIwR09KQCOoZgyA46Ee9Ij+S54yW6ml3FcBiQvQU0h13DaCMZDGgBBtZl2gn\nnnNPV/ujGCKSRgohK4GPvVFt3bXPKt1poCTcdx3YwTnpQ3OAeE9qa0m3Ab7vQU/leCN3fIqQ\nE43OFGGAzj1pkkbMqkttZutPZfMTeDhumKBGTKuMsMY4pjHO5YIMbcDk0jBXjBJKjP3aZDlS\nGLHcFxmlaJvLBU4BPIHb3oEJhR8o3AZ60kbNtY7Tt+6c/wA6VlMa4YE5OBS7ysZLqQc4IoAQ\nKN4QrkY/A00zLGCu01I26RhtOBjgGmbQrKSe/wCVIQiqwUkICSKcrjC5HbpSSfJIf3mTjORS\nzR/LGd2NwzQMUIWUKemeKkhkBZ0xhlOM1HuyDjIYVEFdFdjJ8r8s3fFA+pJIrrM24gD86QIV\nVnGNv949qcwiLL5ZJwMilCrLI2P7uSvSmHUS3uPJJkA35GCB3p7OrKZS23dwF9KgAHl/K3Ge\nvc+1BxHH8vzAnOBVAEjdR3xxRhjCMkZ70kirJhcYzxuoYliqHqpxupCFVRIpAX5j1pyxrtKf\nd284pioeQW+UnrTI1PmZGcKf8mgDXgvvLjG/OccjvWVI25Tu5BbKjP6Gp5GC8scnFQqoVyVA\nGR1NSA9ZNm4N+7J7Dmo5lVYyBk0zkqQG4zUkcZ24dsnrVgOZh5SY5LfxU1mK5UAENxuojXao\nO4Bck/SiNd29kIY9QM0DHLGGQHdjBxTVIWN/L5APOaRog20hvqKT92kmMHa3A9KVxDo7oZAw\n
xJHJxQkitIo2jfnrUbbowQx4zxjtUixszKegXmmAkkaLteMbmznrRJuZmdMqx+8B2pDhmwh4\nz0p2wyF8ccdqQ0IH2/d5FKNqsCDhj1xUccykgKMkdaftCSFh0xTJAuVZiGxzxTWuB5zAfMCu\nDjpTDISxCgMfQ8UqyBeANq4+97+lSxicLEUAbHelaZWYdRtHGKX5kkVSS2eTxSIyRiRWx833\nWz3ouAjrl3w+w9PenMxaRgBwvpTTub/WL87dCD/OkVgqvkYz/Evr7UCHFZIzk4ZMZB9/ShpA\nse5QSc/NSbiYlUHYp48xvWl+7gHlx1x0x60wGeZuQELtyeSae3lqcj73c9qXcM4wHJPSm+Si\nzlMH1zmkA/JbA4I7r60FhwEG3PenbQyseA2Oveo2faqrnHfdQAuZV2hjkZ6+tDMWVn6LnFNT\n5iSW5PQ047sE7ht9KYBtA+ZeA3GKXa0kwBQbB1NRtvPykZPbFKsMmRj5Qeqk9aLgSShVk4GV\nI/Woy25Mg4bODipEzJnjcq0xpFWTKLgj0pjHgG4QlRmMcH6+tPkhEZCmTqOvaltWEMbkncW7\nU12STCAZA4zn1pB1IbgBJFKnCdsjmmorthj8p3dqsSKZZdp5VVxUUb5Ycjd0z2oHYmmkKTBg\nct6kVXFwJTINu6Qc4FOc8EAknOc00MrZKqUc8Z6UCHeaYcPEd27rnrQpM4bccN3HtR8u4qRl\n+uKULvUMSFwfu0gBF5IVMgdD6UjSblAOGb2pITkSHacA5zSrGVYtjPPahBYUZkXhRwc5okm3\ntnb+A9aaq/NtV9vJNSMSvbIz+NMBI3l5RxtDU3yt6uH+hbFOWRtpkPzkj7velXdGow2Cw6Hm\ngQ1f3ci9H2rjFLJsHzdCeTikWEKny/N/Q0/ascedvHf60gFwrR7N20g5GKcyydW2sP7uaFVZ\nWGE3EjpS7UVWY9AMBfei4EckhZSzEg5wpIpZEdmUgqpApGBKxs7Zkz6fpT+J1YrmMZ5yKe4E\nTJJ5Zyfm/vGnbfMtQOB9OuacsTFQmcg/nUsUb7lfZkg7cdKQ0QRRyfKxHPTFWFh/fbmTzS3y\nhR2q7BpT+cMqxU8sq/41tWHh9XAAHOcg0XGc1b2+G8sx7nBwR6VoW+ktIxLx4zxnHWu4s/CT\nXRUxqpkHoPzrrtJ8DvNJgjcg5AK0rjPM9P8ADczf6tNzOPugc10+keCJbrCLEwZepI6V6/pP\nw52wxt5QV25OBzXXaV4FEIA2dOoxUOQ7M8r0v4fyMqjCjjncOtdrpPgp441URqo6HaMZFekW\nXhWDAAXB9xW/aeH4wyjGcdTisuYpROH0zwsEZSYgo+ldHa6CHwNoLHpXWQ6fFGpTAq3HYpnI\nG1l6VPMPlOft9CNv8xwR3xWtBp6lh8oFaUVuq8dqsrboi4xk0rmiiZq2nz4Ax71bW3OMjr7V\nbW3AHyjmpPJ8tfelcCoIQvIG71qTy1I+UYqz5Jj6detPRV5+Xt1pDKyxHrgCpFj6FulTbdyj\nmn7NwNIYx4hwRTJFZsY/GrEMJbkngU825JzTJK6qVwM8VLGdrE4qbyTx8uaPLdegyKCkIF+X\nOB1okVmfpxUhUgg9qeuP4hk9qQyMIdpFJtwvoamQHdyOKcUwuWH0pgRxxlhuNSKu1Q2aN3y8\nDrQp5GRSGNKc++aftHfmnqBu5oZcvwcUILDFjwuc4GaB1zjinrCcnjipI04wKYWI2yeQKNpb\nOKkT5ZMHpTljLZwMUCI1Ud+DTfK6tirDQlmBY9qPJbjn5fegOgxYV2570/Zjg8UBS3Tpmnbd\nzZbmpYhVUrz29aQEsuX4pye549KSSMyc4qgBnyuMZpwj9BxSoVC4PBp6t+7IFA7DY4yq9aRl\n3cdKfuPl7mFOXpzU3Ahi3x5+bK5qWKMyMTnApY4y+7I461KFCqBjFAWI9u0mn7mVcgZqTcNv\nFJ
5W1M+tNARxqW4J/KnMuGAPNOVfmB6Cnf6wEAc560mAsbKmVxye9SHIj45FMC7Mk805GLLh\nRQUiHcdvPTNTJEGXOeKYsZkHJxzUyLhcdhSEQPmJvlFTRszc9PrUbTbm4XmrEeHTmgQvzDkG\nmMpZgDwKdu2L603l13d6BjlxHJ7UnLHefyohhfduLfhUzxbs4+U0BsQbTu/lU6/u+DUSQncf\nmzipfLbdhqTEPVVbPFN/5aelPRDtPNM8s8knJpiHspHQ5FCx8gnpSoobqakZvlA4IFAxAoOf\nrTGz5nB/Knb9xxt4oO1Rnoal7gN2ndnIqQ4645qLd+7yAT61JFnjvmmBG29lx0pnlscAtVx0\nHJNIrKq570FDPsqsvPWnCPC44yKljZpASRxUa/KzEigBu3ChscHikX539KnXLY44o2KuSeFp\nAR7sn2qUDC8L+NIpikbKmpd3y7QaAINo28/e70m3nB6e1WPLDRt3aolyvbB96oBu35gSMLTH\nVmzj1qzuO3kcVCQCzEHBNMfQYF54/E08MQ3QEY60RrtT5uhPOKexG4Ko4pCGxqercLTTGW3A\nNxUqsF68+1B9cc0rAQr8rfMKkEYxzwKe0Z2jAphk3YGMj2o1EHlpu+9SKgZhk96cqbOe9PXE\nmWApWAR4wmc856VAePerDfMeTSeTupsCttIAwKkTL8YxTtjs20dqey8Y5FGoyDymYnaeacqc\nYNSnHGOtKMg5IyKBkbIu0ZGWpQuOWHFKFHOeab170hCYWR/u4HrSKrNnIqWOMr1OQaUZweeB\nTAYYzgetEajawY4PalZwFzjmk8wuw9KQISNTg80MPnGeGxUoVWVt2cUjrtwR1xQMacH5X+UV\nBcXX2GMtnK+1Xo9rMNwyar3mnxTDac4zQIw1vJtQ3siMIx1yKt6fFcxycthSO9akUMdsgWNR\njv71KwDYIGCaEBAvLAMeadvK5OMLT9rLkkcetQsnmDg0DGZbPzcUgYBSBwfWneXwdzZIp0cY\nbHPGKVwEwzRjPX1prZDD1HNWI4x/ewO2aVlGxietAWKu4LlsA880rIqqrnpmpfJHk8jrUbLn\n5QOBTCwkifLx0oCnbluR0FGDtIPSpI1HlkbunGKBEKxlT071NJH5Y39zQq7cc7gO9K0nmBsi\ngCFl5yvIPapBhmHbikibK4xz2pVbzFJx82cUCYscYViW+YUsjBSOBgnFG0lgM49aakZMmSeh\nqRgyjcQD1pPL2t/eqTZljkYFNJOcgcdM0wGRrtb0pzKW4U5NIv38Hk1MwUMdvBpgNZHkUDG5\ne9DZZsDgVMoLNxwuOajbG4Beo60AJPH5bKByCM1IBu5HUCk+Zo8joaYsTx4INMENMxxzkH3q\n1DsEHqTVbluScc9KsW6b8gdulMAY7sUhj3NgdOtODBjt24YUjPtfp060gGbsYGPxp3zNzihX\nDPkjiiR3LHjavakNCqN0mO3pVjbtHyrVW1Y+Zgjgd6ubxgjqSKQISSESxll5YciolWTywGXt\n0qW0b7PJtIyG6g06aQLIVHApgOhkCRheppJCQMkce9LEu0ZHFSLCGyx5BoH0I4sKpbsaTlpE\nIJCj0709oSrBVHHWpfuqGoDoPk27gy8HHIpCgkAyenSkb950ODQqtu29B60hDdxXIwRT45A0\neTyRUgYY+Y5PSmGONeB39aXUYcMu4DmkhXyhnPWnxyBvl5xSrHubGaoWo4w+bDnGTTlj8tOn\nGKcjeSpB57UnmFWCtz6UFAiBlz09ab8vA6UhkzGSBhc0z5yvAwD60ASsNq4Bzmowj/aDnkYp\n20txu5qzCVbvQwIvJI5PIqXayqGI+XsKH+VslvloEjFQM8UkAzcUPIxSK+7p0qR2M3JqNYfK\nYMT8vpSYC7xnJGaezCQ520u0MTxgYzQpBwMDPvSH0HY3dgBUe0KdzdBwKe2FIPIFKzBj0+Wn\ncRH5
azZYDpRvMWAOlP3bW+UcCkLZbOzigAI3KGx05Iqb5G+YDBxkCm9uPyNKqnAJNMBqrtYM\nT3qUyCRiBxSLt2nJ4pFZQwyMUXANh4O6nsoboaZuPNEakNgikAm0K+9hmnqdzc9KGVuv8IpM\nFu/SkMGyWAxSbhuKkGpI2Kn5uaNoZiT3oGRqVXhuT2qVWVW5GBUYXDAkbjQzBSR0oQrD1USZ\n4IprAZ9cUxX2yYzxTyRwwH41QC28pLspGBTw3vTWbarf3qSHPlkNxzxQA9yOD2oP3iKRm8lT\nkZoRgyjPWkANHtUEYzUi9OuPWo2Oc5FKchV4+tICZuxzTdxkbmkPGMcilB9eKaDUU7gxOcik\nbAwRyaTcee1LFlW3E9aQ0P4wTijkjrx3pGYetLt2rzTARoQ3zd6RVC/40/GVoRQMFuRRcY5W\nXb8tOVQwxmmbQpLDpSjCgAdaLgTKMcE9KRVOCQcUzbmQHdipFUKxJORTYkIDnkjml4K9aQqy\nn1FI3zYxwKkBwk2pypJoLlVFN3EqB0pzsu4KetUgsO30UbDRTCxEmFYHGB606YbeV60kJ3Nw\nePSnyFuRjioArtGB8x+9S7lVcEZpY4yzAtUN5PFBcBHdVPoTQISa8WGNmNUk1TfIMdDxStb+\ndkbuM8c1Nb2CqvOCR2qhFuCTzMjoMUpjA4IpVjWOP5Rye9SN8wHekxkassa4HNKWG3K96Qxr\njP8AMUDj5RzSbCwwsGb5vwzSjnPPFSRqsijK8+tNaLa3YCmIYx3R7TSeWWIOakkw+MD5RTpN\nvlbqAK8LqZGAbIqWPHl8cg9agaEKSyLkn0qRW2bAPxpjHGRVcjr70126f3ajU7mOenpT9u3G\nTtoBEV1GQuQec/dqNWO3pk1OVGducgc1CMc5GCTUgSRsVjIZcfSo5F8z5ScfSnqpzndx6GnH\nCkEYNMgYv3cE5pY8svy8LTztbOBimNlF4P4UIoChWb2xTunbFJ5m5c/xUu3cuTTAjVi249AK\nerttJY5prrhRtOadEwDEtyOhpbARyKH2hSM9aRWBIXPFP+Rd1RQFeSR9KQEiyHeVxx70Nhfl\n603d7ZpGiXcGVsj0oJHqqdR81ImGzjg0isNuelLtMfJHNAxG+ZeDTV+XIPGaczKzDHApXUsp\n6EdqsY1WULuxmmo6xtz1NKmI1J6+1NK+ZyfWp6iHkq68HkU9iFxxk1EzpD82OKeo3NuBypoC\nw6RjtwD1pN3y4xk55pkynaNvXNSIp2/KM+tDAdJg/KBxio9u5CO/pT2+XkjHNIF2sT3pMCOP\nDKVbgimBisnqtSNjcfUimRq27JX5KQwky2HDbcU9trZJIpJGyuAPlNRtCWUjvTBjGb5tvWnq\nxWPrTWXbtHfHNLt2kt/DSEhYyCwOKSR13EA5FL/AQDioLeMrNnGaaQErZ+i0LI0ORnJ96JJP\nmw3ShlUtkmhjK7ZduDx3p8ahFI605cI3HeklQrhs/hSQEZRljJHPPSpYmYL0pisx6cGpEkIT\nGOaGgIgp5LfWoWYl+mBUm5vNOeh4FLIPKX56AGrjGOKjVXVsds08Jwvcse1LJuVj3UUgGyx+\nZkgdaWSRV2qD7Usbrtz602OEfePrVLUBVQD2zUKsfOZWPK8ipLkMpBB+WkUKFAI+f1p2AHxI\npwcGo1Urh8bh0qxJjbxwelRQyEqRn5R2pDIZEZvmPApvmBowmeaszoJOmQBVfaBzjOO9T1Eh\n2zy1BxmmsoXLYqfBMJNRT/6kACgbIWAbgColyuQetTCQ+nNMuF3ngYNIRKp3d+1NVgqnjJJp\nYmXA7+tObawOBQAu4MvNQ7lbO3pQG5K55qJd0RORwfSgaJIcsDuHTpUMjlXwOhq4rpwCMcVB\nKoY579hTAhLHbjvScMvIqRl357EU9VWSPI6UCK8jHIwtQpCyMWOcHpV1Iyreq1HLypUdKBsa\nzCRR6g
YOKa37sDBxmmw5VulLIpU/NyMUiUJHH8zEHIxSMdse0n5qkiXMfFR4G4g1SGRq20j+\n961Lt2puPJpNuzk4I7U9V8yM4ODSYEJ+960rg7TgcUirsb5uKchPIz8tICPqvNKwCDB5J7+l\nJkK3IqXaGyM0AQEeWuRyM02TOAAak2jaRSeWBtzQA2QcAjmkIPbrT2XHHagKGPoaYEbZ2470\nxeO2alVRksTxTW/2RxQMjdc9uKhaIL1HFT7jTWXjmgCu1uQfao3hx7irn3V56Gm+WPTOaBGf\nNao0RwozWbJahf4Aa35Id2QBiqklmGbrigDCm0mOZSzJk471g3/h9QjHGR1xiu6a3O3HX6VV\nu7UNwwHShAeP6h4VWZXPkrjscVxXiL4epqEZ82IScY+729a+gP7IVlJPy46Vl3ukjIO3P+1i\ntOazIkj418TfBceW4jiVIycA47f3q8w1z4b3WmCUEsVjPysq9vXFffmoeFIpEcbM7+S2OlcN\nrHw2S4hYGLcOSTj+daxqGPIfDF9o8ttcJ5ZCxsOVI5FU200QKxJxuOAa+q/Enwfgn8wBQsrD\n5JNvT2ryvxD8KZ7Jx5atIcbs4raNRMhwZ5FDF8g3jbKD1A7VLNGHxEV+br0rqrzwPfW8Ika3\ncKBu+X+RrBk067tJ900DoW6Bu3pV83mTysoy2MkbJIF49aSRGiyjHc55AFahaSSIRbckVnzC\nSMvMyZK8ZxTuIh3KrZ24KjmnxxuFSQkEE8Us8Bk5LhCq5OO59KhVXbaHfHce1UhBMh278byT\ngqOtQKwhUqUyF5zVkF+hwYx1NQBFZWY8rnk0AJIokXco+Zh90daWSL/VDHP1p8I3SIq9ezVO\nLNjvcHjv7VXRCKs0OxS4XJzxjrVeS2kbDBR5vp7VelhZXVkJdcc1G0bJOpO5Wb5RmmBlSRlY\n2DD5QeNvrU1vMfJIxtH8S/1qaO3LbzjaQc1G0LK2McMOlArkN1if5VHvUEcZydwPAzn1q1Nb\n7drFyqDjOKhkjDJlWbOcLkYzTQyFPlG8xn1p7YZS+eD69qWZSFUh/wDZKn1qDBxjPQ4NAhY5\nUC5C72HFSbirK5bDA54/lUG8KcIpd+9T7X5VQGCjcR7UuoDH/fSNJ91s8N6UmT/EwYjqcdad\n5cjRsdvbOfeowVkhUkBD0ahiBNjglQSKjj2+Yqhi4HXNSBgn+rG8CkWEb8jAVhnNBQ+IKvmE\n888U3aG3YGQ3emvIoBUJlu22nBF8weWc8fMPegTYi4+4/wA3H5UmH8lyjZA6DvTlbDMwXK9M\nmjia3O0Bc9aBJiRrtbf5oXplaeituLIe+RimeQAwwuc9eaPMUSbcFATjd6GqGK3YFty9cUbs\neZ1BK4z6UrSLtKoN3ocUmckMQG7GpYDEhJh2rJuVe2eaTMjYDMKIY1dZAi4dud1IsbLD8xyc\n/jSEPKmTO1scdKYflXaOe5FO2bdoznccYoST5gsY3HOMUxi7R5ecAp2PcUJlnEb845ApGwu7\nZ8h6FacjbQpC52j71UIZ8xkYsMAHmkZh5gCqdtHnbmLfdJpWw0ZbBC/3qkBZGZWDlQT0AU0k\ngbLGYhFK8FaXy2ZdhwSRnNJtEaqoJJ/lSHciwpkGMjbx9akh3FikZC5PenSyANuI+UdgKiiY\nMzZDKGORTC49tiSfMDIw/u9BQ3+swCAW/i9KUxlWBR8D2OaaoLBiSCwP60wANsU5G8KcbvU0\ngyvJOM0jF1+aP7vcnpmnqzs+Ttdun/16Q2PC/u8/oajY7Y1yQBnBNOdhNMN27zFHTsaYpDKM\nHGc5zT3EE0bowUrknoBUfzxqSR8v96pfLVo1YOSw5C1Nb2oubWSQt+9XnFLYexUXO4EDKdac\nQh68HvikWMpI299o7U/7uB15+9T3C43gyDaC3GCTQ6YJGeB0FG8phhyTnt0pr8rlj83t2pCu\nTKEYfMu4
9aYsiyxsSxHOAcdKZHJtYr1J43ZqQYXKGMtxkHoKdxDPLO3jn3Hehph94BgcfrS7\npNu1V4Xkk07zpGUjaPnpDGKrbVdflXv7mnFfmIzg9dxPFHO3AzgdR2omVWj+bgHgnNGoCuFa\nNASu6o/M2t93MueBQIQ6osbEbSCW9RT2jUS5ThM5oAYN0ZLuS7seaQJlAr425yBinqrPI/zq\ni9RUZbc25jnjAp9AHbvmJYcjjikykbKo5Yc7TxmkVXVlwQw709ivkndjdu49aYhsyq3KnnOS\nvpRjaxwGyR+dOHywngb6Rnl2oGPTil1AONoRV+bGaB9zbj3GKJHEfGcClbbAw8tsg9vShlAV\nVs5YDI/KmMu+3Vtw25wKUHc23aGBOCaWePbmNPTgdgaQhpwsgYcY605wXycgA80kY/dgsMNj\nkepqRvuhlTPqtPoIjZHC/KfxodU2q5LM3900qqW53FcfNtoOGkV/u5GT7UIBY1aPeScnbnOa\nVS2QCPxpuC27b8ynvSNIHbI+XA5o6gPaRUbbjeCeaP3aZTaRjniljXcyjO8Yzt7YojZcNzwT\nwQKBkasZPnDFSONuKRVEeSy53dBVlmXeCzAbT6YpshBZucjqKNREUmWZW6DpxUhT5QzD5c8r\nUe5mUlRk1N5jbcuM4HOaQETfKhCnBJxz6UMqMygcAcE0sbPyxUYFKfmy5UR+uKYyOMugkCjc\nCcCnspVVYPhsZardpm52YGBnhvWpNQtPszgFiM8UAUISVkLEADHK0jQjOG/1mcrUqhTH8o/C\nnYZYyOrVIEbHeoO3bt+UimyMF2FVyQeBUnlBZAxb5QMlaZEo3SIFaUkFgue1UBNO+0gMdp7b\nemaXak2CPmK9fSkjiG8LhtmPTpU1va/aHWPYVUnqv9aQyHy22E7djZ4YGiH54V3KxJPKgfrX\nQ2ensn7oQl+4BFX9M8HzXUqyiJolzyf/AK1K4dDk4rbzvvbghHGBzmr0ekmWBdo5/vMcfga7\ny18FiRwrBtoPGFrrNF+G3mRsVUlt2Qu3+dTzBZnlNvob3LRI0RLL2UY/Wui03wfK1wCY1K5z\ntXk165Y/Dv5l/dYCnOAPvfjXZaP4DELBxaiLjnaP61POXys8cs/Asv2d4Gt3Ku2A2ea7XS/h\n2sNrGGhB2gdu/vXrFn4SRVB8sbs5rorDwzbKqEpxnkVPMPlPNdG8BoIzsiC56fLW3pvghrZi\nY42Kk5Za9Mh0aGNQEXCjpV62slRCu3A7VLkNROX07QWVVVeB64rXt9LEQwTz3NbdvbqoOVya\nlW1V2Py8VnctRM6GxVVyFz71ajtxzgYNXljCfKF+WpFgxggVLKsUUsR1J5qfywABzV2OPcxP\nSkaA7sk5WmBBHEMg9faniMs2cVMqDbwPmp+flxjmgZGq+lSFflwR171LCo9Kf5ZZsGl1EQqp\nXgjJ9aVYyOAKsrGd2O3rT2jHBB4NMaKTRgNnoKfGAx4xirLW4ZeetM+zbORUsY1fl68VIq7l\nOTipFjO0E9fSjy8sOMUxDI2K8E8etPX7pJ5p7IjEAjpzUiInl8DjNAyJYxtz2pG+ZsAYFS/d\nyOo7Uu0sucY9KTASOMnJzg02TcMZBNTLGQuc80bvlwWxiqAh+72yPen+XuxjipVAboQR3pky\nHI2nipAGhCryfmpEj3MDTmA3CpN2RjGKaATBXntSqqgbs4o8nzCPm49KGTIAbpTATiRjt596\nk2nbkHimLAUOd2PYVOWHA/SgBu3dH0yaFXOQ3FO3Y6DFN3k8EUihNu3IpvK9TxT9uOx5pywj\nvSJGBd65xT2b5fenEHoO1OEJ25xQBW2q3Vck1IsahQAOe9S+XtXIo24UEUDHKoXGeRUm0Mpx\nyKjVDnnp1qRSFUgcUAR8qwGOKfx0NPZRt4bNOeHaqnvSGRCPcflFKzZ+XPFP3FPpSxoN2eop\niI9u1cE81N
j92D270jw4zmiPOAOueKQrEirvhIPam7iuAOnepdm1Qo5A70ixbsANj60gGHEn\nsKRF3Dg4FTSRK3C9PWmspZNqjFJDFW1VW3NzSlevPHpSKxZMEmhV3HniqCw2Fj90ipY1O08U\nMpj680+E7lwOtAajvL2rk03ecgZpzbsYpjAluPwpjFWMofmHNSMwQDdyKX5mUE800LuHINIk\nPMXBoZifuihYwDipo12E/KTmgdiKFflx196c0e1eeaecxjC8CjcW60CGKu0Yp5j+Ut29KBHu\n5BOacsJXhjnvQBHHkfe49qfzuwAAKkbA6CoZCNwCdaQyT+LGc0nlnfuxxShDtJPUU1ZGYdeK\noY/LKpI5FMXMmMevNO/h64HemIP3hwODQIm+63BpWU+XjNIiU5sbhz9algiCOPYTnnNWFj6E\nHAxQoXJwOKk6r1xiiwCbfLGQcmoiSzcjmp/mUZ60NjgkUxdSJ8cDPFNMahsDpUu1R9aaG7Hi\nlcYixgcE5Jp3llML196RjtYEcmpGkLckc0hDBGI85pkasrZJyD2qbdleRzTVUk56+1UUNkZt\nuAOKYFAYY61ceMeWOOajji+Yk9KQiI88HvTlUKvDYpVh/GpFUdKQbEQUMmeppfuryKlEePpT\nWYKwz0oGRRsN2e9IBt4PT1qRtrEHbxmnS4MW4A+lAEKqDzQpKjLdPSplUBRQwVuh5pgRMqSK\nX701+VHFSCPGe4pN3mAjbikIAvTHNMb5cgVZhBXOeeOKaq4XJHA60DKqoHb5jipPJEigD161\nKwG0YHzH+VSbd0foKBkO3aTuOR04oXH8Q4p6w+WTk5zQF4JzUgMyOdppuDxuOQafGoZckYPt\nRgLzjPamBHGSsh447VJu68UrL8wxwuKVV/EUhsazkKQe9R5KjAXrUrRlmx2pmCTjPSgkb5YV\nSSOTSRxlQSOBViOMMcnmo5JMEr2qgGM5bqRinOAMbeaSPrnbT85OMj8qQhFRpl2qeR2pkmV5\nZcVKu6MnbwT1pCvVmOT70gItuVDdajVNyk42nNWUDLxj3qJmO44pjG8R4XPFK3oRTzGB8wP1\npNgXkmgYikKuMc0HAHBwM80pZM+pp3mKeooJIWkCsT1FPVlUHn3ouI/3eQOKjgUEEnk9qQx+\n4MM9s0OwaTHQYqVYPMjIxg1EsfUucgdKdyhY+uDyO1SqoVTnlvSoVyq596kXftb34oGOjbcQ\nT8uO1MZi7EADOacrbYthGabHg8EYz3pkscuWjwOCD0qRTlhk4NMjj2qxByAac3ygN3oGMkQb\n8DJPWpbWQxNkDH1qIb8j1qdVeYkKBxTJGbSszNnjrRIu6MMDz3pWLL8rCh2CLlVoAbGCe1Pd\nvMYKeKjaRo1BHNSN0D9BQMVsjlRgdKlU7uAOVqLzGTIPSn221uQ2R2NHUCZEG/c9NnzJIVBB\nOM0CQnO/8xQoEcmRyGHBpMESsvyL1FSREN1+UU1ZPkOR9KWHH8S5zSGLtPAVtxzTm6hcZA70\nscflZPanqGZBjGM1Qhsf3j2pqgMWIJqZsJxjPvVb5o2+UfLUjHzZ8v7uMUsbiRwMcYp8ZJGW\nxT4/LQjI+Y0DGherdh2peVAkYfhUy7Y1bB59Ki3BmG/kelMCNZPNk3AYFTNIx4IGahaMrgIO\nM5zUxkDcnHFAbifKqlcc9aC5dl4wuKk+UruB5pOCwJ4xQSxPL+Ydh3qR1WBtwbj0pNx3BevN\nP2BmJIzg03YYjOn8XJ7CnyH93kKMUj7edoo3eZtCdO+akAjYscn5adMuVDD7uelGDuAIpjsT\nIvOFFSMQKdzHFCsJGBGRUrZYYQ/N1pu5vM+5j3oGCqXbvgetHmAvjb9KPm4xTVzwD3PWgBwY\nrxjr0qQMWXBHNQpIMlc5wcU92KsD2oCw142yCGxUuxjgA/WkjbDZYdaY2/zC2cDsKbCwrRsr\nfLTnG5s8/WkW
STaeOafG25duOO9IQKwkXGelO3bcetNRQAQOKGyMEDJpjRJu3Ng0SDfkrxTF\nfc2SMc05vlySMUhDmUqqqfvd6ZuO1hilXCyBi27IpeRx3oGCqdnJqNwGXB4NSqeu7tTBIrEl\nhxQBFt+UqBz60biy4BwBVgYYZUcVH5K7uuKBDipeP07GnsuWz7U1kGRlh6UKvXnmgB/y7cNz\nTURdoI6ilU43ZOKZg8EE49aYEv4YoaTfkHimjcw680qqCcHrQA5VxGQaRcZ5JOKVlZW4Ofak\nDIzYI2t1oGPf5jx070L0xjNJgdC1Kx6ADNAxwQY5Ao3BV59aYcnJPFIrDKg5pB0HlmbpxzSq\nxVgppykJlj81KoycsOlOwIXgnrgU1V2MDnINHBUigHauNuRStYZL5e5cDimxjYWBb5f1oWYg\njHA6Gkb5eGGaogkWQgYLYFMZl3fLQXRsBhij5V5UZpDuMZmY85qT7zLxzS/MWxgUvO7BGKLi\nJPM9qKNw9BRRcDFtNet7m88pXA47Dv6VrNMrQkg98GuW0fwxFbt5ru28jp0/Gt7y3ReDwKQ+\npDrGoNY25ZcZrzu4ubjVNTCvK7bmrvNWs3vI0AOVzk02w0K1sowRGC+d249aYD9PtWt7aNXP\nI6CtKNSrlgODQnz87cYqRc4OTxSEAwV+VuPSmyOVYEce9OXYikjvTJcbQM7u/FAEjS7mxwRi\njJ2jA5NQqpXkVIsmDn25FWMVcq5PQUFtzc9Kap3E9xjNLtLRqwGD3oBiTNsbgcUit823G4Uk\njgHD8+lR+cPKIQfN3NAhJJmVmCEfWiNiq7mPNQwKdvPBPNTNIu3nikAu4Y3AUeZubeRuHpSq\nw24FRq3YDvSuJDY2MkhGMU9SvORUkfyuSwwSKZJhjwMYoZXQbj3BpSu6PaODTW2j5s/NSB8L\nznNFiNhS3l4Gc0rKSCduRTWUOATxT1ztFGwajPuxnPHpTo23RjPA9aaU81sY4HWl3ZbaOFp3\nKFXA3c9+TQ8f7s4Ge9C/6vrwai84wyEDlaQD0xtwR260gVcYWkWYSMRjFLGuM+nagBFxuwRQ\nxC5x0okkC445pm75sEc0gCFkZTxgipGbnk8YqOb92uQODS/dXlufenYBQFQZznJ6U8/MOBxU\nKkyYB5FWV+WMr+lAkVPNAJBqTd8vSm+XnORSs20DuR2pMYIwaFgeTmnJIFXaabHtk+YjAPal\nhQLkEc54osAxsgDHPNTQyBWbJz7Ujx+YD29KRVEa5x83rTAWWQKgPU56UzcY29c0rfNximv8\no696GIc33tw64pVyybagVw0mF4PXmpY2Kkt1oEKy/LtAwacWEa4796amDuOT+NM2jc+SevGa\nTGK3zENjpTNpaMknvUn8IUfnSSLxuXpTEMxxnHHvRuCLuxyadHIGjw+RSGPnrkUgImj8yTI6\ndaZLJu/hwAcUk+QuF9acuD8o9KCh4YMCuOfWmspzlufShiY2UAU5/mxxzSAjkcZ6YNMkm+TI\n4pzJuY4GDRcKvkggYPemA5VZsMAGFQ3TDJ3c9qWLiNQoIpzRjcSwoAZHJujUgcLQsnZhgMaZ\n5gzt6CpFyzAEjbQIYyqhznjOKlhG5XA54wKSZSo4Gc8UxXMLYHpTvYY2dnVkjIx60SKRINww\nPWmyKSm7vn8akjufOj2yDpSAaf3ittOcGm+WUjJx96gKYZPlPymrPmDb061IGfJM8Slc9etR\nwyNyMcdasXEJaljh8tCSM4oAVpP3YHc1WuMqgbPenGb95jt0zTzGyrjbvBoArMzRkHGRUiEu\nvzjBpTtjyD2ojJZt2ePemAjRbV+XpUayMrEHpVuVgYuKqFR2PPvQkA6SPkFfxNKhLAKe1SkB\nl2gdqhaM5BWlYCRo/k/2utRFS3zd6mLFeaijO3OaQCOgUFgfmxUMB8vgkD2qxGob71RbUZm/\nvdqoB7MdpC1Evy
gButSlWjT61GuMfNyaXUCKSMo6sPu96cyb4yc80/f5nGPl96Y2Y8HqKAIY\nmZMilkUHBHXvUix7mznFI6dCozQIbIu5QKYkxUkbc1JjcTnjAqNl+YelIpDi24EEVHuw2MYq\naPGT7011J5IoEMk+fHNEnyoOzU4ptXOKd8sy89RT6AQNlV6c0QksDu4pzAq3HNOChY8k80gI\nmP50pyq5HWnsFZQehpXw3bApgR7TIOeBUT7l+UdKn4ZcZ7011CnjpQBXVTjJFSNhuRzT0XLY\n7UixggkUAMkVW4ANNUbW56e1P5ZgAM+tNkyuMUgGrubOBRsJ4Ipyg8/Nik5U4zx60AQyY28c\nEVXkj3fM3JqafKjGR1psQLDB6VSArSWysQc4pjWaMnY/WrjRls4pvl9iKAMtrCNl2EZNULnQ\n4mZiw+U9a6JrcBNy/e71BLalk56UhWOHvPDsDk/ugR2rmtU+H9vdtvKgMvIr1l7LcBt+7Vab\nTUZCdoLUAeB6x8O4mYERqPX5a4zxB8H47hndbcSMRwFH9a+nb3SI5l2kD6Vz9/oLx5wmUrSM\nu5DifJeofBZmfGzyl/vbc1yetfCqW3kDyRBlz0zjNfZkvh1VUO8WCTxxWPqXgm11AEeQpX6d\n609oZ8p8Mal8Prr7QxJMUKnjI6msjU/Dd3pOHlRmQjuuK+ztV+GMUkjssW9sYHFcP4k+FwlY\noEOzH3M5wapVO4uQ+U5IWWP7pwwyQKWJlW3VtnGPu17RqXwdkhk3hXRU5zjIrk9a8A3UELSw\nxmRE6ptxketbcxHKcFJHsBKrtZhwfSkTzF5L8YwR6+tatx4du4YGLKyqPUGqUmmTRbSxKpj0\n71alcnlKW5zlX+VQcgj0qVmjnUE8lenPSle3byZODlfXv7VT+0GNcjCNjpjpVEE4jBUgZXPI\nY9/amNGZCoHykeopjTOwiDEEg5zV68njkjVQcE9cDj86q4yhu4dUGSxwFb+dQ3CbQuFww4I9\nPeiRTuJ3Hr/CaevY569fpTEVZoRGwZm3L/Woin7kuVCnOfrWq1vGxxgFGHFU5EUNjrt7e1MW\npSlhRsYJDYzuUVGrEZEgzjr71e8lHZlzjA3Z6AD0qKRd0e4KD/D6c0hFdmZoUVSdmfvdzSNG\nI2PzZ4yeKlSIsqg4345pE/c7jjJIwD6UgIgrnbIuFix+Jpu7y12gE5OakmchhGDyBzTFAbnO\nD0plCbhgEAK/bmnqgbOxsd29qeq+Wm4j8x+tINkUfzdWPPvQArEBgpOFxkCq7IPlAba2ckVZ\nUoW2sMkcg0eVHsZmBJxmgRCoEjHaWHODmkdk3MOn8I+vrUqR5jDbcDtSPiRt5+WNeD9aYEav\ntVivQDB+tLtMbRFUynU89aVoQY2ZPlxyTTJHMiq68nFAAGzG2Rt56UNGdykD5mHSk3FowxXH\nrQuWw4y2PyoEPVtyqw4PT8aVgNu5flI4NMhaNYSn8Wcg9qdGfKyrn5H457UDGxgNIQBjIzuN\nMTARyT+NPKRq20navQ1LuGGjhHOM8jikIqKzyJklQo9etS4HlbSeQMikx5iqcB+OT7+lHmcr\nuTaelIYfMyqepA6jrSu2QWXooz70qbvLIU8A8mmNCQx2n5SPWgBJGVUQAFieeacjZn3Fcx0M\niKic8qOaR2WPBJKrjPTqaBDv9UxcuNp/hHX60xGLOSvA7NimyBpY1ymf4gw4xStIXUEnJB4A\nFV0GOP8Aq1O3hvvc8ZqOMmGTafu81IxWJQXDBe/GcGmBSzZXgHk7qQx6kKxDA9OF74psyh2U\nn5EHG0U7aJCH3fMeMHvQQ3krngE8+1BKG4KgRhwCOQ39KV8xuFRthb070MY1mdM7hjgjpTlg\nbajIdxBzmqLI0AQsXwxz1/pT41BjwowAc4PSmMm+R8DAPJB9aVv4cHaPQUiWN8whDzg5xtFI\nsQyWB3AjnNP3RswL
L8vfFCsjRkNyxP6Uh2I2j3IGjG/A6U9BgEMxx6U+NhC2EUg4pskRkUhH\n2nOTRYQnlswJ3qztwMHtSviGZEY5Xbhc+tNVWTJwAPY044bnGSR37UCHKwKgMOScEg1Fcbnj\nVCMr2HrSLIRhcAtn8KGl83MbZAHGfT3qhifOsOPuj7wUfypZCIYQwBYt1HpUm0bQN20dMf1o\nZdpC7g6+mOlSAzYGU5HyY9acMMFBi+U4GRTZF/djYOd2KdsYMSpy46UCGsoXcHXygp456UbQ\no5ycjqwoZt0ZX7+Tk98UiqJMM7FWHQE9aAEZxBMAw/dkfep0luyAEnO7kE9MU1l3KAxAXOc0\nrZbG7lfemAMyKPnbJHQYpJPuqFU9c5pyqjScjcAPvU5Y3VvvbV64NLcZGcKjI3Vz+VCDp6r8\nozS/IzDKbmz1qw0AZOuOcDmqBlX95uAMg4P8VSFiynBwD29aXaDwR8y/rUUajcTnnNIQ/wA3\nzl5GMcfjSOvyn5cnG7Ip0YHLbeW4Ap8eVwSCjdBSGJlWUA7kyMggU9oVWDep4Bwfekw2PXua\nmVd3pg8+1AivMDsQKO/P0prfeXC4CtkqKnZ9zBSNh9e1JtVRkDLZ/OnqMjkjXd8wLd8+1Kyh\nYy/UAZAp7bNo4wvUmmuv7xTuDIRxTAI0bajcKzHcKfJHujYSHk/MdvamxfIp8xuAcZHSnYPT\nquMY74qQITHtJIyQKkt2EjOWG0bepqSK3xn35B/pV2Kz3fNt52847UrhYarCO3jcAKw/hqGe\n7a5VN4JfPHFXFtzIgTy2wTgNjNS2uiy7cuuG3H8qLhYzfL3nGBn+9RDHubbnc9b8Ph2WaVUQ\nZVm7HmtvT/CUt5PsS1ICj5n280aFHFR6W8zYUFm+laFnoEkMOFQySE/NxzivWdL+HaW8aSCM\nu/qRiui0v4fG4x5kRKg5wo6/WjmA8ds/CbzAhiMfeOOCBXT6Z8PXny3llUP3eOSPWva9M+Hn\nzMPJQ+23kD611On+DYozgRlj344+lQ5DUWzxrSfhyxeLzEcfTv7112m/DlvMLLDuU9SzGvVr\nPwkrSKSgXb056Vv22gBVChQB9KhyL5Dy/T/A0bAExbQvfFdRY+E0jUBB97k/4V3Froqr1Ax6\nVoRaOFbIXjtis3ItROX0zwzEqqpj6DABraTR4PLClCMccGtq3sdqg46VNFZ7s56UrlWMiHTR\nEuAuTVmKwPHAHetaO38sjipltgzA96m4zNW3ZeccVNHbgjdVvySMrmkMPy4HBoAj2opxxn0p\n6x+1VptNmnUssgQjnNW7eGaNE8xTnvQAqx5XBpyRYbjmpRGVNS+X6c0DIlj+fnikZQ2BVnyc\nt/jSlAoPrQBAsflryKUxjORyDU+3jHUU6OH5fxpAV15yMd6m21IIdrYHepFhO45GB60CKzbl\nwADinbivHarCxFhjIoaPnb1PrQMrhnbPFSqML6mpVj8slTR5ZUk0hjMZ5OcjinLGeSakGPLI\nYYPrQvUADNIRDtVuOhqVYyF449ad5TZyvSp4wv3Se1MCv5W5gc8YpV+Wn/KMjrQIwTzSGJvH\nf5aTy1k4NDQhhnPSpMdMcUxkRhEfTp3pfL3YI6U/b6nNHO044PpSENkX5QMd6kZOncUiljgG\nnbwMihCGoo3cGnsvzY65ojUNz0p0jDaMdaYCKvQ5FEjjPFSBACDjg9qTygc0AQ7Ru5pyxt1H\nNTLGGznjFSLGWU7eKQ0V1Vg3A596eq/Lz1zUyx9T7UwKdtAuoBdvWm7mp+1pF4GaQKdwGKCh\nv3jjpS+Weg6U6Rfm460+Mbm9KBCquMEmkZfm45zTyh9M0FSrdOB1oAYCMhae7ncAegoVAzbu\nAKdwR1oEiFgzH0FSW6eZIFJwKkYBhnbSxjaoOOaAHTD5uaIgOoHSlkCsOtGdqHFJjHK+7gCj\nyw2SeKZCG3A5qzMAoG
Dz3poZFGoGR2pojZjgHFWAyqowtKqhjnoaVhESxlVJYUySFidwPFXm\nX5eeajaL5eDgUDIol3Jzk1KoUcgUsO1VO40KQq0wGxktL04pSo64wc0qsQvHOaSRTnkEZoAO\nffFKvKkk4FOVT8uORStjn5aAIxhsFetO3NnAzToUG77vSpHUDtigQzfxj86EUMwA4FO2rtBx\nzTwm1eBmgVgjUx5GOB3pdxZs4zTVk3N83C+lKMbsoMCgdg29ARxTY1TfknFK0nzU8ICOKB2A\nYXIbmmcZOBgVI64wDz9aVFJHIouBCtv5vtVqOFAmCOlNj61MGG05OKQEDIFUYXFNjZc/czUj\nem7INMXK54pk6knl7eVwRShVZcn8aMALgnmm4aRsZwKAsOZeMZx6UxUf+LmpSw/DpSsPmAzi\nkNEJXAPHNMYDcMnip2BckA01Y9q/SgQzcvYc05V3YGcijb5vQc05Y28zAHApWAPLVU96SHar\nfWnNu3DHBpTGWHNNdgBsnODzR0X3pR8vWmc89xRYeo0ybZBkcU7lmIxilVc4PU09c845oEIH\nyvTpUYVm5PTtUjQqTycE9qZtK4G7ilYaIUkyxUnODU8ZBDAn5TVdoT5pYYHsKnt4sIdxGeuK\nChoi2xkE5PtQvHG3mpfLEfO7JPOKikZ15Ck0CFUfLhjikUYzg8CgtuwQMUvlkd+tDAdC4xjH\nNLIoXPzZ9qRkK44470mxjljQBICCpJ6LUcbbs+namsxA29BQHHGOKQxVX5gW4PanOm7j7tJH\nljuIyBT2Ikl561IMjbKkZFKuCw5qQQZyTz6UjxgJnvVIQx/mB9CcUzyzH/u1NCp3A/mtOfdn\nDcLQMiDM3Q8+9Rx5LlXG2pSwPQYbtSbtzAMcGiwhDKAdoOB3qO5lRMkDjFOb73NI8fmfLjjF\nADIf9SGzT2yzgrwMVIqDyx7VAyyK3AytAydW8yTlvajytxOOo5xSxhW/hxjualkby+gyCMHF\nBNisQyMQx4pDH5TMR37Gn88huR1o3B1JIycYoGRSZUAHoaV1B246ZpVyy5PanbTtzxn0pgNd\nURvT3pNobOKlSRfL+ZQwNRNk/N0ApAKuGUjOaERVbBHNEkgIG0Ckbcsh78UgFMpVcdKY2TwB\nxRGTuw4zT2wAw3YFFwQm4MAT0pynPU7Qe9KipJznFL8sxA7CmMaq7c85HY0kbBWwQc9qfsVW\nxSc7iMZ70xB5Y28nBPWldl4A7ChkZUzwc0qqApx81AyNWZWHGant5Pl9+9Jx5ik9aT+JlHWg\nRIzJIvTLeopkSANhu/rRGphUHqKc2W5x+IoAasPzFW4qWSPzYxGDj3qU4YAkcU7y1wOetOwy\noI8IQTntTo41UYUbR6CrIVCCuOnekdfL/wB2iwh0KfutxHFM2q04wuOKmhl/d4HShY8gEEcm\ngBvkMynB5FMhkPmZYHAOKshhDjLfhUUnlvISMqvekMtP+8IwMCoCx2kkYWpBtZMDt3pnDZWg\nOpLCnHB496b/ALPOaEXyl253VIOhAXn1pFDFAOM9KVm+cADBHOacw2ouRRNhhyMGmJDHbDcn\nk85pBsx1O71pkyttBU5B606PEi46kcUuoh+XAAHSmzQs0ZAJHzVNnbjsamxuVugGM0x6lKD5\nDtY85qwCkvQ5qHaY1z681JDIq9RigCWNliyWP0qRW24PrzTwkcseD1pm3dINvTpip6iJV2tg\n9DULsOdq45p7MMgHgigMOcjg0DQ1fu5ByafkMOVwaDCFGQc57Uit1GMmkMBGBlgee1KsgDbS\nM8c1G28IdvDelKu5IwCMsTTAkLBPalaSONlx1Pc1EzgghhUbYUgk5AHSiwyYqjPuC4/rTyql\nsnpimAsFUetSYy2D6UAR7SFNSKysilutNUlQ38QpVXruGKBCodzMBTtu0ZpFUbSRR5hycikA\n1fvEZ4p7sMcHFI0i7unS
gwhuc4oH0GxsZmB7Cpiw3FW5FRLhOO1K3z4weaXUQ4KBwBTmxxkU\n1RjnrigqG9c0wBRub0Bpkke1dx5Ge1ScIMNSLJuOCKXUY1WKjpgn8qczFcZ5pxb5cEY96ase\neSeKYEckYZgBwTTrVfkPzZOcGpeNvuOarRRhJX55Y54p2EWfMHmbCKG3KuP4aRc/dI59aVQV\n4NIBytnGBgUu4d+Kap3UrL81AxPfJpPv84709RheelNWZFyD1oCxIVG7ikVTG2c9aRG4JHOa\ncuWHNUMQBs9Kcq568mn5PrS5DNxwaVgAJskyfukdKGYrxnmkVTy2eKTYd29jn0oAXdjtSbt/\nA4xTt22PmkwF+amAm35tzcCpVbcuCMimH3PBp/HZttMQMQcnFIqgN8vFDfL0+b3prHcwPSpC\n49m6dznrT1z+FQ/NwB0FS8lev4Uhjti+tFQ7XooAVY92SOKSOM723cgjpUyxjaR2NRyL5LDa\nd1BIJHtjz2prL0A61JG3QN0zSPIAxIxn0qkA7aYunzDFMXBbOD06U6Nyy56c07G05bjNJgiJ\ndrcr0I6UnlqNoB5pGXa5C809FyuR2HNIY1c7jnr2p5jXaGY4zSR45JOT2oZgy4PXtTEIwHKq\negpJJvk2A4OKVWCRnjJ9aYrjd74qgIVkDuM9Md6QkZz93FTNGZOhHFV5EG3rkUASgbuQMihl\nHl4Yc05R5ca7Tk0ydt+dv3qBiM23Hy/LSZ2Lkc0iyFlANEn3cqMkdqliFVmkXnio2ZtxBPFC\nymTqMN6U/eOvakBGrDkYyTTlO5sHjNOVd3WiSPkt/KncQ2QbWGTlR3FO3bWHP0pmRwGztpXA\nwTjpSeoD24yRwabGxZT81MVizAZGKlKKqggZNICNj2AxTFi3g5qRvvAp+OabGQhI6jPOaoYx\nk+Yfw1MMryD8tOIDMD1ojztxg7aQEDqVbPelhXdJluvanFdzEnj0pbddwJLbWHIoASVj91lw\nBSSKnl7upNDZkAJ6k0LiNST81Agj/wBThR81JIW8sMDz3pyKVAI5NMmY/MuRj1pgOSQvhsZp\n0jbcHbyaRRi3AH40m5mbaTlQOKAQp+TI/EUjZC5IwT6UjqfLx/EDmjazMCeKBiq5WPk8U7sA\nR703jpTxnzOeRjikTqNGEyRSbA8fXNIyq24Hp6Uo2xqAvIoKI5IQvuKYsgyFB47VLIw4I59q\nY8S+ZlOAfWi4mO5VSe54pYTuzuGRmmLu3BScipd3lMwxQJDcjzSCcLSD096j3kISBk5qdmLK\nCOOKoRFMzdNvFKrhVPHAp7MGUHoOhzUWwjI7VJSGPIi4BHJpkTLn8aJAWAUHJFLDD/ERk+lA\nyzJyoPQdM1WL/NgjIFNumeSIKMjnNMgG2PLnFICYodmV6ZzRI26MnpRn951wMUzysrktgUag\nDfLHxjPam7jIoyaXcp4PpUixhI8kcmmDI9iNgY+aoId3mHjgGrLKrKSTjHIqGGQGNvXNBI55\nNsmM54qN271NIqH5u+Kqy47danqMJFMjAAnHepFUBSzYwKkTYsOActUFx95RsIWqGSfLIuRT\nFUsxHOO1LGBGcckGmtuRsqfwqQCRD5gzxxUrH93gNn1okk3KMj61DGRu56UAQSW58wMKlD7Y\nxu+9mpVjyGYnA6VXuMheMUxkMmHlNTcLGBS+WpRSBhiKY67RnPFFxAvzLk9jUci7jnPSpY8y\nnAGBUdxHt+7yKAHQsR82c05sjnoaji/dqM8ipd27jFICLzCeMZFMVstz1q0y7UwBVduJR34o\nQC8pGW654qsy7cZ4NWWYNjHC01thzuP0pgS7wwG7hQKrPGJiQMgU4NuUqaG6DBpANVhHGTg+\nlMVi3+FT7goOfu1B2PGOaQiRvUCotx3dcVJGdoAPPNEmw5YdOlMCBkPXPFOVfMGAegp7ANCB\njvTNgQZP4UDGqm1sHrUqqd
pLHAoVtyrxSMC2c9KAGM3BweKjjbqMU/b17UKwU0E3I1yWJ71I\nI8x9hmjbkHtT9isAM44oKKygdccUGQMu30qYJtbBxioVxH8xGTSAcVAwR3FDj5QBTtwbHGBT\nnwynbimBFsJ6dPamoh3ZB605GKDnpT41+bPagCNkKqwHXNRmMnH61M0h35xwOKHQr82evSkM\nr4+U8Umwvg5wBVnb8vPfrTdoC7RQIr3EayY7Go2iKx5B4qRkbB+X6VIEJIB6U0MrqpA60CMt\nyRxU3k/NRtYHA5oYiFomXBA4NI2VUjtVn5uAeRTGiJz0oGyr5W5BjrSLCvTGDVhYzuz2oaEn\n5ulAjLms9zEdDTVtE2lWGa0pE+aonH400Ix7rR1mO3I2jpWZ/ZsceVK966hkLYKgA1FcWJYE\ngAUCRy0mhxz9E5zmse+8MRMz/JkGu8FuU+909qgms1bHTmgDy3U/BNvcRnCYBHNcjffDW3kV\ng0TKP7wNe7XGjqYuMdaozaMicbc561abRFj5r1P4VwMGUwnd3Y88VyOs/CFSoMcRaNf4ccV9\nX3Hh9HRiAMZ+7WTceFo7hXBjyMcYp81tQ5T4t1T4T3e9nijZZDnCdBj/ABrjdW+G1xZ5Agld\nduT6g19zX3gpG5aLcB6jpWBffDuKdTiJST1B71sqpk6Z8KyeErqKTiGQhR8uRms64sp0TkEO\nGwVr7O1T4Twux2xbCOy1yGsfByJbh5FtxGxGCV/nWiqkcrR8s/ZWjDkEs+D8oXvVeNZmhEhj\nA2jbnvXvuofCGW0jlPls+7o6n+lchd/Cu+ti48vdkZ24NXGXMLlZ5sshZULHA6Z96VomZ2kc\nFSo5HXiuqufAl7ZxsTH86neABnisyXRpo/3ixOSwJOeKvmJszAZPM4xvXGRinMCuMABAOcjN\nXF0+aBtxgYBu+DSXCbMKFw3pg5NPmFZmdtClW28E4JP86XPzOPTpxVx/mAEkbK/fcKhmj3KP\nlwP71NNBYpbYtj7gzysfvYqGa33ttQFT3NaIj8tAAec+lIsIZnKOd/vTEyuoZodpHzj+GkaJ\nRtGzc3XFWvs4wcNwR096RYzbooHzH+93pXGiu2zJBGM8nA6VG0jEKd21B0XHWppZj5gRAA3V\nqimwPnZiSeMYp3Ac0jfwL3z7UjsrHIBAPNMYmHAbDZp3Mg4+UDpQIb1V1EgbcORUUkW4IiNs\nYetSfIpwRgdzRIA0K5G0Z4o1AjG6NWUnEntS/vI4+Bgt1qbP3ScZ7etMJZpDubIPQUwI9u1h\njg9wOlEzHac/ebpUjWx+VWYpIOT34pqxsF8x1OzsaRQjEhghTI28jsTTfMn52rtAGPpU8kiq\nsf8AFzkt6U2MqzSkDljytGoEClfL2hsDr+NLGGkOG4GOGPep4bdsFSFGORn+VKIx5oyOO4NM\nCrJEsaKqsVzyR60snyt5ap2zuNXmQM5OAV9ahXe69AOOlIRExbcMAE4pyxNIpZsLngFhUsNu\nPK3Y3EnjB5oZvk2j5xnkHtTEU5NrsVUnng/WmEFsbSAUNWZF252lRn9KhXDRnccNnHFACSSu\nq72Pfmmsq5B52mn7Qp3uckDG2nhDtBXnPX296OgEcm2TALFNvI4qSIfuQmSR1NN3BiUK7weM\n1KuOi/Ko4qQK4X5cr94HBAqSEMsbqW2dyvrTo40V2z8w6cd6jjVlQo2XGetUAbnwu0qF9DUf\nmbW2lRvzkYqXlW2qoCtTZFKyEbMHGM+tDAazHy8sMANzUgjTCk/LuPBpsRVYyH+Zj27U9g6w\ngH7g53evtUiGyN5eRjEnTA5qPydsZdiSf7o61MqmFi4+9jOyk5bc+cMByPaqGMaPzG2jPA5N\nHlfMVLfMBzTvlMGQ5OT1HFJHHtA43Z4LUmBXZQ2DghgeKscsMsmwjqfWiPzVumIUCP1NSLj5\n9zYcHAU0AQfKNpVjgH7tKZCs
bNwCT6VLGo3gjjnv60xd6s4cZ54WgYi4WQAnAxnce9NVUMhD\nOS+flPt6U4qjMquSnfipG2rIq9GzkHFBIxo2SNvK+U9z/SmNH86u7YJGAO5PpUsjOA5xlM/N\n9aVdpliLnKLyG96AKwzICpXC55Jp3mfecjcqjAFTPHlTzgZ796YzK2EwAfagBn3cqBgdhTzl\nQpZtwxgt7+lN8wSK+9uT2FO2lYfk+Zey47+tAAzKrLwNpPLCljGJCGBBYcUjj7pKEHuPenLv\nZcsfLK8ByKNihI1eZCVXJBwcn070q7FXCoX9T0pxLxYYISx+9t71IsZfB2lnYd+KA9SvdK9u\nq7ANpx06ipWYtIoyCRStC+VbqudtK0LzE4Tyyv60hj+I2AZMhjzUc37vodqZ4FWFt7hvLHl5\nTrmnDS7i6mJUbVX2yadxGf5hcNJxszt60vEjgLw3TP8AStu08PvJndgsRnbjj6/WrVj4Z6Bo\npCSeMDOPf3pORSRjLC4h+5t5wSRnBqMafIynaQT03L2rurPwOZpfJkWTnkhu/vXQWPw3mDMB\nb9vk2j9anmHvseV/2fIgRZIyRwA3qfWrq6HdeariIlhxx6V7HD8NAFVdpG08yMucnvXS2Pw5\nTcieWzLjJTHX3zS5hcrPDLfw+ZnH7vB649DXQWfheVlCsm3zOMlete96f8N41jA8lGPZtvIr\neh+HsKeX5sKvxw2MVPMHKzwqy+HsjWoK58zGMBcmuk0/4UpIqP5LCSMbvl53H3r3DS/BCxqN\n8KlfXvXRW/hpYoxhRt+lS5l8p4pp/wAMI44xIIsu/JXZytdBY+C/LBVIkjXuQOTXrsegqu0h\nMHHQVft9DiWMfIM9anmLUDzrS/CqNjzow34f0robXwzHCx8uMK+MZArrP7ICKrqgFXbbT97Z\nxU8w+U5qz0QRtgqCOp4rRXS18zO3mtk2Q3YA5FTx2Z4HbvUN6lKJlR2aqwUr+OKupZqx+Uce\n1XhDwV21IsZTB6CldjKi24VsYqZIzgnIxVjyd3PrT1iAGAMigCssQ24x+FSLD5fA4HepjH5e\nDj5qcylhnuaYEDbVYZXinsAGB71KIyyhSM1J9m7HqaAIBH5mDineSBjjmrEa7TgjNSbDJk0h\nFdYNgO0dacuNoWpV/ctycmlUjuozQMikQKoGMmhlHGKsGI7QoGWNM8llG3vTAapBxQ8alcDi\npo1ypGOlCoCemRTAhVBtGBzU0ce0Cl8nqy9KQxFsHNIQ6TByVXmmdRjnNOjYrwamWP5eBTGQ\nrgYz+lSMPlAUCntCAAepo8tt3HSkAzaI+WOaMBl5OAalaMYxwaSNlY4xgVI7kSR5fnlamVAG\nPQUKp3deKeVDHOcUCIGUhcLxzSMpXmrOBtyelN2qynqKAIQA2McetEa/M2egqVVHbmngdsCn\nYCFV2vkjg05l44qZiOARTWUk57UAQZHZeacYyWJxjinY8xiMe+alBDRgc5pDK8YO31NT+WMZ\n43U5YwM/0pu07jxSENC4HrT1wWyBz0pVQ9acI+4x70xiD5eNpPvS49ep7jpTnYjgHFPIbaMH\nHrQFhFQqvqKTIifjvUrfMoXd0qNlBbpmgdhrs208URqzYOfwqVZMYGMilGOvSgLCBdrcCkAZ\nuM09GKjNC9Sx4oGNZCrcjpTdwXkd6kk/fc0eVlcY/GgkBJtjNJG5kB96XZtXBGakjX5TgYNA\niLaY+ccURL1b+dTBfMTaTg07yPlUZxQPoNblemKWMEv04xVnyQiEH5hTFGD6CgLEPl9aXyyo\nI7VM3CjaM02NWLHI4pDEhXt2qRlGeeDT3RVIA70km1woHWgBF+ZcDsacuTk4p0art2n86QMF\nYimA8Njk0uwuvpQmGPNSbdrH6cUAyBrf5fWkdcLg8g1ZbIX0yKhWMnAzn1oEgWIKuOv0p4JO\nABnFKu3HHIpEmC5IFBQAEgkcGm
xk7vmFSK3U7afwVyOtAhrAqM0qoW6nmmlmkfJ+6BT0bkYz\n0oYdAVRnFTY8sc8io16gAc1J95euBSJImi+Y8U7ZtHPFOJ24wcn3o37uCOadyiERhmAqVU8v\nlqVRu5Ax70suW4OKQiLmRsU7cV460qrz16UsO5t3y81NriGxja3Jx7VKq+YjcUeUGIyOaVoj\nwM4yaoCNI9o9TTcNnhTUw3IxBGaepP3s4FAxqxqqb26mmfdXOO9S53HBNIsfzH0oAZxu9qXa\nHPXApRGqt1zT4iBIc9O1JjESNV6NTTlmPoKkyrtgCn+WGQ9qBbEUfDcDrSx5UEHNSR7VXBPN\nLIxYfKKBEXR9xHGKbvPepIwWba1NkhIbA5FADJFZl4OaFj52g0Kp8zAPFSbdqn+9QUBj9OKN\nu37owacvOKdtbaKdxMY0YJyTzTRhuMVKyhcknNJGu7g8HrSKG/ZRtyODUXkjdz1q4FLqcGop\n1/eAAUCRC0YZs5pMOqk5zU6qMndxn0o8sOeWwKAIY4+hJ4705gFYkDPt3pT8oPHOaRj8y4GP\negLjGdo8hhwaFUbcBuakk+bJPNRLGT7A96AIpeQCeR0pY8FcEd+tSSIV2rj3pjDa3PQ0APbI\nbC/dxTI2HOfvVIN0eARkH0pywhsHGfpSAdC3JU96RFyxyMilxt3Z4I6GhZMYPf8ASmAEtuGO\nBRNnYeaWUc8HinLlRgjdmgCAwhVDK3OKbjbyTkmppVZRjHNR7d2PXNTYCNVUKSR35NPVWk6D\nipDG3I9amVgoVB1pgRbfL4I4NKsZ5NTnbjpk+ppsi7GJzlaAK68ZBprqw6fd7U75fMP8XGfa\noXkI4J+goAcD8ozgUjLuYFflFEi7lB/SmpJk4bsaBEkajJB6Urqu/f26YqMZ5I4FLuHy560D\nG7v4Mdac2FyrDFDcMCRzSeW0iliOKQEadcY4pT8pOTUgxHH0zTWw3/16Qxi/Kme/rQMbgAMg\nikxuBAFPK/dYDgUBawqkfd6Gk2NtIA5o2gc9+tKzsvOKoBmdq8n5qk8xGXacgkUR/vG5/Cnr\nGoOCATTJIlj8tAS+T6VPGoVWI9Kc0YQFsbqZHIGU8EN6UDG+g3VKyjhie2KjWLzBvHHbFSD5\nRjrQAgQt8uOaXPBxSBnV1GOvenqvl/ePemA1XfaqkZyatBA3cBhUALCTFSRqVJDc5pDsOZcA\nsORQqCTaN2FHNPiIXIJAA7VHkhunFVcZJ5axqzH14FLH8qnK8U4N0BHam+YduB+NSDEZUkbc\newp2wtwBT/LXyyBycUkTGNVAOT3BpCI45juMbDgdKmZdoDDHTmiSNW5T71JFEzMcmmUCrubp\nU+8yRn1WoOjZJxjtU6MGUt0pCGRyBVIIpjMWUHbnnpSN81wvPyYqc4AyDmmA4bfL44z1qrGR\nG2AOamjbrk4NDRgtu5/CkA7y2Hz9frSxh2yG4NNXPlEs3PYUtuxabB9KBjWYjkjp2qNV89sj\ncOelTMxwVcd6TcYcDoKQCSOYWGAcd6mXEmGXrUe7zIzx3qSOEHad2KQD2lDNyMelOVgMhuBi\nmtIPO28AU9oxIpWmgGKvTHJ65p0ajnBxmlVSidR6U8x78HpQBEwO0mn5KqrHn0pC21ju+7Ub\nTf3vwFAySYKykn73bFQsAIxnj1pyNuY5HPvTnVZAOtFxDlk6KefepFY8io49g+XvU+BjjrRc\nWozIVcdKXaVwM5pI1/eZbmnbScnHNAxudrbRnmnfOFPFLuCjB/Ck8xup5pDEVdzKSNpp0ilT\n7UjHgEClbtlsn2pAIrBe2aB8rFjwKABt96jzu4YHFKz3EPXv3p6MducVGyj5ednNOVtucGqG\nEyiZcZ2tSqGCgdW9aX+LJFO3Fn44AoDUY+VA5zUiyDGMcUisOQRzSbewp3sAk2+RQM4FV1hY\nygDg+tWW+bgfjQ55BAOKExDVkPmF
Tx709o2Y7849qQOOrqMUjMT8p6GkAKx3DPHrU74MmVOe\nKb8qr+OKTaOoOaQxDkg8cU5Y1kXkcjrRz0zinqvcmq6BqR+X5Z+U8elPyykD86zNa1ltJIcw\nmRO+O1XNN1aHU4C8fJxnFICx796d5WMEnIpFHB9PWg7u3SgY5cKT/d9KVsvgD7oqJlLY/Wn7\n/lOOtAXHZLHbjihhnjPApu5mXI49aduCxcgk1YhwHm5OMDtTfmUc8VG0mwgAc1KZPlGaBocq\nhupprsAuQAKQtwcHio3jbjB6mkSWFztB6U12O7Ao52gHpTVQmQndxUlD9z0UmTRQIkX5sDqP\nWmzHYxCYp5TanynGKg2hlyetVYBYztUlhzTNmGL9QetPUjNObG0gdO9IQ3O5RjIp3mbuDzUn\nC8HkY7VEqkk9MGkMrl9rE9asxSbo1yeD1qPy13HJpqr5QyTkZphYl2jnHFRqc8YyPWlaTdhs\nfLQO65xnmgLDGctkA/KKYylunGKWRSI+OeaRSxIBHNPoIWPK8Z5NRyqd2CwqQy+WcFOe1Rsu\n5gzflSAWOTau1cE+9JJuCkfdNPMa/eHFMkU9+tACwqGX5z81PydpUdKZGu7OeOKd91evFAiN\nflUkAmjI28LkZqSMkLsA5qNcr90cUW1Ac0g3bsY7AUvzMoOR9KYUcoTjIzmmonRjwaLCGyY3\njk8c1PIwyD6imEbmK9R60u09QAewpD6DFwrFfu8ZzTk3gHPTFIyFs560sjnywO+KYgiYeWQe\nMc5obbt470q5bgjAxTJY9o4b6CmUDEKMZwaI5Xx14p/ljALU1YR5nDYWkApbcMnpRvPUfnQy\njcR1FLgjr0pbCGpIOT0xTd2zO7kNzSCQLldvWpFjbaN3NO2oITb2J5pu1SrDFSbO+O9BA8w4\n5GKbDoMjwVw3AxxRuJ5p2xd2MZpu0Fzzj2qRCrhcljSMxC8/hTmG5gW6UM23jGVpoNRoweuR\n70gzyc5FPWQuOnApGb5unWkMavzA4HFIuFyuOccU9Y9y4Xr3qM/u5Ce+MUAEaiP5m+Y0t06q\nBzQFOetDWu5SxO4UwI1+ZsqePentnjvUW7y5FUCpcnYeMGl1DoOC/wB3H0pHyq4ojba2T1p0\nnzLknihiSIDJvQoRgdal3BQvORUQIXOTnPApFzgHsDipKGySIr8CiKTGT0NLcRHduWmQ553m\nqAsSEeWM8ZHWoIgDwV3LUlycr8pFRx7sAl+aQD5lC89PSomYGPBHFJIxZ89QKlVlMfNMCKJR\ntYlcj1pd+WXLcelS7lC7PWqtxI0cgyOKQhZm+9jgUyAjPIwopZGLKGCmljZfL6ck9KY0gkG3\njpmmbRjGOalm+8AeuOtV1zu68UAWFhCn3xTbiYqwXqtSq3yFhzULOZl9KAE+YnIOBSM2ckij\nnGAKdx5fHNSAvmB4xxgYqq2QxPUVJ5h4UDgdRSyKrZ7e3pQAxZC3Xn0FMOJcg/KacqBf8aaE\nOG5/xoAXzFChT1WmbdzD0qTauwbuuKbtbaOw7UALEwjG0mnS7duQOKjbHJI5oTITrlT60ANU\n7WwehFIPlXB45o8tgwJHFWGCNjNLqAi/Mg7+tQrgMc05oyp64FRSfe46UyR80e1Vx6UyPdu2\nkcGpGy6cdaOQqnp60DRG6sjE46VAGZ+CeasySFgcHio5AI13AUgEkVmjXj61GfmXk4NSRyEj\nnvTZYSMFeaaECsNuOnrTOx96dGu5TnrSuuACOtUCCNg2VxgUOpXnGUojXd14p7qGQjnFLUoi\nILe3pSRqehOTSxsemcU1dytljxTEKWPbGKbIoK5xn6VLlW6Dmlx5Y5HFISKwJ69qQybfapm+\n6eMCoZGVscZ7UhiyMGA45NNbt2qVsAhR1NHkg7mz0osMazrt6c01GHam53EYH1p2R2GKAFZc\nrzSE7aTcfMwelODK2fahgCx7geeTTJ
GKKQRkin8bgQeaSZTv5P1pARbiwHvSt8rA9qk3KuQO\naReetAEbZ25B+Wm5IHPWpc7jjHFN8sD7xqkA1lJxikU7c7uak4Vck1C3zfN70bgLtK85wtJJ\nlhxxUirxhuppvC8E1IDSpVeDRkkHNPbC47imfxAk8VSGN9eKikiH0zVlcbiaR480IRUVClOb\nJHt3qZ493I49qj3fKRjFHUCKZQyjA+tRfZypBxVhlGF4p23jNJgQNDvXHFVWtzyCRV5lPNMZ\nQ3agDL/s9R0HNQNYquQRtzW15Z645qKS3MmaCbGBdaeoAG7K1VbSk2tujHIxXRyQbY9oGTUK\n2fQt+VO4zjptAR8jHPtWZdeFEmYBl/GvRntY3xharNZHfjZxTuKx5fceB4Wz8gB6bsdKwNQ8\nA+XJ8inZ/fUc17ZJpg2EgDNVJNI4C4696ak0Tynz9qHw1t2Vsxk/7WOTXKal8IIef3RELeqj\nrX0/N4dRpPmQEetULvwzC3yFQVzxxVc7DlPky8+EayR/6kj/AGh/hXL6v8HjseUiRmXp5eK+\nxLjwkp+VYwq567aybnwOszs+0IAeu3rVqbJcD4uvPhLdYEoglZMZ8xuTWNqHw2vFYeWAqnqO\ntfa03gFZEbgMpORxWHffDNbiQlokJ/2QBV+0I5D4uu/BskKmH5nfPUDB/Ksu48O3EMm1YzCx\nX+MEE19jX3wvR8s0O5h/F3NYOofCdJmEjRSEkbQG7VUahDpnyW2l3ioXETLj73HpUMlvJGoJ\njJU19P3fwfgO4iDL4xyxrn9Q+D8u0eTGu7OCGHbuQa0VQnlZ8929uksjbl284DNUc1qqsw3H\nj+HHT3r2mf4RvuLfZt49ff14rMuvhi8UZEka8n5mGSfpir50HKzydUC/fAI/vetR+RvXzBJt\n/wBnHavR5Ph7dxwFFgyGbA2rkAduayLrwLeLlFiLupwSoo5kKzON27ozjB3dARTI4xJL+9Bj\nK8jPSuhbwhfxyN5sTKi9VPWornwzeRJ5YIJbkNj9KpTQWZiNC5kbgNnnd0zTWXyyQzYc9K1J\n9Fu4/KYxO4P3cD+dRXVlMzHNu0eOMsMUcyCxS8wx7QeH/u9aJW4G44OOtWZLFg4UIT/npTVs\nZOVYljn7uOn1ouhFWOPnPT1zUscX7sheCT2FDW0u5gc8enSnfZvLZW3MXxyOgqroBiq3nICN\nhPGDU6whmwAWOcGmo+VLP1x19PxqQ3AhDMvzHtii4iKYNbgLtyM5296hWMvkrJ8q9/6VcuHW\nRjuG6THAqvLG24AjY2M/WmGpWVVViiscdf8A6woUsVCgbealmwOFORjPvmmhfKZCTk4yWpXA\nQLtYB8Y9ajm/eyDHKr17U+RlhiLMGkYnjA5FSPnGCByBilcLFfapbByO5GKbs8pWUE5J4+lT\nyR/LjBwVyKaqeey84OOfamIij8xoSQVVRxmhWYEHdz2wOtOjjZAny7tzYPPSnMwUE5xnjGKL\nDImK7y68diPeptomARfvA5ODxUMsa5CsxVcdhTo7eRCCo+QdCT1qhMdKwUgkdf0qORRJJFIp\nwOpPUVJCzK2GUAep5HtSeTJtJ4GOMGpZSFZo23Fche5x+tKjIyna3y9BuprMFXaRh8c00xkb\nHaNsdqQhXAMalX6nBo8z5kVU3A1KtuI92TgnnGOPwqNRHIoTLMQaYxrbWkxuDc8N2+lOjG3I\nB4zTkt9wK8A5/i700odpBUoM8tSJZGFKuUOSG5qby05AO1OuevNKtvJuCs2R1z04pwjK4VPm\n54b2oGJGp8vbjC579frT7kFMIXVjkHipPs3zDnGRxz1ojhIl8to/n9etAyi0SszyH7opd3XY\nuOwJrSGl7TINrAYqzb6dttyjf60jIGM09BWM61sXlY4+b5elVZY8q6EbSpxg+tba280duQIz\nvx0qSPRJnmO9eWHK46e9TdBYwDbHgHc+
3qTUrQSKwDhQSOGxXTQ+E7mNhuH7pj1wc/lUx8KX\nUxKxpuUHvwT9BU8w+U5BbZ5NpCjeTjb61I1rJbzbi3A+8uOld3Y+B7iRg4UnHVdvNXIvBZtp\nixty2f4jz+lHNcmzPP49PkuFJA2qeQSORTo7V5/3c6FVHGMdfevW7XwPNI0ayREqR129q1bX\n4VvcK/lwAgdN3UVXMVY8gutDVbWOSBSZP4mzwB/jTo9EkmKsmT8ucYOT+Fe56b8MjFMBJa5h\nxyGGVz710ln8LYLXDRweWW7KMn86hzK5bnzzFo0j7GNu27+7t5rUt/Bt1LGH8rCtztx0NfQc\nXwtt4GjYh1bdnHXmulh8Bwts3R5CjBXbgVHOPlPm3Tvh/cbuIt/qOmM+9dLa/C4SKAyso6g4\nwc+le/2fgaK3TPkMefwrfg8HoyonlLnqWxS5x8p892/wpaMpI0ewng/Lxj3roNE+GIh3Zj3Z\nONzDt7V7zb+E4o12ogB78cVpf2BEqgsgB7cVPMPlPHdP+Gtui5EXzAYyRzW3Y+CTC4VoyRjG\n7HSvS7PQYlkHGW647Ve+wjkBePpU8xSRwUPgsBQTGuD/ABetaEPhSOKMEKD7AV20OmjaGP4g\n1a+yRcEilzDOPt9AETbsBTjsKvwaLG6AOuT710i6ejfMODUqWe1MnrRcdjETSVj2/TpViPT1\nHBH4VrNCWwcUn2fDbh1oHYqpZoqgY5qVbVV5GN1WFXHOKlWEbsY5pMZSEO5cEAVNHGI1JAyf\nSrP2bqCM05Ywi5A5pMCqISrZK8mnLEQ3FXVAI5AOKSONNxJPbikMgWPkmlW1yDzxVmOIbB6m\nnCPkbTQKxAIwOopscROe3pVloCxJzT1U7SMYoHYqrHxyOSaf5AZWPcVOqbW6Uvl+/OaYEMUJ\nXB9RmpWhNTR42kdT2pcNgZoEVfLI5zSrncAKtNHjtmkjjEjY6GmMquq7sEZPr2p/ljcMVMU2\nsRtp6RBfmoYyLlV4HNIIyevWpVUoeuQaft2r9aQiLZ5Y56GnbV25UU8xqQNxzjpSn73TigOh\nFt44oVdzAYxUhU5AFSJhTyMkUXERNGFzkc0qR7lOKnkJkyVpojb8KGMj4VenNJv54FPVSwJ6\ninKgVeaAIio7c0wJhsEVZWPtSLjofzpCGKoWhY9qkdTU2BgDt60jKT0NAEOzK7e9OWM9PSpB\nH5ZBPNK6t16GgZH8oXI4PemlN2NvFPVVbOeDTo1LNjtQFrjdg28HJoMe6OpdvXjFPjUrGB60\nCK/k7eQc5p235hjpUrIseRuyacNqxk45oGVmVtpxT1DDkinK3ykkcU5W5IJphYaAQpY8Um35\nSQfmp/mGRgu3ipNqbsj6UhjPLBAJ5NKvXkZFOUEZGKQdCM80DBo8ng4oUN5RPVjQtOSNs9ci\ngQ1Fwpz1pvXIAqTySM8VIqhU460AQKduATTg25TxSSLyDjFSliy9qAK+7naKni3bcEZoWHdz\ngg1KPlyKCRFTcx7UHO3HQ0Bju6U4gyDg4oAjClmpV3d+akWFlHWnZ280DE8x5BjkClLbvlXr\nUi8DpwaYD8wwvFIBNrYwORT0JKlelP8AKfbkcChYyG5NMBqxEL1zT1hH408R45zxTiqtzmmJ\nEJ7DpT0hMjEY4pzRI/IwKfj938nHrSATydgB6ikMg3A44qVW3rUDRtnnpSAkZty57ZzQrKzE\nDjNIp244oVRu9M0AKsI3GpY408sjb8wpVxtwBk0cMpxw1A9RJPlWmR52kAfnT17nrTYz+89A\nKLiDZ5efSpEAUgYpGyB0zmnrGVYZNJsBRy5xxSbev50hk+bKipdvHuQelUgIdpZvlG7uaeQO\nCRg05ITHtIPLdRTm4YgijUoaVAH9KjdN1TBRnJzTs7sjH40gK4Ty8dxUiZZdy8E0Mvy4Bpyb\nsYxge1FhMP7tPPzHJ6U7yw3HQ03Hy5zxTE
hGZV5601l8x+eFxSnBbbUqj5MscGpGQJGSp9uh\npV3qvseKWFixbNNMh+6aAHHBx/ep8eCMEYIpqrukzUywufmIAX9aAGdV6Y5p46ZBxTFQq3zH\nipEX2xmgQxsKuOppAu3HenNHt6c01cnIzigB5zxTWX72fwpWyB1pm8jgd6CkMWP5hg59TUmc\nHGKRcgY704/Iw4piF8stkr2pIVfB3HipPurkN+FJuLKBmgNSL+Jtwx6UISByMn0pxVSNp6rz\nTlyTk+lSPoNRywPG01IGHlnIyaRWzkdTQx+XpTBDWX5eADTWZhyBR5auynPFPZcZwfagRFy3\nuTSKS3B4FSuA0YGaRYwfujFAxqlCxA5NDE7htGVp25Vbkc0jSHbhcUuoAW2qSRnFRghuq09o\ncc5yKcI2HvTAYvPrmpUdYztWhOOnUUiY3knikA75Wzu71CwHOBxVgLUWxV5wSaXoAyMHZyOD\nQB82M8dqFwMpz81SRRIoA71QAyuy5LcDrTANpyKl45GQab5QHQGkA3e3l8cEmlQHoevrRuDY\nBHNNbhhk0gFZiTjNG7aBu6UKoYHPpxTJSWwOoxTAFXPTpnNQ3ADNnbz6jpUuPm69RUW3zGOD\ntUUAObBQAnmo/wCPOOO9TRBG4PJpSg9KQDFiWTLbsU1V3PnPAqTcE+Ucg0xh0GcUxgzOV2gc\n5zRIWBBHAI5WnN6hsGlXM3PQ4qQGqAq4Ayaj3KzEkcVNITHtxxmhlEqqCMUAVmYRMODz0FCy\nFsg5UZ6VLOo4U8kdKRl3RkAc0wsKyncDwRQykDIPFEcbLjJzSyMNy54FUAbS2CDigjbIaTzA\nkgBHy+tLuBbpmgRIrbVJPApwVWG48Cmow28rke9KvzsBjC0ASNtXA28+tMkI2nnmpDtbPcrU\nbRAfMOvcUAgL71GeMUsTBj8wqM9MH/JpybpAMDkcGgCXy/mzuzzUlx+6VMHPNV0y0vORgdhU\n+wlefmqQIVYCVtw5qymGTaw+lR7VkYDo1TKu3745z2pjI8/NjqfagkrICVI9qkRfmLgZOe1L\nI7DmmAsUoUZA+tPZV8wMActUG4uuQOO9SwOc9aBCzK0a8cDtSqxXBAycc0u5ZAf4iKkjbzF/\nuCkUNXazZYUitndtHFSDBUgjj1qPeFQFjtUcZoGNVstx0qUj5Ce9QxXUBkIHU1KzFV4FAhkS\ntu3VOuDwTVePLPz09KsJJtB4xTAaWVmBA3KOM09olZgVODTTGGUhOO+KSFXP0qRi+WASz9Kh\na4VWTchK561ZIdm6flTmjWRVzzQBGq7txBwPSnKNqrwSvc0/aF4pwY7Su3iiwDZIQcMR8w6V\nJCQO+DRwy5x0okRZecY96AHeSTnnNMkQ9Ac4pYWYHn8KlwA2emetADNob5WPaoI49rndz6VK\ncNIR0FKq+YuM4osUxkYVwTznNPmUMoxxmlx5bDH4inKAy/MeB6UxEPllJOBn3p+7bz3p+QFz\nmj5ce9SAjfvOnWl+fbyce1N2F/unafWmQ+arkN8w9TQImyp+9xTtyvgr0FI0aydT0pY8KpwO\nKBiAncTwRTVKKcAc96cF2jP8NKzBVzgDNAagqkc8YpjNtPIytKSGU7TRtbYue5oEJIRuC/eN\nNMbrkHAz0+tKYwr5OcinFQTnkg+/NAxI2bHODgc0LLtXnr2pPLO0kn8KI13HGKQyQN/F2pfl\nB3CgHauKYGLcY4pBqSLg8gfnSLnzMc4NQrKWbAB4qX/WLvzxTFYU4GQ3rSt8pBGDSfLnBPWj\nbtIzyBTEOVctnOR1NIrBckChlBbI/KmDarEUx9B3Yk805lKqCD+dCMF6DPrT/lbp0pXAZNHH\ncQ4dQcjHNR2On29nkwLtyMGp9oXA6inmQdcYHagYxvmXHbOcU3zWXPFOwOc5z2qGTK4Pc0mM\nljcycnipY8EE45FRI2Y8Y5FOXLcjOKYiRGyp
XGKGG3+LihcKvXP1o+UD1oERr80nSpZMLg5y\nPSk8vYTtOeKI0KryRQAgkB52YpzNlgcAAUzbliKNjKM/w5oAeH3Lk+tKCq9OfpTNu5yOg9RS\nyY4CcUAP3f7NFM84+lFLURMrYXkZGKTyV2byce1DMPL4602P5mKseO1MAwB8uMGmqw+Ye9Pb\npg9u9M+4p+WgY9VO07e/c00sFJHekDDaF7UeWisDk80AJtGPmFKNhyD0o3BsgnI7VGW2t93I\npgPVgg54WmsRkg9aAwZvmHFCxkLn3oERuvlryc+gpYQPvN9Ke/7wgdGpFI8vkck0h9Blw4Zt\noX5scUxYyFBY/P6VKq7tys3NV2JLDDZ7UCJWJVOmRTRJ/E34UqSbWK9RTWU+Z7dqAHqC2QO9\nP8tdvLdO1M+7jbyaFHPJwPWgBJMbeuD61DGzdmJqWaRRkn5j2pizYK5TiqEPdmVcAUituUlh\nxT55BtBU55puzPOePSkIOWTdGcNSwsWXLHmm7COeBTOrbgcCgY84U+uaJMKuRzQv7wHim7VJ\nGaQMfuZmBxgYpjZZdxHGaZMG3cfhTwWTGeR6UwHs27HPy0BQy+npSrMpyAuacudpI6UCGlOw\n5pjMduOlKHZW3E5yKGPmY47UtygKgoDnOfTtSNuJA/KmruUbBjB7mpGVg3I4xTuAbdvfmkYh\nW+7n3pGPHH4U2GX90SemcGmAKxaQlTxR/ESaJsJgAbWbtR7Y5qRWBWMh5qRl2qcUwMFUseO1\nNBbzPUEZoGOyF4WkClG9aZuKnpih2J5AyaaEyYY6g4OKhcYBPWmtKzODjGBTZJPlO3kntSEP\n3eZ8w4oDssJA65/OkhwFA61JwMjqPSgYjZ4YYHrSsy4yOfWmk4wM5HpSLIvCgcmkIVsbd460\n1W8xcNwKXd1GOKRmAwMZo1GhsygR4HBpiFtijsKmdhtGVy1R5Y4wtCQkSMN2ACaakYEmDzQZ\nCq8il3Kq8nDU2MgmjHPNPjX92CAOlJIBIMDp60kKNtxnNCAY5CL93qaNyllUjk0s0exck9aZ\nADuyRnHemMmuY/JVSoqAI0jEkc1PJcDZhhSKRtBzUgRSMyxhQOc0sPyrtZfnzUqt97pikZir\nbycijoIWSNW4Y1Q2/vuelW2kEi8daqKwaQ5OOaBltUMce3FQuvp1zVvG1FJyRVZk/ebgeOtI\nBnIXB60/d8npTJWCHkZqD5t3BpATY+U89eopsLeZIVI4xUqkNHz970psIKuTjqKYEcybSQKQ\ncjFTtx82M8VXMnzHj5e/tQA1/m+opG3cE06OdBkEZ9DSs2V5GM0WF1GsAydeaTcFwuM0Lg4y\nKcW+bpTGMMhZwT06UNIOh4peDx1psmHbGORSESSfOoqNcrztytP4aMKvXNId3POKQCA7j/dF\nJuHSkdgox1NMRPm35x7UwH7QemaGVTHjmljbJxikk+Vvl5zQBDu29sU5pDtBHHNI7bX+YU8o\nJI/mGDVBYIsYbNIzI4JHFKqkAbeBSNJtzxmkMijywJJwKnC/u6h42ZAp0cwHelcCNVw3Ipx+\nWn8MDzSMcYx83FPzEMVvlOeopZGMkJ9aQuNwyMDpS7SufSmBDFllAJzTvL+b5eg9ad5gjXj8\naANoyO9IZFu2np+NK0bFdwP1om9jxUomO0AjIpgRBQvNKvXpxRIyM3ApOdpPegBsnLE4xSRq\nQwAPWjaWxk0187s5qWBI8ZXuCfaod24gd6mVg3C8nvTFjGTnrSAaflzjvTlwo+9zTmUMABxi\nkAxyRgUAG7HBApJfmXIHzUTfezjinccHPFUBWYeZxmmLG6KQee9WGUbjjrQwCsDn2NIAVsFS\netJLGGJb+VKgDZHU07csanPWnYZHGwK4PTvTXQEdMjrT0IbOeBSeZz/KgRGqhgSAQfSl28gZ\nzUschIIPWmMuc5ODRcBFwynJ+ameX+NPUqykY5
HegKeMGkIjZRwKjZWX6VOyjpik2dQKBkO7\n5enFLtDLmhoz68UiqV5I4oAbznAopw5bI4py/MQCvWkBE0ahs9aY8XUjkVb8vLACmlcNt7UW\nApKrdxj0pyhu+KsSLngU2SHzBwcGgZDtA/hzVdYSFbndzV0/Lz1I4pjRjZkfKTTEUPL+XAOG\nqKSzEpBbgirjW25eDzTWU4xjkUDKJ08FSV5I5qrNp+5c7etayZX8akNvlRg49qoVtTnptNjx\nnb81U5NJjZXLIAe2RXTtagsSTgVA0KsSpHtRcVjkP7FjYY8sD6Cqd34bReQmfrXbNpy7cgfS\nnS2YFuoxk+9TcZ5yvheNid0QKnuRmqw8Gx4ZSm5M56V6EunblIxUn9nFVA9eMVSZNjyi98C2\n9xwistZM3gGJty+UoHqwr2eTTVLcpjBwarnR4riQhxgVVxcp4fcfDWKRSEUK3+zwDWDcfCmJ\nR+8Rhznivo99Bh2gYA29Ko3GgoWyyqapSFynzZefCeOY8w4P97HNYt58I4I5H2xNJwfm24Ar\n6lm0NXTATj1rPk8LRysTIn1ajmDkPlW4+ELtgxrx1KjrWZd/CfcmJIyr5yONxOPavrN/CsIz\nhAy+uKgk8L2vaFAMddvNV7Qj2Z8f3HwekY70iliPX/V5/HFUbz4RzMFVELuxznbj86+xh4Nt\n2U5XGewqo3gWNZG/d5U8ZxR7QPZ3PjdPhZJ57RiAjsR7+1Z918K7tZmQxgxKCc45+lfZsnw7\nRGOzb65xVdvAEUilWjVh6Ecn3zR7Qn2R8Ut8NbmDP7ltpXgdSPwqk3w9uI0JEbMnXeUI/DFf\nasvw8gbP7lRxgjH9azrr4Xo67ViG0/jWiq6B7M+NJPAd3HMZHhZVZcgHmoJPAlx5YJZoz/tj\npX2Q/wAM4lwBAHA6ZFQXHwxSZhvtlZRzhxT9oHIfG6+CZ9rKRscdHxw30qKTwfeRqRJEzEDI\nBFfXzfC2PzCyxqV9Nnf2qr/wq/hlEe09+KftCeQ+R4/Csrpu+ZsDPQ03/hGZ1V96M7D7qY5r\n6tl+Fav9+NgOnC1EPhFErBngbk8OOD+VJ1A5WfK8uhzLtGx41PUbDj86UeHp2bCplCP4a+n2\n+FpYMqKdgOMMM1A3wlaQYKxhR0AGDT9oLkZ8zSeHZpMt5YX1Uc01fC0+RGF3YHC45/lX0u3w\nojkbHlMF6NtGKkf4Vr1Xd5SjAYDBo9oPkPmeDw/KRLHypU4JZefpikXw/NtEIjZQpyF25P5V\n9OP8MYGAxExZuWdhSD4SpFJ5rjdkcYHNV7UOQ+Z28N3DOQbdmUDJxximXHheVHXMTySAdua+\nmn+FqqgQRHYx5XHJoX4RhceUuzHXjPFL2gch8w/8I/NI/wA1qRIeN5FXYtFnfMc0O4hcZQd6\n+lo/hY5ZgIwy9RuFOT4YhVyYkQ564p+0J5WfNNv4dbz0EkZYkYwR0qwvhAvI48vAHXjFfS8f\nwtWSPlVQ55YLk1OvwlidgVTd6til7QfKfMkng+SWQeVCVG3oRx+dC+A5/wDWA7gvzFdpr6lg\n+E6LwIjKAfmYnBB9qsR/DpGYJ5J64OU7U/aIOQ+V/wDhDZbrClDtYg7R1FXV+Hs3mYRNyEYy\nwx+FfVMPw3hbIaJeOAyrVuP4c20a48hXK8nANL2jHyHyjb/DWRvm2tGFODGwz+tXU+G8q5CR\nEyZ4+WvqiLwHAcolqp/iqz/wg+1lGwKOuMVPtWPkPlyH4a3A2mWNhx0Izj3rasvhmZyu5VYd\nztxmvpeHwPEVUsm0jksR+lXI/BkCpjbkH2xxUe0HyHzh/wAKx+Rl2RgNwRjJq8nwoikyfKdW\nHFfRcfhWH+CFdwGM1LD4ZUEbQCueRS52PkPnqD4ZNvj/AHbiMf31OPzrcj+GouBgJtx/cX+t\ne6f8I7tO3yjsz6Zq2vh+CMK2zBo5g5TxK1+G/kRo
nlZAOfm7mtKHwDEzFXt9jf3tvBr2OPSV\nkQfKBz6VOdHHTblfenzBY8it/AK20YKqSmeh7Vu2ng+JVBCYJ9K9Dj0kRowCZHTJp8el+SVy\nuaVx8pxX/CHiFgQNy/Srlv4ZjU/MvHpiu0W0cLtI71MtihxuHzH0pXK5Tlk8OQKudgz9Kng0\ntVyCmSOmRxXTNpe3Jz+FO+x/N0oCxzSacJG27KvxaaByBgAdK2WsVjUNjn2qRoflHpQFjEj0\n/d81WUswy4YbmrUW2+XI+57dakWHb823mkPzRmw6d5bZxg1YW3T+IA+1W/LMkg5p3kjBB60C\nRT+y8cKCPSmNag4+XFagjHtTTFtGSAKYWKYtyoGMYp7R9ARzVpYwvvSrg/e4NAaFb7P1JNNW\n343Grvkjk5pDGAvHNFx2KJg+bPapFjDLnHNWlUHgUrL+dAyrGpU8jAp6xB/lHWp44ief50+O\nMBsYwfWkSyt9n2tg8ChYg2QBirxUMmTUSxKp4PNBSGLFuxjBPpTvLCnIXinqu1xjrSS5J4Bo\nAbwi8rRuB60/BPykUu1fTmgkiCHOTwKeuOSBzUrL82R+VCx4bAoKItvQmndGJBzUhXpnGKRl\nUcjpQAzaT3pVUBs9D609SD2pWUdO5pi2G7Qyn1pGjC4A5HeplVQwFHljJJ6dqLiVyARhm68V\nHJ8uB94VZjXLEDkUnlnPOAKRRErK2OKcvytzzTkjHRhgVI0Y60hEbAsuQORT49oUlqVmC5+m\nKY0e0DHIoEKB5YJByD0pm5mJXNS7l9OgoaPjcBzigoY3yrjofalLBlC45prKWxxyakK7QM1Q\nAI+x6YpFUbcHmnKwZsdqNy56c0hNAy/LgCljQ04Ordqbu2g8ZoGg6YJ5oAOcGkXlcUvKdetA\nhu0Fj6ilVg3QU5VX13H1oGN2B3oKBz8vGCaUk7QT0pRGAx5yKTjgGghjW2ZHINJjJ9qftXkn\nmm7Q3I4oKQvGMBaRYsITjJqVAPWms2OAaBdSNY8MB09xUnDYzwBTlHoOKXaNvTmgYzy9zg5O\nKVlKycdB60/nbkYzSEnaBzmgXUEB2kEZPWn5O0npTGnELYxziqMOqlpWTbtGetAzRO4jcakj\nUjg9TTQVVB3J/Kl2lupxQA513ADAyKUQrgMeKcuGbrzTmYMBntQAgwp+UZFI3APHJpysdudv\nHWozhmznBoJDyyq5J5oRe5/Cl3Bm5pzLjocClqMgldgpp8a7gM80bNxxu4HWpI4zH3znmmMn\nijO1stgVGqhckHinsRnFIwx24pCFjJZeDTYmbcQVyPWnKoIwpqSNcLg8GmIb35pdvB96dIS3\nGOPWhVO3mgA2jyhxn6UKNuATxSQseTj5akWMMpxzn1oHYIWC5B4o5ORSrGF+9yKdu2vyuBji\ngYsUeR83SmvCC5bP0FCsWU5yOaWYMqgAZA70WESRqVj44qP/AFm7sanjwVy3NM2BGLAfhSsM\nbGpTilWP5ueBTVYyNgVOY2ZBTshdRREQ4PGKaTu4xzTmU7sA0iY3dOaLDD7P6GnpnOR9KdvK\nJx8x9KApYkj/APVQA6TcrfMOKjMoZc4yaczM/X6UwbV7c9KQCru+8RgelOjZSxyKVSN3Wmyf\n7JxQMcu3vxR5bOpwcDNOX5V+cc0Kc57CmGg2XC4K5+tATfHgYz6U5nVkwRxSeUAcqeaQtiHy\nzuwPvU8DcpLU7aQxNNYnaeKQDPmUjHAqTYrH+dCoduD1pwIGARz60CQ+OHBJyeBxikaNzFu3\nYpykc4NJIvy8nFAxrncgGeacjDaSew4oVQMntSBQyjJxz2oFYjaQ5BqVWDZ2jmhk+U4HFIOg\nIXkUCGnAyM5NIylQMjpS/d96Tc3lk469aABtp2kikboT3oY5j4pw6DfTKFjUMBnIoVdvJpNx\nHymlPOVPXtSAGwPmxTFkZjjGRT+TDjNNx5fGetA2SQ
sGBXGD61JNtWMY5qvzuyeDTzncM9KC\nUH/LPhec9fSmtnfkAkY5p6N8xIbgdqTllPOPagoj+8owOKeF8sgZyKRfkXbjigL0JPemIk2q\nxPc0xY1WQ8VJHlmJ29aaqyNuLLjnFIBf4QMZFKr7cgA0kjbCOOaOWycflSEG35dzDFRNEUww\nxmnvG20hvyoVU460hiYJZQG5Pancgnjg0mfmLdfSnbmHBHWmBCibpM1MG2gkDmmEdx1pzMRg\njnFADthHOM0xpGUjH3e9OExIB6DvSFvMGQPagQm1WBcdqifMmCBUgX5sDg03lWznBzyaBinf\nwFwT3pnkuGO7k1ZiYKfUdc0rMNxbPNICq5HLEEYpq4ePP3c1P5XmZB5FMWFQxU9KYEEaGOTr\nuFSLHuJzxRHAVfcOnpU0nyAsOmKXUCvs2qTmmeYGwCM+9MunZsDpShW2qVHFFxj2twxJU/nR\nHGytzwKDwozk5pFyfXFMQ8sG6jJ7U3kyD0FBVlbORSZ3dTSGSuqnBPWk2hfmVty03eXU44NE\ncTPAYxwM80CBsbs5x7Unys2T8yj1pyxmM4K5GKaELZAbHtQA0ksuSBipI12x5PU0Oo4G7bUh\nwrByeOmKYEaBsc9qk2luQfzpV2tk54HekWTnnp6UgCICNs5qaRQy7ugqD72T0GelSN8i4HIp\noERumxxzn0qYt5akjv3qGRlbvT4fu7c7hTAbG3Zshs9qs+YyxlCME0yC3Jk8zI2+lSyL/FkG\nkBAvy8DrU8TFmw5NNj+XJIyaHwxyTjPpRsMWNtzZU455p2SxPHA5pn3enAqNm+ZcAk0x2JYZ\nfLU7+hOakjZeXGMVGSJflx+FOjjVZAucD0oGkiYhVUMgwaGcdDUqL1B4HpULRj15oATLBeuR\nVW8kWSPy26E9quL8qHNUZrcmZSo3KOT6VIEtrYrtDmrq4z6+gpNwVVZe/ahTuY7SAPSkIXZ/\ne+VqmaP5Rk5B6e9JDII1+cZpwmRQRjg0wIHJRzzg075mjxnBoMY25J5NO8k7cg5xUjGwuytj\noehNTNGeoOBTFXacmpo1w+WPHWqQCLGARu6in98hqSRlznNKsYaPC96ARHHvXJbGCacWDIRn\nHNIoEf3jmnMob7opjsNdSuGH5ColuH80jGR61PsfK+lPZQDnHFICDyWPO4Z61MuM4xxio0V1\nk5PyipTgLuHJzTAjLKzMc44pqsQmFNSHap3ADFOChRk9fekw1G7jIo46daGlCjOAR0o/gyp4\nNKV3ICR+VT1ELaSB1fcMDtTtwGcjioY4287jofyqzsBJFMBqqe9GOSOi0rMVXpmk3huo4oGh\ndw4HX2prfvm+ZcAdKcv3iQAOKFJZcH86bGhiKQuM8ZqQ9ueKRlG3J4pG+YYJ/KoJEXuTzSDH\nGetNBIOKl8tWanuMYqsWJbpTtrZG38qFHUClVmYE5oEM3ZbaTg09WDYGOPWhVBkHAA70/wCV\nWxjI9BQUgChTjt60yPK7lAyKcW65GB6UO2FDAUyRir5hztxTgctikz3HGaUfLx3oHYew+U44\nI9KjVVb5zzSsxVTnilGNmQKB9BMARZHWnKzbRgVGqlm64AqRcjnPFIkPmK5PSpVkDDp0qLcW\n+XNLnB6UykDNzUMjNxmpguF9aiuAxwAOtAyWNSI855NOX5WHpTE+6CKlzu4oEO+83TikXPPF\nNT5s5zin7TnIOPWgQnO3+dIMM/B4p2RnGeaTy1WgBeNxxzT45BtKkcGm7x6UgYhcnr6UeghB\nuTpzSKWb745p+3coycGlb5eSOtSMXn0opvnmincLEqqGOPu0iqsZYnmhscEmlUhVJJwtMQmf\nlywpGwx2njI4NKzAgsoyuKi+bcFcdaBgx2beQaSTk5XvxT1t9+QRz61HtK7k7ChghFYbcEYI\nPOKJGDewpGHHy9O9J5e3OaBEilWwoHNGT93vTMhsMoap
FXcpJPNAEW47t2MimsSuXHPtSbmX\njtmnblUjjigCLnhx361MI1xnHvTVxgnt6U2OYopDDPNAC8KelOXDE+tMkyzg5GKXcFYucYx0\noAIyGY84PSh1+bH8NLbsu0k8e1CMzZLD5e1VsIqspZuBjFN8w9N3OamkXzGPYD0pqx9Qw60D\nHsoGRuwoPWnLIRHtA4/vetKqjaeOc037h5HFIAZvNPyD6mm/hx3qR5CqZUYpN25emD3pgJG5\nZiBxxSsGhAOM5pV2q2QetEjN0qSWMZWDZx2qQ/dGetNjVguWzUcjHIx0oQD44zkleQakRj5Z\nQ8U0AooGcD2qKT5mAzVDJF544xQvBIPAp8YjjRhnBxTPLLFiDlRxSGBizjmhTtY5ftSxthSO\ncUi7XPzcAUWJFaQKpVec1Eu1cnbg0846ZB5pqqFbLNkdqBjBiQggVL5ZHBPvmlCFRgDAPPFM\naTZkHmgYrMJPlyKGURr6mq8a/NkH5VOasRNuUluOaLAN8wM33cetMZ1VchiCKlZUxx0pix7g\nTjpSAYqiT+Lr61JHGFz61HI2zrSxn5lJbjNADlxGCMUbtrD19qHwNxByO1RqpkbAPIFAD1+Z\nuRg0xWBYnpzSq3mHHQigR7mOTigVhVDNkgZFIx29uaRmaP5VbinMuFDFt3tQAyRTy5OPanRf\nN9489qczB2Gaj2iNSc5PvS6iGyMPMwtI33csaFZTn0qKVxIpA60xj4iehNPVQuWzgU4qhRSO\nuKiVmWQrgEYoAewLcZyKMmNCMVHJuXgU/c+0A0ANljLYpGj+YDoKexK8d6jJPQ5JpDFZgrEY\n+X1pjASMBu+WnNuKD5cimx/uzjHDUgFkjCqdjZ4qrHCOGPODk1ZkdgcldopYCPmLjg09AHed\nu4Oah+8wVTkelOkAC+v0qGMFTk9fakImlVUU7uOOKrqAy571LchVAOc1HvA4PHfigYuAWzjn\nFTqyx2+7GWJxUKruU4NLuwoBXHvQBMvzRnPBqpMo28H5qnWQNCc9c9arlAzseooAi8sY3AYp\n0fzZ3crUi7Uj/Gm4CcE9aQA37peaaJQwyKc7biMjIqFDmQZ+UUATlfmBxjimMqtz3qWRSVzn\n6VDtPJIpi6Dd20jApzSCRSQcU5sqnTPrTdqry3ApCuRjpnGaGk3fL0PWpY2G4cZGe1MmUO7E\nfLQUNBLYxkU9SHycU0t8mCcChdy1Quo3ycvv7d6k3fN+mKUNt5x+FRqccnrTGwbEZIJ470sc\naNnc2B+tNZWaMttzzzTWBOCOKliQ8wrt61Ht2seM1K+QozyaWOMsu4GgARR5eT0qHgk8YFNk\nWQdKVflxkcUARSDc3oKkZtqjByakaMNkjpUax7qoAbD4A60kylfYVJGoU9KbMpbGW4qQIl2h\nBzk1KvTLDioOAc1L9/aOQKAECDk0fdxnrQcL0NIrK7nPAx1oBCthuSOKRVG3I/GlVlKnJ70C\nQbiAM0hjFwORxTJI8DI61My7RUYYNmgCMbk78Gn7tqc9PSnLjoeabt69xTsAoXdHkVGG24BF\nOVtrUS/MDTAFx07mmyRbVAPXNOjYHG4496c3zPk/gaAIVxGxI607IkB45qRlTacn5qiC4yRT\nAayhcc0/5XTAH40uzEf1prLtwAaAE8s9jSfxcjin/d6daarjByKkBq/KxwPl7UkilfbNOVjJ\ngDilkODsJyKAItxGD1py+tSDG3HQUza24EdKEArRhhx1qNsN7CpeWJXGG9qayhsY6CkBXZfQ\nU9WPGefSlLEqRjiljULjvQA7B4xxRt3A560jNzjGKf6ECgCNkO3I60xFPQjFTN/s0jNuX3oA\nhwDnIwRUbKT1796m4deeD0pqqdvHPamAxF29s1EY93QVb2U0qVHt7UFlV4cKD3pNhB9qtMrF\nsY4qPaU9SKCSAqOBnNRyW4Y5FW/LZlyMU3ay8Ec0gKixle
OoNK0R256jpVkx8Zzimbfl20AV\n/LGMio2jKnJ6elXFj2rxyajkU+nSmIh2naOMexqB4VViw4BqW6ZggYdc06LEwyelO4ECR7iR\n1FRSW69MVoNCFQMOuajaLIzmkBnm2DLkYx6UjWm6M7RV5bf8KeYdrcHIpgZkOnqsXI+Y1HJp\nKSLlevetYxg9sUjRqvIGDSAyW02NkHG0+1QtYKqkHk1stHuUE8Gn/ZkOM+lAGFHpasMfw02T\nRwVOa3DbpjaOlBj3kDoKeoHOHSwMDH14praLGysSuCT6V0L2mGOGzTY4tzfMPamBzP8AYUAb\nhSaZNoAVt23j0rp2tlVeF+ahoz27jmgDkYfDsZZlKhVbsKgk8MoXKYCnP3jzXX+TtPK8+tOa\nBCc4yfWi5NjjP+EbTkbAwHcCmr4ZjVtzKMfSuzjtzuPHFMmsQW4PB60+YLHHyeG7VYyUQbj1\n4qo/hhXzuUY7cV3otkVdu0VE9mNx4JFPmEziR4SjVRmIEfSlbwpFJltgG3+EDFd1HZDy8Pnm\nkNmuSFH40rjscOvhGFgCygfWo5PDMByDGpwa7xrMqOee1V20394GXkUXDlRxn/CMQrgKoLet\nPXw0h6RqOea7NNPVeWwDUbWRVsdhzQHKji28MhWICLj2p/8AwiiJHkoGXOcV2X2HcgO3BNCw\nEfK3IpC5Tkv+EVgb59hHFN/4RvyOVGf96uzjs85B6U6S13Y+XpQOxyEfhlJlLYwuOfrSt4fC\nxbVGPpXWx2QjUjsaa9rnhVzTuFjlY9BaNRlPwqRdIRGzjmunWM8qRQ1mnmAgZ4p62DlOej0m\nMfNsqxDo0bqC6qFFbXklVJAHvSmAMoJGKkVjBfRRIwUAbM1Yh0eINtYAZraihXJ4/OhbUCTJ\nPFMDN/sWJeVApG0sdBGBWvGG544o2/NzTYGYtiiLhhTG01NpUnk9K1/LHUCkaHoTQykZUdis\nKgYyPWnrajGWHf8ACtJYxJgMcCiZQ3Qce1BLWpQ+y4+lO+zrwRzjtVwRblxjBNKbcR8Ec0xF\nLyvbmnW9uBJkjirSJ82cZAp3lhsnOKBkHl9cjikEJXntU7R8EA5pzYbA6etMZXjhDPz0o8gf\nMeo6VZ8raxPbrUkcYaMnFMRV8kBeOfakeMsnXHtVtV8ts9KYbfdlhQBWRNp6U+NV/i708Rlc\nAjNG3b9Km5SGtb9FB5600ws3erHG0Z4NP2gjgVRLKwj4460jwt1ABq15ewgnmnNGM4JwKARW\nWI7fmHFEce7ParDKF6HIpm3kUdQK4j2nrg0+SMrg5zU00Yb7vPrTeG5A/A0AMRDtyTT1A3DN\nP8s7cnpSFfQY+tACybedvSo/l3dOakX7uO9IytuwKkQmwZB6+tPVQzYxTVyrbWo5boMGmGoO\nu1iMc08RjaABzSLkDn5jUmSigEUAR+XjJzzQoPXjNSs3mAKvGKjIJbPSmCIjlgd1BXcuM81O\nYty9aYYyzcAUCFXCoBjJoA28kc9qPukDvTsHdzSAiXO45GKlTA4JznpTTljk0/5RzRYYipt7\n9ads5xnjrTVZV5PTNPwC3PpTEGBxkU0j16U7aOmcULjdgnIpFbIGjHQ896bkZxtxT9m5s9KX\nYp75NBJCyjqKRmZirAYxUu0L90cn1oVW39OKYxm3cc96U4bIYYqTnk7cUMrOAemKQiID2xRG\nnzZPSpGQtnLcUxo2UcH5fSmA3G3gD8acVwAo6d6Xbgbu1OGGyTxQMjLY+UDFNwGJapjjbgnm\nmADLelAwVVbG3j2p/wB1sADPrSR+ven7SeSM0AIFI4xmk2fN9aVS/fihl3DJbFAhjYXjGaVs\nMgxT+AOmTUYXHagY4N1GMUABsUqDd1FLgKwB6UEgMquAtKrbV9TUq470gUtnjigCPbuBOeKV\nVO0HtT0hx06d6ey5GOg9KBlK6h8yQHPakh0qP72Mmr/lhAKc
qnrnA9KBkCwhV605vujPWnFQ\nxx3pWiK5NIBoXbz609l3LgdaXZuAJpqsdxyfyo1AkyfLAzz3qNgNwJHNP54OOKewBXgUCGMA\nvPan/I0XXJpEXs3IpVxGuNooAjjUA5K8VKjLvHtTfL3E4PFEWN2COaBDxnccjApVPmDFP6/L\nSn5OMc0AN2eWM4oV254zTzuIwRUi42DGAaBkK5HHapFHXPSk+7knmlDbkGRg0xi5GMDgUu7a\nvTFQ7sNgip8jy/m60AOyFXnk0ySQv2pOdg9amjTd1NAEUOWf5uBTvMMnC8LUnG3DdaYTuGBS\nAcjDhW/GrG1WXCniqyKf4ue2Kn8sKpweadxalfy28zaDj6Vc3bQB3qCBSGYnk1OuOhFAwKjP\nvURXbjjqalO1myp/OiR+hIyaXUBCoVgw60+NmVjmkHOTjIqQLtjBPNADFwzZP4UeWnmLnn1p\nWXy2G7ihsOxxxS6gEi7WPAFR7s/KBzUh6gN8xFJ8u7IOKdgHZPQ84ppVmPBBHpTY3PzD3qQr\nj60wG/w8ghaGG3joadtOOtO3ADJGR61ImMPMHH3u9NDcBh+tKp3NnP4UexXFAxWVsbu1NajL\nKuO3ambjI30oESr8v0pzdscg1Gjj7pOM1MvTkY9KVhETtg/N0o87nhccU6SRW470rAMBjg4p\njIfMl570+PeRknB9Kdg7cClb/wAexQBEc7sY5p+5ecn8Kh84Z547c09VAbJ+akwF8slDxilj\nyyjIqdMHBamyOp4XNMBqrukFMIVVJOc54pMsrDIqUKN2TytDERqwIo+8cEYNS+WjNwMUu75i\nAOaChgZRzmnbgeSc+lRzSfOFA4p0ikt8vpQBGFEbEEYJ5pwxnkYpI2+b5/zqRmG0DqaAGFD6\n01423ccD0qQtkccGkY7QM8c0XAfG351IrNtJNRcgbhT2kx8pGTigBnzN8xHFPThiQcUiOduD\n0pzKHUEdaQETttB3cmolG3nk5qxNGGGc01VKk9DSAYpVjtApJGMbZzyaQZ8wgDFK67WGeRTA\nd95RzgmmbhCvNORWdWyMCoZFJ75AouBIuWXpilXLMeMAU1ZBtHNS/exg8UgI2Y7zTggYYbii\nNW+fpimtjb1wad9AHr8gx0pjSZ+UDn1p23MYycmjjacdRSAj+6wanNId3PApqyDzNx5HpTmU\nSdWxTAkEYCBs1XumEcfGc1JtZf4vlFJt84FicgUDK7QmRQzdcVLGjRx88+1IrblwR06UF3zy\naTAXb0J79qdt+UgcNT+PlzTJNrbi3HpiqAYq7ThhxjNJtDAEAilUF4SPyPel3ZKoeOKBCFFB\nXGeetOXELEbvpTkILgEYxTpIwegz70hkMzGRhtbBqONWEwLL1NIzHfsxg+tTec42qBk0CAqu\nXA69cU1UzHlietPTLNkj5u9PYbfvYxUgIEXydp+tOXYynIyRTFwyZHJ96csZGTnt0oATyhty\nG6mnspI+UZFMjjxDuZsEnAFTBTtUE4Ip3Arsu1QoCtzzU0Y8tcbahZF8wMo4zU3nAMKBk3/L\nPApiq3IHIqVdrLlelRCQByq5+tUIFkDJgnk9qlhUdDjPvUKwkuW608MfulcNUspBOg+Ur37U\n3yWaMc4PWpQoVl+YZXrTI2ZmLfwU0DJo5dqqNo6c05lXarZqEFlbhcrmpWT5cDgd80xk+/K5\nP5moC7FgwWlhQFTnkCnL8hwRgUhMcyllHPemyKVjIQUi5VuuTUkfAJNFxakMZ+YE9KdtCqWG\nRzVhoflB4FAAZSvqOlSPcY0f3dzcEU7/AJZhSMkGnnaNqkdqRlz901SGG5WYAjBNP2MrHBGC\nKaMSMTjkHFJMp8vjk+opAKql+CeBUoG48tyBgCo1bKgDmiQEkEcYoTAcyqWwetOTKt7U1sj5\ngNx70pDHJ7UIAkVWU84qSGRQcY5qMxnjcMmnxrtbJGKB+Y/B3c
nimbx3Hen7gykEYpm3sPwp\nAJy3WpFx249aidnwcDJpys3lcjB74oEISvVeR70/zA0eMc0i4Izj8KedpGQeKAEOV4HK0cbf\nQ0YxhR0psilmA6AUgFPy4x0pfMywK0/gLjqKbwCOKYCjluvFJ/skfjTtq7SO3XNJ1brxigA/\nhPpUXzMwA+7Urt8owM0LhV3OMUmAMOzdKUKpHvTVcbskcU9cDOetAxDgNyOMcU1OD7U8sQo4\nqMsPung0xDyuBkc/Sm7Sc4OBS8KelGTuywx7UDE2tkHFPHJwDzSM3zDHT0p20FgSPlpDFLf3\ngCaQZfHNMO3JI+7TuFAznNMVhW29O9R8l/pTuFXI5NN3Nu6YoAXg54oZx8oHH0oZTgc8ZoX5\nTjAJzQAofGe5p3BwBmo2Uhs96lXHpg0mIRV3r05p4O3g80m4Zxmhlbt1pgC9CDwe9MlccY5N\nO3heTTVw3zAYNAyVgFwf5UMM8jrS9cccUsgI5HWgLdRNm/heBincLgEc0hLLz2xinSdVx1xw\naYCZL5HQ0kZ3AhqXbwc8N6ilClWBPSkAFCV4pMDqeDUi8r/hTeducc0AG35sGmsw2gHOc08A\nt1ppUlckYIoKDcPSijn0WigCZlVYwTg/SmLhkKnk+lHLNtIwKYsnlydMnOKCCXa0cPA4703P\nmMBuwVo84tuB4zTNpjbjnigY/wAwhjzkVHLINwJHHembyxPaldf3JJ6dKBjFZdpweM0vnIGA\nJ5pirtUBR9aFg3Alx75pdSSQSEsSPu00s+75e9HG0gHjFLGRt55PrVMdkyGTbkAHnvT9w8sm\nom+YH3p4BZlHbHNAhVcSKMDHuaGbawcfd6EVIzBsbB8o4xUcnOR+OaBkYIbJY456VNKqrHxz\nUXl+Y3T3pdzBT8vekFhyr8pYfSgruUDOKbwu0seCelSNEvLCgkZt8sDHU9aRWGDlefWlZfLH\nJyaR5CsdAwRju3YyKWaQbB8u00kJXaM0kiliSRx2oAIisw+ZsHNPVFLcdKayhEwFpV+XBxj1\npgxzbf7v5Uu0nDfnTWyenAp6dhnmkQRqx5GeM0jAhhwAKeVCt1yKhlcNjnP9KY7EjLtP3uaT\nJwMjNMDKq5J3GnRyFs4AFIYNtxnFL5nlqR3PNMUbiSRQ69Oec0rjGwszMQRipm2+WRkVFuYN\n046U8RFV+bkmr1JYkaI7cjkUjdCe1SqyiM8YNQ7tzEdFpaiBXJwwP4UTBWwec06Bxn5V49aQ\nsrZ9c8UrssZHGDweKk3LnB4Hek3fMSaQsvIPegB2Rs2j1ojyEJY9+lGPMYKnBxRLEqqMtQTq\nRvhhtI5pPKRRjOaZt3OecCkjcKxVhlKCh+8KwC9M1IwP3h1qFmOPlGFzUysduccYoEyBX25Y\njjNTBT5ZI+9jNM8sH6U4swyAeMUDGxR+Y5HbHWmt+7XbjmplUR9cjjnFQtIRkuMe1DJJVAbB\nxzim4DMSDTYslM/dX1FLlQuRSGMYlnIC01GVQxbjipWxGASck1WWM556Z6UBuTQspiz96lGD\nHkjJpu3arAHGe1RJleOopgP27unNKsfUmk37OfWjzl3DGQe9IEAYM2acc7ivAJprSLtwByTR\ntHJZsnFAxythWBIBxVUbtysDyKfnfweD1pittZsHNMRNIxk+9zQvzjA5GOKRpAsfoT3pIck5\nXipATBjGWFH3YyMc9aJGKsd3NKzHy8gc0xldmWRhT2T5PehcbclcNSbTuBZscdKWoDWYqgbF\nSRuJAB1pMhRjqPemrxL8o4pgPkxGdoGRUYjK/dGV71JnueDTZGCqVU/WgVyJvmbpgVGw6Enm\np41DD5s1GwCk4FAxQokTK9KrMvzENVsNthGOlMZQ657mlYQqybVAIzULu3mA9vapFI6EjNMk\nzyRQA/dhf71Ry/vIxuOeelTb1RBjjimNhY94G45qhIazCPlRikbB
5IwakUj7zLwaawDKAe1S\nUIFWRAGpDnoDwKf5P7st6U1VBxg9RQTcXK7cE80zHZutNztyTytSblKggZJqhi+YUjIFIsg2\n425J4NNEm1cYpyvj2oAiaRlYDHtTo5NoOOlJyzE+tGFQ4J5NIYrszrwuDUewqvzHmpcBZM5w\nKbLIuQR1o1JI42PSnFhD2yaVmXg96JMY46mlqIYCWUkA0qruByM4pyZXvxTfLbduJwtMZHtH\npgU4/KvA5o27VxnIzml2/wARHFBQhjz91cmomjdVwRjmrSsqg461FJ81IkgX7xxwKlLbV3cU\nbcZz93uaF2lB8vGaLAQrnfknn0p3yqCAOTTpFG7K80Aq3saooa3y8GkWTEbcc9KkkwY8qO9R\nrhc5xz2qQGbxxgc09cZI74pNyr8uOfWnN1GBTAQqOvftSMpxnHzd6fGPlx15prA9M4yaYwVT\nIcik2HuaVcrkZ/Kl+6CM80hCL8/FMOCORz0p6gkjsaWZT2pgRL06UMu5T2oXLd8ClyVU7hxQ\nAwfKo5waj8vLEk1YGyVcgDcPWo9wxyKBIeY1CKGOSaRsrhB9c05WEikHg9qUDAHf3oAaGCtu\n/Cm48w8U+Tn+Hg0yOMq2QcCkIaoPzIOuaVtscf8AtUrKQxKHmlba6AnrSKGR8jJpeD04pZVK\n7SoOKAvQsaQxn0OfWmnK896lZducdTzUf3cgjNAmJtHXbmlQKucHrQuO9I0aq/AxTBC7STno\nKF+XJNNwR7inMw4HekMZ/Dkmhht6cg0bjyoWlKlkxmgLke7aOOtKuX5NNRPLbJOaWN9zEUDX\nca8Y6g5pjEMxA4NWNpPGBmonj2yZzzTGMVSvanPHubPQU/aduc/Wkb5mx2pEszNUVowhTkZ5\nFLaqNnIwc9KvsFY8cj0NN+zgruAw1AhjRnqDgelMZSx6cVZAwwLc05F357YoEVWhK45xTtu7\n61O8Y25J5qPaPWgZBs+bHU0kibjgcD3qzt+YbQKVo/X8KaApNHnHpTvJyDipxETxSMpRgOop\nAVGUrgYp0cY5J59qtHa3B6Ypvl8Db0qgKrqOAKXaCQTxUzBV7YFMYEY45pAOMYYbjiojAXUh\nSKlVwy7T1pUTb27UwKxtz0NPEA2Y4zVhjhQoG7vmhlBYEDBpAVPK2rlhjFQ7dxHp34q83TDU\noHy47UwKBhB6ilWPcwGeKtYHmEUqx7mHAoEQmH0pPLxjvU8mQ2AeKbt3cHii4yNVG7k8UjQ9\nMcDNS+T27etNI/hJ4pphoQmMcnHNOKhlOBz3qXaB05HbNNUYBB4qbiKToSfloEPzYq5JCD90\n07y1XB70DII4VXBJPFKPmVtw4zU27P8ADSeUWbA6UagQbQ2AKXaeMLipigU8cVKq/Kc1SAqe\nSN2cZNIsRVs4q5tDLxTWRlHNAFXyAzdMUxYx16CrjA7eOp9KiEZOc/lSAhZQG46UwgFuOlXF\nj3Y4+tNliAYkAAVSegiFVzj0qXaNpzjFOX7mAOKUQjcBmkIjXGOODTtoI5AqQw4bPak2nPqK\ndwITApbHQUu1YwFHX1qwIw/HekaLnJxSEVWU8Y5PrRuMjdOamKDtwM07YWbjjFMCDHBwPrSr\nEPLPFThSW2/rTeQpHU1QESxhRx0oEOcZFPZv3eB1py8r1xSGMeMjaewpoJU4HAzUhztBPSjY\nOCBmmIUsGGDye9RyDbwowaftUHgHNKxz1p9B9CBlI6np6Uzbz0zVvy9z8cj3pGjXofvVI0Q4\n3KNw+anKnynnFSZ56ZoZCOO9UIRF9eRSY5IHI9aXAXvRz6UwGr8wOcUpUNTtqueOBQ0e3BVs\n4pANEZQ8GkX5mx0qfCng8Ujr6Ci4DVUDgjmgp3PSmt6559KkjIK5figQxNvzEc4pGj3Y7d6e\nFCPjpRIp3jaeKXUQxlGOgNIvSns3yjjnvTiq4zQxkZjA6UrArHx+tS
HDRjjFNbluDkUCIwpU\n8HmncnrSNleO/rUjRHbx19aaGM2lRw2c09cspzTeFwCPxpdp7GgLDRGN2SacoVjnBIoK7Tk0\nik/w9BSAVlRuAtMPzHGOlS9iaYpYUXEHklulPCHoQafH+74akO7rnIpiEZM/SmBWxnHFSDJU\nlaRWyoBOMGkytxN2eS2KSNsv6U5lDDGKkWPBzmhMQ0NuzlMe9M8zjHXFSqwJ2ryKQKuD602I\nXhutJtGCMcUYIByfyoPzAYOR6UBYjZSuCeRSPlsYPHvUzY25IxUbJuQYGaAsR9PcURqWWpEU\nKuCTTlX8DQOxEsZaQ+tG0g7QOlTKpy2OvrRt+Xn86NRjRtK5AxmmMDjmpWj8vmlZC0eRTAYO\ngOc01lDZPQU/y2CjmnbRkgDIoAjj+U4xmgrhcdDUn8HA5pjSBsHGKQDV3DinN12nrQinqacB\nheR+NMkRetSmQdBSbcjAGKVl2qDSKBW+XgUoO4cjNLgR9OQaAw3cCgQisG5ycin/AMPNIR0z\nQ77m29BSBgoG/PanE7uvSkX5VORSlgyg9KECGbvm9qdtG7OKQbQSDwadvxwRVDHFjtI7U3B4\nINIv3uuRU6/LHxTARU2rnOc0KBLxjBo3fMAaUSbZDjmpFqLtCtg9B1qNgN2QaWNWkfBGKeUV\ncA8EUgCFCPmPNSn5mGeBTozuGAKTbuU4NMBJMhRSqu6M45pFyzYPSpFTbyPyoGQKpPFTMp3D\nIwaXyxu3EYpm7dzTAa3zcEYp+Bu9amXb5eCAaZGyljkYoATywVBz3pyrjJxgUqsPN2gcGpyE\n247UCRAy7jmhIypPTbU0apGeuT6UyVV8w9Rx0qbsYKozuBp6sOaSFcqRmnnLKoHQ9aAEBx8w\nFKZOvGKFj2r1zQMbWx96jUBqZJOBUrZYDFRxqd2Q2BU38QOaaASNlXK4JPWn5MhxTVVfMOD9\nRTmTjIOKYCs27k9aJIy3IprEdTxSxvuzk8VIDMbeTRxtK7eeuafIdy9OlJGxwaGAxUaP3z1p\n7Y28U/aR05NNkXuOKABV+U7hk9qVY8KVpu8lgKkUE9aAIthDDApZFd3z0FTLwM+lRnLZ5oEN\nMbNj2pEjBfrg05pfK696UbWwU4NIBghRuvJqZd3l5PB7U0AFueDUg5GC3FAiHbu/h5pVX5si\nlHyBsDJNG4KfamUh/Kxk4zVO6kbggHNXOW6cConh3Ny1SIzPKa4YjPWrOnwtC5jdt2KtRwGF\nc7eOu6pEjzlshT6UgFMY2ntUSkLz1qf5enXioNnOMcU7gOYqxB64pW+YcVGEKr7VMQI1B9aN\nQsMwcDt604dsH5u9G7PUcUkjqoyBimPoMaNVIY9aFkx/u9qXcrgc0mB0IoAOJOB9aUfvMgcC\nlC4QbepoXhsE9KoBFXaNuOac211AYcihmJzQuVXB60gALuTgU3cd/Jpw37evFOCr1IJJoATc\nFbB6npUhITgLVUsFPzdjxUu4lc96BCN90jFRnr1pzxn7wbg9aThuMc1ICxttYkgmm+YGYKDx\nnrTiCmQOagMZ3ccd6BllWCqct81RNGW9hRCPlLGkXJb8aYdAhh2sd5FSso45xjmlZQy4x360\nnlru5JwKQhFlDKPlOM0nliRSzcc8U/aCnoKRVbaf7ho6ARbNq5zkZ7U8qpUgDn+9SeYSNq9K\nQc9OBQAiIHXI5IpJE3Y29e9P+6euMelNJDSZXgUDGkFV+Y/hTE4BGcCppE3delNManpRcVyN\nm2KABk5olhaTDKcetOCFWy1Ct+9x2pAhnzdB8zdKfLCTt3HBHUVNHhVJx3pkjHd1+WmURnJP\nyn5acVAxkZNKQO5KnstI2WPTH40cwgf/AGhj0pFmfaVxkUP6ng+lKrBmHai4yAIq8nlietWG\nUDgmkZN3BHQ0/nbwM0biGeXubgmhV28NzToS23cTg011Z235pC1JYWDH
p0FKrZU7lwc02OPv\njips7V6ce9AxjYfK+lMkj+XrTlx93OGNOaMYKHl6oZAoThTnNWFjCY3YINRBdzZIqRsSPszj\njrSBkkfyAjb8tR7drZxxRvO0ANnBxUjMc/c60CBVOAcUvfOMe9RKfLU72OT0oSQyAY6A0DJF\nVE3FvpSZ2kAD5abMVV8bsqaVRuUBT1qgH58xhjgU7cFzzyKHxGRjtSNFuQtjrQUEcijv941a\nDbhtIzis9FKyBiuRU6uN+Tn6UhFlo48ZXk03H7vOOh5HeljXoR9ajZjtYg4ycmlYCwrcAg4p\nqybevNNEmFwRzmnrCZOQwz70gsObcIyaId3l5I5oKtt2k8CkkZlXcOnamHQcFYuWHA70ishZ\nlDUK37oZPzdcVHHCNwJ4yaNRE6ptXNNPc9BSrJtyMZxUyFZcgigojDA4wKHZlIA6U9MMOO3a\nnMvmAYHSmgGJMdoL8H0qXG5ck1AwKyCQjcOlSqwbg8UXAcy7hyeKYcovWnoVHPUU3buG33zU\ngJtPHcdacpCZzSeX8vJ6U4qCmFHPvSGIfmYblyPSg5bO4Z7UFWYZ3bQKTLYyRketAh+0t8o4\n96QZyd3KjvQ4PRehokiYptDYpgP42jB4pAPxpoXy4wD8xpeUUt69KXUBUzzmk6AkjBohychu\nD7UbfM75xTGJ5Z4z0pZF+chjwRSCYrhSMmncB8H5vSpYheCo46cUqsOcimMrbQR0zSKx79M0\n0BNxtHemrjkgZPvTmIZQR+VMXIbnpSGGfmGRTx0OeT60m3rjkU3JHr7imIXcoPTJpGfdnFLw\ncY4pHBTJxQA5UCqC31obJYHpSYMijvQVLjA4IpDuI6l2NKpEa7cZNIz/AC4HWm7sEZHNUBIW\n9qNwHak9TnNOX5eO9ABtz3xTu+QM9qRTtbLD8KWNTySOKBEbRlcnqfalWRh2JHrTeIWJIzRH\nMWXLLsHpQPoSblccjFCccHpSoy/e7dqGkWQ7RTEJHJ8x5qaP5uSNwqAoI89xUkOQMHpSGO2j\n5mHA96bIxwgHTvS8Zzjijjj0oAVsK3GDxRuOMGkeMYzjkUKu3k9KQh+4qtRxs5l56U8tub5a\nQqVbgc02A5sjtmnMCR7Gk3evWgn5doOaRYzNFHkn1opgKs+5iOnpSxqNx3cjOac2zbuA6UzP\nlrvxkGggdMFZTtOCagbKqTu5HFP3bm4HHWmLteTJHy0DHQkYLMN2KVZPMjxt496VUKrtHFG0\nMrEdBQIYQPLyDxTfMEXLU9lIUBfmBqu0LO2AckckUgJH2M2V+9ijzMJwMknFI0YODjBp33eM\n89qYxrqMhR09felZhEnTJxzTH/1igNjFJN12nvQIasgC/KetPdtxwPSmKPLXkd6eI9uSOabA\nTd0IHSpcI0fHXGab8mcn0pq42nJ/Ciwxm77rDlc8ipGk3KccrmmBh8q4waft2psIwOtAhj/M\nNwoK5UL3qRlVIwo7io2xtHODQAyNCwPODnAqVlZl69OKjEYVuGNSNIBHg9aAGspY8NTGY49R\nTkYMuMYzT2Xy1K5zn0pbAIuRHvH3aYrgSDDVKGKqABkdxQ6BfmI4oARuF45Jpnk5bOOKkwQB\njimqzK3znK+1AETx/wB1uD60qxgISTUnyN0BNHk5X0NMQjYyNvHrTVZWyTxg0m4Rg55OaXyw\nwx3PNIYM3vnPSlQvkZOaaV2Nx1pwjOc4p3EL/ETyaaxU4C9acJM8Z5piqMs2KYDU68jC1L8q\n4cDOOtMlyFBI4qSN1MeNuKhiGvIqsARwaJGjUAcbs0gj3MAeme9Mx5km5RnacGmP1HLINwA6\n05l+YtnIpR8uSOD6Uwfd2k5P60AMYAMW6ULhsEfjmpfMEY5waaPmUtkY9KAGSHLYXmnrGemc\n0zlV6YpVY85GDTEGeMgd8Ucc4OD6Uu3BBz70q4LE5yanqPoKsgKjafm6c1
GygsQzZHXFIWbc\nTjBpu4LlmX8RTENbe0eF6elPjUeXk9uopY5OBxyacsYGQaCiOUpwo5PWnfwc9aa0QV93btT2\nKiMseeOaQFaR2ZgfwzU6x7R0zUW4eX+vNEczbTjmgAk27gpHFRyuwbYnFSrhm+bioGBU7t2e\naBEjEMFx1qTaCSDUO/dwo5pwVtwXvSGNmX5sj7tQrt3ZH41NJlsrUCwnaTn5RTETSRq0Zyce\nmKS1+ViOSKnXBUfSmuuz7hwO+KVgIpmG8g8nNSKyrGCxz6VGv7xz3NOCmNs4+UUw2BmDHODi\nmbSxztzSffJAOKEZlYYPGcGgYkrKylQuCKcibdpxzSGT96eMj1qUKzLwRimIjLHdjpk035QS\nTz2p6skeQx3N2FMMLluB8p5JoAiDbcjNTMn3T1OKixtfPUUok/gJ5oEBXjGc0xm2xnjBzxT1\nIXPHzU9F8z0z0GaBlXb827vUmVZSCpp8gCkjGSKZH15NICOTbgcdqcylVUdO9KMbyCPalX5s\nhjx2piI0b5tuc0snyjHehowOF5+lCrlsnrUj6AflUc/LSxqFpoILc80u7a3SmSPljVgFUe9K\nI1VDkYNRNKduAMHNHmbUJY0yhNw246GhVVhj0pNyyt0qRcKwHWkBG2MYPFRyR+vWrDEFielM\n5PFIBGUSLgiocbM7vvdsU/mNS1NVX6nmmIa7IyA5570/+EADJpjrxx1p8O7yyWPOcCmIRWwu\nf502RvM4qRlHXIzTPm3bscUhjPvduKlBOMdqGxu9M9qD8rYNADcBVLdxUUjFWB7EVMFyp5qF\n1Ma5YZBpDJVIKZAyaYxCqGPWnhflABwKYwUg880C6hldnsaaqqzj0oA2n1oCb23KflplEbdC\nOgBp3l7V3YzmpDHnoacsZwfm4oAq7W69aec8etSONvbmmyL8u4nmkAyRvJUAdT1pAxIHPNRz\nTAgYBJqTZ5ZUnk+lA0PVupNIsn7v7vPrSsyvwBim+Xx97igQ5U+UHoaQxtIrDOSKRVYYyc81\nJtOCw4o2ArAsuMrgCnrJ5gORT9pbPpUfl9QDVCEjjzk9BS7RnHanbWjhx3pdu5T2OM5oAhZh\nyOhqSNvlPamHDYPWpViZlIHSpABIGO3IpMlM88VGqjzMgdKnVBgknOaV2AzJYZxxTFjLMCB8\nvvU+046U1Y/4WPFO4xnIyG6dqYPQ81JtKtjjFP4VgcZ4pAQyL0INH8IpdmWxTmjbnPSmNkTA\nKccZp3X71IsJ3ZNKV9+9ISG9/amSQgYNTNIu0gDmmKrNHubHtQWM2bRkU4bdu3171IuO/SlZ\nU2lh09KCdintO4jtSqPlPFWl2suCNpqORPLbOQc0yhmTu45pdoP8OadgqOKVsqBg0iSNl3Zx\nUTLxgHmpx90mjZuXgUCITHt5pqtx0qd02+9IicAkUwInwfpSL8oJNWHjXaMcVG0fqaAI2bzB\nyvApGww4GDU0YLKT0FBjO3NICEEBePvUO2FwetOI2ikZA3QUDIdxX6U6NSuQSKf5O4YxmlZQ\npGfxoERBMNkjinFQFzUu0+XnqKTbuAOMZpgQeWG4pGhyoOc1Y2joRTWVVwO3akBXiTax44p6\njaOlTMvygAZpDnbz16UwIlxk54pfkC980pUxtjOaNwLcjigCLAbcTyaRl+Wp/LBXPTNMaM7t\nueKAIuOmOfWm7GjkJzkVKY23YXinbScA8n2p3KIowGbpTmjBAyOakaLb7Gj+EDuaCSNl2/Ln\nNMkUbRwKn2+3NQ4xJzyKAI1jG7IpWjK81OQFXgUjZZecUtwK21t1P2dzwKkKFlo9AelMBdqb\neOaRsY9D7U9R2ApP4j6UAN2D0yKVY8qxPT0pVzjApANuaYBjauMU1kLcngU9T2zzTXYnIHSk\nBH5f93rTduD0+b1qdMjNIrc88U2BGIznrwaOi7SuasDbtIH50m3dnmkgKvkn
qDhaNpxnvVna\nBx1pkkePm6VViRiqx78UNCcegNPVsL0p27cB6UXHYj2hOAMmiVe4OPapM9wMEUi8/eHNMViJ\nlLKD0HpTcnOMVMy471G2VORSAfhQMD71NZOpHWlX72e9LuGfWjUCLHy9Mmo9p4OOe9WT8vtT\nfLODg0DYxdrDjpTtvygCiOPbyaUgqufypiGPlcAUh4YcVIAdvTJpVbd0HFMBGXjcBiomzyBz\nU+TgjtTWj+YHpQHQYItzdDjFNxjryanh3KpBPemtGVbd1oH0GLGG4pwXg5OB2pU2k8cUpX5s\nHpQLUh2/LhRzS+WcDAxUv3WPGKaFfkmgAZfmAPSnspbpgYpOwU+tOVRgk9aBDOPM3EYpGVTk\nnmpDGCvB5pjKobAHNADG5BNKqhsE5qTb8uOKROAcrwKoY3ad2R0pVYL1FOXnkcijZnJxSYhp\nO4dOKbtJxtFOC+XyTkVJ94ZXgUARP8wzjFKmceopxQn2pChxkUwG+X5hPOBT1iBwQeaRVwo/\nM0rdRs70gGhjnkcUm4s3HAp477sAUOvyDHG6gBqj5c5oY5bIqT+HAA96I1GDxSAauNuWGaRe\nDkdKlUFUOcUFBszQBFu28CkBDHGASaccSLjHNII9vCDmmyR3ksv1ob5Vx3qQ7two68/zqUMb\ntCqCo5pm4tjpip4yuCewphTgY6VTC5F93OO9KVCMNvenfKpAPNJIvQr0oATbuY5OaUuBjjFK\nq7m9KYybuAelMY5mG75RkU1m55FJGrfhUgGBk9fei4xOeKcymRhjpT9oA9aPujjpQIbt4x97\nFKM9AOaWLrx+NLIzFflGKBdLkQz0JprfL0NSbQxGOtCQ/u2DDnNAIZ8204NG3uRUqp8oOMUH\nOcY4pWGRbS59BUjD5cdaevy8EUbsc4pgNwRxipEXI5FN3DccmpFHGc8GkBHIgGOeDSBR2H40\n9UGPUU7G3/dpANVQ3ByKbJHtIOelTqwZRxTGUDk9KAEJyo4pjKEUmpNwfjoKQdeelADNwbtT\nlVd2DxTztPOMChFPcfjQA/airgDJNJIAq8daVVIx3HvTpOQTjNADdp2ZDc0jAqMjrTkQcknB\n9KcudoHBGaAASfID0NJ5Zk6jFI2FyKkXLKOcYoAevyjb3oXbzx81NLFZN3ansDjcBTBDYY9r\nZPApwY7uBUnlnYNx4HWo/MDMSBjHSgB3mFxg8GkYbRkjilA6Fqacs3B49KYx64YZ7UpTYucZ\nojzuC7aex+XrzQIhRSzEgYAqZV7nmmxlmbA+pqzCu1ckZFIXUij2nOaduXcCRnHSnbVZiV4p\noXqeKQyVcMoPamNgKdtO52gAU7ZxQCI9rKvSkC568VI33c5xTkwRzVgRMobBXipY+F5HSmHH\nNOjY4HGTSAG5yV4Y05SNuCctTZFKtmlXaGA6mkCBlEmQaRY8HkYp6feJ7U8S5XoKQEe3PzHp\nUnlqeRxTGG7p0p0ed2KAGNuTJ/KgdQcZNSMwZgG6Cmsu4kdKTAY2e/FODbuvGKbt2tycmpQq\nnGRTENLZJHSmLGxbdu49Kec56UrYLccGgZHIu4YamxpyOcCpmxuO78KZsycDigBW96GC7R2q\nQKDwTzUbLg7SKAHbdoz1pFw3ehiRgA8UqqrNycUAKuNp2mmNnoBUm1eQOKTB6g1LASNmYcHF\nO4b7vSkVdzY6UpRVj44NCARVC9+KVWHmHcCRTfuctyKen3d1Ni1EIBHoKTy8yBiePSjaTlsc\nUDLe1JDBvmzkUxlVexPtUnzKfWhVypbo1NgMRF9Kc0YaNgePSl429c05cMvvQBDzCihuTT9u\n7Bx1obJOCKWNm3EL2oAAwVTkUxZFY4Jpy58xgeVNI8SbRjrTAazFW5P4U4scAjilbn0xTNpZ\nSQeaXUB0Yy5LAUMrYYgcUxeeelSNIfehjK4O5QOmOtSswUZpVU+WTkAmmBcEEj
I70xCsw2gr\n1qKSMt0NTuVYgrioTnkk4FIB6/d2+1GRkcZakXCnk809fvjjA9aQCpleM+9RfMu7Iz6VMqgZ\nINIJN3LcCj0AXB4DDjGaV3Y5GMKKFfPTk0u4sD6imBR5ZuOD/OpVyuAV5p20c8Yb1pfXml1A\nTb1btSMQvHUmnfNgEDjvTe+cVQCMxHG3gUo2hCQcGlGW5Y8elNdVDcH5fWpGIxVlGetL8u/A\nHNSgJ5fYmmcBhxjFIBvmFVIK5OaUN83TINPZskt2qONt248jFPzAGHJZgc9qYThevvT/ADCz\n5+8Kc2GXOAKYiMsGUNj60bQrgk8U/crIMDAprYZSSOB0pWAb8zMxyNtSFW2LtOO1R7QEJxUn\nmFiFx05zT2AUIV69RUf35CM4FPVlkBJOaAgjUnHzVIBHnkA1IxwoB5HpUS5MeCO/WnhugzQA\nqKXkBHOO9J5xUsCM5PWmtIqHEZ5PWnBQcZ5qkMVsRrkHmnKDIo2getIxLZ3DAxUtuoVR70CI\n9ig/JxmlVNoO/NEnytxQZTIMEYoAeFViGxnjpSq6tgFdtN5X7o46ZojUyyHJ+lIYySQbsBc0\nKPLXeBgd6l+z7Tkv+FI3THXNFxjmXzFBPFOUFYyM5FRGRgoBGRUsYypGevai4DljbK4ps0bZ\n3qoNTKwQr7DFDSfNgikFxI2dTuAwuKACysWHFB+bA6CnL8+QDVCuQq2+TpxVpvuhhwfakRRG\np3gZHenRuM5J+WkV0Dg8k8U0OGyGHHpTiQRnAx2pm7qcc0rgLsAkDY5xTo1GDke4pU4XJp6J\nu5zigXUQIdpPQ0I2D2FLFneQRkUpQbelUgHD5cyAYojkYgj+9SLIrfJ0p8jLjKnipuNDPmUj\nIzTZI9rE5607zN5xjNSKxkIyBtpDI4htX1p6sdxJFO+VZCpOD2xTY+CQ3eqEJ95sUm4qcGpF\n74qPyyzE81BQrNubDcCn4+QuD8vpSAAdeTSnPAx8negTI0kLY44p/mHfjHFP3Luwp49qYVPX\nPNMQ9c5IamjEh5zgUrN0yenU0cNkrxQAjKVyy/lS7g46bTTgu1euTQ2G4pDECBW55pO/Bxmj\ndwB3ppIZ8EYIpAO27Bg9+9O2jbxxUe5vLORk0u4vsO3C96YWF6LtxzTlQ92o3eW3Iye1LI24\nA4xTH0E+7nJxTd25vmPNPwu3J5zxSSRllyBQSIcYC/rS4Lgrmj5VAGMmgSCPkjk+lIqwKhjw\nRyKNzM2elJk7RjvQytuX9aCRGJ3dM073BpT8qlhyO9N/gBHPemOwq84z19KUMN2M80Kd2OMG\nlYDdwKOoahIAWyWyaTzT0HWjb1O3mmsuAcjBoAft3D5uTSsnzAAUscYwMdTTnUnkHGKdwGfL\nu20mwsQQAOaXauNy8tSr8q5bkmldh1FjyWJI6U7d1GOKRcBc9KcFK/eNFxiCQNgEYHelL8YC\n8Z4oVcZJ5z0prH5TzigLDm3LHleaGUlQcjNNjbEfXilXg+tMAKEdDipN54A5JpoyW5pFzzjt\nQSO5H3hSqRn7tNJLKM/nRtYc9aQx272opm4/3KKRQ+RhyB0pok2xZYfhRJHubJGBSyII8HHy\n+9PUgRW+UZXHvSfJG23GTSBv7xwppIk+bPVc9aCh+SMkjIpqylUPGATSvhuh4prZWMeuaYiZ\npF8vjrUS/L8xIzUc8wjX7uD7VBHN5h9aANBQp5IyM1EfL5Zh82cCmLIY2GOQamuNvXHHpQIq\nNy3Dc96WT5l9/WmiNFbJqVfL4xwaBjlXAXOGHem8ZbB4oDbo2Cj2pmNvHan1CwEDaMMA3WhC\nHySO1MZ/lJA5FShCsPTtk0xEa4ZOThs0FtrEZyMUOq7QTUTSbuF+9SGWGZPLySScdKreYV6D\nINTrtaMnkHGDUPlDy8ID17UgZJ5iyKCTimM3zYxmo2XZz1HSpVXbjvQBIG28LTZG
K8D8zTVZ\nfMBqTcGXDcc9aBDFZlbk8mpfMYsN3Sk+8+O46UDJ+9QIc0hbK449ai4jXPcd6kb5TxyKZIiy\nLtIwPWgAWQtjgH3pWm6DoaYY1RRg5pZMKwY0ARNuLn2p0algHzinBtucjg8Gm7CG29BQMWSY\nL/Dk+tAR2YN2pGQ9Oop8fyYHUUwElI28Cm4KrwaeR1PakhUbeTmi4DiS3JHQUHHYZFE2UGcU\nRN03dKQhVPbOM01WC7hjn1psigyA9FH60rLkbugpDIkZ2YbuRUzKEAYUigD5gcikKnZuc45p\niGOw+6qcHnNDQlVyG5pVILHcRwKcVPBPIoGC5Y9jTZOhGaP4jjgUct2pSYhEA43Gnh1VsBfm\nqBPvc1JCuWY00uoglVmyexprL8vHIp8m9VyD9aRDukwOB6U2NDSNoVScGlZizYp8yLyxPSq8\nZ+beD8tSMkZSOQeaWR+DkAjrimTLn3+lM2ttzjt3pbkjpSrx4VcZqtHlXIAwaes3rkYpch2L\ndD2pgTLiRDkY4qLaq4G3PFL83c8UuAVZgeaQyNgMjHUelTbVC7s/PVcgqc5p4JClgMn0oAas\nZkPB+tOO1RtNJExYZK/gKOODtz60ADMExg9eKAD0x8uKGUNg8cmpW+WM8+1ICDheMYNLK26P\nbuwaZGrSMMnApShViD1oDUFj2xnAyfWkjZVJDE+tEbbozjrSKok5JpiEkCspI6GmQq6jOcU/\naApGcUgbLehpgMlUBgd2GqeNi0fBIHvULgt/DzU8ZDLtbjAosBWmyrHnIoUbgGI5qaVVyMcm\nmQhvmJ4AoAZtxJ8xycdqI9yc4zTgp3ZBpVwxOeKQCLlvrUSr8zHOBT0bLdcAU6SPb8xHytTG\nNj27ck0jNtTOPpS7gvGOKjklBz8vFMZIjjHvTZfuj1JxTBMX2jbgetLI3PrQIbtC8HrTx8zA\nk8Co2ZtwwPqamcqoAXk96QhrfM3yjihlVo8dTnJo3bcHtRGNzMe1MCLyyp+U8UNujBJ609U+\nY4NOnjxGM80AVlk39T71NHICuc1B5WPlHUmn8RrtwKlhqPxuXnpTQx3c9KVmDKADxTdw6HtQ\ngFBD57HtTWXGM05uMDPvTX+4WPFMQ07t3qKcjBosDhs1FGx3AnpVuPYMMF60gKzLyCTzTyN2\nM0+QLkgjnNRBzHKSKYCORuG3kUqqWbJOR/dpwQtuJ+9TCpXHzc0ykO3BPr6VE23Ocdal4LE9\ne1IuNpyM0DIZJFjYHGQamVlUbRwTQ8A2jAzSBTuzjmkAjNtU4HPShGCxgHrSso4JOKRlWRgU\nb60aiQu7d06ik4Yc1GwMbDBzRKdo4PFIY0BQxJHSjd5nQ1HExaTpx6VYWNfv9B6UAiBjt460\n+Jl6EUjLuc46U+NQykdDQAqjcvHXPFPVgeCDk0irsye1Csc4NUMYxEe4dqbF83K1NMqsuKgG\nIxheTSESP8x54oEe5SOvFN3ZzmpVYhQM8UagQLtboMY4qUsFXAP40m0YOPWkdVZcE4pARbG5\nwO9Wo4/3eMc1AueO61aVht6UCIWDKvTihiGYfLU0nQA81GsZXkGixRHIo3jPAFN6scHipJBz\nkmnMQvIGaBEDYbJxzQvzKc9Kk5xg96Ty9owD+FIBCVbgdBTEX5zSMCo6UZIBoARl+bKjIpoY\nNhScCljLNz/KnrF5meMGmA1lPINKuOF709lCjjmhgN3TtQAjAZGOvrUUq72B3cVIqFlOKasY\nXrTQBuGAO1LwzcdqQqG4pDjjjHrSGKdv4UN8pyDkUMoOewpDH5YHfNICOUl15OKVWH4DtT9v\ncj5Ka0QVuuTTQhWIdRwBUeACfQ1IygnnsKjVfXpT6ACrxgHvSyZzinfKEAxzTWbGKQDduRg0\nm30qSKRXkwe1Eihm4NAEQPp0p20FelO8vjPakBPf7tADsBQcU1lOOmKeEwuQaTG7Bz
miwDGU\neXnvTPKBFWOxx2qMocdOvSmBGx2gDvSfhmn+WU+8cimjKvgDilsBGq54p/kjoSKfGw5+Xml2\nhutADJI+BjpTCvX17VK33SKjGQAe9NAN4YdeabtK9DzUqoVXnGKbt2/WiwxOT2yafww6cijd\n6cU1s/Q0xA64UYOT3ppjXjHPrTlkHfk0u3cvXBoAiYYbPak4c9MCnfw9M01Sct3oAXhlwKTZ\n0UjNSJjaARg0q/e3HtQAm0pz2PrULD5SB61Z3Blwc1C0JUHnigBY1C5700R+vNOUAKB+dPZe\n4HFAELKOOKdt+9gZqQ7dtRxsyyH0pdQI1jbHSnbQzVMjE59KYynPFAiJm2NgdKFZzkkVM0Yd\nRxmkxgnApoZErFu2KXaf4uRT19DQynPBpiGsoKkdh0qPhTjtTw4TKNzzT40B+9jmpGNXLdBg\nUrYYEd6eF2kelJtD9OKYEYiztGetDQ/Nj0qV1O8N0FDKW6EZqhWIWUdMDdTQP3gAGam2jJPe\nmMrKoOOSaQCPu3YpOTwKfuHJPJpmXX7opCGup6c0oyV5FPwcZJpi55z07UwIxkZzxUsX3cet\nIyhsc4p6KdtAMRgVNMf5sEZNOk+9zQWyMrwKVwBFDAk8Gnbd3HQGkVTuye9P256/hQMgMIVs\nd6MnOCOKmYDOTUZRm6HFNCEPOe/pWdqd49vMu3cV74rVjTbgnmmyWyyKRgbfQ1Qipb3izADo\n/wDdNWNrM2TwKbDZpHk4w1WFj3ZA60DG+WV5HNI8LABup71MyuuCBn1prA9c0CIWX5Rg4NOV\nCqhWPNSGPPUUpI43DmmxkKrgkHgU8R8YPSn7dx4FLIMd+PakIi4xg4IpNu05zx2p3HXGKcyh\nvlx8tAyLlgTmm/MvU1KsY6A8UsqDAzVMRGrf3T19ae3zKAMA0Kqhc4pwUNz2oYEYXKnPSnBQ\n+MnAFLt+Y5OKXy/U8VICMvzDoBSMCcgU9mG4AjijI64oGMCMRS7McHpTg3HWnbcqfmoArqu3\nnNSYG3GKf5ftQygYOc0Ekfl9OacAGk5HFBUbuKCWbKlce9CAGyB04oZgy4zikbdt25zSrENv\nPWkPlGLtyflp5UY44o25bHam7c8Z5pj2GgMyYxjHembevrUqtnoaWNgCTQIhVSsfXNOYFsHH\nFSsPl5HNN52hcUAOVRTo/lGGGabtZWwBkU7aWjznmqF5jfl64waftx2pqqeMjNOcnigY37uP\nWnL8w6c03hhn86VW3Z7UgEKkKQxxzShTwM07G5uaRVKg7jxSEDAq/XIqNmIbg5FP+VuQc0qr\nuHQUwI423NnFSFRngU1MpwRUqL7cUgFVSo64FJ/DQ3Qc80q/NnI5oGIScdKRYyy/MetP5256\nik3DZ05oAQR4UDFLKoDDjFH8XOTQ3PWgQ1cY9af5g39cLim7CFGDg+tPYDgdfegY5OF9acvz\nHHQUxeT7U5XXdjGKABhtz2pmXC+oqdguQTyKCvp0oAamD1HNP2kcEcUN9zIWnbt6jFADCh7n\nipIy2dpFOXb6/nSRE+YcHigBVyY2BPNNWP8Adg4wKk2ljmm89iaoBFXdIMnK0jKN2cYFOTHI\nYHd61KygLyeaAEt5BgnHPSmsAWyRR0zinR4kHJ6UwFjG5gRVhs84qBWCtgdKkVvxqSRq/Nyx\nwKeV+X5eRSKo/i6U/Kx9OhpDF+5ijaWbg80gk3cUowrAg8GgYFBJ7YpePwpGVl6HrSc7sVTA\nVuWGelG0cYNLg53EcUvy4FSAxm2ggck0m1Y4wScNT2UL060BFbhutABGvyj3pxZV6flSMuGF\nNKnr1FAEn8J460keBznNAfjleKWPGOFzQBIFUnBzk1E7YfGamI6c80z1+Tce2aQDGXd1605V\nwlP8vpuOKVCuSD2oAj3noaCvzZpzfPkAYqQxgR8HJAzTAqlivalVv7xqb/WAcc+1DR7A
OOaX\nUCu0iRygE4p4YySEryo71I6IyjcuaaoUdOAaYEWwyNgGnrhc7hzSbirHA59acuSvJ5oAfHt2\n5Yc9qac7eeKePlbrQ7bmGelIBu0sMqeaB+96HBpwO3kfpSPhV3DvTEGxTjNObEnQYpF5Un8q\nb5hXH1oGPWM7cE0gjPl/eGaVWOSA1JtzznmkAmCqkk0jRl13K1PDCQbTwKaflQgUgGnKrjHN\nKuF5Y7acCcAY/OkkB28jj3oQh+7sOTjr2qOOTEhIPNIPmUjn2pqwiPnH1pjF8wnPHHWlkkyo\nAXnFO2kDPamRuG/CmA6La0ft3pzxjjbwPSo26jZwO9OXcRx96kAKRzu4oZScf3fWhY2LfMKm\nVQMrn5etG4Fd24xikX+72qRxlsjnHakwG5wRR1AhkRkbpxShVdAT1HarAVmcDgiopsDIUEGj\nqMFRWbcfwpH69evpTcHaO+Kft6N0+tJgwYBVC9PegRmSQE/KuKc2HIBGRRtZlPPtSIGRt5bM\nCc0btvOahMZDA/w0k+5o/k7c5oKHNJtViccnFJCwH3xnFNGWC7hnvUm1W7ZpgPDArkDiomYy\nHavFPwUU4PFN2g5yfpTAWHhvn5HrSMAWwB8tO+VOvIpevPQUmBGy9GXpnpSMzHjGKk4VQV9e\nlO3fMCRgGkxkXYITSrG+7jBWlVlXIIyaYWZdo70hEqqE4X1pC3UlaIwe4yKROG9BnvVLQA2E\ngnoMdKcv7xRkAADrT/OC5AG5SKjKq0XpQwGhWZevFHAAXODT2jPHzYHenLtVuBn3NUBEFWPp\nzSM4DDJxUkh2t681EyrNuZ+MdKgYv+sj+U4XOamjjXYectQm3apHCkYNIjLz2IqhERZN3rSr\n/qs9Dmn+UmM9utSRkEqSOKQyPcc/NyPSrKyfLgDaQOlQSYEhb9KktyfMaRhx2piHNlmXd/FT\nSq7jnn0qRWLuXHOO1RP82eMEmkMkjCrGQ34UgXGDnmmbiqqDyScVKyktxgKOtAhhHqD9acwV\ncHrTmm+Up2pqDfwOO9IBzr+7BpI4Tnf91ak2k89sUqY2gDrQO4KoByeRRLjH1NQ3AV1BBJIq\nwmGjVevcmgeoGISREA80W8jQSEMMqBTlbbkY+hpVUzIWBp3CxHuBYE9D2p0iblGw4HSlZNuC\nwFNyPWi4hkcLRycnPtVlTtbJH4VGqfLnPJPWlJYt6mkMduzkhcCnwgOuQaQRt0fgYzxSriMA\nLSAG+YlQcUsa7VAbpTlVdxJ601/m47e1NCFjQYJB5pY1blSeKXcjdOKFkDSYxmmUhDGcHBpV\nO1fanbttMlYRoKQEg+b5jyaFYs2enrUMcx3Apg/WpWyG5Xil1AeWLEA8UOx++OB0xRkN3waV\nmHUj8KQCcBlyOtGBuOfwpEBkYr0780uBu9TQADG37vNAXdntSHJ5Jx7UpYYABqgE8sKPU04Y\nXt+VKmCp9aBnHpUjE3Ar707IXHFMjxuJHNOf5snpQGom0dTSbQ3bikwq8NzSeZxjbigRLtDL\nnHNM+8cAVIu4DORim8NlulBSGKN3B7U4Mqrz2pY8Y60u3cx9aoGMVm25J4PanbuTk4wPzo2n\nPvSjJ+XGTUkiKpZcjikZSGwRk0qE7vb3pPM/eZzmgYu7bjjiiRtwyDg0pYMoA60nEmSBg0hA\njLuK9Bimt8/AOAKXHzZxkijaNpwMGmMSP5ep5qRiccCo48qem760/ad3JxQMdvGRuNL8vOfw\nqNkXliRUm5flPbFAgXlT60m48KeBT1w/IGBTcLuOXoHYd0zgcClwpOTUaBs8nin7c8kYFAh3\nDL/KiRS2B1oLDjA4HJoDbskcc0xiA4p21doycmjaSf8AZpFjHLd/SlYLCbOoxRHGdxfP4UO3\nzA5x7U5fmyO1UDQwffYlqdnjgcGmxleQRzUijkrSJF2cYJ4p6sVXGKjZSrLk5HrT8MW5bi
kM\nXefSijaP7xopBqRq24Lu6U+T95GR1HaoGYEBcewpyuWjIAwVqwAKvlgvx7UkjBVwpwpp/mDy\nTkd6a+xWVR1oENEfG7dSLnvyKPM8yNgeMdKZ864IwUNAD5GV4yCKiS38vHTmhWy2xxzUpX5e\nuTQAjMQmfQ07IbB61GvfdyPSnM21MdPYUAKsasvAyTUDYPDffzVhTkfIcCmLhWOetA7j49jA\nhetQPGVYpnmp422dqSRPMyTxTBMrKjRt61KZG2kdQabyDjqKUR8c/hQDDcvQHLd6FQMzHHNN\nVTudsDGOtIpPA9R1pCB224weKkjm2oQB1FRNFubINKOMACgBXCqqK3LHrTmXavA5qLIZt3oc\nVLJncuenWmBCMc5HPakZGbA/OpVUbiFHfIozuXIHJPNIdhdwVQVOe1Lxjk9aRY2281HJHxnG\nTnpVBYkC/Ljtmns3ljGOKZCyhCDxT5sFQR0xSEVmmTzAAePepXUMoOc96jMKbScZpsQbcR+O\nKQD2JIGeBmnMpwW7Cm8TNg/KVp0eec4KnjFACRvhd3anKvzk5z7UjL8uMYNLHGT06+tMQbs5\nBFNjUqwI6ml/GnMU3A+1FgCWUtuDdulQKzNHk8D0qThoyeozSpjjNIQ0jeoweO1DOVUjGc05\nvl4A4oU7TzzQNEcZO0kDIp7sJFCnpTd2GLDgelJ95QcYBpDGtCN27t3qRTv5PToKVcbeBmo8\nt0x8tMBsgCzA9u9SKwVjnim5ywwvFKYyevTNArjPMDSdOOtSL/eFDKobjk03dtXKjPPSncQr\nsJFxjk0iqIHXAzRG3zcofrT5G3IQSAKQDJiOfc0ifuwflytMVg2BjOOKe7/NgcjGDSAbtO/P\n41FKzscEGp1UbTk59KaqjGPSlYRW8sAEEZqaOIHntTDIOfX0qSNQydaYwb5c56VGMqvPrVh8\nrjIyKa0bntx3pFCJGrc0zgZYH8KkaP8ACmrGM5xSEQ7h1BIJp/llW67VIpzQ7iSo6UhVtwDc\niqQEMeOhzwafI3mRkdDUsi/KSBj3qAZ3AEg0mHQfEgVck9qhmkHBxx61KzfucY5pjYkXGOlM\nQ2M7lOwc02RdvPv0qSJNqlgcD0qIg5xzj1pgOwGxnrSOu5Tg805l24HU9zUtrGjBs/eoHYqP\nuVeDmiFnZvlHNTzIqDnio1UIBtODUsQhY9TwfSpLd/MyrcA1E67nJ79jTkXy1y3JNNDFKLGC\nV+ao+GU+tPaRVHHftTYWB7UARxruk4HFTTbmjGeMetR8HJGady0fzUtQIFyGx1pzfKwJHFOV\ntpGBxSPgsS3SgAWQNkFcCmrjtQu1UJJ4zRH/ABetMQKpZst0pW+bnOKTkH5jxSNIq8CgBY/m\nUjrg0H5VIB+tKuApGMVHIobnPNK4xE3R/dHPvUxYsPemxqDzuo2ZVuOKNQsMYjcP72abt3Nz\n0Jp6x/NkZHFIflyC2SKLAQx5VW3DHPFSNs8skDNLzIppscYK4ximASKNq49OtKwWXaDyBThG\nANoHFM2lRjHNMYix7SQBxUijahOc4po3BsY5oDFG2sOKBCKxbOOfXNIFG4nFLuKnA4oYe9AW\nFDbY+Tz2prAbxxxTl2t1HSkyN3TikFxfkCn1zUbBsEDin8L706Rd4PagBiM20Z7UBtxNOVht\nwRUflfKW9KQ7DfMCE7jmmIwHbilaEuc/pUixjoeDQAnXnHFBjEkZBHGM0qg7sHhaN21sZ4p9\nRFXfmTGMECp8eWo3DJNHlc+5p5XgA5OKA1GOAGHakRerZp7bWkyaQRhmqQEViMZ5Bpf9XJg8\n/WnMOQO9RswVsMDmqGPYjaeelRK2ct+VOdgGG35vWjyztB9+lICNfmb5upqdMeX2zUZQM2am\n8ofWmBEcL2607IHBpdvytmkICgbuKQCogXNKrAMSelOyFGcZBpBhW6c0AGD68UZ24xzQ7gcd
\n6YCyjOKAHSIN444pGwueaUMW5PSl8tW64pARbenrTmQiPOOac0eGyKXccHPIoAhVd6nPGKjk\nj24yeO9WtpXDAggjkVE4EinPY0ARbgucVJGxIG3PNJJHuXhe1Mh3ww/jTAkkX5jgg0ZBG3bT\nfMUKcDPOaduBGaB7biFdq8HrQflj60gG7Jp0n3MY4qhCFd2DnNI2Gwq9aI1bOBTjCVbIqWMb\ntKtkDikYlsE8gU9i2KZndwKQhNysgBFMc1NHHu6ikZAxx0NMBm5eQBwR1qNhtAAPSpPL8vvk\nUrKAvPXtQBHKvAx3pjLtABFSh8/epx2suTz6UAV9ojbcBmm/MBuIwDU/lAnOcDvSyfMuO1AE\nS88Z4oZR93NPVQO/B4pzIN644HSgCuu5vlzipBGe3SnpCNxINL93IHWgY1ee1JjqRkGk3dCK\ncrEd8UwGt8ozj8KZtJXkYqbaGbJpFVt3qtIdhikrjPSnbdueOtOZhwMU/bkk9BikBXZdq880\nz7xB6CrO0ZBNRNCwbOcrVisRMMe+aU4z7VIyjrTcfKMipYiORQvGaYD6mrCqGUZ6010XjA5o\nGRbd1OZQvzdaApDcDin+UzL60CIiDjgcU1V9OtSFHU5zim8dT3pgKAD16iheeMfnTolyOaNp\nxzxTARsZB/lSM25jxipVA2dOaay9WPFAEYUbwBg+uakb5TjikRQq5HWlVTuycE0gIzCfwpv3\neAMmpgcseTT9q4J70gKwPUGk9qeVO48ZoY7V6VQCRg4PPShcnPFC/Mc9DT1B6g0AQgDfkH5e\n9OOGXI9aft8wHIpUwqnNAEMkYDbutM53Ae9TZB6CkVPlz3oARs5OOaQbivHBp4DKpFKvAyea\nBoRuMd/Wk6cgYp3v2pOXbpQIj3ALjHenyAY4NNMg3H5eKcvYUCsIsYzk8UMhjJ45p2z1606R\nju9SRzQBEFBXJppjPrxUqrkE8U0rjvQBCyHaBtwc04Dgg5qU8980i/Kc9R3oAQKNoyMmkwNo\nwtSp82DQw6iq2EMpVwwx0IoA2thqc2FPtSsIR0AwWGeKjXC5yMelTcOuB+tG0E/N2pjGhRgE\n0P8AN0p4j5/2aTHzHHSmA3ywfc0hj+UdsU9vvZUc0h5UHrQAbiTnpUbLhvXNSKemeRTpFC45\n560DsRsT0HYUwqSw+WpldTx0NIwLYz1oENUMpAPSnGMc45oHynnpQGKvkdKAItpY4xUgAwSe\nlLuyp7Uq4brQMbjHAXmmmMbueRT/AMaQK23BGaBMh24YkDIpf4eAakVSuM9KHPOBwKAGIozk\n8mpljDckf/WqLac5FTqw2cdaBELw9Tnimr8o54FTbc9elMkO3tkUDE8vndS7RjpUnLKM8Gmk\nHbQA3yycmkX7hB604ZWhsMxIoEM+XIyKdtG00bO9NO/tQMTcQAKCvoeO9OUbhnH1p0ilsL0F\nAiNl5HakZPm4qcqOh6etCL8x449aAK4j+bilC7cjGak27ckHnNGw7sgGgAxtGW59KaM7h6fy\np+3HJ6UHA5B69qEA1jtGOtKFHY0gXJzjA96cqjtwaYhfLO05ODTApKehpxJ5zSL83txSGN2A\nJjv3oVht5qSNQ2OOPegxheOtADJGO4DH405WPfkVKcY9aj4XpQBH3zjin7SzcHBoUl2xjFTK\nu5s4oArD5urc1KrELgcipvLVVzgZpjNt+6M+1ABwSKOC2OlLGvy56GjcccigBrjCkAURnoSv\naht3TNSKTsGRwKAI3Y8kCk5bnqKmGFfPUelIyhVxnANAhGZcf7PtSHKx8LxRlVXpuFSb8xhQ\nKQyOMBlzS7Pxpy8L0pdu0g5pgKowvqfSnSbl+7zmnKAeR1pvPFACBz1JyPSljPzHbx7U9lCq\nF/ioYAcgUgE3Do3WpI1O3kgUyMLJzintl2AxxQgZMq7mGOneoUQrOfSpFcqAKljGeRiquIjZ\n
dvBHNPcK6g9DSSKW5JzTYwJBjOKQxjKN3y0RqyknbxUpUIhA5OetIu5lIzxTAT73XinQqVHO\ncUBg2OMGpcZQ84xSAY33Rx3p7IrR96d8rKCTkU37w44yeKAI9rLg9vWnLHu5J4pSWX5eppOW\nYDotAyVVDcE0m3nGRmjaHOV4FKVXrnmgQwZwVJxSqwXFPOPTIqPG5+elICTI38inRxBpM5NC\nqMAj0pFBDcHBpgNAJds9KWL5sjFSMVz97jpSKhjIxSAUr0FOdfLGR0pGzuB/OmNJ5nBznNAC\nspkYc07eMcjkGj5eGHSg/Nk0dQG8cnrmlGN2e3pSop5NIpGeRz7UAOXGCT+VCghSc8U1W3MS\nwp7MGwOlACKCvSkaRguSMk08cMSDxSrls56UAQqpkU84NI3y8Y6VMVCjI60xgStMBilmBJXA\noC5+bNS8eYUxmlMmw4xgUAIVHXFNXaRg9akGGy2cjHSmBQV9DSAFCgZPFJw6/MOKcu0qQeTS\nhQ0fWmAxUI69KZ/y06cVKFLZzwKjXn7vPNAD1i25fbS9ssMUvmncA33aSR+wXNADOFwD1p20\nYyTgUbflJNEmGVQelIBpPy+1MLGRcEZNPZNvQ5FJzI4HSgB8aqqg4xSAEls1HI5TgYwDS79y\nmgBzDauM5zTEUR8kU9I+M0rZfjpigYmNpAz704ozcqOaYse7AJoZWDAbiPegkcuTnPJpMEqS\nDg+lSbmwGYYNR7tykgY96BhtPlsT1PShVcgemKRnYYyKWOQMhwD1pABBV+D+VB+bNIrDn+9T\nm4XnrSATaEw1MkBVTu/Cnx8gkjjNDMG5amxjVYrHuHHGKjaRgoB696fuDL7Z6UbTycZ+tFhE\nG4+WVPrQqt8uTgVJt2ocjnNMZv3i8bs9qQEsMaqxDHIpnR+OBRv8tCGHJP40gPljk7qYDsh5\nCCMCoXlCbsjOOlS7l2kinNskwCuBQBXhb7QuT8vNWIxvkKAZqBV8psAfKT1qRSYzgH5vWgBG\nTaxI55ppk5+bOAamztXk81FywPy59aQxyyKMkd6dJgqHyM01Qq9VyKb97t9KBDo97LnOBmhc\n7sMKkb92oQ9TUEjkEHn0FMCRZAuRQrb1yRgU35X46djTyNvyihsBDtZh2FKpAkGT8vSkkbot\nMbJjxnJzxSuA5gGYkdKWNRIpUjHPWmF3QYwAO9SpztCjk0wI2DxZAHy0sLfxEfJSvltysc81\nII1aPYCAaGAzG089D0omb5gqcmj/AHunSkjkSNd2CzE9KGMkUA5Lcml3HkDgYpgb5N459qUS\nedxt24poQ/8AgDLxSouByc5pC2E2kcZpWGxd3agYu3bkMPpUrSqYcAZaqyyEcsc+lPDEDKY3\nH+E1I2EUYXJbgVKGAUkDI9aRgfLJx17UW7eUvltzTEPWbCrxntTwq7iMVEy7ZBxgZp8r7ZAR\njFAEkkauoAbYaiijWHOW5Pal278EmmNuZvm/OkNkmcY5OBRHJ8u5Rx2oVWXAyDTlbbwcL/sm\nmkSK26QhWOBS7VC/NzSsCeeMUJjacj6UihCy/d6GpkO0crye9QFyh+ZBmpVYyJnOPQUDHNu3\nY3Gl+VV5bJ6U1o+hzilbCttJ685oYAeoGMmnqBuAzgGmxMN+SfanMgZc9D2pdQEUrHncO9OR\nvmJC4HrSDch3FcinR5+ZiMbhTAVc+X05zTSgbO7vxSKzKcHkYxTxnbu79qkCH7OVwu3B9asl\nsLik2hvmY/NRgPgdMUCEJLE/KKVgABzg0vQYpqKsm7d1FIYA9j19aFbHB6UKu0DNHH8IzTAX\nb68ilQpu+7SDLfe+U0qkbgp5oAAu5vYU8qB0OOKZJtV9uOO9KFBIx90UAMUkNyKlVTuyeRji\nkUDJ53U3nd3FAAuN3zCiToQOnpQ3XjvSKhDZPIpABbYozwOlO2/N97imeYrLzyKXd7c0kxj1\nbP
BGKbypz6UoIZOnzUqr1zzgZqrjQksny/KOaZvkC/L8tPEg+nFISWU0CGLMG5PXp0pZD26j\n1pJJPk2qKj3N1ByvekFiXKnYR2/Wpc7z6VArDYBjNPUjOM0AP/i60eueKdKyKNx447Cm8MuT\nnpimIFQFMZ59aR1LY55HFCtyMDgDFByVOBjmqGIRjjGTTtpDcjil5PA/GpDjb96pEN+7gbqU\nx/KMjk0HZ5fI59aRWO3PXBplCqArHA6VJncvPSjzBtJA60AZU9qVgGNik3Fj7dMU4Z257H1p\nMBelAAshK4Axjrml5Ck/yoJHWgr83I+WmMI1zGd3WnLyw29MUsmFTjndxTGXGGGRimA4qPvd\nqapIz0ye9P8AlmTB6UxQI2XOfSpFYfg7lyflpY/uszjABxQw2++aRN3zbulAbEuB60VXyaKQ\nxyEKx5GTTWX04PrS8MqnGMikZgE2g/jViF6qc5ximM2EGB83rUjN8oGM8VEdu0be5xikSMGG\nYjuamYDaARiktYtpZiPu9M07cGz3GM5pgRPhZNw5oLA4A4NH3VB9TShiuWAzRcYYHAPFNOV5\nPI6UyR965A5py5kXI4oAfHJHsK9/WkYjgDlqR9ir8o7c03eG4UYOOaAHNJ83HHvTGmLDAOTQ\nsv8ADUaqfOIPApiJY5AwyKRnMZyehpEG07QO/WnbQzfN8wFMBOdny9acqgck8ml3KvCDjvTS\nwbHapAYvGc9jUm7fHgdaTKnIxQkLLhyQVP6UwGqQuVK4pnLE5PFTSLtXnvVYofOBzlB2oGTD\nK4IGadnABwAfSkDhM56+gpqkMxOMUhEwJbH93vUe7OW6c8Ui7gp5yKUt+7Ubce9O4DeO/c08\nEHvxTONwB5FP8vj0FAxjMI/u09GEmaXA2jau7FNWMtnbxjk0CI13NKSOBjpTtuyIY+9nNDb9\nuQuKUqWYZOOKAEG5OGO8tzmkVmXd2oWMYx3FMkZWXIOD0pEkm8MoOMilbAxz1pIYwcLnApTt\nLYAwtA+gyFuoYArnrT2UKxPak2/I2OBScnbzxQSOaPKk7uKYoKqc/eqSTAX2qJlKtnJoKRJJ\nEXCgU3f8gA60BiVBbPFDfKvC5oAYxKYAP1p20jvRGVY8jFKS277vFADRk5weaRcvnnApsikc\nj15p4XdhsYxQMRcZO44NN+U4C9aHbnDDNKv+r+6B70WAkzhSQOKgaQMCvrU4UsjAHtUdui/x\nj5u9DAiRdsZ+tOg2fPub5scU6dsNtA4xUEcQXLMD6UiSzH80QOeKikkCybQaVcYw3HFQyKVw\nQQ1HULBIUb5h+NSW+1lBxxSeUNoO3jHOKZu2+wHahjLjKGU4PIHeoNzfdzkGgSFo8jjNJJNw\ngC9OppAOxtOc8ik3GQ8DApzFdnzBvZqdD93djimA2RfL5J7UQ47CnyNvU8VAxIj4PNAySV1A\nw3OelZkd5m6MbDkVcuDuUcc1BHAqyZI5PegCxMyqq8cGqiyPuYBflzVm6UsoI4FRNlGGOc0h\nWEVjtOPyprbuCOeelO2MvJGKeq5GSKoBsjkr0245pLFtzkknPpSNjnPSpbWNeWP3qBkVzIX6\n9jRCrMhYn2FEkW1zzwelKAVjNIBpUqcfw0xmbkEcVYjkVveotrZPGaQEasN33cj1oY4/Gp9q\nJHlhxUbRhyBnin5iGhgPu9KduXrj604osbbdtMJCk4ApgIeWJHSoyM5JGRShirYbgGpVjBzg\n8UhIrrGrLjt1pFYKTnpUrALntTVCsDxxSKBsdSOKZtH3sVJt2j1FNLAY70EsUNuj4GaRU2nl\netP4Vc5wKSOTzOSMe1UNCQ/K2P4aGzzt5GaVvlwB61HNMYW+tIQMzIRmlb5mBxik3eYuR1o8\nw/dJ5oAbtP4U54yiswORQv3sd6ecsD+tAEUfzY5wTTvmjLMw4pJF2sMCkw7LyePSmUPj+YHJ\nwaYW
DLtI+bNOx8vI6UCMsc5A+lBLGso359KeqjsBUZkAUqR81OVd64oGJtAc80m4N8tP4Xnt\niolXndkj2oAesYC5IzTFRsEgj6U7JHABpNv74ZGaBiL6HgVJGCPfmmyMPmBHSpNhaFNvWpAb\nsG4sDijy/lyeTTfmzypxSru/i4FAEbffC8YokUM/TpSzKWkzjj2py5YdKAGthmHtTWkbtzzT\nlAzSrjkDg0DQrsuzheajjBJyTUmAeKbtCg5NIQrMWbOMVG4HVjzU0e1T61FJj0pgQOrcMORU\nqtuPJx7Ui7h2zTmj+YEjmgBu0rwfwp4JUdacJDuOR7U2QlWHOQaLAGB1zmmtmTGalaP92SKZ\n79qAADy+D0py4IJJp0i7o846UxV8xeOopAN2jzMEZ96ezBmxjpSFfkzjmlCjsKAEODkYyKYz\nBR0wKnVOCKjdRtwRkUCDomc5FJkMoxScqMYp6kbelMYjKMcZxTEjByOtPC8YB496RVKtz0oA\nTcUbnpStjHNG7gkikLFfemBE0eDg8ChV/dYzzS/PJ7CnL93p81HQd7kEb7Sc9akHzADPNO2r\ng5p+wKQc5GKQCKTu2jg+9P8A9Zxj8aOD25oU9VztosFhnyqDgZqGNSWNWFVd23dkmk2qucZB\nosIYp+XrTfM7sMH2p3AXgc0SL8y5FDAazYHIyaVU3R7jTlTrzyac2VQAjIp9QKkifLzmnKvQ\n9qlKmQbsU1cnjHFAELNhjz3p6tnHpStEGPTmk2lTyKQCsitnFIudvPUU9l3RkgYP61H5LFup\nJpDE3Bcgd6dtzxnBxS7cjB6inSLnkHmmIgYbSD1FOVVPLc/SnNgfKKaqlvancYckYx9Kk5OB\n2qPLI3NSK+4YA70hjGjBkx3qNSVYjtU/GSWzuo27l4OBQMbIPkBFN5654pzYjGO1A+704pok\na2GHTmiRNvfIxUmwNTJIyMEdD1oYiJRkbhTetTfcXHemn7vTBpW1AbHGV6nIqRemQaY6lfep\nIWVVzjLd6AIm+YnNMkUBenNTspJBA4pjKc8jimMjTdkYNS4O8hhmgbdy4HWnZzk9DQIi+72p\nen3jkU7buOSaSSPdg0wI8eY3AwKeIwtKeMe1Lu2ru70AQtGWyRxT1UhTn8al80tjIxSMNw44\noAgbHXOKbkFalZMU1odynnB9KAIGU9e1Obcvb8qesZxj0qZYyy+9AFfcVXFAGVqVVXcd3WnL\nhVJ7UARiPavIqPO1qnZQFHzUx4wWBHpQAw5bBFNyynpmpUX60kkRPO7AoAbz1HNNXdGScU6P\nK5GMjNL1YelADduce9NKlWHtUqLu68UNgNgc0EjYyHZjnBApdu0jHPrTF+VyR3p+4/ePSgLD\neBnPApNoZc5p7KZMEHilWP1psoYgG3HpSMo9Ke1KqtjPSjqAzb3xinKAy+9PdRtHFAbaARQB\nC0bd6Xy9wyelSuzZ+cU36nimwGMwVfSiM7uetOaMPnihVYLtHamKwqn1FCgtTUJxyKcvB4pX\nAawP3R1o3DjjFSbDknoKYykqSKWox3HWmMp6kZpVUjgmnsCyjaaZLIPcjmnF8896kkUMAM4P\nrTFIxjHPrS1AZuC5ZvyqQEsuQvFI0ecAdaVWKsc/lQIOGOegpVXapJo254HFJhgcVQxfkCg5\nobLLkdKZj0H1qX5uOeBQA1Y93BpGSpCwLZoXpyKBDVwOOlNb/ZHFPMasvX5qVoxjI6UhkO07\nsZpTkYB5pcHHvSMhHPWgYN0J70ijPfNOVc8Z4pqod2R0pAxduPcUKp4I+7705iSMDrSSArgd\nBQITvz0psy7myDxUjDDDnIpGH7snHOaaHYhdfLIPapFdZGNHlhv4qFi2bgB1piHsMgDrSv0z\nnA9KSNWjUE9cU7AZeeDmkBG6kLnGTTkkPl+/pT2bnK9KMYXPekBCVJU+9N3EMMjIqYcA560b\nd3tVIR
CVLEknAp0eR97pQykrgnmnqvT0oGI3yqOM+tLtDD5etLt3A0qpsOc0CAr8uBximnA6\nc09cdTyaRVByR0oAbgLzijqBkc085weOKQZXmgBv3eMc0pykec80oz949aerArjGTQA3JZR6\nVG42sCBUo+VSBTWzgZoAV2AwD3pM9TmkPY+lSqNwzwR6U9wGcKuWXNOjYFcZxmjd2IJFBxuy\nVwO1SAMoWQDvQ2Wbng03jdyae/yrkHNAxI0Jbk8U4qRupY13AelPY47ZpgRbtqjPapANx5HF\nIwLR46UgLNg46UEgzFc7RS8rHk4zS53cY4okAHvQUCZmUY5NSLGY2OabGQvKkVIDv5zk0hCH\n5V4NHPKsPyobG4YHAqRQW78UwCMggZH1p+PnypoK+XnBzQuOuaQAW2jJFMOPMUAc9aeuWYjG\nRQw25bHQUwFZlUnPSljjKrntTVYbASM5p67umeKQhNg3A/jUir82G4o9hSN8xzimMRRtDKRw\nad5e5eGwRQqlucYFO460DIWZlk9aeucAmnIRuOaOG46UCHN8q5HGaZt+UEU9RuYhuD2pysFO\nCM0AMjbaNp5yc0/5fmyO3FI2A3pSp8/sKAE/iGDgYpdgZsk4pVXkk0n8JOKQB5atkD60wsyk\nEnipY8bMng0xoywzn8BSAl3DgH65qPHXnNPUb1CkVIFCkjH0pgRbSIxjrSqy8DPNSMo+83Aq\nNo0X7oyPWgfQf5gVuBkUcHkCo8EZwc1I3y4APagRGVO4tnipPl3gkcUx2O3gZNKG3RjjmkPo\nKV4PpmlSTa2T92kX0pGbHylTVCHyYzkDikX5uB1pI+AB1NNK7mL8igQ9Mo2SOaRvmo3sSSTx\nSI3GAc0DE2jdgZxT8gcZyRTfM6gClUL170AO+VOvelYKq8Hk1C0R3Z3cU9VAXH3vegkVmPk4\nHrSLhWJxjIo5/h5p8hDfKfvUihFAP3hSMCsmegpu7J29KdIpZfegYMvOc4prR+Z9AKUEbgD6\nUm7bkUtQG7WA6ZoU09pAuBmhcNmmIiEYJ+Y4FSKoYcU1lycMcntSmRV24BC0vQAU7WINDSDb\n70mcksvT3pYyoYlh2pNgOVQq4PNG47h6U1ctwDzT0UN9aEMawbBJ6U1c7RnhakYncc8imKWk\n/h4qhBvHmDjilVginjHNJztyRhqTY3VulSAxU+bcT1p5XODTf4Tip42DR5xgUwIy5UbeMUvE\ng2kVFIBuJ6ZpVZtvFDAkSFWUleop7YKg4qJ8hcKdv0oBbbg9RTHYc2cnuPSodu5gQcGpGQ9m\nxmodjLIBjPP3qOgEm7OW4PaosDcOKesZ3NzgUu0r1Gakkr7gAVHXNSR9cN096heMxsT13HpU\nqxkqFJ5oGBUqzZIKCh2G3cAakki2r0zxzTF3eWM9KBg69GGScUbh5eQcGlDMny9fem7M9/rQ\nIVpdy4x0pkeWbrgDqfSnRgMT6U/cvkttXBosMjjJmctngetSHngrTVjaPl+C3NOLdeeKfUCM\nRsJBgZqYyLye4ojYp834UxNqq+/ls5oCwi5b+GnbduKQNuYcYpdpZhluOtIBGQhMls80IwDb\nx0xS8MxHamKCq7eetAh2FbPOG70qsuenFPKiTBAx6mmNGFO7OR2pagK6jbk5NIMthhxUjMOB\nikdNvAagZXGTnZwBUqqXjJ7iiPC8Uq/Kxx0PaqEJFKF+/wBO1WfOBHzLgVWSNpGA6iplhbbl\n/XpTAYI90gqZVC/e4NMddjA9RTmJPUZpDHeZt96kkYKu4KDxUDfMcioysk2FX8aAsTQzeYP3\ngINS+WJFx1qDaysCOg61OpIyymi4gZfu+o60skgZRt+mKdGCykAkNSouxckZpDGI54BXLChs\nSNuI5p7R/Pu6ZFI23bwOlAWFVc4AGeeaevO4Y2mnLIjfMvBxSKSTlvzoGR58whT1qRYzHgAb\njRJ8sYbH
PtT4/mYbuB60DAo20jIzT12lMkZ45pJFG7g8dabIw2lelJgUY53jkYH14+lXreTz\nFwxyfT0qLy1ZckYaniMKARwaQExkUrgcUrKBGMH3pqHcxUrkYoUhePSn0Aco6HPFOZRzzg1H\nu+bodo5Jp7gtggZFIBxQ8DGR603ovTn1pWzygNN37OOtABI23B65p23PHtRwMcZokbaOKLAH\nU/NSZKLu25pZTuwo5OM0gJkQDOKLAKCrqN3ytRIpVuDSFQ5FNYZPA56ZpDJVIwWPJxSxsF7d\netD7VRcnPFMbDYPamA5cNuI4OaVVLZz+dAUL06d6ar5YgH3xSACw6ZppJ9cCkPzYwtMVPMbB\nPegRLIwjUFVz7ULN5xJIxQI/MIY8Yo2hc46UJDDd5ag9T3p+dxb3Hak2hWB69qaqkPjPI5pA\nAU54BP1oZiD0yKfvxkmk3Ky4FJCGLsVvnODSM3B2mhoY9yg5p6xorEj7tUMjdiu1sZFKsise\nOtKZAAMDgnGKY0IwcNz60kKw/aHPJo5K4BqOImNdzfQVNGgeM9mqgHR/MvTinbQozSKDtI6Y\npVYHg0XHqEff1xml3eYq5HIpsbdR0NPZuAOlSAMqsMNxSLiNcDml2kNycinRt5gPHIpjBfuj\ntTtpY1G25vmqQsNo5zVAR5LSbMYH6VIRhcYxQpC4IOaNx8z5uQaBjRz2qTd8w44pCvocUxVI\nHNFxEoXc3zLj0pOFB4zSEtwR0xSrimMRVJ5A4oZSwBx0py7txx92nLjBY/hSAZztHfnmpFUM\nvJwKTcNuQKRWDcsMYqQF8tPeim+atFAEWG4AHyiiPluwHWneYfL6cVVWZWkKjJ96skuK53Hp\nUDK3LZBC807cV7U7jGOg70hBA52bjyG5waRSDkAZ57U0rkjB4zSjCk88UmMkABb7o6dKjK9e\neOmKQMFbIpGkAX5T16mqEM2rH8o5z3pW+UjHApI3Xf8AN0p0kqs3T5aADcu31Jpm0Mpw35VL\nDt3MRTAu1icfgKAA7eGx1pHYrH9aeZFUbcUm1NpDdaYDGYqoBORTdwOdvJqR1G31FMXH8Iwa\nQC/LGvPNIF2McnK4yKVQDgY702SJkbaOTnvQAsjbcEDqKch29c0q5PUc+lNk7se3GKAHN84I\nzioljKoWPapG27iDSou+M88UmMrqwZjxx61IVZ1G04OaSNcDHXB5p275jt6CgQ8Asg+uKdjc\nh9BSfc+Ung807A+6DwetMCBV3MAOOadJnkHk9KcxSNgSp/CmuwVuctz2oAGb93t6GkRSFPOM\n1IxVlyBmo3kOB8v5UAPLbVUsc9sUi4EZGOSaGUbd35Uze4bgUAPc7OT0qGTY3KrjFSSTFl5T\nFMd1XHoaQCW5JXngmnRyDzipGOOtLxIvHFDDGCRkHimA9R+IprKAOlOztTA61GzYZW6CkIa0\nnltjGfamRyFiVbnnp6UrEsPbvUTKUyV5pjLO44+78tOB/dYHXvUUW3aCQQaVp9v0pABJj5PI\nqQsrMuT2qFtsnIPHpTwiqoPekTqMbIG3tmgtjjtT8gt0prMN/PWqAZu2qSfwpwVnUeh64pGX\ncOfWnMWXheM0DQ3cyuFJ4FQhnaRyQAc1Y2szAcY60MqbjQwI0Yc5HNOJ8z5TwtNOwN1x6Uis\negOaSGLIpI5HHtTPJ3Mp4+lTR/PxnmhlDAle3egBEXDEDoKjuolzk4qSNmHGOc80kqLj5uDm\nmBFChXr68UbvYjmlDeWRghqe8g8s7RkUgFVvMbDDj1pytsUg8DtUUeBgt0IpZMZ5OfakIJTu\nYbeajZvMOB1oDBVYjhj2qNNx7Z9TTQhHyuFOetKjdz2omkCjnmkXHl4xzTGh6TIzYP40NiJf\nlORmodoViT1pwbcuNtADZY9x37uKRHDYAJqVW2rjZkVFIoDZU4FBJJ5GWJPSoyQpPO2rHmJs\nC9GqvIDtYA
Z96ChUUMwOdwpc9eOBxUW4xogI57VIrAtgtxQAsLbT047ipJlC/MvAPao5VCkF\netRzTHoP1pB1GZZww7D1oh6YJGadtZVweM0kYxuOAakZLMxJzxjFR/KcEHJp/wDyzPOPaoCw\n2ccGmTYlmyzKQM+1DHHB4FEEgXl+PSjl8sBu9qAImKyZ45pzLtj4GaVcbjx+VIGIkwBlfekU\nNbJT5aiUbTzVgsIzyOKYpCtyuWPTNUhWGiT27Y5pittHpT5D+7I96YflIpgPU7iDTbiPo56U\nq7cZJxSsw8sgnJpCIFnDZZcDFS7RIucYao47cDnGRUoYL0HNAEQ3KSW7VMwwOvWjaGX5utKO\nwPIpgQyMDjHSnrt3DHWnyYwWUYFR8j5jjNIeo5jycULiOPI5NNGWbOOKeNuwjHNAEXDfMeKF\nY8YHfmn8bcY5phB4OOOlMY7c2CcYpMFh6GnMpwD07UFNpAZsmgCNGO5fTNTNGGb0OaY6HHTp\nSqSr7s5z2oAR8c8VLGy7QR19qjKhj6ZqVdsQOBk4pARyMeRmo+WxmhmHQ0RhlyuetAD2bIIA\n4qPnAx0pxwPlPNOGVXGflpARM23APWgewyO9K6BWAznimj5QeOKLAiTco4A59abtHJNG0qOv\n0pC20ZagbBfvE4wKXhlLCnbCVznn0oWP5eKBDN3GabGzdSPzpyMQ3IpzfKrYGc0gI2Ddc5ya\nOvWkDDaDUir5gDdKYxVJX3FD4UjPShv9YADxSMBgk0CCRSpHzcUqkKfTNIOnv701c556UgHS\nNj3FLHJ+75HJpkmQ2AKUKO9ADgx29OaG4X270uwEnacU3n7pGaYCJhQCT1p7Fdueo9qJIxtG\n4/SmspjwvrQALt+9jApGypPensV246jrTPMABzQBGWHQflTmwy8daNo4OMUjgsOKAGc9BT+V\n7ZNLt+bPTikVSGbJoAbBnDKetSlcgACgKFOehxSr7GgYRr1z1FMVd2TU6MF5YVAWHOO5oANo\n3gqMHFM8xmbhcfWp9ucdjTJIyrnPU0CGHnqMUkhbzAFqX5WXGOfWmrGfWkBHt7s3IpX+ZRls\nCpNoGRjPqaibZ53I6UxMf7dvWmSRleQ3FSbgF5zTOGGKBjVG1qAm5ie9PZeOORSZPcYpAM3Y\nOf4aUPt+nrUhjHXtUUikDpxTAeqKF5OTTPu5GAR605VG0Hue9P8ALXHHNMdiHyx1HJpRHu74\np+09VXIpsjEcYwaQDdoPXnFR8Djp34qROV96SRQ3IODRcQqrub1ph4Y7TkVJGpX3pyoDkjAx\n1oGRSKGXPSk2ngk8VMYw2ec0hQtgelIGQhTuJzxTsbuSeKk27V6c0wKW6naKYDGJU4x+NIvP\nJ6U5trE54qfYrIKY7FZU/KneWVbmpV2q2TzTJPmXIPU0EhHy3WkaNmYjtRzGwJOMdadvVsHH\nNADGi7DtTfLxnvmpixbrwKZt6nPSgEN2jkZ+tRnO4AVNtXYaav8AexSGIu3qRTVUtkCkbIBx\nTkYccYNMQxlO7J6VJGAVOe9KVDE5IpMjJoAFXIIpuN0mCce9O2bec8UdcHrTYEOzafenqDt5\nOKXaWJY0xhlfmNCAY+Gk4p0a/KAfWpFxtBwABRnd2zTAZsQqcnntSJ+dO27SxYDmmfd7YpAC\no248/LS4WReSQacMUxnUZx1oAd5Z7ce+aZs7A5qQ4Zd2Kj+XaSODQAnO7pxS+WeT0Bp69V9K\nG9jmmxEIj+apGUbcdqfw2cDmk6qAakZHGoVsE5FSNH6HIpzrnBximbTjggD0pgL8u3nrUe4t\nkDg07aVPrTt23otADCw7c+uaXjqDUiru6jFRJGVzxQAOxbBxUbZ2k1IylkwD3pGX5cCqAjjY\nsODyKmDblHY1GqhccGh/9YFCkUwJHB3cikb5RnFSk7lyDUfPAPNSAisep5FKSOMU9grYpgUL\n1GDSEJ95unFO
OF+UdfWjnqDShenGPegQigqoyc0zaQ2e1WDGJDn8zUTfKp45pgR8tyDilVep\nHUUvPcUuSF6c0XAbt3MD604oNpINJ90c5NLx5YI60wCP5lx0pFJXg85oVT3p6/MOmKGJjG47\nYpN3yn1qbnGM0wcsQcUmOwxsnkDFKxAxzx7UvYgDNNwDjj60AOYjbk00ZK7hUmBtzjNRqDux\n2oGJGoHJOaXnadopVx5nTjvUmR1H5UwIlUht2adIzNj5eDSKQCcDjvTm+ZeDQSMI6A0pXPOa\nGyF96Ow70FCqN3AFKw6DPNKu6MbiODS7AzB8/WgQxge9JuX0qTO4k9RTEUN2qRAVx0HFJtOB\n6VIXxwOaM7W+tMZFIDnd+AojBKnP4U/aB7gUoTndnI9KBjMjuMmkRsxtxzmnNG2Se1NVtq5/\nOgAJ24yeKXdu7U5dvXr3p64fkcCkSRbjnpgU5eY81I7D7uKarYjxjNMYi56Zo9iKOB7Uv+sX\nAPINMBu0sfQUqKVJPWn9I8EUKQvJ+6aVwEVdy4PB9qb5eBhjT2wmMdaSRgx6cUwGABWx1FOZ\nQORxRuHpxRIpZRtNAhI8mTB6HvT3x0zml+ZVwMY700HnjmlYYgjBye1P4CYIy1N2dD2p/Q7s\ncUrgJzt4OPanKwkXBODRjdlqTcI1zjmqsA5lweORinLlVBNLu3w8HnvTC26PgcigQ6Nhk5GA\nP1psjiRsAc052DKMilhxuOKYECxFGxjirMce1hxxT927oP0p0eWJBpAMaLrzk9hSx/cyRilY\njdSycjgcUAOdQy5BpirjrSqw296OckY4NADgw+nFIrbs8cEURqeQTxTtg8v5TzTAiDN34A4q\nV4zGFyetNC7WBfmpC3mY7CgBVZQOak24UH1prLvUDPFDErkdBSAPurjPFG3C00N8vIp7cr6U\nhgPuZo4bnGDSfNsp4/1eOM0wYN90k9KbERsGfvVJ91gDyMZpisZJDnpTEPxuycDNM6HFC5Vv\nrSr9454oAdE2GwTkUpI5I4NIFHHGPemls8r0zipKB1+THJpR8uO9O3Hdg9KcuCMYosIVc/QU\ncnnOabypwTxQvy8gcUCJWxJhe1Mz5ZIxkUKSVOeDSrwCepoGCNtbjAalkbpuHPtTFXPPcmnl\nGXnOaBA3rjikYhW6cY7UeYyryM03dxQPoIp+Uk59qBIZOT1p2R0601PlYjHWmTqOBK8qNxp2\n0tyR1pIwYzxSyMdvWmMbtxnd0pRGvK9DTtw5DHtTFXvnP1oAQRnB5pFz07VIy++RRsIXIHFI\nBNgYgZ4603hdwA4zTlAFLu8tSSMigBI2I+Yc47UmRnJHOaci8nB96a/zZxSAfhfMUt09qRss\nzc4qNZlDLkU6VRyc89aQwj2ry3Wl4kbPFM8vu33aAvy4B5pgNEO5juPNNIxJtXmpfMVFO4c0\n1WG3I4ancQmd2WHXvmkXDjIbAFP+VQc96UKOFAAFFwI5pPLXodvqKZHKJWAAx9atiLaGIOPS\nqywrE2RgA1ID0+U88kmpGyjZGMUxfQ8GlZtykY6d6AGiQtk45zT93lkEnC+lR8x4J6VI+2RC\nD1pjGyN5ilxUa3G5RtPPpT0j2g56AU1YwCCMbsUAN6cmpjkQ4zTFwrAEZyaR87iKQhwiHG45\noLBfYdqarbfmPTpTdgDA5J9qAHhN3Qk09lPmDFIrdidoppB5G7mgBG+XIPWm/e5HGKXcTy3N\nPd1PRdtMCNlTKkg5zktQWG8kfnTkzyTUXc+tNoQ58bTn8D3pj5DIORnvSiNiwPUVI0LMOTwK\nkY1d7Ly2eaDjOKIWKqSBSthl9DQMi2MGGTSqu3cxNKp8znsKZI25toFAMkhx5ZNOb7oIpi4X\nANOmO1Bs5yadxC7gcF+cHNMQbpH44Y0rL5mMEZp3ClcikUIynzBn7tRuqtIQTzUjvsyNuSab\n5fmYI4bvQAsajr
jFK2McDLU3a0eRuzUy/KwYnqKQhu0LyPTpUUjFeV49al2lWwT1puMxlsd6\naCw5W3xhc4Hr3pY1XgE9DSKo2+4GaRcqOOc9abGDYXPBbmnIxVsHk9s0bju69qbGcrhhuosF\nhGbaxyBvpd5WPJHXpSRqNx4wKfOpKxhSDhs0WECjarnv2qWNsRgk/MahiLfdYfjUhA3+wFAg\nHzZApy5XrzQsiEHg59qazHO1lOfWpuMNu5Tx170kbeWuMU8FlXJ5XvUmBIm3GB1oKI2YLESe\nW/u0kbNtz2PalZdqhSKWJBG2SevQUxE21uDz0p8UgLFWpN21Tu6AVFzIRgEe9OwyVz5a5c5H\npToslsYwMd6JMbcdcc0NmWPdnvQAqqY/l6803+8oOCKnjKFecj6VG5V2yF9ulIAWTd24p7YK\nkdKaEycEYpGUiQlfpQA+QgKoHfvSSANjjmnMpyM9KQMOc8UmALjuOaXgqCaOB16VIsYxupgL\nGdi5Az9afuXrtzmo1kPJP3aI88849KTAcy7lwv1zTUJHzKeaXa0Y5OFpFx1BpAOBAYZ4PenM\noJwePeo5AW+tClm69KAF54xTF3eZk1Ivytto4LDHSixQIDyTgU04VQAOTT22rknpR8vWmSJQ\nG2kAgc0K3O4CkY8ZbikwHbQ3FIuFHJpVkG3A5p24NGV24NSMR8k/LUartYkjBNSYCgEdaRlL\nc9TTEBYbuv4UHDKSOGpqxgMGPWpOFBJHWgsRiVUZ5ppYYHpnml3hvahQHH40xCLIWlIA+Wn8\nAj1pdqjpxSOwOML+NILDHG6Qnp7UowOlHl5f72PrSgpu4OaAEXvuH0pysB1XilVS2aNm3lul\nADNy7sgfhS7V2kdRR5a7sg8UvG0cYNAwRQFAI3Cn7RztNJHwCWBz2pD2xzmgQde+KTHTtTW+\nXluB60qfM4waAHoVOfWn7d3FRK22Rt3rxUu7PtTQ0J3yOfal42GopCd2R+lOUN1A5NISHbcK\nOMCnGTCkYzTWY9GzxSZG4c8GmBIie+BTlxk5zSKodsCkYmNipHXigB+QACKbu5oVfU4FOjUZ\nJ7e9FgYu4Lk9qafu7sZpyr78elNZirYXimCHx/MvXFMVj5mMnbSENtweDSxsQMYzijoMf/Fx\n0pNwbjtSs20HjJpu3eoIOKkA8lfSipPLPqKKAIWJCg9KbtVVLFcmnyf6s/Wkj/hqiRnLMrEY\nHpUbP85B4FTyf1qpJ/rRTAm3BWyR9KYu5lYkfL1NF12py/c/4CaAE3Ky4Apwj+XGetRR9amb\n7tIRF5ZU8DIp3lnkEY4qRelI3amMib92644JGKcrfOSxz7Uk3+uSj+JqAGN97gd+tSOojUbm\ny1B+4PrS3HUUCGxtvyO1Ky+XgjoadD/HRJ/qhSuA2IhZDu+tQec7SkEcZ61Ov3agoQyZZCNo\npjBd3r3pE+9TV+9TAe77ueMk1J8pjwOneq7fw/Wp4f8AVtSEN2lVODwelLHGUYEfMO4pG+6K\ndbdfwpgDLu596SP5s8YIpR2+tIn+salsA5u27vTFUsxx2qWT+Cmx/eP1p7gxitt+XpS/d+8c\nU1/9dTpvurTYIJF3YIOQKF+Zfehfu0yPvSGSNGGfOcCo5FCr0zTlol+5TEN2qi5z1pdoVQO/\nWo3+4tSN0X6UgF2nb97nrzUePMYndkelSrVeL7xoAekLfM5OFx0pQB904FSN/qz9Kif75pdQ\nFWXg/LkdqaF8xTuXHpRH91fxqb/lmaGBCwTaM8HtinphYyrAluuaZ/yz/KpPX6UIkhe4VFB2\n9Tinx46tUM3+rX/eqT+IUdRDnbdnIqNdz5BpR91qWH7ppjQnlYwd3zUKrjgniiTv9ak/wpDu\nQSRr5nAx70DEYHNFx/DUU33l+lHQZIuN2RxUkjfu8L1qFadJ900riJI5OB83z+lMnPyEHv3q\nOPtU1z/qTQMhjxtH
c0zy2gBY8qadb/cFTTf6mmBDu3EHHy09plK8DkdTUcf+rNIn+rel1AVh\n8wJPBGaWNRt4Pej+EfSo4+gpkj5lDNk9BTFx1NSP3qP+A0xoI4+C3Wnx9z3pkX3WpYfvmpYD\nGmMaFRzupEXcoU+vWk/janQ/64fUUCQxsJLlhxUm5SvyjOaLr/WN9abD3oBjmT5eR0qJV288\nHNW2+6fpVT+I0FE8gEaZ9qr7ScA9akuPu0neqF1E3bhtJo27Vz096if7tSH/AFQ+lAxTIu3B\nGfeo5mIXgcUrf6mmyfcSpAhjXd949asKyxx4VjSUn8NIBsfyfLU+0jsMetJH1NSL900ElYhn\nU9KTPzDd9Bin9zR6fWmPoNb5M5GTUI+6S3HpU1194VC3emIcnOMjFNMmJOTkVI33V+lQnqaY\nyXzwcYPFL97JWok+9Usf3akTHRocBmxinH7vHFIf9XR/DQMbknIJ4puc44pzdTTY+i0MYbtq\n7emaFUMDg5pZ/vCktfuv9aaACpXGRTww4TGB2NH/AC0FJJ/rDTEOkZdu0jPvUW3cPent90Ux\nfvGgY/bk80wKd+OoqV+1Nj70wHOy9xTIzuy5PXinfwtTD92p6gIyBm3Z56UxVMbZOSactOk6\nGmAw4wSTk9qVoyy5ByPSmVLF900rEsYyqy0nHcYpO5qVvur9KY0R7hx83OeKRmG7nkUn8S/W\nmzfdb61PUZLHluaU5XGDSWv+r/CpF6LQBFGRyeh9KXjnjrSN/rjTj94UgIjCUwByD1p0f7ss\nBzUh7fWmR/feqQwKjeCOppfUHnFN/iWnL/H9aQhM+WBnnmlaQHkLimzfcX60fw/jQA8jIz0p\nq43e1SN/qxUK/wAVPqA9sL82aRW43etNb7lC/wCrH1pgSfeyCOaGPy4PUUD7xprd6lgJn5eB\nmmLGW56VLD/qzTl+6KQEKqQMMM/SlVdre1Tfx1G38VAAq72GaWRRuBpkf3jT5O1AxmO5HJoX\nhs4p8naj+GgBMtt4Iz6UwfNkgZIpW/1gp0P32+tNCGhyckjmntmRckdKO34071oSAjBC0gbc\nwAGBTl/i+lJD/WgAHG4ZphjJ5xz61Ie/1p/pVAQyZVR3oZ90fIwPanetJJ9ypATlcUbSzAnp\nTv4RSr0NIBoXcx9KTHzYxkYp0f3mo/iP0oAZuAXpxSqQWO3gAc0ifdNMX7x+lMCbaRGOeT2p\njoPpUv8AGv0pk33hVAQiMbs80ixnuOKm/hpTSGQKnJ3crS7VbGODT/4aj/iph1HbdxAzSrnz\nMU6PvSr/AKwVLBhIPSodmcnOasfwmoE6fjTQhPLGD644pw6DJ7Ujf64/SlbrTAZtU8nilYju\nAB2pH+4v1om7UARZ4Oeeal2qyhsbRVb+Kpm/49/xFJAOZPXim7d4OWzUk/3fwpkf3aYEZ/dj\nA5o9DnApzdab/CPrS6gG0MCc03aV5HNOi6H608/dakwIlG49PmqQADntTl/1g+lI3+rP1poA\nbAGOvpUTZOOwqX/loPpTP4TTAGU4x1oVhtINLUUP8f0pASNGBjnOaaE25pV++lOX7v4mmT1E\nbbt5HNNddyjjFIe1Pf7goKGbMU1lU8AfjU//ACzNRR96T3Abj5uDxTvJ7gimL95qk7U0A0rx\nx1FN24Ix1qRfvH6Ukf36bAANvAGPWgL82O1OpE+9SAFk6qwpq45AqSTqahj/AIqAJfvDIGBT\nWk6YHNPH/HuajXoKBj+eucimsC3Q4pydGoPb6UEoibK88GlZh0FM7NS00ITHfOamVfMXk81F\n/EKlpMfQjXO4/LxSgls8Yp9NbrQwG5/hp23HLNxRT5P9XSQIFUbcjrTGY9CKev8AD9Ka33mq\nughFfapXtmnMyqmc5NRN0NB6UhoUsd22l2n6/SmH7zfSpIelAwA3ZzSIB6HFK3Q1JF9yqAj9\neOKYSzNjpU6/caov46
GSLzz601k75zT/AOKm/wALVIXCPO04HHvQVPSpF/1dL/EKoBjKVjqP\n8Kkm+6frSf3aQhAgHOad5Z4PakH3jUjdvrTAhwVJAHGacw5GBUn8D/Wmr0FMdhrIc5Y1HGPa\nppqQ9qQDi6sm2mL83G3rTf4qsxd6bAg5X5MUhG3gVO3Wq7feNT1Aft+XpQwIzzzSn7q01/vC\nge4LuaPkDNLwvA4py9qZJ94UMQ9clemaiZWPHSrK/dFQt3+tMaDbuxxxSmEK64OBUi/dqH+L\n8aBDmxyRyaZll7DmnL3+tPal1AYE+Xnmm884HNSDvRD95qoQwsXBXGKTbwFxk04d6cKkpEfP\nJxS7fft0p47/AI03/lsKoTFVRtGQac21cbacai/j/GgRIFKn5hmmtjb8p5qRuh+lRJ1pdRjl\nA6daAx2ncMUsX3xSz9TTEIDu77RTdqyN1OKG/hpq0D6D416kHAqaJfkPcVXj71Zt6QhjLuxz\nxnpUq4TpwM1Gv+sapJPup9aLjDzOuR144p+SvA61H/F+NTSfeouA1sFcAc0ucLihPvUsn8P1\npIRGckD1p652kbqRv60kfeqAcinBBbBoVW34PIpf46ev8VIBHVMfN17VKrJtxt59ahm7VL/y\nz/CgBGjIXrT2AC4JyaF+6KbL96gBGG5fl601gxxzz6VJD9401v8AWGkA5FO3JpzBfoaX+Cm/\nxUwFGd3TnpTZMx4AP1qRPvGo2+8aYChWbnPFPbAXpk0J9ykX/WLUsY/7rcUrc9BSP1pf4DSQ\nxNu7DZ4p2PmAAxSQ/d/Gppf4aYiENhzgZFSeduGMVF61Iv8ASkCQLgnJ6U4YGdvOaZ/DSJ96\nmA7heRS7jtAPFDdKU/dFAhsmGIApgIycc4p5+8Kjj+8/1oF1HL8i5IFOYhe2c9Ka/wDqxTj2\n+lUMRc7RnpQ5G3PpT2+5UTf6s/hQA7YWwegofb5eAMGn+n0phpDAkeWvODS7zHwWz6imfwL9\naH+9+NMB65Zt3T2pUIYHIwp7Uh7fSkb/AFZoEOUFfpTdyZbI4p38AqB/un6ipAcsWQOAKe0P\npzUifdpvrQPoRtJ8wTvSbecDk96F/wBaalT75piIWjzgnk0ip3PFPpD2qQGNkd+KVXJ/hxST\nfdpYe9ADlbgjOSKaeTkDNLH/AKxqen3jTsAmd3FOxlccUi/fNIfuvQACRWbk8UNHtGQck1XT\n7341Z9KBjGDbsHpTdvzZNTN96oJvvUCHtjIOe9L9+Q4xTG+8v0pU++fpSAdIoZeOFzTmVdo/\nvdqa3+pWkb/WJTAczBVG4dKZI3mOHPHoKW4+8aST+CgADZIPB9qccyyHAxiooP8AXCpG++aQ\nEfzyHsAKBHj5yePSnx/cNN/hFNjHpjBOcCo1VmzzxQ33aev3TSEN524FM25jyetSJ980fwn6\n0ARY2jI49qcjhjwOcUh+8aE/i+lABhep60gznI6UhqSP/VtQMbGp3HAzT9vBz0p1rSn7rfWm\nMZt+Zd3NSbhG2QoximfwfjSzfdoYhu4Bstyac0gkU8YHaov46d/yzFSNDip2qSc0hjLLjPek\nH3V+tSN978ad+ghkUhj3Z78U/n0waj9frU38YpgRNyCWH5UK21SCMU9vvCibtQNESsOc5yem\naPfPIFDf6wUL99qQwywC4NSrIv3c80knVKhX/WH60E9S6gVBjvUbbgwLHg9Ka1P/AOWKfU0D\n6jmfbgYyKe0hZQoGKjTv9aVv9Yv1pMYMwYLu6UxnEcg3Ln0pW/gpZv4aOoEilpBhlwfWnRoy\ntyeBRH/F+FObpVdBdQVT2PBoHyyEY4xTo+1Dfx/SkMfEysAR0FSqwbkDmqlt/qz9atR/dNAD\nGJU5JoX5fcGiToadD2+tSAsmAoy3FM3f3Rk0+4+7+NRt/DTHYc6h4wT+Ip8bBmCrwKi/5ZtR\nD94UxFllHOO3Wmxv5gJX
jFN/hemWv3T9aAJlcn73zURkOpyuMGlj+6aaOjfWgB69OetIyttw\nDketC/eH0p0P+pP1qR2GeZg+9Lt3NnG00z+JalpiGhflOTmhl+XGab/FSfxGhDFVW2dcj2pZ\nG/d46mnR/cprVIAka8dj3py88YI96Z/Eanj/ANW1AhjevbpRu2tjPNOX/V/jUMn+uoGSH1xn\n3pFYscdu9Sfw1C3+regY/wCXpSqm3tUcf3BUrfeFAxevtSg/ISKY3UUP92gkBt2lmGfSlTbG\noOOSeaVv9SKZ60kND923J7ZpqsO+SDQ3+ramr96qGSZVW6YFJ0bJpZO1Mk7VKAVpN3Qe1KG6\nChPu1DJ/rBVE9R8jb+HztFOUg/dwPSnf8sTUVIGSKTsOcE+tP+vAqPsalX7tHUBFhxg7sj0o\nbjoPypW7UdjQMThhRwFzikWloJJVXd8wJoxnJLZoh+5Te1BQ7cpXGOaPvDjg0N94Ui/eqrC6\njsE+xpGYM3BOKc1RL2+tBROzDzMk5AHSkMig8dKYPvn6U3+EfWmBKw56007e/FPX+Oox3pAJ\nx70UlFAH/9"}}]}

================================================
FILE: qa/common/perf_analyzer_input_data_json/int_data.json
================================================
{
    "data" :
        [

            {
                "INPUT0" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            }

        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/int_data_diff_shape.json
================================================
{
    "data" :
        [

            {
                "INPUT0" :
                {
                    "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    "shape": [2,8,2]
                },
                "INPUT1" :
                {
                    "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    "shape": [2,8,2]
                }
            },
            {
                "INPUT0" :
                {
                    "content": [2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    "shape": [1,8,4]
                },
                "INPUT1" :
                {
                    "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    "shape": [1,8,4]
                }
            },
            {
                "INPUT0" :
                {
                    "content": [3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    "shape": [2,8,2]
                },
                "INPUT1" :
                {
                    "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    "shape": [2,8,2]
                }
            },
            {
                "INPUT0" :
                {
                    "content": [4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    "shape": [2,8,2]
                },
                "INPUT1" :
                {
                    "content": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    "shape": [2,8,2]
                }
            }

        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/int_data_optional.json
================================================
{
    "data": [
        {
            "INPUT0": [
                1
            ]
        },
        {
            "INPUT1": [
                1
            ]
        }
    ]
}

================================================
FILE: qa/common/perf_analyzer_input_data_json/non_aligned_output.json
================================================
{
    "data" :
        [
            {
                "INPUT0" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            }
        ],
    "validation_data" :
        [
          {
              "OUTPUT0" : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          }
        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/output.json
================================================
{
    "data" :
        [
            {
                "INPUT0" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            }
        ],
    "validation_data" :
        [
          {
              "OUTPUT0" : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          },
          {
              "OUTPUT0" : [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          },
          {
              "OUTPUT0" : [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          },
          {
              "OUTPUT0" : [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          }
        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/repeat_int32_data.json
================================================
{
    "data": [
        {
            "IN": {
                "content": [
                    4,
                    2,
                    0,
                    1
                ],
                "shape": [
                    4
                ]
            },
            "DELAY": {
                "content": [
                    1,
                    2,
                    3,
                    4
                ],
                "shape": [
                    4
                ]
            },
            "WAIT": [
                5
            ]
        }
    ]
}

================================================
FILE: qa/common/perf_analyzer_input_data_json/seq_data.json
================================================
{
    "data" :
        [
            [
                {
                    "INPUT" : ["1"]
                },
                {
                    "INPUT" : ["2"]
                },
                {
                    "INPUT" : ["3"]
                },
                {
                    "INPUT" : ["4"]
                }
            ],
            [
                {
                    "INPUT" : ["1"]
                },
                {
                    "INPUT" : ["1"]
                },
                {
                    "INPUT" : ["1"]
                }
            ],
            [
                {
                    "INPUT" : ["1"]
                },
                {
                    "INPUT" : ["1"]
                }
            ]
        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/seq_output.json
================================================
{
    "data" :
        [
            [
                {
                    "INPUT" : ["1"]
                },
                {
                    "INPUT" : ["2"]
                },
                {
                    "INPUT" : ["3"]
                },
                {
                    "INPUT" : ["4"]
                }
            ]
        ],
    "validation_data" :
        [
            [
                {
                    "OUTPUT" : ["2"]
                },
                {
                    "OUTPUT" : ["2"]
                },
                {
                    "OUTPUT" : ["3"]
                },
                {
                    "OUTPUT" : ["4"]
                }
            ]
        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/seq_wrong_output.json
================================================
{
    "data" :
        [
            [
                {
                    "INPUT" : ["1"]
                },
                {
                    "INPUT" : ["2"]
                },
                {
                    "INPUT" : ["3"]
                },
                {
                    "INPUT" : ["4"]
                }
            ]
        ],
    "validation_data" :
        [
            [
                {
                    "OUTPUT" : ["0"]
                },
                {
                    "OUTPUT" : ["0"]
                },
                {
                    "OUTPUT" : ["0"]
                },
                {
                    "OUTPUT" : ["0"]
                }
            ]
        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/shape_tensor_data.json
================================================
{
    "data" :
        [
            {
                "INPUT0" : [2, 2],
                "DUMMY_INPUT0" : [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
            }
        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/string_data.json
================================================
{
    "data" :
        [
            {
                "INPUT0" : ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"],
                "INPUT1" : ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"]
            },
            {
                "INPUT0" : ["2", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"],
                "INPUT1" : ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"]
            },
            {
                "INPUT0" : ["3", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"],
                "INPUT1" : ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"]
            },
            {
                "INPUT0" : ["4", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"],
                "INPUT1" : ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"]
            }
        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/string_data_with_shape.json
================================================
{
    "data" :
        [
            {
                "INPUT0" :
                    {
                        "content": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"],
                        "shape": [2,8]
                    },
                "INPUT1" :
                    {
                        "content": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"],
                        "shape": [2,8]
                    }
            },
            {
                "INPUT0" :
                    {
                        "content": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"]
                    },
                "INPUT1" :
                    {
                        "content": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"]
                    }
            },
            {
                "INPUT0" :
                    {
                        "content": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"]
                    },
                "INPUT1" :
                    {
                        "content": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"]
                    }
            },
            {
                "INPUT0" :
                    {
                        "content": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"],
                        "shape": [2,8]
                    },
                "INPUT1" :
                    {
                        "content": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"],
                        "shape": [2,8]
                    }
            }
        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/wrong_output.json
================================================
{
    "data" :
        [
            {
                "INPUT0" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            }
        ],
    "validation_data" :
        [
          {
              "OUTPUT0" : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          },
          {
              "OUTPUT0" : [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          },
          {
              "OUTPUT0" : [2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          },
          {
              "OUTPUT0" : [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          }
        ]
}


================================================
FILE: qa/common/perf_analyzer_input_data_json/wrong_output_2.json
================================================
{
    "data" :
        [
            {
                "INPUT0" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            },
            {
                "INPUT0" : [4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "INPUT1" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
            }
        ],
    "validation_data" :
        [
          {
              "OUTPUT0" : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          },
          {
              "OUTPUT0" : [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          },
          {
              "OUTPUT0" : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
              "OUTPUT1" : [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          },
          {
              "OUTPUT0" : [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              "OUTPUT1" : [5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
          }
        ]
}


================================================
FILE: qa/common/reporter.py
================================================
#!/usr/bin/python

# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import csv
import json
import os
import socket
from itertools import pairwise

import numpy as np
import requests

# Parsed command-line arguments. Stays None until the __main__ block assigns
# the result of parser.parse_args().
FLAGS = None

# Environment variables of interest. For each result entry, annotate() copies
# every variable listed here that is set in the environment, storing it under
# the key "s_" + the lower-cased variable name.
ENVS = [
    "CUDA_DRIVER_VERSION",
    "CUDA_VERSION",
    "TRITON_SERVER_VERSION",
    "NVIDIA_TRITON_SERVER_VERSION",
    "TRT_VERSION",
    "CUDNN_VERSION",
    "CUBLAS_VERSION",
    "BENCHMARK_PIPELINE",
    "BENCHMARK_JOB_ID",
    "BENCHMARK_NIGHTLY_TAG",
    "BENCHMARK_REPO_BRANCH",
    "BENCHMARK_REPO_COMMIT",
    "BENCHMARK_CLUSTER",
    "BENCHMARK_GPU_COUNT",
]


def collect_gpu_metrics(data):
    """Record GPU count, model names and memory totals into ``data``.

    Queries NVML through ``pynvml`` for every GPU it reports and writes the
    aggregated figures into ``data`` in place:

    * ``l_gpus_count``: number of GPUs reported by NVML.
    * ``s_gpu_model``: distinct GPU names, sorted and joined with ``", "``.
    * ``d_total_gpu_memory_mb``: summed total memory divided by ``1024**2``.
    * ``d_total_free_gpu_memory_mb``: summed free memory divided by
      ``1024**2``.

    Args:
        data: Mutable mapping (a single result entry) to annotate.
    """
    # Imported here rather than at module level so that pynvml is only needed
    # when this function is actually called (i.e. with --gpu-metrics).
    import pynvml

    pynvml.nvmlInit()
    try:
        unique_gpu_models = set()
        total_memory = 0
        total_free_memory = 0

        # Get the number of available GPUs
        device_count = pynvml.nvmlDeviceGetCount()

        # Iterate through each GPU
        for i in range(device_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)

            # Get GPU name. Older pynvml releases return bytes; calling str()
            # on those would yield the repr ("b'...'"), so decode explicitly.
            gpu_name = pynvml.nvmlDeviceGetName(handle)
            if isinstance(gpu_name, bytes):
                gpu_name = gpu_name.decode("utf-8", errors="replace")
            unique_gpu_models.add(str(gpu_name))

            # Get GPU memory information
            memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            total_memory += memory_info.total
            total_free_memory += memory_info.free

        data["l_gpus_count"] = device_count
        # Sorted so the reported string does not depend on set ordering.
        data["s_gpu_model"] = ", ".join(sorted(unique_gpu_models))
        data["d_total_gpu_memory_mb"] = total_memory / (1024**2)
        data["d_total_free_gpu_memory_mb"] = total_free_memory / (1024**2)
    finally:
        # Always release NVML, even if one of the queries above raised.
        pynvml.nvmlShutdown()


def collect_token_latencies(export_data, data):
    first_token_latencies = []
    token_to_token_latencies = []
    requests = export_data["experiments"][0]["requests"]

    for r in requests:
        init_request, responses = r["timestamp"], r["response_timestamps"]
        first_token_latency = (responses[0] - init_request) / 1_000_000
        first_token_latencies.append(first_token_latency)
        for prev_res, res in pairwise(responses):
            token_to_token_latencies.append((res - prev_res) / 1_000_000)

    data["d_avg_token_to_token_latency_ms"] = np.mean(token_to_token_latencies)  # msec
    data["d_avg_first_token_latency_ms"] = np.mean(first_token_latencies)  # msec


def annotate(data):
    """Tag every result entry with environment and host information.

    Each entry in ``data`` is updated in place:

    * for every name in ``ENVS`` that is present in the environment, the
      value is stored under ``"s_" + name.lower()``;
    * ``s_benchmark_system`` is set to ``SLURM_JOB_NODELIST`` when that
      variable is set, otherwise to the local host name.

    Args:
        data: Iterable of mutable mappings (the result entries).
    """
    for entry in data:
        # Copy across every interesting environment variable that is set.
        for env_name in ENVS:
            if env_name not in os.environ:
                continue
            entry["s_" + env_name.lower()] = os.environ[env_name]

        # Name the system the benchmark ran on. Under slurm the node list is
        # used as the name; this assumes the job was scheduled on a single
        # node, otherwise SLURM_JOB_NODELIST lists several nodes.
        slurm_nodes = os.environ.get("SLURM_JOB_NODELIST")
        entry["s_benchmark_system"] = (
            socket.gethostname() if slurm_nodes is None else slurm_nodes
        )


def annotate_csv(data, csv_file):
    """Fold the first result row of a perf_analyzer CSV into ``data``.

    Only the header row and the first row after it are read. If either is
    missing, ``data`` is left untouched. Otherwise ``data`` receives:

    * ``d_infer_per_sec`` from the "Inferences/Second" column;
    * ``d_latency_p50_ms`` / ``p90`` / ``p95`` / ``p99`` from the matching
      "pNN latency" columns, divided by 1000;
    * ``d_latency_avg_ms``: the sum of the client/network/server component
      columns, divided by 1000.

    Args:
        data: Mutable mapping (a single result entry) to annotate.
        csv_file: Iterable of CSV lines, e.g. an open text file.
    """
    rows = csv.reader(csv_file, delimiter=",")
    column_names = next(rows, None)
    first_result = next(rows, None)
    if column_names is None or first_result is None:
        return

    # Columns whose values are added together to form the average latency.
    latency_components = {
        "Client Send",
        "Network+Server Send/Recv",
        "Server Queue",
        "Server Compute Input",
        "Server Compute Output",
        "Server Compute Infer",
        "Client Recv",
    }
    # Percentile columns and the result key each one is stored under.
    percentile_keys = {
        "p50 latency": "d_latency_p50_ms",
        "p90 latency": "d_latency_p90_ms",
        "p95 latency": "d_latency_p95_ms",
        "p99 latency": "d_latency_p99_ms",
    }

    component_total_us = 0
    for column, cell in zip(column_names, first_result):
        if column == "Inferences/Second":
            data["d_infer_per_sec"] = float(cell)
        elif column in latency_components:
            component_total_us += float(cell)
        elif column in percentile_keys:
            data[percentile_keys[column]] = float(cell) / 1000.0

    data["d_latency_avg_ms"] = component_total_us / 1000.0


def post_to_url(url, data, timeout=60.0):
    """POST an already-serialized JSON payload to ``url``.

    Args:
        url: Destination URL.
        data: Request body, sent as-is with a JSON ``Content-Type`` header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            reporter if the endpoint never answers. Pass ``None`` to restore
            the unbounded wait.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    headers = {"Content-Type": "application/json", "Accept-Charset": "UTF-8"}
    response = requests.post(url, data=data, headers=headers, timeout=timeout)
    response.raise_for_status()


if __name__ == "__main__":
    # Command-line interface. Options are registered in their original order
    # so the generated --help text is unchanged.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose output"
    )
    parser.add_argument(
        "--gpu-metrics", action="store_true", help="Collect GPU details"
    )
    parser.add_argument(
        "-e",
        "--profile-export-file",
        type=argparse.FileType("r"),
        help="Profile file exported by perf_analyzer",
    )
    parser.add_argument(
        "--token-latency", action="store_true", help="Collect token latency data"
    )
    parser.add_argument("-o", "--output", type=str, help="Output filename")
    parser.add_argument("-u", "--url", type=str, help="Post results to a URL")
    parser.add_argument(
        "--csv", type=argparse.FileType("r"), help="perf_analyzer generated CSV"
    )
    parser.add_argument("file", type=argparse.FileType("r"))
    FLAGS = parser.parse_args()

    def _dump_if_verbose(banner):
        # Pretty-print the current results under a banner when -v is given.
        if FLAGS.verbose:
            print(banner)
            print(json.dumps(data, sort_keys=True, indent=2))

    # The positional file holds the results as a JSON array of entries.
    data = json.load(FLAGS.file)
    _dump_if_verbose("*** Load json ***")

    # GPU and token-latency figures are attached to the first entry only.
    if FLAGS.gpu_metrics:
        collect_gpu_metrics(data[0])

    if FLAGS.token_latency:
        if not FLAGS.profile_export_file:
            raise Exception(
                "Please provide a profile export file to collect token latencies."
            )
        export_data = json.load(FLAGS.profile_export_file)
        collect_token_latencies(export_data, data[0])

    if FLAGS.csv is not None:
        # The CSV figures describe a single run, so exactly one entry is
        # required to receive them.
        if len(data) != 1:
            raise Exception("--csv requires that json data have a single array entry")
        annotate_csv(data[0], FLAGS.csv)
        _dump_if_verbose("*** Annotate CSV ***")

    annotate(data)
    _dump_if_verbose("*** Post Annotate ***")

    # Emit the annotated results: one JSON line to a file and/or a POST.
    if FLAGS.output is not None:
        with open(FLAGS.output, "w") as f:
            f.write(json.dumps(data) + "\n")

    if FLAGS.url is not None:
        post_to_url(FLAGS.url, json.dumps(data))


================================================
FILE: qa/common/resnet50_labels.txt
================================================
tench
goldfish
great white shark
tiger shark
hammerhead
electric ray
stingray
cock
hen
ostrich
brambling
goldfinch
house finch
junco
indigo bunting
robin
bulbul
jay
magpie
chickadee
water ouzel
kite
bald eagle
vulture
great grey owl
European fire salamander
common newt
eft
spotted salamander
axolotl
bullfrog
tree frog
tailed frog
loggerhead
leatherback turtle
mud turtle
terrapin
box turtle
banded gecko
common iguana
American chameleon
whiptail
agama
frilled lizard
alligator lizard
Gila monster
green lizard
African chameleon
Komodo dragon
African crocodile
American alligator
triceratops
thunder snake
ringneck snake
hognose snake
green snake
king snake
garter snake
water snake
vine snake
night snake
boa constrictor
rock python
Indian cobra
green mamba
sea snake
horned viper
diamondback
sidewinder
trilobite
harvestman
scorpion
black and gold garden spider
barn spider
garden spider
black widow
tarantula
wolf spider
tick
centipede
black grouse
ptarmigan
ruffed grouse
prairie chicken
peacock
quail
partridge
African grey
macaw
sulphur-crested cockatoo
lorikeet
coucal
bee eater
hornbill
hummingbird
jacamar
toucan
drake
red-breasted merganser
goose
black swan
tusker
echidna
platypus
wallaby
koala
wombat
jellyfish
sea anemone
brain coral
flatworm
nematode
conch
snail
slug
sea slug
chiton
chambered nautilus
Dungeness crab
rock crab
fiddler crab
king crab
American lobster
spiny lobster
crayfish
hermit crab
isopod
white stork
black stork
spoonbill
flamingo
little blue heron
American egret
bittern
crane
limpkin
European gallinule
American coot
bustard
ruddy turnstone
red-backed sandpiper
redshank
dowitcher
oystercatcher
pelican
king penguin
albatross
grey whale
killer whale
dugong
sea lion
Chihuahua
Japanese spaniel
Maltese dog
Pekinese
Shih-Tzu
Blenheim spaniel
papillon
toy terrier
Rhodesian ridgeback
Afghan hound
basset
beagle
bloodhound
bluetick
black-and-tan coonhound
Walker hound
English foxhound
redbone
borzoi
Irish wolfhound
Italian greyhound
whippet
Ibizan hound
Norwegian elkhound
otterhound
Saluki
Scottish deerhound
Weimaraner
Staffordshire bullterrier
American Staffordshire terrier
Bedlington terrier
Border terrier
Kerry blue terrier
Irish terrier
Norfolk terrier
Norwich terrier
Yorkshire terrier
wire-haired fox terrier
Lakeland terrier
Sealyham terrier
Airedale
cairn
Australian terrier
Dandie Dinmont
Boston bull
miniature schnauzer
giant schnauzer
standard schnauzer
Scotch terrier
Tibetan terrier
silky terrier
soft-coated wheaten terrier
West Highland white terrier
Lhasa
flat-coated retriever
curly-coated retriever
golden retriever
Labrador retriever
Chesapeake Bay retriever
German short-haired pointer
vizsla
English setter
Irish setter
Gordon setter
Brittany spaniel
clumber
English springer
Welsh springer spaniel
cocker spaniel
Sussex spaniel
Irish water spaniel
kuvasz
schipperke
groenendael
malinois
briard
kelpie
komondor
Old English sheepdog
Shetland sheepdog
collie
Border collie
Bouvier des Flandres
Rottweiler
German shepherd
Doberman
miniature pinscher
Greater Swiss Mountain dog
Bernese mountain dog
Appenzeller
EntleBucher
boxer
bull mastiff
Tibetan mastiff
French bulldog
Great Dane
Saint Bernard
Eskimo dog
malamute
Siberian husky
dalmatian
affenpinscher
basenji
pug
Leonberg
Newfoundland
Great Pyrenees
Samoyed
Pomeranian
chow
keeshond
Brabancon griffon
Pembroke
Cardigan
toy poodle
miniature poodle
standard poodle
Mexican hairless
timber wolf
white wolf
red wolf
coyote
dingo
dhole
African hunting dog
hyena
red fox
kit fox
Arctic fox
grey fox
tabby
tiger cat
Persian cat
Siamese cat
Egyptian cat
cougar
lynx
leopard
snow leopard
jaguar
lion
tiger
cheetah
brown bear
American black bear
ice bear
sloth bear
mongoose
meerkat
tiger beetle
ladybug
ground beetle
long-horned beetle
leaf beetle
dung beetle
rhinoceros beetle
weevil
fly
bee
ant
grasshopper
cricket
walking stick
cockroach
mantis
cicada
leafhopper
lacewing
dragonfly
damselfly
admiral
ringlet
monarch
cabbage butterfly
sulphur butterfly
lycaenid
starfish
sea urchin
sea cucumber
wood rabbit
hare
Angora
hamster
porcupine
fox squirrel
marmot
beaver
guinea pig
sorrel
zebra
hog
wild boar
warthog
hippopotamus
ox
water buffalo
bison
ram
bighorn
ibex
hartebeest
impala
gazelle
Arabian camel
llama
weasel
mink
polecat
black-footed ferret
otter
skunk
badger
armadillo
three-toed sloth
orangutan
gorilla
chimpanzee
gibbon
siamang
guenon
patas
baboon
macaque
langur
colobus
proboscis monkey
marmoset
capuchin
howler monkey
titi
spider monkey
squirrel monkey
Madagascar cat
indri
Indian elephant
African elephant
lesser panda
giant panda
barracouta
eel
coho
rock beauty
anemone fish
sturgeon
gar
lionfish
puffer
abacus
abaya
academic gown
accordion
acoustic guitar
aircraft carrier
airliner
airship
altar
ambulance
amphibian
analog clock
apiary
apron
ashcan
assault rifle
backpack
bakery
balance beam
balloon
ballpoint
Band Aid
banjo
bannister
barbell
barber chair
barbershop
barn
barometer
barrel
barrow
baseball
basketball
bassinet
bassoon
bathing cap
bath towel
bathtub
beach wagon
beacon
beaker
bearskin
beer bottle
beer glass
bell cote
bib
bicycle-built-for-two
bikini
binder
binoculars
birdhouse
boathouse
bobsled
bolo tie
bonnet
bookcase
bookshop
bottlecap
bow
bow tie
brass
brassiere
breakwater
breastplate
broom
bucket
buckle
bulletproof vest
bullet train
butcher shop
cab
caldron
candle
cannon
canoe
can opener
cardigan
car mirror
carousel
carpenter's kit
carton
car wheel
cash machine
cassette
cassette player
castle
catamaran
CD player
cello
cellular telephone
chain
chainlink fence
chain mail
chain saw
chest
chiffonier
chime
china cabinet
Christmas stocking
church
cinema
cleaver
cliff dwelling
cloak
clog
cocktail shaker
coffee mug
coffeepot
coil
combination lock
computer keyboard
confectionery
container ship
convertible
corkscrew
cornet
cowboy boot
cowboy hat
cradle
crane
crash helmet
crate
crib
Crock Pot
croquet ball
crutch
cuirass
dam
desk
desktop computer
dial telephone
diaper
digital clock
digital watch
dining table
dishrag
dishwasher
disk brake
dock
dogsled
dome
doormat
drilling platform
drum
drumstick
dumbbell
Dutch oven
electric fan
electric guitar
electric locomotive
entertainment center
envelope
espresso maker
face powder
feather boa
file
fireboat
fire engine
fire screen
flagpole
flute
folding chair
football helmet
forklift
fountain
fountain pen
four-poster
freight car
French horn
frying pan
fur coat
garbage truck
gasmask
gas pump
goblet
go-kart
golf ball
golfcart
gondola
gong
gown
grand piano
greenhouse
grille
grocery store
guillotine
hair slide
hair spray
half track
hammer
hamper
hand blower
hand-held computer
handkerchief
hard disc
harmonica
harp
harvester
hatchet
holster
home theater
honeycomb
hook
hoopskirt
horizontal bar
horse cart
hourglass
iPod
iron
jack-o'-lantern
jean
jeep
jersey
jigsaw puzzle
jinrikisha
joystick
kimono
knee pad
knot
lab coat
ladle
lampshade
laptop
lawn mower
lens cap
letter opener
library
lifeboat
lighter
limousine
liner
lipstick
Loafer
lotion
loudspeaker
loupe
lumbermill
magnetic compass
mailbag
mailbox
maillot
maillot
manhole cover
maraca
marimba
mask
matchstick
maypole
maze
measuring cup
medicine chest
megalith
microphone
microwave
military uniform
milk can
minibus
miniskirt
minivan
missile
mitten
mixing bowl
mobile home
Model T
modem
monastery
monitor
moped
mortar
mortarboard
mosque
mosquito net
motor scooter
mountain bike
mountain tent
mouse
mousetrap
moving van
muzzle
nail
neck brace
necklace
nipple
notebook
obelisk
oboe
ocarina
odometer
oil filter
organ
oscilloscope
overskirt
oxcart
oxygen mask
packet
paddle
paddlewheel
padlock
paintbrush
pajama
palace
panpipe
paper towel
parachute
parallel bars
park bench
parking meter
passenger car
patio
pay-phone
pedestal
pencil box
pencil sharpener
perfume
Petri dish
photocopier
pick
pickelhaube
picket fence
pickup
pier
piggy bank
pill bottle
pillow
ping-pong ball
pinwheel
pirate
pitcher
plane
planetarium
plastic bag
plate rack
plow
plunger
Polaroid camera
pole
police van
poncho
pool table
pop bottle
pot
potter's wheel
power drill
prayer rug
printer
prison
projectile
projector
puck
punching bag
purse
quill
quilt
racer
racket
radiator
radio
radio telescope
rain barrel
recreational vehicle
reel
reflex camera
refrigerator
remote control
restaurant
revolver
rifle
rocking chair
rotisserie
rubber eraser
rugby ball
rule
running shoe
safe
safety pin
saltshaker
sandal
sarong
sax
scabbard
scale
school bus
schooner
scoreboard
screen
screw
screwdriver
seat belt
sewing machine
shield
shoe shop
shoji
shopping basket
shopping cart
shovel
shower cap
shower curtain
ski
ski mask
sleeping bag
slide rule
sliding door
slot
snorkel
snowmobile
snowplow
soap dispenser
soccer ball
sock
solar dish
sombrero
soup bowl
space bar
space heater
space shuttle
spatula
speedboat
spider web
spindle
sports car
spotlight
stage
steam locomotive
steel arch bridge
steel drum
stethoscope
stole
stone wall
stopwatch
stove
strainer
streetcar
stretcher
studio couch
stupa
submarine
suit
sundial
sunglass
sunglasses
sunscreen
suspension bridge
swab
sweatshirt
swimming trunks
swing
switch
syringe
table lamp
tank
tape player
teapot
teddy
television
tennis ball
thatch
theater curtain
thimble
thresher
throne
tile roof
toaster
tobacco shop
toilet seat
torch
totem pole
tow truck
toyshop
tractor
trailer truck
tray
trench coat
tricycle
trimaran
tripod
triumphal arch
trolleybus
trombone
tub
turnstile
typewriter keyboard
umbrella
unicycle
upright
vacuum
vase
vault
velvet
vending machine
vestment
viaduct
violin
volleyball
waffle iron
wall clock
wallet
wardrobe
warplane
washbasin
washer
water bottle
water jug
water tower
whiskey jug
whistle
wig
window screen
window shade
Windsor tie
wine bottle
wing
wok
wooden spoon
wool
worm fence
wreck
yawl
yurt
web site
comic book
crossword puzzle
street sign
traffic light
book jacket
menu
plate
guacamole
consomme
hot pot
trifle
ice cream
ice lolly
French loaf
bagel
pretzel
cheeseburger
hotdog
mashed potato
head cabbage
broccoli
cauliflower
zucchini
spaghetti squash
acorn squash
butternut squash
cucumber
artichoke
bell pepper
cardoon
mushroom
Granny Smith
strawberry
orange
lemon
fig
pineapple
banana
jackfruit
custard apple
pomegranate
hay
carbonara
chocolate sauce
dough
meat loaf
pizza
potpie
burrito
red wine
espresso
cup
eggnog
alp
bubble
cliff
coral reef
geyser
lakeside
promontory
sandbar
seashore
valley
volcano
ballplayer
groom
scuba diver
rapeseed
daisy
yellow lady's slipper
corn
acorn
hip
buckeye
coral fungus
agaric
gyromitra
stinkhorn
earthstar
hen-of-the-woods
bolete
ear
toilet tissue


================================================
FILE: qa/common/run_all_tests.sh
================================================
#!/bin/bash
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Runs ./test.sh in every sibling ../L*/ directory, passing the repository
# version, and reports how many of them passed and failed.
#
# The repository version is taken from the first argument when given,
# otherwise from NVIDIA_TRITON_SERVER_VERSION. When TEST_REPO_ARCH is set
# it is appended to the version as "_<arch>".
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

CURRENT_DIR=$(pwd)
DIRS=(../L*/)

passed=0
failed=0
for dir in "${DIRS[@]}"; do
    echo -e "Running $dir...\n"
    # Run in a subshell so the working directory of this script is untouched.
    (cd "$dir" && ./test.sh "${REPO_VERSION}")
    rc=$?
    if (( rc == 0 )); then
        passed=$(( passed + 1 ))
    else
        echo -e "Failed\n"
        failed=$(( failed + 1 ))
    fi
done

echo -e "\n***\n***\nPassed: ${passed}\nFailed: ${failed}\n***\n***\n"

# Report success (0) only when no test directory failed. 'return' is tried
# first so the script does not kill the caller's shell when it is sourced;
# when executed directly 'return' fails and 'exit' is used instead.
exit_code=$(( failed == 0 ? 0 : 1 ))
return "$exit_code" 2>/dev/null || exit "$exit_code"


================================================
FILE: qa/common/sequence_util.py
================================================
#!/usr/bin/env python3

# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys
import threading
import time
from builtins import range, str
from functools import partial

import infer_util as iu
import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import *

if sys.version_info >= (3, 0):
    import queue
else:
    import Queue as queue

# By default, find tritonserver on "localhost", but can be overridden
# with TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

# Shared-memory test modes; each is enabled by setting its envvar to a
# non-zero integer.
_test_system_shared_memory = bool(int(os.environ.get("TEST_SYSTEM_SHARED_MEMORY", 0)))
_test_cuda_shared_memory = bool(int(os.environ.get("TEST_CUDA_SHARED_MEMORY", 0)))

# The shared-memory helper modules are only imported when the matching
# mode is enabled, so 'shm' / 'cudashm' exist only in that case.
if _test_system_shared_memory:
    import tritonclient.utils.shared_memory as shm
if _test_cuda_shared_memory:
    import tritonclient.utils.cuda_shared_memory as cudashm

_test_valgrind = bool(int(os.environ.get("TEST_VALGRIND", 0)))
_test_jetson = bool(int(os.environ.get("TEST_JETSON", 0)))

_max_sequence_idle_ms = 5000
# Delay in milliseconds taken from TEST_DELAY_MS (default 50). Kept as an
# integer: wrapping it in bool() would collapse every non-zero delay to True.
_valgrind_delay_ms = int(os.environ.get("TEST_DELAY_MS", 50))

# Exceptions recorded while a test runs. The list is created by
# SequenceBatcherTestUtil.clear_deferred_exceptions() and every access
# goes through the lock.
_deferred_exceptions_lock = threading.Lock()
_deferred_exceptions = None
_jetson_slowdown_factor = 3


class UserData:
    """Holder for the responses delivered to completion_callback()."""

    def __init__(self) -> None:
        # Queue of (result, error) tuples, one per completed request.
        completed = queue.Queue()
        self._completed_requests = completed


# Callback function used for async_stream_infer()
def completion_callback(user_data, result, error):
    """Queue the (result, error) pair of a finished request on 'user_data'.

    The error is not raised here; it is stored next to the result so that
    whoever drains 'user_data._completed_requests' can handle it.
    """
    outcome = (result, error)
    user_data._completed_requests.put(outcome)


class SequenceBatcherTestUtil(tu.TestResultCollector):
    def setUp(self):
        """Create the helper client and start with no deferred exceptions."""
        # The helper client for setup will be GRPC for simplicity.
        grpc_url = f"{_tritonserver_ipaddr}:8001"
        self.triton_client_ = grpcclient.InferenceServerClient(grpc_url)
        self.clear_deferred_exceptions()

    def clear_deferred_exceptions(self):
        """Replace the module-level deferred-exception list with an empty one."""
        global _deferred_exceptions
        # Rebinding the global is done under the lock shared with the
        # add/check helpers.
        with _deferred_exceptions_lock:
            _deferred_exceptions = list()

    def add_deferred_exception(self, ex):
        """Append 'ex' to the module-level deferred-exception list."""
        # The list is mutated in place and never rebound here, so no
        # 'global' declaration is required.
        with _deferred_exceptions_lock:
            _deferred_exceptions.append(ex)

    def check_deferred_exception(self):
        """Raise the first recorded deferred exception, if there is one."""
        with _deferred_exceptions_lock:
            recorded = _deferred_exceptions
            if len(recorded) == 0:
                return
            # Only one exception is surfaced even when several were recorded.
            raise recorded[0]

    def check_failure(self):
        """Raise if no deferred exception has been recorded.

        The check is deliberately generic (any recorded exception counts)
        because the failure behavior is undefined for ragged batches.
        """
        with _deferred_exceptions_lock:
            nothing_recorded = len(_deferred_exceptions) == 0
            if nothing_recorded:
                raise Exception("Unexpected inference success")

    def precreate_register_regions(
        self, value_list, dtype, i, batch_size=1, tensor_shape=(1,)
    ):
        """Create, fill and register an input and an output shared-memory
        region for every entry of 'value_list'.

        System shared memory is used when its test flag is set, otherwise
        CUDA shared memory. Region names are built from 'i' and the index
        of the value in 'value_list'.

        Returns a list of (name, byte size, shm_handle) tuples, the input
        region followed by the output region for each value, or an empty
        list when neither shared-memory test mode is enabled.
        """
        if not (_test_system_shared_memory or _test_cuda_shared_memory):
            return []

        is_string = dtype == np.object_
        region_handles = []
        for j, value in enumerate(value_list):
            # For string we can't know the size of the output, so start
            # from the size of an empty string and conservatively add
            # 64 bytes for each element below.
            output_byte_size = 4 if is_string else 0

            # One tensor per batch entry, all filled with 'value'.
            tensors = []
            for _ in range(batch_size):
                if is_string:
                    ints = np.full(tensor_shape, value, dtype=np.int32)
                    encoded = np.array(
                        [str(x).encode("utf-8") for x in ints.reshape(ints.size)],
                        dtype=object,
                    )
                    tensor = encoded.reshape(tensor_shape)
                    output_byte_size += 64 * tensor.size
                else:
                    tensor = np.full(tensor_shape, value, dtype=dtype)
                    output_byte_size += np.dtype(dtype).itemsize * tensor.size
                tensors.append(tensor)

            # String tensors are serialized before being copied.
            if is_string:
                payload = iu.serialize_byte_tensor_list(tensors)
                input_byte_size = sum(serialized_byte_size(t) for t in payload)
            else:
                payload = tensors
                input_byte_size = sum(t.nbytes for t in payload)

            # create shared memory regions and copy data for input values
            ip_name = "ip{}{}".format(i, j)
            op_name = "op{}{}_data".format(i, j)
            if _test_system_shared_memory:
                ip_key = "/" + ip_name
                op_key = "/" + op_name
                ip_handle = shm.create_shared_memory_region(
                    ip_name, ip_key, input_byte_size
                )
                op_handle = shm.create_shared_memory_region(
                    op_name, op_key, output_byte_size
                )
                shm.set_shared_memory_region(ip_handle, payload)
                self.triton_client_.register_system_shared_memory(
                    ip_name, ip_key, input_byte_size
                )
                self.triton_client_.register_system_shared_memory(
                    op_name, op_key, output_byte_size
                )
            elif _test_cuda_shared_memory:
                ip_handle = cudashm.create_shared_memory_region(
                    ip_name, input_byte_size, 0
                )
                op_handle = cudashm.create_shared_memory_region(
                    op_name, output_byte_size, 0
                )
                cudashm.set_shared_memory_region(ip_handle, payload)
                self.triton_client_.register_cuda_shared_memory(
                    ip_name,
                    cudashm.get_raw_handle(ip_handle),
                    0,
                    input_byte_size,
                )
                self.triton_client_.register_cuda_shared_memory(
                    op_name,
                    cudashm.get_raw_handle(op_handle),
                    0,
                    output_byte_size,
                )

            region_handles += [
                (ip_name, input_byte_size, ip_handle),
                (op_name, output_byte_size, op_handle),
            ]
        return region_handles

    # Returns (name, byte size, shm_handle)
    def precreate_register_shape_tensor_regions(
        self,
        value_list,
        dtype,
        i,
        batch_size=1,
        tensor_shape=(1,),
        shape_tensor_input_dtype=np.int32,
    ):
        """Create and register the system shared-memory regions used by
        shape-tensor tests.

        'value_list' holds (shape_value, value) pairs. For each pair five
        regions are created: input, shape input, shape output, output and
        resized output. Only the two input regions are filled with data.

        Fails the test when CUDA shared memory testing is enabled. Returns
        a list of (name, byte size, shm_handle) tuples, or an empty list
        when system shared memory testing is disabled.
        """
        self.assertFalse(
            _test_cuda_shared_memory,
            "Shape tensors does not support CUDA shared memory",
        )
        if not _test_system_shared_memory:
            return []

        is_string = dtype == np.object_
        registered = []
        for j, (shape_value, value) in enumerate(value_list):
            # One data tensor per batch entry, all filled with 'value'.
            tensors = []
            for _ in range(batch_size):
                if is_string:
                    ints = np.full(tensor_shape, value, dtype=np.int32)
                    as_text = np.array(
                        [str(x) for x in ints.reshape(ints.size)], dtype=object
                    )
                    tensor = as_text.reshape(tensor_shape)
                else:
                    tensor = np.full(tensor_shape, value, dtype=dtype)
                tensors.append(tensor)

            # Only one shape tensor input per batch
            shape_tensors = [
                np.full(tensor_shape, shape_value, dtype=shape_tensor_input_dtype)
            ]

            # String tensors are serialized before being copied.
            if is_string:
                payload = iu.serialize_byte_tensor_list(tensors)
                input_byte_size = sum(serialized_byte_size(t) for t in payload)
            else:
                payload = tensors
                input_byte_size = sum(t.nbytes for t in payload)

            shape_input_byte_size = sum(t.nbytes for t in shape_tensors)
            if shape_tensor_input_dtype == np.int32:
                # Currently in our test cases we are using int64 outputs
                # for shape tensors, hence the factor of 2 to compute the
                # byte size properly.
                shape_output_byte_size = shape_input_byte_size * 2
            else:
                shape_output_byte_size = shape_input_byte_size
            output_byte_size = np.dtype(dtype).itemsize + 2
            resized_output_byte_size = 32 * shape_value

            # (name, byte size) of every region, in creation order.
            regions = [
                ("ip{}{}".format(i, j), input_byte_size),
                ("shape_ip{}{}".format(i, j), shape_input_byte_size),
                ("shape_op{}{}".format(i, j), shape_output_byte_size),
                ("op{}{}".format(i, j), output_byte_size),
                ("resized_op{}{}".format(i, j), resized_output_byte_size),
            ]

            # create shared memory regions and copy data for input values
            handles = [
                shm.create_shared_memory_region(name, "/" + name, byte_size)
                for name, byte_size in regions
            ]
            shm.set_shared_memory_region(handles[0], payload)
            shm.set_shared_memory_region(handles[1], shape_tensors)

            for name, byte_size in regions:
                self.triton_client_.register_system_shared_memory(
                    name, "/" + name, byte_size
                )

            registered.extend(
                (name, byte_size, handle)
                for (name, byte_size), handle in zip(regions, handles)
            )
        return registered

    # Returns (name, byte size, shm_handle)
    def precreate_register_dynaseq_shape_tensor_regions(
        self,
        value_list,
        dtype,
        i,
        batch_size=1,
        tensor_shape=(1,),
        shape_tensor_input_dtype=np.int32,
    ):
        """Create and register the system shared-memory regions used by
        dynamic-sequence shape-tensor tests.

        'value_list' holds (shape_value, value) pairs. For each pair six
        regions are created: input, shape input, dummy input, shape
        output, output and resized output. Only the three input regions
        are filled with data. The input tensors are always int32; the
        dummy input tensors use 'dtype'.

        Fails the test when CUDA shared memory testing is enabled. Returns
        a list of (name, byte size, shm_handle) tuples, or an empty list
        when system shared memory testing is disabled.
        """
        self.assertFalse(
            _test_cuda_shared_memory,
            "Shape tensors does not support CUDA shared memory",
        )
        if _test_system_shared_memory:
            shm_region_handles = []
            for j, (shape_value, value) in enumerate(value_list):
                input_list = list()
                shape_input_list = list()
                dummy_input_list = list()

                for _ in range(batch_size):
                    if dtype == np.object_:
                        dummy_in0 = np.full(tensor_shape, value, dtype=np.int32)
                        # Flatten using the dummy tensor's own size; 'in0'
                        # is not assigned yet at this point.
                        dummy_in0n = np.array(
                            [str(x) for x in dummy_in0.reshape(dummy_in0.size)],
                            dtype=object,
                        )
                        dummy_in0 = dummy_in0n.reshape(tensor_shape)
                    else:
                        dummy_in0 = np.full(tensor_shape, value, dtype=dtype)
                    dummy_input_list.append(dummy_in0)
                    in0 = np.full(tensor_shape, value, dtype=np.int32)
                    input_list.append(in0)

                # Only one shape tensor input per batch
                shape_input_list.append(
                    np.full(tensor_shape, shape_value, dtype=shape_tensor_input_dtype)
                )

                # NOTE(review): for np.object_ the serialization branch is
                # applied to the int32 'input_list', while the object-typed
                # 'dummy_input_list' is sized with nbytes below -- confirm
                # this is intended.
                if dtype == np.object_:
                    input_list_tmp = iu.serialize_byte_tensor_list(input_list)
                    input_byte_size = sum(
                        [serialized_byte_size(i0) for i0 in input_list_tmp]
                    )
                else:
                    input_list_tmp = input_list
                    input_byte_size = sum([i0.nbytes for i0 in input_list_tmp])

                dummy_input_byte_size = sum([i0.nbytes for i0 in dummy_input_list])

                shape_input_byte_size = sum([i0.nbytes for i0 in shape_input_list])
                shape_output_byte_size = shape_input_byte_size
                if shape_tensor_input_dtype == np.int32:
                    # Currently in our test cases we are
                    # using int64 outputs for shape tensors
                    # hence there is a multiple of 2 to compute the byte size
                    # properly.
                    shape_output_byte_size = shape_output_byte_size * 2
                output_byte_size = np.dtype(np.int32).itemsize + 2
                resized_output_byte_size = 32 * shape_value

                # create shared memory regions and copy data for input values
                ip_name = "ip{}{}".format(i, j)
                shape_ip_name = "shape_ip{}{}".format(i, j)
                dummy_ip_name = "dummy_ip{}{}".format(i, j)
                shape_op_name = "shape_op{}{}".format(i, j)
                op_name = "op{}{}".format(i, j)
                resized_op_name = "resized_op{}{}".format(i, j)

                shm_ip_handle = shm.create_shared_memory_region(
                    ip_name, "/" + ip_name, input_byte_size
                )
                shm_shape_ip_handle = shm.create_shared_memory_region(
                    shape_ip_name, "/" + shape_ip_name, shape_input_byte_size
                )
                shm_dummy_ip_handle = shm.create_shared_memory_region(
                    dummy_ip_name, "/" + dummy_ip_name, dummy_input_byte_size
                )
                shm_shape_op_handle = shm.create_shared_memory_region(
                    shape_op_name, "/" + shape_op_name, shape_output_byte_size
                )
                shm_op_handle = shm.create_shared_memory_region(
                    op_name, "/" + op_name, output_byte_size
                )
                shm_resized_op_handle = shm.create_shared_memory_region(
                    resized_op_name, "/" + resized_op_name, resized_output_byte_size
                )
                # Only the input regions receive data.
                shm.set_shared_memory_region(shm_ip_handle, input_list_tmp)
                shm.set_shared_memory_region(shm_shape_ip_handle, shape_input_list)
                shm.set_shared_memory_region(shm_dummy_ip_handle, dummy_input_list)
                self.triton_client_.register_system_shared_memory(
                    ip_name, "/" + ip_name, input_byte_size
                )
                self.triton_client_.register_system_shared_memory(
                    shape_ip_name, "/" + shape_ip_name, shape_input_byte_size
                )
                self.triton_client_.register_system_shared_memory(
                    dummy_ip_name, "/" + dummy_ip_name, dummy_input_byte_size
                )
                self.triton_client_.register_system_shared_memory(
                    shape_op_name, "/" + shape_op_name, shape_output_byte_size
                )
                self.triton_client_.register_system_shared_memory(
                    op_name, "/" + op_name, output_byte_size
                )
                self.triton_client_.register_system_shared_memory(
                    resized_op_name, "/" + resized_op_name, resized_output_byte_size
                )

                shm_region_handles.append((ip_name, input_byte_size, shm_ip_handle))
                shm_region_handles.append(
                    (shape_ip_name, shape_input_byte_size, shm_shape_ip_handle)
                )
                shm_region_handles.append(
                    (dummy_ip_name, dummy_input_byte_size, shm_dummy_ip_handle)
                )
                shm_region_handles.append(
                    (shape_op_name, shape_output_byte_size, shm_shape_op_handle)
                )
                shm_region_handles.append((op_name, output_byte_size, shm_op_handle))
                shm_region_handles.append(
                    (resized_op_name, resized_output_byte_size, shm_resized_op_handle)
                )
            return shm_region_handles
        else:
            return []

    def cleanup_shm_regions(self, shm_handles):
        """Unregister the shared-memory regions from the server and destroy
        the handle stored at index 2 of every entry in 'shm_handles'.
        """
        # Make sure unregister is before shared memory destruction
        if _test_system_shared_memory:
            self.triton_client_.unregister_system_shared_memory()
        if _test_cuda_shared_memory:
            self.triton_client_.unregister_cuda_shared_memory()

        # Pick the destroy function once; system shared memory takes
        # precedence when both flags are set.
        if _test_system_shared_memory:
            destroy_region = shm.destroy_shared_memory_region
        elif _test_cuda_shared_memory:
            destroy_region = cudashm.destroy_shared_memory_region
        else:
            destroy_region = None

        for entry in shm_handles:
            if destroy_region is not None:
                destroy_region(entry[2])

    def check_sequence(
        self,
        trial,
        model_name,
        input_dtype,
        correlation_id,
        sequence_thresholds,
        values,
        expected_result,
        protocol,
        batch_size=1,
        sequence_name="<unknown>",
        tensor_shape=(1,),
    ):
        """Perform sequence of inferences. The 'values' holds a list of
        tuples, one for each inference with format:

        (flag_str, value, (lt_ms, gt_ms), (pre_delay_ms, post_delay_ms))

        Requests are issued one at a time: the response of a request is
        received before the next request is sent. Any exception raised while
        the sequence runs, including failed assertions, is recorded with
        add_deferred_exception() instead of being propagated.

        trial: trial name. Must contain one of "custom", "onnx", "libtorch",
            "plan" or "python". When it contains "nobatch" the tensors are
            sent with 'tensor_shape' as is, otherwise 'batch_size' is
            prepended. When it starts with "libtorch" the tensor names are
            "INPUT__0" / "OUTPUT__0" instead of "INPUT" / "OUTPUT".
        model_name: name of the model the requests are sent to.
        input_dtype: numpy dtype of the input. For np.object_ each value is
            sent as the string form of an int32.
        correlation_id: passed as the sequence ID of every request.
        sequence_thresholds: None or (lt_ms, gt_ms) bounds on the duration of
            the whole sequence. Either bound may be None. When _test_jetson
            is set, lt_ms is multiplied by _jetson_slowdown_factor.
        values: list of per-request tuples (see above). 'flag_str' may be
            None or contain "start" and/or "end"; the thresholds tuple and the
            delay tuple may each be None.
        expected_result: value the output of the last request must equal.
        protocol: "http", "grpc" or "streaming". Any other value fails the
            single-config assertion.
        batch_size: leading dimension added for batching trials.
        sequence_name: label used when printing each result.
        tensor_shape: shape of the tensor without the batch dimension.
        """
        if (
            ("custom" not in trial)
            and ("onnx" not in trial)
            and ("libtorch" not in trial)
            and ("plan" not in trial)
            and ("python" not in trial)
        ):
            self.assertFalse(True, "unknown trial type: " + trial)

        # Can only send the request exactly once since it is a
        # sequence model with state, so can have only a single config.
        configs = []
        if protocol == "http":
            configs.append((f"{_tritonserver_ipaddr}:8000", "http", False))
        if protocol == "grpc":
            configs.append((f"{_tritonserver_ipaddr}:8001", "grpc", False))
        if protocol == "streaming":
            configs.append((f"{_tritonserver_ipaddr}:8001", "grpc", True))

        self.assertFalse(
            _test_system_shared_memory and _test_cuda_shared_memory,
            "Cannot set both System and CUDA shared memory flags to 1",
        )

        self.assertEqual(len(configs), 1)

        full_shape = (
            tensor_shape if "nobatch" in trial else (batch_size,) + tensor_shape
        )

        # create and register shared memory output region in advance,
        # knowing that this function will not be called concurrently.
        if _test_system_shared_memory or _test_cuda_shared_memory:
            # Start from a clean slate: drop every region still registered.
            self.triton_client_.unregister_system_shared_memory()
            self.triton_client_.unregister_cuda_shared_memory()
            output_byte_size = 512
            if _test_system_shared_memory:
                shm_op_handle = shm.create_shared_memory_region(
                    "output_data", "/output", output_byte_size
                )
                self.triton_client_.register_system_shared_memory(
                    "output_data", "/output", output_byte_size
                )
            elif _test_cuda_shared_memory:
                shm_op_handle = cudashm.create_shared_memory_region(
                    "output_data", output_byte_size, 0
                )
                self.triton_client_.register_cuda_shared_memory(
                    "output_data",
                    cudashm.get_raw_handle(shm_op_handle),
                    0,
                    output_byte_size,
                )
            # One input region is created per request; collected here so that
            # all of them can be destroyed once the sequence is done.
            shm_ip_handles = []

        for config in configs:
            client_utils = grpcclient if config[1] == "grpc" else httpclient

            triton_client = client_utils.InferenceServerClient(config[0], verbose=True)
            if config[2]:
                user_data = UserData()
                triton_client.start_stream(partial(completion_callback, user_data))
            # Execute the sequence of inference...
            try:
                seq_start_ms = int(round(time.time() * 1000))

                INPUT = "INPUT__0" if trial.startswith("libtorch") else "INPUT"
                OUTPUT = "OUTPUT__0" if trial.startswith("libtorch") else "OUTPUT"
                for flag_str, value, thresholds, delay_ms in values:
                    if _test_valgrind or _test_jetson:
                        if delay_ms is not None:
                            # Build a new tuple instead of assigning into
                            # 'delay_ms': the documented format is a tuple
                            # (item assignment would raise TypeError), and a
                            # caller-owned list must not be modified.
                            delay_ms = (
                                max(_valgrind_delay_ms, delay_ms[0]),
                                max(_valgrind_delay_ms, delay_ms[1]),
                            )
                        else:
                            delay_ms = (_valgrind_delay_ms, _valgrind_delay_ms)

                    if delay_ms is not None:
                        time.sleep(delay_ms[0] / 1000.0)

                    seq_start = False
                    seq_end = False
                    if flag_str is not None:
                        seq_start = "start" in flag_str
                        seq_end = "end" in flag_str

                    # Construct request IOs
                    inputs = []
                    outputs = []
                    inputs.append(
                        client_utils.InferInput(
                            INPUT, full_shape, np_to_triton_dtype(input_dtype)
                        )
                    )
                    outputs.append(client_utils.InferRequestedOutput(OUTPUT))
                    if input_dtype == np.object_:
                        # Object tensors carry the value as the string form
                        # of an int32.
                        in0 = np.full(full_shape, value, dtype=np.int32)
                        in0n = np.array(
                            [str(x) for x in in0.reshape(in0.size)], dtype=object
                        )
                        in0 = in0n.reshape(full_shape)
                    else:
                        in0 = np.full(full_shape, value, dtype=input_dtype)

                    # create input shared memory and copy input data values into it
                    if _test_system_shared_memory or _test_cuda_shared_memory:
                        if input_dtype == np.object_:
                            input_list_tmp = iu.serialize_byte_tensor_list([in0])
                            input_byte_size = sum(
                                [serialized_byte_size(i0) for i0 in input_list_tmp]
                            )
                        else:
                            input_list_tmp = [in0]
                            input_byte_size = sum([i0.nbytes for i0 in input_list_tmp])
                        # Region names are "ip0", "ip1", ... in request order.
                        ip_name = "ip{}".format(len(shm_ip_handles))
                        if _test_system_shared_memory:
                            shm_ip_handles.append(
                                shm.create_shared_memory_region(
                                    ip_name, "/" + ip_name, input_byte_size
                                )
                            )
                            shm.set_shared_memory_region(
                                shm_ip_handles[-1], input_list_tmp
                            )
                            triton_client.register_system_shared_memory(
                                ip_name, "/" + ip_name, input_byte_size
                            )
                        elif _test_cuda_shared_memory:
                            shm_ip_handles.append(
                                cudashm.create_shared_memory_region(
                                    ip_name, input_byte_size, 0
                                )
                            )
                            cudashm.set_shared_memory_region(
                                shm_ip_handles[-1], input_list_tmp
                            )
                            triton_client.register_cuda_shared_memory(
                                ip_name,
                                cudashm.get_raw_handle(shm_ip_handles[-1]),
                                0,
                                input_byte_size,
                            )

                        inputs[0].set_shared_memory(ip_name, input_byte_size)
                        outputs[0].set_shared_memory("output_data", output_byte_size)
                    else:
                        inputs[0].set_data_from_numpy(in0)

                    start_ms = int(round(time.time() * 1000))

                    if config[2]:
                        triton_client.async_stream_infer(
                            model_name,
                            inputs,
                            outputs=outputs,
                            sequence_id=correlation_id,
                            sequence_start=seq_start,
                            sequence_end=seq_end,
                        )
                        # Block until the stream callback delivers the
                        # response (or the error) of this request.
                        (results, error) = user_data._completed_requests.get()
                        if error is not None:
                            raise error
                    else:
                        results = triton_client.infer(
                            model_name,
                            inputs,
                            outputs=outputs,
                            sequence_id=correlation_id,
                            sequence_start=seq_start,
                            sequence_end=seq_end,
                        )

                    end_ms = int(round(time.time() * 1000))

                    # Get value of "OUTPUT", for shared memory, need to get it via
                    # shared memory utils
                    if (not _test_system_shared_memory) and (
                        not _test_cuda_shared_memory
                    ):
                        out = results.as_numpy(OUTPUT)
                    else:
                        output = results.get_output(OUTPUT)
                        # The HTTP client returns the output as a dict, the
                        # gRPC client as an object with a 'shape' attribute.
                        if config[1] == "http":
                            output_shape = output["shape"]
                        else:
                            output_shape = output.shape
                        output_type = input_dtype
                        if _test_system_shared_memory:
                            out = shm.get_contents_as_numpy(
                                shm_op_handle, output_type, output_shape
                            )
                        else:
                            out = cudashm.get_contents_as_numpy(
                                shm_op_handle, output_type, output_shape
                            )
                    result = out[0] if "nobatch" in trial else out[0][0]
                    print("{}: {}".format(sequence_name, result))

                    # Per-request latency bounds.
                    if thresholds is not None:
                        lt_ms = thresholds[0]
                        gt_ms = thresholds[1]
                        if lt_ms is not None:
                            self.assertTrue(
                                (end_ms - start_ms) < lt_ms,
                                "expected less than "
                                + str(lt_ms)
                                + "ms response time, got "
                                + str(end_ms - start_ms)
                                + " ms",
                            )
                        if gt_ms is not None:
                            self.assertTrue(
                                (end_ms - start_ms) > gt_ms,
                                "expected greater than "
                                + str(gt_ms)
                                + "ms response time, got "
                                + str(end_ms - start_ms)
                                + " ms",
                            )
                    if delay_ms is not None:
                        time.sleep(delay_ms[1] / 1000.0)

                seq_end_ms = int(round(time.time() * 1000))

                # Only the output of the last request is compared.
                if input_dtype == np.object_:
                    self.assertEqual(int(result), expected_result)
                else:
                    self.assertEqual(result, expected_result)

                # Bounds on the duration of the whole sequence.
                if sequence_thresholds is not None:
                    lt_ms = sequence_thresholds[0]
                    gt_ms = sequence_thresholds[1]
                    if lt_ms is not None:
                        if _test_jetson:
                            lt_ms *= _jetson_slowdown_factor
                        self.assertTrue(
                            (seq_end_ms - seq_start_ms) < lt_ms,
                            "sequence expected less than "
                            + str(lt_ms)
                            + "ms response time, got "
                            + str(seq_end_ms - seq_start_ms)
                            + " ms",
                        )
                    if gt_ms is not None:
                        self.assertTrue(
                            (seq_end_ms - seq_start_ms) > gt_ms,
                            "sequence expected greater than "
                            + str(gt_ms)
                            + "ms response time, got "
                            + str(seq_end_ms - seq_start_ms)
                            + " ms",
                        )
            except Exception as ex:
                self.add_deferred_exception(ex)
            if config[2]:
                triton_client.stop_stream()

        # Unregister first, then destroy the output region and every input
        # region created above.
        if _test_system_shared_memory or _test_cuda_shared_memory:
            self.triton_client_.unregister_system_shared_memory()
            self.triton_client_.unregister_cuda_shared_memory()
            destroy_func = (
                shm.destroy_shared_memory_region
                if _test_system_shared_memory
                else cudashm.destroy_shared_memory_region
            )
            destroy_func(shm_op_handle)
            for shm_ip_handle in shm_ip_handles:
                destroy_func(shm_ip_handle)

    def check_sequence_async(
        self,
        trial,
        model_name,
        input_dtype,
        correlation_id,
        sequence_thresholds,
        values,
        expected_result,
        shm_region_handles,
        batch_size=1,
        sequence_name="<unknown>",
        tensor_shape=(1,),
    ):
        """Run a sequence over a gRPC stream, sending before receiving.

        Every request described by 'values' is queued on the stream first and
        the responses are collected afterwards. 'values' holds one tuple per
        request with format:

        (flag_str, value, pre_delay_ms)

        where 'flag_str' may be None or contain "start" and/or "end", and
        'pre_delay_ms' (may be None) is slept before the request is sent.

        When a shared memory test flag is set, request i takes its input
        region from shm_region_handles[2 * i] and its output region from
        shm_region_handles[2 * i + 1]; each entry is indexed as
        (name, byte_size, handle).

        The output of the last response must equal 'expected_result', and
        'sequence_thresholds' (None or (lt_ms, gt_ms)) bounds the duration of
        the whole sequence. Exceptions raised while the sequence runs are
        recorded with add_deferred_exception() rather than propagated.
        """
        known_backends = ("custom", "onnx", "libtorch", "plan", "python")
        if not any(backend in trial for backend in known_backends):
            self.assertFalse(True, "unknown trial type: " + trial)

        self.assertFalse(
            _test_system_shared_memory and _test_cuda_shared_memory,
            "Cannot set both System and CUDA shared memory flags to 1",
        )

        # Batching trials get the batch dimension prepended.
        if "nobatch" in trial:
            full_shape = tensor_shape
        else:
            full_shape = (batch_size,) + tensor_shape

        client_utils = grpcclient
        triton_client = client_utils.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001", verbose=True
        )
        user_data = UserData()
        triton_client.start_stream(partial(completion_callback, user_data))
        try:
            seq_start_ms = int(round(time.time() * 1000))

            is_libtorch = trial.startswith("libtorch")
            input_name = "INPUT__0" if is_libtorch else "INPUT"
            output_name = "OUTPUT__0" if is_libtorch else "OUTPUT"

            # Phase 1: queue every request of the sequence on the stream.
            num_sent = 0
            for flag_str, value, pre_delay_ms in values:
                has_flags = flag_str is not None
                seq_start = has_flags and "start" in flag_str
                seq_end = has_flags and "end" in flag_str

                inputs = [
                    client_utils.InferInput(
                        input_name, full_shape, np_to_triton_dtype(input_dtype)
                    )
                ]
                outputs = [client_utils.InferRequestedOutput(output_name)]

                if _test_system_shared_memory or _test_cuda_shared_memory:
                    # Regions alternate input, output per request.
                    in_region = shm_region_handles[2 * num_sent]
                    inputs[0].set_shared_memory(in_region[0], in_region[1])
                    out_region = shm_region_handles[2 * num_sent + 1]
                    outputs[0].set_shared_memory(out_region[0], out_region[1])
                else:
                    if input_dtype == np.object_:
                        # Object tensors carry the value as the string form
                        # of an int32.
                        as_int = np.full(full_shape, value, dtype=np.int32)
                        as_text = np.array(
                            [str(x) for x in as_int.reshape(as_int.size)],
                            dtype=object,
                        )
                        in0 = as_text.reshape(full_shape)
                    else:
                        in0 = np.full(full_shape, value, dtype=input_dtype)
                    inputs[0].set_data_from_numpy(in0)

                if pre_delay_ms is not None:
                    time.sleep(pre_delay_ms / 1000.0)

                triton_client.async_stream_infer(
                    model_name,
                    inputs,
                    outputs=outputs,
                    sequence_id=correlation_id,
                    sequence_start=seq_start,
                    sequence_end=seq_end,
                )
                num_sent += 1

            # Phase 2: collect the responses in the order they were sent.
            result = None
            for received in range(num_sent):
                (results, error) = user_data._completed_requests.get()
                if error is not None:
                    raise error
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    # With shared memory the payload has to be read back
                    # through the shared memory utilities.
                    output = results.get_output(output_name)
                    region_handle = shm_region_handles[2 * received + 1][2]
                    reader = shm if _test_system_shared_memory else cudashm
                    out = reader.get_contents_as_numpy(
                        region_handle, input_dtype, output.shape
                    )
                else:
                    out = results.as_numpy(output_name)

                if "nobatch" in trial:
                    result = out[0]
                else:
                    result = out[0][0]
                print("{}: {}".format(sequence_name, result))

            seq_end_ms = int(round(time.time() * 1000))

            # Only the output of the last response is compared.
            if input_dtype == np.object_:
                self.assertEqual(int(result), expected_result)
            else:
                self.assertEqual(result, expected_result)

            if sequence_thresholds is not None:
                elapsed_ms = seq_end_ms - seq_start_ms
                lt_ms = sequence_thresholds[0]
                gt_ms = sequence_thresholds[1]
                if lt_ms is not None:
                    if _test_jetson:
                        lt_ms *= _jetson_slowdown_factor
                    self.assertTrue(
                        elapsed_ms < lt_ms,
                        "sequence expected less than "
                        + str(lt_ms)
                        + "ms response time, got "
                        + str(elapsed_ms)
                        + " ms",
                    )
                if gt_ms is not None:
                    self.assertTrue(
                        elapsed_ms > gt_ms,
                        "sequence expected greater than "
                        + str(gt_ms)
                        + "ms response time, got "
                        + str(elapsed_ms)
                        + " ms",
                    )
        except Exception as ex:
            self.add_deferred_exception(ex)
        triton_client.stop_stream()

    # This sequence util only sends inference via streaming scenario
    def check_sequence_shape_tensor_io(
        self,
        model_name,
        input_dtype,
        correlation_id,
        sequence_thresholds,
        values,
        expected_result,
        shm_region_handles,
        using_dynamic_batcher=False,
        sequence_name="<unknown>",
        shape_tensor_input_dtype=np.int32,
    ):
        """Perform sequence of inferences using async run. The 'values' holds
        a list of tuples, one for each inference with format:

        (flag_str, shape_value, value, pre_delay_ms)

        All requests are queued on a gRPC stream first and the responses are
        collected afterwards. Each request sends "INPUT" and "SHAPE_INPUT"
        (plus "DUMMY_INPUT" when 'using_dynamic_batcher' is set) and requests
        "SHAPE_OUTPUT", "OUTPUT" and "RESIZED_OUTPUT". For every response the
        shape of "RESIZED_OUTPUT" without its first dimension must equal the
        shape tensor that was sent; the "OUTPUT" value of the last response
        must equal 'expected_result'.

        With system shared memory, 'shm_region_handles' is indexed as
        (name, byte_size, handle) and holds, per request, the input regions
        followed by the three output regions: 6 entries per request with the
        dynamic batcher, 5 otherwise. CUDA shared memory is rejected.

        'sequence_thresholds' is None or (lt_ms, gt_ms) bounding the duration
        of the whole sequence. Exceptions raised while the sequence runs are
        recorded with add_deferred_exception() rather than propagated.
        """
        tensor_shape = (1, 1)
        # shape tensor is 1-D tensor that doesn't contain batch size as first value
        shape_tensor_shape = (1,)
        self.assertFalse(
            _test_cuda_shared_memory,
            "Shape tensors does not support CUDA shared memory",
        )

        client_utils = grpcclient
        triton_client = client_utils.InferenceServerClient(
            f"{_tritonserver_ipaddr}:8001", verbose=True
        )
        user_data = UserData()
        triton_client.start_stream(partial(completion_callback, user_data))
        # Execute the sequence of inference...
        try:
            seq_start_ms = int(round(time.time() * 1000))

            sent_count = 0
            # Shape tensors in send order, kept to validate the responses.
            shape_values = list()
            for flag_str, shape_value, value, pre_delay_ms in values:
                seq_start = False
                seq_end = False
                if flag_str is not None:
                    seq_start = "start" in flag_str
                    seq_end = "end" in flag_str

                # Construct request IOs
                inputs = []
                outputs = []
                # input order: input, shape(, dummy)
                inputs.append(
                    client_utils.InferInput(
                        "INPUT",
                        tensor_shape,
                        np_to_triton_dtype(
                            np.int32 if using_dynamic_batcher else input_dtype
                        ),
                    )
                )
                inputs.append(
                    client_utils.InferInput(
                        "SHAPE_INPUT",
                        shape_tensor_shape,
                        np_to_triton_dtype(shape_tensor_input_dtype),
                    )
                )
                if using_dynamic_batcher:
                    inputs.append(
                        client_utils.InferInput(
                            "DUMMY_INPUT", tensor_shape, np_to_triton_dtype(input_dtype)
                        )
                    )
                # output order: shape, output, resized
                outputs.append(client_utils.InferRequestedOutput("SHAPE_OUTPUT"))
                outputs.append(client_utils.InferRequestedOutput("OUTPUT"))
                outputs.append(client_utils.InferRequestedOutput("RESIZED_OUTPUT"))

                # Set IO values
                shape_values.append(
                    np.full(
                        shape_tensor_shape, shape_value, dtype=shape_tensor_input_dtype
                    )
                )
                if not _test_system_shared_memory:
                    if using_dynamic_batcher:
                        if input_dtype == np.object_:
                            dummy_in0 = np.full(tensor_shape, value, dtype=np.int32)
                            # Flatten 'dummy_in0' itself. 'in0' is not yet
                            # assigned at this point on the first request and
                            # would hold the previous request's data later.
                            dummy_in0n = np.array(
                                [
                                    str(x)
                                    for x in dummy_in0.reshape(dummy_in0.size)
                                ],
                                dtype=object,
                            )
                            dummy_in0 = dummy_in0n.reshape(tensor_shape)
                        else:
                            dummy_in0 = np.full(tensor_shape, value, dtype=input_dtype)
                        # With the dynamic batcher "INPUT" is always int32.
                        in0 = np.full(tensor_shape, value, dtype=np.int32)
                    else:
                        if input_dtype == np.object_:
                            in0 = np.full(tensor_shape, value, dtype=np.int32)
                            in0n = np.array(
                                [str(x) for x in in0.reshape(in0.size)], dtype=object
                            )
                            in0 = in0n.reshape(tensor_shape)
                        else:
                            in0 = np.full(tensor_shape, value, dtype=input_dtype)

                    inputs[0].set_data_from_numpy(in0)
                    inputs[1].set_data_from_numpy(shape_values[-1])
                    if using_dynamic_batcher:
                        inputs[2].set_data_from_numpy(dummy_in0)
                else:
                    # Per request the handle list holds the input regions
                    # followed by the three output regions.
                    if using_dynamic_batcher:
                        input_offset = 6 * sent_count
                        output_offset = 6 * sent_count + 3
                    else:
                        input_offset = 5 * sent_count
                        output_offset = 5 * sent_count + 2
                    for i, infer_input in enumerate(inputs):
                        infer_input.set_shared_memory(
                            shm_region_handles[input_offset + i][0],
                            shm_region_handles[input_offset + i][1],
                        )
                    for i, infer_output in enumerate(outputs):
                        infer_output.set_shared_memory(
                            shm_region_handles[output_offset + i][0],
                            shm_region_handles[output_offset + i][1],
                        )

                if pre_delay_ms is not None:
                    time.sleep(pre_delay_ms / 1000.0)

                triton_client.async_stream_infer(
                    model_name,
                    inputs,
                    outputs=outputs,
                    sequence_id=correlation_id,
                    sequence_start=seq_start,
                    sequence_end=seq_end,
                )

                sent_count += 1

            # Wait for the results in the order sent
            result = None
            processed_count = 0
            while processed_count < sent_count:
                (results, error) = user_data._completed_requests.get()
                if error is not None:
                    raise error
                # Get value of "OUTPUT", for shared memory, need to get it via
                # shared memory utils
                if not _test_system_shared_memory:
                    out = results.as_numpy("OUTPUT")
                else:
                    output = results.get_output("OUTPUT")
                    # "OUTPUT" is the second of the three output regions.
                    output_offset = (
                        6 * processed_count + 4
                        if using_dynamic_batcher
                        else 5 * processed_count + 3
                    )
                    output_shape = output.shape
                    # NOTE(review): the non-dynamic case reads the region as
                    # float32 regardless of 'input_dtype' -- confirm that the
                    # callers only use float32 models on this path.
                    output_type = np.int32 if using_dynamic_batcher else np.float32
                    out = shm.get_contents_as_numpy(
                        shm_region_handles[output_offset][2], output_type, output_shape
                    )
                result = out[0][0]

                # Validate the (debatched) shape of the resized output matches
                # with the shape input values
                resized_shape = results.get_output("RESIZED_OUTPUT").shape[1:]
                self.assertTrue(
                    np.array_equal(resized_shape, shape_values[processed_count]),
                    "{}, {}, slot {}, expected: {}, got {}".format(
                        model_name,
                        "RESIZED_OUTPUT",
                        processed_count,
                        shape_values[processed_count],
                        resized_shape,
                    ),
                )
                print("{}: {}".format(sequence_name, result))
                processed_count += 1

            seq_end_ms = int(round(time.time() * 1000))

            # Only the output of the last response is compared.
            if input_dtype == np.object_:
                self.assertEqual(int(result), expected_result)
            else:
                self.assertEqual(result, expected_result)

            # Bounds on the duration of the whole sequence.
            if sequence_thresholds is not None:
                lt_ms = sequence_thresholds[0]
                gt_ms = sequence_thresholds[1]
                if lt_ms is not None:
                    if _test_jetson:
                        lt_ms *= _jetson_slowdown_factor
                    self.assertTrue(
                        (seq_end_ms - seq_start_ms) < lt_ms,
                        "sequence expected less than "
                        + str(lt_ms)
                        + "ms response time, got "
                        + str(seq_end_ms - seq_start_ms)
                        + " ms",
                    )
                if gt_ms is not None:
                    self.assertTrue(
                        (seq_end_ms - seq_start_ms) > gt_ms,
                        "sequence expected greater than "
                        + str(gt_ms)
                        + "ms response time, got "
                        + str(seq_end_ms - seq_start_ms)
                        + " ms",
                    )
        except Exception as ex:
            self.add_deferred_exception(ex)
        triton_client.stop_stream()

    def check_setup(self, model_name):
        """Verify the sequence batcher idle timeout configured for a model.

        Fetches the configuration of 'model_name' from the server and asserts
        that its max_sequence_idle_microseconds equals _max_sequence_idle_ms
        converted to microseconds (the setting test.sh is expected to apply).
        Ensemble models are not checked.
        """
        model_config = self.triton_client_.get_model_config(model_name).config

        # Ensembles are exempt from the sequence batching check.
        if model_config.platform == "ensemble":
            return

        configured_idle_us = (
            model_config.sequence_batching.max_sequence_idle_microseconds
        )
        expected_idle_us = _max_sequence_idle_ms * 1000
        self.assertEqual(configured_idle_us, expected_idle_us)

    def check_status(self, model_name, batch_exec, exec_cnt, infer_cnt):
        """Assert the inference statistics of version "1" of a model.

        model_name: model whose statistics are fetched.
        batch_exec: None to skip the per-batch-size check, otherwise a
            mapping from batch size to the expected compute_infer count.
        exec_cnt: expected execution count.
        infer_cnt: expected inference count.

        Statistics may lag behind the responses, so they are polled once per
        second, up to 10 times, until the execution count matches 'exec_cnt';
        the last snapshot fetched is the one that gets verified.
        """
        max_attempts = 10
        for attempt in range(max_attempts):
            stats = self.triton_client_.get_inference_statistics(model_name, "1")
            self.assertEqual(len(stats.model_stats), 1, "expect 1 model stats")
            observed_exec_cnt = stats.model_stats[0].execution_count
            if observed_exec_cnt != exec_cnt:
                print(
                    "WARNING: expect {} executions, got {} (attempt {})".format(
                        exec_cnt, observed_exec_cnt, attempt
                    )
                )
                time.sleep(1)
                continue
            break

        model_stat = stats.model_stats[0]
        self.assertEqual(
            model_stat.name,
            model_name,
            "expect model stats for model {}".format(model_name),
        )
        self.assertEqual(
            model_stat.version,
            "1",
            "expect model stats for model {} version 1".format(model_name),
        )

        if batch_exec is not None:
            batch_stats = model_stat.batch_stats
            print(batch_stats)
            self.assertEqual(
                len(batch_stats),
                len(batch_exec),
                "expected {} different batch-sizes, got {}".format(
                    len(batch_exec), len(batch_stats)
                ),
            )

            for entry in batch_stats:
                size = entry.batch_size
                # The compute_infer count stands in for the number of
                # executions at this batch size.
                count = entry.compute_infer.count
                self.assertTrue(
                    size in batch_exec,
                    "did not find expected batch-size {}".format(size),
                )
                expected_count = batch_exec[size]
                self.assertEqual(
                    count,
                    expected_count,
                    "expected model-execution-count {} for batch size {}, got {}".format(
                        expected_count, size, count
                    ),
                )

        final_exec_cnt = model_stat.execution_count
        self.assertEqual(
            final_exec_cnt,
            exec_cnt,
            "expected model-exec-count {}, got {}".format(exec_cnt, final_exec_cnt),
        )

        final_infer_cnt = model_stat.inference_count
        self.assertEqual(
            final_infer_cnt,
            infer_cnt,
            "expected model-inference-count {}, got {}".format(
                infer_cnt, final_infer_cnt
            ),
        )


================================================
FILE: qa/common/shm_util.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import threading
import time
from ctypes import *
from os import listdir

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import *

# Module-wide lock. The region-creation helpers in this file hold it (via
# `with CREATION_LOCK:`) while they create and fill shared memory regions.
CREATION_LOCK = threading.Lock()

# By default, find tritonserver on "localhost", but can be overridden
# with TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")
# Platform switches read from the environment. Each value is parsed with
# int(), so unset or "0" yields False and any other integer yields True.
_test_jetson = bool(int(os.environ.get("TEST_JETSON", 0)))
_test_windows = bool(int(os.environ.get("TEST_WINDOWS", 0)))
# The shared memory leak probe is skipped when either platform switch is set.
_skip_shm_leak_probe = _test_jetson or _test_windows


def _range_repr_dtype(dtype):
    if dtype == np.float64:
        return np.int32
    elif dtype == np.float32:
        return np.int16
    elif dtype == np.float16:
        return np.int8
    elif dtype == np.object_:  # TYPE_STRING
        return np.int32
    return dtype


def create_set_shm_regions(
    input0_list,
    input1_list,
    output0_byte_size,
    output1_byte_size,
    outputs,
    shm_region_names,
    precreated_shm_regions,
    use_system_shared_memory,
    use_cuda_shared_memory,
):
    """Create the input/output shared memory regions and fill the inputs.

    Exactly one of ``use_system_shared_memory`` / ``use_cuda_shared_memory``
    may be set. When neither is set nothing is created.

    Args:
        input0_list, input1_list: lists of arrays copied into the two input
            regions; their total (serialized, for object dtype) size decides
            the region sizes.
        output0_byte_size, output1_byte_size: sizes used for output regions
            that this function creates.
        outputs: container checked for "OUTPUT0" / "OUTPUT1".
        shm_region_names: region base names; defaults to
            ["input0", "input1", "output0", "output1"] when None.
        precreated_shm_regions: when not None, output handles are taken from
            it instead of being created here.

    Returns:
        ``([], [])`` when no shared memory is requested, otherwise
        ``(shm_region_names, [input0, input1, output0, output1])`` handles,
        where an output handle is None if that output was not requested.

    Raises:
        ValueError: if both shared memory flags are set.
    """
    # Lazy shm imports...
    if use_system_shared_memory:
        import tritonclient.utils.shared_memory as shm
    if use_cuda_shared_memory:
        import tritonclient.utils.cuda_shared_memory as cudashm

    if use_system_shared_memory and use_cuda_shared_memory:
        raise ValueError("Cannot set both System and CUDA shared memory flags to 1")

    if not use_system_shared_memory and not use_cuda_shared_memory:
        return [], []

    def _total_bytes(tensors):
        # Object (string) tensors are measured by their serialized size.
        if tensors[0].dtype == np.object_:
            return sum([serialized_byte_size(t) for t in tensors])
        return sum([t.nbytes for t in tensors])

    input0_byte_size = _total_bytes(input0_list)
    input1_byte_size = _total_bytes(input1_list)

    if shm_region_names is None:
        shm_region_names = ["input0", "input1", "output0", "output1"]

    # Exactly one flag is set at this point; pick the matching backend.
    if use_system_shared_memory:
        backend = shm

        def _allocate(region_name, byte_size):
            return shm.create_shared_memory_region(
                region_name + "_data", "/" + region_name, byte_size
            )

    else:
        backend = cudashm

        def _allocate(region_name, byte_size):
            return cudashm.create_shared_memory_region(
                region_name + "_data", byte_size, 0
            )

    output0_handle = None
    output1_handle = None

    with CREATION_LOCK:
        input0_handle = _allocate(shm_region_names[0], input0_byte_size)
        input1_handle = _allocate(shm_region_names[1], input1_byte_size)

        # OUTPUT1 uses the slot right after OUTPUT0 only if OUTPUT0 is present.
        output_offset = 0
        if "OUTPUT0" in outputs:
            if precreated_shm_regions is None:
                output0_handle = _allocate(shm_region_names[2], output0_byte_size)
            else:
                output0_handle = precreated_shm_regions[0]
            output_offset += 1
        if "OUTPUT1" in outputs:
            if precreated_shm_regions is None:
                output1_handle = _allocate(
                    shm_region_names[2 + output_offset], output1_byte_size
                )
            else:
                output1_handle = precreated_shm_regions[output_offset]

        backend.set_shared_memory_region(input0_handle, input0_list)
        backend.set_shared_memory_region(input1_handle, input1_list)

    handles = [input0_handle, input1_handle, output0_handle, output1_handle]
    return shm_region_names, handles


def register_add_shm_regions(
    inputs,
    outputs,
    shm_region_names,
    precreated_shm_regions,
    shm_handles,
    input0_byte_size,
    input1_byte_size,
    output0_byte_size,
    output1_byte_size,
    use_system_shared_memory,
    use_cuda_shared_memory,
    triton_client,
):
    """(Re-)register shared memory regions with the server and bind the inputs.

    For each enabled shared memory kind, the two input regions are
    unregistered and registered again through ``triton_client``. Output
    regions for "OUTPUT0" / "OUTPUT1" (when present in ``outputs``) are
    re-registered only if ``precreated_shm_regions`` is None. Finally
    ``inputs[0]`` and ``inputs[1]`` are pointed at their "<name>_data"
    regions. Does nothing when neither shared memory flag is set.
    """
    # Lazy shm imports...
    if use_system_shared_memory:
        import tritonclient.utils.shared_memory as shm
    if use_cuda_shared_memory:
        import tritonclient.utils.cuda_shared_memory as cudashm

    if not (use_system_shared_memory or use_cuda_shared_memory):
        return

    def _register_system(name_idx, _handle_idx, byte_size):
        triton_client.register_system_shared_memory(
            shm_region_names[name_idx] + "_data",
            "/" + shm_region_names[name_idx],
            byte_size,
        )

    def _register_cuda(name_idx, handle_idx, byte_size):
        triton_client.register_cuda_shared_memory(
            shm_region_names[name_idx] + "_data",
            cudashm.get_raw_handle(shm_handles[handle_idx]),
            0,
            byte_size,
        )

    def _refresh_regions(unregister, register):
        # Unregister then register required shared memory regions
        unregister(shm_region_names[0] + "_data")
        unregister(shm_region_names[1] + "_data")
        register(0, 0, input0_byte_size)
        register(1, 1, input1_byte_size)

        # OUTPUT1's name slot follows OUTPUT0's only when OUTPUT0 is present;
        # its handle always lives at index 3.
        output_offset = 0
        if "OUTPUT0" in outputs:
            if precreated_shm_regions is None:
                unregister(shm_region_names[2] + "_data")
                register(2, 2, output0_byte_size)
            output_offset += 1
        if "OUTPUT1" in outputs:
            if precreated_shm_regions is None:
                unregister(shm_region_names[2 + output_offset] + "_data")
                register(2 + output_offset, 3, output1_byte_size)

    if use_system_shared_memory:
        _refresh_regions(
            triton_client.unregister_system_shared_memory, _register_system
        )

    if use_cuda_shared_memory:
        _refresh_regions(triton_client.unregister_cuda_shared_memory, _register_cuda)

    # Add shared memory regions to inputs
    inputs[0].set_shared_memory(shm_region_names[0] + "_data", input0_byte_size)
    inputs[1].set_shared_memory(shm_region_names[1] + "_data", input1_byte_size)


def unregister_cleanup_shm_regions(
    shm_regions,
    shm_handles,
    precreated_shm_regions,
    outputs,
    use_system_shared_memory,
    use_cuda_shared_memory,
):
    """Unregister the regions from the server and destroy their handles.

    A new HTTP client is opened against ``<TRITONSERVER_IPADDR>:8000``. The
    two input regions are always released. Output regions for "OUTPUT0" /
    "OUTPUT1" (when present in ``outputs``) are released only if
    ``precreated_shm_regions`` is None. CUDA shared memory takes precedence
    when ``use_cuda_shared_memory`` is set. Returns None; does nothing when
    neither flag is set.
    """
    # Lazy shm imports...
    if use_system_shared_memory:
        import tritonclient.utils.shared_memory as shm
    if use_cuda_shared_memory:
        import tritonclient.utils.cuda_shared_memory as cudashm

    if not use_system_shared_memory and not use_cuda_shared_memory:
        return None

    triton_client = httpclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8000")

    if use_cuda_shared_memory:
        unregister = triton_client.unregister_cuda_shared_memory
        destroy = cudashm.destroy_shared_memory_region
    else:
        unregister = triton_client.unregister_system_shared_memory
        destroy = shm.destroy_shared_memory_region

    # Inputs: unregister both regions first, then destroy both handles.
    unregister(shm_regions[0] + "_data")
    unregister(shm_regions[1] + "_data")
    destroy(shm_handles[0])
    destroy(shm_handles[1])

    # Precreated output regions are not released here.
    if precreated_shm_regions is not None:
        return None

    output_offset = 0
    if "OUTPUT0" in outputs:
        unregister(shm_regions[2] + "_data")
        destroy(shm_handles[2])
        output_offset += 1
    if "OUTPUT1" in outputs:
        # The name slot depends on whether OUTPUT0 exists; the handle is
        # always at index 3.
        unregister(shm_regions[2 + output_offset] + "_data")
        destroy(shm_handles[3])
    return None


def create_set_either_shm_region(
    shm_region_names,
    input_list,
    input_byte_size,
    output_byte_size,
    use_system_shared_memory,
    use_cuda_shared_memory,
):
    """Create one input and one output region and fill the input region.

    ``shm_region_names[0]`` names the input region and
    ``shm_region_names[1]`` the output region. ``input_list`` is copied into
    the input region.

    Returns:
        ``[]`` when no shared memory is requested, otherwise
        ``[input_handle, output_handle]``.

    Raises:
        ValueError: if both shared memory flags are set.
    """
    # Lazy shm imports...
    if use_system_shared_memory:
        import tritonclient.utils.shared_memory as shm
    if use_cuda_shared_memory:
        import tritonclient.utils.cuda_shared_memory as cudashm

    if use_cuda_shared_memory and use_system_shared_memory:
        raise ValueError("Cannot set both System and CUDA shared memory flags to 1")

    if not use_system_shared_memory and not use_cuda_shared_memory:
        return []

    # Exactly one flag is set at this point; pick the matching backend.
    if use_cuda_shared_memory:
        backend = cudashm

        def _allocate(region_name, byte_size):
            return cudashm.create_shared_memory_region(
                region_name + "_data", byte_size, 0
            )

    else:
        backend = shm

        def _allocate(region_name, byte_size):
            return shm.create_shared_memory_region(
                region_name + "_data", "/" + region_name, byte_size
            )

    with CREATION_LOCK:
        input_handle = _allocate(shm_region_names[0], input_byte_size)
        output_handle = _allocate(shm_region_names[1], output_byte_size)
        backend.set_shared_memory_region(input_handle, input_list)

    return [input_handle, output_handle]


def register_add_either_shm_regions(
    inputs,
    outputs,
    shm_region_prefix,
    shm_handles,
    io_num,
    input_byte_size,
    output_byte_size,
    use_system_shared_memory,
    use_cuda_shared_memory,
    triton_client,
):
    """(Re-)register the regions of one input/output pair and bind them.

    Region names are ``shm_region_prefix[0] + str(io_num)`` for the input and
    ``shm_region_prefix[1] + str(io_num)`` for the output. For each enabled
    shared memory kind both regions are unregistered and then registered
    through ``triton_client``. Afterwards ``inputs[io_num]`` and
    ``outputs[io_num]`` are pointed at their "<name>_data" regions. Does
    nothing when neither shared memory flag is set.
    """
    # Lazy shm imports...
    if use_system_shared_memory:
        import tritonclient.utils.shared_memory as shm
    if use_cuda_shared_memory:
        import tritonclient.utils.cuda_shared_memory as cudashm

    if not (use_system_shared_memory or use_cuda_shared_memory):
        return

    input_shm_name = shm_region_prefix[0] + str(io_num)
    output_shm_name = shm_region_prefix[1] + str(io_num)
    input_key = input_shm_name + "_data"
    output_key = output_shm_name + "_data"

    # Unregister then register required shared memory regions
    if use_system_shared_memory:
        for region_key in (input_key, output_key):
            triton_client.unregister_system_shared_memory(region_key)
        triton_client.register_system_shared_memory(
            input_key, "/" + input_shm_name, input_byte_size
        )
        triton_client.register_system_shared_memory(
            output_key, "/" + output_shm_name, output_byte_size
        )

    if use_cuda_shared_memory:
        for region_key in (input_key, output_key):
            triton_client.unregister_cuda_shared_memory(region_key)
        triton_client.register_cuda_shared_memory(
            input_key,
            cudashm.get_raw_handle(shm_handles[0][io_num]),
            0,
            input_byte_size,
        )
        triton_client.register_cuda_shared_memory(
            output_key,
            cudashm.get_raw_handle(shm_handles[1][io_num]),
            0,
            output_byte_size,
        )

    # Add shared memory regions to inputs
    inputs[io_num].set_shared_memory(input_key, input_byte_size)
    outputs[io_num].set_shared_memory(output_key, output_byte_size)


class ShmLeakDetector:
    """Detect shared memory leaks when testing Python backend."""

    class ShmLeakProbe:
        """Context manager comparing free shared memory before and after a block.

        On entry the free size reported by every monitor is recorded; on exit
        the sizes are sampled again and an AssertionError is raised if any
        region has less free memory than before. Both steps are skipped when
        the module-level skip flag is set.
        """

        def __init__(self, shm_monitors, enter_delay=1, exit_delay=1):
            self._shm_monitors = shm_monitors
            self._enter_delay = enter_delay  # seconds
            self._exit_delay = exit_delay  # seconds

        def __enter__(self):
            if not _skip_shm_leak_probe:
                self._shm_region_free_sizes = self._get_shm_free_sizes(
                    self._enter_delay
                )
            return self

        def __exit__(self, type, value, traceback):
            if _skip_shm_leak_probe:
                return

            shm_leak_detected = False
            sizes_on_exit = self._get_shm_free_sizes(self._exit_delay)
            for shm_region, curr_shm_free_size in sizes_on_exit.items():
                prev_shm_free_size = self._shm_region_free_sizes[shm_region]
                if curr_shm_free_size < prev_shm_free_size:
                    shm_leak_detected = True
                    print(
                        f"Shared memory leak detected [{shm_region}]: {curr_shm_free_size} (curr free) < {prev_shm_free_size} (prev free)."
                    )
                    # FIXME DLIS-7122: Known shared memory leak of 480 bytes in BLS test.
                    # That exact size pair fails immediately with its own message.
                    if curr_shm_free_size == 1006576 and prev_shm_free_size == 1007056:
                        assert False, f"Known shared memory leak of 480 bytes detected."
            assert not shm_leak_detected, f"Shared memory leak detected."

        def _get_shm_free_sizes(self, delay_sec=0):
            """Return {region name: free size}, optionally sleeping first."""
            if delay_sec > 0:
                time.sleep(delay_sec)
            return {
                shm_region: shm_monitor.free_memory()
                for shm_region, shm_monitor in self._shm_monitors.items()
            }

    def __init__(self, prefix="triton_python_backend_shm_region"):
        """Attach a monitor to every entry of /dev/shm starting with *prefix*."""
        if _skip_shm_leak_probe:
            return
        import triton_shm_monitor

        self._shm_monitors = {
            shm_region: triton_shm_monitor.SharedMemoryManager(shm_region)
            for shm_region in listdir("/dev/shm")
            if shm_region.startswith(prefix)
        }

    def Probe(self):
        """Return a ShmLeakProbe bound to this detector's monitors."""
        # Jetson cleanup takes too long and results in false positives.
        # Do not use the shared memory check on Jetson.
        # [DLIS-4876] Investigate how to re-enable shared memory check on Jetson.
        monitors = None if _skip_shm_leak_probe else self._shm_monitors
        return self.ShmLeakProbe(monitors)


================================================
FILE: qa/common/show_testlogs
================================================
#!/bin/bash
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Read test-log paths from stdin, one per line, and dump each log to stdout
# between begin/end markers. Exits 1 if at least one path was read, 0 if
# stdin was empty.
error=0
# The `|| [[ -n "$line" ]]` clause handles a final line without a newline.
while read -r line || [[ -n "$line" ]]; do
    error=1
    # Display name: the part of the path after "testlogs/".
    # Quote "$line" so paths with spaces or glob characters stay intact.
    name=$(printf '%s\n' "$line" | awk -F "testlogs/" '{print $2}')
    echo "<<< Reading test.log from $name >>>>"
    cat "$line"
    echo "<<< End of $name >>>"
done

exit "$error"


================================================
FILE: qa/common/test_util.py
================================================
#!/usr/bin/env python3

# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import unittest

import numpy as np

# Module-level counter initialised to 0.
# NOTE(review): not referenced by any definition visible in this file
# section -- presumably updated by callers elsewhere; confirm before removing.
_last_request_id = 0

# Numpy does not support the BF16 datatype natively.
# We use this dummy dtype as a representative for BF16: a structured dtype
# with a single object field named "bf16".
np_dtype_bfloat16 = np.dtype([("bf16", object)])


def shape_element_count(shape):
    """Return the number of elements described by *shape*.

    Args:
        shape: iterable of dimension sizes; -1 marks a variable-size
            dimension.

    Returns:
        -1 if any dimension is -1, 0 for an empty shape (kept for backward
        compatibility), otherwise the product of all dimensions. A
        zero-sized dimension correctly yields 0.
    """
    cnt = None
    for d in shape:
        if d == -1:
            return -1
        # Use None, not 0, as the "no dimension seen yet" marker so that a
        # genuine zero-sized dimension is not mistaken for the initial state.
        cnt = d if cnt is None else cnt * d
    return 0 if cnt is None else cnt


def shape_is_fixed(shape):
    """Return True unless shape_element_count() reports -1 for *shape*."""
    element_count = shape_element_count(shape)
    return element_count != -1


def shape_to_onnx_shape(shape, idx=0, increment_index=True):
    """Convert *shape* to an ONNX shape, naming variable dimensions.

    Onnx use string for variable size dimension, and the same string
    will be inferred to have same value for the model run. Each -1
    dimension is therefore replaced by "var_<idx>"; when *increment_index*
    is true the index advances after every replacement so the names are
    unique.

    Returns:
        Tuple of (converted dimension list, next unused index).
    """
    onnx_dims = []
    next_idx = idx
    for dim in shape:
        if not dim == -1:
            onnx_dims.append(dim)
            continue
        onnx_dims.append(f"var_{next_idx}")
        if increment_index:
            next_idx += 1
    return onnx_dims, next_idx


def shape_to_dims_str(shape):
    """Return the dimensions of *shape* joined by commas, e.g. "1,-1,3"."""
    return ",".join(map(str, shape))


def validate_for_trt_model(
    input_dtype, output0_dtype, output1_dtype, input_shape, output0_shape, output1_shape
):
    """Return True if input and output dtypes are supported by a TRT model.

    The shape arguments are accepted but not inspected.
    """
    supported_datatypes = [
        bool,
        np.int8,
        np.int32,
        np.uint8,
        np.float16,
        np.float32,
        np_dtype_bfloat16,
    ]
    # FIXME: Remove this check when jetson supports TRT 8.5 (DLIS-4256)
    if not support_trt_uint8():
        supported_datatypes.remove(np.uint8)

    io_dtypes = (input_dtype, output0_dtype, output1_dtype)
    for io_dtype in io_dtypes:
        if io_dtype not in supported_datatypes:
            return False

    datatype_set = set(io_dtypes)

    # Incompatible datatype conversions: int32 mixed with int8 or float32
    if (np.int32 in datatype_set) and (np.int8 in datatype_set):
        return False
    if (np.float32 in datatype_set) and (np.int32 in datatype_set):
        return False

    return True


def validate_for_ensemble_model(
    ensemble_type,
    input_dtype,
    output0_dtype,
    output1_dtype,
    input_shape,
    output0_shape,
    output1_shape,
):
    """Return True if input and output dtypes are supported by the ensemble type.

    The shape arguments are accepted but not inspected.
    """
    io_dtypes = (input_dtype, output0_dtype, output1_dtype)

    # Not extending test to uint8 yet
    if any(io_dtype == np.uint8 for io_dtype in io_dtypes):
        return False

    # Those ensemble types contains "identity" model which doesn't allow STRING
    # data type
    # Test types that use identity for both input and output
    identity_based_types = ("reshape", "zero", "fan")
    if any(io_dtype == np.object_ for io_dtype in io_dtypes):
        if any(type_str in ensemble_type for type_str in identity_based_types):
            return False

    # Otherwise, check input / output separately
    if input_dtype == np.object_ and "sequence" in ensemble_type:
        return False

    return True


def validate_for_onnx_model(
    input_dtype, output0_dtype, output1_dtype, input_shape, output0_shape, output1_shape
):
    """Return True if input and output dtypes are supported by a Onnx model.

    The shape arguments are accepted but not inspected.
    """
    # Not extending test to uint8 yet
    if any(
        io_dtype == np.uint8
        for io_dtype in (input_dtype, output0_dtype, output1_dtype)
    ):
        return False

    # If the input type is string the output type must be string or
    # int32. This is because the QA models we generate convert strings
    # internally to int32 for compute.
    if input_dtype == np.object_:
        for out_dtype in (output0_dtype, output1_dtype):
            if (out_dtype != np.object_) and (out_dtype != np.int32):
                return False

    return True


def validate_for_libtorch_model(
    input_dtype,
    output0_dtype,
    output1_dtype,
    input_shape,
    output0_shape,
    output1_shape,
    max_batch=0,
    reshape=False,
):
    """Return True if input and output dtypes are supported by a libtorch model."""
    io_dtypes = (input_dtype, output0_dtype, output1_dtype)

    # Not extending test to uint8 yet
    if any(io_dtype == np.uint8 for io_dtype in io_dtypes):
        return False

    # STRING data type does not support I/O with more than 1 dims. It supports
    # batching when 'reshape' field is set properly to empty shape.
    has_string_type = any(io_dtype == np.object_ for io_dtype in io_dtypes)
    is_more_than_one_dimensional = any(
        len(io_shape) > 1 for io_shape in (input_shape, output0_shape, output1_shape)
    ) or (max_batch != 0)

    if has_string_type and is_more_than_one_dimensional and not reshape:
        return False

    # FLOAT16 and UINT16 data types are not supported currently
    for unsupported_dtype in (np.uint16, np.float16):
        if any(io_dtype == unsupported_dtype for io_dtype in io_dtypes):
            return False

    return True


def validate_for_openvino_model(
    input_dtype, output0_dtype, output1_dtype, input_shape, output0_shape, output1_shape
):
    """Return True if input and output dtypes are supported by an OpenVino model.

    Only ``input_shape`` is inspected; the output shapes are accepted but
    unused.
    """
    io_dtypes = (input_dtype, output0_dtype, output1_dtype)

    # Not extending test to uint8 yet
    if any(io_dtype == np.uint8 for io_dtype in io_dtypes):
        return False

    # float16 is not supported on CPU by OpenVino
    supported_datatypes = [np.int8, np.int32, np.float32]
    if any(io_dtype not in supported_datatypes for io_dtype in io_dtypes):
        return False

    # Return false if input dtype != output dtype and shape > 1 dims
    # https://github.com/openvinotoolkit/openvino/issues/7173
    converts_dtype = (output1_dtype != input_dtype) or (output0_dtype != input_dtype)
    if converts_dtype and len(input_shape) > 1:
        return False

    return True


def get_dtype_name(dtype):
    """Return "bf16" for the BF16 placeholder dtype, else the numpy dtype name."""
    return "bf16" if dtype == np_dtype_bfloat16 else np.dtype(dtype).name


def get_model_name(pf, input_dtype, output0_dtype, output1_dtype):
    """Build "<pf>_<input>_<output0>[_<output1>]" from the dtype names.

    The trailing output1 component is omitted when *output1_dtype* is None.
    """
    base_name = f"{pf}_{get_dtype_name(input_dtype)}_{get_dtype_name(output0_dtype)}"
    if output1_dtype is None:
        return base_name
    return f"{base_name}_{get_dtype_name(output1_dtype)}"


def get_sequence_model_name(pf, dtype):
    """Return "<pf>_sequence_<numpy dtype name>"."""
    dtype_name = np.dtype(dtype).name
    return f"{pf}_sequence_{dtype_name}"


def get_dyna_sequence_model_name(pf, dtype):
    """Return "<pf>_dyna_sequence_<numpy dtype name>"."""
    dtype_name = np.dtype(dtype).name
    return f"{pf}_dyna_sequence_{dtype_name}"


def get_zero_model_name(pf, io_cnt, dtype):
    """Return "<pf>_zero_<io_cnt>_<numpy dtype name>"."""
    dtype_name = np.dtype(dtype).name
    return f"{pf}_zero_{io_cnt}_{dtype_name}"


# FIXME: Remove this def when jetson supports TRT 8.5 (DLIS-4256)
def support_trt_uint8():
    """Return True if the TensorRT in use is expected to support uint8.

    When the ``tensorrt`` module can be imported, the answer is whether it
    defines ``uint8``. Otherwise the TEST_JETSON environment variable
    decides: uint8 is assumed supported unless TEST_JETSON is a non-zero
    integer.
    """
    try:
        import tensorrt as trt
    except Exception:
        # tensorrt library is not found (or cannot be loaded), detect from
        # environment. Catch Exception rather than using a bare except so
        # KeyboardInterrupt / SystemExit are not swallowed.
        import os

        return not bool(int(os.environ.get("TEST_JETSON", 0)))
    # tensorrt library is found, return if uint8 is defined
    return hasattr(trt, "uint8")


def check_gpus_compute_capability(min_capability):
    """
    Check if all GPUs have a compute capability greater than or equal to the given value.

    The ``cuda`` package is used when it can be found, otherwise ``pycuda``.

    Args:
        min_capability (float): The minimum required compute capability (e.g., 8.0).

    Returns:
        bool

    Raises:
        RuntimeError: if neither ``cuda`` nor ``pycuda`` can be found.
    """

    from importlib.util import find_spec

    def _below_minimum(major, minor):
        # Capability "major.minor" is compared as the float major + minor / 10.
        return major + minor / 10.0 < min_capability

    if find_spec("cuda") is not None:
        from cuda.core import Device

        for gpu in Device.get_all_devices():
            capability = gpu.compute_capability
            if _below_minimum(capability.major, capability.minor):
                return False
        return True

    if find_spec("pycuda") is None:
        raise RuntimeError(
            "No packages found to determine the compute capability. Please check the environment."
        )

    import pycuda.driver as cuda

    cuda.init()

    for device_index in range(cuda.Device.count()):
        capability = cuda.Device(device_index).compute_capability()
        if _below_minimum(capability[0], capability[1]):
            return False

    return True


class TestResultCollector(unittest.TestCase):
    """TestCase base class that records the accumulated test results.

    TestResultCollector stores test result and prints it to
    "test_results.txt" in the current directory. In order to use this class,
    unit tests must inherit this class. Use `check_test_results` bash
    function from `common/util.sh` to verify the expected number of tests
    produced by this class.
    """

    @classmethod
    def setResult(cls, total, errors, failures):
        """Store the latest totals on the class."""
        cls.total, cls.errors, cls.failures = total, errors, failures

    @classmethod
    def tearDownClass(cls):
        # this method is called when all the unit tests in a class are
        # finished.
        json_res = {"total": cls.total, "errors": cls.errors, "failures": cls.failures}
        with open("test_results.txt", "w+") as f:
            f.write(json.dumps(json_res))

    def run(self, result=None):
        """Run the test, record the running totals and return the result.

        The TestResult is returned, as `unittest.TestCase.run` does, so that
        callers of `run()` / `__call__()` still receive it.
        """
        # result argument stores the accumulative test results
        test_result = super().run(result)
        self.setResult(
            test_result.testsRun,
            len(test_result.errors),
            len(test_result.failures),
        )
        return test_result


================================================
FILE: qa/common/trace_summary.py
================================================
#!/usr/bin/python

# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import csv
import json

import numpy as np

FLAGS = None


def add_span(span_map, timestamps, span_name, ts_start, ts_end):
    """Add the duration between two named timestamps to ``span_map``.

    Args:
        span_map: dict of span name -> accumulated duration; updated in place.
        timestamps: dict of timestamp name -> value.
        span_name: key in ``span_map`` to accumulate into.
        ts_start: name of the timestamp that opens the span.
        ts_end: name of the timestamp that closes the span.

    Raises:
        ValueError: if either timestamp is missing, or if the end timestamp
            is earlier than the start timestamp.
    """
    for required in (ts_start, ts_end):
        if required not in timestamps:
            raise ValueError(
                'timestamps missing "{}": {}'.format(required, timestamps)
            )

    begin = timestamps[ts_start]
    finish = timestamps[ts_end]
    if finish < begin:
        raise ValueError(
            'end timestamp "{}" < start timestamp "{}"'.format(ts_end, ts_start)
        )

    span_map[span_name] = span_map.get(span_name, 0) + (finish - begin)


class AbstractFrontend:
    """Default frontend: selects no traces and contributes no spans."""

    @property
    def filter_timestamp(self):
        """Timestamp name identifying this frontend's traces (None here)."""
        return None

    def add_frontend_span(self, span_map, timestamps):
        """Record frontend-specific spans; the base class records nothing."""
        return None

    def summarize_frontend_span(self, span_map, cnt):
        """Return the frontend summary text; the base class has none."""
        return None


class HttpFrontend(AbstractFrontend):
    """Frontend that accounts for the HTTP_* timestamps of a trace."""

    @property
    def filter_timestamp(self):
        """Timestamp name that marks a trace as an HTTP request."""
        return "HTTP_RECV_START"

    def add_frontend_span(self, span_map, timestamps):
        """Accumulate the HTTP infer/receive/send spans into ``span_map``.

        Nothing is recorded unless both HTTP_RECV_START and HTTP_SEND_END are
        present; ``add_span`` raises ValueError if the remaining tags are
        missing.
        """
        if "HTTP_RECV_START" not in timestamps or "HTTP_SEND_END" not in timestamps:
            return

        http_spans = (
            ("HTTP_INFER", "HTTP_RECV_START", "HTTP_SEND_END"),
            ("HTTP_RECV", "HTTP_RECV_START", "HTTP_RECV_END"),
            ("HTTP_SEND", "HTTP_SEND_START", "HTTP_SEND_END"),
        )
        for span_name, start_tag, end_tag in http_spans:
            add_span(span_map, timestamps, span_name, start_tag, end_tag)

    def summarize_frontend_span(self, span_map, cnt):
        """Return the averaged HTTP summary text, or None without HTTP spans.

        Totals are divided by ``cnt * 1000`` and labelled "us" in the output.
        """
        if "HTTP_INFER" not in span_map:
            return None

        scale = cnt * 1000
        infer_total = span_map["HTTP_INFER"]
        recv_total = span_map["HTTP_RECV"]
        send_total = span_map["HTTP_SEND"]
        # Whatever is left after the handler, receive and send portions.
        overhead_total = infer_total - span_map["REQUEST"] - recv_total - send_total

        return "".join(
            (
                "HTTP infer request (avg): {}us\n".format(infer_total / scale),
                "\tReceive (avg): {}us\n".format(recv_total / scale),
                "\tSend (avg): {}us\n".format(send_total / scale),
                "\tOverhead (avg): {}us\n".format(overhead_total / scale),
            )
        )


class GrpcFrontend(AbstractFrontend):
    """Frontend that accounts for the GRPC_* timestamps of a trace."""

    @property
    def filter_timestamp(self):
        """Timestamp name that marks a trace as a GRPC request."""
        return "GRPC_WAITREAD_START"

    def add_frontend_span(self, span_map, timestamps):
        """Accumulate the GRPC infer and send spans into ``span_map``.

        Nothing is recorded unless both GRPC_WAITREAD_END and GRPC_SEND_END
        are present; ``add_span`` raises ValueError if GRPC_SEND_START is
        missing.
        """
        if "GRPC_WAITREAD_END" not in timestamps or "GRPC_SEND_END" not in timestamps:
            return

        grpc_spans = (
            ("GRPC_INFER", "GRPC_WAITREAD_END", "GRPC_SEND_END"),
            ("GRPC_SEND", "GRPC_SEND_START", "GRPC_SEND_END"),
        )
        for span_name, start_tag, end_tag in grpc_spans:
            add_span(span_map, timestamps, span_name, start_tag, end_tag)

    def summarize_frontend_span(self, span_map, cnt):
        """Return the averaged GRPC summary text, or None without GRPC spans.

        Totals are divided by ``cnt * 1000`` and labelled "us" in the output.
        """
        if "GRPC_INFER" not in span_map:
            return None

        scale = cnt * 1000
        return "".join(
            (
                "GRPC infer request (avg): {}us\n".format(
                    span_map["GRPC_INFER"] / scale
                ),
                "\tSend (avg): {}us\n".format(span_map["GRPC_SEND"] / scale),
            )
        )


def summarize(frontend, traces):
    # map from (model_name, model_version) to # of traces
    model_count_map = dict()
    # map from (model_name, model_version) to map of span->total time
    model_span_map = dict()

    # Order traces by id to be more intuitive if 'show_trace'
    traces = sorted(traces, key=lambda t: t.get("id", -1))

    # Filter the trace that is not for the requested frontend
    match_frontend_id_set = set()
    for trace in traces:
        if "id" not in trace:
            continue

        # Trace without a parent must contain frontend timestamps
        if "parent_id" not in trace:
            if frontend.filter_timestamp is None:
                continue
            if "timestamps" in trace:
                for ts in trace["timestamps"]:
                    if frontend.filter_timestamp in ts["name"]:
                        match_frontend_id_set.add(trace["id"])
        # Otherwise need to check whether parent is filtered
        elif trace["parent_id"] in match_frontend_id_set:
            match_frontend_id_set.add(trace["id"])

    # Filter the trace that is not meaningful and group them by 'id'
    filtered_traces = dict()
    for trace in traces:
        if "id" not in trace:
            continue
        if trace["id"] in match_frontend_id_set:
            if trace["id"] in filtered_traces.keys():
                rep_trace = filtered_traces[trace["id"]]
                # Append the timestamp to the trace representing this 'id'
                if "model_name" in trace:
                    rep_trace["model_name"] = trace["model_name"]
                if "model_version" in trace:
                    rep_trace["model_version"] = trace["model_version"]
                if "timestamps" in trace:
                    rep_trace["timestamps"] += trace["timestamps"]
            else:
                # Use this trace to represent this 'id'
                if "timestamps" not in trace:
                    trace["timestamps"] = []
                filtered_traces[trace["id"]] = trace

    for trace_id, trace in filtered_traces.items():
        if trace_id not in match_frontend_id_set:
            filtered_traces.pop(trace_id, None)
            continue
        timestamps = dict()
        for ts in trace["timestamps"]:
            timestamps[ts["name"]] = ts["ns"]
        if ("REQUEST_START" in timestamps) and ("REQUEST_END" in timestamps):
            key = (trace["model_name"], trace["model_version"])
            if key not in model_count_map:
                model_count_map[key] = 0
                model_span_map[key] = dict()

            model_count_map[key] += 1

            frontend.add_frontend_span(model_span_map[key], timestamps)

            add_span(
                model_span_map[key],
                timestamps,
                "REQUEST",
                "REQUEST_START",
                "REQUEST_END",
            )

            # The tags below will be missing for ensemble model
            if ("QUEUE_START" in timestamps) and ("COMPUTE_START" in timestamps):
                add_span(
                    model_span_map[key],
                    timestamps,
                    "QUEUE",
                    "QUEUE_START",
                    "COMPUTE_START",
                )
            if ("COMPUTE_START" in timestamps) and ("COMPUTE_END" in timestamps):
                add_span(
                    model_span_map[key],
                    timestamps,
                    "COMPUTE",
                    "COMPUTE_START",
                    "COMPUTE_END",
                )
            if ("COMPUTE_INPUT_END" in timestamps) and (
                "COMPUTE_OUTPUT_START" in timestamps
            ):
                add_span(
                    model_span_map[key],
                    timestamps,
                    "COMPUTE_INPUT",
                    "COMPUTE_START",
                    "COMPUTE_INPUT_END",
                )
                add_span(
                    model_span_map[key],
                    timestamps,
                    "COMPUTE_INFER",
                    "COMPUTE_INPUT_END",
                    "COMPUTE_OUTPUT_START",
                )
                add_span(
                    model_span_map[key],
                    timestamps,
                    "COMPUTE_OUTPUT",
                    "COMPUTE_OUTPUT_START",
                    "COMPUTE_END",
                )
            if FLAGS.show_trace:
                print("{} ({}):".format(trace["model_name"], trace["model_version"]))
                print("\tid: {}".format(trace["id"]))
                if "parent_id" in trace:
                    print("\tparent id: {}".format(trace["parent_id"]))
                ordered_timestamps = list()
                for ts in trace["timestamps"]:
                    # skip GRPC_WAITREAD
                    if not ts["name"].startswith("GRPC_WAITREAD"):
                        ordered_timestamps.append((ts["name"], ts["ns"]))
                ordered_timestamps.sort(key=lambda tup: tup[1])

                now = None
                for ts in ordered_timestamps:
                    if now is not None:
                        print("\t\t{}us".format((ts[1] - now) / 1000))
                    print("\t{}".format(ts[0]))
                    now = ts[1]

    for key, cnt in model_count_map.items():
        model_name, model_value = key
        print(
            "Summary for {} ({}): trace count = {}".format(model_name, model_value, cnt)
        )

        frontend_summary = frontend.summarize_frontend_span(model_span_map[key], cnt)
        if frontend_summary is not None:
            print(frontend_summary)

        # collect handler timeline
        print(
            "\tHandler (avg): {}us".format(
                model_span_map[key]["REQUEST"] / (cnt * 1000)
            )
        )
        if ("QUEUE" in model_span_map[key]) and "COMPUTE" in model_span_map[key]:
            print(
                "\t\tOverhead (avg): {}us".format(
                    (
                        model_span_map[key]["REQUEST"]
                        - model_span_map[key]["QUEUE"]
                        - model_span_map[key]["COMPUTE"]
                    )
                    / (cnt * 1000)
                )
            )
            print(
                "\t\tQueue (avg): {}us".format(
                    model_span_map[key]["QUEUE"] / (cnt * 1000)
                )
            )
            print(
                "\t\tCompute (avg): {}us".format(
                    model_span_map[key]["COMPUTE"] / (cnt * 1000)
                )
            )
        if (
            "COMPUTE_INPUT" in model_span_map[key]
        ) and "COMPUTE_OUTPUT" in model_span_map[key]:
            print(
                "\t\t\tInput (avg): {}us".format(
                    model_span_map[key]["COMPUTE_INPUT"] / (cnt * 1000)
                )
            )
            print(
                "\t\t\tInfer (avg): {}us".format(
                    model_span_map[key]["COMPUTE_INFER"] / (cnt * 1000)
                )
            )
            print(
                "\t\t\tOutput (avg): {}us".format(
                    model_span_map[key]["COMPUTE_OUTPUT"] / (cnt * 1000)
                )
            )


def summarize_dataflow(traces):
    """Print the tensors recorded in ``traces`` as a nested data flow.

    Layout of the output:
    - parent input
      - child input
        - ...
      - child output

    When no trace has a parent, the tensors of the first trace that carries a
    tensor activity are printed. Otherwise the tree rooted at the first parent
    id encountered is printed.
    """
    # Order traces by id to be more intuitive if 'show_trace'
    ordered = sorted(traces, key=lambda t: t.get("id", -1))

    # parent id -> child ids, e.g. {3: [4, 5, 6], 4: [7]}
    children_by_parent = dict()
    for trace in ordered:
        if "id" in trace and "parent_id" in trace:
            children_by_parent.setdefault(trace["parent_id"], []).append(trace["id"])

    if not children_by_parent:
        # print the tensors of model
        first_id = find_first_id_with_tensor(ordered)
        if first_id != 0:
            print("Data Flow:")
        print_tensor_by_id(first_id, ordered, 0, 0)
        return

    # print the tensors of ensemble
    print("Data Flow:")
    root_id = next(iter(children_by_parent))

    # nested form, e.g. {3: {4: {7: None}, 5: None, 6: None}}
    tree = dict()
    depth = [0]
    append_dataflow_tensor(tree, root_id, children_by_parent, ordered, depth)

    print_dataflow_tensor(tree, ordered, depth[0], step=0)


def append_dataflow_tensor(
    dataflow_tensor_map, parent_id, dataflow_tree_map, traces, depth
):
    """Recursively build the nested id tree below ``parent_id``.

    Args:
        dataflow_tensor_map: dict that receives ``parent_id`` -> subtree
            (a nested dict, or None when the id has no children).
        parent_id: id whose subtree is being built.
        dataflow_tree_map: dict of id -> list of child ids.
        traces: passed through to recursive calls; not read here.
        depth: one-element list; ``depth[0]`` is incremented once for every
            id that has children.
    """
    if parent_id in dataflow_tree_map:
        subtree = dict()
        dataflow_tensor_map[parent_id] = subtree
        depth[0] += 1
        for child_id in dataflow_tree_map[parent_id]:
            append_dataflow_tensor(
                subtree, child_id, dataflow_tree_map, traces, depth
            )
    else:
        dataflow_tensor_map[parent_id] = None


def print_dataflow_tensor(dataflow_tree_map, traces, depth, step):
    """Print every id of the nested tree, descending one ``step`` per level.

    ``dataflow_tree_map`` maps an id to its subtree (a dict of the same
    shape) or to None for ids without children.
    """
    for node_id, subtree in dataflow_tree_map.items():
        print_tensor_by_id(node_id, traces, depth, step)
        if subtree is not None:
            print_dataflow_tensor(subtree, traces, depth, step + 1)


def print_tensor_by_id(id, traces, depth, step):
    """Print the model name/version and tensors recorded for trace ``id``.

    The output is framed by two rules of '=' characters and indented by
    ``step + 1`` tabs; the rule gets shorter as ``step`` approaches ``depth``.
    An ``id`` of 0 prints nothing.
    """
    if id == 0:
        return

    indent = "\t" * (step + 1)
    rule = "{0}{1}".format(indent, "=" * (50 + 8 * (depth - step)))
    # trace activity -> heading printed above the tensor
    tensor_headings = (
        ("TENSOR_QUEUE_INPUT", "QUEUE_INPUT"),
        ("TENSOR_BACKEND_INPUT", "BACKEND_INPUT"),
        ("TENSOR_BACKEND_OUTPUT", "BACKEND_OUTPUT"),
    )

    print(rule)
    for trace in traces:
        if "id" not in trace or trace["id"] != id:
            continue

        # print model name and version
        if (
            "model_name" in trace
            and "model_version" in trace
            and "timestamps" in trace
        ):
            print("{0}Name:   {1}".format(indent, trace["model_name"]))
            print("{0}Version:{1}".format(indent, trace["model_version"]))

        # print data
        if "activity" not in trace:
            continue
        for activity, heading in tensor_headings:
            if trace["activity"] == activity:
                print("{0}{1}:".format(indent, heading))
                print(
                    "{0}\t{1}: {2}".format(
                        indent,
                        trace["tensor"]["name"],
                        get_numpy_array(trace["tensor"]),
                    )
                )
                break
    print(rule)


def find_first_id_with_tensor(traces):
    """Return the id of the first trace carrying a tensor activity, else 0."""
    tensor_activities = (
        "TENSOR_QUEUE_INPUT",
        "TENSOR_BACKEND_INPUT",
        "TENSOR_BACKEND_OUTPUT",
    )
    return next(
        (
            trace["id"]
            for trace in traces
            if "activity" in trace and trace["activity"] in tensor_activities
        ),
        0,
    )


# Maps the datatype name stored in a traced tensor's "dtype" field to the
# numpy dtype used to rebuild the tensor in get_numpy_array().
TRITON_TYPE_TO_NUMPY = {
    "BOOL": bool,
    "UINT8": np.uint8,
    "UINT16": np.uint16,
    "UINT32": np.uint32,
    "UINT64": np.uint64,
    "INT8": np.int8,
    "INT16": np.int16,
    "INT32": np.int32,
    "INT64": np.int64,
    "FP16": np.float16,
    "FP32": np.float32,
    "FP64": np.float64,
    "BYTES": np.object_,
}


def _parse_integer(text):
    """Parse one element of an integer tensor without losing precision."""
    try:
        return int(text)
    except ValueError:
        # Not a plain integer literal (e.g. "1.0"); fall back to the float
        # parse so such input keeps working.
        return float(text)


def get_numpy_array(tensor):
    """Rebuild a numpy array from a traced tensor description.

    Args:
        tensor: dict with string fields "dtype" (a key of
            TRITON_TYPE_TO_NUMPY), "data" (comma-separated elements) and
            "shape" (comma-separated dimensions).

    Returns:
        numpy array of the mapped dtype, reshaped to the given shape.

    Raises:
        KeyError: if the dtype name is not in TRITON_TYPE_TO_NUMPY.
        ValueError: if an element or dimension cannot be parsed, or the data
            does not fit the shape.
    """
    dtype = TRITON_TYPE_TO_NUMPY[tensor["dtype"]]
    data = tensor["data"]
    if dtype == np.object_:
        # BYTES elements may be quoted and contain commas, so parse as CSV.
        value = next(csv.reader([data], skipinitialspace=True))
    elif not data.strip():
        # Zero-element tensor: there is nothing to parse.
        value = []
    elif np.issubdtype(dtype, np.integer):
        # Going through float would round 64-bit values above 2**53.
        value = [_parse_integer(item) for item in data.split(",")]
    else:
        value = [float(item) for item in data.split(",")]

    shape_text = tensor["shape"].strip()
    # An empty shape string means no dimensions rather than a parse error.
    shape = [int(dim) for dim in shape_text.split(",")] if shape_text else []
    return np.array(value, dtype=dtype).reshape(shape)


# Script entry point: parse the command line into the module-level FLAGS, then
# print the HTTP summary, the GRPC summary and the data flow for every trace
# file given.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "-t",
        "--show-trace",
        action="store_true",
        required=False,
        default=False,
        help="Show timestamps for each individual trace",
    )
    parser.add_argument("file", type=argparse.FileType("r"), nargs="+")
    FLAGS = parser.parse_args()

    for f in FLAGS.file:
        # argparse.FileType opened the file during parsing; close it as soon
        # as its content has been loaded instead of leaking the handle.
        with f:
            trace_data = json.load(f)
        if FLAGS.verbose:
            print(json.dumps(trace_data, sort_keys=True, indent=2))

        # Must summarize HTTP and GRPC separately since they have
        # different ways of accumulating time.
        print("File: {}".format(f.name))
        summarize(HttpFrontend(), trace_data)
        summarize(GrpcFrontend(), trace_data)
        summarize_dataflow(trace_data)


================================================
FILE: qa/common/trtllm_util.sh
================================================
#!/bin/bash
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Clone the tensorrtllm_backend repository into TENSORRTLLM_BACKEND_DIR.
#
# Reads: TENSORRTLLM_BACKEND_DIR, TENSORRTLLM_BACKEND_REPO_TAG, TRITON_REPO_ORG.
# Side effects: removes and recreates the target directory, installs git-lfs
# through apt-get, and leaves the working directory inside the clone.
# Expansions are quoted so that values containing spaces or glob characters
# are not split (notably in the rm -rf).
function clone_tensorrt_llm_backend_repo {
    rm -rf "${TENSORRTLLM_BACKEND_DIR}" && mkdir "${TENSORRTLLM_BACKEND_DIR}"
    apt-get update && apt-get install git-lfs -y --no-install-recommends
    git clone --single-branch --depth=1 -b "${TENSORRTLLM_BACKEND_REPO_TAG}" "${TRITON_REPO_ORG}/tensorrtllm_backend.git" "${TENSORRTLLM_BACKEND_DIR}"
    cd "${TENSORRTLLM_BACKEND_DIR}" && git lfs install && git submodule update --init --recursive
}

# Download the gpt2-medium weights and convert them into a checkpoint under
# ./c-model/gpt2/<NUM_GPUS>-gpu/ (relative to GPT_DIR).
#
# Reads: GPT_DIR, NUM_GPUS, BASE_DIR.
# Exits the shell with status 1 if pytorch_model.bin cannot be downloaded.
# Leaves the working directory at BASE_DIR.
function build_gpt2_base_model {
    # Download weights from HuggingFace Transformers
    cd "${GPT_DIR}" && rm -rf gpt2 && git clone https://huggingface.co/gpt2-medium gpt2 && cd gpt2
    # -f: either file may be absent from the clone; that is not an error.
    rm -f pytorch_model.bin model.safetensors
    if ! wget -q https://huggingface.co/gpt2-medium/resolve/main/pytorch_model.bin; then
        echo "Downloading pytorch_model.bin failed."
        exit 1
    fi
    cd "${GPT_DIR}"

    # Convert weights from HF Transformers to FT format
    python3 convert_checkpoint.py --model_dir gpt2 --dtype float16 --tp_size "${NUM_GPUS}" --output_dir "./c-model/gpt2/${NUM_GPUS}-gpu/"
    cd "${BASE_DIR}"
}

# Build the TensorRT engines from the converted gpt2 checkpoint.
#
# Reads: GPT_DIR, NUM_GPUS, ENGINES_DIR, BASE_DIR.
# Runs trtllm-build from GPT_DIR and leaves the working directory at BASE_DIR.
function build_gpt2_tensorrt_engine {
    # Collect the options first so the invocation itself stays on one line.
    local build_args=(
        --checkpoint_dir "./c-model/gpt2/${NUM_GPUS}-gpu/"
        --gpt_attention_plugin float16
        --remove_input_padding enable
        --paged_kv_cache enable
        --gemm_plugin float16
        --workers "${NUM_GPUS}"
        --output_dir "${ENGINES_DIR}"
    )

    cd ${GPT_DIR}
    trtllm-build "${build_args[@]}"

    cd ${BASE_DIR}
}

# Replace every occurrence of a tag in a config file, in place.
#
#   $1  tag to replace (used as the sed search pattern)
#   $2  replacement value
#   $3  path of the file to edit
#
# The sed expression uses '|' as its delimiter, so neither the tag nor the
# value may contain an unescaped '|'.
function replace_config_tags {
    # Locals, so a caller's variables of the same name are not clobbered.
    local tag_to_replace="${1}"
    local new_value="${2}"
    local config_file_path="${3}"
    sed -i "s|${tag_to_replace}|${new_value}|g" "${config_file_path}"
}

# Recreate MODEL_REPOSITORY from the inflight_batcher_llm templates and fill
# in the placeholder tags of each model's config.pbtxt.
#
# Reads: MODEL_REPOSITORY, TENSORRTLLM_BACKEND_DIR, MODEL_NAME, TOKENIZER_DIR,
# ENGINES_DIR. The template "ensemble" model is renamed to MODEL_NAME and the
# tensorrt_llm_bls model is removed.
function prepare_model_repository {
    rm -rf ${MODEL_REPOSITORY} && mkdir ${MODEL_REPOSITORY}
    cp -r ${TENSORRTLLM_BACKEND_DIR}/tensorrt_llm/triton_backend/all_models/inflight_batcher_llm/* ${MODEL_REPOSITORY}
    rm -rf ${MODEL_REPOSITORY}/tensorrt_llm_bls
    mv "${MODEL_REPOSITORY}/ensemble" "${MODEL_REPOSITORY}/${MODEL_NAME}"

    # One config file per model in the repository.
    local ensemble_cfg="${MODEL_REPOSITORY}/${MODEL_NAME}/config.pbtxt"
    local preprocessing_cfg="${MODEL_REPOSITORY}/preprocessing/config.pbtxt"
    local postprocessing_cfg="${MODEL_REPOSITORY}/postprocessing/config.pbtxt"
    local tensorrt_llm_cfg="${MODEL_REPOSITORY}/tensorrt_llm/config.pbtxt"

    # Ensemble (renamed to MODEL_NAME)
    replace_config_tags "model_version: -1" "model_version: 1" "${ensemble_cfg}"
    replace_config_tags '${triton_max_batch_size}' "128" "${ensemble_cfg}"
    replace_config_tags 'name: "ensemble"' "name: \"$MODEL_NAME\"" "${ensemble_cfg}"
    replace_config_tags '${logits_datatype}' "TYPE_FP32" "${ensemble_cfg}"

    # Preprocessing
    replace_config_tags '${triton_max_batch_size}' "128" "${preprocessing_cfg}"
    replace_config_tags '${preprocessing_instance_count}' '1' "${preprocessing_cfg}"
    replace_config_tags '${tokenizer_dir}' "${TOKENIZER_DIR}/" "${preprocessing_cfg}"
    replace_config_tags '${logits_datatype}' "TYPE_FP32" "${preprocessing_cfg}"
    replace_config_tags '${max_queue_delay_microseconds}' "1000000" "${preprocessing_cfg}"
    replace_config_tags '${max_queue_size}' "0" "${preprocessing_cfg}"

    # Postprocessing
    replace_config_tags '${triton_max_batch_size}' "128" "${postprocessing_cfg}"
    replace_config_tags '${postprocessing_instance_count}' '1' "${postprocessing_cfg}"
    replace_config_tags '${tokenizer_dir}' "${TOKENIZER_DIR}/" "${postprocessing_cfg}"
    replace_config_tags '${logits_datatype}' "TYPE_FP32" "${postprocessing_cfg}"

    # TensorRT-LLM model
    replace_config_tags '${triton_max_batch_size}' "128" "${tensorrt_llm_cfg}"
    replace_config_tags '${decoupled_mode}' 'true' "${tensorrt_llm_cfg}"
    replace_config_tags '${max_queue_delay_microseconds}' "1000000" "${tensorrt_llm_cfg}"
    replace_config_tags '${batching_strategy}' 'inflight_fused_batching' "${tensorrt_llm_cfg}"
    replace_config_tags '${engine_dir}' "${ENGINES_DIR}" "${tensorrt_llm_cfg}"
    replace_config_tags '${triton_backend}' "tensorrtllm" "${tensorrt_llm_cfg}"
    replace_config_tags '${max_queue_size}' "0" "${tensorrt_llm_cfg}"
    replace_config_tags '${logits_datatype}' "TYPE_FP32" "${tensorrt_llm_cfg}"
    replace_config_tags '${encoder_input_features_data_type}' "TYPE_FP32" "${tensorrt_llm_cfg}"
    replace_config_tags '${prompt_embedding_table_data_type}' 'TYPE_FP16' "${tensorrt_llm_cfg}"
}

# Poll the server once per second until it reports ready or the timeout
# expires. Sets WAIT_RET to 0 on success and to 1 on failure.
#
#   $1    maximum number of seconds to wait (default 30)
#   $2..  pids that must all stay alive while waiting
#
# Success also requires the POST that sets log_verbose_level to 1 on
# /v2/logging to succeed.
function wait_for_server_ready() {
    local wait_time_secs="${1:-30}"
    shift
    local spids=("$@")

    WAIT_RET=0

    for _ in $(seq "$wait_time_secs"); do
        # Give up as soon as any of the watched processes has gone away.
        for pid in "${spids[@]}"; do
            kill -0 "$pid" >/dev/null 2>&1 && continue
            echo "=== Server not running."
            WAIT_RET=1
            return
        done

        sleep 1

        curl -s --fail localhost:8000/v2/health/ready || continue
        curl -s --fail -w "%{http_code}" -o /dev/null -d '{"log_verbose_level":1}' localhost:8000/v2/logging || continue
        return
    done

    echo "=== Timeout $wait_time_secs secs. Server not ready."
    WAIT_RET=1
}

# Launch the server through launch_triton_server.py and wait until it is
# ready. SERVER_PID becomes the array of pids reported by pgrep tritonserver.
#
# Reads: TENSORRTLLM_BACKEND_DIR, NUM_GPUS, MODEL_REPOSITORY, SERVER_LOG,
# SERVER_TIMEOUT, SERVER.
# If the server does not become ready it is killed, the log is printed and
# the shell exits with status 1.
function run_server {
    python3 ${TENSORRTLLM_BACKEND_DIR}/tensorrt_llm/triton_backend/scripts/launch_triton_server.py --world_size="${NUM_GPUS}" --model_repo="${MODEL_REPOSITORY}" >${SERVER_LOG} 2>&1 &
    sleep 2 # allow time to obtain the pid(s)
    # Read PIDs into an array, trimming whitespaces
    readarray -t SERVER_PID < <(pgrep "tritonserver")

    wait_for_server_ready ${SERVER_TIMEOUT} "${SERVER_PID[@]}"
    if [ "$WAIT_RET" == "0" ]; then
        return
    fi

    # Startup failed: clean up, show the server log and abort.
    kill "${SERVER_PID[@]}" >/dev/null 2>&1 || true
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
}

# Send SIGINT to every tritonserver process, then block until each pid in the
# SERVER_PID array has terminated.
function kill_server {
    # -r: do not invoke kill at all when pgrep finds no process.
    pgrep tritonserver | xargs -r kill -SIGINT
    for pid in "${SERVER_PID[@]}"; do
        echo "Waiting for proc ${pid} to terminate..."
        while kill -0 "$pid" >/dev/null 2>&1; do
            sleep 1
        done
    done
}


================================================
FILE: qa/common/util.sh
================================================
#!/bin/bash
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Defaults used by the helpers below. Each keeps a value that is already set
# and non-empty in the environment and otherwise falls back to the default.

# Host part of the server's HTTP endpoint URLs (also assigns
# TRITONSERVER_IPADDR when that is unset).
SERVER_IPADDR=${TRITONSERVER_IPADDR:=localhost}
# File that receives the server's output.
SERVER_LOG=${SERVER_LOG:=./server.log}
# Seconds to wait for the server to become ready/live.
SERVER_TIMEOUT=${SERVER_TIMEOUT:=120}
# Value placed in front of LD_PRELOAD when launching the server.
SERVER_LD_PRELOAD=${SERVER_LD_PRELOAD:=""}
# NOTE(review): not referenced in the part of the file visible here;
# presumably a timeout in seconds for file monitoring -- confirm with users.
MONITOR_FILE_TIMEOUT=${MONITOR_FILE_TIMEOUT:=10}

# Wait for a file to exist and then for an expression to appear in it.
# Sets WAIT_RET to 0 on success, 1 on failure.
#
#   $1  file to watch
#   $2  grep expression to look for
#   $3  seconds to wait for the file to exist (default 1)
#   $4  seconds to wait for the expression (default 5)
function wait_for_file_str() {
    # Positional access instead of 'shift': shifting past the last argument
    # returns non-zero, which aborts callers running under 'set -e' when the
    # optional arguments are omitted.
    local file="$1"
    local grep_expr="$2"
    local exists_secs="${3:-1}" # wait for file to exist, default 1s
    local wait_time_secs="${4:-5}" # wait for expression in file, default 5s

    WAIT_RET=0

    echo "=== Waiting for '$file'..."
    until [ "$exists_secs" -eq 0 ] || [ -f "$file" ]; do
        sleep 1
        exists_secs=$((exists_secs - 1))
    done
    # Decide on the file itself, so a file that shows up during the last
    # second is not reported as a timeout.
    if [ ! -f "$file" ]; then
        echo "=== Timeout. Unable to find '$file'"
        WAIT_RET=1
        return
    fi

    echo "=== Found $file... waiting for '$grep_expr'"
    # tail runs in a background subshell bounded by timeout; grep -q ends the
    # wait as soon as the expression shows up.
    (timeout "$wait_time_secs" tail -F -n+0 "$file" &) | grep -q "$grep_expr" && \
        echo "=== Found '$grep_expr'" && return

    echo "=== Timeout $wait_time_secs secs. Unable to find '$grep_expr' in '$file'"
    WAIT_RET=1
}

# Poll the health endpoint once per second until the server reports ready.
# Sets WAIT_RET to 0 on success, 1 on failure (server process gone or timeout).
#
#   $1  pid of the server process
#   $2  maximum number of seconds to wait (default 30)
#
# Note: errexit is switched off around the curl call and switched on
# afterwards, regardless of its state on entry.
function wait_for_server_ready() {
    local spid="$1"; shift
    local wait_time_secs="${1:-30}"; shift

    WAIT_RET=0

    local wait_secs=$wait_time_secs
    until [ $wait_secs -eq 0 ]; do
        if ! kill -0 $spid > /dev/null 2>&1; then
            echo "=== Server not running."
            WAIT_RET=1
            return
        fi

        sleep 1

        # curl is allowed to fail while the server is still starting up.
        set +e
        code=$(curl -s -w %{http_code} ${SERVER_IPADDR}:8000/v2/health/ready)
        set -e
        if [ "$code" == "200" ]; then
            return
        fi

        wait_secs=$((wait_secs - 1))
    done

    echo "=== Timeout $wait_time_secs secs. Server not ready."
    WAIT_RET=1
}

# Poll the health endpoint once per second until the server reports live.
# Sets WAIT_RET to 0 on success, 1 on failure (server process gone or timeout).
#
#   $1  pid of the server process
#   $2  maximum number of seconds to wait (default 30)
#
# Note: errexit is switched off around the curl call and switched on
# afterwards, regardless of its state on entry.
function wait_for_server_live() {
    local spid="$1"; shift
    local wait_time_secs="${1:-30}"; shift

    WAIT_RET=0

    local wait_secs=$wait_time_secs
    until [ $wait_secs -eq 0 ]; do
        if ! kill -0 $spid; then
            echo "=== Server not running."
            WAIT_RET=1
            return
        fi

        sleep 1

        # curl is allowed to fail while the server is still starting up.
        set +e
        code=$(curl -s -w %{http_code} ${SERVER_IPADDR}:8000/v2/health/live)
        set -e
        if [ "$code" == "200" ]; then
            return
        fi

        wait_secs=$((wait_secs - 1))
    done

    echo "=== Timeout $wait_time_secs secs. Server not live."
    WAIT_RET=1
}

# Wait until all server model states are stable (MODEL_READY or
# MODEL_UNAVAILABLE) or until timeout. Note that server has to be
# live.  If timeout is not specified, only return when all model
# states are stable.
#
#   $1  maximum number of seconds to wait (default -1: no timeout)
#
# Note: errexit is switched off around the query and switched on afterwards,
# regardless of its state on entry.
function wait_for_model_stable() {
    # No 'shift' here: with the documented no-argument call it would return
    # non-zero and abort callers running under 'set -e'.
    local wait_time_secs="${1:--1}"

    local wait_secs=$wait_time_secs
    local index_json
    until test $wait_secs -eq 0 ; do
        sleep 1;

        set +e
        # Fetch the index once so both counts describe the same snapshot.
        index_json=$(curl -s -X POST ${SERVER_IPADDR}:8000/v2/repository/index | json_pp)
        total_count=$(grep -c "state" <<< "$index_json")
        stable_count=$(grep -c "READY\|UNAVAILABLE" <<< "$index_json")
        count=$((total_count - stable_count))
        set -e
        if [ "$count" == "0" ]; then
            return
        fi

        ((wait_secs--));
    done

    echo "=== Timeout $wait_time_secs secs. Not all models stable."
}

# Collect gdb diagnostics for a server that hangs or has crashed.
#
# Reads: SERVER_PID, SERVER. Sets GDB_LOG to the last log file written.
# Does nothing except print a warning when gdb is not installed. Every gdb
# invocation is allowed to fail.
function gdb_helper () {
  if ! command -v gdb > /dev/null 2>&1; then
    echo "=== WARNING: gdb not installed"
    return
  fi

  # Core file produced by the hang branch below; it must not be reported as
  # a segfault afterwards.
  local hang_core=""

  ### Server Hang ###
  if kill -0 ${SERVER_PID} > /dev/null 2>&1; then
    # If server process is still alive, try to get backtrace and core dump from it
    GDB_LOG="gdb_bt.${SERVER_PID}.log"
    echo -e "=== WARNING: SERVER HANG DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
    # Dump backtrace log for quick analysis. Allow these commands to fail.
    gdb -batch -ex "thread apply all bt" -p "${SERVER_PID}" 2>&1 | tee "${GDB_LOG}" || true

    # Generate core dump for deeper analysis. Default filename is "core.${PID}"
    gdb -batch -ex "gcore" -p "${SERVER_PID}" || true
    hang_core="core.${SERVER_PID}"
  fi

  ### Server Segfaulted ###
  # If there are any core dumps locally from a segfault, load them and get a backtrace
  local corefile
  for corefile in core.*; do
    # An unmatched glob is left as the literal pattern; skip it.
    if [ ! -f "${corefile}" ]; then
      continue
    fi
    # Skip the backtrace logs written by an earlier pass of this loop.
    case "${corefile}" in
      *.log) continue ;;
    esac
    if [ "${corefile}" == "${hang_core}" ]; then
      continue
    fi

    GDB_LOG="${corefile}.log"
    echo -e "=== WARNING: SEGFAULT DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
    gdb -batch "${SERVER}" "${corefile}" -ex "thread apply all bt" | tee "${GDB_LOG}" || true
  done
}

# Start the inference server in the background and wait for it to become
# ready. SERVER_PID is set to the server's pid, or to 0 on any error
# (SERVER unset or missing, server died, timeout expired).
#
# Reads: SERVER, SERVER_ARGS, SERVER_LOG, SERVER_ERROR_LOG, SERVER_LD_PRELOAD,
# LD_PRELOAD, SERVER_TIMEOUT.
function run_server () {
    SERVER_PID=0

    if [[ -z "$SERVER" ]]; then
        echo "=== SERVER must be defined"
        return
    fi

    if [[ ! -f "$SERVER" ]]; then
        echo "=== $SERVER does not exist"
        return
    fi

    if [[ -n "$SERVER_LD_PRELOAD" ]]; then
      echo "=== Running LD_PRELOAD=$SERVER_LD_PRELOAD $SERVER $SERVER_ARGS"
    else
      echo "=== Running $SERVER $SERVER_ARGS"
    fi

    # stderr goes to SERVER_ERROR_LOG when that is set, otherwise it is
    # merged into SERVER_LOG together with stdout.
    if [[ -n "${SERVER_ERROR_LOG:-}" ]]; then
        LD_PRELOAD=$SERVER_LD_PRELOAD:${LD_PRELOAD} $SERVER $SERVER_ARGS > $SERVER_LOG 2>$SERVER_ERROR_LOG &
    else
        LD_PRELOAD=$SERVER_LD_PRELOAD:${LD_PRELOAD} $SERVER $SERVER_ARGS > $SERVER_LOG 2>&1 &
    fi
    SERVER_PID=$!

    wait_for_server_ready $SERVER_PID $SERVER_TIMEOUT
    if [ "$WAIT_RET" == "0" ]; then
        return
    fi

    # Get further debug information about server startup failure
    gdb_helper || true

    # Cleanup
    kill $SERVER_PID > /dev/null 2>&1 || true
    SERVER_PID=0
}

# Start the inference server in the background and wait for it to become
# live. SERVER_PID is set to the server's pid, or to 0 on any error
# (SERVER unset or missing, server died, timeout expired).
#
# Reads: SERVER, SERVER_ARGS, SERVER_LOG, SERVER_LD_PRELOAD, LD_PRELOAD,
# SERVER_TIMEOUT.
function run_server_tolive () {
    SERVER_PID=0

    if [[ -z "$SERVER" ]]; then
        echo "=== SERVER must be defined"
        return
    fi

    if [[ ! -f "$SERVER" ]]; then
        echo "=== $SERVER does not exist"
        return
    fi

    if [[ -n "$SERVER_LD_PRELOAD" ]]; then
      echo "=== Running LD_PRELOAD=$SERVER_LD_PRELOAD $SERVER $SERVER_ARGS"
    else
      echo "=== Running $SERVER $SERVER_ARGS"
    fi

    LD_PRELOAD=$SERVER_LD_PRELOAD:${LD_PRELOAD} $SERVER $SERVER_ARGS > $SERVER_LOG 2>&1 &
    SERVER_PID=$!

    wait_for_server_live $SERVER_PID $SERVER_TIMEOUT
    if [ "$WAIT_RET" == "0" ]; then
        return
    fi

    # The server never became live: stop it and report the failure.
    kill $SERVER_PID || true
    SERVER_PID=0
}

# Start the inference server in the background and return right away
# without waiting for it to come up. SERVER_PID is set to the pid of
# SERVER, or 0 on error.
function run_server_nowait () {
    SERVER_PID=0

    if [[ -z "$SERVER" ]]; then
        echo "=== SERVER must be defined"
        return
    fi

    if [[ ! -f "$SERVER" ]]; then
        echo "=== $SERVER does not exist"
        return
    fi

    # Windows (WSL or MSYS): LD_PRELOAD is not yet supported there, so a
    # requested preload is an error and the server is started without it.
    if [[ -v WSL_DISTRO_NAME || -v MSYSTEM ]]; then
        if [[ -n "$SERVER_LD_PRELOAD" ]]; then
            echo "=== LD_PRELOAD not supported for windows"
            return
        fi
        echo "=== Running $SERVER $SERVER_ARGS"

        $SERVER $SERVER_ARGS > $SERVER_LOG 2>&1 &
        SERVER_PID=$!
        return
    fi

    # Non-windows
    if [[ -n "$SERVER_LD_PRELOAD" ]]; then
        echo "=== Running LD_PRELOAD=$SERVER_LD_PRELOAD $SERVER $SERVER_ARGS"
    else
        echo "=== Running $SERVER $SERVER_ARGS"
    fi

    LD_PRELOAD=$SERVER_LD_PRELOAD:${LD_PRELOAD} $SERVER $SERVER_ARGS > $SERVER_LOG 2>&1 &
    SERVER_PID=$!
}

# Launch the inference server under a memory management tool such as
# Valgrind/ASAN (LEAKCHECK, invoked with LEAKCHECK_ARGS) and block until
# the server's health endpoint reports ready or the timeout expires.
# SERVER_PID ends up as the pid of the launched process, or 0 on error
# (an expired timeout included).
function run_server_leakcheck () {
    SERVER_PID=0

    if [[ -z "$SERVER" ]]; then
        echo "=== SERVER must be defined"
        return
    fi

    if [[ -z "$LEAKCHECK" ]]; then
        echo "=== LEAKCHECK must be defined"
        return
    fi

    if [[ ! -f "$SERVER" ]]; then
        echo "=== $SERVER does not exist"
        return
    fi

    if [[ -n "$SERVER_LD_PRELOAD" ]]; then
        echo "=== Running LD_PRELOAD=$SERVER_LD_PRELOAD $SERVER $SERVER_ARGS"
    else
        echo "=== Running $SERVER $SERVER_ARGS"
    fi

    LD_PRELOAD=$SERVER_LD_PRELOAD:${LD_PRELOAD} $LEAKCHECK $LEAKCHECK_ARGS $SERVER $SERVER_ARGS > $SERVER_LOG 2>&1 &
    SERVER_PID=$!

    wait_for_server_ready $SERVER_PID $SERVER_TIMEOUT
    if [[ "$WAIT_RET" == "0" ]]; then
        return
    fi

    # The wait did not succeed: stop the process and signal the failure
    # through SERVER_PID.
    kill $SERVER_PID || true
    SERVER_PID=0
}

# Kill inference server. SERVER_PID must be set to the server's pid.
#
# SERVER_PID is only used on the non-Windows path. When WSL_DISTRO_NAME
# or MSYSTEM is set, tritonserver.exe tasks are forcefully terminated by
# image name instead.
function kill_server () {
    # Under WSL the linux PID is not the same as the windows PID and
    # there doesn't seem to be a way to find the mapping between
    # them. So we instead assume that this test is the only test
    # running on the system and just SIGINT all the tritonserver
    # windows executables running on the system. At least, ideally we
    # would like to use windows-kill to SIGINT, unfortunately that
    # causes the entire WSL shell to just exit. So instead we must use
    # taskkill.exe which can only forcefully kill tritonserver which
    # means that it does not gracefully exit.
    if [[ -v WSL_DISTRO_NAME ]]; then
        # Disable -x as it makes output below hard to read
        # The current option settings ('set +o' output, plus 'set -e' when
        # errexit is on) are captured first so the eval at the end of this
        # branch can restore them.
        oldstate="$(set +o)"; [[ -o errexit ]] && oldstate="$oldstate; set -e"
        set +x
        set +e

        # List the tritonserver.exe tasks in CSV form.
        tasklist=$(/mnt/c/windows/system32/tasklist.exe /FI 'IMAGENAME eq tritonserver.exe' /FO CSV)
        echo "=== Windows tritonserver tasks"
        echo "$tasklist"

        # Number of listing lines that mention tritonserver.
        taskcount=$(echo "$tasklist" | grep -c tritonserver)
        if (( $taskcount > 0 )); then
            # Split each CSV row on commas: first field is the image name,
            # second the pid, the remainder is ignored.
            echo "$tasklist" | while IFS=, read -r taskname taskpid taskrest; do
                if [[ "$taskname" == "\"tritonserver.exe\"" ]]; then
                    # Strip the surrounding double quotes from the pid field.
                    taskpid="${taskpid%\"}"
                    taskpid="${taskpid#\"}"
                    echo "=== killing windows tritonserver.exe task $taskpid"
                    # windows-kill.exe -SIGINT $taskpid
                    /mnt/c/windows/system32/taskkill.exe /PID $taskpid /F /T
                fi
            done
        fi

        # Restore the shell options saved on entry to this branch.
        set +vx; eval "$oldstate"
    elif [[ -v MSYSTEM ]] ; then
        # MSYS shell: force-kill by image name.
        taskkill //F //IM tritonserver.exe
    else
        # Non-windows...
        # Signal the server, then wait for it so the process is reaped
        # before returning.
        kill $SERVER_PID
        wait $SERVER_PID
    fi
}

# Run nvidia-smi to monitor GPU utilization.
# Writes utilization into MONITOR_LOG. If MONITOR_ID is specified only
# that GPU PCI bus ID is monitored.
# Waits up to MONITOR_FILE_TIMEOUT seconds for MONITOR_LOG to appear.
# Sets MONITOR_PID to the pid of the nvidia-smi process, or 0 if the log
# file did not appear in time.
function run_gpu_monitor () {
    MONITOR_PID=0

    MONITOR_ID_ARG=
    if [ -n "$MONITOR_ID" ]; then
        MONITOR_ID_ARG="-i $MONITOR_ID"
    fi

    # MONITOR_ID_ARG is deliberately unquoted so that it expands to two
    # words ("-i" and the id), or to nothing when no id was given.
    nvidia-smi dmon -s u $MONITOR_ID_ARG -f "$MONITOR_LOG" &
    MONITOR_PID=$!

    local exists_secs="$MONITOR_FILE_TIMEOUT"
    until [ -f "$MONITOR_LOG" ] || [ "$exists_secs" -le 0 ]; do
        sleep 1
        exists_secs=$((exists_secs - 1))
    done

    # Decide success from the file itself rather than from the counter, so
    # a log that shows up during the final second is not reported as a
    # timeout.
    if [ ! -f "$MONITOR_LOG" ]; then
        echo "=== Timeout. Unable to find '$MONITOR_LOG'"
        kill $MONITOR_PID || true
        MONITOR_PID=0
    fi
}

# Create a model version directory for nop models in the model repository.
#
# Arguments:
#   $1 - model repository directory
#
# For every entry of $1 whose name contains "nop_", creates the
# sub-directory "1". Entry names containing whitespace are handled.
function create_nop_version_dir () {
    local dest_dir=$1

    if [ ! -d "$dest_dir" ]; then
        echo "=== '$dest_dir' is not a directory" >&2
        return 0
    fi

    # Glob instead of parsing 'ls | grep' output, which word-splits names
    # that contain whitespace.
    local model_path
    for model_path in "$dest_dir"/*nop_*; do
        # Without a match the pattern is left unexpanded; skip it.
        [ -e "$model_path" ] || continue
        mkdir -p "$model_path/1"
    done
}

# Check Python unittest results.
#
# Arguments:
#   $1 - path of the log file holding the test output
#   $2 - number of tests expected to have run
#
# The last log line matching "total ... errors ... failures" is passed to
# jq, which reads its .total, .errors and .failures fields into the
# globals num_tests, num_errors and num_failures.
#
# Returns 0 only when the summary could be parsed, it reports no errors
# and no failures, and the number of tests equals $2. In every other case
# the log is printed, a "Test Failed" banner is appended to it (except
# when $2 is missing) and 1 is returned.
function check_test_results () {
    local log_file=$1
    local expected_num_tests=$2

    if [[ -z "$expected_num_tests" ]]; then
        echo "=== expected number of tests must be defined"
        return 1
    fi

    # Locate the summary line once instead of re-scanning the log for each
    # field. '|| true' keeps a missing log or summary from aborting callers
    # that run under 'set -e'; such cases surface as a parse failure below.
    local summary_line
    summary_line=$(grep -E ".*total.*errors.*failures.*" "$log_file" | tail -n 1) || true

    num_failures=$(jq .failures <<< "$summary_line") || true
    num_tests=$(jq .total <<< "$summary_line") || true
    num_errors=$(jq .errors <<< "$summary_line") || true

    # Number regular expression. A failed or empty extraction leaves a
    # non-numeric value, so this check also covers extraction errors.
    local re='^[0-9]+$'

    if ! [[ $num_failures =~ $re && $num_tests =~ $re && $num_errors =~ $re ]]; then
        cat "$log_file"
        echo -e "\n***\n*** Test Failed: unable to parse test results\n***" >> "$log_file"
        return 1
    fi
    if [[ $num_errors != "0" ]] || [[ $num_failures != "0" ]] || [[ $num_tests -ne $expected_num_tests ]]; then
        cat "$log_file"
        echo -e "\n***\n*** Test Failed: Expected $expected_num_tests test(s), $num_tests test(s) executed, $num_errors test(s) had error, and $num_failures test(s) failed. \n***" >> "$log_file"
        return 1
    fi

    return 0
}

# Run multiple inference servers and return immediately.
#
# Arguments:
#   $1 - number of servers to start (nothing is started when omitted)
#
# Server N is started with the arguments in SERVER<N>_ARGS and logs to the
# file named by SERVER<N>_LOG; its pid is stored in SERVER<N>_PID. The
# global array server_pid is reset to the names of those pid variables so
# that kill_servers can find them. If SERVER is undefined or missing,
# nothing is started and no pid variable is touched.
function run_multiple_servers_nowait () {
    if [ -z "$SERVER" ]; then
        echo "=== SERVER must be defined"
        return
    fi

    if [ ! -f "$SERVER" ]; then
        echo "=== $SERVER does not exist"
        return
    fi

    local server_count=${1:-0}
    # Keep the loop counter local so a caller's own 'i' is not overwritten.
    local i
    server_pid=()
    local server_args=()
    local server_log=()
    for (( i=0; i<server_count; i++ )); do
        printf -v "SERVER${i}_PID" '%s' 0
        server_pid+=("SERVER${i}_PID")
        server_args+=("SERVER${i}_ARGS")
        server_log+=("SERVER${i}_LOG")
    done

    for (( i=0; i<server_count; i++ )); do
        # ${!array[$i]} is an indirect expansion: the array element holds a
        # variable name and the expansion yields that variable's value.
        if [ -z "$SERVER_LD_PRELOAD" ]; then
            echo "=== Running $SERVER ${!server_args[$i]}"
        else
            echo "=== Running LD_PRELOAD=$SERVER_LD_PRELOAD $SERVER ${!server_args[$i]}"
        fi
        LD_PRELOAD=$SERVER_LD_PRELOAD:${LD_PRELOAD} $SERVER ${!server_args[$i]} > "${!server_log[$i]}" 2>&1 &
        printf -v "SERVER${i}_PID" '%s' "$!"
    done
}

# Kill all inference servers.
#
# Walks the global array server_pid, whose elements are the NAMES of the
# variables holding each server's pid, and kills then waits for each one.
function kill_servers () {
    # Keep the loop counter local so a caller's own 'i' is not overwritten.
    local i pid
    for (( i=0; i<${#server_pid[@]}; i++ )); do
        pid=${!server_pid[$i]}
        # Skip servers without a real pid. 'kill 0' in particular would
        # signal every process in the current process group, this script
        # included.
        if [[ -z "$pid" || "$pid" == "0" ]]; then
            continue
        fi
        kill $pid
        wait $pid
    done
}

# Recursively copy a local directory ($1) to a GCS path ($2) with gsutil.
function gcs_upload () {
    local src=$1 dst=$2
    gsutil cp -r $src $dst
}

# Sort an array in place, in ascending numeric order (sort -n).
# Call with sort_array <array_name>
# Example: sort_array array
#
# The array is passed by name. Arrays with at most one element are left
# untouched. The caller's IFS is not modified and no helper variable is
# left behind in the global scope.
sort_array() {
    # The nameref carries a function-specific name: a nameref called 'arr'
    # would refer to itself when the caller's array is also named 'arr'.
    local -n _sort_array_ref=$1

    if (( ${#_sort_array_ref[@]} <= 1 )); then
        return 0
    fi

    # One element per line through sort, read back line by line.
    local _sort_array_sorted
    mapfile -t _sort_array_sorted < <(printf '%s\n' "${_sort_array_ref[@]}" | sort -n)
    _sort_array_ref=("${_sort_array_sorted[@]}")
}

# Remove an array's outliers
# Call with remove_array_outliers <array_name> <percent to trim from both sides>
# Example: remove_array_outliers array 5
#
# Drops (length * percent / 100) elements, rounded down, from the front
# and the same number from the back of the array, in place. The array is
# passed by name and is not sorted here, so callers that want to discard
# the smallest and largest values presumably sort it first (e.g. with
# sort_array). Arrays with at most one element are left untouched; a
# percentage that would trim more than the whole array leaves it empty.
remove_array_outliers() {
    # The nameref carries a function-specific name: a nameref called 'arr'
    # would refer to itself when the caller's array is also named 'arr'.
    local -n _rao_values=$1
    local percent=${2:-0}
    local length=${#_rao_values[@]}

    if (( length <= 1 )); then
        return 0
    fi

    local trim_count=$(( length * percent / 100 ))
    # Number of elements that remain after trimming both ends; a negative
    # slice length is an error in bash, so clamp it at zero.
    local keep_count=$(( length - trim_count * 2 ))
    if (( keep_count < 0 )); then
        keep_count=0
    fi

    _rao_values=("${_rao_values[@]:trim_count:keep_count}")
}

function setup_virtualenv() {
    # Outside MSYS, pytest goes into a fresh virtual environment ("venv",
    # with access to the system site packages) that is activated in the
    # current shell. Under MSYS it is installed directly with pip3.
    if [[ ! -v MSYSTEM ]]; then
      virtualenv --system-site-packages venv
      source venv/bin/activate
      pip install pytest
    else
      pip3 install pytest
    fi

    # Extra client packages are installed only when TEST_WINDOWS is 1.
    if [[ "${TEST_WINDOWS}" == "1" ]]; then
      pip3 install "numpy<2" tritonclient[all]
    fi
}

function deactivate_virtualenv() {
  # Nothing to deactivate or clean up when running under MSYS.
  if [[ -v MSYSTEM ]]; then
    return
  fi

  # Leave the virtual environment and delete its directory.
  deactivate
  rm -fr venv
}


================================================
FILE: qa/custom_models/custom_dyna_sequence_int32/config.pbtxt
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model "custom_dyna_sequence_int32": "dyna_sequence" backend on a CPU
# instance, using sequence batching with an `oldest` strategy block.
name: "custom_dyna_sequence_int32"
backend: "dyna_sequence"
max_batch_size: 8
default_model_filename: "libtriton_dyna_sequence.so"
sequence_batching {
  # 5,000,000 microseconds = 5 seconds.
  max_sequence_idle_microseconds: 5000000
  oldest {
    max_candidate_sequences: 6
    preferred_batch_size: [ 4 ]
    max_queue_delay_microseconds: 0
  }
  # Control tensors. START, END and READY are int32 flags encoded as
  # 0 (false) / 1 (true); CORRID carries the correlation ID as TYPE_UINT64.
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "END"
      control [
        {
          kind: CONTROL_SEQUENCE_END
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "CORRID"
      control [
        {
          kind: CONTROL_SEQUENCE_CORRID
          data_type: TYPE_UINT64
        }
      ]
    }
  ]
}
# One INT32 input and one INT32 output, both declared with dims [ -1 ].
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# String parameter handed to the backend. NOTE(review): presumably a
# per-execution delay in milliseconds - confirm against the backend source.
parameters [
  {
    key: "execute_delay_ms"
    value: { string_value: "3" }
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/custom_models/custom_sequence_int32/config.pbtxt
================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model "custom_sequence_int32": "sequence" backend on a CPU instance,
# using sequence batching with START and READY control tensors only.
name: "custom_sequence_int32"
backend: "sequence"
max_batch_size: 8
default_model_filename: "libtriton_sequence.so"
sequence_batching {
  # 5,000,000 microseconds = 5 seconds.
  max_sequence_idle_microseconds: 5000000
  # Both control tensors are int32 flags encoded as 0 (false) / 1 (true).
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          int32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
}
# One INT32 input and one INT32 output, both declared with dims [ -1 ].
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
# String parameter handed to the backend. NOTE(review): presumably a
# per-execution delay in milliseconds - confirm against the backend source.
parameters [
  {
    key: "execute_delay_ms"
    value: { string_value: "3" }
  }
]
instance_group [
  {
    kind: KIND_CPU
  }
]


================================================
FILE: qa/custom_models/custom_zero_1_float32/config.pbtxt
================================================
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model "custom_zero_1_float32": "identity" backend with a single FP32
# input and a single FP32 output, each of dims [ 1 ], and a maximum batch
# size of 1.
name: "custom_zero_1_float32"
backend: "identity"
max_batch_size: 1
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/ensemble_models/batch_to_nobatch_float32_float32_float32/config.pbtxt
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble "batch_to_nobatch_float32_float32_float32" (max_batch_size 8):
# a batch model feeds two invocations of a non-batch model.
#
# Data flow:
#   INPUT0, INPUT1 -> onnx_float32_float32_float32 -> temp_output_0, temp_output_1
#   temp_output_0  -> custom_nobatch_zero_1_float32 -> OUTPUT0
#   temp_output_1  -> custom_nobatch_zero_1_float32 -> OUTPUT1
name: "batch_to_nobatch_float32_float32_float32"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      # batch model
      model_name: "onnx_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_output_0"
      }
      output_map {
        key: "OUTPUT1"
        value: "temp_output_1"
      }
    },
    {
      # non-batch model with +1 dimension [-1, -1]
      model_name: "custom_nobatch_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_output_0"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
    },
    {
      # non-batch model with +1 dimension [-1, -1]
      model_name: "custom_nobatch_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_output_1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/ensemble_models/batch_to_nobatch_nobatch_float32_float32_float32/config.pbtxt
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble "batch_to_nobatch_nobatch_float32_float32_float32": the
# ensemble itself is declared with max_batch_size 0 and full [ 8, 16 ]
# tensor shapes, while its first step is a batch model.
#
# Data flow:
#   INPUT0, INPUT1 -> onnx_float32_float32_float32 -> temp_output_0, temp_output_1
#   temp_output_0  -> custom_nobatch_zero_1_float32 -> OUTPUT0
#   temp_output_1  -> custom_nobatch_zero_1_float32 -> OUTPUT1
name: "batch_to_nobatch_nobatch_float32_float32_float32"
platform: "ensemble"
max_batch_size: 0
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 8, 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 8, 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 8, 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 8, 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      # batch model
      model_name: "onnx_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_output_0"
      }
      output_map {
        key: "OUTPUT1"
        value: "temp_output_1"
      }
    },
    {
      # non-batch model with +1 dimension [-1, -1]
      model_name: "custom_nobatch_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_output_0"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
    },
    {
      # non-batch model with +1 dimension [-1, -1]
      model_name: "custom_nobatch_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "temp_output_1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/ensemble_models/label_override_int32_float32_float32/config.pbtxt
================================================
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble "label_override_int32_float32_float32": a single step that
# passes INPUT0/INPUT1 to "wrong_label_int32_float32_float32" and exposes
# that model's OUTPUT0/OUTPUT1 as the ensemble outputs. The ensemble
# attaches its own label file to OUTPUT0.
# NOTE(review): going by the names, the ensemble's labels are meant to
# take precedence over those of the composing model - confirm.
name: "label_override_int32_float32_float32"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
    # Label file for OUTPUT0; OUTPUT1 has none.
    label_filename: "output0_labels.txt"
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      model_name: "wrong_label_int32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/ensemble_models/label_override_int32_float32_float32/output0_labels.txt
================================================
label0
label1
label2
label3
label4
label5
label6
label7
label8
label9
label10
label11
label12
label13
label14
label15


================================================
FILE: qa/ensemble_models/mix_ensemble_int32_float32_float32/config.pbtxt
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble "mix_ensemble_int32_float32_float32": INT32 inputs, FP32
# outputs, produced by a seven-step chain of composing models.
#
# Data flow (ensemble tensor names):
#   1. onnx_int32_int32_int32(INPUT0, INPUT1)            OUTPUT0 -> add_value_int
#   2. onnx_int32_object_object(add_value_int, INPUT1)   OUTPUT1 -> input_0_object
#   3. onnx_int32_object_object(add_value_int, INPUT0)   OUTPUT1 -> input_1_object
#   4. onnx_object_int32_int32(input_0_object, input_1_object)
#                                                        OUTPUT0 -> another_add_value_int
#   5. mix_type_int32_float32_float32(another_add_value_int, INPUT1)
#                                                        OUTPUT1 -> input_0_float
#   6. mix_type_int32_float32_float32(another_add_value_int, INPUT0)
#                                                        OUTPUT1 -> input_1_float
#   7. mix_platform_float32_float32_float32(input_0_float, input_1_float)
#                                                        -> OUTPUT0, OUTPUT1
name: "mix_ensemble_int32_float32_float32"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      # Step 1: only OUTPUT0 of this model is mapped.
      model_name: "onnx_int32_int32_int32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "add_value_int"
      }
    },
    {
      # Step 2: pairs add_value_int with the ensemble's INPUT1.
      model_name: "onnx_int32_object_object"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "add_value_int"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT1"
        value: "input_0_object"
      }
    },
    {
      # Step 3: same model as step 2, paired with the ensemble's INPUT0.
      model_name: "onnx_int32_object_object"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "add_value_int"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "input_1_object"
      }
    },
    {
      # Step 4: combines the two tensors produced by steps 2 and 3.
      model_name: "onnx_object_int32_int32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "input_0_object"
      }
      input_map {
        key: "INPUT1"
        value: "input_1_object"
      }
      output_map {
        key: "OUTPUT0"
        value: "another_add_value_int"
      }
    },
    {
      # Step 5: pairs another_add_value_int with the ensemble's INPUT1.
      model_name: "mix_type_int32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "another_add_value_int"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT1"
        value: "input_0_float"
      }
    },
    {
      # Step 6: same model as step 5, paired with the ensemble's INPUT0.
      model_name: "mix_type_int32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "another_add_value_int"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "input_1_float"
      }
    },
    {
      # Step 7: produces both ensemble outputs.
      model_name: "mix_platform_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "input_0_float"
      }
      input_map {
        key: "INPUT1"
        value: "input_1_float"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/ensemble_models/mix_nobatch_batch_float32_float32_float32/config.pbtxt
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "mix_nobatch_batch_float32_float32_float32"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      # one of the inputs first goes to a batch model with dimension [-1]
      model_name: "custom_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "batch_input0"
      }
    },
    {
      # it then goes to a non-batch model with one extra dimension [-1, -1]
      model_name: "custom_nobatch_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "batch_input0"
      }
      output_map {
        key: "OUTPUT0"
        value: "nobatch_input0"
      }
    },
    {
      # batch model
      model_name: "onnx_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "nobatch_input0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/ensemble_models/mix_platform_float32_float32_float32/config.pbtxt
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "mix_platform_float32_float32_float32"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      model_name: "onnx_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "double_input0"
      }
    },
    {
      model_name: "libtorch_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "INPUT1"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT__0"
        value: "double_input1"
      }
    },
    {
      model_name: "libtorch_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "double_input0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT__1"
        value: "input0_val"
      }
    },
    {
      model_name: "onnx_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "double_input1"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT1"
        value: "input1_val"
      }
    },
    {
      model_name: "libtorch_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "input0_val"
      }
      input_map {
        key: "INPUT1"
        value: "input1_val"
      }
      output_map {
        key: "OUTPUT__0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT__1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/ensemble_models/mix_type_int32_float32_float32/config.pbtxt
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "mix_type_int32_float32_float32"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      model_name: "onnx_int32_int32_int32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "add_value_int"
      }
    },
    {
      model_name: "onnx_int32_object_object"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "add_value_int"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT1"
        value: "input_0_object"
      }
    },
    {
      model_name: "onnx_int32_object_object"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "add_value_int"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "input_1_object"
      }
    },
    {
      model_name: "onnx_object_int32_int32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "input_0_object"
      }
      input_map {
        key: "INPUT1"
        value: "input_1_object"
      }
      output_map {
        key: "OUTPUT0"
        value: "another_add_value_int"
      }
    },
    {
      model_name: "onnx_int32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "another_add_value_int"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT1"
        value: "input_0_float"
      }
    },
    {
      model_name: "onnx_int32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "another_add_value_int"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "input_1_float"
      }
    },
    {
      model_name: "onnx_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "input_0_float"
      }
      input_map {
        key: "INPUT1"
        value: "input_1_float"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/ensemble_models/nobatch_to_batch_float32_float32_float32/config.pbtxt
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "nobatch_to_batch_float32_float32_float32"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      # nobatch model with +1 dimension [-1, -1]
      model_name: "custom_nobatch_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_input_0"
      }
    },
    {
      # nobatch model with +1 dimension [-1, -1]
      model_name: "custom_nobatch_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_input_1"
      }
    },
    {
      # batch model
      model_name: "onnx_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "temp_input_0"
      }
      input_map {
        key: "INPUT1"
        value: "temp_input_1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/ensemble_models/nobatch_to_batch_nobatch_float32_float32_float32/config.pbtxt
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "nobatch_to_batch_nobatch_float32_float32_float32"
platform: "ensemble"
max_batch_size: 0
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 8, 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 8, 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 8, 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 8, 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      # nobatch model with +1 dimension [-1, -1]
      model_name: "custom_nobatch_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_input_0"
      }
    },
    {
      # nobatch model with +1 dimension [-1, -1]
      model_name: "custom_nobatch_zero_1_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "temp_input_1"
      }
    },
    {
      # batch model
      model_name: "onnx_float32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "temp_input_0"
      }
      input_map {
        key: "INPUT1"
        value: "temp_input_1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/ensemble_models/wrong_label_int32_float32_float32/config.pbtxt
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "wrong_label_int32_float32_float32"
platform: "ensemble"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
    label_filename: "output0_labels.txt"
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    {
      model_name: "onnx_int32_float32_float32"
      model_version: 1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/ensemble_models/wrong_label_int32_float32_float32/output0_labels.txt
================================================
label0a
label1a
label2a
label3a
label4a
label5a
label6a
label7a
label8a
label9a
label10a
label11a
label12a
label13a
label14a
label15a


================================================
FILE: qa/openvino_models/README.md
================================================
<!--
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

The models in this directory are TF2/keras models converted into OpenVINO
models. The "fixed_batch" model has a fixed batch dimension of 1 and the
"dynamic_batch" model has a variable batch dimension.

The models are currently in **beta**, which means they might not work as
expected and could be **changed, moved or deleted without warning** in the
future.


================================================
FILE: qa/openvino_models/dynamic_batch/1/model.mapping
================================================
<?xml version="1.0"?>
<mapping>
	<map>
		<framework name="input1" output_port_id="input1:0" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input1" output_port_id="input1:0" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input1" output_port_id="input1:0" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input1" output_port_id="input1" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input1" output_port_id="input1" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input1" output_port_id="input1" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/input/_0/placeholder_out_port_0" output_port_id="Func/PartitionedCall/input/_0:0" />
		<IR name="Func/PartitionedCall/input/_0/placeholder_out_port_0" output_port_id="0" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/input/_0" output_port_id="Func/PartitionedCall/input/_0:0" />
		<IR name="Func/PartitionedCall/input/_0/placeholder_out_port_0" output_port_id="0" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="Func/PartitionedCall/output/_3:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_3" output_port_id="Func/PartitionedCall/output/_3:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity_1" output_port_id="Func/PartitionedCall/output/_3:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity_1" output_port_id="Func/PartitionedCall/output/_3:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="Func/PartitionedCall/output/_3:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_3" output_port_id="Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity_1" output_port_id="Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity_1" output_port_id="Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="PartitionedCall/Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_3" output_port_id="PartitionedCall/Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity_1" output_port_id="PartitionedCall/Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity_1" output_port_id="PartitionedCall/Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="PartitionedCall/Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="PartitionedCall/model/tf.math.subtract/Sub:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_3" output_port_id="PartitionedCall/model/tf.math.subtract/Sub:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity_1" output_port_id="PartitionedCall/model/tf.math.subtract/Sub:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity_1" output_port_id="PartitionedCall/model/tf.math.subtract/Sub:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="PartitionedCall/model/tf.math.subtract/Sub:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="PartitionedCall/model/tf.math.add/Add:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="PartitionedCall/model/tf.math.add/Add:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity" output_port_id="PartitionedCall/model/tf.math.add/Add:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity" output_port_id="PartitionedCall/model/tf.math.add/Add:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_2" output_port_id="PartitionedCall/model/tf.math.add/Add:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="PartitionedCall/Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="PartitionedCall/Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity" output_port_id="PartitionedCall/Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity" output_port_id="PartitionedCall/Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_2" output_port_id="PartitionedCall/Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity" output_port_id="Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity" output_port_id="Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_2" output_port_id="Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="Func/PartitionedCall/output/_2:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="Func/PartitionedCall/output/_2:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity" output_port_id="Func/PartitionedCall/output/_2:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity" output_port_id="Func/PartitionedCall/output/_2:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_2" output_port_id="Func/PartitionedCall/output/_2:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
</mapping>


================================================
FILE: qa/openvino_models/fixed_batch/1/model.mapping
================================================
<?xml version="1.0"?>
<mapping>
	<map>
		<framework name="input1" output_port_id="input1:0" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input1" output_port_id="input1:0" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input1" output_port_id="input1:0" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input1" output_port_id="input1" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input1" output_port_id="input1" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input1" output_port_id="input1" />
		<IR name="input1" output_port_id="0" />
	</map>
	<map>
		<framework name="input0" output_port_id="input0:0" />
		<IR name="input0" output_port_id="0" />
	</map>
	<map>
		<framework name="input0" output_port_id="input0:0" />
		<IR name="input0" output_port_id="0" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/input/_0" output_port_id="input0:0" />
		<IR name="input0" output_port_id="0" />
	</map>
	<map>
		<framework name="input0" output_port_id="Func/PartitionedCall/input/_0:0" />
		<IR name="input0" output_port_id="0" />
	</map>
	<map>
		<framework name="input0" output_port_id="Func/PartitionedCall/input/_0:0" />
		<IR name="input0" output_port_id="0" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/input/_0" output_port_id="Func/PartitionedCall/input/_0:0" />
		<IR name="input0" output_port_id="0" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="Func/PartitionedCall/output/_3:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_3" output_port_id="Func/PartitionedCall/output/_3:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity_1" output_port_id="Func/PartitionedCall/output/_3:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity_1" output_port_id="Func/PartitionedCall/output/_3:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="Func/PartitionedCall/output/_3:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_3" output_port_id="Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity_1" output_port_id="Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity_1" output_port_id="Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="PartitionedCall/Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_3" output_port_id="PartitionedCall/Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity_1" output_port_id="PartitionedCall/Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity_1" output_port_id="PartitionedCall/Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="PartitionedCall/Identity_1:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="PartitionedCall/model/tf.math.subtract/Sub:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_3" output_port_id="PartitionedCall/model/tf.math.subtract/Sub:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity_1" output_port_id="PartitionedCall/model/tf.math.subtract/Sub:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity_1" output_port_id="PartitionedCall/model/tf.math.subtract/Sub:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="PartitionedCall/model/tf.math.subtract/Sub:0" />
		<IR name="PartitionedCall/model/tf.math.subtract/Sub" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="PartitionedCall/model/tf.math.add/Add:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="PartitionedCall/model/tf.math.add/Add:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity" output_port_id="PartitionedCall/model/tf.math.add/Add:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity" output_port_id="PartitionedCall/model/tf.math.add/Add:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_2" output_port_id="PartitionedCall/model/tf.math.add/Add:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="PartitionedCall/Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="PartitionedCall/Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity" output_port_id="PartitionedCall/Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity" output_port_id="PartitionedCall/Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_2" output_port_id="PartitionedCall/Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity" output_port_id="Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity" output_port_id="Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_2" output_port_id="Identity:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="Func/PartitionedCall/output/_2:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/model/tf.math.add/Add" output_port_id="Func/PartitionedCall/output/_2:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="PartitionedCall/Identity" output_port_id="Func/PartitionedCall/output/_2:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Identity" output_port_id="Func/PartitionedCall/output/_2:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
	<map>
		<framework name="Func/PartitionedCall/output/_2" output_port_id="Func/PartitionedCall/output/_2:0" />
		<IR name="PartitionedCall/model/tf.math.add/Add" output_port_id="2" />
	</map>
</mapping>


================================================
FILE: qa/python_models/add_sub/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the add_sub QA model, served by the Python backend.
backend: "python"

# Two FP32 inputs, each declared with dims [ 16 ].
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
# Two FP32 outputs with the same dims as the inputs. The accompanying
# model.py fills OUTPUT0 with INPUT0 + INPUT1 and OUTPUT1 with INPUT0 - INPUT1.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

# Model instances are placed on the CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/add_sub/model.py
================================================
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Python-backend QA model producing the sum and difference of two inputs.

    For every request, ``OUTPUT0`` carries ``INPUT0 + INPUT1`` and ``OUTPUT1``
    carries ``INPUT0 - INPUT1``, each cast to the data type configured for the
    corresponding output.
    """

    def initialize(self, args):
        """Parse the model configuration and cache the output numpy dtypes.

        ``args["model_config"]`` is decoded as JSON; the decoded configuration
        is kept on ``self.model_config`` and the ``data_type`` of ``OUTPUT0``
        and ``OUTPUT1`` is converted to a numpy dtype for use in ``execute``.
        """
        config = json.loads(args["model_config"])
        self.model_config = config

        out0_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT0")
        out1_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(out0_cfg["data_type"])
        self.output1_dtype = pb_utils.triton_string_to_numpy(out1_cfg["data_type"])

    def execute(self, requests):
        """Build one response per request holding the sum and the difference.

        Returns a list of ``pb_utils.InferenceResponse`` objects in the same
        order as ``requests``.
        """
        dtype0, dtype1 = self.output0_dtype, self.output1_dtype

        responses = []
        for request in requests:
            tensor0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            tensor1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")
            lhs = tensor0.as_numpy()
            rhs = tensor1.as_numpy()

            # Byte-string / object arrays (detected on INPUT0 only) are
            # converted to int32 before doing any arithmetic on them.
            if lhs.dtype.type is np.bytes_ or lhs.dtype == np.object_:
                lhs = lhs.astype(np.int32)
                rhs = rhs.astype(np.int32)

            total, diff = lhs + rhs, lhs - rhs

            outputs = [
                pb_utils.Tensor("OUTPUT0", total.astype(dtype0)),
                pb_utils.Tensor("OUTPUT1", diff.astype(dtype1)),
            ]
            responses.append(pb_utils.InferenceResponse(outputs))
        return responses


================================================
FILE: qa/python_models/add_sub_gpu/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the add_sub_gpu QA model, served by the Python
# backend.
name: "add_sub_gpu"
backend: "python"

# Two FP32 inputs, each declared with dims [ 4 ].
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]

  }
]
# Two FP32 outputs with the same dims as the inputs.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]


  }
]

# Model instances are placed on the GPU.
instance_group [ { kind: KIND_GPU }]


================================================
FILE: qa/python_models/async_execute_decouple/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the async_execute_decouple QA model, served by the
# Python backend with batching enabled up to 8.
backend: "python"
max_batch_size: 8

# One FP32 value per batch item; the accompanying model.py sleeps for that
# many seconds and rejects negative values.
input [
  {
    name: "WAIT_SECONDS"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
# Placeholder output; the accompanying model.py fills it with zeros.
output [
  {
    name: "DUMMY_OUT"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# Model instances are placed on the CPU.
instance_group [{ kind: KIND_CPU }]
# Decoupled mode: model.py delivers responses through each request's
# response sender instead of returning them from execute().
model_transaction_policy { decoupled: True }


================================================
FILE: qa/python_models/async_execute_decouple/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import asyncio

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Decoupled QA model whose coroutine ``execute`` sleeps on request.

    Each batch item of the ``WAIT_SECONDS`` input schedules an
    ``asyncio.sleep`` of that duration; once every sleep has finished, one
    final response with a zero-filled ``DUMMY_OUT`` tensor is sent per request.
    """

    async def execute(self, requests):
        """Sleep for the requested durations, then send one final response each.

        A negative wait value aborts the whole call through
        ``raise_value_error``, which raises ``ValueError``.
        """
        sleep_tasks = []
        pending = []
        for request in requests:
            wait_matrix = pb_utils.get_input_tensor_by_name(
                request, "WAIT_SECONDS"
            ).as_numpy()
            for row in wait_matrix:
                seconds = row[0]
                if seconds < 0:
                    self.raise_value_error(requests)
                sleep_tasks.append(asyncio.create_task(asyncio.sleep(seconds)))
            # Remember who to answer and how many batch items were received.
            pending.append((request.get_response_sender(), wait_matrix.shape[0]))

        # While awaiting here the event loop is free, so this decoupled execute
        # should be able to run in the background concurrently with other
        # instances of decoupled execute, as long as nothing blocks the loop.
        await asyncio.gather(*sleep_tasks)

        for sender, batch_size in pending:
            dummy = pb_utils.Tensor("DUMMY_OUT", np.zeros(batch_size, np.float32))
            sender.send(
                pb_utils.InferenceResponse(output_tensors=[dummy]),
                flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL,
            )

        return None

    def raise_value_error(self, requests):
        """Send the final flag on every request, then raise ``ValueError``."""
        # TODO: Model may raise exception without sending complete final
        senders = (request.get_response_sender() for request in requests)
        for sender in senders:
            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        raise ValueError("wait_secs cannot be negative")


================================================
FILE: qa/python_models/async_execute_decouple_bls/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the async_execute_decouple_bls QA model, served by
# the Python backend with batching enabled up to 8.
backend: "python"
max_batch_size: 8

# One FP32 value per batch item; the accompanying model.py forwards it to the
# "async_execute_decouple" model.
input [
  {
    name: "WAIT_SECONDS"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
# Output relayed by model.py from the responses of the forwarded request.
output [
  {
    name: "DUMMY_OUT"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# Model instances are placed on the CPU.
instance_group [{ kind: KIND_CPU }]
# Decoupled mode: model.py delivers responses through each request's
# response sender instead of returning them from execute().
model_transaction_policy { decoupled: True }


================================================
FILE: qa/python_models/async_execute_decouple_bls/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import asyncio

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Decoupled QA model that relays requests to ``async_execute_decouple``.

    Every incoming request is forwarded as an asynchronous decoupled inference
    request, and each response that comes back is passed on to the original
    caller through the request's response sender.
    """

    async def _execute_a_request(self, request):
        """Forward one request and relay all of its responses."""
        wait_seconds = pb_utils.get_input_tensor_by_name(
            request, "WAIT_SECONDS"
        ).as_numpy()
        forwarded_request = pb_utils.InferenceRequest(
            model_name="async_execute_decouple",
            inputs=[pb_utils.Tensor("WAIT_SECONDS", wait_seconds)],
            requested_output_names=["DUMMY_OUT"],
        )
        inner_responses = await forwarded_request.async_exec(decoupled=True)

        sender = request.get_response_sender()
        for inner_response in inner_responses:
            dummy_values = pb_utils.get_output_tensor_by_name(
                inner_response, "DUMMY_OUT"
            ).as_numpy()
            relayed = pb_utils.Tensor("DUMMY_OUT", dummy_values)
            sender.send(pb_utils.InferenceResponse(output_tensors=[relayed]))
        # Close the stream once every relayed response has been sent.
        sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

    async def execute(self, requests):
        """Handle all requests concurrently; responses go via the senders."""
        await asyncio.gather(
            *(self._execute_a_request(request) for request in requests)
        )
        return None


================================================
FILE: qa/python_models/auto_complete/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Add/sub QA model that declares its own inputs and outputs.

    ``auto_complete_config`` registers two FP32 inputs and two FP32 outputs;
    ``execute`` fills ``OUTPUT0`` with ``INPUT0 + INPUT1`` and ``OUTPUT1`` with
    ``INPUT0 - INPUT1``.
    """

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Register max batch size 0 plus the INPUT0/1 and OUTPUT0/1 tensors.

        Returns the same ``auto_complete_model_config`` object it was given.
        """

        def fp32_vec4(name):
            # Every tensor of this model shares the same type and dims.
            return {"name": name, "data_type": "TYPE_FP32", "dims": [4]}

        input_specs = [fp32_vec4("INPUT0"), fp32_vec4("INPUT1")]
        output_specs = [fp32_vec4("OUTPUT0"), fp32_vec4("OUTPUT1")]

        auto_complete_model_config.set_max_batch_size(0)
        for spec in input_specs:
            auto_complete_model_config.add_input(spec)
        for spec in output_specs:
            auto_complete_model_config.add_output(spec)

        return auto_complete_model_config

    def initialize(self, args):
        """Parse the model configuration and cache the output numpy dtypes.

        ``args["model_config"]`` is decoded as JSON; the decoded configuration
        is kept on ``self.model_config`` and the ``data_type`` of ``OUTPUT0``
        and ``OUTPUT1`` is converted to a numpy dtype for use in ``execute``.
        """
        config = json.loads(args["model_config"])
        self.model_config = config

        out0_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT0")
        out1_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(out0_cfg["data_type"])
        self.output1_dtype = pb_utils.triton_string_to_numpy(out1_cfg["data_type"])

    def execute(self, requests):
        """Build one response per request holding the sum and the difference.

        Returns a list of ``pb_utils.InferenceResponse`` objects in the same
        order as ``requests``.
        """
        dtype0, dtype1 = self.output0_dtype, self.output1_dtype

        responses = []
        for request in requests:
            tensor0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            tensor1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")
            lhs = tensor0.as_numpy()
            rhs = tensor1.as_numpy()

            # Byte-string / object arrays (detected on INPUT0 only) are
            # converted to int32 before doing any arithmetic on them.
            if lhs.dtype.type is np.bytes_ or lhs.dtype == np.object_:
                lhs = lhs.astype(np.int32)
                rhs = rhs.astype(np.int32)

            total, diff = lhs + rhs, lhs - rhs

            outputs = [
                pb_utils.Tensor("OUTPUT0", total.astype(dtype0)),
                pb_utils.Tensor("OUTPUT1", diff.astype(dtype1)),
            ]
            responses.append(pb_utils.InferenceResponse(outputs))
        return responses


================================================
FILE: qa/python_models/auto_complete_error/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Model used to exercise error handling in `auto_complete_config`.

    What the model computes is irrelevant: `auto_complete_config` registers a
    small configuration and then deliberately triggers a Python error.
    """

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Register two inputs and two outputs, then fail on purpose.

        The body of this model doesn't matter. The function exists to test
        that Python errors raised inside `auto_complete_config` are handled
        correctly, so the final `return` is never reached.
        """

        def _fp32_vector(tensor_name):
            # Every tensor of this model is a 4-element FP32 vector.
            return {"name": tensor_name, "data_type": "TYPE_FP32", "dims": [4]}

        auto_complete_model_config.set_max_batch_size(0)
        for input_name in ("INPUT0", "INPUT1"):
            auto_complete_model_config.add_input(_fp32_vector(input_name))
        for output_name in ("OUTPUT0", "OUTPUT1"):
            auto_complete_model_config.add_output(_fp32_vector(output_name))

        # Intentional: referencing an undefined name raises NameError here.
        undefined_variable

        return auto_complete_model_config

    def execute(self, requests):
        """Do nothing; this model is never expected to serve requests."""


================================================
FILE: qa/python_models/bls/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "bls"
backend: "python"

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/bls/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import gc
import os
import sys
import threading
import unittest
from multiprocessing import Pool

import numpy as np
import torch
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack, to_dlpack

# Exceptions caught inside worker threads are appended to this list (while
# holding the lock) by `PBBLSTest.add_deferred_exception`, and the first one is
# re-raised later by `PBBLSTest.check_deferred_exception`.
_deferred_exceptions_lock = threading.Lock()
_deferred_exceptions = []


def bls_add_sub(_=None):
    """Send one BLS request to the "add_sub" model and validate the reply.

    The positional argument is ignored; it only lets the function be used as
    a `Pool.map` target.

    Returns:
        True when the response has no error, both outputs are present and
        they equal the element-wise sum and difference of the two random
        inputs; False otherwise.
    """
    lhs_np = np.random.randn(*[16]).astype(np.float32)
    rhs_np = np.random.randn(*[16]).astype(np.float32)
    input0 = pb_utils.Tensor("INPUT0", lhs_np)
    input1 = pb_utils.Tensor("INPUT1", rhs_np)

    request = pb_utils.InferenceRequest(
        model_name="add_sub",
        inputs=[input0, input1],
        requested_output_names=["OUTPUT0", "OUTPUT1"],
    )
    response = request.exec()
    if response.has_error():
        return False

    sum_tensor = pb_utils.get_output_tensor_by_name(response, "OUTPUT0")
    diff_tensor = pb_utils.get_output_tensor_by_name(response, "OUTPUT1")
    if sum_tensor is None or diff_tensor is None:
        return False

    expected_sum = input0.as_numpy() + input1.as_numpy()
    expected_diff = input0.as_numpy() - input1.as_numpy()

    # OUTPUT0 is checked first; OUTPUT1 is only inspected when it matched.
    if not np.all(expected_sum == sum_tensor.as_numpy()):
        return False

    return bool(np.all(expected_diff == diff_tensor.as_numpy()))


def bls_square(_=None):
    """Send one decoupled BLS request to "square_int32" and validate the replies.

    The positional argument is ignored.

    Returns:
        True when no response carries an error, every response that has
        output tensors returns the input value in "OUT", and the number of
        responses equals the input value plus one; False otherwise.
    """
    input0 = pb_utils.Tensor("IN", np.random.randint(16, size=1, dtype=np.int32))
    request = pb_utils.InferenceRequest(
        model_name="square_int32", inputs=[input0], requested_output_names=["OUT"]
    )
    responses = request.exec(decoupled=True)

    seen = 0
    if responses:
        for response in responses:
            if response.has_error():
                return False

            # Responses without output tensors are counted but not compared.
            if len(response.output_tensors()) > 0:
                out_tensor = pb_utils.get_output_tensor_by_name(response, "OUT")
                if out_tensor is None:
                    return False

                expected = input0.as_numpy()
                if not np.all(expected == out_tensor.as_numpy()):
                    return False

            seen += 1

    # One more response than the input value is expected.
    return bool(np.all(input0.as_numpy() == seen - 1))


def bls_libtorch(model_name, result_device):
    """Run an add/sub BLS request with an explicit preferred output memory.

    Args:
        model_name: Name of the model to call (version 1 is requested).
        result_device: "CPU" to request CPU output memory; any other value
            requests GPU memory on device 0.

    Returns:
        True when the response has no error, both outputs live in the
        requested memory type and they equal the sum and difference of the
        inputs; False otherwise.
    """
    shape = [16]
    input0 = pb_utils.Tensor("INPUT0", np.random.rand(*shape).astype(np.float32))
    input1 = pb_utils.Tensor("INPUT1", np.random.rand(*shape).astype(np.float32))

    wants_cpu = result_device == "CPU"
    preferred_memory = (
        pb_utils.PreferredMemory(pb_utils.TRITONSERVER_MEMORY_CPU)
        if wants_cpu
        else pb_utils.PreferredMemory(pb_utils.TRITONSERVER_MEMORY_GPU, 0)
    )

    request = pb_utils.InferenceRequest(
        model_name=model_name,
        model_version=1,
        inputs=[input0, input1],
        requested_output_names=["OUTPUT__0", "OUTPUT__1"],
        preferred_memory=preferred_memory,
    )

    response = request.exec()
    if response.has_error():
        return False

    output0 = pb_utils.get_output_tensor_by_name(response, "OUTPUT__0")
    output1 = pb_utils.get_output_tensor_by_name(response, "OUTPUT__1")
    if output0 is None or output1 is None:
        return False

    expected_sum = input0.as_numpy() + input1.as_numpy()
    expected_diff = input0.as_numpy() - input1.as_numpy()

    if wants_cpu:
        # Both outputs must have landed in CPU memory.
        if not (output0.is_cpu() and output1.is_cpu()):
            return False

        if not np.all(expected_sum == output0.as_numpy()):
            return False

        return bool(np.all(expected_diff == output1.as_numpy()))

    # GPU was requested: neither output may be in CPU memory.
    if output0.is_cpu() or output1.is_cpu():
        return False
    output0 = from_dlpack(output0.to_dlpack()).to("cpu").cpu().detach().numpy()
    output1 = from_dlpack(output1.to_dlpack()).to("cpu").cpu().detach().numpy()

    if not np.all(output0 == expected_sum):
        return False

    return bool(np.all(output1 == expected_diff))


class PBBLSTest(unittest.TestCase):
    def setUp(self):
        self._is_decoupled = True if os.environ["BLS_KIND"] == "decoupled" else False

    def add_deferred_exception(self, ex):
        """Append `ex` to the module-level deferred-exception list under its lock."""
        # The list is only mutated, never rebound, so no `global` statement
        # is required.
        with _deferred_exceptions_lock:
            _deferred_exceptions.append(ex)

    def check_deferred_exception(self):
        """Re-raise the first deferred exception, if any was recorded."""
        with _deferred_exceptions_lock:
            if _deferred_exceptions:
                raise _deferred_exceptions[0]

    def test_bls_wrong_inputs(self):
        """A BLS request with too few inputs must come back as an error."""
        lone_input = pb_utils.Tensor("INPUT0", np.random.randn(1, 16))

        if not self._is_decoupled:
            # "add_sub" is sent a single input.
            request = pb_utils.InferenceRequest(
                model_name="add_sub",
                inputs=[lone_input],
                requested_output_names=["OUTPUT0", "OUTPUT1"],
            )
            response = request.exec()
            self.assertTrue(response.has_error())
            self.assertIn(
                "expected 2 inputs but got 1 inputs for model 'add_sub'",
                response.error().message(),
            )
            self.assertTrue(len(response.output_tensors()) == 0)
            return

        # Decoupled flavour: "square_int32" is sent no input at all.
        request = pb_utils.InferenceRequest(
            model_name="square_int32", inputs=[], requested_output_names=["OUT"]
        )
        responses = request.exec(decoupled=True)
        for response in responses:
            self.assertTrue(response.has_error())
            self.assertIn(
                "expected 1 inputs but got 0 inputs for model 'square_int32'. Got input(s) [], but missing required input(s) ['IN']. Please provide all required input(s).",
                response.error().message(),
            )
            self.assertTrue(len(response.output_tensors()) == 0)

    def _send_bls_sequence_requests(self, correlation_id, is_decoupled):
        """Send one full sequence to "onnx_nobatch_sequence_int32" and verify it.

        The sequence consists of a start request (value 1000), ten
        intermediate requests (values 0..9) and an end request (value 2000).
        Each response is expected to equal the previous output plus the
        current input.

        This method is meant to run in a worker thread: any exception,
        including assertion failures, is recorded with
        `add_deferred_exception` instead of propagating.

        Args:
            correlation_id: Correlation ID attached to every request of the
                sequence.
            is_decoupled: When True, the intermediate and end requests are
                executed with `decoupled=True` and exactly one response is
                expected from the iterator. The start request is always
                executed in non-decoupled mode.
        """
        model_name = "onnx_nobatch_sequence_int32"

        def exec_single(request):
            # Execute `request` and return its single response.
            if not is_decoupled:
                return request.exec()
            responses = request.exec(decoupled=True)
            response = next(responses)
            with self.assertRaises(StopIteration):
                next(responses)
            return response

        def read_output(response):
            # Check the response and return "OUTPUT" as a CPU numpy array.
            self.assertFalse(response.has_error())
            tensor = pb_utils.get_output_tensor_by_name(response, "OUTPUT")
            self.assertFalse(tensor.is_cpu())
            return from_dlpack(tensor.to_dlpack()).to("cpu").cpu().detach().numpy()

        try:
            # Start request
            input_tensor = pb_utils.Tensor("INPUT", np.array([1000], dtype=np.int32))
            infer_request = pb_utils.InferenceRequest(
                model_name=model_name,
                inputs=[input_tensor],
                requested_output_names=["OUTPUT"],
                flags=pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_START,
                correlation_id=correlation_id,
            )
            # assertEqual, not assertTrue: with assertTrue the second argument
            # is only the failure message and the flag value is never compared.
            self.assertEqual(
                infer_request.flags(), pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_START
            )
            output = read_output(infer_request.exec())
            self.assertEqual(output[0], input_tensor.as_numpy()[0])

            for i in range(10):
                input_tensor = pb_utils.Tensor("INPUT", np.array([i], dtype=np.int32))
                infer_request = pb_utils.InferenceRequest(
                    model_name=model_name,
                    inputs=[input_tensor],
                    requested_output_names=["OUTPUT"],
                    correlation_id=correlation_id,
                )
                infer_response = exec_single(infer_request)

                # The new output is the previous output + the current input
                expected_output = output[0] + i
                output = read_output(infer_response)
                self.assertEqual(output[0], expected_output)

            # Final request
            input_tensor = pb_utils.Tensor("INPUT", np.array([2000], dtype=np.int32))
            infer_request = pb_utils.InferenceRequest(
                model_name=model_name,
                inputs=[input_tensor],
                requested_output_names=["OUTPUT"],
                correlation_id=correlation_id,
            )
            infer_request.set_flags(pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_END)
            self.assertEqual(
                infer_request.flags(), pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_END
            )
            infer_response = exec_single(infer_request)

            expected_output = output[0] + input_tensor.as_numpy()[0]
            output = read_output(infer_response)
            self.assertEqual(output[0], expected_output)
        except Exception as e:
            # Runs in a worker thread; hand the failure to the main thread.
            self.add_deferred_exception(e)

    def test_bls_sequence(self):
        """Run two BLS sequences concurrently and surface any thread failure."""
        # Send 2 sequence of BLS requests simultaneously and check the responses.
        correlation_ids = (1000, 1001)
        workers = [
            threading.Thread(
                target=self._send_bls_sequence_requests,
                args=(correlation_id, self._is_decoupled),
            )
            for correlation_id in correlation_ids
        ]

        for worker in workers:
            worker.start()

        for worker in workers:
            worker.join()

        # Check if any of the threads had an exception
        self.check_deferred_exception()

    def test_bls_incorrect_args(self):
        """Omitting any one of the three request arguments must raise TypeError."""
        output_names = ["OUTPUT0", "OUTPUT1"]
        incomplete_kwargs = (
            # model_name missing
            {"inputs": [], "requested_output_names": output_names},
            # inputs missing
            {"model_name": "add_sub", "requested_output_names": output_names},
            # requested_output_names missing
            {"model_name": "add_sub", "inputs": []},
        )

        for kwargs in incomplete_kwargs:
            with self.assertRaises(TypeError):
                pb_utils.InferenceRequest(**kwargs)

    def _get_gpu_bls_outputs(self, input0_pb, input1_pb, is_decoupled):
        """
        Run "dlpack_add_sub" through BLS and return its outputs as DLPack
        capsules.

        This function is created to test that the DLPack container works
        properly when the inference response and outputs go out of scope:
        only the two DLPack objects are returned, so the response and the
        output tensors are local to this function.

        Args:
            input0_pb: pb_utils.Tensor passed as the first model input.
            input1_pb: pb_utils.Tensor passed as the second model input.
            is_decoupled: when True the request is executed with
                `decoupled=True` and exactly one response is expected.

        Returns:
            A tuple `(output0.to_dlpack(), output1.to_dlpack())`.
        """
        infer_request = pb_utils.InferenceRequest(
            model_name="dlpack_add_sub",
            inputs=[input0_pb, input1_pb],
            requested_output_names=["OUTPUT0", "OUTPUT1"],
        )
        if is_decoupled:
            infer_responses = infer_request.exec(decoupled=True)
            infer_response = next(infer_responses)
            # The iterator must be exhausted after the first response.
            with self.assertRaises(StopIteration):
                next(infer_responses)
        else:
            infer_response = infer_request.exec()

        self.assertFalse(infer_response.has_error())

        output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
        output1 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT1")
        self.assertIsNotNone(output0)
        self.assertIsNotNone(output1)

        # When one of the inputs is in GPU the output returned by the model must
        # be in GPU, otherwise the outputs will be in CPU.
        if not input0_pb.is_cpu() or not input1_pb.is_cpu():
            self.assertTrue((not output0.is_cpu()) and (not output1.is_cpu()))
        else:
            self.assertTrue((output0.is_cpu()) and (output1.is_cpu()))

        # Make sure that the reference count is increased by one when DLPack
        # representation is created.
        rc_before_dlpack_output0 = sys.getrefcount(output0)
        rc_before_dlpack_output1 = sys.getrefcount(output1)

        output0_dlpack = output0.to_dlpack()
        output1_dlpack = output1.to_dlpack()

        rc_after_dlpack_output0 = sys.getrefcount(output0)
        rc_after_dlpack_output1 = sys.getrefcount(output1)

        self.assertEqual(rc_after_dlpack_output0 - rc_before_dlpack_output0, 1)
        self.assertEqual(rc_after_dlpack_output1 - rc_before_dlpack_output1, 1)

        # Make sure that reference count decreases after destroying the DLPack
        # (rebinding the names to None drops the only references to them).
        output0_dlpack = None
        output1_dlpack = None
        rc_after_del_dlpack_output0 = sys.getrefcount(output0)
        rc_after_del_dlpack_output1 = sys.getrefcount(output1)
        self.assertEqual(rc_after_del_dlpack_output0 - rc_after_dlpack_output0, -1)
        self.assertEqual(rc_after_del_dlpack_output1 - rc_after_dlpack_output1, -1)

        # Fresh DLPack objects are handed to the caller; `output0`, `output1`
        # and `infer_response` themselves go out of scope on return.
        return output0.to_dlpack(), output1.to_dlpack()

    def test_zero_length_io(self):
        """A zero-length tensor must round-trip through "identity_fp32" via BLS.

        In decoupled mode exactly one response is expected from the iterator.
        """
        model_name = "identity_fp32"
        input0 = np.zeros([1, 0], dtype=np.float32)
        input0_pb = pb_utils.Tensor("INPUT0", input0)
        infer_request = pb_utils.InferenceRequest(
            model_name=model_name,
            inputs=[input0_pb],
            requested_output_names=["OUTPUT0"],
        )

        if self._is_decoupled:
            infer_responses = infer_request.exec(decoupled=True)
            infer_response = next(infer_responses)
            with self.assertRaises(StopIteration):
                next(infer_responses)
        else:
            infer_response = infer_request.exec()

        self.assertFalse(infer_response.has_error())

        output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
        self.assertIsNotNone(output0)

        # Compare the tensor's numpy contents, not the pb_utils.Tensor object
        # itself. `np.array_equal` also compares shapes: an element-wise `==`
        # followed by `np.all` is vacuously True for zero-length arrays and
        # therefore could never fail.
        output0_np = output0.as_numpy()
        self.assertTrue(
            np.array_equal(output0_np, input0),
            f"expected output of shape {input0.shape}, got shape {output0_np.shape}",
        )

    def cuda_memory_stats(self):
        """Return `(allocated, reserved)` as reported by `torch.cuda`."""
        return torch.cuda.memory_allocated(), torch.cuda.memory_reserved()

    def bls_tensor_lifecycle_helper(self):
        """
        Repeatedly send a large tensor through the "dlpack_identity" model via
        BLS, first from CPU memory and then from GPU memory, to check that
        tensors are released between iterations.

        The GPU half asserts on PyTorch's CUDA allocator statistics, so the
        exact points at which local names are rebound to None matter here.
        """
        model_name = "dlpack_identity"
        verbose = True

        # A tensor of 10 Mi float32 elements, i.e. 40 MiB of data.
        input_size = 10 * 1024 * 1024
        input_type_size_bytes = 4  # TYPE_FP32
        input_size_bytes = input_size * input_type_size_bytes

        # Sending the tensor 50 times to test whether the deallocation is
        # happening correctly. If the deallocation doesn't happen correctly,
        # there will be an out of shared memory error.
        for _ in range(50):
            input0 = np.ones([1, input_size], dtype=np.float32)
            input0_pb = pb_utils.Tensor("INPUT0", input0)
            infer_request = pb_utils.InferenceRequest(
                model_name=model_name,
                inputs=[input0_pb],
                requested_output_names=["OUTPUT0"],
            )

            if self._is_decoupled:
                infer_responses = infer_request.exec(decoupled=True)
                infer_response = next(infer_responses)
                # Exactly one response is expected.
                with self.assertRaises(StopIteration):
                    next(infer_responses)
            else:
                infer_response = infer_request.exec()
            self.assertFalse(infer_response.has_error())

            output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
            np.testing.assert_equal(
                output0.as_numpy(), input0, "BLS CPU memory lifecycle failed."
            )

        # Show total memory stats before gpu tensor test
        print(torch.cuda.memory_summary())

        # Checking the same with the GPU tensors.
        for index in range(50):
            # Drop the references left over from the previous iteration (or
            # from the CPU loop) before measuring allocator state.
            input0 = None
            infer_request = None
            input0_pb = None
            fail_msg = f"GPU memory lifecycle test failed at index: {index}"

            torch.cuda.empty_cache()
            alloced, cached = self.cuda_memory_stats()

            # Check cuda memory usage is cleaned up (empty) between iterations
            # when device tensors go out of scope
            self.assertEqual(alloced, 0, fail_msg)
            # Check that cache is properly cleaned up when emptied
            self.assertEqual(cached, 0, fail_msg)

            if verbose:
                # NOTE: this reflects total gpu memory usage, and may be affected
                # by other processes, so don't use it for direct checks but log it
                # for debugging/context.
                free_memory, total_memory = torch.cuda.mem_get_info()
                used_memory = total_memory - free_memory
                print(f"[DEBUG][Iteration {index}][GPU] {used_memory=} bytes")

            input0 = torch.ones([1, input_size], dtype=torch.float32).to("cuda")
            input0_pb = pb_utils.Tensor.from_dlpack("INPUT0", to_dlpack(input0))
            # Check cuda memory usage after creating device tensor
            alloced, _ = self.cuda_memory_stats()
            self.assertEqual(
                alloced,
                input_size_bytes,
                "Expected precise byte allocation after input tensor creation",
            )

            infer_request = pb_utils.InferenceRequest(
                model_name=model_name,
                inputs=[input0_pb],
                requested_output_names=["OUTPUT0"],
            )

            if self._is_decoupled:
                infer_responses = infer_request.exec(decoupled=True)
                infer_response = next(infer_responses)
                # Exactly one response is expected.
                with self.assertRaises(StopIteration):
                    next(infer_responses)
            else:
                infer_response = infer_request.exec()

            self.assertFalse(infer_response.has_error())

            output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
            output0_pytorch = from_dlpack(output0.to_dlpack())

            # Stats after getting output tensor
            alloced, _ = self.cuda_memory_stats()
            self.assertEqual(
                alloced,
                input_size_bytes,
                "Expected only input allocation, as output zero-copies input tensor",
            )

            # Set the output tensor and the inference response to None, to make
            # sure that the DLPack-backed output0_pytorch is still valid.
            output0 = None
            infer_response = None
            self.assertTrue(
                torch.all(output0_pytorch == input0),
                f"input ({input0}) and output ({output0_pytorch}) didn't match for identity model.",
            )

        print(torch.cuda.memory_summary())

    def assert_cuda_memory_empty(self, msg):
        torch.cuda.empty_cache()
        alloced, cached = self.cuda_memory_stats()
        self.assertEqual(alloced, 0, msg)
        self.assertEqual(cached, 0, msg)

    def test_bls_tensor_lifecycle(self):
        """Run the tensor lifecycle helper with empty-GPU-memory checks around it."""
        before_msg = "Expected all gpu memory cleaned up before test"
        after_msg = "Expected all gpu memory cleaned up after test"

        self.assert_cuda_memory_empty(before_msg)
        self.bls_tensor_lifecycle_helper()
        self.assert_cuda_memory_empty(after_msg)

    def _test_gpu_bls_add_sub(self, is_input0_gpu, is_input1_gpu, is_decoupled=False):
        """Check "dlpack_add_sub" results for a given CPU/GPU input placement.

        Args:
            is_input0_gpu: Move the first input to "cuda" when True.
            is_input1_gpu: Move the second input to "cuda" when True.
            is_decoupled: Forwarded to `_get_gpu_bls_outputs`.
        """
        lhs = torch.rand(16)
        rhs = torch.rand(16)

        if is_input0_gpu:
            lhs = lhs.to("cuda")

        if is_input1_gpu:
            rhs = rhs.to("cuda")

        input0_pb = pb_utils.Tensor.from_dlpack("INPUT0", to_dlpack(lhs))
        input1_pb = pb_utils.Tensor.from_dlpack("INPUT1", to_dlpack(rhs))

        output0_dlpack, output1_dlpack = self._get_gpu_bls_outputs(
            input0_pb, input1_pb, is_decoupled=is_decoupled
        )

        def _on_cpu(pb_tensor):
            # Export the tensor through DLPack and bring it to the CPU.
            return from_dlpack(pb_tensor.to_dlpack()).to("cpu")

        expected_sum = _on_cpu(input0_pb) + _on_cpu(input1_pb)
        expected_diff = _on_cpu(input0_pb) - _on_cpu(input1_pb)

        actual_sum = from_dlpack(output0_dlpack).to("cpu")
        self.assertTrue(torch.all(expected_sum == actual_sum))

        actual_diff = from_dlpack(output1_dlpack).to("cpu")
        self.assertTrue(torch.all(expected_diff == actual_diff))

    def test_gpu_bls(self):
        for input0_device in [True, False]:
            for input1_device in [True, False]:
                self._test_gpu_bls_add_sub(
                    input0_device, input1_device, self._is_decoupled
                )

    def test_multiprocess(self):
        """Issue sync BLS requests from a multiprocessing Pool of 10 workers.

        Nothing is run in decoupled mode (see the Fixme below).
        """
        # Test multiprocess Pool with sync BLS
        if self._is_decoupled:
            # Fixme: DLIS-4630
            # func_name = bls_square
            return

        func_name = bls_add_sub

        pool = Pool(10)
        try:
            # NOTE(review): the boolean results returned by the workers are
            # not checked; only exceptions raised by `map` fail the test.
            # Confirm whether asserting on the results is desired.
            pool.map(func_name, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        finally:
            # Always shut the pool down, even when `map` raises, so worker
            # processes are not left behind for the remaining tests.
            pool.close()
            pool.join()

    def test_bls_sync(self):
        """A request to a missing model must fail, and later requests must work."""
        request = pb_utils.InferenceRequest(
            model_name="non_existent_model", inputs=[], requested_output_names=[]
        )
        expected_error = "Failed for execute the inference request. Model 'non_existent_model' is not ready."

        def _expect_not_ready(response):
            # Because the model doesn't exist, the inference response must
            # have an error
            self.assertTrue(response.has_error())
            self.assertIn(expected_error, response.error().message())

        if not self._is_decoupled:
            response = request.exec()
            _expect_not_ready(response)

            # Make sure that the inference requests can be performed properly
            # after an error.
            self.assertTrue(bls_add_sub())
            return

        responses = request.exec(decoupled=True)
        for response in responses:
            _expect_not_ready(response)

            # Make sure that the inference requests can be performed properly
            # after an error.
            self.assertTrue(bls_square())

    def test_bls_execute_error(self):
        """A BLS request to "execute_error" without inputs must return an error."""
        # Test BLS with a model that has an error during execution.
        request = pb_utils.InferenceRequest(
            model_name="execute_error", inputs=[], requested_output_names=[]
        )

        if not self._is_decoupled:
            response = request.exec()
        else:
            responses = request.exec(decoupled=True)
            response = next(responses)
            # Only a single response is expected from the iterator.
            with self.assertRaises(StopIteration):
                next(responses)

        self.assertTrue(response.has_error())
        error_message = response.error().message()
        self.assertIn(
            "expected 1 inputs but got 0 inputs for model 'execute_error'",
            error_message,
        )
        self.assertTrue(len(response.output_tensors()) == 0)

    def test_multiple_bls(self):
        """Issue 100 consecutive BLS requests and require each one to succeed."""
        # Test running multiple BLS requests together
        run_once = bls_square if self._is_decoupled else bls_add_sub
        for _ in range(100):
            self.assertTrue(run_once())

    def test_timeout(self):
        """Check BLS request timeouts against "identity_fp32_timeout".

        The first request uses a timeout of 5 and must fail with a timeout
        error. The second verifies two things:
        1. A request timeout can be accessed by receiver models
        2. A user can specify a very large value (11s) for a timeout
        """
        tensor_size = [1, 1024 * 1024]
        input0_np = np.random.randn(*tensor_size)
        input0 = pb_utils.Tensor("INPUT0", input0_np.astype(np.float32))

        for timeout, expect_timeout in ((5, True), (11000000000, False)):
            infer_request = pb_utils.InferenceRequest(
                model_name="identity_fp32_timeout",
                inputs=[input0],
                requested_output_names=["OUTPUT0"],
                timeout=timeout,
            )

            if self._is_decoupled:
                infer_responses = infer_request.exec(decoupled=True)
                infer_response = next(infer_responses)
            else:
                infer_response = infer_request.exec()

            if expect_timeout:
                # Expect timeout error
                self.assertTrue(infer_response.has_error())
                self.assertIn(
                    "Request timeout expired", infer_response.error().message()
                )
                self.assertTrue(len(infer_response.output_tensors()) == 0)
            else:
                # Expect no timeout error. Check for log message
                # in test.sh
                self.assertFalse(infer_response.has_error())

    def _test_response_iterator_square(
        self, expected_output_cnt, expected_output_value, response_iterator
    ):
        response_count = 0
        expected_output_cnt = np.array([expected_output_cnt], dtype=np.int32)

        for infer_response in response_iterator:
            self.assertFalse(infer_response.has_error())
            if len(infer_response.output_tensors()) > 0:
                output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
                self.assertIsNotNone(output0)
                self.assertEqual(expected_output_value, output0.as_numpy())

            response_count += 1

        self.assertEqual(response_count, expected_output_cnt)

        # Make sure the iterator is exhausted.
        with self.assertRaises(StopIteration):
            next(response_iterator)

        return response_iterator

    def test_response_iterator(self):
        """Exercise the decoupled response iterator in several access patterns.

        Nothing is checked when the suite is not running in decoupled mode.
        """
        if not self._is_decoupled:
            return

        # Test the response iterator for decoupled responses. The request
        # has 4 decoupled responses followed by an empty response.
        response_value = 4
        input0 = pb_utils.Tensor("IN", np.array([response_value], dtype=np.int32))
        infer_request = pb_utils.InferenceRequest(
            model_name="square_int32",
            inputs=[input0],
            requested_output_names=["OUT"],
        )

        def _expect_value(response):
            # Validate one response that must carry the "OUT" tensor; the
            # tensor is returned so the caller can keep it referenced.
            self.assertFalse(response.has_error())
            out_tensor = pb_utils.get_output_tensor_by_name(response, "OUT")
            self.assertIsNotNone(out_tensor)
            self.assertEqual(response_value, out_tensor.as_numpy())
            return out_tensor

        infer_responses = infer_request.exec(decoupled=True)

        # case 1. Use Next() to get the next response first, then use
        # for-loop to get the remaining responses.
        infer_response = next(infer_responses)
        output0 = _expect_value(infer_response)
        # The iterator now should only have 4 remaining responses.
        infer_responses = self._test_response_iterator_square(
            4, response_value, infer_responses
        )

        # case 2. Call for-loop to get all the responses multiple times.
        for _ in range(3):
            infer_responses = self._test_response_iterator_square(
                5, response_value, infer_responses
            )

        # case 3. Break from the iteration, then use Next() and for-loop to
        # get the remaining responses.
        response_count = 0
        for infer_response in infer_responses:
            output0 = _expect_value(infer_response)

            response_count += 1
            if response_count == 2:
                break

        infer_response = next(infer_responses)
        output0 = _expect_value(infer_response)

        # The iterator now should only have 2 remaining responses.
        infer_responses = self._test_response_iterator_square(
            2, response_value, infer_responses
        )

        # case 4. Delete the iterator before all the responses have been
        # retrieved.
        infer_responses = infer_request.exec(decoupled=True)

        infer_response = next(infer_responses)
        output0 = _expect_value(infer_response)

        del infer_responses

    def test_preferred_memory(self):
        """Request the opposite memory type from each libtorch model's name."""
        cases = (
            ("libtorch_gpu", "CPU"),
            ("libtorch_cpu", "GPU"),
        )
        for model_name, result_device in cases:
            self.assertTrue(bls_libtorch(model_name, result_device))


class TritonPythonModel:
    """Model whose ``execute`` runs the unittest cases of module "model"."""

    def execute(self, requests):
        """Run the unittest suite once per request and report whether it passed.

        Returns a list with one ``InferenceResponse`` per request. Each response
        holds a single "OUTPUT0" float16 tensor whose only element is the
        suite's ``wasSuccessful()`` flag.
        """
        responses = []
        for _request in requests:
            # exit=False keeps unittest from calling sys.exit() so the result
            # object can be inspected afterwards.
            program = unittest.main("model", exit=False)
            outcome = program.result

            # Echo every assertion failure so it shows up in the model's output.
            for failed_case, trace in outcome.failures:
                print(f"{failed_case} failed:\n{trace}")

            passed = np.array([outcome.wasSuccessful()], dtype=np.float16)
            output_tensor = pb_utils.Tensor("OUTPUT0", passed)
            responses.append(pb_utils.InferenceResponse([output_tensor]))
        return responses


================================================
FILE: qa/python_models/bls_async/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "bls_async"
backend: "python"

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]

instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/bls_async/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import asyncio
import os

import numpy as np
import torch
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack, to_dlpack


def verify_add_sub_results(input0, input1, infer_response):
    """Validate an add/sub inference response against the tensors that were sent.

    Returns False when the response carries an error (the message is printed),
    when "OUTPUT0" or "OUTPUT1" is missing, or when OUTPUT0 != input0 + input1
    or OUTPUT1 != input0 - input1 element-wise. Returns True otherwise.
    """
    if infer_response.has_error():
        print("Async BLS failed:", infer_response.error().message(), flush=True)
        return False

    output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
    output1 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT1")
    if output0 is None or output1 is None:
        return False

    def to_host_array(tensor):
        # Tensors reporting is_cpu() expose as_numpy() directly; the others go
        # through DLPack into torch and are moved to the CPU first.
        if tensor.is_cpu():
            return tensor.as_numpy()
        return from_dlpack(tensor.to_dlpack()).to("cpu").cpu().detach().numpy()

    input0 = to_host_array(input0)
    input1 = to_host_array(input1)
    output0 = to_host_array(output0)
    output1 = to_host_array(output1)

    expected_output_0 = input0 + input1
    expected_output_1 = input0 - input1

    sum_matches = np.all(expected_output_0 == output0)
    if not sum_matches:
        print(f"For OUTPUT0 expected {expected_output_0} found {output0}")
        return False

    diff_matches = np.all(expected_output_1 == output1)
    if not diff_matches:
        print(f"For OUTPUT1 expected {expected_output_1} found {output1}")
        return False

    return True


def verify_square_results(input0, infer_responses):
    """Validate a stream of decoupled responses against the tensor that was sent.

    Args:
        input0: The tensor that was sent as "IN". It is converted to a numpy
            array (through DLPack/torch when it is not on the CPU).
        infer_responses: Iterable of inference responses to consume.

    Returns:
        False when any response carries an error (the message is printed), when
        a response that has output tensors lacks "OUT", when any "OUT" value
        differs from the input value, or when the number of responses minus one
        differs from the input value. True otherwise.
    """
    if not input0.is_cpu():
        input0 = from_dlpack(input0.to_dlpack()).to("cpu").cpu().detach().numpy()
    else:
        input0 = input0.as_numpy()

    response_count = 0

    for infer_response in infer_responses:
        if infer_response.has_error():
            print(
                "Async BLS decoupled failed:",
                infer_response.error().message(),
                flush=True,
            )
            return False

        # Responses without output tensors are only counted, not value-checked.
        if len(infer_response.output_tensors()) > 0:
            output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")

            if output0 is None:
                return False

            if not output0.is_cpu():
                output0 = (
                    from_dlpack(output0.to_dlpack()).to("cpu").cpu().detach().numpy()
                )
            else:
                output0 = output0.as_numpy()

            # Every response is expected to echo the input value. The check
            # must look at the received output; comparing the expectation with
            # the input itself can never fail.
            expected_output = input0

            if not np.all(expected_output == output0):
                print(f"For OUT expected {expected_output} found {output0}")
                return False

        response_count += 1

    # NOTE(review): the "- 1" presumably discounts a trailing response without
    # output tensors that closes the stream - confirm against the callee model.
    if not np.all(input0 == response_count - 1):
        print("Expected {} responses, got {}".format(input0, response_count - 1))
        return False

    return True


def create_addsub_inference_request(gpu=False):
    """Build a request for model "dlpack_add_sub" with two random 16-element inputs.

    With ``gpu`` falsy the inputs are float32 numpy arrays; otherwise they are
    torch tensors moved to "cuda" and handed over through DLPack.

    Returns the tuple ``(input0, input1, infer_request)``.
    """
    if gpu:
        input0_pytorch = torch.rand(16).to("cuda")
        input1_pytorch = torch.rand(16).to("cuda")
        input0 = pb_utils.Tensor.from_dlpack("INPUT0", to_dlpack(input0_pytorch))
        input1 = pb_utils.Tensor.from_dlpack("INPUT1", to_dlpack(input1_pytorch))
    else:
        # Draw both arrays first (same order as the tensors are named), then
        # narrow them to float32.
        first_draw = np.random.randn(16)
        second_draw = np.random.randn(16)
        input0 = pb_utils.Tensor("INPUT0", first_draw.astype(np.float32))
        input1 = pb_utils.Tensor("INPUT1", second_draw.astype(np.float32))

    infer_request = pb_utils.InferenceRequest(
        model_name="dlpack_add_sub",
        inputs=[input0, input1],
        requested_output_names=["OUTPUT0", "OUTPUT1"],
    )
    return input0, input1, infer_request


def create_square_inference_request(gpu=False):
    """Build a request for model "dlpack_square" with one random int32 input "IN".

    With ``gpu`` falsy the value comes from numpy; otherwise it is a torch
    tensor moved to "cuda" and handed over through DLPack.

    Returns the tuple ``(input0, infer_request)``.
    """
    if gpu:
        # torch.randint(1, 16, ...) draws from [1, 16).
        input0_pytorch = torch.randint(1, 16, (1,), dtype=torch.int32).to("cuda")
        input0 = pb_utils.Tensor.from_dlpack("IN", to_dlpack(input0_pytorch))
    else:
        # np.random.randint(16, ...) draws from [0, 16), so 0 is possible here.
        input0 = pb_utils.Tensor(
            "IN", np.random.randint(16, size=1, dtype=np.int32)
        )

    infer_request = pb_utils.InferenceRequest(
        model_name="dlpack_square",
        inputs=[input0],
        requested_output_names=["OUT"],
    )
    return input0, infer_request


async def async_bls_add_sub():
    """Send one add/sub request asynchronously, then the same request synchronously.

    Returns True only when both responses pass ``verify_add_sub_results``; the
    synchronous call is skipped if the asynchronous response already failed.
    """
    input0, input1, infer_request = create_addsub_inference_request()

    async_response = await infer_request.async_exec()
    if not verify_add_sub_results(input0, input1, async_response):
        return False

    # The same request object is reused for the blocking call.
    sync_response = infer_request.exec()
    return bool(verify_add_sub_results(input0, input1, sync_response))


async def async_bls_square():
    """Send one decoupled square request asynchronously, then synchronously.

    Returns True only when both response streams pass
    ``verify_square_results``; the synchronous call is skipped if the
    asynchronous stream already failed.
    """
    input0, infer_request = create_square_inference_request()

    async_stream = await infer_request.async_exec(decoupled=True)
    if not verify_square_results(input0, async_stream):
        return False

    # The same request object is reused for the blocking call.
    sync_stream = infer_request.exec(decoupled=True)
    return bool(verify_square_results(input0, sync_stream))


async def multiple_async_bls_addsub(gpu):
    """Issue ten add/sub requests, await them together, and verify each response.

    ``gpu`` is forwarded to ``create_addsub_inference_request``. Returns False
    at the first response that fails ``verify_add_sub_results``, True otherwise.
    """
    pending = []
    sent_pairs = []
    for _ in range(10):
        input0, input1, infer_request = create_addsub_inference_request(gpu)
        sent_pairs.append((input0, input1))
        pending.append(infer_request.async_exec())

    # gather() returns results in the order the awaitables were passed, so
    # responses line up with sent_pairs.
    infer_responses = await asyncio.gather(*pending)
    return all(
        verify_add_sub_results(sent0, sent1, infer_response)
        for infer_response, (sent0, sent1) in zip(infer_responses, sent_pairs)
    )


async def multiple_async_bls_square(gpu):
    """Issue ten decoupled square requests, await them together, and verify each.

    ``gpu`` is forwarded to ``create_square_inference_request``. Returns False
    at the first response stream that fails ``verify_square_results``, True
    otherwise.
    """
    pending = []
    sent_inputs = []
    for _ in range(10):
        input0, infer_request = create_square_inference_request(gpu)
        sent_inputs.append(input0)
        pending.append(infer_request.async_exec(decoupled=True))

    # gather() returns results in the order the awaitables were passed, so
    # each response stream lines up with the input that produced it.
    response_streams = await asyncio.gather(*pending)
    return all(
        verify_square_results(sent_input, stream)
        for stream, sent_input in zip(response_streams, sent_inputs)
    )


class TritonPythonModel:
    """Model that runs the async BLS checks and reports one boolean per request."""

    async def execute(self, requests):
        """Run three async BLS checks per request and return their conjunction.

        The environment variable ``BLS_KIND`` must be set (a missing variable
        raises ``KeyError``); the value "decoupled" selects the square checks,
        anything else selects the add/sub checks.

        Returns a list with one ``InferenceResponse`` per request, each holding
        a one-element "OUTPUT0" tensor.
        """
        is_decoupled = os.environ["BLS_KIND"] == "decoupled"

        responses = []
        for _request in requests:
            # All three checks always run; their results are combined afterwards.
            if is_decoupled:
                gpu_batch_ok = await multiple_async_bls_square(gpu=True)
                cpu_batch_ok = await multiple_async_bls_square(gpu=False)
                single_ok = await async_bls_square()
            else:
                gpu_batch_ok = await multiple_async_bls_addsub(gpu=True)
                cpu_batch_ok = await multiple_async_bls_addsub(gpu=False)
                single_ok = await async_bls_add_sub()

            verdict = np.array([gpu_batch_ok & cpu_batch_ok & single_ok])
            responses.append(
                pb_utils.InferenceResponse(
                    output_tensors=[pb_utils.Tensor("OUTPUT0", verdict)]
                )
            )

        return responses


================================================
FILE: qa/python_models/bls_finalize_error/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "bls_finalize_error"
backend: "python"

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/bls_finalize_error/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Model that issues a decoupled BLS request from ``finalize``.

    NOTE(review): judging by the model name ("bls_finalize_error") the request
    in ``finalize`` is expected to fail - confirm against the test driving it.
    """

    def initialize(self, args):
        """Do nothing."""
        return None

    def execute(self, requests):
        """Do nothing and return None."""
        return None

    def finalize(self):
        """Print a message, then send a decoupled request to "square_int32"."""
        print("Cleaning up...")
        in_value = np.random.randint(3, size=1, dtype=np.int32)
        in_tensor = pb_utils.Tensor("IN", in_value)
        bls_request = pb_utils.InferenceRequest(
            model_name="square_int32",
            inputs=[in_tensor],
            requested_output_names=["OUT"],
        )
        # The result is kept only until the method returns; it is never read.
        infer_responses = bls_request.exec(decoupled=True)


================================================
FILE: qa/python_models/bls_init_error/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "bls_init_error"
backend: "python"

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/bls_init_error/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Model that issues a decoupled BLS request from ``initialize``.

    NOTE(review): judging by the model name ("bls_init_error") the request in
    ``initialize`` is expected to fail - confirm against the test driving it.
    """

    def initialize(self, args):
        """Send a decoupled request to "square_int32"; ``args`` is not used."""
        in_value = np.random.randint(3, size=1, dtype=np.int32)
        in_tensor = pb_utils.Tensor("IN", in_value)
        bls_request = pb_utils.InferenceRequest(
            model_name="square_int32",
            inputs=[in_tensor],
            requested_output_names=["OUT"],
        )
        # The result is kept only until the method returns; it is never read.
        infer_responses = bls_request.exec(decoupled=True)

    def execute(self, requests):
        """Do nothing and return None."""
        return None

    def finalize(self):
        """Print a cleanup message."""
        print("Cleaning up...")


================================================
FILE: qa/python_models/bls_memory/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "bls_memory"
backend: "python"

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/bls_memory/model.py
================================================
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import unittest

import numpy as np
import triton_python_backend_utils as pb_utils


class PBBLSMemoryTest(unittest.TestCase):
    """Sends large tensors through model "identity_fp32" via BLS.

    Each response must either echo the input or carry the expected
    shared-memory-pool error message.
    """

    def setUp(self):
        # BLS_KIND must be set in the environment; "decoupled" selects the
        # decoupled (iterator-returning) exec path.
        self._is_decoupled = os.environ["BLS_KIND"] == "decoupled"

    def _send_identity_tensor(self, size, is_decoupled):
        """Send a random [1, size] float32 tensor and return (sent array, response).

        The returned array is the float64 draw the float32 input was cast from.
        In decoupled mode exactly one response is expected: a second ``next()``
        must raise ``StopIteration``.
        """
        input0_np = np.random.randn(1, size)
        input0 = pb_utils.Tensor("INPUT0", input0_np.astype(np.float32))
        infer_request = pb_utils.InferenceRequest(
            model_name="identity_fp32",
            inputs=[input0],
            requested_output_names=["OUTPUT0"],
        )

        if not is_decoupled:
            return input0_np, infer_request.exec()

        response_iterator = infer_request.exec(decoupled=True)
        infer_response = next(response_iterator)
        with self.assertRaises(StopIteration):
            next(response_iterator)
        return input0_np, infer_response

    def _assert_echo_or_out_of_memory(self, input0_np, infer_response):
        """Accept either the out-of-memory error or an output close to the input."""
        out_of_memory_message = "Failed to increase the shared memory pool size"

        if infer_response.has_error():
            self.assertIn(out_of_memory_message, infer_response.error().message())
            return

        self.assertFalse(infer_response.has_error())
        output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
        self.assertIsNotNone(output0)
        self.assertTrue(np.allclose(output0.as_numpy(), input0_np))

    def test_bls_out_of_memory(self):
        # One request of 256Mi elements followed by four of 50Mi elements.
        element_counts = (256 * 1024 * 1024,) + (50 * 1024 * 1024,) * 4
        for tensor_size in element_counts:
            input0_np, infer_response = self._send_identity_tensor(
                tensor_size, self._is_decoupled
            )
            self._assert_echo_or_out_of_memory(input0_np, infer_response)


class TritonPythonModel:
    """Model whose ``execute`` runs the unittest cases of module "model"."""

    def execute(self, requests):
        """Run the unittest suite once per request and report whether it passed.

        Returns a list with one ``InferenceResponse`` per request. Each response
        holds a single "OUTPUT0" float16 tensor whose only element is the
        suite's ``wasSuccessful()`` flag.
        """
        # NOTE(review): bls_memory/config.pbtxt declares OUTPUT0 as TYPE_FP32
        # with dims [16], while a single float16 element is emitted here -
        # confirm the mismatch is intentional.
        responses = []
        for _request in requests:
            # exit=False keeps unittest from calling sys.exit() so the result
            # object can be inspected afterwards.
            program = unittest.main("model", exit=False)
            passed = np.array([program.result.wasSuccessful()], dtype=np.float16)
            output_tensor = pb_utils.Tensor("OUTPUT0", passed)
            responses.append(pb_utils.InferenceResponse([output_tensor]))
        return responses


================================================
FILE: qa/python_models/bls_memory_async/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "bls_memory_async"
backend: "python"

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/bls_memory_async/model.py
================================================
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os

import numpy as np
import triton_python_backend_utils as pb_utils


async def _send_identity_tensor(size, is_decoupled):
    """Send a random [1, size] float32 tensor to "identity_fp32" asynchronously.

    Returns ``(input0_np, infer_response)`` where ``input0_np`` is the float64
    draw the float32 input was cast from. In decoupled mode only the first
    response of the returned iterator is taken.
    """
    input0_np = np.random.randn(1, size)
    input0 = pb_utils.Tensor("INPUT0", input0_np.astype(np.float32))
    infer_request = pb_utils.InferenceRequest(
        model_name="identity_fp32",
        inputs=[input0],
        requested_output_names=["OUTPUT0"],
    )

    if not is_decoupled:
        return input0_np, await infer_request.async_exec()

    response_iterator = await infer_request.async_exec(decoupled=True)
    return input0_np, next(response_iterator)


async def test_bls_out_of_memory():
    """Send large identity requests and accept either an echo or an OOM error.

    One request of 256Mi elements is followed by four of 50Mi elements. The
    environment variable ``BLS_KIND`` must be set; "decoupled" selects the
    decoupled exec path.

    Returns False at the first response that neither carries the expected
    shared-memory-pool error message nor echoes its input, True otherwise.
    """
    is_decoupled = os.environ["BLS_KIND"] == "decoupled"
    out_of_memory_message = "Failed to increase the shared memory pool size"

    def response_is_acceptable(sent_array, response):
        # An error is tolerated only when it is the expected out-of-memory one.
        if response.has_error():
            return out_of_memory_message in response.error().message()
        output0 = pb_utils.get_output_tensor_by_name(response, "OUTPUT0")
        if output0 is None:
            return False
        return bool(np.allclose(output0.as_numpy(), sent_array))

    element_counts = (256 * 1024 * 1024,) + (50 * 1024 * 1024,) * 4
    for tensor_size in element_counts:
        input0_np, infer_response = await _send_identity_tensor(
            tensor_size, is_decoupled
        )
        if not response_is_acceptable(input0_np, infer_response):
            return False

    return True


class TritonPythonModel:
    """Python-backend model reporting the outcome of ``test_bls_out_of_memory``."""

    async def execute(self, requests):
        """Run the out-of-memory check once for every incoming request.

        Returns:
            A list with one response per request. Each response carries an
            ``OUTPUT0`` tensor holding the check result as a float16 value.
        """
        responses = []
        for _ in requests:
            # Run the unittest and store the results in InferenceResponse.
            passed = await test_bls_out_of_memory()
            verdict = pb_utils.Tensor("OUTPUT0", np.array([passed], dtype=np.float16))
            responses.append(pb_utils.InferenceResponse([verdict]))
        return responses


================================================
FILE: qa/python_models/bls_model_loading/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration of the "bls_model_loading" test model, served by the Python
# backend.
name: "bls_model_loading"
backend: "python"

# No inputs are declared. The only output is a single-element boolean tensor.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]

# A single model instance, placed on the CPU.
instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]


================================================
FILE: qa/python_models/bls_model_loading/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time
import unittest

import numpy as np
import triton_python_backend_utils as pb_utils


class PBBLSModelLoadingTest(unittest.TestCase):
    """Exercises the ``pb_utils`` model load / unload / readiness helpers."""

    def setUp(self):
        # Every test case works on the same model.
        self.model_name = "onnx_int32_int32_int32"

    def tearDown(self):
        # The unload call does not wait for the requested model to be fully
        # unloaded before returning, hence the fixed delay below.
        settle_seconds = 30
        pb_utils.unload_model(self.model_name)
        # TODO: Make this more robust to wait until fully unloaded
        print("Sleep 30 seconds to make sure model finishes unloading...")
        time.sleep(settle_seconds)
        print("Done sleeping.")

    def test_load_unload_model(self):
        """Readiness must follow a plain load and a plain unload."""
        self.assertFalse(pb_utils.is_model_ready(model_name=self.model_name))

        pb_utils.load_model(model_name=self.model_name)
        self.assertTrue(pb_utils.is_model_ready(self.model_name))

        pb_utils.unload_model(self.model_name)
        self.assertFalse(pb_utils.is_model_ready(self.model_name))

    def test_load_with_config_override(self):
        """A malformed config override is rejected; a valid one takes effect."""
        self.assertFalse(pb_utils.is_model_ready(self.model_name))
        pb_utils.load_model(self.model_name)
        self.assertTrue(pb_utils.is_model_ready(self.model_name))

        # Send the config with the wrong format
        malformed_config = '"parameters": {"config": {{"backend":"onnxruntime", "version_policy":{"specific":{"versions":[2]}}}}}'
        with self.assertRaises(pb_utils.TritonModelException):
            pb_utils.load_model(model_name=self.model_name, config=malformed_config)

        # The model should not be changed after a failed load model request
        for ver in ("2", "3"):
            still_ready = pb_utils.is_model_ready(
                model_name=self.model_name, model_version=ver
            )
            self.assertTrue(still_ready)

        # Send the config with the correct format
        valid_config = '{"backend":"onnxruntime", "version_policy":{"specific":{"versions":[2]}}}'
        pb_utils.load_model(self.model_name, config=valid_config)

        # The model should be changed after a successful load model request
        self.assertTrue(pb_utils.is_model_ready(self.model_name, "2"))
        self.assertFalse(pb_utils.is_model_ready(self.model_name, "3"))

    def test_load_with_file_override(self):
        """File overrides require a config and may target a new or existing name."""
        self.assertFalse(pb_utils.is_model_ready(self.model_name))
        pb_utils.load_model(self.model_name)
        self.assertTrue(pb_utils.is_model_ready(self.model_name))

        alias_name = "override_model"
        override_config = '{"backend":"onnxruntime"}'
        # The version-3 model file from disk is supplied as version 1 of the override.
        with open("models/onnx_int32_int32_int32/3/model.onnx", "rb") as onnx_file:
            override_files = {"file:1/model.onnx": onnx_file.read()}

        # Request to load the model with override file, should fail without
        # providing override config.
        with self.assertRaises(pb_utils.TritonModelException):
            pb_utils.load_model(self.model_name, "", override_files)

        # Request to load the model with override file and config in a different name
        pb_utils.load_model(
            model_name=alias_name, config=override_config, files=override_files
        )

        # Sanity check that the model with original name is unchanged
        self.assertFalse(pb_utils.is_model_ready(self.model_name, "1"))
        self.assertTrue(pb_utils.is_model_ready(self.model_name, "3"))

        # Check the override model readiness
        self.assertTrue(pb_utils.is_model_ready(alias_name, "1"))
        self.assertFalse(pb_utils.is_model_ready(alias_name, "3"))

        # Request to load the model with override file and config in original name
        pb_utils.load_model(self.model_name, override_config, override_files)

        # Check that the model with original name is changed
        self.assertTrue(pb_utils.is_model_ready(self.model_name, "1"))
        self.assertFalse(pb_utils.is_model_ready(self.model_name, "3"))

        # Sanity check readiness of the different named model
        self.assertTrue(pb_utils.is_model_ready(alias_name, "1"))
        self.assertFalse(pb_utils.is_model_ready(alias_name, "3"))


class TritonPythonModel:
    """Runs the unit tests of module ``model`` at load time and reports the outcome."""

    def initialize(self, args):
        """Run the test suite once and cache whether it succeeded.

        Args:
            args: Initialization arguments supplied by the backend (unused).
        """
        # Run the unittest during initialization. exit=False keeps
        # unittest.main from terminating the process when the run finishes.
        test_program = unittest.main("model", exit=False)
        self.result = test_program.result.wasSuccessful()

    def execute(self, requests):
        """Return the cached test outcome once per request.

        Returns:
            A list with one response per request, each holding a one-element
            boolean ``OUTPUT0`` tensor.
        """
        # This model's config.pbtxt declares OUTPUT0 as TYPE_BOOL, so the
        # result is emitted as a boolean array rather than float16.
        return [
            pb_utils.InferenceResponse(
                [pb_utils.Tensor("OUTPUT0", np.array([self.result], dtype=np.bool_))]
            )
            for _ in requests
        ]


================================================
FILE: qa/python_models/bls_onnx_warmup/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration of the "bls_onnx_warmup" test model, served by the Python
# backend.
name: "bls_onnx_warmup"
backend: "python"

# No inputs are declared.
# NOTE(review): model.py for this model fills OUTPUT0 with a single float16
# value, which differs from the type and shape declared here - confirm
# whether the mismatch is intentional.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

# Instances are placed on the CPU.
instance_group [{ kind: KIND_CPU }]

================================================
FILE: qa/python_models/bls_onnx_warmup/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import unittest

import numpy as np
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack


class PBBLSONNXWarmupTest(unittest.TestCase):
    """Checks the outputs of a BLS request sent to an ONNX model."""

    def test_onnx_output_mem_type(self):
        """Outputs must not live in CPU memory and must equal the expected values."""
        lhs_np = np.random.randn(*[16]).astype(np.float32)
        rhs_np = np.random.randn(*[16]).astype(np.float32)
        lhs = pb_utils.Tensor("INPUT0", lhs_np)
        rhs = pb_utils.Tensor("INPUT1", rhs_np)

        response = pb_utils.InferenceRequest(
            model_name="onnx_nobatch_float32_float32_float32",
            inputs=[lhs, rhs],
            requested_output_names=["OUTPUT0", "OUTPUT1"],
        ).exec()
        self.assertFalse(response.has_error())

        diff_tensor = pb_utils.get_output_tensor_by_name(response, "OUTPUT0")
        sum_tensor = pb_utils.get_output_tensor_by_name(response, "OUTPUT1")
        for tensor in (diff_tensor, sum_tensor):
            self.assertIsNotNone(tensor)

        # The memory type of output tensor should be GPU
        for tensor in (diff_tensor, sum_tensor):
            self.assertFalse(tensor.is_cpu())

        # OUTPUT0 is compared against the difference, OUTPUT1 against the sum.
        expected_diff = lhs.as_numpy() - rhs.as_numpy()
        expected_sum = lhs.as_numpy() + rhs.as_numpy()

        def _as_host_array(tensor):
            # Go through DLPack/torch to copy the data into a NumPy array.
            return from_dlpack(tensor.to_dlpack()).to("cpu").cpu().detach().numpy()

        actual_diff = _as_host_array(diff_tensor)
        actual_sum = _as_host_array(sum_tensor)

        self.assertTrue(np.all(actual_diff == expected_diff))
        self.assertTrue(np.all(actual_sum == expected_sum))


class TritonPythonModel:
    """Runs the unit tests of module ``model`` for every request."""

    def execute(self, requests):
        """Execute the test suite per request and report success in ``OUTPUT0``.

        Returns:
            A list with one response per request; each holds a float16
            ``OUTPUT0`` tensor with the value of ``wasSuccessful()``.
        """
        responses = []
        for _ in requests:
            # Run the unittest and store the results in InferenceResponse.
            test_program = unittest.main("model", exit=False)
            outcome = np.array(
                [test_program.result.wasSuccessful()], dtype=np.float16
            )
            responses.append(
                pb_utils.InferenceResponse([pb_utils.Tensor("OUTPUT0", outcome)])
            )
        return responses


================================================
FILE: qa/python_models/bls_parameters/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration of the "bls_parameters" test model, served by the Python
# backend. Batching is disabled (max_batch_size is 0).
name: "bls_parameters"
backend: "python"
max_batch_size: 0

# Single-element unsigned 8-bit input; model.py reads it as the number of
# parameter steps still to be added.
input [
  {
    name: "NUMBER_PARAMETERS"
    data_type: TYPE_UINT8
    dims: [ 1 ]
  }
]

# Single-element string output; model.py fills it with the parameters
# serialized as JSON.
output [
  {
    name: "PARAMETERS_AGGREGATED"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

# Four CPU instances.
# NOTE(review): presumably more than one instance is required because the
# model calls itself through BLS while the calling instance is still busy -
# confirm.
instance_group [
  {
    count: 4
    kind: KIND_CPU
  }
]


================================================
FILE: qa/python_models/bls_parameters/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Aggregates request parameters by calling itself recursively through BLS."""

    def execute(self, requests):
        """Add one step of parameters per recursion level and return them as JSON.

        For every request the ``NUMBER_PARAMETERS`` input gives the number of
        remaining steps. When it is zero, the parameters received with the
        request are returned as a JSON string in ``PARAMETERS_AGGREGATED``.
        Otherwise three parameters (``bool_<n>``, ``int_<n>``, ``str_<n>``)
        are added and the model calls itself with ``n - 1`` via BLS,
        forwarding the output of that nested call.

        Returns:
            A list with one response per request. If the nested BLS call
            fails, the corresponding response carries that error instead of
            silently containing no output tensors.
        """
        responses = []

        for request in requests:
            num_params = int(
                pb_utils.get_input_tensor_by_name(
                    request, "NUMBER_PARAMETERS"
                ).as_numpy()[0]
            )
            # request.parameters() is parsed as a JSON document.
            params = json.loads(request.parameters())

            if num_params == 0:
                # Base case where the received parameters are returned as JSON
                aggregated = np.array([json.dumps(params)], dtype=np.object_)
                responses.append(
                    pb_utils.InferenceResponse(
                        output_tensors=[
                            pb_utils.Tensor("PARAMETERS_AGGREGATED", aggregated)
                        ]
                    )
                )
                continue

            # Add the parameters of num_params step to the received parameters
            step = str(num_params)
            params["bool_" + step] = bool(num_params)
            params["int_" + step] = num_params
            params["str_" + step] = step

            # Complete any remaining steps [1, num_params - 1] by calling self
            # recursively via BLS
            bls_request = pb_utils.InferenceRequest(
                model_name="bls_parameters",
                inputs=[
                    pb_utils.Tensor(
                        "NUMBER_PARAMETERS",
                        np.array([num_params - 1], dtype=np.ubyte),
                    )
                ],
                requested_output_names=["PARAMETERS_AGGREGATED"],
                parameters=params,
            )
            bls_response = bls_request.exec()

            if bls_response.has_error():
                # Surface the nested failure to the caller instead of
                # returning a response without any output tensors.
                responses.append(
                    pb_utils.InferenceResponse(
                        output_tensors=[],
                        error=pb_utils.TritonError(bls_response.error().message()),
                    )
                )
                continue

            responses.append(
                pb_utils.InferenceResponse(
                    output_tensors=bls_response.output_tensors()
                )
            )

        return responses


================================================
FILE: qa/python_models/bls_request_rescheduling/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration of the "bls_request_rescheduling" test model, served by the
# Python backend.
name: "bls_request_rescheduling"
backend: "python"

# No inputs are declared.
# NOTE(review): model.py for this model fills OUTPUT0 with a single float16
# value, which differs from the type and shape declared here - confirm
# whether the mismatch is intentional.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

# Instances are placed on the CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/bls_request_rescheduling/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time
import unittest

import numpy as np
import triton_python_backend_utils as pb_utils


class RequestReschedulingTest(unittest.TestCase):
    """BLS tests against models that reschedule their requests."""

    def _reload_model(self, model_name):
        """Unload ``model_name``, wait a fixed 10 seconds, then load it again."""
        # Reload the model to reset the flag for multiple iterations
        pb_utils.unload_model(model_name)
        # TODO: Make this more robust to wait until fully unloaded
        print("Sleep 10 seconds to make sure model finishes unloading...", flush=True)
        time.sleep(10)
        print("Done sleeping.", flush=True)
        pb_utils.load_model(model_name)

    def test_wrong_return_type(self):
        """The ``wrong_return_type`` model must answer with the expected error."""
        payload = pb_utils.Tensor(
            "INPUT0", (np.random.randn(*[4])).astype(np.float32)
        )
        request = pb_utils.InferenceRequest(
            model_name="wrong_return_type",
            inputs=[payload],
            requested_output_names=["OUTPUT0"],
        )

        response = request.exec()

        self.assertTrue(response.has_error())
        self.assertIn(
            "Expected a None object in the execute function return list for reschduled request",
            response.error().message(),
        )

    def test_non_decoupled_e2e(self):
        """A rescheduled add/sub request still yields the correct outputs."""
        model_name = "request_rescheduling_addsub"
        self._reload_model(model_name)

        lhs_np = np.random.randn(*[16]).astype(np.float32)
        rhs_np = np.random.randn(*[16]).astype(np.float32)
        lhs = pb_utils.Tensor("INPUT0", lhs_np)
        rhs = pb_utils.Tensor("INPUT1", rhs_np)

        response = pb_utils.InferenceRequest(
            model_name=model_name,
            inputs=[lhs, rhs],
            requested_output_names=["OUTPUT0", "OUTPUT1"],
        ).exec()
        self.assertFalse(response.has_error())

        sum_tensor = pb_utils.get_output_tensor_by_name(response, "OUTPUT0")
        diff_tensor = pb_utils.get_output_tensor_by_name(response, "OUTPUT1")
        self.assertIsNotNone(sum_tensor)
        self.assertIsNotNone(diff_tensor)

        expected_sum = lhs.as_numpy() + rhs.as_numpy()
        expected_diff = lhs.as_numpy() - rhs.as_numpy()

        # Only the first element of each output is compared.
        self.assertEqual(expected_sum[0], sum_tensor.as_numpy()[0])
        self.assertEqual(expected_diff[0], diff_tensor.as_numpy()[0])

    def test_decoupled_e2e(self):
        """Decoupled responses must count down from ``input_value - 1``."""
        model_name = "iterative_sequence"
        self._reload_model(model_name)

        start_value = 3
        request = pb_utils.InferenceRequest(
            model_name=model_name,
            inputs=[pb_utils.Tensor("IN", np.array([start_value], dtype=np.int32))],
            requested_output_names=["OUT"],
        )
        response_iterator = request.exec(decoupled=True)

        next_expected = start_value - 1

        if not response_iterator:
            return

        for response in response_iterator:
            self.assertFalse(response.has_error())

            # Responses that carry no output tensors are not checked further.
            if len(response.output_tensors()) == 0:
                continue

            out_tensor = pb_utils.get_output_tensor_by_name(response, "OUT")
            self.assertIsNotNone(out_tensor)

            self.assertEqual(next_expected, out_tensor.as_numpy()[0])
            next_expected -= 1


class TritonPythonModel:
    """Runs the unit tests of module ``model`` for every request."""

    def execute(self, requests):
        """Execute the test suite per request and report success in ``OUTPUT0``.

        Returns:
            A list with one response per request; each holds a float16
            ``OUTPUT0`` tensor with the value of ``wasSuccessful()``.
        """
        responses = []
        for _ in requests:
            # Run the unittest and store the results in InferenceResponse.
            test_program = unittest.main("model", exit=False)
            success_flag = test_program.result.wasSuccessful()
            output_tensor = pb_utils.Tensor(
                "OUTPUT0", np.array([success_flag], dtype=np.float16)
            )
            responses.append(pb_utils.InferenceResponse([output_tensor]))
        return responses


================================================
FILE: qa/python_models/bls_simple/bls_simple.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """BLS test model that relays its inputs to a model chosen per request.

    The request carries the target model's name in ``MODEL_NAME``; ``INPUT0``
    and ``INPUT1`` are passed on unchanged and the target's output tensors are
    returned as this model's response.
    """

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Declare any missing inputs/outputs and set max batch size to 0.

        Tensors already present (matched by name) in the supplied config are
        left alone; only the absent ones are added.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        required_inputs = (
            {"name": "MODEL_NAME", "data_type": "TYPE_STRING", "dims": [1]},
            {"name": "INPUT0", "data_type": "TYPE_INT32", "dims": [1, 16]},
            {"name": "INPUT1", "data_type": "TYPE_INT32", "dims": [1, 16]},
        )
        required_outputs = (
            {"name": "OUTPUT0", "data_type": "TYPE_INT32", "dims": [16]},
            {"name": "OUTPUT1", "data_type": "TYPE_INT32", "dims": [16]},
        )

        # Snapshot the names that the existing configuration already declares.
        current = auto_complete_model_config.as_dict()
        declared_inputs = [entry["name"] for entry in current["input"]]
        declared_outputs = [entry["name"] for entry in current["output"]]

        for spec in required_inputs:
            if spec["name"] in declared_inputs:
                continue
            auto_complete_model_config.add_input(spec)

        for spec in required_outputs:
            if spec["name"] in declared_outputs:
                continue
            auto_complete_model_config.add_output(spec)

        auto_complete_model_config.set_max_batch_size(0)

        return auto_complete_model_config

    def execute(self, requests):
        """Issue one nested inference per request and relay its outputs.

        The nested request is given the incoming request's trace object.
        Returns a list with one ``InferenceResponse`` per incoming request.
        """

        def relay(request):
            first = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            second = pb_utils.get_input_tensor_by_name(request, "INPUT1")
            name_tensor = pb_utils.get_input_tensor_by_name(request, "MODEL_NAME")
            # First element of the MODEL_NAME tensor selects the target model.
            target = name_tensor.as_numpy()[0]

            nested_result = pb_utils.InferenceRequest(
                model_name=target,
                requested_output_names=["OUTPUT0", "OUTPUT1"],
                inputs=[first, second],
                trace=request.trace(),
            ).exec()

            return pb_utils.InferenceResponse(
                output_tensors=nested_result.output_tensors()
            )

        return [relay(request) for request in requests]


================================================
FILE: qa/python_models/bls_undefined/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "bls_undefined"
backend: "python"

input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]

instance_group [{
        kind: KIND_CPU,
        count: 2
}]


================================================
FILE: qa/python_models/bls_undefined/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Model whose ``execute`` only evaluates a name that is never defined."""

    def execute(self, requests):
        """Fail immediately; no response is ever built.

        ``undefined_variable`` is not bound anywhere in this module, so
        evaluating it raises ``NameError``.
        """
        # NOTE(review): presumably deliberate, to exercise how an exception
        # raised inside execute() is handled -- confirm against the test that
        # loads the "bls_undefined" model.
        undefined_variable

    def finalize(self):
        """Print a fixed message to stdout."""
        print("Cleaning up...")


================================================
FILE: qa/python_models/busy_op/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "busy_op"
backend: "python"
max_batch_size: 1

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/busy_op/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import time

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    Echo model that stalls on its first execute() call.

    The initial call sleeps so that later requests have to wait in the queue,
    which is meant to result in memory growth. Every call after that simply
    copies INPUT0 to OUTPUT0.
    """

    def initialize(self, args):
        """Arm the one-shot delay; `args` is not used."""
        self.sleep = True

    def execute(self, requests):
        """Return one response per request, each echoing INPUT0 as OUTPUT0."""
        if self.sleep:
            # One-time 50 second stall; the flag is cleared so it never repeats.
            time.sleep(50)
            self.sleep = False

        def echo(request):
            source = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            copied = pb_utils.Tensor("OUTPUT0", source.as_numpy())
            return pb_utils.InferenceResponse([copied])

        return [echo(request) for request in requests]


================================================
FILE: qa/python_models/cuda_memory_consumer/1/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils
from cuda import cuda


class TritonPythonModel:
    """Model that grabs a large chunk of CUDA memory when it is loaded.

    ``initialize`` allocates 40% of the value reported in the third element of
    ``cuMemGetInfo()`` (total device memory per the CUDA driver API) and holds
    it until ``finalize``. ``execute`` echoes the input tensor to the output.
    """

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Declare the single FP32 input/output pair and disable batching.

        Returns the same ``auto_complete_model_config`` object it was given.
        """
        input_spec = {"name": "INPUT", "data_type": "TYPE_FP32", "dims": [1]}
        output_spec = {"name": "OUTPUT", "data_type": "TYPE_FP32", "dims": [1]}

        auto_complete_model_config.set_max_batch_size(0)
        auto_complete_model_config.add_input(input_spec)
        auto_complete_model_config.add_output(output_spec)

        return auto_complete_model_config

    def initialize(self, args):
        """Create a CUDA context and allocate the memory block.

        Raises:
            pb_utils.TritonModelException: if querying memory info or the
                allocation itself reports a non-zero status.
        """
        # Set first so finalize() is safe even if a later step raises.
        self.mem_ptr = None
        # Initialize CUDA context
        cuda.cuInit(0)
        cuda.cuCtxCreate(0, 0)

        mem_info = cuda.cuMemGetInfo()
        if mem_info[0] != 0:
            raise pb_utils.TritonModelException("Failed to get CUDA memory info")

        # The allocation size is a byte count and must be integral; the
        # multiplication by 0.4 yields a float, so truncate it explicitly.
        mem_alloc = cuda.cuMemAlloc(int(mem_info[2] * 0.4))
        if mem_alloc[0] != 0:
            raise pb_utils.TritonModelException("Failed to allocate CUDA memory")
        self.mem_ptr = mem_alloc[1]

    def finalize(self):
        """Release the memory block if one was allocated."""
        if self.mem_ptr is not None:
            cuda.cuMemFree(self.mem_ptr)
            # Drop the stale pointer so a repeated finalize() is a no-op.
            self.mem_ptr = None

    def execute(self, requests):
        """This function is called on inference request.

        Returns one response per request that copies INPUT to OUTPUT. The
        tensor names match the ones declared in ``auto_complete_config``.
        """
        responses = []
        for request in requests:
            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT")
            out_tensor = pb_utils.Tensor("OUTPUT", input_tensor.as_numpy())
            responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses


================================================
FILE: qa/python_models/cuda_memory_consumer/config.pbtxt
================================================
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

backend: "python"
instance_group [{ kind: KIND_GPU, gpus: [0] }]


================================================
FILE: qa/python_models/custom_metrics/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "custom_metrics"
backend: "python"

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

instance_group [
  {
    count: 3
    kind: KIND_CPU
  }
]


================================================
FILE: qa/python_models/custom_metrics/model.py
================================================
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import unittest

import numpy as np
import requests
import triton_python_backend_utils as pb_utils


class PBCustomMetricsTest(unittest.TestCase):
    def _get_metrics(self):
        metrics_url = "http://localhost:8002/metrics"
        r = requests.get(metrics_url)
        r.raise_for_status()
        return r.text

    def _metric_api_helper(self, metric, kind):
        # Adding logger to test if custom metrics and logging work together
        # as they use the same message queue.
        logger = pb_utils.Logger

        # The value should be 0.0 before the test
        self.assertEqual(metric.value(), 0.0)

        # Test increment positive value
        increment = 2023.0
        metric.increment(increment)
        self.assertEqual(metric.value(), increment)
        logger.log_info("Incremented metric to : {}".format(metric.value()))

        # Test increment negative value
        decrement = -23.5
        if kind == "counter":
            # Counter should not accept negative values
            with self.assertRaises(pb_utils.TritonModelException):
                metric.increment(decrement)
        else:
            metric.increment(decrement)
            self.assertEqual(metric.value(), increment + decrement)
            logger.log_info("Decremented metric to : {}".format(metric.value()))

        # Test set value
        value = 999.9
        if kind == "counter":
            # Counter does not support set
            with self.assertRaises(pb_utils.TritonModelException):
                metric.set(value)
        else:
            metric.set(value)
            self.assertEqual(metric.value(), value)
            logger.log_info("Set metric to : {}".format(metric.value()))

        # Test observe value
        observe = 0.05
        # Counter and gauge do not support observe
        with self.assertRaises(pb_utils.TritonModelException):
            metric.observe(observe)

    def _histogram_api_helper(self, metric, name, labels):
        def histogram_str_builder(name, type, labels, value, le=None):
            if type == "count" or type == "sum":
                return f"{name}_{type}{{{labels}}} {value}"
            elif type == "bucket":
                return f'{name}_bucket{{{labels},le="{le}"}} {value}'
            else:
                raise

        # Adding logger to test if custom metrics and logging work together
        # as they use the same message queue.
        logger = pb_utils.Logger

        # All values should be 0.0 before the test
        metrics = self._get_metrics()
        self.assertIn(histogram_str_builder(name, "count", labels, "0"), metrics)
        self.assertIn(histogram_str_builder(name, "sum", labels, "0"), metrics)
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "0", le="0.1"), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "0", le="1"), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "0", le="2.5"), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "0", le="5"), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "0", le="10"), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "0", le="+Inf"), metrics
        )

        # Histogram does not support value
        with self.assertRaises(pb_utils.TritonModelException):
            metric.value()

        # Test increment value
        increment = 2023.0
        # Histogram does not support increment
        with self.assertRaises(pb_utils.TritonModelException):
            metric.increment(increment)

        # Test set value
        value = 999.9
        # Histogram does not support set
        with self.assertRaises(pb_utils.TritonModelException):
            metric.set(value)

        # Test observe value
        data = [0.05, 1.5, 6.0]
        for datum in data:
            metric.observe(datum)
            logger.log_info("Observe histogram metric with value : {}".format(datum))

        metrics = self._get_metrics()
        self.assertIn(
            histogram_str_builder(name, "count", labels, str(len(data))), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "sum", labels, str(sum(data))), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "1", le="0.1"), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "1", le="1"), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "2", le="2.5"), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "2", le="5"), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "3", le="10"), metrics
        )
        self.assertIn(
            histogram_str_builder(name, "bucket", labels, "3", le="+Inf"), metrics
        )

    def _dup_metric_helper(self, labels={}):
        # Adding logger to test if custom metrics and logging work together
        # as they use the same message queue.
        logger = pb_utils.Logger

        description = "dup metric"
        metric_family = pb_utils.MetricFamily(
            name="test_dup_metric",
            description=description,
            kind=pb_utils.MetricFamily.COUNTER,
        )

        # Verify dupe metrics reference same underlying metric
        metric1 = metric_family.Metric(labels=labels)
        metric2 = metric_family.Metric(labels=labels)

        # The value should be 0 before the test
        self.assertEqual(metric1.value(), 0.0)
        self.assertEqual(metric2.value(), 0.0)

        # Increment metric 1, check metric 2 == metric 1
        increment = 7.5
        metric1.increment(increment)
        self.assertEqual(metric1.value(), metric2.value())
        logger.log_info("Incremented metric1 to : {}".format(metric1.value()))
        logger.log_info("Incremented metric2 to : {}".format(metric2.value()))

        # Assert custom metric/family remains when there's still a reference to it
        del metric1
        metrics = self._get_metrics()
        self.assertIn(description, metrics)

    def test_counter_e2e(self):
        metric_family = pb_utils.MetricFamily(
            name="test_counter_e2e",
            description="test metric counter kind end to end",
            kind=pb_utils.MetricFamily.COUNTER,
        )
        labels = {"example1": "counter_label1", "example2": "counter_label2"}
        metric = metric_family.Metric(labels=labels)
        self._metric_api_helper(metric, "counter")

        pattern = (
            'test_counter_e2e{example1="counter_label1",example2="counter_label2"}'
        )
        metrics = self._get_metrics()
        self.assertIn(pattern, metrics)

    def test_gauge_e2e(self):
        metric_family = pb_utils.MetricFamily(
            name="test_gauge_e2e",
            description="test metric gauge kind end to end",
            kind=pb_utils.MetricFamily.GAUGE,
        )
        labels = {"example1": "gauge_label1", "example2": "gauge_label2"}
        metric = metric_family.Metric(labels=labels)
        self._metric_api_helper(metric, "gauge")

        pattern = 'test_gauge_e2e{example1="gauge_label1",example2="gauge_label2"}'
        metrics = self._get_metrics()
        self.assertIn(pattern, metrics)

    def test_histogram_e2e(self):
        name = "test_histogram_e2e"
        metric_family = pb_utils.MetricFamily(
            name=name,
            description="test metric histogram kind end to end",
            kind=pb_utils.MetricFamily.HISTOGRAM,
        )

        labels = {"example1": "histogram_label1", "example2": "histogram_label2"}
        buckets = [0.1, 1.0, 2.5, 5.0, 10.0]
        metric = metric_family.Metric(labels=labels, buckets=buckets)

        labels_str = 'example1="histogram_label1",example2="histogram_label2"'
        self._histogram_api_helper(metric, name, labels_str)

        metrics = self._get_metrics()
        count_pattern = f"{name}_count{{{labels_str}}}"
        sum_pattern = f"{name}_sum{{{labels_str}}}"
        bucket_pattern = f"{name}_bucket{{{labels_str}"
        self.assertEqual(metrics.count(count_pattern), 1)
        self.assertEqual(metrics.count(sum_pattern), 1)
        self.assertEqual(metrics.count(bucket_pattern), len(buckets) + 1)

    def test_histogram_args(self):
        name = "test_histogram_args"
        metric_family = pb_utils.MetricFamily(
            name=name,
            description="test metric histogram args",
            kind=pb_utils.MetricFamily.HISTOGRAM,
        )

        # Test "None" value buckets
        with self.assertRaises(pb_utils.TritonModelException):
            metric_family.Metric(labels={})
        with self.assertRaises(pb_utils.TritonModelException):
            metric_family.Metric(labels={}, buckets=None)

        # Test non-ascending order buckets
        with self.assertRaises(pb_utils.TritonModelException):
            metric_family.Metric(labels={}, buckets=[2.5, 0.1, 1.0, 10.0, 5.0])

        # Test duplicate value buckets
        with self.assertRaises(pb_utils.TritonModelException):
            metric_family.Metric(labels={}, buckets=[1, 1, 2, 5, 5])

        # Test empty list bucket
        metric_family.Metric(labels={}, buckets=[])

    def test_dup_metric_family_diff_kind(self):
        # Test that a duplicate metric family can't be added with a conflicting type/kind
        metric_family1 = pb_utils.MetricFamily(
            name="test_dup_metric_family_diff_kind",
            description="test metric family with same name but different kind",
            kind=pb_utils.MetricFamily.COUNTER,
        )
        with self.assertRaises(pb_utils.TritonModelException):
            metric_family2 = pb_utils.MetricFamily(
                name="test_dup_metric_family_diff_kind",
                description="test metric family with same name but different kind",
                kind=pb_utils.MetricFamily.GAUGE,
            )
            self.assertIsNone(metric_family2)

        self.assertIsNotNone(metric_family1)

    def test_dup_metric_family_diff_description(self):
        # Test that a duplicate metric family name will still return the
        # original metric family even if the description is changed
        metric_family1 = pb_utils.MetricFamily(
            name="test_dup_metric_family_diff_description",
            description="first description",
            kind=pb_utils.MetricFamily.COUNTER,
        )
        metric_family2 = pb_utils.MetricFamily(
            name="test_dup_metric_family_diff_description",
            description="second description",
            kind=pb_utils.MetricFamily.COUNTER,
        )

        metric2 = metric_family2.Metric()
        self.assertEqual(metric2.value(), 0)

        # Delete metric_family1 and check if metric_family2 still references it
        del metric_family1
        pattern = "test_dup_metric_family_diff_description first description"
        metrics = self._get_metrics()
        self.assertIn(pattern, metrics)

        # The first description will be kept if adding a duplicate metric
        # family name with a different description
        pattern = "test_dup_metric_family_diff_description second description"
        self.assertNotIn(pattern, metrics)

    def test_dup_metric_family(self):
        # Test that adding a duplicate metric family will reuse the original
        # and not add another entry to registry
        metric_family1 = pb_utils.MetricFamily(
            name="test_dup_metric_family",
            description="dup description",
            kind=pb_utils.MetricFamily.COUNTER,
        )
        metric_family2 = pb_utils.MetricFamily(
            name="test_dup_metric_family",
            description="dup description",
            kind=pb_utils.MetricFamily.COUNTER,
        )

        metric_key = "custom_metric_key"
        metric1 = metric_family1.Metric(labels={metric_key: "label1"})
        metric2 = metric_family2.Metric(labels={metric_key: "label2"})

        self.assertEqual(metric1.value(), 0)
        self.assertEqual(metric2.value(), 0)

        patterns = [
            "# HELP test_dup_metric_family dup description",
            "# TYPE test_dup_metric_family counter",
            'test_dup_metric_family{custom_metric_key="label2"} 0',
            'test_dup_metric_family{custom_metric_key="label1"} 0',
        ]
        metrics = self._get_metrics()
        for pattern in patterns:
            self.assertIn(pattern, metrics)

    def test_dup_metric_labels(self):
        # Test that adding a duplicate metric will refer to the same
        # underlying metric, and all instances will be updated
        labels = {"example1": "label1", "example2": "label2"}
        self._dup_metric_helper(labels)

    def test_dup_metric_empty_labels(self):
        # Test that adding a duplicate metric will refer to the same
        # underlying metric, and all instances will be updated
        self._dup_metric_helper()

    def test_metric_lifetime_error(self):
        # Test the error handling when the corresponding 'MetricFamily' is
        # deleted before the 'Metric' is deleted, and the 'Metric' is still
        # being used for metric operations
        kinds = [pb_utils.MetricFamily.COUNTER, pb_utils.MetricFamily.GAUGE]
        metric_family_names = [
            "test_metric_lifetime_error_counter",
            "test_metric_lifetime_error_gauge",
        ]
        for kind, name in zip(kinds, metric_family_names):
            metric_family = pb_utils.MetricFamily(
                name=name, description="test metric lifetime error", kind=kind
            )
            labels = {"example1": "counter_label1", "example2": "counter_label2"}
            metric = metric_family.Metric(labels=labels)

            # Intentionally delete the 'MetricFamily' before the 'Metric' being deleted
            del metric_family

            error_msg = "Invalid metric operation as the corresponding 'MetricFamily' has been deleted."

            # Counter does not support set
            if kind is not pb_utils.MetricFamily.COUNTER:
                with self.assertRaises(pb_utils.TritonModelException) as ex:
                    metric.set(10)
                self.assertIn(error_msg, str(ex.exception))

            with self.assertRaises(pb_utils.TritonModelException) as ex:
                metric.increment(10)
            self.assertIn(error_msg, str(ex.exception))

            with self.assertRaises(pb_utils.TritonModelException) as ex:
                metric.value()
            self.assertIn(error_msg, str(ex.exception))


class TritonPythonModel:
    """Model whose inference runs the unit tests of the module named "model"."""

    def execute(self, requests):
        """Run the test suite once per request and report whether it passed.

        Each response carries a single "OUTPUT0" tensor: a one-element
        float16 array holding the suite's ``wasSuccessful()`` flag.
        """

        def run_suite():
            # exit=False keeps unittest from calling sys.exit() when done.
            outcome = unittest.main("model", exit=False)
            verdict = np.array([outcome.result.wasSuccessful()], dtype=np.float16)
            return pb_utils.InferenceResponse([pb_utils.Tensor("OUTPUT0", verdict)])

        return [run_suite() for _ in requests]


================================================
FILE: qa/python_models/delayed_model/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "delayed_model"
backend: "python"
max_batch_size: 64

input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/delayed_model/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time

import triton_python_backend_utils as pb_utils

# Sleep for 5 seconds to ensure that delayed startup works properly.
# This runs at module import time, i.e. before 'TritonPythonModel' below is
# even defined, so every load of this model file is delayed by the sleep.
time.sleep(5)


class TritonPythonModel:
    """Identity model: copies the "IN" input tensor to the "OUT" output."""

    def execute(self, requests):
        """Return one response per request, echoing "IN" back as "OUT"."""

        def echo(request):
            source = pb_utils.get_input_tensor_by_name(request, "IN")
            echoed = pb_utils.Tensor("OUT", source.as_numpy())
            return pb_utils.InferenceResponse([echoed])

        return [echo(request) for request in requests]

    def finalize(self):
        """Nothing to clean up."""


================================================
FILE: qa/python_models/dlpack_add_sub/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "dlpack_add_sub"
backend: "python"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

instance_group [{kind : KIND_CPU}]

parameters: {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {
    string_value:"no"
  }
}


================================================
FILE: qa/python_models/dlpack_add_sub/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import torch
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack, to_dlpack


class TritonPythonModel:
    """Add/sub model that exchanges tensors with PyTorch through DLPack.

    For every request it produces OUTPUT0 = INPUT0 + INPUT1 and
    OUTPUT1 = INPUT0 - INPUT1.
    """

    def initialize(self, args):
        """Cache the configured output dtypes.

        ``args["model_config"]`` is the JSON-serialized model configuration;
        the data types of "OUTPUT0" and "OUTPUT1" are read from it and
        converted to NumPy types.
        """
        self.model_config = model_config = json.loads(args["model_config"])

        output0_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT0")
        output1_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config["data_type"]
        )
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config["data_type"]
        )
        # Lookup used to cast PyTorch results to the configured output dtype.
        self.numpy_to_pytorch_dtype = {
            np.bool_: torch.bool,
            np.uint8: torch.uint8,
            np.int8: torch.int8,
            np.int16: torch.int16,
            np.int32: torch.int32,
            np.int64: torch.int64,
            np.float16: torch.float16,
            np.float32: torch.float32,
            np.float64: torch.float64,
        }

    def _cpu_output_from_torch(self, name, torch_tensor, np_dtype):
        """Wrap a CPU PyTorch result as the output tensor called ``name``.

        Object-typed outputs go through NumPy; every other dtype is cast in
        PyTorch and handed over via DLPack.
        """
        if np_dtype == np.object_:
            return pb_utils.Tensor(name, torch_tensor.numpy().astype(np_dtype))
        cast_tensor = torch_tensor.type(self.numpy_to_pytorch_dtype[np_dtype])
        return pb_utils.Tensor.from_dlpack(name, to_dlpack(cast_tensor))

    def execute(self, requests):
        """Return one response with OUTPUT0 (sum) and OUTPUT1 (difference) per request.

        Three paths are taken depending on the inputs:
        * both inputs on CPU and INPUT0 of bytes/object dtype: values are
          cast to int32 and computed with NumPy;
        * both inputs on CPU otherwise: computed with PyTorch on tensors
          obtained through DLPack;
        * at least one input not on CPU: both are moved to CUDA and the
          results are returned through DLPack without a dtype cast.
        """
        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            if in_0.is_cpu() and in_1.is_cpu():
                # Convert once; the array is needed for the dtype check and,
                # on the string path, for the arithmetic as well.
                in_0_np = in_0.as_numpy()
                if in_0_np.dtype.type is np.bytes_ or in_0_np.dtype == np.object_:
                    lhs = in_0_np.astype(np.int32)
                    rhs = in_1.as_numpy().astype(np.int32)
                    out_tensor_0 = pb_utils.Tensor(
                        "OUTPUT0", (lhs + rhs).astype(output0_dtype)
                    )
                    out_tensor_1 = pb_utils.Tensor(
                        "OUTPUT1", (lhs - rhs).astype(output1_dtype)
                    )
                else:
                    in_0_pytorch = from_dlpack(in_0.to_dlpack())
                    in_1_pytorch = from_dlpack(in_1.to_dlpack())
                    out_0 = in_0_pytorch + in_1_pytorch
                    out_1 = in_0_pytorch - in_1_pytorch
                    out_tensor_0 = self._cpu_output_from_torch(
                        "OUTPUT0", out_0, output0_dtype
                    )
                    out_tensor_1 = self._cpu_output_from_torch(
                        "OUTPUT1", out_1, output1_dtype
                    )
            else:
                # At least one input is not on CPU: compute on CUDA.
                in_0_pytorch = from_dlpack(in_0.to_dlpack()).cuda()
                in_1_pytorch = from_dlpack(in_1.to_dlpack()).cuda()
                out_tensor_0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", to_dlpack(in_0_pytorch + in_1_pytorch)
                )
                out_tensor_1 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT1", to_dlpack(in_0_pytorch - in_1_pytorch)
                )

            responses.append(pb_utils.InferenceResponse([out_tensor_0, out_tensor_1]))

        return responses


================================================
FILE: qa/python_models/dlpack_empty_output/config.pbtxt
================================================
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "dlpack_empty_output"
max_batch_size: 8

input [
  {
    name: "INPUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]


================================================
FILE: qa/python_models/dlpack_empty_output/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import to_dlpack


class TritonPythonModel:
    """Model that answers every request with an empty (shape ``(0,)``) GPU tensor."""

    def initialize(self, args):
        """No initialization is required."""
        return None

    def execute(self, requests):
        """Return one response per request with a zero-element "OUTPUT" tensor.

        The tensor is created with PyTorch as float32, moved to "cuda:0" and
        passed to the backend through DLPack.
        """

        def empty_gpu_response():
            empty_shape = (0,)
            gpu_tensor = torch.ones(empty_shape, dtype=torch.float32).to(
                torch.device("cuda:0")
            )
            output = pb_utils.Tensor.from_dlpack("OUTPUT", to_dlpack(gpu_tensor))
            return pb_utils.InferenceResponse(output_tensors=[output])

        return [empty_gpu_response() for _ in requests]


================================================
FILE: qa/python_models/dlpack_identity/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "dlpack_identity"
backend: "python"
max_batch_size: 64

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]

parameters: {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {
    string_value:"no"
  }
}


================================================
FILE: qa/python_models/dlpack_identity/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        """Python backend identity model that works with both GPU and CPU
        tensors: "INPUT0" is handed back as "OUTPUT0" through DLPack."""

        def passthrough(request):
            source = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            mirrored = pb_utils.Tensor.from_dlpack("OUTPUT0", source.to_dlpack())
            return pb_utils.InferenceResponse([mirrored])

        return [passthrough(request) for request in requests]


================================================
FILE: qa/python_models/dlpack_io_identity/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "dlpack_io_identity"
backend: "python"
max_batch_size: 0

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
input [
  {
    name: "GPU_OUTPUT"
    data_type: TYPE_BOOL
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
output [
  {
    name: "NEXT_GPU_OUTPUT"
    data_type: TYPE_BOOL
    dims: [ -1 ]
  }
]

instance_group [{kind : KIND_CPU}]

parameters: {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {
    string_value:"no"
  }
}


================================================
FILE: qa/python_models/dlpack_io_identity/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack, to_dlpack


class TritonPythonModel:
    """
    This Python identity model passes the DLPack tensors as is. The
    "GPU_OUTPUT" input controls whether the model should put the output in
    GPU or in CPU.
    """

    def initialize(self, args):
        """Remember the model name; it decides whether BLS is performed."""
        self._model_name = args["model_name"]

    @staticmethod
    def _to_host_numpy(tensor):
        """Return the contents of a backend tensor as a NumPy array on the host.

        The decision is based on the placement of ``tensor`` itself: CPU
        tensors are read directly, anything else is copied to the host
        through PyTorch.
        """
        if tensor.is_cpu():
            return tensor.as_numpy()
        return from_dlpack(tensor.to_dlpack()).detach().cpu().numpy()

    def execute(self, requests):
        """Echo "INPUT0" as "OUTPUT0", placed on the device selected by "GPU_OUTPUT".

        The first element of "GPU_OUTPUT" selects GPU (true) or CPU (false)
        placement for "OUTPUT0"; the remaining elements are returned as
        "NEXT_GPU_OUTPUT". Unless this model is "dlpack_io_identity_1", the
        same inputs are additionally sent to "dlpack_io_identity_1" via BLS
        and its "OUTPUT0" is checked against "INPUT0".

        Raises:
            pb_utils.TritonModelException: if the BLS request fails or its
                output does not match the input.
        """
        responses = []
        for request in requests:
            input0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            gpu_output = pb_utils.get_input_tensor_by_name(
                request, "GPU_OUTPUT"
            ).as_numpy()

            input_on_cpu = input0.is_cpu()
            wants_gpu = bool(gpu_output[0])
            if input_on_cpu == wants_gpu:
                # The tensor lives on the other device: move it with PyTorch.
                input0_pytorch = from_dlpack(input0.to_dlpack())
                output0_pytorch = (
                    input0_pytorch.cuda() if wants_gpu else input0_pytorch.cpu()
                )
                output0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", to_dlpack(output0_pytorch)
                )
            else:
                # Already on the requested device: pass it through untouched.
                output0 = pb_utils.Tensor.from_dlpack("OUTPUT0", input0.to_dlpack())

            next_gpu_output = pb_utils.Tensor("NEXT_GPU_OUTPUT", gpu_output[1:])

            # Do not perform BLS inference if it is the first
            # model in the pipeline.
            if self._model_name != "dlpack_io_identity_1":
                infer_request = pb_utils.InferenceRequest(
                    model_name="dlpack_io_identity_1",
                    inputs=[
                        input0,
                        pb_utils.get_input_tensor_by_name(request, "GPU_OUTPUT"),
                    ],
                    requested_output_names=["OUTPUT0"],
                )
                infer_response = infer_request.exec()

                if infer_response.has_error():
                    raise pb_utils.TritonModelException(
                        infer_response.error().message()
                    )

                bls_output0 = pb_utils.get_output_tensor_by_name(
                    infer_response, "OUTPUT0"
                )
                # Each tensor is converted according to its own placement
                # rather than assuming the BLS output shares the placement
                # of this model's output.
                bls_output0_np = self._to_host_numpy(bls_output0)
                input0_np = self._to_host_numpy(input0)

                if not np.allclose(bls_output0_np, input0_np):
                    raise pb_utils.TritonModelException(
                        "BLS input and output tensors are not equal"
                    )

            responses.append(pb_utils.InferenceResponse([output0, next_gpu_output]))

        return responses


================================================
FILE: qa/python_models/dlpack_io_identity_decoupled/config.pbtxt
================================================
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "dlpack_io_identity_decoupled"
backend: "python"
max_batch_size: 0

model_transaction_policy {
  decoupled: True
}

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
input [
  {
    name: "GPU_OUTPUT"
    data_type: TYPE_BOOL
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
output [
  {
    name: "NEXT_GPU_OUTPUT"
    data_type: TYPE_BOOL
    dims: [ -1 ]
  }
]

instance_group [{kind : KIND_CPU}]

parameters: {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {
    string_value:"no"
  }
}


================================================
FILE: qa/python_models/dlpack_io_identity_decoupled/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import threading
import time

import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack, to_dlpack


class TritonPythonModel:
    """
    Decoupled Python identity model that passes the DLPack tensors as is.

    The first element of the "GPU_OUTPUT" input controls whether "OUTPUT0" is
    placed in GPU or in CPU memory. The remaining elements are returned
    unchanged in the "NEXT_GPU_OUTPUT" output. Every request is answered from
    a separate thread, with the same response sent twice.
    """

    def initialize(self, args):
        """Store the model name and set up the in-flight thread bookkeeping.

        Parameters
        ----------
        args : dict
            Arguments handed to the model at load time. Only the
            "model_name" entry is read here.
        """
        self._model_name = args["model_name"]
        # Number of response threads that have been started but have not yet
        # finished; guarded by the lock because it is updated from the
        # response threads as well as from `execute`.
        self.inflight_thread_count = 0
        self.inflight_thread_count_lck = threading.Lock()

    def response_thread(self, response_sender, input0, gpu_output):
        """Build the response for one request and send it from this thread.

        Parameters
        ----------
        response_sender
            Response sender obtained from the request.
        input0
            The "INPUT0" tensor of the request.
        gpu_output
            NumPy view of the "GPU_OUTPUT" input. Element 0 selects GPU
            (truthy) or CPU (falsy) placement for "OUTPUT0".
        """
        try:
            # Sleep 5 seconds to make sure the main thread has exited.
            time.sleep(5)

            want_gpu = bool(gpu_output[0])
            input_on_gpu = not input0.is_cpu()

            if input_on_gpu == want_gpu:
                # The tensor already lives where the output is requested.
                output0 = pb_utils.Tensor.from_dlpack("OUTPUT0", input0.to_dlpack())
            else:
                # Move the tensor to the requested device through PyTorch.
                input0_pytorch = from_dlpack(input0.to_dlpack())
                if want_gpu:
                    output0_pytorch = input0_pytorch.cuda()
                else:
                    output0_pytorch = input0_pytorch.cpu()
                output0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", to_dlpack(output0_pytorch)
                )

            next_gpu_output = pb_utils.Tensor("NEXT_GPU_OUTPUT", gpu_output[1:])
            infer_response = pb_utils.InferenceResponse([output0, next_gpu_output])

            # Number of times to repeat the response
            response_repeat = 2
            for _ in range(response_repeat):
                response_sender.send(infer_response)

            response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        finally:
            # Always release the in-flight slot, even if building or sending
            # the response failed; otherwise `finalize` would wait forever.
            with self.inflight_thread_count_lck:
                self.inflight_thread_count -= 1

    def execute(self, requests):
        """Start one daemon response thread per request.

        Responses are produced by the threads, so nothing is returned here.
        """
        for request in requests:
            input0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            gpu_output = pb_utils.get_input_tensor_by_name(
                request, "GPU_OUTPUT"
            ).as_numpy()

            thread = threading.Thread(
                target=self.response_thread,
                args=(request.get_response_sender(), input0, gpu_output),
            )

            thread.daemon = True

            # Count the thread before starting it so that the counter can
            # never be decremented ahead of the increment.
            with self.inflight_thread_count_lck:
                self.inflight_thread_count += 1

            thread.start()

    def finalize(self):
        """Wait until every response thread has finished.

        The counter is polled every 0.1 seconds and a progress message is
        printed on the first poll and then every 5 seconds of waiting.
        """
        inflight_threads = True
        cycles = 0
        logging_time_sec = 5
        sleep_time_sec = 0.1
        cycle_to_log = logging_time_sec / sleep_time_sec
        while inflight_threads:
            with self.inflight_thread_count_lck:
                inflight_threads = self.inflight_thread_count != 0
                if cycles % cycle_to_log == 0:
                    print(
                        f"Waiting for {self.inflight_thread_count} response threads to complete..."
                    )
            if inflight_threads:
                time.sleep(sleep_time_sec)
                cycles += 1


================================================
FILE: qa/python_models/dlpack_square/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "dlpack_square"
backend: "python"
max_batch_size: 0
model_transaction_policy {
  decoupled: True
}
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/dlpack_square/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import threading

import numpy as np
import torch

# triton_python_backend_utils is available in every Triton Python model. You
# need to use this module to create inference requests and responses. It also
# contains some utility functions for extracting information from model_config
# and converting Triton input/output types to numpy types.
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack, to_dlpack

# Lookup table from NumPy scalar types to the matching PyTorch dtypes. It is
# used to cast a PyTorch tensor to the dtype configured for the model output.
numpy_to_pytorch_dtype = {
    numpy_type: torch_dtype
    for numpy_type, torch_dtype in (
        (np.bool_, torch.bool),
        (np.uint8, torch.uint8),
        (np.int8, torch.int8),
        (np.int16, torch.int16),
        (np.int32, torch.int32),
        (np.int64, torch.int64),
        (np.float16, torch.float16),
        (np.float32, torch.float32),
        (np.float64, torch.float64),
    )
}


class TritonPythonModel:
    """Decoupled model that answers a request with a variable number of responses.

    The first element of the "IN" tensor is the number of responses to send.
    Each response carries the "IN" tensor, converted to the data type
    configured for the "OUT" output, and is sent from a separate thread.
    """

    def initialize(self, args):
        """Parse the model configuration and prepare the thread bookkeeping.

        Parameters
        ----------
        args : dict
            Arguments handed to the model at load time. The "model_config"
            (JSON string) and "model_name" entries are read here.

        Raises
        ------
        pb_utils.TritonModelException
            If the model is not configured with the decoupled transaction
            policy.
        """
        self.model_config = model_config = json.loads(args["model_config"])

        output_config = pb_utils.get_output_config_by_name(model_config, "OUT")
        self.output_dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])

        using_decoupled = pb_utils.using_decoupled_model_transaction_policy(
            model_config
        )
        if not using_decoupled:
            raise pb_utils.TritonModelException(
                """the model `{}` can generate any number of responses per request,
                enable decoupled transaction policy in model configuration to
                serve this model""".format(
                    args["model_name"]
                )
            )

        # Number of response threads that have been started but have not yet
        # finished; guarded by the lock because the threads update it too.
        self.inflight_thread_count = 0
        self.inflight_thread_count_lck = threading.Lock()

    def execute(self, requests):
        """Hand every request to its own response thread.

        Returns None because the responses are sent through the response
        sender of each request rather than returned.
        """
        for request in requests:
            self.process_request(request)

        return None

    def process_request(self, request):
        """Start a daemon thread that sends the responses for `request`."""
        # Start a separate thread to send the responses for the request. The
        # sending back the responses is delegated to this thread.
        thread = threading.Thread(
            target=self.response_thread,
            args=(
                request.get_response_sender(),
                pb_utils.get_input_tensor_by_name(request, "IN"),
                self.output_dtype,
            ),
        )

        thread.daemon = True

        # Count the thread before starting it so that the counter can never
        # be decremented ahead of the increment.
        with self.inflight_thread_count_lck:
            self.inflight_thread_count += 1

        thread.start()

    def _make_output(self, in_input, output_dtype):
        """Build one "OUT" tensor from `in_input`."""
        if not in_input.is_cpu():
            in_0_pytorch = from_dlpack(in_input.to_dlpack()).cuda()
            # The only output declared by this model is "OUT".
            return pb_utils.Tensor.from_dlpack("OUT", to_dlpack(in_0_pytorch))

        input_dtype = in_input.as_numpy().dtype
        if input_dtype.type is np.bytes_ or input_dtype == np.object_:
            # String inputs cannot go through DLPack; convert with NumPy.
            out_0 = in_input.as_numpy().astype(np.int32)
            return pb_utils.Tensor("OUT", out_0.astype(output_dtype))

        out_0 = from_dlpack(in_input.to_dlpack())
        if output_dtype == np.object_:
            return pb_utils.Tensor("OUT", out_0.numpy().astype(output_dtype))

        out_0 = out_0.type(numpy_to_pytorch_dtype[output_dtype])
        return pb_utils.Tensor.from_dlpack("OUT", to_dlpack(out_0))

    def response_thread(self, response_sender, in_input, output_dtype):
        """Send the responses for one request and close the response sender.

        Parameters
        ----------
        response_sender
            Response sender associated with the request.
        in_input
            The "IN" tensor; its first element is the number of responses.
        output_dtype
            NumPy type configured for the "OUT" output.
        """
        # The response_sender is used to send response(s) associated with the
        # corresponding request.
        try:
            if in_input.is_cpu():
                response_count = in_input.as_numpy()[0]
            else:
                # A tensor stored in GPU memory cannot be converted to NumPy,
                # so read the count through PyTorch instead.
                response_count = int(from_dlpack(in_input.to_dlpack())[0])

            for _ in range(response_count):
                # A new tensor is built for every response, as before.
                out_tensor = self._make_output(in_input, output_dtype)
                response = pb_utils.InferenceResponse(output_tensors=[out_tensor])
                response_sender.send(response)

            # We must close the response sender to indicate to Triton that we
            # are done sending responses for the corresponding request. We
            # can't use the response sender after closing it. The response
            # sender is closed by setting the
            # TRITONSERVER_RESPONSE_COMPLETE_FINAL.
            response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        finally:
            # Release the in-flight slot even when sending failed, so that
            # `finalize` cannot wait on a thread that is already gone.
            with self.inflight_thread_count_lck:
                self.inflight_thread_count -= 1

    def finalize(self):
        """Wait until every response thread started by this model has finished."""
        # Imported locally because this module does not import `time` at the
        # top of the file.
        import time

        while True:
            with self.inflight_thread_count_lck:
                if self.inflight_thread_count == 0:
                    return
            time.sleep(0.1)


================================================
FILE: qa/python_models/dlpack_sub_add/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "dlpack_sub_add"
backend: "python"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

instance_group [{kind : KIND_CPU}]

parameters: {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {
    string_value:"no"
  }
}


================================================
FILE: qa/python_models/dlpack_sub_add/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import torch
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack, to_dlpack


class TritonPythonModel:
    """Model that returns INPUT0 - INPUT1 as OUTPUT0 and INPUT0 + INPUT1 as OUTPUT1.

    Tensors are exchanged with PyTorch through DLPack, except for string
    inputs, which are handled with NumPy.
    """

    def initialize(self, args):
        """Read the output data types from the model configuration.

        `args["model_config"]` is the model configuration as a JSON string.
        """
        config = json.loads(args["model_config"])
        self.model_config = config

        output_configs = [
            pb_utils.get_output_config_by_name(config, output_name)
            for output_name in ("OUTPUT0", "OUTPUT1")
        ]

        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output_configs[0]["data_type"]
        )
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output_configs[1]["data_type"]
        )
        # NumPy scalar type -> PyTorch dtype, used to cast the results to the
        # configured output types.
        self.numpy_to_pytorch_dtype = dict(
            [
                (np.bool_, torch.bool),
                (np.uint8, torch.uint8),
                (np.int8, torch.int8),
                (np.int16, torch.int16),
                (np.int32, torch.int32),
                (np.int64, torch.int64),
                (np.float16, torch.float16),
                (np.float32, torch.float32),
                (np.float64, torch.float64),
            ]
        )

    def _cpu_output(self, name, result, numpy_dtype):
        """Wrap a CPU PyTorch `result` as the output tensor called `name`."""
        if numpy_dtype == np.object_:
            return pb_utils.Tensor(name, result.numpy().astype(numpy_dtype))

        converted = result.type(self.numpy_to_pytorch_dtype[numpy_dtype])
        return pb_utils.Tensor.from_dlpack(name, to_dlpack(converted))

    def execute(self, requests):
        """Return one response per request holding the difference and the sum."""
        dtype_0, dtype_1 = self.output0_dtype, self.output1_dtype

        responses = []
        for request in requests:
            lhs = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            rhs = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            if not (lhs.is_cpu() and rhs.is_cpu()):
                # At least one tensor is in GPU: compute on the GPU.
                lhs_torch = from_dlpack(lhs.to_dlpack()).cuda()
                rhs_torch = from_dlpack(rhs.to_dlpack()).cuda()
                difference = lhs_torch - rhs_torch
                total = lhs_torch + rhs_torch
                out_tensor_0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", to_dlpack(difference)
                )
                out_tensor_1 = pb_utils.Tensor.from_dlpack("OUTPUT1", to_dlpack(total))
            else:
                lhs_dtype = lhs.as_numpy().dtype
                if lhs_dtype.type is np.bytes_ or lhs_dtype == np.object_:
                    # String inputs: convert to int32 and compute with NumPy.
                    lhs_int = lhs.as_numpy().astype(np.int32)
                    rhs_int = rhs.as_numpy().astype(np.int32)
                    out_tensor_0 = pb_utils.Tensor(
                        "OUTPUT0", (lhs_int - rhs_int).astype(dtype_0)
                    )
                    out_tensor_1 = pb_utils.Tensor(
                        "OUTPUT1", (lhs_int + rhs_int).astype(dtype_1)
                    )
                else:
                    lhs_torch = from_dlpack(lhs.to_dlpack())
                    rhs_torch = from_dlpack(rhs.to_dlpack())
                    difference = lhs_torch - rhs_torch
                    total = lhs_torch + rhs_torch
                    out_tensor_0 = self._cpu_output("OUTPUT0", difference, dtype_0)
                    out_tensor_1 = self._cpu_output("OUTPUT1", total, dtype_1)

            responses.append(pb_utils.InferenceResponse([out_tensor_0, out_tensor_1]))

        return responses


================================================
FILE: qa/python_models/dlpack_test/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "dlpack_test"
backend: "python"
max_batch_size: 0

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/dlpack_test/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import unittest

import cupy as cp
import numpy as np
import torch
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack, to_dlpack


class PBTensorTest(unittest.TestCase):
    def test_pytorch_dlpack(self):
        # Round-trip CPU PyTorch tensors of several dtypes through
        # `pb_utils.Tensor`, using both the capsule-based and the
        # object-based DLPack entry points.
        dtypes_under_test = (
            torch.float16,
            torch.float32,
            torch.float64,
            torch.int8,
            torch.int16,
            torch.int32,
            torch.int64,
            torch.uint8,
        )

        for dtype in dtypes_under_test:
            source = torch.ones([100], dtype=dtype)

            def check_round_trip(restored):
                # The tensor coming back must match `source` in both value
                # and tensor type.
                self.assertTrue(torch.equal(restored, source))
                self.assertEqual(source.type(), restored.type())

            # Import from an explicit DLPack capsule.
            capsule = to_dlpack(source)
            from_capsule = pb_utils.Tensor.from_dlpack("test_tensor", capsule)
            self.assertTrue(np.array_equal(from_capsule.as_numpy(), source.numpy()))

            # Export back through `to_dlpack` and compare with the source.
            check_round_trip(from_dlpack(from_capsule.to_dlpack()))

            # The upgraded DLPack implementation accepts the external
            # PyTorch tensor directly.
            from_object = pb_utils.Tensor.from_dlpack("test_tensor", source)
            self.assertTrue(np.array_equal(from_object.as_numpy(), source.numpy()))

            # With the pb tensor acting as producer, `__dlpack__` and
            # `__dlpack_device__` have to be invoked properly.
            check_round_trip(from_dlpack(from_object))

    def test_non_contiguous_error(self):
        # Importing a non-contiguous DLPack tensor must be rejected with a
        # descriptive error.
        pytorch_tensor = torch.rand([20, 30], dtype=torch.float16)

        # Transposing a PyTorch tensor leads to a non contiguous tensor.
        pytorch_tensor = torch.transpose(pytorch_tensor, 0, 1)

        with self.assertRaises(Exception) as e:
            pb_utils.Tensor.from_dlpack("test_tensor", to_dlpack(pytorch_tensor))
        # assertEqual reports both strings when the message does not match.
        self.assertEqual(
            str(e.exception),
            "DLPack tensor is not contiguous. Only contiguous DLPack tensors that are stored in C-Order are supported.",
        )

    def test_dlpack_string_tensor(self):
        # Exporting a string (object dtype) tensor through DLPack must fail
        # with a descriptive error.
        np_object = np.array(["An Example String"], dtype=np.object_)
        pb_tensor = pb_utils.Tensor("test_tensor", np_object)

        with self.assertRaises(Exception) as e:
            pb_tensor.to_dlpack()

        # assertEqual reports both strings when the message does not match.
        self.assertEqual(
            str(e.exception), "DLPack does not have support for string tensors."
        )

    def test_dlpack_gpu_tensors(self):
        # Round-trip CUDA PyTorch tensors of several dtypes through
        # `pb_utils.Tensor`.
        # PyTorch does not support DLPack bool type yet:
        # https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/DLConvertor.cpp
        dtypes_under_test = (
            torch.float16,
            torch.float32,
            torch.float64,
            torch.int8,
            torch.int16,
            torch.int32,
            torch.int64,
            torch.uint8,
        )

        for dtype in dtypes_under_test:
            source = torch.ones([100], dtype=dtype, device="cuda")

            # Import from an explicit DLPack capsule, export it again and
            # make sure the tensor that comes back matches the source.
            capsule = to_dlpack(source)
            wrapped = pb_utils.Tensor.from_dlpack("test_tensor", capsule)
            restored = from_dlpack(wrapped.to_dlpack())
            self.assertTrue(torch.equal(restored, source))
            self.assertEqual(source.type(), restored.type())

            # The updated DLPack implementation has to work with GPU tensors
            # as well, in both directions.
            wrapped = pb_utils.Tensor.from_dlpack("test_tensor", source)
            restored = from_dlpack(wrapped)
            self.assertTrue(torch.equal(restored, source))
            self.assertEqual(source.type(), restored.type())

    def test_dlpack_gpu_numpy(self):
        # DLPack tensors that are in GPU cannot be converted to NumPy
        pytorch_tensor = torch.rand([100], dtype=torch.float16, device="cuda") * 100
        pb_tensor = pb_utils.Tensor.from_dlpack("tensor", to_dlpack(pytorch_tensor))
        # Make sure that `__dlpack_device__` works as expected
        self.assertFalse(pb_tensor.is_cpu())
        self.assertTrue(pytorch_tensor.is_cuda)
        self.assertEqual(
            pb_tensor.__dlpack_device__(), pytorch_tensor.__dlpack_device__()
        )

        with self.assertRaises(Exception) as e:
            pb_tensor.as_numpy()
        # assertEqual reports both strings when the message does not match.
        self.assertEqual(
            str(e.exception),
            "Tensor is stored in GPU and cannot be converted to NumPy.",
        )

    def test_dlpack_cpu_numpy(self):
        # Check compatibility of the PbTensor DLPack implementation with
        # NumPy's `from_dlpack`.
        source = torch.rand([100], dtype=torch.float16, device="cpu") * 100
        wrapped = pb_utils.Tensor.from_dlpack("tensor", source)

        as_numpy_array = np.from_dlpack(wrapped)
        self.assertTrue(np.array_equal(as_numpy_array, source.numpy()))

        # Both tensors must report CPU residency and the same DLPack device.
        self.assertTrue(wrapped.is_cpu())
        self.assertFalse(source.is_cuda)
        reported_device = wrapped.__dlpack_device__()
        self.assertEqual(reported_device, source.__dlpack_device__())

    def test_bool_datatype(self):
        # [FIXME] once a NumPy release supports the DLPack bool type, hand
        # the NumPy array to `pb_utils.Tensor.from_dlpack` directly instead
        # of wrapping it in a pb tensor first.
        source = np.asarray([False, True])
        wrapped = pb_utils.Tensor("tensor", source)
        reimported = pb_utils.Tensor.from_dlpack("tensor", wrapped)
        values_match = np.array_equal(source, reimported.as_numpy())
        self.assertTrue(values_match)

    def test_cuda_multi_stream(self):
        # Tensors are updated on a side stream after a long chain of matrix
        # multiplications. Importing them must sync with that stream so the
        # pb tensors hold the updated data.
        dim = 5000
        first = torch.tensor([0, 0, 0, 0], device="cuda")
        second = torch.tensor([0, 0, 0, 0], device="cuda")
        expected = torch.tensor([2, 2, 2, 2], device="cuda")

        side_stream = torch.cuda.Stream()
        with torch.cuda.stream(side_stream):
            # Keep the side stream busy before the tensors are touched.
            operand = torch.randn(dim, dim, device="cuda")
            product = torch.matmul(operand, operand)
            for _ in range(1000):
                product = torch.matmul(product, operand)
            first += torch.tensor([2, 2, 2, 2], device="cuda")
            second += torch.tensor([2, 2, 2, 2], device="cuda")

        # One import goes through the tensor object, the other through an
        # explicit DLPack capsule.
        imported = (
            pb_utils.Tensor.from_dlpack("tensor", first),
            pb_utils.Tensor.from_dlpack("tensor", to_dlpack(second)),
        )
        for pb_tensor in imported:
            restored = from_dlpack(pb_tensor)
            self.assertTrue(torch.equal(restored, expected))

    def test_cuda_non_blocking_multi_stream(self):
        # A CuPy tensor is updated on a non-blocking stream after a long
        # chain of matrix multiplications. Importing it must sync with that
        # stream so the pb tensor holds the updated data.
        dim = 5000
        source = cp.array([0, 0, 0, 0])
        expected = cp.array([2, 2, 2, 2])

        side_stream = cp.cuda.Stream(non_blocking=True)
        with side_stream:
            # Keep the stream busy before the tensor is touched.
            operand = cp.random.rand(dim, dim)
            product = cp.matmul(operand, operand)
            for _ in range(1000):
                product = cp.matmul(product, operand)
            source += cp.array([2, 2, 2, 2])

        wrapped = pb_utils.Tensor.from_dlpack("tensor", source)
        # After the import the non-blocking stream must have no pending work.
        self.assertTrue(side_stream.done)

        restored = cp.from_dlpack(wrapped)
        self.assertTrue(cp.array_equal(restored, expected))

        # The pb tensor must report GPU residency and the same DLPack device
        # as the CuPy tensor it was created from.
        self.assertFalse(wrapped.is_cpu())
        self.assertEqual(wrapped.__dlpack_device__(), source.__dlpack_device__())

    def test_cuda_multi_gpu(self):
        # Test that when `pb_utils.Tensor.from_dlpack` is called on a
        # different GPU from the one where the external tensor is stored, we
        # still receive the tensor and all pending work on the other GPU's
        # default stream that involves the external tensor is done.
        size = 5000
        # DLDeviceType::kDLCUDA, device_id 1
        expected_dlpack_device = (2, 1)
        # Produce the tensor on device 1, behind a long chain of matrix
        # multiplications issued in the same device context.
        with cp.cuda.Device(1):
            expected_output = cp.array([2, 2, 2, 2])
            cupy_tensor = cp.array([0, 0, 0, 0])
            matrix_a = cp.random.rand(size, size)
            res = cp.matmul(matrix_a, matrix_a)
            for _ in range(1000):
                res = cp.matmul(res, matrix_a)
            cupy_tensor += cp.array([2, 2, 2, 2])
        # Import the tensor while device 0 is the current device.
        with cp.cuda.Device(0):
            pb_tensor = pb_utils.Tensor.from_dlpack("tensor", cupy_tensor)
            with cp.cuda.Device(1):
                # To make sure that the default stream is done with
                # all compute work
                self.assertTrue(cp.cuda.Stream(null=True).done)
            cupy_tensor_dlpack = cp.from_dlpack(pb_tensor)

        # Compare the values inside the device 1 context, where the expected
        # array was created.
        with cp.cuda.Device(1):
            self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output))

        # The pb tensor must report GPU residency on device 1, matching the
        # CuPy tensor it was created from.
        self.assertFalse(pb_tensor.is_cpu())
        self.assertEqual(pb_tensor.__dlpack_device__(), expected_dlpack_device)
        self.assertEqual(pb_tensor.__dlpack_device__(), cupy_tensor.__dlpack_device__())

    def test_cuda_blocking_stream_multi_gpu(self):
        # `pb_utils.Tensor.from_dlpack` is invoked while GPU 0 is current,
        # on a tensor that was produced on a blocking stream of GPU 1. The
        # call must still hand back a usable pointer, and everything queued
        # on that blocking stream for the tensor must have finished.
        dim = 5000
        # (DLDeviceType::kDLCUDA, device_id 1)
        want_device = (2, 1)
        with cp.cuda.Device(1):
            want = cp.array([2, 2, 2, 2])
            stream = cp.cuda.Stream(non_blocking=False)
            with stream:
                source = cp.array([0, 0, 0, 0])
                # Long matmul chain queued ahead of the in-place update
                operand = cp.random.rand(dim, dim)
                product = cp.matmul(operand, operand)
                for _ in range(1000):
                    product = cp.matmul(product, operand)
                source += cp.array([2, 2, 2, 2])
        with cp.cuda.Device(0):
            wrapped = pb_utils.Tensor.from_dlpack("tensor", source)
            with cp.cuda.Device(1):
                # The blocking stream must have no compute work left
                self.assertTrue(stream.done)
            round_trip = cp.from_dlpack(wrapped)

        with cp.cuda.Device(1):
            self.assertTrue(cp.array_equal(round_trip, want))

        self.assertFalse(wrapped.is_cpu())
        self.assertEqual(wrapped.__dlpack_device__(), want_device)
        self.assertEqual(wrapped.__dlpack_device__(), source.__dlpack_device__())

    def test_cuda_non_blocking_stream_multi_gpu(self):
        # `pb_utils.Tensor.from_dlpack` is invoked while GPU 0 is current,
        # on a tensor that was produced on a non-blocking stream of another
        # GPU. The call must still hand back a usable pointer, and
        # everything queued on that non-blocking stream for the tensor must
        # have finished.
        # When GPUs 0 and 1 are used, this test appears to be affected by
        # `test_cuda_multi_gpu` and `test_cuda_blocking_stream_multi_gpu`,
        # so GPUs 0 and 2 are used here instead.
        # JIRA: DLIS-4887
        dim = 5000
        # (DLDeviceType::kDLCUDA, device_id 2)
        want_device = (2, 2)
        with cp.cuda.Device(2):
            want = cp.array([2, 2, 2, 2])
            stream = cp.cuda.Stream(non_blocking=True)
            with stream:
                source = cp.array([0, 0, 0, 0])
                # Long matmul chain queued ahead of the in-place update
                operand = cp.random.rand(dim, dim)
                product = cp.matmul(operand, operand)
                for _ in range(1000):
                    product = cp.matmul(product, operand)
                source += cp.array([2, 2, 2, 2])
        with cp.cuda.Device(0):
            wrapped = pb_utils.Tensor.from_dlpack("tensor", source)
            with cp.cuda.Device(2):
                # The non-blocking stream must have no compute work left
                self.assertTrue(stream.done)
            round_trip = cp.from_dlpack(wrapped)

        with cp.cuda.Device(2):
            self.assertTrue(cp.array_equal(round_trip, want))

        self.assertFalse(wrapped.is_cpu())
        self.assertEqual(wrapped.__dlpack_device__(), want_device)
        self.assertEqual(wrapped.__dlpack_device__(), source.__dlpack_device__())


class TritonPythonModel:
    """Model wrapper that runs this module's unittest suite on each request."""

    def execute(self, requests):
        """Run the unittest suite once per request and report the outcome.

        For every incoming request the tests of module "model" are executed
        (without exiting the interpreter) and a response is produced that
        carries a single-element float16 tensor named "OUTPUT0" holding
        whether the run was successful.

        Returns one InferenceResponse per request, in request order.
        """
        responses = []
        for _ in requests:
            # exit=False keeps unittest from calling sys.exit() when done.
            outcome = unittest.main("model", exit=False)
            passed = np.array([outcome.result.wasSuccessful()], dtype=np.float16)
            output = pb_utils.Tensor("OUTPUT0", passed)
            responses.append(pb_utils.InferenceResponse([output]))
        return responses


================================================
FILE: qa/python_models/ensemble/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Two-step ensemble: the ensemble inputs are fed to "add_sub_1", whose
# outputs travel through the intermediate tensors "output_0"/"output_1"
# into "add_sub_2", which in turn produces the ensemble outputs.
name: "ensemble"
platform: "ensemble"
max_batch_size: 0

# Ensemble-level inputs: two FP32 tensors of 16 elements each.
input [
   {
      name: "INPUT0"
      data_type: TYPE_FP32
      dims: [ 16 ]
   },
   {
      name: "INPUT1"
      data_type: TYPE_FP32
      dims: [ 16 ]
   }
]
# Ensemble-level outputs: two FP32 tensors of 16 elements each.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    # Step 1: ensemble inputs -> add_sub_1 -> intermediate tensors.
    {
      model_name: "add_sub_1"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "output_0"
      }
      output_map {
        key: "OUTPUT1"
        value: "output_1"
      }
    },
    # Step 2: intermediate tensors -> add_sub_2 -> ensemble outputs.
    {
      model_name: "add_sub_2"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "output_0"
      }
      input_map {
        key: "INPUT1"
        value: "output_1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/python_models/ensemble_gpu/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Two-step ensemble: the ensemble inputs are fed to "add_sub_1", whose
# outputs travel through the intermediate tensors "output_0"/"output_1"
# into "libtorch_float32_float32_float32", which produces the ensemble
# outputs.
name: "ensemble_gpu"
platform: "ensemble"
max_batch_size: 0

# Ensemble-level inputs: two FP32 tensors of 16 elements each.
input [
   {
      name: "INPUT0"
      data_type: TYPE_FP32
      dims: [ 16 ]
   },
   {
      name: "INPUT1"
      data_type: TYPE_FP32
      dims: [ 16 ]
   }
]
# Ensemble-level outputs: two FP32 tensors of 16 elements each.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
ensemble_scheduling {
  step [
    # Step 1: ensemble inputs -> add_sub_1 -> intermediate tensors.
    {
      model_name: "add_sub_1"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "output_0"
      }
      output_map {
        key: "OUTPUT1"
        value: "output_1"
      }
    },
    # Step 2: intermediate tensors -> libtorch model -> ensemble outputs.
    # Note that this model names its outputs "OUTPUT__0"/"OUTPUT__1".
    {
      model_name: "libtorch_float32_float32_float32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "output_0"
      }
      input_map {
        key: "INPUT1"
        value: "output_1"
      }
      output_map {
        key: "OUTPUT__0"
        value: "OUTPUT0"
      }
      output_map {
        key: "OUTPUT__1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/python_models/ensemble_io/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Three-step ensemble chaining "dlpack_io_identity_1", "_2" and "_3".
# Each step consumes an FP32 tensor plus a BOOL "GPU_OUTPUT" tensor and
# emits an FP32 tensor plus a BOOL "NEXT_GPU_OUTPUT" tensor that is wired
# into the following step's "GPU_OUTPUT" input.
name: "ensemble_io"
platform: "ensemble"

max_batch_size: 0

# Ensemble-level inputs: a variable-length FP32 tensor and a
# variable-length BOOL tensor.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
input [
  {
    name: "GPU_OUTPUT"
    data_type: TYPE_BOOL
    dims: [ -1 ]
  }
]
# Ensemble-level outputs: a variable-length FP32 tensor and a
# variable-length BOOL tensor.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
output [
  {
    name: "NEXT_GPU_OUTPUT"
    data_type: TYPE_BOOL
    dims: [ -1 ]
  }
]

ensemble_scheduling {
  step [
    # Step 1: ensemble inputs -> "output_0" / "next_gpu_output".
    {
      model_name: "dlpack_io_identity_1"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "GPU_OUTPUT"
        value: "GPU_OUTPUT"
      }
      output_map {
        key: "OUTPUT0"
        value: "output_0"
      }
      output_map {
        key: "NEXT_GPU_OUTPUT"
        value: "next_gpu_output"
      }
    },
    # Step 2: step-1 outputs -> "output_1" / "next_gpu_output_1".
    {
      model_name: "dlpack_io_identity_2"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "output_0"
      }
      input_map {
        key: "GPU_OUTPUT"
        value: "next_gpu_output"
      }
      output_map {
        key: "OUTPUT0"
        value: "output_1"
      }
      output_map {
        key: "NEXT_GPU_OUTPUT"
        value: "next_gpu_output_1"
      }
    },
    # Step 3: step-2 outputs -> ensemble outputs.
    {
      model_name: "dlpack_io_identity_3"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "output_1"
      }
      input_map {
        key: "GPU_OUTPUT"
        value: "next_gpu_output_1"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
      output_map {
        key: "NEXT_GPU_OUTPUT"
        value: "NEXT_GPU_OUTPUT"
      }
    }
  ]
}


================================================
FILE: qa/python_models/error_code/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model with a single string input naming an error code
# and a single declared string output.
name: "error_code"
backend: "python"
max_batch_size: 4

# Name of the error code to report, as a one-element string tensor.
input [
  {
    name: "ERROR_CODE"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

# Declared placeholder output.
output [
  {
    name: "DUMMY_OUT"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

# Model instances are placed on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/error_code/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Model that answers every request with the error code it asked for."""

    def execute(self, requests):
        """Return one error response per request.

        Each request carries an "ERROR_CODE" string tensor; its first
        element is decoded as UTF-8 and looked up among the known
        `pb_utils.TritonError` code names. A known name yields an error
        with that code, any other name yields an error that only carries
        an "unrecognized error code" message.

        Returns the list of error-only InferenceResponse objects, in
        request order.
        """
        # Map each supported name to the TritonError attribute of that name.
        known_codes = {
            code_name: getattr(pb_utils.TritonError, code_name)
            for code_name in (
                "UNKNOWN",
                "INTERNAL",
                "NOT_FOUND",
                "INVALID_ARG",
                "UNAVAILABLE",
                "UNSUPPORTED",
                "ALREADY_EXISTS",
                "CANCELLED",
            )
        }

        responses = []
        for request in requests:
            code_input = pb_utils.get_input_tensor_by_name(request, "ERROR_CODE")
            requested = str(code_input.as_numpy()[0][0], encoding="utf-8")
            if requested not in known_codes:
                error = pb_utils.TritonError("unrecognized error code: " + requested)
            else:
                error = pb_utils.TritonError(
                    message=("error code: " + requested),
                    code=known_codes[requested],
                )
            responses.append(pb_utils.InferenceResponse(error=error))

        return responses


================================================
FILE: qa/python_models/execute_cancel/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model with a single FP32 "EXECUTE_DELAY" input and a
# single declared FP32 output.
name: "execute_cancel"
backend: "python"
max_batch_size: 1

# Delay value read by the model, as a one-element FP32 tensor.
input [
  {
    name: "EXECUTE_DELAY"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# Declared placeholder output.
output [
  {
    name: "DUMMY_OUT"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

# Model instances are placed on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/execute_cancel/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import threading
import time

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Model that waits for a requested delay while polling for cancellation.

    Every response it produces is an error response: "cancelled" (with the
    CANCELLED code) when cancellation was observed during the wait, and
    "not cancelled" otherwise.
    """

    def initialize(self, args):
        """Parse the model config and record whether the model is decoupled."""
        config = json.loads(args["model_config"])
        self._logger = pb_utils.Logger
        self._model_config = config
        self._using_decoupled = pb_utils.using_decoupled_model_transaction_policy(
            config
        )

    def execute(self, requests):
        """Collect each request's delay and dispatch to the matching handler.

        In decoupled mode the response sender of every request is kept and
        `None` is returned while a background thread sends the responses.
        Otherwise the requests themselves are kept and the responses are
        returned directly.
        """
        decoupled = self._using_decoupled
        pending = []
        for request in requests:
            delay_input = pb_utils.get_input_tensor_by_name(request, "EXECUTE_DELAY")
            delay = delay_input.as_numpy()[0][0]  # seconds
            if decoupled:
                entry = {
                    "response_sender": request.get_response_sender(),
                    "delay": delay,
                }
            else:
                entry = {"request": request, "delay": delay}
            pending.append(entry)

        handler = (
            self._execute_decoupled if decoupled else self._execute_processed_requests
        )
        return handler(pending)

    def _execute_processed_requests(self, processed_requests):
        """Wait out each entry's delay, stopping early when it is cancelled.

        Each entry is a dict with a "delay" (seconds) and either a
        "response_sender" or a "request"; whichever is present is polled
        with `is_cancelled()` once per second of waiting.

        Returns one error-only InferenceResponse per entry, in order.
        """
        responses = []
        for entry in processed_requests:
            # Prefer the response sender, fall back to the request itself.
            cancel_handle = entry.get("response_sender", entry.get("request"))
            delay = entry["delay"]  # seconds
            elapsed = 0.0  # seconds
            while elapsed < delay:
                time.sleep(1)
                elapsed += 1.0
                if cancel_handle.is_cancelled():
                    self._logger.log_info(
                        "[execute_cancel] Request cancelled at "
                        + str(elapsed)
                        + " s"
                    )
                    error = pb_utils.TritonError(
                        message="cancelled", code=pb_utils.TritonError.CANCELLED
                    )
                    break
                self._logger.log_info(
                    "[execute_cancel] Request not cancelled at "
                    + str(elapsed)
                    + " s"
                )
            else:
                # The whole delay elapsed without observing a cancellation.
                error = pb_utils.TritonError(message="not cancelled")
            responses.append(pb_utils.InferenceResponse(error=error))
        return responses

    def _execute_decoupled(self, processed_requests):
        """Process the entries on a daemon thread and return `None` at once.

        The thread first sleeps for two seconds so that it runs after the
        requests are released, then sends each entry's response through
        its response sender, followed by the final-response flag.
        """

        def respond_later():
            time.sleep(2)  # execute after requests are released
            responses = self._execute_processed_requests(processed_requests)
            # One response is produced per entry, so the two line up.
            for entry, response in zip(processed_requests, responses):
                sender = entry["response_sender"]
                sender.send(response)
                sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        worker = threading.Thread(target=respond_later, daemon=True)
        worker.start()
        return None


================================================
FILE: qa/python_models/execute_delayed_model/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model taking two INT32 inputs and declaring two INT32
# outputs, each of 16 elements.
# NOTE(review): the model name is "simple" although this file lives under
# execute_delayed_model/ - presumably the QA test installs the model under
# that name; confirm against the test script.
name: "simple"
backend: "python"
max_batch_size: 8
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 16 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 16 ]
  }
]

# Model instances are placed on CPU.
instance_group [ { kind: KIND_CPU }]


================================================
FILE: qa/python_models/execute_delayed_model/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import time

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Add/sub Python-backend model that stalls before answering a batch.

    Every call to ``execute`` sleeps for a fixed 15 seconds and then returns,
    per request, the element-wise sum (``OUTPUT0``) and difference
    (``OUTPUT1``) of ``INPUT0`` and ``INPUT1``.
    """

    def initialize(self, args):
        """Parse the model config and cache the numpy dtype of both outputs.

        Parameters
        ----------
        args : dict
            Must contain ``"model_config"``, a JSON string of the model
            configuration.
        """
        config = json.loads(args["model_config"])
        self.model_config = config

        def _numpy_dtype(output_name):
            # Translate the configured Triton data type of one output into
            # the numpy dtype used when casting results.
            output_config = pb_utils.get_output_config_by_name(config, output_name)
            return pb_utils.triton_string_to_numpy(output_config["data_type"])

        self.output0_dtype = _numpy_dtype("OUTPUT0")
        self.output1_dtype = _numpy_dtype("OUTPUT1")

    def execute(self, requests):
        """Sleep 15 seconds, then compute sum and difference for each request.

        Returns
        -------
        list
            One ``pb_utils.InferenceResponse`` per request, in request order.
        """
        sum_dtype = self.output0_dtype
        diff_dtype = self.output1_dtype

        # The delay is applied once per batch, before any request is handled.
        time.sleep(15)

        results = []
        for req in requests:
            lhs = pb_utils.get_input_tensor_by_name(req, "INPUT0").as_numpy()
            rhs = pb_utils.get_input_tensor_by_name(req, "INPUT1").as_numpy()

            total = lhs + rhs
            difference = lhs - rhs

            results.append(
                pb_utils.InferenceResponse(
                    output_tensors=[
                        pb_utils.Tensor("OUTPUT0", total.astype(sum_dtype)),
                        pb_utils.Tensor("OUTPUT1", difference.astype(diff_dtype)),
                    ]
                )
            )

        return results

    def finalize(self):
        """Log that the model is being torn down."""
        print("Cleaning up...")


================================================
FILE: qa/python_models/execute_error/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "execute_error"
backend: "python"
max_batch_size: 64

input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/execute_error/model.py
================================================
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Identity model whose response depends on the request's batch position."""

    def execute(self, requests):
        """Answer the first three requests of a batch with a fixed pattern.

        * position 0: output tensor together with an error
        * position 1: output tensor only (success)
        * position 2: error only, no output tensor

        Requests at position 3 or later get no response appended.
        """
        message = "An error occurred during execution"
        collected = []

        for position, request in enumerate(requests):
            payload = pb_utils.get_input_tensor_by_name(request, "IN")
            out_tensor = pb_utils.Tensor("OUT", payload.as_numpy())

            if position == 0:
                failure = pb_utils.TritonError(message)
                collected.append(pb_utils.InferenceResponse([out_tensor], failure))
            elif position == 1:
                collected.append(pb_utils.InferenceResponse([out_tensor]))
            elif position == 2:
                failure = pb_utils.TritonError(message)
                collected.append(pb_utils.InferenceResponse(error=failure))

        return collected


================================================
FILE: qa/python_models/execute_grpc_error/config.pbtxt
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

backend: "python"
max_batch_size: 64

input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/execute_grpc_error/model.py
================================================
# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Identity model that alternates between success and error responses."""

    def __init__(self):
        # Running count of requests seen by this instance, starting at 1 so
        # that the first request succeeds and the second one fails.
        self.inf_count = 1

    def execute(self, requests):
        """Return one response per request, failing every even-numbered one.

        The counter lives on the instance, so the alternation continues
        across calls to ``execute`` rather than restarting with each batch.
        """
        message = "An error occurred during execution"
        collected = []

        for request in requests:
            payload = pb_utils.get_input_tensor_by_name(request, "IN")
            out_tensor = pb_utils.Tensor("OUT", payload.as_numpy())

            if self.inf_count % 2 == 0:
                # Even-numbered request: attach an error to the response.
                failure = pb_utils.TritonError(message)
                collected.append(pb_utils.InferenceResponse([out_tensor], failure))
            else:
                # Odd-numbered request: plain successful response.
                collected.append(pb_utils.InferenceResponse([out_tensor]))

            self.inf_count += 1

        return collected


================================================
FILE: qa/python_models/execute_return_error/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "execute_return_error"
backend: "python"
max_batch_size: 64

input [
  {
    name: "INPUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUTPUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/execute_return_error/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


class TritonPythonModel:
    """Model that only ever returns invalid values from ``execute``."""

    def initialize(self, args):
        """Reset the call counter so the first ``execute`` call has index 0."""
        self._i = -1

    def execute(self, requests):
        """
        Tests returning invalid responses in execute request.

        Calls with an even index (0, 2, ...) return ``None``; calls with an
        odd index return a list holding one ``None`` per request.
        """
        self._i += 1

        if self._i % 2:
            return [None for _ in requests]
        return None


================================================
FILE: qa/python_models/fan_add_sub/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "fan_add_sub"
platform: "ensemble"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]

  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]


  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]


  }
]
ensemble_scheduling {
  step [
    {
      model_name: "nop_TYPE_FP32_-1"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      input_map {
        key: "INPUT1"
        value: "INPUT1"
      }
      output_map {
        key: "OUTPUT0"
        value: "same_input0"
      }
      output_map {
        key: "OUTPUT1"
        value: "same_input1"
      }
    },
    {
      model_name: "ENSEMBLE_MODEL_NAME"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "same_input0"
      }
      input_map {
        key: "INPUT1"
        value: "same_input1"
      }
      output_map {
        key: "OUTPUT0"
        value: "same_output0"
      }
      output_map {
        key: "OUTPUT1"
        value: "same_output1"
      }
    },
    {
      model_name: "nop_TYPE_FP32_-1"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "same_output0"
      }
      input_map {
        key: "INPUT1"
        value: "same_output0"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
    },
    {
      model_name: "nop_TYPE_FP32_-1"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "same_output1"
      }
      input_map {
        key: "INPUT1"
        value: "same_output1"
      }
      output_map {
        key: "OUTPUT1"
        value: "OUTPUT1"
      }
    }
  ]
}


================================================
FILE: qa/python_models/fini_error/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "fini_error"
backend: "python"
max_batch_size: 64

input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/fini_error/model.py
================================================
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Identity model whose ``finalize`` deliberately raises a Python error."""

    def execute(self, requests):
        """
        The body of this model doesn't matter. The main purpose of this model is
        to test correct handling of Python errors in the `finalize` function.

        Each request's "IN" tensor is echoed back as "OUT".
        """
        responses = []
        for request in requests:
            input_tensor = pb_utils.get_input_tensor_by_name(request, "IN")
            out_tensor = pb_utils.Tensor("OUT", input_tensor.as_numpy())
            # No error is attached here: inference has to succeed so that the
            # only failure this model produces comes from `finalize`.
            responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses

    def finalize(self):
        """Raise ``NameError`` on purpose by referencing an undefined name."""
        # Intentional: this is the error the model exists to trigger.
        undefined_variable


================================================
FILE: qa/python_models/generate_models/mock_llm/1/model.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import time

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def initialize(self, args):
        self.model_config = json.loads(args["model_config"])
        self.decoupled = self.model_config.get("model_transaction_policy", {}).get(
            "decoupled"
        )

    def execute(self, requests):
        if self.decoupled:
            return self.exec_decoupled(requests)
        else:
            return self.exec(requests)

    def exec(self, requests):
        responses = []
        for request in requests:
            params = json.loads(request.parameters())
            rep_count = params["REPETITION"] if "REPETITION" in params else 1

            input_np = pb_utils.get_input_tensor_by_name(request, "PROMPT").as_numpy()
            stream_np = pb_utils.get_input_tensor_by_name(request, "STREAM").as_numpy()
            stream = stream_np.flatten()[0]
            if stream:
                responses.append(
                    pb_utils.InferenceResponse(
                        error=pb_utils.TritonError(
                            "STREAM only supported in decoupled mode"
                        )
                    )
                )
            else:
                out_tensor = pb_utils.Tensor(
                    "TEXT", np.repeat(input_np, rep_count, axis=1)
                )
                responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses

    def exec_decoupled(self, requests):
        for request in requests:
            params = json.loads(request.parameters())
            rep_count = params["REPETITION"] if "REPETITION" in params else 1
            fail_last = params["FAIL_LAST"] if "FAIL_LAST" in params else False
            delay = params["DELAY"] if "DELAY" in params else None
            output_0_dim = params["OUTPUT_0_DIM"] if "OUTPUT_0_DIM" in params else False

            sender = request.get_response_sender()
            input_np = pb_utils.get_input_tensor_by_name(request, "PROMPT").as_numpy()
            stream_np = pb_utils.get_input_tensor_by_name(request, "STREAM").as_numpy()
            out_value = np.array([]) if output_0_dim else input_np
            out_tensor = pb_utils.Tensor("TEXT", out_value)
            response = pb_utils.InferenceResponse([out_tensor])
            # If stream enabled, just send multiple copies of response
            # FIXME: Could split up response string into tokens, but this is simpler for now.
            stream = stream_np.flatten()[0]
            if stream:
                for _ in range(rep_count):
                    if delay is not None:
                        time.sleep(delay)
                    if not sender.is_cancelled():
                        sender.send(response)
                    else:
                        break
                sender.send(
                    None
                    if not fail_last
                    else pb_utils.InferenceResponse(
                        error=pb_utils.TritonError("An Error Occurred")
                    ),
                    flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL,
                )
            # If stream disabled, just send one response
            else:
                sender.send(
                    response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
                )
        return None


================================================
FILE: qa/python_models/generate_models/mock_llm/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
backend: "python"

max_batch_size: 0

model_transaction_policy {
  decoupled: True
}

input [
  {
    name: "PROMPT"
    data_type: TYPE_STRING
    dims: [ 1, 1 ]
  },
  {
    name: "STREAM"
    data_type: TYPE_BOOL
    dims: [ 1, 1 ]
  },
  {
    name: "input_ids"
    data_type: TYPE_INT32
    dims: [ 1, -1 ]
    optional: true
  }
]

output [
  {
    name: "TEXT"
    data_type: TYPE_STRING
    dims: [ 1, -1 ]
  }
]

instance_group [
  {
    count: 1
    kind: KIND_MODEL
  }
]


================================================
FILE: qa/python_models/ground_truth/config.pbtxt
================================================
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "ground_truth"
backend: "python"
max_batch_size: 64

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/ground_truth/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        """
        Mock model whose input value doubles as a sleep duration.

        The content of "INPUT0" is echoed back unchanged as "OUTPUT0", but
        the call blocks for that many seconds before the response list is
        returned. Exactly one request per call is expected.
        """
        assert len(requests) == 1

        payload = pb_utils.get_input_tensor_by_name(
            requests[0], "INPUT0"
        ).as_numpy()
        # The first element of the first row is the delay, in seconds.
        seconds = float(payload[0][0])

        # Build the identity response up front; only its return is delayed.
        reply = pb_utils.InferenceResponse([pb_utils.Tensor("OUTPUT0", payload)])

        time.sleep(seconds)
        return [reply]


================================================
FILE: qa/python_models/identity_bf16/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the BF16 identity QA model (Python backend).
# One BF16 input and one BF16 output; model.py passes INPUT0 through DLPack
# and returns it as OUTPUT0.
# NOTE(review): unlike the sibling configs there is no `name` field here;
# presumably the name is derived from the model directory - confirm.
backend: "python"
max_batch_size: 64

input [
  {
    name: "INPUT0"
    data_type: TYPE_BF16
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_BF16
    dims: [ -1 ]
  }
]

# A single model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/identity_bf16/model.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import torch
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def initialize(self, args):
        # The model config arrives as a JSON string and has to be decoded
        # before it can be queried.
        config = json.loads(args["model_config"])
        self.model_config = config

        # Keep the per-tensor config entries around for the validation helper.
        self.input0_config = pb_utils.get_input_config_by_name(config, "INPUT0")
        self.output0_config = pb_utils.get_output_config_by_name(config, "OUTPUT0")

    def validate_bf16_tensor(self, tensor, tensor_config):
        """
        Check that `tensor` is configured as BF16 and refuses numpy conversion.

        Raises a plain Exception when the configured data type is not
        "TYPE_BF16" or when `as_numpy()` unexpectedly succeeds.
        """
        # The configured datatype is read from the model config entry.
        configured_type = tensor_config["data_type"]
        if configured_type != "TYPE_BF16":
            raise Exception(
                f"Expected a BF16 tensor, but got {configured_type} instead."
            )

        # BF16 tensors cannot be turned into numpy arrays; DLPack
        # (to_dlpack / from_dlpack) is the supported route instead.
        try:
            tensor.as_numpy()
        except pb_utils.TritonModelException as conversion_error:
            wanted = "tensor dtype is bf16 and cannot be converted to numpy"
            assert wanted in str(conversion_error).lower()
            return

        raise Exception("Expected BF16 conversion to numpy to fail")

    def execute(self, requests):
        """
        BF16 identity model for the Python backend, illustrating the DLPack
        round trip through PyTorch.
        """
        results = []
        for req in requests:
            source = pb_utils.get_input_tensor_by_name(req, "INPUT0")

            # Numpy has no BF16 type, so hand the data over as a DLPack
            # capsule and (optionally) view it from a DLPack-aware framework
            # such as PyTorch.
            as_torch = torch.utils.dlpack.from_dlpack(source.to_dlpack())

            # Convert back into a pb_utils.Tensor, again via DLPack.
            echoed = pb_utils.Tensor.from_dlpack(
                "OUTPUT0", torch.utils.dlpack.to_dlpack(as_torch)
            )
            results.append(pb_utils.InferenceResponse([echoed]))

            # NOTE: these checks exist for testing/example purposes only and
            # should be removed in real models.
            self.validate_bf16_tensor(source, self.input0_config)
            self.validate_bf16_tensor(echoed, self.output0_config)

        return results


================================================
FILE: qa/python_models/identity_fp32/config.pbtxt
================================================
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "identity_fp32" QA model (Python backend).
# One FP32 input and one FP32 output; model.py returns INPUT0 unchanged as
# OUTPUT0.
name: "identity_fp32"
backend: "python"
max_batch_size: 64

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# A single model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/identity_fp32/model.py
================================================
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        """
        Python-backend identity model: every request's "INPUT0" is returned
        unchanged as "OUTPUT0", one response per request.
        """
        return [
            pb_utils.InferenceResponse(
                [
                    pb_utils.Tensor(
                        "OUTPUT0",
                        pb_utils.get_input_tensor_by_name(req, "INPUT0").as_numpy(),
                    )
                ]
            )
            for req in requests
        ]


================================================
FILE: qa/python_models/identity_fp32_logging/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "identity_fp32_logging" QA model (Python
# backend). One FP32 input and one FP32 output; model.py returns INPUT0
# unchanged as OUTPUT0 and emits log messages from initialize, execute and
# finalize.
name: "identity_fp32_logging"
backend: "python"
max_batch_size: 64

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# A single model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/identity_fp32_logging/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    @staticmethod
    def _log_every_level(stage):
        """
        Emit one message through each pb_utils.Logger entry point, in a fixed
        order, with every message prefixed by `stage`.
        """
        log = pb_utils.Logger
        log.log(f"{stage}-Specific Msg!", log.INFO)
        log.log_info(f"{stage}-Info Msg!")
        log.log_warn(f"{stage}-Warning Msg!")
        log.log_error(f"{stage}-Error Msg!")
        log.log_verbose(f"{stage}-Verbose Msg!")

    def initialize(self, args):
        self._log_every_level("Initialize")

    def execute(self, requests):
        """
        Identity model in Python backend that logs before and after the
        responses are built.
        """
        # First burst: before any request is touched.
        self._log_every_level("Execute")

        results = [
            pb_utils.InferenceResponse(
                [
                    pb_utils.Tensor(
                        "OUTPUT0",
                        pb_utils.get_input_tensor_by_name(req, "INPUT0").as_numpy(),
                    )
                ]
            )
            for req in requests
        ]

        # Second burst: right before handing the responses back.
        self._log_every_level("Execute")

        return results

    def finalize(self):
        self._log_every_level("Finalize")


================================================
FILE: qa/python_models/identity_fp32_timeout/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "identity_fp32_timeout" QA model (Python
# backend). One FP32 input and one FP32 output; model.py returns INPUT0
# unchanged as OUTPUT0 after sleeping 5 seconds per request.
name: "identity_fp32_timeout"
backend: "python"
max_batch_size: 64

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# A single model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]

# Default queue policy: a default timeout of 1000000 microseconds (1 second)
# with timeout action REJECT, and timeout override enabled.
dynamic_batching {
  default_queue_policy {
    timeout_action: REJECT
    allow_timeout_override: true
    default_timeout_microseconds: 1000000
  }
}


================================================
FILE: qa/python_models/identity_fp32_timeout/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        """
        Slow identity model in Python backend.

        Each request's "INPUT0" is echoed as "OUTPUT0"; along the way the
        request's timeout value is logged and the call sleeps five seconds
        per request.
        """
        log = pb_utils.Logger
        results = []
        for req in requests:
            echoed = pb_utils.Tensor(
                "OUTPUT0",
                pb_utils.get_input_tensor_by_name(req, "INPUT0").as_numpy(),
            )
            log.log_info(f"Request timeout: {req.timeout()}")
            # Deliberate stall. NOTE(review): presumably chosen to exceed the
            # 1-second default queue timeout in config.pbtxt - confirm.
            time.sleep(5)
            results.append(pb_utils.InferenceResponse([echoed]))
        return results


================================================
FILE: qa/python_models/init_args/config.pbtxt
================================================
# Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "init_args" QA model (Python backend).
# One FP32 input "IN" and one FP32 output "OUT"; model.py ignores the input
# data and reports in OUT how many of the expected `initialize` argument keys
# were present.
name: "init_args"
backend: "python"
max_batch_size: 64

input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# A single model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/init_args/model.py
================================================
# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

import numpy as np
import triton_python_backend_utils as pb_utils


def check_init_args(args):
    """
    Verify that the `initialize` arguments hold the values expected for the
    "init_args" QA model.

    The expected model repository path is built from the TRITON_DIR
    environment variable (falling back to a per-platform default) and uses
    backslashes on Windows.

    Parameters
    ----------
    args : dict
        The mapping handed to `TritonPythonModel.initialize`.

    Raises
    ------
    pb_utils.TritonModelException
        If an expected key is missing from `args` or holds a value other
        than the expected one.
    """
    is_win = sys.platform == "win32"
    triton_dir = os.getenv(
        "TRITON_DIR", "c:\\tritonserver" if is_win else "/opt/tritonserver"
    )
    repo_path = triton_dir + "/qa/L0_backend_python/models/init_args"

    expected_args = {
        "model_name": "init_args",
        "model_instance_name": "init_args_0_0",
        "model_instance_kind": "CPU",
        "model_instance_device_id": "0",
        "model_version": "1",
        "model_repository": repo_path.replace("/", "\\") if is_win else repo_path,
    }

    for arg, expected in expected_args.items():
        # Report a missing key through the model exception type rather than
        # letting a bare KeyError escape.
        if arg not in args:
            raise pb_utils.TritonModelException(
                f'{arg} is missing. Expected "{expected}"'
            )

        actual = args[arg]
        if actual != expected:
            # f-string formatting keeps the message well-formed (balanced
            # quotes) and also copes with non-string values.
            raise pb_utils.TritonModelException(
                f'{arg} does not contain correct value. Expected "{expected}", '
                f'got "{actual}"'
            )


class TritonPythonModel:
    def initialize(self, args):
        # Keep the mapping for `execute`, then validate its contents.
        self.args = args
        check_init_args(self.args)

    def execute(self, requests):
        """
        Report how many of the expected "initialize" argument keys were
        actually supplied, so the caller can confirm that all of them are
        present. The same count is returned in "OUT" for every request.
        """
        expected_keys = (
            "model_config",
            "model_instance_kind",
            "model_instance_name",
            "model_instance_device_id",
            "model_repository",
            "model_version",
            "model_name",
        )

        supplied = list(self.args)
        found = sum(1 for name in expected_keys if name in supplied)

        return [
            pb_utils.InferenceResponse(
                [pb_utils.Tensor("OUT", np.array([found], dtype=np.float32))]
            )
            for _ in requests
        ]


================================================
FILE: qa/python_models/init_error/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "init_error" QA model (Python backend).
# One FP32 input "IN" and one FP32 output "OUT". The accompanying model.py
# references an undefined variable inside `initialize`, so this model is used
# to check how that failure is handled.
name: "init_error"
backend: "python"
max_batch_size: 64

input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# A single model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/init_error/model.py
================================================
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Test model whose `initialize` deliberately references an undefined name."""

    def initialize(self, args):
        """Store the model config, then fail on purpose with a NameError."""
        self.model_config = args["model_config"]
        # Intentional: `lorem_ipsum` is not defined anywhere, so evaluating this
        # expression raises NameError. Do not "fix" it -- triggering that error
        # during initialization is the reason this model exists.
        lorem_ipsum

    def execute(self, requests):
        """
        The main purpose of this model is to check whether undefined
        variables are correctly handled in the `initialize` function. The body
        of this function is never called or used.
        """
        responses = []
        for request in requests:
            input_tensor = pb_utils.get_input_tensor_by_name(request, "IN")
            out_tensor = pb_utils.Tensor("OUT", input_tensor.as_numpy())
            # NOTE(review): `error` is not defined in this method either; this
            # line would raise NameError if the body were ever executed.
            responses.append(pb_utils.InferenceResponse([out_tensor], error))
        return responses


================================================
FILE: qa/python_models/init_exit/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration of the "init_exit" test model, served by the Python backend.
name: "init_exit"
backend: "python"

# Single FP32 input named "INPUT0" holding 16 elements.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

# Single FP32 output named "OUTPUT0" holding 16 elements.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

# Model instances are placed on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/init_exit/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import signal
import sys
import time


class TritonPythonModel:
    """Test model whose process kills itself while `initialize` is running."""

    def initialize(self, args):
        """Wait three seconds, then send a terminating signal to this process."""
        time.sleep(3)
        # Simulate the model running out of memory and being killed by the OOM
        # killer.
        # NOTE: Windows runners use Python 3.8, which does not have access to
        # SIGKILL. This platform check should be removed once the Python version
        # is upgraded. Online forums suggest 'CTRL_C_EVENT' as the equivalent
        # event, but that signal terminates the entire test rather than just the
        # server. SIGINT seems to work in the meantime.
        own_pid = os.getpid()
        if sys.platform != "win32":
            os.kill(own_pid, signal.SIGKILL)
        else:
            os.kill(own_pid, signal.SIGINT)

    def execute(self, requests):
        """No-op; this model performs no inference work."""
        return None


================================================
FILE: qa/python_models/iterative_sequence/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration of the "iterative_sequence" test model (Python backend).
name: "iterative_sequence"
backend: "python"
max_batch_size: 0
# The model's initialize() refuses to load unless the decoupled transaction
# policy is enabled.
model_transaction_policy {
  decoupled: True
}
# The model validates that "IN" is TYPE_INT32 with dims [ 1 ].
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# The model validates that "OUT" is TYPE_INT32 with dims [ 1 ].
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# NOTE(review): presumably what lets the model hand a request back to the
# scheduler with the RESCHEDULE release flag -- confirm against the Triton docs.
sequence_batching {
  iterative_sequence : true
}

# Model instances are placed on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/iterative_sequence/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    This model takes 1 input tensor, an INT32 [ 1 ] input named "IN", and
    produces an output tensor "OUT" with the same shape as the input tensor.
    The input value indicates the total number of responses to be generated and
    the output value indicates the number of remaining responses. For example,
    if the request input has value 2, the model will:
        - Send a response with value 1.
        - Release request with RESCHEDULE flag.
        - When executed again on the same request, send the last response with
          value 0.
        - Release request with ALL flag.

    The countdown is stored on the model instance (`remaining_response` and
    `reset_flag`), so one instance tracks a single in-flight request at a time.
    """

    def initialize(self, args):
        """Validate the model configuration and reset the countdown state.

        Parameters
        ----------
        args : dict
            Must provide "model_config" (a JSON string) and "model_name".

        Raises
        ------
        pb_utils.TritonModelException
            If the decoupled transaction policy is not enabled, or if "IN" or
            "OUT" is not declared as TYPE_INT32 with dims [1].
        """
        self.model_config = model_config = json.loads(args["model_config"])

        using_decoupled = pb_utils.using_decoupled_model_transaction_policy(
            model_config
        )
        if not using_decoupled:
            raise pb_utils.TritonModelException(
                """the model `{}` can generate any number of responses per request,
                enable decoupled transaction policy in model configuration to
                serve this model""".format(
                    args["model_name"]
                )
            )

        # Get IN configuration
        in_config = pb_utils.get_input_config_by_name(model_config, "IN")

        # Validate the shape and data type of IN
        in_shape = in_config["dims"]
        if (len(in_shape) != 1) or (in_shape[0] != 1):
            raise pb_utils.TritonModelException(
                """the model `{}` requires the shape of 'IN' to be
                [1], got {}""".format(
                    args["model_name"], in_shape
                )
            )
        if in_config["data_type"] != "TYPE_INT32":
            raise pb_utils.TritonModelException(
                """the model `{}` requires the data_type of 'IN' to be
                'TYPE_INT32', got {}""".format(
                    args["model_name"], in_config["data_type"]
                )
            )

        # Get OUT configuration
        out_config = pb_utils.get_output_config_by_name(model_config, "OUT")

        # Validate the shape and data type of OUT
        out_shape = out_config["dims"]
        if (len(out_shape) != 1) or (out_shape[0] != 1):
            raise pb_utils.TritonModelException(
                """the model `{}` requires the shape of 'OUT' to be
                [1], got {}""".format(
                    args["model_name"], out_shape
                )
            )
        if out_config["data_type"] != "TYPE_INT32":
            raise pb_utils.TritonModelException(
                """the model `{}` requires the data_type of 'OUT' to be
                'TYPE_INT32', got {}""".format(
                    args["model_name"], out_config["data_type"]
                )
            )

        # Number of responses still owed to the request currently in flight.
        self.remaining_response = 0
        # True when the next request seen starts a new countdown, i.e. its "IN"
        # value must be loaded into `remaining_response`.
        self.reset_flag = True

    def execute(self, requests):
        """Send one response per request and reschedule it until the count is 0.

        Each call decrements the countdown and sends the new value as "OUT".
        While responses remain, the request is released with the RESCHEDULE
        flag; the last response is sent with the COMPLETE_FINAL flag.

        Returns
        -------
        None
            Responses are delivered through each request's response sender.
        """
        for request in requests:
            in_input = pb_utils.get_input_tensor_by_name(request, "IN").as_numpy()

            if self.reset_flag:
                # First execution of a new request: start its countdown.
                self.remaining_response = in_input[0]
                self.reset_flag = False

            response_sender = request.get_response_sender()

            self.remaining_response -= 1

            out_output = pb_utils.Tensor(
                "OUT", np.array([self.remaining_response], np.int32)
            )
            response = pb_utils.InferenceResponse(output_tensors=[out_output])

            if self.remaining_response <= 0:
                response_sender.send(
                    response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
                )
                # The request is finished: re-arm the flag so the next request
                # loads its own "IN" value instead of continuing from the stale
                # (non-positive) counter left behind by this one.
                self.reset_flag = True
            else:
                request.set_release_flags(
                    pb_utils.TRITONSERVER_REQUEST_RELEASE_RESCHEDULE
                )
                response_sender.send(response)

        return None


================================================
FILE: qa/python_models/model_env/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration of the "model_env" test model, served by the Python backend.
name: "model_env"
backend: "python"
max_batch_size: 64

# Single FP32 input named "IN"; the -1 entry marks a variable-size dimension.
input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# Single FP32 output named "OUT", declared with the same dims as the input.
output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# One model instance, placed on CPU.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/model_env/model.py
================================================
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Test model that only loads when the MY_ENV environment variable is set."""

    def initialize(self, args):
        """Raise TritonModelException unless MY_ENV equals the string "MY_ENV"."""
        # Environment variables must be propagated correctly to Python models:
        # a missing variable and a wrong value are both treated as failures.
        if os.environ.get("MY_ENV") != "MY_ENV":
            raise pb_utils.TritonModelException(
                "MY_ENV doesn't exists or contains incorrect value"
            )

    def execute(self, requests):
        """No-op; this model performs no inference work."""
        return None


================================================
FILE: qa/python_models/model_init_del/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration of the "model_init_del" test model (Python backend).
name: "model_init_del"
backend: "python"
max_batch_size: 0

# Single FP32 input named "INPUT0"; the -1 entry marks a variable-size dimension.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# Single FP32 output named "OUTPUT0", declared with the same dims as the input.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# NOTE: util.py next to this file rewrites this configuration in place using
# plain text matching. It locates the instance group section through its opening
# line and through the trailing "# end" marker on the closing bracket, so both
# must keep their exact text.
instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]  # end instance_group


================================================
FILE: qa/python_models/model_init_del/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys
import time

import triton_python_backend_utils as pb_utils

sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from util import get_delay, inc_count


class TritonPythonModel:
    """Identity model that counts lifecycle calls and sleeps on request.

    `initialize` and `finalize` each bump a counter through `inc_count`, and
    `initialize`/`execute` pause for the delay reported by `get_delay`.
    """

    def initialize(self, args):
        """Record the initialization, then apply the "initialize" delay."""
        inc_count("initialize")
        self._sleep("initialize")

    def execute(self, requests):
        """Copy INPUT0 to OUTPUT0 for every request, then apply the "infer" delay."""
        responses = [
            pb_utils.InferenceResponse(
                [
                    pb_utils.Tensor(
                        "OUTPUT0",
                        pb_utils.get_input_tensor_by_name(req, "INPUT0").as_numpy(),
                    )
                ]
            )
            for req in requests
        ]
        # The delay is applied once per execute call, after all responses exist.
        self._sleep("infer")
        return responses

    def finalize(self):
        """Record that this instance was finalized."""
        inc_count("finalize")

    def _sleep(self, kind):
        """Sleep for the delay configured for `kind`; skip non-positive delays."""
        seconds = get_delay(kind)
        if seconds <= 0:
            return
        time.sleep(seconds)


================================================
FILE: qa/python_models/model_init_del/util.py
================================================
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import fcntl
import os

# Prefix of the count/delay state file names built by the helpers below.
_model_name = "model_init_del"

#
# Helper functions for reading/writing state to disk
#


def _get_number(filename):
    """Return the integer stored in `filename` under MODEL_LOG_DIR.

    The file is read while holding a shared lock. A missing file counts as 0.
    """
    path = os.path.join(os.environ["MODEL_LOG_DIR"], filename)
    contents = "0"
    try:
        with open(path, mode="r", encoding="utf-8", errors="strict") as stream:
            fcntl.lockf(stream, fcntl.LOCK_SH)
            contents = stream.read()
    except FileNotFoundError:
        # Nothing has been stored yet; keep the default of "0".
        pass
    return int(contents)


def _store_number(filename, number):
    """Overwrite `filename` under MODEL_LOG_DIR with the text form of `number`.

    The file is created if it does not exist. Its previous content is only
    discarded after the exclusive lock has been acquired.
    """
    full_path = os.path.join(os.environ["MODEL_LOG_DIR"], filename)
    txt = str(number)
    # Mode "w" would truncate the file as part of open(), i.e. before the lock
    # is held, letting a concurrent locked reader observe an empty file and
    # fail in int(""). Mode "a" creates the file without truncating it, so the
    # truncation below happens under the exclusive lock.
    with open(full_path, mode="a", encoding="utf-8", errors="strict") as f:
        fcntl.lockf(f, fcntl.LOCK_EX)
        f.truncate(0)
        # In append mode the write lands at end-of-file, which is now offset 0.
        f.write(txt)

def _inc_number(filename):
    """Add 1 to the integer stored in `filename` and return the new value.

    The read-modify-write runs under an exclusive lock on the file. When the
    file does not exist yet, it is created holding 1.
    """
    path = os.path.join(os.environ["MODEL_LOG_DIR"], filename)
    try:
        with open(path, mode="r+", encoding="utf-8", errors="strict") as stream:
            fcntl.lockf(stream, fcntl.LOCK_EX)
            number = int(stream.read()) + 1
            # Drop the old digits before writing so none are left behind.
            stream.truncate(0)
            stream.seek(0)
            stream.write(str(number))
    except FileNotFoundError:
        number = 1
        _store_number(filename, number)
    return number


#
# Functions for communicating initialize and finalize count between the model
# and test
#


def _get_count_filename(kind):
    """Return the counter file name for `kind` ("initialize" or "finalize").

    Raises KeyError for any other kind.
    """
    if kind not in ("initialize", "finalize"):
        raise KeyError("Invalid count kind: " + str(kind))
    return f"{_model_name}_{kind}_count.txt"


def get_count(kind):
    """Return the current count stored for `kind`."""
    count_file = _get_count_filename(kind)
    return _get_number(count_file)


def inc_count(kind):
    """Increment the count stored for `kind` and return the new value."""
    count_file = _get_count_filename(kind)
    return _inc_number(count_file)


def reset_count(kind):
    """Store a count of 0 for `kind` and return 0."""
    _store_number(_get_count_filename(kind), 0)
    return 0


#
# Functions for communicating various delays (in seconds) to the model
#


def _get_delay_filename(kind):
    """Return the delay file name for `kind` ("initialize" or "infer").

    Raises KeyError for any other kind.
    """
    if kind not in ("initialize", "infer"):
        raise KeyError("Invalid delay kind: " + str(kind))
    return f"{_model_name}_{kind}_delay.txt"


def get_delay(kind):
    """Return the delay currently stored for `kind`."""
    delay_file = _get_delay_filename(kind)
    return _get_number(delay_file)


def set_delay(kind, delay):
    """Store `delay` as the delay for `kind` and return it unchanged."""
    delay_file = _get_delay_filename(kind)
    _store_number(delay_file, delay)
    return delay


#
# Functions for modifying the model
#


def update_instance_group(instance_group_str):
    """Replace the body of the instance group section in config.pbtxt.

    The section is located by plain text matching: it runs from the opening
    "instance_group [" up to the closing line carrying the
    "# end instance_group" marker. The file next to this module is rewritten
    in place and the new file content is returned.
    """
    config_path = os.path.join(os.path.dirname(__file__), "config.pbtxt")
    with open(config_path, mode="r+", encoding="utf-8", errors="strict") as config:
        head, tail = config.read().split("instance_group [")
        pieces = [
            head,
            "instance_group [\n",
            instance_group_str,
            "\n]  # end instance_group\n",
            # Everything that followed the old section is kept as-is.
            tail.split("\n]  # end instance_group\n")[1],
        ]
        updated = "".join(pieces)
        config.truncate(0)
        config.seek(0)
        config.write(updated)
    return updated


def update_sequence_batching(sequence_batching_str):
    """Set, replace or remove the sequence batching section of config.pbtxt.

    An existing section (located by its opening text and by its
    "# end sequence_batching" marker line) is replaced by the new body, or
    removed when `sequence_batching_str` is empty. Without an existing
    section, a non-empty body is appended to the end of the file. The file is
    rewritten in place and the new content is returned.
    """
    opening = "sequence_batching {"
    closing = "\n}  # end sequence_batching\n"
    config_path = os.path.join(os.path.dirname(__file__), "config.pbtxt")
    with open(config_path, mode="r+", encoding="utf-8", errors="strict") as config:
        txt = config.read()
        # Text of the new section; empty when the section is to be dropped.
        section = ""
        if sequence_batching_str != "":
            section = opening + "\n" + sequence_batching_str + closing
        if opening in txt:
            before, after = txt.split(opening)
            txt = before + section + after.split(closing)[1]
        elif section:
            txt = txt + "\n" + section
        config.truncate(0)
        config.seek(0)
        config.write(txt)
    return txt


def update_model_file():
    """Append a dummy comment to version 1's model.py so its content changes."""
    model_dir = os.path.dirname(__file__)
    model_file = os.path.join(model_dir, "1", "model.py")
    with open(model_file, mode="a", encoding="utf-8", errors="strict") as out:
        out.write("\n# dummy model file update\n")


def enable_batching():
    """Rewrite config.pbtxt with a max batch size of 2 instead of 0.

    Every occurrence of the zero-batch setting text is replaced. Returns the
    new file content.
    """
    config_path = os.path.join(os.path.dirname(__file__), "config.pbtxt")
    with open(config_path, mode="r+", encoding="utf-8", errors="strict") as config:
        batched = config.read().replace("max_batch_size: 0", "max_batch_size: 2")
        config.truncate(0)
        config.seek(0)
        config.write(batched)
    return batched


def disable_batching():
    """Rewrite config.pbtxt with a max batch size of 0 instead of 2.

    Every occurrence of the batch-of-two setting text is replaced. Returns the
    new file content.
    """
    config_path = os.path.join(os.path.dirname(__file__), "config.pbtxt")
    with open(config_path, mode="r+", encoding="utf-8", errors="strict") as config:
        unbatched = config.read().replace("max_batch_size: 2", "max_batch_size: 0")
        config.truncate(0)
        config.seek(0)
        config.write(unbatched)
    return unbatched


================================================
FILE: qa/python_models/multi_file/file1.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Marker value; the multi_file model's `initialize` compares it against "FILE1"
# to confirm that this module was imported correctly.
FILE_NAME = "FILE1"


================================================
FILE: qa/python_models/multi_file/file2.py
================================================
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Marker value; the multi_file model's `initialize` compares it against "FILE2"
# to confirm that this module was imported correctly.
FILE_NAME = "FILE2"


================================================
FILE: qa/python_models/multi_file/model.py
================================================
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import file1
import triton_python_backend_utils as pb_utils

from . import file2


class TritonPythonModel:
    """Model that checks the ``file1`` and ``file2`` helper modules import correctly."""

    def initialize(self, args):
        """Fail unless both helper modules expose their expected marker value.

        Parameters
        ----------
        args : dict
          Initialization arguments passed by the backend (not used here).

        Raises
        ------
        pb_utils.TritonModelException
          If ``file1.FILE_NAME`` is not "FILE1" or ``file2.FILE_NAME`` is
          not "FILE2".
        """
        markers_match = file1.FILE_NAME == "FILE1" and file2.FILE_NAME == "FILE2"
        if not markers_match:
            raise pb_utils.TritonModelException("Imports do not work")

    def execute(self, requests):
        """Do nothing with the requests and return ``None``."""
        return None


================================================
FILE: qa/python_models/non_contiguous/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model whose model.py returns reshaped and transposed
# (non-contiguous) copies of its input.
name: "non_contiguous"
backend: "python"

# One rank-5 FP32 input; every dimension is variable (-1).
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1, -1, -1, -1, -1 ]
  }
]
# Three rank-5 FP32 outputs, all with variable dimensions.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1, -1, -1, -1, -1 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ -1, -1, -1, -1, -1 ]
  },
  {
    name: "OUTPUT2"
    data_type: TYPE_FP32
    dims: [ -1, -1, -1, -1, -1 ]
  }
]

# Instances are placed on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/non_contiguous/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Model returning reshaped and transposed variants of ``INPUT0``."""

    def execute(self, requests):
        """Build one response per request with three outputs derived from INPUT0.

        * OUTPUT0: the input reshaped to ``[10, 2, 6, 5, 11]`` (so the input
          must hold 6600 elements).
        * OUTPUT1: the input with all axes reversed (``.T``).
        * OUTPUT2: the input with its axes permuted to ``[1, 0, 4, 2, 3]``.

        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest

        Returns
        -------
        list
          One pb_utils.InferenceResponse per request, in request order.
        """
        target_shape = [10, 2, 6, 5, 11]
        axis_order = [1, 0, 4, 2, 3]

        def respond(request):
            source = pb_utils.get_input_tensor_by_name(request, "INPUT0").as_numpy()
            reshaped = pb_utils.Tensor("OUTPUT0", source.reshape(target_shape))
            # Transposing yields a strided view, i.e. a non-contiguous tensor.
            reversed_axes = pb_utils.Tensor("OUTPUT1", source.T)
            permuted = pb_utils.Tensor("OUTPUT2", np.transpose(source, axis_order))
            return pb_utils.InferenceResponse([reshaped, reversed_axes, permuted])

        return [respond(request) for request in requests]


================================================
FILE: qa/python_models/optional/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model with two optional inputs; batching is disabled
# (max_batch_size: 0).
name: "optional"
backend: "python"
max_batch_size: 0
# Both inputs may be omitted from a request; model.py substitutes the
# int32 array [5] for any input that is missing.
input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "INPUT1"
    data_type: TYPE_INT32
    dims: [ 1 ]
    optional: true
  }
]
# model.py fills OUTPUT0 with INPUT0 + INPUT1 and OUTPUT1 with INPUT0 - INPUT1.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: qa/python_models/optional/model.py
================================================
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Add/subtract model whose two inputs are both optional."""

    def execute(self, requests):
        """Return INPUT0 + INPUT1 and INPUT0 - INPUT1 for every request.

        An input that is absent from a request is replaced by a one-element
        int32 array holding the value 5.

        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest

        Returns
        -------
        list
          One pb_utils.InferenceResponse per request, in request order.
        """

        def array_or_default(tensor):
            # A missing optional input is reported as None by the lookup.
            if tensor is None:
                return np.array([5], dtype=np.int32)
            return tensor.as_numpy()

        responses = []
        for request in requests:
            first = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            second = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            lhs = array_or_default(first)
            rhs = array_or_default(second)

            total = pb_utils.Tensor("OUTPUT0", lhs + rhs)
            difference = pb_utils.Tensor("OUTPUT1", lhs - rhs)
            responses.append(pb_utils.InferenceResponse([total, difference]))

        return responses


================================================
FILE: qa/python_models/python_based_backends/add_sub_backend/model.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import os

import triton_python_backend_utils as pb_utils

# Name of the JSON file, looked up in the directory returned by
# pb_utils.get_model_dir(), that holds the "operation" setting ("add" or "sub").
_ADD_SUB_ARGS_FILENAME = "model.json"


class TritonPythonModel:
    """Python-based backend that adds or subtracts its two input tensors.

    The operation is chosen by the ``operation`` key of the JSON file named
    by ``_ADD_SUB_ARGS_FILENAME`` in the directory returned by
    ``pb_utils.get_model_dir()``.
    """

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """This function is called only once when loading the model assuming
        the server was not started with `--disable-auto-complete-config`.

        INPUT0, INPUT1 and OUTPUT (TYPE_FP32, dims [4]) are added to the
        configuration unless a tensor of the same name is already present.

        Parameters
        ----------
        auto_complete_model_config : pb_utils.ModelConfig
          An object containing the existing model configuration.

        Returns
        -------
        pb_utils.ModelConfig
          An object containing the auto-completed model configuration
        """
        default_inputs = [
            {"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]},
            {"name": "INPUT1", "data_type": "TYPE_FP32", "dims": [4]},
        ]
        default_outputs = [{"name": "OUTPUT", "data_type": "TYPE_FP32", "dims": [4]}]

        config = auto_complete_model_config.as_dict()
        existing_inputs = {entry["name"] for entry in config["input"]}
        existing_outputs = {entry["name"] for entry in config["output"]}

        # Only add tensors the user has not already declared.
        for tensor_spec in default_inputs:
            if tensor_spec["name"] not in existing_inputs:
                auto_complete_model_config.add_input(tensor_spec)

        for tensor_spec in default_outputs:
            if tensor_spec["name"] not in existing_outputs:
                auto_complete_model_config.add_output(tensor_spec)

        return auto_complete_model_config

    def initialize(self, args):
        """This function allows the model to initialize any state associated with this model.

        Parameters
        ----------
        args : dict
          Both keys and values are strings. The dictionary keys and values are:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name

        Raises
        ------
        pb_utils.TritonModelException
          If the args file is missing, lacks the 'operation' key, contains
          other keys, or 'operation' is neither 'add' nor 'sub'.
        """

        self.model_config = model_config = json.loads(args["model_config"])

        # Get OUTPUT configuration
        output_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT")

        model_dir = pb_utils.get_model_dir()
        engine_args_filepath = os.path.join(model_dir, _ADD_SUB_ARGS_FILENAME)

        # Validation uses explicit raises rather than `assert`, because
        # assert statements are removed when Python runs with optimization
        # enabled and the checks would then be skipped silently.
        if not os.path.isfile(engine_args_filepath):
            raise pb_utils.TritonModelException(
                f"'{_ADD_SUB_ARGS_FILENAME}' containing add sub model args must be provided in '{model_dir}'"
            )

        with open(engine_args_filepath) as file:
            self.add_sub_config = json.load(file)

        if "operation" not in self.add_sub_config:
            raise pb_utils.TritonModelException(
                f"Missing required key 'operation' in {_ADD_SUB_ARGS_FILENAME}"
            )

        extra_keys = set(self.add_sub_config.keys()) - {"operation"}
        if extra_keys:
            # Sorted so the error message is deterministic.
            raise pb_utils.TritonModelException(
                f"Unsupported keys are provided in {_ADD_SUB_ARGS_FILENAME}: {', '.join(sorted(extra_keys))}"
            )

        if self.add_sub_config["operation"] not in ("add", "sub"):
            raise pb_utils.TritonModelException(
                f"'operation' value must be 'add' or 'sub' in {_ADD_SUB_ARGS_FILENAME}"
            )

        # Convert Triton types to numpy types
        self.output_dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])

    def execute(self, requests):
        """This function is called when an inference request is made
        for this model.

        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest

        Returns
        -------
        list
          A list of pb_utils.InferenceResponse. The length of this list must
          be the same as `requests`
        """

        responses = []

        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            # initialize() guarantees the operation is either "add" or "sub".
            if self.add_sub_config["operation"] == "add":
                out = in_0.as_numpy() + in_1.as_numpy()
            else:
                out = in_0.as_numpy() - in_1.as_numpy()

            # Create output tensors.
            out_tensor = pb_utils.Tensor("OUTPUT", out.astype(self.output_dtype))

            # Create InferenceResponse.
            inference_response = pb_utils.InferenceResponse(output_tensors=[out_tensor])
            responses.append(inference_response)

        return responses

    def finalize(self):
        """`finalize` is called only once when the model is being unloaded."""
        print("Cleaning up...")


================================================
FILE: qa/python_models/python_version/config.pbtxt
================================================
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# No inputs or outputs are declared here; model.py's auto_complete_config
# adds INPUT and OUTPUT and sets max_batch_size to 0.
name: "python_version"

# Instances are placed on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/python_version/model.py
================================================
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import locale
import os
import sys

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Identity model that prints the Python, NumPy, PyTorch versions and locale on load."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Declare the model's single input and output and disable batching.

        Parameters
        ----------
        auto_complete_model_config : pb_utils.ModelConfig
          An object containing the existing model configuration.

        Returns
        -------
        pb_utils.ModelConfig
          The same object with max_batch_size set to 0 and the INPUT /
          OUTPUT tensors (TYPE_FP32, dims [1]) added.
        """
        input_spec = {"name": "INPUT", "data_type": "TYPE_FP32", "dims": [1]}
        output_spec = {"name": "OUTPUT", "data_type": "TYPE_FP32", "dims": [1]}

        auto_complete_model_config.set_max_batch_size(0)
        auto_complete_model_config.add_input(input_spec)
        auto_complete_model_config.add_output(output_spec)

        return auto_complete_model_config

    def initialize(self, args):
        """Store the model config and print interpreter/library versions.

        Parameters
        ----------
        args : dict
          Initialization arguments; only ``model_config`` is read.
        """
        # Imported here rather than at module level, so torch is only
        # loaded when the model is initialized.
        import torch

        self.model_config = args["model_config"]
        # This is to make sure that /bin/bash is not picking up
        # the wrong shared libraries after installing PyTorch.
        os.system("/bin/bash --help")
        print(
            f"Python version is {sys.version_info.major}.{sys.version_info.minor}, NumPy version is {np.version.version}, and PyTorch version is {torch.__version__}",
            flush=True,
        )
        print(f"Locale is {locale.getlocale()}", flush=True)

    def execute(self, requests):
        """This function is called on inference request.

        Copies the INPUT tensor of each request to the OUTPUT tensor of its
        response.

        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest

        Returns
        -------
        list
          One pb_utils.InferenceResponse per request, in request order.
        """
        responses = []
        for request in requests:
            # Tensor names must match those declared in auto_complete_config;
            # looking up an undeclared name yields None.
            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT")
            out_tensor = pb_utils.Tensor("OUTPUT", input_tensor.as_numpy())
            responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses


================================================
FILE: qa/python_models/pytorch_fp32_fp32/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# NOTE(review): the name below differs from the enclosing directory
# (pytorch_fp32_fp32) - presumably rewritten by the test setup; confirm.
name: "pytorch_model"
backend: "python"

# FP32 input of shape [1, 1, 28, 28].
input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ 1, 1, 28, 28 ]
  }
]

# FP32 output of shape [1, 10]; model.py's network ends in a 10-way log-softmax.
output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ 1, 10 ]
  }
]

# A single CPU instance.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/pytorch_fp32_fp32/model.py
================================================
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import triton_python_backend_utils as pb_utils


class Net(nn.Module):
    """Small convolutional classifier: two 3x3 convolutions and two linear layers.

    ``fc1`` expects 9216 features, which is what a ``(N, 1, 28, 28)`` input
    produces after the convolutions and pooling (64 channels * 12 * 12).
    The output is a ``(N, 10)`` tensor of log-probabilities.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in this order so that seeded parameter
        # initialization stays reproducible.
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Applied to the 4-D feature map, so channel-wise dropout is valid.
        self.dropout1 = nn.Dropout2d(0.25)
        # Applied after flattening, i.e. to a 2-D tensor. Dropout2d on 2-D
        # input is deprecated in PyTorch, whose warning names plain Dropout
        # as the replacement that keeps the same behavior.
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities for the input batch.

        Parameters
        ----------
        x : torch.Tensor
          Input batch; ``(N, 1, 28, 28)`` matches the layer sizes above.

        Returns
        -------
        torch.Tensor
          ``(N, 10)`` tensor produced by ``log_softmax`` over dim 1.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        # Keep the batch dimension, flatten everything else.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


class TritonPythonModel:
    """Serves the ``Net`` network through the Python backend."""

    def initialize(self, args):
        """Create the network with a fixed seed and switch it to eval mode.

        Parameters
        ----------
        args : dict
          Initialization arguments passed by the backend (not used here).
        """
        # Fixed seed so the randomly initialized weights are reproducible.
        torch.manual_seed(0)
        network = Net()
        network.eval()
        self.model = network

    def execute(self, requests):
        """Run the network on the "IN" tensor of each request.

        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest

        Returns
        -------
        list
          One pb_utils.InferenceResponse per request, each holding an "OUT"
          tensor with the network output.
        """
        results = []
        for request in requests:
            # The array handed out by the backend is read-only, so take a
            # copy before wrapping it in a torch tensor.
            readonly_view = pb_utils.get_input_tensor_by_name(request, "IN").as_numpy()
            writable_copy = np.array(readonly_view)
            prediction = self.model(torch.tensor(writable_copy))

            output = pb_utils.Tensor("OUT", prediction.detach().numpy())
            results.append(pb_utils.InferenceResponse([output]))
        return results


================================================
FILE: qa/python_models/request_rescheduling_addsub/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend add/sub model; model.py releases the first request it
# receives with the RESCHEDULE flag instead of answering it.
name: "request_rescheduling_addsub"
backend: "python"

# Two FP32 inputs of 16 elements each.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
# model.py fills OUTPUT0 with INPUT0 + INPUT1 and OUTPUT1 with INPUT0 - INPUT1.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
# NOTE(review): iterative sequence batching is presumably what permits the
# request rescheduling done in model.py - confirm against the Triton docs.
sequence_batching {
  iterative_sequence : true
}
# Instances are placed on CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/request_rescheduling_addsub/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Add/sub model that asks Triton to reschedule the first request it sees.

    For every request, OUTPUT0 is INPUT0 + INPUT1 and OUTPUT1 is
    INPUT0 - INPUT1, each cast to the dtype declared in the model config.
    While ``self.idx`` is still 0 the request is flagged for rescheduling and
    ``None`` is returned in place of its response.
    """

    def initialize(self, args):
        """Parse the model config and cache the numpy dtypes of both outputs.

        ``args["model_config"]`` is expected to hold the model configuration
        as a JSON string.
        """
        config = json.loads(args["model_config"])
        self.model_config = config

        out0_cfg, out1_cfg = (
            pb_utils.get_output_config_by_name(config, output_name)
            for output_name in ("OUTPUT0", "OUTPUT1")
        )

        to_numpy_dtype = pb_utils.triton_string_to_numpy
        self.output0_dtype = to_numpy_dtype(out0_cfg["data_type"])
        self.output1_dtype = to_numpy_dtype(out1_cfg["data_type"])

        # Stays 0 until one request has been handed back for rescheduling.
        self.idx = 0

    def execute(self, requests):
        """This function is called on inference request.

        Returns a list with one entry per request: ``None`` for the request
        that was released with the RESCHEDULE flag, an ``InferenceResponse``
        carrying OUTPUT0/OUTPUT1 for every other request.
        """
        results = []

        for req in requests:
            lhs = pb_utils.get_input_tensor_by_name(req, "INPUT0")
            rhs = pb_utils.get_input_tensor_by_name(req, "INPUT1")

            total = lhs.as_numpy() + rhs.as_numpy()
            delta = lhs.as_numpy() - rhs.as_numpy()

            reply = pb_utils.InferenceResponse(
                output_tensors=[
                    pb_utils.Tensor("OUTPUT0", total.astype(self.output0_dtype)),
                    pb_utils.Tensor("OUTPUT1", delta.astype(self.output1_dtype)),
                ]
            )

            if self.idx != 0:
                # A request was already rescheduled; answer normally.
                results.append(reply)
                continue

            # Explicitly reschedule the first request: no response is
            # returned for it, only the release flag is set.
            req.set_release_flags(pb_utils.TRITONSERVER_REQUEST_RELEASE_RESCHEDULE)
            results.append(None)
            self.idx += 1

        return results


================================================
FILE: qa/python_models/response_parameters/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "response_parameters"
backend: "python"
max_batch_size: 8

input [
  {
    name: "RESPONSE_PARAMETERS"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

output [
  {
    name: "OUTPUT"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]


================================================
FILE: qa/python_models/response_parameters/model.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Model that turns the JSON in RESPONSE_PARAMETERS into response parameters.

    The raw input string is echoed back in OUTPUT. Any failure while parsing
    the JSON, building the response, or reading the parameters back produces
    an INVALID_ARG error response for that request instead.
    """

    def execute(self, requests):
        """Return one response (success or error) per request, in order."""
        return [self._respond(request) for request in requests]

    def _respond(self, request):
        """Build the response for a single request."""
        raw = pb_utils.get_input_tensor_by_name(
            request, "RESPONSE_PARAMETERS"
        ).as_numpy()
        # Only element [0][0] of the input tensor is read.
        params_text = str(raw[0][0], encoding="utf-8")
        echo_tensor = pb_utils.Tensor(
            "OUTPUT", np.array([[params_text]], dtype=np.object_)
        )

        try:
            params = json.loads(params_text)
            # convert all digit keys to int, for testing non-str key types
            if isinstance(params, dict):
                params = {
                    (int(k) if isinstance(k, str) and k.isdigit() else k): v
                    for k, v in params.items()
                }

            reply = pb_utils.InferenceResponse(
                output_tensors=[echo_tensor], parameters=params
            )

            # Read the parameters back from the response and make sure they
            # round-trip; an empty string is treated as "no parameters".
            echoed = (
                json.loads(reply.parameters()) if reply.parameters() != "" else {}
            )
            if echoed != params:
                raise Exception("Response parameters set differ from provided")
        except Exception as exc:
            return pb_utils.InferenceResponse(
                error=pb_utils.TritonError(
                    message=str(exc), code=pb_utils.TritonError.INVALID_ARG
                )
            )

        return reply


================================================
FILE: qa/python_models/response_parameters_bls/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "response_parameters_bls"
backend: "python"
max_batch_size: 8

input [
  {
    name: "RESPONSE_PARAMETERS"
    data_type: TYPE_STRING
    dims: [ 1 ]
  },
  {
    name: "RESPONSE_PARAMETERS_DECOUPLED"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

output [
  {
    name: "OUTPUT"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]


================================================
FILE: qa/python_models/response_parameters_bls/model.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    This model (A) is designed to test receiving response parameters when using BLS.

    It takes two input tensors:
      * RESPONSE_PARAMETERS: a JSON string forwarded through BLS to the
        "response_parameters" model, which sets it as its response parameters.
      * RESPONSE_PARAMETERS_DECOUPLED: a JSON list forwarded through BLS to the
        "response_parameters_decoupled" model; each response received from that
        model is matched against one entry of the list.

    Model A compares the parameters carried by the BLS responses against the
    parsed inputs and reports the outcome as the string "True" or "False" in
    its OUTPUT tensor, so a client can easily test whether the real response
    parameters are the same as the input response parameters.
    """

    @staticmethod
    def _parse_parameters(params_str):
        """Decode a response's parameters string; "" means no parameters."""
        return json.loads(params_str) if params_str != "" else {}

    def execute(self, requests):
        """Run both BLS checks for every request and return the verdicts.

        Returns a list with one InferenceResponse per request whose OUTPUT
        tensor holds str(passed).
        """
        responses = []

        for request in requests:
            passed = True

            # test bls response parameters from a regular model
            res_params_tensor = pb_utils.get_input_tensor_by_name(
                request, "RESPONSE_PARAMETERS"
            ).as_numpy()
            res_params_str = str(res_params_tensor[0][0], encoding="utf-8")
            res_params = json.loads(res_params_str)
            bls_input_tensor = pb_utils.Tensor("RESPONSE_PARAMETERS", res_params_tensor)
            bls_req = pb_utils.InferenceRequest(
                model_name="response_parameters",
                inputs=[bls_input_tensor],
                requested_output_names=["OUTPUT"],
            )
            bls_res = bls_req.exec()  # decoupled=False
            bls_res_params = self._parse_parameters(bls_res.parameters())
            passed = passed and bls_res_params == res_params

            # test bls response parameters from a decoupled model
            res_params_decoupled_tensor = pb_utils.get_input_tensor_by_name(
                request, "RESPONSE_PARAMETERS_DECOUPLED"
            ).as_numpy()
            res_params_decoupled_str = str(
                res_params_decoupled_tensor[0][0], encoding="utf-8"
            )
            res_params_decoupled = json.loads(res_params_decoupled_str)
            bls_decoupled_input_tensor = pb_utils.Tensor(
                "RESPONSE_PARAMETERS", res_params_decoupled_tensor
            )  # response_parameters_decoupled model input name is RESPONSE_PARAMETERS
            bls_decoupled_req = pb_utils.InferenceRequest(
                model_name="response_parameters_decoupled",
                inputs=[bls_decoupled_input_tensor],
                requested_output_names=["OUTPUT"],
            )
            bls_decoupled_res = bls_decoupled_req.exec(decoupled=True)
            for bls_decoupled_r in bls_decoupled_res:
                if len(bls_decoupled_r.output_tensors()) == 0:
                    break  # meaning reached final response
                bls_decoupled_r_params = self._parse_parameters(
                    bls_decoupled_r.parameters()
                )
                if bls_decoupled_r_params in res_params_decoupled:
                    # Consume the matched entry so duplicates must each be
                    # matched by their own response.
                    res_params_decoupled.remove(bls_decoupled_r_params)
                else:
                    # Unexpected parameters: record the failure instead of
                    # letting list.remove() raise ValueError and abort execute.
                    passed = False
            # Every expected entry must have been matched by some response.
            passed = passed and len(res_params_decoupled) == 0

            output_tensor = pb_utils.Tensor(
                "OUTPUT", np.array([[str(passed)]], dtype=np.object_)
            )
            response = pb_utils.InferenceResponse(output_tensors=[output_tensor])
            responses.append(response)

        return responses


================================================
FILE: qa/python_models/response_parameters_decoupled/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "response_parameters_decoupled"
backend: "python"
max_batch_size: 8

input [
  {
    name: "RESPONSE_PARAMETERS"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

output [
  {
    name: "OUTPUT"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]

model_transaction_policy {
  decoupled: True
}


================================================
FILE: qa/python_models/response_parameters_decoupled/model.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Model that sends one response per entry of the RESPONSE_PARAMETERS JSON.

    Responses are delivered through each request's response sender. Every
    entry is sent as its own response, with the entry as the response
    parameters and its JSON dump as OUTPUT.
    """

    def execute(self, requests):
        """Send all responses for every request, then the final flag.

        If anything fails while producing the responses, a single
        INVALID_ARG error response is sent for that request. Always returns
        None.
        """
        for req in requests:
            raw = pb_utils.get_input_tensor_by_name(
                req, "RESPONSE_PARAMETERS"
            ).as_numpy()
            # Only element [0][0] of the input tensor is read.
            params_text = str(raw[0][0], encoding="utf-8")
            sender = req.get_response_sender()

            try:
                for entry in json.loads(params_text):
                    payload = np.array([[json.dumps(entry)]], dtype=np.object_)
                    reply = pb_utils.InferenceResponse(
                        output_tensors=[pb_utils.Tensor("OUTPUT", payload)],
                        parameters=entry,
                    )

                    # Verify the parameters round-trip through the response;
                    # an empty string is treated as "no parameters".
                    echoed = (
                        json.loads(reply.parameters())
                        if reply.parameters() != ""
                        else {}
                    )
                    if echoed != entry:
                        raise Exception("Response parameters set differ from provided")

                    sender.send(reply)
            except Exception as exc:
                sender.send(
                    pb_utils.InferenceResponse(
                        error=pb_utils.TritonError(
                            message=str(exc), code=pb_utils.TritonError.INVALID_ARG
                        )
                    )
                )

            # Close the stream for this request whether or not an error occurred.
            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        return None


================================================
FILE: qa/python_models/response_sender/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

backend: "python"
max_batch_size: 8

input [
  {
    name: "NUMBER_OF_RESPONSE_BEFORE_RETURN"
    data_type: TYPE_UINT8
    dims: [ 1 ]
  },
  {
    name: "SEND_COMPLETE_FINAL_FLAG_BEFORE_RETURN"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  },
  {
    name: "RETURN_A_RESPONSE"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  },
  {
    name: "NUMBER_OF_RESPONSE_AFTER_RETURN"
    data_type: TYPE_UINT8
    dims: [ 1 ]
  },
  {
    name: "SEND_COMPLETE_FINAL_FLAG_AFTER_RETURN"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]
output [
  {
    name: "INDEX"
    data_type: TYPE_UINT16
    dims: [ 1 ]
  }
]

instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/response_sender/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import triton_python_backend_utils as pb_utils
from model_common import ResponseSenderModelCommon


class TritonPythonModel:
    """Synchronous entry point that delegates to ResponseSenderModelCommon."""

    def initialize(self, args):
        """Create the shared helper, handing it the pb_utils module.

        ``args`` is not used here.
        """
        self._common = ResponseSenderModelCommon(pb_utils)

    def execute(self, requests):
        """Forward the batch to the shared helper with use_async=False.

        Returns whatever the helper's ``execute`` returns.
        """
        return self._common.execute(requests, use_async=False)


================================================
FILE: qa/python_models/response_sender/model_async.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import triton_python_backend_utils as pb_utils
from model_common import ResponseSenderModelCommon


class TritonPythonModel:
    """Coroutine entry point that delegates to ResponseSenderModelCommon."""

    def initialize(self, args):
        """Create the shared helper, handing it the pb_utils module.

        ``args`` is not used here.
        """
        self._common = ResponseSenderModelCommon(pb_utils)

    async def execute(self, requests):
        """Forward the batch to the shared helper with use_async=True.

        The helper's return value is returned as-is, without being awaited.
        """
        # NOTE(review): presumably the helper returns a plain value (not an
        # awaitable) when use_async=True -- confirm in model_common.py.
        return self._common.execute(requests, use_async=True)


================================================
FILE: qa/python_models/response_sender/model_common.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import asyncio
import threading
import time

import numpy as np


class ResponseSenderModelCommon:
    """Shared implementation of the response sender test model.

    The ``pb_utils`` module is injected through the constructor and used for
    every Triton call. ``execute`` can send the post-return responses either
    from a background thread or from an asyncio task.
    """

    def __init__(self, pb_utils):
        # Injected Triton Python backend utilities module.
        self._pb_utils = pb_utils
        # Strong references to pending asyncio tasks; every task removes itself
        # from this set once it is done.
        self._background_tasks = set()

    def _get_instructions_from_request(self, request):
        """
        Translate the control inputs of ``request`` into execution instructions.

        The test drives the model through five inputs so that the corner cases
        of the response sender can be exercised:
          * NUMBER_OF_RESPONSE_BEFORE_RETURN (UINT8):
              How many responses are sent before `execute` returns.
          * SEND_COMPLETE_FINAL_FLAG_BEFORE_RETURN (BOOL):
              Whether the final flag is sent before `execute` returns.
          * RETURN_A_RESPONSE (BOOL):
              Whether a response is returned from `execute` itself.
          * NUMBER_OF_RESPONSE_AFTER_RETURN (UINT8):
              How many responses are sent after `execute` has returned.
          * SEND_COMPLETE_FINAL_FLAG_AFTER_RETURN (BOOL):
              Whether the final flag is sent after `execute` has returned.

        A request may carry a batch larger than one. Every input is reduced by
        summing over the whole batch, and the boolean instructions are true
        when that sum is non-zero. The batch size is the first dimension of
        RETURN_A_RESPONSE.

        Returns a dict with the keys "batch_size", "return_a_response",
        "number_of_pre_return_response", "number_of_post_return_response",
        "send_complete_final_flag_pre_return" and
        "send_complete_final_flag_post_return".
        """
        fetch = self._pb_utils.get_input_tensor_by_name

        def batch_total(input_name):
            # Sum of the named input over the whole batch.
            return fetch(request, input_name).as_numpy().sum()

        return_flags = fetch(request, "RETURN_A_RESPONSE").as_numpy()
        return {
            "batch_size": return_flags.shape[0],
            "return_a_response": bool(return_flags.sum()),
            "number_of_pre_return_response": batch_total(
                "NUMBER_OF_RESPONSE_BEFORE_RETURN"
            ),
            "number_of_post_return_response": batch_total(
                "NUMBER_OF_RESPONSE_AFTER_RETURN"
            ),
            "send_complete_final_flag_pre_return": bool(
                batch_total("SEND_COMPLETE_FINAL_FLAG_BEFORE_RETURN")
            ),
            "send_complete_final_flag_post_return": bool(
                batch_total("SEND_COMPLETE_FINAL_FLAG_AFTER_RETURN")
            ),
        }

    def _is_response_sender_needed(self, instr):
        """Tell whether anything at all has to go through a response sender.

        That is the case when at least one response is sent before or after
        the return, or when the final flag is sent at either point.
        """
        for count_key in (
            "number_of_pre_return_response",
            "number_of_post_return_response",
        ):
            has_responses = instr[count_key] > 0
            if has_responses:
                return has_responses
        return (
            instr["send_complete_final_flag_pre_return"]
            or instr["send_complete_final_flag_post_return"]
        )

    def _create_response(self, batch_size, response_id):
        """Build a response whose INDEX output repeats ``response_id``.

        The tensor has one single-element row per batch item and uint16 data.
        """
        rows = [[response_id] for _ in range(batch_size)]
        index_tensor = self._pb_utils.Tensor("INDEX", np.array(rows, np.uint16))
        return self._pb_utils.InferenceResponse(output_tensors=[index_tensor])

    def _send_responses(self, processed_requests, response_id_offset):
        """Send the responses described by every entry of ``processed_requests``.

        Each entry is a dict with "number_of_response", "batch_size",
        "response_sender" and "send_complete_final_flag". The responses get
        consecutive ids starting at ``response_id_offset``; afterwards a bare
        COMPLETE_FINAL flag is sent when the entry asks for it.
        """
        for entry in processed_requests:
            response_count = entry["number_of_response"]
            batch_size = entry["batch_size"]
            sender = entry["response_sender"]
            finish = entry["send_complete_final_flag"]

            for step in range(response_count):
                response = self._create_response(
                    batch_size, response_id=(response_id_offset + step)
                )
                sender.send(response)

            if not finish:
                continue
            sender.send(flags=self._pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

    def _send_responses_delayed_threaded(self, processed_requests, response_id_offset):
        """Send the responses from a daemon thread after a 0.5 second delay."""
        send = self._send_responses

        def deliver_later():
            time.sleep(0.5)  # response after requests are released
            send(processed_requests, response_id_offset)

        threading.Thread(target=deliver_later, daemon=True).start()

    def _send_responses_delayed_async(self, processed_requests, response_id_offset):
        """Send the responses from an asyncio task after a 0.5 second delay."""
        send = self._send_responses

        async def deliver_later():
            await asyncio.sleep(0.5)  # response after requests are released
            send(processed_requests, response_id_offset)

        pending = asyncio.create_task(deliver_later())
        # Hold a reference until the task finishes, then let it drop itself.
        self._background_tasks.add(pending)
        pending.add_done_callback(self._background_tasks.discard)

    def execute(self, requests, use_async):
        """
        Carry out the instructions of every request.

        Responses are distinguished by their id: 0 for a response returned
        from this function, 1000 and up for responses sent before returning,
        2000 and up for responses sent after returning. The post-return
        responses are scheduled through asyncio when ``use_async`` is true and
        through a background thread otherwise.

        Returns the list of returned responses (``None`` for requests that do
        not return one), or ``None`` when no request returns a response.
        """
        before_return = []
        after_return = []
        direct_responses = []

        for request in requests:
            instr = self._get_instructions_from_request(request)
            batch_size = instr["batch_size"]

            # Only ask for a response sender when something is sent through it.
            if self._is_response_sender_needed(instr):
                sender = request.get_response_sender()
            else:
                sender = None

            shared = {"batch_size": batch_size, "response_sender": sender}
            before_return.append(
                {
                    "number_of_response": instr["number_of_pre_return_response"],
                    **shared,
                    "send_complete_final_flag": instr[
                        "send_complete_final_flag_pre_return"
                    ],
                }
            )
            after_return.append(
                {
                    "number_of_response": instr["number_of_post_return_response"],
                    **shared,
                    "send_complete_final_flag": instr[
                        "send_complete_final_flag_post_return"
                    ],
                }
            )

            direct_responses.append(
                self._create_response(batch_size, response_id=0)
                if instr["return_a_response"]
                else None
            )

        self._send_responses(before_return, response_id_offset=1000)

        schedule = (
            self._send_responses_delayed_async
            if use_async
            else self._send_responses_delayed_threaded
        )
        schedule(after_return, response_id_offset=2000)

        nothing_returned = direct_responses == [None for _ in requests]
        return None if nothing_returned else direct_responses


================================================
FILE: qa/python_models/response_sender_complete_final/config.pbtxt
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration for the Python model in this directory (model.py). That model
# echoes INPUT0 as OUTPUT0 together with the COMPLETE_FINAL flag and then checks
# that a second send on the same response sender is rejected.
backend: "python"
max_batch_size: 8

# Echoed back unchanged by model.py.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

instance_group [{ kind: KIND_CPU }]
# Decoupled policy is enabled: model.py sends its responses through
# request.get_response_sender() and returns None from execute().
model_transaction_policy { decoupled: True }


================================================
FILE: qa/python_models/response_sender_complete_final/model.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Checks that a response sender refuses to send after the final flag."""

    def execute(self, requests):
        """Send a final response, then verify that one more send is rejected.

        INPUT0 is echoed back as OUTPUT0 together with the COMPLETE_FINAL
        flag. A second send on the same response sender must then raise an
        exception whose message says the sender has been closed; any other
        outcome is logged as an error and fails the test with an exception.

        Returns None, since the response goes through the response sender.
        """
        # Expect exactly one request per execute() call.
        if len(requests) != 1:
            pb_utils.Logger.log_error(f"Unexpected request length: {len(requests)}")
            raise Exception("Test FAILED")

        closed_message = "Unable to send response. Response sender has been closed."

        # For every request: send a response carrying the complete final flag,
        # then try to send another one and assert that an exception is raised.
        for request in requests:
            echoed = pb_utils.Tensor(
                "OUTPUT0",
                pb_utils.get_input_tensor_by_name(request, "INPUT0").as_numpy(),
            )
            response = pb_utils.InferenceResponse([echoed])
            sender = request.get_response_sender()
            sender.send(response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

            rejected = False
            try:
                sender.send(response)
            except Exception as e:
                pb_utils.Logger.log_info(f"Raised exception: {e}")
                # Only the "sender has been closed" error counts as a pass.
                rejected = str(e) == closed_message
            finally:
                if not rejected:
                    pb_utils.Logger.log_error("Expected exception not raised")
                    raise Exception("Test FAILED")
            pb_utils.Logger.log_info("Test Passed")
        return None


================================================
FILE: qa/python_models/response_sender_error/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration for the Python model in this directory (model.py), which
# computes OUTPUT0 = INPUT0 + INPUT1 and OUTPUT1 = INPUT0 - INPUT1 and tries to
# deliver the result through a response sender.
name: "response_sender_error"
backend: "python"

input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

# No model_transaction_policy block appears in this file: per its docstring,
# model.py exercises creating a response sender in a model that is not
# configured with the decoupled policy.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/response_sender_error/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Requests a response sender although the model is not configured
    with the decoupled model transaction policy.
    """

    def initialize(self, args):
        """Parse the model config and remember the numpy dtypes of both outputs."""
        model_config = json.loads(args["model_config"])
        self.model_config = model_config

        output0_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT0")
        output1_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT1")

        to_numpy_dtype = pb_utils.triton_string_to_numpy
        self.output0_dtype = to_numpy_dtype(output0_config["data_type"])
        self.output1_dtype = to_numpy_dtype(output1_config["data_type"])

    def execute(self, requests):
        """Create a response sender for each request and reply through it.

        OUTPUT0 is INPUT0 + INPUT1 and OUTPUT1 is INPUT0 - INPUT1, each cast
        to the configured output dtype. Returns None because the response is
        handed to the response sender.
        """
        dtype0, dtype1 = self.output0_dtype, self.output1_dtype

        for request in requests:
            sender = request.get_response_sender()
            first = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            second = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            total = first.as_numpy() + second.as_numpy()
            difference = first.as_numpy() - second.as_numpy()

            outputs = [
                pb_utils.Tensor("OUTPUT0", total.astype(dtype0)),
                pb_utils.Tensor("OUTPUT1", difference.astype(dtype1)),
            ]
            sender.send(pb_utils.InferenceResponse(outputs))
            sender.close()

        return None


================================================
FILE: qa/python_models/response_sender_until_cancelled/config.pbtxt
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration for the Python model in this directory (model.py), which keeps
# repeating INPUT as OUTPUT through a response sender until the request is
# cancelled or MAX_RESPONSE_COUNT responses have been sent.
name: "response_sender_until_cancelled"
backend: "python"
model_transaction_policy {
  decoupled: True
}

# How model.py uses the inputs:
#   MAX_RESPONSE_COUNT - the response that brings the number of sent responses
#                        to this value carries the COMPLETE_FINAL flag.
#   DELAY              - model.py sleeps DELAY / 1000 seconds after every
#                        non-final response, i.e. the value is in milliseconds.
#   INPUT              - its first element is echoed as OUTPUT.
#   IGNORE_CANCEL      - when true, model.py does not check
#                        request.is_cancelled().
input [
  {
    name: "MAX_RESPONSE_COUNT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "IGNORE_CANCEL"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/response_sender_until_cancelled/model.py
================================================
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Repeats INPUT as OUTPUT over and over, stopping once the request
    is cancelled or MAX_RESPONSE_COUNT responses have been sent.
    """

    def execute(self, requests):
        """Stream responses for the first request of ``requests``.

        Inputs read from the request:
          * INPUT: its first element is echoed as OUTPUT in every response.
          * MAX_RESPONSE_COUNT: the response reaching this count is sent with
            the COMPLETE_FINAL flag and ends the loop.
          * DELAY: pause after every non-final response, in milliseconds.
          * IGNORE_CANCEL: when true, cancellation is never checked.

        On cancellation a CANCELLED error response is sent with the
        COMPLETE_FINAL flag instead. Returns None.
        """
        request = requests[0]

        values = pb_utils.get_input_tensor_by_name(request, "INPUT").as_numpy()
        max_response_count = pb_utils.get_input_tensor_by_name(
            request, "MAX_RESPONSE_COUNT"
        ).as_numpy()[0]
        delay = pb_utils.get_input_tensor_by_name(request, "DELAY").as_numpy()[0]
        ignore_cancel = pb_utils.get_input_tensor_by_name(
            request, "IGNORE_CANCEL"
        ).as_numpy()[0]
        sender = request.get_response_sender()

        final_flag = pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
        # 1-based ordinal of the response that is about to be sent.
        upcoming = 1
        while True:
            if not ignore_cancel and request.is_cancelled():
                cancelled = pb_utils.InferenceResponse(
                    error=pb_utils.TritonError(
                        message="request has been cancelled",
                        code=pb_utils.TritonError.CANCELLED,
                    )
                )
                sender.send(cancelled, flags=final_flag)
                break

            payload = pb_utils.Tensor("OUTPUT", np.array([values[0]], np.int32))
            response = pb_utils.InferenceResponse(output_tensors=[payload])

            if upcoming == max_response_count:
                sender.send(response, flags=final_flag)
                break

            sender.send(response)
            upcoming += 1
            time.sleep(delay / 1000)

        return None


================================================
FILE: qa/python_models/sequence_int32/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration for the Python sequence model in this directory (model.py).
# NOTE(review): the model name differs from the directory name (sequence_int32);
# presumably the QA scripts install the model under this name - confirm.
name: "python_nobatch_sequence_int32"
backend: "python"
# With max_batch_size 0, model.py keeps a running accumulator across requests.
max_batch_size: 0
version_policy: { latest { num_versions: 1 }}


instance_group [
  {
    kind: KIND_GPU
count: 4
  }
]


input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]

  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]


  }
]
# model.py reads the START and READY control inputs declared below by name.
sequence_batching {
  # 5,000,000 microseconds = 5 seconds.
  max_sequence_idle_microseconds: 5000000
  control_input [
    {
      name: "START"
      control [
        {
          kind: CONTROL_SEQUENCE_START
          int32_false_true: [ 0, 1 ]
        }
      ]
    },
    {
      name: "READY"
      control [
        {
          kind: CONTROL_SEQUENCE_READY
          int32_false_true: [ 0, 1 ]
        }
      ]
    }
  ]
}


================================================
FILE: qa/python_models/sequence_int32/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Sequence model driven by the INPUT, START and READY tensors."""

    def initialize(self, args):
        """Read the OUTPUT dtype and max batch size and reset the accumulator."""
        model_config = json.loads(args["model_config"])
        self.model_config = model_config

        output_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT")
        self.output_dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])

        self.accumulator = np.zeros(1)
        self.max_batch_size = model_config["max_batch_size"]

    def execute(self, requests):
        """
        Produce one OUTPUT response per inference request.

        The logic is derived from "create_tf_modelfile" in
        common/gen_qa_sequence_models.py. When the max batch size is 0 a true
        accumulator is maintained: where READY is 1 it becomes INPUT if START
        is 1 and accumulator + INPUT otherwise, and it is left unchanged where
        READY is not 1. For any other max batch size the output is
        START + INPUT where READY is 1 and zero elsewhere.
        """
        output_dtype = self.output_dtype

        def read_int32(request, name):
            # Named input of the request as an int32 numpy array.
            tensor = pb_utils.get_input_tensor_by_name(request, name)
            return tensor.as_numpy().astype(np.int32)

        responses = []
        for request in requests:
            values = read_int32(request, "INPUT")
            start = read_int32(request, "START")
            ready = read_int32(request, "READY")

            if self.max_batch_size == 0:
                candidate = np.where(start == 1, values, self.accumulator + values)
                self.accumulator = np.where(ready == 1, candidate, self.accumulator)
                result = self.accumulator
            else:
                result = np.where(
                    ready == 1,
                    start + values,
                    np.zeros(values.shape, dtype=output_dtype),
                )

            out_tensor = pb_utils.Tensor("OUTPUT", result.astype(output_dtype))
            responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses


================================================
FILE: qa/python_models/sequence_py/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configuration for the Python sequence model in this directory (model.py).
# model.py looks up model_transaction_policy.decoupled in the model config to
# choose between its decoupled and non-decoupled execute paths; this file does
# not set that policy.
backend: "python"
max_batch_size: 4

input [
  {
    name: "INPUT0"
    data_type: TYPE_INT32
    dims: [ 1 ]

  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

sequence_batching {
  oldest {
    max_candidate_sequences: 4
    # 1,000,000 microseconds = 1 second.
    max_queue_delay_microseconds: 1000000
    preserve_ordering: False
  }
  # 10,000,000 microseconds = 10 seconds.
  max_sequence_idle_microseconds: 10000000
}


================================================
FILE: qa/python_models/sequence_py/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Stateful test model that counts the requests seen for each sequence.

    Every request yields the running count for its correlation ID as a
    one-element int32 ``OUTPUT0`` tensor. The count starts at 1 on the
    request that carries the sequence START flag and grows by one for every
    later request with the same correlation ID. Depending on the model
    configuration, responses are either returned from ``execute`` or sent
    through the per-request response sender (decoupled mode).
    """

    def initialize(self, args):
        """Parse the model configuration and reset the per-sequence state.

        Args:
            args: Dictionary handed to the model; only its JSON-encoded
                ``"model_config"`` entry is read here.
        """
        self.model_config = json.loads(args["model_config"])
        # correlation ID -> list of every counter value produced so far.
        # Entries are never removed, so the full history is still available
        # when finalize() prints it.
        self.sequences = {}
        # Value of model_transaction_policy.decoupled, or None when the
        # configuration does not set it.
        self.decoupled = self.model_config.get("model_transaction_policy", {}).get(
            "decoupled"
        )

    def get_next_sequence_output_tensor(self, request):
        """Advance the counter of the request's sequence and wrap it in a tensor.

        Args:
            request: Inference request exposing ``correlation_id()`` and
                ``flags()``.

        Returns:
            A ``pb_utils.Tensor`` named ``OUTPUT0`` holding the new counter
            value as a one-element int32 array.

        Raises:
            pb_utils.TritonModelException: If a START request reuses a
                correlation ID that is already tracked, or a non-START
                request refers to a correlation ID that was never started.
        """
        sid = request.correlation_id()
        # flags() is a bitmask, so test the START bit instead of comparing
        # the whole value: a request that carries START together with
        # another flag (for example a one-request sequence marked
        # START|END) must still open its sequence.
        starts_sequence = bool(
            request.flags() & pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_START
        )

        if starts_sequence:
            if sid in self.sequences:
                raise pb_utils.TritonModelException(
                    "Can't start a new sequence with existing ID"
                )
            self.sequences[sid] = [1]
        else:
            if sid not in self.sequences:
                raise pb_utils.TritonModelException(
                    "Need START flag for a sequence ID that doesn't already exist."
                )
            history = self.sequences[sid]
            history.append(history[-1] + 1)

        counter = np.array([self.sequences[sid][-1]])
        return pb_utils.Tensor("OUTPUT0", counter.astype(np.int32))

    def execute(self, requests):
        """Dispatch to the decoupled or non-decoupled implementation.

        Args:
            requests: Batch of inference requests.

        Returns:
            A list of responses in non-decoupled mode, ``None`` in decoupled
            mode.
        """
        if self.decoupled:
            return self.execute_decoupled(requests)
        return self.execute_non_decoupled(requests)

    def execute_non_decoupled(self, requests):
        """Return exactly one response per request, in request order."""
        responses = []
        for request in requests:
            output_tensor = self.get_next_sequence_output_tensor(request)
            responses.append(pb_utils.InferenceResponse([output_tensor]))
        return responses

    def execute_decoupled(self, requests):
        """Send three identical responses per request, then the final flag.

        Always returns ``None``; every response goes through the request's
        response sender.
        """
        for request in requests:
            sender = request.get_response_sender()
            output_tensor = self.get_next_sequence_output_tensor(request)

            # Send 3 responses per request
            for _ in range(3):
                sender.send(pb_utils.InferenceResponse([output_tensor]))

            # A flags-only send marks the end of this request's responses.
            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        return None

    def finalize(self):
        """Print the recorded history of every sequence."""
        print(f"Cleaning up. Final sequences stored: {self.sequences}")


================================================
FILE: qa/python_models/simple_identity_fp32/config.pbtxt
================================================
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Ensemble that wraps the "identity_fp32" model in a single step.
name: "simple_identity_fp32"
platform: "ensemble"
max_batch_size: 64

# One FP32 input; its only declared dimension is -1.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# One FP32 output with the same dims declaration as the input.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# The only step runs "identity_fp32"; INPUT0 and OUTPUT0 are mapped to
# tensors of the same names.
ensemble_scheduling {
  step [
    {
      model_name: "identity_fp32"
      model_version: -1
      input_map {
        key: "INPUT0"
        value: "INPUT0"
      }
      output_map {
        key: "OUTPUT0"
        value: "OUTPUT0"
      }
    }
  ]
}


================================================
FILE: qa/python_models/string/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model with one TYPE_STRING input and one TYPE_STRING
# output, each declared with dims [ 1 ].
name: "string"
backend: "python"
max_batch_size: 0

input [
  {
    name: "INPUT0"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

# Instances of this model are placed on the CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/string/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """This model loops through different dtypes to make sure that
    serialize_byte_tensor works correctly in the Python backend.

    Each request echoes ``INPUT0`` back as ``OUTPUT0`` after casting it to
    the next dtype in the rotation (``np.bytes_``, then ``np.object_``, then
    ``np.bytes_`` again, and so on).
    """

    def initialize(self, args):
        """Reset the request counter and define the dtype rotation.

        Args:
            args: Unused.
        """
        # Number of requests answered so far; selects the dtype to use next.
        self._index = 0
        self._dtypes = [np.bytes_, np.object_]

    def _dtype_at(self, index):
        """Return the dtype used for the ``index``-th request, wrapping around."""
        # The modulo keeps the rotation going; indexing the list directly
        # raised IndexError as soon as a third request arrived.
        return self._dtypes[index % len(self._dtypes)]

    def execute(self, requests):
        """Echo each request's ``INPUT0`` as ``OUTPUT0`` using the next dtype.

        Args:
            requests: Batch of inference requests.

        Returns:
            One ``pb_utils.InferenceResponse`` per request, in request order.
        """
        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            out_tensor_0 = pb_utils.Tensor(
                "OUTPUT0", in_0.as_numpy().astype(self._dtype_at(self._index))
            )
            # Advance only after the output tensor was built successfully.
            self._index += 1
            responses.append(pb_utils.InferenceResponse([out_tensor_0]))
        return responses


================================================
FILE: qa/python_models/string_fixed/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model with one TYPE_STRING input and one TYPE_STRING output.
name: "string_fixed"
backend: "python"
max_batch_size: 0

input [
  {
    name: "INPUT0"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]
# The output dimension is declared as -1; the companion model.py returns
# both one-element and empty arrays.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]

# Instances of this model are placed on the CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/string_fixed/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """
    This model ignores its input and alternates between two fixed outputs:
    the string "123456" stored with the ``np.bytes_`` dtype, and an empty
    array stored with the ``np.object_`` dtype.
    """

    def initialize(self, args):
        """Reset the request counter and define the two output dtypes.

        Args:
            args: Unused.
        """
        # Number of requests answered so far; selects the next output.
        self._index = 0
        self._dtypes = [np.bytes_, np.object_]

    def _output_array_at(self, index):
        """Return the NumPy payload for the ``index``-th request.

        Even indices produce the fixed string, odd indices an empty array.
        This matches what requests 0-3 always returned, and keeps the
        pattern going afterwards instead of failing with UnboundLocalError
        on the fifth request.
        """
        if index % 2 == 0:
            return np.array(["123456"], dtype=self._dtypes[0])
        return np.array([], dtype=self._dtypes[1])

    def execute(self, requests):
        """Return the next fixed output for every request.

        Args:
            requests: Batch of inference requests; their contents are not
                read.

        Returns:
            One ``pb_utils.InferenceResponse`` per request, in request order.
        """
        responses = []
        for _ in requests:
            out_tensor_0 = pb_utils.Tensor(
                "OUTPUT0", self._output_array_at(self._index)
            )
            self._index += 1
            responses.append(pb_utils.InferenceResponse([out_tensor_0]))
        return responses


================================================
FILE: qa/python_models/string_identity/config.pbtxt
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model with one TYPE_STRING input and one TYPE_STRING
# output, each declared with dims [ 1 ].
name: "string_identity"
backend: "python"
max_batch_size: 0

input [
  {
    name: "INPUT0"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

# Instances of this model are placed on the CPU.
instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/string_identity/model.py
================================================
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import sys

sys.path.append("../../")
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Identity model: each response carries the request's INPUT0 as OUTPUT0."""

    def initialize(self, args):
        """Decode the JSON model configuration and keep it on the instance."""
        raw_config = args["model_config"]
        self.model_config = json.loads(raw_config)

    def execute(self, requests):
        """Build one echo response per request, in request order."""

        def echo(request):
            # Re-wrap the received array under the output name.
            received = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            mirrored = pb_utils.Tensor("OUTPUT0", received.as_numpy())
            return pb_utils.InferenceResponse([mirrored])

        return [echo(request) for request in requests]


================================================
FILE: qa/python_models/sub_add/model.py
================================================
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import sys

import numpy as np

sys.path.append("../../")
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Test model that returns INPUT0 - INPUT1 and INPUT0 + INPUT1.

    ``OUTPUT0`` carries the difference and ``OUTPUT1`` the sum, each cast to
    the data type the model configuration declares for that output.
    """

    def initialize(self, args):
        """Read the configured output data types.

        Args:
            args: Dictionary handed to the model; only its JSON-encoded
                ``"model_config"`` entry is read here.
        """
        self.model_config = model_config = json.loads(args["model_config"])

        output0_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT0")
        output1_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config["data_type"]
        )
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config["data_type"]
        )

    @staticmethod
    def _sub_and_add(lhs, rhs):
        """Return ``(lhs - rhs, lhs + rhs)`` for two NumPy arrays.

        When ``lhs`` is a bytes or object array, both operands are first
        converted to int32 so the arithmetic is numeric.
        """
        if lhs.dtype.type is np.bytes_ or lhs.dtype == np.object_:
            lhs = lhs.astype(np.int32)
            rhs = rhs.astype(np.int32)
        return lhs - rhs, lhs + rhs

    def execute(self, requests):
        """This function is called on inference request.

        Args:
            requests: Batch of inference requests, each providing ``INPUT0``
                and ``INPUT1``.

        Returns:
            One ``pb_utils.InferenceResponse`` per request, in request
            order, holding ``OUTPUT0`` (difference) and ``OUTPUT1`` (sum).
        """
        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")
            # Convert each input once instead of on every use.
            out_0, out_1 = self._sub_and_add(in_0.as_numpy(), in_1.as_numpy())

            out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype))
            out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype))
            responses.append(pb_utils.InferenceResponse([out_tensor_0, out_tensor_1]))
        return responses


================================================
FILE: qa/python_models/torchvision/resnet50/config.pbtxt
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model named "resnet50_python".
name: "resnet50_python"
backend: "python"
max_batch_size: 128
# FP32 input in NCHW format with dims [ 3, 224, 224 ].
input {
    name: "INPUT0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 3, 224, 224 ]
  }
# FP32 output with 1000 values.
output {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }


================================================
FILE: qa/python_models/torchvision/resnet50/model.py
================================================
# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import to_dlpack
from torchvision import models


class TritonPythonModel:
    """Runs a pre-trained torchvision ResNet50 on every request's INPUT0."""

    def initialize(self, args):
        """
        This function initializes pre-trained ResNet50 model.

        The model is placed on ``"cuda"`` when ``args["model_instance_kind"]``
        is ``"GPU"`` and on ``"cpu"`` otherwise, then switched to eval mode.
        """
        self.device = "cuda" if args["model_instance_kind"] == "GPU" else "cpu"
        self.model = (
            models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
            .to(self.device)
            .eval()
        )

    def _infer(self, batch):
        """Run the model on ``batch`` with gradient tracking disabled.

        Without ``torch.no_grad()`` the result requires grad, and
        ``to_dlpack`` refuses to export tensors that require gradients.
        """
        with torch.no_grad():
            return self.model(torch.as_tensor(batch, device=self.device))

    def execute(self, requests):
        """
        This function receives a list of requests (`pb_utils.InferenceRequest`),
        performs inference on every request and appends it to responses.

        Returns one `pb_utils.InferenceResponse` per request, in request
        order, each holding the model result as `OUTPUT0`.
        """
        responses = []
        for request in requests:
            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            result = self._infer(input_tensor.as_numpy())
            out_tensor = pb_utils.Tensor.from_dlpack("OUTPUT0", to_dlpack(result))
            responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses


================================================
FILE: qa/python_models/variable_gpu_output/config.pbtxt
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model with one FP32 input of dims [ 1 ] and one FP32
# output whose dimension is declared as -1.
name: "variable_gpu_output"
backend: "python"
max_batch_size: 256

input [
  {
    name: "INPUT"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# Queue delay of 1,000,000 microseconds (one second). The companion
# model.py asserts that exactly five requests arrive in one execute() call.
dynamic_batching {
  max_queue_delay_microseconds: 1000000
}

# A single instance of kind KIND_GPU.
instance_group [
  {
    count: 1
    kind: KIND_GPU
  }
]


================================================
FILE: qa/python_models/variable_gpu_output/model.py
================================================
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import to_dlpack


class TritonPythonModel:
    """QA model that answers every request of a batch with a GPU tensor of a different length."""

    def execute(self, requests):
        """Build one response per request, each carrying a CUDA ``OUTPUT`` tensor.

        The request at zero-based position ``i`` receives a float32 tensor
        with ``i + 1`` elements, all equal to ``i + 1``. The contents of the
        requests are never read.

        Raises:
            AssertionError: if the batch does not hold exactly 5 requests.
        """
        # The client is expected to send exactly 5 requests.
        assert len(requests) == 5

        results = []
        # ``size`` doubles as the element count and the fill value.
        for size, _request in enumerate(requests, start=1):
            values = torch.ones(size, dtype=torch.float32, device="cuda") * size
            gpu_tensor = pb_utils.Tensor.from_dlpack("OUTPUT", to_dlpack(values))
            results.append(pb_utils.InferenceResponse(output_tensors=[gpu_tensor]))
        return results


================================================
FILE: qa/python_models/wrong_model/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# NOTE(review): the model name is "identity_fp32" although this file lives
# under qa/python_models/wrong_model/ — presumably the QA scripts install it
# under that name or override it; confirm before changing.
name: "identity_fp32"
backend: "python"
max_batch_size: 64

# Tensor names must match the "IN" / "OUT" names used by model.py.
input [
  {
    name: "IN"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

output [
  {
    name: "OUT"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]

# One CPU instance.
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: qa/python_models/wrong_model/model.py
================================================
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        """
        Fail on purpose so that error handling for exceptions raised inside
        the execute function can be tested.

        For each request the "IN" tensor is copied into an "OUT" tensor, and
        then an undefined name is evaluated, which raises NameError before any
        response is recorded. Only an empty batch returns normally (with an
        empty list).
        """
        results = []
        for req in requests:
            payload = pb_utils.get_input_tensor_by_name(req, "IN").as_numpy()
            echoed = pb_utils.Tensor("OUT", payload)
            # Intentionally undefined name: this is the error under test.
            lorem_ipsum
            results.append(pb_utils.InferenceResponse([echoed]))
        return results


================================================
FILE: qa/python_models/wrong_return_type/config.pbtxt
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "wrong_return_type"
backend: "python"

# Fixed-size FP32 tensors of 4 elements; the names match the "INPUT0" /
# "OUTPUT0" names used by model.py.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]

# Iterative sequence batching is enabled; model.py sets the RESCHEDULE release
# flag on every request it receives.
sequence_batching {
  iterative_sequence : true
}

instance_group [{ kind: KIND_CPU }]


================================================
FILE: qa/python_models/wrong_return_type/model.py
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """QA model that returns a response for requests it has asked to reschedule."""

    def initialize(self, args):
        """Parse the model configuration and cache the numpy dtype of OUTPUT0.

        ``args["model_config"]`` is a JSON string; the decoded configuration
        is kept on ``self.model_config``.
        """
        parsed_config = json.loads(args["model_config"])
        self.model_config = parsed_config

        output_spec = pb_utils.get_output_config_by_name(parsed_config, "OUTPUT0")
        self.output0_dtype = pb_utils.triton_string_to_numpy(output_spec["data_type"])

    def execute(self, requests):
        """Copy INPUT0 to OUTPUT0 for every request and mark it for rescheduling.

        Returns a list holding one ``pb_utils.InferenceResponse`` per request.
        """
        target_dtype = self.output0_dtype
        replies = []

        for req in requests:
            source = pb_utils.get_input_tensor_by_name(req, "INPUT0").as_numpy()

            # pb_utils.InferenceResponse is built from pb_utils.Tensor objects.
            converted = pb_utils.Tensor("OUTPUT0", source.astype(target_dtype))
            reply = pb_utils.InferenceResponse(output_tensors=[converted])

            req.set_release_flags(pb_utils.TRITONSERVER_REQUEST_RELEASE_RESCHEDULE)
            # A rescheduled request should get `None` appended here; appending
            # a real response instead is the wrong return type under test.
            replies.append(reply)

        return replies

    def finalize(self):
        """Nothing to release."""
        return None


================================================
FILE: src/CMakeLists.txt
================================================
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Configures the 'tritonserverexe' project (C and C++).
cmake_minimum_required (VERSION 3.31.8)

project(tritonserverexe LANGUAGES C CXX)

include(GNUInstallDirs)

#
# Dependencies
#
# We must include the transitive closure of all repos so that we can
# override the tag. The backend repo is needed for the tests.
#
# TRITON_REPO_ORGANIZATION and the TRITON_*_REPO_TAG variables are not set in
# this file; they are expected to be defined by whoever configures it.
#
include(FetchContent)

FetchContent_Declare(
  repo-common
  GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/common.git
  GIT_TAG ${TRITON_COMMON_REPO_TAG}
)
FetchContent_Declare(
  repo-core
  GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git
  GIT_TAG ${TRITON_CORE_REPO_TAG}
)
FetchContent_Declare(
  repo-backend
  GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/backend.git
  GIT_TAG ${TRITON_BACKEND_REPO_TAG}
)

# These two switches are set before the repos are made available below.
# NOTE(review): presumably they are options consumed by repo-common — confirm.
if(TRITON_ENABLE_GRPC)
  set(TRITON_COMMON_ENABLE_PROTOBUF ON)
  set(TRITON_COMMON_ENABLE_GRPC ON)
endif() # TRITON_ENABLE_GRPC

FetchContent_MakeAvailable(repo-common repo-core repo-backend)

# CUDA
#
# Only looked up when GPU support is enabled. The CUDAToolkit package reports
# the toolkit version in CUDAToolkit_VERSION; CUDA_VERSION is not set by it,
# so using that name would print an empty version.
if(${TRITON_ENABLE_GPU})
  find_package(CUDAToolkit REQUIRED)
  message(STATUS "Using CUDA ${CUDAToolkit_VERSION}")
endif() # TRITON_ENABLE_GPU

# libevent
#
# Required as soon as any of the HTTP-based endpoints (HTTP, metrics,
# SageMaker, Vertex AI) is enabled.
if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR
    ${TRITON_ENABLE_SAGEMAKER} OR ${TRITON_ENABLE_VERTEX_AI})
  find_package(Libevent CONFIG REQUIRED)
  message(STATUS "Using libevent ${Libevent_VERSION}")
endif()

# OpenTelemetry
#
# Looked up, together with absl, CURL and nlohmann_json, only for non-Windows
# builds that have tracing enabled.
if (NOT WIN32 AND ${TRITON_ENABLE_TRACING})
    find_package(absl CONFIG REQUIRED)
    find_package(CURL CONFIG REQUIRED)
    find_package(nlohmann_json CONFIG REQUIRED)
    find_package(opentelemetry-cpp CONFIG REQUIRED)
    message(STATUS "Using opentelemetry-cpp ${opentelemetry-cpp_VERSION}")
endif()

# re2
#
# Required unconditionally.
find_package(re2 REQUIRED)

#
# tritonserver executable
#
# The CMake target is called 'main'; the comment further down explains why.
add_executable(
  main
  classification.cc
  command_line_parser.cc
  common.cc
  main.cc
  shared_memory_manager.cc
  triton_signal.cc
  classification.h
  common.h
  shared_memory_manager.h
  triton_signal.h
)

# On Windows a *.lib file can be generated for an exe. If we tried to
# create tritonserver.lib while building tritonserver.exe it would
# clash with the tritonserver.lib that already exists for
# tritonserver.dll, and the build would fail. To avoid this we keep the
# build name as main.exe and, on Windows, rename it to tritonserver.exe
# after installing (below in the install steps).
if (NOT WIN32)
  set_property(TARGET main PROPERTY OUTPUT_NAME tritonserver)
endif()

# The C++ standard level comes from TRITON_MIN_CXX_STANDARD, which is not set
# in this file.
target_compile_features(main PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD})
if(WIN32)
  message("Using MSVC as compiler, default target on Windows 10. "
    "If the target system is not Windows 10, please update _WIN32_WINNT "
    "to corresponding value.")
  target_compile_options(
    main
    PRIVATE
      /W1 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor
  )
  target_compile_definitions(main
    PRIVATE
      NOMINMAX)

  # Dependency from common.h
  # On Windows the b64 library is located with find_library.
  find_library(B64_LIBRARY NAMES b64)
  target_link_libraries(
    main
    PRIVATE
      ${B64_LIBRARY}
  )

else()

  # Warnings are treated as errors (-Werror) on non-Windows builds.
  target_compile_options(
    main
    PRIVATE
      -Wall -Wextra -Wno-unused-parameter -Wno-deprecated-declarations -Werror
  )

  # Dependency from common.h
  target_link_libraries(
    main
    PRIVATE
      b64
  )
  endif()

# Name of the library directory used in the install RPATH of 'main'.
# Defaults to "lib"; switched to "lib64" when /etc/os-release declares the
# distribution to be rhel- or centos-like.
set(LIB_DIR "lib")
# Guard on the file's existence so that file(STRINGS) cannot fail on Linux
# systems that do not ship /etc/os-release.
if(LINUX AND EXISTS "/etc/os-release")
  file(STRINGS "/etc/os-release" DISTRO_ID_LIKE REGEX "ID_LIKE")
  # The expansion must be quoted: when the file has no ID_LIKE entry the
  # variable is empty, and an unquoted expansion would leave if() with the
  # malformed expression 'MATCHES "rhel|centos"'.
  if("${DISTRO_ID_LIKE}" MATCHES "rhel|centos")
    set(LIB_DIR "lib64")
  endif()
endif()
set(TRITON_CORE_HEADERS_ONLY OFF)

# Build 'main' directly with its install RPATH, which points at the library
# directory next to the binary's parent directory (../${LIB_DIR}).
# NOTE(review): the backslash-escaped braces appear intended to keep CMake
# from expanding ${ORIGIN} so the literal token reaches the linker — confirm.
set_target_properties(
  main
  PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    SKIP_BUILD_RPATH TRUE
    BUILD_WITH_INSTALL_RPATH TRUE
    INSTALL_RPATH_USE_LINK_PATH FALSE
    INSTALL_RPATH "$\{ORIGIN\}/../${LIB_DIR}"
)

# Libraries every build of 'main' links against, regardless of enabled
# features.
target_link_libraries(
  main
  PRIVATE
    triton-common-async-work-queue  # from repo-common
    triton-common-error             # from repo-common
    triton-common-logging           # from repo-common
    triton-core-serverapi           # from repo-core
    triton-core-serverstub          # from repo-core
)

# AddressSanitizer build: forces the Debug build type, defines
# TRITON_ENABLE_ASAN=1 for 'main', and appends the sanitizer flags to the
# Debug compile and link flags.
if(${TRITON_ENABLE_ASAN})
  set(CMAKE_BUILD_TYPE Debug)
  target_compile_definitions(
    main
    PRIVATE TRITON_ENABLE_ASAN=1
  )
  set(_ASAN_FLAGS "-static-libstdc++ -static-libasan -fno-omit-frame-pointer -fsanitize=address")
  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${_ASAN_FLAGS}")
  # Executable link flags are read from CMAKE_EXE_LINKER_FLAGS_<CONFIG>;
  # CMake has no CMAKE_LINKER_FLAGS_<CONFIG> variable, so setting that name
  # had no effect.
  set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} ${_ASAN_FLAGS}")
endif() # TRITON_ENABLE_ASAN

# GPU builds: expose TRITON_ENABLE_GPU and the minimum compute capability to
# the sources of 'main' and link the CUDA runtime.
if(${TRITON_ENABLE_GPU})
  target_compile_definitions(
    main
    PRIVATE TRITON_ENABLE_GPU=1
    PRIVATE TRITON_MIN_COMPUTE_CAPABILITY=${TRITON_MIN_COMPUTE_CAPABILITY}
  )

  target_link_libraries(
    main
    PRIVATE
      CUDA::cudart
  )
endif() # TRITON_ENABLE_GPU

# Same condition as the libevent lookup: any HTTP-based endpoint adds the
# libevent include directories to 'main'.
if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR
    ${TRITON_ENABLE_SAGEMAKER} OR ${TRITON_ENABLE_VERTEX_AI})
  target_include_directories(
    main
    PRIVATE
      ${LIBEVENT_INCLUDE_DIRS}
  )
endif()


# Forward each enabled TRITON_ENABLE_<feature> build option to the sources of
# 'main' as a TRITON_ENABLE_<feature>=1 preprocessor definition. The list
# order is the order in which the definitions are added to the target.
foreach(_triton_feature
    HTTP SAGEMAKER VERTEX_AI LOGGING METRICS STATS TRACING NVTX)
  if(${TRITON_ENABLE_${_triton_feature}})
    target_compile_definitions(
      main
      PRIVATE TRITON_ENABLE_${_triton_feature}=1
    )
  endif() # TRITON_ENABLE_<feature>
endforeach()

# Non-Windows builds additionally link the 'rt' and 'dl' system libraries.
if (NOT WIN32)
  target_link_libraries(
    main
    PRIVATE
      rt
      dl
  )
endif() # NOT WIN32

# Install the server binary into <prefix>/bin.
if (NOT WIN32)
  install(
    TARGETS main
    RUNTIME DESTINATION bin
  )
else()
  # See explanation above as to why we need to rename main.exe to
  # tritonserver.exe as part of the install process on windows.
  # NOTE(review): the path assumes main.exe is emitted into a
  # ${CMAKE_BUILD_TYPE} subdirectory of the binary dir — confirm this holds
  # for the generator in use.
  install(
    PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/main.exe
    DESTINATION bin
    RENAME tritonserver.exe
  )
endif()

# gRPC endpoint: built in the 'grpc' subdirectory and linked into 'main' as
# grpc-endpoint-library.
if(${TRITON_ENABLE_GRPC})
  #
  # GRPC
  #
  find_package(gRPC CONFIG REQUIRED)
  message(STATUS "Using gRPC ${gRPC_VERSION}")

  add_subdirectory(grpc)
  target_link_libraries(
      main
      PRIVATE
        grpc-endpoint-library
  )

  # Reuse the include directories exported by the gRPC::grpc target.
  target_include_directories(
    main
    PRIVATE
      $<TARGET_PROPERTY:gRPC::grpc,INTERFACE_INCLUDE_DIRECTORIES>
  )

  target_compile_definitions(
    main
    PRIVATE TRITON_ENABLE_GRPC=1
  )
endif()

# http endpoint
#
# Builds http-endpoint-library and links it into 'main' whenever at least one
# HTTP-based endpoint (HTTP, metrics, SageMaker, Vertex AI) is enabled.
if(${TRITON_ENABLE_HTTP}
      OR ${TRITON_ENABLE_METRICS}
      OR ${TRITON_ENABLE_SAGEMAKER}
      OR ${TRITON_ENABLE_VERTEX_AI})
  find_package(libevhtp CONFIG REQUIRED)
  message(STATUS "Using libevhtp ${libevhtp_VERSION}")

  # Sources and headers that are always part of the library.
  list(APPEND
    HTTP_ENDPOINT_SRCS
    http_server.cc
    orca_http.cc
  )
  list(APPEND
    HTTP_ENDPOINT_HDRS
    http_server.h
    orca_http.h
  )

  # Add header / src files based on HTTP related endpoint requested
  if(${TRITON_ENABLE_SAGEMAKER})
    list(APPEND
      HTTP_ENDPOINT_SRCS
      sagemaker_server.cc
    )
    list(APPEND
      HTTP_ENDPOINT_HDRS
      sagemaker_server.h
    )
  endif() # TRITON_ENABLE_SAGEMAKER

  if(${TRITON_ENABLE_VERTEX_AI})
    list(APPEND
      HTTP_ENDPOINT_SRCS
      vertex_ai_server.cc
    )
    list(APPEND
      HTTP_ENDPOINT_HDRS
      vertex_ai_server.h
    )
  endif() # TRITON_ENABLE_VERTEX_AI

  # EXCLUDE_FROM_ALL: the library is only built as a dependency of a target
  # that links it ('main' below).
  add_library(
    http-endpoint-library EXCLUDE_FROM_ALL
    ${HTTP_ENDPOINT_SRCS} ${HTTP_ENDPOINT_HDRS}
  )

  target_compile_features(http-endpoint-library PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD})
  if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    target_compile_options(
      http-endpoint-library
      PRIVATE
        /W1 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor
    )
  else()
    # Same warning set as 'main', except that maybe-uninitialized warnings
    # are not promoted to errors for this library.
    target_compile_options(
      http-endpoint-library
      PRIVATE
        -Wall -Wextra -Wno-unused-parameter -Wno-deprecated-declarations -Wno-error=maybe-uninitialized -Werror
    )
  endif()

  set_target_properties(
    http-endpoint-library
    PROPERTIES
      POSITION_INDEPENDENT_CODE ON
  )

  # PUBLIC so that targets linking this library also link these dependencies.
  target_link_libraries(
    http-endpoint-library
    PUBLIC
      triton-common-json      # from repo-common
      triton-common-logging   # from repo-common
      triton-core-serverapi   # from repo-core
      triton-core-serverstub  # from repo-core
      ${LIBEVENT_LIBRARIES}
      libevhtp::evhtp
      re2::re2
  )

  target_include_directories(
    http-endpoint-library
    PRIVATE $<TARGET_PROPERTY:libevhtp::evhtp,INTERFACE_INCLUDE_DIRECTORIES>
  )

  # FIXME when Triton support of Opentelemetry is available on Windows
  # add ${OPENTELEMETRY_CPP_INCLUDE_DIRS} to above target_include_directories
  # JIRA DLIS-4786
  #
  # tracing-library is defined further down in this file, in the tracing
  # section.
  if (NOT WIN32 AND ${TRITON_ENABLE_TRACING})
    target_link_libraries(
      http-endpoint-library
      PRIVATE tracing-library
    )
  endif()

  if(${TRITON_ENABLE_GPU})
    target_compile_definitions(
      http-endpoint-library
      PRIVATE TRITON_ENABLE_GPU=1
      PRIVATE TRITON_MIN_COMPUTE_CAPABILITY=${TRITON_MIN_COMPUTE_CAPABILITY}
    )

    target_link_libraries(
      http-endpoint-library
      PUBLIC
        CUDA::cudart
    )
  endif() # TRITON_ENABLE_GPU

  # The blocks below forward each enabled TRITON_ENABLE_* option to the
  # library's sources as a preprocessor definition.
  if(${TRITON_ENABLE_HTTP})
    target_compile_definitions(
      http-endpoint-library
      PRIVATE TRITON_ENABLE_HTTP=1
    )
  endif() # TRITON_ENABLE_HTTP

  if(${TRITON_ENABLE_SAGEMAKER})
    target_compile_definitions(
      http-endpoint-library
      PRIVATE TRITON_ENABLE_SAGEMAKER=1
    )
  endif() # TRITON_ENABLE_SAGEMAKER

  if(${TRITON_ENABLE_VERTEX_AI})
    target_compile_definitions(
      http-endpoint-library
      PRIVATE TRITON_ENABLE_VERTEX_AI=1
    )
  endif() # TRITON_ENABLE_VERTEX_AI

  if(${TRITON_ENABLE_METRICS})
    target_compile_definitions(
      http-endpoint-library
      PRIVATE TRITON_ENABLE_METRICS=1
    )
  endif() # TRITON_ENABLE_METRICS

  if(${TRITON_ENABLE_LOGGING})
    target_compile_definitions(
      http-endpoint-library
      PRIVATE TRITON_ENABLE_LOGGING=1
    )
  endif() # TRITON_ENABLE_LOGGING

  if(${TRITON_ENABLE_STATS})
    target_compile_definitions(
      http-endpoint-library
      PRIVATE TRITON_ENABLE_STATS=1
    )
  endif() # TRITON_ENABLE_STATS

  if(${TRITON_ENABLE_TRACING})
    target_compile_definitions(
      http-endpoint-library
      PRIVATE TRITON_ENABLE_TRACING=1
    )
  endif() # TRITON_ENABLE_TRACING

  if(${TRITON_ENABLE_NVTX})
    target_compile_definitions(
      http-endpoint-library
      PRIVATE TRITON_ENABLE_NVTX=1
    )
  endif() # TRITON_ENABLE_NVTX

  # base64 and zlib: located with find_library on Windows, linked by name
  # elsewhere.
  if (WIN32)
    find_library(B64_LIBRARY NAMES b64)
    find_library(ZLIB_LIBRARY NAMES zlib)
    target_link_libraries(
      http-endpoint-library
      PUBLIC
        ${B64_LIBRARY}
        ${ZLIB_LIBRARY}
    )
  else()
    target_link_libraries(
      http-endpoint-library
      PUBLIC
        b64
        z
    )
  endif()

  target_link_libraries(
    main
    PRIVATE
      http-endpoint-library
  )
endif() # TRITON_ENABLE_HTTP || TRITON_ENABLE_METRICS ||
        # TRITON_ENABLE_SAGEMAKER || TRITON_ENABLE_VERTEX_AI

# tracing
#
# Builds tracing-library from tracer.cc / tracer.h and links it into 'main'
# when tracing is enabled.
if(${TRITON_ENABLE_TRACING})
  message(STATUS "Using tracing ${TRITON_TRACE_INSTALL_PATH}")

  # EXCLUDE_FROM_ALL: only built as a dependency of targets that link it.
  add_library(
    tracing-library EXCLUDE_FROM_ALL
    tracer.cc tracer.h
  )

  target_compile_features(tracing-library PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD})
  # FIXME: remove, when Windows support is added for Opentelemetry
  # NOTE(review): OPENTELEMETRY_CPP_INCLUDE_DIRS / OPENTELEMETRY_CPP_LIBRARIES
  # are not set in this file; presumably they come from the
  # opentelemetry-cpp package found earlier — confirm.
  if (NOT WIN32)
    target_include_directories(
      tracing-library
      PUBLIC ${OPENTELEMETRY_CPP_INCLUDE_DIRS}
    )

    target_link_libraries(
      tracing-library
      PUBLIC
      ${OPENTELEMETRY_CPP_LIBRARIES})
  endif()

  set_target_properties(
    tracing-library
    PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    )

  target_link_libraries(
    tracing-library
    PUBLIC
      triton-common-logging    # from repo-common
      triton-common-json      # from repo-common
      triton-core-serverapi    # from repo-core
      triton-core-serverstub   # from repo-core
  )

  # Always defined here, since this whole section is tracing-only.
  target_compile_definitions(
    tracing-library
    PRIVATE TRITON_ENABLE_TRACING=1
  )

  if(${TRITON_ENABLE_GPU})
    target_compile_definitions(
      tracing-library
      PRIVATE TRITON_ENABLE_GPU=1
      PRIVATE TRITON_MIN_COMPUTE_CAPABILITY=${TRITON_MIN_COMPUTE_CAPABILITY}
    )

    target_link_libraries(
      tracing-library
      PUBLIC
        CUDA::cudart
    )
  endif() # TRITON_ENABLE_GPU

  # Forward the remaining enabled TRITON_ENABLE_* options as preprocessor
  # definitions.
  if(${TRITON_ENABLE_METRICS})
    target_compile_definitions(
      tracing-library
      PRIVATE TRITON_ENABLE_METRICS=1
    )
  endif() # TRITON_ENABLE_METRICS

  if(${TRITON_ENABLE_LOGGING})
    target_compile_definitions(
      tracing-library
      PRIVATE TRITON_ENABLE_LOGGING=1
    )
  endif() # TRITON_ENABLE_LOGGING

  if(${TRITON_ENABLE_STATS})
    target_compile_definitions(
      tracing-library
      PRIVATE TRITON_ENABLE_STATS=1
    )
  endif() # TRITON_ENABLE_STATS

  if(${TRITON_ENABLE_NVTX})
    target_compile_definitions(
      tracing-library
      PRIVATE TRITON_ENABLE_NVTX=1
    )
  endif() # TRITON_ENABLE_NVTX

  target_link_libraries(
    main
    PRIVATE
      tracing-library
  )
endif() # TRITON_ENABLE_TRACING

if (NOT WIN32)
  # Define, configure and install one example executable.
  #
  # example_name: name of the target; its only source file is
  #               <example_name>.cc in this directory.
  #
  # Every example gets the same language standard, warning flags, empty
  # install RPATH and Triton link libraries, and is installed into
  # <prefix>/bin. When TRITON_ENABLE_GPU is on, the GPU compile definitions
  # and CUDA::cudart are added as well.
  function(_tritonserver_add_example example_name)
    add_executable(
      ${example_name}
      ${example_name}.cc
    )

    target_compile_features(${example_name} PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD})
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
      message("Using MSVC as compiler, default target on Windows 10. "
              "If the target system is not Windows 10, please update _WIN32_WINNT "
              "to corresponding value.")
      target_compile_options(
        ${example_name}
        PRIVATE
          /W1 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor
      )
    else()
      target_compile_options(
        ${example_name}
        PRIVATE
          -Wall -Wextra -Wno-type-limits -Wno-unused-parameter -Wno-deprecated-declarations -Werror
      )
    endif()

    set_target_properties(
      ${example_name}
      PROPERTIES
        POSITION_INDEPENDENT_CODE ON
        SKIP_BUILD_RPATH TRUE
        BUILD_WITH_INSTALL_RPATH TRUE
        INSTALL_RPATH_USE_LINK_PATH FALSE
        INSTALL_RPATH ""
    )

    target_link_libraries(
      ${example_name}
      PRIVATE
        triton-common-async-work-queue  # from repo-common
        triton-common-error             # from repo-common
        triton-core-serverapi           # from repo-core
        triton-core-serverstub          # from repo-core
    )

    if(${TRITON_ENABLE_GPU})
      target_compile_definitions(
        ${example_name}
        PRIVATE TRITON_ENABLE_GPU=1
        PRIVATE TRITON_MIN_COMPUTE_CAPABILITY=${TRITON_MIN_COMPUTE_CAPABILITY}
      )

      target_link_libraries(
        ${example_name}
        PRIVATE
          CUDA::cudart
      )
    endif() # TRITON_ENABLE_GPU

    install(
      TARGETS ${example_name}
      RUNTIME DESTINATION bin
    )
  endfunction()

  #
  # simple
  #
  _tritonserver_add_example(simple)

  #
  # multi_server example
  #
  _tritonserver_add_example(multi_server)

  #
  # memory_alloc example (only built with GPU support)
  #
  if(${TRITON_ENABLE_GPU})
    _tritonserver_add_example(memory_alloc)
  endif() # TRITON_ENABLE_GPU
endif() # NOT WIN32

# DLIS-7292: Extend tritonfrontend to build for Windows
# Until then the Python package is only configured on non-Windows builds.
if (NOT WIN32)
  # tritonfrontend python package
  add_subdirectory(python)
endif (NOT WIN32)

# Currently unit tests do not build for windows...
# The 'test' source directory is configured into the 'test' binary directory.
if ( NOT WIN32)
  add_subdirectory(test test)
endif() # NOT WIN32


================================================
FILE: src/classification.cc
================================================
// Copyright (c) 2020-2026, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "classification.h"

#include <algorithm>
#include <numeric>

#include "common.h"

namespace triton { namespace server {

namespace {

// Returns true when 'lhs' must be ranked ahead of 'rhs'. Overload selected
// for non-floating-point element types: plain descending order.
template <typename T>
bool
RanksAhead(const T lhs, const T rhs, std::false_type /* is_floating_point */)
{
  return lhs > rhs;
}

// Overload selected for floating-point element types. NaN compares false
// against everything, which would break the strict weak ordering that the
// standard sorting algorithms require (undefined behavior). NaN values are
// therefore ranked after every ordinary number.
template <typename T>
bool
RanksAhead(const T lhs, const T rhs, std::true_type /* is_floating_point */)
{
  const bool lhs_nan = std::isnan(lhs);
  const bool rhs_nan = std::isnan(rhs);
  if (lhs_nan || rhs_nan) {
    return !lhs_nan && rhs_nan;
  }
  return lhs > rhs;
}

// Append the top classification entries of one output tensor to
// 'class_strs'.
//
// The buffer at 'base' is read as 'element_cnt' values of type T. The
// min(element_cnt, req_class_cnt) highest values are selected, highest
// first; equal values are ordered by ascending element index so the result
// is deterministic. Each appended string has the form "<value>:<index>",
// followed by ":<label>" when the label lookup for that index returns a
// non-null label.
//
// Returns nullptr on success, or the error reported by the label lookup
// (entries appended before the failure are left in 'class_strs').
template <typename T>
TRITONSERVER_Error*
AddClassResults(
    TRITONSERVER_InferenceResponse* response, const uint32_t output_idx,
    const char* base, const size_t element_cnt, const uint32_t req_class_cnt,
    std::vector<std::string>* class_strs)
{
  const size_t class_cnt = std::min(element_cnt, (size_t)req_class_cnt);
  if (class_cnt == 0) {
    return nullptr;  // nothing requested or nothing to rank
  }

  const T* probs = reinterpret_cast<const T*>(base);

  std::vector<size_t> idx(element_cnt);
  std::iota(idx.begin(), idx.end(), size_t{0});

  // Only the first 'class_cnt' positions are consumed below, so a partial
  // sort is sufficient and avoids ordering the whole tensor.
  const auto ranks_before = [probs](const size_t i1, const size_t i2) {
    const std::is_floating_point<T> kind;
    if (RanksAhead(probs[i1], probs[i2], kind)) {
      return true;
    }
    if (RanksAhead(probs[i2], probs[i1], kind)) {
      return false;
    }
    return i1 < i2;  // tie: lower index first
  };
  std::partial_sort(
      idx.begin(),
      idx.begin() +
          static_cast<std::vector<size_t>::difference_type>(class_cnt),
      idx.end(), ranks_before);

  for (size_t k = 0; k < class_cnt; ++k) {
    const size_t element_idx = idx[k];
    class_strs->push_back(
        std::to_string(probs[element_idx]) + ":" +
        std::to_string(element_idx));

    const char* label;
    RETURN_IF_ERR(TRITONSERVER_InferenceResponseOutputClassificationLabel(
        response, output_idx, element_idx, &label));
    if (label != nullptr) {
      class_strs->back() += ":";
      class_strs->back().append(label);
    }
  }

  return nullptr;  // success
}

}  // namespace


TRITONSERVER_Error*
TopkClassifications(
    TRITONSERVER_InferenceResponse* response, const uint32_t output_idx,
    const char* base, const size_t byte_size,
    const TRITONSERVER_DataType datatype, const uint32_t req_class_count,
    std::vector<std::string>* class_strs)
{
  // Error returned both when the datatype reports a zero element size and
  // when it is not one of the numeric types handled by the switch below.
  const auto unsupported_type = [datatype]() -> TRITONSERVER_Error* {
    const std::string msg =
        std::string("class result not available for output due to "
                    "unsupported type '") +
        std::string(TRITONSERVER_DataTypeString(datatype)) + "'";
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
  };

  const uint32_t elem_byte_size = TRITONSERVER_DataTypeByteSize(datatype);
  if (elem_byte_size == 0) {
    return unsupported_type();
  }

  // Prevent pathological memory / CPU usage from unbounded classification
  // outputs.
  constexpr size_t kMaxClassificationElements = 1'000'000;
  const size_t element_cnt = byte_size / elem_byte_size;
  if (element_cnt > kMaxClassificationElements) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "classification output tensor too large");
  }

  // Forwards to AddClassResults instantiated for the C++ type of the tag
  // value; only the tag's type matters, its value is ignored.
  const auto rank_as = [&](auto type_tag) -> TRITONSERVER_Error* {
    (void)type_tag;
    return AddClassResults<decltype(type_tag)>(
        response, output_idx, base, element_cnt, req_class_count, class_strs);
  };

  switch (datatype) {
    // Unsigned integers
    case TRITONSERVER_TYPE_UINT8:
      return rank_as(uint8_t{});
    case TRITONSERVER_TYPE_UINT16:
      return rank_as(uint16_t{});
    case TRITONSERVER_TYPE_UINT32:
      return rank_as(uint32_t{});
    case TRITONSERVER_TYPE_UINT64:
      return rank_as(uint64_t{});

    // Signed integers
    case TRITONSERVER_TYPE_INT8:
      return rank_as(int8_t{});
    case TRITONSERVER_TYPE_INT16:
      return rank_as(int16_t{});
    case TRITONSERVER_TYPE_INT32:
      return rank_as(int32_t{});
    case TRITONSERVER_TYPE_INT64:
      return rank_as(int64_t{});

    // Floating point
    case TRITONSERVER_TYPE_FP32:
      return rank_as(float{});
    case TRITONSERVER_TYPE_FP64:
      return rank_as(double{});

    default:
      break;
  }

  // Every other datatype cannot be ranked.
  return unsupported_type();
}

}}  // namespace triton::server


================================================
FILE: src/classification.h
================================================
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <string>
#include <vector>

#include "triton/core/tritonserver.h"

namespace triton { namespace server {

/// Build the top-k classification strings for one output tensor.
///
/// The buffer at 'base' is interpreted as a flat array of 'datatype'
/// elements (byte_size / element size of them). The elements with the
/// highest values are selected, highest first, and one string per selected
/// element is appended to 'class_strs' in the form "<value>:<index>",
/// followed by ":<label>" when a classification label is available for
/// that index.
///
/// \param response The response used to look up classification labels.
/// \param output_idx Index of the output within 'response', passed to the
/// label lookup.
/// \param base Start of the output tensor's data.
/// \param byte_size Size of the data at 'base', in bytes.
/// \param datatype Element type of the tensor. The 8/16/32/64-bit signed
/// and unsigned integer types, FP32 and FP64 are handled.
/// \param req_class_count Maximum number of entries to produce; fewer are
/// produced when the tensor has fewer elements.
/// \param class_strs Receives the appended classification strings.
/// \return nullptr on success. A TRITONSERVER_ERROR_INVALID_ARG error when
/// 'datatype' is not supported or the tensor holds more than 1,000,000
/// elements; otherwise any error reported by the label lookup.
TRITONSERVER_Error* TopkClassifications(
    TRITONSERVER_InferenceResponse* response, const uint32_t output_idx,
    const char* base, const size_t byte_size,
    const TRITONSERVER_DataType datatype, const uint32_t req_class_count,
    std::vector<std::string>* class_strs);

}}  // namespace triton::server


================================================
FILE: src/command_line_parser.cc
================================================
// Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//

#include "command_line_parser.h"
// NOTE(review): presumably the group name under which the global options
// (such as --help) are registered; the empty string would then mean "no
// group heading" -- confirm against the code that uses this constant.
constexpr const char* GLOBAL_OPTION_GROUP = "";

#ifdef _WIN32
// State shared between the Windows getopt_long() implementation below and
// its callers.
// Index of the next element of argv to examine; starts at 1 because
// argv[0] is the program name.
int optind = 1;
// Value text of the most recently parsed option that requires an argument.
const char* optarg = nullptr;

/// Implementation of `getopt_long` for Windows.
/// Linux uses available implementation:
/// https://github.com/gcc-mirror/gcc/blob/fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1/include/getopt.h
/// and
/// https://github.com/gcc-mirror/gcc/blob/fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1/libiberty/getopt.c#L521
/// Parameters' description is available here:
/// https://github.com/gcc-mirror/gcc/blob/fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1/libiberty/getopt.c#L464-L518
/// `optind' is an index to iterate over `argv`, (whose length is `argc`),
/// and starts from 1, since argv[0] is the program name.
/// Text in the current `argv`-element is returned in `optarg'.
/// Note: if option was provided in the form of --<key>=<value>, then
/// optarg is (argv[optind] + found + 1), i.e. everything after `=`.
/// Alternatively, option can be provided as --<key> <value>.
/// In this case, <value> is stored as a separate parameter in `argv`.
/// `longind` returns the index in `longopts` of the long-named option found.

int
getopt_long(
    int argc, char* const argv[], const char* optstring,
    const struct option* longopts, int* longind)
{
  // All arguments have been consumed.
  if (optind >= argc) {
    return -1;
  }

  const std::string argv_str = argv[optind];

  // Only long options of the form --<key>[=<value>] are recognized. Anything
  // else (including arguments shorter than two characters, which previously
  // made substr() throw) ends option parsing with 'optind' left pointing at
  // the offending argument so the caller can report it.
  if (argv_str.compare(0, 2, "--") != 0) {
    return -1;
  }

  // 'found' is the position of '=' when the value is attached to the key.
  // Because the argument starts with "--", 'found' is at least 2 here.
  const size_t found = argv_str.find('=');
  const std::string key = argv_str.substr(
      2, (found == std::string::npos) ? std::string::npos : (found - 2));

  int option_index = 0;
  for (const struct option* curr_longopt = longopts;
       curr_longopt->name != nullptr; ++curr_longopt, ++option_index) {
    if (key != curr_longopt->name) {
      continue;
    }

    if (longind != NULL) {
      (*longind) = option_index;
    }
    if (curr_longopt->has_arg == required_argument) {
      if (found == std::string::npos) {
        // --<key> <value>: the value is the next element of argv.
        optind++;
        if (optind >= argc) {
          std::cerr << argv[0] << ": option '" << argv_str
                    << "' requires an argument" << std::endl;
          return '?';
        }
        optarg = argv[optind];
      } else {
        // --<key>=<value>: the value is everything after '='.
        optarg = (argv[optind] + found + 1);
      }
    }
    optind++;
    return curr_longopt->val;
  }

  // Unknown option: stop parsing without advancing 'optind'.
  return -1;
}
#endif

#include <algorithm>
#include <iomanip>
#include <iostream>
#include <string>

#include "common.h"

// Status configuration for triton_json.h, defined ahead of the include:
// the status type is TRITONSERVER_Error*, a failure with message M is
// returned as a TRITONSERVER_ERROR_INTERNAL error, and success is nullptr.
// NOTE(review): presumably the header falls back to its own defaults when
// these macros are not defined -- confirm in triton_json.h.
#define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
#define TRITONJSON_STATUSRETURN(M) \
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
#define TRITONJSON_STATUSSUCCESS nullptr
#include "triton/common/triton_json.h"


namespace triton { namespace server {

// [FIXME] expose the following parse helpers so that other parsers can
// reuse them
namespace {

// Strict wrappers around std::stoi, std::stoul, std::stoull, std::stoll and
// std::stod. On top of the std functions they require the whole argument to
// be consumed (so "1.4" is not accepted as the integer 1 and "12abc" is not
// accepted as 12) and, for unsigned targets, reject negative input and
// values that do not fit the target type. Failures are reported through
// `invalid argument` (std::invalid_argument) and `out of range`
// (std::out_of_range) exceptions.
template <typename T>
T StringTo(const std::string& arg);

// Throws std::invalid_argument unless the conversion consumed all of 'arg'.
void
RequireFullyConsumed(const std::string& arg, const size_t consumed)
{
  if (consumed != arg.size()) {
    throw std::invalid_argument("unexpected trailing characters in: " + arg);
  }
}

// Throws std::out_of_range when 'arg' carries a minus sign. std::stoul and
// std::stoull accept such input and wrap the value around instead of
// failing.
void
RequireNonNegative(const std::string& arg)
{
  const size_t first = arg.find_first_not_of(" \t\n\v\f\r");
  if ((first != std::string::npos) && (arg[first] == '-')) {
    throw std::out_of_range("negative value for unsigned option: " + arg);
  }
}

template <>
int
StringTo(const std::string& arg)
{
  size_t consumed = 0;
  const int value = std::stoi(arg, &consumed);
  RequireFullyConsumed(arg, consumed);
  return value;
}

#ifdef TRITON_ENABLE_TRACING
template <>
uint32_t
StringTo(const std::string& arg)
{
  RequireNonNegative(arg);
  size_t consumed = 0;
  const unsigned long value = std::stoul(arg, &consumed);
  RequireFullyConsumed(arg, consumed);
  // 'unsigned long' may be wider than 32 bits; refuse to truncate silently.
  const uint32_t narrowed = static_cast<uint32_t>(value);
  if (narrowed != value) {
    throw std::out_of_range("value does not fit in 32 bits: " + arg);
  }
  return narrowed;
}
#endif  // TRITON_ENABLE_TRACING

template <>
uint64_t
StringTo(const std::string& arg)
{
  RequireNonNegative(arg);
  size_t consumed = 0;
  const uint64_t value = std::stoull(arg, &consumed);
  RequireFullyConsumed(arg, consumed);
  return value;
}

template <>
int64_t
StringTo(const std::string& arg)
{
  size_t consumed = 0;
  const int64_t value = std::stoll(arg, &consumed);
  RequireFullyConsumed(arg, consumed);
  return value;
}

template <>
double
StringTo(const std::string& arg)
{
  size_t consumed = 0;
  const double value = std::stod(arg, &consumed);
  RequireFullyConsumed(arg, consumed);
  return value;
}

// Convert 'arg' to T and report conversion failures as ParseException.
// A StringTo specialization must exist for every T parsed through here.
// The conversion deliberately goes through StringTo instead of the stream
// input operator (>>): the latter was found to consume an improper
// argument without reporting an error (i.e. parsing "1.4" into an 'int'
// yields 1, whereas an error is wanted).
template <typename T>
T
ParseOption(const std::string& arg)
{
  try {
    return StringTo<T>(arg);
  }
  catch (const std::invalid_argument&) {
    // The argument is not a value of the requested type.
    throw ParseException("Invalid option value. Got " + arg + "\n");
  }
  catch (const std::out_of_range&) {
    // The argument is well formed but does not fit the requested type.
    throw ParseException(
        "Provided option value is out of bound. Got " + arg + "\n");
  }
}

// Boolean specialization: accepts "true"/"on"/"1" and "false"/"off"/"0",
// case-insensitively. Any other value raises ParseException.
template <>
bool
ParseOption(const std::string& arg)
{
  // The parameter has to stay const to match the primary template, so the
  // lower-casing is done on a copy.
  std::string lowered(arg);
  for (char& ch : lowered) {
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }

  const bool is_true =
      (lowered == "true") || (lowered == "on") || (lowered == "1");
  const bool is_false =
      (lowered == "false") || (lowered == "off") || (lowered == "0");

  if (!is_true && !is_false) {
    throw ParseException("invalid value for bool option: " + arg);
  }
  return is_true;
}

// Condition here merely to avoid compilation error, this function will
// be defined but not used otherwise.
#ifdef TRITON_ENABLE_LOGGING
// Parse an option that accepts either a boolean word or an integer:
// "true" maps to 1 and "false" to 0 (case-insensitively); anything else is
// parsed as an int, using the lower-cased text.
int
ParseIntBoolOption(std::string arg)
{
  for (char& ch : arg) {
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }

  if (arg == "true") {
    return 1;
  } else if (arg == "false") {
    return 0;
  } else {
    return ParseOption<int>(arg);
  }
}
#endif  // TRITON_ENABLE_LOGGING

std::string
PairsToJsonStr(std::vector<std::pair<std::string, std::string>> settings)
{
  triton::common::TritonJson::Value json(
      triton::common::TritonJson::ValueType::OBJECT);
  for (const auto& setting : settings) {
    const auto& key = setting.first;
    const auto& value = setting.second;
    json.SetStringObject(key.c_str(), value);
  }
  triton::common::TritonJson::WriteBuffer buffer;
  auto err = json.Write(&buffer);
  if (err != nullptr) {
    LOG_TRITONSERVER_ERROR(err, "failed to convert config to JSON");
  }
  return buffer.Contents();
}

// Parse an option of the form <first><delim_str><second> into a pair.
//
// 'arg' is split at the first occurrence of 'delim_str'; the text before it
// is converted with ParseOption<T1> and the text after it with
// ParseOption<T2>. Throws ParseException when 'delim_str' does not occur in
// 'arg', and propagates any ParseException raised while converting either
// half.
template <typename T1, typename T2>
std::pair<T1, T2>
ParsePairOption(const std::string& arg, const std::string& delim_str)
{
  // Keep the position as size_type and compare against npos; narrowing the
  // result of find() to 'int' is implementation-defined for npos and wrong
  // for very long arguments.
  const std::string::size_type delim = arg.find(delim_str);

  if (delim == std::string::npos) {
    std::stringstream ss;
    ss << "Cannot parse pair option due to incorrect number of inputs."
          "--<pair option> argument requires format <first>"
       << delim_str << "<second>. "
       << "Found: " << arg << std::endl;
    throw ParseException(ss.str());
  }

  const std::string first_string = arg.substr(0, delim);
  const std::string second_string = arg.substr(delim + delim_str.length());

  // Specific conversion from key-value string to actual key-value type,
  // should be extracted out of this function if we need to parse
  // more pair option of different types.
  return {ParseOption<T1>(first_string), ParseOption<T2>(second_string)};
}

// Split 'options' by 'delim_str' and place split strings into a vector.
//
// Empty fields are preserved: leading, trailing and consecutive delimiters
// each produce an empty string, and an input without any delimiter yields
// a single element equal to the input. An empty 'delim_str' cannot split
// anything (searching for it always matches at the current position, which
// previously caused an endless loop), so 'options' is returned unsplit.
std::vector<std::string>
SplitOptions(std::string options, const std::string& delim_str)
{
  std::vector<std::string> res;

  if (delim_str.empty()) {
    res.emplace_back(options);
    return res;
  }

  // Walk the string with a moving start offset instead of repeatedly
  // re-copying the remainder.
  std::string::size_type start = 0;
  std::string::size_type delim = options.find(delim_str, start);
  while (delim != std::string::npos) {
    res.emplace_back(options, start, delim - start);
    start = delim + delim_str.length();
    delim = options.find(delim_str, start);
  }
  // include last element
  res.emplace_back(options, start, std::string::npos);
  return res;
}

}  // namespace

// Identifiers of the command-line options. Each option registered in
// TritonParser::SetupOptions() carries one of these values as its first
// field. Numbering starts at 1000 and continues implicitly, and several
// groups only exist when the corresponding TRITON_ENABLE_* macro is
// defined, so the numeric value of an enumerator depends on the build
// configuration; always refer to the options by name.
enum TritonOptionId {
  OPTION_HELP = 1000,
  // Logging options
#ifdef TRITON_ENABLE_LOGGING
  OPTION_LOG_VERBOSE,
  OPTION_LOG_INFO,
  OPTION_LOG_WARNING,
  OPTION_LOG_ERROR,
  OPTION_LOG_FORMAT,
  OPTION_LOG_FILE,
#endif  // TRITON_ENABLE_LOGGING
  // Server identity and model repository options
  OPTION_ID,
  OPTION_MODEL_REPOSITORY,
  OPTION_EXIT_ON_ERROR,
  OPTION_DISABLE_AUTO_COMPLETE_CONFIG,
  OPTION_STRICT_MODEL_CONFIG,
  OPTION_STRICT_READINESS,
  // HTTP endpoint options
#if defined(TRITON_ENABLE_HTTP)
  OPTION_ALLOW_HTTP,
  OPTION_HTTP_HEADER_FORWARD_PATTERN,
  OPTION_HTTP_PORT,
  OPTION_REUSE_HTTP_PORT,
  OPTION_HTTP_ADDRESS,
  OPTION_HTTP_THREAD_COUNT,
  OPTION_HTTP_RESTRICTED_API,
  OPTION_HTTP_MAX_INPUT_SIZE,
#endif  // TRITON_ENABLE_HTTP
  // GRPC endpoint options
#if defined(TRITON_ENABLE_GRPC)
  OPTION_ALLOW_GRPC,
  OPTION_GRPC_PORT,
  OPTION_REUSE_GRPC_PORT,
  OPTION_GRPC_ADDRESS,
  OPTION_GRPC_HEADER_FORWARD_PATTERN,
  OPTION_GRPC_INFER_THREAD_COUNT,
  OPTION_GRPC_INFER_ALLOCATION_POOL_SIZE,
  OPTION_GRPC_MAX_RESPONSE_POOL_SIZE,
  OPTION_GRPC_USE_SSL,
  OPTION_GRPC_USE_SSL_MUTUAL,
  OPTION_GRPC_SERVER_CERT,
  OPTION_GRPC_SERVER_KEY,
  OPTION_GRPC_ROOT_CERT,
  OPTION_GRPC_RESPONSE_COMPRESSION_LEVEL,
  OPTION_GRPC_ARG_KEEPALIVE_TIME_MS,
  OPTION_GRPC_ARG_KEEPALIVE_TIMEOUT_MS,
  OPTION_GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS,
  OPTION_GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA,
  OPTION_GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS,
  OPTION_GRPC_ARG_HTTP2_MAX_PING_STRIKES,
  OPTION_GRPC_RESTRICTED_PROTOCOL,
  OPTION_GRPC_ARG_MAX_CONNECTION_AGE_MS,
  OPTION_GRPC_ARG_MAX_CONNECTION_AGE_GRACE_MS,
#endif  // TRITON_ENABLE_GRPC
  // SageMaker endpoint options
#if defined(TRITON_ENABLE_SAGEMAKER)
  OPTION_ALLOW_SAGEMAKER,
  OPTION_SAGEMAKER_PORT,
  OPTION_SAGEMAKER_SAFE_PORT_RANGE,
  OPTION_SAGEMAKER_THREAD_COUNT,
#endif  // TRITON_ENABLE_SAGEMAKER
  // Vertex AI endpoint options
#if defined(TRITON_ENABLE_VERTEX_AI)
  OPTION_ALLOW_VERTEX_AI,
  OPTION_VERTEX_AI_PORT,
  OPTION_VERTEX_AI_THREAD_COUNT,
  OPTION_VERTEX_AI_DEFAULT_MODEL,
#endif  // TRITON_ENABLE_VERTEX_AI
  // Metrics options
#ifdef TRITON_ENABLE_METRICS
  OPTION_ALLOW_METRICS,
  OPTION_ALLOW_GPU_METRICS,
  OPTION_ALLOW_CPU_METRICS,
  OPTION_METRICS_ADDRESS,
  OPTION_METRICS_PORT,
  OPTION_METRICS_INTERVAL_MS,
  OPTION_METRICS_CONFIG,
#endif  // TRITON_ENABLE_METRICS
  // Tracing options
#ifdef TRITON_ENABLE_TRACING
  OPTION_TRACE_FILEPATH,
  OPTION_TRACE_LEVEL,
  OPTION_TRACE_RATE,
  OPTION_TRACE_COUNT,
  OPTION_TRACE_LOG_FREQUENCY,
  OPTION_TRACE_CONFIG,
#endif  // TRITON_ENABLE_TRACING
  // Options that are always available
  OPTION_MODEL_CONTROL_MODE,
  OPTION_POLL_REPO_SECS,
  OPTION_STARTUP_MODEL,
  OPTION_CUSTOM_MODEL_CONFIG_NAME,
  OPTION_RATE_LIMIT,
  OPTION_RATE_LIMIT_RESOURCE,
  OPTION_PINNED_MEMORY_POOL_BYTE_SIZE,
  OPTION_CUDA_MEMORY_POOL_BYTE_SIZE,
  OPTION_CUDA_VIRTUAL_ADDRESS_SIZE,
  OPTION_RESPONSE_CACHE_BYTE_SIZE,
  OPTION_CACHE_CONFIG,
  OPTION_CACHE_DIR,
  OPTION_MIN_SUPPORTED_COMPUTE_CAPABILITY,
  OPTION_EXIT_TIMEOUT_SECS,
  OPTION_BACKEND_DIR,
  OPTION_REPOAGENT_DIR,
  OPTION_BUFFER_MANAGER_THREAD_COUNT,
  OPTION_MODEL_LOAD_THREAD_COUNT,
  OPTION_MODEL_LOAD_RETRY_COUNT,
  OPTION_BACKEND_CONFIG,
  OPTION_HOST_POLICY,
  OPTION_MODEL_LOAD_GPU_LIMIT,
  OPTION_MODEL_NAMESPACING,
  OPTION_ENABLE_PEER_ACCESS
};

void
TritonParser::SetupOptions()
{
  global_options_.push_back(
      {OPTION_HELP, "help", Option::ArgNone, "Print usage"});

  server_options_.push_back(
      {OPTION_ID, "id", Option::ArgStr, "Identifier for this server."});
  server_options_.push_back(
      {OPTION_EXIT_TIMEOUT_SECS, "exit-timeout-secs", Option::ArgInt,
       "Timeout (in seconds) when exiting to wait for in-flight inferences to "
       "finish. After the timeout expires the server exits even if inferences "
       "are still in flight."});

  model_repo_options_.push_back(
      {OPTION_MODEL_REPOSITORY, "model-store", Option::ArgStr,
       "Equivalent to --model-repository."});
  model_repo_options_.push_back(
      {OPTION_MODEL_REPOSITORY, "model-repository", Option::ArgStr,
       "Path to model repository directory. It may be specified multiple times "
       "to add multiple model repositories. Note that if a model is not unique "
       "across all model repositories at any time, the model will not be "
       "available."});
  model_repo_options_.push_back(
      {OPTION_EXIT_ON_ERROR, "exit-on-error", Option::ArgBool,
       "Exit the inference server if an error occurs during initialization."});
  model_repo_options_.push_back(
      {OPTION_DISABLE_AUTO_COMPLETE_CONFIG, "disable-auto-complete-config",
       Option::ArgNone,
       "If set, disables the triton and backends from auto completing model "
       "configuration files. Model configuration files must be provided and "
       "all required "
       "configuration settings must be specified."});
  model_repo_options_.push_back(
      {OPTION_STRICT_READINESS, "strict-readiness", Option::ArgBool,
       "If true /v2/health/ready endpoint indicates ready if the server "
       "is responsive and all models are available. If false "
       "/v2/health/ready endpoint indicates ready if server is responsive "
       "even if some/all models are unavailable."});
  model_repo_options_.push_back(
      {OPTION_MODEL_CONTROL_MODE, "model-control-mode", Option::ArgStr,
       "Specify the mode for model management. Options are \"none\", \"poll\" "
       "and \"explicit\". The default is \"none\". "
       "For \"none\", the server will load all models in the model "
       "repository(s) at startup and will not make any changes to the load "
       "models after that. For \"poll\", the server will poll the model "
       "repository(s) to detect changes and will load/unload models based on "
       "those changes. The poll rate is controlled by 'repository-poll-secs'. "
       "For \"explicit\", model load and unload is initiated by using the "
       "model control APIs, and only models specified with --load-model will "
       "be loaded at startup."});
  model_repo_options_.push_back(
      {OPTION_POLL_REPO_SECS, "repository-poll-secs", Option::ArgInt,
       "Interval in seconds between each poll of the model repository to check "
       "for changes. Valid only when --model-control-mode=poll is "
       "specified."});
  model_repo_options_.push_back(
      {OPTION_STARTUP_MODEL, "load-model", Option::ArgStr,
       "Name of the model to be loaded on server startup. It may be specified "
       "multiple times to add multiple models. To load ALL models at startup, "
       "specify '*' as the model name with --load-model=* as the ONLY "
       "--load-model argument, this does not imply any pattern matching. "
       "Specifying --load-model=* in conjunction with another --load-model "
       "argument will result in error. Note that this option will only take "
       "effect if --model-control-mode=explicit is true."});
  model_repo_options_.push_back(
      {OPTION_CUSTOM_MODEL_CONFIG_NAME, "model-config-name", Option::ArgStr,
       "The custom configuration name for models to load."
       "The name should not contain any space character."
       "For example: --model-config-name=h100. "
       "If --model-config-name is not set, Triton will use the default "
       "config.pbtxt."});
  model_repo_options_.push_back(
      {OPTION_MODEL_LOAD_THREAD_COUNT, "model-load-thread-count",
       Option::ArgInt,
       "The number of threads used to concurrently load models in "
       "model repositories. Default is 4."});
  model_repo_options_.push_back(
      {OPTION_MODEL_LOAD_RETRY_COUNT, "model-load-retry-count", Option::ArgInt,
       "The number of retry to load a model in "
       "model repositories. Default is 0."});
  model_repo_options_.push_back(
      {OPTION_MODEL_NAMESPACING, "model-namespacing", Option::ArgBool,
       "Whether model namespacing is enable or not. If true, models with the "
       "same name can be served if they are in different namespace."});
  model_repo_options_.push_back(
      {OPTION_ENABLE_PEER_ACCESS, "enable-peer-access", Option::ArgBool,
       "Whether the server tries to enable peer access or not. Even when this "
       "options is set to true,  "
       "peer access could still be not enabled because the underlying system "
       "doesn't support it."
       " The server will log a warning in this case. Default is true."});

#if defined(TRITON_ENABLE_HTTP)
  http_options_.push_back(
      {OPTION_ALLOW_HTTP, "allow-http", Option::ArgBool,
       "Allow the server to listen for HTTP requests."});
  http_options_.push_back(
      {OPTION_HTTP_ADDRESS, "http-address", Option::ArgStr,
       "The address for the http server to bind to. Default is 0.0.0.0"});
  http_options_.push_back(
      {OPTION_HTTP_PORT, "http-port", Option::ArgInt,
       "The port for the server to listen on for HTTP "
       "requests. Default is 8000."});
  http_options_.push_back(
      {OPTION_REUSE_HTTP_PORT, "reuse-http-port", Option::ArgBool,
       "Allow multiple servers to listen on the same HTTP port when every "
       "server has this option set. If you plan to use this option as a way to "
       "load balance between different Triton servers, the same model "
       "repository or set of models must be used for every server."});
  http_options_.push_back(
      {OPTION_HTTP_HEADER_FORWARD_PATTERN, "http-header-forward-pattern",
       Option::ArgStr,
       "The regular expression pattern that will be used for forwarding HTTP "
       "headers as inference request parameters."});
  http_options_.push_back(
      {OPTION_HTTP_THREAD_COUNT, "http-thread-count", Option::ArgInt,
       "Number of threads handling HTTP requests."});
  http_options_.push_back(
      {OPTION_HTTP_MAX_INPUT_SIZE, "http-max-input-size", Option::ArgInt,
       ("Maximum allowed HTTP request input size in bytes. For compressed "
        "requests, this also limits the decompressed size. Default is " +
        std::to_string(HTTP_DEFAULT_MAX_INPUT_SIZE) + " bytes (64MB).")});
  http_options_.push_back(
      {OPTION_HTTP_RESTRICTED_API, "http-restricted-api",
       "<string>:<string>=<string>",
       "Specify restricted HTTP api setting. The format of this "
       "flag is --http-restricted-api=<apis>:<key>=<value>. Where "
       "<api> is a comma-separated list of apis to be restricted. "
       "<key> will be additional header key to be checked when a HTTP request "
       "is received, and <value> is the value expected to be matched."
       " Allowed APIs: " +
           Join(RESTRICTED_CATEGORY_NAMES, ", ")});
#endif  // TRITON_ENABLE_HTTP

#if defined(TRITON_ENABLE_GRPC)
  grpc_options_.push_back(
      {OPTION_ALLOW_GRPC, "allow-grpc", Option::ArgBool,
       "Allow the server to listen for GRPC requests."});
  grpc_options_.push_back(
      {OPTION_GRPC_ADDRESS, "grpc-address", Option::ArgStr,
       "The address for the grpc server to binds to. Default is 0.0.0.0"});
  grpc_options_.push_back(
      {OPTION_GRPC_PORT, "grpc-port", Option::ArgInt,
       "The port for the server to listen on for GRPC "
       "requests. Default is 8001."});
  grpc_options_.push_back(
      {OPTION_REUSE_GRPC_PORT, "reuse-grpc-port", Option::ArgBool,
       "Allow multiple servers to listen on the same GRPC port when every "
       "server has this option set. If you plan to use this option as a way to "
       "load balance between different Triton servers, the same model "
       "repository or set of models must be used for every server."});
  grpc_options_.push_back(
      {OPTION_GRPC_HEADER_FORWARD_PATTERN, "grpc-header-forward-pattern",
       Option::ArgStr,
       "The regular expression pattern that will be used for forwarding GRPC "
       "headers as inference request parameters."});
  grpc_options_.push_back(
      {OPTION_GRPC_INFER_THREAD_COUNT, "grpc-infer-thread-count",
       Option::ArgInt,
       "The number of gRPC inference handler threads. Default is 2."});
  grpc_options_.push_back(
      {OPTION_GRPC_INFER_ALLOCATION_POOL_SIZE,
       "grpc-infer-allocation-pool-size", Option::ArgInt,
       "The maximum number of states (inference request/response queues) that "
       "remain allocated for reuse. As long as the number of in-flight "
       "requests doesn't exceed this value there will be no "
       "allocation/deallocation of request/response objects."});
  grpc_options_.push_back(
      {OPTION_GRPC_MAX_RESPONSE_POOL_SIZE, "grpc-max-response-pool-size",
       Option::ArgInt,
       "The maximum number of inference response objects that can remain "
       "allocated in the response queue at any given time."});
  grpc_options_.push_back(
      {OPTION_GRPC_USE_SSL, "grpc-use-ssl", Option::ArgBool,
       "Use SSL authentication for GRPC requests. Default is false."});
  grpc_options_.push_back(
      {OPTION_GRPC_USE_SSL_MUTUAL, "grpc-use-ssl-mutual", Option::ArgBool,
       "Use mututal SSL authentication for GRPC requests. This option will "
       "preempt '--grpc-use-ssl' if it is also specified. Default is false."});
  grpc_options_.push_back(
      {OPTION_GRPC_SERVER_CERT, "grpc-server-cert", Option::ArgStr,
       "File holding PEM-encoded server certificate. Ignored unless "
       "--grpc-use-ssl is true."});
  grpc_options_.push_back(
      {OPTION_GRPC_SERVER_KEY, "grpc-server-key", Option::ArgStr,
       "File holding PEM-encoded server key. Ignored unless "
       "--grpc-use-ssl is true."});
  grpc_options_.push_back(
      {OPTION_GRPC_ROOT_CERT, "grpc-root-cert", Option::ArgStr,
       "File holding PEM-encoded root certificate. Ignore unless "
       "--grpc-use-ssl is false."});
  grpc_options_.push_back(
      {OPTION_GRPC_RESPONSE_COMPRESSION_LEVEL,
       "grpc-infer-response-compression-level", Option::ArgStr,
       "The compression level to be used while returning the infer response to "
       "the peer. Allowed values are none, low, medium and high. By default, "
       "compression level is selected as none."});
  grpc_options_.push_back(
      {OPTION_GRPC_ARG_KEEPALIVE_TIME_MS, "grpc-keepalive-time", Option::ArgInt,
       "The period (in milliseconds) after which a keepalive ping is sent on "
       "the transport. Default is 7200000 (2 hours)."});
  grpc_options_.push_back(
      {OPTION_GRPC_ARG_KEEPALIVE_TIMEOUT_MS, "grpc-keepalive-timeout",
       Option::ArgInt,
       "The period (in milliseconds) the sender of the keepalive ping waits "
       "for an acknowledgement. If it does not receive an acknowledgment "
       "within this time, it will close the connection. "
       "Default is 20000 (20 seconds)."});
  grpc_options_.push_back(
      {OPTION_GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS,
       "grpc-keepalive-permit-without-calls", Option::ArgBool,
       "Allows keepalive pings to be sent even if there are no calls in flight "
       "(0 : false; 1 : true). Default is 0 (false)."});
  grpc_options_.push_back(
      {OPTION_GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA,
       "grpc-http2-max-pings-without-data", Option::ArgInt,
       "The maximum number of pings that can be sent when there is no "
       "data/header frame to be sent. gRPC Core will not continue sending "
       "pings if we run over the limit. Setting it to 0 allows sending pings "
       "without such a restriction. Default is 2."});
  grpc_options_.push_back(
      {OPTION_GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS,
       "grpc-http2-min-recv-ping-interval-without-data", Option::ArgInt,
       "If there are no data/header frames being sent on the transport, this "
       "channel argument on the server side controls the minimum time "
       "(in milliseconds) that gRPC Core would expect between receiving "
       "successive pings. If the time between successive pings is less than "
       "this time, then the ping will be considered a bad ping from the peer. "
       "Such a ping counts as a ‘ping strike’. Default is 300000 (5 "
       "minutes)."});
  grpc_options_.push_back(
      {OPTION_GRPC_ARG_HTTP2_MAX_PING_STRIKES, "grpc-http2-max-ping-strikes",
       Option::ArgInt,
       "Maximum number of bad pings that the server will tolerate before "
       "sending an HTTP2 GOAWAY frame and closing the transport. Setting it to "
       "0 allows the server to accept any number of bad pings. Default is 2."});
  grpc_options_.push_back(
      {OPTION_GRPC_ARG_MAX_CONNECTION_AGE_MS, "grpc-max-connection-age",
       Option::ArgInt,
       "Maximum time that a channel may exist in milliseconds."
       "Default is undefined."});
  grpc_options_.push_back(
      {OPTION_GRPC_ARG_MAX_CONNECTION_AGE_GRACE_MS,
       "grpc-max-connection-age-grace", Option::ArgInt,
       "Grace period after the channel reaches its max age. "
       "Default is undefined."});
  grpc_options_.push_back(
      {OPTION_GRPC_RESTRICTED_PROTOCOL, "grpc-restricted-protocol",
       "<string>:<string>=<string>",
       "Specify restricted GRPC protocol setting. The format of this "
       "flag is --grpc-restricted-protocol=<protocols>:<key>=<value>. Where "
       "<protocol> is a comma-separated list of protocols to be restricted. "
       "<key> will be additional header key to be checked when a GRPC request "
       "is received, and <value> is the value expected to be matched."
       " Allowed protocols: " +
           Join(RESTRICTED_CATEGORY_NAMES, ", ")});
#endif  // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_LOGGING
  logging_options_.push_back(
      {OPTION_LOG_VERBOSE, "log-verbose", Option::ArgInt,
       "Set verbose logging level. Zero (0) disables verbose logging and "
       "values >= 1 enable verbose logging."});
  logging_options_.push_back(
      {OPTION_LOG_INFO, "log-info", Option::ArgBool,
       "Enable/disable info-level logging."});
  logging_options_.push_back(
      {OPTION_LOG_WARNING, "log-warning", Option::ArgBool,
       "Enable/disable warning-level logging."});
  logging_options_.push_back(
      {OPTION_LOG_ERROR, "log-error", Option::ArgBool,
       "Enable/disable error-level logging."});
  logging_options_.push_back(
      {OPTION_LOG_FORMAT, "log-format", Option::ArgStr,
       "Set the logging format. Options are \"default\" and \"ISO8601\". "
       "The default is \"default\". For \"default\", the log severity (L) and "
       "timestamp will be logged as \"LMMDD hh:mm:ss.ssssss\". "
       "For \"ISO8601\", the log format will be \"YYYY-MM-DDThh:mm:ssZ L\"."});
  logging_options_.push_back(
      {OPTION_LOG_FILE, "log-file", Option::ArgStr,
       "Set the name of the log output file. If specified, log outputs will be "
       "saved to this file. If not specified, log outputs will stream to the "
       "console."});
#endif  // TRITON_ENABLE_LOGGING

#if defined(TRITON_ENABLE_SAGEMAKER)
  sagemaker_options_.push_back(
      {OPTION_ALLOW_SAGEMAKER, "allow-sagemaker", Option::ArgBool,
       "Allow the server to listen for Sagemaker requests. Default is false."});
  sagemaker_options_.push_back(
      {OPTION_SAGEMAKER_PORT, "sagemaker-port", Option::ArgInt,
       "The port for the server to listen on for Sagemaker requests. Default "
       "is 8080."});
  sagemaker_options_.push_back(
      {OPTION_SAGEMAKER_SAFE_PORT_RANGE, "sagemaker-safe-port-range",
       "<integer>-<integer>",
       "Set the allowed port range for endpoints other than the SageMaker "
       "endpoints."});
  sagemaker_options_.push_back(
      {OPTION_SAGEMAKER_THREAD_COUNT, "sagemaker-thread-count", Option::ArgInt,
       "Number of threads handling Sagemaker requests. Default is 8."});
#endif  // TRITON_ENABLE_SAGEMAKER

#if defined(TRITON_ENABLE_VERTEX_AI)
  vertex_options_.push_back(
      {OPTION_ALLOW_VERTEX_AI, "allow-vertex-ai", Option::ArgBool,
       "Allow the server to listen for Vertex AI requests. Default is true if "
       "AIP_MODE=PREDICTION, false otherwise."});
  vertex_options_.push_back(
      {OPTION_VERTEX_AI_PORT, "vertex-ai-port", Option::ArgInt,
       "The port for the server to listen on for Vertex AI requests. Default "
       "is AIP_HTTP_PORT if set, 8080 otherwise."});
  vertex_options_.push_back(
      {OPTION_VERTEX_AI_THREAD_COUNT, "vertex-ai-thread-count", Option::ArgInt,
       "Number of threads handling Vertex AI requests. Default is 8."});
  vertex_options_.push_back(
      {OPTION_VERTEX_AI_DEFAULT_MODEL, "vertex-ai-default-model",
       Option::ArgStr,
       "The name of the model to use for single-model inference requests."});
#endif  // TRITON_ENABLE_VERTEX_AI

#if defined(TRITON_ENABLE_METRICS)
  metric_options_.push_back(
      {OPTION_ALLOW_METRICS, "allow-metrics", Option::ArgBool,
       "Allow the server to provide prometheus metrics."});
  metric_options_.push_back(
      {OPTION_ALLOW_GPU_METRICS, "allow-gpu-metrics", Option::ArgBool,
       "Allow the server to provide GPU metrics. Ignored unless "
       "--allow-metrics is true."});
  metric_options_.push_back(
      {OPTION_ALLOW_CPU_METRICS, "allow-cpu-metrics", Option::ArgBool,
       "Allow the server to provide CPU metrics. Ignored unless "
       "--allow-metrics is true."});
  metric_options_.push_back(
      {OPTION_METRICS_ADDRESS, "metrics-address", Option::ArgStr,
       "The address for the metrics server to bind to. Default is the same as "
       "--http-address if built with HTTP support. Otherwise, default is "
       "0.0.0.0"});
  metric_options_.push_back(
      {OPTION_METRICS_PORT, "metrics-port", Option::ArgInt,
       "The port reporting prometheus metrics. Default is 8002."});
  metric_options_.push_back(
      {OPTION_METRICS_INTERVAL_MS, "metrics-interval-ms", Option::ArgFloat,
       "Metrics will be collected once every <metrics-interval-ms> "
       "milliseconds. Default is 2000 milliseconds."});
  metric_options_.push_back(
      {OPTION_METRICS_CONFIG, "metrics-config", "<string>=<string>",
       "Specify a metrics-specific configuration setting. The format of this "
       "flag is --metrics-config=<setting>=<value>. It can be specified "
       "multiple times."});
#endif  // TRITON_ENABLE_METRICS

#ifdef TRITON_ENABLE_TRACING
  tracing_options_.push_back(
      {OPTION_TRACE_CONFIG, "trace-config", "<string>,<string>=<string>",
       "Specify global or trace mode specific configuration setting. "
       "The format of this flag is --trace-config "
       "<mode>,<setting>=<value>. "
       "Where <mode> is either \"triton\" or \"opentelemetry\". "
       "The default is \"triton\". To specify global trace settings "
       "(level, rate, count, or mode), the format would be "
       "--trace-config <setting>=<value>. For \"triton\" mode, the server will "
       "use "
       "Triton's Trace APIs. For \"opentelemetry\" mode, the server will use "
       "OpenTelemetry's APIs to generate, collect and export traces for "
       "individual inference requests."});
#endif  // TRITON_ENABLE_TRACING

  cache_options_.push_back(
      {OPTION_CACHE_CONFIG, "cache-config", "<string>,<string>=<string>",
       "Specify a cache-specific configuration setting. The format of this "
       "flag is --cache-config=<cache_name>,<setting>=<value>. Where "
       "<cache_name> is the name of the cache, such as 'local' or 'redis'. "
       "Example: --cache-config=local,size=1048576 will configure a 'local' "
       "cache implementation with a fixed buffer pool of size 1048576 bytes."});
  cache_options_.push_back(
      {OPTION_CACHE_DIR, "cache-directory", Option::ArgStr,
       "The global directory searched for cache shared libraries. Default is "
       "'/opt/tritonserver/caches'. This directory is expected to contain a "
       "cache implementation as a shared library with the name "
       "'libtritoncache.so'."});


  rate_limiter_options_.push_back(
      // FIXME:  fix the default to execution_count once RL logic is complete.
      {OPTION_RATE_LIMIT, "rate-limit", Option::ArgStr,
       "Specify the mode for rate limiting. Options are \"execution_count\" "
       "and \"off\". The default is \"off\". For "
       "\"execution_count\", the server will determine the instance using "
       "configured priority and the number of time the instance has been "
       "used to run inference. The inference will finally be executed once "
       "the required resources are available. For \"off\", the server will "
       "ignore any rate limiter config and run inference as soon as an "
       "instance is ready."});
  rate_limiter_options_.push_back(
      {OPTION_RATE_LIMIT_RESOURCE, "rate-limit-resource",
       "<string>:<integer>:<integer>",
       "The number of resources available to the server. The format of this "
       "flag is --rate-limit-resource=<resource_name>:<count>:<device>. The "
       "<device> is optional and if not listed will be applied to every "
       "device. If the resource is specified as \"GLOBAL\" in the model "
       "configuration the resource is considered shared among all the devices "
       "in the system. The <device> property is ignored for such resources. "
       "This flag can be specified multiple times to specify each resources "
       "and their availability. By default, the max across all instances that "
       "list the resource is selected as its availability. The values for this "
       "flag is case-insensitive."});

  memory_device_options_.push_back(
      {OPTION_PINNED_MEMORY_POOL_BYTE_SIZE, "pinned-memory-pool-byte-size",
       Option::ArgInt,
       "The total byte size that can be allocated as pinned system memory. "
       "If GPU support is enabled, the server will allocate pinned system "
       "memory to accelerate data transfer between host and devices until it "
       "exceeds the specified byte size. If 'numa-node' is configured via "
       "--host-policy, the pinned system memory of the pool size will be "
       "allocated on each numa node. This option will not affect the "
       "allocation conducted by the backend frameworks. Default is 256 MB."});
  memory_device_options_.push_back(
      {OPTION_CUDA_MEMORY_POOL_BYTE_SIZE, "cuda-memory-pool-byte-size",
       "<integer>:<integer>",
       "The total byte size that can be allocated as CUDA memory for the GPU "
       "device. If GPU support is enabled, the server will allocate CUDA "
       "memory to minimize data transfer between host and devices until it "
       "exceeds the specified byte size. This option will not affect the "
       "allocation conducted by the backend frameworks. The argument should be "
       "2 integers separated by colons in the format "
       "<GPU device ID>:<pool byte size>. This option can be used multiple "
       "times, but only once per GPU device. Subsequent uses will overwrite "
       "previous uses for the same GPU device. Default is 64 MB."});
  memory_device_options_.push_back(
      {OPTION_CUDA_VIRTUAL_ADDRESS_SIZE, "cuda-virtual-address-size",
       "<integer>:<integer>",
       "The total CUDA virtual address size that will be used for each "
       "implicit state when growable memory is used. This value determines "
       "the maximum size of each implicit state. The state size cannot go "
       "beyond this value. The argument should be "
       "2 integers separated by colons in the format "
       "<GPU device ID>:<CUDA virtual address size>. This option can be used "
       "multiple "
       "times, but only once per GPU device. Subsequent uses will overwrite "
       "previous uses for the same GPU device. Default is 1 GB."});
  memory_device_options_.push_back(
      {OPTION_MIN_SUPPORTED_COMPUTE_CAPABILITY,
       "min-supported-compute-capability", Option::ArgFloat,
       "The minimum supported CUDA compute capability. GPUs that don't support "
       "this compute capability will not be used by the server."});
  memory_device_options_.push_back(
      {OPTION_BUFFER_MANAGER_THREAD_COUNT, "buffer-manager-thread-count",
       Option::ArgInt,
       "The number of threads used to accelerate copies and other operations "
       "required to manage input and output tensor contents. Default is 0."});
  memory_device_options_.push_back(
      {OPTION_HOST_POLICY, "host-policy", "<string>,<string>=<string>",
       "Specify a host policy setting associated with a policy name. The "
       "format of this flag is --host-policy=<policy_name>,<setting>=<value>. "
       "Currently supported settings are 'numa-node', 'cpu-cores'. Note that "
       "'numa-node' setting will affect pinned memory pool behavior, see "
       "--pinned-memory-pool for more detail."});
  memory_device_options_.push_back(
      {OPTION_MODEL_LOAD_GPU_LIMIT, "model-load-gpu-limit",
       "<device_id>:<fraction>",
       "Specify the limit on GPU memory usage as a fraction. If model loading "
       "on the device is requested and the current memory usage exceeds the "
       "limit, the load will be rejected. If not specified, the limit will "
       "not be set."});

  backend_options_.push_back(
      {OPTION_BACKEND_DIR, "backend-directory", Option::ArgStr,
       "The global directory searched for backend shared libraries. Default is "
       "'/opt/tritonserver/backends'."});
  backend_options_.push_back(
      {OPTION_BACKEND_CONFIG, "backend-config", "<string>,<string>=<string>",
       "Specify a backend-specific configuration setting. The format of this "
       "flag is --backend-config=<backend_name>,<setting>=<value>. Where "
       "<backend_name> is the name of the backend, such as 'tensorrt'."});

  repo_agent_options_.push_back(
      {OPTION_REPOAGENT_DIR, "repoagent-directory", Option::ArgStr,
       "The global directory searched for repository agent shared libraries. "
       "Default is '/opt/tritonserver/repoagents'."});

  // Deprecations
  deprecated_options_.push_back(
      {OPTION_STRICT_MODEL_CONFIG, "strict-model-config", Option::ArgBool,
       "DEPRECATED: If true model configuration files must be provided and all "
       "required "
       "configuration settings must be specified. If false the model "
       "configuration may be absent or only partially specified and the "
       "server will attempt to derive the missing required configuration."});
  deprecated_options_.push_back(
      {OPTION_RESPONSE_CACHE_BYTE_SIZE, "response-cache-byte-size",
       Option::ArgInt, "DEPRECATED: Please use --cache-config instead."});
#ifdef TRITON_ENABLE_TRACING
  deprecated_options_.push_back(
      {OPTION_TRACE_FILEPATH, "trace-file", Option::ArgStr,
       "DEPRECATED: Please use --trace-config triton,file=<path/to/your/file>"
       " Set the file where trace output will be saved. If "
       "--trace-log-frequency"
       " is also specified, this argument value will be the prefix of the files"
       " to save the trace output. See --trace-log-frequency for detail."});
  deprecated_options_.push_back(
      {OPTION_TRACE_LEVEL, "trace-level", Option::ArgStr,
       "DEPRECATED: Please use --trace-config level=<OFF|TIMESTAMPS|TENSORS>"
       "Specify a trace level. OFF to disable tracing, TIMESTAMPS to "
       "trace timestamps, TENSORS to trace tensors. It may be specified "
       "multiple times to trace multiple information. Default is OFF."});
  deprecated_options_.push_back(
      {OPTION_TRACE_RATE, "trace-rate", Option::ArgInt,
       "DEPRECATED: Please use --trace-config rate=<rate value>"
       "Set the trace sampling rate. Default is 1000."});
  deprecated_options_.push_back(
      {OPTION_TRACE_COUNT, "trace-count", Option::ArgInt,
       "DEPRECATED: Please use --trace-config count=<count value>"
       "Set the number of traces to be sampled. If the value is -1, the number "
       "of traces to be sampled will not be limited. Default is -1."});
  deprecated_options_.push_back(
      {OPTION_TRACE_LOG_FREQUENCY, "trace-log-frequency", Option::ArgInt,
       "DEPRECATED: Please use --trace-config triton,log-frequency=<value>"
       "Set the trace log frequency. If the value is 0, Triton will only log "
       "the trace output to <trace-file> when shutting down. Otherwise, Triton "
       "will log the trace output to <trace-file>.<idx> when it collects the "
       "specified number of traces. For example, if the log frequency is 100, "
       "when Triton collects the 100-th trace, it logs the traces to file "
       "<trace-file>.0, and when it collects the 200-th trace, it logs the "
       "101-th to the 200-th traces to file <trace-file>.1. Default is 0."});
#endif  // TRITON_ENABLE_TRACING
}

void
TritonParser::SetupOptionGroups()
{
  SetupOptions();
  option_groups_.emplace_back(GLOBAL_OPTION_GROUP, global_options_);
  option_groups_.emplace_back("Server", server_options_);
  option_groups_.emplace_back("Logging", logging_options_);
  option_groups_.emplace_back("Model Repository", model_repo_options_);
  option_groups_.emplace_back("HTTP", http_options_);
  option_groups_.emplace_back("GRPC", grpc_options_);
  option_groups_.emplace_back("Sagemaker", sagemaker_options_);
  option_groups_.emplace_back("Vertex", vertex_options_);
  option_groups_.emplace_back("Metrics", metric_options_);
  option_groups_.emplace_back("Tracing", tracing_options_);
  option_groups_.emplace_back("Backend", backend_options_);
  option_groups_.emplace_back("Repository Agent", repo_agent_options_);
  option_groups_.emplace_back("Response Cache", cache_options_);
  option_groups_.emplace_back("Rate Limiter", rate_limiter_options_);
  option_groups_.emplace_back(
      "Memory/Device Management", memory_device_options_);
  option_groups_.emplace_back("DEPRECATED", deprecated_options_);
}

// Constructs the parser and registers all option groups up front by calling
// SetupOptionGroups().
TritonParser::TritonParser()
{
  SetupOptionGroups();
}

// Validates that the enabled network services do not conflict with each
// other. Two services conflict when they share the same address string and
// the same port. In addition, a service that declares an allowed port range
// (currently only SageMaker, when its safe range is set) requires every other
// service on the same address to use a port inside that range.
//
// Throws ParseException describing the first violation found.
void
TritonServerParameters::CheckPortCollision()
{
  // [FIXME] try to make this function endpoint type agnostic
  // One entry per enabled service:
  //   <0> service name, <1> address, <2> port,
  //   <3> whether the service restricts the ports of other services,
  //   <4> lowest allowed port, <5> highest allowed port (both inclusive;
  //       -1 when <3> is false).
  using Endpoint =
      std::tuple<std::string, std::string, int32_t, bool, int32_t, int32_t>;
  std::vector<Endpoint> endpoints;
#ifdef TRITON_ENABLE_HTTP
  if (allow_http_) {
    endpoints.emplace_back("HTTP", http_address_, http_port_, false, -1, -1);
  }
#endif  // TRITON_ENABLE_HTTP
#ifdef TRITON_ENABLE_GRPC
  if (allow_grpc_) {
    endpoints.emplace_back(
        "GRPC", grpc_options_.socket_.address_, grpc_options_.socket_.port_,
        false, -1, -1);
  }
#endif  // TRITON_ENABLE_GRPC
#ifdef TRITON_ENABLE_METRICS
  if (allow_metrics_) {
    endpoints.emplace_back(
        "metrics", metrics_address_, metrics_port_, false, -1, -1);
  }
#endif  // TRITON_ENABLE_METRICS
#ifdef TRITON_ENABLE_SAGEMAKER
  if (allow_sagemaker_) {
    endpoints.emplace_back(
        "SageMaker", sagemaker_address_, sagemaker_port_,
        sagemaker_safe_range_set_, sagemaker_safe_range_.first,
        sagemaker_safe_range_.second);
  }
#endif  // TRITON_ENABLE_SAGEMAKER
#ifdef TRITON_ENABLE_VERTEX_AI
  if (allow_vertex_ai_) {
    endpoints.emplace_back(
        "Vertex AI", vertex_ai_address_, vertex_ai_port_, false, -1, -1);
  }
#endif  // TRITON_ENABLE_VERTEX_AI

  const size_t endpoint_count = endpoints.size();
  for (size_t i = 0; i < endpoint_count; ++i) {
    const Endpoint& current = endpoints[i];
    const bool restricts_others = std::get<3>(current);

    // A service without a port range only needs to be compared against the
    // services after it (earlier pairs were already checked). A service with
    // a port range must be compared against every other service.
    for (size_t j = restricts_others ? 0 : (i + 1); j < endpoint_count; ++j) {
      if (j == i) {
        continue;
      }
      const Endpoint& other = endpoints[j];

      // Services bound to different addresses are not compared.
      if (std::get<1>(current) != std::get<1>(other)) {
        continue;
      }

      const int32_t other_port = std::get<2>(other);

      // The other service must stay inside the range declared by 'current'.
      if (restricts_others) {
        const bool below_range = other_port < std::get<4>(current);
        const bool above_range = other_port > std::get<5>(current);
        if (below_range || above_range) {
          std::stringstream ss;
          ss << "The server cannot listen to " << std::get<0>(other)
             << " requests at port " << other_port
             << ", allowed port range is [" << std::get<4>(current) << ", "
             << std::get<5>(current) << "]" << std::endl;
          throw ParseException(ss.str());
        }
      }

      // Same address and same port: direct collision.
      if (std::get<2>(current) == other_port) {
        std::stringstream ss;
        ss << "The server cannot listen to " << std::get<0>(current)
           << " requests "
           << "and " << std::get<0>(other)
           << " requests at the same address and port "
           << std::get<1>(current) << ":" << std::get<2>(current)
           << std::endl;
        throw ParseException(ss.str());
      }
    }
  }
}

// Creates a TRITONSERVER_ServerOptions object and copies every parsed
// parameter held by this object into it through the TRITONSERVER_ServerOptions*
// C API setters.
//
// Returns a managed pointer that releases the options with
// TRITONSERVER_ServerOptionsDelete. The managed pointer is created right
// after allocation, so the options are released if a later setter throws.
//
// Throws ParseException (through THROW_IF_ERR) when creating the options or
// applying any individual setting reports an error.
TritonServerParameters::ManagedTritonServerOptionPtr
TritonServerParameters::BuildTritonServerOptions()
{
  TRITONSERVER_ServerOptions* loptions = nullptr;
  THROW_IF_ERR(
      ParseException, TRITONSERVER_ServerOptionsNew(&loptions),
      "creating server options");
  // Take ownership immediately so an exception below does not leak 'loptions'.
  ManagedTritonServerOptionPtr managed_ptr(
      loptions, TRITONSERVER_ServerOptionsDelete);

  // Server identity, model repository and model control settings.
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetServerId(loptions, server_id_.c_str()),
      "setting server ID");
  for (const auto& model_repository_path : model_repository_paths_) {
    THROW_IF_ERR(
        ParseException,
        TRITONSERVER_ServerOptionsSetModelRepositoryPath(
            loptions, model_repository_path.c_str()),
        "setting model repository path");
  }
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetModelControlMode(loptions, control_mode_),
      "setting model control mode");
  for (const auto& model : startup_models_) {
    THROW_IF_ERR(
        ParseException,
        TRITONSERVER_ServerOptionsSetStartupModel(loptions, model.c_str()),
        "setting startup model");
  }
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetModelConfigName(
          loptions, model_config_name_.c_str()),
      "setting custom model configuration name for models");

  // Rate limiter settings.
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetRateLimiterMode(loptions, rate_limit_mode_),
      "setting rate limiter configuration");
  for (const auto& resource : rate_limit_resources_) {
    THROW_IF_ERR(
        ParseException,
        TRITONSERVER_ServerOptionsAddRateLimiterResource(
            loptions, std::get<0>(resource).c_str(), std::get<1>(resource),
            std::get<2>(resource)),
        "setting rate limiter resource");
  }

  // Memory pool and device settings.
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize(
          loptions, pinned_memory_pool_byte_size_),
      "setting total pinned memory byte size");
  for (const auto& cuda_pool : cuda_pools_) {
    THROW_IF_ERR(
        ParseException,
        TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize(
            loptions, cuda_pool.first, cuda_pool.second),
        "setting total CUDA memory byte size");
  }
  for (const auto& cuda_virtual_address_size : cuda_virtual_address_size_) {
    THROW_IF_ERR(
        ParseException,
        TRITONSERVER_ServerOptionsSetCudaVirtualAddressSize(
            loptions, cuda_virtual_address_size.first,
            cuda_virtual_address_size.second),
        "setting total CUDA virtual address size");
  }
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(
          loptions, min_supported_compute_capability_),
      "setting minimum supported CUDA compute capability");

  // General server behavior.
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetExitOnError(loptions, exit_on_error_),
      "setting exit on error");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetStrictModelConfig(
          loptions, strict_model_config_),
      "setting strict model configuration");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetStrictReadiness(loptions, strict_readiness_),
      "setting strict readiness");
  // [FIXME] std::max seems to be part of Parse()
  // The std::max calls below clamp each value to its lower bound before it
  // is handed to the server options.
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetExitTimeout(
          loptions, std::max(0, exit_timeout_secs_)),
      "setting exit timeout");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetBufferManagerThreadCount(
          loptions, std::max(0, buffer_manager_thread_count_)),
      "setting buffer manager thread count");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetModelLoadThreadCount(
          loptions, std::max(1u, model_load_thread_count_)),
      "setting model load thread count");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetModelLoadRetryCount(
          loptions, std::max(0u, model_load_retry_count_)),
      "setting model load retry count");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetModelNamespacing(
          loptions, enable_model_namespacing_),
      "setting model namespacing");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetEnablePeerAccess(
          loptions, enable_peer_access_),
      "setting peer access");

#ifdef TRITON_ENABLE_LOGGING
  // Check the result like every other setter: the returned error object was
  // previously discarded, which hid failures and leaked the error.
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetLogFile(loptions, log_file_.c_str()),
      "setting log file");
  THROW_IF_ERR(
      ParseException, TRITONSERVER_ServerOptionsSetLogInfo(loptions, log_info_),
      "setting log info enable");
  THROW_IF_ERR(
      ParseException, TRITONSERVER_ServerOptionsSetLogWarn(loptions, log_warn_),
      "setting log warn enable");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetLogError(loptions, log_error_),
      "setting log error enable");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetLogVerbose(loptions, log_verbose_),
      "setting log verbose level");
  // Translate the logger's format enum into the C API's format enum.
  switch (log_format_) {
    case triton::common::Logger::Format::kDEFAULT:
      THROW_IF_ERR(
          ParseException,
          TRITONSERVER_ServerOptionsSetLogFormat(
              loptions, TRITONSERVER_LOG_DEFAULT),
          "setting log format");
      break;
    case triton::common::Logger::Format::kISO8601:
      THROW_IF_ERR(
          ParseException,
          TRITONSERVER_ServerOptionsSetLogFormat(
              loptions, TRITONSERVER_LOG_ISO8601),
          "setting log format");
      break;
  }
#endif  // TRITON_ENABLE_LOGGING

#ifdef TRITON_ENABLE_METRICS
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetMetrics(loptions, allow_metrics_),
      "setting metrics enable");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetGpuMetrics(loptions, allow_gpu_metrics_),
      "setting GPU metrics enable");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetCpuMetrics(loptions, allow_cpu_metrics_),
      "setting CPU metrics enable");
  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetMetricsInterval(
          loptions, metrics_interval_ms_),
      "setting metrics interval");
  for (const auto& mcs : metrics_config_settings_) {
    THROW_IF_ERR(
        ParseException,
        TRITONSERVER_ServerOptionsSetMetricsConfig(
            loptions, std::get<0>(mcs).c_str(), std::get<1>(mcs).c_str(),
            std::get<2>(mcs).c_str()),
        "setting metrics configuration");
  }

#endif  // TRITON_ENABLE_METRICS

  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetBackendDirectory(
          loptions, backend_dir_.c_str()),
      "setting backend directory");

  // Enable cache and configure it if a cache CLI arg is passed,
  // this will allow for an empty configuration.
  if (enable_cache_) {
    THROW_IF_ERR(
        ParseException,
        TRITONSERVER_ServerOptionsSetCacheDirectory(
            loptions, cache_dir_.c_str()),
        "setting cache directory");

    // Each cache's settings are serialized by PairsToJsonStr and passed to
    // the server as a single string per cache name.
    for (const auto& cache_pair : cache_config_settings_) {
      const auto& cache_name = cache_pair.first;
      const auto& settings = cache_pair.second;
      const auto& json_config_str = PairsToJsonStr(settings);
      THROW_IF_ERR(
          ParseException,
          TRITONSERVER_ServerOptionsSetCacheConfig(
              loptions, cache_name.c_str(), json_config_str.c_str()),
          "setting cache configuration");
    }
  }

  THROW_IF_ERR(
      ParseException,
      TRITONSERVER_ServerOptionsSetRepoAgentDirectory(
          loptions, repoagent_dir_.c_str()),
      "setting repository agent directory");
  for (const auto& bcs : backend_config_settings_) {
    THROW_IF_ERR(
        ParseException,
        TRITONSERVER_ServerOptionsSetBackendConfig(
            loptions, std::get<0>(bcs).c_str(), std::get<1>(bcs).c_str(),
            std::get<2>(bcs).c_str()),
        "setting backend configuration");
  }
  for (const auto& limit : load_gpu_limit_) {
    THROW_IF_ERR(
        ParseException,
        TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit(
            loptions, TRITONSERVER_INSTANCEGROUPKIND_GPU, limit.first,
            limit.second),
        "setting model load GPU limit");
  }
  for (const auto& hp : host_policies_) {
    THROW_IF_ERR(
        ParseException,
        TRITONSERVER_ServerOptionsSetHostPolicy(
            loptions, std::get<0>(hp).c_str(), std::get<1>(hp).c_str(),
            std::get<2>(hp).c_str()),
        "setting host policy");
  }
  return managed_ptr;
}

std::pair<TritonServerParameters, std::vector<char*>>
TritonParser::Parse(int argc, char** argv)
{
  //
  // Step 1. Before parsing setup
  //
  TritonServerParameters lparams;
  bool strict_model_config_present{false};
  bool disable_auto_complete_config{false};
  bool cache_size_present{false};
  bool cache_config_present{false};
#ifdef TRITON_ENABLE_TRACING
  bool explicit_disable_trace{false};
  bool trace_filepath_present{false};
  bool trace_level_present{false};
  bool trace_rate_present{false};
  bool trace_count_present{false};
  bool trace_log_frequency_present{false};
#endif  // TRITON_ENABLE_TRACING
  int option_index = 0;

#ifdef TRITON_ENABLE_GRPC
  triton::server::grpc::Options& lgrpc_options = lparams.grpc_options_;
#endif  // TRITON_ENABLE_GRPC

#if defined TRITON_ENABLE_HTTP || defined TRITON_ENABLE_GRPC
  // According to HTTP specification header names are case-insensitive.
  const std::string case_insensitive_prefix{"(?i)"};
#endif  // TRITON_ENABLE_HTTP || TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_VERTEX_AI
  // Set different default value if specific flag is set
  {
    auto aip_mode =
        triton::server::GetEnvironmentVariableOrDefault("AIP_MODE", "");
    // Enable Vertex AI service and disable HTTP / GRPC service by default
    // if detecting Vertex AI environment
    if (aip_mode == "PREDICTION") {
      lparams.allow_vertex_ai_ = true;
#ifdef TRITON_ENABLE_HTTP
      lparams.allow_http_ = false;
#endif  // TRITON_ENABLE_HTTP
#ifdef TRITON_ENABLE_GRPC
      lparams.allow_grpc_ = false;
#endif  // TRITON_ENABLE_GRPC
    }
    auto port = triton::server::GetEnvironmentVariableOrDefault(
        "AIP_HTTP_PORT", "8080");
    lparams.vertex_ai_port_ = ParseOption<int>(port);
  }
#endif  // TRITON_ENABLE_VERTEX_AI

  //
  // Step 2. parse options
  //
  std::vector<struct option> long_options;
  for (const auto& group : option_groups_) {
    for (const auto& o : group.second) {
      long_options.push_back(o.GetLongOption());
    }
  }
  long_options.push_back({nullptr, 0, nullptr, 0});

  int flag;
  while ((flag = getopt_long(
              argc, argv, "", &long_options[0], &option_index)) != -1) {
    try {
      switch (flag) {
        case OPTION_HELP:
          // [FIXME] how help is printed?
        case '?':
          // [FIXME] fall through when seeing this, currently consumes all
          // options [FIXME] disable stderr output of `getopt_long`
          throw ParseException();
#ifdef TRITON_ENABLE_LOGGING
        case OPTION_LOG_VERBOSE:
          lparams.log_verbose_ = ParseIntBoolOption(optarg);
          break;
        case OPTION_LOG_INFO:
          lparams.log_info_ = ParseOption<bool>(optarg);
          break;
        case OPTION_LOG_WARNING:
          lparams.log_warn_ = ParseOption<bool>(optarg);
          break;
        case OPTION_LOG_ERROR:
          lparams.log_error_ = ParseOption<bool>(optarg);
          break;
        case OPTION_LOG_FORMAT: {
          std::string format_str(optarg);
          if (format_str == "default") {
            lparams.log_format_ = triton::common::Logger::Format::kDEFAULT;
          } else if (format_str == "ISO8601") {
            lparams.log_format_ = triton::common::Logger::Format::kISO8601;
          } else {
            throw ParseException("invalid argument for --log-format");
          }
          break;
        }
        case OPTION_LOG_FILE:
          lparams.log_file_ = optarg;
          break;
#endif  // TRITON_ENABLE_LOGGING

        case OPTION_ID:
          lparams.server_id_ = optarg;
          break;
        case OPTION_MODEL_REPOSITORY:
          lparams.model_repository_paths_.insert(optarg);
          break;
        case OPTION_EXIT_ON_ERROR:
          lparams.exit_on_error_ = ParseOption<bool>(optarg);
          break;
        case OPTION_DISABLE_AUTO_COMPLETE_CONFIG:
          disable_auto_complete_config = true;
          break;
        case OPTION_STRICT_MODEL_CONFIG:
          std::cerr << "Warning: '--strict-model-config' has been deprecated! "
                       "Please use '--disable-auto-complete-config' instead."
                    << std::endl;
          strict_model_config_present = true;
          lparams.strict_model_config_ = ParseOption<bool>(optarg);
          break;
        case OPTION_STRICT_READINESS:
          lparams.strict_readiness_ = ParseOption<bool>(optarg);
          break;

#ifdef TRITON_ENABLE_HTTP
        case OPTION_ALLOW_HTTP:
          lparams.allow_http_ = ParseOption<bool>(optarg);
          break;
        case OPTION_HTTP_PORT:
          lparams.http_port_ = ParseOption<int>(optarg);
          break;
        case OPTION_REUSE_HTTP_PORT:
          lparams.reuse_http_port_ = ParseOption<bool>(optarg);
          break;
        case OPTION_HTTP_ADDRESS:
          lparams.http_address_ = optarg;
          break;
        case OPTION_HTTP_HEADER_FORWARD_PATTERN:
          lparams.http_forward_header_pattern_ =
              std::move(case_insensitive_prefix + optarg);
          break;
        case OPTION_HTTP_THREAD_COUNT:
          lparams.http_thread_cnt_ = ParseOption<int>(optarg);
          break;
        case OPTION_HTTP_MAX_INPUT_SIZE: {
          int64_t temp_input_size = ParseOption<int64_t>(optarg);
          if (temp_input_size <= 0) {
            throw ParseException(
                "Error: --http-max-input-size must be greater than 0.");
          }
          lparams.http_max_input_size_ = temp_input_size;
          break;
        }
        case OPTION_HTTP_RESTRICTED_API:
          ParseRestrictedFeatureOption(
              optarg, long_options[option_index].name, "", "api",
              lparams.http_restricted_apis_);
          break;

#endif  // TRITON_ENABLE_HTTP

#ifdef TRITON_ENABLE_SAGEMAKER
        case OPTION_ALLOW_SAGEMAKER:
          lparams.allow_sagemaker_ = ParseOption<bool>(optarg);
          break;
        case OPTION_SAGEMAKER_PORT:
          lparams.sagemaker_port_ = ParseOption<int>(optarg);
          break;
        case OPTION_SAGEMAKER_SAFE_PORT_RANGE:
          lparams.sagemaker_safe_range_set_ = true;
          lparams.sagemaker_safe_range_ =
              ParsePairOption<int, int>(optarg, "-");
          break;
        case OPTION_SAGEMAKER_THREAD_COUNT:
          lparams.sagemaker_thread_cnt_ = ParseOption<int>(optarg);
          break;
#endif  // TRITON_ENABLE_SAGEMAKER

#ifdef TRITON_ENABLE_VERTEX_AI
        case OPTION_ALLOW_VERTEX_AI:
          lparams.allow_vertex_ai_ = ParseOption<bool>(optarg);
          break;
        case OPTION_VERTEX_AI_PORT:
          lparams.vertex_ai_port_ = ParseOption<int>(optarg);
          break;
        case OPTION_VERTEX_AI_THREAD_COUNT:
          lparams.vertex_ai_thread_cnt_ = ParseOption<int>(optarg);
          break;
        case OPTION_VERTEX_AI_DEFAULT_MODEL:
          lparams.vertex_ai_default_model_ = optarg;
          break;
#endif  // TRITON_ENABLE_VERTEX_AI

#ifdef TRITON_ENABLE_GRPC
        case OPTION_ALLOW_GRPC:
          lparams.allow_grpc_ = ParseOption<bool>(optarg);
          break;
        case OPTION_GRPC_PORT:
          lgrpc_options.socket_.port_ = ParseOption<int>(optarg);
          break;
        case OPTION_REUSE_GRPC_PORT:
          lgrpc_options.socket_.reuse_port_ = ParseOption<bool>(optarg);
          break;
        case OPTION_GRPC_ADDRESS:
          lgrpc_options.socket_.address_ = optarg;
          break;
        case OPTION_GRPC_INFER_THREAD_COUNT:
          lgrpc_options.infer_thread_count_ = ParseOption<int>(optarg);
          if (lgrpc_options.infer_thread_count_ < 2 ||
              lgrpc_options.infer_thread_count_ > 128) {
            throw ParseException(
                "invalid argument for --grpc_infer_thread_count. Must be in "
                "the range 2 to 128.");
          }
          break;
        case OPTION_GRPC_INFER_ALLOCATION_POOL_SIZE:
          lgrpc_options.infer_allocation_pool_size_ = ParseOption<int>(optarg);
          break;
        case OPTION_GRPC_MAX_RESPONSE_POOL_SIZE:
          lgrpc_options.max_response_pool_size_ = ParseOption<int>(optarg);
          if (lgrpc_options.max_response_pool_size_ <= 0) {
            throw ParseException(
                "Error: --grpc-max-response-pool-size must be greater "
                "than 0.");
          }
          break;
        case OPTION_GRPC_USE_SSL:
          lgrpc_options.ssl_.use_ssl_ = ParseOption<bool>(optarg);
          break;
        case OPTION_GRPC_USE_SSL_MUTUAL:
          lgrpc_options.ssl_.use_mutual_auth_ = ParseOption<bool>(optarg);
          lgrpc_options.ssl_.use_ssl_ = true;
          break;
        case OPTION_GRPC_SERVER_CERT:
          lgrpc_options.ssl_.server_cert_ = optarg;
          break;
        case OPTION_GRPC_SERVER_KEY:
          lgrpc_options.ssl_.server_key_ = optarg;
          break;
        case OPTION_GRPC_ROOT_CERT:
          lgrpc_options.ssl_.root_cert_ = optarg;
          break;
        case OPTION_GRPC_RESPONSE_COMPRESSION_LEVEL: {
          std::string mode_str(optarg);
          std::transform(
              mode_str.begin(), mode_str.end(), mode_str.begin(), ::tolower);
          if (mode_str == "none") {
            lgrpc_options.infer_compression_level_ = GRPC_COMPRESS_LEVEL_NONE;
          } else if (mode_str == "low") {
            lgrpc_options.infer_compression_level_ = GRPC_COMPRESS_LEVEL_LOW;
          } else if (mode_str == "medium") {
            lgrpc_options.infer_compression_level_ = GRPC_COMPRESS_LEVEL_MED;
          } else if (mode_str == "high") {
            lgrpc_options.infer_compression_level_ = GRPC_COMPRESS_LEVEL_HIGH;
          } else {
            throw ParseException(
                "invalid argument for "
                "--grpc_infer_response_compression_level");
          }
          break;
        }
        case OPTION_GRPC_ARG_KEEPALIVE_TIME_MS:
          lgrpc_options.keep_alive_.keepalive_time_ms_ =
              ParseOption<int>(optarg);
          break;
        case OPTION_GRPC_ARG_KEEPALIVE_TIMEOUT_MS:
          lgrpc_options.keep_alive_.keepalive_timeout_ms_ =
              ParseOption<int>(optarg);
          break;
        case OPTION_GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS:
          lgrpc_options.keep_alive_.keepalive_permit_without_calls_ =
              ParseOption<bool>(optarg);
          break;
        case OPTION_GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA:
          lgrpc_options.keep_alive_.http2_max_pings_without_data_ =
              ParseOption<int>(optarg);
          break;
        case OPTION_GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS:
          lgrpc_options.keep_alive_
              .http2_min_recv_ping_interval_without_data_ms_ =
              ParseOption<int>(optarg);
          break;
        case OPTION_GRPC_ARG_HTTP2_MAX_PING_STRIKES:
          lgrpc_options.keep_alive_.http2_max_ping_strikes_ =
              ParseOption<int>(optarg);
          break;
        case OPTION_GRPC_ARG_MAX_CONNECTION_AGE_MS:
          lgrpc_options.keep_alive_.max_connection_age_ms_ =
              ParseOption<int>(optarg);
          break;
        case OPTION_GRPC_ARG_MAX_CONNECTION_AGE_GRACE_MS:
          lgrpc_options.keep_alive_.max_connection_age_grace_ms_ =
              ParseOption<int>(optarg);
          break;
        case OPTION_GRPC_RESTRICTED_PROTOCOL: {
          ParseRestrictedFeatureOption(
              optarg, long_options[option_index].name,
              std::string(
                  triton::server::grpc::kRestrictedProtocolHeaderTemplate),
              "protocol", lgrpc_options.restricted_protocols_);
          break;
        }
        case OPTION_GRPC_HEADER_FORWARD_PATTERN:
          lgrpc_options.forward_header_pattern_ =
              std::move(case_insensitive_prefix + optarg);
          break;
#endif  // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_METRICS
        case OPTION_ALLOW_METRICS:
          lparams.allow_metrics_ = ParseOption<bool>(optarg);
          break;
        case OPTION_ALLOW_GPU_METRICS:
          lparams.allow_gpu_metrics_ = ParseOption<bool>(optarg);
          break;
        case OPTION_ALLOW_CPU_METRICS:
          lparams.allow_cpu_metrics_ = ParseOption<bool>(optarg);
          break;
        case OPTION_METRICS_ADDRESS:
          lparams.metrics_address_ = optarg;
          break;
        case OPTION_METRICS_PORT:
          lparams.metrics_port_ = ParseOption<int>(optarg);
          break;
        case OPTION_METRICS_INTERVAL_MS:
          lparams.metrics_interval_ms_ = ParseOption<int>(optarg);
          break;
        case OPTION_METRICS_CONFIG:
          lparams.metrics_config_settings_.push_back(
              ParseMetricsConfigOption(optarg));
          break;
#endif  // TRITON_ENABLE_METRICS

#ifdef TRITON_ENABLE_TRACING
        case OPTION_TRACE_FILEPATH: {
          std::cerr << "Warning: '--trace-file' has been deprecated and will be"
                       " removed in future releases. Please use "
                       "'--trace-config triton,file=<filepath> instead."
                    << std::endl;
          trace_filepath_present = true;
          lparams.trace_filepath_ = optarg;
          break;
        }
        case OPTION_TRACE_LEVEL: {
          std::cerr
              << "Warning: '--trace-level' has been deprecated and will be"
                 " removed in future releases. Please use "
                 "'--trace-config level=<OFF|TIMESTAMPS|TENSORS> instead."
              << std::endl;
          trace_level_present = true;
          auto parsed_level = ParseTraceLevelOption(optarg);
          explicit_disable_trace |=
              (parsed_level == TRITONSERVER_TRACE_LEVEL_DISABLED);
          lparams.trace_level_ = static_cast<TRITONSERVER_InferenceTraceLevel>(
              lparams.trace_level_ | parsed_level);
          break;
        }
        case OPTION_TRACE_RATE:
          std::cerr << "Warning: '--trace-rate' has been deprecated and will be"
                       " removed in future releases. Please use "
                       "'--trace-config rate=<rate value> instead."
                    << std::endl;
          trace_rate_present = true;
          lparams.trace_rate_ = ParseOption<int>(optarg);
          break;

        case OPTION_TRACE_COUNT:
          std::cerr
              << "Warning: '--trace-count' has been deprecated and will be"
                 " removed in future releases. Please use "
                 "'--trace-config count=<count value> instead."
              << std::endl;
          trace_count_present = true;
          lparams.trace_count_ = ParseOption<int>(optarg);
          break;
        case OPTION_TRACE_LOG_FREQUENCY:
          std::cerr
              << "Warning: '--trace-log-frequency' has been deprecated and "
                 "will be"
                 " removed in future releases. Please use "
                 "'--trace-config triton,log-frequency=<log frequency "
                 "value> instead."
              << std::endl;
          trace_log_frequency_present = true;
          lparams.trace_log_frequency_ = ParseOption<int>(optarg);
          break;
        case OPTION_TRACE_CONFIG: {
          auto trace_config_setting = ParseTraceConfigOption(optarg);
          triton::server::TraceConfig& tc =
              lparams
                  .trace_config_map_[std::get<0>(trace_config_setting).c_str()];
          tc.push_back(std::make_pair(
              std::get<1>(trace_config_setting).c_str(),
              std::get<2>(trace_config_setting).c_str()));
          break;
        }
#endif  // TRITON_ENABLE_TRACING

        case OPTION_POLL_REPO_SECS:
          lparams.repository_poll_secs_ = ParseOption<int>(optarg);
          break;
        case OPTION_STARTUP_MODEL:
          lparams.startup_models_.insert(optarg);
          break;
        case OPTION_CUSTOM_MODEL_CONFIG_NAME:
          if (std::strlen(optarg) == 0) {
            throw ParseException(
                "Error: empty argument for --model-config-name");
          }
          lparams.model_config_name_ = optarg;
          break;
        case OPTION_MODEL_CONTROL_MODE: {
          std::string mode_str(optarg);
          std::transform(
              mode_str.begin(), mode_str.end(), mode_str.begin(), ::tolower);
          if (mode_str == "none") {
            lparams.control_mode_ = TRITONSERVER_MODEL_CONTROL_NONE;
          } else if (mode_str == "poll") {
            lparams.control_mode_ = TRITONSERVER_MODEL_CONTROL_POLL;
          } else if (mode_str == "explicit") {
            lparams.control_mode_ = TRITONSERVER_MODEL_CONTROL_EXPLICIT;
          } else {
            throw ParseException("invalid argument for --model-control-mode");
          }
          break;
        }
        case OPTION_RATE_LIMIT: {
          std::string rate_limit_str(optarg);
          std::transform(
              rate_limit_str.begin(), rate_limit_str.end(),
              rate_limit_str.begin(), ::tolower);
          if (rate_limit_str == "execution_count") {
            lparams.rate_limit_mode_ = TRITONSERVER_RATE_LIMIT_EXEC_COUNT;
          } else if (rate_limit_str == "off") {
            lparams.rate_limit_mode_ = TRITONSERVER_RATE_LIMIT_OFF;
          } else {
            throw ParseException("invalid argument for --rate-limit");
          }
          break;
        }
        case OPTION_RATE_LIMIT_RESOURCE: {
          std::string rate_limit_resource_str(optarg);
          std::transform(
              rate_limit_resource_str.begin(), rate_limit_resource_str.end(),
              rate_limit_resource_str.begin(), ::tolower);
          lparams.rate_limit_resources_.push_back(
              ParseRateLimiterResourceOption(optarg));
          break;
        }
        case OPTION_PINNED_MEMORY_POOL_BYTE_SIZE:
          lparams.pinned_memory_pool_byte_size_ = ParseOption<int64_t>(optarg);
          break;
        case OPTION_CUDA_MEMORY_POOL_BYTE_SIZE:
          lparams.cuda_pools_.push_back(
              ParsePairOption<int, uint64_t>(optarg, ":"));
          break;
        case OPTION_CUDA_VIRTUAL_ADDRESS_SIZE:
          lparams.cuda_virtual_address_size_.push_back(
              ParsePairOption<int, size_t>(optarg, ":"));
          break;
        case OPTION_RESPONSE_CACHE_BYTE_SIZE: {
          cache_size_present = true;
          const auto byte_size = std::to_string(ParseOption<int64_t>(optarg));
          lparams.cache_config_settings_["local"] = {{"size", byte_size}};
          std::cerr
              << "Warning: '--response-cache-byte-size' has been deprecated! "
                 "This will default to the 'local' cache implementation with "
                 "the provided byte size for its config. Please use "
                 "'--cache-config' instead. The equivalent "
                 "--cache-config CLI args would be: "
                 "'--cache-config=local,size=" +
                     byte_size + "'"
              << std::endl;
          break;
        }
        case OPTION_CACHE_CONFIG: {
          cache_config_present = true;
          const auto cache_setting = ParseCacheConfigOption(optarg);
          const auto& cache_name = std::get<0>(cache_setting);
          const auto& key = std::get<1>(cache_setting);
          const auto& value = std::get<2>(cache_setting);
          lparams.cache_config_settings_[cache_name].push_back({key, value});
          break;
        }
        case OPTION_CACHE_DIR:
          lparams.cache_dir_ = optarg;
          break;
        case OPTION_MIN_SUPPORTED_COMPUTE_CAPABILITY:
          lparams.min_supported_compute_capability_ =
              ParseOption<double>(optarg);
          break;
        case OPTION_EXIT_TIMEOUT_SECS:
          lparams.exit_timeout_secs_ = ParseOption<int>(optarg);
          break;
        case OPTION_BACKEND_DIR:
          lparams.backend_dir_ = optarg;
          break;
        case OPTION_REPOAGENT_DIR:
          lparams.repoagent_dir_ = optarg;
          break;
        case OPTION_BUFFER_MANAGER_THREAD_COUNT:
          lparams.buffer_manager_thread_count_ = ParseOption<int>(optarg);
          break;
        case OPTION_MODEL_LOAD_THREAD_COUNT:
          lparams.model_load_thread_count_ = ParseOption<int>(optarg);
          break;
        case OPTION_MODEL_LOAD_RETRY_COUNT:
          lparams.model_load_retry_count_ = ParseOption<int>(optarg);
          break;
        case OPTION_BACKEND_CONFIG:
          lparams.backend_config_settings_.push_back(
              ParseBackendConfigOption(optarg));
          break;
        case OPTION_HOST_POLICY:
          lparams.host_policies_.push_back(ParseHostPolicyOption(optarg));
          break;
        case OPTION_MODEL_LOAD_GPU_LIMIT:
          lparams.load_gpu_limit_.emplace(
              ParsePairOption<int, double>(optarg, ":"));
          break;
        case OPTION_MODEL_NAMESPACING:
          lparams.enable_model_namespacing_ = ParseOption<bool>(optarg);
          break;
        case OPTION_ENABLE_PEER_ACCESS:
          lparams.enable_peer_access_ = ParseOption<bool>(optarg);
          break;
      }
    }
    catch (const ParseException& pe) {
      if ((pe.what() != NULL) && (strlen(pe.what()) != 0)) {
        std::stringstream ss;
        ss << "Bad option: \"--" << long_options[option_index].name << "\".\n"
           << pe.what() << std::endl;
        throw ParseException(ss.str());
      } else {
        // In case of `Unrecognized option` or `Help` option, just throw a
        // ParseException
        throw ParseException();
      }
    }
  }

  if (optind < argc) {
    throw ParseException(std::string("Unexpected argument: ") + argv[optind]);
  }

  //
  // Step 3. Post parsing validation, usually for options that depend on the
  // others which are not determined until after parsing.
  //

  if (lparams.control_mode_ != TRITONSERVER_MODEL_CONTROL_POLL) {
    lparams.repository_poll_secs_ = 0;
  }

  if (lparams.startup_models_.size() > 0 &&
      lparams.control_mode_ != TRITONSERVER_MODEL_CONTROL_EXPLICIT) {
    throw ParseException(
        "Error: Use of '--load-model' requires setting "
        "'--model-control-mode=explicit' as well.");
  }


#ifdef TRITON_ENABLE_VERTEX_AI
  // Set default model repository if specific flag is set, postpone the
  // check to after parsing so we only monitor the default repository if
  // Vertex service is allowed
  if (lparams.model_repository_paths_.empty()) {
    auto aip_storage_uri =
        triton::server::GetEnvironmentVariableOrDefault("AIP_STORAGE_URI", "");
    if (!aip_storage_uri.empty()) {
      lparams.model_repository_paths_.insert(aip_storage_uri);
    }
  }
#endif  // TRITON_ENABLE_VERTEX_AI

#ifdef TRITON_ENABLE_METRICS
  lparams.allow_gpu_metrics_ &= lparams.allow_metrics_;
  lparams.allow_cpu_metrics_ &= lparams.allow_metrics_;
  // Set metrics_address to default if never specified
  if (lparams.metrics_address_.empty()) {
#ifdef TRITON_ENABLE_HTTP
    // If built with HTTP support, default to HTTP address
    lparams.metrics_address_ = lparams.http_address_;
#else
    // Otherwise have default for builds without HTTP support
    lparams.metrics_address_ = "0.0.0.0";
#endif  // TRITON_ENABLE_HTTP
  }
#endif  // TRITON_ENABLE_METRICS

#ifdef TRITON_ENABLE_TRACING
  PostProcessTraceArgs(
      lparams, trace_level_present, trace_rate_present, trace_count_present,
      trace_filepath_present, trace_log_frequency_present,
      explicit_disable_trace);
#endif  // TRITON_ENABLE_TRACING

  // Check if there is a conflict between --disable-auto-complete-config
  // and --strict-model-config
  if (disable_auto_complete_config) {
    if (strict_model_config_present && !lparams.strict_model_config_) {
      std::cerr
          << "Warning: Overriding deprecated '--strict-model-config' from "
             "False to True in favor of '--disable-auto-complete-config'!"
          << std::endl;
    }
    lparams.strict_model_config_ = true;
  }

  // Check if there is a conflict between --response-cache-byte-size
  // and --cache-config
  if (cache_size_present && cache_config_present) {
    throw ParseException(
        "Error: Incompatible flags --response-cache-byte-size and "
        "--cache-config both provided. Please provide one or the other.");
  }
  lparams.enable_cache_ = (cache_size_present || cache_config_present);
  return {lparams, {}};
}

// Soft-wraps `str` for the usage text by replacing selected spaces with
// "\n\t".
//
// Starting at `offset`, the text is processed in windows of `width`
// characters. For each window the last space at or before the wrap column is
// replaced by a newline followed by a tab. If no such space exists (for
// example the text begins with a single token longer than the window), the
// next space after the wrap column is used instead; if there is no space
// left at all, wrapping stops. The previous implementation did not advance
// in that situation and looped forever.
//
// Returns the wrapped copy of `str`.
std::string
TritonParser::FormatUsageMessage(std::string str, int offset)
{
  const int width = 60;
  int current_pos = offset;
  while (current_pos + width < static_cast<int>(str.length())) {
    // Prefer breaking at the last space that still fits in the window.
    size_t break_pos = str.rfind(' ', current_pos + width);
    if (break_pos == std::string::npos) {
      // Nothing to break on at or before the wrap column: fall back to the
      // first space after it so that overly long tokens stay intact.
      break_pos = str.find(' ', current_pos + width);
      if (break_pos == std::string::npos) {
        // No spaces remain anywhere; the text cannot be wrapped further.
        break;
      }
    }
    str.replace(break_pos, 1, "\n\t");
    // NOTE(review): the extra 9 presumably compensates for the inserted
    // newline plus the rendered width of the tab - confirm before tuning.
    current_pos += (width + 9);
  }

  return str;
}

// Renders the help text for every registered option group.
//
// A group header ("<name>:") is emitted only when the group has both a
// non-empty name and at least one option. Each option is printed as
// "  --<flag>" (followed by " <arg description>" when one is set) and its
// wrapped description on the next, tab-indented line.
std::string
TritonParser::Usage()
{
  std::stringstream usage;
  for (const auto& group : option_groups_) {
    const auto& title = group.first;
    const auto& options = group.second;

    if (!(title.empty() || options.empty())) {
      usage << std::endl << title << ":" << std::endl;
    }

    for (const auto& option : options) {
      // The flag line differs only by the optional argument placeholder.
      usage << "  --" << option.flag_;
      if (!option.arg_desc_.empty()) {
        usage << " <" << option.arg_desc_ << ">";
      }
      usage << std::endl
            << "\t" << FormatUsageMessage(option.desc_, 0) << std::endl;
    }
  }
  return usage.str();
}

// Parses a --metrics-config argument of the form "<setting>=<value>".
//
// Returns {name, setting, value} where name is always the empty string:
// name-specific metrics configs are not supported, so a comma in the part
// before '=' is rejected. Commas in the value are allowed.
//
// Throws ParseException when '=' is missing, when the setting contains a
// comma, or when either the setting or the value is empty.
std::tuple<std::string, std::string, std::string>
TritonParser::ParseMetricsConfigOption(const std::string& arg)
{
  // Every failure mode reports the same usage hint.
  const auto format_error = [&arg]() {
    std::stringstream ss;
    ss << "--metrics-config option format is "
       << "<setting>=<value>. Got " << arg << std::endl;
    return ss.str();
  };

  const size_t eq_pos = arg.find('=');
  if (eq_pos == std::string::npos) {
    throw ParseException(format_error());
  }

  // Only the text before '=' is inspected for a comma so that commas inside
  // the value do not trigger the "named config" rejection.
  std::string setting_string = arg.substr(0, eq_pos);
  if (setting_string.find(',') != std::string::npos) {
    throw ParseException(format_error());
  }

  std::string value_string = arg.substr(eq_pos + 1);
  if (setting_string.empty() || value_string.empty()) {
    throw ParseException(format_error());
  }

  // Global configs are keyed by the empty name, like the other config
  // options.
  std::string name_string;
  return {name_string, setting_string, value_string};
}

// Parses a --cache-config argument of the form
// "<cache name>,<setting>=<value>".
//
// Returns {cache name, setting, value}. Cache-agnostic settings (no cache
// name) are not supported, so an argument without a comma, or with a leading
// comma, is rejected.
//
// Throws ParseException when the cache name is missing, when no '=' follows
// the comma, or when the setting or the value is empty.
std::tuple<std::string, std::string, std::string>
TritonParser::ParseCacheConfigOption(const std::string& arg)
{
  const size_t comma_pos = arg.find(',');
  if ((comma_pos == std::string::npos) || (comma_pos == 0)) {
    std::stringstream ss;
    ss << "No cache specified. --cache-config option format is "
       << "<cache name>,<setting>=<value>. Got " << arg << std::endl;
    throw ParseException(ss.str());
  }
  std::string name_string = arg.substr(0, comma_pos);

  // The '=' must come after the comma; an earlier '=' does not count.
  const size_t eq_pos = arg.find('=', comma_pos + 1);
  const bool has_setting = (eq_pos != std::string::npos);

  std::string setting_string;
  std::string value_string;
  if (has_setting) {
    setting_string = arg.substr(comma_pos + 1, eq_pos - comma_pos - 1);
    value_string = arg.substr(eq_pos + 1);
  }

  if (!has_setting || setting_string.empty() || value_string.empty()) {
    std::stringstream ss;
    ss << "--cache-config option format is '<cache "
          "name>,<setting>=<value>'. Got "
       << arg << std::endl;
    throw ParseException(ss.str());
  }

  return {name_string, setting_string, value_string};
}

// Parses a --rate-limit-resource argument of the form
// "<resource_name>:<count>:<device>" or "<resource_name>:<count>".
//
// Returns {resource name, count, device id}; the device id is -1 when the
// two-field form is used.
//
// Throws ParseException when the argument contains no colon or more than two
// colons. Numeric fields are converted with ParseOption<int>, which reports
// its own errors.
std::tuple<std::string, int, int>
TritonParser::ParseRateLimiterResourceOption(const std::string& arg)
{
  const std::string error_string =
      "--rate-limit-resource option format is "
      "'<resource_name>:<count>:<device>' or '<resource_name>:<count>'. "
      "Got " +
      arg;

  const size_t first_colon = arg.find(':');
  if (first_colon == std::string::npos) {
    // Not even "<resource_name>:<count>".
    throw ParseException(error_string);
  }

  std::string name_string = arg.substr(0, first_colon);
  int count = -1;
  int device_id = -1;

  const size_t second_colon = arg.find(':', first_colon + 1);
  if (second_colon == std::string::npos) {
    // Two-field form: everything after the colon is the count.
    count = ParseOption<int>(arg.substr(first_colon + 1));
    return {name_string, count, device_id};
  }

  // Three-field form: a further colon means too many fields.
  if (arg.find(':', second_colon + 1) != std::string::npos) {
    throw ParseException(error_string);
  }

  count = ParseOption<int>(
      arg.substr(first_colon + 1, second_colon - first_colon - 1));
  device_id = ParseOption<int>(arg.substr(second_colon + 1));
  return {name_string, count, device_id};
}

// Parses a --backend-config argument.
//
// Accepted forms are "<backend name>,<setting>=<value>" for a specific
// backend and "<setting>=<value>" for a backend-agnostic (global) setting.
// Returns {backend name, setting, value}; the backend name is empty for the
// global form.
//
// Throws ParseException when the argument starts with a comma, when no '='
// is found after the optional backend name, or when the setting or the value
// is empty.
std::tuple<std::string, std::string, std::string>
TritonParser::ParseBackendConfigOption(const std::string& arg)
{
  const size_t comma_pos = arg.find(',');
  if (comma_pos == 0) {
    std::stringstream ss;
    ss << "No backend specified. --backend-config option format is "
       << "<backend name>,<setting>=<value> or "
       << "<setting>=<value>. Got " << arg << std::endl;
    throw ParseException(ss.str());
  }

  // Without a comma the whole argument is a global "<setting>=<value>".
  const bool is_global = (comma_pos == std::string::npos);
  const size_t setting_begin = is_global ? 0 : (comma_pos + 1);
  std::string name_string =
      is_global ? std::string() : arg.substr(0, comma_pos);

  const size_t eq_pos = arg.find('=', setting_begin);
  const bool has_setting = (eq_pos != std::string::npos);

  std::string setting_string;
  std::string value_string;
  if (has_setting) {
    setting_string = arg.substr(setting_begin, eq_pos - setting_begin);
    value_string = arg.substr(eq_pos + 1);
  }

  if (!has_setting || setting_string.empty() || value_string.empty()) {
    std::stringstream ss;
    ss << "--backend-config option format is '<backend "
          "name>,<setting>=<value>'. Got "
       << arg << std::endl;
    throw ParseException(ss.str());
  }

  return {name_string, setting_string, value_string};
}

// Parses a restricted-feature option of the form
// "<feature>[,<feature>...]:<key>=<value>" and records the restriction for
// every listed feature.
//
// `option_name` is used only for error reporting by the generic parser,
// `key_prefix` is prepended to the parsed key, and `feature_type` names the
// kind of feature ("api", "protocol", ...) in error messages.
//
// Throws ParseException when the argument is malformed, when a feature name
// is not recognized, or when a feature is already restricted. Features are
// validated and inserted one by one, so features preceding an offending
// entry have already been inserted when the exception is thrown.
void
TritonParser::ParseRestrictedFeatureOption(
    const std::string& arg, const std::string& option_name,
    const std::string& key_prefix, const std::string& feature_type,
    RestrictedFeatures& restricted_features)
{
  const auto parsed =
      ParseGenericConfigOption(arg, ":", "=", option_name, "config name");

  // The first field is handed to SplitOptions with "," as the separator.
  const auto feature_names = SplitOptions(std::get<0>(parsed), ",");
  const std::string restricted_key = key_prefix + std::get<1>(parsed);
  const std::string& restricted_value = std::get<2>(parsed);

  for (const auto& feature_name : feature_names) {
    const auto category = RestrictedFeatures::ToCategory(feature_name);

    if (category == RestrictedCategory::INVALID) {
      std::stringstream ss;
      ss << "unknown restricted " << feature_type << " '" << feature_name
         << "' " << std::endl;
      throw ParseException(ss.str());
    }

    // A feature may belong to at most one restricted group.
    if (restricted_features.IsRestricted(category)) {
      std::stringstream ss;
      ss << "restricted " << feature_type << " '" << feature_name
         << "' can not be specified in multiple config groups" << std::endl;
      throw ParseException(ss.str());
    }

    restricted_features.Insert(
        category, std::make_pair(restricted_key, restricted_value));
  }
}

// Parses a --host-policy argument of the form
// "<policy name>,<setting>=<value>" by delegating to the generic config
// parser. Returns {policy name, setting, value}.
std::tuple<std::string, std::string, std::string>
TritonParser::ParseHostPolicyOption(const std::string& arg)
{
  const std::string name_delimiter(",");
  const std::string setting_delimiter("=");
  return ParseGenericConfigOption(
      arg, name_delimiter, setting_delimiter, "host-policy", "policy name");
}

// Parses an argument of the form
// "<name><first_delim><setting><second_delim><value>".
//
// `option_name` and `config_name` are used only to build the error message
// ("--<option_name> option format is '<<config_name>>...'").
//
// Returns {name, setting, value}.
//
// Throws ParseException when either delimiter is missing (the second one is
// searched for only after the first) or when any of the three parts is
// empty.
//
// Delimiters may be longer than one character: offsets are advanced by the
// delimiter length rather than by a hard-coded 1, so no delimiter text leaks
// into the setting or the value.
std::tuple<std::string, std::string, std::string>
TritonParser::ParseGenericConfigOption(
    const std::string& arg, const std::string& first_delim,
    const std::string& second_delim, const std::string& option_name,
    const std::string& config_name)
{
  const std::string error_string = "--" + option_name + " option format is '<" +
                                   config_name + ">" + first_delim +
                                   "<setting>" + second_delim +
                                   "<value>'. Got " + arg + "\n";

  // Locate the delimiter that terminates the name.
  const size_t delim_name = arg.find(first_delim);
  if (delim_name == std::string::npos) {
    throw ParseException(error_string);
  }

  // The setting starts right after the complete first delimiter.
  const size_t setting_begin = delim_name + first_delim.size();
  const size_t delim_setting = arg.find(second_delim, setting_begin);
  if (delim_setting == std::string::npos) {
    throw ParseException(error_string);
  }

  std::string name_string = arg.substr(0, delim_name);
  std::string setting_string =
      arg.substr(setting_begin, delim_setting - setting_begin);
  std::string value_string = arg.substr(delim_setting + second_delim.size());

  if (name_string.empty() || setting_string.empty() || value_string.empty()) {
    throw ParseException(error_string);
  }

  return {name_string, setting_string, value_string};
}

#ifdef TRITON_ENABLE_TRACING
// Maps a trace level string, compared case-insensitively, to the
// corresponding TRITONSERVER_InferenceTraceLevel.
// Throws ParseException for any unrecognized value.
TRITONSERVER_InferenceTraceLevel
TritonParser::ParseTraceLevelOption(std::string arg)
{
  // Lower-case the local copy so the comparisons below ignore case.
  for (auto& ch : arg) {
    ch = std::tolower(static_cast<unsigned char>(ch));
  }

  if (arg == "tensors") {
    return TRITONSERVER_TRACE_LEVEL_TENSORS;
  }
  if ((arg == "off") || (arg == "false")) {
    return TRITONSERVER_TRACE_LEVEL_DISABLED;
  }
  // All of these spellings select the timestamps level.
  for (const char* alias : {"true", "on", "min", "max", "timestamps"}) {
    if (arg == alias) {
      return TRITONSERVER_TRACE_LEVEL_TIMESTAMPS;
    }
  }

  throw ParseException("invalid value for trace level option: " + arg);
}

// Maps a trace mode string, compared case-insensitively, to the
// corresponding InferenceTraceMode.
// Throws ParseException if the value is neither "triton" nor "opentelemetry".
InferenceTraceMode
TritonParser::ParseTraceModeOption(std::string arg)
{
  // Lower-case the local copy so the comparisons below ignore case.
  for (auto& ch : arg) {
    ch = std::tolower(static_cast<unsigned char>(ch));
  }

  if (arg == "opentelemetry") {
    return TRACE_MODE_OPENTELEMETRY;
  }
  if (arg == "triton") {
    return TRACE_MODE_TRITON;
  }

  throw ParseException(
      "invalid value for trace mode option: " + arg +
      ". Available options are \"triton\" and \"opentelemetry\"");
}

// Parses a "--trace-config" argument given either as
// "<trace mode>,<setting>=<value>" or, for a global setting,
// "<setting>=<value>".
//
// Returns {mode, setting, value} where 'mode' is the string form of the
// parsed InferenceTraceMode, or an empty string when no mode was given.
// Throws ParseException when the mode is empty or invalid, when no '=' follows
// the mode, or when the setting or value is empty.
std::tuple<std::string, std::string, std::string>
TritonParser::ParseTraceConfigOption(const std::string& arg)
{
  // Builds the exception used for a malformed "<setting>=<value>" portion.
  const auto format_error = [&arg]() {
    std::stringstream ss;
    ss << "--trace-config option format is '<trace mode>,<setting>=<value>'. "
          "Got "
       << arg << std::endl;
    return ParseException(ss.str());
  };

  const std::size_t comma_pos = arg.find(",");
  const bool has_comma = (comma_pos != std::string::npos);
  // Without a comma the setting starts at the beginning of the argument.
  const std::size_t setting_begin = has_comma ? (comma_pos + 1) : 0;
  const std::size_t equals_pos = arg.find("=", setting_begin);

  std::string name_string;
  if (has_comma) {
    if (comma_pos == 0) {
      std::stringstream ss;
      ss << "No trace mode specified. --trace-config option format is "
         << "<trace mode>,<setting>=<value> or "
         << "<setting>=<value>. Got " << arg << std::endl;
      throw ParseException(ss.str());
    }
    name_string =
        std::to_string(ParseTraceModeOption(arg.substr(0, comma_pos)));
  }  // otherwise this is a global trace config

  if (equals_pos == std::string::npos) {
    throw format_error();
  }

  std::string setting_string =
      arg.substr(setting_begin, equals_pos - setting_begin);
  std::string value_string = arg.substr(equals_pos + 1);
  if (setting_string.empty() || value_string.empty()) {
    throw format_error();
  }

  return {name_string, setting_string, value_string};
}

// Applies the global "--trace-config <setting>=<value>" entries (those stored
// under the empty-string key of 'lparams.trace_config_map_') to 'lparams'.
//
// Recognized settings are "rate", "level", "mode" and "count"; any other
// setting is ignored here. Each '*_present' flag indicates that the matching
// deprecated option was also given, in which case a warning is printed to
// stderr before its value is overridden.
//
// 'explicit_disable_trace' is received by value, so a "level" entry that
// disables tracing cannot be reported back to the caller through it. The
// disable is therefore applied to 'lparams.trace_level_' before returning.
//
// Throws ParseException, prefixed with the offending setting, if a value
// fails to parse.
void
TritonParser::SetGlobalTraceArgs(
    TritonServerParameters& lparams, bool trace_level_present,
    bool trace_rate_present, bool trace_count_present,
    bool explicit_disable_trace)
{
  for (const auto& [setting, value_variant] : lparams.trace_config_map_[""]) {
    auto value = std::get<std::string>(value_variant);
    try {
      if (setting == "rate") {
        if (trace_rate_present) {
          std::cerr << "Warning: Overriding deprecated '--trace-rate' "
                       "in favor of provided rate value in --trace-config!"
                    << std::endl;
        }
        lparams.trace_rate_ = ParseOption<int>(value);
      } else if (setting == "level") {
        if (trace_level_present) {
          std::cerr << "Warning: Overriding deprecated '--trace-level' "
                       "in favor of provided level in --trace-config!"
                    << std::endl;
        }
        auto parsed_level_config = ParseTraceLevelOption(value);
        explicit_disable_trace |=
            (parsed_level_config == TRITONSERVER_TRACE_LEVEL_DISABLED);
        // Levels are accumulated as a bitmask across repeated entries.
        lparams.trace_level_ = static_cast<TRITONSERVER_InferenceTraceLevel>(
            lparams.trace_level_ | parsed_level_config);
      } else if (setting == "mode") {
        lparams.trace_mode_ = ParseTraceModeOption(value);
      } else if (setting == "count") {
        if (trace_count_present) {
          std::cerr << "Warning: Overriding deprecated '--trace-count' "
                       "in favor of provided count in --trace-config!"
                    << std::endl;
        }
        lparams.trace_count_ = ParseOption<int>(value);
      }
    }
    catch (const ParseException& pe) {
      std::stringstream ss;
      ss << "Bad option: \"--trace-config " << setting << "\".\n"
         << pe.what() << std::endl;
      throw ParseException(ss.str());
    }
  }

  // An explicit disable wins over any other level that was accumulated above.
  // Without this, the flag updated inside the loop would be discarded when
  // the function returns.
  if (explicit_disable_trace) {
    lparams.trace_level_ = TRITONSERVER_TRACE_LEVEL_DISABLED;
  }
}

// Applies the "--trace-config triton,<setting>=<value>" entries to 'lparams'.
//
// Only the "file" and "log-frequency" settings are handled; other entries are
// left untouched. A warning is printed to stderr when the matching deprecated
// option ('--trace-file' / '--trace-log-frequency') was also given.
// Throws ParseException, prefixed with the offending setting, if a value
// fails to parse.
void
TritonParser::SetTritonTraceArgs(
    TritonServerParameters& lparams, bool trace_filepath_present,
    bool trace_log_frequency_present)
{
  const auto& triton_settings =
      lparams.trace_config_map_[std::to_string(TRACE_MODE_TRITON)];

  for (const auto& [setting, value_variant] : triton_settings) {
    auto value = std::get<std::string>(value_variant);
    try {
      if (setting == "log-frequency") {
        if (trace_log_frequency_present) {
          std::cerr << "Warning: Overriding deprecated '--trace-log-frequency' "
                       "in favor of provided log-frequency in --trace-config!"
                    << std::endl;
        }
        lparams.trace_log_frequency_ = ParseOption<int>(value);
      } else if (setting == "file") {
        if (trace_filepath_present) {
          std::cerr << "Warning: Overriding deprecated '--trace-file' "
                       "in favor of provided file in --trace-config!"
                    << std::endl;
        }
        lparams.trace_filepath_ = value;
      }
    }
    catch (const ParseException& pe) {
      std::stringstream ss;
      ss << "Bad option: \"--trace-config triton," << setting << "\".\n"
         << pe.what() << std::endl;
      throw ParseException(ss.str());
    }
  }
}

// Finalizes the trace settings used in OpenTelemetry mode.
//
// Prints a warning to stderr for each of '--trace-file' and
// '--trace-log-frequency' that was given, then post-processes the
// BatchSpanProcessor settings stored for the OpenTelemetry mode in
// 'lparams.trace_config_map_'.
void
TritonParser::SetOpenTelemetryTraceArgs(
    TritonServerParameters& lparams, bool trace_filepath_present,
    bool trace_log_frequency_present)
{
  if (trace_filepath_present) {
    std::cerr << "Warning: '--trace-file' is deprecated and will "
                 "be ignored with opentelemetry tracing mode. "
              << std::endl;
  }
  if (trace_log_frequency_present) {
    std::cerr << "Warning: '--trace-log-frequency' is deprecated "
                 "and will be ignored with opentelemetry tracing mode."
              << std::endl;
  }

  const std::string otel_key = std::to_string(TRACE_MODE_OPENTELEMETRY);
  ProcessOpenTelemetryBatchSpanProcessorArgs(lparams.trace_config_map_[otel_key]);
}

// Normalizes the BatchSpanProcessor (BSP) entries of 'otel_trace_settings'.
//
// The three BSP settings ("bsp_max_queue_size", "bsp_schedule_delay" and
// "bsp_max_export_batch_size") that are present have their string value
// replaced by the parsed uint32_t. Those that are absent are appended, taking
// the value of the matching OTEL_BSP_* environment variable or a built-in
// default.
// Throws ParseException naming the offending option or environment variable
// when a value fails to parse.
void
TritonParser::ProcessOpenTelemetryBatchSpanProcessorArgs(
    TraceConfig& otel_trace_settings)
{
  // BSP settings that have not been seen in 'otel_trace_settings' yet, mapped
  // to the fallback value (environment variable or built-in default).
  std::unordered_map<std::string, std::string> pending_defaults;
  pending_defaults.insert(std::make_pair(
      std::string("bsp_max_queue_size"),
      triton::server::GetEnvironmentVariableOrDefault(
          "OTEL_BSP_MAX_QUEUE_SIZE", "2048")));
  pending_defaults.insert(std::make_pair(
      std::string("bsp_schedule_delay"),
      triton::server::GetEnvironmentVariableOrDefault(
          "OTEL_BSP_SCHEDULE_DELAY", "5000")));
  pending_defaults.insert(std::make_pair(
      std::string("bsp_max_export_batch_size"),
      triton::server::GetEnvironmentVariableOrDefault(
          "OTEL_BSP_MAX_EXPORT_BATCH_SIZE", "512")));

  // Convert the BSP values that were provided from string to integer.
  for (auto& [setting, value_variant] : otel_trace_settings) {
    try {
      auto value = std::get<std::string>(value_variant);
      const bool is_bsp_setting = (setting == "bsp_max_queue_size") ||
                                  (setting == "bsp_schedule_delay") ||
                                  (setting == "bsp_max_export_batch_size");
      if (is_bsp_setting) {
        value_variant = ParseOption<uint32_t>(value);
        // Explicitly provided, so the fallback is no longer needed.
        pending_defaults.erase(setting);
      }
    }
    catch (const ParseException& pe) {
      std::stringstream ss;
      ss << "Bad option: \"--trace-config opentelemetry," << setting << "\".\n"
         << pe.what() << std::endl;
      throw ParseException(ss.str());
    }
  }

  // Append every BSP setting that was not provided, using its fallback value.
  for (const auto& [setting, value] : pending_defaults) {
    try {
      otel_trace_settings.push_back(
          std::make_pair(setting, ParseOption<uint32_t>(value)));
    }
    catch (const ParseException& pe) {
      // Report the environment variable name: "OTEL_" + upper-cased setting.
      std::stringstream ss;
      ss << "Bad option: \"OTEL_";
      for (auto& ch : setting) {
        ss << static_cast<char>(std::toupper(ch));
      }
      ss << "\".\n" << pe.what() << std::endl;
      throw ParseException(ss.str());
    }
  }
}

// Post-processes the collected trace arguments: applies the global
// "--trace-config" settings first, then the settings specific to the selected
// trace mode, and finally forces the trace level to disabled when
// 'explicit_disable_trace' is set.
void
TritonParser::PostProcessTraceArgs(
    TritonServerParameters& lparams, bool trace_level_present,
    bool trace_rate_present, bool trace_count_present,
    bool trace_filepath_present, bool trace_log_frequency_present,
    bool explicit_disable_trace)
{
  // Global settings go first because they may change 'trace_mode_'.
  SetGlobalTraceArgs(
      lparams, trace_level_present, trace_rate_present, trace_count_present,
      explicit_disable_trace);

  switch (lparams.trace_mode_) {
    case TRACE_MODE_OPENTELEMETRY:
      SetOpenTelemetryTraceArgs(
          lparams, trace_filepath_present, trace_log_frequency_present);
      break;
    case TRACE_MODE_TRITON:
      SetTritonTraceArgs(
          lparams, trace_filepath_present, trace_log_frequency_present);
      break;
    default:
      break;
  }

  if (explicit_disable_trace) {
    lparams.trace_level_ = TRITONSERVER_TRACE_LEVEL_DISABLED;
  }
}

#endif  // TRITON_ENABLE_TRACING
}}      // namespace triton::server


================================================
FILE: src/command_line_parser.h
================================================
// Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once

#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#include "common.h"
#include "restricted_features.h"
#include "triton/common/logging.h"
#include "triton/core/tritonserver.h"
#ifdef TRITON_ENABLE_GRPC
// To avoid ambiguous reference during build
// grpc headers should be imported first
// https://github.com/open-telemetry/opentelemetry-cpp/blob/main/examples/otlp/README.md#additional-notes-regarding-abseil-library
#include "grpc/grpc_server.h"
#endif  // TRITON_ENABLE_GRPC
#if defined(TRITON_ENABLE_HTTP) || defined(TRITON_ENABLE_METRICS)
#include "http_server.h"
#endif  // TRITON_ENABLE_HTTP || TRITON_ENABLE_METRICS
#ifdef TRITON_ENABLE_SAGEMAKER
#include "sagemaker_server.h"
#endif  // TRITON_ENABLE_SAGEMAKER
#ifdef TRITON_ENABLE_VERTEX_AI
#include "vertex_ai_server.h"
#endif  // TRITON_ENABLE_VERTEX_AI

#ifndef _WIN32
#include <getopt.h>
#include <unistd.h>
#else
// Minimum implementation of <getopt.h> for Windows
#define required_argument 1
#define no_argument 2
// Stand-in for the 'struct option' normally provided by <getopt.h>: describes
// one long option by its name, whether it takes an argument, an optional flag
// pointer and the value identifying the option.
struct option {
  option(const char* opt_name, int opt_has_arg, int* opt_flag, int opt_val)
      : name(opt_name), has_arg(opt_has_arg), flag(opt_flag), val(opt_val)
  {
  }
  const char* name;
  int has_arg;
  int* flag;
  int val;
};
#endif
#ifdef TRITON_ENABLE_TRACING
#include "tracer.h"
#endif


namespace triton { namespace server {

// Description of a single command-line option: its numeric id, its long flag
// name, a description of the argument it takes (empty when it takes none) and
// its help text.
struct Option {
  // Argument descriptions; ArgNone marks an option that takes no argument.
  static constexpr const char* ArgNone = "";
  static constexpr const char* ArgBool = "boolean";
  static constexpr const char* ArgFloat = "float";
  static constexpr const char* ArgInt = "integer";
  static constexpr const char* ArgStr = "string";

  Option(int id, std::string flag, std::string arg_desc, std::string desc)
      : id_(id), flag_(flag), arg_desc_(arg_desc), desc_(desc)
  {
  }

  // Returns the getopt-style long option for this Option. The returned 'name'
  // points into 'flag_', so it is valid only while this Option is alive.
  struct option GetLongOption() const
  {
    const int has_arg = arg_desc_.empty() ? no_argument : required_argument;
    struct option long_opt {
      flag_.c_str(), has_arg, nullptr, id_
    };
    return long_opt;
  }

  const int id_;
  const std::string flag_;
  const std::string arg_desc_;
  const std::string desc_;
};

// Aggregates the server settings produced by command-line parsing, each
// initialized to its default value. Sections guarded by a TRITON_ENABLE_*
// macro exist only when that macro is defined at build time.
struct TritonServerParameters {
  std::string server_id_{"triton"};
  bool exit_on_error_{true};
  bool strict_model_config_{false};
  bool strict_readiness_{true};
  int32_t exit_timeout_secs_{30};
#ifdef TRITON_ENABLE_GPU
  double min_supported_compute_capability_{TRITON_MIN_COMPUTE_CAPABILITY};
#else
  double min_supported_compute_capability_{0.0};
#endif  // TRITON_ENABLE_GPU
  std::string repoagent_dir_{"/opt/tritonserver/repoagents"};
  std::string backend_dir_{"/opt/tritonserver/backends"};
  // Three-string tuples, one per backend config setting.
  // NOTE(review): presumably {backend name, setting, value} -- confirm against
  // ParseBackendConfigOption.
  std::vector<std::tuple<std::string, std::string, std::string>>
      backend_config_settings_;

  // Model repository manager configuration
  bool enable_model_namespacing_{false};
  bool enable_peer_access_{true};
  std::set<std::string> model_repository_paths_{};
  TRITONSERVER_ModelControlMode control_mode_{TRITONSERVER_MODEL_CONTROL_NONE};
  std::set<std::string> startup_models_{};
  // Interval, in seconds, when the model repository is polled for changes.
  int32_t repository_poll_secs_{15};
  // Number of threads to use for concurrently loading models
  uint32_t model_load_thread_count_{4};
  uint32_t model_load_retry_count_{0};
  std::map<int, double> load_gpu_limit_;
  // Custom model configuration file. Fall back to default config.pbtxt if not
  // set.
  std::string model_config_name_;

  // Rate limiter configuration
  // FIXME: Once the rate limiter implementation is complete make
  // EXEC_COUNT the default.
  // TRITONSERVER_RateLimitMode
  // rate_limit_mode_{TRITONSERVER_RATE_LIMIT_EXEC_COUNT};
  TRITONSERVER_RateLimitMode rate_limit_mode_{TRITONSERVER_RATE_LIMIT_OFF};
  std::vector<std::tuple<std::string, int, int>> rate_limit_resources_;

  // memory pool configuration
  // Default is 1 << 28 bytes (256 MiB).
  int64_t pinned_memory_pool_byte_size_{1 << 28};
  std::list<std::pair<int, uint64_t>> cuda_pools_;
  std::list<std::pair<int, size_t>> cuda_virtual_address_size_;

  // [FIXME] this option is broken after backend separation: this should have
  // controlled backend copy behavior but not properly propagate to backend
  // after separation, need to go through backend config.
  int32_t buffer_manager_thread_count_{0};

  // Three-string tuples as returned by ParseHostPolicyOption, i.e.
  // {policy name, setting, value}.
  std::vector<std::tuple<std::string, std::string, std::string>> host_policies_;

  // Cache configuration
  bool enable_cache_{false};
  std::string cache_dir_{"/opt/tritonserver/caches"};
  // Maps a string key to a list of string pairs.
  // NOTE(review): presumably cache name -> {setting, value} pairs -- confirm
  // against ParseCacheConfigOption.
  std::unordered_map<
      std::string, std::vector<std::pair<std::string, std::string>>>
      cache_config_settings_;

#ifdef TRITON_ENABLE_LOGGING
  bool log_info_{true};
  bool log_warn_{true};
  bool log_error_{true};
  int32_t log_verbose_{0};
  triton::common::Logger::Format log_format_{
      triton::common::Logger::Format::kDEFAULT};
  std::string log_file_{};
#endif  // TRITON_ENABLE_LOGGING

#ifdef TRITON_ENABLE_TRACING
  // Tracing configuration. Tracing is disabled by default and the default
  // trace mode is TRACE_MODE_TRITON.
  std::string trace_filepath_{};
  TRITONSERVER_InferenceTraceLevel trace_level_{
      TRITONSERVER_TRACE_LEVEL_DISABLED};
  int32_t trace_rate_{1000};
  int32_t trace_count_{-1};
  int32_t trace_log_frequency_{0};
  InferenceTraceMode trace_mode_{TRACE_MODE_TRITON};
  // Collected "--trace-config" settings, keyed by the string form of the trace
  // mode; the empty-string key holds the global settings.
  TraceConfigMap trace_config_map_;
#endif  // TRITON_ENABLE_TRACING

// The configurations for various endpoints (i.e. HTTP, GRPC and metrics)
#ifdef TRITON_ENABLE_HTTP
  bool allow_http_{true};
  std::string http_address_{"0.0.0.0"};
  int32_t http_port_{8000};
  bool reuse_http_port_{false};
  std::string http_forward_header_pattern_;
  // The number of threads to initialize for the HTTP front-end.
  int http_thread_cnt_{8};
  RestrictedFeatures http_restricted_apis_{};
  // Default value 64MB
  size_t http_max_input_size_{HTTP_DEFAULT_MAX_INPUT_SIZE};
#endif  // TRITON_ENABLE_HTTP

#ifdef TRITON_ENABLE_GRPC
  bool allow_grpc_{true};
  triton::server::grpc::Options grpc_options_;
#endif  // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_METRICS
  bool allow_metrics_{true};
  // Defaults to http_address_ if TRITON_ENABLE_HTTP is enabled (for backwards
  // compatibility), otherwise defaults to "0.0.0.0" when TRITON_ENABLE_HTTP is
  // disabled.
  std::string metrics_address_{""};
  int32_t metrics_port_{8002};
  // Metric settings for Triton core
  float metrics_interval_ms_{2000};
  bool allow_gpu_metrics_{true};
  bool allow_cpu_metrics_{true};
  std::vector<std::tuple<std::string, std::string, std::string>>
      metrics_config_settings_;
#endif  // TRITON_ENABLE_METRICS

#ifdef TRITON_ENABLE_SAGEMAKER
  bool allow_sagemaker_{false};
  std::string sagemaker_address_{"0.0.0.0"};
  int32_t sagemaker_port_{8080};
  bool sagemaker_safe_range_set_{false};
  std::pair<int32_t, int32_t> sagemaker_safe_range_{-1, -1};
  // The number of threads to initialize for the SageMaker HTTP front-end.
  int sagemaker_thread_cnt_{8};
#endif  // TRITON_ENABLE_SAGEMAKER

#ifdef TRITON_ENABLE_VERTEX_AI
  bool allow_vertex_ai_{false};
  std::string vertex_ai_address_{"0.0.0.0"};
  int32_t vertex_ai_port_{8080};
  // The number of threads to initialize for the Vertex AI HTTP front-end.
  int vertex_ai_thread_cnt_{8};
  std::string vertex_ai_default_model_{};
#endif  // TRITON_ENABLE_VERTEX_AI

  // [FIXME] who should call this function?
  void CheckPortCollision();
  // Owning pointer to TRITONSERVER_ServerOptions whose deleter has the type of
  // TRITONSERVER_ServerOptionsDelete.
  using ManagedTritonServerOptionPtr = std::unique_ptr<
      TRITONSERVER_ServerOptions, decltype(&TRITONSERVER_ServerOptionsDelete)>;
  ManagedTritonServerOptionPtr BuildTritonServerOptions();
};

// Exception type to be thrown if the error is parsing related.
// Carries a human-readable message that is returned by what().
class ParseException : public std::exception {
 public:
  ParseException() = default;
  ParseException(const std::string& message) : message_(message) {}

  // Returns the message given at construction (empty for a default-constructed
  // exception). The pointer stays valid for the lifetime of this object.
  // 'noexcept override' replaces the 'throw()' specification, which is
  // deprecated in C++17 and removed in C++20, and lets the compiler verify
  // that std::exception::what() is actually overridden.
  const char* what() const noexcept override { return message_.c_str(); }

 private:
  const std::string message_{""};
};

// [WIP] Fall-through parser, Parse() will convert the recognized options into
// parameter object and return the unrecognized options to be another argument
// list for other parser to consume.
// This allows the composition of parser chain.
// [FIXME] abstract interface, concrete class below should only parse Triton
// core and endpoint control options (endpoint specific options in their own
// parser)
class TritonParser {
 public:
  TritonParser();
  // Parse command line arguments into a parameters struct and transform
  // the argument list to contain only unrecognized options. The content of
  // unrecognized argument list shares the same lifecycle as 'argv'.
  // Raise ParseException if fail to parse recognized options.
  std::pair<TritonServerParameters, std::vector<char*>> Parse(
      int argc, char** argv);

  // Return usage of all recognized options
  std::string Usage();

 private:
  std::string FormatUsageMessage(std::string str, int offset);
  // Helper functions for parsing options that require multi-value parsing.
  std::tuple<std::string, std::string, std::string> ParseCacheConfigOption(
      const std::string& arg);
  std::tuple<std::string, int, int> ParseRateLimiterResourceOption(
      const std::string& arg);
  std::tuple<std::string, std::string, std::string> ParseBackendConfigOption(
      const std::string& arg);
  // Parses "<policy name>,<setting>=<value>" into its three components.
  std::tuple<std::string, std::string, std::string> ParseHostPolicyOption(
      const std::string& arg);
  std::tuple<std::string, std::string, std::string> ParseMetricsConfigOption(
      const std::string& arg);
  void ParseRestrictedFeatureOption(
      const std::string& arg, const std::string& option_name,
      const std::string& header_prefix, const std::string& feature_type,
      RestrictedFeatures& restricted_features);
#ifdef TRITON_ENABLE_TRACING
  // Convert a case-insensitive trace level / trace mode string into the
  // corresponding enum value; throw ParseException for an unknown value.
  TRITONSERVER_InferenceTraceLevel ParseTraceLevelOption(std::string arg);
  InferenceTraceMode ParseTraceModeOption(std::string arg);
  // Parses "<trace mode>,<setting>=<value>" or the global form
  // "<setting>=<value>", for which the returned mode string is empty.
  std::tuple<std::string, std::string, std::string> ParseTraceConfigOption(
      const std::string& arg);
  // Helper functions for post processing for collected trace arguments.
  void SetGlobalTraceArgs(
      TritonServerParameters& lparams, bool trace_level_present,
      bool trace_rate_present, bool trace_count_present,
      bool explicit_disable_trace);
  void SetTritonTraceArgs(
      TritonServerParameters& lparams, bool trace_filepath_present,
      bool trace_log_frequency_present);
  void SetOpenTelemetryTraceArgs(
      TritonServerParameters& lparams, bool trace_filepath_present,
      bool trace_log_frequency_present);
  // Entry point of the trace post processing: applies the global settings,
  // then the settings of the selected trace mode.
  void PostProcessTraceArgs(
      TritonServerParameters& lparams, bool trace_level_present,
      bool trace_rate_present, bool trace_count_present,
      bool trace_filepath_present, bool trace_log_frequency_present,
      bool explicit_disable_trace);
  // Converts the provided BatchSpanProcessor settings to integers and appends
  // the ones that were not provided, using environment variables or defaults.
  void ProcessOpenTelemetryBatchSpanProcessorArgs(
      TraceConfig& otel_trace_settings);
#endif  // TRITON_ENABLE_TRACING
  // Helper function to parse option in
  // "<string>[1st_delim]<string>[2nd_delim]<string>" format
  // 'option_name' and 'config_name' are only used in the error message of the
  // ParseException thrown on a malformed argument.
  std::tuple<std::string, std::string, std::string> ParseGenericConfigOption(
      const std::string& arg, const std::string& first_delim,
      const std::string& second_delim, const std::string& option_name,
      const std::string& config_name);

  // Initialize individual option groups
  void SetupOptions();
  // Initialize option group mappings
  void SetupOptionGroups();

  // Sum of option groups: vector to maintain insertion order for Usage()
  // Each entry pairs a string with a reference to one of the vectors below.
  std::vector<std::pair<std::string, std::vector<Option>&>> option_groups_;
  // Individual option groups
  std::vector<Option> global_options_;
  std::vector<Option> server_options_;
  std::vector<Option> model_repo_options_;
  std::vector<Option> logging_options_;
  std::vector<Option> http_options_;
  std::vector<Option> grpc_options_;
  std::vector<Option> sagemaker_options_;
  std::vector<Option> vertex_options_;
  std::vector<Option> metric_options_;
  std::vector<Option> tracing_options_;
  std::vector<Option> backend_options_;
  std::vector<Option> repo_agent_options_;
  std::vector<Option> cache_options_;
  std::vector<Option> rate_limiter_options_;
  std::vector<Option> memory_device_options_;
  // Group deprecated options to keep preferred options more succinct
  std::vector<Option> deprecated_options_;
};
}}  // namespace triton::server


================================================
FILE: src/common.cc
================================================
// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "common.h"

#include <algorithm>
#include <climits>
#include <iterator>

#include "restricted_features.h"
#include "triton/core/tritonserver.h"

extern "C" {
#include <b64/cdecode.h>
}

namespace triton { namespace server {

// Converts 'version_string' into a model version.
//
// version_string: the textual version; an empty string yields -1.
// version: output, receives the parsed version. It is left unmodified when
//   the string cannot be converted.
//
// Returns nullptr on success, or a TRITONSERVER_ERROR_INVALID_ARG error when
// the string cannot be converted to an integer or the value is negative.
TRITONSERVER_Error*
GetModelVersionFromString(const std::string& version_string, int64_t* version)
{
  if (version_string.empty()) {
    *version = -1;
    return nullptr;  // success
  }

  try {
    // Use stoll rather than stol: 'long' is only 32 bits wide on LLP64
    // platforms such as Windows, which would reject versions that fit in the
    // 64-bit output.
    *version = std::stoll(version_string);
  }
  catch (const std::exception& e) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "failed to get model version from specified version string '" +
            version_string + "' (details: " + e.what() +
            "), version should be an integral value > 0")
            .c_str());
  }

  if (*version < 0) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "invalid model version specified '" + version_string +
            "' , version should be an integral value > 0")
            .c_str());
  }

  return nullptr;  // success
}

// Returns the value of the environment variable 'variable_name', or
// 'default_value' when that variable is not set.
std::string
GetEnvironmentVariableOrDefault(
    const std::string& variable_name, const std::string& default_value)
{
  if (const char* env_value = getenv(variable_name.c_str())) {
    return std::string(env_value);
  }
  return default_value;
}

// Formats the first 'dims_count' entries of 'dims' as a bracketed,
// comma-separated list, e.g. "[1,2,3]". A count of zero yields "[]".
std::string
ShapeToString(const int64_t* dims, const size_t dims_count)
{
  std::string text = "[";
  for (size_t idx = 0; idx != dims_count; ++idx) {
    // A separator goes before every element except the first one.
    if (idx > 0) {
      text.append(",");
    }
    text.append(std::to_string(dims[idx]));
  }
  text.append("]");
  return text;
}

std::string
ShapeToString(const std::vector<int64_t>& shape)
{
  return ShapeToString(shape.data(), shape.size());
}

// Returns the product of all dimensions in 'dims'.
//
// Dimensions are inspected in order and the first special case encountered
// decides the result:
//   -1  a dimension equals WILDCARD_DIM
//   -2  a dimension is negative (invalid)
//    0  a dimension is zero
//   -3  the product would overflow int64_t
// An empty 'dims' yields 0.
int64_t
GetElementCount(const std::vector<int64_t>& dims)
{
  if (dims.empty()) {
    return 0;
  }

  int64_t count = 1;
  for (const int64_t dim : dims) {
    if (dim == WILDCARD_DIM) {
      return -1;
    }
    if (dim < 0) {  // invalid dim
      return -2;
    }
    if (dim == 0) {
      return 0;
    }

    // 'dim' is strictly positive here, so the division is safe. The check can
    // never trigger for the first dimension, where 'count' is still 1.
    if (count > (INT64_MAX / dim)) {
      return -3;
    }
    count *= dim;
  }

  return count;
}

// Returns true if any element of 'vec' compares equal to 'str'.
bool
Contains(const std::vector<std::string>& vec, const std::string& str)
{
  for (const auto& candidate : vec) {
    if (candidate == str) {
      return true;
    }
  }
  return false;
}

// Base64-decodes 'input_len' bytes of 'input' into 'decoded_data'.
//
// input, input_len: the encoded data and its length in bytes.
// decoded_data: output buffer; resized to 'input_len + 1' bytes.
// decoded_size: output; set to the value returned by the decoder.
// name: only used in the error message.
//
// Returns nullptr on success, or a TRITONSERVER_ERROR_INVALID_ARG error when
// 'input_len' exceeds INT_MAX.
TRITONSERVER_Error*
DecodeBase64(
    const char* input, size_t input_len, std::vector<char>& decoded_data,
    size_t& decoded_size, const std::string& name)
{
  // NOTE(review): presumably required because the decoder takes the input
  // length as an int -- confirm against the b64 headers.
  const size_t max_input_len = static_cast<size_t>(INT_MAX);
  if (input_len > max_input_len) {
    const std::string message =
        "'" + name + "' exceeds the maximum allowed data size limit INT_MAX";
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG, message.c_str());
  }

  // The decoded size cannot be larger than the input
  decoded_data.resize(input_len + 1);

  base64_decodestate decoder;
  base64_init_decodestate(&decoder);
  decoded_size =
      base64_decode_block(input, input_len, decoded_data.data(), &decoder);

  return nullptr;
}

// Checks that 'shm_key' may be used to register the shared memory region
// 'name'.
//
// Returns a TRITONSERVER_ERROR_INVALID_ARG error when the key consists only of
// '/' characters (or is empty), or when the key, ignoring leading '/'
// characters, starts with the reserved prefix
// kTritonSharedMemoryRegionPrefix. Returns nullptr when the key is valid.
TRITONSERVER_Error*
ValidateSharedMemoryKey(const std::string& name, const std::string& shm_key)
{
  // Position of the first character that is not a slash
  const std::size_t key_start = shm_key.find_first_not_of('/');

  // Nothing but slashes: there is no usable key
  if (key_start == std::string::npos) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "cannot register shared memory region '" + name +
            "' - invalid shm key '" + shm_key + "'")
            .c_str());
  }

  // Compare the beginning of the key, without its leading slashes, against
  // the reserved prefix
  const std::string_view reserved_prefix(kTritonSharedMemoryRegionPrefix);
  const std::string_view trimmed_key =
      std::string_view(shm_key).substr(key_start);
  const bool uses_reserved_prefix =
      (trimmed_key.compare(0, reserved_prefix.size(), reserved_prefix) == 0);
  if (uses_reserved_prefix) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "cannot register shared memory region '" + name + "' with key '" +
            shm_key + "' as the key contains the reserved prefix '" +
            kTritonSharedMemoryRegionPrefix + "'")
            .c_str());
  }

  // Valid shm key
  return nullptr;
}

}}  // namespace triton::server


================================================
FILE: src/common.h
================================================
// Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <typeinfo>
#include <unordered_map>
#include <variant>
#include <vector>

#include "triton/core/tritonserver.h"

namespace triton { namespace server {

// HTTP header names
constexpr char kInferHeaderContentLengthHTTPHeader[] =
    "Inference-Header-Content-Length";
constexpr char kAcceptEncodingHTTPHeader[] = "Accept-Encoding";
constexpr char kContentEncodingHTTPHeader[] = "Content-Encoding";
constexpr char kContentTypeHeader[] = "Content-Type";
constexpr char kContentLengthHeader[] = "Content-Length";

// This prefix is reserved for shm regions created internally by Triton
constexpr char kTritonSharedMemoryRegionPrefix[] =
    "triton_python_backend_shm_region_";

// Maximum gRPC message size, set to the largest 32-bit signed integer
constexpr int MAX_GRPC_MESSAGE_SIZE = INT32_MAX;

/// The value for a dimension in a shape that indicates that that
/// dimension can take on any size.
constexpr int WILDCARD_DIM = -1;

// Maximum allowed depth for JSON parsing
constexpr int32_t HTTP_MAX_JSON_NESTING_DEPTH = 100;

// Default maximum allowed HTTP request input size in bytes (64MB)
constexpr size_t HTTP_DEFAULT_MAX_INPUT_SIZE = 1 << 26;

/// Request parameter keys that start with a "triton_" prefix for internal use
const std::vector<std::string> TRITON_RESERVED_REQUEST_PARAMS{
    "triton_enable_empty_final_response"};

// Evaluates X exactly once. If the result is a non-null TRITONSERVER_Error*,
// that pointer is returned from the enclosing function as-is (it is not
// deleted here). The enclosing function must therefore return
// TRITONSERVER_Error*.
#define RETURN_IF_ERR(X)             \
  do {                               \
    TRITONSERVER_Error* err__ = (X); \
    if (err__ != nullptr) {          \
      return err__;                  \
    }                                \
  } while (false)

// Evaluates X exactly once. If the result is a non-null TRITONSERVER_Error*,
// a new error is created with the same error code and the message
// "<MSG>: <original message>", the original error is deleted, and the new
// error is returned from the enclosing function.
#define RETURN_MSG_IF_ERR(X, MSG)                                      \
  do {                                                                 \
    TRITONSERVER_Error* err__ = (X);                                   \
    if (err__ != nullptr) {                                            \
      auto new_err = TRITONSERVER_ErrorNew(                            \
          TRITONSERVER_ErrorCode(err__),                               \
          (std::string(MSG) + ": " + TRITONSERVER_ErrorMessage(err__)) \
              .c_str());                                               \
      TRITONSERVER_ErrorDelete(err__);                                 \
      return new_err;                                                  \
    }                                                                  \
  } while (false)

// Evaluates X exactly once. If the result is a non-null TRITONSERVER_Error*,
// control jumps to the label T in the enclosing function.
// NOTE(review): the error pointer is only held in a macro-local variable and
// is neither deleted nor exposed to the code at label T. Unless X is itself a
// variable that the caller still holds, the error object appears to be
// leaked - confirm against the call sites.
#define GOTO_IF_ERR(X, T)            \
  do {                               \
    TRITONSERVER_Error* err__ = (X); \
    if (err__ != nullptr) {          \
      goto T;                        \
    }                                \
  } while (false)

// Unconditionally writes "error: <MSG>" to std::cerr and terminates the
// process with exit status 1.
#define FAIL(MSG)                                 \
  do {                                            \
    std::cerr << "error: " << (MSG) << std::endl; \
    exit(1);                                      \
  } while (false)

// Evaluates X exactly once. If the result is a non-null TRITONSERVER_Error*,
// writes "error: <MSG>: <error code string> - <error message>" to std::cerr,
// deletes the error and terminates the process with exit status 1.
#define FAIL_IF_ERR(X, MSG)                                       \
  do {                                                            \
    TRITONSERVER_Error* err__ = (X);                              \
    if (err__ != nullptr) {                                       \
      std::cerr << "error: " << (MSG) << ": "                     \
                << TRITONSERVER_ErrorCodeString(err__) << " - "   \
                << TRITONSERVER_ErrorMessage(err__) << std::endl; \
      TRITONSERVER_ErrorDelete(err__);                            \
      exit(1);                                                    \
    }                                                             \
  } while (false)

// Evaluates X exactly once. If the result is a non-null TRITONSERVER_Error*,
// constructs an EX_TYPE from the string
// "error: <MSG>: <error code string> - <error message>", deletes the error
// and throws the exception. EX_TYPE must be constructible from std::string.
// The exception object is built before the error is deleted because its
// message is copied out of the error.
#define THROW_IF_ERR(EX_TYPE, X, MSG)                                     \
  do {                                                                    \
    TRITONSERVER_Error* err__ = (X);                                      \
    if (err__ != nullptr) {                                               \
      auto ex__ = (EX_TYPE)(std::string("error: ") + (MSG) + ": " +       \
                            TRITONSERVER_ErrorCodeString(err__) + " - " + \
                            TRITONSERVER_ErrorMessage(err__));            \
      TRITONSERVER_ErrorDelete(err__);                                    \
      throw ex__;                                                         \
    }                                                                     \
  } while (false)

// Evaluates X exactly once and discards the outcome: a non-null
// TRITONSERVER_Error* is deleted so that it does not leak, and execution
// continues normally.
#define IGNORE_ERR(X)                  \
  do {                                 \
    TRITONSERVER_Error* err__ = (X);   \
    if (err__ != nullptr) {            \
      TRITONSERVER_ErrorDelete(err__); \
    }                                  \
  } while (false)

#ifdef TRITON_ENABLE_GPU
// Only available when TRITON_ENABLE_GPU is defined. Evaluates X exactly once;
// if the resulting cudaError_t is not cudaSuccess, writes
// "error: <MSG>: <cudaGetErrorString text>" to std::cerr and terminates the
// process with exit status 1.
#define FAIL_IF_CUDA_ERR(X, MSG)                                           \
  do {                                                                     \
    cudaError_t err__ = (X);                                               \
    if (err__ != cudaSuccess) {                                            \
      std::cerr << "error: " << (MSG) << ": " << cudaGetErrorString(err__) \
                << std::endl;                                              \
      exit(1);                                                             \
    }                                                                      \
  } while (false)
#endif  // TRITON_ENABLE_GPU

/// Get the integral version from a string, or fail if the string does not
/// represent a valid version.
///
/// \param version_string The string version.
/// \param version Returns the integral version.
/// \return The error status. Failure if 'version_string' doesn't
/// convert to a valid version.
TRITONSERVER_Error* GetModelVersionFromString(
    const std::string& version_string, int64_t* version);

/// Get the value of an environment variable, or a default value if the
/// variable is not set.
///
/// \param variable_name The name of the environment variable.
/// \param default_value The default value.
/// \return The environment variable or the default value if not set.
std::string GetEnvironmentVariableOrDefault(
    const std::string& variable_name, const std::string& default_value);

/// Get the number of elements in a shape.
///
/// \param dims The shape.
/// \return The number of elements, -1 if the number of elements
/// cannot be determined because the shape contains one or more
/// wildcard dimensions, -2 if the shape contains an invalid dim,
/// or -3 if the number is too large to represent as an int64_t.
int64_t GetElementCount(const std::vector<int64_t>& dims);

/// Convert shape to string representation.
///
/// \param shape The shape as a vector.
/// \return The string representation of the shape.
std::string ShapeToString(const std::vector<int64_t>& shape);

/// Returns whether 'vec' contains 'str'.
///
/// \param vec The vector of strings to search.
/// \param str The string to look up.
/// \return True if 'str' is found, false otherwise.
bool Contains(const std::vector<std::string>& vec, const std::string& str);

/// Decodes a Base64 encoded string and stores the result in a vector.
///
/// \param input The Base64 encoded input string to decode.
/// \param input_len The length of the input string.
/// \param decoded_data A vector to store the decoded data.
/// \param decoded_size Returns the size of the decoded data.
/// \param name The name associated with the decoding process
/// (NOTE(review): presumably only used to label error messages - confirm
/// against the implementation).
/// \return The error status.
TRITONSERVER_Error* DecodeBase64(
    const char* input, size_t input_len, std::vector<char>& decoded_data,
    size_t& decoded_size, const std::string& name);


/// Validate a shared memory key.
///
/// \param name The name of the memory block.
/// \param shm_key The name of the POSIX shared memory object.
/// \return The error status.
TRITONSERVER_Error* ValidateSharedMemoryKey(
    const std::string& name, const std::string& shm_key);


/// Joins the elements of a container into a single string delimited by
/// 'delim'.
///
/// Any container that can be traversed with a range-based for loop is
/// accepted (std::vector, std::list, std::set, ...), not only containers
/// offering operator[]. Elements are written with operator<<, so any
/// streamable element type works.
///
/// \param container The container of elements to join.
/// \param delim The delimiter placed between consecutive elements.
/// \return The joined string, or an empty string if 'container' is empty.
template <class T>
std::string
Join(const T& container, const std::string& delim)
{
  std::stringstream ss;
  bool first = true;
  for (const auto& element : container) {
    // The delimiter goes between elements only, never before the first one.
    if (!first) {
      ss << delim;
    }
    ss << element;
    first = false;
  }
  return ss.str();
}


// Used by Python Bindings to accept arguments to initialize Frontends.
// Known pybind11 issue: bool has to come before int for std::variant
// NOTE: GetValue() below maps variant indices to type names
// (0 = bool, 1 = int, 2 = string), so it must be updated whenever the list
// or the order of alternatives changes.
using VariantType = std::variant<bool, int, std::string>;
// Option name -> option value.
using UnorderedMapType = std::unordered_map<std::string, VariantType>;


/// Look up 'key' in 'options' and, if the stored value has type T, copy it
/// into '*arg'.
///
/// T must be one of the alternatives of VariantType (bool, int or
/// std::string).
///
/// \param options The option map to search.
/// \param key The option name to look up.
/// \param arg Returns the value on success. It is only written on success and
/// is never read, so it may point to an uninitialized object.
/// \return nullptr on success. A TRITONSERVER_ERROR_INVALID_ARG error if
/// 'key' is missing or if its value does not hold a T.
template <typename T>
TRITONSERVER_Error*
GetValue(const UnorderedMapType& options, const std::string& key, T* arg)
{
  const auto curr = options.find(key);
  if (curr == options.end()) {
    const std::string msg = "Key: " + key + " not found in options provided.";
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
  }

  if (!std::holds_alternative<T>(curr->second)) {
    // Human readable name for each alternative of VariantType, keyed by the
    // alternative's index. Must match the order in the VariantType alias.
    const auto type_name = [](const size_t index) -> std::string {
      switch (index) {
        case 0:
          return "bool";
        case 1:
          return "int";
        case 2:
          return "string";
        default:
          return "";
      }
    };

    // Derive the expected alternative from T itself. The caller's '*arg' must
    // not be inspected here: it is an output parameter and may be
    // uninitialized, in which case reading it is undefined behavior.
    const VariantType expected_probe = T{};

    const std::string msg =
        "Key: " + key + " found, but incorrect type. Expected " +
        type_name(expected_probe.index()) +
        " Found: " + type_name(curr->second.index());

    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
  }

  *arg = std::get<T>(curr->second);
  return nullptr;
}


}}  // namespace triton::server


================================================
FILE: src/data_compressor.h
================================================
// Copyright (c) 2021-2025, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <event2/buffer.h>
#include <zlib.h>

#include <cassert>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "common.h"
#include "triton/core/tritonserver.h"

namespace triton { namespace server {

//
// DataCompressor
//
/// Stateless helper that compresses / decompresses the content of libevent
/// evbuffers with zlib. All members are static.
class DataCompressor {
 public:
  // Supported content encodings. UNKNOWN and IDENTITY are rejected by both
  // CompressData() and DecompressData().
  enum class Type { UNKNOWN, IDENTITY, GZIP, DEFLATE };

  /// Compress the content of 'source' and append the result to
  /// 'compressed_data'. 'source' is only peeked at, it is not drained.
  ///
  /// \param type The compression format, GZIP or DEFLATE.
  /// \param source The evbuffer holding the data to be compressed.
  /// \param compressed_data The evbuffer that receives the compressed data.
  /// \return nullptr on success. TRITONSERVER_ERROR_INVALID_ARG if 'source'
  /// is empty or 'type' is UNKNOWN / IDENTITY, TRITONSERVER_ERROR_INTERNAL
  /// on zlib or evbuffer failures.
  static TRITONSERVER_Error* CompressData(
      const Type type, evbuffer* source, evbuffer* compressed_data)
  {
    // The source size doubles as the size of every output chunk reserved
    // below.
    size_t expected_compressed_size = evbuffer_get_length(source);
    // nothing to be compressed
    if (expected_compressed_size == 0) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG, "nothing to be compressed");
    }

    z_stream stream;
    stream.zalloc = Z_NULL;
    stream.zfree = Z_NULL;
    stream.opaque = Z_NULL;
    switch (type) {
      case Type::UNKNOWN:
      case Type::IDENTITY: {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG, "nothing to be compressed");
      }
      case Type::GZIP:
        // windowBits of 15 | 16 requests a gzip header and trailer.
        if (deflateInit2(
                &stream, Z_DEFAULT_COMPRESSION /* level */,
                Z_DEFLATED /* method */, 15 | 16 /* windowBits */,
                8 /* memLevel */, Z_DEFAULT_STRATEGY /* strategy */) != Z_OK) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INTERNAL,
              "failed to initialize state for gzip data compression");
        }
        break;
      case Type::DEFLATE: {
        if (deflateInit(&stream, Z_DEFAULT_COMPRESSION /* level */) != Z_OK) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INTERNAL,
              "failed to initialize state for deflate data compression");
        }
        break;
      }
    }
    // ensure the internal state are cleaned up on function return
    std::unique_ptr<z_stream, decltype(&deflateEnd)> managed_stream(
        &stream, deflateEnd);

    // Get the addr and size of each chunk of memory in 'source'
    std::unique_ptr<struct evbuffer_iovec[]> buffer_array_holder;
    struct evbuffer_iovec* buffer_array = nullptr;
    int buffer_count = evbuffer_peek(source, -1, NULL, NULL, 0);
    if (buffer_count > 0) {
      buffer_array_holder.reset(new struct evbuffer_iovec[buffer_count]);
      buffer_array = buffer_array_holder.get();
      if (evbuffer_peek(source, -1, NULL, buffer_array, buffer_count) !=
          buffer_count) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            "unexpected error getting buffers to be compressed");
      }
    }
    // Reserve the same size as source for compressed data, it is less likely
    // that a negative compression happens.
    // NOTE(review): z_stream::avail_in / avail_out are zlib 'uInt' values, so
    // the size_t assignments below would truncate for sizes that do not fit
    // in that type - confirm that such inputs cannot reach this function.
    struct evbuffer_iovec current_reserved_space;
    RETURN_MSG_IF_ERR(
        AllocEVBuffer(
            expected_compressed_size, compressed_data, &current_reserved_space),
        "unexpected error allocating output buffer for compression");
    stream.next_out =
        reinterpret_cast<unsigned char*>(current_reserved_space.iov_base);
    stream.avail_out = expected_compressed_size;

    // Compress until end of 'source'
    for (int idx = 0; idx < buffer_count; ++idx) {
      stream.next_in =
          reinterpret_cast<unsigned char*>(buffer_array[idx].iov_base);
      stream.avail_in = buffer_array[idx].iov_len;

      // run deflate() on input until source has been read in
      do {
        // Need additional buffer
        if (stream.avail_out == 0) {
          RETURN_MSG_IF_ERR(
              CommitEVBuffer(
                  compressed_data, &current_reserved_space,
                  expected_compressed_size),
              "unexpected error committing output buffer for compression");
          RETURN_MSG_IF_ERR(
              AllocEVBuffer(
                  expected_compressed_size, compressed_data,
                  &current_reserved_space),
              "unexpected error allocating output buffer for compression");
          stream.next_out =
              reinterpret_cast<unsigned char*>(current_reserved_space.iov_base);
          stream.avail_out = expected_compressed_size;
        }
        // Only the last input chunk finishes the stream.
        auto flush = ((idx + 1) != buffer_count) ? Z_NO_FLUSH : Z_FINISH;
        auto ret = deflate(&stream, flush);
        if (ret == Z_STREAM_ERROR) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INTERNAL,
              "encountered inconsistent stream state during compression");
        }
        // A completely filled output chunk means deflate() may have more
        // output pending, so go around again with a fresh chunk.
      } while (stream.avail_out == 0);
    }
    // Make sure the last buffer is committed
    if (current_reserved_space.iov_base != nullptr) {
      RETURN_MSG_IF_ERR(
          CommitEVBuffer(
              compressed_data, &current_reserved_space,
              expected_compressed_size - stream.avail_out),
          "unexpected error committing output buffer for compression");
    }
    return nullptr;  // success
  }

  /// Decompress the content of 'source' and append the result to
  /// 'decompressed_data'. 'source' is only peeked at, it is not drained.
  ///
  /// \param type The compression format, GZIP or DEFLATE. Both are handled
  /// identically because zlib detects the actual header automatically.
  /// \param source The evbuffer holding the compressed data.
  /// \param decompressed_data The evbuffer that receives the decompressed
  /// data. On error it may already contain part of the output.
  /// \param max_decompressed_size Upper bound, in bytes, on the total
  /// decompressed size; 0 disables the limit.
  /// \return nullptr on success. TRITONSERVER_ERROR_INVALID_ARG if 'source'
  /// is empty, 'type' is UNKNOWN / IDENTITY, the input is corrupted or the
  /// size limit is exceeded; TRITONSERVER_ERROR_INTERNAL on zlib state,
  /// memory or evbuffer failures.
  static TRITONSERVER_Error* DecompressData(
      const Type type, evbuffer* source, evbuffer* decompressed_data,
      const size_t max_decompressed_size = 0)
  {
    size_t source_byte_size = evbuffer_get_length(source);
    // nothing to be decompressed
    if (source_byte_size == 0) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG, "nothing to be decompressed");
    }
    // Set reasonable size for each output buffer to be allocated
    size_t output_buffer_size = (source_byte_size > (1 << 20 /* 1MB */))
                                    ? source_byte_size
                                    : (1 << 20 /* 1MB */);

    // Cap the initial buffer allocation to the decompression limit.
    // This avoids over-allocating when a decompression limit is set.
    if (max_decompressed_size > 0 &&
        output_buffer_size > max_decompressed_size) {
      output_buffer_size = max_decompressed_size;
    }

    switch (type) {
      case Type::UNKNOWN:
      case Type::IDENTITY: {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG, "nothing to be decompressed");
      }
      case Type::GZIP:
      case Type::DEFLATE:
        // zlib can automatically detect compression type
        {
          z_stream stream;
          stream.zalloc = Z_NULL;
          stream.zfree = Z_NULL;
          stream.opaque = Z_NULL;
          stream.avail_in = 0;
          stream.next_in = Z_NULL;

          // windowBits of 15 | 32 enables automatic zlib / gzip header
          // detection.
          if (inflateInit2(&stream, 15 | 32) != Z_OK) {
            return TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INTERNAL,
                "failed to initialize state for data decompression");
          }
          // ensure the internal state are cleaned up on function return
          std::unique_ptr<z_stream, decltype(&inflateEnd)> managed_stream(
              &stream, inflateEnd);

          // Get the addr and size of each chunk of memory in 'source'
          std::unique_ptr<struct evbuffer_iovec[]> buffer_array_holder;
          struct evbuffer_iovec* buffer_array = nullptr;
          int buffer_count = evbuffer_peek(source, -1, NULL, NULL, 0);
          if (buffer_count > 0) {
            buffer_array_holder.reset(new struct evbuffer_iovec[buffer_count]);
            buffer_array = buffer_array_holder.get();
            if (evbuffer_peek(source, -1, NULL, buffer_array, buffer_count) !=
                buffer_count) {
              return TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INTERNAL,
                  "unexpected error getting buffers to be decompressed");
            }
          }
          // Reserve the first output chunk. Its size is the larger of the
          // source size and 1MB, capped by the decompression limit (see
          // 'output_buffer_size' above).
          struct evbuffer_iovec current_reserved_space;
          RETURN_MSG_IF_ERR(
              AllocEVBuffer(
                  output_buffer_size, decompressed_data,
                  &current_reserved_space),
              "unexpected error allocating output buffer for decompression");
          stream.next_out =
              reinterpret_cast<unsigned char*>(current_reserved_space.iov_base);
          stream.avail_out = output_buffer_size;

          // Track total decompressed size and current buffer size for limit
          // checking. 'total_decompressed' only counts chunks that have
          // already been filled completely.
          size_t total_decompressed = 0;
          size_t current_buffer_size = output_buffer_size;

          // Set once inflate() reports the end of the compressed stream so
          // that no further input chunk is fed to zlib and no further output
          // chunk is reserved (or counted against the limit) for it.
          bool stream_end = false;

          // Decompress until end of 'source' or end of the compressed stream
          for (int idx = 0; (idx < buffer_count) && !stream_end; ++idx) {
            stream.next_in =
                reinterpret_cast<unsigned char*>(buffer_array[idx].iov_base);
            stream.avail_in = buffer_array[idx].iov_len;

            // run inflate() on input until source has been read in
            do {
              // Need additional buffer
              if (stream.avail_out == 0) {
                total_decompressed += current_buffer_size;

                // Check decompression size limit before allocating memory
                if (max_decompressed_size > 0 &&
                    total_decompressed > max_decompressed_size) {
                  return DecompressedSizeLimitError(max_decompressed_size);
                }

                RETURN_MSG_IF_ERR(
                    CommitEVBuffer(
                        decompressed_data, &current_reserved_space,
                        current_buffer_size),
                    "unexpected error committing output buffer for "
                    "decompression");

                // Calculate next buffer size, capped by remaining limit
                current_buffer_size = output_buffer_size;
                if (max_decompressed_size > 0) {
                  size_t remaining_size =
                      max_decompressed_size - total_decompressed;
                  // If no space remains but decompression needs more, we've hit
                  // the limit
                  if (remaining_size == 0) {
                    return DecompressedSizeLimitError(max_decompressed_size);
                  }
                  if (current_buffer_size > remaining_size) {
                    current_buffer_size = remaining_size;
                  }
                }

                RETURN_MSG_IF_ERR(
                    AllocEVBuffer(
                        current_buffer_size, decompressed_data,
                        &current_reserved_space),
                    "unexpected error allocating output buffer for "
                    "decompression");
                stream.next_out = reinterpret_cast<unsigned char*>(
                    current_reserved_space.iov_base);
                stream.avail_out = current_buffer_size;
              }
              const int ret = inflate(&stream, Z_NO_FLUSH);
              if (ret == Z_STREAM_ERROR) {
                return TRITONSERVER_ErrorNew(
                    TRITONSERVER_ERROR_INTERNAL,
                    "encountered inconsistent stream state during "
                    "decompression");
              }
              // Corrupted or unsupported input must be reported instead of
              // being returned as a (partial) successful result.
              if ((ret == Z_DATA_ERROR) || (ret == Z_NEED_DICT)) {
                return TRITONSERVER_ErrorNew(
                    TRITONSERVER_ERROR_INVALID_ARG,
                    "failed to decompress data: input is corrupted or uses "
                    "an unsupported compression format");
              }
              if (ret == Z_MEM_ERROR) {
                return TRITONSERVER_ErrorNew(
                    TRITONSERVER_ERROR_INTERNAL,
                    "insufficient memory to decompress data");
              }
              // Break if decompression is complete, even if buffer is exactly
              // full
              if (ret == Z_STREAM_END) {
                stream_end = true;
                break;
              }
              // Z_OK and Z_BUF_ERROR are not fatal: a completely filled
              // output chunk means more output may be pending.
            } while (stream.avail_out == 0);
          }
          // Make sure the last buffer is committed
          if (current_reserved_space.iov_base != nullptr) {
            size_t final_chunk_size = current_buffer_size - stream.avail_out;
            if (max_decompressed_size > 0 &&
                (total_decompressed + final_chunk_size) >
                    max_decompressed_size) {
              return DecompressedSizeLimitError(max_decompressed_size);
            }

            RETURN_MSG_IF_ERR(
                CommitEVBuffer(
                    decompressed_data, &current_reserved_space,
                    final_chunk_size),
                "unexpected error committing output buffer for "
                "decompression");
          }
          break;
        }
    }
    return nullptr;  // success
  }

 private:
  // Builds the error returned whenever the decompressed output would exceed
  // 'max_decompressed_size' bytes.
  static TRITONSERVER_Error* DecompressedSizeLimitError(
      const size_t max_decompressed_size)
  {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        ("Decompressed data size exceeds the maximum allowed value of " +
         std::to_string(max_decompressed_size) +
         " bytes. Use --http-max-input-size to increase the limit.")
            .c_str());
  }

  // Reserves one contiguous region of at least 'byte_size' bytes at the end
  // of 'evb' and describes it in 'current_reserved_space'. The region only
  // becomes part of the buffer once CommitEVBuffer() is called.
  static TRITONSERVER_Error* AllocEVBuffer(
      const size_t byte_size, evbuffer* evb,
      struct evbuffer_iovec* current_reserved_space)
  {
    // Reserve requested space in evbuffer...
    if ((evbuffer_reserve_space(evb, byte_size, current_reserved_space, 1) !=
         1) ||
        (current_reserved_space->iov_len < byte_size)) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          std::string(
              "failed to reserve " + std::to_string(byte_size) +
              " bytes in evbuffer")
              .c_str());
    }
    return nullptr;  // success
  }

  // Commits the first 'filled_byte_size' bytes of the region previously
  // reserved with AllocEVBuffer() to 'evb'. On success the region's
  // 'iov_base' is reset to nullptr to mark it as no longer reserved.
  static TRITONSERVER_Error* CommitEVBuffer(
      evbuffer* evb, struct evbuffer_iovec* current_reserved_space,
      const size_t filled_byte_size)
  {
    current_reserved_space->iov_len = filled_byte_size;
    if (evbuffer_commit_space(evb, current_reserved_space, 1) != 0) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL, "failed to commit allocated evbuffer");
    }
    current_reserved_space->iov_base = nullptr;
    return nullptr;  // success
  }
};

}}  // namespace triton::server


================================================
FILE: src/grpc/CMakeLists.txt
================================================
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Library target holding the gRPC endpoint sources. EXCLUDE_FROM_ALL keeps it
# out of the default build unless another target depends on it.
add_library(
  grpc-endpoint-library EXCLUDE_FROM_ALL
  grpc_server.cc
  grpc_server.h
  grpc_handler.h
  grpc_utils.cc
  grpc_utils.h
  infer_handler.cc
  infer_handler.h
  stream_infer_handler.h
  stream_infer_handler.cc
)

target_compile_features(grpc-endpoint-library PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD})

# Compiler specific warning / language options.
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
  target_compile_options(
    grpc-endpoint-library
    PRIVATE
      /W1 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor
  )
else()
  # Warnings are errors, except maybe-uninitialized which stays a warning.
  target_compile_options(
    grpc-endpoint-library
    PRIVATE
      -Wall -Wextra -Wno-unused-parameter -Wno-deprecated-declarations -Wno-error=maybe-uninitialized -Werror
  )
endif()

set_target_properties(
  grpc-endpoint-library
  PROPERTIES
    POSITION_INDEPENDENT_CODE ON
)

# Link dependencies are PUBLIC so they propagate to consumers of this target.
target_link_libraries(
  grpc-endpoint-library
  PUBLIC
    proto-library                 # from repo-common
    triton-common-logging         # from repo-common
    triton-common-table-printer   # from repo-common
    triton-common-json            # from repo-common
    grpc-health-library           # from repo-common
    grpc-service-library          # from repo-common
    triton-core-serverapi         # from repo-core
    triton-core-serverstub        # from repo-core
    gRPC::grpc++
    gRPC::grpc
    protobuf::libprotobuf
)

target_include_directories(
  grpc-endpoint-library
  PRIVATE $<TARGET_PROPERTY:gRPC::grpc,INTERFACE_INCLUDE_DIRECTORIES>
)

# FIXME when Triton support of OpenTelemetry is available on Windows
# add ${OPENTELEMETRY_CPP_INCLUDE_DIRS} to above target_include_directories
# JIRA DLIS-4786
if (NOT WIN32 AND ${TRITON_ENABLE_TRACING})
  target_link_libraries(
    grpc-endpoint-library
    PRIVATE
    tracing-library
  )
endif()

# Always defined for the sources of this target.
target_compile_definitions(
  grpc-endpoint-library
  PRIVATE TRITON_ENABLE_GRPC=1
)

# GPU support additionally needs the minimum compute capability and cudart.
if(${TRITON_ENABLE_GPU})
  target_compile_definitions(
    grpc-endpoint-library
    PRIVATE TRITON_ENABLE_GPU=1
    PRIVATE TRITON_MIN_COMPUTE_CAPABILITY=${TRITON_MIN_COMPUTE_CAPABILITY}
  )

  target_link_libraries(
    grpc-endpoint-library
    PUBLIC
      CUDA::cudart
  )
endif() # TRITON_ENABLE_GPU

# Each remaining TRITON_ENABLE_<feature> build option is forwarded to the
# sources as the preprocessor definition TRITON_ENABLE_<feature>=1.
foreach(_triton_feature METRICS LOGGING STATS TRACING NVTX)
  if(${TRITON_ENABLE_${_triton_feature}})
    target_compile_definitions(
      grpc-endpoint-library
      PRIVATE TRITON_ENABLE_${_triton_feature}=1
    )
  endif() # TRITON_ENABLE_<feature>
endforeach()


================================================
FILE: src/grpc/grpc_handler.h
================================================
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <string>

namespace triton { namespace server { namespace grpc {
// Abstract interface for a handler that can be started and stopped. What
// starting and stopping entail is defined by the implementing classes.
class HandlerBase {
 public:
  // Virtual so that implementations can be destroyed through a HandlerBase*.
  virtual ~HandlerBase() = default;
  virtual void Start() = 0;
  virtual void Stop() = 0;
};

// Abstract interface for one unit of call state that can be processed and
// identified by a name and a numeric id.
// NOTE(review): 'uint64_t' is used here while this header only includes
// <string>; it relies on <cstdint> being pulled in transitively.
class ICallData {
 public:
  // Virtual so that implementations can be destroyed through an ICallData*.
  virtual ~ICallData() = default;
  // NOTE(review): 'ok' is presumably the success flag delivered with a gRPC
  // completion queue event, and the return value whether processing should
  // continue - confirm against the implementations.
  virtual bool Process(bool ok) = 0;
  // Returns a name for this call data.
  virtual std::string Name() = 0;
  // Returns a numeric identifier for this call data.
  virtual uint64_t Id() = 0;
};

}}}  // namespace triton::server::grpc


================================================
FILE: src/grpc/grpc_server.cc
================================================
// Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "grpc_server.h"

#include <google/protobuf/arena.h>
#include <grpc++/alarm.h>

#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <fstream>
#include <list>
#include <map>
#include <mutex>
#include <queue>
#include <sstream>
#include <thread>

#include "../classification.h"
#include "../common.h"
#include "grpc++/grpc++.h"
#include "grpc++/security/server_credentials.h"
#include "grpc++/server.h"
#include "grpc++/server_builder.h"
#include "grpc++/server_context.h"
#include "grpc++/support/status.h"
#include "triton/common/logging.h"
#include "triton/common/table_printer.h"
#include "triton/core/tritonserver.h"

#define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
#define TRITONJSON_STATUSRETURN(M) \
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
#define TRITONJSON_STATUSSUCCESS nullptr
#include "triton/common/triton_json.h"

#ifdef TRITON_ENABLE_TRACING
#include "../tracer.h"
#endif  // TRITON_ENABLE_TRACING

namespace triton { namespace server { namespace grpc {

namespace {

//
// The server has separate handling mechanisms for inference RPCs
// and non-inference RPCs.
//

//=========================================================================
//  The following section contains the handling mechanism for non-inference
//  RPCs. A single thread is created to handle all these requests as they
//  are deemed to be not performance critical.
//=========================================================================

// State for a single non-inference RPC.
//
// An instance registers itself for one incoming request when constructed
// and is then driven through its steps by Process(). The object passes
// 'this' as the completion-queue tag, so it must stay alive until Process()
// reports that it has finished.
//
// Template parameters:
//   ResponderType - responder used to send the response (constructed from
//                   the server context).
//   RequestType   - request message type.
//   ResponseType  - response message type.
template <typename ResponderType, typename RequestType, typename ResponseType>
class CommonCallData : public ICallData {
 public:
  // Callback that registers this object to receive the next request of
  // the RPC. The final argument is the tag ('this').
  using StandardRegisterFunc = std::function<void(
      ::grpc::ServerContext*, RequestType*, ResponderType*, void*)>;
  // Callback that performs the RPC: reads the request and fills in the
  // response and the status.
  using StandardCallbackFunc =
      std::function<void(RequestType&, ResponseType*, ::grpc::Status*)>;

  // name           - RPC name, used for logging.
  // id             - identifier of this call object; each replacement
  //                  object is created with 'id + 1'.
  // OnRegister     - invoked immediately from this constructor.
  // OnExecute      - invoked when the request is executed.
  // async          - when true, OnExecute runs on a separate thread.
  // cq             - completion queue used when re-queuing this object.
  // restricted_kv  - header key/value a client must supply; an empty key
  //                  disables the check.
  // response_delay - delay, in seconds, applied before writing the
  //                  response (0 disables).
  CommonCallData(
      const std::string& name, const uint64_t id,
      const StandardRegisterFunc OnRegister,
      const StandardCallbackFunc OnExecute, const bool async,
      ::grpc::ServerCompletionQueue* cq,
      const std::pair<std::string, std::string>& restricted_kv,
      const uint64_t& response_delay = 0)
      : name_(name), id_(id), OnRegister_(OnRegister), OnExecute_(OnExecute),
        async_(async), cq_(cq), responder_(&ctx_), step_(Steps::START),
        restricted_kv_(restricted_kv), response_delay_(response_delay)
  {
    // Register for a request right away, using this object as the tag.
    OnRegister_(&ctx_, &request_, &responder_, this);
    LOG_VERBOSE(1) << "Ready for RPC '" << name_ << "', " << id_;
  }

  // Waits for the asynchronous execution thread, if one was started, so
  // that it never outlives the object it operates on.
  ~CommonCallData()
  {
    if (async_thread_.joinable()) {
      async_thread_.join();
    }
  }

  // Advance the call by one step; returns false once the call has reached
  // its final step.
  bool Process(bool ok) override;

  std::string Name() override { return name_; }

  uint64_t Id() override { return id_; }

 private:
  // Run the precondition check and, if it passes, the OnExecute callback.
  void Execute();
  // Re-queue this object on the completion queue (asynchronous mode).
  void AddToCompletionQueue();
  // Send the response and status to the client.
  void WriteResponse();
  // Returns true when the request satisfies the restricted header check.
  bool ExecutePrecondition();

  const std::string name_;
  const uint64_t id_;
  const StandardRegisterFunc OnRegister_;
  const StandardCallbackFunc OnExecute_;
  const bool async_;
  ::grpc::ServerCompletionQueue* cq_;

  // 'ctx_' is declared before 'responder_' because the responder is
  // constructed from a pointer to it.
  ::grpc::ServerContext ctx_;
  // Alarm used to place this object back on the completion queue.
  ::grpc::Alarm alarm_;

  ResponderType responder_;
  RequestType request_;
  ResponseType response_;
  ::grpc::Status status_;

  // Thread running Execute() in asynchronous mode.
  std::thread async_thread_;

  // Current position in the call's lifecycle.
  Steps step_;

  // Required header key/value pair; an empty key means unrestricted.
  std::pair<std::string, std::string> restricted_kv_{"", ""};

  // Delay in seconds applied before the response is written.
  const uint64_t response_delay_;
};

// Drive this call object forward by one step.
//
// 'rpc_ok' is the success flag delivered with the completion-queue event.
// Returns true while the object is still in flight and false once it has
// reached Steps::FINISH.
template <typename ResponderType, typename RequestType, typename ResponseType>
bool
CommonCallData<ResponderType, RequestType, ResponseType>::Process(bool rpc_ok)
{
  LOG_VERBOSE(1) << "Process for " << name_ << ", rpc_ok=" << rpc_ok << ", "
                 << id_ << " step " << step_;

  // A failed event while still waiting for a new request means the server
  // is shutting down: do nothing further (in particular, do not register a
  // replacement request) and report the object as finished. A failure on
  // any later step needs no special handling because only one step is
  // executed per call.
  if (!rpc_ok && (step_ == Steps::START)) {
    if (async_thread_.joinable()) {
      async_thread_.join();
    }
    step_ = Steps::FINISH;
    return false;
  }

  if (step_ == Steps::START) {
    // Register a fresh object to take the next request of this RPC. It
    // hands itself to the completion queue from its constructor.
    new CommonCallData<ResponderType, RequestType, ResponseType>(
        name_, id_ + 1, OnRegister_, OnExecute_, async_, cq_, restricted_kv_,
        response_delay_);

    if (async_) {
      // Asynchronous calls hand the execution over to another thread; the
      // response is written on a later pass through Process().
      step_ = Steps::ISSUED;
      async_thread_ = std::thread(&CommonCallData::Execute, this);
    } else {
      // Synchronous calls execute and write the response right here.
      Execute();
      WriteResponse();
    }
  } else if (step_ == Steps::WRITEREADY) {
    // Only reached in asynchronous mode, after Execute() has completed.
    WriteResponse();
  } else if (step_ == Steps::COMPLETE) {
    step_ = Steps::FINISH;
  }

  return step_ != Steps::FINISH;
}

// Produce the response for this call: run the OnExecute callback when the
// restricted-header precondition holds, otherwise record an UNAVAILABLE
// status. Afterwards the call is marked ready to be written.
template <typename ResponderType, typename RequestType, typename ResponseType>
void
CommonCallData<ResponderType, RequestType, ResponseType>::Execute()
{
  if (!ExecutePrecondition()) {
    const std::string reason =
        std::string("This protocol is restricted, expecting header '") +
        restricted_kv_.first + "'";
    status_ = ::grpc::Status(::grpc::StatusCode::UNAVAILABLE, reason);
  } else {
    OnExecute_(request_, &response_, &status_);
  }
  step_ = Steps::WRITEREADY;

  if (!async_) {
    return;
  }

  // In asynchronous mode the object puts itself back onto the completion
  // queue so the response is written the next time it is picked up.
  AddToCompletionQueue();
}

// Check the restricted-header requirement for this call.
//
// Returns true when no restriction key is configured, or when the client
// metadata contains the configured key with exactly the configured value.
template <typename ResponderType, typename RequestType, typename ResponseType>
bool
CommonCallData<ResponderType, RequestType, ResponseType>::ExecutePrecondition()
{
  // An empty key means the protocol is not restricted.
  if (restricted_kv_.first.empty()) {
    return true;
  }

  const auto& client_headers = ctx_.client_metadata();
  const auto found = client_headers.find(restricted_kv_.first);
  if (found == client_headers.end()) {
    return false;
  }
  return found->second == restricted_kv_.second;
}

// Put this object back on the completion queue by arming the alarm with
// the current time as its deadline and 'this' as the tag.
template <typename ResponderType, typename RequestType, typename ResponseType>
void
CommonCallData<ResponderType, RequestType, ResponseType>::AddToCompletionQueue()
{
  const auto deadline = gpr_now(gpr_clock_type::GPR_CLOCK_REALTIME);
  alarm_.Set(cq_, deadline, this);
}

// Send the response and status to the client, optionally after the
// configured delay, and mark the call as COMPLETE.
template <typename ResponderType, typename RequestType, typename ResponseType>
void
CommonCallData<ResponderType, RequestType, ResponseType>::WriteResponse()
{
  const bool delay_requested = (response_delay_ != 0);
  if (delay_requested) {
    // Hold back the write for the configured number of seconds. This
    // makes it possible to test the flow where other responses are
    // available to be written in the meantime.
    LOG_VERBOSE(1) << "Delaying the write of the response by "
                   << response_delay_ << " seconds";
    const std::chrono::seconds pause(response_delay_);
    std::this_thread::sleep_for(pause);
  }

  // The step is updated before Finish() is issued, with 'this' as the tag.
  step_ = Steps::COMPLETE;
  responder_.Finish(response_, status_, this);
}

//
// CommonHandler
//
// A common handler for all non-inference requests.
//
// Handler for all non-inference RPCs. Start() launches a thread that
// registers every supported RPC and then processes events from the
// completion queue; Stop() waits for that thread to exit.
class CommonHandler : public HandlerBase {
 public:
  // name            - descriptive name of the handler, used for logging.
  // tritonserver    - server instance queried by the RPC callbacks.
  // shm_manager     - shared memory manager kept for the RPC callbacks.
  // trace_manager   - trace manager kept for the RPC callbacks.
  // service         - inference service on which RPCs are requested.
  // health_service  - health service on which 'Check' is requested.
  // cq              - completion queue serviced by this handler.
  // restricted_keys - per-category restricted header key/value pairs.
  // response_delay  - delay in seconds forwarded to every call object.
  CommonHandler(
      const std::string& name,
      const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      TraceManager* trace_manager,
      inference::GRPCInferenceService::AsyncService* service,
      ::grpc::health::v1::Health::AsyncService* health_service,
      ::grpc::ServerCompletionQueue* cq,
      const RestrictedFeatures& restricted_keys, const uint64_t response_delay);

  // Descriptive name of the handler.
  const std::string& Name() const { return name_; }

  // Start handling requests.
  void Start() override;

  // Stop handling requests.
  void Stop() override;

 private:
  // Register every RPC served by this handler.
  void SetUpAllRequests();

  // One registration function per RPC. Each one creates the initial
  // call object for that RPC.
  // [FIXME] turn into generated code
  void RegisterServerLive();
  void RegisterServerReady();
  void RegisterHealthCheck();
  void RegisterModelReady();
  void RegisterServerMetadata();
  void RegisterModelMetadata();
  void RegisterModelConfig();
  void RegisterModelStatistics();
  void RegisterTrace();
  void RegisterLogging();
  void RegisterSystemSharedMemoryStatus();
  void RegisterSystemSharedMemoryRegister();
  void RegisterSystemSharedMemoryUnregister();
  void RegisterCudaSharedMemoryStatus();
  void RegisterCudaSharedMemoryRegister();
  void RegisterCudaSharedMemoryUnregister();
  void RegisterRepositoryIndex();
  void RegisterRepositoryModelLoad();
  void RegisterRepositoryModelUnload();

  // Set count and cumulative duration for 'RegisterModelStatistics()'
  template <typename PBTYPE>
  TRITONSERVER_Error* SetStatisticsDuration(
      triton::common::TritonJson::Value& statistics_json,
      const std::string& statistics_name,
      PBTYPE* mutable_statistics_duration_protobuf) const;

  const std::string name_;
  std::shared_ptr<TRITONSERVER_Server> tritonserver_;

  std::shared_ptr<SharedMemoryManager> shm_manager_;
  TraceManager* trace_manager_;

  inference::GRPCInferenceService::AsyncService* service_;
  ::grpc::health::v1::Health::AsyncService* health_service_;
  ::grpc::ServerCompletionQueue* cq_;
  // Thread that services 'cq_'; created by Start().
  std::unique_ptr<std::thread> thread_;
  RestrictedFeatures restricted_keys_{};
  // Delay in seconds passed to every call object created by this handler.
  const uint64_t response_delay_ = 0;
};

// Store the collaborators used by the handler. No thread is created and no
// RPC is registered here; that happens in Start().
CommonHandler::CommonHandler(
    const std::string& name,
    const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
    const std::shared_ptr<SharedMemoryManager>& shm_manager,
    TraceManager* trace_manager,
    inference::GRPCInferenceService::AsyncService* service,
    ::grpc::health::v1::Health::AsyncService* health_service,
    ::grpc::ServerCompletionQueue* cq,
    const RestrictedFeatures& restricted_keys,
    const uint64_t response_delay = 0)
    : name_(name),
      tritonserver_(tritonserver),
      shm_manager_(shm_manager),
      trace_manager_(trace_manager),
      service_(service),
      health_service_(health_service),
      cq_(cq),
      restricted_keys_(restricted_keys),
      response_delay_(response_delay)
{
}

void
CommonHandler::Start()
{
  // Use a barrier to make sure we don't return until thread has
  // started.
  auto barrier = std::make_shared<Barrier>(2);

  thread_.reset(new std::thread([this, barrier] {
    SetUpAllRequests();
    barrier->Wait();

    void* tag;
    bool ok;

    while (cq_->Next(&tag, &ok)) {
      ICallData* call_data = static_cast<ICallData*>(tag);
      if (!call_data->Process(ok)) {
        LOG_VERBOSE(1) << "Done for " << call_data->Name() << ", "
                       << call_data->Id();
        delete call_data;
      }
    }
  }));

  barrier->Wait();
  LOG_VERBOSE(1) << "Thread started for " << Name();
}

// Wait for the handler thread to exit.
//
// The thread object is only created by Start(), so 'thread_' is empty if
// Stop() is called on a handler that was never started. In that case there
// is nothing to join and the call is a no-op apart from the log message.
void
CommonHandler::Stop()
{
  if (thread_ && thread_->joinable()) {
    thread_->join();
  }

  LOG_VERBOSE(1) << "Thread exited for " << Name();
}

// Register every non-inference RPC served by this handler.
void
CommonHandler::SetUpAllRequests()
{
  // Define all the RPCs to be handled by this handler below.
  //
  // Each Register function specifies its RPC in the same way:
  // 1. An OnRegister function: called when the server is ready to
  //    receive requests for this RPC.
  // 2. An OnExecute function: called to process a received request.
  // 3. A CommonCallData object created with the above callback
  //    functions.

  // health (GRPC standard)
  RegisterHealthCheck();
  // health (Triton)
  RegisterServerLive();
  RegisterServerReady();
  RegisterModelReady();

  // Metadata
  RegisterServerMetadata();
  RegisterModelMetadata();

  // model config
  RegisterModelConfig();

  // shared memory
  // system..
  RegisterSystemSharedMemoryStatus();
  RegisterSystemSharedMemoryRegister();
  RegisterSystemSharedMemoryUnregister();
  // cuda..
  RegisterCudaSharedMemoryStatus();
  RegisterCudaSharedMemoryRegister();
  RegisterCudaSharedMemoryUnregister();

  // model repository
  RegisterRepositoryIndex();
  RegisterRepositoryModelLoad();
  RegisterRepositoryModelUnload();

  // statistics
  RegisterModelStatistics();

  // trace
  RegisterTrace();

  // logging
  RegisterLogging();
}

void
CommonHandler::RegisterServerLive()
{
  auto OnRegisterServerLive =
      [this](
          ::grpc::ServerContext* ctx, inference::ServerLiveRequest* request,
          ::grpc::ServerAsyncResponseWriter<inference::ServerLiveResponse>*
              responder,
          void* tag) {
        this->service_->RequestServerLive(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteServerLive = [this](
                                 inference::ServerLiveRequest& request,
                                 inference::ServerLiveResponse* response,
                                 ::grpc::Status* status) {
    bool live = false;
    TRITONSERVER_Error* err =
        TRITONSERVER_ServerIsLive(tritonserver_.get(), &live);

    response->set_live((err == nullptr) && live);

    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
  };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::HEALTH);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::ServerLiveResponse>,
      inference::ServerLiveRequest, inference::ServerLiveResponse>(
      "ServerLive", 0, OnRegisterServerLive, OnExecuteServerLive,
      false /* async */, cq_, restricted_kv, response_delay_);
}

void
CommonHandler::RegisterServerReady()
{
  auto OnRegisterServerReady =
      [this](
          ::grpc::ServerContext* ctx, inference::ServerReadyRequest* request,
          ::grpc::ServerAsyncResponseWriter<inference::ServerReadyResponse>*
              responder,
          void* tag) {
        this->service_->RequestServerReady(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteServerReady = [this](
                                  inference::ServerReadyRequest& request,
                                  inference::ServerReadyResponse* response,
                                  ::grpc::Status* status) {
    bool ready = false;
    TRITONSERVER_Error* err =
        TRITONSERVER_ServerIsReady(tritonserver_.get(), &ready);

    response->set_ready((err == nullptr) && ready);

    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
  };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::HEALTH);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::ServerReadyResponse>,
      inference::ServerReadyRequest, inference::ServerReadyResponse>(
      "ServerReady", 0, OnRegisterServerReady, OnExecuteServerReady,
      false /* async */, cq_, restricted_kv, response_delay_);
}

void
CommonHandler::RegisterHealthCheck()
{
  auto OnRegisterHealthCheck =
      [this](
          ::grpc::ServerContext* ctx,
          ::grpc::health::v1::HealthCheckRequest* request,
          ::grpc::ServerAsyncResponseWriter<
              ::grpc::health::v1::HealthCheckResponse>* responder,
          void* tag) {
        this->health_service_->RequestCheck(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteHealthCheck = [this](
                                  ::grpc::health::v1::HealthCheckRequest&
                                      request,
                                  ::grpc::health::v1::HealthCheckResponse*
                                      response,
                                  ::grpc::Status* status) {
    bool live = false;
    TRITONSERVER_Error* err =
        TRITONSERVER_ServerIsReady(tritonserver_.get(), &live);

    auto serving_status =
        ::grpc::health::v1::HealthCheckResponse_ServingStatus_UNKNOWN;
    if (err == nullptr) {
      serving_status =
          live ? ::grpc::health::v1::HealthCheckResponse_ServingStatus_SERVING
               : ::grpc::health::v1::
                     HealthCheckResponse_ServingStatus_NOT_SERVING;
    }
    response->set_status(serving_status);

    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
  };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::HEALTH);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<
          ::grpc::health::v1::HealthCheckResponse>,
      ::grpc::health::v1::HealthCheckRequest,
      ::grpc::health::v1::HealthCheckResponse>(
      "Check", 0, OnRegisterHealthCheck, OnExecuteHealthCheck,
      false /* async */, cq_, restricted_kv, response_delay_);
}

void
CommonHandler::RegisterModelReady()
{
  auto OnRegisterModelReady =
      [this](
          ::grpc::ServerContext* ctx, inference::ModelReadyRequest* request,
          ::grpc::ServerAsyncResponseWriter<inference::ModelReadyResponse>*
              responder,
          void* tag) {
        this->service_->RequestModelReady(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteModelReady = [this](
                                 inference::ModelReadyRequest& request,
                                 inference::ModelReadyResponse* response,
                                 ::grpc::Status* status) {
    bool is_ready = false;
    int64_t requested_model_version;
    auto err =
        GetModelVersionFromString(request.version(), &requested_model_version);
    if (err == nullptr) {
      err = TRITONSERVER_ServerModelIsReady(
          tritonserver_.get(), request.name().c_str(), requested_model_version,
          &is_ready);
    }

    response->set_ready(is_ready);

    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
  };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::HEALTH);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::ModelReadyResponse>,
      inference::ModelReadyRequest, inference::ModelReadyResponse>(
      "ModelReady", 0, OnRegisterModelReady, OnExecuteModelReady,
      false /* async */, cq_, restricted_kv, response_delay_);
}

// Register the 'ServerMetadata' RPC: returns the server name, version and
// the list of supported extensions.
void
CommonHandler::RegisterServerMetadata()
{
  // Ask the service for the next ServerMetadata request, delivered on
  // 'cq_'.
  auto OnRegisterServerMetadata =
      [this](
          ::grpc::ServerContext* ctx, inference::ServerMetadataRequest* request,
          ::grpc::ServerAsyncResponseWriter<inference::ServerMetadataResponse>*
              responder,
          void* tag) {
        this->service_->RequestServerMetadata(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  // Fetch the metadata message from the server, serialize it to JSON and
  // copy the fields of interest into the response.
  auto OnExecuteServerMetadata =
      [this](
          inference::ServerMetadataRequest& request,
          inference::ServerMetadataResponse* response, ::grpc::Status* status) {
        TRITONSERVER_Message* server_metadata_message = nullptr;
        TRITONSERVER_Error* err = TRITONSERVER_ServerMetadata(
            tritonserver_.get(), &server_metadata_message);
        GOTO_IF_ERR(err, earlyexit);

        const char* buffer;
        size_t byte_size;
        err = TRITONSERVER_MessageSerializeToJson(
            server_metadata_message, &buffer, &byte_size);
        GOTO_IF_ERR(err, earlyexit);

        {
          triton::common::TritonJson::Value server_metadata_json;
          err = server_metadata_json.Parse(buffer, byte_size);
          GOTO_IF_ERR(err, earlyexit);

          const char* name;
          size_t namelen;
          err = server_metadata_json.MemberAsString("name", &name, &namelen);
          GOTO_IF_ERR(err, earlyexit);

          const char* version;
          size_t versionlen;
          err = server_metadata_json.MemberAsString(
              "version", &version, &versionlen);
          GOTO_IF_ERR(err, earlyexit);

          response->set_name(std::string(name, namelen));
          response->set_version(std::string(version, versionlen));

          // 'extensions' is optional.
          if (server_metadata_json.Find("extensions")) {
            triton::common::TritonJson::Value extensions_json;
            err = server_metadata_json.MemberAsArray(
                "extensions", &extensions_json);
            GOTO_IF_ERR(err, earlyexit);

            for (size_t idx = 0; idx < extensions_json.ArraySize(); ++idx) {
              const char* ext;
              size_t extlen;
              err = extensions_json.IndexAsString(idx, &ext, &extlen);
              GOTO_IF_ERR(err, earlyexit);
              response->add_extensions(std::string(ext, extlen));
            }
          }
        }

      earlyexit:
        // Release the message on every path, including the error exits
        // taken after it has been created; otherwise it would be leaked.
        if (server_metadata_message != nullptr) {
          TRITONSERVER_MessageDelete(server_metadata_message);
        }
        GrpcStatusUtil::Create(status, err);
        TRITONSERVER_ErrorDelete(err);
      };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::METADATA);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::ServerMetadataResponse>,
      inference::ServerMetadataRequest, inference::ServerMetadataResponse>(
      "ServerMetadata", 0, OnRegisterServerMetadata, OnExecuteServerMetadata,
      false /* async */, cq_, restricted_kv, response_delay_);
}

// Register the 'ModelMetadata' RPC: returns the name, versions, platform
// and the input/output tensor descriptions of the requested model.
void
CommonHandler::RegisterModelMetadata()
{
  // Ask the service for the next ModelMetadata request, delivered on 'cq_'.
  auto OnRegisterModelMetadata =
      [this](
          ::grpc::ServerContext* ctx, inference::ModelMetadataRequest* request,
          ::grpc::ServerAsyncResponseWriter<inference::ModelMetadataResponse>*
              responder,
          void* tag) {
        this->service_->RequestModelMetadata(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  // Fetch the metadata message for the model, serialize it to JSON and
  // copy the fields of interest into the response.
  auto OnExecuteModelMetadata = [this](
                                    inference::ModelMetadataRequest& request,
                                    inference::ModelMetadataResponse* response,
                                    ::grpc::Status* status) {
    // Declared ahead of the first goto so that the common exit below can
    // release the message regardless of where processing stopped.
    TRITONSERVER_Message* model_metadata_message = nullptr;

    int64_t requested_model_version;
    auto err =
        GetModelVersionFromString(request.version(), &requested_model_version);
    GOTO_IF_ERR(err, earlyexit);

    {
      err = TRITONSERVER_ServerModelMetadata(
          tritonserver_.get(), request.name().c_str(), requested_model_version,
          &model_metadata_message);
      GOTO_IF_ERR(err, earlyexit);

      const char* buffer;
      size_t byte_size;
      err = TRITONSERVER_MessageSerializeToJson(
          model_metadata_message, &buffer, &byte_size);
      GOTO_IF_ERR(err, earlyexit);

      triton::common::TritonJson::Value model_metadata_json;
      err = model_metadata_json.Parse(buffer, byte_size);
      GOTO_IF_ERR(err, earlyexit);

      const char* name;
      size_t namelen;
      err = model_metadata_json.MemberAsString("name", &name, &namelen);
      GOTO_IF_ERR(err, earlyexit);

      response->set_name(std::string(name, namelen));

      // 'versions' is optional.
      if (model_metadata_json.Find("versions")) {
        triton::common::TritonJson::Value versions_json;
        err = model_metadata_json.MemberAsArray("versions", &versions_json);
        GOTO_IF_ERR(err, earlyexit);

        for (size_t idx = 0; idx < versions_json.ArraySize(); ++idx) {
          const char* version;
          size_t versionlen;
          err = versions_json.IndexAsString(idx, &version, &versionlen);
          GOTO_IF_ERR(err, earlyexit);
          response->add_versions(std::string(version, versionlen));
        }
      }

      const char* platform;
      size_t platformlen;
      err = model_metadata_json.MemberAsString(
          "platform", &platform, &platformlen);
      GOTO_IF_ERR(err, earlyexit);
      response->set_platform(std::string(platform, platformlen));

      // Input tensors: name, datatype and (optional) shape for each.
      if (model_metadata_json.Find("inputs")) {
        triton::common::TritonJson::Value inputs_json;
        err = model_metadata_json.MemberAsArray("inputs", &inputs_json);
        GOTO_IF_ERR(err, earlyexit);

        for (size_t idx = 0; idx < inputs_json.ArraySize(); ++idx) {
          triton::common::TritonJson::Value io_json;
          err = inputs_json.IndexAsObject(idx, &io_json);
          GOTO_IF_ERR(err, earlyexit);

          inference::ModelMetadataResponse::TensorMetadata* io =
              response->add_inputs();

          const char* name;
          size_t namelen;
          err = io_json.MemberAsString("name", &name, &namelen);
          GOTO_IF_ERR(err, earlyexit);

          const char* datatype;
          size_t datatypelen;
          err = io_json.MemberAsString("datatype", &datatype, &datatypelen);
          GOTO_IF_ERR(err, earlyexit);

          io->set_name(std::string(name, namelen));
          io->set_datatype(std::string(datatype, datatypelen));

          if (io_json.Find("shape")) {
            triton::common::TritonJson::Value shape_json;
            err = io_json.MemberAsArray("shape", &shape_json);
            GOTO_IF_ERR(err, earlyexit);

            for (size_t sidx = 0; sidx < shape_json.ArraySize(); ++sidx) {
              int64_t d;
              err = shape_json.IndexAsInt(sidx, &d);
              GOTO_IF_ERR(err, earlyexit);

              io->add_shape(d);
            }
          }
        }
      }

      // Output tensors: same layout as the inputs above.
      if (model_metadata_json.Find("outputs")) {
        triton::common::TritonJson::Value outputs_json;
        err = model_metadata_json.MemberAsArray("outputs", &outputs_json);
        GOTO_IF_ERR(err, earlyexit);

        for (size_t idx = 0; idx < outputs_json.ArraySize(); ++idx) {
          triton::common::TritonJson::Value io_json;
          err = outputs_json.IndexAsObject(idx, &io_json);
          GOTO_IF_ERR(err, earlyexit);

          inference::ModelMetadataResponse::TensorMetadata* io =
              response->add_outputs();

          const char* name;
          size_t namelen;
          err = io_json.MemberAsString("name", &name, &namelen);
          GOTO_IF_ERR(err, earlyexit);

          const char* datatype;
          size_t datatypelen;
          err = io_json.MemberAsString("datatype", &datatype, &datatypelen);
          GOTO_IF_ERR(err, earlyexit);

          io->set_name(std::string(name, namelen));
          io->set_datatype(std::string(datatype, datatypelen));

          if (io_json.Find("shape")) {
            triton::common::TritonJson::Value shape_json;
            err = io_json.MemberAsArray("shape", &shape_json);
            GOTO_IF_ERR(err, earlyexit);

            for (size_t sidx = 0; sidx < shape_json.ArraySize(); ++sidx) {
              int64_t d;
              err = shape_json.IndexAsInt(sidx, &d);
              GOTO_IF_ERR(err, earlyexit);

              io->add_shape(d);
            }
          }
        }
      }
    }

  earlyexit:
    // Release the message on every path, including the error exits taken
    // after it has been created; otherwise it would be leaked.
    if (model_metadata_message != nullptr) {
      TRITONSERVER_MessageDelete(model_metadata_message);
    }
    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
  };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::METADATA);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::ModelMetadataResponse>,
      inference::ModelMetadataRequest, inference::ModelMetadataResponse>(
      "ModelMetadata", 0, OnRegisterModelMetadata, OnExecuteModelMetadata,
      false /* async */, cq_, restricted_kv, response_delay_);
}

void
CommonHandler::RegisterModelConfig()
{
  auto OnRegisterModelConfig =
      [this](
          ::grpc::ServerContext* ctx, inference::ModelConfigRequest* request,
          ::grpc::ServerAsyncResponseWriter<inference::ModelConfigResponse>*
              responder,
          void* tag) {
        this->service_->RequestModelConfig(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteModelConfig = [this](
                                  inference::ModelConfigRequest& request,
                                  inference::ModelConfigResponse* response,
                                  ::grpc::Status* status) {
    int64_t requested_model_version;
    auto err =
        GetModelVersionFromString(request.version(), &requested_model_version);
    if (err == nullptr) {
      TRITONSERVER_Message* model_config_message = nullptr;
      err = TRITONSERVER_ServerModelConfig(
          tritonserver_.get(), request.name().c_str(), requested_model_version,
          1 /* config_version */, &model_config_message);
      if (err == nullptr) {
        const char* buffer;
        size_t byte_size;
        err = TRITONSERVER_MessageSerializeToJson(
            model_config_message, &buffer, &byte_size);
        if (err == nullptr) {
          ::google::protobuf::util::JsonStringToMessage(
              ::google::protobuf::stringpiece_internal::StringPiece(
                  buffer, (int)byte_size),
              response->mutable_config());
        }
        TRITONSERVER_MessageDelete(model_config_message);
      }
    }

    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
  };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::MODEL_CONFIG);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::ModelConfigResponse>,
      inference::ModelConfigRequest, inference::ModelConfigResponse>(
      "ModelConfig", 0, OnRegisterModelConfig, OnExecuteModelConfig,
      false /* async */, cq_, restricted_kv, response_delay_);
}

// Registers the handler for the ModelStatistics RPC.
//
// The execute callback asks the server for the statistics of the requested
// model / version as a JSON message, parses it, and copies each entry of the
// "model_stats" array into the protobuf response. When the server is built
// without TRITON_ENABLE_STATS the RPC always fails with
// TRITONSERVER_ERROR_UNAVAILABLE.
void
CommonHandler::RegisterModelStatistics()
{
  // Asks the gRPC service to deliver the next ModelStatistics call to this
  // handler's completion queue.
  auto OnRegisterModelStatistics =
      [this](
          ::grpc::ServerContext* ctx,
          inference::ModelStatisticsRequest* request,
          ::grpc::ServerAsyncResponseWriter<inference::ModelStatisticsResponse>*
              responder,
          void* tag) {
        this->service_->RequestModelStatistics(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  // Fills 'response' from the server statistics and converts the first error
  // encountered (if any) into 'status'. Entries already added to 'response'
  // before an error are left in place.
  auto OnExecuteModelStatistics = [this](
                                      inference::ModelStatisticsRequest&
                                          request,
                                      inference::ModelStatisticsResponse*
                                          response,
                                      ::grpc::Status* status) {
#ifdef TRITON_ENABLE_STATS
    triton::common::TritonJson::Value model_stats_json;

    int64_t requested_model_version;
    auto err =
        GetModelVersionFromString(request.version(), &requested_model_version);
    GOTO_IF_ERR(err, earlyexit);

    {
      TRITONSERVER_Message* model_stats_message = nullptr;
      err = TRITONSERVER_ServerModelStatistics(
          tritonserver_.get(), request.name().c_str(), requested_model_version,
          &model_stats_message);
      GOTO_IF_ERR(err, earlyexit);

      const char* buffer;
      size_t byte_size;
      err = TRITONSERVER_MessageSerializeToJson(
          model_stats_message, &buffer, &byte_size);
      if (err == nullptr) {
        err = model_stats_json.Parse(buffer, byte_size);
      }

      // Release the message on every path once it has been obtained, so that
      // a serialization or parse failure does not leak it. The parsed JSON
      // document is used after this point, exactly as before.
      TRITONSERVER_MessageDelete(model_stats_message);
      GOTO_IF_ERR(err, earlyexit);
    }

    if (model_stats_json.Find("model_stats")) {
      triton::common::TritonJson::Value stats_json;
      err = model_stats_json.MemberAsArray("model_stats", &stats_json);
      GOTO_IF_ERR(err, earlyexit);

      for (size_t idx = 0; idx < stats_json.ArraySize(); ++idx) {
        triton::common::TritonJson::Value model_stat;
        err = stats_json.IndexAsObject(idx, &model_stat);
        GOTO_IF_ERR(err, earlyexit);

        auto statistics = response->add_model_stats();

        // Model identity.
        const char* name;
        size_t namelen;
        err = model_stat.MemberAsString("name", &name, &namelen);
        GOTO_IF_ERR(err, earlyexit);

        const char* version;
        size_t versionlen;
        err = model_stat.MemberAsString("version", &version, &versionlen);
        GOTO_IF_ERR(err, earlyexit);

        statistics->set_name(std::string(name, namelen));
        statistics->set_version(std::string(version, versionlen));

        // Top-level counters.
        uint64_t ucnt;
        err = model_stat.MemberAsUInt("last_inference", &ucnt);
        GOTO_IF_ERR(err, earlyexit);
        statistics->set_last_inference(ucnt);

        err = model_stat.MemberAsUInt("inference_count", &ucnt);
        GOTO_IF_ERR(err, earlyexit);
        statistics->set_inference_count(ucnt);

        err = model_stat.MemberAsUInt("execution_count", &ucnt);
        GOTO_IF_ERR(err, earlyexit);
        statistics->set_execution_count(ucnt);

        // "inference_stats": one count/ns duration per category.
        {
          triton::common::TritonJson::Value infer_stats_json;
          err = model_stat.MemberAsObject("inference_stats", &infer_stats_json);
          GOTO_IF_ERR(err, earlyexit);

          err = SetStatisticsDuration(
              infer_stats_json, "success",
              statistics->mutable_inference_stats()->mutable_success());
          GOTO_IF_ERR(err, earlyexit);
          err = SetStatisticsDuration(
              infer_stats_json, "fail",
              statistics->mutable_inference_stats()->mutable_fail());
          GOTO_IF_ERR(err, earlyexit);
          err = SetStatisticsDuration(
              infer_stats_json, "queue",
              statistics->mutable_inference_stats()->mutable_queue());
          GOTO_IF_ERR(err, earlyexit);
          err = SetStatisticsDuration(
              infer_stats_json, "compute_input",
              statistics->mutable_inference_stats()->mutable_compute_input());
          GOTO_IF_ERR(err, earlyexit);
          err = SetStatisticsDuration(
              infer_stats_json, "compute_infer",
              statistics->mutable_inference_stats()->mutable_compute_infer());
          GOTO_IF_ERR(err, earlyexit);
          err = SetStatisticsDuration(
              infer_stats_json, "compute_output",
              statistics->mutable_inference_stats()->mutable_compute_output());
          GOTO_IF_ERR(err, earlyexit);
          err = SetStatisticsDuration(
              infer_stats_json, "cache_hit",
              statistics->mutable_inference_stats()->mutable_cache_hit());
          GOTO_IF_ERR(err, earlyexit);
          err = SetStatisticsDuration(
              infer_stats_json, "cache_miss",
              statistics->mutable_inference_stats()->mutable_cache_miss());
          GOTO_IF_ERR(err, earlyexit);
        }

        // "response_stats": an object whose members are copied into the
        // response map under the same key.
        {
          triton::common::TritonJson::Value responses_json;
          err = model_stat.MemberAsObject("response_stats", &responses_json);
          GOTO_IF_ERR(err, earlyexit);

          std::vector<std::string> keys;
          err = responses_json.Members(&keys);
          GOTO_IF_ERR(err, earlyexit);

          for (const auto& key : keys) {
            triton::common::TritonJson::Value res_json;
            err = responses_json.MemberAsObject(key.c_str(), &res_json);
            GOTO_IF_ERR(err, earlyexit);

            inference::InferResponseStatistics res;

            err = SetStatisticsDuration(
                res_json, "compute_infer", res.mutable_compute_infer());
            GOTO_IF_ERR(err, earlyexit);
            err = SetStatisticsDuration(
                res_json, "compute_output", res.mutable_compute_output());
            GOTO_IF_ERR(err, earlyexit);
            err = SetStatisticsDuration(
                res_json, "success", res.mutable_success());
            GOTO_IF_ERR(err, earlyexit);
            err = SetStatisticsDuration(res_json, "fail", res.mutable_fail());
            GOTO_IF_ERR(err, earlyexit);
            err = SetStatisticsDuration(
                res_json, "empty_response", res.mutable_empty_response());
            GOTO_IF_ERR(err, earlyexit);
            err =
                SetStatisticsDuration(res_json, "cancel", res.mutable_cancel());
            GOTO_IF_ERR(err, earlyexit);

            // Only stored once every duration of this entry has been read.
            (*statistics->mutable_response_stats())[key] = std::move(res);
          }
        }

        // "batch_stats": one entry per array element.
        {
          triton::common::TritonJson::Value batches_json;
          err = model_stat.MemberAsArray("batch_stats", &batches_json);
          GOTO_IF_ERR(err, earlyexit);

          for (size_t idx = 0; idx < batches_json.ArraySize(); ++idx) {
            triton::common::TritonJson::Value batch_stat;
            err = batches_json.IndexAsObject(idx, &batch_stat);
            GOTO_IF_ERR(err, earlyexit);

            auto batch_statistics = statistics->add_batch_stats();

            uint64_t ucnt;
            err = batch_stat.MemberAsUInt("batch_size", &ucnt);
            GOTO_IF_ERR(err, earlyexit);
            batch_statistics->set_batch_size(ucnt);

            err = SetStatisticsDuration(
                batch_stat, "compute_input",
                batch_statistics->mutable_compute_input());
            GOTO_IF_ERR(err, earlyexit);
            err = SetStatisticsDuration(
                batch_stat, "compute_infer",
                batch_statistics->mutable_compute_infer());
            GOTO_IF_ERR(err, earlyexit);
            err = SetStatisticsDuration(
                batch_stat, "compute_output",
                batch_statistics->mutable_compute_output());
            GOTO_IF_ERR(err, earlyexit);
          }
        }

        // "memory_usage": type / id / byte_size per array element.
        {
          triton::common::TritonJson::Value memory_usage_json;
          err = model_stat.MemberAsArray("memory_usage", &memory_usage_json);
          GOTO_IF_ERR(err, earlyexit);

          for (size_t idx = 0; idx < memory_usage_json.ArraySize(); ++idx) {
            triton::common::TritonJson::Value usage;
            err = memory_usage_json.IndexAsObject(idx, &usage);
            GOTO_IF_ERR(err, earlyexit);

            auto memory_usage = statistics->add_memory_usage();
            {
              const char* type;
              size_t type_len;
              err = usage.MemberAsString("type", &type, &type_len);
              GOTO_IF_ERR(err, earlyexit);
              memory_usage->set_type(std::string(type, type_len));
            }
            {
              int64_t id;
              err = usage.MemberAsInt("id", &id);
              GOTO_IF_ERR(err, earlyexit);
              memory_usage->set_id(id);
            }
            {
              uint64_t byte_size;
              err = usage.MemberAsUInt("byte_size", &byte_size);
              GOTO_IF_ERR(err, earlyexit);
              memory_usage->set_byte_size(byte_size);
            }
          }
        }
      }
    }

  earlyexit:
    // Single exit point: translate 'err' (possibly nullptr) into the gRPC
    // status and release it.
    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
#else
    auto err = TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNAVAILABLE,
        "the server does not support model statistics");
    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
#endif
  };

  // The call data object is heap allocated and not owned here.
  // NOTE(review): presumably it manages its own lifetime - confirm against
  // CommonCallData.
  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::STATISTICS);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::ModelStatisticsResponse>,
      inference::ModelStatisticsRequest, inference::ModelStatisticsResponse>(
      "ModelStatistics", 0, OnRegisterModelStatistics, OnExecuteModelStatistics,
      false /* async */, cq_, restricted_kv, response_delay_);
}

// Copies one duration statistic from JSON into its protobuf counterpart.
//
// Looks up the object member 'statistics_name' inside 'statistics_json' and
// transfers its unsigned "count" and "ns" members to
// 'mutable_statistics_duration_protobuf' through set_count() / set_ns().
//
// Returns nullptr on success, otherwise the error produced by the first
// failing JSON lookup. "count" is written before "ns" is read, so it may
// already be set on the protobuf when the "ns" lookup fails.
template <typename PBTYPE>
TRITONSERVER_Error*
CommonHandler::SetStatisticsDuration(
    triton::common::TritonJson::Value& statistics_json,
    const std::string& statistics_name,
    PBTYPE* mutable_statistics_duration_protobuf) const
{
  triton::common::TritonJson::Value duration_json;
  RETURN_IF_ERR(
      statistics_json.MemberAsObject(statistics_name.c_str(), &duration_json));

  uint64_t count;
  RETURN_IF_ERR(duration_json.MemberAsUInt("count", &count));
  mutable_statistics_duration_protobuf->set_count(count);

  uint64_t ns;
  RETURN_IF_ERR(duration_json.MemberAsUInt("ns", &ns));
  mutable_statistics_duration_protobuf->set_ns(ns);

  return nullptr;
}

// Registers the handler for the TraceSetting RPC.
//
// The execute callback optionally updates the trace settings (global, or for
// the model named in the request) from the request's "settings" map and then
// reports the resulting settings in the response. When the server is built
// without TRITON_ENABLE_TRACING the RPC always fails with
// TRITONSERVER_ERROR_UNAVAILABLE.
void
CommonHandler::RegisterTrace()
{
  // Asks the gRPC service to deliver the next TraceSetting call to this
  // handler's completion queue.
  auto OnRegisterTrace =
      [this](
          ::grpc::ServerContext* ctx, inference::TraceSettingRequest* request,
          ::grpc::ServerAsyncResponseWriter<inference::TraceSettingResponse>*
              responder,
          void* tag) {
        this->service_->RequestTraceSetting(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteTrace = [this](
                            inference::TraceSettingRequest& request,
                            inference::TraceSettingResponse* response,
                            ::grpc::Status* status) {
#ifdef TRITON_ENABLE_TRACING
    // These locals back the pointers stored in 'new_setting' below and receive
    // the current values from GetTraceSetting(), so they are declared up
    // front (also keeps every 'goto earlyexit' from crossing a declaration).
    TRITONSERVER_Error* err = nullptr;
    TRITONSERVER_InferenceTraceLevel level = TRITONSERVER_TRACE_LEVEL_DISABLED;
    uint32_t rate;
    int32_t count;
    uint32_t log_frequency;
    std::string filepath;
    InferenceTraceMode trace_mode;
    TraceConfigMap config_map;

    if (!request.model_name().empty()) {
      bool ready = false;
      // Store the result in 'err' before testing it: the exit path below
      // reports and releases 'err', so an error handed only to the macro
      // would be lost (OK status returned) and never deleted.
      err = TRITONSERVER_ServerModelIsReady(
          tritonserver_.get(), request.model_name().c_str(),
          -1 /* model version */, &ready);
      GOTO_IF_ERR(err, earlyexit);
      if (!ready) {
        err = TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string("Request for unknown model : ") + request.model_name())
                .c_str());
        GOTO_IF_ERR(err, earlyexit);
      }
    }

    // Update trace setting
    if (!request.settings().empty()) {
      TraceManager::NewSetting new_setting;
      {
        // The trace file can not be changed through this endpoint.
        static std::string setting_name = "trace_file";
        auto it = request.settings().find(setting_name);
        if (it != request.settings().end()) {
          err = TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "trace file location can not be updated through network "
              "protocol");
          GOTO_IF_ERR(err, earlyexit);
        }
      }
      {
        // An empty value list clears the level; otherwise the listed levels
        // are OR-ed together. "OFF" is only accepted on its own. Strings
        // other than "OFF", "TIMESTAMPS" and "TENSORS" are ignored.
        static std::string setting_name = "trace_level";
        auto it = request.settings().find(setting_name);
        if (it != request.settings().end()) {
          if (it->second.value().size() == 0) {
            new_setting.clear_level_ = true;
          } else {
            for (const auto& level_str : it->second.value()) {
              if (level_str == "OFF") {
                if (it->second.value().size() == 1) {
                  level = TRITONSERVER_TRACE_LEVEL_DISABLED;
                  new_setting.level_ = &level;
                } else {
                  err = TRITONSERVER_ErrorNew(
                      TRITONSERVER_ERROR_INVALID_ARG,
                      "Expect only one trace level 'OFF' is specified");
                  GOTO_IF_ERR(err, earlyexit);
                }
              } else if (level_str == "TIMESTAMPS") {
                level = static_cast<TRITONSERVER_InferenceTraceLevel>(
                    level | TRITONSERVER_TRACE_LEVEL_TIMESTAMPS);
                new_setting.level_ = &level;
              } else if (level_str == "TENSORS") {
                level = static_cast<TRITONSERVER_InferenceTraceLevel>(
                    level | TRITONSERVER_TRACE_LEVEL_TENSORS);
                new_setting.level_ = &level;
              }
            }
          }
        }
      }
      {
        // Empty value list clears the rate; exactly one value sets it.
        // NOTE(review): std::stoi parses a signed int that is then stored in
        // a uint32_t, so a negative input wraps instead of being rejected and
        // the range in the out-of-range message does not match what stoi
        // accepts - confirm whether this should be tightened.
        static std::string setting_name = "trace_rate";
        auto it = request.settings().find(setting_name);
        if (it != request.settings().end()) {
          if (it->second.value().size() == 0) {
            new_setting.clear_rate_ = true;
          } else if (it->second.value().size() == 1) {
            try {
              rate = std::stoi(it->second.value()[0]);
              new_setting.rate_ = &rate;
            }
            catch (const std::invalid_argument& ia) {
              err = TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string("Unable to parse '") + setting_name +
                   "', got: " + it->second.value()[0])
                      .c_str());
              GOTO_IF_ERR(err, earlyexit);
            }
            catch (const std::out_of_range& oor) {
              err = TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string("Unable to parse '") + setting_name +
                   "', value is out of range [ " +
                   std::to_string(std::numeric_limits<std::uint32_t>::min()) +
                   ", " +
                   std::to_string(std::numeric_limits<std::uint32_t>::max()) +
                   " ], got: " + it->second.value()[0])
                      .c_str());
              GOTO_IF_ERR(err, earlyexit);
            }
          } else {
            err = TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                (std::string("expect only 1 value for '") + setting_name + "'")
                    .c_str());
            GOTO_IF_ERR(err, earlyexit);
          }
        }
      }
      {
        // Empty value list clears the count; exactly one value sets it, and
        // it must not be below TraceManager::MIN_TRACE_COUNT_VALUE.
        static std::string setting_name = "trace_count";
        auto it = request.settings().find(setting_name);
        if (it != request.settings().end()) {
          if (it->second.value().size() == 0) {
            new_setting.clear_count_ = true;
          } else if (it->second.value().size() == 1) {
            try {
              count = std::stoi(it->second.value()[0]);
              if (count < TraceManager::MIN_TRACE_COUNT_VALUE) {
                err = TRITONSERVER_ErrorNew(
                    TRITONSERVER_ERROR_INVALID_ARG,
                    (std::string("Unable to parse '") + setting_name +
                     "'. Expecting value >= " +
                     std::to_string(TraceManager::MIN_TRACE_COUNT_VALUE) +
                     ", got: " + it->second.value()[0])
                        .c_str());
                GOTO_IF_ERR(err, earlyexit);
              }
              new_setting.count_ = &count;
            }
            catch (const std::invalid_argument& ia) {
              err = TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string("Unable to parse '") + setting_name +
                   "', got: " + it->second.value()[0])
                      .c_str());
              GOTO_IF_ERR(err, earlyexit);
            }
            catch (const std::out_of_range& oor) {
              err = TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string("Unable to parse '") + setting_name +
                   "', value is out of range [ " +
                   std::to_string(TraceManager::MIN_TRACE_COUNT_VALUE) + ", " +
                   std::to_string(std::numeric_limits<std::int32_t>::max()) +
                   " ], got: " + it->second.value()[0])
                      .c_str());
              GOTO_IF_ERR(err, earlyexit);
            }
          } else {
            err = TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                (std::string("expect only 1 value for '") + setting_name + "'")
                    .c_str());
            GOTO_IF_ERR(err, earlyexit);
          }
        }
      }
      {
        // Empty value list clears the log frequency; exactly one value sets
        // it. NOTE(review): same signed-parse-into-unsigned caveat as
        // "trace_rate" above.
        static std::string setting_name = "log_frequency";
        auto it = request.settings().find(setting_name);
        if (it != request.settings().end()) {
          if (it->second.value().size() == 0) {
            new_setting.clear_log_frequency_ = true;
          } else if (it->second.value().size() == 1) {
            try {
              log_frequency = std::stoi(it->second.value()[0]);
              new_setting.log_frequency_ = &log_frequency;
            }
            catch (const std::invalid_argument& ia) {
              err = TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string("Unable to parse '") + setting_name +
                   "', got: " + it->second.value()[0])
                      .c_str());
              GOTO_IF_ERR(err, earlyexit);
            }
            catch (const std::out_of_range& oor) {
              err = TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string("Unable to parse '") + setting_name +
                   "', value is out of range [ " +
                   std::to_string(std::numeric_limits<std::uint32_t>::min()) +
                   ", " +
                   std::to_string(std::numeric_limits<std::uint32_t>::max()) +
                   " ], got: " + it->second.value()[0])
                      .c_str());
              GOTO_IF_ERR(err, earlyexit);
            }
          } else {
            err = TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                (std::string("expect only 1 value for '") + setting_name + "'")
                    .c_str());
            GOTO_IF_ERR(err, earlyexit);
          }
        }
      }

      // Nothing is applied until every requested setting has been validated.
      err =
          trace_manager_->UpdateTraceSetting(request.model_name(), new_setting);
      GOTO_IF_ERR(err, earlyexit);
    }

    // Get current trace setting, this is needed even if the setting
    // has been updated above as some values may not be provided in the request.
    trace_manager_->GetTraceSetting(
        request.model_name(), &level, &rate, &count, &log_frequency, &filepath,
        &trace_mode, &config_map);
    // level
    {
      inference::TraceSettingResponse::SettingValue level_setting;
      if (level == TRITONSERVER_TRACE_LEVEL_DISABLED) {
        level_setting.add_value("OFF");
      } else {
        if (level & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) {
          level_setting.add_value("TIMESTAMPS");
        }
        if (level & TRITONSERVER_TRACE_LEVEL_TENSORS) {
          level_setting.add_value("TENSORS");
        }
      }
      (*response->mutable_settings())["trace_level"] = level_setting;
    }
    (*response->mutable_settings())["trace_rate"].add_value(
        std::to_string(rate));
    (*response->mutable_settings())["trace_count"].add_value(
        std::to_string(count));
    // The log frequency and trace file are only reported in Triton trace mode.
    if (trace_mode == TRACE_MODE_TRITON) {
      (*response->mutable_settings())["log_frequency"].add_value(
          std::to_string(log_frequency));
      (*response->mutable_settings())["trace_file"].add_value(filepath);
    }
    (*response->mutable_settings())["trace_mode"].add_value(
        trace_manager_->InferenceTraceModeString(trace_mode));
    {
      // Report the options configured for the active trace mode; the config
      // map is keyed by the stringified mode value. The "file" and
      // "log-frequency" options are skipped.
      auto mode_key = std::to_string(trace_mode);
      auto trace_options_it = config_map.find(mode_key);
      if (trace_options_it != config_map.end()) {
        for (const auto& [key, value] : trace_options_it->second) {
          if ((key == "file") || (key == "log-frequency")) {
            continue;
          }
          // An option holding none of the handled alternatives is reported
          // as an empty string.
          std::string valueAsString;
          if (std::holds_alternative<std::string>(value)) {
            valueAsString = std::get<std::string>(value);
          } else if (std::holds_alternative<int>(value)) {
            valueAsString = std::to_string(std::get<int>(value));
          } else if (std::holds_alternative<uint32_t>(value)) {
            valueAsString = std::to_string(std::get<uint32_t>(value));
          }
          (*response->mutable_settings())[key].add_value(valueAsString);
        }
      }
    }
  earlyexit:
    // Single exit point: translate 'err' (possibly nullptr) into the gRPC
    // status and release it.
    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
#else
    auto err = TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNAVAILABLE, "the server does not support trace");
    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
#endif
  };

  // The call data object is heap allocated and not owned here.
  // NOTE(review): presumably it manages its own lifetime - confirm against
  // CommonCallData.
  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::TRACE);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::TraceSettingResponse>,
      inference::TraceSettingRequest, inference::TraceSettingResponse>(
      "Trace", 0, OnRegisterTrace, OnExecuteTrace, false /* async */, cq_,
      restricted_kv, response_delay_);
}

// Registers the handler for the LogSettings RPC.
//
// The execute callback applies any settings present in the request and then
// reports the current log settings in the response. Settings are applied one
// at a time in a fixed order, so a setting that fails validation does not
// undo the ones applied before it. When the server is built without
// TRITON_ENABLE_LOGGING the RPC always fails with
// TRITONSERVER_ERROR_UNAVAILABLE.
void
CommonHandler::RegisterLogging()
{
  // Asks the gRPC service to deliver the next LogSettings call to this
  // handler's completion queue.
  auto OnRegisterLogging =
      [this](
          ::grpc::ServerContext* ctx, inference::LogSettingsRequest* request,
          ::grpc::ServerAsyncResponseWriter<inference::LogSettingsResponse>*
              responder,
          void* tag) {
        this->service_->RequestLogSettings(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteLogging = [this](
                              inference::LogSettingsRequest& request,
                              inference::LogSettingsResponse* response,
                              ::grpc::Status* status) {

#ifdef TRITON_ENABLE_LOGGING
    using ChoiceCase =
        inference::LogSettingsRequest_SettingValue::ParameterChoiceCase;

    TRITONSERVER_Error* err = nullptr;
    const auto& requested = request.settings();

    // Update log settings
    // Server and Core repos do not have the same Logger object
    // Each update must be applied to both server and core repo versions
    if (!requested.empty()) {
      {
        // The log file can not be changed through this endpoint.
        static const std::string key = "log_file";
        if (requested.find(key) != requested.end()) {
          err = TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "log file location can not be updated through network protocol");
          GOTO_IF_ERR(err, earlyexit);
        }
      }
      {
        static const std::string key = "log_info";
        const auto entry = requested.find(key);
        if (entry != requested.end()) {
          if (entry->second.parameter_choice_case() == ChoiceCase::kBoolParam) {
            const bool enabled = entry->second.bool_param();
            LOG_ENABLE_INFO(enabled);
            TRITONSERVER_ServerOptionsSetLogInfo(nullptr, enabled);
          } else {
            err = TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                (std::string("expect boolean for '") + key + "'").c_str());
            GOTO_IF_ERR(err, earlyexit);
          }
        }
      }
      {
        static const std::string key = "log_warning";
        const auto entry = requested.find(key);
        if (entry != requested.end()) {
          if (entry->second.parameter_choice_case() == ChoiceCase::kBoolParam) {
            const bool enabled = entry->second.bool_param();
            LOG_ENABLE_WARNING(enabled);
            TRITONSERVER_ServerOptionsSetLogWarn(nullptr, enabled);
          } else {
            err = TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                (std::string("expect boolean for '") + key + "'").c_str());
            GOTO_IF_ERR(err, earlyexit);
          }
        }
      }
      {
        static const std::string key = "log_error";
        const auto entry = requested.find(key);
        if (entry != requested.end()) {
          if (entry->second.parameter_choice_case() == ChoiceCase::kBoolParam) {
            const bool enabled = entry->second.bool_param();
            LOG_ENABLE_ERROR(enabled);
            TRITONSERVER_ServerOptionsSetLogError(nullptr, enabled);
          } else {
            err = TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                (std::string("expect boolean for '") + key + "'").c_str());
            GOTO_IF_ERR(err, earlyexit);
          }
        }
      }
      {
        static const std::string key = "log_verbose_level";
        const auto entry = requested.find(key);
        if (entry != requested.end()) {
          if (entry->second.parameter_choice_case() ==
              ChoiceCase::kUint32Param) {
            const uint32_t verbosity = entry->second.uint32_param();
            LOG_SET_VERBOSE(static_cast<int32_t>(verbosity));
            TRITONSERVER_ServerOptionsSetLogVerbose(nullptr, verbosity);
          } else {
            err = TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                (std::string("expect int32 for '") + key + "'").c_str());
            GOTO_IF_ERR(err, earlyexit);
          }
        }
      }
      {
        static const std::string key = "log_format";
        const auto entry = requested.find(key);
        if (entry != requested.end()) {
          if (entry->second.parameter_choice_case() ==
              ChoiceCase::kStringParam) {
            // Only "ISO8601" and "default" are accepted.
            const std::string& format_name = entry->second.string_param();
            const bool use_iso8601 = (format_name == "ISO8601");
            if (!use_iso8601 && (format_name != "default")) {
              err = TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  ("invalid argument for log_format, got: " + format_name)
                      .c_str());
              GOTO_IF_ERR(err, earlyexit);
            }

            const triton::common::Logger::Format server_format =
                use_iso8601 ? triton::common::Logger::Format::kISO8601
                            : triton::common::Logger::Format::kDEFAULT;
            LOG_SET_FORMAT(server_format);

            const auto core_format = use_iso8601 ? TRITONSERVER_LOG_ISO8601
                                                 : TRITONSERVER_LOG_DEFAULT;
            TRITONSERVER_ServerOptionsSetLogFormat(nullptr, core_format);
          } else {
            err = TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                (std::string("expect string for '") + key + "'").c_str());
            GOTO_IF_ERR(err, earlyexit);
          }
        }
      }
      GOTO_IF_ERR(err, earlyexit);
    }

    // Report the current settings. Scoped so that the jumps to 'earlyexit'
    // above do not cross the initialization of 'reported'.
    {
      auto& reported = *response->mutable_settings();
      reported["log_file"].set_string_param(LOG_FILE);
      reported["log_info"].set_bool_param(LOG_INFO_IS_ON);
      reported["log_warning"].set_bool_param(LOG_WARNING_IS_ON);
      reported["log_error"].set_bool_param(LOG_ERROR_IS_ON);
      reported["log_verbose_level"].set_uint32_param(LOG_VERBOSE_LEVEL);
      reported["log_format"].set_string_param(LOG_FORMAT_STRING);
    }
  earlyexit:
    // Single exit point: translate 'err' (possibly nullptr) into the gRPC
    // status and release it.
    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
#else
    auto err = TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNAVAILABLE,
        "the server does not support dynamic logging");
    GrpcStatusUtil::Create(status, err);
    TRITONSERVER_ErrorDelete(err);
#endif
  };

  // The call data object is heap allocated and not owned here.
  // NOTE(review): presumably it manages its own lifetime - confirm against
  // CommonCallData.
  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::LOGGING);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::LogSettingsResponse>,
      inference::LogSettingsRequest, inference::LogSettingsResponse>(
      "Logging", 0, OnRegisterLogging, OnExecuteLogging, false /* async */, cq_,
      restricted_kv, response_delay_);
}

void
CommonHandler::RegisterSystemSharedMemoryStatus()
{
  auto OnRegisterSystemSharedMemoryStatus =
      [this](
          ::grpc::ServerContext* ctx,
          inference::SystemSharedMemoryStatusRequest* request,
          ::grpc::ServerAsyncResponseWriter<
              inference::SystemSharedMemoryStatusResponse>* responder,
          void* tag) {
        this->service_->RequestSystemSharedMemoryStatus(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteSystemSharedMemoryStatus =
      [this](
          inference::SystemSharedMemoryStatusRequest& request,
          inference::SystemSharedMemoryStatusResponse* response,
          ::grpc::Status* status) {
        triton::common::TritonJson::Value shm_status_json(
            triton::common::TritonJson::ValueType::ARRAY);
        TRITONSERVER_Error* err = shm_manager_->GetStatus(
            request.name(), TRITONSERVER_MEMORY_CPU, &shm_status_json);
        GOTO_IF_ERR(err, earlyexit);

        for (size_t idx = 0; idx < shm_status_json.ArraySize(); ++idx) {
          triton::common::TritonJson::Value shm_region_json;
          err = shm_status_json.IndexAsObject(idx, &shm_region_json);
          GOTO_IF_ERR(err, earlyexit);

          const char* name;
          size_t namelen;
          err = shm_region_json.MemberAsString("name", &name, &namelen);
          GOTO_IF_ERR(err, earlyexit);

          const char* key;
          size_t keylen;
          err = shm_region_json.MemberAsString("key", &key, &keylen);
          GOTO_IF_ERR(err, earlyexit);

          uint64_t offset;
          err = shm_region_json.MemberAsUInt("offset", &offset);
          GOTO_IF_ERR(err, earlyexit);

          uint64_t byte_size;
          err = shm_region_json.MemberAsUInt("byte_size", &byte_size);
          GOTO_IF_ERR(err, earlyexit);

          inference::SystemSharedMemoryStatusResponse::RegionStatus
              region_status;
          region_status.set_name(std::string(name, namelen));
          region_status.set_key(std::string(key, keylen));
          region_status.set_offset(offset);
          region_status.set_byte_size(byte_size);

          (*response->mutable_regions())[name] = region_status;
        }

      earlyexit:
        GrpcStatusUtil::Create(status, err);
        TRITONSERVER_ErrorDelete(err);
      };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::SHARED_MEMORY);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<
          inference::SystemSharedMemoryStatusResponse>,
      inference::SystemSharedMemoryStatusRequest,
      inference::SystemSharedMemoryStatusResponse>(
      "SystemSharedMemoryStatus", 0, OnRegisterSystemSharedMemoryStatus,
      OnExecuteSystemSharedMemoryStatus, false /* async */, cq_, restricted_kv,
      response_delay_);
}

void
CommonHandler::RegisterSystemSharedMemoryRegister()
{
  auto OnRegisterSystemSharedMemoryRegister =
      [this](
          ::grpc::ServerContext* ctx,
          inference::SystemSharedMemoryRegisterRequest* request,
          ::grpc::ServerAsyncResponseWriter<
              inference::SystemSharedMemoryRegisterResponse>* responder,
          void* tag) {
        this->service_->RequestSystemSharedMemoryRegister(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteSystemSharedMemoryRegister =
      [this](
          inference::SystemSharedMemoryRegisterRequest& request,
          inference::SystemSharedMemoryRegisterResponse* response,
          ::grpc::Status* status) {
        TRITONSERVER_Error* err = shm_manager_->RegisterSystemSharedMemory(
            request.name(), request.key(), request.offset(),
            request.byte_size());

        GrpcStatusUtil::Create(status, err);
        TRITONSERVER_ErrorDelete(err);
      };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::SHARED_MEMORY);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<
          inference::SystemSharedMemoryRegisterResponse>,
      inference::SystemSharedMemoryRegisterRequest,
      inference::SystemSharedMemoryRegisterResponse>(
      "SystemSharedMemoryRegister", 0, OnRegisterSystemSharedMemoryRegister,
      OnExecuteSystemSharedMemoryRegister, false /* async */, cq_,
      restricted_kv, response_delay_);
}

void
CommonHandler::RegisterSystemSharedMemoryUnregister()
{
  auto OnRegisterSystemSharedMemoryUnregister =
      [this](
          ::grpc::ServerContext* ctx,
          inference::SystemSharedMemoryUnregisterRequest* request,
          ::grpc::ServerAsyncResponseWriter<
              inference::SystemSharedMemoryUnregisterResponse>* responder,
          void* tag) {
        this->service_->RequestSystemSharedMemoryUnregister(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteSystemSharedMemoryUnregister =
      [this](
          inference::SystemSharedMemoryUnregisterRequest& request,
          inference::SystemSharedMemoryUnregisterResponse* response,
          ::grpc::Status* status) {
        TRITONSERVER_Error* err = nullptr;
        if (request.name().empty()) {
          err = shm_manager_->UnregisterAll(TRITONSERVER_MEMORY_CPU);
        } else {
          err =
              shm_manager_->Unregister(request.name(), TRITONSERVER_MEMORY_CPU);
        }

        GrpcStatusUtil::Create(status, err);
        TRITONSERVER_ErrorDelete(err);
      };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::SHARED_MEMORY);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<
          inference::SystemSharedMemoryUnregisterResponse>,
      inference::SystemSharedMemoryUnregisterRequest,
      inference::SystemSharedMemoryUnregisterResponse>(
      "SystemSharedMemoryUnregister", 0, OnRegisterSystemSharedMemoryUnregister,
      OnExecuteSystemSharedMemoryUnregister, false /* async */, cq_,
      restricted_kv, response_delay_);
}

void
CommonHandler::RegisterCudaSharedMemoryStatus()
{
  auto OnRegisterCudaSharedMemoryStatus =
      [this](
          ::grpc::ServerContext* ctx,
          inference::CudaSharedMemoryStatusRequest* request,
          ::grpc::ServerAsyncResponseWriter<
              inference::CudaSharedMemoryStatusResponse>* responder,
          void* tag) {
        this->service_->RequestCudaSharedMemoryStatus(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };
  auto OnExecuteCudaSharedMemoryStatus =
      [this](
          inference::CudaSharedMemoryStatusRequest& request,
          inference::CudaSharedMemoryStatusResponse* response,
          ::grpc::Status* status) {
        triton::common::TritonJson::Value shm_status_json(
            triton::common::TritonJson::ValueType::ARRAY);
        TRITONSERVER_Error* err = shm_manager_->GetStatus(
            request.name(), TRITONSERVER_MEMORY_GPU, &shm_status_json);
        GOTO_IF_ERR(err, earlyexit);

        for (size_t idx = 0; idx < shm_status_json.ArraySize(); ++idx) {
          triton::common::TritonJson::Value shm_region_json;
          err = shm_status_json.IndexAsObject(idx, &shm_region_json);
          GOTO_IF_ERR(err, earlyexit);

          const char* name;
          size_t namelen;
          err = shm_region_json.MemberAsString("name", &name, &namelen);
          GOTO_IF_ERR(err, earlyexit);

          uint64_t device_id;
          err = shm_region_json.MemberAsUInt("device_id", &device_id);
          GOTO_IF_ERR(err, earlyexit);

          uint64_t byte_size;
          err = shm_region_json.MemberAsUInt("byte_size", &byte_size);
          GOTO_IF_ERR(err, earlyexit);


          inference::CudaSharedMemoryStatusResponse::RegionStatus region_status;
          region_status.set_name(std::string(name, namelen));
          region_status.set_device_id(device_id);
          region_status.set_byte_size(byte_size);

          (*response->mutable_regions())[name] = region_status;
        }
      earlyexit:
        GrpcStatusUtil::Create(status, err);
        TRITONSERVER_ErrorDelete(err);
      };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::SHARED_MEMORY);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<
          inference::CudaSharedMemoryStatusResponse>,
      inference::CudaSharedMemoryStatusRequest,
      inference::CudaSharedMemoryStatusResponse>(
      "CudaSharedMemoryStatus", 0, OnRegisterCudaSharedMemoryStatus,
      OnExecuteCudaSharedMemoryStatus, false /* async */, cq_, restricted_kv,
      response_delay_);
}

// Sets up the call data that services the 'CudaSharedMemoryRegister' RPC.
// Without TRITON_ENABLE_GPU every request is rejected with INVALID_ARG.
void
CommonHandler::RegisterCudaSharedMemoryRegister()
{
  // Asks the async service to deliver the next request of this type on the
  // handler's completion queue.
  auto OnRegisterCudaSharedMemoryRegister =
      [this](
          ::grpc::ServerContext* ctx,
          inference::CudaSharedMemoryRegisterRequest* request,
          ::grpc::ServerAsyncResponseWriter<
              inference::CudaSharedMemoryRegisterResponse>* responder,
          void* tag) {
        this->service_->RequestCudaSharedMemoryRegister(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  // Registers the CUDA IPC handle carried in the request with the shared
  // memory manager and reports the outcome through 'status'.
  auto OnExecuteCudaSharedMemoryRegister =
      [this](
          inference::CudaSharedMemoryRegisterRequest& request,
          inference::CudaSharedMemoryRegisterResponse* response,
          ::grpc::Status* status) {
        TRITONSERVER_Error* err = nullptr;
#ifdef TRITON_ENABLE_GPU
        // 'raw_handle' is client-controlled and is reinterpreted below as a
        // cudaIpcMemHandle_t. Reject anything too short to hold one so the
        // manager never reads past the end of the request buffer.
        if (request.raw_handle().size() < sizeof(cudaIpcMemHandle_t)) {
          err = TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG,
              std::string(
                  "failed to register CUDA shared memory region: '" +
                  request.name() + "', invalid raw handle size")
                  .c_str());
        } else {
          err = shm_manager_->RegisterCUDASharedMemory(
              request.name(),
              reinterpret_cast<const cudaIpcMemHandle_t*>(
                  request.raw_handle().c_str()),
              request.byte_size(), request.device_id());
        }
#else
        err = TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            std::string(
                "failed to register CUDA shared memory region: '" +
                request.name() + "', GPUs not supported")
                .c_str());
#endif  // TRITON_ENABLE_GPU

        GrpcStatusUtil::Create(status, err);
        TRITONSERVER_ErrorDelete(err);
      };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::SHARED_MEMORY);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<
          inference::CudaSharedMemoryRegisterResponse>,
      inference::CudaSharedMemoryRegisterRequest,
      inference::CudaSharedMemoryRegisterResponse>(
      "CudaSharedMemoryRegister", 0, OnRegisterCudaSharedMemoryRegister,
      OnExecuteCudaSharedMemoryRegister, false /* async */, cq_, restricted_kv,
      response_delay_);
}

void
CommonHandler::RegisterCudaSharedMemoryUnregister()
{
  auto OnRegisterCudaSharedMemoryUnregister =
      [this](
          ::grpc::ServerContext* ctx,
          inference::CudaSharedMemoryUnregisterRequest* request,
          ::grpc::ServerAsyncResponseWriter<
              inference::CudaSharedMemoryUnregisterResponse>* responder,
          void* tag) {
        this->service_->RequestCudaSharedMemoryUnregister(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteCudaSharedMemoryUnregister =
      [this](
          inference::CudaSharedMemoryUnregisterRequest& request,
          inference::CudaSharedMemoryUnregisterResponse* response,
          ::grpc::Status* status) {
        TRITONSERVER_Error* err = nullptr;
        if (request.name().empty()) {
          err = shm_manager_->UnregisterAll(TRITONSERVER_MEMORY_GPU);
        } else {
          err =
              shm_manager_->Unregister(request.name(), TRITONSERVER_MEMORY_GPU);
        }

        GrpcStatusUtil::Create(status, err);
        TRITONSERVER_ErrorDelete(err);
      };
  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::SHARED_MEMORY);

  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<
          inference::CudaSharedMemoryUnregisterResponse>,
      inference::CudaSharedMemoryUnregisterRequest,
      inference::CudaSharedMemoryUnregisterResponse>(
      "CudaSharedMemoryUnregister", 0, OnRegisterCudaSharedMemoryUnregister,
      OnExecuteCudaSharedMemoryUnregister, false /* async */, cq_,
      restricted_kv, response_delay_);
}

// Sets up the call data that services the 'RepositoryIndex' RPC. The execute
// callback fetches the model index from the server as JSON and copies each
// entry into the response.
void
CommonHandler::RegisterRepositoryIndex()
{
  // Asks the async service to deliver the next request of this type on the
  // handler's completion queue.
  auto OnRegisterRepositoryIndex =
      [this](
          ::grpc::ServerContext* ctx,
          inference::RepositoryIndexRequest* request,
          ::grpc::ServerAsyncResponseWriter<inference::RepositoryIndexResponse>*
              responder,
          void* tag) {
        this->service_->RequestRepositoryIndex(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteRepositoryIndex =
      [this](
          inference::RepositoryIndexRequest& request,
          inference::RepositoryIndexResponse* response,
          ::grpc::Status* status) {
        TRITONSERVER_Error* err = nullptr;
        // Declared ahead of every GOTO_IF_ERR so the message can be released
        // at 'earlyexit' on failure paths as well as on success.
        TRITONSERVER_Message* model_index_message = nullptr;
        if (request.repository_name().empty()) {
          uint32_t flags = 0;
          if (request.ready()) {
            flags |= TRITONSERVER_INDEX_FLAG_READY;
          }

          err = TRITONSERVER_ServerModelIndex(
              tritonserver_.get(), flags, &model_index_message);
          GOTO_IF_ERR(err, earlyexit);

          const char* buffer;
          size_t byte_size;
          err = TRITONSERVER_MessageSerializeToJson(
              model_index_message, &buffer, &byte_size);
          GOTO_IF_ERR(err, earlyexit);

          triton::common::TritonJson::Value model_index_json;
          err = model_index_json.Parse(buffer, byte_size);
          GOTO_IF_ERR(err, earlyexit);

          err = model_index_json.AssertType(
              triton::common::TritonJson::ValueType::ARRAY);
          GOTO_IF_ERR(err, earlyexit);

          for (size_t idx = 0; idx < model_index_json.ArraySize(); ++idx) {
            triton::common::TritonJson::Value index_json;
            err = model_index_json.IndexAsObject(idx, &index_json);
            GOTO_IF_ERR(err, earlyexit);

            auto model_index = response->add_models();

            // 'name' is required; the remaining members are copied only when
            // present in the index entry.
            const char* name;
            size_t namelen;
            err = index_json.MemberAsString("name", &name, &namelen);
            GOTO_IF_ERR(err, earlyexit);
            model_index->set_name(std::string(name, namelen));

            if (index_json.Find("version")) {
              const char* version;
              size_t versionlen;
              err = index_json.MemberAsString("version", &version, &versionlen);
              GOTO_IF_ERR(err, earlyexit);
              model_index->set_version(std::string(version, versionlen));
            }
            if (index_json.Find("state")) {
              const char* state;
              size_t statelen;
              err = index_json.MemberAsString("state", &state, &statelen);
              GOTO_IF_ERR(err, earlyexit);
              model_index->set_state(std::string(state, statelen));
            }
            if (index_json.Find("reason")) {
              const char* reason;
              size_t reasonlen;
              err = index_json.MemberAsString("reason", &reason, &reasonlen);
              GOTO_IF_ERR(err, earlyexit);
              model_index->set_reason(std::string(reason, reasonlen));
            }
          }
        } else {
          err = TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "'repository_name' specification is not supported");
        }

      earlyexit:
        // Single release point for the index message: previously it was
        // deleted only on the success path and leaked on any parse error.
        if (model_index_message != nullptr) {
          TRITONSERVER_MessageDelete(model_index_message);
        }
        GrpcStatusUtil::Create(status, err);
        TRITONSERVER_ErrorDelete(err);
      };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::MODEL_REPOSITORY);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::RepositoryIndexResponse>,
      inference::RepositoryIndexRequest, inference::RepositoryIndexResponse>(
      "RepositoryIndex", 0, OnRegisterRepositoryIndex, OnExecuteRepositoryIndex,
      false /* async */, cq_, restricted_kv, response_delay_);
}

void
CommonHandler::RegisterRepositoryModelLoad()
{
  auto OnRegisterRepositoryModelLoad =
      [this](
          ::grpc::ServerContext* ctx,
          inference::RepositoryModelLoadRequest* request,
          ::grpc::ServerAsyncResponseWriter<
              inference::RepositoryModelLoadResponse>* responder,
          void* tag) {
        this->service_->RequestRepositoryModelLoad(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteRepositoryModelLoad =
      [this](
          inference::RepositoryModelLoadRequest& request,
          inference::RepositoryModelLoadResponse* response,
          ::grpc::Status* status) {
        TRITONSERVER_Error* err = nullptr;
        if (request.repository_name().empty()) {
          std::vector<TRITONSERVER_Parameter*> params;
          // WAR for the const-ness check
          std::vector<const TRITONSERVER_Parameter*> const_params;
          for (const auto& param_proto : request.parameters()) {
            if (param_proto.first == "config") {
              if (param_proto.second.parameter_choice_case() !=
                  inference::ModelRepositoryParameter::ParameterChoiceCase::
                      kStringParam) {
                err = TRITONSERVER_ErrorNew(
                    TRITONSERVER_ERROR_INVALID_ARG,
                    (std::string("invalid value type for load parameter '") +
                     param_proto.first + "', expected string_param.")
                        .c_str());
                break;
              } else {
                auto param = TRITONSERVER_ParameterNew(
                    param_proto.first.c_str(), TRITONSERVER_PARAMETER_STRING,
                    param_proto.second.string_param().c_str());
                if (param != nullptr) {
                  params.emplace_back(param);
                  const_params.emplace_back(param);
                } else {
                  err = TRITONSERVER_ErrorNew(
                      TRITONSERVER_ERROR_INTERNAL,
                      "unexpected error on creating Triton parameter");
                  break;
                }
              }
            } else if (param_proto.first.rfind("file:", 0) == 0) {
              if (param_proto.second.parameter_choice_case() !=
                  inference::ModelRepositoryParameter::ParameterChoiceCase::
                      kBytesParam) {
                err = TRITONSERVER_ErrorNew(
                    TRITONSERVER_ERROR_INVALID_ARG,
                    (std::string("invalid value type for load parameter '") +
                     param_proto.first + "', expected bytes_param.")
                        .c_str());
                break;
              } else {
                auto param = TRITONSERVER_ParameterBytesNew(
                    param_proto.first.c_str(),
                    param_proto.second.bytes_param().data(),
                    param_proto.second.bytes_param().length());
                if (param != nullptr) {
                  params.emplace_back(param);
                  const_params.emplace_back(param);
                } else {
                  err = TRITONSERVER_ErrorNew(
                      TRITONSERVER_ERROR_INTERNAL,
                      "unexpected error on creating Triton parameter");
                  break;
                }
              }
            } else {
              err = TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string("unrecognized load parameter '") +
                   param_proto.first + "'.")
                      .c_str());
              break;
            }
          }
          if (err == nullptr) {
            err = TRITONSERVER_ServerLoadModelWithParameters(
                tritonserver_.get(), request.model_name().c_str(),
                const_params.data(), const_params.size());
          }
          // Assumes no further 'params' access after load API returns
          for (auto& param : params) {
            TRITONSERVER_ParameterDelete(param);
          }
        } else {
          err = TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "'repository_name' specification is not supported");
        }

        GrpcStatusUtil::Create(status, err);
        TRITONSERVER_ErrorDelete(err);
      };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::MODEL_REPOSITORY);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<inference::RepositoryModelLoadResponse>,
      inference::RepositoryModelLoadRequest,
      inference::RepositoryModelLoadResponse>(
      "RepositoryModelLoad", 0, OnRegisterRepositoryModelLoad,
      OnExecuteRepositoryModelLoad, true /* async */, cq_, restricted_kv,
      response_delay_);
}

// Sets up the call data that services the 'RepositoryModelUnload' RPC. The
// optional boolean 'unload_dependents' parameter selects whether dependent
// models are unloaded together with the requested model.
void
CommonHandler::RegisterRepositoryModelUnload()
{
  // Asks the async service to deliver the next request of this type on the
  // handler's completion queue.
  auto OnRegisterRepositoryModelUnload =
      [this](
          ::grpc::ServerContext* ctx,
          inference::RepositoryModelUnloadRequest* request,
          ::grpc::ServerAsyncResponseWriter<
              inference::RepositoryModelUnloadResponse>* responder,
          void* tag) {
        this->service_->RequestRepositoryModelUnload(
            ctx, request, responder, this->cq_, this->cq_, tag);
      };

  auto OnExecuteRepositoryModelUnload =
      [this](
          inference::RepositoryModelUnloadRequest& request,
          inference::RepositoryModelUnloadResponse* response,
          ::grpc::Status* status) {
        TRITONSERVER_Error* err = nullptr;
        if (request.repository_name().empty()) {
          // Check if the dependent models should be removed
          bool unload_dependents = false;
          // Iterate by const reference so map entries are not copied.
          for (const auto& param : request.parameters()) {
            if (param.first.compare("unload_dependents") == 0) {
              const auto& unload_param = param.second;
              if (unload_param.parameter_choice_case() !=
                  inference::ModelRepositoryParameter::ParameterChoiceCase::
                      kBoolParam) {
                err = TRITONSERVER_ErrorNew(
                    TRITONSERVER_ERROR_INVALID_ARG,
                    "invalid value type for 'unload_dependents' parameter, "
                    "expected "
                    "bool_param.");
              } else {
                // Read the value only once the type is known to be bool.
                unload_dependents = unload_param.bool_param();
              }
              break;
            }
          }
          // Skip the unload entirely when parameter validation failed.
          if (err == nullptr) {
            if (unload_dependents) {
              err = TRITONSERVER_ServerUnloadModelAndDependents(
                  tritonserver_.get(), request.model_name().c_str());
            } else {
              err = TRITONSERVER_ServerUnloadModel(
                  tritonserver_.get(), request.model_name().c_str());
            }
          }
        } else {
          err = TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "'repository_name' specification is not supported");
        }

        GrpcStatusUtil::Create(status, err);
        TRITONSERVER_ErrorDelete(err);
      };

  const std::pair<std::string, std::string>& restricted_kv =
      restricted_keys_.Get(RestrictedCategory::MODEL_REPOSITORY);
  new CommonCallData<
      ::grpc::ServerAsyncResponseWriter<
          inference::RepositoryModelUnloadResponse>,
      inference::RepositoryModelUnloadRequest,
      inference::RepositoryModelUnloadResponse>(
      "RepositoryModelUnload", 0, OnRegisterRepositoryModelUnload,
      OnExecuteRepositoryModelUnload, true /* async */, cq_, restricted_kv,
      response_delay_);
}

}  // namespace

//
// Server
//
// Configures the gRPC server builder and creates the request handlers.
//
// The constructor selects the server credentials, registers the inference and
// health services, applies the keepalive channel arguments, adds one
// completion queue per handler category and instantiates the handlers. It
// only configures 'builder_'; nothing in this constructor starts the server.
//
// tritonserver, trace_manager and shm_manager are stored and passed on to the
// handlers. 'options' supplies the socket, SSL, keepalive, thread-count, pool
// and restricted-protocol settings read below.
Server::Server(
    const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<SharedMemoryManager>& shm_manager,
    const Options& options)
    : tritonserver_(tritonserver), trace_manager_(trace_manager),
      shm_manager_(shm_manager), server_addr_(
                                     options.socket_.address_ + ":" +
                                     std::to_string(options.socket_.port_))
{
  // Choose SSL or insecure credentials based on the SSL options.
  std::shared_ptr<::grpc::ServerCredentials> credentials;
  const auto& ssl_options = options.ssl_;
  if (ssl_options.use_ssl_) {
    std::string key;
    std::string cert;
    std::string root;
    ReadFile(ssl_options.server_cert_, cert);
    ReadFile(ssl_options.server_key_, key);
    ReadFile(ssl_options.root_cert_, root);
    ::grpc::SslServerCredentialsOptions::PemKeyCertPair keycert = {key, cert};
    ::grpc::SslServerCredentialsOptions sslOpts;
    sslOpts.pem_root_certs = root;
    sslOpts.pem_key_cert_pairs.push_back(keycert);
    // Mutual auth additionally requires and verifies a client certificate.
    if (ssl_options.use_mutual_auth_) {
      sslOpts.client_certificate_request =
          GRPC_SSL_REQUEST_AND_REQUIRE_CLIENT_CERTIFICATE_AND_VERIFY;
    }
    credentials = ::grpc::SslServerCredentials(sslOpts);
  } else {
    credentials = ::grpc::InsecureServerCredentials();
  }

  // 'bound_port_' is handed to the builder as the output location for the
  // selected port.
  builder_.AddListeningPort(server_addr_, credentials, &bound_port_);
  builder_.SetMaxMessageSize(MAX_GRPC_MESSAGE_SIZE);
  builder_.RegisterService(&service_);
  builder_.RegisterService(&health_service_);
  builder_.AddChannelArgument(
      GRPC_ARG_ALLOW_REUSEPORT, options.socket_.reuse_port_);

  {
    // GRPC KeepAlive Docs:
    // https://grpc.github.io/grpc/cpp/md_doc_keepalive.html NOTE: In order to
    // work properly, the client-side settings should be in agreement with
    // server-side settings.
    const auto& keepalive_options = options.keep_alive_;
    builder_.AddChannelArgument(
        GRPC_ARG_KEEPALIVE_TIME_MS, keepalive_options.keepalive_time_ms_);
    builder_.AddChannelArgument(
        GRPC_ARG_KEEPALIVE_TIMEOUT_MS, keepalive_options.keepalive_timeout_ms_);
    builder_.AddChannelArgument(
        GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS,
        keepalive_options.keepalive_permit_without_calls_);
    builder_.AddChannelArgument(
        GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA,
        keepalive_options.http2_max_pings_without_data_);
    builder_.AddChannelArgument(
        GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS,
        keepalive_options.http2_min_recv_ping_interval_without_data_ms_);
    builder_.AddChannelArgument(
        GRPC_ARG_HTTP2_MAX_PING_STRIKES,
        keepalive_options.http2_max_ping_strikes_);
    // The connection-age arguments are only set when a non-zero value is
    // configured; zero leaves them unset on the builder.
    if (keepalive_options.max_connection_age_ms_ != 0) {
      builder_.AddChannelArgument(
          GRPC_ARG_MAX_CONNECTION_AGE_MS,
          keepalive_options.max_connection_age_ms_);
    }
    if (keepalive_options.max_connection_age_grace_ms_ != 0) {
      builder_.AddChannelArgument(
          GRPC_ARG_MAX_CONNECTION_AGE_GRACE_MS,
          keepalive_options.max_connection_age_grace_ms_);
    }

    // Build a two-column table of the applied keepalive options; it is
    // emitted through LOG_TABLE_VERBOSE at verbose level 1 below.
    std::vector<std::string> headers{"GRPC KeepAlive Option", "Value"};
    triton::common::TablePrinter table_printer(headers);
    std::vector<std::string> row{
        "keepalive_time_ms",
        std::to_string(keepalive_options.keepalive_time_ms_)};
    table_printer.InsertRow(row);

    row = {
        "keepalive_timeout_ms",
        std::to_string(keepalive_options.keepalive_timeout_ms_)};
    table_printer.InsertRow(row);

    row = {
        "keepalive_permit_without_calls",
        std::to_string(keepalive_options.keepalive_permit_without_calls_)};
    table_printer.InsertRow(row);

    row = {
        "http2_max_pings_without_data",
        std::to_string(keepalive_options.http2_max_pings_without_data_)};
    table_printer.InsertRow(row);

    row = {
        "http2_min_recv_ping_interval_without_data_ms",
        std::to_string(
            keepalive_options.http2_min_recv_ping_interval_without_data_ms_)};
    table_printer.InsertRow(row);

    row = {
        "http2_max_ping_strikes",
        std::to_string(keepalive_options.http2_max_ping_strikes_)};
    table_printer.InsertRow(row);

    // Mirror the conditions used above so the table lists only the
    // connection-age arguments that were actually set.
    if (keepalive_options.max_connection_age_ms_ != 0) {
      row = {
          "max_connection_age_ms",
          std::to_string(keepalive_options.max_connection_age_ms_)};
      table_printer.InsertRow(row);
    }

    if (keepalive_options.max_connection_age_grace_ms_ != 0) {
      row = {
          "max_connection_age_grace_ms",
          std::to_string(keepalive_options.max_connection_age_grace_ms_)};
      table_printer.InsertRow(row);
    }
    LOG_TABLE_VERBOSE(1, table_printer);
  }

  // One completion queue each for the common (non-inference), unary
  // inference and streaming inference handlers.
  common_cq_ = builder_.AddCompletionQueue();
  model_infer_cq_ = builder_.AddCompletionQueue();
  model_stream_infer_cq_ = builder_.AddCompletionQueue();

  // For testing purposes only, add artificial delay in grpc responses.
  const char* dstr = getenv("TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC");
  uint64_t response_delay = 0;
  if (dstr != nullptr) {
    // NOTE(review): atoi() returns int, so a negative value in the
    // environment variable converts to a very large uint64_t here.
    response_delay = atoi(dstr);
  }
  // A common Handler for other non-inference requests
  common_handler_.reset(new CommonHandler(
      "CommonHandler", tritonserver_, shm_manager_, trace_manager_, &service_,
      &health_service_, common_cq_.get(), options.restricted_protocols_,
      response_delay));

  // [FIXME] "register" logic is different for infer
  // Handler for model inference requests.
  std::pair<std::string, std::string> restricted_kv =
      options.restricted_protocols_.Get(RestrictedCategory::INFERENCE);
  // All unary inference handlers share 'model_infer_cq_' and the same
  // connection bookkeeping (conn_mtx_, conn_cnt_, accepting_new_conn_).
  for (int i = 0; i < options.infer_thread_count_; ++i) {
    model_infer_handlers_.emplace_back(new ModelInferHandler(
        "ModelInferHandler", tritonserver_, trace_manager_, shm_manager_,
        &service_, model_infer_cq_.get(),
        options.infer_allocation_pool_size_ /* max_state_bucket_count */,
        options.max_response_pool_size_, options.infer_compression_level_,
        restricted_kv, options.forward_header_pattern_, &conn_mtx_, &conn_cnt_,
        &accepting_new_conn_));
  }

  // Handler for streaming inference requests. Keeps one handler for streaming
  // to avoid possible concurrent writes which is not allowed
  model_stream_infer_handlers_.emplace_back(new ModelStreamInferHandler(
      "ModelStreamInferHandler", tritonserver_, trace_manager_, shm_manager_,
      &service_, model_stream_infer_cq_.get(),
      options.infer_allocation_pool_size_ /* max_state_bucket_count */,
      options.max_response_pool_size_, options.infer_compression_level_,
      restricted_kv, options.forward_header_pattern_, &conn_mtx_, &conn_cnt_,
      &accepting_new_conn_));
}

// Stops the server on destruction. The result of Stop() is handed to
// IGNORE_ERR instead of being propagated, since a destructor has no way to
// return an error to its caller.
Server::~Server()
{
  IGNORE_ERR(Stop());
}

// Creates a Server from fully-populated 'server_options'.
//
// On success the new instance is stored in '*server' and nullptr is returned.
// If the constructor throws std::invalid_argument, the exception message is
// returned as a TRITONSERVER_ERROR_INVALID_ARG error and '*server' is left
// unchanged. Other exception types are not caught here.
TRITONSERVER_Error*
Server::Create(
    const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<SharedMemoryManager>& shm_manager,
    const Options& server_options, std::unique_ptr<Server>* server)
{
  try {
    server->reset(
        new Server(tritonserver, trace_manager, shm_manager, server_options));
  }
  catch (const std::invalid_argument& pe) {
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, pe.what());
  }

  return nullptr;  // success
}

// Creates a Server from a key/value option map.
//
// The map is translated into an Options struct via GetOptions(); any lookup
// failure is returned to the caller. 'restricted_features' is copied into the
// options so that the handlers built by the constructor enforce it.
TRITONSERVER_Error*
Server::Create(
    std::shared_ptr<TRITONSERVER_Server>& server, UnorderedMapType& options,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<SharedMemoryManager>& shm_manager,
    const RestrictedFeatures& restricted_features,
    std::unique_ptr<Server>* service)
{
  Options grpc_options;

  RETURN_IF_ERR(GetOptions(grpc_options, options));

  // GetOptions() fills in nothing for the restricted protocols, and the
  // constructor reads them from Options::restricted_protocols_. Without this
  // assignment the caller's restrictions would be silently ignored.
  grpc_options.restricted_protocols_ = restricted_features;

  return Create(server, trace_manager, shm_manager, grpc_options, service);
}

// Populates 'options' from the key/value pairs in 'options_map'.
//
// The socket, SSL and keep-alive groups are filled in by their dedicated
// helpers; the remaining top-level settings are read here with GetValue().
// Returns nullptr on success, otherwise the first error reported by a helper
// or by GetValue().
TRITONSERVER_Error*
Server::GetOptions(Options& options, UnorderedMapType& options_map)
{
  RETURN_IF_ERR(GetSocketOptions(options.socket_, options_map));
  RETURN_IF_ERR(GetSslOptions(options.ssl_, options_map));
  RETURN_IF_ERR(GetKeepAliveOptions(options.keep_alive_, options_map));

  // The compression level is read as a plain int and then converted to the
  // gRPC enumeration type.
  int infer_compression_level_key = 0;
  RETURN_IF_ERR(GetValue(
      options_map, "infer_compression_level", &infer_compression_level_key));
  options.infer_compression_level_ =
      static_cast<grpc_compression_level>(infer_compression_level_key);

  RETURN_IF_ERR(GetValue(
      options_map, "infer_thread_count", &options.infer_thread_count_));
  RETURN_IF_ERR(GetValue(
      options_map, "infer_allocation_pool_size",
      &options.infer_allocation_pool_size_));
  RETURN_IF_ERR(GetValue(
      options_map, "max_response_pool_size", &options.max_response_pool_size_));
  RETURN_IF_ERR(GetValue(
      options_map, "forward_header_pattern", &options.forward_header_pattern_));

  return nullptr;
}

// Fills the socket related fields of 'options' from 'options_map' using the
// keys "address", "port" and "reuse_port". Each lookup goes through
// GetValue() wrapped in RETURN_IF_ERR; returns nullptr when every lookup
// completes without the macro returning early.
TRITONSERVER_Error*
Server::GetSocketOptions(SocketOptions& options, UnorderedMapType& options_map)
{
  RETURN_IF_ERR(GetValue(options_map, "address", &options.address_));
  RETURN_IF_ERR(GetValue(options_map, "port", &options.port_));
  RETURN_IF_ERR(GetValue(options_map, "reuse_port", &options.reuse_port_));

  return nullptr;
}

// Fills the SSL related fields of 'options' from 'options_map' using the keys
// "use_ssl", "server_cert", "server_key", "root_cert" and "use_mutual_auth".
// Each lookup goes through GetValue() wrapped in RETURN_IF_ERR; returns
// nullptr when every lookup completes without the macro returning early.
TRITONSERVER_Error*
Server::GetSslOptions(SslOptions& options, UnorderedMapType& options_map)
{
  RETURN_IF_ERR(GetValue(options_map, "use_ssl", &options.use_ssl_));
  RETURN_IF_ERR(GetValue(options_map, "server_cert", &options.server_cert_));
  RETURN_IF_ERR(GetValue(options_map, "server_key", &options.server_key_));
  RETURN_IF_ERR(GetValue(options_map, "root_cert", &options.root_cert_));
  RETURN_IF_ERR(
      GetValue(options_map, "use_mutual_auth", &options.use_mutual_auth_));

  return nullptr;
}

// Fills the keep-alive / connection-age fields of 'options' from
// 'options_map'. The map key for each field is the field name without its
// trailing underscore. Each lookup goes through GetValue() wrapped in
// RETURN_IF_ERR; returns nullptr when every lookup completes without the
// macro returning early.
TRITONSERVER_Error*
Server::GetKeepAliveOptions(
    KeepAliveOptions& options, UnorderedMapType& options_map)
{
  RETURN_IF_ERR(
      GetValue(options_map, "keepalive_time_ms", &options.keepalive_time_ms_));
  RETURN_IF_ERR(GetValue(
      options_map, "keepalive_timeout_ms", &options.keepalive_timeout_ms_));
  RETURN_IF_ERR(GetValue(
      options_map, "keepalive_permit_without_calls",
      &options.keepalive_permit_without_calls_));
  RETURN_IF_ERR(GetValue(
      options_map, "http2_max_pings_without_data",
      &options.http2_max_pings_without_data_));
  RETURN_IF_ERR(GetValue(
      options_map, "http2_min_recv_ping_interval_without_data_ms",
      &options.http2_min_recv_ping_interval_without_data_ms_));
  RETURN_IF_ERR(GetValue(
      options_map, "http2_max_ping_strikes", &options.http2_max_ping_strikes_));
  RETURN_IF_ERR(GetValue(
      options_map, "max_connection_age_ms", &options.max_connection_age_ms_));
  RETURN_IF_ERR(GetValue(
      options_map, "max_connection_age_grace_ms",
      &options.max_connection_age_grace_ms_));

  return nullptr;
}


// Builds and starts the underlying gRPC server, then starts every handler.
//
// Returns TRITONSERVER_ERROR_ALREADY_EXISTS if the endpoint is already
// running and TRITONSERVER_ERROR_UNAVAILABLE if bound_port_ is still zero
// after the server has been built. Returns nullptr on success.
TRITONSERVER_Error*
Server::Start()
{
  if (running_) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_ALREADY_EXISTS, "GRPC server is already running.");
  }

  server_ = builder_.BuildAndStart();

  // A zero bound port is treated as a failure to bind the listening socket.
  const bool bind_failed = (bound_port_ == 0);
  if (bind_failed) {
    const std::string in_use_msg =
        std::string("Socket '") + server_addr_ + "' already in use ";
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNAVAILABLE, in_use_msg.c_str());
  }

  // Handlers are started in a fixed order: common, unary inference, then
  // streaming inference.
  common_handler_->Start();
  for (auto& infer_handler : model_infer_handlers_) {
    infer_handler->Start();
  }
  for (auto& stream_handler : model_stream_infer_handlers_) {
    stream_handler->Start();
  }

  running_ = true;
  LOG_INFO << "Started GRPCInferenceService at " << server_addr_;
  return nullptr;  // success
}

// Begins a graceful shutdown of the gRPC endpoint.
//
// A background thread calls ::grpc::Server::Shutdown() without a deadline,
// new requests on existing connections are disabled, and, when
// 'exit_timeout_secs' is non-null, the call waits for the open connections to
// drain. '*exit_timeout_secs' is decremented once per second spent waiting.
//
// Returns TRITONSERVER_ERROR_UNAVAILABLE if the endpoint is not running,
// otherwise nullptr or the first error reported by a helper.
TRITONSERVER_Error*
Server::GracefulStop(
    uint32_t* exit_timeout_secs, const std::string& service_name)
{
  if (!running_) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNAVAILABLE, "GRPC server is not running.");
  }

  // Assigning to a std::thread that is still joinable calls std::terminate,
  // so only launch the shutdown thread if a previous GracefulStop() call has
  // not already done so. Stop() joins the thread.
  if (!graceful_shutdown_thread_.joinable()) {
    graceful_shutdown_thread_ = std::thread([this]() {
      // Stop accepting new RPC requests. Existing requests are allowed to
      // complete
      server_->Shutdown();
    });
  }

  // Required to disable additional requests on existing streaming connections
  RETURN_IF_ERR(DisableNewConnections());

  if (exit_timeout_secs != nullptr) {
    RETURN_IF_ERR(WaitForConnectionsToClose(exit_timeout_secs, service_name));
  }

  return nullptr;  // success
}

// Stops the gRPC endpoint immediately.
//
// Returns TRITONSERVER_ERROR_UNAVAILABLE if the endpoint is not running,
// otherwise nullptr once the server, completion queues and handlers have been
// shut down. The order of the steps below is significant.
TRITONSERVER_Error*
Server::Stop()
{
  if (!running_) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNAVAILABLE, "GRPC server is not running.");
  }

  // Forcefully cancel remaining RPC connections
  // (the deadline passed to Shutdown() is the current time).
  server_->Shutdown(std::chrono::system_clock::now());

  // If GracefulStop() launched its background shutdown thread, wait for it
  // to finish before tearing anything else down.
  if (graceful_shutdown_thread_.joinable()) {
    graceful_shutdown_thread_.join();
  }

  // Shutdown completion queues
  common_cq_->Shutdown();
  model_infer_cq_->Shutdown();
  model_stream_infer_cq_->Shutdown();

  // Must stop all handlers explicitly to wait for all the handler
  // threads to join since they are referencing completion queue, etc.
  common_handler_->Stop();
  for (auto& model_infer_handler : model_infer_handlers_) {
    model_infer_handler->Stop();
  }
  for (auto& model_stream_infer_handler : model_stream_infer_handlers_) {
    model_stream_infer_handler->Stop();
  }

  running_ = false;
  return nullptr;  // success
}

// Clears the accepting_new_conn_ flag while holding conn_mtx_ exclusively.
// Always returns nullptr.
TRITONSERVER_Error*
Server::DisableNewConnections()
{
  {
    // Exclusive lock on the shared mutex for the duration of the write.
    std::lock_guard<std::shared_mutex> exclusive_guard(conn_mtx_);
    accepting_new_conn_ = false;
  }

  return nullptr;  // success
}

// Polls once per second until either the connection counter reaches zero or
// the timeout budget is used up. '*exit_timeout_secs' is decremented after
// every one second sleep, so on return it holds the remaining budget.
// 'exit_timeout_secs' must not be null. Always returns nullptr.
TRITONSERVER_Error*
Server::WaitForConnectionsToClose(
    uint32_t* exit_timeout_secs, const std::string& service_name)
{
  uint32_t& remaining_secs = *exit_timeout_secs;

  for (; remaining_secs > 0 && conn_cnt_ > 0; --remaining_secs) {
    LOG_INFO << "Timeout " << remaining_secs << ": Found " << conn_cnt_ << " "
             << service_name << " service connections and inference handlers";
    std::this_thread::sleep_for(std::chrono::seconds(1));
  }

  return nullptr;  // complete
}

}}}  // namespace triton::server::grpc


================================================
FILE: src/grpc/grpc_server.h
================================================
// Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <grpc++/grpc++.h>

#include <shared_mutex>
#include <vector>

#include "../common.h"
#include "../restricted_features.h"
#include "../shared_memory_manager.h"
#include "../tracer.h"
#include "grpc_handler.h"
#include "grpc_service.grpc.pb.h"
#include "grpc_utils.h"
#include "health.grpc.pb.h"
#include "infer_handler.h"
#include "stream_infer_handler.h"
#include "triton/core/tritonserver.h"

namespace triton { namespace server { namespace grpc {

// gRPC uses HTTP/2, which requires header names to be lowercase, so the
// Triton-specific header that may be set for gRPC is defined in all
// lowercase.
constexpr char kRestrictedProtocolHeaderTemplate[] = "triton-grpc-protocol-";

// Listening socket settings for the gRPC endpoint. The initializers below are
// the defaults used when a value is not overridden.
struct SocketOptions {
  // Address component of the "<address>:<port>" string the server is
  // created with.
  std::string address_{"0.0.0.0"};
  // Port component of the "<address>:<port>" string.
  int32_t port_{8001};
  // Read from the "reuse_port" option key; presumably controls whether the
  // listening port may be shared between sockets -- confirm against the
  // constructor.
  bool reuse_port_{false};
};

// SSL/TLS settings for the gRPC endpoint. SSL is disabled by default and all
// file names default to the empty string.
struct SslOptions {
  // Whether SSL is used for communication
  bool use_ssl_{false};
  // File holding PEM-encoded server certificate
  std::string server_cert_{""};
  // File holding PEM-encoded server key
  std::string server_key_{""};
  // File holding PEM-encoded root certificate
  std::string root_cert_{""};
  // Whether to use Mutual Authentication
  bool use_mutual_auth_{false};
};

// GRPC KeepAlive: https://grpc.github.io/grpc/cpp/md_doc_keepalive.html
// https://grpc.io/docs/guides/keepalive/
//
// The initializers below are the defaults used when a value is not
// overridden. Fields with an "_ms_" suffix are presumably expressed in
// milliseconds -- see the gRPC documentation linked above for the meaning of
// each setting.
struct KeepAliveOptions {
  int keepalive_time_ms_{7200000};
  int keepalive_timeout_ms_{20000};
  bool keepalive_permit_without_calls_{false};
  int http2_max_pings_without_data_{2};
  int http2_min_recv_ping_interval_without_data_ms_{300000};
  int http2_max_ping_strikes_{2};
  int max_connection_age_ms_{0};
  int max_connection_age_grace_ms_{0};
};

// Complete set of options used to construct a gRPC Server.
struct Options {
  // Listening socket settings.
  SocketOptions socket_;
  // SSL/TLS settings.
  SslOptions ssl_;
  // Keep-alive and connection-age settings.
  KeepAliveOptions keep_alive_;
  // Compression level handed to the inference handlers; no compression by
  // default.
  grpc_compression_level infer_compression_level_{GRPC_COMPRESS_LEVEL_NONE};
  // The number of gRPC inference handler threads. Useful for
  // throughput tuning of models that are request handling bounded.
  int infer_thread_count_{2};
  // The maximum number of inference request/response objects that
  // remain allocated for reuse. As long as the number of in-flight
  // requests doesn't exceed this value there will be no
  // allocation/deallocation of request/response objects.
  int infer_allocation_pool_size_{8};
  // Upper bound handed to the inference handlers as the response pool size;
  // INT_MAX by default.
  int max_response_pool_size_{INT_MAX};
  // Restricted protocol configuration for this endpoint.
  RestrictedFeatures restricted_protocols_;
  // Pattern handed to the inference handlers as the forward-header pattern;
  // presumably selects which request headers are forwarded -- confirm against
  // the handler implementation.
  std::string forward_header_pattern_;
};

// gRPC frontend endpoint. Holds the ::grpc::Server, the completion queues and
// the handlers that service them, together with the state used to shut the
// endpoint down gracefully.
class Server {
 public:
  // Constructs a Server from an explicit Options structure and stores it in
  // '*server'. Returns nullptr on success; a std::invalid_argument thrown by
  // the constructor is reported as TRITONSERVER_ERROR_INVALID_ARG.
  static TRITONSERVER_Error* Create(
      const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
      triton::server::TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      const Options& server_options, std::unique_ptr<Server>* server);

  // Constructs a Server from a key/value option map. The map is converted to
  // an Options structure with GetOptions() before delegating to the overload
  // above. Returns nullptr on success.
  static TRITONSERVER_Error* Create(
      std::shared_ptr<TRITONSERVER_Server>& server, UnorderedMapType& options,
      triton::server::TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      const RestrictedFeatures& restricted_features,
      std::unique_ptr<Server>* service);

  // Calls Stop() and ignores its result.
  ~Server();

  // Builds and starts the gRPC server and all handlers. Fails if the endpoint
  // is already running or the port could not be bound.
  TRITONSERVER_Error* Start();
  // Starts a graceful shutdown: a background thread shuts the gRPC server
  // down, new requests on existing connections are disabled and, if
  // 'exit_timeout_secs' is non-null, the call waits for connections to close.
  TRITONSERVER_Error* GracefulStop(
      uint32_t* exit_timeout_secs = nullptr,
      const std::string& service_name = "gRPC");
  // Shuts the gRPC server down with an immediate deadline, shuts down the
  // completion queues and stops all handlers. Fails if not running.
  TRITONSERVER_Error* Stop();
  // Sets accepting_new_conn_ to false under an exclusive lock on conn_mtx_.
  TRITONSERVER_Error* DisableNewConnections();
  // Sleeps in one second steps while '*exit_timeout_secs' and conn_cnt_ are
  // both non-zero, decrementing '*exit_timeout_secs' after each step.
  TRITONSERVER_Error* WaitForConnectionsToClose(
      uint32_t* exit_timeout_secs, const std::string& service_name);

 private:
  // Private constructor; instances are created through Create().
  Server(
      const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
      triton::server::TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      const Options& server_options);

  // Helpers that populate one option group each from a key/value map.
  static TRITONSERVER_Error* GetSocketOptions(
      SocketOptions& options, UnorderedMapType& options_map);
  static TRITONSERVER_Error* GetSslOptions(
      SslOptions& options, UnorderedMapType& options_map);
  static TRITONSERVER_Error* GetKeepAliveOptions(
      KeepAliveOptions& options, UnorderedMapType& options_map);

  // Populates a complete Options structure from a key/value map, using the
  // three helpers above for the nested groups.
  static TRITONSERVER_Error* GetOptions(
      Options& options, UnorderedMapType& options_map);

  std::shared_ptr<TRITONSERVER_Server> tritonserver_;
  TraceManager* trace_manager_;
  std::shared_ptr<SharedMemoryManager> shm_manager_;
  // Address string used in log and error messages.
  const std::string server_addr_;

  ::grpc::ServerBuilder builder_;

  inference::GRPCInferenceService::AsyncService service_;
  ::grpc::health::v1::Health::AsyncService health_service_;

  // Created by builder_.BuildAndStart() in Start().
  std::unique_ptr<::grpc::Server> server_;

  std::unique_ptr<::grpc::ServerCompletionQueue> common_cq_;
  std::unique_ptr<::grpc::ServerCompletionQueue> model_infer_cq_;
  std::unique_ptr<::grpc::ServerCompletionQueue> model_stream_infer_cq_;

  std::unique_ptr<HandlerBase> common_handler_;
  std::vector<std::unique_ptr<HandlerBase>> model_infer_handlers_;
  std::vector<std::unique_ptr<HandlerBase>> model_stream_infer_handlers_;

  // Start() treats a value of zero as a failed port bind.
  int bound_port_{0};
  // True between a successful Start() and the next Stop().
  bool running_{false};

  // Thread to handle the execution of the gRPC endpoint's graceful shutdown
  std::thread graceful_shutdown_thread_;
  // Mutex to protect access to the following connection variables
  std::shared_mutex conn_mtx_;
  // Counter to track the number of active connections and inference handlers
  std::atomic<uint32_t> conn_cnt_{0};
  // Flag to indicate if the server is currently accepting new connections
  bool accepting_new_conn_{true};
};

}}}  // namespace triton::server::grpc


================================================
FILE: src/grpc/grpc_utils.cc
================================================
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "grpc_utils.h"

namespace triton { namespace server { namespace grpc {

// Streams the enumerator name of 'step' to 'out'. A value that does not match
// any enumerator leaves the stream untouched.
std::ostream&
operator<<(std::ostream& out, const Steps& step)
{
  const char* label = nullptr;

  switch (step) {
    case START:
      label = "START";
      break;
    case COMPLETE:
      label = "COMPLETE";
      break;
    case FINISH:
      label = "FINISH";
      break;
    case ISSUED:
      label = "ISSUED";
      break;
    case READ:
      label = "READ";
      break;
    case WRITEREADY:
      label = "WRITEREADY";
      break;
    case WRITTEN:
      label = "WRITTEN";
      break;
    case WAITING_NOTIFICATION:
      label = "WAITING_NOTIFICATION";
      break;
    case CANCELLATION_ISSUED:
      label = "CANCELLATION_ISSUED";
      break;
    case CANCELLED:
      label = "CANCELLED";
      break;
    case PARTIAL_COMPLETION:
      label = "PARTIAL_COMPLETION";
      break;
  }

  if (label != nullptr) {
    out << label;
  }
  return out;
}

// Writes the gRPC status equivalent of 'err' into '*status'. A null 'err'
// maps to ::grpc::Status::OK; otherwise the Triton error code is translated
// with CodeToStatus() and the Triton error message is carried over.
void
GrpcStatusUtil::Create(::grpc::Status* status, TRITONSERVER_Error* err)
{
  if (err != nullptr) {
    const ::grpc::StatusCode grpc_code =
        GrpcStatusUtil::CodeToStatus(TRITONSERVER_ErrorCode(err));
    *status = ::grpc::Status(grpc_code, TRITONSERVER_ErrorMessage(err));
    return;
  }

  *status = ::grpc::Status::OK;
}

// Translates a Triton error code into the corresponding gRPC status code.
// Any code without an explicit mapping yields ::grpc::StatusCode::UNKNOWN.
::grpc::StatusCode
GrpcStatusUtil::CodeToStatus(TRITONSERVER_Error_Code code)
{
  // GRPC status codes:
  // https://github.com/grpc/grpc/blob/master/include/grpc/impl/codegen/status.h
  ::grpc::StatusCode grpc_code = ::grpc::StatusCode::UNKNOWN;

  switch (code) {
    case TRITONSERVER_ERROR_UNKNOWN:
      // Already UNKNOWN.
      break;
    case TRITONSERVER_ERROR_INTERNAL:
      grpc_code = ::grpc::StatusCode::INTERNAL;
      break;
    case TRITONSERVER_ERROR_NOT_FOUND:
      grpc_code = ::grpc::StatusCode::NOT_FOUND;
      break;
    case TRITONSERVER_ERROR_INVALID_ARG:
      grpc_code = ::grpc::StatusCode::INVALID_ARGUMENT;
      break;
    case TRITONSERVER_ERROR_UNAVAILABLE:
      grpc_code = ::grpc::StatusCode::UNAVAILABLE;
      break;
    case TRITONSERVER_ERROR_UNSUPPORTED:
      grpc_code = ::grpc::StatusCode::UNIMPLEMENTED;
      break;
    case TRITONSERVER_ERROR_ALREADY_EXISTS:
      grpc_code = ::grpc::StatusCode::ALREADY_EXISTS;
      break;
    case TRITONSERVER_ERROR_CANCELLED:
      grpc_code = ::grpc::StatusCode::CANCELLED;
      break;
  }

  return grpc_code;
}

// Extracts the optional 'classification' parameter of a requested output.
//
// '*has_classification' is set to true if the parameter is present (even when
// it turns out to be invalid) and false otherwise. When present and valid,
// '*classification_count' receives its value; it is left untouched otherwise.
//
// Returns nullptr on success, or a TRITONSERVER_ERROR_INVALID_ARG error when
// the parameter is not an int64_param, is not strictly positive, or does not
// fit in a uint32_t.
TRITONSERVER_Error*
ParseClassificationParams(
    const inference::ModelInferRequest::InferRequestedOutputTensor& output,
    bool* has_classification, uint32_t* classification_count)
{
  *has_classification = false;

  const auto& class_it = output.parameters().find("classification");
  if (class_it == output.parameters().end()) {
    return nullptr;  // success, parameter not specified
  }

  *has_classification = true;

  const auto& param = class_it->second;
  if (param.parameter_choice_case() !=
      inference::InferParameter::ParameterChoiceCase::kInt64Param) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "invalid value type for 'classification' parameter, expected "
        "int64_param");
  }

  const int64_t cnt = param.int64_param();
  if (cnt <= 0) {
    // Zero is rejected as well, so the message states the real lower bound.
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "invalid value for 'classification' parameter, expected > 0");
  }

  // The count is returned as a uint32_t; reject values that would be
  // silently truncated by the conversion.
  if (cnt > static_cast<int64_t>(UINT32_MAX)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string(
             "invalid value for 'classification' parameter, expected <= ") +
         std::to_string(UINT32_MAX))
            .c_str());
  }

  *classification_count = static_cast<uint32_t>(cnt);

  return nullptr;  // success
}

// Reads the entire contents of 'filename' into 'data'. 'data' is always
// cleared first and stays empty when 'filename' is empty or the file cannot
// be opened.
void
ReadFile(const std::string& filename, std::string& data)
{
  data.clear();

  if (filename.empty()) {
    return;
  }

  std::ifstream input(filename.c_str(), std::ios::in);
  if (!input.is_open()) {
    return;
  }

  std::stringstream contents;
  contents << input.rdbuf();
  input.close();
  data = contents.str();
}

}}}  // namespace triton::server::grpc


================================================
FILE: src/grpc/grpc_utils.h
================================================
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <list>
#include <memory>
#include <unordered_map>

#include "../classification.h"
#include "../common.h"
#include "../shared_memory_manager.h"
#include "grpc_service.grpc.pb.h"
#include "triton/common/logging.h"
#include "triton/core/tritonserver.h"

namespace triton { namespace server { namespace grpc {

// The step of processing that the state is in. Every state must
// recognize START, COMPLETE and FINISH; the others are optional.
typedef enum {
  // This marks the starting stage of the RPC.
  START,
  // This marks that the RPC is complete.
  COMPLETE,
  // This marks the stage where all the notifications from the gRPC
  // completion queue have been received and the state can be safely released.
  FINISH,
  // This stage means that the RPC has been issued to Triton for inference
  // and is waiting for the server callbacks or cancellation to be
  // invoked.
  ISSUED,
  // This stage means the request has been read from the network and
  // can be sent to Triton for execution.
  READ,
  // This stage means that the response is ready to be written back to
  // the network.
  WRITEREADY,
  // This stage means that the response has been written completely to the
  // network.
  WRITTEN,
  // This marks the special stage for the state object to differentiate
  // the tag delivered from the AsyncNotifyWhenDone() method.
  WAITING_NOTIFICATION,
  // This stage means that the cancellation for the RPC has been issued
  // to the server.
  CANCELLATION_ISSUED,
  // This stage marks that the state has been successfully cancelled.
  CANCELLED,
  // This is an intermediary stage where the state has been partially
  // completed by either the gRPC responder Finish call or the
  // AsyncNotifyWhenDone() notification. Whichever of the two arrives next
  // moves the stage to fully complete.
  PARTIAL_COMPLETION
} Steps;

// Progress of error handling for a stream, as tracked by gRPCErrorTracker.
typedef enum {
  // No error from CORE seen yet
  NONE,
  // Error from CORE encountered, waiting to be picked up by the completion
  // queue to initiate cancellation
  ERROR_ENCOUNTERED,
  // Error from CORE encountered, stream closed.
  // This state is added to avoid double cancellation
  ERROR_HANDLING_COMPLETE
} TritonGRPCErrorSteps;

// Tracks whether the triton_grpc_error mode is enabled for a stream and how
// far handling of an error from CORE has progressed. The member functions are
// defined outside this header.
class gRPCErrorTracker {
 public:
  // True if set by user via header
  // Can be accessed without a lock, as set only once in startstream
  std::atomic<bool> triton_grpc_error_;

  // Indicates the state of triton_grpc_error, only relevant if special
  // triton_grpc_error feature set to true by client
  TritonGRPCErrorSteps grpc_stream_error_state_;

  // Constructor. Starts with triton_grpc_error mode disabled and no error
  // recorded (TritonGRPCErrorSteps::NONE).
  gRPCErrorTracker()
      : triton_grpc_error_(false),
        grpc_stream_error_state_(TritonGRPCErrorSteps::NONE)
  {
  }
  // Changes the state of grpc_stream_error_state_ to ERROR_HANDLING_COMPLETE,
  // indicating we have closed the stream and initiated the cancel flow
  void MarkGRPCErrorHandlingComplete();

  // Returns true ONLY when GRPC_ERROR from CORE is waiting to be processed.
  bool CheckAndUpdateGRPCError();

  // Marks error after it has been responded to
  void MarkGRPCErrorEncountered();

  // Checks if error already responded to in triton_grpc_error mode
  bool GRPCErrorEncountered();
};
// Debugging helper
std::ostream& operator<<(std::ostream& out, const Steps& step);

//
// GrpcStatusUtil
//
// Static helpers that convert Triton errors into gRPC statuses.
//
class GrpcStatusUtil {
 public:
  // Writes the gRPC status for 'err' into '*status'; a null 'err' yields
  // ::grpc::Status::OK.
  static void Create(::grpc::Status* status, TRITONSERVER_Error* err);
  // Maps a Triton error code to the matching gRPC status code; unmapped codes
  // yield ::grpc::StatusCode::UNKNOWN.
  static ::grpc::StatusCode CodeToStatus(TRITONSERVER_Error_Code code);
};

// Extracts the shared memory parameters of an input or requested output
// tensor.
//
// Recognized parameters:
//   'shared_memory_region'     string_param, enables shared memory
//   'shared_memory_offset'     int64_param, optional, defaults to 0
//   'shared_memory_byte_size'  int64_param, required when a region is given
//
// '*has_shared_memory' is set to true if a region parameter is present. On
// success '*region_name', '*offset' and '*byte_size' hold the parsed values;
// '*region_name' and '*byte_size' are left untouched when the tensor does not
// use shared memory.
//
// Returns nullptr on success, or a TRITONSERVER_ERROR_INVALID_ARG error when
// a parameter has the wrong type, the offset or byte size is given without a
// region, the byte size is missing for a region, or the byte size is
// negative.
template <typename TensorType>
TRITONSERVER_Error*
ParseSharedMemoryParams(
    const TensorType& tensor, bool* has_shared_memory, std::string* region_name,
    int64_t* offset, size_t* byte_size)
{
  *has_shared_memory = false;
  *offset = 0 /* default value */;
  const auto& region_it = tensor.parameters().find("shared_memory_region");
  if (region_it != tensor.parameters().end()) {
    *has_shared_memory = true;
    const auto& infer_param = region_it->second;
    if (infer_param.parameter_choice_case() !=
        inference::InferParameter::ParameterChoiceCase::kStringParam) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string(
              "invalid value type for 'shared_memory_region' parameter for "
              "tensor '" +
              tensor.name() + "', expected string_param.")
              .c_str());
    }
    *region_name = infer_param.string_param();
  }

  const auto& offset_it = tensor.parameters().find("shared_memory_offset");
  if (offset_it != tensor.parameters().end()) {
    if (!*has_shared_memory) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string(
              "'shared_memory_offset' can not be specified without "
              "'shared_memory_region' parameter for tensor '" +
              tensor.name() + "'")
              .c_str());
    }
    const auto& infer_param = offset_it->second;
    if (infer_param.parameter_choice_case() !=
        inference::InferParameter::ParameterChoiceCase::kInt64Param) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string(
              "invalid value type for 'shared_memory_offset' parameter for "
              "tensor '" +
              tensor.name() + "', expected int64_param.")
              .c_str());
    }
    *offset = infer_param.int64_param();
  }

  const auto& bs_it = tensor.parameters().find("shared_memory_byte_size");
  if (bs_it != tensor.parameters().end()) {
    if (!*has_shared_memory) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string(
              "'shared_memory_byte_size' can not be specified without "
              "'shared_memory_region' parameter for tensor '" +
              tensor.name() + "'")
              .c_str());
    }
    const auto& infer_param = bs_it->second;
    if (infer_param.parameter_choice_case() !=
        inference::InferParameter::ParameterChoiceCase::kInt64Param) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string(
              "invalid value type for 'shared_memory_byte_size' parameter "
              "for "
              "tensor '" +
              tensor.name() + "', expected int64_param.")
              .c_str());
    }
    // The parameter is a signed 64-bit value but the result is unsigned;
    // reject negative sizes instead of letting them wrap around to a huge
    // size_t.
    const int64_t requested_byte_size = infer_param.int64_param();
    if (requested_byte_size < 0) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string(
              "invalid value for 'shared_memory_byte_size' parameter for "
              "tensor '" +
              tensor.name() + "', expected >= 0")
              .c_str());
    }
    *byte_size = static_cast<size_t>(requested_byte_size);
  } else {
    if (*has_shared_memory) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string(
              "'shared_memory_byte_size' must be specified along with "
              "'shared_memory_region' parameter for tensor '" +
              tensor.name() + "'")
              .c_str());
    }
  }

  return nullptr;
}

TRITONSERVER_Error* ParseClassificationParams(
    const inference::ModelInferRequest::InferRequestedOutputTensor& output,
    bool* has_classification, uint32_t* classification_count);


void ReadFile(const std::string& filename, std::string& data);
}}}  // namespace triton::server::grpc


================================================
FILE: src/grpc/infer_handler.cc
================================================
// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "infer_handler.h"

#ifndef NDEBUG
// Returns a process-wide, monotonically increasing identifier. The first call
// returns 1. The counter is atomic, so every caller gets a distinct value.
uint64_t
NextUniqueId()
{
  static std::atomic<uint64_t> counter(0);
  // fetch_add returns the previous value; add one to match pre-increment.
  return counter.fetch_add(1) + 1;
}
#endif  // NDEBUG

namespace triton { namespace server { namespace grpc {

// Completes the buffer attributes of an output tensor that lives in GPU
// shared memory by setting the CUDA IPC handle recorded in 'shm_map'. Nothing
// is changed for unnamed tensors, tensors not found in 'shm_map', or tensors
// whose shared memory region is not GPU memory.
//
// Returns nullptr on success or the error from setting the handle.
TRITONSERVER_Error*
OutputBufferAttributesHelper(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    const TensorShmMap& shm_map,
    TRITONSERVER_BufferAttributes* buffer_attributes)
{
  // Only the cuda ipc handle needs to be set here; the remaining buffer
  // attributes have already been populated by triton core.
  if (tensor_name == nullptr) {
    return nullptr;  // Success
  }

  const auto shm_it = shm_map.find(tensor_name);
  if (shm_it == shm_map.end()) {
    return nullptr;  // Success
  }

  const auto& shm_info = shm_it->second;
  if (shm_info.memory_type_ == TRITONSERVER_MEMORY_GPU) {
    RETURN_IF_ERR(TRITONSERVER_BufferAttributesSetCudaIpcHandle(
        buffer_attributes, shm_info.cuda_ipc_handle_));
  }

  return nullptr;  // Success
}

// Reports the memory type and id of the buffer that will back the named
// output tensor.
//
// The answer is CPU memory with id 0 unless the tensor is mapped to a shared
// memory region in 'shm_map' that is large enough for '*byte_size' (or no
// byte size was provided), in which case the region's memory type and id are
// reported. Always returns nullptr.
TRITONSERVER_Error*
OutputBufferQueryHelper(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t* byte_size, const TensorShmMap& shm_map,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  // Default: the buffer is created directly in the response protobuf, so the
  // type is CPU.
  *memory_type = TRITONSERVER_MEMORY_CPU;
  *memory_type_id = 0;

  // Shared memory can only apply when a tensor name was provided.
  if (tensor_name == nullptr) {
    return nullptr;  // Success
  }

  const auto shm_it = shm_map.find(tensor_name);
  if (shm_it == shm_map.end()) {
    return nullptr;  // Success
  }

  // The output is in shared memory. If a byte size is provided and exceeds
  // the region size, keep the default GRPC buffer properties; no error is
  // returned here, it will be raised when the allocation happens.
  const auto& shm_info = shm_it->second;
  const bool region_too_small =
      (byte_size != nullptr) && (*byte_size > shm_info.byte_size_);
  if (!region_too_small) {
    *memory_type = shm_info.memory_type_;
    *memory_type_id = shm_info.memory_type_id_;
  }

  return nullptr;  // Success
}

// Response allocation callback for the ModelInfer RPC. 'userp' is the
// AllocPayload for the request; the allocation itself is delegated to
// ResponseAllocatorHelper() against the single response object of the RPC.
//
// Make sure to keep InferResponseAlloc and OutputBufferQuery logic in sync
TRITONSERVER_Error*
InferResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  using Payload = AllocPayload<inference::ModelInferResponse>;
  auto* alloc_payload = static_cast<Payload*>(userp);

  // The ModelInfer RPC produces exactly one response per request, so a single
  // response object is created and reused.
  auto* infer_response =
      alloc_payload->response_queue_->GetNonDecoupledResponse();

  return ResponseAllocatorHelper(
      allocator, tensor_name, byte_size, preferred_memory_type,
      preferred_memory_type_id, infer_response, alloc_payload->shm_map_,
      buffer, buffer_userp, actual_memory_type, actual_memory_type_id);
}

// Buffer query callback used for the ModelInfer RPC. Reports the memory
// type / id that an output named 'tensor_name' would be placed in by
// forwarding to OutputBufferQueryHelper with the request's shared-memory map.
//
// Make sure to keep InferResponseAlloc and OutputBufferQuery logic in sync
TRITONSERVER_Error*
OutputBufferQuery(
    TRITONSERVER_ResponseAllocator* allocator, void* userp,
    const char* tensor_name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  auto* alloc_payload =
      reinterpret_cast<AllocPayload<inference::ModelInferResponse>*>(userp);
  const auto& output_shm_map = alloc_payload->shm_map_;

  return OutputBufferQueryHelper(
      allocator, tensor_name, byte_size, output_shm_map, memory_type,
      memory_type_id);
}

// Buffer attributes callback used for the ModelInfer RPC. Fills
// 'buffer_attributes' for the output named 'tensor_name' by forwarding to
// OutputBufferAttributesHelper with the shared-memory map carried in the
// allocator payload ('userp'). 'buffer_userp' is not used.
//
// Returns the helper's result unchanged (nullptr on success).
//
// Make sure to keep InferResponseAlloc, OutputBufferQuery, and
// OutputBufferAttributes logic in sync
TRITONSERVER_Error*
OutputBufferAttributes(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    TRITONSERVER_BufferAttributes* buffer_attributes, void* userp,
    void* buffer_userp)
{
  AllocPayload<inference::ModelInferResponse>* payload =
      reinterpret_cast<AllocPayload<inference::ModelInferResponse>*>(userp);

  return OutputBufferAttributesHelper(
      allocator, tensor_name, payload->shm_map_, buffer_attributes);
}

// Release callback for buffers handed out by InferResponseAlloc. It only
// logs the release and always reports success.
TRITONSERVER_Error*
InferResponseFree(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  LOG_VERBOSE(1) << "GRPC free: "
                 << "size " << byte_size << ", addr " << buffer;

  // Nothing to release: InferResponseAlloc wrote directly into the
  // response protobuf, so this callback does not own the buffer.
  TRITONSERVER_Error* const no_error = nullptr;
  return no_error;
}

// Validates one typed 'contents' field of an input tensor before it is used
// as the tensor data.
//
//   input_name / model_name  - used only to build the error message
//   tensor_dt                - datatype implied by the contents field in use
//   input_dt                 - datatype declared for the input
//   binary_data_byte_size    - bytes of data already collected for the input
//
// Returns an INVALID_ARG error if data was already collected for the input
// or if the two datatypes differ; nullptr otherwise.
TRITONSERVER_Error*
InferGRPCToInputHelper(
    const std::string& input_name, const std::string& model_name,
    const TRITONSERVER_DataType tensor_dt, const TRITONSERVER_DataType input_dt,
    const size_t binary_data_byte_size)
{
  // Data was already gathered for this input from another source.
  if (binary_data_byte_size != 0) {
    const std::string already_supplied_msg =
        "unexpected explicit tensor data for input tensor '" + input_name +
        "' for model '" + model_name + "', binary data was already supplied.";
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG, already_supplied_msg.c_str());
  }

  // The contents field in use must agree with the declared datatype.
  if (tensor_dt != input_dt) {
    const std::string type_mismatch_msg =
        "unexpected explicit tensor data for input tensor '" + input_name +
        "' for model '" + model_name + "' of type '" +
        TRITONSERVER_DataTypeString(tensor_dt) + "', expected datatype '" +
        TRITONSERVER_DataTypeString(input_dt) + "'";
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG, type_mismatch_msg.c_str());
  }

  return nullptr;  // success
}

// Response-start callback for the ModelInfer RPC. Always returns nullptr.
TRITONSERVER_Error*
InferResponseStart(TRITONSERVER_ResponseAllocator* allocator, void* userp)
{
  auto* alloc_payload =
      reinterpret_cast<AllocPayload<inference::ModelInferResponse>*>(userp);

  // ModelInfer RPC expects exactly one response per request, so make sure
  // that single response object exists as soon as the response starts. The
  // returned pointer is intentionally not used here.
  auto& responses = alloc_payload->response_queue_;
  responses->GetNonDecoupledResponse();

  return nullptr;  // success
}

// Copies the metadata of the gRPC 'request' onto 'inference_request':
// the request id, the request parameters, the input tensor declarations
// (name / datatype / shape) and the requested output names.
//
// Parameter handling:
//   - "sequence_id"    : int64 or string, sets the correlation id
//   - "sequence_start" : bool, sets the SEQUENCE_START request flag
//   - "sequence_end"   : bool, sets the SEQUENCE_END request flag
//   - "priority"       : non-negative int64, or uint64
//   - "timeout"        : int64, forwarded as the timeout in microseconds
//   - "triton_*"       : must be listed in TRITON_RESERVED_REQUEST_PARAMS;
//                        stored in 'state_params' rather than on the request
//   - anything else    : forwarded as a custom int64 / bool / string / double
//                        request parameter
//
// Returns nullptr on success, or the first error encountered (INVALID_ARG
// for a parameter of the wrong type / value, or whatever error one of the
// TRITONSERVER_* calls reports).
TRITONSERVER_Error*
SetInferenceRequestMetadata(
    TRITONSERVER_InferenceRequest* inference_request,
    const inference::ModelInferRequest& request, StateParameters& state_params)
{
  using ParamCase = inference::InferParameter::ParameterChoiceCase;

  RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetId(
      inference_request, request.id().c_str()));

  uint32_t flags = 0;
  // Iterate by const reference so that the key string and the parameter
  // message are not copied for every entry.
  for (const auto& param : request.parameters()) {
    const std::string& key = param.first;
    const auto& infer_param = param.second;
    const ParamCase choice = infer_param.parameter_choice_case();

    if (key == "sequence_id") {
      if (choice == ParamCase::kInt64Param) {
        RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetCorrelationId(
            inference_request, infer_param.int64_param()));
      } else if (choice == ParamCase::kStringParam) {
        RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetCorrelationIdString(
            inference_request, infer_param.string_param().c_str()));
      } else {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "invalid value type for 'sequence_id' parameter, expected "
            "int64_param or string_param.");
      }
    } else if (key == "sequence_start") {
      if (choice != ParamCase::kBoolParam) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "invalid value type for 'sequence_start' parameter, expected "
            "bool_param.");
      }
      if (infer_param.bool_param()) {
        flags |= TRITONSERVER_REQUEST_FLAG_SEQUENCE_START;
      }
    } else if (key == "sequence_end") {
      if (choice != ParamCase::kBoolParam) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "invalid value type for 'sequence_end' parameter, expected "
            "bool_param.");
      }
      if (infer_param.bool_param()) {
        flags |= TRITONSERVER_REQUEST_FLAG_SEQUENCE_END;
      }
    } else if (key == "priority") {
      if (choice == ParamCase::kInt64Param) {
        // The priority is set through the unsigned setter, so a negative
        // signed value has to be rejected explicitly.
        if (infer_param.int64_param() < 0) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG,
              "invalid value for 'priority', expected value >= 0.");
        }
        RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetPriorityUInt64(
            inference_request, infer_param.int64_param()));
      } else if (choice == ParamCase::kUint64Param) {
        RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetPriorityUInt64(
            inference_request, infer_param.uint64_param()));
      } else {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "invalid value type for 'priority' parameter, expected "
            "int64_param or uint64_param.");
      }
    } else if (key == "timeout") {
      if (choice != ParamCase::kInt64Param) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "invalid value type for 'timeout' parameter, expected "
            "int64_param.");
      }
      // NOTE(review): the int64 value is forwarded without a sign check;
      // confirm how the core API treats a negative timeout.
      RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetTimeoutMicroseconds(
          inference_request, infer_param.int64_param()));
    } else if (key.rfind("triton_", 0) == 0) {
      // Keys with the 'triton_' prefix are reserved; only the known ones
      // are accepted and they are recorded in 'state_params'.
      if (!Contains(TRITON_RESERVED_REQUEST_PARAMS, key)) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string(
                 "parameter keys starting with 'triton_' are reserved for "
                 "Triton "
                 "usage. Only the following keys starting with 'triton_' are "
                 "allowed: ") +
             Join(TRITON_RESERVED_REQUEST_PARAMS, " "))
                .c_str());
      }
      RETURN_IF_ERR(SetStateParameterFromTritonParameter(state_params, param));
    } else {
      // Custom parameter, forwarded to the inference request as-is.
      if (choice == ParamCase::kInt64Param) {
        RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetIntParameter(
            inference_request, key.c_str(), infer_param.int64_param()));
      } else if (choice == ParamCase::kBoolParam) {
        RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetBoolParameter(
            inference_request, key.c_str(), infer_param.bool_param()));
      } else if (choice == ParamCase::kStringParam) {
        RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetStringParameter(
            inference_request, key.c_str(),
            infer_param.string_param().c_str()));
      } else if (choice == ParamCase::kDoubleParam) {
        RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetDoubleParameter(
            inference_request, key.c_str(), infer_param.double_param()));
      } else {
        // The message lists every value type accepted above, including
        // double_param.
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            std::string(
                "invalid value type for '" + key +
                "' parameter, expected "
                "int64_param, bool_param, string_param, or double_param.")
                .c_str());
      }
    }
  }

  RETURN_IF_ERR(
      TRITONSERVER_InferenceRequestSetFlags(inference_request, flags));

  // Declare every input tensor (name, datatype and shape) on the request.
  for (const auto& input : request.inputs()) {
    RETURN_IF_ERR(TRITONSERVER_InferenceRequestAddInput(
        inference_request, input.name().c_str(),
        TRITONSERVER_StringToDataType(input.datatype().c_str()),
        input.shape().data(), input.shape_size()));
  }

  // Register the outputs explicitly requested by the client.
  for (const auto& output : request.outputs()) {
    RETURN_IF_ERR(TRITONSERVER_InferenceRequestAddRequestedOutput(
        inference_request, output.name().c_str()));
  }

  return nullptr;  // Success
}

// Stores a reserved 'triton_*' request parameter in 'state_params'.
//
// Only "triton_enable_empty_final_response" is handled: it must be a
// bool_param and is copied into 'enable_empty_final_response_'. Any other
// key is ignored. Returns INVALID_ARG on a type mismatch, nullptr otherwise.
TRITONSERVER_Error*
SetStateParameterFromTritonParameter(
    StateParameters& state_params,
    const std::pair<std::string, inference::InferParameter>& param)
{
  const std::string& name = param.first;
  if (name != "triton_enable_empty_final_response") {
    return nullptr;  // success
  }

  const inference::InferParameter& infer_param = param.second;
  const bool is_bool =
      (infer_param.parameter_choice_case() ==
       inference::InferParameter::ParameterChoiceCase::kBoolParam);
  if (!is_bool) {
    const std::string msg =
        "invalid value type for '" + name + "' parameter, expected bool_param.";
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
  }

  state_params.enable_empty_final_response_ = infer_param.bool_param();
  return nullptr;  // success
}

// Attaches the data of every input tensor of the gRPC 'request' to
// 'inference_request'.
//
// For each input the data comes from exactly one of:
//   - a shared memory region named in the input's parameters,
//   - the typed 'contents' field of the input,
//   - the next unused entry of 'raw_input_contents'.
//
//   tritonserver      - not used by this function
//   shm_manager       - resolves shared memory regions
//   request           - the incoming gRPC request
//   serialized_data   - receives the strings created when typed contents
//                       have to be re-serialized (8/16-bit integers, BYTES);
//                       the data pointers given to the inference request
//                       point into these strings
//   inference_request - the request the data is appended to
//   shm_regions_info  - receives the info of every shared memory region used
//
// Returns nullptr on success or the first error encountered.
TRITONSERVER_Error*
InferGRPCToInput(
    const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
    const std::shared_ptr<SharedMemoryManager>& shm_manager,
    const inference::ModelInferRequest& request,
    std::list<std::string>* serialized_data,
    TRITONSERVER_InferenceRequest* inference_request,
    std::vector<std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>>*
        shm_regions_info)
{
  // Verify that the batch-byte-size of each input matches the size of
  // the provided tensor data (provided raw or from shared memory)

  // Index of the next unused entry in 'raw_input_contents'. It only
  // advances for inputs that actually take their data from there.
  int index = 0;
  for (const auto& io : request.inputs()) {
    // 'base' starts as nullptr so that an input whose 'contents' field is
    // present but holds no elements is appended with a well-defined
    // (null, 0 bytes) buffer instead of an indeterminate pointer.
    const void* base = nullptr;
    size_t byte_size = 0;
    TRITONSERVER_MemoryType memory_type = TRITONSERVER_MEMORY_CPU;
    int64_t memory_type_id = 0;

    std::string region_name;
    int64_t offset = 0;
    bool has_shared_memory = false;
    RETURN_IF_ERR(
        ParseSharedMemoryParams<inference::ModelInferRequest::InferInputTensor>(
            io, &has_shared_memory, &region_name, &offset, &byte_size));

    // The attributes object is deleted automatically on every exit path
    // of this loop iteration.
    TRITONSERVER_BufferAttributes* buffer_attributes;
    RETURN_IF_ERR(TRITONSERVER_BufferAttributesNew(&buffer_attributes));
    auto buffer_attributes_del =
        [](TRITONSERVER_BufferAttributes* buffer_attributes) {
          TRITONSERVER_BufferAttributesDelete(buffer_attributes);
        };
    std::unique_ptr<
        TRITONSERVER_BufferAttributes, decltype(buffer_attributes_del)>
        buffer_attrs_guard(buffer_attributes, buffer_attributes_del);
    char* cuda_ipc_handle = nullptr;

    if (has_shared_memory) {
      if (io.has_contents()) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            std::string(
                "unexpected 'content' provided when using shared memory "
                "for "
                "input tensor '" +
                io.name() + "' for model '" + request.model_name() + "'")
                .c_str());
      }
      void* tmp;
      std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo> shm_info =
          nullptr;
      RETURN_IF_ERR(shm_manager->GetMemoryInfo(
          region_name, offset, byte_size, &tmp, &memory_type, &memory_type_id,
          &shm_info));
      base = tmp;
      shm_regions_info->emplace_back(shm_info);

      if (memory_type == TRITONSERVER_MEMORY_GPU) {
#ifdef TRITON_ENABLE_GPU
        RETURN_IF_ERR(shm_manager->GetCUDAHandle(
            region_name,
            reinterpret_cast<cudaIpcMemHandle_t**>(&cuda_ipc_handle)));
#endif
      }
    } else {
      if (io.has_contents() && (!request.raw_input_contents().empty())) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            std::string(
                "contents field must not be specified when using "
                "raw_input_contents for '" +
                io.name() + "' for model '" + request.model_name() + "'")
                .c_str());
      } else if (io.has_contents()) {
        // Check the presence of explicit tensors
        TRITONSERVER_DataType dtype =
            TRITONSERVER_StringToDataType(io.datatype().c_str());
        const size_t elem_byte_size = TRITONSERVER_DataTypeByteSize(dtype);

        // Re-serializes 32-bit protobuf elements as 8- or 16-bit values.
        // Assuming the system is little-endian, the least significant
        // 'elem_byte_size' bytes of each element are copied into a new
        // string kept alive by 'serialized_data'.
        auto narrow_elements = [&](const auto& elements) {
          serialized_data->emplace_back();
          auto& serialized = serialized_data->back();
          serialized.reserve(elements.size() * elem_byte_size);
          for (const auto& element : elements) {
            serialized.append(
                reinterpret_cast<const char*>(&element), elem_byte_size);
          }
          base = serialized.c_str();
          byte_size = serialized.size();
        };

        // Each typed field below is validated through
        // InferGRPCToInputHelper, which rejects a field whose type does
        // not match 'dtype' as well as a second non-empty field
        // ('byte_size' is already non-zero in that case).
        if (io.contents().bool_contents_size() != 0) {
          RETURN_IF_ERR(InferGRPCToInputHelper(
              io.name(), request.model_name(), TRITONSERVER_TYPE_BOOL, dtype,
              byte_size));
          base = (const void*)io.contents().bool_contents().data();
          byte_size = io.contents().bool_contents_size() * elem_byte_size;
        }

        if (io.contents().int_contents_size() != 0) {
          if (dtype == TRITONSERVER_TYPE_INT8) {
            RETURN_IF_ERR(InferGRPCToInputHelper(
                io.name(), request.model_name(), TRITONSERVER_TYPE_INT8, dtype,
                byte_size));
            narrow_elements(io.contents().int_contents());
          } else if (dtype == TRITONSERVER_TYPE_INT16) {
            RETURN_IF_ERR(InferGRPCToInputHelper(
                io.name(), request.model_name(), TRITONSERVER_TYPE_INT16, dtype,
                byte_size));
            narrow_elements(io.contents().int_contents());
          } else {
            RETURN_IF_ERR(InferGRPCToInputHelper(
                io.name(), request.model_name(), TRITONSERVER_TYPE_INT32, dtype,
                byte_size));
            base = (const void*)io.contents().int_contents().data();
            byte_size = io.contents().int_contents_size() * elem_byte_size;
          }
        }

        if (io.contents().int64_contents_size() != 0) {
          RETURN_IF_ERR(InferGRPCToInputHelper(
              io.name(), request.model_name(), TRITONSERVER_TYPE_INT64, dtype,
              byte_size));
          base = (const void*)io.contents().int64_contents().data();
          byte_size = io.contents().int64_contents_size() * elem_byte_size;
        }

        if (io.contents().uint_contents_size() != 0) {
          if (dtype == TRITONSERVER_TYPE_UINT8) {
            RETURN_IF_ERR(InferGRPCToInputHelper(
                io.name(), request.model_name(), TRITONSERVER_TYPE_UINT8, dtype,
                byte_size));
            narrow_elements(io.contents().uint_contents());
          } else if (dtype == TRITONSERVER_TYPE_UINT16) {
            RETURN_IF_ERR(InferGRPCToInputHelper(
                io.name(), request.model_name(), TRITONSERVER_TYPE_UINT16,
                dtype, byte_size));
            narrow_elements(io.contents().uint_contents());
          } else {
            RETURN_IF_ERR(InferGRPCToInputHelper(
                io.name(), request.model_name(), TRITONSERVER_TYPE_UINT32,
                dtype, byte_size));
            base = (const void*)io.contents().uint_contents().data();
            byte_size = io.contents().uint_contents_size() * elem_byte_size;
          }
        }

        if (io.contents().uint64_contents_size() != 0) {
          RETURN_IF_ERR(InferGRPCToInputHelper(
              io.name(), request.model_name(), TRITONSERVER_TYPE_UINT64, dtype,
              byte_size));
          base = (const void*)io.contents().uint64_contents().data();
          byte_size = io.contents().uint64_contents_size() * elem_byte_size;
        }

        if (io.contents().fp32_contents_size() != 0) {
          RETURN_IF_ERR(InferGRPCToInputHelper(
              io.name(), request.model_name(), TRITONSERVER_TYPE_FP32, dtype,
              byte_size));
          base = (const void*)io.contents().fp32_contents().data();
          byte_size = io.contents().fp32_contents_size() * elem_byte_size;
        }

        if (io.contents().fp64_contents_size() != 0) {
          RETURN_IF_ERR(InferGRPCToInputHelper(
              io.name(), request.model_name(), TRITONSERVER_TYPE_FP64, dtype,
              byte_size));
          base = (const void*)io.contents().fp64_contents().data();
          byte_size = io.contents().fp64_contents_size() * elem_byte_size;
        }

        if (io.contents().bytes_contents_size() != 0) {
          RETURN_IF_ERR(InferGRPCToInputHelper(
              io.name(), request.model_name(), TRITONSERVER_TYPE_BYTES, dtype,
              byte_size));

          serialized_data->emplace_back();
          auto& serialized = serialized_data->back();

          // Serialize the input tensor strings. Each string is
          // serialized as a 4-byte length followed by the string itself
          // with no null-terminator.
          for (const auto& element : io.contents().bytes_contents()) {
            uint32_t len{(uint32_t)element.size()};
            serialized.append(
                reinterpret_cast<const char*>(&len), sizeof(uint32_t));
            if (element.size() > 0) {
              serialized.append(element.c_str(), len);
            }
          }
          base = serialized.c_str();
          byte_size = serialized.size();
        }
      } else if (request.raw_input_contents().size() > index) {
        // Try to read the raw contents if available
        const std::string& raw = request.raw_input_contents()[index++];
        base = raw.c_str();
        byte_size = raw.size();
      } else {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            std::string(
                "unable to find data for input tensor '" + io.name() +
                "' for model '" + request.model_name() + "' in request.")
                .c_str());
      }
    }

    // The CUDA IPC handle is only set for GPU shared memory regions.
    if (cuda_ipc_handle != nullptr) {
      RETURN_IF_ERR(TRITONSERVER_BufferAttributesSetCudaIpcHandle(
          buffer_attributes, reinterpret_cast<void*>(cuda_ipc_handle)));
    }

    RETURN_IF_ERR(TRITONSERVER_BufferAttributesSetMemoryType(
        buffer_attributes, memory_type));
    RETURN_IF_ERR(TRITONSERVER_BufferAttributesSetMemoryTypeId(
        buffer_attributes, memory_type_id));
    RETURN_IF_ERR(
        TRITONSERVER_BufferAttributesSetByteSize(buffer_attributes, byte_size));
    RETURN_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes(
            inference_request, io.name().c_str(), base, buffer_attributes));
  }

  return nullptr;  // success
}

// Request release callback for ModelInfer. 'userp' is the
// RequestReleasePayload registered with the request; it is deleted only
// when 'flags' contains TRITONSERVER_REQUEST_RELEASE_ALL.
void
InferRequestComplete(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  LOG_VERBOSE(1) << "ModelInferHandler::InferRequestComplete";

  auto* release_payload = static_cast<RequestReleasePayload*>(userp);

  const bool release_all = ((flags & TRITONSERVER_REQUEST_RELEASE_ALL) != 0);
  if (!release_all) {
    return;
  }

  delete release_payload;
}

//===========================================================================
//  The following section contains the handling mechanism for ModelInfer RPC.
//  This implementation is tuned towards performance and reducing latency.
//===========================================================================

void
ModelInferHandler::StartNewRequest()
{
  auto context = std::make_shared<State::Context>(cq_);
  context->SetCompressionLevel(compression_level_);
  State* state = StateNew(tritonserver_.get(), context);

#ifdef TRITON_ENABLE_TRACING
  // Can't create trace as we don't know the model to be requested,
  // track timestamps in 'state'
  state->trace_timestamps_.emplace_back(
      std::make_pair("GRPC_WAITREAD_START", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

  service_->RequestModelInfer(
      state->context_->ctx_.get(), &state->request_,
      state->context_->responder_.get(), cq_, cq_, state);

  LOG_VERBOSE(1) << "New request handler for " << Name() << ", "
                 << state->unique_id_;
}

// Advances the state machine of one ModelInfer request by a single step.
//
//   state           - the request state to process
//   rpc_ok          - whether the gRPC operation that produced this event
//                     succeeded
//   is_notification - whether this event is an asynchronous notification
//                     (e.g. cancellation) rather than a regular step
//
// Returns false once the state has reached its final step ('finished'),
// true otherwise. When cancellation is handled, the value returned by
// Context::HandleCancellation is returned instead.
bool
ModelInferHandler::Process(
    InferHandler::State* state, bool rpc_ok, bool is_notification)
{
  // There are multiple handlers registered in the gRPC service.
  // Hence, we can have a case where a handler thread is
  // making progress in the state machine for a request and the
  // other thread is issuing cancellation on the same request.
  // Need to protect the state transitions for these cases.
  std::lock_guard<std::recursive_mutex> lock(state->step_mtx_);

  if (state->delay_process_ms_ != 0) {
    // Will delay the Process execution by the specified time.
    // This can be used to test the flow when cancellation request
    // issued for the request, which is still at START step.
    LOG_INFO << "Delaying the Process execution by " << state->delay_process_ms_
             << " ms...";
    std::this_thread::sleep_for(
        std::chrono::milliseconds(state->delay_process_ms_));
  }

  // Remember on the context that a notification event has been delivered.
  if (is_notification) {
    state->context_->SetReceivedNotification(true);
  }

  // Handle notification for cancellation which can be raised
  // asynchronously if detected on the network.
  if (state->IsGrpcContextCancelled()) {
    if (is_notification) {
      // Received the cancellation notification
      LOG_VERBOSE(1) << "Cancellation notification received for " << Name()
                     << ", rpc_ok=" << rpc_ok << ", context "
                     << state->context_->unique_id_ << " step "
                     << state->context_->step_ << ", state "
                     << state->unique_id_ << " step " << state->step_;
    }

    bool skip_handle_cancellation = false;
    if (rpc_ok && (state->step_ == Steps::START) &&
        (state->context_->step_ != Steps::CANCELLED)) {
#ifdef TRITON_ENABLE_TRACING
      // Can't create trace as we don't know the model to be requested,
      // track timestamps in 'state'
      state->trace_timestamps_.emplace_back(std::make_pair(
          "GRPC_WAITREAD_END", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING
      // Need to create a new request object here explicitly for step START,
      // because we will never leave this if body. Refer to PR 7325.
      // This is a special case for ModelInferHandler, since we have 2 threads,
      // and each of them can process cancellation. ModelStreamInfer has only 1
      // thread, and cancellation at step START was not reproducible in a
      // single thread scenario.
      StartNewRequest();
    } else if (
        state->step_ == Steps::COMPLETE || state->step_ == Steps::FINISH) {
      // If the request is completed, simply ignore the cancellation.
      skip_handle_cancellation = true;
    }

    // Unless the request already completed, cancellation handling decides
    // the return value and the regular step processing below is skipped.
    if (!skip_handle_cancellation) {
      bool resume = state->context_->HandleCancellation(state, rpc_ok, Name());
      return resume;
    }
  }


  LOG_VERBOSE(1) << "Process for " << Name() << ", rpc_ok=" << rpc_ok << ", "
                 << state->unique_id_ << " step " << state->step_;

  // We need an explicit finish indicator. Can't use 'state->step_'
  // because we launch an async thread that could update 'state's
  // step_ to be FINISH before this thread exits this function.
  bool finished = false;

  // If RPC failed on a new request then the server is shutting down
  // and so we should do nothing (including not registering for a new
  // request). If RPC failed on a non-START step then there is nothing
  // we can do since we only execute one step.
  const bool shutdown = (!rpc_ok && (state->step_ == Steps::START));
  if (shutdown) {
    state->step_ = Steps::FINISH;
    finished = true;
  }

  if (state->step_ == Steps::START) {
#ifdef TRITON_ENABLE_TRACING
    // Can't create trace as we don't know the model to be requested,
    // track timestamps in 'state'
    state->trace_timestamps_.emplace_back(
        std::make_pair("GRPC_WAITREAD_END", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

    // Start a new request to replace this one...
    if (!shutdown) {
      StartNewRequest();
    }

    // Shared (reader) lock held while 'accepting_new_conn_' is read and,
    // on the success path, for the duration of Execute().
    std::shared_lock<std::shared_mutex> lk1(*conn_mtx_);

    if (*accepting_new_conn_ && ExecutePrecondition(state)) {
      Execute(state);
    } else {
      // Either the precondition check failed (restricted protocol) or the
      // server no longer accepts new requests; both are reported to the
      // client as UNAVAILABLE.
      ::grpc::Status status;
      if (*accepting_new_conn_) {
        status = ::grpc::Status(
            ::grpc::StatusCode::UNAVAILABLE,
            "This protocol is restricted, expecting header '" +
                restricted_kv_.first + "'");
      } else {
        status = ::grpc::Status(
            ::grpc::StatusCode::UNAVAILABLE,
            "GRPC server is shutting down and has stopped accepting new "
            "requests.");
      }
      // The lock is released before the response is sent.
      lk1.unlock();

#ifdef TRITON_ENABLE_TRACING
      state->trace_timestamps_.emplace_back(
          std::make_pair("GRPC_SEND_START", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

      // Finish the RPC with an empty response carrying the error status.
      state->step_ = Steps::COMPLETE;
      state->context_->responder_->Finish(
          inference::ModelInferResponse(), status, state);
    }

  } else if (state->step_ == Steps::COMPLETE) {
#ifdef TRITON_ENABLE_TRACING
    state->trace_timestamps_.emplace_back(
        std::make_pair("GRPC_SEND_END", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

    state->step_ = Steps::FINISH;
  } else if (state->step_ == Steps::FINISH) {
    finished = true;
  }

  return !finished;
}

// Provides the buffer for output 'tensor_name' of 'response'.
//
// An output entry and a matching raw-contents entry are always added to
// 'response'. For a non-empty output the buffer is either the shared memory
// region registered for the tensor in 'shm_map', or the raw-contents string
// itself (CPU memory) resized to 'byte_size'.
//
// On return '*buffer' is the buffer (nullptr when 'byte_size' is 0),
// '*buffer_userp' is nullptr and '*actual_memory_type' /
// '*actual_memory_type_id' describe where the buffer lives. Returns an
// INTERNAL error if the registered shared memory region is too small,
// nullptr otherwise.
TRITONSERVER_Error*
ResponseAllocatorHelper(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, inference::ModelInferResponse* response,
    const TensorShmMap& shm_map, void** buffer, void** buffer_userp,
    TRITONSERVER_MemoryType* actual_memory_type, int64_t* actual_memory_type_id)
{
  // Defaults: no buffer, and the preferred placement echoed back.
  *buffer = nullptr;
  *buffer_userp = nullptr;
  *actual_memory_type = preferred_memory_type;
  *actual_memory_type_id = preferred_memory_type_id;

  // We add an output contents even if the 'byte_size' == 0 because we
  // expect to have a contents for every output.
  auto* output_entry = response->add_outputs();
  output_entry->set_name(tensor_name);
  std::string* raw_contents = response->add_raw_output_contents();

  // Nothing to allocate for an empty output.
  if (byte_size == 0) {
    return nullptr;  // Success
  }

  const auto& shm_entry = shm_map.find(tensor_name);
  if (shm_entry == shm_map.end()) {
    // Not using shared memory so allocate a buffer. The buffer we
    // create is directly in the response protobuf so we can't
    // allocate any type other than CPU.
    //
    // FIXME we could use pinned CPU memory here.
    if (*actual_memory_type != TRITONSERVER_MEMORY_CPU) {
      LOG_VERBOSE(1) << "GRPC: unable to provide '" << tensor_name << "' in "
                     << TRITONSERVER_MemoryTypeString(*actual_memory_type)
                     << ", will use "
                     << TRITONSERVER_MemoryTypeString(TRITONSERVER_MEMORY_CPU);
      *actual_memory_type = TRITONSERVER_MEMORY_CPU;
      *actual_memory_type_id = 0;
    }

    raw_contents->resize(byte_size);
    *buffer = static_cast<void*>(&raw_contents->front());

    LOG_VERBOSE(1) << "GRPC: using buffer for '" << tensor_name
                   << "', size: " << byte_size << ", addr: " << *buffer;
    return nullptr;  // Success
  }

  // The output is in shared memory so check that shared memory
  // size is at least large enough for the output.
  const auto& shm_region = shm_entry->second;
  if (byte_size > shm_region.byte_size_) {
    const std::string too_small_msg =
        "shared memory size specified with the request for output '" +
        std::string(tensor_name) + "' (" +
        std::to_string(shm_region.byte_size_) + " bytes) should be at least " +
        std::to_string(byte_size) + " bytes to hold the results";
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL, too_small_msg.c_str());
  }

  *buffer = const_cast<void*>(shm_region.base_);
  *actual_memory_type = shm_region.memory_type_;
  *actual_memory_type_id = shm_region.memory_type_id_;

  LOG_VERBOSE(1) << "GRPC: using shared-memory for '" << tensor_name
                 << "', size: " << byte_size << ", addr: " << *buffer;
  return nullptr;  // Success
}

// Builds a Triton inference request from the gRPC request held in 'state'
// and issues it asynchronously.
//
// The steps are chained through 'err': each step only runs while no earlier
// step has failed. On success the state moves to ISSUED and ownership of the
// release payloads passes to the release callbacks. On any failure the RPC is
// finished immediately with an empty response carrying the error status and
// the state moves to COMPLETE.
void
ModelInferHandler::Execute(InferHandler::State* state)
{
  TRITONSERVER_Error* err = nullptr;
  const inference::ModelInferRequest& request = state->request_;
  auto response_queue = state->response_queue_;
  int64_t requested_model_version;
  if (err == nullptr) {
    err = GetModelVersionFromString(
        request.model_version(), &requested_model_version);
  }

  // Models with a decoupled transaction policy are rejected: this RPC
  // returns exactly one response per request.
  if (err == nullptr) {
    uint32_t txn_flags;
    err = TRITONSERVER_ServerModelTransactionProperties(
        tritonserver_.get(), request.model_name().c_str(),
        requested_model_version, &txn_flags, nullptr /* voidp */);
    if ((err == nullptr) && (txn_flags & TRITONSERVER_TXN_DECOUPLED) != 0) {
      err = TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "ModelInfer RPC doesn't support models with decoupled "
          "transaction policy");
    }
  }

  // Create the inference request which contains all the
  // input information needed for an inference.
  TRITONSERVER_InferenceRequest* irequest = nullptr;
  if (err == nullptr) {
    err = TRITONSERVER_InferenceRequestNew(
        &irequest, tritonserver_.get(), request.model_name().c_str(),
        requested_model_version);
  }

  // Hand the request to the state with a deleter that deletes it through
  // the Triton API (logging any deletion error), then fill in its metadata.
  if (err == nullptr) {
    state->inference_request_ = {
        irequest, [](TRITONSERVER_InferenceRequest* request) {
          LOG_TRITONSERVER_ERROR(
              TRITONSERVER_InferenceRequestDelete(request),
              "deleting gRPC inference request");
        }};
    err = SetInferenceRequestMetadata(irequest, request, state->parameters_);
  }

  if (err == nullptr) {
    err = ForwardHeadersAsParameters(irequest, state);
  }

  // Will be used to hold the serialized data in case explicit string
  // tensors are present in the request.
  std::list<std::string> serialized_data;

  // Maintain shared pointers(read-only reference) to the shared memory block's
  // information for the shared memory regions used by the request. These
  // pointers will automatically increase the usage count, preventing
  // unregistration of the shared memory. This vector must be cleared in the
  // `InferResponseComplete` callback (after inference) to decrease the count
  // and permit unregistration. The vector will be included in
  // `response_release_payload` for the callback.
  std::vector<std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>>
      shm_regions_info;

  if (err == nullptr) {
    err = InferGRPCToInput(
        tritonserver_, shm_manager_, request, &serialized_data, irequest,
        &shm_regions_info);
  }
  // 'serialized_data' is moved into the allocator payload call here and
  // must not be used after this point.
  if (err == nullptr) {
    err = InferAllocatorPayload<inference::ModelInferResponse>(
        tritonserver_, shm_manager_, request, std::move(serialized_data),
        response_queue, &state->alloc_payload_, &shm_regions_info);
  }

  // The payloads are held by unique_ptr so that they are destroyed
  // automatically if the request is never issued (see release() below).
  auto request_release_payload =
      std::make_unique<RequestReleasePayload>(state->inference_request_);
  auto response_release_payload = std::make_unique<ResponseReleasePayload>(
      state, std::move(shm_regions_info), shm_manager_);

  if (err == nullptr) {
    err = TRITONSERVER_InferenceRequestSetReleaseCallback(
        irequest, InferRequestComplete,
        request_release_payload.get() /* request_release_userp */);
  }
  if (err == nullptr) {
    err = TRITONSERVER_InferenceRequestSetResponseCallback(
        irequest, allocator_,
        &state->alloc_payload_ /* response_allocator_userp */,
        InferResponseComplete,
        response_release_payload.get() /* response_userp */);
  }
  // Get request ID for logging in case of error.
  const char* request_id = "";
  if (irequest != nullptr) {
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceRequestId(irequest, &request_id),
        "unable to retrieve request ID string");
  }

  // An empty request id is replaced by a placeholder for logging.
  if (!strncmp(request_id, "", 1)) {
    request_id = "<id_unknown>";
  }
  if (err == nullptr) {
    TRITONSERVER_InferenceTrace* triton_trace = nullptr;
#ifdef TRITON_ENABLE_TRACING
    if (trace_manager_) {
      GrpcServerCarrier carrier(state->context_->ctx_.get());
      auto start_options =
          trace_manager_->GetTraceStartOptions(carrier, request.model_name());
      state->trace_ = std::move(trace_manager_->SampleTrace(start_options));
      if (state->trace_ != nullptr) {
        triton_trace = state->trace_->trace_;
      }
    }
#endif  // TRITON_ENABLE_TRACING

    // The step is set before the request is issued.
    state->step_ = ISSUED;
    err = TRITONSERVER_ServerInferAsync(
        tritonserver_.get(), irequest, triton_trace);
  }

  // If not error then state->step_ == ISSUED and inference request
  // has initiated... completion callback will transition to
  // COMPLETE or CANCELLED. Recording the state and the irequest
  // to handle gRPC stream cancellation.
  if (err == nullptr) {
    state->context_->InsertInflightState(state);
    // The payload will be cleaned in release callback.
    request_release_payload.release();
    response_release_payload.release();
  } else {
    // If error go immediately to COMPLETE.
    LOG_VERBOSE(1) << "[request id: " << request_id << "] "
                   << "Infer failed: " << TRITONSERVER_ErrorMessage(err);

    // Convert the Triton error into a gRPC status, then free the error.
    ::grpc::Status status;
    GrpcStatusUtil::Create(&status, err);
    TRITONSERVER_ErrorDelete(err);

    inference::ModelInferResponse error_response;

#ifdef TRITON_ENABLE_TRACING
    if (trace_manager_) {
      state->trace_timestamps_.emplace_back(
          std::make_pair("GRPC_SEND_START", TraceManager::CaptureTimestamp()));
    }
#endif  // TRITON_ENABLE_TRACING

    state->step_ = Steps::COMPLETE;
    state->context_->responder_->Finish(error_response, status, state);
  }
}

// Response-complete callback for unary (non-streaming) inference requests.
//
// 'userp' is the ResponseReleasePayload registered together with this
// callback; it carries the handler state. The callback may be invoked with a
// response and the FINAL flag together, or separately. The gRPC reply is only
// sent (and the payload only deleted) once the FINAL flag has been seen.
//
// 'iresponse' may be nullptr; it is always handed to
// TRITONSERVER_InferenceResponseDelete before returning.
void
ModelInferHandler::InferResponseComplete(
    TRITONSERVER_InferenceResponse* iresponse, const uint32_t flags,
    void* userp)
{
  ResponseReleasePayload* response_release_payload(
      static_cast<ResponseReleasePayload*>(userp));
  auto state = response_release_payload->state_;

  // There are multiple handlers registered in the gRPC service
  // Hence, we would need to properly synchronize this thread
  // and the handler thread handling async cancellation
  // notification.
  std::lock_guard<std::recursive_mutex> lock(state->step_mtx_);

  if (state->delay_response_complete_exec_ms_ != 0) {
    // Will delay the Process execution of state at step ISSUED by the
    // specified time. This can be used to test the flow when cancellation
    // request issued for the request before InferResponseComplete.
    LOG_INFO << "Delaying InferResponseComplete execution by "
             << state->delay_response_complete_exec_ms_ << " ms...";
    std::this_thread::sleep_for(
        std::chrono::milliseconds(state->delay_response_complete_exec_ms_));
  }

  // Increment the callback index if received valid 'iresponse'
  if (iresponse != nullptr) {
    state->cb_count_++;
  }

  bool is_final_response = (flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != 0;

  LOG_VERBOSE(1) << "ModelInferHandler::InferResponseComplete, "
                 << state->unique_id_ << " step " << state->step_;

  // Allow sending 1 response and final flag separately, only mark
  // non-inflight when seeing final flag
  if (is_final_response) {
    state->context_->EraseInflightState(state);
  }

  // If gRPC Stream is cancelled then no need of forming and returning
  // a response.
  if (state->IsGrpcContextCancelled()) {
    // Clean-up the received response object.
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceResponseDelete(iresponse),
        "deleting GRPC inference response");

    state->context_->EraseInflightState(state);
    state->step_ = Steps::CANCELLED;

    LOG_VERBOSE(1) << "ModelInferHandler::InferResponseComplete, "
                   << state->unique_id_
                   << ", skipping response generation as grpc transaction was "
                      "cancelled... ";

    if (is_final_response) {
      if (state->delay_enqueue_ms_ != 0) {
        // Will delay PutTaskBackToQueue by the specified time.
        // This can be used to test the flow when cancellation request
        // issued for the request during InferResponseComplete
        // callback right before Process in the notification thread.
        LOG_INFO << "Delaying PutTaskBackToQueue by "
                 << state->delay_enqueue_ms_ << " ms...";
        std::this_thread::sleep_for(
            std::chrono::milliseconds(state->delay_enqueue_ms_));
      }

      // Send state back to the queue so that state can be released
      // in the next cycle.
      state->context_->PutTaskBackToQueue(state);
      delete response_release_payload;
    }
    return;
  }

  TRITONSERVER_Error* err = nullptr;
  // This callback is expected to be called exactly once for each request.
  // Will use the single response object in the response list to hold the
  // information.
  inference::ModelInferResponse* response =
      state->response_queue_->GetResponseAt(0);

  // Owns the stand-in response created when the queue has none. Holding it in
  // a unique_ptr guarantees it is freed on every exit path, including the
  // early return taken when the FINAL flag has not been seen yet.
  std::unique_ptr<inference::ModelInferResponse> fallback_response;
  if (response == nullptr) {
    LOG_ERROR << "expected allocator to have created a response object";
    err = TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "No response object found in the callback");
    fallback_response.reset(new inference::ModelInferResponse());
    response = fallback_response.get();
  }

  // The branches below replace 'err' (the most recent outcome decides the
  // status). Any error created above must be deleted before it is replaced,
  // otherwise the error object is leaked.
  if (state->cb_count_ != 1) {
    if (err != nullptr) {
      TRITONSERVER_ErrorDelete(err);
    }
    err = TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL, std::string(
                                         "expected a single response, got " +
                                         std::to_string(state->cb_count_))
                                         .c_str());
  } else if (iresponse != nullptr) {
    if (err != nullptr) {
      TRITONSERVER_ErrorDelete(err);
    }
    err = InferResponseCompleteCommon<inference::ModelInferResponse>(
        state->tritonserver_, iresponse, *response, state->alloc_payload_);
#ifdef TRITON_ENABLE_TRACING
    state->trace_timestamps_.emplace_back(std::make_pair(
        "INFER_RESPONSE_COMPLETE", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING
  }

  // Never return partially populated content together with an error status.
  if (err != nullptr) {
    response->Clear();
  }

  GrpcStatusUtil::Create(&state->status_, err);
  TRITONSERVER_ErrorDelete(err);

  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceResponseDelete(iresponse),
      "deleting GRPC inference response");

  // Defer sending the response until FINAL flag is seen or
  // there is error
  if (!is_final_response) {
    return;
  }


#ifdef TRITON_ENABLE_TRACING
  state->trace_timestamps_.emplace_back(
      std::make_pair("GRPC_SEND_START", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

  if (state->delay_response_completion_ms_ != 0) {
    // Will delay the Process execution of state at step COMPLETE by the
    // specified time. This can be used to test the flow when cancellation
    // request issued for the request, which is at InferResponseComplete.
    LOG_INFO << "Delaying InferResponseComplete by "
             << state->delay_response_completion_ms_ << " ms...";
    std::this_thread::sleep_for(
        std::chrono::milliseconds(state->delay_response_completion_ms_));
  }

  state->step_ = Steps::COMPLETE;
  state->context_->responder_->Finish(*response, state->status_, state);

  // Release the stand-in response (if any) right after handing it to Finish,
  // matching the point at which it was previously deleted.
  fallback_response.reset();

  delete response_release_payload;
}

}}}  // namespace triton::server::grpc


================================================
FILE: src/grpc/infer_handler.h
================================================
// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <grpc++/alarm.h>
#include <grpc++/grpc++.h>
#include <re2/re2.h>

#include <condition_variable>
#include <queue>
#include <regex>
#include <shared_mutex>
#include <thread>

#include "../tracer.h"
#include "grpc_handler.h"
#include "grpc_service.grpc.pb.h"
#include "grpc_utils.h"
#include "triton/common/logging.h"
#include "triton/core/tritonserver.h"

// Unique IDs are only needed when debugging. They only appear in
// verbose logging.
//
// NEXT_UNIQUE_ID expands to a call to NextUniqueId() (declared here, defined
// elsewhere) when NDEBUG is not defined, and to the constant 0 when NDEBUG is
// defined.
#ifndef NDEBUG
uint64_t NextUniqueId();
#define NEXT_UNIQUE_ID NextUniqueId()
#else
#define NEXT_UNIQUE_ID (0)
#endif  // NDEBUG

namespace triton { namespace server { namespace grpc {

// Per-request options for InferHandler/StreamInferHandler states. The values
// are filled in from the parameters carried by the request.
struct StateParameters {
  // When true, an empty response is generated if a FINAL flag is received
  // without a corresponding response. Only applicable to
  // StreamInferHandlerState. Disabled by default.
  bool enable_empty_final_response_{false};
};

//
// Reusable thread barrier (C++11 has none, so it is implemented here).
//
// Constructed with the number of participants. Each call to Wait() blocks
// until that many calls have been made, after which all of them return and
// the barrier is armed again for the next round.
//
class Barrier {
 public:
  explicit Barrier(size_t cnt) : threshold_(cnt), count_(cnt), generation_(0) {}

  // Blocks the caller until 'threshold_' participants have arrived.
  void Wait()
  {
    std::unique_lock<std::mutex> guard(mu_);

    // Remember which round this caller belongs to; the round number changing
    // is the signal that the barrier has been released.
    const size_t arrival_generation = generation_;

    --count_;
    if (count_ != 0) {
      // Not the last arrival: sleep until the round advances. The loop guards
      // against spurious wake-ups.
      while (arrival_generation == generation_) {
        cv_.wait(guard);
      }
      return;
    }

    // Last arrival: start the next round, re-arm the counter and release
    // everyone waiting on the current round.
    ++generation_;
    count_ = threshold_;
    cv_.notify_all();
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  const size_t threshold_;  // Number of participants per round
  size_t count_;            // Participants still missing in this round
  size_t generation_;       // Round counter, bumped on every release
};

// Userp payload for the request release callback. It stores a shared
// reference to the inference request; the member is private and is not read
// through this type.
struct RequestReleasePayload final {
  explicit RequestReleasePayload(
      const std::shared_ptr<TRITONSERVER_InferenceRequest>& inference_request)
      : inference_request_{inference_request}
  {
  }

 private:
  // Shared reference to the request, held for as long as this payload lives.
  std::shared_ptr<TRITONSERVER_InferenceRequest> inference_request_;
};

//
// ResponseQueue
//
// This class implements a queue to manage responses that need to be written.
// It internally uses a reusable pool of persistent message objects to avoid
// allocating memory for each response individually.
//
// Every public method except the destructor acquires 'mtx_'. Accessors that
// cannot provide a response log an error and return nullptr, so callers must
// check the returned pointer.
//
template <typename ResponseType>
class ResponseQueue {
 public:
  // 'max_response_queue_size' bounds the number of queued responses;
  // AllocateResponse() blocks while the queue is at that size.
  explicit ResponseQueue(const size_t max_response_queue_size)
      : max_response_queue_size_(max_response_queue_size)
  {
    Reset();
  }

  ~ResponseQueue()
  {
    // Delete all responses in the reusable pool
    for (auto response : reusable_pool_) {
      delete response;
    }

    // Delete all responses currently in the queue
    for (auto response : responses_) {
      delete response;
    }
  }

  // Resets the queue to its initial state. Queued responses are cleared and
  // moved to the reusable pool rather than freed.
  void Reset()
  {
    std::lock_guard<std::mutex> lock(mtx_);
    alloc_count_ = 0;
    ready_count_ = 0;
    pop_count_ = 0;

    while (!responses_.empty()) {
      responses_.front()->Clear();
      reusable_pool_.push_back(responses_.front());
      responses_.pop_front();
    }
  }

  // Gets the response for the non-decoupled models.
  // Note that there will be a single response in
  // non-decoupled cases.
  ResponseType* GetNonDecoupledResponse()
  {
    std::lock_guard<std::mutex> lock(mtx_);
    alloc_count_ = 1;
    if (responses_.size() < 1) {
      if (!reusable_pool_.empty()) {
        responses_.push_back(reusable_pool_.front());
        reusable_pool_.pop_front();
      } else {
        responses_.push_back(new ResponseType());
      }
    }
    return responses_[0];
  }

  // Allocates a response at the end of the queue. Blocks while the queue
  // already holds 'max_response_queue_size_' responses.
  void AllocateResponse()
  {
    std::unique_lock<std::mutex> lock(mtx_);
    cv_.wait(
        lock, [this] { return responses_.size() < max_response_queue_size_; });
    alloc_count_++;

    // Use a response from the reusable pool if available
    if (!reusable_pool_.empty()) {
      responses_.push_back(reusable_pool_.front());
      reusable_pool_.pop_front();
    } else {
      responses_.push_back(new ResponseType());
    }
  }

  // Gets the last allocated response, or nullptr if there is none.
  ResponseType* GetLastAllocatedResponse()
  {
    std::lock_guard<std::mutex> lock(mtx_);

    // Ensure that the requested response has been allocated
    if ((responses_.size() + pop_count_) < alloc_count_) {
      LOG_ERROR
          << "[INTERNAL] Attempting to access the response not yet allocated";
      return nullptr;
    }

    // The check above also passes when nothing has been allocated yet or when
    // every allocated response has already been popped. Calling back() on an
    // empty deque is undefined behavior, so report the condition instead.
    if (responses_.empty()) {
      LOG_ERROR << "[INTERNAL] No responses are available in the queue.";
      return nullptr;
    }

    return responses_.back();
  }

  // Marks the next non-ready response complete. Returns false if every
  // allocated response is already marked ready.
  bool MarkNextResponseComplete()
  {
    std::lock_guard<std::mutex> lock(mtx_);
    if (alloc_count_ <= ready_count_) {
      LOG_ERROR
          << "[INTERNAL] Attempting to mark an unallocated response complete";
      return false;
    }
    ready_count_++;

    return true;
  }

  // Gets the current response from the front of the queue, or nullptr if no
  // response is ready.
  ResponseType* GetCurrentResponse()
  {
    std::lock_guard<std::mutex> lock(mtx_);
    if (pop_count_ >= ready_count_) {
      LOG_ERROR << "[INTERNAL] Attempting to access current response when it "
                   "is not ready";
      return nullptr;
    }
    if (responses_.empty()) {
      LOG_ERROR << "[INTERNAL] No responses are available in the queue.";
      return nullptr;
    }

    return responses_.front();
  }

  // Gets the response at the specified index, where 'index' counts responses
  // since the last Reset() (popped ones included). Returns nullptr if that
  // response is not allocated, already popped, or not present in the queue.
  ResponseType* GetResponseAt(const uint32_t index)
  {
    std::lock_guard<std::mutex> lock(mtx_);

    // Check if the index is valid for allocated responses
    if (index >= alloc_count_) {
      LOG_ERROR << "[INTERNAL] Attempting to access response which is not yet "
                   "allocated";
      return nullptr;
    }
    if (index < pop_count_) {
      LOG_ERROR << "[INTERNAL] Attempting to access a response that has "
                   "already been removed from the queue.";
      return nullptr;
    }

    // Adjust index based on number of popped responses to get actual index in
    // 'responses_'
    const size_t queue_index = index - pop_count_;

    // Guard the subscript: indexing past the end of the deque is undefined
    // behavior, so fail loudly if the counters and the deque disagree.
    if (queue_index >= responses_.size()) {
      LOG_ERROR << "[INTERNAL] Attempting to access a response that is not "
                   "present in the queue.";
      return nullptr;
    }

    return responses_[queue_index];
  }

  // Removes the current response from the front of the queue
  void PopResponse()
  {
    std::lock_guard<std::mutex> lock(mtx_);

    // Ensure there are responses in the queue to pop
    if (responses_.empty()) {
      LOG_ERROR << "[INTERNAL] No responses in the queue to pop.";
      return;
    }

    // Clear and move the current response to the reusable pool
    auto response = responses_.front();
    response->Clear();
    reusable_pool_.push_back(response);
    responses_.pop_front();
    pop_count_++;

    // A slot was freed; wake one thread blocked in AllocateResponse().
    cv_.notify_one();
  }

  // Returns whether the queue is empty
  bool IsEmpty()
  {
    std::lock_guard<std::mutex> lock(mtx_);
    return (
        (alloc_count_ == ready_count_) && (alloc_count_ == pop_count_) &&
        responses_.empty());
  }

  // Returns whether the queue has responses
  // ready to be written.
  bool HasReadyResponse()
  {
    std::lock_guard<std::mutex> lock(mtx_);
    return (ready_count_ > pop_count_);
  }

 private:
  // Stores responses that need to be written. The front of the queue indicates
  // the current response, while the back indicates the last allocated response.
  std::deque<ResponseType*> responses_;
  // Stores completed responses that can be reused
  std::deque<ResponseType*> reusable_pool_;
  std::condition_variable cv_;
  size_t max_response_queue_size_;
  std::mutex mtx_;

  // Three counters are used to track and manage responses in the queue
  uint32_t alloc_count_;  // Number of allocated responses
  uint32_t ready_count_;  // Number of ready-to-write responses
  uint32_t pop_count_;    // Number of removed responses from the queue
};


//
// ShmInfo
//
// Plain value holder describing where a tensor lives in shared memory. All
// members are set once by the constructor from the arguments of the same
// name.
//
struct ShmInfo {
  ShmInfo(
      void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type,
      int64_t memory_type_id, char* cuda_ipc_handle)
      : base_{base}, byte_size_{byte_size}, memory_type_{memory_type},
        memory_type_id_{memory_type_id}, cuda_ipc_handle_{cuda_ipc_handle}
  {
  }

  void* base_;                           // Start address of the tensor data
  size_t byte_size_;                     // Size of the tensor data in bytes
  TRITONSERVER_MemoryType memory_type_;  // Kind of memory holding the data
  int64_t memory_type_id_;               // Id qualifying 'memory_type_'
  char* cuda_ipc_handle_;  // CUDA IPC handle; may be nullptr
};


// Lookup table from a tensor name to the ShmInfo recorded for that tensor.
using TensorShmMap = std::unordered_map<std::string, ShmInfo>;

//
// AllocPayload
//
// Simple structure that carries the userp payload needed for
// allocation.
//
template <typename ResponseType>
struct AllocPayload {
  // Maps an output name to the classification count requested for it.
  using ClassificationMap = std::unordered_map<std::string, uint32_t>;

  // Every scalar member is given a defined value so that no field can be
  // read while indeterminate.
  explicit AllocPayload() : response_queue_(nullptr), response_alloc_count_(0)
  {
  }
  ~AllocPayload()
  {
    // Nothing to delete here: the response objects are owned by the
    // ResponseQueue, which is held through the shared 'response_queue_'.
  }

  // Queue providing the response objects filled in during allocation.
  std::shared_ptr<ResponseQueue<ResponseType>> response_queue_;
  // Counter associated with response allocation; starts at 0.
  uint32_t response_alloc_count_;
  // Outputs that were requested to be placed in shared memory.
  TensorShmMap shm_map_;
  // Outputs that were requested as classification results.
  ClassificationMap classification_map_;

  // Used to extend the lifetime of the serialized data in case
  // non-raw contents were provided in the request. Serialized data's
  // actual lifetime is that of the request whereas AllocPayload's
  // lifetime is that of a response... but it is convenient to keep it
  // here.
  std::list<std::string> serialized_data_;
};

// Prepares 'alloc_payload' for a request.
//
// The payload is pointed at 'response_queue', its shared-memory and
// classification maps are cleared, and it takes over 'serialized_data'. Then
// each requested output of 'request' is inspected:
//  * an output using shared memory gets its region resolved through
//    'shm_manager'; the region info is appended to 'shm_regions_info' and the
//    resolved placement is stored in the payload's shm map under the output
//    name;
//  * an output requesting classification gets its count stored in the
//    payload's classification map under the output name.
//
// Returns nullptr on success. Returns an error if parameter parsing or a
// shared memory lookup fails, or if an output asks for both shared memory and
// classification; in that case the payload may be only partially filled.
template <typename ResponseType>
TRITONSERVER_Error*
InferAllocatorPayload(
    const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
    const std::shared_ptr<SharedMemoryManager>& shm_manager,
    const inference::ModelInferRequest& request,
    std::list<std::string>&& serialized_data,
    std::shared_ptr<ResponseQueue<ResponseType>> response_queue,
    AllocPayload<ResponseType>* alloc_payload,
    std::vector<std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>>*
        shm_regions_info)
{
  alloc_payload->response_queue_ = response_queue;
  alloc_payload->shm_map_.clear();
  alloc_payload->classification_map_.clear();
  alloc_payload->serialized_data_ = std::move(serialized_data);

  // Shared memory addresses are computed up front and kept in the payload so
  // that they are available when the allocation callback is invoked.
  for (const auto& requested_output : request.outputs()) {
    std::string region_name;
    int64_t offset;
    size_t byte_size;
    bool uses_shared_memory;
    RETURN_IF_ERR(ParseSharedMemoryParams<
                  inference::ModelInferRequest::InferRequestedOutputTensor>(
        requested_output, &uses_shared_memory, &region_name, &offset,
        &byte_size));

    bool wants_classification;
    uint32_t classification_count;
    RETURN_IF_ERR(ParseClassificationParams(
        requested_output, &wants_classification, &classification_count));

    // The two options are mutually exclusive for a single output.
    if (uses_shared_memory && wants_classification) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "output can't set both 'shared_memory_region' and "
          "'classification'");
    }

    if (!uses_shared_memory) {
      if (wants_classification) {
        alloc_payload->classification_map_.emplace(
            requested_output.name(), classification_count);
      }
      continue;
    }

    // Shared memory output: resolve the region and remember its info.
    void* base;
    TRITONSERVER_MemoryType memory_type;
    int64_t memory_type_id;
    std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo> region_info;
    RETURN_IF_ERR(shm_manager->GetMemoryInfo(
        region_name, offset, byte_size, &base, &memory_type, &memory_type_id,
        &region_info));
    shm_regions_info->emplace_back(region_info);

    if (memory_type != TRITONSERVER_MEMORY_GPU) {
      alloc_payload->shm_map_.emplace(
          requested_output.name(),
          ShmInfo(
              base, byte_size, memory_type, memory_type_id,
              nullptr /* cuda_ipc_handle */));
    } else {
      // Without GPU support compiled in, a GPU region is not recorded in the
      // shm map.
#ifdef TRITON_ENABLE_GPU
      char* cuda_handle;
      RETURN_IF_ERR(shm_manager->GetCUDAHandle(
          region_name, reinterpret_cast<cudaIpcMemHandle_t**>(&cuda_handle)));
      alloc_payload->shm_map_.emplace(
          requested_output.name(),
          ShmInfo(base, byte_size, memory_type, memory_type_id, cuda_handle));
#endif
    }
  }

  return nullptr;  // Success
}

// The functions below are only declared in this header; their definitions
// live elsewhere. Unless stated otherwise they report failure through the
// returned TRITONSERVER_Error*.

// NOTE(review): judging by the parameters this checks an input's tensor
// datatype / binary data size against the model input datatype - confirm
// against the definition.
TRITONSERVER_Error* InferGRPCToInputHelper(
    const std::string& input_name, const std::string& model_name,
    const TRITONSERVER_DataType tensor_dt, const TRITONSERVER_DataType input_dt,
    const size_t binary_data_byte_size);

// Called by the inference handler before the allocator payload is prepared.
// 'serialized_data' and 'shm_regions_info' are output containers supplied by
// the caller.
// NOTE(review): presumably transfers the inputs of 'request' into
// 'inference_request' - confirm against the definition.
TRITONSERVER_Error* InferGRPCToInput(
    const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
    const std::shared_ptr<SharedMemoryManager>& shm_manager,
    const inference::ModelInferRequest& request,
    std::list<std::string>* serialized_data,
    TRITONSERVER_InferenceRequest* inference_request,
    std::vector<std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>>*
        shm_regions_info);

// Helper variant of the allocation callback that receives the response object
// and the shared memory map explicitly instead of an opaque 'userp'.
TRITONSERVER_Error* ResponseAllocatorHelper(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, inference::ModelInferResponse* response,
    const TensorShmMap& shm_map, void** buffer, void** buffer_userp,
    TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id);

// Helper variant of OutputBufferAttributes that receives the shared memory
// map explicitly.
TRITONSERVER_Error* OutputBufferAttributesHelper(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    const TensorShmMap& shm_map,
    TRITONSERVER_BufferAttributes* buffer_attributes);

// Helper variant of OutputBufferQuery that receives the shared memory map
// explicitly.
TRITONSERVER_Error* OutputBufferQueryHelper(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t* byte_size, const TensorShmMap& shm_map,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);

// Make sure to keep InferResponseAlloc and OutputBufferQuery logic in sync
TRITONSERVER_Error* InferResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id);

// NOTE(review): presumably applies the metadata of 'request' to
// 'inference_request' and records handler options in 'state_params' - confirm
// against the definition.
TRITONSERVER_Error* SetInferenceRequestMetadata(
    TRITONSERVER_InferenceRequest* inference_request,
    const inference::ModelInferRequest& request, StateParameters& state_params);

// Helper to set options for StreamInferHandler state when parsing
// request parameters.
TRITONSERVER_Error* SetStateParameterFromTritonParameter(
    StateParameters& state_params,
    const std::pair<std::string, inference::InferParameter>& param);

// Request release callback; the inference handler registers it through
// TRITONSERVER_InferenceRequestSetReleaseCallback, passing a
// RequestReleasePayload as 'userp'.
void InferRequestComplete(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp);

// Make sure to keep InferResponseAlloc and OutputBufferQuery logic in sync
TRITONSERVER_Error* OutputBufferQuery(
    TRITONSERVER_ResponseAllocator* allocator, void* userp,
    const char* tensor_name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);

// Make sure to keep InferResponseAlloc, OutputBufferQuery, and
// OutputBufferAttributes logic in sync
TRITONSERVER_Error* OutputBufferAttributes(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    TRITONSERVER_BufferAttributes* buffer_attributes, void* userp,
    void* buffer_userp);

// NOTE(review): by its signature this is the buffer release counterpart of
// InferResponseAlloc - confirm against the definition.
TRITONSERVER_Error* InferResponseFree(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id);

// NOTE(review): by its signature this is the allocator's "start of response"
// callback - confirm against the definition.
TRITONSERVER_Error* InferResponseStart(
    TRITONSERVER_ResponseAllocator* allocator, void* userp);

// Transfers the content of a completed Triton response into a gRPC response.
//
// 'iresponse' is the Triton response to read from. 'response' is the gRPC
// message to complete; it is expected to already contain one output entry per
// output of 'iresponse' (looked up by name). 'alloc_payload' tells which
// outputs were requested as classification. 'server' is used to query the
// batch properties of the model for classification outputs.
//
// Copies id, model name/version and response parameters, sets datatype and
// shape of every output, and replaces the raw contents of classification
// outputs with the serialized top-k class strings.
//
// Returns nullptr on success. Returns an error if 'iresponse' carries an
// error, if any query on it fails, if the outputs of the two responses do not
// match, if a BYTES response parameter is encountered, or if the resulting
// message exceeds the gRPC message size limit. 'response' may be partially
// filled when an error is returned.
template <typename ResponseType>
TRITONSERVER_Error*
InferResponseCompleteCommon(
    TRITONSERVER_Server* server, TRITONSERVER_InferenceResponse* iresponse,
    inference::ModelInferResponse& response,
    const AllocPayload<ResponseType>& alloc_payload)
{
  RETURN_IF_ERR(TRITONSERVER_InferenceResponseError(iresponse));

  const char *model_name, *id;
  int64_t model_version;
  RETURN_IF_ERR(TRITONSERVER_InferenceResponseModel(
      iresponse, &model_name, &model_version));
  RETURN_IF_ERR(TRITONSERVER_InferenceResponseId(iresponse, &id));

  response.set_id(id);
  response.set_model_name(model_name);
  response.set_model_version(std::to_string(model_version));

  // Propagate response parameters.
  uint32_t parameter_count;
  RETURN_IF_ERR(TRITONSERVER_InferenceResponseParameterCount(
      iresponse, &parameter_count));
  for (uint32_t pidx = 0; pidx < parameter_count; ++pidx) {
    const char* name;
    TRITONSERVER_ParameterType type;
    const void* vvalue;
    RETURN_IF_ERR(TRITONSERVER_InferenceResponseParameter(
        iresponse, pidx, &name, &type, &vvalue));
    inference::InferParameter& param = (*response.mutable_parameters())[name];
    switch (type) {
      case TRITONSERVER_PARAMETER_BOOL:
        param.set_bool_param(*(reinterpret_cast<const bool*>(vvalue)));
        break;
      case TRITONSERVER_PARAMETER_INT:
        param.set_int64_param(*(reinterpret_cast<const int64_t*>(vvalue)));
        break;
      case TRITONSERVER_PARAMETER_STRING:
        param.set_string_param(reinterpret_cast<const char*>(vvalue));
        break;
      case TRITONSERVER_PARAMETER_DOUBLE:
        param.set_double_param(*(reinterpret_cast<const double*>(vvalue)));
        break;
      case TRITONSERVER_PARAMETER_BYTES:
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_UNSUPPORTED,
            "Response parameter of type 'TRITONSERVER_PARAMETER_BYTES' is not "
            "currently supported");
    }
  }

  // Go through each response output and transfer information to the
  // corresponding GRPC response output.
  uint32_t output_count;
  RETURN_IF_ERR(
      TRITONSERVER_InferenceResponseOutputCount(iresponse, &output_count));
  if (output_count != (uint32_t)response.outputs_size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL, "response output count mismatch");
  }

  for (uint32_t output_idx = 0; output_idx < output_count; ++output_idx) {
    const char* cname;
    TRITONSERVER_DataType datatype;
    const int64_t* shape;
    uint64_t dim_count;
    const void* base;
    size_t byte_size;
    TRITONSERVER_MemoryType memory_type;
    int64_t memory_type_id;
    void* userp;

    RETURN_IF_ERR(TRITONSERVER_InferenceResponseOutput(
        iresponse, output_idx, &cname, &datatype, &shape, &dim_count, &base,
        &byte_size, &memory_type, &memory_type_id, &userp));

    const std::string name(cname);

    // There are usually very few outputs so fastest just to look for
    // the one we want... could create a map for cases where there are
    // a large number of outputs. The position of the match inside 'response'
    // is remembered because it can differ from 'output_idx'.
    inference::ModelInferResponse::InferOutputTensor* output = nullptr;
    int response_output_idx = 0;
    for (auto& io : *(response.mutable_outputs())) {
      if (io.name() == name) {
        output = &io;
        break;
      }
      ++response_output_idx;
    }

    if (output == nullptr) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          "unable to find expected response output");
    }

    // If this output was requested as classification then remove the
    // raw output from the response and instead return classification
    // results as a string tensor
    const auto itr = alloc_payload.classification_map_.find(name);
    if (itr == alloc_payload.classification_map_.end()) {
      // Not classification...
      output->set_datatype(TRITONSERVER_DataTypeString(datatype));
      for (size_t idx = 0; idx < dim_count; idx++) {
        output->add_shape(shape[idx]);
      }
    } else {
      // Classification
      const uint32_t classification_count = itr->second;

      // The protocol defines raw_output_contents as parallel to outputs, so
      // the raw slot is addressed with the position of the matched output in
      // 'response' rather than with the position in 'iresponse'; the two only
      // coincide when both list the outputs in the same order. Verify the slot
      // exists before doing any work.
      if (response_output_idx >= response.raw_output_contents_size()) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            "unable to find raw contents for classification output");
      }

      // For classification need to determine the batch size, if any,
      // because need to use that to break up the response for each
      // batch entry.
      uint32_t batch_size = 0;

      uint32_t batch_flags;
      RETURN_IF_ERR(TRITONSERVER_ServerModelBatchProperties(
          server, model_name, model_version, &batch_flags,
          nullptr /* voidp */));
      if ((dim_count > 0) &&
          ((batch_flags & TRITONSERVER_BATCH_FIRST_DIM) != 0)) {
        batch_size = shape[0];
      }

      // Determine the batch1 byte size of the tensor... needed when
      // the response tensor batch-size > 1 so that we know how to
      // stride through the tensor data.
      size_t batch1_element_count = 1;
      for (size_t idx = ((batch_size == 0) ? 0 : 1); idx < dim_count; idx++) {
        batch1_element_count *= shape[idx];
      }

      const size_t batch1_byte_size =
          batch1_element_count * TRITONSERVER_DataTypeByteSize(datatype);

      // Create the classification contents
      std::string serialized;

      size_t class_offset = 0;
      for (uint32_t bs = 0; bs < std::max((uint32_t)1, batch_size); ++bs) {
        std::vector<std::string> class_strs;
        // A batch entry that would read past the end of the buffer is passed
        // with a byte size of 0.
        RETURN_IF_ERR(TopkClassifications(
            iresponse, output_idx,
            reinterpret_cast<const char*>(base) + class_offset,
            ((class_offset + batch1_byte_size) > byte_size) ? 0
                                                            : batch1_byte_size,
            datatype, classification_count, &class_strs));

        // Serialize for binary representation: each string is written as a
        // 4-byte length followed by its bytes.
        for (const auto& str : class_strs) {
          uint32_t len = str.size();
          serialized.append(reinterpret_cast<const char*>(&len), sizeof(len));
          if (len > 0) {
            serialized.append(str);
          }
        }

        class_offset += batch1_byte_size;
      }

      // Update the output with new datatype, shape and contents.
      output->set_datatype(
          TRITONSERVER_DataTypeString(TRITONSERVER_TYPE_BYTES));

      if (batch_size > 0) {
        output->add_shape(batch_size);
      }
      output->add_shape(
          std::min(classification_count, (uint32_t)batch1_element_count));

      (*response.mutable_raw_output_contents())[response_output_idx] =
          std::move(serialized);
    }
  }

  // Make sure response doesn't exceed GRPC limits. The reported limit is the
  // same constant the check is made against.
  const size_t response_byte_size = response.ByteSizeLong();
  if (response_byte_size > MAX_GRPC_MESSAGE_SIZE) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "Response has byte size " + std::to_string(response_byte_size) +
            " which exceeds gRPC's byte size limit " +
            std::to_string(MAX_GRPC_MESSAGE_SIZE) + ".")
            .c_str());
  }

  return nullptr;  // success
}

//
// InferHandlerState
//
// State object for a single request handled by an InferHandler. A pointer
// to this object is what gets handed to the gRPC responder / alarm as the
// completion-queue tag, so the object carries everything needed to resume
// processing: the request, the queued responses, the current step and the
// shared Context of the gRPC transaction it belongs to.
template <
    typename ServerResponderType, typename RequestType, typename ResponseType>
class InferHandlerState {
 public:
  using InferHandlerStateType =
      InferHandlerState<ServerResponderType, RequestType, ResponseType>;

  // State that is shared across all state objects that make up a GRPC
  // transaction (e.g. a stream).
  struct Context {
    // Creates the gRPC server context, the responder bound to it and the
    // error tracker. 'cq' is the completion queue used when a state has to
    // be re-queued (see PutTaskBackToQueue). 'unique_id' is only used for
    // debugging output.
    explicit Context(
        ::grpc::ServerCompletionQueue* cq, const uint64_t unique_id = 0)
        : cq_(cq), unique_id_(unique_id), ongoing_requests_(0),
          step_(Steps::START), finish_ok_(true), ongoing_write_(false),
          received_notification_(false)
    {
      ctx_.reset(new ::grpc::ServerContext());
      responder_.reset(new ServerResponderType(ctx_.get()));
      gRPCErrorTracker_ = std::make_unique<gRPCErrorTracker>();
    }

    // Forwards the compression level to the underlying gRPC server context.
    void SetCompressionLevel(grpc_compression_level compression_level)
    {
      ctx_->set_compression_level(compression_level);
    }

    // Registers for the "done" notification of the gRPC context. A wrapper
    // state in WAITING_NOTIFICATION step that points at 'state' is created
    // and used as the tag, so the notification can be told apart from the
    // regular events of 'state'. The wrapper is owned by this context.
    void GrpcContextAsyncNotifyWhenDone(InferHandlerStateType* state)
    {
      notify_state_ = std::unique_ptr<InferHandlerStateType>(
          new InferHandlerStateType(Steps::WAITING_NOTIFICATION, state));
      ctx_->AsyncNotifyWhenDone(notify_state_.get());
    }

    // Records whether the async "done" notification has been delivered.
    void SetReceivedNotification(bool value) { received_notification_ = value; }

    // Returns whether the async "done" notification has been delivered.
    bool ReceivedNotification() { return received_notification_; }

    // Returns true only after the "done" notification was received and
    // either the gRPC context reports cancellation or the error tracker's
    // CheckAndUpdateGRPCError() returns true. Before the notification is
    // received this always returns false.
    bool IsCancelled()
    {
      std::lock_guard<std::recursive_mutex> lock(mu_);
      return received_notification_
                 ? (ctx_->IsCancelled() ||
                    gRPCErrorTracker_->CheckAndUpdateGRPCError())
                 : false;
    }

    // Increments the ongoing request counter
    void IncrementRequestCounter() { ongoing_requests_++; }

    // Decrements the ongoing request counter
    void DecrementRequestCounter() { ongoing_requests_--; }

    // Adds the state object created on this context
    void InsertState(InferHandlerStateType* state)
    {
      all_states_.insert(state);
    }

    // Erases the state object created on this context. The state is also
    // removed from the set of inflight states.
    void EraseState(InferHandlerStateType* state)
    {
      EraseInflightState(state);
      all_states_.erase(state);
    }

    // If the context has not reached FINISH yet, advances every state
    // created on this context (PARTIAL_COMPLETION -> COMPLETE, anything
    // else -> FINISH), puts each of them back on the completion queue and
    // marks the context as FINISH. Returns true if that transition was
    // performed, false if the context was already in FINISH.
    bool HandleCompletion()
    {
      if (step_ != Steps::FINISH) {
        for (auto state : all_states_) {
          std::lock_guard<std::recursive_mutex> lock(state->step_mtx_);
          // There is no order guarantee on when the AsyncNotifyWhenDone
          // event is placed on the completion queue vs when the actual
          // state RPC is processed. Need to transition through two steps
          // to preserve the lifetime of the state object.
          if (state->step_ == Steps::PARTIAL_COMPLETION) {
            state->step_ = Steps::COMPLETE;
          } else {
            state->step_ = Steps::FINISH;
          }
          PutTaskBackToQueue(state);
        }
        step_ = Steps::FINISH;
        return true;
      }
      return false;
    }

    // Extracts headers from GRPC request and updates state. Currently only
    // the "triton_grpc_error" header is inspected: when its value is "true"
    // the error tracker of the state's context is switched to
    // triton_grpc_error mode.
    void ExtractStateFromHeaders(InferHandlerStateType* state)
    {
      const auto& metadata = state->context_->ctx_->client_metadata();
      std::string triton_grpc_error_key = "triton_grpc_error";

      auto it = metadata.find(
          {triton_grpc_error_key.data(), triton_grpc_error_key.size()});

      if (it != metadata.end()) {
        if (it->second == "true") {
          LOG_VERBOSE(2)
              << "GRPC: triton_grpc_error mode detected in new grpc stream";
          state->context_->gRPCErrorTracker_->triton_grpc_error_ = true;
        }
      }
    }

    // Finishes the RPC with the status stored in 'state', unless an error
    // has already been reported on this context.
    void WriteGRPCErrorResponse(InferHandlerStateType* state)
    {
      std::lock_guard<std::recursive_mutex> lock(state->context_->mu_);
      // Check if Error not responded previously
      // Avoid closing connection twice on multiple errors from core
      if (!state->context_->gRPCErrorTracker_->GRPCErrorEncountered()) {
        state->step_ = Steps::COMPLETE;
        state->context_->responder_->Finish(state->status_, state);
        // Mark error for this stream
        state->context_->gRPCErrorTracker_->MarkGRPCErrorEncountered();
      }
    }

    // Builds a multi-line description containing the id of 'state', the
    // step and id of its context, and the id and step of every state
    // tracked by this context. Intended for verbose logging.
    const std::string DebugString(InferHandlerStateType* state)
    {
      std::string debug_string("");
      debug_string.append(
          "Running state_id " + std::to_string(state->unique_id_) + "\n");
      debug_string.append(
          "\tContext step " + std::to_string(state->context_->step_) + " id " +
          std::to_string(state->context_->unique_id_) + "\n");
      for (auto new_state : all_states_) {
        debug_string.append(
            "\t\t State id " + std::to_string(new_state->unique_id_) +
            ": State step " + std::to_string(new_state->step_) + "\n");
      }

      return debug_string;
    }

    // Inserts the state to a set tracking active requests
    // within the server core. Should only be called when
    // the request was successfully enqueued on Triton.
    void InsertInflightState(InferHandlerStateType* state)
    {
      std::lock_guard<std::recursive_mutex> lock(mu_);
      inflight_states_.insert(state);
    }

    // Erases the state from the set tracking active requests
    // within the server core.
    void EraseInflightState(InferHandlerStateType* state)
    {
      std::lock_guard<std::recursive_mutex> lock(mu_);
      inflight_states_.erase(state);
    }

    // Issues the cancellation for all inflight requests
    // being tracked by this context.
    void IssueRequestCancellation()
    {
      std::lock_guard<std::recursive_mutex> lock(mu_);

      // Issues the request cancellation to the core.
      for (auto state : inflight_states_) {
        std::lock_guard<std::recursive_mutex> step_lock(state->step_mtx_);
        if (state->step_ != Steps::CANCELLED &&
            state->step_ != Steps::COMPLETE) {
          LOG_VERBOSE(1) << "Issuing cancellation for " << state->unique_id_
                         << " step " << state->step_;
          if (state->inference_request_.get() == nullptr) {
            // The context might be holding some states that have
            // not been issued to Triton core. Need to skip
            // issuing cancellation for such requests.
            continue;
          }
          // Note that request may or may not be valid at this point.
          // Assuming if RequestComplete callback is run asynchronously
          // before this point.
          TRITONSERVER_Error* err = TRITONSERVER_InferenceRequestCancel(
              state->inference_request_.get());
          // TODO: Add request id to the message
          if (err != nullptr) {
            LOG_INFO << "Failed to cancel the request: "
                     << TRITONSERVER_ErrorMessage(err);
            // The error object is owned by the caller; release it so a
            // failed cancellation does not leak.
            TRITONSERVER_ErrorDelete(err);
          }
          state->step_ = Steps::CANCELLATION_ISSUED;
        } else if (state->step_ == Steps::COMPLETE) {
          // The RPC is complete and no callback will be invoked to retrieve
          // the object. Hence, need to explicitly place the state on the
          // completion queue.
          PutTaskBackToQueue(state);
        }
      }
    }


    // Handles the gRPC context cancellation. This function can be called
    // multiple times and is supposed to be re-entrant.
    // Returns whether or not to continue cycling through the gRPC
    // completion queue or not.
    bool HandleCancellation(
        InferHandlerStateType* state, bool rpc_ok, const std::string& name)
    {
      // Check to avoid early exit in case of triton_grpc_error
      if (!IsCancelled()) {
        LOG_ERROR
            << "[INTERNAL] HandleCancellation called even when the context was "
               "not cancelled for "
            << name << ", rpc_ok=" << rpc_ok << ", context "
            << state->context_->unique_id_ << ", " << state->unique_id_
            << " step " << state->step_;
        return true;
      }

      if (state->step_ != Steps::CANCELLATION_ISSUED) {
        // If the context has not been cancelled then
        // issue cancellation request to all the inflight
        // states belonging to the context.
        // It means this is the first time we are hitting this line for this
        // grpc transaction.
        if ((state->step_ != Steps::CANCELLED) &&
            (state->context_->step_ != Steps::CANCELLED)) {
          // Issue the request cancellation as it has not been cancelled yet.
          IssueRequestCancellation();
          // Mark the context as cancelled
          state->context_->step_ = Steps::CANCELLED;
          // The state returns true because the IssueRequestCancellation
          // call above would have raised alarm objects on all
          // pending inflight states objects. This state will
          // be taken up along with all the other states in the
          // next iteration from the completion queue which
          // would release the state.
          return true;
        } else {
          // The cancellation request has been handled so the state can be
          // released.
          LOG_VERBOSE(1) << "Completing cancellation for " << name
                         << ", rpc_ok=" << rpc_ok << ", context "
                         << state->context_->unique_id_ << ", "
                         << state->unique_id_ << " step " << state->step_;
          return false;
        }
      } else {  // state->step_ == Steps::CANCELLATION_ISSUED
        // Should wait for the InferResponseComplete callbacks to be invoked.
        LOG_VERBOSE(1)
            << "Waiting for the callback to retrieve cancellation for " << name
            << ", rpc_ok=" << rpc_ok << ", context "
            << state->context_->unique_id_ << ", " << state->unique_id_
            << " step " << state->step_;
        return true;
      }
    }

    // Enqueue 'state' so that its response is delivered in the
    // correct order.
    void EnqueueForResponse(InferHandlerStateType* state)
    {
      std::lock_guard<std::recursive_mutex> lock(mu_);
      states_.push(state);
    }

    // Write the response to the stream directly. The current response of
    // the state's response queue is written, then cleared and popped from
    // the queue.
    void DecoupledWriteResponse(InferHandlerStateType* state)
    {
#ifdef TRITON_ENABLE_TRACING
      state->trace_timestamps_.emplace_back(
          std::make_pair("GRPC_SEND_START", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING
      state->step_ = Steps::WRITTEN;
      ResponseType* response = state->response_queue_->GetCurrentResponse();
      responder_->Write(*response, state);

      // Clear the response after writing
      response->mutable_infer_response()->Clear();

      // Pop the response from queue
      state->response_queue_->PopResponse();
    }

    // Adds the state object to the completion queue so
    // that it can be processed later
    void PutTaskBackToQueue(InferHandlerStateType* state)
    {
      std::lock_guard<std::recursive_mutex> lock(mu_);
      // FIXME: Is there a better way to put task on the
      // completion queue rather than using alarm object?
      // The alarm object will add a new task to the back of the
      // completion queue when it expires or when it's cancelled.
      state->alarm_.Set(
          cq_, gpr_now(gpr_clock_type::GPR_CLOCK_REALTIME), state);
    }

    // Check the state at the front of the queue and write it if
    // ready. The state at the front of the queue is ready if it is in
    // the WRITEREADY state and it equals 'required_state' (or
    // 'required_state' is nullptr). Return nullptr if front of queue
    // was not ready (and so not written), or return the state if it
    // was ready and written.
    InferHandlerStateType* WriteResponseIfReady(
        InferHandlerStateType* required_state)
    {
      std::lock_guard<std::recursive_mutex> lock(mu_);
      if (states_.empty()) {
        return nullptr;
      }

      InferHandlerStateType* state = states_.front();
      if (state->step_ != Steps::WRITEREADY) {
        return nullptr;
      }

      if ((required_state != nullptr) && (state != required_state)) {
        return nullptr;
      }

#ifdef TRITON_ENABLE_TRACING
      state->trace_timestamps_.emplace_back(
          std::make_pair("GRPC_SEND_START", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

      state->step_ = Steps::WRITTEN;
      state->context_->ongoing_write_ = true;
      // Non decoupled writes use only one response
      responder_->Write(*state->response_queue_->GetResponseAt(0), state);

      return state;
    }

    // If 'state' is at the front of the queue and written, pop it and
    // return true. Otherwise return false.
    bool PopCompletedResponse(InferHandlerStateType* state)
    {
      std::lock_guard<std::recursive_mutex> lock(mu_);
      if (states_.empty()) {
        return false;
      }

      InferHandlerStateType* front = states_.front();
      if ((front == state) && (state->step_ == Steps::WRITTEN)) {
        states_.pop();
        return true;
      }

      return false;
    }

    // Return true if this context has completed all reads and writes.
    bool IsRequestsCompleted()
    {
      std::lock_guard<std::recursive_mutex> lock(mu_);
      return (
          (step_ == Steps::WRITEREADY) && states_.empty() &&
          (ongoing_requests_ == 0));
    }

    // The grpc completion queue associated with the RPC.
    ::grpc::ServerCompletionQueue* cq_;

    // Unique ID for the context. Used only for debugging so will
    // always be 0 in non-debug builds.
    const uint64_t unique_id_;

    // Context for the rpc, allowing to tweak aspects of it such as
    // the use of compression, authentication, as well as to send
    // metadata back to the client.
    std::unique_ptr<::grpc::ServerContext> ctx_;
    std::unique_ptr<ServerResponderType> responder_;

    // The states associated with this context that are currently
    // active. Used by stream handlers to maintain request / response
    // orders. A state enters this queue when it has successfully read
    // a request and exits the queue when it is written.
    std::recursive_mutex mu_;
    std::queue<InferHandlerStateType*> states_;
    std::atomic<uint32_t> ongoing_requests_;

    // Tracks the inflight requests sent to Triton core via this
    // context. We will use this structure to issue cancellations
    // on these requests.
    std::set<InferHandlerStateType*> inflight_states_;

    // Tracks all the states that have been created on this context.
    std::set<InferHandlerStateType*> all_states_;

    // Ready to write queue for decoupled
    std::queue<InferHandlerStateType*> ready_to_write_states_;

    // The step of the entire context.
    Steps step_;

    // True if this context should finish with OK status, false if
    // should finish with CANCELLED status.
    bool finish_ok_;

    // True if there is an ongoing write to the grpc stream
    std::atomic<bool> ongoing_write_;

    // The state object that is sent to grpc async notification
    // for tracking the gRPC stream.
    std::unique_ptr<InferHandlerState> notify_state_;

    // Tracks whether the async notification has been delivered by
    // completion queue.
    bool received_notification_;

    // Tracks whether a gRPC error has already been reported on this
    // context (see WriteGRPCErrorResponse and IsCancelled).
    std::unique_ptr<gRPCErrorTracker> gRPCErrorTracker_;
  };

  // This constructor is used to build a wrapper state object
  // pointing to the actual state object. The wrapper state
  // object is used to distinguish a tag from AsyncNotifyWhenDone()
  // signal. The wrapped 'state' is flagged as an async-notify state.
  explicit InferHandlerState(Steps start_step, InferHandlerState* state)
      : step_(start_step), state_ptr_(state), async_notify_state_(false)
  {
    state->MarkAsAsyncNotifyState();
  }

  // Builds a regular state object bound to 'context'. The debug delay
  // settings are read from the environment, a response queue bounded by
  // 'max_response_queue_size' is created and the remaining fields are
  // initialized through Reset().
  explicit InferHandlerState(
      TRITONSERVER_Server* tritonserver, const size_t max_response_queue_size,
      const std::shared_ptr<Context>& context, Steps start_step = Steps::START)
      : tritonserver_(tritonserver), async_notify_state_(false)
  {
    // For debugging and testing
    delay_response_ms_ = ParseDebugVariable("TRITONSERVER_DELAY_GRPC_RESPONSE");
    delay_complete_ms_ = ParseDebugVariable("TRITONSERVER_DELAY_GRPC_COMPLETE");
    delay_process_ms_ = ParseDebugVariable("TRITONSERVER_DELAY_GRPC_PROCESS");
    delay_process_entry_ms_ =
        ParseDebugVariable("TRITONSERVER_DELAY_GRPC_PROCESS_ENTRY");
    delay_notification_process_entry_ms_ =
        ParseDebugVariable("TRITONSERVER_DELAY_GRPC_NOTIFICATION");
    delay_response_complete_exec_ms_ =
        ParseDebugVariable("TRITONSERVER_DELAY_RESPONSE_COMPLETE_EXEC");
    delay_enqueue_ms_ = ParseDebugVariable("TRITONSERVER_DELAY_GRPC_ENQUEUE");
    delay_response_completion_ms_ =
        ParseDebugVariable("TRITONSERVER_DELAY_RESPONSE_COMPLETION");

    response_queue_.reset(
        new ResponseQueue<ResponseType>(max_response_queue_size));
    Reset(context, start_step);
  }

  // Flushes any pending trace timestamps before the state goes away.
  ~InferHandlerState() { ClearTraceTimestamps(); }

  // Reads the environment variable 'env_str' and parses it as an integer.
  // Returns 0 when the variable is not set or cannot be parsed; parse
  // failures are logged as errors.
  int ParseDebugVariable(const char* env_str)
  {
    const char* str = getenv(env_str);
    int val = 0;
    if (str != nullptr) {
      try {
        val = std::stoi(str);
      }
      catch (const std::invalid_argument& e) {
        LOG_ERROR << "Unable to parse the debug variable " << env_str
                  << ". Value provided: '" << str
                  << "' is not a valid integer. Error: " << e.what();
      }
      catch (const std::out_of_range& e) {
        LOG_ERROR << "Unable to parse the debug variable " << env_str
                  << ". Value provided: '" << str
                  << "' is out of range for an integer. Error: " << e.what();
      }
      catch (const std::exception& e) {
        LOG_ERROR
            << "An unexpected error occurred while parsing the debug variable "
            << env_str << " with value '" << str << "'. Error: " << e.what();
      }
    }
    return val;
  }

  // Convenience wrapper around Context::IsCancelled() of this state's
  // context.
  bool IsGrpcContextCancelled() { return context_->IsCancelled(); }

  // Re-initializes the state for a new request on 'context', starting at
  // 'start_step'. Called on construction and whenever a pooled state
  // object is re-used.
  void Reset(
      const std::shared_ptr<Context>& context, Steps start_step = Steps::START)
  {
    unique_id_ = NEXT_UNIQUE_ID;
    context_ = context;
    step_ = start_step;
    status_ = ::grpc::Status{};
    cb_count_ = 0;
    is_decoupled_ = false;
    complete_ = false;
    parameters_ = {};
    request_.Clear();
    response_queue_->Reset();
    // Clear trace_timestamps_ here so they do not grow indefinitely since
    // states are re-used for performance.
    ClearTraceTimestamps();
    // The pointer should be nullptr for all state objects instead of
    // wrapper state object in WAITING_NOTIFICATION step.
    state_ptr_ = nullptr;
    async_notify_state_ = false;
  }

  // Drops the references held by the state (context and inference
  // request) and flushes the trace timestamps.
  void Release()
  {
    context_ = nullptr;
    inference_request_.reset();
    ClearTraceTimestamps();
  }

  // When tracing is enabled, reports the buffered timestamps to the trace
  // (if one was acquired), releases the trace and empties the buffer.
  // Does nothing when tracing is compiled out.
  void ClearTraceTimestamps()
  {
#ifdef TRITON_ENABLE_TRACING
    if (trace_ != nullptr) {
      for (const auto& timestamp : trace_timestamps_) {
        trace_->CaptureTimestamp(timestamp.first, timestamp.second);
      }
      trace_.reset();
    }
    trace_timestamps_.clear();
#endif  // TRITON_ENABLE_TRACING
  }

  // Returns whether all the responses from the state
  // are delivered and successfully written on the
  // stream.
  bool IsComplete() { return (complete_ && response_queue_->IsEmpty()); }

  // Flags this state as having been wrapped for AsyncNotifyWhenDone().
  void MarkAsAsyncNotifyState() { async_notify_state_ = true; }
  // Returns whether this state has been wrapped for AsyncNotifyWhenDone().
  bool IsAsyncNotifyState() { return async_notify_state_; }

  // Needed in the response handle for classification outputs.
  TRITONSERVER_Server* tritonserver_ = nullptr;

  // Unique ID for the state. Used only for debugging so will
  // always be 0 in non-debug builds.
  uint64_t unique_id_ = 0;

  // Context shared by all states of the same gRPC transaction.
  std::shared_ptr<Context> context_;
  // Current step of this state; 'step_mtx_' is taken by the context when
  // it changes the step of a state it tracks.
  Steps step_;
  std::recursive_mutex step_mtx_;

  // Shared pointer to the inference request object. The lifetime of
  // inference request object is extended till all the responses from
  // the request are processed and the request is released.
  std::shared_ptr<TRITONSERVER_InferenceRequest> inference_request_;

#ifdef TRITON_ENABLE_TRACING
  std::shared_ptr<TraceManager::Trace> trace_;
  // Additional timestamps that are captured before a trace stream is acquired
  std::deque<std::pair<std::string, uint64_t>> trace_timestamps_;
#endif  // TRITON_ENABLE_TRACING

  bool is_decoupled_ = false;
  StateParameters parameters_;

  ::grpc::Status status_;
  std::atomic<uint32_t> cb_count_{0};
  bool complete_ = false;

  RequestType request_;
  std::shared_ptr<ResponseQueue<ResponseType>> response_queue_;

  // Alarm used to place this state back on the completion queue.
  ::grpc::Alarm alarm_;

  // For testing and debugging. The values default to 0 so that wrapper
  // states, which never parse the environment, hold well-defined values.
  int delay_response_ms_ = 0;
  int delay_complete_ms_ = 0;
  int delay_process_ms_ = 0;
  int delay_process_entry_ms_ = 0;
  int delay_notification_process_entry_ms_ = 0;
  int delay_response_complete_exec_ms_ = 0;
  int delay_enqueue_ms_ = 0;
  int delay_response_completion_ms_ = 0;

  // For inference requests the allocator payload, unused for other
  // requests.
  AllocPayload<ResponseType> alloc_payload_;

  // The below pointer is only set when using this state object as a
  // wrapper over actual state when being sent to completion queue
  // using AsyncNotifyWhenDone function. Otherwise it is nullptr.
  InferHandlerState* state_ptr_ = nullptr;

  // Tracks whether this state object has been wrapped and sent to
  // AsyncNotifyWhenDone() function as a tag.
  bool async_notify_state_ = false;
};


//
// InferHandler
//
template <
    typename ServiceType, typename ServerResponderType, typename RequestType,
    typename ResponseType>
class InferHandler : public HandlerBase {
 public:
  InferHandler(
      const std::string& name,
      const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
      ServiceType* service, ::grpc::ServerCompletionQueue* cq,
      size_t max_state_bucket_count, size_t max_response_queue_size,
      std::pair<std::string, std::string> restricted_kv,
      const std::string& header_forward_pattern, std::shared_mutex* conn_mtx,
      std::atomic<uint32_t>* conn_cnt, bool* accepting_new_conn);
  virtual ~InferHandler();

  // Descriptive name of of the handler.
  const std::string& Name() const { return name_; }

  // Start handling requests.
  void Start() override;

  // Stop handling requests.
  void Stop() override;

 protected:
  void IncrementConnectionCount() { conn_cnt_->fetch_add(1); }
  void DecrementConnectionCount() { conn_cnt_->fetch_sub(1); }

  using State =
      InferHandlerState<ServerResponderType, RequestType, ResponseType>;
  using StateContext = typename State::Context;

  State* StateNew(
      TRITONSERVER_Server* tritonserver,
      const std::shared_ptr<StateContext>& context,
      Steps start_step = Steps::START)
  {
    IncrementConnectionCount();

    State* state = nullptr;

    if (max_state_bucket_count_ > 0) {
      std::lock_guard<std::mutex> lock(alloc_mu_);

      if (!state_bucket_.empty()) {
        state = state_bucket_.back();
        state->Reset(context, start_step);
        state_bucket_.pop_back();
      }
    }

    if (state == nullptr) {
      state = new State(
          tritonserver, max_response_queue_size_, context, start_step);
    }

    if (start_step == Steps::START) {
      // Need to be called to receive an asynchronous notification
      // when the transaction is cancelled.
      context->GrpcContextAsyncNotifyWhenDone(state);
    }
    context->InsertState(state);

    LOG_VERBOSE(2) << "StateNew, " << state->unique_id_ << " Step "
                   << state->step_;

    return state;
  }

  void StateRelease(State* state)
  {
    LOG_VERBOSE(2) << "StateRelease, " << state->unique_id_ << " Step "
                   << state->step_;
    if (max_state_bucket_count_ > 0) {
      std::lock_guard<std::mutex> lock(alloc_mu_);

      if (state_bucket_.size() < max_state_bucket_count_) {
        state->Release();
        state_bucket_.push_back(state);
        DecrementConnectionCount();
        return;
      }
    }

    delete state;
    DecrementConnectionCount();
  }

  // Simple structure that carries the payload needed for
  // response release callback.
  struct ResponseReleasePayload final {
    State* state_;
    std::vector<std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>>
        shm_regions_info_;
    std::shared_ptr<SharedMemoryManager> shm_manager_;

    ResponseReleasePayload(
        State* state,
        std::vector<
            std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>>&&
            shm_regions_info,
        const std::shared_ptr<SharedMemoryManager>& shm_manager)
        : state_(state), shm_regions_info_(std::move(shm_regions_info)),
          shm_manager_(shm_manager)
    {
    }

    ~ResponseReleasePayload()
    {
      // Unregister shm regions that are waiting for the completion of an
      // inference.
      while (!shm_regions_info_.empty()) {
        auto shm_name = shm_regions_info_.back()->name_;
        auto shm_memory_type = shm_regions_info_.back()->kind_;
        auto awaiting_unregister =
            shm_regions_info_.back()->awaiting_unregister_;

        // Delete shared_ptr to decrement reference count
        shm_regions_info_.pop_back();

        if (awaiting_unregister) {
          if (shm_manager_ != nullptr) {
            auto err = shm_manager_->Unregister(shm_name, shm_memory_type);
            if (err != nullptr) {
              LOG_VERBOSE(1) << TRITONSERVER_ErrorMessage(err);
            }
          } else {
            LOG_VERBOSE(1) << "Shared memory manager is not available";
          }
        }
      }
    }
  };

  virtual void StartNewRequest() = 0;
  virtual bool Process(State* state, bool rpc_ok, bool is_notification) = 0;
  bool ExecutePrecondition(InferHandler::State* state);

  TRITONSERVER_Error* ForwardHeadersAsParameters(
      TRITONSERVER_InferenceRequest* irequest, InferHandler::State* state);

  const std::string name_;
  std::shared_ptr<TRITONSERVER_Server> tritonserver_;

  ServiceType* service_;
  ::grpc::ServerCompletionQueue* cq_;
  std::unique_ptr<std::thread> thread_;

  // Mutex to serialize State allocation
  std::mutex alloc_mu_;

  // Keep some number of state objects for reuse to avoid the overhead
  // of creating a state for every new request.
  const size_t max_state_bucket_count_;
  std::vector<State*> state_bucket_;

  const size_t max_response_queue_size_;
  std::pair<std::string, std::string> restricted_kv_;
  std::string header_forward_pattern_;
  re2::RE2 header_forward_regex_;

  std::shared_mutex* conn_mtx_;
  std::atomic<uint32_t>* conn_cnt_;
  bool* accepting_new_conn_;
};

// Stores the handler configuration. No thread is started here; see
// Start(). 'restricted_kv' is taken by value and moved into the member to
// avoid a second copy of the two strings. The forward regex is compiled
// from the stored pattern member.
template <
    typename ServiceType, typename ServerResponderType, typename RequestType,
    typename ResponseType>
InferHandler<ServiceType, ServerResponderType, RequestType, ResponseType>::
    InferHandler(
        const std::string& name,
        const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
        ServiceType* service, ::grpc::ServerCompletionQueue* cq,
        size_t max_state_bucket_count, size_t max_response_queue_size,
        std::pair<std::string, std::string> restricted_kv,
        const std::string& header_forward_pattern, std::shared_mutex* conn_mtx,
        std::atomic<uint32_t>* conn_cnt, bool* accepting_new_conn)
    : name_(name), tritonserver_(tritonserver), service_(service), cq_(cq),
      max_state_bucket_count_(max_state_bucket_count),
      max_response_queue_size_(max_response_queue_size),
      restricted_kv_(std::move(restricted_kv)),
      header_forward_pattern_(header_forward_pattern),
      header_forward_regex_(header_forward_pattern_), conn_mtx_(conn_mtx),
      conn_cnt_(conn_cnt), accepting_new_conn_(accepting_new_conn)
{
}

// Frees every state object still parked in the re-use pool and logs the
// destruction of the handler.
template <
    typename ServiceType, typename ServerResponderType, typename RequestType,
    typename ResponseType>
InferHandler<ServiceType, ServerResponderType, RequestType, ResponseType>::
    ~InferHandler()
{
  // Walk the pool front to back, destroying each pooled state, then drop
  // the now-dangling pointers in one go.
  for (auto pooled = state_bucket_.begin(); pooled != state_bucket_.end();
       ++pooled) {
    delete *pooled;
  }
  state_bucket_.clear();

  LOG_VERBOSE(1) << "Destructed " << Name();
}

// Spawns the handler thread and returns once that thread is running.
// The thread first calls StartNewRequest(), then loops on the completion
// queue: every tag is interpreted as a State, dispatched to Process(), and
// released when Process() returns false. The loop ends when cq_->Next()
// returns false.
template <
    typename ServiceType, typename ServerResponderType, typename RequestType,
    typename ResponseType>
void
InferHandler<
    ServiceType, ServerResponderType, RequestType, ResponseType>::Start()
{
  // Use a barrier to make sure we don't return until thread has
  // started.
  auto barrier = std::make_shared<Barrier>(2);

  thread_.reset(new std::thread([this, barrier] {
    StartNewRequest();
    barrier->Wait();

    void* tag;
    bool ok;

    while (cq_->Next(&tag, &ok)) {
      State* state = static_cast<State*>(tag);
      bool is_notification = false;
      if (state->step_ == Steps::WAITING_NOTIFICATION) {
        // The tag is a wrapper created for AsyncNotifyWhenDone(); unwrap
        // it to get the actual state the notification refers to.
        State* state_wrapper = state;
        state = state_wrapper->state_ptr_;
        is_notification = true;
        LOG_VERBOSE(1) << "Received notification for " << Name() << ", "
                       << state->unique_id_;

        if (state->delay_notification_process_entry_ms_ != 0) {
          // Will delay the entry to Process by the specified time.
          // This can be used to test the flow when
          // 1. cancellation request issued for the request, which invokes
          // InferResponseComplete callback right before Process.
          // 2. cancellation request issued for the request during
          // InferResponseComplete callback right before Process in the
          // notification thread.
          LOG_INFO
              << "Delaying the entry to Process for notification thread by "
              << state->delay_notification_process_entry_ms_ << " ms...";
          std::this_thread::sleep_for(std::chrono::milliseconds(
              state->delay_notification_process_entry_ms_));
        }
      } else {
        if (state->delay_process_entry_ms_ != 0) {
          // Will delay the entry to Process by the specified time.
          LOG_INFO << "Delaying the entry to Process thread by "
                   << state->delay_process_entry_ms_ << " ms...";
          std::this_thread::sleep_for(
              std::chrono::milliseconds(state->delay_process_entry_ms_));
        }
      }

      LOG_VERBOSE(2) << "Grpc::CQ::Next() "
                     << state->context_->DebugString(state);
      if (!Process(state, ok, is_notification)) {
        // Process() is done with this state: detach it from its context
        // and hand it back (to the pool or to the allocator).
        LOG_VERBOSE(1) << "Done for " << Name() << ", " << state->unique_id_;
        state->context_->EraseState(state);
        StateRelease(state);
      } else {
        // In non-streaming infer mode which has multiple request handlers,
        // there is no guarantee state->context_ is valid beyond this line.
        LOG_VERBOSE(2) << "Returning from " << Name() << ", "
                       << state->unique_id_ << ", " << state->step_;
      }
    }
  }));

  // Second party of the barrier: blocks until the thread has issued its
  // first StartNewRequest().
  barrier->Wait();
  LOG_VERBOSE(1) << "Thread started for " << Name();
}

// Waits for the handler thread to exit. Safe to call even when Start()
// was never called: 'thread_' is only created by Start(), so it is checked
// for null before being dereferenced.
template <
    typename ServiceType, typename ServerResponderType, typename RequestType,
    typename ResponseType>
void
InferHandler<
    ServiceType, ServerResponderType, RequestType, ResponseType>::Stop()
{
  if ((thread_ != nullptr) && thread_->joinable()) {
    thread_->join();
  }

  LOG_VERBOSE(1) << "Thread exited for " << Name();
}

// Checks the restricted key/value pair against the client metadata of the
// request. Returns true when no restricted key is configured, or when the
// metadata contains the key with exactly the configured value.
template <
    typename ServiceType, typename ServerResponderType, typename RequestType,
    typename ResponseType>
bool
InferHandler<ServiceType, ServerResponderType, RequestType, ResponseType>::
    ExecutePrecondition(InferHandler::State* state)
{
  // Nothing to enforce when the restricted key is empty.
  if (restricted_kv_.first.empty()) {
    return true;
  }

  const auto& client_headers = state->context_->ctx_->client_metadata();
  const auto entry = client_headers.find(restricted_kv_.first);
  if (entry == client_headers.end()) {
    return false;
  }
  return entry->second == restricted_kv_.second;
}

// Forwards client headers to the inference request as string parameters.
// Every metadata entry whose key partially matches the forward regex is
// set on 'irequest'. Returns nullptr on success (or when no forward
// pattern is configured), otherwise the first error reported while
// setting a parameter; the remaining headers are not processed then.
template <
    typename ServiceType, typename ServerResponderType, typename RequestType,
    typename ResponseType>
TRITONSERVER_Error*
InferHandler<ServiceType, ServerResponderType, RequestType, ResponseType>::
    ForwardHeadersAsParameters(
        TRITONSERVER_InferenceRequest* irequest, InferHandler::State* state)
{
  // Header forwarding is disabled when the pattern is empty.
  if (header_forward_pattern_.empty()) {
    return nullptr;
  }

  const auto& client_headers = state->context_->ctx_->client_metadata();
  for (const auto& header : client_headers) {
    const std::string header_name(header.first.begin(), header.first.end());
    if (!RE2::PartialMatch(header_name, header_forward_regex_)) {
      continue;
    }

    const std::string header_value(
        header.second.begin(), header.second.end());
    TRITONSERVER_Error* set_err =
        TRITONSERVER_InferenceRequestSetStringParameter(
            irequest, header_name.c_str(), header_value.c_str());
    if (set_err != nullptr) {
      // Stop at the first failure and report it to the caller.
      return set_err;
    }
  }

  return nullptr;
}

//
// ModelInferHandler
//
// InferHandler specialization for the ModelInferRequest / ModelInferResponse
// message pair, using a ServerAsyncResponseWriter as the responder.
//
// The handler owns a TRITONSERVER_ResponseAllocator that is created in the
// constructor and deleted in the destructor.
class ModelInferHandler
    : public InferHandler<
          inference::GRPCInferenceService::AsyncService,
          ::grpc::ServerAsyncResponseWriter<inference::ModelInferResponse>,
          inference::ModelInferRequest, inference::ModelInferResponse> {
 public:
  // Forwards the common handler configuration to the InferHandler base and
  // stores the trace manager, shared memory manager and compression level
  // that are specific to this handler.
  ModelInferHandler(
      const std::string& name,
      const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
      TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      inference::GRPCInferenceService::AsyncService* service,
      ::grpc::ServerCompletionQueue* cq, size_t max_state_bucket_count,
      size_t max_response_queue_size, grpc_compression_level compression_level,
      std::pair<std::string, std::string> restricted_kv,
      const std::string& forward_header_pattern, std::shared_mutex* conn_mtx,
      std::atomic<uint32_t>* conn_cnt, bool* accepting_new_conn)
      : InferHandler(
            name, tritonserver, service, cq, max_state_bucket_count,
            max_response_queue_size, restricted_kv, forward_header_pattern,
            conn_mtx, conn_cnt, accepting_new_conn),
        trace_manager_(trace_manager), shm_manager_(shm_manager),
        compression_level_(compression_level)
  {
    // Create the allocator that will be used to allocate buffers for
    // the result tensors.
    // NOTE(review): FAIL_IF_ERR is defined elsewhere; presumably it aborts on
    // error, in which case 'allocator_' is always valid past this point.
    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorNew(
            &allocator_, InferResponseAlloc, InferResponseFree,
            InferResponseStart),
        "creating inference response allocator");
    // Register the optional callbacks used to query output buffer placement
    // and to report output buffer attributes.
    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorSetQueryFunction(
            allocator_, OutputBufferQuery),
        "setting allocator's query function");
    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction(
            allocator_, OutputBufferAttributes),
        "setting allocator's output buffer attributes function");
  }

  // Deletes the response allocator created in the constructor; a failure to
  // delete is only logged.
  ~ModelInferHandler()
  {
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_ResponseAllocatorDelete(allocator_),
        "deleting response allocator");
  }

 protected:
  // Overrides of the InferHandler hooks; defined out of line.
  void StartNewRequest() override;
  bool Process(State* state, bool rpc_ok, bool is_notification) override;

 private:
  // Helpers defined out of line. InferResponseComplete is static and takes an
  // opaque 'userp', matching the shape of a C-style response callback.
  void Execute(State* state);
  static void InferResponseComplete(
      TRITONSERVER_InferenceResponse* response, const uint32_t flags,
      void* userp);

  // Not owned: stored as a raw pointer supplied by the caller.
  TraceManager* trace_manager_;
  // Shared ownership of the shared memory manager.
  std::shared_ptr<SharedMemoryManager> shm_manager_;
  // Owned: created in the constructor, deleted in the destructor.
  TRITONSERVER_ResponseAllocator* allocator_;

  // gRPC compression level supplied at construction.
  grpc_compression_level compression_level_;
};

#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING)
// Read-only OpenTelemetry text-map carrier backed by the client metadata of a
// gRPC server context. It lets a propagator extract trace context from the
// headers the client sent with the call.
class GrpcServerCarrier : public otel_cntxt::propagation::TextMapCarrier {
 public:
  GrpcServerCarrier(::grpc::ServerContext* context) : context_(context) {}
  GrpcServerCarrier() = default;

  // Returns the value of the client metadata entry named 'key', or an empty
  // view when the carrier has no context or the key is not present.
  virtual opentelemetry::nostd::string_view Get(
      opentelemetry::nostd::string_view key) const noexcept override
  {
    // A default-constructed carrier has no context to read from.
    if (context_ == nullptr) {
      return "";
    }

    const auto& metadata = context_->client_metadata();
    auto it = metadata.find({key.data(), key.size()});
    if (it != metadata.end()) {
      // grpc::string_ref is a (pointer, length) pair that is not guaranteed
      // to be null-terminated, so the view must be built with an explicit
      // length rather than from the bare 'const char*'.
      return opentelemetry::nostd::string_view(
          it->second.data(), it->second.size());
    }
    return "";
  }

  // Not required on server side
  virtual void Set(
      opentelemetry::nostd::string_view key,
      opentelemetry::nostd::string_view value) noexcept override
  {
    return;
  }

  // Not owned. Null for a default-constructed carrier.
  ::grpc::ServerContext* context_ = nullptr;
};
#else
using GrpcServerCarrier = void*;
#endif  // TRITON_ENABLE_TRACING

}}}  // namespace triton::server::grpc


================================================
FILE: src/grpc/stream_infer_handler.cc
================================================
// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "stream_infer_handler.h"

#include <regex>

namespace triton { namespace server { namespace grpc {

// Response allocator callback for streaming inference: allocates the buffer
// for output 'tensor_name' inside the most recently allocated response of the
// payload's response queue.
//
// Make sure to keep InferResponseAlloc and OutputBufferQuery logic in sync
TRITONSERVER_Error*
StreamInferResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  using StreamPayload = AllocPayload<inference::ModelStreamInferResponse>;
  auto* alloc_payload = static_cast<StreamPayload*>(userp);

  auto last_response =
      alloc_payload->response_queue_->GetLastAllocatedResponse();
  if (last_response != nullptr) {
    return ResponseAllocatorHelper(
        allocator, tensor_name, byte_size, preferred_memory_type,
        preferred_memory_type_id, last_response->mutable_infer_response(),
        alloc_payload->shm_map_, buffer, buffer_userp, actual_memory_type,
        actual_memory_type_id);
  }

  // Without a response object there is nowhere to place the tensor.
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_INTERNAL,
      "Unable to access the last allocated response");
}

// Response allocator "start" callback for streaming inference: advances the
// payload's response queue to a fresh response object. Always reports
// success.
TRITONSERVER_Error*
StreamInferResponseStart(TRITONSERVER_ResponseAllocator* allocator, void* userp)
{
  using StreamPayload = AllocPayload<inference::ModelStreamInferResponse>;

  // Move to the next response object
  static_cast<StreamPayload*>(userp)->response_queue_->AllocateResponse();

  return nullptr;  // success
}

// Response allocator query callback for streaming inference: delegates to
// OutputBufferQueryHelper using the shared memory map held by the payload.
//
// Make sure to keep InferResponseAlloc and OutputBufferQuery logic in sync
TRITONSERVER_Error*
StreamOutputBufferQuery(
    TRITONSERVER_ResponseAllocator* allocator, void* userp,
    const char* tensor_name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  using StreamPayload = AllocPayload<inference::ModelStreamInferResponse>;
  auto* alloc_payload = static_cast<StreamPayload*>(userp);

  return OutputBufferQueryHelper(
      allocator, tensor_name, byte_size, alloc_payload->shm_map_, memory_type,
      memory_type_id);
}

// Response allocator buffer-attributes callback for streaming inference:
// delegates to OutputBufferAttributesHelper using the shared memory map held
// by the payload. 'buffer_userp' is not used.
//
// Make sure to keep InferResponseAlloc, OutputBufferQuery, and
// OutputBufferAttributes logic in sync
TRITONSERVER_Error*
StreamOutputBufferAttributes(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    TRITONSERVER_BufferAttributes* buffer_attributes, void* userp,
    void* buffer_userp)
{
  using StreamPayload = AllocPayload<inference::ModelStreamInferResponse>;
  auto* alloc_payload = static_cast<StreamPayload*>(userp);

  return OutputBufferAttributesHelper(
      allocator, tensor_name, alloc_payload->shm_map_, buffer_attributes);
}

//=============================================================================
//  The following section contains the handling mechanism for ModelStreamInfer
//  RPC. This implementation is tuned towards performance and reducing latency.
//=============================================================================

void
ModelStreamInferHandler::StartNewRequest()
{
  auto context = std::make_shared<State::Context>(cq_, NEXT_UNIQUE_ID);
  context->SetCompressionLevel(compression_level_);
  State* state = StateNew(tritonserver_.get(), context);

#ifdef TRITON_ENABLE_TRACING
  // Can't create trace as we don't know the model to be requested,
  // track timestamps in 'state'
  state->trace_timestamps_.emplace_back(
      std::make_pair("GRPC_WAITREAD_START", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

  service_->RequestModelStreamInfer(
      state->context_->ctx_.get(), state->context_->responder_.get(), cq_, cq_,
      state);

  LOG_VERBOSE(1) << "New request handler for " << Name() << ", "
                 << state->unique_id_;
}

// Advances 'state' through the ModelStreamInfer step machine in response to
// an event taken from the completion queue.
//
// 'rpc_ok' is the success flag reported for the event. 'is_notification'
// marks the event as the context's notification event (recorded on the
// context via SetReceivedNotification) rather than a regular
// read/write/finish event.
//
// Returns false when 'state' is finished (a new stream failed to start, or
// the state reached the end of its steps) and true while it still has work
// pending. When the gRPC context is cancelled, the result of
// StateContext::HandleCancellation() is returned instead.
bool
ModelStreamInferHandler::Process(
    InferHandler::State* state, bool rpc_ok, bool is_notification)
{
  if (is_notification) {
    state->context_->SetReceivedNotification(true);
  }
  // Because gRPC doesn't allow concurrent writes on
  // the stream we only have a single handler thread that
  // reads from the completion queue. Hence, cancellation
  // notification will be received on the same handler
  // thread.
  // This means that we only need to take care of
  // synchronizing this thread and the ResponseComplete
  // threads.
  if (state->context_->ReceivedNotification()) {
    std::lock_guard<std::recursive_mutex> lock(state->step_mtx_);
    if (state->IsGrpcContextCancelled()) {
      if (is_notification) {
        // This is the cancellation notification
        LOG_VERBOSE(1) << "Cancellation notification received for " << Name()
                       << ", rpc_ok=" << rpc_ok << ", context "
                       << state->context_->unique_id_ << " step "
                       << state->context_->step_ << ", state "
                       << state->unique_id_ << " step " << state->step_;
      }

      // Cancellation handling decides whether the state stays alive.
      bool resume = state->context_->HandleCancellation(state, rpc_ok, Name());
      return resume;
    } else {
      // Not cancelled: if the context reports the completion as handled,
      // there is nothing more to do for this event.
      if (state->context_->HandleCompletion()) {
        return true;
      }
    }
  }

  LOG_VERBOSE(1) << "Process for " << Name() << ", rpc_ok=" << rpc_ok
                 << ", context " << state->context_->unique_id_ << ", "
                 << state->unique_id_ << " step " << state->step_;

  // We need an explicit finish indicator. Can't use 'state->step_'
  // because we launch an async thread that could update 'state's
  // step_ to be FINISH before this thread exits this function.
  bool finished = false;

  if (state->step_ == Steps::START) {
    // A new stream connection... If RPC failed on a new request then
    // the server is shutting down and so we should do nothing.
    if (!rpc_ok) {
      state->step_ = Steps::FINISH;
      return false;
    }

    // Start a new request to replace this one...
    StartNewRequest();

    if (ExecutePrecondition(state)) {
      // Since this is the start of a connection, 'state' hasn't been
      // used yet so use it to read a request off the connection.
      state->context_->step_ = Steps::READ;
      state->step_ = Steps::READ;
      state->context_->responder_->Read(&state->request_, state);
    } else {
      // Precondition is not satisfied, cancel the stream
      state->context_->step_ = Steps::COMPLETE;
      state->step_ = Steps::PARTIAL_COMPLETION;
      ::grpc::Status status = ::grpc::Status(
          ::grpc::StatusCode::UNAVAILABLE,
          std::string("This protocol is restricted, expecting header '") +
              restricted_kv_.first + "'");
      state->context_->responder_->Finish(status, state);
      // 'finished' is still false here, so this keeps the state alive until
      // the Finish() event comes back through the completion queue.
      return !finished;
    }
    state->context_->ExtractStateFromHeaders(state);
  } else if (state->step_ == Steps::READ) {
    TRITONSERVER_Error* err = nullptr;
    const inference::ModelInferRequest& request = state->request_;
#ifdef TRITON_ENABLE_TRACING
    state->trace_timestamps_.emplace_back(
        std::make_pair("GRPC_WAITREAD_END", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

    // If done reading and no in-flight requests then can finish the
    // entire stream. Otherwise just finish this state.
    if (!rpc_ok) {
      state->context_->step_ = Steps::WRITEREADY;
      if (state->context_->IsRequestsCompleted()) {
        state->context_->step_ = Steps::COMPLETE;
        state->step_ = Steps::PARTIAL_COMPLETION;
        LOG_VERBOSE(2) << "Finishing responder from state "
                       << state->unique_id_;
        state->context_->responder_->Finish(
            state->context_->finish_ok_ ? ::grpc::Status::OK
                                        : ::grpc::Status::CANCELLED,
            state);
      } else {
        state->step_ = Steps::FINISH;
        finished = true;
      }

      return !finished;
    }

    // Held in shared mode until the end of this READ branch, covering the
    // 'accepting_new_conn_' check and the issuing of the request.
    std::shared_lock<std::shared_mutex> lk1(*conn_mtx_);

    if (!*accepting_new_conn_) {
      err = TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNAVAILABLE,
          "GRPC server is shutting down and has stopped accepting new "
          "requests.");
    }

    // Left uninitialized here; every read below is guarded by
    // 'err == nullptr'.
    int64_t requested_model_version;
    if (err == nullptr) {
      err = GetModelVersionFromString(
          request.model_version(), &requested_model_version);
    }

    // Record the transaction policy of the model into the current state
    // object.
    if (err == nullptr) {
      uint32_t txn_flags;
      err = TRITONSERVER_ServerModelTransactionProperties(
          tritonserver_.get(), request.model_name().c_str(),
          requested_model_version, &txn_flags, nullptr /* voidp */);
      if (err == nullptr) {
        state->is_decoupled_ = ((txn_flags & TRITONSERVER_TXN_DECOUPLED) != 0);
      }
    }

    // Request has been successfully read, increment the context request
    // counter.
    // Note: this is done regardless of 'err'; the error path below still
    // produces a response for this state.
    state->context_->IncrementRequestCounter();

    // If the request is not for a model with decoupled transaction policy
    // then put it in the context queue so that its response is sent in
    // the same order as the request was received.
    if (!state->is_decoupled_) {
      state->context_->EnqueueForResponse(state);
    }

    // Need to get context here as it is needed below. 'state' can
    // complete inference, write response, and finish (which releases
    // context) before we make any forward progress.... so need to
    // hold onto context here while we know it is good.
    std::shared_ptr<StateContext> context = state->context_;

    // Issue the inference request into server...
    auto response_queue_ = state->response_queue_;

    // Create the inference request which contains all the
    // input information needed for an inference.
    TRITONSERVER_InferenceRequest* irequest = nullptr;
    if (err == nullptr) {
      err = TRITONSERVER_InferenceRequestNew(
          &irequest, tritonserver_.get(), request.model_name().c_str(),
          requested_model_version);
    }

    if (err == nullptr) {
      // Hand the request to 'state' together with a deleter that deletes the
      // request and logs any failure to do so.
      state->inference_request_ = {
          irequest, [](TRITONSERVER_InferenceRequest* request) {
            LOG_TRITONSERVER_ERROR(
                TRITONSERVER_InferenceRequestDelete(request),
                "deleting gRPC inference request");
          }};
      err = SetInferenceRequestMetadata(irequest, request, state->parameters_);
    }

    if (err == nullptr) {
      err = ForwardHeadersAsParameters(irequest, state);
    }

    // Will be used to hold the serialized data in case explicit string
    // tensors are present in the request.
    std::list<std::string> serialized_data;

    // Maintain shared pointers(read-only reference) to the shared memory
    // block's information for the shared memory regions used by the request.
    // These pointers will automatically increase the usage count, preventing
    // unregistration of the shared memory. This vector must be cleared in the
    // `StreamInferResponseComplete` callback (after inference) to decrease the
    // count and permit unregistration. The vector will be included in
    // `response_release_payload` for the callback.
    std::vector<std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>>
        shm_regions_info;

    if (err == nullptr) {
      err = InferGRPCToInput(
          tritonserver_, shm_manager_, request, &serialized_data, irequest,
          &shm_regions_info);
    }
    if (err == nullptr) {
      err = InferAllocatorPayload<inference::ModelStreamInferResponse>(
          tritonserver_, shm_manager_, request, std::move(serialized_data),
          response_queue_, &state->alloc_payload_, &shm_regions_info);
    }

    // Both payloads are held by unique_ptr so they are freed automatically
    // if the request is never issued; ownership is released below once the
    // request is in flight.
    auto request_release_payload =
        std::make_unique<RequestReleasePayload>(state->inference_request_);
    auto response_release_payload = std::make_unique<ResponseReleasePayload>(
        state, std::move(shm_regions_info), shm_manager_);

    if (err == nullptr) {
      err = TRITONSERVER_InferenceRequestSetReleaseCallback(
          irequest, InferRequestComplete,
          request_release_payload.get() /* request_release_userp */);
    }
    if (err == nullptr) {
      err = TRITONSERVER_InferenceRequestSetResponseCallback(
          irequest, allocator_,
          &state->alloc_payload_ /* response_allocator_userp */,
          StreamInferResponseComplete,
          response_release_payload.get() /* response_userp */);
    }

    if (err == nullptr) {
      TRITONSERVER_InferenceTrace* triton_trace = nullptr;
#ifdef TRITON_ENABLE_TRACING
      if (trace_manager_ != nullptr) {
        GrpcServerCarrier carrier(state->context_->ctx_.get());
        auto start_options =
            trace_manager_->GetTraceStartOptions(carrier, request.model_name());
        state->trace_ = std::move(trace_manager_->SampleTrace(start_options));
        if (state->trace_ != nullptr) {
          triton_trace = state->trace_->trace_;
        }
      }
#endif  // TRITON_ENABLE_TRACING

      // The step is set before the async call is made, so it is already
      // ISSUED by the time any completion callback runs.
      state->step_ = ISSUED;
      err = TRITONSERVER_ServerInferAsync(
          tritonserver_.get(), irequest, triton_trace);
    }

    // If there was not an error in issuing the 'state' request then
    // state->step_ == ISSUED and inference request has
    // initiated... the completion callback will transition to
    // WRITEREADY or WRITTEN or CANCELLED. Recording the state and the
    // irequest to handle gRPC stream cancellation.
    if (err == nullptr) {
      state->context_->InsertInflightState(state);
      // The payload will be cleaned in release callback.
      request_release_payload.release();
      response_release_payload.release();
    } else {
      // If there was an error then enqueue the error response and show
      // it to be ready for writing.
      inference::ModelStreamInferResponse* response;
      if (state->is_decoupled_) {
        state->response_queue_->AllocateResponse();
        response = state->response_queue_->GetLastAllocatedResponse();
      } else {
        response = state->response_queue_->GetNonDecoupledResponse();
      }

      // Get request ID for logging in case of error.
      std::string log_request_id = request.id();
      if (log_request_id.empty()) {
        log_request_id = "<id_unknown>";
      }
      LOG_VERBOSE(1) << "[request id: " << log_request_id << "] "
                     << "Infer failed: " << TRITONSERVER_ErrorMessage(err);

      // Convert the error into a gRPC status; only its message is carried in
      // the streamed response.
      ::grpc::Status status;
      GrpcStatusUtil::Create(&status, err);
      TRITONSERVER_ErrorDelete(err);
      response->set_error_message(status.error_message());
      response->mutable_infer_response()->Clear();
      // repopulate the id so that client knows which request failed.
      response->mutable_infer_response()->set_id(request.id());
      if (!state->is_decoupled_) {
        state->step_ = Steps::WRITEREADY;
        state->context_->WriteResponseIfReady(state);
      } else {
        InferHandler::State* writing_state = nullptr;
        // Lock order: context mutex first, then the state's step mutex.
        std::lock_guard<std::recursive_mutex> lk2(state->context_->mu_);
        {
          std::lock_guard<std::recursive_mutex> lk3(state->step_mtx_);
          state->response_queue_->MarkNextResponseComplete();
          state->context_->ready_to_write_states_.push(state);
          if (!state->context_->ongoing_write_) {
            // Only one write is allowed per gRPC stream / context at any time.
            // If the stream is not currently writing, start writing the next
            // ready to write response from the next ready to write state from
            // 'ready_to_write_states_'. If there are other responses on the
            // state ready to be written after starting the write, the state
            // will be placed at the back of the 'ready_to_write_states_'. If
            // there are no other response, the state will be marked as 'ISSUED'
            // if complete final flag is not received yet from the backend or
            // completed if complete final flag is received.
            // The 'ongoing_write_' will reset once the completion queue returns
            // a written state and no additional response on the stream is ready
            // to be written.
            state->context_->ongoing_write_ = true;
            writing_state = state->context_->ready_to_write_states_.front();
            state->context_->ready_to_write_states_.pop();
          }
          state->complete_ = true;
        }
        // The write is started after the step mutex is released; the context
        // mutex (lk2) is still held.
        if (writing_state != nullptr) {
          StateWriteResponse(writing_state);
        }
      }
    }

    // Now that the inference request is in flight, create a copy of
    // 'state' and use it to attempt another read from the connection
    // (i.e the next request in the stream).
    State* next_read_state =
        StateNew(tritonserver_.get(), context, Steps::READ);

#ifdef TRITON_ENABLE_TRACING
    // Capture a timestamp for the time when we start waiting for this
    // next request to read.
    // Can't create trace as we don't know the model to be requested,
    // track timestamps in 'state'
    next_read_state->trace_timestamps_.emplace_back(std::make_pair(
        "GRPC_WAITREAD_START", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

    next_read_state->context_->responder_->Read(
        &next_read_state->request_, next_read_state);
  } else if (state->step_ == Steps::PARTIAL_COMPLETION) {
    // Each of the following events moves the state one step closer to FINISH.
    state->step_ = Steps::COMPLETE;
  } else if (state->step_ == Steps::COMPLETE) {
    state->step_ = Steps::FINISH;
  } else if (state->step_ == Steps::FINISH) {
    // The RPC execution is finished hence the state
    // can be released.
    finished = true;
  } else if (!state->is_decoupled_) {
    // We handle the WRITTEN and WRITEREADY states a little
    // differently depending whether the inference request
    // is for a decoupled model or not. This is because the
    // grpc contract requires us to call Write() only once
    // on a task. Hence, for decoupled writes, we call only
    // one write and then wait for another notification from
    // the completion queue to execute pending Write()'s, if
    // any.

    //
    // Non-Decoupled state transitions
    //
    if (state->step_ == Steps::WRITTEN) {
      state->context_->ongoing_write_ = false;
#ifdef TRITON_ENABLE_TRACING
      state->trace_timestamps_.emplace_back(
          std::make_pair("GRPC_SEND_END", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

      // If the write failed (for example, client closed the stream)
      // mark that the stream did not complete successfully but don't
      // cancel right away... need to wait for any pending reads,
      // inferences and writes to complete.
      if (!rpc_ok) {
        LOG_VERBOSE(1) << "Write for " << Name() << ", rpc_ok=" << rpc_ok
                       << ", context " << state->context_->unique_id_ << ", "
                       << state->unique_id_ << " step " << state->step_
                       << ", failed";
        state->context_->finish_ok_ = false;
      }

      // Log an error if 'state' is not the expected next response. Mark
      // that the stream did not complete successfully but don't cancel
      // right away... need to wait for any pending reads, inferences
      // and writes to complete.
      if (!state->context_->PopCompletedResponse(state)) {
        LOG_ERROR << "Unexpected response for " << Name()
                  << ", rpc_ok=" << rpc_ok << ", context "
                  << state->context_->unique_id_ << ", " << state->unique_id_
                  << " step " << state->step_;
        state->context_->finish_ok_ = false;
      }

      // Write the next response if it is ready...
      state->context_->WriteResponseIfReady(nullptr);

      // The response for the request has been written completely.
      // The counter can be safely decremented.
      state->context_->DecrementRequestCounter();
      finished = Finish(state);
    }
  } else {
    //
    //  Decoupled state transitions
    //
    if (state->step_ == Steps::WRITTEN) {
#ifdef TRITON_ENABLE_TRACING
      state->trace_timestamps_.emplace_back(
          std::make_pair("GRPC_SEND_END", TraceManager::CaptureTimestamp()));
#endif  // TRITON_ENABLE_TRACING

      // If the write failed (for example, client closed the stream)
      // mark that the stream did not complete successfully but don't
      // cancel right away... need to wait for any pending reads,
      // inferences and writes to complete.
      if (!rpc_ok) {
        LOG_VERBOSE(1) << "Write for " << Name() << ", rpc_ok=" << rpc_ok
                       << ", context " << state->context_->unique_id_ << ", "
                       << state->unique_id_ << " step " << state->step_
                       << ", failed";
        state->context_->finish_ok_ = false;
      }

      {
        InferHandler::State* writing_state = nullptr;
        // Lock order: context mutex first, then the state's step mutex.
        std::lock_guard<std::recursive_mutex> lk2(state->context_->mu_);
        {
          std::lock_guard<std::recursive_mutex> lk3(state->step_mtx_);
          // Pick the next state with a response ready to be written, or
          // mark the stream as idle when there is none.
          if (!state->context_->ready_to_write_states_.empty()) {
            writing_state = state->context_->ready_to_write_states_.front();
            state->context_->ready_to_write_states_.pop();
          } else {
            state->context_->ongoing_write_ = false;
          }
          // Finish the state if all the transactions associated with
          // the state have completed.
          // When 'state' itself is the next writer it is left untouched
          // here; StateWriteResponse() below sets its step.
          if (state != writing_state) {
            if (state->IsComplete()) {
              state->context_->DecrementRequestCounter();
              finished = Finish(state);
            } else {
              state->step_ = Steps::ISSUED;
            }
          }
        }
        if (writing_state != nullptr) {
          StateWriteResponse(writing_state);
        }
      }
    } else if (state->step_ == Steps::WRITEREADY) {
      // Finish the state if all the transactions associated with
      // the state have completed.
      std::lock_guard<std::recursive_mutex> lk2(state->context_->mu_);
      {
        if (state->IsComplete()) {
          state->context_->DecrementRequestCounter();
          finished = Finish(state);
        } else {
          LOG_ERROR << "Should not print this! Decoupled should NOT write via "
                       "WRITEREADY!";
          // Remove the state from the completion queue
          std::lock_guard<std::recursive_mutex> lock(state->step_mtx_);
          state->step_ = Steps::ISSUED;
        }
      }
    }
  }

  return !finished;
}

// For decoupled only. Caller must ensure exclusive write.
//
// Optionally sleeps for the state's configured response delay, then, under
// the state's step mutex, marks the state WRITTEN, starts the write of its
// next response and re-queues the state if more responses are already ready.
void
ModelStreamInferHandler::StateWriteResponse(InferHandler::State* state)
{
  const bool delay_requested = (state->delay_response_ms_ != 0);
  if (delay_requested) {
    // Will delay the write of the response by the specified time.
    // This can be used to test the flow where there are other
    // responses available to be written.
    LOG_INFO << "Delaying the write of the response by "
             << state->delay_response_ms_ << " ms...";
    std::this_thread::sleep_for(
        std::chrono::milliseconds(state->delay_response_ms_));
  }

  std::lock_guard<std::recursive_mutex> step_guard(state->step_mtx_);
  const auto& stream_context = state->context_;
  state->step_ = Steps::WRITTEN;
  // gRPC doesn't allow to issue another write till the notification from
  // previous write has been delivered.
  stream_context->DecoupledWriteResponse(state);
  if (!state->response_queue_->HasReadyResponse()) {
    return;
  }
  stream_context->ready_to_write_states_.push(state);
}

// Decides how to retire 'state' after its work is done.
//
// Returns true only when the state itself can be finished right away. Returns
// false when the whole stream is being finished through the responder, or
// when the state is the async-notify state and must wait for its completion
// event.
bool
ModelStreamInferHandler::Finish(InferHandler::State* state)
{
  const auto& stream_context = state->context_;

  // If done reading and no in-flight requests then can finish the
  // entire stream.
  if (stream_context->IsRequestsCompleted()) {
    stream_context->step_ = Steps::COMPLETE;
    state->step_ = Steps::PARTIAL_COMPLETION;
    LOG_VERBOSE(2) << "Finishing responder from state " << state->unique_id_;
    const ::grpc::Status& final_status = stream_context->finish_ok_
                                             ? ::grpc::Status::OK
                                             : ::grpc::Status::CANCELLED;
    stream_context->responder_->Finish(final_status, state);
    return false;
  }

  if (state->IsAsyncNotifyState()) {
    // Should only mark the state complete as the state has been sent
    // to AsyncNotifyWhenDone() tag and the completion event should take
    // care of finally releasing the state object.
    state->step_ = Steps::COMPLETE;
    return false;
  }

  // Can finish this state.
  state->step_ = Steps::FINISH;
  return true;
}

// Response-complete callback for streaming inference. 'userp' carries a
// ResponseReleasePayload whose 'state_' is the handler state the response
// belongs to. 'iresponse' may be nullptr (flags-only notification) and
// 'flags' may carry TRITONSERVER_RESPONSE_COMPLETE_FINAL to mark the last
// callback for the request.
//
// The function converts 'iresponse' into the protobuf response slot that was
// reserved in the state's response queue, tags it with
// "triton_final_response", and then either schedules the write (decoupled
// and non-decoupled paths differ) or, if the gRPC context was cancelled,
// drops the response and recycles the state through the completion queue.
// The release payload is deleted once the final callback has been handled.
void
ModelStreamInferHandler::StreamInferResponseComplete(
    TRITONSERVER_InferenceResponse* iresponse, const uint32_t flags,
    void* userp)
{
  ResponseReleasePayload* response_release_payload(
      static_cast<ResponseReleasePayload*>(userp));
  auto state = response_release_payload->state_;

  // In triton_grpc_error mode, ignore any further response from CORE once a
  // gRPC error has already been recorded for this stream.
  // NOTE(review): neither 'iresponse' nor 'response_release_payload' is
  // released on this early-return path - confirm that is intended.
  if (state->context_->gRPCErrorTracker_->triton_grpc_error_) {
    std::lock_guard<std::recursive_mutex> lock(state->context_->mu_);
    if (state->context_->gRPCErrorTracker_->GRPCErrorEncountered()) {
      return;
    }
  }
  // Increment the callback index. 'response_index' is the value before the
  // increment, i.e. the zero-based index of this callback.
  uint32_t response_index = state->cb_count_++;

  // Note that the "callback index" logged here is the already incremented
  // counter, i.e. response_index + 1.
  LOG_VERBOSE(1) << "ModelStreamInferHandler::StreamInferComplete, context "
                 << state->context_->unique_id_ << ", " << state->unique_id_
                 << " step " << state->step_ << ", callback index "
                 << state->cb_count_ << ", flags " << flags
                 << ", response is nullptr " << (iresponse == nullptr);

#ifdef TRITON_ENABLE_TRACING
  // Record the response-complete timestamp only for the first callback.
  if (state->cb_count_ == 1) {
    state->trace_timestamps_.emplace_back(std::make_pair(
        "INFER_RESPONSE_COMPLETE", TraceManager::CaptureTimestamp()));
  }
#endif  // TRITON_ENABLE_TRACING

  // The state is complete if it was already marked complete or if this
  // callback carries the FINAL flag.
  bool is_complete =
      state->complete_ || (flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != 0;

  // If receiving the final callback then erase the state from the inflight
  // state data structure to prevent cancellation being called on the request.
  // Also make sure that if this state was sent to gRPC async notification
  // mechanism then the state is not removed as it would be needed for handling
  // the cancellation if detected.
  if (is_complete && (!state->IsAsyncNotifyState())) {
    state->context_->EraseInflightState(state);
  }

  // First cancellation check: performed before any response is generated.
  if (state->IsGrpcContextCancelled()) {
    std::lock_guard<std::recursive_mutex> lock(state->step_mtx_);
    // Clean-up the received response object.
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceResponseDelete(iresponse),
        "deleting GRPC inference response");

    LOG_VERBOSE(1) << "ModelStreamInferHandler::StreamInferResponseComplete, "
                   << state->unique_id_
                   << ", skipping response generation as grpc transaction was "
                      "cancelled... ";

    // If this was the final callback for the state
    // then cycle through the completion queue so
    // that state object can be released.
    if (is_complete) {
      state->step_ = Steps::CANCELLED;
      state->context_->PutTaskBackToQueue(state);
      delete response_release_payload;
    }

    state->complete_ = is_complete;
    return;
  }

  auto& response_queue = state->response_queue_;
  // Request id used only for log messages; a placeholder is substituted when
  // the request did not provide one.
  std::string log_request_id = state->request_.id();
  if (log_request_id.empty()) {
    log_request_id = "<id_unknown>";
  }

  inference::ModelStreamInferResponse* response = nullptr;
  bool failed = false;
  if (iresponse) {
    // Backend returned a non-null response
    TRITONSERVER_Error* err = nullptr;
    // Fetch the protobuf response slot for this callback index; a missing
    // slot is only logged and 'response' stays nullptr.
    response = response_queue->GetResponseAt(response_index);
    if (response) {
      inference::ModelInferResponse& infer_response =
          *(response->mutable_infer_response());
      // Validate Triton iresponse and set grpc/protobuf response fields from it
      err = InferResponseCompleteCommon<inference::ModelStreamInferResponse>(
          state->tritonserver_, iresponse, infer_response,
          state->alloc_payload_);
    } else {
      LOG_ERROR << "expected the response allocator to have added the response";
    }
    // 'err' can only be non-null when 'response' is non-null, so the
    // dereferences of 'response' below are safe.
    if (err != nullptr) {
      failed = true;
      ::grpc::Status status;
      // Converts CORE errors to GRPC error codes
      GrpcStatusUtil::Create(&status, err);
      response->mutable_infer_response()->Clear();
      response->set_error_message(status.error_message());
      LOG_VERBOSE(1) << "Failed for ID: " << log_request_id << std::endl;
      if (state->context_->gRPCErrorTracker_->triton_grpc_error_) {
        state->status_ = status;
        // Finish only once, if backend ignores cancellation
        // NOTE(review): the log below has no separator between the error
        // code and "Closing stream connection.".
        LOG_VERBOSE(1) << "GRPC streaming error detected with status: "
                       << status.error_code() << "Closing stream connection."
                       << std::endl;
        state->context_->WriteGRPCErrorResponse(state);
        TRITONSERVER_ErrorDelete(err);
        LOG_TRITONSERVER_ERROR(
            TRITONSERVER_InferenceResponseDelete(iresponse),
            "deleting GRPC inference response");
        delete response_release_payload;
        return;
      }
    }

    // Release the error (may be nullptr) and the Triton response object now
    // that its contents have been handled.
    TRITONSERVER_ErrorDelete(err);
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceResponseDelete(iresponse),
        "deleting GRPC inference response");
  }

  // Decoupled backends can return a null response via
  // TRITONBACKEND_ResponseFactorySendFlags. By default, these null
  // "empty" responses are not sent back to the client. Clients can
  // opt-in to receiving these empty responses via request parameters.
  // NOTE: The complete flag is the only flag used for this case at this time.
  const bool empty_final = !iresponse && state->is_decoupled_ && is_complete;
  const bool enable_empty_final =
      state->parameters_.enable_empty_final_response_;

  const bool create_empty_response = (empty_final && enable_empty_final);
  if (create_empty_response) {
    // Assume decoupled here based on prior checks.
    state->response_queue_->AllocateResponse();
    response = state->response_queue_->GetLastAllocatedResponse();
    if (response) {
      LOG_VERBOSE(1) << "[request id: " << log_request_id << "] "
                     << "Creating empty final response";
      response->mutable_infer_response()->Clear();
    } else {
      LOG_ERROR << "expected the response allocator to have added the response";
    }
  }

  if (response) {
    auto& infer_response = *(response->mutable_infer_response());
    // Set response metadata to associate it with request. These will be set
    // by InferResponseCompleteCommon for successful inference.
    if (create_empty_response || failed) {
      infer_response.set_id(state->request_.id());
      infer_response.set_model_name(state->request_.model_name());
      infer_response.set_model_version(state->request_.model_version());
    }
    // Tell the client whether this is the final response for the request.
    auto& params = *(infer_response.mutable_parameters());
    params["triton_final_response"].set_bool_param(is_complete);
  }

  if (state->delay_complete_ms_ != 0) {
    // Delay updating the state. This is useful for testing race condition with
    // the thread that runs Process().
    LOG_INFO << "Delaying the completion of reporting response / flag by "
             << state->delay_complete_ms_ << " ms...";
    void* context_ptr_before_delay = (void*)state->context_.get();
    std::this_thread::sleep_for(
        std::chrono::milliseconds(state->delay_complete_ms_));
    void* context_ptr_after_delay = (void*)state->context_.get();
    // Sanity check: the context pointer must be unchanged across the delay.
    if (context_ptr_before_delay != context_ptr_after_delay) {
      LOG_ERROR << "Should not print this! The state context object has "
                   "changed after delay, pointer before: "
                << context_ptr_before_delay
                << ", pointer after: " << context_ptr_after_delay;
    }
  }

  // Second cancellation check: the context may have been cancelled while the
  // response was being generated. The Triton response object has already
  // been released above, so only the state bookkeeping is done here.
  if (state->IsGrpcContextCancelled()) {
    // Need to hold lock because the handler thread processing context
    // cancellation might have cancelled or marked the state for cancellation.
    std::lock_guard<std::recursive_mutex> lock(state->step_mtx_);

    LOG_VERBOSE(1)
        << "ModelStreamInferHandler::StreamInferResponseComplete, "
        << state->unique_id_
        << ", skipping writing response because of transaction was cancelled";

    // If this was the final callback for the state
    // then cycle through the completion queue so
    // that state object can be released.
    if (is_complete) {
      state->step_ = Steps::CANCELLED;
      state->context_->PutTaskBackToQueue(state);
      delete response_release_payload;
    }

    state->complete_ = is_complete;
    return;
  }

  if (state->is_decoupled_) {
    InferHandler::State* writing_state = nullptr;
    // Lock order: context mutex first, then the state's step mutex.
    std::lock_guard<std::recursive_mutex> lk1(state->context_->mu_);
    {
      std::lock_guard<std::recursive_mutex> lk2(state->step_mtx_);
      // Queue this state for writing only when it transitions from having no
      // ready response to having one.
      bool has_prev_ready_response = state->response_queue_->HasReadyResponse();
      if (response) {
        state->response_queue_->MarkNextResponseComplete();
      }
      if (!has_prev_ready_response && response) {
        state->context_->ready_to_write_states_.push(state);
      }
      // If no write is in flight on the stream, claim the next ready state
      // and write it once the step mutex has been released.
      if (!state->context_->ongoing_write_ &&
          !state->context_->ready_to_write_states_.empty()) {
        state->context_->ongoing_write_ = true;
        writing_state = state->context_->ready_to_write_states_.front();
        state->context_->ready_to_write_states_.pop();
      }
      if (is_complete && state->response_queue_->IsEmpty() &&
          state->step_ == Steps::ISSUED) {
        // The response queue is empty and complete final flag is received, so
        // mark the state as 'WRITEREADY' so it can be cleaned up later.
        state->step_ = Steps::WRITEREADY;
        state->context_->PutTaskBackToQueue(state);
      }
      state->complete_ = is_complete;
    }
    if (writing_state != nullptr) {
      StateWriteResponse(writing_state);
    }
  } else {  // non-decoupled
    std::lock_guard<std::recursive_mutex> lock(state->step_mtx_);
    state->step_ = Steps::WRITEREADY;
    if (is_complete) {
      state->context_->WriteResponseIfReady(state);
    }
    state->complete_ = is_complete;
  }

  // The release payload is only needed until the final callback.
  if (is_complete) {
    delete response_release_payload;
  }
}

// Changes the state of grpc_stream_error_state_ to ERROR_HANDLING_COMPLETE,
// indicating we have closed the stream and initiated the cancel flow
void
gRPCErrorTracker::MarkGRPCErrorHandlingComplete()
{
  grpc_stream_error_state_ = TritonGRPCErrorSteps::ERROR_HANDLING_COMPLETE;
}

// Reports whether a GRPC_ERROR from CORE is still waiting to be processed
// (state is ERROR_ENCOUNTERED). When it is, the state is advanced to
// ERROR_HANDLING_COMPLETE as a side effect, so only the first caller
// observes 'true'.
bool
gRPCErrorTracker::CheckAndUpdateGRPCError()
{
  const bool error_pending =
      (grpc_stream_error_state_ == TritonGRPCErrorSteps::ERROR_ENCOUNTERED);
  if (error_pending) {
    // The caller is about to run HandleCancellation, so record the error as
    // handled.
    MarkGRPCErrorHandlingComplete();
  }
  return error_pending;
}

// Marks error after it has been responded to
void
gRPCErrorTracker::MarkGRPCErrorEncountered()
{
  grpc_stream_error_state_ = TritonGRPCErrorSteps::ERROR_ENCOUNTERED;
}

// In triton_grpc_error mode, tells whether an error has already been
// responded to: true for every tracker state other than NONE.
bool
gRPCErrorTracker::GRPCErrorEncountered()
{
  return grpc_stream_error_state_ != TritonGRPCErrorSteps::NONE;
}

}}}  // namespace triton::server::grpc


================================================
FILE: src/grpc/stream_infer_handler.h
================================================
// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include "infer_handler.h"

namespace triton { namespace server { namespace grpc {

// Buffer allocation callback for streaming inference responses. It is
// registered as the allocation function of the response allocator created
// in ModelStreamInferHandler's constructor.
// Make sure to keep InferResponseAlloc and OutputBufferQuery logic in sync
TRITONSERVER_Error* StreamInferResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id);

//
// Additional Stream Infer utilities
//

// Response start callback. It is registered as the start function of the
// response allocator created in ModelStreamInferHandler's constructor.
TRITONSERVER_Error* StreamInferResponseStart(
    TRITONSERVER_ResponseAllocator* allocator, void* userp);

// Output buffer query callback. It is registered through
// TRITONSERVER_ResponseAllocatorSetQueryFunction in ModelStreamInferHandler's
// constructor.
// Make sure to keep InferResponseAlloc and OutputBufferQuery logic in sync
TRITONSERVER_Error* StreamOutputBufferQuery(
    TRITONSERVER_ResponseAllocator* allocator, void* userp,
    const char* tensor_name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);

// Output buffer attributes callback. It is registered through
// TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction in
// ModelStreamInferHandler's constructor.
// Make sure to keep InferResponseAlloc, OutputBufferQuery, and
// OutputBufferAttributes logic in sync
TRITONSERVER_Error* StreamOutputBufferAttributes(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    TRITONSERVER_BufferAttributes* buffer_attributes, void* userp,
    void* buffer_userp);

// gRPC handler for the bidirectional streaming inference RPC. It specializes
// InferHandler with a ServerAsyncReaderWriter that reads ModelInferRequest
// messages and writes ModelStreamInferResponse messages, and owns the Triton
// response allocator used for the output tensors of those requests.
class ModelStreamInferHandler
    : public InferHandler<
          inference::GRPCInferenceService::AsyncService,
          ::grpc::ServerAsyncReaderWriter<
              inference::ModelStreamInferResponse,
              inference::ModelInferRequest>,
          inference::ModelInferRequest, inference::ModelStreamInferResponse> {
 public:
  // Forwards the common handler arguments to InferHandler, stores the trace
  // manager, shared memory manager and compression level, and creates the
  // response allocator wired to the Stream* callbacks. Allocator setup
  // failures are reported through FAIL_IF_ERR.
  ModelStreamInferHandler(
      const std::string& name,
      const std::shared_ptr<TRITONSERVER_Server>& tritonserver,
      TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      inference::GRPCInferenceService::AsyncService* service,
      ::grpc::ServerCompletionQueue* cq, size_t max_state_bucket_count,
      size_t max_response_queue_size, grpc_compression_level compression_level,
      std::pair<std::string, std::string> restricted_kv,
      const std::string& header_forward_pattern, std::shared_mutex* conn_mtx,
      std::atomic<uint32_t>* conn_cnt, bool* accepting_new_conn)
      : InferHandler(
            name, tritonserver, service, cq, max_state_bucket_count,
            max_response_queue_size, restricted_kv, header_forward_pattern,
            conn_mtx, conn_cnt, accepting_new_conn),
        trace_manager_(trace_manager), shm_manager_(shm_manager),
        compression_level_(compression_level)
  {
    // Create the allocator that will be used to allocate buffers for
    // the result tensors.
    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorNew(
            &allocator_, StreamInferResponseAlloc, InferResponseFree,
            StreamInferResponseStart),
        "creating response allocator");
    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorSetQueryFunction(
            allocator_, StreamOutputBufferQuery),
        "setting allocator's query function");
    FAIL_IF_ERR(
        TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction(
            allocator_, StreamOutputBufferAttributes),
        "setting allocator's output buffer attribute query function");
  }

  // Deletes the response allocator created in the constructor; a failure to
  // delete is only logged.
  ~ModelStreamInferHandler()
  {
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_ResponseAllocatorDelete(allocator_),
        "deleting response allocator");
  }

 protected:
  // InferHandler overrides.
  void StartNewRequest() override;
  bool Process(State* state, bool rpc_ok, bool is_notification) override;

 private:
  // Response-complete callback; 'userp' is cast to a ResponseReleasePayload
  // by the implementation.
  static void StreamInferResponseComplete(
      TRITONSERVER_InferenceResponse* response, const uint32_t flags,
      void* userp);
  // Writes a response for 'state'; called from StreamInferResponseComplete
  // for the state selected from the ready-to-write queue.
  static void StateWriteResponse(InferHandler::State* state);
  // Returns true when 'state' can be finished right away; returns false when
  // completion has been handed off to the responder or to the async
  // notification event.
  bool Finish(State* state);

  // Non-owning pointer supplied by the creator of the handler.
  TraceManager* trace_manager_;
  std::shared_ptr<SharedMemoryManager> shm_manager_;
  // Created in the constructor and deleted in the destructor.
  TRITONSERVER_ResponseAllocator* allocator_;

  // Compression level passed in at construction.
  grpc_compression_level compression_level_;
};

}}}  // namespace triton::server::grpc


================================================
FILE: src/http_server.cc
================================================
// Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifdef _WIN32
#define NOMINMAX
#endif

#include "http_server.h"

#include <event2/buffer.h>
#include <re2/re2.h>

#include <algorithm>
#include <list>
#include <regex>
#include <thread>

#include "classification.h"

#define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
#define TRITONJSON_STATUSRETURN(M) \
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
#define TRITONJSON_STATUSSUCCESS nullptr
#include "triton/common/triton_json.h"

namespace triton { namespace server {

// Evaluates X once. If it yields a non-null TRITONSERVER_Error*, passes the
// error to CALLBACK, deletes the error and returns from the enclosing
// function. Only usable in functions returning void.
#define RETURN_AND_CALLBACK_IF_ERR(X, CALLBACK) \
  do {                                          \
    TRITONSERVER_Error* err__ = (X);            \
    if (err__ != nullptr) {                     \
      CALLBACK(err__);                          \
      TRITONSERVER_ErrorDelete(err__);          \
      return;                                   \
    }                                           \
  } while (false)

// Evaluates X once. If it yields a non-null TRITONSERVER_Error*, writes the
// error as JSON into the output buffer of REQ, sends the reply with the HTTP
// code derived from the error, deletes the error and returns from the
// enclosing function. Only usable in functions returning void.
#define RETURN_AND_RESPOND_IF_ERR(REQ, X)                \
  do {                                                   \
    TRITONSERVER_Error* err__ = (X);                     \
    if (err__ != nullptr) {                              \
      EVBufferAddErrorJson((REQ)->buffer_out, err__);    \
      evhtp_send_reply((REQ), HttpCodeFromError(err__)); \
      TRITONSERVER_ErrorDelete(err__);                   \
      return;                                            \
    }                                                    \
  } while (false)

// Unconditionally writes MSG as a JSON error into the output buffer of REQ,
// sends the reply with HTTP status CODE and returns from the enclosing
// function. Only usable in functions returning void.
#define RETURN_AND_RESPOND_WITH_ERR(REQ, CODE, MSG) \
  do {                                              \
    EVBufferAddErrorJson((REQ)->buffer_out, MSG);   \
    evhtp_send_reply((REQ), CODE);                  \
    return;                                         \
  } while (false)

// Looks up RESTRICTED_CATEGORY in RESTRICTED_APIS. If the category is
// restricted and RespondIfRestricted(REQ, restriction) returns true, returns
// from the enclosing function. Only usable in functions returning void.
// Note that the restriction is fetched with Get() even when the category is
// not restricted.
#define RETURN_AND_RESPOND_IF_RESTRICTED(                               \
    REQ, RESTRICTED_CATEGORY, RESTRICTED_APIS)                          \
  do {                                                                  \
    auto const& is_restricted_api =                                     \
        RESTRICTED_APIS.IsRestricted(RESTRICTED_CATEGORY);              \
    auto const& restriction = RESTRICTED_APIS.Get(RESTRICTED_CATEGORY); \
    if (is_restricted_api && RespondIfRestricted(REQ, restriction)) {   \
      return;                                                           \
    }                                                                   \
  } while (false)


namespace {

// Translates a Triton error into the evhtp status code sent to the client.
// A null error maps to EVHTP_RES_OK. Error codes without a directly matching
// HTTP status (UNKNOWN, INVALID_ARG, ALREADY_EXISTS, CANCELLED) and any
// unrecognized code map to EVHTP_RES_BADREQ.
int
HttpCodeFromError(TRITONSERVER_Error* error)
{
  if (error == nullptr) {
    return EVHTP_RES_OK;
  }

  const auto code = TRITONSERVER_ErrorCode(error);
  if (code == TRITONSERVER_ERROR_INTERNAL) {
    return EVHTP_RES_SERVERR;
  }
  if (code == TRITONSERVER_ERROR_NOT_FOUND) {
    return EVHTP_RES_NOTFOUND;
  }
  if (code == TRITONSERVER_ERROR_UNAVAILABLE) {
    return EVHTP_RES_SERVUNAVAIL;
  }
  if (code == TRITONSERVER_ERROR_UNSUPPORTED) {
    return EVHTP_RES_NOTIMPL;
  }

  // Everything else is reported as a bad request.
  return EVHTP_RES_BADREQ;
}

// Serializes the JSON object {"error": <message>} and appends it to
// 'buffer'. 'message' must be a valid NUL-terminated string.
void
EVBufferAddErrorJson(evbuffer* buffer, const char* message)
{
  using JsonValue = triton::common::TritonJson::Value;
  using JsonWriteBuffer = triton::common::TritonJson::WriteBuffer;

  JsonValue error_json(triton::common::TritonJson::ValueType::OBJECT);
  error_json.AddStringRef("error", message, strlen(message));

  JsonWriteBuffer serialized;
  error_json.Write(&serialized);

  evbuffer_add(buffer, serialized.Base(), serialized.Size());
}

// Convenience overload: appends the message of 'err' to 'buffer' as a JSON
// error object via the const char* overload.
void
EVBufferAddErrorJson(evbuffer* buffer, TRITONSERVER_Error* err)
{
  EVBufferAddErrorJson(buffer, TRITONSERVER_ErrorMessage(err));
}

// Sets the Content-Type response header of 'req' to 'type', replacing a
// previously set Content-Type header if one is found.
void
AddContentTypeHeader(evhtp_request_t* req, const char* type)
{
  auto* out_headers = req->headers_out;

  // Drop the existing header, if any, before adding the new one.
  if (auto* existing =
          evhtp_headers_find_header(out_headers, kContentTypeHeader)) {
    evhtp_header_rm_and_free(out_headers, existing);
  }

  auto* replacement = evhtp_header_new(kContentTypeHeader, type, 1, 1);
  evhtp_headers_add_header(out_headers, replacement);
}

// Copies the request parameter named 'parameter' from 'params_json' onto
// 'irequest', using the Triton setter that matches the JSON value type
// (string, integer, boolean or double).
//
// Returns nullptr on success. Returns an INTERNAL error when the key is not
// present in 'params_json', an INVALID_ARG error when the value has an
// unsupported JSON type, and otherwise forwards any error from the JSON
// accessors or the Triton setters.
TRITONSERVER_Error*
SetTritonParameterFromJsonParameter(
    const std::string& parameter,
    triton::common::TritonJson::Value& params_json,
    TRITONSERVER_InferenceRequest* irequest)
{
  triton::common::TritonJson::Value value;
  if (!params_json.Find(parameter.c_str(), &value)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        ("parameter key '" + parameter + "' was not found in the JSON")
            .c_str());
  }

  // The order of the checks matters: integers and booleans are tested before
  // the generic number check so that only the remaining numeric values are
  // forwarded as doubles.
  if (value.IsString()) {
    std::string string_value;
    RETURN_IF_ERR(value.AsString(&string_value));
    RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetStringParameter(
        irequest, parameter.c_str(), string_value.c_str()));
  } else if (value.IsInt()) {
    int64_t int_value;
    RETURN_IF_ERR(value.AsInt(&int_value));
    RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetIntParameter(
        irequest, parameter.c_str(), int_value));
  } else if (value.IsBool()) {
    bool bool_value;
    RETURN_IF_ERR(value.AsBool(&bool_value));
    RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetBoolParameter(
        irequest, parameter.c_str(), bool_value));
  } else if (value.IsNumber()) {
    double double_value;
    RETURN_IF_ERR(value.AsDouble(&double_value));
    RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetDoubleParameter(
        irequest, parameter.c_str(), double_value));
  } else {
    // List every type accepted above, including 'double', so the message
    // matches what the function actually supports.
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        ("parameter '" + parameter +
         "' has invalid type. It should be either "
         "'int', 'bool', 'double', or 'string'.")
            .c_str());
  }
  return nullptr;  // success
}

}  // namespace

// Starts the HTTP service: creates the event base and the evhtp instance,
// binds the listening socket, registers the event used by Stop() to break
// the event loop, and launches the event loop on 'worker_'.
//
// Returns nullptr on success. Returns ALREADY_EXISTS when the worker thread
// is already running, UNAVAILABLE when the address/port cannot be bound, and
// INTERNAL when the socket pair or the break event cannot be created. On
// every failure path the resources created by this call are released, so a
// failed Start() does not leak the evhtp instance (and its thread pool) or
// the event base.
TRITONSERVER_Error*
HTTPServer::Start()
{
  if (worker_.joinable()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_ALREADY_EXISTS, "HTTP server is already running.");
  }

  evbase_ = event_base_new();
  htp_ = evhtp_new(evbase_, NULL);
  evhtp_enable_flag(htp_, EVHTP_FLAG_ENABLE_NODELAY);
  if (reuse_port_) {
    evhtp_enable_flag(htp_, EVHTP_FLAG_ENABLE_REUSEPORT);
  }
  evhtp_set_gencb(htp_, HTTPServer::Dispatch, this);
  evhtp_set_pre_accept_cb(htp_, HTTPServer::NewConnection, this);
  evhtp_use_threads_wexit(htp_, NULL, NULL, thread_cnt_, NULL);

  // Releases what this call created, in the same order Stop() uses.
  const auto release_server = [this](const bool socket_bound) {
    if (socket_bound) {
      evhtp_unbind_socket(htp_);
    }
    evhtp_free(htp_);
    htp_ = nullptr;
    event_base_free(evbase_);
    evbase_ = nullptr;
  };

  if (evhtp_bind_socket(htp_, address_.c_str(), port_, 1024) != 0) {
    release_server(false /* socket_bound */);
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNAVAILABLE,
        (std::string("Socket '") + address_ + ":" + std::to_string(port_) +
         "' already in use ")
            .c_str());
  }

  // Set listening event for breaking event loop
  if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, fds_) != 0) {
    release_server(true /* socket_bound */);
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "failed to create socket pair for stopping the HTTP server");
  }
  break_ev_ = event_new(evbase_, fds_[0], EV_READ, StopCallback, evbase_);
  if (break_ev_ == nullptr) {
    evutil_closesocket(fds_[0]);
    evutil_closesocket(fds_[1]);
    release_server(true /* socket_bound */);
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "failed to create event for stopping the HTTP server");
  }
  event_add(break_ev_, NULL);
  worker_ = std::thread(event_base_loop, evbase_, 0);

  return nullptr;
}

// Stops the HTTP service. New connections are refused first. If
// 'exit_timeout_secs' is provided, the call waits in one-second steps, while
// connections remain and the timeout has not run out, decrementing
// '*exit_timeout_secs' for each second spent. Then the event loop is broken,
// the worker thread is joined and the libevent / evhtp resources are freed.
// 'service_name' is only used in the log message.
//
// Returns nullptr on success, or UNAVAILABLE if the server is not running.
TRITONSERVER_Error*
HTTPServer::Stop(uint32_t* exit_timeout_secs, const std::string& service_name)
{
  {
    std::lock_guard<std::mutex> lock(conn_mu_);
    accepting_new_conn_ = false;
  }

  if (exit_timeout_secs != nullptr) {
    // Note: conn_cnt_ can only decrease
    for (; *exit_timeout_secs > 0 && conn_cnt_ > 0; (*exit_timeout_secs)--) {
      LOG_INFO << "Timeout " << *exit_timeout_secs << ": Found " << conn_cnt_
               << " " << service_name << " service connections";
      std::this_thread::sleep_for(std::chrono::seconds(1));
    }
  }

  if (!worker_.joinable()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNAVAILABLE, "HTTP server is not running.");
  }

  // Notify event loop to break via fd write
  send(fds_[1], (const char*)&evbase_, sizeof(event_base*), 0);
  worker_.join();

  // Tear down in the order: break event, socket pair, evhtp, event base.
  event_free(break_ev_);
  evutil_closesocket(fds_[0]);
  evutil_closesocket(fds_[1]);
  evhtp_unbind_socket(htp_);
  evhtp_free(htp_);
  event_base_free(evbase_);
  return nullptr;
}

// Event callback registered by Start(); 'arg' is the event base whose loop
// should be broken. 'sock' and 'events' are unused.
void
HTTPServer::StopCallback(evutil_socket_t sock, short events, void* arg)
{
  event_base_loopbreak(static_cast<struct event_base*>(arg));
}

// Generic evhtp request callback; 'arg' is the HTTPServer instance that
// handles the request.
void
HTTPServer::Dispatch(evhtp_request_t* req, void* arg)
{
  HTTPServer* server = static_cast<HTTPServer*>(arg);
  server->Handle(req);
}

// Pre-accept callback; 'arg' is the HTTPServer instance. Rejects the
// connection with EVHTP_RES_SERVUNAVAIL once the server has stopped
// accepting connections. Otherwise it counts the connection and installs
// EndConnection as the hook that runs when the connection finishes.
evhtp_res
HTTPServer::NewConnection(evhtp_connection_t* conn, void* arg)
{
  auto* self = static_cast<HTTPServer*>(arg);
  {
    std::lock_guard<std::mutex> guard(self->conn_mu_);
    if (self->accepting_new_conn_) {
      self->conn_cnt_++;
    } else {
      return EVHTP_RES_SERVUNAVAIL;  // reset connection
    }
  }

  evhtp_connection_set_hook(
      conn, evhtp_hook_on_connection_fini,
      (evhtp_hook)(void*)HTTPServer::EndConnection, arg);
  return EVHTP_RES_OK;
}

// Connection-finished hook installed by NewConnection; 'arg' is the
// HTTPServer instance. Decrements the connection count under the connection
// mutex.
evhtp_res
HTTPServer::EndConnection(evhtp_connection_t* conn, void* arg)
{
  auto* self = static_cast<HTTPServer*>(arg);
  {
    std::lock_guard<std::mutex> guard(self->conn_mu_);
    self->conn_cnt_--;
  }
  return EVHTP_RES_OK;
}

#ifdef TRITON_ENABLE_METRICS

// Handles a request to the metrics service. Only GET is allowed. When the
// request path fully matches the metrics endpoint, the Prometheus-formatted
// metrics are written to the response body. An error while collecting or
// formatting the metrics is reported to the client as a JSON error.
void
HTTPMetricsServer::Handle(evhtp_request_t* req)
{
  LOG_VERBOSE(1) << "HTTP request: " << req->method << " "
                 << req->uri->path->full;

  if (req->method != htp_method_GET) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  evhtp_headers_add_header(
      req->headers_out,
      evhtp_header_new(kContentTypeHeader, "text/plain; charset=utf-8", 1, 1));

  // Call to metric endpoint should not have any trailing string
  const std::string request_path(req->uri->path->full);
  if (RE2::FullMatch(request_path, api_regex_)) {
    TRITONSERVER_Metrics* metrics = nullptr;
    const char* formatted = nullptr;
    size_t formatted_size = 0;

    // Each step only runs when every previous step succeeded.
    TRITONSERVER_Error* err =
        TRITONSERVER_ServerMetrics(server_.get(), &metrics);
    if (err == nullptr) {
      err = TRITONSERVER_MetricsFormatted(
          metrics, TRITONSERVER_METRIC_PROMETHEUS, &formatted,
          &formatted_size);
    }
    if (err == nullptr) {
      evbuffer_add(req->buffer_out, formatted, formatted_size);
    }

    TRITONSERVER_MetricsDelete(metrics);
    RETURN_AND_RESPOND_IF_ERR(req, err);
    TRITONSERVER_ErrorDelete(err);
  }

  evhtp_send_reply(req, EVHTP_RES_OK);
}

// Creates an HTTPMetricsServer for 'server' that is configured with
// 'address', 'port' and 'thread_cnt', and stores it in '*metrics_server'.
// Always returns nullptr.
TRITONSERVER_Error*
HTTPMetricsServer::Create(
    const std::shared_ptr<TRITONSERVER_Server>& server, const int32_t port,
    std::string address, const int thread_cnt,
    std::unique_ptr<HTTPServer>* metrics_server)
{
  std::unique_ptr<HTTPServer> created(
      new HTTPMetricsServer(server, port, address, thread_cnt));
  *metrics_server = std::move(created);

  LOG_INFO << "Started Metrics Service at " << address << ":" << port;

  return nullptr;
}

// Creates an HTTPMetricsServer from an options map. Reads the "port",
// "address" and "thread_count" entries and forwards them to the explicit
// Create() overload. Returns the first lookup error, if any.
TRITONSERVER_Error*
HTTPMetricsServer::Create(
    std::shared_ptr<TRITONSERVER_Server>& server,
    const UnorderedMapType& options, std::unique_ptr<HTTPServer>* service)
{
  int port_option;
  RETURN_IF_ERR(GetValue(options, "port", &port_option));

  std::string address_option;
  RETURN_IF_ERR(GetValue(options, "address", &address_option));

  int thread_count_option;
  RETURN_IF_ERR(GetValue(options, "thread_count", &thread_count_option));

  return Create(
      server, port_option, address_option, thread_count_option, service);
}

#endif  // TRITON_ENABLE_METRICS

namespace {

// Creates a new evbuffer holding one contiguous, committed region of
// 'byte_size' bytes. On success '*evb' receives the buffer, '*base' the
// address of the region, and nullptr is returned. On failure the buffer is
// freed and an INTERNAL error is returned; '*evb' is left untouched.
TRITONSERVER_Error*
AllocEVBuffer(const size_t byte_size, evbuffer** evb, void** base)
{
  evbuffer* new_buffer = evbuffer_new();
  if (new_buffer == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "failed to create evbuffer for output tensor");
  }

  // Frees the partially prepared buffer and builds the error to return.
  const auto discard_and_fail =
      [new_buffer](const std::string& reason) -> TRITONSERVER_Error* {
    evbuffer_free(new_buffer);
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, reason.c_str());
  };

  // Reserve requested space in evbuffer as a single extent...
  struct evbuffer_iovec region;
  const int extent_count =
      evbuffer_reserve_space(new_buffer, byte_size, &region, 1);
  if (extent_count != 1) {
    return discard_and_fail(
        "failed to reserve " + std::to_string(byte_size) +
        " bytes in output tensor buffer");
  }
  if (region.iov_len < byte_size) {
    return discard_and_fail(
        "reserved " + std::to_string(region.iov_len) +
        " bytes in output tensor buffer, need " + std::to_string(byte_size));
  }

  region.iov_len = byte_size;
  *base = region.iov_base;

  // Immediately commit the buffer space. We are relying on evbuffer
  // not to relocate this space. Because we request a contiguous
  // chunk every time (above by allowing only a single entry in
  // the iovec), this seems to be a valid assumption.
  if (evbuffer_commit_space(new_buffer, &region, 1) != 0) {
    *base = nullptr;
    return discard_and_fail("failed to commit output tensors to output buffer");
  }

  *evb = new_buffer;
  return nullptr;  // success
}

// Compute the serialized size of a (possibly nested) JSON array of strings.
// Every string contributes its length plus a 4-byte length prefix. The total
// is stored in 'byte_size'. Nesting deeper than HTTP_MAX_JSON_NESTING_DEPTH
// is rejected with an INVALID_ARG error.
TRITONSERVER_Error*
JsonBytesArrayByteSize(
    triton::common::TritonJson::Value& tensor_data, size_t* byte_size,
    int current_depth = 0)
{
  if (current_depth >= HTTP_MAX_JSON_NESTING_DEPTH) {
    const std::string msg =
        ("JSON nesting depth exceeds maximum allowed "
         "limit (" +
         std::to_string(HTTP_MAX_JSON_NESTING_DEPTH) + ")");
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
  }

  *byte_size = 0;

  // Last dimension reached: a single string element.
  if (!tensor_data.IsArray()) {
    const char* str;
    size_t len = 0;
    RETURN_MSG_IF_ERR(
        tensor_data.AsString(&str, &len), "Unable to parse JSON bytes array");
    // Length of the string itself plus the 4 bytes that record its length.
    *byte_size += len + sizeof(uint32_t);
    return nullptr;  // success
  }

  // Otherwise sum up the sizes of all children, one level deeper.
  const size_t child_cnt = tensor_data.ArraySize();
  for (size_t idx = 0; idx < child_cnt; ++idx) {
    triton::common::TritonJson::Value child;
    RETURN_IF_ERR(tensor_data.At(idx, &child));
    size_t child_size = 0;
    RETURN_IF_ERR(JsonBytesArrayByteSize(child, &child_size, current_depth + 1));
    *byte_size += child_size;
  }

  return nullptr;  // success
}

// Store 'value', converted to element type T, at element index '*counter' of
// the buffer starting at 'base', then advance '*counter' by one element.
template <typename T, typename V>
void
StoreJsonElement(char* base, int* counter, const V value)
{
  reinterpret_cast<T*>(base)[*counter] = static_cast<T>(value);
  *counter += 1;
}

// Recursively walk the JSON value 'tensor_data' and write its leaf elements
// into the buffer at 'base', interpreted according to 'dtype'.
//
// 'counter' tracks the write position: for fixed-size types it is an element
// index, for TRITONSERVER_TYPE_BYTES it is a byte offset into 'base'.
// 'expected_cnt' is the limit for 'counter' in the same unit; writing past it
// is reported as a shape mismatch. Data types not handled by the switch
// (e.g. FP16/BF16) write nothing and leave 'counter' untouched.
//
// Returns nullptr on success, or an error if the nesting depth exceeds
// HTTP_MAX_JSON_NESTING_DEPTH, an element cannot be parsed as the requested
// type, or the data does not fit in 'expected_cnt'.
TRITONSERVER_Error*
ReadDataFromJsonHelper(
    char* base, const TRITONSERVER_DataType dtype,
    triton::common::TritonJson::Value& tensor_data, int* counter,
    int64_t expected_cnt, int current_depth = 0)
{
  // FIXME should move 'switch' statement outside the recursive function and
  // pass in a read data callback once data type is confirmed.
  // Currently 'switch' is performed on each element even though all elements
  // have the same data type.

  if (current_depth >= HTTP_MAX_JSON_NESTING_DEPTH || current_depth < 0) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        ("JSON nesting depth exceeds maximum allowed "
         "limit (" +
         std::to_string(HTTP_MAX_JSON_NESTING_DEPTH) + ")")
            .c_str());
  }

  // Recurse on array element if not last dimension...
  if (tensor_data.IsArray()) {
    for (size_t i = 0; i < tensor_data.ArraySize(); i++) {
      triton::common::TritonJson::Value el;
      RETURN_IF_ERR(tensor_data.At(i, &el));
      RETURN_IF_ERR(ReadDataFromJsonHelper(
          base, dtype, el, counter, expected_cnt, current_depth + 1));
    }
    return nullptr;  // success
  }

  // Check if writing to 'base' is overrunning the expected count
  if (*counter < 0 || static_cast<int64_t>(*counter) >= expected_cnt) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Shape does not match true shape of 'data' field");
  }

  // FIXME narrowing stores below are not range checked: an out-of-range
  // unsigned value is truncated, and for signed types the result of
  // assigning to a smaller type is implementation defined.
  switch (dtype) {
    case TRITONSERVER_TYPE_BOOL: {
      bool b = false;
      RETURN_IF_ERR(tensor_data.AsBool(&b));
      StoreJsonElement<uint8_t>(base, counter, b ? 1 : 0);
      break;
    }
    case TRITONSERVER_TYPE_UINT8: {
      uint64_t ui = 0;
      RETURN_IF_ERR(tensor_data.AsUInt(&ui));
      StoreJsonElement<uint8_t>(base, counter, ui);
      break;
    }
    case TRITONSERVER_TYPE_UINT16: {
      uint64_t ui = 0;
      RETURN_IF_ERR(tensor_data.AsUInt(&ui));
      StoreJsonElement<uint16_t>(base, counter, ui);
      break;
    }
    case TRITONSERVER_TYPE_UINT32: {
      uint64_t ui = 0;
      RETURN_IF_ERR(tensor_data.AsUInt(&ui));
      StoreJsonElement<uint32_t>(base, counter, ui);
      break;
    }
    case TRITONSERVER_TYPE_UINT64: {
      uint64_t ui = 0;
      RETURN_IF_ERR(tensor_data.AsUInt(&ui));
      StoreJsonElement<uint64_t>(base, counter, ui);
      break;
    }
    case TRITONSERVER_TYPE_INT8: {
      int64_t si = 0;
      RETURN_IF_ERR(tensor_data.AsInt(&si));
      StoreJsonElement<int8_t>(base, counter, si);
      break;
    }
    case TRITONSERVER_TYPE_INT16: {
      int64_t si = 0;
      RETURN_IF_ERR(tensor_data.AsInt(&si));
      StoreJsonElement<int16_t>(base, counter, si);
      break;
    }
    case TRITONSERVER_TYPE_INT32: {
      int64_t si = 0;
      RETURN_IF_ERR(tensor_data.AsInt(&si));
      StoreJsonElement<int32_t>(base, counter, si);
      break;
    }
    case TRITONSERVER_TYPE_INT64: {
      int64_t si = 0;
      RETURN_IF_ERR(tensor_data.AsInt(&si));
      StoreJsonElement<int64_t>(base, counter, si);
      break;
    }
    case TRITONSERVER_TYPE_FP32: {
      double fp64 = 0;
      RETURN_IF_ERR(tensor_data.AsDouble(&fp64));
      StoreJsonElement<float>(base, counter, fp64);
      break;
    }
    case TRITONSERVER_TYPE_FP64: {
      double fp64 = 0;
      RETURN_IF_ERR(tensor_data.AsDouble(&fp64));
      StoreJsonElement<double>(base, counter, fp64);
      break;
    }
    case TRITONSERVER_TYPE_BYTES: {
      const char* cstr{nullptr};
      size_t len{0};
      RETURN_IF_ERR(tensor_data.AsString(&cstr, &len));
      // The serialized form records the length in a 4-byte prefix, so a
      // longer string cannot be represented and must not be silently
      // truncated.
      if (len > UINT32_MAX) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            "Tensor size is too large to be processed");
      }
      // Quick sanity check to ensure we don't write beyond `expected_cnt`.
      const int64_t actual_cnt = static_cast<int64_t>(*counter) +
                                 static_cast<int64_t>(len) +
                                 static_cast<int64_t>(sizeof(uint32_t));
      if (actual_cnt < 0 || actual_cnt > expected_cnt) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            "Shape does not match true shape of 'data' field");
      }
      // Write the prefix from a real 32-bit value. Copying the first four
      // bytes of the 'size_t' would pick up the wrong half of the value on
      // big-endian hosts.
      const uint32_t len_prefix = static_cast<uint32_t>(len);
      memcpy(base + *counter, &len_prefix, sizeof(len_prefix));
      std::copy(cstr, cstr + len, base + *counter + sizeof(uint32_t));
      *counter += len + sizeof(uint32_t);
      break;
    }
    default:
      break;
  }

  return nullptr;  // success
}

// Parse the JSON 'data' field of input 'tensor_name' into the buffer at
// 'base' as elements of type 'dtype'. FP16, BF16 and INVALID data types are
// rejected with an INVALID_ARG error. After parsing, the position reached by
// ReadDataFromJsonHelper must equal 'expected_cnt', otherwise a shape
// mismatch error is returned.
TRITONSERVER_Error*
ReadDataFromJson(
    const char* tensor_name, triton::common::TritonJson::Value& tensor_data,
    char* base, const TRITONSERVER_DataType dtype, int64_t expected_cnt)
{
  // First weed out the data types that cannot be carried in JSON at all.
  switch (dtype) {
    case TRITONSERVER_TYPE_FP16: {
      const std::string msg =
          std::string(
              "receiving FP16 data via JSON is not supported. Please use the "
              "binary data format for input ") +
          tensor_name;
      return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
    }

    case TRITONSERVER_TYPE_BF16: {
      const std::string msg =
          std::string(
              "receiving BF16 data via JSON is not supported. Please use the "
              "binary data format for input ") +
          tensor_name;
      return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
    }

    case TRITONSERVER_TYPE_INVALID: {
      const std::string msg =
          std::string("invalid datatype for input ") + tensor_name;
      return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
    }

    default:
      break;
  }

  int position = 0;
  RETURN_MSG_IF_ERR(
      ReadDataFromJsonHelper(base, dtype, tensor_data, &position, expected_cnt),
      "Unable to parse 'data'");

  // The helper guards against reading too much; reading too little is
  // detected here.
  if (position != expected_cnt) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Unable to parse 'data': Shape does not match true shape of 'data' "
        "field");
  }

  return nullptr;
}

// Return an INTERNAL error naming 'output_name' unless 'byte_size' equals
// 'expected_size'.
TRITONSERVER_Error*
WriteDataToJsonCheck(
    const std::string& output_name, const size_t byte_size,
    const size_t expected_size)
{
  if (byte_size == expected_size) {
    return nullptr;  // success
  }

  const std::string msg =
      "output tensor shape does not match size of output for '" + output_name +
      "'";
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, msg.c_str());
}

// Append the 'element_count' elements stored at 'base' to the JSON array
// 'data_json', interpreting them as 'datatype'.
//
// For fixed-size types 'byte_size' must equal element size times
// 'element_count'. For TRITONSERVER_TYPE_BYTES the buffer is a sequence of
// elements, each a 4-byte length followed by that many bytes; every element
// must lie within 'byte_size'. String elements are appended by reference, so
// 'base' must outlive the serialization of 'data_json'.
//
// Returns nullptr on success, an INTERNAL error naming 'output_name' on a
// size mismatch, or an INVALID_ARG error for FP16, BF16 and INVALID types.
TRITONSERVER_Error*
WriteDataToJson(
    triton::common::TritonJson::Value* data_json,
    const std::string& output_name, const TRITONSERVER_DataType datatype,
    const void* base, const size_t byte_size, const size_t element_count)
{
  switch (datatype) {
    case TRITONSERVER_TYPE_BOOL: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(uint8_t) * element_count));
      const uint8_t* bool_base = reinterpret_cast<const uint8_t*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        // Any non-zero byte counts as true.
        RETURN_IF_ERR(data_json->AppendBool(bool_base[e] != 0));
      }
      break;
    }

    case TRITONSERVER_TYPE_UINT8: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(uint8_t) * element_count));
      const uint8_t* cbase = reinterpret_cast<const uint8_t*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        RETURN_IF_ERR(data_json->AppendUInt(cbase[e]));
      }
      break;
    }

    case TRITONSERVER_TYPE_UINT16: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(uint16_t) * element_count));
      const uint16_t* cbase = reinterpret_cast<const uint16_t*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        RETURN_IF_ERR(data_json->AppendUInt(cbase[e]));
      }
      break;
    }

    case TRITONSERVER_TYPE_UINT32: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(uint32_t) * element_count));
      const uint32_t* cbase = reinterpret_cast<const uint32_t*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        RETURN_IF_ERR(data_json->AppendUInt(cbase[e]));
      }
      break;
    }

    case TRITONSERVER_TYPE_UINT64: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(uint64_t) * element_count));
      const uint64_t* cbase = reinterpret_cast<const uint64_t*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        RETURN_IF_ERR(data_json->AppendUInt(cbase[e]));
      }
      break;
    }

    case TRITONSERVER_TYPE_INT8: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(int8_t) * element_count));
      const int8_t* cbase = reinterpret_cast<const int8_t*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        RETURN_IF_ERR(data_json->AppendInt(cbase[e]));
      }
      break;
    }

    case TRITONSERVER_TYPE_INT16: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(int16_t) * element_count));
      const int16_t* cbase = reinterpret_cast<const int16_t*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        RETURN_IF_ERR(data_json->AppendInt(cbase[e]));
      }
      break;
    }

    case TRITONSERVER_TYPE_INT32: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(int32_t) * element_count));
      const int32_t* cbase = reinterpret_cast<const int32_t*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        RETURN_IF_ERR(data_json->AppendInt(cbase[e]));
      }
      break;
    }

    case TRITONSERVER_TYPE_INT64: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(int64_t) * element_count));
      const int64_t* cbase = reinterpret_cast<const int64_t*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        RETURN_IF_ERR(data_json->AppendInt(cbase[e]));
      }
      break;
    }

    // FP16 not supported via JSON
    case TRITONSERVER_TYPE_FP16:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "sending FP16 data via JSON is not supported. Please use the "
          "binary data format for output");

    // BF16 not supported via JSON
    case TRITONSERVER_TYPE_BF16:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "sending BF16 data via JSON is not supported. Please use the "
          "binary data format for output");

    case TRITONSERVER_TYPE_FP32: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(float) * element_count));
      const float* cbase = reinterpret_cast<const float*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        RETURN_IF_ERR(data_json->AppendDouble(cbase[e]));
      }
      break;
    }

    case TRITONSERVER_TYPE_FP64: {
      RETURN_IF_ERR(WriteDataToJsonCheck(
          output_name, byte_size, sizeof(double) * element_count));
      const double* cbase = reinterpret_cast<const double*>(base);
      for (size_t e = 0; e < element_count; ++e) {
        RETURN_IF_ERR(data_json->AppendDouble(cbase[e]));
      }
      break;
    }

    case TRITONSERVER_TYPE_BYTES: {
      const char* cbase = reinterpret_cast<const char*>(base);
      // Invariant: offset <= byte_size, so 'byte_size - offset' never wraps.
      // The checks are written as subtractions because 'offset + N' could
      // overflow.
      size_t offset = 0;
      for (size_t e = 0; e < element_count; ++e) {
        if ((byte_size - offset) < sizeof(uint32_t)) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INTERNAL,
              std::string(
                  "output tensor shape does not match size of output for '" +
                  output_name + "'")
                  .c_str());
        }

        // Elements are packed back to back, so the length prefix is not
        // necessarily 4-byte aligned; copy it out instead of dereferencing
        // a cast pointer.
        uint32_t len = 0;
        memcpy(&len, cbase + offset, sizeof(len));
        offset += sizeof(len);

        if (len > (byte_size - offset)) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INTERNAL,
              std::string(
                  "output tensor shape does not match size of output for '" +
                  output_name + "'")
                  .c_str());
        }

        // Can use stringref because 'base' buffer is not deleted
        // until response is deleted and that happens after this json
        // is serialized.
        RETURN_IF_ERR(data_json->AppendStringRef(cbase + offset, len));
        offset += len;
      }
      break;
    }

    case TRITONSERVER_TYPE_INVALID:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "Invalid data type for output tensor");
  }

  return nullptr;  // success
}

// Determine whether 'request_input' carries its data in binary form, i.e.
// whether its 'parameters' object contains 'binary_data_size'.
//
// 'is_binary' is always set. 'byte_size' is only written when
// 'binary_data_size' is present and parses successfully. Returns an error if
// the value cannot be parsed as an unsigned integer or does not fit in
// 'size_t'.
TRITONSERVER_Error*
CheckBinaryInputData(
    triton::common::TritonJson::Value& request_input, bool* is_binary,
    size_t* byte_size)
{
  *is_binary = false;

  triton::common::TritonJson::Value params_json;
  if (request_input.Find("parameters", &params_json)) {
    triton::common::TritonJson::Value binary_data_size_json;
    if (params_json.Find("binary_data_size", &binary_data_size_json)) {
      // Parse into a real uint64_t. Handing the parser a reinterpret_cast
      // 'size_t*' would write 8 bytes through it regardless of how wide
      // 'size_t' actually is.
      uint64_t parsed_size = 0;
      RETURN_MSG_IF_ERR(
          binary_data_size_json.AsUInt(&parsed_size),
          "Unable to parse 'binary_data_size'");

      // Round-trip check: only differs where 'size_t' is narrower than 64
      // bits.
      const size_t narrowed_size = static_cast<size_t>(parsed_size);
      if (static_cast<uint64_t>(narrowed_size) != parsed_size) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "Unable to parse 'binary_data_size': value is too large");
      }

      *byte_size = narrowed_size;
      *is_binary = true;
    }
  }

  return nullptr;  // success
}

// Read the boolean 'binary_data' entry from the 'parameters' object of
// 'request_output' into 'is_binary'. 'is_binary' is false when the entry, or
// 'parameters' itself, is absent.
TRITONSERVER_Error*
CheckBinaryOutputData(
    triton::common::TritonJson::Value& request_output, bool* is_binary)
{
  *is_binary = false;

  triton::common::TritonJson::Value parameters;
  if (!request_output.Find("parameters", &parameters)) {
    return nullptr;  // success
  }

  triton::common::TritonJson::Value binary_flag;
  if (!parameters.Find("binary_data", &binary_flag)) {
    return nullptr;  // success
  }

  RETURN_MSG_IF_ERR(
      binary_flag.AsBool(is_binary), "Unable to parse 'binary_data'");

  return nullptr;  // success
}

// Extract the shared-memory settings from the 'parameters' object of
// 'request_input'. 'use_shm' reports whether 'shared_memory_region' is
// present, in which case 'shm_region' receives the region name. 'offset' and
// 'byte_size' default to 0 and are overwritten by 'shared_memory_offset' and
// 'shared_memory_byte_size' when those are given.
TRITONSERVER_Error*
CheckSharedMemoryData(
    triton::common::TritonJson::Value& request_input, bool* use_shm,
    const char** shm_region, uint64_t* offset, uint64_t* byte_size)
{
  *use_shm = false;
  *offset = 0;
  *byte_size = 0;

  triton::common::TritonJson::Value parameters;
  if (!request_input.Find("parameters", &parameters)) {
    return nullptr;  // success
  }

  triton::common::TritonJson::Value region_value;
  if (parameters.Find("shared_memory_region", &region_value)) {
    // The flag is raised before the name is parsed.
    *use_shm = true;
    size_t region_len;
    RETURN_MSG_IF_ERR(
        region_value.AsString(shm_region, &region_len),
        "Unable to parse 'shared_memory_region'");
  }

  triton::common::TritonJson::Value offset_value;
  if (parameters.Find("shared_memory_offset", &offset_value)) {
    RETURN_MSG_IF_ERR(
        offset_value.AsUInt(offset), "Unable to parse 'shared_memory_offset'");
  }

  triton::common::TritonJson::Value size_value;
  if (parameters.Find("shared_memory_byte_size", &size_value)) {
    RETURN_MSG_IF_ERR(
        size_value.AsUInt(byte_size),
        "Unable to parse 'shared_memory_byte_size'");
  }

  return nullptr;  // success
}

// Read the unsigned 'classification' entry from the 'parameters' object of
// 'request_output' into 'num_classes'. 'num_classes' is 0 when the entry, or
// 'parameters' itself, is absent.
TRITONSERVER_Error*
CheckClassificationOutput(
    triton::common::TritonJson::Value& request_output, uint64_t* num_classes)
{
  *num_classes = 0;

  triton::common::TritonJson::Value parameters;
  if (!request_output.Find("parameters", &parameters)) {
    return nullptr;  // success
  }

  triton::common::TritonJson::Value class_count;
  if (!parameters.Find("classification", &class_count)) {
    return nullptr;  // success
  }

  RETURN_MSG_IF_ERR(
      class_count.AsUInt(num_classes), "Unable to set 'classification'");

  return nullptr;  // success
}

// Verify that the input 'io' names exactly one source for its contents:
// a 'data' field, 'binary_data_size' in 'parameters', or
// 'shared_memory_region' in 'parameters'. Otherwise return an INVALID_ARG
// error that lists which of them were set.
TRITONSERVER_Error*
ValidateInputContentType(triton::common::TritonJson::Value& io)
{
  const bool has_data = io.Find("data");

  bool has_binary = false;
  bool has_shared_memory = false;
  triton::common::TritonJson::Value parameters;
  if (io.Find("parameters", &parameters)) {
    has_binary = parameters.Find("binary_data_size");
    has_shared_memory = parameters.Find("shared_memory_region");
  }

  const int source_cnt = (has_data ? 1 : 0) + (has_binary ? 1 : 0) +
                         (has_shared_memory ? 1 : 0);
  if (source_cnt == 1) {
    return nullptr;  // success
  }

  // Either nothing or more than one source was specified; describe which.
  std::string msg =
      "Input must set only one of the following fields: 'data', "
      "'binary_data_size' in 'parameters', 'shared_memory_region' in "
      "'parameters'. But";
  if (source_cnt == 0) {
    msg += " no field is set";
  } else {
    msg += " set";
    if (has_data) {
      msg += " 'data'";
    }
    if (has_binary) {
      msg += " 'binary_data_size'";
    }
    if (has_shared_memory) {
      msg += " 'shared_memory_region'";
    }
  }

  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
}

// Reject output parameter combinations that cannot be honored: an output
// that requests 'shared_memory_region' may neither request 'classification'
// nor set 'binary_data' to true.
TRITONSERVER_Error*
ValidateOutputParameter(triton::common::TritonJson::Value& io)
{
  triton::common::TritonJson::Value parameters;
  if (!io.Find("parameters", &parameters)) {
    return nullptr;  // success
  }

  // All restrictions below only apply to shared-memory outputs.
  if (!parameters.Find("shared_memory_region")) {
    return nullptr;  // success
  }

  // Classification results are not stored in shared memory; they are
  // computed internally from the results, so the two can't be combined.
  if (parameters.Find("classification")) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "Output can't set both 'shared_memory_region' and "
        "'classification'");
  }

  triton::common::TritonJson::Value binary_flag;
  if (!parameters.Find("binary_data", &binary_flag)) {
    return nullptr;  // success
  }

  bool wants_binary = false;
  RETURN_MSG_IF_ERR(
      binary_flag.AsBool(&wants_binary), "Unable to set 'binary_data'");
  if (wants_binary) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "Output can't set both 'shared_memory_region' and 'binary_data'");
  }

  return nullptr;  // success
}

// Select the response compression from an Accept-Encoding header value.
//
// 'accept_encoding' is a comma separated list of entries of the form
// 'coding' or 'coding;q=weight'. Among the supported codings ("identity",
// "deflate", "gzip") the one with the highest weight is returned; on a tie
// the entry listed first wins. An entry without a weight counts as weight 1.
// Optional spaces and tabs around the coding and around the ';' are
// tolerated. Entries that are empty, carry a parameter other than a weight,
// or have an unparsable weight are ignored. Returns "identity" when no
// supported coding has a weight greater than 0.
std::string
CompressionTypeUsed(const std::string accept_encoding)
{
  // Strip leading and trailing spaces/tabs.
  const auto trim = [](const std::string& text) -> std::string {
    const auto first = text.find_first_not_of(" \t");
    if (first == std::string::npos) {
      return std::string();
    }
    const auto last = text.find_last_not_of(" \t");
    return text.substr(first, last - first + 1);
  };

  std::string res = "identity";
  double weight = 0;

  // Walk the comma separated entries; the text after the last comma (or the
  // whole string when there is none) is an entry as well.
  size_t offset = 0;
  while (offset <= accept_encoding.size()) {
    size_t delimiter_pos = accept_encoding.find(',', offset);
    if (delimiter_pos == std::string::npos) {
      delimiter_pos = accept_encoding.size();
    }
    const std::string entry =
        accept_encoding.substr(offset, delimiter_pos - offset);
    offset = delimiter_pos + 1;

    // Split 'coding [; q=weight]' at the first ';'.
    const size_t param_pos = entry.find(';');
    const std::string type = trim(entry.substr(0, param_pos));
    if (type.empty()) {
      continue;  // malformed: no coding name
    }

    double type_weight = 1;
    if (param_pos != std::string::npos) {
      const std::string param = trim(entry.substr(param_pos + 1));
      const bool is_weight = (param.size() >= 2) &&
                             ((param[0] == 'q') || (param[0] == 'Q')) &&
                             (param[1] == '=');
      if (!is_weight) {
        continue;  // malformed: unknown parameter
      }
      try {
        type_weight = std::stod(param.substr(2));
      }
      catch (const std::out_of_range&) {
        continue;
      }
      catch (const std::invalid_argument&) {
        continue;
      }
    }

    if (((type == "identity") || (type == "deflate") || (type == "gzip")) &&
        (type_weight > weight)) {
      res = type;
      weight = type_weight;
    }
  }

  return res;
}

}  // namespace

// Construct the HTTP API endpoint: set up the URI matchers for the v2 REST
// routes, cache the server metadata as JSON (or the error from obtaining
// it), create the response allocator with its query and buffer-attributes
// callbacks, and configure the generate mapping schema.
HTTPAPIServer::HTTPAPIServer(
    const std::shared_ptr<TRITONSERVER_Server>& server,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<SharedMemoryManager>& shm_manager, const int32_t port,
    const bool reuse_port, const std::string& address,
    const std::string& header_forward_pattern, const int thread_cnt,
    const size_t max_input_size, const RestrictedFeatures& restricted_apis)
    : HTTPServer(port, reuse_port, address, header_forward_pattern, thread_cnt),
      server_(server), trace_manager_(trace_manager), shm_manager_(shm_manager),
      allocator_(nullptr), server_regex_(R"(/v2(?:/health/(live|ready))?)"),
      model_regex_(
          R"(/v2/models/([^/]+)(?:/versions/([0-9]+))?(?:/(infer|generate|generate_stream|ready|config|stats|trace/setting))?)"),
      modelcontrol_regex_(
          R"(/v2/repository(?:/([^/]+))?/(index|models/([^/]+)/(load|unload)))"),
      systemsharedmemory_regex_(
          R"(/v2/systemsharedmemory(?:/region/([^/]+))?/(status|register|unregister))"),
      cudasharedmemory_regex_(
          R"(/v2/cudasharedmemory(?:/region/([^/]+))?/(status|register|unregister))"),
      trace_regex_(R"(/v2/trace/setting)"), max_input_size_(max_input_size),
      restricted_apis_(restricted_apis)
{
  // FIXME, don't cache server metadata. The http endpoint should
  // not be deciding that server metadata will not change during
  // execution.
  TRITONSERVER_Message* message = nullptr;
  server_metadata_err_ = TRITONSERVER_ServerMetadata(server_.get(), &message);
  if (server_metadata_err_ == nullptr) {
    const char* buffer = nullptr;
    size_t byte_size = 0;
    server_metadata_err_ =
        TRITONSERVER_MessageSerializeToJson(message, &buffer, &byte_size);
    // Only cache the JSON when serialization succeeded; on failure 'buffer'
    // and 'byte_size' do not describe valid data. The error is kept in
    // 'server_metadata_err_'.
    if ((server_metadata_err_ == nullptr) && (buffer != nullptr)) {
      server_metadata_ = std::string(buffer, byte_size);
    }
  }

  // The cached string is a copy, so the message can be released now.
  if (message != nullptr) {
    TRITONSERVER_MessageDelete(message);
  }

  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorNew(
          &allocator_, InferResponseAlloc, InferResponseFree,
          nullptr /* start_fn */),
      "creating response allocator");
  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorSetQueryFunction(
          allocator_, OutputBufferQuery),
      "setting allocator's query function");
  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction(
          allocator_, OutputBufferAttributes),
      "setting allocator's buffer attributes function");

  ConfigureGenerateMappingSchema();
}

// Release the cached server-metadata error, if any, and delete the response
// allocator. A failure to delete the allocator is only logged.
HTTPAPIServer::~HTTPAPIServer()
{
  LOG_VERBOSE(1) << "~HTTPAPIServer()";

  if (server_metadata_err_ != nullptr) {
    TRITONSERVER_ErrorDelete(server_metadata_err_);
  }

  TRITONSERVER_Error* allocator_delete_err =
      TRITONSERVER_ResponseAllocatorDelete(allocator_);
  LOG_TRITONSERVER_ERROR(allocator_delete_err, "deleting response allocator");
}

// Response allocator callback: provide the buffer that output 'tensor_name'
// of 'byte_size' bytes is written to.
//
// 'userp' is the AllocPayload of the request. If the request registered an
// OutputInfo for the tensor it is removed from the payload's output map and
// used; otherwise a fresh OutputInfo with the payload's default kind is
// created. For shared-memory outputs the buffer is the region recorded in
// the OutputInfo; for all other outputs with 'byte_size' > 0 an evbuffer in
// CPU memory is allocated. When 'byte_size' is 0 'buffer' stays nullptr.
//
// On success the OutputInfo is handed out through 'buffer_userp'
// (InferResponseFree deletes it). On every error path it is deleted here.
//
// Make sure to keep InferResponseAlloc, OutputBufferQuery, and
// OutputBufferAttributes logic in sync
TRITONSERVER_Error*
HTTPAPIServer::InferResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  AllocPayload* payload = reinterpret_cast<AllocPayload*>(userp);
  std::unordered_map<std::string, AllocPayload::OutputInfo*>& output_map =
      payload->output_map_;
  const AllocPayload::OutputInfo::Kind default_output_kind =
      payload->default_output_kind_;

  // Start from "no buffer, memory as preferred"; the branches below refine
  // these outputs.
  *buffer = nullptr;
  *buffer_userp = nullptr;
  *actual_memory_type = preferred_memory_type;
  *actual_memory_type_id = preferred_memory_type_id;

  AllocPayload::OutputInfo* info = nullptr;

  // If we don't find an output then it means that the output wasn't
  // explicitly specified in the request. In that case we create an
  // OutputInfo for it that uses the payload's default output kind.
  auto pr = output_map.find(tensor_name);
  if (pr == output_map.end()) {
    info = new AllocPayload::OutputInfo(default_output_kind, 0);
  } else {
    // Take ownership of the OutputInfo object.
    info = pr->second;
    output_map.erase(pr);
  }

  // If the output is in shared memory...
  if (info->kind_ == AllocPayload::OutputInfo::SHM) {
    // ...then make sure shared memory size is at least as big as
    // the size of the output.
    if (byte_size > info->byte_size_) {
      // Copy the size out first, it is needed for the message after 'info'
      // is gone.
      const auto info_byte_size = info->byte_size_;
      delete info;
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          std::string(
              "shared memory size specified with the request for output '" +
              std::string(tensor_name) + "' (" +
              std::to_string(info_byte_size) + " bytes) should be at least " +
              std::to_string(byte_size) + " bytes to hold the results")
              .c_str());
    }

    // Report the region's own memory type and device rather than the
    // preferred ones.
    *buffer = const_cast<void*>(info->base_);
    *actual_memory_type = info->memory_type_;
    *actual_memory_type_id = info->device_id_;
    *buffer_userp = reinterpret_cast<void*>(info);

    LOG_VERBOSE(1) << "HTTP: using shared-memory for '" << tensor_name
                   << "', size: " << byte_size << ", addr: " << *buffer;
    return nullptr;  // Success
  }

  // Don't need to do anything if no memory was requested.
  if (byte_size > 0) {
    // Can't allocate for any memory type other than CPU. If asked to
    // allocate on GPU memory then force allocation on CPU instead.
    if (*actual_memory_type != TRITONSERVER_MEMORY_CPU) {
      LOG_VERBOSE(1) << "HTTP: unable to provide '" << tensor_name << "' in "
                     << TRITONSERVER_MemoryTypeString(*actual_memory_type)
                     << ", will use "
                     << TRITONSERVER_MemoryTypeString(TRITONSERVER_MEMORY_CPU);
      *actual_memory_type = TRITONSERVER_MEMORY_CPU;
      *actual_memory_type_id = 0;
    }

    evbuffer* evhttp_buffer;
    TRITONSERVER_Error* err = AllocEVBuffer(byte_size, &evhttp_buffer, buffer);
    if (err != nullptr) {
      delete info;
      return err;
    }

    // Associate info with the evbuffer with this allocation.
    // Ownership passes to 'buffer_userp' which has the same lifetime
    // as the buffer itself.
    info->evbuffer_ = evhttp_buffer;

    LOG_VERBOSE(1) << "HTTP using buffer for: '" << tensor_name
                   << "', size: " << byte_size << ", addr: " << *buffer;
  }

  // Also reached for zero-sized outputs: 'info' is handed out without an
  // evbuffer in that case.
  *buffer_userp = reinterpret_cast<void*>(info);

  return nullptr;  // Success
}

// Response allocator callback: complete the buffer attributes of an output.
// Only the CUDA IPC handle is set here, and only for a named output that
// lives in GPU shared memory; every other attribute is left as received.
//
// Make sure to keep InferResponseAlloc, OutputBufferQuery, and
// OutputBufferAttributes logic in sync
TRITONSERVER_Error*
HTTPAPIServer::OutputBufferAttributes(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    TRITONSERVER_BufferAttributes* buffer_attributes, void* userp,
    void* buffer_userp)
{
  AllocPayload::OutputInfo* output_info =
      reinterpret_cast<AllocPayload::OutputInfo*>(buffer_userp);

  // 'output_info' is only inspected when a tensor name is given.
  const bool in_gpu_shm =
      (tensor_name != nullptr) &&
      (output_info->kind_ == AllocPayload::OutputInfo::SHM) &&
      (output_info->memory_type_ == TRITONSERVER_MEMORY_GPU);
  if (in_gpu_shm) {
    RETURN_IF_ERR(TRITONSERVER_BufferAttributesSetCudaIpcHandle(
        buffer_attributes, output_info->cuda_ipc_handle_));
  }

  return nullptr;  // Success
}

// Make sure to keep InferResponseAlloc and OutputBufferQuery logic in sync
TRITONSERVER_Error*
HTTPAPIServer::OutputBufferQuery(
    TRITONSERVER_ResponseAllocator* allocator, void* userp,
    const char* tensor_name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  // Response-allocator callback reporting which memory type / id the
  // allocator would use for the named output. Always succeeds; any size
  // problem is left to be reported by the allocation callback.
  auto* alloc_payload = static_cast<AllocPayload*>(userp);

  if (tensor_name != nullptr) {
    const auto entry = alloc_payload->output_map_.find(tensor_name);
    const bool uses_shm =
        (entry != alloc_payload->output_map_.end()) &&
        (entry->second->kind_ == AllocPayload::OutputInfo::SHM);
    if (uses_shm) {
      // When a size is supplied, the shared memory region must be able to
      // hold it. If it cannot, report the default CPU properties instead of
      // failing here.
      const bool region_too_small =
          (byte_size != nullptr) && (*byte_size > entry->second->byte_size_);
      if (region_too_small) {
        *memory_type = TRITONSERVER_MEMORY_CPU;
        *memory_type_id = 0;
      } else {
        *memory_type = entry->second->memory_type_;
        *memory_type_id = entry->second->device_id_;
      }
      return nullptr;  // Success
    }
  }

  // No shared memory for this output: an evhtp buffer is used, which is
  // always CPU memory.
  *memory_type = TRITONSERVER_MEMORY_CPU;
  *memory_type_id = 0;
  return nullptr;  // Success
}

TRITONSERVER_Error*
HTTPAPIServer::InferResponseFree(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  // Response-allocator release callback. The memory behind 'buffer' belongs
  // to shared memory or an evbuffer, so it is not freed here; only the
  // per-output bookkeeping object carried in 'buffer_userp' is destroyed.
  LOG_VERBOSE(1) << "HTTP release: "
                 << "size " << byte_size << ", addr " << buffer;

  delete static_cast<AllocPayload::OutputInfo*>(buffer_userp);

  return nullptr;  // Success
}

void
HTTPAPIServer::HandleServerHealth(evhtp_request_t* req, const std::string& kind)
{
  // Health endpoint: 'kind' equal to "live" queries liveness, any other
  // value queries readiness. Replies 200 when the probe is true and 400
  // otherwise.
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::HEALTH, restricted_apis_);

  if (req->method != htp_method_GET) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  bool probe_ok = false;
  TRITONSERVER_Error* probe_err =
      (kind == "live") ? TRITONSERVER_ServerIsLive(server_.get(), &probe_ok)
                       : TRITONSERVER_ServerIsReady(server_.get(), &probe_ok);

  RETURN_AND_RESPOND_IF_ERR(req, probe_err);
  evhtp_send_reply(req, probe_ok ? EVHTP_RES_OK : EVHTP_RES_BADREQ);
}

void
HTTPAPIServer::HandleRepositoryIndex(
    evhtp_request_t* req, const std::string& repository_name)
{
  // Repository index endpoint (POST). An optional JSON body may carry a
  // boolean "ready" member; when true the index is requested with the
  // READY flag. The serialized index is returned as the response body.
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::MODEL_REPOSITORY, restricted_apis_);

  AddContentTypeHeader(req, "application/json");
  if (req->method != htp_method_POST) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  triton::common::TritonJson::Value index_request;
  size_t body_len = 0;
  RETURN_AND_RESPOND_IF_ERR(
      req, EVRequestToJson(req, "registry index", &index_request, &body_len));

  // Only look for the "ready" member when a body was actually provided.
  bool ready_only = false;
  if (body_len > 0) {
    triton::common::TritonJson::Value ready_field;
    if (index_request.Find("ready", &ready_field)) {
      RETURN_AND_RESPOND_IF_ERR(req, ready_field.AsBool(&ready_only));
    }
  }

  uint32_t index_flags = 0;
  if (ready_only) {
    index_flags |= TRITONSERVER_INDEX_FLAG_READY;
  }

  TRITONSERVER_Message* index_message = nullptr;
  RETURN_AND_RESPOND_IF_ERR(
      req,
      TRITONSERVER_ServerModelIndex(server_.get(), index_flags, &index_message));

  const char* json_base;
  size_t json_size;
  TRITONSERVER_Error* serialize_err = TRITONSERVER_MessageSerializeToJson(
      index_message, &json_base, &json_size);
  if (serialize_err == nullptr) {
    evbuffer_add(req->buffer_out, json_base, json_size);
    evhtp_send_reply(req, EVHTP_RES_OK);
  }

  // The message is released before a possible serialization error is
  // reported.
  TRITONSERVER_MessageDelete(index_message);

  RETURN_AND_RESPOND_IF_ERR(req, serialize_err);
}

void
HTTPAPIServer::HandleRepositoryControl(
    evhtp_request_t* req, const std::string& repository_name,
    const std::string& model_name, const std::string& action)
{
  // Repository control endpoint (POST) handling the "load" and "unload"
  // actions for 'model_name'.
  //
  //  - 'repository_name' must be empty; naming a repository is reported as
  //    unsupported.
  //  - "load": optional JSON body with a "parameters" object. A "config"
  //    member is forwarded as a string parameter and members whose name
  //    starts with "file:" are base64-decoded and forwarded as bytes
  //    parameters. Any other member is rejected.
  //  - "unload": optional JSON body with "parameters.unload_dependents"
  //    (boolean) selecting whether dependent models are unloaded as well.
  //
  // Replies 200 on success, otherwise the error is returned as JSON.
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::MODEL_REPOSITORY, restricted_apis_);

  AddContentTypeHeader(req, "application/json");
  if (req->method != htp_method_POST) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  TRITONSERVER_Error* err = nullptr;
  if (!repository_name.empty()) {
    err = TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        "'repository_name' specification is not supported");
  } else {
    if (action == "load") {
      // Deleter releasing every created parameter together with the vector
      // that owns them, so early returns below do not leak parameters.
      static auto param_deleter =
          [](std::vector<TRITONSERVER_Parameter*>* params) {
            if (params != nullptr) {
              for (auto& param : *params) {
                TRITONSERVER_ParameterDelete(param);
              }
              delete params;
            }
          };
      std::unique_ptr<
          std::vector<TRITONSERVER_Parameter*>, decltype(param_deleter)>
          params(new std::vector<TRITONSERVER_Parameter*>(), param_deleter);
      // local variables to store the decoded file content, the data must
      // be valid until TRITONSERVER_ServerLoadModelWithParameters returns.
      std::list<std::vector<char>> binary_files;
      // WAR for the const-ness check
      std::vector<const TRITONSERVER_Parameter*> const_params;
      triton::common::TritonJson::Value load_request;
      size_t buffer_len = 0;
      RETURN_AND_RESPOND_IF_ERR(
          req, EVRequestToJson(req, "load model", &load_request, &buffer_len));

      if (buffer_len > 0) {
        // Parse request body for parameters
        triton::common::TritonJson::Value param_json;
        if (load_request.Find("parameters", &param_json)) {
          // Iterate over each member in 'param_json'
          std::vector<std::string> members;
          RETURN_AND_RESPOND_IF_ERR(req, param_json.Members(&members));
          for (const auto& m : members) {
            const char* param_str = nullptr;
            size_t param_len = 0;
            RETURN_AND_RESPOND_IF_ERR(
                req,
                param_json.MemberAsString(m.c_str(), &param_str, &param_len));

            TRITONSERVER_Parameter* param = nullptr;
            if (m == "config") {
              param = TRITONSERVER_ParameterNew(
                  m.c_str(), TRITONSERVER_PARAMETER_STRING, param_str);
            } else if (m.rfind("file:", 0) == 0) {
              // Member name starts with "file:": the value is base64 encoded
              // file content.
              size_t decoded_size;
              binary_files.emplace_back(std::vector<char>());
              RETURN_AND_RESPOND_IF_ERR(
                  req, DecodeBase64(
                           param_str, param_len, binary_files.back(),
                           decoded_size, m));
              param = TRITONSERVER_ParameterBytesNew(
                  m.c_str(), binary_files.back().data(), decoded_size);
            }

            if (param != nullptr) {
              params->emplace_back(param);
              const_params.emplace_back(param);
            } else {
              // Reached for unrecognized member names as well as for a failed
              // parameter creation.
              RETURN_AND_RESPOND_IF_ERR(
                  req, TRITONSERVER_ErrorNew(
                           TRITONSERVER_ERROR_INTERNAL,
                           "unexpected error on creating Triton parameter"));
            }
          }
        }
      }
      RETURN_AND_RESPOND_IF_ERR(
          req, TRITONSERVER_ServerLoadModelWithParameters(
                   server_.get(), model_name.c_str(), const_params.data(),
                   const_params.size()));
    } else if (action == "unload") {
      // Check if the dependent models should be removed
      bool unload_dependents = false;
      {
        triton::common::TritonJson::Value control_request;
        size_t buffer_len = 0;
        RETURN_AND_RESPOND_IF_ERR(
            req, EVRequestToJson(
                     req, "unload model", &control_request, &buffer_len));

        if (buffer_len > 0) {
          triton::common::TritonJson::Value params_json;
          if (control_request.Find("parameters", &params_json)) {
            triton::common::TritonJson::Value ud_json;
            if (params_json.Find("unload_dependents", &ud_json)) {
              auto parse_err = ud_json.AsBool(&unload_dependents);
              if (parse_err != nullptr) {
                // Re-wrap the parse failure with a message naming the
                // offending parameter, keeping the original error code.
                err = TRITONSERVER_ErrorNew(
                    TRITONSERVER_ErrorCode(parse_err),
                    (std::string("Unable to parse 'unload_dependents': ") +
                     TRITONSERVER_ErrorMessage(parse_err))
                        .c_str());
                TRITONSERVER_ErrorDelete(parse_err);
              }
            }
          }
        }
      }
      // Only unload when the request was parsed successfully. Otherwise the
      // parse error would be overwritten (and leaked) and the model would be
      // unloaded despite the malformed request.
      if (err == nullptr) {
        if (unload_dependents) {
          err = TRITONSERVER_ServerUnloadModelAndDependents(
              server_.get(), model_name.c_str());
        } else {
          err =
              TRITONSERVER_ServerUnloadModel(server_.get(), model_name.c_str());
        }
      }
    }
  }

  RETURN_AND_RESPOND_IF_ERR(req, err);
  evhtp_send_reply(req, EVHTP_RES_OK);
}

void
HTTPAPIServer::HandleModelReady(
    evhtp_request_t* req, const std::string& model_name,
    const std::string& model_version_str)
{
  // Model readiness endpoint (GET). Replies 200 when the requested model
  // version is ready, 400 when it is not, or the Triton error otherwise.
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::HEALTH, restricted_apis_);

  if (req->method != htp_method_GET) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  if (model_name.empty()) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_BADREQ, "Missing model name in ModelReady request");
  }

  int64_t version = 0;
  RETURN_AND_RESPOND_IF_ERR(
      req, GetModelVersionFromString(model_version_str, &version));

  bool is_ready = false;
  RETURN_AND_RESPOND_IF_ERR(
      req, TRITONSERVER_ServerModelIsReady(
               server_.get(), model_name.c_str(), version, &is_ready));

  // The query itself succeeded but the model version is not ready.
  if (!is_ready) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_BADREQ, "Model version not ready");
  }

  evhtp_send_reply(req, EVHTP_RES_OK);
}

void
HTTPAPIServer::HandleModelMetadata(
    evhtp_request_t* req, const std::string& model_name,
    const std::string& model_version_str)
{
  // Model metadata endpoint (GET). Returns the metadata of the requested
  // model version serialized as JSON.
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::METADATA, restricted_apis_);

  AddContentTypeHeader(req, "application/json");

  if (req->method != htp_method_GET) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  if (model_name.empty()) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_BADREQ, "Missing model name in ModelMetadata request");
  }

  int64_t version = 0;
  RETURN_AND_RESPOND_IF_ERR(
      req, GetModelVersionFromString(model_version_str, &version));

  TRITONSERVER_Message* metadata_message = nullptr;
  RETURN_AND_RESPOND_IF_ERR(
      req, TRITONSERVER_ServerModelMetadata(
               server_.get(), model_name.c_str(), version, &metadata_message));

  const char* json_base;
  size_t json_size;
  TRITONSERVER_Error* serialize_err = TRITONSERVER_MessageSerializeToJson(
      metadata_message, &json_base, &json_size);
  if (serialize_err == nullptr) {
    evbuffer_add(req->buffer_out, json_base, json_size);
    evhtp_send_reply(req, EVHTP_RES_OK);
  }
  // Release the message before reporting a possible serialization error.
  TRITONSERVER_MessageDelete(metadata_message);

  RETURN_AND_RESPOND_IF_ERR(req, serialize_err);
}

TRITONSERVER_Error*
HTTPAPIServer::GetModelConfig(
    const std::string& model_name, int64_t requested_model_version,
    std::string* config_json)
{
  // Fetches the configuration of the given model version and stores its
  // JSON serialization in 'config_json'. Returns nullptr on success, or the
  // error describing the failure, in which case 'config_json' is untouched.
  if (model_name.empty()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "Missing model name in ModelConfig request");
  }

  TRITONSERVER_Message* config_message = nullptr;
  RETURN_IF_ERR(TRITONSERVER_ServerModelConfig(
      server_.get(), model_name.c_str(), requested_model_version,
      1 /* config_version */, &config_message));

  const char* json_base;
  size_t json_size;
  TRITONSERVER_Error* serialize_err = TRITONSERVER_MessageSerializeToJson(
      config_message, &json_base, &json_size);
  if (serialize_err == nullptr) {
    // Copy out of the message, which is released just below.
    config_json->assign(json_base, json_size);
  }
  if (config_message != nullptr) {
    TRITONSERVER_MessageDelete(config_message);
  }

  return serialize_err;
}

void
HTTPAPIServer::HandleModelConfig(
    evhtp_request_t* req, const std::string& model_name,
    const std::string& model_version_str)
{
  // Model configuration endpoint (GET). Replies with the JSON configuration
  // of the requested model version.
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::MODEL_CONFIG, restricted_apis_);

  AddContentTypeHeader(req, "application/json");
  if (req->method != htp_method_GET) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  int64_t version;
  RETURN_AND_RESPOND_IF_ERR(
      req, GetModelVersionFromString(model_version_str, &version));

  std::string serialized_config;
  RETURN_AND_RESPOND_IF_ERR(
      req, GetModelConfig(model_name, version, &serialized_config));

  evbuffer_add(
      req->buffer_out, serialized_config.c_str(), serialized_config.size());
  evhtp_send_reply(req, EVHTP_RES_OK);
}

void
HTTPAPIServer::HandleModelStats(
    evhtp_request_t* req, const std::string& model_name,
    const std::string& model_version_str)
{
  // Model statistics endpoint (GET). When statistics support is compiled in
  // the statistics for the requested model/version are returned as JSON;
  // otherwise an UNAVAILABLE error is reported.
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::STATISTICS, restricted_apis_);

  AddContentTypeHeader(req, "application/json");
  if (req->method != htp_method_GET) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

#ifdef TRITON_ENABLE_STATS
  int64_t version;
  RETURN_AND_RESPOND_IF_ERR(
      req, GetModelVersionFromString(model_version_str, &version));

  TRITONSERVER_Message* stats_message = nullptr;
  RETURN_AND_RESPOND_IF_ERR(
      req, TRITONSERVER_ServerModelStatistics(
               server_.get(), model_name.c_str(), version, &stats_message));

  const char* json_base;
  size_t json_size;
  TRITONSERVER_Error* serialize_err = TRITONSERVER_MessageSerializeToJson(
      stats_message, &json_base, &json_size);
  if (serialize_err == nullptr) {
    // The serialized statistics form the response body.
    evbuffer_add(req->buffer_out, json_base, json_size);
    evhtp_send_reply(req, EVHTP_RES_OK);
  }
  // Release the message before reporting a possible serialization error.
  TRITONSERVER_MessageDelete(stats_message);

  RETURN_AND_RESPOND_IF_ERR(req, serialize_err);
#else
  RETURN_AND_RESPOND_IF_ERR(
      req, TRITONSERVER_ErrorNew(
               TRITONSERVER_ERROR_UNAVAILABLE,
               "the server does not support model statistics"));
#endif
}

void
HTTPAPIServer::HandleTrace(evhtp_request_t* req, const std::string& model_name)
{
  // Trace setting endpoint.
  //
  //  - GET returns the current trace settings, either the global ones
  //    (empty 'model_name') or those of the named model.
  //  - POST first applies the settings found in the JSON body
  //    ("trace_level", "trace_rate", "trace_count", "log_frequency"; a JSON
  //    null clears the corresponding setting) and then returns the
  //    resulting settings. "trace_file" can not be changed this way.
  //
  // Every failure is reported to the client as a JSON error response.
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::TRACE, restricted_apis_);

  AddContentTypeHeader(req, "application/json");
  if ((req->method != htp_method_GET) && (req->method != htp_method_POST)) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

#ifdef TRITON_ENABLE_TRACING
  if (trace_manager_ == nullptr) {
    // Always answer the request: returning without a reply would leave the
    // client waiting for a response that never arrives.
    RETURN_AND_RESPOND_IF_ERR(
        req, TRITONSERVER_ErrorNew(
                 TRITONSERVER_ERROR_UNAVAILABLE,
                 "the trace manager is not available"));
  }

  TRITONSERVER_InferenceTraceLevel level = TRITONSERVER_TRACE_LEVEL_DISABLED;
  uint32_t rate;
  int32_t count;
  uint32_t log_frequency;
  std::string filepath;
  InferenceTraceMode trace_mode;
  TraceConfigMap config_map;

  if (!model_name.empty()) {
    bool ready = false;
    RETURN_AND_RESPOND_IF_ERR(
        req,
        TRITONSERVER_ServerModelIsReady(
            server_.get(), model_name.c_str(), -1 /* model version */, &ready));
    if (!ready) {
      RETURN_AND_RESPOND_IF_ERR(
          req, TRITONSERVER_ErrorNew(
                   TRITONSERVER_ERROR_INVALID_ARG,
                   ("Request for unknown model : " + model_name).c_str()));
    }
  }

  // Perform trace setting update if requested
  if (req->method == htp_method_POST) {
    // Parses 'str' as an unsigned 32-bit value. Like std::stoi it throws
    // std::invalid_argument when no number can be parsed and
    // std::out_of_range when the value does not fit, but the accepted range
    // is [0, UINT32_MAX] so that negative values are rejected instead of
    // silently wrapping and large valid values are not refused.
    auto parse_uint32 = [](const std::string& str) -> uint32_t {
      const long long parsed = std::stoll(str);
      if ((parsed < 0) ||
          (parsed > static_cast<long long>(
                        std::numeric_limits<std::uint32_t>::max()))) {
        throw std::out_of_range("value is out of range for uint32");
      }
      return static_cast<uint32_t>(parsed);
    };

    triton::common::TritonJson::Value trace_request;
    RETURN_AND_RESPOND_IF_ERR(
        req, EVRequestToJsonAllowsEmpty(req, "trace", &trace_request));

    // 'new_setting' holds pointers to the locals declared above, so those
    // must stay alive until UpdateTraceSetting() returns.
    TraceManager::NewSetting new_setting;

    triton::common::TritonJson::Value setting_json;
    if (trace_request.Find("trace_file", &setting_json)) {
      RETURN_AND_RESPOND_IF_ERR(
          req, TRITONSERVER_ErrorNew(
                   TRITONSERVER_ERROR_UNSUPPORTED,
                   "trace file location can not be updated through network "
                   "protocol"));
    }
    if (trace_request.Find("trace_level", &setting_json)) {
      if (setting_json.IsNull()) {
        new_setting.clear_level_ = true;
      } else {
        triton::common::TritonJson::Value level_array;
        RETURN_AND_RESPOND_IF_ERR(
            req, trace_request.MemberAsArray("trace_level", &level_array));
        for (size_t i = 0; i < level_array.ArraySize(); ++i) {
          std::string level_str;
          RETURN_AND_RESPOND_IF_ERR(
              req, level_array.IndexAsString(i, &level_str));
          if (level_str == "OFF") {
            // "OFF" is only accepted as the sole entry of the array.
            if (level_array.ArraySize() == 1) {
              level = TRITONSERVER_TRACE_LEVEL_DISABLED;
              new_setting.level_ = &level;
            } else {
              RETURN_AND_RESPOND_IF_ERR(
                  req, TRITONSERVER_ErrorNew(
                           TRITONSERVER_ERROR_INVALID_ARG,
                           "Expect only one trace level 'OFF' is specified"));
            }
          } else if (level_str == "TIMESTAMPS") {
            level = static_cast<TRITONSERVER_InferenceTraceLevel>(
                level | TRITONSERVER_TRACE_LEVEL_TIMESTAMPS);
            new_setting.level_ = &level;
          } else if (level_str == "TENSORS") {
            level = static_cast<TRITONSERVER_InferenceTraceLevel>(
                level | TRITONSERVER_TRACE_LEVEL_TENSORS);
            new_setting.level_ = &level;
          }
          // Any other level string is ignored.
        }
      }
    }
    if (trace_request.Find("trace_rate", &setting_json)) {
      if (setting_json.IsNull()) {
        new_setting.clear_rate_ = true;
      } else {
        std::string rate_str;
        RETURN_AND_RESPOND_IF_ERR(req, setting_json.AsString(&rate_str));
        try {
          rate = parse_uint32(rate_str);
          new_setting.rate_ = &rate;
        }
        catch (const std::invalid_argument& ia) {
          RETURN_AND_RESPOND_IF_ERR(
              req, TRITONSERVER_ErrorNew(
                       TRITONSERVER_ERROR_INVALID_ARG,
                       (std::string("Unable to parse 'trace_rate', got: ") +
                        rate_str)
                           .c_str()));
        }
        catch (const std::out_of_range& oor) {
          RETURN_AND_RESPOND_IF_ERR(
              req,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string("Unable to parse 'trace_rate', value is out of "
                               "range [ ") +
                   std::to_string(std::numeric_limits<std::uint32_t>::min()) +
                   ", " +
                   std::to_string(std::numeric_limits<std::uint32_t>::max()) +
                   " ], got: " + rate_str)
                      .c_str()));
        }
      }
    }
    if (trace_request.Find("trace_count", &setting_json)) {
      if (setting_json.IsNull()) {
        new_setting.clear_count_ = true;
      } else {
        std::string count_str;
        RETURN_AND_RESPOND_IF_ERR(req, setting_json.AsString(&count_str));
        try {
          count = std::stoi(count_str);
          if (count < TraceManager::MIN_TRACE_COUNT_VALUE) {
            RETURN_AND_RESPOND_IF_ERR(
                req, TRITONSERVER_ErrorNew(
                         TRITONSERVER_ERROR_INVALID_ARG,
                         (std::string("Unable to parse 'trace_count'.") +
                          " Expecting value >= " +
                          std::to_string(TraceManager::MIN_TRACE_COUNT_VALUE) +
                          ", got:" + count_str)
                             .c_str()));
          }
          new_setting.count_ = &count;
        }
        catch (const std::invalid_argument& ia) {
          RETURN_AND_RESPOND_IF_ERR(
              req, TRITONSERVER_ErrorNew(
                       TRITONSERVER_ERROR_INVALID_ARG,
                       (std::string("Unable to parse 'trace_count', got: ") +
                        count_str)
                           .c_str()));
        }
        catch (const std::out_of_range& oor) {
          RETURN_AND_RESPOND_IF_ERR(
              req,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string("Unable to parse 'trace_count', value is out of "
                               "range [ ") +
                   std::to_string(TraceManager::MIN_TRACE_COUNT_VALUE) + ", " +
                   std::to_string(std::numeric_limits<std::int32_t>::max()) +
                   " ], got: " + count_str)
                      .c_str()));
        }
      }
    }
    if (trace_request.Find("log_frequency", &setting_json)) {
      if (setting_json.IsNull()) {
        new_setting.clear_log_frequency_ = true;
      } else {
        std::string frequency_str;
        RETURN_AND_RESPOND_IF_ERR(req, setting_json.AsString(&frequency_str));
        try {
          log_frequency = parse_uint32(frequency_str);
          new_setting.log_frequency_ = &log_frequency;
        }
        catch (const std::invalid_argument& ia) {
          RETURN_AND_RESPOND_IF_ERR(
              req, TRITONSERVER_ErrorNew(
                       TRITONSERVER_ERROR_INVALID_ARG,
                       (std::string("Unable to parse 'log_frequency', got: ") +
                        frequency_str)
                           .c_str()));
        }
        catch (const std::out_of_range& oor) {
          RETURN_AND_RESPOND_IF_ERR(
              req,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string(
                       "Unable to parse 'log_frequency', value is out of "
                       "range [ ") +
                   std::to_string(std::numeric_limits<std::uint32_t>::min()) +
                   ", " +
                   std::to_string(std::numeric_limits<std::uint32_t>::max()) +
                   " ], got: " + frequency_str)
                      .c_str()));
        }
      }
    }
    RETURN_AND_RESPOND_IF_ERR(
        req, trace_manager_->UpdateTraceSetting(model_name, new_setting));
  }

  // Get current trace setting, this is needed even if the setting
  // has been updated above as some values may not be provided in the request.
  trace_manager_->GetTraceSetting(
      model_name, &level, &rate, &count, &log_frequency, &filepath, &trace_mode,
      &config_map);
  triton::common::TritonJson::Value trace_response(
      triton::common::TritonJson::ValueType::OBJECT);
  // level
  {
    triton::common::TritonJson::Value level_array(
        triton::common::TritonJson::ValueType::ARRAY);
    if (level == TRITONSERVER_TRACE_LEVEL_DISABLED) {
      RETURN_AND_RESPOND_IF_ERR(req, level_array.AppendString("OFF"));
    } else {
      if (level & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) {
        RETURN_AND_RESPOND_IF_ERR(req, level_array.AppendString("TIMESTAMPS"));
      }
      if (level & TRITONSERVER_TRACE_LEVEL_TENSORS) {
        RETURN_AND_RESPOND_IF_ERR(req, level_array.AppendString("TENSORS"));
      }
    }
    RETURN_AND_RESPOND_IF_ERR(
        req, trace_response.Add("trace_level", std::move(level_array)));
  }
  RETURN_AND_RESPOND_IF_ERR(
      req, trace_response.AddString("trace_rate", std::to_string(rate)));
  RETURN_AND_RESPOND_IF_ERR(
      req, trace_response.AddString("trace_count", std::to_string(count)));
  // The log frequency and trace file are only reported in Triton trace mode.
  if (trace_mode == TRACE_MODE_TRITON) {
    RETURN_AND_RESPOND_IF_ERR(
        req, trace_response.AddString(
                 "log_frequency", std::to_string(log_frequency)));
    RETURN_AND_RESPOND_IF_ERR(
        req, trace_response.AddString("trace_file", filepath));
  }
  RETURN_AND_RESPOND_IF_ERR(
      req,
      trace_response.AddString(
          "trace_mode", trace_manager_->InferenceTraceModeString(trace_mode)));
  // Append the mode specific options, all rendered as strings.
  auto mode_key = std::to_string(trace_mode);
  auto trace_options_it = config_map.find(mode_key);
  if (trace_options_it != config_map.end()) {
    for (const auto& [key, value] : trace_options_it->second) {
      // These two are already reported above as "trace_file" and
      // "log_frequency".
      if ((key == "file") || (key == "log-frequency")) {
        continue;
      }
      std::string valueAsString;
      if (std::holds_alternative<std::string>(value)) {
        valueAsString = std::get<std::string>(value);
      } else if (std::holds_alternative<int>(value)) {
        valueAsString = std::to_string(std::get<int>(value));
      } else if (std::holds_alternative<uint32_t>(value)) {
        valueAsString = std::to_string(std::get<uint32_t>(value));
      }
      RETURN_AND_RESPOND_IF_ERR(
          req, trace_response.AddString(key.c_str(), valueAsString));
    }
  }
  triton::common::TritonJson::WriteBuffer buffer;
  RETURN_AND_RESPOND_IF_ERR(req, trace_response.Write(&buffer));
  evbuffer_add(req->buffer_out, buffer.Base(), buffer.Size());
  evhtp_send_reply(req, EVHTP_RES_OK);
#else
  RETURN_AND_RESPOND_IF_ERR(
      req, TRITONSERVER_ErrorNew(
               TRITONSERVER_ERROR_UNAVAILABLE,
               "the server does not support tracing"));
#endif
}

void
HTTPAPIServer::HandleLogging(evhtp_request_t* req)
{
  // Dynamic logging endpoint. POST applies the log settings present (and
  // non-null) in the JSON body, then both GET and POST reply with the
  // current log settings as JSON.
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::LOGGING, restricted_apis_);

  AddContentTypeHeader(req, "application/json");
  if ((req->method != htp_method_GET) && (req->method != htp_method_POST)) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

#ifdef TRITON_ENABLE_LOGGING
  // Perform log setting update if requested
  if (req->method == htp_method_POST) {
    triton::common::TritonJson::Value log_request;
    RETURN_AND_RESPOND_IF_ERR(
        req, EVRequestToJsonAllowsEmpty(req, "dynamic logging", &log_request));

    // The server and core repos each have their own Logger object, so every
    // update below is applied twice: once through the LOG_* macros and once
    // through the TRITONSERVER_ServerOptionsSetLog* API.
    triton::common::TritonJson::Value field;

    if (log_request.Find("log_file", &field) && !field.IsNull()) {
      RETURN_AND_RESPOND_IF_ERR(
          req, TRITONSERVER_ErrorNew(
                   TRITONSERVER_ERROR_UNSUPPORTED,
                   "log file location can not be updated through network "
                   "protocol"));
    }

    if (log_request.Find("log_info", &field) && !field.IsNull()) {
      bool enable_info;
      RETURN_AND_RESPOND_IF_ERR(req, field.AsBool(&enable_info));
      LOG_ENABLE_INFO(enable_info);
      TRITONSERVER_ServerOptionsSetLogInfo(nullptr, enable_info);
    }

    if (log_request.Find("log_warning", &field) && !field.IsNull()) {
      bool enable_warning;
      RETURN_AND_RESPOND_IF_ERR(req, field.AsBool(&enable_warning));
      LOG_ENABLE_WARNING(enable_warning);
      TRITONSERVER_ServerOptionsSetLogWarn(nullptr, enable_warning);
    }

    if (log_request.Find("log_error", &field) && !field.IsNull()) {
      bool enable_error;
      RETURN_AND_RESPOND_IF_ERR(req, field.AsBool(&enable_error));
      LOG_ENABLE_ERROR(enable_error);
      TRITONSERVER_ServerOptionsSetLogError(nullptr, enable_error);
    }

    if (log_request.Find("log_verbose_level", &field) && !field.IsNull()) {
      uint64_t requested_level;
      RETURN_AND_RESPOND_IF_ERR(req, field.AsUInt(&requested_level));
      LOG_SET_VERBOSE(static_cast<int32_t>(requested_level));
      TRITONSERVER_ServerOptionsSetLogVerbose(
          nullptr, static_cast<int32_t>(requested_level));
    }

    if (log_request.Find("log_format", &field) && !field.IsNull()) {
      std::string requested_format;
      RETURN_AND_RESPOND_IF_ERR(req, field.AsString(&requested_format));

      // Only "ISO8601" and "default" are accepted.
      const bool use_iso8601 = (requested_format == "ISO8601");
      if (!use_iso8601 && (requested_format != "default")) {
        // Returns from function
        RETURN_AND_RESPOND_IF_ERR(
            req, TRITONSERVER_ErrorNew(
                     TRITONSERVER_ERROR_UNAVAILABLE,
                     ("invalid argument for --log_format, got: " +
                      requested_format)
                         .c_str()));
      }

      if (use_iso8601) {
        LOG_SET_FORMAT(triton::common::Logger::Format::kISO8601);
        TRITONSERVER_ServerOptionsSetLogFormat(
            nullptr, TRITONSERVER_LOG_ISO8601);
      } else {
        LOG_SET_FORMAT(triton::common::Logger::Format::kDEFAULT);
        TRITONSERVER_ServerOptionsSetLogFormat(
            nullptr, TRITONSERVER_LOG_DEFAULT);
      }
    }
  }

  // Report the settings currently in effect.
  triton::common::TritonJson::Value current_settings(
      triton::common::TritonJson::ValueType::OBJECT);
  RETURN_AND_RESPOND_IF_ERR(
      req, current_settings.AddString("log_file", LOG_FILE));
  RETURN_AND_RESPOND_IF_ERR(
      req, current_settings.AddBool("log_info", LOG_INFO_IS_ON));
  RETURN_AND_RESPOND_IF_ERR(
      req, current_settings.AddBool("log_warning", LOG_WARNING_IS_ON));
  RETURN_AND_RESPOND_IF_ERR(
      req, current_settings.AddBool("log_error", LOG_ERROR_IS_ON));
  RETURN_AND_RESPOND_IF_ERR(
      req, current_settings.AddInt(
               "log_verbose_level", static_cast<uint64_t>(LOG_VERBOSE_LEVEL)));
  RETURN_AND_RESPOND_IF_ERR(
      req, current_settings.AddString("log_format", LOG_FORMAT_STRING));

  triton::common::TritonJson::WriteBuffer serialized;
  RETURN_AND_RESPOND_IF_ERR(req, current_settings.Write(&serialized));
  evbuffer_add(req->buffer_out, serialized.Base(), serialized.Size());
  evhtp_send_reply(req, EVHTP_RES_OK);
#else
  RETURN_AND_RESPOND_IF_ERR(
      req, TRITONSERVER_ErrorNew(
               TRITONSERVER_ERROR_UNAVAILABLE,
               "the server does not support dynamic logging"));
#endif  // TRITON_ENABLE_LOGGING
}

// Reply to a server-metadata request with the metadata JSON (or the error)
// cached in 'server_metadata_' / 'server_metadata_err_'. Only GET is
// accepted.
void
HTTPAPIServer::HandleServerMetadata(evhtp_request_t* req)
{
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::METADATA, restricted_apis_);

  AddContentTypeHeader(req, "application/json");
  if (req->method != htp_method_GET) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  if (server_metadata_err_ != nullptr) {
    // The stored error may be reported again on later requests, so reply by
    // hand here: RETURN_AND_RESPOND_IF_ERR would clean up the error object.
    EVBufferAddErrorJson(req->buffer_out, server_metadata_err_);
    evhtp_send_reply(req, HttpCodeFromError(server_metadata_err_));
    return;
  }

  evbuffer_add(
      req->buffer_out, server_metadata_.c_str(), server_metadata_.size());
  evhtp_send_reply(req, EVHTP_RES_OK);
}

// Serve the system (CPU) shared-memory endpoints. 'action' selects the
// operation: "status" (GET) writes the status JSON for 'region_name',
// "register" (POST) registers a region described by the JSON body, and
// "unregister" (POST) removes one region, or all of them when 'region_name'
// is empty.
void
HTTPAPIServer::HandleSystemSharedMemory(
    evhtp_request_t* req, const std::string& region_name,
    const std::string& action)
{
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::SHARED_MEMORY, restricted_apis_);

  AddContentTypeHeader(req, "application/json");

  // "status" must be a GET, every other action must be a POST.
  const bool is_status = (action == "status");
  const auto required_method = is_status ? htp_method_GET : htp_method_POST;
  if (req->method != required_method) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  TRITONSERVER_Error* err = nullptr;
  if (is_status) {
    triton::common::TritonJson::Value status_json(
        triton::common::TritonJson::ValueType::ARRAY);
    err = shm_manager_->GetStatus(
        region_name, TRITONSERVER_MEMORY_CPU, &status_json);
    if (err == nullptr) {
      triton::common::TritonJson::WriteBuffer serialized;
      err = status_json.Write(&serialized);
      if (err == nullptr) {
        evbuffer_add(req->buffer_out, serialized.Base(), serialized.Size());
      }
    }
  } else if (action == "register") {
    if (region_name.empty()) {
      err = TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "'region name' is necessary to register system shared memory region");
    } else {
      triton::common::TritonJson::Value register_request;
      RETURN_AND_RESPOND_IF_ERR(
          req, EVRequestToJsonAllowsEmpty(req, action, &register_request));

      // 'key' is mandatory and must be a string.
      triton::common::TritonJson::Value key_json;
      const char* shm_key = nullptr;
      if (register_request.Find("key", &key_json)) {
        size_t shm_key_len;
        err = key_json.AsString(&shm_key, &shm_key_len);
      } else {
        err = TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "Shared memory register request has no 'key' field");
      }

      // 'offset' is optional and stays 0 when absent.
      uint64_t offset = 0;
      if (err == nullptr) {
        triton::common::TritonJson::Value offset_json;
        if (register_request.Find("offset", &offset_json)) {
          err = offset_json.AsUInt(&offset);
        }
      }

      // 'byte_size' is mandatory.
      uint64_t byte_size = 0;
      if (err == nullptr) {
        triton::common::TritonJson::Value byte_size_json;
        if (register_request.Find("byte_size", &byte_size_json)) {
          err = byte_size_json.AsUInt(&byte_size);
        } else {
          err = TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG,
              "Shared memory register request has no 'byte_size' field");
        }
      }

      if (err == nullptr) {
        err = shm_manager_->RegisterSystemSharedMemory(
            region_name, shm_key, offset, byte_size);
      }
    }
  } else if (action == "unregister") {
    err = region_name.empty()
              ? shm_manager_->UnregisterAll(TRITONSERVER_MEMORY_CPU)
              : shm_manager_->Unregister(region_name, TRITONSERVER_MEMORY_CPU);
  }

  RETURN_AND_RESPOND_IF_ERR(req, err);
  evhtp_send_reply(req, EVHTP_RES_OK);
}

// Serve the CUDA (GPU) shared-memory endpoints. 'action' selects the
// operation: "status" (GET) writes the status JSON for 'region_name',
// "register" (POST) registers a region from the base64-encoded
// cudaIpcMemHandle_t in the JSON body, and "unregister" (POST) removes one
// region, or all of them when 'region_name' is empty. Registration reports an
// error when the server is built without TRITON_ENABLE_GPU.
void
HTTPAPIServer::HandleCudaSharedMemory(
    evhtp_request_t* req, const std::string& region_name,
    const std::string& action)
{
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::SHARED_MEMORY, restricted_apis_);

  AddContentTypeHeader(req, "application/json");

  // "status" must be a GET, every other action must be a POST.
  const bool is_status = (action == "status");
  const auto required_method = is_status ? htp_method_GET : htp_method_POST;
  if (req->method != required_method) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  TRITONSERVER_Error* err = nullptr;
  if (is_status) {
    triton::common::TritonJson::Value status_json(
        triton::common::TritonJson::ValueType::ARRAY);
    err = shm_manager_->GetStatus(
        region_name, TRITONSERVER_MEMORY_GPU, &status_json);
    if (err == nullptr) {
      triton::common::TritonJson::WriteBuffer serialized;
      err = status_json.Write(&serialized);
      if (err == nullptr) {
        evbuffer_add(req->buffer_out, serialized.Base(), serialized.Size());
      }
    }
  } else if (action == "register") {
    if (region_name.empty()) {
      err = TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "'region name' is necessary to register cuda shared memory region");
    } else {
#ifdef TRITON_ENABLE_GPU
      triton::common::TritonJson::Value register_request;
      RETURN_AND_RESPOND_IF_ERR(
          req, EVRequestToJsonAllowsEmpty(req, action, &register_request));

      // 'raw_handle' is mandatory and carries the handle under its "b64"
      // member.
      const char* encoded_handle = nullptr;
      size_t encoded_handle_len = 0;
      triton::common::TritonJson::Value raw_handle_json;
      if (register_request.Find("raw_handle", &raw_handle_json)) {
        err = raw_handle_json.MemberAsString(
            "b64", &encoded_handle, &encoded_handle_len);
      } else {
        err = TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "Shared memory register request has no 'raw_handle' field");
      }

      uint64_t byte_size = 0;
      if (err == nullptr) {
        err = register_request.MemberAsUInt("byte_size", &byte_size);
      }

      uint64_t device_id = 0;
      if (err == nullptr) {
        err = register_request.MemberAsUInt("device_id", &device_id);
      }

      if (err == nullptr) {
        size_t decoded_size;
        std::vector<char> handle_bytes;
        RETURN_AND_RESPOND_IF_ERR(
            req, DecodeBase64(
                     encoded_handle, encoded_handle_len, handle_bytes,
                     decoded_size, "raw_handle"));

        // Only a payload of exactly one cudaIpcMemHandle_t is accepted.
        if (decoded_size == sizeof(cudaIpcMemHandle_t)) {
          handle_bytes.resize(sizeof(cudaIpcMemHandle_t));
          err = shm_manager_->RegisterCUDASharedMemory(
              region_name.c_str(),
              reinterpret_cast<const cudaIpcMemHandle_t*>(handle_bytes.data()),
              byte_size, device_id);
        } else {
          err = TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG,
              "'raw_handle' must be a valid base64 encoded "
              "cudaIpcMemHandle_t");
        }
      }
#else
      const std::string no_gpu_msg =
          "failed to register CUDA shared memory region: '" + region_name +
          "', GPUs not supported";
      err = TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG, no_gpu_msg.c_str());
#endif  // TRITON_ENABLE_GPU
    }
  } else if (action == "unregister") {
    err = region_name.empty()
              ? shm_manager_->UnregisterAll(TRITONSERVER_MEMORY_GPU)
              : shm_manager_->Unregister(region_name, TRITONSERVER_MEMORY_GPU);
  }

  RETURN_AND_RESPOND_IF_ERR(req, err);
  evhtp_send_reply(req, EVHTP_RES_OK);
}

// Determine the length of the request body.
//
// When 'decompressed_buffer' is null the value of the Content-Length header
// is used if present, otherwise the size of 'req->buffer_in'. When
// 'decompressed_buffer' is non-null its size is used, as the header does not
// reflect the decompressed body size.
//
// 'content_length' is always written. Returns nullptr on success or an
// INVALID_ARG error when the Content-Length header cannot be parsed; in that
// case 'content_length' holds the size of 'req->buffer_in'.
TRITONSERVER_Error*
HTTPAPIServer::GetContentLength(
    evhtp_request_t* req, evbuffer* decompressed_buffer,
    int32_t* content_length)
{
  TRITONSERVER_Error* err = nullptr;

  // Set to body size in case there is no Content-Length to compare with
  int32_t lcontent_length = evbuffer_get_length(req->buffer_in);
  if (decompressed_buffer == nullptr) {
    const char* content_length_c_str =
        evhtp_kv_find(req->headers_in, kContentLengthHeader);
    if (content_length_c_str != nullptr) {
      try {
        // std::stoi is used rather than std::atoi: atoi never throws, which
        // would leave the handlers below unreachable and let a malformed or
        // overflowing header value pass silently.
        lcontent_length = std::stoi(content_length_c_str);
      }
      catch (const std::invalid_argument& ia) {
        err = TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string("Unable to parse ") + kContentLengthHeader +
             ", got: " + content_length_c_str)
                .c_str());
      }
      catch (const std::out_of_range& oor) {
        err = TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string("Unable to parse ") + kContentLengthHeader +
             ", value is out of range [ " +
             std::to_string(std::numeric_limits<std::int32_t>::min()) + ", " +
             std::to_string(std::numeric_limits<std::int32_t>::max()) +
             " ], got: " + content_length_c_str)
                .c_str());
      }
    }
  } else {
    // The Content-Length doesn't reflect the actual request body size
    // if compression is used, set 'content_length' to the decompressed size
    lcontent_length = evbuffer_get_length(decompressed_buffer);
  }

  *content_length = lcontent_length;
  return err;
}


// Determine the size of the JSON inference header at the start of the body.
//
// 'header_length' defaults to 'content_length' and is overridden by the
// value of the inference-header-length HTTP header when that header is
// present. Returns nullptr on success, or an INVALID_ARG error when the
// header value cannot be parsed or lies outside [0, content_length].
TRITONSERVER_Error*
HTTPAPIServer::GetInferenceHeaderLength(
    evhtp_request_t* req, int32_t content_length, size_t* header_length)
{
  // Set to content length in case that the header is not specified
  *header_length = content_length;

  // Find Inference-Header-Content-Length in header.
  const char* header_length_c_str =
      evhtp_kv_find(req->headers_in, kInferHeaderContentLengthHTTPHeader);
  if (header_length_c_str != NULL) {
    int parsed_value;
    try {
      // std::stoi is used rather than std::atoi: atoi never throws, which
      // would leave the handlers below unreachable and turn a malformed
      // value into a silent 0.
      parsed_value = std::stoi(header_length_c_str);
    }
    catch (const std::invalid_argument& ia) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG, (std::string("Unable to parse ") +
                                           kInferHeaderContentLengthHTTPHeader +
                                           ", got: " + header_length_c_str)
                                              .c_str());
    }
    catch (const std::out_of_range& oor) {
      // The value does not fit in an int.
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG, (std::string("Unable to parse ") +
                                           kInferHeaderContentLengthHTTPHeader +
                                           ", got: " + header_length_c_str)
                                              .c_str());
    }

    // Check if the content length is in proper range
    if ((parsed_value < 0) || (parsed_value > content_length)) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          (std::string("inference header size should be in range (0, ") +
           std::to_string(content_length) + "), got: " + header_length_c_str)
              .c_str());
    }
    *header_length = parsed_value;
  }
  return nullptr;
}

// Map the request's Content-Encoding header to a compression type. A missing
// header, an empty value and "identity" all yield IDENTITY; any value other
// than "deflate" or "gzip" yields UNKNOWN.
DataCompressor::Type
HTTPAPIServer::GetRequestCompressionType(evhtp_request_t* req)
{
  const char* encoding_header =
      evhtp_kv_find(req->headers_in, kContentEncodingHTTPHeader);
  if (encoding_header == NULL) {
    return DataCompressor::Type::IDENTITY;
  }

  const std::string encoding(encoding_header);
  if (encoding == "deflate") {
    return DataCompressor::Type::DEFLATE;
  }
  if (encoding == "gzip") {
    return DataCompressor::Type::GZIP;
  }
  if (encoding.empty() || (encoding == "identity")) {
    return DataCompressor::Type::IDENTITY;
  }
  return DataCompressor::Type::UNKNOWN;
}

// Choose the response compression from the request's Accept-Encoding header.
// Returns DEFLATE or GZIP when CompressionTypeUsed() selects that encoding,
// and IDENTITY otherwise (including when the header is absent).
DataCompressor::Type
HTTPAPIServer::GetResponseCompressionType(evhtp_request_t* req)
{
  const char* accept_header =
      evhtp_kv_find(req->headers_in, kAcceptEncodingHTTPHeader);
  if (accept_header == NULL) {
    // No Accept-Encoding: reply uncompressed.
    return DataCompressor::Type::IDENTITY;
  }

  const std::string selected = CompressionTypeUsed(accept_header);
  if (selected == "deflate") {
    return DataCompressor::Type::DEFLATE;
  }
  if (selected == "gzip") {
    return DataCompressor::Type::GZIP;
  }
  return DataCompressor::Type::IDENTITY;
}

// Helpers for parsing JSON requests for Triton-specific fields

// Register the inputs and requested outputs described by 'request_json' on
// 'irequest'.
//
// For each entry of "inputs" the tensor is added to 'irequest' and its data
// is attached from one of three sources:
//   - binary data following the JSON header, consumed from the iovec array
//     'v' ('n' entries) starting at '*v_idx_ptr', which is advanced as blocks
//     are used up;
//   - a registered shared memory region;
//   - the JSON "data" array, serialized into a buffer owned by 'infer_req'.
// For each entry of the optional "outputs" array the output is requested on
// 'irequest' and its placement (shared memory, binary or JSON) is recorded in
// 'infer_req->alloc_payload_.output_map_'.
//
// 'header_length' is only inspected to reject binary inputs when it is 0.
// 'model_name' is used in error messages. Returns nullptr on success,
// otherwise the first error encountered.
TRITONSERVER_Error*
HTTPAPIServer::ParseJsonTritonIO(
    triton::common::TritonJson::Value& request_json,
    TRITONSERVER_InferenceRequest* irequest, InferRequestClass* infer_req,
    const std::string& model_name, evbuffer_iovec* v, int* v_idx_ptr,
    size_t header_length, int n)
{
  // Get the byte-size for each input and from that get the blocks
  // holding the data for that input
  triton::common::TritonJson::Value inputs_json;
  RETURN_MSG_IF_ERR(
      request_json.MemberAsArray("inputs", &inputs_json),
      "Unable to parse 'inputs'");

  int& v_idx = *v_idx_ptr;
  for (size_t i = 0; i < inputs_json.ArraySize(); i++) {
    triton::common::TritonJson::Value request_input;
    RETURN_IF_ERR(inputs_json.At(i, &request_input));
    RETURN_IF_ERR(ValidateInputContentType(request_input));

    const char* input_name;
    size_t input_name_len;
    RETURN_MSG_IF_ERR(
        request_input.MemberAsString("name", &input_name, &input_name_len),
        "Unable to parse 'name'");

    const char* datatype;
    size_t datatype_len;
    RETURN_MSG_IF_ERR(
        request_input.MemberAsString("datatype", &datatype, &datatype_len),
        "Unable to parse 'datatype'");
    const TRITONSERVER_DataType dtype = TRITONSERVER_StringToDataType(datatype);

    triton::common::TritonJson::Value shape_json;
    RETURN_MSG_IF_ERR(
        request_input.MemberAsArray("shape", &shape_json),
        "Unable to parse 'shape'");
    std::vector<int64_t> shape_vec;
    for (size_t dim_idx = 0; dim_idx < shape_json.ArraySize(); dim_idx++) {
      uint64_t d = 0;
      RETURN_MSG_IF_ERR(
          shape_json.IndexAsUInt(dim_idx, &d), "Unable to parse 'shape'");
      shape_vec.push_back(d);
    }

    // Use data() rather than &shape_vec[0]: the request may carry an empty
    // "shape" array and indexing an empty vector is undefined behavior.
    RETURN_IF_ERR(TRITONSERVER_InferenceRequestAddInput(
        irequest, input_name, dtype, shape_vec.data(), shape_vec.size()));

    bool binary_input;
    size_t byte_size;
    RETURN_IF_ERR(
        CheckBinaryInputData(request_input, &binary_input, &byte_size));

    if ((byte_size == 0) && binary_input) {
      RETURN_IF_ERR(TRITONSERVER_InferenceRequestAppendInputData(
          irequest, input_name, nullptr, 0 /* byte_size */,
          TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */));
    } else if (binary_input) {
      if (header_length == 0) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "must specify valid 'Infer-Header-Content-Length' in request "
            "header and 'binary_data_size' when passing inputs in binary "
            "data format");
      }

      // Process one block at a time
      while ((byte_size > 0) && (v_idx < n)) {
        char* base = static_cast<char*>(v[v_idx].iov_base);
        size_t base_size;
        if (v[v_idx].iov_len > byte_size) {
          // The block holds more than this input needs: take the front part
          // and leave the remainder in place for the next input.
          base_size = byte_size;
          v[v_idx].iov_base = static_cast<void*>(base + byte_size);
          v[v_idx].iov_len -= byte_size;
          byte_size = 0;
        } else {
          // The whole block belongs to this input; move on to the next one.
          base_size = v[v_idx].iov_len;
          byte_size -= v[v_idx].iov_len;
          v_idx++;
        }

        RETURN_IF_ERR(TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input_name, base, base_size, TRITONSERVER_MEMORY_CPU,
            0 /* memory_type_id */));
      }

      // Ran out of blocks before the declared size was satisfied.
      if (byte_size != 0) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            std::string(
                "unexpected size for input '" + std::string(input_name) +
                "', expecting " + std::to_string(byte_size) +
                " additional bytes for model '" + model_name + "'")
                .c_str());
      }
    } else {
      // Process input if in shared memory.
      bool use_shm;
      uint64_t shm_offset;
      const char* shm_region;
      RETURN_IF_ERR(CheckSharedMemoryData(
          request_input, &use_shm, &shm_region, &shm_offset,
          reinterpret_cast<uint64_t*>(&byte_size)));
      if (use_shm) {
        void* base;
        TRITONSERVER_MemoryType memory_type;
        int64_t memory_type_id;
        std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo> shm_info =
            nullptr;
        RETURN_IF_ERR(shm_manager_->GetMemoryInfo(
            shm_region, shm_offset, byte_size, &base, &memory_type,
            &memory_type_id, &shm_info));
        infer_req->AddShmRegionInfo(shm_info);

        if (memory_type == TRITONSERVER_MEMORY_GPU) {
#ifdef TRITON_ENABLE_GPU
          cudaIpcMemHandle_t* cuda_handle;
          RETURN_IF_ERR(shm_manager_->GetCUDAHandle(shm_region, &cuda_handle));
          TRITONSERVER_BufferAttributes* buffer_attributes;
          RETURN_IF_ERR(TRITONSERVER_BufferAttributesNew(&buffer_attributes));
          auto buffer_attributes_del =
              [](TRITONSERVER_BufferAttributes* buffer_attributes) {
                TRITONSERVER_BufferAttributesDelete(buffer_attributes);
              };

          // Deletes 'buffer_attributes' on every exit path from this scope,
          // including the early returns below.
          std::unique_ptr<
              TRITONSERVER_BufferAttributes, decltype(buffer_attributes_del)>
              buffer_attrsl(buffer_attributes, buffer_attributes_del);
          RETURN_IF_ERR(TRITONSERVER_BufferAttributesSetMemoryType(
              buffer_attributes, memory_type));
          RETURN_IF_ERR(TRITONSERVER_BufferAttributesSetMemoryTypeId(
              buffer_attributes, memory_type_id));
          RETURN_IF_ERR(TRITONSERVER_BufferAttributesSetCudaIpcHandle(
              buffer_attributes, reinterpret_cast<void*>(cuda_handle)));
          RETURN_IF_ERR(TRITONSERVER_BufferAttributesSetByteSize(
              buffer_attributes, byte_size));
          RETURN_IF_ERR(
              TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes(
                  irequest, input_name, base, buffer_attributes));
#endif
        } else {
          RETURN_IF_ERR(TRITONSERVER_InferenceRequestAppendInputData(
              irequest, input_name, base, byte_size, memory_type,
              memory_type_id));
        }
      } else {
        const int64_t element_cnt = GetElementCount(shape_vec);

        if (element_cnt == 0) {
          RETURN_IF_ERR(TRITONSERVER_InferenceRequestAppendInputData(
              irequest, input_name, nullptr, 0 /* byte_size */,
              TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */));
        } else if (element_cnt == -2) {
          // -2 indicates invalid dimension
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG,
              std::string(
                  "invalid shape for input '" + std::string(input_name) +
                  "': shape " + ShapeToString(shape_vec) +
                  " contains one or more invalid dimensions")
                  .c_str());
        } else if (element_cnt == -3) {
          // -3 indicates integer overflow
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG,
              std::string(
                  "invalid shape for input '" + std::string(input_name) +
                  "': shape " + ShapeToString(shape_vec) +
                  " causes total element count to exceed maximum size of " +
                  std::to_string(INT64_MAX))
                  .c_str());
        } else {
          // JSON... presence of "data" already validated but still
          // checking here. Flow in this endpoint needs to be
          // reworked...
          triton::common::TritonJson::Value tensor_data;
          RETURN_MSG_IF_ERR(
              request_input.MemberAsArray("data", &tensor_data),
              "Unable to parse 'data'");

          if (dtype == TRITONSERVER_TYPE_BYTES) {
            RETURN_IF_ERR(JsonBytesArrayByteSize(tensor_data, &byte_size));
          } else {
            const uint32_t type_byte_size =
                TRITONSERVER_DataTypeByteSize(dtype);
            // Reject shapes whose total byte size would overflow before
            // doing the multiplication.
            if ((type_byte_size > 1) &&
                (element_cnt > (INT64_MAX / type_byte_size))) {
              return TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  std::string(
                      "byte size overflow for input '" +
                      std::string(input_name) + "': element count (" +
                      std::to_string(element_cnt) + ") * data type size (" +
                      std::to_string(type_byte_size) +
                      ") exceeds maximum allowed size (" +
                      std::to_string(INT64_MAX) + ")")
                      .c_str());
            }
            byte_size = element_cnt * type_byte_size;
          }

          // Check if byte_size is larger than max_input_size_
          if (byte_size > max_input_size_) {
            return TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                ("JSON input '" + std::string(input_name) +
                 "' has a byte_size (" + std::to_string(byte_size) +
                 " bytes) that exceeds the maximum allowed value "
                 "of " +
                 std::to_string(max_input_size_) +
                 " bytes. Use --http-max-input-size to increase the limit.")
                    .c_str());
          }

          // The serialized tensor is stored in 'infer_req' so that it stays
          // alive while 'irequest' refers to it.
          infer_req->serialized_data_.emplace_back();
          std::vector<char>& serialized = infer_req->serialized_data_.back();
          serialized.resize(byte_size);

          // Use data() rather than &serialized[0]: 'byte_size' can be 0
          // here, and indexing an empty vector is undefined behavior.
          RETURN_IF_ERR(ReadDataFromJson(
              input_name, tensor_data, serialized.data(), dtype,
              dtype == TRITONSERVER_TYPE_BYTES ? byte_size : element_cnt));
          RETURN_IF_ERR(TRITONSERVER_InferenceRequestAppendInputData(
              irequest, input_name, serialized.data(), serialized.size(),
              TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */));
        }
      }
    }
  }

  // Every binary block must have been claimed by some input.
  if (v_idx != n) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "unexpected additional input data for model '" + model_name + "'")
            .c_str());
  }

  // outputs is optional
  if (request_json.Find("outputs")) {
    triton::common::TritonJson::Value outputs_json;
    RETURN_MSG_IF_ERR(
        request_json.MemberAsArray("outputs", &outputs_json),
        "Unable to parse 'outputs'");
    for (size_t i = 0; i < outputs_json.ArraySize(); i++) {
      triton::common::TritonJson::Value request_output;
      RETURN_IF_ERR(outputs_json.At(i, &request_output));
      RETURN_IF_ERR(ValidateOutputParameter(request_output));

      const char* output_name;
      size_t output_name_len;
      RETURN_MSG_IF_ERR(
          request_output.MemberAsString("name", &output_name, &output_name_len),
          "Unable to parse 'name'");
      RETURN_IF_ERR(TRITONSERVER_InferenceRequestAddRequestedOutput(
          irequest, output_name));

      uint64_t class_size;
      RETURN_IF_ERR(CheckClassificationOutput(request_output, &class_size));

      bool use_shm;
      uint64_t offset, byte_size;
      const char* shm_region;
      RETURN_IF_ERR(CheckSharedMemoryData(
          request_output, &use_shm, &shm_region, &offset, &byte_size));

      // NOTE(review): the OutputInfo objects below are created with 'new'
      // inside emplace(); if an entry for 'output_name' already exists the
      // new object is presumably not stored - confirm duplicate output names
      // are rejected earlier.

      // ValidateOutputParameter ensures that both shm and
      // classification cannot be true.
      if (use_shm) {
        void* base;
        TRITONSERVER_MemoryType memory_type;
        int64_t memory_type_id;
        std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo> shm_info =
            nullptr;
        RETURN_IF_ERR(shm_manager_->GetMemoryInfo(
            shm_region, offset, byte_size, &base, &memory_type, &memory_type_id,
            &shm_info));
        infer_req->AddShmRegionInfo(shm_info);

        if (memory_type == TRITONSERVER_MEMORY_GPU) {
#ifdef TRITON_ENABLE_GPU
          cudaIpcMemHandle_t* cuda_handle;
          RETURN_IF_ERR(shm_manager_->GetCUDAHandle(shm_region, &cuda_handle));
          infer_req->alloc_payload_.output_map_.emplace(
              std::piecewise_construct, std::forward_as_tuple(output_name),
              std::forward_as_tuple(new AllocPayload::OutputInfo(
                  base, byte_size, memory_type, memory_type_id,
                  reinterpret_cast<char*>(cuda_handle))));
#endif
        } else {
          infer_req->alloc_payload_.output_map_.emplace(
              std::piecewise_construct, std::forward_as_tuple(output_name),
              std::forward_as_tuple(new AllocPayload::OutputInfo(
                  base, byte_size, memory_type, memory_type_id,
                  nullptr /* cuda ipc handle */)));
        }
      } else {
        bool use_binary;
        RETURN_IF_ERR(CheckBinaryOutputData(request_output, &use_binary));
        infer_req->alloc_payload_.output_map_.emplace(
            std::piecewise_construct, std::forward_as_tuple(output_name),
            std::forward_as_tuple(new AllocPayload::OutputInfo(
                use_binary ? AllocPayload::OutputInfo::BINARY
                           : AllocPayload::OutputInfo::JSON,
                class_size)));
      }
    }
  }
  return nullptr;  // success
}

// Apply the request-level "parameters" object of 'request_json' to
// 'irequest'. Recognized keys set the correlation id, sequence flags,
// priority, timeout and the default output kind; keys beginning with
// "triton_" are rejected; every other key is forwarded to
// SetTritonParameterFromJsonParameter(). The default output kind recorded on
// 'infer_req' is JSON unless "binary_data_output" is true.
TRITONSERVER_Error*
HTTPAPIServer::ParseJsonTritonParams(
    triton::common::TritonJson::Value& request_json,
    TRITONSERVER_InferenceRequest* irequest, InferRequestClass* infer_req)
{
  triton::common::TritonJson::Value params_json;
  if (!request_json.Find("parameters", &params_json)) {
    // Without parameters outputs that are not explicitly specified are
    // returned as JSON.
    infer_req->alloc_payload_.default_output_kind_ =
        AllocPayload::OutputInfo::JSON;
    return nullptr;  // Success
  }

  std::vector<std::string> keys;
  RETURN_MSG_IF_ERR(params_json.Members(&keys), "failed to get request params.");

  AllocPayload::OutputInfo::Kind default_kind = AllocPayload::OutputInfo::JSON;
  uint32_t request_flags = 0;
  for (auto& key : keys) {
    const char* key_name = key.c_str();

    if (key == "sequence_id") {
      // A sequence id may be given as an unsigned integer or as a string;
      // try the integer form first.
      uint64_t numeric_id;
      TRITONSERVER_Error* uint_err =
          params_json.MemberAsUInt(key_name, &numeric_id);
      if (uint_err == nullptr) {
        RETURN_IF_ERR(
            TRITONSERVER_InferenceRequestSetCorrelationId(irequest, numeric_id));
      } else {
        TRITONSERVER_ErrorDelete(uint_err);
        std::string string_id;
        RETURN_MSG_IF_ERR(
            params_json.MemberAsString(key_name, &string_id),
            "Unable to parse 'sequence_id'");
        RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetCorrelationIdString(
            irequest, string_id.c_str()));
      }
    } else if (key == "sequence_start") {
      bool is_start;
      RETURN_MSG_IF_ERR(
          params_json.MemberAsBool(key_name, &is_start),
          "Unable to parse 'sequence_start'");
      if (is_start) {
        request_flags |= TRITONSERVER_REQUEST_FLAG_SEQUENCE_START;
      }
    } else if (key == "sequence_end") {
      bool is_end;
      RETURN_MSG_IF_ERR(
          params_json.MemberAsBool(key_name, &is_end),
          "Unable to parse 'sequence_end'");
      if (is_end) {
        request_flags |= TRITONSERVER_REQUEST_FLAG_SEQUENCE_END;
      }
    } else if (key == "priority") {
      uint64_t priority;
      RETURN_MSG_IF_ERR(
          params_json.MemberAsUInt(key_name, &priority),
          "Unable to parse 'priority'");
      RETURN_IF_ERR(
          TRITONSERVER_InferenceRequestSetPriorityUInt64(irequest, priority));
    } else if (key == "timeout") {
      uint64_t timeout_us;
      RETURN_MSG_IF_ERR(
          params_json.MemberAsUInt(key_name, &timeout_us),
          "Unable to parse 'timeout'");
      RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetTimeoutMicroseconds(
          irequest, timeout_us));
    } else if (key == "binary_data_output") {
      bool want_binary;
      RETURN_MSG_IF_ERR(
          params_json.MemberAsBool(key_name, &want_binary),
          "Unable to parse 'binary_data_output'");
      if (want_binary) {
        default_kind = AllocPayload::OutputInfo::BINARY;
      } else {
        default_kind = AllocPayload::OutputInfo::JSON;
      }
    } else if (key.rfind("triton_", 0) == 0) {
      // rfind(..., 0) == 0 is a prefix test.
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "parameter keys starting with 'triton_' are reserved for Triton "
          "usage and should not be specified.");
    } else {
      RETURN_IF_ERR(
          SetTritonParameterFromJsonParameter(key, params_json, irequest));
    }
  }

  RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetFlags(irequest, request_flags));

  infer_req->alloc_payload_.default_output_kind_ = default_kind;
  return nullptr;  // Success
}

// Copy the optional top-level "id" string of 'request_json' onto 'irequest'
// as its request id. A request without "id" is left untouched.
TRITONSERVER_Error*
HTTPAPIServer::ParseJsonTritonRequestID(
    triton::common::TritonJson::Value& request_json,
    TRITONSERVER_InferenceRequest* irequest)
{
  triton::common::TritonJson::Value id_json;
  if (!request_json.Find("id", &id_json)) {
    return nullptr;  // Success, nothing to set
  }

  const char* request_id;
  size_t request_id_len;
  RETURN_MSG_IF_ERR(
      id_json.AsString(&request_id, &request_id_len), "Unable to parse 'id'");
  RETURN_IF_ERR(TRITONSERVER_InferenceRequestSetId(irequest, request_id));

  return nullptr;  // Success
}

// Parse the body of 'req' into 'request_json' and report the body size in
// 'buffer_len'. An empty body is handed to the JSON parser only when
// 'allows_empty_body' is true; otherwise 'request_json' is left untouched.
// 'request_kind' is used only in an error message.
TRITONSERVER_Error*
HTTPAPIServer::EVRequestToJsonImpl(
    evhtp_request_t* req, std::string_view request_kind, bool allows_empty_body,
    triton::common::TritonJson::Value* request_json, size_t* buffer_len)
{
  std::vector<struct evbuffer_iovec> iov_storage;
  struct evbuffer_iovec* iov = nullptr;
  int iov_idx = 0;

  // First call only counts the memory blocks that hold the body.
  const int iov_count = evbuffer_peek(req->buffer_in, -1, NULL, NULL, 0);
  if (iov_count > 0) {
    try {
      iov_storage = std::vector<struct evbuffer_iovec>(iov_count);
    }
    catch (const std::bad_alloc& e) {
      // Handle memory allocation failure
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          (std::string("Memory allocation failed for evbuffer: ") + e.what())
              .c_str());
    }
    catch (const std::exception& e) {
      // Catch any other std exceptions
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          (std::string("Exception while creating evbuffer vector: ") + e.what())
              .c_str());
    }

    // Second call fills in the address and length of every block.
    iov = iov_storage.data();
    const int filled = evbuffer_peek(req->buffer_in, -1, NULL, iov, iov_count);
    if (filled != iov_count) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          ("Unexpected error getting " + std::string(request_kind) +
           " request buffers")
              .c_str());
    }
  }

  *buffer_len = evbuffer_get_length(req->buffer_in);
  if (!allows_empty_body && (*buffer_len == 0)) {
    return nullptr;  // success, nothing to parse
  }

  RETURN_IF_ERR(
      EVBufferToJson(request_json, iov, &iov_idx, *buffer_len, iov_count));
  return nullptr;  // success
}

// Populate 'irequest' from an inference request body held in 'input_buffer'.
// The first 'header_length' bytes are parsed as the JSON header; the request
// id, parameters and inputs/outputs found there are then applied to
// 'irequest' and 'infer_req'. 'model_name' is passed on for error messages.
TRITONSERVER_Error*
HTTPAPIServer::EVBufferToInput(
    const std::string& model_name, TRITONSERVER_InferenceRequest* irequest,
    evbuffer* input_buffer, InferRequestClass* infer_req, size_t header_length)
{
  // The HTTP body is not necessarily stored in contiguous memory, so collect
  // the address and size of each chunk that holds it.
  std::vector<struct evbuffer_iovec> iov_storage;
  struct evbuffer_iovec* iov = nullptr;
  int iov_idx = 0;

  // First call only counts the chunks.
  const int iov_count = evbuffer_peek(input_buffer, -1, NULL, NULL, 0);
  if (iov_count > 0) {
    try {
      iov_storage = std::vector<struct evbuffer_iovec>(iov_count);
    }
    catch (const std::bad_alloc& e) {
      // Handle memory allocation failure
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          (std::string("Memory allocation failed for evbuffer: ") + e.what())
              .c_str());
    }
    catch (const std::exception& e) {
      // Catch any other std exceptions
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          (std::string("Exception while creating evbuffer vector: ") + e.what())
              .c_str());
    }

    // Second call fills in every chunk descriptor.
    iov = iov_storage.data();
    const int filled = evbuffer_peek(input_buffer, -1, NULL, iov, iov_count);
    if (filled != iov_count) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          "unexpected error getting input buffers");
    }
  }

  // Extract just the json header from the HTTP body. 'header_length == 0' means
  // that the entire HTTP body should be input data for a raw binary request.
  triton::common::TritonJson::Value header_json;
  RETURN_IF_ERR(
      EVBufferToJson(&header_json, iov, &iov_idx, header_length, iov_count));

  // Apply the parsed header to the request: id first, then parameters, then
  // inputs and outputs.
  RETURN_IF_ERR(ParseJsonTritonRequestID(header_json, irequest));
  RETURN_IF_ERR(ParseJsonTritonParams(header_json, irequest, infer_req));
  RETURN_IF_ERR(ParseJsonTritonIO(
      header_json, irequest, infer_req, model_name, iov, &iov_idx,
      header_length, iov_count));

  return nullptr;  // success
}

TRITONSERVER_Error*
HTTPAPIServer::EVBufferToRawInput(
    const std::string& model_name, TRITONSERVER_InferenceRequest* irequest,
    evbuffer* input_buffer, InferRequestClass* infer_req)
{
  // Treat the whole HTTP body as the data of a single raw input tensor and
  // request binary output by default.
  static const char* raw_input_name = "raw_input";
  RETURN_IF_ERR(
      TRITONSERVER_InferenceRequestAddRawInput(irequest, raw_input_name));

  const size_t total_size = evbuffer_get_length(input_buffer);

  // Reject bodies larger than the configured maximum input size.
  if (total_size > max_input_size_) {
    const std::string msg =
        "Raw input has a byte_size (" + std::to_string(total_size) +
        " bytes) that exceeds the maximum allowed value of " +
        std::to_string(max_input_size_) +
        " bytes. Use --http-max-input-size to increase the limit.";
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
  }

  if (total_size == 0) {
    // zero-shape tensor: register an empty data buffer
    RETURN_IF_ERR(TRITONSERVER_InferenceRequestAppendInputData(
        irequest, raw_input_name, nullptr, 0 /* byte_size */,
        TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */));
  } else {
    std::vector<struct evbuffer_iovec> chunks;
    struct evbuffer_iovec* chunk_array = nullptr;
    int chunk_idx = 0;

    // First peek is only used to learn how many chunks hold the body.
    const int chunk_count = evbuffer_peek(input_buffer, -1, NULL, NULL, 0);
    if (chunk_count > 0) {
      try {
        chunks = std::vector<struct evbuffer_iovec>(chunk_count);
      }
      catch (const std::bad_alloc& e) {
        // Could not allocate the chunk descriptor array.
        const std::string msg =
            std::string("Memory allocation failed for evbuffer: ") + e.what();
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
      }
      catch (const std::exception& e) {
        // Any other standard exception raised while building the array.
        const std::string msg =
            std::string("Exception while creating evbuffer vector: ") +
            e.what();
        return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, msg.c_str());
      }

      // Second peek fills in the descriptors; it must report the same count.
      chunk_array = chunks.data();
      const int filled =
          evbuffer_peek(input_buffer, -1, NULL, chunk_array, chunk_count);
      if (filled != chunk_count) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            "unexpected error getting input buffers");
      }
    }

    // Append the body to the input one chunk at a time, without copying.
    size_t remaining = total_size;
    while ((remaining > 0) && (chunk_idx < chunk_count)) {
      struct evbuffer_iovec& chunk = chunk_array[chunk_idx];
      char* const chunk_base = static_cast<char*>(chunk.iov_base);
      size_t consumed;
      if (chunk.iov_len <= remaining) {
        // The whole chunk belongs to the input; move on to the next one.
        consumed = chunk.iov_len;
        remaining -= chunk.iov_len;
        ++chunk_idx;
      } else {
        // Only the front of the chunk is needed; shrink the descriptor to
        // the unconsumed tail.
        consumed = remaining;
        chunk.iov_base = static_cast<void*>(chunk_base + remaining);
        chunk.iov_len -= remaining;
        remaining = 0;
      }

      RETURN_IF_ERR(TRITONSERVER_InferenceRequestAppendInputData(
          irequest, raw_input_name, chunk_base, consumed,
          TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */));
    }
  }

  infer_req->alloc_payload_.default_output_kind_ =
      AllocPayload::OutputInfo::BINARY;
  return nullptr;  // success
}

// Parse 'length' bytes of JSON starting at the current position of the
// iovec array 'v' (which has 'n' entries) into 'document'.
//
// '*v_idx' is the index of the iovec to start reading from. On return it,
// together with the iov_base / iov_len of the partially consumed entry, has
// been advanced past the JSON so that the caller can continue reading the
// bytes that follow.
//
// Returns INVALID_ARG if 'length' exceeds max_input_size_ or if the iovecs
// hold fewer than 'length' bytes, or the error reported by the JSON parser.
TRITONSERVER_Error*
HTTPAPIServer::EVBufferToJson(
    triton::common::TritonJson::Value* document, evbuffer_iovec* v, int* v_idx,
    const size_t length, int n)
{
  if (length > max_input_size_) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        ("Request JSON size of " + std::to_string(length) +
         " bytes exceeds the maximum allowed value of " +
         std::to_string(max_input_size_) +
         " bytes. Use --http-max-input-size to increase the limit.")
            .c_str());
  }

  size_t offset = 0, remaining_length = length;
  char* json_base;
  std::vector<char> json_buffer;

  // No need to memcpy when the JSON lies entirely within the current iovec.
  // Index with '*v_idx' (not a fixed 0) so the fast path agrees with the
  // copy path below when the caller's cursor is not at the first iovec.
  if ((*v_idx < n) && (v[*v_idx].iov_len >= remaining_length)) {
    evbuffer_iovec& current = v[*v_idx];
    json_base = static_cast<char*>(current.iov_base);
    if (current.iov_len > remaining_length) {
      // Leave the trailing bytes of this iovec for the caller.
      current.iov_base = static_cast<void*>(json_base + remaining_length);
      current.iov_len -= remaining_length;
    } else {
      // The iovec was consumed exactly; advance to the next one.
      *v_idx += 1;
    }
    remaining_length = 0;
  } else {
    // The JSON spans several iovecs: gather it into a contiguous buffer.
    json_buffer.resize(length);
    json_base = json_buffer.data();
    while ((remaining_length > 0) && (*v_idx < n)) {
      char* base = static_cast<char*>(v[*v_idx].iov_base);
      size_t base_size;
      if (v[*v_idx].iov_len > remaining_length) {
        base_size = remaining_length;
        v[*v_idx].iov_base = static_cast<void*>(base + remaining_length);
        v[*v_idx].iov_len -= remaining_length;
        remaining_length = 0;
      } else {
        base_size = v[*v_idx].iov_len;
        remaining_length -= v[*v_idx].iov_len;
        *v_idx += 1;
      }

      memcpy(json_base + offset, base, base_size);
      offset += base_size;
    }
  }

  // Ran out of iovecs before 'length' bytes were collected.
  if (remaining_length != 0) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "unexpected size for request JSON, expecting " +
            std::to_string(remaining_length) + " more bytes")
            .c_str());
  }

  RETURN_IF_ERR(document->Parse(json_base, length));

  return nullptr;  // success
}

struct HeaderSearchPayload {
  HeaderSearchPayload(
      const re2::RE2& regex, TRITONSERVER_InferenceRequest* request)
      : regex_(regex), request_(request), error_(nullptr)
  {
  }

  const re2::RE2& regex_;
  TRITONSERVER_InferenceRequest* request_;
  TRITONSERVER_Error* error_;
};

// Per-header callback passed to evhtp_kvs_for_each(). 'arg' must point to a
// HeaderSearchPayload. A header whose key matches the payload's regex is
// set on the inference request as a string parameter.
//
// Returns 0 to continue, or 1 after storing the failure in the payload's
// 'error_' when the parameter could not be set.
int
ForEachHeader(evhtp_header_t* header, void* arg)
{
  HeaderSearchPayload* header_search_payload =
      static_cast<HeaderSearchPayload*>(arg);

  TRITONSERVER_InferenceRequest* request = header_search_payload->request_;
  const re2::RE2& regex = header_search_payload->regex_;

  if (RE2::PartialMatch(std::string(header->key), regex)) {
    header_search_payload->error_ =
        TRITONSERVER_InferenceRequestSetStringParameter(
            request, header->key, header->val);

    if (header_search_payload->error_ != nullptr) {
      return 1;
    }
  }

  return 0;
}

// Verify that the requested model can be served by this endpoint: models
// whose transaction policy is decoupled are rejected with UNSUPPORTED.
// Errors from querying the transaction properties are returned as-is.
TRITONSERVER_Error*
HTTPAPIServer::CheckTransactionPolicy(
    evhtp_request_t* req, const std::string& model_name,
    int64_t requested_model_version)
{
  uint32_t txn_flags;
  RETURN_IF_ERR(TRITONSERVER_ServerModelTransactionProperties(
      server_.get(), model_name.c_str(), requested_model_version, &txn_flags,
      nullptr /* voidp */));

  const bool is_decoupled = (txn_flags & TRITONSERVER_TXN_DECOUPLED) != 0;
  if (!is_decoupled) {
    return nullptr;  // success
  }

  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED,
      "HTTP end point doesn't support models with decoupled "
      "transaction policy");
}

// Ask the trace manager whether this request should be traced.
//
// When a trace is sampled, '*triton_trace' is set to the underlying Triton
// trace object and the HTTP receive timestamps recorded on 'req' are added
// to it. Returns the sampled trace, or nullptr when the request is not
// traced or tracing support is compiled out ('*triton_trace' is left
// untouched in both cases).
std::shared_ptr<TraceManager::Trace>
HTTPAPIServer::StartTrace(
    evhtp_request_t* req, const std::string& model_name,
    TRITONSERVER_InferenceTrace** triton_trace)
{
#ifdef TRITON_ENABLE_TRACING
  HttpTextMapCarrier carrier(req->headers_in);
  auto start_options =
      trace_manager_->GetTraceStartOptions(carrier, model_name);
  // The call already yields a temporary; wrapping it in std::move would only
  // inhibit copy elision.
  std::shared_ptr<TraceManager::Trace> trace =
      trace_manager_->SampleTrace(start_options);
  if (trace != nullptr) {
    *triton_trace = trace->trace_;
    // Timestamps from evhtp are captured in 'req'. We record them here
    // since this is the first place where we have access to the trace
    // manager.
    trace->CaptureTimestamp("HTTP_RECV_START", req->recv_start_ns);
    trace->CaptureTimestamp("HTTP_RECV_END", req->recv_end_ns);
  }
  return trace;
#else
  return nullptr;
#endif  // TRITON_ENABLE_TRACING
}

// Decompress the body of 'req' according to its compression type.
//
// For DEFLATE / GZIP a new evbuffer holding the decompressed body is
// returned through '*decompressed_buffer'; the caller becomes responsible
// for it. For IDENTITY nothing is done and '*decompressed_buffer' is left
// untouched. On any error '*decompressed_buffer' is left untouched as well,
// so there is never a buffer for the caller to release on failure.
//
// An unsupported compression type yields an UNSUPPORTED error and adds an
// Accept-Encoding header listing the supported types to the response.
TRITONSERVER_Error*
HTTPAPIServer::DecompressBuffer(
    evhtp_request_t* req, evbuffer** decompressed_buffer)
{
  auto compression_type = GetRequestCompressionType(req);
  switch (compression_type) {
    case DataCompressor::Type::DEFLATE:
    case DataCompressor::Type::GZIP: {
      evbuffer* output = evbuffer_new();
      if (output == nullptr) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            "failed to allocate buffer for decompressed request body");
      }
      TRITONSERVER_Error* err = DataCompressor::DecompressData(
          compression_type, req->buffer_in, output, max_input_size_);
      if (err != nullptr) {
        // Do not hand a half-filled buffer to the caller: it returns on
        // error without releasing it, which would leak the evbuffer.
        evbuffer_free(output);
        return err;
      }
      *decompressed_buffer = output;
      break;
    }
    case DataCompressor::Type::UNKNOWN: {
      // Encounter unsupported compressed type, send error with supported types
      // in Accept-Encoding
      evhtp_headers_add_header(
          req->headers_out,
          evhtp_header_new(kAcceptEncodingHTTPHeader, "gzip, deflate", 1, 1));
      // FIXME: Map TRITONSERVER_ERROR_UNSUPPORTED to EVHTP_RES_UNSUPPORTED
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED, "Unsupported compression type");
    }
    case DataCompressor::Type::IDENTITY:
      // Do nothing
      break;
  }

  return nullptr;  // success
}

// Fill 'irequest' from the HTTP request body. The decompressed buffer is
// used when one is supplied, otherwise the request's own input buffer.
// A 'header_length' of zero means the body is parsed as a raw binary input;
// any other value means the body starts with a JSON header of that length.
TRITONSERVER_Error*
HTTPAPIServer::EVRequestToTritonRequest(
    evhtp_request_t* req, const std::string& model_name,
    TRITONSERVER_InferenceRequest* irequest, evbuffer* decompressed_buffer,
    InferRequestClass* infer_req, size_t header_length)
{
  evbuffer* const body =
      (decompressed_buffer != nullptr) ? decompressed_buffer : req->buffer_in;

  if (header_length == 0) {
    RETURN_IF_ERR(EVBufferToRawInput(model_name, irequest, body, infer_req));
    return nullptr;  // success
  }

  RETURN_IF_ERR(
      EVBufferToInput(model_name, irequest, body, infer_req, header_length));
  return nullptr;  // success
}

// Copy the HTTP request headers whose key matches the configured forward
// pattern onto 'irequest' as string parameters. Nothing is done when no
// forward pattern is configured. Returns the error captured by the header
// callback when the iteration was aborted.
TRITONSERVER_Error*
HTTPAPIServer::ForwardHeaders(
    evhtp_request_t* req, TRITONSERVER_InferenceRequest* irequest)
{
  if (header_forward_pattern_.empty()) {
    return nullptr;  // success
  }

  HeaderSearchPayload payload(header_forward_regex_, irequest);
  const int iteration_status = evhtp_kvs_for_each(
      req->headers_in, ForEachHeader, reinterpret_cast<void*>(&payload));

  // A non-zero status means ForEachHeader stopped early and stored the
  // failure in the payload.
  return (iteration_status != 0) ? payload.error_ : nullptr;
}

// Handle a "generate" request for 'model_name' / 'model_version_str'.
//
// The JSON body is converted into a Triton inference request using the model
// input metadata and the generate (or generate-stream, when 'streaming' is
// true) request schema, and the request is then submitted asynchronously.
// The response is produced by GenerateRequestClass::InferResponseComplete.
//
// Any failure is reported to the client as a JSON error: before the
// GenerateRequestClass object exists through the RETURN_AND_RESPOND_* macros,
// afterwards through 'error_callback', which also resumes the HTTP request.
void
HTTPAPIServer::HandleGenerate(
    evhtp_request_t* req, const std::string& model_name,
    const std::string& model_version_str, bool streaming)
{
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::INFERENCE, restricted_apis_);

  AddContentTypeHeader(req, "application/json");
  // Only POST is accepted on this endpoint.
  if (req->method != htp_method_POST) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  int64_t requested_model_version;
  RETURN_AND_RESPOND_IF_ERR(
      req,
      GetModelVersionFromString(model_version_str, &requested_model_version));

  // If tracing is enabled see if this request should be traced.
  TRITONSERVER_InferenceTrace* triton_trace = nullptr;
  std::shared_ptr<TraceManager::Trace> trace;
  if (trace_manager_) {
    // If tracing is enabled see if this request should be traced.
    trace = StartTrace(req, model_name, &triton_trace);
  }

  // Model input metadata keyed by input name; used to decide which JSON keys
  // are tensors. 'meta_data_root' is the parsed metadata document that
  // ModelInputMetadata fills alongside the map.
  std::map<std::string, triton::common::TritonJson::Value> input_metadata;
  triton::common::TritonJson::Value meta_data_root;
  RETURN_AND_RESPOND_IF_ERR(
      req, ModelInputMetadata(
               model_name, requested_model_version, &input_metadata,
               &meta_data_root));


  // [FIXME] decompression should have been done here. before parsing request
  // body
  // Until then, compressed request bodies are rejected.
  if (GetRequestCompressionType(req) != DataCompressor::Type::IDENTITY) {
    RETURN_AND_RESPOND_IF_ERR(
        req,
        TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "Unsupported content-encoding, only 'identity' is supported."));
  }

  // Create the inference request object which provides all information needed
  // for an inference. Make sure it is cleaned up on early error.
  TRITONSERVER_InferenceRequest* irequest = nullptr;
  RETURN_AND_RESPOND_IF_ERR(
      req, TRITONSERVER_InferenceRequestNew(
               &irequest, server_.get(), model_name.c_str(),
               requested_model_version));

  // Shared ownership of the Triton request; the deleter removes it when the
  // last holder goes away.
  std::shared_ptr<TRITONSERVER_InferenceRequest> irequest_shared = {
      irequest, [](TRITONSERVER_InferenceRequest* request) {
        LOG_TRITONSERVER_ERROR(
            TRITONSERVER_InferenceRequestDelete(request),
            "deleting HTTP/REST inference request");
      }};

  // HTTP request paused when creating inference request. Resume it on exit if
  // this function returns early due to error. Otherwise resumed in callback.
  // The two branches differ only in which request/response schemas are used.
  std::unique_ptr<GenerateRequestClass> generate_request;
  if (streaming) {
    generate_request.reset(new GenerateRequestClass(
        server_.get(), req, GetResponseCompressionType(req),
        generate_stream_request_schema_.get(),
        generate_stream_response_schema_.get(), streaming, irequest_shared,
        shm_manager_));
  } else {
    generate_request.reset(new GenerateRequestClass(
        server_.get(), req, GetResponseCompressionType(req),
        generate_request_schema_.get(), generate_response_schema_.get(),
        streaming, irequest_shared, shm_manager_));
  }
  generate_request->trace_ = trace;

  const char* request_id = "<id_unknown>";
  // Callback to cleanup on any errors encountered below. Capture everything
  // by reference to capture local updates, except for shared pointers which
  // should be captured by value in case of ref count issues.
  // The callback does not own the error object.
  auto error_callback = [&, trace](TRITONSERVER_Error* error) {
    if (error != nullptr) {
      // Get request ID for logging in case of error.
      if (irequest != nullptr) {
        LOG_TRITONSERVER_ERROR(
            TRITONSERVER_InferenceRequestId(irequest, &request_id),
            "unable to retrieve request ID string");
      }
      // Fall back to the placeholder when the request has an empty ID.
      if (!strncmp(request_id, "", 1)) {
        request_id = "<id_unknown>";
      }

      LOG_VERBOSE(1) << "[request id: " << request_id << "] "
                     << "Infer failed: " << TRITONSERVER_ErrorMessage(error);
      AddContentTypeHeader(req, "application/json");
      EVBufferAddErrorJson(req->buffer_out, error);
      evhtp_send_reply(req, HttpCodeFromError(error));
      evhtp_request_resume(req);

#ifdef TRITON_ENABLE_TRACING
      // If HTTP server still owns Triton trace
      if ((trace != nullptr) && (trace->trace_ != nullptr)) {
        TraceManager::TraceRelease(trace->trace_, trace->trace_userp_);
      }
#endif  // TRITON_ENABLE_TRACING
    }
  };

  // Option 1: Form tensor-like JSON request and try to re-use HandleInfer
  //           as much as possible. Probably need to do something like overwrite
  //           req->buffer_in or create a new evhtp_request to pass and handle.
  // Option 2: Do inference logic directly here after parsing request.
  // Note:
  //   Currently option 2 is selected. It is true that HandleInfer() includes
  //   handling for features that will be requested for generate endpoints
  //   (i.e. tracing), however, it is currently tied to infer endpoint logic and
  //   some decoupling must be done to properly reuse it (for example, response
  //   callback is tied to infer logic and inflexible for response streaming).
  //   For the time being, it is less mental burden to support this endpoint
  //   without early optimization for code reuse.
  //   Also, there is limitation on Triton JSON library that makes forming
  //   arbitrary JSON message convoluted (added key is reference to a string and
  //   thus the string must live as long as the JSON message).
  triton::common::TritonJson::Value request;
  RETURN_AND_CALLBACK_IF_ERR(
      EVRequestToJsonAllowsEmpty(req, "generate", &request), error_callback);
  RETURN_AND_CALLBACK_IF_ERR(
      ParseJsonTritonRequestID(request, irequest), error_callback);

  // Translate the JSON keys into input tensors / request parameters.
  RETURN_AND_CALLBACK_IF_ERR(
      generate_request->ConvertGenerateRequest(
          input_metadata, generate_request->RequestSchema(), request),
      error_callback);

  // Payload handed to the request release callback; no decompressed buffer is
  // associated with generate requests (nullptr).
  auto request_release_payload =
      std::make_unique<RequestReleasePayload>(irequest_shared, nullptr);
  // [FIXME] decompression..
  RETURN_AND_CALLBACK_IF_ERR(
      TRITONSERVER_InferenceRequestSetReleaseCallback(
          irequest, InferRequestClass::InferRequestComplete,
          request_release_payload.get()),
      error_callback);
  RETURN_AND_CALLBACK_IF_ERR(
      TRITONSERVER_InferenceRequestSetResponseCallback(
          irequest, allocator_,
          reinterpret_cast<void*>(&generate_request->alloc_payload_),
          GenerateRequestClass::InferResponseComplete,
          reinterpret_cast<void*>(generate_request.get())),
      error_callback);

  RETURN_AND_CALLBACK_IF_ERR(
      TRITONSERVER_ServerInferAsync(server_.get(), irequest, triton_trace),
      error_callback);

#ifdef TRITON_ENABLE_TRACING
  // Ownership of trace passed to Triton core, set trace to null to mark it
  // as no longer owned here.
  if (trace != nullptr) {
    trace->trace_ = nullptr;
  }
#endif  // TRITON_ENABLE_TRACING
  // The request was accepted: the raw pointers registered with the callbacks
  // above are now responsible for these objects, so give up local ownership.
  generate_request.release();
  request_release_payload.release();
}


// Fetch the metadata of 'model_name' / 'model_version' and index its inputs.
//
// 'metadata_root' receives the parsed metadata JSON document and
// 'input_metadata' receives one entry per element of its "inputs" array,
// keyed by the input's "name".
//
// Returns INVALID_ARG when 'model_name' is empty, otherwise any error raised
// while retrieving, serializing or parsing the metadata.
TRITONSERVER_Error*
HTTPAPIServer::ModelInputMetadata(
    const std::string& model_name, const int64_t model_version,
    std::map<std::string, triton::common::TritonJson::Value>* input_metadata,
    triton::common::TritonJson::Value* metadata_root)
{
  if (model_name.empty()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "Missing model name in metadata request");
  }

  {
    TRITONSERVER_Message* message = nullptr;
    RETURN_IF_ERR(TRITONSERVER_ServerModelMetadata(
        server_.get(), model_name.c_str(), model_version, &message));

    const char* buffer = nullptr;
    size_t byte_size = 0;
    TRITONSERVER_Error* err =
        TRITONSERVER_MessageSerializeToJson(message, &buffer, &byte_size);
    if (err == nullptr) {
      err = metadata_root->Parse(buffer, byte_size);
    }

    // Always release the message before returning, including on the error
    // paths, so that it is not leaked.
    if (message != nullptr) {
      LOG_TRITONSERVER_ERROR(
          TRITONSERVER_MessageDelete(message),
          "deleting model metadata message");
    }

    // Report serialization / parse failures instead of silently continuing
    // with an unparsed document.
    if (err != nullptr) {
      return err;
    }
  }

  // input
  triton::common::TritonJson::Value inputs;
  RETURN_IF_ERR(metadata_root->MemberAsArray("inputs", &inputs));
  for (size_t i = 0; i < inputs.ArraySize(); ++i) {
    triton::common::TritonJson::Value input;
    RETURN_IF_ERR(inputs.At(i, &input));
    std::string name = "";
    RETURN_IF_ERR(input.MemberAsString("name", &name));
    (*input_metadata)[name] = std::move(input);
  }

  return nullptr;  // success
}

// Walk the top-level keys of 'generate_request' and convert each of them
// according to 'schema':
//   - a key mapped as EXACT_MAPPING, or a key absent from the schema when the
//     schema allows unspecified keys, is handled by ExactMappingInput();
//   - a key mapped as MAPPING_SCHEMA must hold a JSON object, which is
//     converted recursively with the nested schema;
//   - anything else is rejected.
TRITONSERVER_Error*
HTTPAPIServer::GenerateRequestClass::ConvertGenerateRequest(
    std::map<std::string, triton::common::TritonJson::Value>& input_metadata,
    const MappingSchema* schema,
    triton::common::TritonJson::Value& generate_request)
{
  // Collect every top-level key of the JSON object.
  std::vector<std::string> keys;
  RETURN_IF_ERR(generate_request.Members(&keys));

  for (const auto& key : keys) {
    const auto child = schema->children_.find(key);

    // Key is not described by the schema.
    if (child == schema->children_.end()) {
      if (!schema->allow_unspecified_) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_UNSUPPORTED,
            "The schema disallow unspecified key");
      }
      // Unspecified key follows EXACT_MAPPING
      RETURN_IF_ERR(ExactMappingInput(key, generate_request, input_metadata));
      continue;
    }

    switch (child->second->kind_) {
      case MappingSchema::Kind::EXACT_MAPPING: {
        // Resolved against the model metadata.
        RETURN_IF_ERR(
            ExactMappingInput(key, generate_request, input_metadata));
        break;
      }
      case MappingSchema::Kind::MAPPING_SCHEMA: {
        // A nested-schema keyword must not collide with an input tensor name.
        const bool is_input_name =
            input_metadata.find(key) != input_metadata.end();
        if (is_input_name) {
          const std::string msg =
              "Keyword '" + key +
              "' for nested schema also given as input tensor name";
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
        }
        triton::common::TritonJson::Value nested_request;
        RETURN_MSG_IF_ERR(
            generate_request.MemberAsObject(key.c_str(), &nested_request),
            "Expected JSON object for keyword: '" + key + "'");
        RETURN_MSG_IF_ERR(
            ConvertGenerateRequest(
                input_metadata, child->second.get(), nested_request),
            "Converting keyword: '" + key + "'");
        break;
      }
      default:
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_UNSUPPORTED, "Unsupported schema kind");
    }
  }

  return nullptr;  // success
}

// Convert the member 'name' of 'generate_request' into part of the Triton
// request.
//
// When 'name' is not a model input (absent from 'input_metadata') the value
// is set as a request parameter. Otherwise it is treated as the data of the
// input tensor 'name': the value must be a primitive or a 1-D array, its
// element count is distributed over the input shape from the metadata, and
// the serialized bytes are stored in 'serialized_data_' and attached to the
// request.
//
// Returns INVALID_ARG when the value cannot be fitted to the input shape,
// INTERNAL when the metadata lacks a shape, or any error from the
// underlying JSON / Triton calls.
TRITONSERVER_Error*
HTTPAPIServer::GenerateRequestClass::ExactMappingInput(
    const std::string& name,
    triton::common::TritonJson::Value& generate_request,
    std::map<std::string, triton::common::TritonJson::Value>& input_metadata)
{
  auto it = input_metadata.find(name);
  if (it == input_metadata.end()) {
    RETURN_IF_ERR(SetTritonParameterFromJsonParameter(
        name, generate_request, triton_request_.get()));
    return nullptr;  // success
  }

  // Parse data type; propagate the failure rather than continuing with an
  // empty (and therefore invalid) type string.
  std::string datatype;
  RETURN_IF_ERR(it->second.MemberAsString("datatype", &datatype));
  auto dtype = TRITONSERVER_StringToDataType(datatype.c_str());

  // Perform shape validation, assume the value must be either
  // primitive type or 1-D array.
  triton::common::TritonJson::Value tensor_data;
  if (!generate_request.Find(name.c_str(), &tensor_data)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("unexpected key not found in generate request, "
                     "expecting key '") +
         name + "'")
            .c_str());
  }

  const size_t total_element_cnt =
      tensor_data.IsArray() ? tensor_data.ArraySize() : 1;

  size_t byte_size = 0;
  if (dtype == TRITONSERVER_TYPE_BYTES) {
    RETURN_IF_ERR(JsonBytesArrayByteSize(tensor_data, &byte_size));
  } else {
    byte_size = total_element_cnt * TRITONSERVER_DataTypeByteSize(dtype);
  }

  std::vector<int64_t> shape_vec;
  {
    triton::common::TritonJson::Value shape_json;
    if (!it->second.Find("shape", &shape_json)) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          (std::string(
               "Unexpected 'shape' not found in model metadata for input '") +
           name + "'")
              .c_str());
    }
    for (size_t i = 0; i < shape_json.ArraySize(); ++i) {
      int64_t d = 0;
      RETURN_IF_ERR(shape_json.IndexAsInt(i, &d));
      shape_vec.push_back(d);
    }

    const std::string shape_error =
        std::string("The schema can not convert input '") + name +
        "' to tensor with proper shape";

    // Because generate request don't carry too much shape information, using
    // a two-pass process to pad the request value to match input shape.
    // 1. iterate shape for fixed dimension to distribute 'element_cnt'.
    // 2. Set most inner dynamic shape to the remaining element count,
    //    other dynamic shape to be 1.
    size_t element_cnt = total_element_cnt;
    for (auto rit = shape_vec.rbegin(); rit != shape_vec.rend(); ++rit) {
      if (*rit != -1) {
        // A non-positive fixed dimension can never be matched and would
        // otherwise lead to a division by zero below.
        if (*rit <= 0) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG, shape_error.c_str());
        }
        const size_t dim = static_cast<size_t>(*rit);
        if ((element_cnt % dim) != 0) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG, shape_error.c_str());
        }
        element_cnt /= dim;
      }
    }
    for (auto rit = shape_vec.rbegin(); rit != shape_vec.rend(); ++rit) {
      if (*rit == -1) {
        *rit = element_cnt;
        element_cnt = 1;
      }
    }
    // Elements left over after filling every dimension do not fit the shape.
    if (element_cnt != 1) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG, shape_error.c_str());
    }
  }

  // Keep the serialized bytes alive in 'serialized_data_' because the
  // request only references them.
  serialized_data_.emplace_back();
  std::vector<char>& serialized = serialized_data_.back();
  serialized.resize(byte_size);
  // data() instead of &v[0]: both vectors may legitimately be empty (empty
  // array value, scalar shape) and indexing an empty vector is undefined.
  RETURN_IF_ERR(ReadDataFromJson(
      name.c_str(), tensor_data, serialized.data(), dtype,
      dtype == TRITONSERVER_TYPE_BYTES ? byte_size : total_element_cnt));

  RETURN_IF_ERR(TRITONSERVER_InferenceRequestAddInput(
      triton_request_.get(), name.c_str(), dtype, shape_vec.data(),
      shape_vec.size()));
  RETURN_IF_ERR(TRITONSERVER_InferenceRequestAppendInputData(
      triton_request_.get(), name.c_str(), serialized.data(),
      serialized.size(), TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */));

  return nullptr;  // success
}

// Handle an inference request for 'model_name' / 'model_version_str'.
//
// The (optionally compressed) HTTP body is converted into a Triton inference
// request, which is then submitted asynchronously. The response is produced
// by InferRequestClass::InferResponseComplete.
//
// Failures before the InferRequestClass object exists are reported through
// the RETURN_AND_RESPOND_* macros; later failures go through
// 'error_callback', which sends a JSON error reply and resumes the request.
void
HTTPAPIServer::HandleInfer(
    evhtp_request_t* req, const std::string& model_name,
    const std::string& model_version_str)
{
  RETURN_AND_RESPOND_IF_RESTRICTED(
      req, RestrictedCategory::INFERENCE, restricted_apis_);

  // Only POST is accepted on this endpoint.
  if (req->method != htp_method_POST) {
    RETURN_AND_RESPOND_WITH_ERR(
        req, EVHTP_RES_METHNALLOWED, "Method Not Allowed");
  }

  int64_t requested_model_version;
  RETURN_AND_RESPOND_IF_ERR(
      req, GetModelVersionFromString(
               model_version_str.c_str(), &requested_model_version));
  // Models with a decoupled transaction policy are rejected here.
  RETURN_AND_RESPOND_IF_ERR(
      req, CheckTransactionPolicy(req, model_name, requested_model_version));

  TRITONSERVER_InferenceTrace* triton_trace = nullptr;
  std::shared_ptr<TraceManager::Trace> trace;
  if (trace_manager_) {
    // If tracing is enabled see if this request should be traced.
    trace = StartTrace(req, model_name, &triton_trace);
  }

  // Decompress request body if it is compressed in supported type
  // 'decompressed_buffer' stays nullptr when the body is not compressed.
  evbuffer* decompressed_buffer = nullptr;
  RETURN_AND_RESPOND_IF_ERR(req, DecompressBuffer(req, &decompressed_buffer));

  // Get content length as a default header_length if no header specified
  int32_t content_length = 0;
  RETURN_AND_RESPOND_IF_ERR(
      req, GetContentLength(req, decompressed_buffer, &content_length));

  // Get the header length
  size_t header_length = 0;
  RETURN_AND_RESPOND_IF_ERR(
      req, GetInferenceHeaderLength(req, content_length, &header_length));

  // Create the inference request object which provides all information needed
  // for an inference. Make sure it is cleaned up on early error.
  TRITONSERVER_InferenceRequest* irequest = nullptr;
  RETURN_AND_RESPOND_IF_ERR(
      req, TRITONSERVER_InferenceRequestNew(
               &irequest, server_.get(), model_name.c_str(),
               requested_model_version));
  // Shared ownership of the Triton request; the deleter removes it when the
  // last holder goes away.
  std::shared_ptr<TRITONSERVER_InferenceRequest> irequest_shared(
      irequest, [](TRITONSERVER_InferenceRequest* request) {
        LOG_TRITONSERVER_ERROR(
            TRITONSERVER_InferenceRequestDelete(request),
            "deleting HTTP/REST inference request");
      });
  // HTTP request paused when creating inference request. Resume it on exit if
  // this function returns early due to error. Otherwise resumed in callback.
  // NOTE: 'connection_paused' is never modified within this function.
  bool connection_paused = true;
  auto infer_request = CreateInferRequest(req, irequest_shared);
  infer_request->trace_ = trace;

  const char* request_id = "<id_unknown>";
  // Callback to cleanup on any errors encountered below. Capture everything
  // by reference to capture local updates, except for shared pointers which
  // should be captured by value in case of ref count issues.
  auto error_callback = [&, trace](TRITONSERVER_Error* error) {
    if (error != nullptr) {
      LOG_VERBOSE(1) << "[request id: " << request_id << "] "
                     << "Infer failed: " << TRITONSERVER_ErrorMessage(error);
      AddContentTypeHeader(req, "application/json");
      EVBufferAddErrorJson(req->buffer_out, error);
      evhtp_send_reply(req, HttpCodeFromError(error));
      if (connection_paused) {
        evhtp_request_resume(req);
      }
#ifdef TRITON_ENABLE_TRACING
      // If HTTP server still owns Triton trace
      if ((trace != nullptr) && (trace->trace_ != nullptr)) {
        TraceManager::TraceRelease(trace->trace_, trace->trace_userp_);
      }
#endif  // TRITON_ENABLE_TRACING
    }
  };

  // Parse EV request and fill Triton request fields from it
  RETURN_AND_CALLBACK_IF_ERR(
      EVRequestToTritonRequest(
          req, model_name, irequest, decompressed_buffer, infer_request.get(),
          header_length),
      error_callback);

  // Get request ID for logging in case of error.
  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceRequestId(irequest, &request_id),
      "unable to retrieve request ID string");
  // Reset id to unknown if empty in core.
  if (!strncmp(request_id, "", 1)) {
    request_id = "<id_unknown>";
  }

  // Copy the HTTP headers selected by the forward pattern onto the request.
  RETURN_AND_CALLBACK_IF_ERR(ForwardHeaders(req, irequest), error_callback);

  // Payload handed to the request release callback; it carries the shared
  // request and the decompressed buffer (nullptr when not compressed).
  // NOTE(review): presumably the payload releases 'decompressed_buffer' when
  // destroyed - confirm against RequestReleasePayload. The early returns
  // above this point do not release it.
  auto request_release_payload = std::make_unique<RequestReleasePayload>(
      irequest_shared, decompressed_buffer);
  RETURN_AND_CALLBACK_IF_ERR(
      TRITONSERVER_InferenceRequestSetReleaseCallback(
          irequest, InferRequestClass::InferRequestComplete,
          request_release_payload.get()),
      error_callback);
  RETURN_AND_CALLBACK_IF_ERR(
      TRITONSERVER_InferenceRequestSetResponseCallback(
          irequest, allocator_,
          reinterpret_cast<void*>(&infer_request->alloc_payload_),
          InferRequestClass::InferResponseComplete,
          reinterpret_cast<void*>(infer_request.get())),
      error_callback);

  auto err =
      TRITONSERVER_ServerInferAsync(server_.get(), irequest, triton_trace);
#ifdef TRITON_ENABLE_TRACING
  // Ownership of trace passed to Triton core, set trace to null to mark it
  // as no longer owned here.
  // This is done before 'err' is checked, so 'error_callback' will not
  // release the trace when the submission itself failed.
  // NOTE(review): assumes the core releases the trace on failure - confirm.
  if (trace != nullptr) {
    trace->trace_ = nullptr;
  }
#endif  // TRITON_ENABLE_TRACING

  RETURN_AND_CALLBACK_IF_ERR(err, error_callback);
  // The request was accepted: the raw pointers registered with the callbacks
  // above are now responsible for these objects, so give up local ownership.
  infer_request.release();
  request_release_payload.release();
}

// Deferred callback that sends the HTTP reply for a completed inference and
// then destroys the InferRequestClass object passed in 'arg'.
//
// The reply is only sent when the HTTP request is still attached to the
// object; EvHtpRequest() may return nullptr, in which case nothing is sent.
void
HTTPAPIServer::InferRequestClass::ReplyCallback(
    evthr_t* thr, void* arg, void* shared)
{
  HTTPAPIServer::InferRequestClass* infer_request =
      reinterpret_cast<HTTPAPIServer::InferRequestClass*>(arg);

  evhtp_request_t* request = infer_request->EvHtpRequest();

  if (request != nullptr) {
    evhtp_send_reply(request, infer_request->response_code_);
    evhtp_request_resume(request);
  }

#ifdef TRITON_ENABLE_TRACING
  // The send timestamps live on the HTTP request, so they can only be
  // recorded while the request is still available.
  if ((infer_request->trace_ != nullptr) && (request != nullptr)) {
    infer_request->trace_->CaptureTimestamp(
        "HTTP_SEND_START", request->send_start_ns);
    infer_request->trace_->CaptureTimestamp(
        "HTTP_SEND_END", request->send_end_ns);
  }
#endif  // TRITON_ENABLE_TRACING

  delete infer_request;
}

// Hook invoked by evhtp when 'request' is being finalized. 'arg' is the
// InferRequestClass object registered for that request: the associated
// Triton request is cancelled and the object's reference to the HTTP
// request is cleared. A request that does not match the registered one is
// reported as an internal error.
evhtp_res
HTTPAPIServer::InferRequestClass::RequestFiniHook(
    evhtp_request* request, void* arg)
{
  auto* infer_request =
      reinterpret_cast<HTTPAPIServer::InferRequestClass*>(arg);

  if (infer_request->req_ == request) {
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceRequestCancel(
            infer_request->triton_request_.get()),
        "cancelling request");
    // The HTTP request must not be touched after this hook returns.
    infer_request->req_ = nullptr;
    return EVHTP_RES_OK;
  }

  LOG_ERROR << "[INTERNAL] mismatched request in fini hook";
  return EVHTP_RES_ERROR;
}

// Bind an HTTP request to its Triton inference request. Construction
// records the thread of the request's connection, pauses the HTTP request
// and registers RequestFiniHook so that this object is notified when the
// HTTP request is finalized.
HTTPAPIServer::InferRequestClass::InferRequestClass(
    TRITONSERVER_Server* server, evhtp_request_t* req,
    DataCompressor::Type response_compression_type,
    const std::shared_ptr<TRITONSERVER_InferenceRequest>& triton_request,
    const std::shared_ptr<SharedMemoryManager>& shm_manager)
    : server_(server), req_(req),
      response_compression_type_(response_compression_type), response_count_(0),
      triton_request_(triton_request), shm_manager_(shm_manager)
{
  // Remember the thread that owns the connection.
  thread_ = evhtp_request_get_connection(req)->thread;

  evhtp_request_pause(req);

  void* const hook_arg = reinterpret_cast<void*>(this);
  evhtp_request_set_hook(
      req_, evhtp_hook_on_request_fini, (evhtp_hook)(void*)RequestFiniHook,
      hook_arg);
}

// Request release callback. 'userp' is the RequestReleasePayload registered
// with the request; it is destroyed once 'flags' carries
// TRITONSERVER_REQUEST_RELEASE_ALL and left alone otherwise.
void
HTTPAPIServer::InferRequestClass::InferRequestComplete(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  // FIXME need to manage the lifetime of InferRequestClass so that we
  // delete it here.

  const bool release_all = (flags & TRITONSERVER_REQUEST_RELEASE_ALL) != 0;
  if (!release_all) {
    return;
  }

  delete reinterpret_cast<RequestReleasePayload*>(userp);
}

// Response callback for the infer endpoint. 'userp' is the
// InferRequestClass object for the request.
//
// Exactly one response is expected: it is finalized into the HTTP reply,
// while any other count is turned into an INTERNAL error. Once a callback
// carries the FINAL flag, sending the reply is deferred to ReplyCallback on
// the thread recorded for the request.
void
HTTPAPIServer::InferRequestClass::InferResponseComplete(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
  // FIXME can't use InferRequestClass object here since it's lifetime
  // is different than response. For response we need to know how to
  // send each output (as json, shm, or binary) and that information
  // has to be maintained in a way that allows us to clean it up
  // appropriately if connection closed or last response sent.
  //
  // But for now userp is the InferRequestClass object and the end of
  // its life is in the ReplyCallback.

  HTTPAPIServer::InferRequestClass* infer_request =
      reinterpret_cast<HTTPAPIServer::InferRequestClass*>(userp);

  if (response != nullptr) {
    ++infer_request->response_count_;
  }

  TRITONSERVER_Error* err = nullptr;
  if (infer_request->response_count_ != 1) {
    err = TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        std::string(
            "expected a single response, got " +
            std::to_string(infer_request->response_count_))
            .c_str());
  } else if (response != nullptr) {
    err = infer_request->FinalizeResponse(response);
#ifdef TRITON_ENABLE_TRACING
    if (infer_request->trace_ != nullptr) {
      infer_request->trace_->CaptureTimestamp(
          "INFER_RESPONSE_COMPLETE", TraceManager::CaptureTimestamp());
    }
#endif  // TRITON_ENABLE_TRACING
  }


  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceResponseDelete(response),
      "deleting inference response");

  if (err != nullptr) {
    // RequestFiniHook clears 'req_' once the HTTP request has been
    // finalized; there is then no output buffer to write the error to.
    evhtp_request_t* http_request = infer_request->req_;
    if (http_request != nullptr) {
      EVBufferAddErrorJson(http_request->buffer_out, err);
    }
    infer_request->response_code_ = HttpCodeFromError(err);
    TRITONSERVER_ErrorDelete(err);
  }

  // Defer sending the response until FINAL flag is seen
  if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) == 0) {
    return;
  }
  evthr_defer(
      infer_request->thread_, InferRequestClass::ReplyCallback, infer_request);
}

// Convert a Triton inference response into the HTTP reply body.
//
// Builds the JSON header ("id", "model_name", "model_version", optional
// "parameters", and "outputs"), appends any binary output buffers after the
// JSON in output order, optionally compresses the result according to
// response_compression_type_, sets the response headers and moves the body
// into req_->buffer_out.
//
// Returns nullptr on success, otherwise an error owned by the caller. On
// error nothing has been written to req_->buffer_out by this function.
TRITONSERVER_Error*
HTTPAPIServer::InferRequestClass::FinalizeResponse(
    TRITONSERVER_InferenceResponse* response)
{
  RETURN_IF_ERR(TRITONSERVER_InferenceResponseError(response));

  triton::common::TritonJson::Value response_json(
      triton::common::TritonJson::ValueType::OBJECT);

  // The "id" field is only emitted when the request carried a non-empty id.
  const char* request_id = "";
  RETURN_IF_ERR(TRITONSERVER_InferenceResponseId(response, &request_id));
  if (strncmp(request_id, "", 1)) {
    RETURN_IF_ERR(response_json.AddStringRef("id", request_id));
  }

  const char* model_name;
  int64_t model_version;
  RETURN_IF_ERR(TRITONSERVER_InferenceResponseModel(
      response, &model_name, &model_version));
  RETURN_IF_ERR(response_json.AddStringRef("model_name", model_name));
  RETURN_IF_ERR(
      response_json.AddString("model_version", std::to_string(model_version)));

  // If the response has any parameters, convert them to JSON.
  uint32_t parameter_count;
  RETURN_IF_ERR(
      TRITONSERVER_InferenceResponseParameterCount(response, &parameter_count));
  if (parameter_count > 0) {
    triton::common::TritonJson::Value params_json(
        response_json, triton::common::TritonJson::ValueType::OBJECT);

    for (uint32_t pidx = 0; pidx < parameter_count; ++pidx) {
      const char* name;
      TRITONSERVER_ParameterType type;
      const void* vvalue;
      RETURN_IF_ERR(TRITONSERVER_InferenceResponseParameter(
          response, pidx, &name, &type, &vvalue));
      switch (type) {
        case TRITONSERVER_PARAMETER_BOOL:
          RETURN_IF_ERR(params_json.AddBool(
              name, *(reinterpret_cast<const bool*>(vvalue))));
          break;
        case TRITONSERVER_PARAMETER_INT:
          RETURN_IF_ERR(params_json.AddInt(
              name, *(reinterpret_cast<const int64_t*>(vvalue))));
          break;
        case TRITONSERVER_PARAMETER_STRING:
          RETURN_IF_ERR(params_json.AddStringRef(
              name, reinterpret_cast<const char*>(vvalue)));
          break;
        case TRITONSERVER_PARAMETER_DOUBLE:
          RETURN_IF_ERR(params_json.AddDouble(
              name, *(reinterpret_cast<const double*>(vvalue))));
          break;
        case TRITONSERVER_PARAMETER_BYTES:
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "Response parameter of type 'TRITONSERVER_PARAMETER_BYTES' is "
              "not currently supported");
          break;
      }
    }

    RETURN_IF_ERR(response_json.Add("parameters", std::move(params_json)));
  }

  // Go through each response output and transfer information to JSON
  uint32_t output_count;
  RETURN_IF_ERR(
      TRITONSERVER_InferenceResponseOutputCount(response, &output_count));

  // Binary output buffers, in the order they must follow the JSON header.
  // The buffers stay owned by their OutputInfo; only pointers are kept here.
  std::vector<evbuffer*> ordered_buffers;
  ordered_buffers.reserve(output_count);

  triton::common::TritonJson::Value response_outputs(
      response_json, triton::common::TritonJson::ValueType::ARRAY);

  for (uint32_t idx = 0; idx < output_count; ++idx) {
    const char* cname;
    TRITONSERVER_DataType datatype;
    const int64_t* shape;
    uint64_t dim_count;
    const void* base;
    size_t byte_size;
    TRITONSERVER_MemoryType memory_type;
    int64_t memory_type_id;
    void* userp;

    RETURN_IF_ERR(TRITONSERVER_InferenceResponseOutput(
        response, idx, &cname, &datatype, &shape, &dim_count, &base, &byte_size,
        &memory_type, &memory_type_id, &userp));

    triton::common::TritonJson::Value output_json(
        response_json, triton::common::TritonJson::ValueType::OBJECT);
    RETURN_IF_ERR(output_json.AddStringRef("name", cname));

    // Handle data. SHM outputs will not have an info.
    auto info = reinterpret_cast<AllocPayload::OutputInfo*>(userp);

    size_t element_count = 1;
    uint32_t batch_size = 0;

    // If returning output as classification then need to set the
    // datatype and shape based on classification requirements.
    if ((info != nullptr) && (info->class_cnt_ > 0)) {
      // For classification need to determine the batch size, if any,
      // because need to use that to break up the response for each
      // batch entry.
      uint32_t batch_flags;
      RETURN_IF_ERR(TRITONSERVER_ServerModelBatchProperties(
          server_, model_name, model_version, &batch_flags,
          nullptr /* voidp */));
      if ((dim_count > 0) &&
          ((batch_flags & TRITONSERVER_BATCH_FIRST_DIM) != 0)) {
        batch_size = shape[0];
      }

      // Determine the batch1 byte size of the output tensor... needed
      // when the response tensor batch-size > 1 so that we know how
      // to stride though the tensor data.
      size_t batch1_element_count = 1;
      for (size_t sidx = ((batch_size == 0) ? 0 : 1); sidx < dim_count;
           sidx++) {
        batch1_element_count *= shape[sidx];
      }

      const size_t batch1_byte_size =
          batch1_element_count * TRITONSERVER_DataTypeByteSize(datatype);

      // Create the classification contents
      std::string serialized;

      size_t class_offset = 0;
      for (uint32_t bs = 0; bs < std::max((uint32_t)1, batch_size); ++bs) {
        std::vector<std::string> class_strs;
        // A zero byte size is passed when the computed slice would run past
        // the end of the output buffer.
        RETURN_IF_ERR(TopkClassifications(
            response, idx, reinterpret_cast<const char*>(base) + class_offset,
            ((class_offset + batch1_byte_size) > byte_size) ? 0
                                                            : batch1_byte_size,
            datatype, info->class_cnt_, &class_strs));

        // Serialize for binary representation...
        // (each string is prefixed with its length as a 4-byte value)
        for (const auto& str : class_strs) {
          uint32_t len = str.size();
          serialized.append(reinterpret_cast<const char*>(&len), sizeof(len));
          if (len > 0) {
            serialized.append(str);
          }
        }

        class_offset += batch1_byte_size;
      }

      // Replace existing output with serialized classification output.
      const char* datatype_str =
          TRITONSERVER_DataTypeString(TRITONSERVER_TYPE_BYTES);
      RETURN_IF_ERR(output_json.AddStringRef("datatype", datatype_str));

      triton::common::TritonJson::Value shape_json(
          response_json, triton::common::TritonJson::ValueType::ARRAY);
      if (batch_size > 0) {
        RETURN_IF_ERR(shape_json.AppendUInt(batch_size));
        element_count *= batch_size;
      }
      size_t actual_class_count =
          std::min((size_t)info->class_cnt_, batch1_element_count);
      element_count *= actual_class_count;
      RETURN_IF_ERR(shape_json.AppendUInt(actual_class_count));
      RETURN_IF_ERR(output_json.Add("shape", std::move(shape_json)));

      // Swap the raw tensor buffer for one holding the serialized strings.
      evbuffer_free(info->evbuffer_);
      info->evbuffer_ = nullptr;

      void* buffer;
      byte_size = serialized.size();
      RETURN_IF_ERR(AllocEVBuffer(byte_size, &info->evbuffer_, &buffer));
      memcpy(buffer, serialized.c_str(), byte_size);
      base = reinterpret_cast<const void*>(buffer);
      datatype = TRITONSERVER_TYPE_BYTES;
    } else {
      const char* datatype_str = TRITONSERVER_DataTypeString(datatype);
      RETURN_IF_ERR(output_json.AddStringRef("datatype", datatype_str));

      triton::common::TritonJson::Value shape_json(
          response_json, triton::common::TritonJson::ValueType::ARRAY);
      for (size_t j = 0; j < dim_count; j++) {
        RETURN_IF_ERR(shape_json.AppendUInt(shape[j]));
        element_count *= shape[j];
      }

      RETURN_IF_ERR(output_json.Add("shape", std::move(shape_json)));
    }

    // Add JSON data, or collect binary data. An output without an info
    // carries no inline data, so it must not be dereferenced here.
    if ((info != nullptr) &&
        (info->kind_ == AllocPayload::OutputInfo::BINARY)) {
      triton::common::TritonJson::Value parameters_json;
      if (!output_json.Find("parameters", &parameters_json)) {
        parameters_json = triton::common::TritonJson::Value(
            response_json, triton::common::TritonJson::ValueType::OBJECT);
        RETURN_IF_ERR(parameters_json.AddUInt("binary_data_size", byte_size));
        RETURN_IF_ERR(
            output_json.Add("parameters", std::move(parameters_json)));
      } else {
        RETURN_IF_ERR(parameters_json.AddUInt("binary_data_size", byte_size));
      }
      if (byte_size > 0) {
        ordered_buffers.push_back(info->evbuffer_);
      }
    } else if (
        (info != nullptr) && (info->kind_ == AllocPayload::OutputInfo::JSON)) {
      triton::common::TritonJson::Value data_json(
          response_json, triton::common::TritonJson::ValueType::ARRAY);
      RETURN_IF_ERR(WriteDataToJson(
          &data_json, cname, datatype, base, byte_size, element_count));
      RETURN_IF_ERR(output_json.Add("data", std::move(data_json)));
    }

    RETURN_IF_ERR(response_outputs.Append(std::move(output_json)));
  }

  RETURN_IF_ERR(response_json.Add("outputs", std::move(response_outputs)));

  // Serialize the JSON before allocating the evbuffer so that a
  // serialization failure cannot leak the buffer.
  triton::common::TritonJson::WriteBuffer buffer;
  RETURN_IF_ERR(response_json.Write(&buffer));

  // Write json metadata into response evbuffer
  evbuffer* response_placeholder = evbuffer_new();
  evbuffer_add(response_placeholder, buffer.Base(), buffer.Size());

  // If there is binary data write it next in the appropriate
  // order... also need the HTTP header when returning binary data.
  if (!ordered_buffers.empty()) {
    for (evbuffer* b : ordered_buffers) {
      evbuffer_add_buffer(response_placeholder, b);
    }
  }

  evbuffer* response_body = response_placeholder;
  switch (response_compression_type_) {
    case DataCompressor::Type::DEFLATE:
    case DataCompressor::Type::GZIP: {
      auto compressed_buffer = evbuffer_new();
      auto err = DataCompressor::CompressData(
          response_compression_type_, response_placeholder, compressed_buffer);
      if (err == nullptr) {
        response_body = compressed_buffer;
        evbuffer_free(response_placeholder);
      } else {
        // just log the compression error and return the uncompressed data
        LOG_VERBOSE(1) << "unable to compress response: "
                       << TRITONSERVER_ErrorMessage(err);
        TRITONSERVER_ErrorDelete(err);
        evbuffer_free(compressed_buffer);
        // Ensure the headers do not advertise an encoding that was not
        // applied.
        response_compression_type_ = DataCompressor::Type::IDENTITY;
      }
      break;
    }
    case DataCompressor::Type::IDENTITY:
    case DataCompressor::Type::UNKNOWN:
      // Do nothing for other cases
      break;
  }
  // The header length passed here is the size of the uncompressed JSON part.
  SetResponseHeader(!ordered_buffers.empty(), buffer.Size());
  evbuffer_add_buffer(req_->buffer_out, response_body);
  // Destroy the evbuffer object as the data has been moved
  // to HTTP response buffer
  evbuffer_free(response_body);

  return nullptr;  // success
}

// Populate the outgoing HTTP headers for an inference reply.
//
// has_binary_data: true when binary tensor data follows the JSON header; the
//   content type becomes octet-stream and the JSON header length is
//   advertised via kInferHeaderContentLengthHTTPHeader.
// header_length: value written to that header.
//
// A Content-Encoding header is added when response_compression_type_ is
// DEFLATE or GZIP.
void
HTTPAPIServer::InferRequestClass::SetResponseHeader(
    bool has_binary_data, size_t header_length)
{
  if (!has_binary_data) {
    AddContentTypeHeader(req_, "application/json");
  } else {
    AddContentTypeHeader(req_, "application/octet-stream");
    const std::string length_str = std::to_string(header_length);
    evhtp_headers_add_header(
        req_->headers_out,
        evhtp_header_new(
            kInferHeaderContentLengthHTTPHeader, length_str.c_str(), 1, 1));
  }

  // Pick the encoding token, if any, that matches the compression in use.
  const char* content_encoding = nullptr;
  switch (response_compression_type_) {
    case DataCompressor::Type::DEFLATE:
      content_encoding = "deflate";
      break;
    case DataCompressor::Type::GZIP:
      content_encoding = "gzip";
      break;
    case DataCompressor::Type::IDENTITY:
    case DataCompressor::Type::UNKNOWN:
      break;
  }

  if (content_encoding != nullptr) {
    evhtp_headers_add_header(
        req_->headers_out,
        evhtp_header_new(kContentEncodingHTTPHeader, content_encoding, 1, 1));
  }
}

// Increment the response counter and return the value it held BEFORE the
// increment (post-increment semantics), so the first caller observes 0.
// NOTE(review): whether this is safe to call concurrently depends on the
// declared type of response_count_, which is not visible here - confirm.
uint32_t
HTTPAPIServer::InferRequestClass::IncrementResponseCount()
{
  return response_count_++;
}

// Release every response buffer that was queued but never sent.
HTTPAPIServer::GenerateRequestClass::~GenerateRequestClass()
{
  for (; !pending_http_responses_.empty(); pending_http_responses_.pop()) {
    evbuffer* unsent = pending_http_responses_.front();
    evbuffer_free(unsent);
  }
}

// Response-complete callback for the generate / generate_stream endpoints.
// 'userp' is the owning GenerateRequestClass.
//
// Each response (or the error produced while finalizing it) is queued as a
// pending HTTP chunk. The first callback starts the chunked reply, and every
// callback schedules either a chunk send or, when the FINAL flag is set, the
// end of the reply on the request's evhtp thread. The Triton response object
// is deleted before returning.
void
HTTPAPIServer::GenerateRequestClass::InferResponseComplete(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
  // FIXME can't use InferRequestClass object here since it's lifetime
  // is different than response. For response we need to know how to
  // send each output (as json, shm, or binary) and that information
  // has to be maintained in a way that allows us to clean it up
  // appropriately if connection closed or last response sent.
  //
  // But for now userp is the InferRequestClass object and the end of
  // its life is in the ReplyCallback.

  auto infer_request =
      reinterpret_cast<HTTPAPIServer::GenerateRequestClass*>(userp);

  // Assuming responses of the same request is sent in sequence.

  TRITONSERVER_Error* err = nullptr;
  if (response != nullptr) {
    err = infer_request->FinalizeResponse(response);
  }

  // Resolve the HTTP status while 'err' is still alive: AddErrorJson()
  // deletes the error object it is handed, so reading it afterwards would
  // be a use-after-free.
  const auto response_code = HttpCodeFromError(err);
  if (err != nullptr) {
    infer_request->AddErrorJson(err);
    err = nullptr;  // ownership was consumed by AddErrorJson()
  }


  // First response starts the chunked response, the response code is set here
  // so user should check response body in case of error at later time.
  if (infer_request->IncrementResponseCount() == 0) {
    infer_request->response_code_ = response_code;
    evthr_defer(infer_request->thread_, StartResponse, infer_request);
  }

#ifdef TRITON_ENABLE_TRACING
  if (infer_request->trace_ != nullptr) {
    infer_request->trace_->CaptureTimestamp(
        "INFER_RESPONSE_COMPLETE", TraceManager::CaptureTimestamp());
  }
#endif  // TRITON_ENABLE_TRACING

  // Final flag indicates there is no more responses, ending chunked response.
  // EndResponseCallback deletes 'infer_request', so it must not be used
  // after this point.
  if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != 0) {
    evthr_defer(infer_request->thread_, EndResponseCallback, infer_request);
  } else {
    evthr_defer(infer_request->thread_, ChunkResponseCallback, infer_request);
  }

  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceResponseDelete(response),
      "deleting inference response");
}

// Deferred callback that begins the chunked HTTP reply for a generate
// request. 'arg' is the GenerateRequestClass. Does nothing when the
// underlying evhtp request is no longer available.
void
HTTPAPIServer::GenerateRequestClass::StartResponse(
    evthr_t* thr, void* arg, void* shared)
{
  auto* generate_request =
      reinterpret_cast<HTTPAPIServer::GenerateRequestClass*>(arg);

  auto req = generate_request->EvHtpRequest();
  if (req == nullptr) {
    return;
  }


#ifdef TRITON_ENABLE_METRICS
  // logic to add kv_cache metrics to response header
  // Get the metrics in Prometheus format

  // ENDPOINT_LOAD_METRICS_TYPE is request header that specifies which load
  // report format `endpoint-load-metrics` will be in. If not present, the
  // response header will not be written and the feature is disabled.
  //
  // The valid values for ENDPOINT_LOAD_METRICS_TYPE header are:
  //
  // "text"
  // "json"
  //
  // Any other value will have behavior equivalent to being unset while also
  // logging an error.
  auto server = generate_request->EvHtpServer();
  evhtp_header_t* metric_format_header =
      evhtp_headers_find_header(req->headers_in, ENDPOINT_LOAD_METRICS_TYPE);
  const char* orca_metric_format =
      (metric_format_header != nullptr) ? metric_format_header->val : nullptr;

  if ((orca_metric_format != nullptr) && (server != nullptr)) {
    SetEndpointLoadMetricsHeader(req, orca_metric_format, server);
  }
#endif  // TRITON_ENABLE_METRICS

  // Streaming replies are delivered as server-sent events.
  if (!generate_request->streaming_) {
    AddContentTypeHeader(req, "application/json");
  } else {
    AddContentTypeHeader(req, "text/event-stream; charset=utf-8");
  }

  evhtp_send_reply_chunk_start(req, generate_request->response_code_);
  evhtp_request_resume(req);
}

// Deferred callback that sends the next pending chunk of a generate reply.
// 'arg' is the GenerateRequestClass; nothing is sent if its evhtp request
// pointer has been cleared.
void
HTTPAPIServer::GenerateRequestClass::ChunkResponseCallback(
    evthr_t* thr, void* arg, void* shared)
{
  auto* generate_request =
      reinterpret_cast<HTTPAPIServer::GenerateRequestClass*>(arg);

  if (generate_request->req_ != nullptr) {
    generate_request->SendChunkResponse(false /* end */);
  }
}

// Deferred callback that finishes a generate reply: sends the last pending
// chunk and terminates the chunked response if the evhtp request is still
// available, then destroys the GenerateRequestClass passed in 'arg'.
void
HTTPAPIServer::GenerateRequestClass::EndResponseCallback(
    evthr_t* thr, void* arg, void* shared)
{
  auto* generate_request =
      reinterpret_cast<HTTPAPIServer::GenerateRequestClass*>(arg);

  const bool request_alive = (generate_request->EvHtpRequest() != nullptr);
  if (request_alive) {
    generate_request->SendChunkResponse(true /* end */);
    evhtp_send_reply_chunk_end(generate_request->EvHtpRequest());
  }

  // This callback marks the end of the request object's life.
  delete generate_request;
}

void
HTTPAPIServer::GenerateRequestClass::SendChunkResponse(bool end)
{
  // check if response count in the case of non-streaming
  if (!streaming_) {
    std::lock_guard<std::mutex> lk(res_mtx_);
    // For non-streaming, wait until end
    if (!end) {
      return;
    }
    if (pending_http_responses_.size() != 1) {
      EVBufferAddErrorJson(
          req_->buffer_out, TRITONSERVER_ErrorNew(
                                TRITONSERVER_ERROR_INTERNAL,
                                "generate expects model to produce exactly 1 "
                                "response, use generate stream for model that "
                                "generates various number of responses"));
      evhtp_send_reply_chunk(req_, req_->buffer_out);
      return;
    }
  }

  evbuffer* buffer = nullptr;
  {
    std::lock_guard<std::mutex> lk(res_mtx_);
    // This function may be called with no pending responses when
    // response complete callback is invoked with flag-only
    if (pending_http_responses_.empty()) {
      return;
    }
    buffer = pending_http_responses_.front();
    pending_http_responses_.pop();
  }
  evhtp_send_reply_chunk(req_, buffer);
  evbuffer_free(buffer);

#ifdef TRITON_ENABLE_TRACING
  if (trace_ != nullptr) {
    // [FIXME] currently send_start_ns / send_end_ns is
    // not captured in evhtp when response is sent in chunks
    trace_->CaptureTimestamp("HTTP_SEND_START", req_->send_start_ns);
    trace_->CaptureTimestamp("HTTP_SEND_END", req_->send_end_ns);
  }
#endif  // TRITON_ENABLE_TRACING
}

// Convert a Triton response into a generate-endpoint JSON body and queue it
// on pending_http_responses_.
//
// Reserved fields ("id", "model_name", "model_version"), response parameters
// and output tensors are collected by name, mapped through response_schema_,
// and any remaining entries are added verbatim when the schema allows
// unspecified fields. In streaming mode the JSON is wrapped as a server-sent
// event ("data: " ... "\n\n").
//
// Returns nullptr on success, otherwise an error owned by the caller; on
// error nothing is queued.
TRITONSERVER_Error*
HTTPAPIServer::GenerateRequestClass::FinalizeResponse(
    TRITONSERVER_InferenceResponse* response)
{
  // Remembered so that ExactMappingOutput() can read values from it.
  triton_response_ = response;
  RETURN_IF_ERR(TRITONSERVER_InferenceResponseError(response));

  triton::common::TritonJson::Value response_json(
      triton::common::TritonJson::ValueType::OBJECT);

  // Response metadata in addition to output tensor / parameter falls under
  // "unspecified field" with predefined name:
  // "id", "model_name", "model_version"
  std::map<std::string, TritonOutput> triton_outputs;
  const char* id = "";
  RETURN_IF_ERR(TRITONSERVER_InferenceResponseId(response, &id));
  if (strncmp(id, "", 1)) {
    triton_outputs.emplace(
        "id", TritonOutput(TritonOutput::Type::RESERVED, id));
  }
  const char* model_name;
  int64_t model_version;
  RETURN_IF_ERR(TRITONSERVER_InferenceResponseModel(
      response, &model_name, &model_version));
  triton_outputs.emplace(
      "model_name", TritonOutput(TritonOutput::Type::RESERVED, model_name));
  triton_outputs.emplace(
      "model_version",
      TritonOutput(
          TritonOutput::Type::RESERVED, std::to_string(model_version)));

  // If the response has any parameters, convert them to JSON.
  uint32_t parameter_count;
  RETURN_IF_ERR(
      TRITONSERVER_InferenceResponseParameterCount(response, &parameter_count));
  if (parameter_count > 0) {
    for (uint32_t pidx = 0; pidx < parameter_count; ++pidx) {
      const char* name;
      TRITONSERVER_ParameterType type;
      const void* vvalue;
      RETURN_IF_ERR(TRITONSERVER_InferenceResponseParameter(
          response, pidx, &name, &type, &vvalue));
      switch (type) {
        case TRITONSERVER_PARAMETER_BOOL:
        case TRITONSERVER_PARAMETER_INT:
        case TRITONSERVER_PARAMETER_STRING:
        case TRITONSERVER_PARAMETER_DOUBLE:
          // Only the index is recorded; the value is read back later.
          triton_outputs.emplace(
              name, TritonOutput(TritonOutput::Type::PARAMETER, pidx));
          break;
        case TRITONSERVER_PARAMETER_BYTES:
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              (std::string("Response parameter '") + name +
               "' has type 'TRITONSERVER_PARAMETER_BYTES' which is "
               "not currently supported")
                  .c_str());
          break;
      }
    }
  }

  // Go through each response output and transfer information to JSON
  uint32_t output_count;
  RETURN_IF_ERR(
      TRITONSERVER_InferenceResponseOutputCount(response, &output_count));

  for (uint32_t idx = 0; idx < output_count; ++idx) {
    const char* cname;
    TRITONSERVER_DataType datatype;
    const int64_t* shape;
    uint64_t dim_count;
    const void* base;
    size_t byte_size;
    TRITONSERVER_MemoryType memory_type;
    int64_t memory_type_id;
    void* userp;

    // Only the output name is needed at this stage.
    RETURN_IF_ERR(TRITONSERVER_InferenceResponseOutput(
        response, idx, &cname, &datatype, &shape, &dim_count, &base, &byte_size,
        &memory_type, &memory_type_id, &userp));
    triton_outputs.emplace(
        cname, TritonOutput(TritonOutput::Type::TENSOR, idx));
  }

  std::set<std::string> mapped_outputs;
  RETURN_IF_ERR(ConvertGenerateResponse(
      triton_outputs, response_schema_, &response_json, &mapped_outputs));
  if (response_schema_->allow_unspecified_) {
    // Emit everything the schema did not explicitly map.
    for (const auto& to : triton_outputs) {
      if (mapped_outputs.find(to.first) == mapped_outputs.end()) {
        RETURN_IF_ERR(ExactMappingOutput(
            to.first, to.second, &response_json, &mapped_outputs));
      }
    }
  }

  // Serialize the JSON before allocating the evbuffer so that a
  // serialization failure cannot leak the buffer.
  triton::common::TritonJson::WriteBuffer buffer;
  RETURN_IF_ERR(response_json.Write(&buffer));

  // [FIXME] compression
  evbuffer* response_body = evbuffer_new();
  if (streaming_) {
    static std::string sse_prefix = "data: ";
    evbuffer_add(response_body, sse_prefix.c_str(), sse_prefix.length());
  }
  // Write json metadata into response evbuffer
  evbuffer_add(response_body, buffer.Base(), buffer.Size());
  if (streaming_) {
    static std::string sse_suffix = "\n\n";
    evbuffer_add(response_body, sse_suffix.c_str(), sse_suffix.length());
  }

  {
    std::lock_guard<std::mutex> lk(res_mtx_);
    pending_http_responses_.emplace(response_body);
  }

  return nullptr;  // success
}

// Queue 'error' as a pending HTTP response, rendered as error JSON and, in
// streaming mode, wrapped as a server-sent event. Takes ownership of
// 'error': it is deleted before this function returns.
void
HTTPAPIServer::GenerateRequestClass::AddErrorJson(TRITONSERVER_Error* error)
{
  static const char kSsePrefix[] = "data: ";
  static const char kSseSuffix[] = "\n\n";

  evbuffer* error_body = evbuffer_new();

  if (streaming_) {
    // sizeof includes the terminating NUL, which must not be sent.
    evbuffer_add(error_body, kSsePrefix, sizeof(kSsePrefix) - 1);
  }
  EVBufferAddErrorJson(error_body, error);
  if (streaming_) {
    evbuffer_add(error_body, kSseSuffix, sizeof(kSseSuffix) - 1);
  }

  TRITONSERVER_ErrorDelete(error);

  std::lock_guard<std::mutex> lk(res_mtx_);
  pending_http_responses_.emplace(error_body);
}

// Populate 'generate_response' from 'output_metadata' following 'schema'.
//
// Nested schemas become nested JSON objects (handled recursively); exact
// mappings copy the like-named output through ExactMappingOutput(). A
// missing output is an error unless the schema entry allows it to be
// unspecified. Names that were emitted are recorded in 'mapped_outputs'.
//
// Returns nullptr on success, otherwise an error owned by the caller.
TRITONSERVER_Error*
HTTPAPIServer::GenerateRequestClass::ConvertGenerateResponse(
    const std::map<
        std::string, HTTPAPIServer::GenerateRequestClass::TritonOutput>&
        output_metadata,
    const MappingSchema* schema,
    triton::common::TritonJson::Value* generate_response,
    std::set<std::string>* mapped_outputs)
{
  for (const auto& child : schema->children_) {
    const std::string& field_name = child.first;
    const auto child_kind = child.second->kind_;

    if (child_kind == MappingSchema::Kind::MAPPING_SCHEMA) {
      // Build the nested object first, then attach it under the field name.
      triton::common::TritonJson::Value nested_response(
          *generate_response, triton::common::TritonJson::ValueType::OBJECT);
      RETURN_IF_ERR(ConvertGenerateResponse(
          output_metadata, child.second.get(), &nested_response,
          mapped_outputs));
      RETURN_IF_ERR(generate_response->Add(
          field_name.c_str(), std::move(nested_response)));
    } else if (child_kind == MappingSchema::Kind::EXACT_MAPPING) {
      const auto found = output_metadata.find(field_name);
      if (found != output_metadata.end()) {
        RETURN_IF_ERR(ExactMappingOutput(
            field_name, found->second, generate_response, mapped_outputs));
      } else if (!child.second->allow_unspecified_) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            (std::string("Schema requires output '") + field_name +
             "' to be produced by the model.")
                .c_str());
      }
    } else {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED, "Unsupported schema kind");
    }
  }
  return nullptr;  // success
}

// Add a single Triton output to 'generate_response' and record 'name' in
// 'mapped_outputs'.
//
// - RESERVED: added as a string under 'name' using the stored value.
// - PARAMETER: read back from triton_response_ by index and added with its
//   native JSON type under the parameter's own name.
// - TENSOR: read back from triton_response_ by index and added as JSON data
//   under the tensor's own name; a single-element tensor is emitted as a
//   scalar rather than a one-element array.
//
// Returns nullptr on success, otherwise an error owned by the caller.
TRITONSERVER_Error*
HTTPAPIServer::GenerateRequestClass::ExactMappingOutput(
    const std::string& name,
    const HTTPAPIServer::GenerateRequestClass::TritonOutput& triton_output,
    triton::common::TritonJson::Value* generate_response,
    std::set<std::string>* mapped_outputs)
{
  mapped_outputs->emplace(name);

  switch (triton_output.type) {
    case TritonOutput::Type::RESERVED: {
      // Propagate failures instead of silently dropping (and leaking) the
      // returned error, matching the other branches.
      RETURN_IF_ERR(generate_response->AddStringRef(
          name.c_str(), triton_output.value.c_str()));
      break;
    }
    case TritonOutput::Type::PARAMETER: {
      // Named distinctly so it does not shadow the 'name' parameter.
      const char* param_name;
      TRITONSERVER_ParameterType type;
      const void* vvalue;
      RETURN_IF_ERR(TRITONSERVER_InferenceResponseParameter(
          triton_response_, triton_output.index, &param_name, &type, &vvalue));
      switch (type) {
        case TRITONSERVER_PARAMETER_BOOL:
          RETURN_IF_ERR(generate_response->AddBool(
              param_name, *(reinterpret_cast<const bool*>(vvalue))));
          break;
        case TRITONSERVER_PARAMETER_INT:
          RETURN_IF_ERR(generate_response->AddInt(
              param_name, *(reinterpret_cast<const int64_t*>(vvalue))));
          break;
        case TRITONSERVER_PARAMETER_STRING:
          RETURN_IF_ERR(generate_response->AddStringRef(
              param_name, reinterpret_cast<const char*>(vvalue)));
          break;
        case TRITONSERVER_PARAMETER_DOUBLE:
          RETURN_IF_ERR(generate_response->AddDouble(
              param_name, *(reinterpret_cast<const double*>(vvalue))));
          break;
        case TRITONSERVER_PARAMETER_BYTES:
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              (std::string("Response parameter '") + param_name +
               "' has type 'TRITONSERVER_PARAMETER_BYTES' which is "
               "not currently supported")
                  .c_str());
          break;
      }
      break;
    }
    case TritonOutput::Type::TENSOR: {
      const char* cname;
      TRITONSERVER_DataType datatype;
      const int64_t* shape;
      uint64_t dim_count;
      const void* base;
      size_t byte_size;
      TRITONSERVER_MemoryType memory_type;
      int64_t memory_type_id;
      void* userp;

      RETURN_IF_ERR(TRITONSERVER_InferenceResponseOutput(
          triton_response_, triton_output.index, &cname, &datatype, &shape,
          &dim_count, &base, &byte_size, &memory_type, &memory_type_id,
          &userp));

      auto info = reinterpret_cast<AllocPayload::OutputInfo*>(userp);
      // sanity check; a missing info is treated the same as a non-JSON
      // output rather than being dereferenced.
      if ((info == nullptr) ||
          (info->kind_ != AllocPayload::OutputInfo::JSON)) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            (std::string("non-JSON output response type is requested for '") +
             cname + "'")
                .c_str());
      }

      size_t element_count = 1;
      for (size_t j = 0; j < dim_count; j++) {
        element_count *= shape[j];
      }

      triton::common::TritonJson::Value data_json(
          *generate_response, triton::common::TritonJson::ValueType::ARRAY);
      RETURN_IF_ERR(WriteDataToJson(
          &data_json, cname, datatype, base, byte_size, element_count));
      if (element_count == 1) {
        // if only 1 element, strip out the array
        triton::common::TritonJson::Value el;
        RETURN_IF_ERR(data_json.At(0, &el));
        RETURN_IF_ERR(generate_response->Add(cname, std::move(el)));
      } else {
        RETURN_IF_ERR(generate_response->Add(cname, std::move(data_json)));
      }
      break;
    }
  }
  return nullptr;  // success
}

// Top-level HTTP dispatcher: route 'req' to the matching handler based on
// its URI path. Fixed paths are checked first, then the per-model routes,
// then server / shared-memory / repository / trace routes. Anything that
// does not resolve to a handler is answered with 404 "Not Found".
void
HTTPAPIServer::Handle(evhtp_request_t* req)
{
  LOG_VERBOSE(1) << "HTTP request: " << req->method << " "
                 << req->uri->path->full;

  // Build the path string once and reuse it for every comparison below.
  const std::string path(req->uri->path->full);

  if (path == "/v2/models/stats") {
    // model statistics
    HandleModelStats(req);
    return;
  }
  if (path == "/v2/logging") {
    // change logging
    HandleLogging(req);
    return;
  }

  std::string model_name, version, kind;
  if (RE2::FullMatch(path, model_regex_, &model_name, &version, &kind)) {
    if (kind == "ready") {
      // model ready
      HandleModelReady(req, model_name, version);
      return;
    }
    if (kind == "infer") {
      // model infer
      HandleInfer(req, model_name, version);
      return;
    }
    if (kind == "generate") {
      // text generation
      HandleGenerate(req, model_name, version, false /* streaming */);
      return;
    }
    if (kind == "generate_stream") {
      // text generation (streaming)
      HandleGenerate(req, model_name, version, true /* streaming */);
      return;
    }
    if (kind == "config") {
      // model configuration
      HandleModelConfig(req, model_name, version);
      return;
    }
    if (kind == "stats") {
      // model statistics
      HandleModelStats(req, model_name, version);
      return;
    }
    if (kind == "trace/setting") {
      // Trace with specific model, there is no specification on versioning
      // so fall out and return bad request error if version is specified
      if (version.empty()) {
        HandleTrace(req, model_name);
        return;
      }
    } else if (kind == "") {
      // model metadata
      HandleModelMetadata(req, model_name, version);
      return;
    }
  }

  std::string region, action, rest, repo_name;
  if (path == "/v2") {
    // server metadata
    HandleServerMetadata(req);
    return;
  }
  if (RE2::FullMatch(path, server_regex_, &rest)) {
    // server health
    HandleServerHealth(req, rest);
    return;
  }
  if (RE2::FullMatch(path, systemsharedmemory_regex_, &region, &action)) {
    // system shared memory
    HandleSystemSharedMemory(req, region, action);
    return;
  }
  if (RE2::FullMatch(path, cudasharedmemory_regex_, &region, &action)) {
    // cuda shared memory
    HandleCudaSharedMemory(req, region, action);
    return;
  }
  // The trace check is only reached when the repository pattern itself did
  // not match; a repository match with an unknown kind goes straight to 404.
  if (RE2::FullMatch(
          path, modelcontrol_regex_, &repo_name, &kind, &model_name,
          &action)) {
    // model repository
    if (kind == "index") {
      HandleRepositoryIndex(req, repo_name);
      return;
    } else if (kind.find("models", 0) == 0) {
      HandleRepositoryControl(req, repo_name, model_name, action);
      return;
    }
  } else if (RE2::FullMatch(path, trace_regex_)) {
    // trace request on global settings
    HandleTrace(req);
    return;
  }

  LOG_VERBOSE(1) << "HTTP error: " << req->method << " " << req->uri->path->full
                 << " - " << static_cast<int>(EVHTP_RES_NOTFOUND);
  RETURN_AND_RESPOND_WITH_ERR(req, EVHTP_RES_NOTFOUND, "Not Found");
}

// Construct an HTTPAPIServer from explicit settings and store it in
// '*http_server', logging the address it was created for. Always returns
// nullptr (success).
TRITONSERVER_Error*
HTTPAPIServer::Create(
    const std::shared_ptr<TRITONSERVER_Server>& server,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<SharedMemoryManager>& shm_manager, const int32_t port,
    const bool reuse_port, const std::string& address,
    const std::string& header_forward_pattern, const int thread_cnt,
    const size_t max_input_size, const RestrictedFeatures& restricted_features,
    std::unique_ptr<HTTPServer>* http_server)
{
  HTTPAPIServer* api_server = new HTTPAPIServer(
      server, trace_manager, shm_manager, port, reuse_port, address,
      header_forward_pattern, thread_cnt, max_input_size, restricted_features);
  http_server->reset(api_server);

  LOG_INFO << "Started HTTPService at " << address << ":" << port;

  return nullptr;
}


// Construct an HTTPAPIServer from an options map. Reads "port",
// "reuse_port", "address", "header_forward_pattern" and "thread_count" from
// 'options' (returning the lookup error if any of them fails) and forwards
// to the explicit-argument overload with HTTP_DEFAULT_MAX_INPUT_SIZE.
TRITONSERVER_Error*
HTTPAPIServer::Create(
    std::shared_ptr<TRITONSERVER_Server>& server,
    const UnorderedMapType& options,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<SharedMemoryManager>& shm_manager,
    const RestrictedFeatures& restricted_features,
    std::unique_ptr<HTTPServer>* service)
{
  int port;
  RETURN_IF_ERR(GetValue(options, "port", &port));

  bool reuse_port;
  RETURN_IF_ERR(GetValue(options, "reuse_port", &reuse_port));

  std::string address;
  RETURN_IF_ERR(GetValue(options, "address", &address));

  std::string header_forward_pattern;
  RETURN_IF_ERR(
      GetValue(options, "header_forward_pattern", &header_forward_pattern));

  int thread_count;
  RETURN_IF_ERR(GetValue(options, "thread_count", &thread_count));

  return Create(
      server, trace_manager, shm_manager, port, reuse_port, address,
      header_forward_pattern, thread_count, HTTP_DEFAULT_MAX_INPUT_SIZE,
      restricted_features, service);
}


// Enforce a header-based restriction on 'req'.
//
// 'restriction' is a (header name, expected value) pair. When the request
// lacks the header or carries a different value, a 403 reply with an error
// JSON body is sent and true is returned. Returns false when the request is
// allowed to proceed.
bool
HTTPAPIServer::RespondIfRestricted(
    evhtp_request_t* req, const Restriction& restriction)
{
  const auto& header = restriction.first;
  const auto& expected_value = restriction.second;

  const char* actual_value = evhtp_kv_find(req->headers_in, header.c_str());
  const bool allowed =
      (actual_value != nullptr) && (actual_value == expected_value);
  if (allowed) {
    return false;
  }

  const std::string message =
      "This API is restricted, expecting header '" + header + "'";
  EVBufferAddErrorJson(req->buffer_out, message.c_str());
  evhtp_send_reply(req, EVHTP_RES_FORBIDDEN);
  return true;
}

}}  // namespace triton::server


================================================
FILE: src/http_server.h
================================================
// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <evhtp/evhtp.h>
#include <re2/re2.h>

#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <unordered_map>

#include "common.h"
#include "data_compressor.h"
#include "orca_http.h"
#include "restricted_features.h"
#include "shared_memory_manager.h"
#include "tracer.h"
#include "triton/common/logging.h"
#include "triton/core/tritonserver.h"

namespace triton { namespace server {

// Describes how JSON keys are mapped for the generate endpoints. A schema
// node has a kind, a flag controlling unspecified keys, and optionally nested
// child schemas stored by key name.
class MappingSchema {
 public:
  enum class Kind {
    EXACT_MAPPING,
    // An object of this kind means it is a nested mapping schema.
    MAPPING_SCHEMA
  };
  // Nested schemas keyed by name; each child is owned by this node.
  std::map<std::string, std::unique_ptr<MappingSchema>> children_;
  // Whether an unspecified key is allowed. If true,
  // * for requests, the unspecified key will be converted to Triton input
  //   following the EXACT_MAPPING rule.
  // * for responses, the Triton output will be converted to JSON key-value
  //   pairs at top level if the name is unspecified in the schema,
  //   following the EXACT_MAPPING rule.
  const bool allow_unspecified_{true};
  // Kind of this node; fixed at construction.
  const Kind kind_{Kind::EXACT_MAPPING};

  // Creates a schema node of the given 'kind'. Both arguments default to the
  // same values as the member initializers above (EXACT_MAPPING, true).
  explicit MappingSchema(
      const MappingSchema::Kind& kind = Kind::EXACT_MAPPING,
      const bool& allow_unspecified = true)
      : allow_unspecified_(allow_unspecified), kind_(kind)
  {
  }


 private:
};

// Generic HTTP server using evhtp.
//
// Holds the listening configuration (port, address, thread count, header
// forwarding pattern) together with the evhtp / libevent handles and the
// worker thread used to run the server. Subclasses implement Handle() to
// serve individual requests.
class HTTPServer {
 public:
  // Stops the server on destruction; any error returned by Stop() is ignored.
  virtual ~HTTPServer() { IGNORE_ERR(Stop()); }

  TRITONSERVER_Error* Start();
  TRITONSERVER_Error* Stop(
      uint32_t* exit_timeout_secs = nullptr,
      const std::string& service_name = "HTTP");

 protected:
  // Records the configuration only. 'header_forward_regex_' is compiled from
  // the stored pattern, so 'header_forward_pattern_' must be declared (and
  // therefore initialized) before it.
  explicit HTTPServer(
      const int32_t port, const bool reuse_port, const std::string& address,
      const std::string& header_forward_pattern, const int thread_cnt)
      : port_(port), reuse_port_(reuse_port), address_(address),
        header_forward_pattern_(header_forward_pattern),
        thread_cnt_(thread_cnt), header_forward_regex_(header_forward_pattern_),
        conn_cnt_(0), accepting_new_conn_(true)
  {
  }

  // Static evhtp-style callback; 'arg' is presumably the HTTPServer instance
  // (confirm against the implementation file).
  static void Dispatch(evhtp_request_t* req, void* arg);

  // Request handler supplied by the concrete server.
  virtual void Handle(evhtp_request_t* req) = 0;

  static void StopCallback(evutil_socket_t sock, short events, void* arg);

  static evhtp_res NewConnection(evhtp_connection_t* conn, void* arg);
  static evhtp_res EndConnection(evhtp_connection_t* conn, void* arg);

  int32_t port_;
  bool reuse_port_;
  std::string address_;
  std::string header_forward_pattern_;
  int thread_cnt_;
  re2::RE2 header_forward_regex_;

  // The constructor does not set the handles below, yet the destructor always
  // calls Stop(). Give them well-defined "not started" values so they are
  // never indeterminate if the server is destroyed without being started.
  evhtp_t* htp_{nullptr};
  struct event_base* evbase_{nullptr};
  std::thread worker_;
  evutil_socket_t fds_[2] = {-1, -1};
  event* break_ev_{nullptr};

  // Connection bookkeeping; 'conn_mu_' is presumably the guard for the two
  // fields that follow (confirm against the implementation file).
  std::mutex conn_mu_;
  uint32_t conn_cnt_;
  bool accepting_new_conn_;
};

#ifdef TRITON_ENABLE_METRICS
// Handle HTTP requests to obtain prometheus metrics
class HTTPMetricsServer : public HTTPServer {
 public:
  // Factory taking explicit arguments; the created server is returned through
  // 'metrics_server'.
  static TRITONSERVER_Error* Create(
      const std::shared_ptr<TRITONSERVER_Server>& server, int32_t port,
      std::string address, int thread_cnt,
      std::unique_ptr<HTTPServer>* metrics_server);

  // Factory taking a generic option map; the created server is returned
  // through 'service'.
  static TRITONSERVER_Error* Create(
      std::shared_ptr<TRITONSERVER_Server>& server,
      const UnorderedMapType& options, std::unique_ptr<HTTPServer>* service);

  ~HTTPMetricsServer() = default;

 private:
  // Private: instances are obtained through Create(). The base server is
  // configured without port reuse and with an empty header forward pattern.
  explicit HTTPMetricsServer(
      const std::shared_ptr<TRITONSERVER_Server>& server, const int32_t port,
      std::string address, const int thread_cnt)
      : HTTPServer(
            port, false /* reuse_port */, address,
            "" /* header_forward_pattern */, thread_cnt),
        server_(server), api_regex_(R"(/metrics/?)")
  {
  }
  void Handle(evhtp_request_t* req) override;

  std::shared_ptr<TRITONSERVER_Server> server_;
  // Pattern "/metrics" with an optional trailing slash.
  re2::RE2 api_regex_;
};
#endif  // TRITON_ENABLE_METRICS

#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING)
// OpenTelemetry text-map carrier that reads propagation values from the
// headers of an evhtp request. Only reading is supported; Set() is a no-op.
class HttpTextMapCarrier : public otel_cntxt::propagation::TextMapCarrier {
 public:
  HttpTextMapCarrier(evhtp_kvs_t* headers) : headers_(headers) {}
  HttpTextMapCarrier() = default;

  // Returns the value of header 'key', or an empty view when there is no
  // header set or the header is not present.
  virtual opentelemetry::nostd::string_view Get(
      opentelemetry::nostd::string_view key) const noexcept override
  {
    // A default-constructed carrier has no headers to search.
    if (headers_ == nullptr) {
      return "";
    }
    // A string_view is not guaranteed to be NUL-terminated and its data() may
    // be null when empty, so build the lookup key from (data, size) instead
    // of treating data() as a C string.
    const std::string key_to_compare =
        key.empty() ? std::string() : std::string(key.data(), key.size());
    const char* value = evhtp_kv_find(headers_, key_to_compare.c_str());
    if (value != nullptr) {
      return opentelemetry::nostd::string_view(value);
    }
    return "";
  }
  // Not required on server side
  virtual void Set(
      opentelemetry::nostd::string_view key,
      opentelemetry::nostd::string_view value) noexcept override
  {
    return;
  }

  // Headers to read from; null for a default-constructed carrier.
  evhtp_kvs_t* headers_{nullptr};
};
#else
using HttpTextMapCarrier = void*;
#endif


// HTTP API server that implements KFServing community standard inference
// protocols and extensions used by Triton.
class HTTPAPIServer : public HTTPServer {
 public:
  static TRITONSERVER_Error* Create(
      const std::shared_ptr<TRITONSERVER_Server>& server,
      triton::server::TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& smb_manager,
      const int32_t port, const bool reuse_port, const std::string& address,
      const std::string& header_forward_pattern, const int thread_cnt,
      const size_t max_input_size, const RestrictedFeatures& restricted_apis,
      std::unique_ptr<HTTPServer>* http_server);

  static TRITONSERVER_Error* Create(
      std::shared_ptr<TRITONSERVER_Server>& server,
      const UnorderedMapType& options,
      triton::server::TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      const RestrictedFeatures& restricted_features,
      std::unique_ptr<HTTPServer>* service);

  virtual ~HTTPAPIServer();

  //
  // AllocPayload
  //
  // Simple structure that carries the userp payload needed for
  // allocation.
  struct AllocPayload {
    // Per-output description of where / how the output is to be produced.
    struct OutputInfo {
      enum Kind { JSON, BINARY, SHM };

      // Every field has a default so that the constructor for non-shared
      // memory outputs, which only sets a subset, never leaves a member
      // indeterminate.
      Kind kind_;
      void* base_{nullptr};
      uint64_t byte_size_{0};
      TRITONSERVER_MemoryType memory_type_{TRITONSERVER_MEMORY_CPU};
      int64_t device_id_{0};
      uint32_t class_cnt_{0};
      evbuffer* evbuffer_{nullptr};
      char* cuda_ipc_handle_{nullptr};

      // For non-shared memory
      OutputInfo(Kind k, uint32_t class_cnt)
          : kind_(k), class_cnt_(class_cnt), evbuffer_(nullptr)
      {
      }

      // For shared memory
      OutputInfo(
          void* base, uint64_t byte_size, TRITONSERVER_MemoryType memory_type,
          int64_t device_id, char* cuda_ipc_handle)
          : kind_(SHM), base_(base), byte_size_(byte_size),
            memory_type_(memory_type), device_id_(device_id), class_cnt_(0),
            evbuffer_(nullptr), cuda_ipc_handle_(cuda_ipc_handle)
      {
      }

      // Frees the evbuffer if one is attached.
      ~OutputInfo()
      {
        if (evbuffer_ != nullptr) {
          evbuffer_free(evbuffer_);
        }
      }
    };

    // The payload deletes every OutputInfo stored in 'output_map_'.
    ~AllocPayload()
    {
      for (const auto& entry : output_map_) {
        delete entry.second;
      }
    }

    AllocPayload() : default_output_kind_(OutputInfo::Kind::JSON) {}

    // Output name -> OutputInfo; the pointers are deleted by the destructor.
    std::unordered_map<std::string, OutputInfo*> output_map_;
    // Kind that defaults to JSON on construction.
    AllocPayload::OutputInfo::Kind default_output_kind_;
  };

  // Object associated with an inference request. This persists
  // information needed for the request and records the evhtp thread
  // that is bound to the request. This same thread must be used to
  // send the response.
  class InferRequestClass {
   public:
    // [FIXME] decompression / compression should be handled implicitly
    // within InferRequestClass. This would remove the need to check for a
    // decompressed buffer in HTTPServer code.
    explicit InferRequestClass(
        TRITONSERVER_Server* server, evhtp_request_t* req,
        DataCompressor::Type response_compression_type,
        const std::shared_ptr<TRITONSERVER_InferenceRequest>& triton_request,
        const std::shared_ptr<SharedMemoryManager>& shm_manager);

    // Detaches from the evhtp request (if any) and releases every shared
    // memory region reference held by this request. Regions flagged as
    // awaiting unregistration are unregistered once their reference has been
    // dropped.
    virtual ~InferRequestClass()
    {
      if (req_ != nullptr) {
        evhtp_request_unset_hook(req_, evhtp_hook_on_request_fini);
      }
      req_ = nullptr;

      // Unregister shm regions that are waiting for the completion of an
      // inference.
      while (!shm_regions_info_.empty()) {
        // Copy what is needed before the shared_ptr is released below.
        auto shm_name = shm_regions_info_.back()->name_;
        auto shm_memory_type = shm_regions_info_.back()->kind_;
        auto awaiting_unregister =
            shm_regions_info_.back()->awaiting_unregister_;

        // Delete shared_ptr to decrement reference count
        shm_regions_info_.pop_back();

        if (awaiting_unregister) {
          if (shm_manager_ != nullptr) {
            TRITONSERVER_Error* err =
                shm_manager_->Unregister(shm_name, shm_memory_type);
            if (err != nullptr) {
              LOG_VERBOSE(1) << TRITONSERVER_ErrorMessage(err);
              // The returned error object must be released, otherwise it
              // leaks for every failed unregistration.
              TRITONSERVER_ErrorDelete(err);
            }
          } else {
            LOG_VERBOSE(1) << "Shared memory manager is not available";
          }
        }
      }
    }

    evhtp_request_t* EvHtpRequest() const { return req_; }

    static void InferRequestComplete(
        TRITONSERVER_InferenceRequest* request, const uint32_t flags,
        void* userp);
    static void InferResponseComplete(
        TRITONSERVER_InferenceResponse* response, const uint32_t flags,
        void* userp);
    virtual TRITONSERVER_Error* FinalizeResponse(
        TRITONSERVER_InferenceResponse* response);

    // Helper function to set infer response header in the form specified by
    // the endpoint protocol
    virtual void SetResponseHeader(
        const bool has_binary_data, const size_t header_length);

    uint32_t IncrementResponseCount();

    // Only used if tracing enabled
    std::shared_ptr<TraceManager::Trace> trace_;

    AllocPayload alloc_payload_;

    // Data that cannot be used directly from the HTTP body is first
    // serialized. Hold that data here so that its lifetime spans the
    // lifetime of the request.
    std::list<std::vector<char>> serialized_data_;

    static void ReplyCallback(evthr_t* thr, void* arg, void* shared);

    // Appends a shared reference to 'shm_info' to the list of shared memory
    // regions held by this request.
    void AddShmRegionInfo(
        const std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>&
            shm_info)
    {
      shm_regions_info_.emplace_back(shm_info);
    }

   protected:
    TRITONSERVER_Server* server_{nullptr};
    evhtp_request_t* req_{nullptr};
    evthr_t* thread_{nullptr};

    DataCompressor::Type response_compression_type_{
        DataCompressor::Type::IDENTITY};

    // Counter to keep track of number of responses generated.
    std::atomic<uint32_t> response_count_{0};

    // Event hook called before request deletion
    static evhtp_res RequestFiniHook(evhtp_request* req, void* arg);

    // Pointer to associated Triton request, this class does not own the
    // request and must not reference it after a successful
    // TRITONSERVER_ServerInferAsync (except for cancellation).
    std::shared_ptr<TRITONSERVER_InferenceRequest> triton_request_{nullptr};

    // Maintain shared pointers(read-only reference) to the shared memory
    // block's information for the shared memory regions used by the request.
    // These pointers will automatically increase the usage count, preventing
    // unregistration of the shared memory. This vector must be cleared when no
    // longer needed to decrease the count and permit unregistration.
    std::vector<std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>>
        shm_regions_info_;

    std::shared_ptr<SharedMemoryManager> shm_manager_;

    evhtp_res response_code_{EVHTP_RES_OK};
  };

  class GenerateRequestClass : public InferRequestClass {
   public:
    // Forwards the common arguments to InferRequestClass and records the
    // request / response schemas and the streaming flag. The schemas are
    // stored as raw pointers (presumably not owned by this object; confirm
    // against the caller).
    explicit GenerateRequestClass(
        TRITONSERVER_Server* server, evhtp_request_t* req,
        DataCompressor::Type response_compression_type,
        const MappingSchema* request_schema,
        const MappingSchema* response_schema, bool streaming,
        const std::shared_ptr<TRITONSERVER_InferenceRequest>& triton_request,
        const std::shared_ptr<SharedMemoryManager>& shm_manager)
        : InferRequestClass(
              server, req, response_compression_type, triton_request,
              shm_manager),
          request_schema_(request_schema), response_schema_(response_schema),
          streaming_(streaming)
    {
    }
    virtual ~GenerateRequestClass();

    TRITONSERVER_Server* EvHtpServer() const { return server_; }

    // [FIXME] Specialize response complete function for now, should have
    // been a dispatcher and call into object specific response function.
    static void InferResponseComplete(
        TRITONSERVER_InferenceResponse* response, const uint32_t flags,
        void* userp);
    static void ChunkResponseCallback(evthr_t* thr, void* arg, void* shared);
    static void EndResponseCallback(evthr_t* thr, void* arg, void* shared);
    // Send a response chunk; 'end' presumably marks whether the response is
    // ending (the function itself returns nothing).
    void SendChunkResponse(bool end);

    // Response preparation
    TRITONSERVER_Error* FinalizeResponse(
        TRITONSERVER_InferenceResponse* response) override;
    void AddErrorJson(TRITONSERVER_Error* error);
    static void StartResponse(evthr_t* thr, void* arg, void* shared);

    // [DLIS-5551] currently always performs basic conversion, only maps schema
    // of EXACT_MAPPING kind. MAPPING_SCHEMA and upcoming kinds are for
    // customized conversion where a detailed schema will be provided.
    TRITONSERVER_Error* ConvertGenerateRequest(
        std::map<std::string, triton::common::TritonJson::Value>&
            input_metadata,
        const MappingSchema* schema,
        triton::common::TritonJson::Value& generate_request);

    const MappingSchema* RequestSchema() { return request_schema_; }
    const MappingSchema* ResponseSchema() { return response_schema_; }

   private:
    // Describes one output entry: either a reserved value carried as a
    // string, or a tensor / parameter referenced by index.
    struct TritonOutput {
      enum class Type { RESERVED, TENSOR, PARAMETER };
      TritonOutput(Type t, const std::string& val) : type(t), value(val) {}
      explicit TritonOutput(Type t, uint32_t i) : type(t), index(i) {}
      Type type;
      // RESERVED type
      std::string value;
      // TENSOR, PARAMETER type. Defaulted so that objects built through the
      // string constructor never carry an indeterminate value when copied.
      uint32_t index{0};
    };

    TRITONSERVER_Error* ExactMappingInput(
        const std::string& name, triton::common::TritonJson::Value& value,
        std::map<std::string, triton::common::TritonJson::Value>&
            input_metadata);

    // [DLIS-5551] currently always performs basic conversion, only maps schema
    // of EXACT_MAPPING kind. MAPPING_SCHEMA and upcoming kinds are for
    // customized conversion where a detailed schema will be provided.
    TRITONSERVER_Error* ConvertGenerateResponse(
        const std::map<std::string, TritonOutput>& output_metadata,
        const MappingSchema* schema,
        triton::common::TritonJson::Value* generate_response,
        std::set<std::string>* mapped_outputs);
    TRITONSERVER_Error* ExactMappingOutput(
        const std::string& name, const TritonOutput& triton_output,
        triton::common::TritonJson::Value* generate_response,
        std::set<std::string>* mapped_outputs);

    const MappingSchema* request_schema_{nullptr};
    const MappingSchema* response_schema_{nullptr};
    const bool streaming_{false};
    // Placeholder for the response being completed; this class does not own
    // the response.
    TRITONSERVER_InferenceResponse* triton_response_{nullptr};
    // As InferResponseComplete and ChunkResponseCallback are called in
    // different threads, need to have dedicated buffers for each response and
    // ensure mutual exclusive access.
    std::mutex res_mtx_;
    std::queue<evbuffer*> pending_http_responses_;
    bool end_{false};
  };

  // Simple structure that carries the userp payload needed for
  // request release callback.
  struct RequestReleasePayload final {
    RequestReleasePayload(
        const std::shared_ptr<TRITONSERVER_InferenceRequest>& inference_request,
        evbuffer* buffer)
        : inference_request_(inference_request), buffer_(buffer){};

    ~RequestReleasePayload()
    {
      if (buffer_ != nullptr) {
        evbuffer_free(buffer_);
      }
    };

   private:
    std::shared_ptr<TRITONSERVER_InferenceRequest> inference_request_ = nullptr;
    evbuffer* buffer_ = nullptr;
  };


 protected:
  explicit HTTPAPIServer(
      const std::shared_ptr<TRITONSERVER_Server>& server,
      triton::server::TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      const int32_t port, const bool reuse_port, const std::string& address,
      const std::string& header_forward_pattern, const int thread_cnt,
      const size_t max_input_size = HTTP_DEFAULT_MAX_INPUT_SIZE,
      const RestrictedFeatures& restricted_apis = {});

  virtual void Handle(evhtp_request_t* req) override;
  // [FIXME] extract to "infer" class
  //
  // Creates the per-request object for 'req'. Virtual so that derived servers
  // can supply their own InferRequestClass subclass.
  virtual std::unique_ptr<InferRequestClass> CreateInferRequest(
      evhtp_request_t* req,
      const std::shared_ptr<TRITONSERVER_InferenceRequest>& triton_request)
  {
    return std::make_unique<InferRequestClass>(
        server_.get(), req, GetResponseCompressionType(req), triton_request,
        shm_manager_);
  }

  // Helper function to retrieve infer request header in the form specified by
  // the endpoint protocol
  //
  // Get the inference header length. Return 0 if the whole request body is
  // the inference header.
  virtual TRITONSERVER_Error* GetInferenceHeaderLength(
      evhtp_request_t* req, int32_t content_length, size_t* header_length);
  virtual DataCompressor::Type GetRequestCompressionType(evhtp_request_t* req);
  virtual DataCompressor::Type GetResponseCompressionType(evhtp_request_t* req);


  TRITONSERVER_Error* GetModelConfig(
      const std::string& model_name, int64_t requested_model_version,
      std::string* config_json);
  TRITONSERVER_Error* GetContentLength(
      evhtp_request_t* req, evbuffer* decompressed_buffer,
      int32_t* content_length);
  TRITONSERVER_Error* DecompressBuffer(
      evhtp_request_t* req, evbuffer** decompressed_buffer);
  TRITONSERVER_Error* CheckTransactionPolicy(
      evhtp_request_t* req, const std::string& model_name,
      int64_t requested_model_version);
  std::shared_ptr<TraceManager::Trace> StartTrace(
      evhtp_request_t* req, const std::string& model_name,
      TRITONSERVER_InferenceTrace** triton_trace);
  TRITONSERVER_Error* ForwardHeaders(
      evhtp_request_t* req, TRITONSERVER_InferenceRequest* irequest);

  static TRITONSERVER_Error* InferResponseAlloc(
      TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
      size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
      int64_t preferred_memory_type_id, void* userp, void** buffer,
      void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
      int64_t* actual_memory_type_id);
  static TRITONSERVER_Error* OutputBufferQuery(
      TRITONSERVER_ResponseAllocator* allocator, void* userp,
      const char* tensor_name, size_t* byte_size,
      TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
  static TRITONSERVER_Error* OutputBufferAttributes(
      TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
      TRITONSERVER_BufferAttributes* buffer_attributes, void* userp,
      void* buffer_userp);
  static TRITONSERVER_Error* InferResponseFree(
      TRITONSERVER_ResponseAllocator* allocator, void* buffer,
      void* buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type,
      int64_t memory_type_id);
  void HandleServerHealth(evhtp_request_t* req, const std::string& kind);
  void HandleServerMetadata(evhtp_request_t* req);
  void HandleModelReady(
      evhtp_request_t* req, const std::string& model_name,
      const std::string& model_version_str);
  void HandleModelMetadata(
      evhtp_request_t* req, const std::string& model_name,
      const std::string& model_version_str);
  void HandleModelConfig(
      evhtp_request_t* req, const std::string& model_name,
      const std::string& model_version_str);
  void HandleInfer(
      evhtp_request_t* req, const std::string& model_name,
      const std::string& model_version_str);
  void HandleModelStats(
      evhtp_request_t* req, const std::string& model_name = "",
      const std::string& model_version_str = "");
  void HandleRepositoryIndex(
      evhtp_request_t* req, const std::string& repository_name);
  void HandleRepositoryControl(
      evhtp_request_t* req, const std::string& repository_name,
      const std::string& model_name, const std::string& action);
  void HandleSystemSharedMemory(
      evhtp_request_t* req, const std::string& region_name,
      const std::string& action);
  void HandleCudaSharedMemory(
      evhtp_request_t* req, const std::string& region_name,
      const std::string& action);
  void HandleTrace(evhtp_request_t* req, const std::string& model_name = "");
  void HandleLogging(evhtp_request_t* req);

  // Text Generation / LLM format
  // 'streaming' selects the schema pair to convert request / response.
  // 'streaming' also controls the response convention, if true,
  // Server-Sent Events format will be used to send responses.
  void HandleGenerate(
      evhtp_request_t* req, const std::string& model_name,
      const std::string& model_version_str, bool streaming);

  // 'meta_data_root' is the root JSON document for 'input_metadata'.
  // In TritonJson, the Value objects are references to the root document.
  // Therefore the document must stay valid.
  TRITONSERVER_Error* ModelInputMetadata(
      const std::string& model_name, const int64_t model_version,
      std::map<std::string, triton::common::TritonJson::Value>* input_metadata,
      triton::common::TritonJson::Value* meta_data_root);

  // Internal utility method for parsing evhtp request to JSON
  // Should not be called directly - use EVRequestToJson or
  // EVRequestToJsonAllowsEmpty instead
  TRITONSERVER_Error* EVRequestToJsonImpl(
      evhtp_request_t* req, std::string_view request_kind,
      bool allows_empty_body, triton::common::TritonJson::Value* request_json,
      size_t* buffer_len);

  // Parses the full evhtp request and its evbuffers into JSON, calling
  // EVRequestToJsonImpl with 'allows_empty_body' set to true. The buffer
  // length reported by the implementation is discarded.
  TRITONSERVER_Error* EVRequestToJsonAllowsEmpty(
      evhtp_request_t* req, std::string_view request_kind,
      triton::common::TritonJson::Value* request_json)
  {
    size_t unused_buffer_len = 0;
    return EVRequestToJsonImpl(
        req, request_kind, true /* allows_empty_body */, request_json,
        &unused_buffer_len);
  }

  // Parses the evhtp request into JSON, calling EVRequestToJsonImpl with
  // 'allows_empty_body' set to false. 'buffer_len' is passed through to the
  // implementation.
  TRITONSERVER_Error* EVRequestToJson(
      evhtp_request_t* req, std::string_view request_kind,
      triton::common::TritonJson::Value* request_json, size_t* buffer_len)
  {
    return EVRequestToJsonImpl(
        req, request_kind, false /* allows_empty_body */, request_json,
        buffer_len);
  }

  // Parses evhtp request buffers into Triton Inference Request.
  TRITONSERVER_Error* EVRequestToTritonRequest(
      evhtp_request_t* req, const std::string& model_name,
      TRITONSERVER_InferenceRequest* irequest, evbuffer* decompressed_buffer,
      InferRequestClass* infer_req, size_t header_length);
  TRITONSERVER_Error* EVBufferToInput(
      const std::string& model_name, TRITONSERVER_InferenceRequest* irequest,
      evbuffer* input_buffer, InferRequestClass* infer_req,
      size_t header_length);
  TRITONSERVER_Error* EVBufferToRawInput(
      const std::string& model_name, TRITONSERVER_InferenceRequest* irequest,
      evbuffer* input_buffer, InferRequestClass* infer_req);
  TRITONSERVER_Error* EVBufferToJson(
      triton::common::TritonJson::Value* document, evbuffer_iovec* v,
      int* v_idx, const size_t length, int n);


  // Helpers for parsing JSON requests for Triton-specific fields
  TRITONSERVER_Error* ParseJsonTritonIO(
      triton::common::TritonJson::Value& request_json,
      TRITONSERVER_InferenceRequest* irequest, InferRequestClass* infer_req,
      const std::string& model_name, evbuffer_iovec* v, int* v_idx_ptr,
      size_t header_length, int n);
  TRITONSERVER_Error* ParseJsonTritonParams(
      triton::common::TritonJson::Value& request_json,
      TRITONSERVER_InferenceRequest* irequest, InferRequestClass* infer_req);
  TRITONSERVER_Error* ParseJsonTritonRequestID(
      triton::common::TritonJson::Value& request_json,
      TRITONSERVER_InferenceRequest* irequest);

  std::shared_ptr<TRITONSERVER_Server> server_;

  // Cache the server metadata, as it stays constant while the server runs
  TRITONSERVER_Error* server_metadata_err_;
  std::string server_metadata_;

  TraceManager* trace_manager_;
  std::shared_ptr<SharedMemoryManager> shm_manager_;

  // The allocator that will be used to allocate buffers for the
  // inference result tensors.
  TRITONSERVER_ResponseAllocator* allocator_;

  re2::RE2 server_regex_;
  re2::RE2 model_regex_;
  re2::RE2 modelcontrol_regex_;
  re2::RE2 systemsharedmemory_regex_;
  re2::RE2 cudasharedmemory_regex_;
  re2::RE2 trace_regex_;

  // [DLIS-5551] currently always performs basic conversion, only maps schema
  // of EXACT_MAPPING kind. MAPPING_SCHEMA and upcoming kinds are for
  // customized conversion where a detailed schema will be provided.
  std::unique_ptr<MappingSchema> generate_request_schema_{new MappingSchema()};
  std::unique_ptr<MappingSchema> generate_response_schema_{new MappingSchema()};
  std::unique_ptr<MappingSchema> generate_stream_response_schema_{
      new MappingSchema()};
  std::unique_ptr<MappingSchema> generate_stream_request_schema_{
      new MappingSchema()};

  // Provisional definition of generate mapping schema
  // to allow for parameters passing
  //
  // Note: subject to change
  void ConfigureGenerateMappingSchema()
  {
    // Reserved field parameters for generate
    // If present, parameters will be converted to tensors
    // or parameters based on model config

    const std::string parameters_field = "parameters";
    generate_stream_request_schema_->children_.emplace(
        parameters_field,
        new MappingSchema(MappingSchema::Kind::MAPPING_SCHEMA, true));
    generate_request_schema_->children_.emplace(
        parameters_field,
        new MappingSchema(MappingSchema::Kind::MAPPING_SCHEMA, true));
  }
  size_t max_input_size_;
  RestrictedFeatures restricted_apis_{};
  bool RespondIfRestricted(
      evhtp_request_t* req, const Restriction& restriction);
};

}}  // namespace triton::server


================================================
FILE: src/main.cc
================================================
// Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifdef _WIN32
#define NOMINMAX
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <winsock2.h>
#include <ws2tcpip.h>
#pragma comment(lib, "ws2_32.lib")
#endif

#ifndef _WIN32
#include <getopt.h>
#include <unistd.h>
#endif

#include <stdint.h>

#include <algorithm>
#include <cctype>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <thread>

#include "triton_signal.h"

#ifdef TRITON_ENABLE_ASAN
#include <sanitizer/lsan_interface.h>
#endif  // TRITON_ENABLE_ASAN

#include "command_line_parser.h"
#include "common.h"
#include "shared_memory_manager.h"
#include "tracer.h"
#include "triton/common/logging.h"
#include "triton/core/tritonserver.h"

#if defined(TRITON_ENABLE_HTTP) || defined(TRITON_ENABLE_METRICS)
#include "http_server.h"
#endif  // TRITON_ENABLE_HTTP|| TRITON_ENABLE_METRICS
#ifdef TRITON_ENABLE_SAGEMAKER
#include "sagemaker_server.h"
#endif  // TRITON_ENABLE_SAGEMAKER
#ifdef TRITON_ENABLE_VERTEX_AI
#include "vertex_ai_server.h"
#endif  // TRITON_ENABLE_VERTEX_AI
#ifdef TRITON_ENABLE_GRPC
#include "grpc/grpc_server.h"
#endif  // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_GPU
static_assert(
    TRITON_MIN_COMPUTE_CAPABILITY >= 1.0,
    "Invalid TRITON_MIN_COMPUTE_CAPABILITY specified");
#endif  // TRITON_ENABLE_GPU

namespace {

#ifdef TRITON_ENABLE_HTTP
std::unique_ptr<triton::server::HTTPServer> g_http_service;
#endif  // TRITON_ENABLE_HTTP

#ifdef TRITON_ENABLE_GRPC
std::unique_ptr<triton::server::grpc::Server> g_grpc_service;
#endif  // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_METRICS
std::unique_ptr<triton::server::HTTPServer> g_metrics_service;
#endif  // TRITON_ENABLE_METRICS

#ifdef TRITON_ENABLE_SAGEMAKER
std::unique_ptr<triton::server::HTTPServer> g_sagemaker_service;
#endif  // TRITON_ENABLE_SAGEMAKER

#ifdef TRITON_ENABLE_VERTEX_AI
std::unique_ptr<triton::server::HTTPServer> g_vertex_ai_service;
#endif  // TRITON_ENABLE_VERTEX_AI

triton::server::TritonServerParameters g_triton_params;

#ifdef TRITON_ENABLE_GRPC
// Creates the gRPC service from the global gRPC options and starts it.
// If creation or start-up fails, '*service' is reset and the error is
// returned; on success nullptr is returned and '*service' holds the running
// service.
TRITONSERVER_Error*
StartGrpcService(
    std::unique_ptr<triton::server::grpc::Server>* service,
    const std::shared_ptr<TRITONSERVER_Server>& server,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<triton::server::SharedMemoryManager>& shm_manager)
{
  TRITONSERVER_Error* status = triton::server::grpc::Server::Create(
      server, trace_manager, shm_manager, g_triton_params.grpc_options_,
      service);
  if (status != nullptr) {
    service->reset();
    return status;
  }

  status = (*service)->Start();
  if (status != nullptr) {
    service->reset();
  }
  return status;
}
#endif  // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_HTTP
// Create the HTTP endpoint from the global HTTP parameters and start it.
// On any failure 'service' is cleared and the error is handed back to the
// caller; nullptr means the endpoint is up.
TRITONSERVER_Error*
StartHttpService(
    std::unique_ptr<triton::server::HTTPServer>* service,
    const std::shared_ptr<TRITONSERVER_Server>& server,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<triton::server::SharedMemoryManager>& shm_manager)
{
  TRITONSERVER_Error* status = triton::server::HTTPAPIServer::Create(
      server, trace_manager, shm_manager, g_triton_params.http_port_,
      g_triton_params.reuse_http_port_, g_triton_params.http_address_,
      g_triton_params.http_forward_header_pattern_,
      g_triton_params.http_thread_cnt_, g_triton_params.http_max_input_size_,
      g_triton_params.http_restricted_apis_, service);
  if (status != nullptr) {
    // Creation failed, make sure no partially built endpoint survives.
    service->reset();
    return status;
  }

  status = (*service)->Start();
  if (status != nullptr) {
    // The endpoint was created but could not be started, discard it.
    service->reset();
  }

  return status;
}
#endif  // TRITON_ENABLE_HTTP

#ifdef TRITON_ENABLE_METRICS
// Create the metrics endpoint (always served by a single HTTP thread) and
// start it. On any failure 'service' is cleared and the error is handed back
// to the caller; nullptr means the endpoint is up.
TRITONSERVER_Error*
StartMetricsService(
    std::unique_ptr<triton::server::HTTPServer>* service,
    const std::shared_ptr<TRITONSERVER_Server>& server)
{
  TRITONSERVER_Error* status = triton::server::HTTPMetricsServer::Create(
      server, g_triton_params.metrics_port_, g_triton_params.metrics_address_,
      1 /* HTTP thread count */, service);
  if (status != nullptr) {
    // Creation failed, make sure no partially built endpoint survives.
    service->reset();
    return status;
  }

  status = (*service)->Start();
  if (status != nullptr) {
    // The endpoint was created but could not be started, discard it.
    service->reset();
  }

  return status;
}
#endif  // TRITON_ENABLE_METRICS

#ifdef TRITON_ENABLE_SAGEMAKER
// Create the SageMaker endpoint and start it. On any failure 'service' is
// cleared and the error is handed back to the caller; nullptr means the
// endpoint is up.
TRITONSERVER_Error*
StartSagemakerService(
    std::unique_ptr<triton::server::HTTPServer>* service,
    const std::shared_ptr<TRITONSERVER_Server>& server,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<triton::server::SharedMemoryManager>& shm_manager)
{
#ifdef TRITON_ENABLE_HTTP
  // SageMaker requests share the HTTP server settings: both
  // --http-max-input-size and --http-restricted-api apply here as well.
  size_t max_input_size = g_triton_params.http_max_input_size_;
  triton::server::RestrictedFeatures restricted_apis =
      g_triton_params.http_restricted_apis_;
#else
  // Without TRITON_ENABLE_HTTP there is no command-line configuration to
  // borrow, so use the default input size and leave every API unrestricted.
  size_t max_input_size = triton::server::HTTP_DEFAULT_MAX_INPUT_SIZE;
  triton::server::RestrictedFeatures restricted_apis{};
#endif  // TRITON_ENABLE_HTTP

  TRITONSERVER_Error* status = triton::server::SagemakerAPIServer::Create(
      server, trace_manager, shm_manager, g_triton_params.sagemaker_port_,
      g_triton_params.sagemaker_address_, g_triton_params.sagemaker_thread_cnt_,
      max_input_size, restricted_apis, service);
  if (status != nullptr) {
    // Creation failed, make sure no partially built endpoint survives.
    service->reset();
    return status;
  }

  status = (*service)->Start();
  if (status != nullptr) {
    // The endpoint was created but could not be started, discard it.
    service->reset();
  }

  return status;
}
#endif  // TRITON_ENABLE_SAGEMAKER

#ifdef TRITON_ENABLE_VERTEX_AI
// Create the Vertex AI endpoint and start it. On any failure 'service' is
// cleared and the error is handed back to the caller; nullptr means the
// endpoint is up.
TRITONSERVER_Error*
StartVertexAiService(
    std::unique_ptr<triton::server::HTTPServer>* service,
    const std::shared_ptr<TRITONSERVER_Server>& server,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<triton::server::SharedMemoryManager>& shm_manager)
{
#ifdef TRITON_ENABLE_HTTP
  // Vertex AI requests share the HTTP server settings: both
  // --http-max-input-size and --http-restricted-api apply here as well.
  size_t max_input_size = g_triton_params.http_max_input_size_;
  triton::server::RestrictedFeatures restricted_apis =
      g_triton_params.http_restricted_apis_;
#else
  // Without TRITON_ENABLE_HTTP there is no command-line configuration to
  // borrow, so use the default input size and leave every API unrestricted.
  size_t max_input_size = triton::server::HTTP_DEFAULT_MAX_INPUT_SIZE;
  triton::server::RestrictedFeatures restricted_apis{};
#endif  // TRITON_ENABLE_HTTP

  TRITONSERVER_Error* status = triton::server::VertexAiAPIServer::Create(
      server, trace_manager, shm_manager, g_triton_params.vertex_ai_port_,
      g_triton_params.vertex_ai_address_, g_triton_params.vertex_ai_thread_cnt_,
      max_input_size, restricted_apis, g_triton_params.vertex_ai_default_model_,
      service);
  if (status != nullptr) {
    // Creation failed, make sure no partially built endpoint survives.
    service->reset();
    return status;
  }

  status = (*service)->Start();
  if (status != nullptr) {
    // The endpoint was created but could not be started, discard it.
    service->reset();
  }

  return status;
}
#endif  // TRITON_ENABLE_VERTEX_AI

// Bring up every endpoint that is both compiled in and allowed by the
// command-line parameters, in the order GRPC, HTTP, SageMaker, Vertex AI,
// metrics. Returns false as soon as one endpoint fails to start (the error
// is logged); returns true when all requested endpoints are running.
bool
StartEndpoints(
    const std::shared_ptr<TRITONSERVER_Server>& server,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<triton::server::SharedMemoryManager>& shm_manager)
{
#ifdef _WIN32
  // Winsock has to be initialized before any socket based endpoint starts.
  WSADATA winsock_data;
  const int winsock_status = WSAStartup(MAKEWORD(2, 2), &winsock_data);
  if (winsock_status != 0) {
    LOG_ERROR << "Error in WSAStartup " << winsock_status;
    return false;
  }
#endif

#ifdef TRITON_ENABLE_GRPC
  // GRPC endpoint, only when requested.
  if (g_triton_params.allow_grpc_) {
    TRITONSERVER_Error* grpc_err =
        StartGrpcService(&g_grpc_service, server, trace_manager, shm_manager);
    if (grpc_err != nullptr) {
      LOG_TRITONSERVER_ERROR(grpc_err, "failed to start GRPC service");
      return false;
    }
  }
#endif  // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_HTTP
  // HTTP endpoint, only when requested.
  if (g_triton_params.allow_http_) {
    TRITONSERVER_Error* http_err =
        StartHttpService(&g_http_service, server, trace_manager, shm_manager);
    if (http_err != nullptr) {
      LOG_TRITONSERVER_ERROR(http_err, "failed to start HTTP service");
      return false;
    }
  }
#endif  // TRITON_ENABLE_HTTP

#ifdef TRITON_ENABLE_SAGEMAKER
  // SageMaker endpoint, only when requested.
  if (g_triton_params.allow_sagemaker_) {
    TRITONSERVER_Error* sagemaker_err = StartSagemakerService(
        &g_sagemaker_service, server, trace_manager, shm_manager);
    if (sagemaker_err != nullptr) {
      LOG_TRITONSERVER_ERROR(
          sagemaker_err, "failed to start Sagemaker service");
      return false;
    }
  }
#endif  // TRITON_ENABLE_SAGEMAKER

#ifdef TRITON_ENABLE_VERTEX_AI
  // Vertex AI endpoint, only when requested.
  if (g_triton_params.allow_vertex_ai_) {
    TRITONSERVER_Error* vertex_ai_err = StartVertexAiService(
        &g_vertex_ai_service, server, trace_manager, shm_manager);
    if (vertex_ai_err != nullptr) {
      LOG_TRITONSERVER_ERROR(
          vertex_ai_err, "failed to start Vertex AI service");
      return false;
    }
  }
#endif  // TRITON_ENABLE_VERTEX_AI

#ifdef TRITON_ENABLE_METRICS
  // Metrics endpoint, only when requested.
  if (g_triton_params.allow_metrics_) {
    TRITONSERVER_Error* metrics_err =
        StartMetricsService(&g_metrics_service, server);
    if (metrics_err != nullptr) {
      LOG_TRITONSERVER_ERROR(metrics_err, "failed to start Metrics service");
      return false;
    }
  }
#endif  // TRITON_ENABLE_METRICS

  return true;
}

// First shutdown phase: stop the endpoints that accept an exit timeout.
// The HTTP endpoint is stopped and released; the GRPC endpoint is only
// asked to stop gracefully and stays allocated for the forceful
// StopEndpoints() overload. 'exit_timeout_secs' is forwarded to both
// services. Returns false if any service reported an error (errors are
// logged, and the remaining services are still processed).
bool
StopEndpoints(uint32_t* exit_timeout_secs)
{
  bool all_ok = true;

#ifdef TRITON_ENABLE_HTTP
  if (g_http_service != nullptr) {
    TRITONSERVER_Error* http_err = g_http_service->Stop(exit_timeout_secs);
    if (http_err != nullptr) {
      LOG_TRITONSERVER_ERROR(http_err, "failed to stop HTTP service");
      all_ok = false;
    }

    g_http_service.reset();
  }
#endif  // TRITON_ENABLE_HTTP

#ifdef TRITON_ENABLE_GRPC
  // Give the GRPC service the chance to shut down gracefully.
  if (g_grpc_service != nullptr) {
    TRITONSERVER_Error* grpc_err =
        g_grpc_service->GracefulStop(exit_timeout_secs);
    if (grpc_err != nullptr) {
      LOG_TRITONSERVER_ERROR(
          grpc_err, "failed to gracefully stop GRPC service");
      all_ok = false;
    }
  }
#endif  // TRITON_ENABLE_GRPC

  return all_ok;
}

// Second shutdown phase: forcefully stop and release the GRPC, metrics,
// SageMaker and Vertex AI endpoints, then (on Windows) tear down Winsock.
// Returns false if any step reported an error (errors are logged, and the
// remaining steps are still executed).
bool
StopEndpoints()
{
  bool all_ok = true;

#ifdef TRITON_ENABLE_GRPC
  if (g_grpc_service != nullptr) {
    // Forceful stop of the GRPC service.
    TRITONSERVER_Error* grpc_err = g_grpc_service->Stop();
    if (grpc_err != nullptr) {
      LOG_TRITONSERVER_ERROR(grpc_err, "failed to stop GRPC service");
      all_ok = false;
    }

    g_grpc_service.reset();
  }
#endif  // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_METRICS
  if (g_metrics_service != nullptr) {
    TRITONSERVER_Error* metrics_err = g_metrics_service->Stop();
    if (metrics_err != nullptr) {
      LOG_TRITONSERVER_ERROR(metrics_err, "failed to stop Metrics service");
      all_ok = false;
    }

    g_metrics_service.reset();
  }
#endif  // TRITON_ENABLE_METRICS

#ifdef TRITON_ENABLE_SAGEMAKER
  if (g_sagemaker_service != nullptr) {
    TRITONSERVER_Error* sagemaker_err = g_sagemaker_service->Stop();
    if (sagemaker_err != nullptr) {
      LOG_TRITONSERVER_ERROR(sagemaker_err, "failed to stop Sagemaker service");
      all_ok = false;
    }

    g_sagemaker_service.reset();
  }
#endif  // TRITON_ENABLE_SAGEMAKER

#ifdef TRITON_ENABLE_VERTEX_AI
  if (g_vertex_ai_service != nullptr) {
    TRITONSERVER_Error* vertex_ai_err = g_vertex_ai_service->Stop();
    if (vertex_ai_err != nullptr) {
      LOG_TRITONSERVER_ERROR(vertex_ai_err, "failed to stop Vertex AI service");
      all_ok = false;
    }

    g_vertex_ai_service.reset();
  }
#endif  // TRITON_ENABLE_VERTEX_AI

#ifdef _WIN32
  // Counterpart of the WSAStartup() call made when the endpoints started.
  const int winsock_status = WSACleanup();
  if (winsock_status != 0) {
    LOG_ERROR << "Error in WSACleanup " << winsock_status;
    all_ok = false;
  }
#endif

  return all_ok;
}

// Create the trace manager from the tracing parameters given on the
// command line. '*trace_manager' is always reset to nullptr first and is
// only non-null on return when tracing is compiled in and creation
// succeeded. Returns false (after logging) if the manager cannot be created.
bool
StartTracing(triton::server::TraceManager** trace_manager)
{
  *trace_manager = nullptr;

#ifdef TRITON_ENABLE_TRACING
  TRITONSERVER_Error* create_err = triton::server::TraceManager::Create(
      trace_manager, g_triton_params.trace_level_, g_triton_params.trace_rate_,
      g_triton_params.trace_count_, g_triton_params.trace_log_frequency_,
      g_triton_params.trace_filepath_, g_triton_params.trace_mode_,
      g_triton_params.trace_config_map_);

  if (create_err != nullptr) {
    LOG_TRITONSERVER_ERROR(create_err, "failed to configure tracing");
    // Drop whatever Create() may have left behind; deleting a null pointer
    // is a no-op so no separate check is needed.
    delete (*trace_manager);
    *trace_manager = nullptr;
    return false;
  }
#endif  // TRITON_ENABLE_TRACING

  return true;
}

// Destroy the trace manager (when tracing is compiled in) and null out the
// caller's pointer. Always returns true.
bool
StopTracing(triton::server::TraceManager** trace_manager)
{
#ifdef TRITON_ENABLE_TRACING
  // Triton is assumed to have been stopped gracefully by now, so destroying
  // the manager is what finalizes the trace output.
  triton::server::TraceManager*& manager = *trace_manager;
  delete manager;
  manager = nullptr;
#endif  // TRITON_ENABLE_TRACING

  return true;
}

}  // namespace

// Entry point of the tritonserver executable.
//
// Parses the command line, creates the inference server, starts tracing and
// all requested endpoints, then blocks (optionally polling the model
// repository) until a termination signal is received. On shutdown the
// endpoints are stopped in two phases around TRITONSERVER_ServerStop().
// Returns 0 on a clean shutdown; any setup or shutdown failure terminates
// the process with exit code 1.
int
main(int argc, char** argv)
{
  // Parse command-line to create the options for the inference
  // server.
  triton::server::TritonParser tp;
  try {
    auto res = tp.Parse(argc, argv);
    g_triton_params = res.first;
    g_triton_params.CheckPortCollision();
  }
  catch (const triton::server::ParseException& pe) {
    std::cerr << "Usage: tritonserver [options]" << std::endl;
    std::cerr << tp.Usage() << std::endl;
    // Show error at bottom for immediate visibility
    std::cerr << pe.what() << std::endl;
    exit(1);
  }

  triton::server::TritonServerParameters::ManagedTritonServerOptionPtr
      triton_options(nullptr, TRITONSERVER_ServerOptionsDelete);
  try {
    triton_options = g_triton_params.BuildTritonServerOptions();
  }
  catch (const triton::server::ParseException& pe) {
    std::cerr << "Failed to build Triton option:" << std::endl;
    std::cerr << pe.what() << std::endl;
    exit(1);
  }

#ifdef TRITON_ENABLE_LOGGING
  // Initialize our own logging instance since it is used by GRPC and
  // HTTP endpoints. This logging instance is separate from the one in
  // libtritonserver so we must initialize explicitly.
  LOG_ENABLE_INFO(g_triton_params.log_info_);
  LOG_ENABLE_WARNING(g_triton_params.log_warn_);
  LOG_ENABLE_ERROR(g_triton_params.log_error_);
  LOG_SET_VERBOSE(g_triton_params.log_verbose_);
  LOG_SET_FORMAT(g_triton_params.log_format_);
  LOG_SET_OUT_FILE(g_triton_params.log_file_);
#endif  // TRITON_ENABLE_LOGGING

  // Trace manager. Stays nullptr unless tracing is compiled in and
  // successfully configured by StartTracing().
  triton::server::TraceManager* trace_manager = nullptr;

  // Manager for shared memory blocks.
  auto shm_manager = std::make_shared<triton::server::SharedMemoryManager>();

  // Create the server...
  TRITONSERVER_Server* server_ptr = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_ServerNew(&server_ptr, triton_options.get()),
      "creating server");

  std::shared_ptr<TRITONSERVER_Server> server(
      server_ptr, TRITONSERVER_ServerDelete);

  // Configure and start tracing if specified on the command line.
  if (!StartTracing(&trace_manager)) {
    exit(1);
  }

  // Trap SIGINT and SIGTERM to allow server to exit gracefully
  TRITONSERVER_Error* signal_err = triton::server::RegisterSignalHandler();
  if (signal_err != nullptr) {
    LOG_TRITONSERVER_ERROR(signal_err, "failed to register signal handler");
    exit(1);
  }

  // Start the GRPC, HTTP, SageMaker, Vertex AI and metrics endpoints.
  if (!StartEndpoints(server, trace_manager, shm_manager)) {
    exit(1);
  }

  // Wait until a signal terminates the server...
  while (!triton::server::signal_exiting_) {
    // If enabled, poll the model repository to see if there have been
    // any changes.
    if (g_triton_params.repository_poll_secs_ > 0) {
      LOG_TRITONSERVER_ERROR(
          TRITONSERVER_ServerPollModelRepository(server_ptr),
          "failed to poll model repository");
    }

    // Wait for the polling interval (or a long time if polling is not
    // enabled). Will be woken if the server is exiting.
    std::unique_lock<std::mutex> lock(triton::server::signal_exit_mu_);
    std::chrono::seconds wait_timeout(
        (g_triton_params.repository_poll_secs_ == 0)
            ? 3600
            : g_triton_params.repository_poll_secs_);
    // Re-check the exit flag while holding the lock. Without the predicate a
    // notification delivered between the loop condition above and this wait
    // would be lost and shutdown would be delayed by the whole timeout.
    // NOTE(review): fully closing that window assumes the signal handler
    // sets the flag while holding signal_exit_mu_ - confirm in common.cc.
    triton::server::signal_exit_cv_.wait_for(
        lock, wait_timeout,
        []() -> bool { return triton::server::signal_exiting_; });
  }

  // Stop the HTTP endpoint and gracefully stop the gRPC endpoint; the
  // services may update the remaining exit timeout.
  uint32_t exit_timeout_secs = g_triton_params.exit_timeout_secs_;
  StopEndpoints(&exit_timeout_secs);
  TRITONSERVER_ServerSetExitTimeout(server_ptr, exit_timeout_secs);

  TRITONSERVER_Error* stop_err = TRITONSERVER_ServerStop(server_ptr);

  // If unable to gracefully stop the server then Triton threads and
  // state are potentially in an invalid state, so just exit
  // immediately.
  if (stop_err != nullptr) {
    LOG_TRITONSERVER_ERROR(stop_err, "failed to stop server");
    exit(1);
  }

  // Forcefully stop gRPC and stop the metrics, SageMaker and Vertex AI
  // endpoints, which do not yet support exit timeout.
  StopEndpoints();

  // Stop tracing.
  StopTracing(&trace_manager);

#ifdef TRITON_ENABLE_ASAN
  // Can invoke ASAN before exit though this is typically not very
  // useful since there are many objects that are not yet destructed.
  //  __lsan_do_leak_check();
#endif  // TRITON_ENABLE_ASAN

  return 0;
}


================================================
FILE: src/memory_alloc.cc
================================================
// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <cuda_runtime_api.h>
#include <rapidjson/document.h>
#include <rapidjson/error/en.h>
#include <unistd.h>

#include <chrono>
#include <cstring>
#include <future>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

#include "common.h"
#include "triton/core/tritonserver.h"

static_assert(
    TRITON_MIN_COMPUTE_CAPABILITY >= 1.0,
    "Invalid TRITON_MIN_COMPUTE_CAPABILITY specified");

namespace ni = triton::server;

namespace {

// Memory placement selected on the command line for the tensors used by
// this test ('-i' for inputs, '-o' for outputs).
struct IOSpec {
  // Memory type used for input tensors (CPU or GPU).
  TRITONSERVER_MemoryType input_type_;
  // Device ID paired with 'input_type_'; the command-line parsing in main()
  // stores 0 when the CPU is selected.
  int64_t input_type_id_;

  // Memory type in which response buffers are allocated.
  TRITONSERVER_MemoryType output_type_;
  // Device ID paired with 'output_type_'; the command-line parsing in main()
  // stores 0 when the CPU is selected.
  int64_t output_type_id_;
};

// Metadata used for preparing input data and validating output data. Read
// by the response allocator and by the GPU input deleter.
IOSpec io_spec;

// Deleter for GPU buffers: switches to the device recorded in
// 'io_spec.input_type_id_' and frees 'data' with cudaFree. A null pointer is
// ignored; any CUDA failure goes through FAIL_IF_CUDA_ERR.
static auto gpu_data_deleter = [](void* data) {
  if (data == nullptr) {
    return;
  }

  FAIL_IF_CUDA_ERR(
      cudaSetDevice(io_spec.input_type_id_),
      "setting CUDA device to release GPU memory on " +
          std::to_string(io_spec.input_type_id_));
  FAIL_IF_CUDA_ERR(cudaFree(data), "releasing GPU memory");
};

// Print an optional error message followed by the command-line help to
// stderr, then terminate the process with exit code 1.
//
// argv: program arguments; argv[0] is shown as the program name.
// msg:  optional message printed before the help text (skipped when empty).
void
Usage(char** argv, const std::string& msg = std::string())
{
  if (!msg.empty()) {
    std::cerr << msg << std::endl;
  }

  std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
  std::cerr << "\t-i [input device ID]" << std::endl;
  // The option parsed by getopt is '-o'. Advertising '-out' would make
  // getopt treat it as '-o' with the argument "ut".
  std::cerr << "\t-o [output device ID]" << std::endl;
  std::cerr << "\t-v Enable verbose logging" << std::endl;
  std::cerr << "\t-r [model repository absolute path]" << std::endl;
  std::cerr << "\t-m [model name to be tested]" << std::endl;
  std::cerr << "\t-h [host policy name]" << std::endl;
  std::cerr << "\tFor '-h', if specify, the input will be set with different "
            << "host policy names, given that the specified value is the "
            << "host policy that the model under test is associated with."
            << std::endl;
  std::cerr << "\tFor device ID, -1 is used to stand for CPU device, "
            << "non-negative value is for GPU device." << std::endl;

  exit(1);
}

// Response allocator callback. Allocates 'byte_size' bytes for the output
// tensor 'tensor_name' in the memory described by 'io_spec' (malloc for CPU,
// cudaMalloc otherwise), ignoring the preferred memory type. The tensor name
// is stored in '*buffer_userp' so it can be reported on release. A zero-sized
// request yields a null buffer. Returns nullptr on success, or an INTERNAL
// error if the allocation fails.
TRITONSERVER_Error*
ResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  if (byte_size != 0) {
    void* chunk = nullptr;
    if (io_spec.output_type_ != TRITONSERVER_MEMORY_CPU) {
      // GPU output: select the device first, then allocate on it.
      auto cuda_status = cudaSetDevice(io_spec.output_type_id_);
      if (cuda_status == cudaSuccess) {
        cuda_status = cudaMalloc(&chunk, byte_size);
      }
      if (cuda_status != cudaSuccess) {
        const std::string cuda_msg(
            "failed to allocate CUDA memory: " +
            std::string(cudaGetErrorString(cuda_status)));
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL, cuda_msg.c_str());
      }
    } else {
      chunk = malloc(byte_size);
    }

    if (chunk == nullptr) {
      const std::string alloc_msg(
          "failed to allocate " + std::to_string(byte_size) + " bytes in " +
          TRITONSERVER_MemoryTypeString(io_spec.output_type_) +
          " for result tensor " + tensor_name);
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL, alloc_msg.c_str());
    }

    // The tensor name travels with the buffer so that the release callback
    // can print it.
    *buffer = chunk;
    *buffer_userp = new std::string(tensor_name);
    std::cout << "allocated " << byte_size << " bytes in "
              << TRITONSERVER_MemoryTypeString(io_spec.output_type_)
              << " for result tensor " << tensor_name << std::endl;
  } else {
    // Nothing to allocate for an empty tensor, and no book-keeping either.
    *buffer = nullptr;
    *buffer_userp = nullptr;
    std::cout << "allocated " << byte_size << " bytes for result tensor "
              << tensor_name << std::endl;
  }

  *actual_memory_type = io_spec.output_type_;
  *actual_memory_type_id = io_spec.output_type_id_;
  return nullptr;  // Success
}

// Response release callback. Frees 'buffer' with free() for CPU memory or
// with cudaFree() on device 'memory_type_id' otherwise, and takes ownership
// of the tensor-name string carried in 'buffer_userp'. Returns nullptr on
// success, or an INTERNAL error if the CUDA release fails.
TRITONSERVER_Error*
ResponseRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  // The unique_ptr destroys the name on every return path.
  std::unique_ptr<std::string> tensor_name(
      (buffer_userp != nullptr) ? reinterpret_cast<std::string*>(buffer_userp)
                                : new std::string("<unknown>"));

  std::cout << "Releasing buffer " << buffer << " of size " << byte_size
            << " in " << TRITONSERVER_MemoryTypeString(memory_type)
            << " for result '" << *tensor_name << "'" << std::endl;

  if (memory_type == TRITONSERVER_MEMORY_CPU) {
    free(buffer);
    return nullptr;  // Success
  }

  auto cuda_status = cudaSetDevice(memory_type_id);
  if (cuda_status == cudaSuccess) {
    cuda_status = cudaFree(buffer);
  }
  if (cuda_status != cudaSuccess) {
    const std::string cuda_msg(
        "failed to release CUDA memory: " +
        std::string(cudaGetErrorString(cuda_status)));
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, cuda_msg.c_str());
  }

  return nullptr;  // Success
}

// Request release callback: deletes the request once the RELEASE_ALL flag
// is set in 'flags'; otherwise does nothing.
void
InferRequestComplete(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  const bool release_all = (flags & TRITONSERVER_REQUEST_RELEASE_ALL) != 0;
  if (!release_all) {
    return;
  }

  TRITONSERVER_InferenceRequestDelete(request);
}

// Response callback: 'userp' is a heap-allocated promise for the response.
// A non-null response fulfills the promise, after which the promise object
// is destroyed. A null response is ignored and 'userp' is left untouched.
void
InferResponseComplete(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
  if (response == nullptr) {
    return;
  }

  using ResponsePromise = std::promise<TRITONSERVER_InferenceResponse*>;

  // Hand 'response' over to whoever waits on the matching future.
  ResponsePromise* promise = reinterpret_cast<ResponsePromise*>(userp);
  promise->set_value(response);
  delete promise;
}

// Return the index of the output called 'name' within 'response'. Fails
// through FAIL / FAIL_IF_ERR when the response cannot be inspected or when
// no output carries that name.
uint32_t
OutputIndex(TRITONSERVER_InferenceResponse* response, const std::string& name)
{
  uint32_t num_outputs;
  FAIL_IF_ERR(
      TRITONSERVER_InferenceResponseOutputCount(response, &num_outputs),
      "getting number of response outputs");

  uint32_t pos = 0;
  while (pos < num_outputs) {
    // Only the name is of interest; the remaining out-parameters are
    // required by the API and otherwise unused.
    const char* output_name;
    TRITONSERVER_DataType output_dtype;
    const int64_t* output_shape;
    uint64_t output_dims;
    const void* output_base;
    size_t output_byte_size;
    TRITONSERVER_MemoryType output_memory_type;
    int64_t output_memory_type_id;
    void* output_userp;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseOutput(
            response, pos, &output_name, &output_dtype, &output_shape,
            &output_dims, &output_base, &output_byte_size, &output_memory_type,
            &output_memory_type_id, &output_userp),
        "getting output info");

    if (name == output_name) {
      return pos;
    }
    ++pos;
  }

  FAIL("can't found output '" + name + "'");
  return 0;
}

// Inspect the model metadata JSON and report the single datatype shared by
// every input and output through 'dtype', plus whether the model's platform
// is "pytorch_libtorch" through 'is_torch_model'.
//
// Returns an UNSUPPORTED error when a tensor is not INT32, FP32 or BYTES,
// an INVALID_ARG error when the tensors do not all share one datatype, and
// nullptr on success.
TRITONSERVER_Error*
ParseModelConfig(
    const rapidjson::Document& model_metadata, TRITONSERVER_DataType* dtype,
    bool* is_torch_model)
{
  // True when 'datatype' names one of the three datatypes handled here.
  const auto is_supported = [](const char* datatype) -> bool {
    return (strcmp(datatype, "INT32") == 0) ||
           (strcmp(datatype, "FP32") == 0) || (strcmp(datatype, "BYTES") == 0);
  };

  *dtype = TRITONSERVER_TYPE_INVALID;
  for (const auto& input : model_metadata["inputs"].GetArray()) {
    const char* input_datatype = input["datatype"].GetString();
    if (!is_supported(input_datatype)) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "IO test utility only supports model with data type INT32, "
          "FP32 or BYTES");
    }

    // The first input fixes the expected datatype; the rest must match it.
    const TRITONSERVER_DataType input_dt =
        TRITONSERVER_StringToDataType(input_datatype);
    if (*dtype == TRITONSERVER_TYPE_INVALID) {
      *dtype = input_dt;
    } else if (input_dt != *dtype) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "the model inputs must have the same data type");
    }
  }

  for (const auto& output : model_metadata["outputs"].GetArray()) {
    const char* output_datatype = output["datatype"].GetString();
    if (!is_supported(output_datatype)) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "IO test utility only supports model with data type INT32, "
          "FP32 or BYTES");
    }

    if (TRITONSERVER_StringToDataType(output_datatype) != *dtype) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "the model inputs and outputs must have the same data type");
    }
  }

  *is_torch_model = (model_metadata["platform"] == "pytorch_libtorch");
  return nullptr;
}

// Fill the two input buffers with 16 elements of type T each:
// 'input0_data' holds 0, 1, ..., 15 and 'input1_data' holds sixteen ones.
// Both vectors are resized to exactly 16 * sizeof(T) bytes.
template <typename T>
void
GenerateInputData(
    std::vector<char>* input0_data, std::vector<char>* input1_data)
{
  constexpr size_t kElementCount = 16;
  input0_data->resize(kElementCount * sizeof(T));
  input1_data->resize(kElementCount * sizeof(T));

  // View the raw byte storage as arrays of T.
  T* ramp = reinterpret_cast<T*>(input0_data->data());
  T* ones = reinterpret_cast<T*>(input1_data->data());
  for (size_t idx = 0; idx < kElementCount; ++idx) {
    ramp[idx] = idx;
    ones[idx] = 1;
  }
}

// Append 16 serialized string elements to each input buffer. Every element
// is a native-endian uint32_t length followed by the characters.
// 'input0_data' receives "1", "2", ..., "16" and 'input1_data' receives
// sixteen copies of "1". Existing vector content is kept.
void
GenerateStringInputData(
    std::vector<char>* input0_data, std::vector<char>* input1_data)
{
  // Serialize one element (length prefix + characters) onto 'dst'.
  const auto append_element = [](std::vector<char>* dst,
                                 const std::string& text) {
    const uint32_t text_len = text.size();
    const char* len_bytes = reinterpret_cast<const char*>(&text_len);
    dst->insert(dst->end(), len_bytes, len_bytes + sizeof(uint32_t));
    dst->insert(dst->end(), text.begin(), text.end());
  };

  // Build both payloads completely before touching the output vectors.
  std::vector<char> serialized0;
  std::vector<char> serialized1;
  for (size_t idx = 0; idx < 16; ++idx) {
    append_element(&serialized0, std::to_string(idx + 1));
    append_element(&serialized1, std::to_string(1));
  }

  input0_data->insert(
      input0_data->end(), serialized0.begin(), serialized0.end());
  input1_data->insert(
      input1_data->end(), serialized1.begin(), serialized1.end());
}

// Append 16 serialized string elements to each output buffer. Every element
// is a native-endian uint32_t length followed by the characters.
// 'output0_data' receives "2", "3", ..., "17" and 'output1_data' receives
// "0", "1", ..., "15". Existing vector content is kept.
void
GenerateStringOutputData(
    std::vector<char>* output0_data, std::vector<char>* output1_data)
{
  // Serialize one element (length prefix + characters) onto 'dst'.
  const auto append_element = [](std::vector<char>* dst,
                                 const std::string& text) {
    const uint32_t text_len = text.size();
    const char* len_bytes = reinterpret_cast<const char*>(&text_len);
    dst->insert(dst->end(), len_bytes, len_bytes + sizeof(uint32_t));
    dst->insert(dst->end(), text.begin(), text.end());
  };

  // Build both payloads completely before touching the output vectors.
  std::vector<char> serialized0;
  std::vector<char> serialized1;
  for (size_t idx = 0; idx < 16; ++idx) {
    append_element(&serialized0, std::to_string(idx + 2));
    append_element(&serialized1, std::to_string(idx));
  }

  output0_data->insert(
      output0_data->end(), serialized0.begin(), serialized0.end());
  output1_data->insert(
      output1_data->end(), serialized1.begin(), serialized1.end());
}

// Check 16 elements of type T: 'output0' must hold input0 + input1 and
// 'output1' must hold input0 - input1, element by element. Every equation
// is printed, and the first mismatch aborts through FAIL naming the
// offending output.
template <typename T>
void
CompareResult(
    const std::string& output0_name, const std::string& output1_name,
    const void* input0, const void* input1, const void* output0,
    const void* output1)
{
  const T* lhs = reinterpret_cast<const T*>(input0);
  const T* rhs = reinterpret_cast<const T*>(input1);
  const T* sums = reinterpret_cast<const T*>(output0);
  const T* diffs = reinterpret_cast<const T*>(output1);

  for (size_t idx = 0; idx < 16; ++idx) {
    std::cout << lhs[idx] << " + " << rhs[idx] << " = " << sums[idx]
              << std::endl;
    std::cout << lhs[idx] << " - " << rhs[idx] << " = " << diffs[idx]
              << std::endl;

    if ((lhs[idx] + rhs[idx]) != sums[idx]) {
      FAIL("incorrect sum in " + output0_name);
    }
    if ((lhs[idx] - rhs[idx]) != diffs[idx]) {
      FAIL("incorrect difference in " + output1_name);
    }
  }
}

// Validate the BYTES outputs of the add/sub model for the fixed string
// inputs "1".."16" and "1": element i of 'output0' must parse to (i + 1) + 1
// and element i of 'output1' to (i + 1) - 1. Each output buffer is expected
// to hold 16 serialized elements, each a native-endian uint32_t length
// followed by that many characters. Every equation is printed, and the first
// mismatch aborts through FAIL naming the offending output.
//
// 'input0' and 'input1' are unused; the expected operands are fixed.
void
CompareStringResult(
    const std::string& output0_name, const std::string& output1_name,
    const void* input0, const void* input1, const void* output0,
    const void* output1)
{
  // Decode 16 length-prefixed elements into integers. After the first
  // variable-length element the length prefixes are no longer 4-byte
  // aligned, so they are copied out with memcpy instead of being read
  // through a uint32_t pointer (which would be a misaligned access).
  const auto decode = [](const void* serialized) -> std::vector<int> {
    std::vector<int> numbers;
    const uint8_t* cursor = reinterpret_cast<const uint8_t*>(serialized);
    for (size_t i = 0; i < 16; ++i) {
      uint32_t len = 0;
      std::memcpy(&len, cursor, sizeof(len));
      cursor += sizeof(len);

      const std::string text(reinterpret_cast<const char*>(cursor), len);
      numbers.push_back(std::atoi(text.c_str()));
      cursor += len;
    }
    return numbers;
  };

  const std::vector<int> output0_numbers = decode(output0);
  const std::vector<int> output1_numbers = decode(output1);

  for (int i = 0; i < 16; ++i) {
    std::cout << (i + 1) << " + " << 1 << " = " << output0_numbers[i]
              << std::endl;
    std::cout << (i + 1) << " - " << 1 << " = " << output1_numbers[i]
              << std::endl;

    if (((i + 1) + 1) != output0_numbers[i]) {
      FAIL("incorrect sum in " + output0_name);
    }
    if (((i + 1) - 1) != output1_numbers[i]) {
      FAIL("incorrect difference in " + output1_name);
    }
  }
}

}  // namespace

int
main(int argc, char** argv)
{
  std::string model_repository_path;
  std::string model_name;
  int verbose_level = 0;

  io_spec.input_type_ = TRITONSERVER_MEMORY_CPU;
  io_spec.input_type_id_ = 0;
  io_spec.output_type_ = TRITONSERVER_MEMORY_CPU;
  io_spec.output_type_id_ = 0;

  const char* host_policy_cstr = nullptr;
  std::string host_policy;

  // Parse commandline...
  int opt;
  while ((opt = getopt(argc, argv, "vi:o:r:m:h:")) != -1) {
    switch (opt) {
      case 'i': {
        int64_t raw_id = std::stoll(optarg);
        if (raw_id < 0) {
          io_spec.input_type_ = TRITONSERVER_MEMORY_CPU;
          io_spec.input_type_id_ = 0;
        } else {
          io_spec.input_type_ = TRITONSERVER_MEMORY_GPU;
          io_spec.input_type_id_ = raw_id;
        }
        break;
      }
      case 'o': {
        int64_t raw_id = std::stoll(optarg);
        if (raw_id < 0) {
          io_spec.output_type_ = TRITONSERVER_MEMORY_CPU;
          io_spec.output_type_id_ = 0;
        } else {
          io_spec.output_type_ = TRITONSERVER_MEMORY_GPU;
          io_spec.output_type_id_ = raw_id;
        }
        break;
      }
      case 'h': {
        host_policy = optarg;
        host_policy_cstr = host_policy.c_str();
        break;
      }
      case 'r':
        model_repository_path = optarg;
        break;
      case 'm':
        model_name = optarg;
        break;
      case 'v':
        verbose_level = 1;
        break;
      case '?':
        Usage(argv);
        break;
    }
  }

  if (model_repository_path.empty()) {
    Usage(argv, "-r must be used to specify model repository path");
  }
  if (model_name.empty()) {
    Usage(argv, "-m must be used to specify model being test");
  }

  // Create the server...
  TRITONSERVER_ServerOptions* server_options = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsNew(&server_options),
      "creating server options");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetModelRepositoryPath(
          server_options, model_repository_path.c_str()),
      "setting model repository path");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetModelControlMode(
          server_options, TRITONSERVER_MODEL_CONTROL_EXPLICIT),
      "setting model control mode");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetStartupModel(
          server_options, model_name.c_str()),
      "setting model to load");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level),
      "setting verbose logging level");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetBackendDirectory(
          server_options, "/opt/tritonserver/backends"),
      "setting backend directory");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetRepoAgentDirectory(
          server_options, "/opt/tritonserver/repoagents"),
      "setting repository agent directory");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true),
      "setting strict model configuration");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(
          server_options, TRITON_MIN_COMPUTE_CAPABILITY),
      "setting minimum supported CUDA compute capability");

  TRITONSERVER_Server* server_ptr = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_ServerNew(&server_ptr, server_options), "creating server");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsDelete(server_options),
      "deleting server options");

  std::shared_ptr<TRITONSERVER_Server> server(
      server_ptr, TRITONSERVER_ServerDelete);

  // Wait until the server is both live and ready.
  size_t health_iters = 0;
  while (true) {
    bool live, ready;
    FAIL_IF_ERR(
        TRITONSERVER_ServerIsLive(server.get(), &live),
        "unable to get server liveness");
    FAIL_IF_ERR(
        TRITONSERVER_ServerIsReady(server.get(), &ready),
        "unable to get server readiness");
    std::cout << "Server Health: live " << live << ", ready " << ready
              << std::endl;
    if (live && ready) {
      break;
    }

    if (++health_iters >= 10) {
      FAIL("failed to find healthy inference server");
    }

    std::this_thread::sleep_for(std::chrono::milliseconds(500));
  }

  // Print status of the server.
  {
    TRITONSERVER_Message* server_metadata_message;
    FAIL_IF_ERR(
        TRITONSERVER_ServerMetadata(server.get(), &server_metadata_message),
        "unable to get server metadata message");
    const char* buffer;
    size_t byte_size;
    FAIL_IF_ERR(
        TRITONSERVER_MessageSerializeToJson(
            server_metadata_message, &buffer, &byte_size),
        "unable to serialize server metadata message");

    std::cout << "Server Status:" << std::endl;
    std::cout << std::string(buffer, byte_size) << std::endl;

    FAIL_IF_ERR(
        TRITONSERVER_MessageDelete(server_metadata_message),
        "deleting status metadata");
  }

  // Wait for the model to become available.
  bool is_torch_model = false;
  TRITONSERVER_DataType dtype = TRITONSERVER_TYPE_INT32;
  bool is_ready = false;
  health_iters = 0;
  while (!is_ready) {
    FAIL_IF_ERR(
        TRITONSERVER_ServerModelIsReady(
            server.get(), model_name.c_str(), 1, &is_ready),
        "unable to get model readiness");
    if (!is_ready) {
      if (++health_iters >= 10) {
        FAIL("model failed to be ready in 10 iterations");
      }
      std::this_thread::sleep_for(std::chrono::milliseconds(500));
      continue;
    }

    TRITONSERVER_Message* model_metadata_message;
    FAIL_IF_ERR(
        TRITONSERVER_ServerModelMetadata(
            server.get(), model_name.c_str(), 1, &model_metadata_message),
        "unable to get model metadata message");
    const char* buffer;
    size_t byte_size;
    FAIL_IF_ERR(
        TRITONSERVER_MessageSerializeToJson(
            model_metadata_message, &buffer, &byte_size),
        "unable to serialize model status protobuf");

    rapidjson::Document model_metadata;
    model_metadata.Parse(buffer, byte_size);
    if (model_metadata.HasParseError()) {
      FAIL(
          "error: failed to parse model metadata from JSON: " +
          std::string(GetParseError_En(model_metadata.GetParseError())) +
          " at " + std::to_string(model_metadata.GetErrorOffset()));
    }

    FAIL_IF_ERR(
        TRITONSERVER_MessageDelete(model_metadata_message),
        "deleting status protobuf");

    if (strcmp(model_metadata["name"].GetString(), model_name.c_str())) {
      FAIL("unable to find metadata for model");
    }

    bool found_version = false;
    if (model_metadata.HasMember("versions")) {
      for (const auto& version : model_metadata["versions"].GetArray()) {
        if (strcmp(version.GetString(), "1") == 0) {
          found_version = true;
          break;
        }
      }
    }

    if (!found_version) {
      FAIL("unable to find version 1 status for model");
    }

    FAIL_IF_ERR(
        ParseModelConfig(model_metadata, &dtype, &is_torch_model),
        "parsing model metadata");
  }

  // Create the allocator that will be used to allocate buffers for
  // the result tensors.
  TRITONSERVER_ResponseAllocator* allocator = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorNew(
          &allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */),
      "creating response allocator");

  TRITONSERVER_InferenceRequest* irequest = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestNew(
          &irequest, server.get(), model_name.c_str(), -1 /* model_version */),
      "creating inference request");

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestSetId(irequest, "123"),
      "setting ID for the request");

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestSetReleaseCallback(
          irequest, InferRequestComplete, nullptr /* request_release_userp */),
      "setting request release callback");

  // Create 0 data that shouldn't be selected and used to test host policy
  // functionality
  std::vector<uint32_t> zero_data(16);

  // Create the data for the two input tensors. Initialize the first
  // to unique integers and the second to all ones.
  std::vector<char> input0_data;
  std::vector<char> input1_data;
  if (dtype == TRITONSERVER_TYPE_INT32) {
    GenerateInputData<int32_t>(&input0_data, &input1_data);
  } else if (dtype == TRITONSERVER_TYPE_FP32) {
    GenerateInputData<float>(&input0_data, &input1_data);
  } else {
    GenerateStringInputData(&input0_data, &input1_data);
  }

  auto input0 = "INPUT0";
  auto input1 = "INPUT1";

  // Get the size of the input tensors
  size_t input0_size = input0_data.size();
  size_t input1_size = input1_data.size();

  std::vector<int64_t> input0_shape({1, 16});
  std::vector<int64_t> input1_shape({1, 16});

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddInput(
          irequest, input0, dtype, &input0_shape[0], input0_shape.size()),
      "setting input 0 meta-data for the request");
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddInput(
          irequest, input1, dtype, &input1_shape[0], input1_shape.size()),
      "setting input 1 meta-data for the request");

  auto output0 = is_torch_model ? "OUTPUT__0" : "OUTPUT0";
  auto output1 = is_torch_model ? "OUTPUT__1" : "OUTPUT1";

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0),
      "requesting output 0 for the request");
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1),
      "requesting output 1 for the request");

  const void* input0_base = &input0_data[0];
  const void* input1_base = &input1_data[0];
  bool gpu_input = (io_spec.input_type_ == TRITONSERVER_MEMORY_GPU);
  std::unique_ptr<void, decltype(gpu_data_deleter)> input0_gpu(
      nullptr, gpu_data_deleter);
  std::unique_ptr<void, decltype(gpu_data_deleter)> input1_gpu(
      nullptr, gpu_data_deleter);
  if (gpu_input) {
    FAIL_IF_CUDA_ERR(
        cudaSetDevice(io_spec.input_type_id_),
        "setting CUDA device to device " +
            std::to_string(io_spec.input_type_id_));
    void* dst;
    FAIL_IF_CUDA_ERR(
        cudaMalloc(&dst, input0_size), "allocating GPU memory for INPUT0 data");
    input0_gpu.reset(dst);
    FAIL_IF_CUDA_ERR(
        cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToDevice),
        "setting INPUT0 data in GPU memory");
    FAIL_IF_CUDA_ERR(
        cudaMalloc(&dst, input1_size), "allocating GPU memory for INPUT1 data");
    input1_gpu.reset(dst);
    FAIL_IF_CUDA_ERR(
        cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToDevice),
        "setting INPUT1 data in GPU memory");
  }

  input0_base = gpu_input ? input0_gpu.get() : &input0_data[0];
  input1_base = gpu_input ? input1_gpu.get() : &input1_data[0];


  if (host_policy_cstr == nullptr) {
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input0, input0_base, input0_size, io_spec.input_type_,
            io_spec.input_type_id_),
        "assigning INPUT0 data");
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input1, input1_base, input1_size, io_spec.input_type_,
            io_spec.input_type_id_),
        "assigning INPUT1 data");

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
            irequest, input0, zero_data.data(),
            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0,
            "fake_host_policy_name"),
        "assigning zero INPUT0 data with host policy 'fake_host_policy_name'");
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
            irequest, input1, zero_data.data(),
            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0,
            "fake_host_policy_name"),
        "assigning zero INPUT1 data with host policy 'fake_host_policy_name'");
  } else {
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input0, zero_data.data(),
            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0),
        "assigning zero INPUT0 data");
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input1, zero_data.data(),
            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0),
        "assigning zero INPUT1 data");
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
            irequest, input0, input0_base, input0_size, io_spec.input_type_,
            io_spec.input_type_id_, host_policy_cstr),
        "assigning INPUT0 data to provided host policy");
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
            irequest, input1, input1_base, input1_size, io_spec.input_type_,
            io_spec.input_type_id_, host_policy_cstr),
        "assigning INPUT1 data to provided host policy");
  }

  // Perform inference...
  auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
  std::future<TRITONSERVER_InferenceResponse*> completed = p->get_future();

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestSetResponseCallback(
          irequest, allocator, nullptr /* response_allocator_userp */,
          InferResponseComplete, reinterpret_cast<void*>(p)),
      "setting response callback");

  FAIL_IF_ERR(
      TRITONSERVER_ServerInferAsync(
          server.get(), irequest, nullptr /* trace */),
      "running inference");

  // Wait for the inference response and check the status.
  TRITONSERVER_InferenceResponse* response = completed.get();
  FAIL_IF_ERR(TRITONSERVER_InferenceResponseError(response), "response status");

  // Create the expected data for the two output tensors.
  std::vector<char> expected0_data;
  std::vector<char> expected1_data;
  if (dtype == TRITONSERVER_TYPE_BYTES) {
    GenerateStringOutputData(&expected0_data, &expected1_data);
  }

  // Check the output tensor values...
  // Note that depending on whether the backend supports outputs in GPU memory,
  // the output tensor may be in CPU memory even if -g flag is set.

  const void* output0_content;
  size_t output0_byte_size;
  TRITONSERVER_MemoryType output0_memory_type;
  int64_t output0_memory_type_id;
  {
    const char* cname;
    TRITONSERVER_DataType datatype;
    const int64_t* shape;
    uint64_t dim_count;
    void* userp;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseOutput(
            response, OutputIndex(response, output0), &cname, &datatype, &shape,
            &dim_count, &output0_content, &output0_byte_size,
            &output0_memory_type, &output0_memory_type_id, &userp),
        "getting output0 info");

    if (dtype == TRITONSERVER_TYPE_BYTES) {
      size_t expected0_size = expected0_data.size();
      if (expected0_size != output0_byte_size) {
        FAIL(
            "unexpected output0 byte-size, expected " +
            std::to_string(expected0_size) + ", got " +
            std::to_string(output0_byte_size));
      }
    } else if (output0_byte_size != input0_size) {
      FAIL(
          "unexpected output0 byte-size, expected " +
          std::to_string(input0_size) + ", got " +
          std::to_string(output0_byte_size));
    } else if (
        (io_spec.output_type_ != output0_memory_type) ||
        (io_spec.output_type_id_ != output0_memory_type_id)) {
      FAIL(
          std::string("unexpected output0 memory type (id), expected to be "
                      "allocated in ") +
          TRITONSERVER_MemoryTypeString(io_spec.output_type_) + " with id " +
          std::to_string(io_spec.output_type_id_) + ", got " +
          TRITONSERVER_MemoryTypeString(output0_memory_type) + " with id " +
          std::to_string(output0_memory_type_id));
    }
  }

  const void* output1_content;
  size_t output1_byte_size;
  TRITONSERVER_MemoryType output1_memory_type;
  int64_t output1_memory_type_id;
  {
    const char* cname;
    TRITONSERVER_DataType datatype;
    const int64_t* shape;
    uint64_t dim_count;
    void* userp;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseOutput(
            response, OutputIndex(response, output1), &cname, &datatype, &shape,
            &dim_count, &output1_content, &output1_byte_size,
            &output1_memory_type, &output1_memory_type_id, &userp),
        "getting output1 info");

    if (dtype == TRITONSERVER_TYPE_BYTES) {
      size_t expected1_size = expected1_data.size();
      if (expected1_size != output1_byte_size) {
        FAIL(
            "unexpected output1 byte-size, expected " +
            std::to_string(expected1_size) + ", got " +
            std::to_string(output1_byte_size));
      }
    } else if (output1_byte_size != input1_size) {
      FAIL(
          "unexpected output1 byte-size, expected " +
          std::to_string(input1_size) + ", got " +
          std::to_string(output1_byte_size));
    } else if (
        (io_spec.output_type_ != output1_memory_type) ||
        (io_spec.output_type_id_ != output1_memory_type_id)) {
      FAIL(
          std::string("unexpected output1 memory type (id), expected to be "
                      "allocated in ") +
          TRITONSERVER_MemoryTypeString(io_spec.output_type_) + " with id " +
          std::to_string(io_spec.output_type_id_) + ", got " +
          TRITONSERVER_MemoryTypeString(output1_memory_type) + " with id " +
          std::to_string(output1_memory_type_id));
    }
  }

  const void* output0_result = output0_content;
  const void* output1_result = output1_content;

  // Different from CPU memory, outputs in GPU memory must be copied to CPU
  // memory to be read directly.
  std::vector<char> output0_data(output0_byte_size);
  std::vector<char> output1_data(output1_byte_size);
  if (output0_memory_type == TRITONSERVER_MEMORY_CPU) {
    std::cout << "OUTPUT0 are stored in CPU memory" << std::endl;
  } else {
    std::cout << "OUTPUT0 are stored in GPU memory" << std::endl;
    FAIL_IF_CUDA_ERR(
        cudaMemcpy(
            &output0_data[0], output0_content, output0_byte_size,
            cudaMemcpyDeviceToHost),
        "setting INPUT0 data in GPU memory");
    output0_result = &output0_data[0];
  }

  if (output1_memory_type == TRITONSERVER_MEMORY_CPU) {
    std::cout << "OUTPUT1 are stored in CPU memory" << std::endl;
  } else {
    std::cout << "OUTPUT1 are stored in GPU memory" << std::endl;
    FAIL_IF_CUDA_ERR(
        cudaMemcpy(
            &output1_data[0], output1_content, output1_byte_size,
            cudaMemcpyDeviceToHost),
        "setting INPUT0 data in GPU memory");
    output1_result = &output1_data[0];
  }

  if (dtype == TRITONSERVER_TYPE_INT32) {
    CompareResult<int32_t>(
        output0, output1, &input0_data[0], &input1_data[0], output0_result,
        output1_result);
  } else if (dtype == TRITONSERVER_TYPE_FP32) {
    CompareResult<float>(
        output0, output1, &input0_data[0], &input1_data[0], output0_result,
        output1_result);
  } else {
    CompareStringResult(
        output0, output1, &input0_data[0], &input1_data[0], output0_result,
        output1_result);
  }

  FAIL_IF_ERR(
      TRITONSERVER_InferenceResponseDelete(response),
      "deleting inference response");

  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorDelete(allocator),
      "deleting response allocator");

  return 0;
}


================================================
FILE: src/multi_server.cc
================================================
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <rapidjson/document.h>
#include <rapidjson/error/en.h>
#include <unistd.h>

#include <chrono>
#include <cstring>
#include <future>
#include <iostream>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#include "common.h"
#include "triton/core/tritonserver.h"

#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif  // TRITON_ENABLE_GPU

namespace ni = triton::server;

namespace {

// When true, ResponseAlloc() allocates output buffers in
// 'requested_memory_type' instead of the memory type preferred by the
// server, and Check() fails if an output is returned in any other
// memory type.
// NOTE(review): presumably set from the '-m' command-line option
// described in Usage(); the code that assigns it is not visible here.
bool enforce_memory_type = false;
// Memory type to enforce for outputs. Only consulted when
// 'enforce_memory_type' is true.
TRITONSERVER_MemoryType requested_memory_type;

#ifdef TRITON_ENABLE_GPU
// Deleter for buffers obtained from the CUDA runtime. Queries the
// pointer attributes of 'data' to decide whether it is device memory
// (released with cudaFree) or host memory (released with
// cudaFreeHost). A nullptr is ignored. Failures are reported on
// stderr; nothing is thrown or returned.
static auto cuda_data_deleter = [](void* data) {
  if (data == nullptr) {
    return;
  }

  cudaPointerAttributes attr;
  auto cuerr = cudaPointerGetAttributes(&attr, data);
  if (cuerr != cudaSuccess) {
    std::cerr << "error: failed to get CUDA pointer attribute of " << data
              << ": " << cudaGetErrorString(cuerr) << std::endl;
    // 'attr' was not filled in, so there is no way to know which free
    // function applies. Give up rather than read uninitialized data and
    // possibly call the wrong release function.
    return;
  }

  if (attr.type == cudaMemoryTypeDevice) {
    cuerr = cudaFree(data);
  } else if (attr.type == cudaMemoryTypeHost) {
    cuerr = cudaFreeHost(data);
  }
  if (cuerr != cudaSuccess) {
    std::cerr << "error: failed to release CUDA pointer " << data << ": "
              << cudaGetErrorString(cuerr) << std::endl;
  }
};
#endif  // TRITON_ENABLE_GPU

// Print an optional error message followed by the command-line help
// text to stderr, then terminate the process with exit code 1.
//
// argv: program arguments; only argv[0] (the program name) is used.
// msg:  optional message printed before the help text when non-empty.
void
Usage(char** argv, const std::string& msg = std::string())
{
  const bool has_message = !msg.empty();
  if (has_message) {
    std::cerr << msg << std::endl;
  }

  const char* program_name = argv[0];
  std::cerr << "Usage: " << program_name << " [options]" << std::endl;

  // One line per supported option.
  std::cerr << "\t-m <\"system\"|\"pinned\"|gpu>"
            << " Enforce the memory type for input and output tensors."
            << " If not specified, inputs will be in system memory and outputs"
            << " will be based on the model's preferred type." << std::endl
            << "\t-v Enable verbose logging" << std::endl
            << "\t-r [model repository absolute path]" << std::endl
            << "\t-t Thread count." << std::endl
            << "\t-l Number of loops to run in each thread." << std::endl;

  exit(1);
}

// Response allocator callback: allocate the buffer that will hold one
// output tensor.
//
// The buffer is allocated in the memory type preferred by the caller
// unless 'enforce_memory_type' is set, in which case
// 'requested_memory_type' is used instead. Unknown memory types (and
// all non-CPU types when built without TRITON_ENABLE_GPU) fall back to
// system memory obtained with malloc().
//
// On success returns nullptr and sets:
//   *buffer               the allocation (nullptr when byte_size == 0)
//   *buffer_userp         heap-allocated std::string holding the tensor
//                         name (nullptr when byte_size == 0); it is
//                         consumed by ResponseRelease()
//   *actual_memory_type / *actual_memory_type_id
//                         where the buffer actually lives
// On failure returns a TRITONSERVER_Error describing the problem.
TRITONSERVER_Error*
ResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  // Initially attempt to make the actual memory type and id that we
  // allocate be the same as preferred memory type
  *actual_memory_type = preferred_memory_type;
  *actual_memory_type_id = preferred_memory_type_id;

  // If 'byte_size' is zero just return 'buffer' == nullptr, we don't
  // need to do any other book-keeping.
  if (byte_size == 0) {
    *buffer = nullptr;
    *buffer_userp = nullptr;
    std::cout << "allocated " << byte_size << " bytes for result tensor "
              << tensor_name << std::endl;
  } else {
    void* allocated_ptr = nullptr;
    if (enforce_memory_type) {
      *actual_memory_type = requested_memory_type;
    }

    switch (*actual_memory_type) {
#ifdef TRITON_ENABLE_GPU
      case TRITONSERVER_MEMORY_CPU_PINNED: {
        auto err = cudaSetDevice(*actual_memory_type_id);
        if ((err != cudaSuccess) && (err != cudaErrorNoDevice) &&
            (err != cudaErrorInsufficientDriver)) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INTERNAL,
              std::string(
                  "unable to recover current CUDA device: " +
                  std::string(cudaGetErrorString(err)))
                  .c_str());
        }

        err = cudaHostAlloc(&allocated_ptr, byte_size, cudaHostAllocPortable);
        if (err != cudaSuccess) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INTERNAL,
              std::string(
                  "cudaHostAlloc failed: " +
                  std::string(cudaGetErrorString(err)))
                  .c_str());
        }
        break;
      }

      case TRITONSERVER_MEMORY_GPU: {
        auto err = cudaSetDevice(*actual_memory_type_id);
        if ((err != cudaSuccess) && (err != cudaErrorNoDevice) &&
            (err != cudaErrorInsufficientDriver)) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INTERNAL,
              std::string(
                  "unable to recover current CUDA device: " +
                  std::string(cudaGetErrorString(err)))
                  .c_str());
        }

        err = cudaMalloc(&allocated_ptr, byte_size);
        if (err != cudaSuccess) {
          return TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INTERNAL,
              std::string(
                  "cudaMalloc failed: " + std::string(cudaGetErrorString(err)))
                  .c_str());
        }
        break;
      }
#endif  // TRITON_ENABLE_GPU

      // Use CPU memory if the requested memory type is unknown
      // (default case).
      case TRITONSERVER_MEMORY_CPU:
      default: {
        *actual_memory_type = TRITONSERVER_MEMORY_CPU;
        allocated_ptr = malloc(byte_size);
        break;
      }
    }

    // A non-zero sized allocation that produced no buffer is a failure.
    // Report it instead of returning success with '*buffer' and
    // '*buffer_userp' left unset.
    if (allocated_ptr == nullptr) {
      *buffer = nullptr;
      *buffer_userp = nullptr;
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          std::string(
              "failed to allocate " + std::to_string(byte_size) +
              " bytes in " +
              TRITONSERVER_MemoryTypeString(*actual_memory_type) +
              " for result tensor " + tensor_name)
              .c_str());
    }

    // Pass the tensor name with buffer_userp so we can show it when
    // releasing the buffer.
    *buffer = allocated_ptr;
    *buffer_userp = new std::string(tensor_name);
    std::cout << "allocated " << byte_size << " bytes in "
              << TRITONSERVER_MemoryTypeString(*actual_memory_type)
              << " for result tensor " << tensor_name << std::endl;
  }

  return nullptr;  // Success
}

// Response allocator callback: release a buffer previously handed out
// by ResponseAlloc().
//
// 'buffer_userp' is expected to be the heap-allocated std::string with
// the tensor name that ResponseAlloc() attached to the buffer; it is
// deleted here. When it is nullptr a placeholder name is used for the
// log line. Release failures are reported on stderr only; the function
// always returns nullptr (success).
TRITONSERVER_Error*
ResponseRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  // Either take ownership of the name stored at allocation time or
  // create a placeholder; in both cases the string is deleted below.
  std::string* tensor_name = (buffer_userp == nullptr)
                                 ? new std::string("<unknown>")
                                 : static_cast<std::string*>(buffer_userp);

  std::cout << "Releasing buffer " << buffer << " of size " << byte_size
            << " in " << TRITONSERVER_MemoryTypeString(memory_type)
            << " for result '" << *tensor_name << "'" << std::endl;

  switch (memory_type) {
    case TRITONSERVER_MEMORY_CPU: {
      free(buffer);
      break;
    }
#ifdef TRITON_ENABLE_GPU
    // Pinned and device memory are released the same way apart from
    // the free function itself.
    case TRITONSERVER_MEMORY_CPU_PINNED:
    case TRITONSERVER_MEMORY_GPU: {
      auto status = cudaSetDevice(memory_type_id);
      if (status == cudaSuccess) {
        status = (memory_type == TRITONSERVER_MEMORY_GPU)
                     ? cudaFree(buffer)
                     : cudaFreeHost(buffer);
      }
      if (status != cudaSuccess) {
        std::cerr << "error: failed to cudaFree " << buffer << ": "
                  << cudaGetErrorString(status) << std::endl;
      }
      break;
    }
#endif  // TRITON_ENABLE_GPU
    default: {
      std::cerr << "error: unexpected buffer allocated in CUDA managed memory"
                << std::endl;
      break;
    }
  }

  delete tensor_name;

  return nullptr;  // Success
}

// Request release callback. Intentionally a no-op: none of the
// arguments are used and the request object is left untouched.
void
InferRequestComplete(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  // We reuse the request so we don't delete it here.
}

// Response callback. 'userp' must point to a heap-allocated
// std::promise<TRITONSERVER_InferenceResponse*>; for a non-null
// response the promise is fulfilled with that response and then
// deleted. A null response is ignored and the promise is left
// untouched.
void
InferResponseComplete(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
  if (response == nullptr) {
    return;
  }

  // Hand 'response' over to whoever is waiting on the future.
  auto* promise =
      reinterpret_cast<std::promise<TRITONSERVER_InferenceResponse*>*>(userp);
  promise->set_value(response);
  delete promise;
}

// Validate that the model described by 'model_metadata' can be used by
// this example: every tensor listed under "inputs" and "outputs" must
// have datatype "FP32", and outputs must match the datatype seen on
// the inputs.
//
// Returns nullptr when the metadata is acceptable, otherwise a
// TRITONSERVER_Error:
//   TRITONSERVER_ERROR_INVALID_ARG  malformed metadata or mismatching
//                                   input/output datatypes
//   TRITONSERVER_ERROR_UNSUPPORTED  a tensor whose datatype is not FP32
TRITONSERVER_Error*
ParseModelMetadata(const rapidjson::Document& model_metadata)
{
  // Validate the document structure up front. rapidjson's operator[],
  // GetArray() and GetString() assert on a missing member or a value of
  // the wrong type, so malformed metadata must be rejected before those
  // accessors are used.
  if (!model_metadata.IsObject()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG, "model metadata must be a JSON object");
  }
  const char* const sections[] = {"inputs", "outputs"};
  for (const char* section : sections) {
    const auto itr = model_metadata.FindMember(section);
    if ((itr == model_metadata.MemberEnd()) || !itr->value.IsArray()) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          (std::string("model metadata must contain an array named '") +
           section + "'")
              .c_str());
    }
    for (const auto& tensor : itr->value.GetArray()) {
      if (!tensor.IsObject() || !tensor.HasMember("datatype") ||
          !tensor["datatype"].IsString()) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string("every entry of '") + section +
             "' in model metadata must have a string 'datatype'")
                .c_str());
      }
    }
  }

  std::string seen_data_type;
  for (const auto& input : model_metadata["inputs"].GetArray()) {
    if (strcmp(input["datatype"].GetString(), "FP32")) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "multi-server example only supports model with data type FP32");
    }
    if (seen_data_type.empty()) {
      seen_data_type = input["datatype"].GetString();
    } else if (strcmp(seen_data_type.c_str(), input["datatype"].GetString())) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "the inputs and outputs of model must have the same data type");
    }
  }
  for (const auto& output : model_metadata["outputs"].GetArray()) {
    if (strcmp(output["datatype"].GetString(), "FP32")) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "multi-server example only supports model with data type FP32");
    } else if (strcmp(seen_data_type.c_str(), output["datatype"].GetString())) {
      // Also reached when the model declares no inputs, because
      // 'seen_data_type' is still empty in that case.
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "the inputs and outputs of model must have the same data type");
    }
  }

  return nullptr;
}

// Fill the two input tensors with 16 elements each: 'input0_data'
// receives the values 0, 1, ..., 15 and 'input1_data' receives all
// ones. Any previous contents of the vectors are replaced.
void
GenerateInputData(
    std::vector<float>* input0_data, std::vector<float>* input1_data)
{
  const size_t element_count = 16;

  std::vector<float>& ramp = *input0_data;
  std::vector<float>& ones = *input1_data;

  ramp.assign(element_count, 0.0f);
  ones.assign(element_count, 1.0f);
  for (size_t idx = 0; idx != element_count; ++idx) {
    ramp[idx] = static_cast<float>(idx);
  }
}

// Verify the element-wise results of the example model for the first
// 16 elements: 'output0' must equal input0 + input1 and 'output1' must
// equal input0 - input1. Every equation is echoed to stdout; the first
// mismatch is reported through FAIL, naming the offending output.
void
CompareResult(
    const std::string& output0_name, const std::string& output1_name,
    const float* input0, const float* input1, const float* output0,
    const float* output1)
{
  const size_t element_count = 16;
  for (size_t idx = 0; idx != element_count; ++idx) {
    const float lhs = input0[idx];
    const float rhs = input1[idx];
    const float actual_sum = output0[idx];
    const float actual_diff = output1[idx];

    std::cout << lhs << " + " << rhs << " = " << actual_sum << std::endl;
    std::cout << lhs << " - " << rhs << " = " << actual_diff << std::endl;

    const bool sum_matches = ((lhs + rhs) == actual_sum);
    if (!sum_matches) {
      FAIL("incorrect sum in " + output0_name);
    }

    const bool diff_matches = ((lhs - rhs) == actual_diff);
    if (!diff_matches) {
      FAIL("incorrect difference in " + output1_name);
    }
  }
}

// Validate an inference response and compare its outputs against the
// values expected for the given inputs.
//
// The response must contain exactly two outputs, named 'output0' and
// 'output1', each with shape [1, 16], datatype 'expected_datatype' and
// size 'expected_byte_size'. When 'enforce_memory_type' is set, each
// output must also live in 'requested_memory_type'. The output data is
// copied into host vectors (from GPU memory when necessary) and then
// checked with CompareResult(). Any violation terminates through
// FAIL / FAIL_IF_ERR / FAIL_IF_CUDA_ERR.
void
Check(
    TRITONSERVER_InferenceResponse* response,
    const std::vector<float>& input0_data,
    const std::vector<float>& input1_data, const std::string& output0,
    const std::string& output1, const size_t expected_byte_size,
    const TRITONSERVER_DataType expected_datatype)
{
  std::unordered_map<std::string, std::vector<float>> output_data;

  uint32_t output_count;
  FAIL_IF_ERR(
      TRITONSERVER_InferenceResponseOutputCount(response, &output_count),
      "getting number of response outputs");
  if (output_count != 2) {
    FAIL("expecting 2 response outputs, got " + std::to_string(output_count));
  }

  for (uint32_t idx = 0; idx < output_count; ++idx) {
    const char* cname;
    TRITONSERVER_DataType datatype;
    const int64_t* shape;
    uint64_t dim_count;
    const void* base;
    size_t byte_size;
    TRITONSERVER_MemoryType memory_type;
    int64_t memory_type_id;
    void* userp;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseOutput(
            response, idx, &cname, &datatype, &shape, &dim_count, &base,
            &byte_size, &memory_type, &memory_type_id, &userp),
        "getting output info");

    if (cname == nullptr) {
      FAIL("unable to get output name");
    }

    std::string name(cname);
    if ((name != output0) && (name != output1)) {
      FAIL("unexpected output '" + name + "'");
    }

    if ((dim_count != 2) || (shape[0] != 1) || (shape[1] != 16)) {
      FAIL("unexpected shape for '" + name + "'");
    }

    if (datatype != expected_datatype) {
      FAIL(
          "unexpected datatype '" +
          std::string(TRITONSERVER_DataTypeString(datatype)) + "' for '" +
          name + "'");
    }

    if (byte_size != expected_byte_size) {
      FAIL(
          "unexpected byte-size, expected " +
          std::to_string(expected_byte_size) + ", got " +
          std::to_string(byte_size) + " for " + name);
    }

    if (enforce_memory_type && (memory_type != requested_memory_type)) {
      FAIL(
          "unexpected memory type, expected to be allocated in " +
          std::string(TRITONSERVER_MemoryTypeString(requested_memory_type)) +
          ", got " + std::string(TRITONSERVER_MemoryTypeString(memory_type)) +
          ", id " + std::to_string(memory_type_id) + " for " + name);
    }

    // We make a copy of the data here... which we could avoid for
    // performance reasons but ok for this example.
    std::vector<float>& odata = output_data[name];
    switch (memory_type) {
      case TRITONSERVER_MEMORY_CPU: {
        std::cout << name << " is stored in system memory" << std::endl;
        const float* cbase = reinterpret_cast<const float*>(base);
        odata.assign(cbase, cbase + (byte_size / sizeof(float)));
        break;
      }

      case TRITONSERVER_MEMORY_CPU_PINNED: {
        std::cout << name << " is stored in pinned memory" << std::endl;
        const float* cbase = reinterpret_cast<const float*>(base);
        odata.assign(cbase, cbase + (byte_size / sizeof(float)));
        break;
      }

#ifdef TRITON_ENABLE_GPU
      case TRITONSERVER_MEMORY_GPU: {
        std::cout << name << " is stored in GPU memory" << std::endl;
        // The vector must actually contain the elements before they are
        // written through data(); reserve() only changes capacity and
        // leaves size() at zero, which made both the copy and the later
        // read undefined behavior.
        odata.resize(byte_size / sizeof(float));
        FAIL_IF_CUDA_ERR(
            cudaMemcpy(
                odata.data(), base, odata.size() * sizeof(float),
                cudaMemcpyDeviceToHost),
            "getting " + name + " data from GPU memory");
        break;
      }
#endif

      default:
        FAIL("unexpected memory type");
    }
  }

  // Two outputs carrying the same name would leave the other one
  // missing; catch that before CompareResult() reads 16 elements from
  // an empty vector.
  if ((output_data.count(output0) == 0) || (output_data.count(output1) == 0)) {
    FAIL(
        "expecting response to contain both '" + output0 + "' and '" +
        output1 + "'");
  }

  CompareResult(
      output0, output1, &input0_data[0], &input1_data[0],
      output_data[output0].data(), output_data[output1].data());
}

}  // namespace

// Create a new server options object in '*server_options' and
// configure it for this example. Any failure terminates through
// FAIL_IF_ERR.
//
// server_options:          receives the newly created options object.
// verbose_level:           value forwarded to
//                          TRITONSERVER_ServerOptionsSetLogVerbose.
// model_repository_path1,
// model_repository_path2:  both are passed, in this order, to
//                          TRITONSERVER_ServerOptionsSetModelRepositoryPath.
void
SetServerOptions(
    TRITONSERVER_ServerOptions** server_options, bool verbose_level,
    std::string model_repository_path1, std::string model_repository_path2)
{
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsNew(server_options), "creating server options");

  // Model repositories.
  // NOTE(review): assumes the second call adds a repository rather than
  // replacing the first one - confirm against the tritonserver.h docs.
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetModelRepositoryPath(
          *server_options, model_repository_path1.c_str()),
      "setting model repository path");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetModelRepositoryPath(
          *server_options, model_repository_path2.c_str()),
      "setting model repository path");

  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetLogVerbose(*server_options, verbose_level),
      "setting verbose logging level");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetMetrics(*server_options, true),
      "failed to enable metrics");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetStrictReadiness(*server_options, true),
      "failed to set strict readiness");
  // Strict model configuration only needs to be enabled once.
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetStrictModelConfig(*server_options, true),
      "failed to set strict model config");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetModelControlMode(
          *server_options, TRITONSERVER_MODEL_CONTROL_EXPLICIT),
      "failed to set model control mode to explicit");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetBackendDirectory(
          *server_options, "/opt/tritonserver/backends"),
      "setting backend directory");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetRepoAgentDirectory(
          *server_options, "/opt/tritonserver/repoagents"),
      "setting repository agent directory");

  // Without GPU support there is no compute capability requirement.
#ifdef TRITON_ENABLE_GPU
  double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY;
#else
  double min_compute_capability = 0;
#endif  // TRITON_ENABLE_GPU
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(
          *server_options, min_compute_capability),
      "setting minimum supported CUDA compute capability");
}

// Poll the server once per second until it reports both live and ready.
// Each poll prints the observed health. The function fails (via FAIL) if the
// server is still unhealthy at the 10th poll.
void
CheckServerLiveAndReady(std::shared_ptr<TRITONSERVER_Server> server)
{
  const size_t max_polls = 10;
  const auto poll_interval = std::chrono::milliseconds(1000);

  for (size_t polls_done = 1;; ++polls_done) {
    bool live, ready;
    FAIL_IF_ERR(
        TRITONSERVER_ServerIsLive(server.get(), &live),
        "unable to get server liveness");
    FAIL_IF_ERR(
        TRITONSERVER_ServerIsReady(server.get(), &ready),
        "unable to get server readiness");
    std::cout << "Server Health: live " << live << ", ready " << ready
              << std::endl;

    // Healthy: nothing more to wait for.
    if (live && ready) {
      return;
    }

    // Give up once the poll budget is exhausted.
    if (polls_done >= max_polls) {
      FAIL("failed to find healthy inference server");
    }

    std::this_thread::sleep_for(poll_interval);
  }
}

void
PrintServerStatus(std::shared_ptr<TRITONSERVER_Server> server)
{
  TRITONSERVER_Message* server_metadata_message;
  FAIL_IF_ERR(
      TRITONSERVER_ServerMetadata(server.get(), &server_metadata_message),
      "unable to get server metadata message");
  const char* buffer;
  size_t byte_size;
  FAIL_IF_ERR(
      TRITONSERVER_MessageSerializeToJson(
          server_metadata_message, &buffer, &byte_size),
      "unable to serialize server metadata message");

  std::cout << "Server Status:" << std::endl;
  std::cout << std::string(buffer, byte_size) << std::endl;

  FAIL_IF_ERR(
      TRITONSERVER_MessageDelete(server_metadata_message),
      "deleting status metadata");
}

void
AwaitModelReady(
    std::shared_ptr<TRITONSERVER_Server> server, const std::string model_name)
{
  bool is_ready = false;
  size_t wait_seconds = 0;
  while (!is_ready) {
    FAIL_IF_ERR(
        TRITONSERVER_ServerModelIsReady(
            server.get(), model_name.c_str(), 1, &is_ready),
        "unable to get model readiness");
    if (!is_ready) {
      if (++wait_seconds >= 5) {
        FAIL("model failed to be ready in 5 seconds");
      }
      std::this_thread::sleep_for(std::chrono::milliseconds(1000));
      continue;
    }

    TRITONSERVER_Message* model_metadata_message;
    FAIL_IF_ERR(
        TRITONSERVER_ServerModelMetadata(
            server.get(), model_name.c_str(), 1, &model_metadata_message),
        "unable to get model metadata message");
    const char* buffer;
    size_t byte_size;
    FAIL_IF_ERR(
        TRITONSERVER_MessageSerializeToJson(
            model_metadata_message, &buffer, &byte_size),
        "unable to serialize model status protobuf");

    rapidjson::Document model_metadata;
    model_metadata.Parse(buffer, byte_size);
    if (model_metadata.HasParseError()) {
      FAIL(
          "error: failed to parse model metadata from JSON: " +
          std::string(GetParseError_En(model_metadata.GetParseError())) +
          " at " + std::to_string(model_metadata.GetErrorOffset()));
    }

    FAIL_IF_ERR(
        TRITONSERVER_MessageDelete(model_metadata_message),
        "deleting status protobuf");

    if (strcmp(model_metadata["name"].GetString(), model_name.c_str())) {
      FAIL("unable to find metadata for model");
    }

    bool found_version = false;
    if (model_metadata.HasMember("versions")) {
      for (const auto& version : model_metadata["versions"].GetArray()) {
        if (strcmp(version.GetString(), "1") == 0) {
          found_version = true;
          break;
        }
      }
    }
    if (!found_version) {
      FAIL("unable to find version 1 status for model");
    }

    FAIL_IF_ERR(ParseModelMetadata(model_metadata), "parsing model metadata");
  }
}

// Build one inference request for 'model_name', run it asynchronously on
// 'server', wait for the response and validate the outputs with Check().
//
//   server      Server instance that executes the request.
//   allocator   Response allocator used for the output tensors.
//   model_name  Name of the model to run; version -1 is requested.
//
// The request carries two FP32 inputs (INPUT0, INPUT1) of shape [1, 16] and
// asks for two outputs (OUTPUT0, OUTPUT1). Any API or CUDA failure aborts
// through FAIL_IF_ERR / FAIL_IF_CUDA_ERR.
void
RunInferenceAndValidate(
    std::shared_ptr<TRITONSERVER_Server> server,
    TRITONSERVER_ResponseAllocator* allocator, const std::string model_name)
{
  TRITONSERVER_InferenceRequest* irequest = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestNew(
          &irequest, server.get(), model_name.c_str(), -1 /* model_version */),
      "creating inference request");

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"),
      "setting ID for the request");

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestSetReleaseCallback(
          irequest, InferRequestComplete, nullptr /* request_release_userp */),
      "setting request release callback");

  // Inputs
  auto input0 = "INPUT0";
  auto input1 = "INPUT1";

  std::vector<int64_t> input0_shape({1, 16});
  std::vector<int64_t> input1_shape({1, 16});

  const TRITONSERVER_DataType datatype = TRITONSERVER_TYPE_FP32;

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddInput(
          irequest, input0, datatype, &input0_shape[0], input0_shape.size()),
      "setting input 0 meta-data for the request");
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddInput(
          irequest, input1, datatype, &input1_shape[0], input1_shape.size()),
      "setting input 1 meta-data for the request");

  // Outputs
  auto output0 = "OUTPUT0";
  auto output1 = "OUTPUT1";

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0),
      "requesting output 0 for the request");
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1),
      "requesting output 1 for the request");

  // Create the data for the two input tensors. Initialize the first
  // to unique values and the second to all ones.
  std::vector<float> input0_data;
  std::vector<float> input1_data;
  GenerateInputData(&input0_data, &input1_data);

  // Byte sizes of the input buffers: element count times 4 bytes per element.
  size_t input0_size = input0_data.size() * 4;
  size_t input1_size = input1_data.size() * 4;

  // By default the request reads directly from the host vectors.
  const void* input0_base = &input0_data[0];
  const void* input1_base = &input1_data[0];
#ifdef TRITON_ENABLE_GPU
  // Owners for the optional CUDA-allocated copies of the inputs; the memory
  // is released through 'cuda_data_deleter' when they go out of scope.
  std::unique_ptr<void, decltype(cuda_data_deleter)> input0_gpu(
      nullptr, cuda_data_deleter);
  std::unique_ptr<void, decltype(cuda_data_deleter)> input1_gpu(
      nullptr, cuda_data_deleter);
  // CUDA-allocated buffers are only used when a non-CPU memory type is being
  // enforced.
  bool use_cuda_memory =
      (enforce_memory_type &&
       (requested_memory_type != TRITONSERVER_MEMORY_CPU));
  if (use_cuda_memory) {
    FAIL_IF_CUDA_ERR(cudaSetDevice(0), "setting CUDA device to device 0");
    if (requested_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) {
      // Device memory: allocate with cudaMalloc and copy host -> device.
      void* dst;
      FAIL_IF_CUDA_ERR(
          cudaMalloc(&dst, input0_size),
          "allocating GPU memory for INPUT0 data");
      input0_gpu.reset(dst);
      FAIL_IF_CUDA_ERR(
          cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToDevice),
          "setting INPUT0 data in GPU memory");
      FAIL_IF_CUDA_ERR(
          cudaMalloc(&dst, input1_size),
          "allocating GPU memory for INPUT1 data");
      input1_gpu.reset(dst);
      FAIL_IF_CUDA_ERR(
          cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToDevice),
          "setting INPUT1 data in GPU memory");
    } else {
      // Pinned host memory: allocate with cudaHostAlloc and copy host -> host.
      void* dst;
      FAIL_IF_CUDA_ERR(
          cudaHostAlloc(&dst, input0_size, cudaHostAllocPortable),
          "allocating pinned memory for INPUT0 data");
      input0_gpu.reset(dst);
      FAIL_IF_CUDA_ERR(
          cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToHost),
          "setting INPUT0 data in pinned memory");
      FAIL_IF_CUDA_ERR(
          cudaHostAlloc(&dst, input1_size, cudaHostAllocPortable),
          "allocating pinned memory for INPUT1 data");
      input1_gpu.reset(dst);
      FAIL_IF_CUDA_ERR(
          cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToHost),
          "setting INPUT1 data in pinned memory");
    }
  }

  // Point the request at the CUDA-allocated copies when they were created.
  input0_base = use_cuda_memory ? input0_gpu.get() : &input0_data[0];
  input1_base = use_cuda_memory ? input1_gpu.get() : &input1_data[0];
#endif  // TRITON_ENABLE_GPU

  // NOTE(review): 'requested_memory_type' is passed even when
  // 'enforce_memory_type' is false and the buffers are plain host vectors;
  // this presumably relies on its default being TRITONSERVER_MEMORY_CPU --
  // confirm against its declaration.
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAppendInputData(
          irequest, input0, input0_base, input0_size, requested_memory_type,
          0 /* memory_type_id */),
      "assigning INPUT0 data");
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAppendInputData(
          irequest, input1, input1_base, input1_size, requested_memory_type,
          0 /* memory_type_id */),
      "assigning INPUT1 data");

  // Perform inference...
  {
    // The promise is handed to InferResponseComplete as its user pointer and
    // the matching future is used below to block until a response arrives.
    // NOTE(review): the promise is heap-allocated and not deleted in this
    // function; presumably InferResponseComplete deletes it after fulfilling
    // it -- confirm.
    auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
    std::future<TRITONSERVER_InferenceResponse*> completed = p->get_future();

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetResponseCallback(
            irequest, allocator, nullptr /* response_allocator_userp */,
            InferResponseComplete, reinterpret_cast<void*>(p)),
        "setting response callback");

    FAIL_IF_ERR(
        TRITONSERVER_ServerInferAsync(
            server.get(), irequest, nullptr /* trace */),
        "running inference");

    // Wait for the inference to complete.
    TRITONSERVER_InferenceResponse* completed_response = completed.get();

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseError(completed_response),
        "response status");

    // Validate the outputs against the host copies of the inputs.
    Check(
        completed_response, input0_data, input1_data, output0, output1,
        input0_size, datatype);

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseDelete(completed_response),
        "deleting inference response");
  }

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestDelete(irequest),
      "deleting inference request");
}

// Fetch the statistics of 'model_name' (version -1 is requested), serialize
// them to JSON and print them to stdout. The statistics message is deleted
// before returning. Any API failure aborts through FAIL_IF_ERR.
void
PrintModelStats(
    std::shared_ptr<TRITONSERVER_Server> server, const std::string model_name)
{
  TRITONSERVER_Message* model_stats_message = nullptr;

  FAIL_IF_ERR(
      TRITONSERVER_ServerModelStatistics(
          server.get(), model_name.c_str(), -1 /* model_version */,
          &model_stats_message),
      "unable to get model stats message");
  const char* buffer;
  size_t byte_size;
  // The failure text names the model stats message; it previously referred to
  // the server metadata message, which misreported the failing operation.
  FAIL_IF_ERR(
      TRITONSERVER_MessageSerializeToJson(
          model_stats_message, &buffer, &byte_size),
      "unable to serialize model stats message");

  std::cout << "Model '" << model_name << "' Stats:" << std::endl;
  std::cout << std::string(buffer, byte_size) << std::endl;

  FAIL_IF_ERR(
      TRITONSERVER_MessageDelete(model_stats_message),
      "deleting model stats message");
}

void
CreateAndRunTritonserverInstance(
    std::vector<std::string> model_repository_paths, size_t thread_id,
    bool verbose_level)
{
  TRITONSERVER_ServerOptions* server_options = nullptr;

  SetServerOptions(
      &server_options, verbose_level, model_repository_paths[0],
      model_repository_paths[thread_id]);

  TRITONSERVER_Server* server_ptr = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_ServerNew(&server_ptr, server_options),
      "creating server instance no. " + std::to_string(thread_id));
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsDelete(server_options),
      "deleting server options");

  std::shared_ptr<TRITONSERVER_Server> server(
      server_ptr, TRITONSERVER_ServerDelete);

  // Wait and until the servers are both live and ready.
  CheckServerLiveAndReady(server);

  // Print status of the servers.
  PrintServerStatus(server);
  std::string model1 = "simple1",
              model2 = "simple" + std::to_string(thread_id + 1);

  // Load models in server.
  FAIL_IF_ERR(
      TRITONSERVER_ServerLoadModel(server.get(), model1.c_str()),
      "failed to load model");
  FAIL_IF_ERR(
      TRITONSERVER_ServerLoadModel(server.get(), model2.c_str()),
      "failed to load model");

  // Wait for the models to become available.
  AwaitModelReady(server, model1.c_str());
  AwaitModelReady(server, model2.c_str());

  // Create the allocator that will be used to allocate buffers for
  // the result tensors.
  TRITONSERVER_ResponseAllocator* allocator = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorNew(
          &allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */),
      "creating response allocator");

  // Inference
  RunInferenceAndValidate(server, allocator, model1.c_str());
  RunInferenceAndValidate(server, allocator, model2.c_str());

  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorDelete(allocator),
      "deleting response allocator");

  // Print Model Statistics for all models
  PrintModelStats(server, model1.c_str());
  PrintModelStats(server, model2.c_str());

  // Unload models in both servers.
  FAIL_IF_ERR(
      TRITONSERVER_ServerUnloadModel(server.get(), model1.c_str()),
      "failed to unload model");
  FAIL_IF_ERR(
      TRITONSERVER_ServerUnloadModel(server.get(), model2.c_str()),
      "failed to unload model");

  std::string wrong_model;
  if (thread_id == (model_repository_paths.size() + 1)) {
    wrong_model = "simple2";
  } else {
    wrong_model = "simple" + std::to_string(thread_id + 2);
  }

  // Try to load wrong model. Expected to fail
  TRITONSERVER_Error* err =
      TRITONSERVER_ServerLoadModel(server.get(), wrong_model.c_str());
  if (err == nullptr) {
    FAIL("Success when expected to failed to load wrong model");
  }
}

// Start-barrier state shared between main() and the worker threads.
//
// 'counter' counts the workers that have reached the barrier. std::atomic
// already provides the required visibility, so no 'volatile' qualifier is
// needed.
static std::atomic<int> counter(0);
// Protects 'start_workers' and is the mutex paired with 'cv'.
static std::mutex mutex;
std::condition_variable cv;
// Set to true by main() while holding 'mutex' once every worker has checked
// in; it is the predicate the workers wait on.
static bool start_workers = false;

// Worker thread body: check in at the start barrier, wait for main() to
// release it, then run CreateAndRunTritonserverInstance() 'loops' times.
//
//   model_repository_paths  Forwarded to CreateAndRunTritonserverInstance.
//   thread_id               Forwarded to CreateAndRunTritonserverInstance.
//   loops                   Number of create/run iterations.
//   verbose_level           Forwarded to CreateAndRunTritonserverInstance.
void
RepeatedlyCreateAndRunInstance(
    std::vector<std::string> model_repository_paths, size_t thread_id,
    size_t loops, bool verbose_level)
{
  std::unique_lock<std::mutex> lock(mutex);
  counter++;
  // Waiting on a predicate keeps a spurious wakeup from releasing this worker
  // before main() has opened the barrier.
  cv.wait(lock, [] { return start_workers; });

  // NOTE(review): 'lock' is re-acquired by wait() and stays held for the
  // whole loop below, so the workers execute one after another rather than
  // concurrently. This preserves the existing behavior; confirm whether the
  // test is meant to run the servers in parallel (which would require
  // unlocking here).
  for (size_t i = 0; i < loops; i++) {
    CreateAndRunTritonserverInstance(
        model_repository_paths, thread_id, verbose_level);
  }
}

// Entry point. Parses the command line, validates it, checks the Triton API
// version and runs 'thread_count' workers, each driving its own server
// instance 'loops' times.
//
// Options:
//   -v         enable verbose logging
//   -m <type>  enforce a memory type: "system", "pinned" or "gpu"
//   -r <path>  model repository path; must be given thread_count + 1 times
//   -t <n>     number of worker threads (default 2)
//   -l <n>     iterations per worker (default 1)
int
main(int argc, char** argv)
{
  std::vector<std::string> model_repository_paths;
  int verbose_level = 0;
  int thread_count = 2;
  int loops = 1;

  // Parse commandline...
  int opt;
  while ((opt = getopt(argc, argv, "vm:r:t:l:")) != -1) {
    switch (opt) {
      case 'm': {
        enforce_memory_type = true;
        if (!strcmp(optarg, "system")) {
          requested_memory_type = TRITONSERVER_MEMORY_CPU;
        } else if (!strcmp(optarg, "pinned")) {
          requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED;
        } else if (!strcmp(optarg, "gpu")) {
          requested_memory_type = TRITONSERVER_MEMORY_GPU;
        } else {
          Usage(
              argv,
              "-m must be used to specify one of the following types:"
              " <\"system\"|\"pinned\"|gpu>");
        }
        break;
      }
      case 'r':
        model_repository_paths.push_back(optarg);
        break;
      case 't':
        thread_count = std::stoi(optarg);
        break;
      case 'l':
        loops = std::stoi(optarg);
        break;
      case 'v':
        verbose_level = 1;
        break;
      case '?':
        Usage(argv);
        break;
    }
  }

  // Either value being out of range is an error. The previous '&&' only
  // rejected the input when both were invalid, which let a zero or negative
  // thread count through.
  // NOTE(review): the code below assumes Usage() does not return when given
  // a message -- confirm against its definition.
  if ((thread_count < 1) || (loops < 1)) {
    Usage(argv, "thread_count and loops must be >= 1");
  }

  // model repository paths must be 'thread_count' + 1. The comparison is done
  // in size_t so an empty list cannot wrap around and match by accident.
  if (model_repository_paths.size() !=
      (static_cast<size_t>(thread_count) + 1)) {
    Usage(
        argv, "-r must be used to specify " + std::to_string(thread_count + 1) +
                  " model repository paths, " + std::to_string(thread_count) +
                  " unique paths and 1 common");
  }

  for (const auto& repo_path : model_repository_paths) {
    if (repo_path.empty()) {
      Usage(argv, "model repository paths must not be empty");
    }
  }
#ifndef TRITON_ENABLE_GPU
  if (enforce_memory_type && requested_memory_type != TRITONSERVER_MEMORY_CPU) {
    Usage(argv, "-m can only be set to \"system\" without enabling GPU");
  }
#endif  // TRITON_ENABLE_GPU

  // Check API version.
  uint32_t api_version_major, api_version_minor;
  FAIL_IF_ERR(
      TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor),
      "getting Triton API version");
  if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major) ||
      (TRITONSERVER_API_VERSION_MINOR > api_version_minor)) {
    FAIL("triton server API version mismatch");
  }

  // Create 'thread_count' number of instances of the server with 1 common and 1
  // unique repo each. A std::vector is used instead of a variable-length
  // array, which is not standard C++.
  std::vector<std::thread> tritonservers;
  tritonservers.reserve(static_cast<size_t>(thread_count));
  for (int i = 0; i < thread_count; i++) {
    tritonservers.emplace_back(
        &RepeatedlyCreateAndRunInstance, model_repository_paths, size_t(i + 1),
        size_t(loops), verbose_level);
  }

  // Wait until every worker has checked in at the barrier.
  while (counter < thread_count) {
    usleep(50);
  }

  // Open the barrier. The flag is set under the mutex so that no worker can
  // miss the update between evaluating the predicate and going to sleep.
  {
    std::unique_lock<std::mutex> lock(mutex);
    start_workers = true;
    cv.notify_all();
  }

  for (auto& worker : tritonservers) {
    worker.join();
  }

  return 0;
}


================================================
FILE: src/orca_http.cc
================================================
// Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "orca_http.h"

// Compute the KV-cache load metrics of 'server' and, when available, attach
// them to the outgoing headers of 'req' under ENDPOINT_LOAD_METRICS_NAME.
//
//   req                 Request whose 'headers_out' receives the header.
//   orca_metric_format  Requested reporting format; forwarded to
//                       ExtractKVMetrics(). A null pointer is rejected.
//   server              Server to query for metrics.
//
// Failures are logged and otherwise ignored: no header is written and the
// function returns normally. Every TRITONSERVER_Error obtained along the way
// is deleted.
void
SetEndpointLoadMetricsHeader(
    evhtp_request_t* req, const char* orca_metric_format,
    TRITONSERVER_Server* server)
{
  // Constructing a std::string from a null pointer is undefined behavior.
  if (orca_metric_format == nullptr) {
    LOG_ERROR << "Failed to get KV metrics: no metrics format was provided";
    return;
  }
  const std::string orca_type = orca_metric_format;

  TRITONSERVER_Metrics* metrics = nullptr;
  TRITONSERVER_Error* err = TRITONSERVER_ServerMetrics(server, &metrics);
  if (err == nullptr) {
    const char* base;
    size_t byte_size;
    err = TRITONSERVER_MetricsFormatted(
        metrics, TRITONSERVER_METRIC_PROMETHEUS, &base, &byte_size);
    if (err == nullptr) {
      std::string formatted_metrics(base, byte_size);
      // Extract the KV utilization metrics from the Prometheus formatted
      // string.
      std::string extracted_kv_metrics =
          ExtractKVMetrics(formatted_metrics, orca_type);
      if (!extracted_kv_metrics.empty()) {
        evhtp_headers_add_header(
            req->headers_out, evhtp_header_new(
                                  ENDPOINT_LOAD_METRICS_NAME,
                                  extracted_kv_metrics.c_str(), 1, 1));
      } else {
        LOG_ERROR << "ENDPOINT_LOAD_METRICS_TYPE request header is set but "
                     "extracted_kv_metrics is "
                     "empty, no header written. orca_type="
                  << orca_type;
      }
    }
  }

  // Covers a failure of either TRITONSERVER_ServerMetrics or
  // TRITONSERVER_MetricsFormatted; the latter used to be dropped without
  // being logged or deleted.
  if (err != nullptr) {
    LOG_ERROR << "Failed to get KV metrics: " << TRITONSERVER_ErrorMessage(err);
    TRITONSERVER_ErrorDelete(err);
  }

  // Release the metrics object only if one was actually obtained, and do not
  // leak an error returned by the delete call itself.
  if (metrics != nullptr) {
    TRITONSERVER_Error* delete_err = TRITONSERVER_MetricsDelete(metrics);
    if (delete_err != nullptr) {
      LOG_ERROR << "Failed to delete metrics object: "
                << TRITONSERVER_ErrorMessage(delete_err);
      TRITONSERVER_ErrorDelete(delete_err);
    }
  }
}

std::vector<PromMetric>
MetricFamilyExtractor(const std::string& input, const std::string& metricFamily)
{
  std::vector<PromMetric> metrics;
  // Construct the regex pattern using the provided metricFamily.

  // `labelGroup` is a capturing group that captures all characters within curly
  // braces, excluding line breaks.
  std::string labelGroup = "(?:{(.*?)})";

  // `valueGroup` is a capturing group that captures a number with its
  // decimals if any.
  std::string valueGroup = R"((\d+(?:\.\d+)?))";

  // `patternStr` matches on lines starting with `metricFamily` then captures
  // its labels if any, then (optionally) matches any whitespace, then captures
  // its numeric double value.
  //
  // For example, `patternStr` would match on input:
  // `nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type="used",model="tensorrt_llm",version="1"}
  // 3`
  //
  // with 2 capturing groups:
  // 1. `kv_cache_block_type="used",model="tensorrt_llm",version="1"`
  // 2. `3`
  std::string patternStr = metricFamily + labelGroup + R"(?\s*)" + valueGroup;
  re2::RE2 pattern(patternStr);
  re2::StringPiece inputPiece(input);

  std::string labelString;
  std::string metric_value;

  while (re2::RE2::FindAndConsume(
      &inputPiece, pattern, &labelString, &metric_value)) {
    PromMetric metric;

    // Extract labels if they exist
    if (!labelString.empty()) {
      // `labelPattern` captures any alphanumeric sequence that precedes an '='
      // character, then captures the following quoted character sequence. These
      // groups are exahstive given the prometheus data model:
      // https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
      //
      // For example, calling FindAndConsume() with `labelPattern` on input:
      // `kv_cache_block_type="used",model="tensorrt_llm",version="1"`
      //
      // matches 3 times with 2 capturing groups each:
      //
      // Match #1
      // 1. `kv_cache_block_type`
      // 2. `used`
      //
      // Match #2
      // 1. `model`
      // 2. `tensorrt_llm`
      //
      // Match #3
      // 1. `version`
      // 2. `1`
      re2::RE2 labelPattern(R"((\w+)=\"([^\"]*)\")");
      re2::StringPiece labelPiece(labelString);
      std::string key, value;
      while (
          re2::RE2::FindAndConsume(&labelPiece, labelPattern, &key, &value)) {
        // Populate the metric's labels map
        metric.labels[key] = value;
      }
    }

    // Assign the metric its value and add it to the family list
    metric.value = stod(metric_value);
    metrics.push_back(metric);
  }

  return metrics;
}

// Derive the KV-cache load metrics from Prometheus text and format them.
//
//   prometheus_metrics  Prometheus formatted metrics text.
//   orca_type           Reporting format, forwarded to OrcaKVMetricHeader().
//
// The samples of the KV_CACHE_BLOCK_METRICS_FAMILY family are scanned for the
// "tokens per block", "used blocks" and "max blocks" values (selected by the
// KV_CACHE_BLOCK_TYPE label). If any of them is missing an error is logged
// and an empty string is returned; otherwise the KV-cache utilization and the
// maximum token capacity are computed and the result of OrcaKVMetricHeader()
// is returned.
std::string
ExtractKVMetrics(
    const std::string& prometheus_metrics, const std::string& orca_type)
{
  const std::vector<PromMetric> block_samples = MetricFamilyExtractor(
      prometheus_metrics, KV_CACHE_BLOCK_METRICS_FAMILY);

  // -1 marks "not seen yet".
  double tokens_per_block = -1;
  double used_blocks = -1;
  double max_blocks = -1;

  for (const auto& sample : block_samples) {
    const auto type_it = sample.labels.find(KV_CACHE_BLOCK_TYPE);
    if (type_it == sample.labels.end()) {
      // Samples without a block-type label carry nothing of interest.
      continue;
    }

    const std::string& block_type = type_it->second;
    if (block_type == KV_CACHE_BLOCK_TYPE_TOKENS_PER) {
      tokens_per_block = sample.value;
    } else if (block_type == KV_CACHE_BLOCK_TYPE_USED) {
      used_blocks = sample.value;
    } else if (block_type == KV_CACHE_BLOCK_TYPE_MAX) {
      max_blocks = sample.value;
    }
  }

  // Return early if not all kv metrics are found and set.
  if (tokens_per_block < 0 || used_blocks < 0 || max_blocks < 0) {
    LOG_ERROR << "One or more of the kv metrics was not found or invalid.";
    return "";
  }

  // Derived values; utilization stays 0 when there are no blocks at all so
  // that no division by zero takes place.
  const double kv_cache_utilization =
      (max_blocks > 0) ? (used_blocks / max_blocks) : 0;
  const double max_token_capacity = max_blocks * tokens_per_block;

  std::unordered_map<std::string, double> derived_metrics;
  derived_metrics[KV_CACHE_UTIL_KEY] = kv_cache_utilization;
  derived_metrics[MAX_TOKEN_CAPACITY_KEY] = max_token_capacity;

  return OrcaKVMetricHeader(orca_type, derived_metrics);
}

std::string
OrcaKVMetricHeader(
    const std::string& orca_type,
    std::unordered_map<std::string, double> metrics)
{
  // Logic to construct and format response header
  std::string header_contents = "";
  const std::string named_metrics_key = NAMED_METRICS;
  const std::string kv_util_key = KV_CACHE_UTIL_KEY;
  const std::string max_token_key = MAX_TOKEN_CAPACITY_KEY;

  if (orca_type == "json") {
    // Format the metrics according to the ORCA protocol as JSON.
    triton::common::TritonJson::Value orca_metrics(
        triton::common::TritonJson::ValueType::OBJECT);
    triton::common::TritonJson::Value named_metrics(
        orca_metrics, triton::common::TritonJson::ValueType::OBJECT);

    named_metrics.AddDouble(kv_util_key.c_str(), metrics[kv_util_key]);
    named_metrics.AddUInt(max_token_key.c_str(), metrics[max_token_key]);
    orca_metrics.Add(named_metrics_key.c_str(), std::move(named_metrics));

    triton::common::TritonJson::WriteBuffer buffer;
    orca_metrics.Write(&buffer);
    header_contents = std::string("JSON ") + buffer.Contents();

  } else if (orca_type == "text") {
    // Format the metrics according to the ORCA protocol as Native HTTP
    // (comma separated list).
    const std::string prefix = named_metrics_key + ".";

    header_contents = "TEXT ";
    header_contents += prefix + kv_util_key + "=" +
                       std::to_string(metrics[kv_util_key]) + ", ";
    header_contents +=
        prefix + max_token_key + "=" +
        std::to_string(static_cast<uint64_t>(metrics[max_token_key]));
  } else {
    LOG_ERROR << "orca_type is set to an invalid type: " << orca_type;
  }

  return header_contents;
}


================================================
FILE: src/orca_http.h
================================================
// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <string>
#include <unordered_map>
#include <vector>

#include "http_server.h"

// Request header that, when set, asks for load metrics to be reported
// (presumably its value is the reporting format -- confirm against the
// caller in the HTTP server).
#define ENDPOINT_LOAD_METRICS_TYPE "endpoint-load-metrics-format"
// Name of the response header that carries the formatted load metrics.
#define ENDPOINT_LOAD_METRICS_NAME "endpoint-load-metrics"
// Prometheus metric family from which the KV-cache block metrics are read.
#define KV_CACHE_BLOCK_METRICS_FAMILY "nv_trt_llm_kv_cache_block_metrics"
// Label key that distinguishes the kinds of KV-cache block metrics.
#define KV_CACHE_BLOCK_TYPE "kv_cache_block_type"
// Values of the KV_CACHE_BLOCK_TYPE label that are consumed: tokens per
// block, number of used blocks and maximum number of blocks.
#define KV_CACHE_BLOCK_TYPE_TOKENS_PER "tokens_per"
#define KV_CACHE_BLOCK_TYPE_USED "used"
#define KV_CACHE_BLOCK_TYPE_MAX "max"
// Keys of the derived metrics written into the response header.
#define KV_CACHE_UTIL_KEY "kv_cache_utilization"
#define MAX_TOKEN_CAPACITY_KEY "max_token_capacity"
// Name of the object (JSON) / key prefix (text) that groups the derived
// metrics in the response header.
#define NAMED_METRICS "named_metrics"

// A single sample of a Prometheus metric family.
struct PromMetric {
  // Label name -> label value pairs of the sample; empty when the sample has
  // no labels.
  std::unordered_map<std::string, std::string> labels;
  // Numeric value of the sample. Not default-initialized; it must be assigned
  // before it is read.
  double value;
};

// Queries 'server' for its metrics, derives the KV-cache load metrics in the
// format named by 'orca_metric_format' and, when they are available, adds
// them to the response headers of 'req' as ENDPOINT_LOAD_METRICS_NAME.
// Failures are logged and no header is written.
void SetEndpointLoadMetricsHeader(
    evhtp_request_t* req, const char* orca_metric_format,
    TRITONSERVER_Server* server);
// Helper function that extracts the KV-cache block metrics from the
// Prometheus formatted text 'prometheus_metrics', computes the KV-cache
// utilization and maximum token capacity, and returns them formatted
// according to 'orca_type'. Returns an empty string when a required metric
// is missing.
std::string ExtractKVMetrics(
    const std::string& prometheus_metrics, const std::string& orca_type);
// Returns one PromMetric (a map of labels and a value) for every sample of
// the family 'metricFamily' found in the Prometheus formatted text 'input'.
std::vector<PromMetric> MetricFamilyExtractor(
    const std::string& input, const std::string& metricFamily);
// Creates a header string in the proper reporting format ("json" or "text")
// for the provided KV-cache metrics. Returns an empty string for any other
// format. Note that the definition names the first parameter 'orca_type'.
std::string OrcaKVMetricHeader(
    const std::string& reporting_format,
    const std::unordered_map<std::string, double> metrics);


================================================
FILE: src/python/CMakeLists.txt
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required(VERSION 3.31.8)

# Build the contents of the tritonfrontend subdirectory first.
add_subdirectory(tritonfrontend)

# Stage the files consumed by the wheel build in the binary directory:
# the version string, the license text and setup.py (with @VAR@ references
# substituted).
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/TRITON_VERSION ${TRITON_VERSION})
configure_file(../../LICENSE LICENSE.txt COPYONLY)
configure_file(setup.py setup.py @ONLY)

# Everything the wheel build depends on; a change to any of these re-runs
# the custom command below.
set(WHEEL_DEPENDS
      ${CMAKE_CURRENT_BINARY_DIR}/TRITON_VERSION
      ${CMAKE_CURRENT_BINARY_DIR}/LICENSE.txt
      ${CMAKE_CURRENT_BINARY_DIR}/setup.py
      ${CMAKE_CURRENT_BINARY_DIR}/tritonfrontend
      py-bindings
)

# Output of the wheel build command, used to track whether it has run
# (presumably created by build_wheel.py on success -- confirm).
set(wheel_stamp_file "stamp.whl")

# Run build_wheel.py, writing into <binary dir>/generic and pointing it at the
# built py-bindings target file.
add_custom_command(
  OUTPUT "${wheel_stamp_file}"
  COMMAND python3
  ARGS
    "${CMAKE_CURRENT_SOURCE_DIR}/build_wheel.py"
    --dest-dir "${CMAKE_CURRENT_BINARY_DIR}/generic"
    --binding-path $<TARGET_FILE:py-bindings>
  DEPENDS ${WHEEL_DEPENDS}
)

# Part of the default (ALL) build; triggers the wheel build command above.
add_custom_target(
  frontend-server-wheel ALL
  DEPENDS
    "${wheel_stamp_file}"
)


# Install the contents of the wheel output directory (the trailing slash
# installs the directory's contents rather than the directory itself) into
# <install prefix>/python.
set(WHEEL_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generic/wheel/dist/")
install(
  DIRECTORY
  ${WHEEL_OUT_DIR}
  DESTINATION "${CMAKE_INSTALL_PREFIX}/python"
)

================================================
FILE: src/python/build_wheel.py
================================================
#!/usr/bin/env python3
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os
import pathlib
import re
import shutil
import subprocess
import sys
from distutils.dir_util import copy_tree
from tempfile import mkstemp


def fail_if(p, msg):
    """Abort the script with exit status 1 when ``p`` is truthy.

    Args:
        p: Condition to test; any truthy value triggers the failure path.
        msg: Text reported on stderr, prefixed with ``error: ``.

    Raises:
        SystemExit: With code 1 when ``p`` is truthy.
    """
    if not p:
        return
    print("error: {}".format(msg), file=sys.stderr)
    sys.exit(1)


def mkdir(path):
    """Create directory ``path`` and any missing parents.

    An already existing directory is not an error.
    """
    target = pathlib.Path(path)
    target.mkdir(exist_ok=True, parents=True)


def touch(path):
    """Create an empty file at ``path``, or update its timestamp if it exists."""
    marker = pathlib.Path(path)
    marker.touch()


def cpdir(src, dest):
    """Recursively copy the directory ``src`` into ``dest``.

    ``dest`` (and any missing parents) is created when needed; when it already
    exists the trees are merged and files with the same name are overwritten.
    Symbolic links are copied as links rather than followed.

    Implemented with ``shutil.copytree`` instead of
    ``distutils.dir_util.copy_tree`` because distutils is deprecated
    (PEP 632) and no longer ships with Python 3.12+.

    Args:
        src: Path of the existing directory to copy from.
        dest: Path of the directory to copy into.
    """
    shutil.copytree(src, dest, symlinks=True, dirs_exist_ok=True)


def sed(pattern, replace, source, dest=None):
    """Apply a regex substitution to every line of a text file.

    Args:
        pattern: Regular expression passed to ``re.sub``.
        replace: Replacement string (or callable) passed to ``re.sub``.
        source: Path of the file to read.
        dest: Path of the file to write. When omitted (or empty) ``source`` is
            rewritten in place.
    """
    if dest:
        with open(source, "r") as fin, open(dest, "w") as fout:
            for line in fin:
                fout.write(re.sub(pattern, replace, line))
        return

    # In-place edit: write to a scratch file first so the source is never read
    # and truncated at the same time, then copy the result back.
    fd, scratch = mkstemp()
    try:
        # fdopen takes ownership of the descriptor returned by mkstemp, so it
        # is closed when the with-block exits instead of leaking.
        with os.fdopen(fd, "w") as fout, open(source, "r") as fin:
            for line in fin:
                fout.write(re.sub(pattern, replace, line))
        shutil.copyfile(scratch, source)
    finally:
        # Always remove the scratch file so repeated calls do not litter the
        # temporary directory.
        os.remove(scratch)


def main():
    """Stage the tritonfrontend package and build its wheel.

    Run from the directory that contains ``TRITON_VERSION``, ``setup.py``,
    ``LICENSE.txt`` and the ``tritonfrontend`` sources. The package is staged
    under ``<dest-dir>/wheel``, ``setup.py bdist_wheel`` is executed there, and
    the resulting ``dist`` contents plus a ``stamp.whl`` marker are written to
    ``<dest-dir>``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dest-dir", type=str, required=True, help="Destination directory."
    )
    parser.add_argument(
        "--binding-path",
        type=str,
        required=True,
        help="Path to Triton Frontend Python binding.",
    )
    FLAGS = parser.parse_args()

    # The wheel version is the first line of the TRITON_VERSION file.
    with open("TRITON_VERSION", "r") as vfile:
        FLAGS.triton_version = vfile.readline().strip()

    FLAGS.whl_dir = os.path.join(FLAGS.dest_dir, "wheel")
    package_dir = os.path.join(FLAGS.whl_dir, "tritonfrontend")

    print("=== Building in: {}".format(os.getcwd()))
    print("=== Using builddir: {}".format(FLAGS.whl_dir))
    print("Adding package files")
    mkdir(package_dir)
    # __init__.py is the package entry point; py.typed is the PEP 561 marker
    # file indicating support for type checkers.
    # https://peps.python.org/pep-0561/
    for filename in ("__init__.py", "py.typed"):
        shutil.copy(f"tritonfrontend/{filename}", package_dir)
    for subpackage in ("_c", "_api"):
        cpdir(f"tritonfrontend/{subpackage}", os.path.join(package_dir, subpackage))

    # Place the compiled binding next to the _c package sources.
    PYBIND_LIB = os.path.basename(FLAGS.binding_path)
    shutil.copyfile(FLAGS.binding_path, os.path.join(package_dir, "_c", PYBIND_LIB))

    for filename in ("LICENSE.txt", "setup.py"):
        shutil.copyfile(filename, os.path.join(FLAGS.whl_dir, filename))

    os.chdir(FLAGS.whl_dir)
    print("=== Building wheel")
    # setup.py reads the version and the binding file name from the environment.
    wenv = dict(os.environ, VERSION=FLAGS.triton_version, TRITON_PYBIND=PYBIND_LIB)
    completed = subprocess.run(["python3", "setup.py", "bdist_wheel"], env=wenv)
    fail_if(completed.returncode != 0, "setup.py failed")

    cpdir("dist", FLAGS.dest_dir)

    print(f"=== Output wheel file is in: {FLAGS.dest_dir}")
    touch(os.path.join(FLAGS.dest_dir, "stamp.whl"))


if __name__ == "__main__":
    main()


================================================
FILE: src/python/examples/example.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import pathlib

import numpy as np
import tritonclient.http as httpclient
import tritonserver
from tritonfrontend import KServeHttp


def main():
    """Start an in-process Triton server, expose it over HTTP, and run one request.

    The server loads the ``example_model_repository`` directory located next to
    this file, a KServe HTTP frontend is attached to it, and a single string
    tensor is sent to the ``identity`` model through ``tritonclient``. The
    server is stopped even when the frontend or the request fails.
    """
    # Constructing path to Model Repository
    model_path = f"{pathlib.Path(__file__).parent.resolve()}/example_model_repository"
    # Selecting Server Options
    server_options = tritonserver.Options(
        server_id="ExampleServer",
        model_repository=model_path,
        log_error=True,
        log_info=True,
        log_warn=True,
    )

    # Creating server instance
    server = tritonserver.Server(server_options).start(wait_until_ready=True)

    try:
        # Selecting Options for KServeHttp Frontend; the client below must use
        # the same port, so it is defined once.
        port = 8005
        http_options = KServeHttp.Options(port=port)

        # NOTE(review): the frontend can presumably also be managed without a
        # `with` block via explicit start()/stop() calls -- confirm against
        # the KServeHttp implementation.
        with KServeHttp(server, http_options) as http_service:
            # The identity model returns an exact duplicate of the input data as output
            model_name = "identity"
            url = f"localhost:{port}"

            # Create a Triton client
            client = httpclient.InferenceServerClient(url=url)

            # Prepare input data
            input_data = np.array([["Roger Roger"]], dtype=object)

            # Create input and output objects
            inputs = [httpclient.InferInput("INPUT0", input_data.shape, "BYTES")]

            # Set the data for the input tensor
            inputs[0].set_data_from_numpy(input_data)

            results = client.infer(model_name, inputs=inputs)

            # Get the output data
            output_data = results.as_numpy("OUTPUT0")

            print("--------------------- INFERENCE RESULTS ---------------------")
            print("Output data:", output_data)
            print("-------------------------------------------------------------")
    finally:
        # Shut the server down even if the frontend or the request raised.
        server.stop()


if __name__ == "__main__":
    main()


================================================
FILE: src/python/examples/example_model_repository/identity/config.pbtxt
================================================

# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the "identity" example model, served through the
# ONNX Runtime platform. Batching is enabled with batches of up to 8 requests,
# so the dims below describe a single item and exclude the batch dimension.
name: "identity"
platform: "onnxruntime_onnx"
max_batch_size: 8
# Only the most recent model version is made available.
version_policy: { latest { num_versions: 1 }}


# One variable-length string tensor in ...
input [
  {
    name: "INPUT0"
    data_type: TYPE_STRING
    dims: [ -1 ]

  }
]
# ... and one variable-length string tensor out.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_STRING
    dims: [ -1 ]


  }
]


================================================
FILE: src/python/setup.py
================================================
#!/usr/bin/env python3
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

from setuptools import find_packages, setup

def parse_plat_name(argv, default="any"):
    """Return the wheel platform tag requested on the command line.

    Both ``--plat-name VALUE`` and ``--plat-name=VALUE`` are recognised. When
    the option is given more than once the last occurrence wins; when it is
    absent, or given without a value, ``default`` is returned.

    Args:
        argv: Command-line argument list, e.g. ``sys.argv``.
        default: Tag to use when no platform name was requested.
    """
    plat_name = default
    for index, arg in enumerate(argv):
        if arg == "--plat-name":
            # Separate-argument form; ignore a dangling flag with no value.
            if index + 1 < len(argv):
                plat_name = argv[index + 1]
        elif arg.startswith("--plat-name="):
            plat_name = arg.split("=", 1)[1]
    return plat_name


# Platform tag stamped on the wheel by the bdist_wheel override below.
PLATFORM_FLAG = parse_plat_name(sys.argv)

# The package version is supplied by the build through the environment.
VERSION = os.environ.get("VERSION")
if VERSION is None:
    raise Exception("envvar VERSION must be specified")

try:
    from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
except ImportError:
    # The wheel package is unavailable: register no custom command.
    bdist_wheel = None
else:

    class bdist_wheel(_bdist_wheel):
        """bdist_wheel variant that marks the wheel as platform specific."""

        def finalize_options(self):
            super().finalize_options()
            # The package ships a compiled binding, so it is not a pure wheel.
            self.root_is_pure = False

        def get_tag(self):
            # (python tag, abi tag, platform tag)
            return "py3", "none", PLATFORM_FLAG

this_directory = os.path.abspath(os.path.dirname(__file__))

data_files = [
    ("", ["LICENSE.txt"]),
]

# The file name of the compiled binding is supplied by the build through the
# environment; fail with a clear message instead of a bare KeyError.
if "TRITON_PYBIND" not in os.environ:
    raise Exception("envvar TRITON_PYBIND must be specified")

# Non-Python files shipped inside the package:
#  * the compiled pybind11 extension,
#  * py.typed, the marker file indicating support for type checkers
#    (https://peps.python.org/pep-0561/),
#  * type hints for the C extension generated by mypy. The stub is named after
#    the extension module (tritonfrontend_bindings).
platform_package_data = [
    os.environ["TRITON_PYBIND"],
    "py.typed",
    "_c/__init__.pyi",
    "_c/tritonfrontend_bindings.pyi",
]

# Optional dependency groups exposed through extras_require.
gpu_extras = ["cupy-cuda13x"]
test_extras = ["pytest"]
all_extras = gpu_extras + test_extras

# Trove classifiers published with the package metadata.
CLASSIFIERS = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Intended Audience :: Information Technology",
    "Topic :: Scientific/Engineering",
    "Topic :: Scientific/Engineering :: Image Recognition",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries",
    "Topic :: Utilities",
    "License :: OSI Approved :: BSD License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Environment :: Console",
    "Natural Language :: English",
    "Operating System :: OS Independent",
]

setup(
    # Identity and metadata
    name="tritonfrontend",
    version=VERSION,
    author="NVIDIA Inc.",
    author_email="sw-dl-triton@nvidia.com",
    description="Triton Inference Server In-Process Python API",
    license="BSD",
    url="https://developer.nvidia.com/nvidia-triton-inference-server",
    classifiers=CLASSIFIERS,
    # Package contents
    packages=find_packages(),
    package_data={"": platform_package_data},
    data_files=data_files,
    zip_safe=False,
    # Build customisation
    cmdclass={"bdist_wheel": bdist_wheel},
    # Dependencies
    install_requires=["tritonserver", "pydantic==2.10.6"],
    extras_require={"GPU": gpu_extras, "test": test_extras, "all": all_extras},
)


================================================
FILE: src/python/tritonfrontend/CMakeLists.txt
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required(VERSION 3.31.8)

# ================= Ensures Package is Structured Properly ==================
# file(COPY) runs at configure time; relative sources are resolved against the
# current source directory and relative destinations against the current
# build directory, so the package layout is mirrored into the build tree.
# Top level module entry point and typed marker
file(COPY __init__.py DESTINATION .)
file(COPY py.typed DESTINATION .)
# Copy the '__init__.py' for the '_c' module, plus the type stubs for the
# package and for the compiled binding
file(COPY _c/__init__.py DESTINATION ./_c/.)
file(COPY _c/__init__.pyi DESTINATION ./_c/.)
file(COPY _c/tritonfrontend_bindings.pyi DESTINATION ./_c/.)
# Find and copy _api modules (only *.py files match this glob; *.pyi stubs in
# _api are not copied here)
file(GLOB PYTHON_MODULE_FILES ./_api/*.py)
file(COPY ${PYTHON_MODULE_FILES} DESTINATION ./_api/.)
# ================================= END =====================================


# =================== Downloading and Installing pybind11 ===================
# pybind11 is fetched at configure time and added to the build, which makes
# pybind11_add_module() available below. The tag is pinned and only a shallow
# clone is made.
include(FetchContent)

FetchContent_Declare(
    pybind11
    GIT_REPOSITORY https://github.com/pybind/pybind11.git
    GIT_TAG v2.13.1
    GIT_SHALLOW ON
)

FetchContent_MakeAvailable(pybind11)
# ================================= END =====================================

# ================== Collect the Dependencies ===============================
# Server sources (two directories up) that are compiled directly into the
# binding module together with the binding sources.
set(
  PYTHON_FRONTEND_BINDING_DEPS
  ../../shared_memory_manager.h
  ../../shared_memory_manager.cc
  ../../data_compressor.h
  ../../restricted_features.h
  ../../classification.cc
  ../../common.h
  ../../common.cc
)

# Libraries the binding module links against; extended below according to the
# enabled features.
set(PY_BINDING_DEPENDENCY_LIBS
      b64) # Dependency from common.h

# Conditional Linking Based on Flags
if(${TRITON_ENABLE_HTTP})
  list(APPEND PY_BINDING_DEPENDENCY_LIBS
      http-endpoint-library
    )
endif()

if(${TRITON_ENABLE_GRPC})
  list(APPEND PY_BINDING_DEPENDENCY_LIBS
      grpc-endpoint-library
  )
endif()

if(${TRITON_ENABLE_GPU})
  find_package(CUDAToolkit REQUIRED)
  list(APPEND PY_BINDING_DEPENDENCY_LIBS
      CUDA::cudart
  )
endif()

# NOTE(review): the message says tracing is unsupported, yet the tracing
# library is still linked -- presumably only so the sources build; confirm.
if(${TRITON_ENABLE_TRACING})
  message("TRACING/STATS IS CURRENTLY NOT SUPPORTED.")
  list(
      APPEND PY_BINDING_DEPENDENCY_LIBS
      tracing-library
  )
endif()

# ===================== End of Collection ===================================

# ================== Create Python Frontend Bindings ========================
set(
  PYTHON_FRONTEND_BINDING_SRCS
  _c/tritonfrontend.h
  _c/tritonfrontend_pybind.cc
)

# Build the extension module from the shared server sources and the binding
# sources.
pybind11_add_module(
  py-bindings
  MODULE
  ${PYTHON_FRONTEND_BINDING_DEPS}
  ${PYTHON_FRONTEND_BINDING_SRCS}
)

target_link_libraries(
    py-bindings
    PRIVATE
    ${PY_BINDING_DEPENDENCY_LIBS}
)

# Forward each enabled build option to the compiler as a preprocessor
# definition of the same name.
if(${TRITON_ENABLE_HTTP})
  target_compile_definitions(
    py-bindings
    PRIVATE TRITON_ENABLE_HTTP=1
  )
endif()

if(${TRITON_ENABLE_GRPC})
  target_compile_definitions(
    py-bindings
    PRIVATE TRITON_ENABLE_GRPC=1
  )
endif()

if(${TRITON_ENABLE_GPU})
  target_compile_definitions(
    py-bindings
    PRIVATE TRITON_ENABLE_GPU=1
    PRIVATE TRITON_MIN_COMPUTE_CAPABILITY=${TRITON_MIN_COMPUTE_CAPABILITY}
  )
endif()

if(${TRITON_ENABLE_TRACING})
    target_compile_definitions(
      py-bindings
      PRIVATE TRITON_ENABLE_TRACING=1
    )
endif()

if(${TRITON_ENABLE_STATS})
  target_compile_definitions(
    py-bindings
    PRIVATE TRITON_ENABLE_STATS=1
  )
endif()

if(${TRITON_ENABLE_METRICS})
  target_compile_definitions(
    py-bindings
    PRIVATE TRITON_ENABLE_METRICS=1
  )
endif()

# The produced file is named after the Python module that imports it
# (tritonfrontend._c.tritonfrontend_bindings).
set_property(TARGET py-bindings PROPERTY OUTPUT_NAME tritonfrontend_bindings)

target_include_directories(
  py-bindings
  PRIVATE
  ${repo-core_SOURCE_DIR}/include
  ${repo-common_SOURCE_DIR}/include
)

# Runtime library search path embedded in the built module: the module's own
# directory first, then /opt/tritonserver/lib.
set_target_properties(
    py-bindings
    PROPERTIES
      BUILD_RPATH "$ORIGIN:/opt/tritonserver/lib"
      POSITION_INDEPENDENT_CODE ON
)
# ===================== End of Python Bindings ==============================


================================================
FILE: src/python/tritonfrontend/__init__.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# triton/server/src/python/tritonfrontend/__init__.py

import builtins
from importlib.metadata import PackageNotFoundError, version

# Re-export the public frontend classes. Each one is optional: when the name
# cannot be imported (per the notes below, because the matching build option
# was OFF) it is simply left undefined in this package.
# NOTE: any other ImportError raised while importing the name is silenced in
# the same way.
try:
    from tritonfrontend._api import KServeHttp
except ImportError:
    # TRITON_ENABLE_HTTP=OFF
    pass

try:
    from tritonfrontend._api import KServeGrpc
except ImportError:
    # TRITON_ENABLE_GRPC=OFF
    pass

try:
    from tritonfrontend._api import Metrics
except ImportError:
    # TRITON_ENABLE_METRICS=OFF
    pass


================================================
FILE: src/python/tritonfrontend/__init__.pyi
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Need to automate stubgen process as a part of build: https://github.com/triton-inference-server/server/pull/7501#discussion_r1720135228


================================================
FILE: src/python/tritonfrontend/_api/__init__.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Expose whichever frontends can be imported. A frontend whose module fails to
# import is skipped, so its name is absent from this package.
# NOTE: every ImportError is silenced here, including ones caused by a missing
# third-party dependency of the frontend module.
try:
    from ._kservehttp import KServeHttp
except ImportError:
    # TRITON_ENABLE_HTTP=OFF
    # TritonFrontendHttp Package was not present
    pass

try:
    from ._kservegrpc import KServeGrpc
except ImportError:
    # TRITON_ENABLE_GRPC=OFF
    # TritonFrontendGrpc Package was not present
    pass

try:
    from ._metrics import Metrics
except ImportError:
    # TRITON_ENABLE_METRICS=OFF
    # TritonFrontendMetrics Package was not present
    pass


================================================
FILE: src/python/tritonfrontend/_api/_error_mapping.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

import tritonserver
from tritonfrontend._c.tritonfrontend_bindings import (
    AlreadyExistsError,
    InternalError,
    InvalidArgumentError,
    NotFoundError,
    TritonError,
    UnavailableError,
    UnknownError,
    UnsupportedError,
)

# ERROR_MAPPING takes in tritonfrontend Error and maps to respective tritonserver Error
# Keys are the exception classes exported by the tritonfrontend bindings,
# values are the tritonserver exception classes of the same name.
ERROR_MAPPING = {
    TritonError: tritonserver.TritonError,
    NotFoundError: tritonserver.NotFoundError,
    UnknownError: tritonserver.UnknownError,
    InternalError: tritonserver.InternalError,
    InvalidArgumentError: tritonserver.InvalidArgumentError,
    UnavailableError: tritonserver.UnavailableError,
    AlreadyExistsError: tritonserver.AlreadyExistsError,
    UnsupportedError: tritonserver.UnsupportedError,
}


def handle_triton_error(func):
    """Decorate ``func`` so tritonfrontend errors surface as tritonserver errors.

    Any ``TritonError`` raised by the tritonfrontend bindings inside ``func``
    is replaced by the tritonserver exception registered for it in
    ``ERROR_MAPPING``. Other exceptions propagate unchanged.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper with the same name, docstring and signature metadata as
        ``func`` that forwards all arguments and returns ``func``'s result.

    Raises:
        tritonserver.TritonError: (or the mapped subclass) when ``func`` raises
            a tritonfrontend ``TritonError``.
    """
    # Imported locally so the decorator does not rely on a module-level import.
    import functools

    @functools.wraps(func)
    def error_handling_wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TritonError as error:
            # Walk the MRO so subclasses that are not listed explicitly still
            # map to the tritonserver error of their closest listed base
            # instead of failing with a KeyError.
            mapped_type = next(
                (
                    ERROR_MAPPING[cls]
                    for cls in type(error).__mro__
                    if cls in ERROR_MAPPING
                ),
                tritonserver.TritonError,
            )
            # raise ... from None masks the tritonfrontend Error from being added in traceback
            raise mapped_type(error) from None

    return error_handling_wrapper


================================================
FILE: src/python/tritonfrontend/_api/_kservegrpc.py
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from enum import IntEnum
from typing import Union

import tritonserver
from pydantic import Field
from pydantic.dataclasses import dataclass
from tritonfrontend._api._error_mapping import handle_triton_error
from tritonfrontend._c.tritonfrontend_bindings import (
    InvalidArgumentError,
    TritonFrontendGrpc,
)


# Enum (mirroring C++ format)
class Grpc_compression_level(IntEnum):
    """Compression levels accepted by ``KServeGrpc.Options.infer_compression_level``.

    Being an ``IntEnum``, members compare equal to their integer values.
    """

    NONE = 0
    LOW = 1
    MED = 2
    HIGH = 3
    # NOTE(review): presumably a sentinel holding the number of levels rather
    # than a selectable level -- confirm against the C++ enum.
    COUNT = 4


class KServeGrpc:
    """KServe gRPC frontend for an in-process Triton server.

    The frontend can be driven explicitly with ``start()`` / ``stop()`` or used
    as a context manager, in which case it is started on entry and stopped on
    exit. Errors raised by the underlying bindings are translated to the
    matching ``tritonserver`` exceptions.
    """

    Grpc_compression_level = (
        Grpc_compression_level  # Include the enum as a class attribute
    )

    # triton::server::grpc::Options
    @dataclass
    class Options:
        """Configuration of the gRPC frontend.

        This is a pydantic dataclass, so the types and the ``Field`` bounds
        below are validated when an instance is created.
        """

        # triton::server::grpc::SocketOptions
        address: str = "0.0.0.0"
        port: int = Field(8001, ge=0, le=65535)
        reuse_port: bool = False
        # triton::server::grpc::SslOptions
        use_ssl: bool = False
        server_cert: str = ""
        server_key: str = ""
        root_cert: str = ""
        use_mutual_auth: bool = False
        # triton::server::grpc::KeepAliveOptions
        keepalive_time_ms: int = Field(7_200_000, ge=0)
        keepalive_timeout_ms: int = Field(20_000, ge=0)
        keepalive_permit_without_calls: bool = False
        http2_max_pings_without_data: int = Field(2, ge=0)
        http2_min_recv_ping_interval_without_data_ms: int = Field(300_000, ge=0)
        http2_max_ping_strikes: int = Field(2, ge=0)
        max_connection_age_ms: int = Field(0, ge=0)
        max_connection_age_grace_ms: int = Field(0, ge=0)

        # triton::server::grpc::Options

        infer_compression_level: Union[
            int, Grpc_compression_level
        ] = Grpc_compression_level.NONE
        infer_thread_count: int = Field(2, ge=0)
        infer_allocation_pool_size: int = Field(8, ge=0)
        max_response_pool_size: int = Field(2_147_483_647, ge=0)
        forward_header_pattern: str = ""
        # DLIS-7215: Add restricted protocol support
        # restricted_protocols: str = ""

        def __post_init__(self):
            # Store the plain integer so the options dictionary handed to the
            # bindings only contains int, bool and str values.
            if isinstance(self.infer_compression_level, Grpc_compression_level):
                self.infer_compression_level = self.infer_compression_level.value

    @handle_triton_error
    def __init__(
        self,
        server: tritonserver.Server,
        options: Union["KServeGrpc.Options", None] = None,
    ):
        """Create the frontend for ``server`` without starting it.

        Args:
            server: Triton server the frontend is attached to.
            options: Frontend configuration; defaults to ``KServeGrpc.Options()``.

        Raises:
            tritonserver.InvalidArgumentError: If ``options`` is not a
                ``KServeGrpc.Options`` instance.
        """
        server_ptr = server._ptr()  # TRITONSERVER_Server pointer

        # If no options provided, default options are selected
        if options is None:
            options = KServeGrpc.Options()

        if not isinstance(options, KServeGrpc.Options):
            raise InvalidArgumentError(
                "Incorrect type for options. options argument must be of type KServeGrpc.Options"
            )

        # Converts dataclass instance -> python dictionary -> unordered_map<string, std::variant<...>>
        options_dict: dict[str, Union[int, bool, str]] = options.__dict__

        self.triton_frontend = TritonFrontendGrpc(server_ptr, options_dict)

    def __enter__(self):
        """Start the frontend and return it for use in a ``with`` block."""
        # Go through start() so failures are translated exactly like an
        # explicit start() call.
        self.start()
        return self

    @handle_triton_error
    def __exit__(self, exc_type, exc_value, traceback):
        """Stop the frontend when the ``with`` block ends.

        Nothing is returned, so an exception raised inside the block keeps
        propagating unchanged, with its original type, arguments and traceback.
        """
        self.triton_frontend.stop()

    @handle_triton_error
    def start(self):
        """Start serving requests."""
        self.triton_frontend.start()

    @handle_triton_error
    def stop(self):
        """Stop serving requests."""
        self.triton_frontend.stop()


================================================
FILE: src/python/tritonfrontend/_api/_kservegrpc.pyi
================================================
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import types
from enum import IntEnum

import tritonserver
from _typeshed import Incomplete as Incomplete

class Grpc_compression_level(IntEnum):
    NONE = 0
    LOW = 1
    MED = 2
    HIGH = 3
    COUNT = 4

class KServeGrpc:
    Grpc_compression_level = Grpc_compression_level
    class Options:
        address: str
        port: int
        reuse_port: bool
        use_ssl: bool
        server_cert: str
        server_key: str
        root_cert: str
        use_mutual_auth: bool
        keepalive_time_ms: int
        keepalive_timeout_ms: int
        keepalive_permit_without_calls: bool
        http2_max_pings_without_data: int
        http2_min_recv_ping_interval_without_data_ms: int
        http2_max_ping_strikes: int
        max_connection_age_ms: int
        max_connection_age_grace_ms: int
        infer_compression_level: int | Grpc_compression_level
        infer_thread_count: int
        infer_allocation_pool_size: int
        max_response_pool_size: int
        forward_header_pattern: str
        def __post_init__(self) -> None: ...
    triton_frontend: Incomplete
    # `options` may be omitted or None.
    def __init__(self, server: tritonserver, options: KServeGrpc.Options | None = None) -> None: ...
    # NOTE(review): typed as returning the instance to match the sibling
    # frontends, whose __enter__ returns self - confirm against _kservegrpc.py.
    def __enter__(self) -> KServeGrpc: ...
    def __exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: types.TracebackType | None) -> None: ...
    def start(self) -> None: ...
    def stop(self) -> None: ...


================================================
FILE: src/python/tritonfrontend/_api/_kservehttp.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


from typing import Union

import tritonserver
from pydantic import Field
from pydantic.dataclasses import dataclass
from tritonfrontend._api._error_mapping import handle_triton_error
from tritonfrontend._c.tritonfrontend_bindings import (
    InvalidArgumentError,
    TritonFrontendHttp,
)


class KServeHttp:
    """HTTP frontend service attached to a Triton server instance.

    Wraps the ``TritonFrontendHttp`` binding. The service can be driven
    explicitly with ``start()`` / ``stop()`` or used as a context manager,
    which starts it on entry and stops it on exit.
    """

    @dataclass
    class Options:
        """Settings handed to the HTTP frontend binding.

        ``port`` must lie in 0..65535 and ``thread_count`` must be positive;
        both constraints are declared through pydantic ``Field``.
        """

        address: str = "0.0.0.0"
        port: int = Field(8000, ge=0, le=65535)
        reuse_port: bool = False
        thread_count: int = Field(8, gt=0)
        header_forward_pattern: str = ""
        # DLIS-7215: Add restricted protocol support
        # restricted_protocols: list

    @handle_triton_error
    def __init__(self, server: tritonserver, options: "KServeHttp.Options" = None):
        """Create (but do not start) the HTTP frontend for ``server``.

        Args:
            server: Object exposing ``_ptr()``, which yields the underlying
                TRITONSERVER_Server pointer.
            options: Frontend settings; defaults to ``KServeHttp.Options()``
                when None.

        Raises:
            InvalidArgumentError: ``options`` is not a ``KServeHttp.Options``
                (raised here; ``handle_triton_error`` may translate it).
        """
        server_ptr = server._ptr()  # TRITONSERVER_Server pointer

        # If no options provided, default options are selected
        if options is None:
            options = KServeHttp.Options()

        if not isinstance(options, KServeHttp.Options):
            raise InvalidArgumentError(
                "Incorrect type for options. options argument must be of type KServeHttp.Options"
            )

        # Converts dataclass instance -> python dictionary -> unordered_map<string, std::variant<...>>
        options_dict: dict[str, Union[int, bool, str]] = options.__dict__

        self.triton_frontend = TritonFrontendHttp(server_ptr, options_dict)

    # NOTE(review): unlike start(), this is not wrapped in handle_triton_error,
    # so errors from starting the service inside a ``with`` statement bypass
    # that decorator - confirm whether this is intentional.
    def __enter__(self):
        """Start the service and return this instance for ``with ... as``."""
        self.triton_frontend.start()
        return self

    @handle_triton_error
    def __exit__(self, exc_type, exc_value, traceback):
        """Stop the service, then re-raise any exception from the ``with`` body.

        The service is stopped first, whether or not the body raised.
        """
        self.triton_frontend.stop()
        if exc_type:
            # Re-raise the in-flight exception object itself. Rebuilding it as
            # ``exc_type(exc_value)`` nests the original inside the new
            # exception's args and raises TypeError for exception classes whose
            # constructor does not accept exactly one argument.
            if isinstance(exc_value, BaseException):
                raise exc_value
            # Fallback for manual calls that pass a non-exception value.
            raise exc_type(exc_value)

    @handle_triton_error
    def start(self):
        """Start the HTTP frontend service."""
        self.triton_frontend.start()

    @handle_triton_error
    def stop(self):
        """Stop the HTTP frontend service."""
        self.triton_frontend.stop()


================================================
FILE: src/python/tritonfrontend/_api/_kservehttp.pyi
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import types

import tritonserver
from _typeshed import Incomplete as Incomplete

class KServeHttp:
    class Options:
        address: str
        port: int
        reuse_port: bool
        thread_count: int
        header_forward_pattern: str
        # Options is a dataclass in the implementation and every field has a
        # default, so all constructor arguments are optional.
        def __init__(
            self,
            address: str = ...,
            port: int = ...,
            reuse_port: bool = ...,
            thread_count: int = ...,
            header_forward_pattern: str = ...,
        ) -> None: ...
    triton_frontend: Incomplete
    # `options` may be omitted or None; defaults are then used.
    def __init__(self, server: tritonserver, options: KServeHttp.Options | None = None) -> None: ...
    # The implementation returns the instance itself.
    def __enter__(self) -> KServeHttp: ...
    def __exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: types.TracebackType | None) -> None: ...
    def start(self) -> None: ...
    def stop(self) -> None: ...


================================================
FILE: src/python/tritonfrontend/_api/_metrics.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


from typing import Union

import tritonserver
from pydantic import Field
from pydantic.dataclasses import dataclass
from tritonfrontend._api._error_mapping import handle_triton_error
from tritonfrontend._c.tritonfrontend_bindings import (
    InvalidArgumentError,
    TritonFrontendMetrics,
)


class Metrics:
    """Metrics frontend service attached to a Triton server instance.

    Wraps the ``TritonFrontendMetrics`` binding. The service can be driven
    explicitly with ``start()`` / ``stop()`` or used as a context manager,
    which starts it on entry and stops it on exit.
    """

    @dataclass
    class Options:
        """Settings handed to the metrics frontend binding.

        ``port`` must lie in 0..65535 and ``thread_count`` must be positive;
        both constraints are declared through pydantic ``Field``.
        """

        address: str = "0.0.0.0"
        port: int = Field(8002, ge=0, le=65535)
        thread_count: int = Field(1, gt=0)

    @handle_triton_error
    def __init__(self, server: tritonserver, options: "Metrics.Options" = None):
        """Create (but do not start) the metrics frontend for ``server``.

        Args:
            server: Object exposing ``_ptr()``, which yields the underlying
                TRITONSERVER_Server pointer.
            options: Frontend settings; defaults to ``Metrics.Options()``
                when None.

        Raises:
            InvalidArgumentError: ``options`` is not a ``Metrics.Options``
                (raised here; ``handle_triton_error`` may translate it).
        """
        server_ptr = server._ptr()  # TRITONSERVER_Server pointer

        # If no options provided, default options are selected
        if options is None:
            options = Metrics.Options()

        if not isinstance(options, Metrics.Options):
            raise InvalidArgumentError(
                "Incorrect type for options. options argument must be of type Metrics.Options"
            )

        # Converts dataclass instance -> python dictionary -> unordered_map<string, std::variant<...>>
        options_dict: dict[str, Union[int, bool, str]] = options.__dict__

        self.triton_frontend = TritonFrontendMetrics(server_ptr, options_dict)

    # NOTE(review): unlike start(), this is not wrapped in handle_triton_error,
    # so errors from starting the service inside a ``with`` statement bypass
    # that decorator - confirm whether this is intentional.
    def __enter__(self):
        """Start the service and return this instance for ``with ... as``."""
        self.triton_frontend.start()
        return self

    @handle_triton_error
    def __exit__(self, exc_type, exc_value, traceback):
        """Stop the service, then re-raise any exception from the ``with`` body.

        The service is stopped first, whether or not the body raised.
        """
        self.triton_frontend.stop()
        if exc_type:
            # Re-raise the in-flight exception object itself. Rebuilding it as
            # ``exc_type(exc_value)`` nests the original inside the new
            # exception's args and raises TypeError for exception classes whose
            # constructor does not accept exactly one argument.
            if isinstance(exc_value, BaseException):
                raise exc_value
            # Fallback for manual calls that pass a non-exception value.
            raise exc_type(exc_value)

    @handle_triton_error
    def start(self):
        """Start the metrics frontend service."""
        self.triton_frontend.start()

    @handle_triton_error
    def stop(self):
        """Stop the metrics frontend service."""
        self.triton_frontend.stop()


================================================
FILE: src/python/tritonfrontend/_api/_metrics.pyi
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import types

import tritonserver
from _typeshed import Incomplete
from tritonfrontend._api._error_mapping import (
    handle_triton_error as handle_triton_error,
)
from tritonfrontend._c.tritonfrontend_bindings import (
    InvalidArgumentError as InvalidArgumentError,
)
from tritonfrontend._c.tritonfrontend_bindings import (
    TritonFrontendMetrics as TritonFrontendMetrics,
)

class Metrics:
    class Options:
        address: str
        port: int
        thread_count: int
        # Options is a dataclass in the implementation and every field has a
        # default, so all constructor arguments are optional.
        def __init__(
            self,
            address: str = ...,
            port: int = ...,
            thread_count: int = ...,
        ) -> None: ...
    triton_frontend: Incomplete
    # `options` may be omitted or None; defaults are then used.
    def __init__(self, server: tritonserver, options: Metrics.Options | None = None) -> None: ...
    # The implementation returns the instance itself.
    def __enter__(self) -> Metrics: ...
    def __exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: types.TracebackType | None) -> None: ...
    def start(self) -> None: ...
    def stop(self) -> None: ...


================================================
FILE: src/python/tritonfrontend/_c/__init__.py
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from .tritonfrontend_bindings import *


================================================
FILE: src/python/tritonfrontend/_c/__init__.pyi
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from tritonfrontend._c.tritonfrontend_bindings import *


================================================
FILE: src/python/tritonfrontend/_c/tritonfrontend.h
================================================
// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#pragma once

#include <memory>  // For shared_ptr
#include <unordered_map>
#include <variant>

#include "../../../common.h"
#include "../../../restricted_features.h"
#include "../../../shared_memory_manager.h"
#include "../../../tracer.h"
#include "triton/common/logging.h"
#include "triton/core/tritonserver.h"


// Empty definition of the server handle type. This header only ever uses it
// through pointers (raw and std::shared_ptr with a no-op deleter).
// NOTE(review): presumably defined here so the otherwise opaque C handle is a
// complete type for the binding code - confirm.
struct TRITONSERVER_Server {};

namespace triton { namespace server { namespace python {

// Root of the exception hierarchy: every Triton error type declared in this
// header derives from TritonError, so callers can catch them all at once.
struct TritonError : public std::runtime_error {
  explicit TritonError(const std::string& message)
      : std::runtime_error(message)
  {
  }
};

// triton::core::python exceptions map 1:1 to TRITONSERVER_Error_Code.
struct UnknownError : public TritonError {
  explicit UnknownError(const std::string& what) : TritonError(what) {}
};
struct InternalError : public TritonError {
  explicit InternalError(const std::string& what) : TritonError(what) {}
};
struct NotFoundError : public TritonError {
  explicit NotFoundError(const std::string& what) : TritonError(what) {}
};
struct InvalidArgumentError : public TritonError {
  explicit InvalidArgumentError(const std::string& what) : TritonError(what) {}
};
struct UnavailableError : public TritonError {
  explicit UnavailableError(const std::string& what) : TritonError(what) {}
};
struct UnsupportedError : public TritonError {
  explicit UnsupportedError(const std::string& what) : TritonError(what) {}
};
struct AlreadyExistsError : public TritonError {
  explicit AlreadyExistsError(const std::string& what) : TritonError(what) {}
};

/// Convert a TRITONSERVER_Error into the matching C++ exception.
///
/// Returns normally when \p err is null. Otherwise this function takes
/// ownership of \p err, copies its message and throws the TritonError
/// subclass selected by the error code. Codes without a dedicated case are
/// thrown as UnknownError. The error object is released with
/// TRITONSERVER_ErrorDelete during stack unwinding.
///
/// Declared inline because the definition lives in a header: without it,
/// including this header from more than one translation unit would produce
/// duplicate definitions of the function.
///
/// \param err error to translate; may be null.
inline void
ThrowIfError(TRITONSERVER_Error* err)
{
  if (err == nullptr) {
    return;
  }
  // Scope guard: guarantees the error is deleted on every throw path below.
  const std::unique_ptr<TRITONSERVER_Error, decltype(&TRITONSERVER_ErrorDelete)>
      managed_err(err, &TRITONSERVER_ErrorDelete);
  // Copy the text out before the error object is destroyed.
  const std::string msg = TRITONSERVER_ErrorMessage(err);
  switch (TRITONSERVER_ErrorCode(err)) {
    case TRITONSERVER_ERROR_INTERNAL:
      throw InternalError(msg);
    case TRITONSERVER_ERROR_NOT_FOUND:
      throw NotFoundError(msg);
    case TRITONSERVER_ERROR_INVALID_ARG:
      throw InvalidArgumentError(msg);
    case TRITONSERVER_ERROR_UNAVAILABLE:
      throw UnavailableError(msg);
    case TRITONSERVER_ERROR_UNSUPPORTED:
      throw UnsupportedError(msg);
    case TRITONSERVER_ERROR_ALREADY_EXISTS:
      throw AlreadyExistsError(msg);
    default:
      throw UnknownError(msg);
  }
}


template <typename Base, typename FrontendServer>
class TritonFrontend {
 private:
  std::shared_ptr<TRITONSERVER_Server> server_;
  std::unique_ptr<Base> service;
  triton::server::RestrictedFeatures restricted_features;
  // TODO: [DLIS-7194] Add support for TraceManager & SharedMemoryManager
  // triton::server::TraceManager trace_manager_;
  // triton::server::SharedMemoryManager shm_manager_;

 public:
  TritonFrontend(uintptr_t server_mem_addr, UnorderedMapType data)
  {
    TRITONSERVER_Server* server_ptr =
        reinterpret_cast<TRITONSERVER_Server*>(server_mem_addr);

    server_.reset(server_ptr, EmptyDeleter);

#ifdef TRITON_ENABLE_HTTP
    if constexpr (std::is_same_v<FrontendServer, HTTPAPIServer>) {
      ThrowIfError(FrontendServer::Create(
          server_, data, nullptr /* TraceManager */,
          nullptr /* SharedMemoryManager */, restricted_features, &service));
    }
#endif

#ifdef TRITON_ENABLE_GRPC
    if constexpr (std::is_same_v<
                      FrontendServer, triton::server::grpc::Server>) {
      ThrowIfError(FrontendServer::Create(
          server_, data, nullptr /* TraceManager */,
          nullptr /* SharedMemoryManager */, restricted_features, &service));
    }
#endif

#ifdef TRITON_ENABLE_METRICS
    if constexpr (std::is_same_v<FrontendServer, HTTPMetricsServer>) {
      ThrowIfError(FrontendServer::Create(server_, data, &service));
    }
#endif
  };

  // TODO: [DLIS-7194] Add support for TraceManager & SharedMemoryManager
  // TritonFrontend(
  //     uintptr_t server_mem_addr, UnorderedMapType data,
  //     TraceManager trace_manager, SharedMemoryManager shm_manager)

  void StartService() { ThrowIfError(service->Start()); };
  void StopService() { ThrowIfError(service->Stop()); };

  // The frontend does not own the TRITONSERVER_Server* object.
  // Hence, deleting the underlying server instance,
  // will cause a double-free when the core bindings attempt to
  // delete the TRITONSERVER_Server instance.
  static void EmptyDeleter(TRITONSERVER_Server* obj){};
};

}}}  // namespace triton::server::python


================================================
FILE: src/python/tritonfrontend/_c/tritonfrontend_bindings.pyi
================================================
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from tritonfrontend import AlreadyExistsError as AlreadyExistsError
from tritonfrontend import InternalError as InternalError
from tritonfrontend import InvalidArgumentError as InvalidArgumentError
from tritonfrontend import NotFoundError as NotFoundError
from tritonfrontend import TritonError as TritonError
from tritonfrontend import UnavailableError as UnavailableError
from tritonfrontend import UnknownError as UnknownError
from tritonfrontend import UnsupportedError as UnsupportedError

class TritonFrontendGrpc:
    # Stub for the pybind11-bound gRPC frontend wrapper.
    # arg0: memory address of the TRITONSERVER_Server, passed as an integer.
    # arg1: frontend options keyed by name.
    def __init__(self, arg0: int, arg1: dict[str, bool | int | str]) -> None: ...
    # Start the underlying service; errors surface as exceptions.
    def start(self) -> None: ...
    # Stop the underlying service; errors surface as exceptions.
    def stop(self) -> None: ...

class TritonFrontendHttp:
    # Stub for the pybind11-bound HTTP frontend wrapper.
    # arg0: memory address of the TRITONSERVER_Server, passed as an integer.
    # arg1: frontend options keyed by name.
    def __init__(self, arg0: int, arg1: dict[str, bool | int | str]) -> None: ...
    # Start the underlying service; errors surface as exceptions.
    def start(self) -> None: ...
    # Stop the underlying service; errors surface as exceptions.
    def stop(self) -> None: ...

class TritonFrontendMetrics:
    # Stub for the pybind11-bound metrics frontend wrapper.
    # arg0: memory address of the TRITONSERVER_Server, passed as an integer.
    # arg1: frontend options keyed by name.
    def __init__(self, arg0: int, arg1: dict[str, bool | int | str]) -> None: ...
    # Start the underlying service; errors surface as exceptions.
    def start(self) -> None: ...
    # Stop the underlying service; errors surface as exceptions.
    def stop(self) -> None: ...

================================================
FILE: src/python/tritonfrontend/_c/tritonfrontend_pybind.cc
================================================
// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#ifdef TRITON_ENABLE_GRPC
#include "../../../grpc/grpc_server.h"
#endif


#if defined(TRITON_ENABLE_HTTP) || defined(TRITON_ENABLE_METRICS)
#include "../../../http_server.h"
#endif


#include "triton/core/tritonserver.h"
#include "tritonfrontend.h"


namespace py = pybind11;

namespace triton { namespace server { namespace python {


// Defines the `tritonfrontend_bindings` extension module: the Triton
// exception hierarchy plus one Python class per frontend enabled at build
// time, each exposing a constructor and start()/stop().
PYBIND11_MODULE(tritonfrontend_bindings, m)
{
  m.doc() = "Python bindings for Triton Inference Server Frontend Endpoints";

  // TritonError is registered first; every other exception is registered
  // with it as the Python base class.
  auto base_error = py::register_exception<TritonError>(m, "TritonError");
  PyObject* const error_base = base_error.ptr();
  py::register_exception<UnknownError>(m, "UnknownError", error_base);
  py::register_exception<InternalError>(m, "InternalError", error_base);
  py::register_exception<NotFoundError>(m, "NotFoundError", error_base);
  py::register_exception<InvalidArgumentError>(
      m, "InvalidArgumentError", error_base);
  py::register_exception<UnavailableError>(m, "UnavailableError", error_base);
  py::register_exception<UnsupportedError>(m, "UnsupportedError", error_base);
  py::register_exception<AlreadyExistsError>(
      m, "AlreadyExistsError", error_base);

#ifdef TRITON_ENABLE_HTTP
  using HttpFrontend = TritonFrontend<HTTPServer, HTTPAPIServer>;
  py::class_<HttpFrontend>(m, "TritonFrontendHttp")
      .def(py::init<uintptr_t, UnorderedMapType>())
      .def("start", &HttpFrontend::StartService)
      .def("stop", &HttpFrontend::StopService);
#endif  // TRITON_ENABLE_HTTP

#ifdef TRITON_ENABLE_GRPC
  using GrpcFrontend = TritonFrontend<
      triton::server::grpc::Server, triton::server::grpc::Server>;
  py::class_<GrpcFrontend>(m, "TritonFrontendGrpc")
      .def(py::init<uintptr_t, UnorderedMapType>())
      .def("start", &GrpcFrontend::StartService)
      .def("stop", &GrpcFrontend::StopService);
#endif  // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_METRICS
  using MetricsFrontend = TritonFrontend<HTTPServer, HTTPMetricsServer>;
  py::class_<MetricsFrontend>(m, "TritonFrontendMetrics")
      .def(py::init<uintptr_t, UnorderedMapType>())
      .def("start", &MetricsFrontend::StartService)
      .def("stop", &MetricsFrontend::StopService);
#endif  // TRITON_ENABLE_METRICS
}

}}}  // namespace triton::server::python


================================================
FILE: src/python/tritonfrontend/py.typed
================================================


================================================
FILE: src/restricted_features.h
================================================
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <algorithm>
#include <array>
#include <string>

namespace triton { namespace server {

/// Header and Value pair for a restricted feature
/// (presumably `first` is the header and `second` its value, following the
/// order in the name above - confirm against callers).
using Restriction = std::pair<std::string, std::string>;

/// Restricted Categories
///
/// Enumerators are numbered from 0 in declaration order. The values are used
/// as array indices (into RESTRICTED_CATEGORY_NAMES and the per-category
/// arrays of RestrictedFeatures), so this list and the name list must stay in
/// the same order.
enum RestrictedCategory : uint8_t {
  HEALTH,
  METADATA,
  INFERENCE,
  SHARED_MEMORY,
  MODEL_CONFIG,
  MODEL_REPOSITORY,
  STATISTICS,
  TRACE,
  LOGGING,
  // Sentinel: not a real category. Its value equals the number of real ones.
  INVALID,
  // Number of real categories; used as the size of the per-category arrays.
  CATEGORY_COUNT = INVALID
};

/// Restricted Category Names
///
/// Entry i is the textual name of the RestrictedCategory whose value is i.
/// INVALID has no entry.
const std::array<const std::string, RestrictedCategory::CATEGORY_COUNT>
    RESTRICTED_CATEGORY_NAMES{
        "health",        "metadata",     "inference",
        "shared-memory", "model-config", "model-repository",
        "statistics",    "trace",        "logging"};

/// Collection of restricted features
///
/// Initially empty and all categories unrestricted
class RestrictedFeatures {
 public:
  /// Returns RestrictedCategory enum from category name
  ///
  /// \param[in] category category name
  /// \return category enum returns INVALID if unknown
  static RestrictedCategory ToCategory(const std::string& category)
  {
    const auto found = std::find(
        begin(RESTRICTED_CATEGORY_NAMES), end(RESTRICTED_CATEGORY_NAMES),
        category);
    const auto offset = std::distance(begin(RESTRICTED_CATEGORY_NAMES), found);
    return RestrictedCategory(offset);
  }

  /// Insert restriction for given category
  ///
  /// \param[in] category category to restrict
  /// \param[in] restriction header, value pair
  void Insert(const RestrictedCategory& category, Restriction&& restriction)
  {
    restrictions_[category] = std::move(restriction);
    restricted_categories_[category] = true;
  }

  /// Get header,value pair for restricted category
  ///
  /// \param[in] category category to restrict
  /// \return restriction header, value pair
  const Restriction& Get(RestrictedCategory category) const
  {
    return restrictions_[category];
  }

  /// Return true if a category is restricted
  ///
  /// \param[in] category category to restrict
  /// \return true if category is restricted, false otherwise

  const bool& IsRestricted(RestrictedCategory category) const
  {
    return restricted_categories_[category];
  }

  RestrictedFeatures() = default;
  ~RestrictedFeatures() = default;

 private:
  std::array<Restriction, RestrictedCategory::CATEGORY_COUNT> restrictions_{};

  std::array<bool, RestrictedCategory::CATEGORY_COUNT> restricted_categories_{};
};
}}  // namespace triton::server


================================================
FILE: src/sagemaker_server.cc
================================================
// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "sagemaker_server.h"

#include <filesystem>

namespace triton { namespace server {

// Evaluate X once. If it yields a non-null TRITONSERVER_Error*, write the
// error message as JSON into REQ's output buffer, reply with
// EVHTP_RES_BADREQ, delete the error and return from the enclosing function.
// The expansion contains a bare `return;`, so the macro can only be used in
// functions returning void. A null result falls through with no effect.
#define HTTP_RESPOND_IF_ERR(REQ, X)                   \
  do {                                                \
    TRITONSERVER_Error* err__ = (X);                  \
    if (err__ != nullptr) {                           \
      EVBufferAddErrorJson((REQ)->buffer_out, err__); \
      evhtp_send_reply((REQ), EVHTP_RES_BADREQ);      \
      TRITONSERVER_ErrorDelete(err__);                \
      return;                                         \
    }                                                 \
  } while (false)

// Gate a handler on the restriction configured for RESTRICTED_CATEGORY.
// Reads the member `restricted_apis_` of the enclosing class: when the
// category is restricted and RespondIfRestricted(REQ, restriction) returns
// true, the enclosing function returns immediately (bare `return;`, so void
// functions only). The restriction is fetched with Get() even when the
// category is not restricted; it is only used when it is.
#define RETURN_AND_RESPOND_IF_RESTRICTED(REQ, RESTRICTED_CATEGORY)       \
  do {                                                                   \
    auto const& is_restricted_api =                                      \
        restricted_apis_.IsRestricted(RESTRICTED_CATEGORY);              \
    auto const& restriction = restricted_apis_.Get(RESTRICTED_CATEGORY); \
    if (is_restricted_api && RespondIfRestricted((REQ), restriction)) {  \
      return;                                                            \
    }                                                                    \
  } while (false)

namespace {

// Append `{"error": "<message of err>"}` to an evbuffer.
//
// The message is referenced (not copied) while the JSON object is built, so
// `err` must stay alive until this function returns; ownership of `err`
// remains with the caller.
void
EVBufferAddErrorJson(evbuffer* buffer, TRITONSERVER_Error* err)
{
  using JsonValue = triton::common::TritonJson::Value;
  using JsonWriteBuffer = triton::common::TritonJson::WriteBuffer;

  const char* error_text = TRITONSERVER_ErrorMessage(err);

  JsonValue error_object(triton::common::TritonJson::ValueType::OBJECT);
  error_object.AddStringRef("error", error_text, strlen(error_text));

  JsonWriteBuffer serialized;
  error_object.Write(&serialized);

  evbuffer_add(buffer, serialized.Base(), serialized.Size());
}
}  // namespace


// Content-Type prefix that marks a request/response body as "JSON header
// followed by binary tensor data". The decimal number that follows the '='
// is the byte size of the JSON header: GetInferenceHeaderLength parses it
// from requests and SetResponseHeader appends it on responses.
const std::string SagemakerAPIServer::binary_mime_type_(
    "application/vnd.sagemaker-triton.binary+json;json-header-size=");

// Determine how many leading bytes of the request body form the JSON
// inference header.
//
// When the Content-Type header starts with binary_mime_type_, the number
// that follows it is used; otherwise the whole body (content_length) is
// treated as the header.
//
// \param req request whose Content-Type header is inspected
// \param content_length size of the (possibly decompressed) request body
// \param header_length [out] resulting header size; always written, even
// when an error is returned (it then holds content_length)
// \return nullptr on success, otherwise a TRITONSERVER_ERROR_INVALID_ARG
// error owned by the caller
TRITONSERVER_Error*
SagemakerAPIServer::GetInferenceHeaderLength(
    evhtp_request_t* req, int32_t content_length, size_t* header_length)
{
  // Set to content length in case the header size is not specified.
  *header_length = content_length;

  const char* content_type_c_str =
      evhtp_kv_find(req->headers_in, kContentTypeHeader);
  if (content_type_c_str == nullptr) {
    return nullptr;
  }

  const std::string content_type(content_type_c_str);
  const size_t pos = content_type.find(binary_mime_type_);
  if (pos == std::string::npos) {
    // Not the binary MIME type: the whole body is the header.
    return nullptr;
  }
  if (pos != 0) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("expect MIME type for binary data starts with '") +
         binary_mime_type_ + "', got: " + content_type)
            .c_str());
  }

  // Text that follows "...json-header-size=".
  const char* size_c_str = content_type_c_str + binary_mime_type_.length();

  // std::stoi (unlike std::atoi) throws when there are no digits to parse or
  // when the value does not fit in an int, so malformed sizes are rejected
  // instead of being silently read as 0.
  int32_t parsed_value;
  try {
    parsed_value = std::stoi(size_c_str);
  }
  catch (const std::exception& e) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("Unable to parse inference header size, got: ") +
         size_c_str)
            .c_str());
  }

  // Check if the header size is in proper range
  if ((parsed_value < 0) || (parsed_value > content_length)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("inference header size should be in range (0, ") +
         std::to_string(content_length) + "), got: " + size_c_str)
            .c_str());
  }

  *header_length = parsed_value;
  return nullptr;
}

// Attach the Content-Type header to the outgoing response.
//
// Responses carrying binary data advertise the SageMaker binary MIME type
// with the JSON header size appended; all other responses are plain
// "application/json".
void
SagemakerAPIServer::SagemakeInferRequestClass::SetResponseHeader(
    bool has_binary_data, size_t header_length)
{
  // Decide on the value first, then add the header exactly once.
  // evhtp_header_new is asked to copy both key and value (the trailing 1, 1),
  // so the local string may go out of scope afterwards.
  std::string content_type("application/json");
  if (has_binary_data) {
    content_type = binary_mime_type_ + std::to_string(header_length);
  }

  evhtp_headers_add_header(
      req_->headers_out,
      evhtp_header_new(kContentTypeHeader, content_type.c_str(), 1, 1));
}

// Entry point for requests received by the SageMaker endpoint.
//
// Dispatches on the request path:
//  * ping_regex_        -> HandleServerHealth
//  * invocations_regex_ -> HandleInfer or HandleGenerate, selected by
//                          inference_type_
//  * models_regex_      -> multi-model (MME) list / get / load / invoke /
//                          unload, selected by the HTTP method and by the
//                          "action" captured from the path
// A request that matches none of the above is answered with
// EVHTP_RES_BADREQ.
//
// Note that HTTP_RESPOND_IF_ERR and RETURN_AND_RESPOND_IF_RESTRICTED expand
// to code that may `return` from this function.
void
SagemakerAPIServer::Handle(evhtp_request_t* req)
{
  LOG_VERBOSE(1) << "SageMaker request: " << req->method << " "
                 << req->uri->path->full;

  // Health check.
  if (RE2::FullMatch(std::string(req->uri->path->full), ping_regex_)) {
    HandleServerHealth(req, ping_mode_);
    return;
  }

  // Single-model invocation against the configured model_name_ /
  // model_version_str_.
  if (RE2::FullMatch(std::string(req->uri->path->full), invocations_regex_)) {
    if (inference_type_ == "infer") {
      HandleInfer(req, model_name_, model_version_str_);
    } else if (inference_type_ == "generate") {
      HandleGenerate(
          req, model_name_, model_version_str_, false /* is streaming */);
    } else if (inference_type_ == "generate_stream") {
      HandleGenerate(
          req, model_name_, model_version_str_, true /* is streaming */);
    } else {
      // This error should never happen, due to the validation in tritonserver
      // startup.
      HTTP_RESPOND_IF_ERR(
          req, TRITONSERVER_ErrorNew(
                   TRITONSERVER_ERROR_INTERNAL,
                   std::string(
                       "Server has invalid inference type '" + inference_type_ +
                       "'. Must be one of: infer, generate, generate_stream.")
                       .c_str()));
    }
    return;
  }

  // Multi-model endpoints. models_regex_ captures the model name (empty for
  // the collection itself) and an optional trailing action.
  std::string multi_model_name, action;
  if (RE2::FullMatch(
          std::string(req->uri->path->full), models_regex_, &multi_model_name,
          &action)) {
    switch (req->method) {
      case htp_method_GET:
        if (multi_model_name.empty()) {
          LOG_VERBOSE(1) << "SageMaker request: LIST ALL MODELS";

          RETURN_AND_RESPOND_IF_RESTRICTED(
              req, RestrictedCategory::MODEL_REPOSITORY);
          SageMakerMMEListModel(req);
          return;
        } else {
          LOG_VERBOSE(1) << "SageMaker request: GET MODEL";

          RETURN_AND_RESPOND_IF_RESTRICTED(
              req, RestrictedCategory::MODEL_REPOSITORY);
          SageMakerMMEGetModel(req, multi_model_name.c_str());
          return;
        }
      case htp_method_POST:
        if (action == "/invoke") {
          LOG_VERBOSE(1) << "SageMaker request: INVOKE MODEL";

          // Only models registered through the MME load path can be invoked.
          // The lock is released before the inference is started. Unlike the
          // other MME operations, INVOKE is not gated by
          // RETURN_AND_RESPOND_IF_RESTRICTED.
          {
            std::lock_guard<std::mutex> lock(models_list_mutex_);
            if (sagemaker_models_list_.find(multi_model_name.c_str()) ==
                sagemaker_models_list_.end()) {
              evhtp_send_reply(req, EVHTP_RES_NOTFOUND); /* 404*/
              return;
            }
          }
          LOG_VERBOSE(1) << "SageMaker MME Custom Invoke Model Path";

          /* Extract targetModel to log the associated archive */
          const char* target_model =
              evhtp_kv_find(req->headers_in, "X-Amzn-SageMaker-Target-Model");

          /* If target_model is not available (e.g., in local testing), use
           * the model name taken from the request path as target_model */
          if (target_model == nullptr) {
            target_model = multi_model_name.c_str();
          }

          LOG_INFO << "Invoking SageMaker TargetModel: " << target_model;

          SageMakerMMEHandleInfer(req, target_model, model_version_str_);
          return;
        }
        if (action.empty()) {
          LOG_VERBOSE(1) << "SageMaker request: LOAD MODEL";

          RETURN_AND_RESPOND_IF_RESTRICTED(
              req, RestrictedCategory::MODEL_REPOSITORY);
          // ParseSageMakerRequest replies to the client itself when it
          // rejects the request and then leaves the map empty, in which case
          // there is nothing left to do here.
          std::unordered_map<std::string, std::string> parse_load_map;
          ParseSageMakerRequest(req, &parse_load_map, "load");
          if (!parse_load_map.empty()) {
            SageMakerMMELoadModel(req, parse_load_map);
          }
          return;
        }
        // POST with any other action: fall out of the switch to the generic
        // bad-request reply at the end of the function.
        break;
      case htp_method_DELETE: {
        // UNLOAD MODEL
        LOG_VERBOSE(1) << "SageMaker request: UNLOAD MODEL";

        RETURN_AND_RESPOND_IF_RESTRICTED(
            req, RestrictedCategory::MODEL_REPOSITORY);
        // NOTE(review): the method is rewritten to POST before unloading;
        // the reason is not visible in this function — confirm it is still
        // required.
        req->method = htp_method_POST;

        SageMakerMMEUnloadModel(req, multi_model_name.c_str());

        return;
      }
      default:
        LOG_VERBOSE(1) << "SageMaker error: " << req->method << " "
                       << req->uri->path->full << " - "
                       << static_cast<int>(EVHTP_RES_BADREQ);
        evhtp_send_reply(req, EVHTP_RES_BADREQ);
        return;
    }
  }

  // Unknown path, or a POST on a model path with an unsupported action.
  LOG_VERBOSE(1) << "SageMaker error: " << req->method << " "
                 << req->uri->path->full << " - "
                 << static_cast<int>(EVHTP_RES_BADREQ);

  evhtp_send_reply(req, EVHTP_RES_BADREQ);
}


// Factory for the SageMaker HTTP front-end.
//
// Constructs a SagemakerAPIServer from the given arguments and stores it in
// *http_server, replacing whatever that pointer owned before. This function
// only constructs the object and logs the address; it always returns
// nullptr.
TRITONSERVER_Error*
SagemakerAPIServer::Create(
    const std::shared_ptr<TRITONSERVER_Server>& server,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<SharedMemoryManager>& shm_manager, const int32_t port,
    const std::string address, const int thread_cnt,
    const size_t max_input_size, const RestrictedFeatures& restricted_apis,
    std::unique_ptr<HTTPServer>* http_server)
{
  SagemakerAPIServer* sagemaker_server = new SagemakerAPIServer(
      server, trace_manager, shm_manager, port, address, thread_cnt,
      max_input_size, restricted_apis);
  http_server->reset(sagemaker_server);

  LOG_INFO << "Started Sagemaker HTTPService at " << address << ":"
           << std::to_string(port);

  return nullptr;
}


// Extract the parameters of an MME model-management request.
//
// Reads "model_name" (and, for the "load" action, "url") from the JSON body
// and the optional X-Amzn-SageMaker-Target-Model header, then fills
// *parse_map with:
//   "url"             - only when action == "load"
//   "model_name_hash" - the "model_name" value from the body
//   "target_model"    - the header value, or model_name_hash when the header
//                       is absent
//
// On any failure a reply is sent on `req` and *parse_map is left untouched,
// so callers detect rejection by checking whether the map is still empty.
//
// \param req request to parse (and to reply on in case of failure)
// \param parse_map [out] receives the extracted values
// \param action name of the operation being parsed, e.g. "load"
void
SagemakerAPIServer::ParseSageMakerRequest(
    evhtp_request_t* req,
    std::unordered_map<std::string, std::string>* parse_map,
    const std::string& action)
{
  size_t buffer_len;
  triton::common::TritonJson::Value request;
  HTTP_RESPOND_IF_ERR(
      req, EVRequestToJson(req, "load model", &request, &buffer_len));

  std::string model_name_string;
  std::string url_string;

  if (buffer_len > 0) {
    triton::common::TritonJson::Value url;
    triton::common::TritonJson::Value model_name;

    if (request.Find("model_name", &model_name)) {
      HTTP_RESPOND_IF_ERR(req, model_name.AsString(&model_name_string));
      LOG_VERBOSE(1) << "Received model_name: " << model_name_string.c_str();
    }

    if ((action == "load") && (request.Find("url", &url))) {
      HTTP_RESPOND_IF_ERR(req, url.AsString(&url_string));
      LOG_VERBOSE(1) << "Received url: " << url_string.c_str();
    }
  }

  // Normalize the path to remove any redundant components ("..", ".", "//")
  // before checking it. The error_code overload is used so that a path that
  // cannot be resolved is rejected with a 400 instead of an exception
  // escaping from the request handler.
  std::error_code path_ec;
  const std::filesystem::path url_path = std::filesystem::absolute(
      std::filesystem::path(url_string).lexically_normal(), path_ec);
  if (path_ec) {
    LOG_ERROR << "Invalid URL: " << url_string
              << ". Unable to resolve \"url\" property value: "
              << path_ec.message() << std::endl;
    evhtp_send_reply(req, EVHTP_RES_BADREQ);
    return;
  }
  const std::string url_abspath = url_path.string();

  // rfind(prefix, 0) == 0 is a "starts with" test.
  if (url_abspath.rfind("/dev/", 0) == 0 ||
      url_abspath.rfind("/proc/", 0) == 0 ||
      url_abspath.rfind("/sys/", 0) == 0) {
    LOG_ERROR << "Invalid URL: " << url_string
              << ". \"url\" property value cannot start with /dev/, /proc/, or "
                 "/sys/."
              << std::endl;
    evhtp_send_reply(req, EVHTP_RES_BADREQ);
    return;
  }

  if (action == "load") {
    (*parse_map)["url"] = url_string.c_str();
  }
  (*parse_map)["model_name_hash"] = model_name_string.c_str();

  /* Extract target_model, specified in header, to log the associated archive */
  const char* target_model =
      evhtp_kv_find(req->headers_in, "X-Amzn-SageMaker-Target-Model");

  /* If target_model is not available (e.g., in local testing), use
   * model_name_hash as target_model */
  if (target_model != nullptr) {
    (*parse_map)["target_model"] = target_model;
  } else {
    (*parse_map)["target_model"] = model_name_string.c_str();
  }

  // Log the resolved value rather than the raw header pointer, which may be
  // null; streaming a null `const char*` is undefined behaviour.
  LOG_INFO << "Loading SageMaker TargetModel: "
           << (*parse_map)["target_model"];

  return;
}

// Response callback registered with
// TRITONSERVER_InferenceRequestSetResponseCallback for MME inference.
//
// Exactly one non-null response is expected per request. The response is
// finalized into the HTTP reply, any error is turned into a JSON error body
// with a 400 (or 507 when the error looks like an out-of-memory condition),
// and the reply itself is deferred to the request's evhtp thread once the
// FINAL flag has been seen.
//
// \param response the inference response; may be nullptr
// \param flags TRITONSERVER_RESPONSE_COMPLETE_* flags for this callback
// \param userp the SagemakeInferRequestClass object of the request
void
SagemakerAPIServer::SagemakeInferRequestClass::InferResponseComplete(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
  // FIXME can't use InferRequestClass object here since it's lifetime
  // is different than response. For response we need to know how to
  // send each output (as json, shm, or binary) and that information
  // has to be maintained in a way that allows us to clean it up
  // appropriately if connection closed or last response sent.
  //
  // But for now userp is the InferRequestClass object and the end of
  // its life is in the ReplyCallback.

  SagemakerAPIServer::SagemakeInferRequestClass* infer_request =
      reinterpret_cast<SagemakerAPIServer::SagemakeInferRequestClass*>(userp);

  // Only real responses are counted; a callback without a response leaves
  // the count unchanged.
  if (response != nullptr) {
    ++infer_request->response_count_;
  }

  TRITONSERVER_Error* err = nullptr;
  if (infer_request->response_count_ != 1) {
    // Either no response has been received so far (count 0) or more than
    // one has.
    err = TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        std::string(
            "expected a single response, got " +
            std::to_string(infer_request->response_count_))
            .c_str());
  } else if (response != nullptr) {
    err = infer_request->FinalizeResponse(response);
#ifdef TRITON_ENABLE_TRACING
    if (infer_request->trace_ != nullptr) {
      infer_request->trace_->CaptureTimestamp(
          "INFER_RESPONSE_COMPLETE", TraceManager::CaptureTimestamp());
    }
#endif  // TRITON_ENABLE_TRACING
  }


  // The response is deleted unconditionally, i.e. this is also called when
  // `response` is nullptr; a failure is only logged.
  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceResponseDelete(response),
      "deleting inference response");


  if (err != nullptr) {
    EVBufferAddErrorJson(infer_request->req_->buffer_out, err);
    // [FIXME] In http_server.cc, error handling is enhanced to reporting
    // different error code according to the Triton error code, holding
    // the change from SageMaker endpoint as it may not fit their SLA.
    infer_request->response_code_ = EVHTP_RES_BADREQ;
    // Out-of-memory errors are reported with a dedicated status code.
    if (SageMakerMMECheckOOMError(err) == true) {
      LOG_VERBOSE(1)
          << "Received an OOM error during INVOKE MODEL. Returning a 507."
          << std::endl;
      infer_request->response_code_ = 507;
    }
    TRITONSERVER_ErrorDelete(err);
  }

  // Defer sending the response until FINAL flag is seen
  if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) == 0) {
    return;
  }
  // Hand the reply over to the thread stored in the request object.
  evthr_defer(infer_request->thread_, ReplyCallback, infer_request);
}

// Run an inference for a multi-model (MME) invoke request.
//
// Steps, each skipped once `err` is set:
//   1. reject decoupled models,
//   2. create the TRITONSERVER_InferenceRequest,
//   3. decompress the body if a supported Content-Encoding is used,
//   4. work out the JSON header length,
//   5. convert the body into request inputs, register the release/response
//      callbacks and submit the request with TRITONSERVER_ServerInferAsync.
// On success the reply is produced asynchronously by InferResponseComplete;
// on failure the error is reported through SageMakerMMEHandleOOMError.
//
// \param req the HTTP request; only POST is accepted
// \param model_name name of the model to run
// \param model_version_str requested model version as a string
void
SagemakerAPIServer::SageMakerMMEHandleInfer(
    evhtp_request_t* req, const std::string& model_name,
    const std::string& model_version_str)
{
  if (req->method != htp_method_POST) {
    evhtp_send_reply(req, EVHTP_RES_METHNALLOWED);
    return;
  }

  // Set once the request object has been created; controls whether the
  // connection must be resumed on the error path.
  // NOTE(review): no explicit pause call is visible in this function;
  // presumably CreateInferRequest pauses the connection — confirm.
  bool connection_paused = false;

  int64_t requested_model_version;
  auto err = GetModelVersionFromString(
      model_version_str.c_str(), &requested_model_version);

  // Models with a decoupled transaction policy are not supported here.
  if (err == nullptr) {
    uint32_t txn_flags;
    err = TRITONSERVER_ServerModelTransactionProperties(
        server_.get(), model_name.c_str(), requested_model_version, &txn_flags,
        nullptr /* voidp */);
    if ((err == nullptr) && (txn_flags & TRITONSERVER_TXN_DECOUPLED) != 0) {
      err = TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "HTTP end point doesn't support models with decoupled "
          "transaction policy");
    }
  }

  // If tracing is enabled see if this request should be traced.
  // Note: the trace is started even when `err` is already set.
  TRITONSERVER_InferenceTrace* triton_trace = nullptr;
  std::shared_ptr<TraceManager::Trace> trace =
      StartTrace(req, model_name, &triton_trace);

  // Create the inference request object which provides all information needed
  // for an inference.
  TRITONSERVER_InferenceRequest* irequest = nullptr;
  std::shared_ptr<TRITONSERVER_InferenceRequest> irequest_shared = nullptr;
  if (err == nullptr) {
    err = TRITONSERVER_InferenceRequestNew(
        &irequest, server_.get(), model_name.c_str(), requested_model_version);
  }
  if (err == nullptr) {
    // The shared_ptr's deleter is what eventually deletes the Triton request.
    irequest_shared = std::shared_ptr<TRITONSERVER_InferenceRequest>(
        irequest, [](TRITONSERVER_InferenceRequest* request) {
          LOG_TRITONSERVER_ERROR(
              TRITONSERVER_InferenceRequestDelete(request),
              "deleting HTTP/REST inference request");
        });
  }
  // Decompress request body if it is compressed in supported type
  // NOTE(review): decompressed_buffer is handed to RequestReleasePayload
  // further down; if an error occurs before that point, nothing in this
  // function frees it — confirm whether that is a leak.
  evbuffer* decompressed_buffer = nullptr;
  if (err == nullptr) {
    auto compression_type = GetRequestCompressionType(req);
    switch (compression_type) {
      case DataCompressor::Type::DEFLATE:
      case DataCompressor::Type::GZIP: {
        decompressed_buffer = evbuffer_new();
        err = DataCompressor::DecompressData(
            compression_type, req->buffer_in, decompressed_buffer,
            max_input_size_);
        break;
      }
      case DataCompressor::Type::UNKNOWN: {
        // Encounter unsupported compressed type,
        // send 415 error with supported types in Accept-Encoding
        // NOTE(review): this early return bypasses the error-cleanup block
        // at the end of the function (including the TraceRelease call) —
        // confirm that is intended.
        evhtp_headers_add_header(
            req->headers_out,
            evhtp_header_new(kAcceptEncodingHTTPHeader, "gzip, deflate", 1, 1));
        evhtp_send_reply(req, EVHTP_RES_UNSUPPORTED);
        return;
      }
      case DataCompressor::Type::IDENTITY:
        // Do nothing
        break;
    }
  }

  // Get the header length
  size_t header_length;
  if (err == nullptr) {
    // Set to body size in case there is no Content-Length to compare with
    int32_t content_length = evbuffer_get_length(req->buffer_in);
    if (decompressed_buffer == nullptr) {
      const char* content_length_c_str =
          evhtp_kv_find(req->headers_in, kContentLengthHeader);
      if (content_length_c_str != nullptr) {
        // NOTE(review): std::atoi does not throw, so the catch block below
        // is never entered; a non-numeric Content-Length is read as 0.
        try {
          content_length = std::atoi(content_length_c_str);
        }
        catch (const std::invalid_argument& ia) {
          err = TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG,
              (std::string("Unable to parse ") + kContentLengthHeader +
               ", got: " + content_length_c_str)
                  .c_str());
        }
      }
    } else {
      // The Content-Length doesn't reflect the actual request body size
      // if compression is used, set 'content_length' to the decompressed size
      content_length = evbuffer_get_length(decompressed_buffer);
    }

    if (err == nullptr) {
      err = GetInferenceHeaderLength(req, content_length, &header_length);
    }
  }

  if (err == nullptr) {
    connection_paused = true;

    // Both objects are held by unique_ptr until the request has been
    // submitted successfully; they are released (ownership given up) only
    // after TRITONSERVER_ServerInferAsync succeeds.
    auto infer_request = CreateInferRequest(req, irequest_shared);
    auto request_release_payload = std::make_unique<RequestReleasePayload>(
        irequest_shared, decompressed_buffer);

#ifdef TRITON_ENABLE_TRACING
    infer_request->trace_ = trace;
#endif  // TRITON_ENABLE_TRACING

    // Always true at this point (no assignment to `err` since the enclosing
    // check); kept as in the original.
    if (err == nullptr) {
      // A header length of zero means the body carries no JSON header and is
      // passed on as raw input.
      if (header_length != 0) {
        err = EVBufferToInput(
            model_name, irequest,
            (decompressed_buffer == nullptr) ? req->buffer_in
                                             : decompressed_buffer,
            infer_request.get(), header_length);
      } else {
        err = EVBufferToRawInput(
            model_name, irequest,
            (decompressed_buffer == nullptr) ? req->buffer_in
                                             : decompressed_buffer,
            infer_request.get());
      }
    }
    if (err == nullptr) {
      err = TRITONSERVER_InferenceRequestSetReleaseCallback(
          irequest, InferRequestClass::InferRequestComplete,
          request_release_payload.get());
      if (err == nullptr) {
        err = TRITONSERVER_InferenceRequestSetResponseCallback(
            irequest, allocator_,
            reinterpret_cast<void*>(&infer_request->alloc_payload_),
            SagemakerAPIServer::SagemakeInferRequestClass::
                InferResponseComplete,
            reinterpret_cast<void*>(infer_request.get()));

        LOG_VERBOSE(1) << std::endl;
      }
      if (err == nullptr) {
        err = TRITONSERVER_ServerInferAsync(
            server_.get(), irequest, triton_trace);
#ifdef TRITON_ENABLE_TRACING
        // Cleared regardless of the result of the call above, so the
        // TraceRelease in the error block below is skipped from here on.
        if (trace != nullptr) {
          trace->trace_ = nullptr;
        }
#endif  // TRITON_ENABLE_TRACING
      }
      if (err == nullptr) {
        // Submission succeeded: the callbacks now reference these objects,
        // so the unique_ptrs must not destroy them.
        infer_request.release();
        request_release_payload.release();
      }
    }
  }

  if (err != nullptr) {
    LOG_VERBOSE(1) << "Infer failed: " << TRITONSERVER_ErrorMessage(err);
    evhtp_headers_add_header(
        req->headers_out,
        evhtp_header_new(kContentTypeHeader, "application/json", 1, 1));

    // Sends the error reply on `req`.
    // NOTE(review): presumably this also selects the status code for
    // out-of-memory errors, as the name suggests — confirm.
    SageMakerMMEHandleOOMError(req, err);

    if (connection_paused) {
      evhtp_request_resume(req);
    }
    TRITONSERVER_ErrorDelete(err);
#ifdef TRITON_ENABLE_TRACING
    // If HTTP server still owns Triton trace
    if ((trace != nullptr) && (trace->trace_ != nullptr)) {
      TraceManager::TraceRelease(trace->trace_, trace->trace_userp_);
    }
#endif  // TRITON_ENABLE_TRACING
  }
}

// Check, via the model repository index, whether an unload has finished.
//
// The index is scanned for an entry whose name equals `model_name`, whose
// state equals UNLOAD_EXPECTED_STATE_ and whose reason equals
// UNLOAD_EXPECTED_REASON_. When such an entry is found,
// *is_model_unavailable is set to true; otherwise it is left untouched, so
// the caller must initialize it.
//
// \param model_name name of the model that is being unloaded
// \param is_model_unavailable [out] set to true when the model is found in
// the expected "unloaded" state
// \return nullptr on success, otherwise the error reported while obtaining
// or reading the index (owned by the caller)
TRITONSERVER_Error*
SagemakerAPIServer::SageMakerMMECheckUnloadedModelIsUnavailable(
    const char* model_name, bool* is_model_unavailable)
{
  /* Use the RepositoryIndex API to check if the model state has become
  UNAVAILABLE i.e. model is no longer in the 'in-the-process-of' being
  UNLOADED. Consequently, the reason field should be 'unloaded'.*/
  TRITONSERVER_Message* server_model_index_message = nullptr;
  const uint32_t ready_flag = 0;  // value of 1 should be set if only the
                                  // 'ready' models are required from the
                                  // index. In this case, we need all models.

  // Propagate a failure instead of ignoring it: an ignored error object
  // would leak and the message pointer would remain null.
  RETURN_IF_ERR(TRITONSERVER_ServerModelIndex(
      server_.get(), ready_flag, &server_model_index_message));

  // Ensures the message is deleted on every return path below.
  std::shared_ptr<TRITONSERVER_Message> shared_ptr_msg(
      server_model_index_message,
      [](TRITONSERVER_Message* msg) { TRITONSERVER_MessageDelete(msg); });

  const char* index_buffer;
  size_t index_byte_size;

  RETURN_IF_ERR(TRITONSERVER_MessageSerializeToJson(
      server_model_index_message, &index_buffer, &index_byte_size));

  /* Read into json buffer*/
  triton::common::TritonJson::Value server_model_index_json;
  RETURN_IF_ERR(server_model_index_json.Parse(index_buffer, index_byte_size));

  const std::string wanted_name(model_name);

  for (size_t id = 0; id < server_model_index_json.ArraySize(); ++id) {
    triton::common::TritonJson::Value index_json;
    RETURN_IF_ERR(server_model_index_json.IndexAsObject(id, &index_json));

    const char* name;
    size_t name_len;
    RETURN_IF_ERR(index_json.MemberAsString("name", &name, &name_len));
    if (wanted_name != name) {
      continue;
    }

    const char* state;
    size_t state_len;
    RETURN_IF_ERR(index_json.MemberAsString("state", &state, &state_len));
    if (!(std::string(state) == UNLOAD_EXPECTED_STATE_)) {
      continue;
    }

    const char* reason;
    size_t reason_len;
    RETURN_IF_ERR(index_json.MemberAsString("reason", &reason, &reason_len));
    if (!(std::string(reason) == UNLOAD_EXPECTED_REASON_)) {
      continue;
    }

    *is_model_unavailable = true;

    // The version is only needed for the log line below.
    const char* version;
    size_t version_len;
    RETURN_IF_ERR(
        index_json.MemberAsString("version", &version, &version_len));

    LOG_VERBOSE(1) << "Discovered model: " << name << ", version: " << version
                   << " in state: " << state << " for the reason: " << reason;

    break;
  }

  return nullptr;
}

// Unload an MME model and unregister its model repository.
//
// Sequence:
//   1. reply 404 if `model_name_hash` is not in sagemaker_models_list_,
//   2. unload the model together with its dependents (400 on failure),
//   3. poll the repository index until the model is reported unavailable or
//      UNLOAD_TIMEOUT_SECS_ has elapsed,
//   4. unregister the repository (400 on failure, 200 otherwise) and remove
//      the entry from sagemaker_models_list_.
// models_list_mutex_ is held from step 1 until the function returns.
//
// \param req request to reply on; the X-Amzn-SageMaker-Target-Model header,
// when present, supplies the model name passed to the Triton unload API
// \param model_name_hash key of the model in sagemaker_models_list_
void
SagemakerAPIServer::SageMakerMMEUnloadModel(
    evhtp_request_t* req, const char* model_name_hash)
{
  /* Extract targetModel to log the associated archive */
  const char* target_model =
      evhtp_kv_find(req->headers_in, "X-Amzn-SageMaker-Target-Model");

  /* If target_model is not available (e.g., in local testing), use
   * model_name_hash as target_model */
  if (target_model == nullptr) {
    target_model = model_name_hash;
  }

  std::lock_guard<std::mutex> lock(models_list_mutex_);
  if (sagemaker_models_list_.find(model_name_hash) ==
      sagemaker_models_list_.end()) {
    LOG_VERBOSE(1) << "Model " << target_model << " with model hash "
                   << model_name_hash << " is not loaded." << std::endl;
    evhtp_send_reply(req, EVHTP_RES_NOTFOUND); /* 404*/
    return;
  }

  LOG_INFO << "Unloading SageMaker TargetModel: " << target_model << std::endl;

  // A monotonic clock is used for the timeout so that wall-clock adjustments
  // can neither shorten nor extend the wait.
  const auto start_time = std::chrono::steady_clock::now();

  /* Always unload dependents as well - this is required to unload dependents in
   * ensemble */
  TRITONSERVER_Error* unload_err =
      TRITONSERVER_ServerUnloadModelAndDependents(server_.get(), target_model);

  if (unload_err != nullptr) {
    EVBufferAddErrorJson(req->buffer_out, unload_err);
    evhtp_send_reply(req, EVHTP_RES_BADREQ);

    LOG_ERROR
        << "Error when unloading SageMaker Model with dependents for model: "
        << target_model << std::endl;

    TRITONSERVER_ErrorDelete(unload_err);
    return;
  }

  /*Note: Model status check is repo-specific and therefore must be run before
   * unregistering the repo, else the model information is lost*/
  bool is_model_unavailable = false;
  int64_t unload_time_in_secs = 0;

  /* Wait for the model to be completely unloaded. SageMaker waits a maximum
  of 360 seconds for the UNLOAD request to timeout. Setting a limit of 350
  seconds for Triton unload. The unload call above has succeeded if this
  point is reached.*/
  LOG_VERBOSE(1) << "Using Model Repository Index during UNLOAD to check for "
                    "status of model hash: "
                 << model_name_hash << " for model: " << target_model;
  while (!is_model_unavailable &&
         unload_time_in_secs < UNLOAD_TIMEOUT_SECS_) {
    LOG_VERBOSE(1) << "In the loop to wait for model to be unavailable";
    TRITONSERVER_Error* check_err = SageMakerMMECheckUnloadedModelIsUnavailable(
        target_model, &is_model_unavailable);
    if (check_err != nullptr) {
      // A failed status check stops the polling but does not abort the
      // unload: the repository is still unregistered below.
      LOG_ERROR << "Error: Received non-zero exit code on checking for "
                   "model unavailability. "
                << TRITONSERVER_ErrorMessage(check_err);
      TRITONSERVER_ErrorDelete(check_err);
      break;
    }
    std::this_thread::sleep_for(
        std::chrono::milliseconds(UNLOAD_SLEEP_MILLISECONDS_));

    unload_time_in_secs =
        std::chrono::duration_cast<std::chrono::seconds>(
            std::chrono::steady_clock::now() - start_time)
            .count();
  }
  LOG_INFO << "UNLOAD for model " << target_model << " completed in "
           << unload_time_in_secs << " seconds.";

  if (!is_model_unavailable &&
      (unload_time_in_secs >= UNLOAD_TIMEOUT_SECS_)) {
    LOG_ERROR << "Error: UNLOAD did not complete within expected "
              << UNLOAD_TIMEOUT_SECS_
              << " seconds. This may "
                 "result in SageMaker UNLOAD timeout.";
  }

  // Copy the path: the map entry is erased at the end of the function.
  std::string repo_parent_path = sagemaker_models_list_.at(model_name_hash);

  TRITONSERVER_Error* unregister_err =
      TRITONSERVER_ServerUnregisterModelRepository(
          server_.get(), repo_parent_path.c_str());

  if (unregister_err != nullptr) {
    EVBufferAddErrorJson(req->buffer_out, unregister_err);
    evhtp_send_reply(req, EVHTP_RES_BADREQ);
    LOG_ERROR << "Unable to unregister model repository for path: "
              << repo_parent_path << std::endl;
  } else {
    evhtp_send_reply(req, EVHTP_RES_OK);
  }

  TRITONSERVER_ErrorDelete(unregister_err);

  // The entry is removed even when unregistering failed.
  sagemaker_models_list_.erase(model_name_hash);
}

// Reply with the name and repository URL of one MME model.
//
// Sends 404 when `model_name` is not present in sagemaker_models_list_;
// otherwise sends 200 with a JSON body of the form
// {"modelName": <model_name>, "modelUrl": <stored url>}.
// models_list_mutex_ is held for the duration of the call.
void
SagemakerAPIServer::SageMakerMMEGetModel(
    evhtp_request_t* req, const char* model_name)
{
  std::lock_guard<std::mutex> lock(models_list_mutex_);

  const auto entry = sagemaker_models_list_.find(model_name);
  if (entry == sagemaker_models_list_.end()) {
    evhtp_send_reply(req, EVHTP_RES_NOTFOUND); /* 404*/
    return;
  }

  // Build the response object.
  triton::common::TritonJson::Value model_json(
      triton::common::TritonJson::ValueType::OBJECT);
  model_json.AddString("modelName", model_name);
  model_json.AddString("modelUrl", entry->second);

  // Serialize it and copy the bytes into the reply body.
  triton::common::TritonJson::WriteBuffer serialized;
  serialized.Clear();
  model_json.Write(&serialized);

  evbuffer_add(req->buffer_out, serialized.Base(), serialized.Size());
  evhtp_send_reply(req, EVHTP_RES_OK);
}

// Reply with the list of MME models that are ready to be served.
//
// Sends 200 with a JSON body of the form
// {"models": [{"modelName": ..., "modelUrl": ...}, ...]}.
// Models that are registered in sagemaker_models_list_ but are not ready
// (or whose readiness cannot be determined) are omitted from the array.
// models_list_mutex_ is held for the duration of the call.
void
SagemakerAPIServer::SageMakerMMEListModel(evhtp_request_t* req)
{
  std::lock_guard<std::mutex> lock(models_list_mutex_);

  triton::common::TritonJson::Value sagemaker_list_json(
      triton::common::TritonJson::ValueType::OBJECT);

  triton::common::TritonJson::Value models_array(
      sagemaker_list_json, triton::common::TritonJson::ValueType::ARRAY);

  for (const auto& model_entry : sagemaker_models_list_) {
    // Readiness is queried for version 1 of the model, as before.
    bool ready = false;
    TRITONSERVER_Error* ready_err = TRITONSERVER_ServerModelIsReady(
        server_.get(), model_entry.first.c_str(), 1, &ready);
    if (ready_err != nullptr) {
      // Treat a failed query as "not ready" and release the error object
      // instead of dropping it on the floor.
      LOG_VERBOSE(1) << "Unable to query readiness of model "
                     << model_entry.first << ": "
                     << TRITONSERVER_ErrorMessage(ready_err);
      TRITONSERVER_ErrorDelete(ready_err);
      continue;
    }

    /* Add to return list only if model is ready to be served */
    if (!ready) {
      continue;
    }

    // The entry is created only for ready models so that no empty `{}`
    // placeholder ends up in the array.
    triton::common::TritonJson::Value model_url_pair(
        models_array, triton::common::TritonJson::ValueType::OBJECT);
    model_url_pair.AddString("modelName", model_entry.first);
    model_url_pair.AddString("modelUrl", model_entry.second);
    models_array.Append(std::move(model_url_pair));
  }

  sagemaker_list_json.Add("models", std::move(models_array));

  triton::common::TritonJson::WriteBuffer json_buffer;
  sagemaker_list_json.Write(&json_buffer);

  evbuffer_add(req->buffer_out, json_buffer.Base(), json_buffer.Size());
  evhtp_send_reply(req, EVHTP_RES_OK);
}

// Return true when the message carried by `err` contains any of the
// substrings below, which are treated as indicating an out-of-memory
// condition. The match is a plain case-sensitive substring search.
bool
SagemakerAPIServer::SageMakerMMECheckOOMError(TRITONSERVER_Error* err)
{
  const std::string error_text(TRITONSERVER_ErrorMessage(err));

  LOG_VERBOSE(1) << "Logging Verbose Error: " << std::endl
                 << error_text.c_str() << std::endl;

  const std::vector<std::string> oom_markers{
      "CUDA out of memory", /* pytorch */
      "CUDA_OUT_OF_MEMORY", /* tensorflow */
      "Out of memory",      /* generic */
      "Out Of Memory",
      "out of memory",
      "MemoryError",
      "OutOfMemory",
      "OOM",
      "Dst tensor is not initialized",
      "Src tensor is not initialized",
      "CNMEM_STATUS_OUT_OF_MEMORY",
      "CUDNN_STATUS_NOT_INITIALIZED",
      "CUBLAS_STATUS_ALLOC_FAILED",
      "CUBLAS_STATUS_NOT_INITIALIZED",
      "Failed to allocate memory",
      "failed to allocate memory",
      "No space left on device"};

  /*
    TODO: Improve the search to do pattern match on whole words only
  */
  for (const std::string& marker : oom_markers) {
    if (error_text.find(marker) == std::string::npos) {
      continue;
    }

    // First marker found wins; later markers are not examined.
    LOG_VERBOSE(1) << "OOM string '" << marker.c_str()
                   << "' detected in logs.";
    return true;
  }

  return false;
}

// Write `err` as JSON into the response body and reply with 507 when
// SageMakerMMECheckOOMError classifies it as out-of-memory, otherwise with
// 400. Ownership of `err` stays with the caller.
void
SagemakerAPIServer::SageMakerMMEHandleOOMError(
    evhtp_request_t* req, TRITONSERVER_Error* err)
{
  EVBufferAddErrorJson(req->buffer_out, err);

  if (!SageMakerMMECheckOOMError(err)) {
    /* Return a 400*/
    evhtp_send_reply(req, EVHTP_RES_BADREQ);
    return;
  }

  /* Return a 507*/
  evhtp_send_reply(req, 507);
  LOG_VERBOSE(1)
      << "Received an OOM error during LOAD MODEL. Returning a 507.";
}


// Handle a SageMaker MME "load model" request.
//
// Reads "url", "model_name_hash" and "target_model" from `parse_map` (all
// three must be present; std::unordered_map::at throws otherwise), registers
// the resolved directory as a model repository that maps the selected
// sub-directory to `target_model`, and then loads `target_model`.
//
// Replies sent on `req`:
//   200 - model loaded; (model_name_hash -> repository path) is recorded in
//         sagemaker_models_list_
//   400 - "url" resolves under /dev/, /proc/ or /sys/, the repository could
//         not be registered, or the load failed for a non-OOM reason
//   409 - registration or load reported TRITONSERVER_ERROR_ALREADY_EXISTS
//   507 - the load failed with an error SageMakerMMECheckOOMError classifies
//         as out-of-memory
//
// When the load fails (other than with ALREADY_EXISTS) the repository that
// was just registered is unregistered again. Exactly one reply is sent per
// request: a failure while unregistering is only logged, because the load
// failure has already been reported to the client at that point.
void
SagemakerAPIServer::SageMakerMMELoadModel(
    evhtp_request_t* req,
    const std::unordered_map<std::string, std::string> parse_map)
{
  std::string url_string = parse_map.at("url");
  std::string model_name_hash = parse_map.at("model_name_hash");
  std::string target_model = parse_map.at("target_model");

  std::filesystem::path url_path(url_string);
  url_path = std::filesystem::absolute(
      url_path.lexically_normal());  // Normalize the path to remove any
                                     // redundant components.
  std::string url_abspath = url_path.string();

  if (url_abspath.find("/dev/") == 0 || url_abspath.find("/proc/") == 0 ||
      url_abspath.find("/sys/") == 0) {
    LOG_ERROR << "Invalid repository path: " << url_string
              << ". \"url\" property of `parse_map`cannot start with /dev/, "
                 "/proc/, or /sys/."
              << std::endl;
    evhtp_send_reply(req, EVHTP_RES_BADREQ);
    return;
  }

  /* Check subdirs for models and find ensemble model within the url_abspath
   * If only 1 model, that will be selected as model_subdir
   * Else ensemble model directory is set as model_subdir
   */
  DIR* dir;
  struct dirent* ent;
  int dir_count = 0;
  std::string model_subdir, ensemble_model_subdir;

  if ((dir = opendir(url_abspath.c_str())) != NULL) {
    // Owns the handle so that closedir() runs when this scope is left.
    std::shared_ptr<DIR> dir_ptr{dir, closedir};
    while ((ent = readdir(dir)) != NULL) {
      // Remember the most recently seen real sub-directory.
      if ((ent->d_type == DT_DIR) && (strcmp(ent->d_name, ".") != 0) &&
          (strcmp(ent->d_name, "..") != 0)) {
        dir_count += 1;
        model_subdir = std::string(ent->d_name);
      }

      if (dir_count >= 2) {
        LOG_VERBOSE(1) << "More than one model detected in archive. "
                          "Checking if it is an ensemble."
                       << std::endl;
      }

      LOG_VERBOSE(1) << "Reading model sub-directory: " << model_subdir.c_str()
                     << std::endl;

      // Read the config.pbtxt file at each path, if available
      std::string ensemble_config_path =
          url_abspath + "/" + model_subdir + "/config.pbtxt";
      std::ifstream config_fstream(ensemble_config_path);
      std::stringstream ensemble_config_content;

      if (config_fstream.is_open()) {
        ensemble_config_content << config_fstream.rdbuf();
      } else {
        continue;  // A valid config.pbtxt does not exist at this path, or
                   // cannot be read
      }

      /* Compare matched string with `platform: "ensemble"` or
       * `platform:"ensemble"`. If present, the sub-directory is remembered
       * and later preferred as the model to load.
       */
      std::string detected_ensemble_regex;
      if (RE2::PartialMatch(
              ensemble_config_content.str(), platform_ensemble_regex_,
              &detected_ensemble_regex)) {
        LOG_INFO << "SageMaker front-end detected an Ensemble config at path: "
                 << ensemble_config_path << std::endl;
        ensemble_model_subdir = model_subdir;
      }

      if (dir_count > 5) {
        LOG_WARNING
            << "Several model directories found. If using ensemble, smaller "
               "ensembles are recommended for better memory management."
            << std::endl;
      }
    }
  }

  // An ensemble sub-directory, when one was detected, takes precedence over
  // the last plain sub-directory seen.
  if (!ensemble_model_subdir.empty()) {
    model_subdir = ensemble_model_subdir;
  }

  std::vector<const TRITONSERVER_Parameter*> subdir_modelname_map;

  /* Split repo path into three parts:
   * /opt/ml/models/<hash>/model/optional_customer_subdir
   * 1st repo_parent_path: /opt/ml/models/<hash>
   * 2nd subdir: model
   * 3rd customer_subdir: optional_customer_subdir
   */

  // NOTE(review): the match result is not checked; when url_abspath does not
  // have the expected shape the three outputs stay empty.
  std::string repo_parent_path, subdir, customer_subdir;
  RE2::FullMatch(
      url_abspath, model_path_regex_, &repo_parent_path, &subdir,
      &customer_subdir);

  std::string config_path = url_abspath + "/config.pbtxt";
  struct stat buffer;

  /* If config.pbtxt is at repo root,
   * then repo_parent_path = /opt/ml/models/<hash>/, and model_subdir = model
   * else repo_parent_path = /opt/ml/models/<hash>/model and
   * model_subdir = dir under model/
   */
  if (stat(config_path.c_str(), &buffer) == 0) {
    model_subdir = subdir;
  } else {
    repo_parent_path = url_abspath;
  }

  auto param = TRITONSERVER_ParameterNew(
      model_subdir.c_str(), TRITONSERVER_PARAMETER_STRING,
      target_model.c_str());

  if (param != nullptr) {
    subdir_modelname_map.emplace_back(param);
  } else {
    HTTP_RESPOND_IF_ERR(
        req, TRITONSERVER_ErrorNew(
                 TRITONSERVER_ERROR_INTERNAL,
                 "unexpected error on creating Triton parameter"));
  }

  /* Register repository with model mapping */
  TRITONSERVER_Error* err = nullptr;
  err = TRITONSERVER_ServerRegisterModelRepository(
      server_.get(), repo_parent_path.c_str(), subdir_modelname_map.data(),
      subdir_modelname_map.size());

  TRITONSERVER_ParameterDelete(param);

  // If a model_name is reused i.e. model_name is already mapped, return a 409
  if ((err != nullptr) &&
      (TRITONSERVER_ErrorCode(err) == TRITONSERVER_ERROR_ALREADY_EXISTS)) {
    EVBufferAddErrorJson(req->buffer_out, err);
    evhtp_send_reply(req, EVHTP_RES_CONFLICT); /* 409 */
    TRITONSERVER_ErrorDelete(err);
    return;
  } else if (err != nullptr) {
    EVBufferAddErrorJson(req->buffer_out, err);
    evhtp_send_reply(req, EVHTP_RES_BADREQ);
    TRITONSERVER_ErrorDelete(err);
    return;
  }

  err = TRITONSERVER_ServerLoadModel(server_.get(), target_model.c_str());

  if (err == nullptr) {
    std::lock_guard<std::mutex> lock(models_list_mutex_);

    /* Use model name hash as expected in SageMaker MME contract */
    sagemaker_models_list_.emplace(model_name_hash, repo_parent_path);
    evhtp_send_reply(req, EVHTP_RES_OK);
    return;
  }

  /* Unlikely after duplicate repo check, but in case Load Model also returns
   * ALREADY_EXISTS error */
  if (TRITONSERVER_ErrorCode(err) == TRITONSERVER_ERROR_ALREADY_EXISTS) {
    EVBufferAddErrorJson(req->buffer_out, err);
    evhtp_send_reply(req, EVHTP_RES_CONFLICT); /* 409 */
    TRITONSERVER_ErrorDelete(err);
    return;
  }

  // Any other load failure: report it (507 for OOM, 400 otherwise) and
  // release the error object before it goes out of use.
  SageMakerMMEHandleOOMError(req, err);
  TRITONSERVER_ErrorDelete(err);

  /* Unregister model repository in case of load failure*/
  TRITONSERVER_Error* unregister_err =
      TRITONSERVER_ServerUnregisterModelRepository(
          server_.get(), repo_parent_path.c_str());
  if (unregister_err != nullptr) {
    // The reply for this request has already been sent above, so the
    // failure can only be logged here.
    LOG_ERROR << "Unable to unregister model repository for path: "
              << repo_parent_path << ": "
              << TRITONSERVER_ErrorMessage(unregister_err) << std::endl;
    TRITONSERVER_ErrorDelete(unregister_err);
  } else {
    LOG_VERBOSE(1)
        << "Unregistered model repository due to load failure for model: "
        << target_model << std::endl;
  }

  return;
}
}}  // namespace triton::server


================================================
FILE: src/sagemaker_server.h
================================================
// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#pragma once

#include <sys/stat.h>

#include <fstream>
#include <mutex>

#include "common.h"
#include "dirent.h"
#include "http_server.h"
#include "triton/core/tritonserver.h"

namespace triton { namespace server {

// Handle Sagemaker HTTP requests to inference server APIs.
//
// Specialization of HTTPAPIServer that is constructed without port reuse and
// with an empty header-forward pattern. Several settings are read from
// SAGEMAKER_TRITON_* environment variables when the object is constructed.
// Instances are created through Create(); the constructor is private.
class SagemakerAPIServer : public HTTPAPIServer {
 public:
  // Factory for the server. The new instance is returned through
  // `sagemaker_server`; the parameters mirror those of the private
  // constructor below.
  static TRITONSERVER_Error* Create(
      const std::shared_ptr<TRITONSERVER_Server>& server,
      triton::server::TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& smb_manager,
      const int32_t port, const std::string address, const int thread_cnt,
      const size_t max_input_size, const RestrictedFeatures& restricted_apis,
      std::unique_ptr<HTTPServer>* sagemaker_server);

  // Inference request type used by this server. It declares its own
  // InferResponseComplete callback and overrides SetResponseHeader; both are
  // defined outside this header.
  class SagemakeInferRequestClass : public InferRequestClass {
   public:
    explicit SagemakeInferRequestClass(
        TRITONSERVER_Server* server, evhtp_request_t* req,
        DataCompressor::Type response_compression_type,
        const std::shared_ptr<TRITONSERVER_InferenceRequest>& triton_request,
        const std::shared_ptr<SharedMemoryManager>& shm_manager)
        : InferRequestClass(
              server, req, response_compression_type, triton_request,
              shm_manager)
    {
    }
    // Keep the base-class overloads visible next to the static one declared
    // below.
    using InferRequestClass::InferResponseComplete;
    static void InferResponseComplete(
        TRITONSERVER_InferenceResponse* response, const uint32_t flags,
        void* userp);

    void SetResponseHeader(
        const bool has_binary_data, const size_t header_length) override;
  };

 private:
  // Builds the route regexes and captures the environment-driven settings:
  //   SAGEMAKER_TRITON_PING_MODE           (default "ready")
  //   SAGEMAKER_TRITON_DEFAULT_MODEL_NAME  (default is a placeholder string)
  //   SAGEMAKER_TRITON_INFERENCE_TYPE      (default "infer")
  explicit SagemakerAPIServer(
      const std::shared_ptr<TRITONSERVER_Server>& server,
      triton::server::TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      const int32_t port, const std::string address, const int thread_cnt,
      const size_t max_input_size, const RestrictedFeatures& restricted_apis)
      : HTTPAPIServer(
            server, trace_manager, shm_manager, port, false /* reuse_port */,
            address, "" /* header_forward_pattern */, thread_cnt,
            max_input_size, restricted_apis),
        ping_regex_(R"(/ping)"), invocations_regex_(R"(/invocations)"),
        models_regex_(R"(/models(?:/)?([^/]+)?(/invoke)?)"),
        model_path_regex_(
            R"((\/opt\/ml\/models\/[0-9A-Za-z._]+)\/(model)\/?([0-9A-Za-z._]+)?)"),
        platform_ensemble_regex_(R"(platform:(\s)*\"ensemble\")"),
        ping_mode_(GetEnvironmentVariableOrDefault(
            "SAGEMAKER_TRITON_PING_MODE", "ready")),
        model_name_(GetEnvironmentVariableOrDefault(
            "SAGEMAKER_TRITON_DEFAULT_MODEL_NAME",
            "unspecified_SAGEMAKER_TRITON_DEFAULT_MODEL_NAME")),
        model_version_str_(""), inference_type_(GetEnvironmentVariableOrDefault(
                                    "SAGEMAKER_TRITON_INFERENCE_TYPE", "infer"))
  {
  }

  // Fills `parse_map` from the request for the given `action`; defined
  // outside this header.
  void ParseSageMakerRequest(
      evhtp_request_t* req,
      std::unordered_map<std::string, std::string>* parse_map,
      const std::string& action);

  // Multi-model-endpoint (MME) handlers. Definitions live in the .cc file.
  void SageMakerMMEHandleInfer(
      evhtp_request_t* req, const std::string& model_name,
      const std::string& model_version_str);

  void SageMakerMMELoadModel(
      evhtp_request_t* req,
      const std::unordered_map<std::string, std::string> parse_map);

  void SageMakerMMEHandleOOMError(
      evhtp_request_t* req, TRITONSERVER_Error* load_err);

  static bool SageMakerMMECheckOOMError(TRITONSERVER_Error* load_err);

  void SageMakerMMEUnloadModel(evhtp_request_t* req, const char* model_name);

  TRITONSERVER_Error* SageMakerMMECheckUnloadedModelIsUnavailable(
      const char* model_name, bool* is_model_unavailable);

  void SageMakerMMEListModel(evhtp_request_t* req);

  void SageMakerMMEGetModel(evhtp_request_t* req, const char* model_name);

  // Entry point for every incoming request (overrides HTTPAPIServer).
  void Handle(evhtp_request_t* req) override;

  // Wrap the Triton request in the SageMaker-specific request class, using
  // this server's response compression type.
  std::unique_ptr<InferRequestClass> CreateInferRequest(
      evhtp_request_t* req,
      const std::shared_ptr<TRITONSERVER_InferenceRequest>& triton_request)
      override
  {
    return std::unique_ptr<InferRequestClass>(new SagemakeInferRequestClass(
        server_.get(), req, GetResponseCompressionType(req), triton_request,
        shm_manager_));
  }
  TRITONSERVER_Error* GetInferenceHeaderLength(
      evhtp_request_t* req, int32_t content_length,
      size_t* header_length) override;


  // Currently the compression schema hasn't been defined,
  // assume identity compression type is used for both request and response
  DataCompressor::Type GetRequestCompressionType(evhtp_request_t* req) override
  {
    return DataCompressor::Type::IDENTITY;
  }
  DataCompressor::Type GetResponseCompressionType(evhtp_request_t* req) override
  {
    return DataCompressor::Type::IDENTITY;
  }

  // Route patterns.
  re2::RE2 ping_regex_;
  re2::RE2 invocations_regex_;
  re2::RE2 models_regex_;
  // Splits a model URL into repository parent path, "model" directory and an
  // optional customer sub-directory (three capture groups).
  re2::RE2 model_path_regex_;
  // Matches a `platform: "ensemble"` entry in config.pbtxt content.
  re2::RE2 platform_ensemble_regex_;

  // Value of SAGEMAKER_TRITON_PING_MODE, "ready" when unset.
  const std::string ping_mode_;

  /* For single model mode, assume that only one version of "model" is presented
   */
  const std::string model_name_;
  const std::string model_version_str_;

  static const std::string binary_mime_type_;

  // Triton HTTP handler to map Sagemaker /invocations route to: "infer",
  // "generate", or "generate_stream". The type is defined in the environment
  // variable SAGEMAKER_TRITON_INFERENCE_TYPE and the default value is "infer".
  const std::string inference_type_;

  /* Maintain list of loaded models */
  std::unordered_map<std::string, std::string> sagemaker_models_list_;

  /* Mutex to handle concurrent updates */
  std::mutex models_list_mutex_;

  /* Constants */
  // NOTE(review): judging by the names these bound the wait for an unloaded
  // model to reach the expected state/reason; confirm against the unload
  // handler in the .cc file.
  const uint32_t UNLOAD_TIMEOUT_SECS_ = 350;
  const uint32_t UNLOAD_SLEEP_MILLISECONDS_ = 500;
  const std::string UNLOAD_EXPECTED_STATE_ = "UNAVAILABLE";
  const std::string UNLOAD_EXPECTED_REASON_ = "unloaded";
};

}}  // namespace triton::server


================================================
FILE: src/shared_memory_manager.cc
================================================
// Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "shared_memory_manager.h"

// Not supporting shared memory for now
#ifdef _WIN32
namespace triton { namespace server {

namespace {

// Every shared-memory entry point reports the same error on Windows builds;
// build it in one place.
TRITONSERVER_Error*
SharedMemoryUnsupportedError()
{
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED,
      "Shared memory feature is currently not supported on Windows");
}

}  // namespace

// Nothing is ever registered on Windows, so there is nothing to release.
SharedMemoryManager::~SharedMemoryManager() {}

// Always fails with TRITONSERVER_ERROR_UNSUPPORTED.
TRITONSERVER_Error*
SharedMemoryManager::RegisterSystemSharedMemory(
    const std::string& name, const std::string& shm_key, const size_t offset,
    const size_t byte_size)
{
  return SharedMemoryUnsupportedError();
}

#ifdef TRITON_ENABLE_GPU
// Always fails with TRITONSERVER_ERROR_UNSUPPORTED.
TRITONSERVER_Error*
SharedMemoryManager::RegisterCUDASharedMemory(
    const std::string& name, const cudaIpcMemHandle_t* cuda_shm_handle,
    const size_t byte_size, const int device_id)
{
  return SharedMemoryUnsupportedError();
}

// Always fails with TRITONSERVER_ERROR_UNSUPPORTED.
TRITONSERVER_Error*
SharedMemoryManager::GetCUDAHandle(
    const std::string& name, cudaIpcMemHandle_t** cuda_mem_handle)
{
  return SharedMemoryUnsupportedError();
}
#endif  // TRITON_ENABLE_GPU

// Always fails with TRITONSERVER_ERROR_UNSUPPORTED; no output is written.
TRITONSERVER_Error*
SharedMemoryManager::GetMemoryInfo(
    const std::string& name, size_t offset, size_t byte_size,
    void** shm_mapped_addr, TRITONSERVER_MemoryType* memory_type,
    int64_t* device_id,
    std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>* shm_info)
{
  return SharedMemoryUnsupportedError();
}

// Always fails with TRITONSERVER_ERROR_UNSUPPORTED.
TRITONSERVER_Error*
SharedMemoryManager::GetStatus(
    const std::string& name, TRITONSERVER_MemoryType memory_type,
    triton::common::TritonJson::Value* shm_status)
{
  return SharedMemoryUnsupportedError();
}

// Always fails with TRITONSERVER_ERROR_UNSUPPORTED.
TRITONSERVER_Error*
SharedMemoryManager::Unregister(
    const std::string& name, TRITONSERVER_MemoryType memory_type)
{
  return SharedMemoryUnsupportedError();
}

// Always fails with TRITONSERVER_ERROR_UNSUPPORTED.
TRITONSERVER_Error*
SharedMemoryManager::UnregisterAll(TRITONSERVER_MemoryType memory_type)
{
  return SharedMemoryUnsupportedError();
}

// Always fails with TRITONSERVER_ERROR_UNSUPPORTED.
TRITONSERVER_Error*
SharedMemoryManager::UnregisterHelper(
    const std::string& name, TRITONSERVER_MemoryType memory_type)
{
  return SharedMemoryUnsupportedError();
}
}}  // namespace triton::server
#else
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include "common.h"
#include "triton/common/logging.h"

namespace triton { namespace server {

namespace {

// Open the existing POSIX shared memory object named `shm_key` for reading
// and writing. The descriptor (or -1 on failure) is stored in `*shm_fd`.
// Returns nullptr on success, otherwise a TRITONSERVER_ERROR_INTERNAL error.
TRITONSERVER_Error*
OpenSharedMemoryRegion(const std::string& shm_key, int* shm_fd)
{
  const int fd = shm_open(shm_key.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
  *shm_fd = fd;
  if (fd != -1) {
    return nullptr;
  }

  // The errno detail goes to the verbose log only; the client sees the key.
  LOG_VERBOSE(1) << "shm_open failed, errno: " << errno;
  const std::string message =
      "Unable to open shared memory region: '" + shm_key + "'";
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, message.c_str());
}

// Query the size in bytes of the object behind `shm_fd` via fstat() and
// store it in `*shm_region_size`. `shm_key` is used only for the error text.
// Returns nullptr on success, otherwise a TRITONSERVER_ERROR_INTERNAL error.
TRITONSERVER_Error*
GetSharedMemoryRegionSize(
    const std::string& shm_key, int shm_fd, size_t* shm_region_size)
{
  // Both failure modes report the same client-facing error.
  const auto invalid_region_error = [&shm_key]() {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        std::string("Invalid shared memory region: '" + shm_key + "'").c_str());
  };

  struct stat region_stat;
  if (fstat(shm_fd, &region_stat) == -1) {
    LOG_VERBOSE(1) << "fstat on shm_fd failed, errno: " << errno;
    return invalid_region_error();
  }

  // off_t is a signed type, so reject a negative size before converting it
  // to size_t.
  if (region_stat.st_size < 0) {
    LOG_VERBOSE(1) << "File size of shared memory region must be non-negative";
    return invalid_region_error();
  }

  *shm_region_size = static_cast<size_t>(region_stat.st_size);
  return nullptr;  // success
}

// Verify that the window [offset, offset + byte_size) lies inside the shared
// memory object behind `shm_fd`.
//
// `name` and `shm_key` are used only for error text. Returns nullptr when
// the window fits, TRITONSERVER_ERROR_INVALID_ARG when it does not, or the
// error from GetSharedMemoryRegionSize when the size cannot be determined.
TRITONSERVER_Error*
CheckSharedMemoryRegionSize(
    const std::string& name, const std::string& shm_key, int shm_fd,
    size_t offset, size_t byte_size)
{
  size_t shm_region_size = 0;
  RETURN_IF_ERR(GetSharedMemoryRegionSize(shm_key, shm_fd, &shm_region_size));
  // User-provided offset and byte_size should not go out-of-bounds. The
  // comparison is arranged so that `offset + byte_size` is never computed:
  // that sum can wrap around size_t and would then pass a naive check.
  if ((offset > shm_region_size) || (byte_size > (shm_region_size - offset))) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "failed to register shared memory region '" + name +
            "': invalid args")
            .c_str());
  }

  return nullptr;  // success
}

// Map `byte_size` bytes of the object behind `shm_fd`, starting at `offset`,
// into this process as a shared read/write mapping. The resulting address
// (or MAP_FAILED) is stored in `*mapped_addr`.
// Returns nullptr on success, otherwise a TRITONSERVER_ERROR_INTERNAL error
// carrying the strerror() text.
TRITONSERVER_Error*
MapSharedMemory(
    const int shm_fd, const size_t offset, const size_t byte_size,
    void** mapped_addr)
{
  // map shared memory to process address space
  *mapped_addr =
      mmap(NULL, byte_size, PROT_WRITE | PROT_READ, MAP_SHARED, shm_fd, offset);
  if (*mapped_addr == MAP_FAILED) {
    // Capture errno before anything else can overwrite it.
    const int saved_errno = errno;
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        std::string(
            "unable to process address space, errno: " +
            std::string(std::strerror(saved_errno)))
            .c_str());
  }

  return nullptr;
}

// Close the descriptor `shm_fd`.
// Returns nullptr on success, otherwise a TRITONSERVER_ERROR_INTERNAL error
// carrying the strerror() text.
TRITONSERVER_Error*
CloseSharedMemoryRegion(int shm_fd)
{
  if (close(shm_fd) != -1) {
    return nullptr;
  }

  const int close_errno = errno;
  std::string message = "unable to close shared memory descriptor, errno: ";
  message += std::strerror(close_errno);
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, message.c_str());
}

// Remove the mapping of `byte_size` bytes starting at `mapped_addr`.
// Returns nullptr on success, otherwise a TRITONSERVER_ERROR_INTERNAL error
// carrying the strerror() text.
TRITONSERVER_Error*
UnmapSharedMemory(void* mapped_addr, size_t byte_size)
{
  if (munmap(mapped_addr, byte_size) != -1) {
    return nullptr;
  }

  const int unmap_errno = errno;
  std::string message = "unable to munmap shared memory region, errno: ";
  message += std::strerror(unmap_errno);
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, message.c_str());
}

#ifdef TRITON_ENABLE_GPU
// Select CUDA device `device_id` for the calling thread and open the IPC
// memory handle `*cuda_shm_handle` on it. The device pointer is stored in
// `*data_ptr`.
//
// Returns nullptr on success. On any CUDA failure the detailed reason is
// logged and a generic TRITONSERVER_ERROR_INVALID_ARG error is returned so
// that CUDA details are not sent to the client.
TRITONSERVER_Error*
OpenCudaIPCRegion(
    const cudaIpcMemHandle_t* cuda_shm_handle, void** data_ptr, int device_id)
{
  // Make `device_id` the current device. A failure here (for example an
  // out-of-range device id) must stop the registration; otherwise the handle
  // would be opened on whichever device happens to be current.
  cudaError_t err = cudaSetDevice(device_id);
  if (err != cudaSuccess) {
    LOG_ERROR << "failed to set CUDA device " << device_id << ": "
              << cudaGetErrorString(err);
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "failed to register shared memory region: invalid args");
  }

  // Open CUDA IPC handle and read data from it
  err = cudaIpcOpenMemHandle(
      data_ptr, *cuda_shm_handle, cudaIpcMemLazyEnablePeerAccess);
  if (err != cudaSuccess) {
    // Log detailed error message and send generic error to client
    LOG_ERROR << "failed to open CUDA IPC handle: " << cudaGetErrorString(err);
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "failed to register shared memory region: invalid args");
  }

  return nullptr;
}

// Look up the CUDA driver function called `name` through the runtime API's
// `cudaGetDriverEntryPoint` and store its address in `*func_ptr`.
//
// Resolving driver symbols this way avoids linking against the CUDA driver
// library, so a GPU-enabled Triton build can still start in a CPU-only
// environment.
//
// Returns nullptr on success, otherwise a TRITONSERVER_ERROR_INTERNAL error;
// the CUDA error text is logged rather than returned.
TRITONSERVER_Error*
GetCudaDriverEntryPoint(const char* name, void** func_ptr)
{
  const cudaError_t lookup_status =
      cudaGetDriverEntryPoint(name, func_ptr, cudaEnableDefault);
  if (lookup_status == cudaSuccess) {
    return nullptr;
  }

  LOG_ERROR << "Failed to get CUDA driver entry point for " << name << ": "
            << cudaGetErrorString(lookup_status);
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_INTERNAL, "Failed to get CUDA driver entry point");
}

// Ask the CUDA driver (cuMemGetAddressRange) for the size of the allocation
// containing `data_ptr` and store it in `shm_region_size`. The base address
// of the allocation is not requested.
//
// Returns nullptr on success, otherwise a TRITONSERVER_ERROR_INTERNAL error
// (including when the driver entry points cannot be resolved).
TRITONSERVER_Error*
GetCudaSharedMemoryRegionSize(CUdeviceptr data_ptr, size_t& shm_region_size)
{
  using MemGetAddressRangeFn =
      CUresult (*)(CUdeviceptr*, size_t*, CUdeviceptr);
  using GetErrorStringFn = CUresult (*)(CUresult, const char**);

  // Resolve both driver functions up front.
  void* address_range_entry = nullptr;
  void* error_string_entry = nullptr;
  RETURN_IF_ERR(
      GetCudaDriverEntryPoint("cuMemGetAddressRange", &address_range_entry));
  RETURN_IF_ERR(
      GetCudaDriverEntryPoint("cuGetErrorString", &error_string_entry));

  const auto mem_get_address_range =
      reinterpret_cast<MemGetAddressRangeFn>(address_range_entry);
  const auto get_error_string =
      reinterpret_cast<GetErrorStringFn>(error_string_entry);

  CUdeviceptr* no_base = nullptr;
  const CUresult range_status =
      mem_get_address_range(no_base, &shm_region_size, data_ptr);
  if (range_status == CUDA_SUCCESS) {
    return nullptr;
  }

  // Translate the driver status into text for the error message.
  const char* status_text;
  if (get_error_string(range_status, &status_text) != CUDA_SUCCESS) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL, "Failed to get CUDA error string");
  }

  const std::string message =
      "Failed to get CUDA address range: " + std::string(status_text);
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, message.c_str());
}

// Verify that `byte_size` does not exceed the size of the CUDA allocation
// containing `data_ptr`. `name` is used only for the error text.
//
// Returns nullptr when the size fits. When it does not, or when the
// allocation size cannot be queried, a generic TRITONSERVER_ERROR_INVALID_ARG
// error is returned; the detailed query failure is logged, not returned.
TRITONSERVER_Error*
CheckCudaSharedMemoryRegionSize(
    const std::string& name, CUdeviceptr data_ptr, size_t byte_size)
{
  size_t region_bytes = 0;
  TRITONSERVER_Error* query_err =
      GetCudaSharedMemoryRegionSize(data_ptr, region_bytes);

  const bool query_failed = (query_err != nullptr);
  if (query_failed) {
    // Log detailed error message and send generic error to client
    LOG_ERROR << TRITONSERVER_ErrorMessage(query_err);
    TRITONSERVER_ErrorDelete(query_err);
  }

  // User-provided byte_size should not go out-of-bounds.
  if (!query_failed && (byte_size <= region_bytes)) {
    return nullptr;
  }

  const std::string message =
      "failed to register shared memory region '" + name + "': invalid args";
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, message.c_str());
}
#endif  // TRITON_ENABLE_GPU

}  // namespace

// Unregister every CPU and GPU region still held by the manager.
//
// UnregisterAll hands back an error object owned by the caller; a destructor
// cannot propagate it, so it is logged and released here instead of leaked.
SharedMemoryManager::~SharedMemoryManager()
{
  const TRITONSERVER_MemoryType memory_types[] = {
      TRITONSERVER_MEMORY_CPU, TRITONSERVER_MEMORY_GPU};
  for (const TRITONSERVER_MemoryType memory_type : memory_types) {
    TRITONSERVER_Error* err = UnregisterAll(memory_type);
    if (err != nullptr) {
      LOG_ERROR << "failed to unregister shared memory regions on shutdown: "
                << TRITONSERVER_ErrorMessage(err);
      TRITONSERVER_ErrorDelete(err);
    }
  }
}

// Register the window [offset, offset + byte_size) of the POSIX shared
// memory object `shm_key` under `name`.
//
// The object is opened, the window is checked against the object's size,
// the window is mapped read/write, and the descriptor is closed again (the
// mapping stays valid after close). On success an entry is added to
// shared_memory_map_.
//
// Returns nullptr on success, otherwise:
//   - the error from ValidateSharedMemoryKey,
//   - TRITONSERVER_ERROR_ALREADY_EXISTS when `name` is already registered,
//   - the error from opening or size-checking the object,
//   - TRITONSERVER_ERROR_INVALID_ARG when mapping or closing fails.
// No descriptor, mapping or error object is left behind on a failure path.
TRITONSERVER_Error*
SharedMemoryManager::RegisterSystemSharedMemory(
    const std::string& name, const std::string& shm_key, const size_t offset,
    const size_t byte_size)
{
  // Check if the shared memory key starts with the reserved prefix
  RETURN_IF_ERR(ValidateSharedMemoryKey(name, shm_key));

  std::lock_guard<std::mutex> lock(mu_);

  if (shared_memory_map_.find(name) != shared_memory_map_.end()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_ALREADY_EXISTS,
        std::string("shared memory region '" + name + "' already in manager")
            .c_str());
  }

  // Always open a fresh descriptor. Descriptors recorded for earlier
  // registrations were closed right after their mmap() below, so reusing one
  // would operate on a stale (possibly recycled) descriptor and then close
  // it a second time.
  int shm_fd = -1;
  RETURN_IF_ERR(OpenSharedMemoryRegion(shm_key, &shm_fd));

  // Enforce that registered region is in-bounds of shm file object.
  TRITONSERVER_Error* err_size =
      CheckSharedMemoryRegionSize(name, shm_key, shm_fd, offset, byte_size);
  if (err_size != nullptr) {
    // Do not leak the descriptor when the request is rejected; the size
    // error is the one worth reporting.
    TRITONSERVER_Error* err_cleanup = CloseSharedMemoryRegion(shm_fd);
    if (err_cleanup != nullptr) {
      LOG_VERBOSE(1) << TRITONSERVER_ErrorMessage(err_cleanup);
      TRITONSERVER_ErrorDelete(err_cleanup);
    }
    return err_size;
  }

  // Mmap and then close the shared memory descriptor
  void* mapped_addr = nullptr;
  TRITONSERVER_Error* err_mmap =
      MapSharedMemory(shm_fd, offset, byte_size, &mapped_addr);
  TRITONSERVER_Error* err_close = CloseSharedMemoryRegion(shm_fd);
  if (err_mmap != nullptr) {
    const std::string message = "failed to register shared memory region '" +
                                name +
                                "': " + TRITONSERVER_ErrorMessage(err_mmap);
    TRITONSERVER_ErrorDelete(err_mmap);
    if (err_close != nullptr) {
      TRITONSERVER_ErrorDelete(err_close);
    }
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG, message.c_str());
  }

  if (err_close != nullptr) {
    const std::string message = "failed to register shared memory region '" +
                                name +
                                "': " + TRITONSERVER_ErrorMessage(err_close);
    TRITONSERVER_ErrorDelete(err_close);

    // The region is not going to be registered, so drop the mapping too.
    TRITONSERVER_Error* err_unmap = UnmapSharedMemory(mapped_addr, byte_size);
    if (err_unmap != nullptr) {
      LOG_VERBOSE(1) << TRITONSERVER_ErrorMessage(err_unmap);
      TRITONSERVER_ErrorDelete(err_unmap);
    }
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG, message.c_str());
  }

  // NOTE: the descriptor value stored here has already been closed; it is
  // kept only because the entry's constructor takes it.
  shared_memory_map_.insert(std::make_pair(
      name, std::make_shared<SharedMemoryManager::SharedMemoryInfo>(
                name, shm_key, offset, byte_size, shm_fd, mapped_addr,
                TRITONSERVER_MEMORY_CPU, 0)));

  return nullptr;  // success
}

#ifdef TRITON_ENABLE_GPU
// Register `byte_size` bytes of the CUDA IPC allocation identified by
// `cuda_shm_handle` on device `device_id` under `name`.
//
// Returns nullptr on success, otherwise:
//   - TRITONSERVER_ERROR_ALREADY_EXISTS when `name` is already registered,
//   - the error from opening the IPC handle,
//   - the error from the size check; in that case the IPC mapping opened
//     for this call is closed again before returning.
TRITONSERVER_Error*
SharedMemoryManager::RegisterCUDASharedMemory(
    const std::string& name, const cudaIpcMemHandle_t* cuda_shm_handle,
    const size_t byte_size, const int device_id)
{
  // Serialize all operations that write/read current shared memory regions
  std::lock_guard<std::mutex> lock(mu_);

  // If name is already in shared_memory_map_ then return error saying already
  // registered
  if (shared_memory_map_.find(name) != shared_memory_map_.end()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_ALREADY_EXISTS,
        std::string("shared memory region '" + name + "' already in manager")
            .c_str());
  }

  // register
  void* mapped_addr = nullptr;

  // Get CUDA shared memory base address
  RETURN_IF_ERR(OpenCudaIPCRegion(cuda_shm_handle, &mapped_addr, device_id));

  // Enforce that registered region is in-bounds of the CUDA allocation.
  TRITONSERVER_Error* size_err = CheckCudaSharedMemoryRegionSize(
      name, reinterpret_cast<CUdeviceptr>(mapped_addr), byte_size);
  if (size_err != nullptr) {
    // The region is rejected, so release the IPC mapping opened above
    // instead of leaving it open with no owner.
    const cudaError_t close_status = cudaIpcCloseMemHandle(mapped_addr);
    if (close_status != cudaSuccess) {
      LOG_ERROR << "failed to close CUDA IPC handle for rejected region '"
                << name << "': " << cudaGetErrorString(close_status);
    }
    return size_err;
  }

  shared_memory_map_.insert(std::make_pair(
      name, std::make_shared<SharedMemoryManager::CUDASharedMemoryInfo>(
                name, "", 0, byte_size, 0, mapped_addr, TRITONSERVER_MEMORY_GPU,
                device_id, cuda_shm_handle)));

  return nullptr;  // success
}
#endif  // TRITON_ENABLE_GPU

// Look up the region registered as 'name' and return the address of the
// sub-range [offset, offset + byte_size) inside it, together with the
// region's memory type and device id.
//
// Errors:
//   TRITONSERVER_ERROR_NOT_FOUND   - no region with that name.
//   TRITONSERVER_ERROR_INVALID_ARG - 'offset' is not inside the region,
//                                    'offset + byte_size' overflows size_t,
//                                    or the range ends past the region.
//
// When 'shm_info' is non-null it receives a shared_ptr to the region record,
// which raises the record's use count for as long as the caller holds it.
TRITONSERVER_Error*
SharedMemoryManager::GetMemoryInfo(
    const std::string& name, size_t offset, size_t byte_size,
    void** shm_mapped_addr, TRITONSERVER_MemoryType* memory_type,
    int64_t* device_id,
    std::shared_ptr<const SharedMemoryManager::SharedMemoryInfo>* shm_info)
{
  // shared_memory_map_ must not be touched without holding mu_.
  std::lock_guard<std::mutex> guard(mu_);

  const auto entry = shared_memory_map_.find(name);
  if (entry == shared_memory_map_.end()) {
    const std::string msg =
        "Unable to find shared memory region: '" + name + "'";
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_NOT_FOUND, msg.c_str());
  }
  const std::shared_ptr<SharedMemoryInfo>& region = entry->second;

  // The offset has to address a byte inside the region, so a zero-sized
  // region rejects every offset.
  const size_t region_size = region->byte_size_;
  if (offset >= region_size) {
    const std::string msg =
        "Invalid offset for shared memory region: '" + name + "'";
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
  }

  // Reject requests whose end position cannot be represented in size_t
  // before computing it.
  const size_t max_byte_size = SIZE_MAX - offset;
  if (byte_size > max_byte_size) {
    const std::string msg =
        "Integer overflow detected: byte_size (" + std::to_string(byte_size) +
        ") + offset (" + std::to_string(offset) + ") exceeds maximum value (" +
        std::to_string(SIZE_MAX) + ") for region '" + name + "'";
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
  }

  // The requested range must end within the region.
  const size_t range_end = offset + byte_size;
  if (range_end > region_size) {
    const std::string msg =
        "Invalid offset + byte size for shared memory region: '" + name + "'";
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, msg.c_str());
  }

  if (shm_info != nullptr) {
    *shm_info = std::static_pointer_cast<const SharedMemoryInfo>(region);
  }

  uint8_t* const region_base = static_cast<uint8_t*>(region->mapped_addr_);
  *shm_mapped_addr = static_cast<void*>(region_base + offset);
  *memory_type = region->kind_;
  *device_id = region->device_id_;

  return nullptr;
}

#ifdef TRITON_ENABLE_GPU
// Return a pointer to the CUDA IPC handle stored for the region 'name'.
//
// name            Name of the registered region.
// cuda_mem_handle Receives a pointer to the handle held inside the region
//                 record owned by shared_memory_map_.
//
// Returns nullptr on success, or TRITONSERVER_ERROR_NOT_FOUND when no region
// has that name or when the region is a system (non-CUDA) region.
//
// NOTE(review): the returned pointer refers to storage owned by the map
// entry and is handed out after mu_ is released; callers presumably must not
// use it once the region may have been unregistered -- confirm.
TRITONSERVER_Error*
SharedMemoryManager::GetCUDAHandle(
    const std::string& name, cudaIpcMemHandle_t** cuda_mem_handle)
{
  // protect shared_memory_map_ from concurrent access
  std::lock_guard<std::mutex> lock(mu_);

  auto it = shared_memory_map_.find(name);
  if (it == shared_memory_map_.end()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_NOT_FOUND,
        std::string("Unable to find shared memory region: '" + name + "'")
            .c_str());
  }

  // System regions are stored as plain SharedMemoryInfo objects. Treating
  // one as CUDASharedMemoryInfo would read past the end of the object, so
  // the kind must be verified before downcasting. The wording matches the
  // kind-mismatch error reported by GetStatus.
  if (it->second->kind_ != TRITONSERVER_MEMORY_GPU) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_NOT_FOUND,
        std::string(
            "The region named '" + name +
            "' is registered as system shared "
            "memory, not CUDA shared memory")
            .c_str());
  }

  // GPU entries are only created by RegisterCUDASharedMemory, which always
  // stores a CUDASharedMemoryInfo, so the static downcast is valid here.
  CUDASharedMemoryInfo& shm_info =
      static_cast<CUDASharedMemoryInfo&>(*(it->second));
  *cuda_mem_handle = &(shm_info.cuda_ipc_handle_);

  return nullptr;
}
#endif

// Append the status of registered regions of kind 'memory_type' to the JSON
// value 'shm_status'.
//
// With an empty 'name' every region of the requested kind is reported. With
// a non-empty 'name' only that region is reported, and
// TRITONSERVER_ERROR_NOT_FOUND is returned when it does not exist or is
// registered with the other memory kind.
//
// Each reported region is an object holding "name", "byte_size" and either
// "key" + "offset" (system memory) or "device_id" (anything else).
TRITONSERVER_Error*
SharedMemoryManager::GetStatus(
    const std::string& name, TRITONSERVER_MemoryType memory_type,
    triton::common::TritonJson::Value* shm_status)
{
  std::lock_guard<std::mutex> guard(mu_);

  // Builds the JSON object describing one region and appends it to
  // 'shm_status'.
  const auto append_region =
      [shm_status, memory_type](
          const std::string& region_name,
          const SharedMemoryInfo& info) -> TRITONSERVER_Error* {
    triton::common::TritonJson::Value entry(
        *shm_status, triton::common::TritonJson::ValueType::OBJECT);
    RETURN_IF_ERR(
        entry.AddString("name", region_name.c_str(), region_name.size()));
    if (memory_type == TRITONSERVER_MEMORY_CPU) {
      RETURN_IF_ERR(entry.AddString(
          "key", info.shm_key_.c_str(), info.shm_key_.size()));
      RETURN_IF_ERR(entry.AddUInt("offset", info.offset_));
    } else {
      RETURN_IF_ERR(entry.AddUInt("device_id", info.device_id_));
    }
    RETURN_IF_ERR(entry.AddUInt("byte_size", info.byte_size_));
    RETURN_IF_ERR(shm_status->Append(std::move(entry)));
    return nullptr;
  };

  // No name given: report every region of the requested kind.
  if (name.empty()) {
    for (const auto& named_region : shared_memory_map_) {
      if (named_region.second->kind_ != memory_type) {
        continue;
      }
      RETURN_IF_ERR(append_region(named_region.first, *named_region.second));
    }
    return nullptr;  // success
  }

  const auto found = shared_memory_map_.find(name);
  if (found == shared_memory_map_.end()) {
    std::string msg;
    if (memory_type == TRITONSERVER_MEMORY_GPU) {
      msg = "Unable to find cuda shared memory region: '" + name + "'";
    } else {
      msg = "Unable to find system shared memory region: '" + name + "'";
    }
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_NOT_FOUND, msg.c_str());
  }

  // The name exists but belongs to the other kind of shared memory.
  if (found->second->kind_ != memory_type) {
    std::string msg;
    if (found->second->kind_ == TRITONSERVER_MEMORY_GPU) {
      msg = "The region named '" + name +
            "' is registered as CUDA shared "
            "memory, not system shared memory";
    } else {
      msg = "The region named '" + name +
            "' is registered as system shared "
            "memory, not CUDA shared memory";
    }
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_NOT_FOUND, msg.c_str());
  }

  RETURN_IF_ERR(append_region(found->second->name_, *found->second));

  return nullptr;  // success
}

// Unregister the region 'name' of kind 'memory_type'.
//
// This is the locking entry point: it takes mu_ and delegates the actual
// work, and the returned status, to UnregisterHelper.
TRITONSERVER_Error*
SharedMemoryManager::Unregister(
    const std::string& name, TRITONSERVER_MemoryType memory_type)
{
  // UnregisterHelper reads and modifies shared_memory_map_, so the map's
  // mutex is held for the whole call.
  std::lock_guard<std::mutex> guard(mu_);

  TRITONSERVER_Error* const status = UnregisterHelper(name, memory_type);
  return status;
}

// Unregister every region of kind 'memory_type'.
//
// Only TRITONSERVER_MEMORY_CPU and TRITONSERVER_MEMORY_GPU are acted upon;
// any other memory type is a no-op that reports success.
//
// Returns nullptr when every matching region was handled without error.
// Otherwise the names of the regions that failed are logged and returned in
// a single TRITONSERVER_ERROR_INTERNAL error. The individual failure reasons
// are only emitted at verbose log level 1.
TRITONSERVER_Error*
SharedMemoryManager::UnregisterAll(TRITONSERVER_MemoryType memory_type)
{
  // Serialize all operations that write/read current shared memory regions
  std::lock_guard<std::mutex> lock(mu_);

  std::string error_message = "Failed to unregister the following ";
  if (memory_type == TRITONSERVER_MEMORY_CPU) {
    error_message += "system shared memory regions: ";
  } else if (memory_type == TRITONSERVER_MEMORY_GPU) {
    error_message += "cuda shared memory regions: ";
  } else {
    // No other kind of region is handled by this function.
    return nullptr;
  }

  std::vector<std::string> unregister_fails;
  for (auto it = shared_memory_map_.cbegin();
       it != shared_memory_map_.cend();) {
    // UnregisterHelper may erase the current entry, so step the loop
    // iterator forward before calling it.
    const auto current = it++;
    if (current->second->kind_ != memory_type) {
      continue;
    }

    TRITONSERVER_Error* err = UnregisterHelper(current->first, memory_type);
    if (err != nullptr) {
      // On failure the entry was not erased, so 'current' is still valid.
      unregister_fails.push_back(current->first);
      LOG_VERBOSE(1) << TRITONSERVER_ErrorMessage(err);
      // The error object is owned by this function once returned; release it
      // after logging so it does not leak.
      TRITONSERVER_ErrorDelete(err);
    }
  }

  if (!unregister_fails.empty()) {
    for (const auto& unreg_fail : unregister_fails) {
      error_message += unreg_fail + " ,";
    }
    LOG_ERROR << error_message;
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL, error_message.c_str());
  }

  return nullptr;
}

// Remove the region 'name' from shared_memory_map_ if it exists and is of
// kind 'memory_type'. The caller must already hold mu_.
//
// A missing name or a kind mismatch is not an error: nothing happens and
// nullptr is returned. A region whose record is still referenced elsewhere
// (use count above one) is only flagged as awaiting unregistration and left
// in the map. Otherwise the memory is unmapped (system) or its CUDA IPC
// handle is closed (CUDA) and the entry is erased; a failure in that step is
// returned and the entry is kept.
TRITONSERVER_Error*
SharedMemoryManager::UnregisterHelper(
    const std::string& name, TRITONSERVER_MemoryType memory_type)
{
  const auto entry = shared_memory_map_.find(name);
  const bool is_target = (entry != shared_memory_map_.end()) &&
                         (entry->second->kind_ == memory_type);
  if (!is_target) {
    return nullptr;
  }

  const std::shared_ptr<SharedMemoryInfo>& region = entry->second;

  // Someone besides the map still holds the record: defer the removal.
  if (region.use_count() > 1) {
    region->awaiting_unregister_ = true;
    LOG_VERBOSE(1)
        << "Shared memory region '" << name
        << "' will be unregistered after in-flight requests complete.";
    return nullptr;
  }

  if (region->kind_ == TRITONSERVER_MEMORY_CPU) {
    RETURN_IF_ERR(UnmapSharedMemory(region->mapped_addr_, region->byte_size_));
  } else {
#ifdef TRITON_ENABLE_GPU
    const cudaError_t close_status =
        cudaIpcCloseMemHandle(region->mapped_addr_);
    if (close_status != cudaSuccess) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          std::string(
              "failed to close CUDA IPC handle: " +
              std::string(cudaGetErrorString(close_status)))
              .c_str());
    }
#else
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "failed to unregister CUDA shared memory region: '" + name +
            "', GPUs not supported")
            .c_str());
#endif  // TRITON_ENABLE_GPU
  }

  // The backing memory is released; drop the bookkeeping entry as well.
  shared_memory_map_.erase(entry);

  return nullptr;
}

}}  // namespace triton::server
#endif


================================================
FILE: src/shared_memory_manager.h
================================================
// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <cstring>
#include <map>
#include <memory>
#include <mutex>

#include "triton/core/tritonserver.h"

#define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
#define TRITONJSON_STATUSRETURN(M) \
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
#define TRITONJSON_STATUSSUCCESS nullptr
#include "triton/common/triton_json.h"

#ifdef TRITON_ENABLE_GPU
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif  // TRITON_ENABLE_GPU

namespace triton { namespace server {

/// Keeps track of the system (CPU) and CUDA (GPU) shared memory regions
/// registered with the server, keyed by region name. All access to the
/// internal map is guarded by a single mutex.
class SharedMemoryManager {
 public:
  SharedMemoryManager() = default;
  ~SharedMemoryManager();

  /// A struct that records the shared memory regions registered by the shared
  /// memory manager.
  struct SharedMemoryInfo {
    SharedMemoryInfo(
        const std::string& name, const std::string& shm_key,
        const size_t offset, const size_t byte_size, int shm_fd,
        void* mapped_addr, const TRITONSERVER_MemoryType kind,
        const int64_t device_id)
        : name_(name), shm_key_(shm_key), offset_(offset),
          byte_size_(byte_size), shm_fd_(shm_fd), mapped_addr_(mapped_addr),
          kind_(kind), device_id_(device_id), awaiting_unregister_(false)
    {
    }

    // Name the region was registered under (the key in the manager's map).
    std::string name_;
    // Key of the shared memory object; registered as "" for CUDA regions.
    std::string shm_key_;
    // Offset of the region within the shared memory object; registered as 0
    // for CUDA regions.
    size_t offset_;
    // Size of the region in bytes.
    size_t byte_size_;
    // Descriptor recorded at registration; registered as 0 for CUDA regions.
    // NOTE(review): presumably the fd of the opened shm object -- confirm.
    int shm_fd_;
    // Base address that GetMemoryInfo() adds the requested offset to.
    void* mapped_addr_;
    // TRITONSERVER_MEMORY_CPU for system regions, TRITONSERVER_MEMORY_GPU for
    // CUDA regions.
    TRITONSERVER_MemoryType kind_;
    // Device the region lives on; registered as 0 for system regions.
    int64_t device_id_;

    // Set when an unregister request arrives while the record is still
    // referenced outside the manager.
    // TODO (DLIS-7620): avoid explicit flag and use smart pointers
    bool awaiting_unregister_;
  };

#ifdef TRITON_ENABLE_GPU
  /// SharedMemoryInfo extended with a copy of the CUDA IPC handle the region
  /// was registered with.
  struct CUDASharedMemoryInfo : SharedMemoryInfo {
    CUDASharedMemoryInfo(
        const std::string& name, const std::string& shm_key,
        const size_t offset, const size_t byte_size, int shm_fd,
        void* mapped_addr, const TRITONSERVER_MemoryType kind,
        const int64_t device_id, const cudaIpcMemHandle_t* cuda_ipc_handle)
        : SharedMemoryInfo(
              name, shm_key, offset, byte_size, shm_fd, mapped_addr, kind,
              device_id),
          cuda_ipc_handle_(*cuda_ipc_handle)
    {
    }

    // Copy of the handle passed at registration; 'cuda_ipc_handle' must be
    // non-null because it is dereferenced by the constructor.
    cudaIpcMemHandle_t cuda_ipc_handle_;
  };
#endif

  /// Add a shared memory block representing shared memory in system
  /// (CPU) memory to the manager. Return TRITONSERVER_ERROR_ALREADY_EXISTS
  /// if a shared memory block of the same name already exists in the manager.
  /// \param name The name of the memory block.
  /// \param shm_key The name of the posix shared memory object
  /// containing the block of memory.
  /// \param offset The offset within the shared memory object to the
  /// start of the block.
  /// \param byte_size The size, in bytes of the block.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* RegisterSystemSharedMemory(
      const std::string& name, const std::string& shm_key, const size_t offset,
      const size_t byte_size);

#ifdef TRITON_ENABLE_GPU
  /// Add a shared memory block representing shared memory in CUDA
  /// (GPU) memory to the manager. Return TRITONSERVER_ERROR_ALREADY_EXISTS
  /// if a shared memory block of the same name already exists in the manager.
  /// \param name The name of the memory block.
  /// \param cuda_shm_handle The unique memory handle to the cuda shared
  /// memory block.
  /// \param byte_size The size, in bytes of the block.
  /// \param device_id The GPU number the shared memory region is in.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* RegisterCUDASharedMemory(
      const std::string& name, const cudaIpcMemHandle_t* cuda_shm_handle,
      const size_t byte_size, const int device_id);
#endif  // TRITON_ENABLE_GPU

  /// Get the access information for the shared memory block
  /// with the specified name. Return TRITONSERVER_ERROR_NOT_FOUND
  /// if named block doesn't exist, or TRITONSERVER_ERROR_INVALID_ARG if the
  /// requested offset / byte size does not fit inside the block.
  /// \param name The name of the shared memory block to get.
  /// \param offset The offset in the block
  /// \param byte_size The byte size to request for the shm region
  /// \param shm_mapped_addr Returns the pointer to the shared
  /// memory block with the specified name and offset
  /// \param memory_type Returns the type of the memory
  /// \param device_id Returns the device id associated with the
  /// memory block
  /// \param shm_info Returns a shared pointer reference(read-only) to the
  /// shared memory block's information. May be nullptr, in which case no
  /// reference is returned.
  /// This pointer will automatically increase the usage count, preventing
  /// unregistration while the reference is held. The reference must be cleared
  /// or set to nullptr when no longer needed, to decrease the count and allow
  /// unregistration.
  /// \return a TRITONSERVER_Error indicating success or
  /// failure.
  TRITONSERVER_Error* GetMemoryInfo(
      const std::string& name, size_t offset, size_t byte_size,
      void** shm_mapped_addr, TRITONSERVER_MemoryType* memory_type,
      int64_t* device_id, std::shared_ptr<const SharedMemoryInfo>* shm_info);

#ifdef TRITON_ENABLE_GPU
  /// Get the CUDA memory handle associated with the block name.
  /// Return TRITONSERVER_ERROR_NOT_FOUND if named block doesn't exist.
  /// \param name The name of the shared memory block to get.
  /// \param cuda_mem_handle Returns the cuda memory handle with the memory
  /// block.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* GetCUDAHandle(
      const std::string& name, cudaIpcMemHandle_t** cuda_mem_handle);
#endif

  /// Populates the status of active system/CUDA shared memory regions
  /// in the status JSON. If 'name' is empty then return status of all
  /// active system/CUDA shared memory regions as specified by 'memory_type'.
  /// Return TRITONSERVER_ERROR_NOT_FOUND if a non-empty 'name' does not exist
  /// or is registered with a different memory type.
  /// \param name The name of the shared memory block to get the status of.
  /// \param memory_type The type of memory to get the status of.
  /// \param shm_status Returns status of active shared memory blocks in JSON.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* GetStatus(
      const std::string& name, TRITONSERVER_MemoryType memory_type,
      triton::common::TritonJson::Value* shm_status);

  /// Removes the named shared memory block of the specified type from
  /// the manager. Any future attempt to get the details of this block
  /// will result in an error till another block with the same name is
  /// added to the manager. If the block is still referenced outside the
  /// manager it is only marked as awaiting unregistration and stays
  /// registered.
  /// \param name The name of the shared memory block to remove.
  /// \param memory_type The type of memory to unregister.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* Unregister(
      const std::string& name, TRITONSERVER_MemoryType memory_type);

  /// Unregister all shared memory blocks of specified type from the manager.
  /// \param memory_type The type of memory to unregister.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* UnregisterAll(TRITONSERVER_MemoryType memory_type);

 private:
  /// A helper function to remove the named shared memory blocks of
  /// specified type. The caller must hold 'mu_'.
  TRITONSERVER_Error* UnregisterHelper(
      const std::string& name, TRITONSERVER_MemoryType memory_type);

  using SharedMemoryStateMap =
      std::map<std::string, std::shared_ptr<SharedMemoryInfo>>;
  // A map between the name and the details of the associated
  // shared memory block
  SharedMemoryStateMap shared_memory_map_;
  // A mutex to protect the concurrent access to shared_memory_map_
  std::mutex mu_;
};
}}  // namespace triton::server


================================================
FILE: src/simple.cc
================================================
// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <rapidjson/document.h>
#include <rapidjson/error/en.h>
#include <unistd.h>

#include <chrono>
#include <cstring>
#include <future>
#include <iostream>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#include "common.h"
#include "triton/core/tritonserver.h"

#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif  // TRITON_ENABLE_GPU

namespace ni = triton::server;

namespace {

// True once the '-m' command-line option has been parsed. When set,
// ResponseAlloc allocates outputs in 'requested_memory_type' and Check
// verifies that outputs were produced there.
bool enforce_memory_type = false;
// Memory type selected with '-m'; only consulted when 'enforce_memory_type'
// is true.
TRITONSERVER_MemoryType requested_memory_type;

#ifdef TRITON_ENABLE_GPU
// Deleter for buffers obtained from the CUDA runtime. The pointer's CUDA
// attributes are queried to decide between cudaFree (device memory) and
// cudaFreeHost (host memory); pointers of any other type are left untouched.
// Failures are reported on stderr and never thrown. A null pointer is a
// no-op.
static auto cuda_data_deleter = [](void* data) {
  if (data == nullptr) {
    return;
  }

  cudaPointerAttributes attr;
  auto cuerr = cudaPointerGetAttributes(&attr, data);
  if (cuerr != cudaSuccess) {
    std::cerr << "error: failed to get CUDA pointer attribute of " << data
              << ": " << cudaGetErrorString(cuerr) << std::endl;
    // 'attr' was not filled in, so the kind of memory is unknown. Stop here
    // rather than branch on an uninitialized value and report the same
    // failure a second time as a release error.
    return;
  }

  if (attr.type == cudaMemoryTypeDevice) {
    cuerr = cudaFree(data);
  } else if (attr.type == cudaMemoryTypeHost) {
    cuerr = cudaFreeHost(data);
  }
  if (cuerr != cudaSuccess) {
    std::cerr << "error: failed to release CUDA pointer " << data << ": "
              << cudaGetErrorString(cuerr) << std::endl;
  }
};
#endif  // TRITON_ENABLE_GPU

// Print the optional message 'msg' followed by the command-line help to
// stderr, then terminate the process with exit status 1. Never returns.
void
Usage(char** argv, const std::string& msg = std::string())
{
  std::ostream& out = std::cerr;

  const bool has_message = !msg.empty();
  if (has_message) {
    out << msg << std::endl;
  }

  out << "Usage: " << argv[0] << " [options]" << std::endl;

  // -m: memory type enforcement
  out << "\t-m <\"system\"|\"pinned\"|gpu>"
      << " Enforce the memory type for input and output tensors."
      << " If not specified, inputs will be in system memory and outputs"
      << " will be based on the model's preferred type." << std::endl;

  // -v: verbosity
  out << "\t-v Enable verbose logging" << std::endl;

  // -r: model repository location
  out << "\t-r [model repository absolute path]" << std::endl;

  exit(1);
}

// Response allocator callback: provide a buffer of 'byte_size' bytes for the
// output tensor 'tensor_name'.
//
// The buffer is placed in the preferred memory type unless '-m' was given
// (enforce_memory_type), in which case 'requested_memory_type' is used. Any
// memory type without a dedicated case below falls back to system memory.
//
// On success '*buffer' holds the allocation (nullptr when byte_size is 0),
// '*buffer_userp' holds a heap-allocated std::string with the tensor name
// (nullptr when byte_size is 0) which ResponseRelease deletes, and
// '*actual_memory_type' / '*actual_memory_type_id' describe where the buffer
// lives.
//
// Returns nullptr on success or a TRITONSERVER_ERROR_INTERNAL error when the
// allocation fails.
TRITONSERVER_Error*
ResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  // Initially attempt to make the actual memory type and id that we
  // allocate be the same as preferred memory type
  *actual_memory_type = preferred_memory_type;
  *actual_memory_type_id = preferred_memory_type_id;

  // If 'byte_size' is zero just return 'buffer' == nullptr, we don't
  // need to do any other book-keeping.
  if (byte_size == 0) {
    *buffer = nullptr;
    *buffer_userp = nullptr;
    std::cout << "allocated " << byte_size << " bytes for result tensor "
              << tensor_name << std::endl;
    return nullptr;  // Success
  }

  void* allocated_ptr = nullptr;
  if (enforce_memory_type) {
    *actual_memory_type = requested_memory_type;
  }

  switch (*actual_memory_type) {
#ifdef TRITON_ENABLE_GPU
    case TRITONSERVER_MEMORY_CPU_PINNED: {
      // A missing device or driver is tolerated here; the allocation call
      // below reports the failure if it cannot proceed.
      auto err = cudaSetDevice(*actual_memory_type_id);
      if ((err != cudaSuccess) && (err != cudaErrorNoDevice) &&
          (err != cudaErrorInsufficientDriver)) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            std::string(
                "unable to recover current CUDA device: " +
                std::string(cudaGetErrorString(err)))
                .c_str());
      }

      err = cudaHostAlloc(&allocated_ptr, byte_size, cudaHostAllocPortable);
      if (err != cudaSuccess) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            std::string(
                "cudaHostAlloc failed: " +
                std::string(cudaGetErrorString(err)))
                .c_str());
      }
      break;
    }

    case TRITONSERVER_MEMORY_GPU: {
      auto err = cudaSetDevice(*actual_memory_type_id);
      if ((err != cudaSuccess) && (err != cudaErrorNoDevice) &&
          (err != cudaErrorInsufficientDriver)) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            std::string(
                "unable to recover current CUDA device: " +
                std::string(cudaGetErrorString(err)))
                .c_str());
      }

      err = cudaMalloc(&allocated_ptr, byte_size);
      if (err != cudaSuccess) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            std::string(
                "cudaMalloc failed: " + std::string(cudaGetErrorString(err)))
                .c_str());
      }
      break;
    }
#endif  // TRITON_ENABLE_GPU

    // Use CPU memory if the requested memory type is unknown
    // (default case).
    case TRITONSERVER_MEMORY_CPU:
    default: {
      *actual_memory_type = TRITONSERVER_MEMORY_CPU;
      allocated_ptr = malloc(byte_size);
      break;
    }
  }

  // A non-zero request that produced no buffer is a failure. Reporting
  // success here would hand the caller '*buffer' and '*buffer_userp' values
  // that were never written.
  if (allocated_ptr == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        std::string(
            "failed to allocate " + std::to_string(byte_size) + " bytes in " +
            TRITONSERVER_MemoryTypeString(*actual_memory_type) +
            " for result tensor " + tensor_name)
            .c_str());
  }

  // Pass the tensor name with buffer_userp so we can show it when
  // releasing the buffer.
  *buffer = allocated_ptr;
  *buffer_userp = new std::string(tensor_name);
  std::cout << "allocated " << byte_size << " bytes in "
            << TRITONSERVER_MemoryTypeString(*actual_memory_type)
            << " for result tensor " << tensor_name << std::endl;

  return nullptr;  // Success
}

// Response allocator callback: release a buffer handed out by ResponseAlloc.
//
// 'buffer_userp' is the std::string tensor name created by ResponseAlloc (or
// nullptr); it is deleted here. The buffer is freed according to
// 'memory_type'. Release failures are printed to stderr and the function
// always reports success.
TRITONSERVER_Error*
ResponseRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  // Ownership of the name string was passed through 'buffer_userp'.
  std::string* owned_name = reinterpret_cast<std::string*>(buffer_userp);
  const std::string label =
      (owned_name != nullptr) ? *owned_name : std::string("<unknown>");

  std::cout << "Releasing buffer " << buffer << " of size " << byte_size
            << " in " << TRITONSERVER_MemoryTypeString(memory_type)
            << " for result '" << label << "'" << std::endl;

  switch (memory_type) {
    case TRITONSERVER_MEMORY_CPU: {
      free(buffer);
      break;
    }
#ifdef TRITON_ENABLE_GPU
    case TRITONSERVER_MEMORY_CPU_PINNED: {
      auto status = cudaSetDevice(memory_type_id);
      if (status == cudaSuccess) {
        status = cudaFreeHost(buffer);
      }
      if (status != cudaSuccess) {
        std::cerr << "error: failed to cudaFree " << buffer << ": "
                  << cudaGetErrorString(status) << std::endl;
      }
      break;
    }
    case TRITONSERVER_MEMORY_GPU: {
      auto status = cudaSetDevice(memory_type_id);
      if (status == cudaSuccess) {
        status = cudaFree(buffer);
      }
      if (status != cudaSuccess) {
        std::cerr << "error: failed to cudaFree " << buffer << ": "
                  << cudaGetErrorString(status) << std::endl;
      }
      break;
    }
#endif  // TRITON_ENABLE_GPU
    default: {
      std::cerr << "error: unexpected buffer allocated in CUDA managed memory"
                << std::endl;
      break;
    }
  }

  // Deleting a null pointer is a no-op, so no check is needed.
  delete owned_name;

  return nullptr;  // Success
}

// Request release callback. 'userp' must point to a std::promise<void>,
// which is fulfilled to signal that the request has been released. The
// request object itself is not touched here.
void
InferRequestRelease(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  auto* released = static_cast<std::promise<void>*>(userp);
  released->set_value();
}

// Response complete callback. 'userp' must point to a heap-allocated
// std::promise<TRITONSERVER_InferenceResponse*>. For a non-null response the
// promise is fulfilled with it and then deleted; a null response leaves the
// promise untouched.
void
InferResponseComplete(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
  if (response == nullptr) {
    return;
  }

  // Hand 'response' over to whoever waits on the matching future.
  using ResponsePromise = std::promise<TRITONSERVER_InferenceResponse*>;
  ResponsePromise* pending = static_cast<ResponsePromise*>(userp);
  pending->set_value(response);
  delete pending;
}

// Validate the model metadata and derive the properties the example needs.
//
// Every entry of "inputs" and "outputs" must have datatype INT32 or FP32,
// and all of them must share the same datatype.
//
// model_metadata Parsed metadata JSON; read through its "inputs", "outputs"
//                and "platform" members.
// is_int         Set to true when the common datatype is INT32.
// is_torch_model Set to true when "platform" is "pytorch_libtorch".
//
// Returns nullptr on success, TRITONSERVER_ERROR_UNSUPPORTED for any other
// datatype, or TRITONSERVER_ERROR_INVALID_ARG when datatypes differ. The
// output parameters are only written on success.
TRITONSERVER_Error*
ParseModelMetadata(
    const rapidjson::Document& model_metadata, bool* is_int,
    bool* is_torch_model)
{
  // Datatype shared by all tensors; established by the first input.
  std::string seen_data_type;

  // Checks a single tensor description. Only inputs ('may_establish' true)
  // are allowed to set the common datatype; outputs must match it.
  const auto check_tensor =
      [&seen_data_type](
          const rapidjson::Value& tensor,
          const bool may_establish) -> TRITONSERVER_Error* {
    const std::string data_type = tensor["datatype"].GetString();
    if ((data_type != "INT32") && (data_type != "FP32")) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "simple lib example only supports model with data type INT32 or "
          "FP32");
    }
    if (may_establish && seen_data_type.empty()) {
      seen_data_type = data_type;
    } else if (seen_data_type != data_type) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "the inputs and outputs of 'simple' model must have the same data "
          "type");
    }
    return nullptr;
  };

  for (const auto& input : model_metadata["inputs"].GetArray()) {
    TRITONSERVER_Error* err = check_tensor(input, true /* may_establish */);
    if (err != nullptr) {
      return err;
    }
  }
  for (const auto& output : model_metadata["outputs"].GetArray()) {
    TRITONSERVER_Error* err = check_tensor(output, false /* may_establish */);
    if (err != nullptr) {
      return err;
    }
  }

  *is_int = (seen_data_type == "INT32");
  *is_torch_model =
      (strcmp(model_metadata["platform"].GetString(), "pytorch_libtorch") == 0);
  return nullptr;
}

// Fill both buffers with 16 elements of type T stored as raw bytes:
// 'input0_data' receives 0, 1, ..., 15 and 'input1_data' receives all ones.
// Both vectors are resized to exactly 16 * sizeof(T) bytes.
template <typename T>
void
GenerateInputData(
    std::vector<char>* input0_data, std::vector<char>* input1_data)
{
  constexpr size_t kElementCount = 16;
  const size_t total_bytes = kElementCount * sizeof(T);

  input0_data->resize(total_bytes);
  input1_data->resize(total_bytes);

  // View the byte buffers as arrays of T for element-wise writes.
  T* const ramp = (T*)input0_data->data();
  T* const ones = (T*)input1_data->data();
  for (size_t idx = 0; idx != kElementCount; ++idx) {
    ramp[idx] = idx;
    ones[idx] = 1;
  }
}

// Interpret the four buffers as arrays of 16 elements of type T, print each
// sum and difference, and FAIL unless output0[i] == input0[i] + input1[i]
// and output1[i] == input0[i] - input1[i] for every element. The output
// names are only used in the failure messages.
template <typename T>
void
CompareResult(
    const std::string& output0_name, const std::string& output1_name,
    const void* input0, const void* input1, const char* output0,
    const char* output1)
{
  const T* const lhs = (T*)input0;
  const T* const rhs = (T*)input1;
  const T* const sums = (T*)output0;
  const T* const diffs = (T*)output1;

  for (size_t idx = 0; idx < 16; ++idx) {
    std::cout << lhs[idx] << " + " << rhs[idx] << " = " << sums[idx]
              << std::endl;
    std::cout << lhs[idx] << " - " << rhs[idx] << " = " << diffs[idx]
              << std::endl;

    if ((lhs[idx] + rhs[idx]) != sums[idx]) {
      FAIL("incorrect sum in " + output0_name);
    }
    if ((lhs[idx] - rhs[idx]) != diffs[idx]) {
      FAIL("incorrect difference in " + output1_name);
    }
  }
}

// Validate an inference response of the 'simple' model and compare its
// results with the inputs.
//
// The response must contain exactly two outputs named 'output0' and
// 'output1', each of shape [1, 16], datatype 'expected_datatype' and size
// 'expected_byte_size'. When '-m' was given, each output must also live in
// 'requested_memory_type'. Output contents are copied to host memory and
// then checked element-wise as int32 ('is_int' true) or float values. Any
// violation is reported through FAIL / FAIL_IF_ERR / FAIL_IF_CUDA_ERR.
void
Check(
    TRITONSERVER_InferenceResponse* response,
    const std::vector<char>& input0_data, const std::vector<char>& input1_data,
    const std::string& output0, const std::string& output1,
    const size_t expected_byte_size,
    const TRITONSERVER_DataType expected_datatype, const bool is_int)
{
  // Host-side copies of the output tensors, keyed by output name.
  std::unordered_map<std::string, std::vector<char>> output_data;

  uint32_t output_count;
  FAIL_IF_ERR(
      TRITONSERVER_InferenceResponseOutputCount(response, &output_count),
      "getting number of response outputs");
  if (output_count != 2) {
    FAIL("expecting 2 response outputs, got " + std::to_string(output_count));
  }

  for (uint32_t idx = 0; idx < output_count; ++idx) {
    const char* cname;
    TRITONSERVER_DataType datatype;
    const int64_t* shape;
    uint64_t dim_count;
    const void* base;
    size_t byte_size;
    TRITONSERVER_MemoryType memory_type;
    int64_t memory_type_id;
    void* userp;

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseOutput(
            response, idx, &cname, &datatype, &shape, &dim_count, &base,
            &byte_size, &memory_type, &memory_type_id, &userp),
        "getting output info");

    if (cname == nullptr) {
      FAIL("unable to get output name");
    }

    std::string name(cname);
    if ((name != output0) && (name != output1)) {
      FAIL("unexpected output '" + name + "'");
    }

    if ((dim_count != 2) || (shape[0] != 1) || (shape[1] != 16)) {
      FAIL("unexpected shape for '" + name + "'");
    }

    if (datatype != expected_datatype) {
      FAIL(
          "unexpected datatype '" +
          std::string(TRITONSERVER_DataTypeString(datatype)) + "' for '" +
          name + "'");
    }

    if (byte_size != expected_byte_size) {
      FAIL(
          "unexpected byte-size, expected " +
          std::to_string(expected_byte_size) + ", got " +
          std::to_string(byte_size) + " for " + name);
    }

    if (enforce_memory_type && (memory_type != requested_memory_type)) {
      FAIL(
          "unexpected memory type, expected to be allocated in " +
          std::string(TRITONSERVER_MemoryTypeString(requested_memory_type)) +
          ", got " + std::string(TRITONSERVER_MemoryTypeString(memory_type)) +
          ", id " + std::to_string(memory_type_id) + " for " + name);
    }

    // We make a copy of the data here... which we could avoid for
    // performance reasons but ok for this simple example.
    std::vector<char>& odata = output_data[name];
    switch (memory_type) {
      case TRITONSERVER_MEMORY_CPU: {
        std::cout << name << " is stored in system memory" << std::endl;
        const char* cbase = reinterpret_cast<const char*>(base);
        odata.assign(cbase, cbase + byte_size);
        break;
      }

      case TRITONSERVER_MEMORY_CPU_PINNED: {
        std::cout << name << " is stored in pinned memory" << std::endl;
        const char* cbase = reinterpret_cast<const char*>(base);
        odata.assign(cbase, cbase + byte_size);
        break;
      }

#ifdef TRITON_ENABLE_GPU
      case TRITONSERVER_MEMORY_GPU: {
        std::cout << name << " is stored in GPU memory" << std::endl;
        // resize(), not reserve(): the vector must actually contain
        // 'byte_size' elements before cudaMemcpy writes into it. With
        // reserve() the size stays 0, so indexing the vector and later
        // reading the copied bytes is undefined behavior.
        odata.resize(byte_size);
        FAIL_IF_CUDA_ERR(
            cudaMemcpy(odata.data(), base, byte_size, cudaMemcpyDeviceToHost),
            "getting " + name + " data from GPU memory");
        break;
      }
#endif

      default:
        FAIL("unexpected memory type");
    }
  }

  if (is_int) {
    CompareResult<int32_t>(
        output0, output1, &input0_data[0], &input1_data[0],
        output_data[output0].data(), output_data[output1].data());
  } else {
    CompareResult<float>(
        output0, output1, &input0_data[0], &input1_data[0],
        output_data[output0].data(), output_data[output1].data());
  }
}

}  // namespace

// Entry point of the example. Parses the command line (-v, -m <type>,
// -r <path>), creates an in-process Triton server for the given model
// repository, waits for the server and the "simple" model to become
// ready, and then issues two or three inferences that reuse a single
// request object, validating each response with Check(). Returns 0
// when every step succeeds.
//
// NOTE(review): Usage(), FAIL(), FAIL_IF_ERR() and FAIL_IF_CUDA_ERR()
// are defined outside this function; they presumably terminate the
// process on error -- confirm against their definitions. The variables
// 'enforce_memory_type' and 'requested_memory_type' are also declared
// outside this function.
int
main(int argc, char** argv)
{
  std::string model_repository_path;
  int verbose_level = 0;

  // Parse commandline...
  //   -v         enable verbose logging (level 1)
  //   -m <type>  memory type to enforce: "system", "pinned" or "gpu"
  //   -r <path>  model repository path (required)
  int opt;
  while ((opt = getopt(argc, argv, "vm:r:")) != -1) {
    switch (opt) {
      case 'm': {
        enforce_memory_type = true;
        if (!strcmp(optarg, "system")) {
          requested_memory_type = TRITONSERVER_MEMORY_CPU;
        } else if (!strcmp(optarg, "pinned")) {
          requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED;
        } else if (!strcmp(optarg, "gpu")) {
          requested_memory_type = TRITONSERVER_MEMORY_GPU;
        } else {
          Usage(
              argv,
              "-m must be used to specify one of the following types:"
              " <\"system\"|\"pinned\"|gpu>");
        }
        break;
      }
      case 'r':
        model_repository_path = optarg;
        break;
      case 'v':
        verbose_level = 1;
        break;
      case '?':
        Usage(argv);
        break;
    }
  }

  if (model_repository_path.empty()) {
    Usage(argv, "-r must be used to specify model repository path");
  }
#ifndef TRITON_ENABLE_GPU
  // Without GPU support only system memory can be requested.
  if (enforce_memory_type && requested_memory_type != TRITONSERVER_MEMORY_CPU) {
    Usage(argv, "-m can only be set to \"system\" without enabling GPU");
  }
#endif  // TRITON_ENABLE_GPU

  // Check API version. This compares the API version of the
  // triton-server library linked into this application against the
  // API version of the header file used when compiling this
  // application. The API version of the shared library must be >= the
  // API version used when compiling this application.
  uint32_t api_version_major, api_version_minor;
  FAIL_IF_ERR(
      TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor),
      "getting Triton API version");
  if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major) ||
      (TRITONSERVER_API_VERSION_MINOR > api_version_minor)) {
    FAIL("triton server API version mismatch");
  }

  // Create the option setting to use when creating the inference
  // server object.
  TRITONSERVER_ServerOptions* server_options = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsNew(&server_options),
      "creating server options");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetModelRepositoryPath(
          server_options, model_repository_path.c_str()),
      "setting model repository path");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level),
      "setting verbose logging level");
  // The backend and repository agent directories are hard-coded to the
  // /opt/tritonserver locations.
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetBackendDirectory(
          server_options, "/opt/tritonserver/backends"),
      "setting backend directory");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetRepoAgentDirectory(
          server_options, "/opt/tritonserver/repoagents"),
      "setting repository agent directory");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true),
      "setting strict model configuration");
  // The minimum compute capability comes from the build configuration
  // when GPU support is enabled and is 0 otherwise.
#ifdef TRITON_ENABLE_GPU
  double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY;
#else
  double min_compute_capability = 0;
#endif  // TRITON_ENABLE_GPU
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(
          server_options, min_compute_capability),
      "setting minimum supported CUDA compute capability");

  // Create the server object using the option settings. The server
  // object encapsulates all the functionality of the Triton server
  // and allows access to the Triton server API. Typically only a
  // single server object is needed by an application, but it is
  // allowed to create multiple server objects within a single
  // application. After the server object is created the server
  // options can be deleted.
  TRITONSERVER_Server* server_ptr = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_ServerNew(&server_ptr, server_options),
      "creating server object");
  FAIL_IF_ERR(
      TRITONSERVER_ServerOptionsDelete(server_options),
      "deleting server options");

  // Use a shared_ptr to manage the lifetime of the server object.
  // TRITONSERVER_ServerDelete is installed as the deleter.
  std::shared_ptr<TRITONSERVER_Server> server(
      server_ptr, TRITONSERVER_ServerDelete);

  // Wait until the server is both live and ready. The server will not
  // appear "ready" until all models are loaded and ready to receive
  // inference requests. Polls every 500 ms and gives up after 10
  // unsuccessful checks.
  size_t health_iters = 0;
  while (true) {
    bool live, ready;
    FAIL_IF_ERR(
        TRITONSERVER_ServerIsLive(server.get(), &live),
        "unable to get server liveness");
    FAIL_IF_ERR(
        TRITONSERVER_ServerIsReady(server.get(), &ready),
        "unable to get server readiness");
    std::cout << "Server Health: live " << live << ", ready " << ready
              << std::endl;
    if (live && ready) {
      break;
    }

    if (++health_iters >= 10) {
      FAIL("failed to find healthy inference server");
    }

    std::this_thread::sleep_for(std::chrono::milliseconds(500));
  }

  // Server metadata can be accessed using the server object. The
  // metadata is returned as an abstract TRITONSERVER_Message that can
  // be converted to JSON for further processing.
  {
    TRITONSERVER_Message* server_metadata_message;
    FAIL_IF_ERR(
        TRITONSERVER_ServerMetadata(server.get(), &server_metadata_message),
        "unable to get server metadata message");
    const char* buffer;
    size_t byte_size;
    FAIL_IF_ERR(
        TRITONSERVER_MessageSerializeToJson(
            server_metadata_message, &buffer, &byte_size),
        "unable to serialize server metadata message");

    std::cout << "Server Metadata:" << std::endl;
    std::cout << std::string(buffer, byte_size) << std::endl;

    // 'buffer' is not used after this point, so the message can go.
    FAIL_IF_ERR(
        TRITONSERVER_MessageDelete(server_metadata_message),
        "deleting server metadata message");
  }

  const std::string model_name("simple");

  // We already waited for the server to be ready, above, so we know
  // that all models are also ready. But as an example we also wait
  // for a specific model to become available.
  //
  // The loop body below the readiness check runs exactly once: it is
  // only reached when 'is_ready' is true, which also ends the loop.
  bool is_torch_model = false;
  bool is_int = true;
  bool is_ready = false;
  health_iters = 0;
  while (!is_ready) {
    FAIL_IF_ERR(
        TRITONSERVER_ServerModelIsReady(
            server.get(), model_name.c_str(), 1 /* model_version */, &is_ready),
        "unable to get model readiness");
    if (!is_ready) {
      if (++health_iters >= 10) {
        FAIL("model failed to be ready in 10 iterations");
      }
      std::this_thread::sleep_for(std::chrono::milliseconds(500));
      continue;
    }

    TRITONSERVER_Message* model_metadata_message;
    FAIL_IF_ERR(
        TRITONSERVER_ServerModelMetadata(
            server.get(), model_name.c_str(), 1, &model_metadata_message),
        "unable to get model metadata message");
    const char* buffer;
    size_t byte_size;
    FAIL_IF_ERR(
        TRITONSERVER_MessageSerializeToJson(
            model_metadata_message, &buffer, &byte_size),
        "unable to serialize model metadata");

    // Parse the JSON string that represents the model metadata into a
    // JSON document. We use rapidjson for this parsing but any JSON
    // parser can be used.
    rapidjson::Document model_metadata;
    model_metadata.Parse(buffer, byte_size);
    if (model_metadata.HasParseError()) {
      FAIL(
          "error: failed to parse model metadata from JSON: " +
          std::string(GetParseError_En(model_metadata.GetParseError())) +
          " at " + std::to_string(model_metadata.GetErrorOffset()));
    }

    // NOTE(review): the message is deleted while 'model_metadata' is
    // still in use below; this assumes the parsed document does not
    // reference 'buffer' -- confirm for the rapidjson Parse overload
    // used here.
    FAIL_IF_ERR(
        TRITONSERVER_MessageDelete(model_metadata_message),
        "deleting model metadata message");

    // Now that we have a document representation of the model
    // metadata, we can query it to extract some information about the
    // model.
    if (strcmp(model_metadata["name"].GetString(), model_name.c_str())) {
      FAIL("unable to find metadata for model");
    }

    // Version "1" must be listed in the metadata's "versions" array.
    bool found_version = false;
    if (model_metadata.HasMember("versions")) {
      for (const auto& version : model_metadata["versions"].GetArray()) {
        if (strcmp(version.GetString(), "1") == 0) {
          found_version = true;
          break;
        }
      }
    }
    if (!found_version) {
      FAIL("unable to find version 1 status for model");
    }

    // ParseModelMetadata() is defined elsewhere in this file; it fills
    // in 'is_int' and 'is_torch_model', which select the tensor
    // datatype and the output tensor names used below.
    FAIL_IF_ERR(
        ParseModelMetadata(model_metadata, &is_int, &is_torch_model),
        "parsing model metadata");
  }

  // When triton needs a buffer to hold an output tensor, it will ask
  // us to provide the buffer. In this way we can have any buffer
  // management and sharing strategy that we want. To communicate to
  // triton the functions that we want it to call to perform the
  // allocations, we create a "response allocator" object. We pass
  // this response allocator object to triton when requesting
  // inference. We can reuse this response allocator object for any
  // number of inference requests.
  TRITONSERVER_ResponseAllocator* allocator = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorNew(
          &allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */),
      "creating response allocator");

  // Create an inference request object. The inference request object
  // is where we set the name of the model we want to use for
  // inference and the input tensors.
  TRITONSERVER_InferenceRequest* irequest = nullptr;
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestNew(
          &irequest, server.get(), model_name.c_str(), -1 /* model_version */),
      "creating inference request");

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"),
      "setting ID for the request");

  // 'barrier' is handed to the request release callback as its user
  // pointer; 'request_release_future' is waited on after each
  // inference so the request is known to be released before it is
  // reused or deleted.
  std::unique_ptr<std::promise<void>> barrier =
      std::make_unique<std::promise<void>>();
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestSetReleaseCallback(
          irequest, InferRequestRelease,
          reinterpret_cast<void*>(barrier.get())),
      "setting request release callback");
  std::future<void> request_release_future = barrier->get_future();

  // Add the 2 input tensors to the request...
  auto input0 = "INPUT0";
  auto input1 = "INPUT1";

  std::vector<int64_t> input0_shape({1, 16});
  std::vector<int64_t> input1_shape({1, 16});

  const TRITONSERVER_DataType datatype =
      (is_int) ? TRITONSERVER_TYPE_INT32 : TRITONSERVER_TYPE_FP32;

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddInput(
          irequest, input0, datatype, &input0_shape[0], input0_shape.size()),
      "setting input 0 meta-data for the request");
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddInput(
          irequest, input1, datatype, &input1_shape[0], input1_shape.size()),
      "setting input 1 meta-data for the request");

  // Output tensor names differ when the model was identified as a
  // torch model.
  auto output0 = is_torch_model ? "OUTPUT__0" : "OUTPUT0";
  auto output1 = is_torch_model ? "OUTPUT__1" : "OUTPUT1";

  // Indicate that we want both output tensors calculated and returned
  // for the inference request. These calls are optional, if no
  // output(s) are specifically requested then all outputs defined by
  // the model will be calculated and returned.
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0),
      "requesting output 0 for the request");
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1),
      "requesting output 1 for the request");

  // Create the data for the two input tensors. Initialize the first
  // to unique values and the second to all ones.
  std::vector<char> input0_data;
  std::vector<char> input1_data;
  if (is_int) {
    GenerateInputData<int32_t>(&input0_data, &input1_data);
  } else {
    GenerateInputData<float>(&input0_data, &input1_data);
  }

  size_t input0_size = input0_data.size();
  size_t input1_size = input1_data.size();

  // By default the request reads its inputs directly from the host
  // vectors; the GPU build may redirect these pointers below.
  const void* input0_base = &input0_data[0];
  const void* input1_base = &input1_data[0];
#ifdef TRITON_ENABLE_GPU
  // Owners for the optional CUDA-allocated copies of the inputs.
  // NOTE(review): cuda_data_deleter is defined elsewhere; presumably it
  // releases both device and pinned allocations -- confirm.
  std::unique_ptr<void, decltype(cuda_data_deleter)> input0_gpu(
      nullptr, cuda_data_deleter);
  std::unique_ptr<void, decltype(cuda_data_deleter)> input1_gpu(
      nullptr, cuda_data_deleter);
  bool use_cuda_memory =
      (enforce_memory_type &&
       (requested_memory_type != TRITONSERVER_MEMORY_CPU));
  if (use_cuda_memory) {
    FAIL_IF_CUDA_ERR(cudaSetDevice(0), "setting CUDA device to device 0");
    if (requested_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) {
      // GPU memory requested: copy both inputs to device allocations.
      void* dst;
      FAIL_IF_CUDA_ERR(
          cudaMalloc(&dst, input0_size),
          "allocating GPU memory for INPUT0 data");
      input0_gpu.reset(dst);
      FAIL_IF_CUDA_ERR(
          cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToDevice),
          "setting INPUT0 data in GPU memory");
      FAIL_IF_CUDA_ERR(
          cudaMalloc(&dst, input1_size),
          "allocating GPU memory for INPUT1 data");
      input1_gpu.reset(dst);
      FAIL_IF_CUDA_ERR(
          cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToDevice),
          "setting INPUT1 data in GPU memory");
    } else {
      // Pinned memory requested: copy both inputs to pinned host
      // allocations.
      void* dst;
      FAIL_IF_CUDA_ERR(
          cudaHostAlloc(&dst, input0_size, cudaHostAllocPortable),
          "allocating pinned memory for INPUT0 data");
      input0_gpu.reset(dst);
      FAIL_IF_CUDA_ERR(
          cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToHost),
          "setting INPUT0 data in pinned memory");
      FAIL_IF_CUDA_ERR(
          cudaHostAlloc(&dst, input1_size, cudaHostAllocPortable),
          "allocating pinned memory for INPUT1 data");
      input1_gpu.reset(dst);
      FAIL_IF_CUDA_ERR(
          cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToHost),
          "setting INPUT1 data in pinned memory");
    }
  }

  input0_base = use_cuda_memory ? input0_gpu.get() : &input0_data[0];
  input1_base = use_cuda_memory ? input1_gpu.get() : &input1_data[0];
#endif  // TRITON_ENABLE_GPU

  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAppendInputData(
          irequest, input0, input0_base, input0_size, requested_memory_type,
          0 /* memory_type_id */),
      "assigning INPUT0 data");
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestAppendInputData(
          irequest, input1, input1_base, input1_size, requested_memory_type,
          0 /* memory_type_id */),
      "assigning INPUT1 data");

  // Perform inference by calling TRITONSERVER_ServerInferAsync. This
  // call is asynchronous and therefore returns immediately. The
  // completion of the inference and delivery of the response is done
  // by triton by calling the "response complete" callback functions
  // (InferResponseComplete in this case).
  {
    // The promise is heap-allocated and handed to the callback as its
    // user pointer. NOTE(review): it is never deleted in this function;
    // presumably InferResponseComplete deletes it after fulfilling it
    // -- confirm, otherwise it leaks.
    auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
    std::future<TRITONSERVER_InferenceResponse*> completed = p->get_future();

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetResponseCallback(
            irequest, allocator, nullptr /* response_allocator_userp */,
            InferResponseComplete, reinterpret_cast<void*>(p)),
        "setting response callback");

    FAIL_IF_ERR(
        TRITONSERVER_ServerInferAsync(
            server.get(), irequest, nullptr /* trace */),
        "running inference");

    // The InferResponseComplete function sets the std::promise so
    // that this thread will block until the response is returned.
    TRITONSERVER_InferenceResponse* completed_response = completed.get();
    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseError(completed_response),
        "response status");

    Check(
        completed_response, input0_data, input1_data, output0, output1,
        input0_size, datatype, is_int);

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseDelete(completed_response),
        "deleting inference response");

    // We need to make sure that the previous request was released before
    // reusing it.
    request_release_future.get();
  }

  // The TRITONSERVER_InferenceRequest object can be reused for
  // multiple (sequential) inference requests. For example, if we have
  // multiple requests where the inference request is the same except
  // for different input tensor data, then we can just change the
  // input data buffers. Below some input data is changed in place and
  // then another inference request is issued. For simplicity we only
  // do this when the input tensors are in non-pinned system memory.
  if (!enforce_memory_type ||
      (requested_memory_type == TRITONSERVER_MEMORY_CPU)) {
    // Overwrite the first element of INPUT0 in the host vector that
    // the request was given above.
    if (is_int) {
      int32_t* input0_base = reinterpret_cast<int32_t*>(&input0_data[0]);
      input0_base[0] = 27;
    } else {
      float* input0_base = reinterpret_cast<float*>(&input0_data[0]);
      input0_base[0] = 27.0;
    }

    auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
    std::future<TRITONSERVER_InferenceResponse*> completed = p->get_future();

    // Using a new promise so have to re-register the callback to set
    // the promise as the userp.
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetResponseCallback(
            irequest, allocator, nullptr /* response_allocator_userp */,
            InferResponseComplete, reinterpret_cast<void*>(p)),
        "setting response callback");

    // Register a new promise for the request callback barrier.
    barrier = std::make_unique<std::promise<void>>();
    request_release_future = barrier->get_future();
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetReleaseCallback(
            irequest, InferRequestRelease,
            reinterpret_cast<void*>(barrier.get())),
        "setting request release callback");

    FAIL_IF_ERR(
        TRITONSERVER_ServerInferAsync(
            server.get(), irequest, nullptr /* trace */),
        "running inference");

    TRITONSERVER_InferenceResponse* completed_response = completed.get();
    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseError(completed_response),
        "response status");

    Check(
        completed_response, input0_data, input1_data, output0, output1,
        input0_size, datatype, is_int);

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseDelete(completed_response),
        "deleting inference response");

    // We need to make sure that the previous request was released before
    // reusing it.
    request_release_future.get();
  }

  // There are other TRITONSERVER_InferenceRequest APIs that allow
  // other in-place modifications so that the object can be reused for
  // multiple (sequential) inference requests. For example, we can
  // assign a new data buffer for an input by first removing the
  // existing data with
  // TRITONSERVER_InferenceRequestRemoveAllInputData.
  {
    // INPUT0 is re-pointed at the INPUT1 buffer, so both inputs carry
    // the same data for this inference.
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestRemoveAllInputData(irequest, input0),
        "removing INPUT0 data");
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestAppendInputData(
            irequest, input0, input1_base, input1_size, requested_memory_type,
            0 /* memory_type_id */),
        "assigning INPUT1 data to INPUT0");

    auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
    std::future<TRITONSERVER_InferenceResponse*> completed = p->get_future();

    // Using a new promise so have to re-register the callback to set
    // the promise as the userp.
    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetResponseCallback(
            irequest, allocator, nullptr /* response_allocator_userp */,
            InferResponseComplete, reinterpret_cast<void*>(p)),
        "setting response callback");

    // Register a new promise for the request callback barrier.
    barrier = std::make_unique<std::promise<void>>();
    request_release_future = barrier->get_future();

    FAIL_IF_ERR(
        TRITONSERVER_InferenceRequestSetReleaseCallback(
            irequest, InferRequestRelease,
            reinterpret_cast<void*>(barrier.get())),
        "setting request release callback");

    FAIL_IF_ERR(
        TRITONSERVER_ServerInferAsync(
            server.get(), irequest, nullptr /* trace */),
        "running inference");

    TRITONSERVER_InferenceResponse* completed_response = completed.get();
    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseError(completed_response),
        "response status");

    // Both inputs are using input1_data...
    Check(
        completed_response, input1_data, input1_data, output0, output1,
        input0_size, datatype, is_int);

    FAIL_IF_ERR(
        TRITONSERVER_InferenceResponseDelete(completed_response),
        "deleting inference response");

    // Make sure the request is released before deleting it. If not, release
    // callback will segfault after barrier is destructed.
    request_release_future.get();
  }

  // Explicitly release the request and the allocator; the server
  // object itself is released by the shared_ptr deleter when 'server'
  // goes out of scope.
  FAIL_IF_ERR(
      TRITONSERVER_InferenceRequestDelete(irequest),
      "deleting inference request");

  FAIL_IF_ERR(
      TRITONSERVER_ResponseAllocatorDelete(allocator),
      "deleting response allocator");

  return 0;
}


================================================
FILE: src/test/CMakeLists.txt
================================================
# Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Minimum CMake version required to configure this directory.
cmake_minimum_required (VERSION 3.31.8)

#
# Unit tests
#
# GoogleTest is mandatory for everything below: REQUIRED makes configuration
# stop with an error when the package cannot be found.
find_package(GTest REQUIRED)

#
# Unit test for DataCompressor
#
# Built only when at least one of the TRITON_ENABLE_HTTP, TRITON_ENABLE_METRICS,
# TRITON_ENABLE_SAGEMAKER or TRITON_ENABLE_VERTEX_AI options is enabled.
#
# The options are referenced by name instead of being expanded with ${...}:
# if() dereferences variable names itself, and an unset or empty option then
# simply evaluates to false rather than leaving a malformed expression such as
# "if( OR ON OR ...)" or being re-evaluated as the name of another variable.
#
if(TRITON_ENABLE_HTTP OR TRITON_ENABLE_METRICS OR
    TRITON_ENABLE_SAGEMAKER OR TRITON_ENABLE_VERTEX_AI)
  # The headers are listed next to the test source so they are associated
  # with the target.
  add_executable(
    data_compressor_test
    data_compressor_test.cc
    ../data_compressor.h
    ../common.h
  )

  # No RPATH is embedded in the test binary (build or install).
  set_target_properties(
    data_compressor_test
    PROPERTIES
      SKIP_BUILD_RPATH TRUE
      BUILD_WITH_INSTALL_RPATH TRUE
      INSTALL_RPATH_USE_LINK_PATH FALSE
      INSTALL_RPATH ""
  )

  # The parent directory provides data_compressor.h and common.h.
  target_include_directories(
    data_compressor_test
    PRIVATE
      ${CMAKE_CURRENT_SOURCE_DIR}/..
      ${GTEST_INCLUDE_DIRS}
      ${LIBEVENT_INCLUDE_DIRS}
  )

  target_link_libraries(
    data_compressor_test
    PRIVATE
      triton-core-serverapi   # from repo-core
      triton-core-serverstub  # from repo-core
      GTest::gtest
      GTest::gtest_main
      ${LIBEVENT_LIBRARIES}
      -lz
  )

  install(
    TARGETS data_compressor_test
    RUNTIME DESTINATION bin
  )
endif()

#
# Unit test for Backend + Common + Core tensor size APIs (GetElementCount,
# GetByteSize). Core template headers are used directly; the few core symbols
# not available via libtritonserver.so (hidden by linker script) are defined
# in the test source.
#
# The test is only configured when the proto-library target exists, because
# both its include directories and its link line depend on that target.
#
if(TARGET proto-library)
  add_executable(
    tensor_size_test
    tensor_size_test.cc
  )

  # Unlike the other tests in this file, an install RPATH is set here: the
  # runtime loader searches ../lib relative to the executable's location.
  set_target_properties(
    tensor_size_test
    PROPERTIES
      SKIP_BUILD_RPATH TRUE
      BUILD_WITH_INSTALL_RPATH TRUE
      INSTALL_RPATH_USE_LINK_PATH FALSE
      INSTALL_RPATH "$ORIGIN/../lib"
  )

  # repo-core sources are added to the include path so the core headers can
  # be included directly; the generator expression forwards the include
  # directories exported by proto-library.
  target_include_directories(
    tensor_size_test
    PRIVATE
      ${CMAKE_CURRENT_SOURCE_DIR}/..
      ${GTEST_INCLUDE_DIRS}
      ${repo-core_SOURCE_DIR}/src
      $<TARGET_PROPERTY:proto-library,INTERFACE_INCLUDE_DIRECTORIES>
  )

  target_link_libraries(
    tensor_size_test
    PRIVATE
      triton-backend-utils
      triton-core-serverapi
      triton-core-serverstub
      triton-common-model-config
      proto-library
      protobuf::libprotobuf
      GTest::gtest
      GTest::gtest_main
  )

  install(
    TARGETS tensor_size_test
    RUNTIME DESTINATION bin
  )
endif()

# Test components that are always built. Each call names the source
# directory and uses the same relative path for its binary directory.
add_subdirectory(repoagent/relocation_repoagent repoagent/relocation_repoagent)

add_subdirectory(distributed_addsub distributed_addsub)
add_subdirectory(dyna_sequence dyna_sequence)
add_subdirectory(iterative_sequence iterative_sequence)
add_subdirectory(query_backend query_backend)

# Test components that are only built when GPU support is enabled. The option
# is referenced by name (not expanded with ${...}) so that if() evaluates the
# variable itself and an unset or empty value is treated as false.
if(TRITON_ENABLE_GPU)
  add_subdirectory(sequence sequence)
  add_subdirectory(implicit_state implicit_state)
endif()


================================================
FILE: src/test/data_compressor_test.cc
================================================
// Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "gtest/gtest.h"

// Undefine the FAIL() macro inside Triton code to avoid redefine error
// from gtest. Okay as FAIL() is not used in data_compressor
#ifdef FAIL
#undef FAIL
#endif

#include <event2/buffer.h>

#include <chrono>
#include <condition_variable>
#include <fstream>
#include <future>
#include <limits>
#include <mutex>
#include <random>
#include <string>
#include <thread>
#include <vector>

#include "data_compressor.h"

namespace ni = triton::server;

namespace {

// Byte-size units
constexpr size_t KB = size_t{1} << 10;  // 1 KB = 1,024 bytes
constexpr size_t MB = KB << 10;         // 1 MB = 1,048,576 bytes

// Default maximum HTTP input size used by Triton
constexpr size_t DEFAULT_MAX_INPUT_SIZE{MB * 64};  // 64 MB

// Payload sizes sitting one megabyte on either side of that limit
constexpr size_t UNDER_LIMIT_DATA_SIZE{DEFAULT_MAX_INPUT_SIZE - MB};  // 63 MB
constexpr size_t OVER_LIMIT_DATA_SIZE{DEFAULT_MAX_INPUT_SIZE + MB};   // 65 MB

// Minimal stand-in for Triton's opaque error object. It backs the
// TRITONSERVER_Error* handles produced by the TRITONSERVER_ErrorNew stub
// defined in this file and simply stores the error code together with a
// copy of the message.
struct TritonServerError {
  // 'code' is stored as-is. 'msg' is copied; a null 'msg' is stored as an
  // empty string because constructing std::string from a null pointer is
  // undefined behavior.
  TritonServerError(TRITONSERVER_Error_Code code, const char* msg)
      : code_(code), msg_((msg != nullptr) ? msg : "")
  {
  }
  // Error code passed at construction.
  TRITONSERVER_Error_Code code_;
  // Owned copy of the error message.
  std::string msg_;
};

void
WriteEVBufferToFile(const std::string& file_name, evbuffer* evb)
{
  std::ofstream fs(file_name);
  std::unique_ptr<struct evbuffer_iovec[]> buffer_array_holder;
  struct evbuffer_iovec* buffer_array = nullptr;
  int buffer_count = evbuffer_peek(evb, -1, NULL, NULL, 0);
  if (buffer_count > 0) {
    buffer_array_holder.reset(new struct evbuffer_iovec[buffer_count]);
    buffer_array = buffer_array_holder.get();
    ASSERT_EQ(
        evbuffer_peek(evb, -1, NULL, buffer_array, buffer_count), buffer_count)
        << "unexpected error getting buffers for result";
  }
  for (int idx = 0; idx < buffer_count; ++idx) {
    fs.write(
        reinterpret_cast<const char*>(buffer_array[idx].iov_base),
        buffer_array[idx].iov_len);
  }
}

// Flatten the contents of 'evb' into 'buffer'. 'buffer' is replaced by a
// zero-initialized vector as long as the evbuffer and then filled by
// copying every extent of the evbuffer back to back. The evbuffer itself
// is only peeked at, not drained. A gtest fatal failure is raised, and the
// copy is skipped, if the extents cannot be retrieved.
void
EVBufferToContiguousBuffer(evbuffer* evb, std::vector<char>* buffer)
{
  buffer->assign(evbuffer_get_length(evb), 0);

  // Ask how many extents describe the whole buffer, then fetch them.
  const int extent_count = evbuffer_peek(evb, -1, NULL, NULL, 0);
  std::vector<struct evbuffer_iovec> extents;
  if (extent_count > 0) {
    extents.resize(extent_count);
    ASSERT_EQ(
        evbuffer_peek(evb, -1, NULL, extents.data(), extent_count),
        extent_count)
        << "unexpected error getting buffers for result";
  }

  // Append each extent at the position where the previous one ended.
  char* write_pos = buffer->data();
  for (const auto& extent : extents) {
    memcpy(write_pos, extent.iov_base, extent.iov_len);
    write_pos += extent.iov_len;
  }
}

}  // namespace

#ifdef __cplusplus
extern "C" {
#endif

// Test stub: create an error object holding 'code' and a copy of 'msg'.
// The returned opaque handle is really a heap-allocated TritonServerError.
TRITONSERVER_Error*
TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code code, const char* msg)
{
  auto* impl = new TritonServerError(code, msg);
  return reinterpret_cast<TRITONSERVER_Error*>(impl);
}

// Test stub: return the error code stored in 'error', which must be a
// handle to a TritonServerError.
TRITONSERVER_Error_Code
TRITONSERVER_ErrorCode(TRITONSERVER_Error* error)
{
  auto* impl = reinterpret_cast<TritonServerError*>(error);
  return impl->code_;
}

// Test stub: return the message stored in 'error', which must be a handle
// to a TritonServerError. The pointer refers to storage owned by that
// object.
const char*
TRITONSERVER_ErrorMessage(TRITONSERVER_Error* error)
{
  auto* impl = reinterpret_cast<TritonServerError*>(error);
  return impl->msg_.c_str();
}

#ifdef __cplusplus
}
#endif

namespace {

class DataCompressorTest : public ::testing::Test {
 public:
  DataCompressorTest()
      : raw_data_length_(0), deflate_compressed_length_(0),
        gzip_compressed_length_(0)
  {
    std::vector<std::string> files{
        "raw_data", "deflate_compressed_data", "gzip_compressed_data"};
    for (const auto& file : files) {
      std::fstream fs(file);
      // get length of file
      fs.seekg(0, fs.end);
      int length = fs.tellg();
      fs.seekg(0, fs.beg);

      // allocate memory
      char* data = nullptr;
      if (file == "raw_data") {
        raw_data_.reset(new char[length]);
        data = raw_data_.get();
        raw_data_length_ = length;
      } else if (file == "deflate_compressed_data") {
        deflate_compressed_data_.reset(new char[length]);
        data = deflate_compressed_data_.get();
        deflate_compressed_length_ = length;
      } else {
        gzip_compressed_data_.reset(new char[length]);
        data = gzip_compressed_data_.get();
        gzip_compressed_length_ = length;
      }

      fs.read(data, length);
    }
  }

  std::unique_ptr<char[]> raw_data_;
  size_t raw_data_length_;
  std::unique_ptr<char[]> deflate_compressed_data_;
  size_t deflate_compressed_length_;
  std::unique_ptr<char[]> gzip_compressed_data_;
  size_t gzip_compressed_length_;
};

// Round-trips the raw fixture data through DEFLATE: the data is added to the
// source evbuffer in one call, compressed, decompressed, and the result is
// compared byte for byte against the original.
TEST_F(DataCompressorTest, DeflateOneBuffer)
{
  // Hold every evbuffer in a smart pointer so that it is released on all
  // paths, including the early return taken when an ASSERT_* check fails.
  using EvbufferPtr = std::unique_ptr<evbuffer, decltype(&evbuffer_free)>;

  // Convert the raw data into evbuffer format
  EvbufferPtr source(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((source != nullptr)) << "Failed to create source evbuffer";
  ASSERT_EQ(evbuffer_add(source.get(), raw_data_.get(), raw_data_length_), 0)
      << "Failed to initialize source evbuffer";

  EvbufferPtr compressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((compressed != nullptr))
      << "Failed to create compressed evbuffer";
  EvbufferPtr decompressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((decompressed != nullptr))
      << "Failed to create decompressed evbuffer";

  auto err = ni::DataCompressor::CompressData(
      ni::DataCompressor::Type::DEFLATE, source.get(), compressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to compress data: " << TRITONSERVER_ErrorMessage(err);

  err = ni::DataCompressor::DecompressData(
      ni::DataCompressor::Type::DEFLATE, compressed.get(), decompressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to decompress data: " << TRITONSERVER_ErrorMessage(err);

  size_t destination_byte_size = evbuffer_get_length(decompressed.get());
  ASSERT_EQ(destination_byte_size, raw_data_length_) << "Mismatched byte size";

  std::vector<char> res;
  EVBufferToContiguousBuffer(decompressed.get(), &res);
  for (size_t idx = 0; idx < raw_data_length_; ++idx) {
    ASSERT_TRUE(raw_data_[idx] == res[idx])
        << "Mismatched data at byte " << idx;
  }
}

// Round-trips the raw fixture data through GZIP: the data is added to the
// source evbuffer in one call, compressed, decompressed, and the result is
// compared byte for byte against the original.
TEST_F(DataCompressorTest, GzipOneBuffer)
{
  // Hold every evbuffer in a smart pointer so that it is released on all
  // paths, including the early return taken when an ASSERT_* check fails.
  using EvbufferPtr = std::unique_ptr<evbuffer, decltype(&evbuffer_free)>;

  // Convert the raw data into evbuffer format
  EvbufferPtr source(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((source != nullptr)) << "Failed to create source evbuffer";
  ASSERT_EQ(evbuffer_add(source.get(), raw_data_.get(), raw_data_length_), 0)
      << "Failed to initialize source evbuffer";

  EvbufferPtr compressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((compressed != nullptr))
      << "Failed to create compressed evbuffer";
  EvbufferPtr decompressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((decompressed != nullptr))
      << "Failed to create decompressed evbuffer";

  auto err = ni::DataCompressor::CompressData(
      ni::DataCompressor::Type::GZIP, source.get(), compressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to compress data: " << TRITONSERVER_ErrorMessage(err);
  err = ni::DataCompressor::DecompressData(
      ni::DataCompressor::Type::GZIP, compressed.get(), decompressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to decompress data: " << TRITONSERVER_ErrorMessage(err);

  size_t destination_byte_size = evbuffer_get_length(decompressed.get());
  ASSERT_EQ(destination_byte_size, raw_data_length_) << "Mismatched byte size";

  std::vector<char> res;
  EVBufferToContiguousBuffer(decompressed.get(), &res);
  for (size_t idx = 0; idx < raw_data_length_; ++idx) {
    ASSERT_TRUE(raw_data_[idx] == res[idx])
        << "Mismatched data at byte " << idx;
  }
}

// Round-trips the raw fixture data through DEFLATE when both the source and
// the compressed data are spread over two evbuffer extents.
TEST_F(DataCompressorTest, DeflateTwoBuffer)
{
  // Hold every evbuffer in a smart pointer so that it is released on all
  // paths, including the early return taken when an ASSERT_* check fails.
  using EvbufferPtr = std::unique_ptr<evbuffer, decltype(&evbuffer_free)>;

  // Convert the raw data into evbuffer format with two buffers
  EvbufferPtr source(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((source != nullptr)) << "Failed to create source evbuffer";
  const size_t half_length = raw_data_length_ / 2;
  ASSERT_EQ(evbuffer_add(source.get(), raw_data_.get(), half_length), 0)
      << "Failed to initialize source evbuffer";
  // Append the second half from a separate evbuffer and verify that the
  // source evbuffer ends up with two extents
  {
    EvbufferPtr second_source(evbuffer_new(), &evbuffer_free);
    ASSERT_TRUE((second_source != nullptr))
        << "Failed to create source evbuffer";
    ASSERT_EQ(
        evbuffer_add(
            second_source.get(), raw_data_.get() + half_length,
            raw_data_length_ - half_length),
        0)
        << "Failed to initialize source evbuffer";
    ASSERT_EQ(evbuffer_add_buffer(source.get(), second_source.get()), 0)
        << "Failed to initialize source evbuffer";
    int buffer_count = evbuffer_peek(source.get(), -1, NULL, NULL, 0);
    ASSERT_EQ(buffer_count, 2) << "Expect two buffers as source";
  }

  EvbufferPtr compressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((compressed != nullptr))
      << "Failed to create compressed evbuffer";

  auto err = ni::DataCompressor::CompressData(
      ni::DataCompressor::Type::DEFLATE, source.get(), compressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to compress data: " << TRITONSERVER_ErrorMessage(err);

  // Reconstruct the compressed buffer to be two buffers. This has to run
  // after compression: before it the compressed evbuffer is empty, the
  // condition below can never hold and the split would be silently skipped.
  if (evbuffer_peek(compressed.get(), -1, NULL, NULL, 0) == 1) {
    struct evbuffer_iovec extent;
    ASSERT_EQ(evbuffer_peek(compressed.get(), -1, NULL, &extent, 1), 1)
        << "unexpected error getting buffers for result";

    EvbufferPtr first_compressed(evbuffer_new(), &evbuffer_free);
    EvbufferPtr second_compressed(evbuffer_new(), &evbuffer_free);
    ASSERT_TRUE((first_compressed != nullptr) && (second_compressed != nullptr))
        << "Failed to create compressed evbuffer";
    const size_t split_length = extent.iov_len / 2;
    ASSERT_EQ(
        evbuffer_add(first_compressed.get(), extent.iov_base, split_length), 0)
        << "Failed to split compressed buffer";
    ASSERT_EQ(
        evbuffer_add(
            second_compressed.get(),
            reinterpret_cast<char*>(extent.iov_base) + split_length,
            extent.iov_len - split_length),
        0)
        << "Failed to split compressed buffer";
    ASSERT_EQ(
        evbuffer_add_buffer(first_compressed.get(), second_compressed.get()),
        0)
        << "Failed to initialize source evbuffer";
    // 'compressed' takes over the split copy. The original single-extent
    // buffer moves into 'first_compressed' and is freed at the end of this
    // scope, after all reads through 'extent' are done.
    compressed.swap(first_compressed);
  }

  EvbufferPtr decompressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((decompressed != nullptr))
      << "Failed to create decompressed evbuffer";

  err = ni::DataCompressor::DecompressData(
      ni::DataCompressor::Type::DEFLATE, compressed.get(), decompressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to decompress data: " << TRITONSERVER_ErrorMessage(err);

  size_t destination_byte_size = evbuffer_get_length(decompressed.get());
  ASSERT_EQ(destination_byte_size, raw_data_length_) << "Mismatched byte size";

  std::vector<char> res;
  EVBufferToContiguousBuffer(decompressed.get(), &res);
  for (size_t idx = 0; idx < raw_data_length_; ++idx) {
    ASSERT_TRUE(raw_data_[idx] == res[idx])
        << "Mismatched data at byte " << idx;
  }
}

// Round-trips the raw fixture data through GZIP when both the source and the
// compressed data are spread over two evbuffer extents.
TEST_F(DataCompressorTest, GzipTwoBuffer)
{
  // Hold every evbuffer in a smart pointer so that it is released on all
  // paths, including the early return taken when an ASSERT_* check fails.
  using EvbufferPtr = std::unique_ptr<evbuffer, decltype(&evbuffer_free)>;

  // Convert the raw data into evbuffer format with two buffers
  EvbufferPtr source(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((source != nullptr)) << "Failed to create source evbuffer";
  const size_t half_length = raw_data_length_ / 2;
  ASSERT_EQ(evbuffer_add(source.get(), raw_data_.get(), half_length), 0)
      << "Failed to initialize source evbuffer";
  // Append the second half from a separate evbuffer and verify that the
  // source evbuffer ends up with two extents
  {
    EvbufferPtr second_source(evbuffer_new(), &evbuffer_free);
    ASSERT_TRUE((second_source != nullptr))
        << "Failed to create source evbuffer";
    ASSERT_EQ(
        evbuffer_add(
            second_source.get(), raw_data_.get() + half_length,
            raw_data_length_ - half_length),
        0)
        << "Failed to initialize source evbuffer";
    ASSERT_EQ(evbuffer_add_buffer(source.get(), second_source.get()), 0)
        << "Failed to initialize source evbuffer";
    int buffer_count = evbuffer_peek(source.get(), -1, NULL, NULL, 0);
    ASSERT_EQ(buffer_count, 2) << "Expect two buffers as source";
  }

  EvbufferPtr compressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((compressed != nullptr))
      << "Failed to create compressed evbuffer";

  auto err = ni::DataCompressor::CompressData(
      ni::DataCompressor::Type::GZIP, source.get(), compressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to compress data: " << TRITONSERVER_ErrorMessage(err);

  // Reconstruct the compressed buffer to be two buffers. This has to run
  // after compression: before it the compressed evbuffer is empty, the
  // condition below can never hold and the split would be silently skipped.
  if (evbuffer_peek(compressed.get(), -1, NULL, NULL, 0) == 1) {
    struct evbuffer_iovec extent;
    ASSERT_EQ(evbuffer_peek(compressed.get(), -1, NULL, &extent, 1), 1)
        << "unexpected error getting buffers for result";

    EvbufferPtr first_compressed(evbuffer_new(), &evbuffer_free);
    EvbufferPtr second_compressed(evbuffer_new(), &evbuffer_free);
    ASSERT_TRUE((first_compressed != nullptr) && (second_compressed != nullptr))
        << "Failed to create compressed evbuffer";
    const size_t split_length = extent.iov_len / 2;
    ASSERT_EQ(
        evbuffer_add(first_compressed.get(), extent.iov_base, split_length), 0)
        << "Failed to split compressed buffer";
    ASSERT_EQ(
        evbuffer_add(
            second_compressed.get(),
            reinterpret_cast<char*>(extent.iov_base) + split_length,
            extent.iov_len - split_length),
        0)
        << "Failed to split compressed buffer";
    ASSERT_EQ(
        evbuffer_add_buffer(first_compressed.get(), second_compressed.get()),
        0)
        << "Failed to initialize source evbuffer";
    // 'compressed' takes over the split copy. The original single-extent
    // buffer moves into 'first_compressed' and is freed at the end of this
    // scope, after all reads through 'extent' are done.
    compressed.swap(first_compressed);
  }

  EvbufferPtr decompressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((decompressed != nullptr))
      << "Failed to create decompressed evbuffer";

  err = ni::DataCompressor::DecompressData(
      ni::DataCompressor::Type::GZIP, compressed.get(), decompressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to decompress data: " << TRITONSERVER_ErrorMessage(err);

  size_t destination_byte_size = evbuffer_get_length(decompressed.get());
  ASSERT_EQ(destination_byte_size, raw_data_length_) << "Mismatched byte size";

  std::vector<char> res;
  EVBufferToContiguousBuffer(decompressed.get(), &res);
  for (size_t idx = 0; idx < raw_data_length_; ++idx) {
    ASSERT_TRUE(raw_data_[idx] == res[idx])
        << "Mismatched data at byte " << idx;
  }
}

// Round-trips a large, highly repetitive input through DEFLATE and checks
// both that the data survives unchanged and that compression shrinks it to
// at most half of its original size.
TEST_F(DataCompressorTest, DeflateOneLargeBuffer)
{
  // Hold every evbuffer in a smart pointer so that it is released on all
  // paths, including the early return taken when an ASSERT_* check fails.
  using EvbufferPtr = std::unique_ptr<evbuffer, decltype(&evbuffer_free)>;

  // Grow the raw data to 2^19 copies of itself by doubling it 19 times. The
  // allocation is sized to exactly what the doubling loop fills.
  const size_t doubling_count = 19;
  {
    std::unique_ptr<char[]> extended_raw_data(
        new char[raw_data_length_ << doubling_count]);
    memcpy(extended_raw_data.get(), raw_data_.get(), raw_data_length_);
    size_t filled_size = raw_data_length_;
    for (size_t i = 0; i < doubling_count; ++i) {
      // Duplicate everything written so far right behind itself
      memcpy(
          extended_raw_data.get() + filled_size, extended_raw_data.get(),
          filled_size);
      filled_size += filled_size;
    }
    raw_data_length_ = filled_size;
    raw_data_.swap(extended_raw_data);
  }
  // Convert the raw data into evbuffer format
  EvbufferPtr source(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((source != nullptr)) << "Failed to create source evbuffer";
  ASSERT_EQ(evbuffer_add(source.get(), raw_data_.get(), raw_data_length_), 0)
      << "Failed to initialize source evbuffer";

  EvbufferPtr compressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((compressed != nullptr))
      << "Failed to create compressed evbuffer";
  EvbufferPtr decompressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((decompressed != nullptr))
      << "Failed to create decompressed evbuffer";

  auto err = ni::DataCompressor::CompressData(
      ni::DataCompressor::Type::DEFLATE, source.get(), compressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to compress data: " << TRITONSERVER_ErrorMessage(err);
  // The ratio check is only meaningful once 'compressed' holds the output;
  // on a still-empty buffer it would pass trivially.
  ASSERT_GE(raw_data_length_ / 2, evbuffer_get_length(compressed.get()))
      << "Compression should be desired for large data";

  err = ni::DataCompressor::DecompressData(
      ni::DataCompressor::Type::DEFLATE, compressed.get(), decompressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to decompress data: " << TRITONSERVER_ErrorMessage(err);
  size_t destination_byte_size = evbuffer_get_length(decompressed.get());
  ASSERT_EQ(destination_byte_size, raw_data_length_) << "Mismatched byte size";

  std::vector<char> res;
  EVBufferToContiguousBuffer(decompressed.get(), &res);
  // Compare in one call; a per-byte assertion is needlessly slow at this size
  ASSERT_EQ(memcmp(raw_data_.get(), res.data(), raw_data_length_), 0)
      << "Decompressed data does not match the source data";
}

// Round-trips a large, highly repetitive input through GZIP and checks both
// that the data survives unchanged and that compression shrinks it to at
// most half of its original size.
TEST_F(DataCompressorTest, GzipOneLargeBuffer)
{
  // Hold every evbuffer in a smart pointer so that it is released on all
  // paths, including the early return taken when an ASSERT_* check fails.
  using EvbufferPtr = std::unique_ptr<evbuffer, decltype(&evbuffer_free)>;

  // Grow the raw data to 2^19 copies of itself by doubling it 19 times. The
  // allocation is sized to exactly what the doubling loop fills.
  const size_t doubling_count = 19;
  {
    std::unique_ptr<char[]> extended_raw_data(
        new char[raw_data_length_ << doubling_count]);
    memcpy(extended_raw_data.get(), raw_data_.get(), raw_data_length_);
    size_t filled_size = raw_data_length_;
    for (size_t i = 0; i < doubling_count; ++i) {
      // Duplicate everything written so far right behind itself
      memcpy(
          extended_raw_data.get() + filled_size, extended_raw_data.get(),
          filled_size);
      filled_size += filled_size;
    }
    raw_data_length_ = filled_size;
    raw_data_.swap(extended_raw_data);
  }
  // Convert the raw data into evbuffer format
  EvbufferPtr source(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((source != nullptr)) << "Failed to create source evbuffer";
  ASSERT_EQ(evbuffer_add(source.get(), raw_data_.get(), raw_data_length_), 0)
      << "Failed to initialize source evbuffer";

  EvbufferPtr compressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((compressed != nullptr))
      << "Failed to create compressed evbuffer";
  EvbufferPtr decompressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((decompressed != nullptr))
      << "Failed to create decompressed evbuffer";

  auto err = ni::DataCompressor::CompressData(
      ni::DataCompressor::Type::GZIP, source.get(), compressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to compress data: " << TRITONSERVER_ErrorMessage(err);
  // The ratio check is only meaningful once 'compressed' holds the output;
  // on a still-empty buffer it would pass trivially.
  ASSERT_GE(raw_data_length_ / 2, evbuffer_get_length(compressed.get()))
      << "Compression should be desired for large data";

  err = ni::DataCompressor::DecompressData(
      ni::DataCompressor::Type::GZIP, compressed.get(), decompressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to decompress data: " << TRITONSERVER_ErrorMessage(err);

  size_t destination_byte_size = evbuffer_get_length(decompressed.get());
  ASSERT_EQ(destination_byte_size, raw_data_length_) << "Mismatched byte size";

  std::vector<char> res;
  EVBufferToContiguousBuffer(decompressed.get(), &res);
  // Compare in one call; a per-byte assertion is needlessly slow at this size
  ASSERT_EQ(memcmp(raw_data_.get(), res.data(), raw_data_length_), 0)
      << "Decompressed data does not match the source data";
}

// Decompresses the pre-generated DEFLATE fixture and verifies that the
// result equals the raw fixture data.
TEST_F(DataCompressorTest, DecompressDeflateBuffer)
{
  // Hold every evbuffer in a smart pointer so that it is released on all
  // paths, including the early return taken when an ASSERT_* check fails.
  using EvbufferPtr = std::unique_ptr<evbuffer, decltype(&evbuffer_free)>;

  // Convert the compressed data into evbuffer format
  EvbufferPtr source(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((source != nullptr)) << "Failed to create source evbuffer";
  ASSERT_EQ(
      evbuffer_add(
          source.get(), deflate_compressed_data_.get(),
          deflate_compressed_length_),
      0)
      << "Failed to initialize source evbuffer";
  EvbufferPtr decompressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((decompressed != nullptr))
      << "Failed to create decompressed evbuffer";

  auto err = ni::DataCompressor::DecompressData(
      ni::DataCompressor::Type::DEFLATE, source.get(), decompressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to decompress data: " << TRITONSERVER_ErrorMessage(err);

  size_t destination_byte_size = evbuffer_get_length(decompressed.get());
  ASSERT_EQ(destination_byte_size, raw_data_length_) << "Mismatched byte size";

  std::vector<char> res;
  EVBufferToContiguousBuffer(decompressed.get(), &res);
  for (size_t idx = 0; idx < raw_data_length_; ++idx) {
    ASSERT_TRUE(raw_data_[idx] == res[idx])
        << "Mismatched data at byte " << idx;
  }
}

// Decompresses the pre-generated GZIP fixture and verifies that the result
// equals the raw fixture data.
TEST_F(DataCompressorTest, DecompressGzipBuffer)
{
  // Hold every evbuffer in a smart pointer so that it is released on all
  // paths, including the early return taken when an ASSERT_* check fails.
  using EvbufferPtr = std::unique_ptr<evbuffer, decltype(&evbuffer_free)>;

  // Convert the compressed data into evbuffer format
  EvbufferPtr source(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((source != nullptr)) << "Failed to create source evbuffer";
  ASSERT_EQ(
      evbuffer_add(
          source.get(), gzip_compressed_data_.get(), gzip_compressed_length_),
      0)
      << "Failed to initialize source evbuffer";
  EvbufferPtr decompressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((decompressed != nullptr))
      << "Failed to create decompressed evbuffer";

  auto err = ni::DataCompressor::DecompressData(
      ni::DataCompressor::Type::GZIP, source.get(), decompressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to decompress data: " << TRITONSERVER_ErrorMessage(err);

  size_t destination_byte_size = evbuffer_get_length(decompressed.get());
  ASSERT_EQ(destination_byte_size, raw_data_length_) << "Mismatched byte size";

  std::vector<char> res;
  EVBufferToContiguousBuffer(decompressed.get(), &res);
  for (size_t idx = 0; idx < raw_data_length_; ++idx) {
    ASSERT_TRUE(raw_data_[idx] == res[idx])
        << "Mismatched data at byte " << idx;
  }
}

// Compresses the raw fixture data with DEFLATE and writes the output to the
// file "generated_deflate_compressed_data" for validation outside this test.
TEST_F(DataCompressorTest, CompressDeflateBuffer)
{
  // Hold every evbuffer in a smart pointer so that it is released on all
  // paths, including the early return taken when an ASSERT_* check fails.
  using EvbufferPtr = std::unique_ptr<evbuffer, decltype(&evbuffer_free)>;

  // Convert the raw data into evbuffer format
  EvbufferPtr source(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((source != nullptr)) << "Failed to create source evbuffer";
  ASSERT_EQ(evbuffer_add(source.get(), raw_data_.get(), raw_data_length_), 0)
      << "Failed to initialize source evbuffer";

  EvbufferPtr compressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((compressed != nullptr))
      << "Failed to create compressed evbuffer";

  auto err = ni::DataCompressor::CompressData(
      ni::DataCompressor::Type::DEFLATE, source.get(), compressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to compress data: " << TRITONSERVER_ErrorMessage(err);

  // Write compressed data to file which will be validated by other compression
  // tool
  WriteEVBufferToFile("generated_deflate_compressed_data", compressed.get());
}

// Compresses the raw fixture data with GZIP and writes the output to the
// file "generated_gzip_compressed_data" for validation outside this test.
TEST_F(DataCompressorTest, CompressGzipBuffer)
{
  // Hold every evbuffer in a smart pointer so that it is released on all
  // paths, including the early return taken when an ASSERT_* check fails.
  using EvbufferPtr = std::unique_ptr<evbuffer, decltype(&evbuffer_free)>;

  // Convert the raw data into evbuffer format
  EvbufferPtr source(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((source != nullptr)) << "Failed to create source evbuffer";
  ASSERT_EQ(evbuffer_add(source.get(), raw_data_.get(), raw_data_length_), 0)
      << "Failed to initialize source evbuffer";

  EvbufferPtr compressed(evbuffer_new(), &evbuffer_free);
  ASSERT_TRUE((compressed != nullptr))
      << "Failed to create compressed evbuffer";

  auto err = ni::DataCompressor::CompressData(
      ni::DataCompressor::Type::GZIP, source.get(), compressed.get());
  ASSERT_TRUE((err == nullptr))
      << "Failed to compress data: " << TRITONSERVER_ErrorMessage(err);

  // Write compressed data to file which will be validated by other compression
  // tool
  WriteEVBufferToFile("generated_gzip_compressed_data", compressed.get());
}

// Helper to compress data with GZIP and return the compressed evbuffer.
// The caller owns the returned evbuffer and must release it with
// evbuffer_free(). Any failure while preparing or compressing the data is
// recorded as a non-fatal test failure instead of being silently ignored;
// nullptr is returned only when the output evbuffer cannot be created.
evbuffer*
CompressWithGzip(const char* data, size_t size)
{
  auto source = evbuffer_new();
  auto compressed = evbuffer_new();
  if ((source == nullptr) || (compressed == nullptr)) {
    ADD_FAILURE() << "Failed to create evbuffers for GZIP compression";
    if (source != nullptr) {
      evbuffer_free(source);
    }
    return compressed;
  }

  EXPECT_EQ(evbuffer_add(source, data, size), 0)
      << "Failed to initialize source evbuffer";
  auto err = ni::DataCompressor::CompressData(
      ni::DataCompressor::Type::GZIP, source, compressed);
  // The message operand is evaluated only when the expectation fails, that
  // is when 'err' is not null
  EXPECT_TRUE((err == nullptr))
      << "Failed to compress data: " << TRITONSERVER_ErrorMessage(err);
  evbuffer_free(source);
  return compressed;
}

// Helper to compress data with DEFLATE and return the compressed evbuffer.
// The caller owns the returned evbuffer and must release it with
// evbuffer_free(). Any failure while preparing or compressing the data is
// recorded as a non-fatal test failure instead of being silently ignored;
// nullptr is returned only when the output evbuffer cannot be created.
evbuffer*
CompressWithDeflate(const char* data, size_t size)
{
  auto source = evbuffer_new();
  auto compressed = evbuffer_new();
  if ((source == nullptr) || (compressed == nullptr)) {
    ADD_FAILURE() << "Failed to create evbuffers for DEFLATE compression";
    if (source != nullptr) {
      evbuffer_free(source);
    }
    return compressed;
  }

  EXPECT_EQ(evbuffer_add(source, data, size), 0)
      << "Failed to initialize source evbuffer";
  auto err = ni::DataCompressor::CompressData(
      ni::DataCompressor::Type::DEFLATE, source, compressed);
  // The message operand is evaluated only when the expectation fails, that
  // is when 'err' is not null
  EXPECT_TRUE((err == nullptr))
      << "Failed to compress data: " << TRITONSERVER_ErrorMessage(err);
  evbuffer_free(source);
  return compressed;
}

// Exercises the size limit argument of DecompressData for GZIP input.
// Payloads below and exactly at the limit are expected to decompress, a
// payload above the limit is expected to be rejected with INVALID_ARG, and
// passing 0 as the limit is expected to let the oversized payload through.
TEST_F(DataCompressorTest, DecompressionSizeLimitGzip)
{
  // One uniformly filled payload per size class
  std::unique_ptr<char[]> below_limit(new char[UNDER_LIMIT_DATA_SIZE]);
  std::unique_ptr<char[]> exactly_limit(new char[DEFAULT_MAX_INPUT_SIZE]);
  std::unique_ptr<char[]> above_limit(new char[OVER_LIMIT_DATA_SIZE]);
  memset(below_limit.get(), 'A', UNDER_LIMIT_DATA_SIZE);
  memset(exactly_limit.get(), 'B', DEFAULT_MAX_INPUT_SIZE);
  memset(above_limit.get(), 'C', OVER_LIMIT_DATA_SIZE);

  // GZIP-compressed form of each payload
  evbuffer* below_gz = CompressWithGzip(below_limit.get(), UNDER_LIMIT_DATA_SIZE);
  evbuffer* exactly_gz =
      CompressWithGzip(exactly_limit.get(), DEFAULT_MAX_INPUT_SIZE);
  evbuffer* above_gz = CompressWithGzip(above_limit.get(), OVER_LIMIT_DATA_SIZE);

  // Case 1: payload below the limit is accepted
  {
    evbuffer* output = evbuffer_new();
    auto status = ni::DataCompressor::DecompressData(
        ni::DataCompressor::Type::GZIP, below_gz, output,
        DEFAULT_MAX_INPUT_SIZE);
    ASSERT_TRUE((status == nullptr))
        << "63 MB data should decompress within 64 MB limit: "
        << TRITONSERVER_ErrorMessage(status);
    ASSERT_EQ(evbuffer_get_length(output), UNDER_LIMIT_DATA_SIZE);
    evbuffer_free(output);
  }

  // Case 2: payload exactly at the limit is accepted (boundary)
  {
    evbuffer* output = evbuffer_new();
    auto status = ni::DataCompressor::DecompressData(
        ni::DataCompressor::Type::GZIP, exactly_gz, output,
        DEFAULT_MAX_INPUT_SIZE);
    ASSERT_TRUE((status == nullptr))
        << "64 MB data should decompress at exact 64 MB limit: "
        << TRITONSERVER_ErrorMessage(status);
    ASSERT_EQ(evbuffer_get_length(output), DEFAULT_MAX_INPUT_SIZE);
    evbuffer_free(output);
  }

  // Case 3: payload above the limit is rejected with a descriptive error
  {
    evbuffer* output = evbuffer_new();
    auto status = ni::DataCompressor::DecompressData(
        ni::DataCompressor::Type::GZIP, above_gz, output,
        DEFAULT_MAX_INPUT_SIZE);
    ASSERT_TRUE((status != nullptr))
        << "65 MB data should fail with 64 MB limit";
    ASSERT_EQ(TRITONSERVER_ErrorCode(status), TRITONSERVER_ERROR_INVALID_ARG);
    std::string error_msg = TRITONSERVER_ErrorMessage(status);
    ASSERT_TRUE(
        error_msg.find("exceeds the maximum allowed") != std::string::npos)
        << "Error message should mention size limit: " << error_msg;
    evbuffer_free(output);
  }

  // Case 4: the same oversized payload is accepted when the limit is 0
  {
    evbuffer* output = evbuffer_new();
    auto status = ni::DataCompressor::DecompressData(
        ni::DataCompressor::Type::GZIP, above_gz, output, 0);
    ASSERT_TRUE((status == nullptr))
        << "65 MB data should decompress with no limit: "
        << TRITONSERVER_ErrorMessage(status);
    ASSERT_EQ(evbuffer_get_length(output), OVER_LIMIT_DATA_SIZE);
    evbuffer_free(output);
  }

  // Release the compressed inputs
  evbuffer_free(below_gz);
  evbuffer_free(exactly_gz);
  evbuffer_free(above_gz);
}

// Exercises the size limit argument of DecompressData for DEFLATE input.
// Payloads below and exactly at the limit are expected to decompress, a
// payload above the limit is expected to be rejected with INVALID_ARG, and
// passing 0 as the limit is expected to let the oversized payload through.
TEST_F(DataCompressorTest, DecompressionSizeLimitDeflate)
{
  // One uniformly filled payload per size class
  std::unique_ptr<char[]> below_limit(new char[UNDER_LIMIT_DATA_SIZE]);
  std::unique_ptr<char[]> exactly_limit(new char[DEFAULT_MAX_INPUT_SIZE]);
  std::unique_ptr<char[]> above_limit(new char[OVER_LIMIT_DATA_SIZE]);
  memset(below_limit.get(), 'A', UNDER_LIMIT_DATA_SIZE);
  memset(exactly_limit.get(), 'B', DEFAULT_MAX_INPUT_SIZE);
  memset(above_limit.get(), 'C', OVER_LIMIT_DATA_SIZE);

  // DEFLATE-compressed form of each payload
  evbuffer* below_deflated =
      CompressWithDeflate(below_limit.get(), UNDER_LIMIT_DATA_SIZE);
  evbuffer* exactly_deflated =
      CompressWithDeflate(exactly_limit.get(), DEFAULT_MAX_INPUT_SIZE);
  evbuffer* above_deflated =
      CompressWithDeflate(above_limit.get(), OVER_LIMIT_DATA_SIZE);

  // Case 1: payload below the limit is accepted
  {
    evbuffer* output = evbuffer_new();
    auto status = ni::DataCompressor::DecompressData(
        ni::DataCompressor::Type::DEFLATE, below_deflated, output,
        DEFAULT_MAX_INPUT_SIZE);
    ASSERT_TRUE((status == nullptr))
        << "63 MB data should decompress within 64 MB limit: "
        << TRITONSERVER_ErrorMessage(status);
    ASSERT_EQ(evbuffer_get_length(output), UNDER_LIMIT_DATA_SIZE);
    evbuffer_free(output);
  }

  // Case 2: payload exactly at the limit is accepted (boundary)
  {
    evbuffer* output = evbuffer_new();
    auto status = ni::DataCompressor::DecompressData(
        ni::DataCompressor::Type::DEFLATE, exactly_deflated, output,
        DEFAULT_MAX_INPUT_SIZE);
    ASSERT_TRUE((status == nullptr))
        << "64 MB data should decompress at exact 64 MB limit: "
        << TRITONSERVER_ErrorMessage(status);
    ASSERT_EQ(evbuffer_get_length(output), DEFAULT_MAX_INPUT_SIZE);
    evbuffer_free(output);
  }

  // Case 3: payload above the limit is rejected with a descriptive error
  {
    evbuffer* output = evbuffer_new();
    auto status = ni::DataCompressor::DecompressData(
        ni::DataCompressor::Type::DEFLATE, above_deflated, output,
        DEFAULT_MAX_INPUT_SIZE);
    ASSERT_TRUE((status != nullptr))
        << "65 MB data should fail with 64 MB limit";
    ASSERT_EQ(TRITONSERVER_ErrorCode(status), TRITONSERVER_ERROR_INVALID_ARG);
    std::string error_msg = TRITONSERVER_ErrorMessage(status);
    ASSERT_TRUE(
        error_msg.find("exceeds the maximum allowed") != std::string::npos)
        << "Error message should mention size limit: " << error_msg;
    evbuffer_free(output);
  }

  // Case 4: the same oversized payload is accepted when the limit is 0
  {
    evbuffer* output = evbuffer_new();
    auto status = ni::DataCompressor::DecompressData(
        ni::DataCompressor::Type::DEFLATE, above_deflated, output, 0);
    ASSERT_TRUE((status == nullptr))
        << "65 MB data should decompress with no limit: "
        << TRITONSERVER_ErrorMessage(status);
    ASSERT_EQ(evbuffer_get_length(output), OVER_LIMIT_DATA_SIZE);
    evbuffer_free(output);
  }

  // Release the compressed inputs
  evbuffer_free(below_deflated);
  evbuffer_free(exactly_deflated);
  evbuffer_free(above_deflated);
}

}  // namespace

// Test binary entry point: hands the command line to GoogleTest and returns
// the result of running every registered test as the process exit code.
int
main(int argc, char** argv)
{
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}


================================================
FILE: src/test/distributed_addsub/CMakeLists.txt
================================================
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Build configuration for the distributed addsub test backend.
cmake_minimum_required(VERSION 3.31.8)

project(tritondistributedaddsubbackend LANGUAGES C CXX)

#
# libtriton_distributed_addsub.so
# Shared library implementing the Triton Distributed Addsub Backend API
#
# Copy the linker version script into the build directory unchanged
# (COPYONLY performs no variable substitution). The link step below refers to
# the copied file.
configure_file(src/libtriton_distributed_addsub.ldscript libtriton_distributed_addsub.ldscript COPYONLY)

# The backend is built from a single source file.
add_library(
  triton-distributed-addsub-backend SHARED
  src/distributed_addsub.cc
)

# Namespaced alias matching the name under which the target is exported.
add_library(
  TritonDistributedAddsubBackend::triton-distributed-addsub-backend ALIAS triton-distributed-addsub-backend
)

# Require C++11 and, on Clang/AppleClang/GNU, treat warnings as errors with
# the listed warning set.
target_compile_features(triton-distributed-addsub-backend PRIVATE cxx_std_11)
target_compile_options(
  triton-distributed-addsub-backend PRIVATE
  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

target_link_libraries(
  triton-distributed-addsub-backend
  PRIVATE
    triton-backend-utils    # from repo-backend
    triton-core-serverapi   # from repo-core
    triton-core-backendapi  # from repo-core
    triton-core-serverstub  # from repo-core
)

# OUTPUT_NAME yields the library name given in the header comment above.
# LINK_DEPENDS relinks the library whenever the copied version script
# changes, and LINK_FLAGS passes that script to the linker.
set_target_properties(
  triton-distributed-addsub-backend PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  OUTPUT_NAME triton_distributed_addsub
  LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_distributed_addsub.ldscript
  LINK_FLAGS "-Wl,--version-script libtriton_distributed_addsub.ldscript"
)

#
# Install
#
include(GNUInstallDirs)
# Directory that receives the generated CMake package files.
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonDistributedAddsubBackend)

# Install the backend library under backends/distributed_addsub in the
# install prefix and record the target in an export set.
install(
  TARGETS
    triton-distributed-addsub-backend
  EXPORT
    triton-distributed-addsub-backend-targets
  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/distributed_addsub
  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/distributed_addsub
)

# Install the export set as a targets file using the package namespace.
install(
  EXPORT
    triton-distributed-addsub-backend-targets
  FILE
    TritonDistributedAddsubBackendTargets.cmake
  NAMESPACE
    TritonDistributedAddsubBackend::
  DESTINATION
    ${INSTALL_CONFIGDIR}
)

# Generate the package config file from its template and install it next to
# the targets file.
include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonDistributedAddsubBackendConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/TritonDistributedAddsubBackendConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES
  ${CMAKE_CURRENT_BINARY_DIR}/TritonDistributedAddsubBackendConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
# Write a targets file into the build directory so the target can be imported
# from the build tree as well as from an installation.
export(
  EXPORT triton-distributed-addsub-backend-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonDistributedAddsubBackendTargets.cmake
  NAMESPACE TritonDistributedAddsubBackend::
)

# Register the build directory in the CMake user package registry (this has
# an effect only when CMAKE_EXPORT_PACKAGE_REGISTRY is enabled).
export(PACKAGE TritonDistributedAddsubBackend)


================================================
FILE: src/test/distributed_addsub/cmake/TritonDistributedAddsubBackendConfig.cmake.in
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Package configuration template for TritonDistributedAddsubBackend: makes
# the exported backend target available to the including project.
include(CMakeFindDependencyMacro)

# Directory that contains this config file.
get_filename_component(
  TRITONDISTRIBUTEDADDSUBBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)

list(APPEND CMAKE_MODULE_PATH ${TRITONDISTRIBUTEDADDSUBBACKEND_CMAKE_DIR})

# Load the exported targets only once, even if the package is found again.
if(NOT TARGET TritonDistributedAddsubBackend::triton-distributed-addsub-backend)
  include("${TRITONDISTRIBUTEDADDSUBBACKEND_CMAKE_DIR}/TritonDistributedAddsubBackendTargets.cmake")
endif()

# Variable consumers can use to link against the backend target.
set(TRITONDISTRIBUTEDADDSUBBACKEND_LIBRARIES TritonDistributedAddsubBackend::triton-distributed-addsub-backend)


================================================
FILE: src/test/distributed_addsub/src/distributed_addsub.cc
================================================
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <atomic>
#include <memory>
#include <thread>

#include "triton/backend/backend_common.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"

namespace triton { namespace backend { namespace distributed_addsub {


// Addsub backend that distributes partial computations to different model
// instances and gathers the results internally to form the response.
// This backend is designed so that the CPU instance performs the add task
// and the GPU instance performs the sub task; only the CPU instances
// accept inference requests from Triton core. Note that "GPU instance" has
// a different meaning in this backend.
//
// The backend supports models that take two variable-size INT32 [ -1 ]
// input tensors, INPUT0 and INPUT1, and produce two output tensors:
// OUTPUT0 as the element-wise sum of INPUT0 and INPUT1, and OUTPUT1 as
// the element-wise difference of INPUT0 and INPUT1.
//

// Evaluate X (an expression yielding a TRITONSERVER_Error*) only when
// RESPONSES[IDX] is still non-null. If X yields an error, that error is sent
// as the final response for RESPONSES[IDX], RESPONSES[IDX] is set to nullptr
// so that later guarded steps for the same index are skipped, and the error
// object is deleted. When RESPONSES[IDX] is already nullptr, X is not
// evaluated at all.
#define GUARDED_RESPOND_IF_ERROR(RESPONSES, IDX, X)                     \
  do {                                                                  \
    if ((RESPONSES)[IDX] != nullptr) {                                  \
      TRITONSERVER_Error* err__ = (X);                                  \
      if (err__ != nullptr) {                                           \
        LOG_IF_ERROR(                                                   \
            TRITONBACKEND_ResponseSend(                                 \
                (RESPONSES)[IDX], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
                err__),                                                 \
            "failed to send error response");                           \
        (RESPONSES)[IDX] = nullptr;                                     \
        TRITONSERVER_ErrorDelete(err__);                                \
      }                                                                 \
    }                                                                   \
  } while (false)

//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model.
//
class ModelInstanceState;

// Model-wide state shared by every instance of a model that uses this
// backend. Besides what BackendModel provides, it remembers the single
// "sub" instance to which work is handed from within the model.
class ModelState : public BackendModel {
 public:
  // Allocate a ModelState for 'triton_model' and store it in '*state'.
  // Returns nullptr on success, otherwise an error.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_Model* triton_model, ModelState** state);
  virtual ~ModelState() = default;

  // Check that the model configuration is one this backend can serve.
  TRITONSERVER_Error* ValidateModelConfig();

  // Register the instance that only receives work distributed from within
  // the model. A later registration replaces an earlier one.
  void AddSubInstance(ModelInstanceState* instance)
  {
    instance_ = instance;
  }

  // The most recently registered sub instance, or nullptr if none has been
  // registered.
  ModelInstanceState* SubInstance()
  {
    return instance_;
  }

  // Publicly accessible instance counter; starts at zero.
  std::atomic<size_t> instance_counter_{0};

 private:
  ModelState(TRITONBACKEND_Model* triton_model) : BackendModel(triton_model)
  {
  }

  ModelInstanceState* instance_ = nullptr;
};

// Build a ModelState for 'triton_model'. On success '*state' receives the new
// object and nullptr is returned. A BackendModelException raised during
// construction is converted into the returned error and '*state' is left
// untouched.
TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
  try {
    ModelState* created = new ModelState(triton_model);
    *state = created;
  }
  catch (const BackendModelException& ex) {
    // An exception that carries no error object is itself reported as an
    // internal error.
    RETURN_ERROR_IF_TRUE(
        ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelException"));
    RETURN_IF_ERROR(ex.err_);
  }

  return nullptr;  // success
}

// Validate the model configuration against what this backend supports:
// exactly two inputs named INPUT0/INPUT1 and two outputs named
// OUTPUT0/OUTPUT1 (in that order), all TYPE_INT32, all one-dimensional, and
// all sharing the same dimension value. Returns nullptr when the
// configuration is acceptable, otherwise the first error encountered.
TRITONSERVER_Error*
ModelState::ValidateModelConfig()
{
  // Log the complete model configuration, which is held as a JSON DOM.
  common::TritonJson::WriteBuffer config_text;
  RETURN_IF_ERROR(model_config_.PrettyWrite(&config_text));
  const std::string config_msg =
      std::string("model configuration:\n") + config_text.Contents();
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, config_msg.c_str());

  common::TritonJson::Value input_list, output_list;
  RETURN_IF_ERROR(model_config_.MemberAsArray("input", &input_list));
  RETURN_IF_ERROR(model_config_.MemberAsArray("output", &output_list));

  // The configuration must declare exactly two inputs and two outputs.
  RETURN_ERROR_IF_FALSE(
      input_list.ArraySize() == 2, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model must have two inputs"));

  RETURN_ERROR_IF_FALSE(
      output_list.ArraySize() == 2, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model must have two outputs"));

  // Dimension of the first input; every other tensor must agree with it.
  int64_t reference_dim = 0;
  for (size_t idx = 0; idx < 2; ++idx) {
    const std::string idx_text = std::to_string(idx);

    // ---- input at position 'idx' ----
    common::TritonJson::Value input_cfg;
    RETURN_IF_ERROR(input_list.IndexAsObject(idx, &input_cfg));

    std::vector<int64_t> input_dims;
    RETURN_IF_ERROR(backend::ParseShape(input_cfg, "dims", &input_dims));

    RETURN_ERROR_IF_FALSE(
        input_dims.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
        std::string("model must have input of one-dimensional shape"));
    if (idx != 0) {
      RETURN_ERROR_IF_FALSE(
          reference_dim == input_dims[0], TRITONSERVER_ERROR_INVALID_ARG,
          std::string("model must have consistent shape for all tensors"));
    } else {
      reference_dim = input_dims[0];
    }

    std::string input_type;
    RETURN_IF_ERROR(input_cfg.MemberAsString("data_type", &input_type));

    RETURN_ERROR_IF_FALSE(
        input_type == "TYPE_INT32", TRITONSERVER_ERROR_INVALID_ARG,
        std::string("model input must have TYPE_INT32 data-type"));

    const char* input_name;
    size_t input_name_len;
    RETURN_IF_ERROR(
        input_cfg.MemberAsString("name", &input_name, &input_name_len));

    const std::string wanted_input = std::string("INPUT") + idx_text;
    RETURN_ERROR_IF_FALSE(
        wanted_input == input_name, TRITONSERVER_ERROR_INVALID_ARG,
        std::string("model input must be named '") + wanted_input +
            "' at index " + idx_text);

    // ---- output at position 'idx' ----
    common::TritonJson::Value output_cfg;
    RETURN_IF_ERROR(output_list.IndexAsObject(idx, &output_cfg));

    std::vector<int64_t> output_dims;
    RETURN_IF_ERROR(backend::ParseShape(output_cfg, "dims", &output_dims));

    // The output must be one-dimensional and match the paired input.
    RETURN_ERROR_IF_FALSE(
        (output_dims.size() == 1) && (output_dims[0] == input_dims[0]),
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string("model must have consistent shape for all tensors"));

    std::string output_type;
    RETURN_IF_ERROR(output_cfg.MemberAsString("data_type", &output_type));

    RETURN_ERROR_IF_FALSE(
        output_type == "TYPE_INT32", TRITONSERVER_ERROR_INVALID_ARG,
        std::string("model output must have TYPE_INT32 data-type"));

    const char* output_name;
    size_t output_name_len;
    RETURN_IF_ERROR(
        output_cfg.MemberAsString("name", &output_name, &output_name_len));

    const std::string wanted_output = std::string("OUTPUT") + idx_text;
    RETURN_ERROR_IF_FALSE(
        wanted_output == output_name, TRITONSERVER_ERROR_INVALID_ARG,
        std::string("model output must be named '") + wanted_output +
            "' at index " + idx_text);
  }

  return nullptr;  // success
}

//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each TRITONBACKEND_ModelInstance.
//
// Per-instance state. In addition to what BackendModelInstance provides it
// keeps a pointer to the owning ModelState and whether Triton marked the
// instance as passive.
class ModelInstanceState : public BackendModelInstance {
 public:
  // Allocate the state for 'triton_model_instance' and store it in '*state'.
  // Returns nullptr on success, otherwise an error.
  static TRITONSERVER_Error* Create(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance,
      ModelInstanceState** state);
  virtual ~ModelInstanceState() = default;

  // The state of the model this instance belongs to.
  ModelState* StateForModel() const
  {
    return model_state_;
  }

  // Whether this instance is passive.
  bool IsPassive() const
  {
    return passive_;
  }

  // output[i] = input_0[i] + input_1[i] for 'element_count' elements.
  // Returns an error unless called on a CPU instance.
  TRITONSERVER_Error* Add(
      const size_t element_count, const int32_t* input_0,
      const int32_t* input_1, int32_t* output);

  // output[i] = input_0[i] - input_1[i] for 'element_count' elements.
  // Returns an error unless called on a GPU instance.
  TRITONSERVER_Error* Sub(
      const size_t element_count, const int32_t* input_0,
      const int32_t* input_1, int32_t* output);

 private:
  ModelInstanceState(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance);

  ModelState* model_state_;
  bool passive_;
};

// Construct the instance state and verify that the instance is set up the
// way this backend requires: a CPU instance must not be passive, a GPU
// instance must be passive, and no other instance kind is accepted. Throws
// BackendModelInstanceException when a requirement is violated.
ModelInstanceState::ModelInstanceState(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance)
    : BackendModelInstance(model_state, triton_model_instance),
      model_state_(model_state)
{
  THROW_IF_BACKEND_INSTANCE_ERROR(
      TRITONBACKEND_ModelInstanceIsPassive(triton_model_instance, &passive_));

  // Work out which rule, if any, the configuration breaks.
  const char* setup_problem = nullptr;
  if (kind_ == TRITONSERVER_INSTANCEGROUPKIND_CPU) {
    if (passive_) {
      setup_problem = "CPU instance should not be passive";
    }
  } else if (kind_ == TRITONSERVER_INSTANCEGROUPKIND_GPU) {
    if (!passive_) {
      setup_problem = "GPU instance should be passive";
    }
  } else {
    setup_problem = "instance kind must be CPU or GPU";
  }

  if (setup_problem != nullptr) {
    throw BackendModelInstanceException(TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG, std::string(setup_problem).c_str()));
  }
}

// Build a ModelInstanceState for 'triton_model_instance'. On success '*state'
// receives the new object and nullptr is returned. A
// BackendModelInstanceException raised during construction is converted into
// the returned error and '*state' is left untouched.
TRITONSERVER_Error*
ModelInstanceState::Create(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance,
    ModelInstanceState** state)
{
  try {
    ModelInstanceState* created =
        new ModelInstanceState(model_state, triton_model_instance);
    *state = created;
  }
  catch (const BackendModelInstanceException& ex) {
    // An exception that carries no error object is itself reported as an
    // internal error.
    RETURN_ERROR_IF_TRUE(
        ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelInstanceException"));
    RETURN_IF_ERROR(ex.err_);
  }

  return nullptr;  // success
}

// Element-wise addition: output[i] = input_0[i] + input_1[i] for the first
// 'element_count' elements. Only a CPU instance may perform it; any other
// instance kind gets an internal error and 'output' is not written.
TRITONSERVER_Error*
ModelInstanceState::Add(
    const size_t element_count, const int32_t* input_0, const int32_t* input_1,
    int32_t* output)
{
  if (kind_ == TRITONSERVER_INSTANCEGROUPKIND_CPU) {
    size_t pos = 0;
    while (pos < element_count) {
      output[pos] = input_0[pos] + input_1[pos];
      ++pos;
    }
    return nullptr;  // success
  }

  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_INTERNAL,
      std::string("Add operation must be done by CPU instance").c_str());
}

// Element-wise subtraction: output[i] = input_0[i] - input_1[i] for the first
// 'element_count' elements. Only a GPU instance may perform it; any other
// instance kind gets an internal error and 'output' is not written.
TRITONSERVER_Error*
ModelInstanceState::Sub(
    const size_t element_count, const int32_t* input_0, const int32_t* input_1,
    int32_t* output)
{
  if (kind_ == TRITONSERVER_INSTANCEGROUPKIND_GPU) {
    size_t pos = 0;
    while (pos < element_count) {
      output[pos] = input_0[pos] - input_1[pos];
      ++pos;
    }
    return nullptr;  // success
  }

  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_INTERNAL,
      std::string("Sub operation must be done by GPU instance").c_str());
}

/////////////

extern "C" {

// Implementing TRITONBACKEND_Initialize is optional. The backend
// should initialize any global state that is intended to be shared
// across all models and model instances that use the backend.
//
// This implementation keeps no global state. It logs the backend name and
// the TRITONBACKEND API versions, refuses to load when the API version
// offered by Triton is not compatible with the one this backend was compiled
// against, and finally logs the backend configuration.
//
// Returns nullptr on success. Otherwise returns the error of the failing
// TRITONBACKEND/TRITONSERVER call, or a TRITONSERVER_ERROR_UNSUPPORTED error
// on an API version mismatch.
TRITONSERVER_Error*
TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
{
  const char* cname;
  RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &cname));
  std::string name(cname);

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("TRITONBACKEND_Initialize: ") + name).c_str());

  // We should check the backend API version that Triton supports
  // vs. what this backend was compiled against.
  uint32_t api_version_major, api_version_minor;
  RETURN_IF_ERROR(
      TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("Triton TRITONBACKEND API version: ") +
       std::to_string(api_version_major) + "." +
       std::to_string(api_version_minor))
          .c_str());
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("'") + name + "' TRITONBACKEND API version: " +
       std::to_string(TRITONBACKEND_API_VERSION_MAJOR) + "." +
       std::to_string(TRITONBACKEND_API_VERSION_MINOR))
          .c_str());

  // The major version must match exactly and Triton's minor version must be
  // at least the one this backend was built against.
  if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) ||
      (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        "triton backend API version does not support this backend");
  }

  // The backend configuration may contain information needed by the
  // backend, such as command-line arguments. This backend doesn't use
  // any such configuration but we print whatever is available.
  TRITONSERVER_Message* backend_config_message;
  RETURN_IF_ERROR(
      TRITONBACKEND_BackendConfig(backend, &backend_config_message));

  const char* buffer = nullptr;
  size_t byte_size = 0;
  RETURN_IF_ERROR(TRITONSERVER_MessageSerializeToJson(
      backend_config_message, &buffer, &byte_size));

  // The serialized JSON is handed back as (base, byte size). Build the string
  // from both values instead of assuming that 'buffer' is NUL-terminated.
  const std::string backend_config =
      (buffer != nullptr) ? std::string(buffer, byte_size) : std::string();
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("backend configuration:\n") + backend_config).c_str());

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInitialize is optional. The backend
// should initialize any state that is intended to be shared across
// all instances of the model.
//
// Here a ModelState is created, attached to 'model', and the model
// configuration is validated. Returns nullptr on success, otherwise the
// first error encountered.
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  const char* model_cname;
  RETURN_IF_ERROR(TRITONBACKEND_ModelName(model, &model_cname));
  const std::string model_name(model_cname);

  uint64_t model_version;
  RETURN_IF_ERROR(TRITONBACKEND_ModelVersion(model, &model_version));

  const std::string banner = std::string("TRITONBACKEND_ModelInitialize: ") +
                             model_name + " (version " +
                             std::to_string(model_version) + ")";
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, banner.c_str());

  // Create the per-model state and hand it to Triton so that it can be
  // retrieved again through the TRITONBACKEND_Model.
  ModelState* created_state;
  RETURN_IF_ERROR(ModelState::Create(model, &created_state));
  RETURN_IF_ERROR(TRITONBACKEND_ModelSetState(
      model, reinterpret_cast<void*>(created_state)));

  // Reject configurations this backend cannot serve; the returned error is
  // passed back to the caller.
  RETURN_IF_ERROR(created_state->ValidateModelConfig());

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelFinalize is optional unless state
// is set using TRITONBACKEND_ModelSetState. The backend must free
// this state and perform any other cleanup.
//
// Deletes the ModelState attached to 'model'. Returns nullptr on success, the
// error from TRITONBACKEND_ModelState if the state cannot be retrieved, or an
// internal error if the model's instance counter is non-zero at this point
// (the state is deleted in that case as well).
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
  void* vstate = nullptr;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vstate));
  ModelState* model_state = reinterpret_cast<ModelState*>(vstate);

  // No state is attached if ModelState::Create failed during initialization;
  // there is nothing to inspect or free in that case.
  if (model_state == nullptr) {
    return nullptr;  // success
  }

  TRITONSERVER_Error* err = nullptr;
  if (model_state->instance_counter_ != 0) {
    err = TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Unexpected unfinalized model instance(s)");
  }

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO, "TRITONBACKEND_ModelFinalize: delete model state");

  delete model_state;

  return err;
}

// Implementing TRITONBACKEND_ModelInstanceInitialize is optional. The
// backend should initialize any state that is required for a model
// instance.
//
// Here a ModelInstanceState is created and attached to 'instance'. A passive
// instance is additionally registered with the model state as the "sub"
// instance. Returns nullptr on success, otherwise the first error
// encountered.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  const char* instance_cname;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceName(instance, &instance_cname));
  const std::string instance_name(instance_cname);

  int32_t device;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceDeviceId(instance, &device));
  TRITONSERVER_InstanceGroupKind group_kind;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceKind(instance, &group_kind));

  const std::string banner =
      std::string("TRITONBACKEND_ModelInstanceInitialize: ") + instance_name +
      " (" + TRITONSERVER_InstanceGroupKindString(group_kind) + " device " +
      std::to_string(device) + ")";
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, banner.c_str());

  // Look up the model that owns this instance and, through it, the
  // ModelState attached to that model.
  TRITONBACKEND_Model* owning_model;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &owning_model));

  void* raw_model_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(owning_model, &raw_model_state));
  ModelState* model_state = reinterpret_cast<ModelState*>(raw_model_state);

  // Create the per-instance state and attach it to the
  // TRITONBACKEND_ModelInstance.
  ModelInstanceState* created_state;
  RETURN_IF_ERROR(
      ModelInstanceState::Create(model_state, instance, &created_state));
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
      instance, reinterpret_cast<void*>(created_state)));

  // A passive instance becomes the target for work handed over from within
  // the model.
  if (created_state->IsPassive()) {
    model_state->AddSubInstance(created_state);
  }

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceFinalize is optional unless
// state is set using TRITONBACKEND_ModelInstanceSetState. The backend
// must free this state and perform any other cleanup.
//
// Deletes the ModelInstanceState attached to 'instance'. Returns nullptr on
// success, or the error from TRITONBACKEND_ModelInstanceState if the state
// cannot be retrieved.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
  void* raw_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &raw_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      "TRITONBACKEND_ModelInstanceFinalize: delete instance state");

  delete reinterpret_cast<ModelInstanceState*>(raw_state);

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceExecute is required.
//
// Executes 'request_count' requests on a CPU instance. For every requested
// output named OUTPUT0 the CPU instance computes INPUT0 + INPUT1; for any
// other requested output the passive "sub" instance registered with the model
// computes INPUT0 - INPUT1.
//
// Returns an error (without taking ownership of the requests) when the
// instance state cannot be obtained, when the instance is not a CPU instance,
// or when a response object cannot be created. Once all response objects
// exist the function owns the requests: it returns nullptr, every request is
// released exactly once, and a failure in an individual request is reported
// through that request's error response.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  // Triton will not call this function simultaneously for the same
  // 'instance'. But since this backend could be used by multiple
  // instances from multiple models the implementation needs to handle
  // multiple calls to this function at the same time (with different
  // 'instance' objects). Suggested practice for this is to use only
  // function-local and model-instance-specific state (obtained from
  // 'instance'), which is what we do here.
  ModelInstanceState* instance_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
      instance, reinterpret_cast<void**>(&instance_state)));
  ModelState* model_state = instance_state->StateForModel();

  // This backend specifies BLOCKING execution policy. That means that
  // we should not return from this function until execution is complete. Triton
  // will automatically release 'instance' on return from this function so that
  // it is again available to be used for another call to
  // TRITONBACKEND_ModelInstanceExecute.

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model ") + model_state->Name() + ", instance " +
       instance_state->Name() + " (" +
       TRITONSERVER_InstanceGroupKindString(instance_state->Kind()) +
       " device " + std::to_string(instance_state->DeviceId()) + ")" +
       ", executing " + std::to_string(request_count) + " requests")
          .c_str());

  if (instance_state->Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Unexpected inference request sent to non-CPU instance");
  }

  // nullptr when no passive instance has been registered with the model; this
  // is checked before the pointer is used below.
  ModelInstanceState* sub_instance_state = model_state->SubInstance();

  // 'responses' is initialized with the response objects below and
  // if/when an error response is sent the corresponding entry in
  // 'responses' is set to nullptr to indicate that that response has
  // already been sent.
  std::vector<TRITONBACKEND_Response*> responses;
  responses.reserve(request_count);

  // Create a single response object for each request. If something
  // goes wrong when attempting to create the response objects just
  // fail all of the requests by returning an error.
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Response* response = nullptr;
    TRITONSERVER_Error* create_err =
        TRITONBACKEND_ResponseNew(&response, requests[r]);
    if (create_err != nullptr) {
      // The responses created so far will never be sent, so destroy them
      // here instead of leaking them.
      for (TRITONBACKEND_Response* created : responses) {
        LOG_IF_ERROR(
            TRITONBACKEND_ResponseDelete(created),
            "failed to delete response");
      }
      return create_err;
    }
    responses.push_back(response);
  }

  // After this point we take ownership of 'requests', which means that a
  // response must be sent for every request and every request must be
  // released. If something does go wrong in processing a particular request
  // then we send an error response just for the specific request.

  // Release a request whose processing stopped before the regular end of
  // the per-request loop body (an error response has already been sent).
  auto release_request = [](TRITONBACKEND_Request* failed_request) {
    LOG_IF_ERROR(
        TRITONBACKEND_RequestRelease(
            failed_request, TRITONSERVER_REQUEST_RELEASE_ALL),
        "failed releasing request");
  };

  // The way we collect these batch timestamps is not entirely accurate.
  // Normally, in a performant backend you would execute all the requests at the
  // same time, and so there would be a single compute-start / compute-end
  // time-range. But here we execute each request separately so there is no
  // single range. As a result we just show the entire execute time as being the
  // compute time as well.
  uint64_t batch_exec_start_ns = 0;
  SET_TIMESTAMP(batch_exec_start_ns);
  uint64_t batch_exec_end_ns = 0;
  uint64_t total_batch_size = 0;

  // For simplicity we just process each request separately... in
  // general a backend should try to operate on the entire batch of
  // requests at the same time for improved performance.
  for (uint32_t r = 0; r < request_count; ++r) {
    uint64_t exec_start_ns = 0;
    SET_TIMESTAMP(exec_start_ns);

    TRITONBACKEND_Request* request = requests[r];

    uint32_t input_count = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, TRITONBACKEND_RequestInputCount(request, &input_count));

    uint32_t requested_output_count = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestOutputCount(request, &requested_output_count));

    // If an error response was sent for the above then display an error
    // message and move on to next request.
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read request input/output counts, error response "
           "sent")
              .c_str());
      release_request(request);
      continue;
    }

    TRITONBACKEND_Input* input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, TRITONBACKEND_RequestInput(request, "INPUT0", &input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input, error response sent")
              .c_str());
      release_request(request);
      continue;
    }

    TRITONSERVER_DataType input_datatype;
    const int64_t* input_shape;
    uint32_t input_dims_count;
    uint64_t input_byte_size;
    uint32_t input_buffer_count;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputProperties(
            input, nullptr /* input_name */, &input_datatype, &input_shape,
            &input_dims_count, &input_byte_size, &input_buffer_count));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input properties, error response sent")
              .c_str());
      release_request(request);
      continue;
    }
    if (input_dims_count > 1) {
      total_batch_size += input_shape[0];
    } else {
      ++total_batch_size;
    }

    // Both inputs are copied into buffers sized after INPUT0. The byte-size
    // arguments are in/out: buffer capacity on entry.
    std::vector<char> input_0(input_byte_size);
    std::vector<char> input_1(input_byte_size);
    size_t input_0_byte_size = input_0.size();
    size_t input_1_byte_size = input_1.size();
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        ReadInputTensor(
            request, "INPUT0", input_0.data(), &input_0_byte_size));
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        ReadInputTensor(
            request, "INPUT1", input_1.data(), &input_1_byte_size));
    // The element-wise operations below process 'input_byte_size' bytes of
    // both inputs, so INPUT1 must not be reported with a different size.
    if (input_1_byte_size != input_byte_size) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG,
              "INPUT0 and INPUT1 must have the same byte size"));
    }
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to get input buffer in CPU memory, error "
           "response sent")
              .c_str());
      release_request(request);
      continue;
    }

    // Compute... Get GPU instance from model state and let it compute
    // the subtraction, while the CPU instance computes the addition.
    // In real world some parallelization should be used, but here just
    // serialize the "distributed" work.
    TRITONBACKEND_Response* response = responses[r];
    const size_t element_count = input_byte_size / sizeof(int32_t);
    const int32_t* lhs = reinterpret_cast<const int32_t*>(input_0.data());
    const int32_t* rhs = reinterpret_cast<const int32_t*>(input_1.data());

    uint64_t compute_start_ns = 0;
    SET_TIMESTAMP(compute_start_ns);
    for (uint32_t out_idx = 0; out_idx < requested_output_count; ++out_idx) {
      const char* output_name;
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_RequestOutputName(request, out_idx, &output_name));

      TRITONBACKEND_Output* output;
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_ResponseOutput(
              response, &output, output_name, input_datatype, input_shape,
              input_dims_count));
      if (responses[r] == nullptr) {
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to create response output, error response sent")
                .c_str());
        break;
      }

      // Get the output buffer. We request a buffer in CPU memory but we have
      // to handle any returned type. If we get back a buffer in GPU memory we
      // just fail the request.
      void* output_buffer;
      TRITONSERVER_MemoryType output_memory_type = TRITONSERVER_MEMORY_CPU;
      int64_t output_memory_type_id = 0;
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_OutputBuffer(
              output, &output_buffer, input_byte_size, &output_memory_type,
              &output_memory_type_id));
      if (responses[r] == nullptr) {
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to create output buffer in CPU memory, error "
             "response sent")
                .c_str());
        break;
      }
      if (output_memory_type == TRITONSERVER_MEMORY_GPU) {
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_UNSUPPORTED,
                "failed to create output buffer in CPU memory"));
        break;
      }

      int32_t* result = reinterpret_cast<int32_t*>(output_buffer);

      // OUTPUT0 is the sum computed by this (CPU) instance; any other output
      // is the difference computed by the sub instance. An error from either
      // operation fails the request instead of being dropped.
      static const std::string output_0_name("OUTPUT0");
      if (output_0_name == output_name) {
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            instance_state->Add(element_count, lhs, rhs, result));
      } else if (sub_instance_state == nullptr) {
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INTERNAL,
                "no passive GPU instance is available to perform the Sub "
                "operation"));
      } else {
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            sub_instance_state->Sub(element_count, lhs, rhs, result));
      }
      if (responses[r] == nullptr) {
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to compute output, error response sent")
                .c_str());
        break;
      }
    }
    uint64_t compute_end_ns = 0;
    SET_TIMESTAMP(compute_end_ns);

    uint64_t exec_end_ns = 0;
    SET_TIMESTAMP(exec_end_ns);
    batch_exec_end_ns = exec_end_ns;

    // A response that is still present at this point has seen no error.
    const bool request_succeeded = (responses[r] != nullptr);

    // Send all the responses that haven't already been sent because of an
    // earlier error.
    if (request_succeeded) {
      LOG_IF_ERROR(
          TRITONBACKEND_ResponseSend(
              responses[r], TRITONSERVER_RESPONSE_COMPLETE_FINAL,
              nullptr /* success */),
          "failed sending response");
    }

    // Report statistics for each request.
    LOG_IF_ERROR(
        TRITONBACKEND_ModelInstanceReportStatistics(
            instance_state->TritonModelInstance(), request,
            request_succeeded /* success */, exec_start_ns, compute_start_ns,
            compute_end_ns, exec_end_ns),
        "failed reporting request statistics");

    LOG_IF_ERROR(
        TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
        "failed releasing request");
  }

  // If no request reached the end of its processing the end timestamp was
  // never taken; take it now so that it is not earlier than the start.
  if (batch_exec_end_ns == 0) {
    SET_TIMESTAMP(batch_exec_end_ns);
  }

  // Report the entire batch statistics.
  LOG_IF_ERROR(
      TRITONBACKEND_ModelInstanceReportBatchStatistics(
          instance_state->TritonModelInstance(), total_batch_size,
          batch_exec_start_ns, batch_exec_start_ns, batch_exec_end_ns,
          batch_exec_end_ns),
      "failed reporting batch request statistics");

  return nullptr;  // success
}

}  // extern "C"

}}}  // namespace triton::backend::distributed_addsub


================================================
FILE: src/test/distributed_addsub/src/libtriton_distributed_addsub.ldscript
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Symbol visibility: symbols matching TRITONBACKEND_* are global, every other
# symbol is local.
{
  global:
    TRITONBACKEND_*;
  local: *;
};


================================================
FILE: src/test/dyna_sequence/CMakeLists.txt
================================================
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required(VERSION 3.31.8)

project(tritondynasequencebackend LANGUAGES C CXX)

#
# libtriton_dyna_sequence.so
# Shared library implementing the Triton dyna_sequence test backend
#
# The linker version script is copied verbatim into the build tree;
# LINK_FLAGS below refers to the copy by its bare file name.
configure_file(src/libtriton_dyna_sequence.ldscript libtriton_dyna_sequence.ldscript COPYONLY)

add_library(
  triton-dyna-sequence-backend SHARED
  src/dyna_sequence.cc
)

# Namespaced alias matching the name the target is exported under below.
add_library(
  TritonDynaSequenceBackend::triton-dyna-sequence-backend ALIAS triton-dyna-sequence-backend
)

target_compile_features(triton-dyna-sequence-backend PRIVATE cxx_std_11)
# Warnings are treated as errors on Clang, AppleClang and GNU compilers only.
target_compile_options(
  triton-dyna-sequence-backend PRIVATE
  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

target_link_libraries(
  triton-dyna-sequence-backend
  PRIVATE
    triton-backend-utils    # from repo-backend
    triton-core-serverapi   # from repo-core
    triton-core-backendapi  # from repo-core
    triton-core-serverstub  # from repo-core
)

# OUTPUT_NAME yields libtriton_dyna_sequence.so. LINK_DEPENDS makes the link
# step depend on the copied version script, and LINK_FLAGS passes that script
# to the linker to restrict the exported symbols.
set_target_properties(
  triton-dyna-sequence-backend PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  OUTPUT_NAME triton_dyna_sequence
  LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_dyna_sequence.ldscript
  LINK_FLAGS "-Wl,--version-script libtriton_dyna_sequence.ldscript"
)

#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonDynaSequenceBackend)

# The backend library is installed under <prefix>/backends/dyna_sequence.
install(
  TARGETS
    triton-dyna-sequence-backend
  EXPORT
    triton-dyna-sequence-backend-targets
  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dyna_sequence
  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dyna_sequence
)

# Install the export set; the imported target is named
# TritonDynaSequenceBackend::triton-dyna-sequence-backend.
install(
  EXPORT
    triton-dyna-sequence-backend-targets
  FILE
    TritonDynaSequenceBackendTargets.cmake
  NAMESPACE
    TritonDynaSequenceBackend::
  DESTINATION
    ${INSTALL_CONFIGDIR}
)

# Generate the package config file from its template and install it next to
# the exported targets file.
include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonDynaSequenceBackendConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/TritonDynaSequenceBackendConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES
  ${CMAKE_CURRENT_BINARY_DIR}/TritonDynaSequenceBackendConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
export(
  EXPORT triton-dyna-sequence-backend-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonDynaSequenceBackendTargets.cmake
  NAMESPACE TritonDynaSequenceBackend::
)

export(PACKAGE TritonDynaSequenceBackend)


================================================
FILE: src/test/dyna_sequence/cmake/TritonDynaSequenceBackendConfig.cmake.in
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Package configuration for the dyna_sequence backend. Loads the exported
# targets file that sits next to this file (unless the imported target is
# already defined) and publishes the imported target through
# TRITONSEQUENCEBACKEND_LIBRARIES.
include(CMakeFindDependencyMacro)

get_filename_component(
  TRITONSEQUENCEBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)

list(APPEND CMAKE_MODULE_PATH ${TRITONSEQUENCEBACKEND_CMAKE_DIR})

# The exported target is 'triton-dyna-sequence-backend' in the
# 'TritonDynaSequenceBackend::' namespace; the guard and the library variable
# must use that exact name or consumers end up with a nonexistent target.
if(NOT TARGET TritonDynaSequenceBackend::triton-dyna-sequence-backend)
  include("${TRITONSEQUENCEBACKEND_CMAKE_DIR}/TritonDynaSequenceBackendTargets.cmake")
endif()

set(TRITONSEQUENCEBACKEND_LIBRARIES TritonDynaSequenceBackend::triton-dyna-sequence-backend)

================================================
FILE: src/test/dyna_sequence/src/dyna_sequence.cc
================================================
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <algorithm>
#include <cstring>
#include <exception>
#include <memory>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#include "triton/backend/backend_common.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"

namespace triton { namespace backend { namespace dyna_sequence {


// Simple dynamic sequence backend that demonstrates the TRITONBACKEND API for a
// blocking backend. A blocking backend completes execution of the
// inference before returning from TRITONBACKEND_ModelInstanceExecute.
//
// The backend supports models that take 5 input tensors, three INT32 [ 1 ]
// control values, one UINT64 or STRING [ 1 ] correlation ID control, and
// one variable-size INT32 [ -1 ] value input; and produces an output
// tensor with the same shape as the input tensor. The input tensors
// must be named "START", "END", "READY", "CORRID" and "INPUT". The
// output tensor must be named "OUTPUT".
//
// The model maintains an INT32 accumulator for each sequence which
// is updated based on the control values in "START", "END", "READY"
// and "CORRID":
//
//   READY=0, START=x, END=x: Ignore value input, do not change
//   accumulator value.
//
//   READY=1, START=1, END=x: Start accumulating. Set accumulator
//   equal to sum of INPUT tensor elements.
//
//   READY=1, START=0, END=x: Add INPUT tensor elements to
//   accumulator.
//
// In addition to the above, when END=1 CORRID is added to the accumulator.
//
// When READY=1, the accumulator is returned in every element of the
// OUTPUT tensor.
//

// Evaluate X only if the response at RESPONSES[IDX] is still non-null. If X
// yields an error, that error is sent as the final response for the slot, the
// slot is set to nullptr so later guarded calls skip it, and the error object
// is deleted. A failure to send the error response is only logged.
#define GUARDED_RESPOND_IF_ERROR(RESPONSES, IDX, X)                     \
  do {                                                                  \
    if ((RESPONSES)[IDX] != nullptr) {                                  \
      TRITONSERVER_Error* err__ = (X);                                  \
      if (err__ != nullptr) {                                           \
        LOG_IF_ERROR(                                                   \
            TRITONBACKEND_ResponseSend(                                 \
                (RESPONSES)[IDX], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
                err__),                                                 \
            "failed to send error response");                           \
        (RESPONSES)[IDX] = nullptr;                                     \
        TRITONSERVER_ErrorDelete(err__);                                \
      }                                                                 \
    }                                                                   \
  } while (false)

//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model.
//
class ModelState : public BackendModel {
 public:
  // Allocate a ModelState for 'triton_model' and return it through 'state'.
  // Returns nullptr on success or the error carried by the exception thrown
  // during construction.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_Model* triton_model, ModelState** state);
  virtual ~ModelState() = default;

  // Accessors for values derived from the model configuration. They hold
  // their constructor defaults (0, 0 and "TYPE_UINT64") until
  // ValidateModelConfig() has run.
  size_t AccumulatorSize() const { return accumulator_size_; }
  int ExecDelay() const { return execute_delay_ms_; }
  const std::string& CorrelationIdType() const { return corrid_dtype_; }

  // Validate that model configuration is supported by this backend.
  // Also records the execute delay, accumulator size and CORRID data type
  // read from the configuration.
  TRITONSERVER_Error* ValidateModelConfig();

 private:
  // Construction goes through Create().
  ModelState(TRITONBACKEND_Model* triton_model);

  // Delay to introduce into execution, in milliseconds. Taken from the
  // optional 'execute_delay_ms' model parameter.
  int execute_delay_ms_;

  // Accumulator size: max(1, max_batch_size) from the model configuration.
  size_t accumulator_size_;

  // Correlation id type: "TYPE_UINT64" or "TYPE_STRING".
  std::string corrid_dtype_;
};

// Factory for ModelState. On success '*state' receives the new object and
// nullptr is returned. If construction throws BackendModelException, the
// error it carries is returned instead (or an INTERNAL error when the
// exception carries no error) and '*state' is left untouched.
TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
  try {
    *state = new ModelState(triton_model);
    return nullptr;  // success
  }
  catch (const BackendModelException& ex) {
    RETURN_ERROR_IF_TRUE(
        ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelException"));
    // The exception's error is known to be non-null here; hand it to the
    // caller.
    return ex.err_;
  }
}

// Start from neutral defaults (no delay, zero accumulator size, UINT64
// correlation IDs); ValidateModelConfig() replaces them with the values
// found in the model configuration.
ModelState::ModelState(TRITONBACKEND_Model* triton_model)
    : BackendModel(triton_model),
      execute_delay_ms_(0),
      accumulator_size_(0),
      corrid_dtype_("TYPE_UINT64")
{
}

// Check that the model configuration describes a model this backend can
// serve, and record the settings derived from it: the optional
// 'execute_delay_ms' parameter, the accumulator size (max(1, max_batch_size))
// and the CORRID control data type.
//
// Returns nullptr on success, otherwise an error describing the first
// missing, unsupported or unparsable setting encountered.
TRITONSERVER_Error*
ModelState::ValidateModelConfig()
{
  // We have the json DOM for the model configuration...
  common::TritonJson::WriteBuffer buffer;
  RETURN_IF_ERROR(model_config_.PrettyWrite(&buffer));
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model configuration:\n") + buffer.Contents()).c_str());

  triton::common::TritonJson::Value params;
  if (model_config_.Find("parameters", &params)) {
    common::TritonJson::Value exec_delay;
    if (params.Find("execute_delay_ms", &exec_delay)) {
      std::string exec_delay_str;
      RETURN_IF_ERROR(
          exec_delay.MemberAsString("string_value", &exec_delay_str));
      // std::stoi throws on non-numeric or out-of-range text. Report that as
      // a configuration error instead of letting an exception escape.
      try {
        execute_delay_ms_ = std::stoi(exec_delay_str);
      }
      catch (const std::exception&) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string("unable to parse 'execute_delay_ms' value '") +
             exec_delay_str + "' as an integer")
                .c_str());
      }
    }
  }

  int64_t max_batch_size = 0;
  RETURN_IF_ERROR(model_config_.MemberAsInt("max_batch_size", &max_batch_size));
  accumulator_size_ = (size_t)(std::max((int64_t)1, max_batch_size));

  // The model configuration must specify the sequence batcher and
  // must use the START, END, READY and CORRID input to indicate
  // control values.
  triton::common::TritonJson::Value sequence_batching;
  RETURN_IF_ERROR(
      model_config_.MemberAsObject("sequence_batching", &sequence_batching));
  common::TritonJson::Value control_inputs;
  RETURN_IF_ERROR(
      sequence_batching.MemberAsArray("control_input", &control_inputs));
  RETURN_ERROR_IF_FALSE(
      control_inputs.ArraySize() == 4, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("'START', 'END, 'READY' and 'CORRID' must be configured as "
                  "the control inputs"));

  std::vector<std::string> control_input_names;
  for (size_t io_index = 0; io_index < control_inputs.ArraySize(); io_index++) {
    common::TritonJson::Value control_input;
    RETURN_IF_ERROR(control_inputs.IndexAsObject(io_index, &control_input));
    const char* input_name;
    size_t input_name_len;
    RETURN_IF_ERROR(
        control_input.MemberAsString("name", &input_name, &input_name_len));
    control_input_names.push_back(input_name);
  }

  // Every one of the four control names must be present. With exactly four
  // control inputs configured this also means each appears exactly once.
  const auto has_control = [&control_input_names](const char* name) {
    return std::find(
               control_input_names.begin(), control_input_names.end(), name) !=
           control_input_names.end();
  };
  RETURN_ERROR_IF_FALSE(
      has_control("START") && has_control("END") && has_control("READY") &&
          has_control("CORRID"),
      TRITONSERVER_ERROR_INVALID_ARG,
      std::string("'START', 'END, 'READY' and 'CORRID' must be configured as "
                  "the control inputs"));

  // The CORRID input must be UINT64 or STRING type. The check above
  // guarantees that CORRID is found, so 'corrid_pos' is a valid index.
  auto itr = std::find(
      control_input_names.begin(), control_input_names.end(), "CORRID");
  size_t corrid_pos = std::distance(control_input_names.begin(), itr);
  triton::common::TritonJson::Value corrid_input;
  RETURN_IF_ERROR(control_inputs.IndexAsObject(corrid_pos, &corrid_input));
  triton::common::TritonJson::Value corrid_control;
  RETURN_IF_ERROR(corrid_input.MemberAsArray("control", &corrid_control));
  common::TritonJson::Value control_item;
  RETURN_IF_ERROR(corrid_control.IndexAsObject(0 /* index */, &control_item));
  std::string corrid_dtype;
  RETURN_IF_ERROR(control_item.MemberAsString("data_type", &corrid_dtype));

  RETURN_ERROR_IF_FALSE(
      ((corrid_dtype == "TYPE_UINT64") || (corrid_dtype == "TYPE_STRING")),
      TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model CORRID control input must have TYPE_UINT64 "
                  "or TYPE_STRING data-type"));
  corrid_dtype_ = corrid_dtype;

  common::TritonJson::Value inputs, outputs;
  RETURN_IF_ERROR(model_config_.MemberAsArray("input", &inputs));
  RETURN_IF_ERROR(model_config_.MemberAsArray("output", &outputs));

  // There must be one INT32 input called INPUT defined in the model
  // configuration and it must be a 1D vector (of any length).
  RETURN_ERROR_IF_FALSE(
      inputs.ArraySize() == 1, TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          "model must have input 'INPUT' with vector shape, any length"));

  common::TritonJson::Value input;
  RETURN_IF_ERROR(inputs.IndexAsObject(0 /* index */, &input));

  std::vector<int64_t> input_shape;
  RETURN_IF_ERROR(backend::ParseShape(input, "dims", &input_shape));

  RETURN_ERROR_IF_FALSE(
      input_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          "model must have one input 'INPUT' with vector shape, any length"));

  std::string input_dtype;
  RETURN_IF_ERROR(input.MemberAsString("data_type", &input_dtype));

  RETURN_ERROR_IF_FALSE(
      input_dtype == "TYPE_INT32", TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model input must have TYPE_INT32 data-type"));

  const char* input_name;
  size_t input_name_len;
  RETURN_IF_ERROR(input.MemberAsString("name", &input_name, &input_name_len));

  RETURN_ERROR_IF_FALSE(
      strcmp(input_name, "INPUT") == 0, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model input must be named 'INPUT'"));

  // There must be one INT32 output with shape that matches the
  // input. The output must be named OUTPUT.
  RETURN_ERROR_IF_FALSE(
      outputs.ArraySize() == 1, TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          "model must have one output 'OUTPUT' with vector shape, any length"));

  common::TritonJson::Value output;
  RETURN_IF_ERROR(outputs.IndexAsObject(0 /* index */, &output));

  std::vector<int64_t> output_shape;
  RETURN_IF_ERROR(backend::ParseShape(output, "dims", &output_shape));

  RETURN_ERROR_IF_FALSE(
      (output_shape.size() == 1) && (output_shape[0] == input_shape[0]),
      TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          "model must have output 'OUTPUT' with shape matching 'INPUT'"));

  std::string output_dtype;
  RETURN_IF_ERROR(output.MemberAsString("data_type", &output_dtype));

  RETURN_ERROR_IF_FALSE(
      output_dtype == "TYPE_INT32", TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model output must have TYPE_INT32 data-type"));

  const char* output_name;
  size_t output_name_len;
  RETURN_IF_ERROR(
      output.MemberAsString("name", &output_name, &output_name_len));

  RETURN_ERROR_IF_FALSE(
      strcmp(output_name, "OUTPUT") == 0, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model output must be named 'OUTPUT'"));

  return nullptr;  // success
}

//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each TRITONBACKEND_ModelInstance.
//
class ModelInstanceState : public BackendModelInstance {
 public:
  // Allocate a ModelInstanceState for 'triton_model_instance' and return it
  // through 'state'. Returns nullptr on success or the error carried by the
  // exception thrown during construction.
  static TRITONSERVER_Error* Create(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance,
      ModelInstanceState** state);
  virtual ~ModelInstanceState();

  // Get the state of the model that corresponds to this instance.
  ModelState* StateForModel() const { return model_state_; }

  // Modify/get accumulator values for this instance, keyed by correlation
  // ID. Get and Add create a zero-valued entry when the key is not yet
  // present; Erase removes the entry if it exists.
  int32_t GetAccumulatorVal(uint64_t corrid);
  void SetAccumulatorVal(uint64_t corrid, int32_t value);
  void AddAccumulatorVal(uint64_t corrid, int32_t value);
  void EraseAccumulatorKey(uint64_t corrid);

 private:
  // Construction goes through Create().
  ModelInstanceState(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance);

  // Model state passed in at construction; this class does not delete it.
  ModelState* model_state_;

  // Accumulators maintained by this context, as a map from
  // correlation ID to the accumulator.
  std::unordered_map<uint64_t, int32_t> accumulator_;
};

// Factory for ModelInstanceState. On success '*state' receives the new
// object and nullptr is returned. If construction throws
// BackendModelInstanceException, the error it carries is returned instead
// (or an INTERNAL error when the exception carries no error) and '*state' is
// left untouched.
TRITONSERVER_Error*
ModelInstanceState::Create(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance,
    ModelInstanceState** state)
{
  try {
    *state = new ModelInstanceState(model_state, triton_model_instance);
    return nullptr;  // success
  }
  catch (const BackendModelInstanceException& ex) {
    RETURN_ERROR_IF_TRUE(
        ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelInstanceException"));
    // The exception's error is known to be non-null here; hand it to the
    // caller.
    return ex.err_;
  }
}

// Initialize the base class and remember the owning model's state. The
// accumulator map starts out empty.
ModelInstanceState::ModelInstanceState(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance)
    : BackendModelInstance(model_state, triton_model_instance),
      model_state_(model_state)
{
}

// Return the accumulator for 'corrid'. A correlation ID that has no entry
// yet gets a zero-initialized one as a side effect of the lookup.
int32_t
ModelInstanceState::GetAccumulatorVal(uint64_t corrid)
{
  const int32_t& slot = accumulator_[corrid];
  return slot;
}

// Overwrite the accumulator for 'corrid' with 'value', creating the entry
// when it does not exist.
void
ModelInstanceState::SetAccumulatorVal(uint64_t corrid, int32_t value)
{
  int32_t& slot = accumulator_[corrid];
  slot = value;
}

// Add 'value' to the accumulator for 'corrid'. A missing entry is created
// with value zero before the addition.
void
ModelInstanceState::AddAccumulatorVal(uint64_t corrid, int32_t value)
{
  int32_t& slot = accumulator_[corrid];
  slot += value;
}

// Drop the accumulator entry for 'corrid'; does nothing if there is none.
void
ModelInstanceState::EraseAccumulatorKey(uint64_t corrid)
{
  const uint64_t& key = corrid;
  accumulator_.erase(key);
}

// Discard all accumulator entries held by this instance.
ModelInstanceState::~ModelInstanceState()
{
  accumulator_.clear();
}


/////////////

extern "C" {

// Implementing TRITONBACKEND_Initialize is optional. The backend
// should initialize any global state that is intended to be shared
// across all models and model instances that use the backend.
//
// This implementation logs the backend name, checks that the Triton backend
// API version is compatible with the one this backend was compiled against,
// and logs the backend configuration. Returns nullptr on success, an
// UNSUPPORTED error on an API version mismatch, or the error from any failing
// Triton API call.
TRITONSERVER_Error*
TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
{
  const char* cname;
  RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &cname));
  std::string name(cname);

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("TRITONBACKEND_Initialize: ") + name).c_str());

  // We should check the backend API version that Triton supports
  // vs. what this backend was compiled against.
  uint32_t api_version_major, api_version_minor;
  RETURN_IF_ERROR(
      TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("Triton TRITONBACKEND API version: ") +
       std::to_string(api_version_major) + "." +
       std::to_string(api_version_minor))
          .c_str());
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("'") + name + "' TRITONBACKEND API version: " +
       std::to_string(TRITONBACKEND_API_VERSION_MAJOR) + "." +
       std::to_string(TRITONBACKEND_API_VERSION_MINOR))
          .c_str());

  // The major version must match exactly and Triton's minor version must be
  // at least the one this backend was built against.
  if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) ||
      (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        "triton backend API version does not support this backend");
  }

  // The backend configuration may contain information needed by the
  // backend, such a command-line arguments. This backend doesn't use
  // any such configuration but we print whatever is available.
  TRITONSERVER_Message* backend_config_message;
  RETURN_IF_ERROR(
      TRITONBACKEND_BackendConfig(backend, &backend_config_message));

  const char* buffer;
  size_t byte_size;
  RETURN_IF_ERROR(TRITONSERVER_MessageSerializeToJson(
      backend_config_message, &buffer, &byte_size));
  // The serialized JSON is returned as a base pointer plus size, so build the
  // string from exactly 'byte_size' bytes instead of relying on a
  // terminating NUL.
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("backend configuration:\n") +
       std::string(buffer, byte_size))
          .c_str());

  return nullptr;  // success
}

// TRITONBACKEND_ModelInitialize is an optional entry point used to set up
// state shared by all instances of a model. Here it logs the model name and
// version, creates a ModelState, attaches it to 'model', and then validates
// the model configuration. Any failing step returns its error.
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  const char* model_name_cstr;
  RETURN_IF_ERROR(TRITONBACKEND_ModelName(model, &model_name_cstr));
  const std::string model_name(model_name_cstr);

  uint64_t model_version;
  RETURN_IF_ERROR(TRITONBACKEND_ModelVersion(model, &model_version));

  const std::string banner = std::string("TRITONBACKEND_ModelInitialize: ") +
                             model_name + " (version " +
                             std::to_string(model_version) + ")";
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, banner.c_str());

  // Create the per-model state and hand it to Triton so it can be retrieved
  // later from the TRITONBACKEND_Model.
  ModelState* state = nullptr;
  RETURN_IF_ERROR(ModelState::Create(model, &state));
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelSetState(model, static_cast<void*>(state)));

  // Reject configurations this backend cannot serve; returning an error
  // here reports the model initialization as failed.
  RETURN_IF_ERROR(state->ValidateModelConfig());

  return nullptr;  // success
}

// TRITONBACKEND_ModelFinalize must be implemented when state was attached
// with TRITONBACKEND_ModelSetState. It fetches the ModelState attached to
// 'model' and deletes it.
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
  void* raw_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &raw_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO, "TRITONBACKEND_ModelFinalize: delete model state");

  delete static_cast<ModelState*>(raw_state);

  return nullptr;  // success
}

// TRITONBACKEND_ModelInstanceInitialize is an optional entry point used to
// set up per-instance state. Here it logs the instance name, kind and device,
// creates a ModelInstanceState bound to the owning model's ModelState,
// attaches it to 'instance', and rejects any instance that is not of CPU
// kind.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  const char* instance_name_cstr;
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelInstanceName(instance, &instance_name_cstr));
  const std::string instance_name(instance_name_cstr);

  int32_t device;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceDeviceId(instance, &device));
  TRITONSERVER_InstanceGroupKind group_kind;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceKind(instance, &group_kind));

  const std::string banner =
      std::string("TRITONBACKEND_ModelInstanceInitialize: ") + instance_name +
      " (" + TRITONSERVER_InstanceGroupKindString(group_kind) + " device " +
      std::to_string(device) + ")";
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, banner.c_str());

  // Look up the owning model and the ModelState attached to it.
  TRITONBACKEND_Model* owning_model;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &owning_model));

  void* raw_model_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(owning_model, &raw_model_state));
  ModelState* owning_state = static_cast<ModelState*>(raw_model_state);

  // Create the per-instance state and attach it to the
  // TRITONBACKEND_ModelInstance.
  ModelInstanceState* state = nullptr;
  RETURN_IF_ERROR(ModelInstanceState::Create(owning_state, instance, &state));
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelInstanceSetState(instance, static_cast<void*>(state)));

  // Only CPU instances are supported; any other instance kind is an error.
  RETURN_ERROR_IF_FALSE(
      state->Kind() == TRITONSERVER_INSTANCEGROUPKIND_CPU,
      TRITONSERVER_ERROR_INVALID_ARG,
      std::string("'dyna_sequence' backend only supports CPU instances"));

  return nullptr;  // success
}

// TRITONBACKEND_ModelInstanceFinalize must be implemented when state was
// attached with TRITONBACKEND_ModelInstanceSetState. It fetches the
// ModelInstanceState attached to 'instance' and deletes it.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
  void* raw_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &raw_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      "TRITONBACKEND_ModelInstanceFinalize: delete instance state");

  delete static_cast<ModelInstanceState*>(raw_state);

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceExecute is required.
//
// Executes 'request_count' requests on 'instance'. Each request carries
// one timestep of a sequence: the "START", "END", "READY" and "CORRID"
// control inputs plus an "INPUT" tensor whose bytes are read as int32
// values. For a request whose READY value is non-zero:
//   - if START is zero every INPUT element is added to the accumulator
//     kept for the request's CORRID, otherwise the accumulator is set
//     to the first element and the remaining elements are added;
//   - if END is non-zero the CORRID (truncated to 32 bits) is added as
//     well and the accumulator entry is erased after the output value
//     has been read;
//   - if an output was requested, every element of "OUTPUT" is set to
//     the resulting accumulator value.
//
// Returns an error, without having processed any request, if the
// instance state cannot be obtained, if the batch is larger than the
// accumulator size or if a response object cannot be created. Otherwise
// returns nullptr and every request has been responded to (successfully
// or with a per-request error), had its statistics reported and been
// released.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  // Triton will not call this function simultaneously for the same
  // 'instance'. But since this backend could be used by multiple
  // instances from multiple models the implementation needs to handle
  // multiple calls to this function at the same time (with different
  // 'instance' objects). Suggested practice for this is to use only
  // function-local and model-instance-specific state (obtained from
  // 'instance'), which is what we do here.
  ModelInstanceState* instance_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
      instance, reinterpret_cast<void**>(&instance_state)));
  ModelState* model_state = instance_state->StateForModel();

  // This backend specifies BLOCKING execution policy. That means that
  // we should not return from this function until execution is
  // complete. Triton will automatically release 'instance' on return
  // from this function so that it is again available to be used for
  // another call to TRITONBACKEND_ModelInstanceExecute.

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model ") + model_state->Name() + ", instance " +
       instance_state->Name() + ", executing " + std::to_string(request_count) +
       " requests")
          .c_str());

  bool supports_batching = false;
  RETURN_IF_ERROR(model_state->SupportsFirstDimBatching(&supports_batching));

  // Each request represents a different sequence, which corresponds
  // to the accumulator at the same index. Each request must have
  // batch-size 1 inputs which is the next timestep for that
  // sequence. The total number of requests will not exceed the
  // max-batch-size specified in the model configuration.
  if (request_count > model_state->AccumulatorSize()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        "unable to execute batch larger than max-batch-size");
  }

  // Delay if requested...
  if (model_state->ExecDelay() > 0) {
    std::this_thread::sleep_for(
        std::chrono::milliseconds(model_state->ExecDelay()));
  }

  // 'responses' is initialized with the response objects below and
  // if/when an error response is sent the corresponding entry in
  // 'responses' is set to nullptr to indicate that that response has
  // already been sent.
  std::vector<TRITONBACKEND_Response*> responses;
  responses.reserve(request_count);

  // Create a single response object for each request. If something
  // goes wrong when attempting to create the response objects just
  // fail all of the requests by returning an error.
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];

    TRITONBACKEND_Response* response;
    RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request));
    responses.push_back(response);
  }

  // The way we collect these batch timestamps is not entirely
  // accurate. Normally, in a performant backend you would execute all
  // the requests at the same time, and so there would be a single
  // compute-start / compute-end time-range. But here we execute each
  // request separately so there is no single range. As a result we
  // just show the entire execute time as being the compute time as
  // well.
  uint64_t min_exec_start_ns = std::numeric_limits<uint64_t>::max();
  uint64_t max_exec_end_ns = 0;
  uint64_t total_batch_size = 0;

  // Correlation IDs already handled in this batch. This must live
  // across all the requests of the batch so that a correlation ID that
  // occurs in more than one request can be detected.
  std::set<uint64_t> seen_corrids;

  // After this point we take ownership of 'requests', which means
  // that a response must be sent for every request and every request
  // must be released. If something does go wrong in processing a
  // particular request then we send an error response just for the
  // specific request.

  // Performs the work for the request at index 'r'. On failure an error
  // response is sent (which sets 'responses[r]' to nullptr) and the
  // function returns early. On success 'responses[r]' is left non-null
  // and ready to be sent. Sending the successful response, reporting
  // statistics and releasing the request is left to the caller so that
  // it happens on every path.
  auto process_request = [&](const uint32_t r,
                             TRITONBACKEND_Request* request) {
    const char* request_id = "";
    GUARDED_RESPOND_IF_ERROR(
        responses, r, TRITONBACKEND_RequestId(request, &request_id));

    uint64_t correlation_id = 0;
    if (model_state->CorrelationIdType() == "TYPE_UINT64") {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_RequestCorrelationId(request, &correlation_id));
    } else if (model_state->CorrelationIdType() == "TYPE_STRING") {
      const char* correlation_id_str = "";
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_RequestCorrelationIdString(
              request, &correlation_id_str));

      // Require that the string be decodable into an integer. Only
      // attempt the conversion if the string was actually retrieved.
      if (responses[r] != nullptr) {
        try {
          correlation_id = std::stoi(correlation_id_str);
        }
        // std::stoi reports failures with std::invalid_argument or
        // std::out_of_range, both of which derive from
        // std::logic_error. Neither may escape from this function.
        catch (const std::logic_error&) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  "dyna sequence backend expects correlation ID to be "
                  "decodable "
                  "into an integer"));
        }
      }
    }
    uint32_t input_count = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, TRITONBACKEND_RequestInputCount(request, &input_count));

    uint32_t requested_output_count = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestOutputCount(request, &requested_output_count));

    // If an error response was sent for the above then display an error
    // message and move on to next request.
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read request input/output counts, error response "
           "sent")
              .c_str());
      return;
    }

    LOG_MESSAGE(
        TRITONSERVER_LOG_INFO,
        (std::string("request ") + std::to_string(r) + ": id = \"" +
         request_id + "\", correlation_id = " + std::to_string(correlation_id) +
         ", input_count = " + std::to_string(input_count) +
         ", requested_output_count = " + std::to_string(requested_output_count))
            .c_str());

    // For statistics we need to collect the total batch size of all the
    // requests. If the model doesn't support batching then each request is
    // necessarily batch-size 1. If the model does support batching then the
    // first dimension of the shape is the batch size. We only use the first
    // input for this.
    if (supports_batching) {
      TRITONBACKEND_Input* input = nullptr;
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_RequestInputByIndex(request, 0 /* index */, &input));
      if (responses[r] == nullptr) {
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to read input, error response sent")
                .c_str());
        return;
      }

      const int64_t* input_shape;
      uint32_t input_dims_count;
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_InputProperties(
              input, nullptr, nullptr, &input_shape, &input_dims_count, nullptr,
              nullptr));
      if (responses[r] == nullptr) {
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to read input properties, error response sent")
                .c_str());
        return;
      }

      if (input_dims_count > 0) {
        if (input_shape[0] != 1) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "unable to execute more than one timestep at a time"));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": unable to execute more than one timestep at a time, error "
               "response sent")
                  .c_str());
          return;
        }
        total_batch_size += input_shape[0];
      }
    } else {
      total_batch_size++;
    }

    // This is progress information, not a failure, so it is logged at
    // INFO severity.
    LOG_MESSAGE(
        TRITONSERVER_LOG_INFO,
        (std::string("total_batch_size: ") + std::to_string(total_batch_size))
            .c_str());

    // Get the input tensors.
    TRITONBACKEND_Input* start_input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestInput(request, "START", &start_input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'START', error response sent")
              .c_str());
      return;
    }

    TRITONBACKEND_Input* end_input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, TRITONBACKEND_RequestInput(request, "END", &end_input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'END', error response sent")
              .c_str());
      return;
    }

    TRITONBACKEND_Input* ready_input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestInput(request, "READY", &ready_input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'READY', error response sent")
              .c_str());
      return;
    }

    TRITONBACKEND_Input* corrid_input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestInput(request, "CORRID", &corrid_input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'CORRID', error response sent")
              .c_str());
      return;
    }

    // Get the first buffer (index 0) of each control input. The size
    // and memory type outputs are overwritten by every call, so after
    // the last call below they describe the 'INPUT' buffer.
    const void* start_buffer = nullptr;
    uint64_t buffer_byte_size = 0;
    TRITONSERVER_MemoryType input_memory_type = TRITONSERVER_MEMORY_CPU;
    int64_t input_memory_type_id = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputBuffer(
            start_input, 0 /* index */, &start_buffer, &buffer_byte_size,
            &input_memory_type, &input_memory_type_id));
    if (responses[r] == nullptr) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED, "failed to get input buffer"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR, (std::string("request ") + std::to_string(r) +
                                   ": failed to get input buffer, error "
                                   "response sent")
                                      .c_str());
      return;
    }

    const void* end_buffer = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputBuffer(
            end_input, 0 /* index */, &end_buffer, &buffer_byte_size,
            &input_memory_type, &input_memory_type_id));
    if (responses[r] == nullptr) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED, "failed to get input buffer"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR, (std::string("request ") + std::to_string(r) +
                                   ": failed to get input buffer, error "
                                   "response sent")
                                      .c_str());
      return;
    }

    const void* ready_buffer = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputBuffer(
            ready_input, 0 /* index */, &ready_buffer, &buffer_byte_size,
            &input_memory_type, &input_memory_type_id));
    if (responses[r] == nullptr) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED, "failed to get input buffer"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR, (std::string("request ") + std::to_string(r) +
                                   ": failed to get input buffer, error "
                                   "response sent")
                                      .c_str());
      return;
    }

    const void* corrid_buffer = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputBuffer(
            corrid_input, 0 /* index */, &corrid_buffer, &buffer_byte_size,
            &input_memory_type, &input_memory_type_id));
    if (responses[r] == nullptr) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED, "failed to get input buffer"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR, (std::string("request ") + std::to_string(r) +
                                   ": failed to get input buffer, error "
                                   "response sent")
                                      .c_str());
      return;
    }

    TRITONBACKEND_Input* input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, TRITONBACKEND_RequestInput(request, "INPUT", &input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'INPUT', error response sent")
              .c_str());
      return;
    }

    const void* input_buffer = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputBuffer(
            input, 0 /* index */, &input_buffer, &buffer_byte_size,
            &input_memory_type, &input_memory_type_id));
    if ((responses[r] == nullptr) ||
        (input_memory_type == TRITONSERVER_MEMORY_GPU)) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "failed to get input buffer in CPU memory"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to get input buffer in CPU memory, error "
           "response sent")
              .c_str());
      return;
    }

    TRITONSERVER_DataType input_datatype;
    const int64_t* input_shape;
    uint32_t input_dims_count;
    uint64_t input_byte_size;
    uint32_t input_buffer_count;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputProperties(
            input, nullptr /* input_name */, &input_datatype, &input_shape,
            &input_dims_count, &input_byte_size, &input_buffer_count));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input properties, error response sent")
              .c_str());
      return;
    }

    // NOTE(review): the element count is derived from the total byte
    // size of 'INPUT' while only its first buffer is read below, so this
    // assumes that 'INPUT' is delivered in a single buffer - confirm.
    int64_t input_element_cnt = input_byte_size / sizeof(int32_t);
    const int32_t start = *reinterpret_cast<const int32_t*>(start_buffer);
    const int32_t end = *reinterpret_cast<const int32_t*>(end_buffer);
    const int32_t ready = *reinterpret_cast<const int32_t*>(ready_buffer);
    uint64_t corrid = 0;

    if (model_state->CorrelationIdType() == "TYPE_STRING") {
      // interpret buffer as const char* where first 4 bytes are string length
      const char* corrid_p = reinterpret_cast<const char*>(corrid_buffer);
      const std::string corrid_str(
          corrid_p + sizeof(uint32_t),
          *reinterpret_cast<const uint32_t*>(corrid_p));

      // String sequence ID must be decodable into int for dyna sequence
      // backend. If it is not, fail the request instead of continuing
      // with a meaningless correlation ID.
      try {
        corrid = std::stoi(corrid_str);
      }
      catch (const std::logic_error&) {
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                "dyna sequence backend expects correlation ID to be decodable "
                "into an integer"));
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to decode 'CORRID' into an integer, error response "
             "sent")
                .c_str());
        return;
      }

    } else {
      corrid = *reinterpret_cast<const uint64_t*>(corrid_buffer);
    }

    const int32_t* ipbuffer_int =
        reinterpret_cast<const int32_t*>(input_buffer);

    // Sequence batcher should never send us a batch of payloads where
    // a given correlation ID occurs more that once. Check that here
    // and fail if it happens.
    if (seen_corrids.find(corrid) != seen_corrids.end()) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "Execute() called with batch containing multiple inferences "
              "requests for the same Correlation ID"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": Execute() called with batch containing "
           "multiple inferences requests for the same Correlation ID, error "
           "response sent")
              .c_str());
      return;
    }
    seen_corrids.insert(corrid);

    // Update the accumulator value based on START/END/READY/CORRID
    // and calculate the output value.
    if (ready != 0) {
      if (start == 0) {
        // Update accumulator.
        for (int64_t e = 0; e < input_element_cnt; ++e) {
          instance_state->AddAccumulatorVal(corrid, ipbuffer_int[e]);
        }
      } else {
        // Set accumulator.
        instance_state->SetAccumulatorVal(corrid, ipbuffer_int[0]);
        for (int64_t e = 1; e < input_element_cnt; ++e) {
          instance_state->AddAccumulatorVal(corrid, ipbuffer_int[e]);
        }
      }

      if (end != 0) {
        // Add CORRID (truncated to 32 bits) to accumulator.
        instance_state->AddAccumulatorVal(corrid, (int32_t)corrid);
      }

      const int32_t output_val = instance_state->GetAccumulatorVal(corrid);

      // If sequence has ended remove CORRID from the accumulator map.
      if (end != 0) {
        instance_state->EraseAccumulatorKey(corrid);
      }

      TRITONBACKEND_Response* response = responses[r];

      // If the output is requested, copy the calculated output value
      // into the output buffer.
      if (requested_output_count > 0) {
        // The output is created with the same datatype and shape as
        // 'INPUT'.
        TRITONBACKEND_Output* output;
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            TRITONBACKEND_ResponseOutput(
                response, &output, "OUTPUT", input_datatype, input_shape,
                input_dims_count));
        if (responses[r] == nullptr) {
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create response output, error response sent")
                  .c_str());
          return;
        }

        // Get the output buffer. We request a buffer in CPU memory but
        // we have to handle any returned type. If we get back a buffer
        // in GPU memory we just fail the request.
        void* output_buffer;
        TRITONSERVER_MemoryType output_memory_type = TRITONSERVER_MEMORY_CPU;
        int64_t output_memory_type_id = 0;
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            TRITONBACKEND_OutputBuffer(
                output, &output_buffer, buffer_byte_size, &output_memory_type,
                &output_memory_type_id));
        if ((responses[r] == nullptr) ||
            (output_memory_type == TRITONSERVER_MEMORY_GPU)) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "failed to create output buffer in CPU memory"));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create output buffer in CPU memory, error "
               "response sent")
                  .c_str());
          return;
        }

        int32_t* obuffer_int = reinterpret_cast<int32_t*>(output_buffer);
        for (int64_t i = 0; i < input_element_cnt; ++i) {
          obuffer_int[i] = output_val;
        }
      }
    }
  };

  // For simplicity we just process each request separately... in
  // general a backend should try to operate on the entire batch of
  // requests at the same time for improved performance.
  for (uint32_t r = 0; r < request_count; ++r) {
    uint64_t exec_start_ns = 0;
    SET_TIMESTAMP(exec_start_ns);
    min_exec_start_ns = std::min(min_exec_start_ns, exec_start_ns);

    TRITONBACKEND_Request* request = requests[r];

    process_request(r, request);

    uint64_t exec_end_ns = 0;
    SET_TIMESTAMP(exec_end_ns);
    max_exec_end_ns = std::max(max_exec_end_ns, exec_end_ns);

    // Send the response if it hasn't already been sent because of an
    // earlier error.
    if (responses[r] != nullptr) {
      LOG_IF_ERROR(
          TRITONBACKEND_ResponseSend(
              responses[r], TRITONSERVER_RESPONSE_COMPLETE_FINAL,
              nullptr /* success */),
          "failed sending response");
    }

    // Report statistics for each request, whether it succeeded or
    // failed. 'responses[r]' is nullptr only if an error response was
    // sent for the request.
    LOG_IF_ERROR(
        TRITONBACKEND_ModelInstanceReportStatistics(
            instance_state->TritonModelInstance(), request,
            (responses[r] != nullptr) /* success */, exec_start_ns,
            exec_start_ns, exec_end_ns, exec_end_ns),
        "failed reporting request statistics");

    // We own the request so it must be released on every path, including
    // the ones where an error response was sent.
    LOG_IF_ERROR(
        TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
        "failed releasing request");
  }

  // Report the entire batch statistics.
  LOG_IF_ERROR(
      TRITONBACKEND_ModelInstanceReportBatchStatistics(
          instance_state->TritonModelInstance(), total_batch_size,
          min_exec_start_ns, min_exec_start_ns, max_exec_end_ns,
          max_exec_end_ns),
      "failed reporting batch request statistics");

  return nullptr;  // success
}

}  // extern "C"

}}}  // namespace triton::backend::dyna_sequence


================================================
FILE: src/test/dyna_sequence/src/libtriton_dyna_sequence.ldscript
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Symbol visibility for the shared library: symbols matching
# TRITONBACKEND_* are exported, every other symbol is made local.
{
  global:
    TRITONBACKEND_*;
  local: *;
};

================================================
FILE: src/test/implicit_state/CMakeLists.txt
================================================
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required(VERSION 3.31.8)

project(tritonimplicitsequencebackend LANGUAGES C CXX)

#
# libtriton_implicit_state.so
# Shared library implementing the Triton Implicit Sequence Backend API
#
# Copy the linker version script, unmodified, to the build tree. The
# link flags set on the target below refer to it by its bare file name.
configure_file(src/libtriton_implicit_state.ldscript libtriton_implicit_state.ldscript COPYONLY)

add_library(
  triton-implicit-state-backend SHARED
  src/implicit_state.cc
)

# Namespaced alias matching the name under which the target is exported.
add_library(
  TritonImplicitStateBackend::triton-implicit-state-backend ALIAS triton-implicit-state-backend
)

target_compile_features(triton-implicit-state-backend PRIVATE cxx_std_11)
# Strict warnings, treated as errors, for the Clang, AppleClang and GNU
# compilers only.
target_compile_options(
  triton-implicit-state-backend PRIVATE
  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

target_link_libraries(
  triton-implicit-state-backend
  PRIVATE
    triton-backend-utils    # from repo-backend
    triton-core-serverapi   # from repo-core
    triton-core-backendapi  # from repo-core
    triton-core-serverstub  # from repo-core
)

# OUTPUT_NAME makes the library file libtriton_implicit_state.so.
# LINK_DEPENDS relinks the library whenever the version script changes.
set_target_properties(
  triton-implicit-state-backend PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  OUTPUT_NAME triton_implicit_state
  LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_implicit_state.ldscript
  LINK_FLAGS "-Wl,--version-script libtriton_implicit_state.ldscript"
)

#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonImplicitStateBackend)

# The backend library is installed under backends/implicit_state in the
# install prefix.
install(
  TARGETS
    triton-implicit-state-backend
  EXPORT
    triton-implicit-state-backend-targets
  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/implicit_state
  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/implicit_state
)

# Install the exported targets file next to the package config file.
install(
  EXPORT
    triton-implicit-state-backend-targets
  FILE
    TritonImplicitStateBackendTargets.cmake
  NAMESPACE
    TritonImplicitStateBackend::
  DESTINATION
    ${INSTALL_CONFIGDIR}
)

# Generate the package config file from its template.
include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonImplicitStateBackendConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/TritonImplicitStateBackendConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES
  ${CMAKE_CURRENT_BINARY_DIR}/TritonImplicitStateBackendConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
export(
  EXPORT triton-implicit-state-backend-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonImplicitStateBackendTargets.cmake
  NAMESPACE TritonImplicitStateBackend::
)

export(PACKAGE TritonImplicitStateBackend)


================================================
FILE: src/test/implicit_state/cmake/TritonImplicitStateBackendConfig.cmake.in
================================================
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

include(CMakeFindDependencyMacro)

# Directory that contains this package config file.
get_filename_component(
  TRITONIMPLICITSTATEBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)

list(APPEND CMAKE_MODULE_PATH ${TRITONIMPLICITSTATEBACKEND_CMAKE_DIR})

# Load the exported targets from the directory of this file, unless the
# target is already defined. The path must be built from the variable
# set above; an unset variable would expand to an empty string and make
# the include look for the targets file in the filesystem root.
if(NOT TARGET TritonImplicitStateBackend::triton-implicit-state-backend)
  include("${TRITONIMPLICITSTATEBACKEND_CMAKE_DIR}/TritonImplicitStateBackendTargets.cmake")
endif()

set(TRITONIMPLICITSTATEBACKEND_LIBRARIES TritonImplicitStateBackend::triton-implicit-state-backend)


================================================
FILE: src/test/implicit_state/src/implicit_state.cc
================================================
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <algorithm>
#include <vector>

#include "triton/backend/backend_common.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"

namespace triton { namespace backend { namespace implicit {

// Implicit state backend that is used solely for testing the implicit state
// management functionality in the backend API.
//
// The backend supports models that take 4 input tensors, three INT32 [ 1 ]
// control values, one UINT64 [ 1 ] correlation ID control, one INT32 [ 1 ]
// value input, and one INT32 [ 1 ] input indicating the test case. The input
// tensors must be named "START", "END", "READY", "CORRID", "UPDATE", "INPUT",
// and "TEST_CASE". The output tensor must be named "OUTPUT".
//
// The list of accepted values for the "TEST_CASE" field are:
//
//   * STATE_NEW_NON_EXISTENT = 0: This tests calling TRITONBACKEND_StateNew
//   for a non-existent state or for a model that doesn't have a states
//   section in sequence batching.
//
//   * STATE_UPDATE_FALSE = 1: Tests not calling the state update and expecting
//   the implicit state to not be updated.
//
//   * USE_SINGLE_STATE_BUFFER = 2: For this scenario we will be using the same
//   buffer for both input and output state. In total there will be 3 requests
//   sent in a sequence.
//
//   * USE_GROWABLE_STATE_BUFFER = 3: In this test case we use growable state
//   buffer. Currently, growable state buffer only supports CUDA memory.

// Evaluate X only while the response at (RESPONSES)[IDX] is still pending
// (non-null). If X produces an error, that error is sent as the final
// response, REQUEST is released, the response slot is set to nullptr so
// that later guarded calls for the same index do nothing, and the error
// object is deleted.
#define GUARDED_RESPOND_IF_ERROR(RESPONSES, IDX, REQUEST, X)        \
  do {                                                              \
    if ((RESPONSES)[IDX] == nullptr) {                              \
      break;                                                        \
    }                                                               \
    TRITONSERVER_Error* err__ = (X);                                \
    if (err__ == nullptr) {                                         \
      break;                                                        \
    }                                                               \
    LOG_IF_ERROR(                                                   \
        TRITONBACKEND_ResponseSend(                                 \
            (RESPONSES)[IDX], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
            err__),                                                 \
        "failed to send error response");                           \
    LOG_IF_ERROR(                                                   \
        TRITONBACKEND_RequestRelease(                               \
            REQUEST, TRITONSERVER_REQUEST_RELEASE_ALL),             \
        "failed to release the request.");                          \
    (RESPONSES)[IDX] = nullptr;                                     \
    TRITONSERVER_ErrorDelete(err__);                                \
  } while (false)

//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model.
//
class ModelState : public BackendModel {
  // Private: instances are obtained through Create().
  ModelState(TRITONBACKEND_Model* triton_model);

 public:
  virtual ~ModelState() = default;

  // Allocate a ModelState for 'triton_model' and hand it back through
  // 'state'. Returns nullptr on success, otherwise an error.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_Model* triton_model, ModelState** state);

  // Check that the model configuration is one this backend supports.
  TRITONSERVER_Error* ValidateModelConfig();
};

TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
  // Construct into a local first; the caller's pointer is written only
  // after construction has succeeded.
  ModelState* created = nullptr;
  try {
    created = new ModelState(triton_model);
  }
  catch (const BackendModelException& ex) {
    // Report the error carried by the exception, or an internal error if
    // the exception unexpectedly carries none.
    RETURN_ERROR_IF_TRUE(
        ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelException"));
    RETURN_IF_ERROR(ex.err_);
  }

  *state = created;
  return nullptr;  // success
}

// Forwards 'triton_model' to the BackendModel base class; this backend
// adds no per-model members of its own.
ModelState::ModelState(TRITONBACKEND_Model* triton_model)
    : BackendModel(triton_model)
{
}

// Validate that the model configuration is supported by this backend.
//
// The configuration is logged, then its 'sequence_batching' section is
// checked: it must contain exactly three 'control_input' entries and
// those entries must be named 'START', 'END' and 'READY'.
//
// Returns nullptr on success, otherwise an error describing the first
// problem encountered.
TRITONSERVER_Error*
ModelState::ValidateModelConfig()
{
  // We have the json DOM for the model configuration...
  common::TritonJson::WriteBuffer buffer;
  RETURN_IF_ERROR(model_config_.PrettyWrite(&buffer));
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model configuration:\n") + buffer.Contents()).c_str());

  // The model configuration must specify the sequence batcher and
  // must use the START, END and READY inputs to indicate control
  // values.
  triton::common::TritonJson::Value sequence_batching;
  RETURN_IF_ERROR(
      model_config_.MemberAsObject("sequence_batching", &sequence_batching));
  common::TritonJson::Value control_inputs;
  RETURN_IF_ERROR(
      sequence_batching.MemberAsArray("control_input", &control_inputs));
  RETURN_ERROR_IF_FALSE(
      control_inputs.ArraySize() == 3, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("'START', 'END', and 'READY' must be configured as "
                  "the control inputs"));

  // Collect the name of every configured control input.
  std::vector<std::string> control_input_names;
  control_input_names.reserve(control_inputs.ArraySize());
  for (size_t io_index = 0; io_index < control_inputs.ArraySize(); io_index++) {
    common::TritonJson::Value control_input;
    RETURN_IF_ERROR(control_inputs.IndexAsObject(io_index, &control_input));
    const char* input_name = nullptr;
    size_t input_name_len = 0;
    RETURN_IF_ERROR(
        control_input.MemberAsString("name", &input_name, &input_name_len));
    // Use the reported length rather than relying on a terminating NUL.
    control_input_names.emplace_back(input_name, input_name_len);
  }

  const auto has_control_input = [&control_input_names](const char* name) {
    return std::find(
               control_input_names.begin(), control_input_names.end(),
               name) != control_input_names.end();
  };

  // All three control inputs are required. Joining these checks with
  // '||' would accept a configuration that provides only one of them.
  RETURN_ERROR_IF_FALSE(
      (has_control_input("START") && has_control_input("END") &&
       has_control_input("READY")),
      TRITONSERVER_ERROR_INVALID_ARG,
      std::string("'START', 'END', and 'READY' must be configured as "
                  "the control inputs"));

  return nullptr;  // success
}

//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each TRITONBACKEND_ModelInstance.
//
class ModelInstanceState : public BackendModelInstance {
  // Private: instances are obtained through Create().
  ModelInstanceState(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance);

 public:
  // Allocate a ModelInstanceState for 'triton_model_instance' and hand
  // it back through 'state'. Returns nullptr on success, otherwise an
  // error.
  static TRITONSERVER_Error* Create(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance,
      ModelInstanceState** state);

  // The model-level state that this instance belongs to.
  ModelState* StateForModel() const { return model_state_; }

  // Opaque pointer that the execute function compares against the input
  // state buffer; it is reset to nullptr when a sequence starts.
  void* state_ = nullptr;

  // Index of the request within the current sequence; reset to 0 when a
  // sequence starts.
  uint32_t request_index_ = 0;

 private:
  ModelState* model_state_;
};

TRITONSERVER_Error*
ModelInstanceState::Create(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance,
    ModelInstanceState** state)
{
  // Construct into a local first; the caller's pointer is written only
  // after construction has succeeded.
  ModelInstanceState* created = nullptr;
  try {
    created = new ModelInstanceState(model_state, triton_model_instance);
  }
  catch (const BackendModelInstanceException& ex) {
    // Report the error carried by the exception, or an internal error if
    // the exception unexpectedly carries none.
    RETURN_ERROR_IF_TRUE(
        ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelInstanceException"));
    RETURN_IF_ERROR(ex.err_);
  }

  *state = created;
  return nullptr;  // success
}

// Forwards both arguments to the BackendModelInstance base class and
// keeps 'model_state' so StateForModel() can return it.
ModelInstanceState::ModelInstanceState(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance)
    : BackendModelInstance(model_state, triton_model_instance),
      model_state_(model_state)
{
}

extern "C" {

// Implementing TRITONBACKEND_Initialize is optional. The backend
// should initialize any global state that is intended to be shared
// across all models and model instances that use the backend.
TRITONSERVER_Error*
TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
{
  const char* backend_cname;
  RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &backend_cname));
  const std::string backend_name(backend_cname);

  const std::string init_msg =
      std::string("TRITONBACKEND_Initialize: ") + backend_name;
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, init_msg.c_str());

  // Compare the backend API version that Triton provides with the
  // version this backend was compiled against.
  uint32_t triton_major, triton_minor;
  RETURN_IF_ERROR(TRITONBACKEND_ApiVersion(&triton_major, &triton_minor));

  const std::string triton_version_msg =
      std::string("Triton TRITONBACKEND API version: ") +
      std::to_string(triton_major) + "." + std::to_string(triton_minor);
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, triton_version_msg.c_str());

  const std::string backend_version_msg =
      std::string("'") + backend_name + "' TRITONBACKEND API version: " +
      std::to_string(TRITONBACKEND_API_VERSION_MAJOR) + "." +
      std::to_string(TRITONBACKEND_API_VERSION_MINOR);
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, backend_version_msg.c_str());

  // Usable only when the major versions match and Triton's minor
  // version is at least the one this backend was built with.
  const bool api_compatible =
      (triton_major == TRITONBACKEND_API_VERSION_MAJOR) &&
      (triton_minor >= TRITONBACKEND_API_VERSION_MINOR);
  if (!api_compatible) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        "triton backend API version does not support this backend");
  }

  // This backend does not consume any backend configuration; it is
  // only fetched so that it can be printed.
  TRITONSERVER_Message* config_message;
  RETURN_IF_ERROR(TRITONBACKEND_BackendConfig(backend, &config_message));

  const char* config_json;
  size_t config_json_size;
  RETURN_IF_ERROR(TRITONSERVER_MessageSerializeToJson(
      config_message, &config_json, &config_json_size));

  const std::string config_msg =
      std::string("backend configuration:\n") + config_json;
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, config_msg.c_str());

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInitialize is optional. The backend
// should initialize any state that is intended to be shared across
// all instances of the model.
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  const char* model_cname;
  RETURN_IF_ERROR(TRITONBACKEND_ModelName(model, &model_cname));
  const std::string model_name(model_cname);

  uint64_t model_version;
  RETURN_IF_ERROR(TRITONBACKEND_ModelVersion(model, &model_version));

  const std::string init_msg =
      std::string("TRITONBACKEND_ModelInitialize: ") + model_name +
      " (version " + std::to_string(model_version) + ")";
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, init_msg.c_str());

  // Create the ModelState for this model and attach it to the
  // TRITONBACKEND_Model so later callbacks can retrieve it.
  ModelState* created_state;
  RETURN_IF_ERROR(ModelState::Create(model, &created_state));
  RETURN_IF_ERROR(TRITONBACKEND_ModelSetState(
      model, reinterpret_cast<void*>(created_state)));

  // Finish by checking that the model configuration is something this
  // backend supports; an error returned here is this function's result.
  RETURN_IF_ERROR(created_state->ValidateModelConfig());
  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelFinalize is optional unless state
// is set using TRITONBACKEND_ModelSetState. The backend must free
// this state and perform any other cleanup.
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
  // Fetch the state pointer that was attached to the model.
  void* raw_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &raw_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO, "TRITONBACKEND_ModelFinalize: delete model state");

  delete reinterpret_cast<ModelState*>(raw_state);
  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceInitialize is optional. The
// backend should initialize any state that is required for a model
// instance.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  const char* instance_cname;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceName(instance, &instance_cname));
  const std::string instance_name(instance_cname);

  int32_t instance_device_id;
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelInstanceDeviceId(instance, &instance_device_id));
  TRITONSERVER_InstanceGroupKind instance_kind;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceKind(instance, &instance_kind));

  const std::string init_msg =
      std::string("TRITONBACKEND_ModelInstanceInitialize: ") + instance_name +
      " (" + TRITONSERVER_InstanceGroupKindString(instance_kind) + " device " +
      std::to_string(instance_device_id) + ")";
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, init_msg.c_str());

  // Look up the owning model and the ModelState attached to it.
  TRITONBACKEND_Model* owning_model;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &owning_model));

  void* raw_model_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(owning_model, &raw_model_state));
  ModelState* owning_model_state =
      reinterpret_cast<ModelState*>(raw_model_state);

  // Create the per-instance state and attach it to the
  // TRITONBACKEND_ModelInstance.
  ModelInstanceState* created_state;
  RETURN_IF_ERROR(ModelInstanceState::Create(
      owning_model_state, instance, &created_state));
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
      instance, reinterpret_cast<void*>(created_state)));

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceFinalize is optional unless
// state is set using TRITONBACKEND_ModelInstanceSetState. The backend
// must free this state and perform any other cleanup.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
  // Fetch the state pointer that was attached to the instance.
  void* raw_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &raw_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      "TRITONBACKEND_ModelInstanceFinalize: delete instance state");

  delete reinterpret_cast<ModelInstanceState*>(raw_state);
  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceExecute is required.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  // Triton will not call this function simultaneously for the same
  // 'instance'. But since this backend could be used by multiple
  // instances from multiple models the implementation needs to handle
  // multiple calls to this function at the same time (with different
  // 'instance' objects). Suggested practice for this is to use only
  // function-local and model-instance-specific state (obtained from
  // 'instance'), which is what we do here.
  ModelInstanceState* instance_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
      instance, reinterpret_cast<void**>(&instance_state)));
  ModelState* model_state = instance_state->StateForModel();

  // This backend specifies BLOCKING execution policy. That means that
  // we should not return from this function until execution is
  // complete. Triton will automatically release 'instance' on return
  // from this function so that it is again available to be used for
  // another call to TRITONBACKEND_ModelInstanceExecute.

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model ") + model_state->Name() + ", instance " +
       instance_state->Name() + ", executing " + std::to_string(request_count) +
       " requests")
          .c_str());

  bool supports_batching = false;
  RETURN_IF_ERROR(model_state->SupportsFirstDimBatching(&supports_batching));

  // 'responses' is initialized with the response objects below and
  // if/when an error response is sent the corresponding entry in
  // 'responses' is set to nullptr to indicate that that response has
  // already been sent.
  std::vector<TRITONBACKEND_Response*> responses;
  responses.reserve(request_count);

  // Create a single response object for each request. If something
  // goes wrong when attempting to create the response objects just
  // fail all of the requests by returning an error.
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];

    TRITONBACKEND_Response* response;
    RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request));
    responses.push_back(response);
  }

  // The way we collect these batch timestamps is not entirely
  // accurate. Normally, in a performant backend you would execute all
  // the requests at the same time, and so there would be a single
  // compute-start / compute-end time-range. But here we execute each
  // request separately so there is no single range. As a result we
  // just show the entire execute time as being the compute time as
  // well.
  uint64_t min_exec_start_ns = std::numeric_limits<uint64_t>::max();
  uint64_t max_exec_end_ns = 0;
  uint64_t total_batch_size = 0;

  // After this point we take ownership of 'requests', which means
  // that a response must be sent for every request. If something does
  // go wrong in processing a particular request then we send an error
  // response just for the specific request.

  // For simplicity we just process each request separately... in
  // general a backend should try to operate on the entire batch of
  // requests at the same time for improved performance.
  std::vector<uint8_t> start_buffer, end_buffer, ready_buffer, corrid_buffer,
      input_buffer;
  for (uint32_t r = 0; r < request_count; ++r) {
    uint64_t exec_start_ns = 0;
    SET_TIMESTAMP(exec_start_ns);
    min_exec_start_ns = std::min(min_exec_start_ns, exec_start_ns);

    TRITONBACKEND_Request* request = requests[r];

    const char* request_id = "";
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request, TRITONBACKEND_RequestId(request, &request_id));

    uint32_t input_count = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_RequestInputCount(request, &input_count));

    uint32_t requested_output_count = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_RequestOutputCount(request, &requested_output_count));

    // If an error response was sent for the above then display an error
    // message and move on to next request.
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read request input/output counts, error response "
           "sent")
              .c_str());
      continue;
    }

    LOG_MESSAGE(
        TRITONSERVER_LOG_INFO,
        (std::string("request ") + std::to_string(r) + ": id = \"" +
         request_id + "\", input_count = " + std::to_string(input_count) +
         ", requested_output_count = " + std::to_string(requested_output_count))
            .c_str());

    // For statistics we need to collect the total batch size of all the
    // requests. If the model doesn't support batching then each request is
    // necessarily batch-size 1. If the model does support batching then the
    // first dimension of the shape is the batch size. We only the first input
    // for this.
    if (supports_batching) {
      TRITONBACKEND_Input* input = nullptr;
      GUARDED_RESPOND_IF_ERROR(
          responses, r, request,
          TRITONBACKEND_RequestInputByIndex(request, 0 /* index */, &input));
      if (responses[r] == nullptr) {
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to read input, error response sent")
                .c_str());
        continue;
      }

      const int64_t* input_shape;
      uint32_t input_dims_count;
      GUARDED_RESPOND_IF_ERROR(
          responses, r, request,
          TRITONBACKEND_InputProperties(
              input, nullptr, nullptr, &input_shape, &input_dims_count, nullptr,
              nullptr));
      if (responses[r] == nullptr) {
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to read input properties, error response sent")
                .c_str());
        continue;
      }

      if (input_dims_count > 0) {
        if (input_shape[0] != 1) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r, request,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "unable to execute more than one timestep at a time"));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": unable to execute more than one timestep at a time, error "
               "response sent")
                  .c_str());
          continue;
        }
        total_batch_size += input_shape[0];
      }
    } else {
      total_batch_size++;
    }

    LOG_MESSAGE(
        TRITONSERVER_LOG_VERBOSE,
        (std::string("total_batch_size: ") + std::to_string(total_batch_size))
            .c_str());

    std::set<uint64_t> seen_corrids;

    // Get the input tensors.
    TRITONBACKEND_Input* start_input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_RequestInput(request, "START", &start_input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'START', error response sent")
              .c_str());
      continue;
    }

    TRITONBACKEND_Input* end_input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_RequestInput(request, "END", &end_input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'END', error response sent")
              .c_str());
      continue;
    }

    TRITONBACKEND_Input* ready_input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_RequestInput(request, "READY", &ready_input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'READY', error response sent")
              .c_str());
      continue;
    }

    const void* start_buffer = nullptr;
    uint64_t buffer_byte_size = 0;
    TRITONSERVER_MemoryType input_memory_type = TRITONSERVER_MEMORY_CPU;
    int64_t input_memory_type_id = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_InputBuffer(
            start_input, 0 /* input_buffer_count */, &start_buffer,
            &buffer_byte_size, &input_memory_type, &input_memory_type_id));
    if (responses[r] == nullptr) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r, request,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED, "failed to get input buffer"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR, (std::string("request ") + std::to_string(r) +
                                   ": failed to get input buffer, error "
                                   "response sent")
                                      .c_str());
      continue;
    }

    const float* lstart_buffer = reinterpret_cast<const float*>(start_buffer);
    if (*lstart_buffer == 1) {
      instance_state->request_index_ = 0;
      instance_state->state_ = nullptr;
    }

    const void* end_buffer = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_InputBuffer(
            end_input, 0 /* input_buffer_count */, &end_buffer,
            &buffer_byte_size, &input_memory_type, &input_memory_type_id));
    if (responses[r] == nullptr) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r, request,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED, "failed to get input buffer"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR, (std::string("request ") + std::to_string(r) +
                                   ": failed to get input buffer, error "
                                   "response sent")
                                      .c_str());
      continue;
    }

    const void* ready_buffer = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_InputBuffer(
            ready_input, 0 /* input_buffer_count */, &ready_buffer,
            &buffer_byte_size, &input_memory_type, &input_memory_type_id));
    if (responses[r] == nullptr) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r, request,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED, "failed to get input buffer"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR, (std::string("request ") + std::to_string(r) +
                                   ": failed to get input buffer, error "
                                   "response sent")
                                      .c_str());
      continue;
    }

    TRITONBACKEND_Input* input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_RequestInput(request, "INPUT", &input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'INPUT', error response sent")
              .c_str());
      continue;
    }

    const void* input_buffer = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_InputBuffer(
            input, 0 /* input_buffer_count */, &input_buffer, &buffer_byte_size,
            &input_memory_type, &input_memory_type_id));
    if ((responses[r] == nullptr) ||
        (input_memory_type == TRITONSERVER_MEMORY_GPU)) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r, request,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "failed to get input buffer in CPU memory"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to get input buffer in CPU memory, error "
           "response sent")
              .c_str());
      continue;
    }

    TRITONBACKEND_Input* test_case = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_RequestInput(request, "TEST_CASE", &test_case));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'TEST_CASE', error response sent")
              .c_str());
      continue;
    }

    const void* test_case_buffer = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, request,
        TRITONBACKEND_InputBuffer(
            test_case, 0 /* test_case_buffer_count */, &test_case_buffer,
            &buffer_byte_size, &input_memory_type, &input_memory_type_id));
    if ((responses[r] == nullptr) ||
        (input_memory_type == TRITONSERVER_MEMORY_GPU)) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r, request,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "failed to get input buffer in CPU memory"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to get input buffer in CPU memory, error "
           "response sent")
              .c_str());
      continue;
    }
    const int32_t test_case_buffer_int =
        *reinterpret_cast<const int32_t*>(test_case_buffer);
    const int32_t ipbuffer_int =
        *reinterpret_cast<const int32_t*>(input_buffer);
    int32_t ipbuffer_state_int = 0;

    if (test_case_buffer_int != 0) {
      TRITONBACKEND_Input* input_state = nullptr;
      GUARDED_RESPOND_IF_ERROR(
          responses, r, request,
          TRITONBACKEND_RequestInput(request, "INPUT_STATE", &input_state));
      if (responses[r] == nullptr) {
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to read input 'INPUT_STATE', error response sent")
                .c_str());
        continue;
      }

      const void* input_state_buffer = nullptr;
      GUARDED_RESPOND_IF_ERROR(
          responses, r, request,
          TRITONBACKEND_InputBuffer(
              input_state, 0 /* input_buffer_count */, &input_state_buffer,
              &buffer_byte_size, &input_memory_type, &input_memory_type_id));
      if ((responses[r] == nullptr) ||
          (test_case_buffer_int == 3 &&
           input_memory_type != TRITONSERVER_MEMORY_GPU)) {
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_UNSUPPORTED,
                "growable memory should always provide memory in GPU"));
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to get input buffer in GPU memory, error "
             "response sent")
                .c_str());
        continue;
      } else if (
          (responses[r] == nullptr) ||
          (input_memory_type == TRITONSERVER_MEMORY_GPU &&
           test_case_buffer_int != 3)) {
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_UNSUPPORTED,
                "failed to get input buffer in CPU memory"));
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to get input buffer in CPU memory, error "
             "response sent")
                .c_str());
        continue;
      }

      // When using single state buffer, input/output tensors should point to
      // the buffer.
      if ((test_case_buffer_int == 2 || test_case_buffer_int == 3) &&
          instance_state->state_ != nullptr) {
        if (input_state_buffer != instance_state->state_) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r, request,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "Input and output state are using different buffers."));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": input and output state are using different buffers, error "
               "response sent")
                  .c_str());
          continue;
        }
      }

      if (test_case_buffer_int == 2 || test_case_buffer_int == 1 ||
          test_case_buffer_int == 0) {
        const int32_t ipbuffer_state =
            *reinterpret_cast<const int32_t*>(input_state_buffer);
        ipbuffer_state_int = ipbuffer_state;
      }
    }

    switch (test_case_buffer_int) {
      // STATE_NEW_NON_EXISTENT. The behavior for both of the test cases is
      // the same.
      case 0: {
        TRITONBACKEND_State* response_state;
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_StateNew(
                &response_state, request, "undefined_state",
                TRITONSERVER_TYPE_INT32, nullptr /* shape */,
                0 /* dim_count */));
        if (responses[r] == nullptr) {
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create the output state 'OUTPUT_STATE', error "
               "response sent")
                  .c_str());
          continue;
        }
      } break;
      // STATE_UPDATE_FALSE
      case 1: {
        TRITONBACKEND_State* response_state;
        TRITONBACKEND_Output* response_output;
        std::vector<int64_t> shape{1};
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_StateNew(
                &response_state, request, "OUTPUT_STATE",
                TRITONSERVER_TYPE_INT32, shape.data() /* data */,
                shape.size() /* dim_count */));

        if (responses[r] == nullptr) {
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create the output state 'OUTPUT_STATE', error "
               "response sent")
                  .c_str());
          continue;
        }
        TRITONSERVER_MemoryType actual_memory_type = TRITONSERVER_MEMORY_GPU;
        int64_t actual_memory_type_id = 0;
        char* buffer;

        // Request an output buffer in GPU. This is only for testing purposes
        // to make sure that GPU output buffers can be requested.
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_StateBuffer(
                response_state, reinterpret_cast<void**>(&buffer),
                sizeof(int32_t), &actual_memory_type, &actual_memory_type_id));


        if ((responses[r] == nullptr) ||
            (actual_memory_type == TRITONSERVER_MEMORY_CPU)) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r, request,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "failed to create the state buffer in GPU memory"));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create the state buffer in GPU memory, error "
               "response sent")
                  .c_str());
          continue;
        }

        actual_memory_type = TRITONSERVER_MEMORY_CPU;
        actual_memory_type_id = 0;
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_StateBuffer(
                response_state, reinterpret_cast<void**>(&buffer),
                sizeof(int32_t), &actual_memory_type, &actual_memory_type_id));

        if ((responses[r] == nullptr) ||
            (actual_memory_type == TRITONSERVER_MEMORY_GPU)) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r, request,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "failed to create the state buffer in CPU memory"));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create the state buffer in CPU memory, error "
               "response sent")
                  .c_str());
          continue;
        }

        TRITONSERVER_BufferAttributes* buffer_attributes;
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_StateBufferAttributes(
                response_state, &buffer_attributes));

        // Testing for the StateBuffer attributes
        TRITONSERVER_MemoryType ba_memory_type;
        int64_t ba_memory_type_id;
        size_t ba_byte_size;

        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONSERVER_BufferAttributesMemoryType(
                buffer_attributes, &ba_memory_type));

        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONSERVER_BufferAttributesMemoryTypeId(
                buffer_attributes, &ba_memory_type_id));

        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONSERVER_BufferAttributesByteSize(
                buffer_attributes, &ba_byte_size));

        if (!((actual_memory_type == ba_memory_type) &&
              (sizeof(int32_t) == ba_byte_size) &&
              (ba_memory_type_id == actual_memory_type_id)) ||
            responses[r] == nullptr) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r, request,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "State buffer attributes are not set correctly."));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": State buffer attributes are not set correctly., error "
               "response sent")
                  .c_str());
          continue;
        }

        // Put the new state in the output buffer but intentionally do not
        // call the TRITONBACKEND_StateUpdate function.
        int32_t* lbuffer = reinterpret_cast<int32_t*>(buffer);
        *lbuffer = ipbuffer_int + ipbuffer_state_int;

        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_ResponseOutput(
                responses[r], &response_output, "OUTPUT",
                TRITONSERVER_TYPE_INT32, shape.data() /* data */,
                shape.size() /* dim_count */));

        if (responses[r] == nullptr) {
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create the output state 'OUTPUT_STATE', error "
               "response sent")
                  .c_str());
          continue;
        }

        actual_memory_type = TRITONSERVER_MEMORY_CPU;
        actual_memory_type_id = 0;
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_OutputBuffer(
                response_output, reinterpret_cast<void**>(&buffer),
                sizeof(int32_t), &actual_memory_type, &actual_memory_type_id));

        if ((responses[r] == nullptr) ||
            (actual_memory_type == TRITONSERVER_MEMORY_GPU)) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r, request,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "failed to create the state buffer in CPU memory"));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create the state buffer in CPU memory, error "
               "response sent")
                  .c_str());
          continue;
        }
        lbuffer = reinterpret_cast<int32_t*>(buffer);
        *lbuffer = ipbuffer_int + ipbuffer_state_int;
      } break;
      // USE_SINGLE_BUFFER
      case 2: {
        TRITONBACKEND_State* response_state;
        std::vector<int64_t> shape{1};
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_StateNew(
                &response_state, request, "OUTPUT_STATE",
                TRITONSERVER_TYPE_INT32, shape.data() /* data */,
                shape.size() /* dim_count */));

        if (responses[r] == nullptr) {
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create the output state 'OUTPUT_STATE', error "
               "response sent")
                  .c_str());
          continue;
        }
        TRITONSERVER_MemoryType actual_memory_type = TRITONSERVER_MEMORY_CPU;
        int64_t actual_memory_type_id = 0;
        char* buffer;

        // Request an output buffer in GPU. This is only for testing purposes
        // to make sure that GPU output buffers can be requested.
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_StateBuffer(
                response_state, reinterpret_cast<void**>(&buffer),
                sizeof(int32_t), &actual_memory_type, &actual_memory_type_id));

        instance_state->state_ = buffer;
      } break;
      case 3: {
        TRITONBACKEND_State* response_state;
        size_t block_size = sizeof(int8_t) * 1024 * 1024;
        int64_t current_elements =
            (instance_state->request_index_ + 1) * 1024 * 1024;
        std::cout << "current elements are "
                  << (instance_state->request_index_ + 1) << std::endl;
        std::vector<int64_t> shape{current_elements};
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_StateNew(
                &response_state, request, "OUTPUT_STATE",
                TRITONSERVER_TYPE_INT8, shape.data() /* data */,
                shape.size() /* dim_count */));

        if (responses[r] == nullptr) {
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create the output state 'OUTPUT_STATE', error "
               "response sent")
                  .c_str());
          continue;
        }
        TRITONSERVER_MemoryType actual_memory_type = TRITONSERVER_MEMORY_GPU;
        int64_t actual_memory_type_id = 0;
        char* buffer;

        // Request an output buffer in GPU. This is only for testing purposes
        // to make sure that GPU output buffers can be requested.
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_StateBuffer(
                response_state, reinterpret_cast<void**>(&buffer),
                block_size * (instance_state->request_index_ + 1),
                &actual_memory_type, &actual_memory_type_id));

        // Only write the new data to the portion of the state buffer that
        // has been grown.
        cudaMemset(
            buffer + block_size * (instance_state->request_index_),
            instance_state->request_index_, block_size);

        TRITONBACKEND_Output* response_output;
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_ResponseOutput(
                responses[r], &response_output, "OUTPUT_STATE",
                TRITONSERVER_TYPE_INT8, shape.data() /* data */,
                shape.size() /* dim_count */));

        actual_memory_type = TRITONSERVER_MEMORY_CPU;
        actual_memory_type_id = 0;
        char* output_buffer;
        GUARDED_RESPOND_IF_ERROR(
            responses, r, request,
            TRITONBACKEND_OutputBuffer(
                response_output, reinterpret_cast<void**>(&output_buffer),
                block_size * (instance_state->request_index_ + 1),
                &actual_memory_type, &actual_memory_type_id));
        if ((responses[r] == nullptr) ||
            (actual_memory_type != TRITONSERVER_MEMORY_CPU)) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r, request,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "the backend can only handle CPU tensors"));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               "the backend can only handle CPU tensors"
               "response sent")
                  .c_str());
          continue;
        }
        cudaMemcpy(
            output_buffer, buffer,
            block_size * (instance_state->request_index_ + 1),
            cudaMemcpyDeviceToHost);

        instance_state->state_ = buffer;
      } break;
    }
    const float* lend_buffer = reinterpret_cast<const float*>(end_buffer);

    if (*lend_buffer == 1) {
      instance_state->request_index_ = 0;
    } else {
      instance_state->request_index_ += 1;
    }

    uint64_t exec_end_ns = 0;
    SET_TIMESTAMP(exec_end_ns);
    max_exec_end_ns = std::max(max_exec_end_ns, exec_end_ns);

    // Send all the responses that haven't already been sent because of
    // an earlier error.
    if (responses[r] != nullptr) {
      LOG_IF_ERROR(
          TRITONBACKEND_ResponseSend(
              responses[r], TRITONSERVER_RESPONSE_COMPLETE_FINAL,
              nullptr /* success */),
          "failed sending response");
    }

    // Report statistics for each request.
    LOG_IF_ERROR(
        TRITONBACKEND_ModelInstanceReportStatistics(
            instance_state->TritonModelInstance(), request,
            (responses[r] != nullptr) /* success */, exec_start_ns,
            exec_start_ns, exec_end_ns, exec_end_ns),
        "failed reporting request statistics");

    LOG_IF_ERROR(
        TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
        "failed releasing request");
  }

  // Report the entire batch statistics.
  LOG_IF_ERROR(
      TRITONBACKEND_ModelInstanceReportBatchStatistics(
          instance_state->TritonModelInstance(), total_batch_size,
          min_exec_start_ns, min_exec_start_ns, max_exec_end_ns,
          max_exec_end_ns),
      "failed reporting batch request statistics");

  return nullptr;  // success
}
}  // extern "C"
}}}  // namespace triton::backend::implicit


================================================
FILE: src/test/implicit_state/src/libtriton_implicit_state.ldscript
================================================
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Anonymous version node: symbols matching TRITONBACKEND_* are exported
# from the shared library (global); every other symbol is kept local.
{
  global:
    TRITONBACKEND_*;
  local: *;
};


================================================
FILE: src/test/iterative_sequence/CMakeLists.txt
================================================
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Minimum CMake version needed to configure this backend.
cmake_minimum_required(VERSION 3.31.8)

# Project for the iterative sequence test backend; enables the C and C++
# toolchains.
project(tritoniterativesequencebackend LANGUAGES C CXX)

#
# libtriton_iterative_sequence.so
# Shared library implementing the Triton Sequence Backend API
#
# Copy the linker version script verbatim (COPYONLY: no variable
# substitution) so that it is available next to the build outputs.
configure_file(src/libtriton_iterative_sequence.ldscript libtriton_iterative_sequence.ldscript COPYONLY)

# The backend itself: a shared library built from a single source file.
add_library(
  triton-iterative-sequence-backend SHARED
  src/iterative_sequence.cc
)

# Namespaced alias so in-tree consumers can use the same name as the
# exported/installed target.
add_library(
  TritonIterativeSequenceBackend::triton-iterative-sequence-backend ALIAS triton-iterative-sequence-backend
)

# Require at least C++11; on Clang/AppleClang/GNU additionally enable a
# strict warning set and treat warnings as errors.
target_compile_features(triton-iterative-sequence-backend PRIVATE cxx_std_11)
target_compile_options(
  triton-iterative-sequence-backend PRIVATE
  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

# Triton libraries the backend links against (all PRIVATE: not propagated to
# consumers of this target).
target_link_libraries(
  triton-iterative-sequence-backend
  PRIVATE
    triton-backend-utils    # from repo-backend
    triton-core-serverapi   # from repo-core
    triton-core-backendapi  # from repo-core
    triton-core-serverstub  # from repo-core
)

# Produce libtriton_iterative_sequence.so as position-independent code and
# link it with the version script copied above. LINK_DEPENDS makes the
# library relink when the script changes.
# NOTE(review): the script path in LINK_FLAGS is relative, so it presumably
# relies on the link step running in the build directory -- confirm.
set_target_properties(
  triton-iterative-sequence-backend PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  OUTPUT_NAME triton_iterative_sequence
  LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_iterative_sequence.ldscript
  LINK_FLAGS "-Wl,--version-script libtriton_iterative_sequence.ldscript"
)

#
# Install
#
# GNUInstallDirs provides CMAKE_INSTALL_LIBDIR, used for the location of the
# installed CMake package files.
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonIterativeSequenceBackend)

# Install the backend library under <prefix>/backends/iterative_sequence and
# record it in the export set used below.
install(
  TARGETS
    triton-iterative-sequence-backend
  EXPORT
    triton-iterative-sequence-backend-targets
  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/iterative_sequence
  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/iterative_sequence
)

# Install the generated targets file; imported targets are prefixed with
# the TritonIterativeSequenceBackend:: namespace.
install(
  EXPORT
    triton-iterative-sequence-backend-targets
  FILE
    TritonIterativeSequenceBackendTargets.cmake
  NAMESPACE
    TritonIterativeSequenceBackend::
  DESTINATION
    ${INSTALL_CONFIGDIR}
)

# Generate the package config file from its template and install it next to
# the targets file.
include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonIterativeSequenceBackendConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/TritonIterativeSequenceBackendConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES
  ${CMAKE_CURRENT_BINARY_DIR}/TritonIterativeSequenceBackendConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
# Write a targets file into the build directory so the package can be
# consumed without installing it.
export(
  EXPORT triton-iterative-sequence-backend-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonIterativeSequenceBackendTargets.cmake
  NAMESPACE TritonIterativeSequenceBackend::
)

# Record the build directory in the CMake user package registry (subject to
# the CMake policy/variables that control package export).
export(PACKAGE TritonIterativeSequenceBackend)


================================================
FILE: src/test/iterative_sequence/cmake/TritonIterativeSequenceBackendConfig.cmake.in
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Package configuration for the iterative sequence backend. Locates the
# exported targets file that sits next to this file and publishes the
# library target through TRITONSEQUENCEBACKEND_LIBRARIES.
include(CMakeFindDependencyMacro)

# Directory containing this config file (and the exported targets file).
get_filename_component(
  TRITONSEQUENCEBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)

list(APPEND CMAKE_MODULE_PATH ${TRITONSEQUENCEBACKEND_CMAKE_DIR})

# The target exported by this package's CMakeLists.txt is
# 'triton-iterative-sequence-backend' in the TritonIterativeSequenceBackend::
# namespace. Guard on that exact name so the targets file is only included
# when the target is not already defined.
if(NOT TARGET TritonIterativeSequenceBackend::triton-iterative-sequence-backend)
  include("${TRITONSEQUENCEBACKEND_CMAKE_DIR}/TritonIterativeSequenceBackendTargets.cmake")
endif()

# Variable name kept as-is for backward compatibility; it now refers to the
# target that is actually exported.
set(TRITONSEQUENCEBACKEND_LIBRARIES TritonIterativeSequenceBackend::triton-iterative-sequence-backend)


================================================
FILE: src/test/iterative_sequence/src/iterative_sequence.cc
================================================
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <algorithm>
#include <memory>
#include <thread>

#include "triton/backend/backend_common.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"

namespace triton { namespace backend { namespace iterative_sequence {


// Simple iterative sequence backend that demonstrates the use of the
// TRITONSERVER_REQUEST_RELEASE_RESCHEDULE flag to iteratively produce a
// sequence of responses.
//
// The backend supports models that take 1 input tensor, an INT32 [ 1 ]
// input named "INPUT", and produce an output tensor "OUTPUT" with the same
// shape as the input tensor. The input value indicates the total number of
// responses to be generated and the output value indicates the number of
// remaining responses. For example, if the request input has value 2,
// the backend will:
//   - Send a response with value 1.
//   - Release the request with the RESCHEDULE flag.
//   - When executed again on the same request, send the last response with
//     value 0.
//   - Release the request with the ALL flag.
//

// GUARDED_RESPOND_IF_ERROR(RESPONSES, IDX, X)
//
// Evaluate X only when RESPONSES[IDX] is still non-null, i.e. no response
// has been sent for that index yet. If X yields an error, that error is sent
// as the final response for RESPONSES[IDX], the slot is set to nullptr so
// that subsequent guarded calls for the same index are skipped, and the
// error object is deleted. When X succeeds nothing else happens.
#define GUARDED_RESPOND_IF_ERROR(RESPONSES, IDX, X)                     \
  do {                                                                  \
    if ((RESPONSES)[IDX] != nullptr) {                                  \
      TRITONSERVER_Error* err__ = (X);                                  \
      if (err__ != nullptr) {                                           \
        LOG_IF_ERROR(                                                   \
            TRITONBACKEND_ResponseSend(                                 \
                (RESPONSES)[IDX], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
                err__),                                                 \
            "failed to send error response");                           \
        (RESPONSES)[IDX] = nullptr;                                     \
        TRITONSERVER_ErrorDelete(err__);                                \
      }                                                                 \
    }                                                                   \
  } while (false)

//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model.
//
class ModelState : public BackendModel {
 public:
  virtual ~ModelState() = default;

  // Factory for ModelState. Allocates a new object for 'triton_model' and
  // hands it back through 'state'. Returns nullptr on success, or the
  // error describing why construction failed.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_Model* triton_model, ModelState** state);

 private:
  // Not public: objects are obtained through Create().
  ModelState(TRITONBACKEND_Model* triton_model);
};

// Allocate a ModelState for 'triton_model' and store it in '*state'.
// A BackendModelException raised during construction is converted into a
// returned TRITONSERVER_Error*; nullptr is returned on success.
TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
  try {
    *state = new ModelState(triton_model);
  }
  catch (const BackendModelException& model_ex) {
    // An exception that carries no error object is reported as an internal
    // error; otherwise the carried error is handed to the caller.
    RETURN_ERROR_IF_TRUE(
        model_ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelException"));
    RETURN_IF_ERROR(model_ex.err_);
  }

  return nullptr;  // success
}

// Forwards 'triton_model' to the BackendModel base class; ModelState adds
// no members of its own.
ModelState::ModelState(TRITONBACKEND_Model* triton_model)
    : BackendModel(triton_model)
{
}

//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each TRITONBACKEND_ModelInstance.
//
class ModelInstanceState : public BackendModelInstance {
 public:
  virtual ~ModelInstanceState() = default;

  // Factory for ModelInstanceState. Allocates a new object bound to
  // 'model_state' and 'triton_model_instance' and hands it back through
  // 'state'. Returns nullptr on success, or the construction error.
  static TRITONSERVER_Error* Create(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance,
      ModelInstanceState** state);

  // Decrement and return the number of responses still remaining for
  // 'corrid'. The first time a 'corrid' is seen its counter starts from
  // 'init_value'; the counter is forgotten once it drops to zero or below.
  int32_t GetOutput(uint64_t corrid, int32_t init_value);

  // The ModelState this instance was created with.
  ModelState* StateForModel() const { return model_state_; }

 private:
  // Not public: objects are obtained through Create().
  ModelInstanceState(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance);

  ModelState* model_state_;

  // Correlation ID -> number of responses still to be produced.
  std::unordered_map<uint64_t, int32_t> remainders_;
};

// Allocate a ModelInstanceState and store it in '*state'. A
// BackendModelInstanceException raised during construction is converted
// into a returned TRITONSERVER_Error*; nullptr is returned on success.
TRITONSERVER_Error*
ModelInstanceState::Create(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance,
    ModelInstanceState** state)
{
  try {
    *state = new ModelInstanceState(model_state, triton_model_instance);
  }
  catch (const BackendModelInstanceException& instance_ex) {
    // An exception that carries no error object is reported as an internal
    // error; otherwise the carried error is handed to the caller.
    RETURN_ERROR_IF_TRUE(
        instance_ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelInstanceException"));
    RETURN_IF_ERROR(instance_ex.err_);
  }

  return nullptr;  // success
}

// Forwards both arguments to the BackendModelInstance base class and keeps
// the ModelState pointer for StateForModel(). The correlation-ID map starts
// out empty.
ModelInstanceState::ModelInstanceState(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance)
    : BackendModelInstance(model_state, triton_model_instance),
      model_state_(model_state)
{
}

// Return the number of responses remaining for 'corrid' after accounting
// for the one being produced now. 'init_value' seeds the counter only when
// 'corrid' has no entry yet.
int32_t
ModelInstanceState::GetOutput(uint64_t corrid, int32_t init_value)
{
  // emplace() leaves an existing entry untouched and returns an iterator to
  // it, so this both looks up a known 'corrid' and registers a new one.
  auto entry = remainders_.emplace(corrid, init_value).first;

  const int32_t remaining = --entry->second;
  if (remaining <= 0) {
    // Nothing left to send for this correlation ID; drop its counter.
    remainders_.erase(entry);
  }
  return remaining;
}

/////////////

extern "C" {

// Implementing TRITONBACKEND_Initialize is optional. The backend
// should initialize any global state that is intended to be shared
// across all models and model instances that use the backend.
// Backend-level initialization: logs the backend name and verifies that the
// backend API version reported by Triton is compatible with the version
// this backend was compiled against. Returns nullptr on success.
TRITONSERVER_Error*
TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
{
  const char* backend_name = nullptr;
  RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &backend_name));

  const std::string log_line =
      std::string("TRITONBACKEND_Initialize: ") + backend_name;
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, log_line.c_str());

  // Ask Triton which backend API version it implements.
  uint32_t triton_major = 0;
  uint32_t triton_minor = 0;
  RETURN_IF_ERROR(TRITONBACKEND_ApiVersion(&triton_major, &triton_minor));

  // Compatible means: identical major version, and a minor version at least
  // as new as the one this backend was built with.
  const bool api_supported =
      (triton_major == TRITONBACKEND_API_VERSION_MAJOR) &&
      (triton_minor >= TRITONBACKEND_API_VERSION_MINOR);
  if (!api_supported) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        "triton backend API version does not support this backend");
  }

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInitialize is optional. The backend
// should initialize any state that is intended to be shared across
// all instances of the model.
// Model-level initialization: logs the model name and version, then creates
// a ModelState and attaches it to 'model'. Returns nullptr on success.
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  // Name and version are only needed for the log line.
  const char* model_name = nullptr;
  RETURN_IF_ERROR(TRITONBACKEND_ModelName(model, &model_name));

  uint64_t model_version = 0;
  RETURN_IF_ERROR(TRITONBACKEND_ModelVersion(model, &model_version));

  const std::string log_line =
      std::string("TRITONBACKEND_ModelInitialize: ") + model_name +
      " (version " + std::to_string(model_version) + ")";
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, log_line.c_str());

  // Build the per-model state and hand it to Triton so it can be retrieved
  // later through TRITONBACKEND_ModelState.
  ModelState* new_state = nullptr;
  RETURN_IF_ERROR(ModelState::Create(model, &new_state));
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(new_state)));

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelFinalize is optional unless state
// is set using TRITONBACKEND_ModelSetState. The backend must free
// this state and perform any other cleanup.
// Model-level cleanup: retrieves the state attached to 'model' and deletes
// it. Returns nullptr on success.
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
  void* attached_state = nullptr;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &attached_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO, "TRITONBACKEND_ModelFinalize: delete model state");

  // The attached pointer is the ModelState set by
  // TRITONBACKEND_ModelInitialize; destroy it as that type.
  delete static_cast<ModelState*>(attached_state);

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceInitialize is optional. The
// backend should initialize any state that is required for a model
// instance.
// Instance-level initialization: logs the instance name, kind and device,
// creates a ModelInstanceState bound to the owning model's ModelState,
// attaches it to 'instance', and rejects non-CPU instances. Returns nullptr
// on success.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  // Name, device and kind are gathered for the log line.
  const char* instance_name = nullptr;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceName(instance, &instance_name));

  int32_t device_id = 0;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceDeviceId(instance, &device_id));

  TRITONSERVER_InstanceGroupKind kind;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceKind(instance, &kind));

  const std::string log_line =
      std::string("TRITONBACKEND_ModelInstanceInitialize: ") + instance_name +
      " (" + TRITONSERVER_InstanceGroupKindString(kind) + " device " +
      std::to_string(device_id) + ")";
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, log_line.c_str());

  // Reach the owning model, and through it the ModelState that was attached
  // to that model.
  TRITONBACKEND_Model* owning_model = nullptr;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &owning_model));

  void* raw_model_state = nullptr;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(owning_model, &raw_model_state));
  ModelState* model_state = static_cast<ModelState*>(raw_model_state);

  // Build the per-instance state and attach it to the instance.
  ModelInstanceState* instance_state = nullptr;
  RETURN_IF_ERROR(
      ModelInstanceState::Create(model_state, instance, &instance_state));
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
      instance, reinterpret_cast<void*>(instance_state)));

  // Only CPU instance groups are accepted. Note that this check runs after
  // the state has already been attached to the instance.
  RETURN_ERROR_IF_FALSE(
      instance_state->Kind() == TRITONSERVER_INSTANCEGROUPKIND_CPU,
      TRITONSERVER_ERROR_INVALID_ARG,
      std::string("'iterative_sequence' backend only supports CPU instances"));

  return nullptr;  // success
}

// TRITONBACKEND_ModelInstanceFinalize only has to be implemented when
// state was attached with TRITONBACKEND_ModelInstanceSetState. In that
// case the backend is responsible for freeing the state and doing any
// other cleanup.
//
// Fetches the state attached to 'instance' and deletes it.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
  void* raw_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &raw_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      "TRITONBACKEND_ModelInstanceFinalize: delete instance state");

  // The state was stored as a ModelInstanceState, so delete it as one.
  delete static_cast<ModelInstanceState*>(raw_state);

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceExecute is required.
//
// Processes every request in 'requests' on its own: reads the request's
// correlation ID and the first INT32 element of input 'INPUT', passes both
// to ModelInstanceState::GetOutput() and sends the returned value in
// output 'OUTPUT'. While the returned value is positive the request is
// released with the RESCHEDULE flag and the response is not marked final;
// otherwise the response is final and the request is released completely.
// A request that fails at any step gets an error response, is reported as
// failed in the statistics and is released.
//
// Returns nullptr on success. An error is returned only for failures that
// happen before any request has been processed (reading the instance
// state, or creating the response objects).
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  // Triton will not call this function simultaneously for the same
  // 'instance'. But since this backend could be used by multiple
  // instances from multiple models the implementation needs to handle
  // multiple calls to this function at the same time (with different
  // 'instance' objects). Suggested practice for this is to use only
  // function-local and model-instance-specific state (obtained from
  // 'instance'), which is what we do here.
  ModelInstanceState* instance_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
      instance, reinterpret_cast<void**>(&instance_state)));
  ModelState* model_state = instance_state->StateForModel();

  // This backend specifies BLOCKING execution policy. That means that
  // we should not return from this function until execution is
  // complete. Triton will automatically release 'instance' on return
  // from this function so that it is again available to be used for
  // another call to TRITONBACKEND_ModelInstanceExecute.

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model ") + model_state->Name() + ", instance " +
       instance_state->Name() + ", executing " + std::to_string(request_count) +
       " requests")
          .c_str());

  bool supports_batching = false;
  RETURN_IF_ERROR(model_state->SupportsFirstDimBatching(&supports_batching));

  // 'responses' is initialized with the response objects below and
  // if/when an error response is sent the corresponding entry in
  // 'responses' is set to nullptr to indicate that that response has
  // already been sent.
  std::vector<TRITONBACKEND_Response*> responses;
  responses.reserve(request_count);

  // Create a single response object for each request. If something
  // goes wrong when attempting to create the response objects just
  // fail all of the requests by returning an error.
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];

    TRITONBACKEND_Response* response;
    TRITONSERVER_Error* new_response_err =
        TRITONBACKEND_ResponseNew(&response, request);
    if (new_response_err != nullptr) {
      // None of the responses created so far will ever be sent, so
      // delete them here instead of leaking them.
      for (TRITONBACKEND_Response* created : responses) {
        LOG_IF_ERROR(
            TRITONBACKEND_ResponseDelete(created),
            "failed deleting response");
      }
      return new_response_err;
    }
    responses.push_back(response);
  }

  // The way we collect these batch timestamps is not entirely
  // accurate. Normally, in a performant backend you would execute all
  // the requests at the same time, and so there would be a single
  // compute-start / compute-end time-range. But here we execute each
  // request separately so there is no single range. As a result we
  // just show the entire execute time as being the compute time as
  // well.
  uint64_t min_exec_start_ns = std::numeric_limits<uint64_t>::max();
  uint64_t max_exec_end_ns = 0;
  uint64_t total_batch_size = 0;

  // Finishes a request whose error response has already been sent:
  // records it as failed in the statistics and then releases it. The
  // statistics are reported first because the request must not be
  // touched once it has been released.
  auto finish_failed_request = [instance_state, &max_exec_end_ns](
                                   TRITONBACKEND_Request* failed_request,
                                   const uint64_t start_ns) {
    uint64_t end_ns = 0;
    SET_TIMESTAMP(end_ns);
    max_exec_end_ns = std::max(max_exec_end_ns, end_ns);

    LOG_IF_ERROR(
        TRITONBACKEND_ModelInstanceReportStatistics(
            instance_state->TritonModelInstance(), failed_request,
            false /* success */, start_ns, start_ns, end_ns, end_ns),
        "failed reporting request statistics");
    LOG_IF_ERROR(
        TRITONBACKEND_RequestRelease(
            failed_request, TRITONSERVER_REQUEST_RELEASE_ALL),
        "failed releasing request");
  };

  // After this point we take ownership of 'requests', which means
  // that a response must be sent for every request and every request
  // must be released. If something does go wrong in processing a
  // particular request then we send an error response just for the
  // specific request.

  // For simplicity we just process each request separately... in
  // general a backend should try to operate on the entire batch of
  // requests at the same time for improved performance.
  for (uint32_t r = 0; r < request_count; ++r) {
    ++total_batch_size;

    uint64_t exec_start_ns = 0;
    SET_TIMESTAMP(exec_start_ns);
    min_exec_start_ns = std::min(min_exec_start_ns, exec_start_ns);

    TRITONBACKEND_Request* request = requests[r];

    uint64_t correlation_id = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestCorrelationId(request, &correlation_id));
    // If an error response was sent for the above then display an error
    // message and move on to next request.
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read request correlation ID, error response "
           "sent")
              .c_str());
      finish_failed_request(request, exec_start_ns);
      continue;
    }

    TRITONBACKEND_Input* input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, TRITONBACKEND_RequestInput(request, "INPUT", &input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'INPUT', error response sent")
              .c_str());
      finish_failed_request(request, exec_start_ns);
      continue;
    }

    const void* input_buffer = nullptr;
    uint64_t buffer_byte_size = 0;
    TRITONSERVER_MemoryType input_memory_type = TRITONSERVER_MEMORY_CPU;
    int64_t input_memory_type_id = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputBuffer(
            input, 0 /* index */, &input_buffer, &buffer_byte_size,
            &input_memory_type, &input_memory_type_id));
    if ((responses[r] == nullptr) ||
        (input_memory_type == TRITONSERVER_MEMORY_GPU)) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "failed to get input buffer in CPU memory"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to get input buffer in CPU memory, error "
           "response sent")
              .c_str());
      finish_failed_request(request, exec_start_ns);
      continue;
    }

    // The buffer is read as a single INT32 below, so make sure it is
    // present and large enough before dereferencing it.
    if ((input_buffer == nullptr) || (buffer_byte_size < sizeof(int32_t))) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INVALID_ARG,
              "input 'INPUT' is too small to hold an INT32 value"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": input 'INPUT' is too small to hold an INT32 value, error "
           "response sent")
              .c_str());
      finish_failed_request(request, exec_start_ns);
      continue;
    }

    const int32_t init_value = *reinterpret_cast<const int32_t*>(input_buffer);
    auto output_value = instance_state->GetOutput(correlation_id, init_value);

    TRITONBACKEND_Response* response = responses[r];

    // The output shape is [1, 1] if the model
    // configuration supports batching, or just
    // [1] if the model configuration does not
    // support batching.
    std::vector<int64_t> shape;
    if (supports_batching) {
      shape.push_back(1);
    }
    shape.push_back(1);

    TRITONBACKEND_Output* output;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_ResponseOutput(
            response, &output, "OUTPUT", TRITONSERVER_TYPE_INT32, shape.data(),
            shape.size()));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to create response output, error response sent")
              .c_str());
      finish_failed_request(request, exec_start_ns);
      continue;
    }

    // Get the output buffer. We request a buffer in CPU
    // memory but we have to handle any returned type. If we get
    // back a buffer in GPU memory we just fail the request.
    void* output_buffer;
    TRITONSERVER_MemoryType output_memory_type = TRITONSERVER_MEMORY_CPU;
    int64_t output_memory_type_id = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_OutputBuffer(
            output, &output_buffer, sizeof(int32_t), &output_memory_type,
            &output_memory_type_id));
    if ((responses[r] == nullptr) ||
        (output_memory_type == TRITONSERVER_MEMORY_GPU)) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "failed to create output buffer in CPU memory"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to create output buffer in CPU memory, error "
           "response sent")
              .c_str());
      finish_failed_request(request, exec_start_ns);
      continue;
    }

    reinterpret_cast<int32_t*>(output_buffer)[0] = output_value;

    // Set response flag and request flag correctly based on whether this
    // is the last response of the sequence.
    uint32_t res_flag =
        (output_value <= 0) ? TRITONSERVER_RESPONSE_COMPLETE_FINAL : 0;
    uint32_t req_flag = (output_value <= 0)
                            ? TRITONSERVER_REQUEST_RELEASE_ALL
                            : TRITONSERVER_REQUEST_RELEASE_RESCHEDULE;

    uint64_t exec_end_ns = 0;
    SET_TIMESTAMP(exec_end_ns);
    max_exec_end_ns = std::max(max_exec_end_ns, exec_end_ns);

    // wait for 0.5 second before rescheduling the request.
    std::this_thread::sleep_for(std::chrono::milliseconds(500));

    // Report statistics for the request while the backend still owns it;
    // 'request' must not be used after it has been released.
    LOG_IF_ERROR(
        TRITONBACKEND_ModelInstanceReportStatistics(
            instance_state->TritonModelInstance(), request,
            true /* success */, exec_start_ns, exec_start_ns, exec_end_ns,
            exec_end_ns),
        "failed reporting request statistics");

    // Release the request before sending the response as the testing
    // backend may be configured to receive error on request release, in
    // such a case, the error will be propagated back through error
    // response.
    auto err = TRITONBACKEND_RequestRelease(request, req_flag);
    if (err) {
      // Release request with ALL flag
      LOG_IF_ERROR(
          TRITONBACKEND_RequestRelease(
              request, TRITONSERVER_REQUEST_RELEASE_ALL),
          "failed releasing request");
      res_flag = TRITONSERVER_RESPONSE_COMPLETE_FINAL;
    }

    // Send the response if it hasn't already been sent because of
    // an earlier error.
    if (responses[r] != nullptr) {
      LOG_IF_ERROR(
          TRITONBACKEND_ResponseSend(responses[r], res_flag, err),
          "failed sending response");
    }
    TRITONSERVER_ErrorDelete(err);
  }

  // Report the entire batch statistics.
  LOG_IF_ERROR(
      TRITONBACKEND_ModelInstanceReportBatchStatistics(
          instance_state->TritonModelInstance(), total_batch_size,
          min_exec_start_ns, min_exec_start_ns, max_exec_end_ns,
          max_exec_end_ns),
      "failed reporting batch request statistics");

  return nullptr;  // success
}

}  // extern "C"

}}}  // namespace triton::backend::iterative_sequence


================================================
FILE: src/test/iterative_sequence/src/libtriton_iterative_sequence.ldscript
================================================
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Symbol visibility: symbols matching TRITONBACKEND_* are global, every
# other symbol is local.
{
  global:
    TRITONBACKEND_*;
  local: *;
};


================================================
FILE: src/test/models/identity_fp32/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model "identity_fp32": served by the "identity" backend with one
# variable-length FP32 input (INPUT0) and one variable-length FP32 output
# (OUTPUT0), a max batch size of 1, and a single CPU instance.
name: "identity_fp32"
backend: "identity"
max_batch_size: 1
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]


================================================
FILE: src/test/models/repeat_int32/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model "repeat_int32": served by the "repeat" backend with a decoupled
# transaction policy and max_batch_size 0.
# Inputs: IN (INT32, variable length), DELAY (UINT32, variable length) and
# WAIT (UINT32, one element).
# Outputs: OUT (INT32, one element) and IDX (UINT32, one element).
name: "repeat_int32"
backend: "repeat"
max_batch_size: 0
model_transaction_policy {
  decoupled: True
}
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "IDX"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]


================================================
FILE: src/test/models/square_int32/config.pbtxt
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model "square_int32": served by the "square" backend with a decoupled
# transaction policy and max_batch_size 0. It takes a single one-element
# INT32 input (IN) and declares a single one-element INT32 output (OUT).
name: "square_int32"
backend: "square"
max_batch_size: 0
model_transaction_policy {
  decoupled: True
}
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]


================================================
FILE: src/test/query_backend/CMakeLists.txt
================================================
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Build script for the query test backend: builds the shared library
# 'triton_query' from src/query.cc, installs it under backends/query and
# exports it as the CMake package 'TritonQueryBackend'.
cmake_minimum_required(VERSION 3.31.8)

project(tritonquerybackend LANGUAGES C CXX)

#
# libtriton_query.so
# Shared library implementing the Triton Query Backend API
#
# Copy the linker version script into the build directory unmodified so
# the link step below can reference it by its bare file name.
configure_file(src/libtriton_query.ldscript libtriton_query.ldscript COPYONLY)

add_library(
  triton-query-backend SHARED
  src/query.cc
)

# Namespaced alias, matching the name consumers get from the exported
# targets file.
add_library(
  TritonQueryBackend::triton-query-backend ALIAS triton-query-backend
)

target_compile_features(triton-query-backend PRIVATE cxx_std_11)
# Clang, AppleClang and GNU compilers build with warnings enabled and
# treated as errors.
target_compile_options(
  triton-query-backend PRIVATE
  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

target_link_libraries(
  triton-query-backend
  PRIVATE
    triton-backend-utils    # from repo-backend
    triton-core-serverapi   # from repo-core
    triton-core-backendapi  # from repo-core
    triton-core-serverstub  # from repo-core
)

# The library is emitted as 'triton_query' and linked with the version
# script; LINK_DEPENDS makes the target relink when that script changes.
set_target_properties(
  triton-query-backend PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  OUTPUT_NAME triton_query
  LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_query.ldscript
  LINK_FLAGS "-Wl,--version-script libtriton_query.ldscript"
)

#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonQueryBackend)

# The backend library itself goes under <prefix>/backends/query.
install(
  TARGETS
    triton-query-backend
  EXPORT
    triton-query-backend-targets
  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/query
  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/query
)

# Install the exported targets file next to the package config file.
install(
  EXPORT
    triton-query-backend-targets
  FILE
    TritonQueryBackendTargets.cmake
  NAMESPACE
    TritonQueryBackend::
  DESTINATION
    ${INSTALL_CONFIGDIR}
)

# Generate the package config file from its template and install it.
include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonQueryBackendConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/TritonQueryBackendConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES
  ${CMAKE_CURRENT_BINARY_DIR}/TritonQueryBackendConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
export(
  EXPORT triton-query-backend-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonQueryBackendTargets.cmake
  NAMESPACE TritonQueryBackend::
)

export(PACKAGE TritonQueryBackend)


================================================
FILE: src/test/query_backend/cmake/TritonQueryBackendConfig.cmake.in
================================================
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Package config for TritonQueryBackend: makes the imported target
# TritonQueryBackend::triton-query-backend available and publishes it in
# TRITONQUERYBACKEND_LIBRARIES.
include(CMakeFindDependencyMacro)

# Directory that contains this config file (and the exported targets file).
get_filename_component(
  TRITONQUERYBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)

list(APPEND CMAKE_MODULE_PATH ${TRITONQUERYBACKEND_CMAKE_DIR})

# Load the exported targets only if this package's own target has not been
# defined yet, so that finding the package more than once is harmless.
if(NOT TARGET TritonQueryBackend::triton-query-backend)
  include("${TRITONQUERYBACKEND_CMAKE_DIR}/TritonQueryBackendTargets.cmake")
endif()

set(TRITONQUERYBACKEND_LIBRARIES TritonQueryBackend::triton-query-backend)


================================================
FILE: src/test/query_backend/src/libtriton_query.ldscript
================================================
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Symbol visibility: symbols matching TRITONBACKEND_* are global, every
# other symbol is local.
{
  global:
    TRITONBACKEND_*;
  local: *;
};


================================================
FILE: src/test/query_backend/src/query.cc
================================================
// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <vector>

#include "triton/backend/backend_common.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"

namespace triton { namespace backend { namespace query {


// Query backend that is solely used with unit-testing query functionality
// in both the server API and backend API.
//
// The backend calls the backend query API with the settings below:
// name, byte_size, memory_type, memory_type_id (refer to the backend API for
// details)
// "OUTPUT0", nullptr, CPU_PINNED, 1
// "OUTPUT1", nullptr, CPU_PINNED, 1
// Then it will call the alloc function (TRITONBACKEND_OutputBuffer) with
// the returned value accordingly. If 'byte_size' is nullptr, it creates the
// outputs with UINT8 type and shape [2].
// The backend will read environment variables for different query behavior
// 'TEST_ANONYMOUS': the backend will call the query API only once with 'name'
//                   set to nullptr
// 'TEST_BYTE_SIZE': the backend will call the query API once with 'byte_size'
//                   set to the variable value, and the outputs will be created
//                   with UINT8 and shape [byte_size]. If 'TEST_ANONYMOUS' is
//                   also specified, the outputs will have shape [byte_size / 2]
// 'TEST_FAIL_WITH_QUERY_RESULT' : the query results will be formatted to string
//                                 and returned as error message.

// If RESPONSE is not nullptr, evaluate X and, when it yields an error,
// send that error on RESPONSE as the final response and then delete the
// error. A failure to send is only logged.
//
// X is not evaluated at all when RESPONSE is nullptr. The macro does not
// reset RESPONSE to nullptr after sending, so the caller must make sure
// the response is not used again once an error has been sent.
#define RESPOND_IF_ERROR(RESPONSE, X)                                   \
  do {                                                                  \
    if (RESPONSE != nullptr) {                                          \
      TRITONSERVER_Error* err__ = (X);                                  \
      if (err__ != nullptr) {                                           \
        LOG_IF_ERROR(                                                   \
            TRITONBACKEND_ResponseSend(                                 \
                RESPONSE, TRITONSERVER_RESPONSE_COMPLETE_FINAL, err__), \
            "failed to send error response");                           \
        TRITONSERVER_ErrorDelete(err__);                                \
      }                                                                 \
    }                                                                   \
  } while (false)

extern "C" {

// Implementing TRITONBACKEND_ModelInstanceExecute is required.
//
// For every request this function:
//   1. Queries the output buffer properties, either once with a nullptr
//      name (TEST_ANONYMOUS) or once per output ("OUTPUT0", "OUTPUT1").
//   2. If a query fails, sends the query error as the response.
//   3. Otherwise either creates both outputs using the queried memory type
//      and id, or (TEST_FAIL_WITH_QUERY_RESULT) sends the formatted query
//      results back as an error message.
//   4. Releases the request.
//
// Returns nullptr on success, or the error from TRITONBACKEND_ResponseNew if
// a response object cannot be created.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  // Read environment variables
  const char* anonymous_str = getenv("TEST_ANONYMOUS");
  const char* byte_size_str = getenv("TEST_BYTE_SIZE");
  const char* fail_str = getenv("TEST_FAIL_WITH_QUERY_RESULT");
  bool anonymous = (anonymous_str != nullptr);
  size_t byte_size = 2;
  size_t query_byte_size = byte_size;
  // Stays nullptr (query without a byte size) unless TEST_BYTE_SIZE is set.
  size_t* byte_size_ptr = nullptr;
  if (byte_size_str != nullptr) {
    byte_size = atoi(byte_size_str);
    query_byte_size = byte_size;
    if (anonymous) {
      // The anonymous query covers both outputs, so each gets half.
      byte_size /= 2;
    }
    byte_size_ptr = &query_byte_size;
  }

  for (uint32_t r = 0; r < request_count; ++r) {
    std::string log_message;

    TRITONBACKEND_Request* request = requests[r];
    TRITONBACKEND_Response* response = nullptr;

    // Query before creating output
    std::vector<const char*> names;
    if (anonymous) {
      names.emplace_back(nullptr);
    } else {
      names = {"OUTPUT0", "OUTPUT1"};
    }
    std::vector<TRITONSERVER_MemoryType> types{
        TRITONSERVER_MEMORY_CPU_PINNED, TRITONSERVER_MEMORY_CPU_PINNED};
    std::vector<int64_t> type_ids{1, 1};
    for (size_t i = 0; i < names.size(); ++i) {
      auto err = TRITONBACKEND_RequestOutputBufferProperties(
          request, names[i], byte_size_ptr, &types[i], &type_ids[i]);
      if (err != nullptr) {
        auto new_err = TRITONBACKEND_ResponseNew(&response, request);
        if (new_err != nullptr) {
          // The query error can no longer be reported; delete it so it is
          // not leaked before bailing out.
          TRITONSERVER_ErrorDelete(err);
          return new_err;
        }
        // Sends 'err' as the final response and deletes it.
        RESPOND_IF_ERROR(response, err);
        break;
      }
      if (fail_str != nullptr) {
        log_message += ((names[i] == nullptr) ? "NULL" : names[i]);
        switch (types[i]) {
          case TRITONSERVER_MEMORY_CPU:
            log_message += " CPU ";
            break;
          case TRITONSERVER_MEMORY_CPU_PINNED:
            log_message += " CPU_PINNED ";
            break;
          case TRITONSERVER_MEMORY_GPU:
            log_message += " GPU ";
            break;
        }
        log_message += (std::to_string(type_ids[i]) + "; ");
      }
    }

    // If response is not nullptr, some error is returned from query API and
    // the response has been sent
    if (response == nullptr) {
      if (names.size() == 1) {
        // Anonymous query: apply the single query result to both outputs.
        names = {"OUTPUT0", "OUTPUT1"};
        types[1] = types[0];
        type_ids[1] = type_ids[0];
      }
      std::vector<int64_t> shape{(int64_t)byte_size};

      // Reuse the outer 'response' rather than shadowing it.
      RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request));
      TRITONSERVER_Error* err = nullptr;
      if (fail_str == nullptr) {
        for (size_t i = 0; i < names.size(); ++i) {
          TRITONBACKEND_Output* output;
          err = TRITONBACKEND_ResponseOutput(
              response, &output, names[i], TRITONSERVER_TYPE_UINT8,
              shape.data(), 1);
          if (err != nullptr) {
            break;
          }
          void* output_buffer;
          err = TRITONBACKEND_OutputBuffer(
              output, &output_buffer, byte_size, &types[i], &type_ids[i]);
          if (err != nullptr) {
            break;
          }
          // Do nothing with the buffer as we don't care
        }
      } else {
        // Use an uncommon error code
        err = TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_UNKNOWN, log_message.c_str());
      }

      LOG_IF_ERROR(
          TRITONBACKEND_ResponseSend(
              response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, err),
          "failed to send response");
      // The error is deleted here after sending, matching what
      // RESPOND_IF_ERROR does, so it is not leaked.
      if (err != nullptr) {
        TRITONSERVER_ErrorDelete(err);
      }
    }

    LOG_IF_ERROR(
        TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
        "failed to release request");
  }

  return nullptr;  // success
}

}  // extern "C"

}}}  // namespace triton::backend::query


================================================
FILE: src/test/repoagent/relocation_repoagent/CMakeLists.txt
================================================
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Build script for the test-only relocation repository agent shared library.
cmake_minimum_required(VERSION 3.31.8)

project(tritonrelocationrepoagent LANGUAGES C CXX)

# Copy the linker version script into the build tree unchanged (COPYONLY);
# the LINK_DEPENDS / LINK_FLAGS properties below refer to that copy.
configure_file(
  src/libtritonrepoagent_relocation.ldscript
  libtritonrepoagent_relocation.ldscript COPYONLY)

# Shared library built from the single relocation agent source file.
add_library(
  triton-relocation-repoagent SHARED
  src/relocation.cc
)

# Namespaced alias, matching the NAMESPACE used for the exported targets below.
add_library(
  TritonRelocationRepoAgent::triton-relocation-repoagent ALIAS triton-relocation-repoagent
)

# Require C++11. For Clang, AppleClang and GNU compilers, enable extra
# warnings and promote warnings to errors.
target_compile_features(triton-relocation-repoagent PRIVATE cxx_std_11)
target_compile_options(
  triton-relocation-repoagent PRIVATE
  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

target_link_libraries(
  triton-relocation-repoagent
  PRIVATE
    triton-core-serverapi     # from repo-core
    triton-core-repoagentapi  # from repo-core
    triton-core-serverstub    # from repo-core
)

# The output file is named after OUTPUT_NAME (tritonrepoagent_relocation).
# The version script restricts which symbols the library exports, and
# LINK_DEPENDS relinks the library when the copied script changes.
# NOTE(review): the script path in LINK_FLAGS is relative; presumably it is
# resolved against the directory the link command runs in - confirm.
set_target_properties(
  triton-relocation-repoagent PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  OUTPUT_NAME tritonrepoagent_relocation
  LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtritonrepoagent_relocation.ldscript
  LINK_FLAGS "-Wl,--version-script libtritonrepoagent_relocation.ldscript"
)

#
# Install
#
include(GNUInstallDirs)
# Directory that receives the CMake package files (targets + config).
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonRelocationRepoAgent)

# Install the shared library and record it in the export set.
install(
  TARGETS
    triton-relocation-repoagent
  EXPORT
    triton-relocation-repoagent-targets
  LIBRARY DESTINATION lib
)

# Install the export set as TritonRelocationRepoAgentTargets.cmake.
install(
  EXPORT
    triton-relocation-repoagent-targets
  FILE
    TritonRelocationRepoAgentTargets.cmake
  NAMESPACE
    TritonRelocationRepoAgent::
  DESTINATION
    ${INSTALL_CONFIGDIR}
)

# Generate the package config file from its template and install it next to
# the targets file.
include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonRelocationRepoAgentConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/TritonRelocationRepoAgentConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES
  ${CMAKE_CURRENT_BINARY_DIR}/TritonRelocationRepoAgentConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
# Write a targets file into the build directory so the package can also be
# consumed without installing it.
export(
  EXPORT triton-relocation-repoagent-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonRelocationRepoAgentTargets.cmake
  NAMESPACE TritonRelocationRepoAgent::
)

export(PACKAGE TritonRelocationRepoAgent)


================================================
FILE: src/test/repoagent/relocation_repoagent/cmake/TritonRelocationRepoAgentConfig.cmake.in
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Package configuration template for the relocation repository agent.
include(CMakeFindDependencyMacro)

# Directory containing this config file; the targets file is loaded from the
# same directory below.
get_filename_component(
  TRITONRELOCATIONREPOAGENT_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)

list(APPEND CMAKE_MODULE_PATH ${TRITONRELOCATIONREPOAGENT_CMAKE_DIR})

# Only load the targets file if the imported target is not already defined.
if(NOT TARGET TritonRelocationRepoAgent::triton-relocation-repoagent)
  include("${TRITONRELOCATIONREPOAGENT_CMAKE_DIR}/TritonRelocationRepoAgentTargets.cmake")
endif()

# Variable consumers can use to refer to the imported library target.
set(TRITONRELOCATIONREPOAGENT_LIBRARIES TritonRelocationRepoAgent::triton-relocation-repoagent)


================================================
FILE: src/test/repoagent/relocation_repoagent/src/libtritonrepoagent_relocation.ldscript
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Linker version script: symbols matching TRITONREPOAGENT_* are global and
# every other symbol is local.
{
  global:
    TRITONREPOAGENT_*;
  local: *;
};


================================================
FILE: src/test/repoagent/relocation_repoagent/src/relocation.cc
================================================
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <cctype>
#include <cstring>
#include <iomanip>
#include <stdexcept>
#include <string>

#include "triton/core/tritonrepoagent.h"
#include "triton/core/tritonserver.h"

//
// Relocation Repository Agent that is for test only.
//

namespace triton { namespace repoagent { namespace relocation {

namespace {
//
// ErrorException
//
// Carrier for a TRITONSERVER_Error raised while the relocation repo agent
// is running; the THROW_* macros in this file throw it.
//
struct ErrorException {
  // Error being propagated; this struct never deletes it.
  TRITONSERVER_Error* err_;

  ErrorException(TRITONSERVER_Error* err) { err_ = err; }
};

// Evaluate X once; if it yields a non-null TRITONSERVER_Error, throw it
// wrapped in an ErrorException. The error is handed to the exception and is
// not deleted here.
#define THROW_IF_TRITON_ERROR(X)                                      \
  do {                                                                \
    TRITONSERVER_Error* tie_err__ = (X);                              \
    if (tie_err__ != nullptr) {                                       \
      throw triton::repoagent::relocation::ErrorException(tie_err__); \
    }                                                                 \
  } while (false)


// Create a new TRITONSERVER_Error from CODE and MSG and unconditionally
// throw it wrapped in an ErrorException.
#define THROW_TRITON_ERROR(CODE, MSG)                                 \
  do {                                                                \
    TRITONSERVER_Error* tie_err__ = TRITONSERVER_ErrorNew(CODE, MSG); \
    throw triton::repoagent::relocation::ErrorException(tie_err__);   \
  } while (false)


// Evaluate X once; if it yields a non-null TRITONSERVER_Error, return that
// error from the enclosing function. Only usable in functions returning
// TRITONSERVER_Error*.
#define RETURN_IF_ERROR(X)               \
  do {                                   \
    TRITONSERVER_Error* rie_err__ = (X); \
    if (rie_err__ != nullptr) {          \
      return rie_err__;                  \
    }                                    \
  } while (false)

}  // namespace

/////////////

extern "C" {

// Finalize a model: fetch the state pointer stored for the model (the
// location string saved by TRITONREPOAGENT_ModelAction) and release that
// repository location. Returns the first error encountered, or nullptr.
TRITONSERVER_Error*
TRITONREPOAGENT_ModelFinalize(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model)
{
  void* state = nullptr;
  TRITONSERVER_Error* state_err = TRITONREPOAGENT_ModelState(model, &state);
  if (state_err != nullptr) {
    return state_err;
  }

  // The state is the location string; hand it back for release.
  return TRITONREPOAGENT_ModelRepositoryLocationRelease(
      agent, model, static_cast<const char*>(state));
}

// Handle a repository agent action for 'model'.
//
// Only TRITONREPOAGENT_ACTION_LOAD is handled; every other action returns
// success immediately. On load the agent:
//   1. Requires exactly one agent parameter, 'empty_config', whose value is
//      'true' or 'false'.
//   2. Fetches the model config and checks that the presence of the
//      'model_repository_agents' field is consistent with 'empty_config'.
//   3. Acquires a new filesystem repository location, points the model at
//      it and stores the location as the model state so that
//      TRITONREPOAGENT_ModelFinalize can release it.
//
// Returns nullptr on success, otherwise the first error encountered.
TRITONSERVER_Error*
TRITONREPOAGENT_ModelAction(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const TRITONREPOAGENT_ActionType action_type)
{
  // Return success (nullptr) if the agent does not handle the action
  if (action_type != TRITONREPOAGENT_ACTION_LOAD) {
    return nullptr;
  }

  // Check the agent parameters for the relocation configuration of the model
  uint32_t parameter_count = 0;
  RETURN_IF_ERROR(
      TRITONREPOAGENT_ModelParameterCount(agent, model, &parameter_count));
  if (parameter_count != 1) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "Relocation repoagent expects 1 parameter for relocation agent");
  }
  const char* key = nullptr;
  const char* value = nullptr;
  RETURN_IF_ERROR(
      TRITONREPOAGENT_ModelParameter(agent, model, 0, &key, &value));
  if (std::string(key) != "empty_config") {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "Relocation repoagent expects parameter with key 'empty_config' for "
        "relocation agent");
  } else if (
      (std::string(value) != "true") && (std::string(value) != "false")) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "Relocation repoagent expects 'empty_config' parameter with value "
        "'true' or 'false' for relocation agent");
  }
  // 'value' is now known to be exactly "true" or "false".
  const bool expect_empty_config = (std::string(value) == "true");

  TRITONSERVER_Message* model_config = nullptr;
  RETURN_IF_ERROR(TRITONREPOAGENT_ModelConfig(agent, model, 1, &model_config));
  const char* base = nullptr;
  size_t byte_size = 0;
  TRITONSERVER_Error* err =
      TRITONSERVER_MessageSerializeToJson(model_config, &base, &byte_size);
  if (err == nullptr) {
    // hack to check if proper model config is passed by knowing that only
    // the original config will contain 'model_repository_agents' setting
    const bool has_agents_field =
        std::string(base, byte_size).find("model_repository_agents") !=
        std::string::npos;
    if (expect_empty_config && has_agents_field) {
      err = TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "Relocation repoagent expects config does not contain "
          "'model_repository_agents' field when 'empty_config' has value "
          "'true' for relocation agent");
    } else if (!expect_empty_config && !has_agents_field) {
      err = TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          "Relocation repoagent expects config contains "
          "'model_repository_agents' field when 'empty_config' has value "
          "'false' for relocation agent");
    }
  }

  // Always delete the message, including on validation failure, so it is
  // not leaked. 'base' is not used past this point.
  TRITONSERVER_Error* delete_err = TRITONSERVER_MessageDelete(model_config);
  if (err != nullptr) {
    // Report the first failure; drop the secondary one instead of leaking it.
    if (delete_err != nullptr) {
      TRITONSERVER_ErrorDelete(delete_err);
    }
    return err;
  }
  RETURN_IF_ERROR(delete_err);

  // Point to a new model repository
  const char* location;
  RETURN_IF_ERROR(TRITONREPOAGENT_ModelRepositoryLocationAcquire(
      agent, model, TRITONREPOAGENT_ARTIFACT_FILESYSTEM, &location));
  RETURN_IF_ERROR(TRITONREPOAGENT_ModelRepositoryUpdate(
      agent, model, TRITONREPOAGENT_ARTIFACT_FILESYSTEM, location));
  // Remember the location so ModelFinalize can release it.
  RETURN_IF_ERROR(TRITONREPOAGENT_ModelSetState(model, (void*)location));

  return nullptr;  // success
}

}  // extern "C"

}}}  // namespace triton::repoagent::relocation


================================================
FILE: src/test/sequence/CMakeLists.txt
================================================
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Build script for the test sequence backend shared library.
cmake_minimum_required(VERSION 3.31.8)

project(tritonsequencebackend LANGUAGES C CXX)

#
# libtriton_sequence.so
# Shared library implementing the Triton Sequence Backend API
#
# Copy the linker version script into the build tree unchanged (COPYONLY);
# the LINK_DEPENDS / LINK_FLAGS properties below refer to that copy.
configure_file(src/libtriton_sequence.ldscript libtriton_sequence.ldscript COPYONLY)

add_library(
  triton-sequence-backend SHARED
  src/sequence.cc
)

# Namespaced alias, matching the NAMESPACE used for the exported targets below.
add_library(
  TritonSequenceBackend::triton-sequence-backend ALIAS triton-sequence-backend
)

# Require C++17. For Clang, AppleClang and GNU compilers, enable extra
# warnings and promote warnings to errors.
target_compile_features(triton-sequence-backend PRIVATE cxx_std_17)
target_compile_options(
  triton-sequence-backend PRIVATE
  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

target_link_libraries(
  triton-sequence-backend
  PRIVATE
    triton-backend-utils    # from repo-backend
    triton-core-serverapi   # from repo-core
    triton-core-backendapi  # from repo-core
    triton-core-serverstub  # from repo-core
)

# The output file is named after OUTPUT_NAME (triton_sequence). The version
# script restricts which symbols the library exports, and LINK_DEPENDS
# relinks the library when the copied script changes.
# NOTE(review): the script path in LINK_FLAGS is relative; presumably it is
# resolved against the directory the link command runs in - confirm.
set_target_properties(
  triton-sequence-backend PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  OUTPUT_NAME triton_sequence
  LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_sequence.ldscript
  LINK_FLAGS "-Wl,--version-script libtriton_sequence.ldscript"
)

#
# Install
#
include(GNUInstallDirs)
# Directory that receives the CMake package files (targets + config).
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonSequenceBackend)

# Install the backend library under <prefix>/backends/sequence and record it
# in the export set.
install(
  TARGETS
    triton-sequence-backend
  EXPORT
    triton-sequence-backend-targets
  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/sequence
  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/sequence
)

# Install the export set as TritonSequenceBackendTargets.cmake.
install(
  EXPORT
    triton-sequence-backend-targets
  FILE
    TritonSequenceBackendTargets.cmake
  NAMESPACE
    TritonSequenceBackend::
  DESTINATION
    ${INSTALL_CONFIGDIR}
)

# Generate the package config file from its template and install it next to
# the targets file.
include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonSequenceBackendConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/TritonSequenceBackendConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES
  ${CMAKE_CURRENT_BINARY_DIR}/TritonSequenceBackendConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
# Write a targets file into the build directory so the package can also be
# consumed without installing it.
export(
  EXPORT triton-sequence-backend-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonSequenceBackendTargets.cmake
  NAMESPACE TritonSequenceBackend::
)

export(PACKAGE TritonSequenceBackend)


================================================
FILE: src/test/sequence/cmake/TritonSequenceBackendConfig.cmake.in
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Package configuration template for the sequence backend.
include(CMakeFindDependencyMacro)

# Directory containing this config file; the targets file is loaded from the
# same directory below.
get_filename_component(
  TRITONSEQUENCEBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)

list(APPEND CMAKE_MODULE_PATH ${TRITONSEQUENCEBACKEND_CMAKE_DIR})

# Only load the targets file if the imported target is not already defined.
if(NOT TARGET TritonSequenceBackend::triton-sequence-backend)
  include("${TRITONSEQUENCEBACKEND_CMAKE_DIR}/TritonSequenceBackendTargets.cmake")
endif()

# Variable consumers can use to refer to the imported library target.
set(TRITONSEQUENCEBACKEND_LIBRARIES TritonSequenceBackend::triton-sequence-backend)


================================================
FILE: src/test/sequence/src/libtriton_sequence.ldscript
================================================
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Linker version script: symbols matching TRITONBACKEND_* are global and
# every other symbol is local.
{
  global:
    TRITONBACKEND_*;
  local: *;
};


================================================
FILE: src/test/sequence/src/sequence.cc
================================================
// Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <memory>
#include <thread>

#include "triton/backend/backend_common.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"

namespace triton { namespace backend { namespace sequence {


// Simple sequence backend that demonstrates the TRITONBACKEND API for a
// blocking backend. A blocking backend completes execution of the inference
// before returning from TRITONBACKEND_ModelInstanceExecute.
//
// The backend supports models that take three input tensors, two INT32 [ 1 ]
// control values and one variable-size INT32 [ -1 ] value input; and produces
// an output tensor with the same shape as the input tensor. The input tensors
// must be named "START", "READY" and "INPUT". The output tensor must be named
// "OUTPUT".
//
// The model maintains an INT32 accumulator which is updated based on the
// control values in "START" and "READY":
//
//   READY=0, START=x: Ignore value input, do not change accumulator value.
//
//   READY=1, START=1: Start accumulating. Set accumulator equal to sum of input
//   tensor.
//
//   READY=1, START=0: Add input tensor values to accumulator.
//
// When READY=1, the accumulator is returned in every element of the output.
//

// Evaluate X only when (RESPONSES)[IDX] is not nullptr. If X yields an
// error, send that error as the final response for that entry (logging if
// the send itself fails), set (RESPONSES)[IDX] to nullptr so later guarded
// calls skip it, and delete the error.
#define GUARDED_RESPOND_IF_ERROR(RESPONSES, IDX, X)                     \
  do {                                                                  \
    if ((RESPONSES)[IDX] != nullptr) {                                  \
      TRITONSERVER_Error* err__ = (X);                                  \
      if (err__ != nullptr) {                                           \
        LOG_IF_ERROR(                                                   \
            TRITONBACKEND_ResponseSend(                                 \
                (RESPONSES)[IDX], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
                err__),                                                 \
            "failed to send error response");                           \
        (RESPONSES)[IDX] = nullptr;                                     \
        TRITONSERVER_ErrorDelete(err__);                                \
      }                                                                 \
    }                                                                   \
  } while (false)

//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model.
//
class ModelState : public BackendModel {
 public:
  // Factory function. On success stores a newly allocated ModelState in
  // '*state' and returns nullptr; otherwise returns the error.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_Model* triton_model, ModelState** state);

  virtual ~ModelState() = default;

  // Check that the model configuration is one this backend can serve. As part
  // of the check the execution delay and accumulator size are read from the
  // configuration and stored in this object.
  TRITONSERVER_Error* ValidateModelConfig();

  // Number of accumulator slots each instance of the model must provide.
  size_t AccumulatorSize() const { return accumulator_size_; }

  // Artificial delay, in milliseconds, applied to every execution.
  int ExecDelay() const { return execute_delay_ms_; }

 private:
  // Construction goes through Create().
  ModelState(TRITONBACKEND_Model* triton_model);

  // Execution delay in milliseconds.
  int execute_delay_ms_;

  // Number of accumulator slots.
  size_t accumulator_size_;
};

TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
  // A BackendModelException raised while constructing the object is converted
  // into the TRITONSERVER_Error* that it carries.
  try {
    *state = new ModelState(triton_model);
    return nullptr;  // success
  }
  catch (const BackendModelException& failure) {
    // An exception without an error attached is itself reported as an
    // internal error.
    RETURN_ERROR_IF_TRUE(
        failure.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelException"));
    RETURN_IF_ERROR(failure.err_);
  }

  return nullptr;  // success
}

// Construct the state for 'triton_model'. Both configuration-derived members
// start from defined values: no execution delay and a single accumulator slot.
// ValidateModelConfig() later replaces them with the values read from the
// model configuration; initializing 'accumulator_size_' here ensures
// AccumulatorSize() never returns an indeterminate value before that happens.
ModelState::ModelState(TRITONBACKEND_Model* triton_model)
    : BackendModel(triton_model), execute_delay_ms_(0), accumulator_size_(1)
{
}

// Validate that the model configuration is supported by this backend and
// capture the settings the backend needs from it.
//
// Side effects: sets 'execute_delay_ms_' from the optional "execute_delay_ms"
// parameter and 'accumulator_size_' from "max_batch_size" (at least 1).
//
// Returns nullptr when the configuration is acceptable. Otherwise returns an
// error; the configuration checks below report TRITONSERVER_ERROR_INVALID_ARG,
// while failures reading the JSON are returned as produced by the JSON helper.
TRITONSERVER_Error*
ModelState::ValidateModelConfig()
{
  // We have the json DOM for the model configuration...
  common::TritonJson::WriteBuffer buffer;
  RETURN_IF_ERROR(model_config_.PrettyWrite(&buffer));
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model configuration:\n") + buffer.Contents()).c_str());

  // Optional parameter: artificial execution delay in milliseconds.
  triton::common::TritonJson::Value params;
  if (model_config_.Find("parameters", &params)) {
    common::TritonJson::Value exec_delay;
    if (params.Find("execute_delay_ms", &exec_delay)) {
      std::string exec_delay_str;
      RETURN_IF_ERROR(
          exec_delay.MemberAsString("string_value", &exec_delay_str));

      // std::stoi throws (std::invalid_argument / std::out_of_range) when the
      // text is not a representable integer. The value comes straight from the
      // user's model configuration, so convert that failure into an error
      // return instead of letting the exception propagate out of the backend.
      bool delay_parsed = true;
      try {
        execute_delay_ms_ = std::stoi(exec_delay_str);
      }
      catch (...) {
        delay_parsed = false;
      }
      RETURN_ERROR_IF_FALSE(
          delay_parsed, TRITONSERVER_ERROR_INVALID_ARG,
          std::string("parameter 'execute_delay_ms' must be an integer, got '") +
              exec_delay_str + "'");
    }
  }

  // One accumulator per batch slot; a non-batching model (max_batch_size 0)
  // still needs a single accumulator.
  int64_t max_batch_size = 0;
  RETURN_IF_ERROR(model_config_.MemberAsInt("max_batch_size", &max_batch_size));
  accumulator_size_ = (size_t)(std::max((int64_t)1, max_batch_size));

  // The model configuration must specify the sequence batcher and must use the
  // START and READY input to indicate control values.
  triton::common::TritonJson::Value sequence_batching;
  RETURN_IF_ERROR(
      model_config_.MemberAsObject("sequence_batching", &sequence_batching));
  common::TritonJson::Value control_inputs;
  RETURN_IF_ERROR(
      sequence_batching.MemberAsArray("control_input", &control_inputs));
  RETURN_ERROR_IF_FALSE(
      control_inputs.ArraySize() == 2, TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          "'START' and 'READY' must be configured as the control inputs"));

  std::vector<std::string> control_input_names;
  for (size_t io_index = 0; io_index < control_inputs.ArraySize(); io_index++) {
    common::TritonJson::Value control_input;
    RETURN_IF_ERROR(control_inputs.IndexAsObject(io_index, &control_input));
    const char* input_name;
    size_t input_name_len;
    RETURN_IF_ERROR(
        control_input.MemberAsString("name", &input_name, &input_name_len));
    control_input_names.push_back(input_name);
  }

  // The two control inputs may be listed in either order.
  RETURN_ERROR_IF_FALSE(
      ((control_input_names[0] == "START") &&
       (control_input_names[1] == "READY")) ||
          ((control_input_names[0] == "READY") &&
           (control_input_names[1] == "START")),
      TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          "'START' and 'READY' must be configured as the control inputs"));

  common::TritonJson::Value inputs, outputs;
  RETURN_IF_ERROR(model_config_.MemberAsArray("input", &inputs));
  RETURN_IF_ERROR(model_config_.MemberAsArray("output", &outputs));

  // There must be one INT32 input called INPUT defined in the model
  // configuration and it must be a 1D vector (of any length).
  RETURN_ERROR_IF_FALSE(
      inputs.ArraySize() == 1, TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          "model must have input 'INPUT' with vector shape, any length"));

  common::TritonJson::Value input;
  RETURN_IF_ERROR(inputs.IndexAsObject(0 /* index */, &input));

  std::vector<int64_t> input_shape;
  RETURN_IF_ERROR(backend::ParseShape(input, "dims", &input_shape));

  RETURN_ERROR_IF_FALSE(
      input_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          "model must have one input 'INPUT' with vector shape, any length"));

  std::string input_dtype;
  RETURN_IF_ERROR(input.MemberAsString("data_type", &input_dtype));

  RETURN_ERROR_IF_FALSE(
      input_dtype == "TYPE_INT32", TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model input must have TYPE_INT32 data-type"));

  const char* input_name;
  size_t input_name_len;
  RETURN_IF_ERROR(input.MemberAsString("name", &input_name, &input_name_len));

  RETURN_ERROR_IF_FALSE(
      strcmp(input_name, "INPUT") == 0, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model input must be named 'INPUT'"));

  // There must be one INT32 output with shape that matches the input. The
  // output must be named OUTPUT.
  RETURN_ERROR_IF_FALSE(
      outputs.ArraySize() == 1, TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          "model must have one output 'OUTPUT' with vector shape, any length"));

  common::TritonJson::Value output;
  RETURN_IF_ERROR(outputs.IndexAsObject(0 /* index */, &output));

  std::vector<int64_t> output_shape;
  RETURN_IF_ERROR(backend::ParseShape(output, "dims", &output_shape));

  // 'input_shape' is known to have exactly one dimension at this point, so
  // indexing element 0 of both shapes is safe once the output rank is checked.
  RETURN_ERROR_IF_FALSE(
      (output_shape.size() == 1) && (output_shape[0] == input_shape[0]),
      TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          "model must have output 'OUTPUT' with shape matching 'INPUT'"));

  std::string output_dtype;
  RETURN_IF_ERROR(output.MemberAsString("data_type", &output_dtype));

  RETURN_ERROR_IF_FALSE(
      output_dtype == "TYPE_INT32", TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model output must have TYPE_INT32 data-type"));

  const char* output_name;
  size_t output_name_len;
  RETURN_IF_ERROR(
      output.MemberAsString("name", &output_name, &output_name_len));

  RETURN_ERROR_IF_FALSE(
      strcmp(output_name, "OUTPUT") == 0, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model output must be named 'OUTPUT'"));

  return nullptr;  // success
}

//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each TRITONBACKEND_ModelInstance.
//
class ModelInstanceState : public BackendModelInstance {
 public:
  // Factory function. On success stores a newly allocated ModelInstanceState
  // in '*state' and returns nullptr; otherwise returns the error.
  static TRITONSERVER_Error* Create(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance,
      ModelInstanceState** state);

  virtual ~ModelInstanceState();

  // The ModelState of the model this instance belongs to.
  ModelState* StateForModel() const { return model_state_; }

  // Read, overwrite, or add to the accumulator stored at slot 'idx'. The
  // index is not range-checked.
  int32_t GetAccumulatorAt(size_t idx);
  void SetAccumulatorAt(size_t idx, int32_t value);
  void AddAccumulatorAt(size_t idx, int32_t value);

 private:
  // Construction goes through Create().
  ModelInstanceState(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance);

  // Not owned by the instance.
  ModelState* model_state_;

  // One accumulator per batch slot.
  std::vector<int32_t> accumulator_;
};

TRITONSERVER_Error*
ModelInstanceState::Create(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance,
    ModelInstanceState** state)
{
  // A BackendModelInstanceException raised while constructing the object is
  // converted into the TRITONSERVER_Error* that it carries.
  try {
    *state = new ModelInstanceState(model_state, triton_model_instance);
    return nullptr;  // success
  }
  catch (const BackendModelInstanceException& failure) {
    // An exception without an error attached is itself reported as an
    // internal error.
    RETURN_ERROR_IF_TRUE(
        failure.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelInstanceException"));
    RETURN_IF_ERROR(failure.err_);
  }

  return nullptr;  // success
}

// Build the per-instance state: remember the owning model state and allocate
// one zero-valued accumulator for each slot reported by the model.
ModelInstanceState::ModelInstanceState(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance)
    : BackendModelInstance(model_state, triton_model_instance),
      model_state_(model_state), accumulator_(model_state->AccumulatorSize())
{
}

// Return the accumulator value stored at slot 'idx' (no range check).
int32_t
ModelInstanceState::GetAccumulatorAt(size_t idx)
{
  const int32_t current = accumulator_[idx];
  return current;
}

// Overwrite the accumulator at slot 'idx' with 'value' (no range check).
void
ModelInstanceState::SetAccumulatorAt(size_t idx, int32_t value)
{
  int32_t& slot = accumulator_[idx];
  slot = value;
}

// Add 'value' to the accumulator at slot 'idx' (no range check).
void
ModelInstanceState::AddAccumulatorAt(size_t idx, int32_t value)
{
  int32_t& slot = accumulator_[idx];
  slot = slot + value;
}

// Destructor: empties the accumulator vector held by this instance.
ModelInstanceState::~ModelInstanceState()
{
  accumulator_.clear();
}

/////////////

extern "C" {

// Implementing TRITONBACKEND_Initialize is optional. The backend
// should initialize any global state that is intended to be shared
// across all models and model instances that use the backend.
// Backend-level initialization: logs the backend name, checks that the
// TRITONBACKEND API version offered by Triton is compatible with the version
// this backend was compiled against, and logs the backend configuration.
//
// Returns nullptr on success, TRITONSERVER_ERROR_UNSUPPORTED when the API
// versions are incompatible, or the error from any failing Triton API call.
TRITONSERVER_Error*
TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
{
  const char* cname;
  RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &cname));
  std::string name(cname);

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("TRITONBACKEND_Initialize: ") + name).c_str());

  // We should check the backend API version that Triton supports
  // vs. what this backend was compiled against.
  uint32_t api_version_major, api_version_minor;
  RETURN_IF_ERROR(
      TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("Triton TRITONBACKEND API version: ") +
       std::to_string(api_version_major) + "." +
       std::to_string(api_version_minor))
          .c_str());
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("'") + name + "' TRITONBACKEND API version: " +
       std::to_string(TRITONBACKEND_API_VERSION_MAJOR) + "." +
       std::to_string(TRITONBACKEND_API_VERSION_MINOR))
          .c_str());

  // The major versions must match exactly and Triton's minor version must be
  // at least the one this backend was built against.
  if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) ||
      (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        "triton backend API version does not support this backend");
  }

  // The backend configuration may contain information needed by the
  // backend, such as command-line arguments. This backend doesn't use
  // any such configuration but we print whatever is available.
  TRITONSERVER_Message* backend_config_message;
  RETURN_IF_ERROR(
      TRITONBACKEND_BackendConfig(backend, &backend_config_message));

  const char* buffer;
  size_t byte_size;
  RETURN_IF_ERROR(TRITONSERVER_MessageSerializeToJson(
      backend_config_message, &buffer, &byte_size));

  // The serialized JSON is described by (buffer, byte_size); build the string
  // from that explicit length rather than relying on NUL termination.
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("backend configuration:\n") +
       std::string(buffer, byte_size))
          .c_str());

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInitialize is optional. The backend
// should initialize any state that is intended to be shared across
// all instances of the model.
// Model-level initialization: creates the ModelState for 'model', attaches it
// to the model, and then validates the model configuration. Any error is
// returned to the caller.
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  const char* model_name_cstr;
  RETURN_IF_ERROR(TRITONBACKEND_ModelName(model, &model_name_cstr));
  const std::string model_name(model_name_cstr);

  uint64_t model_version;
  RETURN_IF_ERROR(TRITONBACKEND_ModelVersion(model, &model_version));

  const std::string banner = std::string("TRITONBACKEND_ModelInitialize: ") +
                             model_name + " (version " +
                             std::to_string(model_version) + ")";
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, banner.c_str());

  // Every TRITONBACKEND_Model gets its own ModelState, stored on the model so
  // that later callbacks can retrieve it.
  ModelState* created_state;
  RETURN_IF_ERROR(ModelState::Create(model, &created_state));
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelSetState(model, static_cast<void*>(created_state)));

  // The configuration is validated only after the state has been attached to
  // the model. An error here is returned from this function.
  RETURN_IF_ERROR(created_state->ValidateModelConfig());

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelFinalize is optional unless state
// is set using TRITONBACKEND_ModelSetState. The backend must free
// this state and perform any other cleanup.
// Model-level teardown: fetches the state attached to 'model' and deletes it.
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
  void* attached_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &attached_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO, "TRITONBACKEND_ModelFinalize: delete model state");

  // The state was stored as a ModelState* by TRITONBACKEND_ModelInitialize.
  delete static_cast<ModelState*>(attached_state);

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceInitialize is optional. The
// backend should initialize any state that is required for a model
// instance.
// Instance-level initialization: logs the instance name, kind and device,
// creates the ModelInstanceState and attaches it to 'instance', then rejects
// any instance whose kind is not CPU.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  const char* instance_name_cstr;
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelInstanceName(instance, &instance_name_cstr));
  const std::string instance_name(instance_name_cstr);

  int32_t device_id;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceDeviceId(instance, &device_id));
  TRITONSERVER_InstanceGroupKind kind;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceKind(instance, &kind));

  const std::string banner =
      std::string("TRITONBACKEND_ModelInstanceInitialize: ") + instance_name +
      " (" + TRITONSERVER_InstanceGroupKindString(kind) + " device " +
      std::to_string(device_id) + ")";
  LOG_MESSAGE(TRITONSERVER_LOG_INFO, banner.c_str());

  // Look up the model that owns this instance and the ModelState that was
  // attached to it.
  TRITONBACKEND_Model* owning_model;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &owning_model));

  void* attached_model_state;
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelState(owning_model, &attached_model_state));
  ModelState* model_state = static_cast<ModelState*>(attached_model_state);

  // Create the per-instance state and store it on the instance so that the
  // execute and finalize callbacks can retrieve it.
  ModelInstanceState* created_state;
  RETURN_IF_ERROR(
      ModelInstanceState::Create(model_state, instance, &created_state));
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
      instance, static_cast<void*>(created_state)));

  // Only CPU instances are accepted. The check runs after the state has been
  // attached to the instance.
  RETURN_ERROR_IF_FALSE(
      created_state->Kind() == TRITONSERVER_INSTANCEGROUPKIND_CPU,
      TRITONSERVER_ERROR_INVALID_ARG,
      std::string("'sequence' backend only supports CPU instances"));

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceFinalize is optional unless
// state is set using TRITONBACKEND_ModelInstanceSetState. The backend
// must free this state and perform any other cleanup.
// Instance-level teardown: fetches the state attached to 'instance' and
// deletes it.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
  void* attached_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &attached_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      "TRITONBACKEND_ModelInstanceFinalize: delete instance state");

  // The state was stored as a ModelInstanceState* by
  // TRITONBACKEND_ModelInstanceInitialize.
  delete static_cast<ModelInstanceState*>(attached_state);

  return nullptr;  // success
}

// Implementing TRITONBACKEND_ModelInstanceExecute is required.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  // Triton will not call this function simultaneously for the same
  // 'instance'. But since this backend could be used by multiple
  // instances from multiple models the implementation needs to handle
  // multiple calls to this function at the same time (with different
  // 'instance' objects). Suggested practice for this is to use only
  // function-local and model-instance-specific state (obtained from
  // 'instance'), which is what we do here.
  ModelInstanceState* instance_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
      instance, reinterpret_cast<void**>(&instance_state)));
  ModelState* model_state = instance_state->StateForModel();

  // This backend specifies BLOCKING execution policy. That means that
  // we should not return from this function until execution is complete. Triton
  // will automatically release 'instance' on return from this function so that
  // it is again available to be used for another call to
  // TRITONBACKEND_ModelInstanceExecute.

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model ") + model_state->Name() + ", instance " +
       instance_state->Name() + ", executing " + std::to_string(request_count) +
       " requests")
          .c_str());

  bool supports_batching = false;
  RETURN_IF_ERROR(model_state->SupportsFirstDimBatching(&supports_batching));

  // Each request represents a different sequence, which corresponds
  // to the accumulator at the same index. Each request must have
  // batch-size 1 inputs which is the next timestep for that sequence. The total
  // number of requests will not exceed the max-batch-size specified in the
  // model configuration.
  if (request_count > model_state->AccumulatorSize()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        "unable to execute batch larger than max-batch-size");
  }

  // Delay if requested...
  if (model_state->ExecDelay() > 0) {
    std::this_thread::sleep_for(
        std::chrono::milliseconds(model_state->ExecDelay()));
  }

  // 'responses' is initialized with the response objects below and
  // if/when an error response is sent the corresponding entry in
  // 'responses' is set to nullptr to indicate that that response has
  // already been sent.
  std::vector<TRITONBACKEND_Response*> responses;
  responses.reserve(request_count);

  // Create a single response object for each request. If something
  // goes wrong when attempting to create the response objects just
  // fail all of the requests by returning an error.
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];

    TRITONBACKEND_Response* response;
    RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request));
    responses.push_back(response);
  }

  // After this point we take ownership of 'requests', which means that a
  // response must be sent for every request. If something does go wrong in
  // processing a particular request then we send an error response just for the
  // specific request.

  // The way we collect these batch timestamps is not entirely accurate.
  // Normally, in a performant backend you would execute all the requests at the
  // same time, and so there would be a single compute-start / compute-end
  // time-range. But here we execute each request separately so there is no
  // single range. As a result we just show the entire execute time as being the
  // compute time as well.
  uint64_t min_exec_start_ns = std::numeric_limits<uint64_t>::max();
  uint64_t max_exec_end_ns = 0;
  uint64_t total_batch_size = 0;

  // For simplicity we just process each request separately... in
  // general a backend should try to operate on the entire batch of
  // requests at the same time for improved performance.
  std::vector<uint8_t> start_buffer, ready_buffer, input_buffer;
  for (uint32_t r = 0; r < request_count; ++r) {
    uint64_t exec_start_ns = 0;
    SET_TIMESTAMP(exec_start_ns);
    min_exec_start_ns = std::min(min_exec_start_ns, exec_start_ns);

    TRITONBACKEND_Request* request = requests[r];

    const char* request_id = "";
    GUARDED_RESPOND_IF_ERROR(
        responses, r, TRITONBACKEND_RequestId(request, &request_id));

    uint64_t correlation_id = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestCorrelationId(request, &correlation_id));

    uint32_t input_count = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, TRITONBACKEND_RequestInputCount(request, &input_count));

    uint32_t requested_output_count = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestOutputCount(request, &requested_output_count));

    // If an error response was sent for the above then display an error
    // message and move on to next request.
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read request input/output counts, error response "
           "sent")
              .c_str());
      continue;
    }

    LOG_MESSAGE(
        TRITONSERVER_LOG_INFO,
        (std::string("request ") + std::to_string(r) + ": id = \"" +
         request_id + "\", correlation_id = " + std::to_string(correlation_id) +
         ", input_count = " + std::to_string(input_count) +
         ", requested_output_count = " + std::to_string(requested_output_count))
            .c_str());

    // For statistics we need to collect the total batch size of all the
    // requests. If the model doesn't support batching then each request is
    // necessarily batch-size 1. If the model does support batching then the
    // first dimension of the shape is the batch size. We only the first input
    // for this.
    if (supports_batching) {
      TRITONBACKEND_Input* input = nullptr;
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_RequestInputByIndex(request, 0 /* index */, &input));
      if (responses[r] == nullptr) {
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to read input, error response sent")
                .c_str());
        continue;
      }

      const int64_t* input_shape;
      uint32_t input_dims_count;
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_InputProperties(
              input, nullptr, nullptr, &input_shape, &input_dims_count, nullptr,
              nullptr));
      if (responses[r] == nullptr) {
        LOG_MESSAGE(
            TRITONSERVER_LOG_ERROR,
            (std::string("request ") + std::to_string(r) +
             ": failed to read input properties, error response sent")
                .c_str());
        continue;
      }

      if (input_dims_count > 0) {
        if (input_shape[0] != 1) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "unable to execute more than one timestep at a time"));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": unable to execute more than one timestep at a time, error "
               "response sent")
                  .c_str());
          continue;
        }
        total_batch_size += input_shape[0];
      }
    } else {
      total_batch_size++;
    }

    LOG_MESSAGE(
        TRITONSERVER_LOG_ERROR,
        (std::string("total_batch_size: ") + std::to_string(total_batch_size))
            .c_str());

    // Get the input tensors.
    TRITONBACKEND_Input* start_input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestInput(request, "START", &start_input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'START', error response sent")
              .c_str());
      continue;
    }

    TRITONBACKEND_Input* ready_input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestInput(request, "READY", &ready_input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input 'READY', error response sent")
              .c_str());
      continue;
    }

    const void* start_buffer = nullptr;
    uint64_t buffer_byte_size = 0;
    TRITONSERVER_MemoryType input_memory_type = TRITONSERVER_MEMORY_CPU;
    int64_t input_memory_type_id = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputBuffer(
            start_input, 0 /* input_buffer_count */, &start_buffer,
            &buffer_byte_size, &input_memory_type, &input_memory_type_id));
    if (responses[r] == nullptr) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED, "failed to get input buffer"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR, (std::string("request ") + std::to_string(r) +
                                   ": failed to get input buffer, error "
                                   "response sent")
                                      .c_str());
      continue;
    }

    const void* ready_buffer = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputBuffer(
            ready_input, 0 /* input_buffer_count */, &ready_buffer,
            &buffer_byte_size, &input_memory_type, &input_memory_type_id));
    if (responses[r] == nullptr) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED, "failed to get input buffer"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR, (std::string("request ") + std::to_string(r) +
                                   ": failed to get input buffer, error "
                                   "response sent")
                                      .c_str());
      continue;
    }

    TRITONBACKEND_Input* input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r, TRITONBACKEND_RequestInput(request, "INPUT", &input));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input, error response sent")
              .c_str());
      continue;
    }

    const void* input_buffer = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputBuffer(
            input, 0 /* input_buffer_count */, &input_buffer, &buffer_byte_size,
            &input_memory_type, &input_memory_type_id));
    if (responses[r] == nullptr) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "failed to get input buffer in CPU memory"));
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to get input buffer in CPU memory, error "
           "response sent")
              .c_str());
      continue;
    }

    TRITONSERVER_DataType input_datatype;
    const int64_t* input_shape;
    uint32_t input_dims_count;
    uint64_t input_byte_size;
    uint32_t input_buffer_count;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputProperties(
            input, nullptr /* input_name */, &input_datatype, &input_shape,
            &input_dims_count, &input_byte_size, &input_buffer_count));
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input properties, error response sent")
              .c_str());
      continue;
    }

    int64_t input_element_cnt = input_byte_size / sizeof(int32_t);
    const int32_t* start = reinterpret_cast<const int32_t*>(start_buffer);
    const int32_t* ready = reinterpret_cast<const int32_t*>(ready_buffer);
    const int32_t* ipbuffer_int = nullptr;
    std::vector<int32_t> ipbuffer_vec;

    if (input_memory_type == TRITONSERVER_MEMORY_GPU) {
      ipbuffer_vec.resize(input_element_cnt);
      ipbuffer_int = ipbuffer_vec.data();
      LOG_IF_CUDA_ERROR(
          cudaMemcpyAsync(
              const_cast<int32_t*>(ipbuffer_int), input_buffer, input_byte_size,
              cudaMemcpyDeviceToHost, instance_state->CudaStream()),
          "failed to copy buffer from Device to Host");

      LOG_IF_CUDA_ERROR(
          cudaStreamSynchronize(instance_state->CudaStream()),
          "failed to perform synchronization on cuda stream");
    } else {
      ipbuffer_int = reinterpret_cast<const int32_t*>(input_buffer);
    }

    // Update the accumulator value based on START/READY and calculate the
    // output value.
    if (ready[0] != 0) {
      if (start[0] == 0) {
        // Update accumulator.
        for (int64_t e = 0; e < input_element_cnt; ++e) {
          instance_state->AddAccumulatorAt(r, ipbuffer_int[e]);
        }
      } else {
        // Set accumulator.
        instance_state->SetAccumulatorAt(r, ipbuffer_int[0]);
        for (int64_t e = 1; e < input_element_cnt; ++e) {
          instance_state->AddAccumulatorAt(r, ipbuffer_int[e]);
        }
      }

      TRITONBACKEND_Response* response = responses[r];

      // If the output is requested, copy the calculated output value
      // into the output buffer.
      if (requested_output_count > 0) {
        // The output shape is [1, input_element_cnt] if the model configuration
        // supports batching, or just [input_element_cnt] if the model
        // configuration does not support batching.
        std::vector<int64_t> shape;
        if (supports_batching) {
          shape.push_back(1);
        }
        shape.push_back(input_element_cnt);

        TRITONBACKEND_Output* output;
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            TRITONBACKEND_ResponseOutput(
                response, &output, "OUTPUT", input_datatype, input_shape,
                input_dims_count));
        if (responses[r] == nullptr) {
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create response output, error response sent")
                  .c_str());
          continue;
        }

        // Get the output buffer. We request a buffer in CPU memory but we have
        // to handle any returned type. If we get back a buffer in GPU memory we
        // just fail the request.
        void* output_buffer;
        TRITONSERVER_MemoryType output_memory_type = TRITONSERVER_MEMORY_CPU;
        int64_t output_memory_type_id = 0;
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            TRITONBACKEND_OutputBuffer(
                output, &output_buffer, buffer_byte_size, &output_memory_type,
                &output_memory_type_id));
        if (responses[r] == nullptr) {
          GUARDED_RESPOND_IF_ERROR(
              responses, r,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  "failed to create output buffer in CPU memory"));
          LOG_MESSAGE(
              TRITONSERVER_LOG_ERROR,
              (std::string("request ") + std::to_string(r) +
               ": failed to create output buffer in CPU memory, error "
               "response sent")
                  .c_str());
          continue;
        }

        int32_t* obuffer_int = nullptr;
        std::vector<int32_t> obuffer_vec;
        if (output_memory_type == TRITONSERVER_MEMORY_GPU) {
          obuffer_vec.resize(buffer_byte_size / sizeof(int32_t));
          obuffer_int = obuffer_vec.data();
        } else {
          obuffer_int = reinterpret_cast<int32_t*>(output_buffer);
        }

        for (int64_t i = 0; i < input_element_cnt; ++i) {
          obuffer_int[i] = instance_state->GetAccumulatorAt(r);
        }

        if (output_memory_type == TRITONSERVER_MEMORY_GPU) {
          LOG_IF_CUDA_ERROR(
              cudaMemcpyAsync(
                  output_buffer, const_cast<int32_t*>(obuffer_int),
                  buffer_byte_size, cudaMemcpyHostToDevice,
                  instance_state->CudaStream()),
              "failed to copy buffer from Device to Host");
          LOG_IF_CUDA_ERROR(
              cudaStreamSynchronize(instance_state->CudaStream()),
              "failed to perform synchronization on cuda stream");
        }
      }
    }

    uint64_t exec_end_ns = 0;
    SET_TIMESTAMP(exec_end_ns);
    max_exec_end_ns = std::max(max_exec_end_ns, exec_end_ns);

    // Send all the responses that haven't already been sent because of an
    // earlier error.
    if (responses[r] != nullptr) {
      LOG_IF_ERROR(
          TRITONBACKEND_ResponseSend(
              responses[r], TRITONSERVER_RESPONSE_COMPLETE_FINAL,
              nullptr /* success */),
          "failed sending response");
    }

    // Report statistics for each request.
    LOG_IF_ERROR(
        TRITONBACKEND_ModelInstanceReportStatistics(
            instance_state->TritonModelInstance(), request,
            (responses[r] != nullptr) /* success */, exec_start_ns,
            exec_start_ns, exec_end_ns, exec_end_ns),
        "failed reporting request statistics");

    LOG_IF_ERROR(
        TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
        "failed releasing request");
  }

  // Report the entire batch statistics.
  LOG_IF_ERROR(
      TRITONBACKEND_ModelInstanceReportBatchStatistics(
          instance_state->TritonModelInstance(), total_batch_size,
          min_exec_start_ns, min_exec_start_ns, max_exec_end_ns,
          max_exec_end_ns),
      "failed reporting batch request statistics");

  return nullptr;  // success
}

}  // extern "C"

}}}  // namespace triton::backend::sequence


================================================
FILE: src/test/tensor_size_test.cc
================================================
// Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "gtest/gtest.h"

// Undefine the FAIL() macro inside Triton code to avoid a redefinition error
// from gtest. Okay as Triton's FAIL() macro is not used in this test.
#ifdef FAIL
#undef FAIL
#endif

#include <string>
#include <vector>

#include "model_config_utils.h"
#undef RETURN_IF_ERROR  // core (Status) vs backend (TRITONSERVER_Error*); avoid
                        // redefinition
#include "triton/backend/backend_common.h"
#include "triton/common/model_config.h"

namespace tb = triton::backend;
namespace tc = triton::common;
namespace tcore = triton::core;

// libtritonserver.so is linked with a script that hides core's C++ symbols,
// so the few definitions needed by the header-only templates used in this
// test are supplied here.
const tcore::Status tcore::Status::Success(tcore::Status::Code::SUCCESS);

// Map a TRITONSERVER_DataType onto inference::DataType by casting the
// enumerator value directly.
inference::DataType
tcore::TritonToDataType(const TRITONSERVER_DataType dtype)
{
  const auto converted = static_cast<inference::DataType>(dtype);
  return converted;
}

namespace {

// Test-local layout that TRITONSERVER_Error pointers are reinterpreted as by
// the accessors defined in this file: an error code plus an owned copy of
// the message.
struct TritonServerError {
  TRITONSERVER_Error_Code code_;
  std::string msg_;

  TritonServerError(TRITONSERVER_Error_Code code, const char* msg)
      : code_(code), msg_(msg)
  {
  }
};

}  // namespace

#ifdef __cplusplus
extern "C" {
#endif

// Return the error code stored in 'error', which is treated as a
// TritonServerError.
TRITONSERVER_Error_Code
TRITONSERVER_ErrorCode(TRITONSERVER_Error* error)
{
  const auto* impl = reinterpret_cast<TritonServerError*>(error);
  return impl->code_;
}

// Return the message stored in 'error', which is treated as a
// TritonServerError. The returned pointer refers to the string owned by that
// object.
const char*
TRITONSERVER_ErrorMessage(TRITONSERVER_Error* error)
{
  const auto* impl = reinterpret_cast<TritonServerError*>(error);
  return impl->msg_.c_str();
}

#ifdef __cplusplus
}
#endif

namespace {

// Failure categories exercised by the tests. The enumerators take the
// sentinel values defined in triton::common, so casting an ErrorCode to an
// integer gives the value the sentinel-returning helpers are expected to
// return for that failure.
enum class ErrorCode {
  kInvalidDim = tc::INVALID_SIZE,  // shape contains an invalid dimension
  kOverflow = tc::OVERFLOW_SIZE,   // size computation overflowed
};

// Tensor name handed to the core helpers, which take a name argument.
static const std::string kTensorName{"input0"};

void
assert_get_element_count_success(
    std::vector<int64_t>& shape, int64_t expected_cnt)
{
  int64_t cnt;
  TRITONSERVER_Error* err;

  // Backend (old APIs)
  ASSERT_EQ(expected_cnt, tb::GetElementCount(shape.data(), shape.size()));
  ASSERT_EQ(expected_cnt, tb::GetElementCount(shape));

  // Backend (new APIs)
  err = tb::GetElementCount(shape.data(), shape.size(), &cnt);
  ASSERT_EQ(err, nullptr);
  ASSERT_EQ(cnt, expected_cnt);
  err = tb::GetElementCount(shape, &cnt);
  ASSERT_EQ(err, nullptr);
  ASSERT_EQ(cnt, expected_cnt);

  // Common
  ASSERT_EQ(tc::GetElementCount(shape), expected_cnt);

  // Core
  cnt = 0;
  auto status = tcore::GetElementCount(shape, kTensorName, &cnt);
  ASSERT_TRUE(status.IsOk()) << status.Message();
  ASSERT_EQ(cnt, expected_cnt);
}

void
assert_get_element_count_error(
    std::vector<int64_t>& shape, ErrorCode error_code,
    const std::string& error_msg)
{
  int64_t cnt;
  TRITONSERVER_Error* err;

  // Backend (old APIs)
  ASSERT_EQ(
      static_cast<int>(error_code),
      tb::GetElementCount(shape.data(), shape.size()));
  ASSERT_EQ(static_cast<int>(error_code), tb::GetElementCount(shape));

  // Backend (new APIs)
  err = tb::GetElementCount(shape.data(), shape.size(), &cnt);
  ASSERT_NE(err, nullptr);
  ASSERT_EQ(TRITONSERVER_ERROR_INVALID_ARG, TRITONSERVER_ErrorCode(err));
  ASSERT_STREQ(error_msg.c_str(), TRITONSERVER_ErrorMessage(err));
  err = tb::GetElementCount(shape, &cnt);
  ASSERT_NE(err, nullptr);
  ASSERT_EQ(TRITONSERVER_ERROR_INVALID_ARG, TRITONSERVER_ErrorCode(err));
  ASSERT_STREQ(error_msg.c_str(), TRITONSERVER_ErrorMessage(err));

  // Common
  ASSERT_EQ(tc::GetElementCount(shape), static_cast<int64_t>(error_code));

  // Core
  cnt = 0;
  auto status = tcore::GetElementCount(shape, kTensorName, &cnt);
  ASSERT_FALSE(status.IsOk());
  ASSERT_EQ(status.StatusCode(), triton::core::Status::Code::INVALID_ARG);
  ASSERT_TRUE(
      std::string(status.Message())
          .find(
              error_code == ErrorCode::kInvalidDim
                  ? "invalid dimension"
                  : "exceeds maximum size") != std::string::npos);
}

// Check that the GetByteSize() flavors under test yield 'expected_size' for
// 'dtype' and 'shape' without reporting an error. The core check is skipped
// when 'test_core' is false.
void
assert_get_byte_size_success(
    TRITONSERVER_DataType dtype, std::vector<int64_t>& shape,
    int64_t expected_size, bool test_core = true)
{
  // Backend (old API): the size is the return value.
  ASSERT_EQ(expected_size, tb::GetByteSize(dtype, shape));

  // Backend (new API): the size is written through an output pointer and a
  // null error means success.
  int64_t backend_size;
  TRITONSERVER_Error* backend_err = tb::GetByteSize(dtype, shape, &backend_size);
  ASSERT_EQ(backend_err, nullptr);
  ASSERT_EQ(expected_size, backend_size);

  // Common
  inference::DataType core_dtype = tcore::TritonToDataType(dtype);
  ASSERT_EQ(tc::GetByteSize(core_dtype, shape), expected_size);

  // Core
  if (!test_core) {
    return;
  }
  int64_t core_size = 0;
  auto core_status =
      tcore::GetByteSize(core_dtype, shape, kTensorName, &core_size);
  ASSERT_TRUE(core_status.IsOk()) << core_status.Message();
  ASSERT_EQ(core_size, expected_size);
}

// Check that every GetByteSize() flavor under test rejects the combination of
// 'dtype' and 'shape' with the failure described by 'error_code'.
// 'error_msg' is the exact message expected from the backend error-returning
// API; the core message is only checked for a characteristic substring.
void
assert_get_byte_size_error(
    TRITONSERVER_DataType dtype, std::vector<int64_t>& shape,
    ErrorCode error_code, const std::string& error_msg)
{
  int64_t size;
  TRITONSERVER_Error* err;

  // Backend (old API): the failure is encoded in the returned sentinel.
  ASSERT_EQ(static_cast<int>(error_code), tb::GetByteSize(dtype, shape));

  // Backend (new API): the failure is a non-null INVALID_ARG error.
  err = tb::GetByteSize(dtype, shape, &size);
  ASSERT_NE(err, nullptr);
  ASSERT_EQ(TRITONSERVER_ERROR_INVALID_ARG, TRITONSERVER_ErrorCode(err));
  // Compare as C strings, the same way assert_get_element_count_error()
  // does, instead of mixing std::string and const char* in ASSERT_EQ.
  ASSERT_STREQ(error_msg.c_str(), TRITONSERVER_ErrorMessage(err));

  // Common: ErrorCode enumerators are defined as the tc sentinel values, so
  // the expected return value is the enumerator itself.
  inference::DataType core_dtype = tcore::TritonToDataType(dtype);
  ASSERT_EQ(
      tc::GetByteSize(core_dtype, shape), static_cast<int64_t>(error_code));

  // Core: a non-OK INVALID_ARG status whose message names the failure kind.
  size = 0;
  auto status = tcore::GetByteSize(core_dtype, shape, kTensorName, &size);
  ASSERT_FALSE(status.IsOk());
  ASSERT_EQ(status.StatusCode(), triton::core::Status::Code::INVALID_ARG);
  ASSERT_TRUE(
      std::string(status.Message())
          .find(
              error_code == ErrorCode::kInvalidDim
                  ? "invalid dimension"
                  : "exceeds maximum size") != std::string::npos);
}

// Stateless fixture grouping the GetElementCount() test cases.
class GetElementCountTest : public ::testing::Test {
 public:
  GetElementCountTest() = default;
};

// Fully specified shapes: the element count is the product of the dims, and
// an empty shape is expected to report 0.
TEST_F(GetElementCountTest, GetElementCount)
{
  struct Case {
    std::vector<int64_t> shape;
    int64_t expected_cnt;
  };

  std::vector<Case> cases = {
      {{}, 0},             // Test 1: empty shape
      {{8}, 8},            // Test 2: single dim
      {{1, 2, 3, 4}, 24},  // Test 3: multiple dims
  };

  for (auto& test_case : cases) {
    assert_get_element_count_success(test_case.shape, test_case.expected_cnt);
  }
}

// Shapes containing a -1 dim are expected to report tc::WILDCARD_SIZE.
TEST_F(GetElementCountTest, GetElementCountWildcard)
{
  const int64_t expected_cnt = tc::WILDCARD_SIZE;

  std::vector<std::vector<int64_t>> wildcard_shapes = {
      {-1},                         // Test 1: -1 dim
      {-1, 8, 8},                   // Test 2: one -1 dim
      {8, -1, -1},                  // Test 3: multiple -1 dims
      {-1, 1LL << 32, 1LL << 31},   // Test 4: -1 dim before overflow
  };

  for (auto& wildcard_shape : wildcard_shapes) {
    assert_get_element_count_success(wildcard_shape, expected_cnt);
  }
}

// Shapes containing a 0 dim are expected to report an element count of 0,
// wherever the 0 appears.
TEST_F(GetElementCountTest, GetElementCountZero)
{
  const int64_t expected_cnt = 0;

  std::vector<std::vector<int64_t>> zero_shapes = {
      {0},        // Test 1: 0 dim
      {1, 8, 0},  // Test 2: one 0 dim (trailing)
      {0, 1, 8},  //         one 0 dim (leading)
      {8, 0, 0},  // Test 3: multiple 0 dims
  };

  for (auto& zero_shape : zero_shapes) {
    assert_get_element_count_success(zero_shape, expected_cnt);
  }
}

// Shapes with a dim the helpers reject are expected to fail as kInvalidDim,
// with a backend message that embeds the printed shape.
TEST_F(GetElementCountTest, GetElementCountInvalidDim)
{
  std::vector<std::vector<int64_t>> invalid_shapes = {
      {1, -2},          // Test 1: single invalid dim
      {1, -2, -3},      // Test 2: multiple invalid dims
      {1, 1LL << 63},   // Test 3: valid but overflow dim
  };

  for (auto& invalid_shape : invalid_shapes) {
    const std::string error_msg = std::string("shape") +
                                  tb::ShapeToString(invalid_shape) +
                                  " contains an invalid dim.";
    assert_get_element_count_error(
        invalid_shape, ErrorCode::kInvalidDim, error_msg);
  }
}

// Element counts up to 2^62 are accepted here; a product of 2^63 is expected
// to fail as kOverflow, even when a -1 dim follows the overflowing dims.
TEST_F(GetElementCountTest, GetElementCountOverflow)
{
  // Test 1: no overflow
  std::vector<int64_t> fitting_shape = {1LL << 31, 1LL << 31};
  const int64_t expected_cnt = 1LL << 62;
  assert_get_element_count_success(fitting_shape, expected_cnt);

  const std::string error_msg =
      "unexpected integer overflow while calculating element count.";
  std::vector<std::vector<int64_t>> overflowing_shapes = {
      {1LL << 32, 1LL << 31},      // Test 2: overflows
      {1LL << 32, 1LL << 31, -1},  // Test 3: overflows before -1 dim
  };

  for (auto& overflowing_shape : overflowing_shapes) {
    assert_get_element_count_error(
        overflowing_shape, ErrorCode::kOverflow, error_msg);
  }
}

// Stateless fixture grouping the GetByteSize() test cases.
class GetByteSizeTest : public ::testing::Test {
 public:
  GetByteSizeTest() = default;
};

// Fully specified INT32 shapes: the byte size is the element count times the
// datatype byte size, and empty or zero-containing shapes report 0.
TEST_F(GetByteSizeTest, GetByteSize)
{
  const TRITONSERVER_DataType dtype = TRITONSERVER_TYPE_INT32;

  // Test 1: empty shape
  std::vector<int64_t> empty_shape = {};
  const int64_t empty_size = 0;
  assert_get_byte_size_success(dtype, empty_shape, empty_size);

  // Test 2: single dim
  std::vector<int64_t> single_dim_shape = {8};
  const int64_t single_dim_size = 8 * TRITONSERVER_DataTypeByteSize(dtype);
  assert_get_byte_size_success(dtype, single_dim_shape, single_dim_size);

  // Test 3: multiple dims
  std::vector<int64_t> multi_dim_shape = {1, 2, 3, 4};
  const int64_t multi_dim_size = 24 * TRITONSERVER_DataTypeByteSize(dtype);
  assert_get_byte_size_success(dtype, multi_dim_shape, multi_dim_size);

  // Test 4: multiple dims with 0
  std::vector<int64_t> zero_dim_shape = {0, 1, 8};
  const int64_t zero_dim_size = 0;
  assert_get_byte_size_success(dtype, zero_dim_shape, zero_dim_size);
}

// Cases where the byte size is expected to be tc::WILDCARD_SIZE: an invalid
// or BYTES datatype, or a shape containing a -1 dim.
TEST_F(GetByteSizeTest, GetByteSizeWildcard)
{
  const int64_t wildcard_size = tc::WILDCARD_SIZE;

  // Test 1: invalid dtype
  std::vector<int64_t> square_shape = {8, 8};
  assert_get_byte_size_success(
      TRITONSERVER_TYPE_INVALID, square_shape, wildcard_size);

  // Test 2: bytes dtype
  const TRITONSERVER_DataType bytes_dtype = TRITONSERVER_TYPE_BYTES;
  assert_get_byte_size_success(
      bytes_dtype, square_shape, wildcard_size, false /* test_core */);
  // test core explicitly as it treats string dtype size as 4
  int64_t core_size = 0;
  inference::DataType core_dtype = tcore::TritonToDataType(bytes_dtype);
  auto core_status =
      tcore::GetByteSize(core_dtype, square_shape, kTensorName, &core_size);
  ASSERT_TRUE(core_status.IsOk()) << core_status.Message();
  ASSERT_EQ(core_size, sizeof(int32_t) * 8 * 8);

  // Test 3: invalid dtype and element count overflows
  std::vector<int64_t> overflowing_shape = {1LL << 40, 1LL << 40};
  assert_get_byte_size_success(
      TRITONSERVER_TYPE_INVALID, overflowing_shape, wildcard_size);

  // Test 4: negative shape
  std::vector<int64_t> wildcard_shape = {-1, 8};
  assert_get_byte_size_success(
      TRITONSERVER_TYPE_INT32, wildcard_shape, wildcard_size);
}

// INT32 shapes containing a 0 dim are expected to report a byte size of 0,
// wherever the 0 appears.
TEST_F(GetByteSizeTest, GetByteSizeZero)
{
  const TRITONSERVER_DataType dtype = TRITONSERVER_TYPE_INT32;
  const int64_t expected_size = 0;

  std::vector<std::vector<int64_t>> zero_shapes = {
      {0},        // Test 1: 0 dim
      {1, 8, 0},  // Test 2: one 0 dim (trailing)
      {0, 1, 8},  //         one 0 dim (leading)
      {8, 0, 0},  // Test 3: multiple 0 dims
  };

  for (auto& zero_shape : zero_shapes) {
    assert_get_byte_size_success(dtype, zero_shape, expected_size);
  }
}

// INT32 shapes with a dim the helpers reject are expected to fail as
// kInvalidDim, with a backend message that embeds the printed shape.
TEST_F(GetByteSizeTest, GetByteSizeInvalidDim)
{
  const TRITONSERVER_DataType dtype = TRITONSERVER_TYPE_INT32;

  std::vector<std::vector<int64_t>> invalid_shapes = {
      {1, -2},          // Test 1: single invalid dim
      {1, -2, -3},      // Test 2: multiple invalid dims
      {1, 1LL << 63},   // Test 3: valid but overflow dim
  };

  for (auto& invalid_shape : invalid_shapes) {
    const std::string error_msg = std::string("shape") +
                                  tb::ShapeToString(invalid_shape) +
                                  " contains an invalid dim.";
    assert_get_byte_size_error(
        dtype, invalid_shape, ErrorCode::kInvalidDim, error_msg);
  }
}

// INT32 byte sizes: 2^60 elements are accepted, while an overflowing element
// count, or a valid element count whose byte size overflows, is expected to
// fail as kOverflow.
TEST_F(GetByteSizeTest, GetByteSizeOverflow)
{
  const TRITONSERVER_DataType dtype = TRITONSERVER_TYPE_INT32;

  // Test 1: no overflow
  std::vector<int64_t> fitting_shape = {1LL << 30, 1LL << 30};
  const int64_t expected_size =
      (1LL << 60) * TRITONSERVER_DataTypeByteSize(dtype);
  assert_get_byte_size_success(dtype, fitting_shape, expected_size);

  const std::string error_msg =
      "unexpected integer overflow while calculating byte size.";
  std::vector<std::vector<int64_t>> overflowing_shapes = {
      {1LL << 32, 1LL << 31},  // Test 2: element count overflows
      {1LL << 31, 1LL << 30},  // Test 3: valid element count but byte size
                               //         overflows
  };

  for (auto& overflowing_shape : overflowing_shapes) {
    assert_get_byte_size_error(
        dtype, overflowing_shape, ErrorCode::kOverflow, error_msg);
  }
}

}  // namespace

// Test entry point: hand the command line to GoogleTest, run every
// registered test, and return the result of RUN_ALL_TESTS().
int
main(int argc, char** argv)
{
  ::testing::InitGoogleTest(&argc, argv);
  const int test_result = RUN_ALL_TESTS();
  return test_result;
}


================================================
FILE: src/tracer.cc
================================================
// Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "tracer.h"

#include <stdlib.h>

#include "common.h"
#include "triton/common/logging.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif  // TRITON_ENABLE_GPU
#ifndef _WIN32
#include "opentelemetry/sdk/resource/semantic_conventions.h"
#include "opentelemetry/sdk/trace/batch_span_processor_factory.h"
namespace otel_common = opentelemetry::common;
#endif

namespace triton { namespace server {

// Allocate a TraceManager from the given global trace settings and store it
// in '*manager'. Always returns nullptr (success).
TRITONSERVER_Error*
TraceManager::Create(
    TraceManager** manager, const TRITONSERVER_InferenceTraceLevel level,
    const uint32_t rate, const int32_t count, const uint32_t log_frequency,
    const std::string& filepath, const InferenceTraceMode mode,
    const triton::server::TraceConfigMap& config_map)
{
  // The manager is created unconditionally, whatever the global setting is,
  // because settings can be updated at runtime even if tracing is not enabled
  // at start. No trace should be sampled if the setting is not valid.
  TraceManager* created = new TraceManager(
      level, rate, count, log_frequency, filepath, mode, config_map);
  *manager = created;

  return nullptr;  // success
}

// Build the global default and the current global setting from the same
// arguments, register the trace file they share, and initialize the tracer
// for the configured mode.
TraceManager::TraceManager(
    const TRITONSERVER_InferenceTraceLevel level, const uint32_t rate,
    const int32_t count, const uint32_t log_frequency,
    const std::string& filepath, const InferenceTraceMode mode,
    const TraceConfigMap& config_map)
{
  std::shared_ptr<TraceFile> file(new TraceFile(filepath));

  // Both settings start out identical, with every field marked as not
  // explicitly specified; each gets its own TraceSetting object.
  auto new_unspecified_setting = [&]() {
    return new TraceSetting(
        level, rate, count, log_frequency, file, mode, config_map,
        false /*level_specified*/, false /*rate_specified*/,
        false /*count_specified*/, false /*log_frequency_specified*/,
        false /*filepath_specified*/, false /*mode_specified*/,
        false /*config_map_specified*/);
  };
  global_default_.reset(new_unspecified_setting());
  global_setting_.reset(new_unspecified_setting());
  trace_files_.emplace(filepath, file);

  InitTracer(config_map);
}

// Apply 'new_setting' to the model named 'model_name', or to the global
// setting when 'model_name' is empty. Holds the writer mutex for the whole
// update. Returns the first error reported by the internal update, or
// nullptr on success.
TRITONSERVER_Error*
TraceManager::UpdateTraceSetting(
    const std::string& model_name, const NewSetting& new_setting)
{
  std::lock_guard<std::mutex> w_lk(w_mu_);

  RETURN_IF_ERR(UpdateTraceSettingInternal(model_name, new_setting));
  if (!model_name.empty()) {
    return nullptr;
  }

  // The global setting was updated, so the model settings that (partially)
  // mirror it must be refreshed too. A default constructed setting means no
  // active update: only the unspecified fields are checked and updated.
  NewSetting no_active_update;
  // Iterate over a copy because UpdateTraceSettingInternal() may modify
  // 'fallback_used_models_'.
  auto mirroring_models = fallback_used_models_;
  for (const auto& mirroring_model : mirroring_models) {
    RETURN_IF_ERR(
        UpdateTraceSettingInternal(mirroring_model, no_active_update));
  }
  return nullptr;
}

// Compute and install the trace setting for 'model_name' (or for the global
// setting when 'model_name' is empty) after applying 'new_setting'.
//
// Every field starts from the fallback setting (the global setting for a
// model, the global default for the global setting) and is overridden only
// when the field counts as "specified". Returns nullptr on success, or an
// INVALID_ARG error when the resulting setting is invalid for a reason other
// than tracing being disabled. This function does not take 'w_mu_' itself;
// the caller visible in this file, UpdateTraceSetting(), holds it.
TRITONSERVER_Error*
TraceManager::UpdateTraceSettingInternal(
    const std::string& model_name, const NewSetting& new_setting)
{
  // First try to get the current setting and fallback setting,
  // current setting may be 'nullptr' if the setting is newly added
  const TraceSetting* current_setting = nullptr;
  const TraceSetting* fallback_setting = nullptr;
  if (!model_name.empty()) {
    auto it = model_settings_.find(model_name);
    if (it != model_settings_.end()) {
      current_setting = it->second.get();
    }
    fallback_setting = global_setting_.get();
  } else {
    current_setting = global_setting_.get();
    fallback_setting = global_default_.get();
  }

  // Prepare the updated setting, use two passes for simplicity:
  // 1. Set all fields based on 'fallback_setting'
  // 2. If there are specified fields based on current and new setting,
  //    use the specified value
  TRITONSERVER_InferenceTraceLevel level = fallback_setting->level_;
  uint32_t rate = fallback_setting->rate_;
  int32_t count = fallback_setting->count_;
  uint32_t log_frequency = fallback_setting->log_frequency_;
  std::string filepath = fallback_setting->file_->FileName();
  // 'mode' and 'config_map' are never overridden below: they always come from
  // the fallback setting and are recorded as not specified.
  InferenceTraceMode mode = fallback_setting->mode_;
  TraceConfigMap config_map = fallback_setting->config_map_;

  // Whether the field value is specified:
  // if clear then it is not specified, otherwise,
  // it is specified if it is being updated, or it was previously specified
  const bool level_specified =
      (new_setting.clear_level_ ? false
                                : (((current_setting != nullptr) &&
                                    current_setting->level_specified_) ||
                                   (new_setting.level_ != nullptr)));
  const bool rate_specified =
      (new_setting.clear_rate_ ? false
                               : (((current_setting != nullptr) &&
                                   current_setting->rate_specified_) ||
                                  (new_setting.rate_ != nullptr)));
  const bool count_specified =
      (new_setting.clear_count_ ? false
                                : (((current_setting != nullptr) &&
                                    current_setting->count_specified_) ||
                                   (new_setting.count_ != nullptr)));
  const bool log_frequency_specified =
      (new_setting.clear_log_frequency_
           ? false
           : (((current_setting != nullptr) &&
               current_setting->log_frequency_specified_) ||
              (new_setting.log_frequency_ != nullptr)));
  // The file path is not read from 'new_setting' here; it only stays
  // specified if the current setting already had it specified.
  const bool filepath_specified =
      (((current_setting != nullptr) && current_setting->filepath_specified_));

  // For each specified field, the new value wins over the current one. When
  // the new value is absent, the field can only be specified because the
  // current setting exists and had it specified, so 'current_setting' is
  // non-null in the else branches below.
  if (level_specified) {
    level = (new_setting.level_ != nullptr) ? *new_setting.level_
                                            : current_setting->level_;
  }
  if (rate_specified) {
    rate = (new_setting.rate_ != nullptr) ? *new_setting.rate_
                                          : current_setting->rate_;
  }
  if (count_specified) {
    count = (new_setting.count_ != nullptr) ? *new_setting.count_
                                            : current_setting->count_;
  }
  if (log_frequency_specified) {
    log_frequency = (new_setting.log_frequency_ != nullptr)
                        ? *new_setting.log_frequency_
                        : current_setting->log_frequency_;
  }
  if (filepath_specified) {
    filepath = current_setting->file_->FileName();
  }

  // Some special case when updating model setting
  if (!model_name.empty()) {
    bool all_specified =
        (level_specified & rate_specified & count_specified &
         log_frequency_specified & filepath_specified);
    bool none_specified =
        !(level_specified | rate_specified | count_specified |
          log_frequency_specified | filepath_specified);
    if (all_specified) {
      // Nothing is mirrored from the global setting any more.
      fallback_used_models_.erase(model_name);
    } else if (none_specified) {
      // Simply let the model uses global setting
      // NOTE(review): the model is not removed from 'fallback_used_models_'
      // on this path — confirm whether leaving a stale entry is intended.
      std::lock_guard<std::mutex> r_lk(r_mu_);
      model_settings_.erase(model_name);
      return nullptr;
    } else {
      // Partially specified: remember the model so that it is refreshed
      // when the global setting changes.
      fallback_used_models_.emplace(model_name);
    }
  }

  // Create TraceSetting object with the updated setting
  // Reuse the TraceFile registered for this path if it is still alive,
  // otherwise create and register a new one.
  std::shared_ptr<TraceFile> file;
  const auto it = trace_files_.find(filepath);
  if (it != trace_files_.end()) {
    file = it->second.lock();
    // The TraceFile object is no longer valid
    if (file == nullptr) {
      trace_files_.erase(it);
    }
  }
  if (file == nullptr) {
    file.reset(new TraceFile(filepath));
    trace_files_.emplace(filepath, file);
  }

  std::shared_ptr<TraceSetting> lts(new TraceSetting(
      level, rate, count, log_frequency, file, mode, config_map,
      level_specified, rate_specified, count_specified, log_frequency_specified,
      filepath_specified, false /*mode_specified*/,
      false /*config_map_specified*/));
  // The only invalid setting allowed is if it disables tracing
  if ((!lts->Valid()) && (level != TRITONSERVER_TRACE_LEVEL_DISABLED)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("Attempting to set invalid trace setting :") +
         lts->Reason())
            .c_str());
  }

  // Update / Init the setting in read lock to exclude reader access,
  // we replace the object instead of modifying the existing object in case
  // of there are ongoing traces. This makes sure those traces are referring
  // to the setting when the traces are sampled.
  {
    std::lock_guard<std::mutex> r_lk(r_mu_);
    if (model_name.empty()) {
      // global update
      global_setting_ = std::move(lts);
    } else {
      auto it = model_settings_.find(model_name);
      if (it != model_settings_.end()) {
        // Model update
        it->second = std::move(lts);
      } else {
        // Model init
        model_settings_.emplace(model_name, lts);
      }
    }
  }

  return nullptr;
}

void
TraceManager::GetTraceSetting(
    const std::string& model_name, TRITONSERVER_InferenceTraceLevel* level,
    uint32_t* rate, int32_t* count, uint32_t* log_frequency,
    std::string* filepath, InferenceTraceMode* trace_mode,
    TraceConfigMap* config_map)
{
  std::shared_ptr<TraceSetting> trace_setting;
  {
    std::lock_guard<std::mutex> r_lk(r_mu_);
    auto m_it = model_settings_.find(model_name);
    trace_setting =
        (m_it == model_settings_.end()) ? global_setting_ : m_it->second;
  }

  *level = trace_setting->level_;
  *rate = trace_setting->rate_;
  *count = trace_setting->count_;
  *log_frequency = trace_setting->log_frequency_;
  *filepath = trace_setting->file_->FileName();
  *trace_mode = trace_setting->mode_;
  *config_map = trace_setting->config_map_;
}

// Store in 'trace_setting' the setting registered for 'model_name', or the
// global setting when the model has none. The lookup runs under the reader
// mutex.
void
TraceManager::GetTraceSetting(
    const std::string& model_name, std::shared_ptr<TraceSetting>& trace_setting)
{
  std::lock_guard<std::mutex> r_lk(r_mu_);
  const auto model_entry = model_settings_.find(model_name);
  if (model_entry != model_settings_.end()) {
    trace_setting = model_entry->second;
  } else {
    trace_setting = global_setting_;
  }
}

// Build the options used to start a trace for a request to 'model_name'.
//
// The returned options always carry the trace setting in effect for the
// model. When tracing is not disabled and the mode is OpenTelemetry, the
// global propagator extracts a context from 'carrier'; if that context holds
// a valid span context it is stored as the propagated context and sampling
// is forced. On Windows the OpenTelemetry mode is unsupported and only logs
// an error.
TraceManager::TraceStartOptions
TraceManager::GetTraceStartOptions(
    AbstractCarrier& carrier, const std::string& model_name)
{
  TraceManager::TraceStartOptions start_options;
  GetTraceSetting(model_name, start_options.trace_setting);
  // Use '!=' instead of negating the level before comparing it:
  // '!level_ == DISABLED' parses as '(!level_) == DISABLED', which only
  // gives the intended answer when the DISABLED enumerator is zero.
  const bool tracing_enabled =
      (start_options.trace_setting->level_ !=
       TRITONSERVER_TRACE_LEVEL_DISABLED);
  if (tracing_enabled &&
      start_options.trace_setting->mode_ == TRACE_MODE_OPENTELEMETRY) {
#ifndef _WIN32
    auto prop =
        otel_cntxt::propagation::GlobalTextMapPropagator::GetGlobalPropagator();
    auto ctxt = otel_cntxt::Context();
    ctxt = prop->Extract(carrier, ctxt);
    otel_trace_api::SpanContext span_context =
        otel_trace_api::GetSpan(ctxt)->GetContext();
    // Only a valid incoming span context makes this request part of an
    // externally started trace.
    if (span_context.IsValid()) {
      start_options.propagated_context = ctxt;
      start_options.force_sample = true;
    }
#else
    LOG_ERROR << "Unsupported trace mode: "
              << TraceManager::InferenceTraceModeString(
                     start_options.trace_setting->mode_);
#endif  // _WIN32
  }
  return start_options;
}


std::shared_ptr<TraceManager::Trace>
TraceManager::SampleTrace(const TraceStartOptions& start_options)
{
  std::shared_ptr<Trace> ts =
      start_options.trace_setting->SampleTrace(start_options.force_sample);
  if (ts != nullptr) {
    ts->setting_ = start_options.trace_setting;
    if (ts->setting_->mode_ == TRACE_MODE_OPENTELEMETRY) {
#ifndef _WIN32
      auto steady_timestamp_ns =
          std::chrono::duration_cast<std::chrono::nanoseconds>(
              std::chrono::steady_clock::now().time_since_epoch())
              .count();
      if (ts->span_stacks_.find(ts->trace_id_) == ts->span_stacks_.end()) {
        std::unique_ptr<
            std::stack<opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>>
            st(new std::stack<
                opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>());
        ts->span_stacks_.emplace(ts->trace_id_, std::move(st));
      }
      auto active_span =
          otel_trace_api::GetSpan(start_options.propagated_context);
      if (active_span->GetContext().IsValid()) {
        ts->span_stacks_[ts->trace_id_]->emplace(active_span);
      }
      // Storing "InferRequest" span as a root span
      // to keep it alive for the duration of the request.
      ts->root_span_ =
          ts->StartSpan("InferRequest", steady_timestamp_ns, ts->trace_id_);
      ts->span_stacks_[ts->trace_id_]->emplace(ts->root_span_);
#else
      LOG_ERROR << "Unsupported trace mode: "
                << TraceManager::InferenceTraceModeString(ts->setting_->mode_);
#endif
    }
  }
  return ts;
}

// Finish the trace according to its mode: Triton mode writes the collected
// streams through the setting, OpenTelemetry mode ends the span for this
// trace id (and only logs an error on Windows, where it is unsupported).
TraceManager::Trace::~Trace()
{
  switch (setting_->mode_) {
    case TRACE_MODE_TRITON:
      // Write trace now
      setting_->WriteTrace(streams_);
      break;
    case TRACE_MODE_OPENTELEMETRY:
#ifndef _WIN32
      EndSpan(trace_id_);
#else
      LOG_ERROR << "Unsupported trace mode: "
                << TraceManager::InferenceTraceModeString(setting_->mode_);
#endif
      break;
    default:
      break;
  }
}

// Record the timestamp 'timestamp_ns' under 'name' for this trace. Does
// nothing unless the TIMESTAMPS level bit is set. Triton mode appends a JSON
// object to the per-trace string stream under 'mtx_'; OpenTelemetry mode
// adds an event to the root span.
void
TraceManager::Trace::CaptureTimestamp(
    const std::string& name, uint64_t timestamp_ns)
{
  if (!(setting_->level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS)) {
    return;
  }

  if (setting_->mode_ == TRACE_MODE_TRITON) {
    std::lock_guard<std::mutex> lk(mtx_);
    std::stringstream* out = nullptr;
    auto existing = streams_.find(trace_id_);
    if (existing != streams_.end()) {
      out = existing->second.get();
      // The stream already has content, so separate the new entry with ","
      *out << ",";
    } else {
      std::unique_ptr<std::stringstream> fresh(new std::stringstream());
      out = fresh.get();
      streams_.emplace(trace_id_, std::move(fresh));
    }
    *out << "{\"id\":" << trace_id_ << ",\"timestamps\":["
         << "{\"name\":\"" << name << "\",\"ns\":" << timestamp_ns << "}]}";
    return;
  }

  if (setting_->mode_ != TRACE_MODE_OPENTELEMETRY) {
    return;
  }
#ifndef _WIN32
  root_span_->AddEvent(
      name, time_offset_ + std::chrono::nanoseconds{timestamp_ns});
#else
  LOG_ERROR << "Unsupported trace mode: "
            << TraceManager::InferenceTraceModeString(setting_->mode_);
#endif
}

// Return the display name for 'activity'. For every activity this is its
// standard string; for a custom activity the trace context is parsed as JSON
// and, if it has a member keyed by 'timestamp_ns', that member's string
// value is used instead.
std::string
TraceManager::Trace::RetrieveActivityName(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns)
{
  std::string activity_name =
      TRITONSERVER_InferenceTraceActivityString(activity);
  if (activity != TRITONSERVER_TRACE_CUSTOM_ACTIVITY) {
    return activity_name;
  }

  const char* raw_context = nullptr;
  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceTraceContext(trace, &raw_context),
      "Failed to retrieve trace context");

  // A missing context is parsed as an empty string.
  std::string context_str;
  if (raw_context != nullptr) {
    context_str = raw_context;
  }

  triton::common::TritonJson::Value context;
  LOG_TRITONSERVER_ERROR(
      context.Parse(context_str), "Failed to parse trace context");

  // The custom name is looked up under the decimal timestamp.
  const std::string timestamp_key = std::to_string(timestamp_ns);
  if (context.Find(timestamp_key.c_str())) {
    context.MemberAsString(timestamp_key.c_str(), &activity_name);
  }

  return activity_name;
}

// Set up the tracing backend for the global trace mode. Only OpenTelemetry
// mode needs setup: an OTLP HTTP exporter behind a batch span processor is
// installed as the tracer provider, and an HttpTraceContext propagator is
// installed as the global propagator. Other modes are a no-op; on Windows
// the OpenTelemetry mode only logs an error.
void
TraceManager::InitTracer(const triton::server::TraceConfigMap& config_map)
{
  if (global_setting_->mode_ != TRACE_MODE_OPENTELEMETRY) {
    return;
  }

#ifndef _WIN32
  // Options filled in from 'config_map'.
  otlp::OtlpHttpExporterOptions exporter_options;
  otel_resource::ResourceAttributes attributes = {};
  otel_trace_sdk::BatchSpanProcessorOptions processor_options;
  ProcessOpenTelemetryParameters(
      config_map, exporter_options, attributes, processor_options);

  // exporter -> batch processor -> provider
  auto span_exporter = otlp::OtlpHttpExporterFactory::Create(exporter_options);
  auto span_processor = otel_trace_sdk::BatchSpanProcessorFactory::Create(
      std::move(span_exporter), processor_options);
  auto resource = otel_resource::Resource::Create(attributes);
  std::shared_ptr<otel_trace_api::TracerProvider> provider =
      otel_trace_sdk::TracerProviderFactory::Create(
          std::move(span_processor), resource);
  otel_trace_api::Provider::SetTracerProvider(provider);

  opentelemetry::nostd::shared_ptr<otel_cntxt::propagation::TextMapPropagator>
      propagator(new otel_trace_api::propagation::HttpTraceContext());
  otel_cntxt::propagation::GlobalTextMapPropagator::SetGlobalPropagator(
      propagator);
#else
  LOG_ERROR << "Unsupported trace mode: "
            << TraceManager::InferenceTraceModeString(global_setting_->mode_);
#endif
}

// Resets the global OpenTelemetry tracer provider to an empty pointer when
// the global trace mode is TRACE_MODE_OPENTELEMETRY. All other modes are a
// no-op. On Windows the OpenTelemetry mode is reported as unsupported.
void
TraceManager::CleanupTracer()
{
  if (global_setting_->mode_ != TRACE_MODE_OPENTELEMETRY) {
    return;
  }

#ifndef _WIN32
  std::shared_ptr<otel_trace_api::TracerProvider> empty_provider;
  otel_trace_api::Provider::SetTracerProvider(empty_provider);
#else
  LOG_ERROR << "Unsupported trace mode: "
            << TraceManager::InferenceTraceModeString(global_setting_->mode_);
#endif
}

#ifndef _WIN32
// Translates the OpenTelemetry section of 'config_map' into SDK options.
//
// The section is looked up by the decimal string of TRACE_MODE_OPENTELEMETRY.
// The service name resource attribute is always set to
// "triton-inference-server" first, so a user supplied "resource" entry with
// the same key overrides it.
//
// Recognized settings:
//   url                       -> exporter_options.url
//   resource                  -> "<key>=<value>" added to 'attributes'
//   bsp_max_queue_size        -> processor_options.max_queue_size
//   bsp_schedule_delay        -> processor_options.schedule_delay_millis
//   bsp_max_export_batch_size -> processor_options.max_export_batch_size
// Unrecognized settings are ignored.
void
TraceManager::ProcessOpenTelemetryParameters(
    const triton::server::TraceConfigMap& config_map,
    otlp::OtlpHttpExporterOptions& exporter_options,
    otel_resource::ResourceAttributes& attributes,
    otel_trace_sdk::BatchSpanProcessorOptions& processor_options)
{
  attributes[otel_resource::SemanticConventions::kServiceName] =
      std::string("triton-inference-server");
  auto mode_key = std::to_string(TRACE_MODE_OPENTELEMETRY);
  auto otel_options_it = config_map.find(mode_key);
  if (otel_options_it == config_map.end()) {
    return;
  }
  for (const auto& [setting, value] : otel_options_it->second) {
    // FIXME add more configuration options of OTLP HTTP Exporter
    if (setting == "url") {
      exporter_options.url = std::get<std::string>(value);
    } else if (setting == "resource") {
      const auto& user_setting = std::get<std::string>(value);
      const auto separator_pos = user_setting.find('=');
      // Without a separator there is no way to tell the key from the value;
      // skip the entry instead of registering the whole string as both.
      if (separator_pos == std::string::npos) {
        LOG_ERROR << "Ignoring malformed OpenTelemetry resource attribute '"
                  << user_setting << "', expected format <key>=<value>";
        continue;
      }
      auto attribute_key = user_setting.substr(0, separator_pos);
      auto attribute_value = user_setting.substr(separator_pos + 1);
      attributes[attribute_key] = attribute_value;
    } else if (setting == "bsp_max_queue_size") {
      processor_options.max_queue_size = std::get<uint32_t>(value);
    } else if (setting == "bsp_schedule_delay") {
      processor_options.schedule_delay_millis =
          std::chrono::milliseconds(std::get<uint32_t>(value));
    } else if (setting == "bsp_max_export_batch_size") {
      processor_options.max_export_batch_size = std::get<uint32_t>(value);
    }
  }
}

// Starts a new OpenTelemetry span named 'display_name' for 'activity' and
// pushes it onto the span stack of 'trace_id'.
//
// For TRITONSERVER_TRACE_REQUEST_START the span is additionally annotated
// with the model name, model version, trace id, parent trace id and (when
// non-empty) request id, and the span's trace context is stored on 'trace'.
//
// Values retrieved through the TRITONSERVER API are given defaults up front
// so that a failed (and logged) API call never leads to reading an
// uninitialized value.
void
TraceManager::Trace::StartSpan(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
    uint64_t trace_id, std::string display_name)
{
  uint64_t parent_id = 0;
  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceTraceParentId(trace, &parent_id),
      "getting trace parent id");
  auto span_parent_id = parent_id;

  // Currently, only 2 types of sub-spans are supported:
  // request span and compute span. Compute span is a leaf span
  // and can not be a parent of any sub-span. If parent_id==0,
  // then current model is either a standalone model, or an ensemble model.
  // In both cases, the parent of the new request sub-span is the kRootSpan.
  // A request span with trace id = `trace_id` is a parent of a compute span,
  // started in the same trace.
  // If parent_id > 0, then this is a child trace, spawned from
  // the ensemble's main request. For this instance, the parent
  // span is the ensemble's request span.
  if ((parent_id == 0 && activity == TRITONSERVER_TRACE_REQUEST_START) ||
      (activity == TRITONSERVER_TRACE_COMPUTE_START) ||
      (activity == TRITONSERVER_TRACE_CUSTOM_ACTIVITY)) {
    span_parent_id = trace_id;
  }
  auto span = StartSpan(display_name, timestamp_ns, span_parent_id);

  if (activity == TRITONSERVER_TRACE_REQUEST_START) {
    int64_t model_version = 0;
    const char* request_id = nullptr;
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceTraceModelVersion(trace, &model_version),
        "getting model version");
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceTraceRequestId(trace, &request_id),
        "getting request id");
    span->SetAttribute("triton.model_name", display_name);
    span->SetAttribute("triton.model_version", model_version);
    span->SetAttribute("triton.trace_id", trace_id);
    span->SetAttribute("triton.trace_parent_id", parent_id);
    // Only report a request id that is present and non-empty.
    if ((request_id != nullptr) && (request_id[0] != '\0')) {
      span->SetAttribute("triton.request_id", request_id);
    }
    triton::common::TritonJson::WriteBuffer buffer;
    PrepareTraceContext(span, &buffer);
    // Log (and release) the error instead of silently dropping it.
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceTraceSetContext(
            trace, buffer.Contents().c_str()),
        "setting trace context");
  }
  span_stacks_[trace_id]->emplace(span);
}

// Creates and returns a server-kind span named 'display_name' that starts at
// 'raw_timestamp_ns'. When the span stack registered for 'trace_id' is
// non-empty, the span on top of that stack becomes the parent of the new
// span. The new span is not pushed onto any stack by this overload.
opentelemetry::nostd::shared_ptr<otel_trace_api::Span>
TraceManager::Trace::StartSpan(
    std::string display_name, const uint64_t& raw_timestamp_ns,
    uint64_t trace_id)
{
  const std::chrono::nanoseconds start_ns{raw_timestamp_ns};

  otel_trace_api::StartSpanOptions span_options;
  span_options.kind = otel_trace_api::SpanKind::kServer;
  span_options.start_system_time = time_offset_ + start_ns;
  span_options.start_steady_time = otel_common::SteadyTimestamp{start_ns};

  // A child span needs its parent's context passed via StartSpanOptions.
  const auto stack_it = span_stacks_.find(trace_id);
  if ((stack_it != span_stacks_.end()) && !stack_it->second->empty()) {
    span_options.parent = stack_it->second->top()->GetContext();
  }

  return opentelemetry::trace::Provider::GetTracerProvider()
      ->GetTracer(kTritonTracer)
      ->StartSpan(display_name, span_options);
}

// Ends the span on top of the stack for 'trace_id', using the current
// steady clock reading (in nanoseconds) as the end timestamp.
void
TraceManager::Trace::EndSpan(uint64_t trace_id)
{
  const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
  const auto now_ns =
      std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch)
          .count();
  EndSpan(now_ns, trace_id);
}


void
TraceManager::Trace::EndSpan(
    const uint64_t& raw_timestamp_ns, uint64_t trace_id)
{
  if (span_stacks_.find(trace_id) != span_stacks_.end() &&
      !span_stacks_[trace_id]->empty()) {
    otel_trace_api::EndSpanOptions end_options;
    end_options.end_steady_time = otel_common::SteadyTimestamp{
        std::chrono::nanoseconds{raw_timestamp_ns}};
    span_stacks_[trace_id]->top()->End(end_options);
    span_stacks_[trace_id]->pop();
  }
}

// Entry point for reporting 'activity' of 'trace' in OpenTelemetry mode.
// Makes sure a span stack exists for the trace's id and then forwards the
// activity to AddEvent().
void
TraceManager::Trace::ReportToOpenTelemetry(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns)
{
  using SpanStack =
      std::stack<opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>;

  uint64_t id;
  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceTraceId(trace, &id), "getting trace id");

  // Lazily create the span stack the first time this trace id is seen.
  if (span_stacks_.count(id) == 0) {
    std::unique_ptr<SpanStack> fresh_stack(new SpanStack());
    span_stacks_.emplace(id, std::move(fresh_stack));
  }

  AddEvent(trace, activity, timestamp_ns, id);
}

// Records 'activity' for 'trace_id' as an OpenTelemetry event, opening and
// closing spans around it as needed:
//  - REQUEST_START, COMPUTE_START and custom activities whose name ends with
//    "_START" open a new span before the event is added. The span is named
//    after the model (request), "compute" (compute) or the custom activity
//    name with the "_START" suffix removed.
//  - REQUEST_END, COMPUTE_END and custom activities whose name ends with
//    "_END" close the top-most span after the event is added.
// A custom activity name must be strictly longer than the suffix to match.
void
TraceManager::Trace::AddEvent(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
    uint64_t trace_id)
{
  static const std::string kStartSuffix = "_START";
  static const std::string kEndSuffix = "_END";

  const std::string activity_name =
      RetrieveActivityName(trace, activity, timestamp_ns);
  const bool is_custom = (activity == TRITONSERVER_TRACE_CUSTOM_ACTIVITY);

  // True when the activity name is longer than 'suffix' and ends with it.
  const auto name_ends_with = [&activity_name](const std::string& suffix) {
    return (activity_name.length() > suffix.length()) &&
           (activity_name.compare(
                activity_name.length() - suffix.length(), suffix.length(),
                suffix) == 0);
  };

  const bool opens_span = (activity == TRITONSERVER_TRACE_REQUEST_START) ||
                          (activity == TRITONSERVER_TRACE_COMPUTE_START) ||
                          (is_custom && name_ends_with(kStartSuffix));
  if (opens_span) {
    std::string span_name;
    if (is_custom) {
      span_name = activity_name.substr(
          0, activity_name.length() - kStartSuffix.length());
    } else if (activity == TRITONSERVER_TRACE_REQUEST_START) {
      const char* model_name;
      LOG_TRITONSERVER_ERROR(
          TRITONSERVER_InferenceTraceModelName(trace, &model_name),
          "getting model name");
      span_name = model_name;
    } else {
      // Only TRITONSERVER_TRACE_COMPUTE_START can reach this branch.
      span_name = "compute";
    }

    StartSpan(trace, activity, timestamp_ns, trace_id, span_name);
  }

  AddEvent(activity_name, timestamp_ns, trace_id);

  const bool closes_span = (activity == TRITONSERVER_TRACE_REQUEST_END) ||
                           (activity == TRITONSERVER_TRACE_COMPUTE_END) ||
                           (is_custom && name_ends_with(kEndSuffix));
  if (closes_span) {
    EndSpan(timestamp_ns, trace_id);
  }
}

// Adds 'event' with the given 'timestamp' to the span on top of the stack
// for 'trace_id'. Does nothing when no stack is registered for 'trace_id'
// or when that stack is empty.
void
TraceManager::Trace::AddEvent(
    const std::string& event, uint64_t timestamp, uint64_t trace_id)
{
  const auto stack_it = span_stacks_.find(trace_id);
  if ((stack_it == span_stacks_.end()) || stack_it->second->empty()) {
    return;
  }
  const auto event_time = time_offset_ + std::chrono::nanoseconds{timestamp};
  stack_it->second->top()->AddEvent(event, event_time);
}

// Serializes the context of 'span' into 'buffer' as a JSON object with a
// "traceparent" member of the form
//   00-<32 hex chars trace id>-<16 hex chars span id>-<2 hex chars flags>
// and, when the span's trace state header is non-empty, a "tracestate"
// member holding that header.
void
TraceManager::Trace::PrepareTraceContext(
    opentelemetry::nostd::shared_ptr<otel_trace_api::Span> span,
    triton::common::TritonJson::WriteBuffer* buffer)
{
  const char* const kTraceParentKey = "traceparent";
  const char* const kTraceStateKey = "tracestate";

  // Hex encode the identifiers. The buffers are not null-terminated, so
  // their lengths are passed explicitly whenever they are read below.
  char trace_id_hex[32] = {0};
  char span_id_hex[16] = {0};
  char trace_flags_hex[2] = {0};
  const auto span_context = span->GetContext();
  span_context.span_id().ToLowerBase16(span_id_hex);
  span_context.trace_id().ToLowerBase16(trace_id_hex);
  span_context.trace_flags().ToLowerBase16(trace_flags_hex);

  std::string traceparent("00-");
  traceparent.append(trace_id_hex, 32);
  traceparent.append("-");
  traceparent.append(span_id_hex, 16);
  traceparent.append("-");
  traceparent.append(trace_flags_hex, 2);

  const std::string tracestate = span_context.trace_state()->ToHeader();

  triton::common::TritonJson::Value context_json(
      triton::common::TritonJson::ValueType::OBJECT);
  context_json.SetStringObject(kTraceParentKey, traceparent);
  if (!tracestate.empty()) {
    context_json.SetStringObject(kTraceStateKey, tracestate);
  }
  context_json.Write(buffer);
}
#endif

// Release callback registered with the TRITONSERVER trace object (see
// SampleTrace()). Removes the id of 'trace' from the set of spawned traces
// tracked by the shared Trace object and deletes 'trace'.
//
// 'userp' is the heap-allocated std::shared_ptr<TraceManager::Trace> created
// in SampleTrace(). It is deleted here once the tracker becomes empty, i.e.
// when no further release call is expected for it.
void
TraceManager::TraceRelease(TRITONSERVER_InferenceTrace* trace, void* userp)
{
  uint64_t id;
  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceTraceId(trace, &id), "getting trace id");

  auto ts_ptr = reinterpret_cast<std::shared_ptr<TraceManager::Trace>*>(userp);
  // Keeps the Trace object (and therefore its mutex) alive until the lock
  // below has been released.
  std::shared_ptr<TraceManager::Trace> tracer_sp;
  bool delete_ts = false;
  {
    std::lock_guard<std::mutex> lk((*ts_ptr)->mtx_);
    (*ts_ptr)->spawned_traces_tracker_.erase(id);
    // The userp will be shared with the trace children, so only delete it
    // if no more TraceRelease calls are expected
    if ((*ts_ptr)->spawned_traces_tracker_.empty()) {
      // Move the trace shared_ptr out inside lock to ensure mutex stays alive
      // and destruct outside lock
      tracer_sp = std::move(*ts_ptr);
      delete_ts = true;
      // Only the (now moved-from) shared_ptr holder is freed here; the Trace
      // object itself is still referenced by 'tracer_sp'.
      delete ts_ptr;
    }
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceTraceDelete(trace), "deleting trace");
  }
  // The lock is released at this point, so it is safe to drop this reference
  // to the Trace object.
  if (delete_ts) {
    tracer_sp.reset();
  }
}

// Returns the human readable name of 'mode': "triton" or "opentelemetry".
// Any other value yields "<unknown>".
const char*
TraceManager::InferenceTraceModeString(InferenceTraceMode mode)
{
  if (mode == TRACE_MODE_TRITON) {
    return "triton";
  }
  if (mode == TRACE_MODE_OPENTELEMETRY) {
    return "opentelemetry";
  }
  return "<unknown>";
}

// Activity callback registered with the TRITONSERVER trace object (see
// SampleTrace()). 'userp' is a pointer to the
// std::shared_ptr<TraceManager::Trace> shared by a trace and its children.
//
// The id of 'trace' is recorded in the spawned traces tracker. In
// OpenTelemetry mode the activity is then forwarded to
// ReportToOpenTelemetry(). Otherwise a JSON fragment holding the activity
// name and timestamp is appended to the per-trace-id string stream; for
// TRITONSERVER_TRACE_REQUEST_START it is preceded by a fragment describing
// the request (model name, model version, request id and parent id).
void
TraceManager::TraceActivity(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
    void* userp)
{
  uint64_t id;
  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceTraceId(trace, &id), "getting trace id");

  // The function may be called with different traces but the same 'userp',
  // group the activity of the same trace together for more readable output.
  auto* shared_trace =
      reinterpret_cast<std::shared_ptr<TraceManager::Trace>*>(userp);
  TraceManager::Trace* ts = shared_trace->get();

  std::lock_guard<std::mutex> lk(ts->mtx_);
  if (ts->spawned_traces_tracker_.count(id) == 0) {
    ts->spawned_traces_tracker_.emplace(id);
  }

  if (ts->setting_->mode_ == TRACE_MODE_OPENTELEMETRY) {
#ifndef _WIN32
    ts->ReportToOpenTelemetry(trace, activity, timestamp_ns);
#else
    LOG_ERROR << "Unsupported trace mode: "
              << TraceManager::InferenceTraceModeString(ts->setting_->mode_);
#endif
    return;
  }

  // Fetch the stream of this trace id, creating it on first use. A "," is
  // written first when the stream already exists as it then already holds
  // content.
  std::stringstream* ss = nullptr;
  const auto stream_it = ts->streams_.find(id);
  if (stream_it == ts->streams_.end()) {
    std::unique_ptr<std::stringstream> fresh_stream(new std::stringstream());
    ss = fresh_stream.get();
    ts->streams_.emplace(id, std::move(fresh_stream));
  } else {
    ss = stream_it->second.get();
    *ss << ",";
  }

  // The start of a request additionally serializes the trace details.
  if (activity == TRITONSERVER_TRACE_REQUEST_START) {
    const char* model_name;
    int64_t model_version;
    uint64_t parent_id;
    const char* request_id;

    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceTraceModelName(trace, &model_name),
        "getting model name");
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceTraceModelVersion(trace, &model_version),
        "getting model version");
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceTraceParentId(trace, &parent_id),
        "getting trace parent id");
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceTraceRequestId(trace, &request_id),
        "getting request id");

    *ss << "{\"id\":" << id;
    *ss << ",\"model_name\":\"" << model_name << "\"";
    *ss << ",\"model_version\":" << model_version;
    if (!std::string(request_id).empty()) {
      *ss << ",\"request_id\":\"" << request_id << "\"";
    }
    if (parent_id != 0) {
      *ss << ",\"parent_id\":" << parent_id;
    }
    *ss << "},";
  }

  const std::string activity_name =
      ts->RetrieveActivityName(trace, activity, timestamp_ns);
  *ss << "{\"id\":" << id << ",\"timestamps\":[";
  *ss << "{\"name\":\"" << activity_name << "\",\"ns\":" << timestamp_ns;
  *ss << "}]}";
}

namespace {

// Deleter releasing a buffer obtained from malloc().
struct HostCopyDeleter {
  void operator()(void* ptr) const { free(ptr); }
};

// Writes 'element_count' elements of type T read from 'base' to 'os' as a
// comma separated list. Every element is converted to PrintT before being
// written, which allows 8-bit integers to be emitted as numbers instead of
// as raw characters.
template <typename T, typename PrintT = T>
void
WriteTensorElements(
    std::ostream& os, const void* base, const size_t element_count)
{
  const T* elements = reinterpret_cast<const T*>(base);
  for (size_t e = 0; e < element_count; ++e) {
    if (e != 0) {
      os << ",";
    }
    os << static_cast<PrintT>(elements[e]);
  }
}

}  // namespace

// Tensor activity callback registered with the TRITONSERVER trace object
// (see SampleTrace()). 'userp' is a pointer to the
// std::shared_ptr<TraceManager::Trace> shared by a trace and its children.
//
// In TRACE_MODE_TRITON a JSON fragment describing the tensor (name, data,
// shape and datatype) is appended to the per-trace-id string stream. Tensor
// level tracing is not supported in OpenTelemetry mode, and only the
// TENSOR_QUEUE_INPUT, TENSOR_BACKEND_INPUT and TENSOR_BACKEND_OUTPUT
// activities are accepted.
//
// A tensor residing in GPU memory is first copied into a temporary host
// buffer, which is released on every exit path of this function.
//
// NOTE: for BYTES tensors whose content does not fit in 'byte_size', and for
// the INVALID datatype, the function returns early and leaves the fragment
// written so far in the stream.
void
TraceManager::TraceTensorActivity(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, const char* name,
    TRITONSERVER_DataType datatype, const void* base, size_t byte_size,
    const int64_t* shape, uint64_t dim_count,
    TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void* userp)
{
  if ((activity != TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT) &&
      (activity != TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT) &&
      (activity != TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT)) {
    LOG_ERROR << "Unsupported activity: "
              << TRITONSERVER_InferenceTraceActivityString(activity);
    return;
  }

  // 'buffer_base' always refers to host readable memory. For GPU tensors it
  // points into 'host_copy', which owns the temporary buffer and frees it
  // when the function returns, including on the early returns below.
  const void* buffer_base = base;
  std::unique_ptr<void, HostCopyDeleter> host_copy;
  if (memory_type == TRITONSERVER_MEMORY_GPU) {
#ifdef TRITON_ENABLE_GPU
    host_copy.reset(malloc(byte_size));
    if (host_copy == nullptr) {
      LOG_ERROR << "Failed to malloc CPU buffer";
      return;
    }
    FAIL_IF_CUDA_ERR(
        cudaMemcpy(host_copy.get(), base, byte_size, cudaMemcpyDeviceToHost),
        "copying buffer into CPU memory");
    buffer_base = host_copy.get();
#else
    LOG_ERROR << "GPU buffer is unsupported";
    return;
#endif  // TRITON_ENABLE_GPU
  }

  uint64_t id;
  LOG_TRITONSERVER_ERROR(
      TRITONSERVER_InferenceTraceId(trace, &id), "getting trace id");

  // The function may be called with different traces but the same 'userp',
  // group the activity of the same trace together for more readable output.
  auto ts =
      reinterpret_cast<std::shared_ptr<TraceManager::Trace>*>(userp)->get();

  if (ts->setting_->mode_ == TRACE_MODE_OPENTELEMETRY) {
    LOG_ERROR << "Tensor level tracing is not supported by the mode: "
              << TraceManager::InferenceTraceModeString(ts->setting_->mode_);
  } else if (ts->setting_->mode_ == TRACE_MODE_TRITON) {
    std::lock_guard<std::mutex> lk(ts->mtx_);
    std::stringstream* ss = nullptr;
    {
      if (ts->streams_.find(id) == ts->streams_.end()) {
        std::unique_ptr<std::stringstream> stream(new std::stringstream());
        ss = stream.get();
        ts->streams_.emplace(id, std::move(stream));
        ts->spawned_traces_tracker_.emplace(id);
      } else {
        ss = ts->streams_[id].get();
        // If the string stream is not newly created, add "," as there is
        // already content in the string stream
        *ss << ",";
      }
    }

    // collect and serialize trace details.
    *ss << "{\"id\":" << id << ",\"activity\":\""
        << TRITONSERVER_InferenceTraceActivityString(activity) << "\"";
    // collect tensor
    *ss << ",\"tensor\":{";
    // collect tensor name
    *ss << "\"name\":\"" << std::string(name) << "\"";
    // collect tensor data
    *ss << ",\"data\":\"";
    size_t element_count = 1;
    for (uint64_t i = 0; i < dim_count; i++) {
      element_count *= shape[i];
    }
    switch (datatype) {
      case TRITONSERVER_TYPE_BOOL: {
        // Any non-zero byte is reported as true.
        WriteTensorElements<uint8_t, bool>(*ss, buffer_base, element_count);
        break;
      }
      case TRITONSERVER_TYPE_UINT8: {
        // Widen so that the value is written as a number, not a character.
        WriteTensorElements<uint8_t, uint32_t>(
            *ss, buffer_base, element_count);
        break;
      }
      case TRITONSERVER_TYPE_UINT16: {
        WriteTensorElements<uint16_t>(*ss, buffer_base, element_count);
        break;
      }
      case TRITONSERVER_TYPE_UINT32: {
        WriteTensorElements<uint32_t>(*ss, buffer_base, element_count);
        break;
      }
      case TRITONSERVER_TYPE_UINT64: {
        WriteTensorElements<uint64_t>(*ss, buffer_base, element_count);
        break;
      }
      case TRITONSERVER_TYPE_INT8: {
        // Widen so that the value is written as a number, not a character.
        WriteTensorElements<int8_t, int32_t>(*ss, buffer_base, element_count);
        break;
      }
      case TRITONSERVER_TYPE_INT16: {
        WriteTensorElements<int16_t>(*ss, buffer_base, element_count);
        break;
      }
      case TRITONSERVER_TYPE_INT32: {
        WriteTensorElements<int32_t>(*ss, buffer_base, element_count);
        break;
      }
      case TRITONSERVER_TYPE_INT64: {
        WriteTensorElements<int64_t>(*ss, buffer_base, element_count);
        break;
      }

      // FP16 / BF16 already handled as binary blobs, no need to manipulate
      // here
      case TRITONSERVER_TYPE_FP16: {
        break;
      }
      case TRITONSERVER_TYPE_BF16: {
        break;
      }

      case TRITONSERVER_TYPE_FP32: {
        WriteTensorElements<float>(*ss, buffer_base, element_count);
        break;
      }
      case TRITONSERVER_TYPE_FP64: {
        WriteTensorElements<double>(*ss, buffer_base, element_count);
        break;
      }
      case TRITONSERVER_TYPE_BYTES: {
        // Each element is a 4-byte length followed by that many bytes.
        const char* cbase = reinterpret_cast<const char*>(buffer_base);
        size_t offset = 0;
        for (size_t e = 0; e < element_count; ++e) {
          if ((offset + sizeof(uint32_t)) > byte_size) {
            return;
          }
          const size_t len =
              *(reinterpret_cast<const uint32_t*>(cbase + offset));
          offset += sizeof(uint32_t);
          if ((offset + len) > byte_size) {
            return;
          }
          std::string str(cbase + offset, len);
          *ss << "\\\"" << str << "\\\"";
          offset += len;

          if (e < (element_count - 1))
            *ss << ",";
        }
        break;
      }
      case TRITONSERVER_TYPE_INVALID: {
        return;
      }
    }
    *ss << "\",\"shape\":\"";
    for (uint64_t i = 0; i < dim_count; i++) {
      *ss << shape[i];
      if (i < (dim_count - 1)) {
        *ss << ",";
      }
    }
    *ss << "\",\"dtype\":\"" << TRITONSERVER_DataTypeString(datatype) << "\"}";
    *ss << "}";
  }
}

// Terminates the JSON array in the trace file. Nothing is written when
// SaveTraces() never wrote to the non-indexed trace file.
TraceManager::TraceFile::~TraceFile()
{
  if (first_write_) {
    return;
  }
  trace_file_ << "]";
}

// Writes the content of 'trace_stream' to disk.
//
// When 'to_index_file' is true the traces are written as a complete JSON
// array to a new file named "<file_name_>.<index>", where the index is
// incremented on every such call. Otherwise the traces are appended, under
// the file mutex, to the single trace file: it is opened and the array is
// started on the first write, and a "," separator is inserted on every
// later write.
void
TraceManager::TraceFile::SaveTraces(
    std::stringstream& trace_stream, const bool to_index_file)
{
  try {
    if (!to_index_file) {
      std::lock_guard<std::mutex> lock(mu_);
      if (first_write_) {
        trace_file_.open(file_name_);
        trace_file_ << "[";
        first_write_ = false;
      } else {
        trace_file_ << ",";
      }
      trace_file_ << trace_stream.rdbuf();
      return;
    }

    const std::string indexed_name =
        file_name_ + "." + std::to_string(index_.fetch_add(1));
    std::ofstream indexed_file(indexed_name);
    indexed_file << "[" << trace_stream.rdbuf() << "]";
  }
  catch (const std::ofstream::failure& e) {
    LOG_ERROR << "failed creating trace file: " << e.what();
  }
  catch (...) {
    LOG_ERROR << "failed creating trace file: reason unknown";
  }
}

// Decides whether the current request should be traced and, if so, creates
// and returns a new Trace backed by a TRITONSERVER trace object.
//
// A trace is created when 'force_sample' is true, or when the request hits
// the sample rate while there is still trace count left. Returns nullptr
// when the request is not sampled or when the TRITONSERVER trace object
// could not be created.
std::shared_ptr<TraceManager::Trace>
TraceManager::TraceSetting::SampleTrace(bool force_sample)
{
  bool count_rate_hit = false;
  {
    std::lock_guard<std::mutex> lk(mu_);
    // [FIXME: DLIS-6033]
    // A current WAR for initiating trace based on propagated context only
    // Currently this is implemented through setting trace rate as 0
    if (rate_ != 0) {
      // If `count_` hits 0, `Valid()` returns false for this and all
      // following requests (unless `count_` is updated by a user).
      // At this point we only trace requests for which
      // `force_sample` is true.
      if (!Valid() && !force_sample) {
        return nullptr;
      }
      // `sample_` counts all requests, coming to server.
      count_rate_hit = (((++sample_) % rate_) == 0);
      if (count_rate_hit && (count_ > 0)) {
        // A regular sample: consume one unit of the remaining trace count.
        --count_;
        ++created_;
      } else if (count_rate_hit && (count_ == 0)) {
        // This condition is reached, when `force_sample` is true,
        // `count_rate_hit` is true, but `count_` is 0. Due to the
        // latter, we explicitly set `count_rate_hit` to false.
        count_rate_hit = false;
      }
    }
  }
  if (count_rate_hit || force_sample) {
    std::shared_ptr<TraceManager::Trace> lts(new Trace());
    // Split 'Trace' management to frontend and Triton trace separately
    // to avoid dependency between frontend request and Triton trace's
    // liveness
    auto trace_userp = new std::shared_ptr<TraceManager::Trace>(lts);
    TRITONSERVER_InferenceTrace* trace;
    TRITONSERVER_Error* err = TRITONSERVER_InferenceTraceTensorNew(
        &trace, level_, 0 /* parent_id */, TraceActivity, TraceTensorActivity,
        TraceRelease, trace_userp);
    if (err != nullptr) {
      LOG_TRITONSERVER_ERROR(err, "creating inference trace object");
      // The trace object was not created, so TraceRelease will not run for
      // 'trace_userp'; free it here.
      delete trace_userp;
      return nullptr;
    }
    lts->trace_ = trace;
    lts->trace_userp_ = trace_userp;
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceTraceId(trace, &lts->trace_id_),
        "getting trace id");
    return lts;
  }
  return nullptr;
}

// Appends the per-trace-id 'streams' of one collected trace to the pending
// trace stream, separating individual entries with ",". The pending stream
// is flushed to an indexed trace file when either the trace count is
// exhausted and every sampled trace has been collected, or a log frequency
// is set and that many traces are pending.
void
TraceManager::TraceSetting::WriteTrace(
    const std::unordered_map<uint64_t, std::unique_ptr<std::stringstream>>&
        streams)
{
  std::unique_lock<std::mutex> lock(mu_);

  // Separate this trace from the traces already pending in the stream.
  if (sample_in_stream_ != 0) {
    trace_stream_ << ",";
  }
  ++sample_in_stream_;
  ++collected_;

  // Need to add ',' unless it is the last trace in the group
  size_t remaining = streams.size();
  for (const auto& entry : streams) {
    trace_stream_ << entry.second->rdbuf();
    --remaining;
    if (remaining != 0) {
      trace_stream_ << ",";
    }
  }

  // Write to file with index when one of the following is true
  // 1. trace_count is specified and that number of traces has been collected
  // 2. log_frequency is specified and that number of traces has been
  // collected
  const bool count_exhausted = (count_ == 0) && (collected_ == sample_);
  const bool frequency_reached =
      (log_frequency_ != 0) && (sample_in_stream_ >= log_frequency_);
  if (!count_exhausted && !frequency_reached) {
    return;
  }

  // Reset variables and release lock before saving to file
  sample_in_stream_ = 0;
  std::stringstream pending;
  trace_stream_.swap(pending);
  lock.unlock();

  file_->SaveTraces(pending, true /* to_index_file */);
}

// Constructs a trace setting from the given field values. Each
// 'xxx_specified' flag is stored alongside its field, and all counters
// (sample_, created_, collected_, sample_in_stream_) start at zero.
//
// The constructor also records, in 'invalid_reason_', the first reason found
// for which the setting cannot be used for tracing: tracing is disabled, the
// sample rate is zero, or no trace file name is given in TRACE_MODE_TRITON.
// 'invalid_reason_' is not assigned here when none of these apply.
TraceManager::TraceSetting::TraceSetting(
    const TRITONSERVER_InferenceTraceLevel level, const uint32_t rate,
    const int32_t count, const uint32_t log_frequency,
    const std::shared_ptr<TraceFile>& file, const InferenceTraceMode mode,
    const TraceConfigMap& config_map, const bool level_specified,
    const bool rate_specified, const bool count_specified,
    const bool log_frequency_specified, const bool filepath_specified,
    const bool mode_specified, const bool config_map_specified)
    : level_(level), rate_(rate), count_(count), log_frequency_(log_frequency),
      file_(file), mode_(mode), config_map_(config_map),
      level_specified_(level_specified), rate_specified_(rate_specified),
      count_specified_(count_specified),
      log_frequency_specified_(log_frequency_specified),
      filepath_specified_(filepath_specified), mode_specified_(mode_specified),
      config_map_specified_(config_map_specified), sample_(0), created_(0),
      collected_(0), sample_in_stream_(0)
{
  // The checks are ordered; only the first matching reason is recorded.
  if (level_ == TRITONSERVER_TRACE_LEVEL_DISABLED) {
    invalid_reason_ = "tracing is disabled";
  } else if (rate_ == 0) {
    invalid_reason_ = "sample rate must be non-zero";
  } else if (mode_ == TRACE_MODE_TRITON && file_->FileName().empty()) {
    // Only the Triton mode writes traces to a file.
    invalid_reason_ = "trace file name is not given";
  }
}

// Flushes the traces still pending in the stream when the setting is
// destroyed. This only applies to TRACE_MODE_TRITON. The traces go to an
// indexed file when a log frequency is set and to the single trace file
// otherwise.
TraceManager::TraceSetting::~TraceSetting()
{
  const bool has_pending_traces = (sample_in_stream_ != 0);
  if ((mode_ != TRACE_MODE_TRITON) || !has_pending_traces) {
    return;
  }
  const bool to_index_file = (log_frequency_ != 0);
  file_->SaveTraces(trace_stream_, to_index_file);
}
}}  // namespace triton::server


================================================
FILE: src/tracer.h
================================================
// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <atomic>
#include <condition_variable>
#include <fstream>
#include <memory>
#include <mutex>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <unordered_map>
#include <variant>

#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING)
#include "opentelemetry/context/propagation/global_propagator.h"
#include "opentelemetry/exporters/otlp/otlp_http_exporter_factory.h"
#include "opentelemetry/nostd/shared_ptr.h"
#include "opentelemetry/sdk/resource/resource.h"
#include "opentelemetry/sdk/trace/batch_span_processor_options.h"
#include "opentelemetry/sdk/trace/processor.h"
#include "opentelemetry/sdk/trace/tracer_provider_factory.h"
#include "opentelemetry/trace/context.h"
#include "opentelemetry/trace/propagation/http_trace_context.h"
#include "opentelemetry/trace/provider.h"
namespace otlp = opentelemetry::exporter::otlp;
namespace otel_trace_sdk = opentelemetry::sdk::trace;
namespace otel_trace_api = opentelemetry::trace;
namespace otel_cntxt = opentelemetry::context;
namespace otel_resource = opentelemetry::sdk::resource;
#endif
#include "triton/core/tritonserver.h"
#define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
#define TRITONJSON_STATUSSUCCESS nullptr
#define TRITONJSON_STATUSRETURN(M) \
  return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
#include "triton/common/triton_json.h"

namespace triton { namespace server {

using TraceConfig = std::vector<
    std::pair<std::string, std::variant<std::string, int, uint32_t>>>;
// Key is the trace mode (the numeric InferenceTraceMode value as a string).
using TraceConfigMap = std::unordered_map<std::string, TraceConfig>;
#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING)
using AbstractCarrier = otel_cntxt::propagation::TextMapCarrier;
#else
using AbstractCarrier = void*;
#endif

// Common OTel span keys to store in OTel context
// with the corresponding trace id.
constexpr char kRootSpan[] = "root_span";
constexpr char kRequestSpan[] = "request_span";
constexpr char kComputeSpan[] = "compute_span";

// OTel tracer name
constexpr char kTritonTracer[] = "triton-server";

/// Trace modes.
///
/// The mode selects the tracing API through which collected traces are
/// reported. The numeric value of a mode, converted to a string, is used as
/// the key of the mode's settings in TraceConfigMap.
typedef enum tracemode_enum {
  /// Default is Triton tracing API
  TRACE_MODE_TRITON = 0,
  /// OpenTelemetry API for tracing
  TRACE_MODE_OPENTELEMETRY = 1
} InferenceTraceMode;

//
// Manager for tracing to a file.
//
class TraceManager {
 private:
  class TraceSetting;

 public:
  static constexpr int32_t MIN_TRACE_COUNT_VALUE{-1};
  // The new field values for a setting, 'clear_xxx_' indicates
  // whether to clear the previously specified field value.
  // If false, 'xxx_' will be used as the new field value.
  // If 'xxx_' is nullptr, the field value will not be updated.
  struct NewSetting {
    // Every field starts out "unset": no clear flag is raised and no
    // replacement value is supplied. The defaults are spelled out on the
    // member declarations below.
    NewSetting() {}

    // Trace level: clear flag and optional replacement value.
    bool clear_level_{false};
    const TRITONSERVER_InferenceTraceLevel* level_{nullptr};

    // Trace rate: clear flag and optional replacement value.
    bool clear_rate_{false};
    const uint32_t* rate_{nullptr};

    // Trace count: clear flag and optional replacement value.
    bool clear_count_{false};
    const int32_t* count_{nullptr};

    // Log frequency: clear flag and optional replacement value.
    bool clear_log_frequency_{false};
    const uint32_t* log_frequency_{nullptr};

    // Optional replacement trace mode (no matching clear flag exists).
    const InferenceTraceMode* mode_{nullptr};

    // Optional replacement config map (no matching clear flag exists).
    const TraceConfigMap* config_map_{nullptr};
  };

  struct Trace;
  // Create a trace manager that appends trace information
  // to a specified file as global setting.
  static TRITONSERVER_Error* Create(
      TraceManager** manager, const TRITONSERVER_InferenceTraceLevel level,
      const uint32_t rate, const int32_t count, const uint32_t log_frequency,
      const std::string& filepath, const InferenceTraceMode mode,
      const TraceConfigMap& config_map);

  // Destroying the manager runs CleanupTracer(), so any tracer state set up
  // by InitTracer() is torn down together with the manager.
  ~TraceManager() { CleanupTracer(); }

  /// Options required at Trace initialization
  struct TraceStartOptions {
#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING)
    // OpenTelemetry context to start the trace from; defaults to an empty
    // context.
    otel_cntxt::Context propagated_context{otel_cntxt::Context{}};
#else
    // Placeholder used when OpenTelemetry tracing is compiled out, so the
    // struct keeps the same member name on every build.
    void* propagated_context{nullptr};
#endif
    // Trace setting the trace is sampled from.
    // NOTE(review): presumably the setting resolved for the requested model
    // by GetTraceStartOptions() -- confirm against the implementation.
    std::shared_ptr<TraceSetting> trace_setting{nullptr};
    // When true, the trace is initiated regardless of the configured rate
    // and count (see TraceSetting::SampleTrace).
    bool force_sample{false};
  };

  // Returns TraceStartOptions for specified model
  TraceStartOptions GetTraceStartOptions(
      AbstractCarrier& carriers, const std::string& model_name);

  // Return a trace that should be used to collect trace activities
  // for an inference request. Return nullptr if no tracing should occur.
  std::shared_ptr<Trace> SampleTrace(const TraceStartOptions& start_options);

  // Update global setting if 'model_name' is empty, otherwise, model setting is
  // updated.
  TRITONSERVER_Error* UpdateTraceSetting(
      const std::string& model_name, const NewSetting& new_setting);

  void GetTraceSetting(
      const std::string& model_name, TRITONSERVER_InferenceTraceLevel* level,
      uint32_t* rate, int32_t* count, uint32_t* log_frequency,
      std::string* filepath, InferenceTraceMode* mode,
      TraceConfigMap* config_map);

  // Sets provided TraceSetting with correct trace settings for provided model.
  void GetTraceSetting(
      const std::string& model_name,
      std::shared_ptr<TraceSetting>& trace_setting);

  // Return the current timestamp.
  static uint64_t CaptureTimestamp()
  {
    // Read the monotonic clock and express the reading as a whole number of
    // nanoseconds since that clock's epoch.
    const auto since_epoch =
        std::chrono::steady_clock::now().time_since_epoch();
    const auto as_nanoseconds =
        std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch);
    return as_nanoseconds.count();
  }

  static void TraceRelease(TRITONSERVER_InferenceTrace* trace, void* userp);

  static const char* InferenceTraceModeString(InferenceTraceMode mode);

  /// In OpenTelemetry trace mode initializes Opentelemetry exporter, processor,
  /// and sets the global trace provider.
  /// In Triton trace mode is a no-op.
  ///
  /// \param config_map A config map, which stores all parameters, specified
  /// by user.
  void InitTracer(const TraceConfigMap& config_map);

  /// In OpenTelemetry trace mode cleans global tracer provider,
  /// set by InitTracer.
  /// In Triton trace mode is a no-op.
  void CleanupTracer();
#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING)
  void ProcessOpenTelemetryParameters(
      const triton::server::TraceConfigMap& config_map,
      otlp::OtlpHttpExporterOptions& exporter_options,
      otel_resource::ResourceAttributes& attributes,
      otel_trace_sdk::BatchSpanProcessorOptions& processor_options);
#endif

  // State collected for one sampled trace (see SampleTrace()), including the
  // per-trace-id output streams and, when OpenTelemetry tracing is compiled
  // in, the spans started on behalf of the trace.
  struct Trace {
    // 'trace_userp_' is initialized together with the other raw members so
    // that it never holds an indeterminate value before it is assigned.
    Trace() : trace_(nullptr), trace_userp_(nullptr), trace_id_(0) {}
    ~Trace();
    std::shared_ptr<TraceSetting> setting_;
    // Group the spawned traces by trace ID for better formatting
    std::mutex mtx_;
    std::unordered_map<uint64_t, std::unique_ptr<std::stringstream>> streams_;
    // We use the set to track the number of spawned traces, so that
    // when TraceManager::TraceRelease() with 'trace_userp_' is called
    // we can safely release 'trace_userp_'
    std::set<uint64_t> spawned_traces_tracker_;
    // Triton trace object that this trace is associated with,
    // 'Trace' object does not take ownership of 'trace_'. The caller of
    // SampleTrace() must call TraceManager::TraceRelease() with 'trace_userp_'
    // to properly release the resources if 'trace_' is not passed to a
    // TRITONSERVER_ServerInferAsync() call.
    TRITONSERVER_InferenceTrace* trace_;
    void* trace_userp_;

    uint64_t trace_id_;

    // Capture a timestamp generated outside of triton and associate it
    // with this trace.
    void CaptureTimestamp(const std::string& name, uint64_t timestamp_ns);

    /// Returns activity name. For custom activities, retrieves the name from
    /// the trace context. For other activities, returns default name.
    ///
    /// \param trace TRITONSERVER_InferenceTrace instance.
    /// \param activity  Trace activity.
    /// \param timestamp_ns Steady timestamp, which is used to calculate
    /// OpenTelemetry SystemTimestamp to display span on a timeline, and
    /// OpenTelemetry SteadyTimestamp to calculate the duration on the span
    /// with better precision.
    /// \return The name of the activity.
    std::string RetrieveActivityName(
        TRITONSERVER_InferenceTrace* trace,
        TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns);

#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING)
    /// Reports TRITONSERVER_InferenceTraceActivity as event to
    /// the currently active span. If activity is an instance of
    /// `TRITONSERVER_TRACE_REQUEST_START` or
    /// `TRITONSERVER_TRACE_COMPUTE_START`,
    /// it starts a new request or compute span. For the request span it
    /// adds some triton related attributes, and adds this span to
    /// a span stack, corresponding to the current trace. Alternatively,
    /// if activity is `TRITONSERVER_TRACE_REQUEST_END` or
    /// `TRITONSERVER_TRACE_COMPUTE_END`, it ends the corresponding span.
    ///
    /// \param trace TRITONSERVER_InferenceTrace instance.
    /// \param activity  Trace activity.
    /// \param timestamp_ns Steady timestamp, which is used to calculate
    /// OpenTelemetry SystemTimestamp to display span on a timeline, and
    /// OpenTelemetry SteadyTimestamp to calculate the duration on the span
    /// with better precision.
    void ReportToOpenTelemetry(
        TRITONSERVER_InferenceTrace* trace,
        TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns);

    /// Starts a span with the provided timestamp and name.
    ///
    /// \param display_name Span's name, which will be shown in the trace.
    /// \param raw_timestamp_ns Steady timestamp, which is used to calculate
    /// OpenTelemetry SystemTimestamp to display span on a timeline, and
    /// OpenTelemetry SteadyTimestamp to calculate the duration on the span
    /// with better precision.
    /// \param trace_id Trace id.
    /// \return A shared pointer to a newly created OpenTelemetry span.
    opentelemetry::nostd::shared_ptr<otel_trace_api::Span> StartSpan(
        std::string display_name, const uint64_t& raw_timestamp_ns,
        uint64_t trace_id);

    // A map to hold spans. Any trace can spawn any amount of child traces,
    // e.g. ensemble model and BLS. This map holds
    // ( trace id, stack of started spans ) pair and for each trace keeps
    // started spans alive for the duration of the traced
    // event and helps to preserve parent-child relationship.
    std::unordered_map<
        uint64_t, std::unique_ptr<std::stack<
                      opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>>>
        span_stacks_;

    // Root span. Some events should be recorded in the root span, while
    // request span is still alive and present in the stack.
    opentelemetry::nostd::shared_ptr<otel_trace_api::Span> root_span_;

    /// Prepares trace context to propagate to TRITONSERVER_InferenceTrace.
    /// Trace context follows W3C Trace Context specification.
    /// Ref. https://www.w3.org/TR/trace-context/.
    /// OpenTelemetry ref:
    /// https://github.com/open-telemetry/opentelemetry-cpp/blob/4bd64c9a336fd438d6c4c9dad2e6b61b0585311f/api/include/opentelemetry/trace/propagation/http_trace_context.h#L94-L113
    ///
    /// \param span An OpenTelemetry span, which is used to extract
    /// OpenTelemetry's trace_id and span_id.
    /// \param buffer Buffer used when writing JSON representation of
    /// OpenTelemetry's context.
    void PrepareTraceContext(
        opentelemetry::nostd::shared_ptr<otel_trace_api::Span> span,
        triton::common::TritonJson::WriteBuffer* buffer);

   private:
    // OpenTelemetry SDK relies on system's clock for event timestamps.
    // Triton Tracing records timestamps using steady_clock. This is a
    // monotonic clock, i.e. time is always moving forward. It is not related
    // to wall clock time (for example, it can be time since last reboot).
    // `time_offset_` is recorded when the trace instance is created,
    // and further used to calculate `opentelemetry::common::SystemTimestamp`
    // as `time_offset_` + std::chrono::nanoseconds{timestamp_ns}. This way,
    // every event recorded timestamp will receive a timestamp of
    // <time when the trace started> + <nanoseconds passed since the start>
    // FIXME: add steady clock timestamps to Triton OpenTelemetry SDK,
    // when created
    const std::chrono::time_point<std::chrono::system_clock> time_offset_ =
        std::chrono::system_clock::now() -
        std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::steady_clock::now().time_since_epoch());

    /// Starts a compute or request span based on `activity`.
    /// For request spans, it will add the following attributes to the span:
    /// `model_name`, `model_version`, `trace_id`, `parent_id`.
    ///
    /// \param trace TRITONSERVER_InferenceTrace, used to request model's name,
    /// version, trace parent_id from the backend.
    /// \param activity Trace activity.
    /// \param timestamp_ns Steady timestamp, which is used to calculate
    /// OpenTelemetry SystemTimestamp to display span on a timeline, and
    /// OpenTelemetry SteadyTimestamp to calculate the duration on the span
    /// with better precision.
    /// \param trace_id Trace id.
    /// \param display_name Span name.
    void StartSpan(
        TRITONSERVER_InferenceTrace* trace,
        TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
        uint64_t trace_id, std::string display_name);

    /// Ends the span on the top of the stack, related to trace with `trace_id`.
    ///
    /// \param trace_id Trace id.
    void EndSpan(uint64_t trace_id);

    /// Ends the span on the top of the stack, related to trace with `trace_id`
    /// at specified steady timestamp.
    ///
    /// \param raw_timestamp_ns Steady timestamp to use as
    /// `EndSpanOptions::end_steady_time`.
    /// \param trace_id Trace id.
    void EndSpan(const uint64_t& raw_timestamp_ns, uint64_t trace_id);

    /// Adds an event to the span on the top of the stack, related to trace
    /// with `trace_id`. If activity is TRITONSERVER_TRACE_REQUEST_START,
    /// or TRITONSERVER_TRACE_COMPUTE_START, starts a new span and adds it
    /// to the span's stack.
    ///
    /// \param trace TRITONSERVER_InferenceTrace, used to request model's name,
    /// version, trace parent_id from the backend.
    /// \param activity Trace activity.
    /// \param timestamp_ns Timestamp of the provided event.
    /// \param trace_id Trace id.
    void AddEvent(
        TRITONSERVER_InferenceTrace* trace,
        TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
        uint64_t trace_id);

    /// Adds an event to the OpenTelemetry span.
    ///
    /// \param event An event to add to the span.
    /// \param timestamp_ns Timestamp of the provided event.
    /// \param trace_id Trace id.
    void AddEvent(
        const std::string& event, uint64_t timestamp_ns, uint64_t trace_id);
#endif
  };

 private:
  TraceManager(
      const TRITONSERVER_InferenceTraceLevel level, const uint32_t rate,
      const int32_t count, const uint32_t log_frequency,
      const std::string& filepath, const InferenceTraceMode mode,
      const TraceConfigMap& config_map);

  static void TraceActivity(
      TRITONSERVER_InferenceTrace* trace,
      TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
      void* userp);

  static void TraceTensorActivity(
      TRITONSERVER_InferenceTrace* trace,
      TRITONSERVER_InferenceTraceActivity activity, const char* name,
      TRITONSERVER_DataType datatype, const void* base, size_t byte_size,
      const int64_t* shape, uint64_t dim_count,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void* userp);

  // Helper function for UpdateTraceSetting() as recursive update may be needed
  // if global setting is being updated
  TRITONSERVER_Error* UpdateTraceSettingInternal(
      const std::string& model_name, const NewSetting& new_setting);

  // Destination file for collected traces.
  class TraceFile {
   public:
    // Remembers the target file name. The index counter and the first-write
    // flag get their starting values from the member declarations below.
    TraceFile(const std::string& file_name) : file_name_(file_name) {}
    ~TraceFile();

    // Writes the traces held in 'trace_stream' to disk. When 'to_index_file'
    // is true the output goes to 'file_name.index', and the index is
    // incremented every time traces are written to an indexed file. When it
    // is false the output goes to 'file_name'.
    void SaveTraces(std::stringstream& trace_stream, const bool to_index_file);

    // Name of the file this object was created for.
    const std::string& FileName() { return file_name_; }

   private:
    const std::string file_name_;
    // The file index for the next index file write.
    std::atomic<uint32_t> index_{0};

    // Multiple traces may finish and write to the trace file at the same
    // time.
    std::mutex mu_;
    std::ofstream trace_file_;
    bool first_write_{true};
  };

  // One set of trace settings plus the bookkeeping used while sampling and
  // writing traces under that setting.
  class TraceSetting {
   public:
    // Builds a placeholder setting: tracing disabled, Triton mode, no field
    // marked as specified, and an invalid reason recorded so that Valid()
    // returns false.
    TraceSetting()
        : level_(TRITONSERVER_TRACE_LEVEL_DISABLED), rate_(0), count_(-1),
          log_frequency_(0), mode_(TRACE_MODE_TRITON), level_specified_(false),
          rate_specified_(false), count_specified_(false),
          log_frequency_specified_(false), filepath_specified_(false),
          mode_specified_(false), config_map_specified_(false), sample_(0),
          created_(0), collected_(0), sample_in_stream_(0)
    {
      invalid_reason_ = "Setting hasn't been initialized";
    }
    // Builds a setting from explicit field values. Each 'xxx_specified' flag
    // records whether the matching field was specified for this setting or
    // mirrors the upper level setting.
    TraceSetting(
        const TRITONSERVER_InferenceTraceLevel level, const uint32_t rate,
        const int32_t count, const uint32_t log_frequency,
        const std::shared_ptr<TraceFile>& file, const InferenceTraceMode mode,
        const TraceConfigMap& config_map, const bool level_specified,
        const bool rate_specified, const bool count_specified,
        const bool log_frequency_specified, const bool filepath_specified,
        const bool mode_specified, const bool config_map_specified);

    ~TraceSetting();

    // A setting is usable only when no invalid reason is recorded and the
    // trace count is non-zero.
    bool Valid() { return invalid_reason_.empty() && (count_ != 0); }
    // Explanation of why the setting is invalid; empty when none is recorded.
    const std::string& Reason() { return invalid_reason_; }

    // Takes the per-trace-id streams of a trace for output.
    // NOTE(review): presumably appends to 'trace_stream_' and flushes to
    // 'file_' according to 'log_frequency_' -- confirm in the implementation.
    void WriteTrace(
        const std::unordered_map<uint64_t, std::unique_ptr<std::stringstream>>&
            streams);

    // Pass `force_sample` = true, when trace needs to be initiated
    // no matter what `rate` and `count` is.
    // For example, in OpenTelemetry tracing mode, we always initiate tracing
    // when OpenTelemetry context was propagated from client.
    std::shared_ptr<Trace> SampleTrace(bool force_sample = false);

    const TRITONSERVER_InferenceTraceLevel level_;
    const uint32_t rate_;
    // Not const, unlike the neighbouring fields.
    // NOTE(review): presumably updated as traces are sampled -- confirm.
    int32_t count_;
    const uint32_t log_frequency_;
    const std::shared_ptr<TraceFile> file_;
    const InferenceTraceMode mode_;
    const TraceConfigMap config_map_;

    // Whether the field value is specified or mirror from upper level setting
    const bool level_specified_;
    const bool rate_specified_;
    const bool count_specified_;
    const bool log_frequency_specified_;
    const bool filepath_specified_;
    const bool mode_specified_;
    const bool config_map_specified_;

   private:
    // Non-empty when the setting must not be used; see Valid() / Reason().
    std::string invalid_reason_;

    std::mutex mu_;

    // Used to sample a trace based on sampling rate.
    uint64_t sample_;

    // Used to track the status of trace count feature
    uint64_t created_;
    uint64_t collected_;

    // Tracking traces that haven't been saved to file
    uint32_t sample_in_stream_;
    std::stringstream trace_stream_;
  };

  // Trace settings
  // Note that 'global_default_' is not used for actual trace sampling,
  // it is used to revert the field values when clearing fields in
  // 'global_setting_'
  std::unique_ptr<TraceSetting> global_default_;
  std::shared_ptr<TraceSetting> global_setting_;
  std::unordered_map<std::string, std::shared_ptr<TraceSetting>>
      model_settings_;
  // The collection of models that have their own trace setting while
  // some of the fields are mirroring global setting.
  std::set<std::string> fallback_used_models_;

  // The collection of files that are used in trace settings, use to
  // avoid creating duplicate TraceFile objects for the same file path.
  std::unordered_map<std::string, std::weak_ptr<TraceFile>> trace_files_;

  // lock for accessing trace setting. 'w_mu_' for write and
  // 'r_mu_' for read / write
  std::mutex w_mu_;
  std::mutex r_mu_;
};

}}  // namespace triton::server


================================================
FILE: src/triton_signal.cc
================================================
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "triton_signal.h"

#include <iostream>

#ifdef _WIN32
#include <windows.h>
#else
#include <csignal>
#endif

#define BOOST_STACKTRACE_USE_ADDR2LINE
#include <boost/stacktrace.hpp>

namespace triton { namespace server {

// Exit mutex and cv used to signal the main thread that it should
// close the server and exit.
bool signal_exiting_ = false;
std::mutex signal_exit_mu_;
std::condition_variable signal_exit_cv_;

namespace {

// Marks the process as exiting and wakes every waiter on the exit condition
// variable. Only the first call notifies; later calls are no-ops.
void
CommonSignalHandler()
{
  bool first_request = false;
  {
    std::lock_guard<std::mutex> guard(signal_exit_mu_);

    // Remember whether this call is the one that flips the flag.
    first_request = !signal_exiting_;
    signal_exiting_ = true;
  }

  // Notify outside of the critical section, and only once.
  if (first_request) {
    signal_exit_cv_.notify_all();
  }
}

}  // namespace

#ifdef _WIN32

// Windows

// Console control handler. Returns TRUE after requesting shutdown for the
// control events listed below, and FALSE for any other event.
BOOL WINAPI
CtrlHandler(DWORD fdwCtrlType)
{
  // Control events that should shut the server down.
  const bool shutdown_requested =
      (fdwCtrlType == CTRL_C_EVENT) || (fdwCtrlType == CTRL_CLOSE_EVENT) ||
      (fdwCtrlType == CTRL_BREAK_EVENT) ||
      (fdwCtrlType == CTRL_LOGOFF_EVENT) ||
      (fdwCtrlType == CTRL_SHUTDOWN_EVENT);

  if (!shutdown_requested) {
    return FALSE;
  }

  CommonSignalHandler();
  return TRUE;
}

// Installs CtrlHandler as a console control handler. Returns nullptr on
// success, or an internal error if the handler cannot be installed.
TRITONSERVER_Error*
RegisterSignalHandler()
{
  const bool installed = SetConsoleCtrlHandler(CtrlHandler, TRUE);
  if (installed) {
    return nullptr;  // success
  }

  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_INTERNAL, "SetConsoleCtrlHandler failed");
}

#else

namespace {

// Non-Windows

// Handler installed for SIGINT and SIGTERM: logs the signal number to stdout
// and then requests a graceful exit through CommonSignalHandler().
//
// NOTE(review): stream output and the mutex taken by CommonSignalHandler()
// are not on the POSIX async-signal-safe list -- confirm this is acceptable
// for how the handler is used.
void
SignalHandler(int signum)
{
  std::cout << "Signal (" << signum << ") received." << std::endl;
  CommonSignalHandler();
}

// Handler installed for SIGSEGV and SIGABRT: prints the signal number and a
// stack trace to stderr, then re-raises the signal with the default action
// restored.
//
// NOTE(review): stream output and stack trace collection are not on the
// POSIX async-signal-safe list -- confirm this best-effort diagnostic is
// acceptable in a crashing process.
void
ErrorSignalHandler(int signum)
{
  std::cerr << "Signal (" << signum << ") received." << std::endl;
  std::cerr << boost::stacktrace::stacktrace() << std::endl;

  // Trigger the core dump
  // The default disposition is restored first so that re-raising the signal
  // does not re-enter this handler.
  signal(signum, SIG_DFL);
  raise(signum);
}

}  // namespace

// Installs the process signal handlers.
//
// SIGINT and SIGTERM are routed to SignalHandler so the server can exit
// gracefully; SIGSEGV and SIGABRT are routed to ErrorSignalHandler so a
// crash is reported before the process terminates.
//
// Returns nullptr on success. If any handler cannot be installed, an
// internal error is returned instead of silently reporting success. Every
// registration is still attempted so that as many handlers as possible are
// in place.
TRITONSERVER_Error*
RegisterSignalHandler()
{
  const struct {
    int signum;
    void (*handler)(int);
  } registrations[] = {
      // Trap SIGINT and SIGTERM to allow server to exit gracefully
      {SIGINT, SignalHandler},
      {SIGTERM, SignalHandler},
      // Trap SIGSEGV and SIGABRT to exit when server crashes
      {SIGSEGV, ErrorSignalHandler},
      {SIGABRT, ErrorSignalHandler}};

  bool failed = false;
  for (const auto& registration : registrations) {
    // signal() reports failure by returning SIG_ERR.
    if (signal(registration.signum, registration.handler) == SIG_ERR) {
      failed = true;
    }
  }

  if (failed) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL, "failed to register signal handler");
  }

  return nullptr;  // success
}

#endif

}}  // namespace triton::server


================================================
FILE: src/triton_signal.h
================================================
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <condition_variable>
#include <mutex>

#include "triton/core/tritonserver.h"

namespace triton { namespace server {

// Exit mutex and cv used to signal the main thread that it should
// close the server and exit.
extern bool signal_exiting_;
extern std::mutex signal_exit_mu_;
extern std::condition_variable signal_exit_cv_;

// Register signal handler. Returns nullptr on success, or an error object
// describing the failure.
TRITONSERVER_Error* RegisterSignalHandler();

}}  // namespace triton::server


================================================
FILE: src/vertex_ai_server.cc
================================================
// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "vertex_ai_server.h"

#include <memory>

#include "common.h"

namespace triton { namespace server {

const std::string VertexAiAPIServer::binary_mime_type_(
    "application/vnd.vertex-ai-triton.binary+json;json-header-size=");
const std::string VertexAiAPIServer::redirect_header_(
    "X-Vertex-Ai-Triton-Redirect");

// Builds the Vertex AI front-end on top of HTTPAPIServer.
//
// The base server is created without port reuse and with an empty header
// forward pattern. 'prediction_route' and 'health_route' initialize the
// regexes that Handle() matches request paths against, health checks are
// answered in "ready" mode, and 'default_model_name' (with an empty version
// string) is the model inferred when a prediction request carries no
// redirect header.
VertexAiAPIServer::VertexAiAPIServer(
    const std::shared_ptr<TRITONSERVER_Server>& server,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<SharedMemoryManager>& shm_manager, const int32_t port,
    const std::string address, const int thread_cnt,
    const std::string& prediction_route, const std::string& health_route,
    const std::string& default_model_name, const size_t max_input_size,
    const RestrictedFeatures& restricted_apis)
    : HTTPAPIServer(
          server, trace_manager, shm_manager, port, false /* reuse_port */,
          address, "" /* header_forward_pattern */, thread_cnt, max_input_size,
          restricted_apis),
      prediction_regex_(prediction_route), health_regex_(health_route),
      health_mode_("ready"), model_name_(default_model_name),
      model_version_str_("")
{
}

// Determines how many leading bytes of the request body form the inference
// header.
//
// When the Content-Type header carries the Vertex AI binary MIME type, the
// size is parsed from the text that follows the MIME type prefix. Otherwise
// the whole body ('content_length') is treated as the header.
//
// \param req The HTTP request whose Content-Type header is inspected.
// \param content_length Total length of the request body.
// \param header_length Returns the inference header length.
// \return nullptr on success, or a TRITONSERVER_ERROR_INVALID_ARG error when
// the MIME type is misplaced, the size cannot be parsed, or the size is
// outside of [0, content_length].
TRITONSERVER_Error*
VertexAiAPIServer::GetInferenceHeaderLength(
    evhtp_request_t* req, int32_t content_length, size_t* header_length)
{
  // Check mime type and set inference header length.
  // Set to content length in case that it is not specified
  *header_length = content_length;
  const char* content_type_c_str =
      evhtp_kv_find(req->headers_in, kContentTypeHeader);
  if (content_type_c_str == NULL) {
    return nullptr;
  }

  const std::string content_type(content_type_c_str);
  const size_t pos = content_type.find(binary_mime_type_);
  if (pos == std::string::npos) {
    return nullptr;
  }
  if (pos != 0) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("expect MIME type for binary data starts with '") +
         binary_mime_type_ + "', got: " + content_type)
            .c_str());
  }

  // Parse. The MIME type is known to start at offset 0, so the remainder of
  // the header value is the size text.
  const std::string size_str = content_type.substr(binary_mime_type_.length());
  int32_t parsed_value = 0;
  try {
    // std::stoi throws std::invalid_argument for non-numeric text and
    // std::out_of_range for values that do not fit in an int. std::atoi
    // reports neither: it returns 0 for non-numeric text and has undefined
    // behavior on overflow.
    parsed_value = std::stoi(size_str);
  }
  catch (const std::logic_error&) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("Unable to parse inference header size, got: ") +
         size_str)
            .c_str());
  }

  // Check if the content length is in proper range
  if ((parsed_value < 0) || (parsed_value > content_length)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("inference header size should be in range (0, ") +
         std::to_string(content_length) + "), got: " + size_str)
            .c_str());
  }
  *header_length = parsed_value;
  return nullptr;
}

// Entry point for every Vertex AI request.
//
// Paths matching the health route are answered as server health checks.
// Paths matching the prediction route either run inference on the default
// model or, when the redirect header is present, are dispatched to the
// Triton endpoint named by that header. Mutating shared-memory and model
// control endpoints are refused with 403 on the redirect path. Anything
// that matches no route is answered with 400.
void
VertexAiAPIServer::Handle(evhtp_request_t* req)
{
  LOG_VERBOSE(1) << "Vertex AI request: " << req->method << " "
                 << req->uri->path->full;

  const std::string request_path(req->uri->path->full);

  // Shared tail for every request that matches no known route.
  const auto reply_bad_request = [req]() {
    LOG_VERBOSE(1) << "Vertex AI error: " << req->method << " "
                   << req->uri->path->full << " - "
                   << static_cast<int>(EVHTP_RES_BADREQ);

    evhtp_send_reply(req, EVHTP_RES_BADREQ);
  };

  if (RE2::FullMatch(request_path, health_regex_)) {
    HandleServerHealth(req, health_mode_);
    return;
  }

  if (!RE2::FullMatch(request_path, prediction_regex_)) {
    reply_bad_request();
    return;
  }

  // Secondary regex matching if redirection is requested
  const char* redirect_value =
      evhtp_kv_find(req->headers_in, redirect_header_.c_str());
  if (redirect_value == nullptr) {
    // Infer the default model
    HandleInfer(req, model_name_, model_version_str_);
    return;
  }

  // Endpoint redirection is requested
  // Prepend the header value with "/" to form the regex expected by
  // Triton endpoints
  const std::string redirect_endpoint = std::string("/") + redirect_value;
  LOG_VERBOSE(1) << "Redirecting Vertex AI request: " << redirect_endpoint;

  // Shared tail for redirects to endpoints that are not permitted here.
  const auto reply_forbidden = [req, &redirect_endpoint]() {
    LOG_VERBOSE(1) << "Vertex AI redirect blocked: " << redirect_endpoint;
    evhtp_send_reply(req, EVHTP_RES_FORBIDDEN);
  };

  // The endpoint handlers in base class expects specific HTTP methods
  // while the Vertex AI endpoint only accepts "POST", so the method will
  // be set to endpoint expected one before invoking the handlers
  if (req->method != htp_method_POST) {
    evhtp_send_reply(req, EVHTP_RES_METHNALLOWED);
    return;
  }

  if (redirect_endpoint == "/metrics") {
    req->method = htp_method_GET;
    HandleMetrics(req);
    return;
  }

  if (redirect_endpoint == "/v2/models/stats") {
    // model statistics
    req->method = htp_method_GET;
    HandleModelStats(req);
    return;
  }

  std::string model_name, version, kind;
  if (RE2::FullMatch(
          redirect_endpoint, model_regex_, &model_name, &version, &kind)) {
    // Inference is the only model endpoint that stays a POST.
    if (kind == "infer") {
      HandleInfer(req, model_name, version);
      return;
    }
    if (kind == "ready") {
      // model ready
      req->method = htp_method_GET;
      HandleModelReady(req, model_name, version);
      return;
    }
    if (kind == "config") {
      // model configuration
      req->method = htp_method_GET;
      HandleModelConfig(req, model_name, version);
      return;
    }
    if (kind == "stats") {
      // model statistics
      req->method = htp_method_GET;
      HandleModelStats(req, model_name, version);
      return;
    }
    if (kind.empty()) {
      // model metadata
      req->method = htp_method_GET;
      HandleModelMetadata(req, model_name, version);
      return;
    }
    // An unrecognized kind falls through to the remaining routes.
  }

  std::string region, action, rest, repo_name;
  if (redirect_endpoint == "/v2") {
    // server metadata
    req->method = htp_method_GET;
    HandleServerMetadata(req);
    return;
  }

  if (RE2::FullMatch(redirect_endpoint, server_regex_, &rest)) {
    // server health
    req->method = htp_method_GET;
    HandleServerHealth(req, rest);
    return;
  }

  if (RE2::FullMatch(
          redirect_endpoint, systemsharedmemory_regex_, &region, &action)) {
    // Only read-only status queries are permitted through redirect.
    // Mutating operations (register/unregister) require the core
    // HTTP endpoint.
    if (action != "status") {
      reply_forbidden();
      return;
    }
    req->method = htp_method_GET;
    HandleSystemSharedMemory(req, region, action);
    return;
  }

  if (RE2::FullMatch(
          redirect_endpoint, cudasharedmemory_regex_, &region, &action)) {
    if (action != "status") {
      reply_forbidden();
      return;
    }
    req->method = htp_method_GET;
    HandleCudaSharedMemory(req, region, action);
    return;
  }

  if (RE2::FullMatch(
          redirect_endpoint, modelcontrol_regex_, &repo_name, &kind,
          &model_name, &action)) {
    // Only repository index queries are permitted through redirect.
    // Model load/unload requires the core HTTP endpoint.
    if (kind != "index") {
      reply_forbidden();
      return;
    }
    HandleRepositoryIndex(req, repo_name);
    return;
  }

  reply_bad_request();
}

// Replies to a GET request with the server metrics in Prometheus text
// format. Any other HTTP method gets "405 Method Not Allowed"; a failure
// while collecting or formatting the metrics gets "400 Bad Request".
void
VertexAiAPIServer::HandleMetrics(evhtp_request_t* req)
{
  // Mirror of HTTPMetricsServer::Handle()
  if (req->method != htp_method_GET) {
    evhtp_send_reply(req, EVHTP_RES_METHNALLOWED);
    return;
  }

  TRITONSERVER_Metrics* metrics = nullptr;
  const char* payload = nullptr;
  size_t payload_size = 0;

  // Collect, then format. The second step only runs if the first succeeded,
  // so 'err' always holds the first failure (or nullptr on success).
  TRITONSERVER_Error* err = TRITONSERVER_ServerMetrics(server_.get(), &metrics);
  if (err == nullptr) {
    err = TRITONSERVER_MetricsFormatted(
        metrics, TRITONSERVER_METRIC_PROMETHEUS, &payload, &payload_size);
  }

  const bool succeeded = (err == nullptr);
  if (succeeded) {
    // Copy the text into the reply before 'metrics' is released below
    // (presumably the formatted buffer belongs to the metrics object --
    // confirm against tritonserver.h).
    evbuffer_add(req->buffer_out, payload, payload_size);
  }

  // Both deleters are invoked unconditionally, including with nullptr.
  TRITONSERVER_MetricsDelete(metrics);
  TRITONSERVER_ErrorDelete(err);

  const evhtp_res res = succeeded ? EVHTP_RES_OK : EVHTP_RES_BADREQ;
  evhtp_send_reply(req, res);
}


// Factory for the Vertex AI HTTP frontend.
//
// Reads the prediction and health routes from the AIP_PREDICT_ROUTE and
// AIP_HEALTH_ROUTE environment variables, resolves the default model, and on
// success stores a new VertexAiAPIServer in '*http_server'.
//
// Default model resolution uses the index of models that are currently
// ready:
//  * 'default_model_name' empty: the index must contain exactly one model,
//    which becomes the default.
//  * 'default_model_name' set: a model with that name must be in the index.
//
// Returns nullptr on success. Returns a TRITONSERVER_ERROR_INVALID_ARG error
// when a route variable is missing or the default model cannot be resolved,
// or forwards the error from the failing server / JSON call. The caller owns
// the returned error.
TRITONSERVER_Error*
VertexAiAPIServer::Create(
    const std::shared_ptr<TRITONSERVER_Server>& server,
    triton::server::TraceManager* trace_manager,
    const std::shared_ptr<SharedMemoryManager>& shm_manager, const int32_t port,
    const std::string address, const int thread_cnt,
    const size_t max_input_size, const RestrictedFeatures& restricted_apis,
    std::string default_model_name, std::unique_ptr<HTTPServer>* http_server)
{
  auto predict_route = GetEnvironmentVariableOrDefault("AIP_PREDICT_ROUTE", "");
  auto health_route = GetEnvironmentVariableOrDefault("AIP_HEALTH_ROUTE", "");
  if (predict_route.empty()) {
    // The message must name the variable that is actually read above.
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "AIP_PREDICT_ROUTE is not defined for Vertex AI endpoint");
  } else if (health_route.empty()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "AIP_HEALTH_ROUTE is not defined for Vertex AI endpoint");
  }

  // Set default model
  {
    TRITONSERVER_Message* model_index_message = nullptr;
    RETURN_IF_ERR(TRITONSERVER_ServerModelIndex(
        server.get(), TRITONSERVER_INDEX_FLAG_READY, &model_index_message));

    // avoid memory leak when return early
    std::shared_ptr<TRITONSERVER_Message> managed_msg(
        model_index_message,
        [](TRITONSERVER_Message* msg) { TRITONSERVER_MessageDelete(msg); });

    const char* buffer;
    size_t byte_size;
    RETURN_IF_ERR(TRITONSERVER_MessageSerializeToJson(
        model_index_message, &buffer, &byte_size));

    triton::common::TritonJson::Value model_index_json;
    RETURN_IF_ERR(model_index_json.Parse(buffer, byte_size));

    if (default_model_name.empty()) {
      // Without an explicit default the choice must be unambiguous.
      if (model_index_json.ArraySize() != 1) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "Expect the model repository contains only a single model if "
            "default model is not specified");
      }

      triton::common::TritonJson::Value index_json;
      RETURN_IF_ERR(model_index_json.IndexAsObject(0, &index_json));
      const char* name;
      size_t namelen;
      RETURN_IF_ERR(index_json.MemberAsString("name", &name, &namelen));
      default_model_name = std::string(name, namelen);
    }
    // Check if default model is loaded
    else {
      bool found = false;
      for (size_t idx = 0; idx < model_index_json.ArraySize(); ++idx) {
        triton::common::TritonJson::Value index_json;
        RETURN_IF_ERR(model_index_json.IndexAsObject(idx, &index_json));

        const char* name;
        size_t namelen;
        RETURN_IF_ERR(index_json.MemberAsString("name", &name, &namelen));
        if (default_model_name == std::string(name, namelen)) {
          found = true;
          break;
        }
      }
      if (!found) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string("Expect the default model '") + default_model_name +
             "' is loaded")
                .c_str());
      }
    }
  }

  http_server->reset(new VertexAiAPIServer(
      server, trace_manager, shm_manager, port, address, thread_cnt,
      predict_route, health_route, default_model_name, max_input_size,
      restricted_apis));

  const std::string addr = address + ":" + std::to_string(port);
  LOG_INFO << "Started Vertex AI HTTPService at " << addr;

  return nullptr;
}

}}  // namespace triton::server


================================================
FILE: src/vertex_ai_server.h
================================================
// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include "http_server.h"

namespace triton { namespace server {

// Handle Vertex HTTP requests to inference server APIs
class VertexAiAPIServer : public HTTPAPIServer {
 public:
  // Factory: validates the Vertex AI environment and the default model, then
  // stores a new server in '*vertex_ai_server'.
  //
  // 'default_model_name' may be empty, in which case the implementation
  // requires that exactly one ready model exists and uses it as the default.
  // Returns nullptr on success, otherwise an error owned by the caller.
  static TRITONSERVER_Error* Create(
      const std::shared_ptr<TRITONSERVER_Server>& server,
      triton::server::TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      const int32_t port, const std::string address, const int thread_cnt,
      const size_t max_input_size, const RestrictedFeatures& restricted_apis,
      std::string default_model_name,
      std::unique_ptr<HTTPServer>* vertex_ai_server);

 private:
  // Instances are only created through Create().
  explicit VertexAiAPIServer(
      const std::shared_ptr<TRITONSERVER_Server>& server,
      triton::server::TraceManager* trace_manager,
      const std::shared_ptr<SharedMemoryManager>& shm_manager,
      const int32_t port, const std::string address, const int thread_cnt,
      const std::string& prediction_route, const std::string& health_route,
      const std::string& default_model_name, const size_t max_input_size,
      const RestrictedFeatures& restricted_apis);

  // Entry point for every incoming request on this frontend.
  void Handle(evhtp_request_t* req) override;

  // Replies to a GET request with the server metrics in Prometheus format.
  void HandleMetrics(evhtp_request_t* req);

  TRITONSERVER_Error* GetInferenceHeaderLength(
      evhtp_request_t* req, int32_t content_length,
      size_t* header_length) override;

  // Currently the compression schema hasn't been defined,
  // assume identity compression type is used for both request and response
  DataCompressor::Type GetRequestCompressionType(evhtp_request_t* req) override
  {
    return DataCompressor::Type::IDENTITY;
  }
  DataCompressor::Type GetResponseCompressionType(evhtp_request_t* req) override
  {
    return DataCompressor::Type::IDENTITY;
  }

  // Route matchers; presumably built from the 'prediction_route' and
  // 'health_route' constructor arguments -- confirm in the .cc file.
  re2::RE2 prediction_regex_;
  re2::RE2 health_regex_;
  // NOTE(review): meaning of this value is defined by the constructor, which
  // is not part of this header.
  const std::string health_mode_;

  // For default model, assume that only one version of "model" is presented
  const std::string model_name_;
  const std::string model_version_str_;

  // Shared string constants; their values are defined in the .cc file.
  static const std::string binary_mime_type_;
  static const std::string redirect_header_;
};

}}  // namespace triton::server


================================================
FILE: tools/add_copyright.py
================================================
# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import os
import re
import sys
from datetime import datetime
from typing import Callable, Dict, Optional, Sequence

# Current calendar year, captured once at import time. Kept as a string so it
# can be compared with, and spliced into, the year text found in files.
current_year = str(datetime.now().year)

# Parent of the directory that contains this script.
ROOT_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)

# LICENSE file under ROOT_DIR; its text is the body of every generated header.
LICENSE_PATH = os.path.join(ROOT_DIR, "LICENSE")

# Matches an NVIDIA copyright notice. Capture groups:
#   1: the optional " (c)" marker (including its leading space)
#   2: the optional first four-digit year of a range
#   3: the last (or only) four-digit year
COPYRIGHT_YEAR_PAT = re.compile(
    r"Copyright( \(c\))? (\d{4})?-?(\d{4}), NVIDIA CORPORATION"
)


def has_copyright(content: str) -> bool:
    """Return True if ``content`` contains an NVIDIA copyright notice.

    The notice is recognized with ``COPYRIGHT_YEAR_PAT`` and may appear
    anywhere in the text.
    """
    # Convert the optional match object into the bool the signature promises.
    return COPYRIGHT_YEAR_PAT.search(content) is not None


def update_copyright_year(
    path: str, content: Optional[str] = None, disallow_range: bool = False
) -> str:
    """
    Updates the copyright year in the provided file.
    If the copyright is not present in the file, this function has no effect.

    Args:
        path: The file to update. It is rewritten only if its text changes.
        content: The already-loaded text of ``path``. Read from disk when None.
        disallow_range: When True, always write the current year on its own
            instead of a "<first year>-<current year>" range.

    Returns:
        The text of the file after the update. This is the unmodified text
        when no copyright notice was found.
    """
    if content is None:
        with open(path, "r") as f:
            content = f.read()

    match = COPYRIGHT_YEAR_PAT.search(content)
    if match is None:
        # Honor the documented no-op instead of failing with AttributeError
        # when the file carries no notice.
        return content

    marker, first_year, last_year = match.groups()
    # A single-year notice only populates the last group.
    min_year = first_year or last_year

    new_copyright = f"Copyright{marker or ''} "
    # Years are four-digit strings, so string order equals numeric order.
    if min_year < current_year and not disallow_range:
        new_copyright += f"{min_year}-{current_year}"
    else:
        new_copyright += f"{current_year}"
    new_copyright += ", NVIDIA CORPORATION"

    # NOTE: every notice in the text is replaced with the notice derived from
    # the first match.
    updated_content = COPYRIGHT_YEAR_PAT.sub(new_copyright, content)

    if content != updated_content:
        with open(path, "w") as f:
            f.write(updated_content)

    return updated_content


def update_and_get_license() -> str:
    """
    Refreshes the copyright year of the LICENSE file, then loads it.

    The refresh keeps a year range when the LICENSE already starts at an
    older year, so the returned text may contain either a single year or a
    range ending in the current year.

    Returns:
        The full text of the LICENSE file after the refresh.
    """
    # The refresh must happen before reading so callers see the new year.
    update_copyright_year(LICENSE_PATH)

    with open(LICENSE_PATH, "r") as handle:
        license_text = handle.read()
    return license_text


# Loaded once when this module is imported. Note the side effect: the call may
# rewrite the LICENSE file on disk (to refresh its year) before reading it.
LICENSE_TEXT = update_and_get_license()

#
# Header manipulation helpers
#


def prefix_lines(content: str, prefix: str) -> str:
    """Return ``content`` with ``prefix`` placed in front of every line.

    Lines are joined with a single newline and the result has no trailing
    newline. Blank lines receive the prefix too, and empty ``content`` yields
    the bare prefix.
    """
    # NOTE: `textwrap.indent` could do something similar, but this is comment
    # prefixing rather than indentation, so it is spelled out explicitly.
    lines = content.splitlines() or [""]
    return "\n".join(f"{prefix}{line}" for line in lines)


def insert_after(regex: str) -> Callable[[str, str], str]:
    """
    Builds a callback that will insert a provided header after
    the specified regular expression. The expression is only matched at the
    very beginning of the file contents (`re.match` semantics). If it does
    not match there, the header will be inserted at the beginning of the file.

    Args:
        regex: The regular expression to match at the start of the contents.

    Returns:
        A callable taking `(header, content)` and returning the new contents.
        It can be used as the `add_header` argument to `update_or_add_header`.
    """

    def add_header(header: str, content: str) -> str:
        match = re.match(regex, content)

        if match is None:
            return header + "\n" + content

        # Split the contents right after the matched prefix.
        insertion_point = match.end()

        return content[:insertion_point] + f"{header}\n" + content[insertion_point:]

    return add_header


def update_or_add_header(
    path: str, header: str, add_header: Optional[Callable[[str, str], str]] = None
):
    """
    Ensures the file at `path` carries an up-to-date copyright header.

    A file that already has a copyright notice only gets its year refreshed.
    Otherwise `header` is inserted and the file is rewritten.

    Args:
        path: The path of the file.
        header: The contents of the copyright header.
        add_header: A callback that receives the copyright header and file contents and
            returns the new file contents. When omitted, the header is placed on
            top of the file, followed by a newline.
    """
    with open(path, "r") as source:
        original = source.read()

    # Existing notice: refresh the year in place and stop.
    if has_copyright(original):
        update_copyright_year(path, original)
        return

    if add_header:
        updated = add_header(header, original)
    else:
        updated = header + "\n" + original

    # Sanity check: exactly one notice is expected after the insertion. Any
    # other count means the header was duplicated or never added.
    notice_count = updated.count("Copyright (c)")
    if notice_count != 1:
        print(
            f"WARNING: Something went wrong while processing: {path}!\n"
            "Please check if the copyright header was included twice or wasn't added at all. "
        )

    with open(path, "w") as target:
        target.write(updated)


# Each file type requires slightly different handling when inserting the copyright
# header. For example, for C++ files, the header must be prefixed with `//` and for
# shell scripts, it must be prefixed with `#` and must be inserted *after* the shebang.
#
# This mapping stores callables that return whether a handler wants to process a specified
# file based on the path along with callables that will accept the file path and update
# it with the copyright header.
#
# Entries are tried in registration order and only the first handler whose
# predicate accepts the path is run.
FILE_TYPE_HANDLERS: Dict[Callable[[str], bool], Callable[[str], None]] = {}


#
# Path matching callables
# These allow registered functions to more easily specify what kinds of
# paths they should be applied to.
#
def has_ext(exts: Sequence[str]):
    """Build a predicate that accepts paths whose extension is in ``exts``.

    Extensions are compared exactly as `os.path.splitext` reports them, i.e.
    with the leading dot (".py") and case-sensitively.
    """

    def extension_matches(path: str):
        return os.path.splitext(path)[1] in exts

    return extension_matches


def basename_is(expected_path: str):
    """Build a predicate that accepts paths whose final component equals ``expected_path``."""

    def basename_matches(path):
        return os.path.basename(path) == expected_path

    return basename_matches


def path_contains(expected: str):
    """Build a predicate that accepts paths containing ``expected`` as a substring."""

    def substring_matches(path):
        return expected in path

    return substring_matches


def any_of(*funcs: Callable[[str], bool]) -> Callable[[str], bool]:
    """Combine path predicates into one that accepts a path if any of them does.

    Predicates are evaluated in the order given and evaluation stops at the
    first one that accepts. With no predicates, the result rejects every path.

    Args:
        *funcs: Path predicates, each taking a path and returning a truth value.

    Returns:
        A predicate taking a path and returning a bool.
    """

    def matches_any(path: str) -> bool:
        return any(func(path) for func in funcs)

    return matches_any


#
# File handlers for different types of files.
# Many types of files require very similar handling - those are combined where possible.
#


def register(match: Callable[[str], bool]):
    """Decorator factory that records a handler in ``FILE_TYPE_HANDLERS``.

    Args:
        match: Predicate deciding, from a path, whether the decorated handler
            should process that file.

    Returns:
        A decorator that stores the handler under ``match`` and hands the
        handler back unchanged, so the decorated name stays usable.
    """

    def decorator(handler):
        FILE_TYPE_HANDLERS[match] = handler
        return handler

    return decorator


@register(
    any_of(
        has_ext([".py", ".pyi", ".sh", ".bash", ".yaml", ".pbtxt"]),
        basename_is("CMakeLists.txt"),
        path_contains("Dockerfile"),
    )
)
def py_or_shell_like(path):
    """Handle files whose comments start with `#` (Python, shell, YAML, CMake, Dockerfiles, ...)."""
    header = prefix_lines(LICENSE_TEXT, "# ")
    # Insert the header *after* the shebang so the shebang stays on line 1.
    # `insert_after` only matches at the very start of the file, so a
    # shebang-like line further down is never used as the insertion point.
    place_after_shebang = insert_after(r"#!(.*)\n")
    update_or_add_header(path, header, place_after_shebang)


@register(has_ext([".cc", ".h"]))
def cpp(path):
    """Handle C++ sources and headers: every license line becomes a `//` comment."""
    header = prefix_lines(LICENSE_TEXT, "// ")
    update_or_add_header(path, header)


@register(has_ext([".tpl"]))
def tpl(path):
    """Handle `.tpl` templates: the license is wrapped in a `{{/* ... */}}` comment."""
    opening = "{{/*\n"
    closing = "\n*/}}"
    body = prefix_lines(LICENSE_TEXT, "# ")
    update_or_add_header(path, opening + body + closing)


@register(has_ext([".html", ".md"]))
def html_md(path):
    """Handle HTML and Markdown: the license is wrapped in a `<!-- ... -->` comment."""
    opening = "<!--\n"
    closing = "\n-->"
    body = prefix_lines(LICENSE_TEXT, "# ")
    update_or_add_header(path, opening + body + closing)


@register(has_ext([".rst"]))
def rst(path):
    """Handle reStructuredText: every license line gets the `.. ` comment prefix."""
    header = prefix_lines(LICENSE_TEXT, ".. ")
    update_or_add_header(path, header)


def add_copyrights(paths):
    """Add or refresh the copyright header of every file in ``paths``.

    Each path is handled by the first registered handler whose predicate
    accepts it. A file named LICENSE only has its year refreshed. Paths with
    no matching handler are reported with a warning and left untouched.

    Args:
        paths: Sized collection of file paths to process.
    """
    for path in paths:
        # Special case: LICENSE file only needs year update
        if os.path.basename(path) == "LICENSE":
            update_copyright_year(path)
            continue

        # Lazily scan the registry and stop at the first predicate that accepts.
        handler = next(
            (
                candidate
                for accepts, candidate in FILE_TYPE_HANDLERS.items()
                if accepts(path)
            ),
            None,
        )

        if handler is None:
            print(
                f"WARNING: No handler registered for file: {path}. Please add a new handler to {__file__}!"
            )
            continue

        handler(path)

    # Don't automatically 'git add' changes for now, make it more clear which
    # files were changed and have ability to see 'git diff' on them.
    # Note that this means the hook will modify files and then cancel the commit, which you will then
    # have to manually make again.
    # subprocess.run(["git", "add"] + paths)

    print(f"Processed copyright headers for {len(paths)} file(s).")


def main() -> int:
    """Command-line entry point: process every file named on the command line.

    Unrecognized options are ignored rather than rejected.

    Returns:
        Always 0, for use as the process exit status.
    """
    cli = argparse.ArgumentParser(
        description="Adds copyright headers to source files"
    )
    cli.add_argument("files", nargs="*")

    known_args, _unrecognized = cli.parse_known_args()
    add_copyrights(known_args.files)
    return 0


if __name__ == "__main__":
    # sys.exit is important here to avoid the test-related imports below during normal execution.
    # Everything after this block (including `import pytest`) is therefore
    # only reached when the module is imported, e.g. by the test runner.
    sys.exit(main())


#
# Integration Tests
#
import tempfile

import pytest


# Processes provided text through the copyright hook by writing it to a temporary file.
def process_text(content, extension):
    """Run ``content`` through the hook and return the resulting text.

    Args:
        content: Initial text of the temporary file.
        extension: File suffix (for example ".py"), used to select a handler.

    Returns:
        The text of the temporary file after `add_copyrights` has processed it.
    """
    with tempfile.NamedTemporaryFile("w+", suffix=extension) as tmp:
        tmp.write(content)
        # Push the text to disk: the hook re-opens the file by name.
        tmp.flush()

        add_copyrights([tmp.name])

        # Rewind so the read below starts at the beginning of the file.
        tmp.seek(0)
        processed = tmp.read()
    return processed


# We use this slightly weird hack to make sure the copyright hook does not do a text replacement
# of the parameters in the test, since they look exactly like copyright headers.
def make_copyright_text(text):
    """Return ``text`` with the word "Copyright" and a space in front of it.

    Building the notice at runtime keeps complete notices out of this source
    file, so the hook does not rewrite the test parameters.
    """
    return "Copyright {}".format(text)


@pytest.mark.parametrize(
    "content, expected",
    [
        # Convert to range if the year that's already present is older than the current year.
        # Covered both with and without the "(c)" marker.
        (
            make_copyright_text("(c) 2018, NVIDIA CORPORATION"),
            make_copyright_text(f"(c) 2018-{current_year}, NVIDIA CORPORATION"),
        ),
        (
            make_copyright_text("2018, NVIDIA CORPORATION"),
            make_copyright_text(f"2018-{current_year}, NVIDIA CORPORATION"),
        ),
        # No effect if the year is current:
        (
            make_copyright_text(f"(c) {current_year}, NVIDIA CORPORATION"),
            make_copyright_text(f"(c) {current_year}, NVIDIA CORPORATION"),
        ),
        (
            make_copyright_text(f"{current_year}, NVIDIA CORPORATION"),
            make_copyright_text(f"{current_year}, NVIDIA CORPORATION"),
        ),
        # If there is already a range, update the upper bound of the range:
        (
            make_copyright_text("(c) 2018-2023, NVIDIA CORPORATION"),
            make_copyright_text(f"(c) 2018-{current_year}, NVIDIA CORPORATION"),
        ),
    ],
)
def test_copyright_update(content, expected):
    """An existing notice is rewritten to end in the current year."""
    # We don't really care about the extension here - just needs to be something the hook will recognize.
    assert process_text(content, ".py") == expected


@pytest.mark.parametrize(
    "content, extension, expected",
    [
        # Empty files: the header starts with the comment prefix of the file type.
        ("", ".cc", f"// {make_copyright_text(f'(c) {current_year}')}"),
        ("", ".h", f"// {make_copyright_text(f'(c) {current_year}')}"),
        ("", ".py", f"# {make_copyright_text(f'(c) {current_year}')}"),
        ("", ".sh", f"# {make_copyright_text(f'(c) {current_year}')}"),
        # Make sure copyright comes after shebangs
        (
            "#!/bin/python\n",
            ".py",
            f"#!/bin/python\n# {make_copyright_text(f'(c) {current_year}')}",
        ),
        (
            "#!/bin/bash\n",
            ".sh",
            f"#!/bin/bash\n# {make_copyright_text(f'(c) {current_year}')}",
        ),
    ],
)
def test_adding_new_copyrights(content, extension, expected):
    """A file without a notice gets a new header; only its beginning is checked."""
    assert process_text(content, extension).startswith(expected)


def test_license_has_current_year():
    """The loaded LICENSE text ends its year (single or range) in the current year."""
    # Matching on the tail of the notice covers both the single-year form
    # and a range whose upper bound is the current year.
    expected_tail = f"{current_year}, NVIDIA CORPORATION"
    assert expected_tail in LICENSE_TEXT